1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* A network driver using virtio. 3 * 4 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation 5 */ 6 //#define DEBUG 7 #include <linux/netdevice.h> 8 #include <linux/etherdevice.h> 9 #include <linux/ethtool.h> 10 #include <linux/module.h> 11 #include <linux/virtio.h> 12 #include <linux/virtio_net.h> 13 #include <linux/bpf.h> 14 #include <linux/bpf_trace.h> 15 #include <linux/scatterlist.h> 16 #include <linux/if_vlan.h> 17 #include <linux/slab.h> 18 #include <linux/cpu.h> 19 #include <linux/average.h> 20 #include <linux/filter.h> 21 #include <linux/kernel.h> 22 #include <linux/dim.h> 23 #include <net/route.h> 24 #include <net/xdp.h> 25 #include <net/net_failover.h> 26 #include <net/netdev_rx_queue.h> 27 #include <net/netdev_queues.h> 28 #include <net/xdp_sock_drv.h> 29 30 static int napi_weight = NAPI_POLL_WEIGHT; 31 module_param(napi_weight, int, 0444); 32 33 static bool csum = true, gso = true, napi_tx = true; 34 module_param(csum, bool, 0444); 35 module_param(gso, bool, 0444); 36 module_param(napi_tx, bool, 0644); 37 38 /* FIXME: MTU in config. */ 39 #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN) 40 #define GOOD_COPY_LEN 128 41 42 #define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD) 43 44 /* Separating two types of XDP xmit */ 45 #define VIRTIO_XDP_TX BIT(0) 46 #define VIRTIO_XDP_REDIR BIT(1) 47 48 /* RX packet size EWMA. The average packet size is used to determine the packet 49 * buffer size when refilling RX rings. As the entire RX ring may be refilled 50 * at once, the weight is chosen so that the EWMA will be insensitive to short- 51 * term, transient changes in packet size. 52 */ 53 DECLARE_EWMA(pkt_len, 0, 64) 54 55 #define VIRTNET_DRIVER_VERSION "1.0.0" 56 57 static const unsigned long guest_offloads[] = { 58 VIRTIO_NET_F_GUEST_TSO4, 59 VIRTIO_NET_F_GUEST_TSO6, 60 VIRTIO_NET_F_GUEST_ECN, 61 VIRTIO_NET_F_GUEST_UFO, 62 VIRTIO_NET_F_GUEST_CSUM, 63 VIRTIO_NET_F_GUEST_USO4, 64 VIRTIO_NET_F_GUEST_USO6, 65 VIRTIO_NET_F_GUEST_HDRLEN 66 }; 67 68 #define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ 69 (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ 70 (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ 71 (1ULL << VIRTIO_NET_F_GUEST_UFO) | \ 72 (1ULL << VIRTIO_NET_F_GUEST_USO4) | \ 73 (1ULL << VIRTIO_NET_F_GUEST_USO6)) 74 75 struct virtnet_stat_desc { 76 char desc[ETH_GSTRING_LEN]; 77 size_t offset; 78 size_t qstat_offset; 79 }; 80 81 struct virtnet_sq_free_stats { 82 u64 packets; 83 u64 bytes; 84 u64 napi_packets; 85 u64 napi_bytes; 86 u64 xsk; 87 }; 88 89 struct virtnet_sq_stats { 90 struct u64_stats_sync syncp; 91 u64_stats_t packets; 92 u64_stats_t bytes; 93 u64_stats_t xdp_tx; 94 u64_stats_t xdp_tx_drops; 95 u64_stats_t kicks; 96 u64_stats_t tx_timeouts; 97 u64_stats_t stop; 98 u64_stats_t wake; 99 }; 100 101 struct virtnet_rq_stats { 102 struct u64_stats_sync syncp; 103 u64_stats_t packets; 104 u64_stats_t bytes; 105 u64_stats_t drops; 106 u64_stats_t xdp_packets; 107 u64_stats_t xdp_tx; 108 u64_stats_t xdp_redirects; 109 u64_stats_t xdp_drops; 110 u64_stats_t kicks; 111 }; 112 113 #define VIRTNET_SQ_STAT(name, m) {name, offsetof(struct virtnet_sq_stats, m), -1} 114 #define VIRTNET_RQ_STAT(name, m) {name, offsetof(struct virtnet_rq_stats, m), -1} 115 116 #define VIRTNET_SQ_STAT_QSTAT(name, m) \ 117 { \ 118 name, \ 119 offsetof(struct virtnet_sq_stats, m), \ 120 offsetof(struct netdev_queue_stats_tx, m), \ 121 } 122 123 #define VIRTNET_RQ_STAT_QSTAT(name, m) \ 124 { \ 125 name, \ 126 offsetof(struct 
virtnet_rq_stats, m), \ 127 offsetof(struct netdev_queue_stats_rx, m), \ 128 } 129 130 static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = { 131 VIRTNET_SQ_STAT("xdp_tx", xdp_tx), 132 VIRTNET_SQ_STAT("xdp_tx_drops", xdp_tx_drops), 133 VIRTNET_SQ_STAT("kicks", kicks), 134 VIRTNET_SQ_STAT("tx_timeouts", tx_timeouts), 135 }; 136 137 static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = { 138 VIRTNET_RQ_STAT("drops", drops), 139 VIRTNET_RQ_STAT("xdp_packets", xdp_packets), 140 VIRTNET_RQ_STAT("xdp_tx", xdp_tx), 141 VIRTNET_RQ_STAT("xdp_redirects", xdp_redirects), 142 VIRTNET_RQ_STAT("xdp_drops", xdp_drops), 143 VIRTNET_RQ_STAT("kicks", kicks), 144 }; 145 146 static const struct virtnet_stat_desc virtnet_sq_stats_desc_qstat[] = { 147 VIRTNET_SQ_STAT_QSTAT("packets", packets), 148 VIRTNET_SQ_STAT_QSTAT("bytes", bytes), 149 VIRTNET_SQ_STAT_QSTAT("stop", stop), 150 VIRTNET_SQ_STAT_QSTAT("wake", wake), 151 }; 152 153 static const struct virtnet_stat_desc virtnet_rq_stats_desc_qstat[] = { 154 VIRTNET_RQ_STAT_QSTAT("packets", packets), 155 VIRTNET_RQ_STAT_QSTAT("bytes", bytes), 156 }; 157 158 #define VIRTNET_STATS_DESC_CQ(name) \ 159 {#name, offsetof(struct virtio_net_stats_cvq, name), -1} 160 161 #define VIRTNET_STATS_DESC_RX(class, name) \ 162 {#name, offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), -1} 163 164 #define VIRTNET_STATS_DESC_TX(class, name) \ 165 {#name, offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name), -1} 166 167 168 static const struct virtnet_stat_desc virtnet_stats_cvq_desc[] = { 169 VIRTNET_STATS_DESC_CQ(command_num), 170 VIRTNET_STATS_DESC_CQ(ok_num), 171 }; 172 173 static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc[] = { 174 VIRTNET_STATS_DESC_RX(basic, packets), 175 VIRTNET_STATS_DESC_RX(basic, bytes), 176 177 VIRTNET_STATS_DESC_RX(basic, notifications), 178 VIRTNET_STATS_DESC_RX(basic, interrupts), 179 }; 180 181 static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc[] = { 182 VIRTNET_STATS_DESC_TX(basic, packets), 183 VIRTNET_STATS_DESC_TX(basic, bytes), 184 185 VIRTNET_STATS_DESC_TX(basic, notifications), 186 VIRTNET_STATS_DESC_TX(basic, interrupts), 187 }; 188 189 static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc[] = { 190 VIRTNET_STATS_DESC_RX(csum, needs_csum), 191 }; 192 193 static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc[] = { 194 VIRTNET_STATS_DESC_TX(gso, gso_packets_noseg), 195 VIRTNET_STATS_DESC_TX(gso, gso_bytes_noseg), 196 }; 197 198 static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc[] = { 199 VIRTNET_STATS_DESC_RX(speed, ratelimit_bytes), 200 }; 201 202 static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc[] = { 203 VIRTNET_STATS_DESC_TX(speed, ratelimit_bytes), 204 }; 205 206 #define VIRTNET_STATS_DESC_RX_QSTAT(class, name, qstat_field) \ 207 { \ 208 #name, \ 209 offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), \ 210 offsetof(struct netdev_queue_stats_rx, qstat_field), \ 211 } 212 213 #define VIRTNET_STATS_DESC_TX_QSTAT(class, name, qstat_field) \ 214 { \ 215 #name, \ 216 offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name), \ 217 offsetof(struct netdev_queue_stats_tx, qstat_field), \ 218 } 219 220 static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc_qstat[] = { 221 VIRTNET_STATS_DESC_RX_QSTAT(basic, drops, hw_drops), 222 VIRTNET_STATS_DESC_RX_QSTAT(basic, drop_overruns, hw_drop_overruns), 223 }; 224 225 static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc_qstat[] = { 226 
VIRTNET_STATS_DESC_TX_QSTAT(basic, drops, hw_drops), 227 VIRTNET_STATS_DESC_TX_QSTAT(basic, drop_malformed, hw_drop_errors), 228 }; 229 230 static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc_qstat[] = { 231 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_valid, csum_unnecessary), 232 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_none, csum_none), 233 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_bad, csum_bad), 234 }; 235 236 static const struct virtnet_stat_desc virtnet_stats_tx_csum_desc_qstat[] = { 237 VIRTNET_STATS_DESC_TX_QSTAT(csum, csum_none, csum_none), 238 VIRTNET_STATS_DESC_TX_QSTAT(csum, needs_csum, needs_csum), 239 }; 240 241 static const struct virtnet_stat_desc virtnet_stats_rx_gso_desc_qstat[] = { 242 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets, hw_gro_packets), 243 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes, hw_gro_bytes), 244 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets_coalesced, hw_gro_wire_packets), 245 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes_coalesced, hw_gro_wire_bytes), 246 }; 247 248 static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc_qstat[] = { 249 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_packets, hw_gso_packets), 250 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_bytes, hw_gso_bytes), 251 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments, hw_gso_wire_packets), 252 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments_bytes, hw_gso_wire_bytes), 253 }; 254 255 static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc_qstat[] = { 256 VIRTNET_STATS_DESC_RX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits), 257 }; 258 259 static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc_qstat[] = { 260 VIRTNET_STATS_DESC_TX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits), 261 }; 262 263 #define VIRTNET_Q_TYPE_RX 0 264 #define VIRTNET_Q_TYPE_TX 1 265 #define VIRTNET_Q_TYPE_CQ 2 266 267 struct virtnet_interrupt_coalesce { 268 u32 max_packets; 269 u32 max_usecs; 270 }; 271 272 /* The dma information of pages allocated at a time. */ 273 struct virtnet_rq_dma { 274 dma_addr_t addr; 275 u32 ref; 276 u16 len; 277 u16 need_sync; 278 }; 279 280 /* Internal representation of a send virtqueue */ 281 struct send_queue { 282 /* Virtqueue associated with this send _queue */ 283 struct virtqueue *vq; 284 285 /* TX: fragments + linear part + virtio header */ 286 struct scatterlist sg[MAX_SKB_FRAGS + 2]; 287 288 /* Name of the send queue: output.$index */ 289 char name[16]; 290 291 struct virtnet_sq_stats stats; 292 293 struct virtnet_interrupt_coalesce intr_coal; 294 295 struct napi_struct napi; 296 297 /* Record whether sq is in reset state. */ 298 bool reset; 299 300 struct xsk_buff_pool *xsk_pool; 301 302 dma_addr_t xsk_hdr_dma_addr; 303 }; 304 305 /* Internal representation of a receive virtqueue */ 306 struct receive_queue { 307 /* Virtqueue associated with this receive_queue */ 308 struct virtqueue *vq; 309 310 struct napi_struct napi; 311 312 struct bpf_prog __rcu *xdp_prog; 313 314 struct virtnet_rq_stats stats; 315 316 /* The number of rx notifications */ 317 u16 calls; 318 319 /* Is dynamic interrupt moderation enabled? */ 320 bool dim_enabled; 321 322 /* Used to protect dim_enabled and inter_coal */ 323 struct mutex dim_lock; 324 325 /* Dynamic Interrupt Moderation */ 326 struct dim dim; 327 328 u32 packets_in_napi; 329 330 struct virtnet_interrupt_coalesce intr_coal; 331 332 /* Chain pages by the private ptr. */ 333 struct page *pages; 334 335 /* Average packet length for mergeable receive buffers. 
*/ 336 struct ewma_pkt_len mrg_avg_pkt_len; 337 338 /* Page frag for packet buffer allocation. */ 339 struct page_frag alloc_frag; 340 341 /* RX: fragments + linear part + virtio header */ 342 struct scatterlist sg[MAX_SKB_FRAGS + 2]; 343 344 /* Min single buffer size for mergeable buffers case. */ 345 unsigned int min_buf_len; 346 347 /* Name of this receive queue: input.$index */ 348 char name[16]; 349 350 struct xdp_rxq_info xdp_rxq; 351 352 /* Record the last dma info to free after new pages are allocated. */ 353 struct virtnet_rq_dma *last_dma; 354 355 struct xsk_buff_pool *xsk_pool; 356 357 /* xdp rxq used by xsk */ 358 struct xdp_rxq_info xsk_rxq_info; 359 360 struct xdp_buff **xsk_buffs; 361 }; 362
363 /* This structure can contain an rss message with the maximum settings for indirection table and key size. 364 * Note that the default structure describing an RSS configuration, virtio_net_rss_config, 365 * contains the same info but can't carry the table values. 366 * In any case, the structure is passed to the virtio hw through sg_buf split into parts, 367 * because table sizes may differ according to the device configuration. 368 */ 369 #define VIRTIO_NET_RSS_MAX_KEY_SIZE 40 370 struct virtio_net_ctrl_rss { 371 u32 hash_types; 372 u16 indirection_table_mask; 373 u16 unclassified_queue; 374 u16 hash_cfg_reserved; /* for HASH_CONFIG (see virtio_net_hash_config for details) */ 375 u16 max_tx_vq; 376 u8 hash_key_length; 377 u8 key[VIRTIO_NET_RSS_MAX_KEY_SIZE]; 378 379 u16 *indirection_table; 380 }; 381 382 /* Control VQ buffers: protected by the rtnl lock */ 383 struct control_buf { 384 struct virtio_net_ctrl_hdr hdr; 385 virtio_net_ctrl_ack status; 386 }; 387
388 struct virtnet_info { 389 struct virtio_device *vdev; 390 struct virtqueue *cvq; 391 struct net_device *dev; 392 struct send_queue *sq; 393 struct receive_queue *rq; 394 unsigned int status; 395 396 /* Max # of queue pairs supported by the device */ 397 u16 max_queue_pairs; 398 399 /* # of queue pairs currently used by the driver */ 400 u16 curr_queue_pairs; 401 402 /* # of XDP queue pairs currently used by the driver */ 403 u16 xdp_queue_pairs; 404 405 /* xdp_queue_pairs may be 0 even when an XDP program is loaded, so track XDP enablement separately. */ 406 bool xdp_enabled; 407 408 /* I like... big packets and I cannot lie! */ 409 bool big_packets; 410 411 /* number of sg entries allocated for big packets */ 412 unsigned int big_packets_num_skbfrags; 413 414 /* Host will merge rx buffers for big packets (shake it! shake it!) */ 415 bool mergeable_rx_bufs; 416 417 /* Host supports rss and/or hash report */ 418 bool has_rss; 419 bool has_rss_hash_report; 420 u8 rss_key_size; 421 u16 rss_indir_table_size; 422 u32 rss_hash_types_supported; 423 u32 rss_hash_types_saved; 424 struct virtio_net_ctrl_rss rss; 425 426 /* Has control virtqueue */ 427 bool has_cvq; 428 429 /* Lock to protect the control VQ */ 430 struct mutex cvq_lock; 431 432 /* Host can handle any s/g split between our header and packet data */ 433 bool any_header_sg; 434 435 /* Packet virtio header size */ 436 u8 hdr_len; 437 438 /* Work struct for delayed refilling if we run low on memory. */ 439 struct delayed_work refill; 440 441 /* Is delayed refill enabled? */ 442 bool refill_enabled; 443 444 /* The lock to synchronize the access to refill_enabled */ 445 spinlock_t refill_lock; 446 447 /* Work struct for config space updates */ 448 struct work_struct config_work; 449 450 /* Work struct for setting rx mode */ 451 struct work_struct rx_mode_work; 452 453 /* OK to queue work setting RX mode?
*/ 454 bool rx_mode_work_enabled; 455 456 /* Is the affinity hint set for virtqueues? */ 457 bool affinity_hint_set; 458 459 /* CPU hotplug instances for online & dead */ 460 struct hlist_node node; 461 struct hlist_node node_dead; 462 463 struct control_buf *ctrl; 464 465 /* Ethtool settings */ 466 u8 duplex; 467 u32 speed; 468 469 /* Is rx dynamic interrupt moderation enabled? */ 470 bool rx_dim_enabled; 471 472 /* Interrupt coalescing settings */ 473 struct virtnet_interrupt_coalesce intr_coal_tx; 474 struct virtnet_interrupt_coalesce intr_coal_rx; 475 476 unsigned long guest_offloads; 477 unsigned long guest_offloads_capable; 478 479 /* failover when STANDBY feature enabled */ 480 struct failover *failover; 481 482 u64 device_stats_cap; 483 }; 484
485 struct padded_vnet_hdr { 486 struct virtio_net_hdr_v1_hash hdr; 487 /* 488 * hdr is in a separate sg buffer, and the data sg buffer shares the same 489 * page with this header sg. This padding makes the next sg 16-byte aligned 490 * after the header. 491 */ 492 char padding[12]; 493 }; 494 495 struct virtio_net_common_hdr { 496 union { 497 struct virtio_net_hdr hdr; 498 struct virtio_net_hdr_mrg_rxbuf mrg_hdr; 499 struct virtio_net_hdr_v1_hash hash_v1_hdr; 500 }; 501 }; 502 503 static struct virtio_net_common_hdr xsk_hdr; 504
505 static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf); 506 static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp, 507 struct net_device *dev, 508 unsigned int *xdp_xmit, 509 struct virtnet_rq_stats *stats); 510 static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq, 511 struct sk_buff *skb, u8 flags); 512 static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb, 513 struct sk_buff *curr_skb, 514 struct page *page, void *buf, 515 int len, int truesize); 516 static void virtnet_xsk_completed(struct send_queue *sq, int num); 517 518 enum virtnet_xmit_type { 519 VIRTNET_XMIT_TYPE_SKB, 520 VIRTNET_XMIT_TYPE_SKB_ORPHAN, 521 VIRTNET_XMIT_TYPE_XDP, 522 VIRTNET_XMIT_TYPE_XSK, 523 }; 524
525 static int rss_indirection_table_alloc(struct virtio_net_ctrl_rss *rss, u16 indir_table_size) 526 { 527 if (!indir_table_size) { 528 rss->indirection_table = NULL; 529 return 0; 530 } 531 532 rss->indirection_table = kmalloc_array(indir_table_size, sizeof(u16), GFP_KERNEL); 533 if (!rss->indirection_table) 534 return -ENOMEM; 535 536 return 0; 537 } 538 539 static void rss_indirection_table_free(struct virtio_net_ctrl_rss *rss) 540 { 541 kfree(rss->indirection_table); 542 } 543 544 /* We use the last two bits of the pointer to distinguish the xmit type.
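* The entries stored in the TX ring (sk_buff, xdp_frame pointers) are at least
* 4-byte aligned, so bits 0-1 are free to carry a virtnet_xmit_type value:
* virtnet_xmit_ptr_pack() ORs the type in and virtnet_xmit_ptr_unpack() masks it
* back out. XSK entries are the exception: no pointer is kept at all, the frame
* length is stored shifted left by VIRTIO_XSK_FLAG_OFFSET with the XSK type in
* the low bits (see virtnet_xsk_to_ptr() and virtnet_ptr_to_xsk_buff_len()).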
*/ 545 #define VIRTNET_XMIT_TYPE_MASK (BIT(0) | BIT(1)) 546 547 #define VIRTIO_XSK_FLAG_OFFSET 2 548 549 static enum virtnet_xmit_type virtnet_xmit_ptr_unpack(void **ptr) 550 { 551 unsigned long p = (unsigned long)*ptr; 552 553 *ptr = (void *)(p & ~VIRTNET_XMIT_TYPE_MASK); 554 555 return p & VIRTNET_XMIT_TYPE_MASK; 556 } 557 558 static void *virtnet_xmit_ptr_pack(void *ptr, enum virtnet_xmit_type type) 559 { 560 return (void *)((unsigned long)ptr | type); 561 } 562 563 static int virtnet_add_outbuf(struct send_queue *sq, int num, void *data, 564 enum virtnet_xmit_type type) 565 { 566 return virtqueue_add_outbuf(sq->vq, sq->sg, num, 567 virtnet_xmit_ptr_pack(data, type), 568 GFP_ATOMIC); 569 } 570 571 static u32 virtnet_ptr_to_xsk_buff_len(void *ptr) 572 { 573 return ((unsigned long)ptr) >> VIRTIO_XSK_FLAG_OFFSET; 574 } 575 576 static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len) 577 { 578 sg_dma_address(sg) = addr; 579 sg_dma_len(sg) = len; 580 } 581 582 static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq, 583 bool in_napi, struct virtnet_sq_free_stats *stats) 584 { 585 struct xdp_frame *frame; 586 struct sk_buff *skb; 587 unsigned int len; 588 void *ptr; 589 590 while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { 591 switch (virtnet_xmit_ptr_unpack(&ptr)) { 592 case VIRTNET_XMIT_TYPE_SKB: 593 skb = ptr; 594 595 pr_debug("Sent skb %p\n", skb); 596 stats->napi_packets++; 597 stats->napi_bytes += skb->len; 598 napi_consume_skb(skb, in_napi); 599 break; 600 601 case VIRTNET_XMIT_TYPE_SKB_ORPHAN: 602 skb = ptr; 603 604 stats->packets++; 605 stats->bytes += skb->len; 606 napi_consume_skb(skb, in_napi); 607 break; 608 609 case VIRTNET_XMIT_TYPE_XDP: 610 frame = ptr; 611 612 stats->packets++; 613 stats->bytes += xdp_get_frame_len(frame); 614 xdp_return_frame(frame); 615 break; 616 617 case VIRTNET_XMIT_TYPE_XSK: 618 stats->bytes += virtnet_ptr_to_xsk_buff_len(ptr); 619 stats->xsk++; 620 break; 621 } 622 } 623 netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes); 624 } 625 626 static void virtnet_free_old_xmit(struct send_queue *sq, 627 struct netdev_queue *txq, 628 bool in_napi, 629 struct virtnet_sq_free_stats *stats) 630 { 631 __free_old_xmit(sq, txq, in_napi, stats); 632 633 if (stats->xsk) 634 virtnet_xsk_completed(sq, stats->xsk); 635 } 636 637 /* Converting between virtqueue no. and kernel tx/rx queue no. 638 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq 639 */ 640 static int vq2txq(struct virtqueue *vq) 641 { 642 return (vq->index - 1) / 2; 643 } 644 645 static int txq2vq(int txq) 646 { 647 return txq * 2 + 1; 648 } 649 650 static int vq2rxq(struct virtqueue *vq) 651 { 652 return vq->index / 2; 653 } 654 655 static int rxq2vq(int rxq) 656 { 657 return rxq * 2; 658 } 659 660 static int vq_type(struct virtnet_info *vi, int qid) 661 { 662 if (qid == vi->max_queue_pairs * 2) 663 return VIRTNET_Q_TYPE_CQ; 664 665 if (qid % 2) 666 return VIRTNET_Q_TYPE_TX; 667 668 return VIRTNET_Q_TYPE_RX; 669 } 670 671 static inline struct virtio_net_common_hdr * 672 skb_vnet_common_hdr(struct sk_buff *skb) 673 { 674 return (struct virtio_net_common_hdr *)skb->cb; 675 } 676 677 /* 678 * private is used to chain pages for big packets, put the whole 679 * most recent used list in the beginning for reuse 680 */ 681 static void give_pages(struct receive_queue *rq, struct page *page) 682 { 683 struct page *end; 684 685 /* Find end of list, sew whole thing into vi->rq.pages. 
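* Pages are chained through page->private: walk to the tail of the list being
* returned, point its private field at the current rq->pages head, then make the
* returned list the new head so the most recently used pages are handed back out
* first by get_a_page().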
*/ 686 for (end = page; end->private; end = (struct page *)end->private); 687 end->private = (unsigned long)rq->pages; 688 rq->pages = page; 689 } 690 691 static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask) 692 { 693 struct page *p = rq->pages; 694 695 if (p) { 696 rq->pages = (struct page *)p->private; 697 /* clear private here, it is used to chain pages */ 698 p->private = 0; 699 } else 700 p = alloc_page(gfp_mask); 701 return p; 702 } 703 704 static void virtnet_rq_free_buf(struct virtnet_info *vi, 705 struct receive_queue *rq, void *buf) 706 { 707 if (vi->mergeable_rx_bufs) 708 put_page(virt_to_head_page(buf)); 709 else if (vi->big_packets) 710 give_pages(rq, buf); 711 else 712 put_page(virt_to_head_page(buf)); 713 } 714 715 static void enable_delayed_refill(struct virtnet_info *vi) 716 { 717 spin_lock_bh(&vi->refill_lock); 718 vi->refill_enabled = true; 719 spin_unlock_bh(&vi->refill_lock); 720 } 721 722 static void disable_delayed_refill(struct virtnet_info *vi) 723 { 724 spin_lock_bh(&vi->refill_lock); 725 vi->refill_enabled = false; 726 spin_unlock_bh(&vi->refill_lock); 727 } 728 729 static void enable_rx_mode_work(struct virtnet_info *vi) 730 { 731 rtnl_lock(); 732 vi->rx_mode_work_enabled = true; 733 rtnl_unlock(); 734 } 735 736 static void disable_rx_mode_work(struct virtnet_info *vi) 737 { 738 rtnl_lock(); 739 vi->rx_mode_work_enabled = false; 740 rtnl_unlock(); 741 } 742 743 static void virtqueue_napi_schedule(struct napi_struct *napi, 744 struct virtqueue *vq) 745 { 746 if (napi_schedule_prep(napi)) { 747 virtqueue_disable_cb(vq); 748 __napi_schedule(napi); 749 } 750 } 751 752 static bool virtqueue_napi_complete(struct napi_struct *napi, 753 struct virtqueue *vq, int processed) 754 { 755 int opaque; 756 757 opaque = virtqueue_enable_cb_prepare(vq); 758 if (napi_complete_done(napi, processed)) { 759 if (unlikely(virtqueue_poll(vq, opaque))) 760 virtqueue_napi_schedule(napi, vq); 761 else 762 return true; 763 } else { 764 virtqueue_disable_cb(vq); 765 } 766 767 return false; 768 } 769 770 static void skb_xmit_done(struct virtqueue *vq) 771 { 772 struct virtnet_info *vi = vq->vdev->priv; 773 struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi; 774 775 /* Suppress further interrupts. */ 776 virtqueue_disable_cb(vq); 777 778 if (napi->weight) 779 virtqueue_napi_schedule(napi, vq); 780 else 781 /* We were probably waiting for more output buffers. 
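* (In the no-NAPI case the completed buffers are reclaimed later by
* free_old_xmit() from the transmit path; here we only need to restart the
* stopped subqueue.)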
*/ 782 netif_wake_subqueue(vi->dev, vq2txq(vq)); 783 } 784 785 #define MRG_CTX_HEADER_SHIFT 22 786 static void *mergeable_len_to_ctx(unsigned int truesize, 787 unsigned int headroom) 788 { 789 return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize); 790 } 791 792 static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx) 793 { 794 return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT; 795 } 796 797 static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx) 798 { 799 return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1); 800 } 801 802 static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen, 803 unsigned int headroom, 804 unsigned int len) 805 { 806 struct sk_buff *skb; 807 808 skb = build_skb(buf, buflen); 809 if (unlikely(!skb)) 810 return NULL; 811 812 skb_reserve(skb, headroom); 813 skb_put(skb, len); 814 815 return skb; 816 } 817 818 /* Called from bottom half context */ 819 static struct sk_buff *page_to_skb(struct virtnet_info *vi, 820 struct receive_queue *rq, 821 struct page *page, unsigned int offset, 822 unsigned int len, unsigned int truesize, 823 unsigned int headroom) 824 { 825 struct sk_buff *skb; 826 struct virtio_net_common_hdr *hdr; 827 unsigned int copy, hdr_len, hdr_padded_len; 828 struct page *page_to_free = NULL; 829 int tailroom, shinfo_size; 830 char *p, *hdr_p, *buf; 831 832 p = page_address(page) + offset; 833 hdr_p = p; 834 835 hdr_len = vi->hdr_len; 836 if (vi->mergeable_rx_bufs) 837 hdr_padded_len = hdr_len; 838 else 839 hdr_padded_len = sizeof(struct padded_vnet_hdr); 840 841 buf = p - headroom; 842 len -= hdr_len; 843 offset += hdr_padded_len; 844 p += hdr_padded_len; 845 tailroom = truesize - headroom - hdr_padded_len - len; 846 847 shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 848 849 if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) { 850 skb = virtnet_build_skb(buf, truesize, p - buf, len); 851 if (unlikely(!skb)) 852 return NULL; 853 854 page = (struct page *)page->private; 855 if (page) 856 give_pages(rq, page); 857 goto ok; 858 } 859 860 /* copy small packet so we can reuse these pages for small data */ 861 skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN); 862 if (unlikely(!skb)) 863 return NULL; 864 865 /* Copy all frame if it fits skb->head, otherwise 866 * we let virtio_net_hdr_to_skb() and GRO pull headers as needed. 867 */ 868 if (len <= skb_tailroom(skb)) 869 copy = len; 870 else 871 copy = ETH_HLEN; 872 skb_put_data(skb, p, copy); 873 874 len -= copy; 875 offset += copy; 876 877 if (vi->mergeable_rx_bufs) { 878 if (len) 879 skb_add_rx_frag(skb, 0, page, offset, len, truesize); 880 else 881 page_to_free = page; 882 goto ok; 883 } 884 885 /* 886 * Verify that we can indeed put this data into a skb. 887 * This is here to handle cases when the device erroneously 888 * tries to receive more than is possible. This is usually 889 * the case of a broken device. 
890 */ 891 if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) { 892 net_dbg_ratelimited("%s: too much data\n", skb->dev->name); 893 dev_kfree_skb(skb); 894 return NULL; 895 } 896 BUG_ON(offset >= PAGE_SIZE); 897 while (len) { 898 unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len); 899 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset, 900 frag_size, truesize); 901 len -= frag_size; 902 page = (struct page *)page->private; 903 offset = 0; 904 } 905 906 if (page) 907 give_pages(rq, page); 908 909 ok: 910 hdr = skb_vnet_common_hdr(skb); 911 memcpy(hdr, hdr_p, hdr_len); 912 if (page_to_free) 913 put_page(page_to_free); 914 915 return skb; 916 } 917 918 static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len) 919 { 920 struct virtnet_info *vi = rq->vq->vdev->priv; 921 struct page *page = virt_to_head_page(buf); 922 struct virtnet_rq_dma *dma; 923 void *head; 924 int offset; 925 926 BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs); 927 928 head = page_address(page); 929 930 dma = head; 931 932 --dma->ref; 933 934 if (dma->need_sync && len) { 935 offset = buf - (head + sizeof(*dma)); 936 937 virtqueue_dma_sync_single_range_for_cpu(rq->vq, dma->addr, 938 offset, len, 939 DMA_FROM_DEVICE); 940 } 941 942 if (dma->ref) 943 return; 944 945 virtqueue_dma_unmap_single_attrs(rq->vq, dma->addr, dma->len, 946 DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); 947 put_page(page); 948 } 949 950 static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx) 951 { 952 struct virtnet_info *vi = rq->vq->vdev->priv; 953 void *buf; 954 955 BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs); 956 957 buf = virtqueue_get_buf_ctx(rq->vq, len, ctx); 958 if (buf) 959 virtnet_rq_unmap(rq, buf, *len); 960 961 return buf; 962 } 963 964 static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len) 965 { 966 struct virtnet_info *vi = rq->vq->vdev->priv; 967 struct virtnet_rq_dma *dma; 968 dma_addr_t addr; 969 u32 offset; 970 void *head; 971 972 BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs); 973 974 head = page_address(rq->alloc_frag.page); 975 976 offset = buf - head; 977 978 dma = head; 979 980 addr = dma->addr - sizeof(*dma) + offset; 981 982 sg_init_table(rq->sg, 1); 983 sg_fill_dma(rq->sg, addr, len); 984 } 985 986 static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp) 987 { 988 struct page_frag *alloc_frag = &rq->alloc_frag; 989 struct virtnet_info *vi = rq->vq->vdev->priv; 990 struct virtnet_rq_dma *dma; 991 void *buf, *head; 992 dma_addr_t addr; 993 994 BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs); 995 996 head = page_address(alloc_frag->page); 997 998 dma = head; 999 1000 /* new pages */ 1001 if (!alloc_frag->offset) { 1002 if (rq->last_dma) { 1003 /* Now, the new page is allocated, the last dma 1004 * will not be used. So the dma can be unmapped 1005 * if the ref is 0. 1006 */ 1007 virtnet_rq_unmap(rq, rq->last_dma, 0); 1008 rq->last_dma = NULL; 1009 } 1010 1011 dma->len = alloc_frag->size - sizeof(*dma); 1012 1013 addr = virtqueue_dma_map_single_attrs(rq->vq, dma + 1, 1014 dma->len, DMA_FROM_DEVICE, 0); 1015 if (virtqueue_dma_mapping_error(rq->vq, addr)) 1016 return NULL; 1017 1018 dma->addr = addr; 1019 dma->need_sync = virtqueue_dma_need_sync(rq->vq, addr); 1020 1021 /* Add a reference to dma to prevent the entire dma from 1022 * being released during error handling. This reference 1023 * will be freed after the pages are no longer used. 
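* dma->ref counts the buffers carved out of this page frag plus this initial
* reference; virtnet_rq_unmap() drops one reference per completed buffer and
* only unmaps and releases the page once the count reaches zero. The initial
* reference itself is dropped by the virtnet_rq_unmap(rq, rq->last_dma, 0)
* call above once a new page has been allocated.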
1024 */ 1025 get_page(alloc_frag->page); 1026 dma->ref = 1; 1027 alloc_frag->offset = sizeof(*dma); 1028 1029 rq->last_dma = dma; 1030 } 1031 1032 ++dma->ref; 1033 1034 buf = head + alloc_frag->offset; 1035 1036 get_page(alloc_frag->page); 1037 alloc_frag->offset += size; 1038 1039 return buf; 1040 } 1041 1042 static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf) 1043 { 1044 struct virtnet_info *vi = vq->vdev->priv; 1045 struct receive_queue *rq; 1046 int i = vq2rxq(vq); 1047 1048 rq = &vi->rq[i]; 1049 1050 if (rq->xsk_pool) { 1051 xsk_buff_free((struct xdp_buff *)buf); 1052 return; 1053 } 1054 1055 if (!vi->big_packets || vi->mergeable_rx_bufs) 1056 virtnet_rq_unmap(rq, buf, 0); 1057 1058 virtnet_rq_free_buf(vi, rq, buf); 1059 } 1060 1061 static void free_old_xmit(struct send_queue *sq, struct netdev_queue *txq, 1062 bool in_napi) 1063 { 1064 struct virtnet_sq_free_stats stats = {0}; 1065 1066 virtnet_free_old_xmit(sq, txq, in_napi, &stats); 1067 1068 /* Avoid overhead when no packets have been processed 1069 * happens when called speculatively from start_xmit. 1070 */ 1071 if (!stats.packets && !stats.napi_packets) 1072 return; 1073 1074 u64_stats_update_begin(&sq->stats.syncp); 1075 u64_stats_add(&sq->stats.bytes, stats.bytes + stats.napi_bytes); 1076 u64_stats_add(&sq->stats.packets, stats.packets + stats.napi_packets); 1077 u64_stats_update_end(&sq->stats.syncp); 1078 } 1079 1080 static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) 1081 { 1082 if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) 1083 return false; 1084 else if (q < vi->curr_queue_pairs) 1085 return true; 1086 else 1087 return false; 1088 } 1089 1090 static void check_sq_full_and_disable(struct virtnet_info *vi, 1091 struct net_device *dev, 1092 struct send_queue *sq) 1093 { 1094 bool use_napi = sq->napi.weight; 1095 int qnum; 1096 1097 qnum = sq - vi->sq; 1098 1099 /* If running out of space, stop queue to avoid getting packets that we 1100 * are then unable to transmit. 1101 * An alternative would be to force queuing layer to requeue the skb by 1102 * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be 1103 * returned in a normal path of operation: it means that driver is not 1104 * maintaining the TX queue stop/start state properly, and causes 1105 * the stack to do a non-trivial amount of useless work. 1106 * Since most packets only take 1 or 2 ring slots, stopping the queue 1107 * early means 16 slots are typically wasted. 1108 */ 1109 if (sq->vq->num_free < 2+MAX_SKB_FRAGS) { 1110 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); 1111 1112 netif_tx_stop_queue(txq); 1113 u64_stats_update_begin(&sq->stats.syncp); 1114 u64_stats_inc(&sq->stats.stop); 1115 u64_stats_update_end(&sq->stats.syncp); 1116 if (use_napi) { 1117 if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) 1118 virtqueue_napi_schedule(&sq->napi, sq->vq); 1119 } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { 1120 /* More just got used, free them then recheck. 
*/ 1121 free_old_xmit(sq, txq, false); 1122 if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) { 1123 netif_start_subqueue(dev, qnum); 1124 u64_stats_update_begin(&sq->stats.syncp); 1125 u64_stats_inc(&sq->stats.wake); 1126 u64_stats_update_end(&sq->stats.syncp); 1127 virtqueue_disable_cb(sq->vq); 1128 } 1129 } 1130 } 1131 } 1132 1133 static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi, 1134 struct receive_queue *rq, void *buf, u32 len) 1135 { 1136 struct xdp_buff *xdp; 1137 u32 bufsize; 1138 1139 xdp = (struct xdp_buff *)buf; 1140 1141 bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool) + vi->hdr_len; 1142 1143 if (unlikely(len > bufsize)) { 1144 pr_debug("%s: rx error: len %u exceeds truesize %u\n", 1145 vi->dev->name, len, bufsize); 1146 DEV_STATS_INC(vi->dev, rx_length_errors); 1147 xsk_buff_free(xdp); 1148 return NULL; 1149 } 1150 1151 xsk_buff_set_size(xdp, len); 1152 xsk_buff_dma_sync_for_cpu(xdp); 1153 1154 return xdp; 1155 } 1156 1157 static struct sk_buff *xsk_construct_skb(struct receive_queue *rq, 1158 struct xdp_buff *xdp) 1159 { 1160 unsigned int metasize = xdp->data - xdp->data_meta; 1161 struct sk_buff *skb; 1162 unsigned int size; 1163 1164 size = xdp->data_end - xdp->data_hard_start; 1165 skb = napi_alloc_skb(&rq->napi, size); 1166 if (unlikely(!skb)) { 1167 xsk_buff_free(xdp); 1168 return NULL; 1169 } 1170 1171 skb_reserve(skb, xdp->data_meta - xdp->data_hard_start); 1172 1173 size = xdp->data_end - xdp->data_meta; 1174 memcpy(__skb_put(skb, size), xdp->data_meta, size); 1175 1176 if (metasize) { 1177 __skb_pull(skb, metasize); 1178 skb_metadata_set(skb, metasize); 1179 } 1180 1181 xsk_buff_free(xdp); 1182 1183 return skb; 1184 } 1185 1186 static struct sk_buff *virtnet_receive_xsk_small(struct net_device *dev, struct virtnet_info *vi, 1187 struct receive_queue *rq, struct xdp_buff *xdp, 1188 unsigned int *xdp_xmit, 1189 struct virtnet_rq_stats *stats) 1190 { 1191 struct bpf_prog *prog; 1192 u32 ret; 1193 1194 ret = XDP_PASS; 1195 rcu_read_lock(); 1196 prog = rcu_dereference(rq->xdp_prog); 1197 if (prog) 1198 ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats); 1199 rcu_read_unlock(); 1200 1201 switch (ret) { 1202 case XDP_PASS: 1203 return xsk_construct_skb(rq, xdp); 1204 1205 case XDP_TX: 1206 case XDP_REDIRECT: 1207 return NULL; 1208 1209 default: 1210 /* drop packet */ 1211 xsk_buff_free(xdp); 1212 u64_stats_inc(&stats->drops); 1213 return NULL; 1214 } 1215 } 1216 1217 static void xsk_drop_follow_bufs(struct net_device *dev, 1218 struct receive_queue *rq, 1219 u32 num_buf, 1220 struct virtnet_rq_stats *stats) 1221 { 1222 struct xdp_buff *xdp; 1223 u32 len; 1224 1225 while (num_buf-- > 1) { 1226 xdp = virtqueue_get_buf(rq->vq, &len); 1227 if (unlikely(!xdp)) { 1228 pr_debug("%s: rx error: %d buffers missing\n", 1229 dev->name, num_buf); 1230 DEV_STATS_INC(dev, rx_length_errors); 1231 break; 1232 } 1233 u64_stats_add(&stats->bytes, len); 1234 xsk_buff_free(xdp); 1235 } 1236 } 1237 1238 static int xsk_append_merge_buffer(struct virtnet_info *vi, 1239 struct receive_queue *rq, 1240 struct sk_buff *head_skb, 1241 u32 num_buf, 1242 struct virtio_net_hdr_mrg_rxbuf *hdr, 1243 struct virtnet_rq_stats *stats) 1244 { 1245 struct sk_buff *curr_skb; 1246 struct xdp_buff *xdp; 1247 u32 len, truesize; 1248 struct page *page; 1249 void *buf; 1250 1251 curr_skb = head_skb; 1252 1253 while (--num_buf) { 1254 buf = virtqueue_get_buf(rq->vq, &len); 1255 if (unlikely(!buf)) { 1256 pr_debug("%s: rx error: %d buffers out of %d missing\n", 1257 vi->dev->name, num_buf, 1258 
virtio16_to_cpu(vi->vdev, 1259 hdr->num_buffers)); 1260 DEV_STATS_INC(vi->dev, rx_length_errors); 1261 return -EINVAL; 1262 } 1263 1264 u64_stats_add(&stats->bytes, len); 1265 1266 xdp = buf_to_xdp(vi, rq, buf, len); 1267 if (!xdp) 1268 goto err; 1269 1270 buf = napi_alloc_frag(len); 1271 if (!buf) { 1272 xsk_buff_free(xdp); 1273 goto err; 1274 } 1275 1276 memcpy(buf, xdp->data - vi->hdr_len, len); 1277 1278 xsk_buff_free(xdp); 1279 1280 page = virt_to_page(buf); 1281 1282 truesize = len; 1283 1284 curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page, 1285 buf, len, truesize); 1286 if (!curr_skb) { 1287 put_page(page); 1288 goto err; 1289 } 1290 } 1291 1292 return 0; 1293 1294 err: 1295 xsk_drop_follow_bufs(vi->dev, rq, num_buf, stats); 1296 return -EINVAL; 1297 } 1298 1299 static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct virtnet_info *vi, 1300 struct receive_queue *rq, struct xdp_buff *xdp, 1301 unsigned int *xdp_xmit, 1302 struct virtnet_rq_stats *stats) 1303 { 1304 struct virtio_net_hdr_mrg_rxbuf *hdr; 1305 struct bpf_prog *prog; 1306 struct sk_buff *skb; 1307 u32 ret, num_buf; 1308 1309 hdr = xdp->data - vi->hdr_len; 1310 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 1311 1312 ret = XDP_PASS; 1313 rcu_read_lock(); 1314 prog = rcu_dereference(rq->xdp_prog); 1315 /* TODO: support multi buffer. */ 1316 if (prog && num_buf == 1) 1317 ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats); 1318 rcu_read_unlock(); 1319 1320 switch (ret) { 1321 case XDP_PASS: 1322 skb = xsk_construct_skb(rq, xdp); 1323 if (!skb) 1324 goto drop_bufs; 1325 1326 if (xsk_append_merge_buffer(vi, rq, skb, num_buf, hdr, stats)) { 1327 dev_kfree_skb(skb); 1328 goto drop; 1329 } 1330 1331 return skb; 1332 1333 case XDP_TX: 1334 case XDP_REDIRECT: 1335 return NULL; 1336 1337 default: 1338 /* drop packet */ 1339 xsk_buff_free(xdp); 1340 } 1341 1342 drop_bufs: 1343 xsk_drop_follow_bufs(dev, rq, num_buf, stats); 1344 1345 drop: 1346 u64_stats_inc(&stats->drops); 1347 return NULL; 1348 } 1349 1350 static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queue *rq, 1351 void *buf, u32 len, 1352 unsigned int *xdp_xmit, 1353 struct virtnet_rq_stats *stats) 1354 { 1355 struct net_device *dev = vi->dev; 1356 struct sk_buff *skb = NULL; 1357 struct xdp_buff *xdp; 1358 u8 flags; 1359 1360 len -= vi->hdr_len; 1361 1362 u64_stats_add(&stats->bytes, len); 1363 1364 xdp = buf_to_xdp(vi, rq, buf, len); 1365 if (!xdp) 1366 return; 1367 1368 if (unlikely(len < ETH_HLEN)) { 1369 pr_debug("%s: short packet %i\n", dev->name, len); 1370 DEV_STATS_INC(dev, rx_length_errors); 1371 xsk_buff_free(xdp); 1372 return; 1373 } 1374 1375 flags = ((struct virtio_net_common_hdr *)(xdp->data - vi->hdr_len))->hdr.flags; 1376 1377 if (!vi->mergeable_rx_bufs) 1378 skb = virtnet_receive_xsk_small(dev, vi, rq, xdp, xdp_xmit, stats); 1379 else 1380 skb = virtnet_receive_xsk_merge(dev, vi, rq, xdp, xdp_xmit, stats); 1381 1382 if (skb) 1383 virtnet_receive_done(vi, rq, skb, flags); 1384 } 1385 1386 static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue *rq, 1387 struct xsk_buff_pool *pool, gfp_t gfp) 1388 { 1389 struct xdp_buff **xsk_buffs; 1390 dma_addr_t addr; 1391 int err = 0; 1392 u32 len, i; 1393 int num; 1394 1395 xsk_buffs = rq->xsk_buffs; 1396 1397 num = xsk_buff_alloc_batch(pool, xsk_buffs, rq->vq->num_free); 1398 if (!num) 1399 return -ENOMEM; 1400 1401 len = xsk_pool_get_rx_frame_size(pool) + vi->hdr_len; 1402 1403 for (i = 0; i < num; ++i) { 1404 /* Use 
the part of XDP_PACKET_HEADROOM as the virtnet hdr space. 1405 * We assume XDP_PACKET_HEADROOM is larger than vi->hdr_len. 1406 * (see function virtnet_xsk_pool_enable) 1407 */ 1408 addr = xsk_buff_xdp_get_dma(xsk_buffs[i]) - vi->hdr_len; 1409 1410 sg_init_table(rq->sg, 1); 1411 sg_fill_dma(rq->sg, addr, len); 1412 1413 err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1, 1414 xsk_buffs[i], NULL, gfp); 1415 if (err) 1416 goto err; 1417 } 1418 1419 return num; 1420 1421 err: 1422 for (; i < num; ++i) 1423 xsk_buff_free(xsk_buffs[i]); 1424 1425 return err; 1426 } 1427
1428 static void *virtnet_xsk_to_ptr(u32 len) 1429 { 1430 unsigned long p; 1431 1432 p = len << VIRTIO_XSK_FLAG_OFFSET; 1433 1434 return virtnet_xmit_ptr_pack((void *)p, VIRTNET_XMIT_TYPE_XSK); 1435 } 1436
1437 static int virtnet_xsk_xmit_one(struct send_queue *sq, 1438 struct xsk_buff_pool *pool, 1439 struct xdp_desc *desc) 1440 { 1441 struct virtnet_info *vi; 1442 dma_addr_t addr; 1443 1444 vi = sq->vq->vdev->priv; 1445 1446 addr = xsk_buff_raw_get_dma(pool, desc->addr); 1447 xsk_buff_raw_dma_sync_for_device(pool, addr, desc->len); 1448 1449 sg_init_table(sq->sg, 2); 1450 sg_fill_dma(sq->sg, sq->xsk_hdr_dma_addr, vi->hdr_len); 1451 sg_fill_dma(sq->sg + 1, addr, desc->len); 1452 1453 return virtqueue_add_outbuf_premapped(sq->vq, sq->sg, 2, 1454 virtnet_xsk_to_ptr(desc->len), 1455 GFP_ATOMIC); 1456 } 1457
1458 static int virtnet_xsk_xmit_batch(struct send_queue *sq, 1459 struct xsk_buff_pool *pool, 1460 unsigned int budget, 1461 u64 *kicks) 1462 { 1463 struct xdp_desc *descs = pool->tx_descs; 1464 bool kick = false; 1465 u32 nb_pkts, i; 1466 int err; 1467 1468 budget = min_t(u32, budget, sq->vq->num_free); 1469 1470 nb_pkts = xsk_tx_peek_release_desc_batch(pool, budget); 1471 if (!nb_pkts) 1472 return 0; 1473 1474 for (i = 0; i < nb_pkts; i++) { 1475 err = virtnet_xsk_xmit_one(sq, pool, &descs[i]); 1476 if (unlikely(err)) { 1477 xsk_tx_completed(sq->xsk_pool, nb_pkts - i); 1478 break; 1479 } 1480 1481 kick = true; 1482 } 1483 1484 if (kick && virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) 1485 (*kicks)++; 1486 1487 return i; 1488 } 1489
1490 static bool virtnet_xsk_xmit(struct send_queue *sq, struct xsk_buff_pool *pool, 1491 int budget) 1492 { 1493 struct virtnet_info *vi = sq->vq->vdev->priv; 1494 struct virtnet_sq_free_stats stats = {}; 1495 struct net_device *dev = vi->dev; 1496 u64 kicks = 0; 1497 int sent; 1498 1499 /* Avoid waking up the NAPI needlessly, so call __free_old_xmit instead of 1500 * free_old_xmit().
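* free_old_xmit() would go through virtnet_free_old_xmit(), whose
* virtnet_xsk_completed() path schedules the TX NAPI again; here the XSK
* completions are instead reported straight to xsk_tx_completed() below.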
1501 */ 1502 __free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq), true, &stats); 1503 1504 if (stats.xsk) 1505 xsk_tx_completed(sq->xsk_pool, stats.xsk); 1506 1507 sent = virtnet_xsk_xmit_batch(sq, pool, budget, &kicks); 1508 1509 if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq)) 1510 check_sq_full_and_disable(vi, vi->dev, sq); 1511 1512 if (sent) { 1513 struct netdev_queue *txq; 1514 1515 txq = netdev_get_tx_queue(vi->dev, sq - vi->sq); 1516 txq_trans_cond_update(txq); 1517 } 1518 1519 u64_stats_update_begin(&sq->stats.syncp); 1520 u64_stats_add(&sq->stats.packets, stats.packets); 1521 u64_stats_add(&sq->stats.bytes, stats.bytes); 1522 u64_stats_add(&sq->stats.kicks, kicks); 1523 u64_stats_add(&sq->stats.xdp_tx, sent); 1524 u64_stats_update_end(&sq->stats.syncp); 1525 1526 if (xsk_uses_need_wakeup(pool)) 1527 xsk_set_tx_need_wakeup(pool); 1528 1529 return sent; 1530 } 1531 1532 static void xsk_wakeup(struct send_queue *sq) 1533 { 1534 if (napi_if_scheduled_mark_missed(&sq->napi)) 1535 return; 1536 1537 local_bh_disable(); 1538 virtqueue_napi_schedule(&sq->napi, sq->vq); 1539 local_bh_enable(); 1540 } 1541 1542 static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag) 1543 { 1544 struct virtnet_info *vi = netdev_priv(dev); 1545 struct send_queue *sq; 1546 1547 if (!netif_running(dev)) 1548 return -ENETDOWN; 1549 1550 if (qid >= vi->curr_queue_pairs) 1551 return -EINVAL; 1552 1553 sq = &vi->sq[qid]; 1554 1555 xsk_wakeup(sq); 1556 return 0; 1557 } 1558 1559 static void virtnet_xsk_completed(struct send_queue *sq, int num) 1560 { 1561 xsk_tx_completed(sq->xsk_pool, num); 1562 1563 /* If this is called by rx poll, start_xmit and xdp xmit we should 1564 * wakeup the tx napi to consume the xsk tx queue, because the tx 1565 * interrupt may not be triggered. 1566 */ 1567 xsk_wakeup(sq); 1568 } 1569 1570 static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, 1571 struct send_queue *sq, 1572 struct xdp_frame *xdpf) 1573 { 1574 struct virtio_net_hdr_mrg_rxbuf *hdr; 1575 struct skb_shared_info *shinfo; 1576 u8 nr_frags = 0; 1577 int err, i; 1578 1579 if (unlikely(xdpf->headroom < vi->hdr_len)) 1580 return -EOVERFLOW; 1581 1582 if (unlikely(xdp_frame_has_frags(xdpf))) { 1583 shinfo = xdp_get_shared_info_from_frame(xdpf); 1584 nr_frags = shinfo->nr_frags; 1585 } 1586 1587 /* In wrapping function virtnet_xdp_xmit(), we need to free 1588 * up the pending old buffers, where we need to calculate the 1589 * position of skb_shared_info in xdp_get_frame_len() and 1590 * xdp_return_frame(), which will involve to xdpf->data and 1591 * xdpf->headroom. Therefore, we need to update the value of 1592 * headroom synchronously here. 1593 */ 1594 xdpf->headroom -= vi->hdr_len; 1595 xdpf->data -= vi->hdr_len; 1596 /* Zero header and leave csum up to XDP layers */ 1597 hdr = xdpf->data; 1598 memset(hdr, 0, vi->hdr_len); 1599 xdpf->len += vi->hdr_len; 1600 1601 sg_init_table(sq->sg, nr_frags + 1); 1602 sg_set_buf(sq->sg, xdpf->data, xdpf->len); 1603 for (i = 0; i < nr_frags; i++) { 1604 skb_frag_t *frag = &shinfo->frags[i]; 1605 1606 sg_set_page(&sq->sg[i + 1], skb_frag_page(frag), 1607 skb_frag_size(frag), skb_frag_off(frag)); 1608 } 1609 1610 err = virtnet_add_outbuf(sq, nr_frags + 1, xdpf, VIRTNET_XMIT_TYPE_XDP); 1611 if (unlikely(err)) 1612 return -ENOSPC; /* Caller handle free/refcnt */ 1613 1614 return 0; 1615 } 1616 1617 /* when vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on 1618 * the current cpu, so it does not need to be locked. 
1619 * 1620 * Here we use a macro instead of inline functions because we have to deal with 1621 * three issues at the same time: 1. the choice of sq; 2. deciding whether to take the 1622 * txq lock/unlock; 3. making sparse happy. It is difficult for two inline 1623 * functions to solve all three problems at the same time. 1624 */ 1625 #define virtnet_xdp_get_sq(vi) ({ \ 1626 int cpu = smp_processor_id(); \ 1627 struct netdev_queue *txq; \ 1628 typeof(vi) v = (vi); \ 1629 unsigned int qp; \ 1630 \ 1631 if (v->curr_queue_pairs > nr_cpu_ids) { \ 1632 qp = v->curr_queue_pairs - v->xdp_queue_pairs; \ 1633 qp += cpu; \ 1634 txq = netdev_get_tx_queue(v->dev, qp); \ 1635 __netif_tx_acquire(txq); \ 1636 } else { \ 1637 qp = cpu % v->curr_queue_pairs; \ 1638 txq = netdev_get_tx_queue(v->dev, qp); \ 1639 __netif_tx_lock(txq, cpu); \ 1640 } \ 1641 v->sq + qp; \ 1642 }) 1643
1644 #define virtnet_xdp_put_sq(vi, q) { \ 1645 struct netdev_queue *txq; \ 1646 typeof(vi) v = (vi); \ 1647 \ 1648 txq = netdev_get_tx_queue(v->dev, (q) - v->sq); \ 1649 if (v->curr_queue_pairs > nr_cpu_ids) \ 1650 __netif_tx_release(txq); \ 1651 else \ 1652 __netif_tx_unlock(txq); \ 1653 } 1654
1655 static int virtnet_xdp_xmit(struct net_device *dev, 1656 int n, struct xdp_frame **frames, u32 flags) 1657 { 1658 struct virtnet_info *vi = netdev_priv(dev); 1659 struct virtnet_sq_free_stats stats = {0}; 1660 struct receive_queue *rq = vi->rq; 1661 struct bpf_prog *xdp_prog; 1662 struct send_queue *sq; 1663 int nxmit = 0; 1664 int kicks = 0; 1665 int ret; 1666 int i; 1667 1668 /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this 1669 * indicates XDP resources have been successfully allocated. 1670 */ 1671 xdp_prog = rcu_access_pointer(rq->xdp_prog); 1672 if (!xdp_prog) 1673 return -ENXIO; 1674 1675 sq = virtnet_xdp_get_sq(vi); 1676 1677 if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { 1678 ret = -EINVAL; 1679 goto out; 1680 } 1681 1682 /* Free up any pending old buffers before queueing new ones.
*/ 1683 virtnet_free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq), 1684 false, &stats); 1685 1686 for (i = 0; i < n; i++) { 1687 struct xdp_frame *xdpf = frames[i]; 1688 1689 if (__virtnet_xdp_xmit_one(vi, sq, xdpf)) 1690 break; 1691 nxmit++; 1692 } 1693 ret = nxmit; 1694 1695 if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq)) 1696 check_sq_full_and_disable(vi, dev, sq); 1697 1698 if (flags & XDP_XMIT_FLUSH) { 1699 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) 1700 kicks = 1; 1701 } 1702 out: 1703 u64_stats_update_begin(&sq->stats.syncp); 1704 u64_stats_add(&sq->stats.bytes, stats.bytes); 1705 u64_stats_add(&sq->stats.packets, stats.packets); 1706 u64_stats_add(&sq->stats.xdp_tx, n); 1707 u64_stats_add(&sq->stats.xdp_tx_drops, n - nxmit); 1708 u64_stats_add(&sq->stats.kicks, kicks); 1709 u64_stats_update_end(&sq->stats.syncp); 1710 1711 virtnet_xdp_put_sq(vi, sq); 1712 return ret; 1713 } 1714 1715 static void put_xdp_frags(struct xdp_buff *xdp) 1716 { 1717 struct skb_shared_info *shinfo; 1718 struct page *xdp_page; 1719 int i; 1720 1721 if (xdp_buff_has_frags(xdp)) { 1722 shinfo = xdp_get_shared_info_from_buff(xdp); 1723 for (i = 0; i < shinfo->nr_frags; i++) { 1724 xdp_page = skb_frag_page(&shinfo->frags[i]); 1725 put_page(xdp_page); 1726 } 1727 } 1728 } 1729 1730 static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp, 1731 struct net_device *dev, 1732 unsigned int *xdp_xmit, 1733 struct virtnet_rq_stats *stats) 1734 { 1735 struct xdp_frame *xdpf; 1736 int err; 1737 u32 act; 1738 1739 act = bpf_prog_run_xdp(xdp_prog, xdp); 1740 u64_stats_inc(&stats->xdp_packets); 1741 1742 switch (act) { 1743 case XDP_PASS: 1744 return act; 1745 1746 case XDP_TX: 1747 u64_stats_inc(&stats->xdp_tx); 1748 xdpf = xdp_convert_buff_to_frame(xdp); 1749 if (unlikely(!xdpf)) { 1750 netdev_dbg(dev, "convert buff to frame failed for xdp\n"); 1751 return XDP_DROP; 1752 } 1753 1754 err = virtnet_xdp_xmit(dev, 1, &xdpf, 0); 1755 if (unlikely(!err)) { 1756 xdp_return_frame_rx_napi(xdpf); 1757 } else if (unlikely(err < 0)) { 1758 trace_xdp_exception(dev, xdp_prog, act); 1759 return XDP_DROP; 1760 } 1761 *xdp_xmit |= VIRTIO_XDP_TX; 1762 return act; 1763 1764 case XDP_REDIRECT: 1765 u64_stats_inc(&stats->xdp_redirects); 1766 err = xdp_do_redirect(dev, xdp, xdp_prog); 1767 if (err) 1768 return XDP_DROP; 1769 1770 *xdp_xmit |= VIRTIO_XDP_REDIR; 1771 return act; 1772 1773 default: 1774 bpf_warn_invalid_xdp_action(dev, xdp_prog, act); 1775 fallthrough; 1776 case XDP_ABORTED: 1777 trace_xdp_exception(dev, xdp_prog, act); 1778 fallthrough; 1779 case XDP_DROP: 1780 return XDP_DROP; 1781 } 1782 } 1783 1784 static unsigned int virtnet_get_headroom(struct virtnet_info *vi) 1785 { 1786 return vi->xdp_enabled ? XDP_PACKET_HEADROOM : 0; 1787 } 1788 1789 /* We copy the packet for XDP in the following cases: 1790 * 1791 * 1) Packet is scattered across multiple rx buffers. 1792 * 2) Headroom space is insufficient. 1793 * 1794 * This is inefficient but it's a temporary condition that 1795 * we hit right after XDP is enabled and until queue is refilled 1796 * with large buffers with sufficient headroom - so it should affect 1797 * at most queue size packets. 1798 * Afterwards, the conditions to enable 1799 * XDP should preclude the underlying device from sending packets 1800 * across multiple buffers (num_buf > 1), and we make sure buffers 1801 * have enough headroom. 
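* xdp_linearize_page() below implements the copy: it gathers the scattered
* buffers into one freshly allocated page behind the requested headroom and
* gives up (returns NULL) if the linearized frame plus tailroom would not fit
* into PAGE_SIZE.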
1802 */ 1803 static struct page *xdp_linearize_page(struct receive_queue *rq, 1804 int *num_buf, 1805 struct page *p, 1806 int offset, 1807 int page_off, 1808 unsigned int *len) 1809 { 1810 int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1811 struct page *page; 1812 1813 if (page_off + *len + tailroom > PAGE_SIZE) 1814 return NULL; 1815 1816 page = alloc_page(GFP_ATOMIC); 1817 if (!page) 1818 return NULL; 1819 1820 memcpy(page_address(page) + page_off, page_address(p) + offset, *len); 1821 page_off += *len; 1822 1823 while (--*num_buf) { 1824 unsigned int buflen; 1825 void *buf; 1826 int off; 1827 1828 buf = virtnet_rq_get_buf(rq, &buflen, NULL); 1829 if (unlikely(!buf)) 1830 goto err_buf; 1831 1832 p = virt_to_head_page(buf); 1833 off = buf - page_address(p); 1834 1835 /* guard against a misconfigured or uncooperative backend that 1836 * is sending packet larger than the MTU. 1837 */ 1838 if ((page_off + buflen + tailroom) > PAGE_SIZE) { 1839 put_page(p); 1840 goto err_buf; 1841 } 1842 1843 memcpy(page_address(page) + page_off, 1844 page_address(p) + off, buflen); 1845 page_off += buflen; 1846 put_page(p); 1847 } 1848 1849 /* Headroom does not contribute to packet length */ 1850 *len = page_off - XDP_PACKET_HEADROOM; 1851 return page; 1852 err_buf: 1853 __free_pages(page, 0); 1854 return NULL; 1855 } 1856 1857 static struct sk_buff *receive_small_build_skb(struct virtnet_info *vi, 1858 unsigned int xdp_headroom, 1859 void *buf, 1860 unsigned int len) 1861 { 1862 unsigned int header_offset; 1863 unsigned int headroom; 1864 unsigned int buflen; 1865 struct sk_buff *skb; 1866 1867 header_offset = VIRTNET_RX_PAD + xdp_headroom; 1868 headroom = vi->hdr_len + header_offset; 1869 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 1870 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1871 1872 skb = virtnet_build_skb(buf, buflen, headroom, len); 1873 if (unlikely(!skb)) 1874 return NULL; 1875 1876 buf += header_offset; 1877 memcpy(skb_vnet_common_hdr(skb), buf, vi->hdr_len); 1878 1879 return skb; 1880 } 1881 1882 static struct sk_buff *receive_small_xdp(struct net_device *dev, 1883 struct virtnet_info *vi, 1884 struct receive_queue *rq, 1885 struct bpf_prog *xdp_prog, 1886 void *buf, 1887 unsigned int xdp_headroom, 1888 unsigned int len, 1889 unsigned int *xdp_xmit, 1890 struct virtnet_rq_stats *stats) 1891 { 1892 unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom; 1893 unsigned int headroom = vi->hdr_len + header_offset; 1894 struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset; 1895 struct page *page = virt_to_head_page(buf); 1896 struct page *xdp_page; 1897 unsigned int buflen; 1898 struct xdp_buff xdp; 1899 struct sk_buff *skb; 1900 unsigned int metasize = 0; 1901 u32 act; 1902 1903 if (unlikely(hdr->hdr.gso_type)) 1904 goto err_xdp; 1905 1906 /* Partially checksummed packets must be dropped. 
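* (VIRTIO_NET_HDR_F_NEEDS_CSUM means the checksum still has to be completed
* from csum_start/csum_offset, and such partially checksummed data cannot be
* handed to an XDP program.)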
*/ 1907 if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) 1908 goto err_xdp; 1909 1910 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 1911 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1912 1913 if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) { 1914 int offset = buf - page_address(page) + header_offset; 1915 unsigned int tlen = len + vi->hdr_len; 1916 int num_buf = 1; 1917 1918 xdp_headroom = virtnet_get_headroom(vi); 1919 header_offset = VIRTNET_RX_PAD + xdp_headroom; 1920 headroom = vi->hdr_len + header_offset; 1921 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 1922 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1923 xdp_page = xdp_linearize_page(rq, &num_buf, page, 1924 offset, header_offset, 1925 &tlen); 1926 if (!xdp_page) 1927 goto err_xdp; 1928 1929 buf = page_address(xdp_page); 1930 put_page(page); 1931 page = xdp_page; 1932 } 1933 1934 xdp_init_buff(&xdp, buflen, &rq->xdp_rxq); 1935 xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len, 1936 xdp_headroom, len, true); 1937 1938 act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats); 1939 1940 switch (act) { 1941 case XDP_PASS: 1942 /* Recalculate length in case bpf program changed it */ 1943 len = xdp.data_end - xdp.data; 1944 metasize = xdp.data - xdp.data_meta; 1945 break; 1946 1947 case XDP_TX: 1948 case XDP_REDIRECT: 1949 goto xdp_xmit; 1950 1951 default: 1952 goto err_xdp; 1953 } 1954 1955 skb = virtnet_build_skb(buf, buflen, xdp.data - buf, len); 1956 if (unlikely(!skb)) 1957 goto err; 1958 1959 if (metasize) 1960 skb_metadata_set(skb, metasize); 1961 1962 return skb; 1963 1964 err_xdp: 1965 u64_stats_inc(&stats->xdp_drops); 1966 err: 1967 u64_stats_inc(&stats->drops); 1968 put_page(page); 1969 xdp_xmit: 1970 return NULL; 1971 } 1972 1973 static struct sk_buff *receive_small(struct net_device *dev, 1974 struct virtnet_info *vi, 1975 struct receive_queue *rq, 1976 void *buf, void *ctx, 1977 unsigned int len, 1978 unsigned int *xdp_xmit, 1979 struct virtnet_rq_stats *stats) 1980 { 1981 unsigned int xdp_headroom = (unsigned long)ctx; 1982 struct page *page = virt_to_head_page(buf); 1983 struct sk_buff *skb; 1984 1985 /* We passed the address of virtnet header to virtio-core, 1986 * so truncate the padding. 
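* buf, as returned by the virtqueue, points at the virtio-net header; stepping
* back over VIRTNET_RX_PAD + xdp_headroom recovers the start of the buffer that
* was originally allocated for this frame.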
1987 */ 1988 buf -= VIRTNET_RX_PAD + xdp_headroom; 1989 1990 len -= vi->hdr_len; 1991 u64_stats_add(&stats->bytes, len); 1992 1993 if (unlikely(len > GOOD_PACKET_LEN)) { 1994 pr_debug("%s: rx error: len %u exceeds max size %d\n", 1995 dev->name, len, GOOD_PACKET_LEN); 1996 DEV_STATS_INC(dev, rx_length_errors); 1997 goto err; 1998 } 1999 2000 if (unlikely(vi->xdp_enabled)) { 2001 struct bpf_prog *xdp_prog; 2002 2003 rcu_read_lock(); 2004 xdp_prog = rcu_dereference(rq->xdp_prog); 2005 if (xdp_prog) { 2006 skb = receive_small_xdp(dev, vi, rq, xdp_prog, buf, 2007 xdp_headroom, len, xdp_xmit, 2008 stats); 2009 rcu_read_unlock(); 2010 return skb; 2011 } 2012 rcu_read_unlock(); 2013 } 2014 2015 skb = receive_small_build_skb(vi, xdp_headroom, buf, len); 2016 if (likely(skb)) 2017 return skb; 2018 2019 err: 2020 u64_stats_inc(&stats->drops); 2021 put_page(page); 2022 return NULL; 2023 } 2024 2025 static struct sk_buff *receive_big(struct net_device *dev, 2026 struct virtnet_info *vi, 2027 struct receive_queue *rq, 2028 void *buf, 2029 unsigned int len, 2030 struct virtnet_rq_stats *stats) 2031 { 2032 struct page *page = buf; 2033 struct sk_buff *skb = 2034 page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0); 2035 2036 u64_stats_add(&stats->bytes, len - vi->hdr_len); 2037 if (unlikely(!skb)) 2038 goto err; 2039 2040 return skb; 2041 2042 err: 2043 u64_stats_inc(&stats->drops); 2044 give_pages(rq, page); 2045 return NULL; 2046 } 2047 2048 static void mergeable_buf_free(struct receive_queue *rq, int num_buf, 2049 struct net_device *dev, 2050 struct virtnet_rq_stats *stats) 2051 { 2052 struct page *page; 2053 void *buf; 2054 int len; 2055 2056 while (num_buf-- > 1) { 2057 buf = virtnet_rq_get_buf(rq, &len, NULL); 2058 if (unlikely(!buf)) { 2059 pr_debug("%s: rx error: %d buffers missing\n", 2060 dev->name, num_buf); 2061 DEV_STATS_INC(dev, rx_length_errors); 2062 break; 2063 } 2064 u64_stats_add(&stats->bytes, len); 2065 page = virt_to_head_page(buf); 2066 put_page(page); 2067 } 2068 } 2069 2070 /* Why not use xdp_build_skb_from_frame() ? 2071 * XDP core assumes that xdp frags are PAGE_SIZE in length, while in 2072 * virtio-net there are 2 points that do not match its requirements: 2073 * 1. The size of the prefilled buffer is not fixed before xdp is set. 2074 * 2. xdp_build_skb_from_frame() does more checks that we don't need, 2075 * like eth_type_trans() (which virtio-net does in receive_buf()). 2076 */ 2077 static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev, 2078 struct virtnet_info *vi, 2079 struct xdp_buff *xdp, 2080 unsigned int xdp_frags_truesz) 2081 { 2082 struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 2083 unsigned int headroom, data_len; 2084 struct sk_buff *skb; 2085 int metasize; 2086 u8 nr_frags; 2087 2088 if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) { 2089 pr_debug("Error building skb as missing reserved tailroom for xdp"); 2090 return NULL; 2091 } 2092 2093 if (unlikely(xdp_buff_has_frags(xdp))) 2094 nr_frags = sinfo->nr_frags; 2095 2096 skb = build_skb(xdp->data_hard_start, xdp->frame_sz); 2097 if (unlikely(!skb)) 2098 return NULL; 2099 2100 headroom = xdp->data - xdp->data_hard_start; 2101 data_len = xdp->data_end - xdp->data; 2102 skb_reserve(skb, headroom); 2103 __skb_put(skb, data_len); 2104 2105 metasize = xdp->data - xdp->data_meta; 2106 metasize = metasize > 0 ? 
metasize : 0; 2107 if (metasize) 2108 skb_metadata_set(skb, metasize); 2109 2110 if (unlikely(xdp_buff_has_frags(xdp))) 2111 xdp_update_skb_shared_info(skb, nr_frags, 2112 sinfo->xdp_frags_size, 2113 xdp_frags_truesz, 2114 xdp_buff_is_frag_pfmemalloc(xdp)); 2115 2116 return skb; 2117 } 2118 2119 /* TODO: build xdp in big mode */ 2120 static int virtnet_build_xdp_buff_mrg(struct net_device *dev, 2121 struct virtnet_info *vi, 2122 struct receive_queue *rq, 2123 struct xdp_buff *xdp, 2124 void *buf, 2125 unsigned int len, 2126 unsigned int frame_sz, 2127 int *num_buf, 2128 unsigned int *xdp_frags_truesize, 2129 struct virtnet_rq_stats *stats) 2130 { 2131 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 2132 unsigned int headroom, tailroom, room; 2133 unsigned int truesize, cur_frag_size; 2134 struct skb_shared_info *shinfo; 2135 unsigned int xdp_frags_truesz = 0; 2136 struct page *page; 2137 skb_frag_t *frag; 2138 int offset; 2139 void *ctx; 2140 2141 xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq); 2142 xdp_prepare_buff(xdp, buf - XDP_PACKET_HEADROOM, 2143 XDP_PACKET_HEADROOM + vi->hdr_len, len - vi->hdr_len, true); 2144 2145 if (!*num_buf) 2146 return 0; 2147 2148 if (*num_buf > 1) { 2149 /* If we want to build multi-buffer xdp, we need 2150 * to specify that the flags of xdp_buff have the 2151 * XDP_FLAGS_HAS_FRAG bit. 2152 */ 2153 if (!xdp_buff_has_frags(xdp)) 2154 xdp_buff_set_frags_flag(xdp); 2155 2156 shinfo = xdp_get_shared_info_from_buff(xdp); 2157 shinfo->nr_frags = 0; 2158 shinfo->xdp_frags_size = 0; 2159 } 2160 2161 if (*num_buf > MAX_SKB_FRAGS + 1) 2162 return -EINVAL; 2163 2164 while (--*num_buf > 0) { 2165 buf = virtnet_rq_get_buf(rq, &len, &ctx); 2166 if (unlikely(!buf)) { 2167 pr_debug("%s: rx error: %d buffers out of %d missing\n", 2168 dev->name, *num_buf, 2169 virtio16_to_cpu(vi->vdev, hdr->num_buffers)); 2170 DEV_STATS_INC(dev, rx_length_errors); 2171 goto err; 2172 } 2173 2174 u64_stats_add(&stats->bytes, len); 2175 page = virt_to_head_page(buf); 2176 offset = buf - page_address(page); 2177 2178 truesize = mergeable_ctx_to_truesize(ctx); 2179 headroom = mergeable_ctx_to_headroom(ctx); 2180 tailroom = headroom ? 
sizeof(struct skb_shared_info) : 0; 2181 room = SKB_DATA_ALIGN(headroom + tailroom); 2182 2183 cur_frag_size = truesize; 2184 xdp_frags_truesz += cur_frag_size; 2185 if (unlikely(len > truesize - room || cur_frag_size > PAGE_SIZE)) { 2186 put_page(page); 2187 pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 2188 dev->name, len, (unsigned long)(truesize - room)); 2189 DEV_STATS_INC(dev, rx_length_errors); 2190 goto err; 2191 } 2192 2193 frag = &shinfo->frags[shinfo->nr_frags++]; 2194 skb_frag_fill_page_desc(frag, page, offset, len); 2195 if (page_is_pfmemalloc(page)) 2196 xdp_buff_set_frag_pfmemalloc(xdp); 2197 2198 shinfo->xdp_frags_size += len; 2199 } 2200 2201 *xdp_frags_truesize = xdp_frags_truesz; 2202 return 0; 2203 2204 err: 2205 put_xdp_frags(xdp); 2206 return -EINVAL; 2207 } 2208 2209 static void *mergeable_xdp_get_buf(struct virtnet_info *vi, 2210 struct receive_queue *rq, 2211 struct bpf_prog *xdp_prog, 2212 void *ctx, 2213 unsigned int *frame_sz, 2214 int *num_buf, 2215 struct page **page, 2216 int offset, 2217 unsigned int *len, 2218 struct virtio_net_hdr_mrg_rxbuf *hdr) 2219 { 2220 unsigned int truesize = mergeable_ctx_to_truesize(ctx); 2221 unsigned int headroom = mergeable_ctx_to_headroom(ctx); 2222 struct page *xdp_page; 2223 unsigned int xdp_room; 2224 2225 /* Transient failure which in theory could occur if 2226 * in-flight packets from before XDP was enabled reach 2227 * the receive path after XDP is loaded. 2228 */ 2229 if (unlikely(hdr->hdr.gso_type)) 2230 return NULL; 2231 2232 /* Partially checksummed packets must be dropped. */ 2233 if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) 2234 return NULL; 2235 2236 /* Now the XDP core assumes the frag size is PAGE_SIZE, but buffers 2237 * with headroom may add a hole in truesize, which 2238 * makes their length exceed PAGE_SIZE. So we disabled the 2239 * hole mechanism for xdp. See add_recvbuf_mergeable(). 2240 */ 2241 *frame_sz = truesize; 2242 2243 if (likely(headroom >= virtnet_get_headroom(vi) && 2244 (*num_buf == 1 || xdp_prog->aux->xdp_has_frags))) { 2245 return page_address(*page) + offset; 2246 } 2247 2248 /* This happens when the headroom is not enough because 2249 * the buffer was prefilled before XDP was set. 2250 * This should only happen for the first several packets. 2251 * In fact, vq reset can be used here to help us clean up 2252 * the prefilled buffers, but many existing devices do not 2253 * support it, and we don't want to bother users who are 2254 * using xdp normally.
2255 */ 2256 if (!xdp_prog->aux->xdp_has_frags) { 2257 /* linearize data for XDP */ 2258 xdp_page = xdp_linearize_page(rq, num_buf, 2259 *page, offset, 2260 XDP_PACKET_HEADROOM, 2261 len); 2262 if (!xdp_page) 2263 return NULL; 2264 } else { 2265 xdp_room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM + 2266 sizeof(struct skb_shared_info)); 2267 if (*len + xdp_room > PAGE_SIZE) 2268 return NULL; 2269 2270 xdp_page = alloc_page(GFP_ATOMIC); 2271 if (!xdp_page) 2272 return NULL; 2273 2274 memcpy(page_address(xdp_page) + XDP_PACKET_HEADROOM, 2275 page_address(*page) + offset, *len); 2276 } 2277 2278 *frame_sz = PAGE_SIZE; 2279 2280 put_page(*page); 2281 2282 *page = xdp_page; 2283 2284 return page_address(*page) + XDP_PACKET_HEADROOM; 2285 } 2286 2287 static struct sk_buff *receive_mergeable_xdp(struct net_device *dev, 2288 struct virtnet_info *vi, 2289 struct receive_queue *rq, 2290 struct bpf_prog *xdp_prog, 2291 void *buf, 2292 void *ctx, 2293 unsigned int len, 2294 unsigned int *xdp_xmit, 2295 struct virtnet_rq_stats *stats) 2296 { 2297 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 2298 int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 2299 struct page *page = virt_to_head_page(buf); 2300 int offset = buf - page_address(page); 2301 unsigned int xdp_frags_truesz = 0; 2302 struct sk_buff *head_skb; 2303 unsigned int frame_sz; 2304 struct xdp_buff xdp; 2305 void *data; 2306 u32 act; 2307 int err; 2308 2309 data = mergeable_xdp_get_buf(vi, rq, xdp_prog, ctx, &frame_sz, &num_buf, &page, 2310 offset, &len, hdr); 2311 if (unlikely(!data)) 2312 goto err_xdp; 2313 2314 err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz, 2315 &num_buf, &xdp_frags_truesz, stats); 2316 if (unlikely(err)) 2317 goto err_xdp; 2318 2319 act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats); 2320 2321 switch (act) { 2322 case XDP_PASS: 2323 head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz); 2324 if (unlikely(!head_skb)) 2325 break; 2326 return head_skb; 2327 2328 case XDP_TX: 2329 case XDP_REDIRECT: 2330 return NULL; 2331 2332 default: 2333 break; 2334 } 2335 2336 put_xdp_frags(&xdp); 2337 2338 err_xdp: 2339 put_page(page); 2340 mergeable_buf_free(rq, num_buf, dev, stats); 2341 2342 u64_stats_inc(&stats->xdp_drops); 2343 u64_stats_inc(&stats->drops); 2344 return NULL; 2345 } 2346 2347 static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb, 2348 struct sk_buff *curr_skb, 2349 struct page *page, void *buf, 2350 int len, int truesize) 2351 { 2352 int num_skb_frags; 2353 int offset; 2354 2355 num_skb_frags = skb_shinfo(curr_skb)->nr_frags; 2356 if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) { 2357 struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC); 2358 2359 if (unlikely(!nskb)) 2360 return NULL; 2361 2362 if (curr_skb == head_skb) 2363 skb_shinfo(curr_skb)->frag_list = nskb; 2364 else 2365 curr_skb->next = nskb; 2366 curr_skb = nskb; 2367 head_skb->truesize += nskb->truesize; 2368 num_skb_frags = 0; 2369 } 2370 2371 if (curr_skb != head_skb) { 2372 head_skb->data_len += len; 2373 head_skb->len += len; 2374 head_skb->truesize += truesize; 2375 } 2376 2377 offset = buf - page_address(page); 2378 if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) { 2379 put_page(page); 2380 skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1, 2381 len, truesize); 2382 } else { 2383 skb_add_rx_frag(curr_skb, num_skb_frags, page, 2384 offset, len, truesize); 2385 } 2386 2387 return curr_skb; 2388 } 2389 2390 static struct sk_buff *receive_mergeable(struct net_device *dev, 2391 
struct virtnet_info *vi, 2392 struct receive_queue *rq, 2393 void *buf, 2394 void *ctx, 2395 unsigned int len, 2396 unsigned int *xdp_xmit, 2397 struct virtnet_rq_stats *stats) 2398 { 2399 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 2400 int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 2401 struct page *page = virt_to_head_page(buf); 2402 int offset = buf - page_address(page); 2403 struct sk_buff *head_skb, *curr_skb; 2404 unsigned int truesize = mergeable_ctx_to_truesize(ctx); 2405 unsigned int headroom = mergeable_ctx_to_headroom(ctx); 2406 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 2407 unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); 2408 2409 head_skb = NULL; 2410 u64_stats_add(&stats->bytes, len - vi->hdr_len); 2411 2412 if (unlikely(len > truesize - room)) { 2413 pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 2414 dev->name, len, (unsigned long)(truesize - room)); 2415 DEV_STATS_INC(dev, rx_length_errors); 2416 goto err_skb; 2417 } 2418 2419 if (unlikely(vi->xdp_enabled)) { 2420 struct bpf_prog *xdp_prog; 2421 2422 rcu_read_lock(); 2423 xdp_prog = rcu_dereference(rq->xdp_prog); 2424 if (xdp_prog) { 2425 head_skb = receive_mergeable_xdp(dev, vi, rq, xdp_prog, buf, ctx, 2426 len, xdp_xmit, stats); 2427 rcu_read_unlock(); 2428 return head_skb; 2429 } 2430 rcu_read_unlock(); 2431 } 2432 2433 head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom); 2434 curr_skb = head_skb; 2435 2436 if (unlikely(!curr_skb)) 2437 goto err_skb; 2438 while (--num_buf) { 2439 buf = virtnet_rq_get_buf(rq, &len, &ctx); 2440 if (unlikely(!buf)) { 2441 pr_debug("%s: rx error: %d buffers out of %d missing\n", 2442 dev->name, num_buf, 2443 virtio16_to_cpu(vi->vdev, 2444 hdr->num_buffers)); 2445 DEV_STATS_INC(dev, rx_length_errors); 2446 goto err_buf; 2447 } 2448 2449 u64_stats_add(&stats->bytes, len); 2450 page = virt_to_head_page(buf); 2451 2452 truesize = mergeable_ctx_to_truesize(ctx); 2453 headroom = mergeable_ctx_to_headroom(ctx); 2454 tailroom = headroom ? 
sizeof(struct skb_shared_info) : 0; 2455 room = SKB_DATA_ALIGN(headroom + tailroom); 2456 if (unlikely(len > truesize - room)) { 2457 pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 2458 dev->name, len, (unsigned long)(truesize - room)); 2459 DEV_STATS_INC(dev, rx_length_errors); 2460 goto err_skb; 2461 } 2462 2463 curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page, 2464 buf, len, truesize); 2465 if (!curr_skb) 2466 goto err_skb; 2467 } 2468 2469 ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len); 2470 return head_skb; 2471 2472 err_skb: 2473 put_page(page); 2474 mergeable_buf_free(rq, num_buf, dev, stats); 2475 2476 err_buf: 2477 u64_stats_inc(&stats->drops); 2478 dev_kfree_skb(head_skb); 2479 return NULL; 2480 } 2481 2482 static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash, 2483 struct sk_buff *skb) 2484 { 2485 enum pkt_hash_types rss_hash_type; 2486 2487 if (!hdr_hash || !skb) 2488 return; 2489 2490 switch (__le16_to_cpu(hdr_hash->hash_report)) { 2491 case VIRTIO_NET_HASH_REPORT_TCPv4: 2492 case VIRTIO_NET_HASH_REPORT_UDPv4: 2493 case VIRTIO_NET_HASH_REPORT_TCPv6: 2494 case VIRTIO_NET_HASH_REPORT_UDPv6: 2495 case VIRTIO_NET_HASH_REPORT_TCPv6_EX: 2496 case VIRTIO_NET_HASH_REPORT_UDPv6_EX: 2497 rss_hash_type = PKT_HASH_TYPE_L4; 2498 break; 2499 case VIRTIO_NET_HASH_REPORT_IPv4: 2500 case VIRTIO_NET_HASH_REPORT_IPv6: 2501 case VIRTIO_NET_HASH_REPORT_IPv6_EX: 2502 rss_hash_type = PKT_HASH_TYPE_L3; 2503 break; 2504 case VIRTIO_NET_HASH_REPORT_NONE: 2505 default: 2506 rss_hash_type = PKT_HASH_TYPE_NONE; 2507 } 2508 skb_set_hash(skb, __le32_to_cpu(hdr_hash->hash_value), rss_hash_type); 2509 } 2510 2511 static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq, 2512 struct sk_buff *skb, u8 flags) 2513 { 2514 struct virtio_net_common_hdr *hdr; 2515 struct net_device *dev = vi->dev; 2516 2517 hdr = skb_vnet_common_hdr(skb); 2518 if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report) 2519 virtio_skb_set_hash(&hdr->hash_v1_hdr, skb); 2520 2521 if (flags & VIRTIO_NET_HDR_F_DATA_VALID) 2522 skb->ip_summed = CHECKSUM_UNNECESSARY; 2523 2524 if (virtio_net_hdr_to_skb(skb, &hdr->hdr, 2525 virtio_is_little_endian(vi->vdev))) { 2526 net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n", 2527 dev->name, hdr->hdr.gso_type, 2528 hdr->hdr.gso_size); 2529 goto frame_err; 2530 } 2531 2532 skb_record_rx_queue(skb, vq2rxq(rq->vq)); 2533 skb->protocol = eth_type_trans(skb, dev); 2534 pr_debug("Receiving skb proto 0x%04x len %i type %i\n", 2535 ntohs(skb->protocol), skb->len, skb->pkt_type); 2536 2537 napi_gro_receive(&rq->napi, skb); 2538 return; 2539 2540 frame_err: 2541 DEV_STATS_INC(dev, rx_frame_errors); 2542 dev_kfree_skb(skb); 2543 } 2544 2545 static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, 2546 void *buf, unsigned int len, void **ctx, 2547 unsigned int *xdp_xmit, 2548 struct virtnet_rq_stats *stats) 2549 { 2550 struct net_device *dev = vi->dev; 2551 struct sk_buff *skb; 2552 u8 flags; 2553 2554 if (unlikely(len < vi->hdr_len + ETH_HLEN)) { 2555 pr_debug("%s: short packet %i\n", dev->name, len); 2556 DEV_STATS_INC(dev, rx_length_errors); 2557 virtnet_rq_free_buf(vi, rq, buf); 2558 return; 2559 } 2560 2561 /* 1. Save the flags early, as the XDP program might overwrite them. 2562 * These flags ensure packets marked as VIRTIO_NET_HDR_F_DATA_VALID 2563 * stay valid after XDP processing. 2564 * 2. 
XDP doesn't work with partially checksummed packets (refer to 2565 * virtnet_xdp_set()), so packets marked as 2566 * VIRTIO_NET_HDR_F_NEEDS_CSUM get dropped during XDP processing. 2567 */ 2568 flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags; 2569 2570 if (vi->mergeable_rx_bufs) 2571 skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit, 2572 stats); 2573 else if (vi->big_packets) 2574 skb = receive_big(dev, vi, rq, buf, len, stats); 2575 else 2576 skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); 2577 2578 if (unlikely(!skb)) 2579 return; 2580 2581 virtnet_receive_done(vi, rq, skb, flags); 2582 } 2583 2584 /* Unlike mergeable buffers, all buffers are allocated to the 2585 * same size, except for the headroom. For this reason we do 2586 * not need to use mergeable_len_to_ctx here - it is enough 2587 * to store the headroom as the context ignoring the truesize. 2588 */ 2589 static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, 2590 gfp_t gfp) 2591 { 2592 char *buf; 2593 unsigned int xdp_headroom = virtnet_get_headroom(vi); 2594 void *ctx = (void *)(unsigned long)xdp_headroom; 2595 int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom; 2596 int err; 2597 2598 len = SKB_DATA_ALIGN(len) + 2599 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 2600 2601 if (unlikely(!skb_page_frag_refill(len, &rq->alloc_frag, gfp))) 2602 return -ENOMEM; 2603 2604 buf = virtnet_rq_alloc(rq, len, gfp); 2605 if (unlikely(!buf)) 2606 return -ENOMEM; 2607 2608 buf += VIRTNET_RX_PAD + xdp_headroom; 2609 2610 virtnet_rq_init_one_sg(rq, buf, vi->hdr_len + GOOD_PACKET_LEN); 2611 2612 err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1, buf, ctx, gfp); 2613 if (err < 0) { 2614 virtnet_rq_unmap(rq, buf, 0); 2615 put_page(virt_to_head_page(buf)); 2616 } 2617 2618 return err; 2619 } 2620 2621 static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq, 2622 gfp_t gfp) 2623 { 2624 struct page *first, *list = NULL; 2625 char *p; 2626 int i, err, offset; 2627 2628 sg_init_table(rq->sg, vi->big_packets_num_skbfrags + 2); 2629 2630 /* page in rq->sg[vi->big_packets_num_skbfrags + 1] is list tail */ 2631 for (i = vi->big_packets_num_skbfrags + 1; i > 1; --i) { 2632 first = get_a_page(rq, gfp); 2633 if (!first) { 2634 if (list) 2635 give_pages(rq, list); 2636 return -ENOMEM; 2637 } 2638 sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE); 2639 2640 /* chain new page in list head to match sg */ 2641 first->private = (unsigned long)list; 2642 list = first; 2643 } 2644 2645 first = get_a_page(rq, gfp); 2646 if (!first) { 2647 give_pages(rq, list); 2648 return -ENOMEM; 2649 } 2650 p = page_address(first); 2651 2652 /* rq->sg[0], rq->sg[1] share the same page */ 2653 /* a separated rq->sg[0] for header - required in case !any_header_sg */ 2654 sg_set_buf(&rq->sg[0], p, vi->hdr_len); 2655 2656 /* rq->sg[1] for data packet, from offset */ 2657 offset = sizeof(struct padded_vnet_hdr); 2658 sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset); 2659 2660 /* chain first in list head */ 2661 first->private = (unsigned long)list; 2662 err = virtqueue_add_inbuf(rq->vq, rq->sg, vi->big_packets_num_skbfrags + 2, 2663 first, gfp); 2664 if (err < 0) 2665 give_pages(rq, first); 2666 2667 return err; 2668 } 2669 2670 static unsigned int get_mergeable_buf_len(struct receive_queue *rq, 2671 struct ewma_pkt_len *avg_pkt_len, 2672 unsigned int room) 2673 { 2674 struct virtnet_info *vi = rq->vq->vdev->priv; 2675 const size_t hdr_len = vi->hdr_len; 2676 
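/* Buffer sizing, in short: with XDP (room != 0) the whole page minus
 * the reserved head/tailroom is used; otherwise the length is hdr_len
 * plus the EWMA of recent packet sizes (clamped to the range
 * [rq->min_buf_len, PAGE_SIZE - hdr_len]), rounded up to the cache
 * line size. E.g., assuming a 12-byte header, a 1500-byte EWMA average
 * and 64-byte cache lines, refill buffers end up 1536 bytes long.
 */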
unsigned int len; 2677 2678 if (room) 2679 return PAGE_SIZE - room; 2680 2681 len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len), 2682 rq->min_buf_len, PAGE_SIZE - hdr_len); 2683 2684 return ALIGN(len, L1_CACHE_BYTES); 2685 } 2686 2687 static int add_recvbuf_mergeable(struct virtnet_info *vi, 2688 struct receive_queue *rq, gfp_t gfp) 2689 { 2690 struct page_frag *alloc_frag = &rq->alloc_frag; 2691 unsigned int headroom = virtnet_get_headroom(vi); 2692 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 2693 unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); 2694 unsigned int len, hole; 2695 void *ctx; 2696 char *buf; 2697 int err; 2698 2699 /* Extra tailroom is needed to satisfy XDP's assumption. This 2700 * means rx frags coalescing won't work, but consider we've 2701 * disabled GSO for XDP, it won't be a big issue. 2702 */ 2703 len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room); 2704 2705 if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp))) 2706 return -ENOMEM; 2707 2708 if (!alloc_frag->offset && len + room + sizeof(struct virtnet_rq_dma) > alloc_frag->size) 2709 len -= sizeof(struct virtnet_rq_dma); 2710 2711 buf = virtnet_rq_alloc(rq, len + room, gfp); 2712 if (unlikely(!buf)) 2713 return -ENOMEM; 2714 2715 buf += headroom; /* advance address leaving hole at front of pkt */ 2716 hole = alloc_frag->size - alloc_frag->offset; 2717 if (hole < len + room) { 2718 /* To avoid internal fragmentation, if there is very likely not 2719 * enough space for another buffer, add the remaining space to 2720 * the current buffer. 2721 * XDP core assumes that frame_size of xdp_buff and the length 2722 * of the frag are PAGE_SIZE, so we disable the hole mechanism. 2723 */ 2724 if (!headroom) 2725 len += hole; 2726 alloc_frag->offset += hole; 2727 } 2728 2729 virtnet_rq_init_one_sg(rq, buf, len); 2730 2731 ctx = mergeable_len_to_ctx(len + room, headroom); 2732 err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1, buf, ctx, gfp); 2733 if (err < 0) { 2734 virtnet_rq_unmap(rq, buf, 0); 2735 put_page(virt_to_head_page(buf)); 2736 } 2737 2738 return err; 2739 } 2740 2741 /* 2742 * Returns false if we couldn't fill entirely (OOM). 2743 * 2744 * Normally run in the receive path, but can also be run from ndo_open 2745 * before we're receiving packets, or from refill_work which is 2746 * careful to disable receiving (using napi_disable). 
2747 */ 2748 static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq, 2749 gfp_t gfp) 2750 { 2751 int err; 2752 2753 if (rq->xsk_pool) { 2754 err = virtnet_add_recvbuf_xsk(vi, rq, rq->xsk_pool, gfp); 2755 goto kick; 2756 } 2757 2758 do { 2759 if (vi->mergeable_rx_bufs) 2760 err = add_recvbuf_mergeable(vi, rq, gfp); 2761 else if (vi->big_packets) 2762 err = add_recvbuf_big(vi, rq, gfp); 2763 else 2764 err = add_recvbuf_small(vi, rq, gfp); 2765 2766 if (err) 2767 break; 2768 } while (rq->vq->num_free); 2769 2770 kick: 2771 if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) { 2772 unsigned long flags; 2773 2774 flags = u64_stats_update_begin_irqsave(&rq->stats.syncp); 2775 u64_stats_inc(&rq->stats.kicks); 2776 u64_stats_update_end_irqrestore(&rq->stats.syncp, flags); 2777 } 2778 2779 return err != -ENOMEM; 2780 } 2781 2782 static void skb_recv_done(struct virtqueue *rvq) 2783 { 2784 struct virtnet_info *vi = rvq->vdev->priv; 2785 struct receive_queue *rq = &vi->rq[vq2rxq(rvq)]; 2786 2787 rq->calls++; 2788 virtqueue_napi_schedule(&rq->napi, rvq); 2789 } 2790 2791 static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi) 2792 { 2793 napi_enable(napi); 2794 2795 /* If all buffers were filled by the other side before we enabled NAPI, 2796 * we won't get another interrupt, so process any outstanding packets now. 2797 * Call local_bh_enable afterwards to trigger softIRQ processing. 2798 */ 2799 local_bh_disable(); 2800 virtqueue_napi_schedule(napi, vq); 2801 local_bh_enable(); 2802 } 2803 2804 static void virtnet_napi_tx_enable(struct virtnet_info *vi, 2805 struct virtqueue *vq, 2806 struct napi_struct *napi) 2807 { 2808 if (!napi->weight) 2809 return; 2810 2811 /* Tx napi touches cachelines on the cpu handling tx interrupts. Only 2812 * enable the feature if this is likely affine with the transmit path. 2813 */ 2814 if (!vi->affinity_hint_set) { 2815 napi->weight = 0; 2816 return; 2817 } 2818 2819 return virtnet_napi_enable(vq, napi); 2820 } 2821 2822 static void virtnet_napi_tx_disable(struct napi_struct *napi) 2823 { 2824 if (napi->weight) 2825 napi_disable(napi); 2826 } 2827 2828 static void refill_work(struct work_struct *work) 2829 { 2830 struct virtnet_info *vi = 2831 container_of(work, struct virtnet_info, refill.work); 2832 bool still_empty; 2833 int i; 2834 2835 for (i = 0; i < vi->curr_queue_pairs; i++) { 2836 struct receive_queue *rq = &vi->rq[i]; 2837 2838 napi_disable(&rq->napi); 2839 still_empty = !try_fill_recv(vi, rq, GFP_KERNEL); 2840 virtnet_napi_enable(rq->vq, &rq->napi); 2841 2842 /* In theory, this can happen: if we don't get any buffers in, 2843 * we will *never* try to fill again.
2844 */ 2845 if (still_empty) 2846 schedule_delayed_work(&vi->refill, HZ/2); 2847 } 2848 } 2849 2850 static int virtnet_receive_xsk_bufs(struct virtnet_info *vi, 2851 struct receive_queue *rq, 2852 int budget, 2853 unsigned int *xdp_xmit, 2854 struct virtnet_rq_stats *stats) 2855 { 2856 unsigned int len; 2857 int packets = 0; 2858 void *buf; 2859 2860 while (packets < budget) { 2861 buf = virtqueue_get_buf(rq->vq, &len); 2862 if (!buf) 2863 break; 2864 2865 virtnet_receive_xsk_buf(vi, rq, buf, len, xdp_xmit, stats); 2866 packets++; 2867 } 2868 2869 return packets; 2870 } 2871 2872 static int virtnet_receive_packets(struct virtnet_info *vi, 2873 struct receive_queue *rq, 2874 int budget, 2875 unsigned int *xdp_xmit, 2876 struct virtnet_rq_stats *stats) 2877 { 2878 unsigned int len; 2879 int packets = 0; 2880 void *buf; 2881 2882 if (!vi->big_packets || vi->mergeable_rx_bufs) { 2883 void *ctx; 2884 while (packets < budget && 2885 (buf = virtnet_rq_get_buf(rq, &len, &ctx))) { 2886 receive_buf(vi, rq, buf, len, ctx, xdp_xmit, stats); 2887 packets++; 2888 } 2889 } else { 2890 while (packets < budget && 2891 (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) { 2892 receive_buf(vi, rq, buf, len, NULL, xdp_xmit, stats); 2893 packets++; 2894 } 2895 } 2896 2897 return packets; 2898 } 2899 2900 static int virtnet_receive(struct receive_queue *rq, int budget, 2901 unsigned int *xdp_xmit) 2902 { 2903 struct virtnet_info *vi = rq->vq->vdev->priv; 2904 struct virtnet_rq_stats stats = {}; 2905 int i, packets; 2906 2907 if (rq->xsk_pool) 2908 packets = virtnet_receive_xsk_bufs(vi, rq, budget, xdp_xmit, &stats); 2909 else 2910 packets = virtnet_receive_packets(vi, rq, budget, xdp_xmit, &stats); 2911 2912 if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) { 2913 if (!try_fill_recv(vi, rq, GFP_ATOMIC)) { 2914 spin_lock(&vi->refill_lock); 2915 if (vi->refill_enabled) 2916 schedule_delayed_work(&vi->refill, 0); 2917 spin_unlock(&vi->refill_lock); 2918 } 2919 } 2920 2921 u64_stats_set(&stats.packets, packets); 2922 u64_stats_update_begin(&rq->stats.syncp); 2923 for (i = 0; i < ARRAY_SIZE(virtnet_rq_stats_desc); i++) { 2924 size_t offset = virtnet_rq_stats_desc[i].offset; 2925 u64_stats_t *item, *src; 2926 2927 item = (u64_stats_t *)((u8 *)&rq->stats + offset); 2928 src = (u64_stats_t *)((u8 *)&stats + offset); 2929 u64_stats_add(item, u64_stats_read(src)); 2930 } 2931 2932 u64_stats_add(&rq->stats.packets, u64_stats_read(&stats.packets)); 2933 u64_stats_add(&rq->stats.bytes, u64_stats_read(&stats.bytes)); 2934 2935 u64_stats_update_end(&rq->stats.syncp); 2936 2937 return packets; 2938 } 2939 2940 static void virtnet_poll_cleantx(struct receive_queue *rq, int budget) 2941 { 2942 struct virtnet_info *vi = rq->vq->vdev->priv; 2943 unsigned int index = vq2rxq(rq->vq); 2944 struct send_queue *sq = &vi->sq[index]; 2945 struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index); 2946 2947 if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index)) 2948 return; 2949 2950 if (__netif_tx_trylock(txq)) { 2951 if (sq->reset) { 2952 __netif_tx_unlock(txq); 2953 return; 2954 } 2955 2956 do { 2957 virtqueue_disable_cb(sq->vq); 2958 free_old_xmit(sq, txq, !!budget); 2959 } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq))); 2960 2961 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) { 2962 if (netif_tx_queue_stopped(txq)) { 2963 u64_stats_update_begin(&sq->stats.syncp); 2964 u64_stats_inc(&sq->stats.wake); 2965 u64_stats_update_end(&sq->stats.syncp); 2966 } 2967 netif_tx_wake_queue(txq); 2968 
} 2969 2970 __netif_tx_unlock(txq); 2971 } 2972 } 2973 2974 static void virtnet_rx_dim_update(struct virtnet_info *vi, struct receive_queue *rq) 2975 { 2976 struct dim_sample cur_sample = {}; 2977 2978 if (!rq->packets_in_napi) 2979 return; 2980 2981 /* Don't need protection when fetching stats, since fetcher and 2982 * updater of the stats are in same context 2983 */ 2984 dim_update_sample(rq->calls, 2985 u64_stats_read(&rq->stats.packets), 2986 u64_stats_read(&rq->stats.bytes), 2987 &cur_sample); 2988 2989 net_dim(&rq->dim, &cur_sample); 2990 rq->packets_in_napi = 0; 2991 } 2992 2993 static int virtnet_poll(struct napi_struct *napi, int budget) 2994 { 2995 struct receive_queue *rq = 2996 container_of(napi, struct receive_queue, napi); 2997 struct virtnet_info *vi = rq->vq->vdev->priv; 2998 struct send_queue *sq; 2999 unsigned int received; 3000 unsigned int xdp_xmit = 0; 3001 bool napi_complete; 3002 3003 virtnet_poll_cleantx(rq, budget); 3004 3005 received = virtnet_receive(rq, budget, &xdp_xmit); 3006 rq->packets_in_napi += received; 3007 3008 if (xdp_xmit & VIRTIO_XDP_REDIR) 3009 xdp_do_flush(); 3010 3011 /* Out of packets? */ 3012 if (received < budget) { 3013 napi_complete = virtqueue_napi_complete(napi, rq->vq, received); 3014 /* Intentionally not taking dim_lock here. This may result in a 3015 * spurious net_dim call. But if that happens virtnet_rx_dim_work 3016 * will not act on the scheduled work. 3017 */ 3018 if (napi_complete && rq->dim_enabled) 3019 virtnet_rx_dim_update(vi, rq); 3020 } 3021 3022 if (xdp_xmit & VIRTIO_XDP_TX) { 3023 sq = virtnet_xdp_get_sq(vi); 3024 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { 3025 u64_stats_update_begin(&sq->stats.syncp); 3026 u64_stats_inc(&sq->stats.kicks); 3027 u64_stats_update_end(&sq->stats.syncp); 3028 } 3029 virtnet_xdp_put_sq(vi, sq); 3030 } 3031 3032 return received; 3033 } 3034 3035 static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index) 3036 { 3037 virtnet_napi_tx_disable(&vi->sq[qp_index].napi); 3038 napi_disable(&vi->rq[qp_index].napi); 3039 xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); 3040 } 3041 3042 static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index) 3043 { 3044 struct net_device *dev = vi->dev; 3045 int err; 3046 3047 err = xdp_rxq_info_reg(&vi->rq[qp_index].xdp_rxq, dev, qp_index, 3048 vi->rq[qp_index].napi.napi_id); 3049 if (err < 0) 3050 return err; 3051 3052 err = xdp_rxq_info_reg_mem_model(&vi->rq[qp_index].xdp_rxq, 3053 MEM_TYPE_PAGE_SHARED, NULL); 3054 if (err < 0) 3055 goto err_xdp_reg_mem_model; 3056 3057 netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, qp_index)); 3058 virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi); 3059 virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi); 3060 3061 return 0; 3062 3063 err_xdp_reg_mem_model: 3064 xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); 3065 return err; 3066 } 3067 3068 static void virtnet_cancel_dim(struct virtnet_info *vi, struct dim *dim) 3069 { 3070 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 3071 return; 3072 net_dim_work_cancel(dim); 3073 } 3074 3075 static void virtnet_update_settings(struct virtnet_info *vi) 3076 { 3077 u32 speed; 3078 u8 duplex; 3079 3080 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX)) 3081 return; 3082 3083 virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed); 3084 3085 if (ethtool_validate_speed(speed)) 3086 vi->speed = speed; 3087 3088 virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, 
&duplex); 3089 3090 if (ethtool_validate_duplex(duplex)) 3091 vi->duplex = duplex; 3092 } 3093 3094 static int virtnet_open(struct net_device *dev) 3095 { 3096 struct virtnet_info *vi = netdev_priv(dev); 3097 int i, err; 3098 3099 enable_delayed_refill(vi); 3100 3101 for (i = 0; i < vi->max_queue_pairs; i++) { 3102 if (i < vi->curr_queue_pairs) 3103 /* Make sure we have some buffers: if oom use wq. */ 3104 if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL)) 3105 schedule_delayed_work(&vi->refill, 0); 3106 3107 err = virtnet_enable_queue_pair(vi, i); 3108 if (err < 0) 3109 goto err_enable_qp; 3110 } 3111 3112 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { 3113 if (vi->status & VIRTIO_NET_S_LINK_UP) 3114 netif_carrier_on(vi->dev); 3115 virtio_config_driver_enable(vi->vdev); 3116 } else { 3117 vi->status = VIRTIO_NET_S_LINK_UP; 3118 netif_carrier_on(dev); 3119 } 3120 3121 return 0; 3122 3123 err_enable_qp: 3124 disable_delayed_refill(vi); 3125 cancel_delayed_work_sync(&vi->refill); 3126 3127 for (i--; i >= 0; i--) { 3128 virtnet_disable_queue_pair(vi, i); 3129 virtnet_cancel_dim(vi, &vi->rq[i].dim); 3130 } 3131 3132 return err; 3133 } 3134 3135 static int virtnet_poll_tx(struct napi_struct *napi, int budget) 3136 { 3137 struct send_queue *sq = container_of(napi, struct send_queue, napi); 3138 struct virtnet_info *vi = sq->vq->vdev->priv; 3139 unsigned int index = vq2txq(sq->vq); 3140 struct netdev_queue *txq; 3141 int opaque, xsk_done = 0; 3142 bool done; 3143 3144 if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { 3145 /* We don't need to enable cb for XDP */ 3146 napi_complete_done(napi, 0); 3147 return 0; 3148 } 3149 3150 txq = netdev_get_tx_queue(vi->dev, index); 3151 __netif_tx_lock(txq, raw_smp_processor_id()); 3152 virtqueue_disable_cb(sq->vq); 3153 3154 if (sq->xsk_pool) 3155 xsk_done = virtnet_xsk_xmit(sq, sq->xsk_pool, budget); 3156 else 3157 free_old_xmit(sq, txq, !!budget); 3158 3159 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) { 3160 if (netif_tx_queue_stopped(txq)) { 3161 u64_stats_update_begin(&sq->stats.syncp); 3162 u64_stats_inc(&sq->stats.wake); 3163 u64_stats_update_end(&sq->stats.syncp); 3164 } 3165 netif_tx_wake_queue(txq); 3166 } 3167 3168 if (xsk_done >= budget) { 3169 __netif_tx_unlock(txq); 3170 return budget; 3171 } 3172 3173 opaque = virtqueue_enable_cb_prepare(sq->vq); 3174 3175 done = napi_complete_done(napi, 0); 3176 3177 if (!done) 3178 virtqueue_disable_cb(sq->vq); 3179 3180 __netif_tx_unlock(txq); 3181 3182 if (done) { 3183 if (unlikely(virtqueue_poll(sq->vq, opaque))) { 3184 if (napi_schedule_prep(napi)) { 3185 __netif_tx_lock(txq, raw_smp_processor_id()); 3186 virtqueue_disable_cb(sq->vq); 3187 __netif_tx_unlock(txq); 3188 __napi_schedule(napi); 3189 } 3190 } 3191 } 3192 3193 return 0; 3194 } 3195 3196 static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan) 3197 { 3198 struct virtio_net_hdr_mrg_rxbuf *hdr; 3199 const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; 3200 struct virtnet_info *vi = sq->vq->vdev->priv; 3201 int num_sg; 3202 unsigned hdr_len = vi->hdr_len; 3203 bool can_push; 3204 3205 pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); 3206 3207 can_push = vi->any_header_sg && 3208 !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) && 3209 !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len; 3210 /* Even if we can, don't push here yet as this would skew 3211 * csum_start offset below. 
*/ 3212 if (can_push) 3213 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len); 3214 else 3215 hdr = &skb_vnet_common_hdr(skb)->mrg_hdr; 3216 3217 if (virtio_net_hdr_from_skb(skb, &hdr->hdr, 3218 virtio_is_little_endian(vi->vdev), false, 3219 0)) 3220 return -EPROTO; 3221 3222 if (vi->mergeable_rx_bufs) 3223 hdr->num_buffers = 0; 3224 3225 sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2)); 3226 if (can_push) { 3227 __skb_push(skb, hdr_len); 3228 num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len); 3229 if (unlikely(num_sg < 0)) 3230 return num_sg; 3231 /* Pull header back to avoid skew in tx bytes calculations. */ 3232 __skb_pull(skb, hdr_len); 3233 } else { 3234 sg_set_buf(sq->sg, hdr, hdr_len); 3235 num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len); 3236 if (unlikely(num_sg < 0)) 3237 return num_sg; 3238 num_sg++; 3239 } 3240 3241 return virtnet_add_outbuf(sq, num_sg, skb, 3242 orphan ? VIRTNET_XMIT_TYPE_SKB_ORPHAN : VIRTNET_XMIT_TYPE_SKB); 3243 } 3244 3245 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) 3246 { 3247 struct virtnet_info *vi = netdev_priv(dev); 3248 int qnum = skb_get_queue_mapping(skb); 3249 struct send_queue *sq = &vi->sq[qnum]; 3250 int err; 3251 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); 3252 bool xmit_more = netdev_xmit_more(); 3253 bool use_napi = sq->napi.weight; 3254 bool kick; 3255 3256 /* Free up any pending old buffers before queueing new ones. */ 3257 do { 3258 if (use_napi) 3259 virtqueue_disable_cb(sq->vq); 3260 3261 free_old_xmit(sq, txq, false); 3262 3263 } while (use_napi && !xmit_more && 3264 unlikely(!virtqueue_enable_cb_delayed(sq->vq))); 3265 3266 /* timestamp packet in software */ 3267 skb_tx_timestamp(skb); 3268 3269 /* Try to transmit */ 3270 err = xmit_skb(sq, skb, !use_napi); 3271 3272 /* This should not happen! */ 3273 if (unlikely(err)) { 3274 DEV_STATS_INC(dev, tx_fifo_errors); 3275 if (net_ratelimit()) 3276 dev_warn(&dev->dev, 3277 "Unexpected TXQ (%d) queue failure: %d\n", 3278 qnum, err); 3279 DEV_STATS_INC(dev, tx_dropped); 3280 dev_kfree_skb_any(skb); 3281 return NETDEV_TX_OK; 3282 } 3283 3284 /* Don't wait up for transmitted skbs to be freed. */ 3285 if (!use_napi) { 3286 skb_orphan(skb); 3287 nf_reset_ct(skb); 3288 } 3289 3290 check_sq_full_and_disable(vi, dev, sq); 3291 3292 kick = use_napi ? 
__netdev_tx_sent_queue(txq, skb->len, xmit_more) : 3293 !xmit_more || netif_xmit_stopped(txq); 3294 if (kick) { 3295 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { 3296 u64_stats_update_begin(&sq->stats.syncp); 3297 u64_stats_inc(&sq->stats.kicks); 3298 u64_stats_update_end(&sq->stats.syncp); 3299 } 3300 } 3301 3302 return NETDEV_TX_OK; 3303 } 3304 3305 static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq) 3306 { 3307 bool running = netif_running(vi->dev); 3308 3309 if (running) { 3310 napi_disable(&rq->napi); 3311 virtnet_cancel_dim(vi, &rq->dim); 3312 } 3313 } 3314 3315 static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq) 3316 { 3317 bool running = netif_running(vi->dev); 3318 3319 if (!try_fill_recv(vi, rq, GFP_KERNEL)) 3320 schedule_delayed_work(&vi->refill, 0); 3321 3322 if (running) 3323 virtnet_napi_enable(rq->vq, &rq->napi); 3324 } 3325 3326 static int virtnet_rx_resize(struct virtnet_info *vi, 3327 struct receive_queue *rq, u32 ring_num) 3328 { 3329 int err, qindex; 3330 3331 qindex = rq - vi->rq; 3332 3333 virtnet_rx_pause(vi, rq); 3334 3335 err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_unmap_free_buf); 3336 if (err) 3337 netdev_err(vi->dev, "resize rx fail: rx queue index: %d err: %d\n", qindex, err); 3338 3339 virtnet_rx_resume(vi, rq); 3340 return err; 3341 } 3342 3343 static void virtnet_tx_pause(struct virtnet_info *vi, struct send_queue *sq) 3344 { 3345 bool running = netif_running(vi->dev); 3346 struct netdev_queue *txq; 3347 int qindex; 3348 3349 qindex = sq - vi->sq; 3350 3351 if (running) 3352 virtnet_napi_tx_disable(&sq->napi); 3353 3354 txq = netdev_get_tx_queue(vi->dev, qindex); 3355 3356 /* 1. wait for all xmit to complete 3357 * 2. fix the race of netif_stop_subqueue() vs netif_start_subqueue() 3358 */ 3359 __netif_tx_lock_bh(txq); 3360 3361 /* Prevent rx poll from accessing sq. */ 3362 sq->reset = true; 3363 3364 /* Prevent the upper layer from trying to send packets. */ 3365 netif_stop_subqueue(vi->dev, qindex); 3366 3367 __netif_tx_unlock_bh(txq); 3368 } 3369 3370 static void virtnet_tx_resume(struct virtnet_info *vi, struct send_queue *sq) 3371 { 3372 bool running = netif_running(vi->dev); 3373 struct netdev_queue *txq; 3374 int qindex; 3375 3376 qindex = sq - vi->sq; 3377 3378 txq = netdev_get_tx_queue(vi->dev, qindex); 3379 3380 __netif_tx_lock_bh(txq); 3381 sq->reset = false; 3382 netif_tx_wake_queue(txq); 3383 __netif_tx_unlock_bh(txq); 3384 3385 if (running) 3386 virtnet_napi_tx_enable(vi, sq->vq, &sq->napi); 3387 } 3388 3389 static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq, 3390 u32 ring_num) 3391 { 3392 int qindex, err; 3393 3394 qindex = sq - vi->sq; 3395 3396 virtnet_tx_pause(vi, sq); 3397 3398 err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf); 3399 if (err) 3400 netdev_err(vi->dev, "resize tx fail: tx queue index: %d err: %d\n", qindex, err); 3401 3402 virtnet_tx_resume(vi, sq); 3403 3404 return err; 3405 } 3406 3407 /* 3408 * Send command via the control virtqueue and check status. Commands 3409 * supported by the hypervisor, as indicated by feature bits, should 3410 * never fail unless improperly formatted.
3411 */ 3412 static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd, 3413 struct scatterlist *out, 3414 struct scatterlist *in) 3415 { 3416 struct scatterlist *sgs[5], hdr, stat; 3417 u32 out_num = 0, tmp, in_num = 0; 3418 bool ok; 3419 int ret; 3420 3421 /* Caller should know better */ 3422 BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)); 3423 3424 mutex_lock(&vi->cvq_lock); 3425 vi->ctrl->status = ~0; 3426 vi->ctrl->hdr.class = class; 3427 vi->ctrl->hdr.cmd = cmd; 3428 /* Add header */ 3429 sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr)); 3430 sgs[out_num++] = &hdr; 3431 3432 if (out) 3433 sgs[out_num++] = out; 3434 3435 /* Add return status. */ 3436 sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status)); 3437 sgs[out_num + in_num++] = &stat; 3438 3439 if (in) 3440 sgs[out_num + in_num++] = in; 3441 3442 BUG_ON(out_num + in_num > ARRAY_SIZE(sgs)); 3443 ret = virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num, vi, GFP_ATOMIC); 3444 if (ret < 0) { 3445 dev_warn(&vi->vdev->dev, 3446 "Failed to add sgs for command vq: %d.\n", ret); 3447 mutex_unlock(&vi->cvq_lock); 3448 return false; 3449 } 3450 3451 if (unlikely(!virtqueue_kick(vi->cvq))) 3452 goto unlock; 3453 3454 /* Spin for a response; the kick causes an ioport write, trapping 3455 * into the hypervisor, so the request should be handled immediately. 3456 */ 3457 while (!virtqueue_get_buf(vi->cvq, &tmp) && 3458 !virtqueue_is_broken(vi->cvq)) { 3459 cond_resched(); 3460 cpu_relax(); 3461 } 3462 3463 unlock: 3464 ok = vi->ctrl->status == VIRTIO_NET_OK; 3465 mutex_unlock(&vi->cvq_lock); 3466 return ok; 3467 } 3468 3469 static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, 3470 struct scatterlist *out) 3471 { 3472 return virtnet_send_command_reply(vi, class, cmd, out, NULL); 3473 } 3474 3475 static int virtnet_set_mac_address(struct net_device *dev, void *p) 3476 { 3477 struct virtnet_info *vi = netdev_priv(dev); 3478 struct virtio_device *vdev = vi->vdev; 3479 int ret; 3480 struct sockaddr *addr; 3481 struct scatterlist sg; 3482 3483 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 3484 return -EOPNOTSUPP; 3485 3486 addr = kmemdup(p, sizeof(*addr), GFP_KERNEL); 3487 if (!addr) 3488 return -ENOMEM; 3489 3490 ret = eth_prepare_mac_addr_change(dev, addr); 3491 if (ret) 3492 goto out; 3493 3494 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 3495 sg_init_one(&sg, addr->sa_data, dev->addr_len); 3496 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 3497 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { 3498 dev_warn(&vdev->dev, 3499 "Failed to set mac address by vq command.\n"); 3500 ret = -EINVAL; 3501 goto out; 3502 } 3503 } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && 3504 !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { 3505 unsigned int i; 3506 3507 /* Naturally, this has an atomicity problem.
*/ 3508 for (i = 0; i < dev->addr_len; i++) 3509 virtio_cwrite8(vdev, 3510 offsetof(struct virtio_net_config, mac) + 3511 i, addr->sa_data[i]); 3512 } 3513 3514 eth_commit_mac_addr_change(dev, p); 3515 ret = 0; 3516 3517 out: 3518 kfree(addr); 3519 return ret; 3520 } 3521 3522 static void virtnet_stats(struct net_device *dev, 3523 struct rtnl_link_stats64 *tot) 3524 { 3525 struct virtnet_info *vi = netdev_priv(dev); 3526 unsigned int start; 3527 int i; 3528 3529 for (i = 0; i < vi->max_queue_pairs; i++) { 3530 u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops; 3531 struct receive_queue *rq = &vi->rq[i]; 3532 struct send_queue *sq = &vi->sq[i]; 3533 3534 do { 3535 start = u64_stats_fetch_begin(&sq->stats.syncp); 3536 tpackets = u64_stats_read(&sq->stats.packets); 3537 tbytes = u64_stats_read(&sq->stats.bytes); 3538 terrors = u64_stats_read(&sq->stats.tx_timeouts); 3539 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 3540 3541 do { 3542 start = u64_stats_fetch_begin(&rq->stats.syncp); 3543 rpackets = u64_stats_read(&rq->stats.packets); 3544 rbytes = u64_stats_read(&rq->stats.bytes); 3545 rdrops = u64_stats_read(&rq->stats.drops); 3546 } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); 3547 3548 tot->rx_packets += rpackets; 3549 tot->tx_packets += tpackets; 3550 tot->rx_bytes += rbytes; 3551 tot->tx_bytes += tbytes; 3552 tot->rx_dropped += rdrops; 3553 tot->tx_errors += terrors; 3554 } 3555 3556 tot->tx_dropped = DEV_STATS_READ(dev, tx_dropped); 3557 tot->tx_fifo_errors = DEV_STATS_READ(dev, tx_fifo_errors); 3558 tot->rx_length_errors = DEV_STATS_READ(dev, rx_length_errors); 3559 tot->rx_frame_errors = DEV_STATS_READ(dev, rx_frame_errors); 3560 } 3561 3562 static void virtnet_ack_link_announce(struct virtnet_info *vi) 3563 { 3564 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE, 3565 VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL)) 3566 dev_warn(&vi->dev->dev, "Failed to ack link announce.\n"); 3567 } 3568 3569 static bool virtnet_commit_rss_command(struct virtnet_info *vi); 3570 3571 static void virtnet_rss_update_by_qpairs(struct virtnet_info *vi, u16 queue_pairs) 3572 { 3573 u32 indir_val = 0; 3574 int i = 0; 3575 3576 for (; i < vi->rss_indir_table_size; ++i) { 3577 indir_val = ethtool_rxfh_indir_default(i, queue_pairs); 3578 vi->rss.indirection_table[i] = indir_val; 3579 } 3580 vi->rss.max_tx_vq = queue_pairs; 3581 } 3582 3583 static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) 3584 { 3585 struct virtio_net_ctrl_mq *mq __free(kfree) = NULL; 3586 struct virtio_net_ctrl_rss old_rss; 3587 struct net_device *dev = vi->dev; 3588 struct scatterlist sg; 3589 3590 if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ)) 3591 return 0; 3592 3593 /* First check if we need to update rss. Do the update only if both (1) rss is enabled and 3594 * (2) there is no user configuration. 3595 * 3596 * During rss command processing, the device updates queue_pairs using rss.max_tx_vq. That is, 3597 * the device updates queue_pairs together with rss, so we can skip the separate queue_pairs 3598 * update (VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET below) and return directly.
3599 */ 3600 if (vi->has_rss && !netif_is_rxfh_configured(dev)) { 3601 memcpy(&old_rss, &vi->rss, sizeof(old_rss)); 3602 if (rss_indirection_table_alloc(&vi->rss, vi->rss_indir_table_size)) { 3603 vi->rss.indirection_table = old_rss.indirection_table; 3604 return -ENOMEM; 3605 } 3606 3607 virtnet_rss_update_by_qpairs(vi, queue_pairs); 3608 3609 if (!virtnet_commit_rss_command(vi)) { 3610 /* restore ctrl_rss if commit_rss_command failed */ 3611 rss_indirection_table_free(&vi->rss); 3612 memcpy(&vi->rss, &old_rss, sizeof(old_rss)); 3613 3614 dev_warn(&dev->dev, "Failed to set num of queue pairs to %d, because committing RSS failed\n", 3615 queue_pairs); 3616 return -EINVAL; 3617 } 3618 rss_indirection_table_free(&old_rss); 3619 goto succ; 3620 } 3621 3622 mq = kzalloc(sizeof(*mq), GFP_KERNEL); 3623 if (!mq) 3624 return -ENOMEM; 3625 3626 mq->virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs); 3627 sg_init_one(&sg, mq, sizeof(*mq)); 3628 3629 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, 3630 VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) { 3631 dev_warn(&dev->dev, "Failed to set num of queue pairs to %d\n", 3632 queue_pairs); 3633 return -EINVAL; 3634 } 3635 succ: 3636 vi->curr_queue_pairs = queue_pairs; 3637 /* virtnet_open() will refill when the device is brought up. */ 3638 if (dev->flags & IFF_UP) 3639 schedule_delayed_work(&vi->refill, 0); 3640 3641 return 0; 3642 } 3643 3644 static int virtnet_close(struct net_device *dev) 3645 { 3646 struct virtnet_info *vi = netdev_priv(dev); 3647 int i; 3648 3649 /* Make sure NAPI doesn't schedule refill work */ 3650 disable_delayed_refill(vi); 3651 /* Make sure refill_work doesn't re-enable napi! */ 3652 cancel_delayed_work_sync(&vi->refill); 3653 /* Prevent the config change callback from changing carrier 3654 * after close 3655 */ 3656 virtio_config_driver_disable(vi->vdev); 3657 /* Stop getting status/speed updates: we don't care until next 3658 * open 3659 */ 3660 cancel_work_sync(&vi->config_work); 3661 3662 for (i = 0; i < vi->max_queue_pairs; i++) { 3663 virtnet_disable_queue_pair(vi, i); 3664 virtnet_cancel_dim(vi, &vi->rq[i].dim); 3665 } 3666 3667 netif_carrier_off(dev); 3668 3669 return 0; 3670 } 3671 3672 static void virtnet_rx_mode_work(struct work_struct *work) 3673 { 3674 struct virtnet_info *vi = 3675 container_of(work, struct virtnet_info, rx_mode_work); 3676 u8 *promisc_allmulti __free(kfree) = NULL; 3677 struct net_device *dev = vi->dev; 3678 struct scatterlist sg[2]; 3679 struct virtio_net_ctrl_mac *mac_data; 3680 struct netdev_hw_addr *ha; 3681 int uc_count; 3682 int mc_count; 3683 void *buf; 3684 int i; 3685 3686 /* We can't dynamically set ndo_set_rx_mode, so return gracefully */ 3687 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX)) 3688 return; 3689 3690 promisc_allmulti = kzalloc(sizeof(*promisc_allmulti), GFP_KERNEL); 3691 if (!promisc_allmulti) { 3692 dev_warn(&dev->dev, "Failed to set RX mode, no memory.\n"); 3693 return; 3694 } 3695 3696 rtnl_lock(); 3697 3698 *promisc_allmulti = !!(dev->flags & IFF_PROMISC); 3699 sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti)); 3700 3701 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 3702 VIRTIO_NET_CTRL_RX_PROMISC, sg)) 3703 dev_warn(&dev->dev, "Failed to %sable promisc mode.\n", 3704 *promisc_allmulti ?
"en" : "dis"); 3705 3706 *promisc_allmulti = !!(dev->flags & IFF_ALLMULTI); 3707 sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti)); 3708 3709 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 3710 VIRTIO_NET_CTRL_RX_ALLMULTI, sg)) 3711 dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n", 3712 *promisc_allmulti ? "en" : "dis"); 3713 3714 netif_addr_lock_bh(dev); 3715 3716 uc_count = netdev_uc_count(dev); 3717 mc_count = netdev_mc_count(dev); 3718 /* MAC filter - use one buffer for both lists */ 3719 buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) + 3720 (2 * sizeof(mac_data->entries)), GFP_ATOMIC); 3721 mac_data = buf; 3722 if (!buf) { 3723 netif_addr_unlock_bh(dev); 3724 rtnl_unlock(); 3725 return; 3726 } 3727 3728 sg_init_table(sg, 2); 3729 3730 /* Store the unicast list and count in the front of the buffer */ 3731 mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count); 3732 i = 0; 3733 netdev_for_each_uc_addr(ha, dev) 3734 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 3735 3736 sg_set_buf(&sg[0], mac_data, 3737 sizeof(mac_data->entries) + (uc_count * ETH_ALEN)); 3738 3739 /* multicast list and count fill the end */ 3740 mac_data = (void *)&mac_data->macs[uc_count][0]; 3741 3742 mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count); 3743 i = 0; 3744 netdev_for_each_mc_addr(ha, dev) 3745 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 3746 3747 netif_addr_unlock_bh(dev); 3748 3749 sg_set_buf(&sg[1], mac_data, 3750 sizeof(mac_data->entries) + (mc_count * ETH_ALEN)); 3751 3752 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 3753 VIRTIO_NET_CTRL_MAC_TABLE_SET, sg)) 3754 dev_warn(&dev->dev, "Failed to set MAC filter table.\n"); 3755 3756 rtnl_unlock(); 3757 3758 kfree(buf); 3759 } 3760 3761 static void virtnet_set_rx_mode(struct net_device *dev) 3762 { 3763 struct virtnet_info *vi = netdev_priv(dev); 3764 3765 if (vi->rx_mode_work_enabled) 3766 schedule_work(&vi->rx_mode_work); 3767 } 3768 3769 static int virtnet_vlan_rx_add_vid(struct net_device *dev, 3770 __be16 proto, u16 vid) 3771 { 3772 struct virtnet_info *vi = netdev_priv(dev); 3773 __virtio16 *_vid __free(kfree) = NULL; 3774 struct scatterlist sg; 3775 3776 _vid = kzalloc(sizeof(*_vid), GFP_KERNEL); 3777 if (!_vid) 3778 return -ENOMEM; 3779 3780 *_vid = cpu_to_virtio16(vi->vdev, vid); 3781 sg_init_one(&sg, _vid, sizeof(*_vid)); 3782 3783 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 3784 VIRTIO_NET_CTRL_VLAN_ADD, &sg)) 3785 dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid); 3786 return 0; 3787 } 3788 3789 static int virtnet_vlan_rx_kill_vid(struct net_device *dev, 3790 __be16 proto, u16 vid) 3791 { 3792 struct virtnet_info *vi = netdev_priv(dev); 3793 __virtio16 *_vid __free(kfree) = NULL; 3794 struct scatterlist sg; 3795 3796 _vid = kzalloc(sizeof(*_vid), GFP_KERNEL); 3797 if (!_vid) 3798 return -ENOMEM; 3799 3800 *_vid = cpu_to_virtio16(vi->vdev, vid); 3801 sg_init_one(&sg, _vid, sizeof(*_vid)); 3802 3803 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 3804 VIRTIO_NET_CTRL_VLAN_DEL, &sg)) 3805 dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid); 3806 return 0; 3807 } 3808 3809 static void virtnet_clean_affinity(struct virtnet_info *vi) 3810 { 3811 int i; 3812 3813 if (vi->affinity_hint_set) { 3814 for (i = 0; i < vi->max_queue_pairs; i++) { 3815 virtqueue_set_affinity(vi->rq[i].vq, NULL); 3816 virtqueue_set_affinity(vi->sq[i].vq, NULL); 3817 } 3818 3819 vi->affinity_hint_set = false; 3820 } 3821 } 3822 3823 static void virtnet_set_affinity(struct virtnet_info *vi) 3824 { 3825 
cpumask_var_t mask; 3826 int stragglers; 3827 int group_size; 3828 int i, j, cpu; 3829 int num_cpu; 3830 int stride; 3831 3832 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { 3833 virtnet_clean_affinity(vi); 3834 return; 3835 } 3836 3837 num_cpu = num_online_cpus(); 3838 stride = max_t(int, num_cpu / vi->curr_queue_pairs, 1); 3839 stragglers = num_cpu >= vi->curr_queue_pairs ? 3840 num_cpu % vi->curr_queue_pairs : 3841 0; 3842 cpu = cpumask_first(cpu_online_mask); 3843 3844 for (i = 0; i < vi->curr_queue_pairs; i++) { 3845 group_size = stride + (i < stragglers ? 1 : 0); 3846 3847 for (j = 0; j < group_size; j++) { 3848 cpumask_set_cpu(cpu, mask); 3849 cpu = cpumask_next_wrap(cpu, cpu_online_mask, 3850 nr_cpu_ids, false); 3851 } 3852 virtqueue_set_affinity(vi->rq[i].vq, mask); 3853 virtqueue_set_affinity(vi->sq[i].vq, mask); 3854 __netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS); 3855 cpumask_clear(mask); 3856 } 3857 3858 vi->affinity_hint_set = true; 3859 free_cpumask_var(mask); 3860 } 3861 3862 static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node) 3863 { 3864 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 3865 node); 3866 virtnet_set_affinity(vi); 3867 return 0; 3868 } 3869 3870 static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node) 3871 { 3872 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 3873 node_dead); 3874 virtnet_set_affinity(vi); 3875 return 0; 3876 } 3877 3878 static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node) 3879 { 3880 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 3881 node); 3882 3883 virtnet_clean_affinity(vi); 3884 return 0; 3885 } 3886 3887 static enum cpuhp_state virtionet_online; 3888 3889 static int virtnet_cpu_notif_add(struct virtnet_info *vi) 3890 { 3891 int ret; 3892 3893 ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node); 3894 if (ret) 3895 return ret; 3896 ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD, 3897 &vi->node_dead); 3898 if (!ret) 3899 return ret; 3900 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 3901 return ret; 3902 } 3903 3904 static void virtnet_cpu_notif_remove(struct virtnet_info *vi) 3905 { 3906 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 3907 cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD, 3908 &vi->node_dead); 3909 } 3910 3911 static int virtnet_send_ctrl_coal_vq_cmd(struct virtnet_info *vi, 3912 u16 vqn, u32 max_usecs, u32 max_packets) 3913 { 3914 struct virtio_net_ctrl_coal_vq *coal_vq __free(kfree) = NULL; 3915 struct scatterlist sgs; 3916 3917 coal_vq = kzalloc(sizeof(*coal_vq), GFP_KERNEL); 3918 if (!coal_vq) 3919 return -ENOMEM; 3920 3921 coal_vq->vqn = cpu_to_le16(vqn); 3922 coal_vq->coal.max_usecs = cpu_to_le32(max_usecs); 3923 coal_vq->coal.max_packets = cpu_to_le32(max_packets); 3924 sg_init_one(&sgs, coal_vq, sizeof(*coal_vq)); 3925 3926 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 3927 VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET, 3928 &sgs)) 3929 return -EINVAL; 3930 3931 return 0; 3932 } 3933 3934 static int virtnet_send_rx_ctrl_coal_vq_cmd(struct virtnet_info *vi, 3935 u16 queue, u32 max_usecs, 3936 u32 max_packets) 3937 { 3938 int err; 3939 3940 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 3941 return -EOPNOTSUPP; 3942 3943 err = virtnet_send_ctrl_coal_vq_cmd(vi, rxq2vq(queue), 3944 max_usecs, max_packets); 3945 if (err) 3946 return err; 3947 3948 vi->rq[queue].intr_coal.max_usecs = max_usecs; 
3949 vi->rq[queue].intr_coal.max_packets = max_packets; 3950 3951 return 0; 3952 } 3953 3954 static int virtnet_send_tx_ctrl_coal_vq_cmd(struct virtnet_info *vi, 3955 u16 queue, u32 max_usecs, 3956 u32 max_packets) 3957 { 3958 int err; 3959 3960 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 3961 return -EOPNOTSUPP; 3962 3963 err = virtnet_send_ctrl_coal_vq_cmd(vi, txq2vq(queue), 3964 max_usecs, max_packets); 3965 if (err) 3966 return err; 3967 3968 vi->sq[queue].intr_coal.max_usecs = max_usecs; 3969 vi->sq[queue].intr_coal.max_packets = max_packets; 3970 3971 return 0; 3972 } 3973 3974 static void virtnet_get_ringparam(struct net_device *dev, 3975 struct ethtool_ringparam *ring, 3976 struct kernel_ethtool_ringparam *kernel_ring, 3977 struct netlink_ext_ack *extack) 3978 { 3979 struct virtnet_info *vi = netdev_priv(dev); 3980 3981 ring->rx_max_pending = vi->rq[0].vq->num_max; 3982 ring->tx_max_pending = vi->sq[0].vq->num_max; 3983 ring->rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 3984 ring->tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 3985 } 3986 3987 static int virtnet_set_ringparam(struct net_device *dev, 3988 struct ethtool_ringparam *ring, 3989 struct kernel_ethtool_ringparam *kernel_ring, 3990 struct netlink_ext_ack *extack) 3991 { 3992 struct virtnet_info *vi = netdev_priv(dev); 3993 u32 rx_pending, tx_pending; 3994 struct receive_queue *rq; 3995 struct send_queue *sq; 3996 int i, err; 3997 3998 if (ring->rx_mini_pending || ring->rx_jumbo_pending) 3999 return -EINVAL; 4000 4001 rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 4002 tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 4003 4004 if (ring->rx_pending == rx_pending && 4005 ring->tx_pending == tx_pending) 4006 return 0; 4007 4008 if (ring->rx_pending > vi->rq[0].vq->num_max) 4009 return -EINVAL; 4010 4011 if (ring->tx_pending > vi->sq[0].vq->num_max) 4012 return -EINVAL; 4013 4014 for (i = 0; i < vi->max_queue_pairs; i++) { 4015 rq = vi->rq + i; 4016 sq = vi->sq + i; 4017 4018 if (ring->tx_pending != tx_pending) { 4019 err = virtnet_tx_resize(vi, sq, ring->tx_pending); 4020 if (err) 4021 return err; 4022 4023 /* Upon disabling and re-enabling a transmit virtqueue, the device must 4024 * set the coalescing parameters of the virtqueue to those configured 4025 * through the VIRTIO_NET_CTRL_NOTF_COAL_TX_SET command, or, if the driver 4026 * did not set any TX coalescing parameters, to 0. 4027 */ 4028 err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, i, 4029 vi->intr_coal_tx.max_usecs, 4030 vi->intr_coal_tx.max_packets); 4031 4032 /* Don't break the tx resize action if the vq coalescing is not 4033 * supported. The same is true for rx resize below. 
4034 */ 4035 if (err && err != -EOPNOTSUPP) 4036 return err; 4037 } 4038 4039 if (ring->rx_pending != rx_pending) { 4040 err = virtnet_rx_resize(vi, rq, ring->rx_pending); 4041 if (err) 4042 return err; 4043 4044 /* The reason is same as the transmit virtqueue reset */ 4045 mutex_lock(&vi->rq[i].dim_lock); 4046 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, i, 4047 vi->intr_coal_rx.max_usecs, 4048 vi->intr_coal_rx.max_packets); 4049 mutex_unlock(&vi->rq[i].dim_lock); 4050 if (err && err != -EOPNOTSUPP) 4051 return err; 4052 } 4053 } 4054 4055 return 0; 4056 } 4057 4058 static bool virtnet_commit_rss_command(struct virtnet_info *vi) 4059 { 4060 struct net_device *dev = vi->dev; 4061 struct scatterlist sgs[4]; 4062 unsigned int sg_buf_size; 4063 4064 /* prepare sgs */ 4065 sg_init_table(sgs, 4); 4066 4067 sg_buf_size = offsetof(struct virtio_net_ctrl_rss, hash_cfg_reserved); 4068 sg_set_buf(&sgs[0], &vi->rss, sg_buf_size); 4069 4070 if (vi->has_rss) { 4071 sg_buf_size = sizeof(uint16_t) * vi->rss_indir_table_size; 4072 sg_set_buf(&sgs[1], vi->rss.indirection_table, sg_buf_size); 4073 } else { 4074 sg_set_buf(&sgs[1], &vi->rss.hash_cfg_reserved, sizeof(uint16_t)); 4075 } 4076 4077 sg_buf_size = offsetof(struct virtio_net_ctrl_rss, key) 4078 - offsetof(struct virtio_net_ctrl_rss, max_tx_vq); 4079 sg_set_buf(&sgs[2], &vi->rss.max_tx_vq, sg_buf_size); 4080 4081 sg_buf_size = vi->rss_key_size; 4082 sg_set_buf(&sgs[3], vi->rss.key, sg_buf_size); 4083 4084 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, 4085 vi->has_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG 4086 : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, sgs)) 4087 goto err; 4088 4089 return true; 4090 4091 err: 4092 dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n"); 4093 return false; 4094 4095 } 4096 4097 static void virtnet_init_default_rss(struct virtnet_info *vi) 4098 { 4099 vi->rss.hash_types = vi->rss_hash_types_supported; 4100 vi->rss_hash_types_saved = vi->rss_hash_types_supported; 4101 vi->rss.indirection_table_mask = vi->rss_indir_table_size 4102 ? 
vi->rss_indir_table_size - 1 : 0; 4103 vi->rss.unclassified_queue = 0; 4104 4105 virtnet_rss_update_by_qpairs(vi, vi->curr_queue_pairs); 4106 4107 vi->rss.hash_key_length = vi->rss_key_size; 4108 4109 netdev_rss_key_fill(vi->rss.key, vi->rss_key_size); 4110 } 4111 4112 static void virtnet_get_hashflow(const struct virtnet_info *vi, struct ethtool_rxnfc *info) 4113 { 4114 info->data = 0; 4115 switch (info->flow_type) { 4116 case TCP_V4_FLOW: 4117 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) { 4118 info->data = RXH_IP_SRC | RXH_IP_DST | 4119 RXH_L4_B_0_1 | RXH_L4_B_2_3; 4120 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 4121 info->data = RXH_IP_SRC | RXH_IP_DST; 4122 } 4123 break; 4124 case TCP_V6_FLOW: 4125 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) { 4126 info->data = RXH_IP_SRC | RXH_IP_DST | 4127 RXH_L4_B_0_1 | RXH_L4_B_2_3; 4128 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 4129 info->data = RXH_IP_SRC | RXH_IP_DST; 4130 } 4131 break; 4132 case UDP_V4_FLOW: 4133 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) { 4134 info->data = RXH_IP_SRC | RXH_IP_DST | 4135 RXH_L4_B_0_1 | RXH_L4_B_2_3; 4136 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 4137 info->data = RXH_IP_SRC | RXH_IP_DST; 4138 } 4139 break; 4140 case UDP_V6_FLOW: 4141 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) { 4142 info->data = RXH_IP_SRC | RXH_IP_DST | 4143 RXH_L4_B_0_1 | RXH_L4_B_2_3; 4144 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 4145 info->data = RXH_IP_SRC | RXH_IP_DST; 4146 } 4147 break; 4148 case IPV4_FLOW: 4149 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) 4150 info->data = RXH_IP_SRC | RXH_IP_DST; 4151 4152 break; 4153 case IPV6_FLOW: 4154 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) 4155 info->data = RXH_IP_SRC | RXH_IP_DST; 4156 4157 break; 4158 default: 4159 info->data = 0; 4160 break; 4161 } 4162 } 4163 4164 static bool virtnet_set_hashflow(struct virtnet_info *vi, struct ethtool_rxnfc *info) 4165 { 4166 u32 new_hashtypes = vi->rss_hash_types_saved; 4167 bool is_disable = info->data & RXH_DISCARD; 4168 bool is_l4 = info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3); 4169 4170 /* supports only 'sd', 'sdfn' and 'r' */ 4171 if (!((info->data == (RXH_IP_SRC | RXH_IP_DST)) | is_l4 | is_disable)) 4172 return false; 4173 4174 switch (info->flow_type) { 4175 case TCP_V4_FLOW: 4176 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4); 4177 if (!is_disable) 4178 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 4179 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv4 : 0); 4180 break; 4181 case UDP_V4_FLOW: 4182 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_UDPv4); 4183 if (!is_disable) 4184 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 4185 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv4 : 0); 4186 break; 4187 case IPV4_FLOW: 4188 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv4; 4189 if (!is_disable) 4190 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv4; 4191 break; 4192 case TCP_V6_FLOW: 4193 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_TCPv6); 4194 if (!is_disable) 4195 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 4196 | (is_l4 ? 
VIRTIO_NET_RSS_HASH_TYPE_TCPv6 : 0); 4197 break; 4198 case UDP_V6_FLOW: 4199 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_UDPv6); 4200 if (!is_disable) 4201 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 4202 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv6 : 0); 4203 break; 4204 case IPV6_FLOW: 4205 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv6; 4206 if (!is_disable) 4207 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv6; 4208 break; 4209 default: 4210 /* unsupported flow */ 4211 return false; 4212 } 4213 4214 /* if unsupported hashtype was set */ 4215 if (new_hashtypes != (new_hashtypes & vi->rss_hash_types_supported)) 4216 return false; 4217 4218 if (new_hashtypes != vi->rss_hash_types_saved) { 4219 vi->rss_hash_types_saved = new_hashtypes; 4220 vi->rss.hash_types = vi->rss_hash_types_saved; 4221 if (vi->dev->features & NETIF_F_RXHASH) 4222 return virtnet_commit_rss_command(vi); 4223 } 4224 4225 return true; 4226 } 4227 4228 static void virtnet_get_drvinfo(struct net_device *dev, 4229 struct ethtool_drvinfo *info) 4230 { 4231 struct virtnet_info *vi = netdev_priv(dev); 4232 struct virtio_device *vdev = vi->vdev; 4233 4234 strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); 4235 strscpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version)); 4236 strscpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info)); 4237 4238 } 4239 4240 /* TODO: Eliminate OOO packets during switching */ 4241 static int virtnet_set_channels(struct net_device *dev, 4242 struct ethtool_channels *channels) 4243 { 4244 struct virtnet_info *vi = netdev_priv(dev); 4245 u16 queue_pairs = channels->combined_count; 4246 int err; 4247 4248 /* We don't support separate rx/tx channels. 4249 * We don't allow setting 'other' channels. 4250 */ 4251 if (channels->rx_count || channels->tx_count || channels->other_count) 4252 return -EINVAL; 4253 4254 if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0) 4255 return -EINVAL; 4256 4257 /* For now we don't support modifying channels while XDP is loaded 4258 * also when XDP is loaded all RX queues have XDP programs so we only 4259 * need to check a single RX queue. 
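 * (virtnet_xdp_set() installs or removes the program on every receive
 * queue, so checking rq[0].xdp_prog below covers all of them.)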
4260 */ 4261 if (vi->rq[0].xdp_prog) 4262 return -EINVAL; 4263 4264 cpus_read_lock(); 4265 err = virtnet_set_queues(vi, queue_pairs); 4266 if (err) { 4267 cpus_read_unlock(); 4268 goto err; 4269 } 4270 virtnet_set_affinity(vi); 4271 cpus_read_unlock(); 4272 4273 netif_set_real_num_tx_queues(dev, queue_pairs); 4274 netif_set_real_num_rx_queues(dev, queue_pairs); 4275 err: 4276 return err; 4277 } 4278 4279 static void virtnet_stats_sprintf(u8 **p, const char *fmt, const char *noq_fmt, 4280 int num, int qid, const struct virtnet_stat_desc *desc) 4281 { 4282 int i; 4283 4284 if (qid < 0) { 4285 for (i = 0; i < num; ++i) 4286 ethtool_sprintf(p, noq_fmt, desc[i].desc); 4287 } else { 4288 for (i = 0; i < num; ++i) 4289 ethtool_sprintf(p, fmt, qid, desc[i].desc); 4290 } 4291 } 4292 4293 /* qid == -1: for rx/tx queue total field */ 4294 static void virtnet_get_stats_string(struct virtnet_info *vi, int type, int qid, u8 **data) 4295 { 4296 const struct virtnet_stat_desc *desc; 4297 const char *fmt, *noq_fmt; 4298 u8 *p = *data; 4299 u32 num; 4300 4301 if (type == VIRTNET_Q_TYPE_CQ && qid >= 0) { 4302 noq_fmt = "cq_hw_%s"; 4303 4304 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) { 4305 desc = &virtnet_stats_cvq_desc[0]; 4306 num = ARRAY_SIZE(virtnet_stats_cvq_desc); 4307 4308 virtnet_stats_sprintf(&p, NULL, noq_fmt, num, -1, desc); 4309 } 4310 } 4311 4312 if (type == VIRTNET_Q_TYPE_RX) { 4313 fmt = "rx%u_%s"; 4314 noq_fmt = "rx_%s"; 4315 4316 desc = &virtnet_rq_stats_desc[0]; 4317 num = ARRAY_SIZE(virtnet_rq_stats_desc); 4318 4319 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4320 4321 fmt = "rx%u_hw_%s"; 4322 noq_fmt = "rx_hw_%s"; 4323 4324 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4325 desc = &virtnet_stats_rx_basic_desc[0]; 4326 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4327 4328 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4329 } 4330 4331 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4332 desc = &virtnet_stats_rx_csum_desc[0]; 4333 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4334 4335 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4336 } 4337 4338 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4339 desc = &virtnet_stats_rx_speed_desc[0]; 4340 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4341 4342 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4343 } 4344 } 4345 4346 if (type == VIRTNET_Q_TYPE_TX) { 4347 fmt = "tx%u_%s"; 4348 noq_fmt = "tx_%s"; 4349 4350 desc = &virtnet_sq_stats_desc[0]; 4351 num = ARRAY_SIZE(virtnet_sq_stats_desc); 4352 4353 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4354 4355 fmt = "tx%u_hw_%s"; 4356 noq_fmt = "tx_hw_%s"; 4357 4358 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4359 desc = &virtnet_stats_tx_basic_desc[0]; 4360 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4361 4362 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4363 } 4364 4365 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4366 desc = &virtnet_stats_tx_gso_desc[0]; 4367 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4368 4369 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4370 } 4371 4372 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4373 desc = &virtnet_stats_tx_speed_desc[0]; 4374 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4375 4376 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4377 } 4378 } 4379 4380 *data = p; 4381 } 4382 4383 struct virtnet_stats_ctx { 4384 /* The stats are write to qstats or ethtool -S */ 4385 
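 /* For the ethtool -S path the output buffer is laid out as
  * [rx totals][tx totals][cq stats][per-queue rx blocks][per-queue tx blocks];
  * virtnet_fill_stats() computes offsets into that layout. For the qstat
  * path, values are written at the qstat_offset recorded in each descriptor.
  */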
bool to_qstat; 4386 4387 /* Used to calculate the offset inside the output buffer. */ 4388 u32 desc_num[3]; 4389 4390 /* The actual supported stat types. */ 4391 u64 bitmap[3]; 4392 4393 /* Used to calculate the reply buffer size. */ 4394 u32 size[3]; 4395 4396 /* Record the output buffer. */ 4397 u64 *data; 4398 }; 4399 4400 static void virtnet_stats_ctx_init(struct virtnet_info *vi, 4401 struct virtnet_stats_ctx *ctx, 4402 u64 *data, bool to_qstat) 4403 { 4404 u32 queue_type; 4405 4406 ctx->data = data; 4407 ctx->to_qstat = to_qstat; 4408 4409 if (to_qstat) { 4410 ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc_qstat); 4411 ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc_qstat); 4412 4413 queue_type = VIRTNET_Q_TYPE_RX; 4414 4415 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4416 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC; 4417 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat); 4418 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic); 4419 } 4420 4421 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4422 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM; 4423 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat); 4424 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum); 4425 } 4426 4427 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 4428 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_GSO; 4429 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat); 4430 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_gso); 4431 } 4432 4433 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4434 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED; 4435 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat); 4436 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed); 4437 } 4438 4439 queue_type = VIRTNET_Q_TYPE_TX; 4440 4441 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4442 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC; 4443 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat); 4444 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic); 4445 } 4446 4447 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 4448 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_CSUM; 4449 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat); 4450 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_csum); 4451 } 4452 4453 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4454 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO; 4455 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat); 4456 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso); 4457 } 4458 4459 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4460 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED; 4461 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat); 4462 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed); 4463 } 4464 4465 return; 4466 } 4467 4468 ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc); 4469 ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc); 4470 4471 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) { 4472 queue_type = VIRTNET_Q_TYPE_CQ; 4473 4474 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_CVQ; 4475 ctx->desc_num[queue_type] += 
ARRAY_SIZE(virtnet_stats_cvq_desc); 4476 ctx->size[queue_type] += sizeof(struct virtio_net_stats_cvq); 4477 } 4478 4479 queue_type = VIRTNET_Q_TYPE_RX; 4480 4481 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4482 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC; 4483 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4484 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic); 4485 } 4486 4487 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4488 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM; 4489 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4490 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum); 4491 } 4492 4493 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4494 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED; 4495 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4496 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed); 4497 } 4498 4499 queue_type = VIRTNET_Q_TYPE_TX; 4500 4501 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4502 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC; 4503 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4504 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic); 4505 } 4506 4507 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4508 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO; 4509 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4510 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso); 4511 } 4512 4513 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4514 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED; 4515 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4516 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed); 4517 } 4518 } 4519 4520 /* stats_sum_queue - Calculate the sum of the same fields in sq or rq. 
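 *
 * The per-queue counter blocks are laid out back to back in the output
 * buffer, so field i of queue j is read from q_value[i + j * num].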
4521 * @sum: the position to store the sum values 4522 * @num: field num 4523 * @q_value: the first queue fields 4524 * @q_num: number of the queues 4525 */ 4526 static void stats_sum_queue(u64 *sum, u32 num, u64 *q_value, u32 q_num) 4527 { 4528 u32 step = num; 4529 int i, j; 4530 u64 *p; 4531 4532 for (i = 0; i < num; ++i) { 4533 p = sum + i; 4534 *p = 0; 4535 4536 for (j = 0; j < q_num; ++j) 4537 *p += *(q_value + i + j * step); 4538 } 4539 } 4540 4541 static void virtnet_fill_total_fields(struct virtnet_info *vi, 4542 struct virtnet_stats_ctx *ctx) 4543 { 4544 u64 *data, *first_rx_q, *first_tx_q; 4545 u32 num_cq, num_rx, num_tx; 4546 4547 num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ]; 4548 num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX]; 4549 num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX]; 4550 4551 first_rx_q = ctx->data + num_rx + num_tx + num_cq; 4552 first_tx_q = first_rx_q + vi->curr_queue_pairs * num_rx; 4553 4554 data = ctx->data; 4555 4556 stats_sum_queue(data, num_rx, first_rx_q, vi->curr_queue_pairs); 4557 4558 data = ctx->data + num_rx; 4559 4560 stats_sum_queue(data, num_tx, first_tx_q, vi->curr_queue_pairs); 4561 } 4562 4563 static void virtnet_fill_stats_qstat(struct virtnet_info *vi, u32 qid, 4564 struct virtnet_stats_ctx *ctx, 4565 const u8 *base, bool drv_stats, u8 reply_type) 4566 { 4567 const struct virtnet_stat_desc *desc; 4568 const u64_stats_t *v_stat; 4569 u64 offset, bitmap; 4570 const __le64 *v; 4571 u32 queue_type; 4572 int i, num; 4573 4574 queue_type = vq_type(vi, qid); 4575 bitmap = ctx->bitmap[queue_type]; 4576 4577 if (drv_stats) { 4578 if (queue_type == VIRTNET_Q_TYPE_RX) { 4579 desc = &virtnet_rq_stats_desc_qstat[0]; 4580 num = ARRAY_SIZE(virtnet_rq_stats_desc_qstat); 4581 } else { 4582 desc = &virtnet_sq_stats_desc_qstat[0]; 4583 num = ARRAY_SIZE(virtnet_sq_stats_desc_qstat); 4584 } 4585 4586 for (i = 0; i < num; ++i) { 4587 offset = desc[i].qstat_offset / sizeof(*ctx->data); 4588 v_stat = (const u64_stats_t *)(base + desc[i].offset); 4589 ctx->data[offset] = u64_stats_read(v_stat); 4590 } 4591 return; 4592 } 4593 4594 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4595 desc = &virtnet_stats_rx_basic_desc_qstat[0]; 4596 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat); 4597 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC) 4598 goto found; 4599 } 4600 4601 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4602 desc = &virtnet_stats_rx_csum_desc_qstat[0]; 4603 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat); 4604 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM) 4605 goto found; 4606 } 4607 4608 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 4609 desc = &virtnet_stats_rx_gso_desc_qstat[0]; 4610 num = ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat); 4611 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_GSO) 4612 goto found; 4613 } 4614 4615 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4616 desc = &virtnet_stats_rx_speed_desc_qstat[0]; 4617 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat); 4618 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED) 4619 goto found; 4620 } 4621 4622 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4623 desc = &virtnet_stats_tx_basic_desc_qstat[0]; 4624 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat); 4625 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC) 4626 goto found; 4627 } 4628 4629 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 4630 desc = &virtnet_stats_tx_csum_desc_qstat[0]; 4631 num = ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat); 4632 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_CSUM) 4633 goto found; 4634 
} 4635 4636 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4637 desc = &virtnet_stats_tx_gso_desc_qstat[0]; 4638 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat); 4639 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO) 4640 goto found; 4641 } 4642 4643 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4644 desc = &virtnet_stats_tx_speed_desc_qstat[0]; 4645 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat); 4646 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED) 4647 goto found; 4648 } 4649 4650 return; 4651 4652 found: 4653 for (i = 0; i < num; ++i) { 4654 offset = desc[i].qstat_offset / sizeof(*ctx->data); 4655 v = (const __le64 *)(base + desc[i].offset); 4656 ctx->data[offset] = le64_to_cpu(*v); 4657 } 4658 } 4659 4660 /* virtnet_fill_stats - copy the stats to qstats or ethtool -S 4661 * The stats source is the device or the driver. 4662 * 4663 * @vi: virtio net info 4664 * @qid: the vq id 4665 * @ctx: stats ctx (initiated by virtnet_stats_ctx_init()) 4666 * @base: pointer to the device reply or the driver stats structure. 4667 * @drv_stats: designate the base type (device reply, driver stats) 4668 * @type: the type of the device reply (if drv_stats is true, this must be zero) 4669 */ 4670 static void virtnet_fill_stats(struct virtnet_info *vi, u32 qid, 4671 struct virtnet_stats_ctx *ctx, 4672 const u8 *base, bool drv_stats, u8 reply_type) 4673 { 4674 u32 queue_type, num_rx, num_tx, num_cq; 4675 const struct virtnet_stat_desc *desc; 4676 const u64_stats_t *v_stat; 4677 u64 offset, bitmap; 4678 const __le64 *v; 4679 int i, num; 4680 4681 if (ctx->to_qstat) 4682 return virtnet_fill_stats_qstat(vi, qid, ctx, base, drv_stats, reply_type); 4683 4684 num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ]; 4685 num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX]; 4686 num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX]; 4687 4688 queue_type = vq_type(vi, qid); 4689 bitmap = ctx->bitmap[queue_type]; 4690 4691 /* skip the total fields of pairs */ 4692 offset = num_rx + num_tx; 4693 4694 if (queue_type == VIRTNET_Q_TYPE_TX) { 4695 offset += num_cq + num_rx * vi->curr_queue_pairs + num_tx * (qid / 2); 4696 4697 num = ARRAY_SIZE(virtnet_sq_stats_desc); 4698 if (drv_stats) { 4699 desc = &virtnet_sq_stats_desc[0]; 4700 goto drv_stats; 4701 } 4702 4703 offset += num; 4704 4705 } else if (queue_type == VIRTNET_Q_TYPE_RX) { 4706 offset += num_cq + num_rx * (qid / 2); 4707 4708 num = ARRAY_SIZE(virtnet_rq_stats_desc); 4709 if (drv_stats) { 4710 desc = &virtnet_rq_stats_desc[0]; 4711 goto drv_stats; 4712 } 4713 4714 offset += num; 4715 } 4716 4717 if (bitmap & VIRTIO_NET_STATS_TYPE_CVQ) { 4718 desc = &virtnet_stats_cvq_desc[0]; 4719 num = ARRAY_SIZE(virtnet_stats_cvq_desc); 4720 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_CVQ) 4721 goto found; 4722 4723 offset += num; 4724 } 4725 4726 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4727 desc = &virtnet_stats_rx_basic_desc[0]; 4728 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4729 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC) 4730 goto found; 4731 4732 offset += num; 4733 } 4734 4735 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4736 desc = &virtnet_stats_rx_csum_desc[0]; 4737 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4738 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM) 4739 goto found; 4740 4741 offset += num; 4742 } 4743 4744 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4745 desc = &virtnet_stats_rx_speed_desc[0]; 4746 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4747 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED) 4748 goto found; 4749 4750 
offset += num; 4751 } 4752 4753 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4754 desc = &virtnet_stats_tx_basic_desc[0]; 4755 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4756 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC) 4757 goto found; 4758 4759 offset += num; 4760 } 4761 4762 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4763 desc = &virtnet_stats_tx_gso_desc[0]; 4764 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4765 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO) 4766 goto found; 4767 4768 offset += num; 4769 } 4770 4771 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4772 desc = &virtnet_stats_tx_speed_desc[0]; 4773 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4774 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED) 4775 goto found; 4776 4777 offset += num; 4778 } 4779 4780 return; 4781 4782 found: 4783 for (i = 0; i < num; ++i) { 4784 v = (const __le64 *)(base + desc[i].offset); 4785 ctx->data[offset + i] = le64_to_cpu(*v); 4786 } 4787 4788 return; 4789 4790 drv_stats: 4791 for (i = 0; i < num; ++i) { 4792 v_stat = (const u64_stats_t *)(base + desc[i].offset); 4793 ctx->data[offset + i] = u64_stats_read(v_stat); 4794 } 4795 } 4796 4797 static int __virtnet_get_hw_stats(struct virtnet_info *vi, 4798 struct virtnet_stats_ctx *ctx, 4799 struct virtio_net_ctrl_queue_stats *req, 4800 int req_size, void *reply, int res_size) 4801 { 4802 struct virtio_net_stats_reply_hdr *hdr; 4803 struct scatterlist sgs_in, sgs_out; 4804 void *p; 4805 u32 qid; 4806 int ok; 4807 4808 sg_init_one(&sgs_out, req, req_size); 4809 sg_init_one(&sgs_in, reply, res_size); 4810 4811 ok = virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS, 4812 VIRTIO_NET_CTRL_STATS_GET, 4813 &sgs_out, &sgs_in); 4814 4815 if (!ok) 4816 return ok; 4817 4818 for (p = reply; p - reply < res_size; p += le16_to_cpu(hdr->size)) { 4819 hdr = p; 4820 qid = le16_to_cpu(hdr->vq_index); 4821 virtnet_fill_stats(vi, qid, ctx, p, false, hdr->type); 4822 } 4823 4824 return 0; 4825 } 4826 4827 static void virtnet_make_stat_req(struct virtnet_info *vi, 4828 struct virtnet_stats_ctx *ctx, 4829 struct virtio_net_ctrl_queue_stats *req, 4830 int qid, int *idx) 4831 { 4832 int qtype = vq_type(vi, qid); 4833 u64 bitmap = ctx->bitmap[qtype]; 4834 4835 if (!bitmap) 4836 return; 4837 4838 req->stats[*idx].vq_index = cpu_to_le16(qid); 4839 req->stats[*idx].types_bitmap[0] = cpu_to_le64(bitmap); 4840 *idx += 1; 4841 } 4842 4843 /* qid: -1: get stats of all vq. 4844 * > 0: get the stats for the special vq. This must not be cvq. 
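 *      Note that per-queue callers may also pass qid 0 (the first RX
 *      virtqueue); only the control virtqueue must not be requested here.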
4845 */ 4846 static int virtnet_get_hw_stats(struct virtnet_info *vi, 4847 struct virtnet_stats_ctx *ctx, int qid) 4848 { 4849 int qnum, i, j, res_size, qtype, last_vq, first_vq; 4850 struct virtio_net_ctrl_queue_stats *req; 4851 bool enable_cvq; 4852 void *reply; 4853 int ok; 4854 4855 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) 4856 return 0; 4857 4858 if (qid == -1) { 4859 last_vq = vi->curr_queue_pairs * 2 - 1; 4860 first_vq = 0; 4861 enable_cvq = true; 4862 } else { 4863 last_vq = qid; 4864 first_vq = qid; 4865 enable_cvq = false; 4866 } 4867 4868 qnum = 0; 4869 res_size = 0; 4870 for (i = first_vq; i <= last_vq ; ++i) { 4871 qtype = vq_type(vi, i); 4872 if (ctx->bitmap[qtype]) { 4873 ++qnum; 4874 res_size += ctx->size[qtype]; 4875 } 4876 } 4877 4878 if (enable_cvq && ctx->bitmap[VIRTNET_Q_TYPE_CQ]) { 4879 res_size += ctx->size[VIRTNET_Q_TYPE_CQ]; 4880 qnum += 1; 4881 } 4882 4883 req = kcalloc(qnum, sizeof(*req), GFP_KERNEL); 4884 if (!req) 4885 return -ENOMEM; 4886 4887 reply = kmalloc(res_size, GFP_KERNEL); 4888 if (!reply) { 4889 kfree(req); 4890 return -ENOMEM; 4891 } 4892 4893 j = 0; 4894 for (i = first_vq; i <= last_vq ; ++i) 4895 virtnet_make_stat_req(vi, ctx, req, i, &j); 4896 4897 if (enable_cvq) 4898 virtnet_make_stat_req(vi, ctx, req, vi->max_queue_pairs * 2, &j); 4899 4900 ok = __virtnet_get_hw_stats(vi, ctx, req, sizeof(*req) * j, reply, res_size); 4901 4902 kfree(req); 4903 kfree(reply); 4904 4905 return ok; 4906 } 4907 4908 static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data) 4909 { 4910 struct virtnet_info *vi = netdev_priv(dev); 4911 unsigned int i; 4912 u8 *p = data; 4913 4914 switch (stringset) { 4915 case ETH_SS_STATS: 4916 /* Generate the total field names. */ 4917 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, -1, &p); 4918 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, -1, &p); 4919 4920 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_CQ, 0, &p); 4921 4922 for (i = 0; i < vi->curr_queue_pairs; ++i) 4923 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, i, &p); 4924 4925 for (i = 0; i < vi->curr_queue_pairs; ++i) 4926 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, i, &p); 4927 break; 4928 } 4929 } 4930 4931 static int virtnet_get_sset_count(struct net_device *dev, int sset) 4932 { 4933 struct virtnet_info *vi = netdev_priv(dev); 4934 struct virtnet_stats_ctx ctx = {0}; 4935 u32 pair_count; 4936 4937 switch (sset) { 4938 case ETH_SS_STATS: 4939 virtnet_stats_ctx_init(vi, &ctx, NULL, false); 4940 4941 pair_count = ctx.desc_num[VIRTNET_Q_TYPE_RX] + ctx.desc_num[VIRTNET_Q_TYPE_TX]; 4942 4943 return pair_count + ctx.desc_num[VIRTNET_Q_TYPE_CQ] + 4944 vi->curr_queue_pairs * pair_count; 4945 default: 4946 return -EOPNOTSUPP; 4947 } 4948 } 4949 4950 static void virtnet_get_ethtool_stats(struct net_device *dev, 4951 struct ethtool_stats *stats, u64 *data) 4952 { 4953 struct virtnet_info *vi = netdev_priv(dev); 4954 struct virtnet_stats_ctx ctx = {0}; 4955 unsigned int start, i; 4956 const u8 *stats_base; 4957 4958 virtnet_stats_ctx_init(vi, &ctx, data, false); 4959 if (virtnet_get_hw_stats(vi, &ctx, -1)) 4960 dev_warn(&vi->dev->dev, "Failed to get hw stats.\n"); 4961 4962 for (i = 0; i < vi->curr_queue_pairs; i++) { 4963 struct receive_queue *rq = &vi->rq[i]; 4964 struct send_queue *sq = &vi->sq[i]; 4965 4966 stats_base = (const u8 *)&rq->stats; 4967 do { 4968 start = u64_stats_fetch_begin(&rq->stats.syncp); 4969 virtnet_fill_stats(vi, i * 2, &ctx, stats_base, true, 0); 4970 } while (u64_stats_fetch_retry(&rq->stats.syncp, 
start)); 4971 4972 stats_base = (const u8 *)&sq->stats; 4973 do { 4974 start = u64_stats_fetch_begin(&sq->stats.syncp); 4975 virtnet_fill_stats(vi, i * 2 + 1, &ctx, stats_base, true, 0); 4976 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 4977 } 4978 4979 virtnet_fill_total_fields(vi, &ctx); 4980 } 4981 4982 static void virtnet_get_channels(struct net_device *dev, 4983 struct ethtool_channels *channels) 4984 { 4985 struct virtnet_info *vi = netdev_priv(dev); 4986 4987 channels->combined_count = vi->curr_queue_pairs; 4988 channels->max_combined = vi->max_queue_pairs; 4989 channels->max_other = 0; 4990 channels->rx_count = 0; 4991 channels->tx_count = 0; 4992 channels->other_count = 0; 4993 } 4994 4995 static int virtnet_set_link_ksettings(struct net_device *dev, 4996 const struct ethtool_link_ksettings *cmd) 4997 { 4998 struct virtnet_info *vi = netdev_priv(dev); 4999 5000 return ethtool_virtdev_set_link_ksettings(dev, cmd, 5001 &vi->speed, &vi->duplex); 5002 } 5003 5004 static int virtnet_get_link_ksettings(struct net_device *dev, 5005 struct ethtool_link_ksettings *cmd) 5006 { 5007 struct virtnet_info *vi = netdev_priv(dev); 5008 5009 cmd->base.speed = vi->speed; 5010 cmd->base.duplex = vi->duplex; 5011 cmd->base.port = PORT_OTHER; 5012 5013 return 0; 5014 } 5015 5016 static int virtnet_send_tx_notf_coal_cmds(struct virtnet_info *vi, 5017 struct ethtool_coalesce *ec) 5018 { 5019 struct virtio_net_ctrl_coal_tx *coal_tx __free(kfree) = NULL; 5020 struct scatterlist sgs_tx; 5021 int i; 5022 5023 coal_tx = kzalloc(sizeof(*coal_tx), GFP_KERNEL); 5024 if (!coal_tx) 5025 return -ENOMEM; 5026 5027 coal_tx->tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); 5028 coal_tx->tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); 5029 sg_init_one(&sgs_tx, coal_tx, sizeof(*coal_tx)); 5030 5031 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 5032 VIRTIO_NET_CTRL_NOTF_COAL_TX_SET, 5033 &sgs_tx)) 5034 return -EINVAL; 5035 5036 vi->intr_coal_tx.max_usecs = ec->tx_coalesce_usecs; 5037 vi->intr_coal_tx.max_packets = ec->tx_max_coalesced_frames; 5038 for (i = 0; i < vi->max_queue_pairs; i++) { 5039 vi->sq[i].intr_coal.max_usecs = ec->tx_coalesce_usecs; 5040 vi->sq[i].intr_coal.max_packets = ec->tx_max_coalesced_frames; 5041 } 5042 5043 return 0; 5044 } 5045 5046 static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi, 5047 struct ethtool_coalesce *ec) 5048 { 5049 struct virtio_net_ctrl_coal_rx *coal_rx __free(kfree) = NULL; 5050 bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce; 5051 struct scatterlist sgs_rx; 5052 int i; 5053 5054 if (rx_ctrl_dim_on && !virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 5055 return -EOPNOTSUPP; 5056 5057 if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != vi->intr_coal_rx.max_usecs || 5058 ec->rx_max_coalesced_frames != vi->intr_coal_rx.max_packets)) 5059 return -EINVAL; 5060 5061 if (rx_ctrl_dim_on && !vi->rx_dim_enabled) { 5062 vi->rx_dim_enabled = true; 5063 for (i = 0; i < vi->max_queue_pairs; i++) { 5064 mutex_lock(&vi->rq[i].dim_lock); 5065 vi->rq[i].dim_enabled = true; 5066 mutex_unlock(&vi->rq[i].dim_lock); 5067 } 5068 return 0; 5069 } 5070 5071 coal_rx = kzalloc(sizeof(*coal_rx), GFP_KERNEL); 5072 if (!coal_rx) 5073 return -ENOMEM; 5074 5075 if (!rx_ctrl_dim_on && vi->rx_dim_enabled) { 5076 vi->rx_dim_enabled = false; 5077 for (i = 0; i < vi->max_queue_pairs; i++) { 5078 mutex_lock(&vi->rq[i].dim_lock); 5079 vi->rq[i].dim_enabled = false; 5080 mutex_unlock(&vi->rq[i].dim_lock); 5081 } 5082 } 5083 5084 /* Since the per-queue 
coalescing params can be set, 5085 * we need apply the global new params even if they 5086 * are not updated. 5087 */ 5088 coal_rx->rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs); 5089 coal_rx->rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames); 5090 sg_init_one(&sgs_rx, coal_rx, sizeof(*coal_rx)); 5091 5092 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 5093 VIRTIO_NET_CTRL_NOTF_COAL_RX_SET, 5094 &sgs_rx)) 5095 return -EINVAL; 5096 5097 vi->intr_coal_rx.max_usecs = ec->rx_coalesce_usecs; 5098 vi->intr_coal_rx.max_packets = ec->rx_max_coalesced_frames; 5099 for (i = 0; i < vi->max_queue_pairs; i++) { 5100 mutex_lock(&vi->rq[i].dim_lock); 5101 vi->rq[i].intr_coal.max_usecs = ec->rx_coalesce_usecs; 5102 vi->rq[i].intr_coal.max_packets = ec->rx_max_coalesced_frames; 5103 mutex_unlock(&vi->rq[i].dim_lock); 5104 } 5105 5106 return 0; 5107 } 5108 5109 static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, 5110 struct ethtool_coalesce *ec) 5111 { 5112 int err; 5113 5114 err = virtnet_send_tx_notf_coal_cmds(vi, ec); 5115 if (err) 5116 return err; 5117 5118 err = virtnet_send_rx_notf_coal_cmds(vi, ec); 5119 if (err) 5120 return err; 5121 5122 return 0; 5123 } 5124 5125 static int virtnet_send_rx_notf_coal_vq_cmds(struct virtnet_info *vi, 5126 struct ethtool_coalesce *ec, 5127 u16 queue) 5128 { 5129 bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce; 5130 u32 max_usecs, max_packets; 5131 bool cur_rx_dim; 5132 int err; 5133 5134 mutex_lock(&vi->rq[queue].dim_lock); 5135 cur_rx_dim = vi->rq[queue].dim_enabled; 5136 max_usecs = vi->rq[queue].intr_coal.max_usecs; 5137 max_packets = vi->rq[queue].intr_coal.max_packets; 5138 5139 if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != max_usecs || 5140 ec->rx_max_coalesced_frames != max_packets)) { 5141 mutex_unlock(&vi->rq[queue].dim_lock); 5142 return -EINVAL; 5143 } 5144 5145 if (rx_ctrl_dim_on && !cur_rx_dim) { 5146 vi->rq[queue].dim_enabled = true; 5147 mutex_unlock(&vi->rq[queue].dim_lock); 5148 return 0; 5149 } 5150 5151 if (!rx_ctrl_dim_on && cur_rx_dim) 5152 vi->rq[queue].dim_enabled = false; 5153 5154 /* If no params are updated, userspace ethtool will 5155 * reject the modification. 
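 * Hence the command below is always sent with the user-supplied values.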
5156 */ 5157 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, queue, 5158 ec->rx_coalesce_usecs, 5159 ec->rx_max_coalesced_frames); 5160 mutex_unlock(&vi->rq[queue].dim_lock); 5161 return err; 5162 } 5163 5164 static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi, 5165 struct ethtool_coalesce *ec, 5166 u16 queue) 5167 { 5168 int err; 5169 5170 err = virtnet_send_rx_notf_coal_vq_cmds(vi, ec, queue); 5171 if (err) 5172 return err; 5173 5174 err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, queue, 5175 ec->tx_coalesce_usecs, 5176 ec->tx_max_coalesced_frames); 5177 if (err) 5178 return err; 5179 5180 return 0; 5181 } 5182 5183 static void virtnet_rx_dim_work(struct work_struct *work) 5184 { 5185 struct dim *dim = container_of(work, struct dim, work); 5186 struct receive_queue *rq = container_of(dim, 5187 struct receive_queue, dim); 5188 struct virtnet_info *vi = rq->vq->vdev->priv; 5189 struct net_device *dev = vi->dev; 5190 struct dim_cq_moder update_moder; 5191 int qnum, err; 5192 5193 qnum = rq - vi->rq; 5194 5195 mutex_lock(&rq->dim_lock); 5196 if (!rq->dim_enabled) 5197 goto out; 5198 5199 update_moder = net_dim_get_rx_irq_moder(dev, dim); 5200 if (update_moder.usec != rq->intr_coal.max_usecs || 5201 update_moder.pkts != rq->intr_coal.max_packets) { 5202 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, qnum, 5203 update_moder.usec, 5204 update_moder.pkts); 5205 if (err) 5206 pr_debug("%s: Failed to send dim parameters on rxq%d\n", 5207 dev->name, qnum); 5208 } 5209 out: 5210 dim->state = DIM_START_MEASURE; 5211 mutex_unlock(&rq->dim_lock); 5212 } 5213 5214 static int virtnet_coal_params_supported(struct ethtool_coalesce *ec) 5215 { 5216 /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL 5217 * or VIRTIO_NET_F_VQ_NOTF_COAL feature is negotiated. 5218 */ 5219 if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs) 5220 return -EOPNOTSUPP; 5221 5222 if (ec->tx_max_coalesced_frames > 1 || 5223 ec->rx_max_coalesced_frames != 1) 5224 return -EINVAL; 5225 5226 return 0; 5227 } 5228 5229 static int virtnet_should_update_vq_weight(int dev_flags, int weight, 5230 int vq_weight, bool *should_update) 5231 { 5232 if (weight ^ vq_weight) { 5233 if (dev_flags & IFF_UP) 5234 return -EBUSY; 5235 *should_update = true; 5236 } 5237 5238 return 0; 5239 } 5240 5241 static int virtnet_set_coalesce(struct net_device *dev, 5242 struct ethtool_coalesce *ec, 5243 struct kernel_ethtool_coalesce *kernel_coal, 5244 struct netlink_ext_ack *extack) 5245 { 5246 struct virtnet_info *vi = netdev_priv(dev); 5247 int ret, queue_number, napi_weight, i; 5248 bool update_napi = false; 5249 5250 /* Can't change NAPI weight if the link is up */ 5251 napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; 5252 for (queue_number = 0; queue_number < vi->max_queue_pairs; queue_number++) { 5253 ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, 5254 vi->sq[queue_number].napi.weight, 5255 &update_napi); 5256 if (ret) 5257 return ret; 5258 5259 if (update_napi) { 5260 /* All queues that belong to [queue_number, vi->max_queue_pairs] will be 5261 * updated for the sake of simplicity, which might not be necessary 5262 */ 5263 break; 5264 } 5265 } 5266 5267 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) 5268 ret = virtnet_send_notf_coal_cmds(vi, ec); 5269 else 5270 ret = virtnet_coal_params_supported(ec); 5271 5272 if (ret) 5273 return ret; 5274 5275 if (update_napi) { 5276 /* xsk xmit depends on the tx napi. So if xsk is active, 5277 * prevent modifications to tx napi. 
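 * The loop below therefore rejects the change with -EBUSY if any affected
 * send queue has an XSK pool bound.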
5278 */ 5279 for (i = queue_number; i < vi->max_queue_pairs; i++) { 5280 if (vi->sq[i].xsk_pool) 5281 return -EBUSY; 5282 } 5283 5284 for (; queue_number < vi->max_queue_pairs; queue_number++) 5285 vi->sq[queue_number].napi.weight = napi_weight; 5286 } 5287 5288 return ret; 5289 } 5290 5291 static int virtnet_get_coalesce(struct net_device *dev, 5292 struct ethtool_coalesce *ec, 5293 struct kernel_ethtool_coalesce *kernel_coal, 5294 struct netlink_ext_ack *extack) 5295 { 5296 struct virtnet_info *vi = netdev_priv(dev); 5297 5298 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { 5299 ec->rx_coalesce_usecs = vi->intr_coal_rx.max_usecs; 5300 ec->tx_coalesce_usecs = vi->intr_coal_tx.max_usecs; 5301 ec->tx_max_coalesced_frames = vi->intr_coal_tx.max_packets; 5302 ec->rx_max_coalesced_frames = vi->intr_coal_rx.max_packets; 5303 ec->use_adaptive_rx_coalesce = vi->rx_dim_enabled; 5304 } else { 5305 ec->rx_max_coalesced_frames = 1; 5306 5307 if (vi->sq[0].napi.weight) 5308 ec->tx_max_coalesced_frames = 1; 5309 } 5310 5311 return 0; 5312 } 5313 5314 static int virtnet_set_per_queue_coalesce(struct net_device *dev, 5315 u32 queue, 5316 struct ethtool_coalesce *ec) 5317 { 5318 struct virtnet_info *vi = netdev_priv(dev); 5319 int ret, napi_weight; 5320 bool update_napi = false; 5321 5322 if (queue >= vi->max_queue_pairs) 5323 return -EINVAL; 5324 5325 /* Can't change NAPI weight if the link is up */ 5326 napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; 5327 ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, 5328 vi->sq[queue].napi.weight, 5329 &update_napi); 5330 if (ret) 5331 return ret; 5332 5333 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 5334 ret = virtnet_send_notf_coal_vq_cmds(vi, ec, queue); 5335 else 5336 ret = virtnet_coal_params_supported(ec); 5337 5338 if (ret) 5339 return ret; 5340 5341 if (update_napi) 5342 vi->sq[queue].napi.weight = napi_weight; 5343 5344 return 0; 5345 } 5346 5347 static int virtnet_get_per_queue_coalesce(struct net_device *dev, 5348 u32 queue, 5349 struct ethtool_coalesce *ec) 5350 { 5351 struct virtnet_info *vi = netdev_priv(dev); 5352 5353 if (queue >= vi->max_queue_pairs) 5354 return -EINVAL; 5355 5356 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { 5357 mutex_lock(&vi->rq[queue].dim_lock); 5358 ec->rx_coalesce_usecs = vi->rq[queue].intr_coal.max_usecs; 5359 ec->tx_coalesce_usecs = vi->sq[queue].intr_coal.max_usecs; 5360 ec->tx_max_coalesced_frames = vi->sq[queue].intr_coal.max_packets; 5361 ec->rx_max_coalesced_frames = vi->rq[queue].intr_coal.max_packets; 5362 ec->use_adaptive_rx_coalesce = vi->rq[queue].dim_enabled; 5363 mutex_unlock(&vi->rq[queue].dim_lock); 5364 } else { 5365 ec->rx_max_coalesced_frames = 1; 5366 5367 if (vi->sq[queue].napi.weight) 5368 ec->tx_max_coalesced_frames = 1; 5369 } 5370 5371 return 0; 5372 } 5373 5374 static void virtnet_init_settings(struct net_device *dev) 5375 { 5376 struct virtnet_info *vi = netdev_priv(dev); 5377 5378 vi->speed = SPEED_UNKNOWN; 5379 vi->duplex = DUPLEX_UNKNOWN; 5380 } 5381 5382 static u32 virtnet_get_rxfh_key_size(struct net_device *dev) 5383 { 5384 return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size; 5385 } 5386 5387 static u32 virtnet_get_rxfh_indir_size(struct net_device *dev) 5388 { 5389 return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size; 5390 } 5391 5392 static int virtnet_get_rxfh(struct net_device *dev, 5393 struct ethtool_rxfh_param *rxfh) 5394 { 5395 struct virtnet_info *vi = netdev_priv(dev); 5396 int 
i; 5397 5398 if (rxfh->indir) { 5399 for (i = 0; i < vi->rss_indir_table_size; ++i) 5400 rxfh->indir[i] = vi->rss.indirection_table[i]; 5401 } 5402 5403 if (rxfh->key) 5404 memcpy(rxfh->key, vi->rss.key, vi->rss_key_size); 5405 5406 rxfh->hfunc = ETH_RSS_HASH_TOP; 5407 5408 return 0; 5409 } 5410 5411 static int virtnet_set_rxfh(struct net_device *dev, 5412 struct ethtool_rxfh_param *rxfh, 5413 struct netlink_ext_ack *extack) 5414 { 5415 struct virtnet_info *vi = netdev_priv(dev); 5416 bool update = false; 5417 int i; 5418 5419 if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE && 5420 rxfh->hfunc != ETH_RSS_HASH_TOP) 5421 return -EOPNOTSUPP; 5422 5423 if (rxfh->indir) { 5424 if (!vi->has_rss) 5425 return -EOPNOTSUPP; 5426 5427 for (i = 0; i < vi->rss_indir_table_size; ++i) 5428 vi->rss.indirection_table[i] = rxfh->indir[i]; 5429 update = true; 5430 } 5431 5432 if (rxfh->key) { 5433 /* If either _F_HASH_REPORT or _F_RSS are negotiated, the 5434 * device provides hash calculation capabilities, that is, 5435 * hash_key is configured. 5436 */ 5437 if (!vi->has_rss && !vi->has_rss_hash_report) 5438 return -EOPNOTSUPP; 5439 5440 memcpy(vi->rss.key, rxfh->key, vi->rss_key_size); 5441 update = true; 5442 } 5443 5444 if (update) 5445 virtnet_commit_rss_command(vi); 5446 5447 return 0; 5448 } 5449 5450 static int virtnet_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs) 5451 { 5452 struct virtnet_info *vi = netdev_priv(dev); 5453 int rc = 0; 5454 5455 switch (info->cmd) { 5456 case ETHTOOL_GRXRINGS: 5457 info->data = vi->curr_queue_pairs; 5458 break; 5459 case ETHTOOL_GRXFH: 5460 virtnet_get_hashflow(vi, info); 5461 break; 5462 default: 5463 rc = -EOPNOTSUPP; 5464 } 5465 5466 return rc; 5467 } 5468 5469 static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info) 5470 { 5471 struct virtnet_info *vi = netdev_priv(dev); 5472 int rc = 0; 5473 5474 switch (info->cmd) { 5475 case ETHTOOL_SRXFH: 5476 if (!virtnet_set_hashflow(vi, info)) 5477 rc = -EINVAL; 5478 5479 break; 5480 default: 5481 rc = -EOPNOTSUPP; 5482 } 5483 5484 return rc; 5485 } 5486 5487 static const struct ethtool_ops virtnet_ethtool_ops = { 5488 .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | 5489 ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE_RX, 5490 .get_drvinfo = virtnet_get_drvinfo, 5491 .get_link = ethtool_op_get_link, 5492 .get_ringparam = virtnet_get_ringparam, 5493 .set_ringparam = virtnet_set_ringparam, 5494 .get_strings = virtnet_get_strings, 5495 .get_sset_count = virtnet_get_sset_count, 5496 .get_ethtool_stats = virtnet_get_ethtool_stats, 5497 .set_channels = virtnet_set_channels, 5498 .get_channels = virtnet_get_channels, 5499 .get_ts_info = ethtool_op_get_ts_info, 5500 .get_link_ksettings = virtnet_get_link_ksettings, 5501 .set_link_ksettings = virtnet_set_link_ksettings, 5502 .set_coalesce = virtnet_set_coalesce, 5503 .get_coalesce = virtnet_get_coalesce, 5504 .set_per_queue_coalesce = virtnet_set_per_queue_coalesce, 5505 .get_per_queue_coalesce = virtnet_get_per_queue_coalesce, 5506 .get_rxfh_key_size = virtnet_get_rxfh_key_size, 5507 .get_rxfh_indir_size = virtnet_get_rxfh_indir_size, 5508 .get_rxfh = virtnet_get_rxfh, 5509 .set_rxfh = virtnet_set_rxfh, 5510 .get_rxnfc = virtnet_get_rxnfc, 5511 .set_rxnfc = virtnet_set_rxnfc, 5512 }; 5513 5514 static void virtnet_get_queue_stats_rx(struct net_device *dev, int i, 5515 struct netdev_queue_stats_rx *stats) 5516 { 5517 struct virtnet_info *vi = netdev_priv(dev); 5518 struct receive_queue *rq = &vi->rq[i]; 5519 struct 
virtnet_stats_ctx ctx = {0}; 5520 5521 virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true); 5522 5523 virtnet_get_hw_stats(vi, &ctx, i * 2); 5524 virtnet_fill_stats(vi, i * 2, &ctx, (void *)&rq->stats, true, 0); 5525 } 5526 5527 static void virtnet_get_queue_stats_tx(struct net_device *dev, int i, 5528 struct netdev_queue_stats_tx *stats) 5529 { 5530 struct virtnet_info *vi = netdev_priv(dev); 5531 struct send_queue *sq = &vi->sq[i]; 5532 struct virtnet_stats_ctx ctx = {0}; 5533 5534 virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true); 5535 5536 virtnet_get_hw_stats(vi, &ctx, i * 2 + 1); 5537 virtnet_fill_stats(vi, i * 2 + 1, &ctx, (void *)&sq->stats, true, 0); 5538 } 5539 5540 static void virtnet_get_base_stats(struct net_device *dev, 5541 struct netdev_queue_stats_rx *rx, 5542 struct netdev_queue_stats_tx *tx) 5543 { 5544 struct virtnet_info *vi = netdev_priv(dev); 5545 5546 /* The queue stats of the virtio-net will not be reset. So here we 5547 * return 0. 5548 */ 5549 rx->bytes = 0; 5550 rx->packets = 0; 5551 5552 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 5553 rx->hw_drops = 0; 5554 rx->hw_drop_overruns = 0; 5555 } 5556 5557 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 5558 rx->csum_unnecessary = 0; 5559 rx->csum_none = 0; 5560 rx->csum_bad = 0; 5561 } 5562 5563 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 5564 rx->hw_gro_packets = 0; 5565 rx->hw_gro_bytes = 0; 5566 rx->hw_gro_wire_packets = 0; 5567 rx->hw_gro_wire_bytes = 0; 5568 } 5569 5570 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) 5571 rx->hw_drop_ratelimits = 0; 5572 5573 tx->bytes = 0; 5574 tx->packets = 0; 5575 tx->stop = 0; 5576 tx->wake = 0; 5577 5578 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 5579 tx->hw_drops = 0; 5580 tx->hw_drop_errors = 0; 5581 } 5582 5583 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 5584 tx->csum_none = 0; 5585 tx->needs_csum = 0; 5586 } 5587 5588 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 5589 tx->hw_gso_packets = 0; 5590 tx->hw_gso_bytes = 0; 5591 tx->hw_gso_wire_packets = 0; 5592 tx->hw_gso_wire_bytes = 0; 5593 } 5594 5595 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) 5596 tx->hw_drop_ratelimits = 0; 5597 } 5598 5599 static const struct netdev_stat_ops virtnet_stat_ops = { 5600 .get_queue_stats_rx = virtnet_get_queue_stats_rx, 5601 .get_queue_stats_tx = virtnet_get_queue_stats_tx, 5602 .get_base_stats = virtnet_get_base_stats, 5603 }; 5604 5605 static void virtnet_freeze_down(struct virtio_device *vdev) 5606 { 5607 struct virtnet_info *vi = vdev->priv; 5608 5609 /* Make sure no work handler is accessing the device */ 5610 flush_work(&vi->config_work); 5611 disable_rx_mode_work(vi); 5612 flush_work(&vi->rx_mode_work); 5613 5614 netif_tx_lock_bh(vi->dev); 5615 netif_device_detach(vi->dev); 5616 netif_tx_unlock_bh(vi->dev); 5617 if (netif_running(vi->dev)) 5618 virtnet_close(vi->dev); 5619 } 5620 5621 static int init_vqs(struct virtnet_info *vi); 5622 5623 static int virtnet_restore_up(struct virtio_device *vdev) 5624 { 5625 struct virtnet_info *vi = vdev->priv; 5626 int err; 5627 5628 err = init_vqs(vi); 5629 if (err) 5630 return err; 5631 5632 virtio_device_ready(vdev); 5633 5634 enable_delayed_refill(vi); 5635 enable_rx_mode_work(vi); 5636 5637 if (netif_running(vi->dev)) { 5638 err = virtnet_open(vi->dev); 5639 if (err) 5640 return err; 5641 } 5642 5643 netif_tx_lock_bh(vi->dev); 5644 netif_device_attach(vi->dev); 5645 netif_tx_unlock_bh(vi->dev); 5646 return err; 5647 } 
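/* Push a new guest offload bitmap to the device over the control virtqueue.
 * The bits are the VIRTIO_NET_F_GUEST_* feature numbers; for example
 * (illustrative only), a value enabling TSO4/TSO6 and checksum would be
 *   (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
 *   (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
 *   (1ULL << VIRTIO_NET_F_GUEST_CSUM)
 * virtnet_clear_guest_offloads()/virtnet_restore_guest_offloads() below use
 * this helper to drop and re-establish offloads around XDP attach/detach,
 * and virtnet_set_features() uses it to toggle GRO_HW.
 */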
5648 5649 static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads) 5650 { 5651 __virtio64 *_offloads __free(kfree) = NULL; 5652 struct scatterlist sg; 5653 5654 _offloads = kzalloc(sizeof(*_offloads), GFP_KERNEL); 5655 if (!_offloads) 5656 return -ENOMEM; 5657 5658 *_offloads = cpu_to_virtio64(vi->vdev, offloads); 5659 5660 sg_init_one(&sg, _offloads, sizeof(*_offloads)); 5661 5662 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS, 5663 VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) { 5664 dev_warn(&vi->dev->dev, "Fail to set guest offload.\n"); 5665 return -EINVAL; 5666 } 5667 5668 return 0; 5669 } 5670 5671 static int virtnet_clear_guest_offloads(struct virtnet_info *vi) 5672 { 5673 u64 offloads = 0; 5674 5675 if (!vi->guest_offloads) 5676 return 0; 5677 5678 return virtnet_set_guest_offloads(vi, offloads); 5679 } 5680 5681 static int virtnet_restore_guest_offloads(struct virtnet_info *vi) 5682 { 5683 u64 offloads = vi->guest_offloads; 5684 5685 if (!vi->guest_offloads) 5686 return 0; 5687 5688 return virtnet_set_guest_offloads(vi, offloads); 5689 } 5690 5691 static int virtnet_rq_bind_xsk_pool(struct virtnet_info *vi, struct receive_queue *rq, 5692 struct xsk_buff_pool *pool) 5693 { 5694 int err, qindex; 5695 5696 qindex = rq - vi->rq; 5697 5698 if (pool) { 5699 err = xdp_rxq_info_reg(&rq->xsk_rxq_info, vi->dev, qindex, rq->napi.napi_id); 5700 if (err < 0) 5701 return err; 5702 5703 err = xdp_rxq_info_reg_mem_model(&rq->xsk_rxq_info, 5704 MEM_TYPE_XSK_BUFF_POOL, NULL); 5705 if (err < 0) 5706 goto unreg; 5707 5708 xsk_pool_set_rxq_info(pool, &rq->xsk_rxq_info); 5709 } 5710 5711 virtnet_rx_pause(vi, rq); 5712 5713 err = virtqueue_reset(rq->vq, virtnet_rq_unmap_free_buf); 5714 if (err) { 5715 netdev_err(vi->dev, "reset rx fail: rx queue index: %d err: %d\n", qindex, err); 5716 5717 pool = NULL; 5718 } 5719 5720 rq->xsk_pool = pool; 5721 5722 virtnet_rx_resume(vi, rq); 5723 5724 if (pool) 5725 return 0; 5726 5727 unreg: 5728 xdp_rxq_info_unreg(&rq->xsk_rxq_info); 5729 return err; 5730 } 5731 5732 static int virtnet_sq_bind_xsk_pool(struct virtnet_info *vi, 5733 struct send_queue *sq, 5734 struct xsk_buff_pool *pool) 5735 { 5736 int err, qindex; 5737 5738 qindex = sq - vi->sq; 5739 5740 virtnet_tx_pause(vi, sq); 5741 5742 err = virtqueue_reset(sq->vq, virtnet_sq_free_unused_buf); 5743 if (err) { 5744 netdev_err(vi->dev, "reset tx fail: tx queue index: %d err: %d\n", qindex, err); 5745 pool = NULL; 5746 } 5747 5748 sq->xsk_pool = pool; 5749 5750 virtnet_tx_resume(vi, sq); 5751 5752 return err; 5753 } 5754 5755 static int virtnet_xsk_pool_enable(struct net_device *dev, 5756 struct xsk_buff_pool *pool, 5757 u16 qid) 5758 { 5759 struct virtnet_info *vi = netdev_priv(dev); 5760 struct receive_queue *rq; 5761 struct device *dma_dev; 5762 struct send_queue *sq; 5763 dma_addr_t hdr_dma; 5764 int err, size; 5765 5766 if (vi->hdr_len > xsk_pool_get_headroom(pool)) 5767 return -EINVAL; 5768 5769 /* In big_packets mode, xdp cannot work, so there is no need to 5770 * initialize xsk of rq. 5771 */ 5772 if (vi->big_packets && !vi->mergeable_rx_bufs) 5773 return -ENOENT; 5774 5775 if (qid >= vi->curr_queue_pairs) 5776 return -EINVAL; 5777 5778 sq = &vi->sq[qid]; 5779 rq = &vi->rq[qid]; 5780 5781 /* xsk assumes that tx and rx must have the same dma device. The af-xdp 5782 * may use one buffer to receive from the rx and reuse this buffer to 5783 * send by the tx. So the dma dev of sq and rq must be the same one. 
5784 * 5785 * But vq->dma_dev allows every vq has the respective dma dev. So I 5786 * check the dma dev of vq and sq is the same dev. 5787 */ 5788 if (virtqueue_dma_dev(rq->vq) != virtqueue_dma_dev(sq->vq)) 5789 return -EINVAL; 5790 5791 dma_dev = virtqueue_dma_dev(rq->vq); 5792 if (!dma_dev) 5793 return -EINVAL; 5794 5795 size = virtqueue_get_vring_size(rq->vq); 5796 5797 rq->xsk_buffs = kvcalloc(size, sizeof(*rq->xsk_buffs), GFP_KERNEL); 5798 if (!rq->xsk_buffs) 5799 return -ENOMEM; 5800 5801 hdr_dma = virtqueue_dma_map_single_attrs(sq->vq, &xsk_hdr, vi->hdr_len, 5802 DMA_TO_DEVICE, 0); 5803 if (virtqueue_dma_mapping_error(sq->vq, hdr_dma)) 5804 return -ENOMEM; 5805 5806 err = xsk_pool_dma_map(pool, dma_dev, 0); 5807 if (err) 5808 goto err_xsk_map; 5809 5810 err = virtnet_rq_bind_xsk_pool(vi, rq, pool); 5811 if (err) 5812 goto err_rq; 5813 5814 err = virtnet_sq_bind_xsk_pool(vi, sq, pool); 5815 if (err) 5816 goto err_sq; 5817 5818 /* Now, we do not support tx offload(such as tx csum), so all the tx 5819 * virtnet hdr is zero. So all the tx packets can share a single hdr. 5820 */ 5821 sq->xsk_hdr_dma_addr = hdr_dma; 5822 5823 return 0; 5824 5825 err_sq: 5826 virtnet_rq_bind_xsk_pool(vi, rq, NULL); 5827 err_rq: 5828 xsk_pool_dma_unmap(pool, 0); 5829 err_xsk_map: 5830 virtqueue_dma_unmap_single_attrs(rq->vq, hdr_dma, vi->hdr_len, 5831 DMA_TO_DEVICE, 0); 5832 return err; 5833 } 5834 5835 static int virtnet_xsk_pool_disable(struct net_device *dev, u16 qid) 5836 { 5837 struct virtnet_info *vi = netdev_priv(dev); 5838 struct xsk_buff_pool *pool; 5839 struct receive_queue *rq; 5840 struct send_queue *sq; 5841 int err; 5842 5843 if (qid >= vi->curr_queue_pairs) 5844 return -EINVAL; 5845 5846 sq = &vi->sq[qid]; 5847 rq = &vi->rq[qid]; 5848 5849 pool = rq->xsk_pool; 5850 5851 err = virtnet_rq_bind_xsk_pool(vi, rq, NULL); 5852 err |= virtnet_sq_bind_xsk_pool(vi, sq, NULL); 5853 5854 xsk_pool_dma_unmap(pool, 0); 5855 5856 virtqueue_dma_unmap_single_attrs(sq->vq, sq->xsk_hdr_dma_addr, 5857 vi->hdr_len, DMA_TO_DEVICE, 0); 5858 kvfree(rq->xsk_buffs); 5859 5860 return err; 5861 } 5862 5863 static int virtnet_xsk_pool_setup(struct net_device *dev, struct netdev_bpf *xdp) 5864 { 5865 if (xdp->xsk.pool) 5866 return virtnet_xsk_pool_enable(dev, xdp->xsk.pool, 5867 xdp->xsk.queue_id); 5868 else 5869 return virtnet_xsk_pool_disable(dev, xdp->xsk.queue_id); 5870 } 5871 5872 static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, 5873 struct netlink_ext_ack *extack) 5874 { 5875 unsigned int room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM + 5876 sizeof(struct skb_shared_info)); 5877 unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN; 5878 struct virtnet_info *vi = netdev_priv(dev); 5879 struct bpf_prog *old_prog; 5880 u16 xdp_qp = 0, curr_qp; 5881 int i, err; 5882 5883 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) 5884 && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 5885 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 5886 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 5887 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 5888 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM) || 5889 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) || 5890 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6))) { 5891 NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first"); 5892 return -EOPNOTSUPP; 5893 } 5894 5895 if (vi->mergeable_rx_bufs && !vi->any_header_sg) { 5896 NL_SET_ERR_MSG_MOD(extack, 
"XDP expects header/data in single page, any_header_sg required"); 5897 return -EINVAL; 5898 } 5899 5900 if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) { 5901 NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags"); 5902 netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz); 5903 return -EINVAL; 5904 } 5905 5906 curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs; 5907 if (prog) 5908 xdp_qp = nr_cpu_ids; 5909 5910 /* XDP requires extra queues for XDP_TX */ 5911 if (curr_qp + xdp_qp > vi->max_queue_pairs) { 5912 netdev_warn_once(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n", 5913 curr_qp + xdp_qp, vi->max_queue_pairs); 5914 xdp_qp = 0; 5915 } 5916 5917 old_prog = rtnl_dereference(vi->rq[0].xdp_prog); 5918 if (!prog && !old_prog) 5919 return 0; 5920 5921 if (prog) 5922 bpf_prog_add(prog, vi->max_queue_pairs - 1); 5923 5924 /* Make sure NAPI is not using any XDP TX queues for RX. */ 5925 if (netif_running(dev)) { 5926 for (i = 0; i < vi->max_queue_pairs; i++) { 5927 napi_disable(&vi->rq[i].napi); 5928 virtnet_napi_tx_disable(&vi->sq[i].napi); 5929 } 5930 } 5931 5932 if (!prog) { 5933 for (i = 0; i < vi->max_queue_pairs; i++) { 5934 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 5935 if (i == 0) 5936 virtnet_restore_guest_offloads(vi); 5937 } 5938 synchronize_net(); 5939 } 5940 5941 err = virtnet_set_queues(vi, curr_qp + xdp_qp); 5942 if (err) 5943 goto err; 5944 netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); 5945 vi->xdp_queue_pairs = xdp_qp; 5946 5947 if (prog) { 5948 vi->xdp_enabled = true; 5949 for (i = 0; i < vi->max_queue_pairs; i++) { 5950 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 5951 if (i == 0 && !old_prog) 5952 virtnet_clear_guest_offloads(vi); 5953 } 5954 if (!old_prog) 5955 xdp_features_set_redirect_target(dev, true); 5956 } else { 5957 xdp_features_clear_redirect_target(dev); 5958 vi->xdp_enabled = false; 5959 } 5960 5961 for (i = 0; i < vi->max_queue_pairs; i++) { 5962 if (old_prog) 5963 bpf_prog_put(old_prog); 5964 if (netif_running(dev)) { 5965 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); 5966 virtnet_napi_tx_enable(vi, vi->sq[i].vq, 5967 &vi->sq[i].napi); 5968 } 5969 } 5970 5971 return 0; 5972 5973 err: 5974 if (!prog) { 5975 virtnet_clear_guest_offloads(vi); 5976 for (i = 0; i < vi->max_queue_pairs; i++) 5977 rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); 5978 } 5979 5980 if (netif_running(dev)) { 5981 for (i = 0; i < vi->max_queue_pairs; i++) { 5982 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); 5983 virtnet_napi_tx_enable(vi, vi->sq[i].vq, 5984 &vi->sq[i].napi); 5985 } 5986 } 5987 if (prog) 5988 bpf_prog_sub(prog, vi->max_queue_pairs - 1); 5989 return err; 5990 } 5991 5992 static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp) 5993 { 5994 switch (xdp->command) { 5995 case XDP_SETUP_PROG: 5996 return virtnet_xdp_set(dev, xdp->prog, xdp->extack); 5997 case XDP_SETUP_XSK_POOL: 5998 return virtnet_xsk_pool_setup(dev, xdp); 5999 default: 6000 return -EINVAL; 6001 } 6002 } 6003 6004 static int virtnet_get_phys_port_name(struct net_device *dev, char *buf, 6005 size_t len) 6006 { 6007 struct virtnet_info *vi = netdev_priv(dev); 6008 int ret; 6009 6010 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 6011 return -EOPNOTSUPP; 6012 6013 ret = snprintf(buf, len, "sby"); 6014 if (ret >= len) 6015 return -EOPNOTSUPP; 6016 6017 return 0; 6018 } 6019 6020 static int virtnet_set_features(struct net_device *dev, 6021 
netdev_features_t features) 6022 { 6023 struct virtnet_info *vi = netdev_priv(dev); 6024 u64 offloads; 6025 int err; 6026 6027 if ((dev->features ^ features) & NETIF_F_GRO_HW) { 6028 if (vi->xdp_enabled) 6029 return -EBUSY; 6030 6031 if (features & NETIF_F_GRO_HW) 6032 offloads = vi->guest_offloads_capable; 6033 else 6034 offloads = vi->guest_offloads_capable & 6035 ~GUEST_OFFLOAD_GRO_HW_MASK; 6036 6037 err = virtnet_set_guest_offloads(vi, offloads); 6038 if (err) 6039 return err; 6040 vi->guest_offloads = offloads; 6041 } 6042 6043 if ((dev->features ^ features) & NETIF_F_RXHASH) { 6044 if (features & NETIF_F_RXHASH) 6045 vi->rss.hash_types = vi->rss_hash_types_saved; 6046 else 6047 vi->rss.hash_types = VIRTIO_NET_HASH_REPORT_NONE; 6048 6049 if (!virtnet_commit_rss_command(vi)) 6050 return -EINVAL; 6051 } 6052 6053 return 0; 6054 } 6055 6056 static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue) 6057 { 6058 struct virtnet_info *priv = netdev_priv(dev); 6059 struct send_queue *sq = &priv->sq[txqueue]; 6060 struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue); 6061 6062 u64_stats_update_begin(&sq->stats.syncp); 6063 u64_stats_inc(&sq->stats.tx_timeouts); 6064 u64_stats_update_end(&sq->stats.syncp); 6065 6066 netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n", 6067 txqueue, sq->name, sq->vq->index, sq->vq->name, 6068 jiffies_to_usecs(jiffies - READ_ONCE(txq->trans_start))); 6069 } 6070 6071 static int virtnet_init_irq_moder(struct virtnet_info *vi) 6072 { 6073 u8 profile_flags = 0, coal_flags = 0; 6074 int ret, i; 6075 6076 profile_flags |= DIM_PROFILE_RX; 6077 coal_flags |= DIM_COALESCE_USEC | DIM_COALESCE_PKTS; 6078 ret = net_dim_init_irq_moder(vi->dev, profile_flags, coal_flags, 6079 DIM_CQ_PERIOD_MODE_START_FROM_EQE, 6080 0, virtnet_rx_dim_work, NULL); 6081 6082 if (ret) 6083 return ret; 6084 6085 for (i = 0; i < vi->max_queue_pairs; i++) 6086 net_dim_setting(vi->dev, &vi->rq[i].dim, false); 6087 6088 return 0; 6089 } 6090 6091 static void virtnet_free_irq_moder(struct virtnet_info *vi) 6092 { 6093 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 6094 return; 6095 6096 rtnl_lock(); 6097 net_dim_free_irq_moder(vi->dev); 6098 rtnl_unlock(); 6099 } 6100 6101 static const struct net_device_ops virtnet_netdev = { 6102 .ndo_open = virtnet_open, 6103 .ndo_stop = virtnet_close, 6104 .ndo_start_xmit = start_xmit, 6105 .ndo_validate_addr = eth_validate_addr, 6106 .ndo_set_mac_address = virtnet_set_mac_address, 6107 .ndo_set_rx_mode = virtnet_set_rx_mode, 6108 .ndo_get_stats64 = virtnet_stats, 6109 .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid, 6110 .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, 6111 .ndo_bpf = virtnet_xdp, 6112 .ndo_xdp_xmit = virtnet_xdp_xmit, 6113 .ndo_xsk_wakeup = virtnet_xsk_wakeup, 6114 .ndo_features_check = passthru_features_check, 6115 .ndo_get_phys_port_name = virtnet_get_phys_port_name, 6116 .ndo_set_features = virtnet_set_features, 6117 .ndo_tx_timeout = virtnet_tx_timeout, 6118 }; 6119 6120 static void virtnet_config_changed_work(struct work_struct *work) 6121 { 6122 struct virtnet_info *vi = 6123 container_of(work, struct virtnet_info, config_work); 6124 u16 v; 6125 6126 if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS, 6127 struct virtio_net_config, status, &v) < 0) 6128 return; 6129 6130 if (v & VIRTIO_NET_S_ANNOUNCE) { 6131 netdev_notify_peers(vi->dev); 6132 virtnet_ack_link_announce(vi); 6133 } 6134 6135 /* Ignore unknown (future) status bits */ 6136 v &= VIRTIO_NET_S_LINK_UP; 
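	/* Only the link-up bit is tracked in vi->status, so a config interrupt
	 * that does not change the link state takes the early return below and
	 * never touches the carrier state or the TX queues.
	 */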
6137 6138 if (vi->status == v) 6139 return; 6140 6141 vi->status = v; 6142 6143 if (vi->status & VIRTIO_NET_S_LINK_UP) { 6144 virtnet_update_settings(vi); 6145 netif_carrier_on(vi->dev); 6146 netif_tx_wake_all_queues(vi->dev); 6147 } else { 6148 netif_carrier_off(vi->dev); 6149 netif_tx_stop_all_queues(vi->dev); 6150 } 6151 } 6152 6153 static void virtnet_config_changed(struct virtio_device *vdev) 6154 { 6155 struct virtnet_info *vi = vdev->priv; 6156 6157 schedule_work(&vi->config_work); 6158 } 6159 6160 static void virtnet_free_queues(struct virtnet_info *vi) 6161 { 6162 int i; 6163 6164 for (i = 0; i < vi->max_queue_pairs; i++) { 6165 __netif_napi_del(&vi->rq[i].napi); 6166 __netif_napi_del(&vi->sq[i].napi); 6167 } 6168 6169 /* We called __netif_napi_del(), 6170 * we need to respect an RCU grace period before freeing vi->rq 6171 */ 6172 synchronize_net(); 6173 6174 kfree(vi->rq); 6175 kfree(vi->sq); 6176 kfree(vi->ctrl); 6177 } 6178 6179 static void _free_receive_bufs(struct virtnet_info *vi) 6180 { 6181 struct bpf_prog *old_prog; 6182 int i; 6183 6184 for (i = 0; i < vi->max_queue_pairs; i++) { 6185 while (vi->rq[i].pages) 6186 __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0); 6187 6188 old_prog = rtnl_dereference(vi->rq[i].xdp_prog); 6189 RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL); 6190 if (old_prog) 6191 bpf_prog_put(old_prog); 6192 } 6193 } 6194 6195 static void free_receive_bufs(struct virtnet_info *vi) 6196 { 6197 rtnl_lock(); 6198 _free_receive_bufs(vi); 6199 rtnl_unlock(); 6200 } 6201 6202 static void free_receive_page_frags(struct virtnet_info *vi) 6203 { 6204 int i; 6205 for (i = 0; i < vi->max_queue_pairs; i++) 6206 if (vi->rq[i].alloc_frag.page) { 6207 if (vi->rq[i].last_dma) 6208 virtnet_rq_unmap(&vi->rq[i], vi->rq[i].last_dma, 0); 6209 put_page(vi->rq[i].alloc_frag.page); 6210 } 6211 } 6212 6213 static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf) 6214 { 6215 struct virtnet_info *vi = vq->vdev->priv; 6216 struct send_queue *sq; 6217 int i = vq2rxq(vq); 6218 6219 sq = &vi->sq[i]; 6220 6221 switch (virtnet_xmit_ptr_unpack(&buf)) { 6222 case VIRTNET_XMIT_TYPE_SKB: 6223 case VIRTNET_XMIT_TYPE_SKB_ORPHAN: 6224 dev_kfree_skb(buf); 6225 break; 6226 6227 case VIRTNET_XMIT_TYPE_XDP: 6228 xdp_return_frame(buf); 6229 break; 6230 6231 case VIRTNET_XMIT_TYPE_XSK: 6232 xsk_tx_completed(sq->xsk_pool, 1); 6233 break; 6234 } 6235 } 6236 6237 static void free_unused_bufs(struct virtnet_info *vi) 6238 { 6239 void *buf; 6240 int i; 6241 6242 for (i = 0; i < vi->max_queue_pairs; i++) { 6243 struct virtqueue *vq = vi->sq[i].vq; 6244 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 6245 virtnet_sq_free_unused_buf(vq, buf); 6246 cond_resched(); 6247 } 6248 6249 for (i = 0; i < vi->max_queue_pairs; i++) { 6250 struct virtqueue *vq = vi->rq[i].vq; 6251 6252 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 6253 virtnet_rq_unmap_free_buf(vq, buf); 6254 cond_resched(); 6255 } 6256 } 6257 6258 static void virtnet_del_vqs(struct virtnet_info *vi) 6259 { 6260 struct virtio_device *vdev = vi->vdev; 6261 6262 virtnet_clean_affinity(vi); 6263 6264 vdev->config->del_vqs(vdev); 6265 6266 virtnet_free_queues(vi); 6267 } 6268 6269 /* How large should a single buffer be so a queue full of these can fit at 6270 * least one full packet? 6271 * Logic below assumes the mergeable buffer header is used. 
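 * For example, assuming a 12-byte mergeable header, a 256-entry ring and a
 * 65535-byte worst-case packet: buf_len = 12 + 14 + 4 + 65535 = 65565, and
 * DIV_ROUND_UP(65565, 256) = 257, i.e. 245 bytes of payload per buffer.
 * That is below GOOD_PACKET_LEN, so GOOD_PACKET_LEN is returned instead.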
6272 */ 6273 static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq) 6274 { 6275 const unsigned int hdr_len = vi->hdr_len; 6276 unsigned int rq_size = virtqueue_get_vring_size(vq); 6277 unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu; 6278 unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len; 6279 unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size); 6280 6281 return max(max(min_buf_len, hdr_len) - hdr_len, 6282 (unsigned int)GOOD_PACKET_LEN); 6283 } 6284 6285 static int virtnet_find_vqs(struct virtnet_info *vi) 6286 { 6287 struct virtqueue_info *vqs_info; 6288 struct virtqueue **vqs; 6289 int ret = -ENOMEM; 6290 int total_vqs; 6291 bool *ctx; 6292 u16 i; 6293 6294 /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by 6295 * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by 6296 * possible control vq. 6297 */ 6298 total_vqs = vi->max_queue_pairs * 2 + 6299 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ); 6300 6301 /* Allocate space for find_vqs parameters */ 6302 vqs = kcalloc(total_vqs, sizeof(*vqs), GFP_KERNEL); 6303 if (!vqs) 6304 goto err_vq; 6305 vqs_info = kcalloc(total_vqs, sizeof(*vqs_info), GFP_KERNEL); 6306 if (!vqs_info) 6307 goto err_vqs_info; 6308 if (!vi->big_packets || vi->mergeable_rx_bufs) { 6309 ctx = kcalloc(total_vqs, sizeof(*ctx), GFP_KERNEL); 6310 if (!ctx) 6311 goto err_ctx; 6312 } else { 6313 ctx = NULL; 6314 } 6315 6316 /* Parameters for control virtqueue, if any */ 6317 if (vi->has_cvq) { 6318 vqs_info[total_vqs - 1].name = "control"; 6319 } 6320 6321 /* Allocate/initialize parameters for send/receive virtqueues */ 6322 for (i = 0; i < vi->max_queue_pairs; i++) { 6323 vqs_info[rxq2vq(i)].callback = skb_recv_done; 6324 vqs_info[txq2vq(i)].callback = skb_xmit_done; 6325 sprintf(vi->rq[i].name, "input.%u", i); 6326 sprintf(vi->sq[i].name, "output.%u", i); 6327 vqs_info[rxq2vq(i)].name = vi->rq[i].name; 6328 vqs_info[txq2vq(i)].name = vi->sq[i].name; 6329 if (ctx) 6330 vqs_info[rxq2vq(i)].ctx = true; 6331 } 6332 6333 ret = virtio_find_vqs(vi->vdev, total_vqs, vqs, vqs_info, NULL); 6334 if (ret) 6335 goto err_find; 6336 6337 if (vi->has_cvq) { 6338 vi->cvq = vqs[total_vqs - 1]; 6339 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) 6340 vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; 6341 } 6342 6343 for (i = 0; i < vi->max_queue_pairs; i++) { 6344 vi->rq[i].vq = vqs[rxq2vq(i)]; 6345 vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq); 6346 vi->sq[i].vq = vqs[txq2vq(i)]; 6347 } 6348 6349 /* run here: ret == 0. */ 6350 6351 6352 err_find: 6353 kfree(ctx); 6354 err_ctx: 6355 kfree(vqs_info); 6356 err_vqs_info: 6357 kfree(vqs); 6358 err_vq: 6359 return ret; 6360 } 6361 6362 static int virtnet_alloc_queues(struct virtnet_info *vi) 6363 { 6364 int i; 6365 6366 if (vi->has_cvq) { 6367 vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL); 6368 if (!vi->ctrl) 6369 goto err_ctrl; 6370 } else { 6371 vi->ctrl = NULL; 6372 } 6373 vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL); 6374 if (!vi->sq) 6375 goto err_sq; 6376 vi->rq = kcalloc(vi->max_queue_pairs, sizeof(*vi->rq), GFP_KERNEL); 6377 if (!vi->rq) 6378 goto err_rq; 6379 6380 INIT_DELAYED_WORK(&vi->refill, refill_work); 6381 for (i = 0; i < vi->max_queue_pairs; i++) { 6382 vi->rq[i].pages = NULL; 6383 netif_napi_add_weight(vi->dev, &vi->rq[i].napi, virtnet_poll, 6384 napi_weight); 6385 netif_napi_add_tx_weight(vi->dev, &vi->sq[i].napi, 6386 virtnet_poll_tx, 6387 napi_tx ? 
napi_weight : 0); 6388 6389 sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg)); 6390 ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len); 6391 sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg)); 6392 6393 u64_stats_init(&vi->rq[i].stats.syncp); 6394 u64_stats_init(&vi->sq[i].stats.syncp); 6395 mutex_init(&vi->rq[i].dim_lock); 6396 } 6397 6398 return 0; 6399 6400 err_rq: 6401 kfree(vi->sq); 6402 err_sq: 6403 kfree(vi->ctrl); 6404 err_ctrl: 6405 return -ENOMEM; 6406 } 6407 6408 static int init_vqs(struct virtnet_info *vi) 6409 { 6410 int ret; 6411 6412 /* Allocate send & receive queues */ 6413 ret = virtnet_alloc_queues(vi); 6414 if (ret) 6415 goto err; 6416 6417 ret = virtnet_find_vqs(vi); 6418 if (ret) 6419 goto err_free; 6420 6421 cpus_read_lock(); 6422 virtnet_set_affinity(vi); 6423 cpus_read_unlock(); 6424 6425 return 0; 6426 6427 err_free: 6428 virtnet_free_queues(vi); 6429 err: 6430 return ret; 6431 } 6432 6433 #ifdef CONFIG_SYSFS 6434 static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue, 6435 char *buf) 6436 { 6437 struct virtnet_info *vi = netdev_priv(queue->dev); 6438 unsigned int queue_index = get_netdev_rx_queue_index(queue); 6439 unsigned int headroom = virtnet_get_headroom(vi); 6440 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 6441 struct ewma_pkt_len *avg; 6442 6443 BUG_ON(queue_index >= vi->max_queue_pairs); 6444 avg = &vi->rq[queue_index].mrg_avg_pkt_len; 6445 return sprintf(buf, "%u\n", 6446 get_mergeable_buf_len(&vi->rq[queue_index], avg, 6447 SKB_DATA_ALIGN(headroom + tailroom))); 6448 } 6449 6450 static struct rx_queue_attribute mergeable_rx_buffer_size_attribute = 6451 __ATTR_RO(mergeable_rx_buffer_size); 6452 6453 static struct attribute *virtio_net_mrg_rx_attrs[] = { 6454 &mergeable_rx_buffer_size_attribute.attr, 6455 NULL 6456 }; 6457 6458 static const struct attribute_group virtio_net_mrg_rx_group = { 6459 .name = "virtio_net", 6460 .attrs = virtio_net_mrg_rx_attrs 6461 }; 6462 #endif 6463 6464 static bool virtnet_fail_on_feature(struct virtio_device *vdev, 6465 unsigned int fbit, 6466 const char *fname, const char *dname) 6467 { 6468 if (!virtio_has_feature(vdev, fbit)) 6469 return false; 6470 6471 dev_err(&vdev->dev, "device advertises feature %s but not %s", 6472 fname, dname); 6473 6474 return true; 6475 } 6476 6477 #define VIRTNET_FAIL_ON(vdev, fbit, dbit) \ 6478 virtnet_fail_on_feature(vdev, fbit, #fbit, dbit) 6479 6480 static bool virtnet_validate_features(struct virtio_device *vdev) 6481 { 6482 if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) && 6483 (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX, 6484 "VIRTIO_NET_F_CTRL_VQ") || 6485 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN, 6486 "VIRTIO_NET_F_CTRL_VQ") || 6487 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE, 6488 "VIRTIO_NET_F_CTRL_VQ") || 6489 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") || 6490 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR, 6491 "VIRTIO_NET_F_CTRL_VQ") || 6492 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, 6493 "VIRTIO_NET_F_CTRL_VQ") || 6494 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT, 6495 "VIRTIO_NET_F_CTRL_VQ") || 6496 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL, 6497 "VIRTIO_NET_F_CTRL_VQ") || 6498 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_VQ_NOTF_COAL, 6499 "VIRTIO_NET_F_CTRL_VQ"))) { 6500 return false; 6501 } 6502 6503 return true; 6504 } 6505 6506 #define MIN_MTU ETH_MIN_MTU 6507 #define MAX_MTU ETH_MAX_MTU 6508 6509 static int virtnet_validate(struct virtio_device *vdev) 6510 { 6511 if (!vdev->config->get) { 6512 
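		/* The MTU check below and the MAC/queue-count reads in
		 * virtnet_probe() all go through the config space, so a
		 * transport that provides no config access cannot be
		 * supported.
		 */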
dev_err(&vdev->dev, "%s failure: config access disabled\n", 6513 __func__); 6514 return -EINVAL; 6515 } 6516 6517 if (!virtnet_validate_features(vdev)) 6518 return -EINVAL; 6519 6520 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { 6521 int mtu = virtio_cread16(vdev, 6522 offsetof(struct virtio_net_config, 6523 mtu)); 6524 if (mtu < MIN_MTU) 6525 __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU); 6526 } 6527 6528 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY) && 6529 !virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 6530 dev_warn(&vdev->dev, "device advertises feature VIRTIO_NET_F_STANDBY but not VIRTIO_NET_F_MAC, disabling standby"); 6531 __virtio_clear_bit(vdev, VIRTIO_NET_F_STANDBY); 6532 } 6533 6534 return 0; 6535 } 6536 6537 static bool virtnet_check_guest_gso(const struct virtnet_info *vi) 6538 { 6539 return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 6540 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 6541 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 6542 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 6543 (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) && 6544 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6)); 6545 } 6546 6547 static void virtnet_set_big_packets(struct virtnet_info *vi, const int mtu) 6548 { 6549 bool guest_gso = virtnet_check_guest_gso(vi); 6550 6551 /* If device can receive ANY guest GSO packets, regardless of mtu, 6552 * allocate packets of maximum size, otherwise limit it to only 6553 * mtu size worth only. 6554 */ 6555 if (mtu > ETH_DATA_LEN || guest_gso) { 6556 vi->big_packets = true; 6557 vi->big_packets_num_skbfrags = guest_gso ? MAX_SKB_FRAGS : DIV_ROUND_UP(mtu, PAGE_SIZE); 6558 } 6559 } 6560 6561 #define VIRTIO_NET_HASH_REPORT_MAX_TABLE 10 6562 static enum xdp_rss_hash_type 6563 virtnet_xdp_rss_type[VIRTIO_NET_HASH_REPORT_MAX_TABLE] = { 6564 [VIRTIO_NET_HASH_REPORT_NONE] = XDP_RSS_TYPE_NONE, 6565 [VIRTIO_NET_HASH_REPORT_IPv4] = XDP_RSS_TYPE_L3_IPV4, 6566 [VIRTIO_NET_HASH_REPORT_TCPv4] = XDP_RSS_TYPE_L4_IPV4_TCP, 6567 [VIRTIO_NET_HASH_REPORT_UDPv4] = XDP_RSS_TYPE_L4_IPV4_UDP, 6568 [VIRTIO_NET_HASH_REPORT_IPv6] = XDP_RSS_TYPE_L3_IPV6, 6569 [VIRTIO_NET_HASH_REPORT_TCPv6] = XDP_RSS_TYPE_L4_IPV6_TCP, 6570 [VIRTIO_NET_HASH_REPORT_UDPv6] = XDP_RSS_TYPE_L4_IPV6_UDP, 6571 [VIRTIO_NET_HASH_REPORT_IPv6_EX] = XDP_RSS_TYPE_L3_IPV6_EX, 6572 [VIRTIO_NET_HASH_REPORT_TCPv6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX, 6573 [VIRTIO_NET_HASH_REPORT_UDPv6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX 6574 }; 6575 6576 static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash, 6577 enum xdp_rss_hash_type *rss_type) 6578 { 6579 const struct xdp_buff *xdp = (void *)_ctx; 6580 struct virtio_net_hdr_v1_hash *hdr_hash; 6581 struct virtnet_info *vi; 6582 u16 hash_report; 6583 6584 if (!(xdp->rxq->dev->features & NETIF_F_RXHASH)) 6585 return -ENODATA; 6586 6587 vi = netdev_priv(xdp->rxq->dev); 6588 hdr_hash = (struct virtio_net_hdr_v1_hash *)(xdp->data - vi->hdr_len); 6589 hash_report = __le16_to_cpu(hdr_hash->hash_report); 6590 6591 if (hash_report >= VIRTIO_NET_HASH_REPORT_MAX_TABLE) 6592 hash_report = VIRTIO_NET_HASH_REPORT_NONE; 6593 6594 *rss_type = virtnet_xdp_rss_type[hash_report]; 6595 *hash = __le32_to_cpu(hdr_hash->hash_value); 6596 return 0; 6597 } 6598 6599 static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = { 6600 .xmo_rx_hash = virtnet_xdp_rx_hash, 6601 }; 6602 6603 static int virtnet_probe(struct virtio_device *vdev) 6604 { 6605 int i, err = -ENOMEM; 6606 struct net_device *dev; 6607 struct virtnet_info *vi; 6608 u16 
max_queue_pairs; 6609 int mtu = 0; 6610 6611 /* Find if host supports multiqueue/rss virtio_net device */ 6612 max_queue_pairs = 1; 6613 if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ) || virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) 6614 max_queue_pairs = 6615 virtio_cread16(vdev, offsetof(struct virtio_net_config, max_virtqueue_pairs)); 6616 6617 /* We need at least 2 queue's */ 6618 if (max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 6619 max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || 6620 !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) 6621 max_queue_pairs = 1; 6622 6623 /* Allocate ourselves a network device with room for our info */ 6624 dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs); 6625 if (!dev) 6626 return -ENOMEM; 6627 6628 /* Set up network device as normal. */ 6629 dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE | 6630 IFF_TX_SKB_NO_LINEAR; 6631 dev->netdev_ops = &virtnet_netdev; 6632 dev->stat_ops = &virtnet_stat_ops; 6633 dev->features = NETIF_F_HIGHDMA; 6634 6635 dev->ethtool_ops = &virtnet_ethtool_ops; 6636 SET_NETDEV_DEV(dev, &vdev->dev); 6637 6638 /* Do we support "hardware" checksums? */ 6639 if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) { 6640 /* This opens up the world of extra features. */ 6641 dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG; 6642 if (csum) 6643 dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; 6644 6645 if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) { 6646 dev->hw_features |= NETIF_F_TSO 6647 | NETIF_F_TSO_ECN | NETIF_F_TSO6; 6648 } 6649 /* Individual feature bits: what can host handle? */ 6650 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4)) 6651 dev->hw_features |= NETIF_F_TSO; 6652 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6)) 6653 dev->hw_features |= NETIF_F_TSO6; 6654 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN)) 6655 dev->hw_features |= NETIF_F_TSO_ECN; 6656 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_USO)) 6657 dev->hw_features |= NETIF_F_GSO_UDP_L4; 6658 6659 dev->features |= NETIF_F_GSO_ROBUST; 6660 6661 if (gso) 6662 dev->features |= dev->hw_features & NETIF_F_ALL_TSO; 6663 /* (!csum && gso) case will be fixed by register_netdev() */ 6664 } 6665 6666 /* 1. With VIRTIO_NET_F_GUEST_CSUM negotiation, the driver doesn't 6667 * need to calculate checksums for partially checksummed packets, 6668 * as they're considered valid by the upper layer. 6669 * 2. Without VIRTIO_NET_F_GUEST_CSUM negotiation, the driver only 6670 * receives fully checksummed packets. The device may assist in 6671 * validating these packets' checksums, so the driver won't have to. 6672 */ 6673 dev->features |= NETIF_F_RXCSUM; 6674 6675 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || 6676 virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)) 6677 dev->features |= NETIF_F_GRO_HW; 6678 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) 6679 dev->hw_features |= NETIF_F_GRO_HW; 6680 6681 dev->vlan_features = dev->features; 6682 dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | 6683 NETDEV_XDP_ACT_XSK_ZEROCOPY; 6684 6685 /* MTU range: 68 - 65535 */ 6686 dev->min_mtu = MIN_MTU; 6687 dev->max_mtu = MAX_MTU; 6688 6689 /* Configuration may specify what MAC to use. Otherwise random. 
*/ 6690 if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 6691 u8 addr[ETH_ALEN]; 6692 6693 virtio_cread_bytes(vdev, 6694 offsetof(struct virtio_net_config, mac), 6695 addr, ETH_ALEN); 6696 eth_hw_addr_set(dev, addr); 6697 } else { 6698 eth_hw_addr_random(dev); 6699 dev_info(&vdev->dev, "Assigned random MAC address %pM\n", 6700 dev->dev_addr); 6701 } 6702 6703 /* Set up our device-specific information */ 6704 vi = netdev_priv(dev); 6705 vi->dev = dev; 6706 vi->vdev = vdev; 6707 vdev->priv = vi; 6708 6709 INIT_WORK(&vi->config_work, virtnet_config_changed_work); 6710 INIT_WORK(&vi->rx_mode_work, virtnet_rx_mode_work); 6711 spin_lock_init(&vi->refill_lock); 6712 6713 if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) { 6714 vi->mergeable_rx_bufs = true; 6715 dev->xdp_features |= NETDEV_XDP_ACT_RX_SG; 6716 } 6717 6718 if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) 6719 vi->has_rss_hash_report = true; 6720 6721 if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) { 6722 vi->has_rss = true; 6723 6724 vi->rss_indir_table_size = 6725 virtio_cread16(vdev, offsetof(struct virtio_net_config, 6726 rss_max_indirection_table_length)); 6727 } 6728 err = rss_indirection_table_alloc(&vi->rss, vi->rss_indir_table_size); 6729 if (err) 6730 goto free; 6731 6732 if (vi->has_rss || vi->has_rss_hash_report) { 6733 vi->rss_key_size = 6734 virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size)); 6735 if (vi->rss_key_size > VIRTIO_NET_RSS_MAX_KEY_SIZE) { 6736 dev_err(&vdev->dev, "rss_max_key_size=%u exceeds the limit %u.\n", 6737 vi->rss_key_size, VIRTIO_NET_RSS_MAX_KEY_SIZE); 6738 err = -EINVAL; 6739 goto free; 6740 } 6741 6742 vi->rss_hash_types_supported = 6743 virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types)); 6744 vi->rss_hash_types_supported &= 6745 ~(VIRTIO_NET_RSS_HASH_TYPE_IP_EX | 6746 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | 6747 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX); 6748 6749 dev->hw_features |= NETIF_F_RXHASH; 6750 dev->xdp_metadata_ops = &virtnet_xdp_metadata_ops; 6751 } 6752 6753 if (vi->has_rss_hash_report) 6754 vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash); 6755 else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) || 6756 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 6757 vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); 6758 else 6759 vi->hdr_len = sizeof(struct virtio_net_hdr); 6760 6761 if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) || 6762 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 6763 vi->any_header_sg = true; 6764 6765 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) 6766 vi->has_cvq = true; 6767 6768 mutex_init(&vi->cvq_lock); 6769 6770 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { 6771 mtu = virtio_cread16(vdev, 6772 offsetof(struct virtio_net_config, 6773 mtu)); 6774 if (mtu < dev->min_mtu) { 6775 /* Should never trigger: MTU was previously validated 6776 * in virtnet_validate. 
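			 * (virtnet_validate() clears VIRTIO_NET_F_MTU when the
			 * advertised MTU is below MIN_MTU, so reaching this
			 * branch means the device changed its config between
			 * validate and probe.)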
6777 */ 6778 dev_err(&vdev->dev, 6779 "device MTU appears to have changed it is now %d < %d", 6780 mtu, dev->min_mtu); 6781 err = -EINVAL; 6782 goto free; 6783 } 6784 6785 dev->mtu = mtu; 6786 dev->max_mtu = mtu; 6787 } 6788 6789 virtnet_set_big_packets(vi, mtu); 6790 6791 if (vi->any_header_sg) 6792 dev->needed_headroom = vi->hdr_len; 6793 6794 /* Enable multiqueue by default */ 6795 if (num_online_cpus() >= max_queue_pairs) 6796 vi->curr_queue_pairs = max_queue_pairs; 6797 else 6798 vi->curr_queue_pairs = num_online_cpus(); 6799 vi->max_queue_pairs = max_queue_pairs; 6800 6801 /* Allocate/initialize the rx/tx queues, and invoke find_vqs */ 6802 err = init_vqs(vi); 6803 if (err) 6804 goto free; 6805 6806 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { 6807 vi->intr_coal_rx.max_usecs = 0; 6808 vi->intr_coal_tx.max_usecs = 0; 6809 vi->intr_coal_rx.max_packets = 0; 6810 6811 /* Keep the default values of the coalescing parameters 6812 * aligned with the default napi_tx state. 6813 */ 6814 if (vi->sq[0].napi.weight) 6815 vi->intr_coal_tx.max_packets = 1; 6816 else 6817 vi->intr_coal_tx.max_packets = 0; 6818 } 6819 6820 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { 6821 /* The reason is the same as VIRTIO_NET_F_NOTF_COAL. */ 6822 for (i = 0; i < vi->max_queue_pairs; i++) 6823 if (vi->sq[i].napi.weight) 6824 vi->sq[i].intr_coal.max_packets = 1; 6825 6826 err = virtnet_init_irq_moder(vi); 6827 if (err) 6828 goto free; 6829 } 6830 6831 #ifdef CONFIG_SYSFS 6832 if (vi->mergeable_rx_bufs) 6833 dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group; 6834 #endif 6835 netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs); 6836 netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs); 6837 6838 virtnet_init_settings(dev); 6839 6840 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { 6841 vi->failover = net_failover_create(vi->dev); 6842 if (IS_ERR(vi->failover)) { 6843 err = PTR_ERR(vi->failover); 6844 goto free_vqs; 6845 } 6846 } 6847 6848 if (vi->has_rss || vi->has_rss_hash_report) 6849 virtnet_init_default_rss(vi); 6850 6851 enable_rx_mode_work(vi); 6852 6853 /* serialize netdev register + virtio_device_ready() with ndo_open() */ 6854 rtnl_lock(); 6855 6856 err = register_netdevice(dev); 6857 if (err) { 6858 pr_debug("virtio_net: registering device failed\n"); 6859 rtnl_unlock(); 6860 goto free_failover; 6861 } 6862 6863 /* Disable config change notification until ndo_open. */ 6864 virtio_config_driver_disable(vi->vdev); 6865 6866 virtio_device_ready(vdev); 6867 6868 if (vi->has_rss || vi->has_rss_hash_report) { 6869 if (!virtnet_commit_rss_command(vi)) { 6870 dev_warn(&vdev->dev, "RSS disabled because committing failed.\n"); 6871 dev->hw_features &= ~NETIF_F_RXHASH; 6872 vi->has_rss_hash_report = false; 6873 vi->has_rss = false; 6874 } 6875 } 6876 6877 virtnet_set_queues(vi, vi->curr_queue_pairs); 6878 6879 /* a random MAC address has been assigned, notify the device. 
6880 * We don't fail probe if VIRTIO_NET_F_CTRL_MAC_ADDR is not there 6881 * because many devices work fine without getting MAC explicitly 6882 */ 6883 if (!virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && 6884 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 6885 struct scatterlist sg; 6886 6887 sg_init_one(&sg, dev->dev_addr, dev->addr_len); 6888 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 6889 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { 6890 pr_debug("virtio_net: setting MAC address failed\n"); 6891 rtnl_unlock(); 6892 err = -EINVAL; 6893 goto free_unregister_netdev; 6894 } 6895 } 6896 6897 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) { 6898 struct virtio_net_stats_capabilities *stats_cap __free(kfree) = NULL; 6899 struct scatterlist sg; 6900 __le64 v; 6901 6902 stats_cap = kzalloc(sizeof(*stats_cap), GFP_KERNEL); 6903 if (!stats_cap) { 6904 rtnl_unlock(); 6905 err = -ENOMEM; 6906 goto free_unregister_netdev; 6907 } 6908 6909 sg_init_one(&sg, stats_cap, sizeof(*stats_cap)); 6910 6911 if (!virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS, 6912 VIRTIO_NET_CTRL_STATS_QUERY, 6913 NULL, &sg)) { 6914 pr_debug("virtio_net: fail to get stats capability\n"); 6915 rtnl_unlock(); 6916 err = -EINVAL; 6917 goto free_unregister_netdev; 6918 } 6919 6920 v = stats_cap->supported_stats_types[0]; 6921 vi->device_stats_cap = le64_to_cpu(v); 6922 } 6923 6924 /* Assume link up if device can't report link status, 6925 otherwise get link status from config. */ 6926 netif_carrier_off(dev); 6927 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { 6928 virtnet_config_changed_work(&vi->config_work); 6929 } else { 6930 vi->status = VIRTIO_NET_S_LINK_UP; 6931 virtnet_update_settings(vi); 6932 netif_carrier_on(dev); 6933 } 6934 6935 for (i = 0; i < ARRAY_SIZE(guest_offloads); i++) 6936 if (virtio_has_feature(vi->vdev, guest_offloads[i])) 6937 set_bit(guest_offloads[i], &vi->guest_offloads); 6938 vi->guest_offloads_capable = vi->guest_offloads; 6939 6940 rtnl_unlock(); 6941 6942 err = virtnet_cpu_notif_add(vi); 6943 if (err) { 6944 pr_debug("virtio_net: registering cpu notifier failed\n"); 6945 goto free_unregister_netdev; 6946 } 6947 6948 pr_debug("virtnet: registered device %s with %d RX and TX vq's\n", 6949 dev->name, max_queue_pairs); 6950 6951 return 0; 6952 6953 free_unregister_netdev: 6954 unregister_netdev(dev); 6955 free_failover: 6956 net_failover_destroy(vi->failover); 6957 free_vqs: 6958 virtio_reset_device(vdev); 6959 cancel_delayed_work_sync(&vi->refill); 6960 free_receive_page_frags(vi); 6961 virtnet_del_vqs(vi); 6962 free: 6963 free_netdev(dev); 6964 return err; 6965 } 6966 6967 static void remove_vq_common(struct virtnet_info *vi) 6968 { 6969 virtio_reset_device(vi->vdev); 6970 6971 /* Free unused buffers in both send and recv, if any. */ 6972 free_unused_bufs(vi); 6973 6974 free_receive_bufs(vi); 6975 6976 free_receive_page_frags(vi); 6977 6978 virtnet_del_vqs(vi); 6979 } 6980 6981 static void virtnet_remove(struct virtio_device *vdev) 6982 { 6983 struct virtnet_info *vi = vdev->priv; 6984 6985 virtnet_cpu_notif_remove(vi); 6986 6987 /* Make sure no work handler is accessing the device. 
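	 * The config-change and rx-mode workers are flushed here, before the
	 * netdev is unregistered and the virtqueues are torn down below.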
*/ 6988 flush_work(&vi->config_work); 6989 disable_rx_mode_work(vi); 6990 flush_work(&vi->rx_mode_work); 6991 6992 virtnet_free_irq_moder(vi); 6993 6994 unregister_netdev(vi->dev); 6995 6996 net_failover_destroy(vi->failover); 6997 6998 remove_vq_common(vi); 6999 7000 rss_indirection_table_free(&vi->rss); 7001 7002 free_netdev(vi->dev); 7003 } 7004 7005 static __maybe_unused int virtnet_freeze(struct virtio_device *vdev) 7006 { 7007 struct virtnet_info *vi = vdev->priv; 7008 7009 virtnet_cpu_notif_remove(vi); 7010 virtnet_freeze_down(vdev); 7011 remove_vq_common(vi); 7012 7013 return 0; 7014 } 7015 7016 static __maybe_unused int virtnet_restore(struct virtio_device *vdev) 7017 { 7018 struct virtnet_info *vi = vdev->priv; 7019 int err; 7020 7021 err = virtnet_restore_up(vdev); 7022 if (err) 7023 return err; 7024 virtnet_set_queues(vi, vi->curr_queue_pairs); 7025 7026 err = virtnet_cpu_notif_add(vi); 7027 if (err) { 7028 virtnet_freeze_down(vdev); 7029 remove_vq_common(vi); 7030 return err; 7031 } 7032 7033 return 0; 7034 } 7035 7036 static struct virtio_device_id id_table[] = { 7037 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, 7038 { 0 }, 7039 }; 7040 7041 #define VIRTNET_FEATURES \ 7042 VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \ 7043 VIRTIO_NET_F_MAC, \ 7044 VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \ 7045 VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \ 7046 VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \ 7047 VIRTIO_NET_F_HOST_USO, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, \ 7048 VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \ 7049 VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \ 7050 VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \ 7051 VIRTIO_NET_F_CTRL_MAC_ADDR, \ 7052 VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ 7053 VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ 7054 VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \ 7055 VIRTIO_NET_F_VQ_NOTF_COAL, \ 7056 VIRTIO_NET_F_GUEST_HDRLEN, VIRTIO_NET_F_DEVICE_STATS 7057 7058 static unsigned int features[] = { 7059 VIRTNET_FEATURES, 7060 }; 7061 7062 static unsigned int features_legacy[] = { 7063 VIRTNET_FEATURES, 7064 VIRTIO_NET_F_GSO, 7065 VIRTIO_F_ANY_LAYOUT, 7066 }; 7067 7068 static struct virtio_driver virtio_net_driver = { 7069 .feature_table = features, 7070 .feature_table_size = ARRAY_SIZE(features), 7071 .feature_table_legacy = features_legacy, 7072 .feature_table_size_legacy = ARRAY_SIZE(features_legacy), 7073 .driver.name = KBUILD_MODNAME, 7074 .id_table = id_table, 7075 .validate = virtnet_validate, 7076 .probe = virtnet_probe, 7077 .remove = virtnet_remove, 7078 .config_changed = virtnet_config_changed, 7079 #ifdef CONFIG_PM_SLEEP 7080 .freeze = virtnet_freeze, 7081 .restore = virtnet_restore, 7082 #endif 7083 }; 7084 7085 static __init int virtio_net_driver_init(void) 7086 { 7087 int ret; 7088 7089 ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online", 7090 virtnet_cpu_online, 7091 virtnet_cpu_down_prep); 7092 if (ret < 0) 7093 goto out; 7094 virtionet_online = ret; 7095 ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead", 7096 NULL, virtnet_cpu_dead); 7097 if (ret) 7098 goto err_dead; 7099 ret = register_virtio_driver(&virtio_net_driver); 7100 if (ret) 7101 goto err_virtio; 7102 return 0; 7103 err_virtio: 7104 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); 7105 err_dead: 7106 cpuhp_remove_multi_state(virtionet_online); 7107 out: 7108 return ret; 7109 } 7110 
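#if 0
/* Illustrative sketch only (never compiled): how a per-device instance is
 * typically attached to the two multi-instance cpuhp states registered in
 * virtio_net_driver_init() above. The driver's real hook is
 * virtnet_cpu_notif_add(), defined earlier in this file; the helper name
 * below and the vi->node/vi->node_dead hlist_node members are assumptions
 * made for this sketch.
 */
static int virtnet_sketch_cpu_notif_add(struct virtnet_info *vi)
{
	int ret;

	/* Attach to the dynamically allocated "online" state... */
	ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node);
	if (ret)
		return ret;

	/* ...and to the static "dead" state, unwinding on failure. */
	ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD,
					       &vi->node_dead);
	if (ret)
		cpuhp_state_remove_instance_nocalls(virtionet_online,
						    &vi->node);
	return ret;
}
#endif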
module_init(virtio_net_driver_init); 7111 7112 static __exit void virtio_net_driver_exit(void) 7113 { 7114 unregister_virtio_driver(&virtio_net_driver); 7115 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); 7116 cpuhp_remove_multi_state(virtionet_online); 7117 } 7118 module_exit(virtio_net_driver_exit); 7119 7120 MODULE_DEVICE_TABLE(virtio, id_table); 7121 MODULE_DESCRIPTION("Virtio network driver"); 7122 MODULE_LICENSE("GPL"); 7123
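#if 0
/* Illustrative sketch only (never compiled): the queue accounting performed
 * by virtnet_xdp_set() above when a program is attached. One extra TX queue
 * per possible CPU is requested so XDP_TX/XDP_REDIRECT can transmit without
 * locking; if the device cannot provide that many queue pairs, the driver
 * falls back to xdp_qp = 0 and a slower, locked TX mode. The helper name is
 * made up for this sketch.
 */
static u16 virtnet_sketch_xdp_queue_pairs(const struct virtnet_info *vi,
					  bool attaching)
{
	u16 curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs;
	u16 xdp_qp = attaching ? nr_cpu_ids : 0;

	/* Not enough dedicated queues: share them and serialize XDP TX. */
	if (curr_qp + xdp_qp > vi->max_queue_pairs)
		xdp_qp = 0;

	return xdp_qp;
}
#endif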