1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* A network driver using virtio. 3 * 4 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation 5 */ 6 //#define DEBUG 7 #include <linux/netdevice.h> 8 #include <linux/etherdevice.h> 9 #include <linux/ethtool.h> 10 #include <linux/module.h> 11 #include <linux/virtio.h> 12 #include <linux/virtio_net.h> 13 #include <linux/bpf.h> 14 #include <linux/bpf_trace.h> 15 #include <linux/scatterlist.h> 16 #include <linux/if_vlan.h> 17 #include <linux/slab.h> 18 #include <linux/cpu.h> 19 #include <linux/average.h> 20 #include <linux/filter.h> 21 #include <linux/kernel.h> 22 #include <linux/dim.h> 23 #include <net/route.h> 24 #include <net/xdp.h> 25 #include <net/net_failover.h> 26 #include <net/netdev_rx_queue.h> 27 #include <net/netdev_queues.h> 28 #include <net/xdp_sock_drv.h> 29 30 static int napi_weight = NAPI_POLL_WEIGHT; 31 module_param(napi_weight, int, 0444); 32 33 static bool csum = true, gso = true, napi_tx = true; 34 module_param(csum, bool, 0444); 35 module_param(gso, bool, 0444); 36 module_param(napi_tx, bool, 0644); 37 38 /* FIXME: MTU in config. */ 39 #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN) 40 #define GOOD_COPY_LEN 128 41 42 #define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD) 43 44 /* Separating two types of XDP xmit */ 45 #define VIRTIO_XDP_TX BIT(0) 46 #define VIRTIO_XDP_REDIR BIT(1) 47 48 /* RX packet size EWMA. The average packet size is used to determine the packet 49 * buffer size when refilling RX rings. As the entire RX ring may be refilled 50 * at once, the weight is chosen so that the EWMA will be insensitive to short- 51 * term, transient changes in packet size. 52 */ 53 DECLARE_EWMA(pkt_len, 0, 64) 54 55 #define VIRTNET_DRIVER_VERSION "1.0.0" 56 57 static const unsigned long guest_offloads[] = { 58 VIRTIO_NET_F_GUEST_TSO4, 59 VIRTIO_NET_F_GUEST_TSO6, 60 VIRTIO_NET_F_GUEST_ECN, 61 VIRTIO_NET_F_GUEST_UFO, 62 VIRTIO_NET_F_GUEST_CSUM, 63 VIRTIO_NET_F_GUEST_USO4, 64 VIRTIO_NET_F_GUEST_USO6, 65 VIRTIO_NET_F_GUEST_HDRLEN 66 }; 67 68 #define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ 69 (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ 70 (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ 71 (1ULL << VIRTIO_NET_F_GUEST_UFO) | \ 72 (1ULL << VIRTIO_NET_F_GUEST_USO4) | \ 73 (1ULL << VIRTIO_NET_F_GUEST_USO6)) 74 75 struct virtnet_stat_desc { 76 char desc[ETH_GSTRING_LEN]; 77 size_t offset; 78 size_t qstat_offset; 79 }; 80 81 struct virtnet_sq_free_stats { 82 u64 packets; 83 u64 bytes; 84 u64 napi_packets; 85 u64 napi_bytes; 86 u64 xsk; 87 }; 88 89 struct virtnet_sq_stats { 90 struct u64_stats_sync syncp; 91 u64_stats_t packets; 92 u64_stats_t bytes; 93 u64_stats_t xdp_tx; 94 u64_stats_t xdp_tx_drops; 95 u64_stats_t kicks; 96 u64_stats_t tx_timeouts; 97 u64_stats_t stop; 98 u64_stats_t wake; 99 }; 100 101 struct virtnet_rq_stats { 102 struct u64_stats_sync syncp; 103 u64_stats_t packets; 104 u64_stats_t bytes; 105 u64_stats_t drops; 106 u64_stats_t xdp_packets; 107 u64_stats_t xdp_tx; 108 u64_stats_t xdp_redirects; 109 u64_stats_t xdp_drops; 110 u64_stats_t kicks; 111 }; 112 113 #define VIRTNET_SQ_STAT(name, m) {name, offsetof(struct virtnet_sq_stats, m), -1} 114 #define VIRTNET_RQ_STAT(name, m) {name, offsetof(struct virtnet_rq_stats, m), -1} 115 116 #define VIRTNET_SQ_STAT_QSTAT(name, m) \ 117 { \ 118 name, \ 119 offsetof(struct virtnet_sq_stats, m), \ 120 offsetof(struct netdev_queue_stats_tx, m), \ 121 } 122 123 #define VIRTNET_RQ_STAT_QSTAT(name, m) \ 124 { \ 125 name, \ 126 offsetof(struct 
virtnet_rq_stats, m), \ 127 offsetof(struct netdev_queue_stats_rx, m), \ 128 } 129 130 static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = { 131 VIRTNET_SQ_STAT("xdp_tx", xdp_tx), 132 VIRTNET_SQ_STAT("xdp_tx_drops", xdp_tx_drops), 133 VIRTNET_SQ_STAT("kicks", kicks), 134 VIRTNET_SQ_STAT("tx_timeouts", tx_timeouts), 135 }; 136 137 static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = { 138 VIRTNET_RQ_STAT("drops", drops), 139 VIRTNET_RQ_STAT("xdp_packets", xdp_packets), 140 VIRTNET_RQ_STAT("xdp_tx", xdp_tx), 141 VIRTNET_RQ_STAT("xdp_redirects", xdp_redirects), 142 VIRTNET_RQ_STAT("xdp_drops", xdp_drops), 143 VIRTNET_RQ_STAT("kicks", kicks), 144 }; 145 146 static const struct virtnet_stat_desc virtnet_sq_stats_desc_qstat[] = { 147 VIRTNET_SQ_STAT_QSTAT("packets", packets), 148 VIRTNET_SQ_STAT_QSTAT("bytes", bytes), 149 VIRTNET_SQ_STAT_QSTAT("stop", stop), 150 VIRTNET_SQ_STAT_QSTAT("wake", wake), 151 }; 152 153 static const struct virtnet_stat_desc virtnet_rq_stats_desc_qstat[] = { 154 VIRTNET_RQ_STAT_QSTAT("packets", packets), 155 VIRTNET_RQ_STAT_QSTAT("bytes", bytes), 156 }; 157 158 #define VIRTNET_STATS_DESC_CQ(name) \ 159 {#name, offsetof(struct virtio_net_stats_cvq, name), -1} 160 161 #define VIRTNET_STATS_DESC_RX(class, name) \ 162 {#name, offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), -1} 163 164 #define VIRTNET_STATS_DESC_TX(class, name) \ 165 {#name, offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name), -1} 166 167 168 static const struct virtnet_stat_desc virtnet_stats_cvq_desc[] = { 169 VIRTNET_STATS_DESC_CQ(command_num), 170 VIRTNET_STATS_DESC_CQ(ok_num), 171 }; 172 173 static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc[] = { 174 VIRTNET_STATS_DESC_RX(basic, packets), 175 VIRTNET_STATS_DESC_RX(basic, bytes), 176 177 VIRTNET_STATS_DESC_RX(basic, notifications), 178 VIRTNET_STATS_DESC_RX(basic, interrupts), 179 }; 180 181 static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc[] = { 182 VIRTNET_STATS_DESC_TX(basic, packets), 183 VIRTNET_STATS_DESC_TX(basic, bytes), 184 185 VIRTNET_STATS_DESC_TX(basic, notifications), 186 VIRTNET_STATS_DESC_TX(basic, interrupts), 187 }; 188 189 static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc[] = { 190 VIRTNET_STATS_DESC_RX(csum, needs_csum), 191 }; 192 193 static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc[] = { 194 VIRTNET_STATS_DESC_TX(gso, gso_packets_noseg), 195 VIRTNET_STATS_DESC_TX(gso, gso_bytes_noseg), 196 }; 197 198 static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc[] = { 199 VIRTNET_STATS_DESC_RX(speed, ratelimit_bytes), 200 }; 201 202 static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc[] = { 203 VIRTNET_STATS_DESC_TX(speed, ratelimit_bytes), 204 }; 205 206 #define VIRTNET_STATS_DESC_RX_QSTAT(class, name, qstat_field) \ 207 { \ 208 #name, \ 209 offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), \ 210 offsetof(struct netdev_queue_stats_rx, qstat_field), \ 211 } 212 213 #define VIRTNET_STATS_DESC_TX_QSTAT(class, name, qstat_field) \ 214 { \ 215 #name, \ 216 offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name), \ 217 offsetof(struct netdev_queue_stats_tx, qstat_field), \ 218 } 219 220 static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc_qstat[] = { 221 VIRTNET_STATS_DESC_RX_QSTAT(basic, drops, hw_drops), 222 VIRTNET_STATS_DESC_RX_QSTAT(basic, drop_overruns, hw_drop_overruns), 223 }; 224 225 static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc_qstat[] = { 226 
VIRTNET_STATS_DESC_TX_QSTAT(basic, drops, hw_drops), 227 VIRTNET_STATS_DESC_TX_QSTAT(basic, drop_malformed, hw_drop_errors), 228 }; 229 230 static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc_qstat[] = { 231 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_valid, csum_unnecessary), 232 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_none, csum_none), 233 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_bad, csum_bad), 234 }; 235 236 static const struct virtnet_stat_desc virtnet_stats_tx_csum_desc_qstat[] = { 237 VIRTNET_STATS_DESC_TX_QSTAT(csum, csum_none, csum_none), 238 VIRTNET_STATS_DESC_TX_QSTAT(csum, needs_csum, needs_csum), 239 }; 240 241 static const struct virtnet_stat_desc virtnet_stats_rx_gso_desc_qstat[] = { 242 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets, hw_gro_packets), 243 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes, hw_gro_bytes), 244 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets_coalesced, hw_gro_wire_packets), 245 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes_coalesced, hw_gro_wire_bytes), 246 }; 247 248 static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc_qstat[] = { 249 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_packets, hw_gso_packets), 250 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_bytes, hw_gso_bytes), 251 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments, hw_gso_wire_packets), 252 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments_bytes, hw_gso_wire_bytes), 253 }; 254 255 static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc_qstat[] = { 256 VIRTNET_STATS_DESC_RX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits), 257 }; 258 259 static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc_qstat[] = { 260 VIRTNET_STATS_DESC_TX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits), 261 }; 262 263 #define VIRTNET_Q_TYPE_RX 0 264 #define VIRTNET_Q_TYPE_TX 1 265 #define VIRTNET_Q_TYPE_CQ 2 266 267 struct virtnet_interrupt_coalesce { 268 u32 max_packets; 269 u32 max_usecs; 270 }; 271 272 /* The dma information of pages allocated at a time. */ 273 struct virtnet_rq_dma { 274 dma_addr_t addr; 275 u32 ref; 276 u16 len; 277 u16 need_sync; 278 }; 279 280 /* Internal representation of a send virtqueue */ 281 struct send_queue { 282 /* Virtqueue associated with this send _queue */ 283 struct virtqueue *vq; 284 285 /* TX: fragments + linear part + virtio header */ 286 struct scatterlist sg[MAX_SKB_FRAGS + 2]; 287 288 /* Name of the send queue: output.$index */ 289 char name[16]; 290 291 struct virtnet_sq_stats stats; 292 293 struct virtnet_interrupt_coalesce intr_coal; 294 295 struct napi_struct napi; 296 297 /* Record whether sq is in reset state. */ 298 bool reset; 299 300 struct xsk_buff_pool *xsk_pool; 301 302 dma_addr_t xsk_hdr_dma_addr; 303 }; 304 305 /* Internal representation of a receive virtqueue */ 306 struct receive_queue { 307 /* Virtqueue associated with this receive_queue */ 308 struct virtqueue *vq; 309 310 struct napi_struct napi; 311 312 struct bpf_prog __rcu *xdp_prog; 313 314 struct virtnet_rq_stats stats; 315 316 /* The number of rx notifications */ 317 u16 calls; 318 319 /* Is dynamic interrupt moderation enabled? */ 320 bool dim_enabled; 321 322 /* Used to protect dim_enabled and inter_coal */ 323 struct mutex dim_lock; 324 325 /* Dynamic Interrupt Moderation */ 326 struct dim dim; 327 328 u32 packets_in_napi; 329 330 struct virtnet_interrupt_coalesce intr_coal; 331 332 /* Chain pages by the private ptr. */ 333 struct page *pages; 334 335 /* Average packet length for mergeable receive buffers. 
*/ 336 struct ewma_pkt_len mrg_avg_pkt_len; 337 338 /* Page frag for packet buffer allocation. */ 339 struct page_frag alloc_frag; 340 341 /* RX: fragments + linear part + virtio header */ 342 struct scatterlist sg[MAX_SKB_FRAGS + 2]; 343 344 /* Min single buffer size for mergeable buffers case. */ 345 unsigned int min_buf_len; 346 347 /* Name of this receive queue: input.$index */ 348 char name[16]; 349 350 struct xdp_rxq_info xdp_rxq; 351 352 /* Record the last dma info to free after new pages is allocated. */ 353 struct virtnet_rq_dma *last_dma; 354 355 struct xsk_buff_pool *xsk_pool; 356 357 /* xdp rxq used by xsk */ 358 struct xdp_rxq_info xsk_rxq_info; 359 360 struct xdp_buff **xsk_buffs; 361 }; 362 363 /* This structure can contain rss message with maximum settings for indirection table and keysize 364 * Note, that default structure that describes RSS configuration virtio_net_rss_config 365 * contains same info but can't handle table values. 366 * In any case, structure would be passed to virtio hw through sg_buf split by parts 367 * because table sizes may be differ according to the device configuration. 368 */ 369 #define VIRTIO_NET_RSS_MAX_KEY_SIZE 40 370 struct virtio_net_ctrl_rss { 371 u32 hash_types; 372 u16 indirection_table_mask; 373 u16 unclassified_queue; 374 u16 hash_cfg_reserved; /* for HASH_CONFIG (see virtio_net_hash_config for details) */ 375 u16 max_tx_vq; 376 u8 hash_key_length; 377 u8 key[VIRTIO_NET_RSS_MAX_KEY_SIZE]; 378 379 u16 *indirection_table; 380 }; 381 382 /* Control VQ buffers: protected by the rtnl lock */ 383 struct control_buf { 384 struct virtio_net_ctrl_hdr hdr; 385 virtio_net_ctrl_ack status; 386 }; 387 388 struct virtnet_info { 389 struct virtio_device *vdev; 390 struct virtqueue *cvq; 391 struct net_device *dev; 392 struct send_queue *sq; 393 struct receive_queue *rq; 394 unsigned int status; 395 396 /* Max # of queue pairs supported by the device */ 397 u16 max_queue_pairs; 398 399 /* # of queue pairs currently used by the driver */ 400 u16 curr_queue_pairs; 401 402 /* # of XDP queue pairs currently used by the driver */ 403 u16 xdp_queue_pairs; 404 405 /* xdp_queue_pairs may be 0, when xdp is already loaded. So add this. */ 406 bool xdp_enabled; 407 408 /* I like... big packets and I cannot lie! */ 409 bool big_packets; 410 411 /* number of sg entries allocated for big packets */ 412 unsigned int big_packets_num_skbfrags; 413 414 /* Host will merge rx buffers for big packets (shake it! shake it!) */ 415 bool mergeable_rx_bufs; 416 417 /* Host supports rss and/or hash report */ 418 bool has_rss; 419 bool has_rss_hash_report; 420 u8 rss_key_size; 421 u16 rss_indir_table_size; 422 u32 rss_hash_types_supported; 423 u32 rss_hash_types_saved; 424 struct virtio_net_ctrl_rss rss; 425 426 /* Has control virtqueue */ 427 bool has_cvq; 428 429 /* Lock to protect the control VQ */ 430 struct mutex cvq_lock; 431 432 /* Host can handle any s/g split between our header and packet data */ 433 bool any_header_sg; 434 435 /* Packet virtio header size */ 436 u8 hdr_len; 437 438 /* Work struct for delayed refilling if we run low on memory. */ 439 struct delayed_work refill; 440 441 /* Is delayed refill enabled? */ 442 bool refill_enabled; 443 444 /* The lock to synchronize the access to refill_enabled */ 445 spinlock_t refill_lock; 446 447 /* Work struct for config space updates */ 448 struct work_struct config_work; 449 450 /* Work struct for setting rx mode */ 451 struct work_struct rx_mode_work; 452 453 /* OK to queue work setting RX mode? 
*/ 454 bool rx_mode_work_enabled; 455 456 /* Does the affinity hint is set for virtqueues? */ 457 bool affinity_hint_set; 458 459 /* CPU hotplug instances for online & dead */ 460 struct hlist_node node; 461 struct hlist_node node_dead; 462 463 struct control_buf *ctrl; 464 465 /* Ethtool settings */ 466 u8 duplex; 467 u32 speed; 468 469 /* Is rx dynamic interrupt moderation enabled? */ 470 bool rx_dim_enabled; 471 472 /* Interrupt coalescing settings */ 473 struct virtnet_interrupt_coalesce intr_coal_tx; 474 struct virtnet_interrupt_coalesce intr_coal_rx; 475 476 unsigned long guest_offloads; 477 unsigned long guest_offloads_capable; 478 479 /* failover when STANDBY feature enabled */ 480 struct failover *failover; 481 482 u64 device_stats_cap; 483 }; 484 485 struct padded_vnet_hdr { 486 struct virtio_net_hdr_v1_hash hdr; 487 /* 488 * hdr is in a separate sg buffer, and data sg buffer shares same page 489 * with this header sg. This padding makes next sg 16 byte aligned 490 * after the header. 491 */ 492 char padding[12]; 493 }; 494 495 struct virtio_net_common_hdr { 496 union { 497 struct virtio_net_hdr hdr; 498 struct virtio_net_hdr_mrg_rxbuf mrg_hdr; 499 struct virtio_net_hdr_v1_hash hash_v1_hdr; 500 }; 501 }; 502 503 static struct virtio_net_common_hdr xsk_hdr; 504 505 static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf); 506 static void virtnet_sq_free_unused_buf_done(struct virtqueue *vq); 507 static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp, 508 struct net_device *dev, 509 unsigned int *xdp_xmit, 510 struct virtnet_rq_stats *stats); 511 static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq, 512 struct sk_buff *skb, u8 flags); 513 static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb, 514 struct sk_buff *curr_skb, 515 struct page *page, void *buf, 516 int len, int truesize); 517 static void virtnet_xsk_completed(struct send_queue *sq, int num); 518 519 enum virtnet_xmit_type { 520 VIRTNET_XMIT_TYPE_SKB, 521 VIRTNET_XMIT_TYPE_SKB_ORPHAN, 522 VIRTNET_XMIT_TYPE_XDP, 523 VIRTNET_XMIT_TYPE_XSK, 524 }; 525 526 static int rss_indirection_table_alloc(struct virtio_net_ctrl_rss *rss, u16 indir_table_size) 527 { 528 if (!indir_table_size) { 529 rss->indirection_table = NULL; 530 return 0; 531 } 532 533 rss->indirection_table = kmalloc_array(indir_table_size, sizeof(u16), GFP_KERNEL); 534 if (!rss->indirection_table) 535 return -ENOMEM; 536 537 return 0; 538 } 539 540 static void rss_indirection_table_free(struct virtio_net_ctrl_rss *rss) 541 { 542 kfree(rss->indirection_table); 543 } 544 545 /* We use the last two bits of the pointer to distinguish the xmit type. 
*/ 546 #define VIRTNET_XMIT_TYPE_MASK (BIT(0) | BIT(1)) 547 548 #define VIRTIO_XSK_FLAG_OFFSET 2 549 550 static enum virtnet_xmit_type virtnet_xmit_ptr_unpack(void **ptr) 551 { 552 unsigned long p = (unsigned long)*ptr; 553 554 *ptr = (void *)(p & ~VIRTNET_XMIT_TYPE_MASK); 555 556 return p & VIRTNET_XMIT_TYPE_MASK; 557 } 558 559 static void *virtnet_xmit_ptr_pack(void *ptr, enum virtnet_xmit_type type) 560 { 561 return (void *)((unsigned long)ptr | type); 562 } 563 564 static int virtnet_add_outbuf(struct send_queue *sq, int num, void *data, 565 enum virtnet_xmit_type type) 566 { 567 return virtqueue_add_outbuf(sq->vq, sq->sg, num, 568 virtnet_xmit_ptr_pack(data, type), 569 GFP_ATOMIC); 570 } 571 572 static u32 virtnet_ptr_to_xsk_buff_len(void *ptr) 573 { 574 return ((unsigned long)ptr) >> VIRTIO_XSK_FLAG_OFFSET; 575 } 576 577 static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len) 578 { 579 sg_dma_address(sg) = addr; 580 sg_dma_len(sg) = len; 581 } 582 583 static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq, 584 bool in_napi, struct virtnet_sq_free_stats *stats) 585 { 586 struct xdp_frame *frame; 587 struct sk_buff *skb; 588 unsigned int len; 589 void *ptr; 590 591 while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { 592 switch (virtnet_xmit_ptr_unpack(&ptr)) { 593 case VIRTNET_XMIT_TYPE_SKB: 594 skb = ptr; 595 596 pr_debug("Sent skb %p\n", skb); 597 stats->napi_packets++; 598 stats->napi_bytes += skb->len; 599 napi_consume_skb(skb, in_napi); 600 break; 601 602 case VIRTNET_XMIT_TYPE_SKB_ORPHAN: 603 skb = ptr; 604 605 stats->packets++; 606 stats->bytes += skb->len; 607 napi_consume_skb(skb, in_napi); 608 break; 609 610 case VIRTNET_XMIT_TYPE_XDP: 611 frame = ptr; 612 613 stats->packets++; 614 stats->bytes += xdp_get_frame_len(frame); 615 xdp_return_frame(frame); 616 break; 617 618 case VIRTNET_XMIT_TYPE_XSK: 619 stats->bytes += virtnet_ptr_to_xsk_buff_len(ptr); 620 stats->xsk++; 621 break; 622 } 623 } 624 netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes); 625 } 626 627 static void virtnet_free_old_xmit(struct send_queue *sq, 628 struct netdev_queue *txq, 629 bool in_napi, 630 struct virtnet_sq_free_stats *stats) 631 { 632 __free_old_xmit(sq, txq, in_napi, stats); 633 634 if (stats->xsk) 635 virtnet_xsk_completed(sq, stats->xsk); 636 } 637 638 /* Converting between virtqueue no. and kernel tx/rx queue no. 639 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq 640 */ 641 static int vq2txq(struct virtqueue *vq) 642 { 643 return (vq->index - 1) / 2; 644 } 645 646 static int txq2vq(int txq) 647 { 648 return txq * 2 + 1; 649 } 650 651 static int vq2rxq(struct virtqueue *vq) 652 { 653 return vq->index / 2; 654 } 655 656 static int rxq2vq(int rxq) 657 { 658 return rxq * 2; 659 } 660 661 static int vq_type(struct virtnet_info *vi, int qid) 662 { 663 if (qid == vi->max_queue_pairs * 2) 664 return VIRTNET_Q_TYPE_CQ; 665 666 if (qid % 2) 667 return VIRTNET_Q_TYPE_TX; 668 669 return VIRTNET_Q_TYPE_RX; 670 } 671 672 static inline struct virtio_net_common_hdr * 673 skb_vnet_common_hdr(struct sk_buff *skb) 674 { 675 return (struct virtio_net_common_hdr *)skb->cb; 676 } 677 678 /* 679 * private is used to chain pages for big packets, put the whole 680 * most recent used list in the beginning for reuse 681 */ 682 static void give_pages(struct receive_queue *rq, struct page *page) 683 { 684 struct page *end; 685 686 /* Find end of list, sew whole thing into vi->rq.pages. 
*/ 687 for (end = page; end->private; end = (struct page *)end->private); 688 end->private = (unsigned long)rq->pages; 689 rq->pages = page; 690 } 691 692 static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask) 693 { 694 struct page *p = rq->pages; 695 696 if (p) { 697 rq->pages = (struct page *)p->private; 698 /* clear private here, it is used to chain pages */ 699 p->private = 0; 700 } else 701 p = alloc_page(gfp_mask); 702 return p; 703 } 704 705 static void virtnet_rq_free_buf(struct virtnet_info *vi, 706 struct receive_queue *rq, void *buf) 707 { 708 if (vi->mergeable_rx_bufs) 709 put_page(virt_to_head_page(buf)); 710 else if (vi->big_packets) 711 give_pages(rq, buf); 712 else 713 put_page(virt_to_head_page(buf)); 714 } 715 716 static void enable_delayed_refill(struct virtnet_info *vi) 717 { 718 spin_lock_bh(&vi->refill_lock); 719 vi->refill_enabled = true; 720 spin_unlock_bh(&vi->refill_lock); 721 } 722 723 static void disable_delayed_refill(struct virtnet_info *vi) 724 { 725 spin_lock_bh(&vi->refill_lock); 726 vi->refill_enabled = false; 727 spin_unlock_bh(&vi->refill_lock); 728 } 729 730 static void enable_rx_mode_work(struct virtnet_info *vi) 731 { 732 rtnl_lock(); 733 vi->rx_mode_work_enabled = true; 734 rtnl_unlock(); 735 } 736 737 static void disable_rx_mode_work(struct virtnet_info *vi) 738 { 739 rtnl_lock(); 740 vi->rx_mode_work_enabled = false; 741 rtnl_unlock(); 742 } 743 744 static void virtqueue_napi_schedule(struct napi_struct *napi, 745 struct virtqueue *vq) 746 { 747 if (napi_schedule_prep(napi)) { 748 virtqueue_disable_cb(vq); 749 __napi_schedule(napi); 750 } 751 } 752 753 static bool virtqueue_napi_complete(struct napi_struct *napi, 754 struct virtqueue *vq, int processed) 755 { 756 int opaque; 757 758 opaque = virtqueue_enable_cb_prepare(vq); 759 if (napi_complete_done(napi, processed)) { 760 if (unlikely(virtqueue_poll(vq, opaque))) 761 virtqueue_napi_schedule(napi, vq); 762 else 763 return true; 764 } else { 765 virtqueue_disable_cb(vq); 766 } 767 768 return false; 769 } 770 771 static void skb_xmit_done(struct virtqueue *vq) 772 { 773 struct virtnet_info *vi = vq->vdev->priv; 774 struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi; 775 776 /* Suppress further interrupts. */ 777 virtqueue_disable_cb(vq); 778 779 if (napi->weight) 780 virtqueue_napi_schedule(napi, vq); 781 else 782 /* We were probably waiting for more output buffers. 
*/ 783 netif_wake_subqueue(vi->dev, vq2txq(vq)); 784 } 785 786 #define MRG_CTX_HEADER_SHIFT 22 787 static void *mergeable_len_to_ctx(unsigned int truesize, 788 unsigned int headroom) 789 { 790 return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize); 791 } 792 793 static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx) 794 { 795 return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT; 796 } 797 798 static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx) 799 { 800 return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1); 801 } 802 803 static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen, 804 unsigned int headroom, 805 unsigned int len) 806 { 807 struct sk_buff *skb; 808 809 skb = build_skb(buf, buflen); 810 if (unlikely(!skb)) 811 return NULL; 812 813 skb_reserve(skb, headroom); 814 skb_put(skb, len); 815 816 return skb; 817 } 818 819 /* Called from bottom half context */ 820 static struct sk_buff *page_to_skb(struct virtnet_info *vi, 821 struct receive_queue *rq, 822 struct page *page, unsigned int offset, 823 unsigned int len, unsigned int truesize, 824 unsigned int headroom) 825 { 826 struct sk_buff *skb; 827 struct virtio_net_common_hdr *hdr; 828 unsigned int copy, hdr_len, hdr_padded_len; 829 struct page *page_to_free = NULL; 830 int tailroom, shinfo_size; 831 char *p, *hdr_p, *buf; 832 833 p = page_address(page) + offset; 834 hdr_p = p; 835 836 hdr_len = vi->hdr_len; 837 if (vi->mergeable_rx_bufs) 838 hdr_padded_len = hdr_len; 839 else 840 hdr_padded_len = sizeof(struct padded_vnet_hdr); 841 842 buf = p - headroom; 843 len -= hdr_len; 844 offset += hdr_padded_len; 845 p += hdr_padded_len; 846 tailroom = truesize - headroom - hdr_padded_len - len; 847 848 shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 849 850 if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) { 851 skb = virtnet_build_skb(buf, truesize, p - buf, len); 852 if (unlikely(!skb)) 853 return NULL; 854 855 page = (struct page *)page->private; 856 if (page) 857 give_pages(rq, page); 858 goto ok; 859 } 860 861 /* copy small packet so we can reuse these pages for small data */ 862 skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN); 863 if (unlikely(!skb)) 864 return NULL; 865 866 /* Copy all frame if it fits skb->head, otherwise 867 * we let virtio_net_hdr_to_skb() and GRO pull headers as needed. 868 */ 869 if (len <= skb_tailroom(skb)) 870 copy = len; 871 else 872 copy = ETH_HLEN; 873 skb_put_data(skb, p, copy); 874 875 len -= copy; 876 offset += copy; 877 878 if (vi->mergeable_rx_bufs) { 879 if (len) 880 skb_add_rx_frag(skb, 0, page, offset, len, truesize); 881 else 882 page_to_free = page; 883 goto ok; 884 } 885 886 /* 887 * Verify that we can indeed put this data into a skb. 888 * This is here to handle cases when the device erroneously 889 * tries to receive more than is possible. This is usually 890 * the case of a broken device. 
891 */ 892 if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) { 893 net_dbg_ratelimited("%s: too much data\n", skb->dev->name); 894 dev_kfree_skb(skb); 895 return NULL; 896 } 897 BUG_ON(offset >= PAGE_SIZE); 898 while (len) { 899 unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len); 900 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset, 901 frag_size, truesize); 902 len -= frag_size; 903 page = (struct page *)page->private; 904 offset = 0; 905 } 906 907 if (page) 908 give_pages(rq, page); 909 910 ok: 911 hdr = skb_vnet_common_hdr(skb); 912 memcpy(hdr, hdr_p, hdr_len); 913 if (page_to_free) 914 put_page(page_to_free); 915 916 return skb; 917 } 918 919 static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len) 920 { 921 struct virtnet_info *vi = rq->vq->vdev->priv; 922 struct page *page = virt_to_head_page(buf); 923 struct virtnet_rq_dma *dma; 924 void *head; 925 int offset; 926 927 BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs); 928 929 head = page_address(page); 930 931 dma = head; 932 933 --dma->ref; 934 935 if (dma->need_sync && len) { 936 offset = buf - (head + sizeof(*dma)); 937 938 virtqueue_dma_sync_single_range_for_cpu(rq->vq, dma->addr, 939 offset, len, 940 DMA_FROM_DEVICE); 941 } 942 943 if (dma->ref) 944 return; 945 946 virtqueue_dma_unmap_single_attrs(rq->vq, dma->addr, dma->len, 947 DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); 948 put_page(page); 949 } 950 951 static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx) 952 { 953 struct virtnet_info *vi = rq->vq->vdev->priv; 954 void *buf; 955 956 BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs); 957 958 buf = virtqueue_get_buf_ctx(rq->vq, len, ctx); 959 if (buf) 960 virtnet_rq_unmap(rq, buf, *len); 961 962 return buf; 963 } 964 965 static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len) 966 { 967 struct virtnet_info *vi = rq->vq->vdev->priv; 968 struct virtnet_rq_dma *dma; 969 dma_addr_t addr; 970 u32 offset; 971 void *head; 972 973 BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs); 974 975 head = page_address(rq->alloc_frag.page); 976 977 offset = buf - head; 978 979 dma = head; 980 981 addr = dma->addr - sizeof(*dma) + offset; 982 983 sg_init_table(rq->sg, 1); 984 sg_fill_dma(rq->sg, addr, len); 985 } 986 987 static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp) 988 { 989 struct page_frag *alloc_frag = &rq->alloc_frag; 990 struct virtnet_info *vi = rq->vq->vdev->priv; 991 struct virtnet_rq_dma *dma; 992 void *buf, *head; 993 dma_addr_t addr; 994 995 BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs); 996 997 head = page_address(alloc_frag->page); 998 999 dma = head; 1000 1001 /* new pages */ 1002 if (!alloc_frag->offset) { 1003 if (rq->last_dma) { 1004 /* Now, the new page is allocated, the last dma 1005 * will not be used. So the dma can be unmapped 1006 * if the ref is 0. 1007 */ 1008 virtnet_rq_unmap(rq, rq->last_dma, 0); 1009 rq->last_dma = NULL; 1010 } 1011 1012 dma->len = alloc_frag->size - sizeof(*dma); 1013 1014 addr = virtqueue_dma_map_single_attrs(rq->vq, dma + 1, 1015 dma->len, DMA_FROM_DEVICE, 0); 1016 if (virtqueue_dma_mapping_error(rq->vq, addr)) 1017 return NULL; 1018 1019 dma->addr = addr; 1020 dma->need_sync = virtqueue_dma_need_sync(rq->vq, addr); 1021 1022 /* Add a reference to dma to prevent the entire dma from 1023 * being released during error handling. This reference 1024 * will be freed after the pages are no longer used. 
1025 */ 1026 get_page(alloc_frag->page); 1027 dma->ref = 1; 1028 alloc_frag->offset = sizeof(*dma); 1029 1030 rq->last_dma = dma; 1031 } 1032 1033 ++dma->ref; 1034 1035 buf = head + alloc_frag->offset; 1036 1037 get_page(alloc_frag->page); 1038 alloc_frag->offset += size; 1039 1040 return buf; 1041 } 1042 1043 static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf) 1044 { 1045 struct virtnet_info *vi = vq->vdev->priv; 1046 struct receive_queue *rq; 1047 int i = vq2rxq(vq); 1048 1049 rq = &vi->rq[i]; 1050 1051 if (rq->xsk_pool) { 1052 xsk_buff_free((struct xdp_buff *)buf); 1053 return; 1054 } 1055 1056 if (!vi->big_packets || vi->mergeable_rx_bufs) 1057 virtnet_rq_unmap(rq, buf, 0); 1058 1059 virtnet_rq_free_buf(vi, rq, buf); 1060 } 1061 1062 static void free_old_xmit(struct send_queue *sq, struct netdev_queue *txq, 1063 bool in_napi) 1064 { 1065 struct virtnet_sq_free_stats stats = {0}; 1066 1067 virtnet_free_old_xmit(sq, txq, in_napi, &stats); 1068 1069 /* Avoid overhead when no packets have been processed 1070 * happens when called speculatively from start_xmit. 1071 */ 1072 if (!stats.packets && !stats.napi_packets) 1073 return; 1074 1075 u64_stats_update_begin(&sq->stats.syncp); 1076 u64_stats_add(&sq->stats.bytes, stats.bytes + stats.napi_bytes); 1077 u64_stats_add(&sq->stats.packets, stats.packets + stats.napi_packets); 1078 u64_stats_update_end(&sq->stats.syncp); 1079 } 1080 1081 static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) 1082 { 1083 if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) 1084 return false; 1085 else if (q < vi->curr_queue_pairs) 1086 return true; 1087 else 1088 return false; 1089 } 1090 1091 static bool tx_may_stop(struct virtnet_info *vi, 1092 struct net_device *dev, 1093 struct send_queue *sq) 1094 { 1095 int qnum; 1096 1097 qnum = sq - vi->sq; 1098 1099 /* If running out of space, stop queue to avoid getting packets that we 1100 * are then unable to transmit. 1101 * An alternative would be to force queuing layer to requeue the skb by 1102 * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be 1103 * returned in a normal path of operation: it means that driver is not 1104 * maintaining the TX queue stop/start state properly, and causes 1105 * the stack to do a non-trivial amount of useless work. 1106 * Since most packets only take 1 or 2 ring slots, stopping the queue 1107 * early means 16 slots are typically wasted. 1108 */ 1109 if (sq->vq->num_free < 2+MAX_SKB_FRAGS) { 1110 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); 1111 1112 netif_tx_stop_queue(txq); 1113 u64_stats_update_begin(&sq->stats.syncp); 1114 u64_stats_inc(&sq->stats.stop); 1115 u64_stats_update_end(&sq->stats.syncp); 1116 1117 return true; 1118 } 1119 1120 return false; 1121 } 1122 1123 static void check_sq_full_and_disable(struct virtnet_info *vi, 1124 struct net_device *dev, 1125 struct send_queue *sq) 1126 { 1127 bool use_napi = sq->napi.weight; 1128 int qnum; 1129 1130 qnum = sq - vi->sq; 1131 1132 if (tx_may_stop(vi, dev, sq)) { 1133 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); 1134 1135 if (use_napi) { 1136 if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) 1137 virtqueue_napi_schedule(&sq->napi, sq->vq); 1138 } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { 1139 /* More just got used, free them then recheck. 
*/ 1140 free_old_xmit(sq, txq, false); 1141 if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) { 1142 netif_start_subqueue(dev, qnum); 1143 u64_stats_update_begin(&sq->stats.syncp); 1144 u64_stats_inc(&sq->stats.wake); 1145 u64_stats_update_end(&sq->stats.syncp); 1146 virtqueue_disable_cb(sq->vq); 1147 } 1148 } 1149 } 1150 } 1151 1152 static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi, 1153 struct receive_queue *rq, void *buf, u32 len) 1154 { 1155 struct xdp_buff *xdp; 1156 u32 bufsize; 1157 1158 xdp = (struct xdp_buff *)buf; 1159 1160 bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool) + vi->hdr_len; 1161 1162 if (unlikely(len > bufsize)) { 1163 pr_debug("%s: rx error: len %u exceeds truesize %u\n", 1164 vi->dev->name, len, bufsize); 1165 DEV_STATS_INC(vi->dev, rx_length_errors); 1166 xsk_buff_free(xdp); 1167 return NULL; 1168 } 1169 1170 xsk_buff_set_size(xdp, len); 1171 xsk_buff_dma_sync_for_cpu(xdp); 1172 1173 return xdp; 1174 } 1175 1176 static struct sk_buff *xsk_construct_skb(struct receive_queue *rq, 1177 struct xdp_buff *xdp) 1178 { 1179 unsigned int metasize = xdp->data - xdp->data_meta; 1180 struct sk_buff *skb; 1181 unsigned int size; 1182 1183 size = xdp->data_end - xdp->data_hard_start; 1184 skb = napi_alloc_skb(&rq->napi, size); 1185 if (unlikely(!skb)) { 1186 xsk_buff_free(xdp); 1187 return NULL; 1188 } 1189 1190 skb_reserve(skb, xdp->data_meta - xdp->data_hard_start); 1191 1192 size = xdp->data_end - xdp->data_meta; 1193 memcpy(__skb_put(skb, size), xdp->data_meta, size); 1194 1195 if (metasize) { 1196 __skb_pull(skb, metasize); 1197 skb_metadata_set(skb, metasize); 1198 } 1199 1200 xsk_buff_free(xdp); 1201 1202 return skb; 1203 } 1204 1205 static struct sk_buff *virtnet_receive_xsk_small(struct net_device *dev, struct virtnet_info *vi, 1206 struct receive_queue *rq, struct xdp_buff *xdp, 1207 unsigned int *xdp_xmit, 1208 struct virtnet_rq_stats *stats) 1209 { 1210 struct bpf_prog *prog; 1211 u32 ret; 1212 1213 ret = XDP_PASS; 1214 rcu_read_lock(); 1215 prog = rcu_dereference(rq->xdp_prog); 1216 if (prog) 1217 ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats); 1218 rcu_read_unlock(); 1219 1220 switch (ret) { 1221 case XDP_PASS: 1222 return xsk_construct_skb(rq, xdp); 1223 1224 case XDP_TX: 1225 case XDP_REDIRECT: 1226 return NULL; 1227 1228 default: 1229 /* drop packet */ 1230 xsk_buff_free(xdp); 1231 u64_stats_inc(&stats->drops); 1232 return NULL; 1233 } 1234 } 1235 1236 static void xsk_drop_follow_bufs(struct net_device *dev, 1237 struct receive_queue *rq, 1238 u32 num_buf, 1239 struct virtnet_rq_stats *stats) 1240 { 1241 struct xdp_buff *xdp; 1242 u32 len; 1243 1244 while (num_buf-- > 1) { 1245 xdp = virtqueue_get_buf(rq->vq, &len); 1246 if (unlikely(!xdp)) { 1247 pr_debug("%s: rx error: %d buffers missing\n", 1248 dev->name, num_buf); 1249 DEV_STATS_INC(dev, rx_length_errors); 1250 break; 1251 } 1252 u64_stats_add(&stats->bytes, len); 1253 xsk_buff_free(xdp); 1254 } 1255 } 1256 1257 static int xsk_append_merge_buffer(struct virtnet_info *vi, 1258 struct receive_queue *rq, 1259 struct sk_buff *head_skb, 1260 u32 num_buf, 1261 struct virtio_net_hdr_mrg_rxbuf *hdr, 1262 struct virtnet_rq_stats *stats) 1263 { 1264 struct sk_buff *curr_skb; 1265 struct xdp_buff *xdp; 1266 u32 len, truesize; 1267 struct page *page; 1268 void *buf; 1269 1270 curr_skb = head_skb; 1271 1272 while (--num_buf) { 1273 buf = virtqueue_get_buf(rq->vq, &len); 1274 if (unlikely(!buf)) { 1275 pr_debug("%s: rx error: %d buffers out of %d missing\n", 1276 vi->dev->name, num_buf, 1277 
virtio16_to_cpu(vi->vdev, 1278 hdr->num_buffers)); 1279 DEV_STATS_INC(vi->dev, rx_length_errors); 1280 return -EINVAL; 1281 } 1282 1283 u64_stats_add(&stats->bytes, len); 1284 1285 xdp = buf_to_xdp(vi, rq, buf, len); 1286 if (!xdp) 1287 goto err; 1288 1289 buf = napi_alloc_frag(len); 1290 if (!buf) { 1291 xsk_buff_free(xdp); 1292 goto err; 1293 } 1294 1295 memcpy(buf, xdp->data - vi->hdr_len, len); 1296 1297 xsk_buff_free(xdp); 1298 1299 page = virt_to_page(buf); 1300 1301 truesize = len; 1302 1303 curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page, 1304 buf, len, truesize); 1305 if (!curr_skb) { 1306 put_page(page); 1307 goto err; 1308 } 1309 } 1310 1311 return 0; 1312 1313 err: 1314 xsk_drop_follow_bufs(vi->dev, rq, num_buf, stats); 1315 return -EINVAL; 1316 } 1317 1318 static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct virtnet_info *vi, 1319 struct receive_queue *rq, struct xdp_buff *xdp, 1320 unsigned int *xdp_xmit, 1321 struct virtnet_rq_stats *stats) 1322 { 1323 struct virtio_net_hdr_mrg_rxbuf *hdr; 1324 struct bpf_prog *prog; 1325 struct sk_buff *skb; 1326 u32 ret, num_buf; 1327 1328 hdr = xdp->data - vi->hdr_len; 1329 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 1330 1331 ret = XDP_PASS; 1332 rcu_read_lock(); 1333 prog = rcu_dereference(rq->xdp_prog); 1334 /* TODO: support multi buffer. */ 1335 if (prog && num_buf == 1) 1336 ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats); 1337 rcu_read_unlock(); 1338 1339 switch (ret) { 1340 case XDP_PASS: 1341 skb = xsk_construct_skb(rq, xdp); 1342 if (!skb) 1343 goto drop_bufs; 1344 1345 if (xsk_append_merge_buffer(vi, rq, skb, num_buf, hdr, stats)) { 1346 dev_kfree_skb(skb); 1347 goto drop; 1348 } 1349 1350 return skb; 1351 1352 case XDP_TX: 1353 case XDP_REDIRECT: 1354 return NULL; 1355 1356 default: 1357 /* drop packet */ 1358 xsk_buff_free(xdp); 1359 } 1360 1361 drop_bufs: 1362 xsk_drop_follow_bufs(dev, rq, num_buf, stats); 1363 1364 drop: 1365 u64_stats_inc(&stats->drops); 1366 return NULL; 1367 } 1368 1369 static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queue *rq, 1370 void *buf, u32 len, 1371 unsigned int *xdp_xmit, 1372 struct virtnet_rq_stats *stats) 1373 { 1374 struct net_device *dev = vi->dev; 1375 struct sk_buff *skb = NULL; 1376 struct xdp_buff *xdp; 1377 u8 flags; 1378 1379 len -= vi->hdr_len; 1380 1381 u64_stats_add(&stats->bytes, len); 1382 1383 xdp = buf_to_xdp(vi, rq, buf, len); 1384 if (!xdp) 1385 return; 1386 1387 if (unlikely(len < ETH_HLEN)) { 1388 pr_debug("%s: short packet %i\n", dev->name, len); 1389 DEV_STATS_INC(dev, rx_length_errors); 1390 xsk_buff_free(xdp); 1391 return; 1392 } 1393 1394 flags = ((struct virtio_net_common_hdr *)(xdp->data - vi->hdr_len))->hdr.flags; 1395 1396 if (!vi->mergeable_rx_bufs) 1397 skb = virtnet_receive_xsk_small(dev, vi, rq, xdp, xdp_xmit, stats); 1398 else 1399 skb = virtnet_receive_xsk_merge(dev, vi, rq, xdp, xdp_xmit, stats); 1400 1401 if (skb) 1402 virtnet_receive_done(vi, rq, skb, flags); 1403 } 1404 1405 static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue *rq, 1406 struct xsk_buff_pool *pool, gfp_t gfp) 1407 { 1408 struct xdp_buff **xsk_buffs; 1409 dma_addr_t addr; 1410 int err = 0; 1411 u32 len, i; 1412 int num; 1413 1414 xsk_buffs = rq->xsk_buffs; 1415 1416 num = xsk_buff_alloc_batch(pool, xsk_buffs, rq->vq->num_free); 1417 if (!num) 1418 return -ENOMEM; 1419 1420 len = xsk_pool_get_rx_frame_size(pool) + vi->hdr_len; 1421 1422 for (i = 0; i < num; ++i) { 1423 /* Use 
the part of XDP_PACKET_HEADROOM as the virtnet hdr space. 1424 * We assume XDP_PACKET_HEADROOM is larger than hdr->len. 1425 * (see function virtnet_xsk_pool_enable) 1426 */ 1427 addr = xsk_buff_xdp_get_dma(xsk_buffs[i]) - vi->hdr_len; 1428 1429 sg_init_table(rq->sg, 1); 1430 sg_fill_dma(rq->sg, addr, len); 1431 1432 err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1, 1433 xsk_buffs[i], NULL, gfp); 1434 if (err) 1435 goto err; 1436 } 1437 1438 return num; 1439 1440 err: 1441 for (; i < num; ++i) 1442 xsk_buff_free(xsk_buffs[i]); 1443 1444 return err; 1445 } 1446 1447 static void *virtnet_xsk_to_ptr(u32 len) 1448 { 1449 unsigned long p; 1450 1451 p = len << VIRTIO_XSK_FLAG_OFFSET; 1452 1453 return virtnet_xmit_ptr_pack((void *)p, VIRTNET_XMIT_TYPE_XSK); 1454 } 1455 1456 static int virtnet_xsk_xmit_one(struct send_queue *sq, 1457 struct xsk_buff_pool *pool, 1458 struct xdp_desc *desc) 1459 { 1460 struct virtnet_info *vi; 1461 dma_addr_t addr; 1462 1463 vi = sq->vq->vdev->priv; 1464 1465 addr = xsk_buff_raw_get_dma(pool, desc->addr); 1466 xsk_buff_raw_dma_sync_for_device(pool, addr, desc->len); 1467 1468 sg_init_table(sq->sg, 2); 1469 sg_fill_dma(sq->sg, sq->xsk_hdr_dma_addr, vi->hdr_len); 1470 sg_fill_dma(sq->sg + 1, addr, desc->len); 1471 1472 return virtqueue_add_outbuf_premapped(sq->vq, sq->sg, 2, 1473 virtnet_xsk_to_ptr(desc->len), 1474 GFP_ATOMIC); 1475 } 1476 1477 static int virtnet_xsk_xmit_batch(struct send_queue *sq, 1478 struct xsk_buff_pool *pool, 1479 unsigned int budget, 1480 u64 *kicks) 1481 { 1482 struct xdp_desc *descs = pool->tx_descs; 1483 bool kick = false; 1484 u32 nb_pkts, i; 1485 int err; 1486 1487 budget = min_t(u32, budget, sq->vq->num_free); 1488 1489 nb_pkts = xsk_tx_peek_release_desc_batch(pool, budget); 1490 if (!nb_pkts) 1491 return 0; 1492 1493 for (i = 0; i < nb_pkts; i++) { 1494 err = virtnet_xsk_xmit_one(sq, pool, &descs[i]); 1495 if (unlikely(err)) { 1496 xsk_tx_completed(sq->xsk_pool, nb_pkts - i); 1497 break; 1498 } 1499 1500 kick = true; 1501 } 1502 1503 if (kick && virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) 1504 (*kicks)++; 1505 1506 return i; 1507 } 1508 1509 static bool virtnet_xsk_xmit(struct send_queue *sq, struct xsk_buff_pool *pool, 1510 int budget) 1511 { 1512 struct virtnet_info *vi = sq->vq->vdev->priv; 1513 struct virtnet_sq_free_stats stats = {}; 1514 struct net_device *dev = vi->dev; 1515 u64 kicks = 0; 1516 int sent; 1517 1518 /* Avoid to wakeup napi meanless, so call __free_old_xmit instead of 1519 * free_old_xmit(). 
1520 */ 1521 __free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq), true, &stats); 1522 1523 if (stats.xsk) 1524 xsk_tx_completed(sq->xsk_pool, stats.xsk); 1525 1526 sent = virtnet_xsk_xmit_batch(sq, pool, budget, &kicks); 1527 1528 if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq)) 1529 check_sq_full_and_disable(vi, vi->dev, sq); 1530 1531 if (sent) { 1532 struct netdev_queue *txq; 1533 1534 txq = netdev_get_tx_queue(vi->dev, sq - vi->sq); 1535 txq_trans_cond_update(txq); 1536 } 1537 1538 u64_stats_update_begin(&sq->stats.syncp); 1539 u64_stats_add(&sq->stats.packets, stats.packets); 1540 u64_stats_add(&sq->stats.bytes, stats.bytes); 1541 u64_stats_add(&sq->stats.kicks, kicks); 1542 u64_stats_add(&sq->stats.xdp_tx, sent); 1543 u64_stats_update_end(&sq->stats.syncp); 1544 1545 if (xsk_uses_need_wakeup(pool)) 1546 xsk_set_tx_need_wakeup(pool); 1547 1548 return sent; 1549 } 1550 1551 static void xsk_wakeup(struct send_queue *sq) 1552 { 1553 if (napi_if_scheduled_mark_missed(&sq->napi)) 1554 return; 1555 1556 local_bh_disable(); 1557 virtqueue_napi_schedule(&sq->napi, sq->vq); 1558 local_bh_enable(); 1559 } 1560 1561 static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag) 1562 { 1563 struct virtnet_info *vi = netdev_priv(dev); 1564 struct send_queue *sq; 1565 1566 if (!netif_running(dev)) 1567 return -ENETDOWN; 1568 1569 if (qid >= vi->curr_queue_pairs) 1570 return -EINVAL; 1571 1572 sq = &vi->sq[qid]; 1573 1574 xsk_wakeup(sq); 1575 return 0; 1576 } 1577 1578 static void virtnet_xsk_completed(struct send_queue *sq, int num) 1579 { 1580 xsk_tx_completed(sq->xsk_pool, num); 1581 1582 /* If this is called by rx poll, start_xmit and xdp xmit we should 1583 * wakeup the tx napi to consume the xsk tx queue, because the tx 1584 * interrupt may not be triggered. 1585 */ 1586 xsk_wakeup(sq); 1587 } 1588 1589 static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, 1590 struct send_queue *sq, 1591 struct xdp_frame *xdpf) 1592 { 1593 struct virtio_net_hdr_mrg_rxbuf *hdr; 1594 struct skb_shared_info *shinfo; 1595 u8 nr_frags = 0; 1596 int err, i; 1597 1598 if (unlikely(xdpf->headroom < vi->hdr_len)) 1599 return -EOVERFLOW; 1600 1601 if (unlikely(xdp_frame_has_frags(xdpf))) { 1602 shinfo = xdp_get_shared_info_from_frame(xdpf); 1603 nr_frags = shinfo->nr_frags; 1604 } 1605 1606 /* In wrapping function virtnet_xdp_xmit(), we need to free 1607 * up the pending old buffers, where we need to calculate the 1608 * position of skb_shared_info in xdp_get_frame_len() and 1609 * xdp_return_frame(), which will involve to xdpf->data and 1610 * xdpf->headroom. Therefore, we need to update the value of 1611 * headroom synchronously here. 1612 */ 1613 xdpf->headroom -= vi->hdr_len; 1614 xdpf->data -= vi->hdr_len; 1615 /* Zero header and leave csum up to XDP layers */ 1616 hdr = xdpf->data; 1617 memset(hdr, 0, vi->hdr_len); 1618 xdpf->len += vi->hdr_len; 1619 1620 sg_init_table(sq->sg, nr_frags + 1); 1621 sg_set_buf(sq->sg, xdpf->data, xdpf->len); 1622 for (i = 0; i < nr_frags; i++) { 1623 skb_frag_t *frag = &shinfo->frags[i]; 1624 1625 sg_set_page(&sq->sg[i + 1], skb_frag_page(frag), 1626 skb_frag_size(frag), skb_frag_off(frag)); 1627 } 1628 1629 err = virtnet_add_outbuf(sq, nr_frags + 1, xdpf, VIRTNET_XMIT_TYPE_XDP); 1630 if (unlikely(err)) 1631 return -ENOSPC; /* Caller handle free/refcnt */ 1632 1633 return 0; 1634 } 1635 1636 /* when vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on 1637 * the current cpu, so it does not need to be locked. 
1638 * 1639 * Here we use marco instead of inline functions because we have to deal with 1640 * three issues at the same time: 1. the choice of sq. 2. judge and execute the 1641 * lock/unlock of txq 3. make sparse happy. It is difficult for two inline 1642 * functions to perfectly solve these three problems at the same time. 1643 */ 1644 #define virtnet_xdp_get_sq(vi) ({ \ 1645 int cpu = smp_processor_id(); \ 1646 struct netdev_queue *txq; \ 1647 typeof(vi) v = (vi); \ 1648 unsigned int qp; \ 1649 \ 1650 if (v->curr_queue_pairs > nr_cpu_ids) { \ 1651 qp = v->curr_queue_pairs - v->xdp_queue_pairs; \ 1652 qp += cpu; \ 1653 txq = netdev_get_tx_queue(v->dev, qp); \ 1654 __netif_tx_acquire(txq); \ 1655 } else { \ 1656 qp = cpu % v->curr_queue_pairs; \ 1657 txq = netdev_get_tx_queue(v->dev, qp); \ 1658 __netif_tx_lock(txq, cpu); \ 1659 } \ 1660 v->sq + qp; \ 1661 }) 1662 1663 #define virtnet_xdp_put_sq(vi, q) { \ 1664 struct netdev_queue *txq; \ 1665 typeof(vi) v = (vi); \ 1666 \ 1667 txq = netdev_get_tx_queue(v->dev, (q) - v->sq); \ 1668 if (v->curr_queue_pairs > nr_cpu_ids) \ 1669 __netif_tx_release(txq); \ 1670 else \ 1671 __netif_tx_unlock(txq); \ 1672 } 1673 1674 static int virtnet_xdp_xmit(struct net_device *dev, 1675 int n, struct xdp_frame **frames, u32 flags) 1676 { 1677 struct virtnet_info *vi = netdev_priv(dev); 1678 struct virtnet_sq_free_stats stats = {0}; 1679 struct receive_queue *rq = vi->rq; 1680 struct bpf_prog *xdp_prog; 1681 struct send_queue *sq; 1682 int nxmit = 0; 1683 int kicks = 0; 1684 int ret; 1685 int i; 1686 1687 /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this 1688 * indicate XDP resources have been successfully allocated. 1689 */ 1690 xdp_prog = rcu_access_pointer(rq->xdp_prog); 1691 if (!xdp_prog) 1692 return -ENXIO; 1693 1694 sq = virtnet_xdp_get_sq(vi); 1695 1696 if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { 1697 ret = -EINVAL; 1698 goto out; 1699 } 1700 1701 /* Free up any pending old buffers before queueing new ones. 
*/ 1702 virtnet_free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq), 1703 false, &stats); 1704 1705 for (i = 0; i < n; i++) { 1706 struct xdp_frame *xdpf = frames[i]; 1707 1708 if (__virtnet_xdp_xmit_one(vi, sq, xdpf)) 1709 break; 1710 nxmit++; 1711 } 1712 ret = nxmit; 1713 1714 if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq)) 1715 check_sq_full_and_disable(vi, dev, sq); 1716 1717 if (flags & XDP_XMIT_FLUSH) { 1718 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) 1719 kicks = 1; 1720 } 1721 out: 1722 u64_stats_update_begin(&sq->stats.syncp); 1723 u64_stats_add(&sq->stats.bytes, stats.bytes); 1724 u64_stats_add(&sq->stats.packets, stats.packets); 1725 u64_stats_add(&sq->stats.xdp_tx, n); 1726 u64_stats_add(&sq->stats.xdp_tx_drops, n - nxmit); 1727 u64_stats_add(&sq->stats.kicks, kicks); 1728 u64_stats_update_end(&sq->stats.syncp); 1729 1730 virtnet_xdp_put_sq(vi, sq); 1731 return ret; 1732 } 1733 1734 static void put_xdp_frags(struct xdp_buff *xdp) 1735 { 1736 struct skb_shared_info *shinfo; 1737 struct page *xdp_page; 1738 int i; 1739 1740 if (xdp_buff_has_frags(xdp)) { 1741 shinfo = xdp_get_shared_info_from_buff(xdp); 1742 for (i = 0; i < shinfo->nr_frags; i++) { 1743 xdp_page = skb_frag_page(&shinfo->frags[i]); 1744 put_page(xdp_page); 1745 } 1746 } 1747 } 1748 1749 static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp, 1750 struct net_device *dev, 1751 unsigned int *xdp_xmit, 1752 struct virtnet_rq_stats *stats) 1753 { 1754 struct xdp_frame *xdpf; 1755 int err; 1756 u32 act; 1757 1758 act = bpf_prog_run_xdp(xdp_prog, xdp); 1759 u64_stats_inc(&stats->xdp_packets); 1760 1761 switch (act) { 1762 case XDP_PASS: 1763 return act; 1764 1765 case XDP_TX: 1766 u64_stats_inc(&stats->xdp_tx); 1767 xdpf = xdp_convert_buff_to_frame(xdp); 1768 if (unlikely(!xdpf)) { 1769 netdev_dbg(dev, "convert buff to frame failed for xdp\n"); 1770 return XDP_DROP; 1771 } 1772 1773 err = virtnet_xdp_xmit(dev, 1, &xdpf, 0); 1774 if (unlikely(!err)) { 1775 xdp_return_frame_rx_napi(xdpf); 1776 } else if (unlikely(err < 0)) { 1777 trace_xdp_exception(dev, xdp_prog, act); 1778 return XDP_DROP; 1779 } 1780 *xdp_xmit |= VIRTIO_XDP_TX; 1781 return act; 1782 1783 case XDP_REDIRECT: 1784 u64_stats_inc(&stats->xdp_redirects); 1785 err = xdp_do_redirect(dev, xdp, xdp_prog); 1786 if (err) 1787 return XDP_DROP; 1788 1789 *xdp_xmit |= VIRTIO_XDP_REDIR; 1790 return act; 1791 1792 default: 1793 bpf_warn_invalid_xdp_action(dev, xdp_prog, act); 1794 fallthrough; 1795 case XDP_ABORTED: 1796 trace_xdp_exception(dev, xdp_prog, act); 1797 fallthrough; 1798 case XDP_DROP: 1799 return XDP_DROP; 1800 } 1801 } 1802 1803 static unsigned int virtnet_get_headroom(struct virtnet_info *vi) 1804 { 1805 return vi->xdp_enabled ? XDP_PACKET_HEADROOM : 0; 1806 } 1807 1808 /* We copy the packet for XDP in the following cases: 1809 * 1810 * 1) Packet is scattered across multiple rx buffers. 1811 * 2) Headroom space is insufficient. 1812 * 1813 * This is inefficient but it's a temporary condition that 1814 * we hit right after XDP is enabled and until queue is refilled 1815 * with large buffers with sufficient headroom - so it should affect 1816 * at most queue size packets. 1817 * Afterwards, the conditions to enable 1818 * XDP should preclude the underlying device from sending packets 1819 * across multiple buffers (num_buf > 1), and we make sure buffers 1820 * have enough headroom. 
1821 */ 1822 static struct page *xdp_linearize_page(struct receive_queue *rq, 1823 int *num_buf, 1824 struct page *p, 1825 int offset, 1826 int page_off, 1827 unsigned int *len) 1828 { 1829 int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1830 struct page *page; 1831 1832 if (page_off + *len + tailroom > PAGE_SIZE) 1833 return NULL; 1834 1835 page = alloc_page(GFP_ATOMIC); 1836 if (!page) 1837 return NULL; 1838 1839 memcpy(page_address(page) + page_off, page_address(p) + offset, *len); 1840 page_off += *len; 1841 1842 while (--*num_buf) { 1843 unsigned int buflen; 1844 void *buf; 1845 int off; 1846 1847 buf = virtnet_rq_get_buf(rq, &buflen, NULL); 1848 if (unlikely(!buf)) 1849 goto err_buf; 1850 1851 p = virt_to_head_page(buf); 1852 off = buf - page_address(p); 1853 1854 /* guard against a misconfigured or uncooperative backend that 1855 * is sending packet larger than the MTU. 1856 */ 1857 if ((page_off + buflen + tailroom) > PAGE_SIZE) { 1858 put_page(p); 1859 goto err_buf; 1860 } 1861 1862 memcpy(page_address(page) + page_off, 1863 page_address(p) + off, buflen); 1864 page_off += buflen; 1865 put_page(p); 1866 } 1867 1868 /* Headroom does not contribute to packet length */ 1869 *len = page_off - XDP_PACKET_HEADROOM; 1870 return page; 1871 err_buf: 1872 __free_pages(page, 0); 1873 return NULL; 1874 } 1875 1876 static struct sk_buff *receive_small_build_skb(struct virtnet_info *vi, 1877 unsigned int xdp_headroom, 1878 void *buf, 1879 unsigned int len) 1880 { 1881 unsigned int header_offset; 1882 unsigned int headroom; 1883 unsigned int buflen; 1884 struct sk_buff *skb; 1885 1886 header_offset = VIRTNET_RX_PAD + xdp_headroom; 1887 headroom = vi->hdr_len + header_offset; 1888 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 1889 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1890 1891 skb = virtnet_build_skb(buf, buflen, headroom, len); 1892 if (unlikely(!skb)) 1893 return NULL; 1894 1895 buf += header_offset; 1896 memcpy(skb_vnet_common_hdr(skb), buf, vi->hdr_len); 1897 1898 return skb; 1899 } 1900 1901 static struct sk_buff *receive_small_xdp(struct net_device *dev, 1902 struct virtnet_info *vi, 1903 struct receive_queue *rq, 1904 struct bpf_prog *xdp_prog, 1905 void *buf, 1906 unsigned int xdp_headroom, 1907 unsigned int len, 1908 unsigned int *xdp_xmit, 1909 struct virtnet_rq_stats *stats) 1910 { 1911 unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom; 1912 unsigned int headroom = vi->hdr_len + header_offset; 1913 struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset; 1914 struct page *page = virt_to_head_page(buf); 1915 struct page *xdp_page; 1916 unsigned int buflen; 1917 struct xdp_buff xdp; 1918 struct sk_buff *skb; 1919 unsigned int metasize = 0; 1920 u32 act; 1921 1922 if (unlikely(hdr->hdr.gso_type)) 1923 goto err_xdp; 1924 1925 /* Partially checksummed packets must be dropped. 
*/ 1926 if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) 1927 goto err_xdp; 1928 1929 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 1930 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1931 1932 if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) { 1933 int offset = buf - page_address(page) + header_offset; 1934 unsigned int tlen = len + vi->hdr_len; 1935 int num_buf = 1; 1936 1937 xdp_headroom = virtnet_get_headroom(vi); 1938 header_offset = VIRTNET_RX_PAD + xdp_headroom; 1939 headroom = vi->hdr_len + header_offset; 1940 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 1941 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1942 xdp_page = xdp_linearize_page(rq, &num_buf, page, 1943 offset, header_offset, 1944 &tlen); 1945 if (!xdp_page) 1946 goto err_xdp; 1947 1948 buf = page_address(xdp_page); 1949 put_page(page); 1950 page = xdp_page; 1951 } 1952 1953 xdp_init_buff(&xdp, buflen, &rq->xdp_rxq); 1954 xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len, 1955 xdp_headroom, len, true); 1956 1957 act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats); 1958 1959 switch (act) { 1960 case XDP_PASS: 1961 /* Recalculate length in case bpf program changed it */ 1962 len = xdp.data_end - xdp.data; 1963 metasize = xdp.data - xdp.data_meta; 1964 break; 1965 1966 case XDP_TX: 1967 case XDP_REDIRECT: 1968 goto xdp_xmit; 1969 1970 default: 1971 goto err_xdp; 1972 } 1973 1974 skb = virtnet_build_skb(buf, buflen, xdp.data - buf, len); 1975 if (unlikely(!skb)) 1976 goto err; 1977 1978 if (metasize) 1979 skb_metadata_set(skb, metasize); 1980 1981 return skb; 1982 1983 err_xdp: 1984 u64_stats_inc(&stats->xdp_drops); 1985 err: 1986 u64_stats_inc(&stats->drops); 1987 put_page(page); 1988 xdp_xmit: 1989 return NULL; 1990 } 1991 1992 static struct sk_buff *receive_small(struct net_device *dev, 1993 struct virtnet_info *vi, 1994 struct receive_queue *rq, 1995 void *buf, void *ctx, 1996 unsigned int len, 1997 unsigned int *xdp_xmit, 1998 struct virtnet_rq_stats *stats) 1999 { 2000 unsigned int xdp_headroom = (unsigned long)ctx; 2001 struct page *page = virt_to_head_page(buf); 2002 struct sk_buff *skb; 2003 2004 /* We passed the address of virtnet header to virtio-core, 2005 * so truncate the padding. 
2006 */ 2007 buf -= VIRTNET_RX_PAD + xdp_headroom; 2008 2009 len -= vi->hdr_len; 2010 u64_stats_add(&stats->bytes, len); 2011 2012 if (unlikely(len > GOOD_PACKET_LEN)) { 2013 pr_debug("%s: rx error: len %u exceeds max size %d\n", 2014 dev->name, len, GOOD_PACKET_LEN); 2015 DEV_STATS_INC(dev, rx_length_errors); 2016 goto err; 2017 } 2018 2019 if (unlikely(vi->xdp_enabled)) { 2020 struct bpf_prog *xdp_prog; 2021 2022 rcu_read_lock(); 2023 xdp_prog = rcu_dereference(rq->xdp_prog); 2024 if (xdp_prog) { 2025 skb = receive_small_xdp(dev, vi, rq, xdp_prog, buf, 2026 xdp_headroom, len, xdp_xmit, 2027 stats); 2028 rcu_read_unlock(); 2029 return skb; 2030 } 2031 rcu_read_unlock(); 2032 } 2033 2034 skb = receive_small_build_skb(vi, xdp_headroom, buf, len); 2035 if (likely(skb)) 2036 return skb; 2037 2038 err: 2039 u64_stats_inc(&stats->drops); 2040 put_page(page); 2041 return NULL; 2042 } 2043 2044 static struct sk_buff *receive_big(struct net_device *dev, 2045 struct virtnet_info *vi, 2046 struct receive_queue *rq, 2047 void *buf, 2048 unsigned int len, 2049 struct virtnet_rq_stats *stats) 2050 { 2051 struct page *page = buf; 2052 struct sk_buff *skb = 2053 page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0); 2054 2055 u64_stats_add(&stats->bytes, len - vi->hdr_len); 2056 if (unlikely(!skb)) 2057 goto err; 2058 2059 return skb; 2060 2061 err: 2062 u64_stats_inc(&stats->drops); 2063 give_pages(rq, page); 2064 return NULL; 2065 } 2066 2067 static void mergeable_buf_free(struct receive_queue *rq, int num_buf, 2068 struct net_device *dev, 2069 struct virtnet_rq_stats *stats) 2070 { 2071 struct page *page; 2072 void *buf; 2073 int len; 2074 2075 while (num_buf-- > 1) { 2076 buf = virtnet_rq_get_buf(rq, &len, NULL); 2077 if (unlikely(!buf)) { 2078 pr_debug("%s: rx error: %d buffers missing\n", 2079 dev->name, num_buf); 2080 DEV_STATS_INC(dev, rx_length_errors); 2081 break; 2082 } 2083 u64_stats_add(&stats->bytes, len); 2084 page = virt_to_head_page(buf); 2085 put_page(page); 2086 } 2087 } 2088 2089 /* Why not use xdp_build_skb_from_frame() ? 2090 * XDP core assumes that xdp frags are PAGE_SIZE in length, while in 2091 * virtio-net there are 2 points that do not match its requirements: 2092 * 1. The size of the prefilled buffer is not fixed before xdp is set. 2093 * 2. xdp_build_skb_from_frame() does more checks that we don't need, 2094 * like eth_type_trans() (which virtio-net does in receive_buf()). 2095 */ 2096 static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev, 2097 struct virtnet_info *vi, 2098 struct xdp_buff *xdp, 2099 unsigned int xdp_frags_truesz) 2100 { 2101 struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 2102 unsigned int headroom, data_len; 2103 struct sk_buff *skb; 2104 int metasize; 2105 u8 nr_frags; 2106 2107 if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) { 2108 pr_debug("Error building skb as missing reserved tailroom for xdp"); 2109 return NULL; 2110 } 2111 2112 if (unlikely(xdp_buff_has_frags(xdp))) 2113 nr_frags = sinfo->nr_frags; 2114 2115 skb = build_skb(xdp->data_hard_start, xdp->frame_sz); 2116 if (unlikely(!skb)) 2117 return NULL; 2118 2119 headroom = xdp->data - xdp->data_hard_start; 2120 data_len = xdp->data_end - xdp->data; 2121 skb_reserve(skb, headroom); 2122 __skb_put(skb, data_len); 2123 2124 metasize = xdp->data - xdp->data_meta; 2125 metasize = metasize > 0 ? 
metasize : 0; 2126 if (metasize) 2127 skb_metadata_set(skb, metasize); 2128 2129 if (unlikely(xdp_buff_has_frags(xdp))) 2130 xdp_update_skb_shared_info(skb, nr_frags, 2131 sinfo->xdp_frags_size, 2132 xdp_frags_truesz, 2133 xdp_buff_is_frag_pfmemalloc(xdp)); 2134 2135 return skb; 2136 } 2137 2138 /* TODO: build xdp in big mode */ 2139 static int virtnet_build_xdp_buff_mrg(struct net_device *dev, 2140 struct virtnet_info *vi, 2141 struct receive_queue *rq, 2142 struct xdp_buff *xdp, 2143 void *buf, 2144 unsigned int len, 2145 unsigned int frame_sz, 2146 int *num_buf, 2147 unsigned int *xdp_frags_truesize, 2148 struct virtnet_rq_stats *stats) 2149 { 2150 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 2151 unsigned int headroom, tailroom, room; 2152 unsigned int truesize, cur_frag_size; 2153 struct skb_shared_info *shinfo; 2154 unsigned int xdp_frags_truesz = 0; 2155 struct page *page; 2156 skb_frag_t *frag; 2157 int offset; 2158 void *ctx; 2159 2160 xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq); 2161 xdp_prepare_buff(xdp, buf - XDP_PACKET_HEADROOM, 2162 XDP_PACKET_HEADROOM + vi->hdr_len, len - vi->hdr_len, true); 2163 2164 if (!*num_buf) 2165 return 0; 2166 2167 if (*num_buf > 1) { 2168 /* If we want to build multi-buffer xdp, we need 2169 * to specify that the flags of xdp_buff have the 2170 * XDP_FLAGS_HAS_FRAG bit. 2171 */ 2172 if (!xdp_buff_has_frags(xdp)) 2173 xdp_buff_set_frags_flag(xdp); 2174 2175 shinfo = xdp_get_shared_info_from_buff(xdp); 2176 shinfo->nr_frags = 0; 2177 shinfo->xdp_frags_size = 0; 2178 } 2179 2180 if (*num_buf > MAX_SKB_FRAGS + 1) 2181 return -EINVAL; 2182 2183 while (--*num_buf > 0) { 2184 buf = virtnet_rq_get_buf(rq, &len, &ctx); 2185 if (unlikely(!buf)) { 2186 pr_debug("%s: rx error: %d buffers out of %d missing\n", 2187 dev->name, *num_buf, 2188 virtio16_to_cpu(vi->vdev, hdr->num_buffers)); 2189 DEV_STATS_INC(dev, rx_length_errors); 2190 goto err; 2191 } 2192 2193 u64_stats_add(&stats->bytes, len); 2194 page = virt_to_head_page(buf); 2195 offset = buf - page_address(page); 2196 2197 truesize = mergeable_ctx_to_truesize(ctx); 2198 headroom = mergeable_ctx_to_headroom(ctx); 2199 tailroom = headroom ? 
sizeof(struct skb_shared_info) : 0; 2200 room = SKB_DATA_ALIGN(headroom + tailroom); 2201 2202 cur_frag_size = truesize; 2203 xdp_frags_truesz += cur_frag_size; 2204 if (unlikely(len > truesize - room || cur_frag_size > PAGE_SIZE)) { 2205 put_page(page); 2206 pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 2207 dev->name, len, (unsigned long)(truesize - room)); 2208 DEV_STATS_INC(dev, rx_length_errors); 2209 goto err; 2210 } 2211 2212 frag = &shinfo->frags[shinfo->nr_frags++]; 2213 skb_frag_fill_page_desc(frag, page, offset, len); 2214 if (page_is_pfmemalloc(page)) 2215 xdp_buff_set_frag_pfmemalloc(xdp); 2216 2217 shinfo->xdp_frags_size += len; 2218 } 2219 2220 *xdp_frags_truesize = xdp_frags_truesz; 2221 return 0; 2222 2223 err: 2224 put_xdp_frags(xdp); 2225 return -EINVAL; 2226 } 2227 2228 static void *mergeable_xdp_get_buf(struct virtnet_info *vi, 2229 struct receive_queue *rq, 2230 struct bpf_prog *xdp_prog, 2231 void *ctx, 2232 unsigned int *frame_sz, 2233 int *num_buf, 2234 struct page **page, 2235 int offset, 2236 unsigned int *len, 2237 struct virtio_net_hdr_mrg_rxbuf *hdr) 2238 { 2239 unsigned int truesize = mergeable_ctx_to_truesize(ctx); 2240 unsigned int headroom = mergeable_ctx_to_headroom(ctx); 2241 struct page *xdp_page; 2242 unsigned int xdp_room; 2243 2244 /* Transient failure which in theory could occur if 2245 * in-flight packets from before XDP was enabled reach 2246 * the receive path after XDP is loaded. 2247 */ 2248 if (unlikely(hdr->hdr.gso_type)) 2249 return NULL; 2250 2251 /* Partially checksummed packets must be dropped. */ 2252 if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) 2253 return NULL; 2254 2255 /* Now XDP core assumes frag size is PAGE_SIZE, but buffers 2256 * with headroom may add a hole in truesize, which 2257 * makes their length exceed PAGE_SIZE. So we disabled the 2258 * hole mechanism for xdp. See add_recvbuf_mergeable(). 2259 */ 2260 *frame_sz = truesize; 2261 2262 if (likely(headroom >= virtnet_get_headroom(vi) && 2263 (*num_buf == 1 || xdp_prog->aux->xdp_has_frags))) { 2264 return page_address(*page) + offset; 2265 } 2266 2267 /* This happens when headroom is not enough because 2268 * the buffer was prefilled before XDP was set. 2269 * This should only happen for the first several packets. 2270 * In fact, vq reset can be used here to help us clean up 2271 * the prefilled buffers, but many existing devices do not 2272 * support it, and we don't want to bother users who are 2273 * using xdp normally.
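 *
 * Two fallbacks follow: a program without frags support gets the
 * buffer chain linearized into a single page via
 * xdp_linearize_page(), while a frags-aware program gets the head
 * buffer copied into a freshly allocated page so that
 * XDP_PACKET_HEADROOM is available. In both cases frame_sz becomes
 * PAGE_SIZE, matching the XDP core's expectation noted above.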
2274 */ 2275 if (!xdp_prog->aux->xdp_has_frags) { 2276 /* linearize data for XDP */ 2277 xdp_page = xdp_linearize_page(rq, num_buf, 2278 *page, offset, 2279 XDP_PACKET_HEADROOM, 2280 len); 2281 if (!xdp_page) 2282 return NULL; 2283 } else { 2284 xdp_room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM + 2285 sizeof(struct skb_shared_info)); 2286 if (*len + xdp_room > PAGE_SIZE) 2287 return NULL; 2288 2289 xdp_page = alloc_page(GFP_ATOMIC); 2290 if (!xdp_page) 2291 return NULL; 2292 2293 memcpy(page_address(xdp_page) + XDP_PACKET_HEADROOM, 2294 page_address(*page) + offset, *len); 2295 } 2296 2297 *frame_sz = PAGE_SIZE; 2298 2299 put_page(*page); 2300 2301 *page = xdp_page; 2302 2303 return page_address(*page) + XDP_PACKET_HEADROOM; 2304 } 2305 2306 static struct sk_buff *receive_mergeable_xdp(struct net_device *dev, 2307 struct virtnet_info *vi, 2308 struct receive_queue *rq, 2309 struct bpf_prog *xdp_prog, 2310 void *buf, 2311 void *ctx, 2312 unsigned int len, 2313 unsigned int *xdp_xmit, 2314 struct virtnet_rq_stats *stats) 2315 { 2316 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 2317 int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 2318 struct page *page = virt_to_head_page(buf); 2319 int offset = buf - page_address(page); 2320 unsigned int xdp_frags_truesz = 0; 2321 struct sk_buff *head_skb; 2322 unsigned int frame_sz; 2323 struct xdp_buff xdp; 2324 void *data; 2325 u32 act; 2326 int err; 2327 2328 data = mergeable_xdp_get_buf(vi, rq, xdp_prog, ctx, &frame_sz, &num_buf, &page, 2329 offset, &len, hdr); 2330 if (unlikely(!data)) 2331 goto err_xdp; 2332 2333 err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz, 2334 &num_buf, &xdp_frags_truesz, stats); 2335 if (unlikely(err)) 2336 goto err_xdp; 2337 2338 act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats); 2339 2340 switch (act) { 2341 case XDP_PASS: 2342 head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz); 2343 if (unlikely(!head_skb)) 2344 break; 2345 return head_skb; 2346 2347 case XDP_TX: 2348 case XDP_REDIRECT: 2349 return NULL; 2350 2351 default: 2352 break; 2353 } 2354 2355 put_xdp_frags(&xdp); 2356 2357 err_xdp: 2358 put_page(page); 2359 mergeable_buf_free(rq, num_buf, dev, stats); 2360 2361 u64_stats_inc(&stats->xdp_drops); 2362 u64_stats_inc(&stats->drops); 2363 return NULL; 2364 } 2365 2366 static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb, 2367 struct sk_buff *curr_skb, 2368 struct page *page, void *buf, 2369 int len, int truesize) 2370 { 2371 int num_skb_frags; 2372 int offset; 2373 2374 num_skb_frags = skb_shinfo(curr_skb)->nr_frags; 2375 if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) { 2376 struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC); 2377 2378 if (unlikely(!nskb)) 2379 return NULL; 2380 2381 if (curr_skb == head_skb) 2382 skb_shinfo(curr_skb)->frag_list = nskb; 2383 else 2384 curr_skb->next = nskb; 2385 curr_skb = nskb; 2386 head_skb->truesize += nskb->truesize; 2387 num_skb_frags = 0; 2388 } 2389 2390 if (curr_skb != head_skb) { 2391 head_skb->data_len += len; 2392 head_skb->len += len; 2393 head_skb->truesize += truesize; 2394 } 2395 2396 offset = buf - page_address(page); 2397 if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) { 2398 put_page(page); 2399 skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1, 2400 len, truesize); 2401 } else { 2402 skb_add_rx_frag(curr_skb, num_skb_frags, page, 2403 offset, len, truesize); 2404 } 2405 2406 return curr_skb; 2407 } 2408 2409 static struct sk_buff *receive_mergeable(struct net_device *dev, 2410 
struct virtnet_info *vi, 2411 struct receive_queue *rq, 2412 void *buf, 2413 void *ctx, 2414 unsigned int len, 2415 unsigned int *xdp_xmit, 2416 struct virtnet_rq_stats *stats) 2417 { 2418 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 2419 int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 2420 struct page *page = virt_to_head_page(buf); 2421 int offset = buf - page_address(page); 2422 struct sk_buff *head_skb, *curr_skb; 2423 unsigned int truesize = mergeable_ctx_to_truesize(ctx); 2424 unsigned int headroom = mergeable_ctx_to_headroom(ctx); 2425 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 2426 unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); 2427 2428 head_skb = NULL; 2429 u64_stats_add(&stats->bytes, len - vi->hdr_len); 2430 2431 if (unlikely(len > truesize - room)) { 2432 pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 2433 dev->name, len, (unsigned long)(truesize - room)); 2434 DEV_STATS_INC(dev, rx_length_errors); 2435 goto err_skb; 2436 } 2437 2438 if (unlikely(vi->xdp_enabled)) { 2439 struct bpf_prog *xdp_prog; 2440 2441 rcu_read_lock(); 2442 xdp_prog = rcu_dereference(rq->xdp_prog); 2443 if (xdp_prog) { 2444 head_skb = receive_mergeable_xdp(dev, vi, rq, xdp_prog, buf, ctx, 2445 len, xdp_xmit, stats); 2446 rcu_read_unlock(); 2447 return head_skb; 2448 } 2449 rcu_read_unlock(); 2450 } 2451 2452 head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom); 2453 curr_skb = head_skb; 2454 2455 if (unlikely(!curr_skb)) 2456 goto err_skb; 2457 while (--num_buf) { 2458 buf = virtnet_rq_get_buf(rq, &len, &ctx); 2459 if (unlikely(!buf)) { 2460 pr_debug("%s: rx error: %d buffers out of %d missing\n", 2461 dev->name, num_buf, 2462 virtio16_to_cpu(vi->vdev, 2463 hdr->num_buffers)); 2464 DEV_STATS_INC(dev, rx_length_errors); 2465 goto err_buf; 2466 } 2467 2468 u64_stats_add(&stats->bytes, len); 2469 page = virt_to_head_page(buf); 2470 2471 truesize = mergeable_ctx_to_truesize(ctx); 2472 headroom = mergeable_ctx_to_headroom(ctx); 2473 tailroom = headroom ? 
sizeof(struct skb_shared_info) : 0; 2474 room = SKB_DATA_ALIGN(headroom + tailroom); 2475 if (unlikely(len > truesize - room)) { 2476 pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 2477 dev->name, len, (unsigned long)(truesize - room)); 2478 DEV_STATS_INC(dev, rx_length_errors); 2479 goto err_skb; 2480 } 2481 2482 curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page, 2483 buf, len, truesize); 2484 if (!curr_skb) 2485 goto err_skb; 2486 } 2487 2488 ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len); 2489 return head_skb; 2490 2491 err_skb: 2492 put_page(page); 2493 mergeable_buf_free(rq, num_buf, dev, stats); 2494 2495 err_buf: 2496 u64_stats_inc(&stats->drops); 2497 dev_kfree_skb(head_skb); 2498 return NULL; 2499 } 2500 2501 static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash, 2502 struct sk_buff *skb) 2503 { 2504 enum pkt_hash_types rss_hash_type; 2505 2506 if (!hdr_hash || !skb) 2507 return; 2508 2509 switch (__le16_to_cpu(hdr_hash->hash_report)) { 2510 case VIRTIO_NET_HASH_REPORT_TCPv4: 2511 case VIRTIO_NET_HASH_REPORT_UDPv4: 2512 case VIRTIO_NET_HASH_REPORT_TCPv6: 2513 case VIRTIO_NET_HASH_REPORT_UDPv6: 2514 case VIRTIO_NET_HASH_REPORT_TCPv6_EX: 2515 case VIRTIO_NET_HASH_REPORT_UDPv6_EX: 2516 rss_hash_type = PKT_HASH_TYPE_L4; 2517 break; 2518 case VIRTIO_NET_HASH_REPORT_IPv4: 2519 case VIRTIO_NET_HASH_REPORT_IPv6: 2520 case VIRTIO_NET_HASH_REPORT_IPv6_EX: 2521 rss_hash_type = PKT_HASH_TYPE_L3; 2522 break; 2523 case VIRTIO_NET_HASH_REPORT_NONE: 2524 default: 2525 rss_hash_type = PKT_HASH_TYPE_NONE; 2526 } 2527 skb_set_hash(skb, __le32_to_cpu(hdr_hash->hash_value), rss_hash_type); 2528 } 2529 2530 static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq, 2531 struct sk_buff *skb, u8 flags) 2532 { 2533 struct virtio_net_common_hdr *hdr; 2534 struct net_device *dev = vi->dev; 2535 2536 hdr = skb_vnet_common_hdr(skb); 2537 if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report) 2538 virtio_skb_set_hash(&hdr->hash_v1_hdr, skb); 2539 2540 if (flags & VIRTIO_NET_HDR_F_DATA_VALID) 2541 skb->ip_summed = CHECKSUM_UNNECESSARY; 2542 2543 if (virtio_net_hdr_to_skb(skb, &hdr->hdr, 2544 virtio_is_little_endian(vi->vdev))) { 2545 net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n", 2546 dev->name, hdr->hdr.gso_type, 2547 hdr->hdr.gso_size); 2548 goto frame_err; 2549 } 2550 2551 skb_record_rx_queue(skb, vq2rxq(rq->vq)); 2552 skb->protocol = eth_type_trans(skb, dev); 2553 pr_debug("Receiving skb proto 0x%04x len %i type %i\n", 2554 ntohs(skb->protocol), skb->len, skb->pkt_type); 2555 2556 napi_gro_receive(&rq->napi, skb); 2557 return; 2558 2559 frame_err: 2560 DEV_STATS_INC(dev, rx_frame_errors); 2561 dev_kfree_skb(skb); 2562 } 2563 2564 static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, 2565 void *buf, unsigned int len, void **ctx, 2566 unsigned int *xdp_xmit, 2567 struct virtnet_rq_stats *stats) 2568 { 2569 struct net_device *dev = vi->dev; 2570 struct sk_buff *skb; 2571 u8 flags; 2572 2573 if (unlikely(len < vi->hdr_len + ETH_HLEN)) { 2574 pr_debug("%s: short packet %i\n", dev->name, len); 2575 DEV_STATS_INC(dev, rx_length_errors); 2576 virtnet_rq_free_buf(vi, rq, buf); 2577 return; 2578 } 2579 2580 /* 1. Save the flags early, as the XDP program might overwrite them. 2581 * These flags ensure packets marked as VIRTIO_NET_HDR_F_DATA_VALID 2582 * stay valid after XDP processing. 2583 * 2. 
XDP doesn't work with partially checksummed packets (refer to 2584 * virtnet_xdp_set()), so packets marked as 2585 * VIRTIO_NET_HDR_F_NEEDS_CSUM get dropped during XDP processing. 2586 */ 2587 flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags; 2588 2589 if (vi->mergeable_rx_bufs) 2590 skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit, 2591 stats); 2592 else if (vi->big_packets) 2593 skb = receive_big(dev, vi, rq, buf, len, stats); 2594 else 2595 skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); 2596 2597 if (unlikely(!skb)) 2598 return; 2599 2600 virtnet_receive_done(vi, rq, skb, flags); 2601 } 2602 2603 /* Unlike mergeable buffers, all buffers are allocated to the 2604 * same size, except for the headroom. For this reason we do 2605 * not need to use mergeable_len_to_ctx here - it is enough 2606 * to store the headroom as the context ignoring the truesize. 2607 */ 2608 static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, 2609 gfp_t gfp) 2610 { 2611 char *buf; 2612 unsigned int xdp_headroom = virtnet_get_headroom(vi); 2613 void *ctx = (void *)(unsigned long)xdp_headroom; 2614 int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom; 2615 int err; 2616 2617 len = SKB_DATA_ALIGN(len) + 2618 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 2619 2620 if (unlikely(!skb_page_frag_refill(len, &rq->alloc_frag, gfp))) 2621 return -ENOMEM; 2622 2623 buf = virtnet_rq_alloc(rq, len, gfp); 2624 if (unlikely(!buf)) 2625 return -ENOMEM; 2626 2627 buf += VIRTNET_RX_PAD + xdp_headroom; 2628 2629 virtnet_rq_init_one_sg(rq, buf, vi->hdr_len + GOOD_PACKET_LEN); 2630 2631 err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1, buf, ctx, gfp); 2632 if (err < 0) { 2633 virtnet_rq_unmap(rq, buf, 0); 2634 put_page(virt_to_head_page(buf)); 2635 } 2636 2637 return err; 2638 } 2639 2640 static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq, 2641 gfp_t gfp) 2642 { 2643 struct page *first, *list = NULL; 2644 char *p; 2645 int i, err, offset; 2646 2647 sg_init_table(rq->sg, vi->big_packets_num_skbfrags + 2); 2648 2649 /* page in rq->sg[vi->big_packets_num_skbfrags + 1] is list tail */ 2650 for (i = vi->big_packets_num_skbfrags + 1; i > 1; --i) { 2651 first = get_a_page(rq, gfp); 2652 if (!first) { 2653 if (list) 2654 give_pages(rq, list); 2655 return -ENOMEM; 2656 } 2657 sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE); 2658 2659 /* chain new page in list head to match sg */ 2660 first->private = (unsigned long)list; 2661 list = first; 2662 } 2663 2664 first = get_a_page(rq, gfp); 2665 if (!first) { 2666 give_pages(rq, list); 2667 return -ENOMEM; 2668 } 2669 p = page_address(first); 2670 2671 /* rq->sg[0], rq->sg[1] share the same page */ 2672 /* a separated rq->sg[0] for header - required in case !any_header_sg */ 2673 sg_set_buf(&rq->sg[0], p, vi->hdr_len); 2674 2675 /* rq->sg[1] for data packet, from offset */ 2676 offset = sizeof(struct padded_vnet_hdr); 2677 sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset); 2678 2679 /* chain first in list head */ 2680 first->private = (unsigned long)list; 2681 err = virtqueue_add_inbuf(rq->vq, rq->sg, vi->big_packets_num_skbfrags + 2, 2682 first, gfp); 2683 if (err < 0) 2684 give_pages(rq, first); 2685 2686 return err; 2687 } 2688 2689 static unsigned int get_mergeable_buf_len(struct receive_queue *rq, 2690 struct ewma_pkt_len *avg_pkt_len, 2691 unsigned int room) 2692 { 2693 struct virtnet_info *vi = rq->vq->vdev->priv; 2694 const size_t hdr_len = vi->hdr_len; 2695 
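	/* Buffer length heuristic: when XDP headroom is reserved
	 * ("room" != 0), simply use PAGE_SIZE - room. Otherwise take
	 * hdr_len plus the EWMA of recent packet lengths, clamped to
	 * [rq->min_buf_len, PAGE_SIZE - hdr_len], and round the result
	 * up to an L1 cache line multiple.
	 */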
unsigned int len; 2696 2697 if (room) 2698 return PAGE_SIZE - room; 2699 2700 len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len), 2701 rq->min_buf_len, PAGE_SIZE - hdr_len); 2702 2703 return ALIGN(len, L1_CACHE_BYTES); 2704 } 2705 2706 static int add_recvbuf_mergeable(struct virtnet_info *vi, 2707 struct receive_queue *rq, gfp_t gfp) 2708 { 2709 struct page_frag *alloc_frag = &rq->alloc_frag; 2710 unsigned int headroom = virtnet_get_headroom(vi); 2711 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 2712 unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); 2713 unsigned int len, hole; 2714 void *ctx; 2715 char *buf; 2716 int err; 2717 2718 /* Extra tailroom is needed to satisfy XDP's assumption. This 2719 * means rx frag coalescing won't work, but since we've 2720 * disabled GSO for XDP, it won't be a big issue. 2721 */ 2722 len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room); 2723 2724 if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp))) 2725 return -ENOMEM; 2726 2727 if (!alloc_frag->offset && len + room + sizeof(struct virtnet_rq_dma) > alloc_frag->size) 2728 len -= sizeof(struct virtnet_rq_dma); 2729 2730 buf = virtnet_rq_alloc(rq, len + room, gfp); 2731 if (unlikely(!buf)) 2732 return -ENOMEM; 2733 2734 buf += headroom; /* advance address leaving hole at front of pkt */ 2735 hole = alloc_frag->size - alloc_frag->offset; 2736 if (hole < len + room) { 2737 /* To avoid internal fragmentation, if there is very likely not 2738 * enough space for another buffer, add the remaining space to 2739 * the current buffer. 2740 * XDP core assumes that frame_size of xdp_buff and the length 2741 * of the frag are PAGE_SIZE, so we disable the hole mechanism. 2742 */ 2743 if (!headroom) 2744 len += hole; 2745 alloc_frag->offset += hole; 2746 } 2747 2748 virtnet_rq_init_one_sg(rq, buf, len); 2749 2750 ctx = mergeable_len_to_ctx(len + room, headroom); 2751 err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1, buf, ctx, gfp); 2752 if (err < 0) { 2753 virtnet_rq_unmap(rq, buf, 0); 2754 put_page(virt_to_head_page(buf)); 2755 } 2756 2757 return err; 2758 } 2759 2760 /* 2761 * Returns false if we couldn't fill entirely (OOM). 2762 * 2763 * Normally run in the receive path, but can also be run from ndo_open 2764 * before we're receiving packets, or from refill_work which is 2765 * careful to disable receiving (using napi_disable).
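 *
 * For the non-XSK case, the loop below keeps adding buffers until the
 * virtqueue runs out of free descriptors or an allocation fails, then
 * kicks the device once (bumping the per-queue "kicks" counter) rather
 * than after every buffer. Only -ENOMEM counts as "could not fill",
 * which tells callers to fall back to refill_work.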
2766 */ 2767 static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq, 2768 gfp_t gfp) 2769 { 2770 int err; 2771 2772 if (rq->xsk_pool) { 2773 err = virtnet_add_recvbuf_xsk(vi, rq, rq->xsk_pool, gfp); 2774 goto kick; 2775 } 2776 2777 do { 2778 if (vi->mergeable_rx_bufs) 2779 err = add_recvbuf_mergeable(vi, rq, gfp); 2780 else if (vi->big_packets) 2781 err = add_recvbuf_big(vi, rq, gfp); 2782 else 2783 err = add_recvbuf_small(vi, rq, gfp); 2784 2785 if (err) 2786 break; 2787 } while (rq->vq->num_free); 2788 2789 kick: 2790 if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) { 2791 unsigned long flags; 2792 2793 flags = u64_stats_update_begin_irqsave(&rq->stats.syncp); 2794 u64_stats_inc(&rq->stats.kicks); 2795 u64_stats_update_end_irqrestore(&rq->stats.syncp, flags); 2796 } 2797 2798 return err != -ENOMEM; 2799 } 2800 2801 static void skb_recv_done(struct virtqueue *rvq) 2802 { 2803 struct virtnet_info *vi = rvq->vdev->priv; 2804 struct receive_queue *rq = &vi->rq[vq2rxq(rvq)]; 2805 2806 rq->calls++; 2807 virtqueue_napi_schedule(&rq->napi, rvq); 2808 } 2809 2810 static void virtnet_napi_do_enable(struct virtqueue *vq, 2811 struct napi_struct *napi) 2812 { 2813 napi_enable(napi); 2814 2815 /* If all buffers were filled by other side before we napi_enabled, we 2816 * won't get another interrupt, so process any outstanding packets now. 2817 * Call local_bh_enable after to trigger softIRQ processing. 2818 */ 2819 local_bh_disable(); 2820 virtqueue_napi_schedule(napi, vq); 2821 local_bh_enable(); 2822 } 2823 2824 static void virtnet_napi_enable(struct receive_queue *rq) 2825 { 2826 struct virtnet_info *vi = rq->vq->vdev->priv; 2827 int qidx = vq2rxq(rq->vq); 2828 2829 virtnet_napi_do_enable(rq->vq, &rq->napi); 2830 netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, &rq->napi); 2831 } 2832 2833 static void virtnet_napi_tx_enable(struct send_queue *sq) 2834 { 2835 struct virtnet_info *vi = sq->vq->vdev->priv; 2836 struct napi_struct *napi = &sq->napi; 2837 int qidx = vq2txq(sq->vq); 2838 2839 if (!napi->weight) 2840 return; 2841 2842 /* Tx napi touches cachelines on the cpu handling tx interrupts. Only 2843 * enable the feature if this is likely affine with the transmit path. 
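 *
 * When no affinity hint has been set, napi->weight is cleared below,
 * so start_xmit() takes its non-NAPI path instead: skbs are orphaned
 * at transmit time and completed buffers are freed directly from the
 * xmit path rather than from a dedicated tx NAPI context.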
2844 */ 2845 if (!vi->affinity_hint_set) { 2846 napi->weight = 0; 2847 return; 2848 } 2849 2850 virtnet_napi_do_enable(sq->vq, napi); 2851 netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, napi); 2852 } 2853 2854 static void virtnet_napi_tx_disable(struct send_queue *sq) 2855 { 2856 struct virtnet_info *vi = sq->vq->vdev->priv; 2857 struct napi_struct *napi = &sq->napi; 2858 int qidx = vq2txq(sq->vq); 2859 2860 if (napi->weight) { 2861 netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, NULL); 2862 napi_disable(napi); 2863 } 2864 } 2865 2866 static void virtnet_napi_disable(struct receive_queue *rq) 2867 { 2868 struct virtnet_info *vi = rq->vq->vdev->priv; 2869 struct napi_struct *napi = &rq->napi; 2870 int qidx = vq2rxq(rq->vq); 2871 2872 netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, NULL); 2873 napi_disable(napi); 2874 } 2875 2876 static void refill_work(struct work_struct *work) 2877 { 2878 struct virtnet_info *vi = 2879 container_of(work, struct virtnet_info, refill.work); 2880 bool still_empty; 2881 int i; 2882 2883 for (i = 0; i < vi->curr_queue_pairs; i++) { 2884 struct receive_queue *rq = &vi->rq[i]; 2885 2886 /* 2887 * When queue API support is added in the future and the call 2888 * below becomes napi_disable_locked, this driver will need to 2889 * be refactored. 2890 * 2891 * One possible solution would be to: 2892 * - cancel refill_work with cancel_delayed_work (note: 2893 * non-sync) 2894 * - cancel refill_work with cancel_delayed_work_sync in 2895 * virtnet_remove after the netdev is unregistered 2896 * - wrap all of the work in a lock (perhaps the netdev 2897 * instance lock) 2898 * - check netif_running() and return early to avoid a race 2899 */ 2900 napi_disable(&rq->napi); 2901 still_empty = !try_fill_recv(vi, rq, GFP_KERNEL); 2902 virtnet_napi_do_enable(rq->vq, &rq->napi); 2903 2904 /* In theory, this can happen: if we don't get any buffers in 2905 * we will *never* try to fill again. 
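 *
 * Hence the reschedule below: if the ring is still empty after this
 * attempt, try again in half a second rather than waiting for an rx
 * interrupt that may never arrive on an empty ring.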
2906 */ 2907 if (still_empty) 2908 schedule_delayed_work(&vi->refill, HZ/2); 2909 } 2910 } 2911 2912 static int virtnet_receive_xsk_bufs(struct virtnet_info *vi, 2913 struct receive_queue *rq, 2914 int budget, 2915 unsigned int *xdp_xmit, 2916 struct virtnet_rq_stats *stats) 2917 { 2918 unsigned int len; 2919 int packets = 0; 2920 void *buf; 2921 2922 while (packets < budget) { 2923 buf = virtqueue_get_buf(rq->vq, &len); 2924 if (!buf) 2925 break; 2926 2927 virtnet_receive_xsk_buf(vi, rq, buf, len, xdp_xmit, stats); 2928 packets++; 2929 } 2930 2931 return packets; 2932 } 2933 2934 static int virtnet_receive_packets(struct virtnet_info *vi, 2935 struct receive_queue *rq, 2936 int budget, 2937 unsigned int *xdp_xmit, 2938 struct virtnet_rq_stats *stats) 2939 { 2940 unsigned int len; 2941 int packets = 0; 2942 void *buf; 2943 2944 if (!vi->big_packets || vi->mergeable_rx_bufs) { 2945 void *ctx; 2946 while (packets < budget && 2947 (buf = virtnet_rq_get_buf(rq, &len, &ctx))) { 2948 receive_buf(vi, rq, buf, len, ctx, xdp_xmit, stats); 2949 packets++; 2950 } 2951 } else { 2952 while (packets < budget && 2953 (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) { 2954 receive_buf(vi, rq, buf, len, NULL, xdp_xmit, stats); 2955 packets++; 2956 } 2957 } 2958 2959 return packets; 2960 } 2961 2962 static int virtnet_receive(struct receive_queue *rq, int budget, 2963 unsigned int *xdp_xmit) 2964 { 2965 struct virtnet_info *vi = rq->vq->vdev->priv; 2966 struct virtnet_rq_stats stats = {}; 2967 int i, packets; 2968 2969 if (rq->xsk_pool) 2970 packets = virtnet_receive_xsk_bufs(vi, rq, budget, xdp_xmit, &stats); 2971 else 2972 packets = virtnet_receive_packets(vi, rq, budget, xdp_xmit, &stats); 2973 2974 if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) { 2975 if (!try_fill_recv(vi, rq, GFP_ATOMIC)) { 2976 spin_lock(&vi->refill_lock); 2977 if (vi->refill_enabled) 2978 schedule_delayed_work(&vi->refill, 0); 2979 spin_unlock(&vi->refill_lock); 2980 } 2981 } 2982 2983 u64_stats_set(&stats.packets, packets); 2984 u64_stats_update_begin(&rq->stats.syncp); 2985 for (i = 0; i < ARRAY_SIZE(virtnet_rq_stats_desc); i++) { 2986 size_t offset = virtnet_rq_stats_desc[i].offset; 2987 u64_stats_t *item, *src; 2988 2989 item = (u64_stats_t *)((u8 *)&rq->stats + offset); 2990 src = (u64_stats_t *)((u8 *)&stats + offset); 2991 u64_stats_add(item, u64_stats_read(src)); 2992 } 2993 2994 u64_stats_add(&rq->stats.packets, u64_stats_read(&stats.packets)); 2995 u64_stats_add(&rq->stats.bytes, u64_stats_read(&stats.bytes)); 2996 2997 u64_stats_update_end(&rq->stats.syncp); 2998 2999 return packets; 3000 } 3001 3002 static void virtnet_poll_cleantx(struct receive_queue *rq, int budget) 3003 { 3004 struct virtnet_info *vi = rq->vq->vdev->priv; 3005 unsigned int index = vq2rxq(rq->vq); 3006 struct send_queue *sq = &vi->sq[index]; 3007 struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index); 3008 3009 if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index)) 3010 return; 3011 3012 if (__netif_tx_trylock(txq)) { 3013 if (sq->reset) { 3014 __netif_tx_unlock(txq); 3015 return; 3016 } 3017 3018 do { 3019 virtqueue_disable_cb(sq->vq); 3020 free_old_xmit(sq, txq, !!budget); 3021 } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq))); 3022 3023 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) { 3024 if (netif_tx_queue_stopped(txq)) { 3025 u64_stats_update_begin(&sq->stats.syncp); 3026 u64_stats_inc(&sq->stats.wake); 3027 u64_stats_update_end(&sq->stats.syncp); 3028 } 3029 netif_tx_wake_queue(txq); 3030 
} 3031 3032 __netif_tx_unlock(txq); 3033 } 3034 } 3035 3036 static void virtnet_rx_dim_update(struct virtnet_info *vi, struct receive_queue *rq) 3037 { 3038 struct dim_sample cur_sample = {}; 3039 3040 if (!rq->packets_in_napi) 3041 return; 3042 3043 /* Don't need protection when fetching stats, since fetcher and 3044 * updater of the stats are in same context 3045 */ 3046 dim_update_sample(rq->calls, 3047 u64_stats_read(&rq->stats.packets), 3048 u64_stats_read(&rq->stats.bytes), 3049 &cur_sample); 3050 3051 net_dim(&rq->dim, &cur_sample); 3052 rq->packets_in_napi = 0; 3053 } 3054 3055 static int virtnet_poll(struct napi_struct *napi, int budget) 3056 { 3057 struct receive_queue *rq = 3058 container_of(napi, struct receive_queue, napi); 3059 struct virtnet_info *vi = rq->vq->vdev->priv; 3060 struct send_queue *sq; 3061 unsigned int received; 3062 unsigned int xdp_xmit = 0; 3063 bool napi_complete; 3064 3065 virtnet_poll_cleantx(rq, budget); 3066 3067 received = virtnet_receive(rq, budget, &xdp_xmit); 3068 rq->packets_in_napi += received; 3069 3070 if (xdp_xmit & VIRTIO_XDP_REDIR) 3071 xdp_do_flush(); 3072 3073 /* Out of packets? */ 3074 if (received < budget) { 3075 napi_complete = virtqueue_napi_complete(napi, rq->vq, received); 3076 /* Intentionally not taking dim_lock here. This may result in a 3077 * spurious net_dim call. But if that happens virtnet_rx_dim_work 3078 * will not act on the scheduled work. 3079 */ 3080 if (napi_complete && rq->dim_enabled) 3081 virtnet_rx_dim_update(vi, rq); 3082 } 3083 3084 if (xdp_xmit & VIRTIO_XDP_TX) { 3085 sq = virtnet_xdp_get_sq(vi); 3086 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { 3087 u64_stats_update_begin(&sq->stats.syncp); 3088 u64_stats_inc(&sq->stats.kicks); 3089 u64_stats_update_end(&sq->stats.syncp); 3090 } 3091 virtnet_xdp_put_sq(vi, sq); 3092 } 3093 3094 return received; 3095 } 3096 3097 static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index) 3098 { 3099 virtnet_napi_tx_disable(&vi->sq[qp_index]); 3100 virtnet_napi_disable(&vi->rq[qp_index]); 3101 xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); 3102 } 3103 3104 static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index) 3105 { 3106 struct net_device *dev = vi->dev; 3107 int err; 3108 3109 err = xdp_rxq_info_reg(&vi->rq[qp_index].xdp_rxq, dev, qp_index, 3110 vi->rq[qp_index].napi.napi_id); 3111 if (err < 0) 3112 return err; 3113 3114 err = xdp_rxq_info_reg_mem_model(&vi->rq[qp_index].xdp_rxq, 3115 MEM_TYPE_PAGE_SHARED, NULL); 3116 if (err < 0) 3117 goto err_xdp_reg_mem_model; 3118 3119 virtnet_napi_enable(&vi->rq[qp_index]); 3120 virtnet_napi_tx_enable(&vi->sq[qp_index]); 3121 3122 return 0; 3123 3124 err_xdp_reg_mem_model: 3125 xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); 3126 return err; 3127 } 3128 3129 static void virtnet_cancel_dim(struct virtnet_info *vi, struct dim *dim) 3130 { 3131 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 3132 return; 3133 net_dim_work_cancel(dim); 3134 } 3135 3136 static void virtnet_update_settings(struct virtnet_info *vi) 3137 { 3138 u32 speed; 3139 u8 duplex; 3140 3141 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX)) 3142 return; 3143 3144 virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed); 3145 3146 if (ethtool_validate_speed(speed)) 3147 vi->speed = speed; 3148 3149 virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex); 3150 3151 if (ethtool_validate_duplex(duplex)) 3152 vi->duplex = duplex; 3153 } 3154 3155 static int 
virtnet_open(struct net_device *dev) 3156 { 3157 struct virtnet_info *vi = netdev_priv(dev); 3158 int i, err; 3159 3160 enable_delayed_refill(vi); 3161 3162 for (i = 0; i < vi->max_queue_pairs; i++) { 3163 if (i < vi->curr_queue_pairs) 3164 /* Make sure we have some buffers: if oom use wq. */ 3165 if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL)) 3166 schedule_delayed_work(&vi->refill, 0); 3167 3168 err = virtnet_enable_queue_pair(vi, i); 3169 if (err < 0) 3170 goto err_enable_qp; 3171 } 3172 3173 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { 3174 if (vi->status & VIRTIO_NET_S_LINK_UP) 3175 netif_carrier_on(vi->dev); 3176 virtio_config_driver_enable(vi->vdev); 3177 } else { 3178 vi->status = VIRTIO_NET_S_LINK_UP; 3179 netif_carrier_on(dev); 3180 } 3181 3182 return 0; 3183 3184 err_enable_qp: 3185 disable_delayed_refill(vi); 3186 cancel_delayed_work_sync(&vi->refill); 3187 3188 for (i--; i >= 0; i--) { 3189 virtnet_disable_queue_pair(vi, i); 3190 virtnet_cancel_dim(vi, &vi->rq[i].dim); 3191 } 3192 3193 return err; 3194 } 3195 3196 static int virtnet_poll_tx(struct napi_struct *napi, int budget) 3197 { 3198 struct send_queue *sq = container_of(napi, struct send_queue, napi); 3199 struct virtnet_info *vi = sq->vq->vdev->priv; 3200 unsigned int index = vq2txq(sq->vq); 3201 struct netdev_queue *txq; 3202 int opaque, xsk_done = 0; 3203 bool done; 3204 3205 if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { 3206 /* We don't need to enable cb for XDP */ 3207 napi_complete_done(napi, 0); 3208 return 0; 3209 } 3210 3211 txq = netdev_get_tx_queue(vi->dev, index); 3212 __netif_tx_lock(txq, raw_smp_processor_id()); 3213 virtqueue_disable_cb(sq->vq); 3214 3215 if (sq->xsk_pool) 3216 xsk_done = virtnet_xsk_xmit(sq, sq->xsk_pool, budget); 3217 else 3218 free_old_xmit(sq, txq, !!budget); 3219 3220 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) { 3221 if (netif_tx_queue_stopped(txq)) { 3222 u64_stats_update_begin(&sq->stats.syncp); 3223 u64_stats_inc(&sq->stats.wake); 3224 u64_stats_update_end(&sq->stats.syncp); 3225 } 3226 netif_tx_wake_queue(txq); 3227 } 3228 3229 if (xsk_done >= budget) { 3230 __netif_tx_unlock(txq); 3231 return budget; 3232 } 3233 3234 opaque = virtqueue_enable_cb_prepare(sq->vq); 3235 3236 done = napi_complete_done(napi, 0); 3237 3238 if (!done) 3239 virtqueue_disable_cb(sq->vq); 3240 3241 __netif_tx_unlock(txq); 3242 3243 if (done) { 3244 if (unlikely(virtqueue_poll(sq->vq, opaque))) { 3245 if (napi_schedule_prep(napi)) { 3246 __netif_tx_lock(txq, raw_smp_processor_id()); 3247 virtqueue_disable_cb(sq->vq); 3248 __netif_tx_unlock(txq); 3249 __napi_schedule(napi); 3250 } 3251 } 3252 } 3253 3254 return 0; 3255 } 3256 3257 static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan) 3258 { 3259 struct virtio_net_hdr_mrg_rxbuf *hdr; 3260 const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; 3261 struct virtnet_info *vi = sq->vq->vdev->priv; 3262 int num_sg; 3263 unsigned hdr_len = vi->hdr_len; 3264 bool can_push; 3265 3266 pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); 3267 3268 can_push = vi->any_header_sg && 3269 !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) && 3270 !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len; 3271 /* Even if we can, don't push here yet as this would skew 3272 * csum_start offset below. 
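 *
 * virtio_net_hdr_from_skb() computes offsets such as csum_start
 * relative to the current skb->data, so the header is pushed only
 * after it has been filled in, and is pulled back again once the sg
 * list has been built so that tx byte accounting sees just the packet.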
*/ 3273 if (can_push) 3274 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len); 3275 else 3276 hdr = &skb_vnet_common_hdr(skb)->mrg_hdr; 3277 3278 if (virtio_net_hdr_from_skb(skb, &hdr->hdr, 3279 virtio_is_little_endian(vi->vdev), false, 3280 0)) 3281 return -EPROTO; 3282 3283 if (vi->mergeable_rx_bufs) 3284 hdr->num_buffers = 0; 3285 3286 sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2)); 3287 if (can_push) { 3288 __skb_push(skb, hdr_len); 3289 num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len); 3290 if (unlikely(num_sg < 0)) 3291 return num_sg; 3292 /* Pull header back to avoid skew in tx bytes calculations. */ 3293 __skb_pull(skb, hdr_len); 3294 } else { 3295 sg_set_buf(sq->sg, hdr, hdr_len); 3296 num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len); 3297 if (unlikely(num_sg < 0)) 3298 return num_sg; 3299 num_sg++; 3300 } 3301 3302 return virtnet_add_outbuf(sq, num_sg, skb, 3303 orphan ? VIRTNET_XMIT_TYPE_SKB_ORPHAN : VIRTNET_XMIT_TYPE_SKB); 3304 } 3305 3306 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) 3307 { 3308 struct virtnet_info *vi = netdev_priv(dev); 3309 int qnum = skb_get_queue_mapping(skb); 3310 struct send_queue *sq = &vi->sq[qnum]; 3311 int err; 3312 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); 3313 bool xmit_more = netdev_xmit_more(); 3314 bool use_napi = sq->napi.weight; 3315 bool kick; 3316 3317 if (!use_napi) 3318 free_old_xmit(sq, txq, false); 3319 else 3320 virtqueue_disable_cb(sq->vq); 3321 3322 /* timestamp packet in software */ 3323 skb_tx_timestamp(skb); 3324 3325 /* Try to transmit */ 3326 err = xmit_skb(sq, skb, !use_napi); 3327 3328 /* This should not happen! */ 3329 if (unlikely(err)) { 3330 DEV_STATS_INC(dev, tx_fifo_errors); 3331 if (net_ratelimit()) 3332 dev_warn(&dev->dev, 3333 "Unexpected TXQ (%d) queue failure: %d\n", 3334 qnum, err); 3335 DEV_STATS_INC(dev, tx_dropped); 3336 dev_kfree_skb_any(skb); 3337 return NETDEV_TX_OK; 3338 } 3339 3340 /* Don't wait up for transmitted skbs to be freed. */ 3341 if (!use_napi) { 3342 skb_orphan(skb); 3343 nf_reset_ct(skb); 3344 } 3345 3346 if (use_napi) 3347 tx_may_stop(vi, dev, sq); 3348 else 3349 check_sq_full_and_disable(vi, dev, sq); 3350 3351 kick = use_napi ?
__netdev_tx_sent_queue(txq, skb->len, xmit_more) : 3352 !xmit_more || netif_xmit_stopped(txq); 3353 if (kick) { 3354 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { 3355 u64_stats_update_begin(&sq->stats.syncp); 3356 u64_stats_inc(&sq->stats.kicks); 3357 u64_stats_update_end(&sq->stats.syncp); 3358 } 3359 } 3360 3361 if (use_napi && kick && unlikely(!virtqueue_enable_cb_delayed(sq->vq))) 3362 virtqueue_napi_schedule(&sq->napi, sq->vq); 3363 3364 return NETDEV_TX_OK; 3365 } 3366 3367 static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq) 3368 { 3369 bool running = netif_running(vi->dev); 3370 3371 if (running) { 3372 virtnet_napi_disable(rq); 3373 virtnet_cancel_dim(vi, &rq->dim); 3374 } 3375 } 3376 3377 static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq) 3378 { 3379 bool running = netif_running(vi->dev); 3380 3381 if (!try_fill_recv(vi, rq, GFP_KERNEL)) 3382 schedule_delayed_work(&vi->refill, 0); 3383 3384 if (running) 3385 virtnet_napi_enable(rq); 3386 } 3387 3388 static int virtnet_rx_resize(struct virtnet_info *vi, 3389 struct receive_queue *rq, u32 ring_num) 3390 { 3391 int err, qindex; 3392 3393 qindex = rq - vi->rq; 3394 3395 virtnet_rx_pause(vi, rq); 3396 3397 err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_unmap_free_buf, NULL); 3398 if (err) 3399 netdev_err(vi->dev, "resize rx fail: rx queue index: %d err: %d\n", qindex, err); 3400 3401 virtnet_rx_resume(vi, rq); 3402 return err; 3403 } 3404 3405 static void virtnet_tx_pause(struct virtnet_info *vi, struct send_queue *sq) 3406 { 3407 bool running = netif_running(vi->dev); 3408 struct netdev_queue *txq; 3409 int qindex; 3410 3411 qindex = sq - vi->sq; 3412 3413 if (running) 3414 virtnet_napi_tx_disable(sq); 3415 3416 txq = netdev_get_tx_queue(vi->dev, qindex); 3417 3418 /* 1. wait for all xmit to complete 3419 * 2. fix the race of netif_stop_subqueue() vs netif_start_subqueue() 3420 */ 3421 __netif_tx_lock_bh(txq); 3422 3423 /* Prevent rx poll from accessing sq. */ 3424 sq->reset = true; 3425 3426 /* Prevent the upper layer from trying to send packets. */ 3427 netif_stop_subqueue(vi->dev, qindex); 3428 3429 __netif_tx_unlock_bh(txq); 3430 } 3431 3432 static void virtnet_tx_resume(struct virtnet_info *vi, struct send_queue *sq) 3433 { 3434 bool running = netif_running(vi->dev); 3435 struct netdev_queue *txq; 3436 int qindex; 3437 3438 qindex = sq - vi->sq; 3439 3440 txq = netdev_get_tx_queue(vi->dev, qindex); 3441 3442 __netif_tx_lock_bh(txq); 3443 sq->reset = false; 3444 netif_tx_wake_queue(txq); 3445 __netif_tx_unlock_bh(txq); 3446 3447 if (running) 3448 virtnet_napi_tx_enable(sq); 3449 } 3450 3451 static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq, 3452 u32 ring_num) 3453 { 3454 int qindex, err; 3455 3456 qindex = sq - vi->sq; 3457 3458 virtnet_tx_pause(vi, sq); 3459 3460 err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf, 3461 virtnet_sq_free_unused_buf_done); 3462 if (err) 3463 netdev_err(vi->dev, "resize tx fail: tx queue index: %d err: %d\n", qindex, err); 3464 3465 virtnet_tx_resume(vi, sq); 3466 3467 return err; 3468 } 3469 3470 /* 3471 * Send command via the control virtqueue and check status. Commands 3472 * supported by the hypervisor, as indicated by feature bits, should 3473 * never fail unless improperly formatted.
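 *
 * The command is laid out as a header sg (class/cmd), an optional
 * command-specific out sg, a one-byte status sg that the device
 * writes back, and an optional in sg for replies. The caller then
 * busy-waits (with cond_resched()) until the device returns the
 * buffer or the control vq is found to be broken.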
3474 */ 3475 static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd, 3476 struct scatterlist *out, 3477 struct scatterlist *in) 3478 { 3479 struct scatterlist *sgs[5], hdr, stat; 3480 u32 out_num = 0, tmp, in_num = 0; 3481 bool ok; 3482 int ret; 3483 3484 /* Caller should know better */ 3485 BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)); 3486 3487 mutex_lock(&vi->cvq_lock); 3488 vi->ctrl->status = ~0; 3489 vi->ctrl->hdr.class = class; 3490 vi->ctrl->hdr.cmd = cmd; 3491 /* Add header */ 3492 sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr)); 3493 sgs[out_num++] = &hdr; 3494 3495 if (out) 3496 sgs[out_num++] = out; 3497 3498 /* Add return status. */ 3499 sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status)); 3500 sgs[out_num + in_num++] = &stat; 3501 3502 if (in) 3503 sgs[out_num + in_num++] = in; 3504 3505 BUG_ON(out_num + in_num > ARRAY_SIZE(sgs)); 3506 ret = virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num, vi, GFP_ATOMIC); 3507 if (ret < 0) { 3508 dev_warn(&vi->vdev->dev, 3509 "Failed to add sgs for command vq: %d\n.", ret); 3510 mutex_unlock(&vi->cvq_lock); 3511 return false; 3512 } 3513 3514 if (unlikely(!virtqueue_kick(vi->cvq))) 3515 goto unlock; 3516 3517 /* Spin for a response, the kick causes an ioport write, trapping 3518 * into the hypervisor, so the request should be handled immediately. 3519 */ 3520 while (!virtqueue_get_buf(vi->cvq, &tmp) && 3521 !virtqueue_is_broken(vi->cvq)) { 3522 cond_resched(); 3523 cpu_relax(); 3524 } 3525 3526 unlock: 3527 ok = vi->ctrl->status == VIRTIO_NET_OK; 3528 mutex_unlock(&vi->cvq_lock); 3529 return ok; 3530 } 3531 3532 static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, 3533 struct scatterlist *out) 3534 { 3535 return virtnet_send_command_reply(vi, class, cmd, out, NULL); 3536 } 3537 3538 static int virtnet_set_mac_address(struct net_device *dev, void *p) 3539 { 3540 struct virtnet_info *vi = netdev_priv(dev); 3541 struct virtio_device *vdev = vi->vdev; 3542 int ret; 3543 struct sockaddr *addr; 3544 struct scatterlist sg; 3545 3546 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 3547 return -EOPNOTSUPP; 3548 3549 addr = kmemdup(p, sizeof(*addr), GFP_KERNEL); 3550 if (!addr) 3551 return -ENOMEM; 3552 3553 ret = eth_prepare_mac_addr_change(dev, addr); 3554 if (ret) 3555 goto out; 3556 3557 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 3558 sg_init_one(&sg, addr->sa_data, dev->addr_len); 3559 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 3560 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { 3561 dev_warn(&vdev->dev, 3562 "Failed to set mac address by vq command.\n"); 3563 ret = -EINVAL; 3564 goto out; 3565 } 3566 } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && 3567 !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { 3568 unsigned int i; 3569 3570 /* Naturally, this has an atomicity problem. 
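 *
 * Without VIRTIO_NET_F_CTRL_MAC_ADDR the address can only be written
 * one config-space byte at a time, so the device may briefly observe
 * a mix of old and new octets. Only legacy (!VIRTIO_F_VERSION_1)
 * devices are allowed to take this path.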
*/ 3571 for (i = 0; i < dev->addr_len; i++) 3572 virtio_cwrite8(vdev, 3573 offsetof(struct virtio_net_config, mac) + 3574 i, addr->sa_data[i]); 3575 } 3576 3577 eth_commit_mac_addr_change(dev, p); 3578 ret = 0; 3579 3580 out: 3581 kfree(addr); 3582 return ret; 3583 } 3584 3585 static void virtnet_stats(struct net_device *dev, 3586 struct rtnl_link_stats64 *tot) 3587 { 3588 struct virtnet_info *vi = netdev_priv(dev); 3589 unsigned int start; 3590 int i; 3591 3592 for (i = 0; i < vi->max_queue_pairs; i++) { 3593 u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops; 3594 struct receive_queue *rq = &vi->rq[i]; 3595 struct send_queue *sq = &vi->sq[i]; 3596 3597 do { 3598 start = u64_stats_fetch_begin(&sq->stats.syncp); 3599 tpackets = u64_stats_read(&sq->stats.packets); 3600 tbytes = u64_stats_read(&sq->stats.bytes); 3601 terrors = u64_stats_read(&sq->stats.tx_timeouts); 3602 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 3603 3604 do { 3605 start = u64_stats_fetch_begin(&rq->stats.syncp); 3606 rpackets = u64_stats_read(&rq->stats.packets); 3607 rbytes = u64_stats_read(&rq->stats.bytes); 3608 rdrops = u64_stats_read(&rq->stats.drops); 3609 } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); 3610 3611 tot->rx_packets += rpackets; 3612 tot->tx_packets += tpackets; 3613 tot->rx_bytes += rbytes; 3614 tot->tx_bytes += tbytes; 3615 tot->rx_dropped += rdrops; 3616 tot->tx_errors += terrors; 3617 } 3618 3619 tot->tx_dropped = DEV_STATS_READ(dev, tx_dropped); 3620 tot->tx_fifo_errors = DEV_STATS_READ(dev, tx_fifo_errors); 3621 tot->rx_length_errors = DEV_STATS_READ(dev, rx_length_errors); 3622 tot->rx_frame_errors = DEV_STATS_READ(dev, rx_frame_errors); 3623 } 3624 3625 static void virtnet_ack_link_announce(struct virtnet_info *vi) 3626 { 3627 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE, 3628 VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL)) 3629 dev_warn(&vi->dev->dev, "Failed to ack link announce.\n"); 3630 } 3631 3632 static bool virtnet_commit_rss_command(struct virtnet_info *vi); 3633 3634 static void virtnet_rss_update_by_qpairs(struct virtnet_info *vi, u16 queue_pairs) 3635 { 3636 u32 indir_val = 0; 3637 int i = 0; 3638 3639 for (; i < vi->rss_indir_table_size; ++i) { 3640 indir_val = ethtool_rxfh_indir_default(i, queue_pairs); 3641 vi->rss.indirection_table[i] = indir_val; 3642 } 3643 vi->rss.max_tx_vq = queue_pairs; 3644 } 3645 3646 static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) 3647 { 3648 struct virtio_net_ctrl_mq *mq __free(kfree) = NULL; 3649 struct virtio_net_ctrl_rss old_rss; 3650 struct net_device *dev = vi->dev; 3651 struct scatterlist sg; 3652 3653 if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ)) 3654 return 0; 3655 3656 /* First check whether we need to update RSS. Only do the update if both (1) RSS is enabled and 3657 * (2) there is no user configuration. 3658 * 3659 * During rss command processing, the device updates queue_pairs using rss.max_tx_vq. That is, 3660 * the device updates queue_pairs together with rss, so we can skip the separate queue_pairs 3661 * update (VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET below) and return directly.
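 *
 * The current RSS state is saved in old_rss first so that it can be
 * restored (and the newly allocated indirection table freed) if
 * committing the RSS command fails.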
3662 */ 3663 if (vi->has_rss && !netif_is_rxfh_configured(dev)) { 3664 memcpy(&old_rss, &vi->rss, sizeof(old_rss)); 3665 if (rss_indirection_table_alloc(&vi->rss, vi->rss_indir_table_size)) { 3666 vi->rss.indirection_table = old_rss.indirection_table; 3667 return -ENOMEM; 3668 } 3669 3670 virtnet_rss_update_by_qpairs(vi, queue_pairs); 3671 3672 if (!virtnet_commit_rss_command(vi)) { 3673 /* restore ctrl_rss if commit_rss_command failed */ 3674 rss_indirection_table_free(&vi->rss); 3675 memcpy(&vi->rss, &old_rss, sizeof(old_rss)); 3676 3677 dev_warn(&dev->dev, "Fail to set num of queue pairs to %d, because committing RSS failed\n", 3678 queue_pairs); 3679 return -EINVAL; 3680 } 3681 rss_indirection_table_free(&old_rss); 3682 goto succ; 3683 } 3684 3685 mq = kzalloc(sizeof(*mq), GFP_KERNEL); 3686 if (!mq) 3687 return -ENOMEM; 3688 3689 mq->virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs); 3690 sg_init_one(&sg, mq, sizeof(*mq)); 3691 3692 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, 3693 VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) { 3694 dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n", 3695 queue_pairs); 3696 return -EINVAL; 3697 } 3698 succ: 3699 vi->curr_queue_pairs = queue_pairs; 3700 /* virtnet_open() will refill when device is going to up. */ 3701 if (dev->flags & IFF_UP) 3702 schedule_delayed_work(&vi->refill, 0); 3703 3704 return 0; 3705 } 3706 3707 static int virtnet_close(struct net_device *dev) 3708 { 3709 struct virtnet_info *vi = netdev_priv(dev); 3710 int i; 3711 3712 /* Make sure NAPI doesn't schedule refill work */ 3713 disable_delayed_refill(vi); 3714 /* Make sure refill_work doesn't re-enable napi! */ 3715 cancel_delayed_work_sync(&vi->refill); 3716 /* Prevent the config change callback from changing carrier 3717 * after close 3718 */ 3719 virtio_config_driver_disable(vi->vdev); 3720 /* Stop getting status/speed updates: we don't care until next 3721 * open 3722 */ 3723 cancel_work_sync(&vi->config_work); 3724 3725 for (i = 0; i < vi->max_queue_pairs; i++) { 3726 virtnet_disable_queue_pair(vi, i); 3727 virtnet_cancel_dim(vi, &vi->rq[i].dim); 3728 } 3729 3730 netif_carrier_off(dev); 3731 3732 return 0; 3733 } 3734 3735 static void virtnet_rx_mode_work(struct work_struct *work) 3736 { 3737 struct virtnet_info *vi = 3738 container_of(work, struct virtnet_info, rx_mode_work); 3739 u8 *promisc_allmulti __free(kfree) = NULL; 3740 struct net_device *dev = vi->dev; 3741 struct scatterlist sg[2]; 3742 struct virtio_net_ctrl_mac *mac_data; 3743 struct netdev_hw_addr *ha; 3744 int uc_count; 3745 int mc_count; 3746 void *buf; 3747 int i; 3748 3749 /* We can't dynamically set ndo_set_rx_mode, so return gracefully */ 3750 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX)) 3751 return; 3752 3753 promisc_allmulti = kzalloc(sizeof(*promisc_allmulti), GFP_KERNEL); 3754 if (!promisc_allmulti) { 3755 dev_warn(&dev->dev, "Failed to set RX mode, no memory.\n"); 3756 return; 3757 } 3758 3759 rtnl_lock(); 3760 3761 *promisc_allmulti = !!(dev->flags & IFF_PROMISC); 3762 sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti)); 3763 3764 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 3765 VIRTIO_NET_CTRL_RX_PROMISC, sg)) 3766 dev_warn(&dev->dev, "Failed to %sable promisc mode.\n", 3767 *promisc_allmulti ? 
"en" : "dis"); 3768 3769 *promisc_allmulti = !!(dev->flags & IFF_ALLMULTI); 3770 sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti)); 3771 3772 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 3773 VIRTIO_NET_CTRL_RX_ALLMULTI, sg)) 3774 dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n", 3775 *promisc_allmulti ? "en" : "dis"); 3776 3777 netif_addr_lock_bh(dev); 3778 3779 uc_count = netdev_uc_count(dev); 3780 mc_count = netdev_mc_count(dev); 3781 /* MAC filter - use one buffer for both lists */ 3782 buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) + 3783 (2 * sizeof(mac_data->entries)), GFP_ATOMIC); 3784 mac_data = buf; 3785 if (!buf) { 3786 netif_addr_unlock_bh(dev); 3787 rtnl_unlock(); 3788 return; 3789 } 3790 3791 sg_init_table(sg, 2); 3792 3793 /* Store the unicast list and count in the front of the buffer */ 3794 mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count); 3795 i = 0; 3796 netdev_for_each_uc_addr(ha, dev) 3797 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 3798 3799 sg_set_buf(&sg[0], mac_data, 3800 sizeof(mac_data->entries) + (uc_count * ETH_ALEN)); 3801 3802 /* multicast list and count fill the end */ 3803 mac_data = (void *)&mac_data->macs[uc_count][0]; 3804 3805 mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count); 3806 i = 0; 3807 netdev_for_each_mc_addr(ha, dev) 3808 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 3809 3810 netif_addr_unlock_bh(dev); 3811 3812 sg_set_buf(&sg[1], mac_data, 3813 sizeof(mac_data->entries) + (mc_count * ETH_ALEN)); 3814 3815 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 3816 VIRTIO_NET_CTRL_MAC_TABLE_SET, sg)) 3817 dev_warn(&dev->dev, "Failed to set MAC filter table.\n"); 3818 3819 rtnl_unlock(); 3820 3821 kfree(buf); 3822 } 3823 3824 static void virtnet_set_rx_mode(struct net_device *dev) 3825 { 3826 struct virtnet_info *vi = netdev_priv(dev); 3827 3828 if (vi->rx_mode_work_enabled) 3829 schedule_work(&vi->rx_mode_work); 3830 } 3831 3832 static int virtnet_vlan_rx_add_vid(struct net_device *dev, 3833 __be16 proto, u16 vid) 3834 { 3835 struct virtnet_info *vi = netdev_priv(dev); 3836 __virtio16 *_vid __free(kfree) = NULL; 3837 struct scatterlist sg; 3838 3839 _vid = kzalloc(sizeof(*_vid), GFP_KERNEL); 3840 if (!_vid) 3841 return -ENOMEM; 3842 3843 *_vid = cpu_to_virtio16(vi->vdev, vid); 3844 sg_init_one(&sg, _vid, sizeof(*_vid)); 3845 3846 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 3847 VIRTIO_NET_CTRL_VLAN_ADD, &sg)) 3848 dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid); 3849 return 0; 3850 } 3851 3852 static int virtnet_vlan_rx_kill_vid(struct net_device *dev, 3853 __be16 proto, u16 vid) 3854 { 3855 struct virtnet_info *vi = netdev_priv(dev); 3856 __virtio16 *_vid __free(kfree) = NULL; 3857 struct scatterlist sg; 3858 3859 _vid = kzalloc(sizeof(*_vid), GFP_KERNEL); 3860 if (!_vid) 3861 return -ENOMEM; 3862 3863 *_vid = cpu_to_virtio16(vi->vdev, vid); 3864 sg_init_one(&sg, _vid, sizeof(*_vid)); 3865 3866 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 3867 VIRTIO_NET_CTRL_VLAN_DEL, &sg)) 3868 dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid); 3869 return 0; 3870 } 3871 3872 static void virtnet_clean_affinity(struct virtnet_info *vi) 3873 { 3874 int i; 3875 3876 if (vi->affinity_hint_set) { 3877 for (i = 0; i < vi->max_queue_pairs; i++) { 3878 virtqueue_set_affinity(vi->rq[i].vq, NULL); 3879 virtqueue_set_affinity(vi->sq[i].vq, NULL); 3880 } 3881 3882 vi->affinity_hint_set = false; 3883 } 3884 } 3885 3886 static void virtnet_set_affinity(struct virtnet_info *vi) 3887 { 3888 
cpumask_var_t mask; 3889 int stragglers; 3890 int group_size; 3891 int i, j, cpu; 3892 int num_cpu; 3893 int stride; 3894 3895 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { 3896 virtnet_clean_affinity(vi); 3897 return; 3898 } 3899 3900 num_cpu = num_online_cpus(); 3901 stride = max_t(int, num_cpu / vi->curr_queue_pairs, 1); 3902 stragglers = num_cpu >= vi->curr_queue_pairs ? 3903 num_cpu % vi->curr_queue_pairs : 3904 0; 3905 cpu = cpumask_first(cpu_online_mask); 3906 3907 for (i = 0; i < vi->curr_queue_pairs; i++) { 3908 group_size = stride + (i < stragglers ? 1 : 0); 3909 3910 for (j = 0; j < group_size; j++) { 3911 cpumask_set_cpu(cpu, mask); 3912 cpu = cpumask_next_wrap(cpu, cpu_online_mask, 3913 nr_cpu_ids, false); 3914 } 3915 virtqueue_set_affinity(vi->rq[i].vq, mask); 3916 virtqueue_set_affinity(vi->sq[i].vq, mask); 3917 __netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS); 3918 cpumask_clear(mask); 3919 } 3920 3921 vi->affinity_hint_set = true; 3922 free_cpumask_var(mask); 3923 } 3924 3925 static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node) 3926 { 3927 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 3928 node); 3929 virtnet_set_affinity(vi); 3930 return 0; 3931 } 3932 3933 static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node) 3934 { 3935 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 3936 node_dead); 3937 virtnet_set_affinity(vi); 3938 return 0; 3939 } 3940 3941 static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node) 3942 { 3943 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 3944 node); 3945 3946 virtnet_clean_affinity(vi); 3947 return 0; 3948 } 3949 3950 static enum cpuhp_state virtionet_online; 3951 3952 static int virtnet_cpu_notif_add(struct virtnet_info *vi) 3953 { 3954 int ret; 3955 3956 ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node); 3957 if (ret) 3958 return ret; 3959 ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD, 3960 &vi->node_dead); 3961 if (!ret) 3962 return ret; 3963 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 3964 return ret; 3965 } 3966 3967 static void virtnet_cpu_notif_remove(struct virtnet_info *vi) 3968 { 3969 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 3970 cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD, 3971 &vi->node_dead); 3972 } 3973 3974 static int virtnet_send_ctrl_coal_vq_cmd(struct virtnet_info *vi, 3975 u16 vqn, u32 max_usecs, u32 max_packets) 3976 { 3977 struct virtio_net_ctrl_coal_vq *coal_vq __free(kfree) = NULL; 3978 struct scatterlist sgs; 3979 3980 coal_vq = kzalloc(sizeof(*coal_vq), GFP_KERNEL); 3981 if (!coal_vq) 3982 return -ENOMEM; 3983 3984 coal_vq->vqn = cpu_to_le16(vqn); 3985 coal_vq->coal.max_usecs = cpu_to_le32(max_usecs); 3986 coal_vq->coal.max_packets = cpu_to_le32(max_packets); 3987 sg_init_one(&sgs, coal_vq, sizeof(*coal_vq)); 3988 3989 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 3990 VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET, 3991 &sgs)) 3992 return -EINVAL; 3993 3994 return 0; 3995 } 3996 3997 static int virtnet_send_rx_ctrl_coal_vq_cmd(struct virtnet_info *vi, 3998 u16 queue, u32 max_usecs, 3999 u32 max_packets) 4000 { 4001 int err; 4002 4003 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 4004 return -EOPNOTSUPP; 4005 4006 err = virtnet_send_ctrl_coal_vq_cmd(vi, rxq2vq(queue), 4007 max_usecs, max_packets); 4008 if (err) 4009 return err; 4010 4011 vi->rq[queue].intr_coal.max_usecs = max_usecs; 
4012 vi->rq[queue].intr_coal.max_packets = max_packets; 4013 4014 return 0; 4015 } 4016 4017 static int virtnet_send_tx_ctrl_coal_vq_cmd(struct virtnet_info *vi, 4018 u16 queue, u32 max_usecs, 4019 u32 max_packets) 4020 { 4021 int err; 4022 4023 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 4024 return -EOPNOTSUPP; 4025 4026 err = virtnet_send_ctrl_coal_vq_cmd(vi, txq2vq(queue), 4027 max_usecs, max_packets); 4028 if (err) 4029 return err; 4030 4031 vi->sq[queue].intr_coal.max_usecs = max_usecs; 4032 vi->sq[queue].intr_coal.max_packets = max_packets; 4033 4034 return 0; 4035 } 4036 4037 static void virtnet_get_ringparam(struct net_device *dev, 4038 struct ethtool_ringparam *ring, 4039 struct kernel_ethtool_ringparam *kernel_ring, 4040 struct netlink_ext_ack *extack) 4041 { 4042 struct virtnet_info *vi = netdev_priv(dev); 4043 4044 ring->rx_max_pending = vi->rq[0].vq->num_max; 4045 ring->tx_max_pending = vi->sq[0].vq->num_max; 4046 ring->rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 4047 ring->tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 4048 } 4049 4050 static int virtnet_set_ringparam(struct net_device *dev, 4051 struct ethtool_ringparam *ring, 4052 struct kernel_ethtool_ringparam *kernel_ring, 4053 struct netlink_ext_ack *extack) 4054 { 4055 struct virtnet_info *vi = netdev_priv(dev); 4056 u32 rx_pending, tx_pending; 4057 struct receive_queue *rq; 4058 struct send_queue *sq; 4059 int i, err; 4060 4061 if (ring->rx_mini_pending || ring->rx_jumbo_pending) 4062 return -EINVAL; 4063 4064 rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 4065 tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 4066 4067 if (ring->rx_pending == rx_pending && 4068 ring->tx_pending == tx_pending) 4069 return 0; 4070 4071 if (ring->rx_pending > vi->rq[0].vq->num_max) 4072 return -EINVAL; 4073 4074 if (ring->tx_pending > vi->sq[0].vq->num_max) 4075 return -EINVAL; 4076 4077 for (i = 0; i < vi->max_queue_pairs; i++) { 4078 rq = vi->rq + i; 4079 sq = vi->sq + i; 4080 4081 if (ring->tx_pending != tx_pending) { 4082 err = virtnet_tx_resize(vi, sq, ring->tx_pending); 4083 if (err) 4084 return err; 4085 4086 /* Upon disabling and re-enabling a transmit virtqueue, the device must 4087 * set the coalescing parameters of the virtqueue to those configured 4088 * through the VIRTIO_NET_CTRL_NOTF_COAL_TX_SET command, or, if the driver 4089 * did not set any TX coalescing parameters, to 0. 4090 */ 4091 err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, i, 4092 vi->intr_coal_tx.max_usecs, 4093 vi->intr_coal_tx.max_packets); 4094 4095 /* Don't break the tx resize action if the vq coalescing is not 4096 * supported. The same is true for rx resize below. 
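 *
 * -EOPNOTSUPP from the per-queue coalescing command only means that
 * VIRTIO_NET_F_VQ_NOTF_COAL was not negotiated, so it is ignored
 * here; any other error aborts the ringparam change.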
4097 */ 4098 if (err && err != -EOPNOTSUPP) 4099 return err; 4100 } 4101 4102 if (ring->rx_pending != rx_pending) { 4103 err = virtnet_rx_resize(vi, rq, ring->rx_pending); 4104 if (err) 4105 return err; 4106 4107 /* The reason is same as the transmit virtqueue reset */ 4108 mutex_lock(&vi->rq[i].dim_lock); 4109 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, i, 4110 vi->intr_coal_rx.max_usecs, 4111 vi->intr_coal_rx.max_packets); 4112 mutex_unlock(&vi->rq[i].dim_lock); 4113 if (err && err != -EOPNOTSUPP) 4114 return err; 4115 } 4116 } 4117 4118 return 0; 4119 } 4120 4121 static bool virtnet_commit_rss_command(struct virtnet_info *vi) 4122 { 4123 struct net_device *dev = vi->dev; 4124 struct scatterlist sgs[4]; 4125 unsigned int sg_buf_size; 4126 4127 /* prepare sgs */ 4128 sg_init_table(sgs, 4); 4129 4130 sg_buf_size = offsetof(struct virtio_net_ctrl_rss, hash_cfg_reserved); 4131 sg_set_buf(&sgs[0], &vi->rss, sg_buf_size); 4132 4133 if (vi->has_rss) { 4134 sg_buf_size = sizeof(uint16_t) * vi->rss_indir_table_size; 4135 sg_set_buf(&sgs[1], vi->rss.indirection_table, sg_buf_size); 4136 } else { 4137 sg_set_buf(&sgs[1], &vi->rss.hash_cfg_reserved, sizeof(uint16_t)); 4138 } 4139 4140 sg_buf_size = offsetof(struct virtio_net_ctrl_rss, key) 4141 - offsetof(struct virtio_net_ctrl_rss, max_tx_vq); 4142 sg_set_buf(&sgs[2], &vi->rss.max_tx_vq, sg_buf_size); 4143 4144 sg_buf_size = vi->rss_key_size; 4145 sg_set_buf(&sgs[3], vi->rss.key, sg_buf_size); 4146 4147 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, 4148 vi->has_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG 4149 : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, sgs)) 4150 goto err; 4151 4152 return true; 4153 4154 err: 4155 dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n"); 4156 return false; 4157 4158 } 4159 4160 static void virtnet_init_default_rss(struct virtnet_info *vi) 4161 { 4162 vi->rss.hash_types = vi->rss_hash_types_supported; 4163 vi->rss_hash_types_saved = vi->rss_hash_types_supported; 4164 vi->rss.indirection_table_mask = vi->rss_indir_table_size 4165 ? 
vi->rss_indir_table_size - 1 : 0; 4166 vi->rss.unclassified_queue = 0; 4167 4168 virtnet_rss_update_by_qpairs(vi, vi->curr_queue_pairs); 4169 4170 vi->rss.hash_key_length = vi->rss_key_size; 4171 4172 netdev_rss_key_fill(vi->rss.key, vi->rss_key_size); 4173 } 4174 4175 static void virtnet_get_hashflow(const struct virtnet_info *vi, struct ethtool_rxnfc *info) 4176 { 4177 info->data = 0; 4178 switch (info->flow_type) { 4179 case TCP_V4_FLOW: 4180 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) { 4181 info->data = RXH_IP_SRC | RXH_IP_DST | 4182 RXH_L4_B_0_1 | RXH_L4_B_2_3; 4183 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 4184 info->data = RXH_IP_SRC | RXH_IP_DST; 4185 } 4186 break; 4187 case TCP_V6_FLOW: 4188 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) { 4189 info->data = RXH_IP_SRC | RXH_IP_DST | 4190 RXH_L4_B_0_1 | RXH_L4_B_2_3; 4191 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 4192 info->data = RXH_IP_SRC | RXH_IP_DST; 4193 } 4194 break; 4195 case UDP_V4_FLOW: 4196 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) { 4197 info->data = RXH_IP_SRC | RXH_IP_DST | 4198 RXH_L4_B_0_1 | RXH_L4_B_2_3; 4199 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 4200 info->data = RXH_IP_SRC | RXH_IP_DST; 4201 } 4202 break; 4203 case UDP_V6_FLOW: 4204 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) { 4205 info->data = RXH_IP_SRC | RXH_IP_DST | 4206 RXH_L4_B_0_1 | RXH_L4_B_2_3; 4207 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 4208 info->data = RXH_IP_SRC | RXH_IP_DST; 4209 } 4210 break; 4211 case IPV4_FLOW: 4212 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) 4213 info->data = RXH_IP_SRC | RXH_IP_DST; 4214 4215 break; 4216 case IPV6_FLOW: 4217 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) 4218 info->data = RXH_IP_SRC | RXH_IP_DST; 4219 4220 break; 4221 default: 4222 info->data = 0; 4223 break; 4224 } 4225 } 4226 4227 static bool virtnet_set_hashflow(struct virtnet_info *vi, struct ethtool_rxnfc *info) 4228 { 4229 u32 new_hashtypes = vi->rss_hash_types_saved; 4230 bool is_disable = info->data & RXH_DISCARD; 4231 bool is_l4 = info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3); 4232 4233 /* supports only 'sd', 'sdfn' and 'r' */ 4234 if (!((info->data == (RXH_IP_SRC | RXH_IP_DST)) | is_l4 | is_disable)) 4235 return false; 4236 4237 switch (info->flow_type) { 4238 case TCP_V4_FLOW: 4239 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4); 4240 if (!is_disable) 4241 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 4242 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv4 : 0); 4243 break; 4244 case UDP_V4_FLOW: 4245 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_UDPv4); 4246 if (!is_disable) 4247 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 4248 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv4 : 0); 4249 break; 4250 case IPV4_FLOW: 4251 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv4; 4252 if (!is_disable) 4253 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv4; 4254 break; 4255 case TCP_V6_FLOW: 4256 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_TCPv6); 4257 if (!is_disable) 4258 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 4259 | (is_l4 ? 
VIRTIO_NET_RSS_HASH_TYPE_TCPv6 : 0); 4260 break; 4261 case UDP_V6_FLOW: 4262 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_UDPv6); 4263 if (!is_disable) 4264 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 4265 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv6 : 0); 4266 break; 4267 case IPV6_FLOW: 4268 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv6; 4269 if (!is_disable) 4270 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv6; 4271 break; 4272 default: 4273 /* unsupported flow */ 4274 return false; 4275 } 4276 4277 /* if unsupported hashtype was set */ 4278 if (new_hashtypes != (new_hashtypes & vi->rss_hash_types_supported)) 4279 return false; 4280 4281 if (new_hashtypes != vi->rss_hash_types_saved) { 4282 vi->rss_hash_types_saved = new_hashtypes; 4283 vi->rss.hash_types = vi->rss_hash_types_saved; 4284 if (vi->dev->features & NETIF_F_RXHASH) 4285 return virtnet_commit_rss_command(vi); 4286 } 4287 4288 return true; 4289 } 4290 4291 static void virtnet_get_drvinfo(struct net_device *dev, 4292 struct ethtool_drvinfo *info) 4293 { 4294 struct virtnet_info *vi = netdev_priv(dev); 4295 struct virtio_device *vdev = vi->vdev; 4296 4297 strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); 4298 strscpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version)); 4299 strscpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info)); 4300 4301 } 4302 4303 /* TODO: Eliminate OOO packets during switching */ 4304 static int virtnet_set_channels(struct net_device *dev, 4305 struct ethtool_channels *channels) 4306 { 4307 struct virtnet_info *vi = netdev_priv(dev); 4308 u16 queue_pairs = channels->combined_count; 4309 int err; 4310 4311 /* We don't support separate rx/tx channels. 4312 * We don't allow setting 'other' channels. 4313 */ 4314 if (channels->rx_count || channels->tx_count || channels->other_count) 4315 return -EINVAL; 4316 4317 if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0) 4318 return -EINVAL; 4319 4320 /* For now we don't support modifying channels while XDP is loaded. 4321 * Also, when XDP is loaded all RX queues have XDP programs, so we only 4322 * need to check a single RX queue.
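 *
 * Changing the count goes through virtnet_set_queues(), which asks the
 * device for the new number of queue pairs over the control virtqueue;
 * the CPU affinity and XPS maps are then rebuilt under cpus_read_lock()
 * before the real RX/TX queue counts are updated.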
4323 */ 4324 if (vi->rq[0].xdp_prog) 4325 return -EINVAL; 4326 4327 cpus_read_lock(); 4328 err = virtnet_set_queues(vi, queue_pairs); 4329 if (err) { 4330 cpus_read_unlock(); 4331 goto err; 4332 } 4333 virtnet_set_affinity(vi); 4334 cpus_read_unlock(); 4335 4336 netif_set_real_num_tx_queues(dev, queue_pairs); 4337 netif_set_real_num_rx_queues(dev, queue_pairs); 4338 err: 4339 return err; 4340 } 4341 4342 static void virtnet_stats_sprintf(u8 **p, const char *fmt, const char *noq_fmt, 4343 int num, int qid, const struct virtnet_stat_desc *desc) 4344 { 4345 int i; 4346 4347 if (qid < 0) { 4348 for (i = 0; i < num; ++i) 4349 ethtool_sprintf(p, noq_fmt, desc[i].desc); 4350 } else { 4351 for (i = 0; i < num; ++i) 4352 ethtool_sprintf(p, fmt, qid, desc[i].desc); 4353 } 4354 } 4355 4356 /* qid == -1: for rx/tx queue total field */ 4357 static void virtnet_get_stats_string(struct virtnet_info *vi, int type, int qid, u8 **data) 4358 { 4359 const struct virtnet_stat_desc *desc; 4360 const char *fmt, *noq_fmt; 4361 u8 *p = *data; 4362 u32 num; 4363 4364 if (type == VIRTNET_Q_TYPE_CQ && qid >= 0) { 4365 noq_fmt = "cq_hw_%s"; 4366 4367 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) { 4368 desc = &virtnet_stats_cvq_desc[0]; 4369 num = ARRAY_SIZE(virtnet_stats_cvq_desc); 4370 4371 virtnet_stats_sprintf(&p, NULL, noq_fmt, num, -1, desc); 4372 } 4373 } 4374 4375 if (type == VIRTNET_Q_TYPE_RX) { 4376 fmt = "rx%u_%s"; 4377 noq_fmt = "rx_%s"; 4378 4379 desc = &virtnet_rq_stats_desc[0]; 4380 num = ARRAY_SIZE(virtnet_rq_stats_desc); 4381 4382 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4383 4384 fmt = "rx%u_hw_%s"; 4385 noq_fmt = "rx_hw_%s"; 4386 4387 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4388 desc = &virtnet_stats_rx_basic_desc[0]; 4389 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4390 4391 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4392 } 4393 4394 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4395 desc = &virtnet_stats_rx_csum_desc[0]; 4396 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4397 4398 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4399 } 4400 4401 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4402 desc = &virtnet_stats_rx_speed_desc[0]; 4403 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4404 4405 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4406 } 4407 } 4408 4409 if (type == VIRTNET_Q_TYPE_TX) { 4410 fmt = "tx%u_%s"; 4411 noq_fmt = "tx_%s"; 4412 4413 desc = &virtnet_sq_stats_desc[0]; 4414 num = ARRAY_SIZE(virtnet_sq_stats_desc); 4415 4416 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4417 4418 fmt = "tx%u_hw_%s"; 4419 noq_fmt = "tx_hw_%s"; 4420 4421 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4422 desc = &virtnet_stats_tx_basic_desc[0]; 4423 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4424 4425 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4426 } 4427 4428 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4429 desc = &virtnet_stats_tx_gso_desc[0]; 4430 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4431 4432 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4433 } 4434 4435 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4436 desc = &virtnet_stats_tx_speed_desc[0]; 4437 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4438 4439 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4440 } 4441 } 4442 4443 *data = p; 4444 } 4445 4446 struct virtnet_stats_ctx { 4447 /* The stats are write to qstats or ethtool -S */ 4448 
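/* The desc_num/bitmap/size arrays below are indexed by queue type
 * (VIRTNET_Q_TYPE_RX, VIRTNET_Q_TYPE_TX, VIRTNET_Q_TYPE_CQ).
 */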
bool to_qstat; 4449 4450 /* Used to calculate the offset inside the output buffer. */ 4451 u32 desc_num[3]; 4452 4453 /* The actual supported stat types. */ 4454 u64 bitmap[3]; 4455 4456 /* Used to calculate the reply buffer size. */ 4457 u32 size[3]; 4458 4459 /* Record the output buffer. */ 4460 u64 *data; 4461 }; 4462 4463 static void virtnet_stats_ctx_init(struct virtnet_info *vi, 4464 struct virtnet_stats_ctx *ctx, 4465 u64 *data, bool to_qstat) 4466 { 4467 u32 queue_type; 4468 4469 ctx->data = data; 4470 ctx->to_qstat = to_qstat; 4471 4472 if (to_qstat) { 4473 ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc_qstat); 4474 ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc_qstat); 4475 4476 queue_type = VIRTNET_Q_TYPE_RX; 4477 4478 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4479 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC; 4480 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat); 4481 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic); 4482 } 4483 4484 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4485 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM; 4486 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat); 4487 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum); 4488 } 4489 4490 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 4491 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_GSO; 4492 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat); 4493 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_gso); 4494 } 4495 4496 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4497 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED; 4498 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat); 4499 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed); 4500 } 4501 4502 queue_type = VIRTNET_Q_TYPE_TX; 4503 4504 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4505 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC; 4506 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat); 4507 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic); 4508 } 4509 4510 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 4511 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_CSUM; 4512 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat); 4513 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_csum); 4514 } 4515 4516 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4517 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO; 4518 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat); 4519 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso); 4520 } 4521 4522 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4523 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED; 4524 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat); 4525 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed); 4526 } 4527 4528 return; 4529 } 4530 4531 ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc); 4532 ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc); 4533 4534 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) { 4535 queue_type = VIRTNET_Q_TYPE_CQ; 4536 4537 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_CVQ; 4538 ctx->desc_num[queue_type] += 
ARRAY_SIZE(virtnet_stats_cvq_desc); 4539 ctx->size[queue_type] += sizeof(struct virtio_net_stats_cvq); 4540 } 4541 4542 queue_type = VIRTNET_Q_TYPE_RX; 4543 4544 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4545 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC; 4546 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4547 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic); 4548 } 4549 4550 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4551 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM; 4552 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4553 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum); 4554 } 4555 4556 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4557 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED; 4558 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4559 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed); 4560 } 4561 4562 queue_type = VIRTNET_Q_TYPE_TX; 4563 4564 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4565 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC; 4566 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4567 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic); 4568 } 4569 4570 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4571 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO; 4572 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4573 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso); 4574 } 4575 4576 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4577 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED; 4578 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4579 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed); 4580 } 4581 } 4582 4583 /* stats_sum_queue - Calculate the sum of the same fields in sq or rq. 
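 *
 * The queues' fields are laid out back to back, @num u64 values per
 * queue; e.g. with @num == 2 and @q_num == 2 this computes
 *   sum[0] = q_value[0] + q_value[2];
 *   sum[1] = q_value[1] + q_value[3];
 *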
4584 * @sum: the position to store the sum values 4585 * @num: field num 4586 * @q_value: the first queue fields 4587 * @q_num: number of the queues 4588 */ 4589 static void stats_sum_queue(u64 *sum, u32 num, u64 *q_value, u32 q_num) 4590 { 4591 u32 step = num; 4592 int i, j; 4593 u64 *p; 4594 4595 for (i = 0; i < num; ++i) { 4596 p = sum + i; 4597 *p = 0; 4598 4599 for (j = 0; j < q_num; ++j) 4600 *p += *(q_value + i + j * step); 4601 } 4602 } 4603 4604 static void virtnet_fill_total_fields(struct virtnet_info *vi, 4605 struct virtnet_stats_ctx *ctx) 4606 { 4607 u64 *data, *first_rx_q, *first_tx_q; 4608 u32 num_cq, num_rx, num_tx; 4609 4610 num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ]; 4611 num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX]; 4612 num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX]; 4613 4614 first_rx_q = ctx->data + num_rx + num_tx + num_cq; 4615 first_tx_q = first_rx_q + vi->curr_queue_pairs * num_rx; 4616 4617 data = ctx->data; 4618 4619 stats_sum_queue(data, num_rx, first_rx_q, vi->curr_queue_pairs); 4620 4621 data = ctx->data + num_rx; 4622 4623 stats_sum_queue(data, num_tx, first_tx_q, vi->curr_queue_pairs); 4624 } 4625 4626 static void virtnet_fill_stats_qstat(struct virtnet_info *vi, u32 qid, 4627 struct virtnet_stats_ctx *ctx, 4628 const u8 *base, bool drv_stats, u8 reply_type) 4629 { 4630 const struct virtnet_stat_desc *desc; 4631 const u64_stats_t *v_stat; 4632 u64 offset, bitmap; 4633 const __le64 *v; 4634 u32 queue_type; 4635 int i, num; 4636 4637 queue_type = vq_type(vi, qid); 4638 bitmap = ctx->bitmap[queue_type]; 4639 4640 if (drv_stats) { 4641 if (queue_type == VIRTNET_Q_TYPE_RX) { 4642 desc = &virtnet_rq_stats_desc_qstat[0]; 4643 num = ARRAY_SIZE(virtnet_rq_stats_desc_qstat); 4644 } else { 4645 desc = &virtnet_sq_stats_desc_qstat[0]; 4646 num = ARRAY_SIZE(virtnet_sq_stats_desc_qstat); 4647 } 4648 4649 for (i = 0; i < num; ++i) { 4650 offset = desc[i].qstat_offset / sizeof(*ctx->data); 4651 v_stat = (const u64_stats_t *)(base + desc[i].offset); 4652 ctx->data[offset] = u64_stats_read(v_stat); 4653 } 4654 return; 4655 } 4656 4657 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4658 desc = &virtnet_stats_rx_basic_desc_qstat[0]; 4659 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat); 4660 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC) 4661 goto found; 4662 } 4663 4664 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4665 desc = &virtnet_stats_rx_csum_desc_qstat[0]; 4666 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat); 4667 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM) 4668 goto found; 4669 } 4670 4671 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 4672 desc = &virtnet_stats_rx_gso_desc_qstat[0]; 4673 num = ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat); 4674 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_GSO) 4675 goto found; 4676 } 4677 4678 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4679 desc = &virtnet_stats_rx_speed_desc_qstat[0]; 4680 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat); 4681 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED) 4682 goto found; 4683 } 4684 4685 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4686 desc = &virtnet_stats_tx_basic_desc_qstat[0]; 4687 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat); 4688 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC) 4689 goto found; 4690 } 4691 4692 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 4693 desc = &virtnet_stats_tx_csum_desc_qstat[0]; 4694 num = ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat); 4695 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_CSUM) 4696 goto found; 4697 
} 4698 4699 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4700 desc = &virtnet_stats_tx_gso_desc_qstat[0]; 4701 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat); 4702 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO) 4703 goto found; 4704 } 4705 4706 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4707 desc = &virtnet_stats_tx_speed_desc_qstat[0]; 4708 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat); 4709 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED) 4710 goto found; 4711 } 4712 4713 return; 4714 4715 found: 4716 for (i = 0; i < num; ++i) { 4717 offset = desc[i].qstat_offset / sizeof(*ctx->data); 4718 v = (const __le64 *)(base + desc[i].offset); 4719 ctx->data[offset] = le64_to_cpu(*v); 4720 } 4721 } 4722 4723 /* virtnet_fill_stats - copy the stats to qstats or ethtool -S 4724 * The stats source is the device or the driver. 4725 * 4726 * @vi: virtio net info 4727 * @qid: the vq id 4728 * @ctx: stats ctx (initiated by virtnet_stats_ctx_init()) 4729 * @base: pointer to the device reply or the driver stats structure. 4730 * @drv_stats: designate the base type (device reply, driver stats) 4731 * @type: the type of the device reply (if drv_stats is true, this must be zero) 4732 */ 4733 static void virtnet_fill_stats(struct virtnet_info *vi, u32 qid, 4734 struct virtnet_stats_ctx *ctx, 4735 const u8 *base, bool drv_stats, u8 reply_type) 4736 { 4737 u32 queue_type, num_rx, num_tx, num_cq; 4738 const struct virtnet_stat_desc *desc; 4739 const u64_stats_t *v_stat; 4740 u64 offset, bitmap; 4741 const __le64 *v; 4742 int i, num; 4743 4744 if (ctx->to_qstat) 4745 return virtnet_fill_stats_qstat(vi, qid, ctx, base, drv_stats, reply_type); 4746 4747 num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ]; 4748 num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX]; 4749 num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX]; 4750 4751 queue_type = vq_type(vi, qid); 4752 bitmap = ctx->bitmap[queue_type]; 4753 4754 /* skip the total fields of pairs */ 4755 offset = num_rx + num_tx; 4756 4757 if (queue_type == VIRTNET_Q_TYPE_TX) { 4758 offset += num_cq + num_rx * vi->curr_queue_pairs + num_tx * (qid / 2); 4759 4760 num = ARRAY_SIZE(virtnet_sq_stats_desc); 4761 if (drv_stats) { 4762 desc = &virtnet_sq_stats_desc[0]; 4763 goto drv_stats; 4764 } 4765 4766 offset += num; 4767 4768 } else if (queue_type == VIRTNET_Q_TYPE_RX) { 4769 offset += num_cq + num_rx * (qid / 2); 4770 4771 num = ARRAY_SIZE(virtnet_rq_stats_desc); 4772 if (drv_stats) { 4773 desc = &virtnet_rq_stats_desc[0]; 4774 goto drv_stats; 4775 } 4776 4777 offset += num; 4778 } 4779 4780 if (bitmap & VIRTIO_NET_STATS_TYPE_CVQ) { 4781 desc = &virtnet_stats_cvq_desc[0]; 4782 num = ARRAY_SIZE(virtnet_stats_cvq_desc); 4783 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_CVQ) 4784 goto found; 4785 4786 offset += num; 4787 } 4788 4789 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4790 desc = &virtnet_stats_rx_basic_desc[0]; 4791 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4792 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC) 4793 goto found; 4794 4795 offset += num; 4796 } 4797 4798 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4799 desc = &virtnet_stats_rx_csum_desc[0]; 4800 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4801 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM) 4802 goto found; 4803 4804 offset += num; 4805 } 4806 4807 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4808 desc = &virtnet_stats_rx_speed_desc[0]; 4809 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4810 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED) 4811 goto found; 4812 4813 
offset += num; 4814 } 4815 4816 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4817 desc = &virtnet_stats_tx_basic_desc[0]; 4818 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4819 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC) 4820 goto found; 4821 4822 offset += num; 4823 } 4824 4825 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4826 desc = &virtnet_stats_tx_gso_desc[0]; 4827 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4828 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO) 4829 goto found; 4830 4831 offset += num; 4832 } 4833 4834 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4835 desc = &virtnet_stats_tx_speed_desc[0]; 4836 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4837 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED) 4838 goto found; 4839 4840 offset += num; 4841 } 4842 4843 return; 4844 4845 found: 4846 for (i = 0; i < num; ++i) { 4847 v = (const __le64 *)(base + desc[i].offset); 4848 ctx->data[offset + i] = le64_to_cpu(*v); 4849 } 4850 4851 return; 4852 4853 drv_stats: 4854 for (i = 0; i < num; ++i) { 4855 v_stat = (const u64_stats_t *)(base + desc[i].offset); 4856 ctx->data[offset + i] = u64_stats_read(v_stat); 4857 } 4858 } 4859 4860 static int __virtnet_get_hw_stats(struct virtnet_info *vi, 4861 struct virtnet_stats_ctx *ctx, 4862 struct virtio_net_ctrl_queue_stats *req, 4863 int req_size, void *reply, int res_size) 4864 { 4865 struct virtio_net_stats_reply_hdr *hdr; 4866 struct scatterlist sgs_in, sgs_out; 4867 void *p; 4868 u32 qid; 4869 int ok; 4870 4871 sg_init_one(&sgs_out, req, req_size); 4872 sg_init_one(&sgs_in, reply, res_size); 4873 4874 ok = virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS, 4875 VIRTIO_NET_CTRL_STATS_GET, 4876 &sgs_out, &sgs_in); 4877 4878 if (!ok) 4879 return ok; 4880 4881 for (p = reply; p - reply < res_size; p += le16_to_cpu(hdr->size)) { 4882 hdr = p; 4883 qid = le16_to_cpu(hdr->vq_index); 4884 virtnet_fill_stats(vi, qid, ctx, p, false, hdr->type); 4885 } 4886 4887 return 0; 4888 } 4889 4890 static void virtnet_make_stat_req(struct virtnet_info *vi, 4891 struct virtnet_stats_ctx *ctx, 4892 struct virtio_net_ctrl_queue_stats *req, 4893 int qid, int *idx) 4894 { 4895 int qtype = vq_type(vi, qid); 4896 u64 bitmap = ctx->bitmap[qtype]; 4897 4898 if (!bitmap) 4899 return; 4900 4901 req->stats[*idx].vq_index = cpu_to_le16(qid); 4902 req->stats[*idx].types_bitmap[0] = cpu_to_le64(bitmap); 4903 *idx += 1; 4904 } 4905 4906 /* qid: -1: get stats of all vq. 4907 * > 0: get the stats for the special vq. This must not be cvq. 
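 *
 * The request is an array of struct virtio_net_ctrl_queue_stats, one
 * entry per vq whose stat type bitmap is non-zero (plus the cvq when
 * qid == -1), sent with VIRTIO_NET_CTRL_STATS_GET. The device replies
 * with a sequence of blocks, each prefixed by a virtio_net_stats_reply_hdr
 * (vq_index, type, size), which __virtnet_get_hw_stats() walks to fill
 * ctx->data through virtnet_fill_stats().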
4908 */ 4909 static int virtnet_get_hw_stats(struct virtnet_info *vi, 4910 struct virtnet_stats_ctx *ctx, int qid) 4911 { 4912 int qnum, i, j, res_size, qtype, last_vq, first_vq; 4913 struct virtio_net_ctrl_queue_stats *req; 4914 bool enable_cvq; 4915 void *reply; 4916 int ok; 4917 4918 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) 4919 return 0; 4920 4921 if (qid == -1) { 4922 last_vq = vi->curr_queue_pairs * 2 - 1; 4923 first_vq = 0; 4924 enable_cvq = true; 4925 } else { 4926 last_vq = qid; 4927 first_vq = qid; 4928 enable_cvq = false; 4929 } 4930 4931 qnum = 0; 4932 res_size = 0; 4933 for (i = first_vq; i <= last_vq ; ++i) { 4934 qtype = vq_type(vi, i); 4935 if (ctx->bitmap[qtype]) { 4936 ++qnum; 4937 res_size += ctx->size[qtype]; 4938 } 4939 } 4940 4941 if (enable_cvq && ctx->bitmap[VIRTNET_Q_TYPE_CQ]) { 4942 res_size += ctx->size[VIRTNET_Q_TYPE_CQ]; 4943 qnum += 1; 4944 } 4945 4946 req = kcalloc(qnum, sizeof(*req), GFP_KERNEL); 4947 if (!req) 4948 return -ENOMEM; 4949 4950 reply = kmalloc(res_size, GFP_KERNEL); 4951 if (!reply) { 4952 kfree(req); 4953 return -ENOMEM; 4954 } 4955 4956 j = 0; 4957 for (i = first_vq; i <= last_vq ; ++i) 4958 virtnet_make_stat_req(vi, ctx, req, i, &j); 4959 4960 if (enable_cvq) 4961 virtnet_make_stat_req(vi, ctx, req, vi->max_queue_pairs * 2, &j); 4962 4963 ok = __virtnet_get_hw_stats(vi, ctx, req, sizeof(*req) * j, reply, res_size); 4964 4965 kfree(req); 4966 kfree(reply); 4967 4968 return ok; 4969 } 4970 4971 static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data) 4972 { 4973 struct virtnet_info *vi = netdev_priv(dev); 4974 unsigned int i; 4975 u8 *p = data; 4976 4977 switch (stringset) { 4978 case ETH_SS_STATS: 4979 /* Generate the total field names. */ 4980 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, -1, &p); 4981 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, -1, &p); 4982 4983 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_CQ, 0, &p); 4984 4985 for (i = 0; i < vi->curr_queue_pairs; ++i) 4986 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, i, &p); 4987 4988 for (i = 0; i < vi->curr_queue_pairs; ++i) 4989 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, i, &p); 4990 break; 4991 } 4992 } 4993 4994 static int virtnet_get_sset_count(struct net_device *dev, int sset) 4995 { 4996 struct virtnet_info *vi = netdev_priv(dev); 4997 struct virtnet_stats_ctx ctx = {0}; 4998 u32 pair_count; 4999 5000 switch (sset) { 5001 case ETH_SS_STATS: 5002 virtnet_stats_ctx_init(vi, &ctx, NULL, false); 5003 5004 pair_count = ctx.desc_num[VIRTNET_Q_TYPE_RX] + ctx.desc_num[VIRTNET_Q_TYPE_TX]; 5005 5006 return pair_count + ctx.desc_num[VIRTNET_Q_TYPE_CQ] + 5007 vi->curr_queue_pairs * pair_count; 5008 default: 5009 return -EOPNOTSUPP; 5010 } 5011 } 5012 5013 static void virtnet_get_ethtool_stats(struct net_device *dev, 5014 struct ethtool_stats *stats, u64 *data) 5015 { 5016 struct virtnet_info *vi = netdev_priv(dev); 5017 struct virtnet_stats_ctx ctx = {0}; 5018 unsigned int start, i; 5019 const u8 *stats_base; 5020 5021 virtnet_stats_ctx_init(vi, &ctx, data, false); 5022 if (virtnet_get_hw_stats(vi, &ctx, -1)) 5023 dev_warn(&vi->dev->dev, "Failed to get hw stats.\n"); 5024 5025 for (i = 0; i < vi->curr_queue_pairs; i++) { 5026 struct receive_queue *rq = &vi->rq[i]; 5027 struct send_queue *sq = &vi->sq[i]; 5028 5029 stats_base = (const u8 *)&rq->stats; 5030 do { 5031 start = u64_stats_fetch_begin(&rq->stats.syncp); 5032 virtnet_fill_stats(vi, i * 2, &ctx, stats_base, true, 0); 5033 } while (u64_stats_fetch_retry(&rq->stats.syncp, 
start)); 5034 5035 stats_base = (const u8 *)&sq->stats; 5036 do { 5037 start = u64_stats_fetch_begin(&sq->stats.syncp); 5038 virtnet_fill_stats(vi, i * 2 + 1, &ctx, stats_base, true, 0); 5039 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 5040 } 5041 5042 virtnet_fill_total_fields(vi, &ctx); 5043 } 5044 5045 static void virtnet_get_channels(struct net_device *dev, 5046 struct ethtool_channels *channels) 5047 { 5048 struct virtnet_info *vi = netdev_priv(dev); 5049 5050 channels->combined_count = vi->curr_queue_pairs; 5051 channels->max_combined = vi->max_queue_pairs; 5052 channels->max_other = 0; 5053 channels->rx_count = 0; 5054 channels->tx_count = 0; 5055 channels->other_count = 0; 5056 } 5057 5058 static int virtnet_set_link_ksettings(struct net_device *dev, 5059 const struct ethtool_link_ksettings *cmd) 5060 { 5061 struct virtnet_info *vi = netdev_priv(dev); 5062 5063 return ethtool_virtdev_set_link_ksettings(dev, cmd, 5064 &vi->speed, &vi->duplex); 5065 } 5066 5067 static int virtnet_get_link_ksettings(struct net_device *dev, 5068 struct ethtool_link_ksettings *cmd) 5069 { 5070 struct virtnet_info *vi = netdev_priv(dev); 5071 5072 cmd->base.speed = vi->speed; 5073 cmd->base.duplex = vi->duplex; 5074 cmd->base.port = PORT_OTHER; 5075 5076 return 0; 5077 } 5078 5079 static int virtnet_send_tx_notf_coal_cmds(struct virtnet_info *vi, 5080 struct ethtool_coalesce *ec) 5081 { 5082 struct virtio_net_ctrl_coal_tx *coal_tx __free(kfree) = NULL; 5083 struct scatterlist sgs_tx; 5084 int i; 5085 5086 coal_tx = kzalloc(sizeof(*coal_tx), GFP_KERNEL); 5087 if (!coal_tx) 5088 return -ENOMEM; 5089 5090 coal_tx->tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); 5091 coal_tx->tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); 5092 sg_init_one(&sgs_tx, coal_tx, sizeof(*coal_tx)); 5093 5094 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 5095 VIRTIO_NET_CTRL_NOTF_COAL_TX_SET, 5096 &sgs_tx)) 5097 return -EINVAL; 5098 5099 vi->intr_coal_tx.max_usecs = ec->tx_coalesce_usecs; 5100 vi->intr_coal_tx.max_packets = ec->tx_max_coalesced_frames; 5101 for (i = 0; i < vi->max_queue_pairs; i++) { 5102 vi->sq[i].intr_coal.max_usecs = ec->tx_coalesce_usecs; 5103 vi->sq[i].intr_coal.max_packets = ec->tx_max_coalesced_frames; 5104 } 5105 5106 return 0; 5107 } 5108 5109 static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi, 5110 struct ethtool_coalesce *ec) 5111 { 5112 struct virtio_net_ctrl_coal_rx *coal_rx __free(kfree) = NULL; 5113 bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce; 5114 struct scatterlist sgs_rx; 5115 int i; 5116 5117 if (rx_ctrl_dim_on && !virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 5118 return -EOPNOTSUPP; 5119 5120 if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != vi->intr_coal_rx.max_usecs || 5121 ec->rx_max_coalesced_frames != vi->intr_coal_rx.max_packets)) 5122 return -EINVAL; 5123 5124 if (rx_ctrl_dim_on && !vi->rx_dim_enabled) { 5125 vi->rx_dim_enabled = true; 5126 for (i = 0; i < vi->max_queue_pairs; i++) { 5127 mutex_lock(&vi->rq[i].dim_lock); 5128 vi->rq[i].dim_enabled = true; 5129 mutex_unlock(&vi->rq[i].dim_lock); 5130 } 5131 return 0; 5132 } 5133 5134 coal_rx = kzalloc(sizeof(*coal_rx), GFP_KERNEL); 5135 if (!coal_rx) 5136 return -ENOMEM; 5137 5138 if (!rx_ctrl_dim_on && vi->rx_dim_enabled) { 5139 vi->rx_dim_enabled = false; 5140 for (i = 0; i < vi->max_queue_pairs; i++) { 5141 mutex_lock(&vi->rq[i].dim_lock); 5142 vi->rq[i].dim_enabled = false; 5143 mutex_unlock(&vi->rq[i].dim_lock); 5144 } 5145 } 5146 5147 /* Since the per-queue 
coalescing params can be set, 5148 * we need apply the global new params even if they 5149 * are not updated. 5150 */ 5151 coal_rx->rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs); 5152 coal_rx->rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames); 5153 sg_init_one(&sgs_rx, coal_rx, sizeof(*coal_rx)); 5154 5155 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 5156 VIRTIO_NET_CTRL_NOTF_COAL_RX_SET, 5157 &sgs_rx)) 5158 return -EINVAL; 5159 5160 vi->intr_coal_rx.max_usecs = ec->rx_coalesce_usecs; 5161 vi->intr_coal_rx.max_packets = ec->rx_max_coalesced_frames; 5162 for (i = 0; i < vi->max_queue_pairs; i++) { 5163 mutex_lock(&vi->rq[i].dim_lock); 5164 vi->rq[i].intr_coal.max_usecs = ec->rx_coalesce_usecs; 5165 vi->rq[i].intr_coal.max_packets = ec->rx_max_coalesced_frames; 5166 mutex_unlock(&vi->rq[i].dim_lock); 5167 } 5168 5169 return 0; 5170 } 5171 5172 static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, 5173 struct ethtool_coalesce *ec) 5174 { 5175 int err; 5176 5177 err = virtnet_send_tx_notf_coal_cmds(vi, ec); 5178 if (err) 5179 return err; 5180 5181 err = virtnet_send_rx_notf_coal_cmds(vi, ec); 5182 if (err) 5183 return err; 5184 5185 return 0; 5186 } 5187 5188 static int virtnet_send_rx_notf_coal_vq_cmds(struct virtnet_info *vi, 5189 struct ethtool_coalesce *ec, 5190 u16 queue) 5191 { 5192 bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce; 5193 u32 max_usecs, max_packets; 5194 bool cur_rx_dim; 5195 int err; 5196 5197 mutex_lock(&vi->rq[queue].dim_lock); 5198 cur_rx_dim = vi->rq[queue].dim_enabled; 5199 max_usecs = vi->rq[queue].intr_coal.max_usecs; 5200 max_packets = vi->rq[queue].intr_coal.max_packets; 5201 5202 if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != max_usecs || 5203 ec->rx_max_coalesced_frames != max_packets)) { 5204 mutex_unlock(&vi->rq[queue].dim_lock); 5205 return -EINVAL; 5206 } 5207 5208 if (rx_ctrl_dim_on && !cur_rx_dim) { 5209 vi->rq[queue].dim_enabled = true; 5210 mutex_unlock(&vi->rq[queue].dim_lock); 5211 return 0; 5212 } 5213 5214 if (!rx_ctrl_dim_on && cur_rx_dim) 5215 vi->rq[queue].dim_enabled = false; 5216 5217 /* If no params are updated, userspace ethtool will 5218 * reject the modification. 
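 * Here we can simply push the requested usecs/max_frames to the device
 * with a per-virtqueue VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET command.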
5219 */ 5220 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, queue, 5221 ec->rx_coalesce_usecs, 5222 ec->rx_max_coalesced_frames); 5223 mutex_unlock(&vi->rq[queue].dim_lock); 5224 return err; 5225 } 5226 5227 static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi, 5228 struct ethtool_coalesce *ec, 5229 u16 queue) 5230 { 5231 int err; 5232 5233 err = virtnet_send_rx_notf_coal_vq_cmds(vi, ec, queue); 5234 if (err) 5235 return err; 5236 5237 err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, queue, 5238 ec->tx_coalesce_usecs, 5239 ec->tx_max_coalesced_frames); 5240 if (err) 5241 return err; 5242 5243 return 0; 5244 } 5245 5246 static void virtnet_rx_dim_work(struct work_struct *work) 5247 { 5248 struct dim *dim = container_of(work, struct dim, work); 5249 struct receive_queue *rq = container_of(dim, 5250 struct receive_queue, dim); 5251 struct virtnet_info *vi = rq->vq->vdev->priv; 5252 struct net_device *dev = vi->dev; 5253 struct dim_cq_moder update_moder; 5254 int qnum, err; 5255 5256 qnum = rq - vi->rq; 5257 5258 mutex_lock(&rq->dim_lock); 5259 if (!rq->dim_enabled) 5260 goto out; 5261 5262 update_moder = net_dim_get_rx_irq_moder(dev, dim); 5263 if (update_moder.usec != rq->intr_coal.max_usecs || 5264 update_moder.pkts != rq->intr_coal.max_packets) { 5265 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, qnum, 5266 update_moder.usec, 5267 update_moder.pkts); 5268 if (err) 5269 pr_debug("%s: Failed to send dim parameters on rxq%d\n", 5270 dev->name, qnum); 5271 } 5272 out: 5273 dim->state = DIM_START_MEASURE; 5274 mutex_unlock(&rq->dim_lock); 5275 } 5276 5277 static int virtnet_coal_params_supported(struct ethtool_coalesce *ec) 5278 { 5279 /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL 5280 * or VIRTIO_NET_F_VQ_NOTF_COAL feature is negotiated. 5281 */ 5282 if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs) 5283 return -EOPNOTSUPP; 5284 5285 if (ec->tx_max_coalesced_frames > 1 || 5286 ec->rx_max_coalesced_frames != 1) 5287 return -EINVAL; 5288 5289 return 0; 5290 } 5291 5292 static int virtnet_should_update_vq_weight(int dev_flags, int weight, 5293 int vq_weight, bool *should_update) 5294 { 5295 if (weight ^ vq_weight) { 5296 if (dev_flags & IFF_UP) 5297 return -EBUSY; 5298 *should_update = true; 5299 } 5300 5301 return 0; 5302 } 5303 5304 static int virtnet_set_coalesce(struct net_device *dev, 5305 struct ethtool_coalesce *ec, 5306 struct kernel_ethtool_coalesce *kernel_coal, 5307 struct netlink_ext_ack *extack) 5308 { 5309 struct virtnet_info *vi = netdev_priv(dev); 5310 int ret, queue_number, napi_weight, i; 5311 bool update_napi = false; 5312 5313 /* Can't change NAPI weight if the link is up */ 5314 napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; 5315 for (queue_number = 0; queue_number < vi->max_queue_pairs; queue_number++) { 5316 ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, 5317 vi->sq[queue_number].napi.weight, 5318 &update_napi); 5319 if (ret) 5320 return ret; 5321 5322 if (update_napi) { 5323 /* All queues that belong to [queue_number, vi->max_queue_pairs] will be 5324 * updated for the sake of simplicity, which might not be necessary 5325 */ 5326 break; 5327 } 5328 } 5329 5330 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) 5331 ret = virtnet_send_notf_coal_cmds(vi, ec); 5332 else 5333 ret = virtnet_coal_params_supported(ec); 5334 5335 if (ret) 5336 return ret; 5337 5338 if (update_napi) { 5339 /* xsk xmit depends on the tx napi. So if xsk is active, 5340 * prevent modifications to tx napi. 
5341 */ 5342 for (i = queue_number; i < vi->max_queue_pairs; i++) { 5343 if (vi->sq[i].xsk_pool) 5344 return -EBUSY; 5345 } 5346 5347 for (; queue_number < vi->max_queue_pairs; queue_number++) 5348 vi->sq[queue_number].napi.weight = napi_weight; 5349 } 5350 5351 return ret; 5352 } 5353 5354 static int virtnet_get_coalesce(struct net_device *dev, 5355 struct ethtool_coalesce *ec, 5356 struct kernel_ethtool_coalesce *kernel_coal, 5357 struct netlink_ext_ack *extack) 5358 { 5359 struct virtnet_info *vi = netdev_priv(dev); 5360 5361 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { 5362 ec->rx_coalesce_usecs = vi->intr_coal_rx.max_usecs; 5363 ec->tx_coalesce_usecs = vi->intr_coal_tx.max_usecs; 5364 ec->tx_max_coalesced_frames = vi->intr_coal_tx.max_packets; 5365 ec->rx_max_coalesced_frames = vi->intr_coal_rx.max_packets; 5366 ec->use_adaptive_rx_coalesce = vi->rx_dim_enabled; 5367 } else { 5368 ec->rx_max_coalesced_frames = 1; 5369 5370 if (vi->sq[0].napi.weight) 5371 ec->tx_max_coalesced_frames = 1; 5372 } 5373 5374 return 0; 5375 } 5376 5377 static int virtnet_set_per_queue_coalesce(struct net_device *dev, 5378 u32 queue, 5379 struct ethtool_coalesce *ec) 5380 { 5381 struct virtnet_info *vi = netdev_priv(dev); 5382 int ret, napi_weight; 5383 bool update_napi = false; 5384 5385 if (queue >= vi->max_queue_pairs) 5386 return -EINVAL; 5387 5388 /* Can't change NAPI weight if the link is up */ 5389 napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; 5390 ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, 5391 vi->sq[queue].napi.weight, 5392 &update_napi); 5393 if (ret) 5394 return ret; 5395 5396 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 5397 ret = virtnet_send_notf_coal_vq_cmds(vi, ec, queue); 5398 else 5399 ret = virtnet_coal_params_supported(ec); 5400 5401 if (ret) 5402 return ret; 5403 5404 if (update_napi) 5405 vi->sq[queue].napi.weight = napi_weight; 5406 5407 return 0; 5408 } 5409 5410 static int virtnet_get_per_queue_coalesce(struct net_device *dev, 5411 u32 queue, 5412 struct ethtool_coalesce *ec) 5413 { 5414 struct virtnet_info *vi = netdev_priv(dev); 5415 5416 if (queue >= vi->max_queue_pairs) 5417 return -EINVAL; 5418 5419 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { 5420 mutex_lock(&vi->rq[queue].dim_lock); 5421 ec->rx_coalesce_usecs = vi->rq[queue].intr_coal.max_usecs; 5422 ec->tx_coalesce_usecs = vi->sq[queue].intr_coal.max_usecs; 5423 ec->tx_max_coalesced_frames = vi->sq[queue].intr_coal.max_packets; 5424 ec->rx_max_coalesced_frames = vi->rq[queue].intr_coal.max_packets; 5425 ec->use_adaptive_rx_coalesce = vi->rq[queue].dim_enabled; 5426 mutex_unlock(&vi->rq[queue].dim_lock); 5427 } else { 5428 ec->rx_max_coalesced_frames = 1; 5429 5430 if (vi->sq[queue].napi.weight) 5431 ec->tx_max_coalesced_frames = 1; 5432 } 5433 5434 return 0; 5435 } 5436 5437 static void virtnet_init_settings(struct net_device *dev) 5438 { 5439 struct virtnet_info *vi = netdev_priv(dev); 5440 5441 vi->speed = SPEED_UNKNOWN; 5442 vi->duplex = DUPLEX_UNKNOWN; 5443 } 5444 5445 static u32 virtnet_get_rxfh_key_size(struct net_device *dev) 5446 { 5447 return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size; 5448 } 5449 5450 static u32 virtnet_get_rxfh_indir_size(struct net_device *dev) 5451 { 5452 return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size; 5453 } 5454 5455 static int virtnet_get_rxfh(struct net_device *dev, 5456 struct ethtool_rxfh_param *rxfh) 5457 { 5458 struct virtnet_info *vi = netdev_priv(dev); 5459 int 
i; 5460 5461 if (rxfh->indir) { 5462 for (i = 0; i < vi->rss_indir_table_size; ++i) 5463 rxfh->indir[i] = vi->rss.indirection_table[i]; 5464 } 5465 5466 if (rxfh->key) 5467 memcpy(rxfh->key, vi->rss.key, vi->rss_key_size); 5468 5469 rxfh->hfunc = ETH_RSS_HASH_TOP; 5470 5471 return 0; 5472 } 5473 5474 static int virtnet_set_rxfh(struct net_device *dev, 5475 struct ethtool_rxfh_param *rxfh, 5476 struct netlink_ext_ack *extack) 5477 { 5478 struct virtnet_info *vi = netdev_priv(dev); 5479 bool update = false; 5480 int i; 5481 5482 if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE && 5483 rxfh->hfunc != ETH_RSS_HASH_TOP) 5484 return -EOPNOTSUPP; 5485 5486 if (rxfh->indir) { 5487 if (!vi->has_rss) 5488 return -EOPNOTSUPP; 5489 5490 for (i = 0; i < vi->rss_indir_table_size; ++i) 5491 vi->rss.indirection_table[i] = rxfh->indir[i]; 5492 update = true; 5493 } 5494 5495 if (rxfh->key) { 5496 /* If either _F_HASH_REPORT or _F_RSS are negotiated, the 5497 * device provides hash calculation capabilities, that is, 5498 * hash_key is configured. 5499 */ 5500 if (!vi->has_rss && !vi->has_rss_hash_report) 5501 return -EOPNOTSUPP; 5502 5503 memcpy(vi->rss.key, rxfh->key, vi->rss_key_size); 5504 update = true; 5505 } 5506 5507 if (update) 5508 virtnet_commit_rss_command(vi); 5509 5510 return 0; 5511 } 5512 5513 static int virtnet_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs) 5514 { 5515 struct virtnet_info *vi = netdev_priv(dev); 5516 int rc = 0; 5517 5518 switch (info->cmd) { 5519 case ETHTOOL_GRXRINGS: 5520 info->data = vi->curr_queue_pairs; 5521 break; 5522 case ETHTOOL_GRXFH: 5523 virtnet_get_hashflow(vi, info); 5524 break; 5525 default: 5526 rc = -EOPNOTSUPP; 5527 } 5528 5529 return rc; 5530 } 5531 5532 static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info) 5533 { 5534 struct virtnet_info *vi = netdev_priv(dev); 5535 int rc = 0; 5536 5537 switch (info->cmd) { 5538 case ETHTOOL_SRXFH: 5539 if (!virtnet_set_hashflow(vi, info)) 5540 rc = -EINVAL; 5541 5542 break; 5543 default: 5544 rc = -EOPNOTSUPP; 5545 } 5546 5547 return rc; 5548 } 5549 5550 static const struct ethtool_ops virtnet_ethtool_ops = { 5551 .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | 5552 ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE_RX, 5553 .get_drvinfo = virtnet_get_drvinfo, 5554 .get_link = ethtool_op_get_link, 5555 .get_ringparam = virtnet_get_ringparam, 5556 .set_ringparam = virtnet_set_ringparam, 5557 .get_strings = virtnet_get_strings, 5558 .get_sset_count = virtnet_get_sset_count, 5559 .get_ethtool_stats = virtnet_get_ethtool_stats, 5560 .set_channels = virtnet_set_channels, 5561 .get_channels = virtnet_get_channels, 5562 .get_ts_info = ethtool_op_get_ts_info, 5563 .get_link_ksettings = virtnet_get_link_ksettings, 5564 .set_link_ksettings = virtnet_set_link_ksettings, 5565 .set_coalesce = virtnet_set_coalesce, 5566 .get_coalesce = virtnet_get_coalesce, 5567 .set_per_queue_coalesce = virtnet_set_per_queue_coalesce, 5568 .get_per_queue_coalesce = virtnet_get_per_queue_coalesce, 5569 .get_rxfh_key_size = virtnet_get_rxfh_key_size, 5570 .get_rxfh_indir_size = virtnet_get_rxfh_indir_size, 5571 .get_rxfh = virtnet_get_rxfh, 5572 .set_rxfh = virtnet_set_rxfh, 5573 .get_rxnfc = virtnet_get_rxnfc, 5574 .set_rxnfc = virtnet_set_rxnfc, 5575 }; 5576 5577 static void virtnet_get_queue_stats_rx(struct net_device *dev, int i, 5578 struct netdev_queue_stats_rx *stats) 5579 { 5580 struct virtnet_info *vi = netdev_priv(dev); 5581 struct receive_queue *rq = &vi->rq[i]; 5582 struct 
virtnet_stats_ctx ctx = {0}; 5583 5584 virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true); 5585 5586 virtnet_get_hw_stats(vi, &ctx, i * 2); 5587 virtnet_fill_stats(vi, i * 2, &ctx, (void *)&rq->stats, true, 0); 5588 } 5589 5590 static void virtnet_get_queue_stats_tx(struct net_device *dev, int i, 5591 struct netdev_queue_stats_tx *stats) 5592 { 5593 struct virtnet_info *vi = netdev_priv(dev); 5594 struct send_queue *sq = &vi->sq[i]; 5595 struct virtnet_stats_ctx ctx = {0}; 5596 5597 virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true); 5598 5599 virtnet_get_hw_stats(vi, &ctx, i * 2 + 1); 5600 virtnet_fill_stats(vi, i * 2 + 1, &ctx, (void *)&sq->stats, true, 0); 5601 } 5602 5603 static void virtnet_get_base_stats(struct net_device *dev, 5604 struct netdev_queue_stats_rx *rx, 5605 struct netdev_queue_stats_tx *tx) 5606 { 5607 struct virtnet_info *vi = netdev_priv(dev); 5608 5609 /* The queue stats of the virtio-net will not be reset. So here we 5610 * return 0. 5611 */ 5612 rx->bytes = 0; 5613 rx->packets = 0; 5614 5615 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 5616 rx->hw_drops = 0; 5617 rx->hw_drop_overruns = 0; 5618 } 5619 5620 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 5621 rx->csum_unnecessary = 0; 5622 rx->csum_none = 0; 5623 rx->csum_bad = 0; 5624 } 5625 5626 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 5627 rx->hw_gro_packets = 0; 5628 rx->hw_gro_bytes = 0; 5629 rx->hw_gro_wire_packets = 0; 5630 rx->hw_gro_wire_bytes = 0; 5631 } 5632 5633 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) 5634 rx->hw_drop_ratelimits = 0; 5635 5636 tx->bytes = 0; 5637 tx->packets = 0; 5638 tx->stop = 0; 5639 tx->wake = 0; 5640 5641 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 5642 tx->hw_drops = 0; 5643 tx->hw_drop_errors = 0; 5644 } 5645 5646 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 5647 tx->csum_none = 0; 5648 tx->needs_csum = 0; 5649 } 5650 5651 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 5652 tx->hw_gso_packets = 0; 5653 tx->hw_gso_bytes = 0; 5654 tx->hw_gso_wire_packets = 0; 5655 tx->hw_gso_wire_bytes = 0; 5656 } 5657 5658 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) 5659 tx->hw_drop_ratelimits = 0; 5660 } 5661 5662 static const struct netdev_stat_ops virtnet_stat_ops = { 5663 .get_queue_stats_rx = virtnet_get_queue_stats_rx, 5664 .get_queue_stats_tx = virtnet_get_queue_stats_tx, 5665 .get_base_stats = virtnet_get_base_stats, 5666 }; 5667 5668 static void virtnet_freeze_down(struct virtio_device *vdev) 5669 { 5670 struct virtnet_info *vi = vdev->priv; 5671 5672 /* Make sure no work handler is accessing the device */ 5673 flush_work(&vi->config_work); 5674 disable_rx_mode_work(vi); 5675 flush_work(&vi->rx_mode_work); 5676 5677 netif_tx_lock_bh(vi->dev); 5678 netif_device_detach(vi->dev); 5679 netif_tx_unlock_bh(vi->dev); 5680 if (netif_running(vi->dev)) { 5681 rtnl_lock(); 5682 virtnet_close(vi->dev); 5683 rtnl_unlock(); 5684 } 5685 } 5686 5687 static int init_vqs(struct virtnet_info *vi); 5688 5689 static int virtnet_restore_up(struct virtio_device *vdev) 5690 { 5691 struct virtnet_info *vi = vdev->priv; 5692 int err; 5693 5694 err = init_vqs(vi); 5695 if (err) 5696 return err; 5697 5698 virtio_device_ready(vdev); 5699 5700 enable_delayed_refill(vi); 5701 enable_rx_mode_work(vi); 5702 5703 if (netif_running(vi->dev)) { 5704 rtnl_lock(); 5705 err = virtnet_open(vi->dev); 5706 rtnl_unlock(); 5707 if (err) 5708 return err; 5709 } 5710 5711 netif_tx_lock_bh(vi->dev); 5712 
netif_device_attach(vi->dev); 5713 netif_tx_unlock_bh(vi->dev); 5714 return err; 5715 } 5716 5717 static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads) 5718 { 5719 __virtio64 *_offloads __free(kfree) = NULL; 5720 struct scatterlist sg; 5721 5722 _offloads = kzalloc(sizeof(*_offloads), GFP_KERNEL); 5723 if (!_offloads) 5724 return -ENOMEM; 5725 5726 *_offloads = cpu_to_virtio64(vi->vdev, offloads); 5727 5728 sg_init_one(&sg, _offloads, sizeof(*_offloads)); 5729 5730 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS, 5731 VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) { 5732 dev_warn(&vi->dev->dev, "Fail to set guest offload.\n"); 5733 return -EINVAL; 5734 } 5735 5736 return 0; 5737 } 5738 5739 static int virtnet_clear_guest_offloads(struct virtnet_info *vi) 5740 { 5741 u64 offloads = 0; 5742 5743 if (!vi->guest_offloads) 5744 return 0; 5745 5746 return virtnet_set_guest_offloads(vi, offloads); 5747 } 5748 5749 static int virtnet_restore_guest_offloads(struct virtnet_info *vi) 5750 { 5751 u64 offloads = vi->guest_offloads; 5752 5753 if (!vi->guest_offloads) 5754 return 0; 5755 5756 return virtnet_set_guest_offloads(vi, offloads); 5757 } 5758 5759 static int virtnet_rq_bind_xsk_pool(struct virtnet_info *vi, struct receive_queue *rq, 5760 struct xsk_buff_pool *pool) 5761 { 5762 int err, qindex; 5763 5764 qindex = rq - vi->rq; 5765 5766 if (pool) { 5767 err = xdp_rxq_info_reg(&rq->xsk_rxq_info, vi->dev, qindex, rq->napi.napi_id); 5768 if (err < 0) 5769 return err; 5770 5771 err = xdp_rxq_info_reg_mem_model(&rq->xsk_rxq_info, 5772 MEM_TYPE_XSK_BUFF_POOL, NULL); 5773 if (err < 0) 5774 goto unreg; 5775 5776 xsk_pool_set_rxq_info(pool, &rq->xsk_rxq_info); 5777 } 5778 5779 virtnet_rx_pause(vi, rq); 5780 5781 err = virtqueue_reset(rq->vq, virtnet_rq_unmap_free_buf, NULL); 5782 if (err) { 5783 netdev_err(vi->dev, "reset rx fail: rx queue index: %d err: %d\n", qindex, err); 5784 5785 pool = NULL; 5786 } 5787 5788 rq->xsk_pool = pool; 5789 5790 virtnet_rx_resume(vi, rq); 5791 5792 if (pool) 5793 return 0; 5794 5795 unreg: 5796 xdp_rxq_info_unreg(&rq->xsk_rxq_info); 5797 return err; 5798 } 5799 5800 static int virtnet_sq_bind_xsk_pool(struct virtnet_info *vi, 5801 struct send_queue *sq, 5802 struct xsk_buff_pool *pool) 5803 { 5804 int err, qindex; 5805 5806 qindex = sq - vi->sq; 5807 5808 virtnet_tx_pause(vi, sq); 5809 5810 err = virtqueue_reset(sq->vq, virtnet_sq_free_unused_buf, 5811 virtnet_sq_free_unused_buf_done); 5812 if (err) { 5813 netdev_err(vi->dev, "reset tx fail: tx queue index: %d err: %d\n", qindex, err); 5814 pool = NULL; 5815 } 5816 5817 sq->xsk_pool = pool; 5818 5819 virtnet_tx_resume(vi, sq); 5820 5821 return err; 5822 } 5823 5824 static int virtnet_xsk_pool_enable(struct net_device *dev, 5825 struct xsk_buff_pool *pool, 5826 u16 qid) 5827 { 5828 struct virtnet_info *vi = netdev_priv(dev); 5829 struct receive_queue *rq; 5830 struct device *dma_dev; 5831 struct send_queue *sq; 5832 dma_addr_t hdr_dma; 5833 int err, size; 5834 5835 if (vi->hdr_len > xsk_pool_get_headroom(pool)) 5836 return -EINVAL; 5837 5838 /* In big_packets mode, xdp cannot work, so there is no need to 5839 * initialize xsk of rq. 5840 */ 5841 if (vi->big_packets && !vi->mergeable_rx_bufs) 5842 return -ENOENT; 5843 5844 if (qid >= vi->curr_queue_pairs) 5845 return -EINVAL; 5846 5847 sq = &vi->sq[qid]; 5848 rq = &vi->rq[qid]; 5849 5850 /* xsk assumes that tx and rx must have the same dma device. 
The af-xdp 5851 * may use one buffer to receive from the rx and reuse this buffer to 5852 * send by the tx. So the dma dev of sq and rq must be the same one. 5853 * 5854 * But vq->dma_dev allows every vq has the respective dma dev. So I 5855 * check the dma dev of vq and sq is the same dev. 5856 */ 5857 if (virtqueue_dma_dev(rq->vq) != virtqueue_dma_dev(sq->vq)) 5858 return -EINVAL; 5859 5860 dma_dev = virtqueue_dma_dev(rq->vq); 5861 if (!dma_dev) 5862 return -EINVAL; 5863 5864 size = virtqueue_get_vring_size(rq->vq); 5865 5866 rq->xsk_buffs = kvcalloc(size, sizeof(*rq->xsk_buffs), GFP_KERNEL); 5867 if (!rq->xsk_buffs) 5868 return -ENOMEM; 5869 5870 hdr_dma = virtqueue_dma_map_single_attrs(sq->vq, &xsk_hdr, vi->hdr_len, 5871 DMA_TO_DEVICE, 0); 5872 if (virtqueue_dma_mapping_error(sq->vq, hdr_dma)) 5873 return -ENOMEM; 5874 5875 err = xsk_pool_dma_map(pool, dma_dev, 0); 5876 if (err) 5877 goto err_xsk_map; 5878 5879 err = virtnet_rq_bind_xsk_pool(vi, rq, pool); 5880 if (err) 5881 goto err_rq; 5882 5883 err = virtnet_sq_bind_xsk_pool(vi, sq, pool); 5884 if (err) 5885 goto err_sq; 5886 5887 /* Now, we do not support tx offload(such as tx csum), so all the tx 5888 * virtnet hdr is zero. So all the tx packets can share a single hdr. 5889 */ 5890 sq->xsk_hdr_dma_addr = hdr_dma; 5891 5892 return 0; 5893 5894 err_sq: 5895 virtnet_rq_bind_xsk_pool(vi, rq, NULL); 5896 err_rq: 5897 xsk_pool_dma_unmap(pool, 0); 5898 err_xsk_map: 5899 virtqueue_dma_unmap_single_attrs(rq->vq, hdr_dma, vi->hdr_len, 5900 DMA_TO_DEVICE, 0); 5901 return err; 5902 } 5903 5904 static int virtnet_xsk_pool_disable(struct net_device *dev, u16 qid) 5905 { 5906 struct virtnet_info *vi = netdev_priv(dev); 5907 struct xsk_buff_pool *pool; 5908 struct receive_queue *rq; 5909 struct send_queue *sq; 5910 int err; 5911 5912 if (qid >= vi->curr_queue_pairs) 5913 return -EINVAL; 5914 5915 sq = &vi->sq[qid]; 5916 rq = &vi->rq[qid]; 5917 5918 pool = rq->xsk_pool; 5919 5920 err = virtnet_rq_bind_xsk_pool(vi, rq, NULL); 5921 err |= virtnet_sq_bind_xsk_pool(vi, sq, NULL); 5922 5923 xsk_pool_dma_unmap(pool, 0); 5924 5925 virtqueue_dma_unmap_single_attrs(sq->vq, sq->xsk_hdr_dma_addr, 5926 vi->hdr_len, DMA_TO_DEVICE, 0); 5927 kvfree(rq->xsk_buffs); 5928 5929 return err; 5930 } 5931 5932 static int virtnet_xsk_pool_setup(struct net_device *dev, struct netdev_bpf *xdp) 5933 { 5934 if (xdp->xsk.pool) 5935 return virtnet_xsk_pool_enable(dev, xdp->xsk.pool, 5936 xdp->xsk.queue_id); 5937 else 5938 return virtnet_xsk_pool_disable(dev, xdp->xsk.queue_id); 5939 } 5940 5941 static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, 5942 struct netlink_ext_ack *extack) 5943 { 5944 unsigned int room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM + 5945 sizeof(struct skb_shared_info)); 5946 unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN; 5947 struct virtnet_info *vi = netdev_priv(dev); 5948 struct bpf_prog *old_prog; 5949 u16 xdp_qp = 0, curr_qp; 5950 int i, err; 5951 5952 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) 5953 && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 5954 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 5955 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 5956 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 5957 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM) || 5958 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) || 5959 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6))) { 5960 NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing 
GRO_HW/CSUM, disable GRO_HW/CSUM first"); 5961 return -EOPNOTSUPP; 5962 } 5963 5964 if (vi->mergeable_rx_bufs && !vi->any_header_sg) { 5965 NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required"); 5966 return -EINVAL; 5967 } 5968 5969 if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) { 5970 NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags"); 5971 netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz); 5972 return -EINVAL; 5973 } 5974 5975 curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs; 5976 if (prog) 5977 xdp_qp = nr_cpu_ids; 5978 5979 /* XDP requires extra queues for XDP_TX */ 5980 if (curr_qp + xdp_qp > vi->max_queue_pairs) { 5981 netdev_warn_once(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n", 5982 curr_qp + xdp_qp, vi->max_queue_pairs); 5983 xdp_qp = 0; 5984 } 5985 5986 old_prog = rtnl_dereference(vi->rq[0].xdp_prog); 5987 if (!prog && !old_prog) 5988 return 0; 5989 5990 if (prog) 5991 bpf_prog_add(prog, vi->max_queue_pairs - 1); 5992 5993 /* Make sure NAPI is not using any XDP TX queues for RX. */ 5994 if (netif_running(dev)) { 5995 for (i = 0; i < vi->max_queue_pairs; i++) { 5996 virtnet_napi_disable(&vi->rq[i]); 5997 virtnet_napi_tx_disable(&vi->sq[i]); 5998 } 5999 } 6000 6001 if (!prog) { 6002 for (i = 0; i < vi->max_queue_pairs; i++) { 6003 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 6004 if (i == 0) 6005 virtnet_restore_guest_offloads(vi); 6006 } 6007 synchronize_net(); 6008 } 6009 6010 err = virtnet_set_queues(vi, curr_qp + xdp_qp); 6011 if (err) 6012 goto err; 6013 netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); 6014 vi->xdp_queue_pairs = xdp_qp; 6015 6016 if (prog) { 6017 vi->xdp_enabled = true; 6018 for (i = 0; i < vi->max_queue_pairs; i++) { 6019 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 6020 if (i == 0 && !old_prog) 6021 virtnet_clear_guest_offloads(vi); 6022 } 6023 if (!old_prog) 6024 xdp_features_set_redirect_target(dev, true); 6025 } else { 6026 xdp_features_clear_redirect_target(dev); 6027 vi->xdp_enabled = false; 6028 } 6029 6030 for (i = 0; i < vi->max_queue_pairs; i++) { 6031 if (old_prog) 6032 bpf_prog_put(old_prog); 6033 if (netif_running(dev)) { 6034 virtnet_napi_enable(&vi->rq[i]); 6035 virtnet_napi_tx_enable(&vi->sq[i]); 6036 } 6037 } 6038 6039 return 0; 6040 6041 err: 6042 if (!prog) { 6043 virtnet_clear_guest_offloads(vi); 6044 for (i = 0; i < vi->max_queue_pairs; i++) 6045 rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); 6046 } 6047 6048 if (netif_running(dev)) { 6049 for (i = 0; i < vi->max_queue_pairs; i++) { 6050 virtnet_napi_enable(&vi->rq[i]); 6051 virtnet_napi_tx_enable(&vi->sq[i]); 6052 } 6053 } 6054 if (prog) 6055 bpf_prog_sub(prog, vi->max_queue_pairs - 1); 6056 return err; 6057 } 6058 6059 static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp) 6060 { 6061 switch (xdp->command) { 6062 case XDP_SETUP_PROG: 6063 return virtnet_xdp_set(dev, xdp->prog, xdp->extack); 6064 case XDP_SETUP_XSK_POOL: 6065 return virtnet_xsk_pool_setup(dev, xdp); 6066 default: 6067 return -EINVAL; 6068 } 6069 } 6070 6071 static int virtnet_get_phys_port_name(struct net_device *dev, char *buf, 6072 size_t len) 6073 { 6074 struct virtnet_info *vi = netdev_priv(dev); 6075 int ret; 6076 6077 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 6078 return -EOPNOTSUPP; 6079 6080 ret = snprintf(buf, len, "sby"); 6081 if (ret >= len) 6082 return -EOPNOTSUPP; 6083 6084 return 0; 6085 } 6086 6087 
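/* NETIF_F_GRO_HW is backed by the device guest offloads in
 * GUEST_OFFLOAD_GRO_HW_MASK: toggling it re-negotiates them with
 * VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, and is refused with -EBUSY while
 * an XDP program is attached (XDP already forces those offloads off).
 * Toggling NETIF_F_RXHASH only switches vi->rss.hash_types between the
 * saved hash types and VIRTIO_NET_HASH_REPORT_NONE and re-commits the
 * RSS/hash configuration.
 */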
static int virtnet_set_features(struct net_device *dev, 6088 netdev_features_t features) 6089 { 6090 struct virtnet_info *vi = netdev_priv(dev); 6091 u64 offloads; 6092 int err; 6093 6094 if ((dev->features ^ features) & NETIF_F_GRO_HW) { 6095 if (vi->xdp_enabled) 6096 return -EBUSY; 6097 6098 if (features & NETIF_F_GRO_HW) 6099 offloads = vi->guest_offloads_capable; 6100 else 6101 offloads = vi->guest_offloads_capable & 6102 ~GUEST_OFFLOAD_GRO_HW_MASK; 6103 6104 err = virtnet_set_guest_offloads(vi, offloads); 6105 if (err) 6106 return err; 6107 vi->guest_offloads = offloads; 6108 } 6109 6110 if ((dev->features ^ features) & NETIF_F_RXHASH) { 6111 if (features & NETIF_F_RXHASH) 6112 vi->rss.hash_types = vi->rss_hash_types_saved; 6113 else 6114 vi->rss.hash_types = VIRTIO_NET_HASH_REPORT_NONE; 6115 6116 if (!virtnet_commit_rss_command(vi)) 6117 return -EINVAL; 6118 } 6119 6120 return 0; 6121 } 6122 6123 static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue) 6124 { 6125 struct virtnet_info *priv = netdev_priv(dev); 6126 struct send_queue *sq = &priv->sq[txqueue]; 6127 struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue); 6128 6129 u64_stats_update_begin(&sq->stats.syncp); 6130 u64_stats_inc(&sq->stats.tx_timeouts); 6131 u64_stats_update_end(&sq->stats.syncp); 6132 6133 netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n", 6134 txqueue, sq->name, sq->vq->index, sq->vq->name, 6135 jiffies_to_usecs(jiffies - READ_ONCE(txq->trans_start))); 6136 } 6137 6138 static int virtnet_init_irq_moder(struct virtnet_info *vi) 6139 { 6140 u8 profile_flags = 0, coal_flags = 0; 6141 int ret, i; 6142 6143 profile_flags |= DIM_PROFILE_RX; 6144 coal_flags |= DIM_COALESCE_USEC | DIM_COALESCE_PKTS; 6145 ret = net_dim_init_irq_moder(vi->dev, profile_flags, coal_flags, 6146 DIM_CQ_PERIOD_MODE_START_FROM_EQE, 6147 0, virtnet_rx_dim_work, NULL); 6148 6149 if (ret) 6150 return ret; 6151 6152 for (i = 0; i < vi->max_queue_pairs; i++) 6153 net_dim_setting(vi->dev, &vi->rq[i].dim, false); 6154 6155 return 0; 6156 } 6157 6158 static void virtnet_free_irq_moder(struct virtnet_info *vi) 6159 { 6160 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 6161 return; 6162 6163 rtnl_lock(); 6164 net_dim_free_irq_moder(vi->dev); 6165 rtnl_unlock(); 6166 } 6167 6168 static const struct net_device_ops virtnet_netdev = { 6169 .ndo_open = virtnet_open, 6170 .ndo_stop = virtnet_close, 6171 .ndo_start_xmit = start_xmit, 6172 .ndo_validate_addr = eth_validate_addr, 6173 .ndo_set_mac_address = virtnet_set_mac_address, 6174 .ndo_set_rx_mode = virtnet_set_rx_mode, 6175 .ndo_get_stats64 = virtnet_stats, 6176 .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid, 6177 .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, 6178 .ndo_bpf = virtnet_xdp, 6179 .ndo_xdp_xmit = virtnet_xdp_xmit, 6180 .ndo_xsk_wakeup = virtnet_xsk_wakeup, 6181 .ndo_features_check = passthru_features_check, 6182 .ndo_get_phys_port_name = virtnet_get_phys_port_name, 6183 .ndo_set_features = virtnet_set_features, 6184 .ndo_tx_timeout = virtnet_tx_timeout, 6185 }; 6186 6187 static void virtnet_config_changed_work(struct work_struct *work) 6188 { 6189 struct virtnet_info *vi = 6190 container_of(work, struct virtnet_info, config_work); 6191 u16 v; 6192 6193 if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS, 6194 struct virtio_net_config, status, &v) < 0) 6195 return; 6196 6197 if (v & VIRTIO_NET_S_ANNOUNCE) { 6198 netdev_notify_peers(vi->dev); 6199 virtnet_ack_link_announce(vi); 6200 } 6201 6202 /* Ignore 
unknown (future) status bits */ 6203 v &= VIRTIO_NET_S_LINK_UP; 6204 6205 if (vi->status == v) 6206 return; 6207 6208 vi->status = v; 6209 6210 if (vi->status & VIRTIO_NET_S_LINK_UP) { 6211 virtnet_update_settings(vi); 6212 netif_carrier_on(vi->dev); 6213 netif_tx_wake_all_queues(vi->dev); 6214 } else { 6215 netif_carrier_off(vi->dev); 6216 netif_tx_stop_all_queues(vi->dev); 6217 } 6218 } 6219 6220 static void virtnet_config_changed(struct virtio_device *vdev) 6221 { 6222 struct virtnet_info *vi = vdev->priv; 6223 6224 schedule_work(&vi->config_work); 6225 } 6226 6227 static void virtnet_free_queues(struct virtnet_info *vi) 6228 { 6229 int i; 6230 6231 for (i = 0; i < vi->max_queue_pairs; i++) { 6232 __netif_napi_del(&vi->rq[i].napi); 6233 __netif_napi_del(&vi->sq[i].napi); 6234 } 6235 6236 /* We called __netif_napi_del(), 6237 * we need to respect an RCU grace period before freeing vi->rq 6238 */ 6239 synchronize_net(); 6240 6241 kfree(vi->rq); 6242 kfree(vi->sq); 6243 kfree(vi->ctrl); 6244 } 6245 6246 static void _free_receive_bufs(struct virtnet_info *vi) 6247 { 6248 struct bpf_prog *old_prog; 6249 int i; 6250 6251 for (i = 0; i < vi->max_queue_pairs; i++) { 6252 while (vi->rq[i].pages) 6253 __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0); 6254 6255 old_prog = rtnl_dereference(vi->rq[i].xdp_prog); 6256 RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL); 6257 if (old_prog) 6258 bpf_prog_put(old_prog); 6259 } 6260 } 6261 6262 static void free_receive_bufs(struct virtnet_info *vi) 6263 { 6264 rtnl_lock(); 6265 _free_receive_bufs(vi); 6266 rtnl_unlock(); 6267 } 6268 6269 static void free_receive_page_frags(struct virtnet_info *vi) 6270 { 6271 int i; 6272 for (i = 0; i < vi->max_queue_pairs; i++) 6273 if (vi->rq[i].alloc_frag.page) { 6274 if (vi->rq[i].last_dma) 6275 virtnet_rq_unmap(&vi->rq[i], vi->rq[i].last_dma, 0); 6276 put_page(vi->rq[i].alloc_frag.page); 6277 } 6278 } 6279 6280 static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf) 6281 { 6282 struct virtnet_info *vi = vq->vdev->priv; 6283 struct send_queue *sq; 6284 int i = vq2txq(vq); 6285 6286 sq = &vi->sq[i]; 6287 6288 switch (virtnet_xmit_ptr_unpack(&buf)) { 6289 case VIRTNET_XMIT_TYPE_SKB: 6290 case VIRTNET_XMIT_TYPE_SKB_ORPHAN: 6291 dev_kfree_skb(buf); 6292 break; 6293 6294 case VIRTNET_XMIT_TYPE_XDP: 6295 xdp_return_frame(buf); 6296 break; 6297 6298 case VIRTNET_XMIT_TYPE_XSK: 6299 xsk_tx_completed(sq->xsk_pool, 1); 6300 break; 6301 } 6302 } 6303 6304 static void virtnet_sq_free_unused_buf_done(struct virtqueue *vq) 6305 { 6306 struct virtnet_info *vi = vq->vdev->priv; 6307 int i = vq2txq(vq); 6308 6309 netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, i)); 6310 } 6311 6312 static void free_unused_bufs(struct virtnet_info *vi) 6313 { 6314 void *buf; 6315 int i; 6316 6317 for (i = 0; i < vi->max_queue_pairs; i++) { 6318 struct virtqueue *vq = vi->sq[i].vq; 6319 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 6320 virtnet_sq_free_unused_buf(vq, buf); 6321 cond_resched(); 6322 } 6323 6324 for (i = 0; i < vi->max_queue_pairs; i++) { 6325 struct virtqueue *vq = vi->rq[i].vq; 6326 6327 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 6328 virtnet_rq_unmap_free_buf(vq, buf); 6329 cond_resched(); 6330 } 6331 } 6332 6333 static void virtnet_del_vqs(struct virtnet_info *vi) 6334 { 6335 struct virtio_device *vdev = vi->vdev; 6336 6337 virtnet_clean_affinity(vi); 6338 6339 vdev->config->del_vqs(vdev); 6340 6341 virtnet_free_queues(vi); 6342 } 6343 6344 /* How large should a single buffer be so a queue full of 
these can fit at 6345 * least one full packet? 6346 * Logic below assumes the mergeable buffer header is used. 6347 */ 6348 static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq) 6349 { 6350 const unsigned int hdr_len = vi->hdr_len; 6351 unsigned int rq_size = virtqueue_get_vring_size(vq); 6352 unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu; 6353 unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len; 6354 unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size); 6355 6356 return max(max(min_buf_len, hdr_len) - hdr_len, 6357 (unsigned int)GOOD_PACKET_LEN); 6358 } 6359 6360 static int virtnet_find_vqs(struct virtnet_info *vi) 6361 { 6362 struct virtqueue_info *vqs_info; 6363 struct virtqueue **vqs; 6364 int ret = -ENOMEM; 6365 int total_vqs; 6366 bool *ctx; 6367 u16 i; 6368 6369 /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by 6370 * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by 6371 * possible control vq. 6372 */ 6373 total_vqs = vi->max_queue_pairs * 2 + 6374 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ); 6375 6376 /* Allocate space for find_vqs parameters */ 6377 vqs = kcalloc(total_vqs, sizeof(*vqs), GFP_KERNEL); 6378 if (!vqs) 6379 goto err_vq; 6380 vqs_info = kcalloc(total_vqs, sizeof(*vqs_info), GFP_KERNEL); 6381 if (!vqs_info) 6382 goto err_vqs_info; 6383 if (!vi->big_packets || vi->mergeable_rx_bufs) { 6384 ctx = kcalloc(total_vqs, sizeof(*ctx), GFP_KERNEL); 6385 if (!ctx) 6386 goto err_ctx; 6387 } else { 6388 ctx = NULL; 6389 } 6390 6391 /* Parameters for control virtqueue, if any */ 6392 if (vi->has_cvq) { 6393 vqs_info[total_vqs - 1].name = "control"; 6394 } 6395 6396 /* Allocate/initialize parameters for send/receive virtqueues */ 6397 for (i = 0; i < vi->max_queue_pairs; i++) { 6398 vqs_info[rxq2vq(i)].callback = skb_recv_done; 6399 vqs_info[txq2vq(i)].callback = skb_xmit_done; 6400 sprintf(vi->rq[i].name, "input.%u", i); 6401 sprintf(vi->sq[i].name, "output.%u", i); 6402 vqs_info[rxq2vq(i)].name = vi->rq[i].name; 6403 vqs_info[txq2vq(i)].name = vi->sq[i].name; 6404 if (ctx) 6405 vqs_info[rxq2vq(i)].ctx = true; 6406 } 6407 6408 ret = virtio_find_vqs(vi->vdev, total_vqs, vqs, vqs_info, NULL); 6409 if (ret) 6410 goto err_find; 6411 6412 if (vi->has_cvq) { 6413 vi->cvq = vqs[total_vqs - 1]; 6414 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) 6415 vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; 6416 } 6417 6418 for (i = 0; i < vi->max_queue_pairs; i++) { 6419 vi->rq[i].vq = vqs[rxq2vq(i)]; 6420 vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq); 6421 vi->sq[i].vq = vqs[txq2vq(i)]; 6422 } 6423 6424 /* run here: ret == 0. 
*/ 6425 6426 6427 err_find: 6428 kfree(ctx); 6429 err_ctx: 6430 kfree(vqs_info); 6431 err_vqs_info: 6432 kfree(vqs); 6433 err_vq: 6434 return ret; 6435 } 6436 6437 static int virtnet_alloc_queues(struct virtnet_info *vi) 6438 { 6439 int i; 6440 6441 if (vi->has_cvq) { 6442 vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL); 6443 if (!vi->ctrl) 6444 goto err_ctrl; 6445 } else { 6446 vi->ctrl = NULL; 6447 } 6448 vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL); 6449 if (!vi->sq) 6450 goto err_sq; 6451 vi->rq = kcalloc(vi->max_queue_pairs, sizeof(*vi->rq), GFP_KERNEL); 6452 if (!vi->rq) 6453 goto err_rq; 6454 6455 INIT_DELAYED_WORK(&vi->refill, refill_work); 6456 for (i = 0; i < vi->max_queue_pairs; i++) { 6457 vi->rq[i].pages = NULL; 6458 netif_napi_add_config(vi->dev, &vi->rq[i].napi, virtnet_poll, 6459 i); 6460 vi->rq[i].napi.weight = napi_weight; 6461 netif_napi_add_tx_weight(vi->dev, &vi->sq[i].napi, 6462 virtnet_poll_tx, 6463 napi_tx ? napi_weight : 0); 6464 6465 sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg)); 6466 ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len); 6467 sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg)); 6468 6469 u64_stats_init(&vi->rq[i].stats.syncp); 6470 u64_stats_init(&vi->sq[i].stats.syncp); 6471 mutex_init(&vi->rq[i].dim_lock); 6472 } 6473 6474 return 0; 6475 6476 err_rq: 6477 kfree(vi->sq); 6478 err_sq: 6479 kfree(vi->ctrl); 6480 err_ctrl: 6481 return -ENOMEM; 6482 } 6483 6484 static int init_vqs(struct virtnet_info *vi) 6485 { 6486 int ret; 6487 6488 /* Allocate send & receive queues */ 6489 ret = virtnet_alloc_queues(vi); 6490 if (ret) 6491 goto err; 6492 6493 ret = virtnet_find_vqs(vi); 6494 if (ret) 6495 goto err_free; 6496 6497 cpus_read_lock(); 6498 virtnet_set_affinity(vi); 6499 cpus_read_unlock(); 6500 6501 return 0; 6502 6503 err_free: 6504 virtnet_free_queues(vi); 6505 err: 6506 return ret; 6507 } 6508 6509 #ifdef CONFIG_SYSFS 6510 static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue, 6511 char *buf) 6512 { 6513 struct virtnet_info *vi = netdev_priv(queue->dev); 6514 unsigned int queue_index = get_netdev_rx_queue_index(queue); 6515 unsigned int headroom = virtnet_get_headroom(vi); 6516 unsigned int tailroom = headroom ? 
sizeof(struct skb_shared_info) : 0; 6517 struct ewma_pkt_len *avg; 6518 6519 BUG_ON(queue_index >= vi->max_queue_pairs); 6520 avg = &vi->rq[queue_index].mrg_avg_pkt_len; 6521 return sprintf(buf, "%u\n", 6522 get_mergeable_buf_len(&vi->rq[queue_index], avg, 6523 SKB_DATA_ALIGN(headroom + tailroom))); 6524 } 6525 6526 static struct rx_queue_attribute mergeable_rx_buffer_size_attribute = 6527 __ATTR_RO(mergeable_rx_buffer_size); 6528 6529 static struct attribute *virtio_net_mrg_rx_attrs[] = { 6530 &mergeable_rx_buffer_size_attribute.attr, 6531 NULL 6532 }; 6533 6534 static const struct attribute_group virtio_net_mrg_rx_group = { 6535 .name = "virtio_net", 6536 .attrs = virtio_net_mrg_rx_attrs 6537 }; 6538 #endif 6539 6540 static bool virtnet_fail_on_feature(struct virtio_device *vdev, 6541 unsigned int fbit, 6542 const char *fname, const char *dname) 6543 { 6544 if (!virtio_has_feature(vdev, fbit)) 6545 return false; 6546 6547 dev_err(&vdev->dev, "device advertises feature %s but not %s", 6548 fname, dname); 6549 6550 return true; 6551 } 6552 6553 #define VIRTNET_FAIL_ON(vdev, fbit, dbit) \ 6554 virtnet_fail_on_feature(vdev, fbit, #fbit, dbit) 6555 6556 static bool virtnet_validate_features(struct virtio_device *vdev) 6557 { 6558 if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) && 6559 (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX, 6560 "VIRTIO_NET_F_CTRL_VQ") || 6561 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN, 6562 "VIRTIO_NET_F_CTRL_VQ") || 6563 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE, 6564 "VIRTIO_NET_F_CTRL_VQ") || 6565 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") || 6566 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR, 6567 "VIRTIO_NET_F_CTRL_VQ") || 6568 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, 6569 "VIRTIO_NET_F_CTRL_VQ") || 6570 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT, 6571 "VIRTIO_NET_F_CTRL_VQ") || 6572 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL, 6573 "VIRTIO_NET_F_CTRL_VQ") || 6574 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_VQ_NOTF_COAL, 6575 "VIRTIO_NET_F_CTRL_VQ"))) { 6576 return false; 6577 } 6578 6579 return true; 6580 } 6581 6582 #define MIN_MTU ETH_MIN_MTU 6583 #define MAX_MTU ETH_MAX_MTU 6584 6585 static int virtnet_validate(struct virtio_device *vdev) 6586 { 6587 if (!vdev->config->get) { 6588 dev_err(&vdev->dev, "%s failure: config access disabled\n", 6589 __func__); 6590 return -EINVAL; 6591 } 6592 6593 if (!virtnet_validate_features(vdev)) 6594 return -EINVAL; 6595 6596 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { 6597 int mtu = virtio_cread16(vdev, 6598 offsetof(struct virtio_net_config, 6599 mtu)); 6600 if (mtu < MIN_MTU) 6601 __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU); 6602 } 6603 6604 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY) && 6605 !virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 6606 dev_warn(&vdev->dev, "device advertises feature VIRTIO_NET_F_STANDBY but not VIRTIO_NET_F_MAC, disabling standby"); 6607 __virtio_clear_bit(vdev, VIRTIO_NET_F_STANDBY); 6608 } 6609 6610 return 0; 6611 } 6612 6613 static bool virtnet_check_guest_gso(const struct virtnet_info *vi) 6614 { 6615 return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 6616 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 6617 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 6618 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 6619 (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) && 6620 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6)); 6621 } 6622 6623 static void virtnet_set_big_packets(struct virtnet_info *vi, const int 
mtu)
{
	bool guest_gso = virtnet_check_guest_gso(vi);

	/* If the device can receive ANY guest GSO packets, regardless of MTU,
	 * allocate buffers large enough for maximum-size packets; otherwise
	 * limit the allocation to what an MTU-sized packet needs.
	 */
	if (mtu > ETH_DATA_LEN || guest_gso) {
		vi->big_packets = true;
		vi->big_packets_num_skbfrags = guest_gso ? MAX_SKB_FRAGS : DIV_ROUND_UP(mtu, PAGE_SIZE);
	}
}

#define VIRTIO_NET_HASH_REPORT_MAX_TABLE 10
static enum xdp_rss_hash_type
virtnet_xdp_rss_type[VIRTIO_NET_HASH_REPORT_MAX_TABLE] = {
	[VIRTIO_NET_HASH_REPORT_NONE] = XDP_RSS_TYPE_NONE,
	[VIRTIO_NET_HASH_REPORT_IPv4] = XDP_RSS_TYPE_L3_IPV4,
	[VIRTIO_NET_HASH_REPORT_TCPv4] = XDP_RSS_TYPE_L4_IPV4_TCP,
	[VIRTIO_NET_HASH_REPORT_UDPv4] = XDP_RSS_TYPE_L4_IPV4_UDP,
	[VIRTIO_NET_HASH_REPORT_IPv6] = XDP_RSS_TYPE_L3_IPV6,
	[VIRTIO_NET_HASH_REPORT_TCPv6] = XDP_RSS_TYPE_L4_IPV6_TCP,
	[VIRTIO_NET_HASH_REPORT_UDPv6] = XDP_RSS_TYPE_L4_IPV6_UDP,
	[VIRTIO_NET_HASH_REPORT_IPv6_EX] = XDP_RSS_TYPE_L3_IPV6_EX,
	[VIRTIO_NET_HASH_REPORT_TCPv6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX,
	[VIRTIO_NET_HASH_REPORT_UDPv6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX
};

static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash,
			       enum xdp_rss_hash_type *rss_type)
{
	const struct xdp_buff *xdp = (void *)_ctx;
	struct virtio_net_hdr_v1_hash *hdr_hash;
	struct virtnet_info *vi;
	u16 hash_report;

	if (!(xdp->rxq->dev->features & NETIF_F_RXHASH))
		return -ENODATA;

	vi = netdev_priv(xdp->rxq->dev);
	hdr_hash = (struct virtio_net_hdr_v1_hash *)(xdp->data - vi->hdr_len);
	hash_report = __le16_to_cpu(hdr_hash->hash_report);

	if (hash_report >= VIRTIO_NET_HASH_REPORT_MAX_TABLE)
		hash_report = VIRTIO_NET_HASH_REPORT_NONE;

	*rss_type = virtnet_xdp_rss_type[hash_report];
	*hash = __le32_to_cpu(hdr_hash->hash_value);
	return 0;
}

static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = {
	.xmo_rx_hash = virtnet_xdp_rx_hash,
};

static int virtnet_probe(struct virtio_device *vdev)
{
	int i, err = -ENOMEM;
	struct net_device *dev;
	struct virtnet_info *vi;
	u16 max_queue_pairs;
	int mtu = 0;

	/* Find if host supports multiqueue/rss virtio_net device */
	max_queue_pairs = 1;
	if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ) || virtio_has_feature(vdev, VIRTIO_NET_F_RSS))
		max_queue_pairs =
			virtio_cread16(vdev, offsetof(struct virtio_net_config, max_virtqueue_pairs));

	/* We need at least 2 queues */
	if (max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
	    max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
	    !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
		max_queue_pairs = 1;

	/* Allocate ourselves a network device with room for our info */
	dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs);
	if (!dev)
		return -ENOMEM;

	/* Set up network device as normal. */
	dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE |
			   IFF_TX_SKB_NO_LINEAR;
	dev->netdev_ops = &virtnet_netdev;
	dev->stat_ops = &virtnet_stat_ops;
	dev->features = NETIF_F_HIGHDMA;

	dev->ethtool_ops = &virtnet_ethtool_ops;
	SET_NETDEV_DEV(dev, &vdev->dev);

	/* Do we support "hardware" checksums? */
	if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
		/* This opens up the world of extra features.
*/ 6717 dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG; 6718 if (csum) 6719 dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; 6720 6721 if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) { 6722 dev->hw_features |= NETIF_F_TSO 6723 | NETIF_F_TSO_ECN | NETIF_F_TSO6; 6724 } 6725 /* Individual feature bits: what can host handle? */ 6726 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4)) 6727 dev->hw_features |= NETIF_F_TSO; 6728 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6)) 6729 dev->hw_features |= NETIF_F_TSO6; 6730 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN)) 6731 dev->hw_features |= NETIF_F_TSO_ECN; 6732 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_USO)) 6733 dev->hw_features |= NETIF_F_GSO_UDP_L4; 6734 6735 dev->features |= NETIF_F_GSO_ROBUST; 6736 6737 if (gso) 6738 dev->features |= dev->hw_features & NETIF_F_ALL_TSO; 6739 /* (!csum && gso) case will be fixed by register_netdev() */ 6740 } 6741 6742 /* 1. With VIRTIO_NET_F_GUEST_CSUM negotiation, the driver doesn't 6743 * need to calculate checksums for partially checksummed packets, 6744 * as they're considered valid by the upper layer. 6745 * 2. Without VIRTIO_NET_F_GUEST_CSUM negotiation, the driver only 6746 * receives fully checksummed packets. The device may assist in 6747 * validating these packets' checksums, so the driver won't have to. 6748 */ 6749 dev->features |= NETIF_F_RXCSUM; 6750 6751 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || 6752 virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)) 6753 dev->features |= NETIF_F_GRO_HW; 6754 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) 6755 dev->hw_features |= NETIF_F_GRO_HW; 6756 6757 dev->vlan_features = dev->features; 6758 dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | 6759 NETDEV_XDP_ACT_XSK_ZEROCOPY; 6760 6761 /* MTU range: 68 - 65535 */ 6762 dev->min_mtu = MIN_MTU; 6763 dev->max_mtu = MAX_MTU; 6764 6765 /* Configuration may specify what MAC to use. Otherwise random. 
*/ 6766 if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 6767 u8 addr[ETH_ALEN]; 6768 6769 virtio_cread_bytes(vdev, 6770 offsetof(struct virtio_net_config, mac), 6771 addr, ETH_ALEN); 6772 eth_hw_addr_set(dev, addr); 6773 } else { 6774 eth_hw_addr_random(dev); 6775 dev_info(&vdev->dev, "Assigned random MAC address %pM\n", 6776 dev->dev_addr); 6777 } 6778 6779 /* Set up our device-specific information */ 6780 vi = netdev_priv(dev); 6781 vi->dev = dev; 6782 vi->vdev = vdev; 6783 vdev->priv = vi; 6784 6785 INIT_WORK(&vi->config_work, virtnet_config_changed_work); 6786 INIT_WORK(&vi->rx_mode_work, virtnet_rx_mode_work); 6787 spin_lock_init(&vi->refill_lock); 6788 6789 if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) { 6790 vi->mergeable_rx_bufs = true; 6791 dev->xdp_features |= NETDEV_XDP_ACT_RX_SG; 6792 } 6793 6794 if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) 6795 vi->has_rss_hash_report = true; 6796 6797 if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) { 6798 vi->has_rss = true; 6799 6800 vi->rss_indir_table_size = 6801 virtio_cread16(vdev, offsetof(struct virtio_net_config, 6802 rss_max_indirection_table_length)); 6803 } 6804 err = rss_indirection_table_alloc(&vi->rss, vi->rss_indir_table_size); 6805 if (err) 6806 goto free; 6807 6808 if (vi->has_rss || vi->has_rss_hash_report) { 6809 vi->rss_key_size = 6810 virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size)); 6811 if (vi->rss_key_size > VIRTIO_NET_RSS_MAX_KEY_SIZE) { 6812 dev_err(&vdev->dev, "rss_max_key_size=%u exceeds the limit %u.\n", 6813 vi->rss_key_size, VIRTIO_NET_RSS_MAX_KEY_SIZE); 6814 err = -EINVAL; 6815 goto free; 6816 } 6817 6818 vi->rss_hash_types_supported = 6819 virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types)); 6820 vi->rss_hash_types_supported &= 6821 ~(VIRTIO_NET_RSS_HASH_TYPE_IP_EX | 6822 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | 6823 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX); 6824 6825 dev->hw_features |= NETIF_F_RXHASH; 6826 dev->xdp_metadata_ops = &virtnet_xdp_metadata_ops; 6827 } 6828 6829 if (vi->has_rss_hash_report) 6830 vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash); 6831 else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) || 6832 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 6833 vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); 6834 else 6835 vi->hdr_len = sizeof(struct virtio_net_hdr); 6836 6837 if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) || 6838 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 6839 vi->any_header_sg = true; 6840 6841 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) 6842 vi->has_cvq = true; 6843 6844 mutex_init(&vi->cvq_lock); 6845 6846 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { 6847 mtu = virtio_cread16(vdev, 6848 offsetof(struct virtio_net_config, 6849 mtu)); 6850 if (mtu < dev->min_mtu) { 6851 /* Should never trigger: MTU was previously validated 6852 * in virtnet_validate. 
6853 */ 6854 dev_err(&vdev->dev, 6855 "device MTU appears to have changed it is now %d < %d", 6856 mtu, dev->min_mtu); 6857 err = -EINVAL; 6858 goto free; 6859 } 6860 6861 dev->mtu = mtu; 6862 dev->max_mtu = mtu; 6863 } 6864 6865 virtnet_set_big_packets(vi, mtu); 6866 6867 if (vi->any_header_sg) 6868 dev->needed_headroom = vi->hdr_len; 6869 6870 /* Enable multiqueue by default */ 6871 if (num_online_cpus() >= max_queue_pairs) 6872 vi->curr_queue_pairs = max_queue_pairs; 6873 else 6874 vi->curr_queue_pairs = num_online_cpus(); 6875 vi->max_queue_pairs = max_queue_pairs; 6876 6877 /* Allocate/initialize the rx/tx queues, and invoke find_vqs */ 6878 err = init_vqs(vi); 6879 if (err) 6880 goto free; 6881 6882 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { 6883 vi->intr_coal_rx.max_usecs = 0; 6884 vi->intr_coal_tx.max_usecs = 0; 6885 vi->intr_coal_rx.max_packets = 0; 6886 6887 /* Keep the default values of the coalescing parameters 6888 * aligned with the default napi_tx state. 6889 */ 6890 if (vi->sq[0].napi.weight) 6891 vi->intr_coal_tx.max_packets = 1; 6892 else 6893 vi->intr_coal_tx.max_packets = 0; 6894 } 6895 6896 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { 6897 /* The reason is the same as VIRTIO_NET_F_NOTF_COAL. */ 6898 for (i = 0; i < vi->max_queue_pairs; i++) 6899 if (vi->sq[i].napi.weight) 6900 vi->sq[i].intr_coal.max_packets = 1; 6901 6902 err = virtnet_init_irq_moder(vi); 6903 if (err) 6904 goto free; 6905 } 6906 6907 #ifdef CONFIG_SYSFS 6908 if (vi->mergeable_rx_bufs) 6909 dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group; 6910 #endif 6911 netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs); 6912 netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs); 6913 6914 virtnet_init_settings(dev); 6915 6916 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { 6917 vi->failover = net_failover_create(vi->dev); 6918 if (IS_ERR(vi->failover)) { 6919 err = PTR_ERR(vi->failover); 6920 goto free_vqs; 6921 } 6922 } 6923 6924 if (vi->has_rss || vi->has_rss_hash_report) 6925 virtnet_init_default_rss(vi); 6926 6927 enable_rx_mode_work(vi); 6928 6929 /* serialize netdev register + virtio_device_ready() with ndo_open() */ 6930 rtnl_lock(); 6931 6932 err = register_netdevice(dev); 6933 if (err) { 6934 pr_debug("virtio_net: registering device failed\n"); 6935 rtnl_unlock(); 6936 goto free_failover; 6937 } 6938 6939 /* Disable config change notification until ndo_open. */ 6940 virtio_config_driver_disable(vi->vdev); 6941 6942 virtio_device_ready(vdev); 6943 6944 if (vi->has_rss || vi->has_rss_hash_report) { 6945 if (!virtnet_commit_rss_command(vi)) { 6946 dev_warn(&vdev->dev, "RSS disabled because committing failed.\n"); 6947 dev->hw_features &= ~NETIF_F_RXHASH; 6948 vi->has_rss_hash_report = false; 6949 vi->has_rss = false; 6950 } 6951 } 6952 6953 virtnet_set_queues(vi, vi->curr_queue_pairs); 6954 6955 /* a random MAC address has been assigned, notify the device. 
6956 * We don't fail probe if VIRTIO_NET_F_CTRL_MAC_ADDR is not there 6957 * because many devices work fine without getting MAC explicitly 6958 */ 6959 if (!virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && 6960 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 6961 struct scatterlist sg; 6962 6963 sg_init_one(&sg, dev->dev_addr, dev->addr_len); 6964 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 6965 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { 6966 pr_debug("virtio_net: setting MAC address failed\n"); 6967 rtnl_unlock(); 6968 err = -EINVAL; 6969 goto free_unregister_netdev; 6970 } 6971 } 6972 6973 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) { 6974 struct virtio_net_stats_capabilities *stats_cap __free(kfree) = NULL; 6975 struct scatterlist sg; 6976 __le64 v; 6977 6978 stats_cap = kzalloc(sizeof(*stats_cap), GFP_KERNEL); 6979 if (!stats_cap) { 6980 rtnl_unlock(); 6981 err = -ENOMEM; 6982 goto free_unregister_netdev; 6983 } 6984 6985 sg_init_one(&sg, stats_cap, sizeof(*stats_cap)); 6986 6987 if (!virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS, 6988 VIRTIO_NET_CTRL_STATS_QUERY, 6989 NULL, &sg)) { 6990 pr_debug("virtio_net: fail to get stats capability\n"); 6991 rtnl_unlock(); 6992 err = -EINVAL; 6993 goto free_unregister_netdev; 6994 } 6995 6996 v = stats_cap->supported_stats_types[0]; 6997 vi->device_stats_cap = le64_to_cpu(v); 6998 } 6999 7000 /* Assume link up if device can't report link status, 7001 otherwise get link status from config. */ 7002 netif_carrier_off(dev); 7003 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { 7004 virtnet_config_changed_work(&vi->config_work); 7005 } else { 7006 vi->status = VIRTIO_NET_S_LINK_UP; 7007 virtnet_update_settings(vi); 7008 netif_carrier_on(dev); 7009 } 7010 7011 for (i = 0; i < ARRAY_SIZE(guest_offloads); i++) 7012 if (virtio_has_feature(vi->vdev, guest_offloads[i])) 7013 set_bit(guest_offloads[i], &vi->guest_offloads); 7014 vi->guest_offloads_capable = vi->guest_offloads; 7015 7016 rtnl_unlock(); 7017 7018 err = virtnet_cpu_notif_add(vi); 7019 if (err) { 7020 pr_debug("virtio_net: registering cpu notifier failed\n"); 7021 goto free_unregister_netdev; 7022 } 7023 7024 pr_debug("virtnet: registered device %s with %d RX and TX vq's\n", 7025 dev->name, max_queue_pairs); 7026 7027 return 0; 7028 7029 free_unregister_netdev: 7030 unregister_netdev(dev); 7031 free_failover: 7032 net_failover_destroy(vi->failover); 7033 free_vqs: 7034 virtio_reset_device(vdev); 7035 cancel_delayed_work_sync(&vi->refill); 7036 free_receive_page_frags(vi); 7037 virtnet_del_vqs(vi); 7038 free: 7039 free_netdev(dev); 7040 return err; 7041 } 7042 7043 static void remove_vq_common(struct virtnet_info *vi) 7044 { 7045 int i; 7046 7047 virtio_reset_device(vi->vdev); 7048 7049 /* Free unused buffers in both send and recv, if any. */ 7050 free_unused_bufs(vi); 7051 7052 /* 7053 * Rule of thumb is netdev_tx_reset_queue() should follow any 7054 * skb freeing not followed by netdev_tx_completed_queue() 7055 */ 7056 for (i = 0; i < vi->max_queue_pairs; i++) 7057 netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, i)); 7058 7059 free_receive_bufs(vi); 7060 7061 free_receive_page_frags(vi); 7062 7063 virtnet_del_vqs(vi); 7064 } 7065 7066 static void virtnet_remove(struct virtio_device *vdev) 7067 { 7068 struct virtnet_info *vi = vdev->priv; 7069 7070 virtnet_cpu_notif_remove(vi); 7071 7072 /* Make sure no work handler is accessing the device. 
*/ 7073 flush_work(&vi->config_work); 7074 disable_rx_mode_work(vi); 7075 flush_work(&vi->rx_mode_work); 7076 7077 virtnet_free_irq_moder(vi); 7078 7079 unregister_netdev(vi->dev); 7080 7081 net_failover_destroy(vi->failover); 7082 7083 remove_vq_common(vi); 7084 7085 rss_indirection_table_free(&vi->rss); 7086 7087 free_netdev(vi->dev); 7088 } 7089 7090 static __maybe_unused int virtnet_freeze(struct virtio_device *vdev) 7091 { 7092 struct virtnet_info *vi = vdev->priv; 7093 7094 virtnet_cpu_notif_remove(vi); 7095 virtnet_freeze_down(vdev); 7096 remove_vq_common(vi); 7097 7098 return 0; 7099 } 7100 7101 static __maybe_unused int virtnet_restore(struct virtio_device *vdev) 7102 { 7103 struct virtnet_info *vi = vdev->priv; 7104 int err; 7105 7106 err = virtnet_restore_up(vdev); 7107 if (err) 7108 return err; 7109 virtnet_set_queues(vi, vi->curr_queue_pairs); 7110 7111 err = virtnet_cpu_notif_add(vi); 7112 if (err) { 7113 virtnet_freeze_down(vdev); 7114 remove_vq_common(vi); 7115 return err; 7116 } 7117 7118 return 0; 7119 } 7120 7121 static struct virtio_device_id id_table[] = { 7122 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, 7123 { 0 }, 7124 }; 7125 7126 #define VIRTNET_FEATURES \ 7127 VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \ 7128 VIRTIO_NET_F_MAC, \ 7129 VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \ 7130 VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \ 7131 VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \ 7132 VIRTIO_NET_F_HOST_USO, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, \ 7133 VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \ 7134 VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \ 7135 VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \ 7136 VIRTIO_NET_F_CTRL_MAC_ADDR, \ 7137 VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ 7138 VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ 7139 VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \ 7140 VIRTIO_NET_F_VQ_NOTF_COAL, \ 7141 VIRTIO_NET_F_GUEST_HDRLEN, VIRTIO_NET_F_DEVICE_STATS 7142 7143 static unsigned int features[] = { 7144 VIRTNET_FEATURES, 7145 }; 7146 7147 static unsigned int features_legacy[] = { 7148 VIRTNET_FEATURES, 7149 VIRTIO_NET_F_GSO, 7150 VIRTIO_F_ANY_LAYOUT, 7151 }; 7152 7153 static struct virtio_driver virtio_net_driver = { 7154 .feature_table = features, 7155 .feature_table_size = ARRAY_SIZE(features), 7156 .feature_table_legacy = features_legacy, 7157 .feature_table_size_legacy = ARRAY_SIZE(features_legacy), 7158 .driver.name = KBUILD_MODNAME, 7159 .id_table = id_table, 7160 .validate = virtnet_validate, 7161 .probe = virtnet_probe, 7162 .remove = virtnet_remove, 7163 .config_changed = virtnet_config_changed, 7164 #ifdef CONFIG_PM_SLEEP 7165 .freeze = virtnet_freeze, 7166 .restore = virtnet_restore, 7167 #endif 7168 }; 7169 7170 static __init int virtio_net_driver_init(void) 7171 { 7172 int ret; 7173 7174 ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online", 7175 virtnet_cpu_online, 7176 virtnet_cpu_down_prep); 7177 if (ret < 0) 7178 goto out; 7179 virtionet_online = ret; 7180 ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead", 7181 NULL, virtnet_cpu_dead); 7182 if (ret) 7183 goto err_dead; 7184 ret = register_virtio_driver(&virtio_net_driver); 7185 if (ret) 7186 goto err_virtio; 7187 return 0; 7188 err_virtio: 7189 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); 7190 err_dead: 7191 cpuhp_remove_multi_state(virtionet_online); 7192 out: 7193 return ret; 7194 } 7195 
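
/* The CPU hotplug states are registered before the virtio driver itself so
 * that virtnet_probe() -> virtnet_cpu_notif_add() can already attach each
 * device instance to them; the exit path below unwinds in reverse order.
 */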
module_init(virtio_net_driver_init); 7196 7197 static __exit void virtio_net_driver_exit(void) 7198 { 7199 unregister_virtio_driver(&virtio_net_driver); 7200 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); 7201 cpuhp_remove_multi_state(virtionet_online); 7202 } 7203 module_exit(virtio_net_driver_exit); 7204 7205 MODULE_DEVICE_TABLE(virtio, id_table); 7206 MODULE_DESCRIPTION("Virtio network driver"); 7207 MODULE_LICENSE("GPL"); 7208