// SPDX-License-Identifier: GPL-2.0-or-later
/* A network driver using virtio.
 *
 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
 */
//#define DEBUG
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_net.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/scatterlist.h>
#include <linux/if_vlan.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/average.h>
#include <linux/filter.h>
#include <linux/kernel.h>
#include <linux/dim.h>
#include <net/route.h>
#include <net/xdp.h>
#include <net/net_failover.h>
#include <net/netdev_rx_queue.h>
#include <net/netdev_queues.h>
#include <net/xdp_sock_drv.h>

static int napi_weight = NAPI_POLL_WEIGHT;
module_param(napi_weight, int, 0444);

static bool csum = true, gso = true, napi_tx = true;
module_param(csum, bool, 0444);
module_param(gso, bool, 0444);
module_param(napi_tx, bool, 0644);

#define VIRTIO_OFFLOAD_MAP_MIN	46
#define VIRTIO_OFFLOAD_MAP_MAX	47
#define VIRTIO_FEATURES_MAP_MIN	65
#define VIRTIO_O2F_DELTA	(VIRTIO_FEATURES_MAP_MIN - \
				 VIRTIO_OFFLOAD_MAP_MIN)

static bool virtio_is_mapped_offload(unsigned int obit)
{
	return obit >= VIRTIO_OFFLOAD_MAP_MIN &&
	       obit <= VIRTIO_OFFLOAD_MAP_MAX;
}

static unsigned int virtio_offload_to_feature(unsigned int obit)
{
	return virtio_is_mapped_offload(obit) ? obit + VIRTIO_O2F_DELTA : obit;
}

/* FIXME: MTU in config. */
#define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
#define GOOD_COPY_LEN	128

#define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)

/* Separating two types of XDP xmit */
#define VIRTIO_XDP_TX		BIT(0)
#define VIRTIO_XDP_REDIR	BIT(1)

/* RX packet size EWMA. The average packet size is used to determine the packet
 * buffer size when refilling RX rings. As the entire RX ring may be refilled
 * at once, the weight is chosen so that the EWMA will be insensitive to short-
 * term, transient changes in packet size.
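 * (A note on DECLARE_EWMA(pkt_len, 0, 64) below: each new sample moves the
 * average by roughly 1/64 of its distance from the current value, so a short
 * burst of unusually small or large packets barely disturbs the estimate.)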
 */
DECLARE_EWMA(pkt_len, 0, 64)

#define VIRTNET_DRIVER_VERSION "1.0.0"

static const unsigned long guest_offloads[] = {
	VIRTIO_NET_F_GUEST_TSO4,
	VIRTIO_NET_F_GUEST_TSO6,
	VIRTIO_NET_F_GUEST_ECN,
	VIRTIO_NET_F_GUEST_UFO,
	VIRTIO_NET_F_GUEST_CSUM,
	VIRTIO_NET_F_GUEST_USO4,
	VIRTIO_NET_F_GUEST_USO6,
	VIRTIO_NET_F_GUEST_HDRLEN,
	VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_MAPPED,
	VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM_MAPPED,
};

#define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
				(1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
				(1ULL << VIRTIO_NET_F_GUEST_ECN)  | \
				(1ULL << VIRTIO_NET_F_GUEST_UFO)  | \
				(1ULL << VIRTIO_NET_F_GUEST_USO4) | \
				(1ULL << VIRTIO_NET_F_GUEST_USO6) | \
				(1ULL << VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_MAPPED) | \
				(1ULL << VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM_MAPPED))

struct virtnet_stat_desc {
	char desc[ETH_GSTRING_LEN];
	size_t offset;
	size_t qstat_offset;
};

struct virtnet_sq_free_stats {
	u64 packets;
	u64 bytes;
	u64 napi_packets;
	u64 napi_bytes;
	u64 xsk;
};

struct virtnet_sq_stats {
	struct u64_stats_sync syncp;
	u64_stats_t packets;
	u64_stats_t bytes;
	u64_stats_t xdp_tx;
	u64_stats_t xdp_tx_drops;
	u64_stats_t kicks;
	u64_stats_t tx_timeouts;
	u64_stats_t stop;
	u64_stats_t wake;
};

struct virtnet_rq_stats {
	struct u64_stats_sync syncp;
	u64_stats_t packets;
	u64_stats_t bytes;
	u64_stats_t drops;
	u64_stats_t xdp_packets;
	u64_stats_t xdp_tx;
	u64_stats_t xdp_redirects;
	u64_stats_t xdp_drops;
	u64_stats_t kicks;
};

#define VIRTNET_SQ_STAT(name, m) {name, offsetof(struct virtnet_sq_stats, m), -1}
#define VIRTNET_RQ_STAT(name, m) {name, offsetof(struct virtnet_rq_stats, m), -1}

#define VIRTNET_SQ_STAT_QSTAT(name, m)				\
	{							\
		name,						\
		offsetof(struct virtnet_sq_stats, m),		\
		offsetof(struct netdev_queue_stats_tx, m),	\
	}

#define VIRTNET_RQ_STAT_QSTAT(name, m)				\
	{							\
		name,						\
		offsetof(struct virtnet_rq_stats, m),		\
		offsetof(struct netdev_queue_stats_rx, m),	\
	}

static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = {
	VIRTNET_SQ_STAT("xdp_tx", xdp_tx),
	VIRTNET_SQ_STAT("xdp_tx_drops", xdp_tx_drops),
	VIRTNET_SQ_STAT("kicks", kicks),
	VIRTNET_SQ_STAT("tx_timeouts", tx_timeouts),
};

static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = {
	VIRTNET_RQ_STAT("drops", drops),
	VIRTNET_RQ_STAT("xdp_packets", xdp_packets),
	VIRTNET_RQ_STAT("xdp_tx", xdp_tx),
	VIRTNET_RQ_STAT("xdp_redirects", xdp_redirects),
	VIRTNET_RQ_STAT("xdp_drops", xdp_drops),
	VIRTNET_RQ_STAT("kicks", kicks),
};

static const struct virtnet_stat_desc virtnet_sq_stats_desc_qstat[] = {
	VIRTNET_SQ_STAT_QSTAT("packets", packets),
	VIRTNET_SQ_STAT_QSTAT("bytes", bytes),
	VIRTNET_SQ_STAT_QSTAT("stop", stop),
	VIRTNET_SQ_STAT_QSTAT("wake", wake),
};

static const struct virtnet_stat_desc virtnet_rq_stats_desc_qstat[] = {
	VIRTNET_RQ_STAT_QSTAT("packets", packets),
	VIRTNET_RQ_STAT_QSTAT("bytes", bytes),
};

#define VIRTNET_STATS_DESC_CQ(name) \
	{#name, offsetof(struct virtio_net_stats_cvq, name), -1}

#define VIRTNET_STATS_DESC_RX(class, name) \
	{#name, offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), -1}

#define VIRTNET_STATS_DESC_TX(class, name) \
	{#name, offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name), -1}
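
/* In the descriptor tables built from these macros, a qstat_offset of -1 means
 * the counter has no netdev per-queue stats counterpart; the _QSTAT variants
 * additionally map the counter onto the matching netdev_queue_stats_rx/tx
 * field.
 */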

static const struct virtnet_stat_desc virtnet_stats_cvq_desc[] = {
	VIRTNET_STATS_DESC_CQ(command_num),
	VIRTNET_STATS_DESC_CQ(ok_num),
};

static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc[] = {
	VIRTNET_STATS_DESC_RX(basic, packets),
	VIRTNET_STATS_DESC_RX(basic, bytes),

	VIRTNET_STATS_DESC_RX(basic, notifications),
	VIRTNET_STATS_DESC_RX(basic, interrupts),
};

static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc[] = {
	VIRTNET_STATS_DESC_TX(basic, packets),
	VIRTNET_STATS_DESC_TX(basic, bytes),

	VIRTNET_STATS_DESC_TX(basic, notifications),
	VIRTNET_STATS_DESC_TX(basic, interrupts),
};

static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc[] = {
	VIRTNET_STATS_DESC_RX(csum, needs_csum),
};

static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc[] = {
	VIRTNET_STATS_DESC_TX(gso, gso_packets_noseg),
	VIRTNET_STATS_DESC_TX(gso, gso_bytes_noseg),
};

static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc[] = {
	VIRTNET_STATS_DESC_RX(speed, ratelimit_bytes),
};

static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc[] = {
	VIRTNET_STATS_DESC_TX(speed, ratelimit_bytes),
};

#define VIRTNET_STATS_DESC_RX_QSTAT(class, name, qstat_field)			\
	{									\
		#name,								\
		offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name),	\
		offsetof(struct netdev_queue_stats_rx, qstat_field),		\
	}

#define VIRTNET_STATS_DESC_TX_QSTAT(class, name, qstat_field)			\
	{									\
		#name,								\
		offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name),	\
		offsetof(struct netdev_queue_stats_tx, qstat_field),		\
	}

static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc_qstat[] = {
	VIRTNET_STATS_DESC_RX_QSTAT(basic, drops, hw_drops),
	VIRTNET_STATS_DESC_RX_QSTAT(basic, drop_overruns, hw_drop_overruns),
};

static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc_qstat[] = {
	VIRTNET_STATS_DESC_TX_QSTAT(basic, drops, hw_drops),
	VIRTNET_STATS_DESC_TX_QSTAT(basic, drop_malformed, hw_drop_errors),
};

static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc_qstat[] = {
	VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_valid, csum_unnecessary),
	VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_none, csum_none),
	VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_bad, csum_bad),
};

static const struct virtnet_stat_desc virtnet_stats_tx_csum_desc_qstat[] = {
	VIRTNET_STATS_DESC_TX_QSTAT(csum, csum_none, csum_none),
	VIRTNET_STATS_DESC_TX_QSTAT(csum, needs_csum, needs_csum),
};

static const struct virtnet_stat_desc virtnet_stats_rx_gso_desc_qstat[] = {
	VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets, hw_gro_packets),
	VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes, hw_gro_bytes),
	VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets_coalesced, hw_gro_wire_packets),
	VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes_coalesced, hw_gro_wire_bytes),
};

static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc_qstat[] = {
	VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_packets, hw_gso_packets),
	VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_bytes, hw_gso_bytes),
	VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments, hw_gso_wire_packets),
	VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments_bytes, hw_gso_wire_bytes),
};

static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc_qstat[] = {
	VIRTNET_STATS_DESC_RX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits),
};

static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc_qstat[] = {
	VIRTNET_STATS_DESC_TX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits),
};

#define VIRTNET_Q_TYPE_RX 0
#define VIRTNET_Q_TYPE_TX 1
#define VIRTNET_Q_TYPE_CQ 2

struct virtnet_interrupt_coalesce {
	u32 max_packets;
	u32 max_usecs;
};

/* The dma information of pages allocated at a time. */
struct virtnet_rq_dma {
	dma_addr_t addr;
	u32 ref;
	u16 len;
	u16 need_sync;
};

/* Internal representation of a send virtqueue */
struct send_queue {
	/* Virtqueue associated with this send queue */
	struct virtqueue *vq;

	/* TX: fragments + linear part + virtio header */
	struct scatterlist sg[MAX_SKB_FRAGS + 2];

	/* Name of the send queue: output.$index */
	char name[16];

	struct virtnet_sq_stats stats;

	struct virtnet_interrupt_coalesce intr_coal;

	struct napi_struct napi;

	/* Record whether sq is in reset state. */
	bool reset;

	struct xsk_buff_pool *xsk_pool;

	dma_addr_t xsk_hdr_dma_addr;
};

/* Internal representation of a receive virtqueue */
struct receive_queue {
	/* Virtqueue associated with this receive_queue */
	struct virtqueue *vq;

	struct napi_struct napi;

	struct bpf_prog __rcu *xdp_prog;

	struct virtnet_rq_stats stats;

	/* The number of rx notifications */
	u16 calls;

	/* Is dynamic interrupt moderation enabled? */
	bool dim_enabled;

	/* Used to protect dim_enabled and intr_coal */
	struct mutex dim_lock;

	/* Dynamic Interrupt Moderation */
	struct dim dim;

	u32 packets_in_napi;

	struct virtnet_interrupt_coalesce intr_coal;

	/* Chain pages by the private ptr. */
	struct page *pages;

	/* Average packet length for mergeable receive buffers. */
	struct ewma_pkt_len mrg_avg_pkt_len;

	/* Page frag for packet buffer allocation. */
	struct page_frag alloc_frag;

	/* RX: fragments + linear part + virtio header */
	struct scatterlist sg[MAX_SKB_FRAGS + 2];

	/* Min single buffer size for mergeable buffers case. */
	unsigned int min_buf_len;

	/* Name of this receive queue: input.$index */
	char name[16];

	struct xdp_rxq_info xdp_rxq;

	/* Record the last dma info to free after new pages are allocated. */
	struct virtnet_rq_dma *last_dma;

	struct xsk_buff_pool *xsk_pool;

	/* xdp rxq used by xsk */
	struct xdp_rxq_info xsk_rxq_info;

	struct xdp_buff **xsk_buffs;
};

#define VIRTIO_NET_RSS_MAX_KEY_SIZE	40

/* Control VQ buffers: protected by the rtnl lock */
struct control_buf {
	struct virtio_net_ctrl_hdr hdr;
	virtio_net_ctrl_ack status;
};

struct virtnet_info {
	struct virtio_device *vdev;
	struct virtqueue *cvq;
	struct net_device *dev;
	struct send_queue *sq;
	struct receive_queue *rq;
	unsigned int status;

	/* Max # of queue pairs supported by the device */
	u16 max_queue_pairs;

	/* # of queue pairs currently used by the driver */
	u16 curr_queue_pairs;

	/* # of XDP queue pairs currently used by the driver */
	u16 xdp_queue_pairs;

	/* xdp_queue_pairs may be 0, when xdp is already loaded. So add this. */
	bool xdp_enabled;

	/* I like... big packets and I cannot lie!
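	 * In big-packet mode each receive buffer is a chain of pages linked
	 * through page->private; see give_pages() below.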
	 */
	bool big_packets;

	/* number of sg entries allocated for big packets */
	unsigned int big_packets_num_skbfrags;

	/* Host will merge rx buffers for big packets (shake it! shake it!) */
	bool mergeable_rx_bufs;

	/* Host supports rss and/or hash report */
	bool has_rss;
	bool has_rss_hash_report;
	u8 rss_key_size;
	u16 rss_indir_table_size;
	u32 rss_hash_types_supported;
	u32 rss_hash_types_saved;
	struct virtio_net_rss_config_hdr *rss_hdr;
	struct virtio_net_rss_config_trailer rss_trailer;
	u8 rss_hash_key_data[VIRTIO_NET_RSS_MAX_KEY_SIZE];

	/* Has control virtqueue */
	bool has_cvq;

	/* Lock to protect the control VQ */
	struct mutex cvq_lock;

	/* Host can handle any s/g split between our header and packet data */
	bool any_header_sg;

	/* Packet virtio header size */
	u8 hdr_len;

	/* Work struct for delayed refilling if we run low on memory. */
	struct delayed_work refill;

	/* UDP tunnel support */
	bool tx_tnl;

	bool rx_tnl;

	bool rx_tnl_csum;

	/* Is delayed refill enabled? */
	bool refill_enabled;

	/* The lock to synchronize the access to refill_enabled */
	spinlock_t refill_lock;

	/* Work struct for config space updates */
	struct work_struct config_work;

	/* Work struct for setting rx mode */
	struct work_struct rx_mode_work;

	/* OK to queue work setting RX mode? */
	bool rx_mode_work_enabled;

	/* Is the affinity hint set for virtqueues? */
	bool affinity_hint_set;

	/* CPU hotplug instances for online & dead */
	struct hlist_node node;
	struct hlist_node node_dead;

	struct control_buf *ctrl;

	/* Ethtool settings */
	u8 duplex;
	u32 speed;

	/* Is rx dynamic interrupt moderation enabled? */
	bool rx_dim_enabled;

	/* Interrupt coalescing settings */
	struct virtnet_interrupt_coalesce intr_coal_tx;
	struct virtnet_interrupt_coalesce intr_coal_rx;

	unsigned long guest_offloads;
	unsigned long guest_offloads_capable;

	/* failover when STANDBY feature enabled */
	struct failover *failover;

	u64 device_stats_cap;
};

struct padded_vnet_hdr {
	struct virtio_net_hdr_v1_hash hdr;
	/*
	 * hdr is in a separate sg buffer, and data sg buffer shares same page
	 * with this header sg. This padding makes the next sg 16-byte aligned
	 * after the header: sizeof(struct virtio_net_hdr_v1_hash) is 20 bytes,
	 * so with the 12 bytes of padding the header area comes to 32 bytes.
	 */
	char padding[12];
};

struct virtio_net_common_hdr {
	union {
		struct virtio_net_hdr hdr;
		struct virtio_net_hdr_mrg_rxbuf	mrg_hdr;
		struct virtio_net_hdr_v1_hash hash_v1_hdr;
		struct virtio_net_hdr_v1_hash_tunnel tnl_hdr;
	};
};

static struct virtio_net_common_hdr xsk_hdr;

static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
static void virtnet_sq_free_unused_buf_done(struct virtqueue *vq);
static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
			       struct net_device *dev,
			       unsigned int *xdp_xmit,
			       struct virtnet_rq_stats *stats);
static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq,
				 struct sk_buff *skb, u8 flags);
static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb,
					       struct sk_buff *curr_skb,
					       struct page *page, void *buf,
					       int len, int truesize);
static void virtnet_xsk_completed(struct send_queue *sq, int num);

enum virtnet_xmit_type {
	VIRTNET_XMIT_TYPE_SKB,
	VIRTNET_XMIT_TYPE_SKB_ORPHAN,
	VIRTNET_XMIT_TYPE_XDP,
	VIRTNET_XMIT_TYPE_XSK,
};

static size_t virtnet_rss_hdr_size(const struct virtnet_info *vi)
{
	u16 indir_table_size = vi->has_rss ? vi->rss_indir_table_size : 1;

	return struct_size(vi->rss_hdr, indirection_table, indir_table_size);
}

static size_t virtnet_rss_trailer_size(const struct virtnet_info *vi)
{
	return struct_size(&vi->rss_trailer, hash_key_data, vi->rss_key_size);
}

/* We use the last two bits of the pointer to distinguish the xmit type. */
#define VIRTNET_XMIT_TYPE_MASK (BIT(0) | BIT(1))

#define VIRTIO_XSK_FLAG_OFFSET 2

static enum virtnet_xmit_type virtnet_xmit_ptr_unpack(void **ptr)
{
	unsigned long p = (unsigned long)*ptr;

	*ptr = (void *)(p & ~VIRTNET_XMIT_TYPE_MASK);

	return p & VIRTNET_XMIT_TYPE_MASK;
}

static void *virtnet_xmit_ptr_pack(void *ptr, enum virtnet_xmit_type type)
{
	return (void *)((unsigned long)ptr | type);
}

static int virtnet_add_outbuf(struct send_queue *sq, int num, void *data,
			      enum virtnet_xmit_type type)
{
	return virtqueue_add_outbuf(sq->vq, sq->sg, num,
				    virtnet_xmit_ptr_pack(data, type),
				    GFP_ATOMIC);
}

static u32 virtnet_ptr_to_xsk_buff_len(void *ptr)
{
	return ((unsigned long)ptr) >> VIRTIO_XSK_FLAG_OFFSET;
}

static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
{
	sg_dma_address(sg) = addr;
	sg_dma_len(sg) = len;
}

static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
			    bool in_napi, struct virtnet_sq_free_stats *stats)
{
	struct xdp_frame *frame;
	struct sk_buff *skb;
	unsigned int len;
	void *ptr;

	while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
		switch (virtnet_xmit_ptr_unpack(&ptr)) {
		case VIRTNET_XMIT_TYPE_SKB:
			skb = ptr;

			pr_debug("Sent skb %p\n", skb);
			stats->napi_packets++;
			stats->napi_bytes += skb->len;
			napi_consume_skb(skb, in_napi);
			break;

		case VIRTNET_XMIT_TYPE_SKB_ORPHAN:
			skb = ptr;

			stats->packets++;
			stats->bytes += skb->len;
			napi_consume_skb(skb, in_napi);
			break;

		case VIRTNET_XMIT_TYPE_XDP:
			frame = ptr;

			stats->packets++;
			stats->bytes += xdp_get_frame_len(frame);
			xdp_return_frame(frame);
			break;

		case VIRTNET_XMIT_TYPE_XSK:
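			/* The XSK buffer length was packed into the pointer by
			 * virtnet_xsk_to_ptr(); the buffers themselves are
			 * returned to the pool via xsk_tx_completed() by our
			 * callers (see virtnet_free_old_xmit()).
			 */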
			stats->bytes += virtnet_ptr_to_xsk_buff_len(ptr);
			stats->xsk++;
			break;
		}
	}
	netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes);
}

static void virtnet_free_old_xmit(struct send_queue *sq,
				  struct netdev_queue *txq,
				  bool in_napi,
				  struct virtnet_sq_free_stats *stats)
{
	__free_old_xmit(sq, txq, in_napi, stats);

	if (stats->xsk)
		virtnet_xsk_completed(sq, stats->xsk);
}

/* Converting between virtqueue no. and kernel tx/rx queue no.
 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
 */
static int vq2txq(struct virtqueue *vq)
{
	return (vq->index - 1) / 2;
}

static int txq2vq(int txq)
{
	return txq * 2 + 1;
}

static int vq2rxq(struct virtqueue *vq)
{
	return vq->index / 2;
}

static int rxq2vq(int rxq)
{
	return rxq * 2;
}

static int vq_type(struct virtnet_info *vi, int qid)
{
	if (qid == vi->max_queue_pairs * 2)
		return VIRTNET_Q_TYPE_CQ;

	if (qid % 2)
		return VIRTNET_Q_TYPE_TX;

	return VIRTNET_Q_TYPE_RX;
}

static inline struct virtio_net_common_hdr *
skb_vnet_common_hdr(struct sk_buff *skb)
{
	return (struct virtio_net_common_hdr *)skb->cb;
}

/*
 * private is used to chain pages for big packets, put the whole
 * most recent used list in the beginning for reuse
 */
static void give_pages(struct receive_queue *rq, struct page *page)
{
	struct page *end;

	/* Find end of list, sew whole thing into vi->rq.pages. */
	for (end = page; end->private; end = (struct page *)end->private);
	end->private = (unsigned long)rq->pages;
	rq->pages = page;
}

static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
{
	struct page *p = rq->pages;

	if (p) {
		rq->pages = (struct page *)p->private;
		/* clear private here, it is used to chain pages */
		p->private = 0;
	} else
		p = alloc_page(gfp_mask);
	return p;
}

static void virtnet_rq_free_buf(struct virtnet_info *vi,
				struct receive_queue *rq, void *buf)
{
	if (vi->mergeable_rx_bufs)
		put_page(virt_to_head_page(buf));
	else if (vi->big_packets)
		give_pages(rq, buf);
	else
		put_page(virt_to_head_page(buf));
}

static void enable_delayed_refill(struct virtnet_info *vi)
{
	spin_lock_bh(&vi->refill_lock);
	vi->refill_enabled = true;
	spin_unlock_bh(&vi->refill_lock);
}

static void disable_delayed_refill(struct virtnet_info *vi)
{
	spin_lock_bh(&vi->refill_lock);
	vi->refill_enabled = false;
	spin_unlock_bh(&vi->refill_lock);
}

static void enable_rx_mode_work(struct virtnet_info *vi)
{
	rtnl_lock();
	vi->rx_mode_work_enabled = true;
	rtnl_unlock();
}

static void disable_rx_mode_work(struct virtnet_info *vi)
{
	rtnl_lock();
	vi->rx_mode_work_enabled = false;
	rtnl_unlock();
}

static void virtqueue_napi_schedule(struct napi_struct *napi,
				    struct virtqueue *vq)
{
	if (napi_schedule_prep(napi)) {
		virtqueue_disable_cb(vq);
		__napi_schedule(napi);
	}
}

static bool virtqueue_napi_complete(struct napi_struct *napi,
				    struct virtqueue *vq, int processed)
{
	int opaque;

	opaque = virtqueue_enable_cb_prepare(vq);
	if (napi_complete_done(napi, processed)) {
		if (unlikely(virtqueue_poll(vq, opaque)))
			virtqueue_napi_schedule(napi, vq);
		else
			return true;
	} else {
		virtqueue_disable_cb(vq);
	}

	return false;
}

static void skb_xmit_done(struct virtqueue *vq)
{
	struct virtnet_info *vi = vq->vdev->priv;
	struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi;

	/* Suppress further interrupts. */
	virtqueue_disable_cb(vq);

	if (napi->weight)
		virtqueue_napi_schedule(napi, vq);
	else
		/* We were probably waiting for more output buffers. */
		netif_wake_subqueue(vi->dev, vq2txq(vq));
}

#define MRG_CTX_HEADER_SHIFT 22
static void *mergeable_len_to_ctx(unsigned int truesize,
				  unsigned int headroom)
{
	return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize);
}

static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx)
{
	return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT;
}

static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
{
	return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1);
}

static int check_mergeable_len(struct net_device *dev, void *mrg_ctx,
			       unsigned int len)
{
	unsigned int headroom, tailroom, room, truesize;

	truesize = mergeable_ctx_to_truesize(mrg_ctx);
	headroom = mergeable_ctx_to_headroom(mrg_ctx);
	tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
	room = SKB_DATA_ALIGN(headroom + tailroom);

	if (len > truesize - room) {
		pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
			 dev->name, len, (unsigned long)(truesize - room));
		DEV_STATS_INC(dev, rx_length_errors);
		return -1;
	}

	return 0;
}

static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen,
					 unsigned int headroom,
					 unsigned int len)
{
	struct sk_buff *skb;

	skb = build_skb(buf, buflen);
	if (unlikely(!skb))
		return NULL;

	skb_reserve(skb, headroom);
	skb_put(skb, len);

	return skb;
}

/* Called from bottom half context */
static struct sk_buff *page_to_skb(struct virtnet_info *vi,
				   struct receive_queue *rq,
				   struct page *page, unsigned int offset,
				   unsigned int len, unsigned int truesize,
				   unsigned int headroom)
{
	struct sk_buff *skb;
	struct virtio_net_common_hdr *hdr;
	unsigned int copy, hdr_len, hdr_padded_len;
	struct page *page_to_free = NULL;
	int tailroom, shinfo_size;
	char *p, *hdr_p, *buf;

	p = page_address(page) + offset;
	hdr_p = p;

	hdr_len = vi->hdr_len;
	if (vi->mergeable_rx_bufs)
		hdr_padded_len = hdr_len;
	else
		hdr_padded_len = sizeof(struct padded_vnet_hdr);

	buf = p - headroom;
	len -= hdr_len;
	offset += hdr_padded_len;
	p += hdr_padded_len;
	tailroom = truesize - headroom - hdr_padded_len - len;

	shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) {
		skb = virtnet_build_skb(buf, truesize, p - buf, len);
		if (unlikely(!skb))
			return NULL;

		page = (struct page *)page->private;
		if (page)
			give_pages(rq, page);
		goto ok;
	}

	/* copy small packet so we can reuse these pages for small data */
	skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN);
	if (unlikely(!skb))
		return NULL;

	/* Copy all frame if it fits skb->head, otherwise
	 * we let virtio_net_hdr_to_skb() and GRO pull headers as needed.
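	 * (When it does not fit, only ETH_HLEN bytes are copied into the
	 * linear area; the rest of the payload is attached below as page
	 * fragments.)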
	 */
	if (len <= skb_tailroom(skb))
		copy = len;
	else
		copy = ETH_HLEN;
	skb_put_data(skb, p, copy);

	len -= copy;
	offset += copy;

	if (vi->mergeable_rx_bufs) {
		if (len)
			skb_add_rx_frag(skb, 0, page, offset, len, truesize);
		else
			page_to_free = page;
		goto ok;
	}

	BUG_ON(offset >= PAGE_SIZE);
	while (len) {
		unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset,
				frag_size, truesize);
		len -= frag_size;
		page = (struct page *)page->private;
		offset = 0;
	}

	if (page)
		give_pages(rq, page);

ok:
	hdr = skb_vnet_common_hdr(skb);
	memcpy(hdr, hdr_p, hdr_len);
	if (page_to_free)
		put_page(page_to_free);

	return skb;
}

static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len)
{
	struct virtnet_info *vi = rq->vq->vdev->priv;
	struct page *page = virt_to_head_page(buf);
	struct virtnet_rq_dma *dma;
	void *head;
	int offset;

	BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs);

	head = page_address(page);

	dma = head;

	--dma->ref;

	if (dma->need_sync && len) {
		offset = buf - (head + sizeof(*dma));

		virtqueue_map_sync_single_range_for_cpu(rq->vq, dma->addr,
							offset, len,
							DMA_FROM_DEVICE);
	}

	if (dma->ref)
		return;

	virtqueue_unmap_single_attrs(rq->vq, dma->addr, dma->len,
				     DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
	put_page(page);
}

static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx)
{
	struct virtnet_info *vi = rq->vq->vdev->priv;
	void *buf;

	BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs);

	buf = virtqueue_get_buf_ctx(rq->vq, len, ctx);
	if (buf)
		virtnet_rq_unmap(rq, buf, *len);

	return buf;
}

static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len)
{
	struct virtnet_info *vi = rq->vq->vdev->priv;
	struct virtnet_rq_dma *dma;
	dma_addr_t addr;
	u32 offset;
	void *head;

	BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs);

	head = page_address(rq->alloc_frag.page);

	offset = buf - head;

	dma = head;

	addr = dma->addr - sizeof(*dma) + offset;

	sg_init_table(rq->sg, 1);
	sg_fill_dma(rq->sg, addr, len);
}

static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp)
{
	struct page_frag *alloc_frag = &rq->alloc_frag;
	struct virtnet_info *vi = rq->vq->vdev->priv;
	struct virtnet_rq_dma *dma;
	void *buf, *head;
	dma_addr_t addr;

	BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs);

	head = page_address(alloc_frag->page);

	dma = head;

	/* new pages */
	if (!alloc_frag->offset) {
		if (rq->last_dma) {
			/* Now, the new page is allocated, the last dma
			 * will not be used. So the dma can be unmapped
			 * if the ref is 0.
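			 * (Passing len == 0 to virtnet_rq_unmap() merely drops
			 * our reference; the mapping is released only once the
			 * refcount reaches zero.)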
			 */
			virtnet_rq_unmap(rq, rq->last_dma, 0);
			rq->last_dma = NULL;
		}

		dma->len = alloc_frag->size - sizeof(*dma);

		addr = virtqueue_map_single_attrs(rq->vq, dma + 1,
						  dma->len, DMA_FROM_DEVICE, 0);
		if (virtqueue_map_mapping_error(rq->vq, addr))
			return NULL;

		dma->addr = addr;
		dma->need_sync = virtqueue_map_need_sync(rq->vq, addr);

		/* Add a reference to dma to prevent the entire dma from
		 * being released during error handling. This reference
		 * will be freed after the pages are no longer used.
		 */
		get_page(alloc_frag->page);
		dma->ref = 1;
		alloc_frag->offset = sizeof(*dma);

		rq->last_dma = dma;
	}

	++dma->ref;

	buf = head + alloc_frag->offset;

	get_page(alloc_frag->page);
	alloc_frag->offset += size;

	return buf;
}

static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf)
{
	struct virtnet_info *vi = vq->vdev->priv;
	struct receive_queue *rq;
	int i = vq2rxq(vq);

	rq = &vi->rq[i];

	if (rq->xsk_pool) {
		xsk_buff_free((struct xdp_buff *)buf);
		return;
	}

	if (!vi->big_packets || vi->mergeable_rx_bufs)
		virtnet_rq_unmap(rq, buf, 0);

	virtnet_rq_free_buf(vi, rq, buf);
}

static void free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
			  bool in_napi)
{
	struct virtnet_sq_free_stats stats = {0};

	virtnet_free_old_xmit(sq, txq, in_napi, &stats);

	/* Avoid overhead when no packets have been processed; this
	 * happens when called speculatively from start_xmit.
	 */
	if (!stats.packets && !stats.napi_packets)
		return;

	u64_stats_update_begin(&sq->stats.syncp);
	u64_stats_add(&sq->stats.bytes, stats.bytes + stats.napi_bytes);
	u64_stats_add(&sq->stats.packets, stats.packets + stats.napi_packets);
	u64_stats_update_end(&sq->stats.syncp);
}

static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
{
	if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
		return false;
	else if (q < vi->curr_queue_pairs)
		return true;
	else
		return false;
}

static bool tx_may_stop(struct virtnet_info *vi,
			struct net_device *dev,
			struct send_queue *sq)
{
	int qnum;

	qnum = sq - vi->sq;

	/* If running out of space, stop queue to avoid getting packets that we
	 * are then unable to transmit.
	 * An alternative would be to force queuing layer to requeue the skb by
	 * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be
	 * returned in a normal path of operation: it means that driver is not
	 * maintaining the TX queue stop/start state properly, and causes
	 * the stack to do a non-trivial amount of useless work.
	 * Since most packets only take 1 or 2 ring slots, stopping the queue
	 * early means 16 slots are typically wasted.
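	 * (MAX_SKB_FRAGS + 2 descriptors is the worst case for a single skb:
	 * every fragment plus the linear part plus the virtio header.)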
	 */
	if (sq->vq->num_free < MAX_SKB_FRAGS + 2) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);

		netif_tx_stop_queue(txq);
		u64_stats_update_begin(&sq->stats.syncp);
		u64_stats_inc(&sq->stats.stop);
		u64_stats_update_end(&sq->stats.syncp);

		return true;
	}

	return false;
}

static void check_sq_full_and_disable(struct virtnet_info *vi,
				      struct net_device *dev,
				      struct send_queue *sq)
{
	bool use_napi = sq->napi.weight;
	int qnum;

	qnum = sq - vi->sq;

	if (tx_may_stop(vi, dev, sq)) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);

		if (use_napi) {
			if (unlikely(!virtqueue_enable_cb_delayed(sq->vq)))
				virtqueue_napi_schedule(&sq->napi, sq->vq);
		} else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
			/* More just got used, free them then recheck. */
			free_old_xmit(sq, txq, false);
			if (sq->vq->num_free >= MAX_SKB_FRAGS + 2) {
				netif_start_subqueue(dev, qnum);
				u64_stats_update_begin(&sq->stats.syncp);
				u64_stats_inc(&sq->stats.wake);
				u64_stats_update_end(&sq->stats.syncp);
				virtqueue_disable_cb(sq->vq);
			}
		}
	}
}

/* Note that @len is the length of received data without virtio header */
static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi,
				   struct receive_queue *rq, void *buf,
				   u32 len, bool first_buf)
{
	struct xdp_buff *xdp;
	u32 bufsize;

	xdp = (struct xdp_buff *)buf;

	/* In virtnet_add_recvbuf_xsk, we use part of XDP_PACKET_HEADROOM for
	 * the virtio header and ask the vhost to fill data from
	 *     hard_start + XDP_PACKET_HEADROOM - vi->hdr_len
	 * The first buffer carries the virtio header, so the remaining region
	 * for frame data is
	 *     xsk_pool_get_rx_frame_size()
	 * Buffers other than the first one do not carry a virtio header, so
	 * their maximum frame data length is
	 *     xsk_pool_get_rx_frame_size() + vi->hdr_len
	 */
	bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool);
	if (!first_buf)
		bufsize += vi->hdr_len;

	if (unlikely(len > bufsize)) {
		pr_debug("%s: rx error: len %u exceeds truesize %u\n",
			 vi->dev->name, len, bufsize);
		DEV_STATS_INC(vi->dev, rx_length_errors);
		xsk_buff_free(xdp);
		return NULL;
	}

	if (first_buf) {
		xsk_buff_set_size(xdp, len);
	} else {
		xdp_prepare_buff(xdp, xdp->data_hard_start,
				 XDP_PACKET_HEADROOM - vi->hdr_len, len, 1);
		xdp->flags = 0;
	}

	xsk_buff_dma_sync_for_cpu(xdp);

	return xdp;
}

static struct sk_buff *xsk_construct_skb(struct receive_queue *rq,
					 struct xdp_buff *xdp)
{
	unsigned int metasize = xdp->data - xdp->data_meta;
	struct sk_buff *skb;
	unsigned int size;

	size = xdp->data_end - xdp->data_hard_start;
	skb = napi_alloc_skb(&rq->napi, size);
	if (unlikely(!skb)) {
		xsk_buff_free(xdp);
		return NULL;
	}

	skb_reserve(skb, xdp->data_meta - xdp->data_hard_start);

	size = xdp->data_end - xdp->data_meta;
	memcpy(__skb_put(skb, size), xdp->data_meta, size);

	if (metasize) {
		__skb_pull(skb, metasize);
		skb_metadata_set(skb, metasize);
	}

	xsk_buff_free(xdp);

	return skb;
}

static struct sk_buff *virtnet_receive_xsk_small(struct net_device *dev, struct virtnet_info *vi,
						 struct receive_queue *rq, struct xdp_buff *xdp,
						 unsigned int *xdp_xmit,
						 struct virtnet_rq_stats *stats)
{
	struct bpf_prog *prog;
	u32 ret;

	ret = XDP_PASS;
	rcu_read_lock();
	prog = rcu_dereference(rq->xdp_prog);
	if (prog)
		ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats);
	rcu_read_unlock();

	switch (ret) {
	case XDP_PASS:
		return xsk_construct_skb(rq, xdp);

	case XDP_TX:
	case XDP_REDIRECT:
		return NULL;

	default:
		/* drop packet */
		xsk_buff_free(xdp);
		u64_stats_inc(&stats->drops);
		return NULL;
	}
}

static void xsk_drop_follow_bufs(struct net_device *dev,
				 struct receive_queue *rq,
				 u32 num_buf,
				 struct virtnet_rq_stats *stats)
{
	struct xdp_buff *xdp;
	u32 len;

	while (num_buf-- > 1) {
		xdp = virtqueue_get_buf(rq->vq, &len);
		if (unlikely(!xdp)) {
			pr_debug("%s: rx error: %d buffers missing\n",
				 dev->name, num_buf);
			DEV_STATS_INC(dev, rx_length_errors);
			break;
		}
		u64_stats_add(&stats->bytes, len);
		xsk_buff_free(xdp);
	}
}

static int xsk_append_merge_buffer(struct virtnet_info *vi,
				   struct receive_queue *rq,
				   struct sk_buff *head_skb,
				   u32 num_buf,
				   struct virtio_net_hdr_mrg_rxbuf *hdr,
				   struct virtnet_rq_stats *stats)
{
	struct sk_buff *curr_skb;
	struct xdp_buff *xdp;
	u32 len, truesize;
	struct page *page;
	void *buf;

	curr_skb = head_skb;

	while (--num_buf) {
		buf = virtqueue_get_buf(rq->vq, &len);
		if (unlikely(!buf)) {
			pr_debug("%s: rx error: %d buffers out of %d missing\n",
				 vi->dev->name, num_buf,
				 virtio16_to_cpu(vi->vdev,
						 hdr->num_buffers));
			DEV_STATS_INC(vi->dev, rx_length_errors);
			return -EINVAL;
		}

		u64_stats_add(&stats->bytes, len);

		xdp = buf_to_xdp(vi, rq, buf, len, false);
		if (!xdp)
			goto err;

		buf = napi_alloc_frag(len);
		if (!buf) {
			xsk_buff_free(xdp);
			goto err;
		}

		memcpy(buf, xdp->data, len);

		xsk_buff_free(xdp);

		page = virt_to_page(buf);

		truesize = len;

		curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page,
						   buf, len, truesize);
		if (!curr_skb) {
			put_page(page);
			goto err;
		}
	}

	return 0;

err:
	xsk_drop_follow_bufs(vi->dev, rq, num_buf, stats);
	return -EINVAL;
}

static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct virtnet_info *vi,
						 struct receive_queue *rq, struct xdp_buff *xdp,
						 unsigned int *xdp_xmit,
						 struct virtnet_rq_stats *stats)
{
	struct virtio_net_hdr_mrg_rxbuf *hdr;
	struct bpf_prog *prog;
	struct sk_buff *skb;
	u32 ret, num_buf;

	hdr = xdp->data - vi->hdr_len;
	num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);

	ret = XDP_PASS;
	rcu_read_lock();
	prog = rcu_dereference(rq->xdp_prog);
	if (prog) {
		/* TODO: support multi buffer.
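		 * (For now a packet spanning more than one buffer is treated
		 * as XDP_ABORTED and dropped below.)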
		 */
		if (num_buf == 1)
			ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit,
						  stats);
		else
			ret = XDP_ABORTED;
	}
	rcu_read_unlock();

	switch (ret) {
	case XDP_PASS:
		skb = xsk_construct_skb(rq, xdp);
		if (!skb)
			goto drop_bufs;

		if (xsk_append_merge_buffer(vi, rq, skb, num_buf, hdr, stats)) {
			dev_kfree_skb(skb);
			goto drop;
		}

		return skb;

	case XDP_TX:
	case XDP_REDIRECT:
		return NULL;

	default:
		/* drop packet */
		xsk_buff_free(xdp);
	}

drop_bufs:
	xsk_drop_follow_bufs(dev, rq, num_buf, stats);

drop:
	u64_stats_inc(&stats->drops);
	return NULL;
}

static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queue *rq,
				    void *buf, u32 len,
				    unsigned int *xdp_xmit,
				    struct virtnet_rq_stats *stats)
{
	struct net_device *dev = vi->dev;
	struct sk_buff *skb = NULL;
	struct xdp_buff *xdp;
	u8 flags;

	len -= vi->hdr_len;

	u64_stats_add(&stats->bytes, len);

	xdp = buf_to_xdp(vi, rq, buf, len, true);
	if (!xdp)
		return;

	if (unlikely(len < ETH_HLEN)) {
		pr_debug("%s: short packet %i\n", dev->name, len);
		DEV_STATS_INC(dev, rx_length_errors);
		xsk_buff_free(xdp);
		return;
	}

	flags = ((struct virtio_net_common_hdr *)(xdp->data - vi->hdr_len))->hdr.flags;

	if (!vi->mergeable_rx_bufs)
		skb = virtnet_receive_xsk_small(dev, vi, rq, xdp, xdp_xmit, stats);
	else
		skb = virtnet_receive_xsk_merge(dev, vi, rq, xdp, xdp_xmit, stats);

	if (skb)
		virtnet_receive_done(vi, rq, skb, flags);
}

static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue *rq,
				   struct xsk_buff_pool *pool, gfp_t gfp)
{
	struct xdp_buff **xsk_buffs;
	dma_addr_t addr;
	int err = 0;
	u32 len, i;
	int num;

	xsk_buffs = rq->xsk_buffs;

	num = xsk_buff_alloc_batch(pool, xsk_buffs, rq->vq->num_free);
	if (!num)
		return -ENOMEM;

	len = xsk_pool_get_rx_frame_size(pool) + vi->hdr_len;

	for (i = 0; i < num; ++i) {
		/* Use the part of XDP_PACKET_HEADROOM as the virtnet hdr space.
		 * We assume XDP_PACKET_HEADROOM is larger than hdr->len.
		 * (see function virtnet_xsk_pool_enable)
		 */
		addr = xsk_buff_xdp_get_dma(xsk_buffs[i]) - vi->hdr_len;

		sg_init_table(rq->sg, 1);
		sg_fill_dma(rq->sg, addr, len);

		err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1,
						    xsk_buffs[i], NULL, gfp);
		if (err)
			goto err;
	}

	return num;

err:
	for (; i < num; ++i)
		xsk_buff_free(xsk_buffs[i]);

	return err;
}

static void *virtnet_xsk_to_ptr(u32 len)
{
	unsigned long p;

	p = len << VIRTIO_XSK_FLAG_OFFSET;

	return virtnet_xmit_ptr_pack((void *)p, VIRTNET_XMIT_TYPE_XSK);
}

static int virtnet_xsk_xmit_one(struct send_queue *sq,
				struct xsk_buff_pool *pool,
				struct xdp_desc *desc)
{
	struct virtnet_info *vi;
	dma_addr_t addr;

	vi = sq->vq->vdev->priv;

	addr = xsk_buff_raw_get_dma(pool, desc->addr);
	xsk_buff_raw_dma_sync_for_device(pool, addr, desc->len);

	sg_init_table(sq->sg, 2);
	sg_fill_dma(sq->sg, sq->xsk_hdr_dma_addr, vi->hdr_len);
	sg_fill_dma(sq->sg + 1, addr, desc->len);

	return virtqueue_add_outbuf_premapped(sq->vq, sq->sg, 2,
					      virtnet_xsk_to_ptr(desc->len),
					      GFP_ATOMIC);
}

static int virtnet_xsk_xmit_batch(struct send_queue *sq,
				  struct xsk_buff_pool *pool,
				  unsigned int budget,
				  u64 *kicks)
{
	struct xdp_desc *descs = pool->tx_descs;
	bool kick = false;
	u32 nb_pkts, i;
	int err;

	budget = min_t(u32, budget, sq->vq->num_free);

	nb_pkts = xsk_tx_peek_release_desc_batch(pool, budget);
	if (!nb_pkts)
		return 0;

	for (i = 0; i < nb_pkts; i++) {
		err = virtnet_xsk_xmit_one(sq, pool, &descs[i]);
		if (unlikely(err)) {
			xsk_tx_completed(sq->xsk_pool, nb_pkts - i);
			break;
		}

		kick = true;
	}

	if (kick && virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))
		(*kicks)++;

	return i;
}

static bool virtnet_xsk_xmit(struct send_queue *sq, struct xsk_buff_pool *pool,
			     int budget)
{
	struct virtnet_info *vi = sq->vq->vdev->priv;
	struct virtnet_sq_free_stats stats = {};
	struct net_device *dev = vi->dev;
	u64 kicks = 0;
	int sent;

	/* Avoid waking up NAPI needlessly, so call __free_old_xmit() instead
	 * of free_old_xmit().
	 */
	__free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq), true, &stats);

	if (stats.xsk)
		xsk_tx_completed(sq->xsk_pool, stats.xsk);

	sent = virtnet_xsk_xmit_batch(sq, pool, budget, &kicks);

	if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq))
		check_sq_full_and_disable(vi, vi->dev, sq);

	if (sent) {
		struct netdev_queue *txq;

		txq = netdev_get_tx_queue(vi->dev, sq - vi->sq);
		txq_trans_cond_update(txq);
	}

	u64_stats_update_begin(&sq->stats.syncp);
	u64_stats_add(&sq->stats.packets, stats.packets);
	u64_stats_add(&sq->stats.bytes, stats.bytes);
	u64_stats_add(&sq->stats.kicks, kicks);
	u64_stats_add(&sq->stats.xdp_tx, sent);
	u64_stats_update_end(&sq->stats.syncp);

	if (xsk_uses_need_wakeup(pool))
		xsk_set_tx_need_wakeup(pool);

	return sent;
}

static void xsk_wakeup(struct send_queue *sq)
{
	if (napi_if_scheduled_mark_missed(&sq->napi))
		return;

	local_bh_disable();
	virtqueue_napi_schedule(&sq->napi, sq->vq);
	local_bh_enable();
}

static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct send_queue *sq;

	if (!netif_running(dev))
		return -ENETDOWN;

	if (qid >= vi->curr_queue_pairs)
		return -EINVAL;

	sq = &vi->sq[qid];

	xsk_wakeup(sq);
	return 0;
}

static void virtnet_xsk_completed(struct send_queue *sq, int num)
{
	xsk_tx_completed(sq->xsk_pool, num);

	/* When this is called from rx poll, start_xmit or xdp xmit, we should
	 * wake up the tx napi to consume the xsk tx queue, because the tx
	 * interrupt may not be triggered.
	 */
	xsk_wakeup(sq);
}

static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
				  struct send_queue *sq,
				  struct xdp_frame *xdpf)
{
	struct virtio_net_hdr_mrg_rxbuf *hdr;
	struct skb_shared_info *shinfo;
	u8 nr_frags = 0;
	int err, i;

	if (unlikely(xdpf->headroom < vi->hdr_len))
		return -EOVERFLOW;

	if (unlikely(xdp_frame_has_frags(xdpf))) {
		shinfo = xdp_get_shared_info_from_frame(xdpf);
		nr_frags = shinfo->nr_frags;
	}

	/* In the wrapping function virtnet_xdp_xmit(), we need to free
	 * up the pending old buffers, where we need to calculate the
	 * position of skb_shared_info in xdp_get_frame_len() and
	 * xdp_return_frame(), which depend on xdpf->data and
	 * xdpf->headroom. Therefore, we need to update the value of
	 * headroom synchronously here.
	 */
	xdpf->headroom -= vi->hdr_len;
	xdpf->data -= vi->hdr_len;
	/* Zero header and leave csum up to XDP layers */
	hdr = xdpf->data;
	memset(hdr, 0, vi->hdr_len);
	xdpf->len += vi->hdr_len;

	sg_init_table(sq->sg, nr_frags + 1);
	sg_set_buf(sq->sg, xdpf->data, xdpf->len);
	for (i = 0; i < nr_frags; i++) {
		skb_frag_t *frag = &shinfo->frags[i];

		sg_set_page(&sq->sg[i + 1], skb_frag_page(frag),
			    skb_frag_size(frag), skb_frag_off(frag));
	}

	err = virtnet_add_outbuf(sq, nr_frags + 1, xdpf, VIRTNET_XMIT_TYPE_XDP);
	if (unlikely(err))
		return -ENOSPC; /* Caller handle free/refcnt */

	return 0;
}

/* When vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx
 * on the current cpu, so it does not need to be locked. Otherwise the queue
 * may be shared with other CPUs, so the per-queue txq lock must be taken.
 *
 * Here we use a macro instead of inline functions because we have to deal with
 * three issues at the same time: 1. the choice of sq. 2. judge and execute the
 * lock/unlock of txq 3. make sparse happy. It is difficult for two inline
 * functions to perfectly solve these three problems at the same time.
 */
#define virtnet_xdp_get_sq(vi) ({					\
	int cpu = smp_processor_id();					\
	struct netdev_queue *txq;					\
	typeof(vi) v = (vi);						\
	unsigned int qp;						\
									\
	if (v->curr_queue_pairs > nr_cpu_ids) {				\
		qp = v->curr_queue_pairs - v->xdp_queue_pairs;		\
		qp += cpu;						\
		txq = netdev_get_tx_queue(v->dev, qp);			\
		__netif_tx_acquire(txq);				\
	} else {							\
		qp = cpu % v->curr_queue_pairs;				\
		txq = netdev_get_tx_queue(v->dev, qp);			\
		__netif_tx_lock(txq, cpu);				\
	}								\
	v->sq + qp;							\
})

#define virtnet_xdp_put_sq(vi, q) {					\
	struct netdev_queue *txq;					\
	typeof(vi) v = (vi);						\
									\
	txq = netdev_get_tx_queue(v->dev, (q) - v->sq);			\
	if (v->curr_queue_pairs > nr_cpu_ids)				\
		__netif_tx_release(txq);				\
	else								\
		__netif_tx_unlock(txq);					\
}

static int virtnet_xdp_xmit(struct net_device *dev,
			    int n, struct xdp_frame **frames, u32 flags)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct virtnet_sq_free_stats stats = {0};
	struct receive_queue *rq = vi->rq;
	struct bpf_prog *xdp_prog;
	struct send_queue *sq;
	int nxmit = 0;
	int kicks = 0;
	int ret;
	int i;

	/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
	 * indicates XDP resources have been successfully allocated.
	 */
	xdp_prog = rcu_access_pointer(rq->xdp_prog);
	if (!xdp_prog)
		return -ENXIO;

	sq = virtnet_xdp_get_sq(vi);

	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
		ret = -EINVAL;
		goto out;
	}

	/* Free up any pending old buffers before queueing new ones.
	 */
	virtnet_free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq),
			      false, &stats);

	for (i = 0; i < n; i++) {
		struct xdp_frame *xdpf = frames[i];

		if (__virtnet_xdp_xmit_one(vi, sq, xdpf))
			break;
		nxmit++;
	}
	ret = nxmit;

	if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq))
		check_sq_full_and_disable(vi, dev, sq);

	if (flags & XDP_XMIT_FLUSH) {
		if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))
			kicks = 1;
	}
out:
	u64_stats_update_begin(&sq->stats.syncp);
	u64_stats_add(&sq->stats.bytes, stats.bytes);
	u64_stats_add(&sq->stats.packets, stats.packets);
	u64_stats_add(&sq->stats.xdp_tx, n);
	u64_stats_add(&sq->stats.xdp_tx_drops, n - nxmit);
	u64_stats_add(&sq->stats.kicks, kicks);
	u64_stats_update_end(&sq->stats.syncp);

	virtnet_xdp_put_sq(vi, sq);
	return ret;
}

static void put_xdp_frags(struct xdp_buff *xdp)
{
	struct skb_shared_info *shinfo;
	struct page *xdp_page;
	int i;

	if (xdp_buff_has_frags(xdp)) {
		shinfo = xdp_get_shared_info_from_buff(xdp);
		for (i = 0; i < shinfo->nr_frags; i++) {
			xdp_page = skb_frag_page(&shinfo->frags[i]);
			put_page(xdp_page);
		}
	}
}

static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
			       struct net_device *dev,
			       unsigned int *xdp_xmit,
			       struct virtnet_rq_stats *stats)
{
	struct xdp_frame *xdpf;
	int err;
	u32 act;

	act = bpf_prog_run_xdp(xdp_prog, xdp);
	u64_stats_inc(&stats->xdp_packets);

	switch (act) {
	case XDP_PASS:
		return act;

	case XDP_TX:
		u64_stats_inc(&stats->xdp_tx);
		xdpf = xdp_convert_buff_to_frame(xdp);
		if (unlikely(!xdpf)) {
			netdev_dbg(dev, "convert buff to frame failed for xdp\n");
			return XDP_DROP;
		}

		err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
		if (unlikely(!err)) {
			xdp_return_frame_rx_napi(xdpf);
		} else if (unlikely(err < 0)) {
			trace_xdp_exception(dev, xdp_prog, act);
			return XDP_DROP;
		}
		*xdp_xmit |= VIRTIO_XDP_TX;
		return act;

	case XDP_REDIRECT:
		u64_stats_inc(&stats->xdp_redirects);
		err = xdp_do_redirect(dev, xdp, xdp_prog);
		if (err)
			return XDP_DROP;

		*xdp_xmit |= VIRTIO_XDP_REDIR;
		return act;

	default:
		bpf_warn_invalid_xdp_action(dev, xdp_prog, act);
		fallthrough;
	case XDP_ABORTED:
		trace_xdp_exception(dev, xdp_prog, act);
		fallthrough;
	case XDP_DROP:
		return XDP_DROP;
	}
}

static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
{
	return vi->xdp_enabled ? XDP_PACKET_HEADROOM : 0;
}

/* We copy the packet for XDP in the following cases:
 *
 * 1) Packet is scattered across multiple rx buffers.
 * 2) Headroom space is insufficient.
 *
 * This is inefficient but it's a temporary condition that
 * we hit right after XDP is enabled and until queue is refilled
 * with large buffers with sufficient headroom - so it should affect
 * at most queue size packets.
 * Afterwards, the conditions to enable
 * XDP should preclude the underlying device from sending packets
 * across multiple buffers (num_buf > 1), and we make sure buffers
 * have enough headroom.
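 * (xdp_linearize_page() below performs that copy, folding the scattered
 * buffers into a single freshly allocated page so the XDP program sees one
 * linear buffer with the headroom in front.)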
 */
static struct page *xdp_linearize_page(struct net_device *dev,
				       struct receive_queue *rq,
				       int *num_buf,
				       struct page *p,
				       int offset,
				       int page_off,
				       unsigned int *len)
{
	int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	struct page *page;

	if (page_off + *len + tailroom > PAGE_SIZE)
		return NULL;

	page = alloc_page(GFP_ATOMIC);
	if (!page)
		return NULL;

	memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
	page_off += *len;

	/* Only mergeable mode can go inside this while loop. In small mode,
	 * *num_buf == 1, so it cannot go inside.
	 */
	while (--*num_buf) {
		unsigned int buflen;
		void *buf;
		void *ctx;
		int off;

		buf = virtnet_rq_get_buf(rq, &buflen, &ctx);
		if (unlikely(!buf))
			goto err_buf;

		p = virt_to_head_page(buf);
		off = buf - page_address(p);

		if (check_mergeable_len(dev, ctx, buflen)) {
			put_page(p);
			goto err_buf;
		}

		/* guard against a misconfigured or uncooperative backend that
		 * is sending packet larger than the MTU.
		 */
		if ((page_off + buflen + tailroom) > PAGE_SIZE) {
			put_page(p);
			goto err_buf;
		}

		memcpy(page_address(page) + page_off,
		       page_address(p) + off, buflen);
		page_off += buflen;
		put_page(p);
	}

	/* Headroom does not contribute to packet length */
	*len = page_off - XDP_PACKET_HEADROOM;
	return page;
err_buf:
	__free_pages(page, 0);
	return NULL;
}

static struct sk_buff *receive_small_build_skb(struct virtnet_info *vi,
					       unsigned int xdp_headroom,
					       void *buf,
					       unsigned int len)
{
	unsigned int header_offset;
	unsigned int headroom;
	unsigned int buflen;
	struct sk_buff *skb;

	header_offset = VIRTNET_RX_PAD + xdp_headroom;
	headroom = vi->hdr_len + header_offset;
	buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
		 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	skb = virtnet_build_skb(buf, buflen, headroom, len);
	if (unlikely(!skb))
		return NULL;

	buf += header_offset;
	memcpy(skb_vnet_common_hdr(skb), buf, vi->hdr_len);

	return skb;
}

static struct sk_buff *receive_small_xdp(struct net_device *dev,
					 struct virtnet_info *vi,
					 struct receive_queue *rq,
					 struct bpf_prog *xdp_prog,
					 void *buf,
					 unsigned int xdp_headroom,
					 unsigned int len,
					 unsigned int *xdp_xmit,
					 struct virtnet_rq_stats *stats)
{
	unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom;
	unsigned int headroom = vi->hdr_len + header_offset;
	struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset;
	struct page *page = virt_to_head_page(buf);
	struct page *xdp_page;
	unsigned int buflen;
	struct xdp_buff xdp;
	struct sk_buff *skb;
	unsigned int metasize = 0;
	u32 act;

	if (unlikely(hdr->hdr.gso_type))
		goto err_xdp;

	/* Partially checksummed packets must be dropped.
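	 * (VIRTIO_NET_HDR_F_NEEDS_CSUM means the checksum has not been
	 * completed yet, which an XDP program cannot see or fix up.)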
	 */
	if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM))
		goto err_xdp;

	buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
		 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
		int offset = buf - page_address(page) + header_offset;
		unsigned int tlen = len + vi->hdr_len;
		int num_buf = 1;

		xdp_headroom = virtnet_get_headroom(vi);
		header_offset = VIRTNET_RX_PAD + xdp_headroom;
		headroom = vi->hdr_len + header_offset;
		buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
			 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
		xdp_page = xdp_linearize_page(dev, rq, &num_buf, page,
					      offset, header_offset,
					      &tlen);
		if (!xdp_page)
			goto err_xdp;

		buf = page_address(xdp_page);
		put_page(page);
		page = xdp_page;
	}

	xdp_init_buff(&xdp, buflen, &rq->xdp_rxq);
	xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len,
			 xdp_headroom, len, true);

	act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats);

	switch (act) {
	case XDP_PASS:
		/* Recalculate length in case bpf program changed it */
		len = xdp.data_end - xdp.data;
		metasize = xdp.data - xdp.data_meta;
		break;

	case XDP_TX:
	case XDP_REDIRECT:
		goto xdp_xmit;

	default:
		goto err_xdp;
	}

	skb = virtnet_build_skb(buf, buflen, xdp.data - buf, len);
	if (unlikely(!skb))
		goto err;

	if (metasize)
		skb_metadata_set(skb, metasize);

	return skb;

err_xdp:
	u64_stats_inc(&stats->xdp_drops);
err:
	u64_stats_inc(&stats->drops);
	put_page(page);
xdp_xmit:
	return NULL;
}

static struct sk_buff *receive_small(struct net_device *dev,
				     struct virtnet_info *vi,
				     struct receive_queue *rq,
				     void *buf, void *ctx,
				     unsigned int len,
				     unsigned int *xdp_xmit,
				     struct virtnet_rq_stats *stats)
{
	unsigned int xdp_headroom = (unsigned long)ctx;
	struct page *page = virt_to_head_page(buf);
	struct sk_buff *skb;

	/* We passed the address of virtnet header to virtio-core,
	 * so truncate the padding.
	 */
	buf -= VIRTNET_RX_PAD + xdp_headroom;

	len -= vi->hdr_len;
	u64_stats_add(&stats->bytes, len);

	if (unlikely(len > GOOD_PACKET_LEN)) {
		pr_debug("%s: rx error: len %u exceeds max size %d\n",
			 dev->name, len, GOOD_PACKET_LEN);
		DEV_STATS_INC(dev, rx_length_errors);
		goto err;
	}

	if (unlikely(vi->xdp_enabled)) {
		struct bpf_prog *xdp_prog;

		rcu_read_lock();
		xdp_prog = rcu_dereference(rq->xdp_prog);
		if (xdp_prog) {
			skb = receive_small_xdp(dev, vi, rq, xdp_prog, buf,
						xdp_headroom, len, xdp_xmit,
						stats);
			rcu_read_unlock();
			return skb;
		}
		rcu_read_unlock();
	}

	skb = receive_small_build_skb(vi, xdp_headroom, buf, len);
	if (likely(skb))
		return skb;

err:
	u64_stats_inc(&stats->drops);
	put_page(page);
	return NULL;
}

static struct sk_buff *receive_big(struct net_device *dev,
				   struct virtnet_info *vi,
				   struct receive_queue *rq,
				   void *buf,
				   unsigned int len,
				   struct virtnet_rq_stats *stats)
{
	struct page *page = buf;
	struct sk_buff *skb;

	/* Make sure that len does not exceed the size allocated in
	 * add_recvbuf_big.
2108 */ 2109 if (unlikely(len > (vi->big_packets_num_skbfrags + 1) * PAGE_SIZE)) { 2110 pr_debug("%s: rx error: len %u exceeds allocated size %lu\n", 2111 dev->name, len, 2112 (vi->big_packets_num_skbfrags + 1) * PAGE_SIZE); 2113 goto err; 2114 } 2115 2116 skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0); 2117 u64_stats_add(&stats->bytes, len - vi->hdr_len); 2118 if (unlikely(!skb)) 2119 goto err; 2120 2121 return skb; 2122 2123 err: 2124 u64_stats_inc(&stats->drops); 2125 give_pages(rq, page); 2126 return NULL; 2127 } 2128 2129 static void mergeable_buf_free(struct receive_queue *rq, int num_buf, 2130 struct net_device *dev, 2131 struct virtnet_rq_stats *stats) 2132 { 2133 struct page *page; 2134 void *buf; 2135 int len; 2136 2137 while (num_buf-- > 1) { 2138 buf = virtnet_rq_get_buf(rq, &len, NULL); 2139 if (unlikely(!buf)) { 2140 pr_debug("%s: rx error: %d buffers missing\n", 2141 dev->name, num_buf); 2142 DEV_STATS_INC(dev, rx_length_errors); 2143 break; 2144 } 2145 u64_stats_add(&stats->bytes, len); 2146 page = virt_to_head_page(buf); 2147 put_page(page); 2148 } 2149 } 2150 2151 /* Why not use xdp_build_skb_from_frame() ? 2152 * XDP core assumes that xdp frags are PAGE_SIZE in length, while in 2153 * virtio-net there are 2 points that do not match its requirements: 2154 * 1. The size of the prefilled buffer is not fixed before xdp is set. 2155 * 2. xdp_build_skb_from_frame() does more checks that we don't need, 2156 * like eth_type_trans() (which virtio-net does in receive_buf()). 2157 */ 2158 static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev, 2159 struct virtnet_info *vi, 2160 struct xdp_buff *xdp, 2161 unsigned int xdp_frags_truesz) 2162 { 2163 struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 2164 unsigned int headroom, data_len; 2165 struct sk_buff *skb; 2166 int metasize; 2167 u8 nr_frags; 2168 2169 if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) { 2170 pr_debug("Error building skb as missing reserved tailroom for xdp"); 2171 return NULL; 2172 } 2173 2174 if (unlikely(xdp_buff_has_frags(xdp))) 2175 nr_frags = sinfo->nr_frags; 2176 2177 skb = build_skb(xdp->data_hard_start, xdp->frame_sz); 2178 if (unlikely(!skb)) 2179 return NULL; 2180 2181 headroom = xdp->data - xdp->data_hard_start; 2182 data_len = xdp->data_end - xdp->data; 2183 skb_reserve(skb, headroom); 2184 __skb_put(skb, data_len); 2185 2186 metasize = xdp->data - xdp->data_meta; 2187 metasize = metasize > 0 ? 
metasize : 0; 2188 if (metasize) 2189 skb_metadata_set(skb, metasize); 2190 2191 if (unlikely(xdp_buff_has_frags(xdp))) 2192 xdp_update_skb_frags_info(skb, nr_frags, sinfo->xdp_frags_size, 2193 xdp_frags_truesz, 2194 xdp_buff_get_skb_flags(xdp)); 2195 2196 return skb; 2197 } 2198 2199 /* TODO: build xdp in big mode */ 2200 static int virtnet_build_xdp_buff_mrg(struct net_device *dev, 2201 struct virtnet_info *vi, 2202 struct receive_queue *rq, 2203 struct xdp_buff *xdp, 2204 void *buf, 2205 unsigned int len, 2206 unsigned int frame_sz, 2207 int *num_buf, 2208 unsigned int *xdp_frags_truesize, 2209 struct virtnet_rq_stats *stats) 2210 { 2211 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 2212 struct skb_shared_info *shinfo; 2213 unsigned int xdp_frags_truesz = 0; 2214 unsigned int truesize; 2215 struct page *page; 2216 skb_frag_t *frag; 2217 int offset; 2218 void *ctx; 2219 2220 xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq); 2221 xdp_prepare_buff(xdp, buf - XDP_PACKET_HEADROOM, 2222 XDP_PACKET_HEADROOM + vi->hdr_len, len - vi->hdr_len, true); 2223 2224 if (!*num_buf) 2225 return 0; 2226 2227 if (*num_buf > 1) { 2228 /* If we want to build multi-buffer xdp, we need 2229 * to specify that the flags of xdp_buff have the 2230 * XDP_FLAGS_HAS_FRAG bit. 2231 */ 2232 if (!xdp_buff_has_frags(xdp)) 2233 xdp_buff_set_frags_flag(xdp); 2234 2235 shinfo = xdp_get_shared_info_from_buff(xdp); 2236 shinfo->nr_frags = 0; 2237 shinfo->xdp_frags_size = 0; 2238 } 2239 2240 if (*num_buf > MAX_SKB_FRAGS + 1) 2241 return -EINVAL; 2242 2243 while (--*num_buf > 0) { 2244 buf = virtnet_rq_get_buf(rq, &len, &ctx); 2245 if (unlikely(!buf)) { 2246 pr_debug("%s: rx error: %d buffers out of %d missing\n", 2247 dev->name, *num_buf, 2248 virtio16_to_cpu(vi->vdev, hdr->num_buffers)); 2249 DEV_STATS_INC(dev, rx_length_errors); 2250 goto err; 2251 } 2252 2253 u64_stats_add(&stats->bytes, len); 2254 page = virt_to_head_page(buf); 2255 offset = buf - page_address(page); 2256 2257 if (check_mergeable_len(dev, ctx, len)) { 2258 put_page(page); 2259 goto err; 2260 } 2261 2262 truesize = mergeable_ctx_to_truesize(ctx); 2263 xdp_frags_truesz += truesize; 2264 2265 frag = &shinfo->frags[shinfo->nr_frags++]; 2266 skb_frag_fill_page_desc(frag, page, offset, len); 2267 if (page_is_pfmemalloc(page)) 2268 xdp_buff_set_frag_pfmemalloc(xdp); 2269 2270 shinfo->xdp_frags_size += len; 2271 } 2272 2273 *xdp_frags_truesize = xdp_frags_truesz; 2274 return 0; 2275 2276 err: 2277 put_xdp_frags(xdp); 2278 return -EINVAL; 2279 } 2280 2281 static void *mergeable_xdp_get_buf(struct virtnet_info *vi, 2282 struct receive_queue *rq, 2283 struct bpf_prog *xdp_prog, 2284 void *ctx, 2285 unsigned int *frame_sz, 2286 int *num_buf, 2287 struct page **page, 2288 int offset, 2289 unsigned int *len, 2290 struct virtio_net_hdr_mrg_rxbuf *hdr) 2291 { 2292 unsigned int truesize = mergeable_ctx_to_truesize(ctx); 2293 unsigned int headroom = mergeable_ctx_to_headroom(ctx); 2294 struct page *xdp_page; 2295 unsigned int xdp_room; 2296 2297 /* Transient failure which in theory could occur if 2298 * in-flight packets from before XDP was enabled reach 2299 * the receive path after XDP is loaded. 2300 */ 2301 if (unlikely(hdr->hdr.gso_type)) 2302 return NULL; 2303 2304 /* Partially checksummed packets must be dropped. */ 2305 if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) 2306 return NULL; 2307 2308 /* Now XDP core assumes frag size is PAGE_SIZE, but buffers 2309 * with headroom may add hole in truesize, which 2310 * make their length exceed PAGE_SIZE. 
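 * (The "hole" is the unused tail of a page fragment that
 * add_recvbuf_mergeable() would otherwise fold into the last buffer it
 * carves out, growing that buffer's length and truesize beyond what
 * was requested.)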
So we disabled the 2311 * hole mechanism for xdp. See add_recvbuf_mergeable(). 2312 */ 2313 *frame_sz = truesize; 2314 2315 if (likely(headroom >= virtnet_get_headroom(vi) && 2316 (*num_buf == 1 || xdp_prog->aux->xdp_has_frags))) { 2317 return page_address(*page) + offset; 2318 } 2319 2320 /* This happens when headroom is not enough because 2321 * of the buffer was prefilled before XDP is set. 2322 * This should only happen for the first several packets. 2323 * In fact, vq reset can be used here to help us clean up 2324 * the prefilled buffers, but many existing devices do not 2325 * support it, and we don't want to bother users who are 2326 * using xdp normally. 2327 */ 2328 if (!xdp_prog->aux->xdp_has_frags) { 2329 /* linearize data for XDP */ 2330 xdp_page = xdp_linearize_page(vi->dev, rq, num_buf, 2331 *page, offset, 2332 XDP_PACKET_HEADROOM, 2333 len); 2334 if (!xdp_page) 2335 return NULL; 2336 } else { 2337 xdp_room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM + 2338 sizeof(struct skb_shared_info)); 2339 if (*len + xdp_room > PAGE_SIZE) 2340 return NULL; 2341 2342 xdp_page = alloc_page(GFP_ATOMIC); 2343 if (!xdp_page) 2344 return NULL; 2345 2346 memcpy(page_address(xdp_page) + XDP_PACKET_HEADROOM, 2347 page_address(*page) + offset, *len); 2348 } 2349 2350 *frame_sz = PAGE_SIZE; 2351 2352 put_page(*page); 2353 2354 *page = xdp_page; 2355 2356 return page_address(*page) + XDP_PACKET_HEADROOM; 2357 } 2358 2359 static struct sk_buff *receive_mergeable_xdp(struct net_device *dev, 2360 struct virtnet_info *vi, 2361 struct receive_queue *rq, 2362 struct bpf_prog *xdp_prog, 2363 void *buf, 2364 void *ctx, 2365 unsigned int len, 2366 unsigned int *xdp_xmit, 2367 struct virtnet_rq_stats *stats) 2368 { 2369 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 2370 int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 2371 struct page *page = virt_to_head_page(buf); 2372 int offset = buf - page_address(page); 2373 unsigned int xdp_frags_truesz = 0; 2374 struct sk_buff *head_skb; 2375 unsigned int frame_sz; 2376 struct xdp_buff xdp; 2377 void *data; 2378 u32 act; 2379 int err; 2380 2381 data = mergeable_xdp_get_buf(vi, rq, xdp_prog, ctx, &frame_sz, &num_buf, &page, 2382 offset, &len, hdr); 2383 if (unlikely(!data)) 2384 goto err_xdp; 2385 2386 err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz, 2387 &num_buf, &xdp_frags_truesz, stats); 2388 if (unlikely(err)) 2389 goto err_xdp; 2390 2391 act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats); 2392 2393 switch (act) { 2394 case XDP_PASS: 2395 head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz); 2396 if (unlikely(!head_skb)) 2397 break; 2398 return head_skb; 2399 2400 case XDP_TX: 2401 case XDP_REDIRECT: 2402 return NULL; 2403 2404 default: 2405 break; 2406 } 2407 2408 put_xdp_frags(&xdp); 2409 2410 err_xdp: 2411 put_page(page); 2412 mergeable_buf_free(rq, num_buf, dev, stats); 2413 2414 u64_stats_inc(&stats->xdp_drops); 2415 u64_stats_inc(&stats->drops); 2416 return NULL; 2417 } 2418 2419 static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb, 2420 struct sk_buff *curr_skb, 2421 struct page *page, void *buf, 2422 int len, int truesize) 2423 { 2424 int num_skb_frags; 2425 int offset; 2426 2427 num_skb_frags = skb_shinfo(curr_skb)->nr_frags; 2428 if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) { 2429 struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC); 2430 2431 if (unlikely(!nskb)) 2432 return NULL; 2433 2434 if (curr_skb == head_skb) 2435 skb_shinfo(curr_skb)->frag_list = nskb; 2436 else 2437 
curr_skb->next = nskb; 2438 curr_skb = nskb; 2439 head_skb->truesize += nskb->truesize; 2440 num_skb_frags = 0; 2441 } 2442 2443 if (curr_skb != head_skb) { 2444 head_skb->data_len += len; 2445 head_skb->len += len; 2446 head_skb->truesize += truesize; 2447 } 2448 2449 offset = buf - page_address(page); 2450 if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) { 2451 put_page(page); 2452 skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1, 2453 len, truesize); 2454 } else { 2455 skb_add_rx_frag(curr_skb, num_skb_frags, page, 2456 offset, len, truesize); 2457 } 2458 2459 return curr_skb; 2460 } 2461 2462 static struct sk_buff *receive_mergeable(struct net_device *dev, 2463 struct virtnet_info *vi, 2464 struct receive_queue *rq, 2465 void *buf, 2466 void *ctx, 2467 unsigned int len, 2468 unsigned int *xdp_xmit, 2469 struct virtnet_rq_stats *stats) 2470 { 2471 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 2472 int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 2473 struct page *page = virt_to_head_page(buf); 2474 int offset = buf - page_address(page); 2475 struct sk_buff *head_skb, *curr_skb; 2476 unsigned int truesize = mergeable_ctx_to_truesize(ctx); 2477 unsigned int headroom = mergeable_ctx_to_headroom(ctx); 2478 2479 head_skb = NULL; 2480 u64_stats_add(&stats->bytes, len - vi->hdr_len); 2481 2482 if (check_mergeable_len(dev, ctx, len)) 2483 goto err_skb; 2484 2485 if (unlikely(vi->xdp_enabled)) { 2486 struct bpf_prog *xdp_prog; 2487 2488 rcu_read_lock(); 2489 xdp_prog = rcu_dereference(rq->xdp_prog); 2490 if (xdp_prog) { 2491 head_skb = receive_mergeable_xdp(dev, vi, rq, xdp_prog, buf, ctx, 2492 len, xdp_xmit, stats); 2493 rcu_read_unlock(); 2494 return head_skb; 2495 } 2496 rcu_read_unlock(); 2497 } 2498 2499 head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom); 2500 curr_skb = head_skb; 2501 2502 if (unlikely(!curr_skb)) 2503 goto err_skb; 2504 while (--num_buf) { 2505 buf = virtnet_rq_get_buf(rq, &len, &ctx); 2506 if (unlikely(!buf)) { 2507 pr_debug("%s: rx error: %d buffers out of %d missing\n", 2508 dev->name, num_buf, 2509 virtio16_to_cpu(vi->vdev, 2510 hdr->num_buffers)); 2511 DEV_STATS_INC(dev, rx_length_errors); 2512 goto err_buf; 2513 } 2514 2515 u64_stats_add(&stats->bytes, len); 2516 page = virt_to_head_page(buf); 2517 2518 if (check_mergeable_len(dev, ctx, len)) 2519 goto err_skb; 2520 2521 truesize = mergeable_ctx_to_truesize(ctx); 2522 curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page, 2523 buf, len, truesize); 2524 if (!curr_skb) 2525 goto err_skb; 2526 } 2527 2528 ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len); 2529 return head_skb; 2530 2531 err_skb: 2532 put_page(page); 2533 mergeable_buf_free(rq, num_buf, dev, stats); 2534 2535 err_buf: 2536 u64_stats_inc(&stats->drops); 2537 dev_kfree_skb(head_skb); 2538 return NULL; 2539 } 2540 2541 static inline u32 2542 virtio_net_hash_value(const struct virtio_net_hdr_v1_hash *hdr_hash) 2543 { 2544 return __le16_to_cpu(hdr_hash->hash_value_lo) | 2545 (__le16_to_cpu(hdr_hash->hash_value_hi) << 16); 2546 } 2547 2548 static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash, 2549 struct sk_buff *skb) 2550 { 2551 enum pkt_hash_types rss_hash_type; 2552 2553 if (!hdr_hash || !skb) 2554 return; 2555 2556 switch (__le16_to_cpu(hdr_hash->hash_report)) { 2557 case VIRTIO_NET_HASH_REPORT_TCPv4: 2558 case VIRTIO_NET_HASH_REPORT_UDPv4: 2559 case VIRTIO_NET_HASH_REPORT_TCPv6: 2560 case VIRTIO_NET_HASH_REPORT_UDPv6: 2561 case VIRTIO_NET_HASH_REPORT_TCPv6_EX: 2562 case 
VIRTIO_NET_HASH_REPORT_UDPv6_EX: 2563 rss_hash_type = PKT_HASH_TYPE_L4; 2564 break; 2565 case VIRTIO_NET_HASH_REPORT_IPv4: 2566 case VIRTIO_NET_HASH_REPORT_IPv6: 2567 case VIRTIO_NET_HASH_REPORT_IPv6_EX: 2568 rss_hash_type = PKT_HASH_TYPE_L3; 2569 break; 2570 case VIRTIO_NET_HASH_REPORT_NONE: 2571 default: 2572 rss_hash_type = PKT_HASH_TYPE_NONE; 2573 } 2574 skb_set_hash(skb, virtio_net_hash_value(hdr_hash), rss_hash_type); 2575 } 2576 2577 static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq, 2578 struct sk_buff *skb, u8 flags) 2579 { 2580 struct virtio_net_common_hdr *hdr; 2581 struct net_device *dev = vi->dev; 2582 2583 hdr = skb_vnet_common_hdr(skb); 2584 if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report) 2585 virtio_skb_set_hash(&hdr->hash_v1_hdr, skb); 2586 2587 hdr->hdr.flags = flags; 2588 if (virtio_net_handle_csum_offload(skb, &hdr->hdr, vi->rx_tnl_csum)) { 2589 net_warn_ratelimited("%s: bad csum: flags: %x, gso_type: %x rx_tnl_csum %d\n", 2590 dev->name, hdr->hdr.flags, 2591 hdr->hdr.gso_type, vi->rx_tnl_csum); 2592 goto frame_err; 2593 } 2594 2595 if (virtio_net_hdr_tnl_to_skb(skb, &hdr->tnl_hdr, vi->rx_tnl, 2596 vi->rx_tnl_csum, 2597 virtio_is_little_endian(vi->vdev))) { 2598 net_warn_ratelimited("%s: bad gso: type: %x, size: %u, flags %x tunnel %d tnl csum %d\n", 2599 dev->name, hdr->hdr.gso_type, 2600 hdr->hdr.gso_size, hdr->hdr.flags, 2601 vi->rx_tnl, vi->rx_tnl_csum); 2602 goto frame_err; 2603 } 2604 2605 skb_record_rx_queue(skb, vq2rxq(rq->vq)); 2606 skb->protocol = eth_type_trans(skb, dev); 2607 pr_debug("Receiving skb proto 0x%04x len %i type %i\n", 2608 ntohs(skb->protocol), skb->len, skb->pkt_type); 2609 2610 napi_gro_receive(&rq->napi, skb); 2611 return; 2612 2613 frame_err: 2614 DEV_STATS_INC(dev, rx_frame_errors); 2615 dev_kfree_skb(skb); 2616 } 2617 2618 static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, 2619 void *buf, unsigned int len, void **ctx, 2620 unsigned int *xdp_xmit, 2621 struct virtnet_rq_stats *stats) 2622 { 2623 struct net_device *dev = vi->dev; 2624 struct sk_buff *skb; 2625 u8 flags; 2626 2627 if (unlikely(len < vi->hdr_len + ETH_HLEN)) { 2628 pr_debug("%s: short packet %i\n", dev->name, len); 2629 DEV_STATS_INC(dev, rx_length_errors); 2630 virtnet_rq_free_buf(vi, rq, buf); 2631 return; 2632 } 2633 2634 /* 1. Save the flags early, as the XDP program might overwrite them. 2635 * These flags ensure packets marked as VIRTIO_NET_HDR_F_DATA_VALID 2636 * stay valid after XDP processing. 2637 * 2. XDP doesn't work with partially checksummed packets (refer to 2638 * virtnet_xdp_set()), so packets marked as 2639 * VIRTIO_NET_HDR_F_NEEDS_CSUM get dropped during XDP processing. 2640 */ 2641 flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags; 2642 2643 if (vi->mergeable_rx_bufs) 2644 skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit, 2645 stats); 2646 else if (vi->big_packets) 2647 skb = receive_big(dev, vi, rq, buf, len, stats); 2648 else 2649 skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); 2650 2651 if (unlikely(!skb)) 2652 return; 2653 2654 virtnet_receive_done(vi, rq, skb, flags); 2655 } 2656 2657 /* Unlike mergeable buffers, all buffers are allocated to the 2658 * same size, except for the headroom. For this reason we do 2659 * not need to use mergeable_len_to_ctx here - it is enough 2660 * to store the headroom as the context ignoring the truesize. 
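 * The resulting small-mode buffer is laid out roughly as:
 *
 *   | VIRTNET_RX_PAD + xdp_headroom | virtio hdr | up to GOOD_PACKET_LEN |
 *                                   ^ address posted to the virtqueue
 *
 * followed by SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) of
 * tailroom so virtnet_build_skb() can wrap the buffer directly.
 * receive_small() subtracts the same VIRTNET_RX_PAD + xdp_headroom
 * offset to get back to the start of the allocation.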
2661 */ 2662 static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, 2663 gfp_t gfp) 2664 { 2665 char *buf; 2666 unsigned int xdp_headroom = virtnet_get_headroom(vi); 2667 void *ctx = (void *)(unsigned long)xdp_headroom; 2668 int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom; 2669 int err; 2670 2671 len = SKB_DATA_ALIGN(len) + 2672 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 2673 2674 if (unlikely(!skb_page_frag_refill(len, &rq->alloc_frag, gfp))) 2675 return -ENOMEM; 2676 2677 buf = virtnet_rq_alloc(rq, len, gfp); 2678 if (unlikely(!buf)) 2679 return -ENOMEM; 2680 2681 buf += VIRTNET_RX_PAD + xdp_headroom; 2682 2683 virtnet_rq_init_one_sg(rq, buf, vi->hdr_len + GOOD_PACKET_LEN); 2684 2685 err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1, buf, ctx, gfp); 2686 if (err < 0) { 2687 virtnet_rq_unmap(rq, buf, 0); 2688 put_page(virt_to_head_page(buf)); 2689 } 2690 2691 return err; 2692 } 2693 2694 static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq, 2695 gfp_t gfp) 2696 { 2697 struct page *first, *list = NULL; 2698 char *p; 2699 int i, err, offset; 2700 2701 sg_init_table(rq->sg, vi->big_packets_num_skbfrags + 2); 2702 2703 /* page in rq->sg[vi->big_packets_num_skbfrags + 1] is list tail */ 2704 for (i = vi->big_packets_num_skbfrags + 1; i > 1; --i) { 2705 first = get_a_page(rq, gfp); 2706 if (!first) { 2707 if (list) 2708 give_pages(rq, list); 2709 return -ENOMEM; 2710 } 2711 sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE); 2712 2713 /* chain new page in list head to match sg */ 2714 first->private = (unsigned long)list; 2715 list = first; 2716 } 2717 2718 first = get_a_page(rq, gfp); 2719 if (!first) { 2720 give_pages(rq, list); 2721 return -ENOMEM; 2722 } 2723 p = page_address(first); 2724 2725 /* rq->sg[0], rq->sg[1] share the same page */ 2726 /* a separated rq->sg[0] for header - required in case !any_header_sg */ 2727 sg_set_buf(&rq->sg[0], p, vi->hdr_len); 2728 2729 /* rq->sg[1] for data packet, from offset */ 2730 offset = sizeof(struct padded_vnet_hdr); 2731 sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset); 2732 2733 /* chain first in list head */ 2734 first->private = (unsigned long)list; 2735 err = virtqueue_add_inbuf(rq->vq, rq->sg, vi->big_packets_num_skbfrags + 2, 2736 first, gfp); 2737 if (err < 0) 2738 give_pages(rq, first); 2739 2740 return err; 2741 } 2742 2743 static unsigned int get_mergeable_buf_len(struct receive_queue *rq, 2744 struct ewma_pkt_len *avg_pkt_len, 2745 unsigned int room) 2746 { 2747 struct virtnet_info *vi = rq->vq->vdev->priv; 2748 const size_t hdr_len = vi->hdr_len; 2749 unsigned int len; 2750 2751 if (room) 2752 return PAGE_SIZE - room; 2753 2754 len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len), 2755 rq->min_buf_len, PAGE_SIZE - hdr_len); 2756 2757 return ALIGN(len, L1_CACHE_BYTES); 2758 } 2759 2760 static int add_recvbuf_mergeable(struct virtnet_info *vi, 2761 struct receive_queue *rq, gfp_t gfp) 2762 { 2763 struct page_frag *alloc_frag = &rq->alloc_frag; 2764 unsigned int headroom = virtnet_get_headroom(vi); 2765 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 2766 unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); 2767 unsigned int len, hole; 2768 void *ctx; 2769 char *buf; 2770 int err; 2771 2772 /* Extra tailroom is needed to satisfy XDP's assumption. This 2773 * means rx frags coalescing won't work, but consider we've 2774 * disabled GSO for XDP, it won't be a big issue. 
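 * When headroom is non-zero (an XDP program is attached), room covers
 * both that headroom and a full struct skb_shared_info of tailroom,
 * and get_mergeable_buf_len() then caps the data area at
 * PAGE_SIZE - room.  The context stored with the buffer round-trips
 * the values the receive path needs, roughly:
 *
 *   ctx = mergeable_len_to_ctx(len + room, headroom);
 *   mergeable_ctx_to_truesize(ctx) -> len + room
 *   mergeable_ctx_to_headroom(ctx) -> headroom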
2775 */ 2776 len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room); 2777 2778 if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp))) 2779 return -ENOMEM; 2780 2781 if (!alloc_frag->offset && len + room + sizeof(struct virtnet_rq_dma) > alloc_frag->size) 2782 len -= sizeof(struct virtnet_rq_dma); 2783 2784 buf = virtnet_rq_alloc(rq, len + room, gfp); 2785 if (unlikely(!buf)) 2786 return -ENOMEM; 2787 2788 buf += headroom; /* advance address leaving hole at front of pkt */ 2789 hole = alloc_frag->size - alloc_frag->offset; 2790 if (hole < len + room) { 2791 /* To avoid internal fragmentation, if there is very likely not 2792 * enough space for another buffer, add the remaining space to 2793 * the current buffer. 2794 * XDP core assumes that frame_size of xdp_buff and the length 2795 * of the frag are PAGE_SIZE, so we disable the hole mechanism. 2796 */ 2797 if (!headroom) 2798 len += hole; 2799 alloc_frag->offset += hole; 2800 } 2801 2802 virtnet_rq_init_one_sg(rq, buf, len); 2803 2804 ctx = mergeable_len_to_ctx(len + room, headroom); 2805 err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1, buf, ctx, gfp); 2806 if (err < 0) { 2807 virtnet_rq_unmap(rq, buf, 0); 2808 put_page(virt_to_head_page(buf)); 2809 } 2810 2811 return err; 2812 } 2813 2814 /* 2815 * Returns false if we couldn't fill entirely (OOM). 2816 * 2817 * Normally run in the receive path, but can also be run from ndo_open 2818 * before we're receiving packets, or from refill_work which is 2819 * careful to disable receiving (using napi_disable). 2820 */ 2821 static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq, 2822 gfp_t gfp) 2823 { 2824 int err; 2825 2826 if (rq->xsk_pool) { 2827 err = virtnet_add_recvbuf_xsk(vi, rq, rq->xsk_pool, gfp); 2828 goto kick; 2829 } 2830 2831 do { 2832 if (vi->mergeable_rx_bufs) 2833 err = add_recvbuf_mergeable(vi, rq, gfp); 2834 else if (vi->big_packets) 2835 err = add_recvbuf_big(vi, rq, gfp); 2836 else 2837 err = add_recvbuf_small(vi, rq, gfp); 2838 2839 if (err) 2840 break; 2841 } while (rq->vq->num_free); 2842 2843 kick: 2844 if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) { 2845 unsigned long flags; 2846 2847 flags = u64_stats_update_begin_irqsave(&rq->stats.syncp); 2848 u64_stats_inc(&rq->stats.kicks); 2849 u64_stats_update_end_irqrestore(&rq->stats.syncp, flags); 2850 } 2851 2852 return err != -ENOMEM; 2853 } 2854 2855 static void skb_recv_done(struct virtqueue *rvq) 2856 { 2857 struct virtnet_info *vi = rvq->vdev->priv; 2858 struct receive_queue *rq = &vi->rq[vq2rxq(rvq)]; 2859 2860 rq->calls++; 2861 virtqueue_napi_schedule(&rq->napi, rvq); 2862 } 2863 2864 static void virtnet_napi_do_enable(struct virtqueue *vq, 2865 struct napi_struct *napi) 2866 { 2867 napi_enable(napi); 2868 2869 /* If all buffers were filled by other side before we napi_enabled, we 2870 * won't get another interrupt, so process any outstanding packets now. 2871 * Call local_bh_enable after to trigger softIRQ processing. 
2872 */ 2873 local_bh_disable(); 2874 virtqueue_napi_schedule(napi, vq); 2875 local_bh_enable(); 2876 } 2877 2878 static void virtnet_napi_enable(struct receive_queue *rq) 2879 { 2880 struct virtnet_info *vi = rq->vq->vdev->priv; 2881 int qidx = vq2rxq(rq->vq); 2882 2883 virtnet_napi_do_enable(rq->vq, &rq->napi); 2884 netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, &rq->napi); 2885 } 2886 2887 static void virtnet_napi_tx_enable(struct send_queue *sq) 2888 { 2889 struct virtnet_info *vi = sq->vq->vdev->priv; 2890 struct napi_struct *napi = &sq->napi; 2891 int qidx = vq2txq(sq->vq); 2892 2893 if (!napi->weight) 2894 return; 2895 2896 /* Tx napi touches cachelines on the cpu handling tx interrupts. Only 2897 * enable the feature if this is likely affine with the transmit path. 2898 */ 2899 if (!vi->affinity_hint_set) { 2900 napi->weight = 0; 2901 return; 2902 } 2903 2904 virtnet_napi_do_enable(sq->vq, napi); 2905 netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, napi); 2906 } 2907 2908 static void virtnet_napi_tx_disable(struct send_queue *sq) 2909 { 2910 struct virtnet_info *vi = sq->vq->vdev->priv; 2911 struct napi_struct *napi = &sq->napi; 2912 int qidx = vq2txq(sq->vq); 2913 2914 if (napi->weight) { 2915 netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, NULL); 2916 napi_disable(napi); 2917 } 2918 } 2919 2920 static void virtnet_napi_disable(struct receive_queue *rq) 2921 { 2922 struct virtnet_info *vi = rq->vq->vdev->priv; 2923 struct napi_struct *napi = &rq->napi; 2924 int qidx = vq2rxq(rq->vq); 2925 2926 netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, NULL); 2927 napi_disable(napi); 2928 } 2929 2930 static void refill_work(struct work_struct *work) 2931 { 2932 struct virtnet_info *vi = 2933 container_of(work, struct virtnet_info, refill.work); 2934 bool still_empty; 2935 int i; 2936 2937 for (i = 0; i < vi->curr_queue_pairs; i++) { 2938 struct receive_queue *rq = &vi->rq[i]; 2939 2940 /* 2941 * When queue API support is added in the future and the call 2942 * below becomes napi_disable_locked, this driver will need to 2943 * be refactored. 2944 * 2945 * One possible solution would be to: 2946 * - cancel refill_work with cancel_delayed_work (note: 2947 * non-sync) 2948 * - cancel refill_work with cancel_delayed_work_sync in 2949 * virtnet_remove after the netdev is unregistered 2950 * - wrap all of the work in a lock (perhaps the netdev 2951 * instance lock) 2952 * - check netif_running() and return early to avoid a race 2953 */ 2954 napi_disable(&rq->napi); 2955 still_empty = !try_fill_recv(vi, rq, GFP_KERNEL); 2956 virtnet_napi_do_enable(rq->vq, &rq->napi); 2957 2958 /* In theory, this can happen: if we don't get any buffers in 2959 * we will *never* try to fill again. 
2960 */ 2961 if (still_empty) 2962 schedule_delayed_work(&vi->refill, HZ/2); 2963 } 2964 } 2965 2966 static int virtnet_receive_xsk_bufs(struct virtnet_info *vi, 2967 struct receive_queue *rq, 2968 int budget, 2969 unsigned int *xdp_xmit, 2970 struct virtnet_rq_stats *stats) 2971 { 2972 unsigned int len; 2973 int packets = 0; 2974 void *buf; 2975 2976 while (packets < budget) { 2977 buf = virtqueue_get_buf(rq->vq, &len); 2978 if (!buf) 2979 break; 2980 2981 virtnet_receive_xsk_buf(vi, rq, buf, len, xdp_xmit, stats); 2982 packets++; 2983 } 2984 2985 return packets; 2986 } 2987 2988 static int virtnet_receive_packets(struct virtnet_info *vi, 2989 struct receive_queue *rq, 2990 int budget, 2991 unsigned int *xdp_xmit, 2992 struct virtnet_rq_stats *stats) 2993 { 2994 unsigned int len; 2995 int packets = 0; 2996 void *buf; 2997 2998 if (!vi->big_packets || vi->mergeable_rx_bufs) { 2999 void *ctx; 3000 while (packets < budget && 3001 (buf = virtnet_rq_get_buf(rq, &len, &ctx))) { 3002 receive_buf(vi, rq, buf, len, ctx, xdp_xmit, stats); 3003 packets++; 3004 } 3005 } else { 3006 while (packets < budget && 3007 (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) { 3008 receive_buf(vi, rq, buf, len, NULL, xdp_xmit, stats); 3009 packets++; 3010 } 3011 } 3012 3013 return packets; 3014 } 3015 3016 static int virtnet_receive(struct receive_queue *rq, int budget, 3017 unsigned int *xdp_xmit) 3018 { 3019 struct virtnet_info *vi = rq->vq->vdev->priv; 3020 struct virtnet_rq_stats stats = {}; 3021 int i, packets; 3022 3023 if (rq->xsk_pool) 3024 packets = virtnet_receive_xsk_bufs(vi, rq, budget, xdp_xmit, &stats); 3025 else 3026 packets = virtnet_receive_packets(vi, rq, budget, xdp_xmit, &stats); 3027 3028 if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) { 3029 if (!try_fill_recv(vi, rq, GFP_ATOMIC)) { 3030 spin_lock(&vi->refill_lock); 3031 if (vi->refill_enabled) 3032 schedule_delayed_work(&vi->refill, 0); 3033 spin_unlock(&vi->refill_lock); 3034 } 3035 } 3036 3037 u64_stats_set(&stats.packets, packets); 3038 u64_stats_update_begin(&rq->stats.syncp); 3039 for (i = 0; i < ARRAY_SIZE(virtnet_rq_stats_desc); i++) { 3040 size_t offset = virtnet_rq_stats_desc[i].offset; 3041 u64_stats_t *item, *src; 3042 3043 item = (u64_stats_t *)((u8 *)&rq->stats + offset); 3044 src = (u64_stats_t *)((u8 *)&stats + offset); 3045 u64_stats_add(item, u64_stats_read(src)); 3046 } 3047 3048 u64_stats_add(&rq->stats.packets, u64_stats_read(&stats.packets)); 3049 u64_stats_add(&rq->stats.bytes, u64_stats_read(&stats.bytes)); 3050 3051 u64_stats_update_end(&rq->stats.syncp); 3052 3053 return packets; 3054 } 3055 3056 static void virtnet_poll_cleantx(struct receive_queue *rq, int budget) 3057 { 3058 struct virtnet_info *vi = rq->vq->vdev->priv; 3059 unsigned int index = vq2rxq(rq->vq); 3060 struct send_queue *sq = &vi->sq[index]; 3061 struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index); 3062 3063 if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index)) 3064 return; 3065 3066 if (__netif_tx_trylock(txq)) { 3067 if (sq->reset) { 3068 __netif_tx_unlock(txq); 3069 return; 3070 } 3071 3072 do { 3073 virtqueue_disable_cb(sq->vq); 3074 free_old_xmit(sq, txq, !!budget); 3075 } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq))); 3076 3077 if (sq->vq->num_free >= MAX_SKB_FRAGS + 2 && 3078 netif_tx_queue_stopped(txq)) { 3079 u64_stats_update_begin(&sq->stats.syncp); 3080 u64_stats_inc(&sq->stats.wake); 3081 u64_stats_update_end(&sq->stats.syncp); 3082 netif_tx_wake_queue(txq); 3083 } 3084 
3085 __netif_tx_unlock(txq); 3086 } 3087 } 3088 3089 static void virtnet_rx_dim_update(struct virtnet_info *vi, struct receive_queue *rq) 3090 { 3091 struct dim_sample cur_sample = {}; 3092 3093 if (!rq->packets_in_napi) 3094 return; 3095 3096 /* Don't need protection when fetching stats, since fetcher and 3097 * updater of the stats are in same context 3098 */ 3099 dim_update_sample(rq->calls, 3100 u64_stats_read(&rq->stats.packets), 3101 u64_stats_read(&rq->stats.bytes), 3102 &cur_sample); 3103 3104 net_dim(&rq->dim, &cur_sample); 3105 rq->packets_in_napi = 0; 3106 } 3107 3108 static int virtnet_poll(struct napi_struct *napi, int budget) 3109 { 3110 struct receive_queue *rq = 3111 container_of(napi, struct receive_queue, napi); 3112 struct virtnet_info *vi = rq->vq->vdev->priv; 3113 struct send_queue *sq; 3114 unsigned int received; 3115 unsigned int xdp_xmit = 0; 3116 bool napi_complete; 3117 3118 virtnet_poll_cleantx(rq, budget); 3119 3120 received = virtnet_receive(rq, budget, &xdp_xmit); 3121 rq->packets_in_napi += received; 3122 3123 if (xdp_xmit & VIRTIO_XDP_REDIR) 3124 xdp_do_flush(); 3125 3126 /* Out of packets? */ 3127 if (received < budget) { 3128 napi_complete = virtqueue_napi_complete(napi, rq->vq, received); 3129 /* Intentionally not taking dim_lock here. This may result in a 3130 * spurious net_dim call. But if that happens virtnet_rx_dim_work 3131 * will not act on the scheduled work. 3132 */ 3133 if (napi_complete && rq->dim_enabled) 3134 virtnet_rx_dim_update(vi, rq); 3135 } 3136 3137 if (xdp_xmit & VIRTIO_XDP_TX) { 3138 sq = virtnet_xdp_get_sq(vi); 3139 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { 3140 u64_stats_update_begin(&sq->stats.syncp); 3141 u64_stats_inc(&sq->stats.kicks); 3142 u64_stats_update_end(&sq->stats.syncp); 3143 } 3144 virtnet_xdp_put_sq(vi, sq); 3145 } 3146 3147 return received; 3148 } 3149 3150 static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index) 3151 { 3152 virtnet_napi_tx_disable(&vi->sq[qp_index]); 3153 virtnet_napi_disable(&vi->rq[qp_index]); 3154 xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); 3155 } 3156 3157 static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index) 3158 { 3159 struct net_device *dev = vi->dev; 3160 int err; 3161 3162 err = xdp_rxq_info_reg(&vi->rq[qp_index].xdp_rxq, dev, qp_index, 3163 vi->rq[qp_index].napi.napi_id); 3164 if (err < 0) 3165 return err; 3166 3167 err = xdp_rxq_info_reg_mem_model(&vi->rq[qp_index].xdp_rxq, 3168 MEM_TYPE_PAGE_SHARED, NULL); 3169 if (err < 0) 3170 goto err_xdp_reg_mem_model; 3171 3172 virtnet_napi_enable(&vi->rq[qp_index]); 3173 virtnet_napi_tx_enable(&vi->sq[qp_index]); 3174 3175 return 0; 3176 3177 err_xdp_reg_mem_model: 3178 xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); 3179 return err; 3180 } 3181 3182 static void virtnet_cancel_dim(struct virtnet_info *vi, struct dim *dim) 3183 { 3184 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 3185 return; 3186 net_dim_work_cancel(dim); 3187 } 3188 3189 static void virtnet_update_settings(struct virtnet_info *vi) 3190 { 3191 u32 speed; 3192 u8 duplex; 3193 3194 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX)) 3195 return; 3196 3197 virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed); 3198 3199 if (ethtool_validate_speed(speed)) 3200 vi->speed = speed; 3201 3202 virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex); 3203 3204 if (ethtool_validate_duplex(duplex)) 3205 vi->duplex = duplex; 3206 } 3207 3208 static int virtnet_open(struct 
net_device *dev) 3209 { 3210 struct virtnet_info *vi = netdev_priv(dev); 3211 int i, err; 3212 3213 enable_delayed_refill(vi); 3214 3215 for (i = 0; i < vi->max_queue_pairs; i++) { 3216 if (i < vi->curr_queue_pairs) 3217 /* Make sure we have some buffers: if oom use wq. */ 3218 if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL)) 3219 schedule_delayed_work(&vi->refill, 0); 3220 3221 err = virtnet_enable_queue_pair(vi, i); 3222 if (err < 0) 3223 goto err_enable_qp; 3224 } 3225 3226 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { 3227 if (vi->status & VIRTIO_NET_S_LINK_UP) 3228 netif_carrier_on(vi->dev); 3229 virtio_config_driver_enable(vi->vdev); 3230 } else { 3231 vi->status = VIRTIO_NET_S_LINK_UP; 3232 netif_carrier_on(dev); 3233 } 3234 3235 return 0; 3236 3237 err_enable_qp: 3238 disable_delayed_refill(vi); 3239 cancel_delayed_work_sync(&vi->refill); 3240 3241 for (i--; i >= 0; i--) { 3242 virtnet_disable_queue_pair(vi, i); 3243 virtnet_cancel_dim(vi, &vi->rq[i].dim); 3244 } 3245 3246 return err; 3247 } 3248 3249 static int virtnet_poll_tx(struct napi_struct *napi, int budget) 3250 { 3251 struct send_queue *sq = container_of(napi, struct send_queue, napi); 3252 struct virtnet_info *vi = sq->vq->vdev->priv; 3253 unsigned int index = vq2txq(sq->vq); 3254 struct netdev_queue *txq; 3255 int opaque, xsk_done = 0; 3256 bool done; 3257 3258 if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { 3259 /* We don't need to enable cb for XDP */ 3260 napi_complete_done(napi, 0); 3261 return 0; 3262 } 3263 3264 txq = netdev_get_tx_queue(vi->dev, index); 3265 __netif_tx_lock(txq, raw_smp_processor_id()); 3266 virtqueue_disable_cb(sq->vq); 3267 3268 if (sq->xsk_pool) 3269 xsk_done = virtnet_xsk_xmit(sq, sq->xsk_pool, budget); 3270 else 3271 free_old_xmit(sq, txq, !!budget); 3272 3273 if (sq->vq->num_free >= MAX_SKB_FRAGS + 2 && 3274 netif_tx_queue_stopped(txq)) { 3275 u64_stats_update_begin(&sq->stats.syncp); 3276 u64_stats_inc(&sq->stats.wake); 3277 u64_stats_update_end(&sq->stats.syncp); 3278 netif_tx_wake_queue(txq); 3279 } 3280 3281 if (xsk_done >= budget) { 3282 __netif_tx_unlock(txq); 3283 return budget; 3284 } 3285 3286 opaque = virtqueue_enable_cb_prepare(sq->vq); 3287 3288 done = napi_complete_done(napi, 0); 3289 3290 if (!done) 3291 virtqueue_disable_cb(sq->vq); 3292 3293 __netif_tx_unlock(txq); 3294 3295 if (done) { 3296 if (unlikely(virtqueue_poll(sq->vq, opaque))) { 3297 if (napi_schedule_prep(napi)) { 3298 __netif_tx_lock(txq, raw_smp_processor_id()); 3299 virtqueue_disable_cb(sq->vq); 3300 __netif_tx_unlock(txq); 3301 __napi_schedule(napi); 3302 } 3303 } 3304 } 3305 3306 return 0; 3307 } 3308 3309 static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan) 3310 { 3311 const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; 3312 struct virtnet_info *vi = sq->vq->vdev->priv; 3313 struct virtio_net_hdr_v1_hash_tunnel *hdr; 3314 int num_sg; 3315 unsigned hdr_len = vi->hdr_len; 3316 bool can_push; 3317 3318 pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); 3319 3320 /* Make sure it's safe to cast between formats */ 3321 BUILD_BUG_ON(__alignof__(*hdr) != __alignof__(hdr->hash_hdr)); 3322 BUILD_BUG_ON(__alignof__(*hdr) != __alignof__(hdr->hash_hdr.hdr)); 3323 3324 can_push = vi->any_header_sg && 3325 !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) && 3326 !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len; 3327 /* Even if we can, don't push here yet as this would skew 3328 * csum_start offset below. 
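 * virtio_net_hdr_tnl_from_skb() computes csum_start relative to the
 * current skb->data, so the header is pushed only afterwards, and
 * pulled back once the sg list is built to keep the tx byte accounting
 * based on the original skb->len.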
*/ 3329 if (can_push) 3330 hdr = (struct virtio_net_hdr_v1_hash_tunnel *)(skb->data - 3331 hdr_len); 3332 else 3333 hdr = &skb_vnet_common_hdr(skb)->tnl_hdr; 3334 3335 if (virtio_net_hdr_tnl_from_skb(skb, hdr, vi->tx_tnl, 3336 virtio_is_little_endian(vi->vdev), 0)) 3337 return -EPROTO; 3338 3339 if (vi->mergeable_rx_bufs) 3340 hdr->hash_hdr.hdr.num_buffers = 0; 3341 3342 sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2)); 3343 if (can_push) { 3344 __skb_push(skb, hdr_len); 3345 num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len); 3346 if (unlikely(num_sg < 0)) 3347 return num_sg; 3348 /* Pull header back to avoid skew in tx bytes calculations. */ 3349 __skb_pull(skb, hdr_len); 3350 } else { 3351 sg_set_buf(sq->sg, hdr, hdr_len); 3352 num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len); 3353 if (unlikely(num_sg < 0)) 3354 return num_sg; 3355 num_sg++; 3356 } 3357 3358 return virtnet_add_outbuf(sq, num_sg, skb, 3359 orphan ? VIRTNET_XMIT_TYPE_SKB_ORPHAN : VIRTNET_XMIT_TYPE_SKB); 3360 } 3361 3362 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) 3363 { 3364 struct virtnet_info *vi = netdev_priv(dev); 3365 int qnum = skb_get_queue_mapping(skb); 3366 struct send_queue *sq = &vi->sq[qnum]; 3367 int err; 3368 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); 3369 bool xmit_more = netdev_xmit_more(); 3370 bool use_napi = sq->napi.weight; 3371 bool kick; 3372 3373 if (!use_napi) 3374 free_old_xmit(sq, txq, false); 3375 else 3376 virtqueue_disable_cb(sq->vq); 3377 3378 /* timestamp packet in software */ 3379 skb_tx_timestamp(skb); 3380 3381 /* Try to transmit */ 3382 err = xmit_skb(sq, skb, !use_napi); 3383 3384 /* This should not happen! */ 3385 if (unlikely(err)) { 3386 DEV_STATS_INC(dev, tx_fifo_errors); 3387 if (net_ratelimit()) 3388 dev_warn(&dev->dev, 3389 "Unexpected TXQ (%d) queue failure: %d\n", 3390 qnum, err); 3391 DEV_STATS_INC(dev, tx_dropped); 3392 dev_kfree_skb_any(skb); 3393 return NETDEV_TX_OK; 3394 } 3395 3396 /* Don't wait up for transmitted skbs to be freed. */ 3397 if (!use_napi) { 3398 skb_orphan(skb); 3399 nf_reset_ct(skb); 3400 } 3401 3402 if (use_napi) 3403 tx_may_stop(vi, dev, sq); 3404 else 3405 check_sq_full_and_disable(vi, dev,sq); 3406 3407 kick = use_napi ? __netdev_tx_sent_queue(txq, skb->len, xmit_more) : 3408 !xmit_more || netif_xmit_stopped(txq); 3409 if (kick) { 3410 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { 3411 u64_stats_update_begin(&sq->stats.syncp); 3412 u64_stats_inc(&sq->stats.kicks); 3413 u64_stats_update_end(&sq->stats.syncp); 3414 } 3415 } 3416 3417 if (use_napi && kick && unlikely(!virtqueue_enable_cb_delayed(sq->vq))) 3418 virtqueue_napi_schedule(&sq->napi, sq->vq); 3419 3420 return NETDEV_TX_OK; 3421 } 3422 3423 static void __virtnet_rx_pause(struct virtnet_info *vi, 3424 struct receive_queue *rq) 3425 { 3426 bool running = netif_running(vi->dev); 3427 3428 if (running) { 3429 virtnet_napi_disable(rq); 3430 virtnet_cancel_dim(vi, &rq->dim); 3431 } 3432 } 3433 3434 static void virtnet_rx_pause_all(struct virtnet_info *vi) 3435 { 3436 int i; 3437 3438 /* 3439 * Make sure refill_work does not run concurrently to 3440 * avoid napi_disable race which leads to deadlock. 
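 * (refill_work() itself disables and re-enables NAPI on every receive
 * queue, so it must be parked before the queues are paused below.)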
3441 */ 3442 disable_delayed_refill(vi); 3443 cancel_delayed_work_sync(&vi->refill); 3444 for (i = 0; i < vi->max_queue_pairs; i++) 3445 __virtnet_rx_pause(vi, &vi->rq[i]); 3446 } 3447 3448 static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq) 3449 { 3450 /* 3451 * Make sure refill_work does not run concurrently to 3452 * avoid napi_disable race which leads to deadlock. 3453 */ 3454 disable_delayed_refill(vi); 3455 cancel_delayed_work_sync(&vi->refill); 3456 __virtnet_rx_pause(vi, rq); 3457 } 3458 3459 static void __virtnet_rx_resume(struct virtnet_info *vi, 3460 struct receive_queue *rq, 3461 bool refill) 3462 { 3463 bool running = netif_running(vi->dev); 3464 bool schedule_refill = false; 3465 3466 if (refill && !try_fill_recv(vi, rq, GFP_KERNEL)) 3467 schedule_refill = true; 3468 if (running) 3469 virtnet_napi_enable(rq); 3470 3471 if (schedule_refill) 3472 schedule_delayed_work(&vi->refill, 0); 3473 } 3474 3475 static void virtnet_rx_resume_all(struct virtnet_info *vi) 3476 { 3477 int i; 3478 3479 enable_delayed_refill(vi); 3480 for (i = 0; i < vi->max_queue_pairs; i++) { 3481 if (i < vi->curr_queue_pairs) 3482 __virtnet_rx_resume(vi, &vi->rq[i], true); 3483 else 3484 __virtnet_rx_resume(vi, &vi->rq[i], false); 3485 } 3486 } 3487 3488 static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq) 3489 { 3490 enable_delayed_refill(vi); 3491 __virtnet_rx_resume(vi, rq, true); 3492 } 3493 3494 static int virtnet_rx_resize(struct virtnet_info *vi, 3495 struct receive_queue *rq, u32 ring_num) 3496 { 3497 int err, qindex; 3498 3499 qindex = rq - vi->rq; 3500 3501 virtnet_rx_pause(vi, rq); 3502 3503 err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_unmap_free_buf, NULL); 3504 if (err) 3505 netdev_err(vi->dev, "resize rx fail: rx queue index: %d err: %d\n", qindex, err); 3506 3507 virtnet_rx_resume(vi, rq); 3508 return err; 3509 } 3510 3511 static void virtnet_tx_pause(struct virtnet_info *vi, struct send_queue *sq) 3512 { 3513 bool running = netif_running(vi->dev); 3514 struct netdev_queue *txq; 3515 int qindex; 3516 3517 qindex = sq - vi->sq; 3518 3519 if (running) 3520 virtnet_napi_tx_disable(sq); 3521 3522 txq = netdev_get_tx_queue(vi->dev, qindex); 3523 3524 /* 1. wait all ximt complete 3525 * 2. fix the race of netif_stop_subqueue() vs netif_start_subqueue() 3526 */ 3527 __netif_tx_lock_bh(txq); 3528 3529 /* Prevent rx poll from accessing sq. */ 3530 sq->reset = true; 3531 3532 /* Prevent the upper layer from trying to send packets. 
*/ 3533 netif_stop_subqueue(vi->dev, qindex); 3534 3535 __netif_tx_unlock_bh(txq); 3536 } 3537 3538 static void virtnet_tx_resume(struct virtnet_info *vi, struct send_queue *sq) 3539 { 3540 bool running = netif_running(vi->dev); 3541 struct netdev_queue *txq; 3542 int qindex; 3543 3544 qindex = sq - vi->sq; 3545 3546 txq = netdev_get_tx_queue(vi->dev, qindex); 3547 3548 __netif_tx_lock_bh(txq); 3549 sq->reset = false; 3550 netif_tx_wake_queue(txq); 3551 __netif_tx_unlock_bh(txq); 3552 3553 if (running) 3554 virtnet_napi_tx_enable(sq); 3555 } 3556 3557 static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq, 3558 u32 ring_num) 3559 { 3560 int qindex, err; 3561 3562 if (ring_num <= MAX_SKB_FRAGS + 2) { 3563 netdev_err(vi->dev, "tx size (%d) cannot be smaller than %d\n", 3564 ring_num, MAX_SKB_FRAGS + 2); 3565 return -EINVAL; 3566 } 3567 3568 qindex = sq - vi->sq; 3569 3570 virtnet_tx_pause(vi, sq); 3571 3572 err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf, 3573 virtnet_sq_free_unused_buf_done); 3574 if (err) 3575 netdev_err(vi->dev, "resize tx fail: tx queue index: %d err: %d\n", qindex, err); 3576 3577 virtnet_tx_resume(vi, sq); 3578 3579 return err; 3580 } 3581 3582 /* 3583 * Send command via the control virtqueue and check status. Commands 3584 * supported by the hypervisor, as indicated by feature bits, should 3585 * never fail unless improperly formatted. 3586 */ 3587 static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd, 3588 struct scatterlist *out, 3589 struct scatterlist *in) 3590 { 3591 struct scatterlist *sgs[5], hdr, stat; 3592 u32 out_num = 0, tmp, in_num = 0; 3593 bool ok; 3594 int ret; 3595 3596 /* Caller should know better */ 3597 BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)); 3598 3599 mutex_lock(&vi->cvq_lock); 3600 vi->ctrl->status = ~0; 3601 vi->ctrl->hdr.class = class; 3602 vi->ctrl->hdr.cmd = cmd; 3603 /* Add header */ 3604 sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr)); 3605 sgs[out_num++] = &hdr; 3606 3607 if (out) 3608 sgs[out_num++] = out; 3609 3610 /* Add return status. */ 3611 sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status)); 3612 sgs[out_num + in_num++] = &stat; 3613 3614 if (in) 3615 sgs[out_num + in_num++] = in; 3616 3617 BUG_ON(out_num + in_num > ARRAY_SIZE(sgs)); 3618 ret = virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num, vi, GFP_ATOMIC); 3619 if (ret < 0) { 3620 dev_warn(&vi->vdev->dev, 3621 "Failed to add sgs for command vq: %d\n.", ret); 3622 mutex_unlock(&vi->cvq_lock); 3623 return false; 3624 } 3625 3626 if (unlikely(!virtqueue_kick(vi->cvq))) 3627 goto unlock; 3628 3629 /* Spin for a response, the kick causes an ioport write, trapping 3630 * into the hypervisor, so the request should be handled immediately. 
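 * The loop below still bails out if the virtqueue becomes broken, and
 * calls cond_resched() between polls so other tasks can run if the
 * device is slow to respond.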
3631 */ 3632 while (!virtqueue_get_buf(vi->cvq, &tmp) && 3633 !virtqueue_is_broken(vi->cvq)) { 3634 cond_resched(); 3635 cpu_relax(); 3636 } 3637 3638 unlock: 3639 ok = vi->ctrl->status == VIRTIO_NET_OK; 3640 mutex_unlock(&vi->cvq_lock); 3641 return ok; 3642 } 3643 3644 static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, 3645 struct scatterlist *out) 3646 { 3647 return virtnet_send_command_reply(vi, class, cmd, out, NULL); 3648 } 3649 3650 static int virtnet_set_mac_address(struct net_device *dev, void *p) 3651 { 3652 struct virtnet_info *vi = netdev_priv(dev); 3653 struct virtio_device *vdev = vi->vdev; 3654 int ret; 3655 struct sockaddr *addr; 3656 struct scatterlist sg; 3657 3658 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 3659 return -EOPNOTSUPP; 3660 3661 addr = kmemdup(p, sizeof(*addr), GFP_KERNEL); 3662 if (!addr) 3663 return -ENOMEM; 3664 3665 ret = eth_prepare_mac_addr_change(dev, addr); 3666 if (ret) 3667 goto out; 3668 3669 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 3670 sg_init_one(&sg, addr->sa_data, dev->addr_len); 3671 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 3672 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { 3673 dev_warn(&vdev->dev, 3674 "Failed to set mac address by vq command.\n"); 3675 ret = -EINVAL; 3676 goto out; 3677 } 3678 } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && 3679 !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { 3680 unsigned int i; 3681 3682 /* Naturally, this has an atomicity problem. */ 3683 for (i = 0; i < dev->addr_len; i++) 3684 virtio_cwrite8(vdev, 3685 offsetof(struct virtio_net_config, mac) + 3686 i, addr->sa_data[i]); 3687 } 3688 3689 eth_commit_mac_addr_change(dev, p); 3690 ret = 0; 3691 3692 out: 3693 kfree(addr); 3694 return ret; 3695 } 3696 3697 static void virtnet_stats(struct net_device *dev, 3698 struct rtnl_link_stats64 *tot) 3699 { 3700 struct virtnet_info *vi = netdev_priv(dev); 3701 unsigned int start; 3702 int i; 3703 3704 for (i = 0; i < vi->max_queue_pairs; i++) { 3705 u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops; 3706 struct receive_queue *rq = &vi->rq[i]; 3707 struct send_queue *sq = &vi->sq[i]; 3708 3709 do { 3710 start = u64_stats_fetch_begin(&sq->stats.syncp); 3711 tpackets = u64_stats_read(&sq->stats.packets); 3712 tbytes = u64_stats_read(&sq->stats.bytes); 3713 terrors = u64_stats_read(&sq->stats.tx_timeouts); 3714 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 3715 3716 do { 3717 start = u64_stats_fetch_begin(&rq->stats.syncp); 3718 rpackets = u64_stats_read(&rq->stats.packets); 3719 rbytes = u64_stats_read(&rq->stats.bytes); 3720 rdrops = u64_stats_read(&rq->stats.drops); 3721 } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); 3722 3723 tot->rx_packets += rpackets; 3724 tot->tx_packets += tpackets; 3725 tot->rx_bytes += rbytes; 3726 tot->tx_bytes += tbytes; 3727 tot->rx_dropped += rdrops; 3728 tot->tx_errors += terrors; 3729 } 3730 3731 tot->tx_dropped = DEV_STATS_READ(dev, tx_dropped); 3732 tot->tx_fifo_errors = DEV_STATS_READ(dev, tx_fifo_errors); 3733 tot->rx_length_errors = DEV_STATS_READ(dev, rx_length_errors); 3734 tot->rx_frame_errors = DEV_STATS_READ(dev, rx_frame_errors); 3735 } 3736 3737 static void virtnet_ack_link_announce(struct virtnet_info *vi) 3738 { 3739 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE, 3740 VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL)) 3741 dev_warn(&vi->dev->dev, "Failed to ack link announce.\n"); 3742 } 3743 3744 static bool virtnet_commit_rss_command(struct virtnet_info *vi); 3745 3746 static void 
virtnet_rss_update_by_qpairs(struct virtnet_info *vi, u16 queue_pairs) 3747 { 3748 u32 indir_val = 0; 3749 int i = 0; 3750 3751 for (; i < vi->rss_indir_table_size; ++i) { 3752 indir_val = ethtool_rxfh_indir_default(i, queue_pairs); 3753 vi->rss_hdr->indirection_table[i] = cpu_to_le16(indir_val); 3754 } 3755 vi->rss_trailer.max_tx_vq = cpu_to_le16(queue_pairs); 3756 } 3757 3758 static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) 3759 { 3760 struct virtio_net_ctrl_mq *mq __free(kfree) = NULL; 3761 struct virtio_net_rss_config_hdr *old_rss_hdr; 3762 struct virtio_net_rss_config_trailer old_rss_trailer; 3763 struct net_device *dev = vi->dev; 3764 struct scatterlist sg; 3765 3766 if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ)) 3767 return 0; 3768 3769 /* Firstly check if we need update rss. Do updating if both (1) rss enabled and 3770 * (2) no user configuration. 3771 * 3772 * During rss command processing, device updates queue_pairs using rss.max_tx_vq. That is, 3773 * the device updates queue_pairs together with rss, so we can skip the sperate queue_pairs 3774 * update (VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET below) and return directly. 3775 */ 3776 if (vi->has_rss && !netif_is_rxfh_configured(dev)) { 3777 old_rss_hdr = vi->rss_hdr; 3778 old_rss_trailer = vi->rss_trailer; 3779 vi->rss_hdr = devm_kzalloc(&dev->dev, virtnet_rss_hdr_size(vi), GFP_KERNEL); 3780 if (!vi->rss_hdr) { 3781 vi->rss_hdr = old_rss_hdr; 3782 return -ENOMEM; 3783 } 3784 3785 *vi->rss_hdr = *old_rss_hdr; 3786 virtnet_rss_update_by_qpairs(vi, queue_pairs); 3787 3788 if (!virtnet_commit_rss_command(vi)) { 3789 /* restore ctrl_rss if commit_rss_command failed */ 3790 devm_kfree(&dev->dev, vi->rss_hdr); 3791 vi->rss_hdr = old_rss_hdr; 3792 vi->rss_trailer = old_rss_trailer; 3793 3794 dev_warn(&dev->dev, "Fail to set num of queue pairs to %d, because committing RSS failed\n", 3795 queue_pairs); 3796 return -EINVAL; 3797 } 3798 devm_kfree(&dev->dev, old_rss_hdr); 3799 goto succ; 3800 } 3801 3802 mq = kzalloc(sizeof(*mq), GFP_KERNEL); 3803 if (!mq) 3804 return -ENOMEM; 3805 3806 mq->virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs); 3807 sg_init_one(&sg, mq, sizeof(*mq)); 3808 3809 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, 3810 VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) { 3811 dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n", 3812 queue_pairs); 3813 return -EINVAL; 3814 } 3815 succ: 3816 vi->curr_queue_pairs = queue_pairs; 3817 /* virtnet_open() will refill when device is going to up. */ 3818 spin_lock_bh(&vi->refill_lock); 3819 if (dev->flags & IFF_UP && vi->refill_enabled) 3820 schedule_delayed_work(&vi->refill, 0); 3821 spin_unlock_bh(&vi->refill_lock); 3822 3823 return 0; 3824 } 3825 3826 static int virtnet_close(struct net_device *dev) 3827 { 3828 struct virtnet_info *vi = netdev_priv(dev); 3829 int i; 3830 3831 /* Make sure NAPI doesn't schedule refill work */ 3832 disable_delayed_refill(vi); 3833 /* Make sure refill_work doesn't re-enable napi! 
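 * (refill_work() re-enables NAPI via virtnet_napi_do_enable() after
 * refilling a queue, which would race with the
 * virtnet_disable_queue_pair() calls below.)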
*/ 3834 cancel_delayed_work_sync(&vi->refill); 3835 /* Prevent the config change callback from changing carrier 3836 * after close 3837 */ 3838 virtio_config_driver_disable(vi->vdev); 3839 /* Stop getting status/speed updates: we don't care until next 3840 * open 3841 */ 3842 cancel_work_sync(&vi->config_work); 3843 3844 for (i = 0; i < vi->max_queue_pairs; i++) { 3845 virtnet_disable_queue_pair(vi, i); 3846 virtnet_cancel_dim(vi, &vi->rq[i].dim); 3847 } 3848 3849 netif_carrier_off(dev); 3850 3851 return 0; 3852 } 3853 3854 static void virtnet_rx_mode_work(struct work_struct *work) 3855 { 3856 struct virtnet_info *vi = 3857 container_of(work, struct virtnet_info, rx_mode_work); 3858 u8 *promisc_allmulti __free(kfree) = NULL; 3859 struct net_device *dev = vi->dev; 3860 struct scatterlist sg[2]; 3861 struct virtio_net_ctrl_mac *mac_data; 3862 struct netdev_hw_addr *ha; 3863 int uc_count; 3864 int mc_count; 3865 void *buf; 3866 int i; 3867 3868 /* We can't dynamically set ndo_set_rx_mode, so return gracefully */ 3869 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX)) 3870 return; 3871 3872 promisc_allmulti = kzalloc(sizeof(*promisc_allmulti), GFP_KERNEL); 3873 if (!promisc_allmulti) { 3874 dev_warn(&dev->dev, "Failed to set RX mode, no memory.\n"); 3875 return; 3876 } 3877 3878 rtnl_lock(); 3879 3880 *promisc_allmulti = !!(dev->flags & IFF_PROMISC); 3881 sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti)); 3882 3883 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 3884 VIRTIO_NET_CTRL_RX_PROMISC, sg)) 3885 dev_warn(&dev->dev, "Failed to %sable promisc mode.\n", 3886 *promisc_allmulti ? "en" : "dis"); 3887 3888 *promisc_allmulti = !!(dev->flags & IFF_ALLMULTI); 3889 sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti)); 3890 3891 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 3892 VIRTIO_NET_CTRL_RX_ALLMULTI, sg)) 3893 dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n", 3894 *promisc_allmulti ? 
"en" : "dis"); 3895 3896 netif_addr_lock_bh(dev); 3897 3898 uc_count = netdev_uc_count(dev); 3899 mc_count = netdev_mc_count(dev); 3900 /* MAC filter - use one buffer for both lists */ 3901 buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) + 3902 (2 * sizeof(mac_data->entries)), GFP_ATOMIC); 3903 mac_data = buf; 3904 if (!buf) { 3905 netif_addr_unlock_bh(dev); 3906 rtnl_unlock(); 3907 return; 3908 } 3909 3910 sg_init_table(sg, 2); 3911 3912 /* Store the unicast list and count in the front of the buffer */ 3913 mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count); 3914 i = 0; 3915 netdev_for_each_uc_addr(ha, dev) 3916 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 3917 3918 sg_set_buf(&sg[0], mac_data, 3919 sizeof(mac_data->entries) + (uc_count * ETH_ALEN)); 3920 3921 /* multicast list and count fill the end */ 3922 mac_data = (void *)&mac_data->macs[uc_count][0]; 3923 3924 mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count); 3925 i = 0; 3926 netdev_for_each_mc_addr(ha, dev) 3927 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 3928 3929 netif_addr_unlock_bh(dev); 3930 3931 sg_set_buf(&sg[1], mac_data, 3932 sizeof(mac_data->entries) + (mc_count * ETH_ALEN)); 3933 3934 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 3935 VIRTIO_NET_CTRL_MAC_TABLE_SET, sg)) 3936 dev_warn(&dev->dev, "Failed to set MAC filter table.\n"); 3937 3938 rtnl_unlock(); 3939 3940 kfree(buf); 3941 } 3942 3943 static void virtnet_set_rx_mode(struct net_device *dev) 3944 { 3945 struct virtnet_info *vi = netdev_priv(dev); 3946 3947 if (vi->rx_mode_work_enabled) 3948 schedule_work(&vi->rx_mode_work); 3949 } 3950 3951 static int virtnet_vlan_rx_add_vid(struct net_device *dev, 3952 __be16 proto, u16 vid) 3953 { 3954 struct virtnet_info *vi = netdev_priv(dev); 3955 __virtio16 *_vid __free(kfree) = NULL; 3956 struct scatterlist sg; 3957 3958 _vid = kzalloc(sizeof(*_vid), GFP_KERNEL); 3959 if (!_vid) 3960 return -ENOMEM; 3961 3962 *_vid = cpu_to_virtio16(vi->vdev, vid); 3963 sg_init_one(&sg, _vid, sizeof(*_vid)); 3964 3965 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 3966 VIRTIO_NET_CTRL_VLAN_ADD, &sg)) 3967 dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid); 3968 return 0; 3969 } 3970 3971 static int virtnet_vlan_rx_kill_vid(struct net_device *dev, 3972 __be16 proto, u16 vid) 3973 { 3974 struct virtnet_info *vi = netdev_priv(dev); 3975 __virtio16 *_vid __free(kfree) = NULL; 3976 struct scatterlist sg; 3977 3978 _vid = kzalloc(sizeof(*_vid), GFP_KERNEL); 3979 if (!_vid) 3980 return -ENOMEM; 3981 3982 *_vid = cpu_to_virtio16(vi->vdev, vid); 3983 sg_init_one(&sg, _vid, sizeof(*_vid)); 3984 3985 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 3986 VIRTIO_NET_CTRL_VLAN_DEL, &sg)) 3987 dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid); 3988 return 0; 3989 } 3990 3991 static void virtnet_clean_affinity(struct virtnet_info *vi) 3992 { 3993 int i; 3994 3995 if (vi->affinity_hint_set) { 3996 for (i = 0; i < vi->max_queue_pairs; i++) { 3997 virtqueue_set_affinity(vi->rq[i].vq, NULL); 3998 virtqueue_set_affinity(vi->sq[i].vq, NULL); 3999 } 4000 4001 vi->affinity_hint_set = false; 4002 } 4003 } 4004 4005 static void virtnet_set_affinity(struct virtnet_info *vi) 4006 { 4007 cpumask_var_t mask; 4008 int stragglers; 4009 int group_size; 4010 int i, start = 0, cpu; 4011 int num_cpu; 4012 int stride; 4013 4014 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { 4015 virtnet_clean_affinity(vi); 4016 return; 4017 } 4018 4019 num_cpu = num_online_cpus(); 4020 stride = max_t(int, num_cpu / vi->curr_queue_pairs, 
1); 4021 stragglers = num_cpu >= vi->curr_queue_pairs ? 4022 num_cpu % vi->curr_queue_pairs : 4023 0; 4024 4025 for (i = 0; i < vi->curr_queue_pairs; i++) { 4026 group_size = stride + (i < stragglers ? 1 : 0); 4027 4028 for_each_online_cpu_wrap(cpu, start) { 4029 if (!group_size--) { 4030 start = cpu; 4031 break; 4032 } 4033 cpumask_set_cpu(cpu, mask); 4034 } 4035 4036 virtqueue_set_affinity(vi->rq[i].vq, mask); 4037 virtqueue_set_affinity(vi->sq[i].vq, mask); 4038 __netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS); 4039 cpumask_clear(mask); 4040 } 4041 4042 vi->affinity_hint_set = true; 4043 free_cpumask_var(mask); 4044 } 4045 4046 static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node) 4047 { 4048 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 4049 node); 4050 virtnet_set_affinity(vi); 4051 return 0; 4052 } 4053 4054 static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node) 4055 { 4056 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 4057 node_dead); 4058 virtnet_set_affinity(vi); 4059 return 0; 4060 } 4061 4062 static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node) 4063 { 4064 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 4065 node); 4066 4067 virtnet_clean_affinity(vi); 4068 return 0; 4069 } 4070 4071 static enum cpuhp_state virtionet_online; 4072 4073 static int virtnet_cpu_notif_add(struct virtnet_info *vi) 4074 { 4075 int ret; 4076 4077 ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node); 4078 if (ret) 4079 return ret; 4080 ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD, 4081 &vi->node_dead); 4082 if (!ret) 4083 return ret; 4084 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 4085 return ret; 4086 } 4087 4088 static void virtnet_cpu_notif_remove(struct virtnet_info *vi) 4089 { 4090 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 4091 cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD, 4092 &vi->node_dead); 4093 } 4094 4095 static int virtnet_send_ctrl_coal_vq_cmd(struct virtnet_info *vi, 4096 u16 vqn, u32 max_usecs, u32 max_packets) 4097 { 4098 struct virtio_net_ctrl_coal_vq *coal_vq __free(kfree) = NULL; 4099 struct scatterlist sgs; 4100 4101 coal_vq = kzalloc(sizeof(*coal_vq), GFP_KERNEL); 4102 if (!coal_vq) 4103 return -ENOMEM; 4104 4105 coal_vq->vqn = cpu_to_le16(vqn); 4106 coal_vq->coal.max_usecs = cpu_to_le32(max_usecs); 4107 coal_vq->coal.max_packets = cpu_to_le32(max_packets); 4108 sg_init_one(&sgs, coal_vq, sizeof(*coal_vq)); 4109 4110 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 4111 VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET, 4112 &sgs)) 4113 return -EINVAL; 4114 4115 return 0; 4116 } 4117 4118 static int virtnet_send_rx_ctrl_coal_vq_cmd(struct virtnet_info *vi, 4119 u16 queue, u32 max_usecs, 4120 u32 max_packets) 4121 { 4122 int err; 4123 4124 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 4125 return -EOPNOTSUPP; 4126 4127 err = virtnet_send_ctrl_coal_vq_cmd(vi, rxq2vq(queue), 4128 max_usecs, max_packets); 4129 if (err) 4130 return err; 4131 4132 vi->rq[queue].intr_coal.max_usecs = max_usecs; 4133 vi->rq[queue].intr_coal.max_packets = max_packets; 4134 4135 return 0; 4136 } 4137 4138 static int virtnet_send_tx_ctrl_coal_vq_cmd(struct virtnet_info *vi, 4139 u16 queue, u32 max_usecs, 4140 u32 max_packets) 4141 { 4142 int err; 4143 4144 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 4145 return -EOPNOTSUPP; 4146 4147 err = 
virtnet_send_ctrl_coal_vq_cmd(vi, txq2vq(queue), 4148 max_usecs, max_packets); 4149 if (err) 4150 return err; 4151 4152 vi->sq[queue].intr_coal.max_usecs = max_usecs; 4153 vi->sq[queue].intr_coal.max_packets = max_packets; 4154 4155 return 0; 4156 } 4157 4158 static void virtnet_get_ringparam(struct net_device *dev, 4159 struct ethtool_ringparam *ring, 4160 struct kernel_ethtool_ringparam *kernel_ring, 4161 struct netlink_ext_ack *extack) 4162 { 4163 struct virtnet_info *vi = netdev_priv(dev); 4164 4165 ring->rx_max_pending = vi->rq[0].vq->num_max; 4166 ring->tx_max_pending = vi->sq[0].vq->num_max; 4167 ring->rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 4168 ring->tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 4169 } 4170 4171 static int virtnet_set_ringparam(struct net_device *dev, 4172 struct ethtool_ringparam *ring, 4173 struct kernel_ethtool_ringparam *kernel_ring, 4174 struct netlink_ext_ack *extack) 4175 { 4176 struct virtnet_info *vi = netdev_priv(dev); 4177 u32 rx_pending, tx_pending; 4178 struct receive_queue *rq; 4179 struct send_queue *sq; 4180 int i, err; 4181 4182 if (ring->rx_mini_pending || ring->rx_jumbo_pending) 4183 return -EINVAL; 4184 4185 rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 4186 tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 4187 4188 if (ring->rx_pending == rx_pending && 4189 ring->tx_pending == tx_pending) 4190 return 0; 4191 4192 if (ring->rx_pending > vi->rq[0].vq->num_max) 4193 return -EINVAL; 4194 4195 if (ring->tx_pending > vi->sq[0].vq->num_max) 4196 return -EINVAL; 4197 4198 for (i = 0; i < vi->max_queue_pairs; i++) { 4199 rq = vi->rq + i; 4200 sq = vi->sq + i; 4201 4202 if (ring->tx_pending != tx_pending) { 4203 err = virtnet_tx_resize(vi, sq, ring->tx_pending); 4204 if (err) 4205 return err; 4206 4207 /* Upon disabling and re-enabling a transmit virtqueue, the device must 4208 * set the coalescing parameters of the virtqueue to those configured 4209 * through the VIRTIO_NET_CTRL_NOTF_COAL_TX_SET command, or, if the driver 4210 * did not set any TX coalescing parameters, to 0. 4211 */ 4212 err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, i, 4213 vi->intr_coal_tx.max_usecs, 4214 vi->intr_coal_tx.max_packets); 4215 4216 /* Don't break the tx resize action if the vq coalescing is not 4217 * supported. The same is true for rx resize below. 4218 */ 4219 if (err && err != -EOPNOTSUPP) 4220 return err; 4221 } 4222 4223 if (ring->rx_pending != rx_pending) { 4224 err = virtnet_rx_resize(vi, rq, ring->rx_pending); 4225 if (err) 4226 return err; 4227 4228 /* The reason is same as the transmit virtqueue reset */ 4229 mutex_lock(&vi->rq[i].dim_lock); 4230 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, i, 4231 vi->intr_coal_rx.max_usecs, 4232 vi->intr_coal_rx.max_packets); 4233 mutex_unlock(&vi->rq[i].dim_lock); 4234 if (err && err != -EOPNOTSUPP) 4235 return err; 4236 } 4237 } 4238 4239 return 0; 4240 } 4241 4242 static bool virtnet_commit_rss_command(struct virtnet_info *vi) 4243 { 4244 struct net_device *dev = vi->dev; 4245 struct scatterlist sgs[2]; 4246 4247 /* prepare sgs */ 4248 sg_init_table(sgs, 2); 4249 sg_set_buf(&sgs[0], vi->rss_hdr, virtnet_rss_hdr_size(vi)); 4250 sg_set_buf(&sgs[1], &vi->rss_trailer, virtnet_rss_trailer_size(vi)); 4251 4252 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, 4253 vi->has_rss ? 
VIRTIO_NET_CTRL_MQ_RSS_CONFIG 4254 : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, sgs)) 4255 goto err; 4256 4257 return true; 4258 4259 err: 4260 dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n"); 4261 return false; 4262 4263 } 4264 4265 static void virtnet_init_default_rss(struct virtnet_info *vi) 4266 { 4267 vi->rss_hdr->hash_types = cpu_to_le32(vi->rss_hash_types_supported); 4268 vi->rss_hash_types_saved = vi->rss_hash_types_supported; 4269 vi->rss_hdr->indirection_table_mask = vi->rss_indir_table_size 4270 ? cpu_to_le16(vi->rss_indir_table_size - 1) : 0; 4271 vi->rss_hdr->unclassified_queue = 0; 4272 4273 virtnet_rss_update_by_qpairs(vi, vi->curr_queue_pairs); 4274 4275 vi->rss_trailer.hash_key_length = vi->rss_key_size; 4276 4277 netdev_rss_key_fill(vi->rss_hash_key_data, vi->rss_key_size); 4278 } 4279 4280 static int virtnet_get_hashflow(struct net_device *dev, 4281 struct ethtool_rxfh_fields *info) 4282 { 4283 struct virtnet_info *vi = netdev_priv(dev); 4284 4285 info->data = 0; 4286 switch (info->flow_type) { 4287 case TCP_V4_FLOW: 4288 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) { 4289 info->data = RXH_IP_SRC | RXH_IP_DST | 4290 RXH_L4_B_0_1 | RXH_L4_B_2_3; 4291 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 4292 info->data = RXH_IP_SRC | RXH_IP_DST; 4293 } 4294 break; 4295 case TCP_V6_FLOW: 4296 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) { 4297 info->data = RXH_IP_SRC | RXH_IP_DST | 4298 RXH_L4_B_0_1 | RXH_L4_B_2_3; 4299 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 4300 info->data = RXH_IP_SRC | RXH_IP_DST; 4301 } 4302 break; 4303 case UDP_V4_FLOW: 4304 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) { 4305 info->data = RXH_IP_SRC | RXH_IP_DST | 4306 RXH_L4_B_0_1 | RXH_L4_B_2_3; 4307 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 4308 info->data = RXH_IP_SRC | RXH_IP_DST; 4309 } 4310 break; 4311 case UDP_V6_FLOW: 4312 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) { 4313 info->data = RXH_IP_SRC | RXH_IP_DST | 4314 RXH_L4_B_0_1 | RXH_L4_B_2_3; 4315 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 4316 info->data = RXH_IP_SRC | RXH_IP_DST; 4317 } 4318 break; 4319 case IPV4_FLOW: 4320 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) 4321 info->data = RXH_IP_SRC | RXH_IP_DST; 4322 4323 break; 4324 case IPV6_FLOW: 4325 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) 4326 info->data = RXH_IP_SRC | RXH_IP_DST; 4327 4328 break; 4329 default: 4330 info->data = 0; 4331 break; 4332 } 4333 4334 return 0; 4335 } 4336 4337 static int virtnet_set_hashflow(struct net_device *dev, 4338 const struct ethtool_rxfh_fields *info, 4339 struct netlink_ext_ack *extack) 4340 { 4341 struct virtnet_info *vi = netdev_priv(dev); 4342 u32 new_hashtypes = vi->rss_hash_types_saved; 4343 bool is_disable = info->data & RXH_DISCARD; 4344 bool is_l4 = info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3); 4345 4346 /* supports only 'sd', 'sdfn' and 'r' */ 4347 if (!((info->data == (RXH_IP_SRC | RXH_IP_DST)) | is_l4 | is_disable)) 4348 return -EINVAL; 4349 4350 switch (info->flow_type) { 4351 case TCP_V4_FLOW: 4352 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4); 4353 if (!is_disable) 4354 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 4355 | (is_l4 ? 
VIRTIO_NET_RSS_HASH_TYPE_TCPv4 : 0); 4356 break; 4357 case UDP_V4_FLOW: 4358 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_UDPv4); 4359 if (!is_disable) 4360 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 4361 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv4 : 0); 4362 break; 4363 case IPV4_FLOW: 4364 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv4; 4365 if (!is_disable) 4366 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv4; 4367 break; 4368 case TCP_V6_FLOW: 4369 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_TCPv6); 4370 if (!is_disable) 4371 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 4372 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv6 : 0); 4373 break; 4374 case UDP_V6_FLOW: 4375 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_UDPv6); 4376 if (!is_disable) 4377 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 4378 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv6 : 0); 4379 break; 4380 case IPV6_FLOW: 4381 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv6; 4382 if (!is_disable) 4383 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv6; 4384 break; 4385 default: 4386 /* unsupported flow */ 4387 return -EINVAL; 4388 } 4389 4390 /* if unsupported hashtype was set */ 4391 if (new_hashtypes != (new_hashtypes & vi->rss_hash_types_supported)) 4392 return -EINVAL; 4393 4394 if (new_hashtypes != vi->rss_hash_types_saved) { 4395 vi->rss_hash_types_saved = new_hashtypes; 4396 vi->rss_hdr->hash_types = cpu_to_le32(vi->rss_hash_types_saved); 4397 if (vi->dev->features & NETIF_F_RXHASH) 4398 if (!virtnet_commit_rss_command(vi)) 4399 return -EINVAL; 4400 } 4401 4402 return 0; 4403 } 4404 4405 static void virtnet_get_drvinfo(struct net_device *dev, 4406 struct ethtool_drvinfo *info) 4407 { 4408 struct virtnet_info *vi = netdev_priv(dev); 4409 struct virtio_device *vdev = vi->vdev; 4410 4411 strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); 4412 strscpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version)); 4413 strscpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info)); 4414 4415 } 4416 4417 /* TODO: Eliminate OOO packets during switching */ 4418 static int virtnet_set_channels(struct net_device *dev, 4419 struct ethtool_channels *channels) 4420 { 4421 struct virtnet_info *vi = netdev_priv(dev); 4422 u16 queue_pairs = channels->combined_count; 4423 int err; 4424 4425 /* We don't support separate rx/tx channels. 4426 * We don't allow setting 'other' channels. 4427 */ 4428 if (channels->rx_count || channels->tx_count || channels->other_count) 4429 return -EINVAL; 4430 4431 if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0) 4432 return -EINVAL; 4433 4434 /* For now we don't support modifying channels while XDP is loaded 4435 * also when XDP is loaded all RX queues have XDP programs so we only 4436 * need to check a single RX queue. 
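	 * Hence checking vi->rq[0].xdp_prog below is sufficient.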
4437 */ 4438 if (vi->rq[0].xdp_prog) 4439 return -EINVAL; 4440 4441 cpus_read_lock(); 4442 err = virtnet_set_queues(vi, queue_pairs); 4443 if (err) { 4444 cpus_read_unlock(); 4445 goto err; 4446 } 4447 virtnet_set_affinity(vi); 4448 cpus_read_unlock(); 4449 4450 netif_set_real_num_tx_queues(dev, queue_pairs); 4451 netif_set_real_num_rx_queues(dev, queue_pairs); 4452 err: 4453 return err; 4454 } 4455 4456 static void virtnet_stats_sprintf(u8 **p, const char *fmt, const char *noq_fmt, 4457 int num, int qid, const struct virtnet_stat_desc *desc) 4458 { 4459 int i; 4460 4461 if (qid < 0) { 4462 for (i = 0; i < num; ++i) 4463 ethtool_sprintf(p, noq_fmt, desc[i].desc); 4464 } else { 4465 for (i = 0; i < num; ++i) 4466 ethtool_sprintf(p, fmt, qid, desc[i].desc); 4467 } 4468 } 4469 4470 /* qid == -1: for rx/tx queue total field */ 4471 static void virtnet_get_stats_string(struct virtnet_info *vi, int type, int qid, u8 **data) 4472 { 4473 const struct virtnet_stat_desc *desc; 4474 const char *fmt, *noq_fmt; 4475 u8 *p = *data; 4476 u32 num; 4477 4478 if (type == VIRTNET_Q_TYPE_CQ && qid >= 0) { 4479 noq_fmt = "cq_hw_%s"; 4480 4481 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) { 4482 desc = &virtnet_stats_cvq_desc[0]; 4483 num = ARRAY_SIZE(virtnet_stats_cvq_desc); 4484 4485 virtnet_stats_sprintf(&p, NULL, noq_fmt, num, -1, desc); 4486 } 4487 } 4488 4489 if (type == VIRTNET_Q_TYPE_RX) { 4490 fmt = "rx%u_%s"; 4491 noq_fmt = "rx_%s"; 4492 4493 desc = &virtnet_rq_stats_desc[0]; 4494 num = ARRAY_SIZE(virtnet_rq_stats_desc); 4495 4496 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4497 4498 fmt = "rx%u_hw_%s"; 4499 noq_fmt = "rx_hw_%s"; 4500 4501 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4502 desc = &virtnet_stats_rx_basic_desc[0]; 4503 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4504 4505 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4506 } 4507 4508 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4509 desc = &virtnet_stats_rx_csum_desc[0]; 4510 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4511 4512 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4513 } 4514 4515 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4516 desc = &virtnet_stats_rx_speed_desc[0]; 4517 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4518 4519 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4520 } 4521 } 4522 4523 if (type == VIRTNET_Q_TYPE_TX) { 4524 fmt = "tx%u_%s"; 4525 noq_fmt = "tx_%s"; 4526 4527 desc = &virtnet_sq_stats_desc[0]; 4528 num = ARRAY_SIZE(virtnet_sq_stats_desc); 4529 4530 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4531 4532 fmt = "tx%u_hw_%s"; 4533 noq_fmt = "tx_hw_%s"; 4534 4535 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4536 desc = &virtnet_stats_tx_basic_desc[0]; 4537 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4538 4539 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4540 } 4541 4542 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4543 desc = &virtnet_stats_tx_gso_desc[0]; 4544 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4545 4546 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4547 } 4548 4549 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4550 desc = &virtnet_stats_tx_speed_desc[0]; 4551 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4552 4553 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4554 } 4555 } 4556 4557 *data = p; 4558 } 4559 4560 struct virtnet_stats_ctx { 4561 /* The stats are write to qstats or ethtool -S */ 4562 
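	/* true: fill netdev per-queue qstats; false: fill the ethtool -S data array. */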
bool to_qstat; 4563 4564 /* Used to calculate the offset inside the output buffer. */ 4565 u32 desc_num[3]; 4566 4567 /* The actual supported stat types. */ 4568 u64 bitmap[3]; 4569 4570 /* Used to calculate the reply buffer size. */ 4571 u32 size[3]; 4572 4573 /* Record the output buffer. */ 4574 u64 *data; 4575 }; 4576 4577 static void virtnet_stats_ctx_init(struct virtnet_info *vi, 4578 struct virtnet_stats_ctx *ctx, 4579 u64 *data, bool to_qstat) 4580 { 4581 u32 queue_type; 4582 4583 ctx->data = data; 4584 ctx->to_qstat = to_qstat; 4585 4586 if (to_qstat) { 4587 ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc_qstat); 4588 ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc_qstat); 4589 4590 queue_type = VIRTNET_Q_TYPE_RX; 4591 4592 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4593 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC; 4594 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat); 4595 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic); 4596 } 4597 4598 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4599 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM; 4600 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat); 4601 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum); 4602 } 4603 4604 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 4605 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_GSO; 4606 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat); 4607 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_gso); 4608 } 4609 4610 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4611 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED; 4612 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat); 4613 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed); 4614 } 4615 4616 queue_type = VIRTNET_Q_TYPE_TX; 4617 4618 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4619 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC; 4620 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat); 4621 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic); 4622 } 4623 4624 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 4625 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_CSUM; 4626 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat); 4627 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_csum); 4628 } 4629 4630 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4631 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO; 4632 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat); 4633 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso); 4634 } 4635 4636 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4637 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED; 4638 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat); 4639 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed); 4640 } 4641 4642 return; 4643 } 4644 4645 ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc); 4646 ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc); 4647 4648 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) { 4649 queue_type = VIRTNET_Q_TYPE_CQ; 4650 4651 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_CVQ; 4652 ctx->desc_num[queue_type] += 
ARRAY_SIZE(virtnet_stats_cvq_desc); 4653 ctx->size[queue_type] += sizeof(struct virtio_net_stats_cvq); 4654 } 4655 4656 queue_type = VIRTNET_Q_TYPE_RX; 4657 4658 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4659 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC; 4660 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4661 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic); 4662 } 4663 4664 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4665 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM; 4666 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4667 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum); 4668 } 4669 4670 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4671 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED; 4672 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4673 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed); 4674 } 4675 4676 queue_type = VIRTNET_Q_TYPE_TX; 4677 4678 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4679 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC; 4680 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4681 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic); 4682 } 4683 4684 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4685 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO; 4686 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4687 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso); 4688 } 4689 4690 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4691 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED; 4692 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4693 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed); 4694 } 4695 } 4696 4697 /* stats_sum_queue - Calculate the sum of the same fields in sq or rq. 
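 * The per-queue blocks are laid out back to back, so the j-th queue's
 * copy of field i is read from q_value[i + j * num].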
4698 * @sum: the position to store the sum values 4699 * @num: field num 4700 * @q_value: the first queue fields 4701 * @q_num: number of the queues 4702 */ 4703 static void stats_sum_queue(u64 *sum, u32 num, u64 *q_value, u32 q_num) 4704 { 4705 u32 step = num; 4706 int i, j; 4707 u64 *p; 4708 4709 for (i = 0; i < num; ++i) { 4710 p = sum + i; 4711 *p = 0; 4712 4713 for (j = 0; j < q_num; ++j) 4714 *p += *(q_value + i + j * step); 4715 } 4716 } 4717 4718 static void virtnet_fill_total_fields(struct virtnet_info *vi, 4719 struct virtnet_stats_ctx *ctx) 4720 { 4721 u64 *data, *first_rx_q, *first_tx_q; 4722 u32 num_cq, num_rx, num_tx; 4723 4724 num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ]; 4725 num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX]; 4726 num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX]; 4727 4728 first_rx_q = ctx->data + num_rx + num_tx + num_cq; 4729 first_tx_q = first_rx_q + vi->curr_queue_pairs * num_rx; 4730 4731 data = ctx->data; 4732 4733 stats_sum_queue(data, num_rx, first_rx_q, vi->curr_queue_pairs); 4734 4735 data = ctx->data + num_rx; 4736 4737 stats_sum_queue(data, num_tx, first_tx_q, vi->curr_queue_pairs); 4738 } 4739 4740 static void virtnet_fill_stats_qstat(struct virtnet_info *vi, u32 qid, 4741 struct virtnet_stats_ctx *ctx, 4742 const u8 *base, bool drv_stats, u8 reply_type) 4743 { 4744 const struct virtnet_stat_desc *desc; 4745 const u64_stats_t *v_stat; 4746 u64 offset, bitmap; 4747 const __le64 *v; 4748 u32 queue_type; 4749 int i, num; 4750 4751 queue_type = vq_type(vi, qid); 4752 bitmap = ctx->bitmap[queue_type]; 4753 4754 if (drv_stats) { 4755 if (queue_type == VIRTNET_Q_TYPE_RX) { 4756 desc = &virtnet_rq_stats_desc_qstat[0]; 4757 num = ARRAY_SIZE(virtnet_rq_stats_desc_qstat); 4758 } else { 4759 desc = &virtnet_sq_stats_desc_qstat[0]; 4760 num = ARRAY_SIZE(virtnet_sq_stats_desc_qstat); 4761 } 4762 4763 for (i = 0; i < num; ++i) { 4764 offset = desc[i].qstat_offset / sizeof(*ctx->data); 4765 v_stat = (const u64_stats_t *)(base + desc[i].offset); 4766 ctx->data[offset] = u64_stats_read(v_stat); 4767 } 4768 return; 4769 } 4770 4771 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4772 desc = &virtnet_stats_rx_basic_desc_qstat[0]; 4773 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat); 4774 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC) 4775 goto found; 4776 } 4777 4778 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4779 desc = &virtnet_stats_rx_csum_desc_qstat[0]; 4780 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat); 4781 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM) 4782 goto found; 4783 } 4784 4785 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 4786 desc = &virtnet_stats_rx_gso_desc_qstat[0]; 4787 num = ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat); 4788 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_GSO) 4789 goto found; 4790 } 4791 4792 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4793 desc = &virtnet_stats_rx_speed_desc_qstat[0]; 4794 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat); 4795 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED) 4796 goto found; 4797 } 4798 4799 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4800 desc = &virtnet_stats_tx_basic_desc_qstat[0]; 4801 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat); 4802 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC) 4803 goto found; 4804 } 4805 4806 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 4807 desc = &virtnet_stats_tx_csum_desc_qstat[0]; 4808 num = ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat); 4809 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_CSUM) 4810 goto found; 4811 
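		/* Not the requested reply type; keep scanning the remaining blocks. */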
} 4812 4813 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4814 desc = &virtnet_stats_tx_gso_desc_qstat[0]; 4815 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat); 4816 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO) 4817 goto found; 4818 } 4819 4820 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4821 desc = &virtnet_stats_tx_speed_desc_qstat[0]; 4822 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat); 4823 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED) 4824 goto found; 4825 } 4826 4827 return; 4828 4829 found: 4830 for (i = 0; i < num; ++i) { 4831 offset = desc[i].qstat_offset / sizeof(*ctx->data); 4832 v = (const __le64 *)(base + desc[i].offset); 4833 ctx->data[offset] = le64_to_cpu(*v); 4834 } 4835 } 4836 4837 /* virtnet_fill_stats - copy the stats to qstats or ethtool -S 4838 * The stats source is the device or the driver. 4839 * 4840 * @vi: virtio net info 4841 * @qid: the vq id 4842 * @ctx: stats ctx (initiated by virtnet_stats_ctx_init()) 4843 * @base: pointer to the device reply or the driver stats structure. 4844 * @drv_stats: designate the base type (device reply, driver stats) 4845 * @type: the type of the device reply (if drv_stats is true, this must be zero) 4846 */ 4847 static void virtnet_fill_stats(struct virtnet_info *vi, u32 qid, 4848 struct virtnet_stats_ctx *ctx, 4849 const u8 *base, bool drv_stats, u8 reply_type) 4850 { 4851 u32 queue_type, num_rx, num_tx, num_cq; 4852 const struct virtnet_stat_desc *desc; 4853 const u64_stats_t *v_stat; 4854 u64 offset, bitmap; 4855 const __le64 *v; 4856 int i, num; 4857 4858 if (ctx->to_qstat) 4859 return virtnet_fill_stats_qstat(vi, qid, ctx, base, drv_stats, reply_type); 4860 4861 num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ]; 4862 num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX]; 4863 num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX]; 4864 4865 queue_type = vq_type(vi, qid); 4866 bitmap = ctx->bitmap[queue_type]; 4867 4868 /* skip the total fields of pairs */ 4869 offset = num_rx + num_tx; 4870 4871 if (queue_type == VIRTNET_Q_TYPE_TX) { 4872 offset += num_cq + num_rx * vi->curr_queue_pairs + num_tx * (qid / 2); 4873 4874 num = ARRAY_SIZE(virtnet_sq_stats_desc); 4875 if (drv_stats) { 4876 desc = &virtnet_sq_stats_desc[0]; 4877 goto drv_stats; 4878 } 4879 4880 offset += num; 4881 4882 } else if (queue_type == VIRTNET_Q_TYPE_RX) { 4883 offset += num_cq + num_rx * (qid / 2); 4884 4885 num = ARRAY_SIZE(virtnet_rq_stats_desc); 4886 if (drv_stats) { 4887 desc = &virtnet_rq_stats_desc[0]; 4888 goto drv_stats; 4889 } 4890 4891 offset += num; 4892 } 4893 4894 if (bitmap & VIRTIO_NET_STATS_TYPE_CVQ) { 4895 desc = &virtnet_stats_cvq_desc[0]; 4896 num = ARRAY_SIZE(virtnet_stats_cvq_desc); 4897 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_CVQ) 4898 goto found; 4899 4900 offset += num; 4901 } 4902 4903 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4904 desc = &virtnet_stats_rx_basic_desc[0]; 4905 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4906 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC) 4907 goto found; 4908 4909 offset += num; 4910 } 4911 4912 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4913 desc = &virtnet_stats_rx_csum_desc[0]; 4914 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4915 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM) 4916 goto found; 4917 4918 offset += num; 4919 } 4920 4921 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4922 desc = &virtnet_stats_rx_speed_desc[0]; 4923 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4924 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED) 4925 goto found; 4926 4927 
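		/* Not the reply being filled: these slots still exist in the
		 * output layout, so skip past them.
		 */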
offset += num; 4928 } 4929 4930 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4931 desc = &virtnet_stats_tx_basic_desc[0]; 4932 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4933 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC) 4934 goto found; 4935 4936 offset += num; 4937 } 4938 4939 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4940 desc = &virtnet_stats_tx_gso_desc[0]; 4941 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4942 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO) 4943 goto found; 4944 4945 offset += num; 4946 } 4947 4948 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4949 desc = &virtnet_stats_tx_speed_desc[0]; 4950 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4951 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED) 4952 goto found; 4953 4954 offset += num; 4955 } 4956 4957 return; 4958 4959 found: 4960 for (i = 0; i < num; ++i) { 4961 v = (const __le64 *)(base + desc[i].offset); 4962 ctx->data[offset + i] = le64_to_cpu(*v); 4963 } 4964 4965 return; 4966 4967 drv_stats: 4968 for (i = 0; i < num; ++i) { 4969 v_stat = (const u64_stats_t *)(base + desc[i].offset); 4970 ctx->data[offset + i] = u64_stats_read(v_stat); 4971 } 4972 } 4973 4974 static int __virtnet_get_hw_stats(struct virtnet_info *vi, 4975 struct virtnet_stats_ctx *ctx, 4976 struct virtio_net_ctrl_queue_stats *req, 4977 int req_size, void *reply, int res_size) 4978 { 4979 struct virtio_net_stats_reply_hdr *hdr; 4980 struct scatterlist sgs_in, sgs_out; 4981 void *p; 4982 u32 qid; 4983 int ok; 4984 4985 sg_init_one(&sgs_out, req, req_size); 4986 sg_init_one(&sgs_in, reply, res_size); 4987 4988 ok = virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS, 4989 VIRTIO_NET_CTRL_STATS_GET, 4990 &sgs_out, &sgs_in); 4991 4992 if (!ok) 4993 return ok; 4994 4995 for (p = reply; p - reply < res_size; p += le16_to_cpu(hdr->size)) { 4996 hdr = p; 4997 qid = le16_to_cpu(hdr->vq_index); 4998 virtnet_fill_stats(vi, qid, ctx, p, false, hdr->type); 4999 } 5000 5001 return 0; 5002 } 5003 5004 static void virtnet_make_stat_req(struct virtnet_info *vi, 5005 struct virtnet_stats_ctx *ctx, 5006 struct virtio_net_ctrl_queue_stats *req, 5007 int qid, int *idx) 5008 { 5009 int qtype = vq_type(vi, qid); 5010 u64 bitmap = ctx->bitmap[qtype]; 5011 5012 if (!bitmap) 5013 return; 5014 5015 req->stats[*idx].vq_index = cpu_to_le16(qid); 5016 req->stats[*idx].types_bitmap[0] = cpu_to_le64(bitmap); 5017 *idx += 1; 5018 } 5019 5020 /* qid: -1: get stats of all vq. 5021 * > 0: get the stats for the special vq. This must not be cvq. 
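 *      When qid == -1 the cvq stats, if supported, are appended to the request as well.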
5022 */ 5023 static int virtnet_get_hw_stats(struct virtnet_info *vi, 5024 struct virtnet_stats_ctx *ctx, int qid) 5025 { 5026 int qnum, i, j, res_size, qtype, last_vq, first_vq; 5027 struct virtio_net_ctrl_queue_stats *req; 5028 bool enable_cvq; 5029 void *reply; 5030 int ok; 5031 5032 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) 5033 return 0; 5034 5035 if (qid == -1) { 5036 last_vq = vi->curr_queue_pairs * 2 - 1; 5037 first_vq = 0; 5038 enable_cvq = true; 5039 } else { 5040 last_vq = qid; 5041 first_vq = qid; 5042 enable_cvq = false; 5043 } 5044 5045 qnum = 0; 5046 res_size = 0; 5047 for (i = first_vq; i <= last_vq ; ++i) { 5048 qtype = vq_type(vi, i); 5049 if (ctx->bitmap[qtype]) { 5050 ++qnum; 5051 res_size += ctx->size[qtype]; 5052 } 5053 } 5054 5055 if (enable_cvq && ctx->bitmap[VIRTNET_Q_TYPE_CQ]) { 5056 res_size += ctx->size[VIRTNET_Q_TYPE_CQ]; 5057 qnum += 1; 5058 } 5059 5060 req = kcalloc(qnum, sizeof(*req), GFP_KERNEL); 5061 if (!req) 5062 return -ENOMEM; 5063 5064 reply = kmalloc(res_size, GFP_KERNEL); 5065 if (!reply) { 5066 kfree(req); 5067 return -ENOMEM; 5068 } 5069 5070 j = 0; 5071 for (i = first_vq; i <= last_vq ; ++i) 5072 virtnet_make_stat_req(vi, ctx, req, i, &j); 5073 5074 if (enable_cvq) 5075 virtnet_make_stat_req(vi, ctx, req, vi->max_queue_pairs * 2, &j); 5076 5077 ok = __virtnet_get_hw_stats(vi, ctx, req, sizeof(*req) * j, reply, res_size); 5078 5079 kfree(req); 5080 kfree(reply); 5081 5082 return ok; 5083 } 5084 5085 static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data) 5086 { 5087 struct virtnet_info *vi = netdev_priv(dev); 5088 unsigned int i; 5089 u8 *p = data; 5090 5091 switch (stringset) { 5092 case ETH_SS_STATS: 5093 /* Generate the total field names. */ 5094 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, -1, &p); 5095 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, -1, &p); 5096 5097 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_CQ, 0, &p); 5098 5099 for (i = 0; i < vi->curr_queue_pairs; ++i) 5100 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, i, &p); 5101 5102 for (i = 0; i < vi->curr_queue_pairs; ++i) 5103 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, i, &p); 5104 break; 5105 } 5106 } 5107 5108 static int virtnet_get_sset_count(struct net_device *dev, int sset) 5109 { 5110 struct virtnet_info *vi = netdev_priv(dev); 5111 struct virtnet_stats_ctx ctx = {0}; 5112 u32 pair_count; 5113 5114 switch (sset) { 5115 case ETH_SS_STATS: 5116 virtnet_stats_ctx_init(vi, &ctx, NULL, false); 5117 5118 pair_count = ctx.desc_num[VIRTNET_Q_TYPE_RX] + ctx.desc_num[VIRTNET_Q_TYPE_TX]; 5119 5120 return pair_count + ctx.desc_num[VIRTNET_Q_TYPE_CQ] + 5121 vi->curr_queue_pairs * pair_count; 5122 default: 5123 return -EOPNOTSUPP; 5124 } 5125 } 5126 5127 static void virtnet_get_ethtool_stats(struct net_device *dev, 5128 struct ethtool_stats *stats, u64 *data) 5129 { 5130 struct virtnet_info *vi = netdev_priv(dev); 5131 struct virtnet_stats_ctx ctx = {0}; 5132 unsigned int start, i; 5133 const u8 *stats_base; 5134 5135 virtnet_stats_ctx_init(vi, &ctx, data, false); 5136 if (virtnet_get_hw_stats(vi, &ctx, -1)) 5137 dev_warn(&vi->dev->dev, "Failed to get hw stats.\n"); 5138 5139 for (i = 0; i < vi->curr_queue_pairs; i++) { 5140 struct receive_queue *rq = &vi->rq[i]; 5141 struct send_queue *sq = &vi->sq[i]; 5142 5143 stats_base = (const u8 *)&rq->stats; 5144 do { 5145 start = u64_stats_fetch_begin(&rq->stats.syncp); 5146 virtnet_fill_stats(vi, i * 2, &ctx, stats_base, true, 0); 5147 } while (u64_stats_fetch_retry(&rq->stats.syncp, 
start)); 5148 5149 stats_base = (const u8 *)&sq->stats; 5150 do { 5151 start = u64_stats_fetch_begin(&sq->stats.syncp); 5152 virtnet_fill_stats(vi, i * 2 + 1, &ctx, stats_base, true, 0); 5153 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 5154 } 5155 5156 virtnet_fill_total_fields(vi, &ctx); 5157 } 5158 5159 static void virtnet_get_channels(struct net_device *dev, 5160 struct ethtool_channels *channels) 5161 { 5162 struct virtnet_info *vi = netdev_priv(dev); 5163 5164 channels->combined_count = vi->curr_queue_pairs; 5165 channels->max_combined = vi->max_queue_pairs; 5166 channels->max_other = 0; 5167 channels->rx_count = 0; 5168 channels->tx_count = 0; 5169 channels->other_count = 0; 5170 } 5171 5172 static int virtnet_set_link_ksettings(struct net_device *dev, 5173 const struct ethtool_link_ksettings *cmd) 5174 { 5175 struct virtnet_info *vi = netdev_priv(dev); 5176 5177 return ethtool_virtdev_set_link_ksettings(dev, cmd, 5178 &vi->speed, &vi->duplex); 5179 } 5180 5181 static int virtnet_get_link_ksettings(struct net_device *dev, 5182 struct ethtool_link_ksettings *cmd) 5183 { 5184 struct virtnet_info *vi = netdev_priv(dev); 5185 5186 cmd->base.speed = vi->speed; 5187 cmd->base.duplex = vi->duplex; 5188 cmd->base.port = PORT_OTHER; 5189 5190 return 0; 5191 } 5192 5193 static int virtnet_send_tx_notf_coal_cmds(struct virtnet_info *vi, 5194 struct ethtool_coalesce *ec) 5195 { 5196 struct virtio_net_ctrl_coal_tx *coal_tx __free(kfree) = NULL; 5197 struct scatterlist sgs_tx; 5198 int i; 5199 5200 coal_tx = kzalloc(sizeof(*coal_tx), GFP_KERNEL); 5201 if (!coal_tx) 5202 return -ENOMEM; 5203 5204 coal_tx->tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); 5205 coal_tx->tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); 5206 sg_init_one(&sgs_tx, coal_tx, sizeof(*coal_tx)); 5207 5208 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 5209 VIRTIO_NET_CTRL_NOTF_COAL_TX_SET, 5210 &sgs_tx)) 5211 return -EINVAL; 5212 5213 vi->intr_coal_tx.max_usecs = ec->tx_coalesce_usecs; 5214 vi->intr_coal_tx.max_packets = ec->tx_max_coalesced_frames; 5215 for (i = 0; i < vi->max_queue_pairs; i++) { 5216 vi->sq[i].intr_coal.max_usecs = ec->tx_coalesce_usecs; 5217 vi->sq[i].intr_coal.max_packets = ec->tx_max_coalesced_frames; 5218 } 5219 5220 return 0; 5221 } 5222 5223 static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi, 5224 struct ethtool_coalesce *ec) 5225 { 5226 struct virtio_net_ctrl_coal_rx *coal_rx __free(kfree) = NULL; 5227 bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce; 5228 struct scatterlist sgs_rx; 5229 int i; 5230 5231 if (rx_ctrl_dim_on && !virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 5232 return -EOPNOTSUPP; 5233 5234 if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != vi->intr_coal_rx.max_usecs || 5235 ec->rx_max_coalesced_frames != vi->intr_coal_rx.max_packets)) 5236 return -EINVAL; 5237 5238 if (rx_ctrl_dim_on && !vi->rx_dim_enabled) { 5239 vi->rx_dim_enabled = true; 5240 for (i = 0; i < vi->max_queue_pairs; i++) { 5241 mutex_lock(&vi->rq[i].dim_lock); 5242 vi->rq[i].dim_enabled = true; 5243 mutex_unlock(&vi->rq[i].dim_lock); 5244 } 5245 return 0; 5246 } 5247 5248 coal_rx = kzalloc(sizeof(*coal_rx), GFP_KERNEL); 5249 if (!coal_rx) 5250 return -ENOMEM; 5251 5252 if (!rx_ctrl_dim_on && vi->rx_dim_enabled) { 5253 vi->rx_dim_enabled = false; 5254 for (i = 0; i < vi->max_queue_pairs; i++) { 5255 mutex_lock(&vi->rq[i].dim_lock); 5256 vi->rq[i].dim_enabled = false; 5257 mutex_unlock(&vi->rq[i].dim_lock); 5258 } 5259 } 5260 5261 /* Since the per-queue 
coalescing params can be set,
5262 * we need to apply the new global params even if they
5263 * are not updated.
5264 */
5265 coal_rx->rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs);
5266 coal_rx->rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames);
5267 sg_init_one(&sgs_rx, coal_rx, sizeof(*coal_rx));
5268
5269 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
5270 VIRTIO_NET_CTRL_NOTF_COAL_RX_SET,
5271 &sgs_rx))
5272 return -EINVAL;
5273
5274 vi->intr_coal_rx.max_usecs = ec->rx_coalesce_usecs;
5275 vi->intr_coal_rx.max_packets = ec->rx_max_coalesced_frames;
5276 for (i = 0; i < vi->max_queue_pairs; i++) {
5277 mutex_lock(&vi->rq[i].dim_lock);
5278 vi->rq[i].intr_coal.max_usecs = ec->rx_coalesce_usecs;
5279 vi->rq[i].intr_coal.max_packets = ec->rx_max_coalesced_frames;
5280 mutex_unlock(&vi->rq[i].dim_lock);
5281 }
5282
5283 return 0;
5284 }
5285
5286 static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi,
5287 struct ethtool_coalesce *ec)
5288 {
5289 int err;
5290
5291 err = virtnet_send_tx_notf_coal_cmds(vi, ec);
5292 if (err)
5293 return err;
5294
5295 err = virtnet_send_rx_notf_coal_cmds(vi, ec);
5296 if (err)
5297 return err;
5298
5299 return 0;
5300 }
5301
5302 static int virtnet_send_rx_notf_coal_vq_cmds(struct virtnet_info *vi,
5303 struct ethtool_coalesce *ec,
5304 u16 queue)
5305 {
5306 bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce;
5307 u32 max_usecs, max_packets;
5308 bool cur_rx_dim;
5309 int err;
5310
5311 mutex_lock(&vi->rq[queue].dim_lock);
5312 cur_rx_dim = vi->rq[queue].dim_enabled;
5313 max_usecs = vi->rq[queue].intr_coal.max_usecs;
5314 max_packets = vi->rq[queue].intr_coal.max_packets;
5315
5316 if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != max_usecs ||
5317 ec->rx_max_coalesced_frames != max_packets)) {
5318 mutex_unlock(&vi->rq[queue].dim_lock);
5319 return -EINVAL;
5320 }
5321
5322 if (rx_ctrl_dim_on && !cur_rx_dim) {
5323 vi->rq[queue].dim_enabled = true;
5324 mutex_unlock(&vi->rq[queue].dim_lock);
5325 return 0;
5326 }
5327
5328 if (!rx_ctrl_dim_on && cur_rx_dim)
5329 vi->rq[queue].dim_enabled = false;
5330
5331 /* If no params are updated, userspace ethtool will
5332 * reject the modification.
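	 * Any request that reaches this point therefore carries at least one
	 * change and is simply forwarded to the device.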
5333 */ 5334 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, queue, 5335 ec->rx_coalesce_usecs, 5336 ec->rx_max_coalesced_frames); 5337 mutex_unlock(&vi->rq[queue].dim_lock); 5338 return err; 5339 } 5340 5341 static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi, 5342 struct ethtool_coalesce *ec, 5343 u16 queue) 5344 { 5345 int err; 5346 5347 err = virtnet_send_rx_notf_coal_vq_cmds(vi, ec, queue); 5348 if (err) 5349 return err; 5350 5351 err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, queue, 5352 ec->tx_coalesce_usecs, 5353 ec->tx_max_coalesced_frames); 5354 if (err) 5355 return err; 5356 5357 return 0; 5358 } 5359 5360 static void virtnet_rx_dim_work(struct work_struct *work) 5361 { 5362 struct dim *dim = container_of(work, struct dim, work); 5363 struct receive_queue *rq = container_of(dim, 5364 struct receive_queue, dim); 5365 struct virtnet_info *vi = rq->vq->vdev->priv; 5366 struct net_device *dev = vi->dev; 5367 struct dim_cq_moder update_moder; 5368 int qnum, err; 5369 5370 qnum = rq - vi->rq; 5371 5372 mutex_lock(&rq->dim_lock); 5373 if (!rq->dim_enabled) 5374 goto out; 5375 5376 update_moder = net_dim_get_rx_irq_moder(dev, dim); 5377 if (update_moder.usec != rq->intr_coal.max_usecs || 5378 update_moder.pkts != rq->intr_coal.max_packets) { 5379 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, qnum, 5380 update_moder.usec, 5381 update_moder.pkts); 5382 if (err) 5383 pr_debug("%s: Failed to send dim parameters on rxq%d\n", 5384 dev->name, qnum); 5385 } 5386 out: 5387 dim->state = DIM_START_MEASURE; 5388 mutex_unlock(&rq->dim_lock); 5389 } 5390 5391 static int virtnet_coal_params_supported(struct ethtool_coalesce *ec) 5392 { 5393 /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL 5394 * or VIRTIO_NET_F_VQ_NOTF_COAL feature is negotiated. 5395 */ 5396 if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs) 5397 return -EOPNOTSUPP; 5398 5399 if (ec->tx_max_coalesced_frames > 1 || 5400 ec->rx_max_coalesced_frames != 1) 5401 return -EINVAL; 5402 5403 return 0; 5404 } 5405 5406 static int virtnet_should_update_vq_weight(int dev_flags, int weight, 5407 int vq_weight, bool *should_update) 5408 { 5409 if (weight ^ vq_weight) { 5410 if (dev_flags & IFF_UP) 5411 return -EBUSY; 5412 *should_update = true; 5413 } 5414 5415 return 0; 5416 } 5417 5418 static int virtnet_set_coalesce(struct net_device *dev, 5419 struct ethtool_coalesce *ec, 5420 struct kernel_ethtool_coalesce *kernel_coal, 5421 struct netlink_ext_ack *extack) 5422 { 5423 struct virtnet_info *vi = netdev_priv(dev); 5424 int ret, queue_number, napi_weight, i; 5425 bool update_napi = false; 5426 5427 /* Can't change NAPI weight if the link is up */ 5428 napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; 5429 for (queue_number = 0; queue_number < vi->max_queue_pairs; queue_number++) { 5430 ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, 5431 vi->sq[queue_number].napi.weight, 5432 &update_napi); 5433 if (ret) 5434 return ret; 5435 5436 if (update_napi) { 5437 /* All queues that belong to [queue_number, vi->max_queue_pairs] will be 5438 * updated for the sake of simplicity, which might not be necessary 5439 */ 5440 break; 5441 } 5442 } 5443 5444 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) 5445 ret = virtnet_send_notf_coal_cmds(vi, ec); 5446 else 5447 ret = virtnet_coal_params_supported(ec); 5448 5449 if (ret) 5450 return ret; 5451 5452 if (update_napi) { 5453 /* xsk xmit depends on the tx napi. So if xsk is active, 5454 * prevent modifications to tx napi. 
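	 * Return -EBUSY if any queue in the range being updated has an
	 * xsk_pool bound.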
5455 */ 5456 for (i = queue_number; i < vi->max_queue_pairs; i++) { 5457 if (vi->sq[i].xsk_pool) 5458 return -EBUSY; 5459 } 5460 5461 for (; queue_number < vi->max_queue_pairs; queue_number++) 5462 vi->sq[queue_number].napi.weight = napi_weight; 5463 } 5464 5465 return ret; 5466 } 5467 5468 static int virtnet_get_coalesce(struct net_device *dev, 5469 struct ethtool_coalesce *ec, 5470 struct kernel_ethtool_coalesce *kernel_coal, 5471 struct netlink_ext_ack *extack) 5472 { 5473 struct virtnet_info *vi = netdev_priv(dev); 5474 5475 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { 5476 ec->rx_coalesce_usecs = vi->intr_coal_rx.max_usecs; 5477 ec->tx_coalesce_usecs = vi->intr_coal_tx.max_usecs; 5478 ec->tx_max_coalesced_frames = vi->intr_coal_tx.max_packets; 5479 ec->rx_max_coalesced_frames = vi->intr_coal_rx.max_packets; 5480 ec->use_adaptive_rx_coalesce = vi->rx_dim_enabled; 5481 } else { 5482 ec->rx_max_coalesced_frames = 1; 5483 5484 if (vi->sq[0].napi.weight) 5485 ec->tx_max_coalesced_frames = 1; 5486 } 5487 5488 return 0; 5489 } 5490 5491 static int virtnet_set_per_queue_coalesce(struct net_device *dev, 5492 u32 queue, 5493 struct ethtool_coalesce *ec) 5494 { 5495 struct virtnet_info *vi = netdev_priv(dev); 5496 int ret, napi_weight; 5497 bool update_napi = false; 5498 5499 if (queue >= vi->max_queue_pairs) 5500 return -EINVAL; 5501 5502 /* Can't change NAPI weight if the link is up */ 5503 napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; 5504 ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, 5505 vi->sq[queue].napi.weight, 5506 &update_napi); 5507 if (ret) 5508 return ret; 5509 5510 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 5511 ret = virtnet_send_notf_coal_vq_cmds(vi, ec, queue); 5512 else 5513 ret = virtnet_coal_params_supported(ec); 5514 5515 if (ret) 5516 return ret; 5517 5518 if (update_napi) 5519 vi->sq[queue].napi.weight = napi_weight; 5520 5521 return 0; 5522 } 5523 5524 static int virtnet_get_per_queue_coalesce(struct net_device *dev, 5525 u32 queue, 5526 struct ethtool_coalesce *ec) 5527 { 5528 struct virtnet_info *vi = netdev_priv(dev); 5529 5530 if (queue >= vi->max_queue_pairs) 5531 return -EINVAL; 5532 5533 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { 5534 mutex_lock(&vi->rq[queue].dim_lock); 5535 ec->rx_coalesce_usecs = vi->rq[queue].intr_coal.max_usecs; 5536 ec->tx_coalesce_usecs = vi->sq[queue].intr_coal.max_usecs; 5537 ec->tx_max_coalesced_frames = vi->sq[queue].intr_coal.max_packets; 5538 ec->rx_max_coalesced_frames = vi->rq[queue].intr_coal.max_packets; 5539 ec->use_adaptive_rx_coalesce = vi->rq[queue].dim_enabled; 5540 mutex_unlock(&vi->rq[queue].dim_lock); 5541 } else { 5542 ec->rx_max_coalesced_frames = 1; 5543 5544 if (vi->sq[queue].napi.weight) 5545 ec->tx_max_coalesced_frames = 1; 5546 } 5547 5548 return 0; 5549 } 5550 5551 static void virtnet_init_settings(struct net_device *dev) 5552 { 5553 struct virtnet_info *vi = netdev_priv(dev); 5554 5555 vi->speed = SPEED_UNKNOWN; 5556 vi->duplex = DUPLEX_UNKNOWN; 5557 } 5558 5559 static u32 virtnet_get_rxfh_key_size(struct net_device *dev) 5560 { 5561 return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size; 5562 } 5563 5564 static u32 virtnet_get_rxfh_indir_size(struct net_device *dev) 5565 { 5566 return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size; 5567 } 5568 5569 static int virtnet_get_rxfh(struct net_device *dev, 5570 struct ethtool_rxfh_param *rxfh) 5571 { 5572 struct virtnet_info *vi = netdev_priv(dev); 5573 int 
i; 5574 5575 if (rxfh->indir) { 5576 for (i = 0; i < vi->rss_indir_table_size; ++i) 5577 rxfh->indir[i] = le16_to_cpu(vi->rss_hdr->indirection_table[i]); 5578 } 5579 5580 if (rxfh->key) 5581 memcpy(rxfh->key, vi->rss_hash_key_data, vi->rss_key_size); 5582 5583 rxfh->hfunc = ETH_RSS_HASH_TOP; 5584 5585 return 0; 5586 } 5587 5588 static int virtnet_set_rxfh(struct net_device *dev, 5589 struct ethtool_rxfh_param *rxfh, 5590 struct netlink_ext_ack *extack) 5591 { 5592 struct virtnet_info *vi = netdev_priv(dev); 5593 bool update = false; 5594 int i; 5595 5596 if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE && 5597 rxfh->hfunc != ETH_RSS_HASH_TOP) 5598 return -EOPNOTSUPP; 5599 5600 if (rxfh->indir) { 5601 if (!vi->has_rss) 5602 return -EOPNOTSUPP; 5603 5604 for (i = 0; i < vi->rss_indir_table_size; ++i) 5605 vi->rss_hdr->indirection_table[i] = cpu_to_le16(rxfh->indir[i]); 5606 update = true; 5607 } 5608 5609 if (rxfh->key) { 5610 /* If either _F_HASH_REPORT or _F_RSS are negotiated, the 5611 * device provides hash calculation capabilities, that is, 5612 * hash_key is configured. 5613 */ 5614 if (!vi->has_rss && !vi->has_rss_hash_report) 5615 return -EOPNOTSUPP; 5616 5617 memcpy(vi->rss_hash_key_data, rxfh->key, vi->rss_key_size); 5618 update = true; 5619 } 5620 5621 if (update) 5622 virtnet_commit_rss_command(vi); 5623 5624 return 0; 5625 } 5626 5627 static u32 virtnet_get_rx_ring_count(struct net_device *dev) 5628 { 5629 struct virtnet_info *vi = netdev_priv(dev); 5630 5631 return vi->curr_queue_pairs; 5632 } 5633 5634 static const struct ethtool_ops virtnet_ethtool_ops = { 5635 .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | 5636 ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE_RX, 5637 .get_drvinfo = virtnet_get_drvinfo, 5638 .get_link = ethtool_op_get_link, 5639 .get_ringparam = virtnet_get_ringparam, 5640 .set_ringparam = virtnet_set_ringparam, 5641 .get_strings = virtnet_get_strings, 5642 .get_sset_count = virtnet_get_sset_count, 5643 .get_ethtool_stats = virtnet_get_ethtool_stats, 5644 .set_channels = virtnet_set_channels, 5645 .get_channels = virtnet_get_channels, 5646 .get_ts_info = ethtool_op_get_ts_info, 5647 .get_link_ksettings = virtnet_get_link_ksettings, 5648 .set_link_ksettings = virtnet_set_link_ksettings, 5649 .set_coalesce = virtnet_set_coalesce, 5650 .get_coalesce = virtnet_get_coalesce, 5651 .set_per_queue_coalesce = virtnet_set_per_queue_coalesce, 5652 .get_per_queue_coalesce = virtnet_get_per_queue_coalesce, 5653 .get_rxfh_key_size = virtnet_get_rxfh_key_size, 5654 .get_rxfh_indir_size = virtnet_get_rxfh_indir_size, 5655 .get_rxfh = virtnet_get_rxfh, 5656 .set_rxfh = virtnet_set_rxfh, 5657 .get_rxfh_fields = virtnet_get_hashflow, 5658 .set_rxfh_fields = virtnet_set_hashflow, 5659 .get_rx_ring_count = virtnet_get_rx_ring_count, 5660 }; 5661 5662 static void virtnet_get_queue_stats_rx(struct net_device *dev, int i, 5663 struct netdev_queue_stats_rx *stats) 5664 { 5665 struct virtnet_info *vi = netdev_priv(dev); 5666 struct receive_queue *rq = &vi->rq[i]; 5667 struct virtnet_stats_ctx ctx = {0}; 5668 5669 virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true); 5670 5671 virtnet_get_hw_stats(vi, &ctx, i * 2); 5672 virtnet_fill_stats(vi, i * 2, &ctx, (void *)&rq->stats, true, 0); 5673 } 5674 5675 static void virtnet_get_queue_stats_tx(struct net_device *dev, int i, 5676 struct netdev_queue_stats_tx *stats) 5677 { 5678 struct virtnet_info *vi = netdev_priv(dev); 5679 struct send_queue *sq = &vi->sq[i]; 5680 struct virtnet_stats_ctx ctx = {0}; 5681 5682 
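	/* Fill this tx queue's qstats from both the device (hw) counters and
	 * the driver's own per-queue counters.
	 */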
virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true); 5683 5684 virtnet_get_hw_stats(vi, &ctx, i * 2 + 1); 5685 virtnet_fill_stats(vi, i * 2 + 1, &ctx, (void *)&sq->stats, true, 0); 5686 } 5687 5688 static void virtnet_get_base_stats(struct net_device *dev, 5689 struct netdev_queue_stats_rx *rx, 5690 struct netdev_queue_stats_tx *tx) 5691 { 5692 struct virtnet_info *vi = netdev_priv(dev); 5693 5694 /* The queue stats of the virtio-net will not be reset. So here we 5695 * return 0. 5696 */ 5697 rx->bytes = 0; 5698 rx->packets = 0; 5699 5700 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 5701 rx->hw_drops = 0; 5702 rx->hw_drop_overruns = 0; 5703 } 5704 5705 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 5706 rx->csum_unnecessary = 0; 5707 rx->csum_none = 0; 5708 rx->csum_bad = 0; 5709 } 5710 5711 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 5712 rx->hw_gro_packets = 0; 5713 rx->hw_gro_bytes = 0; 5714 rx->hw_gro_wire_packets = 0; 5715 rx->hw_gro_wire_bytes = 0; 5716 } 5717 5718 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) 5719 rx->hw_drop_ratelimits = 0; 5720 5721 tx->bytes = 0; 5722 tx->packets = 0; 5723 tx->stop = 0; 5724 tx->wake = 0; 5725 5726 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 5727 tx->hw_drops = 0; 5728 tx->hw_drop_errors = 0; 5729 } 5730 5731 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 5732 tx->csum_none = 0; 5733 tx->needs_csum = 0; 5734 } 5735 5736 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 5737 tx->hw_gso_packets = 0; 5738 tx->hw_gso_bytes = 0; 5739 tx->hw_gso_wire_packets = 0; 5740 tx->hw_gso_wire_bytes = 0; 5741 } 5742 5743 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) 5744 tx->hw_drop_ratelimits = 0; 5745 5746 netdev_stat_queue_sum(dev, 5747 dev->real_num_rx_queues, vi->max_queue_pairs, rx, 5748 dev->real_num_tx_queues, vi->max_queue_pairs, tx); 5749 } 5750 5751 static const struct netdev_stat_ops virtnet_stat_ops = { 5752 .get_queue_stats_rx = virtnet_get_queue_stats_rx, 5753 .get_queue_stats_tx = virtnet_get_queue_stats_tx, 5754 .get_base_stats = virtnet_get_base_stats, 5755 }; 5756 5757 static void virtnet_freeze_down(struct virtio_device *vdev) 5758 { 5759 struct virtnet_info *vi = vdev->priv; 5760 5761 /* Make sure no work handler is accessing the device */ 5762 flush_work(&vi->config_work); 5763 disable_rx_mode_work(vi); 5764 flush_work(&vi->rx_mode_work); 5765 5766 if (netif_running(vi->dev)) { 5767 rtnl_lock(); 5768 virtnet_close(vi->dev); 5769 rtnl_unlock(); 5770 } 5771 5772 netif_tx_lock_bh(vi->dev); 5773 netif_device_detach(vi->dev); 5774 netif_tx_unlock_bh(vi->dev); 5775 } 5776 5777 static int init_vqs(struct virtnet_info *vi); 5778 5779 static int virtnet_restore_up(struct virtio_device *vdev) 5780 { 5781 struct virtnet_info *vi = vdev->priv; 5782 int err; 5783 5784 err = init_vqs(vi); 5785 if (err) 5786 return err; 5787 5788 virtio_device_ready(vdev); 5789 5790 enable_delayed_refill(vi); 5791 enable_rx_mode_work(vi); 5792 5793 if (netif_running(vi->dev)) { 5794 rtnl_lock(); 5795 err = virtnet_open(vi->dev); 5796 rtnl_unlock(); 5797 if (err) 5798 return err; 5799 } 5800 5801 netif_tx_lock_bh(vi->dev); 5802 netif_device_attach(vi->dev); 5803 netif_tx_unlock_bh(vi->dev); 5804 return err; 5805 } 5806 5807 static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads) 5808 { 5809 __virtio64 *_offloads __free(kfree) = NULL; 5810 struct scatterlist sg; 5811 5812 _offloads = kzalloc(sizeof(*_offloads), GFP_KERNEL); 5813 if (!_offloads) 
5814 return -ENOMEM; 5815 5816 *_offloads = cpu_to_virtio64(vi->vdev, offloads); 5817 5818 sg_init_one(&sg, _offloads, sizeof(*_offloads)); 5819 5820 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS, 5821 VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) { 5822 dev_warn(&vi->dev->dev, "Fail to set guest offload.\n"); 5823 return -EINVAL; 5824 } 5825 5826 return 0; 5827 } 5828 5829 static int virtnet_clear_guest_offloads(struct virtnet_info *vi) 5830 { 5831 u64 offloads = 0; 5832 5833 if (!vi->guest_offloads) 5834 return 0; 5835 5836 return virtnet_set_guest_offloads(vi, offloads); 5837 } 5838 5839 static int virtnet_restore_guest_offloads(struct virtnet_info *vi) 5840 { 5841 u64 offloads = vi->guest_offloads; 5842 5843 if (!vi->guest_offloads) 5844 return 0; 5845 5846 return virtnet_set_guest_offloads(vi, offloads); 5847 } 5848 5849 static int virtnet_rq_bind_xsk_pool(struct virtnet_info *vi, struct receive_queue *rq, 5850 struct xsk_buff_pool *pool) 5851 { 5852 int err, qindex; 5853 5854 qindex = rq - vi->rq; 5855 5856 if (pool) { 5857 err = xdp_rxq_info_reg(&rq->xsk_rxq_info, vi->dev, qindex, rq->napi.napi_id); 5858 if (err < 0) 5859 return err; 5860 5861 err = xdp_rxq_info_reg_mem_model(&rq->xsk_rxq_info, 5862 MEM_TYPE_XSK_BUFF_POOL, NULL); 5863 if (err < 0) 5864 goto unreg; 5865 5866 xsk_pool_set_rxq_info(pool, &rq->xsk_rxq_info); 5867 } 5868 5869 virtnet_rx_pause(vi, rq); 5870 5871 err = virtqueue_reset(rq->vq, virtnet_rq_unmap_free_buf, NULL); 5872 if (err) { 5873 netdev_err(vi->dev, "reset rx fail: rx queue index: %d err: %d\n", qindex, err); 5874 5875 pool = NULL; 5876 } 5877 5878 rq->xsk_pool = pool; 5879 5880 virtnet_rx_resume(vi, rq); 5881 5882 if (pool) 5883 return 0; 5884 5885 unreg: 5886 xdp_rxq_info_unreg(&rq->xsk_rxq_info); 5887 return err; 5888 } 5889 5890 static int virtnet_sq_bind_xsk_pool(struct virtnet_info *vi, 5891 struct send_queue *sq, 5892 struct xsk_buff_pool *pool) 5893 { 5894 int err, qindex; 5895 5896 qindex = sq - vi->sq; 5897 5898 virtnet_tx_pause(vi, sq); 5899 5900 err = virtqueue_reset(sq->vq, virtnet_sq_free_unused_buf, 5901 virtnet_sq_free_unused_buf_done); 5902 if (err) { 5903 netdev_err(vi->dev, "reset tx fail: tx queue index: %d err: %d\n", qindex, err); 5904 pool = NULL; 5905 } 5906 5907 sq->xsk_pool = pool; 5908 5909 virtnet_tx_resume(vi, sq); 5910 5911 return err; 5912 } 5913 5914 static int virtnet_xsk_pool_enable(struct net_device *dev, 5915 struct xsk_buff_pool *pool, 5916 u16 qid) 5917 { 5918 struct virtnet_info *vi = netdev_priv(dev); 5919 struct receive_queue *rq; 5920 struct device *dma_dev; 5921 struct send_queue *sq; 5922 dma_addr_t hdr_dma; 5923 int err, size; 5924 5925 if (vi->hdr_len > xsk_pool_get_headroom(pool)) 5926 return -EINVAL; 5927 5928 /* In big_packets mode, xdp cannot work, so there is no need to 5929 * initialize xsk of rq. 5930 */ 5931 if (vi->big_packets && !vi->mergeable_rx_bufs) 5932 return -ENOENT; 5933 5934 if (qid >= vi->curr_queue_pairs) 5935 return -EINVAL; 5936 5937 sq = &vi->sq[qid]; 5938 rq = &vi->rq[qid]; 5939 5940 /* xsk assumes that tx and rx must have the same dma device. The af-xdp 5941 * may use one buffer to receive from the rx and reuse this buffer to 5942 * send by the tx. So the dma dev of sq and rq must be the same one. 5943 * 5944 * But vq->dma_dev allows every vq has the respective dma dev. So I 5945 * check the dma dev of vq and sq is the same dev. 
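	 * A NULL dma dev (vq not backed by a DMA device) is also rejected
	 * below.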
5946 */ 5947 if (virtqueue_dma_dev(rq->vq) != virtqueue_dma_dev(sq->vq)) 5948 return -EINVAL; 5949 5950 dma_dev = virtqueue_dma_dev(rq->vq); 5951 if (!dma_dev) 5952 return -EINVAL; 5953 5954 size = virtqueue_get_vring_size(rq->vq); 5955 5956 rq->xsk_buffs = kvcalloc(size, sizeof(*rq->xsk_buffs), GFP_KERNEL); 5957 if (!rq->xsk_buffs) 5958 return -ENOMEM; 5959 5960 hdr_dma = virtqueue_map_single_attrs(sq->vq, &xsk_hdr, vi->hdr_len, 5961 DMA_TO_DEVICE, 0); 5962 if (virtqueue_map_mapping_error(sq->vq, hdr_dma)) { 5963 err = -ENOMEM; 5964 goto err_free_buffs; 5965 } 5966 5967 err = xsk_pool_dma_map(pool, dma_dev, 0); 5968 if (err) 5969 goto err_xsk_map; 5970 5971 err = virtnet_rq_bind_xsk_pool(vi, rq, pool); 5972 if (err) 5973 goto err_rq; 5974 5975 err = virtnet_sq_bind_xsk_pool(vi, sq, pool); 5976 if (err) 5977 goto err_sq; 5978 5979 /* Now, we do not support tx offload(such as tx csum), so all the tx 5980 * virtnet hdr is zero. So all the tx packets can share a single hdr. 5981 */ 5982 sq->xsk_hdr_dma_addr = hdr_dma; 5983 5984 return 0; 5985 5986 err_sq: 5987 virtnet_rq_bind_xsk_pool(vi, rq, NULL); 5988 err_rq: 5989 xsk_pool_dma_unmap(pool, 0); 5990 err_xsk_map: 5991 virtqueue_unmap_single_attrs(rq->vq, hdr_dma, vi->hdr_len, 5992 DMA_TO_DEVICE, 0); 5993 err_free_buffs: 5994 kvfree(rq->xsk_buffs); 5995 return err; 5996 } 5997 5998 static int virtnet_xsk_pool_disable(struct net_device *dev, u16 qid) 5999 { 6000 struct virtnet_info *vi = netdev_priv(dev); 6001 struct xsk_buff_pool *pool; 6002 struct receive_queue *rq; 6003 struct send_queue *sq; 6004 int err; 6005 6006 if (qid >= vi->curr_queue_pairs) 6007 return -EINVAL; 6008 6009 sq = &vi->sq[qid]; 6010 rq = &vi->rq[qid]; 6011 6012 pool = rq->xsk_pool; 6013 6014 err = virtnet_rq_bind_xsk_pool(vi, rq, NULL); 6015 err |= virtnet_sq_bind_xsk_pool(vi, sq, NULL); 6016 6017 xsk_pool_dma_unmap(pool, 0); 6018 6019 virtqueue_unmap_single_attrs(sq->vq, sq->xsk_hdr_dma_addr, 6020 vi->hdr_len, DMA_TO_DEVICE, 0); 6021 kvfree(rq->xsk_buffs); 6022 6023 return err; 6024 } 6025 6026 static int virtnet_xsk_pool_setup(struct net_device *dev, struct netdev_bpf *xdp) 6027 { 6028 if (xdp->xsk.pool) 6029 return virtnet_xsk_pool_enable(dev, xdp->xsk.pool, 6030 xdp->xsk.queue_id); 6031 else 6032 return virtnet_xsk_pool_disable(dev, xdp->xsk.queue_id); 6033 } 6034 6035 static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, 6036 struct netlink_ext_ack *extack) 6037 { 6038 unsigned int room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM + 6039 sizeof(struct skb_shared_info)); 6040 unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN; 6041 struct virtnet_info *vi = netdev_priv(dev); 6042 struct bpf_prog *old_prog; 6043 u16 xdp_qp = 0, curr_qp; 6044 int i, err; 6045 6046 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) 6047 && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 6048 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 6049 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 6050 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 6051 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM) || 6052 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) || 6053 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6))) { 6054 NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first"); 6055 return -EOPNOTSUPP; 6056 } 6057 6058 if (vi->mergeable_rx_bufs && !vi->any_header_sg) { 6059 NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg 
required"); 6060 return -EINVAL; 6061 } 6062 6063 if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) { 6064 NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags"); 6065 netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz); 6066 return -EINVAL; 6067 } 6068 6069 curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs; 6070 if (prog) 6071 xdp_qp = nr_cpu_ids; 6072 6073 /* XDP requires extra queues for XDP_TX */ 6074 if (curr_qp + xdp_qp > vi->max_queue_pairs) { 6075 netdev_warn_once(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n", 6076 curr_qp + xdp_qp, vi->max_queue_pairs); 6077 xdp_qp = 0; 6078 } 6079 6080 old_prog = rtnl_dereference(vi->rq[0].xdp_prog); 6081 if (!prog && !old_prog) 6082 return 0; 6083 6084 if (prog) 6085 bpf_prog_add(prog, vi->max_queue_pairs - 1); 6086 6087 virtnet_rx_pause_all(vi); 6088 6089 /* Make sure NAPI is not using any XDP TX queues for RX. */ 6090 if (netif_running(dev)) { 6091 for (i = 0; i < vi->max_queue_pairs; i++) 6092 virtnet_napi_tx_disable(&vi->sq[i]); 6093 } 6094 6095 if (!prog) { 6096 for (i = 0; i < vi->max_queue_pairs; i++) { 6097 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 6098 if (i == 0) 6099 virtnet_restore_guest_offloads(vi); 6100 } 6101 synchronize_net(); 6102 } 6103 6104 err = virtnet_set_queues(vi, curr_qp + xdp_qp); 6105 if (err) 6106 goto err; 6107 netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); 6108 vi->xdp_queue_pairs = xdp_qp; 6109 6110 if (prog) { 6111 vi->xdp_enabled = true; 6112 for (i = 0; i < vi->max_queue_pairs; i++) { 6113 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 6114 if (i == 0 && !old_prog) 6115 virtnet_clear_guest_offloads(vi); 6116 } 6117 if (!old_prog) 6118 xdp_features_set_redirect_target(dev, true); 6119 } else { 6120 xdp_features_clear_redirect_target(dev); 6121 vi->xdp_enabled = false; 6122 } 6123 6124 virtnet_rx_resume_all(vi); 6125 for (i = 0; i < vi->max_queue_pairs; i++) { 6126 if (old_prog) 6127 bpf_prog_put(old_prog); 6128 if (netif_running(dev)) 6129 virtnet_napi_tx_enable(&vi->sq[i]); 6130 } 6131 6132 return 0; 6133 6134 err: 6135 if (!prog) { 6136 virtnet_clear_guest_offloads(vi); 6137 for (i = 0; i < vi->max_queue_pairs; i++) 6138 rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); 6139 } 6140 6141 virtnet_rx_resume_all(vi); 6142 if (netif_running(dev)) { 6143 for (i = 0; i < vi->max_queue_pairs; i++) 6144 virtnet_napi_tx_enable(&vi->sq[i]); 6145 } 6146 if (prog) 6147 bpf_prog_sub(prog, vi->max_queue_pairs - 1); 6148 return err; 6149 } 6150 6151 static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp) 6152 { 6153 switch (xdp->command) { 6154 case XDP_SETUP_PROG: 6155 return virtnet_xdp_set(dev, xdp->prog, xdp->extack); 6156 case XDP_SETUP_XSK_POOL: 6157 return virtnet_xsk_pool_setup(dev, xdp); 6158 default: 6159 return -EINVAL; 6160 } 6161 } 6162 6163 static int virtnet_get_phys_port_name(struct net_device *dev, char *buf, 6164 size_t len) 6165 { 6166 struct virtnet_info *vi = netdev_priv(dev); 6167 int ret; 6168 6169 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 6170 return -EOPNOTSUPP; 6171 6172 ret = snprintf(buf, len, "sby"); 6173 if (ret >= len) 6174 return -EOPNOTSUPP; 6175 6176 return 0; 6177 } 6178 6179 static int virtnet_set_features(struct net_device *dev, 6180 netdev_features_t features) 6181 { 6182 struct virtnet_info *vi = netdev_priv(dev); 6183 u64 offloads; 6184 int err; 6185 6186 if ((dev->features ^ features) & NETIF_F_GRO_HW) { 6187 if (vi->xdp_enabled) 
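/* Installing an XDP program clears the guest offloads (see
 * virtnet_clear_guest_offloads() in virtnet_xdp_set()), so toggling
 * GRO_HW is refused while a program is attached.
 */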
6188 return -EBUSY; 6189 6190 if (features & NETIF_F_GRO_HW) 6191 offloads = vi->guest_offloads_capable; 6192 else 6193 offloads = vi->guest_offloads_capable & 6194 ~GUEST_OFFLOAD_GRO_HW_MASK; 6195 6196 err = virtnet_set_guest_offloads(vi, offloads); 6197 if (err) 6198 return err; 6199 vi->guest_offloads = offloads; 6200 } 6201 6202 if ((dev->features ^ features) & NETIF_F_RXHASH) { 6203 if (features & NETIF_F_RXHASH) 6204 vi->rss_hdr->hash_types = cpu_to_le32(vi->rss_hash_types_saved); 6205 else 6206 vi->rss_hdr->hash_types = cpu_to_le32(VIRTIO_NET_HASH_REPORT_NONE); 6207 6208 if (!virtnet_commit_rss_command(vi)) 6209 return -EINVAL; 6210 } 6211 6212 return 0; 6213 } 6214 6215 static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue) 6216 { 6217 struct virtnet_info *priv = netdev_priv(dev); 6218 struct send_queue *sq = &priv->sq[txqueue]; 6219 struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue); 6220 6221 u64_stats_update_begin(&sq->stats.syncp); 6222 u64_stats_inc(&sq->stats.tx_timeouts); 6223 u64_stats_update_end(&sq->stats.syncp); 6224 6225 netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n", 6226 txqueue, sq->name, sq->vq->index, sq->vq->name, 6227 jiffies_to_usecs(jiffies - READ_ONCE(txq->trans_start))); 6228 } 6229 6230 static int virtnet_init_irq_moder(struct virtnet_info *vi) 6231 { 6232 u8 profile_flags = 0, coal_flags = 0; 6233 int ret, i; 6234 6235 profile_flags |= DIM_PROFILE_RX; 6236 coal_flags |= DIM_COALESCE_USEC | DIM_COALESCE_PKTS; 6237 ret = net_dim_init_irq_moder(vi->dev, profile_flags, coal_flags, 6238 DIM_CQ_PERIOD_MODE_START_FROM_EQE, 6239 0, virtnet_rx_dim_work, NULL); 6240 6241 if (ret) 6242 return ret; 6243 6244 for (i = 0; i < vi->max_queue_pairs; i++) 6245 net_dim_setting(vi->dev, &vi->rq[i].dim, false); 6246 6247 return 0; 6248 } 6249 6250 static void virtnet_free_irq_moder(struct virtnet_info *vi) 6251 { 6252 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 6253 return; 6254 6255 rtnl_lock(); 6256 net_dim_free_irq_moder(vi->dev); 6257 rtnl_unlock(); 6258 } 6259 6260 static const struct net_device_ops virtnet_netdev = { 6261 .ndo_open = virtnet_open, 6262 .ndo_stop = virtnet_close, 6263 .ndo_start_xmit = start_xmit, 6264 .ndo_validate_addr = eth_validate_addr, 6265 .ndo_set_mac_address = virtnet_set_mac_address, 6266 .ndo_set_rx_mode = virtnet_set_rx_mode, 6267 .ndo_get_stats64 = virtnet_stats, 6268 .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid, 6269 .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, 6270 .ndo_bpf = virtnet_xdp, 6271 .ndo_xdp_xmit = virtnet_xdp_xmit, 6272 .ndo_xsk_wakeup = virtnet_xsk_wakeup, 6273 .ndo_features_check = passthru_features_check, 6274 .ndo_get_phys_port_name = virtnet_get_phys_port_name, 6275 .ndo_set_features = virtnet_set_features, 6276 .ndo_tx_timeout = virtnet_tx_timeout, 6277 }; 6278 6279 static void virtnet_config_changed_work(struct work_struct *work) 6280 { 6281 struct virtnet_info *vi = 6282 container_of(work, struct virtnet_info, config_work); 6283 u16 v; 6284 6285 if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS, 6286 struct virtio_net_config, status, &v) < 0) 6287 return; 6288 6289 if (v & VIRTIO_NET_S_ANNOUNCE) { 6290 netdev_notify_peers(vi->dev); 6291 virtnet_ack_link_announce(vi); 6292 } 6293 6294 /* Ignore unknown (future) status bits */ 6295 v &= VIRTIO_NET_S_LINK_UP; 6296 6297 if (vi->status == v) 6298 return; 6299 6300 vi->status = v; 6301 6302 if (vi->status & VIRTIO_NET_S_LINK_UP) { 6303 virtnet_update_settings(vi); 6304 
netif_carrier_on(vi->dev); 6305 netif_tx_wake_all_queues(vi->dev); 6306 } else { 6307 netif_carrier_off(vi->dev); 6308 netif_tx_stop_all_queues(vi->dev); 6309 } 6310 } 6311 6312 static void virtnet_config_changed(struct virtio_device *vdev) 6313 { 6314 struct virtnet_info *vi = vdev->priv; 6315 6316 schedule_work(&vi->config_work); 6317 } 6318 6319 static void virtnet_free_queues(struct virtnet_info *vi) 6320 { 6321 int i; 6322 6323 for (i = 0; i < vi->max_queue_pairs; i++) { 6324 __netif_napi_del(&vi->rq[i].napi); 6325 __netif_napi_del(&vi->sq[i].napi); 6326 } 6327 6328 /* We called __netif_napi_del(), 6329 * we need to respect an RCU grace period before freeing vi->rq 6330 */ 6331 synchronize_net(); 6332 6333 kfree(vi->rq); 6334 kfree(vi->sq); 6335 kfree(vi->ctrl); 6336 } 6337 6338 static void _free_receive_bufs(struct virtnet_info *vi) 6339 { 6340 struct bpf_prog *old_prog; 6341 int i; 6342 6343 for (i = 0; i < vi->max_queue_pairs; i++) { 6344 while (vi->rq[i].pages) 6345 __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0); 6346 6347 old_prog = rtnl_dereference(vi->rq[i].xdp_prog); 6348 RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL); 6349 if (old_prog) 6350 bpf_prog_put(old_prog); 6351 } 6352 } 6353 6354 static void free_receive_bufs(struct virtnet_info *vi) 6355 { 6356 rtnl_lock(); 6357 _free_receive_bufs(vi); 6358 rtnl_unlock(); 6359 } 6360 6361 static void free_receive_page_frags(struct virtnet_info *vi) 6362 { 6363 int i; 6364 for (i = 0; i < vi->max_queue_pairs; i++) 6365 if (vi->rq[i].alloc_frag.page) { 6366 if (vi->rq[i].last_dma) 6367 virtnet_rq_unmap(&vi->rq[i], vi->rq[i].last_dma, 0); 6368 put_page(vi->rq[i].alloc_frag.page); 6369 } 6370 } 6371 6372 static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf) 6373 { 6374 struct virtnet_info *vi = vq->vdev->priv; 6375 struct send_queue *sq; 6376 int i = vq2txq(vq); 6377 6378 sq = &vi->sq[i]; 6379 6380 switch (virtnet_xmit_ptr_unpack(&buf)) { 6381 case VIRTNET_XMIT_TYPE_SKB: 6382 case VIRTNET_XMIT_TYPE_SKB_ORPHAN: 6383 dev_kfree_skb(buf); 6384 break; 6385 6386 case VIRTNET_XMIT_TYPE_XDP: 6387 xdp_return_frame(buf); 6388 break; 6389 6390 case VIRTNET_XMIT_TYPE_XSK: 6391 xsk_tx_completed(sq->xsk_pool, 1); 6392 break; 6393 } 6394 } 6395 6396 static void virtnet_sq_free_unused_buf_done(struct virtqueue *vq) 6397 { 6398 struct virtnet_info *vi = vq->vdev->priv; 6399 int i = vq2txq(vq); 6400 6401 netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, i)); 6402 } 6403 6404 static void free_unused_bufs(struct virtnet_info *vi) 6405 { 6406 void *buf; 6407 int i; 6408 6409 for (i = 0; i < vi->max_queue_pairs; i++) { 6410 struct virtqueue *vq = vi->sq[i].vq; 6411 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 6412 virtnet_sq_free_unused_buf(vq, buf); 6413 cond_resched(); 6414 } 6415 6416 for (i = 0; i < vi->max_queue_pairs; i++) { 6417 struct virtqueue *vq = vi->rq[i].vq; 6418 6419 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 6420 virtnet_rq_unmap_free_buf(vq, buf); 6421 cond_resched(); 6422 } 6423 } 6424 6425 static void virtnet_del_vqs(struct virtnet_info *vi) 6426 { 6427 struct virtio_device *vdev = vi->vdev; 6428 6429 virtnet_clean_affinity(vi); 6430 6431 vdev->config->del_vqs(vdev); 6432 6433 virtnet_free_queues(vi); 6434 } 6435 6436 /* How large should a single buffer be so a queue full of these can fit at 6437 * least one full packet? 6438 * Logic below assumes the mergeable buffer header is used. 
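 *
 * Illustrative example (assumed values, not mandated by the device): with
 * the 12-byte mergeable header, a 256-entry ring and a 1500-byte max MTU,
 * buf_len = 12 + 14 + 4 + 1500 = 1530; spread over 256 ring entries this
 * rounds up to 6 bytes per buffer, so the result is clamped up to
 * GOOD_PACKET_LEN (1518 bytes).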
6439 */ 6440 static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq) 6441 { 6442 const unsigned int hdr_len = vi->hdr_len; 6443 unsigned int rq_size = virtqueue_get_vring_size(vq); 6444 unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu; 6445 unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len; 6446 unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size); 6447 6448 return max(max(min_buf_len, hdr_len) - hdr_len, 6449 (unsigned int)GOOD_PACKET_LEN); 6450 } 6451 6452 static int virtnet_find_vqs(struct virtnet_info *vi) 6453 { 6454 struct virtqueue_info *vqs_info; 6455 struct virtqueue **vqs; 6456 int ret = -ENOMEM; 6457 int total_vqs; 6458 bool *ctx; 6459 u16 i; 6460 6461 /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by 6462 * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by 6463 * possible control vq. 6464 */ 6465 total_vqs = vi->max_queue_pairs * 2 + 6466 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ); 6467 6468 /* Allocate space for find_vqs parameters */ 6469 vqs = kcalloc(total_vqs, sizeof(*vqs), GFP_KERNEL); 6470 if (!vqs) 6471 goto err_vq; 6472 vqs_info = kcalloc(total_vqs, sizeof(*vqs_info), GFP_KERNEL); 6473 if (!vqs_info) 6474 goto err_vqs_info; 6475 if (!vi->big_packets || vi->mergeable_rx_bufs) { 6476 ctx = kcalloc(total_vqs, sizeof(*ctx), GFP_KERNEL); 6477 if (!ctx) 6478 goto err_ctx; 6479 } else { 6480 ctx = NULL; 6481 } 6482 6483 /* Parameters for control virtqueue, if any */ 6484 if (vi->has_cvq) { 6485 vqs_info[total_vqs - 1].name = "control"; 6486 } 6487 6488 /* Allocate/initialize parameters for send/receive virtqueues */ 6489 for (i = 0; i < vi->max_queue_pairs; i++) { 6490 vqs_info[rxq2vq(i)].callback = skb_recv_done; 6491 vqs_info[txq2vq(i)].callback = skb_xmit_done; 6492 sprintf(vi->rq[i].name, "input.%u", i); 6493 sprintf(vi->sq[i].name, "output.%u", i); 6494 vqs_info[rxq2vq(i)].name = vi->rq[i].name; 6495 vqs_info[txq2vq(i)].name = vi->sq[i].name; 6496 if (ctx) 6497 vqs_info[rxq2vq(i)].ctx = true; 6498 } 6499 6500 ret = virtio_find_vqs(vi->vdev, total_vqs, vqs, vqs_info, NULL); 6501 if (ret) 6502 goto err_find; 6503 6504 if (vi->has_cvq) { 6505 vi->cvq = vqs[total_vqs - 1]; 6506 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) 6507 vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; 6508 } 6509 6510 for (i = 0; i < vi->max_queue_pairs; i++) { 6511 vi->rq[i].vq = vqs[rxq2vq(i)]; 6512 vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq); 6513 vi->sq[i].vq = vqs[txq2vq(i)]; 6514 } 6515 6516 /* run here: ret == 0. 
*/ 6517 6518 6519 err_find: 6520 kfree(ctx); 6521 err_ctx: 6522 kfree(vqs_info); 6523 err_vqs_info: 6524 kfree(vqs); 6525 err_vq: 6526 return ret; 6527 } 6528 6529 static int virtnet_alloc_queues(struct virtnet_info *vi) 6530 { 6531 int i; 6532 6533 if (vi->has_cvq) { 6534 vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL); 6535 if (!vi->ctrl) 6536 goto err_ctrl; 6537 } else { 6538 vi->ctrl = NULL; 6539 } 6540 vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL); 6541 if (!vi->sq) 6542 goto err_sq; 6543 vi->rq = kcalloc(vi->max_queue_pairs, sizeof(*vi->rq), GFP_KERNEL); 6544 if (!vi->rq) 6545 goto err_rq; 6546 6547 INIT_DELAYED_WORK(&vi->refill, refill_work); 6548 for (i = 0; i < vi->max_queue_pairs; i++) { 6549 vi->rq[i].pages = NULL; 6550 netif_napi_add_config(vi->dev, &vi->rq[i].napi, virtnet_poll, 6551 i); 6552 vi->rq[i].napi.weight = napi_weight; 6553 netif_napi_add_tx_weight(vi->dev, &vi->sq[i].napi, 6554 virtnet_poll_tx, 6555 napi_tx ? napi_weight : 0); 6556 6557 sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg)); 6558 ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len); 6559 sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg)); 6560 6561 u64_stats_init(&vi->rq[i].stats.syncp); 6562 u64_stats_init(&vi->sq[i].stats.syncp); 6563 mutex_init(&vi->rq[i].dim_lock); 6564 } 6565 6566 return 0; 6567 6568 err_rq: 6569 kfree(vi->sq); 6570 err_sq: 6571 kfree(vi->ctrl); 6572 err_ctrl: 6573 return -ENOMEM; 6574 } 6575 6576 static int init_vqs(struct virtnet_info *vi) 6577 { 6578 int ret; 6579 6580 /* Allocate send & receive queues */ 6581 ret = virtnet_alloc_queues(vi); 6582 if (ret) 6583 goto err; 6584 6585 ret = virtnet_find_vqs(vi); 6586 if (ret) 6587 goto err_free; 6588 6589 cpus_read_lock(); 6590 virtnet_set_affinity(vi); 6591 cpus_read_unlock(); 6592 6593 return 0; 6594 6595 err_free: 6596 virtnet_free_queues(vi); 6597 err: 6598 return ret; 6599 } 6600 6601 #ifdef CONFIG_SYSFS 6602 static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue, 6603 char *buf) 6604 { 6605 struct virtnet_info *vi = netdev_priv(queue->dev); 6606 unsigned int queue_index = get_netdev_rx_queue_index(queue); 6607 unsigned int headroom = virtnet_get_headroom(vi); 6608 unsigned int tailroom = headroom ? 
sizeof(struct skb_shared_info) : 0; 6609 struct ewma_pkt_len *avg; 6610 6611 BUG_ON(queue_index >= vi->max_queue_pairs); 6612 avg = &vi->rq[queue_index].mrg_avg_pkt_len; 6613 return sprintf(buf, "%u\n", 6614 get_mergeable_buf_len(&vi->rq[queue_index], avg, 6615 SKB_DATA_ALIGN(headroom + tailroom))); 6616 } 6617 6618 static struct rx_queue_attribute mergeable_rx_buffer_size_attribute = 6619 __ATTR_RO(mergeable_rx_buffer_size); 6620 6621 static struct attribute *virtio_net_mrg_rx_attrs[] = { 6622 &mergeable_rx_buffer_size_attribute.attr, 6623 NULL 6624 }; 6625 6626 static const struct attribute_group virtio_net_mrg_rx_group = { 6627 .name = "virtio_net", 6628 .attrs = virtio_net_mrg_rx_attrs 6629 }; 6630 #endif 6631 6632 static bool virtnet_fail_on_feature(struct virtio_device *vdev, 6633 unsigned int fbit, 6634 const char *fname, const char *dname) 6635 { 6636 if (!virtio_has_feature(vdev, fbit)) 6637 return false; 6638 6639 dev_err(&vdev->dev, "device advertises feature %s but not %s", 6640 fname, dname); 6641 6642 return true; 6643 } 6644 6645 #define VIRTNET_FAIL_ON(vdev, fbit, dbit) \ 6646 virtnet_fail_on_feature(vdev, fbit, #fbit, dbit) 6647 6648 static bool virtnet_validate_features(struct virtio_device *vdev) 6649 { 6650 if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) && 6651 (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX, 6652 "VIRTIO_NET_F_CTRL_VQ") || 6653 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN, 6654 "VIRTIO_NET_F_CTRL_VQ") || 6655 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE, 6656 "VIRTIO_NET_F_CTRL_VQ") || 6657 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") || 6658 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR, 6659 "VIRTIO_NET_F_CTRL_VQ") || 6660 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, 6661 "VIRTIO_NET_F_CTRL_VQ") || 6662 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT, 6663 "VIRTIO_NET_F_CTRL_VQ") || 6664 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL, 6665 "VIRTIO_NET_F_CTRL_VQ") || 6666 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_VQ_NOTF_COAL, 6667 "VIRTIO_NET_F_CTRL_VQ"))) { 6668 return false; 6669 } 6670 6671 return true; 6672 } 6673 6674 #define MIN_MTU ETH_MIN_MTU 6675 #define MAX_MTU ETH_MAX_MTU 6676 6677 static int virtnet_validate(struct virtio_device *vdev) 6678 { 6679 if (!vdev->config->get) { 6680 dev_err(&vdev->dev, "%s failure: config access disabled\n", 6681 __func__); 6682 return -EINVAL; 6683 } 6684 6685 if (!virtnet_validate_features(vdev)) 6686 return -EINVAL; 6687 6688 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { 6689 int mtu = virtio_cread16(vdev, 6690 offsetof(struct virtio_net_config, 6691 mtu)); 6692 if (mtu < MIN_MTU) 6693 __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU); 6694 } 6695 6696 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY) && 6697 !virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 6698 dev_warn(&vdev->dev, "device advertises feature VIRTIO_NET_F_STANDBY but not VIRTIO_NET_F_MAC, disabling standby"); 6699 __virtio_clear_bit(vdev, VIRTIO_NET_F_STANDBY); 6700 } 6701 6702 return 0; 6703 } 6704 6705 static bool virtnet_check_guest_gso(const struct virtnet_info *vi) 6706 { 6707 return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 6708 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 6709 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 6710 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 6711 (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) && 6712 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6)); 6713 } 6714 6715 static void virtnet_set_big_packets(struct virtnet_info *vi, const int 
mtu)
6716 {
6717 bool guest_gso = virtnet_check_guest_gso(vi);
6718
6719 /* If the device can receive ANY guest GSO packets, regardless of mtu,
6720 * allocate packets of maximum size; otherwise limit them to at most
6721 * mtu-sized packets.
6722 */
6723 if (mtu > ETH_DATA_LEN || guest_gso) {
6724 vi->big_packets = true;
6725 vi->big_packets_num_skbfrags = guest_gso ? MAX_SKB_FRAGS : DIV_ROUND_UP(mtu, PAGE_SIZE);
6726 }
6727 }
6728
6729 #define VIRTIO_NET_HASH_REPORT_MAX_TABLE 10
6730 static enum xdp_rss_hash_type
6731 virtnet_xdp_rss_type[VIRTIO_NET_HASH_REPORT_MAX_TABLE] = {
6732 [VIRTIO_NET_HASH_REPORT_NONE] = XDP_RSS_TYPE_NONE,
6733 [VIRTIO_NET_HASH_REPORT_IPv4] = XDP_RSS_TYPE_L3_IPV4,
6734 [VIRTIO_NET_HASH_REPORT_TCPv4] = XDP_RSS_TYPE_L4_IPV4_TCP,
6735 [VIRTIO_NET_HASH_REPORT_UDPv4] = XDP_RSS_TYPE_L4_IPV4_UDP,
6736 [VIRTIO_NET_HASH_REPORT_IPv6] = XDP_RSS_TYPE_L3_IPV6,
6737 [VIRTIO_NET_HASH_REPORT_TCPv6] = XDP_RSS_TYPE_L4_IPV6_TCP,
6738 [VIRTIO_NET_HASH_REPORT_UDPv6] = XDP_RSS_TYPE_L4_IPV6_UDP,
6739 [VIRTIO_NET_HASH_REPORT_IPv6_EX] = XDP_RSS_TYPE_L3_IPV6_EX,
6740 [VIRTIO_NET_HASH_REPORT_TCPv6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX,
6741 [VIRTIO_NET_HASH_REPORT_UDPv6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX
6742 };
6743
6744 static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash,
6745 enum xdp_rss_hash_type *rss_type)
6746 {
6747 const struct xdp_buff *xdp = (void *)_ctx;
6748 struct virtio_net_hdr_v1_hash *hdr_hash;
6749 struct virtnet_info *vi;
6750 u16 hash_report;
6751
6752 if (!(xdp->rxq->dev->features & NETIF_F_RXHASH))
6753 return -ENODATA;
6754
6755 vi = netdev_priv(xdp->rxq->dev);
6756 hdr_hash = (struct virtio_net_hdr_v1_hash *)(xdp->data - vi->hdr_len);
6757 hash_report = __le16_to_cpu(hdr_hash->hash_report);
6758
6759 if (hash_report >= VIRTIO_NET_HASH_REPORT_MAX_TABLE)
6760 hash_report = VIRTIO_NET_HASH_REPORT_NONE;
6761
6762 *rss_type = virtnet_xdp_rss_type[hash_report];
6763 *hash = virtio_net_hash_value(hdr_hash);
6764 return 0;
6765 }
6766
6767 static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = {
6768 .xmo_rx_hash = virtnet_xdp_rx_hash,
6769 };
6770
6771 static int virtnet_probe(struct virtio_device *vdev)
6772 {
6773 int i, err = -ENOMEM;
6774 struct net_device *dev;
6775 struct virtnet_info *vi;
6776 u16 max_queue_pairs;
6777 int mtu = 0;
6778
6779 /* Find out if the host supports a multiqueue/RSS virtio_net device */
6780 max_queue_pairs = 1;
6781 if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ) || virtio_has_feature(vdev, VIRTIO_NET_F_RSS))
6782 max_queue_pairs =
6783 virtio_cread16(vdev, offsetof(struct virtio_net_config, max_virtqueue_pairs));
6784
6785 /* We need at least 2 queues */
6786 if (max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
6787 max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
6788 !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
6789 max_queue_pairs = 1;
6790
6791 /* Allocate ourselves a network device with room for our info */
6792 dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs);
6793 if (!dev)
6794 return -ENOMEM;
6795
6796 /* Set up network device as normal. */
6797 dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE |
6798 IFF_TX_SKB_NO_LINEAR;
6799 dev->netdev_ops = &virtnet_netdev;
6800 dev->stat_ops = &virtnet_stat_ops;
6801 dev->features = NETIF_F_HIGHDMA;
6802
6803 dev->ethtool_ops = &virtnet_ethtool_ops;
6804 SET_NETDEV_DEV(dev, &vdev->dev);
6805
6806 /* Do we support "hardware" checksums? */
6807 if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
6808 /* This opens up the world of extra features.
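 * Checksum offload is the prerequisite: only when the host accepts
 * partial checksums (VIRTIO_NET_F_CSUM) are the segmentation offloads
 * below (TSO/TSO6/ECN/USO and UDP tunnel GSO) advertised as well.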
*/ 6809 dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG; 6810 if (csum) 6811 dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; 6812 6813 if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) { 6814 dev->hw_features |= NETIF_F_TSO 6815 | NETIF_F_TSO_ECN | NETIF_F_TSO6; 6816 } 6817 /* Individual feature bits: what can host handle? */ 6818 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4)) 6819 dev->hw_features |= NETIF_F_TSO; 6820 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6)) 6821 dev->hw_features |= NETIF_F_TSO6; 6822 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN)) 6823 dev->hw_features |= NETIF_F_TSO_ECN; 6824 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_USO)) 6825 dev->hw_features |= NETIF_F_GSO_UDP_L4; 6826 6827 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO)) { 6828 dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; 6829 dev->hw_enc_features = dev->hw_features; 6830 } 6831 if (dev->hw_features & NETIF_F_GSO_UDP_TUNNEL && 6832 virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM)) { 6833 dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; 6834 dev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; 6835 } 6836 6837 dev->features |= NETIF_F_GSO_ROBUST; 6838 6839 if (gso) 6840 dev->features |= dev->hw_features; 6841 /* (!csum && gso) case will be fixed by register_netdev() */ 6842 } 6843 6844 /* 1. With VIRTIO_NET_F_GUEST_CSUM negotiation, the driver doesn't 6845 * need to calculate checksums for partially checksummed packets, 6846 * as they're considered valid by the upper layer. 6847 * 2. Without VIRTIO_NET_F_GUEST_CSUM negotiation, the driver only 6848 * receives fully checksummed packets. The device may assist in 6849 * validating these packets' checksums, so the driver won't have to. 6850 */ 6851 dev->features |= NETIF_F_RXCSUM; 6852 6853 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || 6854 virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)) 6855 dev->features |= NETIF_F_GRO_HW; 6856 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) 6857 dev->hw_features |= NETIF_F_GRO_HW; 6858 6859 dev->vlan_features = dev->features; 6860 dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | 6861 NETDEV_XDP_ACT_XSK_ZEROCOPY; 6862 6863 /* MTU range: 68 - 65535 */ 6864 dev->min_mtu = MIN_MTU; 6865 dev->max_mtu = MAX_MTU; 6866 6867 /* Configuration may specify what MAC to use. Otherwise random. 
*/ 6868 if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 6869 u8 addr[ETH_ALEN]; 6870 6871 virtio_cread_bytes(vdev, 6872 offsetof(struct virtio_net_config, mac), 6873 addr, ETH_ALEN); 6874 eth_hw_addr_set(dev, addr); 6875 } else { 6876 eth_hw_addr_random(dev); 6877 dev_info(&vdev->dev, "Assigned random MAC address %pM\n", 6878 dev->dev_addr); 6879 } 6880 6881 /* Set up our device-specific information */ 6882 vi = netdev_priv(dev); 6883 vi->dev = dev; 6884 vi->vdev = vdev; 6885 vdev->priv = vi; 6886 6887 INIT_WORK(&vi->config_work, virtnet_config_changed_work); 6888 INIT_WORK(&vi->rx_mode_work, virtnet_rx_mode_work); 6889 spin_lock_init(&vi->refill_lock); 6890 6891 if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) { 6892 vi->mergeable_rx_bufs = true; 6893 dev->xdp_features |= NETDEV_XDP_ACT_RX_SG; 6894 } 6895 6896 if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) 6897 vi->has_rss_hash_report = true; 6898 6899 if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) { 6900 vi->has_rss = true; 6901 6902 vi->rss_indir_table_size = 6903 virtio_cread16(vdev, offsetof(struct virtio_net_config, 6904 rss_max_indirection_table_length)); 6905 } 6906 vi->rss_hdr = devm_kzalloc(&vdev->dev, virtnet_rss_hdr_size(vi), GFP_KERNEL); 6907 if (!vi->rss_hdr) { 6908 err = -ENOMEM; 6909 goto free; 6910 } 6911 6912 if (vi->has_rss || vi->has_rss_hash_report) { 6913 vi->rss_key_size = 6914 virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size)); 6915 if (vi->rss_key_size > VIRTIO_NET_RSS_MAX_KEY_SIZE) { 6916 dev_err(&vdev->dev, "rss_max_key_size=%u exceeds the limit %u.\n", 6917 vi->rss_key_size, VIRTIO_NET_RSS_MAX_KEY_SIZE); 6918 err = -EINVAL; 6919 goto free; 6920 } 6921 6922 vi->rss_hash_types_supported = 6923 virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types)); 6924 vi->rss_hash_types_supported &= 6925 ~(VIRTIO_NET_RSS_HASH_TYPE_IP_EX | 6926 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | 6927 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX); 6928 6929 dev->hw_features |= NETIF_F_RXHASH; 6930 dev->xdp_metadata_ops = &virtnet_xdp_metadata_ops; 6931 } 6932 6933 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO) || 6934 virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO)) 6935 vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash_tunnel); 6936 else if (vi->has_rss_hash_report) 6937 vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash); 6938 else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) || 6939 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 6940 vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); 6941 else 6942 vi->hdr_len = sizeof(struct virtio_net_hdr); 6943 6944 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM)) 6945 vi->rx_tnl_csum = true; 6946 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO)) 6947 vi->rx_tnl = true; 6948 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO)) 6949 vi->tx_tnl = true; 6950 6951 if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) || 6952 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 6953 vi->any_header_sg = true; 6954 6955 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) 6956 vi->has_cvq = true; 6957 6958 mutex_init(&vi->cvq_lock); 6959 6960 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { 6961 mtu = virtio_cread16(vdev, 6962 offsetof(struct virtio_net_config, 6963 mtu)); 6964 if (mtu < dev->min_mtu) { 6965 /* Should never trigger: MTU was previously validated 6966 * in virtnet_validate. 
6967 */ 6968 dev_err(&vdev->dev, 6969 "device MTU appears to have changed it is now %d < %d", 6970 mtu, dev->min_mtu); 6971 err = -EINVAL; 6972 goto free; 6973 } 6974 6975 dev->mtu = mtu; 6976 dev->max_mtu = mtu; 6977 } 6978 6979 virtnet_set_big_packets(vi, mtu); 6980 6981 if (vi->any_header_sg) 6982 dev->needed_headroom = vi->hdr_len; 6983 6984 /* Enable multiqueue by default */ 6985 if (num_online_cpus() >= max_queue_pairs) 6986 vi->curr_queue_pairs = max_queue_pairs; 6987 else 6988 vi->curr_queue_pairs = num_online_cpus(); 6989 vi->max_queue_pairs = max_queue_pairs; 6990 6991 /* Allocate/initialize the rx/tx queues, and invoke find_vqs */ 6992 err = init_vqs(vi); 6993 if (err) 6994 goto free; 6995 6996 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { 6997 vi->intr_coal_rx.max_usecs = 0; 6998 vi->intr_coal_tx.max_usecs = 0; 6999 vi->intr_coal_rx.max_packets = 0; 7000 7001 /* Keep the default values of the coalescing parameters 7002 * aligned with the default napi_tx state. 7003 */ 7004 if (vi->sq[0].napi.weight) 7005 vi->intr_coal_tx.max_packets = 1; 7006 else 7007 vi->intr_coal_tx.max_packets = 0; 7008 } 7009 7010 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { 7011 /* The reason is the same as VIRTIO_NET_F_NOTF_COAL. */ 7012 for (i = 0; i < vi->max_queue_pairs; i++) 7013 if (vi->sq[i].napi.weight) 7014 vi->sq[i].intr_coal.max_packets = 1; 7015 7016 err = virtnet_init_irq_moder(vi); 7017 if (err) 7018 goto free; 7019 } 7020 7021 #ifdef CONFIG_SYSFS 7022 if (vi->mergeable_rx_bufs) 7023 dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group; 7024 #endif 7025 netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs); 7026 netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs); 7027 7028 virtnet_init_settings(dev); 7029 7030 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { 7031 vi->failover = net_failover_create(vi->dev); 7032 if (IS_ERR(vi->failover)) { 7033 err = PTR_ERR(vi->failover); 7034 goto free_vqs; 7035 } 7036 } 7037 7038 if (vi->has_rss || vi->has_rss_hash_report) 7039 virtnet_init_default_rss(vi); 7040 7041 enable_rx_mode_work(vi); 7042 7043 /* serialize netdev register + virtio_device_ready() with ndo_open() */ 7044 rtnl_lock(); 7045 7046 err = register_netdevice(dev); 7047 if (err) { 7048 pr_debug("virtio_net: registering device failed\n"); 7049 rtnl_unlock(); 7050 goto free_failover; 7051 } 7052 7053 /* Disable config change notification until ndo_open. */ 7054 virtio_config_driver_disable(vi->vdev); 7055 7056 virtio_device_ready(vdev); 7057 7058 if (vi->has_rss || vi->has_rss_hash_report) { 7059 if (!virtnet_commit_rss_command(vi)) { 7060 dev_warn(&vdev->dev, "RSS disabled because committing failed.\n"); 7061 dev->hw_features &= ~NETIF_F_RXHASH; 7062 vi->has_rss_hash_report = false; 7063 vi->has_rss = false; 7064 } 7065 } 7066 7067 virtnet_set_queues(vi, vi->curr_queue_pairs); 7068 7069 /* a random MAC address has been assigned, notify the device. 
7070 * We don't fail probe if VIRTIO_NET_F_CTRL_MAC_ADDR is not there 7071 * because many devices work fine without getting MAC explicitly 7072 */ 7073 if (!virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && 7074 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 7075 struct scatterlist sg; 7076 7077 sg_init_one(&sg, dev->dev_addr, dev->addr_len); 7078 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 7079 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { 7080 pr_debug("virtio_net: setting MAC address failed\n"); 7081 rtnl_unlock(); 7082 err = -EINVAL; 7083 goto free_unregister_netdev; 7084 } 7085 } 7086 7087 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) { 7088 struct virtio_net_stats_capabilities *stats_cap __free(kfree) = NULL; 7089 struct scatterlist sg; 7090 __le64 v; 7091 7092 stats_cap = kzalloc(sizeof(*stats_cap), GFP_KERNEL); 7093 if (!stats_cap) { 7094 rtnl_unlock(); 7095 err = -ENOMEM; 7096 goto free_unregister_netdev; 7097 } 7098 7099 sg_init_one(&sg, stats_cap, sizeof(*stats_cap)); 7100 7101 if (!virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS, 7102 VIRTIO_NET_CTRL_STATS_QUERY, 7103 NULL, &sg)) { 7104 pr_debug("virtio_net: fail to get stats capability\n"); 7105 rtnl_unlock(); 7106 err = -EINVAL; 7107 goto free_unregister_netdev; 7108 } 7109 7110 v = stats_cap->supported_stats_types[0]; 7111 vi->device_stats_cap = le64_to_cpu(v); 7112 } 7113 7114 /* Assume link up if device can't report link status, 7115 otherwise get link status from config. */ 7116 netif_carrier_off(dev); 7117 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { 7118 virtio_config_changed(vi->vdev); 7119 } else { 7120 vi->status = VIRTIO_NET_S_LINK_UP; 7121 virtnet_update_settings(vi); 7122 netif_carrier_on(dev); 7123 } 7124 7125 for (i = 0; i < ARRAY_SIZE(guest_offloads); i++) { 7126 unsigned int fbit; 7127 7128 fbit = virtio_offload_to_feature(guest_offloads[i]); 7129 if (virtio_has_feature(vi->vdev, fbit)) 7130 set_bit(guest_offloads[i], &vi->guest_offloads); 7131 } 7132 vi->guest_offloads_capable = vi->guest_offloads; 7133 7134 rtnl_unlock(); 7135 7136 err = virtnet_cpu_notif_add(vi); 7137 if (err) { 7138 pr_debug("virtio_net: registering cpu notifier failed\n"); 7139 goto free_unregister_netdev; 7140 } 7141 7142 pr_debug("virtnet: registered device %s with %d RX and TX vq's\n", 7143 dev->name, max_queue_pairs); 7144 7145 return 0; 7146 7147 free_unregister_netdev: 7148 unregister_netdev(dev); 7149 free_failover: 7150 net_failover_destroy(vi->failover); 7151 free_vqs: 7152 virtio_reset_device(vdev); 7153 cancel_delayed_work_sync(&vi->refill); 7154 free_receive_page_frags(vi); 7155 virtnet_del_vqs(vi); 7156 free: 7157 free_netdev(dev); 7158 return err; 7159 } 7160 7161 static void remove_vq_common(struct virtnet_info *vi) 7162 { 7163 int i; 7164 7165 virtio_reset_device(vi->vdev); 7166 7167 /* Free unused buffers in both send and recv, if any. */ 7168 free_unused_bufs(vi); 7169 7170 /* 7171 * Rule of thumb is netdev_tx_reset_queue() should follow any 7172 * skb freeing not followed by netdev_tx_completed_queue() 7173 */ 7174 for (i = 0; i < vi->max_queue_pairs; i++) 7175 netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, i)); 7176 7177 free_receive_bufs(vi); 7178 7179 free_receive_page_frags(vi); 7180 7181 virtnet_del_vqs(vi); 7182 } 7183 7184 static void virtnet_remove(struct virtio_device *vdev) 7185 { 7186 struct virtnet_info *vi = vdev->priv; 7187 7188 virtnet_cpu_notif_remove(vi); 7189 7190 /* Make sure no work handler is accessing the device. 
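 * config_work and rx_mode_work are flushed here, before the netdev is
 * unregistered and the virtqueues are torn down, so neither can run
 * against a partially removed device.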
*/ 7191 flush_work(&vi->config_work); 7192 disable_rx_mode_work(vi); 7193 flush_work(&vi->rx_mode_work); 7194 7195 virtnet_free_irq_moder(vi); 7196 7197 unregister_netdev(vi->dev); 7198 7199 net_failover_destroy(vi->failover); 7200 7201 remove_vq_common(vi); 7202 7203 free_netdev(vi->dev); 7204 } 7205 7206 static __maybe_unused int virtnet_freeze(struct virtio_device *vdev) 7207 { 7208 struct virtnet_info *vi = vdev->priv; 7209 7210 virtnet_cpu_notif_remove(vi); 7211 virtnet_freeze_down(vdev); 7212 remove_vq_common(vi); 7213 7214 return 0; 7215 } 7216 7217 static __maybe_unused int virtnet_restore(struct virtio_device *vdev) 7218 { 7219 struct virtnet_info *vi = vdev->priv; 7220 int err; 7221 7222 err = virtnet_restore_up(vdev); 7223 if (err) 7224 return err; 7225 virtnet_set_queues(vi, vi->curr_queue_pairs); 7226 7227 err = virtnet_cpu_notif_add(vi); 7228 if (err) { 7229 virtnet_freeze_down(vdev); 7230 remove_vq_common(vi); 7231 return err; 7232 } 7233 7234 return 0; 7235 } 7236 7237 static struct virtio_device_id id_table[] = { 7238 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, 7239 { 0 }, 7240 }; 7241 7242 #define VIRTNET_FEATURES \ 7243 VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \ 7244 VIRTIO_NET_F_MAC, \ 7245 VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \ 7246 VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \ 7247 VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \ 7248 VIRTIO_NET_F_HOST_USO, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, \ 7249 VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \ 7250 VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \ 7251 VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \ 7252 VIRTIO_NET_F_CTRL_MAC_ADDR, \ 7253 VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ 7254 VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ 7255 VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \ 7256 VIRTIO_NET_F_VQ_NOTF_COAL, \ 7257 VIRTIO_NET_F_GUEST_HDRLEN, VIRTIO_NET_F_DEVICE_STATS 7258 7259 static unsigned int features[] = { 7260 VIRTNET_FEATURES, 7261 VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO, 7262 VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM, 7263 VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO, 7264 VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM, 7265 }; 7266 7267 static unsigned int features_legacy[] = { 7268 VIRTNET_FEATURES, 7269 VIRTIO_NET_F_GSO, 7270 VIRTIO_F_ANY_LAYOUT, 7271 }; 7272 7273 static struct virtio_driver virtio_net_driver = { 7274 .feature_table = features, 7275 .feature_table_size = ARRAY_SIZE(features), 7276 .feature_table_legacy = features_legacy, 7277 .feature_table_size_legacy = ARRAY_SIZE(features_legacy), 7278 .driver.name = KBUILD_MODNAME, 7279 .id_table = id_table, 7280 .validate = virtnet_validate, 7281 .probe = virtnet_probe, 7282 .remove = virtnet_remove, 7283 .config_changed = virtnet_config_changed, 7284 #ifdef CONFIG_PM_SLEEP 7285 .freeze = virtnet_freeze, 7286 .restore = virtnet_restore, 7287 #endif 7288 }; 7289 7290 static __init int virtio_net_driver_init(void) 7291 { 7292 int ret; 7293 7294 ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online", 7295 virtnet_cpu_online, 7296 virtnet_cpu_down_prep); 7297 if (ret < 0) 7298 goto out; 7299 virtionet_online = ret; 7300 ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead", 7301 NULL, virtnet_cpu_dead); 7302 if (ret) 7303 goto err_dead; 7304 ret = register_virtio_driver(&virtio_net_driver); 7305 if (ret) 7306 goto err_virtio; 7307 return 0; 7308 err_virtio: 7309 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); 7310 
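/* Unwind the cpuhp states in reverse order of registration. */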
err_dead: 7311 cpuhp_remove_multi_state(virtionet_online); 7312 out: 7313 return ret; 7314 } 7315 module_init(virtio_net_driver_init); 7316 7317 static __exit void virtio_net_driver_exit(void) 7318 { 7319 unregister_virtio_driver(&virtio_net_driver); 7320 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); 7321 cpuhp_remove_multi_state(virtionet_online); 7322 } 7323 module_exit(virtio_net_driver_exit); 7324 7325 MODULE_DEVICE_TABLE(virtio, id_table); 7326 MODULE_DESCRIPTION("Virtio network driver"); 7327 MODULE_LICENSE("GPL"); 7328