1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* A network driver using virtio. 3 * 4 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation 5 */ 6 //#define DEBUG 7 #include <linux/netdevice.h> 8 #include <linux/etherdevice.h> 9 #include <linux/ethtool.h> 10 #include <linux/module.h> 11 #include <linux/virtio.h> 12 #include <linux/virtio_net.h> 13 #include <linux/bpf.h> 14 #include <linux/bpf_trace.h> 15 #include <linux/scatterlist.h> 16 #include <linux/if_vlan.h> 17 #include <linux/slab.h> 18 #include <linux/cpu.h> 19 #include <linux/average.h> 20 #include <linux/filter.h> 21 #include <linux/kernel.h> 22 #include <linux/dim.h> 23 #include <net/route.h> 24 #include <net/xdp.h> 25 #include <net/net_failover.h> 26 #include <net/netdev_rx_queue.h> 27 #include <net/netdev_queues.h> 28 #include <net/xdp_sock_drv.h> 29 30 static int napi_weight = NAPI_POLL_WEIGHT; 31 module_param(napi_weight, int, 0444); 32 33 static bool csum = true, gso = true, napi_tx = true; 34 module_param(csum, bool, 0444); 35 module_param(gso, bool, 0444); 36 module_param(napi_tx, bool, 0644); 37 38 #define VIRTIO_OFFLOAD_MAP_MIN 46 39 #define VIRTIO_OFFLOAD_MAP_MAX 47 40 #define VIRTIO_FEATURES_MAP_MIN 65 41 #define VIRTIO_O2F_DELTA (VIRTIO_FEATURES_MAP_MIN - \ 42 VIRTIO_OFFLOAD_MAP_MIN) 43 44 static bool virtio_is_mapped_offload(unsigned int obit) 45 { 46 return obit >= VIRTIO_OFFLOAD_MAP_MIN && 47 obit <= VIRTIO_OFFLOAD_MAP_MAX; 48 } 49 50 static unsigned int virtio_offload_to_feature(unsigned int obit) 51 { 52 return virtio_is_mapped_offload(obit) ? obit + VIRTIO_O2F_DELTA : obit; 53 } 54 55 /* FIXME: MTU in config. */ 56 #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN) 57 #define GOOD_COPY_LEN 128 58 59 #define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD) 60 61 /* Separating two types of XDP xmit */ 62 #define VIRTIO_XDP_TX BIT(0) 63 #define VIRTIO_XDP_REDIR BIT(1) 64 65 /* RX packet size EWMA. The average packet size is used to determine the packet 66 * buffer size when refilling RX rings. As the entire RX ring may be refilled 67 * at once, the weight is chosen so that the EWMA will be insensitive to short- 68 * term, transient changes in packet size. 
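 *
 * (Roughly, DECLARE_EWMA(pkt_len, 0, 64) below keeps the average with no
 * extra fractional precision bits and a weight reciprocal of 64, so each
 * sample only moves the average by about 1/64 of its distance from the
 * current value: avg += (pkt_len - avg) / 64.)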
69 */ 70 DECLARE_EWMA(pkt_len, 0, 64) 71 72 #define VIRTNET_DRIVER_VERSION "1.0.0" 73 74 static const unsigned long guest_offloads[] = { 75 VIRTIO_NET_F_GUEST_TSO4, 76 VIRTIO_NET_F_GUEST_TSO6, 77 VIRTIO_NET_F_GUEST_ECN, 78 VIRTIO_NET_F_GUEST_UFO, 79 VIRTIO_NET_F_GUEST_CSUM, 80 VIRTIO_NET_F_GUEST_USO4, 81 VIRTIO_NET_F_GUEST_USO6, 82 VIRTIO_NET_F_GUEST_HDRLEN, 83 VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_MAPPED, 84 VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM_MAPPED, 85 }; 86 87 #define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ 88 (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ 89 (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ 90 (1ULL << VIRTIO_NET_F_GUEST_UFO) | \ 91 (1ULL << VIRTIO_NET_F_GUEST_USO4) | \ 92 (1ULL << VIRTIO_NET_F_GUEST_USO6) | \ 93 (1ULL << VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_MAPPED) | \ 94 (1ULL << VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM_MAPPED)) 95 96 struct virtnet_stat_desc { 97 char desc[ETH_GSTRING_LEN]; 98 size_t offset; 99 size_t qstat_offset; 100 }; 101 102 struct virtnet_sq_free_stats { 103 u64 packets; 104 u64 bytes; 105 u64 napi_packets; 106 u64 napi_bytes; 107 u64 xsk; 108 }; 109 110 struct virtnet_sq_stats { 111 struct u64_stats_sync syncp; 112 u64_stats_t packets; 113 u64_stats_t bytes; 114 u64_stats_t xdp_tx; 115 u64_stats_t xdp_tx_drops; 116 u64_stats_t kicks; 117 u64_stats_t tx_timeouts; 118 u64_stats_t stop; 119 u64_stats_t wake; 120 }; 121 122 struct virtnet_rq_stats { 123 struct u64_stats_sync syncp; 124 u64_stats_t packets; 125 u64_stats_t bytes; 126 u64_stats_t drops; 127 u64_stats_t xdp_packets; 128 u64_stats_t xdp_tx; 129 u64_stats_t xdp_redirects; 130 u64_stats_t xdp_drops; 131 u64_stats_t kicks; 132 }; 133 134 #define VIRTNET_SQ_STAT(name, m) {name, offsetof(struct virtnet_sq_stats, m), -1} 135 #define VIRTNET_RQ_STAT(name, m) {name, offsetof(struct virtnet_rq_stats, m), -1} 136 137 #define VIRTNET_SQ_STAT_QSTAT(name, m) \ 138 { \ 139 name, \ 140 offsetof(struct virtnet_sq_stats, m), \ 141 offsetof(struct netdev_queue_stats_tx, m), \ 142 } 143 144 #define VIRTNET_RQ_STAT_QSTAT(name, m) \ 145 { \ 146 name, \ 147 offsetof(struct virtnet_rq_stats, m), \ 148 offsetof(struct netdev_queue_stats_rx, m), \ 149 } 150 151 static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = { 152 VIRTNET_SQ_STAT("xdp_tx", xdp_tx), 153 VIRTNET_SQ_STAT("xdp_tx_drops", xdp_tx_drops), 154 VIRTNET_SQ_STAT("kicks", kicks), 155 VIRTNET_SQ_STAT("tx_timeouts", tx_timeouts), 156 }; 157 158 static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = { 159 VIRTNET_RQ_STAT("drops", drops), 160 VIRTNET_RQ_STAT("xdp_packets", xdp_packets), 161 VIRTNET_RQ_STAT("xdp_tx", xdp_tx), 162 VIRTNET_RQ_STAT("xdp_redirects", xdp_redirects), 163 VIRTNET_RQ_STAT("xdp_drops", xdp_drops), 164 VIRTNET_RQ_STAT("kicks", kicks), 165 }; 166 167 static const struct virtnet_stat_desc virtnet_sq_stats_desc_qstat[] = { 168 VIRTNET_SQ_STAT_QSTAT("packets", packets), 169 VIRTNET_SQ_STAT_QSTAT("bytes", bytes), 170 VIRTNET_SQ_STAT_QSTAT("stop", stop), 171 VIRTNET_SQ_STAT_QSTAT("wake", wake), 172 }; 173 174 static const struct virtnet_stat_desc virtnet_rq_stats_desc_qstat[] = { 175 VIRTNET_RQ_STAT_QSTAT("packets", packets), 176 VIRTNET_RQ_STAT_QSTAT("bytes", bytes), 177 }; 178 179 #define VIRTNET_STATS_DESC_CQ(name) \ 180 {#name, offsetof(struct virtio_net_stats_cvq, name), -1} 181 182 #define VIRTNET_STATS_DESC_RX(class, name) \ 183 {#name, offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), -1} 184 185 #define VIRTNET_STATS_DESC_TX(class, name) \ 186 {#name, offsetof(struct virtio_net_stats_tx_ ## class, 
tx_ ## name), -1} 187 188 189 static const struct virtnet_stat_desc virtnet_stats_cvq_desc[] = { 190 VIRTNET_STATS_DESC_CQ(command_num), 191 VIRTNET_STATS_DESC_CQ(ok_num), 192 }; 193 194 static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc[] = { 195 VIRTNET_STATS_DESC_RX(basic, packets), 196 VIRTNET_STATS_DESC_RX(basic, bytes), 197 198 VIRTNET_STATS_DESC_RX(basic, notifications), 199 VIRTNET_STATS_DESC_RX(basic, interrupts), 200 }; 201 202 static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc[] = { 203 VIRTNET_STATS_DESC_TX(basic, packets), 204 VIRTNET_STATS_DESC_TX(basic, bytes), 205 206 VIRTNET_STATS_DESC_TX(basic, notifications), 207 VIRTNET_STATS_DESC_TX(basic, interrupts), 208 }; 209 210 static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc[] = { 211 VIRTNET_STATS_DESC_RX(csum, needs_csum), 212 }; 213 214 static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc[] = { 215 VIRTNET_STATS_DESC_TX(gso, gso_packets_noseg), 216 VIRTNET_STATS_DESC_TX(gso, gso_bytes_noseg), 217 }; 218 219 static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc[] = { 220 VIRTNET_STATS_DESC_RX(speed, ratelimit_bytes), 221 }; 222 223 static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc[] = { 224 VIRTNET_STATS_DESC_TX(speed, ratelimit_bytes), 225 }; 226 227 #define VIRTNET_STATS_DESC_RX_QSTAT(class, name, qstat_field) \ 228 { \ 229 #name, \ 230 offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), \ 231 offsetof(struct netdev_queue_stats_rx, qstat_field), \ 232 } 233 234 #define VIRTNET_STATS_DESC_TX_QSTAT(class, name, qstat_field) \ 235 { \ 236 #name, \ 237 offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name), \ 238 offsetof(struct netdev_queue_stats_tx, qstat_field), \ 239 } 240 241 static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc_qstat[] = { 242 VIRTNET_STATS_DESC_RX_QSTAT(basic, drops, hw_drops), 243 VIRTNET_STATS_DESC_RX_QSTAT(basic, drop_overruns, hw_drop_overruns), 244 }; 245 246 static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc_qstat[] = { 247 VIRTNET_STATS_DESC_TX_QSTAT(basic, drops, hw_drops), 248 VIRTNET_STATS_DESC_TX_QSTAT(basic, drop_malformed, hw_drop_errors), 249 }; 250 251 static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc_qstat[] = { 252 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_valid, csum_unnecessary), 253 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_none, csum_none), 254 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_bad, csum_bad), 255 }; 256 257 static const struct virtnet_stat_desc virtnet_stats_tx_csum_desc_qstat[] = { 258 VIRTNET_STATS_DESC_TX_QSTAT(csum, csum_none, csum_none), 259 VIRTNET_STATS_DESC_TX_QSTAT(csum, needs_csum, needs_csum), 260 }; 261 262 static const struct virtnet_stat_desc virtnet_stats_rx_gso_desc_qstat[] = { 263 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets, hw_gro_packets), 264 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes, hw_gro_bytes), 265 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets_coalesced, hw_gro_wire_packets), 266 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes_coalesced, hw_gro_wire_bytes), 267 }; 268 269 static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc_qstat[] = { 270 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_packets, hw_gso_packets), 271 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_bytes, hw_gso_bytes), 272 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments, hw_gso_wire_packets), 273 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments_bytes, hw_gso_wire_bytes), 274 }; 275 276 static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc_qstat[] = { 277 
VIRTNET_STATS_DESC_RX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits), 278 }; 279 280 static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc_qstat[] = { 281 VIRTNET_STATS_DESC_TX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits), 282 }; 283 284 #define VIRTNET_Q_TYPE_RX 0 285 #define VIRTNET_Q_TYPE_TX 1 286 #define VIRTNET_Q_TYPE_CQ 2 287 288 struct virtnet_interrupt_coalesce { 289 u32 max_packets; 290 u32 max_usecs; 291 }; 292 293 /* The dma information of pages allocated at a time. */ 294 struct virtnet_rq_dma { 295 dma_addr_t addr; 296 u32 ref; 297 u16 len; 298 u16 need_sync; 299 }; 300 301 /* Internal representation of a send virtqueue */ 302 struct send_queue { 303 /* Virtqueue associated with this send _queue */ 304 struct virtqueue *vq; 305 306 /* TX: fragments + linear part + virtio header */ 307 struct scatterlist sg[MAX_SKB_FRAGS + 2]; 308 309 /* Name of the send queue: output.$index */ 310 char name[16]; 311 312 struct virtnet_sq_stats stats; 313 314 struct virtnet_interrupt_coalesce intr_coal; 315 316 struct napi_struct napi; 317 318 /* Record whether sq is in reset state. */ 319 bool reset; 320 321 struct xsk_buff_pool *xsk_pool; 322 323 dma_addr_t xsk_hdr_dma_addr; 324 }; 325 326 /* Internal representation of a receive virtqueue */ 327 struct receive_queue { 328 /* Virtqueue associated with this receive_queue */ 329 struct virtqueue *vq; 330 331 struct napi_struct napi; 332 333 struct bpf_prog __rcu *xdp_prog; 334 335 struct virtnet_rq_stats stats; 336 337 /* The number of rx notifications */ 338 u16 calls; 339 340 /* Is dynamic interrupt moderation enabled? */ 341 bool dim_enabled; 342 343 /* Used to protect dim_enabled and inter_coal */ 344 struct mutex dim_lock; 345 346 /* Dynamic Interrupt Moderation */ 347 struct dim dim; 348 349 u32 packets_in_napi; 350 351 struct virtnet_interrupt_coalesce intr_coal; 352 353 /* Chain pages by the private ptr. */ 354 struct page *pages; 355 356 /* Average packet length for mergeable receive buffers. */ 357 struct ewma_pkt_len mrg_avg_pkt_len; 358 359 /* Page frag for packet buffer allocation. */ 360 struct page_frag alloc_frag; 361 362 /* RX: fragments + linear part + virtio header */ 363 struct scatterlist sg[MAX_SKB_FRAGS + 2]; 364 365 /* Min single buffer size for mergeable buffers case. */ 366 unsigned int min_buf_len; 367 368 /* Name of this receive queue: input.$index */ 369 char name[16]; 370 371 struct xdp_rxq_info xdp_rxq; 372 373 /* Record the last dma info to free after new pages is allocated. */ 374 struct virtnet_rq_dma *last_dma; 375 376 struct xsk_buff_pool *xsk_pool; 377 378 /* xdp rxq used by xsk */ 379 struct xdp_rxq_info xsk_rxq_info; 380 381 struct xdp_buff **xsk_buffs; 382 }; 383 384 #define VIRTIO_NET_RSS_MAX_KEY_SIZE 40 385 386 /* Control VQ buffers: protected by the rtnl lock */ 387 struct control_buf { 388 struct virtio_net_ctrl_hdr hdr; 389 virtio_net_ctrl_ack status; 390 }; 391 392 struct virtnet_info { 393 struct virtio_device *vdev; 394 struct virtqueue *cvq; 395 struct net_device *dev; 396 struct send_queue *sq; 397 struct receive_queue *rq; 398 unsigned int status; 399 400 /* Max # of queue pairs supported by the device */ 401 u16 max_queue_pairs; 402 403 /* # of queue pairs currently used by the driver */ 404 u16 curr_queue_pairs; 405 406 /* # of XDP queue pairs currently used by the driver */ 407 u16 xdp_queue_pairs; 408 409 /* xdp_queue_pairs may be 0, when xdp is already loaded. So add this. */ 410 bool xdp_enabled; 411 412 /* I like... big packets and I cannot lie! 
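	 * (big_packets is set when the device may hand us GSO frames larger
	 * than a single buffer; without mergeable_rx_bufs, such frames are
	 * received into chains of pages linked through page->private, see
	 * give_pages()/get_a_page().)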
 */
	bool big_packets;

	/* number of sg entries allocated for big packets */
	unsigned int big_packets_num_skbfrags;

	/* Host will merge rx buffers for big packets (shake it! shake it!) */
	bool mergeable_rx_bufs;

	/* Host supports rss and/or hash report */
	bool has_rss;
	bool has_rss_hash_report;
	u8 rss_key_size;
	u16 rss_indir_table_size;
	u32 rss_hash_types_supported;
	u32 rss_hash_types_saved;
	struct virtio_net_rss_config_hdr *rss_hdr;
	struct virtio_net_rss_config_trailer rss_trailer;
	u8 rss_hash_key_data[VIRTIO_NET_RSS_MAX_KEY_SIZE];

	/* Has control virtqueue */
	bool has_cvq;

	/* Lock to protect the control VQ */
	struct mutex cvq_lock;

	/* Host can handle any s/g split between our header and packet data */
	bool any_header_sg;

	/* Packet virtio header size */
	u8 hdr_len;

	/* Work struct for delayed refilling if we run low on memory. */
	struct delayed_work refill;

	/* UDP tunnel support */
	bool tx_tnl;

	bool rx_tnl;

	bool rx_tnl_csum;

	/* Is delayed refill enabled? */
	bool refill_enabled;

	/* The lock to synchronize the access to refill_enabled */
	spinlock_t refill_lock;

	/* Work struct for config space updates */
	struct work_struct config_work;

	/* Work struct for setting rx mode */
	struct work_struct rx_mode_work;

	/* OK to queue work setting RX mode? */
	bool rx_mode_work_enabled;

	/* Is the affinity hint set for virtqueues? */
	bool affinity_hint_set;

	/* CPU hotplug instances for online & dead */
	struct hlist_node node;
	struct hlist_node node_dead;

	struct control_buf *ctrl;

	/* Ethtool settings */
	u8 duplex;
	u32 speed;

	/* Is rx dynamic interrupt moderation enabled? */
	bool rx_dim_enabled;

	/* Interrupt coalescing settings */
	struct virtnet_interrupt_coalesce intr_coal_tx;
	struct virtnet_interrupt_coalesce intr_coal_rx;

	unsigned long guest_offloads;
	unsigned long guest_offloads_capable;

	/* failover when STANDBY feature enabled */
	struct failover *failover;

	u64 device_stats_cap;
};

struct padded_vnet_hdr {
	struct virtio_net_hdr_v1_hash hdr;
	/*
	 * hdr is in a separate sg buffer, and data sg buffer shares same page
	 * with this header sg. This padding makes next sg 16 byte aligned
	 * after the header.
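	 * (With the 20-byte struct virtio_net_hdr_v1_hash used here,
	 * 20 + 12 = 32 bytes, a multiple of 16.)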
504 */ 505 char padding[12]; 506 }; 507 508 struct virtio_net_common_hdr { 509 union { 510 struct virtio_net_hdr hdr; 511 struct virtio_net_hdr_mrg_rxbuf mrg_hdr; 512 struct virtio_net_hdr_v1_hash hash_v1_hdr; 513 struct virtio_net_hdr_v1_hash_tunnel tnl_hdr; 514 }; 515 }; 516 517 static struct virtio_net_common_hdr xsk_hdr; 518 519 static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf); 520 static void virtnet_sq_free_unused_buf_done(struct virtqueue *vq); 521 static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp, 522 struct net_device *dev, 523 unsigned int *xdp_xmit, 524 struct virtnet_rq_stats *stats); 525 static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq, 526 struct sk_buff *skb, u8 flags); 527 static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb, 528 struct sk_buff *curr_skb, 529 struct page *page, void *buf, 530 int len, int truesize); 531 static void virtnet_xsk_completed(struct send_queue *sq, int num); 532 533 enum virtnet_xmit_type { 534 VIRTNET_XMIT_TYPE_SKB, 535 VIRTNET_XMIT_TYPE_SKB_ORPHAN, 536 VIRTNET_XMIT_TYPE_XDP, 537 VIRTNET_XMIT_TYPE_XSK, 538 }; 539 540 static size_t virtnet_rss_hdr_size(const struct virtnet_info *vi) 541 { 542 u16 indir_table_size = vi->has_rss ? vi->rss_indir_table_size : 1; 543 544 return struct_size(vi->rss_hdr, indirection_table, indir_table_size); 545 } 546 547 static size_t virtnet_rss_trailer_size(const struct virtnet_info *vi) 548 { 549 return struct_size(&vi->rss_trailer, hash_key_data, vi->rss_key_size); 550 } 551 552 /* We use the last two bits of the pointer to distinguish the xmit type. */ 553 #define VIRTNET_XMIT_TYPE_MASK (BIT(0) | BIT(1)) 554 555 #define VIRTIO_XSK_FLAG_OFFSET 2 556 557 static enum virtnet_xmit_type virtnet_xmit_ptr_unpack(void **ptr) 558 { 559 unsigned long p = (unsigned long)*ptr; 560 561 *ptr = (void *)(p & ~VIRTNET_XMIT_TYPE_MASK); 562 563 return p & VIRTNET_XMIT_TYPE_MASK; 564 } 565 566 static void *virtnet_xmit_ptr_pack(void *ptr, enum virtnet_xmit_type type) 567 { 568 return (void *)((unsigned long)ptr | type); 569 } 570 571 static int virtnet_add_outbuf(struct send_queue *sq, int num, void *data, 572 enum virtnet_xmit_type type) 573 { 574 return virtqueue_add_outbuf(sq->vq, sq->sg, num, 575 virtnet_xmit_ptr_pack(data, type), 576 GFP_ATOMIC); 577 } 578 579 static u32 virtnet_ptr_to_xsk_buff_len(void *ptr) 580 { 581 return ((unsigned long)ptr) >> VIRTIO_XSK_FLAG_OFFSET; 582 } 583 584 static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len) 585 { 586 sg_dma_address(sg) = addr; 587 sg_dma_len(sg) = len; 588 } 589 590 static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq, 591 bool in_napi, struct virtnet_sq_free_stats *stats) 592 { 593 struct xdp_frame *frame; 594 struct sk_buff *skb; 595 unsigned int len; 596 void *ptr; 597 598 while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { 599 switch (virtnet_xmit_ptr_unpack(&ptr)) { 600 case VIRTNET_XMIT_TYPE_SKB: 601 skb = ptr; 602 603 pr_debug("Sent skb %p\n", skb); 604 stats->napi_packets++; 605 stats->napi_bytes += skb->len; 606 napi_consume_skb(skb, in_napi); 607 break; 608 609 case VIRTNET_XMIT_TYPE_SKB_ORPHAN: 610 skb = ptr; 611 612 stats->packets++; 613 stats->bytes += skb->len; 614 napi_consume_skb(skb, in_napi); 615 break; 616 617 case VIRTNET_XMIT_TYPE_XDP: 618 frame = ptr; 619 620 stats->packets++; 621 stats->bytes += xdp_get_frame_len(frame); 622 xdp_return_frame(frame); 623 break; 624 625 case VIRTNET_XMIT_TYPE_XSK: 626 
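			/* For XSK there is no buffer object to release here:
			 * the pointer itself carries the frame length packed by
			 * virtnet_xsk_to_ptr(), recovered below via
			 * virtnet_ptr_to_xsk_buff_len(). Completions are
			 * returned to the pool by the callers through
			 * xsk_tx_completed().
			 */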
stats->bytes += virtnet_ptr_to_xsk_buff_len(ptr); 627 stats->xsk++; 628 break; 629 } 630 } 631 netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes); 632 } 633 634 static void virtnet_free_old_xmit(struct send_queue *sq, 635 struct netdev_queue *txq, 636 bool in_napi, 637 struct virtnet_sq_free_stats *stats) 638 { 639 __free_old_xmit(sq, txq, in_napi, stats); 640 641 if (stats->xsk) 642 virtnet_xsk_completed(sq, stats->xsk); 643 } 644 645 /* Converting between virtqueue no. and kernel tx/rx queue no. 646 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq 647 */ 648 static int vq2txq(struct virtqueue *vq) 649 { 650 return (vq->index - 1) / 2; 651 } 652 653 static int txq2vq(int txq) 654 { 655 return txq * 2 + 1; 656 } 657 658 static int vq2rxq(struct virtqueue *vq) 659 { 660 return vq->index / 2; 661 } 662 663 static int rxq2vq(int rxq) 664 { 665 return rxq * 2; 666 } 667 668 static int vq_type(struct virtnet_info *vi, int qid) 669 { 670 if (qid == vi->max_queue_pairs * 2) 671 return VIRTNET_Q_TYPE_CQ; 672 673 if (qid % 2) 674 return VIRTNET_Q_TYPE_TX; 675 676 return VIRTNET_Q_TYPE_RX; 677 } 678 679 static inline struct virtio_net_common_hdr * 680 skb_vnet_common_hdr(struct sk_buff *skb) 681 { 682 return (struct virtio_net_common_hdr *)skb->cb; 683 } 684 685 /* 686 * private is used to chain pages for big packets, put the whole 687 * most recent used list in the beginning for reuse 688 */ 689 static void give_pages(struct receive_queue *rq, struct page *page) 690 { 691 struct page *end; 692 693 /* Find end of list, sew whole thing into vi->rq.pages. */ 694 for (end = page; end->private; end = (struct page *)end->private); 695 end->private = (unsigned long)rq->pages; 696 rq->pages = page; 697 } 698 699 static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask) 700 { 701 struct page *p = rq->pages; 702 703 if (p) { 704 rq->pages = (struct page *)p->private; 705 /* clear private here, it is used to chain pages */ 706 p->private = 0; 707 } else 708 p = alloc_page(gfp_mask); 709 return p; 710 } 711 712 static void virtnet_rq_free_buf(struct virtnet_info *vi, 713 struct receive_queue *rq, void *buf) 714 { 715 if (vi->mergeable_rx_bufs) 716 put_page(virt_to_head_page(buf)); 717 else if (vi->big_packets) 718 give_pages(rq, buf); 719 else 720 put_page(virt_to_head_page(buf)); 721 } 722 723 static void enable_delayed_refill(struct virtnet_info *vi) 724 { 725 spin_lock_bh(&vi->refill_lock); 726 vi->refill_enabled = true; 727 spin_unlock_bh(&vi->refill_lock); 728 } 729 730 static void disable_delayed_refill(struct virtnet_info *vi) 731 { 732 spin_lock_bh(&vi->refill_lock); 733 vi->refill_enabled = false; 734 spin_unlock_bh(&vi->refill_lock); 735 } 736 737 static void enable_rx_mode_work(struct virtnet_info *vi) 738 { 739 rtnl_lock(); 740 vi->rx_mode_work_enabled = true; 741 rtnl_unlock(); 742 } 743 744 static void disable_rx_mode_work(struct virtnet_info *vi) 745 { 746 rtnl_lock(); 747 vi->rx_mode_work_enabled = false; 748 rtnl_unlock(); 749 } 750 751 static void virtqueue_napi_schedule(struct napi_struct *napi, 752 struct virtqueue *vq) 753 { 754 if (napi_schedule_prep(napi)) { 755 virtqueue_disable_cb(vq); 756 __napi_schedule(napi); 757 } 758 } 759 760 static bool virtqueue_napi_complete(struct napi_struct *napi, 761 struct virtqueue *vq, int processed) 762 { 763 int opaque; 764 765 opaque = virtqueue_enable_cb_prepare(vq); 766 if (napi_complete_done(napi, processed)) { 767 if (unlikely(virtqueue_poll(vq, opaque))) 768 virtqueue_napi_schedule(napi, vq); 769 else 770 
return true; 771 } else { 772 virtqueue_disable_cb(vq); 773 } 774 775 return false; 776 } 777 778 static void skb_xmit_done(struct virtqueue *vq) 779 { 780 struct virtnet_info *vi = vq->vdev->priv; 781 struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi; 782 783 /* Suppress further interrupts. */ 784 virtqueue_disable_cb(vq); 785 786 if (napi->weight) 787 virtqueue_napi_schedule(napi, vq); 788 else 789 /* We were probably waiting for more output buffers. */ 790 netif_wake_subqueue(vi->dev, vq2txq(vq)); 791 } 792 793 #define MRG_CTX_HEADER_SHIFT 22 794 static void *mergeable_len_to_ctx(unsigned int truesize, 795 unsigned int headroom) 796 { 797 return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize); 798 } 799 800 static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx) 801 { 802 return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT; 803 } 804 805 static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx) 806 { 807 return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1); 808 } 809 810 static int check_mergeable_len(struct net_device *dev, void *mrg_ctx, 811 unsigned int len) 812 { 813 unsigned int headroom, tailroom, room, truesize; 814 815 truesize = mergeable_ctx_to_truesize(mrg_ctx); 816 headroom = mergeable_ctx_to_headroom(mrg_ctx); 817 tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 818 room = SKB_DATA_ALIGN(headroom + tailroom); 819 820 if (len > truesize - room) { 821 pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 822 dev->name, len, (unsigned long)(truesize - room)); 823 DEV_STATS_INC(dev, rx_length_errors); 824 return -1; 825 } 826 827 return 0; 828 } 829 830 static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen, 831 unsigned int headroom, 832 unsigned int len) 833 { 834 struct sk_buff *skb; 835 836 skb = build_skb(buf, buflen); 837 if (unlikely(!skb)) 838 return NULL; 839 840 skb_reserve(skb, headroom); 841 skb_put(skb, len); 842 843 return skb; 844 } 845 846 /* Called from bottom half context */ 847 static struct sk_buff *page_to_skb(struct virtnet_info *vi, 848 struct receive_queue *rq, 849 struct page *page, unsigned int offset, 850 unsigned int len, unsigned int truesize, 851 unsigned int headroom) 852 { 853 struct sk_buff *skb; 854 struct virtio_net_common_hdr *hdr; 855 unsigned int copy, hdr_len, hdr_padded_len; 856 struct page *page_to_free = NULL; 857 int tailroom, shinfo_size; 858 char *p, *hdr_p, *buf; 859 860 p = page_address(page) + offset; 861 hdr_p = p; 862 863 hdr_len = vi->hdr_len; 864 if (vi->mergeable_rx_bufs) 865 hdr_padded_len = hdr_len; 866 else 867 hdr_padded_len = sizeof(struct padded_vnet_hdr); 868 869 buf = p - headroom; 870 len -= hdr_len; 871 offset += hdr_padded_len; 872 p += hdr_padded_len; 873 tailroom = truesize - headroom - hdr_padded_len - len; 874 875 shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 876 877 if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) { 878 skb = virtnet_build_skb(buf, truesize, p - buf, len); 879 if (unlikely(!skb)) 880 return NULL; 881 882 page = (struct page *)page->private; 883 if (page) 884 give_pages(rq, page); 885 goto ok; 886 } 887 888 /* copy small packet so we can reuse these pages for small data */ 889 skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN); 890 if (unlikely(!skb)) 891 return NULL; 892 893 /* Copy all frame if it fits skb->head, otherwise 894 * we let virtio_net_hdr_to_skb() and GRO pull headers as needed. 
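	 * (GOOD_COPY_LEN is 128 bytes, so small frames are copied into the
	 * linear area in full, while larger ones only contribute ETH_HLEN here
	 * and keep the rest of their data in page fragments.)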
895 */ 896 if (len <= skb_tailroom(skb)) 897 copy = len; 898 else 899 copy = ETH_HLEN; 900 skb_put_data(skb, p, copy); 901 902 len -= copy; 903 offset += copy; 904 905 if (vi->mergeable_rx_bufs) { 906 if (len) 907 skb_add_rx_frag(skb, 0, page, offset, len, truesize); 908 else 909 page_to_free = page; 910 goto ok; 911 } 912 913 BUG_ON(offset >= PAGE_SIZE); 914 while (len) { 915 unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len); 916 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset, 917 frag_size, truesize); 918 len -= frag_size; 919 page = (struct page *)page->private; 920 offset = 0; 921 } 922 923 if (page) 924 give_pages(rq, page); 925 926 ok: 927 hdr = skb_vnet_common_hdr(skb); 928 memcpy(hdr, hdr_p, hdr_len); 929 if (page_to_free) 930 put_page(page_to_free); 931 932 return skb; 933 } 934 935 static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len) 936 { 937 struct virtnet_info *vi = rq->vq->vdev->priv; 938 struct page *page = virt_to_head_page(buf); 939 struct virtnet_rq_dma *dma; 940 void *head; 941 int offset; 942 943 BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs); 944 945 head = page_address(page); 946 947 dma = head; 948 949 --dma->ref; 950 951 if (dma->need_sync && len) { 952 offset = buf - (head + sizeof(*dma)); 953 954 virtqueue_map_sync_single_range_for_cpu(rq->vq, dma->addr, 955 offset, len, 956 DMA_FROM_DEVICE); 957 } 958 959 if (dma->ref) 960 return; 961 962 virtqueue_unmap_single_attrs(rq->vq, dma->addr, dma->len, 963 DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); 964 put_page(page); 965 } 966 967 static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx) 968 { 969 struct virtnet_info *vi = rq->vq->vdev->priv; 970 void *buf; 971 972 BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs); 973 974 buf = virtqueue_get_buf_ctx(rq->vq, len, ctx); 975 if (buf) 976 virtnet_rq_unmap(rq, buf, *len); 977 978 return buf; 979 } 980 981 static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len) 982 { 983 struct virtnet_info *vi = rq->vq->vdev->priv; 984 struct virtnet_rq_dma *dma; 985 dma_addr_t addr; 986 u32 offset; 987 void *head; 988 989 BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs); 990 991 head = page_address(rq->alloc_frag.page); 992 993 offset = buf - head; 994 995 dma = head; 996 997 addr = dma->addr - sizeof(*dma) + offset; 998 999 sg_init_table(rq->sg, 1); 1000 sg_fill_dma(rq->sg, addr, len); 1001 } 1002 1003 static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp) 1004 { 1005 struct page_frag *alloc_frag = &rq->alloc_frag; 1006 struct virtnet_info *vi = rq->vq->vdev->priv; 1007 struct virtnet_rq_dma *dma; 1008 void *buf, *head; 1009 dma_addr_t addr; 1010 1011 BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs); 1012 1013 head = page_address(alloc_frag->page); 1014 1015 dma = head; 1016 1017 /* new pages */ 1018 if (!alloc_frag->offset) { 1019 if (rq->last_dma) { 1020 /* Now, the new page is allocated, the last dma 1021 * will not be used. So the dma can be unmapped 1022 * if the ref is 0. 
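			 * (dma->ref counts the buffers still carved out of the
			 * old page plus the reference taken at map time;
			 * virtnet_rq_unmap() only unmaps and drops the page
			 * once that count reaches zero.)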
1023 */ 1024 virtnet_rq_unmap(rq, rq->last_dma, 0); 1025 rq->last_dma = NULL; 1026 } 1027 1028 dma->len = alloc_frag->size - sizeof(*dma); 1029 1030 addr = virtqueue_map_single_attrs(rq->vq, dma + 1, 1031 dma->len, DMA_FROM_DEVICE, 0); 1032 if (virtqueue_map_mapping_error(rq->vq, addr)) 1033 return NULL; 1034 1035 dma->addr = addr; 1036 dma->need_sync = virtqueue_map_need_sync(rq->vq, addr); 1037 1038 /* Add a reference to dma to prevent the entire dma from 1039 * being released during error handling. This reference 1040 * will be freed after the pages are no longer used. 1041 */ 1042 get_page(alloc_frag->page); 1043 dma->ref = 1; 1044 alloc_frag->offset = sizeof(*dma); 1045 1046 rq->last_dma = dma; 1047 } 1048 1049 ++dma->ref; 1050 1051 buf = head + alloc_frag->offset; 1052 1053 get_page(alloc_frag->page); 1054 alloc_frag->offset += size; 1055 1056 return buf; 1057 } 1058 1059 static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf) 1060 { 1061 struct virtnet_info *vi = vq->vdev->priv; 1062 struct receive_queue *rq; 1063 int i = vq2rxq(vq); 1064 1065 rq = &vi->rq[i]; 1066 1067 if (rq->xsk_pool) { 1068 xsk_buff_free((struct xdp_buff *)buf); 1069 return; 1070 } 1071 1072 if (!vi->big_packets || vi->mergeable_rx_bufs) 1073 virtnet_rq_unmap(rq, buf, 0); 1074 1075 virtnet_rq_free_buf(vi, rq, buf); 1076 } 1077 1078 static void free_old_xmit(struct send_queue *sq, struct netdev_queue *txq, 1079 bool in_napi) 1080 { 1081 struct virtnet_sq_free_stats stats = {0}; 1082 1083 virtnet_free_old_xmit(sq, txq, in_napi, &stats); 1084 1085 /* Avoid overhead when no packets have been processed 1086 * happens when called speculatively from start_xmit. 1087 */ 1088 if (!stats.packets && !stats.napi_packets) 1089 return; 1090 1091 u64_stats_update_begin(&sq->stats.syncp); 1092 u64_stats_add(&sq->stats.bytes, stats.bytes + stats.napi_bytes); 1093 u64_stats_add(&sq->stats.packets, stats.packets + stats.napi_packets); 1094 u64_stats_update_end(&sq->stats.syncp); 1095 } 1096 1097 static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) 1098 { 1099 if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) 1100 return false; 1101 else if (q < vi->curr_queue_pairs) 1102 return true; 1103 else 1104 return false; 1105 } 1106 1107 static bool tx_may_stop(struct virtnet_info *vi, 1108 struct net_device *dev, 1109 struct send_queue *sq) 1110 { 1111 int qnum; 1112 1113 qnum = sq - vi->sq; 1114 1115 /* If running out of space, stop queue to avoid getting packets that we 1116 * are then unable to transmit. 1117 * An alternative would be to force queuing layer to requeue the skb by 1118 * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be 1119 * returned in a normal path of operation: it means that driver is not 1120 * maintaining the TX queue stop/start state properly, and causes 1121 * the stack to do a non-trivial amount of useless work. 1122 * Since most packets only take 1 or 2 ring slots, stopping the queue 1123 * early means 16 slots are typically wasted. 
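	 * The threshold below is MAX_SKB_FRAGS + 2: the worst case number of
	 * descriptors one skb may need (all of its fragments, the linear part
	 * and the virtio header), which matches the size of sq->sg.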
1124 */ 1125 if (sq->vq->num_free < MAX_SKB_FRAGS + 2) { 1126 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); 1127 1128 netif_tx_stop_queue(txq); 1129 u64_stats_update_begin(&sq->stats.syncp); 1130 u64_stats_inc(&sq->stats.stop); 1131 u64_stats_update_end(&sq->stats.syncp); 1132 1133 return true; 1134 } 1135 1136 return false; 1137 } 1138 1139 static void check_sq_full_and_disable(struct virtnet_info *vi, 1140 struct net_device *dev, 1141 struct send_queue *sq) 1142 { 1143 bool use_napi = sq->napi.weight; 1144 int qnum; 1145 1146 qnum = sq - vi->sq; 1147 1148 if (tx_may_stop(vi, dev, sq)) { 1149 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); 1150 1151 if (use_napi) { 1152 if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) 1153 virtqueue_napi_schedule(&sq->napi, sq->vq); 1154 } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { 1155 /* More just got used, free them then recheck. */ 1156 free_old_xmit(sq, txq, false); 1157 if (sq->vq->num_free >= MAX_SKB_FRAGS + 2) { 1158 netif_start_subqueue(dev, qnum); 1159 u64_stats_update_begin(&sq->stats.syncp); 1160 u64_stats_inc(&sq->stats.wake); 1161 u64_stats_update_end(&sq->stats.syncp); 1162 virtqueue_disable_cb(sq->vq); 1163 } 1164 } 1165 } 1166 } 1167 1168 /* Note that @len is the length of received data without virtio header */ 1169 static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi, 1170 struct receive_queue *rq, void *buf, 1171 u32 len, bool first_buf) 1172 { 1173 struct xdp_buff *xdp; 1174 u32 bufsize; 1175 1176 xdp = (struct xdp_buff *)buf; 1177 1178 /* In virtnet_add_recvbuf_xsk, we use part of XDP_PACKET_HEADROOM for 1179 * virtio header and ask the vhost to fill data from 1180 * hard_start + XDP_PACKET_HEADROOM - vi->hdr_len 1181 * The first buffer has virtio header so the remaining region for frame 1182 * data is 1183 * xsk_pool_get_rx_frame_size() 1184 * While other buffers than the first one do not have virtio header, so 1185 * the maximum frame data's length can be 1186 * xsk_pool_get_rx_frame_size() + vi->hdr_len 1187 */ 1188 bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool); 1189 if (!first_buf) 1190 bufsize += vi->hdr_len; 1191 1192 if (unlikely(len > bufsize)) { 1193 pr_debug("%s: rx error: len %u exceeds truesize %u\n", 1194 vi->dev->name, len, bufsize); 1195 DEV_STATS_INC(vi->dev, rx_length_errors); 1196 xsk_buff_free(xdp); 1197 return NULL; 1198 } 1199 1200 if (first_buf) { 1201 xsk_buff_set_size(xdp, len); 1202 } else { 1203 xdp_prepare_buff(xdp, xdp->data_hard_start, 1204 XDP_PACKET_HEADROOM - vi->hdr_len, len, 1); 1205 xdp->flags = 0; 1206 } 1207 1208 xsk_buff_dma_sync_for_cpu(xdp); 1209 1210 return xdp; 1211 } 1212 1213 static struct sk_buff *xsk_construct_skb(struct receive_queue *rq, 1214 struct xdp_buff *xdp) 1215 { 1216 unsigned int metasize = xdp->data - xdp->data_meta; 1217 struct sk_buff *skb; 1218 unsigned int size; 1219 1220 size = xdp->data_end - xdp->data_hard_start; 1221 skb = napi_alloc_skb(&rq->napi, size); 1222 if (unlikely(!skb)) { 1223 xsk_buff_free(xdp); 1224 return NULL; 1225 } 1226 1227 skb_reserve(skb, xdp->data_meta - xdp->data_hard_start); 1228 1229 size = xdp->data_end - xdp->data_meta; 1230 memcpy(__skb_put(skb, size), xdp->data_meta, size); 1231 1232 if (metasize) { 1233 __skb_pull(skb, metasize); 1234 skb_metadata_set(skb, metasize); 1235 } 1236 1237 xsk_buff_free(xdp); 1238 1239 return skb; 1240 } 1241 1242 static struct sk_buff *virtnet_receive_xsk_small(struct net_device *dev, struct virtnet_info *vi, 1243 struct receive_queue *rq, struct xdp_buff *xdp, 
1244 unsigned int *xdp_xmit, 1245 struct virtnet_rq_stats *stats) 1246 { 1247 struct bpf_prog *prog; 1248 u32 ret; 1249 1250 ret = XDP_PASS; 1251 rcu_read_lock(); 1252 prog = rcu_dereference(rq->xdp_prog); 1253 if (prog) 1254 ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats); 1255 rcu_read_unlock(); 1256 1257 switch (ret) { 1258 case XDP_PASS: 1259 return xsk_construct_skb(rq, xdp); 1260 1261 case XDP_TX: 1262 case XDP_REDIRECT: 1263 return NULL; 1264 1265 default: 1266 /* drop packet */ 1267 xsk_buff_free(xdp); 1268 u64_stats_inc(&stats->drops); 1269 return NULL; 1270 } 1271 } 1272 1273 static void xsk_drop_follow_bufs(struct net_device *dev, 1274 struct receive_queue *rq, 1275 u32 num_buf, 1276 struct virtnet_rq_stats *stats) 1277 { 1278 struct xdp_buff *xdp; 1279 u32 len; 1280 1281 while (num_buf-- > 1) { 1282 xdp = virtqueue_get_buf(rq->vq, &len); 1283 if (unlikely(!xdp)) { 1284 pr_debug("%s: rx error: %d buffers missing\n", 1285 dev->name, num_buf); 1286 DEV_STATS_INC(dev, rx_length_errors); 1287 break; 1288 } 1289 u64_stats_add(&stats->bytes, len); 1290 xsk_buff_free(xdp); 1291 } 1292 } 1293 1294 static int xsk_append_merge_buffer(struct virtnet_info *vi, 1295 struct receive_queue *rq, 1296 struct sk_buff *head_skb, 1297 u32 num_buf, 1298 struct virtio_net_hdr_mrg_rxbuf *hdr, 1299 struct virtnet_rq_stats *stats) 1300 { 1301 struct sk_buff *curr_skb; 1302 struct xdp_buff *xdp; 1303 u32 len, truesize; 1304 struct page *page; 1305 void *buf; 1306 1307 curr_skb = head_skb; 1308 1309 while (--num_buf) { 1310 buf = virtqueue_get_buf(rq->vq, &len); 1311 if (unlikely(!buf)) { 1312 pr_debug("%s: rx error: %d buffers out of %d missing\n", 1313 vi->dev->name, num_buf, 1314 virtio16_to_cpu(vi->vdev, 1315 hdr->num_buffers)); 1316 DEV_STATS_INC(vi->dev, rx_length_errors); 1317 return -EINVAL; 1318 } 1319 1320 u64_stats_add(&stats->bytes, len); 1321 1322 xdp = buf_to_xdp(vi, rq, buf, len, false); 1323 if (!xdp) 1324 goto err; 1325 1326 buf = napi_alloc_frag(len); 1327 if (!buf) { 1328 xsk_buff_free(xdp); 1329 goto err; 1330 } 1331 1332 memcpy(buf, xdp->data, len); 1333 1334 xsk_buff_free(xdp); 1335 1336 page = virt_to_page(buf); 1337 1338 truesize = len; 1339 1340 curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page, 1341 buf, len, truesize); 1342 if (!curr_skb) { 1343 put_page(page); 1344 goto err; 1345 } 1346 } 1347 1348 return 0; 1349 1350 err: 1351 xsk_drop_follow_bufs(vi->dev, rq, num_buf, stats); 1352 return -EINVAL; 1353 } 1354 1355 static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct virtnet_info *vi, 1356 struct receive_queue *rq, struct xdp_buff *xdp, 1357 unsigned int *xdp_xmit, 1358 struct virtnet_rq_stats *stats) 1359 { 1360 struct virtio_net_hdr_mrg_rxbuf *hdr; 1361 struct bpf_prog *prog; 1362 struct sk_buff *skb; 1363 u32 ret, num_buf; 1364 1365 hdr = xdp->data - vi->hdr_len; 1366 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 1367 1368 ret = XDP_PASS; 1369 rcu_read_lock(); 1370 prog = rcu_dereference(rq->xdp_prog); 1371 if (prog) { 1372 /* TODO: support multi buffer. 
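		 * For now the program only ever sees the first buffer, so
		 * frames spanning several buffers are forced to XDP_ABORTED
		 * and their remaining buffers are dropped via
		 * xsk_drop_follow_bufs().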
*/ 1373 if (num_buf == 1) 1374 ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, 1375 stats); 1376 else 1377 ret = XDP_ABORTED; 1378 } 1379 rcu_read_unlock(); 1380 1381 switch (ret) { 1382 case XDP_PASS: 1383 skb = xsk_construct_skb(rq, xdp); 1384 if (!skb) 1385 goto drop_bufs; 1386 1387 if (xsk_append_merge_buffer(vi, rq, skb, num_buf, hdr, stats)) { 1388 dev_kfree_skb(skb); 1389 goto drop; 1390 } 1391 1392 return skb; 1393 1394 case XDP_TX: 1395 case XDP_REDIRECT: 1396 return NULL; 1397 1398 default: 1399 /* drop packet */ 1400 xsk_buff_free(xdp); 1401 } 1402 1403 drop_bufs: 1404 xsk_drop_follow_bufs(dev, rq, num_buf, stats); 1405 1406 drop: 1407 u64_stats_inc(&stats->drops); 1408 return NULL; 1409 } 1410 1411 static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queue *rq, 1412 void *buf, u32 len, 1413 unsigned int *xdp_xmit, 1414 struct virtnet_rq_stats *stats) 1415 { 1416 struct net_device *dev = vi->dev; 1417 struct sk_buff *skb = NULL; 1418 struct xdp_buff *xdp; 1419 u8 flags; 1420 1421 len -= vi->hdr_len; 1422 1423 u64_stats_add(&stats->bytes, len); 1424 1425 xdp = buf_to_xdp(vi, rq, buf, len, true); 1426 if (!xdp) 1427 return; 1428 1429 if (unlikely(len < ETH_HLEN)) { 1430 pr_debug("%s: short packet %i\n", dev->name, len); 1431 DEV_STATS_INC(dev, rx_length_errors); 1432 xsk_buff_free(xdp); 1433 return; 1434 } 1435 1436 flags = ((struct virtio_net_common_hdr *)(xdp->data - vi->hdr_len))->hdr.flags; 1437 1438 if (!vi->mergeable_rx_bufs) 1439 skb = virtnet_receive_xsk_small(dev, vi, rq, xdp, xdp_xmit, stats); 1440 else 1441 skb = virtnet_receive_xsk_merge(dev, vi, rq, xdp, xdp_xmit, stats); 1442 1443 if (skb) 1444 virtnet_receive_done(vi, rq, skb, flags); 1445 } 1446 1447 static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue *rq, 1448 struct xsk_buff_pool *pool, gfp_t gfp) 1449 { 1450 struct xdp_buff **xsk_buffs; 1451 dma_addr_t addr; 1452 int err = 0; 1453 u32 len, i; 1454 int num; 1455 1456 xsk_buffs = rq->xsk_buffs; 1457 1458 num = xsk_buff_alloc_batch(pool, xsk_buffs, rq->vq->num_free); 1459 if (!num) 1460 return -ENOMEM; 1461 1462 len = xsk_pool_get_rx_frame_size(pool) + vi->hdr_len; 1463 1464 for (i = 0; i < num; ++i) { 1465 /* Use the part of XDP_PACKET_HEADROOM as the virtnet hdr space. 1466 * We assume XDP_PACKET_HEADROOM is larger than hdr->len. 
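		 * That way each descriptor handed to the device is simply the
		 * XSK buffer's DMA address rewound by vi->hdr_len, with the
		 * buffer length grown by the same amount.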
		 * (see function virtnet_xsk_pool_enable)
		 */
		addr = xsk_buff_xdp_get_dma(xsk_buffs[i]) - vi->hdr_len;

		sg_init_table(rq->sg, 1);
		sg_fill_dma(rq->sg, addr, len);

		err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1,
						    xsk_buffs[i], NULL, gfp);
		if (err)
			goto err;
	}

	return num;

err:
	for (; i < num; ++i)
		xsk_buff_free(xsk_buffs[i]);

	return err;
}

static void *virtnet_xsk_to_ptr(u32 len)
{
	unsigned long p;

	p = len << VIRTIO_XSK_FLAG_OFFSET;

	return virtnet_xmit_ptr_pack((void *)p, VIRTNET_XMIT_TYPE_XSK);
}

static int virtnet_xsk_xmit_one(struct send_queue *sq,
				struct xsk_buff_pool *pool,
				struct xdp_desc *desc)
{
	struct virtnet_info *vi;
	dma_addr_t addr;

	vi = sq->vq->vdev->priv;

	addr = xsk_buff_raw_get_dma(pool, desc->addr);
	xsk_buff_raw_dma_sync_for_device(pool, addr, desc->len);

	sg_init_table(sq->sg, 2);
	sg_fill_dma(sq->sg, sq->xsk_hdr_dma_addr, vi->hdr_len);
	sg_fill_dma(sq->sg + 1, addr, desc->len);

	return virtqueue_add_outbuf_premapped(sq->vq, sq->sg, 2,
					      virtnet_xsk_to_ptr(desc->len),
					      GFP_ATOMIC);
}

static int virtnet_xsk_xmit_batch(struct send_queue *sq,
				  struct xsk_buff_pool *pool,
				  unsigned int budget,
				  u64 *kicks)
{
	struct xdp_desc *descs = pool->tx_descs;
	bool kick = false;
	u32 nb_pkts, i;
	int err;

	budget = min_t(u32, budget, sq->vq->num_free);

	nb_pkts = xsk_tx_peek_release_desc_batch(pool, budget);
	if (!nb_pkts)
		return 0;

	for (i = 0; i < nb_pkts; i++) {
		err = virtnet_xsk_xmit_one(sq, pool, &descs[i]);
		if (unlikely(err)) {
			xsk_tx_completed(sq->xsk_pool, nb_pkts - i);
			break;
		}

		kick = true;
	}

	if (kick && virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))
		(*kicks)++;

	return i;
}

static bool virtnet_xsk_xmit(struct send_queue *sq, struct xsk_buff_pool *pool,
			     int budget)
{
	struct virtnet_info *vi = sq->vq->vdev->priv;
	struct virtnet_sq_free_stats stats = {};
	struct net_device *dev = vi->dev;
	u64 kicks = 0;
	int sent;

	/* Avoid waking up NAPI needlessly, so call __free_old_xmit() instead
	 * of free_old_xmit().
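	 * (Unlike virtnet_free_old_xmit(), __free_old_xmit() skips the XSK
	 * wakeup path; completed XSK descriptors are handed back explicitly
	 * via xsk_tx_completed() just below.)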
	 */
	__free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq), true, &stats);

	if (stats.xsk)
		xsk_tx_completed(sq->xsk_pool, stats.xsk);

	sent = virtnet_xsk_xmit_batch(sq, pool, budget, &kicks);

	if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq))
		check_sq_full_and_disable(vi, vi->dev, sq);

	if (sent) {
		struct netdev_queue *txq;

		txq = netdev_get_tx_queue(vi->dev, sq - vi->sq);
		txq_trans_cond_update(txq);
	}

	u64_stats_update_begin(&sq->stats.syncp);
	u64_stats_add(&sq->stats.packets, stats.packets);
	u64_stats_add(&sq->stats.bytes, stats.bytes);
	u64_stats_add(&sq->stats.kicks, kicks);
	u64_stats_add(&sq->stats.xdp_tx, sent);
	u64_stats_update_end(&sq->stats.syncp);

	if (xsk_uses_need_wakeup(pool))
		xsk_set_tx_need_wakeup(pool);

	return sent;
}

static void xsk_wakeup(struct send_queue *sq)
{
	if (napi_if_scheduled_mark_missed(&sq->napi))
		return;

	local_bh_disable();
	virtqueue_napi_schedule(&sq->napi, sq->vq);
	local_bh_enable();
}

static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct send_queue *sq;

	if (!netif_running(dev))
		return -ENETDOWN;

	if (qid >= vi->curr_queue_pairs)
		return -EINVAL;

	sq = &vi->sq[qid];

	xsk_wakeup(sq);
	return 0;
}

static void virtnet_xsk_completed(struct send_queue *sq, int num)
{
	xsk_tx_completed(sq->xsk_pool, num);

	/* If this is called from rx poll, start_xmit or xdp xmit, we should
	 * wake up the tx NAPI to consume the xsk tx queue, because the tx
	 * interrupt may not be triggered.
	 */
	xsk_wakeup(sq);
}

static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
				  struct send_queue *sq,
				  struct xdp_frame *xdpf)
{
	struct virtio_net_hdr_mrg_rxbuf *hdr;
	struct skb_shared_info *shinfo;
	u8 nr_frags = 0;
	int err, i;

	if (unlikely(xdpf->headroom < vi->hdr_len))
		return -EOVERFLOW;

	if (unlikely(xdp_frame_has_frags(xdpf))) {
		shinfo = xdp_get_shared_info_from_frame(xdpf);
		nr_frags = shinfo->nr_frags;
	}

	/* In the wrapping function virtnet_xdp_xmit(), we need to free
	 * up the pending old buffers, where we need to calculate the
	 * position of skb_shared_info in xdp_get_frame_len() and
	 * xdp_return_frame(), which involves xdpf->data and
	 * xdpf->headroom. Therefore, we need to update the value of
	 * headroom synchronously here.
	 */
	xdpf->headroom -= vi->hdr_len;
	xdpf->data -= vi->hdr_len;
	/* Zero header and leave csum up to XDP layers */
	hdr = xdpf->data;
	memset(hdr, 0, vi->hdr_len);
	xdpf->len += vi->hdr_len;

	sg_init_table(sq->sg, nr_frags + 1);
	sg_set_buf(sq->sg, xdpf->data, xdpf->len);
	for (i = 0; i < nr_frags; i++) {
		skb_frag_t *frag = &shinfo->frags[i];

		sg_set_page(&sq->sg[i + 1], skb_frag_page(frag),
			    skb_frag_size(frag), skb_frag_off(frag));
	}

	err = virtnet_add_outbuf(sq, nr_frags + 1, xdpf, VIRTNET_XMIT_TYPE_XDP);
	if (unlikely(err))
		return -ENOSPC; /* Caller handles free/refcnt */

	return 0;
}

/* when vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on
 * the current cpu, so it does not need to be locked.
1680 * 1681 * Here we use marco instead of inline functions because we have to deal with 1682 * three issues at the same time: 1. the choice of sq. 2. judge and execute the 1683 * lock/unlock of txq 3. make sparse happy. It is difficult for two inline 1684 * functions to perfectly solve these three problems at the same time. 1685 */ 1686 #define virtnet_xdp_get_sq(vi) ({ \ 1687 int cpu = smp_processor_id(); \ 1688 struct netdev_queue *txq; \ 1689 typeof(vi) v = (vi); \ 1690 unsigned int qp; \ 1691 \ 1692 if (v->curr_queue_pairs > nr_cpu_ids) { \ 1693 qp = v->curr_queue_pairs - v->xdp_queue_pairs; \ 1694 qp += cpu; \ 1695 txq = netdev_get_tx_queue(v->dev, qp); \ 1696 __netif_tx_acquire(txq); \ 1697 } else { \ 1698 qp = cpu % v->curr_queue_pairs; \ 1699 txq = netdev_get_tx_queue(v->dev, qp); \ 1700 __netif_tx_lock(txq, cpu); \ 1701 } \ 1702 v->sq + qp; \ 1703 }) 1704 1705 #define virtnet_xdp_put_sq(vi, q) { \ 1706 struct netdev_queue *txq; \ 1707 typeof(vi) v = (vi); \ 1708 \ 1709 txq = netdev_get_tx_queue(v->dev, (q) - v->sq); \ 1710 if (v->curr_queue_pairs > nr_cpu_ids) \ 1711 __netif_tx_release(txq); \ 1712 else \ 1713 __netif_tx_unlock(txq); \ 1714 } 1715 1716 static int virtnet_xdp_xmit(struct net_device *dev, 1717 int n, struct xdp_frame **frames, u32 flags) 1718 { 1719 struct virtnet_info *vi = netdev_priv(dev); 1720 struct virtnet_sq_free_stats stats = {0}; 1721 struct receive_queue *rq = vi->rq; 1722 struct bpf_prog *xdp_prog; 1723 struct send_queue *sq; 1724 int nxmit = 0; 1725 int kicks = 0; 1726 int ret; 1727 int i; 1728 1729 /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this 1730 * indicate XDP resources have been successfully allocated. 1731 */ 1732 xdp_prog = rcu_access_pointer(rq->xdp_prog); 1733 if (!xdp_prog) 1734 return -ENXIO; 1735 1736 sq = virtnet_xdp_get_sq(vi); 1737 1738 if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { 1739 ret = -EINVAL; 1740 goto out; 1741 } 1742 1743 /* Free up any pending old buffers before queueing new ones. 
*/ 1744 virtnet_free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq), 1745 false, &stats); 1746 1747 for (i = 0; i < n; i++) { 1748 struct xdp_frame *xdpf = frames[i]; 1749 1750 if (__virtnet_xdp_xmit_one(vi, sq, xdpf)) 1751 break; 1752 nxmit++; 1753 } 1754 ret = nxmit; 1755 1756 if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq)) 1757 check_sq_full_and_disable(vi, dev, sq); 1758 1759 if (flags & XDP_XMIT_FLUSH) { 1760 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) 1761 kicks = 1; 1762 } 1763 out: 1764 u64_stats_update_begin(&sq->stats.syncp); 1765 u64_stats_add(&sq->stats.bytes, stats.bytes); 1766 u64_stats_add(&sq->stats.packets, stats.packets); 1767 u64_stats_add(&sq->stats.xdp_tx, n); 1768 u64_stats_add(&sq->stats.xdp_tx_drops, n - nxmit); 1769 u64_stats_add(&sq->stats.kicks, kicks); 1770 u64_stats_update_end(&sq->stats.syncp); 1771 1772 virtnet_xdp_put_sq(vi, sq); 1773 return ret; 1774 } 1775 1776 static void put_xdp_frags(struct xdp_buff *xdp) 1777 { 1778 struct skb_shared_info *shinfo; 1779 struct page *xdp_page; 1780 int i; 1781 1782 if (xdp_buff_has_frags(xdp)) { 1783 shinfo = xdp_get_shared_info_from_buff(xdp); 1784 for (i = 0; i < shinfo->nr_frags; i++) { 1785 xdp_page = skb_frag_page(&shinfo->frags[i]); 1786 put_page(xdp_page); 1787 } 1788 } 1789 } 1790 1791 static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp, 1792 struct net_device *dev, 1793 unsigned int *xdp_xmit, 1794 struct virtnet_rq_stats *stats) 1795 { 1796 struct xdp_frame *xdpf; 1797 int err; 1798 u32 act; 1799 1800 act = bpf_prog_run_xdp(xdp_prog, xdp); 1801 u64_stats_inc(&stats->xdp_packets); 1802 1803 switch (act) { 1804 case XDP_PASS: 1805 return act; 1806 1807 case XDP_TX: 1808 u64_stats_inc(&stats->xdp_tx); 1809 xdpf = xdp_convert_buff_to_frame(xdp); 1810 if (unlikely(!xdpf)) { 1811 netdev_dbg(dev, "convert buff to frame failed for xdp\n"); 1812 return XDP_DROP; 1813 } 1814 1815 err = virtnet_xdp_xmit(dev, 1, &xdpf, 0); 1816 if (unlikely(!err)) { 1817 xdp_return_frame_rx_napi(xdpf); 1818 } else if (unlikely(err < 0)) { 1819 trace_xdp_exception(dev, xdp_prog, act); 1820 return XDP_DROP; 1821 } 1822 *xdp_xmit |= VIRTIO_XDP_TX; 1823 return act; 1824 1825 case XDP_REDIRECT: 1826 u64_stats_inc(&stats->xdp_redirects); 1827 err = xdp_do_redirect(dev, xdp, xdp_prog); 1828 if (err) 1829 return XDP_DROP; 1830 1831 *xdp_xmit |= VIRTIO_XDP_REDIR; 1832 return act; 1833 1834 default: 1835 bpf_warn_invalid_xdp_action(dev, xdp_prog, act); 1836 fallthrough; 1837 case XDP_ABORTED: 1838 trace_xdp_exception(dev, xdp_prog, act); 1839 fallthrough; 1840 case XDP_DROP: 1841 return XDP_DROP; 1842 } 1843 } 1844 1845 static unsigned int virtnet_get_headroom(struct virtnet_info *vi) 1846 { 1847 return vi->xdp_enabled ? XDP_PACKET_HEADROOM : 0; 1848 } 1849 1850 /* We copy the packet for XDP in the following cases: 1851 * 1852 * 1) Packet is scattered across multiple rx buffers. 1853 * 2) Headroom space is insufficient. 1854 * 1855 * This is inefficient but it's a temporary condition that 1856 * we hit right after XDP is enabled and until queue is refilled 1857 * with large buffers with sufficient headroom - so it should affect 1858 * at most queue size packets. 1859 * Afterwards, the conditions to enable 1860 * XDP should preclude the underlying device from sending packets 1861 * across multiple buffers (num_buf > 1), and we make sure buffers 1862 * have enough headroom. 
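 *
 * xdp_linearize_page() below implements the copy: it pulls every buffer of
 * the frame into one freshly allocated page, bounded by PAGE_SIZE minus the
 * skb_shared_info tailroom.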
1863 */ 1864 static struct page *xdp_linearize_page(struct net_device *dev, 1865 struct receive_queue *rq, 1866 int *num_buf, 1867 struct page *p, 1868 int offset, 1869 int page_off, 1870 unsigned int *len) 1871 { 1872 int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1873 struct page *page; 1874 1875 if (page_off + *len + tailroom > PAGE_SIZE) 1876 return NULL; 1877 1878 page = alloc_page(GFP_ATOMIC); 1879 if (!page) 1880 return NULL; 1881 1882 memcpy(page_address(page) + page_off, page_address(p) + offset, *len); 1883 page_off += *len; 1884 1885 /* Only mergeable mode can go inside this while loop. In small mode, 1886 * *num_buf == 1, so it cannot go inside. 1887 */ 1888 while (--*num_buf) { 1889 unsigned int buflen; 1890 void *buf; 1891 void *ctx; 1892 int off; 1893 1894 buf = virtnet_rq_get_buf(rq, &buflen, &ctx); 1895 if (unlikely(!buf)) 1896 goto err_buf; 1897 1898 p = virt_to_head_page(buf); 1899 off = buf - page_address(p); 1900 1901 if (check_mergeable_len(dev, ctx, buflen)) { 1902 put_page(p); 1903 goto err_buf; 1904 } 1905 1906 /* guard against a misconfigured or uncooperative backend that 1907 * is sending packet larger than the MTU. 1908 */ 1909 if ((page_off + buflen + tailroom) > PAGE_SIZE) { 1910 put_page(p); 1911 goto err_buf; 1912 } 1913 1914 memcpy(page_address(page) + page_off, 1915 page_address(p) + off, buflen); 1916 page_off += buflen; 1917 put_page(p); 1918 } 1919 1920 /* Headroom does not contribute to packet length */ 1921 *len = page_off - XDP_PACKET_HEADROOM; 1922 return page; 1923 err_buf: 1924 __free_pages(page, 0); 1925 return NULL; 1926 } 1927 1928 static struct sk_buff *receive_small_build_skb(struct virtnet_info *vi, 1929 unsigned int xdp_headroom, 1930 void *buf, 1931 unsigned int len) 1932 { 1933 unsigned int header_offset; 1934 unsigned int headroom; 1935 unsigned int buflen; 1936 struct sk_buff *skb; 1937 1938 header_offset = VIRTNET_RX_PAD + xdp_headroom; 1939 headroom = vi->hdr_len + header_offset; 1940 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 1941 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1942 1943 skb = virtnet_build_skb(buf, buflen, headroom, len); 1944 if (unlikely(!skb)) 1945 return NULL; 1946 1947 buf += header_offset; 1948 memcpy(skb_vnet_common_hdr(skb), buf, vi->hdr_len); 1949 1950 return skb; 1951 } 1952 1953 static struct sk_buff *receive_small_xdp(struct net_device *dev, 1954 struct virtnet_info *vi, 1955 struct receive_queue *rq, 1956 struct bpf_prog *xdp_prog, 1957 void *buf, 1958 unsigned int xdp_headroom, 1959 unsigned int len, 1960 unsigned int *xdp_xmit, 1961 struct virtnet_rq_stats *stats) 1962 { 1963 unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom; 1964 unsigned int headroom = vi->hdr_len + header_offset; 1965 struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset; 1966 struct page *page = virt_to_head_page(buf); 1967 struct page *xdp_page; 1968 unsigned int buflen; 1969 struct xdp_buff xdp; 1970 struct sk_buff *skb; 1971 unsigned int metasize = 0; 1972 u32 act; 1973 1974 if (unlikely(hdr->hdr.gso_type)) 1975 goto err_xdp; 1976 1977 /* Partially checksummed packets must be dropped. 
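	 * VIRTIO_NET_HDR_F_NEEDS_CSUM means the checksum is still incomplete,
	 * and there is no way to carry that partial-checksum state through an
	 * XDP program, so when XDP is active such packets are simply dropped
	 * here rather than passed to the program.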
*/ 1978 if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) 1979 goto err_xdp; 1980 1981 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 1982 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1983 1984 if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) { 1985 int offset = buf - page_address(page) + header_offset; 1986 unsigned int tlen = len + vi->hdr_len; 1987 int num_buf = 1; 1988 1989 xdp_headroom = virtnet_get_headroom(vi); 1990 header_offset = VIRTNET_RX_PAD + xdp_headroom; 1991 headroom = vi->hdr_len + header_offset; 1992 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 1993 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1994 xdp_page = xdp_linearize_page(dev, rq, &num_buf, page, 1995 offset, header_offset, 1996 &tlen); 1997 if (!xdp_page) 1998 goto err_xdp; 1999 2000 buf = page_address(xdp_page); 2001 put_page(page); 2002 page = xdp_page; 2003 } 2004 2005 xdp_init_buff(&xdp, buflen, &rq->xdp_rxq); 2006 xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len, 2007 xdp_headroom, len, true); 2008 2009 act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats); 2010 2011 switch (act) { 2012 case XDP_PASS: 2013 /* Recalculate length in case bpf program changed it */ 2014 len = xdp.data_end - xdp.data; 2015 metasize = xdp.data - xdp.data_meta; 2016 break; 2017 2018 case XDP_TX: 2019 case XDP_REDIRECT: 2020 goto xdp_xmit; 2021 2022 default: 2023 goto err_xdp; 2024 } 2025 2026 skb = virtnet_build_skb(buf, buflen, xdp.data - buf, len); 2027 if (unlikely(!skb)) 2028 goto err; 2029 2030 if (metasize) 2031 skb_metadata_set(skb, metasize); 2032 2033 return skb; 2034 2035 err_xdp: 2036 u64_stats_inc(&stats->xdp_drops); 2037 err: 2038 u64_stats_inc(&stats->drops); 2039 put_page(page); 2040 xdp_xmit: 2041 return NULL; 2042 } 2043 2044 static struct sk_buff *receive_small(struct net_device *dev, 2045 struct virtnet_info *vi, 2046 struct receive_queue *rq, 2047 void *buf, void *ctx, 2048 unsigned int len, 2049 unsigned int *xdp_xmit, 2050 struct virtnet_rq_stats *stats) 2051 { 2052 unsigned int xdp_headroom = (unsigned long)ctx; 2053 struct page *page = virt_to_head_page(buf); 2054 struct sk_buff *skb; 2055 2056 /* We passed the address of virtnet header to virtio-core, 2057 * so truncate the padding. 2058 */ 2059 buf -= VIRTNET_RX_PAD + xdp_headroom; 2060 2061 len -= vi->hdr_len; 2062 u64_stats_add(&stats->bytes, len); 2063 2064 if (unlikely(len > GOOD_PACKET_LEN)) { 2065 pr_debug("%s: rx error: len %u exceeds max size %d\n", 2066 dev->name, len, GOOD_PACKET_LEN); 2067 DEV_STATS_INC(dev, rx_length_errors); 2068 goto err; 2069 } 2070 2071 if (unlikely(vi->xdp_enabled)) { 2072 struct bpf_prog *xdp_prog; 2073 2074 rcu_read_lock(); 2075 xdp_prog = rcu_dereference(rq->xdp_prog); 2076 if (xdp_prog) { 2077 skb = receive_small_xdp(dev, vi, rq, xdp_prog, buf, 2078 xdp_headroom, len, xdp_xmit, 2079 stats); 2080 rcu_read_unlock(); 2081 return skb; 2082 } 2083 rcu_read_unlock(); 2084 } 2085 2086 skb = receive_small_build_skb(vi, xdp_headroom, buf, len); 2087 if (likely(skb)) 2088 return skb; 2089 2090 err: 2091 u64_stats_inc(&stats->drops); 2092 put_page(page); 2093 return NULL; 2094 } 2095 2096 static struct sk_buff *receive_big(struct net_device *dev, 2097 struct virtnet_info *vi, 2098 struct receive_queue *rq, 2099 void *buf, 2100 unsigned int len, 2101 struct virtnet_rq_stats *stats) 2102 { 2103 struct page *page = buf; 2104 struct sk_buff *skb; 2105 2106 /* Make sure that len does not exceed the size allocated in 2107 * add_recvbuf_big. 
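	 * That allocation spans big_packets_num_skbfrags + 1 pages in total,
	 * hence the bound checked below; anything longer indicates a
	 * misbehaving device.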
2108 */ 2109 if (unlikely(len > (vi->big_packets_num_skbfrags + 1) * PAGE_SIZE)) { 2110 pr_debug("%s: rx error: len %u exceeds allocated size %lu\n", 2111 dev->name, len, 2112 (vi->big_packets_num_skbfrags + 1) * PAGE_SIZE); 2113 goto err; 2114 } 2115 2116 skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0); 2117 u64_stats_add(&stats->bytes, len - vi->hdr_len); 2118 if (unlikely(!skb)) 2119 goto err; 2120 2121 return skb; 2122 2123 err: 2124 u64_stats_inc(&stats->drops); 2125 give_pages(rq, page); 2126 return NULL; 2127 } 2128 2129 static void mergeable_buf_free(struct receive_queue *rq, int num_buf, 2130 struct net_device *dev, 2131 struct virtnet_rq_stats *stats) 2132 { 2133 struct page *page; 2134 void *buf; 2135 int len; 2136 2137 while (num_buf-- > 1) { 2138 buf = virtnet_rq_get_buf(rq, &len, NULL); 2139 if (unlikely(!buf)) { 2140 pr_debug("%s: rx error: %d buffers missing\n", 2141 dev->name, num_buf); 2142 DEV_STATS_INC(dev, rx_length_errors); 2143 break; 2144 } 2145 u64_stats_add(&stats->bytes, len); 2146 page = virt_to_head_page(buf); 2147 put_page(page); 2148 } 2149 } 2150 2151 /* Why not use xdp_build_skb_from_frame() ? 2152 * XDP core assumes that xdp frags are PAGE_SIZE in length, while in 2153 * virtio-net there are 2 points that do not match its requirements: 2154 * 1. The size of the prefilled buffer is not fixed before xdp is set. 2155 * 2. xdp_build_skb_from_frame() does more checks that we don't need, 2156 * like eth_type_trans() (which virtio-net does in receive_buf()). 2157 */ 2158 static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev, 2159 struct virtnet_info *vi, 2160 struct xdp_buff *xdp, 2161 unsigned int xdp_frags_truesz) 2162 { 2163 struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 2164 unsigned int headroom, data_len; 2165 struct sk_buff *skb; 2166 int metasize; 2167 u8 nr_frags; 2168 2169 if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) { 2170 pr_debug("Error building skb as missing reserved tailroom for xdp"); 2171 return NULL; 2172 } 2173 2174 if (unlikely(xdp_buff_has_frags(xdp))) 2175 nr_frags = sinfo->nr_frags; 2176 2177 skb = build_skb(xdp->data_hard_start, xdp->frame_sz); 2178 if (unlikely(!skb)) 2179 return NULL; 2180 2181 headroom = xdp->data - xdp->data_hard_start; 2182 data_len = xdp->data_end - xdp->data; 2183 skb_reserve(skb, headroom); 2184 __skb_put(skb, data_len); 2185 2186 metasize = xdp->data - xdp->data_meta; 2187 metasize = metasize > 0 ? 
metasize : 0; 2188 if (metasize) 2189 skb_metadata_set(skb, metasize); 2190 2191 if (unlikely(xdp_buff_has_frags(xdp))) 2192 xdp_update_skb_frags_info(skb, nr_frags, sinfo->xdp_frags_size, 2193 xdp_frags_truesz, 2194 xdp_buff_get_skb_flags(xdp)); 2195 2196 return skb; 2197 } 2198 2199 /* TODO: build xdp in big mode */ 2200 static int virtnet_build_xdp_buff_mrg(struct net_device *dev, 2201 struct virtnet_info *vi, 2202 struct receive_queue *rq, 2203 struct xdp_buff *xdp, 2204 void *buf, 2205 unsigned int len, 2206 unsigned int frame_sz, 2207 int *num_buf, 2208 unsigned int *xdp_frags_truesize, 2209 struct virtnet_rq_stats *stats) 2210 { 2211 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 2212 struct skb_shared_info *shinfo; 2213 unsigned int xdp_frags_truesz = 0; 2214 unsigned int truesize; 2215 struct page *page; 2216 skb_frag_t *frag; 2217 int offset; 2218 void *ctx; 2219 2220 xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq); 2221 xdp_prepare_buff(xdp, buf - XDP_PACKET_HEADROOM, 2222 XDP_PACKET_HEADROOM + vi->hdr_len, len - vi->hdr_len, true); 2223 2224 if (!*num_buf) 2225 return 0; 2226 2227 if (*num_buf > 1) { 2228 /* If we want to build multi-buffer xdp, we need 2229 * to specify that the flags of xdp_buff have the 2230 * XDP_FLAGS_HAS_FRAG bit. 2231 */ 2232 if (!xdp_buff_has_frags(xdp)) 2233 xdp_buff_set_frags_flag(xdp); 2234 2235 shinfo = xdp_get_shared_info_from_buff(xdp); 2236 shinfo->nr_frags = 0; 2237 shinfo->xdp_frags_size = 0; 2238 } 2239 2240 if (*num_buf > MAX_SKB_FRAGS + 1) 2241 return -EINVAL; 2242 2243 while (--*num_buf > 0) { 2244 buf = virtnet_rq_get_buf(rq, &len, &ctx); 2245 if (unlikely(!buf)) { 2246 pr_debug("%s: rx error: %d buffers out of %d missing\n", 2247 dev->name, *num_buf, 2248 virtio16_to_cpu(vi->vdev, hdr->num_buffers)); 2249 DEV_STATS_INC(dev, rx_length_errors); 2250 goto err; 2251 } 2252 2253 u64_stats_add(&stats->bytes, len); 2254 page = virt_to_head_page(buf); 2255 offset = buf - page_address(page); 2256 2257 if (check_mergeable_len(dev, ctx, len)) { 2258 put_page(page); 2259 goto err; 2260 } 2261 2262 truesize = mergeable_ctx_to_truesize(ctx); 2263 xdp_frags_truesz += truesize; 2264 2265 frag = &shinfo->frags[shinfo->nr_frags++]; 2266 skb_frag_fill_page_desc(frag, page, offset, len); 2267 if (page_is_pfmemalloc(page)) 2268 xdp_buff_set_frag_pfmemalloc(xdp); 2269 2270 shinfo->xdp_frags_size += len; 2271 } 2272 2273 *xdp_frags_truesize = xdp_frags_truesz; 2274 return 0; 2275 2276 err: 2277 put_xdp_frags(xdp); 2278 return -EINVAL; 2279 } 2280 2281 static void *mergeable_xdp_get_buf(struct virtnet_info *vi, 2282 struct receive_queue *rq, 2283 struct bpf_prog *xdp_prog, 2284 void *ctx, 2285 unsigned int *frame_sz, 2286 int *num_buf, 2287 struct page **page, 2288 int offset, 2289 unsigned int *len, 2290 struct virtio_net_hdr_mrg_rxbuf *hdr) 2291 { 2292 unsigned int truesize = mergeable_ctx_to_truesize(ctx); 2293 unsigned int headroom = mergeable_ctx_to_headroom(ctx); 2294 struct page *xdp_page; 2295 unsigned int xdp_room; 2296 2297 /* Transient failure which in theory could occur if 2298 * in-flight packets from before XDP was enabled reach 2299 * the receive path after XDP is loaded. 2300 */ 2301 if (unlikely(hdr->hdr.gso_type)) 2302 return NULL; 2303 2304 /* Partially checksummed packets must be dropped. */ 2305 if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) 2306 return NULL; 2307 2308 /* Now XDP core assumes frag size is PAGE_SIZE, but buffers 2309 * with headroom may add hole in truesize, which 2310 * make their length exceed PAGE_SIZE. 
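 * (The "hole" is the leftover tail of the page fragment that
 * add_recvbuf_mergeable() folds into the current buffer when the
 * remaining space cannot hold another buffer, which inflates both len
 * and truesize beyond the EWMA-derived estimate.)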
So we disabled the 2311 * hole mechanism for xdp. See add_recvbuf_mergeable(). 2312 */ 2313 *frame_sz = truesize; 2314 2315 if (likely(headroom >= virtnet_get_headroom(vi) && 2316 (*num_buf == 1 || xdp_prog->aux->xdp_has_frags))) { 2317 return page_address(*page) + offset; 2318 } 2319 2320 /* This happens when headroom is not enough because 2321 * of the buffer was prefilled before XDP is set. 2322 * This should only happen for the first several packets. 2323 * In fact, vq reset can be used here to help us clean up 2324 * the prefilled buffers, but many existing devices do not 2325 * support it, and we don't want to bother users who are 2326 * using xdp normally. 2327 */ 2328 if (!xdp_prog->aux->xdp_has_frags) { 2329 /* linearize data for XDP */ 2330 xdp_page = xdp_linearize_page(vi->dev, rq, num_buf, 2331 *page, offset, 2332 XDP_PACKET_HEADROOM, 2333 len); 2334 if (!xdp_page) 2335 return NULL; 2336 } else { 2337 xdp_room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM + 2338 sizeof(struct skb_shared_info)); 2339 if (*len + xdp_room > PAGE_SIZE) 2340 return NULL; 2341 2342 xdp_page = alloc_page(GFP_ATOMIC); 2343 if (!xdp_page) 2344 return NULL; 2345 2346 memcpy(page_address(xdp_page) + XDP_PACKET_HEADROOM, 2347 page_address(*page) + offset, *len); 2348 } 2349 2350 *frame_sz = PAGE_SIZE; 2351 2352 put_page(*page); 2353 2354 *page = xdp_page; 2355 2356 return page_address(*page) + XDP_PACKET_HEADROOM; 2357 } 2358 2359 static struct sk_buff *receive_mergeable_xdp(struct net_device *dev, 2360 struct virtnet_info *vi, 2361 struct receive_queue *rq, 2362 struct bpf_prog *xdp_prog, 2363 void *buf, 2364 void *ctx, 2365 unsigned int len, 2366 unsigned int *xdp_xmit, 2367 struct virtnet_rq_stats *stats) 2368 { 2369 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 2370 int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 2371 struct page *page = virt_to_head_page(buf); 2372 int offset = buf - page_address(page); 2373 unsigned int xdp_frags_truesz = 0; 2374 struct sk_buff *head_skb; 2375 unsigned int frame_sz; 2376 struct xdp_buff xdp; 2377 void *data; 2378 u32 act; 2379 int err; 2380 2381 data = mergeable_xdp_get_buf(vi, rq, xdp_prog, ctx, &frame_sz, &num_buf, &page, 2382 offset, &len, hdr); 2383 if (unlikely(!data)) 2384 goto err_xdp; 2385 2386 err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz, 2387 &num_buf, &xdp_frags_truesz, stats); 2388 if (unlikely(err)) 2389 goto err_xdp; 2390 2391 act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats); 2392 2393 switch (act) { 2394 case XDP_PASS: 2395 head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz); 2396 if (unlikely(!head_skb)) 2397 break; 2398 return head_skb; 2399 2400 case XDP_TX: 2401 case XDP_REDIRECT: 2402 return NULL; 2403 2404 default: 2405 break; 2406 } 2407 2408 put_xdp_frags(&xdp); 2409 2410 err_xdp: 2411 put_page(page); 2412 mergeable_buf_free(rq, num_buf, dev, stats); 2413 2414 u64_stats_inc(&stats->xdp_drops); 2415 u64_stats_inc(&stats->drops); 2416 return NULL; 2417 } 2418 2419 static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb, 2420 struct sk_buff *curr_skb, 2421 struct page *page, void *buf, 2422 int len, int truesize) 2423 { 2424 int num_skb_frags; 2425 int offset; 2426 2427 num_skb_frags = skb_shinfo(curr_skb)->nr_frags; 2428 if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) { 2429 struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC); 2430 2431 if (unlikely(!nskb)) 2432 return NULL; 2433 2434 if (curr_skb == head_skb) 2435 skb_shinfo(curr_skb)->frag_list = nskb; 2436 else 2437 
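/* Only the first overflow skb hangs off head_skb's frag_list; any
 * further ones are chained through ->next of the previous overflow skb.
 */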
curr_skb->next = nskb; 2438 curr_skb = nskb; 2439 head_skb->truesize += nskb->truesize; 2440 num_skb_frags = 0; 2441 } 2442 2443 if (curr_skb != head_skb) { 2444 head_skb->data_len += len; 2445 head_skb->len += len; 2446 head_skb->truesize += truesize; 2447 } 2448 2449 offset = buf - page_address(page); 2450 if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) { 2451 put_page(page); 2452 skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1, 2453 len, truesize); 2454 } else { 2455 skb_add_rx_frag(curr_skb, num_skb_frags, page, 2456 offset, len, truesize); 2457 } 2458 2459 return curr_skb; 2460 } 2461 2462 static struct sk_buff *receive_mergeable(struct net_device *dev, 2463 struct virtnet_info *vi, 2464 struct receive_queue *rq, 2465 void *buf, 2466 void *ctx, 2467 unsigned int len, 2468 unsigned int *xdp_xmit, 2469 struct virtnet_rq_stats *stats) 2470 { 2471 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 2472 int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 2473 struct page *page = virt_to_head_page(buf); 2474 int offset = buf - page_address(page); 2475 struct sk_buff *head_skb, *curr_skb; 2476 unsigned int truesize = mergeable_ctx_to_truesize(ctx); 2477 unsigned int headroom = mergeable_ctx_to_headroom(ctx); 2478 2479 head_skb = NULL; 2480 u64_stats_add(&stats->bytes, len - vi->hdr_len); 2481 2482 if (check_mergeable_len(dev, ctx, len)) 2483 goto err_skb; 2484 2485 if (unlikely(vi->xdp_enabled)) { 2486 struct bpf_prog *xdp_prog; 2487 2488 rcu_read_lock(); 2489 xdp_prog = rcu_dereference(rq->xdp_prog); 2490 if (xdp_prog) { 2491 head_skb = receive_mergeable_xdp(dev, vi, rq, xdp_prog, buf, ctx, 2492 len, xdp_xmit, stats); 2493 rcu_read_unlock(); 2494 return head_skb; 2495 } 2496 rcu_read_unlock(); 2497 } 2498 2499 head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom); 2500 curr_skb = head_skb; 2501 2502 if (unlikely(!curr_skb)) 2503 goto err_skb; 2504 while (--num_buf) { 2505 buf = virtnet_rq_get_buf(rq, &len, &ctx); 2506 if (unlikely(!buf)) { 2507 pr_debug("%s: rx error: %d buffers out of %d missing\n", 2508 dev->name, num_buf, 2509 virtio16_to_cpu(vi->vdev, 2510 hdr->num_buffers)); 2511 DEV_STATS_INC(dev, rx_length_errors); 2512 goto err_buf; 2513 } 2514 2515 u64_stats_add(&stats->bytes, len); 2516 page = virt_to_head_page(buf); 2517 2518 if (check_mergeable_len(dev, ctx, len)) 2519 goto err_skb; 2520 2521 truesize = mergeable_ctx_to_truesize(ctx); 2522 curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page, 2523 buf, len, truesize); 2524 if (!curr_skb) 2525 goto err_skb; 2526 } 2527 2528 ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len); 2529 return head_skb; 2530 2531 err_skb: 2532 put_page(page); 2533 mergeable_buf_free(rq, num_buf, dev, stats); 2534 2535 err_buf: 2536 u64_stats_inc(&stats->drops); 2537 dev_kfree_skb(head_skb); 2538 return NULL; 2539 } 2540 2541 static inline u32 2542 virtio_net_hash_value(const struct virtio_net_hdr_v1_hash *hdr_hash) 2543 { 2544 return __le16_to_cpu(hdr_hash->hash_value_lo) | 2545 (__le16_to_cpu(hdr_hash->hash_value_hi) << 16); 2546 } 2547 2548 static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash, 2549 struct sk_buff *skb) 2550 { 2551 enum pkt_hash_types rss_hash_type; 2552 2553 if (!hdr_hash || !skb) 2554 return; 2555 2556 switch (__le16_to_cpu(hdr_hash->hash_report)) { 2557 case VIRTIO_NET_HASH_REPORT_TCPv4: 2558 case VIRTIO_NET_HASH_REPORT_UDPv4: 2559 case VIRTIO_NET_HASH_REPORT_TCPv6: 2560 case VIRTIO_NET_HASH_REPORT_UDPv6: 2561 case VIRTIO_NET_HASH_REPORT_TCPv6_EX: 2562 case 
VIRTIO_NET_HASH_REPORT_UDPv6_EX: 2563 rss_hash_type = PKT_HASH_TYPE_L4; 2564 break; 2565 case VIRTIO_NET_HASH_REPORT_IPv4: 2566 case VIRTIO_NET_HASH_REPORT_IPv6: 2567 case VIRTIO_NET_HASH_REPORT_IPv6_EX: 2568 rss_hash_type = PKT_HASH_TYPE_L3; 2569 break; 2570 case VIRTIO_NET_HASH_REPORT_NONE: 2571 default: 2572 rss_hash_type = PKT_HASH_TYPE_NONE; 2573 } 2574 skb_set_hash(skb, virtio_net_hash_value(hdr_hash), rss_hash_type); 2575 } 2576 2577 static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq, 2578 struct sk_buff *skb, u8 flags) 2579 { 2580 struct virtio_net_common_hdr *hdr; 2581 struct net_device *dev = vi->dev; 2582 2583 hdr = skb_vnet_common_hdr(skb); 2584 if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report) 2585 virtio_skb_set_hash(&hdr->hash_v1_hdr, skb); 2586 2587 hdr->hdr.flags = flags; 2588 if (virtio_net_handle_csum_offload(skb, &hdr->hdr, vi->rx_tnl_csum)) { 2589 net_warn_ratelimited("%s: bad csum: flags: %x, gso_type: %x rx_tnl_csum %d\n", 2590 dev->name, hdr->hdr.flags, 2591 hdr->hdr.gso_type, vi->rx_tnl_csum); 2592 goto frame_err; 2593 } 2594 2595 if (virtio_net_hdr_tnl_to_skb(skb, &hdr->tnl_hdr, vi->rx_tnl, 2596 vi->rx_tnl_csum, 2597 virtio_is_little_endian(vi->vdev))) { 2598 net_warn_ratelimited("%s: bad gso: type: %x, size: %u, flags %x tunnel %d tnl csum %d\n", 2599 dev->name, hdr->hdr.gso_type, 2600 hdr->hdr.gso_size, hdr->hdr.flags, 2601 vi->rx_tnl, vi->rx_tnl_csum); 2602 goto frame_err; 2603 } 2604 2605 skb_record_rx_queue(skb, vq2rxq(rq->vq)); 2606 skb->protocol = eth_type_trans(skb, dev); 2607 pr_debug("Receiving skb proto 0x%04x len %i type %i\n", 2608 ntohs(skb->protocol), skb->len, skb->pkt_type); 2609 2610 napi_gro_receive(&rq->napi, skb); 2611 return; 2612 2613 frame_err: 2614 DEV_STATS_INC(dev, rx_frame_errors); 2615 dev_kfree_skb(skb); 2616 } 2617 2618 static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, 2619 void *buf, unsigned int len, void **ctx, 2620 unsigned int *xdp_xmit, 2621 struct virtnet_rq_stats *stats) 2622 { 2623 struct net_device *dev = vi->dev; 2624 struct sk_buff *skb; 2625 u8 flags; 2626 2627 if (unlikely(len < vi->hdr_len + ETH_HLEN)) { 2628 pr_debug("%s: short packet %i\n", dev->name, len); 2629 DEV_STATS_INC(dev, rx_length_errors); 2630 virtnet_rq_free_buf(vi, rq, buf); 2631 return; 2632 } 2633 2634 /* About the flags below: 2635 * 1. Save the flags early, as the XDP program might overwrite them. 2636 * These flags ensure packets marked as VIRTIO_NET_HDR_F_DATA_VALID 2637 * stay valid after XDP processing. 2638 * 2. XDP doesn't work with partially checksummed packets (refer to 2639 * virtnet_xdp_set()), so packets marked as 2640 * VIRTIO_NET_HDR_F_NEEDS_CSUM get dropped during XDP processing. 2641 */ 2642 2643 if (vi->mergeable_rx_bufs) { 2644 flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags; 2645 skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit, 2646 stats); 2647 } else if (vi->big_packets) { 2648 void *p = page_address((struct page *)buf); 2649 2650 flags = ((struct virtio_net_common_hdr *)p)->hdr.flags; 2651 skb = receive_big(dev, vi, rq, buf, len, stats); 2652 } else { 2653 flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags; 2654 skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); 2655 } 2656 2657 if (unlikely(!skb)) 2658 return; 2659 2660 virtnet_receive_done(vi, rq, skb, flags); 2661 } 2662 2663 /* Unlike mergeable buffers, all buffers are allocated to the 2664 * same size, except for the headroom. 
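 * Each buffer is laid out as
 *   [ VIRTNET_RX_PAD | xdp_headroom | virtio hdr | up to GOOD_PACKET_LEN ]
 * with room for a struct skb_shared_info reserved behind the data.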
For this reason we do 2665 * not need to use mergeable_len_to_ctx here - it is enough 2666 * to store the headroom as the context ignoring the truesize. 2667 */ 2668 static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, 2669 gfp_t gfp) 2670 { 2671 char *buf; 2672 unsigned int xdp_headroom = virtnet_get_headroom(vi); 2673 void *ctx = (void *)(unsigned long)xdp_headroom; 2674 int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom; 2675 int err; 2676 2677 len = SKB_DATA_ALIGN(len) + 2678 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 2679 2680 if (unlikely(!skb_page_frag_refill(len, &rq->alloc_frag, gfp))) 2681 return -ENOMEM; 2682 2683 buf = virtnet_rq_alloc(rq, len, gfp); 2684 if (unlikely(!buf)) 2685 return -ENOMEM; 2686 2687 buf += VIRTNET_RX_PAD + xdp_headroom; 2688 2689 virtnet_rq_init_one_sg(rq, buf, vi->hdr_len + GOOD_PACKET_LEN); 2690 2691 err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1, buf, ctx, gfp); 2692 if (err < 0) { 2693 virtnet_rq_unmap(rq, buf, 0); 2694 put_page(virt_to_head_page(buf)); 2695 } 2696 2697 return err; 2698 } 2699 2700 static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq, 2701 gfp_t gfp) 2702 { 2703 struct page *first, *list = NULL; 2704 char *p; 2705 int i, err, offset; 2706 2707 sg_init_table(rq->sg, vi->big_packets_num_skbfrags + 2); 2708 2709 /* page in rq->sg[vi->big_packets_num_skbfrags + 1] is list tail */ 2710 for (i = vi->big_packets_num_skbfrags + 1; i > 1; --i) { 2711 first = get_a_page(rq, gfp); 2712 if (!first) { 2713 if (list) 2714 give_pages(rq, list); 2715 return -ENOMEM; 2716 } 2717 sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE); 2718 2719 /* chain new page in list head to match sg */ 2720 first->private = (unsigned long)list; 2721 list = first; 2722 } 2723 2724 first = get_a_page(rq, gfp); 2725 if (!first) { 2726 give_pages(rq, list); 2727 return -ENOMEM; 2728 } 2729 p = page_address(first); 2730 2731 /* rq->sg[0], rq->sg[1] share the same page */ 2732 /* a separated rq->sg[0] for header - required in case !any_header_sg */ 2733 sg_set_buf(&rq->sg[0], p, vi->hdr_len); 2734 2735 /* rq->sg[1] for data packet, from offset */ 2736 offset = sizeof(struct padded_vnet_hdr); 2737 sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset); 2738 2739 /* chain first in list head */ 2740 first->private = (unsigned long)list; 2741 err = virtqueue_add_inbuf(rq->vq, rq->sg, vi->big_packets_num_skbfrags + 2, 2742 first, gfp); 2743 if (err < 0) 2744 give_pages(rq, first); 2745 2746 return err; 2747 } 2748 2749 static unsigned int get_mergeable_buf_len(struct receive_queue *rq, 2750 struct ewma_pkt_len *avg_pkt_len, 2751 unsigned int room) 2752 { 2753 struct virtnet_info *vi = rq->vq->vdev->priv; 2754 const size_t hdr_len = vi->hdr_len; 2755 unsigned int len; 2756 2757 if (room) 2758 return PAGE_SIZE - room; 2759 2760 len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len), 2761 rq->min_buf_len, PAGE_SIZE - hdr_len); 2762 2763 return ALIGN(len, L1_CACHE_BYTES); 2764 } 2765 2766 static int add_recvbuf_mergeable(struct virtnet_info *vi, 2767 struct receive_queue *rq, gfp_t gfp) 2768 { 2769 struct page_frag *alloc_frag = &rq->alloc_frag; 2770 unsigned int headroom = virtnet_get_headroom(vi); 2771 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 2772 unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); 2773 unsigned int len, hole; 2774 void *ctx; 2775 char *buf; 2776 int err; 2777 2778 /* Extra tailroom is needed to satisfy XDP's assumption. 
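 * (With XDP attached the headroom is non-zero, so get_mergeable_buf_len()
 * returns PAGE_SIZE - room and each buffer plus its reserved head- and
 * tailroom fills exactly one page.)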
This 2779 * means rx frags coalescing won't work, but consider we've 2780 * disabled GSO for XDP, it won't be a big issue. 2781 */ 2782 len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room); 2783 2784 if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp))) 2785 return -ENOMEM; 2786 2787 if (!alloc_frag->offset && len + room + sizeof(struct virtnet_rq_dma) > alloc_frag->size) 2788 len -= sizeof(struct virtnet_rq_dma); 2789 2790 buf = virtnet_rq_alloc(rq, len + room, gfp); 2791 if (unlikely(!buf)) 2792 return -ENOMEM; 2793 2794 buf += headroom; /* advance address leaving hole at front of pkt */ 2795 hole = alloc_frag->size - alloc_frag->offset; 2796 if (hole < len + room) { 2797 /* To avoid internal fragmentation, if there is very likely not 2798 * enough space for another buffer, add the remaining space to 2799 * the current buffer. 2800 * XDP core assumes that frame_size of xdp_buff and the length 2801 * of the frag are PAGE_SIZE, so we disable the hole mechanism. 2802 */ 2803 if (!headroom) 2804 len += hole; 2805 alloc_frag->offset += hole; 2806 } 2807 2808 virtnet_rq_init_one_sg(rq, buf, len); 2809 2810 ctx = mergeable_len_to_ctx(len + room, headroom); 2811 err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1, buf, ctx, gfp); 2812 if (err < 0) { 2813 virtnet_rq_unmap(rq, buf, 0); 2814 put_page(virt_to_head_page(buf)); 2815 } 2816 2817 return err; 2818 } 2819 2820 /* 2821 * Returns false if we couldn't fill entirely (OOM). 2822 * 2823 * Normally run in the receive path, but can also be run from ndo_open 2824 * before we're receiving packets, or from refill_work which is 2825 * careful to disable receiving (using napi_disable). 2826 */ 2827 static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq, 2828 gfp_t gfp) 2829 { 2830 int err; 2831 2832 if (rq->xsk_pool) { 2833 err = virtnet_add_recvbuf_xsk(vi, rq, rq->xsk_pool, gfp); 2834 goto kick; 2835 } 2836 2837 do { 2838 if (vi->mergeable_rx_bufs) 2839 err = add_recvbuf_mergeable(vi, rq, gfp); 2840 else if (vi->big_packets) 2841 err = add_recvbuf_big(vi, rq, gfp); 2842 else 2843 err = add_recvbuf_small(vi, rq, gfp); 2844 2845 if (err) 2846 break; 2847 } while (rq->vq->num_free); 2848 2849 kick: 2850 if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) { 2851 unsigned long flags; 2852 2853 flags = u64_stats_update_begin_irqsave(&rq->stats.syncp); 2854 u64_stats_inc(&rq->stats.kicks); 2855 u64_stats_update_end_irqrestore(&rq->stats.syncp, flags); 2856 } 2857 2858 return err != -ENOMEM; 2859 } 2860 2861 static void skb_recv_done(struct virtqueue *rvq) 2862 { 2863 struct virtnet_info *vi = rvq->vdev->priv; 2864 struct receive_queue *rq = &vi->rq[vq2rxq(rvq)]; 2865 2866 rq->calls++; 2867 virtqueue_napi_schedule(&rq->napi, rvq); 2868 } 2869 2870 static void virtnet_napi_do_enable(struct virtqueue *vq, 2871 struct napi_struct *napi) 2872 { 2873 napi_enable(napi); 2874 2875 /* If all buffers were filled by other side before we napi_enabled, we 2876 * won't get another interrupt, so process any outstanding packets now. 2877 * Call local_bh_enable after to trigger softIRQ processing. 
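 * Scheduling NAPI from process context only raises NET_RX_SOFTIRQ; the
 * local_bh_disable()/local_bh_enable() pair below ensures the pending
 * softirq actually runs as soon as bottom halves are re-enabled.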
2878 */ 2879 local_bh_disable(); 2880 virtqueue_napi_schedule(napi, vq); 2881 local_bh_enable(); 2882 } 2883 2884 static void virtnet_napi_enable(struct receive_queue *rq) 2885 { 2886 struct virtnet_info *vi = rq->vq->vdev->priv; 2887 int qidx = vq2rxq(rq->vq); 2888 2889 virtnet_napi_do_enable(rq->vq, &rq->napi); 2890 netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, &rq->napi); 2891 } 2892 2893 static void virtnet_napi_tx_enable(struct send_queue *sq) 2894 { 2895 struct virtnet_info *vi = sq->vq->vdev->priv; 2896 struct napi_struct *napi = &sq->napi; 2897 int qidx = vq2txq(sq->vq); 2898 2899 if (!napi->weight) 2900 return; 2901 2902 /* Tx napi touches cachelines on the cpu handling tx interrupts. Only 2903 * enable the feature if this is likely affine with the transmit path. 2904 */ 2905 if (!vi->affinity_hint_set) { 2906 napi->weight = 0; 2907 return; 2908 } 2909 2910 virtnet_napi_do_enable(sq->vq, napi); 2911 netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, napi); 2912 } 2913 2914 static void virtnet_napi_tx_disable(struct send_queue *sq) 2915 { 2916 struct virtnet_info *vi = sq->vq->vdev->priv; 2917 struct napi_struct *napi = &sq->napi; 2918 int qidx = vq2txq(sq->vq); 2919 2920 if (napi->weight) { 2921 netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, NULL); 2922 napi_disable(napi); 2923 } 2924 } 2925 2926 static void virtnet_napi_disable(struct receive_queue *rq) 2927 { 2928 struct virtnet_info *vi = rq->vq->vdev->priv; 2929 struct napi_struct *napi = &rq->napi; 2930 int qidx = vq2rxq(rq->vq); 2931 2932 netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, NULL); 2933 napi_disable(napi); 2934 } 2935 2936 static void refill_work(struct work_struct *work) 2937 { 2938 struct virtnet_info *vi = 2939 container_of(work, struct virtnet_info, refill.work); 2940 bool still_empty; 2941 int i; 2942 2943 for (i = 0; i < vi->curr_queue_pairs; i++) { 2944 struct receive_queue *rq = &vi->rq[i]; 2945 2946 /* 2947 * When queue API support is added in the future and the call 2948 * below becomes napi_disable_locked, this driver will need to 2949 * be refactored. 2950 * 2951 * One possible solution would be to: 2952 * - cancel refill_work with cancel_delayed_work (note: 2953 * non-sync) 2954 * - cancel refill_work with cancel_delayed_work_sync in 2955 * virtnet_remove after the netdev is unregistered 2956 * - wrap all of the work in a lock (perhaps the netdev 2957 * instance lock) 2958 * - check netif_running() and return early to avoid a race 2959 */ 2960 napi_disable(&rq->napi); 2961 still_empty = !try_fill_recv(vi, rq, GFP_KERNEL); 2962 virtnet_napi_do_enable(rq->vq, &rq->napi); 2963 2964 /* In theory, this can happen: if we don't get any buffers in 2965 * we will *never* try to fill again. 
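 * try_fill_recv() returns false when an allocation failed (-ENOMEM)
 * before the ring could be filled; in that case the work is re-armed
 * below with a HZ/2 delay instead of busy-retrying.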
2966 */ 2967 if (still_empty) 2968 schedule_delayed_work(&vi->refill, HZ/2); 2969 } 2970 } 2971 2972 static int virtnet_receive_xsk_bufs(struct virtnet_info *vi, 2973 struct receive_queue *rq, 2974 int budget, 2975 unsigned int *xdp_xmit, 2976 struct virtnet_rq_stats *stats) 2977 { 2978 unsigned int len; 2979 int packets = 0; 2980 void *buf; 2981 2982 while (packets < budget) { 2983 buf = virtqueue_get_buf(rq->vq, &len); 2984 if (!buf) 2985 break; 2986 2987 virtnet_receive_xsk_buf(vi, rq, buf, len, xdp_xmit, stats); 2988 packets++; 2989 } 2990 2991 return packets; 2992 } 2993 2994 static int virtnet_receive_packets(struct virtnet_info *vi, 2995 struct receive_queue *rq, 2996 int budget, 2997 unsigned int *xdp_xmit, 2998 struct virtnet_rq_stats *stats) 2999 { 3000 unsigned int len; 3001 int packets = 0; 3002 void *buf; 3003 3004 if (!vi->big_packets || vi->mergeable_rx_bufs) { 3005 void *ctx; 3006 while (packets < budget && 3007 (buf = virtnet_rq_get_buf(rq, &len, &ctx))) { 3008 receive_buf(vi, rq, buf, len, ctx, xdp_xmit, stats); 3009 packets++; 3010 } 3011 } else { 3012 while (packets < budget && 3013 (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) { 3014 receive_buf(vi, rq, buf, len, NULL, xdp_xmit, stats); 3015 packets++; 3016 } 3017 } 3018 3019 return packets; 3020 } 3021 3022 static int virtnet_receive(struct receive_queue *rq, int budget, 3023 unsigned int *xdp_xmit) 3024 { 3025 struct virtnet_info *vi = rq->vq->vdev->priv; 3026 struct virtnet_rq_stats stats = {}; 3027 int i, packets; 3028 3029 if (rq->xsk_pool) 3030 packets = virtnet_receive_xsk_bufs(vi, rq, budget, xdp_xmit, &stats); 3031 else 3032 packets = virtnet_receive_packets(vi, rq, budget, xdp_xmit, &stats); 3033 3034 if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) { 3035 if (!try_fill_recv(vi, rq, GFP_ATOMIC)) { 3036 spin_lock(&vi->refill_lock); 3037 if (vi->refill_enabled) 3038 schedule_delayed_work(&vi->refill, 0); 3039 spin_unlock(&vi->refill_lock); 3040 } 3041 } 3042 3043 u64_stats_set(&stats.packets, packets); 3044 u64_stats_update_begin(&rq->stats.syncp); 3045 for (i = 0; i < ARRAY_SIZE(virtnet_rq_stats_desc); i++) { 3046 size_t offset = virtnet_rq_stats_desc[i].offset; 3047 u64_stats_t *item, *src; 3048 3049 item = (u64_stats_t *)((u8 *)&rq->stats + offset); 3050 src = (u64_stats_t *)((u8 *)&stats + offset); 3051 u64_stats_add(item, u64_stats_read(src)); 3052 } 3053 3054 u64_stats_add(&rq->stats.packets, u64_stats_read(&stats.packets)); 3055 u64_stats_add(&rq->stats.bytes, u64_stats_read(&stats.bytes)); 3056 3057 u64_stats_update_end(&rq->stats.syncp); 3058 3059 return packets; 3060 } 3061 3062 static void virtnet_poll_cleantx(struct receive_queue *rq, int budget) 3063 { 3064 struct virtnet_info *vi = rq->vq->vdev->priv; 3065 unsigned int index = vq2rxq(rq->vq); 3066 struct send_queue *sq = &vi->sq[index]; 3067 struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index); 3068 3069 if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index)) 3070 return; 3071 3072 if (__netif_tx_trylock(txq)) { 3073 if (sq->reset) { 3074 __netif_tx_unlock(txq); 3075 return; 3076 } 3077 3078 do { 3079 virtqueue_disable_cb(sq->vq); 3080 free_old_xmit(sq, txq, !!budget); 3081 } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq))); 3082 3083 if (sq->vq->num_free >= MAX_SKB_FRAGS + 2 && 3084 netif_tx_queue_stopped(txq)) { 3085 u64_stats_update_begin(&sq->stats.syncp); 3086 u64_stats_inc(&sq->stats.wake); 3087 u64_stats_update_end(&sq->stats.syncp); 3088 netif_tx_wake_queue(txq); 3089 } 3090 
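/* Note: MAX_SKB_FRAGS + 2 above is the worst-case descriptor count for
 * a single skb (all page frags, the linear part and an out-of-band
 * virtio header), so the queue is only woken once at least one more
 * full-sized skb is guaranteed to fit.
 */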
3091 __netif_tx_unlock(txq); 3092 } 3093 } 3094 3095 static void virtnet_rx_dim_update(struct virtnet_info *vi, struct receive_queue *rq) 3096 { 3097 struct dim_sample cur_sample = {}; 3098 3099 if (!rq->packets_in_napi) 3100 return; 3101 3102 /* Don't need protection when fetching stats, since fetcher and 3103 * updater of the stats are in same context 3104 */ 3105 dim_update_sample(rq->calls, 3106 u64_stats_read(&rq->stats.packets), 3107 u64_stats_read(&rq->stats.bytes), 3108 &cur_sample); 3109 3110 net_dim(&rq->dim, &cur_sample); 3111 rq->packets_in_napi = 0; 3112 } 3113 3114 static int virtnet_poll(struct napi_struct *napi, int budget) 3115 { 3116 struct receive_queue *rq = 3117 container_of(napi, struct receive_queue, napi); 3118 struct virtnet_info *vi = rq->vq->vdev->priv; 3119 struct send_queue *sq; 3120 unsigned int received; 3121 unsigned int xdp_xmit = 0; 3122 bool napi_complete; 3123 3124 virtnet_poll_cleantx(rq, budget); 3125 3126 received = virtnet_receive(rq, budget, &xdp_xmit); 3127 rq->packets_in_napi += received; 3128 3129 if (xdp_xmit & VIRTIO_XDP_REDIR) 3130 xdp_do_flush(); 3131 3132 /* Out of packets? */ 3133 if (received < budget) { 3134 napi_complete = virtqueue_napi_complete(napi, rq->vq, received); 3135 /* Intentionally not taking dim_lock here. This may result in a 3136 * spurious net_dim call. But if that happens virtnet_rx_dim_work 3137 * will not act on the scheduled work. 3138 */ 3139 if (napi_complete && rq->dim_enabled) 3140 virtnet_rx_dim_update(vi, rq); 3141 } 3142 3143 if (xdp_xmit & VIRTIO_XDP_TX) { 3144 sq = virtnet_xdp_get_sq(vi); 3145 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { 3146 u64_stats_update_begin(&sq->stats.syncp); 3147 u64_stats_inc(&sq->stats.kicks); 3148 u64_stats_update_end(&sq->stats.syncp); 3149 } 3150 virtnet_xdp_put_sq(vi, sq); 3151 } 3152 3153 return received; 3154 } 3155 3156 static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index) 3157 { 3158 virtnet_napi_tx_disable(&vi->sq[qp_index]); 3159 virtnet_napi_disable(&vi->rq[qp_index]); 3160 xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); 3161 } 3162 3163 static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index) 3164 { 3165 struct net_device *dev = vi->dev; 3166 int err; 3167 3168 err = xdp_rxq_info_reg(&vi->rq[qp_index].xdp_rxq, dev, qp_index, 3169 vi->rq[qp_index].napi.napi_id); 3170 if (err < 0) 3171 return err; 3172 3173 err = xdp_rxq_info_reg_mem_model(&vi->rq[qp_index].xdp_rxq, 3174 MEM_TYPE_PAGE_SHARED, NULL); 3175 if (err < 0) 3176 goto err_xdp_reg_mem_model; 3177 3178 virtnet_napi_enable(&vi->rq[qp_index]); 3179 virtnet_napi_tx_enable(&vi->sq[qp_index]); 3180 3181 return 0; 3182 3183 err_xdp_reg_mem_model: 3184 xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); 3185 return err; 3186 } 3187 3188 static void virtnet_cancel_dim(struct virtnet_info *vi, struct dim *dim) 3189 { 3190 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 3191 return; 3192 net_dim_work_cancel(dim); 3193 } 3194 3195 static void virtnet_update_settings(struct virtnet_info *vi) 3196 { 3197 u32 speed; 3198 u8 duplex; 3199 3200 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX)) 3201 return; 3202 3203 virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed); 3204 3205 if (ethtool_validate_speed(speed)) 3206 vi->speed = speed; 3207 3208 virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex); 3209 3210 if (ethtool_validate_duplex(duplex)) 3211 vi->duplex = duplex; 3212 } 3213 3214 static int virtnet_open(struct 
net_device *dev) 3215 { 3216 struct virtnet_info *vi = netdev_priv(dev); 3217 int i, err; 3218 3219 enable_delayed_refill(vi); 3220 3221 for (i = 0; i < vi->max_queue_pairs; i++) { 3222 if (i < vi->curr_queue_pairs) 3223 /* Make sure we have some buffers: if oom use wq. */ 3224 if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL)) 3225 schedule_delayed_work(&vi->refill, 0); 3226 3227 err = virtnet_enable_queue_pair(vi, i); 3228 if (err < 0) 3229 goto err_enable_qp; 3230 } 3231 3232 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { 3233 if (vi->status & VIRTIO_NET_S_LINK_UP) 3234 netif_carrier_on(vi->dev); 3235 virtio_config_driver_enable(vi->vdev); 3236 } else { 3237 vi->status = VIRTIO_NET_S_LINK_UP; 3238 netif_carrier_on(dev); 3239 } 3240 3241 return 0; 3242 3243 err_enable_qp: 3244 disable_delayed_refill(vi); 3245 cancel_delayed_work_sync(&vi->refill); 3246 3247 for (i--; i >= 0; i--) { 3248 virtnet_disable_queue_pair(vi, i); 3249 virtnet_cancel_dim(vi, &vi->rq[i].dim); 3250 } 3251 3252 return err; 3253 } 3254 3255 static int virtnet_poll_tx(struct napi_struct *napi, int budget) 3256 { 3257 struct send_queue *sq = container_of(napi, struct send_queue, napi); 3258 struct virtnet_info *vi = sq->vq->vdev->priv; 3259 unsigned int index = vq2txq(sq->vq); 3260 struct netdev_queue *txq; 3261 int opaque, xsk_done = 0; 3262 bool done; 3263 3264 if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { 3265 /* We don't need to enable cb for XDP */ 3266 napi_complete_done(napi, 0); 3267 return 0; 3268 } 3269 3270 txq = netdev_get_tx_queue(vi->dev, index); 3271 __netif_tx_lock(txq, raw_smp_processor_id()); 3272 virtqueue_disable_cb(sq->vq); 3273 3274 if (sq->xsk_pool) 3275 xsk_done = virtnet_xsk_xmit(sq, sq->xsk_pool, budget); 3276 else 3277 free_old_xmit(sq, txq, !!budget); 3278 3279 if (sq->vq->num_free >= MAX_SKB_FRAGS + 2 && 3280 netif_tx_queue_stopped(txq)) { 3281 u64_stats_update_begin(&sq->stats.syncp); 3282 u64_stats_inc(&sq->stats.wake); 3283 u64_stats_update_end(&sq->stats.syncp); 3284 netif_tx_wake_queue(txq); 3285 } 3286 3287 if (xsk_done >= budget) { 3288 __netif_tx_unlock(txq); 3289 return budget; 3290 } 3291 3292 opaque = virtqueue_enable_cb_prepare(sq->vq); 3293 3294 done = napi_complete_done(napi, 0); 3295 3296 if (!done) 3297 virtqueue_disable_cb(sq->vq); 3298 3299 __netif_tx_unlock(txq); 3300 3301 if (done) { 3302 if (unlikely(virtqueue_poll(sq->vq, opaque))) { 3303 if (napi_schedule_prep(napi)) { 3304 __netif_tx_lock(txq, raw_smp_processor_id()); 3305 virtqueue_disable_cb(sq->vq); 3306 __netif_tx_unlock(txq); 3307 __napi_schedule(napi); 3308 } 3309 } 3310 } 3311 3312 return 0; 3313 } 3314 3315 static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan) 3316 { 3317 const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; 3318 struct virtnet_info *vi = sq->vq->vdev->priv; 3319 struct virtio_net_hdr_v1_hash_tunnel *hdr; 3320 int num_sg; 3321 unsigned hdr_len = vi->hdr_len; 3322 bool can_push; 3323 3324 pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); 3325 3326 /* Make sure it's safe to cast between formats */ 3327 BUILD_BUG_ON(__alignof__(*hdr) != __alignof__(hdr->hash_hdr)); 3328 BUILD_BUG_ON(__alignof__(*hdr) != __alignof__(hdr->hash_hdr.hdr)); 3329 3330 can_push = vi->any_header_sg && 3331 !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) && 3332 !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len; 3333 /* Even if we can, don't push here yet as this would skew 3334 * csum_start offset below. 
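 * virtio_net_hdr_tnl_from_skb() computes csum_start relative to the
 * current skb->data, so the header is pushed only after it has been
 * filled in. In the can_push case header and packet then go out as a
 * single sg element; otherwise the out-of-band header returned by
 * skb_vnet_common_hdr() is added as a separate sg entry.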
*/ 3335 if (can_push) 3336 hdr = (struct virtio_net_hdr_v1_hash_tunnel *)(skb->data - 3337 hdr_len); 3338 else 3339 hdr = &skb_vnet_common_hdr(skb)->tnl_hdr; 3340 3341 if (virtio_net_hdr_tnl_from_skb(skb, hdr, vi->tx_tnl, 3342 virtio_is_little_endian(vi->vdev), 0, 3343 false)) 3344 return -EPROTO; 3345 3346 if (vi->mergeable_rx_bufs) 3347 hdr->hash_hdr.hdr.num_buffers = 0; 3348 3349 sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2)); 3350 if (can_push) { 3351 __skb_push(skb, hdr_len); 3352 num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len); 3353 if (unlikely(num_sg < 0)) 3354 return num_sg; 3355 /* Pull header back to avoid skew in tx bytes calculations. */ 3356 __skb_pull(skb, hdr_len); 3357 } else { 3358 sg_set_buf(sq->sg, hdr, hdr_len); 3359 num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len); 3360 if (unlikely(num_sg < 0)) 3361 return num_sg; 3362 num_sg++; 3363 } 3364 3365 return virtnet_add_outbuf(sq, num_sg, skb, 3366 orphan ? VIRTNET_XMIT_TYPE_SKB_ORPHAN : VIRTNET_XMIT_TYPE_SKB); 3367 } 3368 3369 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) 3370 { 3371 struct virtnet_info *vi = netdev_priv(dev); 3372 int qnum = skb_get_queue_mapping(skb); 3373 struct send_queue *sq = &vi->sq[qnum]; 3374 int err; 3375 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); 3376 bool xmit_more = netdev_xmit_more(); 3377 bool use_napi = sq->napi.weight; 3378 bool kick; 3379 3380 if (!use_napi) 3381 free_old_xmit(sq, txq, false); 3382 else 3383 virtqueue_disable_cb(sq->vq); 3384 3385 /* timestamp packet in software */ 3386 skb_tx_timestamp(skb); 3387 3388 /* Try to transmit */ 3389 err = xmit_skb(sq, skb, !use_napi); 3390 3391 /* This should not happen! */ 3392 if (unlikely(err)) { 3393 DEV_STATS_INC(dev, tx_fifo_errors); 3394 if (net_ratelimit()) 3395 dev_warn(&dev->dev, 3396 "Unexpected TXQ (%d) queue failure: %d\n", 3397 qnum, err); 3398 DEV_STATS_INC(dev, tx_dropped); 3399 dev_kfree_skb_any(skb); 3400 return NETDEV_TX_OK; 3401 } 3402 3403 /* Don't wait up for transmitted skbs to be freed. */ 3404 if (!use_napi) { 3405 skb_orphan(skb); 3406 nf_reset_ct(skb); 3407 } 3408 3409 if (use_napi) 3410 tx_may_stop(vi, dev, sq); 3411 else 3412 check_sq_full_and_disable(vi, dev,sq); 3413 3414 kick = use_napi ? __netdev_tx_sent_queue(txq, skb->len, xmit_more) : 3415 !xmit_more || netif_xmit_stopped(txq); 3416 if (kick) { 3417 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { 3418 u64_stats_update_begin(&sq->stats.syncp); 3419 u64_stats_inc(&sq->stats.kicks); 3420 u64_stats_update_end(&sq->stats.syncp); 3421 } 3422 } 3423 3424 if (use_napi && kick && unlikely(!virtqueue_enable_cb_delayed(sq->vq))) 3425 virtqueue_napi_schedule(&sq->napi, sq->vq); 3426 3427 return NETDEV_TX_OK; 3428 } 3429 3430 static void __virtnet_rx_pause(struct virtnet_info *vi, 3431 struct receive_queue *rq) 3432 { 3433 bool running = netif_running(vi->dev); 3434 3435 if (running) { 3436 virtnet_napi_disable(rq); 3437 virtnet_cancel_dim(vi, &rq->dim); 3438 } 3439 } 3440 3441 static void virtnet_rx_pause_all(struct virtnet_info *vi) 3442 { 3443 int i; 3444 3445 /* 3446 * Make sure refill_work does not run concurrently to 3447 * avoid napi_disable race which leads to deadlock. 
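 * refill_work() itself disables and re-enables NAPI on every rq; if it
 * ran in parallel with the napi_disable() issued below, the two
 * contexts could block each other indefinitely, so delayed refill is
 * switched off and any queued work cancelled synchronously first.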
3448 */ 3449 disable_delayed_refill(vi); 3450 cancel_delayed_work_sync(&vi->refill); 3451 for (i = 0; i < vi->max_queue_pairs; i++) 3452 __virtnet_rx_pause(vi, &vi->rq[i]); 3453 } 3454 3455 static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq) 3456 { 3457 /* 3458 * Make sure refill_work does not run concurrently to 3459 * avoid napi_disable race which leads to deadlock. 3460 */ 3461 disable_delayed_refill(vi); 3462 cancel_delayed_work_sync(&vi->refill); 3463 __virtnet_rx_pause(vi, rq); 3464 } 3465 3466 static void __virtnet_rx_resume(struct virtnet_info *vi, 3467 struct receive_queue *rq, 3468 bool refill) 3469 { 3470 bool running = netif_running(vi->dev); 3471 bool schedule_refill = false; 3472 3473 if (refill && !try_fill_recv(vi, rq, GFP_KERNEL)) 3474 schedule_refill = true; 3475 if (running) 3476 virtnet_napi_enable(rq); 3477 3478 if (schedule_refill) 3479 schedule_delayed_work(&vi->refill, 0); 3480 } 3481 3482 static void virtnet_rx_resume_all(struct virtnet_info *vi) 3483 { 3484 int i; 3485 3486 enable_delayed_refill(vi); 3487 for (i = 0; i < vi->max_queue_pairs; i++) { 3488 if (i < vi->curr_queue_pairs) 3489 __virtnet_rx_resume(vi, &vi->rq[i], true); 3490 else 3491 __virtnet_rx_resume(vi, &vi->rq[i], false); 3492 } 3493 } 3494 3495 static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq) 3496 { 3497 enable_delayed_refill(vi); 3498 __virtnet_rx_resume(vi, rq, true); 3499 } 3500 3501 static int virtnet_rx_resize(struct virtnet_info *vi, 3502 struct receive_queue *rq, u32 ring_num) 3503 { 3504 int err, qindex; 3505 3506 qindex = rq - vi->rq; 3507 3508 virtnet_rx_pause(vi, rq); 3509 3510 err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_unmap_free_buf, NULL); 3511 if (err) 3512 netdev_err(vi->dev, "resize rx fail: rx queue index: %d err: %d\n", qindex, err); 3513 3514 virtnet_rx_resume(vi, rq); 3515 return err; 3516 } 3517 3518 static void virtnet_tx_pause(struct virtnet_info *vi, struct send_queue *sq) 3519 { 3520 bool running = netif_running(vi->dev); 3521 struct netdev_queue *txq; 3522 int qindex; 3523 3524 qindex = sq - vi->sq; 3525 3526 if (running) 3527 virtnet_napi_tx_disable(sq); 3528 3529 txq = netdev_get_tx_queue(vi->dev, qindex); 3530 3531 /* 1. wait all ximt complete 3532 * 2. fix the race of netif_stop_subqueue() vs netif_start_subqueue() 3533 */ 3534 __netif_tx_lock_bh(txq); 3535 3536 /* Prevent rx poll from accessing sq. */ 3537 sq->reset = true; 3538 3539 /* Prevent the upper layer from trying to send packets. 
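 * Both sq->reset and the stopped subqueue are set under the tx lock so
 * that neither start_xmit() nor virtnet_poll_cleantx() can touch the
 * send queue while the underlying virtqueue is resized or otherwise
 * reset.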
*/ 3540 netif_stop_subqueue(vi->dev, qindex); 3541 3542 __netif_tx_unlock_bh(txq); 3543 } 3544 3545 static void virtnet_tx_resume(struct virtnet_info *vi, struct send_queue *sq) 3546 { 3547 bool running = netif_running(vi->dev); 3548 struct netdev_queue *txq; 3549 int qindex; 3550 3551 qindex = sq - vi->sq; 3552 3553 txq = netdev_get_tx_queue(vi->dev, qindex); 3554 3555 __netif_tx_lock_bh(txq); 3556 sq->reset = false; 3557 netif_tx_wake_queue(txq); 3558 __netif_tx_unlock_bh(txq); 3559 3560 if (running) 3561 virtnet_napi_tx_enable(sq); 3562 } 3563 3564 static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq, 3565 u32 ring_num) 3566 { 3567 int qindex, err; 3568 3569 if (ring_num <= MAX_SKB_FRAGS + 2) { 3570 netdev_err(vi->dev, "tx size (%d) cannot be smaller than %d\n", 3571 ring_num, MAX_SKB_FRAGS + 2); 3572 return -EINVAL; 3573 } 3574 3575 qindex = sq - vi->sq; 3576 3577 virtnet_tx_pause(vi, sq); 3578 3579 err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf, 3580 virtnet_sq_free_unused_buf_done); 3581 if (err) 3582 netdev_err(vi->dev, "resize tx fail: tx queue index: %d err: %d\n", qindex, err); 3583 3584 virtnet_tx_resume(vi, sq); 3585 3586 return err; 3587 } 3588 3589 /* 3590 * Send command via the control virtqueue and check status. Commands 3591 * supported by the hypervisor, as indicated by feature bits, should 3592 * never fail unless improperly formatted. 3593 */ 3594 static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd, 3595 struct scatterlist *out, 3596 struct scatterlist *in) 3597 { 3598 struct scatterlist *sgs[5], hdr, stat; 3599 u32 out_num = 0, tmp, in_num = 0; 3600 bool ok; 3601 int ret; 3602 3603 /* Caller should know better */ 3604 BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)); 3605 3606 mutex_lock(&vi->cvq_lock); 3607 vi->ctrl->status = ~0; 3608 vi->ctrl->hdr.class = class; 3609 vi->ctrl->hdr.cmd = cmd; 3610 /* Add header */ 3611 sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr)); 3612 sgs[out_num++] = &hdr; 3613 3614 if (out) 3615 sgs[out_num++] = out; 3616 3617 /* Add return status. */ 3618 sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status)); 3619 sgs[out_num + in_num++] = &stat; 3620 3621 if (in) 3622 sgs[out_num + in_num++] = in; 3623 3624 BUG_ON(out_num + in_num > ARRAY_SIZE(sgs)); 3625 ret = virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num, vi, GFP_ATOMIC); 3626 if (ret < 0) { 3627 dev_warn(&vi->vdev->dev, 3628 "Failed to add sgs for command vq: %d\n.", ret); 3629 mutex_unlock(&vi->cvq_lock); 3630 return false; 3631 } 3632 3633 if (unlikely(!virtqueue_kick(vi->cvq))) 3634 goto unlock; 3635 3636 /* Spin for a response, the kick causes an ioport write, trapping 3637 * into the hypervisor, so the request should be handled immediately. 
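 * The polling loop below still calls cond_resched() and cpu_relax()
 * between checks and bails out if the virtqueue has been marked broken,
 * e.g. after surprise removal of the device.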
3638 */ 3639 while (!virtqueue_get_buf(vi->cvq, &tmp) && 3640 !virtqueue_is_broken(vi->cvq)) { 3641 cond_resched(); 3642 cpu_relax(); 3643 } 3644 3645 unlock: 3646 ok = vi->ctrl->status == VIRTIO_NET_OK; 3647 mutex_unlock(&vi->cvq_lock); 3648 return ok; 3649 } 3650 3651 static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, 3652 struct scatterlist *out) 3653 { 3654 return virtnet_send_command_reply(vi, class, cmd, out, NULL); 3655 } 3656 3657 static int virtnet_set_mac_address(struct net_device *dev, void *p) 3658 { 3659 struct virtnet_info *vi = netdev_priv(dev); 3660 struct virtio_device *vdev = vi->vdev; 3661 int ret; 3662 struct sockaddr *addr; 3663 struct scatterlist sg; 3664 3665 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 3666 return -EOPNOTSUPP; 3667 3668 addr = kmemdup(p, sizeof(*addr), GFP_KERNEL); 3669 if (!addr) 3670 return -ENOMEM; 3671 3672 ret = eth_prepare_mac_addr_change(dev, addr); 3673 if (ret) 3674 goto out; 3675 3676 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 3677 sg_init_one(&sg, addr->sa_data, dev->addr_len); 3678 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 3679 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { 3680 dev_warn(&vdev->dev, 3681 "Failed to set mac address by vq command.\n"); 3682 ret = -EINVAL; 3683 goto out; 3684 } 3685 } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && 3686 !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { 3687 unsigned int i; 3688 3689 /* Naturally, this has an atomicity problem. */ 3690 for (i = 0; i < dev->addr_len; i++) 3691 virtio_cwrite8(vdev, 3692 offsetof(struct virtio_net_config, mac) + 3693 i, addr->sa_data[i]); 3694 } 3695 3696 eth_commit_mac_addr_change(dev, p); 3697 ret = 0; 3698 3699 out: 3700 kfree(addr); 3701 return ret; 3702 } 3703 3704 static void virtnet_stats(struct net_device *dev, 3705 struct rtnl_link_stats64 *tot) 3706 { 3707 struct virtnet_info *vi = netdev_priv(dev); 3708 unsigned int start; 3709 int i; 3710 3711 for (i = 0; i < vi->max_queue_pairs; i++) { 3712 u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops; 3713 struct receive_queue *rq = &vi->rq[i]; 3714 struct send_queue *sq = &vi->sq[i]; 3715 3716 do { 3717 start = u64_stats_fetch_begin(&sq->stats.syncp); 3718 tpackets = u64_stats_read(&sq->stats.packets); 3719 tbytes = u64_stats_read(&sq->stats.bytes); 3720 terrors = u64_stats_read(&sq->stats.tx_timeouts); 3721 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 3722 3723 do { 3724 start = u64_stats_fetch_begin(&rq->stats.syncp); 3725 rpackets = u64_stats_read(&rq->stats.packets); 3726 rbytes = u64_stats_read(&rq->stats.bytes); 3727 rdrops = u64_stats_read(&rq->stats.drops); 3728 } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); 3729 3730 tot->rx_packets += rpackets; 3731 tot->tx_packets += tpackets; 3732 tot->rx_bytes += rbytes; 3733 tot->tx_bytes += tbytes; 3734 tot->rx_dropped += rdrops; 3735 tot->tx_errors += terrors; 3736 } 3737 3738 tot->tx_dropped = DEV_STATS_READ(dev, tx_dropped); 3739 tot->tx_fifo_errors = DEV_STATS_READ(dev, tx_fifo_errors); 3740 tot->rx_length_errors = DEV_STATS_READ(dev, rx_length_errors); 3741 tot->rx_frame_errors = DEV_STATS_READ(dev, rx_frame_errors); 3742 } 3743 3744 static void virtnet_ack_link_announce(struct virtnet_info *vi) 3745 { 3746 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE, 3747 VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL)) 3748 dev_warn(&vi->dev->dev, "Failed to ack link announce.\n"); 3749 } 3750 3751 static bool virtnet_commit_rss_command(struct virtnet_info *vi); 3752 3753 static void 
virtnet_rss_update_by_qpairs(struct virtnet_info *vi, u16 queue_pairs) 3754 { 3755 u32 indir_val = 0; 3756 int i = 0; 3757 3758 for (; i < vi->rss_indir_table_size; ++i) { 3759 indir_val = ethtool_rxfh_indir_default(i, queue_pairs); 3760 vi->rss_hdr->indirection_table[i] = cpu_to_le16(indir_val); 3761 } 3762 vi->rss_trailer.max_tx_vq = cpu_to_le16(queue_pairs); 3763 } 3764 3765 static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) 3766 { 3767 struct virtio_net_ctrl_mq *mq __free(kfree) = NULL; 3768 struct virtio_net_rss_config_hdr *old_rss_hdr; 3769 struct virtio_net_rss_config_trailer old_rss_trailer; 3770 struct net_device *dev = vi->dev; 3771 struct scatterlist sg; 3772 3773 if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ)) 3774 return 0; 3775 3776 /* Firstly check if we need update rss. Do updating if both (1) rss enabled and 3777 * (2) no user configuration. 3778 * 3779 * During rss command processing, device updates queue_pairs using rss.max_tx_vq. That is, 3780 * the device updates queue_pairs together with rss, so we can skip the sperate queue_pairs 3781 * update (VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET below) and return directly. 3782 */ 3783 if (vi->has_rss && !netif_is_rxfh_configured(dev)) { 3784 old_rss_hdr = vi->rss_hdr; 3785 old_rss_trailer = vi->rss_trailer; 3786 vi->rss_hdr = devm_kzalloc(&dev->dev, virtnet_rss_hdr_size(vi), GFP_KERNEL); 3787 if (!vi->rss_hdr) { 3788 vi->rss_hdr = old_rss_hdr; 3789 return -ENOMEM; 3790 } 3791 3792 *vi->rss_hdr = *old_rss_hdr; 3793 virtnet_rss_update_by_qpairs(vi, queue_pairs); 3794 3795 if (!virtnet_commit_rss_command(vi)) { 3796 /* restore ctrl_rss if commit_rss_command failed */ 3797 devm_kfree(&dev->dev, vi->rss_hdr); 3798 vi->rss_hdr = old_rss_hdr; 3799 vi->rss_trailer = old_rss_trailer; 3800 3801 dev_warn(&dev->dev, "Fail to set num of queue pairs to %d, because committing RSS failed\n", 3802 queue_pairs); 3803 return -EINVAL; 3804 } 3805 devm_kfree(&dev->dev, old_rss_hdr); 3806 goto succ; 3807 } 3808 3809 mq = kzalloc(sizeof(*mq), GFP_KERNEL); 3810 if (!mq) 3811 return -ENOMEM; 3812 3813 mq->virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs); 3814 sg_init_one(&sg, mq, sizeof(*mq)); 3815 3816 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, 3817 VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) { 3818 dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n", 3819 queue_pairs); 3820 return -EINVAL; 3821 } 3822 succ: 3823 vi->curr_queue_pairs = queue_pairs; 3824 /* virtnet_open() will refill when device is going to up. */ 3825 spin_lock_bh(&vi->refill_lock); 3826 if (dev->flags & IFF_UP && vi->refill_enabled) 3827 schedule_delayed_work(&vi->refill, 0); 3828 spin_unlock_bh(&vi->refill_lock); 3829 3830 return 0; 3831 } 3832 3833 static int virtnet_close(struct net_device *dev) 3834 { 3835 struct virtnet_info *vi = netdev_priv(dev); 3836 int i; 3837 3838 /* Make sure NAPI doesn't schedule refill work */ 3839 disable_delayed_refill(vi); 3840 /* Make sure refill_work doesn't re-enable napi! 
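 * refill_work() re-enables NAPI via virtnet_napi_do_enable() after
 * refilling a queue, which would defeat the virtnet_disable_queue_pair()
 * calls below, so it must be cancelled synchronously while delayed
 * refill is already disabled.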
*/ 3841 cancel_delayed_work_sync(&vi->refill); 3842 /* Prevent the config change callback from changing carrier 3843 * after close 3844 */ 3845 virtio_config_driver_disable(vi->vdev); 3846 /* Stop getting status/speed updates: we don't care until next 3847 * open 3848 */ 3849 cancel_work_sync(&vi->config_work); 3850 3851 for (i = 0; i < vi->max_queue_pairs; i++) { 3852 virtnet_disable_queue_pair(vi, i); 3853 virtnet_cancel_dim(vi, &vi->rq[i].dim); 3854 } 3855 3856 netif_carrier_off(dev); 3857 3858 return 0; 3859 } 3860 3861 static void virtnet_rx_mode_work(struct work_struct *work) 3862 { 3863 struct virtnet_info *vi = 3864 container_of(work, struct virtnet_info, rx_mode_work); 3865 u8 *promisc_allmulti __free(kfree) = NULL; 3866 struct net_device *dev = vi->dev; 3867 struct scatterlist sg[2]; 3868 struct virtio_net_ctrl_mac *mac_data; 3869 struct netdev_hw_addr *ha; 3870 int uc_count; 3871 int mc_count; 3872 void *buf; 3873 int i; 3874 3875 /* We can't dynamically set ndo_set_rx_mode, so return gracefully */ 3876 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX)) 3877 return; 3878 3879 promisc_allmulti = kzalloc(sizeof(*promisc_allmulti), GFP_KERNEL); 3880 if (!promisc_allmulti) { 3881 dev_warn(&dev->dev, "Failed to set RX mode, no memory.\n"); 3882 return; 3883 } 3884 3885 rtnl_lock(); 3886 3887 *promisc_allmulti = !!(dev->flags & IFF_PROMISC); 3888 sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti)); 3889 3890 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 3891 VIRTIO_NET_CTRL_RX_PROMISC, sg)) 3892 dev_warn(&dev->dev, "Failed to %sable promisc mode.\n", 3893 *promisc_allmulti ? "en" : "dis"); 3894 3895 *promisc_allmulti = !!(dev->flags & IFF_ALLMULTI); 3896 sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti)); 3897 3898 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 3899 VIRTIO_NET_CTRL_RX_ALLMULTI, sg)) 3900 dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n", 3901 *promisc_allmulti ? 
"en" : "dis"); 3902 3903 netif_addr_lock_bh(dev); 3904 3905 uc_count = netdev_uc_count(dev); 3906 mc_count = netdev_mc_count(dev); 3907 /* MAC filter - use one buffer for both lists */ 3908 buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) + 3909 (2 * sizeof(mac_data->entries)), GFP_ATOMIC); 3910 mac_data = buf; 3911 if (!buf) { 3912 netif_addr_unlock_bh(dev); 3913 rtnl_unlock(); 3914 return; 3915 } 3916 3917 sg_init_table(sg, 2); 3918 3919 /* Store the unicast list and count in the front of the buffer */ 3920 mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count); 3921 i = 0; 3922 netdev_for_each_uc_addr(ha, dev) 3923 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 3924 3925 sg_set_buf(&sg[0], mac_data, 3926 sizeof(mac_data->entries) + (uc_count * ETH_ALEN)); 3927 3928 /* multicast list and count fill the end */ 3929 mac_data = (void *)&mac_data->macs[uc_count][0]; 3930 3931 mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count); 3932 i = 0; 3933 netdev_for_each_mc_addr(ha, dev) 3934 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 3935 3936 netif_addr_unlock_bh(dev); 3937 3938 sg_set_buf(&sg[1], mac_data, 3939 sizeof(mac_data->entries) + (mc_count * ETH_ALEN)); 3940 3941 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 3942 VIRTIO_NET_CTRL_MAC_TABLE_SET, sg)) 3943 dev_warn(&dev->dev, "Failed to set MAC filter table.\n"); 3944 3945 rtnl_unlock(); 3946 3947 kfree(buf); 3948 } 3949 3950 static void virtnet_set_rx_mode(struct net_device *dev) 3951 { 3952 struct virtnet_info *vi = netdev_priv(dev); 3953 3954 if (vi->rx_mode_work_enabled) 3955 schedule_work(&vi->rx_mode_work); 3956 } 3957 3958 static int virtnet_vlan_rx_add_vid(struct net_device *dev, 3959 __be16 proto, u16 vid) 3960 { 3961 struct virtnet_info *vi = netdev_priv(dev); 3962 __virtio16 *_vid __free(kfree) = NULL; 3963 struct scatterlist sg; 3964 3965 _vid = kzalloc(sizeof(*_vid), GFP_KERNEL); 3966 if (!_vid) 3967 return -ENOMEM; 3968 3969 *_vid = cpu_to_virtio16(vi->vdev, vid); 3970 sg_init_one(&sg, _vid, sizeof(*_vid)); 3971 3972 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 3973 VIRTIO_NET_CTRL_VLAN_ADD, &sg)) 3974 dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid); 3975 return 0; 3976 } 3977 3978 static int virtnet_vlan_rx_kill_vid(struct net_device *dev, 3979 __be16 proto, u16 vid) 3980 { 3981 struct virtnet_info *vi = netdev_priv(dev); 3982 __virtio16 *_vid __free(kfree) = NULL; 3983 struct scatterlist sg; 3984 3985 _vid = kzalloc(sizeof(*_vid), GFP_KERNEL); 3986 if (!_vid) 3987 return -ENOMEM; 3988 3989 *_vid = cpu_to_virtio16(vi->vdev, vid); 3990 sg_init_one(&sg, _vid, sizeof(*_vid)); 3991 3992 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 3993 VIRTIO_NET_CTRL_VLAN_DEL, &sg)) 3994 dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid); 3995 return 0; 3996 } 3997 3998 static void virtnet_clean_affinity(struct virtnet_info *vi) 3999 { 4000 int i; 4001 4002 if (vi->affinity_hint_set) { 4003 for (i = 0; i < vi->max_queue_pairs; i++) { 4004 virtqueue_set_affinity(vi->rq[i].vq, NULL); 4005 virtqueue_set_affinity(vi->sq[i].vq, NULL); 4006 } 4007 4008 vi->affinity_hint_set = false; 4009 } 4010 } 4011 4012 static void virtnet_set_affinity(struct virtnet_info *vi) 4013 { 4014 cpumask_var_t mask; 4015 int stragglers; 4016 int group_size; 4017 int i, start = 0, cpu; 4018 int num_cpu; 4019 int stride; 4020 4021 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { 4022 virtnet_clean_affinity(vi); 4023 return; 4024 } 4025 4026 num_cpu = num_online_cpus(); 4027 stride = max_t(int, num_cpu / vi->curr_queue_pairs, 
1); 4028 stragglers = num_cpu >= vi->curr_queue_pairs ? 4029 num_cpu % vi->curr_queue_pairs : 4030 0; 4031 4032 for (i = 0; i < vi->curr_queue_pairs; i++) { 4033 group_size = stride + (i < stragglers ? 1 : 0); 4034 4035 for_each_online_cpu_wrap(cpu, start) { 4036 if (!group_size--) { 4037 start = cpu; 4038 break; 4039 } 4040 cpumask_set_cpu(cpu, mask); 4041 } 4042 4043 virtqueue_set_affinity(vi->rq[i].vq, mask); 4044 virtqueue_set_affinity(vi->sq[i].vq, mask); 4045 __netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS); 4046 cpumask_clear(mask); 4047 } 4048 4049 vi->affinity_hint_set = true; 4050 free_cpumask_var(mask); 4051 } 4052 4053 static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node) 4054 { 4055 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 4056 node); 4057 virtnet_set_affinity(vi); 4058 return 0; 4059 } 4060 4061 static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node) 4062 { 4063 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 4064 node_dead); 4065 virtnet_set_affinity(vi); 4066 return 0; 4067 } 4068 4069 static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node) 4070 { 4071 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 4072 node); 4073 4074 virtnet_clean_affinity(vi); 4075 return 0; 4076 } 4077 4078 static enum cpuhp_state virtionet_online; 4079 4080 static int virtnet_cpu_notif_add(struct virtnet_info *vi) 4081 { 4082 int ret; 4083 4084 ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node); 4085 if (ret) 4086 return ret; 4087 ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD, 4088 &vi->node_dead); 4089 if (!ret) 4090 return ret; 4091 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 4092 return ret; 4093 } 4094 4095 static void virtnet_cpu_notif_remove(struct virtnet_info *vi) 4096 { 4097 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 4098 cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD, 4099 &vi->node_dead); 4100 } 4101 4102 static int virtnet_send_ctrl_coal_vq_cmd(struct virtnet_info *vi, 4103 u16 vqn, u32 max_usecs, u32 max_packets) 4104 { 4105 struct virtio_net_ctrl_coal_vq *coal_vq __free(kfree) = NULL; 4106 struct scatterlist sgs; 4107 4108 coal_vq = kzalloc(sizeof(*coal_vq), GFP_KERNEL); 4109 if (!coal_vq) 4110 return -ENOMEM; 4111 4112 coal_vq->vqn = cpu_to_le16(vqn); 4113 coal_vq->coal.max_usecs = cpu_to_le32(max_usecs); 4114 coal_vq->coal.max_packets = cpu_to_le32(max_packets); 4115 sg_init_one(&sgs, coal_vq, sizeof(*coal_vq)); 4116 4117 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 4118 VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET, 4119 &sgs)) 4120 return -EINVAL; 4121 4122 return 0; 4123 } 4124 4125 static int virtnet_send_rx_ctrl_coal_vq_cmd(struct virtnet_info *vi, 4126 u16 queue, u32 max_usecs, 4127 u32 max_packets) 4128 { 4129 int err; 4130 4131 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 4132 return -EOPNOTSUPP; 4133 4134 err = virtnet_send_ctrl_coal_vq_cmd(vi, rxq2vq(queue), 4135 max_usecs, max_packets); 4136 if (err) 4137 return err; 4138 4139 vi->rq[queue].intr_coal.max_usecs = max_usecs; 4140 vi->rq[queue].intr_coal.max_packets = max_packets; 4141 4142 return 0; 4143 } 4144 4145 static int virtnet_send_tx_ctrl_coal_vq_cmd(struct virtnet_info *vi, 4146 u16 queue, u32 max_usecs, 4147 u32 max_packets) 4148 { 4149 int err; 4150 4151 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 4152 return -EOPNOTSUPP; 4153 4154 err = 
virtnet_send_ctrl_coal_vq_cmd(vi, txq2vq(queue), 4155 max_usecs, max_packets); 4156 if (err) 4157 return err; 4158 4159 vi->sq[queue].intr_coal.max_usecs = max_usecs; 4160 vi->sq[queue].intr_coal.max_packets = max_packets; 4161 4162 return 0; 4163 } 4164 4165 static void virtnet_get_ringparam(struct net_device *dev, 4166 struct ethtool_ringparam *ring, 4167 struct kernel_ethtool_ringparam *kernel_ring, 4168 struct netlink_ext_ack *extack) 4169 { 4170 struct virtnet_info *vi = netdev_priv(dev); 4171 4172 ring->rx_max_pending = vi->rq[0].vq->num_max; 4173 ring->tx_max_pending = vi->sq[0].vq->num_max; 4174 ring->rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 4175 ring->tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 4176 } 4177 4178 static int virtnet_set_ringparam(struct net_device *dev, 4179 struct ethtool_ringparam *ring, 4180 struct kernel_ethtool_ringparam *kernel_ring, 4181 struct netlink_ext_ack *extack) 4182 { 4183 struct virtnet_info *vi = netdev_priv(dev); 4184 u32 rx_pending, tx_pending; 4185 struct receive_queue *rq; 4186 struct send_queue *sq; 4187 int i, err; 4188 4189 if (ring->rx_mini_pending || ring->rx_jumbo_pending) 4190 return -EINVAL; 4191 4192 rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 4193 tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 4194 4195 if (ring->rx_pending == rx_pending && 4196 ring->tx_pending == tx_pending) 4197 return 0; 4198 4199 if (ring->rx_pending > vi->rq[0].vq->num_max) 4200 return -EINVAL; 4201 4202 if (ring->tx_pending > vi->sq[0].vq->num_max) 4203 return -EINVAL; 4204 4205 for (i = 0; i < vi->max_queue_pairs; i++) { 4206 rq = vi->rq + i; 4207 sq = vi->sq + i; 4208 4209 if (ring->tx_pending != tx_pending) { 4210 err = virtnet_tx_resize(vi, sq, ring->tx_pending); 4211 if (err) 4212 return err; 4213 4214 /* Upon disabling and re-enabling a transmit virtqueue, the device must 4215 * set the coalescing parameters of the virtqueue to those configured 4216 * through the VIRTIO_NET_CTRL_NOTF_COAL_TX_SET command, or, if the driver 4217 * did not set any TX coalescing parameters, to 0. 4218 */ 4219 err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, i, 4220 vi->intr_coal_tx.max_usecs, 4221 vi->intr_coal_tx.max_packets); 4222 4223 /* Don't break the tx resize action if the vq coalescing is not 4224 * supported. The same is true for rx resize below. 4225 */ 4226 if (err && err != -EOPNOTSUPP) 4227 return err; 4228 } 4229 4230 if (ring->rx_pending != rx_pending) { 4231 err = virtnet_rx_resize(vi, rq, ring->rx_pending); 4232 if (err) 4233 return err; 4234 4235 /* The reason is same as the transmit virtqueue reset */ 4236 mutex_lock(&vi->rq[i].dim_lock); 4237 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, i, 4238 vi->intr_coal_rx.max_usecs, 4239 vi->intr_coal_rx.max_packets); 4240 mutex_unlock(&vi->rq[i].dim_lock); 4241 if (err && err != -EOPNOTSUPP) 4242 return err; 4243 } 4244 } 4245 4246 return 0; 4247 } 4248 4249 static bool virtnet_commit_rss_command(struct virtnet_info *vi) 4250 { 4251 struct net_device *dev = vi->dev; 4252 struct scatterlist sgs[2]; 4253 4254 /* prepare sgs */ 4255 sg_init_table(sgs, 2); 4256 sg_set_buf(&sgs[0], vi->rss_hdr, virtnet_rss_hdr_size(vi)); 4257 sg_set_buf(&sgs[1], &vi->rss_trailer, virtnet_rss_trailer_size(vi)); 4258 4259 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, 4260 vi->has_rss ? 
VIRTIO_NET_CTRL_MQ_RSS_CONFIG 4261 : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, sgs)) 4262 goto err; 4263 4264 return true; 4265 4266 err: 4267 dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n"); 4268 return false; 4269 4270 } 4271 4272 static void virtnet_init_default_rss(struct virtnet_info *vi) 4273 { 4274 vi->rss_hdr->hash_types = cpu_to_le32(vi->rss_hash_types_supported); 4275 vi->rss_hash_types_saved = vi->rss_hash_types_supported; 4276 vi->rss_hdr->indirection_table_mask = vi->rss_indir_table_size 4277 ? cpu_to_le16(vi->rss_indir_table_size - 1) : 0; 4278 vi->rss_hdr->unclassified_queue = 0; 4279 4280 virtnet_rss_update_by_qpairs(vi, vi->curr_queue_pairs); 4281 4282 vi->rss_trailer.hash_key_length = vi->rss_key_size; 4283 4284 netdev_rss_key_fill(vi->rss_hash_key_data, vi->rss_key_size); 4285 } 4286 4287 static int virtnet_get_hashflow(struct net_device *dev, 4288 struct ethtool_rxfh_fields *info) 4289 { 4290 struct virtnet_info *vi = netdev_priv(dev); 4291 4292 info->data = 0; 4293 switch (info->flow_type) { 4294 case TCP_V4_FLOW: 4295 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) { 4296 info->data = RXH_IP_SRC | RXH_IP_DST | 4297 RXH_L4_B_0_1 | RXH_L4_B_2_3; 4298 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 4299 info->data = RXH_IP_SRC | RXH_IP_DST; 4300 } 4301 break; 4302 case TCP_V6_FLOW: 4303 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) { 4304 info->data = RXH_IP_SRC | RXH_IP_DST | 4305 RXH_L4_B_0_1 | RXH_L4_B_2_3; 4306 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 4307 info->data = RXH_IP_SRC | RXH_IP_DST; 4308 } 4309 break; 4310 case UDP_V4_FLOW: 4311 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) { 4312 info->data = RXH_IP_SRC | RXH_IP_DST | 4313 RXH_L4_B_0_1 | RXH_L4_B_2_3; 4314 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 4315 info->data = RXH_IP_SRC | RXH_IP_DST; 4316 } 4317 break; 4318 case UDP_V6_FLOW: 4319 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) { 4320 info->data = RXH_IP_SRC | RXH_IP_DST | 4321 RXH_L4_B_0_1 | RXH_L4_B_2_3; 4322 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 4323 info->data = RXH_IP_SRC | RXH_IP_DST; 4324 } 4325 break; 4326 case IPV4_FLOW: 4327 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) 4328 info->data = RXH_IP_SRC | RXH_IP_DST; 4329 4330 break; 4331 case IPV6_FLOW: 4332 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) 4333 info->data = RXH_IP_SRC | RXH_IP_DST; 4334 4335 break; 4336 default: 4337 info->data = 0; 4338 break; 4339 } 4340 4341 return 0; 4342 } 4343 4344 static int virtnet_set_hashflow(struct net_device *dev, 4345 const struct ethtool_rxfh_fields *info, 4346 struct netlink_ext_ack *extack) 4347 { 4348 struct virtnet_info *vi = netdev_priv(dev); 4349 u32 new_hashtypes = vi->rss_hash_types_saved; 4350 bool is_disable = info->data & RXH_DISCARD; 4351 bool is_l4 = info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3); 4352 4353 /* supports only 'sd', 'sdfn' and 'r' */ 4354 if (!((info->data == (RXH_IP_SRC | RXH_IP_DST)) | is_l4 | is_disable)) 4355 return -EINVAL; 4356 4357 switch (info->flow_type) { 4358 case TCP_V4_FLOW: 4359 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4); 4360 if (!is_disable) 4361 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 4362 | (is_l4 ? 
VIRTIO_NET_RSS_HASH_TYPE_TCPv4 : 0); 4363 break; 4364 case UDP_V4_FLOW: 4365 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_UDPv4); 4366 if (!is_disable) 4367 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 4368 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv4 : 0); 4369 break; 4370 case IPV4_FLOW: 4371 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv4; 4372 if (!is_disable) 4373 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv4; 4374 break; 4375 case TCP_V6_FLOW: 4376 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_TCPv6); 4377 if (!is_disable) 4378 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 4379 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv6 : 0); 4380 break; 4381 case UDP_V6_FLOW: 4382 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_UDPv6); 4383 if (!is_disable) 4384 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 4385 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv6 : 0); 4386 break; 4387 case IPV6_FLOW: 4388 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv6; 4389 if (!is_disable) 4390 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv6; 4391 break; 4392 default: 4393 /* unsupported flow */ 4394 return -EINVAL; 4395 } 4396 4397 /* if an unsupported hash type was requested */ 4398 if (new_hashtypes != (new_hashtypes & vi->rss_hash_types_supported)) 4399 return -EINVAL; 4400 4401 if (new_hashtypes != vi->rss_hash_types_saved) { 4402 vi->rss_hash_types_saved = new_hashtypes; 4403 vi->rss_hdr->hash_types = cpu_to_le32(vi->rss_hash_types_saved); 4404 if (vi->dev->features & NETIF_F_RXHASH) 4405 if (!virtnet_commit_rss_command(vi)) 4406 return -EINVAL; 4407 } 4408 4409 return 0; 4410 } 4411 4412 static void virtnet_get_drvinfo(struct net_device *dev, 4413 struct ethtool_drvinfo *info) 4414 { 4415 struct virtnet_info *vi = netdev_priv(dev); 4416 struct virtio_device *vdev = vi->vdev; 4417 4418 strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); 4419 strscpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version)); 4420 strscpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info)); 4421 4422 } 4423 4424 /* TODO: Eliminate OOO packets during switching */ 4425 static int virtnet_set_channels(struct net_device *dev, 4426 struct ethtool_channels *channels) 4427 { 4428 struct virtnet_info *vi = netdev_priv(dev); 4429 u16 queue_pairs = channels->combined_count; 4430 int err; 4431 4432 /* We don't support separate rx/tx channels. 4433 * We don't allow setting 'other' channels. 4434 */ 4435 if (channels->rx_count || channels->tx_count || channels->other_count) 4436 return -EINVAL; 4437 4438 if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0) 4439 return -EINVAL; 4440 4441 /* For now we don't support modifying channels while XDP is loaded. 4442 * Also, when XDP is loaded all RX queues have XDP programs, so we only 4443 * need to check a single RX queue.
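 * (virtnet_xdp_set() attaches the same program to every rq[i].xdp_prog, so
 * inspecting rq[0] is sufficient here.)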
4444 */ 4445 if (vi->rq[0].xdp_prog) 4446 return -EINVAL; 4447 4448 cpus_read_lock(); 4449 err = virtnet_set_queues(vi, queue_pairs); 4450 if (err) { 4451 cpus_read_unlock(); 4452 goto err; 4453 } 4454 virtnet_set_affinity(vi); 4455 cpus_read_unlock(); 4456 4457 netif_set_real_num_tx_queues(dev, queue_pairs); 4458 netif_set_real_num_rx_queues(dev, queue_pairs); 4459 err: 4460 return err; 4461 } 4462 4463 static void virtnet_stats_sprintf(u8 **p, const char *fmt, const char *noq_fmt, 4464 int num, int qid, const struct virtnet_stat_desc *desc) 4465 { 4466 int i; 4467 4468 if (qid < 0) { 4469 for (i = 0; i < num; ++i) 4470 ethtool_sprintf(p, noq_fmt, desc[i].desc); 4471 } else { 4472 for (i = 0; i < num; ++i) 4473 ethtool_sprintf(p, fmt, qid, desc[i].desc); 4474 } 4475 } 4476 4477 /* qid == -1: for rx/tx queue total field */ 4478 static void virtnet_get_stats_string(struct virtnet_info *vi, int type, int qid, u8 **data) 4479 { 4480 const struct virtnet_stat_desc *desc; 4481 const char *fmt, *noq_fmt; 4482 u8 *p = *data; 4483 u32 num; 4484 4485 if (type == VIRTNET_Q_TYPE_CQ && qid >= 0) { 4486 noq_fmt = "cq_hw_%s"; 4487 4488 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) { 4489 desc = &virtnet_stats_cvq_desc[0]; 4490 num = ARRAY_SIZE(virtnet_stats_cvq_desc); 4491 4492 virtnet_stats_sprintf(&p, NULL, noq_fmt, num, -1, desc); 4493 } 4494 } 4495 4496 if (type == VIRTNET_Q_TYPE_RX) { 4497 fmt = "rx%u_%s"; 4498 noq_fmt = "rx_%s"; 4499 4500 desc = &virtnet_rq_stats_desc[0]; 4501 num = ARRAY_SIZE(virtnet_rq_stats_desc); 4502 4503 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4504 4505 fmt = "rx%u_hw_%s"; 4506 noq_fmt = "rx_hw_%s"; 4507 4508 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4509 desc = &virtnet_stats_rx_basic_desc[0]; 4510 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4511 4512 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4513 } 4514 4515 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4516 desc = &virtnet_stats_rx_csum_desc[0]; 4517 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4518 4519 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4520 } 4521 4522 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4523 desc = &virtnet_stats_rx_speed_desc[0]; 4524 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4525 4526 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4527 } 4528 } 4529 4530 if (type == VIRTNET_Q_TYPE_TX) { 4531 fmt = "tx%u_%s"; 4532 noq_fmt = "tx_%s"; 4533 4534 desc = &virtnet_sq_stats_desc[0]; 4535 num = ARRAY_SIZE(virtnet_sq_stats_desc); 4536 4537 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4538 4539 fmt = "tx%u_hw_%s"; 4540 noq_fmt = "tx_hw_%s"; 4541 4542 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4543 desc = &virtnet_stats_tx_basic_desc[0]; 4544 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4545 4546 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4547 } 4548 4549 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4550 desc = &virtnet_stats_tx_gso_desc[0]; 4551 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4552 4553 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4554 } 4555 4556 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4557 desc = &virtnet_stats_tx_speed_desc[0]; 4558 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4559 4560 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4561 } 4562 } 4563 4564 *data = p; 4565 } 4566 4567 struct virtnet_stats_ctx { 4568 /* The stats are write to qstats or ethtool -S */ 4569 
bool to_qstat; 4570 4571 /* Used to calculate the offset inside the output buffer. */ 4572 u32 desc_num[3]; 4573 4574 /* The actual supported stat types. */ 4575 u64 bitmap[3]; 4576 4577 /* Used to calculate the reply buffer size. */ 4578 u32 size[3]; 4579 4580 /* Record the output buffer. */ 4581 u64 *data; 4582 }; 4583 4584 static void virtnet_stats_ctx_init(struct virtnet_info *vi, 4585 struct virtnet_stats_ctx *ctx, 4586 u64 *data, bool to_qstat) 4587 { 4588 u32 queue_type; 4589 4590 ctx->data = data; 4591 ctx->to_qstat = to_qstat; 4592 4593 if (to_qstat) { 4594 ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc_qstat); 4595 ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc_qstat); 4596 4597 queue_type = VIRTNET_Q_TYPE_RX; 4598 4599 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4600 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC; 4601 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat); 4602 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic); 4603 } 4604 4605 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4606 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM; 4607 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat); 4608 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum); 4609 } 4610 4611 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 4612 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_GSO; 4613 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat); 4614 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_gso); 4615 } 4616 4617 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4618 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED; 4619 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat); 4620 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed); 4621 } 4622 4623 queue_type = VIRTNET_Q_TYPE_TX; 4624 4625 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4626 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC; 4627 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat); 4628 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic); 4629 } 4630 4631 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 4632 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_CSUM; 4633 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat); 4634 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_csum); 4635 } 4636 4637 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4638 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO; 4639 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat); 4640 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso); 4641 } 4642 4643 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4644 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED; 4645 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat); 4646 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed); 4647 } 4648 4649 return; 4650 } 4651 4652 ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc); 4653 ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc); 4654 4655 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) { 4656 queue_type = VIRTNET_Q_TYPE_CQ; 4657 4658 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_CVQ; 4659 ctx->desc_num[queue_type] += 
ARRAY_SIZE(virtnet_stats_cvq_desc); 4660 ctx->size[queue_type] += sizeof(struct virtio_net_stats_cvq); 4661 } 4662 4663 queue_type = VIRTNET_Q_TYPE_RX; 4664 4665 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4666 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC; 4667 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4668 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic); 4669 } 4670 4671 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4672 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM; 4673 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4674 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum); 4675 } 4676 4677 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4678 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED; 4679 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4680 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed); 4681 } 4682 4683 queue_type = VIRTNET_Q_TYPE_TX; 4684 4685 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4686 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC; 4687 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4688 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic); 4689 } 4690 4691 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4692 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO; 4693 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4694 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso); 4695 } 4696 4697 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4698 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED; 4699 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4700 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed); 4701 } 4702 } 4703 4704 /* stats_sum_queue - Calculate the sum of the same fields in sq or rq. 
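 * Each queue contributes a block of @num consecutive u64 counters and the
 * blocks are laid out back to back, so field i of queue j lives at
 * q_value[i + j * num]. The per-field totals across all @q_num queues are
 * stored in sum[0..num-1].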
4705 * @sum: the position to store the sum values 4706 * @num: field num 4707 * @q_value: the first queue fields 4708 * @q_num: number of the queues 4709 */ 4710 static void stats_sum_queue(u64 *sum, u32 num, u64 *q_value, u32 q_num) 4711 { 4712 u32 step = num; 4713 int i, j; 4714 u64 *p; 4715 4716 for (i = 0; i < num; ++i) { 4717 p = sum + i; 4718 *p = 0; 4719 4720 for (j = 0; j < q_num; ++j) 4721 *p += *(q_value + i + j * step); 4722 } 4723 } 4724 4725 static void virtnet_fill_total_fields(struct virtnet_info *vi, 4726 struct virtnet_stats_ctx *ctx) 4727 { 4728 u64 *data, *first_rx_q, *first_tx_q; 4729 u32 num_cq, num_rx, num_tx; 4730 4731 num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ]; 4732 num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX]; 4733 num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX]; 4734 4735 first_rx_q = ctx->data + num_rx + num_tx + num_cq; 4736 first_tx_q = first_rx_q + vi->curr_queue_pairs * num_rx; 4737 4738 data = ctx->data; 4739 4740 stats_sum_queue(data, num_rx, first_rx_q, vi->curr_queue_pairs); 4741 4742 data = ctx->data + num_rx; 4743 4744 stats_sum_queue(data, num_tx, first_tx_q, vi->curr_queue_pairs); 4745 } 4746 4747 static void virtnet_fill_stats_qstat(struct virtnet_info *vi, u32 qid, 4748 struct virtnet_stats_ctx *ctx, 4749 const u8 *base, bool drv_stats, u8 reply_type) 4750 { 4751 const struct virtnet_stat_desc *desc; 4752 const u64_stats_t *v_stat; 4753 u64 offset, bitmap; 4754 const __le64 *v; 4755 u32 queue_type; 4756 int i, num; 4757 4758 queue_type = vq_type(vi, qid); 4759 bitmap = ctx->bitmap[queue_type]; 4760 4761 if (drv_stats) { 4762 if (queue_type == VIRTNET_Q_TYPE_RX) { 4763 desc = &virtnet_rq_stats_desc_qstat[0]; 4764 num = ARRAY_SIZE(virtnet_rq_stats_desc_qstat); 4765 } else { 4766 desc = &virtnet_sq_stats_desc_qstat[0]; 4767 num = ARRAY_SIZE(virtnet_sq_stats_desc_qstat); 4768 } 4769 4770 for (i = 0; i < num; ++i) { 4771 offset = desc[i].qstat_offset / sizeof(*ctx->data); 4772 v_stat = (const u64_stats_t *)(base + desc[i].offset); 4773 ctx->data[offset] = u64_stats_read(v_stat); 4774 } 4775 return; 4776 } 4777 4778 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4779 desc = &virtnet_stats_rx_basic_desc_qstat[0]; 4780 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat); 4781 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC) 4782 goto found; 4783 } 4784 4785 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4786 desc = &virtnet_stats_rx_csum_desc_qstat[0]; 4787 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat); 4788 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM) 4789 goto found; 4790 } 4791 4792 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 4793 desc = &virtnet_stats_rx_gso_desc_qstat[0]; 4794 num = ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat); 4795 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_GSO) 4796 goto found; 4797 } 4798 4799 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4800 desc = &virtnet_stats_rx_speed_desc_qstat[0]; 4801 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat); 4802 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED) 4803 goto found; 4804 } 4805 4806 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4807 desc = &virtnet_stats_tx_basic_desc_qstat[0]; 4808 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat); 4809 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC) 4810 goto found; 4811 } 4812 4813 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 4814 desc = &virtnet_stats_tx_csum_desc_qstat[0]; 4815 num = ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat); 4816 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_CSUM) 4817 goto found; 4818 
} 4819 4820 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4821 desc = &virtnet_stats_tx_gso_desc_qstat[0]; 4822 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat); 4823 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO) 4824 goto found; 4825 } 4826 4827 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4828 desc = &virtnet_stats_tx_speed_desc_qstat[0]; 4829 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat); 4830 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED) 4831 goto found; 4832 } 4833 4834 return; 4835 4836 found: 4837 for (i = 0; i < num; ++i) { 4838 offset = desc[i].qstat_offset / sizeof(*ctx->data); 4839 v = (const __le64 *)(base + desc[i].offset); 4840 ctx->data[offset] = le64_to_cpu(*v); 4841 } 4842 } 4843 4844 /* virtnet_fill_stats - copy the stats to qstats or ethtool -S 4845 * The stats source is the device or the driver. 4846 * 4847 * @vi: virtio net info 4848 * @qid: the vq id 4849 * @ctx: stats ctx (initiated by virtnet_stats_ctx_init()) 4850 * @base: pointer to the device reply or the driver stats structure. 4851 * @drv_stats: designate the base type (device reply, driver stats) 4852 * @type: the type of the device reply (if drv_stats is true, this must be zero) 4853 */ 4854 static void virtnet_fill_stats(struct virtnet_info *vi, u32 qid, 4855 struct virtnet_stats_ctx *ctx, 4856 const u8 *base, bool drv_stats, u8 reply_type) 4857 { 4858 u32 queue_type, num_rx, num_tx, num_cq; 4859 const struct virtnet_stat_desc *desc; 4860 const u64_stats_t *v_stat; 4861 u64 offset, bitmap; 4862 const __le64 *v; 4863 int i, num; 4864 4865 if (ctx->to_qstat) 4866 return virtnet_fill_stats_qstat(vi, qid, ctx, base, drv_stats, reply_type); 4867 4868 num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ]; 4869 num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX]; 4870 num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX]; 4871 4872 queue_type = vq_type(vi, qid); 4873 bitmap = ctx->bitmap[queue_type]; 4874 4875 /* skip the total fields of pairs */ 4876 offset = num_rx + num_tx; 4877 4878 if (queue_type == VIRTNET_Q_TYPE_TX) { 4879 offset += num_cq + num_rx * vi->curr_queue_pairs + num_tx * (qid / 2); 4880 4881 num = ARRAY_SIZE(virtnet_sq_stats_desc); 4882 if (drv_stats) { 4883 desc = &virtnet_sq_stats_desc[0]; 4884 goto drv_stats; 4885 } 4886 4887 offset += num; 4888 4889 } else if (queue_type == VIRTNET_Q_TYPE_RX) { 4890 offset += num_cq + num_rx * (qid / 2); 4891 4892 num = ARRAY_SIZE(virtnet_rq_stats_desc); 4893 if (drv_stats) { 4894 desc = &virtnet_rq_stats_desc[0]; 4895 goto drv_stats; 4896 } 4897 4898 offset += num; 4899 } 4900 4901 if (bitmap & VIRTIO_NET_STATS_TYPE_CVQ) { 4902 desc = &virtnet_stats_cvq_desc[0]; 4903 num = ARRAY_SIZE(virtnet_stats_cvq_desc); 4904 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_CVQ) 4905 goto found; 4906 4907 offset += num; 4908 } 4909 4910 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4911 desc = &virtnet_stats_rx_basic_desc[0]; 4912 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4913 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC) 4914 goto found; 4915 4916 offset += num; 4917 } 4918 4919 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4920 desc = &virtnet_stats_rx_csum_desc[0]; 4921 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4922 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM) 4923 goto found; 4924 4925 offset += num; 4926 } 4927 4928 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4929 desc = &virtnet_stats_rx_speed_desc[0]; 4930 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4931 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED) 4932 goto found; 4933 4934 
offset += num; 4935 } 4936 4937 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4938 desc = &virtnet_stats_tx_basic_desc[0]; 4939 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4940 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC) 4941 goto found; 4942 4943 offset += num; 4944 } 4945 4946 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4947 desc = &virtnet_stats_tx_gso_desc[0]; 4948 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4949 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO) 4950 goto found; 4951 4952 offset += num; 4953 } 4954 4955 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4956 desc = &virtnet_stats_tx_speed_desc[0]; 4957 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4958 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED) 4959 goto found; 4960 4961 offset += num; 4962 } 4963 4964 return; 4965 4966 found: 4967 for (i = 0; i < num; ++i) { 4968 v = (const __le64 *)(base + desc[i].offset); 4969 ctx->data[offset + i] = le64_to_cpu(*v); 4970 } 4971 4972 return; 4973 4974 drv_stats: 4975 for (i = 0; i < num; ++i) { 4976 v_stat = (const u64_stats_t *)(base + desc[i].offset); 4977 ctx->data[offset + i] = u64_stats_read(v_stat); 4978 } 4979 } 4980 4981 static int __virtnet_get_hw_stats(struct virtnet_info *vi, 4982 struct virtnet_stats_ctx *ctx, 4983 struct virtio_net_ctrl_queue_stats *req, 4984 int req_size, void *reply, int res_size) 4985 { 4986 struct virtio_net_stats_reply_hdr *hdr; 4987 struct scatterlist sgs_in, sgs_out; 4988 void *p; 4989 u32 qid; 4990 int ok; 4991 4992 sg_init_one(&sgs_out, req, req_size); 4993 sg_init_one(&sgs_in, reply, res_size); 4994 4995 ok = virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS, 4996 VIRTIO_NET_CTRL_STATS_GET, 4997 &sgs_out, &sgs_in); 4998 4999 if (!ok) 5000 return ok; 5001 5002 for (p = reply; p - reply < res_size; p += le16_to_cpu(hdr->size)) { 5003 hdr = p; 5004 qid = le16_to_cpu(hdr->vq_index); 5005 virtnet_fill_stats(vi, qid, ctx, p, false, hdr->type); 5006 } 5007 5008 return 0; 5009 } 5010 5011 static void virtnet_make_stat_req(struct virtnet_info *vi, 5012 struct virtnet_stats_ctx *ctx, 5013 struct virtio_net_ctrl_queue_stats *req, 5014 int qid, int *idx) 5015 { 5016 int qtype = vq_type(vi, qid); 5017 u64 bitmap = ctx->bitmap[qtype]; 5018 5019 if (!bitmap) 5020 return; 5021 5022 req->stats[*idx].vq_index = cpu_to_le16(qid); 5023 req->stats[*idx].types_bitmap[0] = cpu_to_le64(bitmap); 5024 *idx += 1; 5025 } 5026 5027 /* qid: -1: get stats of all vq. 5028 * > 0: get the stats for the special vq. This must not be cvq. 
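 * The cvq statistics, when supported, are only requested in the qid == -1
 * case and are appended after all rx/tx vqs.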
5029 */ 5030 static int virtnet_get_hw_stats(struct virtnet_info *vi, 5031 struct virtnet_stats_ctx *ctx, int qid) 5032 { 5033 int qnum, i, j, res_size, qtype, last_vq, first_vq; 5034 struct virtio_net_ctrl_queue_stats *req; 5035 bool enable_cvq; 5036 void *reply; 5037 int ok; 5038 5039 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) 5040 return 0; 5041 5042 if (qid == -1) { 5043 last_vq = vi->curr_queue_pairs * 2 - 1; 5044 first_vq = 0; 5045 enable_cvq = true; 5046 } else { 5047 last_vq = qid; 5048 first_vq = qid; 5049 enable_cvq = false; 5050 } 5051 5052 qnum = 0; 5053 res_size = 0; 5054 for (i = first_vq; i <= last_vq ; ++i) { 5055 qtype = vq_type(vi, i); 5056 if (ctx->bitmap[qtype]) { 5057 ++qnum; 5058 res_size += ctx->size[qtype]; 5059 } 5060 } 5061 5062 if (enable_cvq && ctx->bitmap[VIRTNET_Q_TYPE_CQ]) { 5063 res_size += ctx->size[VIRTNET_Q_TYPE_CQ]; 5064 qnum += 1; 5065 } 5066 5067 req = kcalloc(qnum, sizeof(*req), GFP_KERNEL); 5068 if (!req) 5069 return -ENOMEM; 5070 5071 reply = kmalloc(res_size, GFP_KERNEL); 5072 if (!reply) { 5073 kfree(req); 5074 return -ENOMEM; 5075 } 5076 5077 j = 0; 5078 for (i = first_vq; i <= last_vq ; ++i) 5079 virtnet_make_stat_req(vi, ctx, req, i, &j); 5080 5081 if (enable_cvq) 5082 virtnet_make_stat_req(vi, ctx, req, vi->max_queue_pairs * 2, &j); 5083 5084 ok = __virtnet_get_hw_stats(vi, ctx, req, sizeof(*req) * j, reply, res_size); 5085 5086 kfree(req); 5087 kfree(reply); 5088 5089 return ok; 5090 } 5091 5092 static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data) 5093 { 5094 struct virtnet_info *vi = netdev_priv(dev); 5095 unsigned int i; 5096 u8 *p = data; 5097 5098 switch (stringset) { 5099 case ETH_SS_STATS: 5100 /* Generate the total field names. */ 5101 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, -1, &p); 5102 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, -1, &p); 5103 5104 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_CQ, 0, &p); 5105 5106 for (i = 0; i < vi->curr_queue_pairs; ++i) 5107 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, i, &p); 5108 5109 for (i = 0; i < vi->curr_queue_pairs; ++i) 5110 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, i, &p); 5111 break; 5112 } 5113 } 5114 5115 static int virtnet_get_sset_count(struct net_device *dev, int sset) 5116 { 5117 struct virtnet_info *vi = netdev_priv(dev); 5118 struct virtnet_stats_ctx ctx = {0}; 5119 u32 pair_count; 5120 5121 switch (sset) { 5122 case ETH_SS_STATS: 5123 virtnet_stats_ctx_init(vi, &ctx, NULL, false); 5124 5125 pair_count = ctx.desc_num[VIRTNET_Q_TYPE_RX] + ctx.desc_num[VIRTNET_Q_TYPE_TX]; 5126 5127 return pair_count + ctx.desc_num[VIRTNET_Q_TYPE_CQ] + 5128 vi->curr_queue_pairs * pair_count; 5129 default: 5130 return -EOPNOTSUPP; 5131 } 5132 } 5133 5134 static void virtnet_get_ethtool_stats(struct net_device *dev, 5135 struct ethtool_stats *stats, u64 *data) 5136 { 5137 struct virtnet_info *vi = netdev_priv(dev); 5138 struct virtnet_stats_ctx ctx = {0}; 5139 unsigned int start, i; 5140 const u8 *stats_base; 5141 5142 virtnet_stats_ctx_init(vi, &ctx, data, false); 5143 if (virtnet_get_hw_stats(vi, &ctx, -1)) 5144 dev_warn(&vi->dev->dev, "Failed to get hw stats.\n"); 5145 5146 for (i = 0; i < vi->curr_queue_pairs; i++) { 5147 struct receive_queue *rq = &vi->rq[i]; 5148 struct send_queue *sq = &vi->sq[i]; 5149 5150 stats_base = (const u8 *)&rq->stats; 5151 do { 5152 start = u64_stats_fetch_begin(&rq->stats.syncp); 5153 virtnet_fill_stats(vi, i * 2, &ctx, stats_base, true, 0); 5154 } while (u64_stats_fetch_retry(&rq->stats.syncp, 
start)); 5155 5156 stats_base = (const u8 *)&sq->stats; 5157 do { 5158 start = u64_stats_fetch_begin(&sq->stats.syncp); 5159 virtnet_fill_stats(vi, i * 2 + 1, &ctx, stats_base, true, 0); 5160 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 5161 } 5162 5163 virtnet_fill_total_fields(vi, &ctx); 5164 } 5165 5166 static void virtnet_get_channels(struct net_device *dev, 5167 struct ethtool_channels *channels) 5168 { 5169 struct virtnet_info *vi = netdev_priv(dev); 5170 5171 channels->combined_count = vi->curr_queue_pairs; 5172 channels->max_combined = vi->max_queue_pairs; 5173 channels->max_other = 0; 5174 channels->rx_count = 0; 5175 channels->tx_count = 0; 5176 channels->other_count = 0; 5177 } 5178 5179 static int virtnet_set_link_ksettings(struct net_device *dev, 5180 const struct ethtool_link_ksettings *cmd) 5181 { 5182 struct virtnet_info *vi = netdev_priv(dev); 5183 5184 return ethtool_virtdev_set_link_ksettings(dev, cmd, 5185 &vi->speed, &vi->duplex); 5186 } 5187 5188 static int virtnet_get_link_ksettings(struct net_device *dev, 5189 struct ethtool_link_ksettings *cmd) 5190 { 5191 struct virtnet_info *vi = netdev_priv(dev); 5192 5193 cmd->base.speed = vi->speed; 5194 cmd->base.duplex = vi->duplex; 5195 cmd->base.port = PORT_OTHER; 5196 5197 return 0; 5198 } 5199 5200 static int virtnet_send_tx_notf_coal_cmds(struct virtnet_info *vi, 5201 struct ethtool_coalesce *ec) 5202 { 5203 struct virtio_net_ctrl_coal_tx *coal_tx __free(kfree) = NULL; 5204 struct scatterlist sgs_tx; 5205 int i; 5206 5207 coal_tx = kzalloc(sizeof(*coal_tx), GFP_KERNEL); 5208 if (!coal_tx) 5209 return -ENOMEM; 5210 5211 coal_tx->tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); 5212 coal_tx->tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); 5213 sg_init_one(&sgs_tx, coal_tx, sizeof(*coal_tx)); 5214 5215 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 5216 VIRTIO_NET_CTRL_NOTF_COAL_TX_SET, 5217 &sgs_tx)) 5218 return -EINVAL; 5219 5220 vi->intr_coal_tx.max_usecs = ec->tx_coalesce_usecs; 5221 vi->intr_coal_tx.max_packets = ec->tx_max_coalesced_frames; 5222 for (i = 0; i < vi->max_queue_pairs; i++) { 5223 vi->sq[i].intr_coal.max_usecs = ec->tx_coalesce_usecs; 5224 vi->sq[i].intr_coal.max_packets = ec->tx_max_coalesced_frames; 5225 } 5226 5227 return 0; 5228 } 5229 5230 static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi, 5231 struct ethtool_coalesce *ec) 5232 { 5233 struct virtio_net_ctrl_coal_rx *coal_rx __free(kfree) = NULL; 5234 bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce; 5235 struct scatterlist sgs_rx; 5236 int i; 5237 5238 if (rx_ctrl_dim_on && !virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 5239 return -EOPNOTSUPP; 5240 5241 if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != vi->intr_coal_rx.max_usecs || 5242 ec->rx_max_coalesced_frames != vi->intr_coal_rx.max_packets)) 5243 return -EINVAL; 5244 5245 if (rx_ctrl_dim_on && !vi->rx_dim_enabled) { 5246 vi->rx_dim_enabled = true; 5247 for (i = 0; i < vi->max_queue_pairs; i++) { 5248 mutex_lock(&vi->rq[i].dim_lock); 5249 vi->rq[i].dim_enabled = true; 5250 mutex_unlock(&vi->rq[i].dim_lock); 5251 } 5252 return 0; 5253 } 5254 5255 coal_rx = kzalloc(sizeof(*coal_rx), GFP_KERNEL); 5256 if (!coal_rx) 5257 return -ENOMEM; 5258 5259 if (!rx_ctrl_dim_on && vi->rx_dim_enabled) { 5260 vi->rx_dim_enabled = false; 5261 for (i = 0; i < vi->max_queue_pairs; i++) { 5262 mutex_lock(&vi->rq[i].dim_lock); 5263 vi->rq[i].dim_enabled = false; 5264 mutex_unlock(&vi->rq[i].dim_lock); 5265 } 5266 } 5267 5268 /* Since the per-queue 
coalescing params can be set, 5269 * we need to apply the new global params even if they 5270 * are not updated. 5271 */ 5272 coal_rx->rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs); 5273 coal_rx->rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames); 5274 sg_init_one(&sgs_rx, coal_rx, sizeof(*coal_rx)); 5275 5276 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 5277 VIRTIO_NET_CTRL_NOTF_COAL_RX_SET, 5278 &sgs_rx)) 5279 return -EINVAL; 5280 5281 vi->intr_coal_rx.max_usecs = ec->rx_coalesce_usecs; 5282 vi->intr_coal_rx.max_packets = ec->rx_max_coalesced_frames; 5283 for (i = 0; i < vi->max_queue_pairs; i++) { 5284 mutex_lock(&vi->rq[i].dim_lock); 5285 vi->rq[i].intr_coal.max_usecs = ec->rx_coalesce_usecs; 5286 vi->rq[i].intr_coal.max_packets = ec->rx_max_coalesced_frames; 5287 mutex_unlock(&vi->rq[i].dim_lock); 5288 } 5289 5290 return 0; 5291 } 5292 5293 static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, 5294 struct ethtool_coalesce *ec) 5295 { 5296 int err; 5297 5298 err = virtnet_send_tx_notf_coal_cmds(vi, ec); 5299 if (err) 5300 return err; 5301 5302 err = virtnet_send_rx_notf_coal_cmds(vi, ec); 5303 if (err) 5304 return err; 5305 5306 return 0; 5307 } 5308 5309 static int virtnet_send_rx_notf_coal_vq_cmds(struct virtnet_info *vi, 5310 struct ethtool_coalesce *ec, 5311 u16 queue) 5312 { 5313 bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce; 5314 u32 max_usecs, max_packets; 5315 bool cur_rx_dim; 5316 int err; 5317 5318 mutex_lock(&vi->rq[queue].dim_lock); 5319 cur_rx_dim = vi->rq[queue].dim_enabled; 5320 max_usecs = vi->rq[queue].intr_coal.max_usecs; 5321 max_packets = vi->rq[queue].intr_coal.max_packets; 5322 5323 if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != max_usecs || 5324 ec->rx_max_coalesced_frames != max_packets)) { 5325 mutex_unlock(&vi->rq[queue].dim_lock); 5326 return -EINVAL; 5327 } 5328 5329 if (rx_ctrl_dim_on && !cur_rx_dim) { 5330 vi->rq[queue].dim_enabled = true; 5331 mutex_unlock(&vi->rq[queue].dim_lock); 5332 return 0; 5333 } 5334 5335 if (!rx_ctrl_dim_on && cur_rx_dim) 5336 vi->rq[queue].dim_enabled = false; 5337 5338 /* If no params are updated, userspace ethtool will 5339 * reject the modification.
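 * The values from ec can therefore be pushed to the device unconditionally
 * here.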
5340 */ 5341 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, queue, 5342 ec->rx_coalesce_usecs, 5343 ec->rx_max_coalesced_frames); 5344 mutex_unlock(&vi->rq[queue].dim_lock); 5345 return err; 5346 } 5347 5348 static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi, 5349 struct ethtool_coalesce *ec, 5350 u16 queue) 5351 { 5352 int err; 5353 5354 err = virtnet_send_rx_notf_coal_vq_cmds(vi, ec, queue); 5355 if (err) 5356 return err; 5357 5358 err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, queue, 5359 ec->tx_coalesce_usecs, 5360 ec->tx_max_coalesced_frames); 5361 if (err) 5362 return err; 5363 5364 return 0; 5365 } 5366 5367 static void virtnet_rx_dim_work(struct work_struct *work) 5368 { 5369 struct dim *dim = container_of(work, struct dim, work); 5370 struct receive_queue *rq = container_of(dim, 5371 struct receive_queue, dim); 5372 struct virtnet_info *vi = rq->vq->vdev->priv; 5373 struct net_device *dev = vi->dev; 5374 struct dim_cq_moder update_moder; 5375 int qnum, err; 5376 5377 qnum = rq - vi->rq; 5378 5379 mutex_lock(&rq->dim_lock); 5380 if (!rq->dim_enabled) 5381 goto out; 5382 5383 update_moder = net_dim_get_rx_irq_moder(dev, dim); 5384 if (update_moder.usec != rq->intr_coal.max_usecs || 5385 update_moder.pkts != rq->intr_coal.max_packets) { 5386 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, qnum, 5387 update_moder.usec, 5388 update_moder.pkts); 5389 if (err) 5390 pr_debug("%s: Failed to send dim parameters on rxq%d\n", 5391 dev->name, qnum); 5392 } 5393 out: 5394 dim->state = DIM_START_MEASURE; 5395 mutex_unlock(&rq->dim_lock); 5396 } 5397 5398 static int virtnet_coal_params_supported(struct ethtool_coalesce *ec) 5399 { 5400 /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL 5401 * or VIRTIO_NET_F_VQ_NOTF_COAL feature is negotiated. 5402 */ 5403 if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs) 5404 return -EOPNOTSUPP; 5405 5406 if (ec->tx_max_coalesced_frames > 1 || 5407 ec->rx_max_coalesced_frames != 1) 5408 return -EINVAL; 5409 5410 return 0; 5411 } 5412 5413 static int virtnet_should_update_vq_weight(int dev_flags, int weight, 5414 int vq_weight, bool *should_update) 5415 { 5416 if (weight ^ vq_weight) { 5417 if (dev_flags & IFF_UP) 5418 return -EBUSY; 5419 *should_update = true; 5420 } 5421 5422 return 0; 5423 } 5424 5425 static int virtnet_set_coalesce(struct net_device *dev, 5426 struct ethtool_coalesce *ec, 5427 struct kernel_ethtool_coalesce *kernel_coal, 5428 struct netlink_ext_ack *extack) 5429 { 5430 struct virtnet_info *vi = netdev_priv(dev); 5431 int ret, queue_number, napi_weight, i; 5432 bool update_napi = false; 5433 5434 /* Can't change NAPI weight if the link is up */ 5435 napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; 5436 for (queue_number = 0; queue_number < vi->max_queue_pairs; queue_number++) { 5437 ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, 5438 vi->sq[queue_number].napi.weight, 5439 &update_napi); 5440 if (ret) 5441 return ret; 5442 5443 if (update_napi) { 5444 /* All queues that belong to [queue_number, vi->max_queue_pairs] will be 5445 * updated for the sake of simplicity, which might not be necessary 5446 */ 5447 break; 5448 } 5449 } 5450 5451 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) 5452 ret = virtnet_send_notf_coal_cmds(vi, ec); 5453 else 5454 ret = virtnet_coal_params_supported(ec); 5455 5456 if (ret) 5457 return ret; 5458 5459 if (update_napi) { 5460 /* xsk xmit depends on the tx napi. So if xsk is active, 5461 * prevent modifications to tx napi. 
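 * (sq->xsk_pool is non-NULL while an AF_XDP pool is bound to that queue;
 * see virtnet_sq_bind_xsk_pool().)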
5462 */ 5463 for (i = queue_number; i < vi->max_queue_pairs; i++) { 5464 if (vi->sq[i].xsk_pool) 5465 return -EBUSY; 5466 } 5467 5468 for (; queue_number < vi->max_queue_pairs; queue_number++) 5469 vi->sq[queue_number].napi.weight = napi_weight; 5470 } 5471 5472 return ret; 5473 } 5474 5475 static int virtnet_get_coalesce(struct net_device *dev, 5476 struct ethtool_coalesce *ec, 5477 struct kernel_ethtool_coalesce *kernel_coal, 5478 struct netlink_ext_ack *extack) 5479 { 5480 struct virtnet_info *vi = netdev_priv(dev); 5481 5482 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { 5483 ec->rx_coalesce_usecs = vi->intr_coal_rx.max_usecs; 5484 ec->tx_coalesce_usecs = vi->intr_coal_tx.max_usecs; 5485 ec->tx_max_coalesced_frames = vi->intr_coal_tx.max_packets; 5486 ec->rx_max_coalesced_frames = vi->intr_coal_rx.max_packets; 5487 ec->use_adaptive_rx_coalesce = vi->rx_dim_enabled; 5488 } else { 5489 ec->rx_max_coalesced_frames = 1; 5490 5491 if (vi->sq[0].napi.weight) 5492 ec->tx_max_coalesced_frames = 1; 5493 } 5494 5495 return 0; 5496 } 5497 5498 static int virtnet_set_per_queue_coalesce(struct net_device *dev, 5499 u32 queue, 5500 struct ethtool_coalesce *ec) 5501 { 5502 struct virtnet_info *vi = netdev_priv(dev); 5503 int ret, napi_weight; 5504 bool update_napi = false; 5505 5506 if (queue >= vi->max_queue_pairs) 5507 return -EINVAL; 5508 5509 /* Can't change NAPI weight if the link is up */ 5510 napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; 5511 ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, 5512 vi->sq[queue].napi.weight, 5513 &update_napi); 5514 if (ret) 5515 return ret; 5516 5517 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 5518 ret = virtnet_send_notf_coal_vq_cmds(vi, ec, queue); 5519 else 5520 ret = virtnet_coal_params_supported(ec); 5521 5522 if (ret) 5523 return ret; 5524 5525 if (update_napi) 5526 vi->sq[queue].napi.weight = napi_weight; 5527 5528 return 0; 5529 } 5530 5531 static int virtnet_get_per_queue_coalesce(struct net_device *dev, 5532 u32 queue, 5533 struct ethtool_coalesce *ec) 5534 { 5535 struct virtnet_info *vi = netdev_priv(dev); 5536 5537 if (queue >= vi->max_queue_pairs) 5538 return -EINVAL; 5539 5540 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { 5541 mutex_lock(&vi->rq[queue].dim_lock); 5542 ec->rx_coalesce_usecs = vi->rq[queue].intr_coal.max_usecs; 5543 ec->tx_coalesce_usecs = vi->sq[queue].intr_coal.max_usecs; 5544 ec->tx_max_coalesced_frames = vi->sq[queue].intr_coal.max_packets; 5545 ec->rx_max_coalesced_frames = vi->rq[queue].intr_coal.max_packets; 5546 ec->use_adaptive_rx_coalesce = vi->rq[queue].dim_enabled; 5547 mutex_unlock(&vi->rq[queue].dim_lock); 5548 } else { 5549 ec->rx_max_coalesced_frames = 1; 5550 5551 if (vi->sq[queue].napi.weight) 5552 ec->tx_max_coalesced_frames = 1; 5553 } 5554 5555 return 0; 5556 } 5557 5558 static void virtnet_init_settings(struct net_device *dev) 5559 { 5560 struct virtnet_info *vi = netdev_priv(dev); 5561 5562 vi->speed = SPEED_UNKNOWN; 5563 vi->duplex = DUPLEX_UNKNOWN; 5564 } 5565 5566 static u32 virtnet_get_rxfh_key_size(struct net_device *dev) 5567 { 5568 return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size; 5569 } 5570 5571 static u32 virtnet_get_rxfh_indir_size(struct net_device *dev) 5572 { 5573 return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size; 5574 } 5575 5576 static int virtnet_get_rxfh(struct net_device *dev, 5577 struct ethtool_rxfh_param *rxfh) 5578 { 5579 struct virtnet_info *vi = netdev_priv(dev); 5580 int 
i; 5581 5582 if (rxfh->indir) { 5583 for (i = 0; i < vi->rss_indir_table_size; ++i) 5584 rxfh->indir[i] = le16_to_cpu(vi->rss_hdr->indirection_table[i]); 5585 } 5586 5587 if (rxfh->key) 5588 memcpy(rxfh->key, vi->rss_hash_key_data, vi->rss_key_size); 5589 5590 rxfh->hfunc = ETH_RSS_HASH_TOP; 5591 5592 return 0; 5593 } 5594 5595 static int virtnet_set_rxfh(struct net_device *dev, 5596 struct ethtool_rxfh_param *rxfh, 5597 struct netlink_ext_ack *extack) 5598 { 5599 struct virtnet_info *vi = netdev_priv(dev); 5600 bool update = false; 5601 int i; 5602 5603 if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE && 5604 rxfh->hfunc != ETH_RSS_HASH_TOP) 5605 return -EOPNOTSUPP; 5606 5607 if (rxfh->indir) { 5608 if (!vi->has_rss) 5609 return -EOPNOTSUPP; 5610 5611 for (i = 0; i < vi->rss_indir_table_size; ++i) 5612 vi->rss_hdr->indirection_table[i] = cpu_to_le16(rxfh->indir[i]); 5613 update = true; 5614 } 5615 5616 if (rxfh->key) { 5617 /* If either _F_HASH_REPORT or _F_RSS are negotiated, the 5618 * device provides hash calculation capabilities, that is, 5619 * hash_key is configured. 5620 */ 5621 if (!vi->has_rss && !vi->has_rss_hash_report) 5622 return -EOPNOTSUPP; 5623 5624 memcpy(vi->rss_hash_key_data, rxfh->key, vi->rss_key_size); 5625 update = true; 5626 } 5627 5628 if (update) 5629 virtnet_commit_rss_command(vi); 5630 5631 return 0; 5632 } 5633 5634 static u32 virtnet_get_rx_ring_count(struct net_device *dev) 5635 { 5636 struct virtnet_info *vi = netdev_priv(dev); 5637 5638 return vi->curr_queue_pairs; 5639 } 5640 5641 static const struct ethtool_ops virtnet_ethtool_ops = { 5642 .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | 5643 ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE_RX, 5644 .get_drvinfo = virtnet_get_drvinfo, 5645 .get_link = ethtool_op_get_link, 5646 .get_ringparam = virtnet_get_ringparam, 5647 .set_ringparam = virtnet_set_ringparam, 5648 .get_strings = virtnet_get_strings, 5649 .get_sset_count = virtnet_get_sset_count, 5650 .get_ethtool_stats = virtnet_get_ethtool_stats, 5651 .set_channels = virtnet_set_channels, 5652 .get_channels = virtnet_get_channels, 5653 .get_ts_info = ethtool_op_get_ts_info, 5654 .get_link_ksettings = virtnet_get_link_ksettings, 5655 .set_link_ksettings = virtnet_set_link_ksettings, 5656 .set_coalesce = virtnet_set_coalesce, 5657 .get_coalesce = virtnet_get_coalesce, 5658 .set_per_queue_coalesce = virtnet_set_per_queue_coalesce, 5659 .get_per_queue_coalesce = virtnet_get_per_queue_coalesce, 5660 .get_rxfh_key_size = virtnet_get_rxfh_key_size, 5661 .get_rxfh_indir_size = virtnet_get_rxfh_indir_size, 5662 .get_rxfh = virtnet_get_rxfh, 5663 .set_rxfh = virtnet_set_rxfh, 5664 .get_rxfh_fields = virtnet_get_hashflow, 5665 .set_rxfh_fields = virtnet_set_hashflow, 5666 .get_rx_ring_count = virtnet_get_rx_ring_count, 5667 }; 5668 5669 static void virtnet_get_queue_stats_rx(struct net_device *dev, int i, 5670 struct netdev_queue_stats_rx *stats) 5671 { 5672 struct virtnet_info *vi = netdev_priv(dev); 5673 struct receive_queue *rq = &vi->rq[i]; 5674 struct virtnet_stats_ctx ctx = {0}; 5675 5676 virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true); 5677 5678 virtnet_get_hw_stats(vi, &ctx, i * 2); 5679 virtnet_fill_stats(vi, i * 2, &ctx, (void *)&rq->stats, true, 0); 5680 } 5681 5682 static void virtnet_get_queue_stats_tx(struct net_device *dev, int i, 5683 struct netdev_queue_stats_tx *stats) 5684 { 5685 struct virtnet_info *vi = netdev_priv(dev); 5686 struct send_queue *sq = &vi->sq[i]; 5687 struct virtnet_stats_ctx ctx = {0}; 5688 5689 
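	/* tx queue i uses vq index i * 2 + 1; merge the device (hw) counters with
	 * the driver's own sq counters into the qstats structure.
	 */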
virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true); 5690 5691 virtnet_get_hw_stats(vi, &ctx, i * 2 + 1); 5692 virtnet_fill_stats(vi, i * 2 + 1, &ctx, (void *)&sq->stats, true, 0); 5693 } 5694 5695 static void virtnet_get_base_stats(struct net_device *dev, 5696 struct netdev_queue_stats_rx *rx, 5697 struct netdev_queue_stats_tx *tx) 5698 { 5699 struct virtnet_info *vi = netdev_priv(dev); 5700 5701 /* The queue stats of the virtio-net will not be reset. So here we 5702 * return 0. 5703 */ 5704 rx->bytes = 0; 5705 rx->packets = 0; 5706 5707 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 5708 rx->hw_drops = 0; 5709 rx->hw_drop_overruns = 0; 5710 } 5711 5712 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 5713 rx->csum_unnecessary = 0; 5714 rx->csum_none = 0; 5715 rx->csum_bad = 0; 5716 } 5717 5718 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 5719 rx->hw_gro_packets = 0; 5720 rx->hw_gro_bytes = 0; 5721 rx->hw_gro_wire_packets = 0; 5722 rx->hw_gro_wire_bytes = 0; 5723 } 5724 5725 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) 5726 rx->hw_drop_ratelimits = 0; 5727 5728 tx->bytes = 0; 5729 tx->packets = 0; 5730 tx->stop = 0; 5731 tx->wake = 0; 5732 5733 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 5734 tx->hw_drops = 0; 5735 tx->hw_drop_errors = 0; 5736 } 5737 5738 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 5739 tx->csum_none = 0; 5740 tx->needs_csum = 0; 5741 } 5742 5743 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 5744 tx->hw_gso_packets = 0; 5745 tx->hw_gso_bytes = 0; 5746 tx->hw_gso_wire_packets = 0; 5747 tx->hw_gso_wire_bytes = 0; 5748 } 5749 5750 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) 5751 tx->hw_drop_ratelimits = 0; 5752 5753 netdev_stat_queue_sum(dev, 5754 dev->real_num_rx_queues, vi->max_queue_pairs, rx, 5755 dev->real_num_tx_queues, vi->max_queue_pairs, tx); 5756 } 5757 5758 static const struct netdev_stat_ops virtnet_stat_ops = { 5759 .get_queue_stats_rx = virtnet_get_queue_stats_rx, 5760 .get_queue_stats_tx = virtnet_get_queue_stats_tx, 5761 .get_base_stats = virtnet_get_base_stats, 5762 }; 5763 5764 static void virtnet_freeze_down(struct virtio_device *vdev) 5765 { 5766 struct virtnet_info *vi = vdev->priv; 5767 5768 /* Make sure no work handler is accessing the device */ 5769 flush_work(&vi->config_work); 5770 disable_rx_mode_work(vi); 5771 flush_work(&vi->rx_mode_work); 5772 5773 if (netif_running(vi->dev)) { 5774 rtnl_lock(); 5775 virtnet_close(vi->dev); 5776 rtnl_unlock(); 5777 } 5778 5779 netif_tx_lock_bh(vi->dev); 5780 netif_device_detach(vi->dev); 5781 netif_tx_unlock_bh(vi->dev); 5782 } 5783 5784 static int init_vqs(struct virtnet_info *vi); 5785 5786 static int virtnet_restore_up(struct virtio_device *vdev) 5787 { 5788 struct virtnet_info *vi = vdev->priv; 5789 int err; 5790 5791 err = init_vqs(vi); 5792 if (err) 5793 return err; 5794 5795 virtio_device_ready(vdev); 5796 5797 enable_delayed_refill(vi); 5798 enable_rx_mode_work(vi); 5799 5800 if (netif_running(vi->dev)) { 5801 rtnl_lock(); 5802 err = virtnet_open(vi->dev); 5803 rtnl_unlock(); 5804 if (err) 5805 return err; 5806 } 5807 5808 netif_tx_lock_bh(vi->dev); 5809 netif_device_attach(vi->dev); 5810 netif_tx_unlock_bh(vi->dev); 5811 return err; 5812 } 5813 5814 static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads) 5815 { 5816 __virtio64 *_offloads __free(kfree) = NULL; 5817 struct scatterlist sg; 5818 5819 _offloads = kzalloc(sizeof(*_offloads), GFP_KERNEL); 5820 if (!_offloads) 
5821 return -ENOMEM; 5822 5823 *_offloads = cpu_to_virtio64(vi->vdev, offloads); 5824 5825 sg_init_one(&sg, _offloads, sizeof(*_offloads)); 5826 5827 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS, 5828 VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) { 5829 dev_warn(&vi->dev->dev, "Failed to set guest offloads.\n"); 5830 return -EINVAL; 5831 } 5832 5833 return 0; 5834 } 5835 5836 static int virtnet_clear_guest_offloads(struct virtnet_info *vi) 5837 { 5838 u64 offloads = 0; 5839 5840 if (!vi->guest_offloads) 5841 return 0; 5842 5843 return virtnet_set_guest_offloads(vi, offloads); 5844 } 5845 5846 static int virtnet_restore_guest_offloads(struct virtnet_info *vi) 5847 { 5848 u64 offloads = vi->guest_offloads; 5849 5850 if (!vi->guest_offloads) 5851 return 0; 5852 5853 return virtnet_set_guest_offloads(vi, offloads); 5854 } 5855 5856 static int virtnet_rq_bind_xsk_pool(struct virtnet_info *vi, struct receive_queue *rq, 5857 struct xsk_buff_pool *pool) 5858 { 5859 int err, qindex; 5860 5861 qindex = rq - vi->rq; 5862 5863 if (pool) { 5864 err = xdp_rxq_info_reg(&rq->xsk_rxq_info, vi->dev, qindex, rq->napi.napi_id); 5865 if (err < 0) 5866 return err; 5867 5868 err = xdp_rxq_info_reg_mem_model(&rq->xsk_rxq_info, 5869 MEM_TYPE_XSK_BUFF_POOL, NULL); 5870 if (err < 0) 5871 goto unreg; 5872 5873 xsk_pool_set_rxq_info(pool, &rq->xsk_rxq_info); 5874 } 5875 5876 virtnet_rx_pause(vi, rq); 5877 5878 err = virtqueue_reset(rq->vq, virtnet_rq_unmap_free_buf, NULL); 5879 if (err) { 5880 netdev_err(vi->dev, "reset rx fail: rx queue index: %d err: %d\n", qindex, err); 5881 5882 pool = NULL; 5883 } 5884 5885 rq->xsk_pool = pool; 5886 5887 virtnet_rx_resume(vi, rq); 5888 5889 if (pool) 5890 return 0; 5891 5892 unreg: 5893 xdp_rxq_info_unreg(&rq->xsk_rxq_info); 5894 return err; 5895 } 5896 5897 static int virtnet_sq_bind_xsk_pool(struct virtnet_info *vi, 5898 struct send_queue *sq, 5899 struct xsk_buff_pool *pool) 5900 { 5901 int err, qindex; 5902 5903 qindex = sq - vi->sq; 5904 5905 virtnet_tx_pause(vi, sq); 5906 5907 err = virtqueue_reset(sq->vq, virtnet_sq_free_unused_buf, 5908 virtnet_sq_free_unused_buf_done); 5909 if (err) { 5910 netdev_err(vi->dev, "reset tx fail: tx queue index: %d err: %d\n", qindex, err); 5911 pool = NULL; 5912 } 5913 5914 sq->xsk_pool = pool; 5915 5916 virtnet_tx_resume(vi, sq); 5917 5918 return err; 5919 } 5920 5921 static int virtnet_xsk_pool_enable(struct net_device *dev, 5922 struct xsk_buff_pool *pool, 5923 u16 qid) 5924 { 5925 struct virtnet_info *vi = netdev_priv(dev); 5926 struct receive_queue *rq; 5927 struct device *dma_dev; 5928 struct send_queue *sq; 5929 dma_addr_t hdr_dma; 5930 int err, size; 5931 5932 if (vi->hdr_len > xsk_pool_get_headroom(pool)) 5933 return -EINVAL; 5934 5935 /* In big_packets mode, XDP cannot work, so there is no need to 5936 * initialize the xsk state of the rq. 5937 */ 5938 if (vi->big_packets && !vi->mergeable_rx_bufs) 5939 return -ENOENT; 5940 5941 if (qid >= vi->curr_queue_pairs) 5942 return -EINVAL; 5943 5944 sq = &vi->sq[qid]; 5945 rq = &vi->rq[qid]; 5946 5947 /* xsk assumes that rx and tx share the same DMA device: AF_XDP may receive 5948 * into a buffer and then reuse that same buffer for transmit, so the DMA 5949 * device of the sq and the rq must be the same one. 5950 * 5951 * But vq->dma_dev allows each vq to have its own DMA device, so check that 5952 * the DMA device of the rq and the sq is the same device.
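 * virtqueue_dma_dev() returns NULL when the virtqueue does not use the DMA
 * API, in which case the pool cannot be DMA-mapped and binding is refused
 * below.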
5953 */ 5954 if (virtqueue_dma_dev(rq->vq) != virtqueue_dma_dev(sq->vq)) 5955 return -EINVAL; 5956 5957 dma_dev = virtqueue_dma_dev(rq->vq); 5958 if (!dma_dev) 5959 return -EINVAL; 5960 5961 size = virtqueue_get_vring_size(rq->vq); 5962 5963 rq->xsk_buffs = kvcalloc(size, sizeof(*rq->xsk_buffs), GFP_KERNEL); 5964 if (!rq->xsk_buffs) 5965 return -ENOMEM; 5966 5967 hdr_dma = virtqueue_map_single_attrs(sq->vq, &xsk_hdr, vi->hdr_len, 5968 DMA_TO_DEVICE, 0); 5969 if (virtqueue_map_mapping_error(sq->vq, hdr_dma)) { 5970 err = -ENOMEM; 5971 goto err_free_buffs; 5972 } 5973 5974 err = xsk_pool_dma_map(pool, dma_dev, 0); 5975 if (err) 5976 goto err_xsk_map; 5977 5978 err = virtnet_rq_bind_xsk_pool(vi, rq, pool); 5979 if (err) 5980 goto err_rq; 5981 5982 err = virtnet_sq_bind_xsk_pool(vi, sq, pool); 5983 if (err) 5984 goto err_sq; 5985 5986 /* Now, we do not support tx offload(such as tx csum), so all the tx 5987 * virtnet hdr is zero. So all the tx packets can share a single hdr. 5988 */ 5989 sq->xsk_hdr_dma_addr = hdr_dma; 5990 5991 return 0; 5992 5993 err_sq: 5994 virtnet_rq_bind_xsk_pool(vi, rq, NULL); 5995 err_rq: 5996 xsk_pool_dma_unmap(pool, 0); 5997 err_xsk_map: 5998 virtqueue_unmap_single_attrs(rq->vq, hdr_dma, vi->hdr_len, 5999 DMA_TO_DEVICE, 0); 6000 err_free_buffs: 6001 kvfree(rq->xsk_buffs); 6002 return err; 6003 } 6004 6005 static int virtnet_xsk_pool_disable(struct net_device *dev, u16 qid) 6006 { 6007 struct virtnet_info *vi = netdev_priv(dev); 6008 struct xsk_buff_pool *pool; 6009 struct receive_queue *rq; 6010 struct send_queue *sq; 6011 int err; 6012 6013 if (qid >= vi->curr_queue_pairs) 6014 return -EINVAL; 6015 6016 sq = &vi->sq[qid]; 6017 rq = &vi->rq[qid]; 6018 6019 pool = rq->xsk_pool; 6020 6021 err = virtnet_rq_bind_xsk_pool(vi, rq, NULL); 6022 err |= virtnet_sq_bind_xsk_pool(vi, sq, NULL); 6023 6024 xsk_pool_dma_unmap(pool, 0); 6025 6026 virtqueue_unmap_single_attrs(sq->vq, sq->xsk_hdr_dma_addr, 6027 vi->hdr_len, DMA_TO_DEVICE, 0); 6028 kvfree(rq->xsk_buffs); 6029 6030 return err; 6031 } 6032 6033 static int virtnet_xsk_pool_setup(struct net_device *dev, struct netdev_bpf *xdp) 6034 { 6035 if (xdp->xsk.pool) 6036 return virtnet_xsk_pool_enable(dev, xdp->xsk.pool, 6037 xdp->xsk.queue_id); 6038 else 6039 return virtnet_xsk_pool_disable(dev, xdp->xsk.queue_id); 6040 } 6041 6042 static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, 6043 struct netlink_ext_ack *extack) 6044 { 6045 unsigned int room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM + 6046 sizeof(struct skb_shared_info)); 6047 unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN; 6048 struct virtnet_info *vi = netdev_priv(dev); 6049 struct bpf_prog *old_prog; 6050 u16 xdp_qp = 0, curr_qp; 6051 int i, err; 6052 6053 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) 6054 && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 6055 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 6056 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 6057 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 6058 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM) || 6059 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) || 6060 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6))) { 6061 NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first"); 6062 return -EOPNOTSUPP; 6063 } 6064 6065 if (vi->mergeable_rx_bufs && !vi->any_header_sg) { 6066 NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg 
required"); 6067 return -EINVAL; 6068 } 6069 6070 if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) { 6071 NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags"); 6072 netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz); 6073 return -EINVAL; 6074 } 6075 6076 curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs; 6077 if (prog) 6078 xdp_qp = nr_cpu_ids; 6079 6080 /* XDP requires extra queues for XDP_TX */ 6081 if (curr_qp + xdp_qp > vi->max_queue_pairs) { 6082 netdev_warn_once(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n", 6083 curr_qp + xdp_qp, vi->max_queue_pairs); 6084 xdp_qp = 0; 6085 } 6086 6087 old_prog = rtnl_dereference(vi->rq[0].xdp_prog); 6088 if (!prog && !old_prog) 6089 return 0; 6090 6091 if (prog) 6092 bpf_prog_add(prog, vi->max_queue_pairs - 1); 6093 6094 virtnet_rx_pause_all(vi); 6095 6096 /* Make sure NAPI is not using any XDP TX queues for RX. */ 6097 if (netif_running(dev)) { 6098 for (i = 0; i < vi->max_queue_pairs; i++) 6099 virtnet_napi_tx_disable(&vi->sq[i]); 6100 } 6101 6102 if (!prog) { 6103 for (i = 0; i < vi->max_queue_pairs; i++) { 6104 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 6105 if (i == 0) 6106 virtnet_restore_guest_offloads(vi); 6107 } 6108 synchronize_net(); 6109 } 6110 6111 err = virtnet_set_queues(vi, curr_qp + xdp_qp); 6112 if (err) 6113 goto err; 6114 netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); 6115 vi->xdp_queue_pairs = xdp_qp; 6116 6117 if (prog) { 6118 vi->xdp_enabled = true; 6119 for (i = 0; i < vi->max_queue_pairs; i++) { 6120 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 6121 if (i == 0 && !old_prog) 6122 virtnet_clear_guest_offloads(vi); 6123 } 6124 if (!old_prog) 6125 xdp_features_set_redirect_target(dev, true); 6126 } else { 6127 xdp_features_clear_redirect_target(dev); 6128 vi->xdp_enabled = false; 6129 } 6130 6131 virtnet_rx_resume_all(vi); 6132 for (i = 0; i < vi->max_queue_pairs; i++) { 6133 if (old_prog) 6134 bpf_prog_put(old_prog); 6135 if (netif_running(dev)) 6136 virtnet_napi_tx_enable(&vi->sq[i]); 6137 } 6138 6139 return 0; 6140 6141 err: 6142 if (!prog) { 6143 virtnet_clear_guest_offloads(vi); 6144 for (i = 0; i < vi->max_queue_pairs; i++) 6145 rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); 6146 } 6147 6148 virtnet_rx_resume_all(vi); 6149 if (netif_running(dev)) { 6150 for (i = 0; i < vi->max_queue_pairs; i++) 6151 virtnet_napi_tx_enable(&vi->sq[i]); 6152 } 6153 if (prog) 6154 bpf_prog_sub(prog, vi->max_queue_pairs - 1); 6155 return err; 6156 } 6157 6158 static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp) 6159 { 6160 switch (xdp->command) { 6161 case XDP_SETUP_PROG: 6162 return virtnet_xdp_set(dev, xdp->prog, xdp->extack); 6163 case XDP_SETUP_XSK_POOL: 6164 return virtnet_xsk_pool_setup(dev, xdp); 6165 default: 6166 return -EINVAL; 6167 } 6168 } 6169 6170 static int virtnet_get_phys_port_name(struct net_device *dev, char *buf, 6171 size_t len) 6172 { 6173 struct virtnet_info *vi = netdev_priv(dev); 6174 int ret; 6175 6176 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 6177 return -EOPNOTSUPP; 6178 6179 ret = snprintf(buf, len, "sby"); 6180 if (ret >= len) 6181 return -EOPNOTSUPP; 6182 6183 return 0; 6184 } 6185 6186 static int virtnet_set_features(struct net_device *dev, 6187 netdev_features_t features) 6188 { 6189 struct virtnet_info *vi = netdev_priv(dev); 6190 u64 offloads; 6191 int err; 6192 6193 if ((dev->features ^ features) & NETIF_F_GRO_HW) { 6194 if (vi->xdp_enabled) 
6195 return -EBUSY; 6196 6197 if (features & NETIF_F_GRO_HW) 6198 offloads = vi->guest_offloads_capable; 6199 else 6200 offloads = vi->guest_offloads_capable & 6201 ~GUEST_OFFLOAD_GRO_HW_MASK; 6202 6203 err = virtnet_set_guest_offloads(vi, offloads); 6204 if (err) 6205 return err; 6206 vi->guest_offloads = offloads; 6207 } 6208 6209 if ((dev->features ^ features) & NETIF_F_RXHASH) { 6210 if (features & NETIF_F_RXHASH) 6211 vi->rss_hdr->hash_types = cpu_to_le32(vi->rss_hash_types_saved); 6212 else 6213 vi->rss_hdr->hash_types = cpu_to_le32(VIRTIO_NET_HASH_REPORT_NONE); 6214 6215 if (!virtnet_commit_rss_command(vi)) 6216 return -EINVAL; 6217 } 6218 6219 return 0; 6220 } 6221 6222 static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue) 6223 { 6224 struct virtnet_info *priv = netdev_priv(dev); 6225 struct send_queue *sq = &priv->sq[txqueue]; 6226 struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue); 6227 6228 u64_stats_update_begin(&sq->stats.syncp); 6229 u64_stats_inc(&sq->stats.tx_timeouts); 6230 u64_stats_update_end(&sq->stats.syncp); 6231 6232 netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n", 6233 txqueue, sq->name, sq->vq->index, sq->vq->name, 6234 jiffies_to_usecs(jiffies - READ_ONCE(txq->trans_start))); 6235 } 6236 6237 static int virtnet_init_irq_moder(struct virtnet_info *vi) 6238 { 6239 u8 profile_flags = 0, coal_flags = 0; 6240 int ret, i; 6241 6242 profile_flags |= DIM_PROFILE_RX; 6243 coal_flags |= DIM_COALESCE_USEC | DIM_COALESCE_PKTS; 6244 ret = net_dim_init_irq_moder(vi->dev, profile_flags, coal_flags, 6245 DIM_CQ_PERIOD_MODE_START_FROM_EQE, 6246 0, virtnet_rx_dim_work, NULL); 6247 6248 if (ret) 6249 return ret; 6250 6251 for (i = 0; i < vi->max_queue_pairs; i++) 6252 net_dim_setting(vi->dev, &vi->rq[i].dim, false); 6253 6254 return 0; 6255 } 6256 6257 static void virtnet_free_irq_moder(struct virtnet_info *vi) 6258 { 6259 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 6260 return; 6261 6262 rtnl_lock(); 6263 net_dim_free_irq_moder(vi->dev); 6264 rtnl_unlock(); 6265 } 6266 6267 static const struct net_device_ops virtnet_netdev = { 6268 .ndo_open = virtnet_open, 6269 .ndo_stop = virtnet_close, 6270 .ndo_start_xmit = start_xmit, 6271 .ndo_validate_addr = eth_validate_addr, 6272 .ndo_set_mac_address = virtnet_set_mac_address, 6273 .ndo_set_rx_mode = virtnet_set_rx_mode, 6274 .ndo_get_stats64 = virtnet_stats, 6275 .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid, 6276 .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, 6277 .ndo_bpf = virtnet_xdp, 6278 .ndo_xdp_xmit = virtnet_xdp_xmit, 6279 .ndo_xsk_wakeup = virtnet_xsk_wakeup, 6280 .ndo_features_check = passthru_features_check, 6281 .ndo_get_phys_port_name = virtnet_get_phys_port_name, 6282 .ndo_set_features = virtnet_set_features, 6283 .ndo_tx_timeout = virtnet_tx_timeout, 6284 }; 6285 6286 static void virtnet_config_changed_work(struct work_struct *work) 6287 { 6288 struct virtnet_info *vi = 6289 container_of(work, struct virtnet_info, config_work); 6290 u16 v; 6291 6292 if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS, 6293 struct virtio_net_config, status, &v) < 0) 6294 return; 6295 6296 if (v & VIRTIO_NET_S_ANNOUNCE) { 6297 netdev_notify_peers(vi->dev); 6298 virtnet_ack_link_announce(vi); 6299 } 6300 6301 /* Ignore unknown (future) status bits */ 6302 v &= VIRTIO_NET_S_LINK_UP; 6303 6304 if (vi->status == v) 6305 return; 6306 6307 vi->status = v; 6308 6309 if (vi->status & VIRTIO_NET_S_LINK_UP) { 6310 virtnet_update_settings(vi); 6311 
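		/* Link came up: expose the carrier to the stack and let the
		 * TX queues run again.
		 */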
netif_carrier_on(vi->dev); 6312 netif_tx_wake_all_queues(vi->dev); 6313 } else { 6314 netif_carrier_off(vi->dev); 6315 netif_tx_stop_all_queues(vi->dev); 6316 } 6317 } 6318 6319 static void virtnet_config_changed(struct virtio_device *vdev) 6320 { 6321 struct virtnet_info *vi = vdev->priv; 6322 6323 schedule_work(&vi->config_work); 6324 } 6325 6326 static void virtnet_free_queues(struct virtnet_info *vi) 6327 { 6328 int i; 6329 6330 for (i = 0; i < vi->max_queue_pairs; i++) { 6331 __netif_napi_del(&vi->rq[i].napi); 6332 __netif_napi_del(&vi->sq[i].napi); 6333 } 6334 6335 /* We called __netif_napi_del(), 6336 * we need to respect an RCU grace period before freeing vi->rq 6337 */ 6338 synchronize_net(); 6339 6340 kfree(vi->rq); 6341 kfree(vi->sq); 6342 kfree(vi->ctrl); 6343 } 6344 6345 static void _free_receive_bufs(struct virtnet_info *vi) 6346 { 6347 struct bpf_prog *old_prog; 6348 int i; 6349 6350 for (i = 0; i < vi->max_queue_pairs; i++) { 6351 while (vi->rq[i].pages) 6352 __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0); 6353 6354 old_prog = rtnl_dereference(vi->rq[i].xdp_prog); 6355 RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL); 6356 if (old_prog) 6357 bpf_prog_put(old_prog); 6358 } 6359 } 6360 6361 static void free_receive_bufs(struct virtnet_info *vi) 6362 { 6363 rtnl_lock(); 6364 _free_receive_bufs(vi); 6365 rtnl_unlock(); 6366 } 6367 6368 static void free_receive_page_frags(struct virtnet_info *vi) 6369 { 6370 int i; 6371 for (i = 0; i < vi->max_queue_pairs; i++) 6372 if (vi->rq[i].alloc_frag.page) { 6373 if (vi->rq[i].last_dma) 6374 virtnet_rq_unmap(&vi->rq[i], vi->rq[i].last_dma, 0); 6375 put_page(vi->rq[i].alloc_frag.page); 6376 } 6377 } 6378 6379 static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf) 6380 { 6381 struct virtnet_info *vi = vq->vdev->priv; 6382 struct send_queue *sq; 6383 int i = vq2txq(vq); 6384 6385 sq = &vi->sq[i]; 6386 6387 switch (virtnet_xmit_ptr_unpack(&buf)) { 6388 case VIRTNET_XMIT_TYPE_SKB: 6389 case VIRTNET_XMIT_TYPE_SKB_ORPHAN: 6390 dev_kfree_skb(buf); 6391 break; 6392 6393 case VIRTNET_XMIT_TYPE_XDP: 6394 xdp_return_frame(buf); 6395 break; 6396 6397 case VIRTNET_XMIT_TYPE_XSK: 6398 xsk_tx_completed(sq->xsk_pool, 1); 6399 break; 6400 } 6401 } 6402 6403 static void virtnet_sq_free_unused_buf_done(struct virtqueue *vq) 6404 { 6405 struct virtnet_info *vi = vq->vdev->priv; 6406 int i = vq2txq(vq); 6407 6408 netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, i)); 6409 } 6410 6411 static void free_unused_bufs(struct virtnet_info *vi) 6412 { 6413 void *buf; 6414 int i; 6415 6416 for (i = 0; i < vi->max_queue_pairs; i++) { 6417 struct virtqueue *vq = vi->sq[i].vq; 6418 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 6419 virtnet_sq_free_unused_buf(vq, buf); 6420 cond_resched(); 6421 } 6422 6423 for (i = 0; i < vi->max_queue_pairs; i++) { 6424 struct virtqueue *vq = vi->rq[i].vq; 6425 6426 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 6427 virtnet_rq_unmap_free_buf(vq, buf); 6428 cond_resched(); 6429 } 6430 } 6431 6432 static void virtnet_del_vqs(struct virtnet_info *vi) 6433 { 6434 struct virtio_device *vdev = vi->vdev; 6435 6436 virtnet_clean_affinity(vi); 6437 6438 vdev->config->del_vqs(vdev); 6439 6440 virtnet_free_queues(vi); 6441 } 6442 6443 /* How large should a single buffer be so a queue full of these can fit at 6444 * least one full packet? 6445 * Logic below assumes the mergeable buffer header is used. 
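 * E.g. (illustrative figures, not taken from a running device): with a
 * 12-byte mergeable header, a 256-entry ring and big_packets sizing of
 * IP_MAX_MTU (65535), buf_len = 12 + 14 + 4 + 65535 = 65565, so
 * min_buf_len rounds up to 257; after subtracting hdr_len the result is
 * clamped up to GOOD_PACKET_LEN (1518).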
6446 */ 6447 static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq) 6448 { 6449 const unsigned int hdr_len = vi->hdr_len; 6450 unsigned int rq_size = virtqueue_get_vring_size(vq); 6451 unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu; 6452 unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len; 6453 unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size); 6454 6455 return max(max(min_buf_len, hdr_len) - hdr_len, 6456 (unsigned int)GOOD_PACKET_LEN); 6457 } 6458 6459 static int virtnet_find_vqs(struct virtnet_info *vi) 6460 { 6461 struct virtqueue_info *vqs_info; 6462 struct virtqueue **vqs; 6463 int ret = -ENOMEM; 6464 int total_vqs; 6465 bool *ctx; 6466 u16 i; 6467 6468 /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by 6469 * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by 6470 * possible control vq. 6471 */ 6472 total_vqs = vi->max_queue_pairs * 2 + 6473 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ); 6474 6475 /* Allocate space for find_vqs parameters */ 6476 vqs = kcalloc(total_vqs, sizeof(*vqs), GFP_KERNEL); 6477 if (!vqs) 6478 goto err_vq; 6479 vqs_info = kcalloc(total_vqs, sizeof(*vqs_info), GFP_KERNEL); 6480 if (!vqs_info) 6481 goto err_vqs_info; 6482 if (!vi->big_packets || vi->mergeable_rx_bufs) { 6483 ctx = kcalloc(total_vqs, sizeof(*ctx), GFP_KERNEL); 6484 if (!ctx) 6485 goto err_ctx; 6486 } else { 6487 ctx = NULL; 6488 } 6489 6490 /* Parameters for control virtqueue, if any */ 6491 if (vi->has_cvq) { 6492 vqs_info[total_vqs - 1].name = "control"; 6493 } 6494 6495 /* Allocate/initialize parameters for send/receive virtqueues */ 6496 for (i = 0; i < vi->max_queue_pairs; i++) { 6497 vqs_info[rxq2vq(i)].callback = skb_recv_done; 6498 vqs_info[txq2vq(i)].callback = skb_xmit_done; 6499 sprintf(vi->rq[i].name, "input.%u", i); 6500 sprintf(vi->sq[i].name, "output.%u", i); 6501 vqs_info[rxq2vq(i)].name = vi->rq[i].name; 6502 vqs_info[txq2vq(i)].name = vi->sq[i].name; 6503 if (ctx) 6504 vqs_info[rxq2vq(i)].ctx = true; 6505 } 6506 6507 ret = virtio_find_vqs(vi->vdev, total_vqs, vqs, vqs_info, NULL); 6508 if (ret) 6509 goto err_find; 6510 6511 if (vi->has_cvq) { 6512 vi->cvq = vqs[total_vqs - 1]; 6513 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) 6514 vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; 6515 } 6516 6517 for (i = 0; i < vi->max_queue_pairs; i++) { 6518 vi->rq[i].vq = vqs[rxq2vq(i)]; 6519 vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq); 6520 vi->sq[i].vq = vqs[txq2vq(i)]; 6521 } 6522 6523 /* run here: ret == 0. 
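	 * Success deliberately falls through the labels below: on this path
	 * they only free the temporary ctx/vqs_info/vqs arrays and return
	 * ret, which is 0 here.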
*/ 6524 6525 6526 err_find: 6527 kfree(ctx); 6528 err_ctx: 6529 kfree(vqs_info); 6530 err_vqs_info: 6531 kfree(vqs); 6532 err_vq: 6533 return ret; 6534 } 6535 6536 static int virtnet_alloc_queues(struct virtnet_info *vi) 6537 { 6538 int i; 6539 6540 if (vi->has_cvq) { 6541 vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL); 6542 if (!vi->ctrl) 6543 goto err_ctrl; 6544 } else { 6545 vi->ctrl = NULL; 6546 } 6547 vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL); 6548 if (!vi->sq) 6549 goto err_sq; 6550 vi->rq = kcalloc(vi->max_queue_pairs, sizeof(*vi->rq), GFP_KERNEL); 6551 if (!vi->rq) 6552 goto err_rq; 6553 6554 INIT_DELAYED_WORK(&vi->refill, refill_work); 6555 for (i = 0; i < vi->max_queue_pairs; i++) { 6556 vi->rq[i].pages = NULL; 6557 netif_napi_add_config(vi->dev, &vi->rq[i].napi, virtnet_poll, 6558 i); 6559 vi->rq[i].napi.weight = napi_weight; 6560 netif_napi_add_tx_weight(vi->dev, &vi->sq[i].napi, 6561 virtnet_poll_tx, 6562 napi_tx ? napi_weight : 0); 6563 6564 sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg)); 6565 ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len); 6566 sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg)); 6567 6568 u64_stats_init(&vi->rq[i].stats.syncp); 6569 u64_stats_init(&vi->sq[i].stats.syncp); 6570 mutex_init(&vi->rq[i].dim_lock); 6571 } 6572 6573 return 0; 6574 6575 err_rq: 6576 kfree(vi->sq); 6577 err_sq: 6578 kfree(vi->ctrl); 6579 err_ctrl: 6580 return -ENOMEM; 6581 } 6582 6583 static int init_vqs(struct virtnet_info *vi) 6584 { 6585 int ret; 6586 6587 /* Allocate send & receive queues */ 6588 ret = virtnet_alloc_queues(vi); 6589 if (ret) 6590 goto err; 6591 6592 ret = virtnet_find_vqs(vi); 6593 if (ret) 6594 goto err_free; 6595 6596 cpus_read_lock(); 6597 virtnet_set_affinity(vi); 6598 cpus_read_unlock(); 6599 6600 return 0; 6601 6602 err_free: 6603 virtnet_free_queues(vi); 6604 err: 6605 return ret; 6606 } 6607 6608 #ifdef CONFIG_SYSFS 6609 static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue, 6610 char *buf) 6611 { 6612 struct virtnet_info *vi = netdev_priv(queue->dev); 6613 unsigned int queue_index = get_netdev_rx_queue_index(queue); 6614 unsigned int headroom = virtnet_get_headroom(vi); 6615 unsigned int tailroom = headroom ? 
sizeof(struct skb_shared_info) : 0; 6616 struct ewma_pkt_len *avg; 6617 6618 BUG_ON(queue_index >= vi->max_queue_pairs); 6619 avg = &vi->rq[queue_index].mrg_avg_pkt_len; 6620 return sprintf(buf, "%u\n", 6621 get_mergeable_buf_len(&vi->rq[queue_index], avg, 6622 SKB_DATA_ALIGN(headroom + tailroom))); 6623 } 6624 6625 static struct rx_queue_attribute mergeable_rx_buffer_size_attribute = 6626 __ATTR_RO(mergeable_rx_buffer_size); 6627 6628 static struct attribute *virtio_net_mrg_rx_attrs[] = { 6629 &mergeable_rx_buffer_size_attribute.attr, 6630 NULL 6631 }; 6632 6633 static const struct attribute_group virtio_net_mrg_rx_group = { 6634 .name = "virtio_net", 6635 .attrs = virtio_net_mrg_rx_attrs 6636 }; 6637 #endif 6638 6639 static bool virtnet_fail_on_feature(struct virtio_device *vdev, 6640 unsigned int fbit, 6641 const char *fname, const char *dname) 6642 { 6643 if (!virtio_has_feature(vdev, fbit)) 6644 return false; 6645 6646 dev_err(&vdev->dev, "device advertises feature %s but not %s", 6647 fname, dname); 6648 6649 return true; 6650 } 6651 6652 #define VIRTNET_FAIL_ON(vdev, fbit, dbit) \ 6653 virtnet_fail_on_feature(vdev, fbit, #fbit, dbit) 6654 6655 static bool virtnet_validate_features(struct virtio_device *vdev) 6656 { 6657 if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) && 6658 (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX, 6659 "VIRTIO_NET_F_CTRL_VQ") || 6660 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN, 6661 "VIRTIO_NET_F_CTRL_VQ") || 6662 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE, 6663 "VIRTIO_NET_F_CTRL_VQ") || 6664 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") || 6665 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR, 6666 "VIRTIO_NET_F_CTRL_VQ") || 6667 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, 6668 "VIRTIO_NET_F_CTRL_VQ") || 6669 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT, 6670 "VIRTIO_NET_F_CTRL_VQ") || 6671 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL, 6672 "VIRTIO_NET_F_CTRL_VQ") || 6673 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_VQ_NOTF_COAL, 6674 "VIRTIO_NET_F_CTRL_VQ"))) { 6675 return false; 6676 } 6677 6678 return true; 6679 } 6680 6681 #define MIN_MTU ETH_MIN_MTU 6682 #define MAX_MTU ETH_MAX_MTU 6683 6684 static int virtnet_validate(struct virtio_device *vdev) 6685 { 6686 if (!vdev->config->get) { 6687 dev_err(&vdev->dev, "%s failure: config access disabled\n", 6688 __func__); 6689 return -EINVAL; 6690 } 6691 6692 if (!virtnet_validate_features(vdev)) 6693 return -EINVAL; 6694 6695 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { 6696 int mtu = virtio_cread16(vdev, 6697 offsetof(struct virtio_net_config, 6698 mtu)); 6699 if (mtu < MIN_MTU) 6700 __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU); 6701 } 6702 6703 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY) && 6704 !virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 6705 dev_warn(&vdev->dev, "device advertises feature VIRTIO_NET_F_STANDBY but not VIRTIO_NET_F_MAC, disabling standby"); 6706 __virtio_clear_bit(vdev, VIRTIO_NET_F_STANDBY); 6707 } 6708 6709 return 0; 6710 } 6711 6712 static bool virtnet_check_guest_gso(const struct virtnet_info *vi) 6713 { 6714 return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 6715 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 6716 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 6717 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 6718 (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) && 6719 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6)); 6720 } 6721 6722 static void virtnet_set_big_packets(struct virtnet_info *vi, const int 
mtu) 6723 { 6724 bool guest_gso = virtnet_check_guest_gso(vi); 6725 6726 /* If device can receive ANY guest GSO packets, regardless of mtu, 6727 * allocate packets of maximum size, otherwise limit it to only 6728 * mtu size worth only. 6729 */ 6730 if (mtu > ETH_DATA_LEN || guest_gso) { 6731 vi->big_packets = true; 6732 vi->big_packets_num_skbfrags = guest_gso ? MAX_SKB_FRAGS : DIV_ROUND_UP(mtu, PAGE_SIZE); 6733 } 6734 } 6735 6736 #define VIRTIO_NET_HASH_REPORT_MAX_TABLE 10 6737 static enum xdp_rss_hash_type 6738 virtnet_xdp_rss_type[VIRTIO_NET_HASH_REPORT_MAX_TABLE] = { 6739 [VIRTIO_NET_HASH_REPORT_NONE] = XDP_RSS_TYPE_NONE, 6740 [VIRTIO_NET_HASH_REPORT_IPv4] = XDP_RSS_TYPE_L3_IPV4, 6741 [VIRTIO_NET_HASH_REPORT_TCPv4] = XDP_RSS_TYPE_L4_IPV4_TCP, 6742 [VIRTIO_NET_HASH_REPORT_UDPv4] = XDP_RSS_TYPE_L4_IPV4_UDP, 6743 [VIRTIO_NET_HASH_REPORT_IPv6] = XDP_RSS_TYPE_L3_IPV6, 6744 [VIRTIO_NET_HASH_REPORT_TCPv6] = XDP_RSS_TYPE_L4_IPV6_TCP, 6745 [VIRTIO_NET_HASH_REPORT_UDPv6] = XDP_RSS_TYPE_L4_IPV6_UDP, 6746 [VIRTIO_NET_HASH_REPORT_IPv6_EX] = XDP_RSS_TYPE_L3_IPV6_EX, 6747 [VIRTIO_NET_HASH_REPORT_TCPv6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX, 6748 [VIRTIO_NET_HASH_REPORT_UDPv6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX 6749 }; 6750 6751 static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash, 6752 enum xdp_rss_hash_type *rss_type) 6753 { 6754 const struct xdp_buff *xdp = (void *)_ctx; 6755 struct virtio_net_hdr_v1_hash *hdr_hash; 6756 struct virtnet_info *vi; 6757 u16 hash_report; 6758 6759 if (!(xdp->rxq->dev->features & NETIF_F_RXHASH)) 6760 return -ENODATA; 6761 6762 vi = netdev_priv(xdp->rxq->dev); 6763 hdr_hash = (struct virtio_net_hdr_v1_hash *)(xdp->data - vi->hdr_len); 6764 hash_report = __le16_to_cpu(hdr_hash->hash_report); 6765 6766 if (hash_report >= VIRTIO_NET_HASH_REPORT_MAX_TABLE) 6767 hash_report = VIRTIO_NET_HASH_REPORT_NONE; 6768 6769 *rss_type = virtnet_xdp_rss_type[hash_report]; 6770 *hash = virtio_net_hash_value(hdr_hash); 6771 return 0; 6772 } 6773 6774 static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = { 6775 .xmo_rx_hash = virtnet_xdp_rx_hash, 6776 }; 6777 6778 static int virtnet_probe(struct virtio_device *vdev) 6779 { 6780 int i, err = -ENOMEM; 6781 struct net_device *dev; 6782 struct virtnet_info *vi; 6783 u16 max_queue_pairs; 6784 int mtu = 0; 6785 6786 /* Find if host supports multiqueue/rss virtio_net device */ 6787 max_queue_pairs = 1; 6788 if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ) || virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) 6789 max_queue_pairs = 6790 virtio_cread16(vdev, offsetof(struct virtio_net_config, max_virtqueue_pairs)); 6791 6792 /* We need at least 2 queue's */ 6793 if (max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 6794 max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || 6795 !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) 6796 max_queue_pairs = 1; 6797 6798 /* Allocate ourselves a network device with room for our info */ 6799 dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs); 6800 if (!dev) 6801 return -ENOMEM; 6802 6803 /* Set up network device as normal. */ 6804 dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE | 6805 IFF_TX_SKB_NO_LINEAR; 6806 dev->netdev_ops = &virtnet_netdev; 6807 dev->stat_ops = &virtnet_stat_ops; 6808 dev->features = NETIF_F_HIGHDMA; 6809 6810 dev->ethtool_ops = &virtnet_ethtool_ops; 6811 SET_NETDEV_DEV(dev, &vdev->dev); 6812 6813 /* Do we support "hardware" checksums? */ 6814 if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) { 6815 /* This opens up the world of extra features. 
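		 * Checksum offload is the gate for the rest: NETIF_F_SG and
		 * the TSO/USO bits below are only set when the device also
		 * offers checksum offload (VIRTIO_NET_F_CSUM).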
*/ 6816 dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG; 6817 if (csum) 6818 dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; 6819 6820 if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) { 6821 dev->hw_features |= NETIF_F_TSO 6822 | NETIF_F_TSO_ECN | NETIF_F_TSO6; 6823 } 6824 /* Individual feature bits: what can host handle? */ 6825 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4)) 6826 dev->hw_features |= NETIF_F_TSO; 6827 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6)) 6828 dev->hw_features |= NETIF_F_TSO6; 6829 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN)) 6830 dev->hw_features |= NETIF_F_TSO_ECN; 6831 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_USO)) 6832 dev->hw_features |= NETIF_F_GSO_UDP_L4; 6833 6834 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO)) { 6835 dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; 6836 dev->hw_enc_features = dev->hw_features; 6837 } 6838 if (dev->hw_features & NETIF_F_GSO_UDP_TUNNEL && 6839 virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM)) { 6840 dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; 6841 dev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; 6842 } 6843 6844 dev->features |= NETIF_F_GSO_ROBUST; 6845 6846 if (gso) 6847 dev->features |= dev->hw_features; 6848 /* (!csum && gso) case will be fixed by register_netdev() */ 6849 } 6850 6851 /* 1. With VIRTIO_NET_F_GUEST_CSUM negotiation, the driver doesn't 6852 * need to calculate checksums for partially checksummed packets, 6853 * as they're considered valid by the upper layer. 6854 * 2. Without VIRTIO_NET_F_GUEST_CSUM negotiation, the driver only 6855 * receives fully checksummed packets. The device may assist in 6856 * validating these packets' checksums, so the driver won't have to. 6857 */ 6858 dev->features |= NETIF_F_RXCSUM; 6859 6860 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || 6861 virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)) 6862 dev->features |= NETIF_F_GRO_HW; 6863 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) 6864 dev->hw_features |= NETIF_F_GRO_HW; 6865 6866 dev->vlan_features = dev->features; 6867 dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | 6868 NETDEV_XDP_ACT_XSK_ZEROCOPY; 6869 6870 /* MTU range: 68 - 65535 */ 6871 dev->min_mtu = MIN_MTU; 6872 dev->max_mtu = MAX_MTU; 6873 6874 /* Configuration may specify what MAC to use. Otherwise random. 
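	 * (i.e. read the MAC from config space when VIRTIO_NET_F_MAC was
	 * negotiated; otherwise generate a random one with
	 * eth_hw_addr_random() and log it.)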
*/ 6875 if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 6876 u8 addr[ETH_ALEN]; 6877 6878 virtio_cread_bytes(vdev, 6879 offsetof(struct virtio_net_config, mac), 6880 addr, ETH_ALEN); 6881 eth_hw_addr_set(dev, addr); 6882 } else { 6883 eth_hw_addr_random(dev); 6884 dev_info(&vdev->dev, "Assigned random MAC address %pM\n", 6885 dev->dev_addr); 6886 } 6887 6888 /* Set up our device-specific information */ 6889 vi = netdev_priv(dev); 6890 vi->dev = dev; 6891 vi->vdev = vdev; 6892 vdev->priv = vi; 6893 6894 INIT_WORK(&vi->config_work, virtnet_config_changed_work); 6895 INIT_WORK(&vi->rx_mode_work, virtnet_rx_mode_work); 6896 spin_lock_init(&vi->refill_lock); 6897 6898 if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) { 6899 vi->mergeable_rx_bufs = true; 6900 dev->xdp_features |= NETDEV_XDP_ACT_RX_SG; 6901 } 6902 6903 if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) 6904 vi->has_rss_hash_report = true; 6905 6906 if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) { 6907 vi->has_rss = true; 6908 6909 vi->rss_indir_table_size = 6910 virtio_cread16(vdev, offsetof(struct virtio_net_config, 6911 rss_max_indirection_table_length)); 6912 } 6913 vi->rss_hdr = devm_kzalloc(&vdev->dev, virtnet_rss_hdr_size(vi), GFP_KERNEL); 6914 if (!vi->rss_hdr) { 6915 err = -ENOMEM; 6916 goto free; 6917 } 6918 6919 if (vi->has_rss || vi->has_rss_hash_report) { 6920 vi->rss_key_size = 6921 virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size)); 6922 if (vi->rss_key_size > VIRTIO_NET_RSS_MAX_KEY_SIZE) { 6923 dev_err(&vdev->dev, "rss_max_key_size=%u exceeds the limit %u.\n", 6924 vi->rss_key_size, VIRTIO_NET_RSS_MAX_KEY_SIZE); 6925 err = -EINVAL; 6926 goto free; 6927 } 6928 6929 vi->rss_hash_types_supported = 6930 virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types)); 6931 vi->rss_hash_types_supported &= 6932 ~(VIRTIO_NET_RSS_HASH_TYPE_IP_EX | 6933 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | 6934 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX); 6935 6936 dev->hw_features |= NETIF_F_RXHASH; 6937 dev->xdp_metadata_ops = &virtnet_xdp_metadata_ops; 6938 } 6939 6940 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO) || 6941 virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO)) 6942 vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash_tunnel); 6943 else if (vi->has_rss_hash_report) 6944 vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash); 6945 else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) || 6946 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 6947 vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); 6948 else 6949 vi->hdr_len = sizeof(struct virtio_net_hdr); 6950 6951 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM)) 6952 vi->rx_tnl_csum = true; 6953 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO)) 6954 vi->rx_tnl = true; 6955 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO)) 6956 vi->tx_tnl = true; 6957 6958 if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) || 6959 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 6960 vi->any_header_sg = true; 6961 6962 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) 6963 vi->has_cvq = true; 6964 6965 mutex_init(&vi->cvq_lock); 6966 6967 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { 6968 mtu = virtio_cread16(vdev, 6969 offsetof(struct virtio_net_config, 6970 mtu)); 6971 if (mtu < dev->min_mtu) { 6972 /* Should never trigger: MTU was previously validated 6973 * in virtnet_validate. 
6974 */ 6975 dev_err(&vdev->dev, 6976 "device MTU appears to have changed it is now %d < %d", 6977 mtu, dev->min_mtu); 6978 err = -EINVAL; 6979 goto free; 6980 } 6981 6982 dev->mtu = mtu; 6983 dev->max_mtu = mtu; 6984 } 6985 6986 virtnet_set_big_packets(vi, mtu); 6987 6988 if (vi->any_header_sg) 6989 dev->needed_headroom = vi->hdr_len; 6990 6991 /* Enable multiqueue by default */ 6992 if (num_online_cpus() >= max_queue_pairs) 6993 vi->curr_queue_pairs = max_queue_pairs; 6994 else 6995 vi->curr_queue_pairs = num_online_cpus(); 6996 vi->max_queue_pairs = max_queue_pairs; 6997 6998 /* Allocate/initialize the rx/tx queues, and invoke find_vqs */ 6999 err = init_vqs(vi); 7000 if (err) 7001 goto free; 7002 7003 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { 7004 vi->intr_coal_rx.max_usecs = 0; 7005 vi->intr_coal_tx.max_usecs = 0; 7006 vi->intr_coal_rx.max_packets = 0; 7007 7008 /* Keep the default values of the coalescing parameters 7009 * aligned with the default napi_tx state. 7010 */ 7011 if (vi->sq[0].napi.weight) 7012 vi->intr_coal_tx.max_packets = 1; 7013 else 7014 vi->intr_coal_tx.max_packets = 0; 7015 } 7016 7017 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { 7018 /* The reason is the same as VIRTIO_NET_F_NOTF_COAL. */ 7019 for (i = 0; i < vi->max_queue_pairs; i++) 7020 if (vi->sq[i].napi.weight) 7021 vi->sq[i].intr_coal.max_packets = 1; 7022 7023 err = virtnet_init_irq_moder(vi); 7024 if (err) 7025 goto free; 7026 } 7027 7028 #ifdef CONFIG_SYSFS 7029 if (vi->mergeable_rx_bufs) 7030 dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group; 7031 #endif 7032 netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs); 7033 netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs); 7034 7035 virtnet_init_settings(dev); 7036 7037 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { 7038 vi->failover = net_failover_create(vi->dev); 7039 if (IS_ERR(vi->failover)) { 7040 err = PTR_ERR(vi->failover); 7041 goto free_vqs; 7042 } 7043 } 7044 7045 if (vi->has_rss || vi->has_rss_hash_report) 7046 virtnet_init_default_rss(vi); 7047 7048 enable_rx_mode_work(vi); 7049 7050 /* serialize netdev register + virtio_device_ready() with ndo_open() */ 7051 rtnl_lock(); 7052 7053 err = register_netdevice(dev); 7054 if (err) { 7055 pr_debug("virtio_net: registering device failed\n"); 7056 rtnl_unlock(); 7057 goto free_failover; 7058 } 7059 7060 /* Disable config change notification until ndo_open. */ 7061 virtio_config_driver_disable(vi->vdev); 7062 7063 virtio_device_ready(vdev); 7064 7065 if (vi->has_rss || vi->has_rss_hash_report) { 7066 if (!virtnet_commit_rss_command(vi)) { 7067 dev_warn(&vdev->dev, "RSS disabled because committing failed.\n"); 7068 dev->hw_features &= ~NETIF_F_RXHASH; 7069 vi->has_rss_hash_report = false; 7070 vi->has_rss = false; 7071 } 7072 } 7073 7074 virtnet_set_queues(vi, vi->curr_queue_pairs); 7075 7076 /* a random MAC address has been assigned, notify the device. 
7077 * We don't fail probe if VIRTIO_NET_F_CTRL_MAC_ADDR is not there 7078 * because many devices work fine without getting MAC explicitly 7079 */ 7080 if (!virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && 7081 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 7082 struct scatterlist sg; 7083 7084 sg_init_one(&sg, dev->dev_addr, dev->addr_len); 7085 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 7086 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { 7087 pr_debug("virtio_net: setting MAC address failed\n"); 7088 rtnl_unlock(); 7089 err = -EINVAL; 7090 goto free_unregister_netdev; 7091 } 7092 } 7093 7094 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) { 7095 struct virtio_net_stats_capabilities *stats_cap __free(kfree) = NULL; 7096 struct scatterlist sg; 7097 __le64 v; 7098 7099 stats_cap = kzalloc(sizeof(*stats_cap), GFP_KERNEL); 7100 if (!stats_cap) { 7101 rtnl_unlock(); 7102 err = -ENOMEM; 7103 goto free_unregister_netdev; 7104 } 7105 7106 sg_init_one(&sg, stats_cap, sizeof(*stats_cap)); 7107 7108 if (!virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS, 7109 VIRTIO_NET_CTRL_STATS_QUERY, 7110 NULL, &sg)) { 7111 pr_debug("virtio_net: fail to get stats capability\n"); 7112 rtnl_unlock(); 7113 err = -EINVAL; 7114 goto free_unregister_netdev; 7115 } 7116 7117 v = stats_cap->supported_stats_types[0]; 7118 vi->device_stats_cap = le64_to_cpu(v); 7119 } 7120 7121 /* Assume link up if device can't report link status, 7122 otherwise get link status from config. */ 7123 netif_carrier_off(dev); 7124 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { 7125 virtio_config_changed(vi->vdev); 7126 } else { 7127 vi->status = VIRTIO_NET_S_LINK_UP; 7128 virtnet_update_settings(vi); 7129 netif_carrier_on(dev); 7130 } 7131 7132 for (i = 0; i < ARRAY_SIZE(guest_offloads); i++) { 7133 unsigned int fbit; 7134 7135 fbit = virtio_offload_to_feature(guest_offloads[i]); 7136 if (virtio_has_feature(vi->vdev, fbit)) 7137 set_bit(guest_offloads[i], &vi->guest_offloads); 7138 } 7139 vi->guest_offloads_capable = vi->guest_offloads; 7140 7141 rtnl_unlock(); 7142 7143 err = virtnet_cpu_notif_add(vi); 7144 if (err) { 7145 pr_debug("virtio_net: registering cpu notifier failed\n"); 7146 goto free_unregister_netdev; 7147 } 7148 7149 pr_debug("virtnet: registered device %s with %d RX and TX vq's\n", 7150 dev->name, max_queue_pairs); 7151 7152 return 0; 7153 7154 free_unregister_netdev: 7155 unregister_netdev(dev); 7156 free_failover: 7157 net_failover_destroy(vi->failover); 7158 free_vqs: 7159 virtio_reset_device(vdev); 7160 cancel_delayed_work_sync(&vi->refill); 7161 free_receive_page_frags(vi); 7162 virtnet_del_vqs(vi); 7163 free: 7164 free_netdev(dev); 7165 return err; 7166 } 7167 7168 static void remove_vq_common(struct virtnet_info *vi) 7169 { 7170 int i; 7171 7172 virtio_reset_device(vi->vdev); 7173 7174 /* Free unused buffers in both send and recv, if any. */ 7175 free_unused_bufs(vi); 7176 7177 /* 7178 * Rule of thumb is netdev_tx_reset_queue() should follow any 7179 * skb freeing not followed by netdev_tx_completed_queue() 7180 */ 7181 for (i = 0; i < vi->max_queue_pairs; i++) 7182 netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, i)); 7183 7184 free_receive_bufs(vi); 7185 7186 free_receive_page_frags(vi); 7187 7188 virtnet_del_vqs(vi); 7189 } 7190 7191 static void virtnet_remove(struct virtio_device *vdev) 7192 { 7193 struct virtnet_info *vi = vdev->priv; 7194 7195 virtnet_cpu_notif_remove(vi); 7196 7197 /* Make sure no work handler is accessing the device. 
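	 * (the same flush/disable sequence used in virtnet_freeze_down())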
*/ 7198 flush_work(&vi->config_work); 7199 disable_rx_mode_work(vi); 7200 flush_work(&vi->rx_mode_work); 7201 7202 virtnet_free_irq_moder(vi); 7203 7204 unregister_netdev(vi->dev); 7205 7206 net_failover_destroy(vi->failover); 7207 7208 remove_vq_common(vi); 7209 7210 free_netdev(vi->dev); 7211 } 7212 7213 static __maybe_unused int virtnet_freeze(struct virtio_device *vdev) 7214 { 7215 struct virtnet_info *vi = vdev->priv; 7216 7217 virtnet_cpu_notif_remove(vi); 7218 virtnet_freeze_down(vdev); 7219 remove_vq_common(vi); 7220 7221 return 0; 7222 } 7223 7224 static __maybe_unused int virtnet_restore(struct virtio_device *vdev) 7225 { 7226 struct virtnet_info *vi = vdev->priv; 7227 int err; 7228 7229 err = virtnet_restore_up(vdev); 7230 if (err) 7231 return err; 7232 virtnet_set_queues(vi, vi->curr_queue_pairs); 7233 7234 err = virtnet_cpu_notif_add(vi); 7235 if (err) { 7236 virtnet_freeze_down(vdev); 7237 remove_vq_common(vi); 7238 return err; 7239 } 7240 7241 return 0; 7242 } 7243 7244 static struct virtio_device_id id_table[] = { 7245 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, 7246 { 0 }, 7247 }; 7248 7249 #define VIRTNET_FEATURES \ 7250 VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \ 7251 VIRTIO_NET_F_MAC, \ 7252 VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \ 7253 VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \ 7254 VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \ 7255 VIRTIO_NET_F_HOST_USO, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, \ 7256 VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \ 7257 VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \ 7258 VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \ 7259 VIRTIO_NET_F_CTRL_MAC_ADDR, \ 7260 VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ 7261 VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ 7262 VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \ 7263 VIRTIO_NET_F_VQ_NOTF_COAL, \ 7264 VIRTIO_NET_F_GUEST_HDRLEN, VIRTIO_NET_F_DEVICE_STATS 7265 7266 static unsigned int features[] = { 7267 VIRTNET_FEATURES, 7268 VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO, 7269 VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM, 7270 VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO, 7271 VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM, 7272 }; 7273 7274 static unsigned int features_legacy[] = { 7275 VIRTNET_FEATURES, 7276 VIRTIO_NET_F_GSO, 7277 VIRTIO_F_ANY_LAYOUT, 7278 }; 7279 7280 static struct virtio_driver virtio_net_driver = { 7281 .feature_table = features, 7282 .feature_table_size = ARRAY_SIZE(features), 7283 .feature_table_legacy = features_legacy, 7284 .feature_table_size_legacy = ARRAY_SIZE(features_legacy), 7285 .driver.name = KBUILD_MODNAME, 7286 .id_table = id_table, 7287 .validate = virtnet_validate, 7288 .probe = virtnet_probe, 7289 .remove = virtnet_remove, 7290 .config_changed = virtnet_config_changed, 7291 #ifdef CONFIG_PM_SLEEP 7292 .freeze = virtnet_freeze, 7293 .restore = virtnet_restore, 7294 #endif 7295 }; 7296 7297 static __init int virtio_net_driver_init(void) 7298 { 7299 int ret; 7300 7301 ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online", 7302 virtnet_cpu_online, 7303 virtnet_cpu_down_prep); 7304 if (ret < 0) 7305 goto out; 7306 virtionet_online = ret; 7307 ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead", 7308 NULL, virtnet_cpu_dead); 7309 if (ret) 7310 goto err_dead; 7311 ret = register_virtio_driver(&virtio_net_driver); 7312 if (ret) 7313 goto err_virtio; 7314 return 0; 7315 err_virtio: 7316 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); 7317 
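	/* Unwinding continues here: drop the dynamic online state that was
	 * registered first.
	 */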
err_dead: 7318 cpuhp_remove_multi_state(virtionet_online); 7319 out: 7320 return ret; 7321 } 7322 module_init(virtio_net_driver_init); 7323 7324 static __exit void virtio_net_driver_exit(void) 7325 { 7326 unregister_virtio_driver(&virtio_net_driver); 7327 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); 7328 cpuhp_remove_multi_state(virtionet_online); 7329 } 7330 module_exit(virtio_net_driver_exit); 7331 7332 MODULE_DEVICE_TABLE(virtio, id_table); 7333 MODULE_DESCRIPTION("Virtio network driver"); 7334 MODULE_LICENSE("GPL"); 7335