// SPDX-License-Identifier: GPL-2.0-or-later
/* A network driver using virtio.
 *
 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
 */
//#define DEBUG
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_net.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/scatterlist.h>
#include <linux/if_vlan.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/average.h>
#include <linux/filter.h>
#include <linux/kernel.h>
#include <linux/dim.h>
#include <net/route.h>
#include <net/xdp.h>
#include <net/net_failover.h>
#include <net/netdev_rx_queue.h>
#include <net/netdev_queues.h>
#include <net/xdp_sock_drv.h>

static int napi_weight = NAPI_POLL_WEIGHT;
module_param(napi_weight, int, 0444);

static bool csum = true, gso = true, napi_tx = true;
module_param(csum, bool, 0444);
module_param(gso, bool, 0444);
module_param(napi_tx, bool, 0644);

#define VIRTIO_OFFLOAD_MAP_MIN	46
#define VIRTIO_OFFLOAD_MAP_MAX	47
#define VIRTIO_FEATURES_MAP_MIN	65
#define VIRTIO_O2F_DELTA	(VIRTIO_FEATURES_MAP_MIN - \
				 VIRTIO_OFFLOAD_MAP_MIN)

static bool virtio_is_mapped_offload(unsigned int obit)
{
	return obit >= VIRTIO_OFFLOAD_MAP_MIN &&
	       obit <= VIRTIO_OFFLOAD_MAP_MAX;
}

static unsigned int virtio_offload_to_feature(unsigned int obit)
{
	return virtio_is_mapped_offload(obit) ? obit + VIRTIO_O2F_DELTA : obit;
}

/* FIXME: MTU in config. */
#define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
#define GOOD_COPY_LEN	128

#define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)

/* Separating two types of XDP xmit */
#define VIRTIO_XDP_TX		BIT(0)
#define VIRTIO_XDP_REDIR	BIT(1)

/* RX packet size EWMA. The average packet size is used to determine the packet
 * buffer size when refilling RX rings. As the entire RX ring may be refilled
 * at once, the weight is chosen so that the EWMA will be insensitive to short-
 * term, transient changes in packet size.
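 * (With DECLARE_EWMA(pkt_len, 0, 64) below, each new sample only moves the
 *  average by roughly 1/64 of the difference: avg = avg - avg/64 + pkt_len/64.)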
69 */ 70 DECLARE_EWMA(pkt_len, 0, 64) 71 72 #define VIRTNET_DRIVER_VERSION "1.0.0" 73 74 static const unsigned long guest_offloads[] = { 75 VIRTIO_NET_F_GUEST_TSO4, 76 VIRTIO_NET_F_GUEST_TSO6, 77 VIRTIO_NET_F_GUEST_ECN, 78 VIRTIO_NET_F_GUEST_UFO, 79 VIRTIO_NET_F_GUEST_CSUM, 80 VIRTIO_NET_F_GUEST_USO4, 81 VIRTIO_NET_F_GUEST_USO6, 82 VIRTIO_NET_F_GUEST_HDRLEN, 83 VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_MAPPED, 84 VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM_MAPPED, 85 }; 86 87 #define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ 88 (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ 89 (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ 90 (1ULL << VIRTIO_NET_F_GUEST_UFO) | \ 91 (1ULL << VIRTIO_NET_F_GUEST_USO4) | \ 92 (1ULL << VIRTIO_NET_F_GUEST_USO6) | \ 93 (1ULL << VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_MAPPED) | \ 94 (1ULL << VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM_MAPPED)) 95 96 struct virtnet_stat_desc { 97 char desc[ETH_GSTRING_LEN]; 98 size_t offset; 99 size_t qstat_offset; 100 }; 101 102 struct virtnet_sq_free_stats { 103 u64 packets; 104 u64 bytes; 105 u64 napi_packets; 106 u64 napi_bytes; 107 u64 xsk; 108 }; 109 110 struct virtnet_sq_stats { 111 struct u64_stats_sync syncp; 112 u64_stats_t packets; 113 u64_stats_t bytes; 114 u64_stats_t xdp_tx; 115 u64_stats_t xdp_tx_drops; 116 u64_stats_t kicks; 117 u64_stats_t tx_timeouts; 118 u64_stats_t stop; 119 u64_stats_t wake; 120 }; 121 122 struct virtnet_rq_stats { 123 struct u64_stats_sync syncp; 124 u64_stats_t packets; 125 u64_stats_t bytes; 126 u64_stats_t drops; 127 u64_stats_t xdp_packets; 128 u64_stats_t xdp_tx; 129 u64_stats_t xdp_redirects; 130 u64_stats_t xdp_drops; 131 u64_stats_t kicks; 132 }; 133 134 #define VIRTNET_SQ_STAT(name, m) {name, offsetof(struct virtnet_sq_stats, m), -1} 135 #define VIRTNET_RQ_STAT(name, m) {name, offsetof(struct virtnet_rq_stats, m), -1} 136 137 #define VIRTNET_SQ_STAT_QSTAT(name, m) \ 138 { \ 139 name, \ 140 offsetof(struct virtnet_sq_stats, m), \ 141 offsetof(struct netdev_queue_stats_tx, m), \ 142 } 143 144 #define VIRTNET_RQ_STAT_QSTAT(name, m) \ 145 { \ 146 name, \ 147 offsetof(struct virtnet_rq_stats, m), \ 148 offsetof(struct netdev_queue_stats_rx, m), \ 149 } 150 151 static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = { 152 VIRTNET_SQ_STAT("xdp_tx", xdp_tx), 153 VIRTNET_SQ_STAT("xdp_tx_drops", xdp_tx_drops), 154 VIRTNET_SQ_STAT("kicks", kicks), 155 VIRTNET_SQ_STAT("tx_timeouts", tx_timeouts), 156 }; 157 158 static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = { 159 VIRTNET_RQ_STAT("drops", drops), 160 VIRTNET_RQ_STAT("xdp_packets", xdp_packets), 161 VIRTNET_RQ_STAT("xdp_tx", xdp_tx), 162 VIRTNET_RQ_STAT("xdp_redirects", xdp_redirects), 163 VIRTNET_RQ_STAT("xdp_drops", xdp_drops), 164 VIRTNET_RQ_STAT("kicks", kicks), 165 }; 166 167 static const struct virtnet_stat_desc virtnet_sq_stats_desc_qstat[] = { 168 VIRTNET_SQ_STAT_QSTAT("packets", packets), 169 VIRTNET_SQ_STAT_QSTAT("bytes", bytes), 170 VIRTNET_SQ_STAT_QSTAT("stop", stop), 171 VIRTNET_SQ_STAT_QSTAT("wake", wake), 172 }; 173 174 static const struct virtnet_stat_desc virtnet_rq_stats_desc_qstat[] = { 175 VIRTNET_RQ_STAT_QSTAT("packets", packets), 176 VIRTNET_RQ_STAT_QSTAT("bytes", bytes), 177 }; 178 179 #define VIRTNET_STATS_DESC_CQ(name) \ 180 {#name, offsetof(struct virtio_net_stats_cvq, name), -1} 181 182 #define VIRTNET_STATS_DESC_RX(class, name) \ 183 {#name, offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), -1} 184 185 #define VIRTNET_STATS_DESC_TX(class, name) \ 186 {#name, offsetof(struct virtio_net_stats_tx_ ## class, 
tx_ ## name), -1} 187 188 189 static const struct virtnet_stat_desc virtnet_stats_cvq_desc[] = { 190 VIRTNET_STATS_DESC_CQ(command_num), 191 VIRTNET_STATS_DESC_CQ(ok_num), 192 }; 193 194 static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc[] = { 195 VIRTNET_STATS_DESC_RX(basic, packets), 196 VIRTNET_STATS_DESC_RX(basic, bytes), 197 198 VIRTNET_STATS_DESC_RX(basic, notifications), 199 VIRTNET_STATS_DESC_RX(basic, interrupts), 200 }; 201 202 static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc[] = { 203 VIRTNET_STATS_DESC_TX(basic, packets), 204 VIRTNET_STATS_DESC_TX(basic, bytes), 205 206 VIRTNET_STATS_DESC_TX(basic, notifications), 207 VIRTNET_STATS_DESC_TX(basic, interrupts), 208 }; 209 210 static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc[] = { 211 VIRTNET_STATS_DESC_RX(csum, needs_csum), 212 }; 213 214 static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc[] = { 215 VIRTNET_STATS_DESC_TX(gso, gso_packets_noseg), 216 VIRTNET_STATS_DESC_TX(gso, gso_bytes_noseg), 217 }; 218 219 static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc[] = { 220 VIRTNET_STATS_DESC_RX(speed, ratelimit_bytes), 221 }; 222 223 static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc[] = { 224 VIRTNET_STATS_DESC_TX(speed, ratelimit_bytes), 225 }; 226 227 #define VIRTNET_STATS_DESC_RX_QSTAT(class, name, qstat_field) \ 228 { \ 229 #name, \ 230 offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), \ 231 offsetof(struct netdev_queue_stats_rx, qstat_field), \ 232 } 233 234 #define VIRTNET_STATS_DESC_TX_QSTAT(class, name, qstat_field) \ 235 { \ 236 #name, \ 237 offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name), \ 238 offsetof(struct netdev_queue_stats_tx, qstat_field), \ 239 } 240 241 static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc_qstat[] = { 242 VIRTNET_STATS_DESC_RX_QSTAT(basic, drops, hw_drops), 243 VIRTNET_STATS_DESC_RX_QSTAT(basic, drop_overruns, hw_drop_overruns), 244 }; 245 246 static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc_qstat[] = { 247 VIRTNET_STATS_DESC_TX_QSTAT(basic, drops, hw_drops), 248 VIRTNET_STATS_DESC_TX_QSTAT(basic, drop_malformed, hw_drop_errors), 249 }; 250 251 static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc_qstat[] = { 252 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_valid, csum_unnecessary), 253 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_none, csum_none), 254 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_bad, csum_bad), 255 }; 256 257 static const struct virtnet_stat_desc virtnet_stats_tx_csum_desc_qstat[] = { 258 VIRTNET_STATS_DESC_TX_QSTAT(csum, csum_none, csum_none), 259 VIRTNET_STATS_DESC_TX_QSTAT(csum, needs_csum, needs_csum), 260 }; 261 262 static const struct virtnet_stat_desc virtnet_stats_rx_gso_desc_qstat[] = { 263 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets, hw_gro_packets), 264 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes, hw_gro_bytes), 265 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets_coalesced, hw_gro_wire_packets), 266 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes_coalesced, hw_gro_wire_bytes), 267 }; 268 269 static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc_qstat[] = { 270 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_packets, hw_gso_packets), 271 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_bytes, hw_gso_bytes), 272 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments, hw_gso_wire_packets), 273 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments_bytes, hw_gso_wire_bytes), 274 }; 275 276 static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc_qstat[] = { 277 
VIRTNET_STATS_DESC_RX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits), 278 }; 279 280 static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc_qstat[] = { 281 VIRTNET_STATS_DESC_TX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits), 282 }; 283 284 #define VIRTNET_Q_TYPE_RX 0 285 #define VIRTNET_Q_TYPE_TX 1 286 #define VIRTNET_Q_TYPE_CQ 2 287 288 struct virtnet_interrupt_coalesce { 289 u32 max_packets; 290 u32 max_usecs; 291 }; 292 293 /* The dma information of pages allocated at a time. */ 294 struct virtnet_rq_dma { 295 dma_addr_t addr; 296 u32 ref; 297 u16 len; 298 u16 need_sync; 299 }; 300 301 /* Internal representation of a send virtqueue */ 302 struct send_queue { 303 /* Virtqueue associated with this send _queue */ 304 struct virtqueue *vq; 305 306 /* TX: fragments + linear part + virtio header */ 307 struct scatterlist sg[MAX_SKB_FRAGS + 2]; 308 309 /* Name of the send queue: output.$index */ 310 char name[16]; 311 312 struct virtnet_sq_stats stats; 313 314 struct virtnet_interrupt_coalesce intr_coal; 315 316 struct napi_struct napi; 317 318 /* Record whether sq is in reset state. */ 319 bool reset; 320 321 struct xsk_buff_pool *xsk_pool; 322 323 dma_addr_t xsk_hdr_dma_addr; 324 }; 325 326 /* Internal representation of a receive virtqueue */ 327 struct receive_queue { 328 /* Virtqueue associated with this receive_queue */ 329 struct virtqueue *vq; 330 331 struct napi_struct napi; 332 333 struct bpf_prog __rcu *xdp_prog; 334 335 struct virtnet_rq_stats stats; 336 337 /* The number of rx notifications */ 338 u16 calls; 339 340 /* Is dynamic interrupt moderation enabled? */ 341 bool dim_enabled; 342 343 /* Used to protect dim_enabled and inter_coal */ 344 struct mutex dim_lock; 345 346 /* Dynamic Interrupt Moderation */ 347 struct dim dim; 348 349 u32 packets_in_napi; 350 351 struct virtnet_interrupt_coalesce intr_coal; 352 353 /* Chain pages by the private ptr. */ 354 struct page *pages; 355 356 /* Average packet length for mergeable receive buffers. */ 357 struct ewma_pkt_len mrg_avg_pkt_len; 358 359 /* Page frag for packet buffer allocation. */ 360 struct page_frag alloc_frag; 361 362 /* RX: fragments + linear part + virtio header */ 363 struct scatterlist sg[MAX_SKB_FRAGS + 2]; 364 365 /* Min single buffer size for mergeable buffers case. */ 366 unsigned int min_buf_len; 367 368 /* Name of this receive queue: input.$index */ 369 char name[16]; 370 371 struct xdp_rxq_info xdp_rxq; 372 373 /* Record the last dma info to free after new pages is allocated. */ 374 struct virtnet_rq_dma *last_dma; 375 376 struct xsk_buff_pool *xsk_pool; 377 378 /* xdp rxq used by xsk */ 379 struct xdp_rxq_info xsk_rxq_info; 380 381 struct xdp_buff **xsk_buffs; 382 }; 383 384 #define VIRTIO_NET_RSS_MAX_KEY_SIZE 40 385 386 /* Control VQ buffers: protected by the rtnl lock */ 387 struct control_buf { 388 struct virtio_net_ctrl_hdr hdr; 389 virtio_net_ctrl_ack status; 390 }; 391 392 struct virtnet_info { 393 struct virtio_device *vdev; 394 struct virtqueue *cvq; 395 struct net_device *dev; 396 struct send_queue *sq; 397 struct receive_queue *rq; 398 unsigned int status; 399 400 /* Max # of queue pairs supported by the device */ 401 u16 max_queue_pairs; 402 403 /* # of queue pairs currently used by the driver */ 404 u16 curr_queue_pairs; 405 406 /* # of XDP queue pairs currently used by the driver */ 407 u16 xdp_queue_pairs; 408 409 /* xdp_queue_pairs may be 0, when xdp is already loaded. So add this. */ 410 bool xdp_enabled; 411 412 /* I like... big packets and I cannot lie! 
 */
	bool big_packets;

	/* number of sg entries allocated for big packets */
	unsigned int big_packets_num_skbfrags;

	/* Host will merge rx buffers for big packets (shake it! shake it!) */
	bool mergeable_rx_bufs;

	/* Host supports rss and/or hash report */
	bool has_rss;
	bool has_rss_hash_report;
	u8 rss_key_size;
	u16 rss_indir_table_size;
	u32 rss_hash_types_supported;
	u32 rss_hash_types_saved;
	struct virtio_net_rss_config_hdr *rss_hdr;
	struct virtio_net_rss_config_trailer rss_trailer;
	u8 rss_hash_key_data[VIRTIO_NET_RSS_MAX_KEY_SIZE];

	/* Has control virtqueue */
	bool has_cvq;

	/* Lock to protect the control VQ */
	struct mutex cvq_lock;

	/* Host can handle any s/g split between our header and packet data */
	bool any_header_sg;

	/* Packet virtio header size */
	u8 hdr_len;

	/* Work struct for delayed refilling if we run low on memory. */
	struct delayed_work refill;

	/* UDP tunnel support */
	bool tx_tnl;

	bool rx_tnl;

	bool rx_tnl_csum;

	/* Is delayed refill enabled? */
	bool refill_enabled;

	/* The lock to synchronize the access to refill_enabled */
	spinlock_t refill_lock;

	/* Work struct for config space updates */
	struct work_struct config_work;

	/* Work struct for setting rx mode */
	struct work_struct rx_mode_work;

	/* OK to queue work setting RX mode? */
	bool rx_mode_work_enabled;

	/* Is the affinity hint set for virtqueues? */
	bool affinity_hint_set;

	/* CPU hotplug instances for online & dead */
	struct hlist_node node;
	struct hlist_node node_dead;

	struct control_buf *ctrl;

	/* Ethtool settings */
	u8 duplex;
	u32 speed;

	/* Is rx dynamic interrupt moderation enabled? */
	bool rx_dim_enabled;

	/* Interrupt coalescing settings */
	struct virtnet_interrupt_coalesce intr_coal_tx;
	struct virtnet_interrupt_coalesce intr_coal_rx;

	unsigned long guest_offloads;
	unsigned long guest_offloads_capable;

	/* failover when STANDBY feature enabled */
	struct failover *failover;

	u64 device_stats_cap;
};

struct padded_vnet_hdr {
	struct virtio_net_hdr_v1_hash hdr;
	/*
	 * hdr is in a separate sg buffer, and the data sg buffer shares the
	 * same page with this header sg. This padding makes the next sg
	 * 16 byte aligned after the header.
504 */ 505 char padding[12]; 506 }; 507 508 struct virtio_net_common_hdr { 509 union { 510 struct virtio_net_hdr hdr; 511 struct virtio_net_hdr_mrg_rxbuf mrg_hdr; 512 struct virtio_net_hdr_v1_hash hash_v1_hdr; 513 struct virtio_net_hdr_v1_hash_tunnel tnl_hdr; 514 }; 515 }; 516 517 static struct virtio_net_common_hdr xsk_hdr; 518 519 static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf); 520 static void virtnet_sq_free_unused_buf_done(struct virtqueue *vq); 521 static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp, 522 struct net_device *dev, 523 unsigned int *xdp_xmit, 524 struct virtnet_rq_stats *stats); 525 static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq, 526 struct sk_buff *skb, u8 flags); 527 static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb, 528 struct sk_buff *curr_skb, 529 struct page *page, void *buf, 530 int len, int truesize); 531 static void virtnet_xsk_completed(struct send_queue *sq, int num); 532 533 enum virtnet_xmit_type { 534 VIRTNET_XMIT_TYPE_SKB, 535 VIRTNET_XMIT_TYPE_SKB_ORPHAN, 536 VIRTNET_XMIT_TYPE_XDP, 537 VIRTNET_XMIT_TYPE_XSK, 538 }; 539 540 static size_t virtnet_rss_hdr_size(const struct virtnet_info *vi) 541 { 542 u16 indir_table_size = vi->has_rss ? vi->rss_indir_table_size : 1; 543 544 return struct_size(vi->rss_hdr, indirection_table, indir_table_size); 545 } 546 547 static size_t virtnet_rss_trailer_size(const struct virtnet_info *vi) 548 { 549 return struct_size(&vi->rss_trailer, hash_key_data, vi->rss_key_size); 550 } 551 552 /* We use the last two bits of the pointer to distinguish the xmit type. */ 553 #define VIRTNET_XMIT_TYPE_MASK (BIT(0) | BIT(1)) 554 555 #define VIRTIO_XSK_FLAG_OFFSET 2 556 557 static enum virtnet_xmit_type virtnet_xmit_ptr_unpack(void **ptr) 558 { 559 unsigned long p = (unsigned long)*ptr; 560 561 *ptr = (void *)(p & ~VIRTNET_XMIT_TYPE_MASK); 562 563 return p & VIRTNET_XMIT_TYPE_MASK; 564 } 565 566 static void *virtnet_xmit_ptr_pack(void *ptr, enum virtnet_xmit_type type) 567 { 568 return (void *)((unsigned long)ptr | type); 569 } 570 571 static int virtnet_add_outbuf(struct send_queue *sq, int num, void *data, 572 enum virtnet_xmit_type type) 573 { 574 return virtqueue_add_outbuf(sq->vq, sq->sg, num, 575 virtnet_xmit_ptr_pack(data, type), 576 GFP_ATOMIC); 577 } 578 579 static u32 virtnet_ptr_to_xsk_buff_len(void *ptr) 580 { 581 return ((unsigned long)ptr) >> VIRTIO_XSK_FLAG_OFFSET; 582 } 583 584 static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len) 585 { 586 sg_dma_address(sg) = addr; 587 sg_dma_len(sg) = len; 588 } 589 590 static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq, 591 bool in_napi, struct virtnet_sq_free_stats *stats) 592 { 593 struct xdp_frame *frame; 594 struct sk_buff *skb; 595 unsigned int len; 596 void *ptr; 597 598 while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { 599 switch (virtnet_xmit_ptr_unpack(&ptr)) { 600 case VIRTNET_XMIT_TYPE_SKB: 601 skb = ptr; 602 603 pr_debug("Sent skb %p\n", skb); 604 stats->napi_packets++; 605 stats->napi_bytes += skb->len; 606 napi_consume_skb(skb, in_napi); 607 break; 608 609 case VIRTNET_XMIT_TYPE_SKB_ORPHAN: 610 skb = ptr; 611 612 stats->packets++; 613 stats->bytes += skb->len; 614 napi_consume_skb(skb, in_napi); 615 break; 616 617 case VIRTNET_XMIT_TYPE_XDP: 618 frame = ptr; 619 620 stats->packets++; 621 stats->bytes += xdp_get_frame_len(frame); 622 xdp_return_frame(frame); 623 break; 624 625 case VIRTNET_XMIT_TYPE_XSK: 626 
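			/* For XSK the unpacked value is not a pointer: the frame
			 * length was stored shifted left by VIRTIO_XSK_FLAG_OFFSET
			 * (e.g. a 1500-byte frame is queued as (1500 << 2) | type),
			 * so only the byte count is recovered here; completions are
			 * reported back to the pool by the caller.
			 */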
stats->bytes += virtnet_ptr_to_xsk_buff_len(ptr); 627 stats->xsk++; 628 break; 629 } 630 } 631 netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes); 632 } 633 634 static void virtnet_free_old_xmit(struct send_queue *sq, 635 struct netdev_queue *txq, 636 bool in_napi, 637 struct virtnet_sq_free_stats *stats) 638 { 639 __free_old_xmit(sq, txq, in_napi, stats); 640 641 if (stats->xsk) 642 virtnet_xsk_completed(sq, stats->xsk); 643 } 644 645 /* Converting between virtqueue no. and kernel tx/rx queue no. 646 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq 647 */ 648 static int vq2txq(struct virtqueue *vq) 649 { 650 return (vq->index - 1) / 2; 651 } 652 653 static int txq2vq(int txq) 654 { 655 return txq * 2 + 1; 656 } 657 658 static int vq2rxq(struct virtqueue *vq) 659 { 660 return vq->index / 2; 661 } 662 663 static int rxq2vq(int rxq) 664 { 665 return rxq * 2; 666 } 667 668 static int vq_type(struct virtnet_info *vi, int qid) 669 { 670 if (qid == vi->max_queue_pairs * 2) 671 return VIRTNET_Q_TYPE_CQ; 672 673 if (qid % 2) 674 return VIRTNET_Q_TYPE_TX; 675 676 return VIRTNET_Q_TYPE_RX; 677 } 678 679 static inline struct virtio_net_common_hdr * 680 skb_vnet_common_hdr(struct sk_buff *skb) 681 { 682 return (struct virtio_net_common_hdr *)skb->cb; 683 } 684 685 /* 686 * private is used to chain pages for big packets, put the whole 687 * most recent used list in the beginning for reuse 688 */ 689 static void give_pages(struct receive_queue *rq, struct page *page) 690 { 691 struct page *end; 692 693 /* Find end of list, sew whole thing into vi->rq.pages. */ 694 for (end = page; end->private; end = (struct page *)end->private); 695 end->private = (unsigned long)rq->pages; 696 rq->pages = page; 697 } 698 699 static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask) 700 { 701 struct page *p = rq->pages; 702 703 if (p) { 704 rq->pages = (struct page *)p->private; 705 /* clear private here, it is used to chain pages */ 706 p->private = 0; 707 } else 708 p = alloc_page(gfp_mask); 709 return p; 710 } 711 712 static void virtnet_rq_free_buf(struct virtnet_info *vi, 713 struct receive_queue *rq, void *buf) 714 { 715 if (vi->mergeable_rx_bufs) 716 put_page(virt_to_head_page(buf)); 717 else if (vi->big_packets) 718 give_pages(rq, buf); 719 else 720 put_page(virt_to_head_page(buf)); 721 } 722 723 static void enable_delayed_refill(struct virtnet_info *vi) 724 { 725 spin_lock_bh(&vi->refill_lock); 726 vi->refill_enabled = true; 727 spin_unlock_bh(&vi->refill_lock); 728 } 729 730 static void disable_delayed_refill(struct virtnet_info *vi) 731 { 732 spin_lock_bh(&vi->refill_lock); 733 vi->refill_enabled = false; 734 spin_unlock_bh(&vi->refill_lock); 735 } 736 737 static void enable_rx_mode_work(struct virtnet_info *vi) 738 { 739 rtnl_lock(); 740 vi->rx_mode_work_enabled = true; 741 rtnl_unlock(); 742 } 743 744 static void disable_rx_mode_work(struct virtnet_info *vi) 745 { 746 rtnl_lock(); 747 vi->rx_mode_work_enabled = false; 748 rtnl_unlock(); 749 } 750 751 static void virtqueue_napi_schedule(struct napi_struct *napi, 752 struct virtqueue *vq) 753 { 754 if (napi_schedule_prep(napi)) { 755 virtqueue_disable_cb(vq); 756 __napi_schedule(napi); 757 } 758 } 759 760 static bool virtqueue_napi_complete(struct napi_struct *napi, 761 struct virtqueue *vq, int processed) 762 { 763 int opaque; 764 765 opaque = virtqueue_enable_cb_prepare(vq); 766 if (napi_complete_done(napi, processed)) { 767 if (unlikely(virtqueue_poll(vq, opaque))) 768 virtqueue_napi_schedule(napi, vq); 769 else 770 
return true; 771 } else { 772 virtqueue_disable_cb(vq); 773 } 774 775 return false; 776 } 777 778 static void skb_xmit_done(struct virtqueue *vq) 779 { 780 struct virtnet_info *vi = vq->vdev->priv; 781 struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi; 782 783 /* Suppress further interrupts. */ 784 virtqueue_disable_cb(vq); 785 786 if (napi->weight) 787 virtqueue_napi_schedule(napi, vq); 788 else 789 /* We were probably waiting for more output buffers. */ 790 netif_wake_subqueue(vi->dev, vq2txq(vq)); 791 } 792 793 #define MRG_CTX_HEADER_SHIFT 22 794 static void *mergeable_len_to_ctx(unsigned int truesize, 795 unsigned int headroom) 796 { 797 return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize); 798 } 799 800 static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx) 801 { 802 return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT; 803 } 804 805 static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx) 806 { 807 return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1); 808 } 809 810 static int check_mergeable_len(struct net_device *dev, void *mrg_ctx, 811 unsigned int len) 812 { 813 unsigned int headroom, tailroom, room, truesize; 814 815 truesize = mergeable_ctx_to_truesize(mrg_ctx); 816 headroom = mergeable_ctx_to_headroom(mrg_ctx); 817 tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 818 room = SKB_DATA_ALIGN(headroom + tailroom); 819 820 if (len > truesize - room) { 821 pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 822 dev->name, len, (unsigned long)(truesize - room)); 823 DEV_STATS_INC(dev, rx_length_errors); 824 return -1; 825 } 826 827 return 0; 828 } 829 830 static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen, 831 unsigned int headroom, 832 unsigned int len) 833 { 834 struct sk_buff *skb; 835 836 skb = build_skb(buf, buflen); 837 if (unlikely(!skb)) 838 return NULL; 839 840 skb_reserve(skb, headroom); 841 skb_put(skb, len); 842 843 return skb; 844 } 845 846 /* Called from bottom half context */ 847 static struct sk_buff *page_to_skb(struct virtnet_info *vi, 848 struct receive_queue *rq, 849 struct page *page, unsigned int offset, 850 unsigned int len, unsigned int truesize, 851 unsigned int headroom) 852 { 853 struct sk_buff *skb; 854 struct virtio_net_common_hdr *hdr; 855 unsigned int copy, hdr_len, hdr_padded_len; 856 struct page *page_to_free = NULL; 857 int tailroom, shinfo_size; 858 char *p, *hdr_p, *buf; 859 860 p = page_address(page) + offset; 861 hdr_p = p; 862 863 hdr_len = vi->hdr_len; 864 if (vi->mergeable_rx_bufs) 865 hdr_padded_len = hdr_len; 866 else 867 hdr_padded_len = sizeof(struct padded_vnet_hdr); 868 869 buf = p - headroom; 870 len -= hdr_len; 871 offset += hdr_padded_len; 872 p += hdr_padded_len; 873 tailroom = truesize - headroom - hdr_padded_len - len; 874 875 shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 876 877 if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) { 878 skb = virtnet_build_skb(buf, truesize, p - buf, len); 879 if (unlikely(!skb)) 880 return NULL; 881 882 page = (struct page *)page->private; 883 if (page) 884 give_pages(rq, page); 885 goto ok; 886 } 887 888 /* copy small packet so we can reuse these pages for small data */ 889 skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN); 890 if (unlikely(!skb)) 891 return NULL; 892 893 /* Copy all frame if it fits skb->head, otherwise 894 * we let virtio_net_hdr_to_skb() and GRO pull headers as needed. 
895 */ 896 if (len <= skb_tailroom(skb)) 897 copy = len; 898 else 899 copy = ETH_HLEN; 900 skb_put_data(skb, p, copy); 901 902 len -= copy; 903 offset += copy; 904 905 if (vi->mergeable_rx_bufs) { 906 if (len) 907 skb_add_rx_frag(skb, 0, page, offset, len, truesize); 908 else 909 page_to_free = page; 910 goto ok; 911 } 912 913 BUG_ON(offset >= PAGE_SIZE); 914 while (len) { 915 unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len); 916 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset, 917 frag_size, truesize); 918 len -= frag_size; 919 page = (struct page *)page->private; 920 offset = 0; 921 } 922 923 if (page) 924 give_pages(rq, page); 925 926 ok: 927 hdr = skb_vnet_common_hdr(skb); 928 memcpy(hdr, hdr_p, hdr_len); 929 if (page_to_free) 930 put_page(page_to_free); 931 932 return skb; 933 } 934 935 static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len) 936 { 937 struct virtnet_info *vi = rq->vq->vdev->priv; 938 struct page *page = virt_to_head_page(buf); 939 struct virtnet_rq_dma *dma; 940 void *head; 941 int offset; 942 943 BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs); 944 945 head = page_address(page); 946 947 dma = head; 948 949 --dma->ref; 950 951 if (dma->need_sync && len) { 952 offset = buf - (head + sizeof(*dma)); 953 954 virtqueue_map_sync_single_range_for_cpu(rq->vq, dma->addr, 955 offset, len, 956 DMA_FROM_DEVICE); 957 } 958 959 if (dma->ref) 960 return; 961 962 virtqueue_unmap_single_attrs(rq->vq, dma->addr, dma->len, 963 DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); 964 put_page(page); 965 } 966 967 static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx) 968 { 969 struct virtnet_info *vi = rq->vq->vdev->priv; 970 void *buf; 971 972 BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs); 973 974 buf = virtqueue_get_buf_ctx(rq->vq, len, ctx); 975 if (buf) 976 virtnet_rq_unmap(rq, buf, *len); 977 978 return buf; 979 } 980 981 static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len) 982 { 983 struct virtnet_info *vi = rq->vq->vdev->priv; 984 struct virtnet_rq_dma *dma; 985 dma_addr_t addr; 986 u32 offset; 987 void *head; 988 989 BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs); 990 991 head = page_address(rq->alloc_frag.page); 992 993 offset = buf - head; 994 995 dma = head; 996 997 addr = dma->addr - sizeof(*dma) + offset; 998 999 sg_init_table(rq->sg, 1); 1000 sg_fill_dma(rq->sg, addr, len); 1001 } 1002 1003 static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp) 1004 { 1005 struct page_frag *alloc_frag = &rq->alloc_frag; 1006 struct virtnet_info *vi = rq->vq->vdev->priv; 1007 struct virtnet_rq_dma *dma; 1008 void *buf, *head; 1009 dma_addr_t addr; 1010 1011 BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs); 1012 1013 head = page_address(alloc_frag->page); 1014 1015 dma = head; 1016 1017 /* new pages */ 1018 if (!alloc_frag->offset) { 1019 if (rq->last_dma) { 1020 /* Now, the new page is allocated, the last dma 1021 * will not be used. So the dma can be unmapped 1022 * if the ref is 0. 
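			 * (dma->ref was set to 1 when the page was mapped and is
			 *  bumped for every buffer handed to the ring, so the
			 *  mapping is only torn down in virtnet_rq_unmap() once
			 *  the last buffer from this page has been returned.)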
			 */
			virtnet_rq_unmap(rq, rq->last_dma, 0);
			rq->last_dma = NULL;
		}

		dma->len = alloc_frag->size - sizeof(*dma);

		addr = virtqueue_map_single_attrs(rq->vq, dma + 1,
						  dma->len, DMA_FROM_DEVICE, 0);
		if (virtqueue_map_mapping_error(rq->vq, addr))
			return NULL;

		dma->addr = addr;
		dma->need_sync = virtqueue_map_need_sync(rq->vq, addr);

		/* Add a reference to dma to prevent the entire dma from
		 * being released during error handling. This reference
		 * will be freed after the pages are no longer used.
		 */
		get_page(alloc_frag->page);
		dma->ref = 1;
		alloc_frag->offset = sizeof(*dma);

		rq->last_dma = dma;
	}

	++dma->ref;

	buf = head + alloc_frag->offset;

	get_page(alloc_frag->page);
	alloc_frag->offset += size;

	return buf;
}

static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf)
{
	struct virtnet_info *vi = vq->vdev->priv;
	struct receive_queue *rq;
	int i = vq2rxq(vq);

	rq = &vi->rq[i];

	if (rq->xsk_pool) {
		xsk_buff_free((struct xdp_buff *)buf);
		return;
	}

	if (!vi->big_packets || vi->mergeable_rx_bufs)
		virtnet_rq_unmap(rq, buf, 0);

	virtnet_rq_free_buf(vi, rq, buf);
}

static void free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
			  bool in_napi)
{
	struct virtnet_sq_free_stats stats = {0};

	virtnet_free_old_xmit(sq, txq, in_napi, &stats);

	/* Avoid stats update overhead when no packets have been processed;
	 * this happens when called speculatively from start_xmit.
	 */
	if (!stats.packets && !stats.napi_packets)
		return;

	u64_stats_update_begin(&sq->stats.syncp);
	u64_stats_add(&sq->stats.bytes, stats.bytes + stats.napi_bytes);
	u64_stats_add(&sq->stats.packets, stats.packets + stats.napi_packets);
	u64_stats_update_end(&sq->stats.syncp);
}

static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
{
	if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
		return false;
	else if (q < vi->curr_queue_pairs)
		return true;
	else
		return false;
}

static bool tx_may_stop(struct virtnet_info *vi,
			struct net_device *dev,
			struct send_queue *sq)
{
	int qnum;

	qnum = sq - vi->sq;

	/* If running out of space, stop queue to avoid getting packets that we
	 * are then unable to transmit.
	 * An alternative would be to force queuing layer to requeue the skb by
	 * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be
	 * returned in a normal path of operation: it means that driver is not
	 * maintaining the TX queue stop/start state properly, and causes
	 * the stack to do a non-trivial amount of useless work.
	 * Since most packets only take 1 or 2 ring slots, stopping the queue
	 * early means 16 slots are typically wasted.
1124 */ 1125 if (sq->vq->num_free < MAX_SKB_FRAGS + 2) { 1126 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); 1127 1128 netif_tx_stop_queue(txq); 1129 u64_stats_update_begin(&sq->stats.syncp); 1130 u64_stats_inc(&sq->stats.stop); 1131 u64_stats_update_end(&sq->stats.syncp); 1132 1133 return true; 1134 } 1135 1136 return false; 1137 } 1138 1139 static void check_sq_full_and_disable(struct virtnet_info *vi, 1140 struct net_device *dev, 1141 struct send_queue *sq) 1142 { 1143 bool use_napi = sq->napi.weight; 1144 int qnum; 1145 1146 qnum = sq - vi->sq; 1147 1148 if (tx_may_stop(vi, dev, sq)) { 1149 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); 1150 1151 if (use_napi) { 1152 if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) 1153 virtqueue_napi_schedule(&sq->napi, sq->vq); 1154 } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { 1155 /* More just got used, free them then recheck. */ 1156 free_old_xmit(sq, txq, false); 1157 if (sq->vq->num_free >= MAX_SKB_FRAGS + 2) { 1158 netif_start_subqueue(dev, qnum); 1159 u64_stats_update_begin(&sq->stats.syncp); 1160 u64_stats_inc(&sq->stats.wake); 1161 u64_stats_update_end(&sq->stats.syncp); 1162 virtqueue_disable_cb(sq->vq); 1163 } 1164 } 1165 } 1166 } 1167 1168 /* Note that @len is the length of received data without virtio header */ 1169 static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi, 1170 struct receive_queue *rq, void *buf, 1171 u32 len, bool first_buf) 1172 { 1173 struct xdp_buff *xdp; 1174 u32 bufsize; 1175 1176 xdp = (struct xdp_buff *)buf; 1177 1178 /* In virtnet_add_recvbuf_xsk, we use part of XDP_PACKET_HEADROOM for 1179 * virtio header and ask the vhost to fill data from 1180 * hard_start + XDP_PACKET_HEADROOM - vi->hdr_len 1181 * The first buffer has virtio header so the remaining region for frame 1182 * data is 1183 * xsk_pool_get_rx_frame_size() 1184 * While other buffers than the first one do not have virtio header, so 1185 * the maximum frame data's length can be 1186 * xsk_pool_get_rx_frame_size() + vi->hdr_len 1187 */ 1188 bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool); 1189 if (!first_buf) 1190 bufsize += vi->hdr_len; 1191 1192 if (unlikely(len > bufsize)) { 1193 pr_debug("%s: rx error: len %u exceeds truesize %u\n", 1194 vi->dev->name, len, bufsize); 1195 DEV_STATS_INC(vi->dev, rx_length_errors); 1196 xsk_buff_free(xdp); 1197 return NULL; 1198 } 1199 1200 if (first_buf) { 1201 xsk_buff_set_size(xdp, len); 1202 } else { 1203 xdp_prepare_buff(xdp, xdp->data_hard_start, 1204 XDP_PACKET_HEADROOM - vi->hdr_len, len, 1); 1205 xdp->flags = 0; 1206 } 1207 1208 xsk_buff_dma_sync_for_cpu(xdp); 1209 1210 return xdp; 1211 } 1212 1213 static struct sk_buff *xsk_construct_skb(struct receive_queue *rq, 1214 struct xdp_buff *xdp) 1215 { 1216 unsigned int metasize = xdp->data - xdp->data_meta; 1217 struct sk_buff *skb; 1218 unsigned int size; 1219 1220 size = xdp->data_end - xdp->data_hard_start; 1221 skb = napi_alloc_skb(&rq->napi, size); 1222 if (unlikely(!skb)) { 1223 xsk_buff_free(xdp); 1224 return NULL; 1225 } 1226 1227 skb_reserve(skb, xdp->data_meta - xdp->data_hard_start); 1228 1229 size = xdp->data_end - xdp->data_meta; 1230 memcpy(__skb_put(skb, size), xdp->data_meta, size); 1231 1232 if (metasize) { 1233 __skb_pull(skb, metasize); 1234 skb_metadata_set(skb, metasize); 1235 } 1236 1237 xsk_buff_free(xdp); 1238 1239 return skb; 1240 } 1241 1242 static struct sk_buff *virtnet_receive_xsk_small(struct net_device *dev, struct virtnet_info *vi, 1243 struct receive_queue *rq, struct xdp_buff *xdp, 
1244 unsigned int *xdp_xmit, 1245 struct virtnet_rq_stats *stats) 1246 { 1247 struct bpf_prog *prog; 1248 u32 ret; 1249 1250 ret = XDP_PASS; 1251 rcu_read_lock(); 1252 prog = rcu_dereference(rq->xdp_prog); 1253 if (prog) 1254 ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats); 1255 rcu_read_unlock(); 1256 1257 switch (ret) { 1258 case XDP_PASS: 1259 return xsk_construct_skb(rq, xdp); 1260 1261 case XDP_TX: 1262 case XDP_REDIRECT: 1263 return NULL; 1264 1265 default: 1266 /* drop packet */ 1267 xsk_buff_free(xdp); 1268 u64_stats_inc(&stats->drops); 1269 return NULL; 1270 } 1271 } 1272 1273 static void xsk_drop_follow_bufs(struct net_device *dev, 1274 struct receive_queue *rq, 1275 u32 num_buf, 1276 struct virtnet_rq_stats *stats) 1277 { 1278 struct xdp_buff *xdp; 1279 u32 len; 1280 1281 while (num_buf-- > 1) { 1282 xdp = virtqueue_get_buf(rq->vq, &len); 1283 if (unlikely(!xdp)) { 1284 pr_debug("%s: rx error: %d buffers missing\n", 1285 dev->name, num_buf); 1286 DEV_STATS_INC(dev, rx_length_errors); 1287 break; 1288 } 1289 u64_stats_add(&stats->bytes, len); 1290 xsk_buff_free(xdp); 1291 } 1292 } 1293 1294 static int xsk_append_merge_buffer(struct virtnet_info *vi, 1295 struct receive_queue *rq, 1296 struct sk_buff *head_skb, 1297 u32 num_buf, 1298 struct virtio_net_hdr_mrg_rxbuf *hdr, 1299 struct virtnet_rq_stats *stats) 1300 { 1301 struct sk_buff *curr_skb; 1302 struct xdp_buff *xdp; 1303 u32 len, truesize; 1304 struct page *page; 1305 void *buf; 1306 1307 curr_skb = head_skb; 1308 1309 while (--num_buf) { 1310 buf = virtqueue_get_buf(rq->vq, &len); 1311 if (unlikely(!buf)) { 1312 pr_debug("%s: rx error: %d buffers out of %d missing\n", 1313 vi->dev->name, num_buf, 1314 virtio16_to_cpu(vi->vdev, 1315 hdr->num_buffers)); 1316 DEV_STATS_INC(vi->dev, rx_length_errors); 1317 return -EINVAL; 1318 } 1319 1320 u64_stats_add(&stats->bytes, len); 1321 1322 xdp = buf_to_xdp(vi, rq, buf, len, false); 1323 if (!xdp) 1324 goto err; 1325 1326 buf = napi_alloc_frag(len); 1327 if (!buf) { 1328 xsk_buff_free(xdp); 1329 goto err; 1330 } 1331 1332 memcpy(buf, xdp->data, len); 1333 1334 xsk_buff_free(xdp); 1335 1336 page = virt_to_page(buf); 1337 1338 truesize = len; 1339 1340 curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page, 1341 buf, len, truesize); 1342 if (!curr_skb) { 1343 put_page(page); 1344 goto err; 1345 } 1346 } 1347 1348 return 0; 1349 1350 err: 1351 xsk_drop_follow_bufs(vi->dev, rq, num_buf, stats); 1352 return -EINVAL; 1353 } 1354 1355 static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct virtnet_info *vi, 1356 struct receive_queue *rq, struct xdp_buff *xdp, 1357 unsigned int *xdp_xmit, 1358 struct virtnet_rq_stats *stats) 1359 { 1360 struct virtio_net_hdr_mrg_rxbuf *hdr; 1361 struct bpf_prog *prog; 1362 struct sk_buff *skb; 1363 u32 ret, num_buf; 1364 1365 hdr = xdp->data - vi->hdr_len; 1366 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 1367 1368 ret = XDP_PASS; 1369 rcu_read_lock(); 1370 prog = rcu_dereference(rq->xdp_prog); 1371 if (prog) { 1372 /* TODO: support multi buffer. 
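		 * For now a frame that spans more than one buffer is treated as
		 * XDP_ABORTED below and all of its buffers are dropped.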
*/ 1373 if (num_buf == 1) 1374 ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, 1375 stats); 1376 else 1377 ret = XDP_ABORTED; 1378 } 1379 rcu_read_unlock(); 1380 1381 switch (ret) { 1382 case XDP_PASS: 1383 skb = xsk_construct_skb(rq, xdp); 1384 if (!skb) 1385 goto drop_bufs; 1386 1387 if (xsk_append_merge_buffer(vi, rq, skb, num_buf, hdr, stats)) { 1388 dev_kfree_skb(skb); 1389 goto drop; 1390 } 1391 1392 return skb; 1393 1394 case XDP_TX: 1395 case XDP_REDIRECT: 1396 return NULL; 1397 1398 default: 1399 /* drop packet */ 1400 xsk_buff_free(xdp); 1401 } 1402 1403 drop_bufs: 1404 xsk_drop_follow_bufs(dev, rq, num_buf, stats); 1405 1406 drop: 1407 u64_stats_inc(&stats->drops); 1408 return NULL; 1409 } 1410 1411 static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queue *rq, 1412 void *buf, u32 len, 1413 unsigned int *xdp_xmit, 1414 struct virtnet_rq_stats *stats) 1415 { 1416 struct net_device *dev = vi->dev; 1417 struct sk_buff *skb = NULL; 1418 struct xdp_buff *xdp; 1419 u8 flags; 1420 1421 len -= vi->hdr_len; 1422 1423 u64_stats_add(&stats->bytes, len); 1424 1425 xdp = buf_to_xdp(vi, rq, buf, len, true); 1426 if (!xdp) 1427 return; 1428 1429 if (unlikely(len < ETH_HLEN)) { 1430 pr_debug("%s: short packet %i\n", dev->name, len); 1431 DEV_STATS_INC(dev, rx_length_errors); 1432 xsk_buff_free(xdp); 1433 return; 1434 } 1435 1436 flags = ((struct virtio_net_common_hdr *)(xdp->data - vi->hdr_len))->hdr.flags; 1437 1438 if (!vi->mergeable_rx_bufs) 1439 skb = virtnet_receive_xsk_small(dev, vi, rq, xdp, xdp_xmit, stats); 1440 else 1441 skb = virtnet_receive_xsk_merge(dev, vi, rq, xdp, xdp_xmit, stats); 1442 1443 if (skb) 1444 virtnet_receive_done(vi, rq, skb, flags); 1445 } 1446 1447 static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue *rq, 1448 struct xsk_buff_pool *pool, gfp_t gfp) 1449 { 1450 struct xdp_buff **xsk_buffs; 1451 dma_addr_t addr; 1452 int err = 0; 1453 u32 len, i; 1454 int num; 1455 1456 xsk_buffs = rq->xsk_buffs; 1457 1458 num = xsk_buff_alloc_batch(pool, xsk_buffs, rq->vq->num_free); 1459 if (!num) 1460 return -ENOMEM; 1461 1462 len = xsk_pool_get_rx_frame_size(pool) + vi->hdr_len; 1463 1464 for (i = 0; i < num; ++i) { 1465 /* Use the part of XDP_PACKET_HEADROOM as the virtnet hdr space. 1466 * We assume XDP_PACKET_HEADROOM is larger than hdr->len. 
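		 * The receive descriptor handed to the device therefore covers
		 *   [ virtio header | frame data ]
		 * which is why the address below is the XSK buffer's DMA address
		 * minus vi->hdr_len and the length is the pool frame size plus
		 * vi->hdr_len.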
		 * (see function virtnet_xsk_pool_enable)
		 */
		addr = xsk_buff_xdp_get_dma(xsk_buffs[i]) - vi->hdr_len;

		sg_init_table(rq->sg, 1);
		sg_fill_dma(rq->sg, addr, len);

		err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1,
						    xsk_buffs[i], NULL, gfp);
		if (err)
			goto err;
	}

	return num;

err:
	for (; i < num; ++i)
		xsk_buff_free(xsk_buffs[i]);

	return err;
}

static void *virtnet_xsk_to_ptr(u32 len)
{
	unsigned long p;

	p = len << VIRTIO_XSK_FLAG_OFFSET;

	return virtnet_xmit_ptr_pack((void *)p, VIRTNET_XMIT_TYPE_XSK);
}

static int virtnet_xsk_xmit_one(struct send_queue *sq,
				struct xsk_buff_pool *pool,
				struct xdp_desc *desc)
{
	struct virtnet_info *vi;
	dma_addr_t addr;

	vi = sq->vq->vdev->priv;

	addr = xsk_buff_raw_get_dma(pool, desc->addr);
	xsk_buff_raw_dma_sync_for_device(pool, addr, desc->len);

	sg_init_table(sq->sg, 2);
	sg_fill_dma(sq->sg, sq->xsk_hdr_dma_addr, vi->hdr_len);
	sg_fill_dma(sq->sg + 1, addr, desc->len);

	return virtqueue_add_outbuf_premapped(sq->vq, sq->sg, 2,
					      virtnet_xsk_to_ptr(desc->len),
					      GFP_ATOMIC);
}

static int virtnet_xsk_xmit_batch(struct send_queue *sq,
				  struct xsk_buff_pool *pool,
				  unsigned int budget,
				  u64 *kicks)
{
	struct xdp_desc *descs = pool->tx_descs;
	bool kick = false;
	u32 nb_pkts, i;
	int err;

	budget = min_t(u32, budget, sq->vq->num_free);

	nb_pkts = xsk_tx_peek_release_desc_batch(pool, budget);
	if (!nb_pkts)
		return 0;

	for (i = 0; i < nb_pkts; i++) {
		err = virtnet_xsk_xmit_one(sq, pool, &descs[i]);
		if (unlikely(err)) {
			xsk_tx_completed(sq->xsk_pool, nb_pkts - i);
			break;
		}

		kick = true;
	}

	if (kick && virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))
		(*kicks)++;

	return i;
}

static bool virtnet_xsk_xmit(struct send_queue *sq, struct xsk_buff_pool *pool,
			     int budget)
{
	struct virtnet_info *vi = sq->vq->vdev->priv;
	struct virtnet_sq_free_stats stats = {};
	struct net_device *dev = vi->dev;
	u64 kicks = 0;
	int sent;

	/* Avoid waking up NAPI needlessly, so call __free_old_xmit() instead
	 * of free_old_xmit().
	 */
	__free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq), true, &stats);

	if (stats.xsk)
		xsk_tx_completed(sq->xsk_pool, stats.xsk);

	sent = virtnet_xsk_xmit_batch(sq, pool, budget, &kicks);

	if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq))
		check_sq_full_and_disable(vi, vi->dev, sq);

	if (sent) {
		struct netdev_queue *txq;

		txq = netdev_get_tx_queue(vi->dev, sq - vi->sq);
		txq_trans_cond_update(txq);
	}

	u64_stats_update_begin(&sq->stats.syncp);
	u64_stats_add(&sq->stats.packets, stats.packets);
	u64_stats_add(&sq->stats.bytes, stats.bytes);
	u64_stats_add(&sq->stats.kicks, kicks);
	u64_stats_add(&sq->stats.xdp_tx, sent);
	u64_stats_update_end(&sq->stats.syncp);

	if (xsk_uses_need_wakeup(pool))
		xsk_set_tx_need_wakeup(pool);

	return sent;
}

static void xsk_wakeup(struct send_queue *sq)
{
	if (napi_if_scheduled_mark_missed(&sq->napi))
		return;

	local_bh_disable();
	virtqueue_napi_schedule(&sq->napi, sq->vq);
	local_bh_enable();
}

static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct send_queue *sq;

	if (!netif_running(dev))
		return -ENETDOWN;

	if (qid >= vi->curr_queue_pairs)
		return -EINVAL;

	sq = &vi->sq[qid];

	xsk_wakeup(sq);
	return 0;
}

static void virtnet_xsk_completed(struct send_queue *sq, int num)
{
	xsk_tx_completed(sq->xsk_pool, num);

	/* Whether this is called from rx poll, start_xmit or xdp xmit, wake
	 * up the tx napi to consume the xsk tx queue, because the tx
	 * interrupt may not be triggered.
	 */
	xsk_wakeup(sq);
}

static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
				  struct send_queue *sq,
				  struct xdp_frame *xdpf)
{
	struct virtio_net_hdr_mrg_rxbuf *hdr;
	struct skb_shared_info *shinfo;
	u8 nr_frags = 0;
	int err, i;

	if (unlikely(xdpf->headroom < vi->hdr_len))
		return -EOVERFLOW;

	if (unlikely(xdp_frame_has_frags(xdpf))) {
		shinfo = xdp_get_shared_info_from_frame(xdpf);
		nr_frags = shinfo->nr_frags;
	}

	/* The wrapping function virtnet_xdp_xmit() frees up the pending old
	 * buffers, where xdp_get_frame_len() and xdp_return_frame() need to
	 * locate skb_shared_info from xdpf->data and xdpf->headroom.
	 * Therefore, update the headroom synchronously here.
	 */
	xdpf->headroom -= vi->hdr_len;
	xdpf->data -= vi->hdr_len;
	/* Zero header and leave csum up to XDP layers */
	hdr = xdpf->data;
	memset(hdr, 0, vi->hdr_len);
	xdpf->len += vi->hdr_len;

	sg_init_table(sq->sg, nr_frags + 1);
	sg_set_buf(sq->sg, xdpf->data, xdpf->len);
	for (i = 0; i < nr_frags; i++) {
		skb_frag_t *frag = &shinfo->frags[i];

		sg_set_page(&sq->sg[i + 1], skb_frag_page(frag),
			    skb_frag_size(frag), skb_frag_off(frag));
	}

	err = virtnet_add_outbuf(sq, nr_frags + 1, xdpf, VIRTNET_XMIT_TYPE_XDP);
	if (unlikely(err))
		return -ENOSPC; /* Caller handles free/refcnt */

	return 0;
}

/* when vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on
 * the current cpu, so it does not need to be locked.
 *
 * Here we use a macro instead of inline functions because we have to deal with
 * three issues at the same time: 1. the choice of sq, 2. judging whether to
 * lock/unlock the txq, and 3. making sparse happy. It is difficult for two
 * inline functions to solve these three problems at the same time.
 */
#define virtnet_xdp_get_sq(vi) ({ \
	int cpu = smp_processor_id(); \
	struct netdev_queue *txq; \
	typeof(vi) v = (vi); \
	unsigned int qp; \
	\
	if (v->curr_queue_pairs > nr_cpu_ids) { \
		qp = v->curr_queue_pairs - v->xdp_queue_pairs; \
		qp += cpu; \
		txq = netdev_get_tx_queue(v->dev, qp); \
		__netif_tx_acquire(txq); \
	} else { \
		qp = cpu % v->curr_queue_pairs; \
		txq = netdev_get_tx_queue(v->dev, qp); \
		__netif_tx_lock(txq, cpu); \
	} \
	v->sq + qp; \
})

#define virtnet_xdp_put_sq(vi, q) { \
	struct netdev_queue *txq; \
	typeof(vi) v = (vi); \
	\
	txq = netdev_get_tx_queue(v->dev, (q) - v->sq); \
	if (v->curr_queue_pairs > nr_cpu_ids) \
		__netif_tx_release(txq); \
	else \
		__netif_tx_unlock(txq); \
}

static int virtnet_xdp_xmit(struct net_device *dev,
			    int n, struct xdp_frame **frames, u32 flags)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct virtnet_sq_free_stats stats = {0};
	struct receive_queue *rq = vi->rq;
	struct bpf_prog *xdp_prog;
	struct send_queue *sq;
	int nxmit = 0;
	int kicks = 0;
	int ret;
	int i;

	/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
	 * indicates that XDP resources have been successfully allocated.
	 */
	xdp_prog = rcu_access_pointer(rq->xdp_prog);
	if (!xdp_prog)
		return -ENXIO;

	sq = virtnet_xdp_get_sq(vi);

	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
		ret = -EINVAL;
		goto out;
	}

	/* Free up any pending old buffers before queueing new ones.
*/ 1744 virtnet_free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq), 1745 false, &stats); 1746 1747 for (i = 0; i < n; i++) { 1748 struct xdp_frame *xdpf = frames[i]; 1749 1750 if (__virtnet_xdp_xmit_one(vi, sq, xdpf)) 1751 break; 1752 nxmit++; 1753 } 1754 ret = nxmit; 1755 1756 if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq)) 1757 check_sq_full_and_disable(vi, dev, sq); 1758 1759 if (flags & XDP_XMIT_FLUSH) { 1760 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) 1761 kicks = 1; 1762 } 1763 out: 1764 u64_stats_update_begin(&sq->stats.syncp); 1765 u64_stats_add(&sq->stats.bytes, stats.bytes); 1766 u64_stats_add(&sq->stats.packets, stats.packets); 1767 u64_stats_add(&sq->stats.xdp_tx, n); 1768 u64_stats_add(&sq->stats.xdp_tx_drops, n - nxmit); 1769 u64_stats_add(&sq->stats.kicks, kicks); 1770 u64_stats_update_end(&sq->stats.syncp); 1771 1772 virtnet_xdp_put_sq(vi, sq); 1773 return ret; 1774 } 1775 1776 static void put_xdp_frags(struct xdp_buff *xdp) 1777 { 1778 struct skb_shared_info *shinfo; 1779 struct page *xdp_page; 1780 int i; 1781 1782 if (xdp_buff_has_frags(xdp)) { 1783 shinfo = xdp_get_shared_info_from_buff(xdp); 1784 for (i = 0; i < shinfo->nr_frags; i++) { 1785 xdp_page = skb_frag_page(&shinfo->frags[i]); 1786 put_page(xdp_page); 1787 } 1788 } 1789 } 1790 1791 static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp, 1792 struct net_device *dev, 1793 unsigned int *xdp_xmit, 1794 struct virtnet_rq_stats *stats) 1795 { 1796 struct xdp_frame *xdpf; 1797 int err; 1798 u32 act; 1799 1800 act = bpf_prog_run_xdp(xdp_prog, xdp); 1801 u64_stats_inc(&stats->xdp_packets); 1802 1803 switch (act) { 1804 case XDP_PASS: 1805 return act; 1806 1807 case XDP_TX: 1808 u64_stats_inc(&stats->xdp_tx); 1809 xdpf = xdp_convert_buff_to_frame(xdp); 1810 if (unlikely(!xdpf)) { 1811 netdev_dbg(dev, "convert buff to frame failed for xdp\n"); 1812 return XDP_DROP; 1813 } 1814 1815 err = virtnet_xdp_xmit(dev, 1, &xdpf, 0); 1816 if (unlikely(!err)) { 1817 xdp_return_frame_rx_napi(xdpf); 1818 } else if (unlikely(err < 0)) { 1819 trace_xdp_exception(dev, xdp_prog, act); 1820 return XDP_DROP; 1821 } 1822 *xdp_xmit |= VIRTIO_XDP_TX; 1823 return act; 1824 1825 case XDP_REDIRECT: 1826 u64_stats_inc(&stats->xdp_redirects); 1827 err = xdp_do_redirect(dev, xdp, xdp_prog); 1828 if (err) 1829 return XDP_DROP; 1830 1831 *xdp_xmit |= VIRTIO_XDP_REDIR; 1832 return act; 1833 1834 default: 1835 bpf_warn_invalid_xdp_action(dev, xdp_prog, act); 1836 fallthrough; 1837 case XDP_ABORTED: 1838 trace_xdp_exception(dev, xdp_prog, act); 1839 fallthrough; 1840 case XDP_DROP: 1841 return XDP_DROP; 1842 } 1843 } 1844 1845 static unsigned int virtnet_get_headroom(struct virtnet_info *vi) 1846 { 1847 return vi->xdp_enabled ? XDP_PACKET_HEADROOM : 0; 1848 } 1849 1850 /* We copy the packet for XDP in the following cases: 1851 * 1852 * 1) Packet is scattered across multiple rx buffers. 1853 * 2) Headroom space is insufficient. 1854 * 1855 * This is inefficient but it's a temporary condition that 1856 * we hit right after XDP is enabled and until queue is refilled 1857 * with large buffers with sufficient headroom - so it should affect 1858 * at most queue size packets. 1859 * Afterwards, the conditions to enable 1860 * XDP should preclude the underlying device from sending packets 1861 * across multiple buffers (num_buf > 1), and we make sure buffers 1862 * have enough headroom. 
1863 */ 1864 static struct page *xdp_linearize_page(struct net_device *dev, 1865 struct receive_queue *rq, 1866 int *num_buf, 1867 struct page *p, 1868 int offset, 1869 int page_off, 1870 unsigned int *len) 1871 { 1872 int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1873 struct page *page; 1874 1875 if (page_off + *len + tailroom > PAGE_SIZE) 1876 return NULL; 1877 1878 page = alloc_page(GFP_ATOMIC); 1879 if (!page) 1880 return NULL; 1881 1882 memcpy(page_address(page) + page_off, page_address(p) + offset, *len); 1883 page_off += *len; 1884 1885 /* Only mergeable mode can go inside this while loop. In small mode, 1886 * *num_buf == 1, so it cannot go inside. 1887 */ 1888 while (--*num_buf) { 1889 unsigned int buflen; 1890 void *buf; 1891 void *ctx; 1892 int off; 1893 1894 buf = virtnet_rq_get_buf(rq, &buflen, &ctx); 1895 if (unlikely(!buf)) 1896 goto err_buf; 1897 1898 p = virt_to_head_page(buf); 1899 off = buf - page_address(p); 1900 1901 if (check_mergeable_len(dev, ctx, buflen)) { 1902 put_page(p); 1903 goto err_buf; 1904 } 1905 1906 /* guard against a misconfigured or uncooperative backend that 1907 * is sending packet larger than the MTU. 1908 */ 1909 if ((page_off + buflen + tailroom) > PAGE_SIZE) { 1910 put_page(p); 1911 goto err_buf; 1912 } 1913 1914 memcpy(page_address(page) + page_off, 1915 page_address(p) + off, buflen); 1916 page_off += buflen; 1917 put_page(p); 1918 } 1919 1920 /* Headroom does not contribute to packet length */ 1921 *len = page_off - XDP_PACKET_HEADROOM; 1922 return page; 1923 err_buf: 1924 __free_pages(page, 0); 1925 return NULL; 1926 } 1927 1928 static struct sk_buff *receive_small_build_skb(struct virtnet_info *vi, 1929 unsigned int xdp_headroom, 1930 void *buf, 1931 unsigned int len) 1932 { 1933 unsigned int header_offset; 1934 unsigned int headroom; 1935 unsigned int buflen; 1936 struct sk_buff *skb; 1937 1938 header_offset = VIRTNET_RX_PAD + xdp_headroom; 1939 headroom = vi->hdr_len + header_offset; 1940 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 1941 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1942 1943 skb = virtnet_build_skb(buf, buflen, headroom, len); 1944 if (unlikely(!skb)) 1945 return NULL; 1946 1947 buf += header_offset; 1948 memcpy(skb_vnet_common_hdr(skb), buf, vi->hdr_len); 1949 1950 return skb; 1951 } 1952 1953 static struct sk_buff *receive_small_xdp(struct net_device *dev, 1954 struct virtnet_info *vi, 1955 struct receive_queue *rq, 1956 struct bpf_prog *xdp_prog, 1957 void *buf, 1958 unsigned int xdp_headroom, 1959 unsigned int len, 1960 unsigned int *xdp_xmit, 1961 struct virtnet_rq_stats *stats) 1962 { 1963 unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom; 1964 unsigned int headroom = vi->hdr_len + header_offset; 1965 struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset; 1966 struct page *page = virt_to_head_page(buf); 1967 struct page *xdp_page; 1968 unsigned int buflen; 1969 struct xdp_buff xdp; 1970 struct sk_buff *skb; 1971 unsigned int metasize = 0; 1972 u32 act; 1973 1974 if (unlikely(hdr->hdr.gso_type)) 1975 goto err_xdp; 1976 1977 /* Partially checksummed packets must be dropped. 
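	 * The csum_start/csum_offset needed to complete such a packet live in
	 * the virtio header, which an XDP program never sees, so the packet
	 * could not be fixed up once the program has run.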
*/ 1978 if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) 1979 goto err_xdp; 1980 1981 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 1982 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1983 1984 if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) { 1985 int offset = buf - page_address(page) + header_offset; 1986 unsigned int tlen = len + vi->hdr_len; 1987 int num_buf = 1; 1988 1989 xdp_headroom = virtnet_get_headroom(vi); 1990 header_offset = VIRTNET_RX_PAD + xdp_headroom; 1991 headroom = vi->hdr_len + header_offset; 1992 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 1993 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1994 xdp_page = xdp_linearize_page(dev, rq, &num_buf, page, 1995 offset, header_offset, 1996 &tlen); 1997 if (!xdp_page) 1998 goto err_xdp; 1999 2000 buf = page_address(xdp_page); 2001 put_page(page); 2002 page = xdp_page; 2003 } 2004 2005 xdp_init_buff(&xdp, buflen, &rq->xdp_rxq); 2006 xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len, 2007 xdp_headroom, len, true); 2008 2009 act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats); 2010 2011 switch (act) { 2012 case XDP_PASS: 2013 /* Recalculate length in case bpf program changed it */ 2014 len = xdp.data_end - xdp.data; 2015 metasize = xdp.data - xdp.data_meta; 2016 break; 2017 2018 case XDP_TX: 2019 case XDP_REDIRECT: 2020 goto xdp_xmit; 2021 2022 default: 2023 goto err_xdp; 2024 } 2025 2026 skb = virtnet_build_skb(buf, buflen, xdp.data - buf, len); 2027 if (unlikely(!skb)) 2028 goto err; 2029 2030 if (metasize) 2031 skb_metadata_set(skb, metasize); 2032 2033 return skb; 2034 2035 err_xdp: 2036 u64_stats_inc(&stats->xdp_drops); 2037 err: 2038 u64_stats_inc(&stats->drops); 2039 put_page(page); 2040 xdp_xmit: 2041 return NULL; 2042 } 2043 2044 static struct sk_buff *receive_small(struct net_device *dev, 2045 struct virtnet_info *vi, 2046 struct receive_queue *rq, 2047 void *buf, void *ctx, 2048 unsigned int len, 2049 unsigned int *xdp_xmit, 2050 struct virtnet_rq_stats *stats) 2051 { 2052 unsigned int xdp_headroom = (unsigned long)ctx; 2053 struct page *page = virt_to_head_page(buf); 2054 struct sk_buff *skb; 2055 2056 /* We passed the address of virtnet header to virtio-core, 2057 * so truncate the padding. 2058 */ 2059 buf -= VIRTNET_RX_PAD + xdp_headroom; 2060 2061 len -= vi->hdr_len; 2062 u64_stats_add(&stats->bytes, len); 2063 2064 if (unlikely(len > GOOD_PACKET_LEN)) { 2065 pr_debug("%s: rx error: len %u exceeds max size %d\n", 2066 dev->name, len, GOOD_PACKET_LEN); 2067 DEV_STATS_INC(dev, rx_length_errors); 2068 goto err; 2069 } 2070 2071 if (unlikely(vi->xdp_enabled)) { 2072 struct bpf_prog *xdp_prog; 2073 2074 rcu_read_lock(); 2075 xdp_prog = rcu_dereference(rq->xdp_prog); 2076 if (xdp_prog) { 2077 skb = receive_small_xdp(dev, vi, rq, xdp_prog, buf, 2078 xdp_headroom, len, xdp_xmit, 2079 stats); 2080 rcu_read_unlock(); 2081 return skb; 2082 } 2083 rcu_read_unlock(); 2084 } 2085 2086 skb = receive_small_build_skb(vi, xdp_headroom, buf, len); 2087 if (likely(skb)) 2088 return skb; 2089 2090 err: 2091 u64_stats_inc(&stats->drops); 2092 put_page(page); 2093 return NULL; 2094 } 2095 2096 static struct sk_buff *receive_big(struct net_device *dev, 2097 struct virtnet_info *vi, 2098 struct receive_queue *rq, 2099 void *buf, 2100 unsigned int len, 2101 struct virtnet_rq_stats *stats) 2102 { 2103 struct page *page = buf; 2104 struct sk_buff *skb; 2105 2106 /* Make sure that len does not exceed the size allocated in 2107 * add_recvbuf_big. 
2108 */ 2109 if (unlikely(len > (vi->big_packets_num_skbfrags + 1) * PAGE_SIZE)) { 2110 pr_debug("%s: rx error: len %u exceeds allocated size %lu\n", 2111 dev->name, len, 2112 (vi->big_packets_num_skbfrags + 1) * PAGE_SIZE); 2113 goto err; 2114 } 2115 2116 skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0); 2117 u64_stats_add(&stats->bytes, len - vi->hdr_len); 2118 if (unlikely(!skb)) 2119 goto err; 2120 2121 return skb; 2122 2123 err: 2124 u64_stats_inc(&stats->drops); 2125 give_pages(rq, page); 2126 return NULL; 2127 } 2128 2129 static void mergeable_buf_free(struct receive_queue *rq, int num_buf, 2130 struct net_device *dev, 2131 struct virtnet_rq_stats *stats) 2132 { 2133 struct page *page; 2134 void *buf; 2135 int len; 2136 2137 while (num_buf-- > 1) { 2138 buf = virtnet_rq_get_buf(rq, &len, NULL); 2139 if (unlikely(!buf)) { 2140 pr_debug("%s: rx error: %d buffers missing\n", 2141 dev->name, num_buf); 2142 DEV_STATS_INC(dev, rx_length_errors); 2143 break; 2144 } 2145 u64_stats_add(&stats->bytes, len); 2146 page = virt_to_head_page(buf); 2147 put_page(page); 2148 } 2149 } 2150 2151 /* Why not use xdp_build_skb_from_frame() ? 2152 * XDP core assumes that xdp frags are PAGE_SIZE in length, while in 2153 * virtio-net there are 2 points that do not match its requirements: 2154 * 1. The size of the prefilled buffer is not fixed before xdp is set. 2155 * 2. xdp_build_skb_from_frame() does more checks that we don't need, 2156 * like eth_type_trans() (which virtio-net does in receive_buf()). 2157 */ 2158 static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev, 2159 struct virtnet_info *vi, 2160 struct xdp_buff *xdp, 2161 unsigned int xdp_frags_truesz) 2162 { 2163 struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 2164 unsigned int headroom, data_len; 2165 struct sk_buff *skb; 2166 int metasize; 2167 u8 nr_frags; 2168 2169 if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) { 2170 pr_debug("Error building skb as missing reserved tailroom for xdp"); 2171 return NULL; 2172 } 2173 2174 if (unlikely(xdp_buff_has_frags(xdp))) 2175 nr_frags = sinfo->nr_frags; 2176 2177 skb = build_skb(xdp->data_hard_start, xdp->frame_sz); 2178 if (unlikely(!skb)) 2179 return NULL; 2180 2181 headroom = xdp->data - xdp->data_hard_start; 2182 data_len = xdp->data_end - xdp->data; 2183 skb_reserve(skb, headroom); 2184 __skb_put(skb, data_len); 2185 2186 metasize = xdp->data - xdp->data_meta; 2187 metasize = metasize > 0 ? 
metasize : 0; 2188 if (metasize) 2189 skb_metadata_set(skb, metasize); 2190 2191 if (unlikely(xdp_buff_has_frags(xdp))) 2192 xdp_update_skb_frags_info(skb, nr_frags, sinfo->xdp_frags_size, 2193 xdp_frags_truesz, 2194 xdp_buff_get_skb_flags(xdp)); 2195 2196 return skb; 2197 } 2198 2199 /* TODO: build xdp in big mode */ 2200 static int virtnet_build_xdp_buff_mrg(struct net_device *dev, 2201 struct virtnet_info *vi, 2202 struct receive_queue *rq, 2203 struct xdp_buff *xdp, 2204 void *buf, 2205 unsigned int len, 2206 unsigned int frame_sz, 2207 int *num_buf, 2208 unsigned int *xdp_frags_truesize, 2209 struct virtnet_rq_stats *stats) 2210 { 2211 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 2212 struct skb_shared_info *shinfo; 2213 unsigned int xdp_frags_truesz = 0; 2214 unsigned int truesize; 2215 struct page *page; 2216 skb_frag_t *frag; 2217 int offset; 2218 void *ctx; 2219 2220 xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq); 2221 xdp_prepare_buff(xdp, buf - XDP_PACKET_HEADROOM, 2222 XDP_PACKET_HEADROOM + vi->hdr_len, len - vi->hdr_len, true); 2223 2224 if (!*num_buf) 2225 return 0; 2226 2227 if (*num_buf > 1) { 2228 /* If we want to build multi-buffer xdp, we need 2229 * to specify that the flags of xdp_buff have the 2230 * XDP_FLAGS_HAS_FRAG bit. 2231 */ 2232 if (!xdp_buff_has_frags(xdp)) 2233 xdp_buff_set_frags_flag(xdp); 2234 2235 shinfo = xdp_get_shared_info_from_buff(xdp); 2236 shinfo->nr_frags = 0; 2237 shinfo->xdp_frags_size = 0; 2238 } 2239 2240 if (*num_buf > MAX_SKB_FRAGS + 1) 2241 return -EINVAL; 2242 2243 while (--*num_buf > 0) { 2244 buf = virtnet_rq_get_buf(rq, &len, &ctx); 2245 if (unlikely(!buf)) { 2246 pr_debug("%s: rx error: %d buffers out of %d missing\n", 2247 dev->name, *num_buf, 2248 virtio16_to_cpu(vi->vdev, hdr->num_buffers)); 2249 DEV_STATS_INC(dev, rx_length_errors); 2250 goto err; 2251 } 2252 2253 u64_stats_add(&stats->bytes, len); 2254 page = virt_to_head_page(buf); 2255 offset = buf - page_address(page); 2256 2257 if (check_mergeable_len(dev, ctx, len)) { 2258 put_page(page); 2259 goto err; 2260 } 2261 2262 truesize = mergeable_ctx_to_truesize(ctx); 2263 xdp_frags_truesz += truesize; 2264 2265 frag = &shinfo->frags[shinfo->nr_frags++]; 2266 skb_frag_fill_page_desc(frag, page, offset, len); 2267 if (page_is_pfmemalloc(page)) 2268 xdp_buff_set_frag_pfmemalloc(xdp); 2269 2270 shinfo->xdp_frags_size += len; 2271 } 2272 2273 *xdp_frags_truesize = xdp_frags_truesz; 2274 return 0; 2275 2276 err: 2277 put_xdp_frags(xdp); 2278 return -EINVAL; 2279 } 2280 2281 static void *mergeable_xdp_get_buf(struct virtnet_info *vi, 2282 struct receive_queue *rq, 2283 struct bpf_prog *xdp_prog, 2284 void *ctx, 2285 unsigned int *frame_sz, 2286 int *num_buf, 2287 struct page **page, 2288 int offset, 2289 unsigned int *len, 2290 struct virtio_net_hdr_mrg_rxbuf *hdr) 2291 { 2292 unsigned int truesize = mergeable_ctx_to_truesize(ctx); 2293 unsigned int headroom = mergeable_ctx_to_headroom(ctx); 2294 struct page *xdp_page; 2295 unsigned int xdp_room; 2296 2297 /* Transient failure which in theory could occur if 2298 * in-flight packets from before XDP was enabled reach 2299 * the receive path after XDP is loaded. 2300 */ 2301 if (unlikely(hdr->hdr.gso_type)) 2302 return NULL; 2303 2304 /* Partially checksummed packets must be dropped. */ 2305 if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) 2306 return NULL; 2307 2308 /* Now XDP core assumes frag size is PAGE_SIZE, but buffers 2309 * with headroom may add hole in truesize, which 2310 * make their length exceed PAGE_SIZE. 
So we disabled the 2311 * hole mechanism for xdp. See add_recvbuf_mergeable(). 2312 */ 2313 *frame_sz = truesize; 2314 2315 if (likely(headroom >= virtnet_get_headroom(vi) && 2316 (*num_buf == 1 || xdp_prog->aux->xdp_has_frags))) { 2317 return page_address(*page) + offset; 2318 } 2319 2320 /* This happens when headroom is not enough because 2321 * of the buffer was prefilled before XDP is set. 2322 * This should only happen for the first several packets. 2323 * In fact, vq reset can be used here to help us clean up 2324 * the prefilled buffers, but many existing devices do not 2325 * support it, and we don't want to bother users who are 2326 * using xdp normally. 2327 */ 2328 if (!xdp_prog->aux->xdp_has_frags) { 2329 /* linearize data for XDP */ 2330 xdp_page = xdp_linearize_page(vi->dev, rq, num_buf, 2331 *page, offset, 2332 XDP_PACKET_HEADROOM, 2333 len); 2334 if (!xdp_page) 2335 return NULL; 2336 } else { 2337 xdp_room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM + 2338 sizeof(struct skb_shared_info)); 2339 if (*len + xdp_room > PAGE_SIZE) 2340 return NULL; 2341 2342 xdp_page = alloc_page(GFP_ATOMIC); 2343 if (!xdp_page) 2344 return NULL; 2345 2346 memcpy(page_address(xdp_page) + XDP_PACKET_HEADROOM, 2347 page_address(*page) + offset, *len); 2348 } 2349 2350 *frame_sz = PAGE_SIZE; 2351 2352 put_page(*page); 2353 2354 *page = xdp_page; 2355 2356 return page_address(*page) + XDP_PACKET_HEADROOM; 2357 } 2358 2359 static struct sk_buff *receive_mergeable_xdp(struct net_device *dev, 2360 struct virtnet_info *vi, 2361 struct receive_queue *rq, 2362 struct bpf_prog *xdp_prog, 2363 void *buf, 2364 void *ctx, 2365 unsigned int len, 2366 unsigned int *xdp_xmit, 2367 struct virtnet_rq_stats *stats) 2368 { 2369 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 2370 int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 2371 struct page *page = virt_to_head_page(buf); 2372 int offset = buf - page_address(page); 2373 unsigned int xdp_frags_truesz = 0; 2374 struct sk_buff *head_skb; 2375 unsigned int frame_sz; 2376 struct xdp_buff xdp; 2377 void *data; 2378 u32 act; 2379 int err; 2380 2381 data = mergeable_xdp_get_buf(vi, rq, xdp_prog, ctx, &frame_sz, &num_buf, &page, 2382 offset, &len, hdr); 2383 if (unlikely(!data)) 2384 goto err_xdp; 2385 2386 err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz, 2387 &num_buf, &xdp_frags_truesz, stats); 2388 if (unlikely(err)) 2389 goto err_xdp; 2390 2391 act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats); 2392 2393 switch (act) { 2394 case XDP_PASS: 2395 head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz); 2396 if (unlikely(!head_skb)) 2397 break; 2398 return head_skb; 2399 2400 case XDP_TX: 2401 case XDP_REDIRECT: 2402 return NULL; 2403 2404 default: 2405 break; 2406 } 2407 2408 put_xdp_frags(&xdp); 2409 2410 err_xdp: 2411 put_page(page); 2412 mergeable_buf_free(rq, num_buf, dev, stats); 2413 2414 u64_stats_inc(&stats->xdp_drops); 2415 u64_stats_inc(&stats->drops); 2416 return NULL; 2417 } 2418 2419 static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb, 2420 struct sk_buff *curr_skb, 2421 struct page *page, void *buf, 2422 int len, int truesize) 2423 { 2424 int num_skb_frags; 2425 int offset; 2426 2427 num_skb_frags = skb_shinfo(curr_skb)->nr_frags; 2428 if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) { 2429 struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC); 2430 2431 if (unlikely(!nskb)) 2432 return NULL; 2433 2434 if (curr_skb == head_skb) 2435 skb_shinfo(curr_skb)->frag_list = nskb; 2436 else 2437 
curr_skb->next = nskb; 2438 curr_skb = nskb; 2439 head_skb->truesize += nskb->truesize; 2440 num_skb_frags = 0; 2441 } 2442 2443 if (curr_skb != head_skb) { 2444 head_skb->data_len += len; 2445 head_skb->len += len; 2446 head_skb->truesize += truesize; 2447 } 2448 2449 offset = buf - page_address(page); 2450 if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) { 2451 put_page(page); 2452 skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1, 2453 len, truesize); 2454 } else { 2455 skb_add_rx_frag(curr_skb, num_skb_frags, page, 2456 offset, len, truesize); 2457 } 2458 2459 return curr_skb; 2460 } 2461 2462 static struct sk_buff *receive_mergeable(struct net_device *dev, 2463 struct virtnet_info *vi, 2464 struct receive_queue *rq, 2465 void *buf, 2466 void *ctx, 2467 unsigned int len, 2468 unsigned int *xdp_xmit, 2469 struct virtnet_rq_stats *stats) 2470 { 2471 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 2472 int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 2473 struct page *page = virt_to_head_page(buf); 2474 int offset = buf - page_address(page); 2475 struct sk_buff *head_skb, *curr_skb; 2476 unsigned int truesize = mergeable_ctx_to_truesize(ctx); 2477 unsigned int headroom = mergeable_ctx_to_headroom(ctx); 2478 2479 head_skb = NULL; 2480 u64_stats_add(&stats->bytes, len - vi->hdr_len); 2481 2482 if (check_mergeable_len(dev, ctx, len)) 2483 goto err_skb; 2484 2485 if (unlikely(vi->xdp_enabled)) { 2486 struct bpf_prog *xdp_prog; 2487 2488 rcu_read_lock(); 2489 xdp_prog = rcu_dereference(rq->xdp_prog); 2490 if (xdp_prog) { 2491 head_skb = receive_mergeable_xdp(dev, vi, rq, xdp_prog, buf, ctx, 2492 len, xdp_xmit, stats); 2493 rcu_read_unlock(); 2494 return head_skb; 2495 } 2496 rcu_read_unlock(); 2497 } 2498 2499 head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom); 2500 curr_skb = head_skb; 2501 2502 if (unlikely(!curr_skb)) 2503 goto err_skb; 2504 while (--num_buf) { 2505 buf = virtnet_rq_get_buf(rq, &len, &ctx); 2506 if (unlikely(!buf)) { 2507 pr_debug("%s: rx error: %d buffers out of %d missing\n", 2508 dev->name, num_buf, 2509 virtio16_to_cpu(vi->vdev, 2510 hdr->num_buffers)); 2511 DEV_STATS_INC(dev, rx_length_errors); 2512 goto err_buf; 2513 } 2514 2515 u64_stats_add(&stats->bytes, len); 2516 page = virt_to_head_page(buf); 2517 2518 if (check_mergeable_len(dev, ctx, len)) 2519 goto err_skb; 2520 2521 truesize = mergeable_ctx_to_truesize(ctx); 2522 curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page, 2523 buf, len, truesize); 2524 if (!curr_skb) 2525 goto err_skb; 2526 } 2527 2528 ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len); 2529 return head_skb; 2530 2531 err_skb: 2532 put_page(page); 2533 mergeable_buf_free(rq, num_buf, dev, stats); 2534 2535 err_buf: 2536 u64_stats_inc(&stats->drops); 2537 dev_kfree_skb(head_skb); 2538 return NULL; 2539 } 2540 2541 static inline u32 2542 virtio_net_hash_value(const struct virtio_net_hdr_v1_hash *hdr_hash) 2543 { 2544 return __le16_to_cpu(hdr_hash->hash_value_lo) | 2545 (__le16_to_cpu(hdr_hash->hash_value_hi) << 16); 2546 } 2547 2548 static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash, 2549 struct sk_buff *skb) 2550 { 2551 enum pkt_hash_types rss_hash_type; 2552 2553 if (!hdr_hash || !skb) 2554 return; 2555 2556 switch (__le16_to_cpu(hdr_hash->hash_report)) { 2557 case VIRTIO_NET_HASH_REPORT_TCPv4: 2558 case VIRTIO_NET_HASH_REPORT_UDPv4: 2559 case VIRTIO_NET_HASH_REPORT_TCPv6: 2560 case VIRTIO_NET_HASH_REPORT_UDPv6: 2561 case VIRTIO_NET_HASH_REPORT_TCPv6_EX: 2562 case 
VIRTIO_NET_HASH_REPORT_UDPv6_EX: 2563 rss_hash_type = PKT_HASH_TYPE_L4; 2564 break; 2565 case VIRTIO_NET_HASH_REPORT_IPv4: 2566 case VIRTIO_NET_HASH_REPORT_IPv6: 2567 case VIRTIO_NET_HASH_REPORT_IPv6_EX: 2568 rss_hash_type = PKT_HASH_TYPE_L3; 2569 break; 2570 case VIRTIO_NET_HASH_REPORT_NONE: 2571 default: 2572 rss_hash_type = PKT_HASH_TYPE_NONE; 2573 } 2574 skb_set_hash(skb, virtio_net_hash_value(hdr_hash), rss_hash_type); 2575 } 2576 2577 static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq, 2578 struct sk_buff *skb, u8 flags) 2579 { 2580 struct virtio_net_common_hdr *hdr; 2581 struct net_device *dev = vi->dev; 2582 2583 hdr = skb_vnet_common_hdr(skb); 2584 if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report) 2585 virtio_skb_set_hash(&hdr->hash_v1_hdr, skb); 2586 2587 hdr->hdr.flags = flags; 2588 if (virtio_net_handle_csum_offload(skb, &hdr->hdr, vi->rx_tnl_csum)) { 2589 net_warn_ratelimited("%s: bad csum: flags: %x, gso_type: %x rx_tnl_csum %d\n", 2590 dev->name, hdr->hdr.flags, 2591 hdr->hdr.gso_type, vi->rx_tnl_csum); 2592 goto frame_err; 2593 } 2594 2595 if (virtio_net_hdr_tnl_to_skb(skb, &hdr->tnl_hdr, vi->rx_tnl, 2596 vi->rx_tnl_csum, 2597 virtio_is_little_endian(vi->vdev))) { 2598 net_warn_ratelimited("%s: bad gso: type: %x, size: %u, flags %x tunnel %d tnl csum %d\n", 2599 dev->name, hdr->hdr.gso_type, 2600 hdr->hdr.gso_size, hdr->hdr.flags, 2601 vi->rx_tnl, vi->rx_tnl_csum); 2602 goto frame_err; 2603 } 2604 2605 skb_record_rx_queue(skb, vq2rxq(rq->vq)); 2606 skb->protocol = eth_type_trans(skb, dev); 2607 pr_debug("Receiving skb proto 0x%04x len %i type %i\n", 2608 ntohs(skb->protocol), skb->len, skb->pkt_type); 2609 2610 napi_gro_receive(&rq->napi, skb); 2611 return; 2612 2613 frame_err: 2614 DEV_STATS_INC(dev, rx_frame_errors); 2615 dev_kfree_skb(skb); 2616 } 2617 2618 static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, 2619 void *buf, unsigned int len, void **ctx, 2620 unsigned int *xdp_xmit, 2621 struct virtnet_rq_stats *stats) 2622 { 2623 struct net_device *dev = vi->dev; 2624 struct sk_buff *skb; 2625 u8 flags; 2626 2627 if (unlikely(len < vi->hdr_len + ETH_HLEN)) { 2628 pr_debug("%s: short packet %i\n", dev->name, len); 2629 DEV_STATS_INC(dev, rx_length_errors); 2630 virtnet_rq_free_buf(vi, rq, buf); 2631 return; 2632 } 2633 2634 /* About the flags below: 2635 * 1. Save the flags early, as the XDP program might overwrite them. 2636 * These flags ensure packets marked as VIRTIO_NET_HDR_F_DATA_VALID 2637 * stay valid after XDP processing. 2638 * 2. XDP doesn't work with partially checksummed packets (refer to 2639 * virtnet_xdp_set()), so packets marked as 2640 * VIRTIO_NET_HDR_F_NEEDS_CSUM get dropped during XDP processing. 2641 */ 2642 2643 if (vi->mergeable_rx_bufs) { 2644 flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags; 2645 skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit, 2646 stats); 2647 } else if (vi->big_packets) { 2648 void *p = page_address((struct page *)buf); 2649 2650 flags = ((struct virtio_net_common_hdr *)p)->hdr.flags; 2651 skb = receive_big(dev, vi, rq, buf, len, stats); 2652 } else { 2653 flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags; 2654 skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); 2655 } 2656 2657 if (unlikely(!skb)) 2658 return; 2659 2660 virtnet_receive_done(vi, rq, skb, flags); 2661 } 2662 2663 /* Unlike mergeable buffers, all buffers are allocated to the 2664 * same size, except for the headroom. 
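 * For comparison, a mergeable-mode context has to carry both truesize and
 * headroom, packed into the pointer-sized ctx roughly like this (a sketch;
 * the real helpers, mergeable_len_to_ctx() and friends, live earlier in
 * this file and the shift used here is only an assumption):
 *
 *	ctx      = (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) |
 *					   truesize);
 *	truesize = (unsigned long)ctx & ((1UL << MRG_CTX_HEADER_SHIFT) - 1);
 *	headroom = (unsigned long)ctx >> MRG_CTX_HEADER_SHIFT;
 *
 * In small mode the truesize is the same for every buffer
 * (SKB_DATA_ALIGN(hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + headroom)
 * plus aligned space for struct skb_shared_info, see add_recvbuf_small()
 * below), so only the headroom can vary.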
For this reason we do 2665 * not need to use mergeable_len_to_ctx here - it is enough 2666 * to store the headroom as the context ignoring the truesize. 2667 */ 2668 static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, 2669 gfp_t gfp) 2670 { 2671 char *buf; 2672 unsigned int xdp_headroom = virtnet_get_headroom(vi); 2673 void *ctx = (void *)(unsigned long)xdp_headroom; 2674 int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom; 2675 int err; 2676 2677 len = SKB_DATA_ALIGN(len) + 2678 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 2679 2680 if (unlikely(!skb_page_frag_refill(len, &rq->alloc_frag, gfp))) 2681 return -ENOMEM; 2682 2683 buf = virtnet_rq_alloc(rq, len, gfp); 2684 if (unlikely(!buf)) 2685 return -ENOMEM; 2686 2687 buf += VIRTNET_RX_PAD + xdp_headroom; 2688 2689 virtnet_rq_init_one_sg(rq, buf, vi->hdr_len + GOOD_PACKET_LEN); 2690 2691 err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1, buf, ctx, gfp); 2692 if (err < 0) { 2693 virtnet_rq_unmap(rq, buf, 0); 2694 put_page(virt_to_head_page(buf)); 2695 } 2696 2697 return err; 2698 } 2699 2700 static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq, 2701 gfp_t gfp) 2702 { 2703 struct page *first, *list = NULL; 2704 char *p; 2705 int i, err, offset; 2706 2707 sg_init_table(rq->sg, vi->big_packets_num_skbfrags + 2); 2708 2709 /* page in rq->sg[vi->big_packets_num_skbfrags + 1] is list tail */ 2710 for (i = vi->big_packets_num_skbfrags + 1; i > 1; --i) { 2711 first = get_a_page(rq, gfp); 2712 if (!first) { 2713 if (list) 2714 give_pages(rq, list); 2715 return -ENOMEM; 2716 } 2717 sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE); 2718 2719 /* chain new page in list head to match sg */ 2720 first->private = (unsigned long)list; 2721 list = first; 2722 } 2723 2724 first = get_a_page(rq, gfp); 2725 if (!first) { 2726 give_pages(rq, list); 2727 return -ENOMEM; 2728 } 2729 p = page_address(first); 2730 2731 /* rq->sg[0], rq->sg[1] share the same page */ 2732 /* a separated rq->sg[0] for header - required in case !any_header_sg */ 2733 sg_set_buf(&rq->sg[0], p, vi->hdr_len); 2734 2735 /* rq->sg[1] for data packet, from offset */ 2736 offset = sizeof(struct padded_vnet_hdr); 2737 sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset); 2738 2739 /* chain first in list head */ 2740 first->private = (unsigned long)list; 2741 err = virtqueue_add_inbuf(rq->vq, rq->sg, vi->big_packets_num_skbfrags + 2, 2742 first, gfp); 2743 if (err < 0) 2744 give_pages(rq, first); 2745 2746 return err; 2747 } 2748 2749 static unsigned int get_mergeable_buf_len(struct receive_queue *rq, 2750 struct ewma_pkt_len *avg_pkt_len, 2751 unsigned int room) 2752 { 2753 struct virtnet_info *vi = rq->vq->vdev->priv; 2754 const size_t hdr_len = vi->hdr_len; 2755 unsigned int len; 2756 2757 if (room) 2758 return PAGE_SIZE - room; 2759 2760 len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len), 2761 rq->min_buf_len, PAGE_SIZE - hdr_len); 2762 2763 return ALIGN(len, L1_CACHE_BYTES); 2764 } 2765 2766 static int add_recvbuf_mergeable(struct virtnet_info *vi, 2767 struct receive_queue *rq, gfp_t gfp) 2768 { 2769 struct page_frag *alloc_frag = &rq->alloc_frag; 2770 unsigned int headroom = virtnet_get_headroom(vi); 2771 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 2772 unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); 2773 unsigned int len, hole; 2774 void *ctx; 2775 char *buf; 2776 int err; 2777 2778 /* Extra tailroom is needed to satisfy XDP's assumption. 
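 * (Concretely: XDP_PACKET_HEADROOM in front of the data plus space for
 *  struct skb_shared_info behind it, both folded into "room" below.)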
This 2779 * means rx frags coalescing won't work, but consider we've 2780 * disabled GSO for XDP, it won't be a big issue. 2781 */ 2782 len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room); 2783 2784 if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp))) 2785 return -ENOMEM; 2786 2787 if (!alloc_frag->offset && len + room + sizeof(struct virtnet_rq_dma) > alloc_frag->size) 2788 len -= sizeof(struct virtnet_rq_dma); 2789 2790 buf = virtnet_rq_alloc(rq, len + room, gfp); 2791 if (unlikely(!buf)) 2792 return -ENOMEM; 2793 2794 buf += headroom; /* advance address leaving hole at front of pkt */ 2795 hole = alloc_frag->size - alloc_frag->offset; 2796 if (hole < len + room) { 2797 /* To avoid internal fragmentation, if there is very likely not 2798 * enough space for another buffer, add the remaining space to 2799 * the current buffer. 2800 * XDP core assumes that frame_size of xdp_buff and the length 2801 * of the frag are PAGE_SIZE, so we disable the hole mechanism. 2802 */ 2803 if (!headroom) 2804 len += hole; 2805 alloc_frag->offset += hole; 2806 } 2807 2808 virtnet_rq_init_one_sg(rq, buf, len); 2809 2810 ctx = mergeable_len_to_ctx(len + room, headroom); 2811 err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1, buf, ctx, gfp); 2812 if (err < 0) { 2813 virtnet_rq_unmap(rq, buf, 0); 2814 put_page(virt_to_head_page(buf)); 2815 } 2816 2817 return err; 2818 } 2819 2820 /* 2821 * Returns false if we couldn't fill entirely (OOM). 2822 * 2823 * Normally run in the receive path, but can also be run from ndo_open 2824 * before we're receiving packets, or from refill_work which is 2825 * careful to disable receiving (using napi_disable). 2826 */ 2827 static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq, 2828 gfp_t gfp) 2829 { 2830 int err; 2831 2832 if (rq->xsk_pool) { 2833 err = virtnet_add_recvbuf_xsk(vi, rq, rq->xsk_pool, gfp); 2834 goto kick; 2835 } 2836 2837 do { 2838 if (vi->mergeable_rx_bufs) 2839 err = add_recvbuf_mergeable(vi, rq, gfp); 2840 else if (vi->big_packets) 2841 err = add_recvbuf_big(vi, rq, gfp); 2842 else 2843 err = add_recvbuf_small(vi, rq, gfp); 2844 2845 if (err) 2846 break; 2847 } while (rq->vq->num_free); 2848 2849 kick: 2850 if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) { 2851 unsigned long flags; 2852 2853 flags = u64_stats_update_begin_irqsave(&rq->stats.syncp); 2854 u64_stats_inc(&rq->stats.kicks); 2855 u64_stats_update_end_irqrestore(&rq->stats.syncp, flags); 2856 } 2857 2858 return err != -ENOMEM; 2859 } 2860 2861 static void skb_recv_done(struct virtqueue *rvq) 2862 { 2863 struct virtnet_info *vi = rvq->vdev->priv; 2864 struct receive_queue *rq = &vi->rq[vq2rxq(rvq)]; 2865 2866 rq->calls++; 2867 virtqueue_napi_schedule(&rq->napi, rvq); 2868 } 2869 2870 static void virtnet_napi_do_enable(struct virtqueue *vq, 2871 struct napi_struct *napi) 2872 { 2873 napi_enable(napi); 2874 2875 /* If all buffers were filled by other side before we napi_enabled, we 2876 * won't get another interrupt, so process any outstanding packets now. 2877 * Call local_bh_enable after to trigger softIRQ processing. 
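 * Scheduling NAPI here only raises NET_RX_SOFTIRQ; wrapping the call in
 * local_bh_disable()/local_bh_enable() makes the final local_bh_enable()
 * run the pending softirq immediately instead of leaving it for the next
 * interrupt or ksoftirqd pass.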
2878 */ 2879 local_bh_disable(); 2880 virtqueue_napi_schedule(napi, vq); 2881 local_bh_enable(); 2882 } 2883 2884 static void virtnet_napi_enable(struct receive_queue *rq) 2885 { 2886 struct virtnet_info *vi = rq->vq->vdev->priv; 2887 int qidx = vq2rxq(rq->vq); 2888 2889 virtnet_napi_do_enable(rq->vq, &rq->napi); 2890 netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, &rq->napi); 2891 } 2892 2893 static void virtnet_napi_tx_enable(struct send_queue *sq) 2894 { 2895 struct virtnet_info *vi = sq->vq->vdev->priv; 2896 struct napi_struct *napi = &sq->napi; 2897 int qidx = vq2txq(sq->vq); 2898 2899 if (!napi->weight) 2900 return; 2901 2902 /* Tx napi touches cachelines on the cpu handling tx interrupts. Only 2903 * enable the feature if this is likely affine with the transmit path. 2904 */ 2905 if (!vi->affinity_hint_set) { 2906 napi->weight = 0; 2907 return; 2908 } 2909 2910 virtnet_napi_do_enable(sq->vq, napi); 2911 netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, napi); 2912 } 2913 2914 static void virtnet_napi_tx_disable(struct send_queue *sq) 2915 { 2916 struct virtnet_info *vi = sq->vq->vdev->priv; 2917 struct napi_struct *napi = &sq->napi; 2918 int qidx = vq2txq(sq->vq); 2919 2920 if (napi->weight) { 2921 netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, NULL); 2922 napi_disable(napi); 2923 } 2924 } 2925 2926 static void virtnet_napi_disable(struct receive_queue *rq) 2927 { 2928 struct virtnet_info *vi = rq->vq->vdev->priv; 2929 struct napi_struct *napi = &rq->napi; 2930 int qidx = vq2rxq(rq->vq); 2931 2932 netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, NULL); 2933 napi_disable(napi); 2934 } 2935 2936 static void refill_work(struct work_struct *work) 2937 { 2938 struct virtnet_info *vi = 2939 container_of(work, struct virtnet_info, refill.work); 2940 bool still_empty; 2941 int i; 2942 2943 for (i = 0; i < vi->curr_queue_pairs; i++) { 2944 struct receive_queue *rq = &vi->rq[i]; 2945 2946 /* 2947 * When queue API support is added in the future and the call 2948 * below becomes napi_disable_locked, this driver will need to 2949 * be refactored. 2950 * 2951 * One possible solution would be to: 2952 * - cancel refill_work with cancel_delayed_work (note: 2953 * non-sync) 2954 * - cancel refill_work with cancel_delayed_work_sync in 2955 * virtnet_remove after the netdev is unregistered 2956 * - wrap all of the work in a lock (perhaps the netdev 2957 * instance lock) 2958 * - check netif_running() and return early to avoid a race 2959 */ 2960 napi_disable(&rq->napi); 2961 still_empty = !try_fill_recv(vi, rq, GFP_KERNEL); 2962 virtnet_napi_do_enable(rq->vq, &rq->napi); 2963 2964 /* In theory, this can happen: if we don't get any buffers in 2965 * we will *never* try to fill again. 
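 * That is why refill_work() re-arms itself below with a HZ/2 (half a
 * second) delay whenever the ring is still empty after this pass.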
2966 */ 2967 if (still_empty) 2968 schedule_delayed_work(&vi->refill, HZ/2); 2969 } 2970 } 2971 2972 static int virtnet_receive_xsk_bufs(struct virtnet_info *vi, 2973 struct receive_queue *rq, 2974 int budget, 2975 unsigned int *xdp_xmit, 2976 struct virtnet_rq_stats *stats) 2977 { 2978 unsigned int len; 2979 int packets = 0; 2980 void *buf; 2981 2982 while (packets < budget) { 2983 buf = virtqueue_get_buf(rq->vq, &len); 2984 if (!buf) 2985 break; 2986 2987 virtnet_receive_xsk_buf(vi, rq, buf, len, xdp_xmit, stats); 2988 packets++; 2989 } 2990 2991 return packets; 2992 } 2993 2994 static int virtnet_receive_packets(struct virtnet_info *vi, 2995 struct receive_queue *rq, 2996 int budget, 2997 unsigned int *xdp_xmit, 2998 struct virtnet_rq_stats *stats) 2999 { 3000 unsigned int len; 3001 int packets = 0; 3002 void *buf; 3003 3004 if (!vi->big_packets || vi->mergeable_rx_bufs) { 3005 void *ctx; 3006 while (packets < budget && 3007 (buf = virtnet_rq_get_buf(rq, &len, &ctx))) { 3008 receive_buf(vi, rq, buf, len, ctx, xdp_xmit, stats); 3009 packets++; 3010 } 3011 } else { 3012 while (packets < budget && 3013 (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) { 3014 receive_buf(vi, rq, buf, len, NULL, xdp_xmit, stats); 3015 packets++; 3016 } 3017 } 3018 3019 return packets; 3020 } 3021 3022 static int virtnet_receive(struct receive_queue *rq, int budget, 3023 unsigned int *xdp_xmit) 3024 { 3025 struct virtnet_info *vi = rq->vq->vdev->priv; 3026 struct virtnet_rq_stats stats = {}; 3027 int i, packets; 3028 3029 if (rq->xsk_pool) 3030 packets = virtnet_receive_xsk_bufs(vi, rq, budget, xdp_xmit, &stats); 3031 else 3032 packets = virtnet_receive_packets(vi, rq, budget, xdp_xmit, &stats); 3033 3034 if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) { 3035 if (!try_fill_recv(vi, rq, GFP_ATOMIC)) { 3036 spin_lock(&vi->refill_lock); 3037 if (vi->refill_enabled) 3038 schedule_delayed_work(&vi->refill, 0); 3039 spin_unlock(&vi->refill_lock); 3040 } 3041 } 3042 3043 u64_stats_set(&stats.packets, packets); 3044 u64_stats_update_begin(&rq->stats.syncp); 3045 for (i = 0; i < ARRAY_SIZE(virtnet_rq_stats_desc); i++) { 3046 size_t offset = virtnet_rq_stats_desc[i].offset; 3047 u64_stats_t *item, *src; 3048 3049 item = (u64_stats_t *)((u8 *)&rq->stats + offset); 3050 src = (u64_stats_t *)((u8 *)&stats + offset); 3051 u64_stats_add(item, u64_stats_read(src)); 3052 } 3053 3054 u64_stats_add(&rq->stats.packets, u64_stats_read(&stats.packets)); 3055 u64_stats_add(&rq->stats.bytes, u64_stats_read(&stats.bytes)); 3056 3057 u64_stats_update_end(&rq->stats.syncp); 3058 3059 return packets; 3060 } 3061 3062 static void virtnet_poll_cleantx(struct receive_queue *rq, int budget) 3063 { 3064 struct virtnet_info *vi = rq->vq->vdev->priv; 3065 unsigned int index = vq2rxq(rq->vq); 3066 struct send_queue *sq = &vi->sq[index]; 3067 struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index); 3068 3069 if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index)) 3070 return; 3071 3072 if (__netif_tx_trylock(txq)) { 3073 if (sq->reset) { 3074 __netif_tx_unlock(txq); 3075 return; 3076 } 3077 3078 do { 3079 virtqueue_disable_cb(sq->vq); 3080 free_old_xmit(sq, txq, !!budget); 3081 } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq))); 3082 3083 if (sq->vq->num_free >= MAX_SKB_FRAGS + 2 && 3084 netif_tx_queue_stopped(txq)) { 3085 u64_stats_update_begin(&sq->stats.syncp); 3086 u64_stats_inc(&sq->stats.wake); 3087 u64_stats_update_end(&sq->stats.syncp); 3088 netif_tx_wake_queue(txq); 3089 } 3090 
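		/* MAX_SKB_FRAGS + 2 above is roughly the worst-case number of
		 * descriptors one skb can need in xmit_skb() (vnet header +
		 * linear data + MAX_SKB_FRAGS frags), so the queue is only
		 * woken once at least one more full-sized packet fits.
		 */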
3091 __netif_tx_unlock(txq); 3092 } 3093 } 3094 3095 static void virtnet_rx_dim_update(struct virtnet_info *vi, struct receive_queue *rq) 3096 { 3097 struct dim_sample cur_sample = {}; 3098 3099 if (!rq->packets_in_napi) 3100 return; 3101 3102 /* Don't need protection when fetching stats, since fetcher and 3103 * updater of the stats are in same context 3104 */ 3105 dim_update_sample(rq->calls, 3106 u64_stats_read(&rq->stats.packets), 3107 u64_stats_read(&rq->stats.bytes), 3108 &cur_sample); 3109 3110 net_dim(&rq->dim, &cur_sample); 3111 rq->packets_in_napi = 0; 3112 } 3113 3114 static int virtnet_poll(struct napi_struct *napi, int budget) 3115 { 3116 struct receive_queue *rq = 3117 container_of(napi, struct receive_queue, napi); 3118 struct virtnet_info *vi = rq->vq->vdev->priv; 3119 struct send_queue *sq; 3120 unsigned int received; 3121 unsigned int xdp_xmit = 0; 3122 bool napi_complete; 3123 3124 virtnet_poll_cleantx(rq, budget); 3125 3126 received = virtnet_receive(rq, budget, &xdp_xmit); 3127 rq->packets_in_napi += received; 3128 3129 if (xdp_xmit & VIRTIO_XDP_REDIR) 3130 xdp_do_flush(); 3131 3132 /* Out of packets? */ 3133 if (received < budget) { 3134 napi_complete = virtqueue_napi_complete(napi, rq->vq, received); 3135 /* Intentionally not taking dim_lock here. This may result in a 3136 * spurious net_dim call. But if that happens virtnet_rx_dim_work 3137 * will not act on the scheduled work. 3138 */ 3139 if (napi_complete && rq->dim_enabled) 3140 virtnet_rx_dim_update(vi, rq); 3141 } 3142 3143 if (xdp_xmit & VIRTIO_XDP_TX) { 3144 sq = virtnet_xdp_get_sq(vi); 3145 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { 3146 u64_stats_update_begin(&sq->stats.syncp); 3147 u64_stats_inc(&sq->stats.kicks); 3148 u64_stats_update_end(&sq->stats.syncp); 3149 } 3150 virtnet_xdp_put_sq(vi, sq); 3151 } 3152 3153 return received; 3154 } 3155 3156 static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index) 3157 { 3158 virtnet_napi_tx_disable(&vi->sq[qp_index]); 3159 virtnet_napi_disable(&vi->rq[qp_index]); 3160 xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); 3161 } 3162 3163 static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index) 3164 { 3165 struct net_device *dev = vi->dev; 3166 int err; 3167 3168 err = xdp_rxq_info_reg(&vi->rq[qp_index].xdp_rxq, dev, qp_index, 3169 vi->rq[qp_index].napi.napi_id); 3170 if (err < 0) 3171 return err; 3172 3173 err = xdp_rxq_info_reg_mem_model(&vi->rq[qp_index].xdp_rxq, 3174 MEM_TYPE_PAGE_SHARED, NULL); 3175 if (err < 0) 3176 goto err_xdp_reg_mem_model; 3177 3178 virtnet_napi_enable(&vi->rq[qp_index]); 3179 virtnet_napi_tx_enable(&vi->sq[qp_index]); 3180 3181 return 0; 3182 3183 err_xdp_reg_mem_model: 3184 xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); 3185 return err; 3186 } 3187 3188 static void virtnet_cancel_dim(struct virtnet_info *vi, struct dim *dim) 3189 { 3190 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 3191 return; 3192 net_dim_work_cancel(dim); 3193 } 3194 3195 static void virtnet_update_settings(struct virtnet_info *vi) 3196 { 3197 u32 speed; 3198 u8 duplex; 3199 3200 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX)) 3201 return; 3202 3203 virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed); 3204 3205 if (ethtool_validate_speed(speed)) 3206 vi->speed = speed; 3207 3208 virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex); 3209 3210 if (ethtool_validate_duplex(duplex)) 3211 vi->duplex = duplex; 3212 } 3213 3214 static int virtnet_open(struct 
net_device *dev) 3215 { 3216 struct virtnet_info *vi = netdev_priv(dev); 3217 int i, err; 3218 3219 enable_delayed_refill(vi); 3220 3221 for (i = 0; i < vi->max_queue_pairs; i++) { 3222 if (i < vi->curr_queue_pairs) 3223 /* Make sure we have some buffers: if oom use wq. */ 3224 if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL)) 3225 schedule_delayed_work(&vi->refill, 0); 3226 3227 err = virtnet_enable_queue_pair(vi, i); 3228 if (err < 0) 3229 goto err_enable_qp; 3230 } 3231 3232 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { 3233 if (vi->status & VIRTIO_NET_S_LINK_UP) 3234 netif_carrier_on(vi->dev); 3235 virtio_config_driver_enable(vi->vdev); 3236 } else { 3237 vi->status = VIRTIO_NET_S_LINK_UP; 3238 netif_carrier_on(dev); 3239 } 3240 3241 return 0; 3242 3243 err_enable_qp: 3244 disable_delayed_refill(vi); 3245 cancel_delayed_work_sync(&vi->refill); 3246 3247 for (i--; i >= 0; i--) { 3248 virtnet_disable_queue_pair(vi, i); 3249 virtnet_cancel_dim(vi, &vi->rq[i].dim); 3250 } 3251 3252 return err; 3253 } 3254 3255 static int virtnet_poll_tx(struct napi_struct *napi, int budget) 3256 { 3257 struct send_queue *sq = container_of(napi, struct send_queue, napi); 3258 struct virtnet_info *vi = sq->vq->vdev->priv; 3259 unsigned int index = vq2txq(sq->vq); 3260 struct netdev_queue *txq; 3261 int opaque, xsk_done = 0; 3262 bool done; 3263 3264 if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { 3265 /* We don't need to enable cb for XDP */ 3266 napi_complete_done(napi, 0); 3267 return 0; 3268 } 3269 3270 txq = netdev_get_tx_queue(vi->dev, index); 3271 __netif_tx_lock(txq, raw_smp_processor_id()); 3272 virtqueue_disable_cb(sq->vq); 3273 3274 if (sq->xsk_pool) 3275 xsk_done = virtnet_xsk_xmit(sq, sq->xsk_pool, budget); 3276 else 3277 free_old_xmit(sq, txq, !!budget); 3278 3279 if (sq->vq->num_free >= MAX_SKB_FRAGS + 2 && 3280 netif_tx_queue_stopped(txq)) { 3281 u64_stats_update_begin(&sq->stats.syncp); 3282 u64_stats_inc(&sq->stats.wake); 3283 u64_stats_update_end(&sq->stats.syncp); 3284 netif_tx_wake_queue(txq); 3285 } 3286 3287 if (xsk_done >= budget) { 3288 __netif_tx_unlock(txq); 3289 return budget; 3290 } 3291 3292 opaque = virtqueue_enable_cb_prepare(sq->vq); 3293 3294 done = napi_complete_done(napi, 0); 3295 3296 if (!done) 3297 virtqueue_disable_cb(sq->vq); 3298 3299 __netif_tx_unlock(txq); 3300 3301 if (done) { 3302 if (unlikely(virtqueue_poll(sq->vq, opaque))) { 3303 if (napi_schedule_prep(napi)) { 3304 __netif_tx_lock(txq, raw_smp_processor_id()); 3305 virtqueue_disable_cb(sq->vq); 3306 __netif_tx_unlock(txq); 3307 __napi_schedule(napi); 3308 } 3309 } 3310 } 3311 3312 return 0; 3313 } 3314 3315 static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan) 3316 { 3317 const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; 3318 struct virtnet_info *vi = sq->vq->vdev->priv; 3319 struct virtio_net_hdr_v1_hash_tunnel *hdr; 3320 int num_sg; 3321 unsigned hdr_len = vi->hdr_len; 3322 bool can_push; 3323 3324 pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); 3325 3326 /* Make sure it's safe to cast between formats */ 3327 BUILD_BUG_ON(__alignof__(*hdr) != __alignof__(hdr->hash_hdr)); 3328 BUILD_BUG_ON(__alignof__(*hdr) != __alignof__(hdr->hash_hdr.hdr)); 3329 3330 can_push = vi->any_header_sg && 3331 !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) && 3332 !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len; 3333 /* Even if we can, don't push here yet as this would skew 3334 * csum_start offset below. 
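 *
 * The resulting scatterlist layout is, roughly:
 *
 *	can_push:	sg[0] = [vnet hdr | linear data], sg[1..] = page frags
 *	!can_push:	sg[0] = vnet hdr, sg[1] = linear data, sg[2..] = frags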
*/ 3335 if (can_push) 3336 hdr = (struct virtio_net_hdr_v1_hash_tunnel *)(skb->data - 3337 hdr_len); 3338 else 3339 hdr = &skb_vnet_common_hdr(skb)->tnl_hdr; 3340 3341 if (virtio_net_hdr_tnl_from_skb(skb, hdr, vi->tx_tnl, 3342 virtio_is_little_endian(vi->vdev), 0)) 3343 return -EPROTO; 3344 3345 if (vi->mergeable_rx_bufs) 3346 hdr->hash_hdr.hdr.num_buffers = 0; 3347 3348 sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2)); 3349 if (can_push) { 3350 __skb_push(skb, hdr_len); 3351 num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len); 3352 if (unlikely(num_sg < 0)) 3353 return num_sg; 3354 /* Pull header back to avoid skew in tx bytes calculations. */ 3355 __skb_pull(skb, hdr_len); 3356 } else { 3357 sg_set_buf(sq->sg, hdr, hdr_len); 3358 num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len); 3359 if (unlikely(num_sg < 0)) 3360 return num_sg; 3361 num_sg++; 3362 } 3363 3364 return virtnet_add_outbuf(sq, num_sg, skb, 3365 orphan ? VIRTNET_XMIT_TYPE_SKB_ORPHAN : VIRTNET_XMIT_TYPE_SKB); 3366 } 3367 3368 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) 3369 { 3370 struct virtnet_info *vi = netdev_priv(dev); 3371 int qnum = skb_get_queue_mapping(skb); 3372 struct send_queue *sq = &vi->sq[qnum]; 3373 int err; 3374 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); 3375 bool xmit_more = netdev_xmit_more(); 3376 bool use_napi = sq->napi.weight; 3377 bool kick; 3378 3379 if (!use_napi) 3380 free_old_xmit(sq, txq, false); 3381 else 3382 virtqueue_disable_cb(sq->vq); 3383 3384 /* timestamp packet in software */ 3385 skb_tx_timestamp(skb); 3386 3387 /* Try to transmit */ 3388 err = xmit_skb(sq, skb, !use_napi); 3389 3390 /* This should not happen! */ 3391 if (unlikely(err)) { 3392 DEV_STATS_INC(dev, tx_fifo_errors); 3393 if (net_ratelimit()) 3394 dev_warn(&dev->dev, 3395 "Unexpected TXQ (%d) queue failure: %d\n", 3396 qnum, err); 3397 DEV_STATS_INC(dev, tx_dropped); 3398 dev_kfree_skb_any(skb); 3399 return NETDEV_TX_OK; 3400 } 3401 3402 /* Don't wait up for transmitted skbs to be freed. */ 3403 if (!use_napi) { 3404 skb_orphan(skb); 3405 nf_reset_ct(skb); 3406 } 3407 3408 if (use_napi) 3409 tx_may_stop(vi, dev, sq); 3410 else 3411 check_sq_full_and_disable(vi, dev,sq); 3412 3413 kick = use_napi ? __netdev_tx_sent_queue(txq, skb->len, xmit_more) : 3414 !xmit_more || netif_xmit_stopped(txq); 3415 if (kick) { 3416 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { 3417 u64_stats_update_begin(&sq->stats.syncp); 3418 u64_stats_inc(&sq->stats.kicks); 3419 u64_stats_update_end(&sq->stats.syncp); 3420 } 3421 } 3422 3423 if (use_napi && kick && unlikely(!virtqueue_enable_cb_delayed(sq->vq))) 3424 virtqueue_napi_schedule(&sq->napi, sq->vq); 3425 3426 return NETDEV_TX_OK; 3427 } 3428 3429 static void __virtnet_rx_pause(struct virtnet_info *vi, 3430 struct receive_queue *rq) 3431 { 3432 bool running = netif_running(vi->dev); 3433 3434 if (running) { 3435 virtnet_napi_disable(rq); 3436 virtnet_cancel_dim(vi, &rq->dim); 3437 } 3438 } 3439 3440 static void virtnet_rx_pause_all(struct virtnet_info *vi) 3441 { 3442 int i; 3443 3444 /* 3445 * Make sure refill_work does not run concurrently to 3446 * avoid napi_disable race which leads to deadlock. 
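 * The order below matters: refill scheduling is blocked first, any
 * already-queued refill_work is flushed with cancel_delayed_work_sync(),
 * and only then is each queue's NAPI disabled, so refill_work can no
 * longer be inside its own napi_disable()/enable section at that point.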
3447 */ 3448 disable_delayed_refill(vi); 3449 cancel_delayed_work_sync(&vi->refill); 3450 for (i = 0; i < vi->max_queue_pairs; i++) 3451 __virtnet_rx_pause(vi, &vi->rq[i]); 3452 } 3453 3454 static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq) 3455 { 3456 /* 3457 * Make sure refill_work does not run concurrently to 3458 * avoid napi_disable race which leads to deadlock. 3459 */ 3460 disable_delayed_refill(vi); 3461 cancel_delayed_work_sync(&vi->refill); 3462 __virtnet_rx_pause(vi, rq); 3463 } 3464 3465 static void __virtnet_rx_resume(struct virtnet_info *vi, 3466 struct receive_queue *rq, 3467 bool refill) 3468 { 3469 bool running = netif_running(vi->dev); 3470 bool schedule_refill = false; 3471 3472 if (refill && !try_fill_recv(vi, rq, GFP_KERNEL)) 3473 schedule_refill = true; 3474 if (running) 3475 virtnet_napi_enable(rq); 3476 3477 if (schedule_refill) 3478 schedule_delayed_work(&vi->refill, 0); 3479 } 3480 3481 static void virtnet_rx_resume_all(struct virtnet_info *vi) 3482 { 3483 int i; 3484 3485 enable_delayed_refill(vi); 3486 for (i = 0; i < vi->max_queue_pairs; i++) { 3487 if (i < vi->curr_queue_pairs) 3488 __virtnet_rx_resume(vi, &vi->rq[i], true); 3489 else 3490 __virtnet_rx_resume(vi, &vi->rq[i], false); 3491 } 3492 } 3493 3494 static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq) 3495 { 3496 enable_delayed_refill(vi); 3497 __virtnet_rx_resume(vi, rq, true); 3498 } 3499 3500 static int virtnet_rx_resize(struct virtnet_info *vi, 3501 struct receive_queue *rq, u32 ring_num) 3502 { 3503 int err, qindex; 3504 3505 qindex = rq - vi->rq; 3506 3507 virtnet_rx_pause(vi, rq); 3508 3509 err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_unmap_free_buf, NULL); 3510 if (err) 3511 netdev_err(vi->dev, "resize rx fail: rx queue index: %d err: %d\n", qindex, err); 3512 3513 virtnet_rx_resume(vi, rq); 3514 return err; 3515 } 3516 3517 static void virtnet_tx_pause(struct virtnet_info *vi, struct send_queue *sq) 3518 { 3519 bool running = netif_running(vi->dev); 3520 struct netdev_queue *txq; 3521 int qindex; 3522 3523 qindex = sq - vi->sq; 3524 3525 if (running) 3526 virtnet_napi_tx_disable(sq); 3527 3528 txq = netdev_get_tx_queue(vi->dev, qindex); 3529 3530 /* 1. wait all ximt complete 3531 * 2. fix the race of netif_stop_subqueue() vs netif_start_subqueue() 3532 */ 3533 __netif_tx_lock_bh(txq); 3534 3535 /* Prevent rx poll from accessing sq. */ 3536 sq->reset = true; 3537 3538 /* Prevent the upper layer from trying to send packets. 
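 * virtnet_tx_resume() reverses this under the same tx lock: it clears
 * sq->reset, wakes the queue and only then re-enables TX NAPI.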
*/ 3539 netif_stop_subqueue(vi->dev, qindex); 3540 3541 __netif_tx_unlock_bh(txq); 3542 } 3543 3544 static void virtnet_tx_resume(struct virtnet_info *vi, struct send_queue *sq) 3545 { 3546 bool running = netif_running(vi->dev); 3547 struct netdev_queue *txq; 3548 int qindex; 3549 3550 qindex = sq - vi->sq; 3551 3552 txq = netdev_get_tx_queue(vi->dev, qindex); 3553 3554 __netif_tx_lock_bh(txq); 3555 sq->reset = false; 3556 netif_tx_wake_queue(txq); 3557 __netif_tx_unlock_bh(txq); 3558 3559 if (running) 3560 virtnet_napi_tx_enable(sq); 3561 } 3562 3563 static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq, 3564 u32 ring_num) 3565 { 3566 int qindex, err; 3567 3568 if (ring_num <= MAX_SKB_FRAGS + 2) { 3569 netdev_err(vi->dev, "tx size (%d) cannot be smaller than %d\n", 3570 ring_num, MAX_SKB_FRAGS + 2); 3571 return -EINVAL; 3572 } 3573 3574 qindex = sq - vi->sq; 3575 3576 virtnet_tx_pause(vi, sq); 3577 3578 err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf, 3579 virtnet_sq_free_unused_buf_done); 3580 if (err) 3581 netdev_err(vi->dev, "resize tx fail: tx queue index: %d err: %d\n", qindex, err); 3582 3583 virtnet_tx_resume(vi, sq); 3584 3585 return err; 3586 } 3587 3588 /* 3589 * Send command via the control virtqueue and check status. Commands 3590 * supported by the hypervisor, as indicated by feature bits, should 3591 * never fail unless improperly formatted. 3592 */ 3593 static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd, 3594 struct scatterlist *out, 3595 struct scatterlist *in) 3596 { 3597 struct scatterlist *sgs[5], hdr, stat; 3598 u32 out_num = 0, tmp, in_num = 0; 3599 bool ok; 3600 int ret; 3601 3602 /* Caller should know better */ 3603 BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)); 3604 3605 mutex_lock(&vi->cvq_lock); 3606 vi->ctrl->status = ~0; 3607 vi->ctrl->hdr.class = class; 3608 vi->ctrl->hdr.cmd = cmd; 3609 /* Add header */ 3610 sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr)); 3611 sgs[out_num++] = &hdr; 3612 3613 if (out) 3614 sgs[out_num++] = out; 3615 3616 /* Add return status. */ 3617 sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status)); 3618 sgs[out_num + in_num++] = &stat; 3619 3620 if (in) 3621 sgs[out_num + in_num++] = in; 3622 3623 BUG_ON(out_num + in_num > ARRAY_SIZE(sgs)); 3624 ret = virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num, vi, GFP_ATOMIC); 3625 if (ret < 0) { 3626 dev_warn(&vi->vdev->dev, 3627 "Failed to add sgs for command vq: %d\n.", ret); 3628 mutex_unlock(&vi->cvq_lock); 3629 return false; 3630 } 3631 3632 if (unlikely(!virtqueue_kick(vi->cvq))) 3633 goto unlock; 3634 3635 /* Spin for a response, the kick causes an ioport write, trapping 3636 * into the hypervisor, so the request should be handled immediately. 
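 *
 * For reference, the scatterlist handed to virtqueue_add_sgs() above is
 * laid out as (optional entries in brackets):
 *
 *	sgs[] = { class/cmd header (out), [command payload (out)],
 *		  status byte (in), [reply payload (in)] }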
3637 */ 3638 while (!virtqueue_get_buf(vi->cvq, &tmp) && 3639 !virtqueue_is_broken(vi->cvq)) { 3640 cond_resched(); 3641 cpu_relax(); 3642 } 3643 3644 unlock: 3645 ok = vi->ctrl->status == VIRTIO_NET_OK; 3646 mutex_unlock(&vi->cvq_lock); 3647 return ok; 3648 } 3649 3650 static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, 3651 struct scatterlist *out) 3652 { 3653 return virtnet_send_command_reply(vi, class, cmd, out, NULL); 3654 } 3655 3656 static int virtnet_set_mac_address(struct net_device *dev, void *p) 3657 { 3658 struct virtnet_info *vi = netdev_priv(dev); 3659 struct virtio_device *vdev = vi->vdev; 3660 int ret; 3661 struct sockaddr *addr; 3662 struct scatterlist sg; 3663 3664 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 3665 return -EOPNOTSUPP; 3666 3667 addr = kmemdup(p, sizeof(*addr), GFP_KERNEL); 3668 if (!addr) 3669 return -ENOMEM; 3670 3671 ret = eth_prepare_mac_addr_change(dev, addr); 3672 if (ret) 3673 goto out; 3674 3675 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 3676 sg_init_one(&sg, addr->sa_data, dev->addr_len); 3677 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 3678 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { 3679 dev_warn(&vdev->dev, 3680 "Failed to set mac address by vq command.\n"); 3681 ret = -EINVAL; 3682 goto out; 3683 } 3684 } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && 3685 !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { 3686 unsigned int i; 3687 3688 /* Naturally, this has an atomicity problem. */ 3689 for (i = 0; i < dev->addr_len; i++) 3690 virtio_cwrite8(vdev, 3691 offsetof(struct virtio_net_config, mac) + 3692 i, addr->sa_data[i]); 3693 } 3694 3695 eth_commit_mac_addr_change(dev, p); 3696 ret = 0; 3697 3698 out: 3699 kfree(addr); 3700 return ret; 3701 } 3702 3703 static void virtnet_stats(struct net_device *dev, 3704 struct rtnl_link_stats64 *tot) 3705 { 3706 struct virtnet_info *vi = netdev_priv(dev); 3707 unsigned int start; 3708 int i; 3709 3710 for (i = 0; i < vi->max_queue_pairs; i++) { 3711 u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops; 3712 struct receive_queue *rq = &vi->rq[i]; 3713 struct send_queue *sq = &vi->sq[i]; 3714 3715 do { 3716 start = u64_stats_fetch_begin(&sq->stats.syncp); 3717 tpackets = u64_stats_read(&sq->stats.packets); 3718 tbytes = u64_stats_read(&sq->stats.bytes); 3719 terrors = u64_stats_read(&sq->stats.tx_timeouts); 3720 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 3721 3722 do { 3723 start = u64_stats_fetch_begin(&rq->stats.syncp); 3724 rpackets = u64_stats_read(&rq->stats.packets); 3725 rbytes = u64_stats_read(&rq->stats.bytes); 3726 rdrops = u64_stats_read(&rq->stats.drops); 3727 } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); 3728 3729 tot->rx_packets += rpackets; 3730 tot->tx_packets += tpackets; 3731 tot->rx_bytes += rbytes; 3732 tot->tx_bytes += tbytes; 3733 tot->rx_dropped += rdrops; 3734 tot->tx_errors += terrors; 3735 } 3736 3737 tot->tx_dropped = DEV_STATS_READ(dev, tx_dropped); 3738 tot->tx_fifo_errors = DEV_STATS_READ(dev, tx_fifo_errors); 3739 tot->rx_length_errors = DEV_STATS_READ(dev, rx_length_errors); 3740 tot->rx_frame_errors = DEV_STATS_READ(dev, rx_frame_errors); 3741 } 3742 3743 static void virtnet_ack_link_announce(struct virtnet_info *vi) 3744 { 3745 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE, 3746 VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL)) 3747 dev_warn(&vi->dev->dev, "Failed to ack link announce.\n"); 3748 } 3749 3750 static bool virtnet_commit_rss_command(struct virtnet_info *vi); 3751 3752 static void 
virtnet_rss_update_by_qpairs(struct virtnet_info *vi, u16 queue_pairs) 3753 { 3754 u32 indir_val = 0; 3755 int i = 0; 3756 3757 for (; i < vi->rss_indir_table_size; ++i) { 3758 indir_val = ethtool_rxfh_indir_default(i, queue_pairs); 3759 vi->rss_hdr->indirection_table[i] = cpu_to_le16(indir_val); 3760 } 3761 vi->rss_trailer.max_tx_vq = cpu_to_le16(queue_pairs); 3762 } 3763 3764 static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) 3765 { 3766 struct virtio_net_ctrl_mq *mq __free(kfree) = NULL; 3767 struct virtio_net_rss_config_hdr *old_rss_hdr; 3768 struct virtio_net_rss_config_trailer old_rss_trailer; 3769 struct net_device *dev = vi->dev; 3770 struct scatterlist sg; 3771 3772 if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ)) 3773 return 0; 3774 3775 /* Firstly check if we need update rss. Do updating if both (1) rss enabled and 3776 * (2) no user configuration. 3777 * 3778 * During rss command processing, device updates queue_pairs using rss.max_tx_vq. That is, 3779 * the device updates queue_pairs together with rss, so we can skip the sperate queue_pairs 3780 * update (VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET below) and return directly. 3781 */ 3782 if (vi->has_rss && !netif_is_rxfh_configured(dev)) { 3783 old_rss_hdr = vi->rss_hdr; 3784 old_rss_trailer = vi->rss_trailer; 3785 vi->rss_hdr = devm_kzalloc(&dev->dev, virtnet_rss_hdr_size(vi), GFP_KERNEL); 3786 if (!vi->rss_hdr) { 3787 vi->rss_hdr = old_rss_hdr; 3788 return -ENOMEM; 3789 } 3790 3791 *vi->rss_hdr = *old_rss_hdr; 3792 virtnet_rss_update_by_qpairs(vi, queue_pairs); 3793 3794 if (!virtnet_commit_rss_command(vi)) { 3795 /* restore ctrl_rss if commit_rss_command failed */ 3796 devm_kfree(&dev->dev, vi->rss_hdr); 3797 vi->rss_hdr = old_rss_hdr; 3798 vi->rss_trailer = old_rss_trailer; 3799 3800 dev_warn(&dev->dev, "Fail to set num of queue pairs to %d, because committing RSS failed\n", 3801 queue_pairs); 3802 return -EINVAL; 3803 } 3804 devm_kfree(&dev->dev, old_rss_hdr); 3805 goto succ; 3806 } 3807 3808 mq = kzalloc(sizeof(*mq), GFP_KERNEL); 3809 if (!mq) 3810 return -ENOMEM; 3811 3812 mq->virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs); 3813 sg_init_one(&sg, mq, sizeof(*mq)); 3814 3815 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, 3816 VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) { 3817 dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n", 3818 queue_pairs); 3819 return -EINVAL; 3820 } 3821 succ: 3822 vi->curr_queue_pairs = queue_pairs; 3823 /* virtnet_open() will refill when device is going to up. */ 3824 spin_lock_bh(&vi->refill_lock); 3825 if (dev->flags & IFF_UP && vi->refill_enabled) 3826 schedule_delayed_work(&vi->refill, 0); 3827 spin_unlock_bh(&vi->refill_lock); 3828 3829 return 0; 3830 } 3831 3832 static int virtnet_close(struct net_device *dev) 3833 { 3834 struct virtnet_info *vi = netdev_priv(dev); 3835 int i; 3836 3837 /* Make sure NAPI doesn't schedule refill work */ 3838 disable_delayed_refill(vi); 3839 /* Make sure refill_work doesn't re-enable napi! 
*/ 3840 cancel_delayed_work_sync(&vi->refill); 3841 /* Prevent the config change callback from changing carrier 3842 * after close 3843 */ 3844 virtio_config_driver_disable(vi->vdev); 3845 /* Stop getting status/speed updates: we don't care until next 3846 * open 3847 */ 3848 cancel_work_sync(&vi->config_work); 3849 3850 for (i = 0; i < vi->max_queue_pairs; i++) { 3851 virtnet_disable_queue_pair(vi, i); 3852 virtnet_cancel_dim(vi, &vi->rq[i].dim); 3853 } 3854 3855 netif_carrier_off(dev); 3856 3857 return 0; 3858 } 3859 3860 static void virtnet_rx_mode_work(struct work_struct *work) 3861 { 3862 struct virtnet_info *vi = 3863 container_of(work, struct virtnet_info, rx_mode_work); 3864 u8 *promisc_allmulti __free(kfree) = NULL; 3865 struct net_device *dev = vi->dev; 3866 struct scatterlist sg[2]; 3867 struct virtio_net_ctrl_mac *mac_data; 3868 struct netdev_hw_addr *ha; 3869 int uc_count; 3870 int mc_count; 3871 void *buf; 3872 int i; 3873 3874 /* We can't dynamically set ndo_set_rx_mode, so return gracefully */ 3875 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX)) 3876 return; 3877 3878 promisc_allmulti = kzalloc(sizeof(*promisc_allmulti), GFP_KERNEL); 3879 if (!promisc_allmulti) { 3880 dev_warn(&dev->dev, "Failed to set RX mode, no memory.\n"); 3881 return; 3882 } 3883 3884 rtnl_lock(); 3885 3886 *promisc_allmulti = !!(dev->flags & IFF_PROMISC); 3887 sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti)); 3888 3889 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 3890 VIRTIO_NET_CTRL_RX_PROMISC, sg)) 3891 dev_warn(&dev->dev, "Failed to %sable promisc mode.\n", 3892 *promisc_allmulti ? "en" : "dis"); 3893 3894 *promisc_allmulti = !!(dev->flags & IFF_ALLMULTI); 3895 sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti)); 3896 3897 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 3898 VIRTIO_NET_CTRL_RX_ALLMULTI, sg)) 3899 dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n", 3900 *promisc_allmulti ? 
"en" : "dis"); 3901 3902 netif_addr_lock_bh(dev); 3903 3904 uc_count = netdev_uc_count(dev); 3905 mc_count = netdev_mc_count(dev); 3906 /* MAC filter - use one buffer for both lists */ 3907 buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) + 3908 (2 * sizeof(mac_data->entries)), GFP_ATOMIC); 3909 mac_data = buf; 3910 if (!buf) { 3911 netif_addr_unlock_bh(dev); 3912 rtnl_unlock(); 3913 return; 3914 } 3915 3916 sg_init_table(sg, 2); 3917 3918 /* Store the unicast list and count in the front of the buffer */ 3919 mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count); 3920 i = 0; 3921 netdev_for_each_uc_addr(ha, dev) 3922 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 3923 3924 sg_set_buf(&sg[0], mac_data, 3925 sizeof(mac_data->entries) + (uc_count * ETH_ALEN)); 3926 3927 /* multicast list and count fill the end */ 3928 mac_data = (void *)&mac_data->macs[uc_count][0]; 3929 3930 mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count); 3931 i = 0; 3932 netdev_for_each_mc_addr(ha, dev) 3933 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 3934 3935 netif_addr_unlock_bh(dev); 3936 3937 sg_set_buf(&sg[1], mac_data, 3938 sizeof(mac_data->entries) + (mc_count * ETH_ALEN)); 3939 3940 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 3941 VIRTIO_NET_CTRL_MAC_TABLE_SET, sg)) 3942 dev_warn(&dev->dev, "Failed to set MAC filter table.\n"); 3943 3944 rtnl_unlock(); 3945 3946 kfree(buf); 3947 } 3948 3949 static void virtnet_set_rx_mode(struct net_device *dev) 3950 { 3951 struct virtnet_info *vi = netdev_priv(dev); 3952 3953 if (vi->rx_mode_work_enabled) 3954 schedule_work(&vi->rx_mode_work); 3955 } 3956 3957 static int virtnet_vlan_rx_add_vid(struct net_device *dev, 3958 __be16 proto, u16 vid) 3959 { 3960 struct virtnet_info *vi = netdev_priv(dev); 3961 __virtio16 *_vid __free(kfree) = NULL; 3962 struct scatterlist sg; 3963 3964 _vid = kzalloc(sizeof(*_vid), GFP_KERNEL); 3965 if (!_vid) 3966 return -ENOMEM; 3967 3968 *_vid = cpu_to_virtio16(vi->vdev, vid); 3969 sg_init_one(&sg, _vid, sizeof(*_vid)); 3970 3971 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 3972 VIRTIO_NET_CTRL_VLAN_ADD, &sg)) 3973 dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid); 3974 return 0; 3975 } 3976 3977 static int virtnet_vlan_rx_kill_vid(struct net_device *dev, 3978 __be16 proto, u16 vid) 3979 { 3980 struct virtnet_info *vi = netdev_priv(dev); 3981 __virtio16 *_vid __free(kfree) = NULL; 3982 struct scatterlist sg; 3983 3984 _vid = kzalloc(sizeof(*_vid), GFP_KERNEL); 3985 if (!_vid) 3986 return -ENOMEM; 3987 3988 *_vid = cpu_to_virtio16(vi->vdev, vid); 3989 sg_init_one(&sg, _vid, sizeof(*_vid)); 3990 3991 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 3992 VIRTIO_NET_CTRL_VLAN_DEL, &sg)) 3993 dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid); 3994 return 0; 3995 } 3996 3997 static void virtnet_clean_affinity(struct virtnet_info *vi) 3998 { 3999 int i; 4000 4001 if (vi->affinity_hint_set) { 4002 for (i = 0; i < vi->max_queue_pairs; i++) { 4003 virtqueue_set_affinity(vi->rq[i].vq, NULL); 4004 virtqueue_set_affinity(vi->sq[i].vq, NULL); 4005 } 4006 4007 vi->affinity_hint_set = false; 4008 } 4009 } 4010 4011 static void virtnet_set_affinity(struct virtnet_info *vi) 4012 { 4013 cpumask_var_t mask; 4014 int stragglers; 4015 int group_size; 4016 int i, start = 0, cpu; 4017 int num_cpu; 4018 int stride; 4019 4020 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { 4021 virtnet_clean_affinity(vi); 4022 return; 4023 } 4024 4025 num_cpu = num_online_cpus(); 4026 stride = max_t(int, num_cpu / vi->curr_queue_pairs, 
1); 4027 stragglers = num_cpu >= vi->curr_queue_pairs ? 4028 num_cpu % vi->curr_queue_pairs : 4029 0; 4030 4031 for (i = 0; i < vi->curr_queue_pairs; i++) { 4032 group_size = stride + (i < stragglers ? 1 : 0); 4033 4034 for_each_online_cpu_wrap(cpu, start) { 4035 if (!group_size--) { 4036 start = cpu; 4037 break; 4038 } 4039 cpumask_set_cpu(cpu, mask); 4040 } 4041 4042 virtqueue_set_affinity(vi->rq[i].vq, mask); 4043 virtqueue_set_affinity(vi->sq[i].vq, mask); 4044 __netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS); 4045 cpumask_clear(mask); 4046 } 4047 4048 vi->affinity_hint_set = true; 4049 free_cpumask_var(mask); 4050 } 4051 4052 static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node) 4053 { 4054 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 4055 node); 4056 virtnet_set_affinity(vi); 4057 return 0; 4058 } 4059 4060 static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node) 4061 { 4062 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 4063 node_dead); 4064 virtnet_set_affinity(vi); 4065 return 0; 4066 } 4067 4068 static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node) 4069 { 4070 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 4071 node); 4072 4073 virtnet_clean_affinity(vi); 4074 return 0; 4075 } 4076 4077 static enum cpuhp_state virtionet_online; 4078 4079 static int virtnet_cpu_notif_add(struct virtnet_info *vi) 4080 { 4081 int ret; 4082 4083 ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node); 4084 if (ret) 4085 return ret; 4086 ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD, 4087 &vi->node_dead); 4088 if (!ret) 4089 return ret; 4090 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 4091 return ret; 4092 } 4093 4094 static void virtnet_cpu_notif_remove(struct virtnet_info *vi) 4095 { 4096 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 4097 cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD, 4098 &vi->node_dead); 4099 } 4100 4101 static int virtnet_send_ctrl_coal_vq_cmd(struct virtnet_info *vi, 4102 u16 vqn, u32 max_usecs, u32 max_packets) 4103 { 4104 struct virtio_net_ctrl_coal_vq *coal_vq __free(kfree) = NULL; 4105 struct scatterlist sgs; 4106 4107 coal_vq = kzalloc(sizeof(*coal_vq), GFP_KERNEL); 4108 if (!coal_vq) 4109 return -ENOMEM; 4110 4111 coal_vq->vqn = cpu_to_le16(vqn); 4112 coal_vq->coal.max_usecs = cpu_to_le32(max_usecs); 4113 coal_vq->coal.max_packets = cpu_to_le32(max_packets); 4114 sg_init_one(&sgs, coal_vq, sizeof(*coal_vq)); 4115 4116 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 4117 VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET, 4118 &sgs)) 4119 return -EINVAL; 4120 4121 return 0; 4122 } 4123 4124 static int virtnet_send_rx_ctrl_coal_vq_cmd(struct virtnet_info *vi, 4125 u16 queue, u32 max_usecs, 4126 u32 max_packets) 4127 { 4128 int err; 4129 4130 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 4131 return -EOPNOTSUPP; 4132 4133 err = virtnet_send_ctrl_coal_vq_cmd(vi, rxq2vq(queue), 4134 max_usecs, max_packets); 4135 if (err) 4136 return err; 4137 4138 vi->rq[queue].intr_coal.max_usecs = max_usecs; 4139 vi->rq[queue].intr_coal.max_packets = max_packets; 4140 4141 return 0; 4142 } 4143 4144 static int virtnet_send_tx_ctrl_coal_vq_cmd(struct virtnet_info *vi, 4145 u16 queue, u32 max_usecs, 4146 u32 max_packets) 4147 { 4148 int err; 4149 4150 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 4151 return -EOPNOTSUPP; 4152 4153 err = 
virtnet_send_ctrl_coal_vq_cmd(vi, txq2vq(queue), 4154 max_usecs, max_packets); 4155 if (err) 4156 return err; 4157 4158 vi->sq[queue].intr_coal.max_usecs = max_usecs; 4159 vi->sq[queue].intr_coal.max_packets = max_packets; 4160 4161 return 0; 4162 } 4163 4164 static void virtnet_get_ringparam(struct net_device *dev, 4165 struct ethtool_ringparam *ring, 4166 struct kernel_ethtool_ringparam *kernel_ring, 4167 struct netlink_ext_ack *extack) 4168 { 4169 struct virtnet_info *vi = netdev_priv(dev); 4170 4171 ring->rx_max_pending = vi->rq[0].vq->num_max; 4172 ring->tx_max_pending = vi->sq[0].vq->num_max; 4173 ring->rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 4174 ring->tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 4175 } 4176 4177 static int virtnet_set_ringparam(struct net_device *dev, 4178 struct ethtool_ringparam *ring, 4179 struct kernel_ethtool_ringparam *kernel_ring, 4180 struct netlink_ext_ack *extack) 4181 { 4182 struct virtnet_info *vi = netdev_priv(dev); 4183 u32 rx_pending, tx_pending; 4184 struct receive_queue *rq; 4185 struct send_queue *sq; 4186 int i, err; 4187 4188 if (ring->rx_mini_pending || ring->rx_jumbo_pending) 4189 return -EINVAL; 4190 4191 rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 4192 tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 4193 4194 if (ring->rx_pending == rx_pending && 4195 ring->tx_pending == tx_pending) 4196 return 0; 4197 4198 if (ring->rx_pending > vi->rq[0].vq->num_max) 4199 return -EINVAL; 4200 4201 if (ring->tx_pending > vi->sq[0].vq->num_max) 4202 return -EINVAL; 4203 4204 for (i = 0; i < vi->max_queue_pairs; i++) { 4205 rq = vi->rq + i; 4206 sq = vi->sq + i; 4207 4208 if (ring->tx_pending != tx_pending) { 4209 err = virtnet_tx_resize(vi, sq, ring->tx_pending); 4210 if (err) 4211 return err; 4212 4213 /* Upon disabling and re-enabling a transmit virtqueue, the device must 4214 * set the coalescing parameters of the virtqueue to those configured 4215 * through the VIRTIO_NET_CTRL_NOTF_COAL_TX_SET command, or, if the driver 4216 * did not set any TX coalescing parameters, to 0. 4217 */ 4218 err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, i, 4219 vi->intr_coal_tx.max_usecs, 4220 vi->intr_coal_tx.max_packets); 4221 4222 /* Don't break the tx resize action if the vq coalescing is not 4223 * supported. The same is true for rx resize below. 4224 */ 4225 if (err && err != -EOPNOTSUPP) 4226 return err; 4227 } 4228 4229 if (ring->rx_pending != rx_pending) { 4230 err = virtnet_rx_resize(vi, rq, ring->rx_pending); 4231 if (err) 4232 return err; 4233 4234 /* The reason is same as the transmit virtqueue reset */ 4235 mutex_lock(&vi->rq[i].dim_lock); 4236 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, i, 4237 vi->intr_coal_rx.max_usecs, 4238 vi->intr_coal_rx.max_packets); 4239 mutex_unlock(&vi->rq[i].dim_lock); 4240 if (err && err != -EOPNOTSUPP) 4241 return err; 4242 } 4243 } 4244 4245 return 0; 4246 } 4247 4248 static bool virtnet_commit_rss_command(struct virtnet_info *vi) 4249 { 4250 struct net_device *dev = vi->dev; 4251 struct scatterlist sgs[2]; 4252 4253 /* prepare sgs */ 4254 sg_init_table(sgs, 2); 4255 sg_set_buf(&sgs[0], vi->rss_hdr, virtnet_rss_hdr_size(vi)); 4256 sg_set_buf(&sgs[1], &vi->rss_trailer, virtnet_rss_trailer_size(vi)); 4257 4258 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, 4259 vi->has_rss ? 
VIRTIO_NET_CTRL_MQ_RSS_CONFIG 4260 : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, sgs)) 4261 goto err; 4262 4263 return true; 4264 4265 err: 4266 dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n"); 4267 return false; 4268 4269 } 4270 4271 static void virtnet_init_default_rss(struct virtnet_info *vi) 4272 { 4273 vi->rss_hdr->hash_types = cpu_to_le32(vi->rss_hash_types_supported); 4274 vi->rss_hash_types_saved = vi->rss_hash_types_supported; 4275 vi->rss_hdr->indirection_table_mask = vi->rss_indir_table_size 4276 ? cpu_to_le16(vi->rss_indir_table_size - 1) : 0; 4277 vi->rss_hdr->unclassified_queue = 0; 4278 4279 virtnet_rss_update_by_qpairs(vi, vi->curr_queue_pairs); 4280 4281 vi->rss_trailer.hash_key_length = vi->rss_key_size; 4282 4283 netdev_rss_key_fill(vi->rss_hash_key_data, vi->rss_key_size); 4284 } 4285 4286 static int virtnet_get_hashflow(struct net_device *dev, 4287 struct ethtool_rxfh_fields *info) 4288 { 4289 struct virtnet_info *vi = netdev_priv(dev); 4290 4291 info->data = 0; 4292 switch (info->flow_type) { 4293 case TCP_V4_FLOW: 4294 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) { 4295 info->data = RXH_IP_SRC | RXH_IP_DST | 4296 RXH_L4_B_0_1 | RXH_L4_B_2_3; 4297 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 4298 info->data = RXH_IP_SRC | RXH_IP_DST; 4299 } 4300 break; 4301 case TCP_V6_FLOW: 4302 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) { 4303 info->data = RXH_IP_SRC | RXH_IP_DST | 4304 RXH_L4_B_0_1 | RXH_L4_B_2_3; 4305 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 4306 info->data = RXH_IP_SRC | RXH_IP_DST; 4307 } 4308 break; 4309 case UDP_V4_FLOW: 4310 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) { 4311 info->data = RXH_IP_SRC | RXH_IP_DST | 4312 RXH_L4_B_0_1 | RXH_L4_B_2_3; 4313 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 4314 info->data = RXH_IP_SRC | RXH_IP_DST; 4315 } 4316 break; 4317 case UDP_V6_FLOW: 4318 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) { 4319 info->data = RXH_IP_SRC | RXH_IP_DST | 4320 RXH_L4_B_0_1 | RXH_L4_B_2_3; 4321 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 4322 info->data = RXH_IP_SRC | RXH_IP_DST; 4323 } 4324 break; 4325 case IPV4_FLOW: 4326 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) 4327 info->data = RXH_IP_SRC | RXH_IP_DST; 4328 4329 break; 4330 case IPV6_FLOW: 4331 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) 4332 info->data = RXH_IP_SRC | RXH_IP_DST; 4333 4334 break; 4335 default: 4336 info->data = 0; 4337 break; 4338 } 4339 4340 return 0; 4341 } 4342 4343 static int virtnet_set_hashflow(struct net_device *dev, 4344 const struct ethtool_rxfh_fields *info, 4345 struct netlink_ext_ack *extack) 4346 { 4347 struct virtnet_info *vi = netdev_priv(dev); 4348 u32 new_hashtypes = vi->rss_hash_types_saved; 4349 bool is_disable = info->data & RXH_DISCARD; 4350 bool is_l4 = info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3); 4351 4352 /* supports only 'sd', 'sdfn' and 'r' */ 4353 if (!((info->data == (RXH_IP_SRC | RXH_IP_DST)) | is_l4 | is_disable)) 4354 return -EINVAL; 4355 4356 switch (info->flow_type) { 4357 case TCP_V4_FLOW: 4358 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4); 4359 if (!is_disable) 4360 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 4361 | (is_l4 ? 
VIRTIO_NET_RSS_HASH_TYPE_TCPv4 : 0); 4362 break; 4363 case UDP_V4_FLOW: 4364 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_UDPv4); 4365 if (!is_disable) 4366 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 4367 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv4 : 0); 4368 break; 4369 case IPV4_FLOW: 4370 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv4; 4371 if (!is_disable) 4372 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv4; 4373 break; 4374 case TCP_V6_FLOW: 4375 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_TCPv6); 4376 if (!is_disable) 4377 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 4378 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv6 : 0); 4379 break; 4380 case UDP_V6_FLOW: 4381 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_UDPv6); 4382 if (!is_disable) 4383 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 4384 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv6 : 0); 4385 break; 4386 case IPV6_FLOW: 4387 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv6; 4388 if (!is_disable) 4389 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv6; 4390 break; 4391 default: 4392 /* unsupported flow */ 4393 return -EINVAL; 4394 } 4395 4396 /* if unsupported hashtype was set */ 4397 if (new_hashtypes != (new_hashtypes & vi->rss_hash_types_supported)) 4398 return -EINVAL; 4399 4400 if (new_hashtypes != vi->rss_hash_types_saved) { 4401 vi->rss_hash_types_saved = new_hashtypes; 4402 vi->rss_hdr->hash_types = cpu_to_le32(vi->rss_hash_types_saved); 4403 if (vi->dev->features & NETIF_F_RXHASH) 4404 if (!virtnet_commit_rss_command(vi)) 4405 return -EINVAL; 4406 } 4407 4408 return 0; 4409 } 4410 4411 static void virtnet_get_drvinfo(struct net_device *dev, 4412 struct ethtool_drvinfo *info) 4413 { 4414 struct virtnet_info *vi = netdev_priv(dev); 4415 struct virtio_device *vdev = vi->vdev; 4416 4417 strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); 4418 strscpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version)); 4419 strscpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info)); 4420 4421 } 4422 4423 /* TODO: Eliminate OOO packets during switching */ 4424 static int virtnet_set_channels(struct net_device *dev, 4425 struct ethtool_channels *channels) 4426 { 4427 struct virtnet_info *vi = netdev_priv(dev); 4428 u16 queue_pairs = channels->combined_count; 4429 int err; 4430 4431 /* We don't support separate rx/tx channels. 4432 * We don't allow setting 'other' channels. 4433 */ 4434 if (channels->rx_count || channels->tx_count || channels->other_count) 4435 return -EINVAL; 4436 4437 if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0) 4438 return -EINVAL; 4439 4440 /* For now we don't support modifying channels while XDP is loaded 4441 * also when XDP is loaded all RX queues have XDP programs so we only 4442 * need to check a single RX queue. 
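 * (That is why checking vi->rq[0].xdp_prog just below is sufficient.)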
4443 */ 4444 if (vi->rq[0].xdp_prog) 4445 return -EINVAL; 4446 4447 cpus_read_lock(); 4448 err = virtnet_set_queues(vi, queue_pairs); 4449 if (err) { 4450 cpus_read_unlock(); 4451 goto err; 4452 } 4453 virtnet_set_affinity(vi); 4454 cpus_read_unlock(); 4455 4456 netif_set_real_num_tx_queues(dev, queue_pairs); 4457 netif_set_real_num_rx_queues(dev, queue_pairs); 4458 err: 4459 return err; 4460 } 4461 4462 static void virtnet_stats_sprintf(u8 **p, const char *fmt, const char *noq_fmt, 4463 int num, int qid, const struct virtnet_stat_desc *desc) 4464 { 4465 int i; 4466 4467 if (qid < 0) { 4468 for (i = 0; i < num; ++i) 4469 ethtool_sprintf(p, noq_fmt, desc[i].desc); 4470 } else { 4471 for (i = 0; i < num; ++i) 4472 ethtool_sprintf(p, fmt, qid, desc[i].desc); 4473 } 4474 } 4475 4476 /* qid == -1: for rx/tx queue total field */ 4477 static void virtnet_get_stats_string(struct virtnet_info *vi, int type, int qid, u8 **data) 4478 { 4479 const struct virtnet_stat_desc *desc; 4480 const char *fmt, *noq_fmt; 4481 u8 *p = *data; 4482 u32 num; 4483 4484 if (type == VIRTNET_Q_TYPE_CQ && qid >= 0) { 4485 noq_fmt = "cq_hw_%s"; 4486 4487 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) { 4488 desc = &virtnet_stats_cvq_desc[0]; 4489 num = ARRAY_SIZE(virtnet_stats_cvq_desc); 4490 4491 virtnet_stats_sprintf(&p, NULL, noq_fmt, num, -1, desc); 4492 } 4493 } 4494 4495 if (type == VIRTNET_Q_TYPE_RX) { 4496 fmt = "rx%u_%s"; 4497 noq_fmt = "rx_%s"; 4498 4499 desc = &virtnet_rq_stats_desc[0]; 4500 num = ARRAY_SIZE(virtnet_rq_stats_desc); 4501 4502 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4503 4504 fmt = "rx%u_hw_%s"; 4505 noq_fmt = "rx_hw_%s"; 4506 4507 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4508 desc = &virtnet_stats_rx_basic_desc[0]; 4509 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4510 4511 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4512 } 4513 4514 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4515 desc = &virtnet_stats_rx_csum_desc[0]; 4516 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4517 4518 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4519 } 4520 4521 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4522 desc = &virtnet_stats_rx_speed_desc[0]; 4523 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4524 4525 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4526 } 4527 } 4528 4529 if (type == VIRTNET_Q_TYPE_TX) { 4530 fmt = "tx%u_%s"; 4531 noq_fmt = "tx_%s"; 4532 4533 desc = &virtnet_sq_stats_desc[0]; 4534 num = ARRAY_SIZE(virtnet_sq_stats_desc); 4535 4536 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4537 4538 fmt = "tx%u_hw_%s"; 4539 noq_fmt = "tx_hw_%s"; 4540 4541 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4542 desc = &virtnet_stats_tx_basic_desc[0]; 4543 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4544 4545 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4546 } 4547 4548 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4549 desc = &virtnet_stats_tx_gso_desc[0]; 4550 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4551 4552 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4553 } 4554 4555 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4556 desc = &virtnet_stats_tx_speed_desc[0]; 4557 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4558 4559 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4560 } 4561 } 4562 4563 *data = p; 4564 } 4565 4566 struct virtnet_stats_ctx { 4567 /* The stats are write to qstats or ethtool -S */ 4568 
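	/* true: fill struct netdev_queue_stats_rx/tx (qstats); false: fill the
	 * flat u64 array reported via ethtool -S.
	 */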
bool to_qstat; 4569 4570 /* Used to calculate the offset inside the output buffer. */ 4571 u32 desc_num[3]; 4572 4573 /* The actual supported stat types. */ 4574 u64 bitmap[3]; 4575 4576 /* Used to calculate the reply buffer size. */ 4577 u32 size[3]; 4578 4579 /* Record the output buffer. */ 4580 u64 *data; 4581 }; 4582 4583 static void virtnet_stats_ctx_init(struct virtnet_info *vi, 4584 struct virtnet_stats_ctx *ctx, 4585 u64 *data, bool to_qstat) 4586 { 4587 u32 queue_type; 4588 4589 ctx->data = data; 4590 ctx->to_qstat = to_qstat; 4591 4592 if (to_qstat) { 4593 ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc_qstat); 4594 ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc_qstat); 4595 4596 queue_type = VIRTNET_Q_TYPE_RX; 4597 4598 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4599 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC; 4600 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat); 4601 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic); 4602 } 4603 4604 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4605 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM; 4606 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat); 4607 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum); 4608 } 4609 4610 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 4611 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_GSO; 4612 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat); 4613 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_gso); 4614 } 4615 4616 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4617 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED; 4618 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat); 4619 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed); 4620 } 4621 4622 queue_type = VIRTNET_Q_TYPE_TX; 4623 4624 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4625 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC; 4626 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat); 4627 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic); 4628 } 4629 4630 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 4631 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_CSUM; 4632 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat); 4633 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_csum); 4634 } 4635 4636 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4637 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO; 4638 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat); 4639 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso); 4640 } 4641 4642 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4643 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED; 4644 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat); 4645 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed); 4646 } 4647 4648 return; 4649 } 4650 4651 ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc); 4652 ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc); 4653 4654 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) { 4655 queue_type = VIRTNET_Q_TYPE_CQ; 4656 4657 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_CVQ; 4658 ctx->desc_num[queue_type] += 
ARRAY_SIZE(virtnet_stats_cvq_desc); 4659 ctx->size[queue_type] += sizeof(struct virtio_net_stats_cvq); 4660 } 4661 4662 queue_type = VIRTNET_Q_TYPE_RX; 4663 4664 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4665 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC; 4666 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4667 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic); 4668 } 4669 4670 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4671 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM; 4672 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4673 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum); 4674 } 4675 4676 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4677 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED; 4678 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4679 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed); 4680 } 4681 4682 queue_type = VIRTNET_Q_TYPE_TX; 4683 4684 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4685 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC; 4686 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4687 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic); 4688 } 4689 4690 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4691 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO; 4692 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4693 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso); 4694 } 4695 4696 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4697 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED; 4698 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4699 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed); 4700 } 4701 } 4702 4703 /* stats_sum_queue - Calculate the sum of the same fields in sq or rq. 
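 * Field i of queue j is read from @q_value[i + j * @num]; the per-queue
 * blocks sit back to back in the source buffer.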
4704 * @sum: the position to store the sum values 4705 * @num: field num 4706 * @q_value: the first queue fields 4707 * @q_num: number of the queues 4708 */ 4709 static void stats_sum_queue(u64 *sum, u32 num, u64 *q_value, u32 q_num) 4710 { 4711 u32 step = num; 4712 int i, j; 4713 u64 *p; 4714 4715 for (i = 0; i < num; ++i) { 4716 p = sum + i; 4717 *p = 0; 4718 4719 for (j = 0; j < q_num; ++j) 4720 *p += *(q_value + i + j * step); 4721 } 4722 } 4723 4724 static void virtnet_fill_total_fields(struct virtnet_info *vi, 4725 struct virtnet_stats_ctx *ctx) 4726 { 4727 u64 *data, *first_rx_q, *first_tx_q; 4728 u32 num_cq, num_rx, num_tx; 4729 4730 num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ]; 4731 num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX]; 4732 num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX]; 4733 4734 first_rx_q = ctx->data + num_rx + num_tx + num_cq; 4735 first_tx_q = first_rx_q + vi->curr_queue_pairs * num_rx; 4736 4737 data = ctx->data; 4738 4739 stats_sum_queue(data, num_rx, first_rx_q, vi->curr_queue_pairs); 4740 4741 data = ctx->data + num_rx; 4742 4743 stats_sum_queue(data, num_tx, first_tx_q, vi->curr_queue_pairs); 4744 } 4745 4746 static void virtnet_fill_stats_qstat(struct virtnet_info *vi, u32 qid, 4747 struct virtnet_stats_ctx *ctx, 4748 const u8 *base, bool drv_stats, u8 reply_type) 4749 { 4750 const struct virtnet_stat_desc *desc; 4751 const u64_stats_t *v_stat; 4752 u64 offset, bitmap; 4753 const __le64 *v; 4754 u32 queue_type; 4755 int i, num; 4756 4757 queue_type = vq_type(vi, qid); 4758 bitmap = ctx->bitmap[queue_type]; 4759 4760 if (drv_stats) { 4761 if (queue_type == VIRTNET_Q_TYPE_RX) { 4762 desc = &virtnet_rq_stats_desc_qstat[0]; 4763 num = ARRAY_SIZE(virtnet_rq_stats_desc_qstat); 4764 } else { 4765 desc = &virtnet_sq_stats_desc_qstat[0]; 4766 num = ARRAY_SIZE(virtnet_sq_stats_desc_qstat); 4767 } 4768 4769 for (i = 0; i < num; ++i) { 4770 offset = desc[i].qstat_offset / sizeof(*ctx->data); 4771 v_stat = (const u64_stats_t *)(base + desc[i].offset); 4772 ctx->data[offset] = u64_stats_read(v_stat); 4773 } 4774 return; 4775 } 4776 4777 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4778 desc = &virtnet_stats_rx_basic_desc_qstat[0]; 4779 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat); 4780 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC) 4781 goto found; 4782 } 4783 4784 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4785 desc = &virtnet_stats_rx_csum_desc_qstat[0]; 4786 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat); 4787 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM) 4788 goto found; 4789 } 4790 4791 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 4792 desc = &virtnet_stats_rx_gso_desc_qstat[0]; 4793 num = ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat); 4794 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_GSO) 4795 goto found; 4796 } 4797 4798 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4799 desc = &virtnet_stats_rx_speed_desc_qstat[0]; 4800 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat); 4801 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED) 4802 goto found; 4803 } 4804 4805 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4806 desc = &virtnet_stats_tx_basic_desc_qstat[0]; 4807 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat); 4808 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC) 4809 goto found; 4810 } 4811 4812 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 4813 desc = &virtnet_stats_tx_csum_desc_qstat[0]; 4814 num = ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat); 4815 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_CSUM) 4816 goto found; 4817 
} 4818 4819 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4820 desc = &virtnet_stats_tx_gso_desc_qstat[0]; 4821 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat); 4822 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO) 4823 goto found; 4824 } 4825 4826 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4827 desc = &virtnet_stats_tx_speed_desc_qstat[0]; 4828 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat); 4829 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED) 4830 goto found; 4831 } 4832 4833 return; 4834 4835 found: 4836 for (i = 0; i < num; ++i) { 4837 offset = desc[i].qstat_offset / sizeof(*ctx->data); 4838 v = (const __le64 *)(base + desc[i].offset); 4839 ctx->data[offset] = le64_to_cpu(*v); 4840 } 4841 } 4842 4843 /* virtnet_fill_stats - copy the stats to qstats or ethtool -S 4844 * The stats source is the device or the driver. 4845 * 4846 * @vi: virtio net info 4847 * @qid: the vq id 4848 * @ctx: stats ctx (initiated by virtnet_stats_ctx_init()) 4849 * @base: pointer to the device reply or the driver stats structure. 4850 * @drv_stats: designate the base type (device reply, driver stats) 4851 * @type: the type of the device reply (if drv_stats is true, this must be zero) 4852 */ 4853 static void virtnet_fill_stats(struct virtnet_info *vi, u32 qid, 4854 struct virtnet_stats_ctx *ctx, 4855 const u8 *base, bool drv_stats, u8 reply_type) 4856 { 4857 u32 queue_type, num_rx, num_tx, num_cq; 4858 const struct virtnet_stat_desc *desc; 4859 const u64_stats_t *v_stat; 4860 u64 offset, bitmap; 4861 const __le64 *v; 4862 int i, num; 4863 4864 if (ctx->to_qstat) 4865 return virtnet_fill_stats_qstat(vi, qid, ctx, base, drv_stats, reply_type); 4866 4867 num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ]; 4868 num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX]; 4869 num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX]; 4870 4871 queue_type = vq_type(vi, qid); 4872 bitmap = ctx->bitmap[queue_type]; 4873 4874 /* skip the total fields of pairs */ 4875 offset = num_rx + num_tx; 4876 4877 if (queue_type == VIRTNET_Q_TYPE_TX) { 4878 offset += num_cq + num_rx * vi->curr_queue_pairs + num_tx * (qid / 2); 4879 4880 num = ARRAY_SIZE(virtnet_sq_stats_desc); 4881 if (drv_stats) { 4882 desc = &virtnet_sq_stats_desc[0]; 4883 goto drv_stats; 4884 } 4885 4886 offset += num; 4887 4888 } else if (queue_type == VIRTNET_Q_TYPE_RX) { 4889 offset += num_cq + num_rx * (qid / 2); 4890 4891 num = ARRAY_SIZE(virtnet_rq_stats_desc); 4892 if (drv_stats) { 4893 desc = &virtnet_rq_stats_desc[0]; 4894 goto drv_stats; 4895 } 4896 4897 offset += num; 4898 } 4899 4900 if (bitmap & VIRTIO_NET_STATS_TYPE_CVQ) { 4901 desc = &virtnet_stats_cvq_desc[0]; 4902 num = ARRAY_SIZE(virtnet_stats_cvq_desc); 4903 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_CVQ) 4904 goto found; 4905 4906 offset += num; 4907 } 4908 4909 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4910 desc = &virtnet_stats_rx_basic_desc[0]; 4911 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4912 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC) 4913 goto found; 4914 4915 offset += num; 4916 } 4917 4918 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4919 desc = &virtnet_stats_rx_csum_desc[0]; 4920 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4921 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM) 4922 goto found; 4923 4924 offset += num; 4925 } 4926 4927 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4928 desc = &virtnet_stats_rx_speed_desc[0]; 4929 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4930 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED) 4931 goto found; 4932 4933 
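		/* Not the reply type being filled: account for this block's
		 * columns in the output and keep scanning.
		 */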
offset += num; 4934 } 4935 4936 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4937 desc = &virtnet_stats_tx_basic_desc[0]; 4938 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4939 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC) 4940 goto found; 4941 4942 offset += num; 4943 } 4944 4945 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4946 desc = &virtnet_stats_tx_gso_desc[0]; 4947 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4948 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO) 4949 goto found; 4950 4951 offset += num; 4952 } 4953 4954 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4955 desc = &virtnet_stats_tx_speed_desc[0]; 4956 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4957 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED) 4958 goto found; 4959 4960 offset += num; 4961 } 4962 4963 return; 4964 4965 found: 4966 for (i = 0; i < num; ++i) { 4967 v = (const __le64 *)(base + desc[i].offset); 4968 ctx->data[offset + i] = le64_to_cpu(*v); 4969 } 4970 4971 return; 4972 4973 drv_stats: 4974 for (i = 0; i < num; ++i) { 4975 v_stat = (const u64_stats_t *)(base + desc[i].offset); 4976 ctx->data[offset + i] = u64_stats_read(v_stat); 4977 } 4978 } 4979 4980 static int __virtnet_get_hw_stats(struct virtnet_info *vi, 4981 struct virtnet_stats_ctx *ctx, 4982 struct virtio_net_ctrl_queue_stats *req, 4983 int req_size, void *reply, int res_size) 4984 { 4985 struct virtio_net_stats_reply_hdr *hdr; 4986 struct scatterlist sgs_in, sgs_out; 4987 void *p; 4988 u32 qid; 4989 int ok; 4990 4991 sg_init_one(&sgs_out, req, req_size); 4992 sg_init_one(&sgs_in, reply, res_size); 4993 4994 ok = virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS, 4995 VIRTIO_NET_CTRL_STATS_GET, 4996 &sgs_out, &sgs_in); 4997 4998 if (!ok) 4999 return ok; 5000 5001 for (p = reply; p - reply < res_size; p += le16_to_cpu(hdr->size)) { 5002 hdr = p; 5003 qid = le16_to_cpu(hdr->vq_index); 5004 virtnet_fill_stats(vi, qid, ctx, p, false, hdr->type); 5005 } 5006 5007 return 0; 5008 } 5009 5010 static void virtnet_make_stat_req(struct virtnet_info *vi, 5011 struct virtnet_stats_ctx *ctx, 5012 struct virtio_net_ctrl_queue_stats *req, 5013 int qid, int *idx) 5014 { 5015 int qtype = vq_type(vi, qid); 5016 u64 bitmap = ctx->bitmap[qtype]; 5017 5018 if (!bitmap) 5019 return; 5020 5021 req->stats[*idx].vq_index = cpu_to_le16(qid); 5022 req->stats[*idx].types_bitmap[0] = cpu_to_le64(bitmap); 5023 *idx += 1; 5024 } 5025 5026 /* qid: -1: get stats of all vq. 5027 * > 0: get the stats for the special vq. This must not be cvq. 
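 * When qid is -1 the cvq statistics, if supported, are requested as well
 * (see the enable_cvq handling below).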
5028 */ 5029 static int virtnet_get_hw_stats(struct virtnet_info *vi, 5030 struct virtnet_stats_ctx *ctx, int qid) 5031 { 5032 int qnum, i, j, res_size, qtype, last_vq, first_vq; 5033 struct virtio_net_ctrl_queue_stats *req; 5034 bool enable_cvq; 5035 void *reply; 5036 int ok; 5037 5038 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) 5039 return 0; 5040 5041 if (qid == -1) { 5042 last_vq = vi->curr_queue_pairs * 2 - 1; 5043 first_vq = 0; 5044 enable_cvq = true; 5045 } else { 5046 last_vq = qid; 5047 first_vq = qid; 5048 enable_cvq = false; 5049 } 5050 5051 qnum = 0; 5052 res_size = 0; 5053 for (i = first_vq; i <= last_vq ; ++i) { 5054 qtype = vq_type(vi, i); 5055 if (ctx->bitmap[qtype]) { 5056 ++qnum; 5057 res_size += ctx->size[qtype]; 5058 } 5059 } 5060 5061 if (enable_cvq && ctx->bitmap[VIRTNET_Q_TYPE_CQ]) { 5062 res_size += ctx->size[VIRTNET_Q_TYPE_CQ]; 5063 qnum += 1; 5064 } 5065 5066 req = kcalloc(qnum, sizeof(*req), GFP_KERNEL); 5067 if (!req) 5068 return -ENOMEM; 5069 5070 reply = kmalloc(res_size, GFP_KERNEL); 5071 if (!reply) { 5072 kfree(req); 5073 return -ENOMEM; 5074 } 5075 5076 j = 0; 5077 for (i = first_vq; i <= last_vq ; ++i) 5078 virtnet_make_stat_req(vi, ctx, req, i, &j); 5079 5080 if (enable_cvq) 5081 virtnet_make_stat_req(vi, ctx, req, vi->max_queue_pairs * 2, &j); 5082 5083 ok = __virtnet_get_hw_stats(vi, ctx, req, sizeof(*req) * j, reply, res_size); 5084 5085 kfree(req); 5086 kfree(reply); 5087 5088 return ok; 5089 } 5090 5091 static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data) 5092 { 5093 struct virtnet_info *vi = netdev_priv(dev); 5094 unsigned int i; 5095 u8 *p = data; 5096 5097 switch (stringset) { 5098 case ETH_SS_STATS: 5099 /* Generate the total field names. */ 5100 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, -1, &p); 5101 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, -1, &p); 5102 5103 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_CQ, 0, &p); 5104 5105 for (i = 0; i < vi->curr_queue_pairs; ++i) 5106 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, i, &p); 5107 5108 for (i = 0; i < vi->curr_queue_pairs; ++i) 5109 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, i, &p); 5110 break; 5111 } 5112 } 5113 5114 static int virtnet_get_sset_count(struct net_device *dev, int sset) 5115 { 5116 struct virtnet_info *vi = netdev_priv(dev); 5117 struct virtnet_stats_ctx ctx = {0}; 5118 u32 pair_count; 5119 5120 switch (sset) { 5121 case ETH_SS_STATS: 5122 virtnet_stats_ctx_init(vi, &ctx, NULL, false); 5123 5124 pair_count = ctx.desc_num[VIRTNET_Q_TYPE_RX] + ctx.desc_num[VIRTNET_Q_TYPE_TX]; 5125 5126 return pair_count + ctx.desc_num[VIRTNET_Q_TYPE_CQ] + 5127 vi->curr_queue_pairs * pair_count; 5128 default: 5129 return -EOPNOTSUPP; 5130 } 5131 } 5132 5133 static void virtnet_get_ethtool_stats(struct net_device *dev, 5134 struct ethtool_stats *stats, u64 *data) 5135 { 5136 struct virtnet_info *vi = netdev_priv(dev); 5137 struct virtnet_stats_ctx ctx = {0}; 5138 unsigned int start, i; 5139 const u8 *stats_base; 5140 5141 virtnet_stats_ctx_init(vi, &ctx, data, false); 5142 if (virtnet_get_hw_stats(vi, &ctx, -1)) 5143 dev_warn(&vi->dev->dev, "Failed to get hw stats.\n"); 5144 5145 for (i = 0; i < vi->curr_queue_pairs; i++) { 5146 struct receive_queue *rq = &vi->rq[i]; 5147 struct send_queue *sq = &vi->sq[i]; 5148 5149 stats_base = (const u8 *)&rq->stats; 5150 do { 5151 start = u64_stats_fetch_begin(&rq->stats.syncp); 5152 virtnet_fill_stats(vi, i * 2, &ctx, stats_base, true, 0); 5153 } while (u64_stats_fetch_retry(&rq->stats.syncp, 
start)); 5154 5155 stats_base = (const u8 *)&sq->stats; 5156 do { 5157 start = u64_stats_fetch_begin(&sq->stats.syncp); 5158 virtnet_fill_stats(vi, i * 2 + 1, &ctx, stats_base, true, 0); 5159 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 5160 } 5161 5162 virtnet_fill_total_fields(vi, &ctx); 5163 } 5164 5165 static void virtnet_get_channels(struct net_device *dev, 5166 struct ethtool_channels *channels) 5167 { 5168 struct virtnet_info *vi = netdev_priv(dev); 5169 5170 channels->combined_count = vi->curr_queue_pairs; 5171 channels->max_combined = vi->max_queue_pairs; 5172 channels->max_other = 0; 5173 channels->rx_count = 0; 5174 channels->tx_count = 0; 5175 channels->other_count = 0; 5176 } 5177 5178 static int virtnet_set_link_ksettings(struct net_device *dev, 5179 const struct ethtool_link_ksettings *cmd) 5180 { 5181 struct virtnet_info *vi = netdev_priv(dev); 5182 5183 return ethtool_virtdev_set_link_ksettings(dev, cmd, 5184 &vi->speed, &vi->duplex); 5185 } 5186 5187 static int virtnet_get_link_ksettings(struct net_device *dev, 5188 struct ethtool_link_ksettings *cmd) 5189 { 5190 struct virtnet_info *vi = netdev_priv(dev); 5191 5192 cmd->base.speed = vi->speed; 5193 cmd->base.duplex = vi->duplex; 5194 cmd->base.port = PORT_OTHER; 5195 5196 return 0; 5197 } 5198 5199 static int virtnet_send_tx_notf_coal_cmds(struct virtnet_info *vi, 5200 struct ethtool_coalesce *ec) 5201 { 5202 struct virtio_net_ctrl_coal_tx *coal_tx __free(kfree) = NULL; 5203 struct scatterlist sgs_tx; 5204 int i; 5205 5206 coal_tx = kzalloc(sizeof(*coal_tx), GFP_KERNEL); 5207 if (!coal_tx) 5208 return -ENOMEM; 5209 5210 coal_tx->tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); 5211 coal_tx->tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); 5212 sg_init_one(&sgs_tx, coal_tx, sizeof(*coal_tx)); 5213 5214 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 5215 VIRTIO_NET_CTRL_NOTF_COAL_TX_SET, 5216 &sgs_tx)) 5217 return -EINVAL; 5218 5219 vi->intr_coal_tx.max_usecs = ec->tx_coalesce_usecs; 5220 vi->intr_coal_tx.max_packets = ec->tx_max_coalesced_frames; 5221 for (i = 0; i < vi->max_queue_pairs; i++) { 5222 vi->sq[i].intr_coal.max_usecs = ec->tx_coalesce_usecs; 5223 vi->sq[i].intr_coal.max_packets = ec->tx_max_coalesced_frames; 5224 } 5225 5226 return 0; 5227 } 5228 5229 static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi, 5230 struct ethtool_coalesce *ec) 5231 { 5232 struct virtio_net_ctrl_coal_rx *coal_rx __free(kfree) = NULL; 5233 bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce; 5234 struct scatterlist sgs_rx; 5235 int i; 5236 5237 if (rx_ctrl_dim_on && !virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 5238 return -EOPNOTSUPP; 5239 5240 if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != vi->intr_coal_rx.max_usecs || 5241 ec->rx_max_coalesced_frames != vi->intr_coal_rx.max_packets)) 5242 return -EINVAL; 5243 5244 if (rx_ctrl_dim_on && !vi->rx_dim_enabled) { 5245 vi->rx_dim_enabled = true; 5246 for (i = 0; i < vi->max_queue_pairs; i++) { 5247 mutex_lock(&vi->rq[i].dim_lock); 5248 vi->rq[i].dim_enabled = true; 5249 mutex_unlock(&vi->rq[i].dim_lock); 5250 } 5251 return 0; 5252 } 5253 5254 coal_rx = kzalloc(sizeof(*coal_rx), GFP_KERNEL); 5255 if (!coal_rx) 5256 return -ENOMEM; 5257 5258 if (!rx_ctrl_dim_on && vi->rx_dim_enabled) { 5259 vi->rx_dim_enabled = false; 5260 for (i = 0; i < vi->max_queue_pairs; i++) { 5261 mutex_lock(&vi->rq[i].dim_lock); 5262 vi->rq[i].dim_enabled = false; 5263 mutex_unlock(&vi->rq[i].dim_lock); 5264 } 5265 } 5266 5267 /* Since the per-queue 
coalescing params can be set, 5268 * we need apply the global new params even if they 5269 * are not updated. 5270 */ 5271 coal_rx->rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs); 5272 coal_rx->rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames); 5273 sg_init_one(&sgs_rx, coal_rx, sizeof(*coal_rx)); 5274 5275 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 5276 VIRTIO_NET_CTRL_NOTF_COAL_RX_SET, 5277 &sgs_rx)) 5278 return -EINVAL; 5279 5280 vi->intr_coal_rx.max_usecs = ec->rx_coalesce_usecs; 5281 vi->intr_coal_rx.max_packets = ec->rx_max_coalesced_frames; 5282 for (i = 0; i < vi->max_queue_pairs; i++) { 5283 mutex_lock(&vi->rq[i].dim_lock); 5284 vi->rq[i].intr_coal.max_usecs = ec->rx_coalesce_usecs; 5285 vi->rq[i].intr_coal.max_packets = ec->rx_max_coalesced_frames; 5286 mutex_unlock(&vi->rq[i].dim_lock); 5287 } 5288 5289 return 0; 5290 } 5291 5292 static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, 5293 struct ethtool_coalesce *ec) 5294 { 5295 int err; 5296 5297 err = virtnet_send_tx_notf_coal_cmds(vi, ec); 5298 if (err) 5299 return err; 5300 5301 err = virtnet_send_rx_notf_coal_cmds(vi, ec); 5302 if (err) 5303 return err; 5304 5305 return 0; 5306 } 5307 5308 static int virtnet_send_rx_notf_coal_vq_cmds(struct virtnet_info *vi, 5309 struct ethtool_coalesce *ec, 5310 u16 queue) 5311 { 5312 bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce; 5313 u32 max_usecs, max_packets; 5314 bool cur_rx_dim; 5315 int err; 5316 5317 mutex_lock(&vi->rq[queue].dim_lock); 5318 cur_rx_dim = vi->rq[queue].dim_enabled; 5319 max_usecs = vi->rq[queue].intr_coal.max_usecs; 5320 max_packets = vi->rq[queue].intr_coal.max_packets; 5321 5322 if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != max_usecs || 5323 ec->rx_max_coalesced_frames != max_packets)) { 5324 mutex_unlock(&vi->rq[queue].dim_lock); 5325 return -EINVAL; 5326 } 5327 5328 if (rx_ctrl_dim_on && !cur_rx_dim) { 5329 vi->rq[queue].dim_enabled = true; 5330 mutex_unlock(&vi->rq[queue].dim_lock); 5331 return 0; 5332 } 5333 5334 if (!rx_ctrl_dim_on && cur_rx_dim) 5335 vi->rq[queue].dim_enabled = false; 5336 5337 /* If no params are updated, userspace ethtool will 5338 * reject the modification. 
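	 * So the coalescing command below is simply sent unconditionally.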
5339 */ 5340 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, queue, 5341 ec->rx_coalesce_usecs, 5342 ec->rx_max_coalesced_frames); 5343 mutex_unlock(&vi->rq[queue].dim_lock); 5344 return err; 5345 } 5346 5347 static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi, 5348 struct ethtool_coalesce *ec, 5349 u16 queue) 5350 { 5351 int err; 5352 5353 err = virtnet_send_rx_notf_coal_vq_cmds(vi, ec, queue); 5354 if (err) 5355 return err; 5356 5357 err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, queue, 5358 ec->tx_coalesce_usecs, 5359 ec->tx_max_coalesced_frames); 5360 if (err) 5361 return err; 5362 5363 return 0; 5364 } 5365 5366 static void virtnet_rx_dim_work(struct work_struct *work) 5367 { 5368 struct dim *dim = container_of(work, struct dim, work); 5369 struct receive_queue *rq = container_of(dim, 5370 struct receive_queue, dim); 5371 struct virtnet_info *vi = rq->vq->vdev->priv; 5372 struct net_device *dev = vi->dev; 5373 struct dim_cq_moder update_moder; 5374 int qnum, err; 5375 5376 qnum = rq - vi->rq; 5377 5378 mutex_lock(&rq->dim_lock); 5379 if (!rq->dim_enabled) 5380 goto out; 5381 5382 update_moder = net_dim_get_rx_irq_moder(dev, dim); 5383 if (update_moder.usec != rq->intr_coal.max_usecs || 5384 update_moder.pkts != rq->intr_coal.max_packets) { 5385 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, qnum, 5386 update_moder.usec, 5387 update_moder.pkts); 5388 if (err) 5389 pr_debug("%s: Failed to send dim parameters on rxq%d\n", 5390 dev->name, qnum); 5391 } 5392 out: 5393 dim->state = DIM_START_MEASURE; 5394 mutex_unlock(&rq->dim_lock); 5395 } 5396 5397 static int virtnet_coal_params_supported(struct ethtool_coalesce *ec) 5398 { 5399 /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL 5400 * or VIRTIO_NET_F_VQ_NOTF_COAL feature is negotiated. 5401 */ 5402 if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs) 5403 return -EOPNOTSUPP; 5404 5405 if (ec->tx_max_coalesced_frames > 1 || 5406 ec->rx_max_coalesced_frames != 1) 5407 return -EINVAL; 5408 5409 return 0; 5410 } 5411 5412 static int virtnet_should_update_vq_weight(int dev_flags, int weight, 5413 int vq_weight, bool *should_update) 5414 { 5415 if (weight ^ vq_weight) { 5416 if (dev_flags & IFF_UP) 5417 return -EBUSY; 5418 *should_update = true; 5419 } 5420 5421 return 0; 5422 } 5423 5424 static int virtnet_set_coalesce(struct net_device *dev, 5425 struct ethtool_coalesce *ec, 5426 struct kernel_ethtool_coalesce *kernel_coal, 5427 struct netlink_ext_ack *extack) 5428 { 5429 struct virtnet_info *vi = netdev_priv(dev); 5430 int ret, queue_number, napi_weight, i; 5431 bool update_napi = false; 5432 5433 /* Can't change NAPI weight if the link is up */ 5434 napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; 5435 for (queue_number = 0; queue_number < vi->max_queue_pairs; queue_number++) { 5436 ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, 5437 vi->sq[queue_number].napi.weight, 5438 &update_napi); 5439 if (ret) 5440 return ret; 5441 5442 if (update_napi) { 5443 /* All queues that belong to [queue_number, vi->max_queue_pairs] will be 5444 * updated for the sake of simplicity, which might not be necessary 5445 */ 5446 break; 5447 } 5448 } 5449 5450 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) 5451 ret = virtnet_send_notf_coal_cmds(vi, ec); 5452 else 5453 ret = virtnet_coal_params_supported(ec); 5454 5455 if (ret) 5456 return ret; 5457 5458 if (update_napi) { 5459 /* xsk xmit depends on the tx napi. So if xsk is active, 5460 * prevent modifications to tx napi. 
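		 * (The loop below bails out with -EBUSY if any affected queue
		 * still has an xsk_pool bound.)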
5461 */ 5462 for (i = queue_number; i < vi->max_queue_pairs; i++) { 5463 if (vi->sq[i].xsk_pool) 5464 return -EBUSY; 5465 } 5466 5467 for (; queue_number < vi->max_queue_pairs; queue_number++) 5468 vi->sq[queue_number].napi.weight = napi_weight; 5469 } 5470 5471 return ret; 5472 } 5473 5474 static int virtnet_get_coalesce(struct net_device *dev, 5475 struct ethtool_coalesce *ec, 5476 struct kernel_ethtool_coalesce *kernel_coal, 5477 struct netlink_ext_ack *extack) 5478 { 5479 struct virtnet_info *vi = netdev_priv(dev); 5480 5481 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { 5482 ec->rx_coalesce_usecs = vi->intr_coal_rx.max_usecs; 5483 ec->tx_coalesce_usecs = vi->intr_coal_tx.max_usecs; 5484 ec->tx_max_coalesced_frames = vi->intr_coal_tx.max_packets; 5485 ec->rx_max_coalesced_frames = vi->intr_coal_rx.max_packets; 5486 ec->use_adaptive_rx_coalesce = vi->rx_dim_enabled; 5487 } else { 5488 ec->rx_max_coalesced_frames = 1; 5489 5490 if (vi->sq[0].napi.weight) 5491 ec->tx_max_coalesced_frames = 1; 5492 } 5493 5494 return 0; 5495 } 5496 5497 static int virtnet_set_per_queue_coalesce(struct net_device *dev, 5498 u32 queue, 5499 struct ethtool_coalesce *ec) 5500 { 5501 struct virtnet_info *vi = netdev_priv(dev); 5502 int ret, napi_weight; 5503 bool update_napi = false; 5504 5505 if (queue >= vi->max_queue_pairs) 5506 return -EINVAL; 5507 5508 /* Can't change NAPI weight if the link is up */ 5509 napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; 5510 ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, 5511 vi->sq[queue].napi.weight, 5512 &update_napi); 5513 if (ret) 5514 return ret; 5515 5516 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 5517 ret = virtnet_send_notf_coal_vq_cmds(vi, ec, queue); 5518 else 5519 ret = virtnet_coal_params_supported(ec); 5520 5521 if (ret) 5522 return ret; 5523 5524 if (update_napi) 5525 vi->sq[queue].napi.weight = napi_weight; 5526 5527 return 0; 5528 } 5529 5530 static int virtnet_get_per_queue_coalesce(struct net_device *dev, 5531 u32 queue, 5532 struct ethtool_coalesce *ec) 5533 { 5534 struct virtnet_info *vi = netdev_priv(dev); 5535 5536 if (queue >= vi->max_queue_pairs) 5537 return -EINVAL; 5538 5539 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { 5540 mutex_lock(&vi->rq[queue].dim_lock); 5541 ec->rx_coalesce_usecs = vi->rq[queue].intr_coal.max_usecs; 5542 ec->tx_coalesce_usecs = vi->sq[queue].intr_coal.max_usecs; 5543 ec->tx_max_coalesced_frames = vi->sq[queue].intr_coal.max_packets; 5544 ec->rx_max_coalesced_frames = vi->rq[queue].intr_coal.max_packets; 5545 ec->use_adaptive_rx_coalesce = vi->rq[queue].dim_enabled; 5546 mutex_unlock(&vi->rq[queue].dim_lock); 5547 } else { 5548 ec->rx_max_coalesced_frames = 1; 5549 5550 if (vi->sq[queue].napi.weight) 5551 ec->tx_max_coalesced_frames = 1; 5552 } 5553 5554 return 0; 5555 } 5556 5557 static void virtnet_init_settings(struct net_device *dev) 5558 { 5559 struct virtnet_info *vi = netdev_priv(dev); 5560 5561 vi->speed = SPEED_UNKNOWN; 5562 vi->duplex = DUPLEX_UNKNOWN; 5563 } 5564 5565 static u32 virtnet_get_rxfh_key_size(struct net_device *dev) 5566 { 5567 return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size; 5568 } 5569 5570 static u32 virtnet_get_rxfh_indir_size(struct net_device *dev) 5571 { 5572 return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size; 5573 } 5574 5575 static int virtnet_get_rxfh(struct net_device *dev, 5576 struct ethtool_rxfh_param *rxfh) 5577 { 5578 struct virtnet_info *vi = netdev_priv(dev); 5579 int 
i; 5580 5581 if (rxfh->indir) { 5582 for (i = 0; i < vi->rss_indir_table_size; ++i) 5583 rxfh->indir[i] = le16_to_cpu(vi->rss_hdr->indirection_table[i]); 5584 } 5585 5586 if (rxfh->key) 5587 memcpy(rxfh->key, vi->rss_hash_key_data, vi->rss_key_size); 5588 5589 rxfh->hfunc = ETH_RSS_HASH_TOP; 5590 5591 return 0; 5592 } 5593 5594 static int virtnet_set_rxfh(struct net_device *dev, 5595 struct ethtool_rxfh_param *rxfh, 5596 struct netlink_ext_ack *extack) 5597 { 5598 struct virtnet_info *vi = netdev_priv(dev); 5599 bool update = false; 5600 int i; 5601 5602 if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE && 5603 rxfh->hfunc != ETH_RSS_HASH_TOP) 5604 return -EOPNOTSUPP; 5605 5606 if (rxfh->indir) { 5607 if (!vi->has_rss) 5608 return -EOPNOTSUPP; 5609 5610 for (i = 0; i < vi->rss_indir_table_size; ++i) 5611 vi->rss_hdr->indirection_table[i] = cpu_to_le16(rxfh->indir[i]); 5612 update = true; 5613 } 5614 5615 if (rxfh->key) { 5616 /* If either _F_HASH_REPORT or _F_RSS are negotiated, the 5617 * device provides hash calculation capabilities, that is, 5618 * hash_key is configured. 5619 */ 5620 if (!vi->has_rss && !vi->has_rss_hash_report) 5621 return -EOPNOTSUPP; 5622 5623 memcpy(vi->rss_hash_key_data, rxfh->key, vi->rss_key_size); 5624 update = true; 5625 } 5626 5627 if (update) 5628 virtnet_commit_rss_command(vi); 5629 5630 return 0; 5631 } 5632 5633 static u32 virtnet_get_rx_ring_count(struct net_device *dev) 5634 { 5635 struct virtnet_info *vi = netdev_priv(dev); 5636 5637 return vi->curr_queue_pairs; 5638 } 5639 5640 static const struct ethtool_ops virtnet_ethtool_ops = { 5641 .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | 5642 ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE_RX, 5643 .get_drvinfo = virtnet_get_drvinfo, 5644 .get_link = ethtool_op_get_link, 5645 .get_ringparam = virtnet_get_ringparam, 5646 .set_ringparam = virtnet_set_ringparam, 5647 .get_strings = virtnet_get_strings, 5648 .get_sset_count = virtnet_get_sset_count, 5649 .get_ethtool_stats = virtnet_get_ethtool_stats, 5650 .set_channels = virtnet_set_channels, 5651 .get_channels = virtnet_get_channels, 5652 .get_ts_info = ethtool_op_get_ts_info, 5653 .get_link_ksettings = virtnet_get_link_ksettings, 5654 .set_link_ksettings = virtnet_set_link_ksettings, 5655 .set_coalesce = virtnet_set_coalesce, 5656 .get_coalesce = virtnet_get_coalesce, 5657 .set_per_queue_coalesce = virtnet_set_per_queue_coalesce, 5658 .get_per_queue_coalesce = virtnet_get_per_queue_coalesce, 5659 .get_rxfh_key_size = virtnet_get_rxfh_key_size, 5660 .get_rxfh_indir_size = virtnet_get_rxfh_indir_size, 5661 .get_rxfh = virtnet_get_rxfh, 5662 .set_rxfh = virtnet_set_rxfh, 5663 .get_rxfh_fields = virtnet_get_hashflow, 5664 .set_rxfh_fields = virtnet_set_hashflow, 5665 .get_rx_ring_count = virtnet_get_rx_ring_count, 5666 }; 5667 5668 static void virtnet_get_queue_stats_rx(struct net_device *dev, int i, 5669 struct netdev_queue_stats_rx *stats) 5670 { 5671 struct virtnet_info *vi = netdev_priv(dev); 5672 struct receive_queue *rq = &vi->rq[i]; 5673 struct virtnet_stats_ctx ctx = {0}; 5674 5675 virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true); 5676 5677 virtnet_get_hw_stats(vi, &ctx, i * 2); 5678 virtnet_fill_stats(vi, i * 2, &ctx, (void *)&rq->stats, true, 0); 5679 } 5680 5681 static void virtnet_get_queue_stats_tx(struct net_device *dev, int i, 5682 struct netdev_queue_stats_tx *stats) 5683 { 5684 struct virtnet_info *vi = netdev_priv(dev); 5685 struct send_queue *sq = &vi->sq[i]; 5686 struct virtnet_stats_ctx ctx = {0}; 5687 5688 
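	/* Same two-step fill as the rx handler above: device (hw) stats for
	 * this tx vq first, then the driver's own per-queue counters.
	 */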
virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true); 5689 5690 virtnet_get_hw_stats(vi, &ctx, i * 2 + 1); 5691 virtnet_fill_stats(vi, i * 2 + 1, &ctx, (void *)&sq->stats, true, 0); 5692 } 5693 5694 static void virtnet_get_base_stats(struct net_device *dev, 5695 struct netdev_queue_stats_rx *rx, 5696 struct netdev_queue_stats_tx *tx) 5697 { 5698 struct virtnet_info *vi = netdev_priv(dev); 5699 5700 /* The queue stats of the virtio-net will not be reset. So here we 5701 * return 0. 5702 */ 5703 rx->bytes = 0; 5704 rx->packets = 0; 5705 5706 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 5707 rx->hw_drops = 0; 5708 rx->hw_drop_overruns = 0; 5709 } 5710 5711 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 5712 rx->csum_unnecessary = 0; 5713 rx->csum_none = 0; 5714 rx->csum_bad = 0; 5715 } 5716 5717 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 5718 rx->hw_gro_packets = 0; 5719 rx->hw_gro_bytes = 0; 5720 rx->hw_gro_wire_packets = 0; 5721 rx->hw_gro_wire_bytes = 0; 5722 } 5723 5724 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) 5725 rx->hw_drop_ratelimits = 0; 5726 5727 tx->bytes = 0; 5728 tx->packets = 0; 5729 tx->stop = 0; 5730 tx->wake = 0; 5731 5732 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 5733 tx->hw_drops = 0; 5734 tx->hw_drop_errors = 0; 5735 } 5736 5737 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 5738 tx->csum_none = 0; 5739 tx->needs_csum = 0; 5740 } 5741 5742 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 5743 tx->hw_gso_packets = 0; 5744 tx->hw_gso_bytes = 0; 5745 tx->hw_gso_wire_packets = 0; 5746 tx->hw_gso_wire_bytes = 0; 5747 } 5748 5749 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) 5750 tx->hw_drop_ratelimits = 0; 5751 5752 netdev_stat_queue_sum(dev, 5753 dev->real_num_rx_queues, vi->max_queue_pairs, rx, 5754 dev->real_num_tx_queues, vi->max_queue_pairs, tx); 5755 } 5756 5757 static const struct netdev_stat_ops virtnet_stat_ops = { 5758 .get_queue_stats_rx = virtnet_get_queue_stats_rx, 5759 .get_queue_stats_tx = virtnet_get_queue_stats_tx, 5760 .get_base_stats = virtnet_get_base_stats, 5761 }; 5762 5763 static void virtnet_freeze_down(struct virtio_device *vdev) 5764 { 5765 struct virtnet_info *vi = vdev->priv; 5766 5767 /* Make sure no work handler is accessing the device */ 5768 flush_work(&vi->config_work); 5769 disable_rx_mode_work(vi); 5770 flush_work(&vi->rx_mode_work); 5771 5772 if (netif_running(vi->dev)) { 5773 rtnl_lock(); 5774 virtnet_close(vi->dev); 5775 rtnl_unlock(); 5776 } 5777 5778 netif_tx_lock_bh(vi->dev); 5779 netif_device_detach(vi->dev); 5780 netif_tx_unlock_bh(vi->dev); 5781 } 5782 5783 static int init_vqs(struct virtnet_info *vi); 5784 5785 static int virtnet_restore_up(struct virtio_device *vdev) 5786 { 5787 struct virtnet_info *vi = vdev->priv; 5788 int err; 5789 5790 err = init_vqs(vi); 5791 if (err) 5792 return err; 5793 5794 virtio_device_ready(vdev); 5795 5796 enable_delayed_refill(vi); 5797 enable_rx_mode_work(vi); 5798 5799 if (netif_running(vi->dev)) { 5800 rtnl_lock(); 5801 err = virtnet_open(vi->dev); 5802 rtnl_unlock(); 5803 if (err) 5804 return err; 5805 } 5806 5807 netif_tx_lock_bh(vi->dev); 5808 netif_device_attach(vi->dev); 5809 netif_tx_unlock_bh(vi->dev); 5810 return err; 5811 } 5812 5813 static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads) 5814 { 5815 __virtio64 *_offloads __free(kfree) = NULL; 5816 struct scatterlist sg; 5817 5818 _offloads = kzalloc(sizeof(*_offloads), GFP_KERNEL); 5819 if (!_offloads) 
5820 return -ENOMEM; 5821 5822 *_offloads = cpu_to_virtio64(vi->vdev, offloads); 5823 5824 sg_init_one(&sg, _offloads, sizeof(*_offloads)); 5825 5826 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS, 5827 VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) { 5828 dev_warn(&vi->dev->dev, "Fail to set guest offload.\n"); 5829 return -EINVAL; 5830 } 5831 5832 return 0; 5833 } 5834 5835 static int virtnet_clear_guest_offloads(struct virtnet_info *vi) 5836 { 5837 u64 offloads = 0; 5838 5839 if (!vi->guest_offloads) 5840 return 0; 5841 5842 return virtnet_set_guest_offloads(vi, offloads); 5843 } 5844 5845 static int virtnet_restore_guest_offloads(struct virtnet_info *vi) 5846 { 5847 u64 offloads = vi->guest_offloads; 5848 5849 if (!vi->guest_offloads) 5850 return 0; 5851 5852 return virtnet_set_guest_offloads(vi, offloads); 5853 } 5854 5855 static int virtnet_rq_bind_xsk_pool(struct virtnet_info *vi, struct receive_queue *rq, 5856 struct xsk_buff_pool *pool) 5857 { 5858 int err, qindex; 5859 5860 qindex = rq - vi->rq; 5861 5862 if (pool) { 5863 err = xdp_rxq_info_reg(&rq->xsk_rxq_info, vi->dev, qindex, rq->napi.napi_id); 5864 if (err < 0) 5865 return err; 5866 5867 err = xdp_rxq_info_reg_mem_model(&rq->xsk_rxq_info, 5868 MEM_TYPE_XSK_BUFF_POOL, NULL); 5869 if (err < 0) 5870 goto unreg; 5871 5872 xsk_pool_set_rxq_info(pool, &rq->xsk_rxq_info); 5873 } 5874 5875 virtnet_rx_pause(vi, rq); 5876 5877 err = virtqueue_reset(rq->vq, virtnet_rq_unmap_free_buf, NULL); 5878 if (err) { 5879 netdev_err(vi->dev, "reset rx fail: rx queue index: %d err: %d\n", qindex, err); 5880 5881 pool = NULL; 5882 } 5883 5884 rq->xsk_pool = pool; 5885 5886 virtnet_rx_resume(vi, rq); 5887 5888 if (pool) 5889 return 0; 5890 5891 unreg: 5892 xdp_rxq_info_unreg(&rq->xsk_rxq_info); 5893 return err; 5894 } 5895 5896 static int virtnet_sq_bind_xsk_pool(struct virtnet_info *vi, 5897 struct send_queue *sq, 5898 struct xsk_buff_pool *pool) 5899 { 5900 int err, qindex; 5901 5902 qindex = sq - vi->sq; 5903 5904 virtnet_tx_pause(vi, sq); 5905 5906 err = virtqueue_reset(sq->vq, virtnet_sq_free_unused_buf, 5907 virtnet_sq_free_unused_buf_done); 5908 if (err) { 5909 netdev_err(vi->dev, "reset tx fail: tx queue index: %d err: %d\n", qindex, err); 5910 pool = NULL; 5911 } 5912 5913 sq->xsk_pool = pool; 5914 5915 virtnet_tx_resume(vi, sq); 5916 5917 return err; 5918 } 5919 5920 static int virtnet_xsk_pool_enable(struct net_device *dev, 5921 struct xsk_buff_pool *pool, 5922 u16 qid) 5923 { 5924 struct virtnet_info *vi = netdev_priv(dev); 5925 struct receive_queue *rq; 5926 struct device *dma_dev; 5927 struct send_queue *sq; 5928 dma_addr_t hdr_dma; 5929 int err, size; 5930 5931 if (vi->hdr_len > xsk_pool_get_headroom(pool)) 5932 return -EINVAL; 5933 5934 /* In big_packets mode, xdp cannot work, so there is no need to 5935 * initialize xsk of rq. 5936 */ 5937 if (vi->big_packets && !vi->mergeable_rx_bufs) 5938 return -ENOENT; 5939 5940 if (qid >= vi->curr_queue_pairs) 5941 return -EINVAL; 5942 5943 sq = &vi->sq[qid]; 5944 rq = &vi->rq[qid]; 5945 5946 /* xsk assumes that tx and rx must have the same dma device. The af-xdp 5947 * may use one buffer to receive from the rx and reuse this buffer to 5948 * send by the tx. So the dma dev of sq and rq must be the same one. 5949 * 5950 * But vq->dma_dev allows every vq has the respective dma dev. So I 5951 * check the dma dev of vq and sq is the same dev. 
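	 * If the two devices differ, the pool cannot be enabled and -EINVAL
	 * is returned below.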
5952 */ 5953 if (virtqueue_dma_dev(rq->vq) != virtqueue_dma_dev(sq->vq)) 5954 return -EINVAL; 5955 5956 dma_dev = virtqueue_dma_dev(rq->vq); 5957 if (!dma_dev) 5958 return -EINVAL; 5959 5960 size = virtqueue_get_vring_size(rq->vq); 5961 5962 rq->xsk_buffs = kvcalloc(size, sizeof(*rq->xsk_buffs), GFP_KERNEL); 5963 if (!rq->xsk_buffs) 5964 return -ENOMEM; 5965 5966 hdr_dma = virtqueue_map_single_attrs(sq->vq, &xsk_hdr, vi->hdr_len, 5967 DMA_TO_DEVICE, 0); 5968 if (virtqueue_map_mapping_error(sq->vq, hdr_dma)) { 5969 err = -ENOMEM; 5970 goto err_free_buffs; 5971 } 5972 5973 err = xsk_pool_dma_map(pool, dma_dev, 0); 5974 if (err) 5975 goto err_xsk_map; 5976 5977 err = virtnet_rq_bind_xsk_pool(vi, rq, pool); 5978 if (err) 5979 goto err_rq; 5980 5981 err = virtnet_sq_bind_xsk_pool(vi, sq, pool); 5982 if (err) 5983 goto err_sq; 5984 5985 /* Now, we do not support tx offload(such as tx csum), so all the tx 5986 * virtnet hdr is zero. So all the tx packets can share a single hdr. 5987 */ 5988 sq->xsk_hdr_dma_addr = hdr_dma; 5989 5990 return 0; 5991 5992 err_sq: 5993 virtnet_rq_bind_xsk_pool(vi, rq, NULL); 5994 err_rq: 5995 xsk_pool_dma_unmap(pool, 0); 5996 err_xsk_map: 5997 virtqueue_unmap_single_attrs(rq->vq, hdr_dma, vi->hdr_len, 5998 DMA_TO_DEVICE, 0); 5999 err_free_buffs: 6000 kvfree(rq->xsk_buffs); 6001 return err; 6002 } 6003 6004 static int virtnet_xsk_pool_disable(struct net_device *dev, u16 qid) 6005 { 6006 struct virtnet_info *vi = netdev_priv(dev); 6007 struct xsk_buff_pool *pool; 6008 struct receive_queue *rq; 6009 struct send_queue *sq; 6010 int err; 6011 6012 if (qid >= vi->curr_queue_pairs) 6013 return -EINVAL; 6014 6015 sq = &vi->sq[qid]; 6016 rq = &vi->rq[qid]; 6017 6018 pool = rq->xsk_pool; 6019 6020 err = virtnet_rq_bind_xsk_pool(vi, rq, NULL); 6021 err |= virtnet_sq_bind_xsk_pool(vi, sq, NULL); 6022 6023 xsk_pool_dma_unmap(pool, 0); 6024 6025 virtqueue_unmap_single_attrs(sq->vq, sq->xsk_hdr_dma_addr, 6026 vi->hdr_len, DMA_TO_DEVICE, 0); 6027 kvfree(rq->xsk_buffs); 6028 6029 return err; 6030 } 6031 6032 static int virtnet_xsk_pool_setup(struct net_device *dev, struct netdev_bpf *xdp) 6033 { 6034 if (xdp->xsk.pool) 6035 return virtnet_xsk_pool_enable(dev, xdp->xsk.pool, 6036 xdp->xsk.queue_id); 6037 else 6038 return virtnet_xsk_pool_disable(dev, xdp->xsk.queue_id); 6039 } 6040 6041 static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, 6042 struct netlink_ext_ack *extack) 6043 { 6044 unsigned int room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM + 6045 sizeof(struct skb_shared_info)); 6046 unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN; 6047 struct virtnet_info *vi = netdev_priv(dev); 6048 struct bpf_prog *old_prog; 6049 u16 xdp_qp = 0, curr_qp; 6050 int i, err; 6051 6052 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) 6053 && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 6054 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 6055 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 6056 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 6057 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM) || 6058 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) || 6059 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6))) { 6060 NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first"); 6061 return -EOPNOTSUPP; 6062 } 6063 6064 if (vi->mergeable_rx_bufs && !vi->any_header_sg) { 6065 NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg 
required"); 6066 return -EINVAL; 6067 } 6068 6069 if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) { 6070 NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags"); 6071 netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz); 6072 return -EINVAL; 6073 } 6074 6075 curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs; 6076 if (prog) 6077 xdp_qp = nr_cpu_ids; 6078 6079 /* XDP requires extra queues for XDP_TX */ 6080 if (curr_qp + xdp_qp > vi->max_queue_pairs) { 6081 netdev_warn_once(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n", 6082 curr_qp + xdp_qp, vi->max_queue_pairs); 6083 xdp_qp = 0; 6084 } 6085 6086 old_prog = rtnl_dereference(vi->rq[0].xdp_prog); 6087 if (!prog && !old_prog) 6088 return 0; 6089 6090 if (prog) 6091 bpf_prog_add(prog, vi->max_queue_pairs - 1); 6092 6093 virtnet_rx_pause_all(vi); 6094 6095 /* Make sure NAPI is not using any XDP TX queues for RX. */ 6096 if (netif_running(dev)) { 6097 for (i = 0; i < vi->max_queue_pairs; i++) 6098 virtnet_napi_tx_disable(&vi->sq[i]); 6099 } 6100 6101 if (!prog) { 6102 for (i = 0; i < vi->max_queue_pairs; i++) { 6103 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 6104 if (i == 0) 6105 virtnet_restore_guest_offloads(vi); 6106 } 6107 synchronize_net(); 6108 } 6109 6110 err = virtnet_set_queues(vi, curr_qp + xdp_qp); 6111 if (err) 6112 goto err; 6113 netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); 6114 vi->xdp_queue_pairs = xdp_qp; 6115 6116 if (prog) { 6117 vi->xdp_enabled = true; 6118 for (i = 0; i < vi->max_queue_pairs; i++) { 6119 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 6120 if (i == 0 && !old_prog) 6121 virtnet_clear_guest_offloads(vi); 6122 } 6123 if (!old_prog) 6124 xdp_features_set_redirect_target(dev, true); 6125 } else { 6126 xdp_features_clear_redirect_target(dev); 6127 vi->xdp_enabled = false; 6128 } 6129 6130 virtnet_rx_resume_all(vi); 6131 for (i = 0; i < vi->max_queue_pairs; i++) { 6132 if (old_prog) 6133 bpf_prog_put(old_prog); 6134 if (netif_running(dev)) 6135 virtnet_napi_tx_enable(&vi->sq[i]); 6136 } 6137 6138 return 0; 6139 6140 err: 6141 if (!prog) { 6142 virtnet_clear_guest_offloads(vi); 6143 for (i = 0; i < vi->max_queue_pairs; i++) 6144 rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); 6145 } 6146 6147 virtnet_rx_resume_all(vi); 6148 if (netif_running(dev)) { 6149 for (i = 0; i < vi->max_queue_pairs; i++) 6150 virtnet_napi_tx_enable(&vi->sq[i]); 6151 } 6152 if (prog) 6153 bpf_prog_sub(prog, vi->max_queue_pairs - 1); 6154 return err; 6155 } 6156 6157 static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp) 6158 { 6159 switch (xdp->command) { 6160 case XDP_SETUP_PROG: 6161 return virtnet_xdp_set(dev, xdp->prog, xdp->extack); 6162 case XDP_SETUP_XSK_POOL: 6163 return virtnet_xsk_pool_setup(dev, xdp); 6164 default: 6165 return -EINVAL; 6166 } 6167 } 6168 6169 static int virtnet_get_phys_port_name(struct net_device *dev, char *buf, 6170 size_t len) 6171 { 6172 struct virtnet_info *vi = netdev_priv(dev); 6173 int ret; 6174 6175 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 6176 return -EOPNOTSUPP; 6177 6178 ret = snprintf(buf, len, "sby"); 6179 if (ret >= len) 6180 return -EOPNOTSUPP; 6181 6182 return 0; 6183 } 6184 6185 static int virtnet_set_features(struct net_device *dev, 6186 netdev_features_t features) 6187 { 6188 struct virtnet_info *vi = netdev_priv(dev); 6189 u64 offloads; 6190 int err; 6191 6192 if ((dev->features ^ features) & NETIF_F_GRO_HW) { 6193 if (vi->xdp_enabled) 
6194 return -EBUSY; 6195 6196 if (features & NETIF_F_GRO_HW) 6197 offloads = vi->guest_offloads_capable; 6198 else 6199 offloads = vi->guest_offloads_capable & 6200 ~GUEST_OFFLOAD_GRO_HW_MASK; 6201 6202 err = virtnet_set_guest_offloads(vi, offloads); 6203 if (err) 6204 return err; 6205 vi->guest_offloads = offloads; 6206 } 6207 6208 if ((dev->features ^ features) & NETIF_F_RXHASH) { 6209 if (features & NETIF_F_RXHASH) 6210 vi->rss_hdr->hash_types = cpu_to_le32(vi->rss_hash_types_saved); 6211 else 6212 vi->rss_hdr->hash_types = cpu_to_le32(VIRTIO_NET_HASH_REPORT_NONE); 6213 6214 if (!virtnet_commit_rss_command(vi)) 6215 return -EINVAL; 6216 } 6217 6218 return 0; 6219 } 6220 6221 static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue) 6222 { 6223 struct virtnet_info *priv = netdev_priv(dev); 6224 struct send_queue *sq = &priv->sq[txqueue]; 6225 struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue); 6226 6227 u64_stats_update_begin(&sq->stats.syncp); 6228 u64_stats_inc(&sq->stats.tx_timeouts); 6229 u64_stats_update_end(&sq->stats.syncp); 6230 6231 netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n", 6232 txqueue, sq->name, sq->vq->index, sq->vq->name, 6233 jiffies_to_usecs(jiffies - READ_ONCE(txq->trans_start))); 6234 } 6235 6236 static int virtnet_init_irq_moder(struct virtnet_info *vi) 6237 { 6238 u8 profile_flags = 0, coal_flags = 0; 6239 int ret, i; 6240 6241 profile_flags |= DIM_PROFILE_RX; 6242 coal_flags |= DIM_COALESCE_USEC | DIM_COALESCE_PKTS; 6243 ret = net_dim_init_irq_moder(vi->dev, profile_flags, coal_flags, 6244 DIM_CQ_PERIOD_MODE_START_FROM_EQE, 6245 0, virtnet_rx_dim_work, NULL); 6246 6247 if (ret) 6248 return ret; 6249 6250 for (i = 0; i < vi->max_queue_pairs; i++) 6251 net_dim_setting(vi->dev, &vi->rq[i].dim, false); 6252 6253 return 0; 6254 } 6255 6256 static void virtnet_free_irq_moder(struct virtnet_info *vi) 6257 { 6258 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 6259 return; 6260 6261 rtnl_lock(); 6262 net_dim_free_irq_moder(vi->dev); 6263 rtnl_unlock(); 6264 } 6265 6266 static const struct net_device_ops virtnet_netdev = { 6267 .ndo_open = virtnet_open, 6268 .ndo_stop = virtnet_close, 6269 .ndo_start_xmit = start_xmit, 6270 .ndo_validate_addr = eth_validate_addr, 6271 .ndo_set_mac_address = virtnet_set_mac_address, 6272 .ndo_set_rx_mode = virtnet_set_rx_mode, 6273 .ndo_get_stats64 = virtnet_stats, 6274 .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid, 6275 .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, 6276 .ndo_bpf = virtnet_xdp, 6277 .ndo_xdp_xmit = virtnet_xdp_xmit, 6278 .ndo_xsk_wakeup = virtnet_xsk_wakeup, 6279 .ndo_features_check = passthru_features_check, 6280 .ndo_get_phys_port_name = virtnet_get_phys_port_name, 6281 .ndo_set_features = virtnet_set_features, 6282 .ndo_tx_timeout = virtnet_tx_timeout, 6283 }; 6284 6285 static void virtnet_config_changed_work(struct work_struct *work) 6286 { 6287 struct virtnet_info *vi = 6288 container_of(work, struct virtnet_info, config_work); 6289 u16 v; 6290 6291 if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS, 6292 struct virtio_net_config, status, &v) < 0) 6293 return; 6294 6295 if (v & VIRTIO_NET_S_ANNOUNCE) { 6296 netdev_notify_peers(vi->dev); 6297 virtnet_ack_link_announce(vi); 6298 } 6299 6300 /* Ignore unknown (future) status bits */ 6301 v &= VIRTIO_NET_S_LINK_UP; 6302 6303 if (vi->status == v) 6304 return; 6305 6306 vi->status = v; 6307 6308 if (vi->status & VIRTIO_NET_S_LINK_UP) { 6309 virtnet_update_settings(vi); 6310 
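/* Note: virtnet_update_settings() above refreshes speed/duplex from the
 * config space before the carrier is raised below; presumably so that
 * anything reacting to the carrier event reads current link settings.
 */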
netif_carrier_on(vi->dev); 6311 netif_tx_wake_all_queues(vi->dev); 6312 } else { 6313 netif_carrier_off(vi->dev); 6314 netif_tx_stop_all_queues(vi->dev); 6315 } 6316 } 6317 6318 static void virtnet_config_changed(struct virtio_device *vdev) 6319 { 6320 struct virtnet_info *vi = vdev->priv; 6321 6322 schedule_work(&vi->config_work); 6323 } 6324 6325 static void virtnet_free_queues(struct virtnet_info *vi) 6326 { 6327 int i; 6328 6329 for (i = 0; i < vi->max_queue_pairs; i++) { 6330 __netif_napi_del(&vi->rq[i].napi); 6331 __netif_napi_del(&vi->sq[i].napi); 6332 } 6333 6334 /* We called __netif_napi_del(), 6335 * we need to respect an RCU grace period before freeing vi->rq 6336 */ 6337 synchronize_net(); 6338 6339 kfree(vi->rq); 6340 kfree(vi->sq); 6341 kfree(vi->ctrl); 6342 } 6343 6344 static void _free_receive_bufs(struct virtnet_info *vi) 6345 { 6346 struct bpf_prog *old_prog; 6347 int i; 6348 6349 for (i = 0; i < vi->max_queue_pairs; i++) { 6350 while (vi->rq[i].pages) 6351 __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0); 6352 6353 old_prog = rtnl_dereference(vi->rq[i].xdp_prog); 6354 RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL); 6355 if (old_prog) 6356 bpf_prog_put(old_prog); 6357 } 6358 } 6359 6360 static void free_receive_bufs(struct virtnet_info *vi) 6361 { 6362 rtnl_lock(); 6363 _free_receive_bufs(vi); 6364 rtnl_unlock(); 6365 } 6366 6367 static void free_receive_page_frags(struct virtnet_info *vi) 6368 { 6369 int i; 6370 for (i = 0; i < vi->max_queue_pairs; i++) 6371 if (vi->rq[i].alloc_frag.page) { 6372 if (vi->rq[i].last_dma) 6373 virtnet_rq_unmap(&vi->rq[i], vi->rq[i].last_dma, 0); 6374 put_page(vi->rq[i].alloc_frag.page); 6375 } 6376 } 6377 6378 static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf) 6379 { 6380 struct virtnet_info *vi = vq->vdev->priv; 6381 struct send_queue *sq; 6382 int i = vq2txq(vq); 6383 6384 sq = &vi->sq[i]; 6385 6386 switch (virtnet_xmit_ptr_unpack(&buf)) { 6387 case VIRTNET_XMIT_TYPE_SKB: 6388 case VIRTNET_XMIT_TYPE_SKB_ORPHAN: 6389 dev_kfree_skb(buf); 6390 break; 6391 6392 case VIRTNET_XMIT_TYPE_XDP: 6393 xdp_return_frame(buf); 6394 break; 6395 6396 case VIRTNET_XMIT_TYPE_XSK: 6397 xsk_tx_completed(sq->xsk_pool, 1); 6398 break; 6399 } 6400 } 6401 6402 static void virtnet_sq_free_unused_buf_done(struct virtqueue *vq) 6403 { 6404 struct virtnet_info *vi = vq->vdev->priv; 6405 int i = vq2txq(vq); 6406 6407 netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, i)); 6408 } 6409 6410 static void free_unused_bufs(struct virtnet_info *vi) 6411 { 6412 void *buf; 6413 int i; 6414 6415 for (i = 0; i < vi->max_queue_pairs; i++) { 6416 struct virtqueue *vq = vi->sq[i].vq; 6417 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 6418 virtnet_sq_free_unused_buf(vq, buf); 6419 cond_resched(); 6420 } 6421 6422 for (i = 0; i < vi->max_queue_pairs; i++) { 6423 struct virtqueue *vq = vi->rq[i].vq; 6424 6425 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 6426 virtnet_rq_unmap_free_buf(vq, buf); 6427 cond_resched(); 6428 } 6429 } 6430 6431 static void virtnet_del_vqs(struct virtnet_info *vi) 6432 { 6433 struct virtio_device *vdev = vi->vdev; 6434 6435 virtnet_clean_affinity(vi); 6436 6437 vdev->config->del_vqs(vdev); 6438 6439 virtnet_free_queues(vi); 6440 } 6441 6442 /* How large should a single buffer be so a queue full of these can fit at 6443 * least one full packet? 6444 * Logic below assumes the mergeable buffer header is used. 
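 *
 * Worked example (assuming a 12-byte mergeable header, a 256-entry ring
 * and a 65535-byte maximum packet): buf_len = 12 + 14 + 4 + 65535 = 65565,
 * DIV_ROUND_UP(65565, 256) = 257, so the result is
 * max(257 - 12, GOOD_PACKET_LEN) = 1518 bytes per buffer.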
6445 */ 6446 static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq) 6447 { 6448 const unsigned int hdr_len = vi->hdr_len; 6449 unsigned int rq_size = virtqueue_get_vring_size(vq); 6450 unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu; 6451 unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len; 6452 unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size); 6453 6454 return max(max(min_buf_len, hdr_len) - hdr_len, 6455 (unsigned int)GOOD_PACKET_LEN); 6456 } 6457 6458 static int virtnet_find_vqs(struct virtnet_info *vi) 6459 { 6460 struct virtqueue_info *vqs_info; 6461 struct virtqueue **vqs; 6462 int ret = -ENOMEM; 6463 int total_vqs; 6464 bool *ctx; 6465 u16 i; 6466 6467 /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by 6468 * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by 6469 * possible control vq. 6470 */ 6471 total_vqs = vi->max_queue_pairs * 2 + 6472 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ); 6473 6474 /* Allocate space for find_vqs parameters */ 6475 vqs = kcalloc(total_vqs, sizeof(*vqs), GFP_KERNEL); 6476 if (!vqs) 6477 goto err_vq; 6478 vqs_info = kcalloc(total_vqs, sizeof(*vqs_info), GFP_KERNEL); 6479 if (!vqs_info) 6480 goto err_vqs_info; 6481 if (!vi->big_packets || vi->mergeable_rx_bufs) { 6482 ctx = kcalloc(total_vqs, sizeof(*ctx), GFP_KERNEL); 6483 if (!ctx) 6484 goto err_ctx; 6485 } else { 6486 ctx = NULL; 6487 } 6488 6489 /* Parameters for control virtqueue, if any */ 6490 if (vi->has_cvq) { 6491 vqs_info[total_vqs - 1].name = "control"; 6492 } 6493 6494 /* Allocate/initialize parameters for send/receive virtqueues */ 6495 for (i = 0; i < vi->max_queue_pairs; i++) { 6496 vqs_info[rxq2vq(i)].callback = skb_recv_done; 6497 vqs_info[txq2vq(i)].callback = skb_xmit_done; 6498 sprintf(vi->rq[i].name, "input.%u", i); 6499 sprintf(vi->sq[i].name, "output.%u", i); 6500 vqs_info[rxq2vq(i)].name = vi->rq[i].name; 6501 vqs_info[txq2vq(i)].name = vi->sq[i].name; 6502 if (ctx) 6503 vqs_info[rxq2vq(i)].ctx = true; 6504 } 6505 6506 ret = virtio_find_vqs(vi->vdev, total_vqs, vqs, vqs_info, NULL); 6507 if (ret) 6508 goto err_find; 6509 6510 if (vi->has_cvq) { 6511 vi->cvq = vqs[total_vqs - 1]; 6512 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) 6513 vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; 6514 } 6515 6516 for (i = 0; i < vi->max_queue_pairs; i++) { 6517 vi->rq[i].vq = vqs[rxq2vq(i)]; 6518 vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq); 6519 vi->sq[i].vq = vqs[txq2vq(i)]; 6520 } 6521 6522 /* run here: ret == 0. 
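 * On success we deliberately fall through to the labels below: they only
 * free the temporary vqs/vqs_info/ctx arrays and return ret, which is
 * still 0 at this point.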
*/ 6523 6524 6525 err_find: 6526 kfree(ctx); 6527 err_ctx: 6528 kfree(vqs_info); 6529 err_vqs_info: 6530 kfree(vqs); 6531 err_vq: 6532 return ret; 6533 } 6534 6535 static int virtnet_alloc_queues(struct virtnet_info *vi) 6536 { 6537 int i; 6538 6539 if (vi->has_cvq) { 6540 vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL); 6541 if (!vi->ctrl) 6542 goto err_ctrl; 6543 } else { 6544 vi->ctrl = NULL; 6545 } 6546 vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL); 6547 if (!vi->sq) 6548 goto err_sq; 6549 vi->rq = kcalloc(vi->max_queue_pairs, sizeof(*vi->rq), GFP_KERNEL); 6550 if (!vi->rq) 6551 goto err_rq; 6552 6553 INIT_DELAYED_WORK(&vi->refill, refill_work); 6554 for (i = 0; i < vi->max_queue_pairs; i++) { 6555 vi->rq[i].pages = NULL; 6556 netif_napi_add_config(vi->dev, &vi->rq[i].napi, virtnet_poll, 6557 i); 6558 vi->rq[i].napi.weight = napi_weight; 6559 netif_napi_add_tx_weight(vi->dev, &vi->sq[i].napi, 6560 virtnet_poll_tx, 6561 napi_tx ? napi_weight : 0); 6562 6563 sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg)); 6564 ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len); 6565 sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg)); 6566 6567 u64_stats_init(&vi->rq[i].stats.syncp); 6568 u64_stats_init(&vi->sq[i].stats.syncp); 6569 mutex_init(&vi->rq[i].dim_lock); 6570 } 6571 6572 return 0; 6573 6574 err_rq: 6575 kfree(vi->sq); 6576 err_sq: 6577 kfree(vi->ctrl); 6578 err_ctrl: 6579 return -ENOMEM; 6580 } 6581 6582 static int init_vqs(struct virtnet_info *vi) 6583 { 6584 int ret; 6585 6586 /* Allocate send & receive queues */ 6587 ret = virtnet_alloc_queues(vi); 6588 if (ret) 6589 goto err; 6590 6591 ret = virtnet_find_vqs(vi); 6592 if (ret) 6593 goto err_free; 6594 6595 cpus_read_lock(); 6596 virtnet_set_affinity(vi); 6597 cpus_read_unlock(); 6598 6599 return 0; 6600 6601 err_free: 6602 virtnet_free_queues(vi); 6603 err: 6604 return ret; 6605 } 6606 6607 #ifdef CONFIG_SYSFS 6608 static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue, 6609 char *buf) 6610 { 6611 struct virtnet_info *vi = netdev_priv(queue->dev); 6612 unsigned int queue_index = get_netdev_rx_queue_index(queue); 6613 unsigned int headroom = virtnet_get_headroom(vi); 6614 unsigned int tailroom = headroom ? 
sizeof(struct skb_shared_info) : 0; 6615 struct ewma_pkt_len *avg; 6616 6617 BUG_ON(queue_index >= vi->max_queue_pairs); 6618 avg = &vi->rq[queue_index].mrg_avg_pkt_len; 6619 return sprintf(buf, "%u\n", 6620 get_mergeable_buf_len(&vi->rq[queue_index], avg, 6621 SKB_DATA_ALIGN(headroom + tailroom))); 6622 } 6623 6624 static struct rx_queue_attribute mergeable_rx_buffer_size_attribute = 6625 __ATTR_RO(mergeable_rx_buffer_size); 6626 6627 static struct attribute *virtio_net_mrg_rx_attrs[] = { 6628 &mergeable_rx_buffer_size_attribute.attr, 6629 NULL 6630 }; 6631 6632 static const struct attribute_group virtio_net_mrg_rx_group = { 6633 .name = "virtio_net", 6634 .attrs = virtio_net_mrg_rx_attrs 6635 }; 6636 #endif 6637 6638 static bool virtnet_fail_on_feature(struct virtio_device *vdev, 6639 unsigned int fbit, 6640 const char *fname, const char *dname) 6641 { 6642 if (!virtio_has_feature(vdev, fbit)) 6643 return false; 6644 6645 dev_err(&vdev->dev, "device advertises feature %s but not %s", 6646 fname, dname); 6647 6648 return true; 6649 } 6650 6651 #define VIRTNET_FAIL_ON(vdev, fbit, dbit) \ 6652 virtnet_fail_on_feature(vdev, fbit, #fbit, dbit) 6653 6654 static bool virtnet_validate_features(struct virtio_device *vdev) 6655 { 6656 if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) && 6657 (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX, 6658 "VIRTIO_NET_F_CTRL_VQ") || 6659 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN, 6660 "VIRTIO_NET_F_CTRL_VQ") || 6661 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE, 6662 "VIRTIO_NET_F_CTRL_VQ") || 6663 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") || 6664 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR, 6665 "VIRTIO_NET_F_CTRL_VQ") || 6666 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, 6667 "VIRTIO_NET_F_CTRL_VQ") || 6668 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT, 6669 "VIRTIO_NET_F_CTRL_VQ") || 6670 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL, 6671 "VIRTIO_NET_F_CTRL_VQ") || 6672 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_VQ_NOTF_COAL, 6673 "VIRTIO_NET_F_CTRL_VQ"))) { 6674 return false; 6675 } 6676 6677 return true; 6678 } 6679 6680 #define MIN_MTU ETH_MIN_MTU 6681 #define MAX_MTU ETH_MAX_MTU 6682 6683 static int virtnet_validate(struct virtio_device *vdev) 6684 { 6685 if (!vdev->config->get) { 6686 dev_err(&vdev->dev, "%s failure: config access disabled\n", 6687 __func__); 6688 return -EINVAL; 6689 } 6690 6691 if (!virtnet_validate_features(vdev)) 6692 return -EINVAL; 6693 6694 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { 6695 int mtu = virtio_cread16(vdev, 6696 offsetof(struct virtio_net_config, 6697 mtu)); 6698 if (mtu < MIN_MTU) 6699 __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU); 6700 } 6701 6702 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY) && 6703 !virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 6704 dev_warn(&vdev->dev, "device advertises feature VIRTIO_NET_F_STANDBY but not VIRTIO_NET_F_MAC, disabling standby"); 6705 __virtio_clear_bit(vdev, VIRTIO_NET_F_STANDBY); 6706 } 6707 6708 return 0; 6709 } 6710 6711 static bool virtnet_check_guest_gso(const struct virtnet_info *vi) 6712 { 6713 return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 6714 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 6715 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 6716 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 6717 (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) && 6718 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6)); 6719 } 6720 6721 static void virtnet_set_big_packets(struct virtnet_info *vi, const int 
mtu) 6722 { 6723 bool guest_gso = virtnet_check_guest_gso(vi); 6724 6725 /* If device can receive ANY guest GSO packets, regardless of mtu, 6726 * allocate packets of maximum size, otherwise limit it to only 6727 * mtu size worth only. 6728 */ 6729 if (mtu > ETH_DATA_LEN || guest_gso) { 6730 vi->big_packets = true; 6731 vi->big_packets_num_skbfrags = guest_gso ? MAX_SKB_FRAGS : DIV_ROUND_UP(mtu, PAGE_SIZE); 6732 } 6733 } 6734 6735 #define VIRTIO_NET_HASH_REPORT_MAX_TABLE 10 6736 static enum xdp_rss_hash_type 6737 virtnet_xdp_rss_type[VIRTIO_NET_HASH_REPORT_MAX_TABLE] = { 6738 [VIRTIO_NET_HASH_REPORT_NONE] = XDP_RSS_TYPE_NONE, 6739 [VIRTIO_NET_HASH_REPORT_IPv4] = XDP_RSS_TYPE_L3_IPV4, 6740 [VIRTIO_NET_HASH_REPORT_TCPv4] = XDP_RSS_TYPE_L4_IPV4_TCP, 6741 [VIRTIO_NET_HASH_REPORT_UDPv4] = XDP_RSS_TYPE_L4_IPV4_UDP, 6742 [VIRTIO_NET_HASH_REPORT_IPv6] = XDP_RSS_TYPE_L3_IPV6, 6743 [VIRTIO_NET_HASH_REPORT_TCPv6] = XDP_RSS_TYPE_L4_IPV6_TCP, 6744 [VIRTIO_NET_HASH_REPORT_UDPv6] = XDP_RSS_TYPE_L4_IPV6_UDP, 6745 [VIRTIO_NET_HASH_REPORT_IPv6_EX] = XDP_RSS_TYPE_L3_IPV6_EX, 6746 [VIRTIO_NET_HASH_REPORT_TCPv6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX, 6747 [VIRTIO_NET_HASH_REPORT_UDPv6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX 6748 }; 6749 6750 static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash, 6751 enum xdp_rss_hash_type *rss_type) 6752 { 6753 const struct xdp_buff *xdp = (void *)_ctx; 6754 struct virtio_net_hdr_v1_hash *hdr_hash; 6755 struct virtnet_info *vi; 6756 u16 hash_report; 6757 6758 if (!(xdp->rxq->dev->features & NETIF_F_RXHASH)) 6759 return -ENODATA; 6760 6761 vi = netdev_priv(xdp->rxq->dev); 6762 hdr_hash = (struct virtio_net_hdr_v1_hash *)(xdp->data - vi->hdr_len); 6763 hash_report = __le16_to_cpu(hdr_hash->hash_report); 6764 6765 if (hash_report >= VIRTIO_NET_HASH_REPORT_MAX_TABLE) 6766 hash_report = VIRTIO_NET_HASH_REPORT_NONE; 6767 6768 *rss_type = virtnet_xdp_rss_type[hash_report]; 6769 *hash = virtio_net_hash_value(hdr_hash); 6770 return 0; 6771 } 6772 6773 static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = { 6774 .xmo_rx_hash = virtnet_xdp_rx_hash, 6775 }; 6776 6777 static int virtnet_probe(struct virtio_device *vdev) 6778 { 6779 int i, err = -ENOMEM; 6780 struct net_device *dev; 6781 struct virtnet_info *vi; 6782 u16 max_queue_pairs; 6783 int mtu = 0; 6784 6785 /* Find if host supports multiqueue/rss virtio_net device */ 6786 max_queue_pairs = 1; 6787 if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ) || virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) 6788 max_queue_pairs = 6789 virtio_cread16(vdev, offsetof(struct virtio_net_config, max_virtqueue_pairs)); 6790 6791 /* We need at least 2 queue's */ 6792 if (max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 6793 max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || 6794 !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) 6795 max_queue_pairs = 1; 6796 6797 /* Allocate ourselves a network device with room for our info */ 6798 dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs); 6799 if (!dev) 6800 return -ENOMEM; 6801 6802 /* Set up network device as normal. */ 6803 dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE | 6804 IFF_TX_SKB_NO_LINEAR; 6805 dev->netdev_ops = &virtnet_netdev; 6806 dev->stat_ops = &virtnet_stat_ops; 6807 dev->features = NETIF_F_HIGHDMA; 6808 6809 dev->ethtool_ops = &virtnet_ethtool_ops; 6810 SET_NETDEV_DEV(dev, &vdev->dev); 6811 6812 /* Do we support "hardware" checksums? */ 6813 if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) { 6814 /* This opens up the world of extra features. 
*/ 6815 dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG; 6816 if (csum) 6817 dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; 6818 6819 if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) { 6820 dev->hw_features |= NETIF_F_TSO 6821 | NETIF_F_TSO_ECN | NETIF_F_TSO6; 6822 } 6823 /* Individual feature bits: what can host handle? */ 6824 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4)) 6825 dev->hw_features |= NETIF_F_TSO; 6826 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6)) 6827 dev->hw_features |= NETIF_F_TSO6; 6828 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN)) 6829 dev->hw_features |= NETIF_F_TSO_ECN; 6830 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_USO)) 6831 dev->hw_features |= NETIF_F_GSO_UDP_L4; 6832 6833 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO)) { 6834 dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; 6835 dev->hw_enc_features = dev->hw_features; 6836 } 6837 if (dev->hw_features & NETIF_F_GSO_UDP_TUNNEL && 6838 virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM)) { 6839 dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; 6840 dev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; 6841 } 6842 6843 dev->features |= NETIF_F_GSO_ROBUST; 6844 6845 if (gso) 6846 dev->features |= dev->hw_features; 6847 /* (!csum && gso) case will be fixed by register_netdev() */ 6848 } 6849 6850 /* 1. With VIRTIO_NET_F_GUEST_CSUM negotiation, the driver doesn't 6851 * need to calculate checksums for partially checksummed packets, 6852 * as they're considered valid by the upper layer. 6853 * 2. Without VIRTIO_NET_F_GUEST_CSUM negotiation, the driver only 6854 * receives fully checksummed packets. The device may assist in 6855 * validating these packets' checksums, so the driver won't have to. 6856 */ 6857 dev->features |= NETIF_F_RXCSUM; 6858 6859 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || 6860 virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)) 6861 dev->features |= NETIF_F_GRO_HW; 6862 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) 6863 dev->hw_features |= NETIF_F_GRO_HW; 6864 6865 dev->vlan_features = dev->features; 6866 dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | 6867 NETDEV_XDP_ACT_XSK_ZEROCOPY; 6868 6869 /* MTU range: 68 - 65535 */ 6870 dev->min_mtu = MIN_MTU; 6871 dev->max_mtu = MAX_MTU; 6872 6873 /* Configuration may specify what MAC to use. Otherwise random. 
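 * (When VIRTIO_NET_F_MAC is absent, the random address generated here is
 * pushed to the device later in probe, provided VIRTIO_NET_F_CTRL_MAC_ADDR
 * was negotiated.)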
*/ 6874 if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 6875 u8 addr[ETH_ALEN]; 6876 6877 virtio_cread_bytes(vdev, 6878 offsetof(struct virtio_net_config, mac), 6879 addr, ETH_ALEN); 6880 eth_hw_addr_set(dev, addr); 6881 } else { 6882 eth_hw_addr_random(dev); 6883 dev_info(&vdev->dev, "Assigned random MAC address %pM\n", 6884 dev->dev_addr); 6885 } 6886 6887 /* Set up our device-specific information */ 6888 vi = netdev_priv(dev); 6889 vi->dev = dev; 6890 vi->vdev = vdev; 6891 vdev->priv = vi; 6892 6893 INIT_WORK(&vi->config_work, virtnet_config_changed_work); 6894 INIT_WORK(&vi->rx_mode_work, virtnet_rx_mode_work); 6895 spin_lock_init(&vi->refill_lock); 6896 6897 if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) { 6898 vi->mergeable_rx_bufs = true; 6899 dev->xdp_features |= NETDEV_XDP_ACT_RX_SG; 6900 } 6901 6902 if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) 6903 vi->has_rss_hash_report = true; 6904 6905 if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) { 6906 vi->has_rss = true; 6907 6908 vi->rss_indir_table_size = 6909 virtio_cread16(vdev, offsetof(struct virtio_net_config, 6910 rss_max_indirection_table_length)); 6911 } 6912 vi->rss_hdr = devm_kzalloc(&vdev->dev, virtnet_rss_hdr_size(vi), GFP_KERNEL); 6913 if (!vi->rss_hdr) { 6914 err = -ENOMEM; 6915 goto free; 6916 } 6917 6918 if (vi->has_rss || vi->has_rss_hash_report) { 6919 vi->rss_key_size = 6920 virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size)); 6921 if (vi->rss_key_size > VIRTIO_NET_RSS_MAX_KEY_SIZE) { 6922 dev_err(&vdev->dev, "rss_max_key_size=%u exceeds the limit %u.\n", 6923 vi->rss_key_size, VIRTIO_NET_RSS_MAX_KEY_SIZE); 6924 err = -EINVAL; 6925 goto free; 6926 } 6927 6928 vi->rss_hash_types_supported = 6929 virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types)); 6930 vi->rss_hash_types_supported &= 6931 ~(VIRTIO_NET_RSS_HASH_TYPE_IP_EX | 6932 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | 6933 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX); 6934 6935 dev->hw_features |= NETIF_F_RXHASH; 6936 dev->xdp_metadata_ops = &virtnet_xdp_metadata_ops; 6937 } 6938 6939 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO) || 6940 virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO)) 6941 vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash_tunnel); 6942 else if (vi->has_rss_hash_report) 6943 vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash); 6944 else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) || 6945 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 6946 vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); 6947 else 6948 vi->hdr_len = sizeof(struct virtio_net_hdr); 6949 6950 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM)) 6951 vi->rx_tnl_csum = true; 6952 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO)) 6953 vi->rx_tnl = true; 6954 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO)) 6955 vi->tx_tnl = true; 6956 6957 if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) || 6958 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 6959 vi->any_header_sg = true; 6960 6961 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) 6962 vi->has_cvq = true; 6963 6964 mutex_init(&vi->cvq_lock); 6965 6966 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { 6967 mtu = virtio_cread16(vdev, 6968 offsetof(struct virtio_net_config, 6969 mtu)); 6970 if (mtu < dev->min_mtu) { 6971 /* Should never trigger: MTU was previously validated 6972 * in virtnet_validate. 
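 * (virtnet_validate() clears VIRTIO_NET_F_MTU when the advertised MTU is
 * below MIN_MTU, so reaching this branch means the device changed its MTU
 * between validation and probe.)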
6973 */ 6974 dev_err(&vdev->dev, 6975 "device MTU appears to have changed it is now %d < %d", 6976 mtu, dev->min_mtu); 6977 err = -EINVAL; 6978 goto free; 6979 } 6980 6981 dev->mtu = mtu; 6982 dev->max_mtu = mtu; 6983 } 6984 6985 virtnet_set_big_packets(vi, mtu); 6986 6987 if (vi->any_header_sg) 6988 dev->needed_headroom = vi->hdr_len; 6989 6990 /* Enable multiqueue by default */ 6991 if (num_online_cpus() >= max_queue_pairs) 6992 vi->curr_queue_pairs = max_queue_pairs; 6993 else 6994 vi->curr_queue_pairs = num_online_cpus(); 6995 vi->max_queue_pairs = max_queue_pairs; 6996 6997 /* Allocate/initialize the rx/tx queues, and invoke find_vqs */ 6998 err = init_vqs(vi); 6999 if (err) 7000 goto free; 7001 7002 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { 7003 vi->intr_coal_rx.max_usecs = 0; 7004 vi->intr_coal_tx.max_usecs = 0; 7005 vi->intr_coal_rx.max_packets = 0; 7006 7007 /* Keep the default values of the coalescing parameters 7008 * aligned with the default napi_tx state. 7009 */ 7010 if (vi->sq[0].napi.weight) 7011 vi->intr_coal_tx.max_packets = 1; 7012 else 7013 vi->intr_coal_tx.max_packets = 0; 7014 } 7015 7016 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { 7017 /* The reason is the same as VIRTIO_NET_F_NOTF_COAL. */ 7018 for (i = 0; i < vi->max_queue_pairs; i++) 7019 if (vi->sq[i].napi.weight) 7020 vi->sq[i].intr_coal.max_packets = 1; 7021 7022 err = virtnet_init_irq_moder(vi); 7023 if (err) 7024 goto free; 7025 } 7026 7027 #ifdef CONFIG_SYSFS 7028 if (vi->mergeable_rx_bufs) 7029 dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group; 7030 #endif 7031 netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs); 7032 netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs); 7033 7034 virtnet_init_settings(dev); 7035 7036 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { 7037 vi->failover = net_failover_create(vi->dev); 7038 if (IS_ERR(vi->failover)) { 7039 err = PTR_ERR(vi->failover); 7040 goto free_vqs; 7041 } 7042 } 7043 7044 if (vi->has_rss || vi->has_rss_hash_report) 7045 virtnet_init_default_rss(vi); 7046 7047 enable_rx_mode_work(vi); 7048 7049 /* serialize netdev register + virtio_device_ready() with ndo_open() */ 7050 rtnl_lock(); 7051 7052 err = register_netdevice(dev); 7053 if (err) { 7054 pr_debug("virtio_net: registering device failed\n"); 7055 rtnl_unlock(); 7056 goto free_failover; 7057 } 7058 7059 /* Disable config change notification until ndo_open. */ 7060 virtio_config_driver_disable(vi->vdev); 7061 7062 virtio_device_ready(vdev); 7063 7064 if (vi->has_rss || vi->has_rss_hash_report) { 7065 if (!virtnet_commit_rss_command(vi)) { 7066 dev_warn(&vdev->dev, "RSS disabled because committing failed.\n"); 7067 dev->hw_features &= ~NETIF_F_RXHASH; 7068 vi->has_rss_hash_report = false; 7069 vi->has_rss = false; 7070 } 7071 } 7072 7073 virtnet_set_queues(vi, vi->curr_queue_pairs); 7074 7075 /* a random MAC address has been assigned, notify the device. 
7076 * We don't fail probe if VIRTIO_NET_F_CTRL_MAC_ADDR is not there 7077 * because many devices work fine without getting MAC explicitly 7078 */ 7079 if (!virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && 7080 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 7081 struct scatterlist sg; 7082 7083 sg_init_one(&sg, dev->dev_addr, dev->addr_len); 7084 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 7085 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { 7086 pr_debug("virtio_net: setting MAC address failed\n"); 7087 rtnl_unlock(); 7088 err = -EINVAL; 7089 goto free_unregister_netdev; 7090 } 7091 } 7092 7093 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) { 7094 struct virtio_net_stats_capabilities *stats_cap __free(kfree) = NULL; 7095 struct scatterlist sg; 7096 __le64 v; 7097 7098 stats_cap = kzalloc(sizeof(*stats_cap), GFP_KERNEL); 7099 if (!stats_cap) { 7100 rtnl_unlock(); 7101 err = -ENOMEM; 7102 goto free_unregister_netdev; 7103 } 7104 7105 sg_init_one(&sg, stats_cap, sizeof(*stats_cap)); 7106 7107 if (!virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS, 7108 VIRTIO_NET_CTRL_STATS_QUERY, 7109 NULL, &sg)) { 7110 pr_debug("virtio_net: fail to get stats capability\n"); 7111 rtnl_unlock(); 7112 err = -EINVAL; 7113 goto free_unregister_netdev; 7114 } 7115 7116 v = stats_cap->supported_stats_types[0]; 7117 vi->device_stats_cap = le64_to_cpu(v); 7118 } 7119 7120 /* Assume link up if device can't report link status, 7121 otherwise get link status from config. */ 7122 netif_carrier_off(dev); 7123 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { 7124 virtio_config_changed(vi->vdev); 7125 } else { 7126 vi->status = VIRTIO_NET_S_LINK_UP; 7127 virtnet_update_settings(vi); 7128 netif_carrier_on(dev); 7129 } 7130 7131 for (i = 0; i < ARRAY_SIZE(guest_offloads); i++) { 7132 unsigned int fbit; 7133 7134 fbit = virtio_offload_to_feature(guest_offloads[i]); 7135 if (virtio_has_feature(vi->vdev, fbit)) 7136 set_bit(guest_offloads[i], &vi->guest_offloads); 7137 } 7138 vi->guest_offloads_capable = vi->guest_offloads; 7139 7140 rtnl_unlock(); 7141 7142 err = virtnet_cpu_notif_add(vi); 7143 if (err) { 7144 pr_debug("virtio_net: registering cpu notifier failed\n"); 7145 goto free_unregister_netdev; 7146 } 7147 7148 pr_debug("virtnet: registered device %s with %d RX and TX vq's\n", 7149 dev->name, max_queue_pairs); 7150 7151 return 0; 7152 7153 free_unregister_netdev: 7154 unregister_netdev(dev); 7155 free_failover: 7156 net_failover_destroy(vi->failover); 7157 free_vqs: 7158 virtio_reset_device(vdev); 7159 cancel_delayed_work_sync(&vi->refill); 7160 free_receive_page_frags(vi); 7161 virtnet_del_vqs(vi); 7162 free: 7163 free_netdev(dev); 7164 return err; 7165 } 7166 7167 static void remove_vq_common(struct virtnet_info *vi) 7168 { 7169 int i; 7170 7171 virtio_reset_device(vi->vdev); 7172 7173 /* Free unused buffers in both send and recv, if any. */ 7174 free_unused_bufs(vi); 7175 7176 /* 7177 * Rule of thumb is netdev_tx_reset_queue() should follow any 7178 * skb freeing not followed by netdev_tx_completed_queue() 7179 */ 7180 for (i = 0; i < vi->max_queue_pairs; i++) 7181 netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, i)); 7182 7183 free_receive_bufs(vi); 7184 7185 free_receive_page_frags(vi); 7186 7187 virtnet_del_vqs(vi); 7188 } 7189 7190 static void virtnet_remove(struct virtio_device *vdev) 7191 { 7192 struct virtnet_info *vi = vdev->priv; 7193 7194 virtnet_cpu_notif_remove(vi); 7195 7196 /* Make sure no work handler is accessing the device. 
*/ 7197 flush_work(&vi->config_work); 7198 disable_rx_mode_work(vi); 7199 flush_work(&vi->rx_mode_work); 7200 7201 virtnet_free_irq_moder(vi); 7202 7203 unregister_netdev(vi->dev); 7204 7205 net_failover_destroy(vi->failover); 7206 7207 remove_vq_common(vi); 7208 7209 free_netdev(vi->dev); 7210 } 7211 7212 static __maybe_unused int virtnet_freeze(struct virtio_device *vdev) 7213 { 7214 struct virtnet_info *vi = vdev->priv; 7215 7216 virtnet_cpu_notif_remove(vi); 7217 virtnet_freeze_down(vdev); 7218 remove_vq_common(vi); 7219 7220 return 0; 7221 } 7222 7223 static __maybe_unused int virtnet_restore(struct virtio_device *vdev) 7224 { 7225 struct virtnet_info *vi = vdev->priv; 7226 int err; 7227 7228 err = virtnet_restore_up(vdev); 7229 if (err) 7230 return err; 7231 virtnet_set_queues(vi, vi->curr_queue_pairs); 7232 7233 err = virtnet_cpu_notif_add(vi); 7234 if (err) { 7235 virtnet_freeze_down(vdev); 7236 remove_vq_common(vi); 7237 return err; 7238 } 7239 7240 return 0; 7241 } 7242 7243 static struct virtio_device_id id_table[] = { 7244 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, 7245 { 0 }, 7246 }; 7247 7248 #define VIRTNET_FEATURES \ 7249 VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \ 7250 VIRTIO_NET_F_MAC, \ 7251 VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \ 7252 VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \ 7253 VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \ 7254 VIRTIO_NET_F_HOST_USO, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, \ 7255 VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \ 7256 VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \ 7257 VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \ 7258 VIRTIO_NET_F_CTRL_MAC_ADDR, \ 7259 VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ 7260 VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ 7261 VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \ 7262 VIRTIO_NET_F_VQ_NOTF_COAL, \ 7263 VIRTIO_NET_F_GUEST_HDRLEN, VIRTIO_NET_F_DEVICE_STATS 7264 7265 static unsigned int features[] = { 7266 VIRTNET_FEATURES, 7267 VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO, 7268 VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM, 7269 VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO, 7270 VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM, 7271 }; 7272 7273 static unsigned int features_legacy[] = { 7274 VIRTNET_FEATURES, 7275 VIRTIO_NET_F_GSO, 7276 VIRTIO_F_ANY_LAYOUT, 7277 }; 7278 7279 static struct virtio_driver virtio_net_driver = { 7280 .feature_table = features, 7281 .feature_table_size = ARRAY_SIZE(features), 7282 .feature_table_legacy = features_legacy, 7283 .feature_table_size_legacy = ARRAY_SIZE(features_legacy), 7284 .driver.name = KBUILD_MODNAME, 7285 .id_table = id_table, 7286 .validate = virtnet_validate, 7287 .probe = virtnet_probe, 7288 .remove = virtnet_remove, 7289 .config_changed = virtnet_config_changed, 7290 #ifdef CONFIG_PM_SLEEP 7291 .freeze = virtnet_freeze, 7292 .restore = virtnet_restore, 7293 #endif 7294 }; 7295 7296 static __init int virtio_net_driver_init(void) 7297 { 7298 int ret; 7299 7300 ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online", 7301 virtnet_cpu_online, 7302 virtnet_cpu_down_prep); 7303 if (ret < 0) 7304 goto out; 7305 virtionet_online = ret; 7306 ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead", 7307 NULL, virtnet_cpu_dead); 7308 if (ret) 7309 goto err_dead; 7310 ret = register_virtio_driver(&virtio_net_driver); 7311 if (ret) 7312 goto err_virtio; 7313 return 0; 7314 err_virtio: 7315 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); 7316 
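/* The labels below unwind the setup above in reverse order: err_dead drops
 * the dynamic online cpuhp state registered first, and "out" just returns
 * the error.
 */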
err_dead: 7317 cpuhp_remove_multi_state(virtionet_online); 7318 out: 7319 return ret; 7320 } 7321 module_init(virtio_net_driver_init); 7322 7323 static __exit void virtio_net_driver_exit(void) 7324 { 7325 unregister_virtio_driver(&virtio_net_driver); 7326 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); 7327 cpuhp_remove_multi_state(virtionet_online); 7328 } 7329 module_exit(virtio_net_driver_exit); 7330 7331 MODULE_DEVICE_TABLE(virtio, id_table); 7332 MODULE_DESCRIPTION("Virtio network driver"); 7333 MODULE_LICENSE("GPL"); 7334
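/*
 * Illustrative note (not part of the original source): with the virtqueue
 * layout set up in virtnet_find_vqs(), receive queue i maps to vring index
 * rxq2vq(i) = 2 * i and send queue i to txq2vq(i) = 2 * i + 1, with the
 * control virtqueue (when VIRTIO_NET_F_CTRL_VQ is negotiated) occupying
 * the last slot.  For max_queue_pairs == 2 plus a control vq:
 *
 *   vring index: 0    1    2    3    4
 *   used as:     rx0  tx0  rx1  tx1  ctrl
 */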