// SPDX-License-Identifier: GPL-2.0-or-later
/* A network driver using virtio.
 *
 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
 */
//#define DEBUG
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_net.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/scatterlist.h>
#include <linux/if_vlan.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/average.h>
#include <linux/filter.h>
#include <linux/kernel.h>
#include <linux/dim.h>
#include <net/route.h>
#include <net/xdp.h>
#include <net/net_failover.h>
#include <net/netdev_rx_queue.h>
#include <net/netdev_queues.h>
#include <net/xdp_sock_drv.h>

static int napi_weight = NAPI_POLL_WEIGHT;
module_param(napi_weight, int, 0444);

static bool csum = true, gso = true, napi_tx = true;
module_param(csum, bool, 0444);
module_param(gso, bool, 0444);
module_param(napi_tx, bool, 0644);

#define VIRTIO_OFFLOAD_MAP_MIN	46
#define VIRTIO_OFFLOAD_MAP_MAX	47
#define VIRTIO_FEATURES_MAP_MIN	65
#define VIRTIO_O2F_DELTA	(VIRTIO_FEATURES_MAP_MIN - \
				 VIRTIO_OFFLOAD_MAP_MIN)

static bool virtio_is_mapped_offload(unsigned int obit)
{
	return obit >= VIRTIO_OFFLOAD_MAP_MIN &&
	       obit <= VIRTIO_OFFLOAD_MAP_MAX;
}

static unsigned int virtio_offload_to_feature(unsigned int obit)
{
	return virtio_is_mapped_offload(obit) ? obit + VIRTIO_O2F_DELTA : obit;
}
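
/*
 * Worked example of the mapping above (derived purely from the constants):
 * with VIRTIO_O2F_DELTA = 65 - 46 = 19, guest offload bit 46 maps to feature
 * bit 65 and offload bit 47 maps to feature bit 66; any other offload bit is
 * returned unchanged.
 */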

/* FIXME: MTU in config. */
#define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
#define GOOD_COPY_LEN	128

#define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)

/* Separating two types of XDP xmit */
#define VIRTIO_XDP_TX		BIT(0)
#define VIRTIO_XDP_REDIR	BIT(1)

/* RX packet size EWMA. The average packet size is used to determine the packet
 * buffer size when refilling RX rings. As the entire RX ring may be refilled
 * at once, the weight is chosen so that the EWMA will be insensitive to short-
 * term, transient changes in packet size.
 */
DECLARE_EWMA(pkt_len, 0, 64)

#define VIRTNET_DRIVER_VERSION "1.0.0"

static const unsigned long guest_offloads[] = {
	VIRTIO_NET_F_GUEST_TSO4,
	VIRTIO_NET_F_GUEST_TSO6,
	VIRTIO_NET_F_GUEST_ECN,
	VIRTIO_NET_F_GUEST_UFO,
	VIRTIO_NET_F_GUEST_CSUM,
	VIRTIO_NET_F_GUEST_USO4,
	VIRTIO_NET_F_GUEST_USO6,
	VIRTIO_NET_F_GUEST_HDRLEN,
	VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_MAPPED,
	VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM_MAPPED,
};

#define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
				   (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
				   (1ULL << VIRTIO_NET_F_GUEST_ECN)  | \
				   (1ULL << VIRTIO_NET_F_GUEST_UFO)  | \
				   (1ULL << VIRTIO_NET_F_GUEST_USO4) | \
				   (1ULL << VIRTIO_NET_F_GUEST_USO6) | \
				   (1ULL << VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_MAPPED) | \
				   (1ULL << VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM_MAPPED))

struct virtnet_stat_desc {
	char desc[ETH_GSTRING_LEN];
	size_t offset;
	size_t qstat_offset;
};

struct virtnet_sq_free_stats {
	u64 packets;
	u64 bytes;
	u64 napi_packets;
	u64 napi_bytes;
	u64 xsk;
};

struct virtnet_sq_stats {
	struct u64_stats_sync syncp;
	u64_stats_t packets;
	u64_stats_t bytes;
	u64_stats_t xdp_tx;
	u64_stats_t xdp_tx_drops;
	u64_stats_t kicks;
	u64_stats_t tx_timeouts;
	u64_stats_t stop;
	u64_stats_t wake;
};

struct virtnet_rq_stats {
	struct u64_stats_sync syncp;
	u64_stats_t packets;
	u64_stats_t bytes;
	u64_stats_t drops;
	u64_stats_t xdp_packets;
	u64_stats_t xdp_tx;
	u64_stats_t xdp_redirects;
	u64_stats_t xdp_drops;
	u64_stats_t kicks;
};

#define VIRTNET_SQ_STAT(name, m) {name, offsetof(struct virtnet_sq_stats, m), -1}
#define VIRTNET_RQ_STAT(name, m) {name, offsetof(struct virtnet_rq_stats, m), -1}

#define VIRTNET_SQ_STAT_QSTAT(name, m)				\
	{							\
		name,						\
		offsetof(struct virtnet_sq_stats, m),		\
		offsetof(struct netdev_queue_stats_tx, m),	\
	}

#define VIRTNET_RQ_STAT_QSTAT(name, m)				\
	{							\
		name,						\
		offsetof(struct virtnet_rq_stats, m),		\
		offsetof(struct netdev_queue_stats_rx, m),	\
	}

static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = {
	VIRTNET_SQ_STAT("xdp_tx", xdp_tx),
	VIRTNET_SQ_STAT("xdp_tx_drops", xdp_tx_drops),
	VIRTNET_SQ_STAT("kicks", kicks),
	VIRTNET_SQ_STAT("tx_timeouts", tx_timeouts),
};

static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = {
	VIRTNET_RQ_STAT("drops", drops),
	VIRTNET_RQ_STAT("xdp_packets", xdp_packets),
	VIRTNET_RQ_STAT("xdp_tx", xdp_tx),
	VIRTNET_RQ_STAT("xdp_redirects", xdp_redirects),
	VIRTNET_RQ_STAT("xdp_drops", xdp_drops),
	VIRTNET_RQ_STAT("kicks", kicks),
};

static const struct virtnet_stat_desc virtnet_sq_stats_desc_qstat[] = {
	VIRTNET_SQ_STAT_QSTAT("packets", packets),
	VIRTNET_SQ_STAT_QSTAT("bytes", bytes),
	VIRTNET_SQ_STAT_QSTAT("stop", stop),
	VIRTNET_SQ_STAT_QSTAT("wake", wake),
};

static const struct virtnet_stat_desc virtnet_rq_stats_desc_qstat[] = {
	VIRTNET_RQ_STAT_QSTAT("packets", packets),
	VIRTNET_RQ_STAT_QSTAT("bytes", bytes),
};

#define VIRTNET_STATS_DESC_CQ(name) \
	{#name, offsetof(struct virtio_net_stats_cvq, name), -1}

#define VIRTNET_STATS_DESC_RX(class, name) \
	{#name, offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), -1}

#define VIRTNET_STATS_DESC_TX(class, name) \
	{#name, offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name), -1}

static const struct virtnet_stat_desc virtnet_stats_cvq_desc[] = {
	VIRTNET_STATS_DESC_CQ(command_num),
	VIRTNET_STATS_DESC_CQ(ok_num),
};

static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc[] = {
	VIRTNET_STATS_DESC_RX(basic, packets),
	VIRTNET_STATS_DESC_RX(basic, bytes),

	VIRTNET_STATS_DESC_RX(basic, notifications),
	VIRTNET_STATS_DESC_RX(basic, interrupts),
};

static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc[] = {
	VIRTNET_STATS_DESC_TX(basic, packets),
	VIRTNET_STATS_DESC_TX(basic, bytes),

	VIRTNET_STATS_DESC_TX(basic, notifications),
	VIRTNET_STATS_DESC_TX(basic, interrupts),
};

static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc[] = {
	VIRTNET_STATS_DESC_RX(csum, needs_csum),
};

static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc[] = {
	VIRTNET_STATS_DESC_TX(gso, gso_packets_noseg),
	VIRTNET_STATS_DESC_TX(gso, gso_bytes_noseg),
};

static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc[] = {
	VIRTNET_STATS_DESC_RX(speed, ratelimit_bytes),
};

static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc[] = {
	VIRTNET_STATS_DESC_TX(speed, ratelimit_bytes),
};

#define VIRTNET_STATS_DESC_RX_QSTAT(class, name, qstat_field)			\
	{									\
		#name,								\
		offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name),	\
		offsetof(struct netdev_queue_stats_rx, qstat_field),		\
	}

#define VIRTNET_STATS_DESC_TX_QSTAT(class, name, qstat_field)			\
	{									\
		#name,								\
		offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name),	\
		offsetof(struct netdev_queue_stats_tx, qstat_field),		\
	}

static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc_qstat[] = {
	VIRTNET_STATS_DESC_RX_QSTAT(basic, drops, hw_drops),
	VIRTNET_STATS_DESC_RX_QSTAT(basic, drop_overruns, hw_drop_overruns),
};

static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc_qstat[] = {
	VIRTNET_STATS_DESC_TX_QSTAT(basic, drops, hw_drops),
	VIRTNET_STATS_DESC_TX_QSTAT(basic, drop_malformed, hw_drop_errors),
};

static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc_qstat[] = {
	VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_valid, csum_unnecessary),
	VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_none, csum_none),
	VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_bad, csum_bad),
};

static const struct virtnet_stat_desc virtnet_stats_tx_csum_desc_qstat[] = {
	VIRTNET_STATS_DESC_TX_QSTAT(csum, csum_none, csum_none),
	VIRTNET_STATS_DESC_TX_QSTAT(csum, needs_csum, needs_csum),
};

static const struct virtnet_stat_desc virtnet_stats_rx_gso_desc_qstat[] = {
	VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets, hw_gro_packets),
	VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes, hw_gro_bytes),
	VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets_coalesced, hw_gro_wire_packets),
	VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes_coalesced, hw_gro_wire_bytes),
};

static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc_qstat[] = {
	VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_packets, hw_gso_packets),
	VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_bytes, hw_gso_bytes),
	VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments, hw_gso_wire_packets),
	VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments_bytes, hw_gso_wire_bytes),
};

static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc_qstat[] = {
	VIRTNET_STATS_DESC_RX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits),
};

static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc_qstat[] = {
	VIRTNET_STATS_DESC_TX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits),
};

#define VIRTNET_Q_TYPE_RX 0
#define VIRTNET_Q_TYPE_TX 1
#define VIRTNET_Q_TYPE_CQ 2

struct virtnet_interrupt_coalesce {
	u32 max_packets;
	u32 max_usecs;
};

/* The dma information of pages allocated at a time. */
struct virtnet_rq_dma {
	dma_addr_t addr;
	u32 ref;
	u16 len;
	u16 need_sync;
};

/* Internal representation of a send virtqueue */
struct send_queue {
	/* Virtqueue associated with this send queue */
	struct virtqueue *vq;

	/* TX: fragments + linear part + virtio header */
	struct scatterlist sg[MAX_SKB_FRAGS + 2];

	/* Name of the send queue: output.$index */
	char name[16];

	struct virtnet_sq_stats stats;

	struct virtnet_interrupt_coalesce intr_coal;

	struct napi_struct napi;

	/* Record whether sq is in reset state. */
	bool reset;

	struct xsk_buff_pool *xsk_pool;

	dma_addr_t xsk_hdr_dma_addr;
};

/* Internal representation of a receive virtqueue */
struct receive_queue {
	/* Virtqueue associated with this receive_queue */
	struct virtqueue *vq;

	struct napi_struct napi;

	struct bpf_prog __rcu *xdp_prog;

	struct virtnet_rq_stats stats;

	/* The number of rx notifications */
	u16 calls;

	/* Is dynamic interrupt moderation enabled? */
	bool dim_enabled;

	/* Used to protect dim_enabled and intr_coal */
	struct mutex dim_lock;

	/* Dynamic Interrupt Moderation */
	struct dim dim;

	u32 packets_in_napi;

	struct virtnet_interrupt_coalesce intr_coal;

	/* Chain pages by the private ptr. */
	struct page *pages;

	/* Average packet length for mergeable receive buffers. */
	struct ewma_pkt_len mrg_avg_pkt_len;

	/* Page frag for packet buffer allocation. */
	struct page_frag alloc_frag;

	/* RX: fragments + linear part + virtio header */
	struct scatterlist sg[MAX_SKB_FRAGS + 2];

	/* Min single buffer size for mergeable buffers case. */
	unsigned int min_buf_len;

	/* Name of this receive queue: input.$index */
	char name[16];

	struct xdp_rxq_info xdp_rxq;

	/* Record the last dma info to free after a new page is allocated. */
	struct virtnet_rq_dma *last_dma;

	struct xsk_buff_pool *xsk_pool;

	/* xdp rxq used by xsk */
	struct xdp_rxq_info xsk_rxq_info;

	struct xdp_buff **xsk_buffs;
};

#define VIRTIO_NET_RSS_MAX_KEY_SIZE 40

/* Control VQ buffers: protected by the rtnl lock */
struct control_buf {
	struct virtio_net_ctrl_hdr hdr;
	virtio_net_ctrl_ack status;
};

struct virtnet_info {
	struct virtio_device *vdev;
	struct virtqueue *cvq;
	struct net_device *dev;
	struct send_queue *sq;
	struct receive_queue *rq;
	unsigned int status;

	/* Max # of queue pairs supported by the device */
	u16 max_queue_pairs;

	/* # of queue pairs currently used by the driver */
	u16 curr_queue_pairs;

	/* # of XDP queue pairs currently used by the driver */
	u16 xdp_queue_pairs;

	/* xdp_queue_pairs may be 0 when XDP is already loaded, so track this too. */
	bool xdp_enabled;

	/* I like... big packets and I cannot lie! */
	bool big_packets;

	/* number of sg entries allocated for big packets */
	unsigned int big_packets_num_skbfrags;

	/* Host will merge rx buffers for big packets (shake it! shake it!) */
	bool mergeable_rx_bufs;

	/* Host supports rss and/or hash report */
	bool has_rss;
	bool has_rss_hash_report;
	u8 rss_key_size;
	u16 rss_indir_table_size;
	u32 rss_hash_types_supported;
	u32 rss_hash_types_saved;
	struct virtio_net_rss_config_hdr *rss_hdr;
	struct virtio_net_rss_config_trailer rss_trailer;
	u8 rss_hash_key_data[VIRTIO_NET_RSS_MAX_KEY_SIZE];

	/* Has control virtqueue */
	bool has_cvq;

	/* Lock to protect the control VQ */
	struct mutex cvq_lock;

	/* Host can handle any s/g split between our header and packet data */
	bool any_header_sg;

	/* Packet virtio header size */
	u8 hdr_len;

	/* Work struct for delayed refilling if we run low on memory. */
	struct delayed_work refill;

	/* UDP tunnel support */
	bool tx_tnl;

	bool rx_tnl;

	bool rx_tnl_csum;

	/* Is delayed refill enabled? */
	bool refill_enabled;

	/* The lock to synchronize the access to refill_enabled */
	spinlock_t refill_lock;

	/* Work struct for config space updates */
	struct work_struct config_work;

	/* Work struct for setting rx mode */
	struct work_struct rx_mode_work;

	/* OK to queue work setting RX mode? */
	bool rx_mode_work_enabled;

	/* Is the affinity hint set for virtqueues? */
	bool affinity_hint_set;

	/* CPU hotplug instances for online & dead */
	struct hlist_node node;
	struct hlist_node node_dead;

	struct control_buf *ctrl;

	/* Ethtool settings */
	u8 duplex;
	u32 speed;

	/* Is rx dynamic interrupt moderation enabled? */
	bool rx_dim_enabled;

	/* Interrupt coalescing settings */
	struct virtnet_interrupt_coalesce intr_coal_tx;
	struct virtnet_interrupt_coalesce intr_coal_rx;

	unsigned long guest_offloads;
	unsigned long guest_offloads_capable;

	/* failover when STANDBY feature enabled */
	struct failover *failover;

	u64 device_stats_cap;
};

struct padded_vnet_hdr {
	struct virtio_net_hdr_v1_hash hdr;
	/*
	 * hdr is in a separate sg buffer, and the data sg buffer shares the
	 * same page with this header sg. This padding makes the next sg
	 * 16-byte aligned after the header.
	 */
	char padding[12];
};

struct virtio_net_common_hdr {
	union {
		struct virtio_net_hdr hdr;
		struct virtio_net_hdr_mrg_rxbuf	mrg_hdr;
		struct virtio_net_hdr_v1_hash hash_v1_hdr;
		struct virtio_net_hdr_v1_hash_tunnel tnl_hdr;
	};
};

static struct virtio_net_common_hdr xsk_hdr;

static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
static void virtnet_sq_free_unused_buf_done(struct virtqueue *vq);
static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
			       struct net_device *dev,
			       unsigned int *xdp_xmit,
			       struct virtnet_rq_stats *stats);
static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq,
				 struct sk_buff *skb, u8 flags);
static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb,
					       struct sk_buff *curr_skb,
					       struct page *page, void *buf,
					       int len, int truesize);
static void virtnet_xsk_completed(struct send_queue *sq, int num);

enum virtnet_xmit_type {
	VIRTNET_XMIT_TYPE_SKB,
	VIRTNET_XMIT_TYPE_SKB_ORPHAN,
	VIRTNET_XMIT_TYPE_XDP,
	VIRTNET_XMIT_TYPE_XSK,
};

static size_t virtnet_rss_hdr_size(const struct virtnet_info *vi)
{
	u16 indir_table_size = vi->has_rss ? vi->rss_indir_table_size : 1;

	return struct_size(vi->rss_hdr, indirection_table, indir_table_size);
}

static size_t virtnet_rss_trailer_size(const struct virtnet_info *vi)
{
	return struct_size(&vi->rss_trailer, hash_key_data, vi->rss_key_size);
}

/* We use the last two bits of the pointer to distinguish the xmit type. */
#define VIRTNET_XMIT_TYPE_MASK (BIT(0) | BIT(1))

#define VIRTIO_XSK_FLAG_OFFSET 2

static enum virtnet_xmit_type virtnet_xmit_ptr_unpack(void **ptr)
{
	unsigned long p = (unsigned long)*ptr;

	*ptr = (void *)(p & ~VIRTNET_XMIT_TYPE_MASK);

	return p & VIRTNET_XMIT_TYPE_MASK;
}

static void *virtnet_xmit_ptr_pack(void *ptr, enum virtnet_xmit_type type)
{
	return (void *)((unsigned long)ptr | type);
}

static int virtnet_add_outbuf(struct send_queue *sq, int num, void *data,
			      enum virtnet_xmit_type type)
{
	return virtqueue_add_outbuf(sq->vq, sq->sg, num,
				    virtnet_xmit_ptr_pack(data, type),
				    GFP_ATOMIC);
}

static u32 virtnet_ptr_to_xsk_buff_len(void *ptr)
{
	return ((unsigned long)ptr) >> VIRTIO_XSK_FLAG_OFFSET;
}
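
/*
 * Illustrative example of the tag encoding above (not something the virtio
 * spec defines): buffer tokens such as skb or xdp_frame pointers have their
 * two low bits free because the pointees are at least 4-byte aligned, so
 * e.g. (skb | VIRTNET_XMIT_TYPE_SKB_ORPHAN) stores the type alongside the
 * pointer and virtnet_xmit_ptr_unpack() masks it off again. XSK entries
 * carry no pointer at all: virtnet_xsk_to_ptr() below shifts the frame
 * length left by VIRTIO_XSK_FLAG_OFFSET and tags it VIRTNET_XMIT_TYPE_XSK,
 * which virtnet_ptr_to_xsk_buff_len() reverses.
 */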

static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
{
	sg_dma_address(sg) = addr;
	sg_dma_len(sg) = len;
}

static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
			    bool in_napi, struct virtnet_sq_free_stats *stats)
{
	struct xdp_frame *frame;
	struct sk_buff *skb;
	unsigned int len;
	void *ptr;

	while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
		switch (virtnet_xmit_ptr_unpack(&ptr)) {
		case VIRTNET_XMIT_TYPE_SKB:
			skb = ptr;

			pr_debug("Sent skb %p\n", skb);
			stats->napi_packets++;
			stats->napi_bytes += skb->len;
			napi_consume_skb(skb, in_napi);
			break;

		case VIRTNET_XMIT_TYPE_SKB_ORPHAN:
			skb = ptr;

			stats->packets++;
			stats->bytes += skb->len;
			napi_consume_skb(skb, in_napi);
			break;

		case VIRTNET_XMIT_TYPE_XDP:
			frame = ptr;

			stats->packets++;
			stats->bytes += xdp_get_frame_len(frame);
			xdp_return_frame(frame);
			break;

		case VIRTNET_XMIT_TYPE_XSK:
			stats->bytes += virtnet_ptr_to_xsk_buff_len(ptr);
			stats->xsk++;
			break;
		}
	}
	netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes);
}

static void virtnet_free_old_xmit(struct send_queue *sq,
				  struct netdev_queue *txq,
				  bool in_napi,
				  struct virtnet_sq_free_stats *stats)
{
	__free_old_xmit(sq, txq, in_napi, stats);

	if (stats->xsk)
		virtnet_xsk_completed(sq, stats->xsk);
}

/* Converting between virtqueue no. and kernel tx/rx queue no.
 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
 */
static int vq2txq(struct virtqueue *vq)
{
	return (vq->index - 1) / 2;
}

static int txq2vq(int txq)
{
	return txq * 2 + 1;
}

static int vq2rxq(struct virtqueue *vq)
{
	return vq->index / 2;
}

static int rxq2vq(int rxq)
{
	return rxq * 2;
}

static int vq_type(struct virtnet_info *vi, int qid)
{
	if (qid == vi->max_queue_pairs * 2)
		return VIRTNET_Q_TYPE_CQ;

	if (qid % 2)
		return VIRTNET_Q_TYPE_TX;

	return VIRTNET_Q_TYPE_RX;
}

static inline struct virtio_net_common_hdr *
skb_vnet_common_hdr(struct sk_buff *skb)
{
	return (struct virtio_net_common_hdr *)skb->cb;
}

/*
 * private is used to chain pages for big packets; put the whole
 * most recently used list at the beginning for reuse.
 */
static void give_pages(struct receive_queue *rq, struct page *page)
{
	struct page *end;

	/* Find end of list, sew whole thing into vi->rq.pages. */
	for (end = page; end->private; end = (struct page *)end->private);
	end->private = (unsigned long)rq->pages;
	rq->pages = page;
}

static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
{
	struct page *p = rq->pages;

	if (p) {
		rq->pages = (struct page *)p->private;
		/* clear private here, it is used to chain pages */
		p->private = 0;
	} else
		p = alloc_page(gfp_mask);
	return p;
}

static void virtnet_rq_free_buf(struct virtnet_info *vi,
				struct receive_queue *rq, void *buf)
{
	if (vi->mergeable_rx_bufs)
		put_page(virt_to_head_page(buf));
	else if (vi->big_packets)
		give_pages(rq, buf);
	else
		put_page(virt_to_head_page(buf));
}

static void enable_delayed_refill(struct virtnet_info *vi)
{
	spin_lock_bh(&vi->refill_lock);
	vi->refill_enabled = true;
	spin_unlock_bh(&vi->refill_lock);
}

static void disable_delayed_refill(struct virtnet_info *vi)
{
	spin_lock_bh(&vi->refill_lock);
	vi->refill_enabled = false;
	spin_unlock_bh(&vi->refill_lock);
}

static void enable_rx_mode_work(struct virtnet_info *vi)
{
	rtnl_lock();
	vi->rx_mode_work_enabled = true;
	rtnl_unlock();
}

static void disable_rx_mode_work(struct virtnet_info *vi)
{
	rtnl_lock();
	vi->rx_mode_work_enabled = false;
	rtnl_unlock();
}

static void virtqueue_napi_schedule(struct napi_struct *napi,
				    struct virtqueue *vq)
{
	if (napi_schedule_prep(napi)) {
		virtqueue_disable_cb(vq);
		__napi_schedule(napi);
	}
}

static bool virtqueue_napi_complete(struct napi_struct *napi,
				    struct virtqueue *vq, int processed)
{
	int opaque;

	opaque = virtqueue_enable_cb_prepare(vq);
	if (napi_complete_done(napi, processed)) {
		if (unlikely(virtqueue_poll(vq, opaque)))
			virtqueue_napi_schedule(napi, vq);
		else
			return true;
	} else {
		virtqueue_disable_cb(vq);
	}

	return false;
}

static void skb_xmit_done(struct virtqueue *vq)
{
	struct virtnet_info *vi = vq->vdev->priv;
	struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi;

	/* Suppress further interrupts. */
	virtqueue_disable_cb(vq);

	if (napi->weight)
		virtqueue_napi_schedule(napi, vq);
	else
		/* We were probably waiting for more output buffers. */
		netif_wake_subqueue(vi->dev, vq2txq(vq));
}

#define MRG_CTX_HEADER_SHIFT 22
static void *mergeable_len_to_ctx(unsigned int truesize,
				  unsigned int headroom)
{
	return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize);
}

static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx)
{
	return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT;
}

static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
{
	return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1);
}
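
/*
 * Worked example of the context encoding above: with MRG_CTX_HEADER_SHIFT
 * of 22, the low 22 bits of the context hold truesize and the upper bits
 * hold the headroom, e.g. headroom 256 and truesize 4096 are packed as
 * (256 << 22) | 4096 and recovered by the two helpers above.
 */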

static int check_mergeable_len(struct net_device *dev, void *mrg_ctx,
			       unsigned int len)
{
	unsigned int headroom, tailroom, room, truesize;

	truesize = mergeable_ctx_to_truesize(mrg_ctx);
	headroom = mergeable_ctx_to_headroom(mrg_ctx);
	tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
	room = SKB_DATA_ALIGN(headroom + tailroom);

	if (len > truesize - room) {
		pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
			 dev->name, len, (unsigned long)(truesize - room));
		DEV_STATS_INC(dev, rx_length_errors);
		return -1;
	}

	return 0;
}

static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen,
					 unsigned int headroom,
					 unsigned int len)
{
	struct sk_buff *skb;

	skb = build_skb(buf, buflen);
	if (unlikely(!skb))
		return NULL;

	skb_reserve(skb, headroom);
	skb_put(skb, len);

	return skb;
}

/* Called from bottom half context */
static struct sk_buff *page_to_skb(struct virtnet_info *vi,
				   struct receive_queue *rq,
				   struct page *page, unsigned int offset,
				   unsigned int len, unsigned int truesize,
				   unsigned int headroom)
{
	struct sk_buff *skb;
	struct virtio_net_common_hdr *hdr;
	unsigned int copy, hdr_len, hdr_padded_len;
	struct page *page_to_free = NULL;
	int tailroom, shinfo_size;
	char *p, *hdr_p, *buf;

	p = page_address(page) + offset;
	hdr_p = p;

	hdr_len = vi->hdr_len;
	if (vi->mergeable_rx_bufs)
		hdr_padded_len = hdr_len;
	else
		hdr_padded_len = sizeof(struct padded_vnet_hdr);

	buf = p - headroom;
	len -= hdr_len;
	offset += hdr_padded_len;
	p += hdr_padded_len;
	tailroom = truesize - headroom - hdr_padded_len - len;

	shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) {
		skb = virtnet_build_skb(buf, truesize, p - buf, len);
		if (unlikely(!skb))
			return NULL;

		page = (struct page *)page->private;
		if (page)
			give_pages(rq, page);
		goto ok;
	}

	/* copy small packet so we can reuse these pages for small data */
	skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN);
	if (unlikely(!skb))
		return NULL;

	/* Copy the entire frame if it fits in skb->head, otherwise
	 * we let virtio_net_hdr_to_skb() and GRO pull headers as needed.
	 */
	if (len <= skb_tailroom(skb))
		copy = len;
	else
		copy = ETH_HLEN;
	skb_put_data(skb, p, copy);

	len -= copy;
	offset += copy;

	if (vi->mergeable_rx_bufs) {
		if (len)
			skb_add_rx_frag(skb, 0, page, offset, len, truesize);
		else
			page_to_free = page;
		goto ok;
	}

	/*
	 * Verify that we can indeed put this data into a skb.
	 * This is here to handle cases when the device erroneously
	 * tries to receive more than is possible. This is usually
	 * the case of a broken device.
	 */
	if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) {
		net_dbg_ratelimited("%s: too much data\n", skb->dev->name);
		dev_kfree_skb(skb);
		return NULL;
	}
	BUG_ON(offset >= PAGE_SIZE);
	while (len) {
		unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset,
				frag_size, truesize);
		len -= frag_size;
		page = (struct page *)page->private;
		offset = 0;
	}

	if (page)
		give_pages(rq, page);

ok:
	hdr = skb_vnet_common_hdr(skb);
	memcpy(hdr, hdr_p, hdr_len);
	if (page_to_free)
		put_page(page_to_free);

	return skb;
}

static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len)
{
	struct virtnet_info *vi = rq->vq->vdev->priv;
	struct page *page = virt_to_head_page(buf);
	struct virtnet_rq_dma *dma;
	void *head;
	int offset;

	BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs);

	head = page_address(page);

	dma = head;

	--dma->ref;

	if (dma->need_sync && len) {
		offset = buf - (head + sizeof(*dma));

		virtqueue_dma_sync_single_range_for_cpu(rq->vq, dma->addr,
							offset, len,
							DMA_FROM_DEVICE);
	}

	if (dma->ref)
		return;

	virtqueue_dma_unmap_single_attrs(rq->vq, dma->addr, dma->len,
					 DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
	put_page(page);
}

static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx)
{
	struct virtnet_info *vi = rq->vq->vdev->priv;
	void *buf;

	BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs);

	buf = virtqueue_get_buf_ctx(rq->vq, len, ctx);
	if (buf)
		virtnet_rq_unmap(rq, buf, *len);

	return buf;
}

static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len)
{
	struct virtnet_info *vi = rq->vq->vdev->priv;
	struct virtnet_rq_dma *dma;
	dma_addr_t addr;
	u32 offset;
	void *head;

	BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs);

	head = page_address(rq->alloc_frag.page);

	offset = buf - head;

	dma = head;

	addr = dma->addr - sizeof(*dma) + offset;

	sg_init_table(rq->sg, 1);
	sg_fill_dma(rq->sg, addr, len);
}

static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp)
{
	struct page_frag *alloc_frag = &rq->alloc_frag;
	struct virtnet_info *vi = rq->vq->vdev->priv;
	struct virtnet_rq_dma *dma;
	void *buf, *head;
	dma_addr_t addr;

	BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs);

	head = page_address(alloc_frag->page);

	dma = head;

	/* new pages */
	if (!alloc_frag->offset) {
		if (rq->last_dma) {
			/* Now that the new page is allocated, the last dma
			 * will not be used anymore, so it can be unmapped
			 * if its ref is 0.
			 */
			virtnet_rq_unmap(rq, rq->last_dma, 0);
			rq->last_dma = NULL;
		}

		dma->len = alloc_frag->size - sizeof(*dma);

		addr = virtqueue_dma_map_single_attrs(rq->vq, dma + 1,
						      dma->len, DMA_FROM_DEVICE, 0);
		if (virtqueue_dma_mapping_error(rq->vq, addr))
			return NULL;

		dma->addr = addr;
		dma->need_sync = virtqueue_dma_need_sync(rq->vq, addr);

		/* Add a reference to dma to prevent the entire dma from
		 * being released during error handling. This reference
		 * will be freed after the pages are no longer used.
		 */
		get_page(alloc_frag->page);
		dma->ref = 1;
		alloc_frag->offset = sizeof(*dma);

		rq->last_dma = dma;
	}

	++dma->ref;

	buf = head + alloc_frag->offset;

	get_page(alloc_frag->page);
	alloc_frag->offset += size;

	return buf;
}
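
/*
 * Sketch of the receive page layout produced by virtnet_rq_alloc() above
 * (an illustration derived from the code, not a spec requirement):
 *
 *   page: [ struct virtnet_rq_dma | buf 0 | buf 1 | ... ]
 *
 * The DMA metadata sits at the head of the page, so a buffer's device
 * address is dma->addr - sizeof(*dma) + (buf - page_address(page)), which
 * is exactly what virtnet_rq_init_one_sg() computes.
 */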

static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf)
{
	struct virtnet_info *vi = vq->vdev->priv;
	struct receive_queue *rq;
	int i = vq2rxq(vq);

	rq = &vi->rq[i];

	if (rq->xsk_pool) {
		xsk_buff_free((struct xdp_buff *)buf);
		return;
	}

	if (!vi->big_packets || vi->mergeable_rx_bufs)
		virtnet_rq_unmap(rq, buf, 0);

	virtnet_rq_free_buf(vi, rq, buf);
}

static void free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
			  bool in_napi)
{
	struct virtnet_sq_free_stats stats = {0};

	virtnet_free_old_xmit(sq, txq, in_napi, &stats);

	/* Avoid overhead when no packets have been processed; this
	 * happens when called speculatively from start_xmit.
	 */
	if (!stats.packets && !stats.napi_packets)
		return;

	u64_stats_update_begin(&sq->stats.syncp);
	u64_stats_add(&sq->stats.bytes, stats.bytes + stats.napi_bytes);
	u64_stats_add(&sq->stats.packets, stats.packets + stats.napi_packets);
	u64_stats_update_end(&sq->stats.syncp);
}

static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
{
	if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
		return false;
	else if (q < vi->curr_queue_pairs)
		return true;
	else
		return false;
}

static bool tx_may_stop(struct virtnet_info *vi,
			struct net_device *dev,
			struct send_queue *sq)
{
	int qnum;

	qnum = sq - vi->sq;

	/* If running out of space, stop queue to avoid getting packets that we
	 * are then unable to transmit.
	 * An alternative would be to force the queuing layer to requeue the
	 * skb by returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not
	 * be returned in a normal path of operation: it means that the driver
	 * is not maintaining the TX queue stop/start state properly, and
	 * causes the stack to do a non-trivial amount of useless work.
	 * Since most packets only take 1 or 2 ring slots, stopping the queue
	 * early means 16 slots are typically wasted.
	 */
	if (sq->vq->num_free < MAX_SKB_FRAGS + 2) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);

		netif_tx_stop_queue(txq);
		u64_stats_update_begin(&sq->stats.syncp);
		u64_stats_inc(&sq->stats.stop);
		u64_stats_update_end(&sq->stats.syncp);

		return true;
	}

	return false;
}

static void check_sq_full_and_disable(struct virtnet_info *vi,
				      struct net_device *dev,
				      struct send_queue *sq)
{
	bool use_napi = sq->napi.weight;
	int qnum;

	qnum = sq - vi->sq;

	if (tx_may_stop(vi, dev, sq)) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);

		if (use_napi) {
			if (unlikely(!virtqueue_enable_cb_delayed(sq->vq)))
				virtqueue_napi_schedule(&sq->napi, sq->vq);
		} else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
			/* More just got used, free them then recheck. */
			free_old_xmit(sq, txq, false);
			if (sq->vq->num_free >= MAX_SKB_FRAGS + 2) {
				netif_start_subqueue(dev, qnum);
				u64_stats_update_begin(&sq->stats.syncp);
				u64_stats_inc(&sq->stats.wake);
				u64_stats_update_end(&sq->stats.syncp);
				virtqueue_disable_cb(sq->vq);
			}
		}
	}
}

/* Note that @len is the length of received data without virtio header */
static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi,
				   struct receive_queue *rq, void *buf,
				   u32 len, bool first_buf)
{
	struct xdp_buff *xdp;
	u32 bufsize;

	xdp = (struct xdp_buff *)buf;

	/* In virtnet_add_recvbuf_xsk, we use part of XDP_PACKET_HEADROOM for
	 * the virtio header and ask the vhost to fill data from
	 *         hard_start + XDP_PACKET_HEADROOM - vi->hdr_len
	 * The first buffer holds the virtio header, so the remaining region
	 * for frame data is
	 *         xsk_pool_get_rx_frame_size()
	 * Buffers other than the first do not carry a virtio header, so their
	 * maximum frame data length is
	 *         xsk_pool_get_rx_frame_size() + vi->hdr_len
	 */
	bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool);
	if (!first_buf)
		bufsize += vi->hdr_len;

	if (unlikely(len > bufsize)) {
		pr_debug("%s: rx error: len %u exceeds truesize %u\n",
			 vi->dev->name, len, bufsize);
		DEV_STATS_INC(vi->dev, rx_length_errors);
		xsk_buff_free(xdp);
		return NULL;
	}

	if (first_buf) {
		xsk_buff_set_size(xdp, len);
	} else {
		xdp_prepare_buff(xdp, xdp->data_hard_start,
				 XDP_PACKET_HEADROOM - vi->hdr_len, len, 1);
		xdp->flags = 0;
	}

	xsk_buff_dma_sync_for_cpu(xdp);

	return xdp;
}

static struct sk_buff *xsk_construct_skb(struct receive_queue *rq,
					 struct xdp_buff *xdp)
{
	unsigned int metasize = xdp->data - xdp->data_meta;
	struct sk_buff *skb;
	unsigned int size;

	size = xdp->data_end - xdp->data_hard_start;
	skb = napi_alloc_skb(&rq->napi, size);
	if (unlikely(!skb)) {
		xsk_buff_free(xdp);
		return NULL;
	}

	skb_reserve(skb, xdp->data_meta - xdp->data_hard_start);

	size = xdp->data_end - xdp->data_meta;
	memcpy(__skb_put(skb, size), xdp->data_meta, size);

	if (metasize) {
		__skb_pull(skb, metasize);
		skb_metadata_set(skb, metasize);
	}

	xsk_buff_free(xdp);

	return skb;
}

static struct sk_buff *virtnet_receive_xsk_small(struct net_device *dev, struct virtnet_info *vi,
						 struct receive_queue *rq, struct xdp_buff *xdp,
						 unsigned int *xdp_xmit,
						 struct virtnet_rq_stats *stats)
{
	struct bpf_prog *prog;
	u32 ret;

	ret = XDP_PASS;
	rcu_read_lock();
	prog = rcu_dereference(rq->xdp_prog);
	if (prog)
		ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats);
	rcu_read_unlock();

	switch (ret) {
	case XDP_PASS:
		return xsk_construct_skb(rq, xdp);

	case XDP_TX:
	case XDP_REDIRECT:
		return NULL;

	default:
		/* drop packet */
		xsk_buff_free(xdp);
		u64_stats_inc(&stats->drops);
		return NULL;
	}
}

static void xsk_drop_follow_bufs(struct net_device *dev,
				 struct receive_queue *rq,
				 u32 num_buf,
				 struct virtnet_rq_stats *stats)
{
	struct xdp_buff *xdp;
	u32 len;

	while (num_buf-- > 1) {
		xdp = virtqueue_get_buf(rq->vq, &len);
		if (unlikely(!xdp)) {
			pr_debug("%s: rx error: %d buffers missing\n",
				 dev->name, num_buf);
			DEV_STATS_INC(dev, rx_length_errors);
			break;
		}
		u64_stats_add(&stats->bytes, len);
		xsk_buff_free(xdp);
	}
}

static int xsk_append_merge_buffer(struct virtnet_info *vi,
				   struct receive_queue *rq,
				   struct sk_buff *head_skb,
				   u32 num_buf,
				   struct virtio_net_hdr_mrg_rxbuf *hdr,
				   struct virtnet_rq_stats *stats)
{
	struct sk_buff *curr_skb;
	struct xdp_buff *xdp;
	u32 len, truesize;
	struct page *page;
	void *buf;

	curr_skb = head_skb;

	while (--num_buf) {
		buf = virtqueue_get_buf(rq->vq, &len);
		if (unlikely(!buf)) {
			pr_debug("%s: rx error: %d buffers out of %d missing\n",
				 vi->dev->name, num_buf,
				 virtio16_to_cpu(vi->vdev,
						 hdr->num_buffers));
			DEV_STATS_INC(vi->dev, rx_length_errors);
			return -EINVAL;
		}

		u64_stats_add(&stats->bytes, len);

		xdp = buf_to_xdp(vi, rq, buf, len, false);
		if (!xdp)
			goto err;

		buf = napi_alloc_frag(len);
		if (!buf) {
			xsk_buff_free(xdp);
			goto err;
		}

		memcpy(buf, xdp->data, len);

		xsk_buff_free(xdp);

		page = virt_to_page(buf);

		truesize = len;

		curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page,
						   buf, len, truesize);
		if (!curr_skb) {
			put_page(page);
			goto err;
		}
	}

	return 0;

err:
	xsk_drop_follow_bufs(vi->dev, rq, num_buf, stats);
	return -EINVAL;
}

static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct virtnet_info *vi,
						 struct receive_queue *rq, struct xdp_buff *xdp,
						 unsigned int *xdp_xmit,
						 struct virtnet_rq_stats *stats)
{
	struct virtio_net_hdr_mrg_rxbuf *hdr;
	struct bpf_prog *prog;
	struct sk_buff *skb;
	u32 ret, num_buf;

	hdr = xdp->data - vi->hdr_len;
	num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);

	ret = XDP_PASS;
	rcu_read_lock();
	prog = rcu_dereference(rq->xdp_prog);
	/* TODO: support multi buffer. */
	if (prog && num_buf == 1)
		ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats);
	rcu_read_unlock();

	switch (ret) {
	case XDP_PASS:
		skb = xsk_construct_skb(rq, xdp);
		if (!skb)
			goto drop_bufs;

		if (xsk_append_merge_buffer(vi, rq, skb, num_buf, hdr, stats)) {
			dev_kfree_skb(skb);
			goto drop;
		}

		return skb;

	case XDP_TX:
	case XDP_REDIRECT:
		return NULL;

	default:
		/* drop packet */
		xsk_buff_free(xdp);
	}

drop_bufs:
	xsk_drop_follow_bufs(dev, rq, num_buf, stats);

drop:
	u64_stats_inc(&stats->drops);
	return NULL;
}

static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queue *rq,
				    void *buf, u32 len,
				    unsigned int *xdp_xmit,
				    struct virtnet_rq_stats *stats)
{
	struct net_device *dev = vi->dev;
	struct sk_buff *skb = NULL;
	struct xdp_buff *xdp;
	u8 flags;

	len -= vi->hdr_len;

	u64_stats_add(&stats->bytes, len);

	xdp = buf_to_xdp(vi, rq, buf, len, true);
	if (!xdp)
		return;

	if (unlikely(len < ETH_HLEN)) {
		pr_debug("%s: short packet %i\n", dev->name, len);
		DEV_STATS_INC(dev, rx_length_errors);
		xsk_buff_free(xdp);
		return;
	}

	flags = ((struct virtio_net_common_hdr *)(xdp->data - vi->hdr_len))->hdr.flags;

	if (!vi->mergeable_rx_bufs)
		skb = virtnet_receive_xsk_small(dev, vi, rq, xdp, xdp_xmit, stats);
	else
		skb = virtnet_receive_xsk_merge(dev, vi, rq, xdp, xdp_xmit, stats);

	if (skb)
		virtnet_receive_done(vi, rq, skb, flags);
}

static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue *rq,
				   struct xsk_buff_pool *pool, gfp_t gfp)
{
	struct xdp_buff **xsk_buffs;
	dma_addr_t addr;
	int err = 0;
	u32 len, i;
	int num;

	xsk_buffs = rq->xsk_buffs;

	num = xsk_buff_alloc_batch(pool, xsk_buffs, rq->vq->num_free);
	if (!num)
		return -ENOMEM;

	len = xsk_pool_get_rx_frame_size(pool) + vi->hdr_len;

	for (i = 0; i < num; ++i) {
		/* Use part of XDP_PACKET_HEADROOM as the virtnet hdr space.
		 * We assume XDP_PACKET_HEADROOM is larger than hdr->len.
		 * (see function virtnet_xsk_pool_enable)
		 */
		addr = xsk_buff_xdp_get_dma(xsk_buffs[i]) - vi->hdr_len;

		sg_init_table(rq->sg, 1);
		sg_fill_dma(rq->sg, addr, len);

		err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1,
						    xsk_buffs[i], NULL, gfp);
		if (err)
			goto err;
	}

	return num;

err:
	for (; i < num; ++i)
		xsk_buff_free(xsk_buffs[i]);

	return err;
}
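
/*
 * Illustrative layout of one XSK RX buffer as posted above (a sketch based
 * on the comments in buf_to_xdp() and virtnet_add_recvbuf_xsk(), not a spec
 * diagram):
 *
 *   hard_start                                 data
 *   |<-------- XDP_PACKET_HEADROOM -------->|
 *   |   unused...        | virtio header    | frame data ...
 *                        ^
 *                        DMA address handed to the device,
 *                        i.e. xsk_buff_xdp_get_dma() - vi->hdr_len
 */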

static void *virtnet_xsk_to_ptr(u32 len)
{
	unsigned long p;

	p = len << VIRTIO_XSK_FLAG_OFFSET;

	return virtnet_xmit_ptr_pack((void *)p, VIRTNET_XMIT_TYPE_XSK);
}

static int virtnet_xsk_xmit_one(struct send_queue *sq,
				struct xsk_buff_pool *pool,
				struct xdp_desc *desc)
{
	struct virtnet_info *vi;
	dma_addr_t addr;

	vi = sq->vq->vdev->priv;

	addr = xsk_buff_raw_get_dma(pool, desc->addr);
	xsk_buff_raw_dma_sync_for_device(pool, addr, desc->len);

	sg_init_table(sq->sg, 2);
	sg_fill_dma(sq->sg, sq->xsk_hdr_dma_addr, vi->hdr_len);
	sg_fill_dma(sq->sg + 1, addr, desc->len);

	return virtqueue_add_outbuf_premapped(sq->vq, sq->sg, 2,
					      virtnet_xsk_to_ptr(desc->len),
					      GFP_ATOMIC);
}

static int virtnet_xsk_xmit_batch(struct send_queue *sq,
				  struct xsk_buff_pool *pool,
				  unsigned int budget,
				  u64 *kicks)
{
	struct xdp_desc *descs = pool->tx_descs;
	bool kick = false;
	u32 nb_pkts, i;
	int err;

	budget = min_t(u32, budget, sq->vq->num_free);

	nb_pkts = xsk_tx_peek_release_desc_batch(pool, budget);
	if (!nb_pkts)
		return 0;

	for (i = 0; i < nb_pkts; i++) {
		err = virtnet_xsk_xmit_one(sq, pool, &descs[i]);
		if (unlikely(err)) {
			xsk_tx_completed(sq->xsk_pool, nb_pkts - i);
			break;
		}

		kick = true;
	}

	if (kick && virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))
		(*kicks)++;

	return i;
}

static bool virtnet_xsk_xmit(struct send_queue *sq, struct xsk_buff_pool *pool,
			     int budget)
{
	struct virtnet_info *vi = sq->vq->vdev->priv;
	struct virtnet_sq_free_stats stats = {};
	struct net_device *dev = vi->dev;
	u64 kicks = 0;
	int sent;

	/* Avoid waking up NAPI needlessly, so call __free_old_xmit() instead
	 * of free_old_xmit().
	 */
	__free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq), true, &stats);

	if (stats.xsk)
		xsk_tx_completed(sq->xsk_pool, stats.xsk);

	sent = virtnet_xsk_xmit_batch(sq, pool, budget, &kicks);

	if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq))
		check_sq_full_and_disable(vi, vi->dev, sq);

	if (sent) {
		struct netdev_queue *txq;

		txq = netdev_get_tx_queue(vi->dev, sq - vi->sq);
		txq_trans_cond_update(txq);
	}

	u64_stats_update_begin(&sq->stats.syncp);
	u64_stats_add(&sq->stats.packets, stats.packets);
	u64_stats_add(&sq->stats.bytes, stats.bytes);
	u64_stats_add(&sq->stats.kicks, kicks);
	u64_stats_add(&sq->stats.xdp_tx, sent);
	u64_stats_update_end(&sq->stats.syncp);

	if (xsk_uses_need_wakeup(pool))
		xsk_set_tx_need_wakeup(pool);

	return sent;
}

static void xsk_wakeup(struct send_queue *sq)
{
	if (napi_if_scheduled_mark_missed(&sq->napi))
		return;

	local_bh_disable();
	virtqueue_napi_schedule(&sq->napi, sq->vq);
	local_bh_enable();
}

static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct send_queue *sq;

	if (!netif_running(dev))
		return -ENETDOWN;

	if (qid >= vi->curr_queue_pairs)
		return -EINVAL;

	sq = &vi->sq[qid];

	xsk_wakeup(sq);
	return 0;
}

static void virtnet_xsk_completed(struct send_queue *sq, int num)
{
	xsk_tx_completed(sq->xsk_pool, num);

	/* If this is called from rx poll, start_xmit or xdp xmit, we should
	 * wake up the tx napi to consume the xsk tx queue, because the tx
	 * interrupt may not be triggered.
	 */
	xsk_wakeup(sq);
}

static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
				  struct send_queue *sq,
				  struct xdp_frame *xdpf)
{
	struct virtio_net_hdr_mrg_rxbuf *hdr;
	struct skb_shared_info *shinfo;
	u8 nr_frags = 0;
	int err, i;

	if (unlikely(xdpf->headroom < vi->hdr_len))
		return -EOVERFLOW;

	if (unlikely(xdp_frame_has_frags(xdpf))) {
		shinfo = xdp_get_shared_info_from_frame(xdpf);
		nr_frags = shinfo->nr_frags;
	}

	/* In the wrapping function virtnet_xdp_xmit(), we need to free
	 * up the pending old buffers, where we need to calculate the
	 * position of skb_shared_info in xdp_get_frame_len() and
	 * xdp_return_frame(), which involves xdpf->data and
	 * xdpf->headroom. Therefore, we need to update the value of
	 * headroom synchronously here.
	 */
	xdpf->headroom -= vi->hdr_len;
	xdpf->data -= vi->hdr_len;
	/* Zero header and leave csum up to XDP layers */
	hdr = xdpf->data;
	memset(hdr, 0, vi->hdr_len);
	xdpf->len += vi->hdr_len;

	sg_init_table(sq->sg, nr_frags + 1);
	sg_set_buf(sq->sg, xdpf->data, xdpf->len);
	for (i = 0; i < nr_frags; i++) {
		skb_frag_t *frag = &shinfo->frags[i];

		sg_set_page(&sq->sg[i + 1], skb_frag_page(frag),
			    skb_frag_size(frag), skb_frag_off(frag));
	}

	err = virtnet_add_outbuf(sq, nr_frags + 1, xdpf, VIRTNET_XMIT_TYPE_XDP);
	if (unlikely(err))
		return -ENOSPC; /* Caller handle free/refcnt */

	return 0;
}

/* when vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on
 * the current cpu, so it does not need to be locked.
 *
 * Here we use a macro instead of inline functions because we have to deal with
 * three issues at the same time: 1. the choice of sq. 2. judging and executing
 * the lock/unlock of txq. 3. making sparse happy. It is difficult for two
 * inline functions to perfectly solve these three problems at the same time.
 */
#define virtnet_xdp_get_sq(vi) ({                                       \
	int cpu = smp_processor_id();                                   \
	struct netdev_queue *txq;                                       \
	typeof(vi) v = (vi);                                            \
	unsigned int qp;                                                \
									\
	if (v->curr_queue_pairs > nr_cpu_ids) {                         \
		qp = v->curr_queue_pairs - v->xdp_queue_pairs;          \
		qp += cpu;                                              \
		txq = netdev_get_tx_queue(v->dev, qp);                  \
		__netif_tx_acquire(txq);                                \
	} else {                                                        \
		qp = cpu % v->curr_queue_pairs;                         \
		txq = netdev_get_tx_queue(v->dev, qp);                  \
		__netif_tx_lock(txq, cpu);                              \
	}                                                               \
	v->sq + qp;                                                     \
})

#define virtnet_xdp_put_sq(vi, q) {                                     \
	struct netdev_queue *txq;                                       \
	typeof(vi) v = (vi);                                            \
									\
	txq = netdev_get_tx_queue(v->dev, (q) - v->sq);                 \
	if (v->curr_queue_pairs > nr_cpu_ids)                           \
		__netif_tx_release(txq);                                \
	else                                                            \
		__netif_tx_unlock(txq);                                 \
}

static int virtnet_xdp_xmit(struct net_device *dev,
			    int n, struct xdp_frame **frames, u32 flags)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct virtnet_sq_free_stats stats = {0};
	struct receive_queue *rq = vi->rq;
	struct bpf_prog *xdp_prog;
	struct send_queue *sq;
	int nxmit = 0;
	int kicks = 0;
	int ret;
	int i;

	/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
	 * indicates XDP resources have been successfully allocated.
	 */
	xdp_prog = rcu_access_pointer(rq->xdp_prog);
	if (!xdp_prog)
		return -ENXIO;

	sq = virtnet_xdp_get_sq(vi);

	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
		ret = -EINVAL;
		goto out;
	}

	/* Free up any pending old buffers before queueing new ones. */
	virtnet_free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq),
			      false, &stats);

	for (i = 0; i < n; i++) {
		struct xdp_frame *xdpf = frames[i];

		if (__virtnet_xdp_xmit_one(vi, sq, xdpf))
			break;
		nxmit++;
	}
	ret = nxmit;

	if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq))
		check_sq_full_and_disable(vi, dev, sq);

	if (flags & XDP_XMIT_FLUSH) {
		if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))
			kicks = 1;
	}
out:
	u64_stats_update_begin(&sq->stats.syncp);
	u64_stats_add(&sq->stats.bytes, stats.bytes);
	u64_stats_add(&sq->stats.packets, stats.packets);
	u64_stats_add(&sq->stats.xdp_tx, n);
	u64_stats_add(&sq->stats.xdp_tx_drops, n - nxmit);
	u64_stats_add(&sq->stats.kicks, kicks);
	u64_stats_update_end(&sq->stats.syncp);

	virtnet_xdp_put_sq(vi, sq);
	return ret;
}

static void put_xdp_frags(struct xdp_buff *xdp)
{
	struct skb_shared_info *shinfo;
	struct page *xdp_page;
	int i;

	if (xdp_buff_has_frags(xdp)) {
		shinfo = xdp_get_shared_info_from_buff(xdp);
		for (i = 0; i < shinfo->nr_frags; i++) {
			xdp_page = skb_frag_page(&shinfo->frags[i]);
			put_page(xdp_page);
		}
	}
}

static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
			       struct net_device *dev,
			       unsigned int *xdp_xmit,
			       struct virtnet_rq_stats *stats)
{
	struct xdp_frame *xdpf;
	int err;
	u32 act;

	act = bpf_prog_run_xdp(xdp_prog, xdp);
	u64_stats_inc(&stats->xdp_packets);

	switch (act) {
	case XDP_PASS:
		return act;

	case XDP_TX:
		u64_stats_inc(&stats->xdp_tx);
		xdpf = xdp_convert_buff_to_frame(xdp);
		if (unlikely(!xdpf)) {
			netdev_dbg(dev, "convert buff to frame failed for xdp\n");
			return XDP_DROP;
		}

		err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
		if (unlikely(!err)) {
			xdp_return_frame_rx_napi(xdpf);
		} else if (unlikely(err < 0)) {
			trace_xdp_exception(dev, xdp_prog, act);
			return XDP_DROP;
		}
		*xdp_xmit |= VIRTIO_XDP_TX;
		return act;

	case XDP_REDIRECT:
		u64_stats_inc(&stats->xdp_redirects);
		err = xdp_do_redirect(dev, xdp, xdp_prog);
		if (err)
			return XDP_DROP;

		*xdp_xmit |= VIRTIO_XDP_REDIR;
		return act;

	default:
		bpf_warn_invalid_xdp_action(dev, xdp_prog, act);
		fallthrough;
	case XDP_ABORTED:
		trace_xdp_exception(dev, xdp_prog, act);
		fallthrough;
	case XDP_DROP:
		return XDP_DROP;
	}
}

static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
{
	return vi->xdp_enabled ? XDP_PACKET_HEADROOM : 0;
}

/* We copy the packet for XDP in the following cases:
 *
 * 1) Packet is scattered across multiple rx buffers.
 * 2) Headroom space is insufficient.
 *
 * This is inefficient but it's a temporary condition that
 * we hit right after XDP is enabled and until the queue is refilled
 * with large buffers with sufficient headroom - so it should affect
 * at most queue size packets.
 * Afterwards, the conditions to enable
 * XDP should preclude the underlying device from sending packets
 * across multiple buffers (num_buf > 1), and we make sure buffers
 * have enough headroom.
 */
static struct page *xdp_linearize_page(struct net_device *dev,
				       struct receive_queue *rq,
				       int *num_buf,
				       struct page *p,
				       int offset,
				       int page_off,
				       unsigned int *len)
{
	int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	struct page *page;

	if (page_off + *len + tailroom > PAGE_SIZE)
		return NULL;

	page = alloc_page(GFP_ATOMIC);
	if (!page)
		return NULL;

	memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
	page_off += *len;

	/* Only mergeable mode can go inside this while loop. In small mode,
	 * *num_buf == 1, so it cannot go inside.
	 */
	while (--*num_buf) {
		unsigned int buflen;
		void *buf;
		void *ctx;
		int off;

		buf = virtnet_rq_get_buf(rq, &buflen, &ctx);
		if (unlikely(!buf))
			goto err_buf;

		p = virt_to_head_page(buf);
		off = buf - page_address(p);

		if (check_mergeable_len(dev, ctx, buflen)) {
			put_page(p);
			goto err_buf;
		}

		/* guard against a misconfigured or uncooperative backend that
		 * is sending packets larger than the MTU.
		 */
		if ((page_off + buflen + tailroom) > PAGE_SIZE) {
			put_page(p);
			goto err_buf;
		}

		memcpy(page_address(page) + page_off,
		       page_address(p) + off, buflen);
		page_off += buflen;
		put_page(p);
	}

	/* Headroom does not contribute to packet length */
	*len = page_off - XDP_PACKET_HEADROOM;
	return page;
err_buf:
	__free_pages(page, 0);
	return NULL;
}

static struct sk_buff *receive_small_build_skb(struct virtnet_info *vi,
					       unsigned int xdp_headroom,
					       void *buf,
					       unsigned int len)
{
	unsigned int header_offset;
	unsigned int headroom;
	unsigned int buflen;
	struct sk_buff *skb;

	header_offset = VIRTNET_RX_PAD + xdp_headroom;
	headroom = vi->hdr_len + header_offset;
	buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
		 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	skb = virtnet_build_skb(buf, buflen, headroom, len);
	if (unlikely(!skb))
		return NULL;

	buf += header_offset;
	memcpy(skb_vnet_common_hdr(skb), buf, vi->hdr_len);

	return skb;
}
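
/*
 * Illustrative layout of a small-mode receive buffer as assembled by
 * receive_small_build_skb() (a sketch derived from the offsets above):
 *
 *   buf
 *   | VIRTNET_RX_PAD | xdp_headroom | virtio header | packet data ...
 *   |<------------------ headroom ------------------>|
 *
 * buflen additionally reserves SKB_DATA_ALIGN(sizeof(struct skb_shared_info))
 * at the end so the buffer can be turned into an skb via build_skb().
 */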
*/ 1984 if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) 1985 goto err_xdp; 1986 1987 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 1988 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1989 1990 if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) { 1991 int offset = buf - page_address(page) + header_offset; 1992 unsigned int tlen = len + vi->hdr_len; 1993 int num_buf = 1; 1994 1995 xdp_headroom = virtnet_get_headroom(vi); 1996 header_offset = VIRTNET_RX_PAD + xdp_headroom; 1997 headroom = vi->hdr_len + header_offset; 1998 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 1999 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 2000 xdp_page = xdp_linearize_page(dev, rq, &num_buf, page, 2001 offset, header_offset, 2002 &tlen); 2003 if (!xdp_page) 2004 goto err_xdp; 2005 2006 buf = page_address(xdp_page); 2007 put_page(page); 2008 page = xdp_page; 2009 } 2010 2011 xdp_init_buff(&xdp, buflen, &rq->xdp_rxq); 2012 xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len, 2013 xdp_headroom, len, true); 2014 2015 act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats); 2016 2017 switch (act) { 2018 case XDP_PASS: 2019 /* Recalculate length in case bpf program changed it */ 2020 len = xdp.data_end - xdp.data; 2021 metasize = xdp.data - xdp.data_meta; 2022 break; 2023 2024 case XDP_TX: 2025 case XDP_REDIRECT: 2026 goto xdp_xmit; 2027 2028 default: 2029 goto err_xdp; 2030 } 2031 2032 skb = virtnet_build_skb(buf, buflen, xdp.data - buf, len); 2033 if (unlikely(!skb)) 2034 goto err; 2035 2036 if (metasize) 2037 skb_metadata_set(skb, metasize); 2038 2039 return skb; 2040 2041 err_xdp: 2042 u64_stats_inc(&stats->xdp_drops); 2043 err: 2044 u64_stats_inc(&stats->drops); 2045 put_page(page); 2046 xdp_xmit: 2047 return NULL; 2048 } 2049 2050 static struct sk_buff *receive_small(struct net_device *dev, 2051 struct virtnet_info *vi, 2052 struct receive_queue *rq, 2053 void *buf, void *ctx, 2054 unsigned int len, 2055 unsigned int *xdp_xmit, 2056 struct virtnet_rq_stats *stats) 2057 { 2058 unsigned int xdp_headroom = (unsigned long)ctx; 2059 struct page *page = virt_to_head_page(buf); 2060 struct sk_buff *skb; 2061 2062 /* We passed the address of virtnet header to virtio-core, 2063 * so truncate the padding. 
2064 */ 2065 buf -= VIRTNET_RX_PAD + xdp_headroom; 2066 2067 len -= vi->hdr_len; 2068 u64_stats_add(&stats->bytes, len); 2069 2070 if (unlikely(len > GOOD_PACKET_LEN)) { 2071 pr_debug("%s: rx error: len %u exceeds max size %d\n", 2072 dev->name, len, GOOD_PACKET_LEN); 2073 DEV_STATS_INC(dev, rx_length_errors); 2074 goto err; 2075 } 2076 2077 if (unlikely(vi->xdp_enabled)) { 2078 struct bpf_prog *xdp_prog; 2079 2080 rcu_read_lock(); 2081 xdp_prog = rcu_dereference(rq->xdp_prog); 2082 if (xdp_prog) { 2083 skb = receive_small_xdp(dev, vi, rq, xdp_prog, buf, 2084 xdp_headroom, len, xdp_xmit, 2085 stats); 2086 rcu_read_unlock(); 2087 return skb; 2088 } 2089 rcu_read_unlock(); 2090 } 2091 2092 skb = receive_small_build_skb(vi, xdp_headroom, buf, len); 2093 if (likely(skb)) 2094 return skb; 2095 2096 err: 2097 u64_stats_inc(&stats->drops); 2098 put_page(page); 2099 return NULL; 2100 } 2101 2102 static struct sk_buff *receive_big(struct net_device *dev, 2103 struct virtnet_info *vi, 2104 struct receive_queue *rq, 2105 void *buf, 2106 unsigned int len, 2107 struct virtnet_rq_stats *stats) 2108 { 2109 struct page *page = buf; 2110 struct sk_buff *skb = 2111 page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0); 2112 2113 u64_stats_add(&stats->bytes, len - vi->hdr_len); 2114 if (unlikely(!skb)) 2115 goto err; 2116 2117 return skb; 2118 2119 err: 2120 u64_stats_inc(&stats->drops); 2121 give_pages(rq, page); 2122 return NULL; 2123 } 2124 2125 static void mergeable_buf_free(struct receive_queue *rq, int num_buf, 2126 struct net_device *dev, 2127 struct virtnet_rq_stats *stats) 2128 { 2129 struct page *page; 2130 void *buf; 2131 int len; 2132 2133 while (num_buf-- > 1) { 2134 buf = virtnet_rq_get_buf(rq, &len, NULL); 2135 if (unlikely(!buf)) { 2136 pr_debug("%s: rx error: %d buffers missing\n", 2137 dev->name, num_buf); 2138 DEV_STATS_INC(dev, rx_length_errors); 2139 break; 2140 } 2141 u64_stats_add(&stats->bytes, len); 2142 page = virt_to_head_page(buf); 2143 put_page(page); 2144 } 2145 } 2146 2147 /* Why not use xdp_build_skb_from_frame() ? 2148 * XDP core assumes that xdp frags are PAGE_SIZE in length, while in 2149 * virtio-net there are 2 points that do not match its requirements: 2150 * 1. The size of the prefilled buffer is not fixed before xdp is set. 2151 * 2. xdp_build_skb_from_frame() does more checks that we don't need, 2152 * like eth_type_trans() (which virtio-net does in receive_buf()). 2153 */ 2154 static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev, 2155 struct virtnet_info *vi, 2156 struct xdp_buff *xdp, 2157 unsigned int xdp_frags_truesz) 2158 { 2159 struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 2160 unsigned int headroom, data_len; 2161 struct sk_buff *skb; 2162 int metasize; 2163 u8 nr_frags; 2164 2165 if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) { 2166 pr_debug("Error building skb as missing reserved tailroom for xdp"); 2167 return NULL; 2168 } 2169 2170 if (unlikely(xdp_buff_has_frags(xdp))) 2171 nr_frags = sinfo->nr_frags; 2172 2173 skb = build_skb(xdp->data_hard_start, xdp->frame_sz); 2174 if (unlikely(!skb)) 2175 return NULL; 2176 2177 headroom = xdp->data - xdp->data_hard_start; 2178 data_len = xdp->data_end - xdp->data; 2179 skb_reserve(skb, headroom); 2180 __skb_put(skb, data_len); 2181 2182 metasize = xdp->data - xdp->data_meta; 2183 metasize = metasize > 0 ? 
metasize : 0; 2184 if (metasize) 2185 skb_metadata_set(skb, metasize); 2186 2187 if (unlikely(xdp_buff_has_frags(xdp))) 2188 xdp_update_skb_shared_info(skb, nr_frags, 2189 sinfo->xdp_frags_size, 2190 xdp_frags_truesz, 2191 xdp_buff_is_frag_pfmemalloc(xdp)); 2192 2193 return skb; 2194 } 2195 2196 /* TODO: build xdp in big mode */ 2197 static int virtnet_build_xdp_buff_mrg(struct net_device *dev, 2198 struct virtnet_info *vi, 2199 struct receive_queue *rq, 2200 struct xdp_buff *xdp, 2201 void *buf, 2202 unsigned int len, 2203 unsigned int frame_sz, 2204 int *num_buf, 2205 unsigned int *xdp_frags_truesize, 2206 struct virtnet_rq_stats *stats) 2207 { 2208 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 2209 struct skb_shared_info *shinfo; 2210 unsigned int xdp_frags_truesz = 0; 2211 unsigned int truesize; 2212 struct page *page; 2213 skb_frag_t *frag; 2214 int offset; 2215 void *ctx; 2216 2217 xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq); 2218 xdp_prepare_buff(xdp, buf - XDP_PACKET_HEADROOM, 2219 XDP_PACKET_HEADROOM + vi->hdr_len, len - vi->hdr_len, true); 2220 2221 if (!*num_buf) 2222 return 0; 2223 2224 if (*num_buf > 1) { 2225 /* If we want to build multi-buffer xdp, we need 2226 * to specify that the flags of xdp_buff have the 2227 * XDP_FLAGS_HAS_FRAG bit. 2228 */ 2229 if (!xdp_buff_has_frags(xdp)) 2230 xdp_buff_set_frags_flag(xdp); 2231 2232 shinfo = xdp_get_shared_info_from_buff(xdp); 2233 shinfo->nr_frags = 0; 2234 shinfo->xdp_frags_size = 0; 2235 } 2236 2237 if (*num_buf > MAX_SKB_FRAGS + 1) 2238 return -EINVAL; 2239 2240 while (--*num_buf > 0) { 2241 buf = virtnet_rq_get_buf(rq, &len, &ctx); 2242 if (unlikely(!buf)) { 2243 pr_debug("%s: rx error: %d buffers out of %d missing\n", 2244 dev->name, *num_buf, 2245 virtio16_to_cpu(vi->vdev, hdr->num_buffers)); 2246 DEV_STATS_INC(dev, rx_length_errors); 2247 goto err; 2248 } 2249 2250 u64_stats_add(&stats->bytes, len); 2251 page = virt_to_head_page(buf); 2252 offset = buf - page_address(page); 2253 2254 if (check_mergeable_len(dev, ctx, len)) { 2255 put_page(page); 2256 goto err; 2257 } 2258 2259 truesize = mergeable_ctx_to_truesize(ctx); 2260 xdp_frags_truesz += truesize; 2261 2262 frag = &shinfo->frags[shinfo->nr_frags++]; 2263 skb_frag_fill_page_desc(frag, page, offset, len); 2264 if (page_is_pfmemalloc(page)) 2265 xdp_buff_set_frag_pfmemalloc(xdp); 2266 2267 shinfo->xdp_frags_size += len; 2268 } 2269 2270 *xdp_frags_truesize = xdp_frags_truesz; 2271 return 0; 2272 2273 err: 2274 put_xdp_frags(xdp); 2275 return -EINVAL; 2276 } 2277 2278 static void *mergeable_xdp_get_buf(struct virtnet_info *vi, 2279 struct receive_queue *rq, 2280 struct bpf_prog *xdp_prog, 2281 void *ctx, 2282 unsigned int *frame_sz, 2283 int *num_buf, 2284 struct page **page, 2285 int offset, 2286 unsigned int *len, 2287 struct virtio_net_hdr_mrg_rxbuf *hdr) 2288 { 2289 unsigned int truesize = mergeable_ctx_to_truesize(ctx); 2290 unsigned int headroom = mergeable_ctx_to_headroom(ctx); 2291 struct page *xdp_page; 2292 unsigned int xdp_room; 2293 2294 /* Transient failure which in theory could occur if 2295 * in-flight packets from before XDP was enabled reach 2296 * the receive path after XDP is loaded. 2297 */ 2298 if (unlikely(hdr->hdr.gso_type)) 2299 return NULL; 2300 2301 /* Partially checksummed packets must be dropped. */ 2302 if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) 2303 return NULL; 2304 2305 /* Now XDP core assumes frag size is PAGE_SIZE, but buffers 2306 * with headroom may add hole in truesize, which 2307 * make their length exceed PAGE_SIZE. 
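 * (add_recvbuf_mergeable() normally folds the unused tail of the page
 * fragment into the buffer to reduce internal fragmentation; that extra
 * length is what would push truesize past what a PAGE_SIZE frame can
 * describe.)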
So we disabled the 2308 * hole mechanism for xdp. See add_recvbuf_mergeable(). 2309 */ 2310 *frame_sz = truesize; 2311 2312 if (likely(headroom >= virtnet_get_headroom(vi) && 2313 (*num_buf == 1 || xdp_prog->aux->xdp_has_frags))) { 2314 return page_address(*page) + offset; 2315 } 2316 2317 /* This happens when headroom is not enough because 2318 * of the buffer was prefilled before XDP is set. 2319 * This should only happen for the first several packets. 2320 * In fact, vq reset can be used here to help us clean up 2321 * the prefilled buffers, but many existing devices do not 2322 * support it, and we don't want to bother users who are 2323 * using xdp normally. 2324 */ 2325 if (!xdp_prog->aux->xdp_has_frags) { 2326 /* linearize data for XDP */ 2327 xdp_page = xdp_linearize_page(vi->dev, rq, num_buf, 2328 *page, offset, 2329 XDP_PACKET_HEADROOM, 2330 len); 2331 if (!xdp_page) 2332 return NULL; 2333 } else { 2334 xdp_room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM + 2335 sizeof(struct skb_shared_info)); 2336 if (*len + xdp_room > PAGE_SIZE) 2337 return NULL; 2338 2339 xdp_page = alloc_page(GFP_ATOMIC); 2340 if (!xdp_page) 2341 return NULL; 2342 2343 memcpy(page_address(xdp_page) + XDP_PACKET_HEADROOM, 2344 page_address(*page) + offset, *len); 2345 } 2346 2347 *frame_sz = PAGE_SIZE; 2348 2349 put_page(*page); 2350 2351 *page = xdp_page; 2352 2353 return page_address(*page) + XDP_PACKET_HEADROOM; 2354 } 2355 2356 static struct sk_buff *receive_mergeable_xdp(struct net_device *dev, 2357 struct virtnet_info *vi, 2358 struct receive_queue *rq, 2359 struct bpf_prog *xdp_prog, 2360 void *buf, 2361 void *ctx, 2362 unsigned int len, 2363 unsigned int *xdp_xmit, 2364 struct virtnet_rq_stats *stats) 2365 { 2366 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 2367 int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 2368 struct page *page = virt_to_head_page(buf); 2369 int offset = buf - page_address(page); 2370 unsigned int xdp_frags_truesz = 0; 2371 struct sk_buff *head_skb; 2372 unsigned int frame_sz; 2373 struct xdp_buff xdp; 2374 void *data; 2375 u32 act; 2376 int err; 2377 2378 data = mergeable_xdp_get_buf(vi, rq, xdp_prog, ctx, &frame_sz, &num_buf, &page, 2379 offset, &len, hdr); 2380 if (unlikely(!data)) 2381 goto err_xdp; 2382 2383 err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz, 2384 &num_buf, &xdp_frags_truesz, stats); 2385 if (unlikely(err)) 2386 goto err_xdp; 2387 2388 act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats); 2389 2390 switch (act) { 2391 case XDP_PASS: 2392 head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz); 2393 if (unlikely(!head_skb)) 2394 break; 2395 return head_skb; 2396 2397 case XDP_TX: 2398 case XDP_REDIRECT: 2399 return NULL; 2400 2401 default: 2402 break; 2403 } 2404 2405 put_xdp_frags(&xdp); 2406 2407 err_xdp: 2408 put_page(page); 2409 mergeable_buf_free(rq, num_buf, dev, stats); 2410 2411 u64_stats_inc(&stats->xdp_drops); 2412 u64_stats_inc(&stats->drops); 2413 return NULL; 2414 } 2415 2416 static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb, 2417 struct sk_buff *curr_skb, 2418 struct page *page, void *buf, 2419 int len, int truesize) 2420 { 2421 int num_skb_frags; 2422 int offset; 2423 2424 num_skb_frags = skb_shinfo(curr_skb)->nr_frags; 2425 if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) { 2426 struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC); 2427 2428 if (unlikely(!nskb)) 2429 return NULL; 2430 2431 if (curr_skb == head_skb) 2432 skb_shinfo(curr_skb)->frag_list = nskb; 2433 else 2434 
curr_skb->next = nskb; 2435 curr_skb = nskb; 2436 head_skb->truesize += nskb->truesize; 2437 num_skb_frags = 0; 2438 } 2439 2440 if (curr_skb != head_skb) { 2441 head_skb->data_len += len; 2442 head_skb->len += len; 2443 head_skb->truesize += truesize; 2444 } 2445 2446 offset = buf - page_address(page); 2447 if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) { 2448 put_page(page); 2449 skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1, 2450 len, truesize); 2451 } else { 2452 skb_add_rx_frag(curr_skb, num_skb_frags, page, 2453 offset, len, truesize); 2454 } 2455 2456 return curr_skb; 2457 } 2458 2459 static struct sk_buff *receive_mergeable(struct net_device *dev, 2460 struct virtnet_info *vi, 2461 struct receive_queue *rq, 2462 void *buf, 2463 void *ctx, 2464 unsigned int len, 2465 unsigned int *xdp_xmit, 2466 struct virtnet_rq_stats *stats) 2467 { 2468 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 2469 int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 2470 struct page *page = virt_to_head_page(buf); 2471 int offset = buf - page_address(page); 2472 struct sk_buff *head_skb, *curr_skb; 2473 unsigned int truesize = mergeable_ctx_to_truesize(ctx); 2474 unsigned int headroom = mergeable_ctx_to_headroom(ctx); 2475 2476 head_skb = NULL; 2477 u64_stats_add(&stats->bytes, len - vi->hdr_len); 2478 2479 if (check_mergeable_len(dev, ctx, len)) 2480 goto err_skb; 2481 2482 if (unlikely(vi->xdp_enabled)) { 2483 struct bpf_prog *xdp_prog; 2484 2485 rcu_read_lock(); 2486 xdp_prog = rcu_dereference(rq->xdp_prog); 2487 if (xdp_prog) { 2488 head_skb = receive_mergeable_xdp(dev, vi, rq, xdp_prog, buf, ctx, 2489 len, xdp_xmit, stats); 2490 rcu_read_unlock(); 2491 return head_skb; 2492 } 2493 rcu_read_unlock(); 2494 } 2495 2496 head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom); 2497 curr_skb = head_skb; 2498 2499 if (unlikely(!curr_skb)) 2500 goto err_skb; 2501 while (--num_buf) { 2502 buf = virtnet_rq_get_buf(rq, &len, &ctx); 2503 if (unlikely(!buf)) { 2504 pr_debug("%s: rx error: %d buffers out of %d missing\n", 2505 dev->name, num_buf, 2506 virtio16_to_cpu(vi->vdev, 2507 hdr->num_buffers)); 2508 DEV_STATS_INC(dev, rx_length_errors); 2509 goto err_buf; 2510 } 2511 2512 u64_stats_add(&stats->bytes, len); 2513 page = virt_to_head_page(buf); 2514 2515 if (check_mergeable_len(dev, ctx, len)) 2516 goto err_skb; 2517 2518 truesize = mergeable_ctx_to_truesize(ctx); 2519 curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page, 2520 buf, len, truesize); 2521 if (!curr_skb) 2522 goto err_skb; 2523 } 2524 2525 ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len); 2526 return head_skb; 2527 2528 err_skb: 2529 put_page(page); 2530 mergeable_buf_free(rq, num_buf, dev, stats); 2531 2532 err_buf: 2533 u64_stats_inc(&stats->drops); 2534 dev_kfree_skb(head_skb); 2535 return NULL; 2536 } 2537 2538 static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash, 2539 struct sk_buff *skb) 2540 { 2541 enum pkt_hash_types rss_hash_type; 2542 2543 if (!hdr_hash || !skb) 2544 return; 2545 2546 switch (__le16_to_cpu(hdr_hash->hash_report)) { 2547 case VIRTIO_NET_HASH_REPORT_TCPv4: 2548 case VIRTIO_NET_HASH_REPORT_UDPv4: 2549 case VIRTIO_NET_HASH_REPORT_TCPv6: 2550 case VIRTIO_NET_HASH_REPORT_UDPv6: 2551 case VIRTIO_NET_HASH_REPORT_TCPv6_EX: 2552 case VIRTIO_NET_HASH_REPORT_UDPv6_EX: 2553 rss_hash_type = PKT_HASH_TYPE_L4; 2554 break; 2555 case VIRTIO_NET_HASH_REPORT_IPv4: 2556 case VIRTIO_NET_HASH_REPORT_IPv6: 2557 case VIRTIO_NET_HASH_REPORT_IPv6_EX: 2558 rss_hash_type = 
PKT_HASH_TYPE_L3; 2559 break; 2560 case VIRTIO_NET_HASH_REPORT_NONE: 2561 default: 2562 rss_hash_type = PKT_HASH_TYPE_NONE; 2563 } 2564 skb_set_hash(skb, __le32_to_cpu(hdr_hash->hash_value), rss_hash_type); 2565 } 2566 2567 static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq, 2568 struct sk_buff *skb, u8 flags) 2569 { 2570 struct virtio_net_common_hdr *hdr; 2571 struct net_device *dev = vi->dev; 2572 2573 hdr = skb_vnet_common_hdr(skb); 2574 if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report) 2575 virtio_skb_set_hash(&hdr->hash_v1_hdr, skb); 2576 2577 hdr->hdr.flags = flags; 2578 if (virtio_net_handle_csum_offload(skb, &hdr->hdr, vi->rx_tnl_csum)) { 2579 net_warn_ratelimited("%s: bad csum: flags: %x, gso_type: %x rx_tnl_csum %d\n", 2580 dev->name, hdr->hdr.flags, 2581 hdr->hdr.gso_type, vi->rx_tnl_csum); 2582 goto frame_err; 2583 } 2584 2585 if (virtio_net_hdr_tnl_to_skb(skb, &hdr->tnl_hdr, vi->rx_tnl, 2586 vi->rx_tnl_csum, 2587 virtio_is_little_endian(vi->vdev))) { 2588 net_warn_ratelimited("%s: bad gso: type: %x, size: %u, flags %x tunnel %d tnl csum %d\n", 2589 dev->name, hdr->hdr.gso_type, 2590 hdr->hdr.gso_size, hdr->hdr.flags, 2591 vi->rx_tnl, vi->rx_tnl_csum); 2592 goto frame_err; 2593 } 2594 2595 skb_record_rx_queue(skb, vq2rxq(rq->vq)); 2596 skb->protocol = eth_type_trans(skb, dev); 2597 pr_debug("Receiving skb proto 0x%04x len %i type %i\n", 2598 ntohs(skb->protocol), skb->len, skb->pkt_type); 2599 2600 napi_gro_receive(&rq->napi, skb); 2601 return; 2602 2603 frame_err: 2604 DEV_STATS_INC(dev, rx_frame_errors); 2605 dev_kfree_skb(skb); 2606 } 2607 2608 static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, 2609 void *buf, unsigned int len, void **ctx, 2610 unsigned int *xdp_xmit, 2611 struct virtnet_rq_stats *stats) 2612 { 2613 struct net_device *dev = vi->dev; 2614 struct sk_buff *skb; 2615 u8 flags; 2616 2617 if (unlikely(len < vi->hdr_len + ETH_HLEN)) { 2618 pr_debug("%s: short packet %i\n", dev->name, len); 2619 DEV_STATS_INC(dev, rx_length_errors); 2620 virtnet_rq_free_buf(vi, rq, buf); 2621 return; 2622 } 2623 2624 /* 1. Save the flags early, as the XDP program might overwrite them. 2625 * These flags ensure packets marked as VIRTIO_NET_HDR_F_DATA_VALID 2626 * stay valid after XDP processing. 2627 * 2. XDP doesn't work with partially checksummed packets (refer to 2628 * virtnet_xdp_set()), so packets marked as 2629 * VIRTIO_NET_HDR_F_NEEDS_CSUM get dropped during XDP processing. 2630 */ 2631 flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags; 2632 2633 if (vi->mergeable_rx_bufs) 2634 skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit, 2635 stats); 2636 else if (vi->big_packets) 2637 skb = receive_big(dev, vi, rq, buf, len, stats); 2638 else 2639 skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); 2640 2641 if (unlikely(!skb)) 2642 return; 2643 2644 virtnet_receive_done(vi, rq, skb, flags); 2645 } 2646 2647 /* Unlike mergeable buffers, all buffers are allocated to the 2648 * same size, except for the headroom. For this reason we do 2649 * not need to use mergeable_len_to_ctx here - it is enough 2650 * to store the headroom as the context ignoring the truesize. 
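 * The context is simply the headroom value cast to a pointer
 * ((void *)(unsigned long)xdp_headroom); receive_small() casts it back the
 * same way to recover the original buffer start.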
2651 */ 2652 static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, 2653 gfp_t gfp) 2654 { 2655 char *buf; 2656 unsigned int xdp_headroom = virtnet_get_headroom(vi); 2657 void *ctx = (void *)(unsigned long)xdp_headroom; 2658 int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom; 2659 int err; 2660 2661 len = SKB_DATA_ALIGN(len) + 2662 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 2663 2664 if (unlikely(!skb_page_frag_refill(len, &rq->alloc_frag, gfp))) 2665 return -ENOMEM; 2666 2667 buf = virtnet_rq_alloc(rq, len, gfp); 2668 if (unlikely(!buf)) 2669 return -ENOMEM; 2670 2671 buf += VIRTNET_RX_PAD + xdp_headroom; 2672 2673 virtnet_rq_init_one_sg(rq, buf, vi->hdr_len + GOOD_PACKET_LEN); 2674 2675 err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1, buf, ctx, gfp); 2676 if (err < 0) { 2677 virtnet_rq_unmap(rq, buf, 0); 2678 put_page(virt_to_head_page(buf)); 2679 } 2680 2681 return err; 2682 } 2683 2684 static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq, 2685 gfp_t gfp) 2686 { 2687 struct page *first, *list = NULL; 2688 char *p; 2689 int i, err, offset; 2690 2691 sg_init_table(rq->sg, vi->big_packets_num_skbfrags + 2); 2692 2693 /* page in rq->sg[vi->big_packets_num_skbfrags + 1] is list tail */ 2694 for (i = vi->big_packets_num_skbfrags + 1; i > 1; --i) { 2695 first = get_a_page(rq, gfp); 2696 if (!first) { 2697 if (list) 2698 give_pages(rq, list); 2699 return -ENOMEM; 2700 } 2701 sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE); 2702 2703 /* chain new page in list head to match sg */ 2704 first->private = (unsigned long)list; 2705 list = first; 2706 } 2707 2708 first = get_a_page(rq, gfp); 2709 if (!first) { 2710 give_pages(rq, list); 2711 return -ENOMEM; 2712 } 2713 p = page_address(first); 2714 2715 /* rq->sg[0], rq->sg[1] share the same page */ 2716 /* a separated rq->sg[0] for header - required in case !any_header_sg */ 2717 sg_set_buf(&rq->sg[0], p, vi->hdr_len); 2718 2719 /* rq->sg[1] for data packet, from offset */ 2720 offset = sizeof(struct padded_vnet_hdr); 2721 sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset); 2722 2723 /* chain first in list head */ 2724 first->private = (unsigned long)list; 2725 err = virtqueue_add_inbuf(rq->vq, rq->sg, vi->big_packets_num_skbfrags + 2, 2726 first, gfp); 2727 if (err < 0) 2728 give_pages(rq, first); 2729 2730 return err; 2731 } 2732 2733 static unsigned int get_mergeable_buf_len(struct receive_queue *rq, 2734 struct ewma_pkt_len *avg_pkt_len, 2735 unsigned int room) 2736 { 2737 struct virtnet_info *vi = rq->vq->vdev->priv; 2738 const size_t hdr_len = vi->hdr_len; 2739 unsigned int len; 2740 2741 if (room) 2742 return PAGE_SIZE - room; 2743 2744 len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len), 2745 rq->min_buf_len, PAGE_SIZE - hdr_len); 2746 2747 return ALIGN(len, L1_CACHE_BYTES); 2748 } 2749 2750 static int add_recvbuf_mergeable(struct virtnet_info *vi, 2751 struct receive_queue *rq, gfp_t gfp) 2752 { 2753 struct page_frag *alloc_frag = &rq->alloc_frag; 2754 unsigned int headroom = virtnet_get_headroom(vi); 2755 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 2756 unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); 2757 unsigned int len, hole; 2758 void *ctx; 2759 char *buf; 2760 int err; 2761 2762 /* Extra tailroom is needed to satisfy XDP's assumption. This 2763 * means rx frags coalescing won't work, but consider we've 2764 * disabled GSO for XDP, it won't be a big issue. 
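 * "room" below is SKB_DATA_ALIGN(headroom + tailroom) and is reserved out of
 * the buffer before it is posted; with no XDP program attached the headroom
 * (and therefore room) is expected to be 0.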
2765 */ 2766 len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room); 2767 2768 if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp))) 2769 return -ENOMEM; 2770 2771 if (!alloc_frag->offset && len + room + sizeof(struct virtnet_rq_dma) > alloc_frag->size) 2772 len -= sizeof(struct virtnet_rq_dma); 2773 2774 buf = virtnet_rq_alloc(rq, len + room, gfp); 2775 if (unlikely(!buf)) 2776 return -ENOMEM; 2777 2778 buf += headroom; /* advance address leaving hole at front of pkt */ 2779 hole = alloc_frag->size - alloc_frag->offset; 2780 if (hole < len + room) { 2781 /* To avoid internal fragmentation, if there is very likely not 2782 * enough space for another buffer, add the remaining space to 2783 * the current buffer. 2784 * XDP core assumes that frame_size of xdp_buff and the length 2785 * of the frag are PAGE_SIZE, so we disable the hole mechanism. 2786 */ 2787 if (!headroom) 2788 len += hole; 2789 alloc_frag->offset += hole; 2790 } 2791 2792 virtnet_rq_init_one_sg(rq, buf, len); 2793 2794 ctx = mergeable_len_to_ctx(len + room, headroom); 2795 err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1, buf, ctx, gfp); 2796 if (err < 0) { 2797 virtnet_rq_unmap(rq, buf, 0); 2798 put_page(virt_to_head_page(buf)); 2799 } 2800 2801 return err; 2802 } 2803 2804 /* 2805 * Returns false if we couldn't fill entirely (OOM). 2806 * 2807 * Normally run in the receive path, but can also be run from ndo_open 2808 * before we're receiving packets, or from refill_work which is 2809 * careful to disable receiving (using napi_disable). 2810 */ 2811 static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq, 2812 gfp_t gfp) 2813 { 2814 int err; 2815 2816 if (rq->xsk_pool) { 2817 err = virtnet_add_recvbuf_xsk(vi, rq, rq->xsk_pool, gfp); 2818 goto kick; 2819 } 2820 2821 do { 2822 if (vi->mergeable_rx_bufs) 2823 err = add_recvbuf_mergeable(vi, rq, gfp); 2824 else if (vi->big_packets) 2825 err = add_recvbuf_big(vi, rq, gfp); 2826 else 2827 err = add_recvbuf_small(vi, rq, gfp); 2828 2829 if (err) 2830 break; 2831 } while (rq->vq->num_free); 2832 2833 kick: 2834 if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) { 2835 unsigned long flags; 2836 2837 flags = u64_stats_update_begin_irqsave(&rq->stats.syncp); 2838 u64_stats_inc(&rq->stats.kicks); 2839 u64_stats_update_end_irqrestore(&rq->stats.syncp, flags); 2840 } 2841 2842 return err != -ENOMEM; 2843 } 2844 2845 static void skb_recv_done(struct virtqueue *rvq) 2846 { 2847 struct virtnet_info *vi = rvq->vdev->priv; 2848 struct receive_queue *rq = &vi->rq[vq2rxq(rvq)]; 2849 2850 rq->calls++; 2851 virtqueue_napi_schedule(&rq->napi, rvq); 2852 } 2853 2854 static void virtnet_napi_do_enable(struct virtqueue *vq, 2855 struct napi_struct *napi) 2856 { 2857 napi_enable(napi); 2858 2859 /* If all buffers were filled by other side before we napi_enabled, we 2860 * won't get another interrupt, so process any outstanding packets now. 2861 * Call local_bh_enable after to trigger softIRQ processing. 
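 * (Scheduling NAPI raises NET_RX_SOFTIRQ; doing it inside a
 * local_bh_disable()/local_bh_enable() pair guarantees the softirq runs as
 * soon as BHs are re-enabled instead of being left pending.)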
2862 */ 2863 local_bh_disable(); 2864 virtqueue_napi_schedule(napi, vq); 2865 local_bh_enable(); 2866 } 2867 2868 static void virtnet_napi_enable(struct receive_queue *rq) 2869 { 2870 struct virtnet_info *vi = rq->vq->vdev->priv; 2871 int qidx = vq2rxq(rq->vq); 2872 2873 virtnet_napi_do_enable(rq->vq, &rq->napi); 2874 netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, &rq->napi); 2875 } 2876 2877 static void virtnet_napi_tx_enable(struct send_queue *sq) 2878 { 2879 struct virtnet_info *vi = sq->vq->vdev->priv; 2880 struct napi_struct *napi = &sq->napi; 2881 int qidx = vq2txq(sq->vq); 2882 2883 if (!napi->weight) 2884 return; 2885 2886 /* Tx napi touches cachelines on the cpu handling tx interrupts. Only 2887 * enable the feature if this is likely affine with the transmit path. 2888 */ 2889 if (!vi->affinity_hint_set) { 2890 napi->weight = 0; 2891 return; 2892 } 2893 2894 virtnet_napi_do_enable(sq->vq, napi); 2895 netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, napi); 2896 } 2897 2898 static void virtnet_napi_tx_disable(struct send_queue *sq) 2899 { 2900 struct virtnet_info *vi = sq->vq->vdev->priv; 2901 struct napi_struct *napi = &sq->napi; 2902 int qidx = vq2txq(sq->vq); 2903 2904 if (napi->weight) { 2905 netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, NULL); 2906 napi_disable(napi); 2907 } 2908 } 2909 2910 static void virtnet_napi_disable(struct receive_queue *rq) 2911 { 2912 struct virtnet_info *vi = rq->vq->vdev->priv; 2913 struct napi_struct *napi = &rq->napi; 2914 int qidx = vq2rxq(rq->vq); 2915 2916 netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, NULL); 2917 napi_disable(napi); 2918 } 2919 2920 static void refill_work(struct work_struct *work) 2921 { 2922 struct virtnet_info *vi = 2923 container_of(work, struct virtnet_info, refill.work); 2924 bool still_empty; 2925 int i; 2926 2927 for (i = 0; i < vi->curr_queue_pairs; i++) { 2928 struct receive_queue *rq = &vi->rq[i]; 2929 2930 /* 2931 * When queue API support is added in the future and the call 2932 * below becomes napi_disable_locked, this driver will need to 2933 * be refactored. 2934 * 2935 * One possible solution would be to: 2936 * - cancel refill_work with cancel_delayed_work (note: 2937 * non-sync) 2938 * - cancel refill_work with cancel_delayed_work_sync in 2939 * virtnet_remove after the netdev is unregistered 2940 * - wrap all of the work in a lock (perhaps the netdev 2941 * instance lock) 2942 * - check netif_running() and return early to avoid a race 2943 */ 2944 napi_disable(&rq->napi); 2945 still_empty = !try_fill_recv(vi, rq, GFP_KERNEL); 2946 virtnet_napi_do_enable(rq->vq, &rq->napi); 2947 2948 /* In theory, this can happen: if we don't get any buffers in 2949 * we will *never* try to fill again. 
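 * Re-arm the work with a HZ/2 delay so an allocation failure is retried
 * later instead of leaving the ring empty forever.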
2950 */ 2951 if (still_empty) 2952 schedule_delayed_work(&vi->refill, HZ/2); 2953 } 2954 } 2955 2956 static int virtnet_receive_xsk_bufs(struct virtnet_info *vi, 2957 struct receive_queue *rq, 2958 int budget, 2959 unsigned int *xdp_xmit, 2960 struct virtnet_rq_stats *stats) 2961 { 2962 unsigned int len; 2963 int packets = 0; 2964 void *buf; 2965 2966 while (packets < budget) { 2967 buf = virtqueue_get_buf(rq->vq, &len); 2968 if (!buf) 2969 break; 2970 2971 virtnet_receive_xsk_buf(vi, rq, buf, len, xdp_xmit, stats); 2972 packets++; 2973 } 2974 2975 return packets; 2976 } 2977 2978 static int virtnet_receive_packets(struct virtnet_info *vi, 2979 struct receive_queue *rq, 2980 int budget, 2981 unsigned int *xdp_xmit, 2982 struct virtnet_rq_stats *stats) 2983 { 2984 unsigned int len; 2985 int packets = 0; 2986 void *buf; 2987 2988 if (!vi->big_packets || vi->mergeable_rx_bufs) { 2989 void *ctx; 2990 while (packets < budget && 2991 (buf = virtnet_rq_get_buf(rq, &len, &ctx))) { 2992 receive_buf(vi, rq, buf, len, ctx, xdp_xmit, stats); 2993 packets++; 2994 } 2995 } else { 2996 while (packets < budget && 2997 (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) { 2998 receive_buf(vi, rq, buf, len, NULL, xdp_xmit, stats); 2999 packets++; 3000 } 3001 } 3002 3003 return packets; 3004 } 3005 3006 static int virtnet_receive(struct receive_queue *rq, int budget, 3007 unsigned int *xdp_xmit) 3008 { 3009 struct virtnet_info *vi = rq->vq->vdev->priv; 3010 struct virtnet_rq_stats stats = {}; 3011 int i, packets; 3012 3013 if (rq->xsk_pool) 3014 packets = virtnet_receive_xsk_bufs(vi, rq, budget, xdp_xmit, &stats); 3015 else 3016 packets = virtnet_receive_packets(vi, rq, budget, xdp_xmit, &stats); 3017 3018 if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) { 3019 if (!try_fill_recv(vi, rq, GFP_ATOMIC)) { 3020 spin_lock(&vi->refill_lock); 3021 if (vi->refill_enabled) 3022 schedule_delayed_work(&vi->refill, 0); 3023 spin_unlock(&vi->refill_lock); 3024 } 3025 } 3026 3027 u64_stats_set(&stats.packets, packets); 3028 u64_stats_update_begin(&rq->stats.syncp); 3029 for (i = 0; i < ARRAY_SIZE(virtnet_rq_stats_desc); i++) { 3030 size_t offset = virtnet_rq_stats_desc[i].offset; 3031 u64_stats_t *item, *src; 3032 3033 item = (u64_stats_t *)((u8 *)&rq->stats + offset); 3034 src = (u64_stats_t *)((u8 *)&stats + offset); 3035 u64_stats_add(item, u64_stats_read(src)); 3036 } 3037 3038 u64_stats_add(&rq->stats.packets, u64_stats_read(&stats.packets)); 3039 u64_stats_add(&rq->stats.bytes, u64_stats_read(&stats.bytes)); 3040 3041 u64_stats_update_end(&rq->stats.syncp); 3042 3043 return packets; 3044 } 3045 3046 static void virtnet_poll_cleantx(struct receive_queue *rq, int budget) 3047 { 3048 struct virtnet_info *vi = rq->vq->vdev->priv; 3049 unsigned int index = vq2rxq(rq->vq); 3050 struct send_queue *sq = &vi->sq[index]; 3051 struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index); 3052 3053 if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index)) 3054 return; 3055 3056 if (__netif_tx_trylock(txq)) { 3057 if (sq->reset) { 3058 __netif_tx_unlock(txq); 3059 return; 3060 } 3061 3062 do { 3063 virtqueue_disable_cb(sq->vq); 3064 free_old_xmit(sq, txq, !!budget); 3065 } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq))); 3066 3067 if (sq->vq->num_free >= MAX_SKB_FRAGS + 2 && 3068 netif_tx_queue_stopped(txq)) { 3069 u64_stats_update_begin(&sq->stats.syncp); 3070 u64_stats_inc(&sq->stats.wake); 3071 u64_stats_update_end(&sq->stats.syncp); 3072 netif_tx_wake_queue(txq); 3073 } 3074 
3075 __netif_tx_unlock(txq); 3076 } 3077 } 3078 3079 static void virtnet_rx_dim_update(struct virtnet_info *vi, struct receive_queue *rq) 3080 { 3081 struct dim_sample cur_sample = {}; 3082 3083 if (!rq->packets_in_napi) 3084 return; 3085 3086 /* Don't need protection when fetching stats, since fetcher and 3087 * updater of the stats are in same context 3088 */ 3089 dim_update_sample(rq->calls, 3090 u64_stats_read(&rq->stats.packets), 3091 u64_stats_read(&rq->stats.bytes), 3092 &cur_sample); 3093 3094 net_dim(&rq->dim, &cur_sample); 3095 rq->packets_in_napi = 0; 3096 } 3097 3098 static int virtnet_poll(struct napi_struct *napi, int budget) 3099 { 3100 struct receive_queue *rq = 3101 container_of(napi, struct receive_queue, napi); 3102 struct virtnet_info *vi = rq->vq->vdev->priv; 3103 struct send_queue *sq; 3104 unsigned int received; 3105 unsigned int xdp_xmit = 0; 3106 bool napi_complete; 3107 3108 virtnet_poll_cleantx(rq, budget); 3109 3110 received = virtnet_receive(rq, budget, &xdp_xmit); 3111 rq->packets_in_napi += received; 3112 3113 if (xdp_xmit & VIRTIO_XDP_REDIR) 3114 xdp_do_flush(); 3115 3116 /* Out of packets? */ 3117 if (received < budget) { 3118 napi_complete = virtqueue_napi_complete(napi, rq->vq, received); 3119 /* Intentionally not taking dim_lock here. This may result in a 3120 * spurious net_dim call. But if that happens virtnet_rx_dim_work 3121 * will not act on the scheduled work. 3122 */ 3123 if (napi_complete && rq->dim_enabled) 3124 virtnet_rx_dim_update(vi, rq); 3125 } 3126 3127 if (xdp_xmit & VIRTIO_XDP_TX) { 3128 sq = virtnet_xdp_get_sq(vi); 3129 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { 3130 u64_stats_update_begin(&sq->stats.syncp); 3131 u64_stats_inc(&sq->stats.kicks); 3132 u64_stats_update_end(&sq->stats.syncp); 3133 } 3134 virtnet_xdp_put_sq(vi, sq); 3135 } 3136 3137 return received; 3138 } 3139 3140 static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index) 3141 { 3142 virtnet_napi_tx_disable(&vi->sq[qp_index]); 3143 virtnet_napi_disable(&vi->rq[qp_index]); 3144 xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); 3145 } 3146 3147 static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index) 3148 { 3149 struct net_device *dev = vi->dev; 3150 int err; 3151 3152 err = xdp_rxq_info_reg(&vi->rq[qp_index].xdp_rxq, dev, qp_index, 3153 vi->rq[qp_index].napi.napi_id); 3154 if (err < 0) 3155 return err; 3156 3157 err = xdp_rxq_info_reg_mem_model(&vi->rq[qp_index].xdp_rxq, 3158 MEM_TYPE_PAGE_SHARED, NULL); 3159 if (err < 0) 3160 goto err_xdp_reg_mem_model; 3161 3162 virtnet_napi_enable(&vi->rq[qp_index]); 3163 virtnet_napi_tx_enable(&vi->sq[qp_index]); 3164 3165 return 0; 3166 3167 err_xdp_reg_mem_model: 3168 xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); 3169 return err; 3170 } 3171 3172 static void virtnet_cancel_dim(struct virtnet_info *vi, struct dim *dim) 3173 { 3174 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 3175 return; 3176 net_dim_work_cancel(dim); 3177 } 3178 3179 static void virtnet_update_settings(struct virtnet_info *vi) 3180 { 3181 u32 speed; 3182 u8 duplex; 3183 3184 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX)) 3185 return; 3186 3187 virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed); 3188 3189 if (ethtool_validate_speed(speed)) 3190 vi->speed = speed; 3191 3192 virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex); 3193 3194 if (ethtool_validate_duplex(duplex)) 3195 vi->duplex = duplex; 3196 } 3197 3198 static int virtnet_open(struct 
net_device *dev) 3199 { 3200 struct virtnet_info *vi = netdev_priv(dev); 3201 int i, err; 3202 3203 enable_delayed_refill(vi); 3204 3205 for (i = 0; i < vi->max_queue_pairs; i++) { 3206 if (i < vi->curr_queue_pairs) 3207 /* Make sure we have some buffers: if oom use wq. */ 3208 if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL)) 3209 schedule_delayed_work(&vi->refill, 0); 3210 3211 err = virtnet_enable_queue_pair(vi, i); 3212 if (err < 0) 3213 goto err_enable_qp; 3214 } 3215 3216 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { 3217 if (vi->status & VIRTIO_NET_S_LINK_UP) 3218 netif_carrier_on(vi->dev); 3219 virtio_config_driver_enable(vi->vdev); 3220 } else { 3221 vi->status = VIRTIO_NET_S_LINK_UP; 3222 netif_carrier_on(dev); 3223 } 3224 3225 return 0; 3226 3227 err_enable_qp: 3228 disable_delayed_refill(vi); 3229 cancel_delayed_work_sync(&vi->refill); 3230 3231 for (i--; i >= 0; i--) { 3232 virtnet_disable_queue_pair(vi, i); 3233 virtnet_cancel_dim(vi, &vi->rq[i].dim); 3234 } 3235 3236 return err; 3237 } 3238 3239 static int virtnet_poll_tx(struct napi_struct *napi, int budget) 3240 { 3241 struct send_queue *sq = container_of(napi, struct send_queue, napi); 3242 struct virtnet_info *vi = sq->vq->vdev->priv; 3243 unsigned int index = vq2txq(sq->vq); 3244 struct netdev_queue *txq; 3245 int opaque, xsk_done = 0; 3246 bool done; 3247 3248 if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { 3249 /* We don't need to enable cb for XDP */ 3250 napi_complete_done(napi, 0); 3251 return 0; 3252 } 3253 3254 txq = netdev_get_tx_queue(vi->dev, index); 3255 __netif_tx_lock(txq, raw_smp_processor_id()); 3256 virtqueue_disable_cb(sq->vq); 3257 3258 if (sq->xsk_pool) 3259 xsk_done = virtnet_xsk_xmit(sq, sq->xsk_pool, budget); 3260 else 3261 free_old_xmit(sq, txq, !!budget); 3262 3263 if (sq->vq->num_free >= MAX_SKB_FRAGS + 2 && 3264 netif_tx_queue_stopped(txq)) { 3265 u64_stats_update_begin(&sq->stats.syncp); 3266 u64_stats_inc(&sq->stats.wake); 3267 u64_stats_update_end(&sq->stats.syncp); 3268 netif_tx_wake_queue(txq); 3269 } 3270 3271 if (xsk_done >= budget) { 3272 __netif_tx_unlock(txq); 3273 return budget; 3274 } 3275 3276 opaque = virtqueue_enable_cb_prepare(sq->vq); 3277 3278 done = napi_complete_done(napi, 0); 3279 3280 if (!done) 3281 virtqueue_disable_cb(sq->vq); 3282 3283 __netif_tx_unlock(txq); 3284 3285 if (done) { 3286 if (unlikely(virtqueue_poll(sq->vq, opaque))) { 3287 if (napi_schedule_prep(napi)) { 3288 __netif_tx_lock(txq, raw_smp_processor_id()); 3289 virtqueue_disable_cb(sq->vq); 3290 __netif_tx_unlock(txq); 3291 __napi_schedule(napi); 3292 } 3293 } 3294 } 3295 3296 return 0; 3297 } 3298 3299 static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan) 3300 { 3301 const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; 3302 struct virtnet_info *vi = sq->vq->vdev->priv; 3303 struct virtio_net_hdr_v1_hash_tunnel *hdr; 3304 int num_sg; 3305 unsigned hdr_len = vi->hdr_len; 3306 bool can_push; 3307 3308 pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); 3309 3310 can_push = vi->any_header_sg && 3311 !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) && 3312 !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len; 3313 /* Even if we can, don't push here yet as this would skew 3314 * csum_start offset below. 
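 * In the can_push case hdr therefore only points at the bytes in front of
 * skb->data for now; the actual __skb_push() happens further down, after
 * virtio_net_hdr_tnl_from_skb() has filled the header in using the
 * unmodified offsets.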
*/ 3315 if (can_push) 3316 hdr = (struct virtio_net_hdr_v1_hash_tunnel *)(skb->data - 3317 hdr_len); 3318 else 3319 hdr = &skb_vnet_common_hdr(skb)->tnl_hdr; 3320 3321 if (virtio_net_hdr_tnl_from_skb(skb, hdr, vi->tx_tnl, 3322 virtio_is_little_endian(vi->vdev), 0)) 3323 return -EPROTO; 3324 3325 if (vi->mergeable_rx_bufs) 3326 hdr->hash_hdr.hdr.num_buffers = 0; 3327 3328 sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2)); 3329 if (can_push) { 3330 __skb_push(skb, hdr_len); 3331 num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len); 3332 if (unlikely(num_sg < 0)) 3333 return num_sg; 3334 /* Pull header back to avoid skew in tx bytes calculations. */ 3335 __skb_pull(skb, hdr_len); 3336 } else { 3337 sg_set_buf(sq->sg, hdr, hdr_len); 3338 num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len); 3339 if (unlikely(num_sg < 0)) 3340 return num_sg; 3341 num_sg++; 3342 } 3343 3344 return virtnet_add_outbuf(sq, num_sg, skb, 3345 orphan ? VIRTNET_XMIT_TYPE_SKB_ORPHAN : VIRTNET_XMIT_TYPE_SKB); 3346 } 3347 3348 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) 3349 { 3350 struct virtnet_info *vi = netdev_priv(dev); 3351 int qnum = skb_get_queue_mapping(skb); 3352 struct send_queue *sq = &vi->sq[qnum]; 3353 int err; 3354 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); 3355 bool xmit_more = netdev_xmit_more(); 3356 bool use_napi = sq->napi.weight; 3357 bool kick; 3358 3359 if (!use_napi) 3360 free_old_xmit(sq, txq, false); 3361 else 3362 virtqueue_disable_cb(sq->vq); 3363 3364 /* timestamp packet in software */ 3365 skb_tx_timestamp(skb); 3366 3367 /* Try to transmit */ 3368 err = xmit_skb(sq, skb, !use_napi); 3369 3370 /* This should not happen! */ 3371 if (unlikely(err)) { 3372 DEV_STATS_INC(dev, tx_fifo_errors); 3373 if (net_ratelimit()) 3374 dev_warn(&dev->dev, 3375 "Unexpected TXQ (%d) queue failure: %d\n", 3376 qnum, err); 3377 DEV_STATS_INC(dev, tx_dropped); 3378 dev_kfree_skb_any(skb); 3379 return NETDEV_TX_OK; 3380 } 3381 3382 /* Don't wait up for transmitted skbs to be freed. */ 3383 if (!use_napi) { 3384 skb_orphan(skb); 3385 nf_reset_ct(skb); 3386 } 3387 3388 if (use_napi) 3389 tx_may_stop(vi, dev, sq); 3390 else 3391 check_sq_full_and_disable(vi, dev,sq); 3392 3393 kick = use_napi ? __netdev_tx_sent_queue(txq, skb->len, xmit_more) : 3394 !xmit_more || netif_xmit_stopped(txq); 3395 if (kick) { 3396 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { 3397 u64_stats_update_begin(&sq->stats.syncp); 3398 u64_stats_inc(&sq->stats.kicks); 3399 u64_stats_update_end(&sq->stats.syncp); 3400 } 3401 } 3402 3403 if (use_napi && kick && unlikely(!virtqueue_enable_cb_delayed(sq->vq))) 3404 virtqueue_napi_schedule(&sq->napi, sq->vq); 3405 3406 return NETDEV_TX_OK; 3407 } 3408 3409 static void __virtnet_rx_pause(struct virtnet_info *vi, 3410 struct receive_queue *rq) 3411 { 3412 bool running = netif_running(vi->dev); 3413 3414 if (running) { 3415 virtnet_napi_disable(rq); 3416 virtnet_cancel_dim(vi, &rq->dim); 3417 } 3418 } 3419 3420 static void virtnet_rx_pause_all(struct virtnet_info *vi) 3421 { 3422 int i; 3423 3424 /* 3425 * Make sure refill_work does not run concurrently to 3426 * avoid napi_disable race which leads to deadlock. 
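 * (refill_work() calls napi_disable()/virtnet_napi_do_enable() on the same
 * queues, so it must be quiesced before the per-queue pause below.)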
 */
	disable_delayed_refill(vi);
	cancel_delayed_work_sync(&vi->refill);
	for (i = 0; i < vi->max_queue_pairs; i++)
		__virtnet_rx_pause(vi, &vi->rq[i]);
}

static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
{
	/*
	 * Make sure refill_work does not run concurrently to
	 * avoid napi_disable race which leads to deadlock.
	 */
	disable_delayed_refill(vi);
	cancel_delayed_work_sync(&vi->refill);
	__virtnet_rx_pause(vi, rq);
}

static void __virtnet_rx_resume(struct virtnet_info *vi,
				struct receive_queue *rq,
				bool refill)
{
	bool running = netif_running(vi->dev);
	bool schedule_refill = false;

	if (refill && !try_fill_recv(vi, rq, GFP_KERNEL))
		schedule_refill = true;
	if (running)
		virtnet_napi_enable(rq);

	if (schedule_refill)
		schedule_delayed_work(&vi->refill, 0);
}

static void virtnet_rx_resume_all(struct virtnet_info *vi)
{
	int i;

	enable_delayed_refill(vi);
	for (i = 0; i < vi->max_queue_pairs; i++) {
		if (i < vi->curr_queue_pairs)
			__virtnet_rx_resume(vi, &vi->rq[i], true);
		else
			__virtnet_rx_resume(vi, &vi->rq[i], false);
	}
}

static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq)
{
	enable_delayed_refill(vi);
	__virtnet_rx_resume(vi, rq, true);
}

static int virtnet_rx_resize(struct virtnet_info *vi,
			     struct receive_queue *rq, u32 ring_num)
{
	int err, qindex;

	qindex = rq - vi->rq;

	virtnet_rx_pause(vi, rq);

	err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_unmap_free_buf, NULL);
	if (err)
		netdev_err(vi->dev, "resize rx fail: rx queue index: %d err: %d\n", qindex, err);

	virtnet_rx_resume(vi, rq);
	return err;
}

static void virtnet_tx_pause(struct virtnet_info *vi, struct send_queue *sq)
{
	bool running = netif_running(vi->dev);
	struct netdev_queue *txq;
	int qindex;

	qindex = sq - vi->sq;

	if (running)
		virtnet_napi_tx_disable(sq);

	txq = netdev_get_tx_queue(vi->dev, qindex);

	/* 1. Wait for all in-flight xmit to complete.
	 * 2. Fix the race of netif_stop_subqueue() vs netif_start_subqueue().
	 */
	__netif_tx_lock_bh(txq);

	/* Prevent rx poll from accessing sq. */
	sq->reset = true;

	/* Prevent the upper layer from trying to send packets.
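 * The subqueue is stopped while the tx lock is still held so that
 * start_xmit() cannot queue a new skb in the window between the reset flag
 * being set and the queue being marked stopped.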
*/ 3519 netif_stop_subqueue(vi->dev, qindex); 3520 3521 __netif_tx_unlock_bh(txq); 3522 } 3523 3524 static void virtnet_tx_resume(struct virtnet_info *vi, struct send_queue *sq) 3525 { 3526 bool running = netif_running(vi->dev); 3527 struct netdev_queue *txq; 3528 int qindex; 3529 3530 qindex = sq - vi->sq; 3531 3532 txq = netdev_get_tx_queue(vi->dev, qindex); 3533 3534 __netif_tx_lock_bh(txq); 3535 sq->reset = false; 3536 netif_tx_wake_queue(txq); 3537 __netif_tx_unlock_bh(txq); 3538 3539 if (running) 3540 virtnet_napi_tx_enable(sq); 3541 } 3542 3543 static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq, 3544 u32 ring_num) 3545 { 3546 int qindex, err; 3547 3548 if (ring_num <= MAX_SKB_FRAGS + 2) { 3549 netdev_err(vi->dev, "tx size (%d) cannot be smaller than %d\n", 3550 ring_num, MAX_SKB_FRAGS + 2); 3551 return -EINVAL; 3552 } 3553 3554 qindex = sq - vi->sq; 3555 3556 virtnet_tx_pause(vi, sq); 3557 3558 err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf, 3559 virtnet_sq_free_unused_buf_done); 3560 if (err) 3561 netdev_err(vi->dev, "resize tx fail: tx queue index: %d err: %d\n", qindex, err); 3562 3563 virtnet_tx_resume(vi, sq); 3564 3565 return err; 3566 } 3567 3568 /* 3569 * Send command via the control virtqueue and check status. Commands 3570 * supported by the hypervisor, as indicated by feature bits, should 3571 * never fail unless improperly formatted. 3572 */ 3573 static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd, 3574 struct scatterlist *out, 3575 struct scatterlist *in) 3576 { 3577 struct scatterlist *sgs[5], hdr, stat; 3578 u32 out_num = 0, tmp, in_num = 0; 3579 bool ok; 3580 int ret; 3581 3582 /* Caller should know better */ 3583 BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)); 3584 3585 mutex_lock(&vi->cvq_lock); 3586 vi->ctrl->status = ~0; 3587 vi->ctrl->hdr.class = class; 3588 vi->ctrl->hdr.cmd = cmd; 3589 /* Add header */ 3590 sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr)); 3591 sgs[out_num++] = &hdr; 3592 3593 if (out) 3594 sgs[out_num++] = out; 3595 3596 /* Add return status. */ 3597 sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status)); 3598 sgs[out_num + in_num++] = &stat; 3599 3600 if (in) 3601 sgs[out_num + in_num++] = in; 3602 3603 BUG_ON(out_num + in_num > ARRAY_SIZE(sgs)); 3604 ret = virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num, vi, GFP_ATOMIC); 3605 if (ret < 0) { 3606 dev_warn(&vi->vdev->dev, 3607 "Failed to add sgs for command vq: %d\n.", ret); 3608 mutex_unlock(&vi->cvq_lock); 3609 return false; 3610 } 3611 3612 if (unlikely(!virtqueue_kick(vi->cvq))) 3613 goto unlock; 3614 3615 /* Spin for a response, the kick causes an ioport write, trapping 3616 * into the hypervisor, so the request should be handled immediately. 
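 * Still check virtqueue_is_broken() and call cond_resched() in the loop so
 * a dead or slow device cannot wedge this CPU.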
3617 */ 3618 while (!virtqueue_get_buf(vi->cvq, &tmp) && 3619 !virtqueue_is_broken(vi->cvq)) { 3620 cond_resched(); 3621 cpu_relax(); 3622 } 3623 3624 unlock: 3625 ok = vi->ctrl->status == VIRTIO_NET_OK; 3626 mutex_unlock(&vi->cvq_lock); 3627 return ok; 3628 } 3629 3630 static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, 3631 struct scatterlist *out) 3632 { 3633 return virtnet_send_command_reply(vi, class, cmd, out, NULL); 3634 } 3635 3636 static int virtnet_set_mac_address(struct net_device *dev, void *p) 3637 { 3638 struct virtnet_info *vi = netdev_priv(dev); 3639 struct virtio_device *vdev = vi->vdev; 3640 int ret; 3641 struct sockaddr *addr; 3642 struct scatterlist sg; 3643 3644 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 3645 return -EOPNOTSUPP; 3646 3647 addr = kmemdup(p, sizeof(*addr), GFP_KERNEL); 3648 if (!addr) 3649 return -ENOMEM; 3650 3651 ret = eth_prepare_mac_addr_change(dev, addr); 3652 if (ret) 3653 goto out; 3654 3655 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 3656 sg_init_one(&sg, addr->sa_data, dev->addr_len); 3657 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 3658 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { 3659 dev_warn(&vdev->dev, 3660 "Failed to set mac address by vq command.\n"); 3661 ret = -EINVAL; 3662 goto out; 3663 } 3664 } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && 3665 !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { 3666 unsigned int i; 3667 3668 /* Naturally, this has an atomicity problem. */ 3669 for (i = 0; i < dev->addr_len; i++) 3670 virtio_cwrite8(vdev, 3671 offsetof(struct virtio_net_config, mac) + 3672 i, addr->sa_data[i]); 3673 } 3674 3675 eth_commit_mac_addr_change(dev, p); 3676 ret = 0; 3677 3678 out: 3679 kfree(addr); 3680 return ret; 3681 } 3682 3683 static void virtnet_stats(struct net_device *dev, 3684 struct rtnl_link_stats64 *tot) 3685 { 3686 struct virtnet_info *vi = netdev_priv(dev); 3687 unsigned int start; 3688 int i; 3689 3690 for (i = 0; i < vi->max_queue_pairs; i++) { 3691 u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops; 3692 struct receive_queue *rq = &vi->rq[i]; 3693 struct send_queue *sq = &vi->sq[i]; 3694 3695 do { 3696 start = u64_stats_fetch_begin(&sq->stats.syncp); 3697 tpackets = u64_stats_read(&sq->stats.packets); 3698 tbytes = u64_stats_read(&sq->stats.bytes); 3699 terrors = u64_stats_read(&sq->stats.tx_timeouts); 3700 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 3701 3702 do { 3703 start = u64_stats_fetch_begin(&rq->stats.syncp); 3704 rpackets = u64_stats_read(&rq->stats.packets); 3705 rbytes = u64_stats_read(&rq->stats.bytes); 3706 rdrops = u64_stats_read(&rq->stats.drops); 3707 } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); 3708 3709 tot->rx_packets += rpackets; 3710 tot->tx_packets += tpackets; 3711 tot->rx_bytes += rbytes; 3712 tot->tx_bytes += tbytes; 3713 tot->rx_dropped += rdrops; 3714 tot->tx_errors += terrors; 3715 } 3716 3717 tot->tx_dropped = DEV_STATS_READ(dev, tx_dropped); 3718 tot->tx_fifo_errors = DEV_STATS_READ(dev, tx_fifo_errors); 3719 tot->rx_length_errors = DEV_STATS_READ(dev, rx_length_errors); 3720 tot->rx_frame_errors = DEV_STATS_READ(dev, rx_frame_errors); 3721 } 3722 3723 static void virtnet_ack_link_announce(struct virtnet_info *vi) 3724 { 3725 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE, 3726 VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL)) 3727 dev_warn(&vi->dev->dev, "Failed to ack link announce.\n"); 3728 } 3729 3730 static bool virtnet_commit_rss_command(struct virtnet_info *vi); 3731 3732 static void 
virtnet_rss_update_by_qpairs(struct virtnet_info *vi, u16 queue_pairs)
{
	u32 indir_val = 0;
	int i = 0;

	for (; i < vi->rss_indir_table_size; ++i) {
		indir_val = ethtool_rxfh_indir_default(i, queue_pairs);
		vi->rss_hdr->indirection_table[i] = cpu_to_le16(indir_val);
	}
	vi->rss_trailer.max_tx_vq = cpu_to_le16(queue_pairs);
}

static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
{
	struct virtio_net_ctrl_mq *mq __free(kfree) = NULL;
	struct virtio_net_rss_config_hdr *old_rss_hdr;
	struct virtio_net_rss_config_trailer old_rss_trailer;
	struct net_device *dev = vi->dev;
	struct scatterlist sg;

	if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ))
		return 0;

	/* First check whether we need to update RSS. Only do so if both
	 * (1) RSS is enabled and (2) there is no user configuration.
	 *
	 * During RSS command processing, the device updates queue_pairs using
	 * rss.max_tx_vq. That is, the device updates queue_pairs together with
	 * RSS, so we can skip the separate queue_pairs update
	 * (VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET below) and return directly.
	 */
	if (vi->has_rss && !netif_is_rxfh_configured(dev)) {
		old_rss_hdr = vi->rss_hdr;
		old_rss_trailer = vi->rss_trailer;
		vi->rss_hdr = devm_kzalloc(&dev->dev, virtnet_rss_hdr_size(vi), GFP_KERNEL);
		if (!vi->rss_hdr) {
			vi->rss_hdr = old_rss_hdr;
			return -ENOMEM;
		}

		*vi->rss_hdr = *old_rss_hdr;
		virtnet_rss_update_by_qpairs(vi, queue_pairs);

		if (!virtnet_commit_rss_command(vi)) {
			/* restore ctrl_rss if commit_rss_command failed */
			devm_kfree(&dev->dev, vi->rss_hdr);
			vi->rss_hdr = old_rss_hdr;
			vi->rss_trailer = old_rss_trailer;

			dev_warn(&dev->dev, "Fail to set num of queue pairs to %d, because committing RSS failed\n",
				 queue_pairs);
			return -EINVAL;
		}
		devm_kfree(&dev->dev, old_rss_hdr);
		goto succ;
	}

	mq = kzalloc(sizeof(*mq), GFP_KERNEL);
	if (!mq)
		return -ENOMEM;

	mq->virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs);
	sg_init_one(&sg, mq, sizeof(*mq));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
				  VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) {
		dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n",
			 queue_pairs);
		return -EINVAL;
	}
succ:
	vi->curr_queue_pairs = queue_pairs;
	/* virtnet_open() will refill when the device is brought up. */
	spin_lock_bh(&vi->refill_lock);
	if (dev->flags & IFF_UP && vi->refill_enabled)
		schedule_delayed_work(&vi->refill, 0);
	spin_unlock_bh(&vi->refill_lock);

	return 0;
}

static int virtnet_close(struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);
	int i;

	/* Make sure NAPI doesn't schedule refill work */
	disable_delayed_refill(vi);
	/* Make sure refill_work doesn't re-enable napi!
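 * (refill_work() re-enables per-queue NAPI once it has refilled; cancelling
 * it synchronously keeps the virtnet_disable_queue_pair() calls below from
 * being undone behind our back.)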
*/ 3820 cancel_delayed_work_sync(&vi->refill); 3821 /* Prevent the config change callback from changing carrier 3822 * after close 3823 */ 3824 virtio_config_driver_disable(vi->vdev); 3825 /* Stop getting status/speed updates: we don't care until next 3826 * open 3827 */ 3828 cancel_work_sync(&vi->config_work); 3829 3830 for (i = 0; i < vi->max_queue_pairs; i++) { 3831 virtnet_disable_queue_pair(vi, i); 3832 virtnet_cancel_dim(vi, &vi->rq[i].dim); 3833 } 3834 3835 netif_carrier_off(dev); 3836 3837 return 0; 3838 } 3839 3840 static void virtnet_rx_mode_work(struct work_struct *work) 3841 { 3842 struct virtnet_info *vi = 3843 container_of(work, struct virtnet_info, rx_mode_work); 3844 u8 *promisc_allmulti __free(kfree) = NULL; 3845 struct net_device *dev = vi->dev; 3846 struct scatterlist sg[2]; 3847 struct virtio_net_ctrl_mac *mac_data; 3848 struct netdev_hw_addr *ha; 3849 int uc_count; 3850 int mc_count; 3851 void *buf; 3852 int i; 3853 3854 /* We can't dynamically set ndo_set_rx_mode, so return gracefully */ 3855 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX)) 3856 return; 3857 3858 promisc_allmulti = kzalloc(sizeof(*promisc_allmulti), GFP_KERNEL); 3859 if (!promisc_allmulti) { 3860 dev_warn(&dev->dev, "Failed to set RX mode, no memory.\n"); 3861 return; 3862 } 3863 3864 rtnl_lock(); 3865 3866 *promisc_allmulti = !!(dev->flags & IFF_PROMISC); 3867 sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti)); 3868 3869 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 3870 VIRTIO_NET_CTRL_RX_PROMISC, sg)) 3871 dev_warn(&dev->dev, "Failed to %sable promisc mode.\n", 3872 *promisc_allmulti ? "en" : "dis"); 3873 3874 *promisc_allmulti = !!(dev->flags & IFF_ALLMULTI); 3875 sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti)); 3876 3877 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 3878 VIRTIO_NET_CTRL_RX_ALLMULTI, sg)) 3879 dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n", 3880 *promisc_allmulti ? 
"en" : "dis"); 3881 3882 netif_addr_lock_bh(dev); 3883 3884 uc_count = netdev_uc_count(dev); 3885 mc_count = netdev_mc_count(dev); 3886 /* MAC filter - use one buffer for both lists */ 3887 buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) + 3888 (2 * sizeof(mac_data->entries)), GFP_ATOMIC); 3889 mac_data = buf; 3890 if (!buf) { 3891 netif_addr_unlock_bh(dev); 3892 rtnl_unlock(); 3893 return; 3894 } 3895 3896 sg_init_table(sg, 2); 3897 3898 /* Store the unicast list and count in the front of the buffer */ 3899 mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count); 3900 i = 0; 3901 netdev_for_each_uc_addr(ha, dev) 3902 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 3903 3904 sg_set_buf(&sg[0], mac_data, 3905 sizeof(mac_data->entries) + (uc_count * ETH_ALEN)); 3906 3907 /* multicast list and count fill the end */ 3908 mac_data = (void *)&mac_data->macs[uc_count][0]; 3909 3910 mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count); 3911 i = 0; 3912 netdev_for_each_mc_addr(ha, dev) 3913 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 3914 3915 netif_addr_unlock_bh(dev); 3916 3917 sg_set_buf(&sg[1], mac_data, 3918 sizeof(mac_data->entries) + (mc_count * ETH_ALEN)); 3919 3920 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 3921 VIRTIO_NET_CTRL_MAC_TABLE_SET, sg)) 3922 dev_warn(&dev->dev, "Failed to set MAC filter table.\n"); 3923 3924 rtnl_unlock(); 3925 3926 kfree(buf); 3927 } 3928 3929 static void virtnet_set_rx_mode(struct net_device *dev) 3930 { 3931 struct virtnet_info *vi = netdev_priv(dev); 3932 3933 if (vi->rx_mode_work_enabled) 3934 schedule_work(&vi->rx_mode_work); 3935 } 3936 3937 static int virtnet_vlan_rx_add_vid(struct net_device *dev, 3938 __be16 proto, u16 vid) 3939 { 3940 struct virtnet_info *vi = netdev_priv(dev); 3941 __virtio16 *_vid __free(kfree) = NULL; 3942 struct scatterlist sg; 3943 3944 _vid = kzalloc(sizeof(*_vid), GFP_KERNEL); 3945 if (!_vid) 3946 return -ENOMEM; 3947 3948 *_vid = cpu_to_virtio16(vi->vdev, vid); 3949 sg_init_one(&sg, _vid, sizeof(*_vid)); 3950 3951 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 3952 VIRTIO_NET_CTRL_VLAN_ADD, &sg)) 3953 dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid); 3954 return 0; 3955 } 3956 3957 static int virtnet_vlan_rx_kill_vid(struct net_device *dev, 3958 __be16 proto, u16 vid) 3959 { 3960 struct virtnet_info *vi = netdev_priv(dev); 3961 __virtio16 *_vid __free(kfree) = NULL; 3962 struct scatterlist sg; 3963 3964 _vid = kzalloc(sizeof(*_vid), GFP_KERNEL); 3965 if (!_vid) 3966 return -ENOMEM; 3967 3968 *_vid = cpu_to_virtio16(vi->vdev, vid); 3969 sg_init_one(&sg, _vid, sizeof(*_vid)); 3970 3971 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 3972 VIRTIO_NET_CTRL_VLAN_DEL, &sg)) 3973 dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid); 3974 return 0; 3975 } 3976 3977 static void virtnet_clean_affinity(struct virtnet_info *vi) 3978 { 3979 int i; 3980 3981 if (vi->affinity_hint_set) { 3982 for (i = 0; i < vi->max_queue_pairs; i++) { 3983 virtqueue_set_affinity(vi->rq[i].vq, NULL); 3984 virtqueue_set_affinity(vi->sq[i].vq, NULL); 3985 } 3986 3987 vi->affinity_hint_set = false; 3988 } 3989 } 3990 3991 static void virtnet_set_affinity(struct virtnet_info *vi) 3992 { 3993 cpumask_var_t mask; 3994 int stragglers; 3995 int group_size; 3996 int i, start = 0, cpu; 3997 int num_cpu; 3998 int stride; 3999 4000 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { 4001 virtnet_clean_affinity(vi); 4002 return; 4003 } 4004 4005 num_cpu = num_online_cpus(); 4006 stride = max_t(int, num_cpu / vi->curr_queue_pairs, 
1); 4007 stragglers = num_cpu >= vi->curr_queue_pairs ? 4008 num_cpu % vi->curr_queue_pairs : 4009 0; 4010 4011 for (i = 0; i < vi->curr_queue_pairs; i++) { 4012 group_size = stride + (i < stragglers ? 1 : 0); 4013 4014 for_each_online_cpu_wrap(cpu, start) { 4015 if (!group_size--) { 4016 start = cpu; 4017 break; 4018 } 4019 cpumask_set_cpu(cpu, mask); 4020 } 4021 4022 virtqueue_set_affinity(vi->rq[i].vq, mask); 4023 virtqueue_set_affinity(vi->sq[i].vq, mask); 4024 __netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS); 4025 cpumask_clear(mask); 4026 } 4027 4028 vi->affinity_hint_set = true; 4029 free_cpumask_var(mask); 4030 } 4031 4032 static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node) 4033 { 4034 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 4035 node); 4036 virtnet_set_affinity(vi); 4037 return 0; 4038 } 4039 4040 static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node) 4041 { 4042 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 4043 node_dead); 4044 virtnet_set_affinity(vi); 4045 return 0; 4046 } 4047 4048 static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node) 4049 { 4050 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 4051 node); 4052 4053 virtnet_clean_affinity(vi); 4054 return 0; 4055 } 4056 4057 static enum cpuhp_state virtionet_online; 4058 4059 static int virtnet_cpu_notif_add(struct virtnet_info *vi) 4060 { 4061 int ret; 4062 4063 ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node); 4064 if (ret) 4065 return ret; 4066 ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD, 4067 &vi->node_dead); 4068 if (!ret) 4069 return ret; 4070 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 4071 return ret; 4072 } 4073 4074 static void virtnet_cpu_notif_remove(struct virtnet_info *vi) 4075 { 4076 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 4077 cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD, 4078 &vi->node_dead); 4079 } 4080 4081 static int virtnet_send_ctrl_coal_vq_cmd(struct virtnet_info *vi, 4082 u16 vqn, u32 max_usecs, u32 max_packets) 4083 { 4084 struct virtio_net_ctrl_coal_vq *coal_vq __free(kfree) = NULL; 4085 struct scatterlist sgs; 4086 4087 coal_vq = kzalloc(sizeof(*coal_vq), GFP_KERNEL); 4088 if (!coal_vq) 4089 return -ENOMEM; 4090 4091 coal_vq->vqn = cpu_to_le16(vqn); 4092 coal_vq->coal.max_usecs = cpu_to_le32(max_usecs); 4093 coal_vq->coal.max_packets = cpu_to_le32(max_packets); 4094 sg_init_one(&sgs, coal_vq, sizeof(*coal_vq)); 4095 4096 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 4097 VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET, 4098 &sgs)) 4099 return -EINVAL; 4100 4101 return 0; 4102 } 4103 4104 static int virtnet_send_rx_ctrl_coal_vq_cmd(struct virtnet_info *vi, 4105 u16 queue, u32 max_usecs, 4106 u32 max_packets) 4107 { 4108 int err; 4109 4110 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 4111 return -EOPNOTSUPP; 4112 4113 err = virtnet_send_ctrl_coal_vq_cmd(vi, rxq2vq(queue), 4114 max_usecs, max_packets); 4115 if (err) 4116 return err; 4117 4118 vi->rq[queue].intr_coal.max_usecs = max_usecs; 4119 vi->rq[queue].intr_coal.max_packets = max_packets; 4120 4121 return 0; 4122 } 4123 4124 static int virtnet_send_tx_ctrl_coal_vq_cmd(struct virtnet_info *vi, 4125 u16 queue, u32 max_usecs, 4126 u32 max_packets) 4127 { 4128 int err; 4129 4130 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 4131 return -EOPNOTSUPP; 4132 4133 err = 
virtnet_send_ctrl_coal_vq_cmd(vi, txq2vq(queue), 4134 max_usecs, max_packets); 4135 if (err) 4136 return err; 4137 4138 vi->sq[queue].intr_coal.max_usecs = max_usecs; 4139 vi->sq[queue].intr_coal.max_packets = max_packets; 4140 4141 return 0; 4142 } 4143 4144 static void virtnet_get_ringparam(struct net_device *dev, 4145 struct ethtool_ringparam *ring, 4146 struct kernel_ethtool_ringparam *kernel_ring, 4147 struct netlink_ext_ack *extack) 4148 { 4149 struct virtnet_info *vi = netdev_priv(dev); 4150 4151 ring->rx_max_pending = vi->rq[0].vq->num_max; 4152 ring->tx_max_pending = vi->sq[0].vq->num_max; 4153 ring->rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 4154 ring->tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 4155 } 4156 4157 static int virtnet_set_ringparam(struct net_device *dev, 4158 struct ethtool_ringparam *ring, 4159 struct kernel_ethtool_ringparam *kernel_ring, 4160 struct netlink_ext_ack *extack) 4161 { 4162 struct virtnet_info *vi = netdev_priv(dev); 4163 u32 rx_pending, tx_pending; 4164 struct receive_queue *rq; 4165 struct send_queue *sq; 4166 int i, err; 4167 4168 if (ring->rx_mini_pending || ring->rx_jumbo_pending) 4169 return -EINVAL; 4170 4171 rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 4172 tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 4173 4174 if (ring->rx_pending == rx_pending && 4175 ring->tx_pending == tx_pending) 4176 return 0; 4177 4178 if (ring->rx_pending > vi->rq[0].vq->num_max) 4179 return -EINVAL; 4180 4181 if (ring->tx_pending > vi->sq[0].vq->num_max) 4182 return -EINVAL; 4183 4184 for (i = 0; i < vi->max_queue_pairs; i++) { 4185 rq = vi->rq + i; 4186 sq = vi->sq + i; 4187 4188 if (ring->tx_pending != tx_pending) { 4189 err = virtnet_tx_resize(vi, sq, ring->tx_pending); 4190 if (err) 4191 return err; 4192 4193 /* Upon disabling and re-enabling a transmit virtqueue, the device must 4194 * set the coalescing parameters of the virtqueue to those configured 4195 * through the VIRTIO_NET_CTRL_NOTF_COAL_TX_SET command, or, if the driver 4196 * did not set any TX coalescing parameters, to 0. 4197 */ 4198 err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, i, 4199 vi->intr_coal_tx.max_usecs, 4200 vi->intr_coal_tx.max_packets); 4201 4202 /* Don't break the tx resize action if the vq coalescing is not 4203 * supported. The same is true for rx resize below. 4204 */ 4205 if (err && err != -EOPNOTSUPP) 4206 return err; 4207 } 4208 4209 if (ring->rx_pending != rx_pending) { 4210 err = virtnet_rx_resize(vi, rq, ring->rx_pending); 4211 if (err) 4212 return err; 4213 4214 /* The reason is same as the transmit virtqueue reset */ 4215 mutex_lock(&vi->rq[i].dim_lock); 4216 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, i, 4217 vi->intr_coal_rx.max_usecs, 4218 vi->intr_coal_rx.max_packets); 4219 mutex_unlock(&vi->rq[i].dim_lock); 4220 if (err && err != -EOPNOTSUPP) 4221 return err; 4222 } 4223 } 4224 4225 return 0; 4226 } 4227 4228 static bool virtnet_commit_rss_command(struct virtnet_info *vi) 4229 { 4230 struct net_device *dev = vi->dev; 4231 struct scatterlist sgs[2]; 4232 4233 /* prepare sgs */ 4234 sg_init_table(sgs, 2); 4235 sg_set_buf(&sgs[0], vi->rss_hdr, virtnet_rss_hdr_size(vi)); 4236 sg_set_buf(&sgs[1], &vi->rss_trailer, virtnet_rss_trailer_size(vi)); 4237 4238 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, 4239 vi->has_rss ? 
VIRTIO_NET_CTRL_MQ_RSS_CONFIG 4240 : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, sgs)) 4241 goto err; 4242 4243 return true; 4244 4245 err: 4246 dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n"); 4247 return false; 4248 4249 } 4250 4251 static void virtnet_init_default_rss(struct virtnet_info *vi) 4252 { 4253 vi->rss_hdr->hash_types = cpu_to_le32(vi->rss_hash_types_supported); 4254 vi->rss_hash_types_saved = vi->rss_hash_types_supported; 4255 vi->rss_hdr->indirection_table_mask = vi->rss_indir_table_size 4256 ? cpu_to_le16(vi->rss_indir_table_size - 1) : 0; 4257 vi->rss_hdr->unclassified_queue = 0; 4258 4259 virtnet_rss_update_by_qpairs(vi, vi->curr_queue_pairs); 4260 4261 vi->rss_trailer.hash_key_length = vi->rss_key_size; 4262 4263 netdev_rss_key_fill(vi->rss_hash_key_data, vi->rss_key_size); 4264 } 4265 4266 static int virtnet_get_hashflow(struct net_device *dev, 4267 struct ethtool_rxfh_fields *info) 4268 { 4269 struct virtnet_info *vi = netdev_priv(dev); 4270 4271 info->data = 0; 4272 switch (info->flow_type) { 4273 case TCP_V4_FLOW: 4274 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) { 4275 info->data = RXH_IP_SRC | RXH_IP_DST | 4276 RXH_L4_B_0_1 | RXH_L4_B_2_3; 4277 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 4278 info->data = RXH_IP_SRC | RXH_IP_DST; 4279 } 4280 break; 4281 case TCP_V6_FLOW: 4282 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) { 4283 info->data = RXH_IP_SRC | RXH_IP_DST | 4284 RXH_L4_B_0_1 | RXH_L4_B_2_3; 4285 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 4286 info->data = RXH_IP_SRC | RXH_IP_DST; 4287 } 4288 break; 4289 case UDP_V4_FLOW: 4290 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) { 4291 info->data = RXH_IP_SRC | RXH_IP_DST | 4292 RXH_L4_B_0_1 | RXH_L4_B_2_3; 4293 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 4294 info->data = RXH_IP_SRC | RXH_IP_DST; 4295 } 4296 break; 4297 case UDP_V6_FLOW: 4298 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) { 4299 info->data = RXH_IP_SRC | RXH_IP_DST | 4300 RXH_L4_B_0_1 | RXH_L4_B_2_3; 4301 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 4302 info->data = RXH_IP_SRC | RXH_IP_DST; 4303 } 4304 break; 4305 case IPV4_FLOW: 4306 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) 4307 info->data = RXH_IP_SRC | RXH_IP_DST; 4308 4309 break; 4310 case IPV6_FLOW: 4311 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) 4312 info->data = RXH_IP_SRC | RXH_IP_DST; 4313 4314 break; 4315 default: 4316 info->data = 0; 4317 break; 4318 } 4319 4320 return 0; 4321 } 4322 4323 static int virtnet_set_hashflow(struct net_device *dev, 4324 const struct ethtool_rxfh_fields *info, 4325 struct netlink_ext_ack *extack) 4326 { 4327 struct virtnet_info *vi = netdev_priv(dev); 4328 u32 new_hashtypes = vi->rss_hash_types_saved; 4329 bool is_disable = info->data & RXH_DISCARD; 4330 bool is_l4 = info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3); 4331 4332 /* supports only 'sd', 'sdfn' and 'r' */ 4333 if (!((info->data == (RXH_IP_SRC | RXH_IP_DST)) | is_l4 | is_disable)) 4334 return -EINVAL; 4335 4336 switch (info->flow_type) { 4337 case TCP_V4_FLOW: 4338 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4); 4339 if (!is_disable) 4340 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 4341 | (is_l4 ? 
VIRTIO_NET_RSS_HASH_TYPE_TCPv4 : 0); 4342 break; 4343 case UDP_V4_FLOW: 4344 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_UDPv4); 4345 if (!is_disable) 4346 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 4347 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv4 : 0); 4348 break; 4349 case IPV4_FLOW: 4350 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv4; 4351 if (!is_disable) 4352 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv4; 4353 break; 4354 case TCP_V6_FLOW: 4355 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_TCPv6); 4356 if (!is_disable) 4357 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 4358 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv6 : 0); 4359 break; 4360 case UDP_V6_FLOW: 4361 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_UDPv6); 4362 if (!is_disable) 4363 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 4364 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv6 : 0); 4365 break; 4366 case IPV6_FLOW: 4367 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv6; 4368 if (!is_disable) 4369 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv6; 4370 break; 4371 default: 4372 /* unsupported flow */ 4373 return -EINVAL; 4374 } 4375 4376 /* if unsupported hashtype was set */ 4377 if (new_hashtypes != (new_hashtypes & vi->rss_hash_types_supported)) 4378 return -EINVAL; 4379 4380 if (new_hashtypes != vi->rss_hash_types_saved) { 4381 vi->rss_hash_types_saved = new_hashtypes; 4382 vi->rss_hdr->hash_types = cpu_to_le32(vi->rss_hash_types_saved); 4383 if (vi->dev->features & NETIF_F_RXHASH) 4384 if (!virtnet_commit_rss_command(vi)) 4385 return -EINVAL; 4386 } 4387 4388 return 0; 4389 } 4390 4391 static void virtnet_get_drvinfo(struct net_device *dev, 4392 struct ethtool_drvinfo *info) 4393 { 4394 struct virtnet_info *vi = netdev_priv(dev); 4395 struct virtio_device *vdev = vi->vdev; 4396 4397 strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); 4398 strscpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version)); 4399 strscpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info)); 4400 4401 } 4402 4403 /* TODO: Eliminate OOO packets during switching */ 4404 static int virtnet_set_channels(struct net_device *dev, 4405 struct ethtool_channels *channels) 4406 { 4407 struct virtnet_info *vi = netdev_priv(dev); 4408 u16 queue_pairs = channels->combined_count; 4409 int err; 4410 4411 /* We don't support separate rx/tx channels. 4412 * We don't allow setting 'other' channels. 4413 */ 4414 if (channels->rx_count || channels->tx_count || channels->other_count) 4415 return -EINVAL; 4416 4417 if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0) 4418 return -EINVAL; 4419 4420 /* For now we don't support modifying channels while XDP is loaded 4421 * also when XDP is loaded all RX queues have XDP programs so we only 4422 * need to check a single RX queue. 
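 *
 * From userspace this path is typically reached via something like
 * "ethtool -L <dev> combined <N>"; only the combined count can change,
 * since separate rx/tx/other channel counts are rejected above.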
4423 */ 4424 if (vi->rq[0].xdp_prog) 4425 return -EINVAL; 4426 4427 cpus_read_lock(); 4428 err = virtnet_set_queues(vi, queue_pairs); 4429 if (err) { 4430 cpus_read_unlock(); 4431 goto err; 4432 } 4433 virtnet_set_affinity(vi); 4434 cpus_read_unlock(); 4435 4436 netif_set_real_num_tx_queues(dev, queue_pairs); 4437 netif_set_real_num_rx_queues(dev, queue_pairs); 4438 err: 4439 return err; 4440 } 4441 4442 static void virtnet_stats_sprintf(u8 **p, const char *fmt, const char *noq_fmt, 4443 int num, int qid, const struct virtnet_stat_desc *desc) 4444 { 4445 int i; 4446 4447 if (qid < 0) { 4448 for (i = 0; i < num; ++i) 4449 ethtool_sprintf(p, noq_fmt, desc[i].desc); 4450 } else { 4451 for (i = 0; i < num; ++i) 4452 ethtool_sprintf(p, fmt, qid, desc[i].desc); 4453 } 4454 } 4455 4456 /* qid == -1: for rx/tx queue total field */ 4457 static void virtnet_get_stats_string(struct virtnet_info *vi, int type, int qid, u8 **data) 4458 { 4459 const struct virtnet_stat_desc *desc; 4460 const char *fmt, *noq_fmt; 4461 u8 *p = *data; 4462 u32 num; 4463 4464 if (type == VIRTNET_Q_TYPE_CQ && qid >= 0) { 4465 noq_fmt = "cq_hw_%s"; 4466 4467 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) { 4468 desc = &virtnet_stats_cvq_desc[0]; 4469 num = ARRAY_SIZE(virtnet_stats_cvq_desc); 4470 4471 virtnet_stats_sprintf(&p, NULL, noq_fmt, num, -1, desc); 4472 } 4473 } 4474 4475 if (type == VIRTNET_Q_TYPE_RX) { 4476 fmt = "rx%u_%s"; 4477 noq_fmt = "rx_%s"; 4478 4479 desc = &virtnet_rq_stats_desc[0]; 4480 num = ARRAY_SIZE(virtnet_rq_stats_desc); 4481 4482 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4483 4484 fmt = "rx%u_hw_%s"; 4485 noq_fmt = "rx_hw_%s"; 4486 4487 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4488 desc = &virtnet_stats_rx_basic_desc[0]; 4489 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4490 4491 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4492 } 4493 4494 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4495 desc = &virtnet_stats_rx_csum_desc[0]; 4496 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4497 4498 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4499 } 4500 4501 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4502 desc = &virtnet_stats_rx_speed_desc[0]; 4503 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4504 4505 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4506 } 4507 } 4508 4509 if (type == VIRTNET_Q_TYPE_TX) { 4510 fmt = "tx%u_%s"; 4511 noq_fmt = "tx_%s"; 4512 4513 desc = &virtnet_sq_stats_desc[0]; 4514 num = ARRAY_SIZE(virtnet_sq_stats_desc); 4515 4516 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4517 4518 fmt = "tx%u_hw_%s"; 4519 noq_fmt = "tx_hw_%s"; 4520 4521 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4522 desc = &virtnet_stats_tx_basic_desc[0]; 4523 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4524 4525 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4526 } 4527 4528 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4529 desc = &virtnet_stats_tx_gso_desc[0]; 4530 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4531 4532 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4533 } 4534 4535 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4536 desc = &virtnet_stats_tx_speed_desc[0]; 4537 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4538 4539 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4540 } 4541 } 4542 4543 *data = p; 4544 } 4545 4546 struct virtnet_stats_ctx { 4547 /* The stats are write to qstats or ethtool -S */ 4548 
bool to_qstat; 4549 4550 /* Used to calculate the offset inside the output buffer. */ 4551 u32 desc_num[3]; 4552 4553 /* The actual supported stat types. */ 4554 u64 bitmap[3]; 4555 4556 /* Used to calculate the reply buffer size. */ 4557 u32 size[3]; 4558 4559 /* Record the output buffer. */ 4560 u64 *data; 4561 }; 4562 4563 static void virtnet_stats_ctx_init(struct virtnet_info *vi, 4564 struct virtnet_stats_ctx *ctx, 4565 u64 *data, bool to_qstat) 4566 { 4567 u32 queue_type; 4568 4569 ctx->data = data; 4570 ctx->to_qstat = to_qstat; 4571 4572 if (to_qstat) { 4573 ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc_qstat); 4574 ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc_qstat); 4575 4576 queue_type = VIRTNET_Q_TYPE_RX; 4577 4578 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4579 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC; 4580 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat); 4581 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic); 4582 } 4583 4584 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4585 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM; 4586 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat); 4587 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum); 4588 } 4589 4590 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 4591 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_GSO; 4592 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat); 4593 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_gso); 4594 } 4595 4596 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4597 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED; 4598 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat); 4599 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed); 4600 } 4601 4602 queue_type = VIRTNET_Q_TYPE_TX; 4603 4604 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4605 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC; 4606 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat); 4607 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic); 4608 } 4609 4610 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 4611 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_CSUM; 4612 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat); 4613 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_csum); 4614 } 4615 4616 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4617 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO; 4618 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat); 4619 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso); 4620 } 4621 4622 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4623 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED; 4624 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat); 4625 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed); 4626 } 4627 4628 return; 4629 } 4630 4631 ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc); 4632 ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc); 4633 4634 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) { 4635 queue_type = VIRTNET_Q_TYPE_CQ; 4636 4637 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_CVQ; 4638 ctx->desc_num[queue_type] += 
ARRAY_SIZE(virtnet_stats_cvq_desc); 4639 ctx->size[queue_type] += sizeof(struct virtio_net_stats_cvq); 4640 } 4641 4642 queue_type = VIRTNET_Q_TYPE_RX; 4643 4644 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4645 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC; 4646 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4647 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic); 4648 } 4649 4650 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4651 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM; 4652 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4653 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum); 4654 } 4655 4656 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4657 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED; 4658 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4659 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed); 4660 } 4661 4662 queue_type = VIRTNET_Q_TYPE_TX; 4663 4664 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4665 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC; 4666 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4667 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic); 4668 } 4669 4670 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4671 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO; 4672 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4673 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso); 4674 } 4675 4676 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4677 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED; 4678 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4679 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed); 4680 } 4681 } 4682 4683 /* stats_sum_queue - Calculate the sum of the same fields in sq or rq. 
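 *
 * A worked example of the layout assumed here: with @num = 2 fields and
 * @q_num = 3 queues stored as [a0 b0 a1 b1 a2 b2], the result is
 * sum[0] = a0 + a1 + a2 and sum[1] = b0 + b1 + b2.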
4684 * @sum: the position to store the sum values 4685 * @num: field num 4686 * @q_value: the first queue fields 4687 * @q_num: number of the queues 4688 */ 4689 static void stats_sum_queue(u64 *sum, u32 num, u64 *q_value, u32 q_num) 4690 { 4691 u32 step = num; 4692 int i, j; 4693 u64 *p; 4694 4695 for (i = 0; i < num; ++i) { 4696 p = sum + i; 4697 *p = 0; 4698 4699 for (j = 0; j < q_num; ++j) 4700 *p += *(q_value + i + j * step); 4701 } 4702 } 4703 4704 static void virtnet_fill_total_fields(struct virtnet_info *vi, 4705 struct virtnet_stats_ctx *ctx) 4706 { 4707 u64 *data, *first_rx_q, *first_tx_q; 4708 u32 num_cq, num_rx, num_tx; 4709 4710 num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ]; 4711 num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX]; 4712 num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX]; 4713 4714 first_rx_q = ctx->data + num_rx + num_tx + num_cq; 4715 first_tx_q = first_rx_q + vi->curr_queue_pairs * num_rx; 4716 4717 data = ctx->data; 4718 4719 stats_sum_queue(data, num_rx, first_rx_q, vi->curr_queue_pairs); 4720 4721 data = ctx->data + num_rx; 4722 4723 stats_sum_queue(data, num_tx, first_tx_q, vi->curr_queue_pairs); 4724 } 4725 4726 static void virtnet_fill_stats_qstat(struct virtnet_info *vi, u32 qid, 4727 struct virtnet_stats_ctx *ctx, 4728 const u8 *base, bool drv_stats, u8 reply_type) 4729 { 4730 const struct virtnet_stat_desc *desc; 4731 const u64_stats_t *v_stat; 4732 u64 offset, bitmap; 4733 const __le64 *v; 4734 u32 queue_type; 4735 int i, num; 4736 4737 queue_type = vq_type(vi, qid); 4738 bitmap = ctx->bitmap[queue_type]; 4739 4740 if (drv_stats) { 4741 if (queue_type == VIRTNET_Q_TYPE_RX) { 4742 desc = &virtnet_rq_stats_desc_qstat[0]; 4743 num = ARRAY_SIZE(virtnet_rq_stats_desc_qstat); 4744 } else { 4745 desc = &virtnet_sq_stats_desc_qstat[0]; 4746 num = ARRAY_SIZE(virtnet_sq_stats_desc_qstat); 4747 } 4748 4749 for (i = 0; i < num; ++i) { 4750 offset = desc[i].qstat_offset / sizeof(*ctx->data); 4751 v_stat = (const u64_stats_t *)(base + desc[i].offset); 4752 ctx->data[offset] = u64_stats_read(v_stat); 4753 } 4754 return; 4755 } 4756 4757 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4758 desc = &virtnet_stats_rx_basic_desc_qstat[0]; 4759 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat); 4760 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC) 4761 goto found; 4762 } 4763 4764 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4765 desc = &virtnet_stats_rx_csum_desc_qstat[0]; 4766 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat); 4767 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM) 4768 goto found; 4769 } 4770 4771 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 4772 desc = &virtnet_stats_rx_gso_desc_qstat[0]; 4773 num = ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat); 4774 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_GSO) 4775 goto found; 4776 } 4777 4778 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4779 desc = &virtnet_stats_rx_speed_desc_qstat[0]; 4780 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat); 4781 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED) 4782 goto found; 4783 } 4784 4785 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4786 desc = &virtnet_stats_tx_basic_desc_qstat[0]; 4787 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat); 4788 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC) 4789 goto found; 4790 } 4791 4792 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 4793 desc = &virtnet_stats_tx_csum_desc_qstat[0]; 4794 num = ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat); 4795 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_CSUM) 4796 goto found; 4797 
} 4798 4799 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4800 desc = &virtnet_stats_tx_gso_desc_qstat[0]; 4801 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat); 4802 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO) 4803 goto found; 4804 } 4805 4806 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4807 desc = &virtnet_stats_tx_speed_desc_qstat[0]; 4808 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat); 4809 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED) 4810 goto found; 4811 } 4812 4813 return; 4814 4815 found: 4816 for (i = 0; i < num; ++i) { 4817 offset = desc[i].qstat_offset / sizeof(*ctx->data); 4818 v = (const __le64 *)(base + desc[i].offset); 4819 ctx->data[offset] = le64_to_cpu(*v); 4820 } 4821 } 4822 4823 /* virtnet_fill_stats - copy the stats to qstats or ethtool -S 4824 * The stats source is the device or the driver. 4825 * 4826 * @vi: virtio net info 4827 * @qid: the vq id 4828 * @ctx: stats ctx (initiated by virtnet_stats_ctx_init()) 4829 * @base: pointer to the device reply or the driver stats structure. 4830 * @drv_stats: designate the base type (device reply, driver stats) 4831 * @type: the type of the device reply (if drv_stats is true, this must be zero) 4832 */ 4833 static void virtnet_fill_stats(struct virtnet_info *vi, u32 qid, 4834 struct virtnet_stats_ctx *ctx, 4835 const u8 *base, bool drv_stats, u8 reply_type) 4836 { 4837 u32 queue_type, num_rx, num_tx, num_cq; 4838 const struct virtnet_stat_desc *desc; 4839 const u64_stats_t *v_stat; 4840 u64 offset, bitmap; 4841 const __le64 *v; 4842 int i, num; 4843 4844 if (ctx->to_qstat) 4845 return virtnet_fill_stats_qstat(vi, qid, ctx, base, drv_stats, reply_type); 4846 4847 num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ]; 4848 num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX]; 4849 num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX]; 4850 4851 queue_type = vq_type(vi, qid); 4852 bitmap = ctx->bitmap[queue_type]; 4853 4854 /* skip the total fields of pairs */ 4855 offset = num_rx + num_tx; 4856 4857 if (queue_type == VIRTNET_Q_TYPE_TX) { 4858 offset += num_cq + num_rx * vi->curr_queue_pairs + num_tx * (qid / 2); 4859 4860 num = ARRAY_SIZE(virtnet_sq_stats_desc); 4861 if (drv_stats) { 4862 desc = &virtnet_sq_stats_desc[0]; 4863 goto drv_stats; 4864 } 4865 4866 offset += num; 4867 4868 } else if (queue_type == VIRTNET_Q_TYPE_RX) { 4869 offset += num_cq + num_rx * (qid / 2); 4870 4871 num = ARRAY_SIZE(virtnet_rq_stats_desc); 4872 if (drv_stats) { 4873 desc = &virtnet_rq_stats_desc[0]; 4874 goto drv_stats; 4875 } 4876 4877 offset += num; 4878 } 4879 4880 if (bitmap & VIRTIO_NET_STATS_TYPE_CVQ) { 4881 desc = &virtnet_stats_cvq_desc[0]; 4882 num = ARRAY_SIZE(virtnet_stats_cvq_desc); 4883 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_CVQ) 4884 goto found; 4885 4886 offset += num; 4887 } 4888 4889 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4890 desc = &virtnet_stats_rx_basic_desc[0]; 4891 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4892 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC) 4893 goto found; 4894 4895 offset += num; 4896 } 4897 4898 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4899 desc = &virtnet_stats_rx_csum_desc[0]; 4900 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4901 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM) 4902 goto found; 4903 4904 offset += num; 4905 } 4906 4907 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4908 desc = &virtnet_stats_rx_speed_desc[0]; 4909 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4910 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED) 4911 goto found; 4912 4913 
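		/* Not the reply type being filled in; skip this block's
		 * slots in the flat ethtool -S buffer and keep walking.
		 */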
offset += num; 4914 } 4915 4916 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4917 desc = &virtnet_stats_tx_basic_desc[0]; 4918 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4919 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC) 4920 goto found; 4921 4922 offset += num; 4923 } 4924 4925 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4926 desc = &virtnet_stats_tx_gso_desc[0]; 4927 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4928 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO) 4929 goto found; 4930 4931 offset += num; 4932 } 4933 4934 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4935 desc = &virtnet_stats_tx_speed_desc[0]; 4936 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4937 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED) 4938 goto found; 4939 4940 offset += num; 4941 } 4942 4943 return; 4944 4945 found: 4946 for (i = 0; i < num; ++i) { 4947 v = (const __le64 *)(base + desc[i].offset); 4948 ctx->data[offset + i] = le64_to_cpu(*v); 4949 } 4950 4951 return; 4952 4953 drv_stats: 4954 for (i = 0; i < num; ++i) { 4955 v_stat = (const u64_stats_t *)(base + desc[i].offset); 4956 ctx->data[offset + i] = u64_stats_read(v_stat); 4957 } 4958 } 4959 4960 static int __virtnet_get_hw_stats(struct virtnet_info *vi, 4961 struct virtnet_stats_ctx *ctx, 4962 struct virtio_net_ctrl_queue_stats *req, 4963 int req_size, void *reply, int res_size) 4964 { 4965 struct virtio_net_stats_reply_hdr *hdr; 4966 struct scatterlist sgs_in, sgs_out; 4967 void *p; 4968 u32 qid; 4969 int ok; 4970 4971 sg_init_one(&sgs_out, req, req_size); 4972 sg_init_one(&sgs_in, reply, res_size); 4973 4974 ok = virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS, 4975 VIRTIO_NET_CTRL_STATS_GET, 4976 &sgs_out, &sgs_in); 4977 4978 if (!ok) 4979 return ok; 4980 4981 for (p = reply; p - reply < res_size; p += le16_to_cpu(hdr->size)) { 4982 hdr = p; 4983 qid = le16_to_cpu(hdr->vq_index); 4984 virtnet_fill_stats(vi, qid, ctx, p, false, hdr->type); 4985 } 4986 4987 return 0; 4988 } 4989 4990 static void virtnet_make_stat_req(struct virtnet_info *vi, 4991 struct virtnet_stats_ctx *ctx, 4992 struct virtio_net_ctrl_queue_stats *req, 4993 int qid, int *idx) 4994 { 4995 int qtype = vq_type(vi, qid); 4996 u64 bitmap = ctx->bitmap[qtype]; 4997 4998 if (!bitmap) 4999 return; 5000 5001 req->stats[*idx].vq_index = cpu_to_le16(qid); 5002 req->stats[*idx].types_bitmap[0] = cpu_to_le64(bitmap); 5003 *idx += 1; 5004 } 5005 5006 /* qid: -1: get stats of all vq. 5007 * > 0: get the stats for the special vq. This must not be cvq. 
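 *
 * One virtio_net_ctrl_queue_stats entry is queued per vq whose type has a
 * non-zero bitmap in the stats ctx (plus the cvq when qid is -1), and the
 * device reply is then parsed header by header by __virtnet_get_hw_stats().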
5008 */ 5009 static int virtnet_get_hw_stats(struct virtnet_info *vi, 5010 struct virtnet_stats_ctx *ctx, int qid) 5011 { 5012 int qnum, i, j, res_size, qtype, last_vq, first_vq; 5013 struct virtio_net_ctrl_queue_stats *req; 5014 bool enable_cvq; 5015 void *reply; 5016 int ok; 5017 5018 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) 5019 return 0; 5020 5021 if (qid == -1) { 5022 last_vq = vi->curr_queue_pairs * 2 - 1; 5023 first_vq = 0; 5024 enable_cvq = true; 5025 } else { 5026 last_vq = qid; 5027 first_vq = qid; 5028 enable_cvq = false; 5029 } 5030 5031 qnum = 0; 5032 res_size = 0; 5033 for (i = first_vq; i <= last_vq ; ++i) { 5034 qtype = vq_type(vi, i); 5035 if (ctx->bitmap[qtype]) { 5036 ++qnum; 5037 res_size += ctx->size[qtype]; 5038 } 5039 } 5040 5041 if (enable_cvq && ctx->bitmap[VIRTNET_Q_TYPE_CQ]) { 5042 res_size += ctx->size[VIRTNET_Q_TYPE_CQ]; 5043 qnum += 1; 5044 } 5045 5046 req = kcalloc(qnum, sizeof(*req), GFP_KERNEL); 5047 if (!req) 5048 return -ENOMEM; 5049 5050 reply = kmalloc(res_size, GFP_KERNEL); 5051 if (!reply) { 5052 kfree(req); 5053 return -ENOMEM; 5054 } 5055 5056 j = 0; 5057 for (i = first_vq; i <= last_vq ; ++i) 5058 virtnet_make_stat_req(vi, ctx, req, i, &j); 5059 5060 if (enable_cvq) 5061 virtnet_make_stat_req(vi, ctx, req, vi->max_queue_pairs * 2, &j); 5062 5063 ok = __virtnet_get_hw_stats(vi, ctx, req, sizeof(*req) * j, reply, res_size); 5064 5065 kfree(req); 5066 kfree(reply); 5067 5068 return ok; 5069 } 5070 5071 static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data) 5072 { 5073 struct virtnet_info *vi = netdev_priv(dev); 5074 unsigned int i; 5075 u8 *p = data; 5076 5077 switch (stringset) { 5078 case ETH_SS_STATS: 5079 /* Generate the total field names. */ 5080 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, -1, &p); 5081 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, -1, &p); 5082 5083 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_CQ, 0, &p); 5084 5085 for (i = 0; i < vi->curr_queue_pairs; ++i) 5086 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, i, &p); 5087 5088 for (i = 0; i < vi->curr_queue_pairs; ++i) 5089 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, i, &p); 5090 break; 5091 } 5092 } 5093 5094 static int virtnet_get_sset_count(struct net_device *dev, int sset) 5095 { 5096 struct virtnet_info *vi = netdev_priv(dev); 5097 struct virtnet_stats_ctx ctx = {0}; 5098 u32 pair_count; 5099 5100 switch (sset) { 5101 case ETH_SS_STATS: 5102 virtnet_stats_ctx_init(vi, &ctx, NULL, false); 5103 5104 pair_count = ctx.desc_num[VIRTNET_Q_TYPE_RX] + ctx.desc_num[VIRTNET_Q_TYPE_TX]; 5105 5106 return pair_count + ctx.desc_num[VIRTNET_Q_TYPE_CQ] + 5107 vi->curr_queue_pairs * pair_count; 5108 default: 5109 return -EOPNOTSUPP; 5110 } 5111 } 5112 5113 static void virtnet_get_ethtool_stats(struct net_device *dev, 5114 struct ethtool_stats *stats, u64 *data) 5115 { 5116 struct virtnet_info *vi = netdev_priv(dev); 5117 struct virtnet_stats_ctx ctx = {0}; 5118 unsigned int start, i; 5119 const u8 *stats_base; 5120 5121 virtnet_stats_ctx_init(vi, &ctx, data, false); 5122 if (virtnet_get_hw_stats(vi, &ctx, -1)) 5123 dev_warn(&vi->dev->dev, "Failed to get hw stats.\n"); 5124 5125 for (i = 0; i < vi->curr_queue_pairs; i++) { 5126 struct receive_queue *rq = &vi->rq[i]; 5127 struct send_queue *sq = &vi->sq[i]; 5128 5129 stats_base = (const u8 *)&rq->stats; 5130 do { 5131 start = u64_stats_fetch_begin(&rq->stats.syncp); 5132 virtnet_fill_stats(vi, i * 2, &ctx, stats_base, true, 0); 5133 } while (u64_stats_fetch_retry(&rq->stats.syncp, 
start)); 5134 5135 stats_base = (const u8 *)&sq->stats; 5136 do { 5137 start = u64_stats_fetch_begin(&sq->stats.syncp); 5138 virtnet_fill_stats(vi, i * 2 + 1, &ctx, stats_base, true, 0); 5139 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 5140 } 5141 5142 virtnet_fill_total_fields(vi, &ctx); 5143 } 5144 5145 static void virtnet_get_channels(struct net_device *dev, 5146 struct ethtool_channels *channels) 5147 { 5148 struct virtnet_info *vi = netdev_priv(dev); 5149 5150 channels->combined_count = vi->curr_queue_pairs; 5151 channels->max_combined = vi->max_queue_pairs; 5152 channels->max_other = 0; 5153 channels->rx_count = 0; 5154 channels->tx_count = 0; 5155 channels->other_count = 0; 5156 } 5157 5158 static int virtnet_set_link_ksettings(struct net_device *dev, 5159 const struct ethtool_link_ksettings *cmd) 5160 { 5161 struct virtnet_info *vi = netdev_priv(dev); 5162 5163 return ethtool_virtdev_set_link_ksettings(dev, cmd, 5164 &vi->speed, &vi->duplex); 5165 } 5166 5167 static int virtnet_get_link_ksettings(struct net_device *dev, 5168 struct ethtool_link_ksettings *cmd) 5169 { 5170 struct virtnet_info *vi = netdev_priv(dev); 5171 5172 cmd->base.speed = vi->speed; 5173 cmd->base.duplex = vi->duplex; 5174 cmd->base.port = PORT_OTHER; 5175 5176 return 0; 5177 } 5178 5179 static int virtnet_send_tx_notf_coal_cmds(struct virtnet_info *vi, 5180 struct ethtool_coalesce *ec) 5181 { 5182 struct virtio_net_ctrl_coal_tx *coal_tx __free(kfree) = NULL; 5183 struct scatterlist sgs_tx; 5184 int i; 5185 5186 coal_tx = kzalloc(sizeof(*coal_tx), GFP_KERNEL); 5187 if (!coal_tx) 5188 return -ENOMEM; 5189 5190 coal_tx->tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); 5191 coal_tx->tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); 5192 sg_init_one(&sgs_tx, coal_tx, sizeof(*coal_tx)); 5193 5194 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 5195 VIRTIO_NET_CTRL_NOTF_COAL_TX_SET, 5196 &sgs_tx)) 5197 return -EINVAL; 5198 5199 vi->intr_coal_tx.max_usecs = ec->tx_coalesce_usecs; 5200 vi->intr_coal_tx.max_packets = ec->tx_max_coalesced_frames; 5201 for (i = 0; i < vi->max_queue_pairs; i++) { 5202 vi->sq[i].intr_coal.max_usecs = ec->tx_coalesce_usecs; 5203 vi->sq[i].intr_coal.max_packets = ec->tx_max_coalesced_frames; 5204 } 5205 5206 return 0; 5207 } 5208 5209 static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi, 5210 struct ethtool_coalesce *ec) 5211 { 5212 struct virtio_net_ctrl_coal_rx *coal_rx __free(kfree) = NULL; 5213 bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce; 5214 struct scatterlist sgs_rx; 5215 int i; 5216 5217 if (rx_ctrl_dim_on && !virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 5218 return -EOPNOTSUPP; 5219 5220 if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != vi->intr_coal_rx.max_usecs || 5221 ec->rx_max_coalesced_frames != vi->intr_coal_rx.max_packets)) 5222 return -EINVAL; 5223 5224 if (rx_ctrl_dim_on && !vi->rx_dim_enabled) { 5225 vi->rx_dim_enabled = true; 5226 for (i = 0; i < vi->max_queue_pairs; i++) { 5227 mutex_lock(&vi->rq[i].dim_lock); 5228 vi->rq[i].dim_enabled = true; 5229 mutex_unlock(&vi->rq[i].dim_lock); 5230 } 5231 return 0; 5232 } 5233 5234 coal_rx = kzalloc(sizeof(*coal_rx), GFP_KERNEL); 5235 if (!coal_rx) 5236 return -ENOMEM; 5237 5238 if (!rx_ctrl_dim_on && vi->rx_dim_enabled) { 5239 vi->rx_dim_enabled = false; 5240 for (i = 0; i < vi->max_queue_pairs; i++) { 5241 mutex_lock(&vi->rq[i].dim_lock); 5242 vi->rq[i].dim_enabled = false; 5243 mutex_unlock(&vi->rq[i].dim_lock); 5244 } 5245 } 5246 5247 /* Since the per-queue 
coalescing params can be set, 5248 * we need apply the global new params even if they 5249 * are not updated. 5250 */ 5251 coal_rx->rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs); 5252 coal_rx->rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames); 5253 sg_init_one(&sgs_rx, coal_rx, sizeof(*coal_rx)); 5254 5255 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 5256 VIRTIO_NET_CTRL_NOTF_COAL_RX_SET, 5257 &sgs_rx)) 5258 return -EINVAL; 5259 5260 vi->intr_coal_rx.max_usecs = ec->rx_coalesce_usecs; 5261 vi->intr_coal_rx.max_packets = ec->rx_max_coalesced_frames; 5262 for (i = 0; i < vi->max_queue_pairs; i++) { 5263 mutex_lock(&vi->rq[i].dim_lock); 5264 vi->rq[i].intr_coal.max_usecs = ec->rx_coalesce_usecs; 5265 vi->rq[i].intr_coal.max_packets = ec->rx_max_coalesced_frames; 5266 mutex_unlock(&vi->rq[i].dim_lock); 5267 } 5268 5269 return 0; 5270 } 5271 5272 static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, 5273 struct ethtool_coalesce *ec) 5274 { 5275 int err; 5276 5277 err = virtnet_send_tx_notf_coal_cmds(vi, ec); 5278 if (err) 5279 return err; 5280 5281 err = virtnet_send_rx_notf_coal_cmds(vi, ec); 5282 if (err) 5283 return err; 5284 5285 return 0; 5286 } 5287 5288 static int virtnet_send_rx_notf_coal_vq_cmds(struct virtnet_info *vi, 5289 struct ethtool_coalesce *ec, 5290 u16 queue) 5291 { 5292 bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce; 5293 u32 max_usecs, max_packets; 5294 bool cur_rx_dim; 5295 int err; 5296 5297 mutex_lock(&vi->rq[queue].dim_lock); 5298 cur_rx_dim = vi->rq[queue].dim_enabled; 5299 max_usecs = vi->rq[queue].intr_coal.max_usecs; 5300 max_packets = vi->rq[queue].intr_coal.max_packets; 5301 5302 if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != max_usecs || 5303 ec->rx_max_coalesced_frames != max_packets)) { 5304 mutex_unlock(&vi->rq[queue].dim_lock); 5305 return -EINVAL; 5306 } 5307 5308 if (rx_ctrl_dim_on && !cur_rx_dim) { 5309 vi->rq[queue].dim_enabled = true; 5310 mutex_unlock(&vi->rq[queue].dim_lock); 5311 return 0; 5312 } 5313 5314 if (!rx_ctrl_dim_on && cur_rx_dim) 5315 vi->rq[queue].dim_enabled = false; 5316 5317 /* If no params are updated, userspace ethtool will 5318 * reject the modification. 
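 *
 * A typical trigger for this path is a per-queue request such as
 * "ethtool --per-queue <dev> queue_mask 0x1 --coalesce rx-usecs 64".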
5319 */ 5320 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, queue, 5321 ec->rx_coalesce_usecs, 5322 ec->rx_max_coalesced_frames); 5323 mutex_unlock(&vi->rq[queue].dim_lock); 5324 return err; 5325 } 5326 5327 static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi, 5328 struct ethtool_coalesce *ec, 5329 u16 queue) 5330 { 5331 int err; 5332 5333 err = virtnet_send_rx_notf_coal_vq_cmds(vi, ec, queue); 5334 if (err) 5335 return err; 5336 5337 err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, queue, 5338 ec->tx_coalesce_usecs, 5339 ec->tx_max_coalesced_frames); 5340 if (err) 5341 return err; 5342 5343 return 0; 5344 } 5345 5346 static void virtnet_rx_dim_work(struct work_struct *work) 5347 { 5348 struct dim *dim = container_of(work, struct dim, work); 5349 struct receive_queue *rq = container_of(dim, 5350 struct receive_queue, dim); 5351 struct virtnet_info *vi = rq->vq->vdev->priv; 5352 struct net_device *dev = vi->dev; 5353 struct dim_cq_moder update_moder; 5354 int qnum, err; 5355 5356 qnum = rq - vi->rq; 5357 5358 mutex_lock(&rq->dim_lock); 5359 if (!rq->dim_enabled) 5360 goto out; 5361 5362 update_moder = net_dim_get_rx_irq_moder(dev, dim); 5363 if (update_moder.usec != rq->intr_coal.max_usecs || 5364 update_moder.pkts != rq->intr_coal.max_packets) { 5365 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, qnum, 5366 update_moder.usec, 5367 update_moder.pkts); 5368 if (err) 5369 pr_debug("%s: Failed to send dim parameters on rxq%d\n", 5370 dev->name, qnum); 5371 } 5372 out: 5373 dim->state = DIM_START_MEASURE; 5374 mutex_unlock(&rq->dim_lock); 5375 } 5376 5377 static int virtnet_coal_params_supported(struct ethtool_coalesce *ec) 5378 { 5379 /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL 5380 * or VIRTIO_NET_F_VQ_NOTF_COAL feature is negotiated. 5381 */ 5382 if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs) 5383 return -EOPNOTSUPP; 5384 5385 if (ec->tx_max_coalesced_frames > 1 || 5386 ec->rx_max_coalesced_frames != 1) 5387 return -EINVAL; 5388 5389 return 0; 5390 } 5391 5392 static int virtnet_should_update_vq_weight(int dev_flags, int weight, 5393 int vq_weight, bool *should_update) 5394 { 5395 if (weight ^ vq_weight) { 5396 if (dev_flags & IFF_UP) 5397 return -EBUSY; 5398 *should_update = true; 5399 } 5400 5401 return 0; 5402 } 5403 5404 static int virtnet_set_coalesce(struct net_device *dev, 5405 struct ethtool_coalesce *ec, 5406 struct kernel_ethtool_coalesce *kernel_coal, 5407 struct netlink_ext_ack *extack) 5408 { 5409 struct virtnet_info *vi = netdev_priv(dev); 5410 int ret, queue_number, napi_weight, i; 5411 bool update_napi = false; 5412 5413 /* Can't change NAPI weight if the link is up */ 5414 napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; 5415 for (queue_number = 0; queue_number < vi->max_queue_pairs; queue_number++) { 5416 ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, 5417 vi->sq[queue_number].napi.weight, 5418 &update_napi); 5419 if (ret) 5420 return ret; 5421 5422 if (update_napi) { 5423 /* All queues that belong to [queue_number, vi->max_queue_pairs] will be 5424 * updated for the sake of simplicity, which might not be necessary 5425 */ 5426 break; 5427 } 5428 } 5429 5430 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) 5431 ret = virtnet_send_notf_coal_cmds(vi, ec); 5432 else 5433 ret = virtnet_coal_params_supported(ec); 5434 5435 if (ret) 5436 return ret; 5437 5438 if (update_napi) { 5439 /* xsk xmit depends on the tx napi. So if xsk is active, 5440 * prevent modifications to tx napi. 
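 *
 * tx NAPI is keyed off tx-frames here (a value of 0 disables it), so
 * clearing it while an xsk_pool is still bound would break AF_XDP
 * zero-copy transmit on that queue.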
5441 */ 5442 for (i = queue_number; i < vi->max_queue_pairs; i++) { 5443 if (vi->sq[i].xsk_pool) 5444 return -EBUSY; 5445 } 5446 5447 for (; queue_number < vi->max_queue_pairs; queue_number++) 5448 vi->sq[queue_number].napi.weight = napi_weight; 5449 } 5450 5451 return ret; 5452 } 5453 5454 static int virtnet_get_coalesce(struct net_device *dev, 5455 struct ethtool_coalesce *ec, 5456 struct kernel_ethtool_coalesce *kernel_coal, 5457 struct netlink_ext_ack *extack) 5458 { 5459 struct virtnet_info *vi = netdev_priv(dev); 5460 5461 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { 5462 ec->rx_coalesce_usecs = vi->intr_coal_rx.max_usecs; 5463 ec->tx_coalesce_usecs = vi->intr_coal_tx.max_usecs; 5464 ec->tx_max_coalesced_frames = vi->intr_coal_tx.max_packets; 5465 ec->rx_max_coalesced_frames = vi->intr_coal_rx.max_packets; 5466 ec->use_adaptive_rx_coalesce = vi->rx_dim_enabled; 5467 } else { 5468 ec->rx_max_coalesced_frames = 1; 5469 5470 if (vi->sq[0].napi.weight) 5471 ec->tx_max_coalesced_frames = 1; 5472 } 5473 5474 return 0; 5475 } 5476 5477 static int virtnet_set_per_queue_coalesce(struct net_device *dev, 5478 u32 queue, 5479 struct ethtool_coalesce *ec) 5480 { 5481 struct virtnet_info *vi = netdev_priv(dev); 5482 int ret, napi_weight; 5483 bool update_napi = false; 5484 5485 if (queue >= vi->max_queue_pairs) 5486 return -EINVAL; 5487 5488 /* Can't change NAPI weight if the link is up */ 5489 napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; 5490 ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, 5491 vi->sq[queue].napi.weight, 5492 &update_napi); 5493 if (ret) 5494 return ret; 5495 5496 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 5497 ret = virtnet_send_notf_coal_vq_cmds(vi, ec, queue); 5498 else 5499 ret = virtnet_coal_params_supported(ec); 5500 5501 if (ret) 5502 return ret; 5503 5504 if (update_napi) 5505 vi->sq[queue].napi.weight = napi_weight; 5506 5507 return 0; 5508 } 5509 5510 static int virtnet_get_per_queue_coalesce(struct net_device *dev, 5511 u32 queue, 5512 struct ethtool_coalesce *ec) 5513 { 5514 struct virtnet_info *vi = netdev_priv(dev); 5515 5516 if (queue >= vi->max_queue_pairs) 5517 return -EINVAL; 5518 5519 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { 5520 mutex_lock(&vi->rq[queue].dim_lock); 5521 ec->rx_coalesce_usecs = vi->rq[queue].intr_coal.max_usecs; 5522 ec->tx_coalesce_usecs = vi->sq[queue].intr_coal.max_usecs; 5523 ec->tx_max_coalesced_frames = vi->sq[queue].intr_coal.max_packets; 5524 ec->rx_max_coalesced_frames = vi->rq[queue].intr_coal.max_packets; 5525 ec->use_adaptive_rx_coalesce = vi->rq[queue].dim_enabled; 5526 mutex_unlock(&vi->rq[queue].dim_lock); 5527 } else { 5528 ec->rx_max_coalesced_frames = 1; 5529 5530 if (vi->sq[queue].napi.weight) 5531 ec->tx_max_coalesced_frames = 1; 5532 } 5533 5534 return 0; 5535 } 5536 5537 static void virtnet_init_settings(struct net_device *dev) 5538 { 5539 struct virtnet_info *vi = netdev_priv(dev); 5540 5541 vi->speed = SPEED_UNKNOWN; 5542 vi->duplex = DUPLEX_UNKNOWN; 5543 } 5544 5545 static u32 virtnet_get_rxfh_key_size(struct net_device *dev) 5546 { 5547 return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size; 5548 } 5549 5550 static u32 virtnet_get_rxfh_indir_size(struct net_device *dev) 5551 { 5552 return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size; 5553 } 5554 5555 static int virtnet_get_rxfh(struct net_device *dev, 5556 struct ethtool_rxfh_param *rxfh) 5557 { 5558 struct virtnet_info *vi = netdev_priv(dev); 5559 int 
i; 5560 5561 if (rxfh->indir) { 5562 for (i = 0; i < vi->rss_indir_table_size; ++i) 5563 rxfh->indir[i] = le16_to_cpu(vi->rss_hdr->indirection_table[i]); 5564 } 5565 5566 if (rxfh->key) 5567 memcpy(rxfh->key, vi->rss_hash_key_data, vi->rss_key_size); 5568 5569 rxfh->hfunc = ETH_RSS_HASH_TOP; 5570 5571 return 0; 5572 } 5573 5574 static int virtnet_set_rxfh(struct net_device *dev, 5575 struct ethtool_rxfh_param *rxfh, 5576 struct netlink_ext_ack *extack) 5577 { 5578 struct virtnet_info *vi = netdev_priv(dev); 5579 bool update = false; 5580 int i; 5581 5582 if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE && 5583 rxfh->hfunc != ETH_RSS_HASH_TOP) 5584 return -EOPNOTSUPP; 5585 5586 if (rxfh->indir) { 5587 if (!vi->has_rss) 5588 return -EOPNOTSUPP; 5589 5590 for (i = 0; i < vi->rss_indir_table_size; ++i) 5591 vi->rss_hdr->indirection_table[i] = cpu_to_le16(rxfh->indir[i]); 5592 update = true; 5593 } 5594 5595 if (rxfh->key) { 5596 /* If either _F_HASH_REPORT or _F_RSS are negotiated, the 5597 * device provides hash calculation capabilities, that is, 5598 * hash_key is configured. 5599 */ 5600 if (!vi->has_rss && !vi->has_rss_hash_report) 5601 return -EOPNOTSUPP; 5602 5603 memcpy(vi->rss_hash_key_data, rxfh->key, vi->rss_key_size); 5604 update = true; 5605 } 5606 5607 if (update) 5608 virtnet_commit_rss_command(vi); 5609 5610 return 0; 5611 } 5612 5613 static int virtnet_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs) 5614 { 5615 struct virtnet_info *vi = netdev_priv(dev); 5616 int rc = 0; 5617 5618 switch (info->cmd) { 5619 case ETHTOOL_GRXRINGS: 5620 info->data = vi->curr_queue_pairs; 5621 break; 5622 default: 5623 rc = -EOPNOTSUPP; 5624 } 5625 5626 return rc; 5627 } 5628 5629 static const struct ethtool_ops virtnet_ethtool_ops = { 5630 .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | 5631 ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE_RX, 5632 .get_drvinfo = virtnet_get_drvinfo, 5633 .get_link = ethtool_op_get_link, 5634 .get_ringparam = virtnet_get_ringparam, 5635 .set_ringparam = virtnet_set_ringparam, 5636 .get_strings = virtnet_get_strings, 5637 .get_sset_count = virtnet_get_sset_count, 5638 .get_ethtool_stats = virtnet_get_ethtool_stats, 5639 .set_channels = virtnet_set_channels, 5640 .get_channels = virtnet_get_channels, 5641 .get_ts_info = ethtool_op_get_ts_info, 5642 .get_link_ksettings = virtnet_get_link_ksettings, 5643 .set_link_ksettings = virtnet_set_link_ksettings, 5644 .set_coalesce = virtnet_set_coalesce, 5645 .get_coalesce = virtnet_get_coalesce, 5646 .set_per_queue_coalesce = virtnet_set_per_queue_coalesce, 5647 .get_per_queue_coalesce = virtnet_get_per_queue_coalesce, 5648 .get_rxfh_key_size = virtnet_get_rxfh_key_size, 5649 .get_rxfh_indir_size = virtnet_get_rxfh_indir_size, 5650 .get_rxfh = virtnet_get_rxfh, 5651 .set_rxfh = virtnet_set_rxfh, 5652 .get_rxfh_fields = virtnet_get_hashflow, 5653 .set_rxfh_fields = virtnet_set_hashflow, 5654 .get_rxnfc = virtnet_get_rxnfc, 5655 }; 5656 5657 static void virtnet_get_queue_stats_rx(struct net_device *dev, int i, 5658 struct netdev_queue_stats_rx *stats) 5659 { 5660 struct virtnet_info *vi = netdev_priv(dev); 5661 struct receive_queue *rq = &vi->rq[i]; 5662 struct virtnet_stats_ctx ctx = {0}; 5663 5664 virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true); 5665 5666 virtnet_get_hw_stats(vi, &ctx, i * 2); 5667 virtnet_fill_stats(vi, i * 2, &ctx, (void *)&rq->stats, true, 0); 5668 } 5669 5670 static void virtnet_get_queue_stats_tx(struct net_device *dev, int i, 5671 struct 
netdev_queue_stats_tx *stats) 5672 { 5673 struct virtnet_info *vi = netdev_priv(dev); 5674 struct send_queue *sq = &vi->sq[i]; 5675 struct virtnet_stats_ctx ctx = {0}; 5676 5677 virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true); 5678 5679 virtnet_get_hw_stats(vi, &ctx, i * 2 + 1); 5680 virtnet_fill_stats(vi, i * 2 + 1, &ctx, (void *)&sq->stats, true, 0); 5681 } 5682 5683 static void virtnet_get_base_stats(struct net_device *dev, 5684 struct netdev_queue_stats_rx *rx, 5685 struct netdev_queue_stats_tx *tx) 5686 { 5687 struct virtnet_info *vi = netdev_priv(dev); 5688 5689 /* The queue stats of the virtio-net will not be reset. So here we 5690 * return 0. 5691 */ 5692 rx->bytes = 0; 5693 rx->packets = 0; 5694 5695 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 5696 rx->hw_drops = 0; 5697 rx->hw_drop_overruns = 0; 5698 } 5699 5700 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 5701 rx->csum_unnecessary = 0; 5702 rx->csum_none = 0; 5703 rx->csum_bad = 0; 5704 } 5705 5706 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 5707 rx->hw_gro_packets = 0; 5708 rx->hw_gro_bytes = 0; 5709 rx->hw_gro_wire_packets = 0; 5710 rx->hw_gro_wire_bytes = 0; 5711 } 5712 5713 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) 5714 rx->hw_drop_ratelimits = 0; 5715 5716 tx->bytes = 0; 5717 tx->packets = 0; 5718 tx->stop = 0; 5719 tx->wake = 0; 5720 5721 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 5722 tx->hw_drops = 0; 5723 tx->hw_drop_errors = 0; 5724 } 5725 5726 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 5727 tx->csum_none = 0; 5728 tx->needs_csum = 0; 5729 } 5730 5731 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 5732 tx->hw_gso_packets = 0; 5733 tx->hw_gso_bytes = 0; 5734 tx->hw_gso_wire_packets = 0; 5735 tx->hw_gso_wire_bytes = 0; 5736 } 5737 5738 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) 5739 tx->hw_drop_ratelimits = 0; 5740 5741 netdev_stat_queue_sum(dev, 5742 dev->real_num_rx_queues, vi->max_queue_pairs, rx, 5743 dev->real_num_tx_queues, vi->max_queue_pairs, tx); 5744 } 5745 5746 static const struct netdev_stat_ops virtnet_stat_ops = { 5747 .get_queue_stats_rx = virtnet_get_queue_stats_rx, 5748 .get_queue_stats_tx = virtnet_get_queue_stats_tx, 5749 .get_base_stats = virtnet_get_base_stats, 5750 }; 5751 5752 static void virtnet_freeze_down(struct virtio_device *vdev) 5753 { 5754 struct virtnet_info *vi = vdev->priv; 5755 5756 /* Make sure no work handler is accessing the device */ 5757 flush_work(&vi->config_work); 5758 disable_rx_mode_work(vi); 5759 flush_work(&vi->rx_mode_work); 5760 5761 if (netif_running(vi->dev)) { 5762 rtnl_lock(); 5763 virtnet_close(vi->dev); 5764 rtnl_unlock(); 5765 } 5766 5767 netif_tx_lock_bh(vi->dev); 5768 netif_device_detach(vi->dev); 5769 netif_tx_unlock_bh(vi->dev); 5770 } 5771 5772 static int init_vqs(struct virtnet_info *vi); 5773 5774 static int virtnet_restore_up(struct virtio_device *vdev) 5775 { 5776 struct virtnet_info *vi = vdev->priv; 5777 int err; 5778 5779 err = init_vqs(vi); 5780 if (err) 5781 return err; 5782 5783 virtio_device_ready(vdev); 5784 5785 enable_delayed_refill(vi); 5786 enable_rx_mode_work(vi); 5787 5788 if (netif_running(vi->dev)) { 5789 rtnl_lock(); 5790 err = virtnet_open(vi->dev); 5791 rtnl_unlock(); 5792 if (err) 5793 return err; 5794 } 5795 5796 netif_tx_lock_bh(vi->dev); 5797 netif_device_attach(vi->dev); 5798 netif_tx_unlock_bh(vi->dev); 5799 return err; 5800 } 5801 5802 static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 
offloads) 5803 { 5804 __virtio64 *_offloads __free(kfree) = NULL; 5805 struct scatterlist sg; 5806 5807 _offloads = kzalloc(sizeof(*_offloads), GFP_KERNEL); 5808 if (!_offloads) 5809 return -ENOMEM; 5810 5811 *_offloads = cpu_to_virtio64(vi->vdev, offloads); 5812 5813 sg_init_one(&sg, _offloads, sizeof(*_offloads)); 5814 5815 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS, 5816 VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) { 5817 dev_warn(&vi->dev->dev, "Fail to set guest offload.\n"); 5818 return -EINVAL; 5819 } 5820 5821 return 0; 5822 } 5823 5824 static int virtnet_clear_guest_offloads(struct virtnet_info *vi) 5825 { 5826 u64 offloads = 0; 5827 5828 if (!vi->guest_offloads) 5829 return 0; 5830 5831 return virtnet_set_guest_offloads(vi, offloads); 5832 } 5833 5834 static int virtnet_restore_guest_offloads(struct virtnet_info *vi) 5835 { 5836 u64 offloads = vi->guest_offloads; 5837 5838 if (!vi->guest_offloads) 5839 return 0; 5840 5841 return virtnet_set_guest_offloads(vi, offloads); 5842 } 5843 5844 static int virtnet_rq_bind_xsk_pool(struct virtnet_info *vi, struct receive_queue *rq, 5845 struct xsk_buff_pool *pool) 5846 { 5847 int err, qindex; 5848 5849 qindex = rq - vi->rq; 5850 5851 if (pool) { 5852 err = xdp_rxq_info_reg(&rq->xsk_rxq_info, vi->dev, qindex, rq->napi.napi_id); 5853 if (err < 0) 5854 return err; 5855 5856 err = xdp_rxq_info_reg_mem_model(&rq->xsk_rxq_info, 5857 MEM_TYPE_XSK_BUFF_POOL, NULL); 5858 if (err < 0) 5859 goto unreg; 5860 5861 xsk_pool_set_rxq_info(pool, &rq->xsk_rxq_info); 5862 } 5863 5864 virtnet_rx_pause(vi, rq); 5865 5866 err = virtqueue_reset(rq->vq, virtnet_rq_unmap_free_buf, NULL); 5867 if (err) { 5868 netdev_err(vi->dev, "reset rx fail: rx queue index: %d err: %d\n", qindex, err); 5869 5870 pool = NULL; 5871 } 5872 5873 rq->xsk_pool = pool; 5874 5875 virtnet_rx_resume(vi, rq); 5876 5877 if (pool) 5878 return 0; 5879 5880 unreg: 5881 xdp_rxq_info_unreg(&rq->xsk_rxq_info); 5882 return err; 5883 } 5884 5885 static int virtnet_sq_bind_xsk_pool(struct virtnet_info *vi, 5886 struct send_queue *sq, 5887 struct xsk_buff_pool *pool) 5888 { 5889 int err, qindex; 5890 5891 qindex = sq - vi->sq; 5892 5893 virtnet_tx_pause(vi, sq); 5894 5895 err = virtqueue_reset(sq->vq, virtnet_sq_free_unused_buf, 5896 virtnet_sq_free_unused_buf_done); 5897 if (err) { 5898 netdev_err(vi->dev, "reset tx fail: tx queue index: %d err: %d\n", qindex, err); 5899 pool = NULL; 5900 } 5901 5902 sq->xsk_pool = pool; 5903 5904 virtnet_tx_resume(vi, sq); 5905 5906 return err; 5907 } 5908 5909 static int virtnet_xsk_pool_enable(struct net_device *dev, 5910 struct xsk_buff_pool *pool, 5911 u16 qid) 5912 { 5913 struct virtnet_info *vi = netdev_priv(dev); 5914 struct receive_queue *rq; 5915 struct device *dma_dev; 5916 struct send_queue *sq; 5917 dma_addr_t hdr_dma; 5918 int err, size; 5919 5920 if (vi->hdr_len > xsk_pool_get_headroom(pool)) 5921 return -EINVAL; 5922 5923 /* In big_packets mode, xdp cannot work, so there is no need to 5924 * initialize xsk of rq. 5925 */ 5926 if (vi->big_packets && !vi->mergeable_rx_bufs) 5927 return -ENOENT; 5928 5929 if (qid >= vi->curr_queue_pairs) 5930 return -EINVAL; 5931 5932 sq = &vi->sq[qid]; 5933 rq = &vi->rq[qid]; 5934 5935 /* xsk assumes that tx and rx must have the same dma device. The af-xdp 5936 * may use one buffer to receive from the rx and reuse this buffer to 5937 * send by the tx. So the dma dev of sq and rq must be the same one. 5938 * 5939 * But vq->dma_dev allows every vq has the respective dma dev. 
So I 5940 * check the dma dev of vq and sq is the same dev. 5941 */ 5942 if (virtqueue_dma_dev(rq->vq) != virtqueue_dma_dev(sq->vq)) 5943 return -EINVAL; 5944 5945 dma_dev = virtqueue_dma_dev(rq->vq); 5946 if (!dma_dev) 5947 return -EINVAL; 5948 5949 size = virtqueue_get_vring_size(rq->vq); 5950 5951 rq->xsk_buffs = kvcalloc(size, sizeof(*rq->xsk_buffs), GFP_KERNEL); 5952 if (!rq->xsk_buffs) 5953 return -ENOMEM; 5954 5955 hdr_dma = virtqueue_dma_map_single_attrs(sq->vq, &xsk_hdr, vi->hdr_len, 5956 DMA_TO_DEVICE, 0); 5957 if (virtqueue_dma_mapping_error(sq->vq, hdr_dma)) { 5958 err = -ENOMEM; 5959 goto err_free_buffs; 5960 } 5961 5962 err = xsk_pool_dma_map(pool, dma_dev, 0); 5963 if (err) 5964 goto err_xsk_map; 5965 5966 err = virtnet_rq_bind_xsk_pool(vi, rq, pool); 5967 if (err) 5968 goto err_rq; 5969 5970 err = virtnet_sq_bind_xsk_pool(vi, sq, pool); 5971 if (err) 5972 goto err_sq; 5973 5974 /* Now, we do not support tx offload(such as tx csum), so all the tx 5975 * virtnet hdr is zero. So all the tx packets can share a single hdr. 5976 */ 5977 sq->xsk_hdr_dma_addr = hdr_dma; 5978 5979 return 0; 5980 5981 err_sq: 5982 virtnet_rq_bind_xsk_pool(vi, rq, NULL); 5983 err_rq: 5984 xsk_pool_dma_unmap(pool, 0); 5985 err_xsk_map: 5986 virtqueue_dma_unmap_single_attrs(rq->vq, hdr_dma, vi->hdr_len, 5987 DMA_TO_DEVICE, 0); 5988 err_free_buffs: 5989 kvfree(rq->xsk_buffs); 5990 return err; 5991 } 5992 5993 static int virtnet_xsk_pool_disable(struct net_device *dev, u16 qid) 5994 { 5995 struct virtnet_info *vi = netdev_priv(dev); 5996 struct xsk_buff_pool *pool; 5997 struct receive_queue *rq; 5998 struct send_queue *sq; 5999 int err; 6000 6001 if (qid >= vi->curr_queue_pairs) 6002 return -EINVAL; 6003 6004 sq = &vi->sq[qid]; 6005 rq = &vi->rq[qid]; 6006 6007 pool = rq->xsk_pool; 6008 6009 err = virtnet_rq_bind_xsk_pool(vi, rq, NULL); 6010 err |= virtnet_sq_bind_xsk_pool(vi, sq, NULL); 6011 6012 xsk_pool_dma_unmap(pool, 0); 6013 6014 virtqueue_dma_unmap_single_attrs(sq->vq, sq->xsk_hdr_dma_addr, 6015 vi->hdr_len, DMA_TO_DEVICE, 0); 6016 kvfree(rq->xsk_buffs); 6017 6018 return err; 6019 } 6020 6021 static int virtnet_xsk_pool_setup(struct net_device *dev, struct netdev_bpf *xdp) 6022 { 6023 if (xdp->xsk.pool) 6024 return virtnet_xsk_pool_enable(dev, xdp->xsk.pool, 6025 xdp->xsk.queue_id); 6026 else 6027 return virtnet_xsk_pool_disable(dev, xdp->xsk.queue_id); 6028 } 6029 6030 static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, 6031 struct netlink_ext_ack *extack) 6032 { 6033 unsigned int room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM + 6034 sizeof(struct skb_shared_info)); 6035 unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN; 6036 struct virtnet_info *vi = netdev_priv(dev); 6037 struct bpf_prog *old_prog; 6038 u16 xdp_qp = 0, curr_qp; 6039 int i, err; 6040 6041 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) 6042 && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 6043 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 6044 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 6045 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 6046 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM) || 6047 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) || 6048 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6))) { 6049 NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first"); 6050 return -EOPNOTSUPP; 6051 } 6052 6053 if (vi->mergeable_rx_bufs && !vi->any_header_sg) { 6054 
NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required"); 6055 return -EINVAL; 6056 } 6057 6058 if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) { 6059 NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags"); 6060 netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz); 6061 return -EINVAL; 6062 } 6063 6064 curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs; 6065 if (prog) 6066 xdp_qp = nr_cpu_ids; 6067 6068 /* XDP requires extra queues for XDP_TX */ 6069 if (curr_qp + xdp_qp > vi->max_queue_pairs) { 6070 netdev_warn_once(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n", 6071 curr_qp + xdp_qp, vi->max_queue_pairs); 6072 xdp_qp = 0; 6073 } 6074 6075 old_prog = rtnl_dereference(vi->rq[0].xdp_prog); 6076 if (!prog && !old_prog) 6077 return 0; 6078 6079 if (prog) 6080 bpf_prog_add(prog, vi->max_queue_pairs - 1); 6081 6082 virtnet_rx_pause_all(vi); 6083 6084 /* Make sure NAPI is not using any XDP TX queues for RX. */ 6085 if (netif_running(dev)) { 6086 for (i = 0; i < vi->max_queue_pairs; i++) 6087 virtnet_napi_tx_disable(&vi->sq[i]); 6088 } 6089 6090 if (!prog) { 6091 for (i = 0; i < vi->max_queue_pairs; i++) { 6092 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 6093 if (i == 0) 6094 virtnet_restore_guest_offloads(vi); 6095 } 6096 synchronize_net(); 6097 } 6098 6099 err = virtnet_set_queues(vi, curr_qp + xdp_qp); 6100 if (err) 6101 goto err; 6102 netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); 6103 vi->xdp_queue_pairs = xdp_qp; 6104 6105 if (prog) { 6106 vi->xdp_enabled = true; 6107 for (i = 0; i < vi->max_queue_pairs; i++) { 6108 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 6109 if (i == 0 && !old_prog) 6110 virtnet_clear_guest_offloads(vi); 6111 } 6112 if (!old_prog) 6113 xdp_features_set_redirect_target(dev, true); 6114 } else { 6115 xdp_features_clear_redirect_target(dev); 6116 vi->xdp_enabled = false; 6117 } 6118 6119 virtnet_rx_resume_all(vi); 6120 for (i = 0; i < vi->max_queue_pairs; i++) { 6121 if (old_prog) 6122 bpf_prog_put(old_prog); 6123 if (netif_running(dev)) 6124 virtnet_napi_tx_enable(&vi->sq[i]); 6125 } 6126 6127 return 0; 6128 6129 err: 6130 if (!prog) { 6131 virtnet_clear_guest_offloads(vi); 6132 for (i = 0; i < vi->max_queue_pairs; i++) 6133 rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); 6134 } 6135 6136 virtnet_rx_resume_all(vi); 6137 if (netif_running(dev)) { 6138 for (i = 0; i < vi->max_queue_pairs; i++) 6139 virtnet_napi_tx_enable(&vi->sq[i]); 6140 } 6141 if (prog) 6142 bpf_prog_sub(prog, vi->max_queue_pairs - 1); 6143 return err; 6144 } 6145 6146 static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp) 6147 { 6148 switch (xdp->command) { 6149 case XDP_SETUP_PROG: 6150 return virtnet_xdp_set(dev, xdp->prog, xdp->extack); 6151 case XDP_SETUP_XSK_POOL: 6152 return virtnet_xsk_pool_setup(dev, xdp); 6153 default: 6154 return -EINVAL; 6155 } 6156 } 6157 6158 static int virtnet_get_phys_port_name(struct net_device *dev, char *buf, 6159 size_t len) 6160 { 6161 struct virtnet_info *vi = netdev_priv(dev); 6162 int ret; 6163 6164 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 6165 return -EOPNOTSUPP; 6166 6167 ret = snprintf(buf, len, "sby"); 6168 if (ret >= len) 6169 return -EOPNOTSUPP; 6170 6171 return 0; 6172 } 6173 6174 static int virtnet_set_features(struct net_device *dev, 6175 netdev_features_t features) 6176 { 6177 struct virtnet_info *vi = netdev_priv(dev); 6178 u64 offloads; 6179 int err; 6180 
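	/* Two feature toggles are handled here (summary of the code below):
	 * - NETIF_F_GRO_HW re-programs the guest offload set (all capable
	 *   offloads when enabled, capable minus GUEST_OFFLOAD_GRO_HW_MASK when
	 *   disabled) via VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET; it cannot be
	 *   changed while XDP is enabled.
	 * - NETIF_F_RXHASH switches the reported hash types and re-commits the
	 *   RSS configuration to the device.
	 */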
6181 if ((dev->features ^ features) & NETIF_F_GRO_HW) { 6182 if (vi->xdp_enabled) 6183 return -EBUSY; 6184 6185 if (features & NETIF_F_GRO_HW) 6186 offloads = vi->guest_offloads_capable; 6187 else 6188 offloads = vi->guest_offloads_capable & 6189 ~GUEST_OFFLOAD_GRO_HW_MASK; 6190 6191 err = virtnet_set_guest_offloads(vi, offloads); 6192 if (err) 6193 return err; 6194 vi->guest_offloads = offloads; 6195 } 6196 6197 if ((dev->features ^ features) & NETIF_F_RXHASH) { 6198 if (features & NETIF_F_RXHASH) 6199 vi->rss_hdr->hash_types = cpu_to_le32(vi->rss_hash_types_saved); 6200 else 6201 vi->rss_hdr->hash_types = cpu_to_le32(VIRTIO_NET_HASH_REPORT_NONE); 6202 6203 if (!virtnet_commit_rss_command(vi)) 6204 return -EINVAL; 6205 } 6206 6207 return 0; 6208 } 6209 6210 static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue) 6211 { 6212 struct virtnet_info *priv = netdev_priv(dev); 6213 struct send_queue *sq = &priv->sq[txqueue]; 6214 struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue); 6215 6216 u64_stats_update_begin(&sq->stats.syncp); 6217 u64_stats_inc(&sq->stats.tx_timeouts); 6218 u64_stats_update_end(&sq->stats.syncp); 6219 6220 netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n", 6221 txqueue, sq->name, sq->vq->index, sq->vq->name, 6222 jiffies_to_usecs(jiffies - READ_ONCE(txq->trans_start))); 6223 } 6224 6225 static int virtnet_init_irq_moder(struct virtnet_info *vi) 6226 { 6227 u8 profile_flags = 0, coal_flags = 0; 6228 int ret, i; 6229 6230 profile_flags |= DIM_PROFILE_RX; 6231 coal_flags |= DIM_COALESCE_USEC | DIM_COALESCE_PKTS; 6232 ret = net_dim_init_irq_moder(vi->dev, profile_flags, coal_flags, 6233 DIM_CQ_PERIOD_MODE_START_FROM_EQE, 6234 0, virtnet_rx_dim_work, NULL); 6235 6236 if (ret) 6237 return ret; 6238 6239 for (i = 0; i < vi->max_queue_pairs; i++) 6240 net_dim_setting(vi->dev, &vi->rq[i].dim, false); 6241 6242 return 0; 6243 } 6244 6245 static void virtnet_free_irq_moder(struct virtnet_info *vi) 6246 { 6247 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 6248 return; 6249 6250 rtnl_lock(); 6251 net_dim_free_irq_moder(vi->dev); 6252 rtnl_unlock(); 6253 } 6254 6255 static const struct net_device_ops virtnet_netdev = { 6256 .ndo_open = virtnet_open, 6257 .ndo_stop = virtnet_close, 6258 .ndo_start_xmit = start_xmit, 6259 .ndo_validate_addr = eth_validate_addr, 6260 .ndo_set_mac_address = virtnet_set_mac_address, 6261 .ndo_set_rx_mode = virtnet_set_rx_mode, 6262 .ndo_get_stats64 = virtnet_stats, 6263 .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid, 6264 .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, 6265 .ndo_bpf = virtnet_xdp, 6266 .ndo_xdp_xmit = virtnet_xdp_xmit, 6267 .ndo_xsk_wakeup = virtnet_xsk_wakeup, 6268 .ndo_features_check = passthru_features_check, 6269 .ndo_get_phys_port_name = virtnet_get_phys_port_name, 6270 .ndo_set_features = virtnet_set_features, 6271 .ndo_tx_timeout = virtnet_tx_timeout, 6272 }; 6273 6274 static void virtnet_config_changed_work(struct work_struct *work) 6275 { 6276 struct virtnet_info *vi = 6277 container_of(work, struct virtnet_info, config_work); 6278 u16 v; 6279 6280 if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS, 6281 struct virtio_net_config, status, &v) < 0) 6282 return; 6283 6284 if (v & VIRTIO_NET_S_ANNOUNCE) { 6285 netdev_notify_peers(vi->dev); 6286 virtnet_ack_link_announce(vi); 6287 } 6288 6289 /* Ignore unknown (future) status bits */ 6290 v &= VIRTIO_NET_S_LINK_UP; 6291 6292 if (vi->status == v) 6293 return; 6294 6295 vi->status = v; 6296 6297 if 
(vi->status & VIRTIO_NET_S_LINK_UP) { 6298 virtnet_update_settings(vi); 6299 netif_carrier_on(vi->dev); 6300 netif_tx_wake_all_queues(vi->dev); 6301 } else { 6302 netif_carrier_off(vi->dev); 6303 netif_tx_stop_all_queues(vi->dev); 6304 } 6305 } 6306 6307 static void virtnet_config_changed(struct virtio_device *vdev) 6308 { 6309 struct virtnet_info *vi = vdev->priv; 6310 6311 schedule_work(&vi->config_work); 6312 } 6313 6314 static void virtnet_free_queues(struct virtnet_info *vi) 6315 { 6316 int i; 6317 6318 for (i = 0; i < vi->max_queue_pairs; i++) { 6319 __netif_napi_del(&vi->rq[i].napi); 6320 __netif_napi_del(&vi->sq[i].napi); 6321 } 6322 6323 /* We called __netif_napi_del(), 6324 * we need to respect an RCU grace period before freeing vi->rq 6325 */ 6326 synchronize_net(); 6327 6328 kfree(vi->rq); 6329 kfree(vi->sq); 6330 kfree(vi->ctrl); 6331 } 6332 6333 static void _free_receive_bufs(struct virtnet_info *vi) 6334 { 6335 struct bpf_prog *old_prog; 6336 int i; 6337 6338 for (i = 0; i < vi->max_queue_pairs; i++) { 6339 while (vi->rq[i].pages) 6340 __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0); 6341 6342 old_prog = rtnl_dereference(vi->rq[i].xdp_prog); 6343 RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL); 6344 if (old_prog) 6345 bpf_prog_put(old_prog); 6346 } 6347 } 6348 6349 static void free_receive_bufs(struct virtnet_info *vi) 6350 { 6351 rtnl_lock(); 6352 _free_receive_bufs(vi); 6353 rtnl_unlock(); 6354 } 6355 6356 static void free_receive_page_frags(struct virtnet_info *vi) 6357 { 6358 int i; 6359 for (i = 0; i < vi->max_queue_pairs; i++) 6360 if (vi->rq[i].alloc_frag.page) { 6361 if (vi->rq[i].last_dma) 6362 virtnet_rq_unmap(&vi->rq[i], vi->rq[i].last_dma, 0); 6363 put_page(vi->rq[i].alloc_frag.page); 6364 } 6365 } 6366 6367 static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf) 6368 { 6369 struct virtnet_info *vi = vq->vdev->priv; 6370 struct send_queue *sq; 6371 int i = vq2txq(vq); 6372 6373 sq = &vi->sq[i]; 6374 6375 switch (virtnet_xmit_ptr_unpack(&buf)) { 6376 case VIRTNET_XMIT_TYPE_SKB: 6377 case VIRTNET_XMIT_TYPE_SKB_ORPHAN: 6378 dev_kfree_skb(buf); 6379 break; 6380 6381 case VIRTNET_XMIT_TYPE_XDP: 6382 xdp_return_frame(buf); 6383 break; 6384 6385 case VIRTNET_XMIT_TYPE_XSK: 6386 xsk_tx_completed(sq->xsk_pool, 1); 6387 break; 6388 } 6389 } 6390 6391 static void virtnet_sq_free_unused_buf_done(struct virtqueue *vq) 6392 { 6393 struct virtnet_info *vi = vq->vdev->priv; 6394 int i = vq2txq(vq); 6395 6396 netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, i)); 6397 } 6398 6399 static void free_unused_bufs(struct virtnet_info *vi) 6400 { 6401 void *buf; 6402 int i; 6403 6404 for (i = 0; i < vi->max_queue_pairs; i++) { 6405 struct virtqueue *vq = vi->sq[i].vq; 6406 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 6407 virtnet_sq_free_unused_buf(vq, buf); 6408 cond_resched(); 6409 } 6410 6411 for (i = 0; i < vi->max_queue_pairs; i++) { 6412 struct virtqueue *vq = vi->rq[i].vq; 6413 6414 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 6415 virtnet_rq_unmap_free_buf(vq, buf); 6416 cond_resched(); 6417 } 6418 } 6419 6420 static void virtnet_del_vqs(struct virtnet_info *vi) 6421 { 6422 struct virtio_device *vdev = vi->vdev; 6423 6424 virtnet_clean_affinity(vi); 6425 6426 vdev->config->del_vqs(vdev); 6427 6428 virtnet_free_queues(vi); 6429 } 6430 6431 /* How large should a single buffer be so a queue full of these can fit at 6432 * least one full packet? 6433 * Logic below assumes the mergeable buffer header is used. 
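 * Illustrative figures (assuming the 12-byte mergeable header and a
 * 65535-byte maximum packet, so buf_len = 12 + 14 + 4 + 65535 = 65565):
 * on a 256-entry ring DIV_ROUND_UP(65565, 256) = 257, so after subtracting
 * the header the result (245) falls below the GOOD_PACKET_LEN floor and
 * 1518 is returned; on a 16-entry ring DIV_ROUND_UP(65565, 16) = 4098 and
 * 4098 - 12 = 4086 is returned.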
6434 */ 6435 static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq) 6436 { 6437 const unsigned int hdr_len = vi->hdr_len; 6438 unsigned int rq_size = virtqueue_get_vring_size(vq); 6439 unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu; 6440 unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len; 6441 unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size); 6442 6443 return max(max(min_buf_len, hdr_len) - hdr_len, 6444 (unsigned int)GOOD_PACKET_LEN); 6445 } 6446 6447 static int virtnet_find_vqs(struct virtnet_info *vi) 6448 { 6449 struct virtqueue_info *vqs_info; 6450 struct virtqueue **vqs; 6451 int ret = -ENOMEM; 6452 int total_vqs; 6453 bool *ctx; 6454 u16 i; 6455 6456 /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by 6457 * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by 6458 * possible control vq. 6459 */ 6460 total_vqs = vi->max_queue_pairs * 2 + 6461 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ); 6462 6463 /* Allocate space for find_vqs parameters */ 6464 vqs = kcalloc(total_vqs, sizeof(*vqs), GFP_KERNEL); 6465 if (!vqs) 6466 goto err_vq; 6467 vqs_info = kcalloc(total_vqs, sizeof(*vqs_info), GFP_KERNEL); 6468 if (!vqs_info) 6469 goto err_vqs_info; 6470 if (!vi->big_packets || vi->mergeable_rx_bufs) { 6471 ctx = kcalloc(total_vqs, sizeof(*ctx), GFP_KERNEL); 6472 if (!ctx) 6473 goto err_ctx; 6474 } else { 6475 ctx = NULL; 6476 } 6477 6478 /* Parameters for control virtqueue, if any */ 6479 if (vi->has_cvq) { 6480 vqs_info[total_vqs - 1].name = "control"; 6481 } 6482 6483 /* Allocate/initialize parameters for send/receive virtqueues */ 6484 for (i = 0; i < vi->max_queue_pairs; i++) { 6485 vqs_info[rxq2vq(i)].callback = skb_recv_done; 6486 vqs_info[txq2vq(i)].callback = skb_xmit_done; 6487 sprintf(vi->rq[i].name, "input.%u", i); 6488 sprintf(vi->sq[i].name, "output.%u", i); 6489 vqs_info[rxq2vq(i)].name = vi->rq[i].name; 6490 vqs_info[txq2vq(i)].name = vi->sq[i].name; 6491 if (ctx) 6492 vqs_info[rxq2vq(i)].ctx = true; 6493 } 6494 6495 ret = virtio_find_vqs(vi->vdev, total_vqs, vqs, vqs_info, NULL); 6496 if (ret) 6497 goto err_find; 6498 6499 if (vi->has_cvq) { 6500 vi->cvq = vqs[total_vqs - 1]; 6501 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) 6502 vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; 6503 } 6504 6505 for (i = 0; i < vi->max_queue_pairs; i++) { 6506 vi->rq[i].vq = vqs[rxq2vq(i)]; 6507 vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq); 6508 vi->sq[i].vq = vqs[txq2vq(i)]; 6509 } 6510 6511 /* run here: ret == 0. 
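 * On success we intentionally fall through the error labels below: vqs,
 * vqs_info and ctx are only temporary arrays used to call
 * virtio_find_vqs(), so they are freed on both the success and the
 * error path.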
*/ 6512 6513 6514 err_find: 6515 kfree(ctx); 6516 err_ctx: 6517 kfree(vqs_info); 6518 err_vqs_info: 6519 kfree(vqs); 6520 err_vq: 6521 return ret; 6522 } 6523 6524 static int virtnet_alloc_queues(struct virtnet_info *vi) 6525 { 6526 int i; 6527 6528 if (vi->has_cvq) { 6529 vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL); 6530 if (!vi->ctrl) 6531 goto err_ctrl; 6532 } else { 6533 vi->ctrl = NULL; 6534 } 6535 vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL); 6536 if (!vi->sq) 6537 goto err_sq; 6538 vi->rq = kcalloc(vi->max_queue_pairs, sizeof(*vi->rq), GFP_KERNEL); 6539 if (!vi->rq) 6540 goto err_rq; 6541 6542 INIT_DELAYED_WORK(&vi->refill, refill_work); 6543 for (i = 0; i < vi->max_queue_pairs; i++) { 6544 vi->rq[i].pages = NULL; 6545 netif_napi_add_config(vi->dev, &vi->rq[i].napi, virtnet_poll, 6546 i); 6547 vi->rq[i].napi.weight = napi_weight; 6548 netif_napi_add_tx_weight(vi->dev, &vi->sq[i].napi, 6549 virtnet_poll_tx, 6550 napi_tx ? napi_weight : 0); 6551 6552 sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg)); 6553 ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len); 6554 sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg)); 6555 6556 u64_stats_init(&vi->rq[i].stats.syncp); 6557 u64_stats_init(&vi->sq[i].stats.syncp); 6558 mutex_init(&vi->rq[i].dim_lock); 6559 } 6560 6561 return 0; 6562 6563 err_rq: 6564 kfree(vi->sq); 6565 err_sq: 6566 kfree(vi->ctrl); 6567 err_ctrl: 6568 return -ENOMEM; 6569 } 6570 6571 static int init_vqs(struct virtnet_info *vi) 6572 { 6573 int ret; 6574 6575 /* Allocate send & receive queues */ 6576 ret = virtnet_alloc_queues(vi); 6577 if (ret) 6578 goto err; 6579 6580 ret = virtnet_find_vqs(vi); 6581 if (ret) 6582 goto err_free; 6583 6584 cpus_read_lock(); 6585 virtnet_set_affinity(vi); 6586 cpus_read_unlock(); 6587 6588 return 0; 6589 6590 err_free: 6591 virtnet_free_queues(vi); 6592 err: 6593 return ret; 6594 } 6595 6596 #ifdef CONFIG_SYSFS 6597 static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue, 6598 char *buf) 6599 { 6600 struct virtnet_info *vi = netdev_priv(queue->dev); 6601 unsigned int queue_index = get_netdev_rx_queue_index(queue); 6602 unsigned int headroom = virtnet_get_headroom(vi); 6603 unsigned int tailroom = headroom ? 
sizeof(struct skb_shared_info) : 0; 6604 struct ewma_pkt_len *avg; 6605 6606 BUG_ON(queue_index >= vi->max_queue_pairs); 6607 avg = &vi->rq[queue_index].mrg_avg_pkt_len; 6608 return sprintf(buf, "%u\n", 6609 get_mergeable_buf_len(&vi->rq[queue_index], avg, 6610 SKB_DATA_ALIGN(headroom + tailroom))); 6611 } 6612 6613 static struct rx_queue_attribute mergeable_rx_buffer_size_attribute = 6614 __ATTR_RO(mergeable_rx_buffer_size); 6615 6616 static struct attribute *virtio_net_mrg_rx_attrs[] = { 6617 &mergeable_rx_buffer_size_attribute.attr, 6618 NULL 6619 }; 6620 6621 static const struct attribute_group virtio_net_mrg_rx_group = { 6622 .name = "virtio_net", 6623 .attrs = virtio_net_mrg_rx_attrs 6624 }; 6625 #endif 6626 6627 static bool virtnet_fail_on_feature(struct virtio_device *vdev, 6628 unsigned int fbit, 6629 const char *fname, const char *dname) 6630 { 6631 if (!virtio_has_feature(vdev, fbit)) 6632 return false; 6633 6634 dev_err(&vdev->dev, "device advertises feature %s but not %s", 6635 fname, dname); 6636 6637 return true; 6638 } 6639 6640 #define VIRTNET_FAIL_ON(vdev, fbit, dbit) \ 6641 virtnet_fail_on_feature(vdev, fbit, #fbit, dbit) 6642 6643 static bool virtnet_validate_features(struct virtio_device *vdev) 6644 { 6645 if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) && 6646 (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX, 6647 "VIRTIO_NET_F_CTRL_VQ") || 6648 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN, 6649 "VIRTIO_NET_F_CTRL_VQ") || 6650 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE, 6651 "VIRTIO_NET_F_CTRL_VQ") || 6652 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") || 6653 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR, 6654 "VIRTIO_NET_F_CTRL_VQ") || 6655 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, 6656 "VIRTIO_NET_F_CTRL_VQ") || 6657 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT, 6658 "VIRTIO_NET_F_CTRL_VQ") || 6659 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL, 6660 "VIRTIO_NET_F_CTRL_VQ") || 6661 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_VQ_NOTF_COAL, 6662 "VIRTIO_NET_F_CTRL_VQ"))) { 6663 return false; 6664 } 6665 6666 return true; 6667 } 6668 6669 #define MIN_MTU ETH_MIN_MTU 6670 #define MAX_MTU ETH_MAX_MTU 6671 6672 static int virtnet_validate(struct virtio_device *vdev) 6673 { 6674 if (!vdev->config->get) { 6675 dev_err(&vdev->dev, "%s failure: config access disabled\n", 6676 __func__); 6677 return -EINVAL; 6678 } 6679 6680 if (!virtnet_validate_features(vdev)) 6681 return -EINVAL; 6682 6683 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { 6684 int mtu = virtio_cread16(vdev, 6685 offsetof(struct virtio_net_config, 6686 mtu)); 6687 if (mtu < MIN_MTU) 6688 __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU); 6689 } 6690 6691 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY) && 6692 !virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 6693 dev_warn(&vdev->dev, "device advertises feature VIRTIO_NET_F_STANDBY but not VIRTIO_NET_F_MAC, disabling standby"); 6694 __virtio_clear_bit(vdev, VIRTIO_NET_F_STANDBY); 6695 } 6696 6697 return 0; 6698 } 6699 6700 static bool virtnet_check_guest_gso(const struct virtnet_info *vi) 6701 { 6702 return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 6703 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 6704 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 6705 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 6706 (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) && 6707 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6)); 6708 } 6709 6710 static void virtnet_set_big_packets(struct virtnet_info *vi, const int 
mtu) 6711 { 6712 bool guest_gso = virtnet_check_guest_gso(vi); 6713 6714 /* If device can receive ANY guest GSO packets, regardless of mtu, 6715 * allocate packets of maximum size, otherwise limit it to only 6716 * mtu size worth only. 6717 */ 6718 if (mtu > ETH_DATA_LEN || guest_gso) { 6719 vi->big_packets = true; 6720 vi->big_packets_num_skbfrags = guest_gso ? MAX_SKB_FRAGS : DIV_ROUND_UP(mtu, PAGE_SIZE); 6721 } 6722 } 6723 6724 #define VIRTIO_NET_HASH_REPORT_MAX_TABLE 10 6725 static enum xdp_rss_hash_type 6726 virtnet_xdp_rss_type[VIRTIO_NET_HASH_REPORT_MAX_TABLE] = { 6727 [VIRTIO_NET_HASH_REPORT_NONE] = XDP_RSS_TYPE_NONE, 6728 [VIRTIO_NET_HASH_REPORT_IPv4] = XDP_RSS_TYPE_L3_IPV4, 6729 [VIRTIO_NET_HASH_REPORT_TCPv4] = XDP_RSS_TYPE_L4_IPV4_TCP, 6730 [VIRTIO_NET_HASH_REPORT_UDPv4] = XDP_RSS_TYPE_L4_IPV4_UDP, 6731 [VIRTIO_NET_HASH_REPORT_IPv6] = XDP_RSS_TYPE_L3_IPV6, 6732 [VIRTIO_NET_HASH_REPORT_TCPv6] = XDP_RSS_TYPE_L4_IPV6_TCP, 6733 [VIRTIO_NET_HASH_REPORT_UDPv6] = XDP_RSS_TYPE_L4_IPV6_UDP, 6734 [VIRTIO_NET_HASH_REPORT_IPv6_EX] = XDP_RSS_TYPE_L3_IPV6_EX, 6735 [VIRTIO_NET_HASH_REPORT_TCPv6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX, 6736 [VIRTIO_NET_HASH_REPORT_UDPv6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX 6737 }; 6738 6739 static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash, 6740 enum xdp_rss_hash_type *rss_type) 6741 { 6742 const struct xdp_buff *xdp = (void *)_ctx; 6743 struct virtio_net_hdr_v1_hash *hdr_hash; 6744 struct virtnet_info *vi; 6745 u16 hash_report; 6746 6747 if (!(xdp->rxq->dev->features & NETIF_F_RXHASH)) 6748 return -ENODATA; 6749 6750 vi = netdev_priv(xdp->rxq->dev); 6751 hdr_hash = (struct virtio_net_hdr_v1_hash *)(xdp->data - vi->hdr_len); 6752 hash_report = __le16_to_cpu(hdr_hash->hash_report); 6753 6754 if (hash_report >= VIRTIO_NET_HASH_REPORT_MAX_TABLE) 6755 hash_report = VIRTIO_NET_HASH_REPORT_NONE; 6756 6757 *rss_type = virtnet_xdp_rss_type[hash_report]; 6758 *hash = __le32_to_cpu(hdr_hash->hash_value); 6759 return 0; 6760 } 6761 6762 static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = { 6763 .xmo_rx_hash = virtnet_xdp_rx_hash, 6764 }; 6765 6766 static int virtnet_probe(struct virtio_device *vdev) 6767 { 6768 int i, err = -ENOMEM; 6769 struct net_device *dev; 6770 struct virtnet_info *vi; 6771 u16 max_queue_pairs; 6772 int mtu = 0; 6773 6774 /* Find if host supports multiqueue/rss virtio_net device */ 6775 max_queue_pairs = 1; 6776 if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ) || virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) 6777 max_queue_pairs = 6778 virtio_cread16(vdev, offsetof(struct virtio_net_config, max_virtqueue_pairs)); 6779 6780 /* We need at least 2 queue's */ 6781 if (max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 6782 max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || 6783 !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) 6784 max_queue_pairs = 1; 6785 6786 /* Allocate ourselves a network device with room for our info */ 6787 dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs); 6788 if (!dev) 6789 return -ENOMEM; 6790 6791 /* Set up network device as normal. */ 6792 dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE | 6793 IFF_TX_SKB_NO_LINEAR; 6794 dev->netdev_ops = &virtnet_netdev; 6795 dev->stat_ops = &virtnet_stat_ops; 6796 dev->features = NETIF_F_HIGHDMA; 6797 6798 dev->ethtool_ops = &virtnet_ethtool_ops; 6799 SET_NETDEV_DEV(dev, &vdev->dev); 6800 6801 /* Do we support "hardware" checksums? */ 6802 if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) { 6803 /* This opens up the world of extra features. 
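 * (VIRTIO_NET_F_CSUM means the device accepts partially-checksummed
 * packets from the driver; that is what makes NETIF_F_SG and the
 * TSO/USO offloads configured below usable.)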
*/ 6804 dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG; 6805 if (csum) 6806 dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; 6807 6808 if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) { 6809 dev->hw_features |= NETIF_F_TSO 6810 | NETIF_F_TSO_ECN | NETIF_F_TSO6; 6811 } 6812 /* Individual feature bits: what can host handle? */ 6813 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4)) 6814 dev->hw_features |= NETIF_F_TSO; 6815 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6)) 6816 dev->hw_features |= NETIF_F_TSO6; 6817 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN)) 6818 dev->hw_features |= NETIF_F_TSO_ECN; 6819 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_USO)) 6820 dev->hw_features |= NETIF_F_GSO_UDP_L4; 6821 6822 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO)) { 6823 dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; 6824 dev->hw_enc_features = dev->hw_features; 6825 } 6826 if (dev->hw_features & NETIF_F_GSO_UDP_TUNNEL && 6827 virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM)) { 6828 dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; 6829 dev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; 6830 } 6831 6832 dev->features |= NETIF_F_GSO_ROBUST; 6833 6834 if (gso) 6835 dev->features |= dev->hw_features; 6836 /* (!csum && gso) case will be fixed by register_netdev() */ 6837 } 6838 6839 /* 1. With VIRTIO_NET_F_GUEST_CSUM negotiation, the driver doesn't 6840 * need to calculate checksums for partially checksummed packets, 6841 * as they're considered valid by the upper layer. 6842 * 2. Without VIRTIO_NET_F_GUEST_CSUM negotiation, the driver only 6843 * receives fully checksummed packets. The device may assist in 6844 * validating these packets' checksums, so the driver won't have to. 6845 */ 6846 dev->features |= NETIF_F_RXCSUM; 6847 6848 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || 6849 virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)) 6850 dev->features |= NETIF_F_GRO_HW; 6851 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) 6852 dev->hw_features |= NETIF_F_GRO_HW; 6853 6854 dev->vlan_features = dev->features; 6855 dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | 6856 NETDEV_XDP_ACT_XSK_ZEROCOPY; 6857 6858 /* MTU range: 68 - 65535 */ 6859 dev->min_mtu = MIN_MTU; 6860 dev->max_mtu = MAX_MTU; 6861 6862 /* Configuration may specify what MAC to use. Otherwise random. 
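 * (With VIRTIO_NET_F_MAC the address is read from the device config space;
 * otherwise a random one is generated here and, later in probe, pushed to
 * the device via VIRTIO_NET_CTRL_MAC_ADDR_SET when CTRL_MAC_ADDR is
 * negotiated.)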
*/ 6863 if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 6864 u8 addr[ETH_ALEN]; 6865 6866 virtio_cread_bytes(vdev, 6867 offsetof(struct virtio_net_config, mac), 6868 addr, ETH_ALEN); 6869 eth_hw_addr_set(dev, addr); 6870 } else { 6871 eth_hw_addr_random(dev); 6872 dev_info(&vdev->dev, "Assigned random MAC address %pM\n", 6873 dev->dev_addr); 6874 } 6875 6876 /* Set up our device-specific information */ 6877 vi = netdev_priv(dev); 6878 vi->dev = dev; 6879 vi->vdev = vdev; 6880 vdev->priv = vi; 6881 6882 INIT_WORK(&vi->config_work, virtnet_config_changed_work); 6883 INIT_WORK(&vi->rx_mode_work, virtnet_rx_mode_work); 6884 spin_lock_init(&vi->refill_lock); 6885 6886 if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) { 6887 vi->mergeable_rx_bufs = true; 6888 dev->xdp_features |= NETDEV_XDP_ACT_RX_SG; 6889 } 6890 6891 if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) 6892 vi->has_rss_hash_report = true; 6893 6894 if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) { 6895 vi->has_rss = true; 6896 6897 vi->rss_indir_table_size = 6898 virtio_cread16(vdev, offsetof(struct virtio_net_config, 6899 rss_max_indirection_table_length)); 6900 } 6901 vi->rss_hdr = devm_kzalloc(&vdev->dev, virtnet_rss_hdr_size(vi), GFP_KERNEL); 6902 if (!vi->rss_hdr) { 6903 err = -ENOMEM; 6904 goto free; 6905 } 6906 6907 if (vi->has_rss || vi->has_rss_hash_report) { 6908 vi->rss_key_size = 6909 virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size)); 6910 if (vi->rss_key_size > VIRTIO_NET_RSS_MAX_KEY_SIZE) { 6911 dev_err(&vdev->dev, "rss_max_key_size=%u exceeds the limit %u.\n", 6912 vi->rss_key_size, VIRTIO_NET_RSS_MAX_KEY_SIZE); 6913 err = -EINVAL; 6914 goto free; 6915 } 6916 6917 vi->rss_hash_types_supported = 6918 virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types)); 6919 vi->rss_hash_types_supported &= 6920 ~(VIRTIO_NET_RSS_HASH_TYPE_IP_EX | 6921 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | 6922 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX); 6923 6924 dev->hw_features |= NETIF_F_RXHASH; 6925 dev->xdp_metadata_ops = &virtnet_xdp_metadata_ops; 6926 } 6927 6928 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO) || 6929 virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO)) 6930 vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash_tunnel); 6931 else if (vi->has_rss_hash_report) 6932 vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash); 6933 else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) || 6934 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 6935 vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); 6936 else 6937 vi->hdr_len = sizeof(struct virtio_net_hdr); 6938 6939 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM)) 6940 vi->rx_tnl_csum = true; 6941 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO)) 6942 vi->rx_tnl = true; 6943 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO)) 6944 vi->tx_tnl = true; 6945 6946 if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) || 6947 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 6948 vi->any_header_sg = true; 6949 6950 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) 6951 vi->has_cvq = true; 6952 6953 mutex_init(&vi->cvq_lock); 6954 6955 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { 6956 mtu = virtio_cread16(vdev, 6957 offsetof(struct virtio_net_config, 6958 mtu)); 6959 if (mtu < dev->min_mtu) { 6960 /* Should never trigger: MTU was previously validated 6961 * in virtnet_validate. 
6962 */ 6963 dev_err(&vdev->dev, 6964 "device MTU appears to have changed it is now %d < %d", 6965 mtu, dev->min_mtu); 6966 err = -EINVAL; 6967 goto free; 6968 } 6969 6970 dev->mtu = mtu; 6971 dev->max_mtu = mtu; 6972 } 6973 6974 virtnet_set_big_packets(vi, mtu); 6975 6976 if (vi->any_header_sg) 6977 dev->needed_headroom = vi->hdr_len; 6978 6979 /* Enable multiqueue by default */ 6980 if (num_online_cpus() >= max_queue_pairs) 6981 vi->curr_queue_pairs = max_queue_pairs; 6982 else 6983 vi->curr_queue_pairs = num_online_cpus(); 6984 vi->max_queue_pairs = max_queue_pairs; 6985 6986 /* Allocate/initialize the rx/tx queues, and invoke find_vqs */ 6987 err = init_vqs(vi); 6988 if (err) 6989 goto free; 6990 6991 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { 6992 vi->intr_coal_rx.max_usecs = 0; 6993 vi->intr_coal_tx.max_usecs = 0; 6994 vi->intr_coal_rx.max_packets = 0; 6995 6996 /* Keep the default values of the coalescing parameters 6997 * aligned with the default napi_tx state. 6998 */ 6999 if (vi->sq[0].napi.weight) 7000 vi->intr_coal_tx.max_packets = 1; 7001 else 7002 vi->intr_coal_tx.max_packets = 0; 7003 } 7004 7005 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { 7006 /* The reason is the same as VIRTIO_NET_F_NOTF_COAL. */ 7007 for (i = 0; i < vi->max_queue_pairs; i++) 7008 if (vi->sq[i].napi.weight) 7009 vi->sq[i].intr_coal.max_packets = 1; 7010 7011 err = virtnet_init_irq_moder(vi); 7012 if (err) 7013 goto free; 7014 } 7015 7016 #ifdef CONFIG_SYSFS 7017 if (vi->mergeable_rx_bufs) 7018 dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group; 7019 #endif 7020 netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs); 7021 netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs); 7022 7023 virtnet_init_settings(dev); 7024 7025 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { 7026 vi->failover = net_failover_create(vi->dev); 7027 if (IS_ERR(vi->failover)) { 7028 err = PTR_ERR(vi->failover); 7029 goto free_vqs; 7030 } 7031 } 7032 7033 if (vi->has_rss || vi->has_rss_hash_report) 7034 virtnet_init_default_rss(vi); 7035 7036 enable_rx_mode_work(vi); 7037 7038 /* serialize netdev register + virtio_device_ready() with ndo_open() */ 7039 rtnl_lock(); 7040 7041 err = register_netdevice(dev); 7042 if (err) { 7043 pr_debug("virtio_net: registering device failed\n"); 7044 rtnl_unlock(); 7045 goto free_failover; 7046 } 7047 7048 /* Disable config change notification until ndo_open. */ 7049 virtio_config_driver_disable(vi->vdev); 7050 7051 virtio_device_ready(vdev); 7052 7053 if (vi->has_rss || vi->has_rss_hash_report) { 7054 if (!virtnet_commit_rss_command(vi)) { 7055 dev_warn(&vdev->dev, "RSS disabled because committing failed.\n"); 7056 dev->hw_features &= ~NETIF_F_RXHASH; 7057 vi->has_rss_hash_report = false; 7058 vi->has_rss = false; 7059 } 7060 } 7061 7062 virtnet_set_queues(vi, vi->curr_queue_pairs); 7063 7064 /* a random MAC address has been assigned, notify the device. 
7065 * We don't fail probe if VIRTIO_NET_F_CTRL_MAC_ADDR is not there 7066 * because many devices work fine without getting MAC explicitly 7067 */ 7068 if (!virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && 7069 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 7070 struct scatterlist sg; 7071 7072 sg_init_one(&sg, dev->dev_addr, dev->addr_len); 7073 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 7074 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { 7075 pr_debug("virtio_net: setting MAC address failed\n"); 7076 rtnl_unlock(); 7077 err = -EINVAL; 7078 goto free_unregister_netdev; 7079 } 7080 } 7081 7082 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) { 7083 struct virtio_net_stats_capabilities *stats_cap __free(kfree) = NULL; 7084 struct scatterlist sg; 7085 __le64 v; 7086 7087 stats_cap = kzalloc(sizeof(*stats_cap), GFP_KERNEL); 7088 if (!stats_cap) { 7089 rtnl_unlock(); 7090 err = -ENOMEM; 7091 goto free_unregister_netdev; 7092 } 7093 7094 sg_init_one(&sg, stats_cap, sizeof(*stats_cap)); 7095 7096 if (!virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS, 7097 VIRTIO_NET_CTRL_STATS_QUERY, 7098 NULL, &sg)) { 7099 pr_debug("virtio_net: fail to get stats capability\n"); 7100 rtnl_unlock(); 7101 err = -EINVAL; 7102 goto free_unregister_netdev; 7103 } 7104 7105 v = stats_cap->supported_stats_types[0]; 7106 vi->device_stats_cap = le64_to_cpu(v); 7107 } 7108 7109 /* Assume link up if device can't report link status, 7110 otherwise get link status from config. */ 7111 netif_carrier_off(dev); 7112 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { 7113 virtio_config_changed(vi->vdev); 7114 } else { 7115 vi->status = VIRTIO_NET_S_LINK_UP; 7116 virtnet_update_settings(vi); 7117 netif_carrier_on(dev); 7118 } 7119 7120 for (i = 0; i < ARRAY_SIZE(guest_offloads); i++) { 7121 unsigned int fbit; 7122 7123 fbit = virtio_offload_to_feature(guest_offloads[i]); 7124 if (virtio_has_feature(vi->vdev, fbit)) 7125 set_bit(guest_offloads[i], &vi->guest_offloads); 7126 } 7127 vi->guest_offloads_capable = vi->guest_offloads; 7128 7129 rtnl_unlock(); 7130 7131 err = virtnet_cpu_notif_add(vi); 7132 if (err) { 7133 pr_debug("virtio_net: registering cpu notifier failed\n"); 7134 goto free_unregister_netdev; 7135 } 7136 7137 pr_debug("virtnet: registered device %s with %d RX and TX vq's\n", 7138 dev->name, max_queue_pairs); 7139 7140 return 0; 7141 7142 free_unregister_netdev: 7143 unregister_netdev(dev); 7144 free_failover: 7145 net_failover_destroy(vi->failover); 7146 free_vqs: 7147 virtio_reset_device(vdev); 7148 cancel_delayed_work_sync(&vi->refill); 7149 free_receive_page_frags(vi); 7150 virtnet_del_vqs(vi); 7151 free: 7152 free_netdev(dev); 7153 return err; 7154 } 7155 7156 static void remove_vq_common(struct virtnet_info *vi) 7157 { 7158 int i; 7159 7160 virtio_reset_device(vi->vdev); 7161 7162 /* Free unused buffers in both send and recv, if any. */ 7163 free_unused_bufs(vi); 7164 7165 /* 7166 * Rule of thumb is netdev_tx_reset_queue() should follow any 7167 * skb freeing not followed by netdev_tx_completed_queue() 7168 */ 7169 for (i = 0; i < vi->max_queue_pairs; i++) 7170 netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, i)); 7171 7172 free_receive_bufs(vi); 7173 7174 free_receive_page_frags(vi); 7175 7176 virtnet_del_vqs(vi); 7177 } 7178 7179 static void virtnet_remove(struct virtio_device *vdev) 7180 { 7181 struct virtnet_info *vi = vdev->priv; 7182 7183 virtnet_cpu_notif_remove(vi); 7184 7185 /* Make sure no work handler is accessing the device. 
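 * config_work and rx_mode_work talk to the device (config space and the
 * control virtqueue), so they must be flushed before the netdev is
 * unregistered and the virtqueues are torn down.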
*/ 7186 flush_work(&vi->config_work); 7187 disable_rx_mode_work(vi); 7188 flush_work(&vi->rx_mode_work); 7189 7190 virtnet_free_irq_moder(vi); 7191 7192 unregister_netdev(vi->dev); 7193 7194 net_failover_destroy(vi->failover); 7195 7196 remove_vq_common(vi); 7197 7198 free_netdev(vi->dev); 7199 } 7200 7201 static __maybe_unused int virtnet_freeze(struct virtio_device *vdev) 7202 { 7203 struct virtnet_info *vi = vdev->priv; 7204 7205 virtnet_cpu_notif_remove(vi); 7206 virtnet_freeze_down(vdev); 7207 remove_vq_common(vi); 7208 7209 return 0; 7210 } 7211 7212 static __maybe_unused int virtnet_restore(struct virtio_device *vdev) 7213 { 7214 struct virtnet_info *vi = vdev->priv; 7215 int err; 7216 7217 err = virtnet_restore_up(vdev); 7218 if (err) 7219 return err; 7220 virtnet_set_queues(vi, vi->curr_queue_pairs); 7221 7222 err = virtnet_cpu_notif_add(vi); 7223 if (err) { 7224 virtnet_freeze_down(vdev); 7225 remove_vq_common(vi); 7226 return err; 7227 } 7228 7229 return 0; 7230 } 7231 7232 static struct virtio_device_id id_table[] = { 7233 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, 7234 { 0 }, 7235 }; 7236 7237 #define VIRTNET_FEATURES \ 7238 VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \ 7239 VIRTIO_NET_F_MAC, \ 7240 VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \ 7241 VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \ 7242 VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \ 7243 VIRTIO_NET_F_HOST_USO, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, \ 7244 VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \ 7245 VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \ 7246 VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \ 7247 VIRTIO_NET_F_CTRL_MAC_ADDR, \ 7248 VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ 7249 VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ 7250 VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \ 7251 VIRTIO_NET_F_VQ_NOTF_COAL, \ 7252 VIRTIO_NET_F_GUEST_HDRLEN, VIRTIO_NET_F_DEVICE_STATS 7253 7254 static unsigned int features[] = { 7255 VIRTNET_FEATURES, 7256 VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO, 7257 VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM, 7258 VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO, 7259 VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM, 7260 }; 7261 7262 static unsigned int features_legacy[] = { 7263 VIRTNET_FEATURES, 7264 VIRTIO_NET_F_GSO, 7265 VIRTIO_F_ANY_LAYOUT, 7266 }; 7267 7268 static struct virtio_driver virtio_net_driver = { 7269 .feature_table = features, 7270 .feature_table_size = ARRAY_SIZE(features), 7271 .feature_table_legacy = features_legacy, 7272 .feature_table_size_legacy = ARRAY_SIZE(features_legacy), 7273 .driver.name = KBUILD_MODNAME, 7274 .id_table = id_table, 7275 .validate = virtnet_validate, 7276 .probe = virtnet_probe, 7277 .remove = virtnet_remove, 7278 .config_changed = virtnet_config_changed, 7279 #ifdef CONFIG_PM_SLEEP 7280 .freeze = virtnet_freeze, 7281 .restore = virtnet_restore, 7282 #endif 7283 }; 7284 7285 static __init int virtio_net_driver_init(void) 7286 { 7287 int ret; 7288 7289 ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online", 7290 virtnet_cpu_online, 7291 virtnet_cpu_down_prep); 7292 if (ret < 0) 7293 goto out; 7294 virtionet_online = ret; 7295 ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead", 7296 NULL, virtnet_cpu_dead); 7297 if (ret) 7298 goto err_dead; 7299 ret = register_virtio_driver(&virtio_net_driver); 7300 if (ret) 7301 goto err_virtio; 7302 return 0; 7303 err_virtio: 7304 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); 7305 
err_dead: 7306 cpuhp_remove_multi_state(virtionet_online); 7307 out: 7308 return ret; 7309 } 7310 module_init(virtio_net_driver_init); 7311 7312 static __exit void virtio_net_driver_exit(void) 7313 { 7314 unregister_virtio_driver(&virtio_net_driver); 7315 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); 7316 cpuhp_remove_multi_state(virtionet_online); 7317 } 7318 module_exit(virtio_net_driver_exit); 7319 7320 MODULE_DEVICE_TABLE(virtio, id_table); 7321 MODULE_DESCRIPTION("Virtio network driver"); 7322 MODULE_LICENSE("GPL"); 7323