1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* A network driver using virtio. 3 * 4 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation 5 */ 6 //#define DEBUG 7 #include <linux/netdevice.h> 8 #include <linux/etherdevice.h> 9 #include <linux/ethtool.h> 10 #include <linux/module.h> 11 #include <linux/virtio.h> 12 #include <linux/virtio_net.h> 13 #include <linux/bpf.h> 14 #include <linux/bpf_trace.h> 15 #include <linux/scatterlist.h> 16 #include <linux/if_vlan.h> 17 #include <linux/slab.h> 18 #include <linux/cpu.h> 19 #include <linux/average.h> 20 #include <linux/filter.h> 21 #include <linux/kernel.h> 22 #include <linux/dim.h> 23 #include <net/route.h> 24 #include <net/xdp.h> 25 #include <net/net_failover.h> 26 #include <net/netdev_rx_queue.h> 27 #include <net/netdev_queues.h> 28 #include <net/xdp_sock_drv.h> 29 #include <net/page_pool/helpers.h> 30 31 static int napi_weight = NAPI_POLL_WEIGHT; 32 module_param(napi_weight, int, 0444); 33 34 static bool csum = true, gso = true, napi_tx = true; 35 module_param(csum, bool, 0444); 36 module_param(gso, bool, 0444); 37 module_param(napi_tx, bool, 0644); 38 39 #define VIRTIO_OFFLOAD_MAP_MIN 46 40 #define VIRTIO_OFFLOAD_MAP_MAX 47 41 #define VIRTIO_FEATURES_MAP_MIN 65 42 #define VIRTIO_O2F_DELTA (VIRTIO_FEATURES_MAP_MIN - \ 43 VIRTIO_OFFLOAD_MAP_MIN) 44 45 static bool virtio_is_mapped_offload(unsigned int obit) 46 { 47 return obit >= VIRTIO_OFFLOAD_MAP_MIN && 48 obit <= VIRTIO_OFFLOAD_MAP_MAX; 49 } 50 51 static unsigned int virtio_offload_to_feature(unsigned int obit) 52 { 53 return virtio_is_mapped_offload(obit) ? obit + VIRTIO_O2F_DELTA : obit; 54 } 55 56 /* FIXME: MTU in config. */ 57 #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN) 58 #define GOOD_COPY_LEN 128 59 60 #define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD) 61 62 /* Separating two types of XDP xmit */ 63 #define VIRTIO_XDP_TX BIT(0) 64 #define VIRTIO_XDP_REDIR BIT(1) 65 66 /* RX packet size EWMA. The average packet size is used to determine the packet 67 * buffer size when refilling RX rings. As the entire RX ring may be refilled 68 * at once, the weight is chosen so that the EWMA will be insensitive to short- 69 * term, transient changes in packet size. 
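 *
 * With DECLARE_EWMA(pkt_len, 0, 64) below, each new sample roughly
 * contributes 1/64 of its value (avg += (sample - avg) / 64, with no
 * fractional precision bits), so a sustained change in packet size only
 * shifts the average gradually.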
70 */ 71 DECLARE_EWMA(pkt_len, 0, 64) 72 73 #define VIRTNET_DRIVER_VERSION "1.0.0" 74 75 static const unsigned long guest_offloads[] = { 76 VIRTIO_NET_F_GUEST_TSO4, 77 VIRTIO_NET_F_GUEST_TSO6, 78 VIRTIO_NET_F_GUEST_ECN, 79 VIRTIO_NET_F_GUEST_UFO, 80 VIRTIO_NET_F_GUEST_CSUM, 81 VIRTIO_NET_F_GUEST_USO4, 82 VIRTIO_NET_F_GUEST_USO6, 83 VIRTIO_NET_F_GUEST_HDRLEN, 84 VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_MAPPED, 85 VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM_MAPPED, 86 }; 87 88 #define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ 89 (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ 90 (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ 91 (1ULL << VIRTIO_NET_F_GUEST_UFO) | \ 92 (1ULL << VIRTIO_NET_F_GUEST_USO4) | \ 93 (1ULL << VIRTIO_NET_F_GUEST_USO6) | \ 94 (1ULL << VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_MAPPED) | \ 95 (1ULL << VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM_MAPPED)) 96 97 struct virtnet_stat_desc { 98 char desc[ETH_GSTRING_LEN]; 99 size_t offset; 100 size_t qstat_offset; 101 }; 102 103 struct virtnet_sq_free_stats { 104 u64 packets; 105 u64 bytes; 106 u64 napi_packets; 107 u64 napi_bytes; 108 u64 xsk; 109 }; 110 111 struct virtnet_sq_stats { 112 struct u64_stats_sync syncp; 113 u64_stats_t packets; 114 u64_stats_t bytes; 115 u64_stats_t xdp_tx; 116 u64_stats_t xdp_tx_drops; 117 u64_stats_t kicks; 118 u64_stats_t tx_timeouts; 119 u64_stats_t stop; 120 u64_stats_t wake; 121 }; 122 123 struct virtnet_rq_stats { 124 struct u64_stats_sync syncp; 125 u64_stats_t packets; 126 u64_stats_t bytes; 127 u64_stats_t drops; 128 u64_stats_t xdp_packets; 129 u64_stats_t xdp_tx; 130 u64_stats_t xdp_redirects; 131 u64_stats_t xdp_drops; 132 u64_stats_t kicks; 133 }; 134 135 #define VIRTNET_SQ_STAT(name, m) {name, offsetof(struct virtnet_sq_stats, m), -1} 136 #define VIRTNET_RQ_STAT(name, m) {name, offsetof(struct virtnet_rq_stats, m), -1} 137 138 #define VIRTNET_SQ_STAT_QSTAT(name, m) \ 139 { \ 140 name, \ 141 offsetof(struct virtnet_sq_stats, m), \ 142 offsetof(struct netdev_queue_stats_tx, m), \ 143 } 144 145 #define VIRTNET_RQ_STAT_QSTAT(name, m) \ 146 { \ 147 name, \ 148 offsetof(struct virtnet_rq_stats, m), \ 149 offsetof(struct netdev_queue_stats_rx, m), \ 150 } 151 152 static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = { 153 VIRTNET_SQ_STAT("xdp_tx", xdp_tx), 154 VIRTNET_SQ_STAT("xdp_tx_drops", xdp_tx_drops), 155 VIRTNET_SQ_STAT("kicks", kicks), 156 VIRTNET_SQ_STAT("tx_timeouts", tx_timeouts), 157 }; 158 159 static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = { 160 VIRTNET_RQ_STAT("drops", drops), 161 VIRTNET_RQ_STAT("xdp_packets", xdp_packets), 162 VIRTNET_RQ_STAT("xdp_tx", xdp_tx), 163 VIRTNET_RQ_STAT("xdp_redirects", xdp_redirects), 164 VIRTNET_RQ_STAT("xdp_drops", xdp_drops), 165 VIRTNET_RQ_STAT("kicks", kicks), 166 }; 167 168 static const struct virtnet_stat_desc virtnet_sq_stats_desc_qstat[] = { 169 VIRTNET_SQ_STAT_QSTAT("packets", packets), 170 VIRTNET_SQ_STAT_QSTAT("bytes", bytes), 171 VIRTNET_SQ_STAT_QSTAT("stop", stop), 172 VIRTNET_SQ_STAT_QSTAT("wake", wake), 173 }; 174 175 static const struct virtnet_stat_desc virtnet_rq_stats_desc_qstat[] = { 176 VIRTNET_RQ_STAT_QSTAT("packets", packets), 177 VIRTNET_RQ_STAT_QSTAT("bytes", bytes), 178 }; 179 180 #define VIRTNET_STATS_DESC_CQ(name) \ 181 {#name, offsetof(struct virtio_net_stats_cvq, name), -1} 182 183 #define VIRTNET_STATS_DESC_RX(class, name) \ 184 {#name, offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), -1} 185 186 #define VIRTNET_STATS_DESC_TX(class, name) \ 187 {#name, offsetof(struct virtio_net_stats_tx_ ## class, 
tx_ ## name), -1} 188 189 190 static const struct virtnet_stat_desc virtnet_stats_cvq_desc[] = { 191 VIRTNET_STATS_DESC_CQ(command_num), 192 VIRTNET_STATS_DESC_CQ(ok_num), 193 }; 194 195 static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc[] = { 196 VIRTNET_STATS_DESC_RX(basic, packets), 197 VIRTNET_STATS_DESC_RX(basic, bytes), 198 199 VIRTNET_STATS_DESC_RX(basic, notifications), 200 VIRTNET_STATS_DESC_RX(basic, interrupts), 201 }; 202 203 static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc[] = { 204 VIRTNET_STATS_DESC_TX(basic, packets), 205 VIRTNET_STATS_DESC_TX(basic, bytes), 206 207 VIRTNET_STATS_DESC_TX(basic, notifications), 208 VIRTNET_STATS_DESC_TX(basic, interrupts), 209 }; 210 211 static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc[] = { 212 VIRTNET_STATS_DESC_RX(csum, needs_csum), 213 }; 214 215 static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc[] = { 216 VIRTNET_STATS_DESC_TX(gso, gso_packets_noseg), 217 VIRTNET_STATS_DESC_TX(gso, gso_bytes_noseg), 218 }; 219 220 static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc[] = { 221 VIRTNET_STATS_DESC_RX(speed, ratelimit_bytes), 222 }; 223 224 static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc[] = { 225 VIRTNET_STATS_DESC_TX(speed, ratelimit_bytes), 226 }; 227 228 #define VIRTNET_STATS_DESC_RX_QSTAT(class, name, qstat_field) \ 229 { \ 230 #name, \ 231 offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), \ 232 offsetof(struct netdev_queue_stats_rx, qstat_field), \ 233 } 234 235 #define VIRTNET_STATS_DESC_TX_QSTAT(class, name, qstat_field) \ 236 { \ 237 #name, \ 238 offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name), \ 239 offsetof(struct netdev_queue_stats_tx, qstat_field), \ 240 } 241 242 static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc_qstat[] = { 243 VIRTNET_STATS_DESC_RX_QSTAT(basic, drops, hw_drops), 244 VIRTNET_STATS_DESC_RX_QSTAT(basic, drop_overruns, hw_drop_overruns), 245 }; 246 247 static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc_qstat[] = { 248 VIRTNET_STATS_DESC_TX_QSTAT(basic, drops, hw_drops), 249 VIRTNET_STATS_DESC_TX_QSTAT(basic, drop_malformed, hw_drop_errors), 250 }; 251 252 static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc_qstat[] = { 253 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_valid, csum_unnecessary), 254 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_none, csum_none), 255 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_bad, csum_bad), 256 }; 257 258 static const struct virtnet_stat_desc virtnet_stats_tx_csum_desc_qstat[] = { 259 VIRTNET_STATS_DESC_TX_QSTAT(csum, csum_none, csum_none), 260 VIRTNET_STATS_DESC_TX_QSTAT(csum, needs_csum, needs_csum), 261 }; 262 263 static const struct virtnet_stat_desc virtnet_stats_rx_gso_desc_qstat[] = { 264 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets, hw_gro_packets), 265 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes, hw_gro_bytes), 266 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets_coalesced, hw_gro_wire_packets), 267 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes_coalesced, hw_gro_wire_bytes), 268 }; 269 270 static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc_qstat[] = { 271 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_packets, hw_gso_packets), 272 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_bytes, hw_gso_bytes), 273 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments, hw_gso_wire_packets), 274 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments_bytes, hw_gso_wire_bytes), 275 }; 276 277 static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc_qstat[] = { 278 
VIRTNET_STATS_DESC_RX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits), 279 }; 280 281 static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc_qstat[] = { 282 VIRTNET_STATS_DESC_TX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits), 283 }; 284 285 #define VIRTNET_Q_TYPE_RX 0 286 #define VIRTNET_Q_TYPE_TX 1 287 #define VIRTNET_Q_TYPE_CQ 2 288 289 struct virtnet_interrupt_coalesce { 290 u32 max_packets; 291 u32 max_usecs; 292 }; 293 294 /* Internal representation of a send virtqueue */ 295 struct send_queue { 296 /* Virtqueue associated with this send _queue */ 297 struct virtqueue *vq; 298 299 /* TX: fragments + linear part + virtio header */ 300 struct scatterlist sg[MAX_SKB_FRAGS + 2]; 301 302 /* Name of the send queue: output.$index */ 303 char name[16]; 304 305 struct virtnet_sq_stats stats; 306 307 struct virtnet_interrupt_coalesce intr_coal; 308 309 struct napi_struct napi; 310 311 /* Record whether sq is in reset state. */ 312 bool reset; 313 314 struct xsk_buff_pool *xsk_pool; 315 316 dma_addr_t xsk_hdr_dma_addr; 317 }; 318 319 /* Internal representation of a receive virtqueue */ 320 struct receive_queue { 321 /* Virtqueue associated with this receive_queue */ 322 struct virtqueue *vq; 323 324 struct napi_struct napi; 325 326 struct bpf_prog __rcu *xdp_prog; 327 328 struct virtnet_rq_stats stats; 329 330 /* The number of rx notifications */ 331 u16 calls; 332 333 /* Is dynamic interrupt moderation enabled? */ 334 bool dim_enabled; 335 336 /* Used to protect dim_enabled and inter_coal */ 337 struct mutex dim_lock; 338 339 /* Dynamic Interrupt Moderation */ 340 struct dim dim; 341 342 u32 packets_in_napi; 343 344 struct virtnet_interrupt_coalesce intr_coal; 345 346 /* Chain pages by the private ptr. */ 347 struct page *pages; 348 349 /* Average packet length for mergeable receive buffers. */ 350 struct ewma_pkt_len mrg_avg_pkt_len; 351 352 struct page_pool *page_pool; 353 354 /* True if page_pool handles DMA mapping via PP_FLAG_DMA_MAP */ 355 bool use_page_pool_dma; 356 357 /* RX: fragments + linear part + virtio header */ 358 struct scatterlist sg[MAX_SKB_FRAGS + 2]; 359 360 /* Min single buffer size for mergeable buffers case. */ 361 unsigned int min_buf_len; 362 363 /* Name of this receive queue: input.$index */ 364 char name[16]; 365 366 struct xdp_rxq_info xdp_rxq; 367 368 struct xsk_buff_pool *xsk_pool; 369 370 /* xdp rxq used by xsk */ 371 struct xdp_rxq_info xsk_rxq_info; 372 373 struct xdp_buff **xsk_buffs; 374 }; 375 376 #define VIRTIO_NET_RSS_MAX_KEY_SIZE 40 377 378 /* Control VQ buffers: protected by the rtnl lock */ 379 struct control_buf { 380 struct virtio_net_ctrl_hdr hdr; 381 virtio_net_ctrl_ack status; 382 }; 383 384 struct virtnet_info { 385 struct virtio_device *vdev; 386 struct virtqueue *cvq; 387 struct net_device *dev; 388 struct send_queue *sq; 389 struct receive_queue *rq; 390 unsigned int status; 391 392 /* Max # of queue pairs supported by the device */ 393 u16 max_queue_pairs; 394 395 /* # of queue pairs currently used by the driver */ 396 u16 curr_queue_pairs; 397 398 /* # of XDP queue pairs currently used by the driver */ 399 u16 xdp_queue_pairs; 400 401 /* xdp_queue_pairs may be 0, when xdp is already loaded. So add this. */ 402 bool xdp_enabled; 403 404 /* I like... big packets and I cannot lie! */ 405 bool big_packets; 406 407 /* number of sg entries allocated for big packets */ 408 unsigned int big_packets_num_skbfrags; 409 410 /* Host will merge rx buffers for big packets (shake it! shake it!) 
	 */
	bool mergeable_rx_bufs;

	/* Host supports rss and/or hash report */
	bool has_rss;
	bool has_rss_hash_report;
	u8 rss_key_size;
	u16 rss_indir_table_size;
	u32 rss_hash_types_supported;
	u32 rss_hash_types_saved;

	/* Has control virtqueue */
	bool has_cvq;

	/* Lock to protect the control VQ */
	struct mutex cvq_lock;

	/* Host can handle any s/g split between our header and packet data */
	bool any_header_sg;

	/* Packet virtio header size */
	u8 hdr_len;

	/* UDP tunnel support */
	bool tx_tnl;

	bool rx_tnl;

	bool rx_tnl_csum;

	/* Work struct for config space updates */
	struct work_struct config_work;

	/* Work struct for setting rx mode */
	struct work_struct rx_mode_work;

	/* OK to queue work setting RX mode? */
	bool rx_mode_work_enabled;

	/* Is the affinity hint set for virtqueues? */
	bool affinity_hint_set;

	/* CPU hotplug instances for online & dead */
	struct hlist_node node;
	struct hlist_node node_dead;

	struct control_buf *ctrl;

	/* Ethtool settings */
	u8 duplex;
	u32 speed;

	/* Is rx dynamic interrupt moderation enabled? */
	bool rx_dim_enabled;

	/* Interrupt coalescing settings */
	struct virtnet_interrupt_coalesce intr_coal_tx;
	struct virtnet_interrupt_coalesce intr_coal_rx;

	unsigned long guest_offloads;
	unsigned long guest_offloads_capable;

	/* failover when STANDBY feature enabled */
	struct failover *failover;

	u64 device_stats_cap;

	struct virtio_net_rss_config_hdr *rss_hdr;

	/* Must be last as it ends in a flexible-array member. */
	TRAILING_OVERLAP(struct virtio_net_rss_config_trailer, rss_trailer, hash_key_data,
		u8 rss_hash_key_data[VIRTIO_NET_RSS_MAX_KEY_SIZE];
	);
};
static_assert(offsetof(struct virtnet_info, rss_trailer.hash_key_data) ==
	      offsetof(struct virtnet_info, rss_hash_key_data));

struct padded_vnet_hdr {
	struct virtio_net_hdr_v1_hash hdr;
	/*
	 * hdr is in a separate sg buffer, and data sg buffer shares same page
	 * with this header sg. This padding makes next sg 16 byte aligned
	 * after the header.
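	 *
	 * For reference: struct virtio_net_hdr_v1_hash is 20 bytes, so the
	 * 12 bytes of padding below bring sizeof(struct padded_vnet_hdr) to
	 * 32 bytes, a multiple of 16.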
493 */ 494 char padding[12]; 495 }; 496 497 struct virtio_net_common_hdr { 498 union { 499 struct virtio_net_hdr hdr; 500 struct virtio_net_hdr_mrg_rxbuf mrg_hdr; 501 struct virtio_net_hdr_v1_hash hash_v1_hdr; 502 struct virtio_net_hdr_v1_hash_tunnel tnl_hdr; 503 }; 504 }; 505 506 static struct virtio_net_common_hdr xsk_hdr; 507 508 static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf); 509 static void virtnet_sq_free_unused_buf_done(struct virtqueue *vq); 510 static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp, 511 struct net_device *dev, 512 unsigned int *xdp_xmit, 513 struct virtnet_rq_stats *stats); 514 static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq, 515 struct sk_buff *skb, u8 flags); 516 static struct sk_buff *virtnet_skb_append_frag(struct receive_queue *rq, 517 struct sk_buff *head_skb, 518 struct sk_buff *curr_skb, 519 struct page *page, void *buf, 520 int len, int truesize); 521 static void virtnet_xsk_completed(struct send_queue *sq, int num); 522 static void free_unused_bufs(struct virtnet_info *vi); 523 static void virtnet_del_vqs(struct virtnet_info *vi); 524 525 enum virtnet_xmit_type { 526 VIRTNET_XMIT_TYPE_SKB, 527 VIRTNET_XMIT_TYPE_SKB_ORPHAN, 528 VIRTNET_XMIT_TYPE_XDP, 529 VIRTNET_XMIT_TYPE_XSK, 530 }; 531 532 static size_t virtnet_rss_hdr_size(const struct virtnet_info *vi) 533 { 534 u16 indir_table_size = vi->has_rss ? vi->rss_indir_table_size : 1; 535 536 return struct_size(vi->rss_hdr, indirection_table, indir_table_size); 537 } 538 539 static size_t virtnet_rss_trailer_size(const struct virtnet_info *vi) 540 { 541 return struct_size(&vi->rss_trailer, hash_key_data, vi->rss_key_size); 542 } 543 544 /* We use the last two bits of the pointer to distinguish the xmit type. 
*/ 545 #define VIRTNET_XMIT_TYPE_MASK (BIT(0) | BIT(1)) 546 547 #define VIRTIO_XSK_FLAG_OFFSET 2 548 549 static enum virtnet_xmit_type virtnet_xmit_ptr_unpack(void **ptr) 550 { 551 unsigned long p = (unsigned long)*ptr; 552 553 *ptr = (void *)(p & ~VIRTNET_XMIT_TYPE_MASK); 554 555 return p & VIRTNET_XMIT_TYPE_MASK; 556 } 557 558 static void *virtnet_xmit_ptr_pack(void *ptr, enum virtnet_xmit_type type) 559 { 560 return (void *)((unsigned long)ptr | type); 561 } 562 563 static int virtnet_add_outbuf(struct send_queue *sq, int num, void *data, 564 enum virtnet_xmit_type type) 565 { 566 return virtqueue_add_outbuf(sq->vq, sq->sg, num, 567 virtnet_xmit_ptr_pack(data, type), 568 GFP_ATOMIC); 569 } 570 571 static u32 virtnet_ptr_to_xsk_buff_len(void *ptr) 572 { 573 return ((unsigned long)ptr) >> VIRTIO_XSK_FLAG_OFFSET; 574 } 575 576 static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len) 577 { 578 sg_dma_address(sg) = addr; 579 sg_dma_len(sg) = len; 580 } 581 582 static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq, 583 bool in_napi, struct virtnet_sq_free_stats *stats) 584 { 585 struct xdp_frame *frame; 586 struct sk_buff *skb; 587 unsigned int len; 588 void *ptr; 589 590 while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { 591 switch (virtnet_xmit_ptr_unpack(&ptr)) { 592 case VIRTNET_XMIT_TYPE_SKB: 593 skb = ptr; 594 595 pr_debug("Sent skb %p\n", skb); 596 stats->napi_packets++; 597 stats->napi_bytes += skb->len; 598 napi_consume_skb(skb, in_napi); 599 break; 600 601 case VIRTNET_XMIT_TYPE_SKB_ORPHAN: 602 skb = ptr; 603 604 stats->packets++; 605 stats->bytes += skb->len; 606 napi_consume_skb(skb, in_napi); 607 break; 608 609 case VIRTNET_XMIT_TYPE_XDP: 610 frame = ptr; 611 612 stats->packets++; 613 stats->bytes += xdp_get_frame_len(frame); 614 xdp_return_frame(frame); 615 break; 616 617 case VIRTNET_XMIT_TYPE_XSK: 618 stats->bytes += virtnet_ptr_to_xsk_buff_len(ptr); 619 stats->xsk++; 620 break; 621 } 622 } 623 netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes); 624 } 625 626 static void virtnet_free_old_xmit(struct send_queue *sq, 627 struct netdev_queue *txq, 628 bool in_napi, 629 struct virtnet_sq_free_stats *stats) 630 { 631 __free_old_xmit(sq, txq, in_napi, stats); 632 633 if (stats->xsk) 634 virtnet_xsk_completed(sq, stats->xsk); 635 } 636 637 /* Converting between virtqueue no. and kernel tx/rx queue no. 638 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq 639 */ 640 static int vq2txq(struct virtqueue *vq) 641 { 642 return (vq->index - 1) / 2; 643 } 644 645 static int txq2vq(int txq) 646 { 647 return txq * 2 + 1; 648 } 649 650 static int vq2rxq(struct virtqueue *vq) 651 { 652 return vq->index / 2; 653 } 654 655 static int rxq2vq(int rxq) 656 { 657 return rxq * 2; 658 } 659 660 static int vq_type(struct virtnet_info *vi, int qid) 661 { 662 if (qid == vi->max_queue_pairs * 2) 663 return VIRTNET_Q_TYPE_CQ; 664 665 if (qid % 2) 666 return VIRTNET_Q_TYPE_TX; 667 668 return VIRTNET_Q_TYPE_RX; 669 } 670 671 static inline struct virtio_net_common_hdr * 672 skb_vnet_common_hdr(struct sk_buff *skb) 673 { 674 return (struct virtio_net_common_hdr *)skb->cb; 675 } 676 677 /* 678 * private is used to chain pages for big packets, put the whole 679 * most recent used list in the beginning for reuse 680 */ 681 static void give_pages(struct receive_queue *rq, struct page *page) 682 { 683 struct page *end; 684 685 /* Find end of list, sew whole thing into vi->rq.pages. 
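	 * E.g. if @page heads the chain A->B and rq->pages currently holds
	 * C->D, the result is rq->pages == A->B->C->D, so the most recently
	 * used pages are handed out first by get_a_page().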
*/ 686 for (end = page; end->private; end = (struct page *)end->private); 687 end->private = (unsigned long)rq->pages; 688 rq->pages = page; 689 } 690 691 static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask) 692 { 693 struct page *p = rq->pages; 694 695 if (p) { 696 rq->pages = (struct page *)p->private; 697 /* clear private here, it is used to chain pages */ 698 p->private = 0; 699 } else 700 p = alloc_page(gfp_mask); 701 return p; 702 } 703 704 static void virtnet_rq_free_buf(struct virtnet_info *vi, 705 struct receive_queue *rq, void *buf) 706 { 707 if (!rq->page_pool) 708 give_pages(rq, buf); 709 else 710 page_pool_put_page(rq->page_pool, virt_to_head_page(buf), -1, false); 711 } 712 713 static void enable_rx_mode_work(struct virtnet_info *vi) 714 { 715 rtnl_lock(); 716 vi->rx_mode_work_enabled = true; 717 rtnl_unlock(); 718 } 719 720 static void disable_rx_mode_work(struct virtnet_info *vi) 721 { 722 rtnl_lock(); 723 vi->rx_mode_work_enabled = false; 724 rtnl_unlock(); 725 } 726 727 static void virtqueue_napi_schedule(struct napi_struct *napi, 728 struct virtqueue *vq) 729 { 730 if (napi_schedule_prep(napi)) { 731 virtqueue_disable_cb(vq); 732 __napi_schedule(napi); 733 } 734 } 735 736 static bool virtqueue_napi_complete(struct napi_struct *napi, 737 struct virtqueue *vq, int processed) 738 { 739 int opaque; 740 741 opaque = virtqueue_enable_cb_prepare(vq); 742 if (napi_complete_done(napi, processed)) { 743 if (unlikely(virtqueue_poll(vq, opaque))) 744 virtqueue_napi_schedule(napi, vq); 745 else 746 return true; 747 } else { 748 virtqueue_disable_cb(vq); 749 } 750 751 return false; 752 } 753 754 static void virtnet_tx_wake_queue(struct virtnet_info *vi, 755 struct send_queue *sq) 756 { 757 unsigned int index = vq2txq(sq->vq); 758 struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index); 759 760 if (netif_tx_queue_stopped(txq)) { 761 u64_stats_update_begin(&sq->stats.syncp); 762 u64_stats_inc(&sq->stats.wake); 763 u64_stats_update_end(&sq->stats.syncp); 764 netif_tx_wake_queue(txq); 765 } 766 } 767 768 static void skb_xmit_done(struct virtqueue *vq) 769 { 770 struct virtnet_info *vi = vq->vdev->priv; 771 unsigned int index = vq2txq(vq); 772 struct send_queue *sq = &vi->sq[index]; 773 struct napi_struct *napi = &sq->napi; 774 775 /* Suppress further interrupts. */ 776 virtqueue_disable_cb(vq); 777 778 if (napi->weight) 779 virtqueue_napi_schedule(napi, vq); 780 else 781 virtnet_tx_wake_queue(vi, sq); 782 } 783 784 #define MRG_CTX_HEADER_SHIFT 22 785 static void *mergeable_len_to_ctx(unsigned int truesize, 786 unsigned int headroom) 787 { 788 return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize); 789 } 790 791 static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx) 792 { 793 return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT; 794 } 795 796 static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx) 797 { 798 return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1); 799 } 800 801 static int check_mergeable_len(struct net_device *dev, void *mrg_ctx, 802 unsigned int len) 803 { 804 unsigned int headroom, tailroom, room, truesize; 805 806 truesize = mergeable_ctx_to_truesize(mrg_ctx); 807 headroom = mergeable_ctx_to_headroom(mrg_ctx); 808 tailroom = headroom ? 
sizeof(struct skb_shared_info) : 0; 809 room = SKB_DATA_ALIGN(headroom + tailroom); 810 811 if (len > truesize - room) { 812 pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 813 dev->name, len, (unsigned long)(truesize - room)); 814 DEV_STATS_INC(dev, rx_length_errors); 815 return -1; 816 } 817 818 return 0; 819 } 820 821 static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen, 822 unsigned int headroom, 823 unsigned int len) 824 { 825 struct sk_buff *skb; 826 827 skb = build_skb(buf, buflen); 828 if (unlikely(!skb)) 829 return NULL; 830 831 skb_reserve(skb, headroom); 832 skb_put(skb, len); 833 834 return skb; 835 } 836 837 /* Called from bottom half context */ 838 static struct sk_buff *page_to_skb(struct virtnet_info *vi, 839 struct receive_queue *rq, 840 struct page *page, unsigned int offset, 841 unsigned int len, unsigned int truesize, 842 unsigned int headroom) 843 { 844 struct sk_buff *skb; 845 struct virtio_net_common_hdr *hdr; 846 unsigned int copy, hdr_len, hdr_padded_len; 847 struct page *page_to_free = NULL; 848 int tailroom, shinfo_size; 849 char *p, *hdr_p, *buf; 850 851 p = page_address(page) + offset; 852 hdr_p = p; 853 854 hdr_len = vi->hdr_len; 855 if (vi->mergeable_rx_bufs) 856 hdr_padded_len = hdr_len; 857 else 858 hdr_padded_len = sizeof(struct padded_vnet_hdr); 859 860 buf = p - headroom; 861 len -= hdr_len; 862 offset += hdr_padded_len; 863 p += hdr_padded_len; 864 tailroom = truesize - headroom - hdr_padded_len - len; 865 866 shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 867 868 if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) { 869 skb = virtnet_build_skb(buf, truesize, p - buf, len); 870 if (unlikely(!skb)) 871 return NULL; 872 /* Big packets mode chains pages via page->private, which is 873 * incompatible with the way page_pool uses page->private. 874 * Currently, big packets mode doesn't use page pools. 875 */ 876 if (!rq->page_pool) { 877 page = (struct page *)page->private; 878 if (page) 879 give_pages(rq, page); 880 } 881 882 goto ok; 883 } 884 885 /* copy small packet so we can reuse these pages for small data */ 886 skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN); 887 if (unlikely(!skb)) 888 return NULL; 889 890 /* Copy all frame if it fits skb->head, otherwise 891 * we let virtio_net_hdr_to_skb() and GRO pull headers as needed. 
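	 * In practice that means either the whole frame (len bytes) or only
	 * ETH_HLEN bytes end up in the linear area; anything left over is
	 * attached as page fragments further down.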
892 */ 893 if (len <= skb_tailroom(skb)) 894 copy = len; 895 else 896 copy = ETH_HLEN; 897 skb_put_data(skb, p, copy); 898 899 len -= copy; 900 offset += copy; 901 902 if (vi->mergeable_rx_bufs) { 903 if (len) 904 skb_add_rx_frag(skb, 0, page, offset, len, truesize); 905 else 906 page_to_free = page; 907 goto ok; 908 } 909 910 BUG_ON(offset >= PAGE_SIZE); 911 while (len) { 912 unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len); 913 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset, 914 frag_size, truesize); 915 len -= frag_size; 916 page = (struct page *)page->private; 917 offset = 0; 918 } 919 920 if (page) 921 give_pages(rq, page); 922 923 ok: 924 hdr = skb_vnet_common_hdr(skb); 925 memcpy(hdr, hdr_p, hdr_len); 926 if (page_to_free) 927 page_pool_put_page(rq->page_pool, page_to_free, -1, true); 928 929 return skb; 930 } 931 932 static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx) 933 { 934 BUG_ON(!rq->page_pool); 935 936 return virtqueue_get_buf_ctx(rq->vq, len, ctx); 937 } 938 939 static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf) 940 { 941 struct virtnet_info *vi = vq->vdev->priv; 942 struct receive_queue *rq; 943 int i = vq2rxq(vq); 944 945 rq = &vi->rq[i]; 946 947 if (rq->xsk_pool) { 948 xsk_buff_free((struct xdp_buff *)buf); 949 return; 950 } 951 952 virtnet_rq_free_buf(vi, rq, buf); 953 } 954 955 static void free_old_xmit(struct send_queue *sq, struct netdev_queue *txq, 956 bool in_napi) 957 { 958 struct virtnet_sq_free_stats stats = {0}; 959 960 virtnet_free_old_xmit(sq, txq, in_napi, &stats); 961 962 /* Avoid overhead when no packets have been processed 963 * happens when called speculatively from start_xmit. 964 */ 965 if (!stats.packets && !stats.napi_packets) 966 return; 967 968 u64_stats_update_begin(&sq->stats.syncp); 969 u64_stats_add(&sq->stats.bytes, stats.bytes + stats.napi_bytes); 970 u64_stats_add(&sq->stats.packets, stats.packets + stats.napi_packets); 971 u64_stats_update_end(&sq->stats.syncp); 972 } 973 974 static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) 975 { 976 if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) 977 return false; 978 else if (q < vi->curr_queue_pairs) 979 return true; 980 else 981 return false; 982 } 983 984 static bool tx_may_stop(struct virtnet_info *vi, 985 struct net_device *dev, 986 struct send_queue *sq) 987 { 988 int qnum; 989 990 qnum = sq - vi->sq; 991 992 /* If running out of space, stop queue to avoid getting packets that we 993 * are then unable to transmit. 994 * An alternative would be to force queuing layer to requeue the skb by 995 * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be 996 * returned in a normal path of operation: it means that driver is not 997 * maintaining the TX queue stop/start state properly, and causes 998 * the stack to do a non-trivial amount of useless work. 999 * Since most packets only take 1 or 2 ring slots, stopping the queue 1000 * early means 16 slots are typically wasted. 
1001 */ 1002 if (sq->vq->num_free < MAX_SKB_FRAGS + 2) { 1003 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); 1004 1005 netif_tx_stop_queue(txq); 1006 u64_stats_update_begin(&sq->stats.syncp); 1007 u64_stats_inc(&sq->stats.stop); 1008 u64_stats_update_end(&sq->stats.syncp); 1009 1010 return true; 1011 } 1012 1013 return false; 1014 } 1015 1016 static void check_sq_full_and_disable(struct virtnet_info *vi, 1017 struct net_device *dev, 1018 struct send_queue *sq) 1019 { 1020 bool use_napi = sq->napi.weight; 1021 int qnum; 1022 1023 qnum = sq - vi->sq; 1024 1025 if (tx_may_stop(vi, dev, sq)) { 1026 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); 1027 1028 if (use_napi) { 1029 if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) 1030 virtqueue_napi_schedule(&sq->napi, sq->vq); 1031 } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { 1032 /* More just got used, free them then recheck. */ 1033 free_old_xmit(sq, txq, false); 1034 if (sq->vq->num_free >= MAX_SKB_FRAGS + 2) { 1035 netif_start_subqueue(dev, qnum); 1036 u64_stats_update_begin(&sq->stats.syncp); 1037 u64_stats_inc(&sq->stats.wake); 1038 u64_stats_update_end(&sq->stats.syncp); 1039 virtqueue_disable_cb(sq->vq); 1040 } 1041 } 1042 } 1043 } 1044 1045 /* Note that @len is the length of received data without virtio header */ 1046 static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi, 1047 struct receive_queue *rq, void *buf, 1048 u32 len, bool first_buf) 1049 { 1050 struct xdp_buff *xdp; 1051 u32 bufsize; 1052 1053 xdp = (struct xdp_buff *)buf; 1054 1055 /* In virtnet_add_recvbuf_xsk, we use part of XDP_PACKET_HEADROOM for 1056 * virtio header and ask the vhost to fill data from 1057 * hard_start + XDP_PACKET_HEADROOM - vi->hdr_len 1058 * The first buffer has virtio header so the remaining region for frame 1059 * data is 1060 * xsk_pool_get_rx_frame_size() 1061 * While other buffers than the first one do not have virtio header, so 1062 * the maximum frame data's length can be 1063 * xsk_pool_get_rx_frame_size() + vi->hdr_len 1064 */ 1065 bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool); 1066 if (!first_buf) 1067 bufsize += vi->hdr_len; 1068 1069 if (unlikely(len > bufsize)) { 1070 pr_debug("%s: rx error: len %u exceeds truesize %u\n", 1071 vi->dev->name, len, bufsize); 1072 DEV_STATS_INC(vi->dev, rx_length_errors); 1073 xsk_buff_free(xdp); 1074 return NULL; 1075 } 1076 1077 if (first_buf) { 1078 xsk_buff_set_size(xdp, len); 1079 } else { 1080 xdp_prepare_buff(xdp, xdp->data_hard_start, 1081 XDP_PACKET_HEADROOM - vi->hdr_len, len, 1); 1082 xdp->flags = 0; 1083 } 1084 1085 xsk_buff_dma_sync_for_cpu(xdp); 1086 1087 return xdp; 1088 } 1089 1090 static struct sk_buff *xsk_construct_skb(struct receive_queue *rq, 1091 struct xdp_buff *xdp) 1092 { 1093 unsigned int metasize = xdp->data - xdp->data_meta; 1094 struct sk_buff *skb; 1095 unsigned int size; 1096 1097 size = xdp->data_end - xdp->data_hard_start; 1098 skb = napi_alloc_skb(&rq->napi, size); 1099 if (unlikely(!skb)) { 1100 xsk_buff_free(xdp); 1101 return NULL; 1102 } 1103 1104 skb_reserve(skb, xdp->data_meta - xdp->data_hard_start); 1105 1106 size = xdp->data_end - xdp->data_meta; 1107 memcpy(__skb_put(skb, size), xdp->data_meta, size); 1108 1109 if (metasize) { 1110 __skb_pull(skb, metasize); 1111 skb_metadata_set(skb, metasize); 1112 } 1113 1114 xsk_buff_free(xdp); 1115 1116 return skb; 1117 } 1118 1119 static struct sk_buff *virtnet_receive_xsk_small(struct net_device *dev, struct virtnet_info *vi, 1120 struct receive_queue *rq, struct xdp_buff *xdp, 
1121 unsigned int *xdp_xmit, 1122 struct virtnet_rq_stats *stats) 1123 { 1124 struct bpf_prog *prog; 1125 u32 ret; 1126 1127 ret = XDP_PASS; 1128 rcu_read_lock(); 1129 prog = rcu_dereference(rq->xdp_prog); 1130 if (prog) 1131 ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats); 1132 rcu_read_unlock(); 1133 1134 switch (ret) { 1135 case XDP_PASS: 1136 return xsk_construct_skb(rq, xdp); 1137 1138 case XDP_TX: 1139 case XDP_REDIRECT: 1140 return NULL; 1141 1142 default: 1143 /* drop packet */ 1144 xsk_buff_free(xdp); 1145 u64_stats_inc(&stats->drops); 1146 return NULL; 1147 } 1148 } 1149 1150 static void xsk_drop_follow_bufs(struct net_device *dev, 1151 struct receive_queue *rq, 1152 u32 num_buf, 1153 struct virtnet_rq_stats *stats) 1154 { 1155 struct xdp_buff *xdp; 1156 u32 len; 1157 1158 while (num_buf-- > 1) { 1159 xdp = virtqueue_get_buf(rq->vq, &len); 1160 if (unlikely(!xdp)) { 1161 pr_debug("%s: rx error: %d buffers missing\n", 1162 dev->name, num_buf); 1163 DEV_STATS_INC(dev, rx_length_errors); 1164 break; 1165 } 1166 u64_stats_add(&stats->bytes, len); 1167 xsk_buff_free(xdp); 1168 } 1169 } 1170 1171 static int xsk_append_merge_buffer(struct virtnet_info *vi, 1172 struct receive_queue *rq, 1173 struct sk_buff *head_skb, 1174 u32 num_buf, 1175 struct virtio_net_hdr_mrg_rxbuf *hdr, 1176 struct virtnet_rq_stats *stats) 1177 { 1178 struct sk_buff *curr_skb; 1179 struct xdp_buff *xdp; 1180 u32 len, truesize; 1181 struct page *page; 1182 void *buf; 1183 1184 curr_skb = head_skb; 1185 1186 while (--num_buf) { 1187 buf = virtqueue_get_buf(rq->vq, &len); 1188 if (unlikely(!buf)) { 1189 pr_debug("%s: rx error: %d buffers out of %d missing\n", 1190 vi->dev->name, num_buf, 1191 virtio16_to_cpu(vi->vdev, 1192 hdr->num_buffers)); 1193 DEV_STATS_INC(vi->dev, rx_length_errors); 1194 return -EINVAL; 1195 } 1196 1197 u64_stats_add(&stats->bytes, len); 1198 1199 xdp = buf_to_xdp(vi, rq, buf, len, false); 1200 if (!xdp) 1201 goto err; 1202 1203 buf = napi_alloc_frag(len); 1204 if (!buf) { 1205 xsk_buff_free(xdp); 1206 goto err; 1207 } 1208 1209 memcpy(buf, xdp->data, len); 1210 1211 xsk_buff_free(xdp); 1212 1213 page = virt_to_page(buf); 1214 1215 truesize = len; 1216 1217 curr_skb = virtnet_skb_append_frag(rq, head_skb, curr_skb, page, 1218 buf, len, truesize); 1219 if (!curr_skb) { 1220 put_page(page); 1221 goto err; 1222 } 1223 } 1224 1225 return 0; 1226 1227 err: 1228 xsk_drop_follow_bufs(vi->dev, rq, num_buf, stats); 1229 return -EINVAL; 1230 } 1231 1232 static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct virtnet_info *vi, 1233 struct receive_queue *rq, struct xdp_buff *xdp, 1234 unsigned int *xdp_xmit, 1235 struct virtnet_rq_stats *stats) 1236 { 1237 struct virtio_net_hdr_mrg_rxbuf *hdr; 1238 struct bpf_prog *prog; 1239 struct sk_buff *skb; 1240 u32 ret, num_buf; 1241 1242 hdr = xdp->data - vi->hdr_len; 1243 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 1244 1245 ret = XDP_PASS; 1246 rcu_read_lock(); 1247 prog = rcu_dereference(rq->xdp_prog); 1248 if (prog) { 1249 /* TODO: support multi buffer. 
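		 * Until then, a packet spanning more than one buffer
		 * (num_buf > 1) is treated as XDP_ABORTED below and dropped.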
*/ 1250 if (num_buf == 1) 1251 ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, 1252 stats); 1253 else 1254 ret = XDP_ABORTED; 1255 } 1256 rcu_read_unlock(); 1257 1258 switch (ret) { 1259 case XDP_PASS: 1260 skb = xsk_construct_skb(rq, xdp); 1261 if (!skb) 1262 goto drop_bufs; 1263 1264 if (xsk_append_merge_buffer(vi, rq, skb, num_buf, hdr, stats)) { 1265 dev_kfree_skb(skb); 1266 goto drop; 1267 } 1268 1269 return skb; 1270 1271 case XDP_TX: 1272 case XDP_REDIRECT: 1273 return NULL; 1274 1275 default: 1276 /* drop packet */ 1277 xsk_buff_free(xdp); 1278 } 1279 1280 drop_bufs: 1281 xsk_drop_follow_bufs(dev, rq, num_buf, stats); 1282 1283 drop: 1284 u64_stats_inc(&stats->drops); 1285 return NULL; 1286 } 1287 1288 static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queue *rq, 1289 void *buf, u32 len, 1290 unsigned int *xdp_xmit, 1291 struct virtnet_rq_stats *stats) 1292 { 1293 struct net_device *dev = vi->dev; 1294 struct sk_buff *skb = NULL; 1295 struct xdp_buff *xdp; 1296 u8 flags; 1297 1298 len -= vi->hdr_len; 1299 1300 u64_stats_add(&stats->bytes, len); 1301 1302 xdp = buf_to_xdp(vi, rq, buf, len, true); 1303 if (!xdp) 1304 return; 1305 1306 if (unlikely(len < ETH_HLEN)) { 1307 pr_debug("%s: short packet %i\n", dev->name, len); 1308 DEV_STATS_INC(dev, rx_length_errors); 1309 xsk_buff_free(xdp); 1310 return; 1311 } 1312 1313 flags = ((struct virtio_net_common_hdr *)(xdp->data - vi->hdr_len))->hdr.flags; 1314 1315 if (!vi->mergeable_rx_bufs) 1316 skb = virtnet_receive_xsk_small(dev, vi, rq, xdp, xdp_xmit, stats); 1317 else 1318 skb = virtnet_receive_xsk_merge(dev, vi, rq, xdp, xdp_xmit, stats); 1319 1320 if (skb) 1321 virtnet_receive_done(vi, rq, skb, flags); 1322 } 1323 1324 static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue *rq, 1325 struct xsk_buff_pool *pool, gfp_t gfp) 1326 { 1327 struct xdp_buff **xsk_buffs; 1328 dma_addr_t addr; 1329 int err = 0; 1330 u32 len, i; 1331 int num; 1332 1333 xsk_buffs = rq->xsk_buffs; 1334 1335 num = xsk_buff_alloc_batch(pool, xsk_buffs, rq->vq->num_free); 1336 if (!num) { 1337 if (xsk_uses_need_wakeup(pool)) { 1338 xsk_set_rx_need_wakeup(pool); 1339 /* Return 0 instead of -ENOMEM so that NAPI is 1340 * descheduled. 1341 */ 1342 return 0; 1343 } 1344 1345 return -ENOMEM; 1346 } else { 1347 xsk_clear_rx_need_wakeup(pool); 1348 } 1349 1350 len = xsk_pool_get_rx_frame_size(pool) + vi->hdr_len; 1351 1352 for (i = 0; i < num; ++i) { 1353 /* Use the part of XDP_PACKET_HEADROOM as the virtnet hdr space. 1354 * We assume XDP_PACKET_HEADROOM is larger than hdr->len. 
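		 * Each chunk is therefore laid out as
		 * | XDP_PACKET_HEADROOM - hdr_len | virtio hdr | frame data |,
		 * and only the last two parts are posted to the device, which
		 * is why addr is rewound by vi->hdr_len below.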
1355 * (see function virtnet_xsk_pool_enable) 1356 */ 1357 addr = xsk_buff_xdp_get_dma(xsk_buffs[i]) - vi->hdr_len; 1358 1359 sg_init_table(rq->sg, 1); 1360 sg_fill_dma(rq->sg, addr, len); 1361 1362 err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1, 1363 xsk_buffs[i], NULL, gfp); 1364 if (err) 1365 goto err; 1366 } 1367 1368 return num; 1369 1370 err: 1371 for (; i < num; ++i) 1372 xsk_buff_free(xsk_buffs[i]); 1373 1374 return err; 1375 } 1376 1377 static void *virtnet_xsk_to_ptr(u32 len) 1378 { 1379 unsigned long p; 1380 1381 p = len << VIRTIO_XSK_FLAG_OFFSET; 1382 1383 return virtnet_xmit_ptr_pack((void *)p, VIRTNET_XMIT_TYPE_XSK); 1384 } 1385 1386 static int virtnet_xsk_xmit_one(struct send_queue *sq, 1387 struct xsk_buff_pool *pool, 1388 struct xdp_desc *desc) 1389 { 1390 struct virtnet_info *vi; 1391 dma_addr_t addr; 1392 1393 vi = sq->vq->vdev->priv; 1394 1395 addr = xsk_buff_raw_get_dma(pool, desc->addr); 1396 xsk_buff_raw_dma_sync_for_device(pool, addr, desc->len); 1397 1398 sg_init_table(sq->sg, 2); 1399 sg_fill_dma(sq->sg, sq->xsk_hdr_dma_addr, vi->hdr_len); 1400 sg_fill_dma(sq->sg + 1, addr, desc->len); 1401 1402 return virtqueue_add_outbuf_premapped(sq->vq, sq->sg, 2, 1403 virtnet_xsk_to_ptr(desc->len), 1404 GFP_ATOMIC); 1405 } 1406 1407 static int virtnet_xsk_xmit_batch(struct send_queue *sq, 1408 struct xsk_buff_pool *pool, 1409 unsigned int budget, 1410 u64 *kicks) 1411 { 1412 struct xdp_desc *descs = pool->tx_descs; 1413 bool kick = false; 1414 u32 nb_pkts, i; 1415 int err; 1416 1417 budget = min_t(u32, budget, sq->vq->num_free); 1418 1419 nb_pkts = xsk_tx_peek_release_desc_batch(pool, budget); 1420 if (!nb_pkts) 1421 return 0; 1422 1423 for (i = 0; i < nb_pkts; i++) { 1424 err = virtnet_xsk_xmit_one(sq, pool, &descs[i]); 1425 if (unlikely(err)) { 1426 xsk_tx_completed(sq->xsk_pool, nb_pkts - i); 1427 break; 1428 } 1429 1430 kick = true; 1431 } 1432 1433 if (kick && virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) 1434 (*kicks)++; 1435 1436 return i; 1437 } 1438 1439 static bool virtnet_xsk_xmit(struct send_queue *sq, struct xsk_buff_pool *pool, 1440 int budget) 1441 { 1442 struct virtnet_info *vi = sq->vq->vdev->priv; 1443 struct virtnet_sq_free_stats stats = {}; 1444 struct net_device *dev = vi->dev; 1445 u64 kicks = 0; 1446 int sent; 1447 1448 /* Avoid to wakeup napi meanless, so call __free_old_xmit instead of 1449 * free_old_xmit(). 
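	 * free_old_xmit() would report XSK completions via
	 * virtnet_xsk_completed(), which also kicks the TX NAPI; here the
	 * completions are passed to xsk_tx_completed() directly just below.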
1450 */ 1451 __free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq), true, &stats); 1452 1453 if (stats.xsk) 1454 xsk_tx_completed(sq->xsk_pool, stats.xsk); 1455 1456 sent = virtnet_xsk_xmit_batch(sq, pool, budget, &kicks); 1457 1458 if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq)) 1459 check_sq_full_and_disable(vi, vi->dev, sq); 1460 1461 if (sent) { 1462 struct netdev_queue *txq; 1463 1464 txq = netdev_get_tx_queue(vi->dev, sq - vi->sq); 1465 txq_trans_cond_update(txq); 1466 } 1467 1468 u64_stats_update_begin(&sq->stats.syncp); 1469 u64_stats_add(&sq->stats.packets, stats.packets); 1470 u64_stats_add(&sq->stats.bytes, stats.bytes); 1471 u64_stats_add(&sq->stats.kicks, kicks); 1472 u64_stats_add(&sq->stats.xdp_tx, sent); 1473 u64_stats_update_end(&sq->stats.syncp); 1474 1475 if (xsk_uses_need_wakeup(pool)) 1476 xsk_set_tx_need_wakeup(pool); 1477 1478 return sent; 1479 } 1480 1481 static void xsk_wakeup(struct napi_struct *napi, struct virtqueue *vq) 1482 { 1483 if (napi_if_scheduled_mark_missed(napi)) 1484 return; 1485 1486 local_bh_disable(); 1487 virtqueue_napi_schedule(napi, vq); 1488 local_bh_enable(); 1489 } 1490 1491 static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag) 1492 { 1493 struct virtnet_info *vi = netdev_priv(dev); 1494 1495 if (!netif_running(dev)) 1496 return -ENETDOWN; 1497 1498 if (qid >= vi->curr_queue_pairs) 1499 return -EINVAL; 1500 1501 if (flag & XDP_WAKEUP_TX) { 1502 struct send_queue *sq = &vi->sq[qid]; 1503 1504 xsk_wakeup(&sq->napi, sq->vq); 1505 } 1506 1507 if (flag & XDP_WAKEUP_RX) { 1508 struct receive_queue *rq = &vi->rq[qid]; 1509 1510 xsk_wakeup(&rq->napi, rq->vq); 1511 } 1512 1513 return 0; 1514 } 1515 1516 static void virtnet_xsk_completed(struct send_queue *sq, int num) 1517 { 1518 xsk_tx_completed(sq->xsk_pool, num); 1519 1520 /* If this is called by rx poll, start_xmit and xdp xmit we should 1521 * wakeup the tx napi to consume the xsk tx queue, because the tx 1522 * interrupt may not be triggered. 1523 */ 1524 xsk_wakeup(&sq->napi, sq->vq); 1525 } 1526 1527 static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, 1528 struct send_queue *sq, 1529 struct xdp_frame *xdpf) 1530 { 1531 struct virtio_net_hdr_mrg_rxbuf *hdr; 1532 struct skb_shared_info *shinfo; 1533 u8 nr_frags = 0; 1534 int err, i; 1535 1536 if (unlikely(xdpf->headroom < vi->hdr_len)) 1537 return -EOVERFLOW; 1538 1539 if (unlikely(xdp_frame_has_frags(xdpf))) { 1540 shinfo = xdp_get_shared_info_from_frame(xdpf); 1541 nr_frags = shinfo->nr_frags; 1542 } 1543 1544 /* In wrapping function virtnet_xdp_xmit(), we need to free 1545 * up the pending old buffers, where we need to calculate the 1546 * position of skb_shared_info in xdp_get_frame_len() and 1547 * xdp_return_frame(), which will involve to xdpf->data and 1548 * xdpf->headroom. Therefore, we need to update the value of 1549 * headroom synchronously here. 
1550 */ 1551 xdpf->headroom -= vi->hdr_len; 1552 xdpf->data -= vi->hdr_len; 1553 /* Zero header and leave csum up to XDP layers */ 1554 hdr = xdpf->data; 1555 memset(hdr, 0, vi->hdr_len); 1556 xdpf->len += vi->hdr_len; 1557 1558 sg_init_table(sq->sg, nr_frags + 1); 1559 sg_set_buf(sq->sg, xdpf->data, xdpf->len); 1560 for (i = 0; i < nr_frags; i++) { 1561 skb_frag_t *frag = &shinfo->frags[i]; 1562 1563 sg_set_page(&sq->sg[i + 1], skb_frag_page(frag), 1564 skb_frag_size(frag), skb_frag_off(frag)); 1565 } 1566 1567 err = virtnet_add_outbuf(sq, nr_frags + 1, xdpf, VIRTNET_XMIT_TYPE_XDP); 1568 if (unlikely(err)) 1569 return -ENOSPC; /* Caller handle free/refcnt */ 1570 1571 return 0; 1572 } 1573 1574 /* when vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on 1575 * the current cpu, so it does not need to be locked. 1576 * 1577 * Here we use marco instead of inline functions because we have to deal with 1578 * three issues at the same time: 1. the choice of sq. 2. judge and execute the 1579 * lock/unlock of txq 3. make sparse happy. It is difficult for two inline 1580 * functions to perfectly solve these three problems at the same time. 1581 */ 1582 #define virtnet_xdp_get_sq(vi) ({ \ 1583 int cpu = smp_processor_id(); \ 1584 struct netdev_queue *txq; \ 1585 typeof(vi) v = (vi); \ 1586 unsigned int qp; \ 1587 \ 1588 if (v->curr_queue_pairs > nr_cpu_ids) { \ 1589 qp = v->curr_queue_pairs - v->xdp_queue_pairs; \ 1590 qp += cpu; \ 1591 txq = netdev_get_tx_queue(v->dev, qp); \ 1592 __netif_tx_acquire(txq); \ 1593 } else { \ 1594 qp = cpu % v->curr_queue_pairs; \ 1595 txq = netdev_get_tx_queue(v->dev, qp); \ 1596 __netif_tx_lock(txq, cpu); \ 1597 } \ 1598 v->sq + qp; \ 1599 }) 1600 1601 #define virtnet_xdp_put_sq(vi, q) { \ 1602 struct netdev_queue *txq; \ 1603 typeof(vi) v = (vi); \ 1604 \ 1605 txq = netdev_get_tx_queue(v->dev, (q) - v->sq); \ 1606 if (v->curr_queue_pairs > nr_cpu_ids) \ 1607 __netif_tx_release(txq); \ 1608 else \ 1609 __netif_tx_unlock(txq); \ 1610 } 1611 1612 static int virtnet_xdp_xmit(struct net_device *dev, 1613 int n, struct xdp_frame **frames, u32 flags) 1614 { 1615 struct virtnet_info *vi = netdev_priv(dev); 1616 struct virtnet_sq_free_stats stats = {0}; 1617 struct receive_queue *rq = vi->rq; 1618 struct bpf_prog *xdp_prog; 1619 struct send_queue *sq; 1620 int nxmit = 0; 1621 int kicks = 0; 1622 int ret; 1623 int i; 1624 1625 /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this 1626 * indicate XDP resources have been successfully allocated. 1627 */ 1628 xdp_prog = rcu_access_pointer(rq->xdp_prog); 1629 if (!xdp_prog) 1630 return -ENXIO; 1631 1632 sq = virtnet_xdp_get_sq(vi); 1633 1634 if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { 1635 ret = -EINVAL; 1636 goto out; 1637 } 1638 1639 /* Free up any pending old buffers before queueing new ones. 
*/ 1640 virtnet_free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq), 1641 false, &stats); 1642 1643 for (i = 0; i < n; i++) { 1644 struct xdp_frame *xdpf = frames[i]; 1645 1646 if (__virtnet_xdp_xmit_one(vi, sq, xdpf)) 1647 break; 1648 nxmit++; 1649 } 1650 ret = nxmit; 1651 1652 if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq)) 1653 check_sq_full_and_disable(vi, dev, sq); 1654 1655 if (flags & XDP_XMIT_FLUSH) { 1656 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) 1657 kicks = 1; 1658 } 1659 out: 1660 u64_stats_update_begin(&sq->stats.syncp); 1661 u64_stats_add(&sq->stats.bytes, stats.bytes); 1662 u64_stats_add(&sq->stats.packets, stats.packets); 1663 u64_stats_add(&sq->stats.xdp_tx, n); 1664 u64_stats_add(&sq->stats.xdp_tx_drops, n - nxmit); 1665 u64_stats_add(&sq->stats.kicks, kicks); 1666 u64_stats_update_end(&sq->stats.syncp); 1667 1668 virtnet_xdp_put_sq(vi, sq); 1669 return ret; 1670 } 1671 1672 static void put_xdp_frags(struct receive_queue *rq, struct xdp_buff *xdp) 1673 { 1674 struct skb_shared_info *shinfo; 1675 struct page *xdp_page; 1676 int i; 1677 1678 if (xdp_buff_has_frags(xdp)) { 1679 shinfo = xdp_get_shared_info_from_buff(xdp); 1680 for (i = 0; i < shinfo->nr_frags; i++) { 1681 xdp_page = skb_frag_page(&shinfo->frags[i]); 1682 page_pool_put_page(rq->page_pool, xdp_page, -1, true); 1683 } 1684 } 1685 } 1686 1687 static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp, 1688 struct net_device *dev, 1689 unsigned int *xdp_xmit, 1690 struct virtnet_rq_stats *stats) 1691 { 1692 struct xdp_frame *xdpf; 1693 int err; 1694 u32 act; 1695 1696 act = bpf_prog_run_xdp(xdp_prog, xdp); 1697 u64_stats_inc(&stats->xdp_packets); 1698 1699 switch (act) { 1700 case XDP_PASS: 1701 return act; 1702 1703 case XDP_TX: 1704 u64_stats_inc(&stats->xdp_tx); 1705 xdpf = xdp_convert_buff_to_frame(xdp); 1706 if (unlikely(!xdpf)) { 1707 netdev_dbg(dev, "convert buff to frame failed for xdp\n"); 1708 return XDP_DROP; 1709 } 1710 1711 err = virtnet_xdp_xmit(dev, 1, &xdpf, 0); 1712 if (unlikely(!err)) { 1713 xdp_return_frame_rx_napi(xdpf); 1714 } else if (unlikely(err < 0)) { 1715 trace_xdp_exception(dev, xdp_prog, act); 1716 return XDP_DROP; 1717 } 1718 *xdp_xmit |= VIRTIO_XDP_TX; 1719 return act; 1720 1721 case XDP_REDIRECT: 1722 u64_stats_inc(&stats->xdp_redirects); 1723 err = xdp_do_redirect(dev, xdp, xdp_prog); 1724 if (err) 1725 return XDP_DROP; 1726 1727 *xdp_xmit |= VIRTIO_XDP_REDIR; 1728 return act; 1729 1730 default: 1731 bpf_warn_invalid_xdp_action(dev, xdp_prog, act); 1732 fallthrough; 1733 case XDP_ABORTED: 1734 trace_xdp_exception(dev, xdp_prog, act); 1735 fallthrough; 1736 case XDP_DROP: 1737 return XDP_DROP; 1738 } 1739 } 1740 1741 static unsigned int virtnet_get_headroom(struct virtnet_info *vi) 1742 { 1743 return vi->xdp_enabled ? XDP_PACKET_HEADROOM : 0; 1744 } 1745 1746 /* We copy the packet for XDP in the following cases: 1747 * 1748 * 1) Packet is scattered across multiple rx buffers. 1749 * 2) Headroom space is insufficient. 1750 * 1751 * This is inefficient but it's a temporary condition that 1752 * we hit right after XDP is enabled and until queue is refilled 1753 * with large buffers with sufficient headroom - so it should affect 1754 * at most queue size packets. 1755 * Afterwards, the conditions to enable 1756 * XDP should preclude the underlying device from sending packets 1757 * across multiple buffers (num_buf > 1), and we make sure buffers 1758 * have enough headroom. 
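 *
 * The copy itself is done by xdp_linearize_page() below: the remaining
 * buffers are pulled off the ring and their contents copied into a single
 * page allocated from the page pool, preserving the caller-supplied
 * headroom.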
1759 */ 1760 static struct page *xdp_linearize_page(struct net_device *dev, 1761 struct receive_queue *rq, 1762 int *num_buf, 1763 struct page *p, 1764 int offset, 1765 int page_off, 1766 unsigned int *len) 1767 { 1768 int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1769 struct page *page; 1770 1771 if (page_off + *len + tailroom > PAGE_SIZE) 1772 return NULL; 1773 1774 page = page_pool_alloc_pages(rq->page_pool, GFP_ATOMIC); 1775 if (!page) 1776 return NULL; 1777 1778 memcpy(page_address(page) + page_off, page_address(p) + offset, *len); 1779 page_off += *len; 1780 1781 /* Only mergeable mode can go inside this while loop. In small mode, 1782 * *num_buf == 1, so it cannot go inside. 1783 */ 1784 while (--*num_buf) { 1785 unsigned int buflen; 1786 void *buf; 1787 void *ctx; 1788 int off; 1789 1790 buf = virtnet_rq_get_buf(rq, &buflen, &ctx); 1791 if (unlikely(!buf)) 1792 goto err_buf; 1793 1794 p = virt_to_head_page(buf); 1795 off = buf - page_address(p); 1796 1797 if (rq->use_page_pool_dma) 1798 page_pool_dma_sync_for_cpu(rq->page_pool, p, 1799 off, buflen); 1800 1801 if (check_mergeable_len(dev, ctx, buflen)) { 1802 page_pool_put_page(rq->page_pool, p, -1, true); 1803 goto err_buf; 1804 } 1805 1806 /* guard against a misconfigured or uncooperative backend that 1807 * is sending packet larger than the MTU. 1808 */ 1809 if ((page_off + buflen + tailroom) > PAGE_SIZE) { 1810 page_pool_put_page(rq->page_pool, p, -1, true); 1811 goto err_buf; 1812 } 1813 1814 memcpy(page_address(page) + page_off, 1815 page_address(p) + off, buflen); 1816 page_off += buflen; 1817 page_pool_put_page(rq->page_pool, p, -1, true); 1818 } 1819 1820 /* Headroom does not contribute to packet length */ 1821 *len = page_off - XDP_PACKET_HEADROOM; 1822 return page; 1823 err_buf: 1824 page_pool_put_page(rq->page_pool, page, -1, true); 1825 return NULL; 1826 } 1827 1828 static struct sk_buff *receive_small_build_skb(struct virtnet_info *vi, 1829 unsigned int xdp_headroom, 1830 void *buf, 1831 unsigned int len, 1832 unsigned int buflen) 1833 { 1834 unsigned int header_offset; 1835 unsigned int headroom; 1836 struct sk_buff *skb; 1837 1838 header_offset = VIRTNET_RX_PAD + xdp_headroom; 1839 headroom = vi->hdr_len + header_offset; 1840 1841 skb = virtnet_build_skb(buf, buflen, headroom, len); 1842 if (unlikely(!skb)) 1843 return NULL; 1844 1845 buf += header_offset; 1846 memcpy(skb_vnet_common_hdr(skb), buf, vi->hdr_len); 1847 1848 return skb; 1849 } 1850 1851 static struct sk_buff *receive_small_xdp(struct net_device *dev, 1852 struct virtnet_info *vi, 1853 struct receive_queue *rq, 1854 struct bpf_prog *xdp_prog, 1855 void *buf, 1856 unsigned int xdp_headroom, 1857 unsigned int len, 1858 unsigned int buflen, 1859 unsigned int *xdp_xmit, 1860 struct virtnet_rq_stats *stats) 1861 { 1862 unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom; 1863 unsigned int headroom = vi->hdr_len + header_offset; 1864 struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset; 1865 struct page *page = virt_to_head_page(buf); 1866 struct page *xdp_page; 1867 struct xdp_buff xdp; 1868 struct sk_buff *skb; 1869 unsigned int metasize = 0; 1870 u32 act; 1871 1872 if (unlikely(hdr->hdr.gso_type)) 1873 goto err_xdp; 1874 1875 /* Partially checksummed packets must be dropped. 
*/ 1876 if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) 1877 goto err_xdp; 1878 1879 if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) { 1880 int offset = buf - page_address(page) + header_offset; 1881 unsigned int tlen = len + vi->hdr_len; 1882 int num_buf = 1; 1883 1884 xdp_headroom = virtnet_get_headroom(vi); 1885 header_offset = VIRTNET_RX_PAD + xdp_headroom; 1886 headroom = vi->hdr_len + header_offset; 1887 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 1888 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1889 xdp_page = xdp_linearize_page(dev, rq, &num_buf, page, 1890 offset, header_offset, 1891 &tlen); 1892 if (!xdp_page) 1893 goto err_xdp; 1894 1895 buf = page_address(xdp_page); 1896 page_pool_put_page(rq->page_pool, page, -1, true); 1897 page = xdp_page; 1898 } 1899 1900 xdp_init_buff(&xdp, buflen, &rq->xdp_rxq); 1901 xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len, 1902 xdp_headroom, len, true); 1903 1904 act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats); 1905 1906 switch (act) { 1907 case XDP_PASS: 1908 /* Recalculate length in case bpf program changed it */ 1909 len = xdp.data_end - xdp.data; 1910 metasize = xdp.data - xdp.data_meta; 1911 break; 1912 1913 case XDP_TX: 1914 case XDP_REDIRECT: 1915 goto xdp_xmit; 1916 1917 default: 1918 goto err_xdp; 1919 } 1920 1921 skb = virtnet_build_skb(buf, buflen, xdp.data - buf, len); 1922 if (unlikely(!skb)) 1923 goto err; 1924 1925 if (metasize) 1926 skb_metadata_set(skb, metasize); 1927 1928 skb_mark_for_recycle(skb); 1929 1930 return skb; 1931 1932 err_xdp: 1933 u64_stats_inc(&stats->xdp_drops); 1934 err: 1935 u64_stats_inc(&stats->drops); 1936 page_pool_put_page(rq->page_pool, page, -1, true); 1937 xdp_xmit: 1938 return NULL; 1939 } 1940 1941 static struct sk_buff *receive_small(struct net_device *dev, 1942 struct virtnet_info *vi, 1943 struct receive_queue *rq, 1944 void *buf, void *ctx, 1945 unsigned int len, 1946 unsigned int *xdp_xmit, 1947 struct virtnet_rq_stats *stats) 1948 { 1949 unsigned int xdp_headroom = mergeable_ctx_to_headroom(ctx); 1950 unsigned int buflen = mergeable_ctx_to_truesize(ctx); 1951 struct page *page = virt_to_head_page(buf); 1952 struct sk_buff *skb; 1953 1954 /* We passed the address of virtnet header to virtio-core, 1955 * so truncate the padding. 
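	 *
	 * The buffer is laid out as
	 * | VIRTNET_RX_PAD | xdp_headroom | virtio hdr | packet |; buf points
	 * at the virtio header here, so stepping back by
	 * VIRTNET_RX_PAD + xdp_headroom recovers the start of the area handed
	 * to build_skb().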
1956 */ 1957 buf -= VIRTNET_RX_PAD + xdp_headroom; 1958 1959 if (rq->use_page_pool_dma) { 1960 int offset = buf - page_address(page) + 1961 VIRTNET_RX_PAD + xdp_headroom; 1962 1963 page_pool_dma_sync_for_cpu(rq->page_pool, page, offset, len); 1964 } 1965 1966 len -= vi->hdr_len; 1967 u64_stats_add(&stats->bytes, len); 1968 1969 if (unlikely(len > GOOD_PACKET_LEN)) { 1970 pr_debug("%s: rx error: len %u exceeds max size %d\n", 1971 dev->name, len, GOOD_PACKET_LEN); 1972 DEV_STATS_INC(dev, rx_length_errors); 1973 goto err; 1974 } 1975 1976 if (unlikely(vi->xdp_enabled)) { 1977 struct bpf_prog *xdp_prog; 1978 1979 rcu_read_lock(); 1980 xdp_prog = rcu_dereference(rq->xdp_prog); 1981 if (xdp_prog) { 1982 skb = receive_small_xdp(dev, vi, rq, xdp_prog, buf, 1983 xdp_headroom, len, buflen, 1984 xdp_xmit, stats); 1985 rcu_read_unlock(); 1986 return skb; 1987 } 1988 rcu_read_unlock(); 1989 } 1990 1991 skb = receive_small_build_skb(vi, xdp_headroom, buf, len, buflen); 1992 if (likely(skb)) { 1993 skb_mark_for_recycle(skb); 1994 return skb; 1995 } 1996 1997 err: 1998 u64_stats_inc(&stats->drops); 1999 page_pool_put_page(rq->page_pool, page, -1, true); 2000 return NULL; 2001 } 2002 2003 static struct sk_buff *receive_big(struct net_device *dev, 2004 struct virtnet_info *vi, 2005 struct receive_queue *rq, 2006 void *buf, 2007 unsigned int len, 2008 struct virtnet_rq_stats *stats) 2009 { 2010 struct page *page = buf; 2011 struct sk_buff *skb; 2012 2013 /* Make sure that len does not exceed the size allocated in 2014 * add_recvbuf_big. 2015 */ 2016 if (unlikely(len > (vi->big_packets_num_skbfrags + 1) * PAGE_SIZE)) { 2017 pr_debug("%s: rx error: len %u exceeds allocated size %lu\n", 2018 dev->name, len, 2019 (vi->big_packets_num_skbfrags + 1) * PAGE_SIZE); 2020 goto err; 2021 } 2022 2023 skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0); 2024 u64_stats_add(&stats->bytes, len - vi->hdr_len); 2025 if (unlikely(!skb)) 2026 goto err; 2027 2028 return skb; 2029 2030 err: 2031 u64_stats_inc(&stats->drops); 2032 give_pages(rq, page); 2033 return NULL; 2034 } 2035 2036 static void mergeable_buf_free(struct receive_queue *rq, int num_buf, 2037 struct net_device *dev, 2038 struct virtnet_rq_stats *stats) 2039 { 2040 struct page *page; 2041 void *buf; 2042 int len; 2043 2044 while (num_buf-- > 1) { 2045 buf = virtnet_rq_get_buf(rq, &len, NULL); 2046 if (unlikely(!buf)) { 2047 pr_debug("%s: rx error: %d buffers missing\n", 2048 dev->name, num_buf); 2049 DEV_STATS_INC(dev, rx_length_errors); 2050 break; 2051 } 2052 u64_stats_add(&stats->bytes, len); 2053 page = virt_to_head_page(buf); 2054 page_pool_put_page(rq->page_pool, page, -1, true); 2055 } 2056 } 2057 2058 /* Why not use xdp_build_skb_from_frame() ? 2059 * XDP core assumes that xdp frags are PAGE_SIZE in length, while in 2060 * virtio-net there are 2 points that do not match its requirements: 2061 * 1. The size of the prefilled buffer is not fixed before xdp is set. 2062 * 2. xdp_build_skb_from_frame() does more checks that we don't need, 2063 * like eth_type_trans() (which virtio-net does in receive_buf()). 
2064 */ 2065 static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev, 2066 struct virtnet_info *vi, 2067 struct xdp_buff *xdp, 2068 unsigned int xdp_frags_truesz) 2069 { 2070 struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 2071 unsigned int headroom, data_len; 2072 struct sk_buff *skb; 2073 int metasize; 2074 u8 nr_frags; 2075 2076 if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) { 2077 pr_debug("Error building skb as missing reserved tailroom for xdp"); 2078 return NULL; 2079 } 2080 2081 if (unlikely(xdp_buff_has_frags(xdp))) 2082 nr_frags = sinfo->nr_frags; 2083 2084 skb = build_skb(xdp->data_hard_start, xdp->frame_sz); 2085 if (unlikely(!skb)) 2086 return NULL; 2087 2088 headroom = xdp->data - xdp->data_hard_start; 2089 data_len = xdp->data_end - xdp->data; 2090 skb_reserve(skb, headroom); 2091 __skb_put(skb, data_len); 2092 2093 metasize = xdp->data - xdp->data_meta; 2094 metasize = metasize > 0 ? metasize : 0; 2095 if (metasize) 2096 skb_metadata_set(skb, metasize); 2097 2098 if (unlikely(xdp_buff_has_frags(xdp))) 2099 xdp_update_skb_frags_info(skb, nr_frags, sinfo->xdp_frags_size, 2100 xdp_frags_truesz, 2101 xdp_buff_get_skb_flags(xdp)); 2102 2103 return skb; 2104 } 2105 2106 /* TODO: build xdp in big mode */ 2107 static int virtnet_build_xdp_buff_mrg(struct net_device *dev, 2108 struct virtnet_info *vi, 2109 struct receive_queue *rq, 2110 struct xdp_buff *xdp, 2111 void *buf, 2112 unsigned int len, 2113 unsigned int frame_sz, 2114 int *num_buf, 2115 unsigned int *xdp_frags_truesize, 2116 struct virtnet_rq_stats *stats) 2117 { 2118 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 2119 struct skb_shared_info *shinfo; 2120 unsigned int xdp_frags_truesz = 0; 2121 unsigned int truesize; 2122 struct page *page; 2123 skb_frag_t *frag; 2124 int offset; 2125 void *ctx; 2126 2127 xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq); 2128 xdp_prepare_buff(xdp, buf - XDP_PACKET_HEADROOM, 2129 XDP_PACKET_HEADROOM + vi->hdr_len, len - vi->hdr_len, true); 2130 2131 if (!*num_buf) 2132 return 0; 2133 2134 if (*num_buf > 1) { 2135 /* If we want to build multi-buffer xdp, we need 2136 * to specify that the flags of xdp_buff have the 2137 * XDP_FLAGS_HAS_FRAG bit. 
2138 */ 2139 if (!xdp_buff_has_frags(xdp)) 2140 xdp_buff_set_frags_flag(xdp); 2141 2142 shinfo = xdp_get_shared_info_from_buff(xdp); 2143 shinfo->nr_frags = 0; 2144 shinfo->xdp_frags_size = 0; 2145 } 2146 2147 if (*num_buf > MAX_SKB_FRAGS + 1) 2148 return -EINVAL; 2149 2150 while (--*num_buf > 0) { 2151 buf = virtnet_rq_get_buf(rq, &len, &ctx); 2152 if (unlikely(!buf)) { 2153 pr_debug("%s: rx error: %d buffers out of %d missing\n", 2154 dev->name, *num_buf, 2155 virtio16_to_cpu(vi->vdev, hdr->num_buffers)); 2156 DEV_STATS_INC(dev, rx_length_errors); 2157 goto err; 2158 } 2159 2160 u64_stats_add(&stats->bytes, len); 2161 page = virt_to_head_page(buf); 2162 offset = buf - page_address(page); 2163 2164 if (rq->use_page_pool_dma) 2165 page_pool_dma_sync_for_cpu(rq->page_pool, page, 2166 offset, len); 2167 2168 if (check_mergeable_len(dev, ctx, len)) { 2169 page_pool_put_page(rq->page_pool, page, -1, true); 2170 goto err; 2171 } 2172 2173 truesize = mergeable_ctx_to_truesize(ctx); 2174 xdp_frags_truesz += truesize; 2175 2176 frag = &shinfo->frags[shinfo->nr_frags++]; 2177 skb_frag_fill_page_desc(frag, page, offset, len); 2178 if (page_is_pfmemalloc(page)) 2179 xdp_buff_set_frag_pfmemalloc(xdp); 2180 2181 shinfo->xdp_frags_size += len; 2182 } 2183 2184 *xdp_frags_truesize = xdp_frags_truesz; 2185 return 0; 2186 2187 err: 2188 put_xdp_frags(rq, xdp); 2189 return -EINVAL; 2190 } 2191 2192 static void *mergeable_xdp_get_buf(struct virtnet_info *vi, 2193 struct receive_queue *rq, 2194 struct bpf_prog *xdp_prog, 2195 void *ctx, 2196 unsigned int *frame_sz, 2197 int *num_buf, 2198 struct page **page, 2199 int offset, 2200 unsigned int *len, 2201 struct virtio_net_hdr_mrg_rxbuf *hdr) 2202 { 2203 unsigned int truesize = mergeable_ctx_to_truesize(ctx); 2204 unsigned int headroom = mergeable_ctx_to_headroom(ctx); 2205 struct page *xdp_page; 2206 unsigned int xdp_room; 2207 2208 /* Transient failure which in theory could occur if 2209 * in-flight packets from before XDP was enabled reach 2210 * the receive path after XDP is loaded. 2211 */ 2212 if (unlikely(hdr->hdr.gso_type)) 2213 return NULL; 2214 2215 /* Partially checksummed packets must be dropped. */ 2216 if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) 2217 return NULL; 2218 2219 /* Now XDP core assumes frag size is PAGE_SIZE, but buffers 2220 * with headroom may add hole in truesize, which 2221 * make their length exceed PAGE_SIZE. So we disabled the 2222 * hole mechanism for xdp. See add_recvbuf_mergeable(). 2223 */ 2224 *frame_sz = truesize; 2225 2226 if (likely(headroom >= virtnet_get_headroom(vi) && 2227 (*num_buf == 1 || xdp_prog->aux->xdp_has_frags))) { 2228 return page_address(*page) + offset; 2229 } 2230 2231 /* This happens when headroom is not enough because 2232 * of the buffer was prefilled before XDP is set. 2233 * This should only happen for the first several packets. 2234 * In fact, vq reset can be used here to help us clean up 2235 * the prefilled buffers, but many existing devices do not 2236 * support it, and we don't want to bother users who are 2237 * using xdp normally. 
2238 */ 2239 if (!xdp_prog->aux->xdp_has_frags) { 2240 /* linearize data for XDP */ 2241 xdp_page = xdp_linearize_page(vi->dev, rq, num_buf, 2242 *page, offset, 2243 XDP_PACKET_HEADROOM, 2244 len); 2245 if (!xdp_page) 2246 return NULL; 2247 } else { 2248 xdp_room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM + 2249 sizeof(struct skb_shared_info)); 2250 if (*len + xdp_room > PAGE_SIZE) 2251 return NULL; 2252 2253 xdp_page = page_pool_alloc_pages(rq->page_pool, GFP_ATOMIC); 2254 if (!xdp_page) 2255 return NULL; 2256 2257 memcpy(page_address(xdp_page) + XDP_PACKET_HEADROOM, 2258 page_address(*page) + offset, *len); 2259 } 2260 2261 *frame_sz = PAGE_SIZE; 2262 2263 page_pool_put_page(rq->page_pool, *page, -1, true); 2264 2265 *page = xdp_page; 2266 2267 return page_address(*page) + XDP_PACKET_HEADROOM; 2268 } 2269 2270 static struct sk_buff *receive_mergeable_xdp(struct net_device *dev, 2271 struct virtnet_info *vi, 2272 struct receive_queue *rq, 2273 struct bpf_prog *xdp_prog, 2274 void *buf, 2275 void *ctx, 2276 unsigned int len, 2277 unsigned int *xdp_xmit, 2278 struct virtnet_rq_stats *stats) 2279 { 2280 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 2281 int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 2282 struct page *page = virt_to_head_page(buf); 2283 int offset = buf - page_address(page); 2284 unsigned int xdp_frags_truesz = 0; 2285 struct sk_buff *head_skb; 2286 unsigned int frame_sz; 2287 struct xdp_buff xdp; 2288 void *data; 2289 u32 act; 2290 int err; 2291 2292 data = mergeable_xdp_get_buf(vi, rq, xdp_prog, ctx, &frame_sz, &num_buf, &page, 2293 offset, &len, hdr); 2294 if (unlikely(!data)) 2295 goto err_xdp; 2296 2297 err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz, 2298 &num_buf, &xdp_frags_truesz, stats); 2299 if (unlikely(err)) 2300 goto err_xdp; 2301 2302 act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats); 2303 2304 switch (act) { 2305 case XDP_PASS: 2306 head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz); 2307 if (unlikely(!head_skb)) 2308 break; 2309 2310 skb_mark_for_recycle(head_skb); 2311 return head_skb; 2312 2313 case XDP_TX: 2314 case XDP_REDIRECT: 2315 return NULL; 2316 2317 default: 2318 break; 2319 } 2320 2321 put_xdp_frags(rq, &xdp); 2322 2323 err_xdp: 2324 page_pool_put_page(rq->page_pool, page, -1, true); 2325 mergeable_buf_free(rq, num_buf, dev, stats); 2326 2327 u64_stats_inc(&stats->xdp_drops); 2328 u64_stats_inc(&stats->drops); 2329 return NULL; 2330 } 2331 2332 static struct sk_buff *virtnet_skb_append_frag(struct receive_queue *rq, 2333 struct sk_buff *head_skb, 2334 struct sk_buff *curr_skb, 2335 struct page *page, void *buf, 2336 int len, int truesize) 2337 { 2338 int num_skb_frags; 2339 int offset; 2340 2341 num_skb_frags = skb_shinfo(curr_skb)->nr_frags; 2342 if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) { 2343 struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC); 2344 2345 if (unlikely(!nskb)) 2346 return NULL; 2347 2348 if (head_skb->pp_recycle) 2349 skb_mark_for_recycle(nskb); 2350 2351 if (curr_skb == head_skb) 2352 skb_shinfo(curr_skb)->frag_list = nskb; 2353 else 2354 curr_skb->next = nskb; 2355 curr_skb = nskb; 2356 head_skb->truesize += nskb->truesize; 2357 num_skb_frags = 0; 2358 } 2359 2360 if (curr_skb != head_skb) { 2361 head_skb->data_len += len; 2362 head_skb->len += len; 2363 head_skb->truesize += truesize; 2364 } 2365 2366 offset = buf - page_address(page); 2367 if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) { 2368 if (head_skb->pp_recycle) 2369 
page_pool_put_page(rq->page_pool, page, -1, true); 2370 else 2371 put_page(page); 2372 skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1, 2373 len, truesize); 2374 } else { 2375 skb_add_rx_frag(curr_skb, num_skb_frags, page, 2376 offset, len, truesize); 2377 } 2378 2379 return curr_skb; 2380 } 2381 2382 static struct sk_buff *receive_mergeable(struct net_device *dev, 2383 struct virtnet_info *vi, 2384 struct receive_queue *rq, 2385 void *buf, 2386 void *ctx, 2387 unsigned int len, 2388 unsigned int *xdp_xmit, 2389 struct virtnet_rq_stats *stats) 2390 { 2391 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 2392 int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 2393 struct page *page = virt_to_head_page(buf); 2394 int offset = buf - page_address(page); 2395 struct sk_buff *head_skb, *curr_skb; 2396 unsigned int truesize = mergeable_ctx_to_truesize(ctx); 2397 unsigned int headroom = mergeable_ctx_to_headroom(ctx); 2398 2399 head_skb = NULL; 2400 2401 if (rq->use_page_pool_dma) 2402 page_pool_dma_sync_for_cpu(rq->page_pool, page, offset, len); 2403 2404 u64_stats_add(&stats->bytes, len - vi->hdr_len); 2405 2406 if (check_mergeable_len(dev, ctx, len)) 2407 goto err_skb; 2408 2409 if (unlikely(vi->xdp_enabled)) { 2410 struct bpf_prog *xdp_prog; 2411 2412 rcu_read_lock(); 2413 xdp_prog = rcu_dereference(rq->xdp_prog); 2414 if (xdp_prog) { 2415 head_skb = receive_mergeable_xdp(dev, vi, rq, xdp_prog, buf, ctx, 2416 len, xdp_xmit, stats); 2417 rcu_read_unlock(); 2418 return head_skb; 2419 } 2420 rcu_read_unlock(); 2421 } 2422 2423 head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom); 2424 curr_skb = head_skb; 2425 2426 if (unlikely(!curr_skb)) 2427 goto err_skb; 2428 2429 skb_mark_for_recycle(head_skb); 2430 while (--num_buf) { 2431 buf = virtnet_rq_get_buf(rq, &len, &ctx); 2432 if (unlikely(!buf)) { 2433 pr_debug("%s: rx error: %d buffers out of %d missing\n", 2434 dev->name, num_buf, 2435 virtio16_to_cpu(vi->vdev, 2436 hdr->num_buffers)); 2437 DEV_STATS_INC(dev, rx_length_errors); 2438 goto err_buf; 2439 } 2440 2441 u64_stats_add(&stats->bytes, len); 2442 page = virt_to_head_page(buf); 2443 2444 if (rq->use_page_pool_dma) { 2445 offset = buf - page_address(page); 2446 page_pool_dma_sync_for_cpu(rq->page_pool, page, 2447 offset, len); 2448 } 2449 2450 if (check_mergeable_len(dev, ctx, len)) 2451 goto err_skb; 2452 2453 truesize = mergeable_ctx_to_truesize(ctx); 2454 curr_skb = virtnet_skb_append_frag(rq, head_skb, curr_skb, page, 2455 buf, len, truesize); 2456 if (!curr_skb) 2457 goto err_skb; 2458 } 2459 2460 ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len); 2461 return head_skb; 2462 2463 err_skb: 2464 page_pool_put_page(rq->page_pool, page, -1, true); 2465 mergeable_buf_free(rq, num_buf, dev, stats); 2466 2467 err_buf: 2468 u64_stats_inc(&stats->drops); 2469 dev_kfree_skb(head_skb); 2470 return NULL; 2471 } 2472 2473 static inline u32 2474 virtio_net_hash_value(const struct virtio_net_hdr_v1_hash *hdr_hash) 2475 { 2476 return __le16_to_cpu(hdr_hash->hash_value_lo) | 2477 (__le16_to_cpu(hdr_hash->hash_value_hi) << 16); 2478 } 2479 2480 static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash, 2481 struct sk_buff *skb) 2482 { 2483 enum pkt_hash_types rss_hash_type; 2484 2485 if (!hdr_hash || !skb) 2486 return; 2487 2488 switch (__le16_to_cpu(hdr_hash->hash_report)) { 2489 case VIRTIO_NET_HASH_REPORT_TCPv4: 2490 case VIRTIO_NET_HASH_REPORT_UDPv4: 2491 case VIRTIO_NET_HASH_REPORT_TCPv6: 2492 case VIRTIO_NET_HASH_REPORT_UDPv6: 2493 case 
VIRTIO_NET_HASH_REPORT_TCPv6_EX: 2494 case VIRTIO_NET_HASH_REPORT_UDPv6_EX: 2495 rss_hash_type = PKT_HASH_TYPE_L4; 2496 break; 2497 case VIRTIO_NET_HASH_REPORT_IPv4: 2498 case VIRTIO_NET_HASH_REPORT_IPv6: 2499 case VIRTIO_NET_HASH_REPORT_IPv6_EX: 2500 rss_hash_type = PKT_HASH_TYPE_L3; 2501 break; 2502 case VIRTIO_NET_HASH_REPORT_NONE: 2503 default: 2504 rss_hash_type = PKT_HASH_TYPE_NONE; 2505 } 2506 skb_set_hash(skb, virtio_net_hash_value(hdr_hash), rss_hash_type); 2507 } 2508 2509 static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq, 2510 struct sk_buff *skb, u8 flags) 2511 { 2512 struct virtio_net_common_hdr *hdr; 2513 struct net_device *dev = vi->dev; 2514 2515 hdr = skb_vnet_common_hdr(skb); 2516 if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report) 2517 virtio_skb_set_hash(&hdr->hash_v1_hdr, skb); 2518 2519 hdr->hdr.flags = flags; 2520 if (virtio_net_handle_csum_offload(skb, &hdr->hdr, vi->rx_tnl_csum)) { 2521 net_warn_ratelimited("%s: bad csum: flags: %x, gso_type: %x rx_tnl_csum %d\n", 2522 dev->name, hdr->hdr.flags, 2523 hdr->hdr.gso_type, vi->rx_tnl_csum); 2524 goto frame_err; 2525 } 2526 2527 if (virtio_net_hdr_tnl_to_skb(skb, &hdr->tnl_hdr, vi->rx_tnl, 2528 vi->rx_tnl_csum, 2529 virtio_is_little_endian(vi->vdev))) { 2530 net_warn_ratelimited("%s: bad gso: type: %x, size: %u, flags %x tunnel %d tnl csum %d\n", 2531 dev->name, hdr->hdr.gso_type, 2532 hdr->hdr.gso_size, hdr->hdr.flags, 2533 vi->rx_tnl, vi->rx_tnl_csum); 2534 goto frame_err; 2535 } 2536 2537 skb_record_rx_queue(skb, vq2rxq(rq->vq)); 2538 skb->protocol = eth_type_trans(skb, dev); 2539 pr_debug("Receiving skb proto 0x%04x len %i type %i\n", 2540 ntohs(skb->protocol), skb->len, skb->pkt_type); 2541 2542 napi_gro_receive(&rq->napi, skb); 2543 return; 2544 2545 frame_err: 2546 DEV_STATS_INC(dev, rx_frame_errors); 2547 dev_kfree_skb(skb); 2548 } 2549 2550 static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, 2551 void *buf, unsigned int len, void **ctx, 2552 unsigned int *xdp_xmit, 2553 struct virtnet_rq_stats *stats) 2554 { 2555 struct net_device *dev = vi->dev; 2556 struct sk_buff *skb; 2557 u8 flags; 2558 2559 if (unlikely(len < vi->hdr_len + ETH_HLEN)) { 2560 pr_debug("%s: short packet %i\n", dev->name, len); 2561 DEV_STATS_INC(dev, rx_length_errors); 2562 virtnet_rq_free_buf(vi, rq, buf); 2563 return; 2564 } 2565 2566 /* About the flags below: 2567 * 1. Save the flags early, as the XDP program might overwrite them. 2568 * These flags ensure packets marked as VIRTIO_NET_HDR_F_DATA_VALID 2569 * stay valid after XDP processing. 2570 * 2. XDP doesn't work with partially checksummed packets (refer to 2571 * virtnet_xdp_set()), so packets marked as 2572 * VIRTIO_NET_HDR_F_NEEDS_CSUM get dropped during XDP processing. 
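 * 3. The saved flags are handed to virtnet_receive_done(), which writes
 *    them back into the header before the checksum and GSO conversion
 *    helpers inspect it.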
2573 */ 2574 2575 if (vi->mergeable_rx_bufs) { 2576 flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags; 2577 skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit, 2578 stats); 2579 } else if (vi->big_packets) { 2580 void *p = page_address((struct page *)buf); 2581 2582 flags = ((struct virtio_net_common_hdr *)p)->hdr.flags; 2583 skb = receive_big(dev, vi, rq, buf, len, stats); 2584 } else { 2585 flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags; 2586 skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); 2587 } 2588 2589 if (unlikely(!skb)) 2590 return; 2591 2592 virtnet_receive_done(vi, rq, skb, flags); 2593 } 2594 2595 static int virtnet_rq_submit(struct receive_queue *rq, char *buf, 2596 int len, void *ctx, gfp_t gfp) 2597 { 2598 if (rq->use_page_pool_dma) { 2599 struct page *page = virt_to_head_page(buf); 2600 dma_addr_t addr = page_pool_get_dma_addr(page) + 2601 (buf - (char *)page_address(page)); 2602 2603 sg_init_table(rq->sg, 1); 2604 sg_fill_dma(rq->sg, addr, len); 2605 return virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1, 2606 buf, ctx, gfp); 2607 } 2608 2609 sg_init_one(rq->sg, buf, len); 2610 return virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); 2611 } 2612 2613 /* With page_pool, the actual allocation may exceed the requested size 2614 * when the remaining page fragment can't fit another buffer. Encode 2615 * the actual allocation size in ctx so build_skb() gets the correct 2616 * buflen for truesize accounting. 2617 */ 2618 static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, 2619 gfp_t gfp) 2620 { 2621 unsigned int xdp_headroom = virtnet_get_headroom(vi); 2622 unsigned int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom; 2623 unsigned int alloc_len; 2624 char *buf; 2625 void *ctx; 2626 int err; 2627 2628 len = SKB_DATA_ALIGN(len) + 2629 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 2630 2631 alloc_len = len; 2632 buf = page_pool_alloc_va(rq->page_pool, &alloc_len, gfp); 2633 if (unlikely(!buf)) 2634 return -ENOMEM; 2635 2636 buf += VIRTNET_RX_PAD + xdp_headroom; 2637 2638 ctx = mergeable_len_to_ctx(alloc_len, xdp_headroom); 2639 err = virtnet_rq_submit(rq, buf, vi->hdr_len + GOOD_PACKET_LEN, ctx, gfp); 2640 2641 if (err < 0) 2642 page_pool_put_page(rq->page_pool, virt_to_head_page(buf), -1, false); 2643 return err; 2644 } 2645 2646 static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq, 2647 gfp_t gfp) 2648 { 2649 struct page *first, *list = NULL; 2650 char *p; 2651 int i, err, offset; 2652 2653 sg_init_table(rq->sg, vi->big_packets_num_skbfrags + 2); 2654 2655 /* page in rq->sg[vi->big_packets_num_skbfrags + 1] is list tail */ 2656 for (i = vi->big_packets_num_skbfrags + 1; i > 1; --i) { 2657 first = get_a_page(rq, gfp); 2658 if (!first) { 2659 if (list) 2660 give_pages(rq, list); 2661 return -ENOMEM; 2662 } 2663 sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE); 2664 2665 /* chain new page in list head to match sg */ 2666 first->private = (unsigned long)list; 2667 list = first; 2668 } 2669 2670 first = get_a_page(rq, gfp); 2671 if (!first) { 2672 give_pages(rq, list); 2673 return -ENOMEM; 2674 } 2675 p = page_address(first); 2676 2677 /* rq->sg[0], rq->sg[1] share the same page */ 2678 /* a separated rq->sg[0] for header - required in case !any_header_sg */ 2679 sg_set_buf(&rq->sg[0], p, vi->hdr_len); 2680 2681 /* rq->sg[1] for data packet, from offset */ 2682 offset = sizeof(struct padded_vnet_hdr); 2683 sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - 
offset); 2684 2685 /* chain first in list head */ 2686 first->private = (unsigned long)list; 2687 err = virtqueue_add_inbuf(rq->vq, rq->sg, vi->big_packets_num_skbfrags + 2, 2688 first, gfp); 2689 if (err < 0) 2690 give_pages(rq, first); 2691 2692 return err; 2693 } 2694 2695 static unsigned int get_mergeable_buf_len(struct receive_queue *rq, 2696 struct ewma_pkt_len *avg_pkt_len, 2697 unsigned int room) 2698 { 2699 struct virtnet_info *vi = rq->vq->vdev->priv; 2700 const size_t hdr_len = vi->hdr_len; 2701 unsigned int len; 2702 2703 if (room) 2704 return PAGE_SIZE - room; 2705 2706 len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len), 2707 rq->min_buf_len, PAGE_SIZE - hdr_len); 2708 2709 return ALIGN(len, L1_CACHE_BYTES); 2710 } 2711 2712 static int add_recvbuf_mergeable(struct virtnet_info *vi, 2713 struct receive_queue *rq, gfp_t gfp) 2714 { 2715 unsigned int headroom = virtnet_get_headroom(vi); 2716 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 2717 unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); 2718 unsigned int len, alloc_len; 2719 char *buf; 2720 void *ctx; 2721 int err; 2722 2723 /* Extra tailroom is needed to satisfy XDP's assumption. This 2724 * means rx frags coalescing won't work, but consider we've 2725 * disabled GSO for XDP, it won't be a big issue. 2726 */ 2727 len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room); 2728 2729 alloc_len = len + room; 2730 buf = page_pool_alloc_va(rq->page_pool, &alloc_len, gfp); 2731 if (unlikely(!buf)) 2732 return -ENOMEM; 2733 2734 buf += headroom; /* advance address leaving hole at front of pkt */ 2735 2736 if (!headroom) 2737 len = alloc_len - room; 2738 2739 ctx = mergeable_len_to_ctx(len + room, headroom); 2740 2741 err = virtnet_rq_submit(rq, buf, len, ctx, gfp); 2742 2743 if (err < 0) 2744 page_pool_put_page(rq->page_pool, virt_to_head_page(buf), -1, false); 2745 return err; 2746 } 2747 2748 /* 2749 * Returns false if we couldn't fill entirely (OOM) and need to retry. 2750 * In XSK mode, it's when the receive buffer is not allocated and 2751 * xsk_use_need_wakeup is not set. 2752 * 2753 * Normally run in the receive path, but can also be run from ndo_open 2754 * before we're receiving packets, or from refill_work which is 2755 * careful to disable receiving (using napi_disable). 
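 * The caller in virtnet_receive() treats a false return as "retry later"
 * and keeps the NAPI scheduled so the refill is attempted again on the
 * next poll.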
2756 */ 2757 static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq, 2758 gfp_t gfp) 2759 { 2760 int err; 2761 2762 if (rq->xsk_pool) { 2763 err = virtnet_add_recvbuf_xsk(vi, rq, rq->xsk_pool, gfp); 2764 goto kick; 2765 } 2766 2767 do { 2768 if (vi->mergeable_rx_bufs) 2769 err = add_recvbuf_mergeable(vi, rq, gfp); 2770 else if (vi->big_packets) 2771 err = add_recvbuf_big(vi, rq, gfp); 2772 else 2773 err = add_recvbuf_small(vi, rq, gfp); 2774 2775 if (err) 2776 break; 2777 } while (rq->vq->num_free); 2778 2779 kick: 2780 if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) { 2781 unsigned long flags; 2782 2783 flags = u64_stats_update_begin_irqsave(&rq->stats.syncp); 2784 u64_stats_inc(&rq->stats.kicks); 2785 u64_stats_update_end_irqrestore(&rq->stats.syncp, flags); 2786 } 2787 2788 return err != -ENOMEM; 2789 } 2790 2791 static void skb_recv_done(struct virtqueue *rvq) 2792 { 2793 struct virtnet_info *vi = rvq->vdev->priv; 2794 struct receive_queue *rq = &vi->rq[vq2rxq(rvq)]; 2795 2796 rq->calls++; 2797 virtqueue_napi_schedule(&rq->napi, rvq); 2798 } 2799 2800 static void virtnet_napi_do_enable(struct virtqueue *vq, 2801 struct napi_struct *napi) 2802 { 2803 napi_enable(napi); 2804 2805 /* If all buffers were filled by other side before we napi_enabled, we 2806 * won't get another interrupt, so process any outstanding packets now. 2807 * Call local_bh_enable after to trigger softIRQ processing. 2808 */ 2809 local_bh_disable(); 2810 virtqueue_napi_schedule(napi, vq); 2811 local_bh_enable(); 2812 } 2813 2814 static void virtnet_napi_enable(struct receive_queue *rq) 2815 { 2816 struct virtnet_info *vi = rq->vq->vdev->priv; 2817 int qidx = vq2rxq(rq->vq); 2818 2819 virtnet_napi_do_enable(rq->vq, &rq->napi); 2820 netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, &rq->napi); 2821 } 2822 2823 static void virtnet_napi_tx_enable(struct send_queue *sq) 2824 { 2825 struct virtnet_info *vi = sq->vq->vdev->priv; 2826 struct napi_struct *napi = &sq->napi; 2827 int qidx = vq2txq(sq->vq); 2828 2829 if (!napi->weight) 2830 return; 2831 2832 /* Tx napi touches cachelines on the cpu handling tx interrupts. Only 2833 * enable the feature if this is likely affine with the transmit path. 
2834 */ 2835 if (!vi->affinity_hint_set) { 2836 napi->weight = 0; 2837 return; 2838 } 2839 2840 virtnet_napi_do_enable(sq->vq, napi); 2841 netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, napi); 2842 } 2843 2844 static void virtnet_napi_tx_disable(struct send_queue *sq) 2845 { 2846 struct virtnet_info *vi = sq->vq->vdev->priv; 2847 struct napi_struct *napi = &sq->napi; 2848 int qidx = vq2txq(sq->vq); 2849 2850 if (napi->weight) { 2851 netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, NULL); 2852 napi_disable(napi); 2853 } 2854 } 2855 2856 static void virtnet_napi_disable(struct receive_queue *rq) 2857 { 2858 struct virtnet_info *vi = rq->vq->vdev->priv; 2859 struct napi_struct *napi = &rq->napi; 2860 int qidx = vq2rxq(rq->vq); 2861 2862 netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, NULL); 2863 napi_disable(napi); 2864 } 2865 2866 static int virtnet_receive_xsk_bufs(struct virtnet_info *vi, 2867 struct receive_queue *rq, 2868 int budget, 2869 unsigned int *xdp_xmit, 2870 struct virtnet_rq_stats *stats) 2871 { 2872 unsigned int len; 2873 int packets = 0; 2874 void *buf; 2875 2876 while (packets < budget) { 2877 buf = virtqueue_get_buf(rq->vq, &len); 2878 if (!buf) 2879 break; 2880 2881 virtnet_receive_xsk_buf(vi, rq, buf, len, xdp_xmit, stats); 2882 packets++; 2883 } 2884 2885 return packets; 2886 } 2887 2888 static int virtnet_receive_packets(struct virtnet_info *vi, 2889 struct receive_queue *rq, 2890 int budget, 2891 unsigned int *xdp_xmit, 2892 struct virtnet_rq_stats *stats) 2893 { 2894 unsigned int len; 2895 int packets = 0; 2896 void *buf; 2897 2898 if (rq->page_pool) { 2899 void *ctx; 2900 while (packets < budget && 2901 (buf = virtnet_rq_get_buf(rq, &len, &ctx))) { 2902 receive_buf(vi, rq, buf, len, ctx, xdp_xmit, stats); 2903 packets++; 2904 } 2905 } else { 2906 while (packets < budget && 2907 (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) { 2908 receive_buf(vi, rq, buf, len, NULL, xdp_xmit, stats); 2909 packets++; 2910 } 2911 } 2912 2913 return packets; 2914 } 2915 2916 static int virtnet_receive(struct receive_queue *rq, int budget, 2917 unsigned int *xdp_xmit) 2918 { 2919 struct virtnet_info *vi = rq->vq->vdev->priv; 2920 struct virtnet_rq_stats stats = {}; 2921 int i, packets; 2922 2923 if (rq->xsk_pool) 2924 packets = virtnet_receive_xsk_bufs(vi, rq, budget, xdp_xmit, &stats); 2925 else 2926 packets = virtnet_receive_packets(vi, rq, budget, xdp_xmit, &stats); 2927 2928 u64_stats_set(&stats.packets, packets); 2929 if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) { 2930 if (!try_fill_recv(vi, rq, GFP_ATOMIC)) 2931 /* We need to retry refilling in the next NAPI poll so 2932 * we must return budget to make sure the NAPI is 2933 * repolled. 
2934 */ 2935 packets = budget; 2936 } 2937 2938 u64_stats_update_begin(&rq->stats.syncp); 2939 for (i = 0; i < ARRAY_SIZE(virtnet_rq_stats_desc); i++) { 2940 size_t offset = virtnet_rq_stats_desc[i].offset; 2941 u64_stats_t *item, *src; 2942 2943 item = (u64_stats_t *)((u8 *)&rq->stats + offset); 2944 src = (u64_stats_t *)((u8 *)&stats + offset); 2945 u64_stats_add(item, u64_stats_read(src)); 2946 } 2947 2948 u64_stats_add(&rq->stats.packets, u64_stats_read(&stats.packets)); 2949 u64_stats_add(&rq->stats.bytes, u64_stats_read(&stats.bytes)); 2950 2951 u64_stats_update_end(&rq->stats.syncp); 2952 2953 return packets; 2954 } 2955 2956 static void virtnet_poll_cleantx(struct receive_queue *rq, int budget) 2957 { 2958 struct virtnet_info *vi = rq->vq->vdev->priv; 2959 unsigned int index = vq2rxq(rq->vq); 2960 struct send_queue *sq = &vi->sq[index]; 2961 struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index); 2962 2963 if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index)) 2964 return; 2965 2966 if (__netif_tx_trylock(txq)) { 2967 if (sq->reset) { 2968 __netif_tx_unlock(txq); 2969 return; 2970 } 2971 2972 do { 2973 virtqueue_disable_cb(sq->vq); 2974 free_old_xmit(sq, txq, !!budget); 2975 } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq))); 2976 2977 if (sq->vq->num_free >= MAX_SKB_FRAGS + 2) 2978 virtnet_tx_wake_queue(vi, sq); 2979 2980 __netif_tx_unlock(txq); 2981 } 2982 } 2983 2984 static void virtnet_rx_dim_update(struct virtnet_info *vi, struct receive_queue *rq) 2985 { 2986 struct dim_sample cur_sample = {}; 2987 2988 if (!rq->packets_in_napi) 2989 return; 2990 2991 /* Don't need protection when fetching stats, since fetcher and 2992 * updater of the stats are in same context 2993 */ 2994 dim_update_sample(rq->calls, 2995 u64_stats_read(&rq->stats.packets), 2996 u64_stats_read(&rq->stats.bytes), 2997 &cur_sample); 2998 2999 net_dim(&rq->dim, &cur_sample); 3000 rq->packets_in_napi = 0; 3001 } 3002 3003 static int virtnet_poll(struct napi_struct *napi, int budget) 3004 { 3005 struct receive_queue *rq = 3006 container_of(napi, struct receive_queue, napi); 3007 struct virtnet_info *vi = rq->vq->vdev->priv; 3008 struct send_queue *sq; 3009 unsigned int received; 3010 unsigned int xdp_xmit = 0; 3011 bool napi_complete; 3012 3013 virtnet_poll_cleantx(rq, budget); 3014 3015 received = virtnet_receive(rq, budget, &xdp_xmit); 3016 rq->packets_in_napi += received; 3017 3018 if (xdp_xmit & VIRTIO_XDP_REDIR) 3019 xdp_do_flush(); 3020 3021 /* Out of packets? */ 3022 if (received < budget) { 3023 napi_complete = virtqueue_napi_complete(napi, rq->vq, received); 3024 /* Intentionally not taking dim_lock here. This may result in a 3025 * spurious net_dim call. But if that happens virtnet_rx_dim_work 3026 * will not act on the scheduled work. 
3027 */ 3028 if (napi_complete && rq->dim_enabled) 3029 virtnet_rx_dim_update(vi, rq); 3030 } 3031 3032 if (xdp_xmit & VIRTIO_XDP_TX) { 3033 sq = virtnet_xdp_get_sq(vi); 3034 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { 3035 u64_stats_update_begin(&sq->stats.syncp); 3036 u64_stats_inc(&sq->stats.kicks); 3037 u64_stats_update_end(&sq->stats.syncp); 3038 } 3039 virtnet_xdp_put_sq(vi, sq); 3040 } 3041 3042 return received; 3043 } 3044 3045 static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index) 3046 { 3047 virtnet_napi_tx_disable(&vi->sq[qp_index]); 3048 virtnet_napi_disable(&vi->rq[qp_index]); 3049 xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); 3050 } 3051 3052 static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index) 3053 { 3054 struct net_device *dev = vi->dev; 3055 int err; 3056 3057 err = xdp_rxq_info_reg(&vi->rq[qp_index].xdp_rxq, dev, qp_index, 3058 vi->rq[qp_index].napi.napi_id); 3059 if (err < 0) 3060 return err; 3061 3062 err = xdp_rxq_info_reg_mem_model(&vi->rq[qp_index].xdp_rxq, 3063 vi->rq[qp_index].page_pool ? 3064 MEM_TYPE_PAGE_POOL : 3065 MEM_TYPE_PAGE_SHARED, 3066 vi->rq[qp_index].page_pool); 3067 if (err < 0) 3068 goto err_xdp_reg_mem_model; 3069 3070 virtnet_napi_enable(&vi->rq[qp_index]); 3071 virtnet_napi_tx_enable(&vi->sq[qp_index]); 3072 3073 return 0; 3074 3075 err_xdp_reg_mem_model: 3076 xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); 3077 return err; 3078 } 3079 3080 static void virtnet_cancel_dim(struct virtnet_info *vi, struct dim *dim) 3081 { 3082 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 3083 return; 3084 net_dim_work_cancel(dim); 3085 } 3086 3087 static void virtnet_update_settings(struct virtnet_info *vi) 3088 { 3089 u32 speed; 3090 u8 duplex; 3091 3092 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX)) 3093 return; 3094 3095 virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed); 3096 3097 if (ethtool_validate_speed(speed)) 3098 vi->speed = speed; 3099 3100 virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex); 3101 3102 if (ethtool_validate_duplex(duplex)) 3103 vi->duplex = duplex; 3104 } 3105 3106 static int virtnet_create_page_pools(struct virtnet_info *vi) 3107 { 3108 int i, err; 3109 3110 if (vi->big_packets && !vi->mergeable_rx_bufs) 3111 return 0; 3112 3113 for (i = 0; i < vi->max_queue_pairs; i++) { 3114 struct receive_queue *rq = &vi->rq[i]; 3115 struct page_pool_params pp_params = { 0 }; 3116 struct device *dma_dev; 3117 3118 if (rq->page_pool) 3119 continue; 3120 3121 if (rq->xsk_pool) 3122 continue; 3123 3124 pp_params.order = 0; 3125 pp_params.pool_size = virtqueue_get_vring_size(rq->vq); 3126 pp_params.nid = dev_to_node(vi->vdev->dev.parent); 3127 pp_params.netdev = vi->dev; 3128 pp_params.napi = &rq->napi; 3129 3130 /* Use page_pool DMA mapping if backend supports DMA API. 3131 * DMA_SYNC_DEV is needed for non-coherent archs on recycle. 3132 */ 3133 dma_dev = virtqueue_dma_dev(rq->vq); 3134 if (dma_dev) { 3135 pp_params.dev = dma_dev; 3136 pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; 3137 pp_params.dma_dir = DMA_FROM_DEVICE; 3138 pp_params.max_len = PAGE_SIZE; 3139 pp_params.offset = 0; 3140 rq->use_page_pool_dma = true; 3141 } else { 3142 /* No DMA API (e.g., VDUSE): page_pool for allocation only. 
*/ 3143 pp_params.flags = 0; 3144 rq->use_page_pool_dma = false; 3145 } 3146 3147 rq->page_pool = page_pool_create(&pp_params); 3148 if (IS_ERR(rq->page_pool)) { 3149 err = PTR_ERR(rq->page_pool); 3150 rq->page_pool = NULL; 3151 goto err_cleanup; 3152 } 3153 } 3154 return 0; 3155 3156 err_cleanup: 3157 while (--i >= 0) { 3158 struct receive_queue *rq = &vi->rq[i]; 3159 3160 if (rq->page_pool) { 3161 page_pool_destroy(rq->page_pool); 3162 rq->page_pool = NULL; 3163 } 3164 } 3165 return err; 3166 } 3167 3168 static void virtnet_destroy_page_pools(struct virtnet_info *vi) 3169 { 3170 int i; 3171 3172 for (i = 0; i < vi->max_queue_pairs; i++) { 3173 struct receive_queue *rq = &vi->rq[i]; 3174 3175 if (rq->page_pool) { 3176 page_pool_destroy(rq->page_pool); 3177 rq->page_pool = NULL; 3178 } 3179 } 3180 } 3181 3182 static int virtnet_open(struct net_device *dev) 3183 { 3184 struct virtnet_info *vi = netdev_priv(dev); 3185 int i, err; 3186 3187 for (i = 0; i < vi->max_queue_pairs; i++) { 3188 if (i < vi->curr_queue_pairs) 3189 /* Pre-fill rq agressively, to make sure we are ready to 3190 * get packets immediately. 3191 */ 3192 try_fill_recv(vi, &vi->rq[i], GFP_KERNEL); 3193 3194 err = virtnet_enable_queue_pair(vi, i); 3195 if (err < 0) 3196 goto err_enable_qp; 3197 } 3198 3199 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { 3200 if (vi->status & VIRTIO_NET_S_LINK_UP) 3201 netif_carrier_on(vi->dev); 3202 virtio_config_driver_enable(vi->vdev); 3203 } else { 3204 vi->status = VIRTIO_NET_S_LINK_UP; 3205 netif_carrier_on(dev); 3206 } 3207 3208 return 0; 3209 3210 err_enable_qp: 3211 for (i--; i >= 0; i--) { 3212 virtnet_disable_queue_pair(vi, i); 3213 virtnet_cancel_dim(vi, &vi->rq[i].dim); 3214 } 3215 3216 return err; 3217 } 3218 3219 static int virtnet_poll_tx(struct napi_struct *napi, int budget) 3220 { 3221 struct send_queue *sq = container_of(napi, struct send_queue, napi); 3222 struct virtnet_info *vi = sq->vq->vdev->priv; 3223 unsigned int index = vq2txq(sq->vq); 3224 struct netdev_queue *txq; 3225 int opaque, xsk_done = 0; 3226 bool done; 3227 3228 if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { 3229 /* We don't need to enable cb for XDP */ 3230 napi_complete_done(napi, 0); 3231 return 0; 3232 } 3233 3234 txq = netdev_get_tx_queue(vi->dev, index); 3235 __netif_tx_lock(txq, raw_smp_processor_id()); 3236 virtqueue_disable_cb(sq->vq); 3237 3238 if (sq->xsk_pool) 3239 xsk_done = virtnet_xsk_xmit(sq, sq->xsk_pool, budget); 3240 else 3241 free_old_xmit(sq, txq, !!budget); 3242 3243 if (sq->vq->num_free >= MAX_SKB_FRAGS + 2) 3244 virtnet_tx_wake_queue(vi, sq); 3245 3246 if (xsk_done >= budget) { 3247 __netif_tx_unlock(txq); 3248 return budget; 3249 } 3250 3251 opaque = virtqueue_enable_cb_prepare(sq->vq); 3252 3253 done = napi_complete_done(napi, 0); 3254 3255 if (!done) 3256 virtqueue_disable_cb(sq->vq); 3257 3258 __netif_tx_unlock(txq); 3259 3260 if (done) { 3261 if (unlikely(virtqueue_poll(sq->vq, opaque))) { 3262 if (napi_schedule_prep(napi)) { 3263 __netif_tx_lock(txq, raw_smp_processor_id()); 3264 virtqueue_disable_cb(sq->vq); 3265 __netif_tx_unlock(txq); 3266 __napi_schedule(napi); 3267 } 3268 } 3269 } 3270 3271 return 0; 3272 } 3273 3274 static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan) 3275 { 3276 const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; 3277 struct virtnet_info *vi = sq->vq->vdev->priv; 3278 struct virtio_net_hdr_v1_hash_tunnel *hdr; 3279 int num_sg; 3280 unsigned hdr_len = vi->hdr_len; 3281 bool can_push; 3282 3283 
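/* Two layouts are possible below: when the skb has enough headroom and
 * suitable alignment (can_push), the virtio header is pushed directly in
 * front of the packet data and one scatterlist describes the whole frame;
 * otherwise the header obtained via skb_vnet_common_hdr() gets its own sg
 * entry ahead of the data.
 */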
pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); 3284 3285 /* Make sure it's safe to cast between formats */ 3286 BUILD_BUG_ON(__alignof__(*hdr) != __alignof__(hdr->hash_hdr)); 3287 BUILD_BUG_ON(__alignof__(*hdr) != __alignof__(hdr->hash_hdr.hdr)); 3288 3289 can_push = vi->any_header_sg && 3290 !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) && 3291 !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len; 3292 /* Even if we can, don't push here yet as this would skew 3293 * csum_start offset below. */ 3294 if (can_push) 3295 hdr = (struct virtio_net_hdr_v1_hash_tunnel *)(skb->data - 3296 hdr_len); 3297 else 3298 hdr = &skb_vnet_common_hdr(skb)->tnl_hdr; 3299 3300 if (virtio_net_hdr_tnl_from_skb(skb, hdr, vi->tx_tnl, 3301 virtio_is_little_endian(vi->vdev), 0, 3302 false)) 3303 return -EPROTO; 3304 3305 if (vi->mergeable_rx_bufs) 3306 hdr->hash_hdr.hdr.num_buffers = 0; 3307 3308 sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2)); 3309 if (can_push) { 3310 __skb_push(skb, hdr_len); 3311 num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len); 3312 if (unlikely(num_sg < 0)) 3313 return num_sg; 3314 /* Pull header back to avoid skew in tx bytes calculations. */ 3315 __skb_pull(skb, hdr_len); 3316 } else { 3317 sg_set_buf(sq->sg, hdr, hdr_len); 3318 num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len); 3319 if (unlikely(num_sg < 0)) 3320 return num_sg; 3321 num_sg++; 3322 } 3323 3324 return virtnet_add_outbuf(sq, num_sg, skb, 3325 orphan ? VIRTNET_XMIT_TYPE_SKB_ORPHAN : VIRTNET_XMIT_TYPE_SKB); 3326 } 3327 3328 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) 3329 { 3330 struct virtnet_info *vi = netdev_priv(dev); 3331 int qnum = skb_get_queue_mapping(skb); 3332 struct send_queue *sq = &vi->sq[qnum]; 3333 int err; 3334 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); 3335 bool xmit_more = netdev_xmit_more(); 3336 bool use_napi = sq->napi.weight; 3337 bool kick; 3338 3339 if (!use_napi) 3340 free_old_xmit(sq, txq, false); 3341 else 3342 virtqueue_disable_cb(sq->vq); 3343 3344 /* timestamp packet in software */ 3345 skb_tx_timestamp(skb); 3346 3347 /* Try to transmit */ 3348 err = xmit_skb(sq, skb, !use_napi); 3349 3350 /* This should not happen! */ 3351 if (unlikely(err)) { 3352 DEV_STATS_INC(dev, tx_fifo_errors); 3353 if (net_ratelimit()) 3354 dev_warn(&dev->dev, 3355 "Unexpected TXQ (%d) queue failure: %d\n", 3356 qnum, err); 3357 DEV_STATS_INC(dev, tx_dropped); 3358 dev_kfree_skb_any(skb); 3359 return NETDEV_TX_OK; 3360 } 3361 3362 /* Don't wait up for transmitted skbs to be freed. */ 3363 if (!use_napi) { 3364 skb_orphan(skb); 3365 nf_reset_ct(skb); 3366 } 3367 3368 if (use_napi) 3369 tx_may_stop(vi, dev, sq); 3370 else 3371 check_sq_full_and_disable(vi, dev,sq); 3372 3373 kick = use_napi ? 
__netdev_tx_sent_queue(txq, skb->len, xmit_more) :
3374 !xmit_more || netif_xmit_stopped(txq);
3375 if (kick) {
3376 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
3377 u64_stats_update_begin(&sq->stats.syncp);
3378 u64_stats_inc(&sq->stats.kicks);
3379 u64_stats_update_end(&sq->stats.syncp);
3380 }
3381 }
3382
3383 if (use_napi && kick && unlikely(!virtqueue_enable_cb_delayed(sq->vq)))
3384 virtqueue_napi_schedule(&sq->napi, sq->vq);
3385
3386 return NETDEV_TX_OK;
3387 }
3388
3389 static void virtnet_rx_pause(struct virtnet_info *vi,
3390 struct receive_queue *rq)
3391 {
3392 bool running = netif_running(vi->dev);
3393
3394 if (running) {
3395 virtnet_napi_disable(rq);
3396 virtnet_cancel_dim(vi, &rq->dim);
3397 }
3398 }
3399
3400 static void virtnet_rx_pause_all(struct virtnet_info *vi)
3401 {
3402 int i;
3403
3404 for (i = 0; i < vi->max_queue_pairs; i++)
3405 virtnet_rx_pause(vi, &vi->rq[i]);
3406 }
3407
3408 static void virtnet_rx_resume(struct virtnet_info *vi,
3409 struct receive_queue *rq,
3410 bool refill)
3411 {
3412 if (netif_running(vi->dev)) {
3413 /* Pre-fill rq aggressively, to make sure we are ready to get
3414 * packets immediately.
3415 */
3416 if (refill)
3417 try_fill_recv(vi, rq, GFP_KERNEL);
3418
3419 virtnet_napi_enable(rq);
3420 }
3421 }
3422
3423 static void virtnet_rx_resume_all(struct virtnet_info *vi)
3424 {
3425 int i;
3426
3427 for (i = 0; i < vi->max_queue_pairs; i++) {
3428 if (i < vi->curr_queue_pairs)
3429 virtnet_rx_resume(vi, &vi->rq[i], true);
3430 else
3431 virtnet_rx_resume(vi, &vi->rq[i], false);
3432 }
3433 }
3434
3435 static int virtnet_rx_resize(struct virtnet_info *vi,
3436 struct receive_queue *rq, u32 ring_num)
3437 {
3438 int err, qindex;
3439
3440 qindex = rq - vi->rq;
3441
3442 virtnet_rx_pause(vi, rq);
3443
3444 err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_unmap_free_buf, NULL);
3445 if (err)
3446 netdev_err(vi->dev, "resize rx fail: rx queue index: %d err: %d\n", qindex, err);
3447
3448 virtnet_rx_resume(vi, rq, true);
3449 return err;
3450 }
3451
3452 static void virtnet_tx_pause(struct virtnet_info *vi, struct send_queue *sq)
3453 {
3454 bool running = netif_running(vi->dev);
3455 struct netdev_queue *txq;
3456 int qindex;
3457
3458 qindex = sq - vi->sq;
3459
3460 if (running)
3461 virtnet_napi_tx_disable(sq);
3462
3463 txq = netdev_get_tx_queue(vi->dev, qindex);
3464
3465 /* 1. wait for all xmit to complete
3466 * 2. fix the race of netif_stop_subqueue() vs netif_start_subqueue()
3467 */
3468 __netif_tx_lock_bh(txq);
3469
3470 /* Prevent rx poll from accessing sq. */
3471 sq->reset = true;
3472
3473 /* Prevent the upper layer from trying to send packets.
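 * The counterpart, virtnet_tx_resume(), clears sq->reset and re-wakes the
 * subqueue under the same tx lock (see virtnet_tx_resize()).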
*/ 3474 netif_stop_subqueue(vi->dev, qindex); 3475 u64_stats_update_begin(&sq->stats.syncp); 3476 u64_stats_inc(&sq->stats.stop); 3477 u64_stats_update_end(&sq->stats.syncp); 3478 3479 __netif_tx_unlock_bh(txq); 3480 } 3481 3482 static void virtnet_tx_resume(struct virtnet_info *vi, struct send_queue *sq) 3483 { 3484 bool running = netif_running(vi->dev); 3485 struct netdev_queue *txq; 3486 int qindex; 3487 3488 qindex = sq - vi->sq; 3489 3490 txq = netdev_get_tx_queue(vi->dev, qindex); 3491 3492 __netif_tx_lock_bh(txq); 3493 sq->reset = false; 3494 virtnet_tx_wake_queue(vi, sq); 3495 __netif_tx_unlock_bh(txq); 3496 3497 if (running) 3498 virtnet_napi_tx_enable(sq); 3499 } 3500 3501 static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq, 3502 u32 ring_num) 3503 { 3504 int qindex, err; 3505 3506 if (ring_num <= MAX_SKB_FRAGS + 2) { 3507 netdev_err(vi->dev, "tx size (%d) cannot be smaller than %d\n", 3508 ring_num, MAX_SKB_FRAGS + 2); 3509 return -EINVAL; 3510 } 3511 3512 qindex = sq - vi->sq; 3513 3514 virtnet_tx_pause(vi, sq); 3515 3516 err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf, 3517 virtnet_sq_free_unused_buf_done); 3518 if (err) 3519 netdev_err(vi->dev, "resize tx fail: tx queue index: %d err: %d\n", qindex, err); 3520 3521 virtnet_tx_resume(vi, sq); 3522 3523 return err; 3524 } 3525 3526 /* 3527 * Send command via the control virtqueue and check status. Commands 3528 * supported by the hypervisor, as indicated by feature bits, should 3529 * never fail unless improperly formatted. 3530 */ 3531 static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd, 3532 struct scatterlist *out, 3533 struct scatterlist *in) 3534 { 3535 struct scatterlist *sgs[5], hdr, stat; 3536 u32 out_num = 0, tmp, in_num = 0; 3537 bool ok; 3538 int ret; 3539 3540 /* Caller should know better */ 3541 BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)); 3542 3543 mutex_lock(&vi->cvq_lock); 3544 vi->ctrl->status = ~0; 3545 vi->ctrl->hdr.class = class; 3546 vi->ctrl->hdr.cmd = cmd; 3547 /* Add header */ 3548 sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr)); 3549 sgs[out_num++] = &hdr; 3550 3551 if (out) 3552 sgs[out_num++] = out; 3553 3554 /* Add return status. */ 3555 sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status)); 3556 sgs[out_num + in_num++] = &stat; 3557 3558 if (in) 3559 sgs[out_num + in_num++] = in; 3560 3561 BUG_ON(out_num + in_num > ARRAY_SIZE(sgs)); 3562 ret = virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num, vi, GFP_ATOMIC); 3563 if (ret < 0) { 3564 dev_warn(&vi->vdev->dev, 3565 "Failed to add sgs for command vq: %d\n.", ret); 3566 mutex_unlock(&vi->cvq_lock); 3567 return false; 3568 } 3569 3570 if (unlikely(!virtqueue_kick(vi->cvq))) 3571 goto unlock; 3572 3573 /* Spin for a response, the kick causes an ioport write, trapping 3574 * into the hypervisor, so the request should be handled immediately. 
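 * cond_resched() below keeps the busy-wait polite, and the
 * virtqueue_is_broken() check lets the loop terminate if the device stops
 * responding.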
3575 */ 3576 while (!virtqueue_get_buf(vi->cvq, &tmp) && 3577 !virtqueue_is_broken(vi->cvq)) { 3578 cond_resched(); 3579 cpu_relax(); 3580 } 3581 3582 unlock: 3583 ok = vi->ctrl->status == VIRTIO_NET_OK; 3584 mutex_unlock(&vi->cvq_lock); 3585 return ok; 3586 } 3587 3588 static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, 3589 struct scatterlist *out) 3590 { 3591 return virtnet_send_command_reply(vi, class, cmd, out, NULL); 3592 } 3593 3594 static int virtnet_set_mac_address(struct net_device *dev, void *p) 3595 { 3596 struct virtnet_info *vi = netdev_priv(dev); 3597 struct virtio_device *vdev = vi->vdev; 3598 int ret; 3599 struct sockaddr *addr; 3600 struct scatterlist sg; 3601 3602 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 3603 return -EOPNOTSUPP; 3604 3605 addr = kmemdup(p, sizeof(*addr), GFP_KERNEL); 3606 if (!addr) 3607 return -ENOMEM; 3608 3609 ret = eth_prepare_mac_addr_change(dev, addr); 3610 if (ret) 3611 goto out; 3612 3613 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 3614 sg_init_one(&sg, addr->sa_data, dev->addr_len); 3615 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 3616 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { 3617 dev_warn(&vdev->dev, 3618 "Failed to set mac address by vq command.\n"); 3619 ret = -EINVAL; 3620 goto out; 3621 } 3622 } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && 3623 !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { 3624 unsigned int i; 3625 3626 /* Naturally, this has an atomicity problem. */ 3627 for (i = 0; i < dev->addr_len; i++) 3628 virtio_cwrite8(vdev, 3629 offsetof(struct virtio_net_config, mac) + 3630 i, addr->sa_data[i]); 3631 } 3632 3633 eth_commit_mac_addr_change(dev, p); 3634 ret = 0; 3635 3636 out: 3637 kfree(addr); 3638 return ret; 3639 } 3640 3641 static void virtnet_stats(struct net_device *dev, 3642 struct rtnl_link_stats64 *tot) 3643 { 3644 struct virtnet_info *vi = netdev_priv(dev); 3645 unsigned int start; 3646 int i; 3647 3648 for (i = 0; i < vi->max_queue_pairs; i++) { 3649 u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops; 3650 struct receive_queue *rq = &vi->rq[i]; 3651 struct send_queue *sq = &vi->sq[i]; 3652 3653 do { 3654 start = u64_stats_fetch_begin(&sq->stats.syncp); 3655 tpackets = u64_stats_read(&sq->stats.packets); 3656 tbytes = u64_stats_read(&sq->stats.bytes); 3657 terrors = u64_stats_read(&sq->stats.tx_timeouts); 3658 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 3659 3660 do { 3661 start = u64_stats_fetch_begin(&rq->stats.syncp); 3662 rpackets = u64_stats_read(&rq->stats.packets); 3663 rbytes = u64_stats_read(&rq->stats.bytes); 3664 rdrops = u64_stats_read(&rq->stats.drops); 3665 } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); 3666 3667 tot->rx_packets += rpackets; 3668 tot->tx_packets += tpackets; 3669 tot->rx_bytes += rbytes; 3670 tot->tx_bytes += tbytes; 3671 tot->rx_dropped += rdrops; 3672 tot->tx_errors += terrors; 3673 } 3674 3675 tot->tx_dropped = DEV_STATS_READ(dev, tx_dropped); 3676 tot->tx_fifo_errors = DEV_STATS_READ(dev, tx_fifo_errors); 3677 tot->rx_length_errors = DEV_STATS_READ(dev, rx_length_errors); 3678 tot->rx_frame_errors = DEV_STATS_READ(dev, rx_frame_errors); 3679 } 3680 3681 static void virtnet_ack_link_announce(struct virtnet_info *vi) 3682 { 3683 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE, 3684 VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL)) 3685 dev_warn(&vi->dev->dev, "Failed to ack link announce.\n"); 3686 } 3687 3688 static bool virtnet_commit_rss_command(struct virtnet_info *vi); 3689 3690 static void 
virtnet_rss_update_by_qpairs(struct virtnet_info *vi, u16 queue_pairs) 3691 { 3692 u32 indir_val = 0; 3693 int i = 0; 3694 3695 for (; i < vi->rss_indir_table_size; ++i) { 3696 indir_val = ethtool_rxfh_indir_default(i, queue_pairs); 3697 vi->rss_hdr->indirection_table[i] = cpu_to_le16(indir_val); 3698 } 3699 vi->rss_trailer.max_tx_vq = cpu_to_le16(queue_pairs); 3700 } 3701 3702 static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) 3703 { 3704 struct virtio_net_ctrl_mq *mq __free(kfree) = NULL; 3705 struct virtio_net_rss_config_hdr *old_rss_hdr; 3706 struct virtio_net_rss_config_trailer old_rss_trailer; 3707 struct net_device *dev = vi->dev; 3708 struct scatterlist sg; 3709 3710 if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ)) 3711 return 0; 3712 3713 /* Firstly check if we need update rss. Do updating if both (1) rss enabled and 3714 * (2) no user configuration. 3715 * 3716 * During rss command processing, device updates queue_pairs using rss.max_tx_vq. That is, 3717 * the device updates queue_pairs together with rss, so we can skip the separate queue_pairs 3718 * update (VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET below) and return directly. 3719 */ 3720 if (vi->has_rss && !netif_is_rxfh_configured(dev)) { 3721 old_rss_hdr = vi->rss_hdr; 3722 old_rss_trailer = vi->rss_trailer; 3723 vi->rss_hdr = devm_kzalloc(&vi->vdev->dev, virtnet_rss_hdr_size(vi), GFP_KERNEL); 3724 if (!vi->rss_hdr) { 3725 vi->rss_hdr = old_rss_hdr; 3726 return -ENOMEM; 3727 } 3728 3729 *vi->rss_hdr = *old_rss_hdr; 3730 virtnet_rss_update_by_qpairs(vi, queue_pairs); 3731 3732 if (!virtnet_commit_rss_command(vi)) { 3733 /* restore ctrl_rss if commit_rss_command failed */ 3734 devm_kfree(&vi->vdev->dev, vi->rss_hdr); 3735 vi->rss_hdr = old_rss_hdr; 3736 vi->rss_trailer = old_rss_trailer; 3737 3738 dev_warn(&dev->dev, "Fail to set num of queue pairs to %d, because committing RSS failed\n", 3739 queue_pairs); 3740 return -EINVAL; 3741 } 3742 devm_kfree(&vi->vdev->dev, old_rss_hdr); 3743 goto succ; 3744 } 3745 3746 mq = kzalloc_obj(*mq); 3747 if (!mq) 3748 return -ENOMEM; 3749 3750 mq->virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs); 3751 sg_init_one(&sg, mq, sizeof(*mq)); 3752 3753 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, 3754 VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) { 3755 dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n", 3756 queue_pairs); 3757 return -EINVAL; 3758 } 3759 succ: 3760 vi->curr_queue_pairs = queue_pairs; 3761 if (dev->flags & IFF_UP) { 3762 local_bh_disable(); 3763 for (int i = 0; i < vi->curr_queue_pairs; ++i) 3764 virtqueue_napi_schedule(&vi->rq[i].napi, vi->rq[i].vq); 3765 local_bh_enable(); 3766 } 3767 3768 return 0; 3769 } 3770 3771 static int virtnet_close(struct net_device *dev) 3772 { 3773 struct virtnet_info *vi = netdev_priv(dev); 3774 int i; 3775 3776 /* Prevent the config change callback from changing carrier 3777 * after close 3778 */ 3779 virtio_config_driver_disable(vi->vdev); 3780 /* Stop getting status/speed updates: we don't care until next 3781 * open 3782 */ 3783 cancel_work_sync(&vi->config_work); 3784 3785 for (i = 0; i < vi->max_queue_pairs; i++) { 3786 virtnet_disable_queue_pair(vi, i); 3787 virtnet_cancel_dim(vi, &vi->rq[i].dim); 3788 } 3789 3790 netif_carrier_off(dev); 3791 3792 return 0; 3793 } 3794 3795 static void virtnet_rx_mode_work(struct work_struct *work) 3796 { 3797 struct virtnet_info *vi = 3798 container_of(work, struct virtnet_info, rx_mode_work); 3799 u8 *promisc_allmulti __free(kfree) = NULL; 3800 struct net_device 
*dev = vi->dev; 3801 struct scatterlist sg[2]; 3802 struct virtio_net_ctrl_mac *mac_data; 3803 struct netdev_hw_addr *ha; 3804 int uc_count; 3805 int mc_count; 3806 void *buf; 3807 int i; 3808 3809 /* We can't dynamically set ndo_set_rx_mode, so return gracefully */ 3810 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX)) 3811 return; 3812 3813 promisc_allmulti = kzalloc_obj(*promisc_allmulti); 3814 if (!promisc_allmulti) { 3815 dev_warn(&dev->dev, "Failed to set RX mode, no memory.\n"); 3816 return; 3817 } 3818 3819 rtnl_lock(); 3820 3821 *promisc_allmulti = !!(dev->flags & IFF_PROMISC); 3822 sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti)); 3823 3824 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 3825 VIRTIO_NET_CTRL_RX_PROMISC, sg)) 3826 dev_warn(&dev->dev, "Failed to %sable promisc mode.\n", 3827 *promisc_allmulti ? "en" : "dis"); 3828 3829 *promisc_allmulti = !!(dev->flags & IFF_ALLMULTI); 3830 sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti)); 3831 3832 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 3833 VIRTIO_NET_CTRL_RX_ALLMULTI, sg)) 3834 dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n", 3835 *promisc_allmulti ? "en" : "dis"); 3836 3837 netif_addr_lock_bh(dev); 3838 3839 uc_count = netdev_uc_count(dev); 3840 mc_count = netdev_mc_count(dev); 3841 /* MAC filter - use one buffer for both lists */ 3842 buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) + 3843 (2 * sizeof(mac_data->entries)), GFP_ATOMIC); 3844 mac_data = buf; 3845 if (!buf) { 3846 netif_addr_unlock_bh(dev); 3847 rtnl_unlock(); 3848 return; 3849 } 3850 3851 sg_init_table(sg, 2); 3852 3853 /* Store the unicast list and count in the front of the buffer */ 3854 mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count); 3855 i = 0; 3856 netdev_for_each_uc_addr(ha, dev) 3857 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 3858 3859 sg_set_buf(&sg[0], mac_data, 3860 sizeof(mac_data->entries) + (uc_count * ETH_ALEN)); 3861 3862 /* multicast list and count fill the end */ 3863 mac_data = (void *)&mac_data->macs[uc_count][0]; 3864 3865 mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count); 3866 i = 0; 3867 netdev_for_each_mc_addr(ha, dev) 3868 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 3869 3870 netif_addr_unlock_bh(dev); 3871 3872 sg_set_buf(&sg[1], mac_data, 3873 sizeof(mac_data->entries) + (mc_count * ETH_ALEN)); 3874 3875 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 3876 VIRTIO_NET_CTRL_MAC_TABLE_SET, sg)) 3877 dev_warn(&dev->dev, "Failed to set MAC filter table.\n"); 3878 3879 rtnl_unlock(); 3880 3881 kfree(buf); 3882 } 3883 3884 static void virtnet_set_rx_mode(struct net_device *dev) 3885 { 3886 struct virtnet_info *vi = netdev_priv(dev); 3887 3888 if (vi->rx_mode_work_enabled) 3889 schedule_work(&vi->rx_mode_work); 3890 } 3891 3892 static int virtnet_vlan_rx_add_vid(struct net_device *dev, 3893 __be16 proto, u16 vid) 3894 { 3895 struct virtnet_info *vi = netdev_priv(dev); 3896 __virtio16 *_vid __free(kfree) = NULL; 3897 struct scatterlist sg; 3898 3899 _vid = kzalloc_obj(*_vid); 3900 if (!_vid) 3901 return -ENOMEM; 3902 3903 *_vid = cpu_to_virtio16(vi->vdev, vid); 3904 sg_init_one(&sg, _vid, sizeof(*_vid)); 3905 3906 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 3907 VIRTIO_NET_CTRL_VLAN_ADD, &sg)) 3908 dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid); 3909 return 0; 3910 } 3911 3912 static int virtnet_vlan_rx_kill_vid(struct net_device *dev, 3913 __be16 proto, u16 vid) 3914 { 3915 struct virtnet_info *vi = netdev_priv(dev); 3916 __virtio16 *_vid 
__free(kfree) = NULL; 3917 struct scatterlist sg; 3918 3919 _vid = kzalloc_obj(*_vid); 3920 if (!_vid) 3921 return -ENOMEM; 3922 3923 *_vid = cpu_to_virtio16(vi->vdev, vid); 3924 sg_init_one(&sg, _vid, sizeof(*_vid)); 3925 3926 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 3927 VIRTIO_NET_CTRL_VLAN_DEL, &sg)) 3928 dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid); 3929 return 0; 3930 } 3931 3932 static void virtnet_clean_affinity(struct virtnet_info *vi) 3933 { 3934 int i; 3935 3936 if (vi->affinity_hint_set) { 3937 for (i = 0; i < vi->max_queue_pairs; i++) { 3938 virtqueue_set_affinity(vi->rq[i].vq, NULL); 3939 virtqueue_set_affinity(vi->sq[i].vq, NULL); 3940 } 3941 3942 vi->affinity_hint_set = false; 3943 } 3944 } 3945 3946 static void virtnet_set_affinity(struct virtnet_info *vi) 3947 { 3948 cpumask_var_t mask; 3949 int stragglers; 3950 int group_size; 3951 int i, start = 0, cpu; 3952 int num_cpu; 3953 int stride; 3954 3955 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { 3956 virtnet_clean_affinity(vi); 3957 return; 3958 } 3959 3960 num_cpu = num_online_cpus(); 3961 stride = max_t(int, num_cpu / vi->curr_queue_pairs, 1); 3962 stragglers = num_cpu >= vi->curr_queue_pairs ? 3963 num_cpu % vi->curr_queue_pairs : 3964 0; 3965 3966 for (i = 0; i < vi->curr_queue_pairs; i++) { 3967 group_size = stride + (i < stragglers ? 1 : 0); 3968 3969 for_each_online_cpu_wrap(cpu, start) { 3970 if (!group_size--) { 3971 start = cpu; 3972 break; 3973 } 3974 cpumask_set_cpu(cpu, mask); 3975 } 3976 3977 virtqueue_set_affinity(vi->rq[i].vq, mask); 3978 virtqueue_set_affinity(vi->sq[i].vq, mask); 3979 __netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS); 3980 cpumask_clear(mask); 3981 } 3982 3983 vi->affinity_hint_set = true; 3984 free_cpumask_var(mask); 3985 } 3986 3987 static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node) 3988 { 3989 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 3990 node); 3991 virtnet_set_affinity(vi); 3992 return 0; 3993 } 3994 3995 static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node) 3996 { 3997 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 3998 node_dead); 3999 virtnet_set_affinity(vi); 4000 return 0; 4001 } 4002 4003 static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node) 4004 { 4005 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 4006 node); 4007 4008 virtnet_clean_affinity(vi); 4009 return 0; 4010 } 4011 4012 static enum cpuhp_state virtionet_online; 4013 4014 static int virtnet_cpu_notif_add(struct virtnet_info *vi) 4015 { 4016 int ret; 4017 4018 ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node); 4019 if (ret) 4020 return ret; 4021 ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD, 4022 &vi->node_dead); 4023 if (!ret) 4024 return ret; 4025 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 4026 return ret; 4027 } 4028 4029 static void virtnet_cpu_notif_remove(struct virtnet_info *vi) 4030 { 4031 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 4032 cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD, 4033 &vi->node_dead); 4034 } 4035 4036 static int virtnet_send_ctrl_coal_vq_cmd(struct virtnet_info *vi, 4037 u16 vqn, u32 max_usecs, u32 max_packets) 4038 { 4039 struct virtio_net_ctrl_coal_vq *coal_vq __free(kfree) = NULL; 4040 struct scatterlist sgs; 4041 4042 coal_vq = kzalloc_obj(*coal_vq); 4043 if (!coal_vq) 4044 return -ENOMEM; 4045 4046 coal_vq->vqn = cpu_to_le16(vqn); 
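/* The vq number and the usec/packet limits are carried little-endian in
 * the control command buffer.
 */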
4047 coal_vq->coal.max_usecs = cpu_to_le32(max_usecs); 4048 coal_vq->coal.max_packets = cpu_to_le32(max_packets); 4049 sg_init_one(&sgs, coal_vq, sizeof(*coal_vq)); 4050 4051 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 4052 VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET, 4053 &sgs)) 4054 return -EINVAL; 4055 4056 return 0; 4057 } 4058 4059 static int virtnet_send_rx_ctrl_coal_vq_cmd(struct virtnet_info *vi, 4060 u16 queue, u32 max_usecs, 4061 u32 max_packets) 4062 { 4063 int err; 4064 4065 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 4066 return -EOPNOTSUPP; 4067 4068 err = virtnet_send_ctrl_coal_vq_cmd(vi, rxq2vq(queue), 4069 max_usecs, max_packets); 4070 if (err) 4071 return err; 4072 4073 vi->rq[queue].intr_coal.max_usecs = max_usecs; 4074 vi->rq[queue].intr_coal.max_packets = max_packets; 4075 4076 return 0; 4077 } 4078 4079 static int virtnet_send_tx_ctrl_coal_vq_cmd(struct virtnet_info *vi, 4080 u16 queue, u32 max_usecs, 4081 u32 max_packets) 4082 { 4083 int err; 4084 4085 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 4086 return -EOPNOTSUPP; 4087 4088 err = virtnet_send_ctrl_coal_vq_cmd(vi, txq2vq(queue), 4089 max_usecs, max_packets); 4090 if (err) 4091 return err; 4092 4093 vi->sq[queue].intr_coal.max_usecs = max_usecs; 4094 vi->sq[queue].intr_coal.max_packets = max_packets; 4095 4096 return 0; 4097 } 4098 4099 static void virtnet_get_ringparam(struct net_device *dev, 4100 struct ethtool_ringparam *ring, 4101 struct kernel_ethtool_ringparam *kernel_ring, 4102 struct netlink_ext_ack *extack) 4103 { 4104 struct virtnet_info *vi = netdev_priv(dev); 4105 4106 ring->rx_max_pending = vi->rq[0].vq->num_max; 4107 ring->tx_max_pending = vi->sq[0].vq->num_max; 4108 ring->rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 4109 ring->tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 4110 } 4111 4112 static int virtnet_set_ringparam(struct net_device *dev, 4113 struct ethtool_ringparam *ring, 4114 struct kernel_ethtool_ringparam *kernel_ring, 4115 struct netlink_ext_ack *extack) 4116 { 4117 struct virtnet_info *vi = netdev_priv(dev); 4118 u32 rx_pending, tx_pending; 4119 struct receive_queue *rq; 4120 struct send_queue *sq; 4121 int i, err; 4122 4123 if (ring->rx_mini_pending || ring->rx_jumbo_pending) 4124 return -EINVAL; 4125 4126 rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 4127 tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 4128 4129 if (ring->rx_pending == rx_pending && 4130 ring->tx_pending == tx_pending) 4131 return 0; 4132 4133 if (ring->rx_pending > vi->rq[0].vq->num_max) 4134 return -EINVAL; 4135 4136 if (ring->tx_pending > vi->sq[0].vq->num_max) 4137 return -EINVAL; 4138 4139 for (i = 0; i < vi->max_queue_pairs; i++) { 4140 rq = vi->rq + i; 4141 sq = vi->sq + i; 4142 4143 if (ring->tx_pending != tx_pending) { 4144 err = virtnet_tx_resize(vi, sq, ring->tx_pending); 4145 if (err) 4146 return err; 4147 4148 /* Upon disabling and re-enabling a transmit virtqueue, the device must 4149 * set the coalescing parameters of the virtqueue to those configured 4150 * through the VIRTIO_NET_CTRL_NOTF_COAL_TX_SET command, or, if the driver 4151 * did not set any TX coalescing parameters, to 0. 4152 */ 4153 err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, i, 4154 vi->intr_coal_tx.max_usecs, 4155 vi->intr_coal_tx.max_packets); 4156 4157 /* Don't break the tx resize action if the vq coalescing is not 4158 * supported. The same is true for rx resize below. 
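			 * Hence -EOPNOTSUPP from the per-queue coalescing
			 * helpers is ignored here, while any other error
			 * aborts the resize.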
4159 */ 4160 if (err && err != -EOPNOTSUPP) 4161 return err; 4162 } 4163 4164 if (ring->rx_pending != rx_pending) { 4165 err = virtnet_rx_resize(vi, rq, ring->rx_pending); 4166 if (err) 4167 return err; 4168 4169 /* The reason is same as the transmit virtqueue reset */ 4170 mutex_lock(&vi->rq[i].dim_lock); 4171 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, i, 4172 vi->intr_coal_rx.max_usecs, 4173 vi->intr_coal_rx.max_packets); 4174 mutex_unlock(&vi->rq[i].dim_lock); 4175 if (err && err != -EOPNOTSUPP) 4176 return err; 4177 } 4178 } 4179 4180 return 0; 4181 } 4182 4183 static bool virtnet_commit_rss_command(struct virtnet_info *vi) 4184 { 4185 struct net_device *dev = vi->dev; 4186 struct scatterlist sgs[2]; 4187 4188 /* prepare sgs */ 4189 sg_init_table(sgs, 2); 4190 sg_set_buf(&sgs[0], vi->rss_hdr, virtnet_rss_hdr_size(vi)); 4191 sg_set_buf(&sgs[1], &vi->rss_trailer, virtnet_rss_trailer_size(vi)); 4192 4193 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, 4194 vi->has_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG 4195 : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, sgs)) 4196 goto err; 4197 4198 return true; 4199 4200 err: 4201 dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n"); 4202 return false; 4203 4204 } 4205 4206 static void virtnet_init_default_rss(struct virtnet_info *vi) 4207 { 4208 vi->rss_hdr->hash_types = cpu_to_le32(vi->rss_hash_types_supported); 4209 vi->rss_hash_types_saved = vi->rss_hash_types_supported; 4210 vi->rss_hdr->indirection_table_mask = vi->rss_indir_table_size 4211 ? cpu_to_le16(vi->rss_indir_table_size - 1) : 0; 4212 vi->rss_hdr->unclassified_queue = 0; 4213 4214 virtnet_rss_update_by_qpairs(vi, vi->curr_queue_pairs); 4215 4216 vi->rss_trailer.hash_key_length = vi->rss_key_size; 4217 4218 netdev_rss_key_fill(vi->rss_hash_key_data, vi->rss_key_size); 4219 } 4220 4221 static int virtnet_get_hashflow(struct net_device *dev, 4222 struct ethtool_rxfh_fields *info) 4223 { 4224 struct virtnet_info *vi = netdev_priv(dev); 4225 4226 info->data = 0; 4227 switch (info->flow_type) { 4228 case TCP_V4_FLOW: 4229 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) { 4230 info->data = RXH_IP_SRC | RXH_IP_DST | 4231 RXH_L4_B_0_1 | RXH_L4_B_2_3; 4232 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 4233 info->data = RXH_IP_SRC | RXH_IP_DST; 4234 } 4235 break; 4236 case TCP_V6_FLOW: 4237 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) { 4238 info->data = RXH_IP_SRC | RXH_IP_DST | 4239 RXH_L4_B_0_1 | RXH_L4_B_2_3; 4240 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 4241 info->data = RXH_IP_SRC | RXH_IP_DST; 4242 } 4243 break; 4244 case UDP_V4_FLOW: 4245 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) { 4246 info->data = RXH_IP_SRC | RXH_IP_DST | 4247 RXH_L4_B_0_1 | RXH_L4_B_2_3; 4248 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 4249 info->data = RXH_IP_SRC | RXH_IP_DST; 4250 } 4251 break; 4252 case UDP_V6_FLOW: 4253 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) { 4254 info->data = RXH_IP_SRC | RXH_IP_DST | 4255 RXH_L4_B_0_1 | RXH_L4_B_2_3; 4256 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 4257 info->data = RXH_IP_SRC | RXH_IP_DST; 4258 } 4259 break; 4260 case IPV4_FLOW: 4261 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) 4262 info->data = RXH_IP_SRC | RXH_IP_DST; 4263 4264 break; 4265 case IPV6_FLOW: 4266 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) 4267 info->data = RXH_IP_SRC | RXH_IP_DST; 4268 4269 break; 
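	/* Flow types with no matching VIRTIO_NET_RSS_HASH_TYPE_* capability
	 * report no hash fields.
	 */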
4270 default: 4271 info->data = 0; 4272 break; 4273 } 4274 4275 return 0; 4276 } 4277 4278 static int virtnet_set_hashflow(struct net_device *dev, 4279 const struct ethtool_rxfh_fields *info, 4280 struct netlink_ext_ack *extack) 4281 { 4282 struct virtnet_info *vi = netdev_priv(dev); 4283 u32 new_hashtypes = vi->rss_hash_types_saved; 4284 bool is_disable = info->data & RXH_DISCARD; 4285 bool is_l4 = info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3); 4286 4287 /* supports only 'sd', 'sdfn' and 'r' */ 4288 if (!((info->data == (RXH_IP_SRC | RXH_IP_DST)) | is_l4 | is_disable)) 4289 return -EINVAL; 4290 4291 switch (info->flow_type) { 4292 case TCP_V4_FLOW: 4293 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4); 4294 if (!is_disable) 4295 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 4296 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv4 : 0); 4297 break; 4298 case UDP_V4_FLOW: 4299 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_UDPv4); 4300 if (!is_disable) 4301 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 4302 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv4 : 0); 4303 break; 4304 case IPV4_FLOW: 4305 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv4; 4306 if (!is_disable) 4307 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv4; 4308 break; 4309 case TCP_V6_FLOW: 4310 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_TCPv6); 4311 if (!is_disable) 4312 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 4313 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv6 : 0); 4314 break; 4315 case UDP_V6_FLOW: 4316 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_UDPv6); 4317 if (!is_disable) 4318 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 4319 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv6 : 0); 4320 break; 4321 case IPV6_FLOW: 4322 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv6; 4323 if (!is_disable) 4324 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv6; 4325 break; 4326 default: 4327 /* unsupported flow */ 4328 return -EINVAL; 4329 } 4330 4331 /* if unsupported hashtype was set */ 4332 if (new_hashtypes != (new_hashtypes & vi->rss_hash_types_supported)) 4333 return -EINVAL; 4334 4335 if (new_hashtypes != vi->rss_hash_types_saved) { 4336 vi->rss_hash_types_saved = new_hashtypes; 4337 vi->rss_hdr->hash_types = cpu_to_le32(vi->rss_hash_types_saved); 4338 if (vi->dev->features & NETIF_F_RXHASH) 4339 if (!virtnet_commit_rss_command(vi)) 4340 return -EINVAL; 4341 } 4342 4343 return 0; 4344 } 4345 4346 static void virtnet_get_drvinfo(struct net_device *dev, 4347 struct ethtool_drvinfo *info) 4348 { 4349 struct virtnet_info *vi = netdev_priv(dev); 4350 struct virtio_device *vdev = vi->vdev; 4351 4352 strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); 4353 strscpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version)); 4354 strscpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info)); 4355 4356 } 4357 4358 /* TODO: Eliminate OOO packets during switching */ 4359 static int virtnet_set_channels(struct net_device *dev, 4360 struct ethtool_channels *channels) 4361 { 4362 struct virtnet_info *vi = netdev_priv(dev); 4363 u16 queue_pairs = channels->combined_count; 4364 int err; 4365 4366 /* We don't support separate rx/tx channels. 4367 * We don't allow setting 'other' channels. 
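	 * Only the 'combined' count is honoured, since each virtio-net queue
	 * pair carries one RX and one TX virtqueue.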
4368 */ 4369 if (channels->rx_count || channels->tx_count || channels->other_count) 4370 return -EINVAL; 4371 4372 if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0) 4373 return -EINVAL; 4374 4375 /* For now we don't support modifying channels while XDP is loaded 4376 * also when XDP is loaded all RX queues have XDP programs so we only 4377 * need to check a single RX queue. 4378 */ 4379 if (vi->rq[0].xdp_prog) 4380 return -EINVAL; 4381 4382 cpus_read_lock(); 4383 err = virtnet_set_queues(vi, queue_pairs); 4384 if (err) { 4385 cpus_read_unlock(); 4386 goto err; 4387 } 4388 virtnet_set_affinity(vi); 4389 cpus_read_unlock(); 4390 4391 netif_set_real_num_tx_queues(dev, queue_pairs); 4392 netif_set_real_num_rx_queues(dev, queue_pairs); 4393 err: 4394 return err; 4395 } 4396 4397 static void virtnet_stats_sprintf(u8 **p, const char *fmt, const char *noq_fmt, 4398 int num, int qid, const struct virtnet_stat_desc *desc) 4399 { 4400 int i; 4401 4402 if (qid < 0) { 4403 for (i = 0; i < num; ++i) 4404 ethtool_sprintf(p, noq_fmt, desc[i].desc); 4405 } else { 4406 for (i = 0; i < num; ++i) 4407 ethtool_sprintf(p, fmt, qid, desc[i].desc); 4408 } 4409 } 4410 4411 /* qid == -1: for rx/tx queue total field */ 4412 static void virtnet_get_stats_string(struct virtnet_info *vi, int type, int qid, u8 **data) 4413 { 4414 const struct virtnet_stat_desc *desc; 4415 const char *fmt, *noq_fmt; 4416 u8 *p = *data; 4417 u32 num; 4418 4419 if (type == VIRTNET_Q_TYPE_CQ && qid >= 0) { 4420 noq_fmt = "cq_hw_%s"; 4421 4422 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) { 4423 desc = &virtnet_stats_cvq_desc[0]; 4424 num = ARRAY_SIZE(virtnet_stats_cvq_desc); 4425 4426 virtnet_stats_sprintf(&p, NULL, noq_fmt, num, -1, desc); 4427 } 4428 } 4429 4430 if (type == VIRTNET_Q_TYPE_RX) { 4431 fmt = "rx%u_%s"; 4432 noq_fmt = "rx_%s"; 4433 4434 desc = &virtnet_rq_stats_desc[0]; 4435 num = ARRAY_SIZE(virtnet_rq_stats_desc); 4436 4437 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4438 4439 fmt = "rx%u_hw_%s"; 4440 noq_fmt = "rx_hw_%s"; 4441 4442 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4443 desc = &virtnet_stats_rx_basic_desc[0]; 4444 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4445 4446 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4447 } 4448 4449 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4450 desc = &virtnet_stats_rx_csum_desc[0]; 4451 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4452 4453 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4454 } 4455 4456 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4457 desc = &virtnet_stats_rx_speed_desc[0]; 4458 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4459 4460 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4461 } 4462 } 4463 4464 if (type == VIRTNET_Q_TYPE_TX) { 4465 fmt = "tx%u_%s"; 4466 noq_fmt = "tx_%s"; 4467 4468 desc = &virtnet_sq_stats_desc[0]; 4469 num = ARRAY_SIZE(virtnet_sq_stats_desc); 4470 4471 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4472 4473 fmt = "tx%u_hw_%s"; 4474 noq_fmt = "tx_hw_%s"; 4475 4476 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4477 desc = &virtnet_stats_tx_basic_desc[0]; 4478 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4479 4480 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4481 } 4482 4483 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4484 desc = &virtnet_stats_tx_gso_desc[0]; 4485 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4486 4487 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, 
desc); 4488 } 4489 4490 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4491 desc = &virtnet_stats_tx_speed_desc[0]; 4492 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4493 4494 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4495 } 4496 } 4497 4498 *data = p; 4499 } 4500 4501 struct virtnet_stats_ctx { 4502 /* The stats are write to qstats or ethtool -S */ 4503 bool to_qstat; 4504 4505 /* Used to calculate the offset inside the output buffer. */ 4506 u32 desc_num[3]; 4507 4508 /* The actual supported stat types. */ 4509 u64 bitmap[3]; 4510 4511 /* Used to calculate the reply buffer size. */ 4512 u32 size[3]; 4513 4514 /* Record the output buffer. */ 4515 u64 *data; 4516 }; 4517 4518 static void virtnet_stats_ctx_init(struct virtnet_info *vi, 4519 struct virtnet_stats_ctx *ctx, 4520 u64 *data, bool to_qstat) 4521 { 4522 u32 queue_type; 4523 4524 ctx->data = data; 4525 ctx->to_qstat = to_qstat; 4526 4527 if (to_qstat) { 4528 ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc_qstat); 4529 ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc_qstat); 4530 4531 queue_type = VIRTNET_Q_TYPE_RX; 4532 4533 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4534 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC; 4535 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat); 4536 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic); 4537 } 4538 4539 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4540 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM; 4541 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat); 4542 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum); 4543 } 4544 4545 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 4546 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_GSO; 4547 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat); 4548 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_gso); 4549 } 4550 4551 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4552 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED; 4553 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat); 4554 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed); 4555 } 4556 4557 queue_type = VIRTNET_Q_TYPE_TX; 4558 4559 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4560 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC; 4561 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat); 4562 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic); 4563 } 4564 4565 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 4566 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_CSUM; 4567 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat); 4568 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_csum); 4569 } 4570 4571 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4572 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO; 4573 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat); 4574 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso); 4575 } 4576 4577 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4578 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED; 4579 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat); 4580 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed); 4581 } 4582 4583 
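		/* The qstat layout is fully described at this point; the
		 * ethtool -S descriptor counts below are only needed when
		 * to_qstat is false.
		 */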
return; 4584 } 4585 4586 ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc); 4587 ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc); 4588 4589 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) { 4590 queue_type = VIRTNET_Q_TYPE_CQ; 4591 4592 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_CVQ; 4593 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_cvq_desc); 4594 ctx->size[queue_type] += sizeof(struct virtio_net_stats_cvq); 4595 } 4596 4597 queue_type = VIRTNET_Q_TYPE_RX; 4598 4599 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4600 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC; 4601 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4602 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic); 4603 } 4604 4605 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4606 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM; 4607 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4608 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum); 4609 } 4610 4611 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4612 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED; 4613 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4614 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed); 4615 } 4616 4617 queue_type = VIRTNET_Q_TYPE_TX; 4618 4619 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4620 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC; 4621 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4622 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic); 4623 } 4624 4625 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4626 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO; 4627 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4628 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso); 4629 } 4630 4631 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4632 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED; 4633 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4634 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed); 4635 } 4636 } 4637 4638 /* stats_sum_queue - Calculate the sum of the same fields in sq or rq. 
4639 * @sum: the position to store the sum values 4640 * @num: field num 4641 * @q_value: the first queue fields 4642 * @q_num: number of the queues 4643 */ 4644 static void stats_sum_queue(u64 *sum, u32 num, u64 *q_value, u32 q_num) 4645 { 4646 u32 step = num; 4647 int i, j; 4648 u64 *p; 4649 4650 for (i = 0; i < num; ++i) { 4651 p = sum + i; 4652 *p = 0; 4653 4654 for (j = 0; j < q_num; ++j) 4655 *p += *(q_value + i + j * step); 4656 } 4657 } 4658 4659 static void virtnet_fill_total_fields(struct virtnet_info *vi, 4660 struct virtnet_stats_ctx *ctx) 4661 { 4662 u64 *data, *first_rx_q, *first_tx_q; 4663 u32 num_cq, num_rx, num_tx; 4664 4665 num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ]; 4666 num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX]; 4667 num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX]; 4668 4669 first_rx_q = ctx->data + num_rx + num_tx + num_cq; 4670 first_tx_q = first_rx_q + vi->curr_queue_pairs * num_rx; 4671 4672 data = ctx->data; 4673 4674 stats_sum_queue(data, num_rx, first_rx_q, vi->curr_queue_pairs); 4675 4676 data = ctx->data + num_rx; 4677 4678 stats_sum_queue(data, num_tx, first_tx_q, vi->curr_queue_pairs); 4679 } 4680 4681 static void virtnet_fill_stats_qstat(struct virtnet_info *vi, u32 qid, 4682 struct virtnet_stats_ctx *ctx, 4683 const u8 *base, bool drv_stats, u8 reply_type) 4684 { 4685 const struct virtnet_stat_desc *desc; 4686 const u64_stats_t *v_stat; 4687 u64 offset, bitmap; 4688 const __le64 *v; 4689 u32 queue_type; 4690 int i, num; 4691 4692 queue_type = vq_type(vi, qid); 4693 bitmap = ctx->bitmap[queue_type]; 4694 4695 if (drv_stats) { 4696 if (queue_type == VIRTNET_Q_TYPE_RX) { 4697 desc = &virtnet_rq_stats_desc_qstat[0]; 4698 num = ARRAY_SIZE(virtnet_rq_stats_desc_qstat); 4699 } else { 4700 desc = &virtnet_sq_stats_desc_qstat[0]; 4701 num = ARRAY_SIZE(virtnet_sq_stats_desc_qstat); 4702 } 4703 4704 for (i = 0; i < num; ++i) { 4705 offset = desc[i].qstat_offset / sizeof(*ctx->data); 4706 v_stat = (const u64_stats_t *)(base + desc[i].offset); 4707 ctx->data[offset] = u64_stats_read(v_stat); 4708 } 4709 return; 4710 } 4711 4712 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4713 desc = &virtnet_stats_rx_basic_desc_qstat[0]; 4714 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat); 4715 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC) 4716 goto found; 4717 } 4718 4719 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4720 desc = &virtnet_stats_rx_csum_desc_qstat[0]; 4721 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat); 4722 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM) 4723 goto found; 4724 } 4725 4726 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 4727 desc = &virtnet_stats_rx_gso_desc_qstat[0]; 4728 num = ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat); 4729 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_GSO) 4730 goto found; 4731 } 4732 4733 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4734 desc = &virtnet_stats_rx_speed_desc_qstat[0]; 4735 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat); 4736 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED) 4737 goto found; 4738 } 4739 4740 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4741 desc = &virtnet_stats_tx_basic_desc_qstat[0]; 4742 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat); 4743 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC) 4744 goto found; 4745 } 4746 4747 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 4748 desc = &virtnet_stats_tx_csum_desc_qstat[0]; 4749 num = ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat); 4750 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_CSUM) 4751 goto found; 4752 
} 4753 4754 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4755 desc = &virtnet_stats_tx_gso_desc_qstat[0]; 4756 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat); 4757 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO) 4758 goto found; 4759 } 4760 4761 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4762 desc = &virtnet_stats_tx_speed_desc_qstat[0]; 4763 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat); 4764 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED) 4765 goto found; 4766 } 4767 4768 return; 4769 4770 found: 4771 for (i = 0; i < num; ++i) { 4772 offset = desc[i].qstat_offset / sizeof(*ctx->data); 4773 v = (const __le64 *)(base + desc[i].offset); 4774 ctx->data[offset] = le64_to_cpu(*v); 4775 } 4776 } 4777 4778 /* virtnet_fill_stats - copy the stats to qstats or ethtool -S 4779 * The stats source is the device or the driver. 4780 * 4781 * @vi: virtio net info 4782 * @qid: the vq id 4783 * @ctx: stats ctx (initiated by virtnet_stats_ctx_init()) 4784 * @base: pointer to the device reply or the driver stats structure. 4785 * @drv_stats: designate the base type (device reply, driver stats) 4786 * @type: the type of the device reply (if drv_stats is true, this must be zero) 4787 */ 4788 static void virtnet_fill_stats(struct virtnet_info *vi, u32 qid, 4789 struct virtnet_stats_ctx *ctx, 4790 const u8 *base, bool drv_stats, u8 reply_type) 4791 { 4792 u32 queue_type, num_rx, num_tx, num_cq; 4793 const struct virtnet_stat_desc *desc; 4794 const u64_stats_t *v_stat; 4795 u64 offset, bitmap; 4796 const __le64 *v; 4797 int i, num; 4798 4799 if (ctx->to_qstat) 4800 return virtnet_fill_stats_qstat(vi, qid, ctx, base, drv_stats, reply_type); 4801 4802 num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ]; 4803 num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX]; 4804 num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX]; 4805 4806 queue_type = vq_type(vi, qid); 4807 bitmap = ctx->bitmap[queue_type]; 4808 4809 /* skip the total fields of pairs */ 4810 offset = num_rx + num_tx; 4811 4812 if (queue_type == VIRTNET_Q_TYPE_TX) { 4813 offset += num_cq + num_rx * vi->curr_queue_pairs + num_tx * (qid / 2); 4814 4815 num = ARRAY_SIZE(virtnet_sq_stats_desc); 4816 if (drv_stats) { 4817 desc = &virtnet_sq_stats_desc[0]; 4818 goto drv_stats; 4819 } 4820 4821 offset += num; 4822 4823 } else if (queue_type == VIRTNET_Q_TYPE_RX) { 4824 offset += num_cq + num_rx * (qid / 2); 4825 4826 num = ARRAY_SIZE(virtnet_rq_stats_desc); 4827 if (drv_stats) { 4828 desc = &virtnet_rq_stats_desc[0]; 4829 goto drv_stats; 4830 } 4831 4832 offset += num; 4833 } 4834 4835 if (bitmap & VIRTIO_NET_STATS_TYPE_CVQ) { 4836 desc = &virtnet_stats_cvq_desc[0]; 4837 num = ARRAY_SIZE(virtnet_stats_cvq_desc); 4838 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_CVQ) 4839 goto found; 4840 4841 offset += num; 4842 } 4843 4844 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4845 desc = &virtnet_stats_rx_basic_desc[0]; 4846 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4847 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC) 4848 goto found; 4849 4850 offset += num; 4851 } 4852 4853 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4854 desc = &virtnet_stats_rx_csum_desc[0]; 4855 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4856 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM) 4857 goto found; 4858 4859 offset += num; 4860 } 4861 4862 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4863 desc = &virtnet_stats_rx_speed_desc[0]; 4864 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4865 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED) 4866 goto found; 4867 4868 
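		/* Not the reply type being filled: skip past this block's
		 * slots in the output buffer and keep scanning.
		 */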
offset += num; 4869 } 4870 4871 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4872 desc = &virtnet_stats_tx_basic_desc[0]; 4873 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4874 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC) 4875 goto found; 4876 4877 offset += num; 4878 } 4879 4880 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4881 desc = &virtnet_stats_tx_gso_desc[0]; 4882 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4883 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO) 4884 goto found; 4885 4886 offset += num; 4887 } 4888 4889 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4890 desc = &virtnet_stats_tx_speed_desc[0]; 4891 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4892 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED) 4893 goto found; 4894 4895 offset += num; 4896 } 4897 4898 return; 4899 4900 found: 4901 for (i = 0; i < num; ++i) { 4902 v = (const __le64 *)(base + desc[i].offset); 4903 ctx->data[offset + i] = le64_to_cpu(*v); 4904 } 4905 4906 return; 4907 4908 drv_stats: 4909 for (i = 0; i < num; ++i) { 4910 v_stat = (const u64_stats_t *)(base + desc[i].offset); 4911 ctx->data[offset + i] = u64_stats_read(v_stat); 4912 } 4913 } 4914 4915 static int __virtnet_get_hw_stats(struct virtnet_info *vi, 4916 struct virtnet_stats_ctx *ctx, 4917 struct virtio_net_ctrl_queue_stats *req, 4918 int req_size, void *reply, int res_size) 4919 { 4920 struct virtio_net_stats_reply_hdr *hdr; 4921 struct scatterlist sgs_in, sgs_out; 4922 void *p; 4923 u32 qid; 4924 int ok; 4925 4926 sg_init_one(&sgs_out, req, req_size); 4927 sg_init_one(&sgs_in, reply, res_size); 4928 4929 ok = virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS, 4930 VIRTIO_NET_CTRL_STATS_GET, 4931 &sgs_out, &sgs_in); 4932 4933 if (!ok) 4934 return ok; 4935 4936 for (p = reply; p - reply < res_size; p += le16_to_cpu(hdr->size)) { 4937 hdr = p; 4938 qid = le16_to_cpu(hdr->vq_index); 4939 virtnet_fill_stats(vi, qid, ctx, p, false, hdr->type); 4940 } 4941 4942 return 0; 4943 } 4944 4945 static void virtnet_make_stat_req(struct virtnet_info *vi, 4946 struct virtnet_stats_ctx *ctx, 4947 struct virtio_net_ctrl_queue_stats *req, 4948 int qid, int *idx) 4949 { 4950 int qtype = vq_type(vi, qid); 4951 u64 bitmap = ctx->bitmap[qtype]; 4952 4953 if (!bitmap) 4954 return; 4955 4956 req->stats[*idx].vq_index = cpu_to_le16(qid); 4957 req->stats[*idx].types_bitmap[0] = cpu_to_le64(bitmap); 4958 *idx += 1; 4959 } 4960 4961 /* qid: -1: get stats of all vq. 4962 * > 0: get the stats for the special vq. This must not be cvq. 
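 * When qid names a specific virtqueue, only that queue is queried and the
 * control virtqueue statistics are skipped.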
4963 */ 4964 static int virtnet_get_hw_stats(struct virtnet_info *vi, 4965 struct virtnet_stats_ctx *ctx, int qid) 4966 { 4967 int qnum, i, j, res_size, qtype, last_vq, first_vq; 4968 struct virtio_net_ctrl_queue_stats *req; 4969 bool enable_cvq; 4970 void *reply; 4971 int ok; 4972 4973 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) 4974 return 0; 4975 4976 if (qid == -1) { 4977 last_vq = vi->curr_queue_pairs * 2 - 1; 4978 first_vq = 0; 4979 enable_cvq = true; 4980 } else { 4981 last_vq = qid; 4982 first_vq = qid; 4983 enable_cvq = false; 4984 } 4985 4986 qnum = 0; 4987 res_size = 0; 4988 for (i = first_vq; i <= last_vq ; ++i) { 4989 qtype = vq_type(vi, i); 4990 if (ctx->bitmap[qtype]) { 4991 ++qnum; 4992 res_size += ctx->size[qtype]; 4993 } 4994 } 4995 4996 if (enable_cvq && ctx->bitmap[VIRTNET_Q_TYPE_CQ]) { 4997 res_size += ctx->size[VIRTNET_Q_TYPE_CQ]; 4998 qnum += 1; 4999 } 5000 5001 req = kzalloc_objs(*req, qnum); 5002 if (!req) 5003 return -ENOMEM; 5004 5005 reply = kmalloc(res_size, GFP_KERNEL); 5006 if (!reply) { 5007 kfree(req); 5008 return -ENOMEM; 5009 } 5010 5011 j = 0; 5012 for (i = first_vq; i <= last_vq ; ++i) 5013 virtnet_make_stat_req(vi, ctx, req, i, &j); 5014 5015 if (enable_cvq) 5016 virtnet_make_stat_req(vi, ctx, req, vi->max_queue_pairs * 2, &j); 5017 5018 ok = __virtnet_get_hw_stats(vi, ctx, req, sizeof(*req) * j, reply, res_size); 5019 5020 kfree(req); 5021 kfree(reply); 5022 5023 return ok; 5024 } 5025 5026 static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data) 5027 { 5028 struct virtnet_info *vi = netdev_priv(dev); 5029 unsigned int i; 5030 u8 *p = data; 5031 5032 switch (stringset) { 5033 case ETH_SS_STATS: 5034 /* Generate the total field names. */ 5035 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, -1, &p); 5036 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, -1, &p); 5037 5038 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_CQ, 0, &p); 5039 5040 for (i = 0; i < vi->curr_queue_pairs; ++i) 5041 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, i, &p); 5042 5043 for (i = 0; i < vi->curr_queue_pairs; ++i) 5044 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, i, &p); 5045 break; 5046 } 5047 } 5048 5049 static int virtnet_get_sset_count(struct net_device *dev, int sset) 5050 { 5051 struct virtnet_info *vi = netdev_priv(dev); 5052 struct virtnet_stats_ctx ctx = {0}; 5053 u32 pair_count; 5054 5055 switch (sset) { 5056 case ETH_SS_STATS: 5057 virtnet_stats_ctx_init(vi, &ctx, NULL, false); 5058 5059 pair_count = ctx.desc_num[VIRTNET_Q_TYPE_RX] + ctx.desc_num[VIRTNET_Q_TYPE_TX]; 5060 5061 return pair_count + ctx.desc_num[VIRTNET_Q_TYPE_CQ] + 5062 vi->curr_queue_pairs * pair_count; 5063 default: 5064 return -EOPNOTSUPP; 5065 } 5066 } 5067 5068 static void virtnet_get_ethtool_stats(struct net_device *dev, 5069 struct ethtool_stats *stats, u64 *data) 5070 { 5071 struct virtnet_info *vi = netdev_priv(dev); 5072 struct virtnet_stats_ctx ctx = {0}; 5073 unsigned int start, i; 5074 const u8 *stats_base; 5075 5076 virtnet_stats_ctx_init(vi, &ctx, data, false); 5077 if (virtnet_get_hw_stats(vi, &ctx, -1)) 5078 dev_warn(&vi->dev->dev, "Failed to get hw stats.\n"); 5079 5080 for (i = 0; i < vi->curr_queue_pairs; i++) { 5081 struct receive_queue *rq = &vi->rq[i]; 5082 struct send_queue *sq = &vi->sq[i]; 5083 5084 stats_base = (const u8 *)&rq->stats; 5085 do { 5086 start = u64_stats_fetch_begin(&rq->stats.syncp); 5087 virtnet_fill_stats(vi, i * 2, &ctx, stats_base, true, 0); 5088 } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); 5089 5090 
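		/* Snapshot the TX driver counters with the same u64_stats
		 * retry loop used for the RX counters above.
		 */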
stats_base = (const u8 *)&sq->stats; 5091 do { 5092 start = u64_stats_fetch_begin(&sq->stats.syncp); 5093 virtnet_fill_stats(vi, i * 2 + 1, &ctx, stats_base, true, 0); 5094 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 5095 } 5096 5097 virtnet_fill_total_fields(vi, &ctx); 5098 } 5099 5100 static void virtnet_get_channels(struct net_device *dev, 5101 struct ethtool_channels *channels) 5102 { 5103 struct virtnet_info *vi = netdev_priv(dev); 5104 5105 channels->combined_count = vi->curr_queue_pairs; 5106 channels->max_combined = vi->max_queue_pairs; 5107 channels->max_other = 0; 5108 channels->rx_count = 0; 5109 channels->tx_count = 0; 5110 channels->other_count = 0; 5111 } 5112 5113 static int virtnet_set_link_ksettings(struct net_device *dev, 5114 const struct ethtool_link_ksettings *cmd) 5115 { 5116 struct virtnet_info *vi = netdev_priv(dev); 5117 5118 return ethtool_virtdev_set_link_ksettings(dev, cmd, 5119 &vi->speed, &vi->duplex); 5120 } 5121 5122 static int virtnet_get_link_ksettings(struct net_device *dev, 5123 struct ethtool_link_ksettings *cmd) 5124 { 5125 struct virtnet_info *vi = netdev_priv(dev); 5126 5127 cmd->base.speed = vi->speed; 5128 cmd->base.duplex = vi->duplex; 5129 cmd->base.port = PORT_OTHER; 5130 5131 return 0; 5132 } 5133 5134 static int virtnet_send_tx_notf_coal_cmds(struct virtnet_info *vi, 5135 struct ethtool_coalesce *ec) 5136 { 5137 struct virtio_net_ctrl_coal_tx *coal_tx __free(kfree) = NULL; 5138 struct scatterlist sgs_tx; 5139 int i; 5140 5141 coal_tx = kzalloc_obj(*coal_tx); 5142 if (!coal_tx) 5143 return -ENOMEM; 5144 5145 coal_tx->tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); 5146 coal_tx->tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); 5147 sg_init_one(&sgs_tx, coal_tx, sizeof(*coal_tx)); 5148 5149 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 5150 VIRTIO_NET_CTRL_NOTF_COAL_TX_SET, 5151 &sgs_tx)) 5152 return -EINVAL; 5153 5154 vi->intr_coal_tx.max_usecs = ec->tx_coalesce_usecs; 5155 vi->intr_coal_tx.max_packets = ec->tx_max_coalesced_frames; 5156 for (i = 0; i < vi->max_queue_pairs; i++) { 5157 vi->sq[i].intr_coal.max_usecs = ec->tx_coalesce_usecs; 5158 vi->sq[i].intr_coal.max_packets = ec->tx_max_coalesced_frames; 5159 } 5160 5161 return 0; 5162 } 5163 5164 static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi, 5165 struct ethtool_coalesce *ec) 5166 { 5167 struct virtio_net_ctrl_coal_rx *coal_rx __free(kfree) = NULL; 5168 bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce; 5169 struct scatterlist sgs_rx; 5170 int i; 5171 5172 if (rx_ctrl_dim_on && !virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 5173 return -EOPNOTSUPP; 5174 5175 if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != vi->intr_coal_rx.max_usecs || 5176 ec->rx_max_coalesced_frames != vi->intr_coal_rx.max_packets)) 5177 return -EINVAL; 5178 5179 if (rx_ctrl_dim_on && !vi->rx_dim_enabled) { 5180 vi->rx_dim_enabled = true; 5181 for (i = 0; i < vi->max_queue_pairs; i++) { 5182 mutex_lock(&vi->rq[i].dim_lock); 5183 vi->rq[i].dim_enabled = true; 5184 mutex_unlock(&vi->rq[i].dim_lock); 5185 } 5186 return 0; 5187 } 5188 5189 coal_rx = kzalloc_obj(*coal_rx); 5190 if (!coal_rx) 5191 return -ENOMEM; 5192 5193 if (!rx_ctrl_dim_on && vi->rx_dim_enabled) { 5194 vi->rx_dim_enabled = false; 5195 for (i = 0; i < vi->max_queue_pairs; i++) { 5196 mutex_lock(&vi->rq[i].dim_lock); 5197 vi->rq[i].dim_enabled = false; 5198 mutex_unlock(&vi->rq[i].dim_lock); 5199 } 5200 } 5201 5202 /* Since the per-queue coalescing params can be set, 5203 * we need apply the 
global new params even if they 5204 * are not updated. 5205 */ 5206 coal_rx->rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs); 5207 coal_rx->rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames); 5208 sg_init_one(&sgs_rx, coal_rx, sizeof(*coal_rx)); 5209 5210 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 5211 VIRTIO_NET_CTRL_NOTF_COAL_RX_SET, 5212 &sgs_rx)) 5213 return -EINVAL; 5214 5215 vi->intr_coal_rx.max_usecs = ec->rx_coalesce_usecs; 5216 vi->intr_coal_rx.max_packets = ec->rx_max_coalesced_frames; 5217 for (i = 0; i < vi->max_queue_pairs; i++) { 5218 mutex_lock(&vi->rq[i].dim_lock); 5219 vi->rq[i].intr_coal.max_usecs = ec->rx_coalesce_usecs; 5220 vi->rq[i].intr_coal.max_packets = ec->rx_max_coalesced_frames; 5221 mutex_unlock(&vi->rq[i].dim_lock); 5222 } 5223 5224 return 0; 5225 } 5226 5227 static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, 5228 struct ethtool_coalesce *ec) 5229 { 5230 int err; 5231 5232 err = virtnet_send_tx_notf_coal_cmds(vi, ec); 5233 if (err) 5234 return err; 5235 5236 err = virtnet_send_rx_notf_coal_cmds(vi, ec); 5237 if (err) 5238 return err; 5239 5240 return 0; 5241 } 5242 5243 static int virtnet_send_rx_notf_coal_vq_cmds(struct virtnet_info *vi, 5244 struct ethtool_coalesce *ec, 5245 u16 queue) 5246 { 5247 bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce; 5248 u32 max_usecs, max_packets; 5249 bool cur_rx_dim; 5250 int err; 5251 5252 mutex_lock(&vi->rq[queue].dim_lock); 5253 cur_rx_dim = vi->rq[queue].dim_enabled; 5254 max_usecs = vi->rq[queue].intr_coal.max_usecs; 5255 max_packets = vi->rq[queue].intr_coal.max_packets; 5256 5257 if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != max_usecs || 5258 ec->rx_max_coalesced_frames != max_packets)) { 5259 mutex_unlock(&vi->rq[queue].dim_lock); 5260 return -EINVAL; 5261 } 5262 5263 if (rx_ctrl_dim_on && !cur_rx_dim) { 5264 vi->rq[queue].dim_enabled = true; 5265 mutex_unlock(&vi->rq[queue].dim_lock); 5266 return 0; 5267 } 5268 5269 if (!rx_ctrl_dim_on && cur_rx_dim) 5270 vi->rq[queue].dim_enabled = false; 5271 5272 /* If no params are updated, userspace ethtool will 5273 * reject the modification. 
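	 * Reaching this point therefore implies that at least one value
	 * changed, so the per-queue command below is always issued.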
5274 */ 5275 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, queue, 5276 ec->rx_coalesce_usecs, 5277 ec->rx_max_coalesced_frames); 5278 mutex_unlock(&vi->rq[queue].dim_lock); 5279 return err; 5280 } 5281 5282 static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi, 5283 struct ethtool_coalesce *ec, 5284 u16 queue) 5285 { 5286 int err; 5287 5288 err = virtnet_send_rx_notf_coal_vq_cmds(vi, ec, queue); 5289 if (err) 5290 return err; 5291 5292 err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, queue, 5293 ec->tx_coalesce_usecs, 5294 ec->tx_max_coalesced_frames); 5295 if (err) 5296 return err; 5297 5298 return 0; 5299 } 5300 5301 static void virtnet_rx_dim_work(struct work_struct *work) 5302 { 5303 struct dim *dim = container_of(work, struct dim, work); 5304 struct receive_queue *rq = container_of(dim, 5305 struct receive_queue, dim); 5306 struct virtnet_info *vi = rq->vq->vdev->priv; 5307 struct net_device *dev = vi->dev; 5308 struct dim_cq_moder update_moder; 5309 int qnum, err; 5310 5311 qnum = rq - vi->rq; 5312 5313 mutex_lock(&rq->dim_lock); 5314 if (!rq->dim_enabled) 5315 goto out; 5316 5317 update_moder = net_dim_get_rx_irq_moder(dev, dim); 5318 if (update_moder.usec != rq->intr_coal.max_usecs || 5319 update_moder.pkts != rq->intr_coal.max_packets) { 5320 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, qnum, 5321 update_moder.usec, 5322 update_moder.pkts); 5323 if (err) 5324 pr_debug("%s: Failed to send dim parameters on rxq%d\n", 5325 dev->name, qnum); 5326 } 5327 out: 5328 dim->state = DIM_START_MEASURE; 5329 mutex_unlock(&rq->dim_lock); 5330 } 5331 5332 static int virtnet_coal_params_supported(struct ethtool_coalesce *ec) 5333 { 5334 /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL 5335 * or VIRTIO_NET_F_VQ_NOTF_COAL feature is negotiated. 5336 */ 5337 if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs) 5338 return -EOPNOTSUPP; 5339 5340 if (ec->tx_max_coalesced_frames > 1 || 5341 ec->rx_max_coalesced_frames != 1) 5342 return -EINVAL; 5343 5344 return 0; 5345 } 5346 5347 static int virtnet_should_update_vq_weight(int dev_flags, int weight, 5348 int vq_weight, bool *should_update) 5349 { 5350 if (weight ^ vq_weight) { 5351 if (dev_flags & IFF_UP) 5352 return -EBUSY; 5353 *should_update = true; 5354 } 5355 5356 return 0; 5357 } 5358 5359 static int virtnet_set_coalesce(struct net_device *dev, 5360 struct ethtool_coalesce *ec, 5361 struct kernel_ethtool_coalesce *kernel_coal, 5362 struct netlink_ext_ack *extack) 5363 { 5364 struct virtnet_info *vi = netdev_priv(dev); 5365 int ret, queue_number, napi_weight, i; 5366 bool update_napi = false; 5367 5368 /* Can't change NAPI weight if the link is up */ 5369 napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; 5370 for (queue_number = 0; queue_number < vi->max_queue_pairs; queue_number++) { 5371 ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, 5372 vi->sq[queue_number].napi.weight, 5373 &update_napi); 5374 if (ret) 5375 return ret; 5376 5377 if (update_napi) { 5378 /* All queues that belong to [queue_number, vi->max_queue_pairs] will be 5379 * updated for the sake of simplicity, which might not be necessary 5380 */ 5381 break; 5382 } 5383 } 5384 5385 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) 5386 ret = virtnet_send_notf_coal_cmds(vi, ec); 5387 else 5388 ret = virtnet_coal_params_supported(ec); 5389 5390 if (ret) 5391 return ret; 5392 5393 if (update_napi) { 5394 /* xsk xmit depends on the tx napi. So if xsk is active, 5395 * prevent modifications to tx napi. 
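		 * Any queue in the affected range with a bound xsk_pool makes
		 * this request fail with -EBUSY.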
5396 */ 5397 for (i = queue_number; i < vi->max_queue_pairs; i++) { 5398 if (vi->sq[i].xsk_pool) 5399 return -EBUSY; 5400 } 5401 5402 for (; queue_number < vi->max_queue_pairs; queue_number++) 5403 vi->sq[queue_number].napi.weight = napi_weight; 5404 } 5405 5406 return ret; 5407 } 5408 5409 static int virtnet_get_coalesce(struct net_device *dev, 5410 struct ethtool_coalesce *ec, 5411 struct kernel_ethtool_coalesce *kernel_coal, 5412 struct netlink_ext_ack *extack) 5413 { 5414 struct virtnet_info *vi = netdev_priv(dev); 5415 5416 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { 5417 ec->rx_coalesce_usecs = vi->intr_coal_rx.max_usecs; 5418 ec->tx_coalesce_usecs = vi->intr_coal_tx.max_usecs; 5419 ec->tx_max_coalesced_frames = vi->intr_coal_tx.max_packets; 5420 ec->rx_max_coalesced_frames = vi->intr_coal_rx.max_packets; 5421 ec->use_adaptive_rx_coalesce = vi->rx_dim_enabled; 5422 } else { 5423 ec->rx_max_coalesced_frames = 1; 5424 5425 if (vi->sq[0].napi.weight) 5426 ec->tx_max_coalesced_frames = 1; 5427 } 5428 5429 return 0; 5430 } 5431 5432 static int virtnet_set_per_queue_coalesce(struct net_device *dev, 5433 u32 queue, 5434 struct ethtool_coalesce *ec) 5435 { 5436 struct virtnet_info *vi = netdev_priv(dev); 5437 int ret, napi_weight; 5438 bool update_napi = false; 5439 5440 if (queue >= vi->max_queue_pairs) 5441 return -EINVAL; 5442 5443 /* Can't change NAPI weight if the link is up */ 5444 napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; 5445 ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, 5446 vi->sq[queue].napi.weight, 5447 &update_napi); 5448 if (ret) 5449 return ret; 5450 5451 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 5452 ret = virtnet_send_notf_coal_vq_cmds(vi, ec, queue); 5453 else 5454 ret = virtnet_coal_params_supported(ec); 5455 5456 if (ret) 5457 return ret; 5458 5459 if (update_napi) 5460 vi->sq[queue].napi.weight = napi_weight; 5461 5462 return 0; 5463 } 5464 5465 static int virtnet_get_per_queue_coalesce(struct net_device *dev, 5466 u32 queue, 5467 struct ethtool_coalesce *ec) 5468 { 5469 struct virtnet_info *vi = netdev_priv(dev); 5470 5471 if (queue >= vi->max_queue_pairs) 5472 return -EINVAL; 5473 5474 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { 5475 mutex_lock(&vi->rq[queue].dim_lock); 5476 ec->rx_coalesce_usecs = vi->rq[queue].intr_coal.max_usecs; 5477 ec->tx_coalesce_usecs = vi->sq[queue].intr_coal.max_usecs; 5478 ec->tx_max_coalesced_frames = vi->sq[queue].intr_coal.max_packets; 5479 ec->rx_max_coalesced_frames = vi->rq[queue].intr_coal.max_packets; 5480 ec->use_adaptive_rx_coalesce = vi->rq[queue].dim_enabled; 5481 mutex_unlock(&vi->rq[queue].dim_lock); 5482 } else { 5483 ec->rx_max_coalesced_frames = 1; 5484 5485 if (vi->sq[queue].napi.weight) 5486 ec->tx_max_coalesced_frames = 1; 5487 } 5488 5489 return 0; 5490 } 5491 5492 static void virtnet_init_settings(struct net_device *dev) 5493 { 5494 struct virtnet_info *vi = netdev_priv(dev); 5495 5496 vi->speed = SPEED_UNKNOWN; 5497 vi->duplex = DUPLEX_UNKNOWN; 5498 } 5499 5500 static u32 virtnet_get_rxfh_key_size(struct net_device *dev) 5501 { 5502 return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size; 5503 } 5504 5505 static u32 virtnet_get_rxfh_indir_size(struct net_device *dev) 5506 { 5507 return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size; 5508 } 5509 5510 static int virtnet_get_rxfh(struct net_device *dev, 5511 struct ethtool_rxfh_param *rxfh) 5512 { 5513 struct virtnet_info *vi = netdev_priv(dev); 5514 int 
i; 5515 5516 if (rxfh->indir) { 5517 for (i = 0; i < vi->rss_indir_table_size; ++i) 5518 rxfh->indir[i] = le16_to_cpu(vi->rss_hdr->indirection_table[i]); 5519 } 5520 5521 if (rxfh->key) 5522 memcpy(rxfh->key, vi->rss_hash_key_data, vi->rss_key_size); 5523 5524 rxfh->hfunc = ETH_RSS_HASH_TOP; 5525 5526 return 0; 5527 } 5528 5529 static int virtnet_set_rxfh(struct net_device *dev, 5530 struct ethtool_rxfh_param *rxfh, 5531 struct netlink_ext_ack *extack) 5532 { 5533 struct virtnet_info *vi = netdev_priv(dev); 5534 bool update = false; 5535 int i; 5536 5537 if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE && 5538 rxfh->hfunc != ETH_RSS_HASH_TOP) 5539 return -EOPNOTSUPP; 5540 5541 if (rxfh->indir) { 5542 if (!vi->has_rss) 5543 return -EOPNOTSUPP; 5544 5545 for (i = 0; i < vi->rss_indir_table_size; ++i) 5546 vi->rss_hdr->indirection_table[i] = cpu_to_le16(rxfh->indir[i]); 5547 update = true; 5548 } 5549 5550 if (rxfh->key) { 5551 /* If either _F_HASH_REPORT or _F_RSS are negotiated, the 5552 * device provides hash calculation capabilities, that is, 5553 * hash_key is configured. 5554 */ 5555 if (!vi->has_rss && !vi->has_rss_hash_report) 5556 return -EOPNOTSUPP; 5557 5558 memcpy(vi->rss_hash_key_data, rxfh->key, vi->rss_key_size); 5559 update = true; 5560 } 5561 5562 if (update) 5563 virtnet_commit_rss_command(vi); 5564 5565 return 0; 5566 } 5567 5568 static u32 virtnet_get_rx_ring_count(struct net_device *dev) 5569 { 5570 struct virtnet_info *vi = netdev_priv(dev); 5571 5572 return vi->curr_queue_pairs; 5573 } 5574 5575 static const struct ethtool_ops virtnet_ethtool_ops = { 5576 .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | 5577 ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE_RX, 5578 .get_drvinfo = virtnet_get_drvinfo, 5579 .get_link = ethtool_op_get_link, 5580 .get_ringparam = virtnet_get_ringparam, 5581 .set_ringparam = virtnet_set_ringparam, 5582 .get_strings = virtnet_get_strings, 5583 .get_sset_count = virtnet_get_sset_count, 5584 .get_ethtool_stats = virtnet_get_ethtool_stats, 5585 .set_channels = virtnet_set_channels, 5586 .get_channels = virtnet_get_channels, 5587 .get_ts_info = ethtool_op_get_ts_info, 5588 .get_link_ksettings = virtnet_get_link_ksettings, 5589 .set_link_ksettings = virtnet_set_link_ksettings, 5590 .set_coalesce = virtnet_set_coalesce, 5591 .get_coalesce = virtnet_get_coalesce, 5592 .set_per_queue_coalesce = virtnet_set_per_queue_coalesce, 5593 .get_per_queue_coalesce = virtnet_get_per_queue_coalesce, 5594 .get_rxfh_key_size = virtnet_get_rxfh_key_size, 5595 .get_rxfh_indir_size = virtnet_get_rxfh_indir_size, 5596 .get_rxfh = virtnet_get_rxfh, 5597 .set_rxfh = virtnet_set_rxfh, 5598 .get_rxfh_fields = virtnet_get_hashflow, 5599 .set_rxfh_fields = virtnet_set_hashflow, 5600 .get_rx_ring_count = virtnet_get_rx_ring_count, 5601 }; 5602 5603 static void virtnet_get_queue_stats_rx(struct net_device *dev, int i, 5604 struct netdev_queue_stats_rx *stats) 5605 { 5606 struct virtnet_info *vi = netdev_priv(dev); 5607 struct receive_queue *rq = &vi->rq[i]; 5608 struct virtnet_stats_ctx ctx = {0}; 5609 5610 virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true); 5611 5612 virtnet_get_hw_stats(vi, &ctx, i * 2); 5613 virtnet_fill_stats(vi, i * 2, &ctx, (void *)&rq->stats, true, 0); 5614 } 5615 5616 static void virtnet_get_queue_stats_tx(struct net_device *dev, int i, 5617 struct netdev_queue_stats_tx *stats) 5618 { 5619 struct virtnet_info *vi = netdev_priv(dev); 5620 struct send_queue *sq = &vi->sq[i]; 5621 struct virtnet_stats_ctx ctx = {0}; 5622 5623 
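	/* Mirror the RX path above: build a qstat context, then merge the
	 * device-provided and driver-side counters for this TX queue.
	 */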
virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true); 5624 5625 virtnet_get_hw_stats(vi, &ctx, i * 2 + 1); 5626 virtnet_fill_stats(vi, i * 2 + 1, &ctx, (void *)&sq->stats, true, 0); 5627 } 5628 5629 static void virtnet_get_base_stats(struct net_device *dev, 5630 struct netdev_queue_stats_rx *rx, 5631 struct netdev_queue_stats_tx *tx) 5632 { 5633 struct virtnet_info *vi = netdev_priv(dev); 5634 5635 /* The queue stats of the virtio-net will not be reset. So here we 5636 * return 0. 5637 */ 5638 rx->bytes = 0; 5639 rx->packets = 0; 5640 5641 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 5642 rx->hw_drops = 0; 5643 rx->hw_drop_overruns = 0; 5644 } 5645 5646 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 5647 rx->csum_unnecessary = 0; 5648 rx->csum_none = 0; 5649 rx->csum_bad = 0; 5650 } 5651 5652 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 5653 rx->hw_gro_packets = 0; 5654 rx->hw_gro_bytes = 0; 5655 rx->hw_gro_wire_packets = 0; 5656 rx->hw_gro_wire_bytes = 0; 5657 } 5658 5659 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) 5660 rx->hw_drop_ratelimits = 0; 5661 5662 tx->bytes = 0; 5663 tx->packets = 0; 5664 tx->stop = 0; 5665 tx->wake = 0; 5666 5667 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 5668 tx->hw_drops = 0; 5669 tx->hw_drop_errors = 0; 5670 } 5671 5672 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 5673 tx->csum_none = 0; 5674 tx->needs_csum = 0; 5675 } 5676 5677 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 5678 tx->hw_gso_packets = 0; 5679 tx->hw_gso_bytes = 0; 5680 tx->hw_gso_wire_packets = 0; 5681 tx->hw_gso_wire_bytes = 0; 5682 } 5683 5684 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) 5685 tx->hw_drop_ratelimits = 0; 5686 5687 netdev_stat_queue_sum(dev, 5688 dev->real_num_rx_queues, vi->max_queue_pairs, rx, 5689 dev->real_num_tx_queues, vi->max_queue_pairs, tx); 5690 } 5691 5692 static const struct netdev_stat_ops virtnet_stat_ops = { 5693 .get_queue_stats_rx = virtnet_get_queue_stats_rx, 5694 .get_queue_stats_tx = virtnet_get_queue_stats_tx, 5695 .get_base_stats = virtnet_get_base_stats, 5696 }; 5697 5698 static void virtnet_freeze_down(struct virtio_device *vdev) 5699 { 5700 struct virtnet_info *vi = vdev->priv; 5701 5702 /* Make sure no work handler is accessing the device */ 5703 flush_work(&vi->config_work); 5704 disable_rx_mode_work(vi); 5705 flush_work(&vi->rx_mode_work); 5706 5707 if (netif_running(vi->dev)) { 5708 rtnl_lock(); 5709 virtnet_close(vi->dev); 5710 rtnl_unlock(); 5711 } 5712 5713 netif_tx_lock_bh(vi->dev); 5714 netif_device_detach(vi->dev); 5715 netif_tx_unlock_bh(vi->dev); 5716 } 5717 5718 static int init_vqs(struct virtnet_info *vi); 5719 5720 static int virtnet_restore_up(struct virtio_device *vdev) 5721 { 5722 struct virtnet_info *vi = vdev->priv; 5723 int err; 5724 5725 err = init_vqs(vi); 5726 if (err) 5727 return err; 5728 5729 err = virtnet_create_page_pools(vi); 5730 if (err) 5731 goto err_del_vqs; 5732 5733 virtio_device_ready(vdev); 5734 5735 enable_rx_mode_work(vi); 5736 5737 if (netif_running(vi->dev)) { 5738 rtnl_lock(); 5739 err = virtnet_open(vi->dev); 5740 rtnl_unlock(); 5741 if (err) 5742 goto err_destroy_pools; 5743 } 5744 5745 netif_tx_lock_bh(vi->dev); 5746 netif_device_attach(vi->dev); 5747 netif_tx_unlock_bh(vi->dev); 5748 return 0; 5749 5750 err_destroy_pools: 5751 virtio_reset_device(vdev); 5752 free_unused_bufs(vi); 5753 virtnet_destroy_page_pools(vi); 5754 virtnet_del_vqs(vi); 5755 return err; 5756 5757 err_del_vqs: 5758 
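	/* Reached when page pool creation fails after init_vqs(): reset the
	 * device and tear the freshly created virtqueues back down.
	 */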
virtio_reset_device(vdev); 5759 virtnet_del_vqs(vi); 5760 return err; 5761 } 5762 5763 static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads) 5764 { 5765 __virtio64 *_offloads __free(kfree) = NULL; 5766 struct scatterlist sg; 5767 5768 _offloads = kzalloc_obj(*_offloads); 5769 if (!_offloads) 5770 return -ENOMEM; 5771 5772 *_offloads = cpu_to_virtio64(vi->vdev, offloads); 5773 5774 sg_init_one(&sg, _offloads, sizeof(*_offloads)); 5775 5776 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS, 5777 VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) { 5778 dev_warn(&vi->dev->dev, "Fail to set guest offload.\n"); 5779 return -EINVAL; 5780 } 5781 5782 return 0; 5783 } 5784 5785 static int virtnet_clear_guest_offloads(struct virtnet_info *vi) 5786 { 5787 u64 offloads = 0; 5788 5789 if (!vi->guest_offloads) 5790 return 0; 5791 5792 return virtnet_set_guest_offloads(vi, offloads); 5793 } 5794 5795 static int virtnet_restore_guest_offloads(struct virtnet_info *vi) 5796 { 5797 u64 offloads = vi->guest_offloads; 5798 5799 if (!vi->guest_offloads) 5800 return 0; 5801 5802 return virtnet_set_guest_offloads(vi, offloads); 5803 } 5804 5805 static int virtnet_rq_bind_xsk_pool(struct virtnet_info *vi, struct receive_queue *rq, 5806 struct xsk_buff_pool *pool) 5807 { 5808 int err, qindex; 5809 5810 qindex = rq - vi->rq; 5811 5812 if (pool) { 5813 err = xdp_rxq_info_reg(&rq->xsk_rxq_info, vi->dev, qindex, rq->napi.napi_id); 5814 if (err < 0) 5815 return err; 5816 5817 err = xdp_rxq_info_reg_mem_model(&rq->xsk_rxq_info, 5818 MEM_TYPE_XSK_BUFF_POOL, NULL); 5819 if (err < 0) 5820 goto unreg; 5821 5822 xsk_pool_set_rxq_info(pool, &rq->xsk_rxq_info); 5823 } 5824 5825 virtnet_rx_pause(vi, rq); 5826 5827 err = virtqueue_reset(rq->vq, virtnet_rq_unmap_free_buf, NULL); 5828 if (err) { 5829 netdev_err(vi->dev, "reset rx fail: rx queue index: %d err: %d\n", qindex, err); 5830 5831 pool = NULL; 5832 } 5833 5834 rq->xsk_pool = pool; 5835 5836 virtnet_rx_resume(vi, rq, true); 5837 5838 if (pool) 5839 return 0; 5840 5841 unreg: 5842 xdp_rxq_info_unreg(&rq->xsk_rxq_info); 5843 return err; 5844 } 5845 5846 static int virtnet_sq_bind_xsk_pool(struct virtnet_info *vi, 5847 struct send_queue *sq, 5848 struct xsk_buff_pool *pool) 5849 { 5850 int err, qindex; 5851 5852 qindex = sq - vi->sq; 5853 5854 virtnet_tx_pause(vi, sq); 5855 5856 err = virtqueue_reset(sq->vq, virtnet_sq_free_unused_buf, 5857 virtnet_sq_free_unused_buf_done); 5858 if (err) { 5859 netdev_err(vi->dev, "reset tx fail: tx queue index: %d err: %d\n", qindex, err); 5860 pool = NULL; 5861 } 5862 5863 sq->xsk_pool = pool; 5864 5865 virtnet_tx_resume(vi, sq); 5866 5867 return err; 5868 } 5869 5870 static int virtnet_xsk_pool_enable(struct net_device *dev, 5871 struct xsk_buff_pool *pool, 5872 u16 qid) 5873 { 5874 struct virtnet_info *vi = netdev_priv(dev); 5875 struct receive_queue *rq; 5876 struct device *dma_dev; 5877 struct send_queue *sq; 5878 dma_addr_t hdr_dma; 5879 int err, size; 5880 5881 if (vi->hdr_len > xsk_pool_get_headroom(pool)) 5882 return -EINVAL; 5883 5884 /* In big_packets mode, xdp cannot work, so there is no need to 5885 * initialize xsk of rq. 5886 */ 5887 if (!vi->rq[qid].page_pool) 5888 return -ENOENT; 5889 5890 if (qid >= vi->curr_queue_pairs) 5891 return -EINVAL; 5892 5893 sq = &vi->sq[qid]; 5894 rq = &vi->rq[qid]; 5895 5896 /* xsk assumes that tx and rx must have the same dma device. The af-xdp 5897 * may use one buffer to receive from the rx and reuse this buffer to 5898 * send by the tx. 
So the dma dev of sq and rq must be the same one. 5899 * 5900 * But vq->dma_dev allows every vq has the respective dma dev. So I 5901 * check the dma dev of vq and sq is the same dev. 5902 */ 5903 if (virtqueue_dma_dev(rq->vq) != virtqueue_dma_dev(sq->vq)) 5904 return -EINVAL; 5905 5906 dma_dev = virtqueue_dma_dev(rq->vq); 5907 if (!dma_dev) 5908 return -EINVAL; 5909 5910 size = virtqueue_get_vring_size(rq->vq); 5911 5912 rq->xsk_buffs = kvzalloc_objs(*rq->xsk_buffs, size); 5913 if (!rq->xsk_buffs) 5914 return -ENOMEM; 5915 5916 hdr_dma = virtqueue_map_single_attrs(sq->vq, &xsk_hdr, vi->hdr_len, 5917 DMA_TO_DEVICE, 0); 5918 if (virtqueue_map_mapping_error(sq->vq, hdr_dma)) { 5919 err = -ENOMEM; 5920 goto err_free_buffs; 5921 } 5922 5923 err = xsk_pool_dma_map(pool, dma_dev, 0); 5924 if (err) 5925 goto err_xsk_map; 5926 5927 err = virtnet_rq_bind_xsk_pool(vi, rq, pool); 5928 if (err) 5929 goto err_rq; 5930 5931 err = virtnet_sq_bind_xsk_pool(vi, sq, pool); 5932 if (err) 5933 goto err_sq; 5934 5935 /* Now, we do not support tx offload(such as tx csum), so all the tx 5936 * virtnet hdr is zero. So all the tx packets can share a single hdr. 5937 */ 5938 sq->xsk_hdr_dma_addr = hdr_dma; 5939 5940 return 0; 5941 5942 err_sq: 5943 virtnet_rq_bind_xsk_pool(vi, rq, NULL); 5944 err_rq: 5945 xsk_pool_dma_unmap(pool, 0); 5946 err_xsk_map: 5947 virtqueue_unmap_single_attrs(rq->vq, hdr_dma, vi->hdr_len, 5948 DMA_TO_DEVICE, 0); 5949 err_free_buffs: 5950 kvfree(rq->xsk_buffs); 5951 return err; 5952 } 5953 5954 static int virtnet_xsk_pool_disable(struct net_device *dev, u16 qid) 5955 { 5956 struct virtnet_info *vi = netdev_priv(dev); 5957 struct xsk_buff_pool *pool; 5958 struct receive_queue *rq; 5959 struct send_queue *sq; 5960 int err; 5961 5962 if (qid >= vi->curr_queue_pairs) 5963 return -EINVAL; 5964 5965 sq = &vi->sq[qid]; 5966 rq = &vi->rq[qid]; 5967 5968 pool = rq->xsk_pool; 5969 5970 err = virtnet_rq_bind_xsk_pool(vi, rq, NULL); 5971 err |= virtnet_sq_bind_xsk_pool(vi, sq, NULL); 5972 5973 xsk_pool_dma_unmap(pool, 0); 5974 5975 virtqueue_unmap_single_attrs(sq->vq, sq->xsk_hdr_dma_addr, 5976 vi->hdr_len, DMA_TO_DEVICE, 0); 5977 kvfree(rq->xsk_buffs); 5978 5979 return err; 5980 } 5981 5982 static int virtnet_xsk_pool_setup(struct net_device *dev, struct netdev_bpf *xdp) 5983 { 5984 if (xdp->xsk.pool) 5985 return virtnet_xsk_pool_enable(dev, xdp->xsk.pool, 5986 xdp->xsk.queue_id); 5987 else 5988 return virtnet_xsk_pool_disable(dev, xdp->xsk.queue_id); 5989 } 5990 5991 static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, 5992 struct netlink_ext_ack *extack) 5993 { 5994 unsigned int room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM + 5995 sizeof(struct skb_shared_info)); 5996 unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN; 5997 struct virtnet_info *vi = netdev_priv(dev); 5998 struct bpf_prog *old_prog; 5999 u16 xdp_qp = 0, curr_qp; 6000 int i, err; 6001 6002 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) 6003 && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 6004 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 6005 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 6006 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 6007 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM) || 6008 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) || 6009 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6))) { 6010 NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first"); 6011 return 
-EOPNOTSUPP; 6012 } 6013 6014 if (vi->mergeable_rx_bufs && !vi->any_header_sg) { 6015 NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required"); 6016 return -EINVAL; 6017 } 6018 6019 if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) { 6020 NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags"); 6021 netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz); 6022 return -EINVAL; 6023 } 6024 6025 curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs; 6026 if (prog) 6027 xdp_qp = nr_cpu_ids; 6028 6029 /* XDP requires extra queues for XDP_TX */ 6030 if (curr_qp + xdp_qp > vi->max_queue_pairs) { 6031 netdev_warn_once(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n", 6032 curr_qp + xdp_qp, vi->max_queue_pairs); 6033 xdp_qp = 0; 6034 } 6035 6036 old_prog = rtnl_dereference(vi->rq[0].xdp_prog); 6037 if (!prog && !old_prog) 6038 return 0; 6039 6040 if (prog) 6041 bpf_prog_add(prog, vi->max_queue_pairs - 1); 6042 6043 virtnet_rx_pause_all(vi); 6044 6045 /* Make sure NAPI is not using any XDP TX queues for RX. */ 6046 if (netif_running(dev)) { 6047 for (i = 0; i < vi->max_queue_pairs; i++) 6048 virtnet_napi_tx_disable(&vi->sq[i]); 6049 } 6050 6051 if (!prog) { 6052 for (i = 0; i < vi->max_queue_pairs; i++) { 6053 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 6054 if (i == 0) 6055 virtnet_restore_guest_offloads(vi); 6056 } 6057 synchronize_net(); 6058 } 6059 6060 err = virtnet_set_queues(vi, curr_qp + xdp_qp); 6061 if (err) 6062 goto err; 6063 netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); 6064 vi->xdp_queue_pairs = xdp_qp; 6065 6066 if (prog) { 6067 vi->xdp_enabled = true; 6068 for (i = 0; i < vi->max_queue_pairs; i++) { 6069 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 6070 if (i == 0 && !old_prog) 6071 virtnet_clear_guest_offloads(vi); 6072 } 6073 if (!old_prog) 6074 xdp_features_set_redirect_target(dev, true); 6075 } else { 6076 xdp_features_clear_redirect_target(dev); 6077 vi->xdp_enabled = false; 6078 } 6079 6080 virtnet_rx_resume_all(vi); 6081 for (i = 0; i < vi->max_queue_pairs; i++) { 6082 if (old_prog) 6083 bpf_prog_put(old_prog); 6084 if (netif_running(dev)) 6085 virtnet_napi_tx_enable(&vi->sq[i]); 6086 } 6087 6088 return 0; 6089 6090 err: 6091 if (!prog) { 6092 virtnet_clear_guest_offloads(vi); 6093 for (i = 0; i < vi->max_queue_pairs; i++) 6094 rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); 6095 } 6096 6097 virtnet_rx_resume_all(vi); 6098 if (netif_running(dev)) { 6099 for (i = 0; i < vi->max_queue_pairs; i++) 6100 virtnet_napi_tx_enable(&vi->sq[i]); 6101 } 6102 if (prog) 6103 bpf_prog_sub(prog, vi->max_queue_pairs - 1); 6104 return err; 6105 } 6106 6107 static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp) 6108 { 6109 switch (xdp->command) { 6110 case XDP_SETUP_PROG: 6111 return virtnet_xdp_set(dev, xdp->prog, xdp->extack); 6112 case XDP_SETUP_XSK_POOL: 6113 return virtnet_xsk_pool_setup(dev, xdp); 6114 default: 6115 return -EINVAL; 6116 } 6117 } 6118 6119 static int virtnet_get_phys_port_name(struct net_device *dev, char *buf, 6120 size_t len) 6121 { 6122 struct virtnet_info *vi = netdev_priv(dev); 6123 int ret; 6124 6125 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 6126 return -EOPNOTSUPP; 6127 6128 ret = snprintf(buf, len, "sby"); 6129 if (ret >= len) 6130 return -EOPNOTSUPP; 6131 6132 return 0; 6133 } 6134 6135 static int virtnet_set_features(struct net_device *dev, 6136 netdev_features_t features) 6137 { 
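	/* Editorial illustration (not from the original source): this
	 * handler covers the two toggles that map onto device state.
	 * NETIF_F_GRO_HW mirrors the guest offload set and is refused
	 * with -EBUSY while an XDP program is attached, since the attach
	 * path above already cleared those offloads; NETIF_F_RXHASH
	 * mirrors the RSS hash_types committed over the control
	 * virtqueue.  From user space these are typically exercised with
	 * something like:
	 *
	 *	ethtool -K eth0 rx-gro-hw off
	 *	ethtool -K eth0 rxhash on
	 *
	 * where "eth0" is only a placeholder interface name.
	 */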
6138 struct virtnet_info *vi = netdev_priv(dev); 6139 u64 offloads; 6140 int err; 6141 6142 if ((dev->features ^ features) & NETIF_F_GRO_HW) { 6143 if (vi->xdp_enabled) 6144 return -EBUSY; 6145 6146 if (features & NETIF_F_GRO_HW) 6147 offloads = vi->guest_offloads_capable; 6148 else 6149 offloads = vi->guest_offloads_capable & 6150 ~GUEST_OFFLOAD_GRO_HW_MASK; 6151 6152 err = virtnet_set_guest_offloads(vi, offloads); 6153 if (err) 6154 return err; 6155 vi->guest_offloads = offloads; 6156 } 6157 6158 if ((dev->features ^ features) & NETIF_F_RXHASH) { 6159 if (features & NETIF_F_RXHASH) 6160 vi->rss_hdr->hash_types = cpu_to_le32(vi->rss_hash_types_saved); 6161 else 6162 vi->rss_hdr->hash_types = cpu_to_le32(VIRTIO_NET_HASH_REPORT_NONE); 6163 6164 if (!virtnet_commit_rss_command(vi)) 6165 return -EINVAL; 6166 } 6167 6168 return 0; 6169 } 6170 6171 static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue) 6172 { 6173 struct virtnet_info *priv = netdev_priv(dev); 6174 struct send_queue *sq = &priv->sq[txqueue]; 6175 struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue); 6176 6177 u64_stats_update_begin(&sq->stats.syncp); 6178 u64_stats_inc(&sq->stats.tx_timeouts); 6179 u64_stats_update_end(&sq->stats.syncp); 6180 6181 netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n", 6182 txqueue, sq->name, sq->vq->index, sq->vq->name, 6183 jiffies_to_usecs(jiffies - READ_ONCE(txq->trans_start))); 6184 } 6185 6186 static int virtnet_init_irq_moder(struct virtnet_info *vi) 6187 { 6188 u8 profile_flags = 0, coal_flags = 0; 6189 int ret, i; 6190 6191 profile_flags |= DIM_PROFILE_RX; 6192 coal_flags |= DIM_COALESCE_USEC | DIM_COALESCE_PKTS; 6193 ret = net_dim_init_irq_moder(vi->dev, profile_flags, coal_flags, 6194 DIM_CQ_PERIOD_MODE_START_FROM_EQE, 6195 0, virtnet_rx_dim_work, NULL); 6196 6197 if (ret) 6198 return ret; 6199 6200 for (i = 0; i < vi->max_queue_pairs; i++) 6201 net_dim_setting(vi->dev, &vi->rq[i].dim, false); 6202 6203 return 0; 6204 } 6205 6206 static void virtnet_free_irq_moder(struct virtnet_info *vi) 6207 { 6208 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 6209 return; 6210 6211 rtnl_lock(); 6212 net_dim_free_irq_moder(vi->dev); 6213 rtnl_unlock(); 6214 } 6215 6216 static const struct net_device_ops virtnet_netdev = { 6217 .ndo_open = virtnet_open, 6218 .ndo_stop = virtnet_close, 6219 .ndo_start_xmit = start_xmit, 6220 .ndo_validate_addr = eth_validate_addr, 6221 .ndo_set_mac_address = virtnet_set_mac_address, 6222 .ndo_set_rx_mode = virtnet_set_rx_mode, 6223 .ndo_get_stats64 = virtnet_stats, 6224 .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid, 6225 .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, 6226 .ndo_bpf = virtnet_xdp, 6227 .ndo_xdp_xmit = virtnet_xdp_xmit, 6228 .ndo_xsk_wakeup = virtnet_xsk_wakeup, 6229 .ndo_features_check = passthru_features_check, 6230 .ndo_get_phys_port_name = virtnet_get_phys_port_name, 6231 .ndo_set_features = virtnet_set_features, 6232 .ndo_tx_timeout = virtnet_tx_timeout, 6233 }; 6234 6235 static void virtnet_config_changed_work(struct work_struct *work) 6236 { 6237 struct virtnet_info *vi = 6238 container_of(work, struct virtnet_info, config_work); 6239 u16 v; 6240 6241 if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS, 6242 struct virtio_net_config, status, &v) < 0) 6243 return; 6244 6245 if (v & VIRTIO_NET_S_ANNOUNCE) { 6246 netdev_notify_peers(vi->dev); 6247 virtnet_ack_link_announce(vi); 6248 } 6249 6250 /* Ignore unknown (future) status bits */ 6251 v &= VIRTIO_NET_S_LINK_UP; 
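	/* Editorial note: only the LINK_UP bit is cached in vi->status.
	 * The ANNOUNCE bit was already consumed above by notifying peers
	 * (gratuitous ARP/ND via netdev_notify_peers()) and acking the
	 * announcement over the control virtqueue, so any other, future
	 * status bits are deliberately dropped here.
	 */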
6252 6253 if (vi->status == v) 6254 return; 6255 6256 vi->status = v; 6257 6258 if (vi->status & VIRTIO_NET_S_LINK_UP) { 6259 virtnet_update_settings(vi); 6260 netif_carrier_on(vi->dev); 6261 netif_tx_wake_all_queues(vi->dev); 6262 } else { 6263 netif_carrier_off(vi->dev); 6264 netif_tx_stop_all_queues(vi->dev); 6265 } 6266 } 6267 6268 static void virtnet_config_changed(struct virtio_device *vdev) 6269 { 6270 struct virtnet_info *vi = vdev->priv; 6271 6272 schedule_work(&vi->config_work); 6273 } 6274 6275 static void virtnet_free_queues(struct virtnet_info *vi) 6276 { 6277 int i; 6278 6279 for (i = 0; i < vi->max_queue_pairs; i++) { 6280 __netif_napi_del(&vi->rq[i].napi); 6281 __netif_napi_del(&vi->sq[i].napi); 6282 } 6283 6284 /* We called __netif_napi_del(), 6285 * we need to respect an RCU grace period before freeing vi->rq 6286 */ 6287 synchronize_net(); 6288 6289 kfree(vi->rq); 6290 kfree(vi->sq); 6291 kfree(vi->ctrl); 6292 } 6293 6294 static void _free_receive_bufs(struct virtnet_info *vi) 6295 { 6296 struct bpf_prog *old_prog; 6297 int i; 6298 6299 for (i = 0; i < vi->max_queue_pairs; i++) { 6300 while (vi->rq[i].pages) 6301 __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0); 6302 6303 old_prog = rtnl_dereference(vi->rq[i].xdp_prog); 6304 RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL); 6305 if (old_prog) 6306 bpf_prog_put(old_prog); 6307 } 6308 } 6309 6310 static void free_receive_bufs(struct virtnet_info *vi) 6311 { 6312 rtnl_lock(); 6313 _free_receive_bufs(vi); 6314 rtnl_unlock(); 6315 } 6316 6317 static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf) 6318 { 6319 struct virtnet_info *vi = vq->vdev->priv; 6320 struct send_queue *sq; 6321 int i = vq2txq(vq); 6322 6323 sq = &vi->sq[i]; 6324 6325 switch (virtnet_xmit_ptr_unpack(&buf)) { 6326 case VIRTNET_XMIT_TYPE_SKB: 6327 case VIRTNET_XMIT_TYPE_SKB_ORPHAN: 6328 dev_kfree_skb(buf); 6329 break; 6330 6331 case VIRTNET_XMIT_TYPE_XDP: 6332 xdp_return_frame(buf); 6333 break; 6334 6335 case VIRTNET_XMIT_TYPE_XSK: 6336 xsk_tx_completed(sq->xsk_pool, 1); 6337 break; 6338 } 6339 } 6340 6341 static void virtnet_sq_free_unused_buf_done(struct virtqueue *vq) 6342 { 6343 struct virtnet_info *vi = vq->vdev->priv; 6344 int i = vq2txq(vq); 6345 6346 netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, i)); 6347 } 6348 6349 static void free_unused_bufs(struct virtnet_info *vi) 6350 { 6351 void *buf; 6352 int i; 6353 6354 for (i = 0; i < vi->max_queue_pairs; i++) { 6355 struct virtqueue *vq = vi->sq[i].vq; 6356 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 6357 virtnet_sq_free_unused_buf(vq, buf); 6358 cond_resched(); 6359 } 6360 6361 for (i = 0; i < vi->max_queue_pairs; i++) { 6362 struct virtqueue *vq = vi->rq[i].vq; 6363 6364 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 6365 virtnet_rq_unmap_free_buf(vq, buf); 6366 cond_resched(); 6367 } 6368 } 6369 6370 static void virtnet_del_vqs(struct virtnet_info *vi) 6371 { 6372 struct virtio_device *vdev = vi->vdev; 6373 6374 virtnet_clean_affinity(vi); 6375 6376 vdev->config->del_vqs(vdev); 6377 6378 virtnet_free_queues(vi); 6379 } 6380 6381 /* How large should a single buffer be so a queue full of these can fit at 6382 * least one full packet? 6383 * Logic below assumes the mergeable buffer header is used. 6384 */ 6385 static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq) 6386 { 6387 const unsigned int hdr_len = vi->hdr_len; 6388 unsigned int rq_size = virtqueue_get_vring_size(vq); 6389 unsigned int packet_len = vi->big_packets ? 
IP_MAX_MTU : vi->dev->max_mtu; 6390 unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len; 6391 unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size); 6392 6393 return max(max(min_buf_len, hdr_len) - hdr_len, 6394 (unsigned int)GOOD_PACKET_LEN); 6395 } 6396 6397 static int virtnet_find_vqs(struct virtnet_info *vi) 6398 { 6399 struct virtqueue_info *vqs_info; 6400 struct virtqueue **vqs; 6401 int ret = -ENOMEM; 6402 int total_vqs; 6403 bool *ctx; 6404 u16 i; 6405 6406 /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by 6407 * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by 6408 * possible control vq. 6409 */ 6410 total_vqs = vi->max_queue_pairs * 2 + 6411 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ); 6412 6413 /* Allocate space for find_vqs parameters */ 6414 vqs = kzalloc_objs(*vqs, total_vqs); 6415 if (!vqs) 6416 goto err_vq; 6417 vqs_info = kzalloc_objs(*vqs_info, total_vqs); 6418 if (!vqs_info) 6419 goto err_vqs_info; 6420 if (vi->mergeable_rx_bufs || !vi->big_packets) { 6421 ctx = kzalloc_objs(*ctx, total_vqs); 6422 if (!ctx) 6423 goto err_ctx; 6424 } else { 6425 ctx = NULL; 6426 } 6427 6428 /* Parameters for control virtqueue, if any */ 6429 if (vi->has_cvq) { 6430 vqs_info[total_vqs - 1].name = "control"; 6431 } 6432 6433 /* Allocate/initialize parameters for send/receive virtqueues */ 6434 for (i = 0; i < vi->max_queue_pairs; i++) { 6435 vqs_info[rxq2vq(i)].callback = skb_recv_done; 6436 vqs_info[txq2vq(i)].callback = skb_xmit_done; 6437 sprintf(vi->rq[i].name, "input.%u", i); 6438 sprintf(vi->sq[i].name, "output.%u", i); 6439 vqs_info[rxq2vq(i)].name = vi->rq[i].name; 6440 vqs_info[txq2vq(i)].name = vi->sq[i].name; 6441 if (ctx) 6442 vqs_info[rxq2vq(i)].ctx = true; 6443 } 6444 6445 ret = virtio_find_vqs(vi->vdev, total_vqs, vqs, vqs_info, NULL); 6446 if (ret) 6447 goto err_find; 6448 6449 if (vi->has_cvq) { 6450 vi->cvq = vqs[total_vqs - 1]; 6451 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) 6452 vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; 6453 } 6454 6455 for (i = 0; i < vi->max_queue_pairs; i++) { 6456 vi->rq[i].vq = vqs[rxq2vq(i)]; 6457 vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq); 6458 vi->sq[i].vq = vqs[txq2vq(i)]; 6459 } 6460 /* run here: ret == 0. */ 6461 6462 err_find: 6463 kfree(ctx); 6464 err_ctx: 6465 kfree(vqs_info); 6466 err_vqs_info: 6467 kfree(vqs); 6468 err_vq: 6469 return ret; 6470 } 6471 6472 static int virtnet_alloc_queues(struct virtnet_info *vi) 6473 { 6474 int i; 6475 6476 if (vi->has_cvq) { 6477 vi->ctrl = kzalloc_obj(*vi->ctrl); 6478 if (!vi->ctrl) 6479 goto err_ctrl; 6480 } else { 6481 vi->ctrl = NULL; 6482 } 6483 vi->sq = kzalloc_objs(*vi->sq, vi->max_queue_pairs); 6484 if (!vi->sq) 6485 goto err_sq; 6486 vi->rq = kzalloc_objs(*vi->rq, vi->max_queue_pairs); 6487 if (!vi->rq) 6488 goto err_rq; 6489 6490 for (i = 0; i < vi->max_queue_pairs; i++) { 6491 vi->rq[i].pages = NULL; 6492 netif_napi_add_config(vi->dev, &vi->rq[i].napi, virtnet_poll, 6493 i); 6494 vi->rq[i].napi.weight = napi_weight; 6495 netif_napi_add_tx_weight(vi->dev, &vi->sq[i].napi, 6496 virtnet_poll_tx, 6497 napi_tx ? 
napi_weight : 0); 6498 6499 sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg)); 6500 ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len); 6501 sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg)); 6502 6503 u64_stats_init(&vi->rq[i].stats.syncp); 6504 u64_stats_init(&vi->sq[i].stats.syncp); 6505 mutex_init(&vi->rq[i].dim_lock); 6506 } 6507 6508 return 0; 6509 6510 err_rq: 6511 kfree(vi->sq); 6512 err_sq: 6513 kfree(vi->ctrl); 6514 err_ctrl: 6515 return -ENOMEM; 6516 } 6517 6518 static int init_vqs(struct virtnet_info *vi) 6519 { 6520 int ret; 6521 6522 /* Allocate send & receive queues */ 6523 ret = virtnet_alloc_queues(vi); 6524 if (ret) 6525 goto err; 6526 6527 ret = virtnet_find_vqs(vi); 6528 if (ret) 6529 goto err_free; 6530 6531 cpus_read_lock(); 6532 virtnet_set_affinity(vi); 6533 cpus_read_unlock(); 6534 6535 return 0; 6536 6537 err_free: 6538 virtnet_free_queues(vi); 6539 err: 6540 return ret; 6541 } 6542 6543 #ifdef CONFIG_SYSFS 6544 static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue, 6545 char *buf) 6546 { 6547 struct virtnet_info *vi = netdev_priv(queue->dev); 6548 unsigned int queue_index = get_netdev_rx_queue_index(queue); 6549 unsigned int headroom = virtnet_get_headroom(vi); 6550 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 6551 struct ewma_pkt_len *avg; 6552 6553 BUG_ON(queue_index >= vi->max_queue_pairs); 6554 avg = &vi->rq[queue_index].mrg_avg_pkt_len; 6555 return sprintf(buf, "%u\n", 6556 get_mergeable_buf_len(&vi->rq[queue_index], avg, 6557 SKB_DATA_ALIGN(headroom + tailroom))); 6558 } 6559 6560 static struct rx_queue_attribute mergeable_rx_buffer_size_attribute = 6561 __ATTR_RO(mergeable_rx_buffer_size); 6562 6563 static struct attribute *virtio_net_mrg_rx_attrs[] = { 6564 &mergeable_rx_buffer_size_attribute.attr, 6565 NULL 6566 }; 6567 6568 static const struct attribute_group virtio_net_mrg_rx_group = { 6569 .name = "virtio_net", 6570 .attrs = virtio_net_mrg_rx_attrs 6571 }; 6572 #endif 6573 6574 static bool virtnet_fail_on_feature(struct virtio_device *vdev, 6575 unsigned int fbit, 6576 const char *fname, const char *dname) 6577 { 6578 if (!virtio_has_feature(vdev, fbit)) 6579 return false; 6580 6581 dev_err(&vdev->dev, "device advertises feature %s but not %s", 6582 fname, dname); 6583 6584 return true; 6585 } 6586 6587 #define VIRTNET_FAIL_ON(vdev, fbit, dbit) \ 6588 virtnet_fail_on_feature(vdev, fbit, #fbit, dbit) 6589 6590 static bool virtnet_validate_features(struct virtio_device *vdev) 6591 { 6592 if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) && 6593 (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX, 6594 "VIRTIO_NET_F_CTRL_VQ") || 6595 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN, 6596 "VIRTIO_NET_F_CTRL_VQ") || 6597 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE, 6598 "VIRTIO_NET_F_CTRL_VQ") || 6599 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") || 6600 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR, 6601 "VIRTIO_NET_F_CTRL_VQ") || 6602 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, 6603 "VIRTIO_NET_F_CTRL_VQ") || 6604 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT, 6605 "VIRTIO_NET_F_CTRL_VQ") || 6606 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL, 6607 "VIRTIO_NET_F_CTRL_VQ") || 6608 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_VQ_NOTF_COAL, 6609 "VIRTIO_NET_F_CTRL_VQ"))) { 6610 return false; 6611 } 6612 6613 return true; 6614 } 6615 6616 #define MIN_MTU ETH_MIN_MTU 6617 #define MAX_MTU ETH_MAX_MTU 6618 6619 static int virtnet_validate(struct virtio_device *vdev) 6620 { 6621 if (!vdev->config->get) { 6622 
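		/* Editorial note: config space access is a hard requirement;
		 * later validate/probe steps read MTU, MAC, status and
		 * max_virtqueue_pairs via virtio_cread*(), so a transport
		 * that cannot expose the config space is rejected up front.
		 */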
dev_err(&vdev->dev, "%s failure: config access disabled\n", 6623 __func__); 6624 return -EINVAL; 6625 } 6626 6627 if (!virtnet_validate_features(vdev)) 6628 return -EINVAL; 6629 6630 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { 6631 int mtu = virtio_cread16(vdev, 6632 offsetof(struct virtio_net_config, 6633 mtu)); 6634 if (mtu < MIN_MTU) 6635 __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU); 6636 } 6637 6638 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY) && 6639 !virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 6640 dev_warn(&vdev->dev, "device advertises feature VIRTIO_NET_F_STANDBY but not VIRTIO_NET_F_MAC, disabling standby"); 6641 __virtio_clear_bit(vdev, VIRTIO_NET_F_STANDBY); 6642 } 6643 6644 return 0; 6645 } 6646 6647 static bool virtnet_check_guest_gso(const struct virtnet_info *vi) 6648 { 6649 return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 6650 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 6651 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 6652 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 6653 (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) && 6654 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6)); 6655 } 6656 6657 static void virtnet_set_big_packets(struct virtnet_info *vi, const int mtu) 6658 { 6659 bool guest_gso = virtnet_check_guest_gso(vi); 6660 6661 /* If device can receive ANY guest GSO packets, regardless of mtu, 6662 * allocate packets of maximum size, otherwise limit it to only 6663 * mtu size worth only. 6664 */ 6665 if (mtu > ETH_DATA_LEN || guest_gso) { 6666 vi->big_packets = true; 6667 vi->big_packets_num_skbfrags = guest_gso ? MAX_SKB_FRAGS : DIV_ROUND_UP(mtu, PAGE_SIZE); 6668 } 6669 } 6670 6671 #define VIRTIO_NET_HASH_REPORT_MAX_TABLE 10 6672 static enum xdp_rss_hash_type 6673 virtnet_xdp_rss_type[VIRTIO_NET_HASH_REPORT_MAX_TABLE] = { 6674 [VIRTIO_NET_HASH_REPORT_NONE] = XDP_RSS_TYPE_NONE, 6675 [VIRTIO_NET_HASH_REPORT_IPv4] = XDP_RSS_TYPE_L3_IPV4, 6676 [VIRTIO_NET_HASH_REPORT_TCPv4] = XDP_RSS_TYPE_L4_IPV4_TCP, 6677 [VIRTIO_NET_HASH_REPORT_UDPv4] = XDP_RSS_TYPE_L4_IPV4_UDP, 6678 [VIRTIO_NET_HASH_REPORT_IPv6] = XDP_RSS_TYPE_L3_IPV6, 6679 [VIRTIO_NET_HASH_REPORT_TCPv6] = XDP_RSS_TYPE_L4_IPV6_TCP, 6680 [VIRTIO_NET_HASH_REPORT_UDPv6] = XDP_RSS_TYPE_L4_IPV6_UDP, 6681 [VIRTIO_NET_HASH_REPORT_IPv6_EX] = XDP_RSS_TYPE_L3_IPV6_EX, 6682 [VIRTIO_NET_HASH_REPORT_TCPv6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX, 6683 [VIRTIO_NET_HASH_REPORT_UDPv6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX 6684 }; 6685 6686 static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash, 6687 enum xdp_rss_hash_type *rss_type) 6688 { 6689 const struct xdp_buff *xdp = (void *)_ctx; 6690 struct virtio_net_hdr_v1_hash *hdr_hash; 6691 struct virtnet_info *vi; 6692 u16 hash_report; 6693 6694 if (!(xdp->rxq->dev->features & NETIF_F_RXHASH)) 6695 return -ENODATA; 6696 6697 vi = netdev_priv(xdp->rxq->dev); 6698 hdr_hash = (struct virtio_net_hdr_v1_hash *)(xdp->data - vi->hdr_len); 6699 hash_report = __le16_to_cpu(hdr_hash->hash_report); 6700 6701 if (hash_report >= VIRTIO_NET_HASH_REPORT_MAX_TABLE) 6702 hash_report = VIRTIO_NET_HASH_REPORT_NONE; 6703 6704 *rss_type = virtnet_xdp_rss_type[hash_report]; 6705 *hash = virtio_net_hash_value(hdr_hash); 6706 return 0; 6707 } 6708 6709 static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = { 6710 .xmo_rx_hash = virtnet_xdp_rx_hash, 6711 }; 6712 6713 static int virtnet_probe(struct virtio_device *vdev) 6714 { 6715 int i, err = -ENOMEM; 6716 struct net_device *dev; 6717 struct virtnet_info *vi; 6718 u16 
max_queue_pairs; 6719 int mtu = 0; 6720 6721 /* Find if host supports multiqueue/rss virtio_net device */ 6722 max_queue_pairs = 1; 6723 if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ) || virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) 6724 max_queue_pairs = 6725 virtio_cread16(vdev, offsetof(struct virtio_net_config, max_virtqueue_pairs)); 6726 6727 /* We need at least 2 queue's */ 6728 if (max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 6729 max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || 6730 !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) 6731 max_queue_pairs = 1; 6732 6733 /* Allocate ourselves a network device with room for our info */ 6734 dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs); 6735 if (!dev) 6736 return -ENOMEM; 6737 6738 /* Set up network device as normal. */ 6739 dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE | 6740 IFF_TX_SKB_NO_LINEAR; 6741 dev->netdev_ops = &virtnet_netdev; 6742 dev->stat_ops = &virtnet_stat_ops; 6743 dev->features = NETIF_F_HIGHDMA; 6744 6745 dev->ethtool_ops = &virtnet_ethtool_ops; 6746 SET_NETDEV_DEV(dev, &vdev->dev); 6747 6748 /* Do we support "hardware" checksums? */ 6749 if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) { 6750 /* This opens up the world of extra features. */ 6751 dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG; 6752 if (csum) 6753 dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; 6754 6755 if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) { 6756 dev->hw_features |= NETIF_F_TSO 6757 | NETIF_F_TSO_ECN | NETIF_F_TSO6; 6758 } 6759 /* Individual feature bits: what can host handle? */ 6760 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4)) 6761 dev->hw_features |= NETIF_F_TSO; 6762 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6)) 6763 dev->hw_features |= NETIF_F_TSO6; 6764 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN)) 6765 dev->hw_features |= NETIF_F_TSO_ECN; 6766 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_USO)) 6767 dev->hw_features |= NETIF_F_GSO_UDP_L4; 6768 6769 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO)) { 6770 dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; 6771 dev->hw_enc_features = dev->hw_features; 6772 } 6773 if (dev->hw_features & NETIF_F_GSO_UDP_TUNNEL && 6774 virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM)) { 6775 dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; 6776 dev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; 6777 } 6778 6779 dev->features |= NETIF_F_GSO_ROBUST; 6780 6781 if (gso) 6782 dev->features |= dev->hw_features; 6783 /* (!csum && gso) case will be fixed by register_netdev() */ 6784 } 6785 6786 /* 1. With VIRTIO_NET_F_GUEST_CSUM negotiation, the driver doesn't 6787 * need to calculate checksums for partially checksummed packets, 6788 * as they're considered valid by the upper layer. 6789 * 2. Without VIRTIO_NET_F_GUEST_CSUM negotiation, the driver only 6790 * receives fully checksummed packets. The device may assist in 6791 * validating these packets' checksums, so the driver won't have to. 
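	 *
	 * As a rough editorial illustration (not part of the original
	 * note): a receive header carrying VIRTIO_NET_HDR_F_DATA_VALID is
	 * surfaced to the stack as CHECKSUM_UNNECESSARY, while
	 * VIRTIO_NET_HDR_F_NEEDS_CSUM becomes CHECKSUM_PARTIAL with
	 * csum_start/csum_offset preserved, which is consistent with
	 * advertising NETIF_F_RXCSUM below in both cases.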
6792 */ 6793 dev->features |= NETIF_F_RXCSUM; 6794 6795 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || 6796 virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)) 6797 dev->features |= NETIF_F_GRO_HW; 6798 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) 6799 dev->hw_features |= NETIF_F_GRO_HW; 6800 6801 dev->vlan_features = dev->features; 6802 dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | 6803 NETDEV_XDP_ACT_XSK_ZEROCOPY; 6804 6805 /* MTU range: 68 - 65535 */ 6806 dev->min_mtu = MIN_MTU; 6807 dev->max_mtu = MAX_MTU; 6808 6809 /* Configuration may specify what MAC to use. Otherwise random. */ 6810 if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 6811 u8 addr[ETH_ALEN]; 6812 6813 virtio_cread_bytes(vdev, 6814 offsetof(struct virtio_net_config, mac), 6815 addr, ETH_ALEN); 6816 eth_hw_addr_set(dev, addr); 6817 } else { 6818 eth_hw_addr_random(dev); 6819 dev_info(&vdev->dev, "Assigned random MAC address %pM\n", 6820 dev->dev_addr); 6821 } 6822 6823 /* Set up our device-specific information */ 6824 vi = netdev_priv(dev); 6825 vi->dev = dev; 6826 vi->vdev = vdev; 6827 vdev->priv = vi; 6828 6829 INIT_WORK(&vi->config_work, virtnet_config_changed_work); 6830 INIT_WORK(&vi->rx_mode_work, virtnet_rx_mode_work); 6831 6832 if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) { 6833 vi->mergeable_rx_bufs = true; 6834 dev->xdp_features |= NETDEV_XDP_ACT_RX_SG; 6835 } 6836 6837 if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) 6838 vi->has_rss_hash_report = true; 6839 6840 if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) { 6841 vi->has_rss = true; 6842 6843 vi->rss_indir_table_size = 6844 virtio_cread16(vdev, offsetof(struct virtio_net_config, 6845 rss_max_indirection_table_length)); 6846 } 6847 vi->rss_hdr = devm_kzalloc(&vdev->dev, virtnet_rss_hdr_size(vi), GFP_KERNEL); 6848 if (!vi->rss_hdr) { 6849 err = -ENOMEM; 6850 goto free; 6851 } 6852 6853 if (vi->has_rss || vi->has_rss_hash_report) { 6854 vi->rss_key_size = 6855 virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size)); 6856 if (vi->rss_key_size > VIRTIO_NET_RSS_MAX_KEY_SIZE) { 6857 dev_err(&vdev->dev, "rss_max_key_size=%u exceeds the limit %u.\n", 6858 vi->rss_key_size, VIRTIO_NET_RSS_MAX_KEY_SIZE); 6859 err = -EINVAL; 6860 goto free; 6861 } 6862 6863 vi->rss_hash_types_supported = 6864 virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types)); 6865 vi->rss_hash_types_supported &= 6866 ~(VIRTIO_NET_RSS_HASH_TYPE_IP_EX | 6867 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | 6868 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX); 6869 6870 dev->hw_features |= NETIF_F_RXHASH; 6871 dev->xdp_metadata_ops = &virtnet_xdp_metadata_ops; 6872 } 6873 6874 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO) || 6875 virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO)) 6876 vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash_tunnel); 6877 else if (vi->has_rss_hash_report) 6878 vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash); 6879 else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) || 6880 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 6881 vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); 6882 else 6883 vi->hdr_len = sizeof(struct virtio_net_hdr); 6884 6885 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM)) 6886 vi->rx_tnl_csum = true; 6887 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO)) 6888 vi->rx_tnl = true; 6889 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO)) 6890 vi->tx_tnl = true; 6891 6892 if 
(virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) || 6893 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 6894 vi->any_header_sg = true; 6895 6896 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) 6897 vi->has_cvq = true; 6898 6899 mutex_init(&vi->cvq_lock); 6900 6901 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { 6902 mtu = virtio_cread16(vdev, 6903 offsetof(struct virtio_net_config, 6904 mtu)); 6905 if (mtu < dev->min_mtu) { 6906 /* Should never trigger: MTU was previously validated 6907 * in virtnet_validate. 6908 */ 6909 dev_err(&vdev->dev, 6910 "device MTU appears to have changed it is now %d < %d", 6911 mtu, dev->min_mtu); 6912 err = -EINVAL; 6913 goto free; 6914 } 6915 6916 dev->mtu = mtu; 6917 dev->max_mtu = mtu; 6918 } 6919 6920 virtnet_set_big_packets(vi, mtu); 6921 6922 if (vi->any_header_sg) 6923 dev->needed_headroom = vi->hdr_len; 6924 6925 /* Enable multiqueue by default */ 6926 if (num_online_cpus() >= max_queue_pairs) 6927 vi->curr_queue_pairs = max_queue_pairs; 6928 else 6929 vi->curr_queue_pairs = num_online_cpus(); 6930 vi->max_queue_pairs = max_queue_pairs; 6931 6932 /* Allocate/initialize the rx/tx queues, and invoke find_vqs */ 6933 err = init_vqs(vi); 6934 if (err) 6935 goto free; 6936 6937 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { 6938 vi->intr_coal_rx.max_usecs = 0; 6939 vi->intr_coal_tx.max_usecs = 0; 6940 vi->intr_coal_rx.max_packets = 0; 6941 6942 /* Keep the default values of the coalescing parameters 6943 * aligned with the default napi_tx state. 6944 */ 6945 if (vi->sq[0].napi.weight) 6946 vi->intr_coal_tx.max_packets = 1; 6947 else 6948 vi->intr_coal_tx.max_packets = 0; 6949 } 6950 6951 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { 6952 /* The reason is the same as VIRTIO_NET_F_NOTF_COAL. */ 6953 for (i = 0; i < vi->max_queue_pairs; i++) 6954 if (vi->sq[i].napi.weight) 6955 vi->sq[i].intr_coal.max_packets = 1; 6956 6957 err = virtnet_init_irq_moder(vi); 6958 if (err) 6959 goto free; 6960 } 6961 6962 /* Create page pools for receive queues. 6963 * Page pools are created at probe time so they can be used 6964 * with premapped DMA addresses throughout the device lifetime. 6965 */ 6966 err = virtnet_create_page_pools(vi); 6967 if (err) 6968 goto free_irq_moder; 6969 6970 #ifdef CONFIG_SYSFS 6971 if (vi->mergeable_rx_bufs) 6972 dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group; 6973 #endif 6974 netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs); 6975 netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs); 6976 6977 virtnet_init_settings(dev); 6978 6979 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { 6980 vi->failover = net_failover_create(vi->dev); 6981 if (IS_ERR(vi->failover)) { 6982 err = PTR_ERR(vi->failover); 6983 goto free_page_pools; 6984 } 6985 } 6986 6987 if (vi->has_rss || vi->has_rss_hash_report) 6988 virtnet_init_default_rss(vi); 6989 6990 enable_rx_mode_work(vi); 6991 6992 /* serialize netdev register + virtio_device_ready() with ndo_open() */ 6993 rtnl_lock(); 6994 6995 err = register_netdevice(dev); 6996 if (err) { 6997 pr_debug("virtio_net: registering device failed\n"); 6998 rtnl_unlock(); 6999 goto free_failover; 7000 } 7001 7002 /* Disable config change notification until ndo_open. 
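	 * (Editorial note: this closes a window in which the config-change
	 * worker could observe a status interrupt and toggle the carrier
	 * before the netdev has ever been opened; notifications are
	 * expected to be re-enabled once the interface is brought up.)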
*/ 7003 virtio_config_driver_disable(vi->vdev); 7004 7005 virtio_device_ready(vdev); 7006 7007 if (vi->has_rss || vi->has_rss_hash_report) { 7008 if (!virtnet_commit_rss_command(vi)) { 7009 dev_warn(&vdev->dev, "RSS disabled because committing failed.\n"); 7010 dev->hw_features &= ~NETIF_F_RXHASH; 7011 vi->has_rss_hash_report = false; 7012 vi->has_rss = false; 7013 } 7014 } 7015 7016 virtnet_set_queues(vi, vi->curr_queue_pairs); 7017 7018 /* a random MAC address has been assigned, notify the device. 7019 * We don't fail probe if VIRTIO_NET_F_CTRL_MAC_ADDR is not there 7020 * because many devices work fine without getting MAC explicitly 7021 */ 7022 if (!virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && 7023 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 7024 struct scatterlist sg; 7025 7026 sg_init_one(&sg, dev->dev_addr, dev->addr_len); 7027 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 7028 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { 7029 pr_debug("virtio_net: setting MAC address failed\n"); 7030 rtnl_unlock(); 7031 err = -EINVAL; 7032 goto free_unregister_netdev; 7033 } 7034 } 7035 7036 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) { 7037 struct virtio_net_stats_capabilities *stats_cap __free(kfree) = NULL; 7038 struct scatterlist sg; 7039 __le64 v; 7040 7041 stats_cap = kzalloc_obj(*stats_cap); 7042 if (!stats_cap) { 7043 rtnl_unlock(); 7044 err = -ENOMEM; 7045 goto free_unregister_netdev; 7046 } 7047 7048 sg_init_one(&sg, stats_cap, sizeof(*stats_cap)); 7049 7050 if (!virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS, 7051 VIRTIO_NET_CTRL_STATS_QUERY, 7052 NULL, &sg)) { 7053 pr_debug("virtio_net: fail to get stats capability\n"); 7054 rtnl_unlock(); 7055 err = -EINVAL; 7056 goto free_unregister_netdev; 7057 } 7058 7059 v = stats_cap->supported_stats_types[0]; 7060 vi->device_stats_cap = le64_to_cpu(v); 7061 } 7062 7063 /* Assume link up if device can't report link status, 7064 otherwise get link status from config. */ 7065 netif_carrier_off(dev); 7066 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { 7067 virtio_config_changed(vi->vdev); 7068 } else { 7069 vi->status = VIRTIO_NET_S_LINK_UP; 7070 virtnet_update_settings(vi); 7071 netif_carrier_on(dev); 7072 } 7073 7074 for (i = 0; i < ARRAY_SIZE(guest_offloads); i++) { 7075 unsigned int fbit; 7076 7077 fbit = virtio_offload_to_feature(guest_offloads[i]); 7078 if (virtio_has_feature(vi->vdev, fbit)) 7079 set_bit(guest_offloads[i], &vi->guest_offloads); 7080 } 7081 vi->guest_offloads_capable = vi->guest_offloads; 7082 7083 rtnl_unlock(); 7084 7085 err = virtnet_cpu_notif_add(vi); 7086 if (err) { 7087 pr_debug("virtio_net: registering cpu notifier failed\n"); 7088 goto free_unregister_netdev; 7089 } 7090 7091 pr_debug("virtnet: registered device %s with %d RX and TX vq's\n", 7092 dev->name, max_queue_pairs); 7093 7094 return 0; 7095 7096 free_unregister_netdev: 7097 unregister_netdev(dev); 7098 free_failover: 7099 net_failover_destroy(vi->failover); 7100 free_page_pools: 7101 virtnet_destroy_page_pools(vi); 7102 free_irq_moder: 7103 virtnet_free_irq_moder(vi); 7104 virtio_reset_device(vdev); 7105 virtnet_del_vqs(vi); 7106 free: 7107 free_netdev(dev); 7108 return err; 7109 } 7110 7111 static void remove_vq_common(struct virtnet_info *vi) 7112 { 7113 int i; 7114 7115 virtio_reset_device(vi->vdev); 7116 7117 /* Free unused buffers in both send and recv, if any. 
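	 * (Editorial note: this runs after virtio_reset_device() above, so
	 * the device can no longer touch the rings while the remaining
	 * buffers are detached and freed, and it must precede
	 * virtnet_del_vqs() below, which destroys the virtqueues.)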
*/ 7118 free_unused_bufs(vi); 7119 7120 /* 7121 * Rule of thumb is netdev_tx_reset_queue() should follow any 7122 * skb freeing not followed by netdev_tx_completed_queue() 7123 */ 7124 for (i = 0; i < vi->max_queue_pairs; i++) 7125 netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, i)); 7126 7127 free_receive_bufs(vi); 7128 7129 virtnet_destroy_page_pools(vi); 7130 7131 virtnet_del_vqs(vi); 7132 } 7133 7134 static void virtnet_remove(struct virtio_device *vdev) 7135 { 7136 struct virtnet_info *vi = vdev->priv; 7137 7138 virtnet_cpu_notif_remove(vi); 7139 7140 /* Make sure no work handler is accessing the device. */ 7141 flush_work(&vi->config_work); 7142 disable_rx_mode_work(vi); 7143 flush_work(&vi->rx_mode_work); 7144 7145 virtnet_free_irq_moder(vi); 7146 7147 unregister_netdev(vi->dev); 7148 7149 net_failover_destroy(vi->failover); 7150 7151 remove_vq_common(vi); 7152 7153 free_netdev(vi->dev); 7154 } 7155 7156 static __maybe_unused int virtnet_freeze(struct virtio_device *vdev) 7157 { 7158 struct virtnet_info *vi = vdev->priv; 7159 7160 virtnet_cpu_notif_remove(vi); 7161 virtnet_freeze_down(vdev); 7162 remove_vq_common(vi); 7163 7164 return 0; 7165 } 7166 7167 static __maybe_unused int virtnet_restore(struct virtio_device *vdev) 7168 { 7169 struct virtnet_info *vi = vdev->priv; 7170 int err; 7171 7172 err = virtnet_restore_up(vdev); 7173 if (err) 7174 return err; 7175 virtnet_set_queues(vi, vi->curr_queue_pairs); 7176 7177 err = virtnet_cpu_notif_add(vi); 7178 if (err) { 7179 virtnet_freeze_down(vdev); 7180 remove_vq_common(vi); 7181 return err; 7182 } 7183 7184 return 0; 7185 } 7186 7187 static struct virtio_device_id id_table[] = { 7188 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, 7189 { 0 }, 7190 }; 7191 7192 #define VIRTNET_FEATURES \ 7193 VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \ 7194 VIRTIO_NET_F_MAC, \ 7195 VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \ 7196 VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \ 7197 VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \ 7198 VIRTIO_NET_F_HOST_USO, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, \ 7199 VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \ 7200 VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \ 7201 VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \ 7202 VIRTIO_NET_F_CTRL_MAC_ADDR, \ 7203 VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ 7204 VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ 7205 VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \ 7206 VIRTIO_NET_F_VQ_NOTF_COAL, \ 7207 VIRTIO_NET_F_GUEST_HDRLEN, VIRTIO_NET_F_DEVICE_STATS 7208 7209 static unsigned int features[] = { 7210 VIRTNET_FEATURES, 7211 VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO, 7212 VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM, 7213 VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO, 7214 VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM, 7215 }; 7216 7217 static unsigned int features_legacy[] = { 7218 VIRTNET_FEATURES, 7219 VIRTIO_NET_F_GSO, 7220 VIRTIO_F_ANY_LAYOUT, 7221 }; 7222 7223 static struct virtio_driver virtio_net_driver = { 7224 .feature_table = features, 7225 .feature_table_size = ARRAY_SIZE(features), 7226 .feature_table_legacy = features_legacy, 7227 .feature_table_size_legacy = ARRAY_SIZE(features_legacy), 7228 .driver.name = KBUILD_MODNAME, 7229 .id_table = id_table, 7230 .validate = virtnet_validate, 7231 .probe = virtnet_probe, 7232 .remove = virtnet_remove, 7233 .config_changed = virtnet_config_changed, 7234 #ifdef CONFIG_PM_SLEEP 7235 .freeze = virtnet_freeze, 7236 .restore = virtnet_restore, 
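	/* Editorial note: freeze tears the virtqueues down via
	 * remove_vq_common(), and restore rebuilds them through
	 * virtnet_restore_up() before re-negotiating the queue-pair
	 * count with virtnet_set_queues(), mirroring the probe path.
	 */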
7237 #endif 7238 }; 7239 7240 static __init int virtio_net_driver_init(void) 7241 { 7242 int ret; 7243 7244 ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online", 7245 virtnet_cpu_online, 7246 virtnet_cpu_down_prep); 7247 if (ret < 0) 7248 goto out; 7249 virtionet_online = ret; 7250 ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead", 7251 NULL, virtnet_cpu_dead); 7252 if (ret) 7253 goto err_dead; 7254 ret = register_virtio_driver(&virtio_net_driver); 7255 if (ret) 7256 goto err_virtio; 7257 return 0; 7258 err_virtio: 7259 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); 7260 err_dead: 7261 cpuhp_remove_multi_state(virtionet_online); 7262 out: 7263 return ret; 7264 } 7265 module_init(virtio_net_driver_init); 7266 7267 static __exit void virtio_net_driver_exit(void) 7268 { 7269 unregister_virtio_driver(&virtio_net_driver); 7270 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); 7271 cpuhp_remove_multi_state(virtionet_online); 7272 } 7273 module_exit(virtio_net_driver_exit); 7274 7275 MODULE_DEVICE_TABLE(virtio, id_table); 7276 MODULE_DESCRIPTION("Virtio network driver"); 7277 MODULE_LICENSE("GPL"); 7278
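/* Editorial addendum: the AF_XDP zero-copy path wired up in
 * virtnet_xsk_pool_enable()/virtnet_xsk_pool_disable() above is driven
 * entirely from user space.  The sketch below shows roughly how a
 * consumer might bind an XDP socket to queue 0 of this driver using
 * libxdp; it is illustrative only, the interface name, queue id and
 * umem setup are placeholders or omitted.
 *
 *	struct xsk_socket_config cfg = {
 *		.rx_size    = XSK_RING_CONS__DEFAULT_NUM_DESCS,
 *		.tx_size    = XSK_RING_PROD__DEFAULT_NUM_DESCS,
 *		.bind_flags = XDP_ZEROCOPY,
 *	};
 *	struct xsk_ring_cons rx;
 *	struct xsk_ring_prod tx;
 *	struct xsk_socket *xsk;
 *	int err;
 *
 *	err = xsk_socket__create(&xsk, "eth0", 0, umem, &rx, &tx, &cfg);
 *	if (err)
 *		return err;
 *
 * A plain XDP program can likewise be attached from user space with
 * bpf_xdp_attach(ifindex, prog_fd, XDP_FLAGS_DRV_MODE, NULL) from
 * libbpf, or with "ip link set dev eth0 xdpdrv obj prog.o sec xdp".
 */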