1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* A network driver using virtio. 3 * 4 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation 5 */ 6 //#define DEBUG 7 #include <linux/netdevice.h> 8 #include <linux/etherdevice.h> 9 #include <linux/ethtool.h> 10 #include <linux/module.h> 11 #include <linux/virtio.h> 12 #include <linux/virtio_net.h> 13 #include <linux/bpf.h> 14 #include <linux/bpf_trace.h> 15 #include <linux/scatterlist.h> 16 #include <linux/if_vlan.h> 17 #include <linux/slab.h> 18 #include <linux/cpu.h> 19 #include <linux/average.h> 20 #include <linux/filter.h> 21 #include <linux/kernel.h> 22 #include <linux/dim.h> 23 #include <net/route.h> 24 #include <net/xdp.h> 25 #include <net/net_failover.h> 26 #include <net/netdev_rx_queue.h> 27 #include <net/netdev_queues.h> 28 #include <net/xdp_sock_drv.h> 29 30 static int napi_weight = NAPI_POLL_WEIGHT; 31 module_param(napi_weight, int, 0444); 32 33 static bool csum = true, gso = true, napi_tx = true; 34 module_param(csum, bool, 0444); 35 module_param(gso, bool, 0444); 36 module_param(napi_tx, bool, 0644); 37 38 /* FIXME: MTU in config. */ 39 #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN) 40 #define GOOD_COPY_LEN 128 41 42 #define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD) 43 44 /* Separating two types of XDP xmit */ 45 #define VIRTIO_XDP_TX BIT(0) 46 #define VIRTIO_XDP_REDIR BIT(1) 47 48 /* RX packet size EWMA. The average packet size is used to determine the packet 49 * buffer size when refilling RX rings. As the entire RX ring may be refilled 50 * at once, the weight is chosen so that the EWMA will be insensitive to short- 51 * term, transient changes in packet size. 52 */ 53 DECLARE_EWMA(pkt_len, 0, 64) 54 55 #define VIRTNET_DRIVER_VERSION "1.0.0" 56 57 static const unsigned long guest_offloads[] = { 58 VIRTIO_NET_F_GUEST_TSO4, 59 VIRTIO_NET_F_GUEST_TSO6, 60 VIRTIO_NET_F_GUEST_ECN, 61 VIRTIO_NET_F_GUEST_UFO, 62 VIRTIO_NET_F_GUEST_CSUM, 63 VIRTIO_NET_F_GUEST_USO4, 64 VIRTIO_NET_F_GUEST_USO6, 65 VIRTIO_NET_F_GUEST_HDRLEN 66 }; 67 68 #define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ 69 (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ 70 (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ 71 (1ULL << VIRTIO_NET_F_GUEST_UFO) | \ 72 (1ULL << VIRTIO_NET_F_GUEST_USO4) | \ 73 (1ULL << VIRTIO_NET_F_GUEST_USO6)) 74 75 struct virtnet_stat_desc { 76 char desc[ETH_GSTRING_LEN]; 77 size_t offset; 78 size_t qstat_offset; 79 }; 80 81 struct virtnet_sq_free_stats { 82 u64 packets; 83 u64 bytes; 84 u64 napi_packets; 85 u64 napi_bytes; 86 u64 xsk; 87 }; 88 89 struct virtnet_sq_stats { 90 struct u64_stats_sync syncp; 91 u64_stats_t packets; 92 u64_stats_t bytes; 93 u64_stats_t xdp_tx; 94 u64_stats_t xdp_tx_drops; 95 u64_stats_t kicks; 96 u64_stats_t tx_timeouts; 97 u64_stats_t stop; 98 u64_stats_t wake; 99 }; 100 101 struct virtnet_rq_stats { 102 struct u64_stats_sync syncp; 103 u64_stats_t packets; 104 u64_stats_t bytes; 105 u64_stats_t drops; 106 u64_stats_t xdp_packets; 107 u64_stats_t xdp_tx; 108 u64_stats_t xdp_redirects; 109 u64_stats_t xdp_drops; 110 u64_stats_t kicks; 111 }; 112 113 #define VIRTNET_SQ_STAT(name, m) {name, offsetof(struct virtnet_sq_stats, m), -1} 114 #define VIRTNET_RQ_STAT(name, m) {name, offsetof(struct virtnet_rq_stats, m), -1} 115 116 #define VIRTNET_SQ_STAT_QSTAT(name, m) \ 117 { \ 118 name, \ 119 offsetof(struct virtnet_sq_stats, m), \ 120 offsetof(struct netdev_queue_stats_tx, m), \ 121 } 122 123 #define VIRTNET_RQ_STAT_QSTAT(name, m) \ 124 { \ 125 name, \ 126 offsetof(struct 
virtnet_rq_stats, m), \ 127 offsetof(struct netdev_queue_stats_rx, m), \ 128 } 129 130 static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = { 131 VIRTNET_SQ_STAT("xdp_tx", xdp_tx), 132 VIRTNET_SQ_STAT("xdp_tx_drops", xdp_tx_drops), 133 VIRTNET_SQ_STAT("kicks", kicks), 134 VIRTNET_SQ_STAT("tx_timeouts", tx_timeouts), 135 }; 136 137 static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = { 138 VIRTNET_RQ_STAT("drops", drops), 139 VIRTNET_RQ_STAT("xdp_packets", xdp_packets), 140 VIRTNET_RQ_STAT("xdp_tx", xdp_tx), 141 VIRTNET_RQ_STAT("xdp_redirects", xdp_redirects), 142 VIRTNET_RQ_STAT("xdp_drops", xdp_drops), 143 VIRTNET_RQ_STAT("kicks", kicks), 144 }; 145 146 static const struct virtnet_stat_desc virtnet_sq_stats_desc_qstat[] = { 147 VIRTNET_SQ_STAT_QSTAT("packets", packets), 148 VIRTNET_SQ_STAT_QSTAT("bytes", bytes), 149 VIRTNET_SQ_STAT_QSTAT("stop", stop), 150 VIRTNET_SQ_STAT_QSTAT("wake", wake), 151 }; 152 153 static const struct virtnet_stat_desc virtnet_rq_stats_desc_qstat[] = { 154 VIRTNET_RQ_STAT_QSTAT("packets", packets), 155 VIRTNET_RQ_STAT_QSTAT("bytes", bytes), 156 }; 157 158 #define VIRTNET_STATS_DESC_CQ(name) \ 159 {#name, offsetof(struct virtio_net_stats_cvq, name), -1} 160 161 #define VIRTNET_STATS_DESC_RX(class, name) \ 162 {#name, offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), -1} 163 164 #define VIRTNET_STATS_DESC_TX(class, name) \ 165 {#name, offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name), -1} 166 167 168 static const struct virtnet_stat_desc virtnet_stats_cvq_desc[] = { 169 VIRTNET_STATS_DESC_CQ(command_num), 170 VIRTNET_STATS_DESC_CQ(ok_num), 171 }; 172 173 static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc[] = { 174 VIRTNET_STATS_DESC_RX(basic, packets), 175 VIRTNET_STATS_DESC_RX(basic, bytes), 176 177 VIRTNET_STATS_DESC_RX(basic, notifications), 178 VIRTNET_STATS_DESC_RX(basic, interrupts), 179 }; 180 181 static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc[] = { 182 VIRTNET_STATS_DESC_TX(basic, packets), 183 VIRTNET_STATS_DESC_TX(basic, bytes), 184 185 VIRTNET_STATS_DESC_TX(basic, notifications), 186 VIRTNET_STATS_DESC_TX(basic, interrupts), 187 }; 188 189 static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc[] = { 190 VIRTNET_STATS_DESC_RX(csum, needs_csum), 191 }; 192 193 static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc[] = { 194 VIRTNET_STATS_DESC_TX(gso, gso_packets_noseg), 195 VIRTNET_STATS_DESC_TX(gso, gso_bytes_noseg), 196 }; 197 198 static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc[] = { 199 VIRTNET_STATS_DESC_RX(speed, ratelimit_bytes), 200 }; 201 202 static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc[] = { 203 VIRTNET_STATS_DESC_TX(speed, ratelimit_bytes), 204 }; 205 206 #define VIRTNET_STATS_DESC_RX_QSTAT(class, name, qstat_field) \ 207 { \ 208 #name, \ 209 offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), \ 210 offsetof(struct netdev_queue_stats_rx, qstat_field), \ 211 } 212 213 #define VIRTNET_STATS_DESC_TX_QSTAT(class, name, qstat_field) \ 214 { \ 215 #name, \ 216 offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name), \ 217 offsetof(struct netdev_queue_stats_tx, qstat_field), \ 218 } 219 220 static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc_qstat[] = { 221 VIRTNET_STATS_DESC_RX_QSTAT(basic, drops, hw_drops), 222 VIRTNET_STATS_DESC_RX_QSTAT(basic, drop_overruns, hw_drop_overruns), 223 }; 224 225 static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc_qstat[] = { 226 
VIRTNET_STATS_DESC_TX_QSTAT(basic, drops, hw_drops), 227 VIRTNET_STATS_DESC_TX_QSTAT(basic, drop_malformed, hw_drop_errors), 228 }; 229 230 static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc_qstat[] = { 231 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_valid, csum_unnecessary), 232 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_none, csum_none), 233 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_bad, csum_bad), 234 }; 235 236 static const struct virtnet_stat_desc virtnet_stats_tx_csum_desc_qstat[] = { 237 VIRTNET_STATS_DESC_TX_QSTAT(csum, csum_none, csum_none), 238 VIRTNET_STATS_DESC_TX_QSTAT(csum, needs_csum, needs_csum), 239 }; 240 241 static const struct virtnet_stat_desc virtnet_stats_rx_gso_desc_qstat[] = { 242 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets, hw_gro_packets), 243 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes, hw_gro_bytes), 244 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets_coalesced, hw_gro_wire_packets), 245 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes_coalesced, hw_gro_wire_bytes), 246 }; 247 248 static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc_qstat[] = { 249 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_packets, hw_gso_packets), 250 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_bytes, hw_gso_bytes), 251 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments, hw_gso_wire_packets), 252 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments_bytes, hw_gso_wire_bytes), 253 }; 254 255 static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc_qstat[] = { 256 VIRTNET_STATS_DESC_RX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits), 257 }; 258 259 static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc_qstat[] = { 260 VIRTNET_STATS_DESC_TX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits), 261 }; 262 263 #define VIRTNET_Q_TYPE_RX 0 264 #define VIRTNET_Q_TYPE_TX 1 265 #define VIRTNET_Q_TYPE_CQ 2 266 267 struct virtnet_interrupt_coalesce { 268 u32 max_packets; 269 u32 max_usecs; 270 }; 271 272 /* The dma information of pages allocated at a time. */ 273 struct virtnet_rq_dma { 274 dma_addr_t addr; 275 u32 ref; 276 u16 len; 277 u16 need_sync; 278 }; 279 280 /* Internal representation of a send virtqueue */ 281 struct send_queue { 282 /* Virtqueue associated with this send _queue */ 283 struct virtqueue *vq; 284 285 /* TX: fragments + linear part + virtio header */ 286 struct scatterlist sg[MAX_SKB_FRAGS + 2]; 287 288 /* Name of the send queue: output.$index */ 289 char name[16]; 290 291 struct virtnet_sq_stats stats; 292 293 struct virtnet_interrupt_coalesce intr_coal; 294 295 struct napi_struct napi; 296 297 /* Record whether sq is in reset state. */ 298 bool reset; 299 300 struct xsk_buff_pool *xsk_pool; 301 302 dma_addr_t xsk_hdr_dma_addr; 303 }; 304 305 /* Internal representation of a receive virtqueue */ 306 struct receive_queue { 307 /* Virtqueue associated with this receive_queue */ 308 struct virtqueue *vq; 309 310 struct napi_struct napi; 311 312 struct bpf_prog __rcu *xdp_prog; 313 314 struct virtnet_rq_stats stats; 315 316 /* The number of rx notifications */ 317 u16 calls; 318 319 /* Is dynamic interrupt moderation enabled? */ 320 bool dim_enabled; 321 322 /* Used to protect dim_enabled and inter_coal */ 323 struct mutex dim_lock; 324 325 /* Dynamic Interrupt Moderation */ 326 struct dim dim; 327 328 u32 packets_in_napi; 329 330 struct virtnet_interrupt_coalesce intr_coal; 331 332 /* Chain pages by the private ptr. */ 333 struct page *pages; 334 335 /* Average packet length for mergeable receive buffers. 
 */
	struct ewma_pkt_len mrg_avg_pkt_len;

	/* Page frag for packet buffer allocation. */
	struct page_frag alloc_frag;

	/* RX: fragments + linear part + virtio header */
	struct scatterlist sg[MAX_SKB_FRAGS + 2];

	/* Min single buffer size for mergeable buffers case. */
	unsigned int min_buf_len;

	/* Name of this receive queue: input.$index */
	char name[16];

	struct xdp_rxq_info xdp_rxq;

	/* Record the last dma info to free after new pages are allocated. */
	struct virtnet_rq_dma *last_dma;

	struct xsk_buff_pool *xsk_pool;

	/* xdp rxq used by xsk */
	struct xdp_rxq_info xsk_rxq_info;

	struct xdp_buff **xsk_buffs;
};

#define VIRTIO_NET_RSS_MAX_KEY_SIZE 40

/* Control VQ buffers: protected by the rtnl lock */
struct control_buf {
	struct virtio_net_ctrl_hdr hdr;
	virtio_net_ctrl_ack status;
};

struct virtnet_info {
	struct virtio_device *vdev;
	struct virtqueue *cvq;
	struct net_device *dev;
	struct send_queue *sq;
	struct receive_queue *rq;
	unsigned int status;

	/* Max # of queue pairs supported by the device */
	u16 max_queue_pairs;

	/* # of queue pairs currently used by the driver */
	u16 curr_queue_pairs;

	/* # of XDP queue pairs currently used by the driver */
	u16 xdp_queue_pairs;

	/* xdp_queue_pairs may be 0 even when xdp is already loaded, so track it here. */
	bool xdp_enabled;

	/* I like... big packets and I cannot lie! */
	bool big_packets;

	/* number of sg entries allocated for big packets */
	unsigned int big_packets_num_skbfrags;

	/* Host will merge rx buffers for big packets (shake it! shake it!) */
	bool mergeable_rx_bufs;

	/* Host supports rss and/or hash report */
	bool has_rss;
	bool has_rss_hash_report;
	u8 rss_key_size;
	u16 rss_indir_table_size;
	u32 rss_hash_types_supported;
	u32 rss_hash_types_saved;
	struct virtio_net_rss_config_hdr *rss_hdr;
	struct virtio_net_rss_config_trailer rss_trailer;
	u8 rss_hash_key_data[VIRTIO_NET_RSS_MAX_KEY_SIZE];

	/* Has control virtqueue */
	bool has_cvq;

	/* Lock to protect the control VQ */
	struct mutex cvq_lock;

	/* Host can handle any s/g split between our header and packet data */
	bool any_header_sg;

	/* Packet virtio header size */
	u8 hdr_len;

	/* Work struct for delayed refilling if we run low on memory. */
	struct delayed_work refill;

	/* Is delayed refill enabled? */
	bool refill_enabled;

	/* The lock to synchronize the access to refill_enabled */
	spinlock_t refill_lock;

	/* Work struct for config space updates */
	struct work_struct config_work;

	/* Work struct for setting rx mode */
	struct work_struct rx_mode_work;

	/* OK to queue work setting RX mode? */
	bool rx_mode_work_enabled;

	/* Is the affinity hint set for virtqueues? */
	bool affinity_hint_set;

	/* CPU hotplug instances for online & dead */
	struct hlist_node node;
	struct hlist_node node_dead;

	struct control_buf *ctrl;

	/* Ethtool settings */
	u8 duplex;
	u32 speed;

	/* Is rx dynamic interrupt moderation enabled?
*/ 455 bool rx_dim_enabled; 456 457 /* Interrupt coalescing settings */ 458 struct virtnet_interrupt_coalesce intr_coal_tx; 459 struct virtnet_interrupt_coalesce intr_coal_rx; 460 461 unsigned long guest_offloads; 462 unsigned long guest_offloads_capable; 463 464 /* failover when STANDBY feature enabled */ 465 struct failover *failover; 466 467 u64 device_stats_cap; 468 }; 469 470 struct padded_vnet_hdr { 471 struct virtio_net_hdr_v1_hash hdr; 472 /* 473 * hdr is in a separate sg buffer, and data sg buffer shares same page 474 * with this header sg. This padding makes next sg 16 byte aligned 475 * after the header. 476 */ 477 char padding[12]; 478 }; 479 480 struct virtio_net_common_hdr { 481 union { 482 struct virtio_net_hdr hdr; 483 struct virtio_net_hdr_mrg_rxbuf mrg_hdr; 484 struct virtio_net_hdr_v1_hash hash_v1_hdr; 485 }; 486 }; 487 488 static struct virtio_net_common_hdr xsk_hdr; 489 490 static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf); 491 static void virtnet_sq_free_unused_buf_done(struct virtqueue *vq); 492 static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp, 493 struct net_device *dev, 494 unsigned int *xdp_xmit, 495 struct virtnet_rq_stats *stats); 496 static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq, 497 struct sk_buff *skb, u8 flags); 498 static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb, 499 struct sk_buff *curr_skb, 500 struct page *page, void *buf, 501 int len, int truesize); 502 static void virtnet_xsk_completed(struct send_queue *sq, int num); 503 504 enum virtnet_xmit_type { 505 VIRTNET_XMIT_TYPE_SKB, 506 VIRTNET_XMIT_TYPE_SKB_ORPHAN, 507 VIRTNET_XMIT_TYPE_XDP, 508 VIRTNET_XMIT_TYPE_XSK, 509 }; 510 511 static size_t virtnet_rss_hdr_size(const struct virtnet_info *vi) 512 { 513 u16 indir_table_size = vi->has_rss ? vi->rss_indir_table_size : 1; 514 515 return struct_size(vi->rss_hdr, indirection_table, indir_table_size); 516 } 517 518 static size_t virtnet_rss_trailer_size(const struct virtnet_info *vi) 519 { 520 return struct_size(&vi->rss_trailer, hash_key_data, vi->rss_key_size); 521 } 522 523 /* We use the last two bits of the pointer to distinguish the xmit type. 
*/ 524 #define VIRTNET_XMIT_TYPE_MASK (BIT(0) | BIT(1)) 525 526 #define VIRTIO_XSK_FLAG_OFFSET 2 527 528 static enum virtnet_xmit_type virtnet_xmit_ptr_unpack(void **ptr) 529 { 530 unsigned long p = (unsigned long)*ptr; 531 532 *ptr = (void *)(p & ~VIRTNET_XMIT_TYPE_MASK); 533 534 return p & VIRTNET_XMIT_TYPE_MASK; 535 } 536 537 static void *virtnet_xmit_ptr_pack(void *ptr, enum virtnet_xmit_type type) 538 { 539 return (void *)((unsigned long)ptr | type); 540 } 541 542 static int virtnet_add_outbuf(struct send_queue *sq, int num, void *data, 543 enum virtnet_xmit_type type) 544 { 545 return virtqueue_add_outbuf(sq->vq, sq->sg, num, 546 virtnet_xmit_ptr_pack(data, type), 547 GFP_ATOMIC); 548 } 549 550 static u32 virtnet_ptr_to_xsk_buff_len(void *ptr) 551 { 552 return ((unsigned long)ptr) >> VIRTIO_XSK_FLAG_OFFSET; 553 } 554 555 static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len) 556 { 557 sg_dma_address(sg) = addr; 558 sg_dma_len(sg) = len; 559 } 560 561 static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq, 562 bool in_napi, struct virtnet_sq_free_stats *stats) 563 { 564 struct xdp_frame *frame; 565 struct sk_buff *skb; 566 unsigned int len; 567 void *ptr; 568 569 while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { 570 switch (virtnet_xmit_ptr_unpack(&ptr)) { 571 case VIRTNET_XMIT_TYPE_SKB: 572 skb = ptr; 573 574 pr_debug("Sent skb %p\n", skb); 575 stats->napi_packets++; 576 stats->napi_bytes += skb->len; 577 napi_consume_skb(skb, in_napi); 578 break; 579 580 case VIRTNET_XMIT_TYPE_SKB_ORPHAN: 581 skb = ptr; 582 583 stats->packets++; 584 stats->bytes += skb->len; 585 napi_consume_skb(skb, in_napi); 586 break; 587 588 case VIRTNET_XMIT_TYPE_XDP: 589 frame = ptr; 590 591 stats->packets++; 592 stats->bytes += xdp_get_frame_len(frame); 593 xdp_return_frame(frame); 594 break; 595 596 case VIRTNET_XMIT_TYPE_XSK: 597 stats->bytes += virtnet_ptr_to_xsk_buff_len(ptr); 598 stats->xsk++; 599 break; 600 } 601 } 602 netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes); 603 } 604 605 static void virtnet_free_old_xmit(struct send_queue *sq, 606 struct netdev_queue *txq, 607 bool in_napi, 608 struct virtnet_sq_free_stats *stats) 609 { 610 __free_old_xmit(sq, txq, in_napi, stats); 611 612 if (stats->xsk) 613 virtnet_xsk_completed(sq, stats->xsk); 614 } 615 616 /* Converting between virtqueue no. and kernel tx/rx queue no. 617 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq 618 */ 619 static int vq2txq(struct virtqueue *vq) 620 { 621 return (vq->index - 1) / 2; 622 } 623 624 static int txq2vq(int txq) 625 { 626 return txq * 2 + 1; 627 } 628 629 static int vq2rxq(struct virtqueue *vq) 630 { 631 return vq->index / 2; 632 } 633 634 static int rxq2vq(int rxq) 635 { 636 return rxq * 2; 637 } 638 639 static int vq_type(struct virtnet_info *vi, int qid) 640 { 641 if (qid == vi->max_queue_pairs * 2) 642 return VIRTNET_Q_TYPE_CQ; 643 644 if (qid % 2) 645 return VIRTNET_Q_TYPE_TX; 646 647 return VIRTNET_Q_TYPE_RX; 648 } 649 650 static inline struct virtio_net_common_hdr * 651 skb_vnet_common_hdr(struct sk_buff *skb) 652 { 653 return (struct virtio_net_common_hdr *)skb->cb; 654 } 655 656 /* 657 * private is used to chain pages for big packets, put the whole 658 * most recent used list in the beginning for reuse 659 */ 660 static void give_pages(struct receive_queue *rq, struct page *page) 661 { 662 struct page *end; 663 664 /* Find end of list, sew whole thing into vi->rq.pages. 
*/ 665 for (end = page; end->private; end = (struct page *)end->private); 666 end->private = (unsigned long)rq->pages; 667 rq->pages = page; 668 } 669 670 static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask) 671 { 672 struct page *p = rq->pages; 673 674 if (p) { 675 rq->pages = (struct page *)p->private; 676 /* clear private here, it is used to chain pages */ 677 p->private = 0; 678 } else 679 p = alloc_page(gfp_mask); 680 return p; 681 } 682 683 static void virtnet_rq_free_buf(struct virtnet_info *vi, 684 struct receive_queue *rq, void *buf) 685 { 686 if (vi->mergeable_rx_bufs) 687 put_page(virt_to_head_page(buf)); 688 else if (vi->big_packets) 689 give_pages(rq, buf); 690 else 691 put_page(virt_to_head_page(buf)); 692 } 693 694 static void enable_delayed_refill(struct virtnet_info *vi) 695 { 696 spin_lock_bh(&vi->refill_lock); 697 vi->refill_enabled = true; 698 spin_unlock_bh(&vi->refill_lock); 699 } 700 701 static void disable_delayed_refill(struct virtnet_info *vi) 702 { 703 spin_lock_bh(&vi->refill_lock); 704 vi->refill_enabled = false; 705 spin_unlock_bh(&vi->refill_lock); 706 } 707 708 static void enable_rx_mode_work(struct virtnet_info *vi) 709 { 710 rtnl_lock(); 711 vi->rx_mode_work_enabled = true; 712 rtnl_unlock(); 713 } 714 715 static void disable_rx_mode_work(struct virtnet_info *vi) 716 { 717 rtnl_lock(); 718 vi->rx_mode_work_enabled = false; 719 rtnl_unlock(); 720 } 721 722 static void virtqueue_napi_schedule(struct napi_struct *napi, 723 struct virtqueue *vq) 724 { 725 if (napi_schedule_prep(napi)) { 726 virtqueue_disable_cb(vq); 727 __napi_schedule(napi); 728 } 729 } 730 731 static bool virtqueue_napi_complete(struct napi_struct *napi, 732 struct virtqueue *vq, int processed) 733 { 734 int opaque; 735 736 opaque = virtqueue_enable_cb_prepare(vq); 737 if (napi_complete_done(napi, processed)) { 738 if (unlikely(virtqueue_poll(vq, opaque))) 739 virtqueue_napi_schedule(napi, vq); 740 else 741 return true; 742 } else { 743 virtqueue_disable_cb(vq); 744 } 745 746 return false; 747 } 748 749 static void skb_xmit_done(struct virtqueue *vq) 750 { 751 struct virtnet_info *vi = vq->vdev->priv; 752 struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi; 753 754 /* Suppress further interrupts. */ 755 virtqueue_disable_cb(vq); 756 757 if (napi->weight) 758 virtqueue_napi_schedule(napi, vq); 759 else 760 /* We were probably waiting for more output buffers. */ 761 netif_wake_subqueue(vi->dev, vq2txq(vq)); 762 } 763 764 #define MRG_CTX_HEADER_SHIFT 22 765 static void *mergeable_len_to_ctx(unsigned int truesize, 766 unsigned int headroom) 767 { 768 return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize); 769 } 770 771 static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx) 772 { 773 return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT; 774 } 775 776 static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx) 777 { 778 return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1); 779 } 780 781 static int check_mergeable_len(struct net_device *dev, void *mrg_ctx, 782 unsigned int len) 783 { 784 unsigned int headroom, tailroom, room, truesize; 785 786 truesize = mergeable_ctx_to_truesize(mrg_ctx); 787 headroom = mergeable_ctx_to_headroom(mrg_ctx); 788 tailroom = headroom ? 
sizeof(struct skb_shared_info) : 0; 789 room = SKB_DATA_ALIGN(headroom + tailroom); 790 791 if (len > truesize - room) { 792 pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 793 dev->name, len, (unsigned long)(truesize - room)); 794 DEV_STATS_INC(dev, rx_length_errors); 795 return -1; 796 } 797 798 return 0; 799 } 800 801 static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen, 802 unsigned int headroom, 803 unsigned int len) 804 { 805 struct sk_buff *skb; 806 807 skb = build_skb(buf, buflen); 808 if (unlikely(!skb)) 809 return NULL; 810 811 skb_reserve(skb, headroom); 812 skb_put(skb, len); 813 814 return skb; 815 } 816 817 /* Called from bottom half context */ 818 static struct sk_buff *page_to_skb(struct virtnet_info *vi, 819 struct receive_queue *rq, 820 struct page *page, unsigned int offset, 821 unsigned int len, unsigned int truesize, 822 unsigned int headroom) 823 { 824 struct sk_buff *skb; 825 struct virtio_net_common_hdr *hdr; 826 unsigned int copy, hdr_len, hdr_padded_len; 827 struct page *page_to_free = NULL; 828 int tailroom, shinfo_size; 829 char *p, *hdr_p, *buf; 830 831 p = page_address(page) + offset; 832 hdr_p = p; 833 834 hdr_len = vi->hdr_len; 835 if (vi->mergeable_rx_bufs) 836 hdr_padded_len = hdr_len; 837 else 838 hdr_padded_len = sizeof(struct padded_vnet_hdr); 839 840 buf = p - headroom; 841 len -= hdr_len; 842 offset += hdr_padded_len; 843 p += hdr_padded_len; 844 tailroom = truesize - headroom - hdr_padded_len - len; 845 846 shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 847 848 if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) { 849 skb = virtnet_build_skb(buf, truesize, p - buf, len); 850 if (unlikely(!skb)) 851 return NULL; 852 853 page = (struct page *)page->private; 854 if (page) 855 give_pages(rq, page); 856 goto ok; 857 } 858 859 /* copy small packet so we can reuse these pages for small data */ 860 skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN); 861 if (unlikely(!skb)) 862 return NULL; 863 864 /* Copy all frame if it fits skb->head, otherwise 865 * we let virtio_net_hdr_to_skb() and GRO pull headers as needed. 866 */ 867 if (len <= skb_tailroom(skb)) 868 copy = len; 869 else 870 copy = ETH_HLEN; 871 skb_put_data(skb, p, copy); 872 873 len -= copy; 874 offset += copy; 875 876 if (vi->mergeable_rx_bufs) { 877 if (len) 878 skb_add_rx_frag(skb, 0, page, offset, len, truesize); 879 else 880 page_to_free = page; 881 goto ok; 882 } 883 884 /* 885 * Verify that we can indeed put this data into a skb. 886 * This is here to handle cases when the device erroneously 887 * tries to receive more than is possible. This is usually 888 * the case of a broken device. 
889 */ 890 if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) { 891 net_dbg_ratelimited("%s: too much data\n", skb->dev->name); 892 dev_kfree_skb(skb); 893 return NULL; 894 } 895 BUG_ON(offset >= PAGE_SIZE); 896 while (len) { 897 unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len); 898 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset, 899 frag_size, truesize); 900 len -= frag_size; 901 page = (struct page *)page->private; 902 offset = 0; 903 } 904 905 if (page) 906 give_pages(rq, page); 907 908 ok: 909 hdr = skb_vnet_common_hdr(skb); 910 memcpy(hdr, hdr_p, hdr_len); 911 if (page_to_free) 912 put_page(page_to_free); 913 914 return skb; 915 } 916 917 static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len) 918 { 919 struct virtnet_info *vi = rq->vq->vdev->priv; 920 struct page *page = virt_to_head_page(buf); 921 struct virtnet_rq_dma *dma; 922 void *head; 923 int offset; 924 925 BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs); 926 927 head = page_address(page); 928 929 dma = head; 930 931 --dma->ref; 932 933 if (dma->need_sync && len) { 934 offset = buf - (head + sizeof(*dma)); 935 936 virtqueue_dma_sync_single_range_for_cpu(rq->vq, dma->addr, 937 offset, len, 938 DMA_FROM_DEVICE); 939 } 940 941 if (dma->ref) 942 return; 943 944 virtqueue_dma_unmap_single_attrs(rq->vq, dma->addr, dma->len, 945 DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); 946 put_page(page); 947 } 948 949 static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx) 950 { 951 struct virtnet_info *vi = rq->vq->vdev->priv; 952 void *buf; 953 954 BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs); 955 956 buf = virtqueue_get_buf_ctx(rq->vq, len, ctx); 957 if (buf) 958 virtnet_rq_unmap(rq, buf, *len); 959 960 return buf; 961 } 962 963 static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len) 964 { 965 struct virtnet_info *vi = rq->vq->vdev->priv; 966 struct virtnet_rq_dma *dma; 967 dma_addr_t addr; 968 u32 offset; 969 void *head; 970 971 BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs); 972 973 head = page_address(rq->alloc_frag.page); 974 975 offset = buf - head; 976 977 dma = head; 978 979 addr = dma->addr - sizeof(*dma) + offset; 980 981 sg_init_table(rq->sg, 1); 982 sg_fill_dma(rq->sg, addr, len); 983 } 984 985 static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp) 986 { 987 struct page_frag *alloc_frag = &rq->alloc_frag; 988 struct virtnet_info *vi = rq->vq->vdev->priv; 989 struct virtnet_rq_dma *dma; 990 void *buf, *head; 991 dma_addr_t addr; 992 993 BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs); 994 995 head = page_address(alloc_frag->page); 996 997 dma = head; 998 999 /* new pages */ 1000 if (!alloc_frag->offset) { 1001 if (rq->last_dma) { 1002 /* Now, the new page is allocated, the last dma 1003 * will not be used. So the dma can be unmapped 1004 * if the ref is 0. 1005 */ 1006 virtnet_rq_unmap(rq, rq->last_dma, 0); 1007 rq->last_dma = NULL; 1008 } 1009 1010 dma->len = alloc_frag->size - sizeof(*dma); 1011 1012 addr = virtqueue_dma_map_single_attrs(rq->vq, dma + 1, 1013 dma->len, DMA_FROM_DEVICE, 0); 1014 if (virtqueue_dma_mapping_error(rq->vq, addr)) 1015 return NULL; 1016 1017 dma->addr = addr; 1018 dma->need_sync = virtqueue_dma_need_sync(rq->vq, addr); 1019 1020 /* Add a reference to dma to prevent the entire dma from 1021 * being released during error handling. This reference 1022 * will be freed after the pages are no longer used. 
1023 */ 1024 get_page(alloc_frag->page); 1025 dma->ref = 1; 1026 alloc_frag->offset = sizeof(*dma); 1027 1028 rq->last_dma = dma; 1029 } 1030 1031 ++dma->ref; 1032 1033 buf = head + alloc_frag->offset; 1034 1035 get_page(alloc_frag->page); 1036 alloc_frag->offset += size; 1037 1038 return buf; 1039 } 1040 1041 static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf) 1042 { 1043 struct virtnet_info *vi = vq->vdev->priv; 1044 struct receive_queue *rq; 1045 int i = vq2rxq(vq); 1046 1047 rq = &vi->rq[i]; 1048 1049 if (rq->xsk_pool) { 1050 xsk_buff_free((struct xdp_buff *)buf); 1051 return; 1052 } 1053 1054 if (!vi->big_packets || vi->mergeable_rx_bufs) 1055 virtnet_rq_unmap(rq, buf, 0); 1056 1057 virtnet_rq_free_buf(vi, rq, buf); 1058 } 1059 1060 static void free_old_xmit(struct send_queue *sq, struct netdev_queue *txq, 1061 bool in_napi) 1062 { 1063 struct virtnet_sq_free_stats stats = {0}; 1064 1065 virtnet_free_old_xmit(sq, txq, in_napi, &stats); 1066 1067 /* Avoid overhead when no packets have been processed 1068 * happens when called speculatively from start_xmit. 1069 */ 1070 if (!stats.packets && !stats.napi_packets) 1071 return; 1072 1073 u64_stats_update_begin(&sq->stats.syncp); 1074 u64_stats_add(&sq->stats.bytes, stats.bytes + stats.napi_bytes); 1075 u64_stats_add(&sq->stats.packets, stats.packets + stats.napi_packets); 1076 u64_stats_update_end(&sq->stats.syncp); 1077 } 1078 1079 static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) 1080 { 1081 if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) 1082 return false; 1083 else if (q < vi->curr_queue_pairs) 1084 return true; 1085 else 1086 return false; 1087 } 1088 1089 static bool tx_may_stop(struct virtnet_info *vi, 1090 struct net_device *dev, 1091 struct send_queue *sq) 1092 { 1093 int qnum; 1094 1095 qnum = sq - vi->sq; 1096 1097 /* If running out of space, stop queue to avoid getting packets that we 1098 * are then unable to transmit. 1099 * An alternative would be to force queuing layer to requeue the skb by 1100 * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be 1101 * returned in a normal path of operation: it means that driver is not 1102 * maintaining the TX queue stop/start state properly, and causes 1103 * the stack to do a non-trivial amount of useless work. 1104 * Since most packets only take 1 or 2 ring slots, stopping the queue 1105 * early means 16 slots are typically wasted. 1106 */ 1107 if (sq->vq->num_free < MAX_SKB_FRAGS + 2) { 1108 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); 1109 1110 netif_tx_stop_queue(txq); 1111 u64_stats_update_begin(&sq->stats.syncp); 1112 u64_stats_inc(&sq->stats.stop); 1113 u64_stats_update_end(&sq->stats.syncp); 1114 1115 return true; 1116 } 1117 1118 return false; 1119 } 1120 1121 static void check_sq_full_and_disable(struct virtnet_info *vi, 1122 struct net_device *dev, 1123 struct send_queue *sq) 1124 { 1125 bool use_napi = sq->napi.weight; 1126 int qnum; 1127 1128 qnum = sq - vi->sq; 1129 1130 if (tx_may_stop(vi, dev, sq)) { 1131 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); 1132 1133 if (use_napi) { 1134 if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) 1135 virtqueue_napi_schedule(&sq->napi, sq->vq); 1136 } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { 1137 /* More just got used, free them then recheck. 
*/ 1138 free_old_xmit(sq, txq, false); 1139 if (sq->vq->num_free >= MAX_SKB_FRAGS + 2) { 1140 netif_start_subqueue(dev, qnum); 1141 u64_stats_update_begin(&sq->stats.syncp); 1142 u64_stats_inc(&sq->stats.wake); 1143 u64_stats_update_end(&sq->stats.syncp); 1144 virtqueue_disable_cb(sq->vq); 1145 } 1146 } 1147 } 1148 } 1149 1150 /* Note that @len is the length of received data without virtio header */ 1151 static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi, 1152 struct receive_queue *rq, void *buf, 1153 u32 len, bool first_buf) 1154 { 1155 struct xdp_buff *xdp; 1156 u32 bufsize; 1157 1158 xdp = (struct xdp_buff *)buf; 1159 1160 /* In virtnet_add_recvbuf_xsk, we use part of XDP_PACKET_HEADROOM for 1161 * virtio header and ask the vhost to fill data from 1162 * hard_start + XDP_PACKET_HEADROOM - vi->hdr_len 1163 * The first buffer has virtio header so the remaining region for frame 1164 * data is 1165 * xsk_pool_get_rx_frame_size() 1166 * While other buffers than the first one do not have virtio header, so 1167 * the maximum frame data's length can be 1168 * xsk_pool_get_rx_frame_size() + vi->hdr_len 1169 */ 1170 bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool); 1171 if (!first_buf) 1172 bufsize += vi->hdr_len; 1173 1174 if (unlikely(len > bufsize)) { 1175 pr_debug("%s: rx error: len %u exceeds truesize %u\n", 1176 vi->dev->name, len, bufsize); 1177 DEV_STATS_INC(vi->dev, rx_length_errors); 1178 xsk_buff_free(xdp); 1179 return NULL; 1180 } 1181 1182 if (first_buf) { 1183 xsk_buff_set_size(xdp, len); 1184 } else { 1185 xdp_prepare_buff(xdp, xdp->data_hard_start, 1186 XDP_PACKET_HEADROOM - vi->hdr_len, len, 1); 1187 xdp->flags = 0; 1188 } 1189 1190 xsk_buff_dma_sync_for_cpu(xdp); 1191 1192 return xdp; 1193 } 1194 1195 static struct sk_buff *xsk_construct_skb(struct receive_queue *rq, 1196 struct xdp_buff *xdp) 1197 { 1198 unsigned int metasize = xdp->data - xdp->data_meta; 1199 struct sk_buff *skb; 1200 unsigned int size; 1201 1202 size = xdp->data_end - xdp->data_hard_start; 1203 skb = napi_alloc_skb(&rq->napi, size); 1204 if (unlikely(!skb)) { 1205 xsk_buff_free(xdp); 1206 return NULL; 1207 } 1208 1209 skb_reserve(skb, xdp->data_meta - xdp->data_hard_start); 1210 1211 size = xdp->data_end - xdp->data_meta; 1212 memcpy(__skb_put(skb, size), xdp->data_meta, size); 1213 1214 if (metasize) { 1215 __skb_pull(skb, metasize); 1216 skb_metadata_set(skb, metasize); 1217 } 1218 1219 xsk_buff_free(xdp); 1220 1221 return skb; 1222 } 1223 1224 static struct sk_buff *virtnet_receive_xsk_small(struct net_device *dev, struct virtnet_info *vi, 1225 struct receive_queue *rq, struct xdp_buff *xdp, 1226 unsigned int *xdp_xmit, 1227 struct virtnet_rq_stats *stats) 1228 { 1229 struct bpf_prog *prog; 1230 u32 ret; 1231 1232 ret = XDP_PASS; 1233 rcu_read_lock(); 1234 prog = rcu_dereference(rq->xdp_prog); 1235 if (prog) 1236 ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats); 1237 rcu_read_unlock(); 1238 1239 switch (ret) { 1240 case XDP_PASS: 1241 return xsk_construct_skb(rq, xdp); 1242 1243 case XDP_TX: 1244 case XDP_REDIRECT: 1245 return NULL; 1246 1247 default: 1248 /* drop packet */ 1249 xsk_buff_free(xdp); 1250 u64_stats_inc(&stats->drops); 1251 return NULL; 1252 } 1253 } 1254 1255 static void xsk_drop_follow_bufs(struct net_device *dev, 1256 struct receive_queue *rq, 1257 u32 num_buf, 1258 struct virtnet_rq_stats *stats) 1259 { 1260 struct xdp_buff *xdp; 1261 u32 len; 1262 1263 while (num_buf-- > 1) { 1264 xdp = virtqueue_get_buf(rq->vq, &len); 1265 if (unlikely(!xdp)) { 1266 pr_debug("%s: rx 
error: %d buffers missing\n", 1267 dev->name, num_buf); 1268 DEV_STATS_INC(dev, rx_length_errors); 1269 break; 1270 } 1271 u64_stats_add(&stats->bytes, len); 1272 xsk_buff_free(xdp); 1273 } 1274 } 1275 1276 static int xsk_append_merge_buffer(struct virtnet_info *vi, 1277 struct receive_queue *rq, 1278 struct sk_buff *head_skb, 1279 u32 num_buf, 1280 struct virtio_net_hdr_mrg_rxbuf *hdr, 1281 struct virtnet_rq_stats *stats) 1282 { 1283 struct sk_buff *curr_skb; 1284 struct xdp_buff *xdp; 1285 u32 len, truesize; 1286 struct page *page; 1287 void *buf; 1288 1289 curr_skb = head_skb; 1290 1291 while (--num_buf) { 1292 buf = virtqueue_get_buf(rq->vq, &len); 1293 if (unlikely(!buf)) { 1294 pr_debug("%s: rx error: %d buffers out of %d missing\n", 1295 vi->dev->name, num_buf, 1296 virtio16_to_cpu(vi->vdev, 1297 hdr->num_buffers)); 1298 DEV_STATS_INC(vi->dev, rx_length_errors); 1299 return -EINVAL; 1300 } 1301 1302 u64_stats_add(&stats->bytes, len); 1303 1304 xdp = buf_to_xdp(vi, rq, buf, len, false); 1305 if (!xdp) 1306 goto err; 1307 1308 buf = napi_alloc_frag(len); 1309 if (!buf) { 1310 xsk_buff_free(xdp); 1311 goto err; 1312 } 1313 1314 memcpy(buf, xdp->data, len); 1315 1316 xsk_buff_free(xdp); 1317 1318 page = virt_to_page(buf); 1319 1320 truesize = len; 1321 1322 curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page, 1323 buf, len, truesize); 1324 if (!curr_skb) { 1325 put_page(page); 1326 goto err; 1327 } 1328 } 1329 1330 return 0; 1331 1332 err: 1333 xsk_drop_follow_bufs(vi->dev, rq, num_buf, stats); 1334 return -EINVAL; 1335 } 1336 1337 static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct virtnet_info *vi, 1338 struct receive_queue *rq, struct xdp_buff *xdp, 1339 unsigned int *xdp_xmit, 1340 struct virtnet_rq_stats *stats) 1341 { 1342 struct virtio_net_hdr_mrg_rxbuf *hdr; 1343 struct bpf_prog *prog; 1344 struct sk_buff *skb; 1345 u32 ret, num_buf; 1346 1347 hdr = xdp->data - vi->hdr_len; 1348 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 1349 1350 ret = XDP_PASS; 1351 rcu_read_lock(); 1352 prog = rcu_dereference(rq->xdp_prog); 1353 /* TODO: support multi buffer. 
*/ 1354 if (prog && num_buf == 1) 1355 ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats); 1356 rcu_read_unlock(); 1357 1358 switch (ret) { 1359 case XDP_PASS: 1360 skb = xsk_construct_skb(rq, xdp); 1361 if (!skb) 1362 goto drop_bufs; 1363 1364 if (xsk_append_merge_buffer(vi, rq, skb, num_buf, hdr, stats)) { 1365 dev_kfree_skb(skb); 1366 goto drop; 1367 } 1368 1369 return skb; 1370 1371 case XDP_TX: 1372 case XDP_REDIRECT: 1373 return NULL; 1374 1375 default: 1376 /* drop packet */ 1377 xsk_buff_free(xdp); 1378 } 1379 1380 drop_bufs: 1381 xsk_drop_follow_bufs(dev, rq, num_buf, stats); 1382 1383 drop: 1384 u64_stats_inc(&stats->drops); 1385 return NULL; 1386 } 1387 1388 static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queue *rq, 1389 void *buf, u32 len, 1390 unsigned int *xdp_xmit, 1391 struct virtnet_rq_stats *stats) 1392 { 1393 struct net_device *dev = vi->dev; 1394 struct sk_buff *skb = NULL; 1395 struct xdp_buff *xdp; 1396 u8 flags; 1397 1398 len -= vi->hdr_len; 1399 1400 u64_stats_add(&stats->bytes, len); 1401 1402 xdp = buf_to_xdp(vi, rq, buf, len, true); 1403 if (!xdp) 1404 return; 1405 1406 if (unlikely(len < ETH_HLEN)) { 1407 pr_debug("%s: short packet %i\n", dev->name, len); 1408 DEV_STATS_INC(dev, rx_length_errors); 1409 xsk_buff_free(xdp); 1410 return; 1411 } 1412 1413 flags = ((struct virtio_net_common_hdr *)(xdp->data - vi->hdr_len))->hdr.flags; 1414 1415 if (!vi->mergeable_rx_bufs) 1416 skb = virtnet_receive_xsk_small(dev, vi, rq, xdp, xdp_xmit, stats); 1417 else 1418 skb = virtnet_receive_xsk_merge(dev, vi, rq, xdp, xdp_xmit, stats); 1419 1420 if (skb) 1421 virtnet_receive_done(vi, rq, skb, flags); 1422 } 1423 1424 static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue *rq, 1425 struct xsk_buff_pool *pool, gfp_t gfp) 1426 { 1427 struct xdp_buff **xsk_buffs; 1428 dma_addr_t addr; 1429 int err = 0; 1430 u32 len, i; 1431 int num; 1432 1433 xsk_buffs = rq->xsk_buffs; 1434 1435 num = xsk_buff_alloc_batch(pool, xsk_buffs, rq->vq->num_free); 1436 if (!num) 1437 return -ENOMEM; 1438 1439 len = xsk_pool_get_rx_frame_size(pool) + vi->hdr_len; 1440 1441 for (i = 0; i < num; ++i) { 1442 /* Use the part of XDP_PACKET_HEADROOM as the virtnet hdr space. 1443 * We assume XDP_PACKET_HEADROOM is larger than hdr->len. 
		 * (see function virtnet_xsk_pool_enable)
		 */
		addr = xsk_buff_xdp_get_dma(xsk_buffs[i]) - vi->hdr_len;

		sg_init_table(rq->sg, 1);
		sg_fill_dma(rq->sg, addr, len);

		err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1,
						    xsk_buffs[i], NULL, gfp);
		if (err)
			goto err;
	}

	return num;

err:
	for (; i < num; ++i)
		xsk_buff_free(xsk_buffs[i]);

	return err;
}

static void *virtnet_xsk_to_ptr(u32 len)
{
	unsigned long p;

	p = len << VIRTIO_XSK_FLAG_OFFSET;

	return virtnet_xmit_ptr_pack((void *)p, VIRTNET_XMIT_TYPE_XSK);
}

static int virtnet_xsk_xmit_one(struct send_queue *sq,
				struct xsk_buff_pool *pool,
				struct xdp_desc *desc)
{
	struct virtnet_info *vi;
	dma_addr_t addr;

	vi = sq->vq->vdev->priv;

	addr = xsk_buff_raw_get_dma(pool, desc->addr);
	xsk_buff_raw_dma_sync_for_device(pool, addr, desc->len);

	sg_init_table(sq->sg, 2);
	sg_fill_dma(sq->sg, sq->xsk_hdr_dma_addr, vi->hdr_len);
	sg_fill_dma(sq->sg + 1, addr, desc->len);

	return virtqueue_add_outbuf_premapped(sq->vq, sq->sg, 2,
					      virtnet_xsk_to_ptr(desc->len),
					      GFP_ATOMIC);
}

static int virtnet_xsk_xmit_batch(struct send_queue *sq,
				  struct xsk_buff_pool *pool,
				  unsigned int budget,
				  u64 *kicks)
{
	struct xdp_desc *descs = pool->tx_descs;
	bool kick = false;
	u32 nb_pkts, i;
	int err;

	budget = min_t(u32, budget, sq->vq->num_free);

	nb_pkts = xsk_tx_peek_release_desc_batch(pool, budget);
	if (!nb_pkts)
		return 0;

	for (i = 0; i < nb_pkts; i++) {
		err = virtnet_xsk_xmit_one(sq, pool, &descs[i]);
		if (unlikely(err)) {
			xsk_tx_completed(sq->xsk_pool, nb_pkts - i);
			break;
		}

		kick = true;
	}

	if (kick && virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))
		(*kicks)++;

	return i;
}

static bool virtnet_xsk_xmit(struct send_queue *sq, struct xsk_buff_pool *pool,
			     int budget)
{
	struct virtnet_info *vi = sq->vq->vdev->priv;
	struct virtnet_sq_free_stats stats = {};
	struct net_device *dev = vi->dev;
	u64 kicks = 0;
	int sent;

	/* Avoid waking up NAPI needlessly, so call __free_old_xmit() instead of
	 * free_old_xmit().
1539 */ 1540 __free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq), true, &stats); 1541 1542 if (stats.xsk) 1543 xsk_tx_completed(sq->xsk_pool, stats.xsk); 1544 1545 sent = virtnet_xsk_xmit_batch(sq, pool, budget, &kicks); 1546 1547 if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq)) 1548 check_sq_full_and_disable(vi, vi->dev, sq); 1549 1550 if (sent) { 1551 struct netdev_queue *txq; 1552 1553 txq = netdev_get_tx_queue(vi->dev, sq - vi->sq); 1554 txq_trans_cond_update(txq); 1555 } 1556 1557 u64_stats_update_begin(&sq->stats.syncp); 1558 u64_stats_add(&sq->stats.packets, stats.packets); 1559 u64_stats_add(&sq->stats.bytes, stats.bytes); 1560 u64_stats_add(&sq->stats.kicks, kicks); 1561 u64_stats_add(&sq->stats.xdp_tx, sent); 1562 u64_stats_update_end(&sq->stats.syncp); 1563 1564 if (xsk_uses_need_wakeup(pool)) 1565 xsk_set_tx_need_wakeup(pool); 1566 1567 return sent; 1568 } 1569 1570 static void xsk_wakeup(struct send_queue *sq) 1571 { 1572 if (napi_if_scheduled_mark_missed(&sq->napi)) 1573 return; 1574 1575 local_bh_disable(); 1576 virtqueue_napi_schedule(&sq->napi, sq->vq); 1577 local_bh_enable(); 1578 } 1579 1580 static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag) 1581 { 1582 struct virtnet_info *vi = netdev_priv(dev); 1583 struct send_queue *sq; 1584 1585 if (!netif_running(dev)) 1586 return -ENETDOWN; 1587 1588 if (qid >= vi->curr_queue_pairs) 1589 return -EINVAL; 1590 1591 sq = &vi->sq[qid]; 1592 1593 xsk_wakeup(sq); 1594 return 0; 1595 } 1596 1597 static void virtnet_xsk_completed(struct send_queue *sq, int num) 1598 { 1599 xsk_tx_completed(sq->xsk_pool, num); 1600 1601 /* If this is called by rx poll, start_xmit and xdp xmit we should 1602 * wakeup the tx napi to consume the xsk tx queue, because the tx 1603 * interrupt may not be triggered. 1604 */ 1605 xsk_wakeup(sq); 1606 } 1607 1608 static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, 1609 struct send_queue *sq, 1610 struct xdp_frame *xdpf) 1611 { 1612 struct virtio_net_hdr_mrg_rxbuf *hdr; 1613 struct skb_shared_info *shinfo; 1614 u8 nr_frags = 0; 1615 int err, i; 1616 1617 if (unlikely(xdpf->headroom < vi->hdr_len)) 1618 return -EOVERFLOW; 1619 1620 if (unlikely(xdp_frame_has_frags(xdpf))) { 1621 shinfo = xdp_get_shared_info_from_frame(xdpf); 1622 nr_frags = shinfo->nr_frags; 1623 } 1624 1625 /* In wrapping function virtnet_xdp_xmit(), we need to free 1626 * up the pending old buffers, where we need to calculate the 1627 * position of skb_shared_info in xdp_get_frame_len() and 1628 * xdp_return_frame(), which will involve to xdpf->data and 1629 * xdpf->headroom. Therefore, we need to update the value of 1630 * headroom synchronously here. 1631 */ 1632 xdpf->headroom -= vi->hdr_len; 1633 xdpf->data -= vi->hdr_len; 1634 /* Zero header and leave csum up to XDP layers */ 1635 hdr = xdpf->data; 1636 memset(hdr, 0, vi->hdr_len); 1637 xdpf->len += vi->hdr_len; 1638 1639 sg_init_table(sq->sg, nr_frags + 1); 1640 sg_set_buf(sq->sg, xdpf->data, xdpf->len); 1641 for (i = 0; i < nr_frags; i++) { 1642 skb_frag_t *frag = &shinfo->frags[i]; 1643 1644 sg_set_page(&sq->sg[i + 1], skb_frag_page(frag), 1645 skb_frag_size(frag), skb_frag_off(frag)); 1646 } 1647 1648 err = virtnet_add_outbuf(sq, nr_frags + 1, xdpf, VIRTNET_XMIT_TYPE_XDP); 1649 if (unlikely(err)) 1650 return -ENOSPC; /* Caller handle free/refcnt */ 1651 1652 return 0; 1653 } 1654 1655 /* when vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on 1656 * the current cpu, so it does not need to be locked. 
 *
 * Here we use a macro instead of inline functions because we have to deal with
 * three issues at the same time: 1. the choice of sq, 2. judging and executing
 * the lock/unlock of txq, and 3. making sparse happy. It is difficult for two
 * inline functions to perfectly solve these three problems at the same time.
 */
#define virtnet_xdp_get_sq(vi) ({					\
	int cpu = smp_processor_id();					\
	struct netdev_queue *txq;					\
	typeof(vi) v = (vi);						\
	unsigned int qp;						\
									\
	if (v->curr_queue_pairs > nr_cpu_ids) {				\
		qp = v->curr_queue_pairs - v->xdp_queue_pairs;		\
		qp += cpu;						\
		txq = netdev_get_tx_queue(v->dev, qp);			\
		__netif_tx_acquire(txq);				\
	} else {							\
		qp = cpu % v->curr_queue_pairs;				\
		txq = netdev_get_tx_queue(v->dev, qp);			\
		__netif_tx_lock(txq, cpu);				\
	}								\
	v->sq + qp;							\
})

#define virtnet_xdp_put_sq(vi, q) {					\
	struct netdev_queue *txq;					\
	typeof(vi) v = (vi);						\
									\
	txq = netdev_get_tx_queue(v->dev, (q) - v->sq);			\
	if (v->curr_queue_pairs > nr_cpu_ids)				\
		__netif_tx_release(txq);				\
	else								\
		__netif_tx_unlock(txq);					\
}

static int virtnet_xdp_xmit(struct net_device *dev,
			    int n, struct xdp_frame **frames, u32 flags)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct virtnet_sq_free_stats stats = {0};
	struct receive_queue *rq = vi->rq;
	struct bpf_prog *xdp_prog;
	struct send_queue *sq;
	int nxmit = 0;
	int kicks = 0;
	int ret;
	int i;

	/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
	 * indicates XDP resources have been successfully allocated.
	 */
	xdp_prog = rcu_access_pointer(rq->xdp_prog);
	if (!xdp_prog)
		return -ENXIO;

	sq = virtnet_xdp_get_sq(vi);

	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
		ret = -EINVAL;
		goto out;
	}

	/* Free up any pending old buffers before queueing new ones.
*/ 1721 virtnet_free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq), 1722 false, &stats); 1723 1724 for (i = 0; i < n; i++) { 1725 struct xdp_frame *xdpf = frames[i]; 1726 1727 if (__virtnet_xdp_xmit_one(vi, sq, xdpf)) 1728 break; 1729 nxmit++; 1730 } 1731 ret = nxmit; 1732 1733 if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq)) 1734 check_sq_full_and_disable(vi, dev, sq); 1735 1736 if (flags & XDP_XMIT_FLUSH) { 1737 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) 1738 kicks = 1; 1739 } 1740 out: 1741 u64_stats_update_begin(&sq->stats.syncp); 1742 u64_stats_add(&sq->stats.bytes, stats.bytes); 1743 u64_stats_add(&sq->stats.packets, stats.packets); 1744 u64_stats_add(&sq->stats.xdp_tx, n); 1745 u64_stats_add(&sq->stats.xdp_tx_drops, n - nxmit); 1746 u64_stats_add(&sq->stats.kicks, kicks); 1747 u64_stats_update_end(&sq->stats.syncp); 1748 1749 virtnet_xdp_put_sq(vi, sq); 1750 return ret; 1751 } 1752 1753 static void put_xdp_frags(struct xdp_buff *xdp) 1754 { 1755 struct skb_shared_info *shinfo; 1756 struct page *xdp_page; 1757 int i; 1758 1759 if (xdp_buff_has_frags(xdp)) { 1760 shinfo = xdp_get_shared_info_from_buff(xdp); 1761 for (i = 0; i < shinfo->nr_frags; i++) { 1762 xdp_page = skb_frag_page(&shinfo->frags[i]); 1763 put_page(xdp_page); 1764 } 1765 } 1766 } 1767 1768 static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp, 1769 struct net_device *dev, 1770 unsigned int *xdp_xmit, 1771 struct virtnet_rq_stats *stats) 1772 { 1773 struct xdp_frame *xdpf; 1774 int err; 1775 u32 act; 1776 1777 act = bpf_prog_run_xdp(xdp_prog, xdp); 1778 u64_stats_inc(&stats->xdp_packets); 1779 1780 switch (act) { 1781 case XDP_PASS: 1782 return act; 1783 1784 case XDP_TX: 1785 u64_stats_inc(&stats->xdp_tx); 1786 xdpf = xdp_convert_buff_to_frame(xdp); 1787 if (unlikely(!xdpf)) { 1788 netdev_dbg(dev, "convert buff to frame failed for xdp\n"); 1789 return XDP_DROP; 1790 } 1791 1792 err = virtnet_xdp_xmit(dev, 1, &xdpf, 0); 1793 if (unlikely(!err)) { 1794 xdp_return_frame_rx_napi(xdpf); 1795 } else if (unlikely(err < 0)) { 1796 trace_xdp_exception(dev, xdp_prog, act); 1797 return XDP_DROP; 1798 } 1799 *xdp_xmit |= VIRTIO_XDP_TX; 1800 return act; 1801 1802 case XDP_REDIRECT: 1803 u64_stats_inc(&stats->xdp_redirects); 1804 err = xdp_do_redirect(dev, xdp, xdp_prog); 1805 if (err) 1806 return XDP_DROP; 1807 1808 *xdp_xmit |= VIRTIO_XDP_REDIR; 1809 return act; 1810 1811 default: 1812 bpf_warn_invalid_xdp_action(dev, xdp_prog, act); 1813 fallthrough; 1814 case XDP_ABORTED: 1815 trace_xdp_exception(dev, xdp_prog, act); 1816 fallthrough; 1817 case XDP_DROP: 1818 return XDP_DROP; 1819 } 1820 } 1821 1822 static unsigned int virtnet_get_headroom(struct virtnet_info *vi) 1823 { 1824 return vi->xdp_enabled ? XDP_PACKET_HEADROOM : 0; 1825 } 1826 1827 /* We copy the packet for XDP in the following cases: 1828 * 1829 * 1) Packet is scattered across multiple rx buffers. 1830 * 2) Headroom space is insufficient. 1831 * 1832 * This is inefficient but it's a temporary condition that 1833 * we hit right after XDP is enabled and until queue is refilled 1834 * with large buffers with sufficient headroom - so it should affect 1835 * at most queue size packets. 1836 * Afterwards, the conditions to enable 1837 * XDP should preclude the underlying device from sending packets 1838 * across multiple buffers (num_buf > 1), and we make sure buffers 1839 * have enough headroom. 
1840 */ 1841 static struct page *xdp_linearize_page(struct net_device *dev, 1842 struct receive_queue *rq, 1843 int *num_buf, 1844 struct page *p, 1845 int offset, 1846 int page_off, 1847 unsigned int *len) 1848 { 1849 int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1850 struct page *page; 1851 1852 if (page_off + *len + tailroom > PAGE_SIZE) 1853 return NULL; 1854 1855 page = alloc_page(GFP_ATOMIC); 1856 if (!page) 1857 return NULL; 1858 1859 memcpy(page_address(page) + page_off, page_address(p) + offset, *len); 1860 page_off += *len; 1861 1862 /* Only mergeable mode can go inside this while loop. In small mode, 1863 * *num_buf == 1, so it cannot go inside. 1864 */ 1865 while (--*num_buf) { 1866 unsigned int buflen; 1867 void *buf; 1868 void *ctx; 1869 int off; 1870 1871 buf = virtnet_rq_get_buf(rq, &buflen, &ctx); 1872 if (unlikely(!buf)) 1873 goto err_buf; 1874 1875 p = virt_to_head_page(buf); 1876 off = buf - page_address(p); 1877 1878 if (check_mergeable_len(dev, ctx, buflen)) { 1879 put_page(p); 1880 goto err_buf; 1881 } 1882 1883 /* guard against a misconfigured or uncooperative backend that 1884 * is sending packet larger than the MTU. 1885 */ 1886 if ((page_off + buflen + tailroom) > PAGE_SIZE) { 1887 put_page(p); 1888 goto err_buf; 1889 } 1890 1891 memcpy(page_address(page) + page_off, 1892 page_address(p) + off, buflen); 1893 page_off += buflen; 1894 put_page(p); 1895 } 1896 1897 /* Headroom does not contribute to packet length */ 1898 *len = page_off - XDP_PACKET_HEADROOM; 1899 return page; 1900 err_buf: 1901 __free_pages(page, 0); 1902 return NULL; 1903 } 1904 1905 static struct sk_buff *receive_small_build_skb(struct virtnet_info *vi, 1906 unsigned int xdp_headroom, 1907 void *buf, 1908 unsigned int len) 1909 { 1910 unsigned int header_offset; 1911 unsigned int headroom; 1912 unsigned int buflen; 1913 struct sk_buff *skb; 1914 1915 header_offset = VIRTNET_RX_PAD + xdp_headroom; 1916 headroom = vi->hdr_len + header_offset; 1917 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 1918 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1919 1920 skb = virtnet_build_skb(buf, buflen, headroom, len); 1921 if (unlikely(!skb)) 1922 return NULL; 1923 1924 buf += header_offset; 1925 memcpy(skb_vnet_common_hdr(skb), buf, vi->hdr_len); 1926 1927 return skb; 1928 } 1929 1930 static struct sk_buff *receive_small_xdp(struct net_device *dev, 1931 struct virtnet_info *vi, 1932 struct receive_queue *rq, 1933 struct bpf_prog *xdp_prog, 1934 void *buf, 1935 unsigned int xdp_headroom, 1936 unsigned int len, 1937 unsigned int *xdp_xmit, 1938 struct virtnet_rq_stats *stats) 1939 { 1940 unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom; 1941 unsigned int headroom = vi->hdr_len + header_offset; 1942 struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset; 1943 struct page *page = virt_to_head_page(buf); 1944 struct page *xdp_page; 1945 unsigned int buflen; 1946 struct xdp_buff xdp; 1947 struct sk_buff *skb; 1948 unsigned int metasize = 0; 1949 u32 act; 1950 1951 if (unlikely(hdr->hdr.gso_type)) 1952 goto err_xdp; 1953 1954 /* Partially checksummed packets must be dropped. 
*/ 1955 if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) 1956 goto err_xdp; 1957 1958 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 1959 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1960 1961 if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) { 1962 int offset = buf - page_address(page) + header_offset; 1963 unsigned int tlen = len + vi->hdr_len; 1964 int num_buf = 1; 1965 1966 xdp_headroom = virtnet_get_headroom(vi); 1967 header_offset = VIRTNET_RX_PAD + xdp_headroom; 1968 headroom = vi->hdr_len + header_offset; 1969 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 1970 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1971 xdp_page = xdp_linearize_page(dev, rq, &num_buf, page, 1972 offset, header_offset, 1973 &tlen); 1974 if (!xdp_page) 1975 goto err_xdp; 1976 1977 buf = page_address(xdp_page); 1978 put_page(page); 1979 page = xdp_page; 1980 } 1981 1982 xdp_init_buff(&xdp, buflen, &rq->xdp_rxq); 1983 xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len, 1984 xdp_headroom, len, true); 1985 1986 act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats); 1987 1988 switch (act) { 1989 case XDP_PASS: 1990 /* Recalculate length in case bpf program changed it */ 1991 len = xdp.data_end - xdp.data; 1992 metasize = xdp.data - xdp.data_meta; 1993 break; 1994 1995 case XDP_TX: 1996 case XDP_REDIRECT: 1997 goto xdp_xmit; 1998 1999 default: 2000 goto err_xdp; 2001 } 2002 2003 skb = virtnet_build_skb(buf, buflen, xdp.data - buf, len); 2004 if (unlikely(!skb)) 2005 goto err; 2006 2007 if (metasize) 2008 skb_metadata_set(skb, metasize); 2009 2010 return skb; 2011 2012 err_xdp: 2013 u64_stats_inc(&stats->xdp_drops); 2014 err: 2015 u64_stats_inc(&stats->drops); 2016 put_page(page); 2017 xdp_xmit: 2018 return NULL; 2019 } 2020 2021 static struct sk_buff *receive_small(struct net_device *dev, 2022 struct virtnet_info *vi, 2023 struct receive_queue *rq, 2024 void *buf, void *ctx, 2025 unsigned int len, 2026 unsigned int *xdp_xmit, 2027 struct virtnet_rq_stats *stats) 2028 { 2029 unsigned int xdp_headroom = (unsigned long)ctx; 2030 struct page *page = virt_to_head_page(buf); 2031 struct sk_buff *skb; 2032 2033 /* We passed the address of virtnet header to virtio-core, 2034 * so truncate the padding. 
2035 */ 2036 buf -= VIRTNET_RX_PAD + xdp_headroom; 2037 2038 len -= vi->hdr_len; 2039 u64_stats_add(&stats->bytes, len); 2040 2041 if (unlikely(len > GOOD_PACKET_LEN)) { 2042 pr_debug("%s: rx error: len %u exceeds max size %d\n", 2043 dev->name, len, GOOD_PACKET_LEN); 2044 DEV_STATS_INC(dev, rx_length_errors); 2045 goto err; 2046 } 2047 2048 if (unlikely(vi->xdp_enabled)) { 2049 struct bpf_prog *xdp_prog; 2050 2051 rcu_read_lock(); 2052 xdp_prog = rcu_dereference(rq->xdp_prog); 2053 if (xdp_prog) { 2054 skb = receive_small_xdp(dev, vi, rq, xdp_prog, buf, 2055 xdp_headroom, len, xdp_xmit, 2056 stats); 2057 rcu_read_unlock(); 2058 return skb; 2059 } 2060 rcu_read_unlock(); 2061 } 2062 2063 skb = receive_small_build_skb(vi, xdp_headroom, buf, len); 2064 if (likely(skb)) 2065 return skb; 2066 2067 err: 2068 u64_stats_inc(&stats->drops); 2069 put_page(page); 2070 return NULL; 2071 } 2072 2073 static struct sk_buff *receive_big(struct net_device *dev, 2074 struct virtnet_info *vi, 2075 struct receive_queue *rq, 2076 void *buf, 2077 unsigned int len, 2078 struct virtnet_rq_stats *stats) 2079 { 2080 struct page *page = buf; 2081 struct sk_buff *skb = 2082 page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0); 2083 2084 u64_stats_add(&stats->bytes, len - vi->hdr_len); 2085 if (unlikely(!skb)) 2086 goto err; 2087 2088 return skb; 2089 2090 err: 2091 u64_stats_inc(&stats->drops); 2092 give_pages(rq, page); 2093 return NULL; 2094 } 2095 2096 static void mergeable_buf_free(struct receive_queue *rq, int num_buf, 2097 struct net_device *dev, 2098 struct virtnet_rq_stats *stats) 2099 { 2100 struct page *page; 2101 void *buf; 2102 int len; 2103 2104 while (num_buf-- > 1) { 2105 buf = virtnet_rq_get_buf(rq, &len, NULL); 2106 if (unlikely(!buf)) { 2107 pr_debug("%s: rx error: %d buffers missing\n", 2108 dev->name, num_buf); 2109 DEV_STATS_INC(dev, rx_length_errors); 2110 break; 2111 } 2112 u64_stats_add(&stats->bytes, len); 2113 page = virt_to_head_page(buf); 2114 put_page(page); 2115 } 2116 } 2117 2118 /* Why not use xdp_build_skb_from_frame() ? 2119 * XDP core assumes that xdp frags are PAGE_SIZE in length, while in 2120 * virtio-net there are 2 points that do not match its requirements: 2121 * 1. The size of the prefilled buffer is not fixed before xdp is set. 2122 * 2. xdp_build_skb_from_frame() does more checks that we don't need, 2123 * like eth_type_trans() (which virtio-net does in receive_buf()). 2124 */ 2125 static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev, 2126 struct virtnet_info *vi, 2127 struct xdp_buff *xdp, 2128 unsigned int xdp_frags_truesz) 2129 { 2130 struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 2131 unsigned int headroom, data_len; 2132 struct sk_buff *skb; 2133 int metasize; 2134 u8 nr_frags; 2135 2136 if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) { 2137 pr_debug("Error building skb as missing reserved tailroom for xdp"); 2138 return NULL; 2139 } 2140 2141 if (unlikely(xdp_buff_has_frags(xdp))) 2142 nr_frags = sinfo->nr_frags; 2143 2144 skb = build_skb(xdp->data_hard_start, xdp->frame_sz); 2145 if (unlikely(!skb)) 2146 return NULL; 2147 2148 headroom = xdp->data - xdp->data_hard_start; 2149 data_len = xdp->data_end - xdp->data; 2150 skb_reserve(skb, headroom); 2151 __skb_put(skb, data_len); 2152 2153 metasize = xdp->data - xdp->data_meta; 2154 metasize = metasize > 0 ? 
metasize : 0; 2155 if (metasize) 2156 skb_metadata_set(skb, metasize); 2157 2158 if (unlikely(xdp_buff_has_frags(xdp))) 2159 xdp_update_skb_shared_info(skb, nr_frags, 2160 sinfo->xdp_frags_size, 2161 xdp_frags_truesz, 2162 xdp_buff_is_frag_pfmemalloc(xdp)); 2163 2164 return skb; 2165 } 2166 2167 /* TODO: build xdp in big mode */ 2168 static int virtnet_build_xdp_buff_mrg(struct net_device *dev, 2169 struct virtnet_info *vi, 2170 struct receive_queue *rq, 2171 struct xdp_buff *xdp, 2172 void *buf, 2173 unsigned int len, 2174 unsigned int frame_sz, 2175 int *num_buf, 2176 unsigned int *xdp_frags_truesize, 2177 struct virtnet_rq_stats *stats) 2178 { 2179 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 2180 struct skb_shared_info *shinfo; 2181 unsigned int xdp_frags_truesz = 0; 2182 unsigned int truesize; 2183 struct page *page; 2184 skb_frag_t *frag; 2185 int offset; 2186 void *ctx; 2187 2188 xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq); 2189 xdp_prepare_buff(xdp, buf - XDP_PACKET_HEADROOM, 2190 XDP_PACKET_HEADROOM + vi->hdr_len, len - vi->hdr_len, true); 2191 2192 if (!*num_buf) 2193 return 0; 2194 2195 if (*num_buf > 1) { 2196 /* If we want to build multi-buffer xdp, we need 2197 * to specify that the flags of xdp_buff have the 2198 * XDP_FLAGS_HAS_FRAG bit. 2199 */ 2200 if (!xdp_buff_has_frags(xdp)) 2201 xdp_buff_set_frags_flag(xdp); 2202 2203 shinfo = xdp_get_shared_info_from_buff(xdp); 2204 shinfo->nr_frags = 0; 2205 shinfo->xdp_frags_size = 0; 2206 } 2207 2208 if (*num_buf > MAX_SKB_FRAGS + 1) 2209 return -EINVAL; 2210 2211 while (--*num_buf > 0) { 2212 buf = virtnet_rq_get_buf(rq, &len, &ctx); 2213 if (unlikely(!buf)) { 2214 pr_debug("%s: rx error: %d buffers out of %d missing\n", 2215 dev->name, *num_buf, 2216 virtio16_to_cpu(vi->vdev, hdr->num_buffers)); 2217 DEV_STATS_INC(dev, rx_length_errors); 2218 goto err; 2219 } 2220 2221 u64_stats_add(&stats->bytes, len); 2222 page = virt_to_head_page(buf); 2223 offset = buf - page_address(page); 2224 2225 if (check_mergeable_len(dev, ctx, len)) { 2226 put_page(page); 2227 goto err; 2228 } 2229 2230 truesize = mergeable_ctx_to_truesize(ctx); 2231 xdp_frags_truesz += truesize; 2232 2233 frag = &shinfo->frags[shinfo->nr_frags++]; 2234 skb_frag_fill_page_desc(frag, page, offset, len); 2235 if (page_is_pfmemalloc(page)) 2236 xdp_buff_set_frag_pfmemalloc(xdp); 2237 2238 shinfo->xdp_frags_size += len; 2239 } 2240 2241 *xdp_frags_truesize = xdp_frags_truesz; 2242 return 0; 2243 2244 err: 2245 put_xdp_frags(xdp); 2246 return -EINVAL; 2247 } 2248 2249 static void *mergeable_xdp_get_buf(struct virtnet_info *vi, 2250 struct receive_queue *rq, 2251 struct bpf_prog *xdp_prog, 2252 void *ctx, 2253 unsigned int *frame_sz, 2254 int *num_buf, 2255 struct page **page, 2256 int offset, 2257 unsigned int *len, 2258 struct virtio_net_hdr_mrg_rxbuf *hdr) 2259 { 2260 unsigned int truesize = mergeable_ctx_to_truesize(ctx); 2261 unsigned int headroom = mergeable_ctx_to_headroom(ctx); 2262 struct page *xdp_page; 2263 unsigned int xdp_room; 2264 2265 /* Transient failure which in theory could occur if 2266 * in-flight packets from before XDP was enabled reach 2267 * the receive path after XDP is loaded. 2268 */ 2269 if (unlikely(hdr->hdr.gso_type)) 2270 return NULL; 2271 2272 /* Partially checksummed packets must be dropped. */ 2273 if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) 2274 return NULL; 2275 2276 /* Now XDP core assumes frag size is PAGE_SIZE, but buffers 2277 * with headroom may add hole in truesize, which 2278 * make their length exceed PAGE_SIZE. 
So we disabled the 2279 * hole mechanism for xdp. See add_recvbuf_mergeable(). 2280 */ 2281 *frame_sz = truesize; 2282 2283 if (likely(headroom >= virtnet_get_headroom(vi) && 2284 (*num_buf == 1 || xdp_prog->aux->xdp_has_frags))) { 2285 return page_address(*page) + offset; 2286 } 2287 2288 /* This happens when headroom is not enough because 2289 * of the buffer was prefilled before XDP is set. 2290 * This should only happen for the first several packets. 2291 * In fact, vq reset can be used here to help us clean up 2292 * the prefilled buffers, but many existing devices do not 2293 * support it, and we don't want to bother users who are 2294 * using xdp normally. 2295 */ 2296 if (!xdp_prog->aux->xdp_has_frags) { 2297 /* linearize data for XDP */ 2298 xdp_page = xdp_linearize_page(vi->dev, rq, num_buf, 2299 *page, offset, 2300 XDP_PACKET_HEADROOM, 2301 len); 2302 if (!xdp_page) 2303 return NULL; 2304 } else { 2305 xdp_room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM + 2306 sizeof(struct skb_shared_info)); 2307 if (*len + xdp_room > PAGE_SIZE) 2308 return NULL; 2309 2310 xdp_page = alloc_page(GFP_ATOMIC); 2311 if (!xdp_page) 2312 return NULL; 2313 2314 memcpy(page_address(xdp_page) + XDP_PACKET_HEADROOM, 2315 page_address(*page) + offset, *len); 2316 } 2317 2318 *frame_sz = PAGE_SIZE; 2319 2320 put_page(*page); 2321 2322 *page = xdp_page; 2323 2324 return page_address(*page) + XDP_PACKET_HEADROOM; 2325 } 2326 2327 static struct sk_buff *receive_mergeable_xdp(struct net_device *dev, 2328 struct virtnet_info *vi, 2329 struct receive_queue *rq, 2330 struct bpf_prog *xdp_prog, 2331 void *buf, 2332 void *ctx, 2333 unsigned int len, 2334 unsigned int *xdp_xmit, 2335 struct virtnet_rq_stats *stats) 2336 { 2337 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 2338 int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 2339 struct page *page = virt_to_head_page(buf); 2340 int offset = buf - page_address(page); 2341 unsigned int xdp_frags_truesz = 0; 2342 struct sk_buff *head_skb; 2343 unsigned int frame_sz; 2344 struct xdp_buff xdp; 2345 void *data; 2346 u32 act; 2347 int err; 2348 2349 data = mergeable_xdp_get_buf(vi, rq, xdp_prog, ctx, &frame_sz, &num_buf, &page, 2350 offset, &len, hdr); 2351 if (unlikely(!data)) 2352 goto err_xdp; 2353 2354 err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz, 2355 &num_buf, &xdp_frags_truesz, stats); 2356 if (unlikely(err)) 2357 goto err_xdp; 2358 2359 act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats); 2360 2361 switch (act) { 2362 case XDP_PASS: 2363 head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz); 2364 if (unlikely(!head_skb)) 2365 break; 2366 return head_skb; 2367 2368 case XDP_TX: 2369 case XDP_REDIRECT: 2370 return NULL; 2371 2372 default: 2373 break; 2374 } 2375 2376 put_xdp_frags(&xdp); 2377 2378 err_xdp: 2379 put_page(page); 2380 mergeable_buf_free(rq, num_buf, dev, stats); 2381 2382 u64_stats_inc(&stats->xdp_drops); 2383 u64_stats_inc(&stats->drops); 2384 return NULL; 2385 } 2386 2387 static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb, 2388 struct sk_buff *curr_skb, 2389 struct page *page, void *buf, 2390 int len, int truesize) 2391 { 2392 int num_skb_frags; 2393 int offset; 2394 2395 num_skb_frags = skb_shinfo(curr_skb)->nr_frags; 2396 if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) { 2397 struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC); 2398 2399 if (unlikely(!nskb)) 2400 return NULL; 2401 2402 if (curr_skb == head_skb) 2403 skb_shinfo(curr_skb)->frag_list = nskb; 2404 else 2405 
curr_skb->next = nskb; 2406 curr_skb = nskb; 2407 head_skb->truesize += nskb->truesize; 2408 num_skb_frags = 0; 2409 } 2410 2411 if (curr_skb != head_skb) { 2412 head_skb->data_len += len; 2413 head_skb->len += len; 2414 head_skb->truesize += truesize; 2415 } 2416 2417 offset = buf - page_address(page); 2418 if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) { 2419 put_page(page); 2420 skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1, 2421 len, truesize); 2422 } else { 2423 skb_add_rx_frag(curr_skb, num_skb_frags, page, 2424 offset, len, truesize); 2425 } 2426 2427 return curr_skb; 2428 } 2429 2430 static struct sk_buff *receive_mergeable(struct net_device *dev, 2431 struct virtnet_info *vi, 2432 struct receive_queue *rq, 2433 void *buf, 2434 void *ctx, 2435 unsigned int len, 2436 unsigned int *xdp_xmit, 2437 struct virtnet_rq_stats *stats) 2438 { 2439 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 2440 int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 2441 struct page *page = virt_to_head_page(buf); 2442 int offset = buf - page_address(page); 2443 struct sk_buff *head_skb, *curr_skb; 2444 unsigned int truesize = mergeable_ctx_to_truesize(ctx); 2445 unsigned int headroom = mergeable_ctx_to_headroom(ctx); 2446 2447 head_skb = NULL; 2448 u64_stats_add(&stats->bytes, len - vi->hdr_len); 2449 2450 if (check_mergeable_len(dev, ctx, len)) 2451 goto err_skb; 2452 2453 if (unlikely(vi->xdp_enabled)) { 2454 struct bpf_prog *xdp_prog; 2455 2456 rcu_read_lock(); 2457 xdp_prog = rcu_dereference(rq->xdp_prog); 2458 if (xdp_prog) { 2459 head_skb = receive_mergeable_xdp(dev, vi, rq, xdp_prog, buf, ctx, 2460 len, xdp_xmit, stats); 2461 rcu_read_unlock(); 2462 return head_skb; 2463 } 2464 rcu_read_unlock(); 2465 } 2466 2467 head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom); 2468 curr_skb = head_skb; 2469 2470 if (unlikely(!curr_skb)) 2471 goto err_skb; 2472 while (--num_buf) { 2473 buf = virtnet_rq_get_buf(rq, &len, &ctx); 2474 if (unlikely(!buf)) { 2475 pr_debug("%s: rx error: %d buffers out of %d missing\n", 2476 dev->name, num_buf, 2477 virtio16_to_cpu(vi->vdev, 2478 hdr->num_buffers)); 2479 DEV_STATS_INC(dev, rx_length_errors); 2480 goto err_buf; 2481 } 2482 2483 u64_stats_add(&stats->bytes, len); 2484 page = virt_to_head_page(buf); 2485 2486 if (check_mergeable_len(dev, ctx, len)) 2487 goto err_skb; 2488 2489 truesize = mergeable_ctx_to_truesize(ctx); 2490 curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page, 2491 buf, len, truesize); 2492 if (!curr_skb) 2493 goto err_skb; 2494 } 2495 2496 ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len); 2497 return head_skb; 2498 2499 err_skb: 2500 put_page(page); 2501 mergeable_buf_free(rq, num_buf, dev, stats); 2502 2503 err_buf: 2504 u64_stats_inc(&stats->drops); 2505 dev_kfree_skb(head_skb); 2506 return NULL; 2507 } 2508 2509 static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash, 2510 struct sk_buff *skb) 2511 { 2512 enum pkt_hash_types rss_hash_type; 2513 2514 if (!hdr_hash || !skb) 2515 return; 2516 2517 switch (__le16_to_cpu(hdr_hash->hash_report)) { 2518 case VIRTIO_NET_HASH_REPORT_TCPv4: 2519 case VIRTIO_NET_HASH_REPORT_UDPv4: 2520 case VIRTIO_NET_HASH_REPORT_TCPv6: 2521 case VIRTIO_NET_HASH_REPORT_UDPv6: 2522 case VIRTIO_NET_HASH_REPORT_TCPv6_EX: 2523 case VIRTIO_NET_HASH_REPORT_UDPv6_EX: 2524 rss_hash_type = PKT_HASH_TYPE_L4; 2525 break; 2526 case VIRTIO_NET_HASH_REPORT_IPv4: 2527 case VIRTIO_NET_HASH_REPORT_IPv6: 2528 case VIRTIO_NET_HASH_REPORT_IPv6_EX: 2529 rss_hash_type = 
PKT_HASH_TYPE_L3; 2530 break; 2531 case VIRTIO_NET_HASH_REPORT_NONE: 2532 default: 2533 rss_hash_type = PKT_HASH_TYPE_NONE; 2534 } 2535 skb_set_hash(skb, __le32_to_cpu(hdr_hash->hash_value), rss_hash_type); 2536 } 2537 2538 static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq, 2539 struct sk_buff *skb, u8 flags) 2540 { 2541 struct virtio_net_common_hdr *hdr; 2542 struct net_device *dev = vi->dev; 2543 2544 hdr = skb_vnet_common_hdr(skb); 2545 if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report) 2546 virtio_skb_set_hash(&hdr->hash_v1_hdr, skb); 2547 2548 if (flags & VIRTIO_NET_HDR_F_DATA_VALID) 2549 skb->ip_summed = CHECKSUM_UNNECESSARY; 2550 2551 if (virtio_net_hdr_to_skb(skb, &hdr->hdr, 2552 virtio_is_little_endian(vi->vdev))) { 2553 net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n", 2554 dev->name, hdr->hdr.gso_type, 2555 hdr->hdr.gso_size); 2556 goto frame_err; 2557 } 2558 2559 skb_record_rx_queue(skb, vq2rxq(rq->vq)); 2560 skb->protocol = eth_type_trans(skb, dev); 2561 pr_debug("Receiving skb proto 0x%04x len %i type %i\n", 2562 ntohs(skb->protocol), skb->len, skb->pkt_type); 2563 2564 napi_gro_receive(&rq->napi, skb); 2565 return; 2566 2567 frame_err: 2568 DEV_STATS_INC(dev, rx_frame_errors); 2569 dev_kfree_skb(skb); 2570 } 2571 2572 static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, 2573 void *buf, unsigned int len, void **ctx, 2574 unsigned int *xdp_xmit, 2575 struct virtnet_rq_stats *stats) 2576 { 2577 struct net_device *dev = vi->dev; 2578 struct sk_buff *skb; 2579 u8 flags; 2580 2581 if (unlikely(len < vi->hdr_len + ETH_HLEN)) { 2582 pr_debug("%s: short packet %i\n", dev->name, len); 2583 DEV_STATS_INC(dev, rx_length_errors); 2584 virtnet_rq_free_buf(vi, rq, buf); 2585 return; 2586 } 2587 2588 /* 1. Save the flags early, as the XDP program might overwrite them. 2589 * These flags ensure packets marked as VIRTIO_NET_HDR_F_DATA_VALID 2590 * stay valid after XDP processing. 2591 * 2. XDP doesn't work with partially checksummed packets (refer to 2592 * virtnet_xdp_set()), so packets marked as 2593 * VIRTIO_NET_HDR_F_NEEDS_CSUM get dropped during XDP processing. 2594 */ 2595 flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags; 2596 2597 if (vi->mergeable_rx_bufs) 2598 skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit, 2599 stats); 2600 else if (vi->big_packets) 2601 skb = receive_big(dev, vi, rq, buf, len, stats); 2602 else 2603 skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); 2604 2605 if (unlikely(!skb)) 2606 return; 2607 2608 virtnet_receive_done(vi, rq, skb, flags); 2609 } 2610 2611 /* Unlike mergeable buffers, all buffers are allocated to the 2612 * same size, except for the headroom. For this reason we do 2613 * not need to use mergeable_len_to_ctx here - it is enough 2614 * to store the headroom as the context ignoring the truesize. 
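 *
 * The posted buffer is laid out as
 *   [ VIRTNET_RX_PAD + xdp_headroom | vnet hdr | GOOD_PACKET_LEN data ]
 * and the allocation is rounded up with SKB_DATA_ALIGN() plus room for
 * struct skb_shared_info, matching what receive_small_build_skb()
 * expects when it turns the buffer into an skb.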
2615 */ 2616 static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, 2617 gfp_t gfp) 2618 { 2619 char *buf; 2620 unsigned int xdp_headroom = virtnet_get_headroom(vi); 2621 void *ctx = (void *)(unsigned long)xdp_headroom; 2622 int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom; 2623 int err; 2624 2625 len = SKB_DATA_ALIGN(len) + 2626 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 2627 2628 if (unlikely(!skb_page_frag_refill(len, &rq->alloc_frag, gfp))) 2629 return -ENOMEM; 2630 2631 buf = virtnet_rq_alloc(rq, len, gfp); 2632 if (unlikely(!buf)) 2633 return -ENOMEM; 2634 2635 buf += VIRTNET_RX_PAD + xdp_headroom; 2636 2637 virtnet_rq_init_one_sg(rq, buf, vi->hdr_len + GOOD_PACKET_LEN); 2638 2639 err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1, buf, ctx, gfp); 2640 if (err < 0) { 2641 virtnet_rq_unmap(rq, buf, 0); 2642 put_page(virt_to_head_page(buf)); 2643 } 2644 2645 return err; 2646 } 2647 2648 static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq, 2649 gfp_t gfp) 2650 { 2651 struct page *first, *list = NULL; 2652 char *p; 2653 int i, err, offset; 2654 2655 sg_init_table(rq->sg, vi->big_packets_num_skbfrags + 2); 2656 2657 /* page in rq->sg[vi->big_packets_num_skbfrags + 1] is list tail */ 2658 for (i = vi->big_packets_num_skbfrags + 1; i > 1; --i) { 2659 first = get_a_page(rq, gfp); 2660 if (!first) { 2661 if (list) 2662 give_pages(rq, list); 2663 return -ENOMEM; 2664 } 2665 sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE); 2666 2667 /* chain new page in list head to match sg */ 2668 first->private = (unsigned long)list; 2669 list = first; 2670 } 2671 2672 first = get_a_page(rq, gfp); 2673 if (!first) { 2674 give_pages(rq, list); 2675 return -ENOMEM; 2676 } 2677 p = page_address(first); 2678 2679 /* rq->sg[0], rq->sg[1] share the same page */ 2680 /* a separated rq->sg[0] for header - required in case !any_header_sg */ 2681 sg_set_buf(&rq->sg[0], p, vi->hdr_len); 2682 2683 /* rq->sg[1] for data packet, from offset */ 2684 offset = sizeof(struct padded_vnet_hdr); 2685 sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset); 2686 2687 /* chain first in list head */ 2688 first->private = (unsigned long)list; 2689 err = virtqueue_add_inbuf(rq->vq, rq->sg, vi->big_packets_num_skbfrags + 2, 2690 first, gfp); 2691 if (err < 0) 2692 give_pages(rq, first); 2693 2694 return err; 2695 } 2696 2697 static unsigned int get_mergeable_buf_len(struct receive_queue *rq, 2698 struct ewma_pkt_len *avg_pkt_len, 2699 unsigned int room) 2700 { 2701 struct virtnet_info *vi = rq->vq->vdev->priv; 2702 const size_t hdr_len = vi->hdr_len; 2703 unsigned int len; 2704 2705 if (room) 2706 return PAGE_SIZE - room; 2707 2708 len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len), 2709 rq->min_buf_len, PAGE_SIZE - hdr_len); 2710 2711 return ALIGN(len, L1_CACHE_BYTES); 2712 } 2713 2714 static int add_recvbuf_mergeable(struct virtnet_info *vi, 2715 struct receive_queue *rq, gfp_t gfp) 2716 { 2717 struct page_frag *alloc_frag = &rq->alloc_frag; 2718 unsigned int headroom = virtnet_get_headroom(vi); 2719 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 2720 unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); 2721 unsigned int len, hole; 2722 void *ctx; 2723 char *buf; 2724 int err; 2725 2726 /* Extra tailroom is needed to satisfy XDP's assumption. This 2727 * means rx frags coalescing won't work, but consider we've 2728 * disabled GSO for XDP, it won't be a big issue. 
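 *
 * When XDP headroom is in use, room covers the headroom plus the
 * skb_shared_info tailroom, and get_mergeable_buf_len() then returns
 * PAGE_SIZE - room, so headroom + data + tailroom never exceed a
 * single page, which is what the XDP core expects of a frag.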
2729 */ 2730 len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room); 2731 2732 if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp))) 2733 return -ENOMEM; 2734 2735 if (!alloc_frag->offset && len + room + sizeof(struct virtnet_rq_dma) > alloc_frag->size) 2736 len -= sizeof(struct virtnet_rq_dma); 2737 2738 buf = virtnet_rq_alloc(rq, len + room, gfp); 2739 if (unlikely(!buf)) 2740 return -ENOMEM; 2741 2742 buf += headroom; /* advance address leaving hole at front of pkt */ 2743 hole = alloc_frag->size - alloc_frag->offset; 2744 if (hole < len + room) { 2745 /* To avoid internal fragmentation, if there is very likely not 2746 * enough space for another buffer, add the remaining space to 2747 * the current buffer. 2748 * XDP core assumes that frame_size of xdp_buff and the length 2749 * of the frag are PAGE_SIZE, so we disable the hole mechanism. 2750 */ 2751 if (!headroom) 2752 len += hole; 2753 alloc_frag->offset += hole; 2754 } 2755 2756 virtnet_rq_init_one_sg(rq, buf, len); 2757 2758 ctx = mergeable_len_to_ctx(len + room, headroom); 2759 err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1, buf, ctx, gfp); 2760 if (err < 0) { 2761 virtnet_rq_unmap(rq, buf, 0); 2762 put_page(virt_to_head_page(buf)); 2763 } 2764 2765 return err; 2766 } 2767 2768 /* 2769 * Returns false if we couldn't fill entirely (OOM). 2770 * 2771 * Normally run in the receive path, but can also be run from ndo_open 2772 * before we're receiving packets, or from refill_work which is 2773 * careful to disable receiving (using napi_disable). 2774 */ 2775 static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq, 2776 gfp_t gfp) 2777 { 2778 int err; 2779 2780 if (rq->xsk_pool) { 2781 err = virtnet_add_recvbuf_xsk(vi, rq, rq->xsk_pool, gfp); 2782 goto kick; 2783 } 2784 2785 do { 2786 if (vi->mergeable_rx_bufs) 2787 err = add_recvbuf_mergeable(vi, rq, gfp); 2788 else if (vi->big_packets) 2789 err = add_recvbuf_big(vi, rq, gfp); 2790 else 2791 err = add_recvbuf_small(vi, rq, gfp); 2792 2793 if (err) 2794 break; 2795 } while (rq->vq->num_free); 2796 2797 kick: 2798 if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) { 2799 unsigned long flags; 2800 2801 flags = u64_stats_update_begin_irqsave(&rq->stats.syncp); 2802 u64_stats_inc(&rq->stats.kicks); 2803 u64_stats_update_end_irqrestore(&rq->stats.syncp, flags); 2804 } 2805 2806 return err != -ENOMEM; 2807 } 2808 2809 static void skb_recv_done(struct virtqueue *rvq) 2810 { 2811 struct virtnet_info *vi = rvq->vdev->priv; 2812 struct receive_queue *rq = &vi->rq[vq2rxq(rvq)]; 2813 2814 rq->calls++; 2815 virtqueue_napi_schedule(&rq->napi, rvq); 2816 } 2817 2818 static void virtnet_napi_do_enable(struct virtqueue *vq, 2819 struct napi_struct *napi) 2820 { 2821 napi_enable(napi); 2822 2823 /* If all buffers were filled by other side before we napi_enabled, we 2824 * won't get another interrupt, so process any outstanding packets now. 2825 * Call local_bh_enable after to trigger softIRQ processing. 
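 * With bottom halves disabled, scheduling NAPI only raises the
 * softirq; the local_bh_enable() that follows then lets it run on this
 * CPU straight away.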
2826 */ 2827 local_bh_disable(); 2828 virtqueue_napi_schedule(napi, vq); 2829 local_bh_enable(); 2830 } 2831 2832 static void virtnet_napi_enable(struct receive_queue *rq) 2833 { 2834 struct virtnet_info *vi = rq->vq->vdev->priv; 2835 int qidx = vq2rxq(rq->vq); 2836 2837 virtnet_napi_do_enable(rq->vq, &rq->napi); 2838 netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, &rq->napi); 2839 } 2840 2841 static void virtnet_napi_tx_enable(struct send_queue *sq) 2842 { 2843 struct virtnet_info *vi = sq->vq->vdev->priv; 2844 struct napi_struct *napi = &sq->napi; 2845 int qidx = vq2txq(sq->vq); 2846 2847 if (!napi->weight) 2848 return; 2849 2850 /* Tx napi touches cachelines on the cpu handling tx interrupts. Only 2851 * enable the feature if this is likely affine with the transmit path. 2852 */ 2853 if (!vi->affinity_hint_set) { 2854 napi->weight = 0; 2855 return; 2856 } 2857 2858 virtnet_napi_do_enable(sq->vq, napi); 2859 netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, napi); 2860 } 2861 2862 static void virtnet_napi_tx_disable(struct send_queue *sq) 2863 { 2864 struct virtnet_info *vi = sq->vq->vdev->priv; 2865 struct napi_struct *napi = &sq->napi; 2866 int qidx = vq2txq(sq->vq); 2867 2868 if (napi->weight) { 2869 netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, NULL); 2870 napi_disable(napi); 2871 } 2872 } 2873 2874 static void virtnet_napi_disable(struct receive_queue *rq) 2875 { 2876 struct virtnet_info *vi = rq->vq->vdev->priv; 2877 struct napi_struct *napi = &rq->napi; 2878 int qidx = vq2rxq(rq->vq); 2879 2880 netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, NULL); 2881 napi_disable(napi); 2882 } 2883 2884 static void refill_work(struct work_struct *work) 2885 { 2886 struct virtnet_info *vi = 2887 container_of(work, struct virtnet_info, refill.work); 2888 bool still_empty; 2889 int i; 2890 2891 for (i = 0; i < vi->curr_queue_pairs; i++) { 2892 struct receive_queue *rq = &vi->rq[i]; 2893 2894 /* 2895 * When queue API support is added in the future and the call 2896 * below becomes napi_disable_locked, this driver will need to 2897 * be refactored. 2898 * 2899 * One possible solution would be to: 2900 * - cancel refill_work with cancel_delayed_work (note: 2901 * non-sync) 2902 * - cancel refill_work with cancel_delayed_work_sync in 2903 * virtnet_remove after the netdev is unregistered 2904 * - wrap all of the work in a lock (perhaps the netdev 2905 * instance lock) 2906 * - check netif_running() and return early to avoid a race 2907 */ 2908 napi_disable(&rq->napi); 2909 still_empty = !try_fill_recv(vi, rq, GFP_KERNEL); 2910 virtnet_napi_do_enable(rq->vq, &rq->napi); 2911 2912 /* In theory, this can happen: if we don't get any buffers in 2913 * we will *never* try to fill again. 
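 * Retry from the workqueue after half a second instead of waiting for
 * an RX interrupt that may never arrive.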
2914 */ 2915 if (still_empty) 2916 schedule_delayed_work(&vi->refill, HZ/2); 2917 } 2918 } 2919 2920 static int virtnet_receive_xsk_bufs(struct virtnet_info *vi, 2921 struct receive_queue *rq, 2922 int budget, 2923 unsigned int *xdp_xmit, 2924 struct virtnet_rq_stats *stats) 2925 { 2926 unsigned int len; 2927 int packets = 0; 2928 void *buf; 2929 2930 while (packets < budget) { 2931 buf = virtqueue_get_buf(rq->vq, &len); 2932 if (!buf) 2933 break; 2934 2935 virtnet_receive_xsk_buf(vi, rq, buf, len, xdp_xmit, stats); 2936 packets++; 2937 } 2938 2939 return packets; 2940 } 2941 2942 static int virtnet_receive_packets(struct virtnet_info *vi, 2943 struct receive_queue *rq, 2944 int budget, 2945 unsigned int *xdp_xmit, 2946 struct virtnet_rq_stats *stats) 2947 { 2948 unsigned int len; 2949 int packets = 0; 2950 void *buf; 2951 2952 if (!vi->big_packets || vi->mergeable_rx_bufs) { 2953 void *ctx; 2954 while (packets < budget && 2955 (buf = virtnet_rq_get_buf(rq, &len, &ctx))) { 2956 receive_buf(vi, rq, buf, len, ctx, xdp_xmit, stats); 2957 packets++; 2958 } 2959 } else { 2960 while (packets < budget && 2961 (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) { 2962 receive_buf(vi, rq, buf, len, NULL, xdp_xmit, stats); 2963 packets++; 2964 } 2965 } 2966 2967 return packets; 2968 } 2969 2970 static int virtnet_receive(struct receive_queue *rq, int budget, 2971 unsigned int *xdp_xmit) 2972 { 2973 struct virtnet_info *vi = rq->vq->vdev->priv; 2974 struct virtnet_rq_stats stats = {}; 2975 int i, packets; 2976 2977 if (rq->xsk_pool) 2978 packets = virtnet_receive_xsk_bufs(vi, rq, budget, xdp_xmit, &stats); 2979 else 2980 packets = virtnet_receive_packets(vi, rq, budget, xdp_xmit, &stats); 2981 2982 if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) { 2983 if (!try_fill_recv(vi, rq, GFP_ATOMIC)) { 2984 spin_lock(&vi->refill_lock); 2985 if (vi->refill_enabled) 2986 schedule_delayed_work(&vi->refill, 0); 2987 spin_unlock(&vi->refill_lock); 2988 } 2989 } 2990 2991 u64_stats_set(&stats.packets, packets); 2992 u64_stats_update_begin(&rq->stats.syncp); 2993 for (i = 0; i < ARRAY_SIZE(virtnet_rq_stats_desc); i++) { 2994 size_t offset = virtnet_rq_stats_desc[i].offset; 2995 u64_stats_t *item, *src; 2996 2997 item = (u64_stats_t *)((u8 *)&rq->stats + offset); 2998 src = (u64_stats_t *)((u8 *)&stats + offset); 2999 u64_stats_add(item, u64_stats_read(src)); 3000 } 3001 3002 u64_stats_add(&rq->stats.packets, u64_stats_read(&stats.packets)); 3003 u64_stats_add(&rq->stats.bytes, u64_stats_read(&stats.bytes)); 3004 3005 u64_stats_update_end(&rq->stats.syncp); 3006 3007 return packets; 3008 } 3009 3010 static void virtnet_poll_cleantx(struct receive_queue *rq, int budget) 3011 { 3012 struct virtnet_info *vi = rq->vq->vdev->priv; 3013 unsigned int index = vq2rxq(rq->vq); 3014 struct send_queue *sq = &vi->sq[index]; 3015 struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index); 3016 3017 if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index)) 3018 return; 3019 3020 if (__netif_tx_trylock(txq)) { 3021 if (sq->reset) { 3022 __netif_tx_unlock(txq); 3023 return; 3024 } 3025 3026 do { 3027 virtqueue_disable_cb(sq->vq); 3028 free_old_xmit(sq, txq, !!budget); 3029 } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq))); 3030 3031 if (sq->vq->num_free >= MAX_SKB_FRAGS + 2) { 3032 if (netif_tx_queue_stopped(txq)) { 3033 u64_stats_update_begin(&sq->stats.syncp); 3034 u64_stats_inc(&sq->stats.wake); 3035 u64_stats_update_end(&sq->stats.syncp); 3036 } 3037 netif_tx_wake_queue(txq); 3038 
} 3039 3040 __netif_tx_unlock(txq); 3041 } 3042 } 3043 3044 static void virtnet_rx_dim_update(struct virtnet_info *vi, struct receive_queue *rq) 3045 { 3046 struct dim_sample cur_sample = {}; 3047 3048 if (!rq->packets_in_napi) 3049 return; 3050 3051 /* Don't need protection when fetching stats, since fetcher and 3052 * updater of the stats are in same context 3053 */ 3054 dim_update_sample(rq->calls, 3055 u64_stats_read(&rq->stats.packets), 3056 u64_stats_read(&rq->stats.bytes), 3057 &cur_sample); 3058 3059 net_dim(&rq->dim, &cur_sample); 3060 rq->packets_in_napi = 0; 3061 } 3062 3063 static int virtnet_poll(struct napi_struct *napi, int budget) 3064 { 3065 struct receive_queue *rq = 3066 container_of(napi, struct receive_queue, napi); 3067 struct virtnet_info *vi = rq->vq->vdev->priv; 3068 struct send_queue *sq; 3069 unsigned int received; 3070 unsigned int xdp_xmit = 0; 3071 bool napi_complete; 3072 3073 virtnet_poll_cleantx(rq, budget); 3074 3075 received = virtnet_receive(rq, budget, &xdp_xmit); 3076 rq->packets_in_napi += received; 3077 3078 if (xdp_xmit & VIRTIO_XDP_REDIR) 3079 xdp_do_flush(); 3080 3081 /* Out of packets? */ 3082 if (received < budget) { 3083 napi_complete = virtqueue_napi_complete(napi, rq->vq, received); 3084 /* Intentionally not taking dim_lock here. This may result in a 3085 * spurious net_dim call. But if that happens virtnet_rx_dim_work 3086 * will not act on the scheduled work. 3087 */ 3088 if (napi_complete && rq->dim_enabled) 3089 virtnet_rx_dim_update(vi, rq); 3090 } 3091 3092 if (xdp_xmit & VIRTIO_XDP_TX) { 3093 sq = virtnet_xdp_get_sq(vi); 3094 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { 3095 u64_stats_update_begin(&sq->stats.syncp); 3096 u64_stats_inc(&sq->stats.kicks); 3097 u64_stats_update_end(&sq->stats.syncp); 3098 } 3099 virtnet_xdp_put_sq(vi, sq); 3100 } 3101 3102 return received; 3103 } 3104 3105 static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index) 3106 { 3107 virtnet_napi_tx_disable(&vi->sq[qp_index]); 3108 virtnet_napi_disable(&vi->rq[qp_index]); 3109 xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); 3110 } 3111 3112 static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index) 3113 { 3114 struct net_device *dev = vi->dev; 3115 int err; 3116 3117 err = xdp_rxq_info_reg(&vi->rq[qp_index].xdp_rxq, dev, qp_index, 3118 vi->rq[qp_index].napi.napi_id); 3119 if (err < 0) 3120 return err; 3121 3122 err = xdp_rxq_info_reg_mem_model(&vi->rq[qp_index].xdp_rxq, 3123 MEM_TYPE_PAGE_SHARED, NULL); 3124 if (err < 0) 3125 goto err_xdp_reg_mem_model; 3126 3127 virtnet_napi_enable(&vi->rq[qp_index]); 3128 virtnet_napi_tx_enable(&vi->sq[qp_index]); 3129 3130 return 0; 3131 3132 err_xdp_reg_mem_model: 3133 xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); 3134 return err; 3135 } 3136 3137 static void virtnet_cancel_dim(struct virtnet_info *vi, struct dim *dim) 3138 { 3139 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 3140 return; 3141 net_dim_work_cancel(dim); 3142 } 3143 3144 static void virtnet_update_settings(struct virtnet_info *vi) 3145 { 3146 u32 speed; 3147 u8 duplex; 3148 3149 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX)) 3150 return; 3151 3152 virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed); 3153 3154 if (ethtool_validate_speed(speed)) 3155 vi->speed = speed; 3156 3157 virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex); 3158 3159 if (ethtool_validate_duplex(duplex)) 3160 vi->duplex = duplex; 3161 } 3162 3163 static int 
virtnet_open(struct net_device *dev) 3164 { 3165 struct virtnet_info *vi = netdev_priv(dev); 3166 int i, err; 3167 3168 enable_delayed_refill(vi); 3169 3170 for (i = 0; i < vi->max_queue_pairs; i++) { 3171 if (i < vi->curr_queue_pairs) 3172 /* Make sure we have some buffers: if oom use wq. */ 3173 if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL)) 3174 schedule_delayed_work(&vi->refill, 0); 3175 3176 err = virtnet_enable_queue_pair(vi, i); 3177 if (err < 0) 3178 goto err_enable_qp; 3179 } 3180 3181 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { 3182 if (vi->status & VIRTIO_NET_S_LINK_UP) 3183 netif_carrier_on(vi->dev); 3184 virtio_config_driver_enable(vi->vdev); 3185 } else { 3186 vi->status = VIRTIO_NET_S_LINK_UP; 3187 netif_carrier_on(dev); 3188 } 3189 3190 return 0; 3191 3192 err_enable_qp: 3193 disable_delayed_refill(vi); 3194 cancel_delayed_work_sync(&vi->refill); 3195 3196 for (i--; i >= 0; i--) { 3197 virtnet_disable_queue_pair(vi, i); 3198 virtnet_cancel_dim(vi, &vi->rq[i].dim); 3199 } 3200 3201 return err; 3202 } 3203 3204 static int virtnet_poll_tx(struct napi_struct *napi, int budget) 3205 { 3206 struct send_queue *sq = container_of(napi, struct send_queue, napi); 3207 struct virtnet_info *vi = sq->vq->vdev->priv; 3208 unsigned int index = vq2txq(sq->vq); 3209 struct netdev_queue *txq; 3210 int opaque, xsk_done = 0; 3211 bool done; 3212 3213 if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { 3214 /* We don't need to enable cb for XDP */ 3215 napi_complete_done(napi, 0); 3216 return 0; 3217 } 3218 3219 txq = netdev_get_tx_queue(vi->dev, index); 3220 __netif_tx_lock(txq, raw_smp_processor_id()); 3221 virtqueue_disable_cb(sq->vq); 3222 3223 if (sq->xsk_pool) 3224 xsk_done = virtnet_xsk_xmit(sq, sq->xsk_pool, budget); 3225 else 3226 free_old_xmit(sq, txq, !!budget); 3227 3228 if (sq->vq->num_free >= MAX_SKB_FRAGS + 2) { 3229 if (netif_tx_queue_stopped(txq)) { 3230 u64_stats_update_begin(&sq->stats.syncp); 3231 u64_stats_inc(&sq->stats.wake); 3232 u64_stats_update_end(&sq->stats.syncp); 3233 } 3234 netif_tx_wake_queue(txq); 3235 } 3236 3237 if (xsk_done >= budget) { 3238 __netif_tx_unlock(txq); 3239 return budget; 3240 } 3241 3242 opaque = virtqueue_enable_cb_prepare(sq->vq); 3243 3244 done = napi_complete_done(napi, 0); 3245 3246 if (!done) 3247 virtqueue_disable_cb(sq->vq); 3248 3249 __netif_tx_unlock(txq); 3250 3251 if (done) { 3252 if (unlikely(virtqueue_poll(sq->vq, opaque))) { 3253 if (napi_schedule_prep(napi)) { 3254 __netif_tx_lock(txq, raw_smp_processor_id()); 3255 virtqueue_disable_cb(sq->vq); 3256 __netif_tx_unlock(txq); 3257 __napi_schedule(napi); 3258 } 3259 } 3260 } 3261 3262 return 0; 3263 } 3264 3265 static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan) 3266 { 3267 struct virtio_net_hdr_mrg_rxbuf *hdr; 3268 const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; 3269 struct virtnet_info *vi = sq->vq->vdev->priv; 3270 int num_sg; 3271 unsigned hdr_len = vi->hdr_len; 3272 bool can_push; 3273 3274 pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); 3275 3276 can_push = vi->any_header_sg && 3277 !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) && 3278 !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len; 3279 /* Even if we can, don't push here yet as this would skew 3280 * csum_start offset below. 
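 * virtio_net_hdr_from_skb() records the checksum start relative to
 * skb->data, so the header is only pushed (and pulled back again)
 * around skb_to_sgvec() further down.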
*/ 3281 if (can_push) 3282 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len); 3283 else 3284 hdr = &skb_vnet_common_hdr(skb)->mrg_hdr; 3285 3286 if (virtio_net_hdr_from_skb(skb, &hdr->hdr, 3287 virtio_is_little_endian(vi->vdev), false, 3288 0)) 3289 return -EPROTO; 3290 3291 if (vi->mergeable_rx_bufs) 3292 hdr->num_buffers = 0; 3293 3294 sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2)); 3295 if (can_push) { 3296 __skb_push(skb, hdr_len); 3297 num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len); 3298 if (unlikely(num_sg < 0)) 3299 return num_sg; 3300 /* Pull header back to avoid skew in tx bytes calculations. */ 3301 __skb_pull(skb, hdr_len); 3302 } else { 3303 sg_set_buf(sq->sg, hdr, hdr_len); 3304 num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len); 3305 if (unlikely(num_sg < 0)) 3306 return num_sg; 3307 num_sg++; 3308 } 3309 3310 return virtnet_add_outbuf(sq, num_sg, skb, 3311 orphan ? VIRTNET_XMIT_TYPE_SKB_ORPHAN : VIRTNET_XMIT_TYPE_SKB); 3312 } 3313 3314 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) 3315 { 3316 struct virtnet_info *vi = netdev_priv(dev); 3317 int qnum = skb_get_queue_mapping(skb); 3318 struct send_queue *sq = &vi->sq[qnum]; 3319 int err; 3320 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); 3321 bool xmit_more = netdev_xmit_more(); 3322 bool use_napi = sq->napi.weight; 3323 bool kick; 3324 3325 if (!use_napi) 3326 free_old_xmit(sq, txq, false); 3327 else 3328 virtqueue_disable_cb(sq->vq); 3329 3330 /* timestamp packet in software */ 3331 skb_tx_timestamp(skb); 3332 3333 /* Try to transmit */ 3334 err = xmit_skb(sq, skb, !use_napi); 3335 3336 /* This should not happen! */ 3337 if (unlikely(err)) { 3338 DEV_STATS_INC(dev, tx_fifo_errors); 3339 if (net_ratelimit()) 3340 dev_warn(&dev->dev, 3341 "Unexpected TXQ (%d) queue failure: %d\n", 3342 qnum, err); 3343 DEV_STATS_INC(dev, tx_dropped); 3344 dev_kfree_skb_any(skb); 3345 return NETDEV_TX_OK; 3346 } 3347 3348 /* Don't wait up for transmitted skbs to be freed. */ 3349 if (!use_napi) { 3350 skb_orphan(skb); 3351 nf_reset_ct(skb); 3352 } 3353 3354 if (use_napi) 3355 tx_may_stop(vi, dev, sq); 3356 else 3357 check_sq_full_and_disable(vi, dev,sq); 3358 3359 kick = use_napi ? __netdev_tx_sent_queue(txq, skb->len, xmit_more) : 3360 !xmit_more || netif_xmit_stopped(txq); 3361 if (kick) { 3362 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { 3363 u64_stats_update_begin(&sq->stats.syncp); 3364 u64_stats_inc(&sq->stats.kicks); 3365 u64_stats_update_end(&sq->stats.syncp); 3366 } 3367 } 3368 3369 if (use_napi && kick && unlikely(!virtqueue_enable_cb_delayed(sq->vq))) 3370 virtqueue_napi_schedule(&sq->napi, sq->vq); 3371 3372 return NETDEV_TX_OK; 3373 } 3374 3375 static void __virtnet_rx_pause(struct virtnet_info *vi, 3376 struct receive_queue *rq) 3377 { 3378 bool running = netif_running(vi->dev); 3379 3380 if (running) { 3381 virtnet_napi_disable(rq); 3382 virtnet_cancel_dim(vi, &rq->dim); 3383 } 3384 } 3385 3386 static void virtnet_rx_pause_all(struct virtnet_info *vi) 3387 { 3388 int i; 3389 3390 /* 3391 * Make sure refill_work does not run concurrently to 3392 * avoid napi_disable race which leads to deadlock. 
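 * refill_work() itself disables and re-enables NAPI on the receive
 * queues, so it must be parked before the queues are paused below.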
3393 */ 3394 disable_delayed_refill(vi); 3395 cancel_delayed_work_sync(&vi->refill); 3396 for (i = 0; i < vi->max_queue_pairs; i++) 3397 __virtnet_rx_pause(vi, &vi->rq[i]); 3398 } 3399 3400 static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq) 3401 { 3402 /* 3403 * Make sure refill_work does not run concurrently to 3404 * avoid napi_disable race which leads to deadlock. 3405 */ 3406 disable_delayed_refill(vi); 3407 cancel_delayed_work_sync(&vi->refill); 3408 __virtnet_rx_pause(vi, rq); 3409 } 3410 3411 static void __virtnet_rx_resume(struct virtnet_info *vi, 3412 struct receive_queue *rq, 3413 bool refill) 3414 { 3415 bool running = netif_running(vi->dev); 3416 bool schedule_refill = false; 3417 3418 if (refill && !try_fill_recv(vi, rq, GFP_KERNEL)) 3419 schedule_refill = true; 3420 if (running) 3421 virtnet_napi_enable(rq); 3422 3423 if (schedule_refill) 3424 schedule_delayed_work(&vi->refill, 0); 3425 } 3426 3427 static void virtnet_rx_resume_all(struct virtnet_info *vi) 3428 { 3429 int i; 3430 3431 enable_delayed_refill(vi); 3432 for (i = 0; i < vi->max_queue_pairs; i++) { 3433 if (i < vi->curr_queue_pairs) 3434 __virtnet_rx_resume(vi, &vi->rq[i], true); 3435 else 3436 __virtnet_rx_resume(vi, &vi->rq[i], false); 3437 } 3438 } 3439 3440 static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq) 3441 { 3442 enable_delayed_refill(vi); 3443 __virtnet_rx_resume(vi, rq, true); 3444 } 3445 3446 static int virtnet_rx_resize(struct virtnet_info *vi, 3447 struct receive_queue *rq, u32 ring_num) 3448 { 3449 int err, qindex; 3450 3451 qindex = rq - vi->rq; 3452 3453 virtnet_rx_pause(vi, rq); 3454 3455 err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_unmap_free_buf, NULL); 3456 if (err) 3457 netdev_err(vi->dev, "resize rx fail: rx queue index: %d err: %d\n", qindex, err); 3458 3459 virtnet_rx_resume(vi, rq); 3460 return err; 3461 } 3462 3463 static void virtnet_tx_pause(struct virtnet_info *vi, struct send_queue *sq) 3464 { 3465 bool running = netif_running(vi->dev); 3466 struct netdev_queue *txq; 3467 int qindex; 3468 3469 qindex = sq - vi->sq; 3470 3471 if (running) 3472 virtnet_napi_tx_disable(sq); 3473 3474 txq = netdev_get_tx_queue(vi->dev, qindex); 3475 3476 /* 1. wait for all xmit to complete 3477 * 2. fix the race of netif_stop_subqueue() vs netif_start_subqueue() 3478 */ 3479 __netif_tx_lock_bh(txq); 3480 3481 /* Prevent rx poll from accessing sq. */ 3482 sq->reset = true; 3483 3484 /* Prevent the upper layer from trying to send packets.
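 * netif_stop_subqueue() keeps the stack from handing us new skbs while
 * the send queue is paused; virtnet_tx_resume() wakes the queue again
 * once sq->reset has been cleared.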
*/ 3485 netif_stop_subqueue(vi->dev, qindex); 3486 3487 __netif_tx_unlock_bh(txq); 3488 } 3489 3490 static void virtnet_tx_resume(struct virtnet_info *vi, struct send_queue *sq) 3491 { 3492 bool running = netif_running(vi->dev); 3493 struct netdev_queue *txq; 3494 int qindex; 3495 3496 qindex = sq - vi->sq; 3497 3498 txq = netdev_get_tx_queue(vi->dev, qindex); 3499 3500 __netif_tx_lock_bh(txq); 3501 sq->reset = false; 3502 netif_tx_wake_queue(txq); 3503 __netif_tx_unlock_bh(txq); 3504 3505 if (running) 3506 virtnet_napi_tx_enable(sq); 3507 } 3508 3509 static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq, 3510 u32 ring_num) 3511 { 3512 int qindex, err; 3513 3514 if (ring_num <= MAX_SKB_FRAGS + 2) { 3515 netdev_err(vi->dev, "tx size (%d) must be larger than %d\n", 3516 ring_num, MAX_SKB_FRAGS + 2); 3517 return -EINVAL; 3518 } 3519 3520 qindex = sq - vi->sq; 3521 3522 virtnet_tx_pause(vi, sq); 3523 3524 err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf, 3525 virtnet_sq_free_unused_buf_done); 3526 if (err) 3527 netdev_err(vi->dev, "resize tx fail: tx queue index: %d err: %d\n", qindex, err); 3528 3529 virtnet_tx_resume(vi, sq); 3530 3531 return err; 3532 } 3533 3534 /* 3535 * Send command via the control virtqueue and check status. Commands 3536 * supported by the hypervisor, as indicated by feature bits, should 3537 * never fail unless improperly formatted. 3538 */ 3539 static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd, 3540 struct scatterlist *out, 3541 struct scatterlist *in) 3542 { 3543 struct scatterlist *sgs[5], hdr, stat; 3544 u32 out_num = 0, tmp, in_num = 0; 3545 bool ok; 3546 int ret; 3547 3548 /* Caller should know better */ 3549 BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)); 3550 3551 mutex_lock(&vi->cvq_lock); 3552 vi->ctrl->status = ~0; 3553 vi->ctrl->hdr.class = class; 3554 vi->ctrl->hdr.cmd = cmd; 3555 /* Add header */ 3556 sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr)); 3557 sgs[out_num++] = &hdr; 3558 3559 if (out) 3560 sgs[out_num++] = out; 3561 3562 /* Add return status. */ 3563 sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status)); 3564 sgs[out_num + in_num++] = &stat; 3565 3566 if (in) 3567 sgs[out_num + in_num++] = in; 3568 3569 BUG_ON(out_num + in_num > ARRAY_SIZE(sgs)); 3570 ret = virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num, vi, GFP_ATOMIC); 3571 if (ret < 0) { 3572 dev_warn(&vi->vdev->dev, 3573 "Failed to add sgs for command vq: %d.\n", ret); 3574 mutex_unlock(&vi->cvq_lock); 3575 return false; 3576 } 3577 3578 if (unlikely(!virtqueue_kick(vi->cvq))) 3579 goto unlock; 3580 3581 /* Spin for a response; the kick causes an ioport write, trapping 3582 * into the hypervisor, so the request should be handled immediately.
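 * The loop below exits either when the device returns the buffer or
 * when the control virtqueue is found to be broken; cond_resched()
 * keeps the busy wait from monopolising the CPU.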
3583 */ 3584 while (!virtqueue_get_buf(vi->cvq, &tmp) && 3585 !virtqueue_is_broken(vi->cvq)) { 3586 cond_resched(); 3587 cpu_relax(); 3588 } 3589 3590 unlock: 3591 ok = vi->ctrl->status == VIRTIO_NET_OK; 3592 mutex_unlock(&vi->cvq_lock); 3593 return ok; 3594 } 3595 3596 static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, 3597 struct scatterlist *out) 3598 { 3599 return virtnet_send_command_reply(vi, class, cmd, out, NULL); 3600 } 3601 3602 static int virtnet_set_mac_address(struct net_device *dev, void *p) 3603 { 3604 struct virtnet_info *vi = netdev_priv(dev); 3605 struct virtio_device *vdev = vi->vdev; 3606 int ret; 3607 struct sockaddr *addr; 3608 struct scatterlist sg; 3609 3610 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 3611 return -EOPNOTSUPP; 3612 3613 addr = kmemdup(p, sizeof(*addr), GFP_KERNEL); 3614 if (!addr) 3615 return -ENOMEM; 3616 3617 ret = eth_prepare_mac_addr_change(dev, addr); 3618 if (ret) 3619 goto out; 3620 3621 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 3622 sg_init_one(&sg, addr->sa_data, dev->addr_len); 3623 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 3624 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { 3625 dev_warn(&vdev->dev, 3626 "Failed to set mac address by vq command.\n"); 3627 ret = -EINVAL; 3628 goto out; 3629 } 3630 } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && 3631 !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { 3632 unsigned int i; 3633 3634 /* Naturally, this has an atomicity problem. */ 3635 for (i = 0; i < dev->addr_len; i++) 3636 virtio_cwrite8(vdev, 3637 offsetof(struct virtio_net_config, mac) + 3638 i, addr->sa_data[i]); 3639 } 3640 3641 eth_commit_mac_addr_change(dev, p); 3642 ret = 0; 3643 3644 out: 3645 kfree(addr); 3646 return ret; 3647 } 3648 3649 static void virtnet_stats(struct net_device *dev, 3650 struct rtnl_link_stats64 *tot) 3651 { 3652 struct virtnet_info *vi = netdev_priv(dev); 3653 unsigned int start; 3654 int i; 3655 3656 for (i = 0; i < vi->max_queue_pairs; i++) { 3657 u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops; 3658 struct receive_queue *rq = &vi->rq[i]; 3659 struct send_queue *sq = &vi->sq[i]; 3660 3661 do { 3662 start = u64_stats_fetch_begin(&sq->stats.syncp); 3663 tpackets = u64_stats_read(&sq->stats.packets); 3664 tbytes = u64_stats_read(&sq->stats.bytes); 3665 terrors = u64_stats_read(&sq->stats.tx_timeouts); 3666 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 3667 3668 do { 3669 start = u64_stats_fetch_begin(&rq->stats.syncp); 3670 rpackets = u64_stats_read(&rq->stats.packets); 3671 rbytes = u64_stats_read(&rq->stats.bytes); 3672 rdrops = u64_stats_read(&rq->stats.drops); 3673 } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); 3674 3675 tot->rx_packets += rpackets; 3676 tot->tx_packets += tpackets; 3677 tot->rx_bytes += rbytes; 3678 tot->tx_bytes += tbytes; 3679 tot->rx_dropped += rdrops; 3680 tot->tx_errors += terrors; 3681 } 3682 3683 tot->tx_dropped = DEV_STATS_READ(dev, tx_dropped); 3684 tot->tx_fifo_errors = DEV_STATS_READ(dev, tx_fifo_errors); 3685 tot->rx_length_errors = DEV_STATS_READ(dev, rx_length_errors); 3686 tot->rx_frame_errors = DEV_STATS_READ(dev, rx_frame_errors); 3687 } 3688 3689 static void virtnet_ack_link_announce(struct virtnet_info *vi) 3690 { 3691 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE, 3692 VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL)) 3693 dev_warn(&vi->dev->dev, "Failed to ack link announce.\n"); 3694 } 3695 3696 static bool virtnet_commit_rss_command(struct virtnet_info *vi); 3697 3698 static void 
virtnet_rss_update_by_qpairs(struct virtnet_info *vi, u16 queue_pairs) 3699 { 3700 u32 indir_val = 0; 3701 int i = 0; 3702 3703 for (; i < vi->rss_indir_table_size; ++i) { 3704 indir_val = ethtool_rxfh_indir_default(i, queue_pairs); 3705 vi->rss_hdr->indirection_table[i] = cpu_to_le16(indir_val); 3706 } 3707 vi->rss_trailer.max_tx_vq = cpu_to_le16(queue_pairs); 3708 } 3709 3710 static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) 3711 { 3712 struct virtio_net_ctrl_mq *mq __free(kfree) = NULL; 3713 struct virtio_net_rss_config_hdr *old_rss_hdr; 3714 struct virtio_net_rss_config_trailer old_rss_trailer; 3715 struct net_device *dev = vi->dev; 3716 struct scatterlist sg; 3717 3718 if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ)) 3719 return 0; 3720 3721 /* First check whether we need to update RSS. Only do the update if both (1) RSS is 3722 * enabled and (2) there is no user configuration. 3723 * 3724 * During rss command processing, the device updates queue_pairs using rss.max_tx_vq. That is, 3725 * the device updates queue_pairs together with rss, so we can skip the separate queue_pairs 3726 * update (VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET below) and return directly. 3727 */ 3728 if (vi->has_rss && !netif_is_rxfh_configured(dev)) { 3729 old_rss_hdr = vi->rss_hdr; 3730 old_rss_trailer = vi->rss_trailer; 3731 vi->rss_hdr = devm_kzalloc(&dev->dev, virtnet_rss_hdr_size(vi), GFP_KERNEL); 3732 if (!vi->rss_hdr) { 3733 vi->rss_hdr = old_rss_hdr; 3734 return -ENOMEM; 3735 } 3736 3737 *vi->rss_hdr = *old_rss_hdr; 3738 virtnet_rss_update_by_qpairs(vi, queue_pairs); 3739 3740 if (!virtnet_commit_rss_command(vi)) { 3741 /* restore the old rss config if commit_rss_command failed */ 3742 devm_kfree(&dev->dev, vi->rss_hdr); 3743 vi->rss_hdr = old_rss_hdr; 3744 vi->rss_trailer = old_rss_trailer; 3745 3746 dev_warn(&dev->dev, "Failed to set num of queue pairs to %d, because committing RSS failed\n", 3747 queue_pairs); 3748 return -EINVAL; 3749 } 3750 devm_kfree(&dev->dev, old_rss_hdr); 3751 goto succ; 3752 } 3753 3754 mq = kzalloc(sizeof(*mq), GFP_KERNEL); 3755 if (!mq) 3756 return -ENOMEM; 3757 3758 mq->virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs); 3759 sg_init_one(&sg, mq, sizeof(*mq)); 3760 3761 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, 3762 VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) { 3763 dev_warn(&dev->dev, "Failed to set num of queue pairs to %d\n", 3764 queue_pairs); 3765 return -EINVAL; 3766 } 3767 succ: 3768 vi->curr_queue_pairs = queue_pairs; 3769 /* virtnet_open() will refill when the device is brought up. */ 3770 spin_lock_bh(&vi->refill_lock); 3771 if (dev->flags & IFF_UP && vi->refill_enabled) 3772 schedule_delayed_work(&vi->refill, 0); 3773 spin_unlock_bh(&vi->refill_lock); 3774 3775 return 0; 3776 } 3777 3778 static int virtnet_close(struct net_device *dev) 3779 { 3780 struct virtnet_info *vi = netdev_priv(dev); 3781 int i; 3782 3783 /* Make sure NAPI doesn't schedule refill work */ 3784 disable_delayed_refill(vi); 3785 /* Make sure refill_work doesn't re-enable napi!
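 * cancel_delayed_work_sync() waits for a refill_work that is already
 * running (and may briefly re-enable NAPI) to finish before the queue
 * pairs are disabled below.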
*/ 3786 cancel_delayed_work_sync(&vi->refill); 3787 /* Prevent the config change callback from changing carrier 3788 * after close 3789 */ 3790 virtio_config_driver_disable(vi->vdev); 3791 /* Stop getting status/speed updates: we don't care until next 3792 * open 3793 */ 3794 cancel_work_sync(&vi->config_work); 3795 3796 for (i = 0; i < vi->max_queue_pairs; i++) { 3797 virtnet_disable_queue_pair(vi, i); 3798 virtnet_cancel_dim(vi, &vi->rq[i].dim); 3799 } 3800 3801 netif_carrier_off(dev); 3802 3803 return 0; 3804 } 3805 3806 static void virtnet_rx_mode_work(struct work_struct *work) 3807 { 3808 struct virtnet_info *vi = 3809 container_of(work, struct virtnet_info, rx_mode_work); 3810 u8 *promisc_allmulti __free(kfree) = NULL; 3811 struct net_device *dev = vi->dev; 3812 struct scatterlist sg[2]; 3813 struct virtio_net_ctrl_mac *mac_data; 3814 struct netdev_hw_addr *ha; 3815 int uc_count; 3816 int mc_count; 3817 void *buf; 3818 int i; 3819 3820 /* We can't dynamically set ndo_set_rx_mode, so return gracefully */ 3821 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX)) 3822 return; 3823 3824 promisc_allmulti = kzalloc(sizeof(*promisc_allmulti), GFP_KERNEL); 3825 if (!promisc_allmulti) { 3826 dev_warn(&dev->dev, "Failed to set RX mode, no memory.\n"); 3827 return; 3828 } 3829 3830 rtnl_lock(); 3831 3832 *promisc_allmulti = !!(dev->flags & IFF_PROMISC); 3833 sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti)); 3834 3835 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 3836 VIRTIO_NET_CTRL_RX_PROMISC, sg)) 3837 dev_warn(&dev->dev, "Failed to %sable promisc mode.\n", 3838 *promisc_allmulti ? "en" : "dis"); 3839 3840 *promisc_allmulti = !!(dev->flags & IFF_ALLMULTI); 3841 sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti)); 3842 3843 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 3844 VIRTIO_NET_CTRL_RX_ALLMULTI, sg)) 3845 dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n", 3846 *promisc_allmulti ? 
"en" : "dis"); 3847 3848 netif_addr_lock_bh(dev); 3849 3850 uc_count = netdev_uc_count(dev); 3851 mc_count = netdev_mc_count(dev); 3852 /* MAC filter - use one buffer for both lists */ 3853 buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) + 3854 (2 * sizeof(mac_data->entries)), GFP_ATOMIC); 3855 mac_data = buf; 3856 if (!buf) { 3857 netif_addr_unlock_bh(dev); 3858 rtnl_unlock(); 3859 return; 3860 } 3861 3862 sg_init_table(sg, 2); 3863 3864 /* Store the unicast list and count in the front of the buffer */ 3865 mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count); 3866 i = 0; 3867 netdev_for_each_uc_addr(ha, dev) 3868 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 3869 3870 sg_set_buf(&sg[0], mac_data, 3871 sizeof(mac_data->entries) + (uc_count * ETH_ALEN)); 3872 3873 /* multicast list and count fill the end */ 3874 mac_data = (void *)&mac_data->macs[uc_count][0]; 3875 3876 mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count); 3877 i = 0; 3878 netdev_for_each_mc_addr(ha, dev) 3879 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 3880 3881 netif_addr_unlock_bh(dev); 3882 3883 sg_set_buf(&sg[1], mac_data, 3884 sizeof(mac_data->entries) + (mc_count * ETH_ALEN)); 3885 3886 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 3887 VIRTIO_NET_CTRL_MAC_TABLE_SET, sg)) 3888 dev_warn(&dev->dev, "Failed to set MAC filter table.\n"); 3889 3890 rtnl_unlock(); 3891 3892 kfree(buf); 3893 } 3894 3895 static void virtnet_set_rx_mode(struct net_device *dev) 3896 { 3897 struct virtnet_info *vi = netdev_priv(dev); 3898 3899 if (vi->rx_mode_work_enabled) 3900 schedule_work(&vi->rx_mode_work); 3901 } 3902 3903 static int virtnet_vlan_rx_add_vid(struct net_device *dev, 3904 __be16 proto, u16 vid) 3905 { 3906 struct virtnet_info *vi = netdev_priv(dev); 3907 __virtio16 *_vid __free(kfree) = NULL; 3908 struct scatterlist sg; 3909 3910 _vid = kzalloc(sizeof(*_vid), GFP_KERNEL); 3911 if (!_vid) 3912 return -ENOMEM; 3913 3914 *_vid = cpu_to_virtio16(vi->vdev, vid); 3915 sg_init_one(&sg, _vid, sizeof(*_vid)); 3916 3917 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 3918 VIRTIO_NET_CTRL_VLAN_ADD, &sg)) 3919 dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid); 3920 return 0; 3921 } 3922 3923 static int virtnet_vlan_rx_kill_vid(struct net_device *dev, 3924 __be16 proto, u16 vid) 3925 { 3926 struct virtnet_info *vi = netdev_priv(dev); 3927 __virtio16 *_vid __free(kfree) = NULL; 3928 struct scatterlist sg; 3929 3930 _vid = kzalloc(sizeof(*_vid), GFP_KERNEL); 3931 if (!_vid) 3932 return -ENOMEM; 3933 3934 *_vid = cpu_to_virtio16(vi->vdev, vid); 3935 sg_init_one(&sg, _vid, sizeof(*_vid)); 3936 3937 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 3938 VIRTIO_NET_CTRL_VLAN_DEL, &sg)) 3939 dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid); 3940 return 0; 3941 } 3942 3943 static void virtnet_clean_affinity(struct virtnet_info *vi) 3944 { 3945 int i; 3946 3947 if (vi->affinity_hint_set) { 3948 for (i = 0; i < vi->max_queue_pairs; i++) { 3949 virtqueue_set_affinity(vi->rq[i].vq, NULL); 3950 virtqueue_set_affinity(vi->sq[i].vq, NULL); 3951 } 3952 3953 vi->affinity_hint_set = false; 3954 } 3955 } 3956 3957 static void virtnet_set_affinity(struct virtnet_info *vi) 3958 { 3959 cpumask_var_t mask; 3960 int stragglers; 3961 int group_size; 3962 int i, start = 0, cpu; 3963 int num_cpu; 3964 int stride; 3965 3966 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { 3967 virtnet_clean_affinity(vi); 3968 return; 3969 } 3970 3971 num_cpu = num_online_cpus(); 3972 stride = max_t(int, num_cpu / vi->curr_queue_pairs, 
1); 3973 stragglers = num_cpu >= vi->curr_queue_pairs ? 3974 num_cpu % vi->curr_queue_pairs : 3975 0; 3976 3977 for (i = 0; i < vi->curr_queue_pairs; i++) { 3978 group_size = stride + (i < stragglers ? 1 : 0); 3979 3980 for_each_online_cpu_wrap(cpu, start) { 3981 if (!group_size--) { 3982 start = cpu; 3983 break; 3984 } 3985 cpumask_set_cpu(cpu, mask); 3986 } 3987 3988 virtqueue_set_affinity(vi->rq[i].vq, mask); 3989 virtqueue_set_affinity(vi->sq[i].vq, mask); 3990 __netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS); 3991 cpumask_clear(mask); 3992 } 3993 3994 vi->affinity_hint_set = true; 3995 free_cpumask_var(mask); 3996 } 3997 3998 static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node) 3999 { 4000 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 4001 node); 4002 virtnet_set_affinity(vi); 4003 return 0; 4004 } 4005 4006 static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node) 4007 { 4008 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 4009 node_dead); 4010 virtnet_set_affinity(vi); 4011 return 0; 4012 } 4013 4014 static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node) 4015 { 4016 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 4017 node); 4018 4019 virtnet_clean_affinity(vi); 4020 return 0; 4021 } 4022 4023 static enum cpuhp_state virtionet_online; 4024 4025 static int virtnet_cpu_notif_add(struct virtnet_info *vi) 4026 { 4027 int ret; 4028 4029 ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node); 4030 if (ret) 4031 return ret; 4032 ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD, 4033 &vi->node_dead); 4034 if (!ret) 4035 return ret; 4036 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 4037 return ret; 4038 } 4039 4040 static void virtnet_cpu_notif_remove(struct virtnet_info *vi) 4041 { 4042 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 4043 cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD, 4044 &vi->node_dead); 4045 } 4046 4047 static int virtnet_send_ctrl_coal_vq_cmd(struct virtnet_info *vi, 4048 u16 vqn, u32 max_usecs, u32 max_packets) 4049 { 4050 struct virtio_net_ctrl_coal_vq *coal_vq __free(kfree) = NULL; 4051 struct scatterlist sgs; 4052 4053 coal_vq = kzalloc(sizeof(*coal_vq), GFP_KERNEL); 4054 if (!coal_vq) 4055 return -ENOMEM; 4056 4057 coal_vq->vqn = cpu_to_le16(vqn); 4058 coal_vq->coal.max_usecs = cpu_to_le32(max_usecs); 4059 coal_vq->coal.max_packets = cpu_to_le32(max_packets); 4060 sg_init_one(&sgs, coal_vq, sizeof(*coal_vq)); 4061 4062 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 4063 VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET, 4064 &sgs)) 4065 return -EINVAL; 4066 4067 return 0; 4068 } 4069 4070 static int virtnet_send_rx_ctrl_coal_vq_cmd(struct virtnet_info *vi, 4071 u16 queue, u32 max_usecs, 4072 u32 max_packets) 4073 { 4074 int err; 4075 4076 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 4077 return -EOPNOTSUPP; 4078 4079 err = virtnet_send_ctrl_coal_vq_cmd(vi, rxq2vq(queue), 4080 max_usecs, max_packets); 4081 if (err) 4082 return err; 4083 4084 vi->rq[queue].intr_coal.max_usecs = max_usecs; 4085 vi->rq[queue].intr_coal.max_packets = max_packets; 4086 4087 return 0; 4088 } 4089 4090 static int virtnet_send_tx_ctrl_coal_vq_cmd(struct virtnet_info *vi, 4091 u16 queue, u32 max_usecs, 4092 u32 max_packets) 4093 { 4094 int err; 4095 4096 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 4097 return -EOPNOTSUPP; 4098 4099 err = 
virtnet_send_ctrl_coal_vq_cmd(vi, txq2vq(queue), 4100 max_usecs, max_packets); 4101 if (err) 4102 return err; 4103 4104 vi->sq[queue].intr_coal.max_usecs = max_usecs; 4105 vi->sq[queue].intr_coal.max_packets = max_packets; 4106 4107 return 0; 4108 } 4109 4110 static void virtnet_get_ringparam(struct net_device *dev, 4111 struct ethtool_ringparam *ring, 4112 struct kernel_ethtool_ringparam *kernel_ring, 4113 struct netlink_ext_ack *extack) 4114 { 4115 struct virtnet_info *vi = netdev_priv(dev); 4116 4117 ring->rx_max_pending = vi->rq[0].vq->num_max; 4118 ring->tx_max_pending = vi->sq[0].vq->num_max; 4119 ring->rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 4120 ring->tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 4121 } 4122 4123 static int virtnet_set_ringparam(struct net_device *dev, 4124 struct ethtool_ringparam *ring, 4125 struct kernel_ethtool_ringparam *kernel_ring, 4126 struct netlink_ext_ack *extack) 4127 { 4128 struct virtnet_info *vi = netdev_priv(dev); 4129 u32 rx_pending, tx_pending; 4130 struct receive_queue *rq; 4131 struct send_queue *sq; 4132 int i, err; 4133 4134 if (ring->rx_mini_pending || ring->rx_jumbo_pending) 4135 return -EINVAL; 4136 4137 rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 4138 tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 4139 4140 if (ring->rx_pending == rx_pending && 4141 ring->tx_pending == tx_pending) 4142 return 0; 4143 4144 if (ring->rx_pending > vi->rq[0].vq->num_max) 4145 return -EINVAL; 4146 4147 if (ring->tx_pending > vi->sq[0].vq->num_max) 4148 return -EINVAL; 4149 4150 for (i = 0; i < vi->max_queue_pairs; i++) { 4151 rq = vi->rq + i; 4152 sq = vi->sq + i; 4153 4154 if (ring->tx_pending != tx_pending) { 4155 err = virtnet_tx_resize(vi, sq, ring->tx_pending); 4156 if (err) 4157 return err; 4158 4159 /* Upon disabling and re-enabling a transmit virtqueue, the device must 4160 * set the coalescing parameters of the virtqueue to those configured 4161 * through the VIRTIO_NET_CTRL_NOTF_COAL_TX_SET command, or, if the driver 4162 * did not set any TX coalescing parameters, to 0. 4163 */ 4164 err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, i, 4165 vi->intr_coal_tx.max_usecs, 4166 vi->intr_coal_tx.max_packets); 4167 4168 /* Don't break the tx resize action if the vq coalescing is not 4169 * supported. The same is true for rx resize below. 4170 */ 4171 if (err && err != -EOPNOTSUPP) 4172 return err; 4173 } 4174 4175 if (ring->rx_pending != rx_pending) { 4176 err = virtnet_rx_resize(vi, rq, ring->rx_pending); 4177 if (err) 4178 return err; 4179 4180 /* The reason is same as the transmit virtqueue reset */ 4181 mutex_lock(&vi->rq[i].dim_lock); 4182 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, i, 4183 vi->intr_coal_rx.max_usecs, 4184 vi->intr_coal_rx.max_packets); 4185 mutex_unlock(&vi->rq[i].dim_lock); 4186 if (err && err != -EOPNOTSUPP) 4187 return err; 4188 } 4189 } 4190 4191 return 0; 4192 } 4193 4194 static bool virtnet_commit_rss_command(struct virtnet_info *vi) 4195 { 4196 struct net_device *dev = vi->dev; 4197 struct scatterlist sgs[2]; 4198 4199 /* prepare sgs */ 4200 sg_init_table(sgs, 2); 4201 sg_set_buf(&sgs[0], vi->rss_hdr, virtnet_rss_hdr_size(vi)); 4202 sg_set_buf(&sgs[1], &vi->rss_trailer, virtnet_rss_trailer_size(vi)); 4203 4204 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, 4205 vi->has_rss ? 
VIRTIO_NET_CTRL_MQ_RSS_CONFIG 4206 : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, sgs)) 4207 goto err; 4208 4209 return true; 4210 4211 err: 4212 dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n"); 4213 return false; 4214 4215 } 4216 4217 static void virtnet_init_default_rss(struct virtnet_info *vi) 4218 { 4219 vi->rss_hdr->hash_types = cpu_to_le32(vi->rss_hash_types_supported); 4220 vi->rss_hash_types_saved = vi->rss_hash_types_supported; 4221 vi->rss_hdr->indirection_table_mask = vi->rss_indir_table_size 4222 ? cpu_to_le16(vi->rss_indir_table_size - 1) : 0; 4223 vi->rss_hdr->unclassified_queue = 0; 4224 4225 virtnet_rss_update_by_qpairs(vi, vi->curr_queue_pairs); 4226 4227 vi->rss_trailer.hash_key_length = vi->rss_key_size; 4228 4229 netdev_rss_key_fill(vi->rss_hash_key_data, vi->rss_key_size); 4230 } 4231 4232 static int virtnet_get_hashflow(struct net_device *dev, 4233 struct ethtool_rxfh_fields *info) 4234 { 4235 struct virtnet_info *vi = netdev_priv(dev); 4236 4237 info->data = 0; 4238 switch (info->flow_type) { 4239 case TCP_V4_FLOW: 4240 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) { 4241 info->data = RXH_IP_SRC | RXH_IP_DST | 4242 RXH_L4_B_0_1 | RXH_L4_B_2_3; 4243 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 4244 info->data = RXH_IP_SRC | RXH_IP_DST; 4245 } 4246 break; 4247 case TCP_V6_FLOW: 4248 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) { 4249 info->data = RXH_IP_SRC | RXH_IP_DST | 4250 RXH_L4_B_0_1 | RXH_L4_B_2_3; 4251 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 4252 info->data = RXH_IP_SRC | RXH_IP_DST; 4253 } 4254 break; 4255 case UDP_V4_FLOW: 4256 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) { 4257 info->data = RXH_IP_SRC | RXH_IP_DST | 4258 RXH_L4_B_0_1 | RXH_L4_B_2_3; 4259 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 4260 info->data = RXH_IP_SRC | RXH_IP_DST; 4261 } 4262 break; 4263 case UDP_V6_FLOW: 4264 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) { 4265 info->data = RXH_IP_SRC | RXH_IP_DST | 4266 RXH_L4_B_0_1 | RXH_L4_B_2_3; 4267 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 4268 info->data = RXH_IP_SRC | RXH_IP_DST; 4269 } 4270 break; 4271 case IPV4_FLOW: 4272 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) 4273 info->data = RXH_IP_SRC | RXH_IP_DST; 4274 4275 break; 4276 case IPV6_FLOW: 4277 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) 4278 info->data = RXH_IP_SRC | RXH_IP_DST; 4279 4280 break; 4281 default: 4282 info->data = 0; 4283 break; 4284 } 4285 4286 return 0; 4287 } 4288 4289 static int virtnet_set_hashflow(struct net_device *dev, 4290 const struct ethtool_rxfh_fields *info, 4291 struct netlink_ext_ack *extack) 4292 { 4293 struct virtnet_info *vi = netdev_priv(dev); 4294 u32 new_hashtypes = vi->rss_hash_types_saved; 4295 bool is_disable = info->data & RXH_DISCARD; 4296 bool is_l4 = info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3); 4297 4298 /* supports only 'sd', 'sdfn' and 'r' */ 4299 if (!((info->data == (RXH_IP_SRC | RXH_IP_DST)) | is_l4 | is_disable)) 4300 return -EINVAL; 4301 4302 switch (info->flow_type) { 4303 case TCP_V4_FLOW: 4304 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4); 4305 if (!is_disable) 4306 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 4307 | (is_l4 ? 
VIRTIO_NET_RSS_HASH_TYPE_TCPv4 : 0); 4308 break; 4309 case UDP_V4_FLOW: 4310 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_UDPv4); 4311 if (!is_disable) 4312 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 4313 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv4 : 0); 4314 break; 4315 case IPV4_FLOW: 4316 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv4; 4317 if (!is_disable) 4318 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv4; 4319 break; 4320 case TCP_V6_FLOW: 4321 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_TCPv6); 4322 if (!is_disable) 4323 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 4324 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv6 : 0); 4325 break; 4326 case UDP_V6_FLOW: 4327 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_UDPv6); 4328 if (!is_disable) 4329 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 4330 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv6 : 0); 4331 break; 4332 case IPV6_FLOW: 4333 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv6; 4334 if (!is_disable) 4335 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv6; 4336 break; 4337 default: 4338 /* unsupported flow */ 4339 return -EINVAL; 4340 } 4341 4342 /* if unsupported hashtype was set */ 4343 if (new_hashtypes != (new_hashtypes & vi->rss_hash_types_supported)) 4344 return -EINVAL; 4345 4346 if (new_hashtypes != vi->rss_hash_types_saved) { 4347 vi->rss_hash_types_saved = new_hashtypes; 4348 vi->rss_hdr->hash_types = cpu_to_le32(vi->rss_hash_types_saved); 4349 if (vi->dev->features & NETIF_F_RXHASH) 4350 if (!virtnet_commit_rss_command(vi)) 4351 return -EINVAL; 4352 } 4353 4354 return 0; 4355 } 4356 4357 static void virtnet_get_drvinfo(struct net_device *dev, 4358 struct ethtool_drvinfo *info) 4359 { 4360 struct virtnet_info *vi = netdev_priv(dev); 4361 struct virtio_device *vdev = vi->vdev; 4362 4363 strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); 4364 strscpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version)); 4365 strscpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info)); 4366 4367 } 4368 4369 /* TODO: Eliminate OOO packets during switching */ 4370 static int virtnet_set_channels(struct net_device *dev, 4371 struct ethtool_channels *channels) 4372 { 4373 struct virtnet_info *vi = netdev_priv(dev); 4374 u16 queue_pairs = channels->combined_count; 4375 int err; 4376 4377 /* We don't support separate rx/tx channels. 4378 * We don't allow setting 'other' channels. 4379 */ 4380 if (channels->rx_count || channels->tx_count || channels->other_count) 4381 return -EINVAL; 4382 4383 if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0) 4384 return -EINVAL; 4385 4386 /* For now we don't support modifying channels while XDP is loaded 4387 * also when XDP is loaded all RX queues have XDP programs so we only 4388 * need to check a single RX queue. 
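 * Checking vi->rq[0].xdp_prog below is therefore sufficient; e.g. an
 * 'ethtool -L <ifname> combined N' request (ifname and N are illustrative)
 * is rejected while an XDP program is attached.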
4389 */ 4390 if (vi->rq[0].xdp_prog) 4391 return -EINVAL; 4392 4393 cpus_read_lock(); 4394 err = virtnet_set_queues(vi, queue_pairs); 4395 if (err) { 4396 cpus_read_unlock(); 4397 goto err; 4398 } 4399 virtnet_set_affinity(vi); 4400 cpus_read_unlock(); 4401 4402 netif_set_real_num_tx_queues(dev, queue_pairs); 4403 netif_set_real_num_rx_queues(dev, queue_pairs); 4404 err: 4405 return err; 4406 } 4407 4408 static void virtnet_stats_sprintf(u8 **p, const char *fmt, const char *noq_fmt, 4409 int num, int qid, const struct virtnet_stat_desc *desc) 4410 { 4411 int i; 4412 4413 if (qid < 0) { 4414 for (i = 0; i < num; ++i) 4415 ethtool_sprintf(p, noq_fmt, desc[i].desc); 4416 } else { 4417 for (i = 0; i < num; ++i) 4418 ethtool_sprintf(p, fmt, qid, desc[i].desc); 4419 } 4420 } 4421 4422 /* qid == -1: for rx/tx queue total field */ 4423 static void virtnet_get_stats_string(struct virtnet_info *vi, int type, int qid, u8 **data) 4424 { 4425 const struct virtnet_stat_desc *desc; 4426 const char *fmt, *noq_fmt; 4427 u8 *p = *data; 4428 u32 num; 4429 4430 if (type == VIRTNET_Q_TYPE_CQ && qid >= 0) { 4431 noq_fmt = "cq_hw_%s"; 4432 4433 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) { 4434 desc = &virtnet_stats_cvq_desc[0]; 4435 num = ARRAY_SIZE(virtnet_stats_cvq_desc); 4436 4437 virtnet_stats_sprintf(&p, NULL, noq_fmt, num, -1, desc); 4438 } 4439 } 4440 4441 if (type == VIRTNET_Q_TYPE_RX) { 4442 fmt = "rx%u_%s"; 4443 noq_fmt = "rx_%s"; 4444 4445 desc = &virtnet_rq_stats_desc[0]; 4446 num = ARRAY_SIZE(virtnet_rq_stats_desc); 4447 4448 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4449 4450 fmt = "rx%u_hw_%s"; 4451 noq_fmt = "rx_hw_%s"; 4452 4453 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4454 desc = &virtnet_stats_rx_basic_desc[0]; 4455 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4456 4457 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4458 } 4459 4460 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4461 desc = &virtnet_stats_rx_csum_desc[0]; 4462 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4463 4464 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4465 } 4466 4467 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4468 desc = &virtnet_stats_rx_speed_desc[0]; 4469 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4470 4471 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4472 } 4473 } 4474 4475 if (type == VIRTNET_Q_TYPE_TX) { 4476 fmt = "tx%u_%s"; 4477 noq_fmt = "tx_%s"; 4478 4479 desc = &virtnet_sq_stats_desc[0]; 4480 num = ARRAY_SIZE(virtnet_sq_stats_desc); 4481 4482 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4483 4484 fmt = "tx%u_hw_%s"; 4485 noq_fmt = "tx_hw_%s"; 4486 4487 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4488 desc = &virtnet_stats_tx_basic_desc[0]; 4489 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4490 4491 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4492 } 4493 4494 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4495 desc = &virtnet_stats_tx_gso_desc[0]; 4496 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4497 4498 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4499 } 4500 4501 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4502 desc = &virtnet_stats_tx_speed_desc[0]; 4503 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4504 4505 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4506 } 4507 } 4508 4509 *data = p; 4510 } 4511 4512 struct virtnet_stats_ctx { 4513 /* The stats are write to qstats or ethtool -S */ 4514 
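/* The three-element arrays below are indexed by VIRTNET_Q_TYPE_RX/TX/CQ. */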
bool to_qstat; 4515 4516 /* Used to calculate the offset inside the output buffer. */ 4517 u32 desc_num[3]; 4518 4519 /* The actual supported stat types. */ 4520 u64 bitmap[3]; 4521 4522 /* Used to calculate the reply buffer size. */ 4523 u32 size[3]; 4524 4525 /* Record the output buffer. */ 4526 u64 *data; 4527 }; 4528 4529 static void virtnet_stats_ctx_init(struct virtnet_info *vi, 4530 struct virtnet_stats_ctx *ctx, 4531 u64 *data, bool to_qstat) 4532 { 4533 u32 queue_type; 4534 4535 ctx->data = data; 4536 ctx->to_qstat = to_qstat; 4537 4538 if (to_qstat) { 4539 ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc_qstat); 4540 ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc_qstat); 4541 4542 queue_type = VIRTNET_Q_TYPE_RX; 4543 4544 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4545 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC; 4546 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat); 4547 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic); 4548 } 4549 4550 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4551 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM; 4552 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat); 4553 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum); 4554 } 4555 4556 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 4557 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_GSO; 4558 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat); 4559 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_gso); 4560 } 4561 4562 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4563 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED; 4564 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat); 4565 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed); 4566 } 4567 4568 queue_type = VIRTNET_Q_TYPE_TX; 4569 4570 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4571 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC; 4572 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat); 4573 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic); 4574 } 4575 4576 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 4577 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_CSUM; 4578 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat); 4579 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_csum); 4580 } 4581 4582 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4583 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO; 4584 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat); 4585 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso); 4586 } 4587 4588 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4589 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED; 4590 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat); 4591 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed); 4592 } 4593 4594 return; 4595 } 4596 4597 ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc); 4598 ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc); 4599 4600 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) { 4601 queue_type = VIRTNET_Q_TYPE_CQ; 4602 4603 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_CVQ; 4604 ctx->desc_num[queue_type] += 
ARRAY_SIZE(virtnet_stats_cvq_desc); 4605 ctx->size[queue_type] += sizeof(struct virtio_net_stats_cvq); 4606 } 4607 4608 queue_type = VIRTNET_Q_TYPE_RX; 4609 4610 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4611 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC; 4612 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4613 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic); 4614 } 4615 4616 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4617 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM; 4618 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4619 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum); 4620 } 4621 4622 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4623 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED; 4624 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4625 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed); 4626 } 4627 4628 queue_type = VIRTNET_Q_TYPE_TX; 4629 4630 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4631 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC; 4632 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4633 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic); 4634 } 4635 4636 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4637 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO; 4638 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4639 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso); 4640 } 4641 4642 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4643 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED; 4644 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4645 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed); 4646 } 4647 } 4648 4649 /* stats_sum_queue - Calculate the sum of the same fields in sq or rq. 
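 * (Used by virtnet_fill_total_fields() to build the aggregate "rx_"/"tx_"
 * rows reported by ethtool -S.)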
4650 * @sum: the position to store the sum values 4651 * @num: field num 4652 * @q_value: the first queue fields 4653 * @q_num: number of the queues 4654 */ 4655 static void stats_sum_queue(u64 *sum, u32 num, u64 *q_value, u32 q_num) 4656 { 4657 u32 step = num; 4658 int i, j; 4659 u64 *p; 4660 4661 for (i = 0; i < num; ++i) { 4662 p = sum + i; 4663 *p = 0; 4664 4665 for (j = 0; j < q_num; ++j) 4666 *p += *(q_value + i + j * step); 4667 } 4668 } 4669 4670 static void virtnet_fill_total_fields(struct virtnet_info *vi, 4671 struct virtnet_stats_ctx *ctx) 4672 { 4673 u64 *data, *first_rx_q, *first_tx_q; 4674 u32 num_cq, num_rx, num_tx; 4675 4676 num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ]; 4677 num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX]; 4678 num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX]; 4679 4680 first_rx_q = ctx->data + num_rx + num_tx + num_cq; 4681 first_tx_q = first_rx_q + vi->curr_queue_pairs * num_rx; 4682 4683 data = ctx->data; 4684 4685 stats_sum_queue(data, num_rx, first_rx_q, vi->curr_queue_pairs); 4686 4687 data = ctx->data + num_rx; 4688 4689 stats_sum_queue(data, num_tx, first_tx_q, vi->curr_queue_pairs); 4690 } 4691 4692 static void virtnet_fill_stats_qstat(struct virtnet_info *vi, u32 qid, 4693 struct virtnet_stats_ctx *ctx, 4694 const u8 *base, bool drv_stats, u8 reply_type) 4695 { 4696 const struct virtnet_stat_desc *desc; 4697 const u64_stats_t *v_stat; 4698 u64 offset, bitmap; 4699 const __le64 *v; 4700 u32 queue_type; 4701 int i, num; 4702 4703 queue_type = vq_type(vi, qid); 4704 bitmap = ctx->bitmap[queue_type]; 4705 4706 if (drv_stats) { 4707 if (queue_type == VIRTNET_Q_TYPE_RX) { 4708 desc = &virtnet_rq_stats_desc_qstat[0]; 4709 num = ARRAY_SIZE(virtnet_rq_stats_desc_qstat); 4710 } else { 4711 desc = &virtnet_sq_stats_desc_qstat[0]; 4712 num = ARRAY_SIZE(virtnet_sq_stats_desc_qstat); 4713 } 4714 4715 for (i = 0; i < num; ++i) { 4716 offset = desc[i].qstat_offset / sizeof(*ctx->data); 4717 v_stat = (const u64_stats_t *)(base + desc[i].offset); 4718 ctx->data[offset] = u64_stats_read(v_stat); 4719 } 4720 return; 4721 } 4722 4723 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4724 desc = &virtnet_stats_rx_basic_desc_qstat[0]; 4725 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat); 4726 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC) 4727 goto found; 4728 } 4729 4730 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4731 desc = &virtnet_stats_rx_csum_desc_qstat[0]; 4732 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat); 4733 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM) 4734 goto found; 4735 } 4736 4737 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 4738 desc = &virtnet_stats_rx_gso_desc_qstat[0]; 4739 num = ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat); 4740 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_GSO) 4741 goto found; 4742 } 4743 4744 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4745 desc = &virtnet_stats_rx_speed_desc_qstat[0]; 4746 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat); 4747 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED) 4748 goto found; 4749 } 4750 4751 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4752 desc = &virtnet_stats_tx_basic_desc_qstat[0]; 4753 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat); 4754 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC) 4755 goto found; 4756 } 4757 4758 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 4759 desc = &virtnet_stats_tx_csum_desc_qstat[0]; 4760 num = ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat); 4761 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_CSUM) 4762 goto found; 4763 
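/* Note: unlike virtnet_fill_stats() below, no running offset is kept here;
 * qstat_offset already gives the absolute slot in the netdev qstats struct.
 */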
} 4764 4765 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4766 desc = &virtnet_stats_tx_gso_desc_qstat[0]; 4767 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat); 4768 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO) 4769 goto found; 4770 } 4771 4772 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4773 desc = &virtnet_stats_tx_speed_desc_qstat[0]; 4774 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat); 4775 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED) 4776 goto found; 4777 } 4778 4779 return; 4780 4781 found: 4782 for (i = 0; i < num; ++i) { 4783 offset = desc[i].qstat_offset / sizeof(*ctx->data); 4784 v = (const __le64 *)(base + desc[i].offset); 4785 ctx->data[offset] = le64_to_cpu(*v); 4786 } 4787 } 4788 4789 /* virtnet_fill_stats - copy the stats to qstats or ethtool -S 4790 * The stats source is the device or the driver. 4791 * 4792 * @vi: virtio net info 4793 * @qid: the vq id 4794 * @ctx: stats ctx (initiated by virtnet_stats_ctx_init()) 4795 * @base: pointer to the device reply or the driver stats structure. 4796 * @drv_stats: designate the base type (device reply, driver stats) 4797 * @type: the type of the device reply (if drv_stats is true, this must be zero) 4798 */ 4799 static void virtnet_fill_stats(struct virtnet_info *vi, u32 qid, 4800 struct virtnet_stats_ctx *ctx, 4801 const u8 *base, bool drv_stats, u8 reply_type) 4802 { 4803 u32 queue_type, num_rx, num_tx, num_cq; 4804 const struct virtnet_stat_desc *desc; 4805 const u64_stats_t *v_stat; 4806 u64 offset, bitmap; 4807 const __le64 *v; 4808 int i, num; 4809 4810 if (ctx->to_qstat) 4811 return virtnet_fill_stats_qstat(vi, qid, ctx, base, drv_stats, reply_type); 4812 4813 num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ]; 4814 num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX]; 4815 num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX]; 4816 4817 queue_type = vq_type(vi, qid); 4818 bitmap = ctx->bitmap[queue_type]; 4819 4820 /* skip the total fields of pairs */ 4821 offset = num_rx + num_tx; 4822 4823 if (queue_type == VIRTNET_Q_TYPE_TX) { 4824 offset += num_cq + num_rx * vi->curr_queue_pairs + num_tx * (qid / 2); 4825 4826 num = ARRAY_SIZE(virtnet_sq_stats_desc); 4827 if (drv_stats) { 4828 desc = &virtnet_sq_stats_desc[0]; 4829 goto drv_stats; 4830 } 4831 4832 offset += num; 4833 4834 } else if (queue_type == VIRTNET_Q_TYPE_RX) { 4835 offset += num_cq + num_rx * (qid / 2); 4836 4837 num = ARRAY_SIZE(virtnet_rq_stats_desc); 4838 if (drv_stats) { 4839 desc = &virtnet_rq_stats_desc[0]; 4840 goto drv_stats; 4841 } 4842 4843 offset += num; 4844 } 4845 4846 if (bitmap & VIRTIO_NET_STATS_TYPE_CVQ) { 4847 desc = &virtnet_stats_cvq_desc[0]; 4848 num = ARRAY_SIZE(virtnet_stats_cvq_desc); 4849 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_CVQ) 4850 goto found; 4851 4852 offset += num; 4853 } 4854 4855 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4856 desc = &virtnet_stats_rx_basic_desc[0]; 4857 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4858 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC) 4859 goto found; 4860 4861 offset += num; 4862 } 4863 4864 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4865 desc = &virtnet_stats_rx_csum_desc[0]; 4866 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4867 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM) 4868 goto found; 4869 4870 offset += num; 4871 } 4872 4873 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4874 desc = &virtnet_stats_rx_speed_desc[0]; 4875 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4876 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED) 4877 goto found; 4878 4879 
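/* Not the reply type being filled; skip this block's slots in ctx->data. */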
offset += num; 4880 } 4881 4882 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4883 desc = &virtnet_stats_tx_basic_desc[0]; 4884 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4885 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC) 4886 goto found; 4887 4888 offset += num; 4889 } 4890 4891 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4892 desc = &virtnet_stats_tx_gso_desc[0]; 4893 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4894 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO) 4895 goto found; 4896 4897 offset += num; 4898 } 4899 4900 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4901 desc = &virtnet_stats_tx_speed_desc[0]; 4902 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4903 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED) 4904 goto found; 4905 4906 offset += num; 4907 } 4908 4909 return; 4910 4911 found: 4912 for (i = 0; i < num; ++i) { 4913 v = (const __le64 *)(base + desc[i].offset); 4914 ctx->data[offset + i] = le64_to_cpu(*v); 4915 } 4916 4917 return; 4918 4919 drv_stats: 4920 for (i = 0; i < num; ++i) { 4921 v_stat = (const u64_stats_t *)(base + desc[i].offset); 4922 ctx->data[offset + i] = u64_stats_read(v_stat); 4923 } 4924 } 4925 4926 static int __virtnet_get_hw_stats(struct virtnet_info *vi, 4927 struct virtnet_stats_ctx *ctx, 4928 struct virtio_net_ctrl_queue_stats *req, 4929 int req_size, void *reply, int res_size) 4930 { 4931 struct virtio_net_stats_reply_hdr *hdr; 4932 struct scatterlist sgs_in, sgs_out; 4933 void *p; 4934 u32 qid; 4935 int ok; 4936 4937 sg_init_one(&sgs_out, req, req_size); 4938 sg_init_one(&sgs_in, reply, res_size); 4939 4940 ok = virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS, 4941 VIRTIO_NET_CTRL_STATS_GET, 4942 &sgs_out, &sgs_in); 4943 4944 if (!ok) 4945 return ok; 4946 4947 for (p = reply; p - reply < res_size; p += le16_to_cpu(hdr->size)) { 4948 hdr = p; 4949 qid = le16_to_cpu(hdr->vq_index); 4950 virtnet_fill_stats(vi, qid, ctx, p, false, hdr->type); 4951 } 4952 4953 return 0; 4954 } 4955 4956 static void virtnet_make_stat_req(struct virtnet_info *vi, 4957 struct virtnet_stats_ctx *ctx, 4958 struct virtio_net_ctrl_queue_stats *req, 4959 int qid, int *idx) 4960 { 4961 int qtype = vq_type(vi, qid); 4962 u64 bitmap = ctx->bitmap[qtype]; 4963 4964 if (!bitmap) 4965 return; 4966 4967 req->stats[*idx].vq_index = cpu_to_le16(qid); 4968 req->stats[*idx].types_bitmap[0] = cpu_to_le64(bitmap); 4969 *idx += 1; 4970 } 4971 4972 /* qid: -1: get stats of all vq. 4973 * > 0: get the stats for the specified vq. This must not be cvq.
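 * The cvq statistics, when supported, are only requested for qid == -1.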
4974 */ 4975 static int virtnet_get_hw_stats(struct virtnet_info *vi, 4976 struct virtnet_stats_ctx *ctx, int qid) 4977 { 4978 int qnum, i, j, res_size, qtype, last_vq, first_vq; 4979 struct virtio_net_ctrl_queue_stats *req; 4980 bool enable_cvq; 4981 void *reply; 4982 int ok; 4983 4984 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) 4985 return 0; 4986 4987 if (qid == -1) { 4988 last_vq = vi->curr_queue_pairs * 2 - 1; 4989 first_vq = 0; 4990 enable_cvq = true; 4991 } else { 4992 last_vq = qid; 4993 first_vq = qid; 4994 enable_cvq = false; 4995 } 4996 4997 qnum = 0; 4998 res_size = 0; 4999 for (i = first_vq; i <= last_vq ; ++i) { 5000 qtype = vq_type(vi, i); 5001 if (ctx->bitmap[qtype]) { 5002 ++qnum; 5003 res_size += ctx->size[qtype]; 5004 } 5005 } 5006 5007 if (enable_cvq && ctx->bitmap[VIRTNET_Q_TYPE_CQ]) { 5008 res_size += ctx->size[VIRTNET_Q_TYPE_CQ]; 5009 qnum += 1; 5010 } 5011 5012 req = kcalloc(qnum, sizeof(*req), GFP_KERNEL); 5013 if (!req) 5014 return -ENOMEM; 5015 5016 reply = kmalloc(res_size, GFP_KERNEL); 5017 if (!reply) { 5018 kfree(req); 5019 return -ENOMEM; 5020 } 5021 5022 j = 0; 5023 for (i = first_vq; i <= last_vq ; ++i) 5024 virtnet_make_stat_req(vi, ctx, req, i, &j); 5025 5026 if (enable_cvq) 5027 virtnet_make_stat_req(vi, ctx, req, vi->max_queue_pairs * 2, &j); 5028 5029 ok = __virtnet_get_hw_stats(vi, ctx, req, sizeof(*req) * j, reply, res_size); 5030 5031 kfree(req); 5032 kfree(reply); 5033 5034 return ok; 5035 } 5036 5037 static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data) 5038 { 5039 struct virtnet_info *vi = netdev_priv(dev); 5040 unsigned int i; 5041 u8 *p = data; 5042 5043 switch (stringset) { 5044 case ETH_SS_STATS: 5045 /* Generate the total field names. */ 5046 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, -1, &p); 5047 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, -1, &p); 5048 5049 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_CQ, 0, &p); 5050 5051 for (i = 0; i < vi->curr_queue_pairs; ++i) 5052 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, i, &p); 5053 5054 for (i = 0; i < vi->curr_queue_pairs; ++i) 5055 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, i, &p); 5056 break; 5057 } 5058 } 5059 5060 static int virtnet_get_sset_count(struct net_device *dev, int sset) 5061 { 5062 struct virtnet_info *vi = netdev_priv(dev); 5063 struct virtnet_stats_ctx ctx = {0}; 5064 u32 pair_count; 5065 5066 switch (sset) { 5067 case ETH_SS_STATS: 5068 virtnet_stats_ctx_init(vi, &ctx, NULL, false); 5069 5070 pair_count = ctx.desc_num[VIRTNET_Q_TYPE_RX] + ctx.desc_num[VIRTNET_Q_TYPE_TX]; 5071 5072 return pair_count + ctx.desc_num[VIRTNET_Q_TYPE_CQ] + 5073 vi->curr_queue_pairs * pair_count; 5074 default: 5075 return -EOPNOTSUPP; 5076 } 5077 } 5078 5079 static void virtnet_get_ethtool_stats(struct net_device *dev, 5080 struct ethtool_stats *stats, u64 *data) 5081 { 5082 struct virtnet_info *vi = netdev_priv(dev); 5083 struct virtnet_stats_ctx ctx = {0}; 5084 unsigned int start, i; 5085 const u8 *stats_base; 5086 5087 virtnet_stats_ctx_init(vi, &ctx, data, false); 5088 if (virtnet_get_hw_stats(vi, &ctx, -1)) 5089 dev_warn(&vi->dev->dev, "Failed to get hw stats.\n"); 5090 5091 for (i = 0; i < vi->curr_queue_pairs; i++) { 5092 struct receive_queue *rq = &vi->rq[i]; 5093 struct send_queue *sq = &vi->sq[i]; 5094 5095 stats_base = (const u8 *)&rq->stats; 5096 do { 5097 start = u64_stats_fetch_begin(&rq->stats.syncp); 5098 virtnet_fill_stats(vi, i * 2, &ctx, stats_base, true, 0); 5099 } while (u64_stats_fetch_retry(&rq->stats.syncp, 
start)); 5100 5101 stats_base = (const u8 *)&sq->stats; 5102 do { 5103 start = u64_stats_fetch_begin(&sq->stats.syncp); 5104 virtnet_fill_stats(vi, i * 2 + 1, &ctx, stats_base, true, 0); 5105 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 5106 } 5107 5108 virtnet_fill_total_fields(vi, &ctx); 5109 } 5110 5111 static void virtnet_get_channels(struct net_device *dev, 5112 struct ethtool_channels *channels) 5113 { 5114 struct virtnet_info *vi = netdev_priv(dev); 5115 5116 channels->combined_count = vi->curr_queue_pairs; 5117 channels->max_combined = vi->max_queue_pairs; 5118 channels->max_other = 0; 5119 channels->rx_count = 0; 5120 channels->tx_count = 0; 5121 channels->other_count = 0; 5122 } 5123 5124 static int virtnet_set_link_ksettings(struct net_device *dev, 5125 const struct ethtool_link_ksettings *cmd) 5126 { 5127 struct virtnet_info *vi = netdev_priv(dev); 5128 5129 return ethtool_virtdev_set_link_ksettings(dev, cmd, 5130 &vi->speed, &vi->duplex); 5131 } 5132 5133 static int virtnet_get_link_ksettings(struct net_device *dev, 5134 struct ethtool_link_ksettings *cmd) 5135 { 5136 struct virtnet_info *vi = netdev_priv(dev); 5137 5138 cmd->base.speed = vi->speed; 5139 cmd->base.duplex = vi->duplex; 5140 cmd->base.port = PORT_OTHER; 5141 5142 return 0; 5143 } 5144 5145 static int virtnet_send_tx_notf_coal_cmds(struct virtnet_info *vi, 5146 struct ethtool_coalesce *ec) 5147 { 5148 struct virtio_net_ctrl_coal_tx *coal_tx __free(kfree) = NULL; 5149 struct scatterlist sgs_tx; 5150 int i; 5151 5152 coal_tx = kzalloc(sizeof(*coal_tx), GFP_KERNEL); 5153 if (!coal_tx) 5154 return -ENOMEM; 5155 5156 coal_tx->tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); 5157 coal_tx->tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); 5158 sg_init_one(&sgs_tx, coal_tx, sizeof(*coal_tx)); 5159 5160 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 5161 VIRTIO_NET_CTRL_NOTF_COAL_TX_SET, 5162 &sgs_tx)) 5163 return -EINVAL; 5164 5165 vi->intr_coal_tx.max_usecs = ec->tx_coalesce_usecs; 5166 vi->intr_coal_tx.max_packets = ec->tx_max_coalesced_frames; 5167 for (i = 0; i < vi->max_queue_pairs; i++) { 5168 vi->sq[i].intr_coal.max_usecs = ec->tx_coalesce_usecs; 5169 vi->sq[i].intr_coal.max_packets = ec->tx_max_coalesced_frames; 5170 } 5171 5172 return 0; 5173 } 5174 5175 static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi, 5176 struct ethtool_coalesce *ec) 5177 { 5178 struct virtio_net_ctrl_coal_rx *coal_rx __free(kfree) = NULL; 5179 bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce; 5180 struct scatterlist sgs_rx; 5181 int i; 5182 5183 if (rx_ctrl_dim_on && !virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 5184 return -EOPNOTSUPP; 5185 5186 if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != vi->intr_coal_rx.max_usecs || 5187 ec->rx_max_coalesced_frames != vi->intr_coal_rx.max_packets)) 5188 return -EINVAL; 5189 5190 if (rx_ctrl_dim_on && !vi->rx_dim_enabled) { 5191 vi->rx_dim_enabled = true; 5192 for (i = 0; i < vi->max_queue_pairs; i++) { 5193 mutex_lock(&vi->rq[i].dim_lock); 5194 vi->rq[i].dim_enabled = true; 5195 mutex_unlock(&vi->rq[i].dim_lock); 5196 } 5197 return 0; 5198 } 5199 5200 coal_rx = kzalloc(sizeof(*coal_rx), GFP_KERNEL); 5201 if (!coal_rx) 5202 return -ENOMEM; 5203 5204 if (!rx_ctrl_dim_on && vi->rx_dim_enabled) { 5205 vi->rx_dim_enabled = false; 5206 for (i = 0; i < vi->max_queue_pairs; i++) { 5207 mutex_lock(&vi->rq[i].dim_lock); 5208 vi->rq[i].dim_enabled = false; 5209 mutex_unlock(&vi->rq[i].dim_lock); 5210 } 5211 } 5212 5213 /* Since the per-queue 
coalescing params can be set, 5214 * we need to apply the global new params even if they 5215 * are not updated. 5216 */ 5217 coal_rx->rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs); 5218 coal_rx->rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames); 5219 sg_init_one(&sgs_rx, coal_rx, sizeof(*coal_rx)); 5220 5221 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 5222 VIRTIO_NET_CTRL_NOTF_COAL_RX_SET, 5223 &sgs_rx)) 5224 return -EINVAL; 5225 5226 vi->intr_coal_rx.max_usecs = ec->rx_coalesce_usecs; 5227 vi->intr_coal_rx.max_packets = ec->rx_max_coalesced_frames; 5228 for (i = 0; i < vi->max_queue_pairs; i++) { 5229 mutex_lock(&vi->rq[i].dim_lock); 5230 vi->rq[i].intr_coal.max_usecs = ec->rx_coalesce_usecs; 5231 vi->rq[i].intr_coal.max_packets = ec->rx_max_coalesced_frames; 5232 mutex_unlock(&vi->rq[i].dim_lock); 5233 } 5234 5235 return 0; 5236 } 5237 5238 static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, 5239 struct ethtool_coalesce *ec) 5240 { 5241 int err; 5242 5243 err = virtnet_send_tx_notf_coal_cmds(vi, ec); 5244 if (err) 5245 return err; 5246 5247 err = virtnet_send_rx_notf_coal_cmds(vi, ec); 5248 if (err) 5249 return err; 5250 5251 return 0; 5252 } 5253 5254 static int virtnet_send_rx_notf_coal_vq_cmds(struct virtnet_info *vi, 5255 struct ethtool_coalesce *ec, 5256 u16 queue) 5257 { 5258 bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce; 5259 u32 max_usecs, max_packets; 5260 bool cur_rx_dim; 5261 int err; 5262 5263 mutex_lock(&vi->rq[queue].dim_lock); 5264 cur_rx_dim = vi->rq[queue].dim_enabled; 5265 max_usecs = vi->rq[queue].intr_coal.max_usecs; 5266 max_packets = vi->rq[queue].intr_coal.max_packets; 5267 5268 if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != max_usecs || 5269 ec->rx_max_coalesced_frames != max_packets)) { 5270 mutex_unlock(&vi->rq[queue].dim_lock); 5271 return -EINVAL; 5272 } 5273 5274 if (rx_ctrl_dim_on && !cur_rx_dim) { 5275 vi->rq[queue].dim_enabled = true; 5276 mutex_unlock(&vi->rq[queue].dim_lock); 5277 return 0; 5278 } 5279 5280 if (!rx_ctrl_dim_on && cur_rx_dim) 5281 vi->rq[queue].dim_enabled = false; 5282 5283 /* If no params are updated, userspace ethtool will 5284 * reject the modification.
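 * A request such as 'ethtool --per-queue <ifname> queue_mask 0x1 --coalesce
 * rx-usecs 10' (values illustrative) reaches this point via
 * virtnet_set_per_queue_coalesce().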
5285 */ 5286 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, queue, 5287 ec->rx_coalesce_usecs, 5288 ec->rx_max_coalesced_frames); 5289 mutex_unlock(&vi->rq[queue].dim_lock); 5290 return err; 5291 } 5292 5293 static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi, 5294 struct ethtool_coalesce *ec, 5295 u16 queue) 5296 { 5297 int err; 5298 5299 err = virtnet_send_rx_notf_coal_vq_cmds(vi, ec, queue); 5300 if (err) 5301 return err; 5302 5303 err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, queue, 5304 ec->tx_coalesce_usecs, 5305 ec->tx_max_coalesced_frames); 5306 if (err) 5307 return err; 5308 5309 return 0; 5310 } 5311 5312 static void virtnet_rx_dim_work(struct work_struct *work) 5313 { 5314 struct dim *dim = container_of(work, struct dim, work); 5315 struct receive_queue *rq = container_of(dim, 5316 struct receive_queue, dim); 5317 struct virtnet_info *vi = rq->vq->vdev->priv; 5318 struct net_device *dev = vi->dev; 5319 struct dim_cq_moder update_moder; 5320 int qnum, err; 5321 5322 qnum = rq - vi->rq; 5323 5324 mutex_lock(&rq->dim_lock); 5325 if (!rq->dim_enabled) 5326 goto out; 5327 5328 update_moder = net_dim_get_rx_irq_moder(dev, dim); 5329 if (update_moder.usec != rq->intr_coal.max_usecs || 5330 update_moder.pkts != rq->intr_coal.max_packets) { 5331 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, qnum, 5332 update_moder.usec, 5333 update_moder.pkts); 5334 if (err) 5335 pr_debug("%s: Failed to send dim parameters on rxq%d\n", 5336 dev->name, qnum); 5337 } 5338 out: 5339 dim->state = DIM_START_MEASURE; 5340 mutex_unlock(&rq->dim_lock); 5341 } 5342 5343 static int virtnet_coal_params_supported(struct ethtool_coalesce *ec) 5344 { 5345 /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL 5346 * or VIRTIO_NET_F_VQ_NOTF_COAL feature is negotiated. 5347 */ 5348 if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs) 5349 return -EOPNOTSUPP; 5350 5351 if (ec->tx_max_coalesced_frames > 1 || 5352 ec->rx_max_coalesced_frames != 1) 5353 return -EINVAL; 5354 5355 return 0; 5356 } 5357 5358 static int virtnet_should_update_vq_weight(int dev_flags, int weight, 5359 int vq_weight, bool *should_update) 5360 { 5361 if (weight ^ vq_weight) { 5362 if (dev_flags & IFF_UP) 5363 return -EBUSY; 5364 *should_update = true; 5365 } 5366 5367 return 0; 5368 } 5369 5370 static int virtnet_set_coalesce(struct net_device *dev, 5371 struct ethtool_coalesce *ec, 5372 struct kernel_ethtool_coalesce *kernel_coal, 5373 struct netlink_ext_ack *extack) 5374 { 5375 struct virtnet_info *vi = netdev_priv(dev); 5376 int ret, queue_number, napi_weight, i; 5377 bool update_napi = false; 5378 5379 /* Can't change NAPI weight if the link is up */ 5380 napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; 5381 for (queue_number = 0; queue_number < vi->max_queue_pairs; queue_number++) { 5382 ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, 5383 vi->sq[queue_number].napi.weight, 5384 &update_napi); 5385 if (ret) 5386 return ret; 5387 5388 if (update_napi) { 5389 /* All queues that belong to [queue_number, vi->max_queue_pairs] will be 5390 * updated for the sake of simplicity, which might not be necessary 5391 */ 5392 break; 5393 } 5394 } 5395 5396 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) 5397 ret = virtnet_send_notf_coal_cmds(vi, ec); 5398 else 5399 ret = virtnet_coal_params_supported(ec); 5400 5401 if (ret) 5402 return ret; 5403 5404 if (update_napi) { 5405 /* xsk xmit depends on the tx napi. So if xsk is active, 5406 * prevent modifications to tx napi. 
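 * (i.e. return -EBUSY below while any send queue still has an xsk_pool bound).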
5407 */ 5408 for (i = queue_number; i < vi->max_queue_pairs; i++) { 5409 if (vi->sq[i].xsk_pool) 5410 return -EBUSY; 5411 } 5412 5413 for (; queue_number < vi->max_queue_pairs; queue_number++) 5414 vi->sq[queue_number].napi.weight = napi_weight; 5415 } 5416 5417 return ret; 5418 } 5419 5420 static int virtnet_get_coalesce(struct net_device *dev, 5421 struct ethtool_coalesce *ec, 5422 struct kernel_ethtool_coalesce *kernel_coal, 5423 struct netlink_ext_ack *extack) 5424 { 5425 struct virtnet_info *vi = netdev_priv(dev); 5426 5427 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { 5428 ec->rx_coalesce_usecs = vi->intr_coal_rx.max_usecs; 5429 ec->tx_coalesce_usecs = vi->intr_coal_tx.max_usecs; 5430 ec->tx_max_coalesced_frames = vi->intr_coal_tx.max_packets; 5431 ec->rx_max_coalesced_frames = vi->intr_coal_rx.max_packets; 5432 ec->use_adaptive_rx_coalesce = vi->rx_dim_enabled; 5433 } else { 5434 ec->rx_max_coalesced_frames = 1; 5435 5436 if (vi->sq[0].napi.weight) 5437 ec->tx_max_coalesced_frames = 1; 5438 } 5439 5440 return 0; 5441 } 5442 5443 static int virtnet_set_per_queue_coalesce(struct net_device *dev, 5444 u32 queue, 5445 struct ethtool_coalesce *ec) 5446 { 5447 struct virtnet_info *vi = netdev_priv(dev); 5448 int ret, napi_weight; 5449 bool update_napi = false; 5450 5451 if (queue >= vi->max_queue_pairs) 5452 return -EINVAL; 5453 5454 /* Can't change NAPI weight if the link is up */ 5455 napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; 5456 ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, 5457 vi->sq[queue].napi.weight, 5458 &update_napi); 5459 if (ret) 5460 return ret; 5461 5462 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 5463 ret = virtnet_send_notf_coal_vq_cmds(vi, ec, queue); 5464 else 5465 ret = virtnet_coal_params_supported(ec); 5466 5467 if (ret) 5468 return ret; 5469 5470 if (update_napi) 5471 vi->sq[queue].napi.weight = napi_weight; 5472 5473 return 0; 5474 } 5475 5476 static int virtnet_get_per_queue_coalesce(struct net_device *dev, 5477 u32 queue, 5478 struct ethtool_coalesce *ec) 5479 { 5480 struct virtnet_info *vi = netdev_priv(dev); 5481 5482 if (queue >= vi->max_queue_pairs) 5483 return -EINVAL; 5484 5485 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { 5486 mutex_lock(&vi->rq[queue].dim_lock); 5487 ec->rx_coalesce_usecs = vi->rq[queue].intr_coal.max_usecs; 5488 ec->tx_coalesce_usecs = vi->sq[queue].intr_coal.max_usecs; 5489 ec->tx_max_coalesced_frames = vi->sq[queue].intr_coal.max_packets; 5490 ec->rx_max_coalesced_frames = vi->rq[queue].intr_coal.max_packets; 5491 ec->use_adaptive_rx_coalesce = vi->rq[queue].dim_enabled; 5492 mutex_unlock(&vi->rq[queue].dim_lock); 5493 } else { 5494 ec->rx_max_coalesced_frames = 1; 5495 5496 if (vi->sq[queue].napi.weight) 5497 ec->tx_max_coalesced_frames = 1; 5498 } 5499 5500 return 0; 5501 } 5502 5503 static void virtnet_init_settings(struct net_device *dev) 5504 { 5505 struct virtnet_info *vi = netdev_priv(dev); 5506 5507 vi->speed = SPEED_UNKNOWN; 5508 vi->duplex = DUPLEX_UNKNOWN; 5509 } 5510 5511 static u32 virtnet_get_rxfh_key_size(struct net_device *dev) 5512 { 5513 return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size; 5514 } 5515 5516 static u32 virtnet_get_rxfh_indir_size(struct net_device *dev) 5517 { 5518 return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size; 5519 } 5520 5521 static int virtnet_get_rxfh(struct net_device *dev, 5522 struct ethtool_rxfh_param *rxfh) 5523 { 5524 struct virtnet_info *vi = netdev_priv(dev); 5525 int 
i; 5526 5527 if (rxfh->indir) { 5528 for (i = 0; i < vi->rss_indir_table_size; ++i) 5529 rxfh->indir[i] = le16_to_cpu(vi->rss_hdr->indirection_table[i]); 5530 } 5531 5532 if (rxfh->key) 5533 memcpy(rxfh->key, vi->rss_hash_key_data, vi->rss_key_size); 5534 5535 rxfh->hfunc = ETH_RSS_HASH_TOP; 5536 5537 return 0; 5538 } 5539 5540 static int virtnet_set_rxfh(struct net_device *dev, 5541 struct ethtool_rxfh_param *rxfh, 5542 struct netlink_ext_ack *extack) 5543 { 5544 struct virtnet_info *vi = netdev_priv(dev); 5545 bool update = false; 5546 int i; 5547 5548 if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE && 5549 rxfh->hfunc != ETH_RSS_HASH_TOP) 5550 return -EOPNOTSUPP; 5551 5552 if (rxfh->indir) { 5553 if (!vi->has_rss) 5554 return -EOPNOTSUPP; 5555 5556 for (i = 0; i < vi->rss_indir_table_size; ++i) 5557 vi->rss_hdr->indirection_table[i] = cpu_to_le16(rxfh->indir[i]); 5558 update = true; 5559 } 5560 5561 if (rxfh->key) { 5562 /* If either _F_HASH_REPORT or _F_RSS are negotiated, the 5563 * device provides hash calculation capabilities, that is, 5564 * hash_key is configured. 5565 */ 5566 if (!vi->has_rss && !vi->has_rss_hash_report) 5567 return -EOPNOTSUPP; 5568 5569 memcpy(vi->rss_hash_key_data, rxfh->key, vi->rss_key_size); 5570 update = true; 5571 } 5572 5573 if (update) 5574 virtnet_commit_rss_command(vi); 5575 5576 return 0; 5577 } 5578 5579 static int virtnet_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs) 5580 { 5581 struct virtnet_info *vi = netdev_priv(dev); 5582 int rc = 0; 5583 5584 switch (info->cmd) { 5585 case ETHTOOL_GRXRINGS: 5586 info->data = vi->curr_queue_pairs; 5587 break; 5588 default: 5589 rc = -EOPNOTSUPP; 5590 } 5591 5592 return rc; 5593 } 5594 5595 static const struct ethtool_ops virtnet_ethtool_ops = { 5596 .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | 5597 ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE_RX, 5598 .get_drvinfo = virtnet_get_drvinfo, 5599 .get_link = ethtool_op_get_link, 5600 .get_ringparam = virtnet_get_ringparam, 5601 .set_ringparam = virtnet_set_ringparam, 5602 .get_strings = virtnet_get_strings, 5603 .get_sset_count = virtnet_get_sset_count, 5604 .get_ethtool_stats = virtnet_get_ethtool_stats, 5605 .set_channels = virtnet_set_channels, 5606 .get_channels = virtnet_get_channels, 5607 .get_ts_info = ethtool_op_get_ts_info, 5608 .get_link_ksettings = virtnet_get_link_ksettings, 5609 .set_link_ksettings = virtnet_set_link_ksettings, 5610 .set_coalesce = virtnet_set_coalesce, 5611 .get_coalesce = virtnet_get_coalesce, 5612 .set_per_queue_coalesce = virtnet_set_per_queue_coalesce, 5613 .get_per_queue_coalesce = virtnet_get_per_queue_coalesce, 5614 .get_rxfh_key_size = virtnet_get_rxfh_key_size, 5615 .get_rxfh_indir_size = virtnet_get_rxfh_indir_size, 5616 .get_rxfh = virtnet_get_rxfh, 5617 .set_rxfh = virtnet_set_rxfh, 5618 .get_rxfh_fields = virtnet_get_hashflow, 5619 .set_rxfh_fields = virtnet_set_hashflow, 5620 .get_rxnfc = virtnet_get_rxnfc, 5621 }; 5622 5623 static void virtnet_get_queue_stats_rx(struct net_device *dev, int i, 5624 struct netdev_queue_stats_rx *stats) 5625 { 5626 struct virtnet_info *vi = netdev_priv(dev); 5627 struct receive_queue *rq = &vi->rq[i]; 5628 struct virtnet_stats_ctx ctx = {0}; 5629 5630 virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true); 5631 5632 virtnet_get_hw_stats(vi, &ctx, i * 2); 5633 virtnet_fill_stats(vi, i * 2, &ctx, (void *)&rq->stats, true, 0); 5634 } 5635 5636 static void virtnet_get_queue_stats_tx(struct net_device *dev, int i, 5637 struct 
netdev_queue_stats_tx *stats) 5638 { 5639 struct virtnet_info *vi = netdev_priv(dev); 5640 struct send_queue *sq = &vi->sq[i]; 5641 struct virtnet_stats_ctx ctx = {0}; 5642 5643 virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true); 5644 5645 virtnet_get_hw_stats(vi, &ctx, i * 2 + 1); 5646 virtnet_fill_stats(vi, i * 2 + 1, &ctx, (void *)&sq->stats, true, 0); 5647 } 5648 5649 static void virtnet_get_base_stats(struct net_device *dev, 5650 struct netdev_queue_stats_rx *rx, 5651 struct netdev_queue_stats_tx *tx) 5652 { 5653 struct virtnet_info *vi = netdev_priv(dev); 5654 5655 /* The queue stats of the virtio-net will not be reset. So here we 5656 * return 0. 5657 */ 5658 rx->bytes = 0; 5659 rx->packets = 0; 5660 5661 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 5662 rx->hw_drops = 0; 5663 rx->hw_drop_overruns = 0; 5664 } 5665 5666 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 5667 rx->csum_unnecessary = 0; 5668 rx->csum_none = 0; 5669 rx->csum_bad = 0; 5670 } 5671 5672 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 5673 rx->hw_gro_packets = 0; 5674 rx->hw_gro_bytes = 0; 5675 rx->hw_gro_wire_packets = 0; 5676 rx->hw_gro_wire_bytes = 0; 5677 } 5678 5679 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) 5680 rx->hw_drop_ratelimits = 0; 5681 5682 tx->bytes = 0; 5683 tx->packets = 0; 5684 tx->stop = 0; 5685 tx->wake = 0; 5686 5687 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 5688 tx->hw_drops = 0; 5689 tx->hw_drop_errors = 0; 5690 } 5691 5692 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 5693 tx->csum_none = 0; 5694 tx->needs_csum = 0; 5695 } 5696 5697 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 5698 tx->hw_gso_packets = 0; 5699 tx->hw_gso_bytes = 0; 5700 tx->hw_gso_wire_packets = 0; 5701 tx->hw_gso_wire_bytes = 0; 5702 } 5703 5704 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) 5705 tx->hw_drop_ratelimits = 0; 5706 5707 netdev_stat_queue_sum(dev, 5708 dev->real_num_rx_queues, vi->max_queue_pairs, rx, 5709 dev->real_num_tx_queues, vi->max_queue_pairs, tx); 5710 } 5711 5712 static const struct netdev_stat_ops virtnet_stat_ops = { 5713 .get_queue_stats_rx = virtnet_get_queue_stats_rx, 5714 .get_queue_stats_tx = virtnet_get_queue_stats_tx, 5715 .get_base_stats = virtnet_get_base_stats, 5716 }; 5717 5718 static void virtnet_freeze_down(struct virtio_device *vdev) 5719 { 5720 struct virtnet_info *vi = vdev->priv; 5721 5722 /* Make sure no work handler is accessing the device */ 5723 flush_work(&vi->config_work); 5724 disable_rx_mode_work(vi); 5725 flush_work(&vi->rx_mode_work); 5726 5727 netif_tx_lock_bh(vi->dev); 5728 netif_device_detach(vi->dev); 5729 netif_tx_unlock_bh(vi->dev); 5730 if (netif_running(vi->dev)) { 5731 rtnl_lock(); 5732 virtnet_close(vi->dev); 5733 rtnl_unlock(); 5734 } 5735 } 5736 5737 static int init_vqs(struct virtnet_info *vi); 5738 5739 static int virtnet_restore_up(struct virtio_device *vdev) 5740 { 5741 struct virtnet_info *vi = vdev->priv; 5742 int err; 5743 5744 err = init_vqs(vi); 5745 if (err) 5746 return err; 5747 5748 virtio_device_ready(vdev); 5749 5750 enable_delayed_refill(vi); 5751 enable_rx_mode_work(vi); 5752 5753 if (netif_running(vi->dev)) { 5754 rtnl_lock(); 5755 err = virtnet_open(vi->dev); 5756 rtnl_unlock(); 5757 if (err) 5758 return err; 5759 } 5760 5761 netif_tx_lock_bh(vi->dev); 5762 netif_device_attach(vi->dev); 5763 netif_tx_unlock_bh(vi->dev); 5764 return err; 5765 } 5766 5767 static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 
offloads) 5768 { 5769 __virtio64 *_offloads __free(kfree) = NULL; 5770 struct scatterlist sg; 5771 5772 _offloads = kzalloc(sizeof(*_offloads), GFP_KERNEL); 5773 if (!_offloads) 5774 return -ENOMEM; 5775 5776 *_offloads = cpu_to_virtio64(vi->vdev, offloads); 5777 5778 sg_init_one(&sg, _offloads, sizeof(*_offloads)); 5779 5780 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS, 5781 VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) { 5782 dev_warn(&vi->dev->dev, "Fail to set guest offload.\n"); 5783 return -EINVAL; 5784 } 5785 5786 return 0; 5787 } 5788 5789 static int virtnet_clear_guest_offloads(struct virtnet_info *vi) 5790 { 5791 u64 offloads = 0; 5792 5793 if (!vi->guest_offloads) 5794 return 0; 5795 5796 return virtnet_set_guest_offloads(vi, offloads); 5797 } 5798 5799 static int virtnet_restore_guest_offloads(struct virtnet_info *vi) 5800 { 5801 u64 offloads = vi->guest_offloads; 5802 5803 if (!vi->guest_offloads) 5804 return 0; 5805 5806 return virtnet_set_guest_offloads(vi, offloads); 5807 } 5808 5809 static int virtnet_rq_bind_xsk_pool(struct virtnet_info *vi, struct receive_queue *rq, 5810 struct xsk_buff_pool *pool) 5811 { 5812 int err, qindex; 5813 5814 qindex = rq - vi->rq; 5815 5816 if (pool) { 5817 err = xdp_rxq_info_reg(&rq->xsk_rxq_info, vi->dev, qindex, rq->napi.napi_id); 5818 if (err < 0) 5819 return err; 5820 5821 err = xdp_rxq_info_reg_mem_model(&rq->xsk_rxq_info, 5822 MEM_TYPE_XSK_BUFF_POOL, NULL); 5823 if (err < 0) 5824 goto unreg; 5825 5826 xsk_pool_set_rxq_info(pool, &rq->xsk_rxq_info); 5827 } 5828 5829 virtnet_rx_pause(vi, rq); 5830 5831 err = virtqueue_reset(rq->vq, virtnet_rq_unmap_free_buf, NULL); 5832 if (err) { 5833 netdev_err(vi->dev, "reset rx fail: rx queue index: %d err: %d\n", qindex, err); 5834 5835 pool = NULL; 5836 } 5837 5838 rq->xsk_pool = pool; 5839 5840 virtnet_rx_resume(vi, rq); 5841 5842 if (pool) 5843 return 0; 5844 5845 unreg: 5846 xdp_rxq_info_unreg(&rq->xsk_rxq_info); 5847 return err; 5848 } 5849 5850 static int virtnet_sq_bind_xsk_pool(struct virtnet_info *vi, 5851 struct send_queue *sq, 5852 struct xsk_buff_pool *pool) 5853 { 5854 int err, qindex; 5855 5856 qindex = sq - vi->sq; 5857 5858 virtnet_tx_pause(vi, sq); 5859 5860 err = virtqueue_reset(sq->vq, virtnet_sq_free_unused_buf, 5861 virtnet_sq_free_unused_buf_done); 5862 if (err) { 5863 netdev_err(vi->dev, "reset tx fail: tx queue index: %d err: %d\n", qindex, err); 5864 pool = NULL; 5865 } 5866 5867 sq->xsk_pool = pool; 5868 5869 virtnet_tx_resume(vi, sq); 5870 5871 return err; 5872 } 5873 5874 static int virtnet_xsk_pool_enable(struct net_device *dev, 5875 struct xsk_buff_pool *pool, 5876 u16 qid) 5877 { 5878 struct virtnet_info *vi = netdev_priv(dev); 5879 struct receive_queue *rq; 5880 struct device *dma_dev; 5881 struct send_queue *sq; 5882 dma_addr_t hdr_dma; 5883 int err, size; 5884 5885 if (vi->hdr_len > xsk_pool_get_headroom(pool)) 5886 return -EINVAL; 5887 5888 /* In big_packets mode, xdp cannot work, so there is no need to 5889 * initialize xsk of rq. 5890 */ 5891 if (vi->big_packets && !vi->mergeable_rx_bufs) 5892 return -ENOENT; 5893 5894 if (qid >= vi->curr_queue_pairs) 5895 return -EINVAL; 5896 5897 sq = &vi->sq[qid]; 5898 rq = &vi->rq[qid]; 5899 5900 /* xsk assumes that tx and rx must have the same dma device. The af-xdp 5901 * may use one buffer to receive from the rx and reuse this buffer to 5902 * send by the tx. So the dma dev of sq and rq must be the same one. 5903 * 5904 * But vq->dma_dev allows every vq has the respective dma dev. 
So I 5905 * check the dma dev of vq and sq is the same dev. 5906 */ 5907 if (virtqueue_dma_dev(rq->vq) != virtqueue_dma_dev(sq->vq)) 5908 return -EINVAL; 5909 5910 dma_dev = virtqueue_dma_dev(rq->vq); 5911 if (!dma_dev) 5912 return -EINVAL; 5913 5914 size = virtqueue_get_vring_size(rq->vq); 5915 5916 rq->xsk_buffs = kvcalloc(size, sizeof(*rq->xsk_buffs), GFP_KERNEL); 5917 if (!rq->xsk_buffs) 5918 return -ENOMEM; 5919 5920 hdr_dma = virtqueue_dma_map_single_attrs(sq->vq, &xsk_hdr, vi->hdr_len, 5921 DMA_TO_DEVICE, 0); 5922 if (virtqueue_dma_mapping_error(sq->vq, hdr_dma)) { 5923 err = -ENOMEM; 5924 goto err_free_buffs; 5925 } 5926 5927 err = xsk_pool_dma_map(pool, dma_dev, 0); 5928 if (err) 5929 goto err_xsk_map; 5930 5931 err = virtnet_rq_bind_xsk_pool(vi, rq, pool); 5932 if (err) 5933 goto err_rq; 5934 5935 err = virtnet_sq_bind_xsk_pool(vi, sq, pool); 5936 if (err) 5937 goto err_sq; 5938 5939 /* Now, we do not support tx offload(such as tx csum), so all the tx 5940 * virtnet hdr is zero. So all the tx packets can share a single hdr. 5941 */ 5942 sq->xsk_hdr_dma_addr = hdr_dma; 5943 5944 return 0; 5945 5946 err_sq: 5947 virtnet_rq_bind_xsk_pool(vi, rq, NULL); 5948 err_rq: 5949 xsk_pool_dma_unmap(pool, 0); 5950 err_xsk_map: 5951 virtqueue_dma_unmap_single_attrs(rq->vq, hdr_dma, vi->hdr_len, 5952 DMA_TO_DEVICE, 0); 5953 err_free_buffs: 5954 kvfree(rq->xsk_buffs); 5955 return err; 5956 } 5957 5958 static int virtnet_xsk_pool_disable(struct net_device *dev, u16 qid) 5959 { 5960 struct virtnet_info *vi = netdev_priv(dev); 5961 struct xsk_buff_pool *pool; 5962 struct receive_queue *rq; 5963 struct send_queue *sq; 5964 int err; 5965 5966 if (qid >= vi->curr_queue_pairs) 5967 return -EINVAL; 5968 5969 sq = &vi->sq[qid]; 5970 rq = &vi->rq[qid]; 5971 5972 pool = rq->xsk_pool; 5973 5974 err = virtnet_rq_bind_xsk_pool(vi, rq, NULL); 5975 err |= virtnet_sq_bind_xsk_pool(vi, sq, NULL); 5976 5977 xsk_pool_dma_unmap(pool, 0); 5978 5979 virtqueue_dma_unmap_single_attrs(sq->vq, sq->xsk_hdr_dma_addr, 5980 vi->hdr_len, DMA_TO_DEVICE, 0); 5981 kvfree(rq->xsk_buffs); 5982 5983 return err; 5984 } 5985 5986 static int virtnet_xsk_pool_setup(struct net_device *dev, struct netdev_bpf *xdp) 5987 { 5988 if (xdp->xsk.pool) 5989 return virtnet_xsk_pool_enable(dev, xdp->xsk.pool, 5990 xdp->xsk.queue_id); 5991 else 5992 return virtnet_xsk_pool_disable(dev, xdp->xsk.queue_id); 5993 } 5994 5995 static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, 5996 struct netlink_ext_ack *extack) 5997 { 5998 unsigned int room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM + 5999 sizeof(struct skb_shared_info)); 6000 unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN; 6001 struct virtnet_info *vi = netdev_priv(dev); 6002 struct bpf_prog *old_prog; 6003 u16 xdp_qp = 0, curr_qp; 6004 int i, err; 6005 6006 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) 6007 && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 6008 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 6009 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 6010 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 6011 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM) || 6012 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) || 6013 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6))) { 6014 NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first"); 6015 return -EOPNOTSUPP; 6016 } 6017 6018 if (vi->mergeable_rx_bufs && !vi->any_header_sg) { 6019 
NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required"); 6020 return -EINVAL; 6021 } 6022 6023 if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) { 6024 NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags"); 6025 netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz); 6026 return -EINVAL; 6027 } 6028 6029 curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs; 6030 if (prog) 6031 xdp_qp = nr_cpu_ids; 6032 6033 /* XDP requires extra queues for XDP_TX */ 6034 if (curr_qp + xdp_qp > vi->max_queue_pairs) { 6035 netdev_warn_once(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n", 6036 curr_qp + xdp_qp, vi->max_queue_pairs); 6037 xdp_qp = 0; 6038 } 6039 6040 old_prog = rtnl_dereference(vi->rq[0].xdp_prog); 6041 if (!prog && !old_prog) 6042 return 0; 6043 6044 if (prog) 6045 bpf_prog_add(prog, vi->max_queue_pairs - 1); 6046 6047 virtnet_rx_pause_all(vi); 6048 6049 /* Make sure NAPI is not using any XDP TX queues for RX. */ 6050 if (netif_running(dev)) { 6051 for (i = 0; i < vi->max_queue_pairs; i++) 6052 virtnet_napi_tx_disable(&vi->sq[i]); 6053 } 6054 6055 if (!prog) { 6056 for (i = 0; i < vi->max_queue_pairs; i++) { 6057 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 6058 if (i == 0) 6059 virtnet_restore_guest_offloads(vi); 6060 } 6061 synchronize_net(); 6062 } 6063 6064 err = virtnet_set_queues(vi, curr_qp + xdp_qp); 6065 if (err) 6066 goto err; 6067 netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); 6068 vi->xdp_queue_pairs = xdp_qp; 6069 6070 if (prog) { 6071 vi->xdp_enabled = true; 6072 for (i = 0; i < vi->max_queue_pairs; i++) { 6073 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 6074 if (i == 0 && !old_prog) 6075 virtnet_clear_guest_offloads(vi); 6076 } 6077 if (!old_prog) 6078 xdp_features_set_redirect_target(dev, true); 6079 } else { 6080 xdp_features_clear_redirect_target(dev); 6081 vi->xdp_enabled = false; 6082 } 6083 6084 virtnet_rx_resume_all(vi); 6085 for (i = 0; i < vi->max_queue_pairs; i++) { 6086 if (old_prog) 6087 bpf_prog_put(old_prog); 6088 if (netif_running(dev)) 6089 virtnet_napi_tx_enable(&vi->sq[i]); 6090 } 6091 6092 return 0; 6093 6094 err: 6095 if (!prog) { 6096 virtnet_clear_guest_offloads(vi); 6097 for (i = 0; i < vi->max_queue_pairs; i++) 6098 rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); 6099 } 6100 6101 virtnet_rx_resume_all(vi); 6102 if (netif_running(dev)) { 6103 for (i = 0; i < vi->max_queue_pairs; i++) 6104 virtnet_napi_tx_enable(&vi->sq[i]); 6105 } 6106 if (prog) 6107 bpf_prog_sub(prog, vi->max_queue_pairs - 1); 6108 return err; 6109 } 6110 6111 static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp) 6112 { 6113 switch (xdp->command) { 6114 case XDP_SETUP_PROG: 6115 return virtnet_xdp_set(dev, xdp->prog, xdp->extack); 6116 case XDP_SETUP_XSK_POOL: 6117 return virtnet_xsk_pool_setup(dev, xdp); 6118 default: 6119 return -EINVAL; 6120 } 6121 } 6122 6123 static int virtnet_get_phys_port_name(struct net_device *dev, char *buf, 6124 size_t len) 6125 { 6126 struct virtnet_info *vi = netdev_priv(dev); 6127 int ret; 6128 6129 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 6130 return -EOPNOTSUPP; 6131 6132 ret = snprintf(buf, len, "sby"); 6133 if (ret >= len) 6134 return -EOPNOTSUPP; 6135 6136 return 0; 6137 } 6138 6139 static int virtnet_set_features(struct net_device *dev, 6140 netdev_features_t features) 6141 { 6142 struct virtnet_info *vi = netdev_priv(dev); 6143 u64 offloads; 6144 int err; 6145 
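/* Illustrative note (assumes a hypothetical interface named eth0): a
* userspace toggle such as "ethtool -K eth0 rx-gro-hw off" ends up here
* with NETIF_F_GRO_HW cleared in @features; the branch below then strips
* the GUEST_OFFLOAD_GRO_HW_MASK bits from the offload set and pushes the
* result to the device through virtnet_set_guest_offloads().
*/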
6146 if ((dev->features ^ features) & NETIF_F_GRO_HW) { 6147 if (vi->xdp_enabled) 6148 return -EBUSY; 6149 6150 if (features & NETIF_F_GRO_HW) 6151 offloads = vi->guest_offloads_capable; 6152 else 6153 offloads = vi->guest_offloads_capable & 6154 ~GUEST_OFFLOAD_GRO_HW_MASK; 6155 6156 err = virtnet_set_guest_offloads(vi, offloads); 6157 if (err) 6158 return err; 6159 vi->guest_offloads = offloads; 6160 } 6161 6162 if ((dev->features ^ features) & NETIF_F_RXHASH) { 6163 if (features & NETIF_F_RXHASH) 6164 vi->rss_hdr->hash_types = cpu_to_le32(vi->rss_hash_types_saved); 6165 else 6166 vi->rss_hdr->hash_types = cpu_to_le32(VIRTIO_NET_HASH_REPORT_NONE); 6167 6168 if (!virtnet_commit_rss_command(vi)) 6169 return -EINVAL; 6170 } 6171 6172 return 0; 6173 } 6174 6175 static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue) 6176 { 6177 struct virtnet_info *priv = netdev_priv(dev); 6178 struct send_queue *sq = &priv->sq[txqueue]; 6179 struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue); 6180 6181 u64_stats_update_begin(&sq->stats.syncp); 6182 u64_stats_inc(&sq->stats.tx_timeouts); 6183 u64_stats_update_end(&sq->stats.syncp); 6184 6185 netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n", 6186 txqueue, sq->name, sq->vq->index, sq->vq->name, 6187 jiffies_to_usecs(jiffies - READ_ONCE(txq->trans_start))); 6188 } 6189 6190 static int virtnet_init_irq_moder(struct virtnet_info *vi) 6191 { 6192 u8 profile_flags = 0, coal_flags = 0; 6193 int ret, i; 6194 6195 profile_flags |= DIM_PROFILE_RX; 6196 coal_flags |= DIM_COALESCE_USEC | DIM_COALESCE_PKTS; 6197 ret = net_dim_init_irq_moder(vi->dev, profile_flags, coal_flags, 6198 DIM_CQ_PERIOD_MODE_START_FROM_EQE, 6199 0, virtnet_rx_dim_work, NULL); 6200 6201 if (ret) 6202 return ret; 6203 6204 for (i = 0; i < vi->max_queue_pairs; i++) 6205 net_dim_setting(vi->dev, &vi->rq[i].dim, false); 6206 6207 return 0; 6208 } 6209 6210 static void virtnet_free_irq_moder(struct virtnet_info *vi) 6211 { 6212 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 6213 return; 6214 6215 rtnl_lock(); 6216 net_dim_free_irq_moder(vi->dev); 6217 rtnl_unlock(); 6218 } 6219 6220 static const struct net_device_ops virtnet_netdev = { 6221 .ndo_open = virtnet_open, 6222 .ndo_stop = virtnet_close, 6223 .ndo_start_xmit = start_xmit, 6224 .ndo_validate_addr = eth_validate_addr, 6225 .ndo_set_mac_address = virtnet_set_mac_address, 6226 .ndo_set_rx_mode = virtnet_set_rx_mode, 6227 .ndo_get_stats64 = virtnet_stats, 6228 .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid, 6229 .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, 6230 .ndo_bpf = virtnet_xdp, 6231 .ndo_xdp_xmit = virtnet_xdp_xmit, 6232 .ndo_xsk_wakeup = virtnet_xsk_wakeup, 6233 .ndo_features_check = passthru_features_check, 6234 .ndo_get_phys_port_name = virtnet_get_phys_port_name, 6235 .ndo_set_features = virtnet_set_features, 6236 .ndo_tx_timeout = virtnet_tx_timeout, 6237 }; 6238 6239 static void virtnet_config_changed_work(struct work_struct *work) 6240 { 6241 struct virtnet_info *vi = 6242 container_of(work, struct virtnet_info, config_work); 6243 u16 v; 6244 6245 if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS, 6246 struct virtio_net_config, status, &v) < 0) 6247 return; 6248 6249 if (v & VIRTIO_NET_S_ANNOUNCE) { 6250 netdev_notify_peers(vi->dev); 6251 virtnet_ack_link_announce(vi); 6252 } 6253 6254 /* Ignore unknown (future) status bits */ 6255 v &= VIRTIO_NET_S_LINK_UP; 6256 6257 if (vi->status == v) 6258 return; 6259 6260 vi->status = v; 6261 6262 if 
(vi->status & VIRTIO_NET_S_LINK_UP) { 6263 virtnet_update_settings(vi); 6264 netif_carrier_on(vi->dev); 6265 netif_tx_wake_all_queues(vi->dev); 6266 } else { 6267 netif_carrier_off(vi->dev); 6268 netif_tx_stop_all_queues(vi->dev); 6269 } 6270 } 6271 6272 static void virtnet_config_changed(struct virtio_device *vdev) 6273 { 6274 struct virtnet_info *vi = vdev->priv; 6275 6276 schedule_work(&vi->config_work); 6277 } 6278 6279 static void virtnet_free_queues(struct virtnet_info *vi) 6280 { 6281 int i; 6282 6283 for (i = 0; i < vi->max_queue_pairs; i++) { 6284 __netif_napi_del(&vi->rq[i].napi); 6285 __netif_napi_del(&vi->sq[i].napi); 6286 } 6287 6288 /* We called __netif_napi_del(), 6289 * we need to respect an RCU grace period before freeing vi->rq 6290 */ 6291 synchronize_net(); 6292 6293 kfree(vi->rq); 6294 kfree(vi->sq); 6295 kfree(vi->ctrl); 6296 } 6297 6298 static void _free_receive_bufs(struct virtnet_info *vi) 6299 { 6300 struct bpf_prog *old_prog; 6301 int i; 6302 6303 for (i = 0; i < vi->max_queue_pairs; i++) { 6304 while (vi->rq[i].pages) 6305 __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0); 6306 6307 old_prog = rtnl_dereference(vi->rq[i].xdp_prog); 6308 RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL); 6309 if (old_prog) 6310 bpf_prog_put(old_prog); 6311 } 6312 } 6313 6314 static void free_receive_bufs(struct virtnet_info *vi) 6315 { 6316 rtnl_lock(); 6317 _free_receive_bufs(vi); 6318 rtnl_unlock(); 6319 } 6320 6321 static void free_receive_page_frags(struct virtnet_info *vi) 6322 { 6323 int i; 6324 for (i = 0; i < vi->max_queue_pairs; i++) 6325 if (vi->rq[i].alloc_frag.page) { 6326 if (vi->rq[i].last_dma) 6327 virtnet_rq_unmap(&vi->rq[i], vi->rq[i].last_dma, 0); 6328 put_page(vi->rq[i].alloc_frag.page); 6329 } 6330 } 6331 6332 static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf) 6333 { 6334 struct virtnet_info *vi = vq->vdev->priv; 6335 struct send_queue *sq; 6336 int i = vq2txq(vq); 6337 6338 sq = &vi->sq[i]; 6339 6340 switch (virtnet_xmit_ptr_unpack(&buf)) { 6341 case VIRTNET_XMIT_TYPE_SKB: 6342 case VIRTNET_XMIT_TYPE_SKB_ORPHAN: 6343 dev_kfree_skb(buf); 6344 break; 6345 6346 case VIRTNET_XMIT_TYPE_XDP: 6347 xdp_return_frame(buf); 6348 break; 6349 6350 case VIRTNET_XMIT_TYPE_XSK: 6351 xsk_tx_completed(sq->xsk_pool, 1); 6352 break; 6353 } 6354 } 6355 6356 static void virtnet_sq_free_unused_buf_done(struct virtqueue *vq) 6357 { 6358 struct virtnet_info *vi = vq->vdev->priv; 6359 int i = vq2txq(vq); 6360 6361 netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, i)); 6362 } 6363 6364 static void free_unused_bufs(struct virtnet_info *vi) 6365 { 6366 void *buf; 6367 int i; 6368 6369 for (i = 0; i < vi->max_queue_pairs; i++) { 6370 struct virtqueue *vq = vi->sq[i].vq; 6371 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 6372 virtnet_sq_free_unused_buf(vq, buf); 6373 cond_resched(); 6374 } 6375 6376 for (i = 0; i < vi->max_queue_pairs; i++) { 6377 struct virtqueue *vq = vi->rq[i].vq; 6378 6379 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 6380 virtnet_rq_unmap_free_buf(vq, buf); 6381 cond_resched(); 6382 } 6383 } 6384 6385 static void virtnet_del_vqs(struct virtnet_info *vi) 6386 { 6387 struct virtio_device *vdev = vi->vdev; 6388 6389 virtnet_clean_affinity(vi); 6390 6391 vdev->config->del_vqs(vdev); 6392 6393 virtnet_free_queues(vi); 6394 } 6395 6396 /* How large should a single buffer be so a queue full of these can fit at 6397 * least one full packet? 6398 * Logic below assumes the mergeable buffer header is used. 
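* Illustrative arithmetic (assumed values, not taken from a real device):
* with the 12-byte mergeable header, the default 65535-byte max_mtu and a
* 16-entry ring, buf_len = 12 + ETH_HLEN + VLAN_HLEN + 65535 = 65565, so
* DIV_ROUND_UP(65565, 16) = 4098 and the function returns 4098 - 12 = 4086.
* With a 256-entry ring the same computation gives 257 - 12 = 245, which is
* below the GOOD_PACKET_LEN floor, so GOOD_PACKET_LEN is returned instead.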
6399 */ 6400 static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq) 6401 { 6402 const unsigned int hdr_len = vi->hdr_len; 6403 unsigned int rq_size = virtqueue_get_vring_size(vq); 6404 unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu; 6405 unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len; 6406 unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size); 6407 6408 return max(max(min_buf_len, hdr_len) - hdr_len, 6409 (unsigned int)GOOD_PACKET_LEN); 6410 } 6411 6412 static int virtnet_find_vqs(struct virtnet_info *vi) 6413 { 6414 struct virtqueue_info *vqs_info; 6415 struct virtqueue **vqs; 6416 int ret = -ENOMEM; 6417 int total_vqs; 6418 bool *ctx; 6419 u16 i; 6420 6421 /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by 6422 * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by 6423 * possible control vq. 6424 */ 6425 total_vqs = vi->max_queue_pairs * 2 + 6426 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ); 6427 6428 /* Allocate space for find_vqs parameters */ 6429 vqs = kcalloc(total_vqs, sizeof(*vqs), GFP_KERNEL); 6430 if (!vqs) 6431 goto err_vq; 6432 vqs_info = kcalloc(total_vqs, sizeof(*vqs_info), GFP_KERNEL); 6433 if (!vqs_info) 6434 goto err_vqs_info; 6435 if (!vi->big_packets || vi->mergeable_rx_bufs) { 6436 ctx = kcalloc(total_vqs, sizeof(*ctx), GFP_KERNEL); 6437 if (!ctx) 6438 goto err_ctx; 6439 } else { 6440 ctx = NULL; 6441 } 6442 6443 /* Parameters for control virtqueue, if any */ 6444 if (vi->has_cvq) { 6445 vqs_info[total_vqs - 1].name = "control"; 6446 } 6447 6448 /* Allocate/initialize parameters for send/receive virtqueues */ 6449 for (i = 0; i < vi->max_queue_pairs; i++) { 6450 vqs_info[rxq2vq(i)].callback = skb_recv_done; 6451 vqs_info[txq2vq(i)].callback = skb_xmit_done; 6452 sprintf(vi->rq[i].name, "input.%u", i); 6453 sprintf(vi->sq[i].name, "output.%u", i); 6454 vqs_info[rxq2vq(i)].name = vi->rq[i].name; 6455 vqs_info[txq2vq(i)].name = vi->sq[i].name; 6456 if (ctx) 6457 vqs_info[rxq2vq(i)].ctx = true; 6458 } 6459 6460 ret = virtio_find_vqs(vi->vdev, total_vqs, vqs, vqs_info, NULL); 6461 if (ret) 6462 goto err_find; 6463 6464 if (vi->has_cvq) { 6465 vi->cvq = vqs[total_vqs - 1]; 6466 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) 6467 vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; 6468 } 6469 6470 for (i = 0; i < vi->max_queue_pairs; i++) { 6471 vi->rq[i].vq = vqs[rxq2vq(i)]; 6472 vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq); 6473 vi->sq[i].vq = vqs[txq2vq(i)]; 6474 } 6475 6476 /* run here: ret == 0. 
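* The success path deliberately falls through to the labels below:
* err_find/err_ctx/err_vqs_info only free the temporary ctx, vqs_info and
* vqs arrays (the virtqueues themselves were saved in vi->rq[i].vq and
* vi->sq[i].vq above), and err_vq then returns ret, which is still 0 here.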
*/ 6477 6478 6479 err_find: 6480 kfree(ctx); 6481 err_ctx: 6482 kfree(vqs_info); 6483 err_vqs_info: 6484 kfree(vqs); 6485 err_vq: 6486 return ret; 6487 } 6488 6489 static int virtnet_alloc_queues(struct virtnet_info *vi) 6490 { 6491 int i; 6492 6493 if (vi->has_cvq) { 6494 vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL); 6495 if (!vi->ctrl) 6496 goto err_ctrl; 6497 } else { 6498 vi->ctrl = NULL; 6499 } 6500 vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL); 6501 if (!vi->sq) 6502 goto err_sq; 6503 vi->rq = kcalloc(vi->max_queue_pairs, sizeof(*vi->rq), GFP_KERNEL); 6504 if (!vi->rq) 6505 goto err_rq; 6506 6507 INIT_DELAYED_WORK(&vi->refill, refill_work); 6508 for (i = 0; i < vi->max_queue_pairs; i++) { 6509 vi->rq[i].pages = NULL; 6510 netif_napi_add_config(vi->dev, &vi->rq[i].napi, virtnet_poll, 6511 i); 6512 vi->rq[i].napi.weight = napi_weight; 6513 netif_napi_add_tx_weight(vi->dev, &vi->sq[i].napi, 6514 virtnet_poll_tx, 6515 napi_tx ? napi_weight : 0); 6516 6517 sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg)); 6518 ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len); 6519 sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg)); 6520 6521 u64_stats_init(&vi->rq[i].stats.syncp); 6522 u64_stats_init(&vi->sq[i].stats.syncp); 6523 mutex_init(&vi->rq[i].dim_lock); 6524 } 6525 6526 return 0; 6527 6528 err_rq: 6529 kfree(vi->sq); 6530 err_sq: 6531 kfree(vi->ctrl); 6532 err_ctrl: 6533 return -ENOMEM; 6534 } 6535 6536 static int init_vqs(struct virtnet_info *vi) 6537 { 6538 int ret; 6539 6540 /* Allocate send & receive queues */ 6541 ret = virtnet_alloc_queues(vi); 6542 if (ret) 6543 goto err; 6544 6545 ret = virtnet_find_vqs(vi); 6546 if (ret) 6547 goto err_free; 6548 6549 cpus_read_lock(); 6550 virtnet_set_affinity(vi); 6551 cpus_read_unlock(); 6552 6553 return 0; 6554 6555 err_free: 6556 virtnet_free_queues(vi); 6557 err: 6558 return ret; 6559 } 6560 6561 #ifdef CONFIG_SYSFS 6562 static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue, 6563 char *buf) 6564 { 6565 struct virtnet_info *vi = netdev_priv(queue->dev); 6566 unsigned int queue_index = get_netdev_rx_queue_index(queue); 6567 unsigned int headroom = virtnet_get_headroom(vi); 6568 unsigned int tailroom = headroom ? 
sizeof(struct skb_shared_info) : 0; 6569 struct ewma_pkt_len *avg; 6570 6571 BUG_ON(queue_index >= vi->max_queue_pairs); 6572 avg = &vi->rq[queue_index].mrg_avg_pkt_len; 6573 return sprintf(buf, "%u\n", 6574 get_mergeable_buf_len(&vi->rq[queue_index], avg, 6575 SKB_DATA_ALIGN(headroom + tailroom))); 6576 } 6577 6578 static struct rx_queue_attribute mergeable_rx_buffer_size_attribute = 6579 __ATTR_RO(mergeable_rx_buffer_size); 6580 6581 static struct attribute *virtio_net_mrg_rx_attrs[] = { 6582 &mergeable_rx_buffer_size_attribute.attr, 6583 NULL 6584 }; 6585 6586 static const struct attribute_group virtio_net_mrg_rx_group = { 6587 .name = "virtio_net", 6588 .attrs = virtio_net_mrg_rx_attrs 6589 }; 6590 #endif 6591 6592 static bool virtnet_fail_on_feature(struct virtio_device *vdev, 6593 unsigned int fbit, 6594 const char *fname, const char *dname) 6595 { 6596 if (!virtio_has_feature(vdev, fbit)) 6597 return false; 6598 6599 dev_err(&vdev->dev, "device advertises feature %s but not %s", 6600 fname, dname); 6601 6602 return true; 6603 } 6604 6605 #define VIRTNET_FAIL_ON(vdev, fbit, dbit) \ 6606 virtnet_fail_on_feature(vdev, fbit, #fbit, dbit) 6607 6608 static bool virtnet_validate_features(struct virtio_device *vdev) 6609 { 6610 if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) && 6611 (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX, 6612 "VIRTIO_NET_F_CTRL_VQ") || 6613 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN, 6614 "VIRTIO_NET_F_CTRL_VQ") || 6615 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE, 6616 "VIRTIO_NET_F_CTRL_VQ") || 6617 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") || 6618 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR, 6619 "VIRTIO_NET_F_CTRL_VQ") || 6620 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, 6621 "VIRTIO_NET_F_CTRL_VQ") || 6622 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT, 6623 "VIRTIO_NET_F_CTRL_VQ") || 6624 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL, 6625 "VIRTIO_NET_F_CTRL_VQ") || 6626 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_VQ_NOTF_COAL, 6627 "VIRTIO_NET_F_CTRL_VQ"))) { 6628 return false; 6629 } 6630 6631 return true; 6632 } 6633 6634 #define MIN_MTU ETH_MIN_MTU 6635 #define MAX_MTU ETH_MAX_MTU 6636 6637 static int virtnet_validate(struct virtio_device *vdev) 6638 { 6639 if (!vdev->config->get) { 6640 dev_err(&vdev->dev, "%s failure: config access disabled\n", 6641 __func__); 6642 return -EINVAL; 6643 } 6644 6645 if (!virtnet_validate_features(vdev)) 6646 return -EINVAL; 6647 6648 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { 6649 int mtu = virtio_cread16(vdev, 6650 offsetof(struct virtio_net_config, 6651 mtu)); 6652 if (mtu < MIN_MTU) 6653 __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU); 6654 } 6655 6656 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY) && 6657 !virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 6658 dev_warn(&vdev->dev, "device advertises feature VIRTIO_NET_F_STANDBY but not VIRTIO_NET_F_MAC, disabling standby"); 6659 __virtio_clear_bit(vdev, VIRTIO_NET_F_STANDBY); 6660 } 6661 6662 return 0; 6663 } 6664 6665 static bool virtnet_check_guest_gso(const struct virtnet_info *vi) 6666 { 6667 return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 6668 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 6669 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 6670 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 6671 (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) && 6672 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6)); 6673 } 6674 6675 static void virtnet_set_big_packets(struct virtnet_info *vi, const int 
mtu) 6676 { 6677 bool guest_gso = virtnet_check_guest_gso(vi); 6678 6679 /* If the device can receive ANY guest GSO packets, regardless of mtu, 6680 * allocate packets of maximum size; otherwise limit them to 6681 * mtu-sized packets only. 6682 */ 6683 if (mtu > ETH_DATA_LEN || guest_gso) { 6684 vi->big_packets = true; 6685 vi->big_packets_num_skbfrags = guest_gso ? MAX_SKB_FRAGS : DIV_ROUND_UP(mtu, PAGE_SIZE); 6686 } 6687 } 6688 6689 #define VIRTIO_NET_HASH_REPORT_MAX_TABLE 10 6690 static enum xdp_rss_hash_type 6691 virtnet_xdp_rss_type[VIRTIO_NET_HASH_REPORT_MAX_TABLE] = { 6692 [VIRTIO_NET_HASH_REPORT_NONE] = XDP_RSS_TYPE_NONE, 6693 [VIRTIO_NET_HASH_REPORT_IPv4] = XDP_RSS_TYPE_L3_IPV4, 6694 [VIRTIO_NET_HASH_REPORT_TCPv4] = XDP_RSS_TYPE_L4_IPV4_TCP, 6695 [VIRTIO_NET_HASH_REPORT_UDPv4] = XDP_RSS_TYPE_L4_IPV4_UDP, 6696 [VIRTIO_NET_HASH_REPORT_IPv6] = XDP_RSS_TYPE_L3_IPV6, 6697 [VIRTIO_NET_HASH_REPORT_TCPv6] = XDP_RSS_TYPE_L4_IPV6_TCP, 6698 [VIRTIO_NET_HASH_REPORT_UDPv6] = XDP_RSS_TYPE_L4_IPV6_UDP, 6699 [VIRTIO_NET_HASH_REPORT_IPv6_EX] = XDP_RSS_TYPE_L3_IPV6_EX, 6700 [VIRTIO_NET_HASH_REPORT_TCPv6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX, 6701 [VIRTIO_NET_HASH_REPORT_UDPv6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX 6702 }; 6703 6704 static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash, 6705 enum xdp_rss_hash_type *rss_type) 6706 { 6707 const struct xdp_buff *xdp = (void *)_ctx; 6708 struct virtio_net_hdr_v1_hash *hdr_hash; 6709 struct virtnet_info *vi; 6710 u16 hash_report; 6711 6712 if (!(xdp->rxq->dev->features & NETIF_F_RXHASH)) 6713 return -ENODATA; 6714 6715 vi = netdev_priv(xdp->rxq->dev); 6716 hdr_hash = (struct virtio_net_hdr_v1_hash *)(xdp->data - vi->hdr_len); 6717 hash_report = __le16_to_cpu(hdr_hash->hash_report); 6718 6719 if (hash_report >= VIRTIO_NET_HASH_REPORT_MAX_TABLE) 6720 hash_report = VIRTIO_NET_HASH_REPORT_NONE; 6721 6722 *rss_type = virtnet_xdp_rss_type[hash_report]; 6723 *hash = __le32_to_cpu(hdr_hash->hash_value); 6724 return 0; 6725 } 6726 6727 static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = { 6728 .xmo_rx_hash = virtnet_xdp_rx_hash, 6729 }; 6730 6731 static int virtnet_probe(struct virtio_device *vdev) 6732 { 6733 int i, err = -ENOMEM; 6734 struct net_device *dev; 6735 struct virtnet_info *vi; 6736 u16 max_queue_pairs; 6737 int mtu = 0; 6738 6739 /* Find out whether the host supports a multiqueue/RSS virtio_net device */ 6740 max_queue_pairs = 1; 6741 if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ) || virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) 6742 max_queue_pairs = 6743 virtio_cread16(vdev, offsetof(struct virtio_net_config, max_virtqueue_pairs)); 6744 6745 /* We need at least 2 queues */ 6746 if (max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 6747 max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || 6748 !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) 6749 max_queue_pairs = 1; 6750 6751 /* Allocate ourselves a network device with room for our info */ 6752 dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs); 6753 if (!dev) 6754 return -ENOMEM; 6755 6756 /* Set up network device as normal. */ 6757 dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE | 6758 IFF_TX_SKB_NO_LINEAR; 6759 dev->netdev_ops = &virtnet_netdev; 6760 dev->stat_ops = &virtnet_stat_ops; 6761 dev->features = NETIF_F_HIGHDMA; 6762 6763 dev->ethtool_ops = &virtnet_ethtool_ops; 6764 SET_NETDEV_DEV(dev, &vdev->dev); 6765 6766 /* Do we support "hardware" checksums? */ 6767 if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) { 6768 /* This opens up the world of extra features. 
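* A note on the dependency (generic netdev behaviour, not device specific):
* the TSO/USO bits advertised below are only usable together with
* NETIF_F_SG and a checksum feature, which is why the (!csum && gso)
* combination mentioned further down gets fixed up by register_netdev().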
*/ 6769 dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG; 6770 if (csum) 6771 dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; 6772 6773 if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) { 6774 dev->hw_features |= NETIF_F_TSO 6775 | NETIF_F_TSO_ECN | NETIF_F_TSO6; 6776 } 6777 /* Individual feature bits: what can host handle? */ 6778 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4)) 6779 dev->hw_features |= NETIF_F_TSO; 6780 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6)) 6781 dev->hw_features |= NETIF_F_TSO6; 6782 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN)) 6783 dev->hw_features |= NETIF_F_TSO_ECN; 6784 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_USO)) 6785 dev->hw_features |= NETIF_F_GSO_UDP_L4; 6786 6787 dev->features |= NETIF_F_GSO_ROBUST; 6788 6789 if (gso) 6790 dev->features |= dev->hw_features & NETIF_F_ALL_TSO; 6791 /* (!csum && gso) case will be fixed by register_netdev() */ 6792 } 6793 6794 /* 1. With VIRTIO_NET_F_GUEST_CSUM negotiation, the driver doesn't 6795 * need to calculate checksums for partially checksummed packets, 6796 * as they're considered valid by the upper layer. 6797 * 2. Without VIRTIO_NET_F_GUEST_CSUM negotiation, the driver only 6798 * receives fully checksummed packets. The device may assist in 6799 * validating these packets' checksums, so the driver won't have to. 6800 */ 6801 dev->features |= NETIF_F_RXCSUM; 6802 6803 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || 6804 virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)) 6805 dev->features |= NETIF_F_GRO_HW; 6806 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) 6807 dev->hw_features |= NETIF_F_GRO_HW; 6808 6809 dev->vlan_features = dev->features; 6810 dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | 6811 NETDEV_XDP_ACT_XSK_ZEROCOPY; 6812 6813 /* MTU range: 68 - 65535 */ 6814 dev->min_mtu = MIN_MTU; 6815 dev->max_mtu = MAX_MTU; 6816 6817 /* Configuration may specify what MAC to use. Otherwise random. 
*/ 6818 if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 6819 u8 addr[ETH_ALEN]; 6820 6821 virtio_cread_bytes(vdev, 6822 offsetof(struct virtio_net_config, mac), 6823 addr, ETH_ALEN); 6824 eth_hw_addr_set(dev, addr); 6825 } else { 6826 eth_hw_addr_random(dev); 6827 dev_info(&vdev->dev, "Assigned random MAC address %pM\n", 6828 dev->dev_addr); 6829 } 6830 6831 /* Set up our device-specific information */ 6832 vi = netdev_priv(dev); 6833 vi->dev = dev; 6834 vi->vdev = vdev; 6835 vdev->priv = vi; 6836 6837 INIT_WORK(&vi->config_work, virtnet_config_changed_work); 6838 INIT_WORK(&vi->rx_mode_work, virtnet_rx_mode_work); 6839 spin_lock_init(&vi->refill_lock); 6840 6841 if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) { 6842 vi->mergeable_rx_bufs = true; 6843 dev->xdp_features |= NETDEV_XDP_ACT_RX_SG; 6844 } 6845 6846 if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) 6847 vi->has_rss_hash_report = true; 6848 6849 if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) { 6850 vi->has_rss = true; 6851 6852 vi->rss_indir_table_size = 6853 virtio_cread16(vdev, offsetof(struct virtio_net_config, 6854 rss_max_indirection_table_length)); 6855 } 6856 vi->rss_hdr = devm_kzalloc(&vdev->dev, virtnet_rss_hdr_size(vi), GFP_KERNEL); 6857 if (!vi->rss_hdr) { 6858 err = -ENOMEM; 6859 goto free; 6860 } 6861 6862 if (vi->has_rss || vi->has_rss_hash_report) { 6863 vi->rss_key_size = 6864 virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size)); 6865 if (vi->rss_key_size > VIRTIO_NET_RSS_MAX_KEY_SIZE) { 6866 dev_err(&vdev->dev, "rss_max_key_size=%u exceeds the limit %u.\n", 6867 vi->rss_key_size, VIRTIO_NET_RSS_MAX_KEY_SIZE); 6868 err = -EINVAL; 6869 goto free; 6870 } 6871 6872 vi->rss_hash_types_supported = 6873 virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types)); 6874 vi->rss_hash_types_supported &= 6875 ~(VIRTIO_NET_RSS_HASH_TYPE_IP_EX | 6876 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | 6877 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX); 6878 6879 dev->hw_features |= NETIF_F_RXHASH; 6880 dev->xdp_metadata_ops = &virtnet_xdp_metadata_ops; 6881 } 6882 6883 if (vi->has_rss_hash_report) 6884 vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash); 6885 else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) || 6886 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 6887 vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); 6888 else 6889 vi->hdr_len = sizeof(struct virtio_net_hdr); 6890 6891 if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) || 6892 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 6893 vi->any_header_sg = true; 6894 6895 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) 6896 vi->has_cvq = true; 6897 6898 mutex_init(&vi->cvq_lock); 6899 6900 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { 6901 mtu = virtio_cread16(vdev, 6902 offsetof(struct virtio_net_config, 6903 mtu)); 6904 if (mtu < dev->min_mtu) { 6905 /* Should never trigger: MTU was previously validated 6906 * in virtnet_validate. 
6907 */ 6908 dev_err(&vdev->dev, 6909 "device MTU appears to have changed it is now %d < %d", 6910 mtu, dev->min_mtu); 6911 err = -EINVAL; 6912 goto free; 6913 } 6914 6915 dev->mtu = mtu; 6916 dev->max_mtu = mtu; 6917 } 6918 6919 virtnet_set_big_packets(vi, mtu); 6920 6921 if (vi->any_header_sg) 6922 dev->needed_headroom = vi->hdr_len; 6923 6924 /* Enable multiqueue by default */ 6925 if (num_online_cpus() >= max_queue_pairs) 6926 vi->curr_queue_pairs = max_queue_pairs; 6927 else 6928 vi->curr_queue_pairs = num_online_cpus(); 6929 vi->max_queue_pairs = max_queue_pairs; 6930 6931 /* Allocate/initialize the rx/tx queues, and invoke find_vqs */ 6932 err = init_vqs(vi); 6933 if (err) 6934 goto free; 6935 6936 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { 6937 vi->intr_coal_rx.max_usecs = 0; 6938 vi->intr_coal_tx.max_usecs = 0; 6939 vi->intr_coal_rx.max_packets = 0; 6940 6941 /* Keep the default values of the coalescing parameters 6942 * aligned with the default napi_tx state. 6943 */ 6944 if (vi->sq[0].napi.weight) 6945 vi->intr_coal_tx.max_packets = 1; 6946 else 6947 vi->intr_coal_tx.max_packets = 0; 6948 } 6949 6950 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { 6951 /* The reason is the same as VIRTIO_NET_F_NOTF_COAL. */ 6952 for (i = 0; i < vi->max_queue_pairs; i++) 6953 if (vi->sq[i].napi.weight) 6954 vi->sq[i].intr_coal.max_packets = 1; 6955 6956 err = virtnet_init_irq_moder(vi); 6957 if (err) 6958 goto free; 6959 } 6960 6961 #ifdef CONFIG_SYSFS 6962 if (vi->mergeable_rx_bufs) 6963 dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group; 6964 #endif 6965 netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs); 6966 netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs); 6967 6968 virtnet_init_settings(dev); 6969 6970 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { 6971 vi->failover = net_failover_create(vi->dev); 6972 if (IS_ERR(vi->failover)) { 6973 err = PTR_ERR(vi->failover); 6974 goto free_vqs; 6975 } 6976 } 6977 6978 if (vi->has_rss || vi->has_rss_hash_report) 6979 virtnet_init_default_rss(vi); 6980 6981 enable_rx_mode_work(vi); 6982 6983 /* serialize netdev register + virtio_device_ready() with ndo_open() */ 6984 rtnl_lock(); 6985 6986 err = register_netdevice(dev); 6987 if (err) { 6988 pr_debug("virtio_net: registering device failed\n"); 6989 rtnl_unlock(); 6990 goto free_failover; 6991 } 6992 6993 /* Disable config change notification until ndo_open. */ 6994 virtio_config_driver_disable(vi->vdev); 6995 6996 virtio_device_ready(vdev); 6997 6998 if (vi->has_rss || vi->has_rss_hash_report) { 6999 if (!virtnet_commit_rss_command(vi)) { 7000 dev_warn(&vdev->dev, "RSS disabled because committing failed.\n"); 7001 dev->hw_features &= ~NETIF_F_RXHASH; 7002 vi->has_rss_hash_report = false; 7003 vi->has_rss = false; 7004 } 7005 } 7006 7007 virtnet_set_queues(vi, vi->curr_queue_pairs); 7008 7009 /* a random MAC address has been assigned, notify the device. 
7010 * We don't fail probe if VIRTIO_NET_F_CTRL_MAC_ADDR is not there 7011 * because many devices work fine without getting MAC explicitly 7012 */ 7013 if (!virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && 7014 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 7015 struct scatterlist sg; 7016 7017 sg_init_one(&sg, dev->dev_addr, dev->addr_len); 7018 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 7019 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { 7020 pr_debug("virtio_net: setting MAC address failed\n"); 7021 rtnl_unlock(); 7022 err = -EINVAL; 7023 goto free_unregister_netdev; 7024 } 7025 } 7026 7027 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) { 7028 struct virtio_net_stats_capabilities *stats_cap __free(kfree) = NULL; 7029 struct scatterlist sg; 7030 __le64 v; 7031 7032 stats_cap = kzalloc(sizeof(*stats_cap), GFP_KERNEL); 7033 if (!stats_cap) { 7034 rtnl_unlock(); 7035 err = -ENOMEM; 7036 goto free_unregister_netdev; 7037 } 7038 7039 sg_init_one(&sg, stats_cap, sizeof(*stats_cap)); 7040 7041 if (!virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS, 7042 VIRTIO_NET_CTRL_STATS_QUERY, 7043 NULL, &sg)) { 7044 pr_debug("virtio_net: fail to get stats capability\n"); 7045 rtnl_unlock(); 7046 err = -EINVAL; 7047 goto free_unregister_netdev; 7048 } 7049 7050 v = stats_cap->supported_stats_types[0]; 7051 vi->device_stats_cap = le64_to_cpu(v); 7052 } 7053 7054 /* Assume link up if device can't report link status, 7055 otherwise get link status from config. */ 7056 netif_carrier_off(dev); 7057 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { 7058 virtnet_config_changed_work(&vi->config_work); 7059 } else { 7060 vi->status = VIRTIO_NET_S_LINK_UP; 7061 virtnet_update_settings(vi); 7062 netif_carrier_on(dev); 7063 } 7064 7065 for (i = 0; i < ARRAY_SIZE(guest_offloads); i++) 7066 if (virtio_has_feature(vi->vdev, guest_offloads[i])) 7067 set_bit(guest_offloads[i], &vi->guest_offloads); 7068 vi->guest_offloads_capable = vi->guest_offloads; 7069 7070 rtnl_unlock(); 7071 7072 err = virtnet_cpu_notif_add(vi); 7073 if (err) { 7074 pr_debug("virtio_net: registering cpu notifier failed\n"); 7075 goto free_unregister_netdev; 7076 } 7077 7078 pr_debug("virtnet: registered device %s with %d RX and TX vq's\n", 7079 dev->name, max_queue_pairs); 7080 7081 return 0; 7082 7083 free_unregister_netdev: 7084 unregister_netdev(dev); 7085 free_failover: 7086 net_failover_destroy(vi->failover); 7087 free_vqs: 7088 virtio_reset_device(vdev); 7089 cancel_delayed_work_sync(&vi->refill); 7090 free_receive_page_frags(vi); 7091 virtnet_del_vqs(vi); 7092 free: 7093 free_netdev(dev); 7094 return err; 7095 } 7096 7097 static void remove_vq_common(struct virtnet_info *vi) 7098 { 7099 int i; 7100 7101 virtio_reset_device(vi->vdev); 7102 7103 /* Free unused buffers in both send and recv, if any. */ 7104 free_unused_bufs(vi); 7105 7106 /* 7107 * Rule of thumb is netdev_tx_reset_queue() should follow any 7108 * skb freeing not followed by netdev_tx_completed_queue() 7109 */ 7110 for (i = 0; i < vi->max_queue_pairs; i++) 7111 netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, i)); 7112 7113 free_receive_bufs(vi); 7114 7115 free_receive_page_frags(vi); 7116 7117 virtnet_del_vqs(vi); 7118 } 7119 7120 static void virtnet_remove(struct virtio_device *vdev) 7121 { 7122 struct virtnet_info *vi = vdev->priv; 7123 7124 virtnet_cpu_notif_remove(vi); 7125 7126 /* Make sure no work handler is accessing the device. 
*/ 7127 flush_work(&vi->config_work); 7128 disable_rx_mode_work(vi); 7129 flush_work(&vi->rx_mode_work); 7130 7131 virtnet_free_irq_moder(vi); 7132 7133 unregister_netdev(vi->dev); 7134 7135 net_failover_destroy(vi->failover); 7136 7137 remove_vq_common(vi); 7138 7139 free_netdev(vi->dev); 7140 } 7141 7142 static __maybe_unused int virtnet_freeze(struct virtio_device *vdev) 7143 { 7144 struct virtnet_info *vi = vdev->priv; 7145 7146 virtnet_cpu_notif_remove(vi); 7147 virtnet_freeze_down(vdev); 7148 remove_vq_common(vi); 7149 7150 return 0; 7151 } 7152 7153 static __maybe_unused int virtnet_restore(struct virtio_device *vdev) 7154 { 7155 struct virtnet_info *vi = vdev->priv; 7156 int err; 7157 7158 err = virtnet_restore_up(vdev); 7159 if (err) 7160 return err; 7161 virtnet_set_queues(vi, vi->curr_queue_pairs); 7162 7163 err = virtnet_cpu_notif_add(vi); 7164 if (err) { 7165 virtnet_freeze_down(vdev); 7166 remove_vq_common(vi); 7167 return err; 7168 } 7169 7170 return 0; 7171 } 7172 7173 static struct virtio_device_id id_table[] = { 7174 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, 7175 { 0 }, 7176 }; 7177 7178 #define VIRTNET_FEATURES \ 7179 VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \ 7180 VIRTIO_NET_F_MAC, \ 7181 VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \ 7182 VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \ 7183 VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \ 7184 VIRTIO_NET_F_HOST_USO, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, \ 7185 VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \ 7186 VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \ 7187 VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \ 7188 VIRTIO_NET_F_CTRL_MAC_ADDR, \ 7189 VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ 7190 VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ 7191 VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \ 7192 VIRTIO_NET_F_VQ_NOTF_COAL, \ 7193 VIRTIO_NET_F_GUEST_HDRLEN, VIRTIO_NET_F_DEVICE_STATS 7194 7195 static unsigned int features[] = { 7196 VIRTNET_FEATURES, 7197 }; 7198 7199 static unsigned int features_legacy[] = { 7200 VIRTNET_FEATURES, 7201 VIRTIO_NET_F_GSO, 7202 VIRTIO_F_ANY_LAYOUT, 7203 }; 7204 7205 static struct virtio_driver virtio_net_driver = { 7206 .feature_table = features, 7207 .feature_table_size = ARRAY_SIZE(features), 7208 .feature_table_legacy = features_legacy, 7209 .feature_table_size_legacy = ARRAY_SIZE(features_legacy), 7210 .driver.name = KBUILD_MODNAME, 7211 .id_table = id_table, 7212 .validate = virtnet_validate, 7213 .probe = virtnet_probe, 7214 .remove = virtnet_remove, 7215 .config_changed = virtnet_config_changed, 7216 #ifdef CONFIG_PM_SLEEP 7217 .freeze = virtnet_freeze, 7218 .restore = virtnet_restore, 7219 #endif 7220 }; 7221 7222 static __init int virtio_net_driver_init(void) 7223 { 7224 int ret; 7225 7226 ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online", 7227 virtnet_cpu_online, 7228 virtnet_cpu_down_prep); 7229 if (ret < 0) 7230 goto out; 7231 virtionet_online = ret; 7232 ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead", 7233 NULL, virtnet_cpu_dead); 7234 if (ret) 7235 goto err_dead; 7236 ret = register_virtio_driver(&virtio_net_driver); 7237 if (ret) 7238 goto err_virtio; 7239 return 0; 7240 err_virtio: 7241 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); 7242 err_dead: 7243 cpuhp_remove_multi_state(virtionet_online); 7244 out: 7245 return ret; 7246 } 7247 module_init(virtio_net_driver_init); 7248 7249 static __exit void 
virtio_net_driver_exit(void) 7250 { 7251 unregister_virtio_driver(&virtio_net_driver); 7252 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); 7253 cpuhp_remove_multi_state(virtionet_online); 7254 } 7255 module_exit(virtio_net_driver_exit); 7256 7257 MODULE_DEVICE_TABLE(virtio, id_table); 7258 MODULE_DESCRIPTION("Virtio network driver"); 7259 MODULE_LICENSE("GPL"); 7260