/*
 * Back-end of the driver for virtual network devices. This portion of the
 * driver exports a 'unified' network-device interface that can be accessed
 * by any operating system that implements a compatible front end. A
 * reference front-end implementation can be found in:
 *  drivers/net/xen-netfront.c
 *
 * Copyright (c) 2002-2005, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "common.h"

#include <linux/kthread.h>
#include <linux/if_vlan.h>
#include <linux/udp.h>
#include <linux/highmem.h>

#include <net/tcp.h>

#include <xen/xen.h>
#include <xen/events.h>
#include <xen/interface/memory.h>
#include <xen/page.h>

#include <asm/xen/hypercall.h>

/* Provide an option to disable split event channels at load time as
 * event channels are a limited resource. Split event channels are
 * enabled by default.
 */
bool separate_tx_rx_irq = true;
module_param(separate_tx_rx_irq, bool, 0644);

/* The time that packets can stay on the guest Rx internal queue
 * before they are dropped.
 */
unsigned int rx_drain_timeout_msecs = 10000;
module_param(rx_drain_timeout_msecs, uint, 0444);

/* The length of time before the frontend is considered unresponsive
 * because it isn't providing Rx slots.
 */
unsigned int rx_stall_timeout_msecs = 60000;
module_param(rx_stall_timeout_msecs, uint, 0444);

#define MAX_QUEUES_DEFAULT 8
unsigned int xenvif_max_queues;
module_param_named(max_queues, xenvif_max_queues, uint, 0644);
MODULE_PARM_DESC(max_queues,
		 "Maximum number of queues per virtual interface");

/*
 * This is the maximum number of slots a skb can have. If a guest sends a
 * skb which exceeds this limit it is considered malicious.
 */
#define FATAL_SKB_SLOTS_DEFAULT 20
static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT;
module_param(fatal_skb_slots, uint, 0444);

/* The amount to copy out of the first guest Tx slot into the skb's
 * linear area. If the first slot has more data, it will be mapped
 * and put into the first frag.
 *
 * This is sized to avoid pulling headers from the frags for most
 * TCP/IP packets.
 */
#define XEN_NETBACK_TX_COPY_LEN 128

/* This is the maximum number of flows in the hash cache. */
#define XENVIF_HASH_CACHE_SIZE_DEFAULT 64
unsigned int xenvif_hash_cache_size = XENVIF_HASH_CACHE_SIZE_DEFAULT;
module_param_named(hash_cache_size, xenvif_hash_cache_size, uint, 0644);
MODULE_PARM_DESC(hash_cache_size, "Number of flows in the hash cache");

static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
			       u8 status);

static void make_tx_response(struct xenvif_queue *queue,
			     struct xen_netif_tx_request *txp,
			     unsigned int extra_count,
			     s8 st);
static void push_tx_responses(struct xenvif_queue *queue);

static inline int tx_work_todo(struct xenvif_queue *queue);

static inline unsigned long idx_to_pfn(struct xenvif_queue *queue,
				       u16 idx)
{
	return page_to_pfn(queue->mmap_pages[idx]);
}

static inline unsigned long idx_to_kaddr(struct xenvif_queue *queue,
					 u16 idx)
{
	return (unsigned long)pfn_to_kaddr(idx_to_pfn(queue, idx));
}

#define callback_param(vif, pending_idx) \
	(vif->pending_tx_info[pending_idx].callback_struct)

/* Find the containing VIF's structure from a pointer in pending_tx_info array
 */
static inline struct xenvif_queue *ubuf_to_queue(const struct ubuf_info *ubuf)
{
	u16 pending_idx = ubuf->desc;
	struct pending_tx_info *temp =
		container_of(ubuf, struct pending_tx_info, callback_struct);
	return container_of(temp - pending_idx,
			    struct xenvif_queue,
			    pending_tx_info[0]);
}

static u16 frag_get_pending_idx(skb_frag_t *frag)
{
	return (u16)frag->page_offset;
}

static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx)
{
	frag->page_offset = pending_idx;
}

static inline pending_ring_idx_t pending_index(unsigned i)
{
	return i & (MAX_PENDING_REQS-1);
}

void xenvif_kick_thread(struct xenvif_queue *queue)
{
	wake_up(&queue->wq);
}

void xenvif_napi_schedule_or_enable_events(struct xenvif_queue *queue)
{
	int more_to_do;

	RING_FINAL_CHECK_FOR_REQUESTS(&queue->tx, more_to_do);

	if (more_to_do)
		napi_schedule(&queue->napi);
}

static void tx_add_credit(struct xenvif_queue *queue)
{
	unsigned long max_burst, max_credit;

	/*
	 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
	 * Otherwise the interface can seize up due to insufficient credit.
	 */
	max_burst = max(131072UL, queue->credit_bytes);

	/* Take care that adding a new chunk of credit doesn't wrap to zero. */
	max_credit = queue->remaining_credit + queue->credit_bytes;
	if (max_credit < queue->remaining_credit)
		max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */

	queue->remaining_credit = min(max_credit, max_burst);
}

void xenvif_tx_credit_callback(unsigned long data)
{
	struct xenvif_queue *queue = (struct xenvif_queue *)data;
	tx_add_credit(queue);
	xenvif_napi_schedule_or_enable_events(queue);
}

static void xenvif_tx_err(struct xenvif_queue *queue,
			  struct xen_netif_tx_request *txp,
			  unsigned int extra_count, RING_IDX end)
{
	RING_IDX cons = queue->tx.req_cons;
	unsigned long flags;

	do {
		spin_lock_irqsave(&queue->response_lock, flags);
		make_tx_response(queue, txp, extra_count, XEN_NETIF_RSP_ERROR);
		push_tx_responses(queue);
		spin_unlock_irqrestore(&queue->response_lock, flags);
		if (cons == end)
			break;
		RING_COPY_REQUEST(&queue->tx, cons++, txp);
		extra_count = 0; /* only the first frag can have extras */
	} while (1);
	queue->tx.req_cons = cons;
}

static void xenvif_fatal_tx_err(struct xenvif *vif)
{
	netdev_err(vif->dev, "fatal error; disabling device\n");
	vif->disabled = true;
	/* Disable the vif from queue 0's kthread */
	if (vif->num_queues)
		xenvif_kick_thread(&vif->queues[0]);
}

static int xenvif_count_requests(struct xenvif_queue *queue,
				 struct xen_netif_tx_request *first,
				 unsigned int extra_count,
				 struct xen_netif_tx_request *txp,
				 int work_to_do)
{
	RING_IDX cons = queue->tx.req_cons;
	int slots = 0;
	int drop_err = 0;
	int more_data;

	if (!(first->flags & XEN_NETTXF_more_data))
		return 0;

	do {
		struct xen_netif_tx_request dropped_tx = { 0 };

		if (slots >= work_to_do) {
			netdev_err(queue->vif->dev,
				   "Asked for %d slots but exceeds this limit\n",
				   work_to_do);
			xenvif_fatal_tx_err(queue->vif);
			return -ENODATA;
		}

		/* This guest is really using too many slots and
		 * is considered malicious.
		 */
		if (unlikely(slots >= fatal_skb_slots)) {
			netdev_err(queue->vif->dev,
				   "Malicious frontend using %d slots, threshold %u\n",
				   slots, fatal_skb_slots);
			xenvif_fatal_tx_err(queue->vif);
			return -E2BIG;
		}

		/* Xen network protocol had implicit dependency on
		 * MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to
		 * the historical MAX_SKB_FRAGS value 18 to honor the
		 * same behavior as before. Any packet using more than
		 * 18 slots but less than fatal_skb_slots slots is
		 * dropped.
		 */
		if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) {
			if (net_ratelimit())
				netdev_dbg(queue->vif->dev,
					   "Too many slots (%d) exceeding limit (%d), dropping packet\n",
					   slots, XEN_NETBK_LEGACY_SLOTS_MAX);
			drop_err = -E2BIG;
		}

		if (drop_err)
			txp = &dropped_tx;

		RING_COPY_REQUEST(&queue->tx, cons + slots, txp);

		/* If the guest submitted a frame >= 64 KiB then
		 * first->size overflowed and following slots will
		 * appear to be larger than the frame.
		 *
		 * This cannot be a fatal error as there are buggy
		 * frontends that do this.
		 *
		 * Consume all slots and drop the packet.
285 */ 286 if (!drop_err && txp->size > first->size) { 287 if (net_ratelimit()) 288 netdev_dbg(queue->vif->dev, 289 "Invalid tx request, slot size %u > remaining size %u\n", 290 txp->size, first->size); 291 drop_err = -EIO; 292 } 293 294 first->size -= txp->size; 295 slots++; 296 297 if (unlikely((txp->offset + txp->size) > XEN_PAGE_SIZE)) { 298 netdev_err(queue->vif->dev, "Cross page boundary, txp->offset: %u, size: %u\n", 299 txp->offset, txp->size); 300 xenvif_fatal_tx_err(queue->vif); 301 return -EINVAL; 302 } 303 304 more_data = txp->flags & XEN_NETTXF_more_data; 305 306 if (!drop_err) 307 txp++; 308 309 } while (more_data); 310 311 if (drop_err) { 312 xenvif_tx_err(queue, first, extra_count, cons + slots); 313 return drop_err; 314 } 315 316 return slots; 317 } 318 319 320 struct xenvif_tx_cb { 321 u16 pending_idx; 322 }; 323 324 #define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb) 325 326 static inline void xenvif_tx_create_map_op(struct xenvif_queue *queue, 327 u16 pending_idx, 328 struct xen_netif_tx_request *txp, 329 unsigned int extra_count, 330 struct gnttab_map_grant_ref *mop) 331 { 332 queue->pages_to_map[mop-queue->tx_map_ops] = queue->mmap_pages[pending_idx]; 333 gnttab_set_map_op(mop, idx_to_kaddr(queue, pending_idx), 334 GNTMAP_host_map | GNTMAP_readonly, 335 txp->gref, queue->vif->domid); 336 337 memcpy(&queue->pending_tx_info[pending_idx].req, txp, 338 sizeof(*txp)); 339 queue->pending_tx_info[pending_idx].extra_count = extra_count; 340 } 341 342 static inline struct sk_buff *xenvif_alloc_skb(unsigned int size) 343 { 344 struct sk_buff *skb = 345 alloc_skb(size + NET_SKB_PAD + NET_IP_ALIGN, 346 GFP_ATOMIC | __GFP_NOWARN); 347 if (unlikely(skb == NULL)) 348 return NULL; 349 350 /* Packets passed to netif_rx() must have some headroom. */ 351 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); 352 353 /* Initialize it here to avoid later surprises */ 354 skb_shinfo(skb)->destructor_arg = NULL; 355 356 return skb; 357 } 358 359 static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *queue, 360 struct sk_buff *skb, 361 struct xen_netif_tx_request *txp, 362 struct gnttab_map_grant_ref *gop, 363 unsigned int frag_overflow, 364 struct sk_buff *nskb) 365 { 366 struct skb_shared_info *shinfo = skb_shinfo(skb); 367 skb_frag_t *frags = shinfo->frags; 368 u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx; 369 int start; 370 pending_ring_idx_t index; 371 unsigned int nr_slots; 372 373 nr_slots = shinfo->nr_frags; 374 375 /* Skip first skb fragment if it is on same page as header fragment. 
	start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);

	for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots;
	     shinfo->nr_frags++, txp++, gop++) {
		index = pending_index(queue->pending_cons++);
		pending_idx = queue->pending_ring[index];
		xenvif_tx_create_map_op(queue, pending_idx, txp, 0, gop);
		frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx);
	}

	if (frag_overflow) {

		shinfo = skb_shinfo(nskb);
		frags = shinfo->frags;

		for (shinfo->nr_frags = 0; shinfo->nr_frags < frag_overflow;
		     shinfo->nr_frags++, txp++, gop++) {
			index = pending_index(queue->pending_cons++);
			pending_idx = queue->pending_ring[index];
			xenvif_tx_create_map_op(queue, pending_idx, txp, 0,
						gop);
			frag_set_pending_idx(&frags[shinfo->nr_frags],
					     pending_idx);
		}

		skb_shinfo(skb)->frag_list = nskb;
	}

	return gop;
}

static inline void xenvif_grant_handle_set(struct xenvif_queue *queue,
					   u16 pending_idx,
					   grant_handle_t handle)
{
	if (unlikely(queue->grant_tx_handle[pending_idx] !=
		     NETBACK_INVALID_HANDLE)) {
		netdev_err(queue->vif->dev,
			   "Trying to overwrite active handle! pending_idx: 0x%x\n",
			   pending_idx);
		BUG();
	}
	queue->grant_tx_handle[pending_idx] = handle;
}

static inline void xenvif_grant_handle_reset(struct xenvif_queue *queue,
					     u16 pending_idx)
{
	if (unlikely(queue->grant_tx_handle[pending_idx] ==
		     NETBACK_INVALID_HANDLE)) {
		netdev_err(queue->vif->dev,
			   "Trying to unmap invalid handle! pending_idx: 0x%x\n",
			   pending_idx);
		BUG();
	}
	queue->grant_tx_handle[pending_idx] = NETBACK_INVALID_HANDLE;
}

static int xenvif_tx_check_gop(struct xenvif_queue *queue,
			       struct sk_buff *skb,
			       struct gnttab_map_grant_ref **gopp_map,
			       struct gnttab_copy **gopp_copy)
{
	struct gnttab_map_grant_ref *gop_map = *gopp_map;
	u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
	/* This always points to the shinfo of the skb being checked, which
	 * could be either the first or the one on the frag_list
	 */
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	/* If this is non-NULL, we are currently checking the frag_list skb, and
	 * this points to the shinfo of the first one
	 */
	struct skb_shared_info *first_shinfo = NULL;
	int nr_frags = shinfo->nr_frags;
	const bool sharedslot = nr_frags &&
				frag_get_pending_idx(&shinfo->frags[0]) == pending_idx;
	int i, err;

	/* Check status of header. */
	err = (*gopp_copy)->status;
	if (unlikely(err)) {
		if (net_ratelimit())
			netdev_dbg(queue->vif->dev,
				   "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n",
				   (*gopp_copy)->status,
				   pending_idx,
				   (*gopp_copy)->source.u.ref);
		/* The first frag might still have this slot mapped */
		if (!sharedslot)
			xenvif_idx_release(queue, pending_idx,
					   XEN_NETIF_RSP_ERROR);
	}
	(*gopp_copy)++;

check_frags:
	for (i = 0; i < nr_frags; i++, gop_map++) {
		int j, newerr;

		pending_idx = frag_get_pending_idx(&shinfo->frags[i]);

		/* Check error status: if okay then remember grant handle. */
		newerr = gop_map->status;

		if (likely(!newerr)) {
			xenvif_grant_handle_set(queue,
						pending_idx,
						gop_map->handle);
			/* Had a previous error? Invalidate this fragment. */
			if (unlikely(err)) {
				xenvif_idx_unmap(queue, pending_idx);
				/* If the mapping of the first frag was OK, but
				 * the header's copy failed, and they are
				 * sharing a slot, send an error
				 */
				if (i == 0 && sharedslot)
					xenvif_idx_release(queue, pending_idx,
							   XEN_NETIF_RSP_ERROR);
				else
					xenvif_idx_release(queue, pending_idx,
							   XEN_NETIF_RSP_OKAY);
			}
			continue;
		}

		/* Error on this fragment: respond to client with an error. */
		if (net_ratelimit())
			netdev_dbg(queue->vif->dev,
				   "Grant map of %d. frag failed! status: %d pending_idx: %u ref: %u\n",
				   i,
				   gop_map->status,
				   pending_idx,
				   gop_map->ref);

		xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_ERROR);

		/* Not the first error? Preceding frags already invalidated. */
		if (err)
			continue;

		/* First error: if the header hasn't shared a slot with the
		 * first frag, release it as well.
		 */
		if (!sharedslot)
			xenvif_idx_release(queue,
					   XENVIF_TX_CB(skb)->pending_idx,
					   XEN_NETIF_RSP_OKAY);

		/* Invalidate preceding fragments of this skb. */
		for (j = 0; j < i; j++) {
			pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
			xenvif_idx_unmap(queue, pending_idx);
			xenvif_idx_release(queue, pending_idx,
					   XEN_NETIF_RSP_OKAY);
		}

		/* And if we found the error while checking the frag_list, unmap
		 * the first skb's frags
		 */
		if (first_shinfo) {
			for (j = 0; j < first_shinfo->nr_frags; j++) {
				pending_idx = frag_get_pending_idx(&first_shinfo->frags[j]);
				xenvif_idx_unmap(queue, pending_idx);
				xenvif_idx_release(queue, pending_idx,
						   XEN_NETIF_RSP_OKAY);
			}
		}

		/* Remember the error: invalidate all subsequent fragments. */
		err = newerr;
	}

	if (skb_has_frag_list(skb) && !first_shinfo) {
		first_shinfo = skb_shinfo(skb);
		shinfo = skb_shinfo(skb_shinfo(skb)->frag_list);
		nr_frags = shinfo->nr_frags;

		goto check_frags;
	}

	*gopp_map = gop_map;
	return err;
}

static void xenvif_fill_frags(struct xenvif_queue *queue, struct sk_buff *skb)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int nr_frags = shinfo->nr_frags;
	int i;
	u16 prev_pending_idx = INVALID_PENDING_IDX;

	for (i = 0; i < nr_frags; i++) {
		skb_frag_t *frag = shinfo->frags + i;
		struct xen_netif_tx_request *txp;
		struct page *page;
		u16 pending_idx;

		pending_idx = frag_get_pending_idx(frag);

		/* If this is not the first frag, chain it to the previous */
		if (prev_pending_idx == INVALID_PENDING_IDX)
			skb_shinfo(skb)->destructor_arg =
				&callback_param(queue, pending_idx);
		else
			callback_param(queue, prev_pending_idx).ctx =
				&callback_param(queue, pending_idx);

		callback_param(queue, pending_idx).ctx = NULL;
		prev_pending_idx = pending_idx;

		txp = &queue->pending_tx_info[pending_idx].req;
		page = virt_to_page(idx_to_kaddr(queue, pending_idx));
		__skb_fill_page_desc(skb, i, page, txp->offset, txp->size);
		skb->len += txp->size;
		skb->data_len += txp->size;
		skb->truesize += txp->size;

		/* Take an extra reference to offset network stack's put_page */
		get_page(queue->mmap_pages[pending_idx]);
	}
}

static int xenvif_get_extras(struct xenvif_queue *queue,
			     struct xen_netif_extra_info *extras,
			     unsigned int *extra_count,
			     int work_to_do)
{
	struct xen_netif_extra_info extra;
	RING_IDX cons = queue->tx.req_cons;

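	/* Extra info segments follow the request that set
	 * XEN_NETTXF_extra_info; consume one ring slot per segment until
	 * the XEN_NETIF_EXTRA_FLAG_MORE flag is clear.
	 */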
	do {
		if (unlikely(work_to_do-- <= 0)) {
			netdev_err(queue->vif->dev, "Missing extra info\n");
			xenvif_fatal_tx_err(queue->vif);
			return -EBADR;
		}

		RING_COPY_REQUEST(&queue->tx, cons, &extra);

		queue->tx.req_cons = ++cons;
		(*extra_count)++;

		if (unlikely(!extra.type ||
			     extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
			netdev_err(queue->vif->dev,
				   "Invalid extra type: %d\n", extra.type);
			xenvif_fatal_tx_err(queue->vif);
			return -EINVAL;
		}

		memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
	} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);

	return work_to_do;
}

static int xenvif_set_skb_gso(struct xenvif *vif,
			      struct sk_buff *skb,
			      struct xen_netif_extra_info *gso)
{
	if (!gso->u.gso.size) {
		netdev_err(vif->dev, "GSO size must not be zero.\n");
		xenvif_fatal_tx_err(vif);
		return -EINVAL;
	}

	switch (gso->u.gso.type) {
	case XEN_NETIF_GSO_TYPE_TCPV4:
		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
		break;
	case XEN_NETIF_GSO_TYPE_TCPV6:
		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
		break;
	default:
		netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
		xenvif_fatal_tx_err(vif);
		return -EINVAL;
	}

	skb_shinfo(skb)->gso_size = gso->u.gso.size;
	/* gso_segs will be calculated later */

	return 0;
}

static int checksum_setup(struct xenvif_queue *queue, struct sk_buff *skb)
{
	bool recalculate_partial_csum = false;

	/* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
	 * peers can fail to set NETRXF_csum_blank when sending a GSO
	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
	 * recalculate the partial checksum.
	 */
	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
		queue->stats.rx_gso_checksum_fixup++;
		skb->ip_summed = CHECKSUM_PARTIAL;
		recalculate_partial_csum = true;
	}

	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return 0;

	return skb_checksum_setup(skb, recalculate_partial_csum);
}

static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size)
{
	u64 now = get_jiffies_64();
	u64 next_credit = queue->credit_window_start +
		msecs_to_jiffies(queue->credit_usec / 1000);

	/* Timer could already be pending in rare cases. */
	if (timer_pending(&queue->credit_timeout))
		return true;

	/* Passed the point where we can replenish credit? */
	if (time_after_eq64(now, next_credit)) {
		queue->credit_window_start = now;
		tx_add_credit(queue);
	}

	/* Still too big to send right now? Set a callback. */
	if (size > queue->remaining_credit) {
		queue->credit_timeout.data =
			(unsigned long)queue;
		mod_timer(&queue->credit_timeout,
			  next_credit);
		queue->credit_window_start = next_credit;

		return true;
	}

	return false;
}

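/* Multicast filtering: the frontend requests addresses via
 * XEN_NETIF_EXTRA_TYPE_MCAST_ADD/DEL extra info segments, which
 * xenvif_tx_build_gops() hands to xenvif_mcast_add()/xenvif_mcast_del().
 */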
715 */ 716 717 static int xenvif_mcast_add(struct xenvif *vif, const u8 *addr) 718 { 719 struct xenvif_mcast_addr *mcast; 720 721 if (vif->fe_mcast_count == XEN_NETBK_MCAST_MAX) { 722 if (net_ratelimit()) 723 netdev_err(vif->dev, 724 "Too many multicast addresses\n"); 725 return -ENOSPC; 726 } 727 728 mcast = kzalloc(sizeof(*mcast), GFP_ATOMIC); 729 if (!mcast) 730 return -ENOMEM; 731 732 ether_addr_copy(mcast->addr, addr); 733 list_add_tail_rcu(&mcast->entry, &vif->fe_mcast_addr); 734 vif->fe_mcast_count++; 735 736 return 0; 737 } 738 739 static void xenvif_mcast_del(struct xenvif *vif, const u8 *addr) 740 { 741 struct xenvif_mcast_addr *mcast; 742 743 list_for_each_entry_rcu(mcast, &vif->fe_mcast_addr, entry) { 744 if (ether_addr_equal(addr, mcast->addr)) { 745 --vif->fe_mcast_count; 746 list_del_rcu(&mcast->entry); 747 kfree_rcu(mcast, rcu); 748 break; 749 } 750 } 751 } 752 753 bool xenvif_mcast_match(struct xenvif *vif, const u8 *addr) 754 { 755 struct xenvif_mcast_addr *mcast; 756 757 rcu_read_lock(); 758 list_for_each_entry_rcu(mcast, &vif->fe_mcast_addr, entry) { 759 if (ether_addr_equal(addr, mcast->addr)) { 760 rcu_read_unlock(); 761 return true; 762 } 763 } 764 rcu_read_unlock(); 765 766 return false; 767 } 768 769 void xenvif_mcast_addr_list_free(struct xenvif *vif) 770 { 771 /* No need for locking or RCU here. NAPI poll and TX queue 772 * are stopped. 773 */ 774 while (!list_empty(&vif->fe_mcast_addr)) { 775 struct xenvif_mcast_addr *mcast; 776 777 mcast = list_first_entry(&vif->fe_mcast_addr, 778 struct xenvif_mcast_addr, 779 entry); 780 --vif->fe_mcast_count; 781 list_del(&mcast->entry); 782 kfree(mcast); 783 } 784 } 785 786 static void xenvif_tx_build_gops(struct xenvif_queue *queue, 787 int budget, 788 unsigned *copy_ops, 789 unsigned *map_ops) 790 { 791 struct gnttab_map_grant_ref *gop = queue->tx_map_ops; 792 struct sk_buff *skb, *nskb; 793 int ret; 794 unsigned int frag_overflow; 795 796 while (skb_queue_len(&queue->tx_queue) < budget) { 797 struct xen_netif_tx_request txreq; 798 struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX]; 799 struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1]; 800 unsigned int extra_count; 801 u16 pending_idx; 802 RING_IDX idx; 803 int work_to_do; 804 unsigned int data_len; 805 pending_ring_idx_t index; 806 807 if (queue->tx.sring->req_prod - queue->tx.req_cons > 808 XEN_NETIF_TX_RING_SIZE) { 809 netdev_err(queue->vif->dev, 810 "Impossible number of requests. " 811 "req_prod %d, req_cons %d, size %ld\n", 812 queue->tx.sring->req_prod, queue->tx.req_cons, 813 XEN_NETIF_TX_RING_SIZE); 814 xenvif_fatal_tx_err(queue->vif); 815 break; 816 } 817 818 work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&queue->tx); 819 if (!work_to_do) 820 break; 821 822 idx = queue->tx.req_cons; 823 rmb(); /* Ensure that we see the request before we copy it. */ 824 RING_COPY_REQUEST(&queue->tx, idx, &txreq); 825 826 /* Credit-based scheduling. 
		if (txreq.size > queue->remaining_credit &&
		    tx_credit_exceeded(queue, txreq.size))
			break;

		queue->remaining_credit -= txreq.size;

		work_to_do--;
		queue->tx.req_cons = ++idx;

		memset(extras, 0, sizeof(extras));
		extra_count = 0;
		if (txreq.flags & XEN_NETTXF_extra_info) {
			work_to_do = xenvif_get_extras(queue, extras,
						       &extra_count,
						       work_to_do);
			idx = queue->tx.req_cons;
			if (unlikely(work_to_do < 0))
				break;
		}

		if (extras[XEN_NETIF_EXTRA_TYPE_MCAST_ADD - 1].type) {
			struct xen_netif_extra_info *extra;

			extra = &extras[XEN_NETIF_EXTRA_TYPE_MCAST_ADD - 1];
			ret = xenvif_mcast_add(queue->vif, extra->u.mcast.addr);

			make_tx_response(queue, &txreq, extra_count,
					 (ret == 0) ?
					 XEN_NETIF_RSP_OKAY :
					 XEN_NETIF_RSP_ERROR);
			push_tx_responses(queue);
			continue;
		}

		if (extras[XEN_NETIF_EXTRA_TYPE_MCAST_DEL - 1].type) {
			struct xen_netif_extra_info *extra;

			extra = &extras[XEN_NETIF_EXTRA_TYPE_MCAST_DEL - 1];
			xenvif_mcast_del(queue->vif, extra->u.mcast.addr);

			make_tx_response(queue, &txreq, extra_count,
					 XEN_NETIF_RSP_OKAY);
			push_tx_responses(queue);
			continue;
		}

		ret = xenvif_count_requests(queue, &txreq, extra_count,
					    txfrags, work_to_do);
		if (unlikely(ret < 0))
			break;

		idx += ret;

		if (unlikely(txreq.size < ETH_HLEN)) {
			netdev_dbg(queue->vif->dev,
				   "Bad packet size: %d\n", txreq.size);
			xenvif_tx_err(queue, &txreq, extra_count, idx);
			break;
		}

		/* No crossing a page as the payload mustn't fragment. */
		if (unlikely((txreq.offset + txreq.size) > XEN_PAGE_SIZE)) {
			netdev_err(queue->vif->dev,
				   "txreq.offset: %u, size: %u, end: %lu\n",
				   txreq.offset, txreq.size,
				   (unsigned long)(txreq.offset&~XEN_PAGE_MASK) + txreq.size);
			xenvif_fatal_tx_err(queue->vif);
			break;
		}

		index = pending_index(queue->pending_cons);
		pending_idx = queue->pending_ring[index];

		data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN &&
			    ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
			XEN_NETBACK_TX_COPY_LEN : txreq.size;

		skb = xenvif_alloc_skb(data_len);
		if (unlikely(skb == NULL)) {
			netdev_dbg(queue->vif->dev,
				   "Can't allocate a skb in start_xmit.\n");
			xenvif_tx_err(queue, &txreq, extra_count, idx);
			break;
		}

		skb_shinfo(skb)->nr_frags = ret;
		if (data_len < txreq.size)
			skb_shinfo(skb)->nr_frags++;
		/* At this point shinfo->nr_frags is in fact the number of
		 * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
		 */
		frag_overflow = 0;
		nskb = NULL;
		if (skb_shinfo(skb)->nr_frags > MAX_SKB_FRAGS) {
			frag_overflow = skb_shinfo(skb)->nr_frags - MAX_SKB_FRAGS;
			BUG_ON(frag_overflow > MAX_SKB_FRAGS);
			skb_shinfo(skb)->nr_frags = MAX_SKB_FRAGS;
			nskb = xenvif_alloc_skb(0);
			if (unlikely(nskb == NULL)) {
				kfree_skb(skb);
				xenvif_tx_err(queue, &txreq, extra_count, idx);
				if (net_ratelimit())
					netdev_err(queue->vif->dev,
						   "Can't allocate the frag_list skb.\n");
				break;
			}
		}

		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
			struct xen_netif_extra_info *gso;
			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];

			if (xenvif_set_skb_gso(queue->vif, skb, gso)) {
				/* Failure in xenvif_set_skb_gso is fatal. */
				kfree_skb(skb);
				kfree_skb(nskb);
				break;
			}
		}

		if (extras[XEN_NETIF_EXTRA_TYPE_HASH - 1].type) {
			struct xen_netif_extra_info *extra;
			enum pkt_hash_types type = PKT_HASH_TYPE_NONE;

			extra = &extras[XEN_NETIF_EXTRA_TYPE_HASH - 1];

			switch (extra->u.hash.type) {
			case _XEN_NETIF_CTRL_HASH_TYPE_IPV4:
			case _XEN_NETIF_CTRL_HASH_TYPE_IPV6:
				type = PKT_HASH_TYPE_L3;
				break;

			case _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP:
			case _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP:
				type = PKT_HASH_TYPE_L4;
				break;

			default:
				break;
			}

			if (type != PKT_HASH_TYPE_NONE)
				skb_set_hash(skb,
					     *(u32 *)extra->u.hash.value,
					     type);
		}

		XENVIF_TX_CB(skb)->pending_idx = pending_idx;

		__skb_put(skb, data_len);
		queue->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref;
		queue->tx_copy_ops[*copy_ops].source.domid = queue->vif->domid;
		queue->tx_copy_ops[*copy_ops].source.offset = txreq.offset;

		queue->tx_copy_ops[*copy_ops].dest.u.gmfn =
			virt_to_gfn(skb->data);
		queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF;
		queue->tx_copy_ops[*copy_ops].dest.offset =
			offset_in_page(skb->data) & ~XEN_PAGE_MASK;

		queue->tx_copy_ops[*copy_ops].len = data_len;
		queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref;

		(*copy_ops)++;

		if (data_len < txreq.size) {
			frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
					     pending_idx);
			xenvif_tx_create_map_op(queue, pending_idx, &txreq,
						extra_count, gop);
			gop++;
		} else {
			frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
					     INVALID_PENDING_IDX);
			memcpy(&queue->pending_tx_info[pending_idx].req,
			       &txreq, sizeof(txreq));
			queue->pending_tx_info[pending_idx].extra_count =
				extra_count;
		}

		queue->pending_cons++;

		gop = xenvif_get_requests(queue, skb, txfrags, gop,
					  frag_overflow, nskb);

		__skb_queue_tail(&queue->tx_queue, skb);

		queue->tx.req_cons = idx;

		if (((gop-queue->tx_map_ops) >= ARRAY_SIZE(queue->tx_map_ops)) ||
		    (*copy_ops >= ARRAY_SIZE(queue->tx_copy_ops)))
			break;
	}

	(*map_ops) = gop - queue->tx_map_ops;
	return;
}

/* Consolidate skb with a frag_list into a brand new one with local pages on
 * frags. Returns 0 or -ENOMEM if can't allocate new pages.
 */
static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *skb)
{
	unsigned int offset = skb_headlen(skb);
	skb_frag_t frags[MAX_SKB_FRAGS];
	int i, f;
	struct ubuf_info *uarg;
	struct sk_buff *nskb = skb_shinfo(skb)->frag_list;

	queue->stats.tx_zerocopy_sent += 2;
	queue->stats.tx_frag_overflow++;

	xenvif_fill_frags(queue, nskb);
	/* Subtract frags size, we will correct it later */
	skb->truesize -= skb->data_len;
	skb->len += nskb->len;
	skb->data_len += nskb->len;

	/* create a brand new frags array and coalesce there */
	for (i = 0; offset < skb->len; i++) {
		struct page *page;
		unsigned int len;

		BUG_ON(i >= MAX_SKB_FRAGS);
		page = alloc_page(GFP_ATOMIC);
		if (!page) {
			int j;
			skb->truesize += skb->data_len;
			for (j = 0; j < i; j++)
				put_page(frags[j].page.p);
			return -ENOMEM;
		}

		if (offset + PAGE_SIZE < skb->len)
			len = PAGE_SIZE;
		else
			len = skb->len - offset;
		if (skb_copy_bits(skb, offset, page_address(page), len))
			BUG();

		offset += len;
		frags[i].page.p = page;
		frags[i].page_offset = 0;
		skb_frag_size_set(&frags[i], len);
	}

	/* Copied all the bits from the frag list -- free it. */
	skb_frag_list_init(skb);
	xenvif_skb_zerocopy_prepare(queue, nskb);
	kfree_skb(nskb);

	/* Release all the original (foreign) frags. */
	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
		skb_frag_unref(skb, f);
	uarg = skb_shinfo(skb)->destructor_arg;
	/* increase inflight counter to offset decrement in callback */
	atomic_inc(&queue->inflight_packets);
	uarg->callback(uarg, true);
	skb_shinfo(skb)->destructor_arg = NULL;

	/* Fill the skb with the new (local) frags. */
	memcpy(skb_shinfo(skb)->frags, frags, i * sizeof(skb_frag_t));
	skb_shinfo(skb)->nr_frags = i;
	skb->truesize += i * PAGE_SIZE;

	return 0;
}

static int xenvif_tx_submit(struct xenvif_queue *queue)
{
	struct gnttab_map_grant_ref *gop_map = queue->tx_map_ops;
	struct gnttab_copy *gop_copy = queue->tx_copy_ops;
	struct sk_buff *skb;
	int work_done = 0;

	while ((skb = __skb_dequeue(&queue->tx_queue)) != NULL) {
		struct xen_netif_tx_request *txp;
		u16 pending_idx;
		unsigned data_len;

		pending_idx = XENVIF_TX_CB(skb)->pending_idx;
		txp = &queue->pending_tx_info[pending_idx].req;

		/* Check the remap error code. */
		if (unlikely(xenvif_tx_check_gop(queue, skb, &gop_map, &gop_copy))) {
			/* If there was an error, xenvif_tx_check_gop is
			 * expected to release all the frags which were mapped,
			 * so kfree_skb shouldn't do it again
			 */
			skb_shinfo(skb)->nr_frags = 0;
			if (skb_has_frag_list(skb)) {
				struct sk_buff *nskb =
					skb_shinfo(skb)->frag_list;
				skb_shinfo(nskb)->nr_frags = 0;
			}
			kfree_skb(skb);
			continue;
		}

		data_len = skb->len;
		callback_param(queue, pending_idx).ctx = NULL;
		if (data_len < txp->size) {
			/* Append the packet payload as a fragment. */
			txp->offset += data_len;
			txp->size -= data_len;
		} else {
			/* Schedule a response immediately. */
			xenvif_idx_release(queue, pending_idx,
					   XEN_NETIF_RSP_OKAY);
		}

		if (txp->flags & XEN_NETTXF_csum_blank)
			skb->ip_summed = CHECKSUM_PARTIAL;
		else if (txp->flags & XEN_NETTXF_data_validated)
			skb->ip_summed = CHECKSUM_UNNECESSARY;

		xenvif_fill_frags(queue, skb);

		if (unlikely(skb_has_frag_list(skb))) {
			if (xenvif_handle_frag_list(queue, skb)) {
				if (net_ratelimit())
					netdev_err(queue->vif->dev,
						   "Not enough memory to consolidate frag_list!\n");
				xenvif_skb_zerocopy_prepare(queue, skb);
				kfree_skb(skb);
				continue;
			}
		}

		skb->dev = queue->vif->dev;
		skb->protocol = eth_type_trans(skb, skb->dev);
		skb_reset_network_header(skb);

		if (checksum_setup(queue, skb)) {
			netdev_dbg(queue->vif->dev,
				   "Can't setup checksum in net_tx_action\n");
			/* We have to set this flag to trigger the callback */
			if (skb_shinfo(skb)->destructor_arg)
				xenvif_skb_zerocopy_prepare(queue, skb);
			kfree_skb(skb);
			continue;
		}

		skb_probe_transport_header(skb, 0);

		/* If the packet is GSO then we will have just set up the
		 * transport header offset in checksum_setup so it's now
		 * straightforward to calculate gso_segs.
		 */
		if (skb_is_gso(skb)) {
			int mss = skb_shinfo(skb)->gso_size;
			int hdrlen = skb_transport_header(skb) -
				skb_mac_header(skb) +
				tcp_hdrlen(skb);

			skb_shinfo(skb)->gso_segs =
				DIV_ROUND_UP(skb->len - hdrlen, mss);
		}

		queue->stats.rx_bytes += skb->len;
		queue->stats.rx_packets++;

		work_done++;

		/* Set this flag right before netif_receive_skb, otherwise
		 * someone might think this packet already left netback, and
		 * do a skb_copy_ubufs while we are still in control of the
		 * skb. E.g. the __pskb_pull_tail earlier can do such thing.
		 */
		if (skb_shinfo(skb)->destructor_arg) {
			xenvif_skb_zerocopy_prepare(queue, skb);
			queue->stats.tx_zerocopy_sent++;
		}

		netif_receive_skb(skb);
	}

	return work_done;
}

void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success)
{
	unsigned long flags;
	pending_ring_idx_t index;
	struct xenvif_queue *queue = ubuf_to_queue(ubuf);

	/* This is the only place where we grab this lock, to protect callbacks
	 * from each other.
	 */
	spin_lock_irqsave(&queue->callback_lock, flags);
	do {
		u16 pending_idx = ubuf->desc;
		ubuf = (struct ubuf_info *) ubuf->ctx;
		BUG_ON(queue->dealloc_prod - queue->dealloc_cons >=
			MAX_PENDING_REQS);
		index = pending_index(queue->dealloc_prod);
		queue->dealloc_ring[index] = pending_idx;
		/* Sync with xenvif_tx_dealloc_action:
		 * insert idx then incr producer.
		 */
		smp_wmb();
		queue->dealloc_prod++;
	} while (ubuf);
	spin_unlock_irqrestore(&queue->callback_lock, flags);

	if (likely(zerocopy_success))
		queue->stats.tx_zerocopy_success++;
	else
		queue->stats.tx_zerocopy_fail++;
	xenvif_skb_zerocopy_complete(queue);
}

static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue)
{
	struct gnttab_unmap_grant_ref *gop;
	pending_ring_idx_t dc, dp;
	u16 pending_idx, pending_idx_release[MAX_PENDING_REQS];
	unsigned int i = 0;

	dc = queue->dealloc_cons;
	gop = queue->tx_unmap_ops;

	/* Free up any grants we have finished using */
	do {
		dp = queue->dealloc_prod;

		/* Ensure we see all indices enqueued by all
		 * xenvif_zerocopy_callback().
		 */
		smp_rmb();

		while (dc != dp) {
			BUG_ON(gop - queue->tx_unmap_ops >= MAX_PENDING_REQS);
			pending_idx =
				queue->dealloc_ring[pending_index(dc++)];

			pending_idx_release[gop - queue->tx_unmap_ops] =
				pending_idx;
			queue->pages_to_unmap[gop - queue->tx_unmap_ops] =
				queue->mmap_pages[pending_idx];
			gnttab_set_unmap_op(gop,
					    idx_to_kaddr(queue, pending_idx),
					    GNTMAP_host_map,
					    queue->grant_tx_handle[pending_idx]);
			xenvif_grant_handle_reset(queue, pending_idx);
			++gop;
		}

	} while (dp != queue->dealloc_prod);

	queue->dealloc_cons = dc;

	if (gop - queue->tx_unmap_ops > 0) {
		int ret;
		ret = gnttab_unmap_refs(queue->tx_unmap_ops,
					NULL,
					queue->pages_to_unmap,
					gop - queue->tx_unmap_ops);
		if (ret) {
			netdev_err(queue->vif->dev, "Unmap fail: nr_ops %tu ret %d\n",
				   gop - queue->tx_unmap_ops, ret);
			for (i = 0; i < gop - queue->tx_unmap_ops; ++i) {
				if (gop[i].status != GNTST_okay)
					netdev_err(queue->vif->dev,
						   " host_addr: 0x%llx handle: 0x%x status: %d\n",
						   gop[i].host_addr,
						   gop[i].handle,
						   gop[i].status);
			}
			BUG();
		}
	}

	for (i = 0; i < gop - queue->tx_unmap_ops; ++i)
		xenvif_idx_release(queue, pending_idx_release[i],
				   XEN_NETIF_RSP_OKAY);
}


/* Called after netfront has transmitted */
int xenvif_tx_action(struct xenvif_queue *queue, int budget)
{
	unsigned nr_mops, nr_cops = 0;
	int work_done, ret;

	if (unlikely(!tx_work_todo(queue)))
		return 0;

	xenvif_tx_build_gops(queue, budget, &nr_cops, &nr_mops);

	if (nr_cops == 0)
		return 0;

	gnttab_batch_copy(queue->tx_copy_ops, nr_cops);
	if (nr_mops != 0) {
		ret = gnttab_map_refs(queue->tx_map_ops,
				      NULL,
				      queue->pages_to_map,
				      nr_mops);
		BUG_ON(ret);
	}

	work_done = xenvif_tx_submit(queue);

	return work_done;
}

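/* Completion path: hand a pending slot back to the pending ring and
 * queue the corresponding Tx response for the frontend.
 */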
1351 */ 1352 index = pending_index(queue->pending_prod++); 1353 queue->pending_ring[index] = pending_idx; 1354 1355 push_tx_responses(queue); 1356 1357 spin_unlock_irqrestore(&queue->response_lock, flags); 1358 } 1359 1360 1361 static void make_tx_response(struct xenvif_queue *queue, 1362 struct xen_netif_tx_request *txp, 1363 unsigned int extra_count, 1364 s8 st) 1365 { 1366 RING_IDX i = queue->tx.rsp_prod_pvt; 1367 struct xen_netif_tx_response *resp; 1368 1369 resp = RING_GET_RESPONSE(&queue->tx, i); 1370 resp->id = txp->id; 1371 resp->status = st; 1372 1373 while (extra_count-- != 0) 1374 RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL; 1375 1376 queue->tx.rsp_prod_pvt = ++i; 1377 } 1378 1379 static void push_tx_responses(struct xenvif_queue *queue) 1380 { 1381 int notify; 1382 1383 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify); 1384 if (notify) 1385 notify_remote_via_irq(queue->tx_irq); 1386 } 1387 1388 void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx) 1389 { 1390 int ret; 1391 struct gnttab_unmap_grant_ref tx_unmap_op; 1392 1393 gnttab_set_unmap_op(&tx_unmap_op, 1394 idx_to_kaddr(queue, pending_idx), 1395 GNTMAP_host_map, 1396 queue->grant_tx_handle[pending_idx]); 1397 xenvif_grant_handle_reset(queue, pending_idx); 1398 1399 ret = gnttab_unmap_refs(&tx_unmap_op, NULL, 1400 &queue->mmap_pages[pending_idx], 1); 1401 if (ret) { 1402 netdev_err(queue->vif->dev, 1403 "Unmap fail: ret: %d pending_idx: %d host_addr: %llx handle: 0x%x status: %d\n", 1404 ret, 1405 pending_idx, 1406 tx_unmap_op.host_addr, 1407 tx_unmap_op.handle, 1408 tx_unmap_op.status); 1409 BUG(); 1410 } 1411 } 1412 1413 static inline int tx_work_todo(struct xenvif_queue *queue) 1414 { 1415 if (likely(RING_HAS_UNCONSUMED_REQUESTS(&queue->tx))) 1416 return 1; 1417 1418 return 0; 1419 } 1420 1421 static inline bool tx_dealloc_work_todo(struct xenvif_queue *queue) 1422 { 1423 return queue->dealloc_cons != queue->dealloc_prod; 1424 } 1425 1426 void xenvif_unmap_frontend_data_rings(struct xenvif_queue *queue) 1427 { 1428 if (queue->tx.sring) 1429 xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(queue->vif), 1430 queue->tx.sring); 1431 if (queue->rx.sring) 1432 xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(queue->vif), 1433 queue->rx.sring); 1434 } 1435 1436 int xenvif_map_frontend_data_rings(struct xenvif_queue *queue, 1437 grant_ref_t tx_ring_ref, 1438 grant_ref_t rx_ring_ref) 1439 { 1440 void *addr; 1441 struct xen_netif_tx_sring *txs; 1442 struct xen_netif_rx_sring *rxs; 1443 1444 int err = -ENOMEM; 1445 1446 err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif), 1447 &tx_ring_ref, 1, &addr); 1448 if (err) 1449 goto err; 1450 1451 txs = (struct xen_netif_tx_sring *)addr; 1452 BACK_RING_INIT(&queue->tx, txs, XEN_PAGE_SIZE); 1453 1454 err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif), 1455 &rx_ring_ref, 1, &addr); 1456 if (err) 1457 goto err; 1458 1459 rxs = (struct xen_netif_rx_sring *)addr; 1460 BACK_RING_INIT(&queue->rx, rxs, XEN_PAGE_SIZE); 1461 1462 return 0; 1463 1464 err: 1465 xenvif_unmap_frontend_data_rings(queue); 1466 return err; 1467 } 1468 1469 static bool xenvif_dealloc_kthread_should_stop(struct xenvif_queue *queue) 1470 { 1471 /* Dealloc thread must remain running until all inflight 1472 * packets complete. 
1473 */ 1474 return kthread_should_stop() && 1475 !atomic_read(&queue->inflight_packets); 1476 } 1477 1478 int xenvif_dealloc_kthread(void *data) 1479 { 1480 struct xenvif_queue *queue = data; 1481 1482 for (;;) { 1483 wait_event_interruptible(queue->dealloc_wq, 1484 tx_dealloc_work_todo(queue) || 1485 xenvif_dealloc_kthread_should_stop(queue)); 1486 if (xenvif_dealloc_kthread_should_stop(queue)) 1487 break; 1488 1489 xenvif_tx_dealloc_action(queue); 1490 cond_resched(); 1491 } 1492 1493 /* Unmap anything remaining*/ 1494 if (tx_dealloc_work_todo(queue)) 1495 xenvif_tx_dealloc_action(queue); 1496 1497 return 0; 1498 } 1499 1500 static void make_ctrl_response(struct xenvif *vif, 1501 const struct xen_netif_ctrl_request *req, 1502 u32 status, u32 data) 1503 { 1504 RING_IDX idx = vif->ctrl.rsp_prod_pvt; 1505 struct xen_netif_ctrl_response rsp = { 1506 .id = req->id, 1507 .type = req->type, 1508 .status = status, 1509 .data = data, 1510 }; 1511 1512 *RING_GET_RESPONSE(&vif->ctrl, idx) = rsp; 1513 vif->ctrl.rsp_prod_pvt = ++idx; 1514 } 1515 1516 static void push_ctrl_response(struct xenvif *vif) 1517 { 1518 int notify; 1519 1520 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->ctrl, notify); 1521 if (notify) 1522 notify_remote_via_irq(vif->ctrl_irq); 1523 } 1524 1525 static void process_ctrl_request(struct xenvif *vif, 1526 const struct xen_netif_ctrl_request *req) 1527 { 1528 u32 status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED; 1529 u32 data = 0; 1530 1531 switch (req->type) { 1532 case XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM: 1533 status = xenvif_set_hash_alg(vif, req->data[0]); 1534 break; 1535 1536 case XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS: 1537 status = xenvif_get_hash_flags(vif, &data); 1538 break; 1539 1540 case XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS: 1541 status = xenvif_set_hash_flags(vif, req->data[0]); 1542 break; 1543 1544 case XEN_NETIF_CTRL_TYPE_SET_HASH_KEY: 1545 status = xenvif_set_hash_key(vif, req->data[0], 1546 req->data[1]); 1547 break; 1548 1549 case XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE: 1550 status = XEN_NETIF_CTRL_STATUS_SUCCESS; 1551 data = XEN_NETBK_MAX_HASH_MAPPING_SIZE; 1552 break; 1553 1554 case XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE: 1555 status = xenvif_set_hash_mapping_size(vif, 1556 req->data[0]); 1557 break; 1558 1559 case XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING: 1560 status = xenvif_set_hash_mapping(vif, req->data[0], 1561 req->data[1], 1562 req->data[2]); 1563 break; 1564 1565 default: 1566 break; 1567 } 1568 1569 make_ctrl_response(vif, req, status, data); 1570 push_ctrl_response(vif); 1571 } 1572 1573 static void xenvif_ctrl_action(struct xenvif *vif) 1574 { 1575 for (;;) { 1576 RING_IDX req_prod, req_cons; 1577 1578 req_prod = vif->ctrl.sring->req_prod; 1579 req_cons = vif->ctrl.req_cons; 1580 1581 /* Make sure we can see requests before we process them. 
static void xenvif_ctrl_action(struct xenvif *vif)
{
	for (;;) {
		RING_IDX req_prod, req_cons;

		req_prod = vif->ctrl.sring->req_prod;
		req_cons = vif->ctrl.req_cons;

		/* Make sure we can see requests before we process them. */
		rmb();

		if (req_cons == req_prod)
			break;

		while (req_cons != req_prod) {
			struct xen_netif_ctrl_request req;

			RING_COPY_REQUEST(&vif->ctrl, req_cons, &req);
			req_cons++;

			process_ctrl_request(vif, &req);
		}

		vif->ctrl.req_cons = req_cons;
		vif->ctrl.sring->req_event = req_cons + 1;
	}
}

static bool xenvif_ctrl_work_todo(struct xenvif *vif)
{
	if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->ctrl)))
		return 1;

	return 0;
}

irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data)
{
	struct xenvif *vif = data;

	while (xenvif_ctrl_work_todo(vif))
		xenvif_ctrl_action(vif);

	return IRQ_HANDLED;
}

static int __init netback_init(void)
{
	int rc = 0;

	if (!xen_domain())
		return -ENODEV;

	/* Allow as many queues as there are CPUs but max. 8 if user has not
	 * specified a value.
	 */
	if (xenvif_max_queues == 0)
		xenvif_max_queues = min_t(unsigned int, MAX_QUEUES_DEFAULT,
					  num_online_cpus());

	if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
		pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
			fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX);
		fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX;
	}

	rc = xenvif_xenbus_init();
	if (rc)
		goto failed_init;

#ifdef CONFIG_DEBUG_FS
	xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL);
	if (IS_ERR_OR_NULL(xen_netback_dbg_root))
		pr_warn("Init of debugfs returned %ld!\n",
			PTR_ERR(xen_netback_dbg_root));
#endif /* CONFIG_DEBUG_FS */

	return 0;

failed_init:
	return rc;
}

module_init(netback_init);

static void __exit netback_fini(void)
{
#ifdef CONFIG_DEBUG_FS
	if (!IS_ERR_OR_NULL(xen_netback_dbg_root))
		debugfs_remove_recursive(xen_netback_dbg_root);
#endif /* CONFIG_DEBUG_FS */
	xenvif_xenbus_fini();
}
module_exit(netback_fini);

MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS("xen-backend:vif");