1 /* 2 * Back-end of the driver for virtual network devices. This portion of the 3 * driver exports a 'unified' network-device interface that can be accessed 4 * by any operating system that implements a compatible front end. A 5 * reference front-end implementation can be found in: 6 * drivers/net/xen-netfront.c 7 * 8 * Copyright (c) 2002-2005, K A Fraser 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License version 2 12 * as published by the Free Software Foundation; or, when distributed 13 * separately from the Linux kernel or incorporated into other 14 * software packages, subject to the following license: 15 * 16 * Permission is hereby granted, free of charge, to any person obtaining a copy 17 * of this source file (the "Software"), to deal in the Software without 18 * restriction, including without limitation the rights to use, copy, modify, 19 * merge, publish, distribute, sublicense, and/or sell copies of the Software, 20 * and to permit persons to whom the Software is furnished to do so, subject to 21 * the following conditions: 22 * 23 * The above copyright notice and this permission notice shall be included in 24 * all copies or substantial portions of the Software. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 27 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 28 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 29 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 30 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 31 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 32 * IN THE SOFTWARE. 33 */ 34 35 #include "common.h" 36 37 #include <linux/kthread.h> 38 #include <linux/if_vlan.h> 39 #include <linux/udp.h> 40 41 #include <net/tcp.h> 42 #include <net/ip6_checksum.h> 43 44 #include <xen/xen.h> 45 #include <xen/events.h> 46 #include <xen/interface/memory.h> 47 48 #include <asm/xen/hypercall.h> 49 #include <asm/xen/page.h> 50 51 /* Provide an option to disable split event channels at load time as 52 * event channels are limited resource. Split event channels are 53 * enabled by default. 54 */ 55 bool separate_tx_rx_irq = 1; 56 module_param(separate_tx_rx_irq, bool, 0644); 57 58 /* 59 * This is the maximum slots a skb can have. If a guest sends a skb 60 * which exceeds this limit it is considered malicious. 61 */ 62 #define FATAL_SKB_SLOTS_DEFAULT 20 63 static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT; 64 module_param(fatal_skb_slots, uint, 0444); 65 66 /* 67 * To avoid confusion, we define XEN_NETBK_LEGACY_SLOTS_MAX indicating 68 * the maximum slots a valid packet can use. Now this value is defined 69 * to be XEN_NETIF_NR_SLOTS_MIN, which is supposed to be supported by 70 * all backend. 71 */ 72 #define XEN_NETBK_LEGACY_SLOTS_MAX XEN_NETIF_NR_SLOTS_MIN 73 74 /* 75 * If head != INVALID_PENDING_RING_IDX, it means this tx request is head of 76 * one or more merged tx requests, otherwise it is the continuation of 77 * previous tx request. 78 */ 79 static inline int pending_tx_is_head(struct xenvif *vif, RING_IDX idx) 80 { 81 return vif->pending_tx_info[idx].head != INVALID_PENDING_RING_IDX; 82 } 83 84 static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx, 85 u8 status); 86 87 static void make_tx_response(struct xenvif *vif, 88 struct xen_netif_tx_request *txp, 89 s8 st); 90 91 static inline int tx_work_todo(struct xenvif *vif); 92 static inline int rx_work_todo(struct xenvif *vif); 93 94 static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif, 95 u16 id, 96 s8 st, 97 u16 offset, 98 u16 size, 99 u16 flags); 100 101 static inline unsigned long idx_to_pfn(struct xenvif *vif, 102 u16 idx) 103 { 104 return page_to_pfn(vif->mmap_pages[idx]); 105 } 106 107 static inline unsigned long idx_to_kaddr(struct xenvif *vif, 108 u16 idx) 109 { 110 return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx)); 111 } 112 113 /* This is a miniumum size for the linear area to avoid lots of 114 * calls to __pskb_pull_tail() as we set up checksum offsets. The 115 * value 128 was chosen as it covers all IPv4 and most likely 116 * IPv6 headers. 117 */ 118 #define PKT_PROT_LEN 128 119 120 static u16 frag_get_pending_idx(skb_frag_t *frag) 121 { 122 return (u16)frag->page_offset; 123 } 124 125 static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx) 126 { 127 frag->page_offset = pending_idx; 128 } 129 130 static inline pending_ring_idx_t pending_index(unsigned i) 131 { 132 return i & (MAX_PENDING_REQS-1); 133 } 134 135 static inline pending_ring_idx_t nr_pending_reqs(struct xenvif *vif) 136 { 137 return MAX_PENDING_REQS - 138 vif->pending_prod + vif->pending_cons; 139 } 140 141 static int max_required_rx_slots(struct xenvif *vif) 142 { 143 int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE); 144 145 /* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */ 146 if (vif->can_sg || vif->gso_mask || vif->gso_prefix_mask) 147 max += MAX_SKB_FRAGS + 1; /* extra_info + frags */ 148 149 return max; 150 } 151 152 int xenvif_rx_ring_full(struct xenvif *vif) 153 { 154 RING_IDX peek = vif->rx_req_cons_peek; 155 RING_IDX needed = max_required_rx_slots(vif); 156 157 return ((vif->rx.sring->req_prod - peek) < needed) || 158 ((vif->rx.rsp_prod_pvt + XEN_NETIF_RX_RING_SIZE - peek) < needed); 159 } 160 161 int xenvif_must_stop_queue(struct xenvif *vif) 162 { 163 if (!xenvif_rx_ring_full(vif)) 164 return 0; 165 166 vif->rx.sring->req_event = vif->rx_req_cons_peek + 167 max_required_rx_slots(vif); 168 mb(); /* request notification /then/ check the queue */ 169 170 return xenvif_rx_ring_full(vif); 171 } 172 173 /* 174 * Returns true if we should start a new receive buffer instead of 175 * adding 'size' bytes to a buffer which currently contains 'offset' 176 * bytes. 177 */ 178 static bool start_new_rx_buffer(int offset, unsigned long size, int head) 179 { 180 /* simple case: we have completely filled the current buffer. */ 181 if (offset == MAX_BUFFER_OFFSET) 182 return true; 183 184 /* 185 * complex case: start a fresh buffer if the current frag 186 * would overflow the current buffer but only if: 187 * (i) this frag would fit completely in the next buffer 188 * and (ii) there is already some data in the current buffer 189 * and (iii) this is not the head buffer. 190 * 191 * Where: 192 * - (i) stops us splitting a frag into two copies 193 * unless the frag is too large for a single buffer. 194 * - (ii) stops us from leaving a buffer pointlessly empty. 195 * - (iii) stops us leaving the first buffer 196 * empty. Strictly speaking this is already covered 197 * by (ii) but is explicitly checked because 198 * netfront relies on the first buffer being 199 * non-empty and can crash otherwise. 200 * 201 * This means we will effectively linearise small 202 * frags but do not needlessly split large buffers 203 * into multiple copies tend to give large frags their 204 * own buffers as before. 205 */ 206 if ((offset + size > MAX_BUFFER_OFFSET) && 207 (size <= MAX_BUFFER_OFFSET) && offset && !head) 208 return true; 209 210 return false; 211 } 212 213 struct xenvif_count_slot_state { 214 unsigned long copy_off; 215 bool head; 216 }; 217 218 unsigned int xenvif_count_frag_slots(struct xenvif *vif, 219 unsigned long offset, unsigned long size, 220 struct xenvif_count_slot_state *state) 221 { 222 unsigned count = 0; 223 224 offset &= ~PAGE_MASK; 225 226 while (size > 0) { 227 unsigned long bytes; 228 229 bytes = PAGE_SIZE - offset; 230 231 if (bytes > size) 232 bytes = size; 233 234 if (start_new_rx_buffer(state->copy_off, bytes, state->head)) { 235 count++; 236 state->copy_off = 0; 237 } 238 239 if (state->copy_off + bytes > MAX_BUFFER_OFFSET) 240 bytes = MAX_BUFFER_OFFSET - state->copy_off; 241 242 state->copy_off += bytes; 243 244 offset += bytes; 245 size -= bytes; 246 247 if (offset == PAGE_SIZE) 248 offset = 0; 249 250 state->head = false; 251 } 252 253 return count; 254 } 255 256 /* 257 * Figure out how many ring slots we're going to need to send @skb to 258 * the guest. This function is essentially a dry run of 259 * xenvif_gop_frag_copy. 260 */ 261 unsigned int xenvif_count_skb_slots(struct xenvif *vif, struct sk_buff *skb) 262 { 263 struct xenvif_count_slot_state state; 264 unsigned int count; 265 unsigned char *data; 266 unsigned i; 267 268 state.head = true; 269 state.copy_off = 0; 270 271 /* Slot for the first (partial) page of data. */ 272 count = 1; 273 274 /* Need a slot for the GSO prefix for GSO extra data? */ 275 if (skb_shinfo(skb)->gso_size) 276 count++; 277 278 data = skb->data; 279 while (data < skb_tail_pointer(skb)) { 280 unsigned long offset = offset_in_page(data); 281 unsigned long size = PAGE_SIZE - offset; 282 283 if (data + size > skb_tail_pointer(skb)) 284 size = skb_tail_pointer(skb) - data; 285 286 count += xenvif_count_frag_slots(vif, offset, size, &state); 287 288 data += size; 289 } 290 291 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 292 unsigned long size = skb_frag_size(&skb_shinfo(skb)->frags[i]); 293 unsigned long offset = skb_shinfo(skb)->frags[i].page_offset; 294 295 count += xenvif_count_frag_slots(vif, offset, size, &state); 296 } 297 return count; 298 } 299 300 struct netrx_pending_operations { 301 unsigned copy_prod, copy_cons; 302 unsigned meta_prod, meta_cons; 303 struct gnttab_copy *copy; 304 struct xenvif_rx_meta *meta; 305 int copy_off; 306 grant_ref_t copy_gref; 307 }; 308 309 static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif *vif, 310 struct netrx_pending_operations *npo) 311 { 312 struct xenvif_rx_meta *meta; 313 struct xen_netif_rx_request *req; 314 315 req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++); 316 317 meta = npo->meta + npo->meta_prod++; 318 meta->gso_type = XEN_NETIF_GSO_TYPE_NONE; 319 meta->gso_size = 0; 320 meta->size = 0; 321 meta->id = req->id; 322 323 npo->copy_off = 0; 324 npo->copy_gref = req->gref; 325 326 return meta; 327 } 328 329 /* 330 * Set up the grant operations for this fragment. If it's a flipping 331 * interface, we also set up the unmap request from here. 332 */ 333 static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb, 334 struct netrx_pending_operations *npo, 335 struct page *page, unsigned long size, 336 unsigned long offset, int *head) 337 { 338 struct gnttab_copy *copy_gop; 339 struct xenvif_rx_meta *meta; 340 unsigned long bytes; 341 int gso_type; 342 343 /* Data must not cross a page boundary. */ 344 BUG_ON(size + offset > PAGE_SIZE<<compound_order(page)); 345 346 meta = npo->meta + npo->meta_prod - 1; 347 348 /* Skip unused frames from start of page */ 349 page += offset >> PAGE_SHIFT; 350 offset &= ~PAGE_MASK; 351 352 while (size > 0) { 353 BUG_ON(offset >= PAGE_SIZE); 354 BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET); 355 356 bytes = PAGE_SIZE - offset; 357 358 if (bytes > size) 359 bytes = size; 360 361 if (start_new_rx_buffer(npo->copy_off, bytes, *head)) { 362 /* 363 * Netfront requires there to be some data in the head 364 * buffer. 365 */ 366 BUG_ON(*head); 367 368 meta = get_next_rx_buffer(vif, npo); 369 } 370 371 if (npo->copy_off + bytes > MAX_BUFFER_OFFSET) 372 bytes = MAX_BUFFER_OFFSET - npo->copy_off; 373 374 copy_gop = npo->copy + npo->copy_prod++; 375 copy_gop->flags = GNTCOPY_dest_gref; 376 copy_gop->len = bytes; 377 378 copy_gop->source.domid = DOMID_SELF; 379 copy_gop->source.u.gmfn = virt_to_mfn(page_address(page)); 380 copy_gop->source.offset = offset; 381 382 copy_gop->dest.domid = vif->domid; 383 copy_gop->dest.offset = npo->copy_off; 384 copy_gop->dest.u.ref = npo->copy_gref; 385 386 npo->copy_off += bytes; 387 meta->size += bytes; 388 389 offset += bytes; 390 size -= bytes; 391 392 /* Next frame */ 393 if (offset == PAGE_SIZE && size) { 394 BUG_ON(!PageCompound(page)); 395 page++; 396 offset = 0; 397 } 398 399 /* Leave a gap for the GSO descriptor. */ 400 if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) 401 gso_type = XEN_NETIF_GSO_TYPE_TCPV4; 402 else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) 403 gso_type = XEN_NETIF_GSO_TYPE_TCPV6; 404 else 405 gso_type = XEN_NETIF_GSO_TYPE_NONE; 406 407 if (*head && ((1 << gso_type) & vif->gso_mask)) 408 vif->rx.req_cons++; 409 410 *head = 0; /* There must be something in this buffer now. */ 411 412 } 413 } 414 415 /* 416 * Prepare an SKB to be transmitted to the frontend. 417 * 418 * This function is responsible for allocating grant operations, meta 419 * structures, etc. 420 * 421 * It returns the number of meta structures consumed. The number of 422 * ring slots used is always equal to the number of meta slots used 423 * plus the number of GSO descriptors used. Currently, we use either 424 * zero GSO descriptors (for non-GSO packets) or one descriptor (for 425 * frontend-side LRO). 426 */ 427 static int xenvif_gop_skb(struct sk_buff *skb, 428 struct netrx_pending_operations *npo) 429 { 430 struct xenvif *vif = netdev_priv(skb->dev); 431 int nr_frags = skb_shinfo(skb)->nr_frags; 432 int i; 433 struct xen_netif_rx_request *req; 434 struct xenvif_rx_meta *meta; 435 unsigned char *data; 436 int head = 1; 437 int old_meta_prod; 438 int gso_type; 439 int gso_size; 440 441 old_meta_prod = npo->meta_prod; 442 443 if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { 444 gso_type = XEN_NETIF_GSO_TYPE_TCPV4; 445 gso_size = skb_shinfo(skb)->gso_size; 446 } else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) { 447 gso_type = XEN_NETIF_GSO_TYPE_TCPV6; 448 gso_size = skb_shinfo(skb)->gso_size; 449 } else { 450 gso_type = XEN_NETIF_GSO_TYPE_NONE; 451 gso_size = 0; 452 } 453 454 /* Set up a GSO prefix descriptor, if necessary */ 455 if ((1 << gso_type) & vif->gso_prefix_mask) { 456 req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++); 457 meta = npo->meta + npo->meta_prod++; 458 meta->gso_type = gso_type; 459 meta->gso_size = gso_size; 460 meta->size = 0; 461 meta->id = req->id; 462 } 463 464 req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++); 465 meta = npo->meta + npo->meta_prod++; 466 467 if ((1 << gso_type) & vif->gso_mask) { 468 meta->gso_type = gso_type; 469 meta->gso_size = gso_size; 470 } else { 471 meta->gso_type = XEN_NETIF_GSO_TYPE_NONE; 472 meta->gso_size = 0; 473 } 474 475 meta->size = 0; 476 meta->id = req->id; 477 npo->copy_off = 0; 478 npo->copy_gref = req->gref; 479 480 data = skb->data; 481 while (data < skb_tail_pointer(skb)) { 482 unsigned int offset = offset_in_page(data); 483 unsigned int len = PAGE_SIZE - offset; 484 485 if (data + len > skb_tail_pointer(skb)) 486 len = skb_tail_pointer(skb) - data; 487 488 xenvif_gop_frag_copy(vif, skb, npo, 489 virt_to_page(data), len, offset, &head); 490 data += len; 491 } 492 493 for (i = 0; i < nr_frags; i++) { 494 xenvif_gop_frag_copy(vif, skb, npo, 495 skb_frag_page(&skb_shinfo(skb)->frags[i]), 496 skb_frag_size(&skb_shinfo(skb)->frags[i]), 497 skb_shinfo(skb)->frags[i].page_offset, 498 &head); 499 } 500 501 return npo->meta_prod - old_meta_prod; 502 } 503 504 /* 505 * This is a twin to xenvif_gop_skb. Assume that xenvif_gop_skb was 506 * used to set up the operations on the top of 507 * netrx_pending_operations, which have since been done. Check that 508 * they didn't give any errors and advance over them. 509 */ 510 static int xenvif_check_gop(struct xenvif *vif, int nr_meta_slots, 511 struct netrx_pending_operations *npo) 512 { 513 struct gnttab_copy *copy_op; 514 int status = XEN_NETIF_RSP_OKAY; 515 int i; 516 517 for (i = 0; i < nr_meta_slots; i++) { 518 copy_op = npo->copy + npo->copy_cons++; 519 if (copy_op->status != GNTST_okay) { 520 netdev_dbg(vif->dev, 521 "Bad status %d from copy to DOM%d.\n", 522 copy_op->status, vif->domid); 523 status = XEN_NETIF_RSP_ERROR; 524 } 525 } 526 527 return status; 528 } 529 530 static void xenvif_add_frag_responses(struct xenvif *vif, int status, 531 struct xenvif_rx_meta *meta, 532 int nr_meta_slots) 533 { 534 int i; 535 unsigned long offset; 536 537 /* No fragments used */ 538 if (nr_meta_slots <= 1) 539 return; 540 541 nr_meta_slots--; 542 543 for (i = 0; i < nr_meta_slots; i++) { 544 int flags; 545 if (i == nr_meta_slots - 1) 546 flags = 0; 547 else 548 flags = XEN_NETRXF_more_data; 549 550 offset = 0; 551 make_rx_response(vif, meta[i].id, status, offset, 552 meta[i].size, flags); 553 } 554 } 555 556 struct skb_cb_overlay { 557 int meta_slots_used; 558 }; 559 560 static void xenvif_kick_thread(struct xenvif *vif) 561 { 562 wake_up(&vif->wq); 563 } 564 565 void xenvif_rx_action(struct xenvif *vif) 566 { 567 s8 status; 568 u16 flags; 569 struct xen_netif_rx_response *resp; 570 struct sk_buff_head rxq; 571 struct sk_buff *skb; 572 LIST_HEAD(notify); 573 int ret; 574 int nr_frags; 575 int count; 576 unsigned long offset; 577 struct skb_cb_overlay *sco; 578 int need_to_notify = 0; 579 580 struct netrx_pending_operations npo = { 581 .copy = vif->grant_copy_op, 582 .meta = vif->meta, 583 }; 584 585 skb_queue_head_init(&rxq); 586 587 count = 0; 588 589 while ((skb = skb_dequeue(&vif->rx_queue)) != NULL) { 590 vif = netdev_priv(skb->dev); 591 nr_frags = skb_shinfo(skb)->nr_frags; 592 593 sco = (struct skb_cb_overlay *)skb->cb; 594 sco->meta_slots_used = xenvif_gop_skb(skb, &npo); 595 596 count += nr_frags + 1; 597 598 __skb_queue_tail(&rxq, skb); 599 600 /* Filled the batch queue? */ 601 /* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */ 602 if (count + MAX_SKB_FRAGS >= XEN_NETIF_RX_RING_SIZE) 603 break; 604 } 605 606 BUG_ON(npo.meta_prod > ARRAY_SIZE(vif->meta)); 607 608 if (!npo.copy_prod) 609 return; 610 611 BUG_ON(npo.copy_prod > ARRAY_SIZE(vif->grant_copy_op)); 612 gnttab_batch_copy(vif->grant_copy_op, npo.copy_prod); 613 614 while ((skb = __skb_dequeue(&rxq)) != NULL) { 615 sco = (struct skb_cb_overlay *)skb->cb; 616 617 vif = netdev_priv(skb->dev); 618 619 if ((1 << vif->meta[npo.meta_cons].gso_type) & 620 vif->gso_prefix_mask) { 621 resp = RING_GET_RESPONSE(&vif->rx, 622 vif->rx.rsp_prod_pvt++); 623 624 resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data; 625 626 resp->offset = vif->meta[npo.meta_cons].gso_size; 627 resp->id = vif->meta[npo.meta_cons].id; 628 resp->status = sco->meta_slots_used; 629 630 npo.meta_cons++; 631 sco->meta_slots_used--; 632 } 633 634 635 vif->dev->stats.tx_bytes += skb->len; 636 vif->dev->stats.tx_packets++; 637 638 status = xenvif_check_gop(vif, sco->meta_slots_used, &npo); 639 640 if (sco->meta_slots_used == 1) 641 flags = 0; 642 else 643 flags = XEN_NETRXF_more_data; 644 645 if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */ 646 flags |= XEN_NETRXF_csum_blank | XEN_NETRXF_data_validated; 647 else if (skb->ip_summed == CHECKSUM_UNNECESSARY) 648 /* remote but checksummed. */ 649 flags |= XEN_NETRXF_data_validated; 650 651 offset = 0; 652 resp = make_rx_response(vif, vif->meta[npo.meta_cons].id, 653 status, offset, 654 vif->meta[npo.meta_cons].size, 655 flags); 656 657 if ((1 << vif->meta[npo.meta_cons].gso_type) & 658 vif->gso_mask) { 659 struct xen_netif_extra_info *gso = 660 (struct xen_netif_extra_info *) 661 RING_GET_RESPONSE(&vif->rx, 662 vif->rx.rsp_prod_pvt++); 663 664 resp->flags |= XEN_NETRXF_extra_info; 665 666 gso->u.gso.type = vif->meta[npo.meta_cons].gso_type; 667 gso->u.gso.size = vif->meta[npo.meta_cons].gso_size; 668 gso->u.gso.pad = 0; 669 gso->u.gso.features = 0; 670 671 gso->type = XEN_NETIF_EXTRA_TYPE_GSO; 672 gso->flags = 0; 673 } 674 675 xenvif_add_frag_responses(vif, status, 676 vif->meta + npo.meta_cons + 1, 677 sco->meta_slots_used); 678 679 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret); 680 681 if (ret) 682 need_to_notify = 1; 683 684 xenvif_notify_tx_completion(vif); 685 686 npo.meta_cons += sco->meta_slots_used; 687 dev_kfree_skb(skb); 688 } 689 690 if (need_to_notify) 691 notify_remote_via_irq(vif->rx_irq); 692 693 /* More work to do? */ 694 if (!skb_queue_empty(&vif->rx_queue)) 695 xenvif_kick_thread(vif); 696 } 697 698 void xenvif_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb) 699 { 700 skb_queue_tail(&vif->rx_queue, skb); 701 702 xenvif_kick_thread(vif); 703 } 704 705 void xenvif_check_rx_xenvif(struct xenvif *vif) 706 { 707 int more_to_do; 708 709 RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do); 710 711 if (more_to_do) 712 napi_schedule(&vif->napi); 713 } 714 715 static void tx_add_credit(struct xenvif *vif) 716 { 717 unsigned long max_burst, max_credit; 718 719 /* 720 * Allow a burst big enough to transmit a jumbo packet of up to 128kB. 721 * Otherwise the interface can seize up due to insufficient credit. 722 */ 723 max_burst = RING_GET_REQUEST(&vif->tx, vif->tx.req_cons)->size; 724 max_burst = min(max_burst, 131072UL); 725 max_burst = max(max_burst, vif->credit_bytes); 726 727 /* Take care that adding a new chunk of credit doesn't wrap to zero. */ 728 max_credit = vif->remaining_credit + vif->credit_bytes; 729 if (max_credit < vif->remaining_credit) 730 max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */ 731 732 vif->remaining_credit = min(max_credit, max_burst); 733 } 734 735 static void tx_credit_callback(unsigned long data) 736 { 737 struct xenvif *vif = (struct xenvif *)data; 738 tx_add_credit(vif); 739 xenvif_check_rx_xenvif(vif); 740 } 741 742 static void xenvif_tx_err(struct xenvif *vif, 743 struct xen_netif_tx_request *txp, RING_IDX end) 744 { 745 RING_IDX cons = vif->tx.req_cons; 746 747 do { 748 make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR); 749 if (cons == end) 750 break; 751 txp = RING_GET_REQUEST(&vif->tx, cons++); 752 } while (1); 753 vif->tx.req_cons = cons; 754 } 755 756 static void xenvif_fatal_tx_err(struct xenvif *vif) 757 { 758 netdev_err(vif->dev, "fatal error; disabling device\n"); 759 xenvif_carrier_off(vif); 760 } 761 762 static int xenvif_count_requests(struct xenvif *vif, 763 struct xen_netif_tx_request *first, 764 struct xen_netif_tx_request *txp, 765 int work_to_do) 766 { 767 RING_IDX cons = vif->tx.req_cons; 768 int slots = 0; 769 int drop_err = 0; 770 int more_data; 771 772 if (!(first->flags & XEN_NETTXF_more_data)) 773 return 0; 774 775 do { 776 struct xen_netif_tx_request dropped_tx = { 0 }; 777 778 if (slots >= work_to_do) { 779 netdev_err(vif->dev, 780 "Asked for %d slots but exceeds this limit\n", 781 work_to_do); 782 xenvif_fatal_tx_err(vif); 783 return -ENODATA; 784 } 785 786 /* This guest is really using too many slots and 787 * considered malicious. 788 */ 789 if (unlikely(slots >= fatal_skb_slots)) { 790 netdev_err(vif->dev, 791 "Malicious frontend using %d slots, threshold %u\n", 792 slots, fatal_skb_slots); 793 xenvif_fatal_tx_err(vif); 794 return -E2BIG; 795 } 796 797 /* Xen network protocol had implicit dependency on 798 * MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to 799 * the historical MAX_SKB_FRAGS value 18 to honor the 800 * same behavior as before. Any packet using more than 801 * 18 slots but less than fatal_skb_slots slots is 802 * dropped 803 */ 804 if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) { 805 if (net_ratelimit()) 806 netdev_dbg(vif->dev, 807 "Too many slots (%d) exceeding limit (%d), dropping packet\n", 808 slots, XEN_NETBK_LEGACY_SLOTS_MAX); 809 drop_err = -E2BIG; 810 } 811 812 if (drop_err) 813 txp = &dropped_tx; 814 815 memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + slots), 816 sizeof(*txp)); 817 818 /* If the guest submitted a frame >= 64 KiB then 819 * first->size overflowed and following slots will 820 * appear to be larger than the frame. 821 * 822 * This cannot be fatal error as there are buggy 823 * frontends that do this. 824 * 825 * Consume all slots and drop the packet. 826 */ 827 if (!drop_err && txp->size > first->size) { 828 if (net_ratelimit()) 829 netdev_dbg(vif->dev, 830 "Invalid tx request, slot size %u > remaining size %u\n", 831 txp->size, first->size); 832 drop_err = -EIO; 833 } 834 835 first->size -= txp->size; 836 slots++; 837 838 if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) { 839 netdev_err(vif->dev, "Cross page boundary, txp->offset: %x, size: %u\n", 840 txp->offset, txp->size); 841 xenvif_fatal_tx_err(vif); 842 return -EINVAL; 843 } 844 845 more_data = txp->flags & XEN_NETTXF_more_data; 846 847 if (!drop_err) 848 txp++; 849 850 } while (more_data); 851 852 if (drop_err) { 853 xenvif_tx_err(vif, first, cons + slots); 854 return drop_err; 855 } 856 857 return slots; 858 } 859 860 static struct page *xenvif_alloc_page(struct xenvif *vif, 861 u16 pending_idx) 862 { 863 struct page *page; 864 865 page = alloc_page(GFP_ATOMIC|__GFP_COLD); 866 if (!page) 867 return NULL; 868 vif->mmap_pages[pending_idx] = page; 869 870 return page; 871 } 872 873 static struct gnttab_copy *xenvif_get_requests(struct xenvif *vif, 874 struct sk_buff *skb, 875 struct xen_netif_tx_request *txp, 876 struct gnttab_copy *gop) 877 { 878 struct skb_shared_info *shinfo = skb_shinfo(skb); 879 skb_frag_t *frags = shinfo->frags; 880 u16 pending_idx = *((u16 *)skb->data); 881 u16 head_idx = 0; 882 int slot, start; 883 struct page *page; 884 pending_ring_idx_t index, start_idx = 0; 885 uint16_t dst_offset; 886 unsigned int nr_slots; 887 struct pending_tx_info *first = NULL; 888 889 /* At this point shinfo->nr_frags is in fact the number of 890 * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX. 891 */ 892 nr_slots = shinfo->nr_frags; 893 894 /* Skip first skb fragment if it is on same page as header fragment. */ 895 start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx); 896 897 /* Coalesce tx requests, at this point the packet passed in 898 * should be <= 64K. Any packets larger than 64K have been 899 * handled in xenvif_count_requests(). 900 */ 901 for (shinfo->nr_frags = slot = start; slot < nr_slots; 902 shinfo->nr_frags++) { 903 struct pending_tx_info *pending_tx_info = 904 vif->pending_tx_info; 905 906 page = alloc_page(GFP_ATOMIC|__GFP_COLD); 907 if (!page) 908 goto err; 909 910 dst_offset = 0; 911 first = NULL; 912 while (dst_offset < PAGE_SIZE && slot < nr_slots) { 913 gop->flags = GNTCOPY_source_gref; 914 915 gop->source.u.ref = txp->gref; 916 gop->source.domid = vif->domid; 917 gop->source.offset = txp->offset; 918 919 gop->dest.domid = DOMID_SELF; 920 921 gop->dest.offset = dst_offset; 922 gop->dest.u.gmfn = virt_to_mfn(page_address(page)); 923 924 if (dst_offset + txp->size > PAGE_SIZE) { 925 /* This page can only merge a portion 926 * of tx request. Do not increment any 927 * pointer / counter here. The txp 928 * will be dealt with in future 929 * rounds, eventually hitting the 930 * `else` branch. 931 */ 932 gop->len = PAGE_SIZE - dst_offset; 933 txp->offset += gop->len; 934 txp->size -= gop->len; 935 dst_offset += gop->len; /* quit loop */ 936 } else { 937 /* This tx request can be merged in the page */ 938 gop->len = txp->size; 939 dst_offset += gop->len; 940 941 index = pending_index(vif->pending_cons++); 942 943 pending_idx = vif->pending_ring[index]; 944 945 memcpy(&pending_tx_info[pending_idx].req, txp, 946 sizeof(*txp)); 947 948 /* Poison these fields, corresponding 949 * fields for head tx req will be set 950 * to correct values after the loop. 951 */ 952 vif->mmap_pages[pending_idx] = (void *)(~0UL); 953 pending_tx_info[pending_idx].head = 954 INVALID_PENDING_RING_IDX; 955 956 if (!first) { 957 first = &pending_tx_info[pending_idx]; 958 start_idx = index; 959 head_idx = pending_idx; 960 } 961 962 txp++; 963 slot++; 964 } 965 966 gop++; 967 } 968 969 first->req.offset = 0; 970 first->req.size = dst_offset; 971 first->head = start_idx; 972 vif->mmap_pages[head_idx] = page; 973 frag_set_pending_idx(&frags[shinfo->nr_frags], head_idx); 974 } 975 976 BUG_ON(shinfo->nr_frags > MAX_SKB_FRAGS); 977 978 return gop; 979 err: 980 /* Unwind, freeing all pages and sending error responses. */ 981 while (shinfo->nr_frags-- > start) { 982 xenvif_idx_release(vif, 983 frag_get_pending_idx(&frags[shinfo->nr_frags]), 984 XEN_NETIF_RSP_ERROR); 985 } 986 /* The head too, if necessary. */ 987 if (start) 988 xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR); 989 990 return NULL; 991 } 992 993 static int xenvif_tx_check_gop(struct xenvif *vif, 994 struct sk_buff *skb, 995 struct gnttab_copy **gopp) 996 { 997 struct gnttab_copy *gop = *gopp; 998 u16 pending_idx = *((u16 *)skb->data); 999 struct skb_shared_info *shinfo = skb_shinfo(skb); 1000 struct pending_tx_info *tx_info; 1001 int nr_frags = shinfo->nr_frags; 1002 int i, err, start; 1003 u16 peek; /* peek into next tx request */ 1004 1005 /* Check status of header. */ 1006 err = gop->status; 1007 if (unlikely(err)) 1008 xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR); 1009 1010 /* Skip first skb fragment if it is on same page as header fragment. */ 1011 start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx); 1012 1013 for (i = start; i < nr_frags; i++) { 1014 int j, newerr; 1015 pending_ring_idx_t head; 1016 1017 pending_idx = frag_get_pending_idx(&shinfo->frags[i]); 1018 tx_info = &vif->pending_tx_info[pending_idx]; 1019 head = tx_info->head; 1020 1021 /* Check error status: if okay then remember grant handle. */ 1022 do { 1023 newerr = (++gop)->status; 1024 if (newerr) 1025 break; 1026 peek = vif->pending_ring[pending_index(++head)]; 1027 } while (!pending_tx_is_head(vif, peek)); 1028 1029 if (likely(!newerr)) { 1030 /* Had a previous error? Invalidate this fragment. */ 1031 if (unlikely(err)) 1032 xenvif_idx_release(vif, pending_idx, 1033 XEN_NETIF_RSP_OKAY); 1034 continue; 1035 } 1036 1037 /* Error on this fragment: respond to client with an error. */ 1038 xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR); 1039 1040 /* Not the first error? Preceding frags already invalidated. */ 1041 if (err) 1042 continue; 1043 1044 /* First error: invalidate header and preceding fragments. */ 1045 pending_idx = *((u16 *)skb->data); 1046 xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY); 1047 for (j = start; j < i; j++) { 1048 pending_idx = frag_get_pending_idx(&shinfo->frags[j]); 1049 xenvif_idx_release(vif, pending_idx, 1050 XEN_NETIF_RSP_OKAY); 1051 } 1052 1053 /* Remember the error: invalidate all subsequent fragments. */ 1054 err = newerr; 1055 } 1056 1057 *gopp = gop + 1; 1058 return err; 1059 } 1060 1061 static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb) 1062 { 1063 struct skb_shared_info *shinfo = skb_shinfo(skb); 1064 int nr_frags = shinfo->nr_frags; 1065 int i; 1066 1067 for (i = 0; i < nr_frags; i++) { 1068 skb_frag_t *frag = shinfo->frags + i; 1069 struct xen_netif_tx_request *txp; 1070 struct page *page; 1071 u16 pending_idx; 1072 1073 pending_idx = frag_get_pending_idx(frag); 1074 1075 txp = &vif->pending_tx_info[pending_idx].req; 1076 page = virt_to_page(idx_to_kaddr(vif, pending_idx)); 1077 __skb_fill_page_desc(skb, i, page, txp->offset, txp->size); 1078 skb->len += txp->size; 1079 skb->data_len += txp->size; 1080 skb->truesize += txp->size; 1081 1082 /* Take an extra reference to offset xenvif_idx_release */ 1083 get_page(vif->mmap_pages[pending_idx]); 1084 xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY); 1085 } 1086 } 1087 1088 static int xenvif_get_extras(struct xenvif *vif, 1089 struct xen_netif_extra_info *extras, 1090 int work_to_do) 1091 { 1092 struct xen_netif_extra_info extra; 1093 RING_IDX cons = vif->tx.req_cons; 1094 1095 do { 1096 if (unlikely(work_to_do-- <= 0)) { 1097 netdev_err(vif->dev, "Missing extra info\n"); 1098 xenvif_fatal_tx_err(vif); 1099 return -EBADR; 1100 } 1101 1102 memcpy(&extra, RING_GET_REQUEST(&vif->tx, cons), 1103 sizeof(extra)); 1104 if (unlikely(!extra.type || 1105 extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) { 1106 vif->tx.req_cons = ++cons; 1107 netdev_err(vif->dev, 1108 "Invalid extra type: %d\n", extra.type); 1109 xenvif_fatal_tx_err(vif); 1110 return -EINVAL; 1111 } 1112 1113 memcpy(&extras[extra.type - 1], &extra, sizeof(extra)); 1114 vif->tx.req_cons = ++cons; 1115 } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE); 1116 1117 return work_to_do; 1118 } 1119 1120 static int xenvif_set_skb_gso(struct xenvif *vif, 1121 struct sk_buff *skb, 1122 struct xen_netif_extra_info *gso) 1123 { 1124 if (!gso->u.gso.size) { 1125 netdev_err(vif->dev, "GSO size must not be zero.\n"); 1126 xenvif_fatal_tx_err(vif); 1127 return -EINVAL; 1128 } 1129 1130 switch (gso->u.gso.type) { 1131 case XEN_NETIF_GSO_TYPE_TCPV4: 1132 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; 1133 break; 1134 case XEN_NETIF_GSO_TYPE_TCPV6: 1135 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; 1136 break; 1137 default: 1138 netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type); 1139 xenvif_fatal_tx_err(vif); 1140 return -EINVAL; 1141 } 1142 1143 skb_shinfo(skb)->gso_size = gso->u.gso.size; 1144 1145 /* Header must be checked, and gso_segs computed. */ 1146 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; 1147 skb_shinfo(skb)->gso_segs = 0; 1148 1149 return 0; 1150 } 1151 1152 static inline int maybe_pull_tail(struct sk_buff *skb, unsigned int len, 1153 unsigned int max) 1154 { 1155 if (skb_headlen(skb) >= len) 1156 return 0; 1157 1158 /* If we need to pullup then pullup to the max, so we 1159 * won't need to do it again. 1160 */ 1161 if (max > skb->len) 1162 max = skb->len; 1163 1164 if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL) 1165 return -ENOMEM; 1166 1167 if (skb_headlen(skb) < len) 1168 return -EPROTO; 1169 1170 return 0; 1171 } 1172 1173 /* This value should be large enough to cover a tagged ethernet header plus 1174 * maximally sized IP and TCP or UDP headers. 1175 */ 1176 #define MAX_IP_HDR_LEN 128 1177 1178 static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb, 1179 int recalculate_partial_csum) 1180 { 1181 unsigned int off; 1182 bool fragment; 1183 int err; 1184 1185 fragment = false; 1186 1187 err = maybe_pull_tail(skb, 1188 sizeof(struct iphdr), 1189 MAX_IP_HDR_LEN); 1190 if (err < 0) 1191 goto out; 1192 1193 if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF)) 1194 fragment = true; 1195 1196 off = ip_hdrlen(skb); 1197 1198 err = -EPROTO; 1199 1200 if (fragment) 1201 goto out; 1202 1203 switch (ip_hdr(skb)->protocol) { 1204 case IPPROTO_TCP: 1205 err = maybe_pull_tail(skb, 1206 off + sizeof(struct tcphdr), 1207 MAX_IP_HDR_LEN); 1208 if (err < 0) 1209 goto out; 1210 1211 if (!skb_partial_csum_set(skb, off, 1212 offsetof(struct tcphdr, check))) 1213 goto out; 1214 1215 if (recalculate_partial_csum) 1216 tcp_hdr(skb)->check = 1217 ~csum_tcpudp_magic(ip_hdr(skb)->saddr, 1218 ip_hdr(skb)->daddr, 1219 skb->len - off, 1220 IPPROTO_TCP, 0); 1221 break; 1222 case IPPROTO_UDP: 1223 err = maybe_pull_tail(skb, 1224 off + sizeof(struct udphdr), 1225 MAX_IP_HDR_LEN); 1226 if (err < 0) 1227 goto out; 1228 1229 if (!skb_partial_csum_set(skb, off, 1230 offsetof(struct udphdr, check))) 1231 goto out; 1232 1233 if (recalculate_partial_csum) 1234 udp_hdr(skb)->check = 1235 ~csum_tcpudp_magic(ip_hdr(skb)->saddr, 1236 ip_hdr(skb)->daddr, 1237 skb->len - off, 1238 IPPROTO_UDP, 0); 1239 break; 1240 default: 1241 goto out; 1242 } 1243 1244 err = 0; 1245 1246 out: 1247 return err; 1248 } 1249 1250 /* This value should be large enough to cover a tagged ethernet header plus 1251 * an IPv6 header, all options, and a maximal TCP or UDP header. 1252 */ 1253 #define MAX_IPV6_HDR_LEN 256 1254 1255 #define OPT_HDR(type, skb, off) \ 1256 (type *)(skb_network_header(skb) + (off)) 1257 1258 static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb, 1259 int recalculate_partial_csum) 1260 { 1261 int err; 1262 u8 nexthdr; 1263 unsigned int off; 1264 unsigned int len; 1265 bool fragment; 1266 bool done; 1267 1268 fragment = false; 1269 done = false; 1270 1271 off = sizeof(struct ipv6hdr); 1272 1273 err = maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN); 1274 if (err < 0) 1275 goto out; 1276 1277 nexthdr = ipv6_hdr(skb)->nexthdr; 1278 1279 len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len); 1280 while (off <= len && !done) { 1281 switch (nexthdr) { 1282 case IPPROTO_DSTOPTS: 1283 case IPPROTO_HOPOPTS: 1284 case IPPROTO_ROUTING: { 1285 struct ipv6_opt_hdr *hp; 1286 1287 err = maybe_pull_tail(skb, 1288 off + 1289 sizeof(struct ipv6_opt_hdr), 1290 MAX_IPV6_HDR_LEN); 1291 if (err < 0) 1292 goto out; 1293 1294 hp = OPT_HDR(struct ipv6_opt_hdr, skb, off); 1295 nexthdr = hp->nexthdr; 1296 off += ipv6_optlen(hp); 1297 break; 1298 } 1299 case IPPROTO_AH: { 1300 struct ip_auth_hdr *hp; 1301 1302 err = maybe_pull_tail(skb, 1303 off + 1304 sizeof(struct ip_auth_hdr), 1305 MAX_IPV6_HDR_LEN); 1306 if (err < 0) 1307 goto out; 1308 1309 hp = OPT_HDR(struct ip_auth_hdr, skb, off); 1310 nexthdr = hp->nexthdr; 1311 off += ipv6_authlen(hp); 1312 break; 1313 } 1314 case IPPROTO_FRAGMENT: { 1315 struct frag_hdr *hp; 1316 1317 err = maybe_pull_tail(skb, 1318 off + 1319 sizeof(struct frag_hdr), 1320 MAX_IPV6_HDR_LEN); 1321 if (err < 0) 1322 goto out; 1323 1324 hp = OPT_HDR(struct frag_hdr, skb, off); 1325 1326 if (hp->frag_off & htons(IP6_OFFSET | IP6_MF)) 1327 fragment = true; 1328 1329 nexthdr = hp->nexthdr; 1330 off += sizeof(struct frag_hdr); 1331 break; 1332 } 1333 default: 1334 done = true; 1335 break; 1336 } 1337 } 1338 1339 err = -EPROTO; 1340 1341 if (!done || fragment) 1342 goto out; 1343 1344 switch (nexthdr) { 1345 case IPPROTO_TCP: 1346 err = maybe_pull_tail(skb, 1347 off + sizeof(struct tcphdr), 1348 MAX_IPV6_HDR_LEN); 1349 if (err < 0) 1350 goto out; 1351 1352 if (!skb_partial_csum_set(skb, off, 1353 offsetof(struct tcphdr, check))) 1354 goto out; 1355 1356 if (recalculate_partial_csum) 1357 tcp_hdr(skb)->check = 1358 ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, 1359 &ipv6_hdr(skb)->daddr, 1360 skb->len - off, 1361 IPPROTO_TCP, 0); 1362 break; 1363 case IPPROTO_UDP: 1364 err = maybe_pull_tail(skb, 1365 off + sizeof(struct udphdr), 1366 MAX_IPV6_HDR_LEN); 1367 if (err < 0) 1368 goto out; 1369 1370 if (!skb_partial_csum_set(skb, off, 1371 offsetof(struct udphdr, check))) 1372 goto out; 1373 1374 if (recalculate_partial_csum) 1375 udp_hdr(skb)->check = 1376 ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, 1377 &ipv6_hdr(skb)->daddr, 1378 skb->len - off, 1379 IPPROTO_UDP, 0); 1380 break; 1381 default: 1382 goto out; 1383 } 1384 1385 err = 0; 1386 1387 out: 1388 return err; 1389 } 1390 1391 static int checksum_setup(struct xenvif *vif, struct sk_buff *skb) 1392 { 1393 int err = -EPROTO; 1394 int recalculate_partial_csum = 0; 1395 1396 /* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy 1397 * peers can fail to set NETRXF_csum_blank when sending a GSO 1398 * frame. In this case force the SKB to CHECKSUM_PARTIAL and 1399 * recalculate the partial checksum. 1400 */ 1401 if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) { 1402 vif->rx_gso_checksum_fixup++; 1403 skb->ip_summed = CHECKSUM_PARTIAL; 1404 recalculate_partial_csum = 1; 1405 } 1406 1407 /* A non-CHECKSUM_PARTIAL SKB does not require setup. */ 1408 if (skb->ip_summed != CHECKSUM_PARTIAL) 1409 return 0; 1410 1411 if (skb->protocol == htons(ETH_P_IP)) 1412 err = checksum_setup_ip(vif, skb, recalculate_partial_csum); 1413 else if (skb->protocol == htons(ETH_P_IPV6)) 1414 err = checksum_setup_ipv6(vif, skb, recalculate_partial_csum); 1415 1416 return err; 1417 } 1418 1419 static bool tx_credit_exceeded(struct xenvif *vif, unsigned size) 1420 { 1421 u64 now = get_jiffies_64(); 1422 u64 next_credit = vif->credit_window_start + 1423 msecs_to_jiffies(vif->credit_usec / 1000); 1424 1425 /* Timer could already be pending in rare cases. */ 1426 if (timer_pending(&vif->credit_timeout)) 1427 return true; 1428 1429 /* Passed the point where we can replenish credit? */ 1430 if (time_after_eq64(now, next_credit)) { 1431 vif->credit_window_start = now; 1432 tx_add_credit(vif); 1433 } 1434 1435 /* Still too big to send right now? Set a callback. */ 1436 if (size > vif->remaining_credit) { 1437 vif->credit_timeout.data = 1438 (unsigned long)vif; 1439 vif->credit_timeout.function = 1440 tx_credit_callback; 1441 mod_timer(&vif->credit_timeout, 1442 next_credit); 1443 vif->credit_window_start = next_credit; 1444 1445 return true; 1446 } 1447 1448 return false; 1449 } 1450 1451 static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget) 1452 { 1453 struct gnttab_copy *gop = vif->tx_copy_ops, *request_gop; 1454 struct sk_buff *skb; 1455 int ret; 1456 1457 while ((nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX 1458 < MAX_PENDING_REQS) && 1459 (skb_queue_len(&vif->tx_queue) < budget)) { 1460 struct xen_netif_tx_request txreq; 1461 struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX]; 1462 struct page *page; 1463 struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1]; 1464 u16 pending_idx; 1465 RING_IDX idx; 1466 int work_to_do; 1467 unsigned int data_len; 1468 pending_ring_idx_t index; 1469 1470 if (vif->tx.sring->req_prod - vif->tx.req_cons > 1471 XEN_NETIF_TX_RING_SIZE) { 1472 netdev_err(vif->dev, 1473 "Impossible number of requests. " 1474 "req_prod %d, req_cons %d, size %ld\n", 1475 vif->tx.sring->req_prod, vif->tx.req_cons, 1476 XEN_NETIF_TX_RING_SIZE); 1477 xenvif_fatal_tx_err(vif); 1478 continue; 1479 } 1480 1481 work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&vif->tx); 1482 if (!work_to_do) 1483 break; 1484 1485 idx = vif->tx.req_cons; 1486 rmb(); /* Ensure that we see the request before we copy it. */ 1487 memcpy(&txreq, RING_GET_REQUEST(&vif->tx, idx), sizeof(txreq)); 1488 1489 /* Credit-based scheduling. */ 1490 if (txreq.size > vif->remaining_credit && 1491 tx_credit_exceeded(vif, txreq.size)) 1492 break; 1493 1494 vif->remaining_credit -= txreq.size; 1495 1496 work_to_do--; 1497 vif->tx.req_cons = ++idx; 1498 1499 memset(extras, 0, sizeof(extras)); 1500 if (txreq.flags & XEN_NETTXF_extra_info) { 1501 work_to_do = xenvif_get_extras(vif, extras, 1502 work_to_do); 1503 idx = vif->tx.req_cons; 1504 if (unlikely(work_to_do < 0)) 1505 break; 1506 } 1507 1508 ret = xenvif_count_requests(vif, &txreq, txfrags, work_to_do); 1509 if (unlikely(ret < 0)) 1510 break; 1511 1512 idx += ret; 1513 1514 if (unlikely(txreq.size < ETH_HLEN)) { 1515 netdev_dbg(vif->dev, 1516 "Bad packet size: %d\n", txreq.size); 1517 xenvif_tx_err(vif, &txreq, idx); 1518 break; 1519 } 1520 1521 /* No crossing a page as the payload mustn't fragment. */ 1522 if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) { 1523 netdev_err(vif->dev, 1524 "txreq.offset: %x, size: %u, end: %lu\n", 1525 txreq.offset, txreq.size, 1526 (txreq.offset&~PAGE_MASK) + txreq.size); 1527 xenvif_fatal_tx_err(vif); 1528 break; 1529 } 1530 1531 index = pending_index(vif->pending_cons); 1532 pending_idx = vif->pending_ring[index]; 1533 1534 data_len = (txreq.size > PKT_PROT_LEN && 1535 ret < XEN_NETBK_LEGACY_SLOTS_MAX) ? 1536 PKT_PROT_LEN : txreq.size; 1537 1538 skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN, 1539 GFP_ATOMIC | __GFP_NOWARN); 1540 if (unlikely(skb == NULL)) { 1541 netdev_dbg(vif->dev, 1542 "Can't allocate a skb in start_xmit.\n"); 1543 xenvif_tx_err(vif, &txreq, idx); 1544 break; 1545 } 1546 1547 /* Packets passed to netif_rx() must have some headroom. */ 1548 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); 1549 1550 if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) { 1551 struct xen_netif_extra_info *gso; 1552 gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1]; 1553 1554 if (xenvif_set_skb_gso(vif, skb, gso)) { 1555 /* Failure in xenvif_set_skb_gso is fatal. */ 1556 kfree_skb(skb); 1557 break; 1558 } 1559 } 1560 1561 /* XXX could copy straight to head */ 1562 page = xenvif_alloc_page(vif, pending_idx); 1563 if (!page) { 1564 kfree_skb(skb); 1565 xenvif_tx_err(vif, &txreq, idx); 1566 break; 1567 } 1568 1569 gop->source.u.ref = txreq.gref; 1570 gop->source.domid = vif->domid; 1571 gop->source.offset = txreq.offset; 1572 1573 gop->dest.u.gmfn = virt_to_mfn(page_address(page)); 1574 gop->dest.domid = DOMID_SELF; 1575 gop->dest.offset = txreq.offset; 1576 1577 gop->len = txreq.size; 1578 gop->flags = GNTCOPY_source_gref; 1579 1580 gop++; 1581 1582 memcpy(&vif->pending_tx_info[pending_idx].req, 1583 &txreq, sizeof(txreq)); 1584 vif->pending_tx_info[pending_idx].head = index; 1585 *((u16 *)skb->data) = pending_idx; 1586 1587 __skb_put(skb, data_len); 1588 1589 skb_shinfo(skb)->nr_frags = ret; 1590 if (data_len < txreq.size) { 1591 skb_shinfo(skb)->nr_frags++; 1592 frag_set_pending_idx(&skb_shinfo(skb)->frags[0], 1593 pending_idx); 1594 } else { 1595 frag_set_pending_idx(&skb_shinfo(skb)->frags[0], 1596 INVALID_PENDING_IDX); 1597 } 1598 1599 vif->pending_cons++; 1600 1601 request_gop = xenvif_get_requests(vif, skb, txfrags, gop); 1602 if (request_gop == NULL) { 1603 kfree_skb(skb); 1604 xenvif_tx_err(vif, &txreq, idx); 1605 break; 1606 } 1607 gop = request_gop; 1608 1609 __skb_queue_tail(&vif->tx_queue, skb); 1610 1611 vif->tx.req_cons = idx; 1612 1613 if ((gop-vif->tx_copy_ops) >= ARRAY_SIZE(vif->tx_copy_ops)) 1614 break; 1615 } 1616 1617 return gop - vif->tx_copy_ops; 1618 } 1619 1620 1621 static int xenvif_tx_submit(struct xenvif *vif) 1622 { 1623 struct gnttab_copy *gop = vif->tx_copy_ops; 1624 struct sk_buff *skb; 1625 int work_done = 0; 1626 1627 while ((skb = __skb_dequeue(&vif->tx_queue)) != NULL) { 1628 struct xen_netif_tx_request *txp; 1629 u16 pending_idx; 1630 unsigned data_len; 1631 1632 pending_idx = *((u16 *)skb->data); 1633 txp = &vif->pending_tx_info[pending_idx].req; 1634 1635 /* Check the remap error code. */ 1636 if (unlikely(xenvif_tx_check_gop(vif, skb, &gop))) { 1637 netdev_dbg(vif->dev, "netback grant failed.\n"); 1638 skb_shinfo(skb)->nr_frags = 0; 1639 kfree_skb(skb); 1640 continue; 1641 } 1642 1643 data_len = skb->len; 1644 memcpy(skb->data, 1645 (void *)(idx_to_kaddr(vif, pending_idx)|txp->offset), 1646 data_len); 1647 if (data_len < txp->size) { 1648 /* Append the packet payload as a fragment. */ 1649 txp->offset += data_len; 1650 txp->size -= data_len; 1651 } else { 1652 /* Schedule a response immediately. */ 1653 xenvif_idx_release(vif, pending_idx, 1654 XEN_NETIF_RSP_OKAY); 1655 } 1656 1657 if (txp->flags & XEN_NETTXF_csum_blank) 1658 skb->ip_summed = CHECKSUM_PARTIAL; 1659 else if (txp->flags & XEN_NETTXF_data_validated) 1660 skb->ip_summed = CHECKSUM_UNNECESSARY; 1661 1662 xenvif_fill_frags(vif, skb); 1663 1664 if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) { 1665 int target = min_t(int, skb->len, PKT_PROT_LEN); 1666 __pskb_pull_tail(skb, target - skb_headlen(skb)); 1667 } 1668 1669 skb->dev = vif->dev; 1670 skb->protocol = eth_type_trans(skb, skb->dev); 1671 skb_reset_network_header(skb); 1672 1673 if (checksum_setup(vif, skb)) { 1674 netdev_dbg(vif->dev, 1675 "Can't setup checksum in net_tx_action\n"); 1676 kfree_skb(skb); 1677 continue; 1678 } 1679 1680 skb_probe_transport_header(skb, 0); 1681 1682 vif->dev->stats.rx_bytes += skb->len; 1683 vif->dev->stats.rx_packets++; 1684 1685 work_done++; 1686 1687 netif_receive_skb(skb); 1688 } 1689 1690 return work_done; 1691 } 1692 1693 /* Called after netfront has transmitted */ 1694 int xenvif_tx_action(struct xenvif *vif, int budget) 1695 { 1696 unsigned nr_gops; 1697 int work_done; 1698 1699 if (unlikely(!tx_work_todo(vif))) 1700 return 0; 1701 1702 nr_gops = xenvif_tx_build_gops(vif, budget); 1703 1704 if (nr_gops == 0) 1705 return 0; 1706 1707 gnttab_batch_copy(vif->tx_copy_ops, nr_gops); 1708 1709 work_done = xenvif_tx_submit(vif); 1710 1711 return work_done; 1712 } 1713 1714 static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx, 1715 u8 status) 1716 { 1717 struct pending_tx_info *pending_tx_info; 1718 pending_ring_idx_t head; 1719 u16 peek; /* peek into next tx request */ 1720 1721 BUG_ON(vif->mmap_pages[pending_idx] == (void *)(~0UL)); 1722 1723 /* Already complete? */ 1724 if (vif->mmap_pages[pending_idx] == NULL) 1725 return; 1726 1727 pending_tx_info = &vif->pending_tx_info[pending_idx]; 1728 1729 head = pending_tx_info->head; 1730 1731 BUG_ON(!pending_tx_is_head(vif, head)); 1732 BUG_ON(vif->pending_ring[pending_index(head)] != pending_idx); 1733 1734 do { 1735 pending_ring_idx_t index; 1736 pending_ring_idx_t idx = pending_index(head); 1737 u16 info_idx = vif->pending_ring[idx]; 1738 1739 pending_tx_info = &vif->pending_tx_info[info_idx]; 1740 make_tx_response(vif, &pending_tx_info->req, status); 1741 1742 /* Setting any number other than 1743 * INVALID_PENDING_RING_IDX indicates this slot is 1744 * starting a new packet / ending a previous packet. 1745 */ 1746 pending_tx_info->head = 0; 1747 1748 index = pending_index(vif->pending_prod++); 1749 vif->pending_ring[index] = vif->pending_ring[info_idx]; 1750 1751 peek = vif->pending_ring[pending_index(++head)]; 1752 1753 } while (!pending_tx_is_head(vif, peek)); 1754 1755 put_page(vif->mmap_pages[pending_idx]); 1756 vif->mmap_pages[pending_idx] = NULL; 1757 } 1758 1759 1760 static void make_tx_response(struct xenvif *vif, 1761 struct xen_netif_tx_request *txp, 1762 s8 st) 1763 { 1764 RING_IDX i = vif->tx.rsp_prod_pvt; 1765 struct xen_netif_tx_response *resp; 1766 int notify; 1767 1768 resp = RING_GET_RESPONSE(&vif->tx, i); 1769 resp->id = txp->id; 1770 resp->status = st; 1771 1772 if (txp->flags & XEN_NETTXF_extra_info) 1773 RING_GET_RESPONSE(&vif->tx, ++i)->status = XEN_NETIF_RSP_NULL; 1774 1775 vif->tx.rsp_prod_pvt = ++i; 1776 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->tx, notify); 1777 if (notify) 1778 notify_remote_via_irq(vif->tx_irq); 1779 } 1780 1781 static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif, 1782 u16 id, 1783 s8 st, 1784 u16 offset, 1785 u16 size, 1786 u16 flags) 1787 { 1788 RING_IDX i = vif->rx.rsp_prod_pvt; 1789 struct xen_netif_rx_response *resp; 1790 1791 resp = RING_GET_RESPONSE(&vif->rx, i); 1792 resp->offset = offset; 1793 resp->flags = flags; 1794 resp->id = id; 1795 resp->status = (s16)size; 1796 if (st < 0) 1797 resp->status = (s16)st; 1798 1799 vif->rx.rsp_prod_pvt = ++i; 1800 1801 return resp; 1802 } 1803 1804 static inline int rx_work_todo(struct xenvif *vif) 1805 { 1806 return !skb_queue_empty(&vif->rx_queue); 1807 } 1808 1809 static inline int tx_work_todo(struct xenvif *vif) 1810 { 1811 1812 if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->tx)) && 1813 (nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX 1814 < MAX_PENDING_REQS)) 1815 return 1; 1816 1817 return 0; 1818 } 1819 1820 void xenvif_unmap_frontend_rings(struct xenvif *vif) 1821 { 1822 if (vif->tx.sring) 1823 xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif), 1824 vif->tx.sring); 1825 if (vif->rx.sring) 1826 xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif), 1827 vif->rx.sring); 1828 } 1829 1830 int xenvif_map_frontend_rings(struct xenvif *vif, 1831 grant_ref_t tx_ring_ref, 1832 grant_ref_t rx_ring_ref) 1833 { 1834 void *addr; 1835 struct xen_netif_tx_sring *txs; 1836 struct xen_netif_rx_sring *rxs; 1837 1838 int err = -ENOMEM; 1839 1840 err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif), 1841 tx_ring_ref, &addr); 1842 if (err) 1843 goto err; 1844 1845 txs = (struct xen_netif_tx_sring *)addr; 1846 BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE); 1847 1848 err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif), 1849 rx_ring_ref, &addr); 1850 if (err) 1851 goto err; 1852 1853 rxs = (struct xen_netif_rx_sring *)addr; 1854 BACK_RING_INIT(&vif->rx, rxs, PAGE_SIZE); 1855 1856 vif->rx_req_cons_peek = 0; 1857 1858 return 0; 1859 1860 err: 1861 xenvif_unmap_frontend_rings(vif); 1862 return err; 1863 } 1864 1865 int xenvif_kthread(void *data) 1866 { 1867 struct xenvif *vif = data; 1868 1869 while (!kthread_should_stop()) { 1870 wait_event_interruptible(vif->wq, 1871 rx_work_todo(vif) || 1872 kthread_should_stop()); 1873 if (kthread_should_stop()) 1874 break; 1875 1876 if (rx_work_todo(vif)) 1877 xenvif_rx_action(vif); 1878 1879 cond_resched(); 1880 } 1881 1882 return 0; 1883 } 1884 1885 static int __init netback_init(void) 1886 { 1887 int rc = 0; 1888 1889 if (!xen_domain()) 1890 return -ENODEV; 1891 1892 if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) { 1893 pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n", 1894 fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX); 1895 fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX; 1896 } 1897 1898 rc = xenvif_xenbus_init(); 1899 if (rc) 1900 goto failed_init; 1901 1902 return 0; 1903 1904 failed_init: 1905 return rc; 1906 } 1907 1908 module_init(netback_init); 1909 1910 static void __exit netback_fini(void) 1911 { 1912 xenvif_xenbus_fini(); 1913 } 1914 module_exit(netback_fini); 1915 1916 MODULE_LICENSE("Dual BSD/GPL"); 1917 MODULE_ALIAS("xen-backend:vif"); 1918