1 /* 2 * Back-end of the driver for virtual network devices. This portion of the 3 * driver exports a 'unified' network-device interface that can be accessed 4 * by any operating system that implements a compatible front end. A 5 * reference front-end implementation can be found in: 6 * drivers/net/xen-netfront.c 7 * 8 * Copyright (c) 2002-2005, K A Fraser 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License version 2 12 * as published by the Free Software Foundation; or, when distributed 13 * separately from the Linux kernel or incorporated into other 14 * software packages, subject to the following license: 15 * 16 * Permission is hereby granted, free of charge, to any person obtaining a copy 17 * of this source file (the "Software"), to deal in the Software without 18 * restriction, including without limitation the rights to use, copy, modify, 19 * merge, publish, distribute, sublicense, and/or sell copies of the Software, 20 * and to permit persons to whom the Software is furnished to do so, subject to 21 * the following conditions: 22 * 23 * The above copyright notice and this permission notice shall be included in 24 * all copies or substantial portions of the Software. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 27 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 28 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 29 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 30 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 31 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 32 * IN THE SOFTWARE. 
 */

#include "common.h"

#include <linux/kthread.h>
#include <linux/if_vlan.h>
#include <linux/udp.h>

#include <net/tcp.h>

#include <xen/xen.h>
#include <xen/events.h>
#include <xen/interface/memory.h>

#include <asm/xen/hypercall.h>
#include <asm/xen/page.h>

/* SKB control block overlay is used to store useful information when
 * doing guest RX. It is laid over skb->cb by casting.
 */
struct skb_cb_overlay {
	/* Set in xenvif_rx_action() from the return of xenvif_gop_skb(). */
	int meta_slots_used;
	/* Set by xenvif_count_skb_slots(); read when sizing an RX batch. */
	int peek_slots_count;
};

/* Provide an option to disable split event channels at load time, as
 * event channels are a limited resource. Split event channels are
 * enabled by default.
 */
bool separate_tx_rx_irq = 1;
module_param(separate_tx_rx_irq, bool, 0644);

/*
 * This is the maximum number of slots a skb can have. If a guest sends
 * a skb which exceeds this limit it is considered malicious.
 */
#define FATAL_SKB_SLOTS_DEFAULT 20
static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT;
module_param(fatal_skb_slots, uint, 0444);

/*
 * To avoid confusion, we define XEN_NETBK_LEGACY_SLOTS_MAX indicating
 * the maximum number of slots a valid packet can use. This value is
 * defined to be XEN_NETIF_NR_SLOTS_MIN, which is supposed to be
 * supported by all backends.
 */
#define XEN_NETBK_LEGACY_SLOTS_MAX XEN_NETIF_NR_SLOTS_MIN

/*
 * If head != INVALID_PENDING_RING_IDX, it means this tx request is head of
 * one or more merged tx requests, otherwise it is the continuation of
 * previous tx request.
85 */ 86 static inline int pending_tx_is_head(struct xenvif *vif, RING_IDX idx) 87 { 88 return vif->pending_tx_info[idx].head != INVALID_PENDING_RING_IDX; 89 } 90 91 static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx, 92 u8 status); 93 94 static void make_tx_response(struct xenvif *vif, 95 struct xen_netif_tx_request *txp, 96 s8 st); 97 98 static inline int tx_work_todo(struct xenvif *vif); 99 static inline int rx_work_todo(struct xenvif *vif); 100 101 static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif, 102 u16 id, 103 s8 st, 104 u16 offset, 105 u16 size, 106 u16 flags); 107 108 static inline unsigned long idx_to_pfn(struct xenvif *vif, 109 u16 idx) 110 { 111 return page_to_pfn(vif->mmap_pages[idx]); 112 } 113 114 static inline unsigned long idx_to_kaddr(struct xenvif *vif, 115 u16 idx) 116 { 117 return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx)); 118 } 119 120 /* 121 * This is the amount of packet we copy rather than map, so that the 122 * guest can't fiddle with the contents of the headers while we do 123 * packet processing on them (netfilter, routing, etc). 
124 */ 125 #define PKT_PROT_LEN (ETH_HLEN + \ 126 VLAN_HLEN + \ 127 sizeof(struct iphdr) + MAX_IPOPTLEN + \ 128 sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE) 129 130 static u16 frag_get_pending_idx(skb_frag_t *frag) 131 { 132 return (u16)frag->page_offset; 133 } 134 135 static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx) 136 { 137 frag->page_offset = pending_idx; 138 } 139 140 static inline pending_ring_idx_t pending_index(unsigned i) 141 { 142 return i & (MAX_PENDING_REQS-1); 143 } 144 145 static inline pending_ring_idx_t nr_pending_reqs(struct xenvif *vif) 146 { 147 return MAX_PENDING_REQS - 148 vif->pending_prod + vif->pending_cons; 149 } 150 151 static int max_required_rx_slots(struct xenvif *vif) 152 { 153 int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE); 154 155 /* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */ 156 if (vif->can_sg || vif->gso || vif->gso_prefix) 157 max += MAX_SKB_FRAGS + 1; /* extra_info + frags */ 158 159 return max; 160 } 161 162 int xenvif_rx_ring_full(struct xenvif *vif) 163 { 164 RING_IDX peek = vif->rx_req_cons_peek; 165 RING_IDX needed = max_required_rx_slots(vif); 166 167 return ((vif->rx.sring->req_prod - peek) < needed) || 168 ((vif->rx.rsp_prod_pvt + XEN_NETIF_RX_RING_SIZE - peek) < needed); 169 } 170 171 int xenvif_must_stop_queue(struct xenvif *vif) 172 { 173 if (!xenvif_rx_ring_full(vif)) 174 return 0; 175 176 vif->rx.sring->req_event = vif->rx_req_cons_peek + 177 max_required_rx_slots(vif); 178 mb(); /* request notification /then/ check the queue */ 179 180 return xenvif_rx_ring_full(vif); 181 } 182 183 /* 184 * Returns true if we should start a new receive buffer instead of 185 * adding 'size' bytes to a buffer which currently contains 'offset' 186 * bytes. 187 */ 188 static bool start_new_rx_buffer(int offset, unsigned long size, int head) 189 { 190 /* simple case: we have completely filled the current buffer. 
*/ 191 if (offset == MAX_BUFFER_OFFSET) 192 return true; 193 194 /* 195 * complex case: start a fresh buffer if the current frag 196 * would overflow the current buffer but only if: 197 * (i) this frag would fit completely in the next buffer 198 * and (ii) there is already some data in the current buffer 199 * and (iii) this is not the head buffer. 200 * 201 * Where: 202 * - (i) stops us splitting a frag into two copies 203 * unless the frag is too large for a single buffer. 204 * - (ii) stops us from leaving a buffer pointlessly empty. 205 * - (iii) stops us leaving the first buffer 206 * empty. Strictly speaking this is already covered 207 * by (ii) but is explicitly checked because 208 * netfront relies on the first buffer being 209 * non-empty and can crash otherwise. 210 * 211 * This means we will effectively linearise small 212 * frags but do not needlessly split large buffers 213 * into multiple copies tend to give large frags their 214 * own buffers as before. 215 */ 216 if ((offset + size > MAX_BUFFER_OFFSET) && 217 (size <= MAX_BUFFER_OFFSET) && offset && !head) 218 return true; 219 220 return false; 221 } 222 223 /* 224 * Figure out how many ring slots we're going to need to send @skb to 225 * the guest. This function is essentially a dry run of 226 * xenvif_gop_frag_copy. 
227 */ 228 unsigned int xenvif_count_skb_slots(struct xenvif *vif, struct sk_buff *skb) 229 { 230 unsigned int count; 231 int i, copy_off; 232 struct skb_cb_overlay *sco; 233 234 count = DIV_ROUND_UP(skb_headlen(skb), PAGE_SIZE); 235 236 copy_off = skb_headlen(skb) % PAGE_SIZE; 237 238 if (skb_shinfo(skb)->gso_size) 239 count++; 240 241 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 242 unsigned long size = skb_frag_size(&skb_shinfo(skb)->frags[i]); 243 unsigned long offset = skb_shinfo(skb)->frags[i].page_offset; 244 unsigned long bytes; 245 246 offset &= ~PAGE_MASK; 247 248 while (size > 0) { 249 BUG_ON(offset >= PAGE_SIZE); 250 BUG_ON(copy_off > MAX_BUFFER_OFFSET); 251 252 bytes = PAGE_SIZE - offset; 253 254 if (bytes > size) 255 bytes = size; 256 257 if (start_new_rx_buffer(copy_off, bytes, 0)) { 258 count++; 259 copy_off = 0; 260 } 261 262 if (copy_off + bytes > MAX_BUFFER_OFFSET) 263 bytes = MAX_BUFFER_OFFSET - copy_off; 264 265 copy_off += bytes; 266 267 offset += bytes; 268 size -= bytes; 269 270 if (offset == PAGE_SIZE) 271 offset = 0; 272 } 273 } 274 275 sco = (struct skb_cb_overlay *)skb->cb; 276 sco->peek_slots_count = count; 277 return count; 278 } 279 280 struct netrx_pending_operations { 281 unsigned copy_prod, copy_cons; 282 unsigned meta_prod, meta_cons; 283 struct gnttab_copy *copy; 284 struct xenvif_rx_meta *meta; 285 int copy_off; 286 grant_ref_t copy_gref; 287 }; 288 289 static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif *vif, 290 struct netrx_pending_operations *npo) 291 { 292 struct xenvif_rx_meta *meta; 293 struct xen_netif_rx_request *req; 294 295 req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++); 296 297 meta = npo->meta + npo->meta_prod++; 298 meta->gso_size = 0; 299 meta->size = 0; 300 meta->id = req->id; 301 302 npo->copy_off = 0; 303 npo->copy_gref = req->gref; 304 305 return meta; 306 } 307 308 /* Set up the grant operations for this fragment. 
*/
/*
 * Queues grant-copy operations in @npo for @size bytes starting at
 * @offset within @page. Each copy is limited so that it neither
 * crosses a source page boundary nor runs past MAX_BUFFER_OFFSET in
 * the destination buffer; get_next_rx_buffer() is called whenever
 * start_new_rx_buffer() asks for a fresh destination. @head is handed
 * to start_new_rx_buffer() unchanged. *@first is cleared once any
 * data has been queued.
 */
static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
				 struct netrx_pending_operations *npo,
				 struct page *page, unsigned long size,
				 unsigned long offset, int head, int *first)
{
	struct gnttab_copy *copy_gop;
	struct xenvif_rx_meta *meta;
	unsigned long bytes;

	/* Data must not cross a page boundary. */
	BUG_ON(size + offset > PAGE_SIZE<<compound_order(page));

	/* Continue filling the most recently produced meta entry. */
	meta = npo->meta + npo->meta_prod - 1;

	/* Skip unused frames from start of page */
	page += offset >> PAGE_SHIFT;
	offset &= ~PAGE_MASK;

	while (size > 0) {
		BUG_ON(offset >= PAGE_SIZE);
		BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);

		/* At most up to the end of the current source page. */
		bytes = PAGE_SIZE - offset;

		if (bytes > size)
			bytes = size;

		if (start_new_rx_buffer(npo->copy_off, bytes, head)) {
			/*
			 * Netfront requires there to be some data in the head
			 * buffer.
			 */
			BUG_ON(*first);

			meta = get_next_rx_buffer(vif, npo);
		}

		/* ... and at most up to the end of the destination buffer. */
		if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
			bytes = MAX_BUFFER_OFFSET - npo->copy_off;

		copy_gop = npo->copy + npo->copy_prod++;
		copy_gop->flags = GNTCOPY_dest_gref;
		copy_gop->len = bytes;

		copy_gop->source.domid = DOMID_SELF;
		copy_gop->source.u.gmfn = virt_to_mfn(page_address(page));
		copy_gop->source.offset = offset;

		copy_gop->dest.domid = vif->domid;
		copy_gop->dest.offset = npo->copy_off;
		copy_gop->dest.u.ref = npo->copy_gref;

		npo->copy_off += bytes;
		meta->size += bytes;

		offset += bytes;
		size -= bytes;

		/* Next frame */
		if (offset == PAGE_SIZE && size) {
			BUG_ON(!PageCompound(page));
			page++;
			offset = 0;
		}

		/* Leave a gap for the GSO descriptor. */
		if (*first && skb_shinfo(skb)->gso_size && !vif->gso_prefix)
			vif->rx.req_cons++;

		*first = 0; /* There must be something in this buffer now. */

	}
}

/*
 * Prepare an SKB to be transmitted to the frontend.
 *
 * This function is responsible for allocating grant operations, meta
 * structures, etc.
 *
 * It returns the number of meta structures consumed. The number of
 * ring slots used is always equal to the number of meta slots used
 * plus the number of GSO descriptors used. Currently, we use either
 * zero GSO descriptors (for non-GSO packets) or one descriptor (for
 * frontend-side LRO).
 */
static int xenvif_gop_skb(struct sk_buff *skb,
			  struct netrx_pending_operations *npo)
{
	struct xenvif *vif = netdev_priv(skb->dev);
	int nr_frags = skb_shinfo(skb)->nr_frags;
	int i;
	struct xen_netif_rx_request *req;
	struct xenvif_rx_meta *meta;
	unsigned char *data;
	int first = 1;
	int old_meta_prod;

	/* Remembered so the number of meta entries produced can be returned. */
	old_meta_prod = npo->meta_prod;

	/* Set up a GSO prefix descriptor, if necessary */
	if (skb_shinfo(skb)->gso_size && vif->gso_prefix) {
		req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
		meta = npo->meta + npo->meta_prod++;
		meta->gso_size = skb_shinfo(skb)->gso_size;
		meta->size = 0;
		meta->id = req->id;
	}

	/* Head buffer: first destination for the packet data. */
	req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
	meta = npo->meta + npo->meta_prod++;

	if (!vif->gso_prefix)
		meta->gso_size = skb_shinfo(skb)->gso_size;
	else
		meta->gso_size = 0;

	meta->size = 0;
	meta->id = req->id;
	npo->copy_off = 0;
	npo->copy_gref = req->gref;

	/* Copy the linear area one source page at a time (head == 1). */
	data = skb->data;
	while (data < skb_tail_pointer(skb)) {
		unsigned int offset = offset_in_page(data);
		unsigned int len = PAGE_SIZE - offset;

		if (data + len > skb_tail_pointer(skb))
			len = skb_tail_pointer(skb) - data;

		xenvif_gop_frag_copy(vif, skb, npo,
				     virt_to_page(data), len, offset, 1, &first);
		data += len;
	}

	/* Then each paged fragment (head == 0). */
	for (i = 0; i < nr_frags; i++) {
		xenvif_gop_frag_copy(vif, skb, npo,
				     skb_frag_page(&skb_shinfo(skb)->frags[i]),
				     skb_frag_size(&skb_shinfo(skb)->frags[i]),
				     skb_shinfo(skb)->frags[i].page_offset,
				     0, &first);
	}

	return npo->meta_prod - old_meta_prod;
}

/*
 * This is a twin to xenvif_gop_skb. Assume that xenvif_gop_skb was
 * used to set up the operations on the top of
 * netrx_pending_operations, which have since been done. Check that
 * they didn't give any errors and advance over them.
 *
 * Returns XEN_NETIF_RSP_OKAY, or XEN_NETIF_RSP_ERROR if any inspected
 * copy operation has a status other than GNTST_okay.
 *
 * NOTE(review): this consumes exactly one copy op per meta slot, while
 * xenvif_gop_frag_copy() can queue several copy ops for one meta slot.
 * Confirm that copy_cons stays in step with copy_prod across skbs.
 */
static int xenvif_check_gop(struct xenvif *vif, int nr_meta_slots,
			    struct netrx_pending_operations *npo)
{
	struct gnttab_copy *copy_op;
	int status = XEN_NETIF_RSP_OKAY;
	int i;

	for (i = 0; i < nr_meta_slots; i++) {
		copy_op = npo->copy + npo->copy_cons++;
		if (copy_op->status != GNTST_okay) {
			netdev_dbg(vif->dev,
				   "Bad status %d from copy to DOM%d.\n",
				   copy_op->status, vif->domid);
			status = XEN_NETIF_RSP_ERROR;
		}
	}

	return status;
}

/*
 * Emit one RX response for each of the first (@nr_meta_slots - 1)
 * entries of @meta. Every response but the last carries
 * XEN_NETRXF_more_data. Does nothing when @nr_meta_slots <= 1.
 */
static void xenvif_add_frag_responses(struct xenvif *vif, int status,
				      struct xenvif_rx_meta *meta,
				      int nr_meta_slots)
{
	int i;
	unsigned long offset;

	/* No fragments used */
	if (nr_meta_slots <= 1)
		return;

	nr_meta_slots--;

	for (i = 0; i < nr_meta_slots; i++) {
		int flags;
		if (i == nr_meta_slots - 1)
			flags = 0;
		else
			flags = XEN_NETRXF_more_data;

		offset = 0;
		make_rx_response(vif, meta[i].id, status, offset,
				 meta[i].size, flags);
	}
}

/* Wake whoever is sleeping on vif->wq. */
static void xenvif_kick_thread(struct xenvif *vif)
{
	wake_up(&vif->wq);
}

/*
 * Guest-RX worker. Runs in three phases:
 *  1. Dequeue skbs from vif->rx_queue and build grant-copy ops for
 *     them with xenvif_gop_skb(), stopping when the queue is empty or
 *     the next skb's peek_slots_count would reach XEN_NETIF_RX_RING_SIZE.
 *  2. Issue all queued copies in one gnttab_batch_copy() call.
 *  3. For each processed skb, write the RX responses (including any
 *     GSO prefix or extra-info slot), push them, and free the skb.
 * The frontend is notified once at the end if any push asked for it,
 * and the thread is kicked again if rx_queue still holds packets.
 */
void xenvif_rx_action(struct xenvif *vif)
{
	s8 status;
	u16 flags;
	struct xen_netif_rx_response *resp;
	struct sk_buff_head rxq;
	struct sk_buff *skb;
	LIST_HEAD(notify);
	int ret;
	int nr_frags;
	int count;
	unsigned long offset;
	struct skb_cb_overlay *sco;
	int need_to_notify = 0;

	struct netrx_pending_operations npo = {
		.copy  = vif->grant_copy_op,
		.meta  = vif->meta,
	};

	skb_queue_head_init(&rxq);

	count = 0;

	while ((skb = skb_dequeue(&vif->rx_queue)) != NULL) {
		RING_IDX old_rx_req_cons;

		vif = netdev_priv(skb->dev);
		nr_frags = skb_shinfo(skb)->nr_frags;

		old_rx_req_cons = vif->rx.req_cons;
		sco = (struct skb_cb_overlay *)skb->cb;
		sco->meta_slots_used = xenvif_gop_skb(skb, &npo);

		/* Ring slots consumed so far in this batch. */
		count += vif->rx.req_cons - old_rx_req_cons;

		__skb_queue_tail(&rxq, skb);

		skb = skb_peek(&vif->rx_queue);
		if (skb == NULL)
			break;
		sco = (struct skb_cb_overlay *)skb->cb;

		/* Filled the batch queue? */
		if (count + sco->peek_slots_count >= XEN_NETIF_RX_RING_SIZE)
			break;
	}

	BUG_ON(npo.meta_prod > ARRAY_SIZE(vif->meta));

	/* Nothing was queued: nothing to copy or respond to. */
	if (!npo.copy_prod)
		return;

	BUG_ON(npo.copy_prod > ARRAY_SIZE(vif->grant_copy_op));
	gnttab_batch_copy(vif->grant_copy_op, npo.copy_prod);

	while ((skb = __skb_dequeue(&rxq)) != NULL) {
		sco = (struct skb_cb_overlay *)skb->cb;

		vif = netdev_priv(skb->dev);

		/* GSO prefix slot: its own response ahead of the data. */
		if (vif->meta[npo.meta_cons].gso_size && vif->gso_prefix) {
			resp = RING_GET_RESPONSE(&vif->rx,
						 vif->rx.rsp_prod_pvt++);

			resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data;

			resp->offset = vif->meta[npo.meta_cons].gso_size;
			resp->id = vif->meta[npo.meta_cons].id;
			resp->status = sco->meta_slots_used;

			npo.meta_cons++;
			sco->meta_slots_used--;
		}


		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;

		status = xenvif_check_gop(vif, sco->meta_slots_used, &npo);

		if (sco->meta_slots_used == 1)
			flags = 0;
		else
			flags = XEN_NETRXF_more_data;

		if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
			flags |= XEN_NETRXF_csum_blank | XEN_NETRXF_data_validated;
		else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
			/* remote but checksummed. */
			flags |= XEN_NETRXF_data_validated;

		/* Response for the head buffer. */
		offset = 0;
		resp = make_rx_response(vif, vif->meta[npo.meta_cons].id,
					status, offset,
					vif->meta[npo.meta_cons].size,
					flags);

		/* Non-prefix GSO: extra-info slot right after the head. */
		if (vif->meta[npo.meta_cons].gso_size && !vif->gso_prefix) {
			struct xen_netif_extra_info *gso =
				(struct xen_netif_extra_info *)
				RING_GET_RESPONSE(&vif->rx,
						  vif->rx.rsp_prod_pvt++);

			resp->flags |= XEN_NETRXF_extra_info;

			gso->u.gso.size = vif->meta[npo.meta_cons].gso_size;
			gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
			gso->u.gso.pad = 0;
			gso->u.gso.features = 0;

			gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
			gso->flags = 0;
		}

		/* Responses for the remaining buffers of this skb. */
		xenvif_add_frag_responses(vif, status,
					  vif->meta + npo.meta_cons + 1,
					  sco->meta_slots_used);

		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret);

		if (ret)
			need_to_notify = 1;

		xenvif_notify_tx_completion(vif);

		npo.meta_cons += sco->meta_slots_used;
		dev_kfree_skb(skb);
	}

	if (need_to_notify)
		notify_remote_via_irq(vif->rx_irq);

	/* More work to do? */
	if (!skb_queue_empty(&vif->rx_queue))
		xenvif_kick_thread(vif);
}

/* Append @skb to vif->rx_queue and wake the worker. */
void xenvif_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb)
{
	skb_queue_tail(&vif->rx_queue, skb);

	xenvif_kick_thread(vif);
}

/* Re-check the TX ring for requests and schedule NAPI if any are found. */
void xenvif_check_rx_xenvif(struct xenvif *vif)
{
	int more_to_do;

	RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);

	if (more_to_do)
		napi_schedule(&vif->napi);
}

/*
 * Add one credit_bytes chunk to remaining_credit, capped at a burst
 * limit derived from the size of the next TX request.
 */
static void tx_add_credit(struct xenvif *vif)
{
	unsigned long max_burst, max_credit;

	/*
	 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
	 * Otherwise the interface can seize up due to insufficient credit.
	 */
	max_burst = RING_GET_REQUEST(&vif->tx, vif->tx.req_cons)->size;
	max_burst = min(max_burst, 131072UL);
	max_burst = max(max_burst, vif->credit_bytes);

	/* Take care that adding a new chunk of credit doesn't wrap to zero. */
	max_credit = vif->remaining_credit + vif->credit_bytes;
	if (max_credit < vif->remaining_credit)
		max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */

	vif->remaining_credit = min(max_credit, max_burst);
}

/* Timer handler: @data is the vif. Adds credit, then re-checks the TX ring. */
static void tx_credit_callback(unsigned long data)
{
	struct xenvif *vif = (struct xenvif *)data;
	tx_add_credit(vif);
	xenvif_check_rx_xenvif(vif);
}

/*
 * Send an error response for @txp and for every request from the
 * current tx.req_cons up to (but not including) @end, then advance
 * tx.req_cons to @end.
 */
static void xenvif_tx_err(struct xenvif *vif,
			  struct xen_netif_tx_request *txp, RING_IDX end)
{
	RING_IDX cons = vif->tx.req_cons;

	do {
		make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
		if (cons == end)
			break;
		txp = RING_GET_REQUEST(&vif->tx, cons++);
	} while (1);
	vif->tx.req_cons = cons;
}

/* Log a fatal error and take the vif's carrier down. */
static void xenvif_fatal_tx_err(struct xenvif *vif)
{
	netdev_err(vif->dev, "fatal error; disabling device\n");
	xenvif_carrier_off(vif);
}

/*
 * Count the slots that follow @first for the same packet, copying each
 * request from the ring into @txp[] as it goes.
 *
 * Returns 0 if @first has no XEN_NETTXF_more_data flag, the number of
 * following slots on success, or a negative errno. Fatal conditions
 * (more slots than @work_to_do, more than fatal_skb_slots, or a slot
 * crossing a page boundary) disable the device. A packet that is merely
 * invalid has all of its slots consumed and answered with errors via
 * xenvif_tx_err(). Note that @first->size is decremented by each
 * slot's size.
 */
static int xenvif_count_requests(struct xenvif *vif,
				 struct xen_netif_tx_request *first,
				 struct xen_netif_tx_request *txp,
				 int work_to_do)
{
	RING_IDX cons = vif->tx.req_cons;
	int slots = 0;
	int drop_err = 0;
	int more_data;

	if (!(first->flags & XEN_NETTXF_more_data))
		return 0;

	do {
		struct xen_netif_tx_request dropped_tx = { 0 };

		if (slots >= work_to_do) {
			netdev_err(vif->dev,
				   "Asked for %d slots but exceeds this limit\n",
				   work_to_do);
			xenvif_fatal_tx_err(vif);
			return -ENODATA;
		}

		/* This guest is really using too many slots and
		 * considered malicious.
		 */
		if (unlikely(slots >= fatal_skb_slots)) {
			netdev_err(vif->dev,
				   "Malicious frontend using %d slots, threshold %u\n",
				   slots, fatal_skb_slots);
			xenvif_fatal_tx_err(vif);
			return -E2BIG;
		}

		/* Xen network protocol had implicit dependency on
		 * MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to
		 * the historical MAX_SKB_FRAGS value 18 to honor the
		 * same behavior as before. Any packet using more than
		 * 18 slots but less than fatal_skb_slots slots is
		 * dropped
		 */
		if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) {
			if (net_ratelimit())
				netdev_dbg(vif->dev,
					   "Too many slots (%d) exceeding limit (%d), dropping packet\n",
					   slots, XEN_NETBK_LEGACY_SLOTS_MAX);
			drop_err = -E2BIG;
		}

		/* Once dropping, read into a scratch slot, not the caller's array. */
		if (drop_err)
			txp = &dropped_tx;

		memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + slots),
		       sizeof(*txp));

		/* If the guest submitted a frame >= 64 KiB then
		 * first->size overflowed and following slots will
		 * appear to be larger than the frame.
		 *
		 * This cannot be fatal error as there are buggy
		 * frontends that do this.
		 *
		 * Consume all slots and drop the packet.
		 */
		if (!drop_err && txp->size > first->size) {
			if (net_ratelimit())
				netdev_dbg(vif->dev,
					   "Invalid tx request, slot size %u > remaining size %u\n",
					   txp->size, first->size);
			drop_err = -EIO;
		}

		first->size -= txp->size;
		slots++;

		if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
			netdev_err(vif->dev, "Cross page boundary, txp->offset: %x, size: %u\n",
				 txp->offset, txp->size);
			xenvif_fatal_tx_err(vif);
			return -EINVAL;
		}

		more_data = txp->flags & XEN_NETTXF_more_data;

		if (!drop_err)
			txp++;

	} while (more_data);

	if (drop_err) {
		xenvif_tx_err(vif, first, cons + slots);
		return drop_err;
	}

	return slots;
}

/*
 * Allocate a page with GFP_ATOMIC and record it in
 * vif->mmap_pages[@pending_idx]. Returns NULL on allocation failure.
 */
static struct page *xenvif_alloc_page(struct xenvif *vif,
				      u16 pending_idx)
{
	struct page *page;

	page = alloc_page(GFP_ATOMIC|__GFP_COLD);
	if (!page)
		return NULL;
	vif->mmap_pages[pending_idx] = page;

	return page;
}

/*
 * Build grant-copy operations that coalesce the TX requests in @txp[]
 * into freshly allocated pages, one page per resulting skb frag.
 *
 * On entry shinfo->nr_frags holds the slot count; on return it holds
 * the number of frags actually produced. Returns the next unused
 * entry of @gop, or NULL if a page allocation failed (after releasing
 * the pending entries taken so far with error responses).
 */
static struct gnttab_copy *xenvif_get_requests(struct xenvif *vif,
					       struct sk_buff *skb,
					       struct xen_netif_tx_request *txp,
					       struct gnttab_copy *gop)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	skb_frag_t *frags = shinfo->frags;
	u16 pending_idx = *((u16 *)skb->data);
	u16 head_idx = 0;
	int slot, start;
	struct page *page;
	pending_ring_idx_t index, start_idx = 0;
	uint16_t dst_offset;
	unsigned int nr_slots;
	struct pending_tx_info *first = NULL;

	/* At this point shinfo->nr_frags is in fact the number of
	 * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
	 */
	nr_slots = shinfo->nr_frags;

	/* Skip first skb fragment if it is on same page as header fragment. */
	start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);

	/* Coalesce tx requests, at this point the packet passed in
	 * should be <= 64K. Any packets larger than 64K have been
	 * handled in xenvif_count_requests().
	 */
	for (shinfo->nr_frags = slot = start; slot < nr_slots;
	     shinfo->nr_frags++) {
		struct pending_tx_info *pending_tx_info =
			vif->pending_tx_info;

		page = alloc_page(GFP_ATOMIC|__GFP_COLD);
		if (!page)
			goto err;

		dst_offset = 0;
		first = NULL;
		/* Pack requests into this page until it is full or slots run out. */
		while (dst_offset < PAGE_SIZE && slot < nr_slots) {
			gop->flags = GNTCOPY_source_gref;

			gop->source.u.ref = txp->gref;
			gop->source.domid = vif->domid;
			gop->source.offset = txp->offset;

			gop->dest.domid = DOMID_SELF;

			gop->dest.offset = dst_offset;
			gop->dest.u.gmfn = virt_to_mfn(page_address(page));

			if (dst_offset + txp->size > PAGE_SIZE) {
				/* This page can only merge a portion
				 * of tx request. Do not increment any
				 * pointer / counter here. The txp
				 * will be dealt with in future
				 * rounds, eventually hitting the
				 * `else` branch.
				 */
				gop->len = PAGE_SIZE - dst_offset;
				txp->offset += gop->len;
				txp->size -= gop->len;
				dst_offset += gop->len; /* quit loop */
			} else {
				/* This tx request can be merged in the page */
				gop->len = txp->size;
				dst_offset += gop->len;

				index = pending_index(vif->pending_cons++);

				pending_idx = vif->pending_ring[index];

				memcpy(&pending_tx_info[pending_idx].req, txp,
				       sizeof(*txp));

				/* Poison these fields, corresponding
				 * fields for head tx req will be set
				 * to correct values after the loop.
				 */
				vif->mmap_pages[pending_idx] = (void *)(~0UL);
				pending_tx_info[pending_idx].head =
					INVALID_PENDING_RING_IDX;

				/* First request merged into this page is its head. */
				if (!first) {
					first = &pending_tx_info[pending_idx];
					start_idx = index;
					head_idx = pending_idx;
				}

				txp++;
				slot++;
			}

			gop++;
		}

		/* Head entry now describes the whole coalesced page. */
		first->req.offset = 0;
		first->req.size = dst_offset;
		first->head = start_idx;
		vif->mmap_pages[head_idx] = page;
		frag_set_pending_idx(&frags[shinfo->nr_frags], head_idx);
	}

	BUG_ON(shinfo->nr_frags > MAX_SKB_FRAGS);

	return gop;
err:
	/* Unwind, freeing all pages and sending error responses. */
	while (shinfo->nr_frags-- > start) {
		xenvif_idx_release(vif,
				frag_get_pending_idx(&frags[shinfo->nr_frags]),
				XEN_NETIF_RSP_ERROR);
	}
	/* The head too, if necessary. */
	if (start)
		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);

	return NULL;
}

/*
 * Check the grant-copy results for @skb, starting at *@gopp (the
 * header's op), and release pending entries as needed when something
 * failed. On return *@gopp points past the last op belonging to this
 * skb. Returns 0 if every op succeeded, otherwise the status of the
 * first op that failed.
 */
static int xenvif_tx_check_gop(struct xenvif *vif,
			       struct sk_buff *skb,
			       struct gnttab_copy **gopp)
{
	struct gnttab_copy *gop = *gopp;
	u16 pending_idx = *((u16 *)skb->data);
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	struct pending_tx_info *tx_info;
	int nr_frags = shinfo->nr_frags;
	int i, err, start;
	u16 peek; /* peek into next tx request */

	/* Check status of header. */
	err = gop->status;
	if (unlikely(err))
		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);

	/* Skip first skb fragment if it is on same page as header fragment. */
	start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);

	for (i = start; i < nr_frags; i++) {
		int j, newerr;
		pending_ring_idx_t head;

		pending_idx = frag_get_pending_idx(&shinfo->frags[i]);
		tx_info = &vif->pending_tx_info[pending_idx];
		head = tx_info->head;

		/*
		 * Check the status of each copy op belonging to this frag.
		 * Stop at the first error, or once the next pending ring
		 * entry is the head of another coalesced group.
		 */
		do {
			newerr = (++gop)->status;
			if (newerr)
				break;
			peek = vif->pending_ring[pending_index(++head)];
		} while (!pending_tx_is_head(vif, peek));

		if (likely(!newerr)) {
			/* Had a previous error? Invalidate this fragment. */
			if (unlikely(err))
				xenvif_idx_release(vif, pending_idx,
						   XEN_NETIF_RSP_OKAY);
			continue;
		}

		/* Error on this fragment: respond to client with an error. */
		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);

		/* Not the first error? Preceding frags already invalidated. */
		if (err)
			continue;

		/* First error: invalidate header and preceding fragments. */
		pending_idx = *((u16 *)skb->data);
		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
		for (j = start; j < i; j++) {
			pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
			xenvif_idx_release(vif, pending_idx,
					   XEN_NETIF_RSP_OKAY);
		}

		/* Remember the error: invalidate all subsequent fragments. */
		err = newerr;
	}

	*gopp = gop + 1;
	return err;
}

/*
 * Turn each frag's stored pending index into a real page descriptor,
 * account its size in the skb length fields, and release the pending
 * entry with an OKAY response.
 */
static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int nr_frags = shinfo->nr_frags;
	int i;

	for (i = 0; i < nr_frags; i++) {
		skb_frag_t *frag = shinfo->frags + i;
		struct xen_netif_tx_request *txp;
		struct page *page;
		u16 pending_idx;

		pending_idx = frag_get_pending_idx(frag);

		txp = &vif->pending_tx_info[pending_idx].req;
		page = virt_to_page(idx_to_kaddr(vif, pending_idx));
		__skb_fill_page_desc(skb, i, page, txp->offset, txp->size);
		skb->len += txp->size;
		skb->data_len += txp->size;
		skb->truesize += txp->size;

		/* Take an extra reference to offset xenvif_idx_release */
		get_page(vif->mmap_pages[pending_idx]);
		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
	}
}

/*
 * Read the chain of extra-info slots at tx.req_cons into @extras[],
 * indexed by (type - 1), advancing tx.req_cons past each one.
 *
 * Returns the remaining @work_to_do on success, or a negative errno
 * after disabling the device if a slot is missing or has an invalid
 * type.
 */
static int xenvif_get_extras(struct xenvif *vif,
				struct xen_netif_extra_info *extras,
				int work_to_do)
{
	struct xen_netif_extra_info extra;
	RING_IDX cons = vif->tx.req_cons;

	do {
		if (unlikely(work_to_do-- <= 0)) {
			netdev_err(vif->dev, "Missing extra info\n");
			xenvif_fatal_tx_err(vif);
			return -EBADR;
		}

		/* Work on a private copy of the ring slot. */
		memcpy(&extra, RING_GET_REQUEST(&vif->tx, cons),
		       sizeof(extra));
		if (unlikely(!extra.type ||
			     extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
			vif->tx.req_cons = ++cons;
			netdev_err(vif->dev,
				   "Invalid extra type: %d\n", extra.type);
			xenvif_fatal_tx_err(vif);
			return -EINVAL;
		}

		memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
		vif->tx.req_cons = ++cons;
	} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);

	return work_to_do;
}

/*
 * Apply the GSO extra-info @gso to @skb. Returns 0 on success, or
 * -EINVAL after disabling the device if the size is zero or the type
 * is not XEN_NETIF_GSO_TYPE_TCPV4.
 */
static int xenvif_set_skb_gso(struct xenvif *vif,
			      struct sk_buff *skb,
			      struct xen_netif_extra_info *gso)
{
	if (!gso->u.gso.size) {
		netdev_err(vif->dev, "GSO size must not be zero.\n");
		xenvif_fatal_tx_err(vif);
		return -EINVAL;
	}

	/* Currently only TCPv4 S.O. is supported. */
	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
		netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
		xenvif_fatal_tx_err(vif);
		return -EINVAL;
	}

	skb_shinfo(skb)->gso_size = gso->u.gso.size;
	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;

	/* Header must be checked, and gso_segs computed. */
	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
	skb_shinfo(skb)->gso_segs = 0;

	return 0;
}

/*
 * Set up partial-checksum offsets on @skb for IPv4 TCP/UDP.
 *
 * Returns 0 if nothing needs doing or setup succeeded, and -EPROTO if
 * the packet is not IPv4, is not TCP/UDP, or skb_partial_csum_set()
 * fails.
 */
static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
{
	struct iphdr *iph;
	int err = -EPROTO;
	int recalculate_partial_csum = 0;

	/*
	 * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
	 * peers can fail to set NETRXF_csum_blank when sending a GSO
	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
	 * recalculate the partial checksum.
	 */
	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
		vif->rx_gso_checksum_fixup++;
		skb->ip_summed = CHECKSUM_PARTIAL;
		recalculate_partial_csum = 1;
	}

	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return 0;

	if (skb->protocol != htons(ETH_P_IP))
		goto out;

	/* NOTE(review): assumes skb->data points at the IPv4 header here - confirm at the call site. */
	iph = (void *)skb->data;
	switch (iph->protocol) {
	case IPPROTO_TCP:
		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
					  offsetof(struct tcphdr, check)))
			goto out;

		/* Seed the checksum field with the pseudo-header sum. */
		if (recalculate_partial_csum) {
			struct tcphdr *tcph = tcp_hdr(skb);
			tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
							 skb->len - iph->ihl*4,
							 IPPROTO_TCP, 0);
		}
		break;
	case IPPROTO_UDP:
		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
					  offsetof(struct udphdr, check)))
			goto out;

		if (recalculate_partial_csum) {
			struct udphdr *udph = udp_hdr(skb);
			udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
							 skb->len - iph->ihl*4,
							 IPPROTO_UDP, 0);
		}
		break;
	default:
		if (net_ratelimit())
			netdev_err(vif->dev,
				   "Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n",
				   iph->protocol);
		goto out;
	}

	err = 0;

out:
	return err;
}

/*
 * Credit-based rate limiting check for a request of @size bytes.
 *
 * Returns true if the request must wait: either the credit timer is
 * already pending, or @size still exceeds remaining_credit after any
 * due replenishment (in which case the timer is armed for the next
 * credit point). Returns false if the request may proceed.
 */
static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
{
	unsigned long now = jiffies;
	unsigned long next_credit =
		vif->credit_timeout.expires +
		msecs_to_jiffies(vif->credit_usec / 1000);

	/* Timer could already be pending in rare cases. */
	if (timer_pending(&vif->credit_timeout))
		return true;

	/* Passed the point where we can replenish credit? */
	if (time_after_eq(now, next_credit)) {
		vif->credit_timeout.expires = now;
		tx_add_credit(vif);
	}

	/* Still too big to send right now? Set a callback. */
	if (size > vif->remaining_credit) {
		vif->credit_timeout.data     =
			(unsigned long)vif;
		vif->credit_timeout.function =
			tx_credit_callback;
		mod_timer(&vif->credit_timeout,
			  next_credit);

		return true;
	}

	return false;
}

static unsigned xenvif_tx_build_gops(struct xenvif *vif)
{
	struct gnttab_copy *gop = vif->tx_copy_ops, *request_gop;
	struct sk_buff *skb;
	int ret;

	while ((nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX
		< MAX_PENDING_REQS)) {
		struct xen_netif_tx_request txreq;
		struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
		struct page *page;
		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
		u16 pending_idx;
		RING_IDX idx;
		int work_to_do;
		unsigned int data_len;
		pending_ring_idx_t index;

		if (vif->tx.sring->req_prod - vif->tx.req_cons >
		    XEN_NETIF_TX_RING_SIZE) {
			netdev_err(vif->dev,
				   "Impossible number of requests. "
				   "req_prod %d, req_cons %d, size %ld\n",
				   vif->tx.sring->req_prod, vif->tx.req_cons,
				   XEN_NETIF_TX_RING_SIZE);
			xenvif_fatal_tx_err(vif);
			continue;
		}

		RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, work_to_do);
		if (!work_to_do)
			break;

		idx = vif->tx.req_cons;
		rmb(); /* Ensure that we see the request before we copy it. */
		memcpy(&txreq, RING_GET_REQUEST(&vif->tx, idx), sizeof(txreq));

		/* Credit-based scheduling.
*/ 1234 if (txreq.size > vif->remaining_credit && 1235 tx_credit_exceeded(vif, txreq.size)) 1236 break; 1237 1238 vif->remaining_credit -= txreq.size; 1239 1240 work_to_do--; 1241 vif->tx.req_cons = ++idx; 1242 1243 memset(extras, 0, sizeof(extras)); 1244 if (txreq.flags & XEN_NETTXF_extra_info) { 1245 work_to_do = xenvif_get_extras(vif, extras, 1246 work_to_do); 1247 idx = vif->tx.req_cons; 1248 if (unlikely(work_to_do < 0)) 1249 break; 1250 } 1251 1252 ret = xenvif_count_requests(vif, &txreq, txfrags, work_to_do); 1253 if (unlikely(ret < 0)) 1254 break; 1255 1256 idx += ret; 1257 1258 if (unlikely(txreq.size < ETH_HLEN)) { 1259 netdev_dbg(vif->dev, 1260 "Bad packet size: %d\n", txreq.size); 1261 xenvif_tx_err(vif, &txreq, idx); 1262 break; 1263 } 1264 1265 /* No crossing a page as the payload mustn't fragment. */ 1266 if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) { 1267 netdev_err(vif->dev, 1268 "txreq.offset: %x, size: %u, end: %lu\n", 1269 txreq.offset, txreq.size, 1270 (txreq.offset&~PAGE_MASK) + txreq.size); 1271 xenvif_fatal_tx_err(vif); 1272 break; 1273 } 1274 1275 index = pending_index(vif->pending_cons); 1276 pending_idx = vif->pending_ring[index]; 1277 1278 data_len = (txreq.size > PKT_PROT_LEN && 1279 ret < XEN_NETBK_LEGACY_SLOTS_MAX) ? 1280 PKT_PROT_LEN : txreq.size; 1281 1282 skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN, 1283 GFP_ATOMIC | __GFP_NOWARN); 1284 if (unlikely(skb == NULL)) { 1285 netdev_dbg(vif->dev, 1286 "Can't allocate a skb in start_xmit.\n"); 1287 xenvif_tx_err(vif, &txreq, idx); 1288 break; 1289 } 1290 1291 /* Packets passed to netif_rx() must have some headroom. */ 1292 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); 1293 1294 if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) { 1295 struct xen_netif_extra_info *gso; 1296 gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1]; 1297 1298 if (xenvif_set_skb_gso(vif, skb, gso)) { 1299 /* Failure in xenvif_set_skb_gso is fatal. 
*/ 1300 kfree_skb(skb); 1301 break; 1302 } 1303 } 1304 1305 /* XXX could copy straight to head */ 1306 page = xenvif_alloc_page(vif, pending_idx); 1307 if (!page) { 1308 kfree_skb(skb); 1309 xenvif_tx_err(vif, &txreq, idx); 1310 break; 1311 } 1312 1313 gop->source.u.ref = txreq.gref; 1314 gop->source.domid = vif->domid; 1315 gop->source.offset = txreq.offset; 1316 1317 gop->dest.u.gmfn = virt_to_mfn(page_address(page)); 1318 gop->dest.domid = DOMID_SELF; 1319 gop->dest.offset = txreq.offset; 1320 1321 gop->len = txreq.size; 1322 gop->flags = GNTCOPY_source_gref; 1323 1324 gop++; 1325 1326 memcpy(&vif->pending_tx_info[pending_idx].req, 1327 &txreq, sizeof(txreq)); 1328 vif->pending_tx_info[pending_idx].head = index; 1329 *((u16 *)skb->data) = pending_idx; 1330 1331 __skb_put(skb, data_len); 1332 1333 skb_shinfo(skb)->nr_frags = ret; 1334 if (data_len < txreq.size) { 1335 skb_shinfo(skb)->nr_frags++; 1336 frag_set_pending_idx(&skb_shinfo(skb)->frags[0], 1337 pending_idx); 1338 } else { 1339 frag_set_pending_idx(&skb_shinfo(skb)->frags[0], 1340 INVALID_PENDING_IDX); 1341 } 1342 1343 vif->pending_cons++; 1344 1345 request_gop = xenvif_get_requests(vif, skb, txfrags, gop); 1346 if (request_gop == NULL) { 1347 kfree_skb(skb); 1348 xenvif_tx_err(vif, &txreq, idx); 1349 break; 1350 } 1351 gop = request_gop; 1352 1353 __skb_queue_tail(&vif->tx_queue, skb); 1354 1355 vif->tx.req_cons = idx; 1356 1357 if ((gop-vif->tx_copy_ops) >= ARRAY_SIZE(vif->tx_copy_ops)) 1358 break; 1359 } 1360 1361 return gop - vif->tx_copy_ops; 1362 } 1363 1364 1365 static int xenvif_tx_submit(struct xenvif *vif, int budget) 1366 { 1367 struct gnttab_copy *gop = vif->tx_copy_ops; 1368 struct sk_buff *skb; 1369 int work_done = 0; 1370 1371 while (work_done < budget && 1372 (skb = __skb_dequeue(&vif->tx_queue)) != NULL) { 1373 struct xen_netif_tx_request *txp; 1374 u16 pending_idx; 1375 unsigned data_len; 1376 1377 pending_idx = *((u16 *)skb->data); 1378 txp = &vif->pending_tx_info[pending_idx].req; 
1379 1380 /* Check the remap error code. */ 1381 if (unlikely(xenvif_tx_check_gop(vif, skb, &gop))) { 1382 netdev_dbg(vif->dev, "netback grant failed.\n"); 1383 skb_shinfo(skb)->nr_frags = 0; 1384 kfree_skb(skb); 1385 continue; 1386 } 1387 1388 data_len = skb->len; 1389 memcpy(skb->data, 1390 (void *)(idx_to_kaddr(vif, pending_idx)|txp->offset), 1391 data_len); 1392 if (data_len < txp->size) { 1393 /* Append the packet payload as a fragment. */ 1394 txp->offset += data_len; 1395 txp->size -= data_len; 1396 } else { 1397 /* Schedule a response immediately. */ 1398 xenvif_idx_release(vif, pending_idx, 1399 XEN_NETIF_RSP_OKAY); 1400 } 1401 1402 if (txp->flags & XEN_NETTXF_csum_blank) 1403 skb->ip_summed = CHECKSUM_PARTIAL; 1404 else if (txp->flags & XEN_NETTXF_data_validated) 1405 skb->ip_summed = CHECKSUM_UNNECESSARY; 1406 1407 xenvif_fill_frags(vif, skb); 1408 1409 /* 1410 * If the initial fragment was < PKT_PROT_LEN then 1411 * pull through some bytes from the other fragments to 1412 * increase the linear region to PKT_PROT_LEN bytes. 
1413 */ 1414 if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) { 1415 int target = min_t(int, skb->len, PKT_PROT_LEN); 1416 __pskb_pull_tail(skb, target - skb_headlen(skb)); 1417 } 1418 1419 skb->dev = vif->dev; 1420 skb->protocol = eth_type_trans(skb, skb->dev); 1421 skb_reset_network_header(skb); 1422 1423 if (checksum_setup(vif, skb)) { 1424 netdev_dbg(vif->dev, 1425 "Can't setup checksum in net_tx_action\n"); 1426 kfree_skb(skb); 1427 continue; 1428 } 1429 1430 skb_probe_transport_header(skb, 0); 1431 1432 vif->dev->stats.rx_bytes += skb->len; 1433 vif->dev->stats.rx_packets++; 1434 1435 work_done++; 1436 1437 netif_receive_skb(skb); 1438 } 1439 1440 return work_done; 1441 } 1442 1443 /* Called after netfront has transmitted */ 1444 int xenvif_tx_action(struct xenvif *vif, int budget) 1445 { 1446 unsigned nr_gops; 1447 int work_done; 1448 1449 if (unlikely(!tx_work_todo(vif))) 1450 return 0; 1451 1452 nr_gops = xenvif_tx_build_gops(vif); 1453 1454 if (nr_gops == 0) 1455 return 0; 1456 1457 gnttab_batch_copy(vif->tx_copy_ops, nr_gops); 1458 1459 work_done = xenvif_tx_submit(vif, nr_gops); 1460 1461 return work_done; 1462 } 1463 1464 static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx, 1465 u8 status) 1466 { 1467 struct pending_tx_info *pending_tx_info; 1468 pending_ring_idx_t head; 1469 u16 peek; /* peek into next tx request */ 1470 1471 BUG_ON(vif->mmap_pages[pending_idx] == (void *)(~0UL)); 1472 1473 /* Already complete? 
*/ 1474 if (vif->mmap_pages[pending_idx] == NULL) 1475 return; 1476 1477 pending_tx_info = &vif->pending_tx_info[pending_idx]; 1478 1479 head = pending_tx_info->head; 1480 1481 BUG_ON(!pending_tx_is_head(vif, head)); 1482 BUG_ON(vif->pending_ring[pending_index(head)] != pending_idx); 1483 1484 do { 1485 pending_ring_idx_t index; 1486 pending_ring_idx_t idx = pending_index(head); 1487 u16 info_idx = vif->pending_ring[idx]; 1488 1489 pending_tx_info = &vif->pending_tx_info[info_idx]; 1490 make_tx_response(vif, &pending_tx_info->req, status); 1491 1492 /* Setting any number other than 1493 * INVALID_PENDING_RING_IDX indicates this slot is 1494 * starting a new packet / ending a previous packet. 1495 */ 1496 pending_tx_info->head = 0; 1497 1498 index = pending_index(vif->pending_prod++); 1499 vif->pending_ring[index] = vif->pending_ring[info_idx]; 1500 1501 peek = vif->pending_ring[pending_index(++head)]; 1502 1503 } while (!pending_tx_is_head(vif, peek)); 1504 1505 put_page(vif->mmap_pages[pending_idx]); 1506 vif->mmap_pages[pending_idx] = NULL; 1507 } 1508 1509 1510 static void make_tx_response(struct xenvif *vif, 1511 struct xen_netif_tx_request *txp, 1512 s8 st) 1513 { 1514 RING_IDX i = vif->tx.rsp_prod_pvt; 1515 struct xen_netif_tx_response *resp; 1516 int notify; 1517 1518 resp = RING_GET_RESPONSE(&vif->tx, i); 1519 resp->id = txp->id; 1520 resp->status = st; 1521 1522 if (txp->flags & XEN_NETTXF_extra_info) 1523 RING_GET_RESPONSE(&vif->tx, ++i)->status = XEN_NETIF_RSP_NULL; 1524 1525 vif->tx.rsp_prod_pvt = ++i; 1526 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->tx, notify); 1527 if (notify) 1528 notify_remote_via_irq(vif->tx_irq); 1529 } 1530 1531 static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif, 1532 u16 id, 1533 s8 st, 1534 u16 offset, 1535 u16 size, 1536 u16 flags) 1537 { 1538 RING_IDX i = vif->rx.rsp_prod_pvt; 1539 struct xen_netif_rx_response *resp; 1540 1541 resp = RING_GET_RESPONSE(&vif->rx, i); 1542 resp->offset = offset; 1543 
resp->flags = flags; 1544 resp->id = id; 1545 resp->status = (s16)size; 1546 if (st < 0) 1547 resp->status = (s16)st; 1548 1549 vif->rx.rsp_prod_pvt = ++i; 1550 1551 return resp; 1552 } 1553 1554 static inline int rx_work_todo(struct xenvif *vif) 1555 { 1556 return !skb_queue_empty(&vif->rx_queue); 1557 } 1558 1559 static inline int tx_work_todo(struct xenvif *vif) 1560 { 1561 1562 if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->tx)) && 1563 (nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX 1564 < MAX_PENDING_REQS)) 1565 return 1; 1566 1567 return 0; 1568 } 1569 1570 void xenvif_unmap_frontend_rings(struct xenvif *vif) 1571 { 1572 if (vif->tx.sring) 1573 xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif), 1574 vif->tx.sring); 1575 if (vif->rx.sring) 1576 xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif), 1577 vif->rx.sring); 1578 } 1579 1580 int xenvif_map_frontend_rings(struct xenvif *vif, 1581 grant_ref_t tx_ring_ref, 1582 grant_ref_t rx_ring_ref) 1583 { 1584 void *addr; 1585 struct xen_netif_tx_sring *txs; 1586 struct xen_netif_rx_sring *rxs; 1587 1588 int err = -ENOMEM; 1589 1590 err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif), 1591 tx_ring_ref, &addr); 1592 if (err) 1593 goto err; 1594 1595 txs = (struct xen_netif_tx_sring *)addr; 1596 BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE); 1597 1598 err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif), 1599 rx_ring_ref, &addr); 1600 if (err) 1601 goto err; 1602 1603 rxs = (struct xen_netif_rx_sring *)addr; 1604 BACK_RING_INIT(&vif->rx, rxs, PAGE_SIZE); 1605 1606 vif->rx_req_cons_peek = 0; 1607 1608 return 0; 1609 1610 err: 1611 xenvif_unmap_frontend_rings(vif); 1612 return err; 1613 } 1614 1615 int xenvif_kthread(void *data) 1616 { 1617 struct xenvif *vif = data; 1618 1619 while (!kthread_should_stop()) { 1620 wait_event_interruptible(vif->wq, 1621 rx_work_todo(vif) || 1622 kthread_should_stop()); 1623 if (kthread_should_stop()) 1624 break; 1625 1626 if (rx_work_todo(vif)) 1627 xenvif_rx_action(vif); 
1628 1629 cond_resched(); 1630 } 1631 1632 return 0; 1633 } 1634 1635 static int __init netback_init(void) 1636 { 1637 int rc = 0; 1638 1639 if (!xen_domain()) 1640 return -ENODEV; 1641 1642 if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) { 1643 pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n", 1644 fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX); 1645 fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX; 1646 } 1647 1648 rc = xenvif_xenbus_init(); 1649 if (rc) 1650 goto failed_init; 1651 1652 return 0; 1653 1654 failed_init: 1655 return rc; 1656 } 1657 1658 module_init(netback_init); 1659 1660 static void __exit netback_fini(void) 1661 { 1662 xenvif_xenbus_fini(); 1663 } 1664 module_exit(netback_fini); 1665 1666 MODULE_LICENSE("Dual BSD/GPL"); 1667 MODULE_ALIAS("xen-backend:vif"); 1668