/*
 * Virtual network driver for conversing with remote driver backends.
 *
 * Copyright (c) 2002-2005, K A Fraser
 * Copyright (c) 2005, XenSource Ltd
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/ethtool.h>
#include <linux/if_ether.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/moduleparam.h>
#include <linux/mm.h>
#include <net/ip.h>

#include <xen/xenbus.h>
#include <xen/events.h>
#include <xen/page.h>
#include <xen/grant_table.h>

#include <xen/interface/io/netif.h>
#include <xen/interface/memory.h>
#include <xen/interface/grant_table.h>

static struct ethtool_ops xennet_ethtool_ops;

struct netfront_cb {
	struct page *page;
	unsigned offset;
};

#define NETFRONT_SKB_CB(skb)	((struct netfront_cb *)((skb)->cb))

#define RX_COPY_THRESHOLD 256

#define GRANT_INVALID_REF	0

#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
#define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)

struct netfront_info {
	struct list_head list;
	struct net_device *netdev;

	struct napi_struct napi;

	struct xen_netif_tx_front_ring tx;
	struct xen_netif_rx_front_ring rx;

	spinlock_t tx_lock;
	spinlock_t rx_lock;

	unsigned int evtchn;

	/* Receive-ring batched refills. */
#define RX_MIN_TARGET 8
#define RX_DFL_MIN_TARGET 64
#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
	unsigned rx_min_target, rx_max_target, rx_target;
	struct sk_buff_head rx_batch;

	struct timer_list rx_refill_timer;

	/*
	 * {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries
	 * are linked from tx_skb_freelist through skb_entry.link.
	 *
	 * NB. Freelist index entries are always going to be less than
	 * PAGE_OFFSET, whereas pointers to skbs will always be equal or
	 * greater than PAGE_OFFSET: we use this property to distinguish
	 * them.
	 */
	union skb_entry {
		struct sk_buff *skb;
		unsigned link;
	} tx_skbs[NET_TX_RING_SIZE];
	grant_ref_t gref_tx_head;
	grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
	unsigned tx_skb_freelist;

	struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
	grant_ref_t gref_rx_head;
	grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];

	struct xenbus_device *xbdev;
	int tx_ring_ref;
	int rx_ring_ref;

	unsigned long rx_pfn_array[NET_RX_RING_SIZE];
	struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
	struct mmu_update rx_mmu[NET_RX_RING_SIZE];
};

struct netfront_rx_info {
	struct xen_netif_rx_response rx;
	struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
};

/*
 * Access routines for acquiring/freeing slots in tx_skbs[].
 */

static void add_id_to_freelist(unsigned *head, union skb_entry *list,
			       unsigned short id)
{
	list[id].link = *head;
	*head = id;
}

static unsigned short get_id_from_freelist(unsigned *head,
					   union skb_entry *list)
{
	unsigned int id = *head;
	*head = list[id].link;
	return id;
}

static int xennet_rxidx(RING_IDX idx)
{
	return idx & (NET_RX_RING_SIZE - 1);
}

static struct sk_buff *xennet_get_rx_skb(struct netfront_info *np,
					 RING_IDX ri)
{
	int i = xennet_rxidx(ri);
	struct sk_buff *skb = np->rx_skbs[i];
	np->rx_skbs[i] = NULL;
	return skb;
}

static grant_ref_t xennet_get_rx_ref(struct netfront_info *np,
				     RING_IDX ri)
{
	int i = xennet_rxidx(ri);
	grant_ref_t ref = np->grant_rx_ref[i];
	np->grant_rx_ref[i] = GRANT_INVALID_REF;
	return ref;
}

#ifdef CONFIG_SYSFS
static int xennet_sysfs_addif(struct net_device *netdev);
static void xennet_sysfs_delif(struct net_device *netdev);
#else /* !CONFIG_SYSFS */
#define xennet_sysfs_addif(dev) (0)
#define xennet_sysfs_delif(dev) do { } while (0)
#endif

static int xennet_can_sg(struct net_device *dev)
{
	return dev->features & NETIF_F_SG;
}

static void rx_refill_timeout(unsigned long data)
{
	struct net_device *dev = (struct net_device *)data;
	struct netfront_info *np = netdev_priv(dev);
	netif_rx_schedule(dev, &np->napi);
}

static int netfront_tx_slot_available(struct netfront_info *np)
{
	return ((np->tx.req_prod_pvt - np->tx.rsp_cons) <
		(TX_MAX_TARGET - MAX_SKB_FRAGS - 2));
}

static void xennet_maybe_wake_tx(struct net_device *dev)
{
	struct netfront_info *np = netdev_priv(dev);

	if (unlikely(netif_queue_stopped(dev)) &&
	    netfront_tx_slot_available(np) &&
	    likely(netif_running(dev)))
		netif_wake_queue(dev);
}

static void xennet_alloc_rx_buffers(struct net_device *dev)
{
	unsigned short id;
	struct netfront_info *np = netdev_priv(dev);
	struct sk_buff *skb;
	struct page *page;
	int i, batch_target, notify;
	RING_IDX req_prod = np->rx.req_prod_pvt;
	grant_ref_t ref;
	unsigned long pfn;
	void *vaddr;
	struct xen_netif_rx_request *req;

	if (unlikely(!netif_carrier_ok(dev)))
		return;

	/*
	 * Allocate skbuffs greedily, even though we batch updates to the
	 * receive ring. This creates a less bursty demand on the memory
	 * allocator, so should reduce the chance of failed allocation requests
	 * both for ourselves and for other kernel subsystems.
	 */
	batch_target = np->rx_target - (req_prod - np->rx.rsp_cons);
	for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
		skb = __netdev_alloc_skb(dev, RX_COPY_THRESHOLD,
					 GFP_ATOMIC | __GFP_NOWARN);
		if (unlikely(!skb))
			goto no_skb;

		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
		if (!page) {
			kfree_skb(skb);
no_skb:
			/* Any skbuffs queued for refill? Force them out. */
			if (i != 0)
				goto refill;
			/* Could not allocate any skbuffs. Try again later. */
			mod_timer(&np->rx_refill_timer,
				  jiffies + (HZ/10));
			break;
		}

		skb_shinfo(skb)->frags[0].page = page;
		skb_shinfo(skb)->nr_frags = 1;
		__skb_queue_tail(&np->rx_batch, skb);
	}

	/* Is the batch large enough to be worthwhile? */
	if (i < (np->rx_target/2)) {
		if (req_prod > np->rx.sring->req_prod)
			goto push;
		return;
	}

	/* Adjust our fill target if we risked running out of buffers. */
	if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) &&
	    ((np->rx_target *= 2) > np->rx_max_target))
		np->rx_target = np->rx_max_target;

 refill:
	for (i = 0; ; i++) {
		skb = __skb_dequeue(&np->rx_batch);
		if (skb == NULL)
			break;

		skb->dev = dev;

		id = xennet_rxidx(req_prod + i);

		BUG_ON(np->rx_skbs[id]);
		np->rx_skbs[id] = skb;

		ref = gnttab_claim_grant_reference(&np->gref_rx_head);
		BUG_ON((signed short)ref < 0);
		np->grant_rx_ref[id] = ref;

		pfn = page_to_pfn(skb_shinfo(skb)->frags[0].page);
		vaddr = page_address(skb_shinfo(skb)->frags[0].page);

		req = RING_GET_REQUEST(&np->rx, req_prod + i);
		gnttab_grant_foreign_access_ref(ref,
						np->xbdev->otherend_id,
						pfn_to_mfn(pfn),
						0);

		req->id = id;
		req->gref = ref;
	}

	wmb(); /* barrier so backend sees requests */

	/* Above is a suitable barrier to ensure backend will see requests. */
	np->rx.req_prod_pvt = req_prod + i;
 push:
	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify);
	if (notify)
		notify_remote_via_irq(np->netdev->irq);
}

static int xennet_open(struct net_device *dev)
{
	struct netfront_info *np = netdev_priv(dev);

	napi_enable(&np->napi);

	spin_lock_bh(&np->rx_lock);
	if (netif_carrier_ok(dev)) {
		xennet_alloc_rx_buffers(dev);
		np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
		if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
			netif_rx_schedule(dev, &np->napi);
	}
	spin_unlock_bh(&np->rx_lock);

	xennet_maybe_wake_tx(dev);

	return 0;
}

static void xennet_tx_buf_gc(struct net_device *dev)
{
	RING_IDX cons, prod;
	unsigned short id;
	struct netfront_info *np = netdev_priv(dev);
	struct sk_buff *skb;

	BUG_ON(!netif_carrier_ok(dev));

	do {
		prod = np->tx.sring->rsp_prod;
		rmb(); /* Ensure we see responses up to 'rp'. */

		for (cons = np->tx.rsp_cons; cons != prod; cons++) {
			struct xen_netif_tx_response *txrsp;

			txrsp = RING_GET_RESPONSE(&np->tx, cons);
			if (txrsp->status == NETIF_RSP_NULL)
				continue;

			id = txrsp->id;
			skb = np->tx_skbs[id].skb;
			if (unlikely(gnttab_query_foreign_access(
				np->grant_tx_ref[id]) != 0)) {
				printk(KERN_ALERT "xennet_tx_buf_gc: warning "
				       "-- grant still in use by backend "
				       "domain.\n");
				BUG();
			}
			gnttab_end_foreign_access_ref(
				np->grant_tx_ref[id], GNTMAP_readonly);
			gnttab_release_grant_reference(
				&np->gref_tx_head, np->grant_tx_ref[id]);
			np->grant_tx_ref[id] = GRANT_INVALID_REF;
			add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, id);
			dev_kfree_skb_irq(skb);
		}

		np->tx.rsp_cons = prod;

		/*
		 * Set a new event, then check for race with update of tx_cons.
		 * Note that it is essential to schedule a callback, no matter
		 * how few buffers are pending. Even if there is space in the
		 * transmit ring, higher layers may be blocked because too much
		 * data is outstanding: in such cases notification from Xen is
		 * likely to be the only kick that we'll get.
		 */
		np->tx.sring->rsp_event =
			prod + ((np->tx.sring->req_prod - prod) >> 1) + 1;
		mb(); /* update shared area */
	} while ((cons == prod) && (prod != np->tx.sring->rsp_prod));

	xennet_maybe_wake_tx(dev);
}

static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
			      struct xen_netif_tx_request *tx)
{
	struct netfront_info *np = netdev_priv(dev);
	char *data = skb->data;
	unsigned long mfn;
	RING_IDX prod = np->tx.req_prod_pvt;
	int frags = skb_shinfo(skb)->nr_frags;
	unsigned int offset = offset_in_page(data);
	unsigned int len = skb_headlen(skb);
	unsigned int id;
	grant_ref_t ref;
	int i;

	/* While the header overlaps a page boundary (including being
	   larger than a page), split it into page-sized chunks. */
	while (len > PAGE_SIZE - offset) {
		tx->size = PAGE_SIZE - offset;
		tx->flags |= NETTXF_more_data;
		len -= tx->size;
		data += tx->size;
		offset = 0;

		id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
		np->tx_skbs[id].skb = skb_get(skb);
		tx = RING_GET_REQUEST(&np->tx, prod++);
		tx->id = id;
		ref = gnttab_claim_grant_reference(&np->gref_tx_head);
		BUG_ON((signed short)ref < 0);

		mfn = virt_to_mfn(data);
		gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
						mfn, GNTMAP_readonly);

		tx->gref = np->grant_tx_ref[id] = ref;
		tx->offset = offset;
		tx->size = len;
		tx->flags = 0;
	}

	/* Grant backend access to each skb fragment page. */
	for (i = 0; i < frags; i++) {
		skb_frag_t *frag = skb_shinfo(skb)->frags + i;

		tx->flags |= NETTXF_more_data;

		id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
		np->tx_skbs[id].skb = skb_get(skb);
		tx = RING_GET_REQUEST(&np->tx, prod++);
		tx->id = id;
		ref = gnttab_claim_grant_reference(&np->gref_tx_head);
		BUG_ON((signed short)ref < 0);

		mfn = pfn_to_mfn(page_to_pfn(frag->page));
		gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
						mfn, GNTMAP_readonly);

		tx->gref = np->grant_tx_ref[id] = ref;
		tx->offset = frag->page_offset;
		tx->size = frag->size;
		tx->flags = 0;
	}

	np->tx.req_prod_pvt = prod;
}

static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	unsigned short id;
	struct netfront_info *np = netdev_priv(dev);
	struct xen_netif_tx_request *tx;
	struct xen_netif_extra_info *extra;
	char *data = skb->data;
	RING_IDX i;
	grant_ref_t ref;
	unsigned long mfn;
	int notify;
	int frags = skb_shinfo(skb)->nr_frags;
	unsigned int offset = offset_in_page(data);
	unsigned int len = skb_headlen(skb);

	frags += (offset + len + PAGE_SIZE - 1) / PAGE_SIZE;
	if (unlikely(frags > MAX_SKB_FRAGS + 1)) {
		printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n",
		       frags);
		dump_stack();
		goto drop;
	}

	spin_lock_irq(&np->tx_lock);

	if (unlikely(!netif_carrier_ok(dev) ||
		     (frags > 1 && !xennet_can_sg(dev)) ||
		     netif_needs_gso(dev, skb))) {
		spin_unlock_irq(&np->tx_lock);
		goto drop;
	}

	i = np->tx.req_prod_pvt;

	id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
	np->tx_skbs[id].skb = skb;

	tx = RING_GET_REQUEST(&np->tx, i);

	tx->id = id;
	ref = gnttab_claim_grant_reference(&np->gref_tx_head);
	BUG_ON((signed short)ref < 0);
	mfn = virt_to_mfn(data);
	gnttab_grant_foreign_access_ref(
		ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly);
	tx->gref = np->grant_tx_ref[id] = ref;
	tx->offset = offset;
	tx->size = len;
	extra = NULL;

	tx->flags = 0;
	if (skb->ip_summed == CHECKSUM_PARTIAL)
		/* local packet? */
		tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
	else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
		/* remote but checksummed. */
		tx->flags |= NETTXF_data_validated;

	if (skb_shinfo(skb)->gso_size) {
		struct xen_netif_extra_info *gso;

		gso = (struct xen_netif_extra_info *)
			RING_GET_REQUEST(&np->tx, ++i);

		if (extra)
			extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
		else
			tx->flags |= NETTXF_extra_info;

		gso->u.gso.size = skb_shinfo(skb)->gso_size;
		gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
		gso->u.gso.pad = 0;
		gso->u.gso.features = 0;

		gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
		gso->flags = 0;
		extra = gso;
	}

	np->tx.req_prod_pvt = i + 1;

	xennet_make_frags(skb, dev, tx);
	tx->size = skb->len;

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify);
	if (notify)
		notify_remote_via_irq(np->netdev->irq);

	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;

	/* Note: It is not safe to access skb after xennet_tx_buf_gc()! */
	xennet_tx_buf_gc(dev);

	if (!netfront_tx_slot_available(np))
		netif_stop_queue(dev);

	spin_unlock_irq(&np->tx_lock);

	return 0;

 drop:
	dev->stats.tx_dropped++;
	dev_kfree_skb(skb);
	return 0;
}

static int xennet_close(struct net_device *dev)
{
	struct netfront_info *np = netdev_priv(dev);
	netif_stop_queue(np->netdev);
	napi_disable(&np->napi);
	return 0;
}

static void xennet_move_rx_slot(struct netfront_info *np, struct sk_buff *skb,
				grant_ref_t ref)
{
	int new = xennet_rxidx(np->rx.req_prod_pvt);

	BUG_ON(np->rx_skbs[new]);
	np->rx_skbs[new] = skb;
	np->grant_rx_ref[new] = ref;
	RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new;
	RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref;
	np->rx.req_prod_pvt++;
}

static int xennet_get_extras(struct netfront_info *np,
			     struct xen_netif_extra_info *extras,
			     RING_IDX rp)

{
	struct xen_netif_extra_info *extra;
	struct device *dev = &np->netdev->dev;
	RING_IDX cons = np->rx.rsp_cons;
	int err = 0;

	do {
		struct sk_buff *skb;
		grant_ref_t ref;

		if (unlikely(cons + 1 == rp)) {
			if (net_ratelimit())
				dev_warn(dev, "Missing extra info\n");
			err = -EBADR;
			break;
		}

		extra = (struct xen_netif_extra_info *)
			RING_GET_RESPONSE(&np->rx, ++cons);

		if (unlikely(!extra->type ||
			     extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
			if (net_ratelimit())
				dev_warn(dev, "Invalid extra type: %d\n",
					 extra->type);
			err = -EINVAL;
		} else {
			memcpy(&extras[extra->type - 1], extra,
			       sizeof(*extra));
		}

		skb = xennet_get_rx_skb(np, cons);
		ref = xennet_get_rx_ref(np, cons);
		xennet_move_rx_slot(np, skb, ref);
	} while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);

	np->rx.rsp_cons = cons;
	return err;
}

static int xennet_get_responses(struct netfront_info *np,
				struct netfront_rx_info *rinfo, RING_IDX rp,
				struct sk_buff_head *list)
{
	struct xen_netif_rx_response *rx = &rinfo->rx;
	struct xen_netif_extra_info *extras = rinfo->extras;
	struct device *dev = &np->netdev->dev;
	RING_IDX cons = np->rx.rsp_cons;
	struct sk_buff *skb = xennet_get_rx_skb(np, cons);
	grant_ref_t ref = xennet_get_rx_ref(np, cons);
	int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD);
	int frags = 1;
	int err = 0;
	unsigned long ret;

	if (rx->flags & NETRXF_extra_info) {
		err = xennet_get_extras(np, extras, rp);
		cons = np->rx.rsp_cons;
	}

	for (;;) {
		if (unlikely(rx->status < 0 ||
			     rx->offset + rx->status > PAGE_SIZE)) {
			if (net_ratelimit())
				dev_warn(dev, "rx->offset: %x, size: %u\n",
					 rx->offset, rx->status);
			xennet_move_rx_slot(np, skb, ref);
			err = -EINVAL;
			goto next;
		}

		/*
		 * This definitely indicates a bug, either in this driver or in
		 * the backend driver. In future this should flag the bad
		 * situation to the system controller to reboot the backend.
		 */
		if (ref == GRANT_INVALID_REF) {
			if (net_ratelimit())
				dev_warn(dev, "Bad rx response id %d.\n",
					 rx->id);
			err = -EINVAL;
			goto next;
		}

		ret = gnttab_end_foreign_access_ref(ref, 0);
		BUG_ON(!ret);

		gnttab_release_grant_reference(&np->gref_rx_head, ref);

		__skb_queue_tail(list, skb);

next:
		if (!(rx->flags & NETRXF_more_data))
			break;

		if (cons + frags == rp) {
			if (net_ratelimit())
				dev_warn(dev, "Need more frags\n");
			err = -ENOENT;
			break;
		}

		rx = RING_GET_RESPONSE(&np->rx, cons + frags);
		skb = xennet_get_rx_skb(np, cons + frags);
		ref = xennet_get_rx_ref(np, cons + frags);
		frags++;
	}

	if (unlikely(frags > max)) {
		if (net_ratelimit())
			dev_warn(dev, "Too many frags\n");
		err = -E2BIG;
	}

	if (unlikely(err))
		np->rx.rsp_cons = cons + frags;

	return err;
}

static int xennet_set_skb_gso(struct sk_buff *skb,
			      struct xen_netif_extra_info *gso)
{
	if (!gso->u.gso.size) {
		if (net_ratelimit())
			printk(KERN_WARNING "GSO size must not be zero.\n");
		return -EINVAL;
	}

	/* Currently only TCPv4 S.O. is supported. */
	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
		if (net_ratelimit())
			printk(KERN_WARNING "Bad GSO type %d.\n", gso->u.gso.type);
		return -EINVAL;
	}

	skb_shinfo(skb)->gso_size = gso->u.gso.size;
	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;

	/* Header must be checked, and gso_segs computed. */
	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
	skb_shinfo(skb)->gso_segs = 0;

	return 0;
}

static RING_IDX xennet_fill_frags(struct netfront_info *np,
				  struct sk_buff *skb,
				  struct sk_buff_head *list)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int nr_frags = shinfo->nr_frags;
	RING_IDX cons = np->rx.rsp_cons;
	skb_frag_t *frag = shinfo->frags + nr_frags;
	struct sk_buff *nskb;

	while ((nskb = __skb_dequeue(list))) {
		struct xen_netif_rx_response *rx =
			RING_GET_RESPONSE(&np->rx, ++cons);

		frag->page = skb_shinfo(nskb)->frags[0].page;
		frag->page_offset = rx->offset;
		frag->size = rx->status;

		skb->data_len += rx->status;

		skb_shinfo(nskb)->nr_frags = 0;
		kfree_skb(nskb);

		frag++;
		nr_frags++;
	}

	shinfo->nr_frags = nr_frags;
	return cons;
}

static int skb_checksum_setup(struct sk_buff *skb)
{
	struct iphdr *iph;
	unsigned char *th;
	int err = -EPROTO;

	if (skb->protocol != htons(ETH_P_IP))
		goto out;

	iph = (void *)skb->data;
	th = skb->data + 4 * iph->ihl;
	if (th >= skb_tail_pointer(skb))
		goto out;

	skb->csum_start = th - skb->head;
	switch (iph->protocol) {
	case IPPROTO_TCP:
		skb->csum_offset = offsetof(struct tcphdr, check);
		break;
	case IPPROTO_UDP:
		skb->csum_offset = offsetof(struct udphdr, check);
		break;
	default:
		if (net_ratelimit())
			printk(KERN_ERR "Attempting to checksum a non-"
			       "TCP/UDP packet, dropping a protocol"
			       " %d packet", iph->protocol);
		goto out;
	}

	if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
		goto out;

	err = 0;

out:
	return err;
}

static int handle_incoming_queue(struct net_device *dev,
				 struct sk_buff_head *rxq)
{
	int packets_dropped = 0;
	struct sk_buff *skb;

	while ((skb = __skb_dequeue(rxq)) != NULL) {
		struct page *page = NETFRONT_SKB_CB(skb)->page;
		void *vaddr = page_address(page);
		unsigned offset = NETFRONT_SKB_CB(skb)->offset;

		memcpy(skb->data, vaddr + offset,
		       skb_headlen(skb));

		if (page != skb_shinfo(skb)->frags[0].page)
			__free_page(page);

		/* Ethernet work: Delayed to here as it peeks the header. */
		skb->protocol = eth_type_trans(skb, dev);

		if (skb->ip_summed == CHECKSUM_PARTIAL) {
			if (skb_checksum_setup(skb)) {
				kfree_skb(skb);
				packets_dropped++;
				dev->stats.rx_errors++;
				continue;
			}
		}

		dev->stats.rx_packets++;
		dev->stats.rx_bytes += skb->len;

		/* Pass it up. */
		netif_receive_skb(skb);
		dev->last_rx = jiffies;
	}

	return packets_dropped;
}

static int xennet_poll(struct napi_struct *napi, int budget)
{
	struct netfront_info *np = container_of(napi, struct netfront_info, napi);
	struct net_device *dev = np->netdev;
	struct sk_buff *skb;
	struct netfront_rx_info rinfo;
	struct xen_netif_rx_response *rx = &rinfo.rx;
	struct xen_netif_extra_info *extras = rinfo.extras;
	RING_IDX i, rp;
	int work_done;
	struct sk_buff_head rxq;
	struct sk_buff_head errq;
	struct sk_buff_head tmpq;
	unsigned long flags;
	unsigned int len;
	int err;

	spin_lock(&np->rx_lock);

	if (unlikely(!netif_carrier_ok(dev))) {
		spin_unlock(&np->rx_lock);
		return 0;
	}

	skb_queue_head_init(&rxq);
	skb_queue_head_init(&errq);
	skb_queue_head_init(&tmpq);

	rp = np->rx.sring->rsp_prod;
	rmb(); /* Ensure we see queued responses up to 'rp'. */

	i = np->rx.rsp_cons;
	work_done = 0;
	while ((i != rp) && (work_done < budget)) {
		memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
		memset(extras, 0, sizeof(rinfo.extras));

		err = xennet_get_responses(np, &rinfo, rp, &tmpq);

		if (unlikely(err)) {
err:
			while ((skb = __skb_dequeue(&tmpq)))
				__skb_queue_tail(&errq, skb);
			dev->stats.rx_errors++;
			i = np->rx.rsp_cons;
			continue;
		}

		skb = __skb_dequeue(&tmpq);

		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
			struct xen_netif_extra_info *gso;
			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];

			if (unlikely(xennet_set_skb_gso(skb, gso))) {
				__skb_queue_head(&tmpq, skb);
				np->rx.rsp_cons += skb_queue_len(&tmpq);
				goto err;
			}
		}

		NETFRONT_SKB_CB(skb)->page = skb_shinfo(skb)->frags[0].page;
		NETFRONT_SKB_CB(skb)->offset = rx->offset;

		len = rx->status;
		if (len > RX_COPY_THRESHOLD)
			len = RX_COPY_THRESHOLD;
		skb_put(skb, len);

		if (rx->status > len) {
			skb_shinfo(skb)->frags[0].page_offset =
				rx->offset + len;
			skb_shinfo(skb)->frags[0].size = rx->status - len;
			skb->data_len = rx->status - len;
		} else {
			skb_shinfo(skb)->frags[0].page = NULL;
			skb_shinfo(skb)->nr_frags = 0;
		}

		i = xennet_fill_frags(np, skb, &tmpq);

		/*
		 * Truesize approximates the size of true data plus
		 * any supervisor overheads. Adding hypervisor
		 * overheads has been shown to significantly reduce
		 * achievable bandwidth with the default receive
		 * buffer size. It is therefore not wise to account
		 * for it here.
		 *
		 * After alloc_skb(RX_COPY_THRESHOLD), truesize is set
		 * to RX_COPY_THRESHOLD + the supervisor
		 * overheads. Here, we add the size of the data pulled
		 * in xennet_fill_frags().
		 *
		 * We also adjust for any unused space in the main
		 * data area by subtracting (RX_COPY_THRESHOLD -
		 * len). This is especially important with drivers
		 * which split incoming packets into header and data,
		 * using only 66 bytes of the main data area (see the
		 * e1000 driver for example.) On such systems,
		 * without this last adjustment, our achievable
		 * receive throughput using the standard receive
		 * buffer size was cut by 25%(!!!).
		 */
		skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len);
		skb->len += skb->data_len;

		if (rx->flags & NETRXF_csum_blank)
			skb->ip_summed = CHECKSUM_PARTIAL;
		else if (rx->flags & NETRXF_data_validated)
			skb->ip_summed = CHECKSUM_UNNECESSARY;

		__skb_queue_tail(&rxq, skb);

		np->rx.rsp_cons = ++i;
		work_done++;
	}

	while ((skb = __skb_dequeue(&errq)))
		kfree_skb(skb);

	work_done -= handle_incoming_queue(dev, &rxq);

	/* If we get a callback with very few responses, reduce fill target. */
	/* NB. Note exponential increase, linear decrease. */
	if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) >
	     ((3*np->rx_target) / 4)) &&
	    (--np->rx_target < np->rx_min_target))
		np->rx_target = np->rx_min_target;

	xennet_alloc_rx_buffers(dev);

	if (work_done < budget) {
		int more_to_do = 0;

		local_irq_save(flags);

		RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do);
		if (!more_to_do)
			__netif_rx_complete(dev, napi);

		local_irq_restore(flags);
	}

	spin_unlock(&np->rx_lock);

	return work_done;
}

static int xennet_change_mtu(struct net_device *dev, int mtu)
{
	int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;

	if (mtu > max)
		return -EINVAL;
	dev->mtu = mtu;
	return 0;
}

static void xennet_release_tx_bufs(struct netfront_info *np)
{
	struct sk_buff *skb;
	int i;

	for (i = 0; i < NET_TX_RING_SIZE; i++) {
		/* Skip over entries which are actually freelist references */
		if ((unsigned long)np->tx_skbs[i].skb < PAGE_OFFSET)
			continue;

		skb = np->tx_skbs[i].skb;
		gnttab_end_foreign_access_ref(np->grant_tx_ref[i],
					      GNTMAP_readonly);
		gnttab_release_grant_reference(&np->gref_tx_head,
					       np->grant_tx_ref[i]);
		np->grant_tx_ref[i] = GRANT_INVALID_REF;
		add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, i);
		dev_kfree_skb_irq(skb);
	}
}

static void xennet_release_rx_bufs(struct netfront_info *np)
{
	struct mmu_update *mmu = np->rx_mmu;
	struct multicall_entry *mcl = np->rx_mcl;
	struct sk_buff_head free_list;
	struct sk_buff *skb;
	unsigned long mfn;
	int xfer = 0, noxfer = 0, unused = 0;
	int id, ref;

	dev_warn(&np->netdev->dev, "%s: fix me for copying receiver.\n",
		 __func__);
	return;

	skb_queue_head_init(&free_list);

	spin_lock_bh(&np->rx_lock);

	for (id = 0; id < NET_RX_RING_SIZE; id++) {
		ref = np->grant_rx_ref[id];
		if (ref == GRANT_INVALID_REF) {
			unused++;
			continue;
		}

		skb = np->rx_skbs[id];
		mfn = gnttab_end_foreign_transfer_ref(ref);
		gnttab_release_grant_reference(&np->gref_rx_head, ref);
		np->grant_rx_ref[id] = GRANT_INVALID_REF;

		if (0 == mfn) {
			skb_shinfo(skb)->nr_frags = 0;
			dev_kfree_skb(skb);
			noxfer++;
			continue;
		}

		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
			/* Remap the page. */
			struct page *page = skb_shinfo(skb)->frags[0].page;
			unsigned long pfn = page_to_pfn(page);
			void *vaddr = page_address(page);

			MULTI_update_va_mapping(mcl, (unsigned long)vaddr,
						mfn_pte(mfn, PAGE_KERNEL),
						0);
			mcl++;
			mmu->ptr = ((u64)mfn << PAGE_SHIFT)
				| MMU_MACHPHYS_UPDATE;
			mmu->val = pfn;
			mmu++;

			set_phys_to_machine(pfn, mfn);
		}
		__skb_queue_tail(&free_list, skb);
		xfer++;
	}

	dev_info(&np->netdev->dev, "%s: %d xfer, %d noxfer, %d unused\n",
		 __func__, xfer, noxfer, unused);

	if (xfer) {
		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
			/* Do all the remapping work and M2P updates. */
			MULTI_mmu_update(mcl, np->rx_mmu, mmu - np->rx_mmu,
					 0, DOMID_SELF);
			mcl++;
			HYPERVISOR_multicall(np->rx_mcl, mcl - np->rx_mcl);
		}
	}

	while ((skb = __skb_dequeue(&free_list)) != NULL)
		dev_kfree_skb(skb);

	spin_unlock_bh(&np->rx_lock);
}

static void xennet_uninit(struct net_device *dev)
{
	struct netfront_info *np = netdev_priv(dev);
	xennet_release_tx_bufs(np);
	xennet_release_rx_bufs(np);
	gnttab_free_grant_references(np->gref_tx_head);
	gnttab_free_grant_references(np->gref_rx_head);
}

static struct net_device * __devinit xennet_create_dev(struct xenbus_device *dev)
{
	int i, err;
	struct net_device *netdev;
	struct netfront_info *np;

	netdev = alloc_etherdev(sizeof(struct netfront_info));
	if (!netdev) {
		printk(KERN_WARNING "%s> alloc_etherdev failed.\n",
		       __func__);
		return ERR_PTR(-ENOMEM);
	}

	np = netdev_priv(netdev);
	np->xbdev = dev;

	spin_lock_init(&np->tx_lock);
	spin_lock_init(&np->rx_lock);

	skb_queue_head_init(&np->rx_batch);
	np->rx_target = RX_DFL_MIN_TARGET;
	np->rx_min_target = RX_DFL_MIN_TARGET;
	np->rx_max_target = RX_MAX_TARGET;

	init_timer(&np->rx_refill_timer);
	np->rx_refill_timer.data = (unsigned long)netdev;
	np->rx_refill_timer.function = rx_refill_timeout;

	/* Initialise tx_skbs as a free chain containing every entry. */
	np->tx_skb_freelist = 0;
	for (i = 0; i < NET_TX_RING_SIZE; i++) {
		np->tx_skbs[i].link = i+1;
		np->grant_tx_ref[i] = GRANT_INVALID_REF;
	}

	/* Clear out rx_skbs */
	for (i = 0; i < NET_RX_RING_SIZE; i++) {
		np->rx_skbs[i] = NULL;
		np->grant_rx_ref[i] = GRANT_INVALID_REF;
	}

	/* A grant for every tx ring slot */
	if (gnttab_alloc_grant_references(TX_MAX_TARGET,
					  &np->gref_tx_head) < 0) {
		printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
		err = -ENOMEM;
		goto exit;
	}
	/* A grant for every rx ring slot */
	if (gnttab_alloc_grant_references(RX_MAX_TARGET,
					  &np->gref_rx_head) < 0) {
		printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
		err = -ENOMEM;
		goto exit_free_tx;
	}

	netdev->open = xennet_open;
	netdev->hard_start_xmit = xennet_start_xmit;
	netdev->stop = xennet_close;
	netif_napi_add(netdev, &np->napi, xennet_poll, 64);
	netdev->uninit = xennet_uninit;
	netdev->change_mtu = xennet_change_mtu;
	netdev->features = NETIF_F_IP_CSUM;

	SET_ETHTOOL_OPS(netdev, &xennet_ethtool_ops);
	SET_NETDEV_DEV(netdev, &dev->dev);

	np->netdev = netdev;

	netif_carrier_off(netdev);

	return netdev;

 exit_free_tx:
	gnttab_free_grant_references(np->gref_tx_head);
 exit:
	free_netdev(netdev);
	return ERR_PTR(err);
}

/**
 * Entry point to this code when a new device is created. Allocate the basic
 * structures and the ring buffers for communication with the backend, and
 * inform the backend of the appropriate details for those.
 */
static int __devinit netfront_probe(struct xenbus_device *dev,
				    const struct xenbus_device_id *id)
{
	int err;
	struct net_device *netdev;
	struct netfront_info *info;

	netdev = xennet_create_dev(dev);
	if (IS_ERR(netdev)) {
		err = PTR_ERR(netdev);
		xenbus_dev_fatal(dev, err, "creating netdev");
		return err;
	}

	info = netdev_priv(netdev);
	dev->dev.driver_data = info;

	err = register_netdev(info->netdev);
	if (err) {
		printk(KERN_WARNING "%s: register_netdev err=%d\n",
		       __func__, err);
		goto fail;
	}

	err = xennet_sysfs_addif(info->netdev);
	if (err) {
		unregister_netdev(info->netdev);
		printk(KERN_WARNING "%s: add sysfs failed err=%d\n",
		       __func__, err);
		goto fail;
	}

	return 0;

 fail:
	free_netdev(netdev);
	dev->dev.driver_data = NULL;
	return err;
}

static void xennet_end_access(int ref, void *page)
{
	/* This frees the page as a side-effect */
	if (ref != GRANT_INVALID_REF)
		gnttab_end_foreign_access(ref, 0, (unsigned long)page);
}

static void xennet_disconnect_backend(struct netfront_info *info)
{
	/* Stop old i/f to prevent errors whilst we rebuild the state. */
	spin_lock_bh(&info->rx_lock);
	spin_lock_irq(&info->tx_lock);
	netif_carrier_off(info->netdev);
	spin_unlock_irq(&info->tx_lock);
	spin_unlock_bh(&info->rx_lock);

	if (info->netdev->irq)
		unbind_from_irqhandler(info->netdev->irq, info->netdev);
	info->evtchn = info->netdev->irq = 0;

	/* End access and free the pages */
	xennet_end_access(info->tx_ring_ref, info->tx.sring);
	xennet_end_access(info->rx_ring_ref, info->rx.sring);

	info->tx_ring_ref = GRANT_INVALID_REF;
	info->rx_ring_ref = GRANT_INVALID_REF;
	info->tx.sring = NULL;
	info->rx.sring = NULL;
}

/**
 * We are reconnecting to the backend, due to a suspend/resume, or a backend
 * driver restart. We tear down our netif structure and recreate it, but
 * leave the device-layer structures intact so that this is transparent to the
 * rest of the kernel.
 */
static int netfront_resume(struct xenbus_device *dev)
{
	struct netfront_info *info = dev->dev.driver_data;

	dev_dbg(&dev->dev, "%s\n", dev->nodename);

	xennet_disconnect_backend(info);
	return 0;
}

static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
{
	char *s, *e, *macstr;
	int i;

	macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
	if (IS_ERR(macstr))
		return PTR_ERR(macstr);

	for (i = 0; i < ETH_ALEN; i++) {
		mac[i] = simple_strtoul(s, &e, 16);
		if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
			kfree(macstr);
			return -ENOENT;
		}
		s = e+1;
	}

	kfree(macstr);
	return 0;
}

static irqreturn_t xennet_interrupt(int irq, void *dev_id)
{
	struct net_device *dev = dev_id;
	struct netfront_info *np = netdev_priv(dev);
	unsigned long flags;

	spin_lock_irqsave(&np->tx_lock, flags);

	if (likely(netif_carrier_ok(dev))) {
		xennet_tx_buf_gc(dev);
		/* Under tx_lock: protects access to rx shared-ring indexes. */
		if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
			netif_rx_schedule(dev, &np->napi);
	}

	spin_unlock_irqrestore(&np->tx_lock, flags);

	return IRQ_HANDLED;
}

static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
{
	struct xen_netif_tx_sring *txs;
	struct xen_netif_rx_sring *rxs;
	int err;
	struct net_device *netdev = info->netdev;

	info->tx_ring_ref = GRANT_INVALID_REF;
	info->rx_ring_ref = GRANT_INVALID_REF;
	info->rx.sring = NULL;
	info->tx.sring = NULL;
	netdev->irq = 0;

	err = xen_net_read_mac(dev, netdev->dev_addr);
	if (err) {
		xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
		goto fail;
	}

	txs = (struct xen_netif_tx_sring *)get_zeroed_page(GFP_KERNEL);
	if (!txs) {
		err = -ENOMEM;
		xenbus_dev_fatal(dev, err, "allocating tx ring page");
		goto fail;
	}
	SHARED_RING_INIT(txs);
	FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);

	err = xenbus_grant_ring(dev, virt_to_mfn(txs));
	if (err < 0) {
		free_page((unsigned long)txs);
		goto fail;
	}

	info->tx_ring_ref = err;
	rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_KERNEL);
	if (!rxs) {
		err = -ENOMEM;
		xenbus_dev_fatal(dev, err, "allocating rx ring page");
		goto fail;
	}
	SHARED_RING_INIT(rxs);
	FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);

	err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
	if (err < 0) {
		free_page((unsigned long)rxs);
		goto fail;
	}
	info->rx_ring_ref = err;

	err = xenbus_alloc_evtchn(dev, &info->evtchn);
	if (err)
		goto fail;

	err = bind_evtchn_to_irqhandler(info->evtchn, xennet_interrupt,
					IRQF_SAMPLE_RANDOM, netdev->name,
					netdev);
	if (err < 0)
		goto fail;
	netdev->irq = err;
	return 0;

 fail:
	return err;
}

/* Common code used when first setting up, and when resuming. */
static int talk_to_backend(struct xenbus_device *dev,
			   struct netfront_info *info)
{
	const char *message;
	struct xenbus_transaction xbt;
	int err;

	/* Create shared ring, alloc event channel. */
	err = setup_netfront(dev, info);
	if (err)
		goto out;

again:
	err = xenbus_transaction_start(&xbt);
	if (err) {
		xenbus_dev_fatal(dev, err, "starting transaction");
		goto destroy_ring;
	}

	err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref", "%u",
			    info->tx_ring_ref);
	if (err) {
		message = "writing tx ring-ref";
		goto abort_transaction;
	}
	err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref", "%u",
			    info->rx_ring_ref);
	if (err) {
		message = "writing rx ring-ref";
		goto abort_transaction;
	}
	err = xenbus_printf(xbt, dev->nodename,
			    "event-channel", "%u", info->evtchn);
	if (err) {
		message = "writing event-channel";
		goto abort_transaction;
	}

	err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u",
			    1);
	if (err) {
		message = "writing request-rx-copy";
		goto abort_transaction;
	}

	err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
	if (err) {
		message = "writing feature-rx-notify";
		goto abort_transaction;
	}

	err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
	if (err) {
		message = "writing feature-sg";
		goto abort_transaction;
	}

	err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1);
	if (err) {
		message = "writing feature-gso-tcpv4";
		goto abort_transaction;
	}

	err = xenbus_transaction_end(xbt, 0);
	if (err) {
		if (err == -EAGAIN)
			goto again;
		xenbus_dev_fatal(dev, err, "completing transaction");
		goto destroy_ring;
	}

	return 0;

 abort_transaction:
	xenbus_transaction_end(xbt, 1);
	xenbus_dev_fatal(dev, err, "%s", message);
 destroy_ring:
	xennet_disconnect_backend(info);
 out:
	return err;
}

static int xennet_set_sg(struct net_device *dev, u32 data)
{
	if (data) {
		struct netfront_info *np = netdev_priv(dev);
		int val;

		if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg",
				 "%d", &val) < 0)
			val = 0;
		if (!val)
			return -ENOSYS;
	} else if (dev->mtu > ETH_DATA_LEN)
		dev->mtu = ETH_DATA_LEN;

	return ethtool_op_set_sg(dev, data);
}

static int xennet_set_tso(struct net_device *dev, u32 data)
{
	if (data) {
		struct netfront_info *np = netdev_priv(dev);
		int val;

		if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
				 "feature-gso-tcpv4", "%d", &val) < 0)
			val = 0;
		if (!val)
			return -ENOSYS;
	}

	return ethtool_op_set_tso(dev, data);
}

static void xennet_set_features(struct net_device *dev)
{
	/* Turn off all GSO bits except ROBUST. */
	dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1;
	dev->features |= NETIF_F_GSO_ROBUST;
	xennet_set_sg(dev, 0);

	/* We need checksum offload to enable scatter/gather and TSO. */
	if (!(dev->features & NETIF_F_IP_CSUM))
		return;

	if (!xennet_set_sg(dev, 1))
		xennet_set_tso(dev, 1);
}

static int xennet_connect(struct net_device *dev)
{
	struct netfront_info *np = netdev_priv(dev);
	int i, requeue_idx, err;
	struct sk_buff *skb;
	grant_ref_t ref;
	struct xen_netif_rx_request *req;
	unsigned int feature_rx_copy;

	err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
			   "feature-rx-copy", "%u", &feature_rx_copy);
	if (err != 1)
		feature_rx_copy = 0;

	if (!feature_rx_copy) {
		dev_info(&dev->dev,
			 "backend does not support copying receive path");
		return -ENODEV;
	}

	err = talk_to_backend(np->xbdev, np);
	if (err)
		return err;

	xennet_set_features(dev);

	spin_lock_bh(&np->rx_lock);
	spin_lock_irq(&np->tx_lock);

	/* Step 1: Discard all pending TX packet fragments. */
	xennet_release_tx_bufs(np);

	/* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
	for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
		if (!np->rx_skbs[i])
			continue;

		skb = np->rx_skbs[requeue_idx] = xennet_get_rx_skb(np, i);
		ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i);
		req = RING_GET_REQUEST(&np->rx, requeue_idx);

		gnttab_grant_foreign_access_ref(
			ref, np->xbdev->otherend_id,
			pfn_to_mfn(page_to_pfn(skb_shinfo(skb)->
					       frags->page)),
			0);
		req->gref = ref;
		req->id = requeue_idx;

		requeue_idx++;
	}

	np->rx.req_prod_pvt = requeue_idx;

	/*
	 * Step 3: All public and private state should now be sane. Get
	 * ready to start sending and receiving packets and give the driver
	 * domain a kick because we've probably just requeued some
	 * packets.
	 */
	netif_carrier_on(np->netdev);
	notify_remote_via_irq(np->netdev->irq);
	xennet_tx_buf_gc(dev);
	xennet_alloc_rx_buffers(dev);

	spin_unlock_irq(&np->tx_lock);
	spin_unlock_bh(&np->rx_lock);

	return 0;
}

/**
 * Callback received when the backend's state changes.
 */
static void backend_changed(struct xenbus_device *dev,
			    enum xenbus_state backend_state)
{
	struct netfront_info *np = dev->dev.driver_data;
	struct net_device *netdev = np->netdev;

	dev_dbg(&dev->dev, "%s\n", xenbus_strstate(backend_state));

	switch (backend_state) {
	case XenbusStateInitialising:
	case XenbusStateInitialised:
	case XenbusStateConnected:
	case XenbusStateUnknown:
	case XenbusStateClosed:
		break;

	case XenbusStateInitWait:
		if (dev->state != XenbusStateInitialising)
			break;
		if (xennet_connect(netdev) != 0)
			break;
		xenbus_switch_state(dev, XenbusStateConnected);
		break;

	case XenbusStateClosing:
		xenbus_frontend_closed(dev);
		break;
	}
}

static struct ethtool_ops xennet_ethtool_ops =
{
	.set_tx_csum = ethtool_op_set_tx_csum,
	.set_sg = xennet_set_sg,
	.set_tso = xennet_set_tso,
	.get_link = ethtool_op_get_link,
};

#ifdef CONFIG_SYSFS
static ssize_t show_rxbuf_min(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	struct netfront_info *info = netdev_priv(netdev);

	return sprintf(buf, "%u\n", info->rx_min_target);
}

static ssize_t store_rxbuf_min(struct device *dev,
			       struct device_attribute *attr,
			       const char *buf, size_t len)
{
	struct net_device *netdev = to_net_dev(dev);
	struct netfront_info *np = netdev_priv(netdev);
	char *endp;
	unsigned long target;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	target = simple_strtoul(buf, &endp, 0);
	if (endp == buf)
		return -EBADMSG;

	if (target < RX_MIN_TARGET)
		target = RX_MIN_TARGET;
	if (target > RX_MAX_TARGET)
		target = RX_MAX_TARGET;

	spin_lock_bh(&np->rx_lock);
	if (target > np->rx_max_target)
		np->rx_max_target = target;
	np->rx_min_target = target;
	if (target > np->rx_target)
		np->rx_target = target;

	xennet_alloc_rx_buffers(netdev);

	spin_unlock_bh(&np->rx_lock);
	return len;
}

static ssize_t show_rxbuf_max(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	struct netfront_info *info = netdev_priv(netdev);

	return sprintf(buf, "%u\n", info->rx_max_target);
}

static ssize_t store_rxbuf_max(struct device *dev,
			       struct device_attribute *attr,
			       const char *buf, size_t len)
{
	struct net_device *netdev = to_net_dev(dev);
	struct netfront_info *np = netdev_priv(netdev);
	char *endp;
	unsigned long target;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	target = simple_strtoul(buf, &endp, 0);
	if (endp == buf)
		return -EBADMSG;

	if (target < RX_MIN_TARGET)
		target = RX_MIN_TARGET;
	if (target > RX_MAX_TARGET)
		target = RX_MAX_TARGET;

	spin_lock_bh(&np->rx_lock);
	if (target < np->rx_min_target)
		np->rx_min_target = target;
	np->rx_max_target = target;
	if (target < np->rx_target)
		np->rx_target = target;

	xennet_alloc_rx_buffers(netdev);

	spin_unlock_bh(&np->rx_lock);
	return len;
}

static ssize_t show_rxbuf_cur(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	struct netfront_info *info = netdev_priv(netdev);

	return sprintf(buf, "%u\n", info->rx_target);
}

static struct device_attribute xennet_attrs[] = {
	__ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf_min, store_rxbuf_min),
	__ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf_max, store_rxbuf_max),
	__ATTR(rxbuf_cur, S_IRUGO, show_rxbuf_cur, NULL),
};

static int xennet_sysfs_addif(struct net_device *netdev)
{
	int i;
	int err;

	for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
		err = device_create_file(&netdev->dev,
					 &xennet_attrs[i]);
		if (err)
			goto fail;
	}
	return 0;

 fail:
	while (--i >= 0)
		device_remove_file(&netdev->dev, &xennet_attrs[i]);
	return err;
}

static void xennet_sysfs_delif(struct net_device *netdev)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++)
		device_remove_file(&netdev->dev, &xennet_attrs[i]);
}

#endif /* CONFIG_SYSFS */

static struct xenbus_device_id netfront_ids[] = {
	{ "vif" },
	{ "" }
};

static int __devexit xennet_remove(struct xenbus_device *dev)
{
	struct netfront_info *info = dev->dev.driver_data;

	dev_dbg(&dev->dev, "%s\n", dev->nodename);

	unregister_netdev(info->netdev);

	xennet_disconnect_backend(info);

	del_timer_sync(&info->rx_refill_timer);

	xennet_sysfs_delif(info->netdev);

	free_netdev(info->netdev);

	return 0;
}

static struct xenbus_driver netfront = {
	.name = "vif",
	.owner = THIS_MODULE,
	.ids = netfront_ids,
	.probe = netfront_probe,
	.remove = __devexit_p(xennet_remove),
	.resume = netfront_resume,
	.otherend_changed = backend_changed,
};

static int __init netif_init(void)
{
	if (!is_running_on_xen())
		return -ENODEV;

	if (is_initial_xendomain())
		return 0;

	printk(KERN_INFO "Initialising Xen virtual ethernet driver.\n");

	return xenbus_register_frontend(&netfront);
}
module_init(netif_init);

static void __exit netif_exit(void)
{
	if (is_initial_xendomain())
		return;

	return xenbus_unregister_driver(&netfront);
}
module_exit(netif_exit);

MODULE_DESCRIPTION("Xen virtual network device frontend");
MODULE_LICENSE("GPL");