1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Xen network backend - mac client edition. 29 * 30 * A driver that sits above an existing GLDv3/Nemo MAC driver and 31 * relays packets to/from that driver from/to a guest domain. 32 */ 33 34 #include "xnb.h" 35 36 #include <sys/sunddi.h> 37 #include <sys/ddi.h> 38 #include <sys/modctl.h> 39 #include <sys/strsubr.h> 40 #include <sys/mac_client.h> 41 #include <sys/mac_provider.h> 42 #include <sys/mac_client_priv.h> 43 #include <sys/mac.h> 44 #include <net/if.h> 45 #include <sys/dlpi.h> 46 #include <sys/pattr.h> 47 #include <xen/sys/xenbus_impl.h> 48 #include <xen/sys/xendev.h> 49 #include <sys/sdt.h> 50 #include <sys/note.h> 51 52 /* Track multicast addresses. */ 53 typedef struct xmca { 54 struct xmca *next; 55 ether_addr_t addr; 56 } xmca_t; 57 58 /* State about this device instance. */ 59 typedef struct xnbo { 60 mac_handle_t o_mh; 61 mac_client_handle_t o_mch; 62 mac_unicast_handle_t o_mah; 63 mac_promisc_handle_t o_mphp; 64 boolean_t o_running; 65 boolean_t o_promiscuous; 66 uint32_t o_hcksum_capab; 67 xmca_t *o_mca; 68 char o_link_name[LIFNAMSIZ]; 69 boolean_t o_need_rx_filter; 70 boolean_t o_need_setphysaddr; 71 boolean_t o_multicast_control; 72 } xnbo_t; 73 74 static void xnbo_close_mac(xnb_t *); 75 76 /* 77 * Packets from the peer come here. We pass them to the mac device. 78 */ 79 static void 80 xnbo_to_mac(xnb_t *xnbp, mblk_t *mp) 81 { 82 xnbo_t *xnbop = xnbp->xnb_flavour_data; 83 84 ASSERT(mp != NULL); 85 86 if (!xnbop->o_running) { 87 xnbp->xnb_stat_tx_too_early++; 88 goto fail; 89 } 90 91 if (mac_tx(xnbop->o_mch, mp, 0, 92 MAC_DROP_ON_NO_DESC, NULL) != NULL) { 93 xnbp->xnb_stat_mac_full++; 94 } 95 96 return; 97 98 fail: 99 freemsgchain(mp); 100 } 101 102 /* 103 * Process the checksum flags `flags' provided by the peer for the 104 * packet `mp'. 105 */ 106 static mblk_t * 107 xnbo_cksum_from_peer(xnb_t *xnbp, mblk_t *mp, uint16_t flags) 108 { 109 xnbo_t *xnbop = xnbp->xnb_flavour_data; 110 111 ASSERT(mp->b_next == NULL); 112 113 if ((flags & NETTXF_csum_blank) != 0) { 114 /* 115 * The checksum in the packet is blank. Determine 116 * whether we can do hardware offload and, if so, 117 * update the flags on the mblk according. If not, 118 * calculate and insert the checksum using software. 119 */ 120 mp = xnb_process_cksum_flags(xnbp, mp, 121 xnbop->o_hcksum_capab); 122 } 123 124 return (mp); 125 } 126 127 /* 128 * Calculate the checksum flags to be relayed to the peer for the 129 * packet `mp'. 130 */ 131 static uint16_t 132 xnbo_cksum_to_peer(xnb_t *xnbp, mblk_t *mp) 133 { 134 _NOTE(ARGUNUSED(xnbp)); 135 uint16_t r = 0; 136 uint32_t pflags, csum; 137 138 /* 139 * We might also check for HCK_PARTIALCKSUM here and, 140 * providing that the partial checksum covers the TCP/UDP 141 * payload, return NETRXF_data_validated. 142 * 143 * It seems that it's probably not worthwhile, as even MAC 144 * devices which advertise HCKSUM_INET_PARTIAL in their 145 * capabilities tend to use HCK_FULLCKSUM on the receive side 146 * - they are actually saying that in the output path the 147 * caller must use HCK_PARTIALCKSUM. 148 * 149 * Then again, if a NIC supports HCK_PARTIALCKSUM in its' 150 * output path, the host IP stack will use it. If such packets 151 * are destined for the peer (i.e. looped around) we would 152 * gain some advantage. 153 */ 154 155 hcksum_retrieve(mp, NULL, NULL, NULL, NULL, 156 NULL, &csum, &pflags); 157 158 /* 159 * If the MAC driver has asserted that the checksum is 160 * good, let the peer know. 161 */ 162 if (((pflags & HCK_FULLCKSUM) != 0) && 163 (((pflags & HCK_FULLCKSUM_OK) != 0) || 164 (csum == 0xffff))) 165 r |= NETRXF_data_validated; 166 167 return (r); 168 } 169 170 /* 171 * Packets from the mac device come here. We pass them to the peer. 172 */ 173 /*ARGSUSED*/ 174 static void 175 xnbo_from_mac(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 176 boolean_t loopback) 177 { 178 xnb_t *xnbp = arg; 179 180 mp = xnb_copy_to_peer(xnbp, mp); 181 182 if (mp != NULL) 183 freemsgchain(mp); 184 } 185 186 /* 187 * Packets from the mac device come here. We pass them to the peer if 188 * the destination mac address matches or it's a multicast/broadcast 189 * address. 190 */ 191 static void 192 xnbo_from_mac_filter(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 193 boolean_t loopback) 194 { 195 _NOTE(ARGUNUSED(loopback)); 196 xnb_t *xnbp = arg; 197 xnbo_t *xnbop = xnbp->xnb_flavour_data; 198 mblk_t *next, *keep, *keep_head, *free, *free_head; 199 200 keep = keep_head = free = free_head = NULL; 201 202 #define ADD(list, bp) \ 203 if (list != NULL) \ 204 list->b_next = bp; \ 205 else \ 206 list##_head = bp; \ 207 list = bp; 208 209 for (; mp != NULL; mp = next) { 210 mac_header_info_t hdr_info; 211 212 next = mp->b_next; 213 mp->b_next = NULL; 214 215 if (mac_header_info(xnbop->o_mh, mp, &hdr_info) != 0) { 216 ADD(free, mp); 217 continue; 218 } 219 220 if ((hdr_info.mhi_dsttype == MAC_ADDRTYPE_BROADCAST) || 221 (hdr_info.mhi_dsttype == MAC_ADDRTYPE_MULTICAST)) { 222 ADD(keep, mp); 223 continue; 224 } 225 226 if (bcmp(hdr_info.mhi_daddr, xnbp->xnb_mac_addr, 227 sizeof (xnbp->xnb_mac_addr)) == 0) { 228 ADD(keep, mp); 229 continue; 230 } 231 232 ADD(free, mp); 233 } 234 #undef ADD 235 236 if (keep_head != NULL) 237 xnbo_from_mac(xnbp, mrh, keep_head, B_FALSE); 238 239 if (free_head != NULL) 240 freemsgchain(free_head); 241 } 242 243 static boolean_t 244 xnbo_open_mac(xnb_t *xnbp, char *mac) 245 { 246 xnbo_t *xnbop = xnbp->xnb_flavour_data; 247 int err; 248 const mac_info_t *mi; 249 void (*rx_fn)(void *, mac_resource_handle_t, mblk_t *, boolean_t); 250 struct ether_addr ea; 251 uint_t max_sdu; 252 mac_diag_t diag; 253 254 if ((err = mac_open_by_linkname(mac, &xnbop->o_mh)) != 0) { 255 cmn_err(CE_WARN, "xnbo_open_mac: " 256 "cannot open mac for link %s (%d)", mac, err); 257 return (B_FALSE); 258 } 259 ASSERT(xnbop->o_mh != NULL); 260 261 mi = mac_info(xnbop->o_mh); 262 ASSERT(mi != NULL); 263 264 if (mi->mi_media != DL_ETHER) { 265 cmn_err(CE_WARN, "xnbo_open_mac: " 266 "device is not DL_ETHER (%d)", mi->mi_media); 267 xnbo_close_mac(xnbp); 268 return (B_FALSE); 269 } 270 if (mi->mi_media != mi->mi_nativemedia) { 271 cmn_err(CE_WARN, "xnbo_open_mac: " 272 "device media and native media mismatch (%d != %d)", 273 mi->mi_media, mi->mi_nativemedia); 274 xnbo_close_mac(xnbp); 275 return (B_FALSE); 276 } 277 278 mac_sdu_get(xnbop->o_mh, NULL, &max_sdu); 279 if (max_sdu > XNBMAXPKT) { 280 cmn_err(CE_WARN, "xnbo_open_mac: mac device SDU too big (%d)", 281 max_sdu); 282 xnbo_close_mac(xnbp); 283 return (B_FALSE); 284 } 285 286 /* 287 * MAC_OPEN_FLAGS_MULTI_PRIMARY is relevant when we are migrating a 288 * guest on the localhost itself. In this case we would have the MAC 289 * client open for the guest being migrated *and* also for the 290 * migrated guest (i.e. the former will be active till the migration 291 * is complete when the latter will be activated). This flag states 292 * that it is OK for mac_unicast_add to add the primary MAC unicast 293 * address multiple times. 294 */ 295 if (mac_client_open(xnbop->o_mh, &xnbop->o_mch, NULL, 296 MAC_OPEN_FLAGS_USE_DATALINK_NAME | 297 MAC_OPEN_FLAGS_MULTI_PRIMARY) != 0) { 298 cmn_err(CE_WARN, "xnbo_open_mac: " 299 "error (%d) opening mac client", err); 300 xnbo_close_mac(xnbp); 301 return (B_FALSE); 302 } 303 304 if (xnbop->o_need_rx_filter) 305 rx_fn = xnbo_from_mac_filter; 306 else 307 rx_fn = xnbo_from_mac; 308 309 err = mac_unicast_add_set_rx(xnbop->o_mch, NULL, MAC_UNICAST_PRIMARY, 310 &xnbop->o_mah, 0, &diag, xnbop->o_multicast_control ? rx_fn : NULL, 311 xnbp); 312 if (err != 0) { 313 cmn_err(CE_WARN, "xnbo_open_mac: failed to get the primary " 314 "MAC address of %s: %d", mac, err); 315 xnbo_close_mac(xnbp); 316 return (B_FALSE); 317 } 318 if (!xnbop->o_multicast_control) { 319 err = mac_promisc_add(xnbop->o_mch, MAC_CLIENT_PROMISC_ALL, 320 rx_fn, xnbp, &xnbop->o_mphp, MAC_PROMISC_FLAGS_NO_TX_LOOP | 321 MAC_PROMISC_FLAGS_VLAN_TAG_STRIP); 322 if (err != 0) { 323 cmn_err(CE_WARN, "xnbo_open_mac: " 324 "cannot enable promiscuous mode of %s: %d", 325 mac, err); 326 xnbo_close_mac(xnbp); 327 return (B_FALSE); 328 } 329 xnbop->o_promiscuous = B_TRUE; 330 } 331 332 if (xnbop->o_need_setphysaddr) { 333 err = mac_unicast_primary_set(xnbop->o_mh, xnbp->xnb_mac_addr); 334 /* Warn, but continue on. */ 335 if (err != 0) { 336 bcopy(xnbp->xnb_mac_addr, ea.ether_addr_octet, 337 ETHERADDRL); 338 cmn_err(CE_WARN, "xnbo_open_mac: " 339 "cannot set MAC address of %s to " 340 "%s: %d", mac, ether_sprintf(&ea), err); 341 } 342 } 343 344 if (!mac_capab_get(xnbop->o_mh, MAC_CAPAB_HCKSUM, 345 &xnbop->o_hcksum_capab)) 346 xnbop->o_hcksum_capab = 0; 347 348 xnbop->o_running = B_TRUE; 349 350 return (B_TRUE); 351 } 352 353 static void 354 xnbo_close_mac(xnb_t *xnbp) 355 { 356 xnbo_t *xnbop = xnbp->xnb_flavour_data; 357 xmca_t *loop; 358 359 if (xnbop->o_mh == NULL) 360 return; 361 362 if (xnbop->o_running) 363 xnbop->o_running = B_FALSE; 364 365 mutex_enter(&xnbp->xnb_state_lock); 366 loop = xnbop->o_mca; 367 xnbop->o_mca = NULL; 368 mutex_exit(&xnbp->xnb_state_lock); 369 370 while (loop != NULL) { 371 xmca_t *next = loop->next; 372 373 DTRACE_PROBE3(mcast_remove, 374 (char *), "close", 375 (void *), xnbp, 376 (etheraddr_t *), loop->addr); 377 (void) mac_multicast_remove(xnbop->o_mch, loop->addr); 378 kmem_free(loop, sizeof (*loop)); 379 loop = next; 380 } 381 382 if (xnbop->o_promiscuous) { 383 if (xnbop->o_mphp != NULL) { 384 mac_promisc_remove(xnbop->o_mphp); 385 xnbop->o_mphp = NULL; 386 } 387 xnbop->o_promiscuous = B_FALSE; 388 } else { 389 if (xnbop->o_mch != NULL) 390 mac_rx_clear(xnbop->o_mch); 391 } 392 393 if (xnbop->o_mah != NULL) { 394 (void) mac_unicast_remove(xnbop->o_mch, xnbop->o_mah); 395 xnbop->o_mah = NULL; 396 } 397 398 if (xnbop->o_mch != NULL) { 399 mac_client_close(xnbop->o_mch, 0); 400 xnbop->o_mch = NULL; 401 } 402 403 mac_close(xnbop->o_mh); 404 xnbop->o_mh = NULL; 405 } 406 407 /* 408 * Hotplug has completed and we are connected to the peer. We have all 409 * the information we need to exchange traffic, so open the MAC device 410 * and configure it appropriately. 411 */ 412 static boolean_t 413 xnbo_start_connect(xnb_t *xnbp) 414 { 415 xnbo_t *xnbop = xnbp->xnb_flavour_data; 416 417 return (xnbo_open_mac(xnbp, xnbop->o_link_name)); 418 } 419 420 /* 421 * The guest has successfully synchronize with this instance. We read 422 * the configuration of the guest from xenstore to check whether the 423 * guest requests multicast control. If not (the default) we make a 424 * note that the MAC device needs to be used in promiscious mode. 425 */ 426 static boolean_t 427 xnbo_peer_connected(xnb_t *xnbp) 428 { 429 char *oename; 430 int request; 431 xnbo_t *xnbop = xnbp->xnb_flavour_data; 432 433 oename = xvdi_get_oename(xnbp->xnb_devinfo); 434 435 if (xenbus_scanf(XBT_NULL, oename, 436 "request-multicast-control", "%d", &request) != 0) 437 request = 0; 438 xnbop->o_multicast_control = (request > 0); 439 440 return (B_TRUE); 441 } 442 443 /* 444 * The guest domain has closed down the inter-domain connection. We 445 * close the underlying MAC device. 446 */ 447 static void 448 xnbo_peer_disconnected(xnb_t *xnbp) 449 { 450 xnbo_close_mac(xnbp); 451 } 452 453 /* 454 * The hotplug script has completed. We read information from xenstore 455 * about our configuration, most notably the name of the MAC device we 456 * should use. 457 */ 458 static boolean_t 459 xnbo_hotplug_connected(xnb_t *xnbp) 460 { 461 char *xsname; 462 xnbo_t *xnbop = xnbp->xnb_flavour_data; 463 int need; 464 465 xsname = xvdi_get_xsname(xnbp->xnb_devinfo); 466 467 if (xenbus_scanf(XBT_NULL, xsname, 468 "nic", "%s", xnbop->o_link_name) != 0) { 469 cmn_err(CE_WARN, "xnbo_connect: " 470 "cannot read nic name from %s", xsname); 471 return (B_FALSE); 472 } 473 474 if (xenbus_scanf(XBT_NULL, xsname, 475 "SUNW-need-rx-filter", "%d", &need) != 0) 476 need = 0; 477 xnbop->o_need_rx_filter = (need > 0); 478 479 if (xenbus_scanf(XBT_NULL, xsname, 480 "SUNW-need-set-physaddr", "%d", &need) != 0) 481 need = 0; 482 xnbop->o_need_setphysaddr = (need > 0); 483 484 return (B_TRUE); 485 } 486 487 /* 488 * Find the multicast address `addr', return B_TRUE if it is one that 489 * we receive. If `remove', remove it from the set received. 490 */ 491 static boolean_t 492 xnbo_mcast_find(xnb_t *xnbp, ether_addr_t *addr, boolean_t remove) 493 { 494 xnbo_t *xnbop = xnbp->xnb_flavour_data; 495 xmca_t *prev, *del, *this; 496 497 ASSERT(MUTEX_HELD(&xnbp->xnb_state_lock)); 498 ASSERT(xnbop->o_promiscuous == B_FALSE); 499 500 prev = del = NULL; 501 502 this = xnbop->o_mca; 503 504 while (this != NULL) { 505 if (bcmp(&this->addr, addr, sizeof (this->addr)) == 0) { 506 del = this; 507 if (remove) { 508 if (prev == NULL) 509 xnbop->o_mca = this->next; 510 else 511 prev->next = this->next; 512 } 513 break; 514 } 515 516 prev = this; 517 this = this->next; 518 } 519 520 if (del == NULL) 521 return (B_FALSE); 522 523 if (remove) { 524 DTRACE_PROBE3(mcast_remove, 525 (char *), "remove", 526 (void *), xnbp, 527 (etheraddr_t *), del->addr); 528 mac_multicast_remove(xnbop->o_mch, del->addr); 529 kmem_free(del, sizeof (*del)); 530 } 531 532 return (B_TRUE); 533 } 534 535 /* 536 * Add the multicast address `addr' to the set received. 537 */ 538 static boolean_t 539 xnbo_mcast_add(xnb_t *xnbp, ether_addr_t *addr) 540 { 541 xnbo_t *xnbop = xnbp->xnb_flavour_data; 542 boolean_t r = B_FALSE; 543 544 ASSERT(xnbop->o_promiscuous == B_FALSE); 545 546 mutex_enter(&xnbp->xnb_state_lock); 547 548 if (xnbo_mcast_find(xnbp, addr, B_FALSE)) { 549 r = B_TRUE; 550 } else if (mac_multicast_add(xnbop->o_mch, 551 (const uint8_t *)addr) == 0) { 552 xmca_t *mca; 553 554 DTRACE_PROBE3(mcast_add, 555 (char *), "add", 556 (void *), xnbp, 557 (etheraddr_t *), addr); 558 559 mca = kmem_alloc(sizeof (*mca), KM_SLEEP); 560 bcopy(addr, &mca->addr, sizeof (mca->addr)); 561 562 mca->next = xnbop->o_mca; 563 xnbop->o_mca = mca; 564 565 r = B_TRUE; 566 } 567 568 mutex_exit(&xnbp->xnb_state_lock); 569 570 return (r); 571 } 572 573 /* 574 * Remove the multicast address `addr' from the set received. 575 */ 576 static boolean_t 577 xnbo_mcast_del(xnb_t *xnbp, ether_addr_t *addr) 578 { 579 boolean_t r; 580 581 mutex_enter(&xnbp->xnb_state_lock); 582 r = xnbo_mcast_find(xnbp, addr, B_TRUE); 583 mutex_exit(&xnbp->xnb_state_lock); 584 585 return (r); 586 } 587 588 static int 589 xnbo_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 590 { 591 static xnb_flavour_t flavour = { 592 xnbo_to_mac, xnbo_peer_connected, xnbo_peer_disconnected, 593 xnbo_hotplug_connected, xnbo_start_connect, 594 xnbo_cksum_from_peer, xnbo_cksum_to_peer, 595 xnbo_mcast_add, xnbo_mcast_del, 596 }; 597 xnbo_t *xnbop; 598 599 switch (cmd) { 600 case DDI_ATTACH: 601 break; 602 case DDI_RESUME: 603 return (DDI_SUCCESS); 604 default: 605 return (DDI_FAILURE); 606 } 607 608 xnbop = kmem_zalloc(sizeof (*xnbop), KM_SLEEP); 609 610 if (xnb_attach(dip, &flavour, xnbop) != DDI_SUCCESS) { 611 kmem_free(xnbop, sizeof (*xnbop)); 612 return (DDI_FAILURE); 613 } 614 615 return (DDI_SUCCESS); 616 } 617 618 static int 619 xnbo_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 620 { 621 xnb_t *xnbp = ddi_get_driver_private(dip); 622 xnbo_t *xnbop = xnbp->xnb_flavour_data; 623 624 switch (cmd) { 625 case DDI_DETACH: 626 break; 627 case DDI_SUSPEND: 628 return (DDI_SUCCESS); 629 default: 630 return (DDI_FAILURE); 631 } 632 633 mutex_enter(&xnbp->xnb_tx_lock); 634 mutex_enter(&xnbp->xnb_rx_lock); 635 636 if (!xnbp->xnb_detachable || xnbp->xnb_connected || 637 (xnbp->xnb_tx_buf_count > 0)) { 638 mutex_exit(&xnbp->xnb_rx_lock); 639 mutex_exit(&xnbp->xnb_tx_lock); 640 641 return (DDI_FAILURE); 642 } 643 644 mutex_exit(&xnbp->xnb_rx_lock); 645 mutex_exit(&xnbp->xnb_tx_lock); 646 647 xnbo_close_mac(xnbp); 648 kmem_free(xnbop, sizeof (*xnbop)); 649 650 xnb_detach(dip); 651 652 return (DDI_SUCCESS); 653 } 654 655 static struct cb_ops cb_ops = { 656 nulldev, /* open */ 657 nulldev, /* close */ 658 nodev, /* strategy */ 659 nodev, /* print */ 660 nodev, /* dump */ 661 nodev, /* read */ 662 nodev, /* write */ 663 nodev, /* ioctl */ 664 nodev, /* devmap */ 665 nodev, /* mmap */ 666 nodev, /* segmap */ 667 nochpoll, /* poll */ 668 ddi_prop_op, /* cb_prop_op */ 669 0, /* streamtab */ 670 D_NEW | D_MP | D_64BIT /* Driver compatibility flag */ 671 }; 672 673 static struct dev_ops ops = { 674 DEVO_REV, /* devo_rev */ 675 0, /* devo_refcnt */ 676 nulldev, /* devo_getinfo */ 677 nulldev, /* devo_identify */ 678 nulldev, /* devo_probe */ 679 xnbo_attach, /* devo_attach */ 680 xnbo_detach, /* devo_detach */ 681 nodev, /* devo_reset */ 682 &cb_ops, /* devo_cb_ops */ 683 (struct bus_ops *)0, /* devo_bus_ops */ 684 NULL, /* devo_power */ 685 ddi_quiesce_not_needed, /* devo_quiesce */ 686 }; 687 688 static struct modldrv modldrv = { 689 &mod_driverops, "xnbo driver", &ops, 690 }; 691 692 static struct modlinkage modlinkage = { 693 MODREV_1, &modldrv, NULL 694 }; 695 696 int 697 _init(void) 698 { 699 return (mod_install(&modlinkage)); 700 } 701 702 int 703 _info(struct modinfo *modinfop) 704 { 705 return (mod_info(&modlinkage, modinfop)); 706 } 707 708 int 709 _fini(void) 710 { 711 return (mod_remove(&modlinkage)); 712 } 713