/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * This RCM module adds support to the RCM framework for an abstract
 * namespace for network devices (DLPI providers).
31 */ 32 #include <alloca.h> 33 #include <stdio.h> 34 #include <stdlib.h> 35 #include <unistd.h> 36 #include <assert.h> 37 #include <string.h> 38 #include <synch.h> 39 #include <libintl.h> 40 #include <errno.h> 41 #include <libdevinfo.h> 42 #include <sys/types.h> 43 #include <net/if.h> 44 #include <libdlaggr.h> 45 #include "rcm_module.h" 46 47 /* 48 * Definitions 49 */ 50 #ifndef lint 51 #define _(x) gettext(x) 52 #else 53 #define _(x) x 54 #endif 55 56 #define CACHE_STALE 1 /* flags */ 57 #define CACHE_NEW 2 /* flags */ 58 59 /* operations */ 60 #define NET_OFFLINE 1 61 #define NET_ONLINE 2 62 #define NET_REMOVE 3 63 #define NET_SUSPEND 4 64 #define NET_RESUME 5 65 66 typedef struct net_cache 67 { 68 char *resource; 69 char *exported; 70 char *driver; 71 int ppa; 72 int flags; 73 struct net_cache *next; 74 struct net_cache *prev; 75 } net_cache_t; 76 77 static net_cache_t cache_head; 78 static net_cache_t cache_tail; 79 static mutex_t cache_lock; 80 81 /* module interface routines */ 82 static int net_register(rcm_handle_t *); 83 static int net_unregister(rcm_handle_t *); 84 static int net_getinfo(rcm_handle_t *, char *, id_t, uint_t, char **, 85 char **, nvlist_t *, rcm_info_t **); 86 static int net_suspend(rcm_handle_t *, char *, id_t, timespec_t *, 87 uint_t, char **, rcm_info_t **); 88 static int net_resume(rcm_handle_t *, char *, id_t, uint_t, char **, 89 rcm_info_t **); 90 static int net_offline(rcm_handle_t *, char *, id_t, uint_t, char **, 91 rcm_info_t **); 92 static int net_online(rcm_handle_t *, char *, id_t, uint_t, char **, 93 rcm_info_t **); 94 static int net_remove(rcm_handle_t *, char *, id_t, uint_t, char **, 95 rcm_info_t **); 96 97 /* module private routines */ 98 static void free_cache(void); 99 static void update_cache(rcm_handle_t *hd); 100 static int devfs_entry(di_node_t node, di_minor_t minor, void *arg); 101 static void cache_remove(net_cache_t *node); 102 static net_cache_t *cache_lookup(const char *resource); 103 static void 
free_node(net_cache_t *); 104 static void cache_insert(net_cache_t *); 105 static boolean_t is_aggregated(char *driver, int ppa); 106 107 /* 108 * Module-Private data 109 */ 110 static struct rcm_mod_ops net_ops = { 111 RCM_MOD_OPS_VERSION, 112 net_register, 113 net_unregister, 114 net_getinfo, 115 net_suspend, 116 net_resume, 117 net_offline, 118 net_online, 119 net_remove 120 }; 121 122 /* 123 * Module Interface Routines 124 */ 125 126 /* 127 * rcm_mod_init() 128 * 129 * Update registrations, and return the ops structure. 130 */ 131 struct rcm_mod_ops * 132 rcm_mod_init(void) 133 { 134 cache_head.next = &cache_tail; 135 cache_head.prev = NULL; 136 cache_tail.prev = &cache_head; 137 cache_tail.next = NULL; 138 (void) mutex_init(&cache_lock, NULL, NULL); 139 140 /* Return the ops vectors */ 141 return (&net_ops); 142 } 143 144 /* 145 * rcm_mod_info() 146 * 147 * Return a string describing this module. 148 */ 149 const char * 150 rcm_mod_info(void) 151 { 152 return ("Network namespace module %I%"); 153 } 154 155 /* 156 * rcm_mod_fini() 157 * 158 * Destroy the cache. 159 */ 160 int 161 rcm_mod_fini(void) 162 { 163 free_cache(); 164 (void) mutex_destroy(&cache_lock); 165 return (RCM_SUCCESS); 166 } 167 168 /* 169 * net_register() 170 * 171 * Make sure the cache is properly sync'ed, and its registrations 172 * are in order. 173 * 174 * Locking: the cache is locked by update_cache, and is held 175 * throughout update_cache's execution because it reads and 176 * possibly modifies cache links continuously. 177 */ 178 static int 179 net_register(rcm_handle_t *hd) 180 { 181 update_cache(hd); 182 return (RCM_SUCCESS); 183 } 184 185 /* 186 * net_unregister() 187 * 188 * Manually walk through the cache, unregistering all the networks. 189 * 190 * Locking: the cache is locked throughout the execution of this routine 191 * because it reads and modifies cache links continuously. 
192 */ 193 static int 194 net_unregister(rcm_handle_t *hd) 195 { 196 net_cache_t *probe; 197 198 assert(hd != NULL); 199 200 /* Walk the cache, unregistering everything */ 201 (void) mutex_lock(&cache_lock); 202 probe = cache_head.next; 203 while (probe != &cache_tail) { 204 (void) rcm_unregister_interest(hd, probe->resource, 0); 205 cache_remove(probe); 206 free_node(probe); 207 probe = cache_head.next; 208 } 209 (void) mutex_unlock(&cache_lock); 210 return (RCM_SUCCESS); 211 } 212 213 /* 214 * Since all we do is pass operations thru, we provide a general 215 * routine for passing through operations. 216 */ 217 /*ARGSUSED*/ 218 static int 219 net_passthru(rcm_handle_t *hd, int op, const char *rsrc, uint_t flag, 220 char **reason, rcm_info_t **dependent_reason, void *arg) 221 { 222 net_cache_t *node; 223 char *exported; 224 int rv; 225 226 /* 227 * Lock the cache just long enough to extract information about this 228 * resource. 229 */ 230 (void) mutex_lock(&cache_lock); 231 node = cache_lookup(rsrc); 232 if (!node) { 233 rcm_log_message(RCM_WARNING, 234 _("NET: unrecognized resource %s\n"), rsrc); 235 (void) mutex_unlock(&cache_lock); 236 return (RCM_SUCCESS); 237 } 238 239 /* 240 * Since node->exported could be freed after we drop cache_lock, 241 * allocate a stack-local copy. We don't use strdup() because some of 242 * the operations (such as NET_REMOVE) are not allowed to fail. Note 243 * that node->exported is never more than MAXPATHLEN bytes. 244 */ 245 exported = alloca(strlen(node->exported) + 1); 246 (void) strlcpy(exported, node->exported, strlen(node->exported) + 1); 247 248 /* 249 * Remove notifications are unconditional in the RCM state model, 250 * so it's safe to remove the node from the cache at this point. 251 * And we need to remove it so that we will recognize it as a new 252 * resource following the reattachment of the resource. 
253 */ 254 if (op == NET_REMOVE) { 255 cache_remove(node); 256 free_node(node); 257 } 258 (void) mutex_unlock(&cache_lock); 259 260 switch (op) { 261 case NET_SUSPEND: 262 rv = rcm_request_suspend(hd, exported, flag, 263 (timespec_t *)arg, dependent_reason); 264 break; 265 case NET_OFFLINE: 266 if (is_aggregated(node->driver, node->ppa)) { 267 /* device is aggregated */ 268 *reason = strdup(gettext( 269 "Resource is in use by aggregation")); 270 if (*reason == NULL) { 271 rcm_log_message(RCM_ERROR, 272 gettext("NET: malloc failure")); 273 } 274 errno = EBUSY; 275 return (RCM_FAILURE); 276 } 277 278 rv = rcm_request_offline(hd, exported, flag, dependent_reason); 279 break; 280 case NET_ONLINE: 281 rv = rcm_notify_online(hd, exported, flag, dependent_reason); 282 break; 283 case NET_REMOVE: 284 rv = rcm_notify_remove(hd, exported, flag, dependent_reason); 285 break; 286 case NET_RESUME: 287 rv = rcm_notify_resume(hd, exported, flag, dependent_reason); 288 break; 289 default: 290 rcm_log_message(RCM_WARNING, 291 _("NET: bad RCM operation %1$d for %2$s\n"), op, exported); 292 errno = EINVAL; 293 return (RCM_FAILURE); 294 } 295 296 if (rv != RCM_SUCCESS) { 297 char format[256]; 298 (void) snprintf(format, sizeof (format), 299 _("RCM operation on dependent %s did not succeed"), 300 exported); 301 rcm_log_message(RCM_WARNING, "NET: %s\n", format); 302 } 303 304 return (rv); 305 } 306 307 308 /* 309 * net_offline() 310 * 311 * Determine dependents of the resource being offlined, and offline 312 * them all. 
313 */ 314 static int 315 net_offline(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, 316 char **reason, rcm_info_t **dependent_reason) 317 { 318 assert(hd != NULL); 319 assert(rsrc != NULL); 320 assert(id == (id_t)0); 321 assert(reason != NULL); 322 assert(dependent_reason != NULL); 323 324 rcm_log_message(RCM_TRACE1, "NET: offline(%s)\n", rsrc); 325 326 return (net_passthru(hd, NET_OFFLINE, rsrc, flags, reason, 327 dependent_reason, NULL)); 328 } 329 330 /* 331 * net_online() 332 * 333 * Online the previously offlined resource, and online its dependents. 334 */ 335 static int 336 net_online(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flag, char **reason, 337 rcm_info_t **dependent_reason) 338 { 339 assert(hd != NULL); 340 assert(rsrc != NULL); 341 assert(id == (id_t)0); 342 343 rcm_log_message(RCM_TRACE1, "NET: online(%s)\n", rsrc); 344 345 return (net_passthru(hd, NET_ONLINE, rsrc, flag, reason, 346 dependent_reason, NULL)); 347 } 348 349 /* 350 * net_getinfo() 351 * 352 * Gather usage information for this resource. 353 * 354 * Locking: the cache is locked while this routine looks up the 355 * resource and extracts copies of any piece of information it needs. 356 * The cache is then unlocked, and this routine performs the rest of 357 * its functions without touching any part of the cache. 
358 */ 359 /*ARGSUSED*/ 360 static int 361 net_getinfo(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flag, 362 char **info, char **errstr, nvlist_t *proplist, rcm_info_t **depend_info) 363 { 364 int len; 365 char *exported; 366 char nic[64]; 367 const char *info_fmt; 368 net_cache_t *node; 369 370 assert(hd != NULL); 371 assert(rsrc != NULL); 372 assert(id == (id_t)0); 373 assert(info != NULL); 374 assert(depend_info != NULL); 375 376 rcm_log_message(RCM_TRACE1, "NET: getinfo(%s)\n", rsrc); 377 378 info_fmt = _("Network interface %s"); 379 380 (void) mutex_lock(&cache_lock); 381 node = cache_lookup(rsrc); 382 if (!node) { 383 rcm_log_message(RCM_WARNING, 384 _("NET: unrecognized resource %s\n"), rsrc); 385 (void) mutex_unlock(&cache_lock); 386 errno = ENOENT; 387 return (RCM_FAILURE); 388 } 389 exported = strdup(node->exported); 390 if (!exported) { 391 rcm_log_message(RCM_ERROR, _("NET: strdup failure")); 392 (void) mutex_unlock(&cache_lock); 393 return (RCM_FAILURE); 394 } 395 396 (void) snprintf(nic, sizeof (nic), "%s%d", node->driver, node->ppa); 397 (void) mutex_unlock(&cache_lock); 398 399 len = strlen(info_fmt) + strlen(nic) + 1; 400 if ((*info = (char *)malloc(len)) == NULL) { 401 rcm_log_message(RCM_ERROR, _("NET: malloc failure")); 402 free(exported); 403 return (RCM_FAILURE); 404 } 405 406 /* Fill in the string */ 407 (void) snprintf(*info, len, info_fmt, nic); 408 409 /* Get dependent info if requested */ 410 if ((flag & RCM_INCLUDE_DEPENDENT) || (flag & RCM_INCLUDE_SUBTREE)) { 411 (void) rcm_get_info(hd, exported, flag, depend_info); 412 } 413 414 (void) nvlist_add_string(proplist, RCM_CLIENT_NAME, "SunOS"); 415 (void) nvlist_add_string_array(proplist, RCM_CLIENT_EXPORTS, 416 &exported, 1); 417 418 free(exported); 419 return (RCM_SUCCESS); 420 } 421 422 /* 423 * net_suspend() 424 * 425 * Notify all dependents that the resource is being suspended. 426 * Since no real operation is involved, QUERY or not doesn't matter. 
427 * 428 * Locking: the cache is only used to retrieve some information about 429 * this resource, so it is only locked during that retrieval. 430 */ 431 static int 432 net_suspend(rcm_handle_t *hd, char *rsrc, id_t id, timespec_t *interval, 433 uint_t flag, char **reason, rcm_info_t **dependent_reason) 434 { 435 assert(hd != NULL); 436 assert(rsrc != NULL); 437 assert(id == (id_t)0); 438 assert(interval != NULL); 439 assert(reason != NULL); 440 assert(dependent_reason != NULL); 441 442 rcm_log_message(RCM_TRACE1, "NET: suspend(%s)\n", rsrc); 443 444 return (net_passthru(hd, NET_SUSPEND, rsrc, flag, reason, 445 dependent_reason, (void *)interval)); 446 } 447 448 /* 449 * net_resume() 450 * 451 * Resume all the dependents of a suspended network. 452 * 453 * Locking: the cache is only used to retrieve some information about 454 * this resource, so it is only locked during that retrieval. 455 */ 456 static int 457 net_resume(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flag, char **info, 458 rcm_info_t **dependent_info) 459 { 460 assert(hd != NULL); 461 assert(rsrc != NULL); 462 assert(id == (id_t)0); 463 assert(info != NULL); 464 assert(dependent_info != NULL); 465 466 rcm_log_message(RCM_TRACE1, "NET: resume(%s)\n", rsrc); 467 468 return (net_passthru(hd, NET_RESUME, rsrc, flag, info, dependent_info, 469 NULL)); 470 } 471 472 /* 473 * net_remove() 474 * 475 * This is another NO-OP for us, we just passthru the information. We 476 * don't need to remove it from our cache. We don't unregister 477 * interest at this point either; the network device name is still 478 * around. This way we don't have to change this logic when we 479 * gain the ability to learn about DR attach operations. 
480 */ 481 static int 482 net_remove(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flag, char **info, 483 rcm_info_t **dependent_info) 484 { 485 assert(hd != NULL); 486 assert(rsrc != NULL); 487 assert(id == (id_t)0); 488 assert(info != NULL); 489 assert(dependent_info != NULL); 490 491 rcm_log_message(RCM_TRACE1, "NET: remove(%s)\n", rsrc); 492 493 return (net_passthru(hd, NET_REMOVE, rsrc, flag, info, dependent_info, 494 NULL)); 495 } 496 497 /* 498 * Cache management routines. Note that the cache is implemented as a 499 * trivial linked list, and is only required because RCM doesn't 500 * provide enough state about our own registrations back to us. This 501 * linked list implementation probably clobbers the CPU cache pretty 502 * well. 503 */ 504 505 /* 506 * cache_lookup() 507 * 508 * Get a cache node for a resource. Call with cache lock held. 509 */ 510 static net_cache_t * 511 cache_lookup(const char *resource) 512 { 513 net_cache_t *probe; 514 probe = cache_head.next; 515 while (probe != &cache_tail) { 516 if (probe->resource && 517 (strcmp(resource, probe->resource) == 0)) { 518 return (probe); 519 } 520 probe = probe->next; 521 } 522 return (NULL); 523 } 524 525 /* 526 * free_node() 527 * 528 * Free a node. Make sure it isn't in the list! 529 */ 530 static void 531 free_node(net_cache_t *node) 532 { 533 if (node) { 534 free(node->resource); 535 free(node->exported); 536 free(node->driver); 537 free(node); 538 } 539 } 540 541 /* 542 * cache_insert() 543 * 544 * Call with the cache_lock held. 545 */ 546 static void 547 cache_insert(net_cache_t *node) 548 { 549 /* insert at the head for best performance */ 550 node->next = cache_head.next; 551 node->prev = &cache_head; 552 553 node->next->prev = node; 554 node->prev->next = node; 555 } 556 557 /* 558 * cache_remove() 559 * 560 * Call with the cache_lock held. 
561 */ 562 static void 563 cache_remove(net_cache_t *node) 564 { 565 node->next->prev = node->prev; 566 node->prev->next = node->next; 567 node->next = NULL; 568 node->prev = NULL; 569 } 570 571 /* 572 * devfs_entry() 573 * 574 * Call with the cache_lock held. 575 */ 576 /*ARGSUSED*/ 577 static int 578 devfs_entry(di_node_t node, di_minor_t minor, void *arg) 579 { 580 char ifname [MAXPATHLEN]; /* should be big enough! */ 581 char *devfspath; 582 char resource[MAXPATHLEN]; 583 char *name; 584 char *cp; 585 int instance; 586 net_cache_t *probe; 587 588 cp = di_minor_nodetype(minor); 589 if ((cp == NULL) || (strcmp(cp, DDI_NT_NET))) { 590 /* doesn't look like a network device */ 591 return (DI_WALK_CONTINUE); 592 } 593 594 name = di_driver_name(node); 595 if (name == NULL) { 596 /* what else can we do? */ 597 return (DI_WALK_CONTINUE); 598 } 599 600 instance = di_instance(node); 601 602 (void) snprintf(ifname, sizeof (ifname), "SUNW_network/%s%d", 603 name, instance); 604 605 devfspath = di_devfs_path(node); 606 if (!devfspath) { 607 /* no devfs path?!? 
*/ 608 rcm_log_message(RCM_DEBUG, "NET: missing devfs path\n"); 609 return (DI_WALK_CONTINUE); 610 } 611 612 if (strncmp("/pseudo", devfspath, strlen("/pseudo")) == 0) { 613 /* ignore pseudo devices, probably not really NICs */ 614 rcm_log_message(RCM_DEBUG, "NET: ignoring pseudo device %s\n", 615 devfspath); 616 di_devfs_path_free(devfspath); 617 return (DI_WALK_CONTINUE); 618 } 619 620 (void) snprintf(resource, sizeof (resource), "/devices%s", devfspath); 621 di_devfs_path_free(devfspath); 622 623 probe = cache_lookup(resource); 624 if (probe != NULL) { 625 rcm_log_message(RCM_DEBUG, "NET: %s already registered\n", 626 resource); 627 probe->flags &= ~(CACHE_STALE); 628 } else { 629 rcm_log_message(RCM_DEBUG, "NET: %s is new resource\n", 630 resource); 631 probe = calloc(1, sizeof (net_cache_t)); 632 if (!probe) { 633 rcm_log_message(RCM_ERROR, _("NET: malloc failure")); 634 return (DI_WALK_CONTINUE); 635 } 636 637 probe->resource = strdup(resource); 638 probe->ppa = instance; 639 probe->driver = strdup(name); 640 probe->exported = strdup(ifname); 641 642 if ((!probe->resource) || (!probe->exported) || 643 (!probe->driver)) { 644 free_node(probe); 645 return (DI_WALK_CONTINUE); 646 } 647 648 probe->flags |= CACHE_NEW; 649 cache_insert(probe); 650 } 651 652 return (DI_WALK_CONTINUE); 653 } 654 655 /* 656 * update_cache() 657 * 658 * The devinfo tree walking code is lifted from ifconfig.c. 
659 */ 660 static void 661 update_cache(rcm_handle_t *hd) 662 { 663 net_cache_t *probe; 664 di_node_t root; 665 int rv; 666 667 (void) mutex_lock(&cache_lock); 668 669 /* first we walk the entire cache, marking each entry stale */ 670 probe = cache_head.next; 671 while (probe != &cache_tail) { 672 probe->flags |= CACHE_STALE; 673 probe = probe->next; 674 } 675 676 root = di_init("/", DINFOSUBTREE | DINFOMINOR); 677 if (root == DI_NODE_NIL) { 678 goto done; 679 } 680 681 (void) di_walk_minor(root, DDI_NT_NET, DI_CHECK_ALIAS, NULL, 682 devfs_entry); 683 684 di_fini(root); 685 686 probe = cache_head.next; 687 while (probe != &cache_tail) { 688 net_cache_t *freeit; 689 if (probe->flags & CACHE_STALE) { 690 (void) rcm_unregister_interest(hd, probe->resource, 0); 691 rcm_log_message(RCM_DEBUG, "NET: unregistered %s\n", 692 probe->resource); 693 freeit = probe; 694 probe = probe->next; 695 cache_remove(freeit); 696 free_node(freeit); 697 continue; 698 } 699 700 if (!(probe->flags & CACHE_NEW)) { 701 probe = probe->next; 702 continue; 703 } 704 705 rcm_log_message(RCM_DEBUG, "NET: registering %s\n", 706 probe->resource); 707 rv = rcm_register_interest(hd, probe->resource, 0, NULL); 708 if (rv != RCM_SUCCESS) { 709 rcm_log_message(RCM_ERROR, 710 _("NET: failed to register %s\n"), 711 probe->resource); 712 } else { 713 rcm_log_message(RCM_DEBUG, 714 "NET: registered %s (as %s)\n", 715 probe->resource, probe->exported); 716 probe->flags &= ~(CACHE_NEW); 717 } 718 probe = probe->next; 719 } 720 721 done: 722 (void) mutex_unlock(&cache_lock); 723 } 724 725 /* 726 * free_cache() 727 */ 728 static void 729 free_cache(void) 730 { 731 net_cache_t *probe; 732 733 (void) mutex_lock(&cache_lock); 734 probe = cache_head.next; 735 while (probe != &cache_tail) { 736 cache_remove(probe); 737 free_node(probe); 738 probe = cache_head.next; 739 } 740 (void) mutex_unlock(&cache_lock); 741 } 742 743 /* 744 * is_aggregated() checks whether a NIC being removed is part of an 745 * aggregation. 
746 */ 747 748 typedef struct aggr_walker_state_s { 749 uint_t naggr; 750 char dev_name[LIFNAMSIZ]; 751 } aggr_walker_state_t; 752 753 static int 754 aggr_walker(void *arg, dladm_aggr_grp_attr_t *grp) 755 { 756 aggr_walker_state_t *state = arg; 757 dladm_aggr_port_attr_t *port; 758 int i; 759 760 for (i = 0; i < grp->lg_nports; i++) { 761 port = &grp->lg_ports[i]; 762 763 rcm_log_message(RCM_TRACE1, "MAC: aggr (%d) port %s\n", 764 grp->lg_key, port->lp_devname); 765 766 if (strcmp(port->lp_devname, state->dev_name) != 0) 767 continue; 768 769 /* found matching MAC port */ 770 state->naggr++; 771 } 772 773 return (0); 774 } 775 776 static boolean_t 777 is_aggregated(char *driver, int ppa) 778 { 779 aggr_walker_state_t state; 780 781 state.naggr = 0; 782 (void) snprintf(state.dev_name, sizeof (state.dev_name), "%s%d", 783 driver, ppa); 784 785 if (dladm_aggr_walk(aggr_walker, &state) != 0) { 786 rcm_log_message(RCM_ERROR, gettext("NET: cannot walk " 787 "aggregations (%s)\n"), strerror(errno)); 788 return (B_FALSE); 789 } 790 791 return (state.naggr > 0); 792 } 793