1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * This RCM module adds support to the RCM framework for an abstract 31 * namespace for network devices (DLPI providers). 32 */ 33 #include <alloca.h> 34 #include <stdio.h> 35 #include <stdlib.h> 36 #include <unistd.h> 37 #include <assert.h> 38 #include <string.h> 39 #include <synch.h> 40 #include <libintl.h> 41 #include <errno.h> 42 #include <libdevinfo.h> 43 #include <sys/types.h> 44 #include <net/if.h> 45 #include <liblaadm.h> 46 #include "rcm_module.h" 47 48 /* 49 * Definitions 50 */ 51 #ifndef lint 52 #define _(x) gettext(x) 53 #else 54 #define _(x) x 55 #endif 56 57 #define CACHE_STALE 1 /* flags */ 58 #define CACHE_NEW 2 /* flags */ 59 60 /* operations */ 61 #define NET_OFFLINE 1 62 #define NET_ONLINE 2 63 #define NET_REMOVE 3 64 #define NET_SUSPEND 4 65 #define NET_RESUME 5 66 67 typedef struct net_cache 68 { 69 char *resource; 70 char *exported; 71 char *driver; 72 int ppa; 73 int flags; 74 struct net_cache *next; 75 struct net_cache *prev; 76 } net_cache_t; 77 78 static net_cache_t cache_head; 79 static net_cache_t cache_tail; 80 static mutex_t cache_lock; 81 82 /* module interface routines */ 83 static int net_register(rcm_handle_t *); 84 static int net_unregister(rcm_handle_t *); 85 static int net_getinfo(rcm_handle_t *, char *, id_t, uint_t, char **, 86 char **, nvlist_t *, rcm_info_t **); 87 static int net_suspend(rcm_handle_t *, char *, id_t, timespec_t *, 88 uint_t, char **, rcm_info_t **); 89 static int net_resume(rcm_handle_t *, char *, id_t, uint_t, char **, 90 rcm_info_t **); 91 static int net_offline(rcm_handle_t *, char *, id_t, uint_t, char **, 92 rcm_info_t **); 93 static int net_online(rcm_handle_t *, char *, id_t, uint_t, char **, 94 rcm_info_t **); 95 static int net_remove(rcm_handle_t *, char *, id_t, uint_t, char **, 96 rcm_info_t **); 97 98 /* module private routines */ 99 static void free_cache(void); 100 static void update_cache(rcm_handle_t *hd); 101 static int devfs_entry(di_node_t node, di_minor_t minor, void *arg); 102 static void cache_remove(net_cache_t *node); 103 static net_cache_t *cache_lookup(const char *resource); 104 static void free_node(net_cache_t *); 105 static void cache_insert(net_cache_t *); 106 static boolean_t is_aggregated(char *driver, int ppa); 107 108 /* 109 * Module-Private data 110 */ 111 static struct rcm_mod_ops net_ops = { 112 RCM_MOD_OPS_VERSION, 113 net_register, 114 net_unregister, 115 net_getinfo, 116 net_suspend, 117 net_resume, 118 net_offline, 119 net_online, 120 net_remove 121 }; 122 123 /* 124 * Module Interface Routines 125 */ 126 127 /* 128 * rcm_mod_init() 129 * 130 * Update registrations, and return the ops structure. 131 */ 132 struct rcm_mod_ops * 133 rcm_mod_init(void) 134 { 135 cache_head.next = &cache_tail; 136 cache_head.prev = NULL; 137 cache_tail.prev = &cache_head; 138 cache_tail.next = NULL; 139 (void) mutex_init(&cache_lock, NULL, NULL); 140 141 /* Return the ops vectors */ 142 return (&net_ops); 143 } 144 145 /* 146 * rcm_mod_info() 147 * 148 * Return a string describing this module. 149 */ 150 const char * 151 rcm_mod_info(void) 152 { 153 return ("Network namespace module %I%"); 154 } 155 156 /* 157 * rcm_mod_fini() 158 * 159 * Destroy the cache. 160 */ 161 int 162 rcm_mod_fini(void) 163 { 164 free_cache(); 165 (void) mutex_destroy(&cache_lock); 166 return (RCM_SUCCESS); 167 } 168 169 /* 170 * net_register() 171 * 172 * Make sure the cache is properly sync'ed, and its registrations 173 * are in order. 174 * 175 * Locking: the cache is locked by update_cache, and is held 176 * throughout update_cache's execution because it reads and 177 * possibly modifies cache links continuously. 178 */ 179 static int 180 net_register(rcm_handle_t *hd) 181 { 182 update_cache(hd); 183 return (RCM_SUCCESS); 184 } 185 186 /* 187 * net_unregister() 188 * 189 * Manually walk through the cache, unregistering all the networks. 190 * 191 * Locking: the cache is locked throughout the execution of this routine 192 * because it reads and modifies cache links continuously. 193 */ 194 static int 195 net_unregister(rcm_handle_t *hd) 196 { 197 net_cache_t *probe; 198 199 assert(hd != NULL); 200 201 /* Walk the cache, unregistering everything */ 202 (void) mutex_lock(&cache_lock); 203 probe = cache_head.next; 204 while (probe != &cache_tail) { 205 (void) rcm_unregister_interest(hd, probe->resource, 0); 206 cache_remove(probe); 207 free_node(probe); 208 probe = cache_head.next; 209 } 210 (void) mutex_unlock(&cache_lock); 211 return (RCM_SUCCESS); 212 } 213 214 /* 215 * Since all we do is pass operations thru, we provide a general 216 * routine for passing through operations. 217 */ 218 /*ARGSUSED*/ 219 static int 220 net_passthru(rcm_handle_t *hd, int op, const char *rsrc, uint_t flag, 221 char **reason, rcm_info_t **dependent_reason, void *arg) 222 { 223 net_cache_t *node; 224 char *exported; 225 int rv; 226 227 /* 228 * Lock the cache just long enough to extract information about this 229 * resource. 230 */ 231 (void) mutex_lock(&cache_lock); 232 node = cache_lookup(rsrc); 233 if (!node) { 234 rcm_log_message(RCM_WARNING, 235 _("NET: unrecognized resource %s\n"), rsrc); 236 (void) mutex_unlock(&cache_lock); 237 return (RCM_SUCCESS); 238 } 239 240 /* 241 * Since node->exported could be freed after we drop cache_lock, 242 * allocate a stack-local copy. We don't use strdup() because some of 243 * the operations (such as NET_REMOVE) are not allowed to fail. Note 244 * that node->exported is never more than MAXPATHLEN bytes. 245 */ 246 exported = alloca(strlen(node->exported) + 1); 247 (void) strlcpy(exported, node->exported, strlen(node->exported) + 1); 248 249 /* 250 * Remove notifications are unconditional in the RCM state model, 251 * so it's safe to remove the node from the cache at this point. 252 * And we need to remove it so that we will recognize it as a new 253 * resource following the reattachment of the resource. 254 */ 255 if (op == NET_REMOVE) { 256 cache_remove(node); 257 free_node(node); 258 } 259 (void) mutex_unlock(&cache_lock); 260 261 switch (op) { 262 case NET_SUSPEND: 263 rv = rcm_request_suspend(hd, exported, flag, 264 (timespec_t *)arg, dependent_reason); 265 break; 266 case NET_OFFLINE: 267 if (is_aggregated(node->driver, node->ppa)) { 268 /* device is aggregated */ 269 *reason = strdup(gettext( 270 "Resource is in use by aggregation")); 271 if (*reason == NULL) { 272 rcm_log_message(RCM_ERROR, 273 gettext("NET: malloc failure")); 274 } 275 errno = EBUSY; 276 return (RCM_FAILURE); 277 } 278 279 rv = rcm_request_offline(hd, exported, flag, dependent_reason); 280 break; 281 case NET_ONLINE: 282 rv = rcm_notify_online(hd, exported, flag, dependent_reason); 283 break; 284 case NET_REMOVE: 285 rv = rcm_notify_remove(hd, exported, flag, dependent_reason); 286 break; 287 case NET_RESUME: 288 rv = rcm_notify_resume(hd, exported, flag, dependent_reason); 289 break; 290 default: 291 rcm_log_message(RCM_WARNING, 292 _("NET: bad RCM operation %1$d for %2$s\n"), op, exported); 293 errno = EINVAL; 294 return (RCM_FAILURE); 295 } 296 297 if (rv != RCM_SUCCESS) { 298 char format[256]; 299 (void) snprintf(format, sizeof (format), 300 _("RCM operation on dependent %s did not succeed"), 301 exported); 302 rcm_log_message(RCM_WARNING, "NET: %s\n", format); 303 } 304 305 return (rv); 306 } 307 308 309 /* 310 * net_offline() 311 * 312 * Determine dependents of the resource being offlined, and offline 313 * them all. 314 */ 315 static int 316 net_offline(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, 317 char **reason, rcm_info_t **dependent_reason) 318 { 319 assert(hd != NULL); 320 assert(rsrc != NULL); 321 assert(id == (id_t)0); 322 assert(reason != NULL); 323 assert(dependent_reason != NULL); 324 325 rcm_log_message(RCM_TRACE1, "NET: offline(%s)\n", rsrc); 326 327 return (net_passthru(hd, NET_OFFLINE, rsrc, flags, reason, 328 dependent_reason, NULL)); 329 } 330 331 /* 332 * net_online() 333 * 334 * Online the previously offlined resource, and online its dependents. 335 */ 336 static int 337 net_online(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flag, char **reason, 338 rcm_info_t **dependent_reason) 339 { 340 assert(hd != NULL); 341 assert(rsrc != NULL); 342 assert(id == (id_t)0); 343 344 rcm_log_message(RCM_TRACE1, "NET: online(%s)\n", rsrc); 345 346 return (net_passthru(hd, NET_ONLINE, rsrc, flag, reason, 347 dependent_reason, NULL)); 348 } 349 350 /* 351 * net_getinfo() 352 * 353 * Gather usage information for this resource. 354 * 355 * Locking: the cache is locked while this routine looks up the 356 * resource and extracts copies of any piece of information it needs. 357 * The cache is then unlocked, and this routine performs the rest of 358 * its functions without touching any part of the cache. 359 */ 360 /*ARGSUSED*/ 361 static int 362 net_getinfo(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flag, 363 char **info, char **errstr, nvlist_t *proplist, rcm_info_t **depend_info) 364 { 365 int len; 366 char *exported; 367 char nic[64]; 368 const char *info_fmt; 369 net_cache_t *node; 370 371 assert(hd != NULL); 372 assert(rsrc != NULL); 373 assert(id == (id_t)0); 374 assert(info != NULL); 375 assert(depend_info != NULL); 376 377 rcm_log_message(RCM_TRACE1, "NET: getinfo(%s)\n", rsrc); 378 379 info_fmt = _("Network interface %s"); 380 381 (void) mutex_lock(&cache_lock); 382 node = cache_lookup(rsrc); 383 if (!node) { 384 rcm_log_message(RCM_WARNING, 385 _("NET: unrecognized resource %s\n"), rsrc); 386 (void) mutex_unlock(&cache_lock); 387 errno = ENOENT; 388 return (RCM_FAILURE); 389 } 390 exported = strdup(node->exported); 391 if (!exported) { 392 rcm_log_message(RCM_ERROR, _("NET: strdup failure")); 393 (void) mutex_unlock(&cache_lock); 394 return (RCM_FAILURE); 395 } 396 397 (void) snprintf(nic, sizeof (nic), "%s%d", node->driver, node->ppa); 398 (void) mutex_unlock(&cache_lock); 399 400 len = strlen(info_fmt) + strlen(nic) + 1; 401 if ((*info = (char *)malloc(len)) == NULL) { 402 rcm_log_message(RCM_ERROR, _("NET: malloc failure")); 403 free(exported); 404 return (RCM_FAILURE); 405 } 406 407 /* Fill in the string */ 408 (void) snprintf(*info, len, info_fmt, nic); 409 410 /* Get dependent info if requested */ 411 if ((flag & RCM_INCLUDE_DEPENDENT) || (flag & RCM_INCLUDE_SUBTREE)) { 412 (void) rcm_get_info(hd, exported, flag, depend_info); 413 } 414 415 (void) nvlist_add_string(proplist, RCM_CLIENT_NAME, "SunOS"); 416 (void) nvlist_add_string_array(proplist, RCM_CLIENT_EXPORTS, 417 &exported, 1); 418 419 free(exported); 420 return (RCM_SUCCESS); 421 } 422 423 /* 424 * net_suspend() 425 * 426 * Notify all dependents that the resource is being suspended. 427 * Since no real operation is involved, QUERY or not doesn't matter. 428 * 429 * Locking: the cache is only used to retrieve some information about 430 * this resource, so it is only locked during that retrieval. 431 */ 432 static int 433 net_suspend(rcm_handle_t *hd, char *rsrc, id_t id, timespec_t *interval, 434 uint_t flag, char **reason, rcm_info_t **dependent_reason) 435 { 436 assert(hd != NULL); 437 assert(rsrc != NULL); 438 assert(id == (id_t)0); 439 assert(interval != NULL); 440 assert(reason != NULL); 441 assert(dependent_reason != NULL); 442 443 rcm_log_message(RCM_TRACE1, "NET: suspend(%s)\n", rsrc); 444 445 return (net_passthru(hd, NET_SUSPEND, rsrc, flag, reason, 446 dependent_reason, (void *)interval)); 447 } 448 449 /* 450 * net_resume() 451 * 452 * Resume all the dependents of a suspended network. 453 * 454 * Locking: the cache is only used to retrieve some information about 455 * this resource, so it is only locked during that retrieval. 456 */ 457 static int 458 net_resume(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flag, char **info, 459 rcm_info_t **dependent_info) 460 { 461 assert(hd != NULL); 462 assert(rsrc != NULL); 463 assert(id == (id_t)0); 464 assert(info != NULL); 465 assert(dependent_info != NULL); 466 467 rcm_log_message(RCM_TRACE1, "NET: resume(%s)\n", rsrc); 468 469 return (net_passthru(hd, NET_RESUME, rsrc, flag, info, dependent_info, 470 NULL)); 471 } 472 473 /* 474 * net_remove() 475 * 476 * This is another NO-OP for us, we just passthru the information. We 477 * don't need to remove it from our cache. We don't unregister 478 * interest at this point either; the network device name is still 479 * around. This way we don't have to change this logic when we 480 * gain the ability to learn about DR attach operations. 481 */ 482 static int 483 net_remove(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flag, char **info, 484 rcm_info_t **dependent_info) 485 { 486 assert(hd != NULL); 487 assert(rsrc != NULL); 488 assert(id == (id_t)0); 489 assert(info != NULL); 490 assert(dependent_info != NULL); 491 492 rcm_log_message(RCM_TRACE1, "NET: remove(%s)\n", rsrc); 493 494 return (net_passthru(hd, NET_REMOVE, rsrc, flag, info, dependent_info, 495 NULL)); 496 } 497 498 /* 499 * Cache management routines. Note that the cache is implemented as a 500 * trivial linked list, and is only required because RCM doesn't 501 * provide enough state about our own registrations back to us. This 502 * linked list implementation probably clobbers the CPU cache pretty 503 * well. 504 */ 505 506 /* 507 * cache_lookup() 508 * 509 * Get a cache node for a resource. Call with cache lock held. 510 */ 511 static net_cache_t * 512 cache_lookup(const char *resource) 513 { 514 net_cache_t *probe; 515 probe = cache_head.next; 516 while (probe != &cache_tail) { 517 if (probe->resource && 518 (strcmp(resource, probe->resource) == 0)) { 519 return (probe); 520 } 521 probe = probe->next; 522 } 523 return (NULL); 524 } 525 526 /* 527 * free_node() 528 * 529 * Free a node. Make sure it isn't in the list! 530 */ 531 static void 532 free_node(net_cache_t *node) 533 { 534 if (node) { 535 free(node->resource); 536 free(node->exported); 537 free(node->driver); 538 free(node); 539 } 540 } 541 542 /* 543 * cache_insert() 544 * 545 * Call with the cache_lock held. 546 */ 547 static void 548 cache_insert(net_cache_t *node) 549 { 550 /* insert at the head for best performance */ 551 node->next = cache_head.next; 552 node->prev = &cache_head; 553 554 node->next->prev = node; 555 node->prev->next = node; 556 } 557 558 /* 559 * cache_remove() 560 * 561 * Call with the cache_lock held. 562 */ 563 static void 564 cache_remove(net_cache_t *node) 565 { 566 node->next->prev = node->prev; 567 node->prev->next = node->next; 568 node->next = NULL; 569 node->prev = NULL; 570 } 571 572 /* 573 * devfs_entry() 574 * 575 * Call with the cache_lock held. 576 */ 577 /*ARGSUSED*/ 578 static int 579 devfs_entry(di_node_t node, di_minor_t minor, void *arg) 580 { 581 char ifname [MAXPATHLEN]; /* should be big enough! */ 582 char *devfspath; 583 char resource[MAXPATHLEN]; 584 char *name; 585 char *cp; 586 int instance; 587 net_cache_t *probe; 588 589 cp = di_minor_nodetype(minor); 590 if ((cp == NULL) || (strcmp(cp, DDI_NT_NET))) { 591 /* doesn't look like a network device */ 592 return (DI_WALK_CONTINUE); 593 } 594 595 name = di_driver_name(node); 596 if (name == NULL) { 597 /* what else can we do? */ 598 return (DI_WALK_CONTINUE); 599 } 600 601 instance = di_instance(node); 602 603 (void) snprintf(ifname, sizeof (ifname), "SUNW_network/%s%d", 604 name, instance); 605 606 devfspath = di_devfs_path(node); 607 if (!devfspath) { 608 /* no devfs path?!? */ 609 rcm_log_message(RCM_DEBUG, "NET: missing devfs path\n"); 610 return (DI_WALK_CONTINUE); 611 } 612 613 if (strncmp("/pseudo", devfspath, strlen("/pseudo")) == 0) { 614 /* ignore pseudo devices, probably not really NICs */ 615 rcm_log_message(RCM_DEBUG, "NET: ignoring pseudo device %s\n", 616 devfspath); 617 di_devfs_path_free(devfspath); 618 return (DI_WALK_CONTINUE); 619 } 620 621 (void) snprintf(resource, sizeof (resource), "/devices%s", devfspath); 622 di_devfs_path_free(devfspath); 623 624 probe = cache_lookup(resource); 625 if (probe != NULL) { 626 rcm_log_message(RCM_DEBUG, "NET: %s already registered\n", 627 resource); 628 probe->flags &= ~(CACHE_STALE); 629 } else { 630 rcm_log_message(RCM_DEBUG, "NET: %s is new resource\n", 631 resource); 632 probe = calloc(1, sizeof (net_cache_t)); 633 if (!probe) { 634 rcm_log_message(RCM_ERROR, _("NET: malloc failure")); 635 return (DI_WALK_CONTINUE); 636 } 637 638 probe->resource = strdup(resource); 639 probe->ppa = instance; 640 probe->driver = strdup(name); 641 probe->exported = strdup(ifname); 642 643 if ((!probe->resource) || (!probe->exported) || 644 (!probe->driver)) { 645 free_node(probe); 646 return (DI_WALK_CONTINUE); 647 } 648 649 probe->flags |= CACHE_NEW; 650 cache_insert(probe); 651 } 652 653 return (DI_WALK_CONTINUE); 654 } 655 656 /* 657 * update_cache() 658 * 659 * The devinfo tree walking code is lifted from ifconfig.c. 660 */ 661 static void 662 update_cache(rcm_handle_t *hd) 663 { 664 net_cache_t *probe; 665 di_node_t root; 666 int rv; 667 668 (void) mutex_lock(&cache_lock); 669 670 /* first we walk the entire cache, marking each entry stale */ 671 probe = cache_head.next; 672 while (probe != &cache_tail) { 673 probe->flags |= CACHE_STALE; 674 probe = probe->next; 675 } 676 677 root = di_init("/", DINFOSUBTREE | DINFOMINOR); 678 if (root == DI_NODE_NIL) { 679 goto done; 680 } 681 682 (void) di_walk_minor(root, DDI_NT_NET, DI_CHECK_ALIAS, NULL, 683 devfs_entry); 684 685 di_fini(root); 686 687 probe = cache_head.next; 688 while (probe != &cache_tail) { 689 net_cache_t *freeit; 690 if (probe->flags & CACHE_STALE) { 691 (void) rcm_unregister_interest(hd, probe->resource, 0); 692 rcm_log_message(RCM_DEBUG, "NET: unregistered %s\n", 693 probe->resource); 694 freeit = probe; 695 probe = probe->next; 696 cache_remove(freeit); 697 free_node(freeit); 698 continue; 699 } 700 701 if (!(probe->flags & CACHE_NEW)) { 702 probe = probe->next; 703 continue; 704 } 705 706 rcm_log_message(RCM_DEBUG, "NET: registering %s\n", 707 probe->resource); 708 rv = rcm_register_interest(hd, probe->resource, 0, NULL); 709 if (rv != RCM_SUCCESS) { 710 rcm_log_message(RCM_ERROR, 711 _("NET: failed to register %s\n"), 712 probe->resource); 713 } else { 714 rcm_log_message(RCM_DEBUG, 715 "NET: registered %s (as %s)\n", 716 probe->resource, probe->exported); 717 probe->flags &= ~(CACHE_NEW); 718 } 719 probe = probe->next; 720 } 721 722 done: 723 (void) mutex_unlock(&cache_lock); 724 } 725 726 /* 727 * free_cache() 728 */ 729 static void 730 free_cache(void) 731 { 732 net_cache_t *probe; 733 734 (void) mutex_lock(&cache_lock); 735 probe = cache_head.next; 736 while (probe != &cache_tail) { 737 cache_remove(probe); 738 free_node(probe); 739 probe = cache_head.next; 740 } 741 (void) mutex_unlock(&cache_lock); 742 } 743 744 /* 745 * is_aggregated() checks whether a NIC being removed is part of an 746 * aggregation. 747 */ 748 749 typedef struct aggr_walker_state_s { 750 uint_t naggr; 751 char dev_name[LIFNAMSIZ]; 752 } aggr_walker_state_t; 753 754 static int 755 aggr_walker(void *arg, laadm_grp_attr_sys_t *grp) 756 { 757 aggr_walker_state_t *state = arg; 758 laadm_port_attr_sys_t *port; 759 int i; 760 761 for (i = 0; i < grp->lg_nports; i++) { 762 port = &grp->lg_ports[i]; 763 764 rcm_log_message(RCM_TRACE1, "MAC: aggr (%d) port %s/%d\n", 765 grp->lg_key, port->lp_devname, port->lp_port); 766 767 if (strcmp(port->lp_devname, state->dev_name) != 0) 768 continue; 769 770 /* found matching MAC port */ 771 state->naggr++; 772 } 773 774 return (0); 775 } 776 777 static boolean_t 778 is_aggregated(char *driver, int ppa) 779 { 780 aggr_walker_state_t state; 781 782 state.naggr = 0; 783 (void) snprintf(state.dev_name, sizeof (state.dev_name), "%s%d", 784 driver, ppa); 785 786 if (laadm_walk_sys(aggr_walker, &state) != 0) { 787 rcm_log_message(RCM_ERROR, gettext("NET: cannot walk " 788 "aggregations (%s)\n"), strerror(errno)); 789 return (B_FALSE); 790 } 791 792 return (state.naggr > 0); 793 } 794