1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include "mpd_defs.h" 27 #include "mpd_tables.h" 28 29 /* 30 * Global list of phyints, phyint instances, phyint groups and the anonymous 31 * group; the latter is initialized in phyint_init(). 32 */ 33 struct phyint *phyints = NULL; 34 struct phyint_instance *phyint_instances = NULL; 35 struct phyint_group *phyint_groups = NULL; 36 struct phyint_group *phyint_anongroup; 37 38 /* 39 * Grouplist signature; initialized in phyint_init(). 40 */ 41 static uint64_t phyint_grouplistsig; 42 43 static void phyint_inst_insert(struct phyint_instance *pii); 44 static void phyint_inst_print(struct phyint_instance *pii); 45 46 static void phyint_insert(struct phyint *pi, struct phyint_group *pg); 47 static void phyint_delete(struct phyint *pi); 48 static boolean_t phyint_is_usable(struct phyint *pi); 49 50 static void logint_print(struct logint *li); 51 static void logint_insert(struct phyint_instance *pii, struct logint *li); 52 static struct logint *logint_lookup(struct phyint_instance *pii, char *li_name); 53 54 static void target_print(struct target *tg); 55 static void target_insert(struct phyint_instance *pii, struct target *tg); 56 static struct target *target_first(struct phyint_instance *pii); 57 static struct target *target_select_best(struct phyint_instance *pii); 58 static void target_flush_hosts(struct phyint_group *pg); 59 60 static void reset_pii_probes(struct phyint_instance *pii, struct target *tg); 61 62 static boolean_t phyint_inst_v6_sockinit(struct phyint_instance *pii); 63 static boolean_t phyint_inst_v4_sockinit(struct phyint_instance *pii); 64 65 static int phyint_state_event(struct phyint_group *pg, struct phyint *pi); 66 static int phyint_group_state_event(struct phyint_group *pg); 67 static int phyint_group_change_event(struct phyint_group *pg, ipmp_group_op_t); 68 static int phyint_group_member_event(struct phyint_group *pg, struct phyint *pi, 69 ipmp_if_op_t op); 70 71 static int logint_upcount(struct phyint *pi); 72 static uint64_t gensig(void); 73 74 /* Initialize any per-file global state. Returns 0 on success, -1 on failure */ 75 int 76 phyint_init(void) 77 { 78 phyint_grouplistsig = gensig(); 79 if (track_all_phyints) { 80 phyint_anongroup = phyint_group_create(""); 81 if (phyint_anongroup == NULL) 82 return (-1); 83 phyint_group_insert(phyint_anongroup); 84 } 85 return (0); 86 } 87 88 /* Return the phyint with the given name */ 89 struct phyint * 90 phyint_lookup(const char *name) 91 { 92 struct phyint *pi; 93 94 if (debug & D_PHYINT) 95 logdebug("phyint_lookup(%s)\n", name); 96 97 for (pi = phyints; pi != NULL; pi = pi->pi_next) { 98 if (strncmp(pi->pi_name, name, sizeof (pi->pi_name)) == 0) 99 break; 100 } 101 return (pi); 102 } 103 104 /* 105 * Lookup a phyint in the group that has the same hardware address as `pi', or 106 * NULL if there's none. If `online_only' is set, then only online phyints 107 * are considered when matching. Otherwise, phyints that had been offlined 108 * due to a duplicate hardware address will also be considered. 109 */ 110 static struct phyint * 111 phyint_lookup_hwaddr(struct phyint *pi, boolean_t online_only) 112 { 113 struct phyint *pi2; 114 115 if (pi->pi_group == phyint_anongroup) 116 return (NULL); 117 118 for (pi2 = pi->pi_group->pg_phyint; pi2 != NULL; pi2 = pi2->pi_pgnext) { 119 if (pi2 == pi) 120 continue; 121 122 /* 123 * NOTE: even when online_only is B_FALSE, we ignore phyints 124 * that are administratively offline (rather than offline 125 * because they're dups); when they're brought back online, 126 * they'll be flagged as dups if need be. 127 */ 128 if (pi2->pi_state == PI_OFFLINE && 129 (online_only || !pi2->pi_hwaddrdup)) 130 continue; 131 132 if (pi2->pi_hwaddrlen == pi->pi_hwaddrlen && 133 bcmp(pi2->pi_hwaddr, pi->pi_hwaddr, pi->pi_hwaddrlen) == 0) 134 return (pi2); 135 } 136 return (NULL); 137 } 138 139 /* 140 * Respond to DLPI notifications. Currently, this only processes physical 141 * address changes for the phyint passed via `arg' by onlining or offlining 142 * phyints in the group. 143 */ 144 /* ARGSUSED */ 145 static void 146 phyint_link_notify(dlpi_handle_t dh, dlpi_notifyinfo_t *dnip, void *arg) 147 { 148 struct phyint *pi = arg; 149 struct phyint *oduppi = NULL, *duppi = NULL; 150 151 assert((dnip->dni_note & pi->pi_notes) != 0); 152 153 if (dnip->dni_note != DL_NOTE_PHYS_ADDR) 154 return; 155 156 assert(dnip->dni_physaddrlen <= DLPI_PHYSADDR_MAX); 157 158 /* 159 * If our hardware address hasn't changed, there's nothing to do. 160 */ 161 if (pi->pi_hwaddrlen == dnip->dni_physaddrlen && 162 bcmp(pi->pi_hwaddr, dnip->dni_physaddr, pi->pi_hwaddrlen) == 0) 163 return; 164 165 oduppi = phyint_lookup_hwaddr(pi, _B_FALSE); 166 pi->pi_hwaddrlen = dnip->dni_physaddrlen; 167 (void) memcpy(pi->pi_hwaddr, dnip->dni_physaddr, pi->pi_hwaddrlen); 168 duppi = phyint_lookup_hwaddr(pi, _B_FALSE); 169 170 if (oduppi != NULL || pi->pi_hwaddrdup) { 171 /* 172 * Our old hardware address was a duplicate. If we'd been 173 * offlined because of it, and our new hardware address is not 174 * a duplicate, then bring us online. Otherwise, `oduppi' 175 * must've been the one brought offline; bring it online. 176 */ 177 if (pi->pi_hwaddrdup) { 178 if (duppi == NULL) 179 (void) phyint_undo_offline(pi); 180 } else { 181 assert(oduppi->pi_hwaddrdup); 182 (void) phyint_undo_offline(oduppi); 183 } 184 } 185 186 if (duppi != NULL && !pi->pi_hwaddrdup) { 187 /* 188 * Our new hardware address was a duplicate and we're not 189 * yet flagged as a duplicate; bring us offline. 190 */ 191 pi->pi_hwaddrdup = _B_TRUE; 192 (void) phyint_offline(pi, 0); 193 } 194 } 195 196 /* 197 * Initialize information about the underlying link for `pi', and set us 198 * up to be notified about future changes. Returns _B_TRUE on success. 199 */ 200 boolean_t 201 phyint_link_init(struct phyint *pi) 202 { 203 int retval; 204 uint_t notes; 205 const char *errmsg; 206 dlpi_notifyid_t id; 207 208 pi->pi_notes = 0; 209 retval = dlpi_open(pi->pi_name, &pi->pi_dh, 0); 210 if (retval != DLPI_SUCCESS) { 211 pi->pi_dh = NULL; 212 errmsg = "cannot open"; 213 goto failed; 214 } 215 216 pi->pi_hwaddrlen = DLPI_PHYSADDR_MAX; 217 retval = dlpi_get_physaddr(pi->pi_dh, DL_CURR_PHYS_ADDR, pi->pi_hwaddr, 218 &pi->pi_hwaddrlen); 219 if (retval != DLPI_SUCCESS) { 220 errmsg = "cannot get hardware address"; 221 goto failed; 222 } 223 224 /* 225 * Check if the link supports DLPI link state notifications. For 226 * historical reasons, the actual changes are tracked through routing 227 * sockets, so we immediately disable the notification upon success. 228 */ 229 notes = DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN; 230 retval = dlpi_enabnotify(pi->pi_dh, notes, phyint_link_notify, pi, &id); 231 if (retval == DLPI_SUCCESS) { 232 (void) dlpi_disabnotify(pi->pi_dh, id, NULL); 233 pi->pi_notes |= notes; 234 } 235 236 /* 237 * Enable notification of hardware address changes to keep pi_hwaddr 238 * up-to-date and track if we need to offline/undo-offline phyints. 239 */ 240 notes = DL_NOTE_PHYS_ADDR; 241 retval = dlpi_enabnotify(pi->pi_dh, notes, phyint_link_notify, pi, &id); 242 if (retval == DLPI_SUCCESS && poll_add(dlpi_fd(pi->pi_dh)) == 0) 243 pi->pi_notes |= notes; 244 245 return (_B_TRUE); 246 failed: 247 logerr("%s: %s: %s\n", pi->pi_name, errmsg, dlpi_strerror(retval)); 248 if (pi->pi_dh != NULL) { 249 dlpi_close(pi->pi_dh); 250 pi->pi_dh = NULL; 251 } 252 return (_B_FALSE); 253 } 254 255 /* 256 * Close use of link on `pi'. 257 */ 258 void 259 phyint_link_close(struct phyint *pi) 260 { 261 if (pi->pi_notes & DL_NOTE_PHYS_ADDR) { 262 (void) poll_remove(dlpi_fd(pi->pi_dh)); 263 pi->pi_notes &= ~DL_NOTE_PHYS_ADDR; 264 } 265 266 /* 267 * NOTE: we don't clear pi_notes here so that iflinkstate() can still 268 * properly report the link state even when offline (which is possible 269 * since we use IFF_RUNNING to track link state). 270 */ 271 dlpi_close(pi->pi_dh); 272 pi->pi_dh = NULL; 273 } 274 275 /* Return the phyint instance with the given name and the given family */ 276 struct phyint_instance * 277 phyint_inst_lookup(int af, char *name) 278 { 279 struct phyint *pi; 280 281 if (debug & D_PHYINT) 282 logdebug("phyint_inst_lookup(%s %s)\n", AF_STR(af), name); 283 284 assert(af == AF_INET || af == AF_INET6); 285 286 pi = phyint_lookup(name); 287 if (pi == NULL) 288 return (NULL); 289 290 return (PHYINT_INSTANCE(pi, af)); 291 } 292 293 struct phyint_group * 294 phyint_group_lookup(const char *pg_name) 295 { 296 struct phyint_group *pg; 297 298 if (debug & D_PHYINT) 299 logdebug("phyint_group_lookup(%s)\n", pg_name); 300 301 for (pg = phyint_groups; pg != NULL; pg = pg->pg_next) { 302 if (strncmp(pg->pg_name, pg_name, sizeof (pg->pg_name)) == 0) 303 break; 304 } 305 return (pg); 306 } 307 308 /* 309 * Insert the phyint in the linked list of all phyints. If the phyint belongs 310 * to some group, insert it in the phyint group list. 311 */ 312 static void 313 phyint_insert(struct phyint *pi, struct phyint_group *pg) 314 { 315 if (debug & D_PHYINT) 316 logdebug("phyint_insert(%s '%s')\n", pi->pi_name, pg->pg_name); 317 318 /* Insert the phyint at the head of the 'all phyints' list */ 319 pi->pi_next = phyints; 320 pi->pi_prev = NULL; 321 if (phyints != NULL) 322 phyints->pi_prev = pi; 323 phyints = pi; 324 325 /* 326 * Insert the phyint at the head of the 'phyint_group members' list 327 * of the phyint group to which it belongs. 328 */ 329 pi->pi_pgnext = NULL; 330 pi->pi_pgprev = NULL; 331 pi->pi_group = pg; 332 333 pi->pi_pgnext = pg->pg_phyint; 334 if (pi->pi_pgnext != NULL) 335 pi->pi_pgnext->pi_pgprev = pi; 336 pg->pg_phyint = pi; 337 338 /* Refresh the group state now that this phyint has been added */ 339 phyint_group_refresh_state(pg); 340 341 pg->pg_sig++; 342 (void) phyint_group_member_event(pg, pi, IPMP_IF_ADD); 343 } 344 345 /* Insert the phyint instance in the linked list of all phyint instances. */ 346 static void 347 phyint_inst_insert(struct phyint_instance *pii) 348 { 349 if (debug & D_PHYINT) { 350 logdebug("phyint_inst_insert(%s %s)\n", 351 AF_STR(pii->pii_af), pii->pii_name); 352 } 353 354 /* 355 * Insert the phyint at the head of the 'all phyint instances' list. 356 */ 357 pii->pii_next = phyint_instances; 358 pii->pii_prev = NULL; 359 if (phyint_instances != NULL) 360 phyint_instances->pii_prev = pii; 361 phyint_instances = pii; 362 } 363 364 /* 365 * Create a new phyint with the given parameters. Also insert it into 366 * the list of all phyints and the list of phyint group members by calling 367 * phyint_insert(). 368 */ 369 static struct phyint * 370 phyint_create(char *pi_name, struct phyint_group *pg, uint_t ifindex, 371 uint64_t flags) 372 { 373 struct phyint *pi; 374 375 pi = calloc(1, sizeof (struct phyint)); 376 if (pi == NULL) { 377 logperror("phyint_create: calloc"); 378 return (NULL); 379 } 380 381 /* 382 * Record the phyint values. 383 */ 384 (void) strlcpy(pi->pi_name, pi_name, sizeof (pi->pi_name)); 385 pi->pi_taddrthresh = getcurrentsec() + TESTADDR_CONF_TIME; 386 pi->pi_ifindex = ifindex; 387 pi->pi_icmpid = htons(((getpid() & 0xFF) << 8) | (ifindex & 0xFF)); 388 389 pi->pi_state = PI_INIT; 390 pi->pi_flags = PHYINT_FLAGS(flags); 391 392 /* 393 * Initialize the link state. The link state is initialized to 394 * up, so that if the link is down when IPMP starts monitoring 395 * the interface, it will appear as though there has been a 396 * transition from the link up to link down. This avoids 397 * having to treat this situation as a special case. 398 */ 399 INIT_LINK_STATE(pi); 400 401 if (!phyint_link_init(pi)) { 402 free(pi); 403 return (NULL); 404 } 405 406 /* 407 * Insert the phyint in the list of all phyints, and the 408 * list of phyint group members 409 */ 410 phyint_insert(pi, pg); 411 412 /* 413 * If the interface is offline, we set the state to PI_OFFLINE. 414 * Otherwise, optimistically consider this interface running. Later 415 * (in process_link_state_changes()), we will adjust this to match the 416 * current state of the link. Further, if test addresses are 417 * subsequently assigned, we will transition to PI_NOTARGETS and then 418 * to either PI_RUNNING or PI_FAILED depending on the probe results. 419 */ 420 if (flags & IFF_OFFLINE) 421 phyint_chstate(pi, PI_OFFLINE); 422 else 423 phyint_transition_to_running(pi); /* calls phyint_chstate() */ 424 425 return (pi); 426 } 427 428 /* 429 * Create a new phyint instance belonging to the phyint 'pi' and address 430 * family 'af'. Also insert it into the list of all phyint instances by 431 * calling phyint_inst_insert(). 432 */ 433 static struct phyint_instance * 434 phyint_inst_create(struct phyint *pi, int af) 435 { 436 struct phyint_instance *pii; 437 438 pii = calloc(1, sizeof (struct phyint_instance)); 439 if (pii == NULL) { 440 logperror("phyint_inst_create: calloc"); 441 return (NULL); 442 } 443 444 /* 445 * Attach the phyint instance to the phyint. 446 * Set the back pointers as well 447 */ 448 pii->pii_phyint = pi; 449 if (af == AF_INET) 450 pi->pi_v4 = pii; 451 else 452 pi->pi_v6 = pii; 453 454 pii->pii_in_use = 1; 455 pii->pii_probe_sock = -1; 456 pii->pii_snxt = 1; 457 pii->pii_af = af; 458 pii->pii_fd_hrtime = gethrtime() + 459 (FAILURE_DETECTION_QP * (hrtime_t)NANOSEC); 460 pii->pii_flags = pi->pi_flags; 461 462 /* Insert the phyint instance in the list of all phyint instances. */ 463 phyint_inst_insert(pii); 464 return (pii); 465 } 466 467 /* 468 * Change the state of phyint `pi' to state `state'. 469 */ 470 void 471 phyint_chstate(struct phyint *pi, enum pi_state state) 472 { 473 /* 474 * To simplify things, some callers always set a given state 475 * regardless of the previous state of the phyint (e.g., setting 476 * PI_RUNNING when it's already set). We shouldn't bother 477 * generating an event or consuming a signature for these, since 478 * the actual state of the interface is unchanged. 479 */ 480 if (pi->pi_state == state) 481 return; 482 483 pi->pi_state = state; 484 phyint_changed(pi); 485 } 486 487 /* 488 * Note that `pi' has changed state. 489 */ 490 void 491 phyint_changed(struct phyint *pi) 492 { 493 pi->pi_group->pg_sig++; 494 (void) phyint_state_event(pi->pi_group, pi); 495 } 496 497 /* 498 * Insert the phyint group in the linked list of all phyint groups 499 * at the head of the list 500 */ 501 void 502 phyint_group_insert(struct phyint_group *pg) 503 { 504 pg->pg_next = phyint_groups; 505 pg->pg_prev = NULL; 506 if (phyint_groups != NULL) 507 phyint_groups->pg_prev = pg; 508 phyint_groups = pg; 509 510 phyint_grouplistsig++; 511 (void) phyint_group_change_event(pg, IPMP_GROUP_ADD); 512 } 513 514 /* 515 * Create a new phyint group called 'name'. 516 */ 517 struct phyint_group * 518 phyint_group_create(const char *name) 519 { 520 struct phyint_group *pg; 521 522 if (debug & D_PHYINT) 523 logdebug("phyint_group_create(%s)\n", name); 524 525 pg = calloc(1, sizeof (struct phyint_group)); 526 if (pg == NULL) { 527 logperror("phyint_group_create: calloc"); 528 return (NULL); 529 } 530 531 (void) strlcpy(pg->pg_name, name, sizeof (pg->pg_name)); 532 pg->pg_sig = gensig(); 533 pg->pg_fdt = user_failure_detection_time; 534 pg->pg_probeint = user_probe_interval; 535 pg->pg_in_use = _B_TRUE; 536 537 /* 538 * Normal groups always start in the PG_FAILED state since they 539 * have no active interfaces. In contrast, anonymous groups are 540 * heterogeneous and thus always PG_OK. 541 */ 542 pg->pg_state = (name[0] == '\0' ? PG_OK : PG_FAILED); 543 544 return (pg); 545 } 546 547 /* 548 * Change the state of the phyint group `pg' to state `state'. 549 */ 550 void 551 phyint_group_chstate(struct phyint_group *pg, enum pg_state state) 552 { 553 assert(pg != phyint_anongroup); 554 555 /* 556 * To simplify things, some callers always set a given state 557 * regardless of the previous state of the group (e.g., setting 558 * PG_DEGRADED when it's already set). We shouldn't bother 559 * generating an event or consuming a signature for these, since 560 * the actual state of the group is unchanged. 561 */ 562 if (pg->pg_state == state) 563 return; 564 565 pg->pg_state = state; 566 567 switch (state) { 568 case PG_FAILED: 569 /* 570 * We can never know with certainty that a group has 571 * failed. It is possible that all known targets have 572 * failed simultaneously, and new targets have come up 573 * instead. If the targets are routers then router 574 * discovery will kick in, and we will see the new routers 575 * thru routing socket messages. But if the targets are 576 * hosts, we have to discover it by multicast. So flush 577 * all the host targets. The next probe will send out a 578 * multicast echo request. If this is a group failure, we 579 * will still not see any response, otherwise the group 580 * will be repaired after we get NUM_PROBE_REPAIRS 581 * consecutive unicast replies on any phyint. 582 */ 583 target_flush_hosts(pg); 584 break; 585 586 case PG_OK: 587 case PG_DEGRADED: 588 break; 589 590 default: 591 logerr("phyint_group_chstate: invalid group state %d; " 592 "aborting\n", state); 593 abort(); 594 } 595 596 pg->pg_sig++; 597 (void) phyint_group_state_event(pg); 598 } 599 600 /* 601 * Create a new phyint instance and initialize it from the values supplied by 602 * the kernel. Always check for ENXIO before logging any error, because the 603 * interface could have vanished after completion of SIOCGLIFCONF. 604 * Return values: 605 * pointer to the phyint instance on success 606 * NULL on failure Eg. if the phyint instance is not found in the kernel 607 */ 608 struct phyint_instance * 609 phyint_inst_init_from_k(int af, char *pi_name) 610 { 611 char pg_name[LIFNAMSIZ + 1]; 612 int ifsock; 613 uint_t ifindex; 614 uint64_t flags; 615 struct lifreq lifr; 616 struct phyint *pi; 617 struct phyint_instance *pii; 618 boolean_t pi_created; 619 struct phyint_group *pg; 620 621 retry: 622 pii = NULL; 623 pi = NULL; 624 pg = NULL; 625 pi_created = _B_FALSE; 626 627 if (debug & D_PHYINT) { 628 logdebug("phyint_inst_init_from_k(%s %s)\n", 629 AF_STR(af), pi_name); 630 } 631 632 assert(af == AF_INET || af == AF_INET6); 633 634 /* Get the socket for doing ioctls */ 635 ifsock = (af == AF_INET) ? ifsock_v4 : ifsock_v6; 636 637 /* 638 * Get the interface flags. Ignore virtual interfaces, IPMP 639 * meta-interfaces, point-to-point interfaces, and interfaces 640 * that can't support multicast. 641 */ 642 (void) strlcpy(lifr.lifr_name, pi_name, sizeof (lifr.lifr_name)); 643 if (ioctl(ifsock, SIOCGLIFFLAGS, (char *)&lifr) < 0) { 644 if (errno != ENXIO) { 645 logperror("phyint_inst_init_from_k:" 646 " ioctl (get flags)"); 647 } 648 return (NULL); 649 } 650 flags = lifr.lifr_flags; 651 if (!(flags & IFF_MULTICAST) || 652 (flags & (IFF_VIRTUAL|IFF_IPMP|IFF_POINTOPOINT))) 653 return (NULL); 654 655 /* 656 * Get the ifindex for recording later in our tables, in case we need 657 * to create a new phyint. 658 */ 659 if (ioctl(ifsock, SIOCGLIFINDEX, (char *)&lifr) < 0) { 660 if (errno != ENXIO) { 661 logperror("phyint_inst_init_from_k: " 662 " ioctl (get lifindex)"); 663 } 664 return (NULL); 665 } 666 ifindex = lifr.lifr_index; 667 668 /* 669 * Get the phyint group name of this phyint, from the kernel. 670 */ 671 if (ioctl(ifsock, SIOCGLIFGROUPNAME, (char *)&lifr) < 0) { 672 if (errno != ENXIO) { 673 logperror("phyint_inst_init_from_k: " 674 "ioctl (get group name)"); 675 } 676 return (NULL); 677 } 678 (void) strlcpy(pg_name, lifr.lifr_groupname, sizeof (pg_name)); 679 680 /* 681 * If the phyint is not part of any group, pg_name is the 682 * null string. If 'track_all_phyints' is false, there is no 683 * need to create a phyint. 684 */ 685 if (pg_name[0] == '\0' && !track_all_phyints) { 686 /* 687 * If the IFF_FAILED, IFF_INACTIVE, or IFF_OFFLINE flags are 688 * set, reset them. These flags shouldn't be set if in.mpathd 689 * isn't tracking the interface. 690 */ 691 if ((flags & (IFF_FAILED | IFF_INACTIVE | IFF_OFFLINE))) { 692 lifr.lifr_flags = flags & 693 ~(IFF_FAILED | IFF_INACTIVE | IFF_OFFLINE); 694 if (ioctl(ifsock, SIOCSLIFFLAGS, (char *)&lifr) < 0) { 695 if (errno != ENXIO) { 696 logperror("phyint_inst_init_from_k:" 697 " ioctl (set flags)"); 698 } 699 } 700 } 701 return (NULL); 702 } 703 704 /* 705 * We need to create a new phyint instance. We may also need to 706 * create the group if e.g. the SIOCGLIFCONF loop in initifs() found 707 * an underlying interface before it found its IPMP meta-interface. 708 * Note that we keep any created groups even if phyint_inst_from_k() 709 * fails since a group's existence is not dependent on the ability of 710 * in.mpathd to the track the group's interfaces. 711 */ 712 if ((pg = phyint_group_lookup(pg_name)) == NULL) { 713 if ((pg = phyint_group_create(pg_name)) == NULL) { 714 logerr("phyint_inst_init_from_k: cannot create group " 715 "%s\n", pg_name); 716 return (NULL); 717 } 718 phyint_group_insert(pg); 719 } 720 721 /* 722 * Lookup the phyint. If the phyint does not exist create it. 723 */ 724 pi = phyint_lookup(pi_name); 725 if (pi == NULL) { 726 pi = phyint_create(pi_name, pg, ifindex, flags); 727 if (pi == NULL) { 728 logerr("phyint_inst_init_from_k:" 729 " unable to create phyint %s\n", pi_name); 730 return (NULL); 731 } 732 pi_created = _B_TRUE; 733 } else { 734 /* The phyint exists already. */ 735 assert(pi_created == _B_FALSE); 736 /* 737 * Normally we should see consistent values for the IPv4 and 738 * IPv6 instances, for phyint properties. If we don't, it 739 * means things have changed underneath us, and we should 740 * resync our tables with the kernel. Check whether the 741 * interface index has changed. If so, it is most likely 742 * the interface has been unplumbed and replumbed, 743 * while we are yet to update our tables. Do it now. 744 */ 745 if (pi->pi_ifindex != ifindex) { 746 phyint_inst_delete(PHYINT_INSTANCE(pi, AF_OTHER(af))); 747 goto retry; 748 } 749 assert(PHYINT_INSTANCE(pi, af) == NULL); 750 751 /* 752 * If the group name seen by the IPv4 and IPv6 instances 753 * are different, it is most likely the groupname has 754 * changed, while we are yet to update our tables. Do it now. 755 */ 756 if (strcmp(pi->pi_group->pg_name, pg_name) != 0) { 757 phyint_inst_delete(PHYINT_INSTANCE(pi, 758 AF_OTHER(af))); 759 goto retry; 760 } 761 } 762 763 /* 764 * Create a new phyint instance, corresponding to the 'af' 765 * passed in. 766 */ 767 pii = phyint_inst_create(pi, af); 768 if (pii == NULL) { 769 logerr("phyint_inst_init_from_k: unable to create" 770 "phyint inst %s\n", pi->pi_name); 771 if (pi_created) 772 phyint_delete(pi); 773 774 return (NULL); 775 } 776 777 if (pi_created) { 778 /* 779 * If this phyint does not have a unique hardware address in its 780 * group, offline it. (The change_pif_flags() implementation 781 * requires that we defer this until after the phyint_instance 782 * is created.) 783 */ 784 if (phyint_lookup_hwaddr(pi, _B_TRUE) != NULL) { 785 pi->pi_hwaddrdup = _B_TRUE; 786 (void) phyint_offline(pi, 0); 787 } 788 } 789 790 return (pii); 791 } 792 793 /* 794 * Bind pii_probe_sock to the address associated with pii_probe_logint. 795 * This socket will be used for sending and receiving ICMP/ICMPv6 probes to 796 * targets. Do the common part in this function, and complete the 797 * initializations by calling the protocol specific functions 798 * phyint_inst_v{4,6}_sockinit() respectively. 799 * 800 * Return values: _B_TRUE/_B_FALSE for success or failure respectively. 801 */ 802 boolean_t 803 phyint_inst_sockinit(struct phyint_instance *pii) 804 { 805 boolean_t success; 806 struct phyint_group *pg; 807 808 if (debug & D_PHYINT) { 809 logdebug("phyint_inst_sockinit(%s %s)\n", 810 AF_STR(pii->pii_af), pii->pii_name); 811 } 812 813 assert(pii->pii_probe_logint != NULL); 814 assert(pii->pii_probe_logint->li_flags & IFF_UP); 815 assert(pii->pii_probe_logint->li_flags & IFF_NOFAILOVER); 816 assert(pii->pii_af == AF_INET || pii->pii_af == AF_INET6); 817 818 /* 819 * If the socket is already bound, close pii_probe_sock 820 */ 821 if (pii->pii_probe_sock != -1) 822 close_probe_socket(pii, _B_TRUE); 823 824 /* 825 * If the phyint is not part of a named group and track_all_phyints is 826 * false, simply return. 827 */ 828 pg = pii->pii_phyint->pi_group; 829 if (pg == phyint_anongroup && !track_all_phyints) { 830 if (debug & D_PHYINT) 831 logdebug("phyint_inst_sockinit: no group\n"); 832 return (_B_FALSE); 833 } 834 835 /* 836 * Initialize the socket by calling the protocol specific function. 837 * If it succeeds, add the socket to the poll list. 838 */ 839 if (pii->pii_af == AF_INET6) 840 success = phyint_inst_v6_sockinit(pii); 841 else 842 success = phyint_inst_v4_sockinit(pii); 843 844 if (success && (poll_add(pii->pii_probe_sock) == 0)) 845 return (_B_TRUE); 846 847 /* Something failed, cleanup and return false */ 848 if (pii->pii_probe_sock != -1) 849 close_probe_socket(pii, _B_FALSE); 850 851 return (_B_FALSE); 852 } 853 854 /* 855 * IPv6 specific part in initializing the pii_probe_sock. This socket is 856 * used to send/receive ICMPv6 probe packets. 857 */ 858 static boolean_t 859 phyint_inst_v6_sockinit(struct phyint_instance *pii) 860 { 861 icmp6_filter_t filter; 862 int hopcount = 1; 863 int off = 0; 864 int on = 1; 865 struct sockaddr_in6 testaddr; 866 867 /* 868 * Open a raw socket with ICMPv6 protocol. 869 * 870 * Use IPV6_BOUND_IF to make sure that probes are sent and received on 871 * the specified phyint only. Bind to the test address to ensure that 872 * the responses are sent to the specified phyint. 873 * 874 * Set the hopcount to 1 so that probe packets are not routed. 875 * Disable multicast loopback. Set the receive filter to 876 * receive only ICMPv6 echo replies. 877 */ 878 pii->pii_probe_sock = socket(pii->pii_af, SOCK_RAW, IPPROTO_ICMPV6); 879 if (pii->pii_probe_sock < 0) { 880 logperror_pii(pii, "phyint_inst_v6_sockinit: socket"); 881 return (_B_FALSE); 882 } 883 884 bzero(&testaddr, sizeof (testaddr)); 885 testaddr.sin6_family = AF_INET6; 886 testaddr.sin6_port = 0; 887 testaddr.sin6_addr = pii->pii_probe_logint->li_addr; 888 889 if (bind(pii->pii_probe_sock, (struct sockaddr *)&testaddr, 890 sizeof (testaddr)) < 0) { 891 logperror_pii(pii, "phyint_inst_v6_sockinit: IPv6 bind"); 892 return (_B_FALSE); 893 } 894 895 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_MULTICAST_IF, 896 (char *)&pii->pii_ifindex, sizeof (uint_t)) < 0) { 897 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 898 " IPV6_MULTICAST_IF"); 899 return (_B_FALSE); 900 } 901 902 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_BOUND_IF, 903 &pii->pii_ifindex, sizeof (uint_t)) < 0) { 904 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 905 " IPV6_BOUND_IF"); 906 return (_B_FALSE); 907 } 908 909 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_UNICAST_HOPS, 910 (char *)&hopcount, sizeof (hopcount)) < 0) { 911 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 912 " IPV6_UNICAST_HOPS"); 913 return (_B_FALSE); 914 } 915 916 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_MULTICAST_HOPS, 917 (char *)&hopcount, sizeof (hopcount)) < 0) { 918 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 919 " IPV6_MULTICAST_HOPS"); 920 return (_B_FALSE); 921 } 922 923 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_MULTICAST_LOOP, 924 (char *)&off, sizeof (off)) < 0) { 925 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 926 " IPV6_MULTICAST_LOOP"); 927 return (_B_FALSE); 928 } 929 930 /* 931 * Filter out so that we only receive ICMP echo replies 932 */ 933 ICMP6_FILTER_SETBLOCKALL(&filter); 934 ICMP6_FILTER_SETPASS(ICMP6_ECHO_REPLY, &filter); 935 936 if (setsockopt(pii->pii_probe_sock, IPPROTO_ICMPV6, ICMP6_FILTER, 937 (char *)&filter, sizeof (filter)) < 0) { 938 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 939 " ICMP6_FILTER"); 940 return (_B_FALSE); 941 } 942 943 /* Enable receipt of hoplimit */ 944 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, 945 &on, sizeof (on)) < 0) { 946 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 947 " IPV6_RECVHOPLIMIT"); 948 return (_B_FALSE); 949 } 950 951 /* Enable receipt of timestamp */ 952 if (setsockopt(pii->pii_probe_sock, SOL_SOCKET, SO_TIMESTAMP, 953 &on, sizeof (on)) < 0) { 954 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 955 " SO_TIMESTAMP"); 956 return (_B_FALSE); 957 } 958 959 return (_B_TRUE); 960 } 961 962 /* 963 * IPv4 specific part in initializing the pii_probe_sock. This socket is 964 * used to send/receive ICMPv4 probe packets. 965 */ 966 static boolean_t 967 phyint_inst_v4_sockinit(struct phyint_instance *pii) 968 { 969 struct sockaddr_in testaddr; 970 char char_off = 0; 971 int ttl = 1; 972 char char_ttl = 1; 973 int on = 1; 974 975 /* 976 * Open a raw socket with ICMPv4 protocol. 977 * 978 * Use IP_BOUND_IF to make sure that probes are sent and received on 979 * the specified phyint only. Bind to the test address to ensure that 980 * the responses are sent to the specified phyint. 981 * 982 * Set the ttl to 1 so that probe packets are not routed. 983 * Disable multicast loopback. Enable receipt of timestamp. 984 */ 985 pii->pii_probe_sock = socket(pii->pii_af, SOCK_RAW, IPPROTO_ICMP); 986 if (pii->pii_probe_sock < 0) { 987 logperror_pii(pii, "phyint_inst_v4_sockinit: socket"); 988 return (_B_FALSE); 989 } 990 991 bzero(&testaddr, sizeof (testaddr)); 992 testaddr.sin_family = AF_INET; 993 testaddr.sin_port = 0; 994 IN6_V4MAPPED_TO_INADDR(&pii->pii_probe_logint->li_addr, 995 &testaddr.sin_addr); 996 997 if (bind(pii->pii_probe_sock, (struct sockaddr *)&testaddr, 998 sizeof (testaddr)) < 0) { 999 logperror_pii(pii, "phyint_inst_v4_sockinit: IPv4 bind"); 1000 return (_B_FALSE); 1001 } 1002 1003 if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_BOUND_IF, 1004 &pii->pii_ifindex, sizeof (uint_t)) < 0) { 1005 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1006 " IP_BOUND_IF"); 1007 return (_B_FALSE); 1008 } 1009 1010 if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_MULTICAST_IF, 1011 (char *)&testaddr.sin_addr, sizeof (struct in_addr)) < 0) { 1012 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1013 " IP_MULTICAST_IF"); 1014 return (_B_FALSE); 1015 } 1016 1017 if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_TTL, 1018 (char *)&ttl, sizeof (ttl)) < 0) { 1019 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1020 " IP_TTL"); 1021 return (_B_FALSE); 1022 } 1023 1024 if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_MULTICAST_LOOP, 1025 (char *)&char_off, sizeof (char_off)) == -1) { 1026 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1027 " IP_MULTICAST_LOOP"); 1028 return (_B_FALSE); 1029 } 1030 1031 if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_MULTICAST_TTL, 1032 (char *)&char_ttl, sizeof (char_ttl)) == -1) { 1033 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1034 " IP_MULTICAST_TTL"); 1035 return (_B_FALSE); 1036 } 1037 1038 if (setsockopt(pii->pii_probe_sock, SOL_SOCKET, SO_TIMESTAMP, &on, 1039 sizeof (on)) < 0) { 1040 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1041 " SO_TIMESTAMP"); 1042 return (_B_FALSE); 1043 } 1044 1045 return (_B_TRUE); 1046 } 1047 1048 /* 1049 * Remove the phyint group from the list of 'all phyint groups' 1050 * and free it. 1051 */ 1052 void 1053 phyint_group_delete(struct phyint_group *pg) 1054 { 1055 /* 1056 * The anonymous group always exists, even when empty. 1057 */ 1058 if (pg == phyint_anongroup) 1059 return; 1060 1061 if (debug & D_PHYINT) 1062 logdebug("phyint_group_delete('%s')\n", pg->pg_name); 1063 1064 /* 1065 * The phyint group must be empty, and must not have any phyints. 1066 * The phyint group must be in the list of all phyint groups 1067 */ 1068 assert(pg->pg_phyint == NULL); 1069 assert(phyint_groups == pg || pg->pg_prev != NULL); 1070 1071 if (pg->pg_prev != NULL) 1072 pg->pg_prev->pg_next = pg->pg_next; 1073 else 1074 phyint_groups = pg->pg_next; 1075 1076 if (pg->pg_next != NULL) 1077 pg->pg_next->pg_prev = pg->pg_prev; 1078 1079 pg->pg_next = NULL; 1080 pg->pg_prev = NULL; 1081 1082 phyint_grouplistsig++; 1083 (void) phyint_group_change_event(pg, IPMP_GROUP_REMOVE); 1084 1085 addrlist_free(&pg->pg_addrs); 1086 free(pg); 1087 } 1088 1089 /* 1090 * Refresh the state of `pg' based on its current members. 1091 */ 1092 void 1093 phyint_group_refresh_state(struct phyint_group *pg) 1094 { 1095 enum pg_state state; 1096 enum pg_state origstate = pg->pg_state; 1097 struct phyint *pi, *usablepi; 1098 uint_t nif = 0, nusable = 0; 1099 1100 /* 1101 * Anonymous groups never change state. 1102 */ 1103 if (pg == phyint_anongroup) 1104 return; 1105 1106 for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) { 1107 nif++; 1108 if (phyint_is_usable(pi)) { 1109 nusable++; 1110 usablepi = pi; 1111 } 1112 } 1113 1114 if (nusable == 0) 1115 state = PG_FAILED; 1116 else if (nif == nusable) 1117 state = PG_OK; 1118 else 1119 state = PG_DEGRADED; 1120 1121 phyint_group_chstate(pg, state); 1122 1123 /* 1124 * If we're shutting down, skip logging messages since otherwise our 1125 * shutdown housecleaning will make us report that groups are unusable. 1126 */ 1127 if (cleanup_started) 1128 return; 1129 1130 /* 1131 * NOTE: We use pg_failmsg_printed rather than origstate since 1132 * otherwise at startup we'll log a "now usable" message when the 1133 * first usable phyint is added to an empty group. 1134 */ 1135 if (state != PG_FAILED && pg->pg_failmsg_printed) { 1136 assert(origstate == PG_FAILED); 1137 logerr("At least 1 IP interface (%s) in group %s is now " 1138 "usable\n", usablepi->pi_name, pg->pg_name); 1139 pg->pg_failmsg_printed = _B_FALSE; 1140 } else if (origstate != PG_FAILED && state == PG_FAILED) { 1141 logerr("All IP interfaces in group %s are now unusable\n", 1142 pg->pg_name); 1143 pg->pg_failmsg_printed = _B_TRUE; 1144 } 1145 } 1146 1147 /* 1148 * Extract information from the kernel about the desired phyint. 1149 * Look only for properties of the phyint and not properties of logints. 1150 * Take appropriate action on the changes. 1151 * Return codes: 1152 * PI_OK 1153 * The phyint exists in the kernel and matches our knowledge 1154 * of the phyint. 1155 * PI_DELETED 1156 * The phyint has vanished in the kernel. 1157 * PI_IFINDEX_CHANGED 1158 * The phyint's interface index has changed. 1159 * Ask the caller to delete and recreate the phyint. 1160 * PI_IOCTL_ERROR 1161 * Some ioctl error. Don't change anything. 1162 * PI_GROUP_CHANGED 1163 * The phyint has changed group. 1164 */ 1165 int 1166 phyint_inst_update_from_k(struct phyint_instance *pii) 1167 { 1168 struct lifreq lifr; 1169 int ifsock; 1170 struct phyint *pi; 1171 1172 pi = pii->pii_phyint; 1173 1174 if (debug & D_PHYINT) { 1175 logdebug("phyint_inst_update_from_k(%s %s)\n", 1176 AF_STR(pii->pii_af), pi->pi_name); 1177 } 1178 1179 /* 1180 * Get the ifindex from the kernel, for comparison with the 1181 * value in our tables. 1182 */ 1183 (void) strncpy(lifr.lifr_name, pi->pi_name, sizeof (lifr.lifr_name)); 1184 lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0'; 1185 1186 ifsock = (pii->pii_af == AF_INET) ? ifsock_v4 : ifsock_v6; 1187 if (ioctl(ifsock, SIOCGLIFINDEX, &lifr) < 0) { 1188 if (errno == ENXIO) { 1189 return (PI_DELETED); 1190 } else { 1191 logperror_pii(pii, "phyint_inst_update_from_k:" 1192 " ioctl (get lifindex)"); 1193 return (PI_IOCTL_ERROR); 1194 } 1195 } 1196 1197 if (lifr.lifr_index != pi->pi_ifindex) { 1198 /* 1199 * The index has changed. Most likely the interface has 1200 * been unplumbed and replumbed. Ask the caller to take 1201 * appropriate action. 1202 */ 1203 if (debug & D_PHYINT) { 1204 logdebug("phyint_inst_update_from_k:" 1205 " old index %d new index %d\n", 1206 pi->pi_ifindex, lifr.lifr_index); 1207 } 1208 return (PI_IFINDEX_CHANGED); 1209 } 1210 1211 /* 1212 * Get the group name from the kernel, for comparison with 1213 * the value in our tables. 1214 */ 1215 if (ioctl(ifsock, SIOCGLIFGROUPNAME, &lifr) < 0) { 1216 if (errno == ENXIO) { 1217 return (PI_DELETED); 1218 } else { 1219 logperror_pii(pii, "phyint_inst_update_from_k:" 1220 " ioctl (get groupname)"); 1221 return (PI_IOCTL_ERROR); 1222 } 1223 } 1224 1225 /* 1226 * If the phyint has changed group i.e. if the phyint group name 1227 * returned by the kernel is different, ask the caller to delete 1228 * and recreate the phyint in the right group 1229 */ 1230 if (strcmp(lifr.lifr_groupname, pi->pi_group->pg_name) != 0) { 1231 /* Groupname has changed */ 1232 if (debug & D_PHYINT) { 1233 logdebug("phyint_inst_update_from_k:" 1234 " groupname change\n"); 1235 } 1236 return (PI_GROUP_CHANGED); 1237 } 1238 1239 /* 1240 * Get the current phyint flags from the kernel, and determine what 1241 * flags have changed by comparing against our tables. Note that the 1242 * IFF_INACTIVE processing in initifs() relies on this call to ensure 1243 * that IFF_INACTIVE is really still set on the interface. 1244 */ 1245 if (ioctl(ifsock, SIOCGLIFFLAGS, &lifr) < 0) { 1246 if (errno == ENXIO) { 1247 return (PI_DELETED); 1248 } else { 1249 logperror_pii(pii, "phyint_inst_update_from_k: " 1250 " ioctl (get flags)"); 1251 return (PI_IOCTL_ERROR); 1252 } 1253 } 1254 1255 pi->pi_flags = PHYINT_FLAGS(lifr.lifr_flags); 1256 if (pi->pi_v4 != NULL) 1257 pi->pi_v4->pii_flags = pi->pi_flags; 1258 if (pi->pi_v6 != NULL) 1259 pi->pi_v6->pii_flags = pi->pi_flags; 1260 1261 /* 1262 * Make sure the IFF_FAILED flag is set if and only if we think 1263 * the interface should be failed. 1264 */ 1265 if (pi->pi_flags & IFF_FAILED) { 1266 if (pi->pi_state == PI_RUNNING) 1267 (void) change_pif_flags(pi, 0, IFF_FAILED); 1268 } else { 1269 if (pi->pi_state == PI_FAILED) 1270 (void) change_pif_flags(pi, IFF_FAILED, IFF_INACTIVE); 1271 } 1272 1273 /* No change in phyint status */ 1274 return (PI_OK); 1275 } 1276 1277 /* 1278 * Delete the phyint. Remove it from the list of all phyints, and the 1279 * list of phyint group members. 1280 */ 1281 static void 1282 phyint_delete(struct phyint *pi) 1283 { 1284 struct phyint *pi2; 1285 struct phyint_group *pg = pi->pi_group; 1286 1287 if (debug & D_PHYINT) 1288 logdebug("phyint_delete(%s)\n", pi->pi_name); 1289 1290 /* Both IPv4 and IPv6 phyint instances must have been deleted. */ 1291 assert(pi->pi_v4 == NULL && pi->pi_v6 == NULL); 1292 1293 /* 1294 * The phyint must belong to a group. 1295 */ 1296 assert(pg->pg_phyint == pi || pi->pi_pgprev != NULL); 1297 1298 /* The phyint must be in the list of all phyints */ 1299 assert(phyints == pi || pi->pi_prev != NULL); 1300 1301 /* Remove the phyint from the phyint group list */ 1302 pg->pg_sig++; 1303 (void) phyint_group_member_event(pg, pi, IPMP_IF_REMOVE); 1304 1305 if (pi->pi_pgprev == NULL) { 1306 /* Phyint is the 1st in the phyint group list */ 1307 pg->pg_phyint = pi->pi_pgnext; 1308 } else { 1309 pi->pi_pgprev->pi_pgnext = pi->pi_pgnext; 1310 } 1311 if (pi->pi_pgnext != NULL) 1312 pi->pi_pgnext->pi_pgprev = pi->pi_pgprev; 1313 pi->pi_pgnext = NULL; 1314 pi->pi_pgprev = NULL; 1315 1316 /* Refresh the group state now that this phyint has been removed */ 1317 phyint_group_refresh_state(pg); 1318 1319 /* Remove the phyint from the global list of phyints */ 1320 if (pi->pi_prev == NULL) { 1321 /* Phyint is the 1st in the list */ 1322 phyints = pi->pi_next; 1323 } else { 1324 pi->pi_prev->pi_next = pi->pi_next; 1325 } 1326 if (pi->pi_next != NULL) 1327 pi->pi_next->pi_prev = pi->pi_prev; 1328 pi->pi_next = NULL; 1329 pi->pi_prev = NULL; 1330 1331 /* 1332 * See if another phyint in the group had been offlined because 1333 * it was a dup of `pi' -- and if so, online it. 1334 */ 1335 if (!pi->pi_hwaddrdup && 1336 (pi2 = phyint_lookup_hwaddr(pi, _B_FALSE)) != NULL) { 1337 assert(pi2->pi_hwaddrdup); 1338 (void) phyint_undo_offline(pi2); 1339 } 1340 phyint_link_close(pi); 1341 free(pi); 1342 } 1343 1344 /* 1345 * Offline phyint `pi' if at least `minred' usable interfaces remain in the 1346 * group. Returns an IPMP error code. 1347 */ 1348 int 1349 phyint_offline(struct phyint *pi, uint_t minred) 1350 { 1351 boolean_t was_active; 1352 unsigned int nusable = 0; 1353 struct phyint *pi2; 1354 struct phyint_group *pg = pi->pi_group; 1355 1356 /* 1357 * Verify that enough usable interfaces in the group would remain. 1358 * As a special case, if the group has failed, allow any non-offline 1359 * phyints to be offlined. 1360 */ 1361 if (pg != phyint_anongroup) { 1362 for (pi2 = pg->pg_phyint; pi2 != NULL; pi2 = pi2->pi_pgnext) { 1363 if (pi2 == pi) 1364 continue; 1365 if (phyint_is_usable(pi2) || 1366 (GROUP_FAILED(pg) && pi2->pi_state != PI_OFFLINE)) 1367 nusable++; 1368 } 1369 } 1370 if (nusable < minred) 1371 return (IPMP_EMINRED); 1372 1373 was_active = ((pi->pi_flags & IFF_INACTIVE) == 0); 1374 1375 if (!change_pif_flags(pi, IFF_OFFLINE, IFF_INACTIVE)) 1376 return (IPMP_FAILURE); 1377 1378 /* 1379 * The interface is now offline, so stop probing it. Note that 1380 * if_mpadm(1M) will down the test addresses, after receiving a 1381 * success reply from us. The routing socket message will then make us 1382 * close the socket used for sending probes. But it is more logical 1383 * that an offlined interface must not be probed, even if it has test 1384 * addresses. 1385 * 1386 * NOTE: stop_probing() also sets PI_OFFLINE. 1387 */ 1388 stop_probing(pi); 1389 1390 /* 1391 * If we're offlining the phyint because it has a duplicate hardware 1392 * address, print a warning -- and leave the link open so that we can 1393 * be notified of hardware address changes that make it usable again. 1394 * Otherwise, close the link so that we won't prevent a detach. 1395 */ 1396 if (pi->pi_hwaddrdup) { 1397 logerr("IP interface %s has a hardware address which is not " 1398 "unique in group %s; offlining\n", pi->pi_name, 1399 pg->pg_name); 1400 } else { 1401 phyint_link_close(pi); 1402 } 1403 1404 /* 1405 * If this phyint was preventing another phyint with a duplicate 1406 * hardware address from being online, bring that one online now. 1407 */ 1408 if (!pi->pi_hwaddrdup && 1409 (pi2 = phyint_lookup_hwaddr(pi, _B_FALSE)) != NULL) { 1410 assert(pi2->pi_hwaddrdup); 1411 (void) phyint_undo_offline(pi2); 1412 } 1413 1414 /* 1415 * If this interface was active, try to activate another INACTIVE 1416 * interface in the group. 1417 */ 1418 if (was_active) 1419 phyint_activate_another(pi); 1420 1421 return (IPMP_SUCCESS); 1422 } 1423 1424 /* 1425 * Undo a previous offline of `pi'. Returns an IPMP error code. 1426 */ 1427 int 1428 phyint_undo_offline(struct phyint *pi) 1429 { 1430 if (pi->pi_state != PI_OFFLINE) { 1431 errno = EINVAL; 1432 return (IPMP_FAILURE); 1433 } 1434 1435 /* 1436 * If necessary, reinitialize our link information and verify that its 1437 * hardware address is still unique across the group. 1438 */ 1439 if (pi->pi_dh == NULL && !phyint_link_init(pi)) { 1440 errno = EIO; 1441 return (IPMP_FAILURE); 1442 } 1443 1444 if (phyint_lookup_hwaddr(pi, _B_TRUE) != NULL) { 1445 pi->pi_hwaddrdup = _B_TRUE; 1446 return (IPMP_EHWADDRDUP); 1447 } 1448 1449 if (pi->pi_hwaddrdup) { 1450 logerr("IP interface %s now has a unique hardware address in " 1451 "group %s; onlining\n", pi->pi_name, pi->pi_group->pg_name); 1452 pi->pi_hwaddrdup = _B_FALSE; 1453 } 1454 1455 if (!change_pif_flags(pi, 0, IFF_OFFLINE)) 1456 return (IPMP_FAILURE); 1457 1458 /* 1459 * While the interface was offline, it may have failed (e.g. the link 1460 * may have gone down). phyint_inst_check_for_failure() will have 1461 * already set pi_flags with IFF_FAILED, so we can use that to decide 1462 * whether the phyint should transition to running. Note that after 1463 * we transition to running, we will start sending probes again (if 1464 * test addresses are configured), which may also reveal that the 1465 * interface is in fact failed. 1466 */ 1467 if (pi->pi_flags & IFF_FAILED) { 1468 phyint_chstate(pi, PI_FAILED); 1469 } else { 1470 /* calls phyint_chstate() */ 1471 phyint_transition_to_running(pi); 1472 } 1473 1474 /* 1475 * Give the requestor time to configure test addresses before 1476 * complaining that they're missing. 1477 */ 1478 pi->pi_taddrthresh = getcurrentsec() + TESTADDR_CONF_TIME; 1479 1480 return (IPMP_SUCCESS); 1481 } 1482 1483 /* 1484 * Delete (unlink and free), the phyint instance. 1485 */ 1486 void 1487 phyint_inst_delete(struct phyint_instance *pii) 1488 { 1489 struct phyint *pi = pii->pii_phyint; 1490 1491 assert(pi != NULL); 1492 1493 if (debug & D_PHYINT) { 1494 logdebug("phyint_inst_delete(%s %s)\n", 1495 AF_STR(pii->pii_af), pi->pi_name); 1496 } 1497 1498 /* 1499 * If the phyint instance has associated probe targets 1500 * delete all the targets 1501 */ 1502 while (pii->pii_targets != NULL) 1503 target_delete(pii->pii_targets); 1504 1505 /* 1506 * Delete all the logints associated with this phyint 1507 * instance. 1508 */ 1509 while (pii->pii_logint != NULL) 1510 logint_delete(pii->pii_logint); 1511 1512 /* 1513 * Close the socket used to send probes to targets from this phyint. 1514 */ 1515 if (pii->pii_probe_sock != -1) 1516 close_probe_socket(pii, _B_TRUE); 1517 1518 /* 1519 * Phyint instance must be in the list of all phyint instances. 1520 * Remove phyint instance from the global list of phyint instances. 1521 */ 1522 assert(phyint_instances == pii || pii->pii_prev != NULL); 1523 if (pii->pii_prev == NULL) { 1524 /* Phyint is the 1st in the list */ 1525 phyint_instances = pii->pii_next; 1526 } else { 1527 pii->pii_prev->pii_next = pii->pii_next; 1528 } 1529 if (pii->pii_next != NULL) 1530 pii->pii_next->pii_prev = pii->pii_prev; 1531 pii->pii_next = NULL; 1532 pii->pii_prev = NULL; 1533 1534 /* 1535 * Reset the phyint instance pointer in the phyint. 1536 * If this is the last phyint instance (being deleted) on this 1537 * phyint, then delete the phyint. 1538 */ 1539 if (pii->pii_af == AF_INET) 1540 pi->pi_v4 = NULL; 1541 else 1542 pi->pi_v6 = NULL; 1543 1544 if (pi->pi_v4 == NULL && pi->pi_v6 == NULL) 1545 phyint_delete(pi); 1546 1547 free(pii); 1548 } 1549 1550 static void 1551 phyint_inst_print(struct phyint_instance *pii) 1552 { 1553 struct logint *li; 1554 struct target *tg; 1555 char abuf[INET6_ADDRSTRLEN]; 1556 int most_recent; 1557 int i; 1558 1559 if (pii->pii_phyint == NULL) { 1560 logdebug("pii->pi_phyint NULL can't print\n"); 1561 return; 1562 } 1563 1564 logdebug("\nPhyint instance: %s %s index %u state %x flags %llx " 1565 "sock %x in_use %d\n", 1566 AF_STR(pii->pii_af), pii->pii_name, pii->pii_ifindex, 1567 pii->pii_state, pii->pii_phyint->pi_flags, pii->pii_probe_sock, 1568 pii->pii_in_use); 1569 1570 for (li = pii->pii_logint; li != NULL; li = li->li_next) 1571 logint_print(li); 1572 1573 logdebug("\n"); 1574 for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) 1575 target_print(tg); 1576 1577 if (pii->pii_targets == NULL) 1578 logdebug("pi_targets NULL\n"); 1579 1580 if (pii->pii_target_next != NULL) { 1581 logdebug("pi_target_next %s %s\n", AF_STR(pii->pii_af), 1582 pr_addr(pii->pii_af, pii->pii_target_next->tg_address, 1583 abuf, sizeof (abuf))); 1584 } else { 1585 logdebug("pi_target_next NULL\n"); 1586 } 1587 1588 if (pii->pii_rtt_target_next != NULL) { 1589 logdebug("pi_rtt_target_next %s %s\n", AF_STR(pii->pii_af), 1590 pr_addr(pii->pii_af, pii->pii_rtt_target_next->tg_address, 1591 abuf, sizeof (abuf))); 1592 } else { 1593 logdebug("pi_rtt_target_next NULL\n"); 1594 } 1595 1596 if (pii->pii_targets != NULL) { 1597 most_recent = PROBE_INDEX_PREV(pii->pii_probe_next); 1598 1599 i = most_recent; 1600 do { 1601 if (pii->pii_probes[i].pr_target != NULL) { 1602 logdebug("#%d target %s ", i, 1603 pr_addr(pii->pii_af, 1604 pii->pii_probes[i].pr_target->tg_address, 1605 abuf, sizeof (abuf))); 1606 } else { 1607 logdebug("#%d target NULL ", i); 1608 } 1609 logdebug("time_start %lld status %d " 1610 "time_ackproc %lld time_lost %u", 1611 pii->pii_probes[i].pr_hrtime_start, 1612 pii->pii_probes[i].pr_status, 1613 pii->pii_probes[i].pr_hrtime_ackproc, 1614 pii->pii_probes[i].pr_time_lost); 1615 i = PROBE_INDEX_PREV(i); 1616 } while (i != most_recent); 1617 } 1618 } 1619 1620 /* 1621 * Lookup a logint based on the logical interface name, on the given 1622 * phyint instance. 1623 */ 1624 static struct logint * 1625 logint_lookup(struct phyint_instance *pii, char *name) 1626 { 1627 struct logint *li; 1628 1629 if (debug & D_LOGINT) { 1630 logdebug("logint_lookup(%s, %s)\n", 1631 AF_STR(pii->pii_af), name); 1632 } 1633 1634 for (li = pii->pii_logint; li != NULL; li = li->li_next) { 1635 if (strncmp(name, li->li_name, sizeof (li->li_name)) == 0) 1636 break; 1637 } 1638 return (li); 1639 } 1640 1641 /* 1642 * Insert a logint at the head of the list of logints of the given 1643 * phyint instance 1644 */ 1645 static void 1646 logint_insert(struct phyint_instance *pii, struct logint *li) 1647 { 1648 li->li_next = pii->pii_logint; 1649 li->li_prev = NULL; 1650 if (pii->pii_logint != NULL) 1651 pii->pii_logint->li_prev = li; 1652 pii->pii_logint = li; 1653 li->li_phyint_inst = pii; 1654 } 1655 1656 /* 1657 * Create a new named logint, on the specified phyint instance. 1658 */ 1659 static struct logint * 1660 logint_create(struct phyint_instance *pii, char *name) 1661 { 1662 struct logint *li; 1663 1664 if (debug & D_LOGINT) { 1665 logdebug("logint_create(%s %s %s)\n", 1666 AF_STR(pii->pii_af), pii->pii_name, name); 1667 } 1668 1669 li = calloc(1, sizeof (struct logint)); 1670 if (li == NULL) { 1671 logperror("logint_create: calloc"); 1672 return (NULL); 1673 } 1674 1675 (void) strncpy(li->li_name, name, sizeof (li->li_name)); 1676 li->li_name[sizeof (li->li_name) - 1] = '\0'; 1677 logint_insert(pii, li); 1678 return (li); 1679 } 1680 1681 /* 1682 * Initialize the logint based on the data returned by the kernel. 1683 */ 1684 void 1685 logint_init_from_k(struct phyint_instance *pii, char *li_name) 1686 { 1687 int ifsock; 1688 uint64_t flags; 1689 uint64_t saved_flags; 1690 struct logint *li; 1691 struct lifreq lifr; 1692 struct in6_addr test_subnet; 1693 struct in6_addr testaddr; 1694 int test_subnet_len; 1695 struct sockaddr_in6 *sin6; 1696 struct sockaddr_in *sin; 1697 char abuf[INET6_ADDRSTRLEN]; 1698 boolean_t ptp = _B_FALSE; 1699 struct in6_addr tgaddr; 1700 1701 if (debug & D_LOGINT) { 1702 logdebug("logint_init_from_k(%s %s)\n", 1703 AF_STR(pii->pii_af), li_name); 1704 } 1705 1706 /* Get the socket for doing ioctls */ 1707 ifsock = (pii->pii_af == AF_INET) ? ifsock_v4 : ifsock_v6; 1708 1709 /* 1710 * Get the flags from the kernel. Also serves as a check whether 1711 * the logical still exists. If it doesn't exist, no need to proceed 1712 * any further. li_in_use will make the caller clean up the logint 1713 */ 1714 (void) strncpy(lifr.lifr_name, li_name, sizeof (lifr.lifr_name)); 1715 lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0'; 1716 if (ioctl(ifsock, SIOCGLIFFLAGS, (char *)&lifr) < 0) { 1717 /* Interface may have vanished */ 1718 if (errno != ENXIO) { 1719 logperror_pii(pii, "logint_init_from_k: " 1720 "ioctl (get flags)"); 1721 } 1722 return; 1723 } 1724 1725 flags = lifr.lifr_flags; 1726 1727 /* 1728 * Verified the logint exists. Now lookup the logint in our tables. 1729 * If it does not exist, create a new logint. 1730 */ 1731 li = logint_lookup(pii, li_name); 1732 if (li == NULL) { 1733 li = logint_create(pii, li_name); 1734 if (li == NULL) { 1735 /* 1736 * Pretend the interface does not exist 1737 * in the kernel 1738 */ 1739 return; 1740 } 1741 } 1742 1743 /* 1744 * Update li->li_flags with the new flags, after saving the old 1745 * value. This is used later to check what flags has changed and 1746 * take any action 1747 */ 1748 saved_flags = li->li_flags; 1749 li->li_flags = flags; 1750 1751 /* 1752 * Get the address, prefix, prefixlength and update the logint. 1753 * Check if anything has changed. If the logint used for the 1754 * test address has changed, take suitable action. 1755 */ 1756 if (ioctl(ifsock, SIOCGLIFADDR, (char *)&lifr) < 0) { 1757 /* Interface may have vanished */ 1758 if (errno != ENXIO) { 1759 logperror_li(li, "logint_init_from_k: (get addr)"); 1760 } 1761 goto error; 1762 } 1763 1764 if (pii->pii_af == AF_INET) { 1765 sin = (struct sockaddr_in *)&lifr.lifr_addr; 1766 IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &testaddr); 1767 } else { 1768 sin6 = (struct sockaddr_in6 *)&lifr.lifr_addr; 1769 testaddr = sin6->sin6_addr; 1770 } 1771 1772 if (ioctl(ifsock, SIOCGLIFSUBNET, (char *)&lifr) < 0) { 1773 /* Interface may have vanished */ 1774 if (errno != ENXIO) 1775 logperror_li(li, "logint_init_from_k: (get subnet)"); 1776 goto error; 1777 } 1778 if (lifr.lifr_subnet.ss_family == AF_INET6) { 1779 sin6 = (struct sockaddr_in6 *)&lifr.lifr_subnet; 1780 test_subnet = sin6->sin6_addr; 1781 test_subnet_len = lifr.lifr_addrlen; 1782 } else { 1783 sin = (struct sockaddr_in *)&lifr.lifr_subnet; 1784 IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &test_subnet); 1785 test_subnet_len = lifr.lifr_addrlen + (IPV6_ABITS - IP_ABITS); 1786 } 1787 1788 /* 1789 * If this is the logint corresponding to the test address used for 1790 * sending probes, then if anything significant has changed we need to 1791 * determine the test address again. We ignore changes to the 1792 * IFF_FAILED and IFF_RUNNING flags since those happen as a matter of 1793 * course. 1794 */ 1795 if (pii->pii_probe_logint == li) { 1796 if (((li->li_flags ^ saved_flags) & 1797 ~(IFF_FAILED | IFF_RUNNING)) != 0 || 1798 !IN6_ARE_ADDR_EQUAL(&testaddr, &li->li_addr) || 1799 (!ptp && !IN6_ARE_ADDR_EQUAL(&test_subnet, 1800 &li->li_subnet)) || 1801 (!ptp && test_subnet_len != li->li_subnet_len) || 1802 (ptp && !IN6_ARE_ADDR_EQUAL(&tgaddr, &li->li_dstaddr))) { 1803 /* 1804 * Something significant that affects the testaddress 1805 * has changed. Redo the testaddress selection later on 1806 * in select_test_ifs(). For now do the cleanup and 1807 * set pii_probe_logint to NULL. 1808 */ 1809 if (pii->pii_probe_sock != -1) 1810 close_probe_socket(pii, _B_TRUE); 1811 pii->pii_probe_logint = NULL; 1812 } 1813 } 1814 1815 1816 /* Update the logint with the values obtained from the kernel. */ 1817 li->li_addr = testaddr; 1818 li->li_in_use = 1; 1819 if (ptp) { 1820 li->li_dstaddr = tgaddr; 1821 li->li_subnet_len = (pii->pii_af == AF_INET) ? 1822 IP_ABITS : IPV6_ABITS; 1823 } else { 1824 li->li_subnet = test_subnet; 1825 li->li_subnet_len = test_subnet_len; 1826 } 1827 1828 if (debug & D_LOGINT) 1829 logint_print(li); 1830 1831 return; 1832 1833 error: 1834 logerr("logint_init_from_k: IGNORED %s %s %s addr %s\n", 1835 AF_STR(pii->pii_af), pii->pii_name, li->li_name, 1836 pr_addr(pii->pii_af, testaddr, abuf, sizeof (abuf))); 1837 logint_delete(li); 1838 } 1839 1840 /* 1841 * Delete (unlink and free) a logint. 1842 */ 1843 void 1844 logint_delete(struct logint *li) 1845 { 1846 struct phyint_instance *pii; 1847 1848 pii = li->li_phyint_inst; 1849 assert(pii != NULL); 1850 1851 if (debug & D_LOGINT) { 1852 int af; 1853 char abuf[INET6_ADDRSTRLEN]; 1854 1855 af = pii->pii_af; 1856 logdebug("logint_delete(%s %s %s/%u)\n", 1857 AF_STR(af), li->li_name, 1858 pr_addr(af, li->li_addr, abuf, sizeof (abuf)), 1859 li->li_subnet_len); 1860 } 1861 1862 /* logint must be in the list of logints */ 1863 assert(pii->pii_logint == li || li->li_prev != NULL); 1864 1865 /* Remove the logint from the list of logints */ 1866 if (li->li_prev == NULL) { 1867 /* logint is the 1st in the list */ 1868 pii->pii_logint = li->li_next; 1869 } else { 1870 li->li_prev->li_next = li->li_next; 1871 } 1872 if (li->li_next != NULL) 1873 li->li_next->li_prev = li->li_prev; 1874 li->li_next = NULL; 1875 li->li_prev = NULL; 1876 1877 /* 1878 * If this logint is also being used for probing, then close the 1879 * associated socket, if it exists. 1880 */ 1881 if (pii->pii_probe_logint == li) { 1882 if (pii->pii_probe_sock != -1) 1883 close_probe_socket(pii, _B_TRUE); 1884 pii->pii_probe_logint = NULL; 1885 } 1886 1887 free(li); 1888 } 1889 1890 static void 1891 logint_print(struct logint *li) 1892 { 1893 char abuf[INET6_ADDRSTRLEN]; 1894 int af = li->li_phyint_inst->pii_af; 1895 1896 logdebug("logint: %s %s addr %s/%u", AF_STR(af), li->li_name, 1897 pr_addr(af, li->li_addr, abuf, sizeof (abuf)), li->li_subnet_len); 1898 1899 logdebug("\tFlags: %llx in_use %d\n", li->li_flags, li->li_in_use); 1900 } 1901 1902 char * 1903 pr_addr(int af, struct in6_addr addr, char *abuf, int len) 1904 { 1905 struct in_addr addr_v4; 1906 1907 if (af == AF_INET) { 1908 IN6_V4MAPPED_TO_INADDR(&addr, &addr_v4); 1909 (void) inet_ntop(AF_INET, (void *)&addr_v4, abuf, len); 1910 } else { 1911 (void) inet_ntop(AF_INET6, (void *)&addr, abuf, len); 1912 } 1913 return (abuf); 1914 } 1915 1916 /* 1917 * Fill in the sockaddr_storage pointed to by `ssp' with the IP address 1918 * represented by the [`af',`addr'] pair. Needed because in.mpathd internally 1919 * stores all addresses as in6_addrs, but we don't want to expose that. 1920 */ 1921 void 1922 addr2storage(int af, const struct in6_addr *addr, struct sockaddr_storage *ssp) 1923 { 1924 struct sockaddr_in *sinp = (struct sockaddr_in *)ssp; 1925 struct sockaddr_in6 *sin6p = (struct sockaddr_in6 *)ssp; 1926 1927 assert(af == AF_INET || af == AF_INET6); 1928 1929 switch (af) { 1930 case AF_INET: 1931 (void) memset(sinp, 0, sizeof (*sinp)); 1932 sinp->sin_family = AF_INET; 1933 IN6_V4MAPPED_TO_INADDR(addr, &sinp->sin_addr); 1934 break; 1935 case AF_INET6: 1936 (void) memset(sin6p, 0, sizeof (*sin6p)); 1937 sin6p->sin6_family = AF_INET6; 1938 sin6p->sin6_addr = *addr; 1939 break; 1940 } 1941 } 1942 1943 /* Lookup target on its address */ 1944 struct target * 1945 target_lookup(struct phyint_instance *pii, struct in6_addr addr) 1946 { 1947 struct target *tg; 1948 1949 if (debug & D_TARGET) { 1950 char abuf[INET6_ADDRSTRLEN]; 1951 1952 logdebug("target_lookup(%s %s): addr %s\n", 1953 AF_STR(pii->pii_af), pii->pii_name, 1954 pr_addr(pii->pii_af, addr, abuf, sizeof (abuf))); 1955 } 1956 1957 for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) { 1958 if (IN6_ARE_ADDR_EQUAL(&tg->tg_address, &addr)) 1959 break; 1960 } 1961 return (tg); 1962 } 1963 1964 /* 1965 * Find and return the next active target, for the next probe. 1966 * If no active targets are available, return NULL. 1967 */ 1968 struct target * 1969 target_next(struct target *tg) 1970 { 1971 struct phyint_instance *pii = tg->tg_phyint_inst; 1972 struct target *marker = tg; 1973 hrtime_t now; 1974 1975 now = gethrtime(); 1976 1977 /* 1978 * Target must be in the list of targets for this phyint 1979 * instance. 1980 */ 1981 assert(pii->pii_targets == tg || tg->tg_prev != NULL); 1982 assert(pii->pii_targets != NULL); 1983 1984 /* Return the next active target */ 1985 do { 1986 /* 1987 * Go to the next target. If we hit the end, 1988 * reset the ptr to the head 1989 */ 1990 tg = tg->tg_next; 1991 if (tg == NULL) 1992 tg = pii->pii_targets; 1993 1994 assert(TG_STATUS_VALID(tg->tg_status)); 1995 1996 switch (tg->tg_status) { 1997 case TG_ACTIVE: 1998 return (tg); 1999 2000 case TG_UNUSED: 2001 assert(pii->pii_targets_are_routers); 2002 if (pii->pii_ntargets < MAX_PROBE_TARGETS) { 2003 /* 2004 * Bubble up the unused target to active 2005 */ 2006 tg->tg_status = TG_ACTIVE; 2007 pii->pii_ntargets++; 2008 return (tg); 2009 } 2010 break; 2011 2012 case TG_SLOW: 2013 assert(pii->pii_targets_are_routers); 2014 if (tg->tg_latime + MIN_RECOVERY_TIME < now) { 2015 /* 2016 * Bubble up the slow target to unused 2017 */ 2018 tg->tg_status = TG_UNUSED; 2019 } 2020 break; 2021 2022 case TG_DEAD: 2023 assert(pii->pii_targets_are_routers); 2024 if (tg->tg_latime + MIN_RECOVERY_TIME < now) { 2025 /* 2026 * Bubble up the dead target to slow 2027 */ 2028 tg->tg_status = TG_SLOW; 2029 tg->tg_latime = now; 2030 } 2031 break; 2032 } 2033 2034 } while (tg != marker); 2035 2036 return (NULL); 2037 } 2038 2039 /* 2040 * Select the best available target, that is not already TG_ACTIVE, 2041 * for the caller. The caller will determine whether it wants to 2042 * make the returned target TG_ACTIVE. 2043 * The selection order is as follows. 2044 * 1. pick a TG_UNSED target, if it exists. 2045 * 2. else pick a TG_SLOW target that has recovered, if it exists 2046 * 3. else pick any TG_SLOW target, if it exists 2047 * 4. else pick a TG_DEAD target that has recovered, if it exists 2048 * 5. else pick any TG_DEAD target, if it exists 2049 * 6. else return null 2050 */ 2051 static struct target * 2052 target_select_best(struct phyint_instance *pii) 2053 { 2054 struct target *tg; 2055 struct target *slow = NULL; 2056 struct target *dead = NULL; 2057 struct target *slow_recovered = NULL; 2058 struct target *dead_recovered = NULL; 2059 hrtime_t now; 2060 2061 now = gethrtime(); 2062 2063 for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) { 2064 assert(TG_STATUS_VALID(tg->tg_status)); 2065 2066 switch (tg->tg_status) { 2067 case TG_UNUSED: 2068 return (tg); 2069 2070 case TG_SLOW: 2071 if (tg->tg_latime + MIN_RECOVERY_TIME < now) { 2072 slow_recovered = tg; 2073 /* 2074 * Promote the slow_recovered to unused 2075 */ 2076 tg->tg_status = TG_UNUSED; 2077 } else { 2078 slow = tg; 2079 } 2080 break; 2081 2082 case TG_DEAD: 2083 if (tg->tg_latime + MIN_RECOVERY_TIME < now) { 2084 dead_recovered = tg; 2085 /* 2086 * Promote the dead_recovered to slow 2087 */ 2088 tg->tg_status = TG_SLOW; 2089 tg->tg_latime = now; 2090 } else { 2091 dead = tg; 2092 } 2093 break; 2094 2095 default: 2096 break; 2097 } 2098 } 2099 2100 if (slow_recovered != NULL) 2101 return (slow_recovered); 2102 else if (slow != NULL) 2103 return (slow); 2104 else if (dead_recovered != NULL) 2105 return (dead_recovered); 2106 else 2107 return (dead); 2108 } 2109 2110 /* 2111 * Some target was deleted. If we don't have even MIN_PROBE_TARGETS 2112 * that are active, pick the next best below. 2113 */ 2114 static void 2115 target_activate_all(struct phyint_instance *pii) 2116 { 2117 struct target *tg; 2118 2119 assert(pii->pii_ntargets == 0); 2120 assert(pii->pii_target_next == NULL); 2121 assert(pii->pii_rtt_target_next == NULL); 2122 assert(pii->pii_targets_are_routers); 2123 2124 while (pii->pii_ntargets < MIN_PROBE_TARGETS) { 2125 tg = target_select_best(pii); 2126 if (tg == NULL) { 2127 /* We are out of targets */ 2128 return; 2129 } 2130 2131 assert(TG_STATUS_VALID(tg->tg_status)); 2132 assert(tg->tg_status != TG_ACTIVE); 2133 tg->tg_status = TG_ACTIVE; 2134 pii->pii_ntargets++; 2135 if (pii->pii_target_next == NULL) { 2136 pii->pii_target_next = tg; 2137 pii->pii_rtt_target_next = tg; 2138 } 2139 } 2140 } 2141 2142 static struct target * 2143 target_first(struct phyint_instance *pii) 2144 { 2145 struct target *tg; 2146 2147 for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) { 2148 assert(TG_STATUS_VALID(tg->tg_status)); 2149 if (tg->tg_status == TG_ACTIVE) 2150 break; 2151 } 2152 2153 return (tg); 2154 } 2155 2156 /* 2157 * Create a default target entry. 2158 */ 2159 void 2160 target_create(struct phyint_instance *pii, struct in6_addr addr, 2161 boolean_t is_router) 2162 { 2163 struct target *tg; 2164 struct phyint *pi; 2165 struct logint *li; 2166 2167 if (debug & D_TARGET) { 2168 char abuf[INET6_ADDRSTRLEN]; 2169 2170 logdebug("target_create(%s %s, %s)\n", 2171 AF_STR(pii->pii_af), pii->pii_name, 2172 pr_addr(pii->pii_af, addr, abuf, sizeof (abuf))); 2173 } 2174 2175 /* 2176 * If the test address is not yet initialized, do not add 2177 * any target, since we cannot determine whether the target 2178 * belongs to the same subnet as the test address. 2179 */ 2180 li = pii->pii_probe_logint; 2181 if (li == NULL) 2182 return; 2183 2184 /* 2185 * If there are multiple subnets associated with an interface, then 2186 * add the target to this phyint instance only if it belongs to the 2187 * same subnet as the test address. This assures us that we will 2188 * be able to reach this target through our routing table. 2189 */ 2190 if (!prefix_equal(li->li_subnet, addr, li->li_subnet_len)) 2191 return; 2192 2193 if (pii->pii_targets != NULL) { 2194 assert(pii->pii_ntargets <= MAX_PROBE_TARGETS); 2195 if (is_router) { 2196 if (!pii->pii_targets_are_routers) { 2197 /* 2198 * Prefer router over hosts. Using hosts is a 2199 * fallback mechanism, hence delete all host 2200 * targets. 2201 */ 2202 while (pii->pii_targets != NULL) 2203 target_delete(pii->pii_targets); 2204 } 2205 } else { 2206 /* 2207 * Routers take precedence over hosts. If this 2208 * is a router list and we are trying to add a 2209 * host, just return. If this is a host list 2210 * and if we have sufficient targets, just return 2211 */ 2212 if (pii->pii_targets_are_routers || 2213 pii->pii_ntargets == MAX_PROBE_TARGETS) 2214 return; 2215 } 2216 } 2217 2218 tg = calloc(1, sizeof (struct target)); 2219 if (tg == NULL) { 2220 logperror("target_create: calloc"); 2221 return; 2222 } 2223 2224 tg->tg_phyint_inst = pii; 2225 tg->tg_address = addr; 2226 tg->tg_in_use = 1; 2227 tg->tg_rtt_sa = -1; 2228 tg->tg_num_deferred = 0; 2229 2230 /* 2231 * If this is the first target, set 'pii_targets_are_routers' 2232 * The list of targets is either a list of hosts or list or 2233 * routers, but not a mix. 2234 */ 2235 if (pii->pii_targets == NULL) { 2236 assert(pii->pii_ntargets == 0); 2237 assert(pii->pii_target_next == NULL); 2238 assert(pii->pii_rtt_target_next == NULL); 2239 pii->pii_targets_are_routers = is_router ? 1 : 0; 2240 } 2241 2242 if (pii->pii_ntargets == MAX_PROBE_TARGETS) { 2243 assert(pii->pii_targets_are_routers); 2244 assert(pii->pii_target_next != NULL); 2245 assert(pii->pii_rtt_target_next != NULL); 2246 tg->tg_status = TG_UNUSED; 2247 } else { 2248 if (pii->pii_ntargets == 0) { 2249 assert(pii->pii_target_next == NULL); 2250 pii->pii_target_next = tg; 2251 pii->pii_rtt_target_next = tg; 2252 } 2253 pii->pii_ntargets++; 2254 tg->tg_status = TG_ACTIVE; 2255 } 2256 2257 target_insert(pii, tg); 2258 2259 /* 2260 * Change state to PI_RUNNING if this phyint instance is capable of 2261 * sending and receiving probes -- that is, if we know of at least 1 2262 * target, and this phyint instance is probe-capable. For more 2263 * details, see the phyint state diagram in mpd_probe.c. 2264 */ 2265 pi = pii->pii_phyint; 2266 if (pi->pi_state == PI_NOTARGETS && PROBE_CAPABLE(pii)) { 2267 if (pi->pi_flags & IFF_FAILED) 2268 phyint_chstate(pi, PI_FAILED); 2269 else 2270 phyint_chstate(pi, PI_RUNNING); 2271 } 2272 } 2273 2274 /* 2275 * Add the target address named by `addr' to phyint instance `pii' if it does 2276 * not already exist. If the target is a router, `is_router' should be set to 2277 * B_TRUE. 2278 */ 2279 void 2280 target_add(struct phyint_instance *pii, struct in6_addr addr, 2281 boolean_t is_router) 2282 { 2283 struct target *tg; 2284 2285 if (pii == NULL) 2286 return; 2287 2288 tg = target_lookup(pii, addr); 2289 2290 /* 2291 * If the target does not exist, create it; target_create() will set 2292 * tg_in_use to true. Even if it exists already, if it's a router 2293 * target and we'd previously learned of it through multicast, then we 2294 * need to recreate it as a router target. Otherwise, just set 2295 * tg_in_use to to true so that init_router_targets() won't delete it. 2296 */ 2297 if (tg == NULL || (is_router && !pii->pii_targets_are_routers)) 2298 target_create(pii, addr, is_router); 2299 else if (is_router) 2300 tg->tg_in_use = 1; 2301 } 2302 2303 /* 2304 * Insert target at head of linked list of targets for the associated 2305 * phyint instance 2306 */ 2307 static void 2308 target_insert(struct phyint_instance *pii, struct target *tg) 2309 { 2310 tg->tg_next = pii->pii_targets; 2311 tg->tg_prev = NULL; 2312 if (tg->tg_next != NULL) 2313 tg->tg_next->tg_prev = tg; 2314 pii->pii_targets = tg; 2315 } 2316 2317 /* 2318 * Delete a target (unlink and free). 2319 */ 2320 void 2321 target_delete(struct target *tg) 2322 { 2323 int af; 2324 struct phyint_instance *pii; 2325 struct phyint_instance *pii_other; 2326 2327 pii = tg->tg_phyint_inst; 2328 af = pii->pii_af; 2329 2330 if (debug & D_TARGET) { 2331 char abuf[INET6_ADDRSTRLEN]; 2332 2333 logdebug("target_delete(%s %s, %s)\n", 2334 AF_STR(af), pii->pii_name, 2335 pr_addr(af, tg->tg_address, abuf, sizeof (abuf))); 2336 } 2337 2338 /* 2339 * Target must be in the list of targets for this phyint 2340 * instance. 2341 */ 2342 assert(pii->pii_targets == tg || tg->tg_prev != NULL); 2343 2344 /* 2345 * Reset all references to 'tg' in the probe information 2346 * for this phyint. 2347 */ 2348 reset_pii_probes(pii, tg); 2349 2350 /* 2351 * Remove this target from the list of targets of this 2352 * phyint instance. 2353 */ 2354 if (tg->tg_prev == NULL) { 2355 pii->pii_targets = tg->tg_next; 2356 } else { 2357 tg->tg_prev->tg_next = tg->tg_next; 2358 } 2359 2360 if (tg->tg_next != NULL) 2361 tg->tg_next->tg_prev = tg->tg_prev; 2362 2363 tg->tg_next = NULL; 2364 tg->tg_prev = NULL; 2365 2366 if (tg->tg_status == TG_ACTIVE) 2367 pii->pii_ntargets--; 2368 2369 /* 2370 * Adjust the next target to probe, if it points to 2371 * to the currently deleted target. 2372 */ 2373 if (pii->pii_target_next == tg) 2374 pii->pii_target_next = target_first(pii); 2375 2376 if (pii->pii_rtt_target_next == tg) 2377 pii->pii_rtt_target_next = target_first(pii); 2378 2379 free(tg); 2380 2381 /* 2382 * The number of active targets pii_ntargets == 0 iff 2383 * the next active target pii->pii_target_next == NULL 2384 */ 2385 if (pii->pii_ntargets != 0) { 2386 assert(pii->pii_target_next != NULL); 2387 assert(pii->pii_rtt_target_next != NULL); 2388 assert(pii->pii_target_next->tg_status == TG_ACTIVE); 2389 assert(pii->pii_rtt_target_next->tg_status == TG_ACTIVE); 2390 return; 2391 } 2392 2393 /* At this point, we don't have any active targets. */ 2394 assert(pii->pii_target_next == NULL); 2395 assert(pii->pii_rtt_target_next == NULL); 2396 2397 if (pii->pii_targets_are_routers) { 2398 /* 2399 * Activate any TG_SLOW or TG_DEAD router targets, 2400 * since we don't have any other targets 2401 */ 2402 target_activate_all(pii); 2403 2404 if (pii->pii_ntargets != 0) { 2405 assert(pii->pii_target_next != NULL); 2406 assert(pii->pii_rtt_target_next != NULL); 2407 assert(pii->pii_target_next->tg_status == TG_ACTIVE); 2408 assert(pii->pii_rtt_target_next->tg_status == 2409 TG_ACTIVE); 2410 return; 2411 } 2412 } 2413 2414 /* 2415 * If we still don't have any active targets, the list must 2416 * must be really empty. There aren't even TG_SLOW or TG_DEAD 2417 * targets. Zero out the probe stats since it will not be 2418 * relevant any longer. 2419 */ 2420 assert(pii->pii_targets == NULL); 2421 pii->pii_targets_are_routers = _B_FALSE; 2422 clear_pii_probe_stats(pii); 2423 pii_other = phyint_inst_other(pii); 2424 2425 /* 2426 * If there are no targets on both instances and the interface would 2427 * otherwise be considered PI_RUNNING, go back to PI_NOTARGETS state, 2428 * since we cannot probe this phyint any more. For more details, 2429 * please see phyint state diagram in mpd_probe.c. 2430 */ 2431 if (!PROBE_CAPABLE(pii_other) && LINK_UP(pii->pii_phyint) && 2432 pii->pii_phyint->pi_state != PI_OFFLINE) 2433 phyint_chstate(pii->pii_phyint, PI_NOTARGETS); 2434 } 2435 2436 /* 2437 * Flush the target list of every phyint in the group, if the list 2438 * is a host target list. This is called if group failure is suspected. 2439 * If all targets have failed, multicast will subsequently discover new 2440 * targets. Else it is a group failure. 2441 * Note: This function is a no-op if the list is a router target list. 2442 */ 2443 static void 2444 target_flush_hosts(struct phyint_group *pg) 2445 { 2446 struct phyint *pi; 2447 struct phyint_instance *pii; 2448 2449 if (debug & D_TARGET) 2450 logdebug("target_flush_hosts(%s)\n", pg->pg_name); 2451 2452 for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) { 2453 pii = pi->pi_v4; 2454 if (pii != NULL && !pii->pii_targets_are_routers) { 2455 /* 2456 * Delete all the targets. When the list becomes 2457 * empty, target_delete() will set pii->pii_targets 2458 * to NULL. 2459 */ 2460 while (pii->pii_targets != NULL) 2461 target_delete(pii->pii_targets); 2462 } 2463 pii = pi->pi_v6; 2464 if (pii != NULL && !pii->pii_targets_are_routers) { 2465 /* 2466 * Delete all the targets. When the list becomes 2467 * empty, target_delete() will set pii->pii_targets 2468 * to NULL. 2469 */ 2470 while (pii->pii_targets != NULL) 2471 target_delete(pii->pii_targets); 2472 } 2473 } 2474 } 2475 2476 /* 2477 * Reset all references to 'target' in the probe info, as this target is 2478 * being deleted. The pr_target field is guaranteed to be non-null if 2479 * pr_status is PR_UNACKED. So we change the pr_status to PR_LOST, so that 2480 * pr_target will not be accessed unconditionally. 2481 */ 2482 static void 2483 reset_pii_probes(struct phyint_instance *pii, struct target *tg) 2484 { 2485 int i; 2486 2487 for (i = 0; i < PROBE_STATS_COUNT; i++) { 2488 if (pii->pii_probes[i].pr_target == tg) { 2489 if (pii->pii_probes[i].pr_status == PR_UNACKED) { 2490 probe_chstate(&pii->pii_probes[i], pii, 2491 PR_LOST); 2492 } 2493 pii->pii_probes[i].pr_target = NULL; 2494 } 2495 } 2496 2497 } 2498 2499 /* 2500 * Clear the probe statistics array. 2501 */ 2502 void 2503 clear_pii_probe_stats(struct phyint_instance *pii) 2504 { 2505 bzero(pii->pii_probes, sizeof (struct probe_stats) * PROBE_STATS_COUNT); 2506 /* Reset the next probe index in the probe stats array */ 2507 pii->pii_probe_next = 0; 2508 } 2509 2510 static void 2511 target_print(struct target *tg) 2512 { 2513 char abuf[INET6_ADDRSTRLEN]; 2514 char buf[128]; 2515 char buf2[128]; 2516 int af; 2517 int i; 2518 2519 af = tg->tg_phyint_inst->pii_af; 2520 2521 logdebug("Target on %s %s addr %s\n" 2522 "status %d rtt_sa %lld rtt_sd %lld crtt %d tg_in_use %d\n", 2523 AF_STR(af), tg->tg_phyint_inst->pii_name, 2524 pr_addr(af, tg->tg_address, abuf, sizeof (abuf)), 2525 tg->tg_status, tg->tg_rtt_sa, tg->tg_rtt_sd, 2526 tg->tg_crtt, tg->tg_in_use); 2527 2528 buf[0] = '\0'; 2529 for (i = 0; i < tg->tg_num_deferred; i++) { 2530 (void) snprintf(buf2, sizeof (buf2), " %dms", 2531 tg->tg_deferred[i]); 2532 (void) strlcat(buf, buf2, sizeof (buf)); 2533 } 2534 logdebug("deferred rtts:%s\n", buf); 2535 } 2536 2537 void 2538 phyint_inst_print_all(void) 2539 { 2540 struct phyint_instance *pii; 2541 2542 for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) { 2543 phyint_inst_print(pii); 2544 } 2545 } 2546 2547 /* 2548 * Compare two prefixes that have the same prefix length. 2549 * Fails if the prefix length is unreasonable. 2550 */ 2551 boolean_t 2552 prefix_equal(struct in6_addr p1, struct in6_addr p2, uint_t prefix_len) 2553 { 2554 uchar_t mask; 2555 int j; 2556 2557 if (prefix_len > IPV6_ABITS) 2558 return (_B_FALSE); 2559 2560 for (j = 0; prefix_len > 8; prefix_len -= 8, j++) 2561 if (p1.s6_addr[j] != p2.s6_addr[j]) 2562 return (_B_FALSE); 2563 2564 /* Make the N leftmost bits one */ 2565 mask = 0xff << (8 - prefix_len); 2566 if ((p1.s6_addr[j] & mask) != (p2.s6_addr[j] & mask)) 2567 return (_B_FALSE); 2568 2569 return (_B_TRUE); 2570 } 2571 2572 /* 2573 * Get the number of UP logints on phyint `pi'. 2574 */ 2575 static int 2576 logint_upcount(struct phyint *pi) 2577 { 2578 struct logint *li; 2579 int count = 0; 2580 2581 if (pi->pi_v4 != NULL) { 2582 for (li = pi->pi_v4->pii_logint; li != NULL; li = li->li_next) { 2583 if (li->li_flags & IFF_UP) 2584 count++; 2585 } 2586 } 2587 2588 if (pi->pi_v6 != NULL) { 2589 for (li = pi->pi_v6->pii_logint; li != NULL; li = li->li_next) { 2590 if (li->li_flags & IFF_UP) 2591 count++; 2592 } 2593 } 2594 2595 return (count); 2596 } 2597 2598 /* 2599 * Get the phyint instance with the other (IPv4 / IPv6) protocol 2600 */ 2601 struct phyint_instance * 2602 phyint_inst_other(struct phyint_instance *pii) 2603 { 2604 if (pii->pii_af == AF_INET) 2605 return (pii->pii_phyint->pi_v6); 2606 else 2607 return (pii->pii_phyint->pi_v4); 2608 } 2609 2610 /* 2611 * Check whether a phyint is functioning. 2612 */ 2613 static boolean_t 2614 phyint_is_functioning(struct phyint *pi) 2615 { 2616 if (pi->pi_state == PI_RUNNING) 2617 return (_B_TRUE); 2618 return (pi->pi_state == PI_NOTARGETS && !(pi->pi_flags & IFF_FAILED)); 2619 } 2620 2621 /* 2622 * Check whether a phyint is usable. 2623 */ 2624 static boolean_t 2625 phyint_is_usable(struct phyint *pi) 2626 { 2627 if (logint_upcount(pi) == 0) 2628 return (_B_FALSE); 2629 return (phyint_is_functioning(pi)); 2630 } 2631 2632 /* 2633 * Post an EC_IPMP sysevent of subclass `subclass' and attributes `nvl'. 2634 * Before sending the event, it prepends the current version of the IPMP 2635 * sysevent API. Returns 0 on success, -1 on failure (in either case, 2636 * `nvl' is freed). 2637 */ 2638 static int 2639 post_event(const char *subclass, nvlist_t *nvl) 2640 { 2641 static evchan_t *evchp = NULL; 2642 2643 /* 2644 * Initialize the event channel if we haven't already done so. 2645 */ 2646 if (evchp == NULL) { 2647 errno = sysevent_evc_bind(IPMP_EVENT_CHAN, &evchp, EVCH_CREAT); 2648 if (errno != 0) { 2649 logerr("cannot create event channel `%s': %s\n", 2650 IPMP_EVENT_CHAN, strerror(errno)); 2651 goto failed; 2652 } 2653 } 2654 2655 errno = nvlist_add_uint32(nvl, IPMP_EVENT_VERSION, 2656 IPMP_EVENT_CUR_VERSION); 2657 if (errno != 0) { 2658 logerr("cannot create `%s' event: %s", subclass, 2659 strerror(errno)); 2660 goto failed; 2661 } 2662 2663 errno = sysevent_evc_publish(evchp, EC_IPMP, subclass, "com.sun", 2664 "in.mpathd", nvl, EVCH_NOSLEEP); 2665 if (errno != 0) { 2666 logerr("cannot send `%s' event: %s\n", subclass, 2667 strerror(errno)); 2668 goto failed; 2669 } 2670 2671 nvlist_free(nvl); 2672 return (0); 2673 failed: 2674 nvlist_free(nvl); 2675 return (-1); 2676 } 2677 2678 /* 2679 * Return the external IPMP state associated with phyint `pi'. 2680 */ 2681 static ipmp_if_state_t 2682 ifstate(struct phyint *pi) 2683 { 2684 switch (pi->pi_state) { 2685 case PI_INIT: 2686 return (IPMP_IF_UNKNOWN); 2687 2688 case PI_NOTARGETS: 2689 if (pi->pi_flags & IFF_FAILED) 2690 return (IPMP_IF_FAILED); 2691 return (IPMP_IF_UNKNOWN); 2692 2693 case PI_OFFLINE: 2694 return (IPMP_IF_OFFLINE); 2695 2696 case PI_FAILED: 2697 return (IPMP_IF_FAILED); 2698 2699 case PI_RUNNING: 2700 return (IPMP_IF_OK); 2701 } 2702 2703 logerr("ifstate: unknown state %d; aborting\n", pi->pi_state); 2704 abort(); 2705 /* NOTREACHED */ 2706 } 2707 2708 /* 2709 * Return the external IPMP interface type associated with phyint `pi'. 2710 */ 2711 static ipmp_if_type_t 2712 iftype(struct phyint *pi) 2713 { 2714 if (pi->pi_flags & IFF_STANDBY) 2715 return (IPMP_IF_STANDBY); 2716 else 2717 return (IPMP_IF_NORMAL); 2718 } 2719 2720 /* 2721 * Return the external IPMP link state associated with phyint `pi'. 2722 */ 2723 static ipmp_if_linkstate_t 2724 iflinkstate(struct phyint *pi) 2725 { 2726 if (!(pi->pi_notes & (DL_NOTE_LINK_UP|DL_NOTE_LINK_DOWN))) 2727 return (IPMP_LINK_UNKNOWN); 2728 2729 return (LINK_DOWN(pi) ? IPMP_LINK_DOWN : IPMP_LINK_UP); 2730 } 2731 2732 /* 2733 * Return the external IPMP probe state associated with phyint `pi'. 2734 */ 2735 static ipmp_if_probestate_t 2736 ifprobestate(struct phyint *pi) 2737 { 2738 if (!PROBE_ENABLED(pi->pi_v4) && !PROBE_ENABLED(pi->pi_v6)) 2739 return (IPMP_PROBE_DISABLED); 2740 2741 if (pi->pi_state == PI_FAILED) 2742 return (IPMP_PROBE_FAILED); 2743 2744 if (!PROBE_CAPABLE(pi->pi_v4) && !PROBE_CAPABLE(pi->pi_v6)) 2745 return (IPMP_PROBE_UNKNOWN); 2746 2747 return (IPMP_PROBE_OK); 2748 } 2749 2750 /* 2751 * Return the external IPMP target mode associated with phyint instance `pii'. 2752 */ 2753 static ipmp_if_targmode_t 2754 iftargmode(struct phyint_instance *pii) 2755 { 2756 if (!PROBE_ENABLED(pii)) 2757 return (IPMP_TARG_DISABLED); 2758 else if (pii->pii_targets_are_routers) 2759 return (IPMP_TARG_ROUTES); 2760 else 2761 return (IPMP_TARG_MULTICAST); 2762 } 2763 2764 /* 2765 * Return the external IPMP flags associated with phyint `pi'. 2766 */ 2767 static ipmp_if_flags_t 2768 ifflags(struct phyint *pi) 2769 { 2770 ipmp_if_flags_t flags = 0; 2771 2772 if (logint_upcount(pi) == 0) 2773 flags |= IPMP_IFFLAG_DOWN; 2774 if (pi->pi_flags & IFF_INACTIVE) 2775 flags |= IPMP_IFFLAG_INACTIVE; 2776 if (pi->pi_hwaddrdup) 2777 flags |= IPMP_IFFLAG_HWADDRDUP; 2778 if (phyint_is_functioning(pi) && flags == 0) 2779 flags |= IPMP_IFFLAG_ACTIVE; 2780 2781 return (flags); 2782 } 2783 2784 /* 2785 * Store the test address used on phyint instance `pii' in `ssp'. If there's 2786 * no test address, 0.0.0.0 is stored. 2787 */ 2788 static struct sockaddr_storage * 2789 iftestaddr(struct phyint_instance *pii, struct sockaddr_storage *ssp) 2790 { 2791 if (PROBE_ENABLED(pii)) 2792 addr2storage(pii->pii_af, &pii->pii_probe_logint->li_addr, ssp); 2793 else 2794 addr2storage(AF_INET6, &in6addr_any, ssp); 2795 2796 return (ssp); 2797 } 2798 2799 /* 2800 * Return the external IPMP group state associated with phyint group `pg'. 2801 */ 2802 static ipmp_group_state_t 2803 groupstate(struct phyint_group *pg) 2804 { 2805 switch (pg->pg_state) { 2806 case PG_FAILED: 2807 return (IPMP_GROUP_FAILED); 2808 case PG_DEGRADED: 2809 return (IPMP_GROUP_DEGRADED); 2810 case PG_OK: 2811 return (IPMP_GROUP_OK); 2812 } 2813 2814 logerr("groupstate: unknown state %d; aborting\n", pg->pg_state); 2815 abort(); 2816 /* NOTREACHED */ 2817 } 2818 2819 /* 2820 * Return the external IPMP probe state associated with probe `ps'. 2821 */ 2822 static ipmp_probe_state_t 2823 probestate(struct probe_stats *ps) 2824 { 2825 switch (ps->pr_status) { 2826 case PR_UNUSED: 2827 case PR_LOST: 2828 return (IPMP_PROBE_LOST); 2829 case PR_UNACKED: 2830 return (IPMP_PROBE_SENT); 2831 case PR_ACKED: 2832 return (IPMP_PROBE_ACKED); 2833 } 2834 2835 logerr("probestate: unknown state %d; aborting\n", ps->pr_status); 2836 abort(); 2837 /* NOTREACHED */ 2838 } 2839 2840 /* 2841 * Generate an ESC_IPMP_PROBE_STATE sysevent for the probe described by `pr' 2842 * on phyint instance `pii'. Returns 0 on success, -1 on failure. 2843 */ 2844 int 2845 probe_state_event(struct probe_stats *pr, struct phyint_instance *pii) 2846 { 2847 nvlist_t *nvl; 2848 hrtime_t proc_time = 0, recv_time = 0; 2849 struct sockaddr_storage ss; 2850 struct target *tg = pr->pr_target; 2851 int64_t rttavg, rttdev; 2852 2853 errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 2854 if (errno != 0) { 2855 logperror("cannot create `interface change' event"); 2856 return (-1); 2857 } 2858 2859 errno = nvlist_add_uint32(nvl, IPMP_PROBE_ID, pr->pr_id); 2860 if (errno != 0) 2861 goto failed; 2862 2863 errno = nvlist_add_string(nvl, IPMP_IF_NAME, pii->pii_phyint->pi_name); 2864 if (errno != 0) 2865 goto failed; 2866 2867 errno = nvlist_add_uint32(nvl, IPMP_PROBE_STATE, probestate(pr)); 2868 if (errno != 0) 2869 goto failed; 2870 2871 errno = nvlist_add_hrtime(nvl, IPMP_PROBE_START_TIME, 2872 pr->pr_hrtime_start); 2873 if (errno != 0) 2874 goto failed; 2875 2876 errno = nvlist_add_hrtime(nvl, IPMP_PROBE_SENT_TIME, 2877 pr->pr_hrtime_sent); 2878 if (errno != 0) 2879 goto failed; 2880 2881 if (pr->pr_status == PR_ACKED) { 2882 recv_time = pr->pr_hrtime_ackrecv; 2883 proc_time = pr->pr_hrtime_ackproc; 2884 } 2885 2886 errno = nvlist_add_hrtime(nvl, IPMP_PROBE_ACKRECV_TIME, recv_time); 2887 if (errno != 0) 2888 goto failed; 2889 2890 errno = nvlist_add_hrtime(nvl, IPMP_PROBE_ACKPROC_TIME, proc_time); 2891 if (errno != 0) 2892 goto failed; 2893 2894 if (tg != NULL) 2895 addr2storage(pii->pii_af, &tg->tg_address, &ss); 2896 else 2897 addr2storage(pii->pii_af, &in6addr_any, &ss); 2898 2899 errno = nvlist_add_byte_array(nvl, IPMP_PROBE_TARGET, (uchar_t *)&ss, 2900 sizeof (ss)); 2901 if (errno != 0) 2902 goto failed; 2903 2904 rttavg = (tg != NULL) ? (tg->tg_rtt_sa / 8) : 0; 2905 errno = nvlist_add_int64(nvl, IPMP_PROBE_TARGET_RTTAVG, rttavg); 2906 if (errno != 0) 2907 goto failed; 2908 2909 rttdev = (tg != NULL) ? (tg->tg_rtt_sd / 4) : 0; 2910 errno = nvlist_add_int64(nvl, IPMP_PROBE_TARGET_RTTDEV, rttdev); 2911 if (errno != 0) 2912 goto failed; 2913 2914 return (post_event(ESC_IPMP_PROBE_STATE, nvl)); 2915 failed: 2916 logperror("cannot create `probe state' event"); 2917 nvlist_free(nvl); 2918 return (-1); 2919 } 2920 2921 /* 2922 * Generate an ESC_IPMP_GROUP_STATE sysevent for phyint group `pg'. 2923 * Returns 0 on success, -1 on failure. 2924 */ 2925 static int 2926 phyint_group_state_event(struct phyint_group *pg) 2927 { 2928 nvlist_t *nvl; 2929 2930 errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 2931 if (errno != 0) { 2932 logperror("cannot create `group state change' event"); 2933 return (-1); 2934 } 2935 2936 errno = nvlist_add_string(nvl, IPMP_GROUP_NAME, pg->pg_name); 2937 if (errno != 0) 2938 goto failed; 2939 2940 errno = nvlist_add_uint64(nvl, IPMP_GROUP_SIGNATURE, pg->pg_sig); 2941 if (errno != 0) 2942 goto failed; 2943 2944 errno = nvlist_add_uint32(nvl, IPMP_GROUP_STATE, groupstate(pg)); 2945 if (errno != 0) 2946 goto failed; 2947 2948 return (post_event(ESC_IPMP_GROUP_STATE, nvl)); 2949 failed: 2950 logperror("cannot create `group state change' event"); 2951 nvlist_free(nvl); 2952 return (-1); 2953 } 2954 2955 /* 2956 * Generate an ESC_IPMP_GROUP_CHANGE sysevent of type `op' for phyint group 2957 * `pg'. Returns 0 on success, -1 on failure. 2958 */ 2959 static int 2960 phyint_group_change_event(struct phyint_group *pg, ipmp_group_op_t op) 2961 { 2962 nvlist_t *nvl; 2963 2964 errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 2965 if (errno != 0) { 2966 logperror("cannot create `group change' event"); 2967 return (-1); 2968 } 2969 2970 errno = nvlist_add_string(nvl, IPMP_GROUP_NAME, pg->pg_name); 2971 if (errno != 0) 2972 goto failed; 2973 2974 errno = nvlist_add_uint64(nvl, IPMP_GROUP_SIGNATURE, pg->pg_sig); 2975 if (errno != 0) 2976 goto failed; 2977 2978 errno = nvlist_add_uint64(nvl, IPMP_GROUPLIST_SIGNATURE, 2979 phyint_grouplistsig); 2980 if (errno != 0) 2981 goto failed; 2982 2983 errno = nvlist_add_uint32(nvl, IPMP_GROUP_OPERATION, op); 2984 if (errno != 0) 2985 goto failed; 2986 2987 return (post_event(ESC_IPMP_GROUP_CHANGE, nvl)); 2988 failed: 2989 logperror("cannot create `group change' event"); 2990 nvlist_free(nvl); 2991 return (-1); 2992 } 2993 2994 /* 2995 * Generate an ESC_IPMP_GROUP_MEMBER_CHANGE sysevent for phyint `pi' in 2996 * group `pg'. Returns 0 on success, -1 on failure. 2997 */ 2998 static int 2999 phyint_group_member_event(struct phyint_group *pg, struct phyint *pi, 3000 ipmp_if_op_t op) 3001 { 3002 nvlist_t *nvl; 3003 3004 errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 3005 if (errno != 0) { 3006 logperror("cannot create `group member change' event"); 3007 return (-1); 3008 } 3009 3010 errno = nvlist_add_string(nvl, IPMP_GROUP_NAME, pg->pg_name); 3011 if (errno != 0) 3012 goto failed; 3013 3014 errno = nvlist_add_uint64(nvl, IPMP_GROUP_SIGNATURE, pg->pg_sig); 3015 if (errno != 0) 3016 goto failed; 3017 3018 errno = nvlist_add_uint32(nvl, IPMP_IF_OPERATION, op); 3019 if (errno != 0) 3020 goto failed; 3021 3022 errno = nvlist_add_string(nvl, IPMP_IF_NAME, pi->pi_name); 3023 if (errno != 0) 3024 goto failed; 3025 3026 errno = nvlist_add_uint32(nvl, IPMP_IF_TYPE, iftype(pi)); 3027 if (errno != 0) 3028 goto failed; 3029 3030 errno = nvlist_add_uint32(nvl, IPMP_IF_STATE, ifstate(pi)); 3031 if (errno != 0) 3032 goto failed; 3033 3034 return (post_event(ESC_IPMP_GROUP_MEMBER_CHANGE, nvl)); 3035 failed: 3036 logperror("cannot create `group member change' event"); 3037 nvlist_free(nvl); 3038 return (-1); 3039 3040 } 3041 3042 /* 3043 * Generate an ESC_IPMP_IF_CHANGE sysevent for phyint `pi' in group `pg'. 3044 * Returns 0 on success, -1 on failure. 3045 */ 3046 static int 3047 phyint_state_event(struct phyint_group *pg, struct phyint *pi) 3048 { 3049 nvlist_t *nvl; 3050 3051 errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 3052 if (errno != 0) { 3053 logperror("cannot create `interface change' event"); 3054 return (-1); 3055 } 3056 3057 errno = nvlist_add_string(nvl, IPMP_GROUP_NAME, pg->pg_name); 3058 if (errno != 0) 3059 goto failed; 3060 3061 errno = nvlist_add_uint64(nvl, IPMP_GROUP_SIGNATURE, pg->pg_sig); 3062 if (errno != 0) 3063 goto failed; 3064 3065 errno = nvlist_add_string(nvl, IPMP_IF_NAME, pi->pi_name); 3066 if (errno != 0) 3067 goto failed; 3068 3069 errno = nvlist_add_uint32(nvl, IPMP_IF_TYPE, iftype(pi)); 3070 if (errno != 0) 3071 goto failed; 3072 3073 errno = nvlist_add_uint32(nvl, IPMP_IF_STATE, ifstate(pi)); 3074 if (errno != 0) 3075 goto failed; 3076 3077 return (post_event(ESC_IPMP_IF_CHANGE, nvl)); 3078 failed: 3079 logperror("cannot create `interface change' event"); 3080 nvlist_free(nvl); 3081 return (-1); 3082 3083 } 3084 3085 /* 3086 * Generate a signature for use. The signature is conceptually divided 3087 * into two pieces: a random 16-bit "generation number" and a 48-bit 3088 * monotonically increasing integer. The generation number protects 3089 * against stale updates to entities (e.g., IPMP groups) that have been 3090 * deleted and since recreated. 3091 */ 3092 static uint64_t 3093 gensig(void) 3094 { 3095 static int seeded = 0; 3096 3097 if (seeded == 0) { 3098 srand48((long)gethrtime()); 3099 seeded++; 3100 } 3101 3102 return ((uint64_t)lrand48() << 48 | 1); 3103 } 3104 3105 /* 3106 * Store the information associated with group `grname' into a dynamically 3107 * allocated structure pointed to by `*grinfopp'. Returns an IPMP error code. 3108 */ 3109 unsigned int 3110 getgroupinfo(const char *grname, ipmp_groupinfo_t **grinfopp) 3111 { 3112 struct phyint *pi; 3113 struct phyint_group *pg; 3114 char (*ifs)[LIFNAMSIZ]; 3115 unsigned int i, j; 3116 unsigned int nif = 0, naddr = 0; 3117 lifgroupinfo_t lifgr; 3118 addrlist_t *addrp; 3119 struct sockaddr_storage *addrs; 3120 int fdt = 0; 3121 3122 pg = phyint_group_lookup(grname); 3123 if (pg == NULL) 3124 return (IPMP_EUNKGROUP); 3125 3126 /* 3127 * Tally up the number of interfaces, allocate an array to hold them, 3128 * and insert their names into the array. While we're at it, if any 3129 * interface is actually enabled to send probes, save the group fdt. 3130 */ 3131 for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) 3132 nif++; 3133 3134 ifs = alloca(nif * sizeof (*ifs)); 3135 for (i = 0, pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext, i++) { 3136 assert(i < nif); 3137 (void) strlcpy(ifs[i], pi->pi_name, LIFNAMSIZ); 3138 if (PROBE_ENABLED(pi->pi_v4) || PROBE_ENABLED(pi->pi_v6)) 3139 fdt = pg->pg_fdt; 3140 } 3141 assert(i == nif); 3142 3143 /* 3144 * If this is the anonymous group, there's no other information to 3145 * collect (since there's no IPMP interface). 3146 */ 3147 if (pg == phyint_anongroup) { 3148 *grinfopp = ipmp_groupinfo_create(pg->pg_name, pg->pg_sig, fdt, 3149 groupstate(pg), nif, ifs, "", "", "", "", 0, NULL); 3150 return (*grinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS); 3151 } 3152 3153 /* 3154 * Grab some additional information about the group from the kernel. 3155 * (NOTE: since SIOCGLIFGROUPINFO does not look up by interface name, 3156 * we can use ifsock_v4 even for a V6-only group.) 3157 */ 3158 (void) strlcpy(lifgr.gi_grname, grname, LIFGRNAMSIZ); 3159 if (ioctl(ifsock_v4, SIOCGLIFGROUPINFO, &lifgr) == -1) { 3160 if (errno == ENOENT) 3161 return (IPMP_EUNKGROUP); 3162 3163 logperror("getgroupinfo: SIOCGLIFGROUPINFO"); 3164 return (IPMP_FAILURE); 3165 } 3166 3167 /* 3168 * Tally up the number of data addresses, allocate an array to hold 3169 * them, and insert their values into the array. 3170 */ 3171 for (addrp = pg->pg_addrs; addrp != NULL; addrp = addrp->al_next) 3172 naddr++; 3173 3174 addrs = alloca(naddr * sizeof (*addrs)); 3175 i = 0; 3176 for (addrp = pg->pg_addrs; addrp != NULL; addrp = addrp->al_next) { 3177 /* 3178 * It's possible to have duplicate addresses (if some are 3179 * down). Weed the dups out to avoid confusing consumers. 3180 * (If groups start having tons of addresses, we'll need a 3181 * better algorithm here.) 3182 */ 3183 for (j = 0; j < i; j++) { 3184 if (sockaddrcmp(&addrs[j], &addrp->al_addr)) 3185 break; 3186 } 3187 if (j == i) { 3188 assert(i < naddr); 3189 addrs[i++] = addrp->al_addr; 3190 } 3191 } 3192 naddr = i; 3193 3194 *grinfopp = ipmp_groupinfo_create(pg->pg_name, pg->pg_sig, fdt, 3195 groupstate(pg), nif, ifs, lifgr.gi_grifname, lifgr.gi_m4ifname, 3196 lifgr.gi_m6ifname, lifgr.gi_bcifname, naddr, addrs); 3197 return (*grinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS); 3198 } 3199 3200 /* 3201 * Store the target information associated with phyint instance `pii' into a 3202 * dynamically allocated structure pointed to by `*targinfopp'. Returns an 3203 * IPMP error code. 3204 */ 3205 unsigned int 3206 gettarginfo(struct phyint_instance *pii, const char *name, 3207 ipmp_targinfo_t **targinfopp) 3208 { 3209 uint_t ntarg = 0; 3210 struct target *tg; 3211 struct sockaddr_storage ss; 3212 struct sockaddr_storage *targs = NULL; 3213 3214 if (PROBE_CAPABLE(pii)) { 3215 targs = alloca(pii->pii_ntargets * sizeof (*targs)); 3216 tg = pii->pii_target_next; 3217 do { 3218 if (tg->tg_status == TG_ACTIVE) { 3219 assert(ntarg < pii->pii_ntargets); 3220 addr2storage(pii->pii_af, &tg->tg_address, 3221 &targs[ntarg++]); 3222 } 3223 if ((tg = tg->tg_next) == NULL) 3224 tg = pii->pii_targets; 3225 } while (tg != pii->pii_target_next); 3226 3227 assert(ntarg == pii->pii_ntargets); 3228 } 3229 3230 *targinfopp = ipmp_targinfo_create(name, iftestaddr(pii, &ss), 3231 iftargmode(pii), ntarg, targs); 3232 return (*targinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS); 3233 } 3234 3235 /* 3236 * Store the information associated with interface `ifname' into a dynamically 3237 * allocated structure pointed to by `*ifinfopp'. Returns an IPMP error code. 3238 */ 3239 unsigned int 3240 getifinfo(const char *ifname, ipmp_ifinfo_t **ifinfopp) 3241 { 3242 int retval; 3243 struct phyint *pi; 3244 ipmp_targinfo_t *targinfo4; 3245 ipmp_targinfo_t *targinfo6; 3246 3247 pi = phyint_lookup(ifname); 3248 if (pi == NULL) 3249 return (IPMP_EUNKIF); 3250 3251 if ((retval = gettarginfo(pi->pi_v4, pi->pi_name, &targinfo4)) != 0 || 3252 (retval = gettarginfo(pi->pi_v6, pi->pi_name, &targinfo6)) != 0) 3253 goto out; 3254 3255 *ifinfopp = ipmp_ifinfo_create(pi->pi_name, pi->pi_group->pg_name, 3256 ifstate(pi), iftype(pi), iflinkstate(pi), ifprobestate(pi), 3257 ifflags(pi), targinfo4, targinfo6); 3258 retval = (*ifinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS); 3259 out: 3260 if (targinfo4 != NULL) 3261 ipmp_freetarginfo(targinfo4); 3262 if (targinfo6 != NULL) 3263 ipmp_freetarginfo(targinfo6); 3264 return (retval); 3265 } 3266 3267 /* 3268 * Store the current list of IPMP groups into a dynamically allocated 3269 * structure pointed to by `*grlistpp'. Returns an IPMP error code. 3270 */ 3271 unsigned int 3272 getgrouplist(ipmp_grouplist_t **grlistpp) 3273 { 3274 struct phyint_group *pg; 3275 char (*groups)[LIFGRNAMSIZ]; 3276 unsigned int i, ngroup; 3277 3278 /* 3279 * Tally up the number of groups, allocate an array to hold them, and 3280 * insert their names into the array. 3281 */ 3282 for (ngroup = 0, pg = phyint_groups; pg != NULL; pg = pg->pg_next) 3283 ngroup++; 3284 3285 groups = alloca(ngroup * sizeof (*groups)); 3286 for (i = 0, pg = phyint_groups; pg != NULL; pg = pg->pg_next, i++) { 3287 assert(i < ngroup); 3288 (void) strlcpy(groups[i], pg->pg_name, LIFGRNAMSIZ); 3289 } 3290 assert(i == ngroup); 3291 3292 *grlistpp = ipmp_grouplist_create(phyint_grouplistsig, ngroup, groups); 3293 return (*grlistpp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS); 3294 } 3295 3296 /* 3297 * Store the address information for `ssp' (in group `grname') into a 3298 * dynamically allocated structure pointed to by `*adinfopp'. Returns an IPMP 3299 * error code. (We'd call this function getaddrinfo(), but it would conflict 3300 * with getaddrinfo(3SOCKET)). 3301 */ 3302 unsigned int 3303 getgraddrinfo(const char *grname, struct sockaddr_storage *ssp, 3304 ipmp_addrinfo_t **adinfopp) 3305 { 3306 int ifsock; 3307 addrlist_t *addrp, *addrmatchp = NULL; 3308 ipmp_addr_state_t state; 3309 const char *binding = ""; 3310 struct lifreq lifr; 3311 struct phyint_group *pg; 3312 3313 if ((pg = phyint_group_lookup(grname)) == NULL) 3314 return (IPMP_EUNKADDR); 3315 3316 /* 3317 * Walk through the data addresses, and find a match. Note that since 3318 * some of the addresses may be down, more than one may match. We 3319 * prefer an up address (if one exists). 3320 */ 3321 for (addrp = pg->pg_addrs; addrp != NULL; addrp = addrp->al_next) { 3322 if (sockaddrcmp(ssp, &addrp->al_addr)) { 3323 addrmatchp = addrp; 3324 if (addrmatchp->al_flags & IFF_UP) 3325 break; 3326 } 3327 } 3328 3329 if (addrmatchp == NULL) 3330 return (IPMP_EUNKADDR); 3331 3332 state = (addrmatchp->al_flags & IFF_UP) ? IPMP_ADDR_UP : IPMP_ADDR_DOWN; 3333 if (state == IPMP_ADDR_UP) { 3334 ifsock = (ssp->ss_family == AF_INET) ? ifsock_v4 : ifsock_v6; 3335 (void) strlcpy(lifr.lifr_name, addrmatchp->al_name, LIFNAMSIZ); 3336 if (ioctl(ifsock, SIOCGLIFBINDING, &lifr) >= 0) 3337 binding = lifr.lifr_binding; 3338 } 3339 3340 *adinfopp = ipmp_addrinfo_create(ssp, state, pg->pg_name, binding); 3341 return (*adinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS); 3342 } 3343 3344 /* 3345 * Store a snapshot of the IPMP subsystem into a dynamically allocated 3346 * structure pointed to by `*snapp'. Returns an IPMP error code. 3347 */ 3348 unsigned int 3349 getsnap(ipmp_snap_t **snapp) 3350 { 3351 ipmp_grouplist_t *grlistp; 3352 ipmp_groupinfo_t *grinfop; 3353 ipmp_addrinfo_t *adinfop; 3354 ipmp_addrlist_t *adlistp; 3355 ipmp_ifinfo_t *ifinfop; 3356 ipmp_snap_t *snap; 3357 struct phyint *pi; 3358 unsigned int i, j; 3359 int retval; 3360 3361 snap = ipmp_snap_create(); 3362 if (snap == NULL) 3363 return (IPMP_ENOMEM); 3364 3365 /* 3366 * Add group list. 3367 */ 3368 retval = getgrouplist(&snap->sn_grlistp); 3369 if (retval != IPMP_SUCCESS) 3370 goto failed; 3371 3372 /* 3373 * Add information for each group in the list, along with all of its 3374 * data addresses. 3375 */ 3376 grlistp = snap->sn_grlistp; 3377 for (i = 0; i < grlistp->gl_ngroup; i++) { 3378 retval = getgroupinfo(grlistp->gl_groups[i], &grinfop); 3379 if (retval != IPMP_SUCCESS) 3380 goto failed; 3381 3382 retval = ipmp_snap_addgroupinfo(snap, grinfop); 3383 if (retval != IPMP_SUCCESS) { 3384 ipmp_freegroupinfo(grinfop); 3385 goto failed; 3386 } 3387 3388 adlistp = grinfop->gr_adlistp; 3389 for (j = 0; j < adlistp->al_naddr; j++) { 3390 retval = getgraddrinfo(grinfop->gr_name, 3391 &adlistp->al_addrs[j], &adinfop); 3392 if (retval != IPMP_SUCCESS) 3393 goto failed; 3394 3395 retval = ipmp_snap_addaddrinfo(snap, adinfop); 3396 if (retval != IPMP_SUCCESS) { 3397 ipmp_freeaddrinfo(adinfop); 3398 goto failed; 3399 } 3400 } 3401 } 3402 3403 /* 3404 * Add information for each configured phyint. 3405 */ 3406 for (pi = phyints; pi != NULL; pi = pi->pi_next) { 3407 retval = getifinfo(pi->pi_name, &ifinfop); 3408 if (retval != IPMP_SUCCESS) 3409 goto failed; 3410 3411 retval = ipmp_snap_addifinfo(snap, ifinfop); 3412 if (retval != IPMP_SUCCESS) { 3413 ipmp_freeifinfo(ifinfop); 3414 goto failed; 3415 } 3416 } 3417 3418 *snapp = snap; 3419 return (IPMP_SUCCESS); 3420 failed: 3421 ipmp_snap_free(snap); 3422 return (retval); 3423 } 3424