1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include "mpd_defs.h" 27 #include "mpd_tables.h" 28 29 /* 30 * Global list of phyints, phyint instances, phyint groups and the anonymous 31 * group; the latter is initialized in phyint_init(). 32 */ 33 struct phyint *phyints = NULL; 34 struct phyint_instance *phyint_instances = NULL; 35 struct phyint_group *phyint_groups = NULL; 36 struct phyint_group *phyint_anongroup; 37 38 /* 39 * Grouplist signature; initialized in phyint_init(). 
40 */ 41 static uint64_t phyint_grouplistsig; 42 43 static void phyint_inst_insert(struct phyint_instance *pii); 44 static void phyint_inst_print(struct phyint_instance *pii); 45 46 static void phyint_insert(struct phyint *pi, struct phyint_group *pg); 47 static void phyint_delete(struct phyint *pi); 48 static boolean_t phyint_is_usable(struct phyint *pi); 49 50 static void logint_print(struct logint *li); 51 static void logint_insert(struct phyint_instance *pii, struct logint *li); 52 static struct logint *logint_lookup(struct phyint_instance *pii, char *li_name); 53 54 static void target_print(struct target *tg); 55 static void target_insert(struct phyint_instance *pii, struct target *tg); 56 static struct target *target_first(struct phyint_instance *pii); 57 static struct target *target_select_best(struct phyint_instance *pii); 58 static void target_flush_hosts(struct phyint_group *pg); 59 60 static void reset_pii_probes(struct phyint_instance *pii, struct target *tg); 61 62 static boolean_t phyint_inst_v6_sockinit(struct phyint_instance *pii); 63 static boolean_t phyint_inst_v4_sockinit(struct phyint_instance *pii); 64 65 static int phyint_state_event(struct phyint_group *pg, struct phyint *pi); 66 static int phyint_group_state_event(struct phyint_group *pg); 67 static int phyint_group_change_event(struct phyint_group *pg, ipmp_group_op_t); 68 static int phyint_group_member_event(struct phyint_group *pg, struct phyint *pi, 69 ipmp_if_op_t op); 70 71 static int logint_upcount(struct phyint *pi); 72 static uint64_t gensig(void); 73 74 /* Initialize any per-file global state. 
Returns 0 on success, -1 on failure */ 75 int 76 phyint_init(void) 77 { 78 phyint_grouplistsig = gensig(); 79 if (track_all_phyints) { 80 phyint_anongroup = phyint_group_create(""); 81 if (phyint_anongroup == NULL) 82 return (-1); 83 phyint_group_insert(phyint_anongroup); 84 } 85 return (0); 86 } 87 88 /* Return the phyint with the given name */ 89 struct phyint * 90 phyint_lookup(const char *name) 91 { 92 struct phyint *pi; 93 94 if (debug & D_PHYINT) 95 logdebug("phyint_lookup(%s)\n", name); 96 97 for (pi = phyints; pi != NULL; pi = pi->pi_next) { 98 if (strncmp(pi->pi_name, name, sizeof (pi->pi_name)) == 0) 99 break; 100 } 101 return (pi); 102 } 103 104 /* 105 * Lookup a phyint in the group that has the same hardware address as `pi', or 106 * NULL if there's none. If `online_only' is set, then only online phyints 107 * are considered when matching. Otherwise, phyints that had been offlined 108 * due to a duplicate hardware address will also be considered. 109 */ 110 static struct phyint * 111 phyint_lookup_hwaddr(struct phyint *pi, boolean_t online_only) 112 { 113 struct phyint *pi2; 114 115 if (pi->pi_group == phyint_anongroup) 116 return (NULL); 117 118 for (pi2 = pi->pi_group->pg_phyint; pi2 != NULL; pi2 = pi2->pi_pgnext) { 119 if (pi2 == pi) 120 continue; 121 122 /* 123 * NOTE: even when online_only is B_FALSE, we ignore phyints 124 * that are administratively offline (rather than offline 125 * because they're dups); when they're brought back online, 126 * they'll be flagged as dups if need be. 127 */ 128 if (pi2->pi_state == PI_OFFLINE && 129 (online_only || !pi2->pi_hwaddrdup)) 130 continue; 131 132 if (pi2->pi_hwaddrlen == pi->pi_hwaddrlen && 133 bcmp(pi2->pi_hwaddr, pi->pi_hwaddr, pi->pi_hwaddrlen) == 0) 134 return (pi2); 135 } 136 return (NULL); 137 } 138 139 /* 140 * Respond to DLPI notifications. Currently, this only processes physical 141 * address changes for the phyint passed via `arg' by onlining or offlining 142 * phyints in the group. 
143 */ 144 /* ARGSUSED */ 145 static void 146 phyint_link_notify(dlpi_handle_t dh, dlpi_notifyinfo_t *dnip, void *arg) 147 { 148 struct phyint *pi = arg; 149 struct phyint *oduppi = NULL, *duppi = NULL; 150 151 assert((dnip->dni_note & pi->pi_notes) != 0); 152 153 if (dnip->dni_note != DL_NOTE_PHYS_ADDR) 154 return; 155 156 assert(dnip->dni_physaddrlen <= DLPI_PHYSADDR_MAX); 157 158 /* 159 * If our hardware address hasn't changed, there's nothing to do. 160 */ 161 if (pi->pi_hwaddrlen == dnip->dni_physaddrlen && 162 bcmp(pi->pi_hwaddr, dnip->dni_physaddr, pi->pi_hwaddrlen) == 0) 163 return; 164 165 oduppi = phyint_lookup_hwaddr(pi, _B_FALSE); 166 pi->pi_hwaddrlen = dnip->dni_physaddrlen; 167 (void) memcpy(pi->pi_hwaddr, dnip->dni_physaddr, pi->pi_hwaddrlen); 168 duppi = phyint_lookup_hwaddr(pi, _B_FALSE); 169 170 if (oduppi != NULL || pi->pi_hwaddrdup) { 171 /* 172 * Our old hardware address was a duplicate. If we'd been 173 * offlined because of it, and our new hardware address is not 174 * a duplicate, then bring us online. Otherwise, `oduppi' 175 * must've been the one brought offline; bring it online. 176 */ 177 if (pi->pi_hwaddrdup) { 178 if (duppi == NULL) 179 (void) phyint_undo_offline(pi); 180 } else { 181 assert(oduppi->pi_hwaddrdup); 182 (void) phyint_undo_offline(oduppi); 183 } 184 } 185 186 if (duppi != NULL && !pi->pi_hwaddrdup) { 187 /* 188 * Our new hardware address was a duplicate and we're not 189 * yet flagged as a duplicate; bring us offline. 190 */ 191 pi->pi_hwaddrdup = _B_TRUE; 192 (void) phyint_offline(pi, 0); 193 } 194 } 195 196 /* 197 * Initialize information about the underlying link for `pi', and set us 198 * up to be notified about future changes. Returns _B_TRUE on success. 
199 */ 200 boolean_t 201 phyint_link_init(struct phyint *pi) 202 { 203 int retval; 204 uint_t notes; 205 const char *errmsg; 206 dlpi_notifyid_t id; 207 208 pi->pi_notes = 0; 209 retval = dlpi_open(pi->pi_name, &pi->pi_dh, 0); 210 if (retval != DLPI_SUCCESS) { 211 pi->pi_dh = NULL; 212 errmsg = "cannot open"; 213 goto failed; 214 } 215 216 pi->pi_hwaddrlen = DLPI_PHYSADDR_MAX; 217 retval = dlpi_get_physaddr(pi->pi_dh, DL_CURR_PHYS_ADDR, pi->pi_hwaddr, 218 &pi->pi_hwaddrlen); 219 if (retval != DLPI_SUCCESS) { 220 errmsg = "cannot get hardware address"; 221 goto failed; 222 } 223 224 /* 225 * Check if the link supports DLPI link state notifications. For 226 * historical reasons, the actual changes are tracked through routing 227 * sockets, so we immediately disable the notification upon success. 228 */ 229 notes = DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN; 230 retval = dlpi_enabnotify(pi->pi_dh, notes, phyint_link_notify, pi, &id); 231 if (retval == DLPI_SUCCESS) { 232 (void) dlpi_disabnotify(pi->pi_dh, id, NULL); 233 pi->pi_notes |= notes; 234 } 235 236 /* 237 * Enable notification of hardware address changes to keep pi_hwaddr 238 * up-to-date and track if we need to offline/undo-offline phyints. 239 */ 240 notes = DL_NOTE_PHYS_ADDR; 241 retval = dlpi_enabnotify(pi->pi_dh, notes, phyint_link_notify, pi, &id); 242 if (retval == DLPI_SUCCESS && poll_add(dlpi_fd(pi->pi_dh)) == 0) 243 pi->pi_notes |= notes; 244 245 return (_B_TRUE); 246 failed: 247 logerr("%s: %s: %s\n", pi->pi_name, errmsg, dlpi_strerror(retval)); 248 if (pi->pi_dh != NULL) { 249 dlpi_close(pi->pi_dh); 250 pi->pi_dh = NULL; 251 } 252 return (_B_FALSE); 253 } 254 255 /* 256 * Close use of link on `pi'. 
257 */ 258 void 259 phyint_link_close(struct phyint *pi) 260 { 261 if (pi->pi_notes & DL_NOTE_PHYS_ADDR) { 262 (void) poll_remove(dlpi_fd(pi->pi_dh)); 263 pi->pi_notes &= ~DL_NOTE_PHYS_ADDR; 264 } 265 266 /* 267 * NOTE: we don't clear pi_notes here so that iflinkstate() can still 268 * properly report the link state even when offline (which is possible 269 * since we use IFF_RUNNING to track link state). 270 */ 271 dlpi_close(pi->pi_dh); 272 pi->pi_dh = NULL; 273 } 274 275 /* Return the phyint instance with the given name and the given family */ 276 struct phyint_instance * 277 phyint_inst_lookup(int af, char *name) 278 { 279 struct phyint *pi; 280 281 if (debug & D_PHYINT) 282 logdebug("phyint_inst_lookup(%s %s)\n", AF_STR(af), name); 283 284 assert(af == AF_INET || af == AF_INET6); 285 286 pi = phyint_lookup(name); 287 if (pi == NULL) 288 return (NULL); 289 290 return (PHYINT_INSTANCE(pi, af)); 291 } 292 293 struct phyint_group * 294 phyint_group_lookup(const char *pg_name) 295 { 296 struct phyint_group *pg; 297 298 if (debug & D_PHYINT) 299 logdebug("phyint_group_lookup(%s)\n", pg_name); 300 301 for (pg = phyint_groups; pg != NULL; pg = pg->pg_next) { 302 if (strncmp(pg->pg_name, pg_name, sizeof (pg->pg_name)) == 0) 303 break; 304 } 305 return (pg); 306 } 307 308 /* 309 * Insert the phyint in the linked list of all phyints. If the phyint belongs 310 * to some group, insert it in the phyint group list. 311 */ 312 static void 313 phyint_insert(struct phyint *pi, struct phyint_group *pg) 314 { 315 if (debug & D_PHYINT) 316 logdebug("phyint_insert(%s '%s')\n", pi->pi_name, pg->pg_name); 317 318 /* Insert the phyint at the head of the 'all phyints' list */ 319 pi->pi_next = phyints; 320 pi->pi_prev = NULL; 321 if (phyints != NULL) 322 phyints->pi_prev = pi; 323 phyints = pi; 324 325 /* 326 * Insert the phyint at the head of the 'phyint_group members' list 327 * of the phyint group to which it belongs. 
328 */ 329 pi->pi_pgnext = NULL; 330 pi->pi_pgprev = NULL; 331 pi->pi_group = pg; 332 333 pi->pi_pgnext = pg->pg_phyint; 334 if (pi->pi_pgnext != NULL) 335 pi->pi_pgnext->pi_pgprev = pi; 336 pg->pg_phyint = pi; 337 338 /* Refresh the group state now that this phyint has been added */ 339 phyint_group_refresh_state(pg); 340 341 pg->pg_sig++; 342 (void) phyint_group_member_event(pg, pi, IPMP_IF_ADD); 343 } 344 345 /* Insert the phyint instance in the linked list of all phyint instances. */ 346 static void 347 phyint_inst_insert(struct phyint_instance *pii) 348 { 349 if (debug & D_PHYINT) { 350 logdebug("phyint_inst_insert(%s %s)\n", 351 AF_STR(pii->pii_af), pii->pii_name); 352 } 353 354 /* 355 * Insert the phyint at the head of the 'all phyint instances' list. 356 */ 357 pii->pii_next = phyint_instances; 358 pii->pii_prev = NULL; 359 if (phyint_instances != NULL) 360 phyint_instances->pii_prev = pii; 361 phyint_instances = pii; 362 } 363 364 /* 365 * Create a new phyint with the given parameters. Also insert it into 366 * the list of all phyints and the list of phyint group members by calling 367 * phyint_insert(). 368 */ 369 static struct phyint * 370 phyint_create(char *pi_name, struct phyint_group *pg, uint_t ifindex, 371 uint64_t flags) 372 { 373 struct phyint *pi; 374 375 pi = calloc(1, sizeof (struct phyint)); 376 if (pi == NULL) { 377 logperror("phyint_create: calloc"); 378 return (NULL); 379 } 380 381 /* 382 * Record the phyint values. 383 */ 384 (void) strlcpy(pi->pi_name, pi_name, sizeof (pi->pi_name)); 385 pi->pi_taddrthresh = getcurrentsec() + TESTADDR_CONF_TIME; 386 pi->pi_ifindex = ifindex; 387 pi->pi_icmpid = htons(((getpid() & 0xFF) << 8) | (ifindex & 0xFF)); 388 389 /* 390 * If the interface is offline, we set the state to PI_OFFLINE. 391 * Otherwise, we optimistically start in the PI_RUNNING state. Later 392 * (in process_link_state_changes()), we will adjust this to match the 393 * current state of the link. 
Further, if test addresses are 394 * subsequently assigned, we will transition to PI_NOTARGETS and then 395 * to either PI_RUNNING or PI_FAILED depending on the probe results. 396 */ 397 pi->pi_state = (flags & IFF_OFFLINE) ? PI_OFFLINE : PI_RUNNING; 398 pi->pi_flags = PHYINT_FLAGS(flags); 399 400 /* 401 * Initialize the link state. The link state is initialized to 402 * up, so that if the link is down when IPMP starts monitoring 403 * the interface, it will appear as though there has been a 404 * transition from the link up to link down. This avoids 405 * having to treat this situation as a special case. 406 */ 407 INIT_LINK_STATE(pi); 408 409 if (!phyint_link_init(pi)) { 410 free(pi); 411 return (NULL); 412 } 413 414 /* 415 * Insert the phyint in the list of all phyints, and the 416 * list of phyint group members 417 */ 418 phyint_insert(pi, pg); 419 420 return (pi); 421 } 422 423 /* 424 * Create a new phyint instance belonging to the phyint 'pi' and address 425 * family 'af'. Also insert it into the list of all phyint instances by 426 * calling phyint_inst_insert(). 427 */ 428 static struct phyint_instance * 429 phyint_inst_create(struct phyint *pi, int af) 430 { 431 struct phyint_instance *pii; 432 433 pii = calloc(1, sizeof (struct phyint_instance)); 434 if (pii == NULL) { 435 logperror("phyint_inst_create: calloc"); 436 return (NULL); 437 } 438 439 /* 440 * Attach the phyint instance to the phyint. 441 * Set the back pointers as well 442 */ 443 pii->pii_phyint = pi; 444 if (af == AF_INET) 445 pi->pi_v4 = pii; 446 else 447 pi->pi_v6 = pii; 448 449 pii->pii_in_use = 1; 450 pii->pii_probe_sock = -1; 451 pii->pii_snxt = 1; 452 pii->pii_af = af; 453 pii->pii_fd_hrtime = gethrtime() + 454 (FAILURE_DETECTION_QP * (hrtime_t)NANOSEC); 455 pii->pii_flags = pi->pi_flags; 456 457 /* Insert the phyint instance in the list of all phyint instances. */ 458 phyint_inst_insert(pii); 459 return (pii); 460 } 461 462 /* 463 * Change the state of phyint `pi' to state `state'. 
464 */ 465 void 466 phyint_chstate(struct phyint *pi, enum pi_state state) 467 { 468 /* 469 * To simplify things, some callers always set a given state 470 * regardless of the previous state of the phyint (e.g., setting 471 * PI_RUNNING when it's already set). We shouldn't bother 472 * generating an event or consuming a signature for these, since 473 * the actual state of the interface is unchanged. 474 */ 475 if (pi->pi_state == state) 476 return; 477 478 pi->pi_state = state; 479 phyint_changed(pi); 480 } 481 482 /* 483 * Note that `pi' has changed state. 484 */ 485 void 486 phyint_changed(struct phyint *pi) 487 { 488 pi->pi_group->pg_sig++; 489 (void) phyint_state_event(pi->pi_group, pi); 490 } 491 492 /* 493 * Insert the phyint group in the linked list of all phyint groups 494 * at the head of the list 495 */ 496 void 497 phyint_group_insert(struct phyint_group *pg) 498 { 499 pg->pg_next = phyint_groups; 500 pg->pg_prev = NULL; 501 if (phyint_groups != NULL) 502 phyint_groups->pg_prev = pg; 503 phyint_groups = pg; 504 505 phyint_grouplistsig++; 506 (void) phyint_group_change_event(pg, IPMP_GROUP_ADD); 507 } 508 509 /* 510 * Create a new phyint group called 'name'. 511 */ 512 struct phyint_group * 513 phyint_group_create(const char *name) 514 { 515 struct phyint_group *pg; 516 517 if (debug & D_PHYINT) 518 logdebug("phyint_group_create(%s)\n", name); 519 520 pg = calloc(1, sizeof (struct phyint_group)); 521 if (pg == NULL) { 522 logperror("phyint_group_create: calloc"); 523 return (NULL); 524 } 525 526 (void) strlcpy(pg->pg_name, name, sizeof (pg->pg_name)); 527 pg->pg_sig = gensig(); 528 pg->pg_fdt = user_failure_detection_time; 529 pg->pg_probeint = user_probe_interval; 530 pg->pg_in_use = _B_TRUE; 531 532 /* 533 * Normal groups always start in the PG_FAILED state since they 534 * have no active interfaces. In contrast, anonymous groups are 535 * heterogeneous and thus always PG_OK. 536 */ 537 pg->pg_state = (name[0] == '\0' ? 
PG_OK : PG_FAILED); 538 539 return (pg); 540 } 541 542 /* 543 * Change the state of the phyint group `pg' to state `state'. 544 */ 545 void 546 phyint_group_chstate(struct phyint_group *pg, enum pg_state state) 547 { 548 assert(pg != phyint_anongroup); 549 550 /* 551 * To simplify things, some callers always set a given state 552 * regardless of the previous state of the group (e.g., setting 553 * PG_DEGRADED when it's already set). We shouldn't bother 554 * generating an event or consuming a signature for these, since 555 * the actual state of the group is unchanged. 556 */ 557 if (pg->pg_state == state) 558 return; 559 560 pg->pg_state = state; 561 562 switch (state) { 563 case PG_FAILED: 564 /* 565 * We can never know with certainty that a group has 566 * failed. It is possible that all known targets have 567 * failed simultaneously, and new targets have come up 568 * instead. If the targets are routers then router 569 * discovery will kick in, and we will see the new routers 570 * thru routing socket messages. But if the targets are 571 * hosts, we have to discover it by multicast. So flush 572 * all the host targets. The next probe will send out a 573 * multicast echo request. If this is a group failure, we 574 * will still not see any response, otherwise the group 575 * will be repaired after we get NUM_PROBE_REPAIRS 576 * consecutive unicast replies on any phyint. 577 */ 578 target_flush_hosts(pg); 579 break; 580 581 case PG_OK: 582 case PG_DEGRADED: 583 break; 584 585 default: 586 logerr("phyint_group_chstate: invalid group state %d; " 587 "aborting\n", state); 588 abort(); 589 } 590 591 pg->pg_sig++; 592 (void) phyint_group_state_event(pg); 593 } 594 595 /* 596 * Create a new phyint instance and initialize it from the values supplied by 597 * the kernel. Always check for ENXIO before logging any error, because the 598 * interface could have vanished after completion of SIOCGLIFCONF. 
599 * Return values: 600 * pointer to the phyint instance on success 601 * NULL on failure Eg. if the phyint instance is not found in the kernel 602 */ 603 struct phyint_instance * 604 phyint_inst_init_from_k(int af, char *pi_name) 605 { 606 char pg_name[LIFNAMSIZ + 1]; 607 int ifsock; 608 uint_t ifindex; 609 uint64_t flags; 610 struct lifreq lifr; 611 struct phyint *pi; 612 struct phyint_instance *pii; 613 boolean_t pi_created; 614 struct phyint_group *pg; 615 616 retry: 617 pii = NULL; 618 pi = NULL; 619 pg = NULL; 620 pi_created = _B_FALSE; 621 622 if (debug & D_PHYINT) { 623 logdebug("phyint_inst_init_from_k(%s %s)\n", 624 AF_STR(af), pi_name); 625 } 626 627 assert(af == AF_INET || af == AF_INET6); 628 629 /* Get the socket for doing ioctls */ 630 ifsock = (af == AF_INET) ? ifsock_v4 : ifsock_v6; 631 632 /* 633 * Get the interface flags. Ignore virtual interfaces, IPMP 634 * meta-interfaces, point-to-point interfaces, and interfaces 635 * that can't support multicast. 636 */ 637 (void) strlcpy(lifr.lifr_name, pi_name, sizeof (lifr.lifr_name)); 638 if (ioctl(ifsock, SIOCGLIFFLAGS, (char *)&lifr) < 0) { 639 if (errno != ENXIO) { 640 logperror("phyint_inst_init_from_k:" 641 " ioctl (get flags)"); 642 } 643 return (NULL); 644 } 645 flags = lifr.lifr_flags; 646 if (!(flags & IFF_MULTICAST) || 647 (flags & (IFF_VIRTUAL|IFF_IPMP|IFF_POINTOPOINT))) 648 return (NULL); 649 650 /* 651 * Get the ifindex for recording later in our tables, in case we need 652 * to create a new phyint. 653 */ 654 if (ioctl(ifsock, SIOCGLIFINDEX, (char *)&lifr) < 0) { 655 if (errno != ENXIO) { 656 logperror("phyint_inst_init_from_k: " 657 " ioctl (get lifindex)"); 658 } 659 return (NULL); 660 } 661 ifindex = lifr.lifr_index; 662 663 /* 664 * Get the phyint group name of this phyint, from the kernel. 
665 */ 666 if (ioctl(ifsock, SIOCGLIFGROUPNAME, (char *)&lifr) < 0) { 667 if (errno != ENXIO) { 668 logperror("phyint_inst_init_from_k: " 669 "ioctl (get group name)"); 670 } 671 return (NULL); 672 } 673 (void) strlcpy(pg_name, lifr.lifr_groupname, sizeof (pg_name)); 674 675 /* 676 * If the phyint is not part of any group, pg_name is the 677 * null string. If 'track_all_phyints' is false, there is no 678 * need to create a phyint. 679 */ 680 if (pg_name[0] == '\0' && !track_all_phyints) { 681 /* 682 * If the IFF_FAILED, IFF_INACTIVE, or IFF_OFFLINE flags are 683 * set, reset them. These flags shouldn't be set if in.mpathd 684 * isn't tracking the interface. 685 */ 686 if ((flags & (IFF_FAILED | IFF_INACTIVE | IFF_OFFLINE))) { 687 lifr.lifr_flags = flags & 688 ~(IFF_FAILED | IFF_INACTIVE | IFF_OFFLINE); 689 if (ioctl(ifsock, SIOCSLIFFLAGS, (char *)&lifr) < 0) { 690 if (errno != ENXIO) { 691 logperror("phyint_inst_init_from_k:" 692 " ioctl (set flags)"); 693 } 694 } 695 } 696 return (NULL); 697 } 698 699 /* 700 * We need to create a new phyint instance. We may also need to 701 * create the group if e.g. the SIOCGLIFCONF loop in initifs() found 702 * an underlying interface before it found its IPMP meta-interface. 703 * Note that we keep any created groups even if phyint_inst_from_k() 704 * fails since a group's existence is not dependent on the ability of 705 * in.mpathd to the track the group's interfaces. 706 */ 707 if ((pg = phyint_group_lookup(pg_name)) == NULL) { 708 if ((pg = phyint_group_create(pg_name)) == NULL) { 709 logerr("phyint_inst_init_from_k: cannot create group " 710 "%s\n", pg_name); 711 return (NULL); 712 } 713 phyint_group_insert(pg); 714 } 715 716 /* 717 * Lookup the phyint. If the phyint does not exist create it. 
718 */ 719 pi = phyint_lookup(pi_name); 720 if (pi == NULL) { 721 pi = phyint_create(pi_name, pg, ifindex, flags); 722 if (pi == NULL) { 723 logerr("phyint_inst_init_from_k:" 724 " unable to create phyint %s\n", pi_name); 725 return (NULL); 726 } 727 pi_created = _B_TRUE; 728 } else { 729 /* The phyint exists already. */ 730 assert(pi_created == _B_FALSE); 731 /* 732 * Normally we should see consistent values for the IPv4 and 733 * IPv6 instances, for phyint properties. If we don't, it 734 * means things have changed underneath us, and we should 735 * resync our tables with the kernel. Check whether the 736 * interface index has changed. If so, it is most likely 737 * the interface has been unplumbed and replumbed, 738 * while we are yet to update our tables. Do it now. 739 */ 740 if (pi->pi_ifindex != ifindex) { 741 phyint_inst_delete(PHYINT_INSTANCE(pi, AF_OTHER(af))); 742 goto retry; 743 } 744 assert(PHYINT_INSTANCE(pi, af) == NULL); 745 746 /* 747 * If the group name seen by the IPv4 and IPv6 instances 748 * are different, it is most likely the groupname has 749 * changed, while we are yet to update our tables. Do it now. 750 */ 751 if (strcmp(pi->pi_group->pg_name, pg_name) != 0) { 752 phyint_inst_delete(PHYINT_INSTANCE(pi, 753 AF_OTHER(af))); 754 goto retry; 755 } 756 } 757 758 /* 759 * Create a new phyint instance, corresponding to the 'af' 760 * passed in. 761 */ 762 pii = phyint_inst_create(pi, af); 763 if (pii == NULL) { 764 logerr("phyint_inst_init_from_k: unable to create" 765 "phyint inst %s\n", pi->pi_name); 766 if (pi_created) 767 phyint_delete(pi); 768 769 return (NULL); 770 } 771 772 if (pi_created) { 773 /* 774 * If this phyint does not have a unique hardware address in its 775 * group, offline it. (The change_pif_flags() implementation 776 * requires that we defer this until after the phyint_instance 777 * is created.) 
778 */ 779 if (phyint_lookup_hwaddr(pi, _B_TRUE) != NULL) { 780 pi->pi_hwaddrdup = _B_TRUE; 781 (void) phyint_offline(pi, 0); 782 } 783 } 784 785 return (pii); 786 } 787 788 /* 789 * Bind pii_probe_sock to the address associated with pii_probe_logint. 790 * This socket will be used for sending and receiving ICMP/ICMPv6 probes to 791 * targets. Do the common part in this function, and complete the 792 * initializations by calling the protocol specific functions 793 * phyint_inst_v{4,6}_sockinit() respectively. 794 * 795 * Return values: _B_TRUE/_B_FALSE for success or failure respectively. 796 */ 797 boolean_t 798 phyint_inst_sockinit(struct phyint_instance *pii) 799 { 800 boolean_t success; 801 struct phyint_group *pg; 802 803 if (debug & D_PHYINT) { 804 logdebug("phyint_inst_sockinit(%s %s)\n", 805 AF_STR(pii->pii_af), pii->pii_name); 806 } 807 808 assert(pii->pii_probe_logint != NULL); 809 assert(pii->pii_probe_logint->li_flags & IFF_UP); 810 assert(pii->pii_probe_logint->li_flags & IFF_NOFAILOVER); 811 assert(pii->pii_af == AF_INET || pii->pii_af == AF_INET6); 812 813 /* 814 * If the socket is already bound, close pii_probe_sock 815 */ 816 if (pii->pii_probe_sock != -1) 817 close_probe_socket(pii, _B_TRUE); 818 819 /* 820 * If the phyint is not part of a named group and track_all_phyints is 821 * false, simply return. 822 */ 823 pg = pii->pii_phyint->pi_group; 824 if (pg == phyint_anongroup && !track_all_phyints) { 825 if (debug & D_PHYINT) 826 logdebug("phyint_inst_sockinit: no group\n"); 827 return (_B_FALSE); 828 } 829 830 /* 831 * Initialize the socket by calling the protocol specific function. 832 * If it succeeds, add the socket to the poll list. 
833 */ 834 if (pii->pii_af == AF_INET6) 835 success = phyint_inst_v6_sockinit(pii); 836 else 837 success = phyint_inst_v4_sockinit(pii); 838 839 if (success && (poll_add(pii->pii_probe_sock) == 0)) 840 return (_B_TRUE); 841 842 /* Something failed, cleanup and return false */ 843 if (pii->pii_probe_sock != -1) 844 close_probe_socket(pii, _B_FALSE); 845 846 return (_B_FALSE); 847 } 848 849 /* 850 * IPv6 specific part in initializing the pii_probe_sock. This socket is 851 * used to send/receive ICMPv6 probe packets. 852 */ 853 static boolean_t 854 phyint_inst_v6_sockinit(struct phyint_instance *pii) 855 { 856 icmp6_filter_t filter; 857 int hopcount = 1; 858 int off = 0; 859 int on = 1; 860 struct sockaddr_in6 testaddr; 861 862 /* 863 * Open a raw socket with ICMPv6 protocol. 864 * 865 * Use IPV6_BOUND_IF to make sure that probes are sent and received on 866 * the specified phyint only. Bind to the test address to ensure that 867 * the responses are sent to the specified phyint. 868 * 869 * Set the hopcount to 1 so that probe packets are not routed. 870 * Disable multicast loopback. Set the receive filter to 871 * receive only ICMPv6 echo replies. 
872 */ 873 pii->pii_probe_sock = socket(pii->pii_af, SOCK_RAW, IPPROTO_ICMPV6); 874 if (pii->pii_probe_sock < 0) { 875 logperror_pii(pii, "phyint_inst_v6_sockinit: socket"); 876 return (_B_FALSE); 877 } 878 879 bzero(&testaddr, sizeof (testaddr)); 880 testaddr.sin6_family = AF_INET6; 881 testaddr.sin6_port = 0; 882 testaddr.sin6_addr = pii->pii_probe_logint->li_addr; 883 884 if (bind(pii->pii_probe_sock, (struct sockaddr *)&testaddr, 885 sizeof (testaddr)) < 0) { 886 logperror_pii(pii, "phyint_inst_v6_sockinit: IPv6 bind"); 887 return (_B_FALSE); 888 } 889 890 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_MULTICAST_IF, 891 (char *)&pii->pii_ifindex, sizeof (uint_t)) < 0) { 892 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 893 " IPV6_MULTICAST_IF"); 894 return (_B_FALSE); 895 } 896 897 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_BOUND_IF, 898 &pii->pii_ifindex, sizeof (uint_t)) < 0) { 899 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 900 " IPV6_BOUND_IF"); 901 return (_B_FALSE); 902 } 903 904 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_UNICAST_HOPS, 905 (char *)&hopcount, sizeof (hopcount)) < 0) { 906 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 907 " IPV6_UNICAST_HOPS"); 908 return (_B_FALSE); 909 } 910 911 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_MULTICAST_HOPS, 912 (char *)&hopcount, sizeof (hopcount)) < 0) { 913 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 914 " IPV6_MULTICAST_HOPS"); 915 return (_B_FALSE); 916 } 917 918 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_MULTICAST_LOOP, 919 (char *)&off, sizeof (off)) < 0) { 920 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 921 " IPV6_MULTICAST_LOOP"); 922 return (_B_FALSE); 923 } 924 925 /* 926 * Filter out so that we only receive ICMP echo replies 927 */ 928 ICMP6_FILTER_SETBLOCKALL(&filter); 929 ICMP6_FILTER_SETPASS(ICMP6_ECHO_REPLY, &filter); 930 931 if (setsockopt(pii->pii_probe_sock, IPPROTO_ICMPV6, 
ICMP6_FILTER, 932 (char *)&filter, sizeof (filter)) < 0) { 933 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 934 " ICMP6_FILTER"); 935 return (_B_FALSE); 936 } 937 938 /* Enable receipt of hoplimit */ 939 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, 940 &on, sizeof (on)) < 0) { 941 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 942 " IPV6_RECVHOPLIMIT"); 943 return (_B_FALSE); 944 } 945 946 /* Enable receipt of timestamp */ 947 if (setsockopt(pii->pii_probe_sock, SOL_SOCKET, SO_TIMESTAMP, 948 &on, sizeof (on)) < 0) { 949 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 950 " SO_TIMESTAMP"); 951 return (_B_FALSE); 952 } 953 954 return (_B_TRUE); 955 } 956 957 /* 958 * IPv4 specific part in initializing the pii_probe_sock. This socket is 959 * used to send/receive ICMPv4 probe packets. 960 */ 961 static boolean_t 962 phyint_inst_v4_sockinit(struct phyint_instance *pii) 963 { 964 struct sockaddr_in testaddr; 965 char char_off = 0; 966 int ttl = 1; 967 char char_ttl = 1; 968 int on = 1; 969 970 /* 971 * Open a raw socket with ICMPv4 protocol. 972 * 973 * Use IP_BOUND_IF to make sure that probes are sent and received on 974 * the specified phyint only. Bind to the test address to ensure that 975 * the responses are sent to the specified phyint. 976 * 977 * Set the ttl to 1 so that probe packets are not routed. 978 * Disable multicast loopback. Enable receipt of timestamp. 
979 */ 980 pii->pii_probe_sock = socket(pii->pii_af, SOCK_RAW, IPPROTO_ICMP); 981 if (pii->pii_probe_sock < 0) { 982 logperror_pii(pii, "phyint_inst_v4_sockinit: socket"); 983 return (_B_FALSE); 984 } 985 986 bzero(&testaddr, sizeof (testaddr)); 987 testaddr.sin_family = AF_INET; 988 testaddr.sin_port = 0; 989 IN6_V4MAPPED_TO_INADDR(&pii->pii_probe_logint->li_addr, 990 &testaddr.sin_addr); 991 992 if (bind(pii->pii_probe_sock, (struct sockaddr *)&testaddr, 993 sizeof (testaddr)) < 0) { 994 logperror_pii(pii, "phyint_inst_v4_sockinit: IPv4 bind"); 995 return (_B_FALSE); 996 } 997 998 if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_BOUND_IF, 999 &pii->pii_ifindex, sizeof (uint_t)) < 0) { 1000 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1001 " IP_BOUND_IF"); 1002 return (_B_FALSE); 1003 } 1004 1005 if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_MULTICAST_IF, 1006 (char *)&testaddr.sin_addr, sizeof (struct in_addr)) < 0) { 1007 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1008 " IP_MULTICAST_IF"); 1009 return (_B_FALSE); 1010 } 1011 1012 if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_TTL, 1013 (char *)&ttl, sizeof (ttl)) < 0) { 1014 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1015 " IP_TTL"); 1016 return (_B_FALSE); 1017 } 1018 1019 if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_MULTICAST_LOOP, 1020 (char *)&char_off, sizeof (char_off)) == -1) { 1021 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1022 " IP_MULTICAST_LOOP"); 1023 return (_B_FALSE); 1024 } 1025 1026 if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_MULTICAST_TTL, 1027 (char *)&char_ttl, sizeof (char_ttl)) == -1) { 1028 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1029 " IP_MULTICAST_TTL"); 1030 return (_B_FALSE); 1031 } 1032 1033 if (setsockopt(pii->pii_probe_sock, SOL_SOCKET, SO_TIMESTAMP, &on, 1034 sizeof (on)) < 0) { 1035 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1036 " SO_TIMESTAMP"); 1037 return (_B_FALSE); 
1038 } 1039 1040 return (_B_TRUE); 1041 } 1042 1043 /* 1044 * Remove the phyint group from the list of 'all phyint groups' 1045 * and free it. 1046 */ 1047 void 1048 phyint_group_delete(struct phyint_group *pg) 1049 { 1050 /* 1051 * The anonymous group always exists, even when empty. 1052 */ 1053 if (pg == phyint_anongroup) 1054 return; 1055 1056 if (debug & D_PHYINT) 1057 logdebug("phyint_group_delete('%s')\n", pg->pg_name); 1058 1059 /* 1060 * The phyint group must be empty, and must not have any phyints. 1061 * The phyint group must be in the list of all phyint groups 1062 */ 1063 assert(pg->pg_phyint == NULL); 1064 assert(phyint_groups == pg || pg->pg_prev != NULL); 1065 1066 if (pg->pg_prev != NULL) 1067 pg->pg_prev->pg_next = pg->pg_next; 1068 else 1069 phyint_groups = pg->pg_next; 1070 1071 if (pg->pg_next != NULL) 1072 pg->pg_next->pg_prev = pg->pg_prev; 1073 1074 pg->pg_next = NULL; 1075 pg->pg_prev = NULL; 1076 1077 phyint_grouplistsig++; 1078 (void) phyint_group_change_event(pg, IPMP_GROUP_REMOVE); 1079 1080 addrlist_free(&pg->pg_addrs); 1081 free(pg); 1082 } 1083 1084 /* 1085 * Refresh the state of `pg' based on its current members. 1086 */ 1087 void 1088 phyint_group_refresh_state(struct phyint_group *pg) 1089 { 1090 enum pg_state state; 1091 enum pg_state origstate = pg->pg_state; 1092 struct phyint *pi, *usablepi; 1093 uint_t nif = 0, nusable = 0; 1094 1095 /* 1096 * Anonymous groups never change state. 
1097 */ 1098 if (pg == phyint_anongroup) 1099 return; 1100 1101 for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) { 1102 nif++; 1103 if (phyint_is_usable(pi)) { 1104 nusable++; 1105 usablepi = pi; 1106 } 1107 } 1108 1109 if (nusable == 0) 1110 state = PG_FAILED; 1111 else if (nif == nusable) 1112 state = PG_OK; 1113 else 1114 state = PG_DEGRADED; 1115 1116 phyint_group_chstate(pg, state); 1117 1118 /* 1119 * If we're shutting down, skip logging messages since otherwise our 1120 * shutdown housecleaning will make us report that groups are unusable. 1121 */ 1122 if (cleanup_started) 1123 return; 1124 1125 /* 1126 * NOTE: We use pg_failmsg_printed rather than origstate since 1127 * otherwise at startup we'll log a "now usable" message when the 1128 * first usable phyint is added to an empty group. 1129 */ 1130 if (state != PG_FAILED && pg->pg_failmsg_printed) { 1131 assert(origstate == PG_FAILED); 1132 logerr("At least 1 IP interface (%s) in group %s is now " 1133 "usable\n", usablepi->pi_name, pg->pg_name); 1134 pg->pg_failmsg_printed = _B_FALSE; 1135 } else if (origstate != PG_FAILED && state == PG_FAILED) { 1136 logerr("All IP interfaces in group %s are now unusable\n", 1137 pg->pg_name); 1138 pg->pg_failmsg_printed = _B_TRUE; 1139 } 1140 } 1141 1142 /* 1143 * Extract information from the kernel about the desired phyint. 1144 * Look only for properties of the phyint and not properties of logints. 1145 * Take appropriate action on the changes. 1146 * Return codes: 1147 * PI_OK 1148 * The phyint exists in the kernel and matches our knowledge 1149 * of the phyint. 1150 * PI_DELETED 1151 * The phyint has vanished in the kernel. 1152 * PI_IFINDEX_CHANGED 1153 * The phyint's interface index has changed. 1154 * Ask the caller to delete and recreate the phyint. 1155 * PI_IOCTL_ERROR 1156 * Some ioctl error. Don't change anything. 1157 * PI_GROUP_CHANGED 1158 * The phyint has changed group. 
1159 */ 1160 int 1161 phyint_inst_update_from_k(struct phyint_instance *pii) 1162 { 1163 struct lifreq lifr; 1164 int ifsock; 1165 struct phyint *pi; 1166 1167 pi = pii->pii_phyint; 1168 1169 if (debug & D_PHYINT) { 1170 logdebug("phyint_inst_update_from_k(%s %s)\n", 1171 AF_STR(pii->pii_af), pi->pi_name); 1172 } 1173 1174 /* 1175 * Get the ifindex from the kernel, for comparison with the 1176 * value in our tables. 1177 */ 1178 (void) strncpy(lifr.lifr_name, pi->pi_name, sizeof (lifr.lifr_name)); 1179 lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0'; 1180 1181 ifsock = (pii->pii_af == AF_INET) ? ifsock_v4 : ifsock_v6; 1182 if (ioctl(ifsock, SIOCGLIFINDEX, &lifr) < 0) { 1183 if (errno == ENXIO) { 1184 return (PI_DELETED); 1185 } else { 1186 logperror_pii(pii, "phyint_inst_update_from_k:" 1187 " ioctl (get lifindex)"); 1188 return (PI_IOCTL_ERROR); 1189 } 1190 } 1191 1192 if (lifr.lifr_index != pi->pi_ifindex) { 1193 /* 1194 * The index has changed. Most likely the interface has 1195 * been unplumbed and replumbed. Ask the caller to take 1196 * appropriate action. 1197 */ 1198 if (debug & D_PHYINT) { 1199 logdebug("phyint_inst_update_from_k:" 1200 " old index %d new index %d\n", 1201 pi->pi_ifindex, lifr.lifr_index); 1202 } 1203 return (PI_IFINDEX_CHANGED); 1204 } 1205 1206 /* 1207 * Get the group name from the kernel, for comparison with 1208 * the value in our tables. 1209 */ 1210 if (ioctl(ifsock, SIOCGLIFGROUPNAME, &lifr) < 0) { 1211 if (errno == ENXIO) { 1212 return (PI_DELETED); 1213 } else { 1214 logperror_pii(pii, "phyint_inst_update_from_k:" 1215 " ioctl (get groupname)"); 1216 return (PI_IOCTL_ERROR); 1217 } 1218 } 1219 1220 /* 1221 * If the phyint has changed group i.e. 
if the phyint group name 1222 * returned by the kernel is different, ask the caller to delete 1223 * and recreate the phyint in the right group 1224 */ 1225 if (strcmp(lifr.lifr_groupname, pi->pi_group->pg_name) != 0) { 1226 /* Groupname has changed */ 1227 if (debug & D_PHYINT) { 1228 logdebug("phyint_inst_update_from_k:" 1229 " groupname change\n"); 1230 } 1231 return (PI_GROUP_CHANGED); 1232 } 1233 1234 /* 1235 * Get the current phyint flags from the kernel, and determine what 1236 * flags have changed by comparing against our tables. Note that the 1237 * IFF_INACTIVE processing in initifs() relies on this call to ensure 1238 * that IFF_INACTIVE is really still set on the interface. 1239 */ 1240 if (ioctl(ifsock, SIOCGLIFFLAGS, &lifr) < 0) { 1241 if (errno == ENXIO) { 1242 return (PI_DELETED); 1243 } else { 1244 logperror_pii(pii, "phyint_inst_update_from_k: " 1245 " ioctl (get flags)"); 1246 return (PI_IOCTL_ERROR); 1247 } 1248 } 1249 1250 pi->pi_flags = PHYINT_FLAGS(lifr.lifr_flags); 1251 if (pi->pi_v4 != NULL) 1252 pi->pi_v4->pii_flags = pi->pi_flags; 1253 if (pi->pi_v6 != NULL) 1254 pi->pi_v6->pii_flags = pi->pi_flags; 1255 1256 /* 1257 * Make sure the IFF_FAILED flag is set if and only if we think 1258 * the interface should be failed. 1259 */ 1260 if (pi->pi_flags & IFF_FAILED) { 1261 if (pi->pi_state == PI_RUNNING) 1262 (void) change_pif_flags(pi, 0, IFF_FAILED); 1263 } else { 1264 if (pi->pi_state == PI_FAILED) 1265 (void) change_pif_flags(pi, IFF_FAILED, IFF_INACTIVE); 1266 } 1267 1268 /* No change in phyint status */ 1269 return (PI_OK); 1270 } 1271 1272 /* 1273 * Delete the phyint. Remove it from the list of all phyints, and the 1274 * list of phyint group members. 
1275 */ 1276 static void 1277 phyint_delete(struct phyint *pi) 1278 { 1279 struct phyint *pi2; 1280 struct phyint_group *pg = pi->pi_group; 1281 1282 if (debug & D_PHYINT) 1283 logdebug("phyint_delete(%s)\n", pi->pi_name); 1284 1285 /* Both IPv4 and IPv6 phyint instances must have been deleted. */ 1286 assert(pi->pi_v4 == NULL && pi->pi_v6 == NULL); 1287 1288 /* 1289 * The phyint must belong to a group. 1290 */ 1291 assert(pg->pg_phyint == pi || pi->pi_pgprev != NULL); 1292 1293 /* The phyint must be in the list of all phyints */ 1294 assert(phyints == pi || pi->pi_prev != NULL); 1295 1296 /* Remove the phyint from the phyint group list */ 1297 pg->pg_sig++; 1298 (void) phyint_group_member_event(pg, pi, IPMP_IF_REMOVE); 1299 1300 if (pi->pi_pgprev == NULL) { 1301 /* Phyint is the 1st in the phyint group list */ 1302 pg->pg_phyint = pi->pi_pgnext; 1303 } else { 1304 pi->pi_pgprev->pi_pgnext = pi->pi_pgnext; 1305 } 1306 if (pi->pi_pgnext != NULL) 1307 pi->pi_pgnext->pi_pgprev = pi->pi_pgprev; 1308 pi->pi_pgnext = NULL; 1309 pi->pi_pgprev = NULL; 1310 1311 /* Refresh the group state now that this phyint has been removed */ 1312 phyint_group_refresh_state(pg); 1313 1314 /* Remove the phyint from the global list of phyints */ 1315 if (pi->pi_prev == NULL) { 1316 /* Phyint is the 1st in the list */ 1317 phyints = pi->pi_next; 1318 } else { 1319 pi->pi_prev->pi_next = pi->pi_next; 1320 } 1321 if (pi->pi_next != NULL) 1322 pi->pi_next->pi_prev = pi->pi_prev; 1323 pi->pi_next = NULL; 1324 pi->pi_prev = NULL; 1325 1326 /* 1327 * See if another phyint in the group had been offlined because 1328 * it was a dup of `pi' -- and if so, online it. 1329 */ 1330 if (!pi->pi_hwaddrdup && 1331 (pi2 = phyint_lookup_hwaddr(pi, _B_FALSE)) != NULL) { 1332 assert(pi2->pi_hwaddrdup); 1333 (void) phyint_undo_offline(pi2); 1334 } 1335 phyint_link_close(pi); 1336 free(pi); 1337 } 1338 1339 /* 1340 * Offline phyint `pi' if at least `minred' usable interfaces remain in the 1341 * group. 
Returns an IPMP error code. 1342 */ 1343 int 1344 phyint_offline(struct phyint *pi, uint_t minred) 1345 { 1346 boolean_t was_active; 1347 unsigned int nusable = 0; 1348 struct phyint *pi2; 1349 struct phyint_group *pg = pi->pi_group; 1350 1351 /* 1352 * Verify that enough usable interfaces in the group would remain. 1353 * As a special case, if the group has failed, allow any non-offline 1354 * phyints to be offlined. 1355 */ 1356 if (pg != phyint_anongroup) { 1357 for (pi2 = pg->pg_phyint; pi2 != NULL; pi2 = pi2->pi_pgnext) { 1358 if (pi2 == pi) 1359 continue; 1360 if (phyint_is_usable(pi2) || 1361 (GROUP_FAILED(pg) && pi2->pi_state != PI_OFFLINE)) 1362 nusable++; 1363 } 1364 } 1365 if (nusable < minred) 1366 return (IPMP_EMINRED); 1367 1368 was_active = ((pi->pi_flags & IFF_INACTIVE) == 0); 1369 1370 if (!change_pif_flags(pi, IFF_OFFLINE, IFF_INACTIVE)) 1371 return (IPMP_FAILURE); 1372 1373 /* 1374 * The interface is now offline, so stop probing it. Note that 1375 * if_mpadm(1M) will down the test addresses, after receiving a 1376 * success reply from us. The routing socket message will then make us 1377 * close the socket used for sending probes. But it is more logical 1378 * that an offlined interface must not be probed, even if it has test 1379 * addresses. 1380 * 1381 * NOTE: stop_probing() also sets PI_OFFLINE. 1382 */ 1383 stop_probing(pi); 1384 1385 /* 1386 * If we're offlining the phyint because it has a duplicate hardware 1387 * address, print a warning -- and leave the link open so that we can 1388 * be notified of hardware address changes that make it usable again. 1389 * Otherwise, close the link so that we won't prevent a detach. 
1390 */ 1391 if (pi->pi_hwaddrdup) { 1392 logerr("IP interface %s has a hardware address which is not " 1393 "unique in group %s; offlining\n", pi->pi_name, 1394 pg->pg_name); 1395 } else { 1396 phyint_link_close(pi); 1397 } 1398 1399 /* 1400 * If this phyint was preventing another phyint with a duplicate 1401 * hardware address from being online, bring that one online now. 1402 */ 1403 if (!pi->pi_hwaddrdup && 1404 (pi2 = phyint_lookup_hwaddr(pi, _B_FALSE)) != NULL) { 1405 assert(pi2->pi_hwaddrdup); 1406 (void) phyint_undo_offline(pi2); 1407 } 1408 1409 /* 1410 * If this interface was active, try to activate another INACTIVE 1411 * interface in the group. 1412 */ 1413 if (was_active) 1414 phyint_activate_another(pi); 1415 1416 return (IPMP_SUCCESS); 1417 } 1418 1419 /* 1420 * Undo a previous offline of `pi'. Returns an IPMP error code. 1421 */ 1422 int 1423 phyint_undo_offline(struct phyint *pi) 1424 { 1425 if (pi->pi_state != PI_OFFLINE) { 1426 errno = EINVAL; 1427 return (IPMP_FAILURE); 1428 } 1429 1430 /* 1431 * If necessary, reinitialize our link information and verify that its 1432 * hardware address is still unique across the group. 1433 */ 1434 if (pi->pi_dh == NULL && !phyint_link_init(pi)) { 1435 errno = EIO; 1436 return (IPMP_FAILURE); 1437 } 1438 1439 if (phyint_lookup_hwaddr(pi, _B_TRUE) != NULL) { 1440 pi->pi_hwaddrdup = _B_TRUE; 1441 return (IPMP_EHWADDRDUP); 1442 } 1443 1444 if (pi->pi_hwaddrdup) { 1445 logerr("IP interface %s now has a unique hardware address in " 1446 "group %s; onlining\n", pi->pi_name, pi->pi_group->pg_name); 1447 pi->pi_hwaddrdup = _B_FALSE; 1448 } 1449 1450 if (!change_pif_flags(pi, 0, IFF_OFFLINE)) 1451 return (IPMP_FAILURE); 1452 1453 /* 1454 * While the interface was offline, it may have failed (e.g. the link 1455 * may have gone down). phyint_inst_check_for_failure() will have 1456 * already set pi_flags with IFF_FAILED, so we can use that to decide 1457 * whether the phyint should transition to running. 
Note that after 1458 * we transition to running, we will start sending probes again (if 1459 * test addresses are configured), which may also reveal that the 1460 * interface is in fact failed. 1461 */ 1462 if (pi->pi_flags & IFF_FAILED) { 1463 phyint_chstate(pi, PI_FAILED); 1464 } else { 1465 /* calls phyint_chstate() */ 1466 phyint_transition_to_running(pi); 1467 } 1468 1469 /* 1470 * Give the requestor time to configure test addresses before 1471 * complaining that they're missing. 1472 */ 1473 pi->pi_taddrthresh = getcurrentsec() + TESTADDR_CONF_TIME; 1474 1475 return (IPMP_SUCCESS); 1476 } 1477 1478 /* 1479 * Delete (unlink and free), the phyint instance. 1480 */ 1481 void 1482 phyint_inst_delete(struct phyint_instance *pii) 1483 { 1484 struct phyint *pi = pii->pii_phyint; 1485 1486 assert(pi != NULL); 1487 1488 if (debug & D_PHYINT) { 1489 logdebug("phyint_inst_delete(%s %s)\n", 1490 AF_STR(pii->pii_af), pi->pi_name); 1491 } 1492 1493 /* 1494 * If the phyint instance has associated probe targets 1495 * delete all the targets 1496 */ 1497 while (pii->pii_targets != NULL) 1498 target_delete(pii->pii_targets); 1499 1500 /* 1501 * Delete all the logints associated with this phyint 1502 * instance. 1503 */ 1504 while (pii->pii_logint != NULL) 1505 logint_delete(pii->pii_logint); 1506 1507 /* 1508 * Close the socket used to send probes to targets from this phyint. 1509 */ 1510 if (pii->pii_probe_sock != -1) 1511 close_probe_socket(pii, _B_TRUE); 1512 1513 /* 1514 * Phyint instance must be in the list of all phyint instances. 1515 * Remove phyint instance from the global list of phyint instances. 
1516 */ 1517 assert(phyint_instances == pii || pii->pii_prev != NULL); 1518 if (pii->pii_prev == NULL) { 1519 /* Phyint is the 1st in the list */ 1520 phyint_instances = pii->pii_next; 1521 } else { 1522 pii->pii_prev->pii_next = pii->pii_next; 1523 } 1524 if (pii->pii_next != NULL) 1525 pii->pii_next->pii_prev = pii->pii_prev; 1526 pii->pii_next = NULL; 1527 pii->pii_prev = NULL; 1528 1529 /* 1530 * Reset the phyint instance pointer in the phyint. 1531 * If this is the last phyint instance (being deleted) on this 1532 * phyint, then delete the phyint. 1533 */ 1534 if (pii->pii_af == AF_INET) 1535 pi->pi_v4 = NULL; 1536 else 1537 pi->pi_v6 = NULL; 1538 1539 if (pi->pi_v4 == NULL && pi->pi_v6 == NULL) 1540 phyint_delete(pi); 1541 1542 free(pii); 1543 } 1544 1545 static void 1546 phyint_inst_print(struct phyint_instance *pii) 1547 { 1548 struct logint *li; 1549 struct target *tg; 1550 char abuf[INET6_ADDRSTRLEN]; 1551 int most_recent; 1552 int i; 1553 1554 if (pii->pii_phyint == NULL) { 1555 logdebug("pii->pi_phyint NULL can't print\n"); 1556 return; 1557 } 1558 1559 logdebug("\nPhyint instance: %s %s index %u state %x flags %llx " 1560 "sock %x in_use %d\n", 1561 AF_STR(pii->pii_af), pii->pii_name, pii->pii_ifindex, 1562 pii->pii_state, pii->pii_phyint->pi_flags, pii->pii_probe_sock, 1563 pii->pii_in_use); 1564 1565 for (li = pii->pii_logint; li != NULL; li = li->li_next) 1566 logint_print(li); 1567 1568 logdebug("\n"); 1569 for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) 1570 target_print(tg); 1571 1572 if (pii->pii_targets == NULL) 1573 logdebug("pi_targets NULL\n"); 1574 1575 if (pii->pii_target_next != NULL) { 1576 logdebug("pi_target_next %s %s\n", AF_STR(pii->pii_af), 1577 pr_addr(pii->pii_af, pii->pii_target_next->tg_address, 1578 abuf, sizeof (abuf))); 1579 } else { 1580 logdebug("pi_target_next NULL\n"); 1581 } 1582 1583 if (pii->pii_rtt_target_next != NULL) { 1584 logdebug("pi_rtt_target_next %s %s\n", AF_STR(pii->pii_af), 1585 
pr_addr(pii->pii_af, pii->pii_rtt_target_next->tg_address, 1586 abuf, sizeof (abuf))); 1587 } else { 1588 logdebug("pi_rtt_target_next NULL\n"); 1589 } 1590 1591 if (pii->pii_targets != NULL) { 1592 most_recent = PROBE_INDEX_PREV(pii->pii_probe_next); 1593 1594 i = most_recent; 1595 do { 1596 if (pii->pii_probes[i].pr_target != NULL) { 1597 logdebug("#%d target %s ", i, 1598 pr_addr(pii->pii_af, 1599 pii->pii_probes[i].pr_target->tg_address, 1600 abuf, sizeof (abuf))); 1601 } else { 1602 logdebug("#%d target NULL ", i); 1603 } 1604 logdebug("time_start %lld status %d " 1605 "time_ackproc %lld time_lost %u", 1606 pii->pii_probes[i].pr_hrtime_start, 1607 pii->pii_probes[i].pr_status, 1608 pii->pii_probes[i].pr_hrtime_ackproc, 1609 pii->pii_probes[i].pr_time_lost); 1610 i = PROBE_INDEX_PREV(i); 1611 } while (i != most_recent); 1612 } 1613 } 1614 1615 /* 1616 * Lookup a logint based on the logical interface name, on the given 1617 * phyint instance. 1618 */ 1619 static struct logint * 1620 logint_lookup(struct phyint_instance *pii, char *name) 1621 { 1622 struct logint *li; 1623 1624 if (debug & D_LOGINT) { 1625 logdebug("logint_lookup(%s, %s)\n", 1626 AF_STR(pii->pii_af), name); 1627 } 1628 1629 for (li = pii->pii_logint; li != NULL; li = li->li_next) { 1630 if (strncmp(name, li->li_name, sizeof (li->li_name)) == 0) 1631 break; 1632 } 1633 return (li); 1634 } 1635 1636 /* 1637 * Insert a logint at the head of the list of logints of the given 1638 * phyint instance 1639 */ 1640 static void 1641 logint_insert(struct phyint_instance *pii, struct logint *li) 1642 { 1643 li->li_next = pii->pii_logint; 1644 li->li_prev = NULL; 1645 if (pii->pii_logint != NULL) 1646 pii->pii_logint->li_prev = li; 1647 pii->pii_logint = li; 1648 li->li_phyint_inst = pii; 1649 } 1650 1651 /* 1652 * Create a new named logint, on the specified phyint instance. 
1653 */ 1654 static struct logint * 1655 logint_create(struct phyint_instance *pii, char *name) 1656 { 1657 struct logint *li; 1658 1659 if (debug & D_LOGINT) { 1660 logdebug("logint_create(%s %s %s)\n", 1661 AF_STR(pii->pii_af), pii->pii_name, name); 1662 } 1663 1664 li = calloc(1, sizeof (struct logint)); 1665 if (li == NULL) { 1666 logperror("logint_create: calloc"); 1667 return (NULL); 1668 } 1669 1670 (void) strncpy(li->li_name, name, sizeof (li->li_name)); 1671 li->li_name[sizeof (li->li_name) - 1] = '\0'; 1672 logint_insert(pii, li); 1673 return (li); 1674 } 1675 1676 /* 1677 * Initialize the logint based on the data returned by the kernel. 1678 */ 1679 void 1680 logint_init_from_k(struct phyint_instance *pii, char *li_name) 1681 { 1682 int ifsock; 1683 uint64_t flags; 1684 uint64_t saved_flags; 1685 struct logint *li; 1686 struct lifreq lifr; 1687 struct in6_addr test_subnet; 1688 struct in6_addr testaddr; 1689 int test_subnet_len; 1690 struct sockaddr_in6 *sin6; 1691 struct sockaddr_in *sin; 1692 char abuf[INET6_ADDRSTRLEN]; 1693 boolean_t ptp = _B_FALSE; 1694 struct in6_addr tgaddr; 1695 1696 if (debug & D_LOGINT) { 1697 logdebug("logint_init_from_k(%s %s)\n", 1698 AF_STR(pii->pii_af), li_name); 1699 } 1700 1701 /* Get the socket for doing ioctls */ 1702 ifsock = (pii->pii_af == AF_INET) ? ifsock_v4 : ifsock_v6; 1703 1704 /* 1705 * Get the flags from the kernel. Also serves as a check whether 1706 * the logical still exists. If it doesn't exist, no need to proceed 1707 * any further. 
li_in_use will make the caller clean up the logint 1708 */ 1709 (void) strncpy(lifr.lifr_name, li_name, sizeof (lifr.lifr_name)); 1710 lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0'; 1711 if (ioctl(ifsock, SIOCGLIFFLAGS, (char *)&lifr) < 0) { 1712 /* Interface may have vanished */ 1713 if (errno != ENXIO) { 1714 logperror_pii(pii, "logint_init_from_k: " 1715 "ioctl (get flags)"); 1716 } 1717 return; 1718 } 1719 1720 flags = lifr.lifr_flags; 1721 1722 /* 1723 * Verified the logint exists. Now lookup the logint in our tables. 1724 * If it does not exist, create a new logint. 1725 */ 1726 li = logint_lookup(pii, li_name); 1727 if (li == NULL) { 1728 li = logint_create(pii, li_name); 1729 if (li == NULL) { 1730 /* 1731 * Pretend the interface does not exist 1732 * in the kernel 1733 */ 1734 return; 1735 } 1736 } 1737 1738 /* 1739 * Update li->li_flags with the new flags, after saving the old 1740 * value. This is used later to check what flags has changed and 1741 * take any action 1742 */ 1743 saved_flags = li->li_flags; 1744 li->li_flags = flags; 1745 1746 /* 1747 * Get the address, prefix, prefixlength and update the logint. 1748 * Check if anything has changed. If the logint used for the 1749 * test address has changed, take suitable action. 
1750 */ 1751 if (ioctl(ifsock, SIOCGLIFADDR, (char *)&lifr) < 0) { 1752 /* Interface may have vanished */ 1753 if (errno != ENXIO) { 1754 logperror_li(li, "logint_init_from_k: (get addr)"); 1755 } 1756 goto error; 1757 } 1758 1759 if (pii->pii_af == AF_INET) { 1760 sin = (struct sockaddr_in *)&lifr.lifr_addr; 1761 IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &testaddr); 1762 } else { 1763 sin6 = (struct sockaddr_in6 *)&lifr.lifr_addr; 1764 testaddr = sin6->sin6_addr; 1765 } 1766 1767 if (ioctl(ifsock, SIOCGLIFSUBNET, (char *)&lifr) < 0) { 1768 /* Interface may have vanished */ 1769 if (errno != ENXIO) 1770 logperror_li(li, "logint_init_from_k: (get subnet)"); 1771 goto error; 1772 } 1773 if (lifr.lifr_subnet.ss_family == AF_INET6) { 1774 sin6 = (struct sockaddr_in6 *)&lifr.lifr_subnet; 1775 test_subnet = sin6->sin6_addr; 1776 test_subnet_len = lifr.lifr_addrlen; 1777 } else { 1778 sin = (struct sockaddr_in *)&lifr.lifr_subnet; 1779 IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &test_subnet); 1780 test_subnet_len = lifr.lifr_addrlen + (IPV6_ABITS - IP_ABITS); 1781 } 1782 1783 /* 1784 * If this is the logint corresponding to the test address used for 1785 * sending probes, then if anything significant has changed we need to 1786 * determine the test address again. We ignore changes to the 1787 * IFF_FAILED and IFF_RUNNING flags since those happen as a matter of 1788 * course. 1789 */ 1790 if (pii->pii_probe_logint == li) { 1791 if (((li->li_flags ^ saved_flags) & 1792 ~(IFF_FAILED | IFF_RUNNING)) != 0 || 1793 !IN6_ARE_ADDR_EQUAL(&testaddr, &li->li_addr) || 1794 (!ptp && !IN6_ARE_ADDR_EQUAL(&test_subnet, 1795 &li->li_subnet)) || 1796 (!ptp && test_subnet_len != li->li_subnet_len) || 1797 (ptp && !IN6_ARE_ADDR_EQUAL(&tgaddr, &li->li_dstaddr))) { 1798 /* 1799 * Something significant that affects the testaddress 1800 * has changed. Redo the testaddress selection later on 1801 * in select_test_ifs(). For now do the cleanup and 1802 * set pii_probe_logint to NULL. 
1803 */ 1804 if (pii->pii_probe_sock != -1) 1805 close_probe_socket(pii, _B_TRUE); 1806 pii->pii_probe_logint = NULL; 1807 } 1808 } 1809 1810 1811 /* Update the logint with the values obtained from the kernel. */ 1812 li->li_addr = testaddr; 1813 li->li_in_use = 1; 1814 if (ptp) { 1815 li->li_dstaddr = tgaddr; 1816 li->li_subnet_len = (pii->pii_af == AF_INET) ? 1817 IP_ABITS : IPV6_ABITS; 1818 } else { 1819 li->li_subnet = test_subnet; 1820 li->li_subnet_len = test_subnet_len; 1821 } 1822 1823 if (debug & D_LOGINT) 1824 logint_print(li); 1825 1826 return; 1827 1828 error: 1829 logerr("logint_init_from_k: IGNORED %s %s %s addr %s\n", 1830 AF_STR(pii->pii_af), pii->pii_name, li->li_name, 1831 pr_addr(pii->pii_af, testaddr, abuf, sizeof (abuf))); 1832 logint_delete(li); 1833 } 1834 1835 /* 1836 * Delete (unlink and free) a logint. 1837 */ 1838 void 1839 logint_delete(struct logint *li) 1840 { 1841 struct phyint_instance *pii; 1842 1843 pii = li->li_phyint_inst; 1844 assert(pii != NULL); 1845 1846 if (debug & D_LOGINT) { 1847 int af; 1848 char abuf[INET6_ADDRSTRLEN]; 1849 1850 af = pii->pii_af; 1851 logdebug("logint_delete(%s %s %s/%u)\n", 1852 AF_STR(af), li->li_name, 1853 pr_addr(af, li->li_addr, abuf, sizeof (abuf)), 1854 li->li_subnet_len); 1855 } 1856 1857 /* logint must be in the list of logints */ 1858 assert(pii->pii_logint == li || li->li_prev != NULL); 1859 1860 /* Remove the logint from the list of logints */ 1861 if (li->li_prev == NULL) { 1862 /* logint is the 1st in the list */ 1863 pii->pii_logint = li->li_next; 1864 } else { 1865 li->li_prev->li_next = li->li_next; 1866 } 1867 if (li->li_next != NULL) 1868 li->li_next->li_prev = li->li_prev; 1869 li->li_next = NULL; 1870 li->li_prev = NULL; 1871 1872 /* 1873 * If this logint is also being used for probing, then close the 1874 * associated socket, if it exists. 
1875 */ 1876 if (pii->pii_probe_logint == li) { 1877 if (pii->pii_probe_sock != -1) 1878 close_probe_socket(pii, _B_TRUE); 1879 pii->pii_probe_logint = NULL; 1880 } 1881 1882 free(li); 1883 } 1884 1885 static void 1886 logint_print(struct logint *li) 1887 { 1888 char abuf[INET6_ADDRSTRLEN]; 1889 int af = li->li_phyint_inst->pii_af; 1890 1891 logdebug("logint: %s %s addr %s/%u", AF_STR(af), li->li_name, 1892 pr_addr(af, li->li_addr, abuf, sizeof (abuf)), li->li_subnet_len); 1893 1894 logdebug("\tFlags: %llx in_use %d\n", li->li_flags, li->li_in_use); 1895 } 1896 1897 char * 1898 pr_addr(int af, struct in6_addr addr, char *abuf, int len) 1899 { 1900 struct in_addr addr_v4; 1901 1902 if (af == AF_INET) { 1903 IN6_V4MAPPED_TO_INADDR(&addr, &addr_v4); 1904 (void) inet_ntop(AF_INET, (void *)&addr_v4, abuf, len); 1905 } else { 1906 (void) inet_ntop(AF_INET6, (void *)&addr, abuf, len); 1907 } 1908 return (abuf); 1909 } 1910 1911 /* 1912 * Fill in the sockaddr_storage pointed to by `ssp' with the IP address 1913 * represented by the [`af',`addr'] pair. Needed because in.mpathd internally 1914 * stores all addresses as in6_addrs, but we don't want to expose that. 
1915 */ 1916 void 1917 addr2storage(int af, const struct in6_addr *addr, struct sockaddr_storage *ssp) 1918 { 1919 struct sockaddr_in *sinp = (struct sockaddr_in *)ssp; 1920 struct sockaddr_in6 *sin6p = (struct sockaddr_in6 *)ssp; 1921 1922 assert(af == AF_INET || af == AF_INET6); 1923 1924 switch (af) { 1925 case AF_INET: 1926 (void) memset(sinp, 0, sizeof (*sinp)); 1927 sinp->sin_family = AF_INET; 1928 IN6_V4MAPPED_TO_INADDR(addr, &sinp->sin_addr); 1929 break; 1930 case AF_INET6: 1931 (void) memset(sin6p, 0, sizeof (*sin6p)); 1932 sin6p->sin6_family = AF_INET6; 1933 sin6p->sin6_addr = *addr; 1934 break; 1935 } 1936 } 1937 1938 /* Lookup target on its address */ 1939 struct target * 1940 target_lookup(struct phyint_instance *pii, struct in6_addr addr) 1941 { 1942 struct target *tg; 1943 1944 if (debug & D_TARGET) { 1945 char abuf[INET6_ADDRSTRLEN]; 1946 1947 logdebug("target_lookup(%s %s): addr %s\n", 1948 AF_STR(pii->pii_af), pii->pii_name, 1949 pr_addr(pii->pii_af, addr, abuf, sizeof (abuf))); 1950 } 1951 1952 for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) { 1953 if (IN6_ARE_ADDR_EQUAL(&tg->tg_address, &addr)) 1954 break; 1955 } 1956 return (tg); 1957 } 1958 1959 /* 1960 * Find and return the next active target, for the next probe. 1961 * If no active targets are available, return NULL. 1962 */ 1963 struct target * 1964 target_next(struct target *tg) 1965 { 1966 struct phyint_instance *pii = tg->tg_phyint_inst; 1967 struct target *marker = tg; 1968 hrtime_t now; 1969 1970 now = gethrtime(); 1971 1972 /* 1973 * Target must be in the list of targets for this phyint 1974 * instance. 1975 */ 1976 assert(pii->pii_targets == tg || tg->tg_prev != NULL); 1977 assert(pii->pii_targets != NULL); 1978 1979 /* Return the next active target */ 1980 do { 1981 /* 1982 * Go to the next target. 
If we hit the end, 1983 * reset the ptr to the head 1984 */ 1985 tg = tg->tg_next; 1986 if (tg == NULL) 1987 tg = pii->pii_targets; 1988 1989 assert(TG_STATUS_VALID(tg->tg_status)); 1990 1991 switch (tg->tg_status) { 1992 case TG_ACTIVE: 1993 return (tg); 1994 1995 case TG_UNUSED: 1996 assert(pii->pii_targets_are_routers); 1997 if (pii->pii_ntargets < MAX_PROBE_TARGETS) { 1998 /* 1999 * Bubble up the unused target to active 2000 */ 2001 tg->tg_status = TG_ACTIVE; 2002 pii->pii_ntargets++; 2003 return (tg); 2004 } 2005 break; 2006 2007 case TG_SLOW: 2008 assert(pii->pii_targets_are_routers); 2009 if (tg->tg_latime + MIN_RECOVERY_TIME < now) { 2010 /* 2011 * Bubble up the slow target to unused 2012 */ 2013 tg->tg_status = TG_UNUSED; 2014 } 2015 break; 2016 2017 case TG_DEAD: 2018 assert(pii->pii_targets_are_routers); 2019 if (tg->tg_latime + MIN_RECOVERY_TIME < now) { 2020 /* 2021 * Bubble up the dead target to slow 2022 */ 2023 tg->tg_status = TG_SLOW; 2024 tg->tg_latime = now; 2025 } 2026 break; 2027 } 2028 2029 } while (tg != marker); 2030 2031 return (NULL); 2032 } 2033 2034 /* 2035 * Select the best available target, that is not already TG_ACTIVE, 2036 * for the caller. The caller will determine whether it wants to 2037 * make the returned target TG_ACTIVE. 2038 * The selection order is as follows. 2039 * 1. pick a TG_UNSED target, if it exists. 2040 * 2. else pick a TG_SLOW target that has recovered, if it exists 2041 * 3. else pick any TG_SLOW target, if it exists 2042 * 4. else pick a TG_DEAD target that has recovered, if it exists 2043 * 5. else pick any TG_DEAD target, if it exists 2044 * 6. 
else return null
*/
static struct target *
target_select_best(struct phyint_instance *pii)
{
	struct target *tg;
	struct target *slow_recovered = NULL;
	struct target *slow = NULL;
	struct target *dead_recovered = NULL;
	struct target *dead = NULL;
	hrtime_t now = gethrtime();

	for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) {
		assert(TG_STATUS_VALID(tg->tg_status));

		/* An unused target wins outright; stop scanning. */
		if (tg->tg_status == TG_UNUSED)
			return (tg);

		if (tg->tg_status == TG_SLOW) {
			if (tg->tg_latime + MIN_RECOVERY_TIME < now) {
				/* Recovered: promote slow -> unused. */
				slow_recovered = tg;
				tg->tg_status = TG_UNUSED;
			} else {
				slow = tg;
			}
		} else if (tg->tg_status == TG_DEAD) {
			if (tg->tg_latime + MIN_RECOVERY_TIME < now) {
				/* Recovered: promote dead -> slow. */
				dead_recovered = tg;
				tg->tg_status = TG_SLOW;
				tg->tg_latime = now;
			} else {
				dead = tg;
			}
		}
	}

	/* Nothing was unused; fall back in decreasing order of preference. */
	if (slow_recovered != NULL)
		return (slow_recovered);
	if (slow != NULL)
		return (slow);
	if (dead_recovered != NULL)
		return (dead_recovered);
	return (dead);
}

/*
 * Some target was deleted. If we don't have even MIN_PROBE_TARGETS
 * that are active, pick the next best below.
2108 */ 2109 static void 2110 target_activate_all(struct phyint_instance *pii) 2111 { 2112 struct target *tg; 2113 2114 assert(pii->pii_ntargets == 0); 2115 assert(pii->pii_target_next == NULL); 2116 assert(pii->pii_rtt_target_next == NULL); 2117 assert(pii->pii_targets_are_routers); 2118 2119 while (pii->pii_ntargets < MIN_PROBE_TARGETS) { 2120 tg = target_select_best(pii); 2121 if (tg == NULL) { 2122 /* We are out of targets */ 2123 return; 2124 } 2125 2126 assert(TG_STATUS_VALID(tg->tg_status)); 2127 assert(tg->tg_status != TG_ACTIVE); 2128 tg->tg_status = TG_ACTIVE; 2129 pii->pii_ntargets++; 2130 if (pii->pii_target_next == NULL) { 2131 pii->pii_target_next = tg; 2132 pii->pii_rtt_target_next = tg; 2133 } 2134 } 2135 } 2136 2137 static struct target * 2138 target_first(struct phyint_instance *pii) 2139 { 2140 struct target *tg; 2141 2142 for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) { 2143 assert(TG_STATUS_VALID(tg->tg_status)); 2144 if (tg->tg_status == TG_ACTIVE) 2145 break; 2146 } 2147 2148 return (tg); 2149 } 2150 2151 /* 2152 * Create a default target entry. 2153 */ 2154 void 2155 target_create(struct phyint_instance *pii, struct in6_addr addr, 2156 boolean_t is_router) 2157 { 2158 struct target *tg; 2159 struct phyint *pi; 2160 struct logint *li; 2161 2162 if (debug & D_TARGET) { 2163 char abuf[INET6_ADDRSTRLEN]; 2164 2165 logdebug("target_create(%s %s, %s)\n", 2166 AF_STR(pii->pii_af), pii->pii_name, 2167 pr_addr(pii->pii_af, addr, abuf, sizeof (abuf))); 2168 } 2169 2170 /* 2171 * If the test address is not yet initialized, do not add 2172 * any target, since we cannot determine whether the target 2173 * belongs to the same subnet as the test address. 2174 */ 2175 li = pii->pii_probe_logint; 2176 if (li == NULL) 2177 return; 2178 2179 /* 2180 * If there are multiple subnets associated with an interface, then 2181 * add the target to this phyint instance only if it belongs to the 2182 * same subnet as the test address. 
This assures us that we will 2183 * be able to reach this target through our routing table. 2184 */ 2185 if (!prefix_equal(li->li_subnet, addr, li->li_subnet_len)) 2186 return; 2187 2188 if (pii->pii_targets != NULL) { 2189 assert(pii->pii_ntargets <= MAX_PROBE_TARGETS); 2190 if (is_router) { 2191 if (!pii->pii_targets_are_routers) { 2192 /* 2193 * Prefer router over hosts. Using hosts is a 2194 * fallback mechanism, hence delete all host 2195 * targets. 2196 */ 2197 while (pii->pii_targets != NULL) 2198 target_delete(pii->pii_targets); 2199 } 2200 } else { 2201 /* 2202 * Routers take precedence over hosts. If this 2203 * is a router list and we are trying to add a 2204 * host, just return. If this is a host list 2205 * and if we have sufficient targets, just return 2206 */ 2207 if (pii->pii_targets_are_routers || 2208 pii->pii_ntargets == MAX_PROBE_TARGETS) 2209 return; 2210 } 2211 } 2212 2213 tg = calloc(1, sizeof (struct target)); 2214 if (tg == NULL) { 2215 logperror("target_create: calloc"); 2216 return; 2217 } 2218 2219 tg->tg_phyint_inst = pii; 2220 tg->tg_address = addr; 2221 tg->tg_in_use = 1; 2222 tg->tg_rtt_sa = -1; 2223 tg->tg_num_deferred = 0; 2224 2225 /* 2226 * If this is the first target, set 'pii_targets_are_routers' 2227 * The list of targets is either a list of hosts or list or 2228 * routers, but not a mix. 2229 */ 2230 if (pii->pii_targets == NULL) { 2231 assert(pii->pii_ntargets == 0); 2232 assert(pii->pii_target_next == NULL); 2233 assert(pii->pii_rtt_target_next == NULL); 2234 pii->pii_targets_are_routers = is_router ? 
1 : 0; 2235 } 2236 2237 if (pii->pii_ntargets == MAX_PROBE_TARGETS) { 2238 assert(pii->pii_targets_are_routers); 2239 assert(pii->pii_target_next != NULL); 2240 assert(pii->pii_rtt_target_next != NULL); 2241 tg->tg_status = TG_UNUSED; 2242 } else { 2243 if (pii->pii_ntargets == 0) { 2244 assert(pii->pii_target_next == NULL); 2245 pii->pii_target_next = tg; 2246 pii->pii_rtt_target_next = tg; 2247 } 2248 pii->pii_ntargets++; 2249 tg->tg_status = TG_ACTIVE; 2250 } 2251 2252 target_insert(pii, tg); 2253 2254 /* 2255 * Change state to PI_RUNNING if this phyint instance is capable of 2256 * sending and receiving probes -- that is, if we know of at least 1 2257 * target, and this phyint instance is probe-capable. For more 2258 * details, see the phyint state diagram in mpd_probe.c. 2259 */ 2260 pi = pii->pii_phyint; 2261 if (pi->pi_state == PI_NOTARGETS && PROBE_CAPABLE(pii)) { 2262 if (pi->pi_flags & IFF_FAILED) 2263 phyint_chstate(pi, PI_FAILED); 2264 else 2265 phyint_chstate(pi, PI_RUNNING); 2266 } 2267 } 2268 2269 /* 2270 * Add the target address named by `addr' to phyint instance `pii' if it does 2271 * not already exist. If the target is a router, `is_router' should be set to 2272 * B_TRUE. 2273 */ 2274 void 2275 target_add(struct phyint_instance *pii, struct in6_addr addr, 2276 boolean_t is_router) 2277 { 2278 struct target *tg; 2279 2280 if (pii == NULL) 2281 return; 2282 2283 tg = target_lookup(pii, addr); 2284 2285 /* 2286 * If the target does not exist, create it; target_create() will set 2287 * tg_in_use to true. Even if it exists already, if it's a router 2288 * target and we'd previously learned of it through multicast, then we 2289 * need to recreate it as a router target. Otherwise, just set 2290 * tg_in_use to to true so that init_router_targets() won't delete it. 
2291 */ 2292 if (tg == NULL || (is_router && !pii->pii_targets_are_routers)) 2293 target_create(pii, addr, is_router); 2294 else if (is_router) 2295 tg->tg_in_use = 1; 2296 } 2297 2298 /* 2299 * Insert target at head of linked list of targets for the associated 2300 * phyint instance 2301 */ 2302 static void 2303 target_insert(struct phyint_instance *pii, struct target *tg) 2304 { 2305 tg->tg_next = pii->pii_targets; 2306 tg->tg_prev = NULL; 2307 if (tg->tg_next != NULL) 2308 tg->tg_next->tg_prev = tg; 2309 pii->pii_targets = tg; 2310 } 2311 2312 /* 2313 * Delete a target (unlink and free). 2314 */ 2315 void 2316 target_delete(struct target *tg) 2317 { 2318 int af; 2319 struct phyint_instance *pii; 2320 struct phyint_instance *pii_other; 2321 2322 pii = tg->tg_phyint_inst; 2323 af = pii->pii_af; 2324 2325 if (debug & D_TARGET) { 2326 char abuf[INET6_ADDRSTRLEN]; 2327 2328 logdebug("target_delete(%s %s, %s)\n", 2329 AF_STR(af), pii->pii_name, 2330 pr_addr(af, tg->tg_address, abuf, sizeof (abuf))); 2331 } 2332 2333 /* 2334 * Target must be in the list of targets for this phyint 2335 * instance. 2336 */ 2337 assert(pii->pii_targets == tg || tg->tg_prev != NULL); 2338 2339 /* 2340 * Reset all references to 'tg' in the probe information 2341 * for this phyint. 2342 */ 2343 reset_pii_probes(pii, tg); 2344 2345 /* 2346 * Remove this target from the list of targets of this 2347 * phyint instance. 2348 */ 2349 if (tg->tg_prev == NULL) { 2350 pii->pii_targets = tg->tg_next; 2351 } else { 2352 tg->tg_prev->tg_next = tg->tg_next; 2353 } 2354 2355 if (tg->tg_next != NULL) 2356 tg->tg_next->tg_prev = tg->tg_prev; 2357 2358 tg->tg_next = NULL; 2359 tg->tg_prev = NULL; 2360 2361 if (tg->tg_status == TG_ACTIVE) 2362 pii->pii_ntargets--; 2363 2364 /* 2365 * Adjust the next target to probe, if it points to 2366 * to the currently deleted target. 
2367 */ 2368 if (pii->pii_target_next == tg) 2369 pii->pii_target_next = target_first(pii); 2370 2371 if (pii->pii_rtt_target_next == tg) 2372 pii->pii_rtt_target_next = target_first(pii); 2373 2374 free(tg); 2375 2376 /* 2377 * The number of active targets pii_ntargets == 0 iff 2378 * the next active target pii->pii_target_next == NULL 2379 */ 2380 if (pii->pii_ntargets != 0) { 2381 assert(pii->pii_target_next != NULL); 2382 assert(pii->pii_rtt_target_next != NULL); 2383 assert(pii->pii_target_next->tg_status == TG_ACTIVE); 2384 assert(pii->pii_rtt_target_next->tg_status == TG_ACTIVE); 2385 return; 2386 } 2387 2388 /* At this point, we don't have any active targets. */ 2389 assert(pii->pii_target_next == NULL); 2390 assert(pii->pii_rtt_target_next == NULL); 2391 2392 if (pii->pii_targets_are_routers) { 2393 /* 2394 * Activate any TG_SLOW or TG_DEAD router targets, 2395 * since we don't have any other targets 2396 */ 2397 target_activate_all(pii); 2398 2399 if (pii->pii_ntargets != 0) { 2400 assert(pii->pii_target_next != NULL); 2401 assert(pii->pii_rtt_target_next != NULL); 2402 assert(pii->pii_target_next->tg_status == TG_ACTIVE); 2403 assert(pii->pii_rtt_target_next->tg_status == 2404 TG_ACTIVE); 2405 return; 2406 } 2407 } 2408 2409 /* 2410 * If we still don't have any active targets, the list must 2411 * must be really empty. There aren't even TG_SLOW or TG_DEAD 2412 * targets. Zero out the probe stats since it will not be 2413 * relevant any longer. 2414 */ 2415 assert(pii->pii_targets == NULL); 2416 pii->pii_targets_are_routers = _B_FALSE; 2417 clear_pii_probe_stats(pii); 2418 pii_other = phyint_inst_other(pii); 2419 2420 /* 2421 * If there are no targets on both instances and the interface would 2422 * otherwise be considered PI_RUNNING, go back to PI_NOTARGETS state, 2423 * since we cannot probe this phyint any more. For more details, 2424 * please see phyint state diagram in mpd_probe.c. 
2425 */ 2426 if (!PROBE_CAPABLE(pii_other) && LINK_UP(pii->pii_phyint) && 2427 pii->pii_phyint->pi_state != PI_OFFLINE) 2428 phyint_chstate(pii->pii_phyint, PI_NOTARGETS); 2429 } 2430 2431 /* 2432 * Flush the target list of every phyint in the group, if the list 2433 * is a host target list. This is called if group failure is suspected. 2434 * If all targets have failed, multicast will subsequently discover new 2435 * targets. Else it is a group failure. 2436 * Note: This function is a no-op if the list is a router target list. 2437 */ 2438 static void 2439 target_flush_hosts(struct phyint_group *pg) 2440 { 2441 struct phyint *pi; 2442 struct phyint_instance *pii; 2443 2444 if (debug & D_TARGET) 2445 logdebug("target_flush_hosts(%s)\n", pg->pg_name); 2446 2447 for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) { 2448 pii = pi->pi_v4; 2449 if (pii != NULL && !pii->pii_targets_are_routers) { 2450 /* 2451 * Delete all the targets. When the list becomes 2452 * empty, target_delete() will set pii->pii_targets 2453 * to NULL. 2454 */ 2455 while (pii->pii_targets != NULL) 2456 target_delete(pii->pii_targets); 2457 } 2458 pii = pi->pi_v6; 2459 if (pii != NULL && !pii->pii_targets_are_routers) { 2460 /* 2461 * Delete all the targets. When the list becomes 2462 * empty, target_delete() will set pii->pii_targets 2463 * to NULL. 2464 */ 2465 while (pii->pii_targets != NULL) 2466 target_delete(pii->pii_targets); 2467 } 2468 } 2469 } 2470 2471 /* 2472 * Reset all references to 'target' in the probe info, as this target is 2473 * being deleted. The pr_target field is guaranteed to be non-null if 2474 * pr_status is PR_UNACKED. So we change the pr_status to PR_LOST, so that 2475 * pr_target will not be accessed unconditionally. 
2476 */ 2477 static void 2478 reset_pii_probes(struct phyint_instance *pii, struct target *tg) 2479 { 2480 int i; 2481 2482 for (i = 0; i < PROBE_STATS_COUNT; i++) { 2483 if (pii->pii_probes[i].pr_target == tg) { 2484 if (pii->pii_probes[i].pr_status == PR_UNACKED) { 2485 probe_chstate(&pii->pii_probes[i], pii, 2486 PR_LOST); 2487 } 2488 pii->pii_probes[i].pr_target = NULL; 2489 } 2490 } 2491 2492 } 2493 2494 /* 2495 * Clear the probe statistics array. 2496 */ 2497 void 2498 clear_pii_probe_stats(struct phyint_instance *pii) 2499 { 2500 bzero(pii->pii_probes, sizeof (struct probe_stats) * PROBE_STATS_COUNT); 2501 /* Reset the next probe index in the probe stats array */ 2502 pii->pii_probe_next = 0; 2503 } 2504 2505 static void 2506 target_print(struct target *tg) 2507 { 2508 char abuf[INET6_ADDRSTRLEN]; 2509 char buf[128]; 2510 char buf2[128]; 2511 int af; 2512 int i; 2513 2514 af = tg->tg_phyint_inst->pii_af; 2515 2516 logdebug("Target on %s %s addr %s\n" 2517 "status %d rtt_sa %lld rtt_sd %lld crtt %d tg_in_use %d\n", 2518 AF_STR(af), tg->tg_phyint_inst->pii_name, 2519 pr_addr(af, tg->tg_address, abuf, sizeof (abuf)), 2520 tg->tg_status, tg->tg_rtt_sa, tg->tg_rtt_sd, 2521 tg->tg_crtt, tg->tg_in_use); 2522 2523 buf[0] = '\0'; 2524 for (i = 0; i < tg->tg_num_deferred; i++) { 2525 (void) snprintf(buf2, sizeof (buf2), " %dms", 2526 tg->tg_deferred[i]); 2527 (void) strlcat(buf, buf2, sizeof (buf)); 2528 } 2529 logdebug("deferred rtts:%s\n", buf); 2530 } 2531 2532 void 2533 phyint_inst_print_all(void) 2534 { 2535 struct phyint_instance *pii; 2536 2537 for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) { 2538 phyint_inst_print(pii); 2539 } 2540 } 2541 2542 /* 2543 * Compare two prefixes that have the same prefix length. 2544 * Fails if the prefix length is unreasonable. 
2545 */ 2546 boolean_t 2547 prefix_equal(struct in6_addr p1, struct in6_addr p2, uint_t prefix_len) 2548 { 2549 uchar_t mask; 2550 int j; 2551 2552 if (prefix_len > IPV6_ABITS) 2553 return (_B_FALSE); 2554 2555 for (j = 0; prefix_len > 8; prefix_len -= 8, j++) 2556 if (p1.s6_addr[j] != p2.s6_addr[j]) 2557 return (_B_FALSE); 2558 2559 /* Make the N leftmost bits one */ 2560 mask = 0xff << (8 - prefix_len); 2561 if ((p1.s6_addr[j] & mask) != (p2.s6_addr[j] & mask)) 2562 return (_B_FALSE); 2563 2564 return (_B_TRUE); 2565 } 2566 2567 /* 2568 * Get the number of UP logints on phyint `pi'. 2569 */ 2570 static int 2571 logint_upcount(struct phyint *pi) 2572 { 2573 struct logint *li; 2574 int count = 0; 2575 2576 if (pi->pi_v4 != NULL) { 2577 for (li = pi->pi_v4->pii_logint; li != NULL; li = li->li_next) { 2578 if (li->li_flags & IFF_UP) 2579 count++; 2580 } 2581 } 2582 2583 if (pi->pi_v6 != NULL) { 2584 for (li = pi->pi_v6->pii_logint; li != NULL; li = li->li_next) { 2585 if (li->li_flags & IFF_UP) 2586 count++; 2587 } 2588 } 2589 2590 return (count); 2591 } 2592 2593 /* 2594 * Get the phyint instance with the other (IPv4 / IPv6) protocol 2595 */ 2596 struct phyint_instance * 2597 phyint_inst_other(struct phyint_instance *pii) 2598 { 2599 if (pii->pii_af == AF_INET) 2600 return (pii->pii_phyint->pi_v6); 2601 else 2602 return (pii->pii_phyint->pi_v4); 2603 } 2604 2605 /* 2606 * Check whether a phyint is functioning. 2607 */ 2608 static boolean_t 2609 phyint_is_functioning(struct phyint *pi) 2610 { 2611 if (pi->pi_state == PI_RUNNING) 2612 return (_B_TRUE); 2613 return (pi->pi_state == PI_NOTARGETS && !(pi->pi_flags & IFF_FAILED)); 2614 } 2615 2616 /* 2617 * Check whether a phyint is usable. 2618 */ 2619 static boolean_t 2620 phyint_is_usable(struct phyint *pi) 2621 { 2622 if (logint_upcount(pi) == 0) 2623 return (_B_FALSE); 2624 return (phyint_is_functioning(pi)); 2625 } 2626 2627 /* 2628 * Post an EC_IPMP sysevent of subclass `subclass' and attributes `nvl'. 
2629 * Before sending the event, it prepends the current version of the IPMP 2630 * sysevent API. Returns 0 on success, -1 on failure (in either case, 2631 * `nvl' is freed). 2632 */ 2633 static int 2634 post_event(const char *subclass, nvlist_t *nvl) 2635 { 2636 static evchan_t *evchp = NULL; 2637 2638 /* 2639 * Initialize the event channel if we haven't already done so. 2640 */ 2641 if (evchp == NULL) { 2642 errno = sysevent_evc_bind(IPMP_EVENT_CHAN, &evchp, EVCH_CREAT); 2643 if (errno != 0) { 2644 logerr("cannot create event channel `%s': %s\n", 2645 IPMP_EVENT_CHAN, strerror(errno)); 2646 goto failed; 2647 } 2648 } 2649 2650 errno = nvlist_add_uint32(nvl, IPMP_EVENT_VERSION, 2651 IPMP_EVENT_CUR_VERSION); 2652 if (errno != 0) { 2653 logerr("cannot create `%s' event: %s", subclass, 2654 strerror(errno)); 2655 goto failed; 2656 } 2657 2658 errno = sysevent_evc_publish(evchp, EC_IPMP, subclass, "com.sun", 2659 "in.mpathd", nvl, EVCH_NOSLEEP); 2660 if (errno != 0) { 2661 logerr("cannot send `%s' event: %s\n", subclass, 2662 strerror(errno)); 2663 goto failed; 2664 } 2665 2666 nvlist_free(nvl); 2667 return (0); 2668 failed: 2669 nvlist_free(nvl); 2670 return (-1); 2671 } 2672 2673 /* 2674 * Return the external IPMP state associated with phyint `pi'. 2675 */ 2676 static ipmp_if_state_t 2677 ifstate(struct phyint *pi) 2678 { 2679 switch (pi->pi_state) { 2680 case PI_NOTARGETS: 2681 if (pi->pi_flags & IFF_FAILED) 2682 return (IPMP_IF_FAILED); 2683 return (IPMP_IF_UNKNOWN); 2684 2685 case PI_OFFLINE: 2686 return (IPMP_IF_OFFLINE); 2687 2688 case PI_FAILED: 2689 return (IPMP_IF_FAILED); 2690 2691 case PI_RUNNING: 2692 return (IPMP_IF_OK); 2693 } 2694 2695 logerr("ifstate: unknown state %d; aborting\n", pi->pi_state); 2696 abort(); 2697 /* NOTREACHED */ 2698 } 2699 2700 /* 2701 * Return the external IPMP interface type associated with phyint `pi'. 
2702 */ 2703 static ipmp_if_type_t 2704 iftype(struct phyint *pi) 2705 { 2706 if (pi->pi_flags & IFF_STANDBY) 2707 return (IPMP_IF_STANDBY); 2708 else 2709 return (IPMP_IF_NORMAL); 2710 } 2711 2712 /* 2713 * Return the external IPMP link state associated with phyint `pi'. 2714 */ 2715 static ipmp_if_linkstate_t 2716 iflinkstate(struct phyint *pi) 2717 { 2718 if (!(pi->pi_notes & (DL_NOTE_LINK_UP|DL_NOTE_LINK_DOWN))) 2719 return (IPMP_LINK_UNKNOWN); 2720 2721 return (LINK_DOWN(pi) ? IPMP_LINK_DOWN : IPMP_LINK_UP); 2722 } 2723 2724 /* 2725 * Return the external IPMP probe state associated with phyint `pi'. 2726 */ 2727 static ipmp_if_probestate_t 2728 ifprobestate(struct phyint *pi) 2729 { 2730 if (!PROBE_ENABLED(pi->pi_v4) && !PROBE_ENABLED(pi->pi_v6)) 2731 return (IPMP_PROBE_DISABLED); 2732 2733 if (pi->pi_state == PI_FAILED) 2734 return (IPMP_PROBE_FAILED); 2735 2736 if (!PROBE_CAPABLE(pi->pi_v4) && !PROBE_CAPABLE(pi->pi_v6)) 2737 return (IPMP_PROBE_UNKNOWN); 2738 2739 return (IPMP_PROBE_OK); 2740 } 2741 2742 /* 2743 * Return the external IPMP target mode associated with phyint instance `pii'. 2744 */ 2745 static ipmp_if_targmode_t 2746 iftargmode(struct phyint_instance *pii) 2747 { 2748 if (!PROBE_ENABLED(pii)) 2749 return (IPMP_TARG_DISABLED); 2750 else if (pii->pii_targets_are_routers) 2751 return (IPMP_TARG_ROUTES); 2752 else 2753 return (IPMP_TARG_MULTICAST); 2754 } 2755 2756 /* 2757 * Return the external IPMP flags associated with phyint `pi'. 2758 */ 2759 static ipmp_if_flags_t 2760 ifflags(struct phyint *pi) 2761 { 2762 ipmp_if_flags_t flags = 0; 2763 2764 if (logint_upcount(pi) == 0) 2765 flags |= IPMP_IFFLAG_DOWN; 2766 if (pi->pi_flags & IFF_INACTIVE) 2767 flags |= IPMP_IFFLAG_INACTIVE; 2768 if (pi->pi_hwaddrdup) 2769 flags |= IPMP_IFFLAG_HWADDRDUP; 2770 if (phyint_is_functioning(pi) && flags == 0) 2771 flags |= IPMP_IFFLAG_ACTIVE; 2772 2773 return (flags); 2774 } 2775 2776 /* 2777 * Store the test address used on phyint instance `pii' in `ssp'. 
If there's 2778 * no test address, 0.0.0.0 is stored. 2779 */ 2780 static struct sockaddr_storage * 2781 iftestaddr(struct phyint_instance *pii, struct sockaddr_storage *ssp) 2782 { 2783 if (PROBE_ENABLED(pii)) 2784 addr2storage(pii->pii_af, &pii->pii_probe_logint->li_addr, ssp); 2785 else 2786 addr2storage(AF_INET6, &in6addr_any, ssp); 2787 2788 return (ssp); 2789 } 2790 2791 /* 2792 * Return the external IPMP group state associated with phyint group `pg'. 2793 */ 2794 static ipmp_group_state_t 2795 groupstate(struct phyint_group *pg) 2796 { 2797 switch (pg->pg_state) { 2798 case PG_FAILED: 2799 return (IPMP_GROUP_FAILED); 2800 case PG_DEGRADED: 2801 return (IPMP_GROUP_DEGRADED); 2802 case PG_OK: 2803 return (IPMP_GROUP_OK); 2804 } 2805 2806 logerr("groupstate: unknown state %d; aborting\n", pg->pg_state); 2807 abort(); 2808 /* NOTREACHED */ 2809 } 2810 2811 /* 2812 * Return the external IPMP probe state associated with probe `ps'. 2813 */ 2814 static ipmp_probe_state_t 2815 probestate(struct probe_stats *ps) 2816 { 2817 switch (ps->pr_status) { 2818 case PR_UNUSED: 2819 case PR_LOST: 2820 return (IPMP_PROBE_LOST); 2821 case PR_UNACKED: 2822 return (IPMP_PROBE_SENT); 2823 case PR_ACKED: 2824 return (IPMP_PROBE_ACKED); 2825 } 2826 2827 logerr("probestate: unknown state %d; aborting\n", ps->pr_status); 2828 abort(); 2829 /* NOTREACHED */ 2830 } 2831 2832 /* 2833 * Generate an ESC_IPMP_PROBE_STATE sysevent for the probe described by `pr' 2834 * on phyint instance `pii'. Returns 0 on success, -1 on failure. 
2835 */ 2836 int 2837 probe_state_event(struct probe_stats *pr, struct phyint_instance *pii) 2838 { 2839 nvlist_t *nvl; 2840 hrtime_t proc_time = 0, recv_time = 0; 2841 struct sockaddr_storage ss; 2842 struct target *tg = pr->pr_target; 2843 2844 errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 2845 if (errno != 0) { 2846 logperror("cannot create `interface change' event"); 2847 return (-1); 2848 } 2849 2850 errno = nvlist_add_uint32(nvl, IPMP_PROBE_ID, pr->pr_id); 2851 if (errno != 0) 2852 goto failed; 2853 2854 errno = nvlist_add_string(nvl, IPMP_IF_NAME, pii->pii_phyint->pi_name); 2855 if (errno != 0) 2856 goto failed; 2857 2858 errno = nvlist_add_uint32(nvl, IPMP_PROBE_STATE, probestate(pr)); 2859 if (errno != 0) 2860 goto failed; 2861 2862 errno = nvlist_add_hrtime(nvl, IPMP_PROBE_START_TIME, 2863 pr->pr_hrtime_start); 2864 if (errno != 0) 2865 goto failed; 2866 2867 errno = nvlist_add_hrtime(nvl, IPMP_PROBE_SENT_TIME, 2868 pr->pr_hrtime_sent); 2869 if (errno != 0) 2870 goto failed; 2871 2872 if (pr->pr_status == PR_ACKED) { 2873 recv_time = pr->pr_hrtime_ackrecv; 2874 proc_time = pr->pr_hrtime_ackproc; 2875 } 2876 2877 errno = nvlist_add_hrtime(nvl, IPMP_PROBE_ACKRECV_TIME, recv_time); 2878 if (errno != 0) 2879 goto failed; 2880 2881 errno = nvlist_add_hrtime(nvl, IPMP_PROBE_ACKPROC_TIME, proc_time); 2882 if (errno != 0) 2883 goto failed; 2884 2885 if (tg != NULL) 2886 addr2storage(pii->pii_af, &tg->tg_address, &ss); 2887 else 2888 addr2storage(pii->pii_af, &in6addr_any, &ss); 2889 2890 errno = nvlist_add_byte_array(nvl, IPMP_PROBE_TARGET, (uchar_t *)&ss, 2891 sizeof (ss)); 2892 if (errno != 0) 2893 goto failed; 2894 2895 errno = nvlist_add_int64(nvl, IPMP_PROBE_TARGET_RTTAVG, 2896 tg->tg_rtt_sa / 8); 2897 if (errno != 0) 2898 goto failed; 2899 2900 errno = nvlist_add_int64(nvl, IPMP_PROBE_TARGET_RTTDEV, 2901 tg->tg_rtt_sd / 4); 2902 if (errno != 0) 2903 goto failed; 2904 2905 return (post_event(ESC_IPMP_PROBE_STATE, nvl)); 2906 failed: 2907 logperror("cannot 
create `probe state' event"); 2908 nvlist_free(nvl); 2909 return (-1); 2910 } 2911 2912 /* 2913 * Generate an ESC_IPMP_GROUP_STATE sysevent for phyint group `pg'. 2914 * Returns 0 on success, -1 on failure. 2915 */ 2916 static int 2917 phyint_group_state_event(struct phyint_group *pg) 2918 { 2919 nvlist_t *nvl; 2920 2921 errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 2922 if (errno != 0) { 2923 logperror("cannot create `group state change' event"); 2924 return (-1); 2925 } 2926 2927 errno = nvlist_add_string(nvl, IPMP_GROUP_NAME, pg->pg_name); 2928 if (errno != 0) 2929 goto failed; 2930 2931 errno = nvlist_add_uint64(nvl, IPMP_GROUP_SIGNATURE, pg->pg_sig); 2932 if (errno != 0) 2933 goto failed; 2934 2935 errno = nvlist_add_uint32(nvl, IPMP_GROUP_STATE, groupstate(pg)); 2936 if (errno != 0) 2937 goto failed; 2938 2939 return (post_event(ESC_IPMP_GROUP_STATE, nvl)); 2940 failed: 2941 logperror("cannot create `group state change' event"); 2942 nvlist_free(nvl); 2943 return (-1); 2944 } 2945 2946 /* 2947 * Generate an ESC_IPMP_GROUP_CHANGE sysevent of type `op' for phyint group 2948 * `pg'. Returns 0 on success, -1 on failure. 
2949 */ 2950 static int 2951 phyint_group_change_event(struct phyint_group *pg, ipmp_group_op_t op) 2952 { 2953 nvlist_t *nvl; 2954 2955 errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 2956 if (errno != 0) { 2957 logperror("cannot create `group change' event"); 2958 return (-1); 2959 } 2960 2961 errno = nvlist_add_string(nvl, IPMP_GROUP_NAME, pg->pg_name); 2962 if (errno != 0) 2963 goto failed; 2964 2965 errno = nvlist_add_uint64(nvl, IPMP_GROUP_SIGNATURE, pg->pg_sig); 2966 if (errno != 0) 2967 goto failed; 2968 2969 errno = nvlist_add_uint64(nvl, IPMP_GROUPLIST_SIGNATURE, 2970 phyint_grouplistsig); 2971 if (errno != 0) 2972 goto failed; 2973 2974 errno = nvlist_add_uint32(nvl, IPMP_GROUP_OPERATION, op); 2975 if (errno != 0) 2976 goto failed; 2977 2978 return (post_event(ESC_IPMP_GROUP_CHANGE, nvl)); 2979 failed: 2980 logperror("cannot create `group change' event"); 2981 nvlist_free(nvl); 2982 return (-1); 2983 } 2984 2985 /* 2986 * Generate an ESC_IPMP_GROUP_MEMBER_CHANGE sysevent for phyint `pi' in 2987 * group `pg'. Returns 0 on success, -1 on failure. 
2988 */ 2989 static int 2990 phyint_group_member_event(struct phyint_group *pg, struct phyint *pi, 2991 ipmp_if_op_t op) 2992 { 2993 nvlist_t *nvl; 2994 2995 errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 2996 if (errno != 0) { 2997 logperror("cannot create `group member change' event"); 2998 return (-1); 2999 } 3000 3001 errno = nvlist_add_string(nvl, IPMP_GROUP_NAME, pg->pg_name); 3002 if (errno != 0) 3003 goto failed; 3004 3005 errno = nvlist_add_uint64(nvl, IPMP_GROUP_SIGNATURE, pg->pg_sig); 3006 if (errno != 0) 3007 goto failed; 3008 3009 errno = nvlist_add_uint32(nvl, IPMP_IF_OPERATION, op); 3010 if (errno != 0) 3011 goto failed; 3012 3013 errno = nvlist_add_string(nvl, IPMP_IF_NAME, pi->pi_name); 3014 if (errno != 0) 3015 goto failed; 3016 3017 errno = nvlist_add_uint32(nvl, IPMP_IF_TYPE, iftype(pi)); 3018 if (errno != 0) 3019 goto failed; 3020 3021 errno = nvlist_add_uint32(nvl, IPMP_IF_STATE, ifstate(pi)); 3022 if (errno != 0) 3023 goto failed; 3024 3025 return (post_event(ESC_IPMP_GROUP_MEMBER_CHANGE, nvl)); 3026 failed: 3027 logperror("cannot create `group member change' event"); 3028 nvlist_free(nvl); 3029 return (-1); 3030 3031 } 3032 3033 /* 3034 * Generate an ESC_IPMP_IF_CHANGE sysevent for phyint `pi' in group `pg'. 3035 * Returns 0 on success, -1 on failure. 
3036 */ 3037 static int 3038 phyint_state_event(struct phyint_group *pg, struct phyint *pi) 3039 { 3040 nvlist_t *nvl; 3041 3042 errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 3043 if (errno != 0) { 3044 logperror("cannot create `interface change' event"); 3045 return (-1); 3046 } 3047 3048 errno = nvlist_add_string(nvl, IPMP_GROUP_NAME, pg->pg_name); 3049 if (errno != 0) 3050 goto failed; 3051 3052 errno = nvlist_add_uint64(nvl, IPMP_GROUP_SIGNATURE, pg->pg_sig); 3053 if (errno != 0) 3054 goto failed; 3055 3056 errno = nvlist_add_string(nvl, IPMP_IF_NAME, pi->pi_name); 3057 if (errno != 0) 3058 goto failed; 3059 3060 errno = nvlist_add_uint32(nvl, IPMP_IF_TYPE, iftype(pi)); 3061 if (errno != 0) 3062 goto failed; 3063 3064 errno = nvlist_add_uint32(nvl, IPMP_IF_STATE, ifstate(pi)); 3065 if (errno != 0) 3066 goto failed; 3067 3068 return (post_event(ESC_IPMP_IF_CHANGE, nvl)); 3069 failed: 3070 logperror("cannot create `interface change' event"); 3071 nvlist_free(nvl); 3072 return (-1); 3073 3074 } 3075 3076 /* 3077 * Generate a signature for use. The signature is conceptually divided 3078 * into two pieces: a random 16-bit "generation number" and a 48-bit 3079 * monotonically increasing integer. The generation number protects 3080 * against stale updates to entities (e.g., IPMP groups) that have been 3081 * deleted and since recreated. 3082 */ 3083 static uint64_t 3084 gensig(void) 3085 { 3086 static int seeded = 0; 3087 3088 if (seeded == 0) { 3089 srand48((long)gethrtime()); 3090 seeded++; 3091 } 3092 3093 return ((uint64_t)lrand48() << 48 | 1); 3094 } 3095 3096 /* 3097 * Store the information associated with group `grname' into a dynamically 3098 * allocated structure pointed to by `*grinfopp'. Returns an IPMP error code. 
3099 */ 3100 unsigned int 3101 getgroupinfo(const char *grname, ipmp_groupinfo_t **grinfopp) 3102 { 3103 struct phyint *pi; 3104 struct phyint_group *pg; 3105 char (*ifs)[LIFNAMSIZ]; 3106 unsigned int i, j; 3107 unsigned int nif = 0, naddr = 0; 3108 lifgroupinfo_t lifgr; 3109 addrlist_t *addrp; 3110 struct sockaddr_storage *addrs; 3111 int fdt = 0; 3112 3113 pg = phyint_group_lookup(grname); 3114 if (pg == NULL) 3115 return (IPMP_EUNKGROUP); 3116 3117 /* 3118 * Tally up the number of interfaces, allocate an array to hold them, 3119 * and insert their names into the array. While we're at it, if any 3120 * interface is actually enabled to send probes, save the group fdt. 3121 */ 3122 for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) 3123 nif++; 3124 3125 ifs = alloca(nif * sizeof (*ifs)); 3126 for (i = 0, pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext, i++) { 3127 assert(i < nif); 3128 (void) strlcpy(ifs[i], pi->pi_name, LIFNAMSIZ); 3129 if (PROBE_ENABLED(pi->pi_v4) || PROBE_ENABLED(pi->pi_v6)) 3130 fdt = pg->pg_fdt; 3131 } 3132 assert(i == nif); 3133 3134 /* 3135 * If this is the anonymous group, there's no other information to 3136 * collect (since there's no IPMP interface). 3137 */ 3138 if (pg == phyint_anongroup) { 3139 *grinfopp = ipmp_groupinfo_create(pg->pg_name, pg->pg_sig, fdt, 3140 groupstate(pg), nif, ifs, "", "", "", "", 0, NULL); 3141 return (*grinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS); 3142 } 3143 3144 /* 3145 * Grab some additional information about the group from the kernel. 3146 * (NOTE: since SIOCGLIFGROUPINFO does not look up by interface name, 3147 * we can use ifsock_v4 even for a V6-only group.) 
3148 */ 3149 (void) strlcpy(lifgr.gi_grname, grname, LIFGRNAMSIZ); 3150 if (ioctl(ifsock_v4, SIOCGLIFGROUPINFO, &lifgr) == -1) { 3151 if (errno == ENOENT) 3152 return (IPMP_EUNKGROUP); 3153 3154 logperror("getgroupinfo: SIOCGLIFGROUPINFO"); 3155 return (IPMP_FAILURE); 3156 } 3157 3158 /* 3159 * Tally up the number of data addresses, allocate an array to hold 3160 * them, and insert their values into the array. 3161 */ 3162 for (addrp = pg->pg_addrs; addrp != NULL; addrp = addrp->al_next) 3163 naddr++; 3164 3165 addrs = alloca(naddr * sizeof (*addrs)); 3166 i = 0; 3167 for (addrp = pg->pg_addrs; addrp != NULL; addrp = addrp->al_next) { 3168 /* 3169 * It's possible to have duplicate addresses (if some are 3170 * down). Weed the dups out to avoid confusing consumers. 3171 * (If groups start having tons of addresses, we'll need a 3172 * better algorithm here.) 3173 */ 3174 for (j = 0; j < i; j++) { 3175 if (sockaddrcmp(&addrs[j], &addrp->al_addr)) 3176 break; 3177 } 3178 if (j == i) { 3179 assert(i < naddr); 3180 addrs[i++] = addrp->al_addr; 3181 } 3182 } 3183 naddr = i; 3184 3185 *grinfopp = ipmp_groupinfo_create(pg->pg_name, pg->pg_sig, fdt, 3186 groupstate(pg), nif, ifs, lifgr.gi_grifname, lifgr.gi_m4ifname, 3187 lifgr.gi_m6ifname, lifgr.gi_bcifname, naddr, addrs); 3188 return (*grinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS); 3189 } 3190 3191 /* 3192 * Store the target information associated with phyint instance `pii' into a 3193 * dynamically allocated structure pointed to by `*targinfopp'. Returns an 3194 * IPMP error code. 
3195 */ 3196 unsigned int 3197 gettarginfo(struct phyint_instance *pii, const char *name, 3198 ipmp_targinfo_t **targinfopp) 3199 { 3200 uint_t ntarg = 0; 3201 struct target *tg; 3202 struct sockaddr_storage ss; 3203 struct sockaddr_storage *targs = NULL; 3204 3205 if (PROBE_CAPABLE(pii)) { 3206 targs = alloca(pii->pii_ntargets * sizeof (*targs)); 3207 tg = pii->pii_target_next; 3208 do { 3209 if (tg->tg_status == TG_ACTIVE) { 3210 assert(ntarg < pii->pii_ntargets); 3211 addr2storage(pii->pii_af, &tg->tg_address, 3212 &targs[ntarg++]); 3213 } 3214 if ((tg = tg->tg_next) == NULL) 3215 tg = pii->pii_targets; 3216 } while (tg != pii->pii_target_next); 3217 3218 assert(ntarg == pii->pii_ntargets); 3219 } 3220 3221 *targinfopp = ipmp_targinfo_create(name, iftestaddr(pii, &ss), 3222 iftargmode(pii), ntarg, targs); 3223 return (*targinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS); 3224 } 3225 3226 /* 3227 * Store the information associated with interface `ifname' into a dynamically 3228 * allocated structure pointed to by `*ifinfopp'. Returns an IPMP error code. 3229 */ 3230 unsigned int 3231 getifinfo(const char *ifname, ipmp_ifinfo_t **ifinfopp) 3232 { 3233 int retval; 3234 struct phyint *pi; 3235 ipmp_targinfo_t *targinfo4; 3236 ipmp_targinfo_t *targinfo6; 3237 3238 pi = phyint_lookup(ifname); 3239 if (pi == NULL) 3240 return (IPMP_EUNKIF); 3241 3242 if ((retval = gettarginfo(pi->pi_v4, pi->pi_name, &targinfo4)) != 0 || 3243 (retval = gettarginfo(pi->pi_v6, pi->pi_name, &targinfo6)) != 0) 3244 goto out; 3245 3246 *ifinfopp = ipmp_ifinfo_create(pi->pi_name, pi->pi_group->pg_name, 3247 ifstate(pi), iftype(pi), iflinkstate(pi), ifprobestate(pi), 3248 ifflags(pi), targinfo4, targinfo6); 3249 retval = (*ifinfopp == NULL ? 
IPMP_ENOMEM : IPMP_SUCCESS); 3250 out: 3251 if (targinfo4 != NULL) 3252 ipmp_freetarginfo(targinfo4); 3253 if (targinfo6 != NULL) 3254 ipmp_freetarginfo(targinfo6); 3255 return (retval); 3256 } 3257 3258 /* 3259 * Store the current list of IPMP groups into a dynamically allocated 3260 * structure pointed to by `*grlistpp'. Returns an IPMP error code. 3261 */ 3262 unsigned int 3263 getgrouplist(ipmp_grouplist_t **grlistpp) 3264 { 3265 struct phyint_group *pg; 3266 char (*groups)[LIFGRNAMSIZ]; 3267 unsigned int i, ngroup; 3268 3269 /* 3270 * Tally up the number of groups, allocate an array to hold them, and 3271 * insert their names into the array. 3272 */ 3273 for (ngroup = 0, pg = phyint_groups; pg != NULL; pg = pg->pg_next) 3274 ngroup++; 3275 3276 groups = alloca(ngroup * sizeof (*groups)); 3277 for (i = 0, pg = phyint_groups; pg != NULL; pg = pg->pg_next, i++) { 3278 assert(i < ngroup); 3279 (void) strlcpy(groups[i], pg->pg_name, LIFGRNAMSIZ); 3280 } 3281 assert(i == ngroup); 3282 3283 *grlistpp = ipmp_grouplist_create(phyint_grouplistsig, ngroup, groups); 3284 return (*grlistpp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS); 3285 } 3286 3287 /* 3288 * Store the address information for `ssp' (in group `grname') into a 3289 * dynamically allocated structure pointed to by `*adinfopp'. Returns an IPMP 3290 * error code. (We'd call this function getaddrinfo(), but it would conflict 3291 * with getaddrinfo(3SOCKET)). 3292 */ 3293 unsigned int 3294 getgraddrinfo(const char *grname, struct sockaddr_storage *ssp, 3295 ipmp_addrinfo_t **adinfopp) 3296 { 3297 int ifsock; 3298 addrlist_t *addrp, *addrmatchp = NULL; 3299 ipmp_addr_state_t state; 3300 const char *binding = ""; 3301 struct lifreq lifr; 3302 struct phyint_group *pg; 3303 3304 if ((pg = phyint_group_lookup(grname)) == NULL) 3305 return (IPMP_EUNKADDR); 3306 3307 /* 3308 * Walk through the data addresses, and find a match. Note that since 3309 * some of the addresses may be down, more than one may match. 
We 3310 * prefer an up address (if one exists). 3311 */ 3312 for (addrp = pg->pg_addrs; addrp != NULL; addrp = addrp->al_next) { 3313 if (sockaddrcmp(ssp, &addrp->al_addr)) { 3314 addrmatchp = addrp; 3315 if (addrmatchp->al_flags & IFF_UP) 3316 break; 3317 } 3318 } 3319 3320 if (addrmatchp == NULL) 3321 return (IPMP_EUNKADDR); 3322 3323 state = (addrmatchp->al_flags & IFF_UP) ? IPMP_ADDR_UP : IPMP_ADDR_DOWN; 3324 if (state == IPMP_ADDR_UP) { 3325 ifsock = (ssp->ss_family == AF_INET) ? ifsock_v4 : ifsock_v6; 3326 (void) strlcpy(lifr.lifr_name, addrmatchp->al_name, LIFNAMSIZ); 3327 if (ioctl(ifsock, SIOCGLIFBINDING, &lifr) >= 0) 3328 binding = lifr.lifr_binding; 3329 } 3330 3331 *adinfopp = ipmp_addrinfo_create(ssp, state, pg->pg_name, binding); 3332 return (*adinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS); 3333 } 3334 3335 /* 3336 * Store a snapshot of the IPMP subsystem into a dynamically allocated 3337 * structure pointed to by `*snapp'. Returns an IPMP error code. 3338 */ 3339 unsigned int 3340 getsnap(ipmp_snap_t **snapp) 3341 { 3342 ipmp_grouplist_t *grlistp; 3343 ipmp_groupinfo_t *grinfop; 3344 ipmp_addrinfo_t *adinfop; 3345 ipmp_addrlist_t *adlistp; 3346 ipmp_ifinfo_t *ifinfop; 3347 ipmp_snap_t *snap; 3348 struct phyint *pi; 3349 unsigned int i, j; 3350 int retval; 3351 3352 snap = ipmp_snap_create(); 3353 if (snap == NULL) 3354 return (IPMP_ENOMEM); 3355 3356 /* 3357 * Add group list. 3358 */ 3359 retval = getgrouplist(&snap->sn_grlistp); 3360 if (retval != IPMP_SUCCESS) 3361 goto failed; 3362 3363 /* 3364 * Add information for each group in the list, along with all of its 3365 * data addresses. 
3366 */ 3367 grlistp = snap->sn_grlistp; 3368 for (i = 0; i < grlistp->gl_ngroup; i++) { 3369 retval = getgroupinfo(grlistp->gl_groups[i], &grinfop); 3370 if (retval != IPMP_SUCCESS) 3371 goto failed; 3372 3373 retval = ipmp_snap_addgroupinfo(snap, grinfop); 3374 if (retval != IPMP_SUCCESS) { 3375 ipmp_freegroupinfo(grinfop); 3376 goto failed; 3377 } 3378 3379 adlistp = grinfop->gr_adlistp; 3380 for (j = 0; j < adlistp->al_naddr; j++) { 3381 retval = getgraddrinfo(grinfop->gr_name, 3382 &adlistp->al_addrs[j], &adinfop); 3383 if (retval != IPMP_SUCCESS) 3384 goto failed; 3385 3386 retval = ipmp_snap_addaddrinfo(snap, adinfop); 3387 if (retval != IPMP_SUCCESS) { 3388 ipmp_freeaddrinfo(adinfop); 3389 goto failed; 3390 } 3391 } 3392 } 3393 3394 /* 3395 * Add information for each configured phyint. 3396 */ 3397 for (pi = phyints; pi != NULL; pi = pi->pi_next) { 3398 retval = getifinfo(pi->pi_name, &ifinfop); 3399 if (retval != IPMP_SUCCESS) 3400 goto failed; 3401 3402 retval = ipmp_snap_addifinfo(snap, ifinfop); 3403 if (retval != IPMP_SUCCESS) { 3404 ipmp_freeifinfo(ifinfop); 3405 goto failed; 3406 } 3407 } 3408 3409 *snapp = snap; 3410 return (IPMP_SUCCESS); 3411 failed: 3412 ipmp_snap_free(snap); 3413 return (retval); 3414 } 3415