1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include "mpd_defs.h" 27 #include "mpd_tables.h" 28 29 /* 30 * Global list of phyints, phyint instances, phyint groups and the anonymous 31 * group; the latter is initialized in phyint_init(). 32 */ 33 struct phyint *phyints = NULL; 34 struct phyint_instance *phyint_instances = NULL; 35 struct phyint_group *phyint_groups = NULL; 36 struct phyint_group *phyint_anongroup; 37 38 /* 39 * Grouplist signature; initialized in phyint_init(). 40 */ 41 static uint64_t phyint_grouplistsig; 42 43 static void phyint_inst_insert(struct phyint_instance *pii); 44 static void phyint_inst_print(struct phyint_instance *pii); 45 46 static void phyint_insert(struct phyint *pi, struct phyint_group *pg); 47 static void phyint_delete(struct phyint *pi); 48 static boolean_t phyint_is_usable(struct phyint *pi); 49 50 static void logint_print(struct logint *li); 51 static void logint_insert(struct phyint_instance *pii, struct logint *li); 52 static struct logint *logint_lookup(struct phyint_instance *pii, char *li_name); 53 54 static void target_print(struct target *tg); 55 static void target_insert(struct phyint_instance *pii, struct target *tg); 56 static struct target *target_first(struct phyint_instance *pii); 57 static struct target *target_select_best(struct phyint_instance *pii); 58 static void target_flush_hosts(struct phyint_group *pg); 59 60 static void reset_pii_probes(struct phyint_instance *pii, struct target *tg); 61 62 static boolean_t phyint_inst_v6_sockinit(struct phyint_instance *pii); 63 static boolean_t phyint_inst_v4_sockinit(struct phyint_instance *pii); 64 65 static int phyint_state_event(struct phyint_group *pg, struct phyint *pi); 66 static int phyint_group_state_event(struct phyint_group *pg); 67 static int phyint_group_change_event(struct phyint_group *pg, ipmp_group_op_t); 68 static int phyint_group_member_event(struct phyint_group *pg, struct phyint *pi, 69 ipmp_if_op_t op); 70 71 static int logint_upcount(struct phyint *pi); 72 static uint64_t gensig(void); 73 74 /* Initialize any per-file global state. Returns 0 on success, -1 on failure */ 75 int 76 phyint_init(void) 77 { 78 phyint_grouplistsig = gensig(); 79 if (track_all_phyints) { 80 phyint_anongroup = phyint_group_create(""); 81 if (phyint_anongroup == NULL) 82 return (-1); 83 phyint_group_insert(phyint_anongroup); 84 } 85 return (0); 86 } 87 88 /* Return the phyint with the given name */ 89 struct phyint * 90 phyint_lookup(const char *name) 91 { 92 struct phyint *pi; 93 94 if (debug & D_PHYINT) 95 logdebug("phyint_lookup(%s)\n", name); 96 97 for (pi = phyints; pi != NULL; pi = pi->pi_next) { 98 if (strncmp(pi->pi_name, name, sizeof (pi->pi_name)) == 0) 99 break; 100 } 101 return (pi); 102 } 103 104 /* 105 * Lookup a phyint in the group that has the same hardware address as `pi', or 106 * NULL if there's none. If `online_only' is set, then only online phyints 107 * are considered when matching. Otherwise, phyints that had been offlined 108 * due to a duplicate hardware address will also be considered. 109 */ 110 static struct phyint * 111 phyint_lookup_hwaddr(struct phyint *pi, boolean_t online_only) 112 { 113 struct phyint *pi2; 114 115 if (pi->pi_group == phyint_anongroup) 116 return (NULL); 117 118 for (pi2 = pi->pi_group->pg_phyint; pi2 != NULL; pi2 = pi2->pi_pgnext) { 119 if (pi2 == pi) 120 continue; 121 122 /* 123 * NOTE: even when online_only is B_FALSE, we ignore phyints 124 * that are administratively offline (rather than offline 125 * because they're dups); when they're brought back online, 126 * they'll be flagged as dups if need be. 127 */ 128 if (pi2->pi_state == PI_OFFLINE && 129 (online_only || !pi2->pi_hwaddrdup)) 130 continue; 131 132 if (pi2->pi_hwaddrlen == pi->pi_hwaddrlen && 133 bcmp(pi2->pi_hwaddr, pi->pi_hwaddr, pi->pi_hwaddrlen) == 0) 134 return (pi2); 135 } 136 return (NULL); 137 } 138 139 /* 140 * Respond to DLPI notifications. Currently, this only processes physical 141 * address changes for the phyint passed via `arg' by onlining or offlining 142 * phyints in the group. 143 */ 144 /* ARGSUSED */ 145 static void 146 phyint_link_notify(dlpi_handle_t dh, dlpi_notifyinfo_t *dnip, void *arg) 147 { 148 struct phyint *pi = arg; 149 struct phyint *oduppi = NULL, *duppi = NULL; 150 151 assert((dnip->dni_note & pi->pi_notes) != 0); 152 153 if (dnip->dni_note != DL_NOTE_PHYS_ADDR) 154 return; 155 156 assert(dnip->dni_physaddrlen <= DLPI_PHYSADDR_MAX); 157 158 /* 159 * If our hardware address hasn't changed, there's nothing to do. 160 */ 161 if (pi->pi_hwaddrlen == dnip->dni_physaddrlen && 162 bcmp(pi->pi_hwaddr, dnip->dni_physaddr, pi->pi_hwaddrlen) == 0) 163 return; 164 165 oduppi = phyint_lookup_hwaddr(pi, _B_FALSE); 166 pi->pi_hwaddrlen = dnip->dni_physaddrlen; 167 (void) memcpy(pi->pi_hwaddr, dnip->dni_physaddr, pi->pi_hwaddrlen); 168 duppi = phyint_lookup_hwaddr(pi, _B_FALSE); 169 170 if (oduppi != NULL || pi->pi_hwaddrdup) { 171 /* 172 * Our old hardware address was a duplicate. If we'd been 173 * offlined because of it, and our new hardware address is not 174 * a duplicate, then bring us online. Otherwise, `oduppi' 175 * must've been the one brought offline; bring it online. 176 */ 177 if (pi->pi_hwaddrdup) { 178 if (duppi == NULL) 179 (void) phyint_undo_offline(pi); 180 } else { 181 assert(oduppi->pi_hwaddrdup); 182 (void) phyint_undo_offline(oduppi); 183 } 184 } 185 186 if (duppi != NULL && !pi->pi_hwaddrdup) { 187 /* 188 * Our new hardware address was a duplicate and we're not 189 * yet flagged as a duplicate; bring us offline. 190 */ 191 pi->pi_hwaddrdup = _B_TRUE; 192 (void) phyint_offline(pi, 0); 193 } 194 } 195 196 /* 197 * Initialize information about the underlying link for `pi', and set us 198 * up to be notified about future changes. Returns _B_TRUE on success. 199 */ 200 boolean_t 201 phyint_link_init(struct phyint *pi) 202 { 203 int retval; 204 uint_t notes; 205 const char *errmsg; 206 dlpi_notifyid_t id; 207 208 pi->pi_notes = 0; 209 retval = dlpi_open(pi->pi_name, &pi->pi_dh, 0); 210 if (retval != DLPI_SUCCESS) { 211 pi->pi_dh = NULL; 212 errmsg = "cannot open"; 213 goto failed; 214 } 215 216 pi->pi_hwaddrlen = DLPI_PHYSADDR_MAX; 217 retval = dlpi_get_physaddr(pi->pi_dh, DL_CURR_PHYS_ADDR, pi->pi_hwaddr, 218 &pi->pi_hwaddrlen); 219 if (retval != DLPI_SUCCESS) { 220 errmsg = "cannot get hardware address"; 221 goto failed; 222 } 223 224 retval = dlpi_bind(pi->pi_dh, DLPI_ANY_SAP, NULL); 225 if (retval != DLPI_SUCCESS) { 226 errmsg = "cannot bind to DLPI_ANY_SAP"; 227 goto failed; 228 } 229 230 /* 231 * Check if the link supports DLPI link state notifications. For 232 * historical reasons, the actual changes are tracked through routing 233 * sockets, so we immediately disable the notification upon success. 234 */ 235 notes = DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN; 236 retval = dlpi_enabnotify(pi->pi_dh, notes, phyint_link_notify, pi, &id); 237 if (retval == DLPI_SUCCESS) { 238 (void) dlpi_disabnotify(pi->pi_dh, id, NULL); 239 pi->pi_notes |= notes; 240 } 241 242 /* 243 * Enable notification of hardware address changes to keep pi_hwaddr 244 * up-to-date and track if we need to offline/undo-offline phyints. 245 */ 246 notes = DL_NOTE_PHYS_ADDR; 247 retval = dlpi_enabnotify(pi->pi_dh, notes, phyint_link_notify, pi, &id); 248 if (retval == DLPI_SUCCESS && poll_add(dlpi_fd(pi->pi_dh)) == 0) 249 pi->pi_notes |= notes; 250 251 return (_B_TRUE); 252 failed: 253 logerr("%s: %s: %s\n", pi->pi_name, errmsg, dlpi_strerror(retval)); 254 if (pi->pi_dh != NULL) { 255 dlpi_close(pi->pi_dh); 256 pi->pi_dh = NULL; 257 } 258 return (_B_FALSE); 259 } 260 261 /* 262 * Close use of link on `pi'. 263 */ 264 void 265 phyint_link_close(struct phyint *pi) 266 { 267 if (pi->pi_notes & DL_NOTE_PHYS_ADDR) { 268 (void) poll_remove(dlpi_fd(pi->pi_dh)); 269 pi->pi_notes &= ~DL_NOTE_PHYS_ADDR; 270 } 271 272 /* 273 * NOTE: we don't clear pi_notes here so that iflinkstate() can still 274 * properly report the link state even when offline (which is possible 275 * since we use IFF_RUNNING to track link state). 276 */ 277 dlpi_close(pi->pi_dh); 278 pi->pi_dh = NULL; 279 } 280 281 /* Return the phyint instance with the given name and the given family */ 282 struct phyint_instance * 283 phyint_inst_lookup(int af, char *name) 284 { 285 struct phyint *pi; 286 287 if (debug & D_PHYINT) 288 logdebug("phyint_inst_lookup(%s %s)\n", AF_STR(af), name); 289 290 assert(af == AF_INET || af == AF_INET6); 291 292 pi = phyint_lookup(name); 293 if (pi == NULL) 294 return (NULL); 295 296 return (PHYINT_INSTANCE(pi, af)); 297 } 298 299 struct phyint_group * 300 phyint_group_lookup(const char *pg_name) 301 { 302 struct phyint_group *pg; 303 304 if (debug & D_PHYINT) 305 logdebug("phyint_group_lookup(%s)\n", pg_name); 306 307 for (pg = phyint_groups; pg != NULL; pg = pg->pg_next) { 308 if (strncmp(pg->pg_name, pg_name, sizeof (pg->pg_name)) == 0) 309 break; 310 } 311 return (pg); 312 } 313 314 /* 315 * Insert the phyint in the linked list of all phyints. If the phyint belongs 316 * to some group, insert it in the phyint group list. 317 */ 318 static void 319 phyint_insert(struct phyint *pi, struct phyint_group *pg) 320 { 321 if (debug & D_PHYINT) 322 logdebug("phyint_insert(%s '%s')\n", pi->pi_name, pg->pg_name); 323 324 /* Insert the phyint at the head of the 'all phyints' list */ 325 pi->pi_next = phyints; 326 pi->pi_prev = NULL; 327 if (phyints != NULL) 328 phyints->pi_prev = pi; 329 phyints = pi; 330 331 /* 332 * Insert the phyint at the head of the 'phyint_group members' list 333 * of the phyint group to which it belongs. 334 */ 335 pi->pi_pgnext = NULL; 336 pi->pi_pgprev = NULL; 337 pi->pi_group = pg; 338 339 pi->pi_pgnext = pg->pg_phyint; 340 if (pi->pi_pgnext != NULL) 341 pi->pi_pgnext->pi_pgprev = pi; 342 pg->pg_phyint = pi; 343 344 /* Refresh the group state now that this phyint has been added */ 345 phyint_group_refresh_state(pg); 346 347 pg->pg_sig++; 348 (void) phyint_group_member_event(pg, pi, IPMP_IF_ADD); 349 } 350 351 /* Insert the phyint instance in the linked list of all phyint instances. */ 352 static void 353 phyint_inst_insert(struct phyint_instance *pii) 354 { 355 if (debug & D_PHYINT) { 356 logdebug("phyint_inst_insert(%s %s)\n", 357 AF_STR(pii->pii_af), pii->pii_name); 358 } 359 360 /* 361 * Insert the phyint at the head of the 'all phyint instances' list. 362 */ 363 pii->pii_next = phyint_instances; 364 pii->pii_prev = NULL; 365 if (phyint_instances != NULL) 366 phyint_instances->pii_prev = pii; 367 phyint_instances = pii; 368 } 369 370 /* 371 * Create a new phyint with the given parameters. Also insert it into 372 * the list of all phyints and the list of phyint group members by calling 373 * phyint_insert(). 374 */ 375 static struct phyint * 376 phyint_create(char *pi_name, struct phyint_group *pg, uint_t ifindex, 377 uint64_t flags) 378 { 379 struct phyint *pi; 380 381 pi = calloc(1, sizeof (struct phyint)); 382 if (pi == NULL) { 383 logperror("phyint_create: calloc"); 384 return (NULL); 385 } 386 387 /* 388 * Record the phyint values. 389 */ 390 (void) strlcpy(pi->pi_name, pi_name, sizeof (pi->pi_name)); 391 pi->pi_taddrthresh = getcurrentsec() + TESTADDR_CONF_TIME; 392 pi->pi_ifindex = ifindex; 393 pi->pi_icmpid = htons(((getpid() & 0xFF) << 8) | (ifindex & 0xFF)); 394 395 /* 396 * If the interface is offline, we set the state to PI_OFFLINE. 397 * Otherwise, we optimistically start in the PI_RUNNING state. Later 398 * (in process_link_state_changes()), we will adjust this to match the 399 * current state of the link. Further, if test addresses are 400 * subsequently assigned, we will transition to PI_NOTARGETS and then 401 * to either PI_RUNNING or PI_FAILED depending on the probe results. 402 */ 403 pi->pi_state = (flags & IFF_OFFLINE) ? PI_OFFLINE : PI_RUNNING; 404 pi->pi_flags = PHYINT_FLAGS(flags); 405 406 /* 407 * Initialize the link state. The link state is initialized to 408 * up, so that if the link is down when IPMP starts monitoring 409 * the interface, it will appear as though there has been a 410 * transition from the link up to link down. This avoids 411 * having to treat this situation as a special case. 412 */ 413 INIT_LINK_STATE(pi); 414 415 if (!phyint_link_init(pi)) { 416 free(pi); 417 return (NULL); 418 } 419 420 /* 421 * Insert the phyint in the list of all phyints, and the 422 * list of phyint group members 423 */ 424 phyint_insert(pi, pg); 425 426 return (pi); 427 } 428 429 /* 430 * Create a new phyint instance belonging to the phyint 'pi' and address 431 * family 'af'. Also insert it into the list of all phyint instances by 432 * calling phyint_inst_insert(). 433 */ 434 static struct phyint_instance * 435 phyint_inst_create(struct phyint *pi, int af) 436 { 437 struct phyint_instance *pii; 438 439 pii = calloc(1, sizeof (struct phyint_instance)); 440 if (pii == NULL) { 441 logperror("phyint_inst_create: calloc"); 442 return (NULL); 443 } 444 445 /* 446 * Attach the phyint instance to the phyint. 447 * Set the back pointers as well 448 */ 449 pii->pii_phyint = pi; 450 if (af == AF_INET) 451 pi->pi_v4 = pii; 452 else 453 pi->pi_v6 = pii; 454 455 pii->pii_in_use = 1; 456 pii->pii_probe_sock = -1; 457 pii->pii_snxt = 1; 458 pii->pii_af = af; 459 pii->pii_fd_hrtime = gethrtime() + 460 (FAILURE_DETECTION_QP * (hrtime_t)NANOSEC); 461 pii->pii_flags = pi->pi_flags; 462 463 /* Insert the phyint instance in the list of all phyint instances. */ 464 phyint_inst_insert(pii); 465 return (pii); 466 } 467 468 /* 469 * Change the state of phyint `pi' to state `state'. 470 */ 471 void 472 phyint_chstate(struct phyint *pi, enum pi_state state) 473 { 474 /* 475 * To simplify things, some callers always set a given state 476 * regardless of the previous state of the phyint (e.g., setting 477 * PI_RUNNING when it's already set). We shouldn't bother 478 * generating an event or consuming a signature for these, since 479 * the actual state of the interface is unchanged. 480 */ 481 if (pi->pi_state == state) 482 return; 483 484 pi->pi_state = state; 485 phyint_changed(pi); 486 } 487 488 /* 489 * Note that `pi' has changed state. 490 */ 491 void 492 phyint_changed(struct phyint *pi) 493 { 494 pi->pi_group->pg_sig++; 495 (void) phyint_state_event(pi->pi_group, pi); 496 } 497 498 /* 499 * Insert the phyint group in the linked list of all phyint groups 500 * at the head of the list 501 */ 502 void 503 phyint_group_insert(struct phyint_group *pg) 504 { 505 pg->pg_next = phyint_groups; 506 pg->pg_prev = NULL; 507 if (phyint_groups != NULL) 508 phyint_groups->pg_prev = pg; 509 phyint_groups = pg; 510 511 phyint_grouplistsig++; 512 (void) phyint_group_change_event(pg, IPMP_GROUP_ADD); 513 } 514 515 /* 516 * Create a new phyint group called 'name'. 517 */ 518 struct phyint_group * 519 phyint_group_create(const char *name) 520 { 521 struct phyint_group *pg; 522 523 if (debug & D_PHYINT) 524 logdebug("phyint_group_create(%s)\n", name); 525 526 pg = calloc(1, sizeof (struct phyint_group)); 527 if (pg == NULL) { 528 logperror("phyint_group_create: calloc"); 529 return (NULL); 530 } 531 532 (void) strlcpy(pg->pg_name, name, sizeof (pg->pg_name)); 533 pg->pg_sig = gensig(); 534 pg->pg_fdt = user_failure_detection_time; 535 pg->pg_probeint = user_probe_interval; 536 pg->pg_in_use = _B_TRUE; 537 538 /* 539 * Normal groups always start in the PG_FAILED state since they 540 * have no active interfaces. In contrast, anonymous groups are 541 * heterogeneous and thus always PG_OK. 542 */ 543 pg->pg_state = (name[0] == '\0' ? PG_OK : PG_FAILED); 544 545 return (pg); 546 } 547 548 /* 549 * Change the state of the phyint group `pg' to state `state'. 550 */ 551 void 552 phyint_group_chstate(struct phyint_group *pg, enum pg_state state) 553 { 554 assert(pg != phyint_anongroup); 555 556 /* 557 * To simplify things, some callers always set a given state 558 * regardless of the previous state of the group (e.g., setting 559 * PG_DEGRADED when it's already set). We shouldn't bother 560 * generating an event or consuming a signature for these, since 561 * the actual state of the group is unchanged. 562 */ 563 if (pg->pg_state == state) 564 return; 565 566 pg->pg_state = state; 567 568 switch (state) { 569 case PG_FAILED: 570 /* 571 * We can never know with certainty that a group has 572 * failed. It is possible that all known targets have 573 * failed simultaneously, and new targets have come up 574 * instead. If the targets are routers then router 575 * discovery will kick in, and we will see the new routers 576 * thru routing socket messages. But if the targets are 577 * hosts, we have to discover it by multicast. So flush 578 * all the host targets. The next probe will send out a 579 * multicast echo request. If this is a group failure, we 580 * will still not see any response, otherwise the group 581 * will be repaired after we get NUM_PROBE_REPAIRS 582 * consecutive unicast replies on any phyint. 583 */ 584 target_flush_hosts(pg); 585 break; 586 587 case PG_OK: 588 case PG_DEGRADED: 589 break; 590 591 default: 592 logerr("phyint_group_chstate: invalid group state %d; " 593 "aborting\n", state); 594 abort(); 595 } 596 597 pg->pg_sig++; 598 (void) phyint_group_state_event(pg); 599 } 600 601 /* 602 * Create a new phyint instance and initialize it from the values supplied by 603 * the kernel. Always check for ENXIO before logging any error, because the 604 * interface could have vanished after completion of SIOCGLIFCONF. 605 * Return values: 606 * pointer to the phyint instance on success 607 * NULL on failure Eg. if the phyint instance is not found in the kernel 608 */ 609 struct phyint_instance * 610 phyint_inst_init_from_k(int af, char *pi_name) 611 { 612 char pg_name[LIFNAMSIZ + 1]; 613 int ifsock; 614 uint_t ifindex; 615 uint64_t flags; 616 struct lifreq lifr; 617 struct phyint *pi; 618 struct phyint_instance *pii; 619 boolean_t pi_created; 620 struct phyint_group *pg; 621 622 retry: 623 pii = NULL; 624 pi = NULL; 625 pg = NULL; 626 pi_created = _B_FALSE; 627 628 if (debug & D_PHYINT) { 629 logdebug("phyint_inst_init_from_k(%s %s)\n", 630 AF_STR(af), pi_name); 631 } 632 633 assert(af == AF_INET || af == AF_INET6); 634 635 /* Get the socket for doing ioctls */ 636 ifsock = (af == AF_INET) ? ifsock_v4 : ifsock_v6; 637 638 /* 639 * Get the interface flags. Ignore virtual interfaces, IPMP 640 * meta-interfaces, point-to-point interfaces, and interfaces 641 * that can't support multicast. 642 */ 643 (void) strlcpy(lifr.lifr_name, pi_name, sizeof (lifr.lifr_name)); 644 if (ioctl(ifsock, SIOCGLIFFLAGS, (char *)&lifr) < 0) { 645 if (errno != ENXIO) { 646 logperror("phyint_inst_init_from_k:" 647 " ioctl (get flags)"); 648 } 649 return (NULL); 650 } 651 flags = lifr.lifr_flags; 652 if (!(flags & IFF_MULTICAST) || 653 (flags & (IFF_VIRTUAL|IFF_IPMP|IFF_POINTOPOINT))) 654 return (NULL); 655 656 /* 657 * Get the ifindex for recording later in our tables, in case we need 658 * to create a new phyint. 659 */ 660 if (ioctl(ifsock, SIOCGLIFINDEX, (char *)&lifr) < 0) { 661 if (errno != ENXIO) { 662 logperror("phyint_inst_init_from_k: " 663 " ioctl (get lifindex)"); 664 } 665 return (NULL); 666 } 667 ifindex = lifr.lifr_index; 668 669 /* 670 * Get the phyint group name of this phyint, from the kernel. 671 */ 672 if (ioctl(ifsock, SIOCGLIFGROUPNAME, (char *)&lifr) < 0) { 673 if (errno != ENXIO) { 674 logperror("phyint_inst_init_from_k: " 675 "ioctl (get group name)"); 676 } 677 return (NULL); 678 } 679 (void) strlcpy(pg_name, lifr.lifr_groupname, sizeof (pg_name)); 680 681 /* 682 * If the phyint is not part of any group, pg_name is the 683 * null string. If 'track_all_phyints' is false, there is no 684 * need to create a phyint. 685 */ 686 if (pg_name[0] == '\0' && !track_all_phyints) { 687 /* 688 * If the IFF_FAILED, IFF_INACTIVE, or IFF_OFFLINE flags are 689 * set, reset them. These flags shouldn't be set if in.mpathd 690 * isn't tracking the interface. 691 */ 692 if ((flags & (IFF_FAILED | IFF_INACTIVE | IFF_OFFLINE))) { 693 lifr.lifr_flags = flags & 694 ~(IFF_FAILED | IFF_INACTIVE | IFF_OFFLINE); 695 if (ioctl(ifsock, SIOCSLIFFLAGS, (char *)&lifr) < 0) { 696 if (errno != ENXIO) { 697 logperror("phyint_inst_init_from_k:" 698 " ioctl (set flags)"); 699 } 700 } 701 } 702 return (NULL); 703 } 704 705 /* 706 * We need to create a new phyint instance. We may also need to 707 * create the group if e.g. the SIOCGLIFCONF loop in initifs() found 708 * an underlying interface before it found its IPMP meta-interface. 709 * Note that we keep any created groups even if phyint_inst_from_k() 710 * fails since a group's existence is not dependent on the ability of 711 * in.mpathd to the track the group's interfaces. 712 */ 713 if ((pg = phyint_group_lookup(pg_name)) == NULL) { 714 if ((pg = phyint_group_create(pg_name)) == NULL) { 715 logerr("phyint_inst_init_from_k: cannot create group " 716 "%s\n", pg_name); 717 return (NULL); 718 } 719 phyint_group_insert(pg); 720 } 721 722 /* 723 * Lookup the phyint. If the phyint does not exist create it. 724 */ 725 pi = phyint_lookup(pi_name); 726 if (pi == NULL) { 727 pi = phyint_create(pi_name, pg, ifindex, flags); 728 if (pi == NULL) { 729 logerr("phyint_inst_init_from_k:" 730 " unable to create phyint %s\n", pi_name); 731 return (NULL); 732 } 733 pi_created = _B_TRUE; 734 } else { 735 /* The phyint exists already. */ 736 assert(pi_created == _B_FALSE); 737 /* 738 * Normally we should see consistent values for the IPv4 and 739 * IPv6 instances, for phyint properties. If we don't, it 740 * means things have changed underneath us, and we should 741 * resync our tables with the kernel. Check whether the 742 * interface index has changed. If so, it is most likely 743 * the interface has been unplumbed and replumbed, 744 * while we are yet to update our tables. Do it now. 745 */ 746 if (pi->pi_ifindex != ifindex) { 747 phyint_inst_delete(PHYINT_INSTANCE(pi, AF_OTHER(af))); 748 goto retry; 749 } 750 assert(PHYINT_INSTANCE(pi, af) == NULL); 751 752 /* 753 * If the group name seen by the IPv4 and IPv6 instances 754 * are different, it is most likely the groupname has 755 * changed, while we are yet to update our tables. Do it now. 756 */ 757 if (strcmp(pi->pi_group->pg_name, pg_name) != 0) { 758 phyint_inst_delete(PHYINT_INSTANCE(pi, 759 AF_OTHER(af))); 760 goto retry; 761 } 762 } 763 764 /* 765 * Create a new phyint instance, corresponding to the 'af' 766 * passed in. 767 */ 768 pii = phyint_inst_create(pi, af); 769 if (pii == NULL) { 770 logerr("phyint_inst_init_from_k: unable to create" 771 "phyint inst %s\n", pi->pi_name); 772 if (pi_created) 773 phyint_delete(pi); 774 775 return (NULL); 776 } 777 778 if (pi_created) { 779 /* 780 * If this phyint does not have a unique hardware address in its 781 * group, offline it. (The change_pif_flags() implementation 782 * requires that we defer this until after the phyint_instance 783 * is created.) 784 */ 785 if (phyint_lookup_hwaddr(pi, _B_TRUE) != NULL) { 786 pi->pi_hwaddrdup = _B_TRUE; 787 (void) phyint_offline(pi, 0); 788 } 789 } 790 791 return (pii); 792 } 793 794 /* 795 * Bind pii_probe_sock to the address associated with pii_probe_logint. 796 * This socket will be used for sending and receiving ICMP/ICMPv6 probes to 797 * targets. Do the common part in this function, and complete the 798 * initializations by calling the protocol specific functions 799 * phyint_inst_v{4,6}_sockinit() respectively. 800 * 801 * Return values: _B_TRUE/_B_FALSE for success or failure respectively. 802 */ 803 boolean_t 804 phyint_inst_sockinit(struct phyint_instance *pii) 805 { 806 boolean_t success; 807 struct phyint_group *pg; 808 809 if (debug & D_PHYINT) { 810 logdebug("phyint_inst_sockinit(%s %s)\n", 811 AF_STR(pii->pii_af), pii->pii_name); 812 } 813 814 assert(pii->pii_probe_logint != NULL); 815 assert(pii->pii_probe_logint->li_flags & IFF_UP); 816 assert(pii->pii_probe_logint->li_flags & IFF_NOFAILOVER); 817 assert(pii->pii_af == AF_INET || pii->pii_af == AF_INET6); 818 819 /* 820 * If the socket is already bound, close pii_probe_sock 821 */ 822 if (pii->pii_probe_sock != -1) 823 close_probe_socket(pii, _B_TRUE); 824 825 /* 826 * If the phyint is not part of a named group and track_all_phyints is 827 * false, simply return. 828 */ 829 pg = pii->pii_phyint->pi_group; 830 if (pg == phyint_anongroup && !track_all_phyints) { 831 if (debug & D_PHYINT) 832 logdebug("phyint_inst_sockinit: no group\n"); 833 return (_B_FALSE); 834 } 835 836 /* 837 * Initialize the socket by calling the protocol specific function. 838 * If it succeeds, add the socket to the poll list. 839 */ 840 if (pii->pii_af == AF_INET6) 841 success = phyint_inst_v6_sockinit(pii); 842 else 843 success = phyint_inst_v4_sockinit(pii); 844 845 if (success && (poll_add(pii->pii_probe_sock) == 0)) 846 return (_B_TRUE); 847 848 /* Something failed, cleanup and return false */ 849 if (pii->pii_probe_sock != -1) 850 close_probe_socket(pii, _B_FALSE); 851 852 return (_B_FALSE); 853 } 854 855 /* 856 * IPv6 specific part in initializing the pii_probe_sock. This socket is 857 * used to send/receive ICMPv6 probe packets. 858 */ 859 static boolean_t 860 phyint_inst_v6_sockinit(struct phyint_instance *pii) 861 { 862 icmp6_filter_t filter; 863 int hopcount = 1; 864 int off = 0; 865 int on = 1; 866 struct sockaddr_in6 testaddr; 867 868 /* 869 * Open a raw socket with ICMPv6 protocol. 870 * 871 * Use IPV6_BOUND_IF to make sure that probes are sent and received on 872 * the specified phyint only. Bind to the test address to ensure that 873 * the responses are sent to the specified phyint. 874 * 875 * Set the hopcount to 1 so that probe packets are not routed. 876 * Disable multicast loopback. Set the receive filter to 877 * receive only ICMPv6 echo replies. 878 */ 879 pii->pii_probe_sock = socket(pii->pii_af, SOCK_RAW, IPPROTO_ICMPV6); 880 if (pii->pii_probe_sock < 0) { 881 logperror_pii(pii, "phyint_inst_v6_sockinit: socket"); 882 return (_B_FALSE); 883 } 884 885 bzero(&testaddr, sizeof (testaddr)); 886 testaddr.sin6_family = AF_INET6; 887 testaddr.sin6_port = 0; 888 testaddr.sin6_addr = pii->pii_probe_logint->li_addr; 889 890 if (bind(pii->pii_probe_sock, (struct sockaddr *)&testaddr, 891 sizeof (testaddr)) < 0) { 892 logperror_pii(pii, "phyint_inst_v6_sockinit: IPv6 bind"); 893 return (_B_FALSE); 894 } 895 896 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_MULTICAST_IF, 897 (char *)&pii->pii_ifindex, sizeof (uint_t)) < 0) { 898 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 899 " IPV6_MULTICAST_IF"); 900 return (_B_FALSE); 901 } 902 903 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_BOUND_IF, 904 &pii->pii_ifindex, sizeof (uint_t)) < 0) { 905 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 906 " IPV6_BOUND_IF"); 907 return (_B_FALSE); 908 } 909 910 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_UNICAST_HOPS, 911 (char *)&hopcount, sizeof (hopcount)) < 0) { 912 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 913 " IPV6_UNICAST_HOPS"); 914 return (_B_FALSE); 915 } 916 917 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_MULTICAST_HOPS, 918 (char *)&hopcount, sizeof (hopcount)) < 0) { 919 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 920 " IPV6_MULTICAST_HOPS"); 921 return (_B_FALSE); 922 } 923 924 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_MULTICAST_LOOP, 925 (char *)&off, sizeof (off)) < 0) { 926 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 927 " IPV6_MULTICAST_LOOP"); 928 return (_B_FALSE); 929 } 930 931 /* 932 * Filter out so that we only receive ICMP echo replies 933 */ 934 ICMP6_FILTER_SETBLOCKALL(&filter); 935 ICMP6_FILTER_SETPASS(ICMP6_ECHO_REPLY, &filter); 936 937 if (setsockopt(pii->pii_probe_sock, IPPROTO_ICMPV6, ICMP6_FILTER, 938 (char *)&filter, sizeof (filter)) < 0) { 939 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 940 " ICMP6_FILTER"); 941 return (_B_FALSE); 942 } 943 944 /* Enable receipt of hoplimit */ 945 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, 946 &on, sizeof (on)) < 0) { 947 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 948 " IPV6_RECVHOPLIMIT"); 949 return (_B_FALSE); 950 } 951 952 /* Enable receipt of timestamp */ 953 if (setsockopt(pii->pii_probe_sock, SOL_SOCKET, SO_TIMESTAMP, 954 &on, sizeof (on)) < 0) { 955 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 956 " SO_TIMESTAMP"); 957 return (_B_FALSE); 958 } 959 960 return (_B_TRUE); 961 } 962 963 /* 964 * IPv4 specific part in initializing the pii_probe_sock. This socket is 965 * used to send/receive ICMPv4 probe packets. 966 */ 967 static boolean_t 968 phyint_inst_v4_sockinit(struct phyint_instance *pii) 969 { 970 struct sockaddr_in testaddr; 971 char char_off = 0; 972 int ttl = 1; 973 char char_ttl = 1; 974 int on = 1; 975 976 /* 977 * Open a raw socket with ICMPv4 protocol. 978 * 979 * Use IP_BOUND_IF to make sure that probes are sent and received on 980 * the specified phyint only. Bind to the test address to ensure that 981 * the responses are sent to the specified phyint. 982 * 983 * Set the ttl to 1 so that probe packets are not routed. 984 * Disable multicast loopback. Enable receipt of timestamp. 985 */ 986 pii->pii_probe_sock = socket(pii->pii_af, SOCK_RAW, IPPROTO_ICMP); 987 if (pii->pii_probe_sock < 0) { 988 logperror_pii(pii, "phyint_inst_v4_sockinit: socket"); 989 return (_B_FALSE); 990 } 991 992 bzero(&testaddr, sizeof (testaddr)); 993 testaddr.sin_family = AF_INET; 994 testaddr.sin_port = 0; 995 IN6_V4MAPPED_TO_INADDR(&pii->pii_probe_logint->li_addr, 996 &testaddr.sin_addr); 997 998 if (bind(pii->pii_probe_sock, (struct sockaddr *)&testaddr, 999 sizeof (testaddr)) < 0) { 1000 logperror_pii(pii, "phyint_inst_v4_sockinit: IPv4 bind"); 1001 return (_B_FALSE); 1002 } 1003 1004 if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_BOUND_IF, 1005 &pii->pii_ifindex, sizeof (uint_t)) < 0) { 1006 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1007 " IP_BOUND_IF"); 1008 return (_B_FALSE); 1009 } 1010 1011 if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_MULTICAST_IF, 1012 (char *)&testaddr.sin_addr, sizeof (struct in_addr)) < 0) { 1013 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1014 " IP_MULTICAST_IF"); 1015 return (_B_FALSE); 1016 } 1017 1018 if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_TTL, 1019 (char *)&ttl, sizeof (ttl)) < 0) { 1020 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1021 " IP_TTL"); 1022 return (_B_FALSE); 1023 } 1024 1025 if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_MULTICAST_LOOP, 1026 (char *)&char_off, sizeof (char_off)) == -1) { 1027 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1028 " IP_MULTICAST_LOOP"); 1029 return (_B_FALSE); 1030 } 1031 1032 if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_MULTICAST_TTL, 1033 (char *)&char_ttl, sizeof (char_ttl)) == -1) { 1034 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1035 " IP_MULTICAST_TTL"); 1036 return (_B_FALSE); 1037 } 1038 1039 if (setsockopt(pii->pii_probe_sock, SOL_SOCKET, SO_TIMESTAMP, &on, 1040 sizeof (on)) < 0) { 1041 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1042 " SO_TIMESTAMP"); 1043 return (_B_FALSE); 1044 } 1045 1046 return (_B_TRUE); 1047 } 1048 1049 /* 1050 * Remove the phyint group from the list of 'all phyint groups' 1051 * and free it. 1052 */ 1053 void 1054 phyint_group_delete(struct phyint_group *pg) 1055 { 1056 /* 1057 * The anonymous group always exists, even when empty. 1058 */ 1059 if (pg == phyint_anongroup) 1060 return; 1061 1062 if (debug & D_PHYINT) 1063 logdebug("phyint_group_delete('%s')\n", pg->pg_name); 1064 1065 /* 1066 * The phyint group must be empty, and must not have any phyints. 1067 * The phyint group must be in the list of all phyint groups 1068 */ 1069 assert(pg->pg_phyint == NULL); 1070 assert(phyint_groups == pg || pg->pg_prev != NULL); 1071 1072 if (pg->pg_prev != NULL) 1073 pg->pg_prev->pg_next = pg->pg_next; 1074 else 1075 phyint_groups = pg->pg_next; 1076 1077 if (pg->pg_next != NULL) 1078 pg->pg_next->pg_prev = pg->pg_prev; 1079 1080 pg->pg_next = NULL; 1081 pg->pg_prev = NULL; 1082 1083 phyint_grouplistsig++; 1084 (void) phyint_group_change_event(pg, IPMP_GROUP_REMOVE); 1085 1086 addrlist_free(&pg->pg_addrs); 1087 free(pg); 1088 } 1089 1090 /* 1091 * Refresh the state of `pg' based on its current members. 1092 */ 1093 void 1094 phyint_group_refresh_state(struct phyint_group *pg) 1095 { 1096 enum pg_state state; 1097 enum pg_state origstate = pg->pg_state; 1098 struct phyint *pi, *usablepi; 1099 uint_t nif = 0, nusable = 0; 1100 1101 /* 1102 * Anonymous groups never change state. 1103 */ 1104 if (pg == phyint_anongroup) 1105 return; 1106 1107 for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) { 1108 nif++; 1109 if (phyint_is_usable(pi)) { 1110 nusable++; 1111 usablepi = pi; 1112 } 1113 } 1114 1115 if (nusable == 0) 1116 state = PG_FAILED; 1117 else if (nif == nusable) 1118 state = PG_OK; 1119 else 1120 state = PG_DEGRADED; 1121 1122 phyint_group_chstate(pg, state); 1123 1124 /* 1125 * If we're shutting down, skip logging messages since otherwise our 1126 * shutdown housecleaning will make us report that groups are unusable. 1127 */ 1128 if (cleanup_started) 1129 return; 1130 1131 /* 1132 * NOTE: We use pg_failmsg_printed rather than origstate since 1133 * otherwise at startup we'll log a "now usable" message when the 1134 * first usable phyint is added to an empty group. 1135 */ 1136 if (state != PG_FAILED && pg->pg_failmsg_printed) { 1137 assert(origstate == PG_FAILED); 1138 logerr("At least 1 IP interface (%s) in group %s is now " 1139 "usable\n", usablepi->pi_name, pg->pg_name); 1140 pg->pg_failmsg_printed = _B_FALSE; 1141 } else if (origstate != PG_FAILED && state == PG_FAILED) { 1142 logerr("All IP interfaces in group %s are now unusable\n", 1143 pg->pg_name); 1144 pg->pg_failmsg_printed = _B_TRUE; 1145 } 1146 } 1147 1148 /* 1149 * Extract information from the kernel about the desired phyint. 1150 * Look only for properties of the phyint and not properties of logints. 1151 * Take appropriate action on the changes. 1152 * Return codes: 1153 * PI_OK 1154 * The phyint exists in the kernel and matches our knowledge 1155 * of the phyint. 1156 * PI_DELETED 1157 * The phyint has vanished in the kernel. 1158 * PI_IFINDEX_CHANGED 1159 * The phyint's interface index has changed. 1160 * Ask the caller to delete and recreate the phyint. 1161 * PI_IOCTL_ERROR 1162 * Some ioctl error. Don't change anything. 1163 * PI_GROUP_CHANGED 1164 * The phyint has changed group. 1165 */ 1166 int 1167 phyint_inst_update_from_k(struct phyint_instance *pii) 1168 { 1169 struct lifreq lifr; 1170 int ifsock; 1171 struct phyint *pi; 1172 1173 pi = pii->pii_phyint; 1174 1175 if (debug & D_PHYINT) { 1176 logdebug("phyint_inst_update_from_k(%s %s)\n", 1177 AF_STR(pii->pii_af), pi->pi_name); 1178 } 1179 1180 /* 1181 * Get the ifindex from the kernel, for comparison with the 1182 * value in our tables. 1183 */ 1184 (void) strncpy(lifr.lifr_name, pi->pi_name, sizeof (lifr.lifr_name)); 1185 lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0'; 1186 1187 ifsock = (pii->pii_af == AF_INET) ? ifsock_v4 : ifsock_v6; 1188 if (ioctl(ifsock, SIOCGLIFINDEX, &lifr) < 0) { 1189 if (errno == ENXIO) { 1190 return (PI_DELETED); 1191 } else { 1192 logperror_pii(pii, "phyint_inst_update_from_k:" 1193 " ioctl (get lifindex)"); 1194 return (PI_IOCTL_ERROR); 1195 } 1196 } 1197 1198 if (lifr.lifr_index != pi->pi_ifindex) { 1199 /* 1200 * The index has changed. Most likely the interface has 1201 * been unplumbed and replumbed. Ask the caller to take 1202 * appropriate action. 1203 */ 1204 if (debug & D_PHYINT) { 1205 logdebug("phyint_inst_update_from_k:" 1206 " old index %d new index %d\n", 1207 pi->pi_ifindex, lifr.lifr_index); 1208 } 1209 return (PI_IFINDEX_CHANGED); 1210 } 1211 1212 /* 1213 * Get the group name from the kernel, for comparison with 1214 * the value in our tables. 1215 */ 1216 if (ioctl(ifsock, SIOCGLIFGROUPNAME, &lifr) < 0) { 1217 if (errno == ENXIO) { 1218 return (PI_DELETED); 1219 } else { 1220 logperror_pii(pii, "phyint_inst_update_from_k:" 1221 " ioctl (get groupname)"); 1222 return (PI_IOCTL_ERROR); 1223 } 1224 } 1225 1226 /* 1227 * If the phyint has changed group i.e. if the phyint group name 1228 * returned by the kernel is different, ask the caller to delete 1229 * and recreate the phyint in the right group 1230 */ 1231 if (strcmp(lifr.lifr_groupname, pi->pi_group->pg_name) != 0) { 1232 /* Groupname has changed */ 1233 if (debug & D_PHYINT) { 1234 logdebug("phyint_inst_update_from_k:" 1235 " groupname change\n"); 1236 } 1237 return (PI_GROUP_CHANGED); 1238 } 1239 1240 /* 1241 * Get the current phyint flags from the kernel, and determine what 1242 * flags have changed by comparing against our tables. Note that the 1243 * IFF_INACTIVE processing in initifs() relies on this call to ensure 1244 * that IFF_INACTIVE is really still set on the interface. 1245 */ 1246 if (ioctl(ifsock, SIOCGLIFFLAGS, &lifr) < 0) { 1247 if (errno == ENXIO) { 1248 return (PI_DELETED); 1249 } else { 1250 logperror_pii(pii, "phyint_inst_update_from_k: " 1251 " ioctl (get flags)"); 1252 return (PI_IOCTL_ERROR); 1253 } 1254 } 1255 1256 pi->pi_flags = PHYINT_FLAGS(lifr.lifr_flags); 1257 if (pi->pi_v4 != NULL) 1258 pi->pi_v4->pii_flags = pi->pi_flags; 1259 if (pi->pi_v6 != NULL) 1260 pi->pi_v6->pii_flags = pi->pi_flags; 1261 1262 /* 1263 * Make sure the IFF_FAILED flag is set if and only if we think 1264 * the interface should be failed. 1265 */ 1266 if (pi->pi_flags & IFF_FAILED) { 1267 if (pi->pi_state == PI_RUNNING) 1268 (void) change_pif_flags(pi, 0, IFF_FAILED); 1269 } else { 1270 if (pi->pi_state == PI_FAILED) 1271 (void) change_pif_flags(pi, IFF_FAILED, IFF_INACTIVE); 1272 } 1273 1274 /* No change in phyint status */ 1275 return (PI_OK); 1276 } 1277 1278 /* 1279 * Delete the phyint. Remove it from the list of all phyints, and the 1280 * list of phyint group members. 1281 */ 1282 static void 1283 phyint_delete(struct phyint *pi) 1284 { 1285 struct phyint *pi2; 1286 struct phyint_group *pg = pi->pi_group; 1287 1288 if (debug & D_PHYINT) 1289 logdebug("phyint_delete(%s)\n", pi->pi_name); 1290 1291 /* Both IPv4 and IPv6 phyint instances must have been deleted. */ 1292 assert(pi->pi_v4 == NULL && pi->pi_v6 == NULL); 1293 1294 /* 1295 * The phyint must belong to a group. 1296 */ 1297 assert(pg->pg_phyint == pi || pi->pi_pgprev != NULL); 1298 1299 /* The phyint must be in the list of all phyints */ 1300 assert(phyints == pi || pi->pi_prev != NULL); 1301 1302 /* Remove the phyint from the phyint group list */ 1303 pg->pg_sig++; 1304 (void) phyint_group_member_event(pg, pi, IPMP_IF_REMOVE); 1305 1306 if (pi->pi_pgprev == NULL) { 1307 /* Phyint is the 1st in the phyint group list */ 1308 pg->pg_phyint = pi->pi_pgnext; 1309 } else { 1310 pi->pi_pgprev->pi_pgnext = pi->pi_pgnext; 1311 } 1312 if (pi->pi_pgnext != NULL) 1313 pi->pi_pgnext->pi_pgprev = pi->pi_pgprev; 1314 pi->pi_pgnext = NULL; 1315 pi->pi_pgprev = NULL; 1316 1317 /* Refresh the group state now that this phyint has been removed */ 1318 phyint_group_refresh_state(pg); 1319 1320 /* Remove the phyint from the global list of phyints */ 1321 if (pi->pi_prev == NULL) { 1322 /* Phyint is the 1st in the list */ 1323 phyints = pi->pi_next; 1324 } else { 1325 pi->pi_prev->pi_next = pi->pi_next; 1326 } 1327 if (pi->pi_next != NULL) 1328 pi->pi_next->pi_prev = pi->pi_prev; 1329 pi->pi_next = NULL; 1330 pi->pi_prev = NULL; 1331 1332 /* 1333 * See if another phyint in the group had been offlined because 1334 * it was a dup of `pi' -- and if so, online it. 1335 */ 1336 if (!pi->pi_hwaddrdup && 1337 (pi2 = phyint_lookup_hwaddr(pi, _B_FALSE)) != NULL) { 1338 assert(pi2->pi_hwaddrdup); 1339 (void) phyint_undo_offline(pi2); 1340 } 1341 phyint_link_close(pi); 1342 free(pi); 1343 } 1344 1345 /* 1346 * Offline phyint `pi' if at least `minred' usable interfaces remain in the 1347 * group. Returns an IPMP error code. 1348 */ 1349 int 1350 phyint_offline(struct phyint *pi, uint_t minred) 1351 { 1352 boolean_t was_active; 1353 unsigned int nusable = 0; 1354 struct phyint *pi2; 1355 struct phyint_group *pg = pi->pi_group; 1356 1357 /* 1358 * Verify that enough usable interfaces in the group would remain. 1359 * As a special case, if the group has failed, allow any non-offline 1360 * phyints to be offlined. 1361 */ 1362 if (pg != phyint_anongroup) { 1363 for (pi2 = pg->pg_phyint; pi2 != NULL; pi2 = pi2->pi_pgnext) { 1364 if (pi2 == pi) 1365 continue; 1366 if (phyint_is_usable(pi2) || 1367 (GROUP_FAILED(pg) && pi2->pi_state != PI_OFFLINE)) 1368 nusable++; 1369 } 1370 } 1371 if (nusable < minred) 1372 return (IPMP_EMINRED); 1373 1374 was_active = ((pi->pi_flags & IFF_INACTIVE) == 0); 1375 1376 if (!change_pif_flags(pi, IFF_OFFLINE, IFF_INACTIVE)) 1377 return (IPMP_FAILURE); 1378 1379 /* 1380 * The interface is now offline, so stop probing it. Note that 1381 * if_mpadm(1M) will down the test addresses, after receiving a 1382 * success reply from us. The routing socket message will then make us 1383 * close the socket used for sending probes. But it is more logical 1384 * that an offlined interface must not be probed, even if it has test 1385 * addresses. 1386 * 1387 * NOTE: stop_probing() also sets PI_OFFLINE. 1388 */ 1389 stop_probing(pi); 1390 1391 /* 1392 * If we're offlining the phyint because it has a duplicate hardware 1393 * address, print a warning -- and leave the link open so that we can 1394 * be notified of hardware address changes that make it usable again. 1395 * Otherwise, close the link so that we won't prevent a detach. 1396 */ 1397 if (pi->pi_hwaddrdup) { 1398 logerr("IP interface %s has a hardware address which is not " 1399 "unique in group %s; offlining\n", pi->pi_name, 1400 pg->pg_name); 1401 } else { 1402 phyint_link_close(pi); 1403 } 1404 1405 /* 1406 * If this phyint was preventing another phyint with a duplicate 1407 * hardware address from being online, bring that one online now. 1408 */ 1409 if (!pi->pi_hwaddrdup && 1410 (pi2 = phyint_lookup_hwaddr(pi, _B_FALSE)) != NULL) { 1411 assert(pi2->pi_hwaddrdup); 1412 (void) phyint_undo_offline(pi2); 1413 } 1414 1415 /* 1416 * If this interface was active, try to activate another INACTIVE 1417 * interface in the group. 1418 */ 1419 if (was_active) 1420 phyint_activate_another(pi); 1421 1422 return (IPMP_SUCCESS); 1423 } 1424 1425 /* 1426 * Undo a previous offline of `pi'. Returns an IPMP error code. 1427 */ 1428 int 1429 phyint_undo_offline(struct phyint *pi) 1430 { 1431 if (pi->pi_state != PI_OFFLINE) { 1432 errno = EINVAL; 1433 return (IPMP_FAILURE); 1434 } 1435 1436 /* 1437 * If necessary, reinitialize our link information and verify that its 1438 * hardware address is still unique across the group. 1439 */ 1440 if (pi->pi_dh == NULL && !phyint_link_init(pi)) { 1441 errno = EIO; 1442 return (IPMP_FAILURE); 1443 } 1444 1445 if (phyint_lookup_hwaddr(pi, _B_TRUE) != NULL) { 1446 pi->pi_hwaddrdup = _B_TRUE; 1447 return (IPMP_EHWADDRDUP); 1448 } 1449 1450 if (pi->pi_hwaddrdup) { 1451 logerr("IP interface %s now has a unique hardware address in " 1452 "group %s; onlining\n", pi->pi_name, pi->pi_group->pg_name); 1453 pi->pi_hwaddrdup = _B_FALSE; 1454 } 1455 1456 if (!change_pif_flags(pi, 0, IFF_OFFLINE)) 1457 return (IPMP_FAILURE); 1458 1459 /* 1460 * While the interface was offline, it may have failed (e.g. the link 1461 * may have gone down). phyint_inst_check_for_failure() will have 1462 * already set pi_flags with IFF_FAILED, so we can use that to decide 1463 * whether the phyint should transition to running. Note that after 1464 * we transition to running, we will start sending probes again (if 1465 * test addresses are configured), which may also reveal that the 1466 * interface is in fact failed. 1467 */ 1468 if (pi->pi_flags & IFF_FAILED) { 1469 phyint_chstate(pi, PI_FAILED); 1470 } else { 1471 /* calls phyint_chstate() */ 1472 phyint_transition_to_running(pi); 1473 } 1474 1475 /* 1476 * Give the requestor time to configure test addresses before 1477 * complaining that they're missing. 1478 */ 1479 pi->pi_taddrthresh = getcurrentsec() + TESTADDR_CONF_TIME; 1480 1481 return (IPMP_SUCCESS); 1482 } 1483 1484 /* 1485 * Delete (unlink and free), the phyint instance. 1486 */ 1487 void 1488 phyint_inst_delete(struct phyint_instance *pii) 1489 { 1490 struct phyint *pi = pii->pii_phyint; 1491 1492 assert(pi != NULL); 1493 1494 if (debug & D_PHYINT) { 1495 logdebug("phyint_inst_delete(%s %s)\n", 1496 AF_STR(pii->pii_af), pi->pi_name); 1497 } 1498 1499 /* 1500 * If the phyint instance has associated probe targets 1501 * delete all the targets 1502 */ 1503 while (pii->pii_targets != NULL) 1504 target_delete(pii->pii_targets); 1505 1506 /* 1507 * Delete all the logints associated with this phyint 1508 * instance. 1509 */ 1510 while (pii->pii_logint != NULL) 1511 logint_delete(pii->pii_logint); 1512 1513 /* 1514 * Close the socket used to send probes to targets from this phyint. 1515 */ 1516 if (pii->pii_probe_sock != -1) 1517 close_probe_socket(pii, _B_TRUE); 1518 1519 /* 1520 * Phyint instance must be in the list of all phyint instances. 1521 * Remove phyint instance from the global list of phyint instances. 1522 */ 1523 assert(phyint_instances == pii || pii->pii_prev != NULL); 1524 if (pii->pii_prev == NULL) { 1525 /* Phyint is the 1st in the list */ 1526 phyint_instances = pii->pii_next; 1527 } else { 1528 pii->pii_prev->pii_next = pii->pii_next; 1529 } 1530 if (pii->pii_next != NULL) 1531 pii->pii_next->pii_prev = pii->pii_prev; 1532 pii->pii_next = NULL; 1533 pii->pii_prev = NULL; 1534 1535 /* 1536 * Reset the phyint instance pointer in the phyint. 1537 * If this is the last phyint instance (being deleted) on this 1538 * phyint, then delete the phyint. 1539 */ 1540 if (pii->pii_af == AF_INET) 1541 pi->pi_v4 = NULL; 1542 else 1543 pi->pi_v6 = NULL; 1544 1545 if (pi->pi_v4 == NULL && pi->pi_v6 == NULL) 1546 phyint_delete(pi); 1547 1548 free(pii); 1549 } 1550 1551 static void 1552 phyint_inst_print(struct phyint_instance *pii) 1553 { 1554 struct logint *li; 1555 struct target *tg; 1556 char abuf[INET6_ADDRSTRLEN]; 1557 int most_recent; 1558 int i; 1559 1560 if (pii->pii_phyint == NULL) { 1561 logdebug("pii->pi_phyint NULL can't print\n"); 1562 return; 1563 } 1564 1565 logdebug("\nPhyint instance: %s %s index %u state %x flags %llx " 1566 "sock %x in_use %d\n", 1567 AF_STR(pii->pii_af), pii->pii_name, pii->pii_ifindex, 1568 pii->pii_state, pii->pii_phyint->pi_flags, pii->pii_probe_sock, 1569 pii->pii_in_use); 1570 1571 for (li = pii->pii_logint; li != NULL; li = li->li_next) 1572 logint_print(li); 1573 1574 logdebug("\n"); 1575 for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) 1576 target_print(tg); 1577 1578 if (pii->pii_targets == NULL) 1579 logdebug("pi_targets NULL\n"); 1580 1581 if (pii->pii_target_next != NULL) { 1582 logdebug("pi_target_next %s %s\n", AF_STR(pii->pii_af), 1583 pr_addr(pii->pii_af, pii->pii_target_next->tg_address, 1584 abuf, sizeof (abuf))); 1585 } else { 1586 logdebug("pi_target_next NULL\n"); 1587 } 1588 1589 if (pii->pii_rtt_target_next != NULL) { 1590 logdebug("pi_rtt_target_next %s %s\n", AF_STR(pii->pii_af), 1591 pr_addr(pii->pii_af, pii->pii_rtt_target_next->tg_address, 1592 abuf, sizeof (abuf))); 1593 } else { 1594 logdebug("pi_rtt_target_next NULL\n"); 1595 } 1596 1597 if (pii->pii_targets != NULL) { 1598 most_recent = PROBE_INDEX_PREV(pii->pii_probe_next); 1599 1600 i = most_recent; 1601 do { 1602 if (pii->pii_probes[i].pr_target != NULL) { 1603 logdebug("#%d target %s ", i, 1604 pr_addr(pii->pii_af, 1605 pii->pii_probes[i].pr_target->tg_address, 1606 abuf, sizeof (abuf))); 1607 } else { 1608 logdebug("#%d target NULL ", i); 1609 } 1610 logdebug("time_start %lld status %d " 1611 "time_ackproc %lld time_lost %u", 1612 pii->pii_probes[i].pr_hrtime_start, 1613 pii->pii_probes[i].pr_status, 1614 pii->pii_probes[i].pr_hrtime_ackproc, 1615 pii->pii_probes[i].pr_time_lost); 1616 i = PROBE_INDEX_PREV(i); 1617 } while (i != most_recent); 1618 } 1619 } 1620 1621 /* 1622 * Lookup a logint based on the logical interface name, on the given 1623 * phyint instance. 1624 */ 1625 static struct logint * 1626 logint_lookup(struct phyint_instance *pii, char *name) 1627 { 1628 struct logint *li; 1629 1630 if (debug & D_LOGINT) { 1631 logdebug("logint_lookup(%s, %s)\n", 1632 AF_STR(pii->pii_af), name); 1633 } 1634 1635 for (li = pii->pii_logint; li != NULL; li = li->li_next) { 1636 if (strncmp(name, li->li_name, sizeof (li->li_name)) == 0) 1637 break; 1638 } 1639 return (li); 1640 } 1641 1642 /* 1643 * Insert a logint at the head of the list of logints of the given 1644 * phyint instance 1645 */ 1646 static void 1647 logint_insert(struct phyint_instance *pii, struct logint *li) 1648 { 1649 li->li_next = pii->pii_logint; 1650 li->li_prev = NULL; 1651 if (pii->pii_logint != NULL) 1652 pii->pii_logint->li_prev = li; 1653 pii->pii_logint = li; 1654 li->li_phyint_inst = pii; 1655 } 1656 1657 /* 1658 * Create a new named logint, on the specified phyint instance. 1659 */ 1660 static struct logint * 1661 logint_create(struct phyint_instance *pii, char *name) 1662 { 1663 struct logint *li; 1664 1665 if (debug & D_LOGINT) { 1666 logdebug("logint_create(%s %s %s)\n", 1667 AF_STR(pii->pii_af), pii->pii_name, name); 1668 } 1669 1670 li = calloc(1, sizeof (struct logint)); 1671 if (li == NULL) { 1672 logperror("logint_create: calloc"); 1673 return (NULL); 1674 } 1675 1676 (void) strncpy(li->li_name, name, sizeof (li->li_name)); 1677 li->li_name[sizeof (li->li_name) - 1] = '\0'; 1678 logint_insert(pii, li); 1679 return (li); 1680 } 1681 1682 /* 1683 * Initialize the logint based on the data returned by the kernel. 1684 */ 1685 void 1686 logint_init_from_k(struct phyint_instance *pii, char *li_name) 1687 { 1688 int ifsock; 1689 uint64_t flags; 1690 uint64_t saved_flags; 1691 struct logint *li; 1692 struct lifreq lifr; 1693 struct in6_addr test_subnet; 1694 struct in6_addr testaddr; 1695 int test_subnet_len; 1696 struct sockaddr_in6 *sin6; 1697 struct sockaddr_in *sin; 1698 char abuf[INET6_ADDRSTRLEN]; 1699 boolean_t ptp = _B_FALSE; 1700 struct in6_addr tgaddr; 1701 1702 if (debug & D_LOGINT) { 1703 logdebug("logint_init_from_k(%s %s)\n", 1704 AF_STR(pii->pii_af), li_name); 1705 } 1706 1707 /* Get the socket for doing ioctls */ 1708 ifsock = (pii->pii_af == AF_INET) ? ifsock_v4 : ifsock_v6; 1709 1710 /* 1711 * Get the flags from the kernel. Also serves as a check whether 1712 * the logical still exists. If it doesn't exist, no need to proceed 1713 * any further. li_in_use will make the caller clean up the logint 1714 */ 1715 (void) strncpy(lifr.lifr_name, li_name, sizeof (lifr.lifr_name)); 1716 lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0'; 1717 if (ioctl(ifsock, SIOCGLIFFLAGS, (char *)&lifr) < 0) { 1718 /* Interface may have vanished */ 1719 if (errno != ENXIO) { 1720 logperror_pii(pii, "logint_init_from_k: " 1721 "ioctl (get flags)"); 1722 } 1723 return; 1724 } 1725 1726 flags = lifr.lifr_flags; 1727 1728 /* 1729 * Verified the logint exists. Now lookup the logint in our tables. 1730 * If it does not exist, create a new logint. 1731 */ 1732 li = logint_lookup(pii, li_name); 1733 if (li == NULL) { 1734 li = logint_create(pii, li_name); 1735 if (li == NULL) { 1736 /* 1737 * Pretend the interface does not exist 1738 * in the kernel 1739 */ 1740 return; 1741 } 1742 } 1743 1744 /* 1745 * Update li->li_flags with the new flags, after saving the old 1746 * value. This is used later to check what flags has changed and 1747 * take any action 1748 */ 1749 saved_flags = li->li_flags; 1750 li->li_flags = flags; 1751 1752 /* 1753 * Get the address, prefix, prefixlength and update the logint. 1754 * Check if anything has changed. If the logint used for the 1755 * test address has changed, take suitable action. 1756 */ 1757 if (ioctl(ifsock, SIOCGLIFADDR, (char *)&lifr) < 0) { 1758 /* Interface may have vanished */ 1759 if (errno != ENXIO) { 1760 logperror_li(li, "logint_init_from_k: (get addr)"); 1761 } 1762 goto error; 1763 } 1764 1765 if (pii->pii_af == AF_INET) { 1766 sin = (struct sockaddr_in *)&lifr.lifr_addr; 1767 IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &testaddr); 1768 } else { 1769 sin6 = (struct sockaddr_in6 *)&lifr.lifr_addr; 1770 testaddr = sin6->sin6_addr; 1771 } 1772 1773 if (ioctl(ifsock, SIOCGLIFSUBNET, (char *)&lifr) < 0) { 1774 /* Interface may have vanished */ 1775 if (errno != ENXIO) 1776 logperror_li(li, "logint_init_from_k: (get subnet)"); 1777 goto error; 1778 } 1779 if (lifr.lifr_subnet.ss_family == AF_INET6) { 1780 sin6 = (struct sockaddr_in6 *)&lifr.lifr_subnet; 1781 test_subnet = sin6->sin6_addr; 1782 test_subnet_len = lifr.lifr_addrlen; 1783 } else { 1784 sin = (struct sockaddr_in *)&lifr.lifr_subnet; 1785 IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &test_subnet); 1786 test_subnet_len = lifr.lifr_addrlen + (IPV6_ABITS - IP_ABITS); 1787 } 1788 1789 /* 1790 * If this is the logint corresponding to the test address used for 1791 * sending probes, then if anything significant has changed we need to 1792 * determine the test address again. We ignore changes to the 1793 * IFF_FAILED and IFF_RUNNING flags since those happen as a matter of 1794 * course. 1795 */ 1796 if (pii->pii_probe_logint == li) { 1797 if (((li->li_flags ^ saved_flags) & 1798 ~(IFF_FAILED | IFF_RUNNING)) != 0 || 1799 !IN6_ARE_ADDR_EQUAL(&testaddr, &li->li_addr) || 1800 (!ptp && !IN6_ARE_ADDR_EQUAL(&test_subnet, 1801 &li->li_subnet)) || 1802 (!ptp && test_subnet_len != li->li_subnet_len) || 1803 (ptp && !IN6_ARE_ADDR_EQUAL(&tgaddr, &li->li_dstaddr))) { 1804 /* 1805 * Something significant that affects the testaddress 1806 * has changed. Redo the testaddress selection later on 1807 * in select_test_ifs(). For now do the cleanup and 1808 * set pii_probe_logint to NULL. 1809 */ 1810 if (pii->pii_probe_sock != -1) 1811 close_probe_socket(pii, _B_TRUE); 1812 pii->pii_probe_logint = NULL; 1813 } 1814 } 1815 1816 1817 /* Update the logint with the values obtained from the kernel. */ 1818 li->li_addr = testaddr; 1819 li->li_in_use = 1; 1820 if (ptp) { 1821 li->li_dstaddr = tgaddr; 1822 li->li_subnet_len = (pii->pii_af == AF_INET) ? 1823 IP_ABITS : IPV6_ABITS; 1824 } else { 1825 li->li_subnet = test_subnet; 1826 li->li_subnet_len = test_subnet_len; 1827 } 1828 1829 if (debug & D_LOGINT) 1830 logint_print(li); 1831 1832 return; 1833 1834 error: 1835 logerr("logint_init_from_k: IGNORED %s %s %s addr %s\n", 1836 AF_STR(pii->pii_af), pii->pii_name, li->li_name, 1837 pr_addr(pii->pii_af, testaddr, abuf, sizeof (abuf))); 1838 logint_delete(li); 1839 } 1840 1841 /* 1842 * Delete (unlink and free) a logint. 1843 */ 1844 void 1845 logint_delete(struct logint *li) 1846 { 1847 struct phyint_instance *pii; 1848 1849 pii = li->li_phyint_inst; 1850 assert(pii != NULL); 1851 1852 if (debug & D_LOGINT) { 1853 int af; 1854 char abuf[INET6_ADDRSTRLEN]; 1855 1856 af = pii->pii_af; 1857 logdebug("logint_delete(%s %s %s/%u)\n", 1858 AF_STR(af), li->li_name, 1859 pr_addr(af, li->li_addr, abuf, sizeof (abuf)), 1860 li->li_subnet_len); 1861 } 1862 1863 /* logint must be in the list of logints */ 1864 assert(pii->pii_logint == li || li->li_prev != NULL); 1865 1866 /* Remove the logint from the list of logints */ 1867 if (li->li_prev == NULL) { 1868 /* logint is the 1st in the list */ 1869 pii->pii_logint = li->li_next; 1870 } else { 1871 li->li_prev->li_next = li->li_next; 1872 } 1873 if (li->li_next != NULL) 1874 li->li_next->li_prev = li->li_prev; 1875 li->li_next = NULL; 1876 li->li_prev = NULL; 1877 1878 /* 1879 * If this logint is also being used for probing, then close the 1880 * associated socket, if it exists. 1881 */ 1882 if (pii->pii_probe_logint == li) { 1883 if (pii->pii_probe_sock != -1) 1884 close_probe_socket(pii, _B_TRUE); 1885 pii->pii_probe_logint = NULL; 1886 } 1887 1888 free(li); 1889 } 1890 1891 static void 1892 logint_print(struct logint *li) 1893 { 1894 char abuf[INET6_ADDRSTRLEN]; 1895 int af = li->li_phyint_inst->pii_af; 1896 1897 logdebug("logint: %s %s addr %s/%u", AF_STR(af), li->li_name, 1898 pr_addr(af, li->li_addr, abuf, sizeof (abuf)), li->li_subnet_len); 1899 1900 logdebug("\tFlags: %llx in_use %d\n", li->li_flags, li->li_in_use); 1901 } 1902 1903 char * 1904 pr_addr(int af, struct in6_addr addr, char *abuf, int len) 1905 { 1906 struct in_addr addr_v4; 1907 1908 if (af == AF_INET) { 1909 IN6_V4MAPPED_TO_INADDR(&addr, &addr_v4); 1910 (void) inet_ntop(AF_INET, (void *)&addr_v4, abuf, len); 1911 } else { 1912 (void) inet_ntop(AF_INET6, (void *)&addr, abuf, len); 1913 } 1914 return (abuf); 1915 } 1916 1917 /* 1918 * Fill in the sockaddr_storage pointed to by `ssp' with the IP address 1919 * represented by the [`af',`addr'] pair. Needed because in.mpathd internally 1920 * stores all addresses as in6_addrs, but we don't want to expose that. 1921 */ 1922 void 1923 addr2storage(int af, const struct in6_addr *addr, struct sockaddr_storage *ssp) 1924 { 1925 struct sockaddr_in *sinp = (struct sockaddr_in *)ssp; 1926 struct sockaddr_in6 *sin6p = (struct sockaddr_in6 *)ssp; 1927 1928 assert(af == AF_INET || af == AF_INET6); 1929 1930 switch (af) { 1931 case AF_INET: 1932 (void) memset(sinp, 0, sizeof (*sinp)); 1933 sinp->sin_family = AF_INET; 1934 IN6_V4MAPPED_TO_INADDR(addr, &sinp->sin_addr); 1935 break; 1936 case AF_INET6: 1937 (void) memset(sin6p, 0, sizeof (*sin6p)); 1938 sin6p->sin6_family = AF_INET6; 1939 sin6p->sin6_addr = *addr; 1940 break; 1941 } 1942 } 1943 1944 /* Lookup target on its address */ 1945 struct target * 1946 target_lookup(struct phyint_instance *pii, struct in6_addr addr) 1947 { 1948 struct target *tg; 1949 1950 if (debug & D_TARGET) { 1951 char abuf[INET6_ADDRSTRLEN]; 1952 1953 logdebug("target_lookup(%s %s): addr %s\n", 1954 AF_STR(pii->pii_af), pii->pii_name, 1955 pr_addr(pii->pii_af, addr, abuf, sizeof (abuf))); 1956 } 1957 1958 for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) { 1959 if (IN6_ARE_ADDR_EQUAL(&tg->tg_address, &addr)) 1960 break; 1961 } 1962 return (tg); 1963 } 1964 1965 /* 1966 * Find and return the next active target, for the next probe. 1967 * If no active targets are available, return NULL. 1968 */ 1969 struct target * 1970 target_next(struct target *tg) 1971 { 1972 struct phyint_instance *pii = tg->tg_phyint_inst; 1973 struct target *marker = tg; 1974 hrtime_t now; 1975 1976 now = gethrtime(); 1977 1978 /* 1979 * Target must be in the list of targets for this phyint 1980 * instance. 1981 */ 1982 assert(pii->pii_targets == tg || tg->tg_prev != NULL); 1983 assert(pii->pii_targets != NULL); 1984 1985 /* Return the next active target */ 1986 do { 1987 /* 1988 * Go to the next target. If we hit the end, 1989 * reset the ptr to the head 1990 */ 1991 tg = tg->tg_next; 1992 if (tg == NULL) 1993 tg = pii->pii_targets; 1994 1995 assert(TG_STATUS_VALID(tg->tg_status)); 1996 1997 switch (tg->tg_status) { 1998 case TG_ACTIVE: 1999 return (tg); 2000 2001 case TG_UNUSED: 2002 assert(pii->pii_targets_are_routers); 2003 if (pii->pii_ntargets < MAX_PROBE_TARGETS) { 2004 /* 2005 * Bubble up the unused target to active 2006 */ 2007 tg->tg_status = TG_ACTIVE; 2008 pii->pii_ntargets++; 2009 return (tg); 2010 } 2011 break; 2012 2013 case TG_SLOW: 2014 assert(pii->pii_targets_are_routers); 2015 if (tg->tg_latime + MIN_RECOVERY_TIME < now) { 2016 /* 2017 * Bubble up the slow target to unused 2018 */ 2019 tg->tg_status = TG_UNUSED; 2020 } 2021 break; 2022 2023 case TG_DEAD: 2024 assert(pii->pii_targets_are_routers); 2025 if (tg->tg_latime + MIN_RECOVERY_TIME < now) { 2026 /* 2027 * Bubble up the dead target to slow 2028 */ 2029 tg->tg_status = TG_SLOW; 2030 tg->tg_latime = now; 2031 } 2032 break; 2033 } 2034 2035 } while (tg != marker); 2036 2037 return (NULL); 2038 } 2039 2040 /* 2041 * Select the best available target, that is not already TG_ACTIVE, 2042 * for the caller. The caller will determine whether it wants to 2043 * make the returned target TG_ACTIVE. 2044 * The selection order is as follows. 2045 * 1. pick a TG_UNSED target, if it exists. 2046 * 2. else pick a TG_SLOW target that has recovered, if it exists 2047 * 3. else pick any TG_SLOW target, if it exists 2048 * 4. else pick a TG_DEAD target that has recovered, if it exists 2049 * 5. else pick any TG_DEAD target, if it exists 2050 * 6. else return null 2051 */ 2052 static struct target * 2053 target_select_best(struct phyint_instance *pii) 2054 { 2055 struct target *tg; 2056 struct target *slow = NULL; 2057 struct target *dead = NULL; 2058 struct target *slow_recovered = NULL; 2059 struct target *dead_recovered = NULL; 2060 hrtime_t now; 2061 2062 now = gethrtime(); 2063 2064 for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) { 2065 assert(TG_STATUS_VALID(tg->tg_status)); 2066 2067 switch (tg->tg_status) { 2068 case TG_UNUSED: 2069 return (tg); 2070 2071 case TG_SLOW: 2072 if (tg->tg_latime + MIN_RECOVERY_TIME < now) { 2073 slow_recovered = tg; 2074 /* 2075 * Promote the slow_recovered to unused 2076 */ 2077 tg->tg_status = TG_UNUSED; 2078 } else { 2079 slow = tg; 2080 } 2081 break; 2082 2083 case TG_DEAD: 2084 if (tg->tg_latime + MIN_RECOVERY_TIME < now) { 2085 dead_recovered = tg; 2086 /* 2087 * Promote the dead_recovered to slow 2088 */ 2089 tg->tg_status = TG_SLOW; 2090 tg->tg_latime = now; 2091 } else { 2092 dead = tg; 2093 } 2094 break; 2095 2096 default: 2097 break; 2098 } 2099 } 2100 2101 if (slow_recovered != NULL) 2102 return (slow_recovered); 2103 else if (slow != NULL) 2104 return (slow); 2105 else if (dead_recovered != NULL) 2106 return (dead_recovered); 2107 else 2108 return (dead); 2109 } 2110 2111 /* 2112 * Some target was deleted. If we don't have even MIN_PROBE_TARGETS 2113 * that are active, pick the next best below. 2114 */ 2115 static void 2116 target_activate_all(struct phyint_instance *pii) 2117 { 2118 struct target *tg; 2119 2120 assert(pii->pii_ntargets == 0); 2121 assert(pii->pii_target_next == NULL); 2122 assert(pii->pii_rtt_target_next == NULL); 2123 assert(pii->pii_targets_are_routers); 2124 2125 while (pii->pii_ntargets < MIN_PROBE_TARGETS) { 2126 tg = target_select_best(pii); 2127 if (tg == NULL) { 2128 /* We are out of targets */ 2129 return; 2130 } 2131 2132 assert(TG_STATUS_VALID(tg->tg_status)); 2133 assert(tg->tg_status != TG_ACTIVE); 2134 tg->tg_status = TG_ACTIVE; 2135 pii->pii_ntargets++; 2136 if (pii->pii_target_next == NULL) { 2137 pii->pii_target_next = tg; 2138 pii->pii_rtt_target_next = tg; 2139 } 2140 } 2141 } 2142 2143 static struct target * 2144 target_first(struct phyint_instance *pii) 2145 { 2146 struct target *tg; 2147 2148 for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) { 2149 assert(TG_STATUS_VALID(tg->tg_status)); 2150 if (tg->tg_status == TG_ACTIVE) 2151 break; 2152 } 2153 2154 return (tg); 2155 } 2156 2157 /* 2158 * Create a default target entry. 2159 */ 2160 void 2161 target_create(struct phyint_instance *pii, struct in6_addr addr, 2162 boolean_t is_router) 2163 { 2164 struct target *tg; 2165 struct phyint *pi; 2166 struct logint *li; 2167 2168 if (debug & D_TARGET) { 2169 char abuf[INET6_ADDRSTRLEN]; 2170 2171 logdebug("target_create(%s %s, %s)\n", 2172 AF_STR(pii->pii_af), pii->pii_name, 2173 pr_addr(pii->pii_af, addr, abuf, sizeof (abuf))); 2174 } 2175 2176 /* 2177 * If the test address is not yet initialized, do not add 2178 * any target, since we cannot determine whether the target 2179 * belongs to the same subnet as the test address. 2180 */ 2181 li = pii->pii_probe_logint; 2182 if (li == NULL) 2183 return; 2184 2185 /* 2186 * If there are multiple subnets associated with an interface, then 2187 * add the target to this phyint instance only if it belongs to the 2188 * same subnet as the test address. This assures us that we will 2189 * be able to reach this target through our routing table. 2190 */ 2191 if (!prefix_equal(li->li_subnet, addr, li->li_subnet_len)) 2192 return; 2193 2194 if (pii->pii_targets != NULL) { 2195 assert(pii->pii_ntargets <= MAX_PROBE_TARGETS); 2196 if (is_router) { 2197 if (!pii->pii_targets_are_routers) { 2198 /* 2199 * Prefer router over hosts. Using hosts is a 2200 * fallback mechanism, hence delete all host 2201 * targets. 2202 */ 2203 while (pii->pii_targets != NULL) 2204 target_delete(pii->pii_targets); 2205 } 2206 } else { 2207 /* 2208 * Routers take precedence over hosts. If this 2209 * is a router list and we are trying to add a 2210 * host, just return. If this is a host list 2211 * and if we have sufficient targets, just return 2212 */ 2213 if (pii->pii_targets_are_routers || 2214 pii->pii_ntargets == MAX_PROBE_TARGETS) 2215 return; 2216 } 2217 } 2218 2219 tg = calloc(1, sizeof (struct target)); 2220 if (tg == NULL) { 2221 logperror("target_create: calloc"); 2222 return; 2223 } 2224 2225 tg->tg_phyint_inst = pii; 2226 tg->tg_address = addr; 2227 tg->tg_in_use = 1; 2228 tg->tg_rtt_sa = -1; 2229 tg->tg_num_deferred = 0; 2230 2231 /* 2232 * If this is the first target, set 'pii_targets_are_routers' 2233 * The list of targets is either a list of hosts or list or 2234 * routers, but not a mix. 2235 */ 2236 if (pii->pii_targets == NULL) { 2237 assert(pii->pii_ntargets == 0); 2238 assert(pii->pii_target_next == NULL); 2239 assert(pii->pii_rtt_target_next == NULL); 2240 pii->pii_targets_are_routers = is_router ? 1 : 0; 2241 } 2242 2243 if (pii->pii_ntargets == MAX_PROBE_TARGETS) { 2244 assert(pii->pii_targets_are_routers); 2245 assert(pii->pii_target_next != NULL); 2246 assert(pii->pii_rtt_target_next != NULL); 2247 tg->tg_status = TG_UNUSED; 2248 } else { 2249 if (pii->pii_ntargets == 0) { 2250 assert(pii->pii_target_next == NULL); 2251 pii->pii_target_next = tg; 2252 pii->pii_rtt_target_next = tg; 2253 } 2254 pii->pii_ntargets++; 2255 tg->tg_status = TG_ACTIVE; 2256 } 2257 2258 target_insert(pii, tg); 2259 2260 /* 2261 * Change state to PI_RUNNING if this phyint instance is capable of 2262 * sending and receiving probes -- that is, if we know of at least 1 2263 * target, and this phyint instance is probe-capable. For more 2264 * details, see the phyint state diagram in mpd_probe.c. 2265 */ 2266 pi = pii->pii_phyint; 2267 if (pi->pi_state == PI_NOTARGETS && PROBE_CAPABLE(pii)) { 2268 if (pi->pi_flags & IFF_FAILED) 2269 phyint_chstate(pi, PI_FAILED); 2270 else 2271 phyint_chstate(pi, PI_RUNNING); 2272 } 2273 } 2274 2275 /* 2276 * Add the target address named by `addr' to phyint instance `pii' if it does 2277 * not already exist. If the target is a router, `is_router' should be set to 2278 * B_TRUE. 2279 */ 2280 void 2281 target_add(struct phyint_instance *pii, struct in6_addr addr, 2282 boolean_t is_router) 2283 { 2284 struct target *tg; 2285 2286 if (pii == NULL) 2287 return; 2288 2289 tg = target_lookup(pii, addr); 2290 2291 /* 2292 * If the target does not exist, create it; target_create() will set 2293 * tg_in_use to true. Even if it exists already, if it's a router 2294 * target and we'd previously learned of it through multicast, then we 2295 * need to recreate it as a router target. Otherwise, just set 2296 * tg_in_use to to true so that init_router_targets() won't delete it. 2297 */ 2298 if (tg == NULL || (is_router && !pii->pii_targets_are_routers)) 2299 target_create(pii, addr, is_router); 2300 else if (is_router) 2301 tg->tg_in_use = 1; 2302 } 2303 2304 /* 2305 * Insert target at head of linked list of targets for the associated 2306 * phyint instance 2307 */ 2308 static void 2309 target_insert(struct phyint_instance *pii, struct target *tg) 2310 { 2311 tg->tg_next = pii->pii_targets; 2312 tg->tg_prev = NULL; 2313 if (tg->tg_next != NULL) 2314 tg->tg_next->tg_prev = tg; 2315 pii->pii_targets = tg; 2316 } 2317 2318 /* 2319 * Delete a target (unlink and free). 2320 */ 2321 void 2322 target_delete(struct target *tg) 2323 { 2324 int af; 2325 struct phyint_instance *pii; 2326 struct phyint_instance *pii_other; 2327 2328 pii = tg->tg_phyint_inst; 2329 af = pii->pii_af; 2330 2331 if (debug & D_TARGET) { 2332 char abuf[INET6_ADDRSTRLEN]; 2333 2334 logdebug("target_delete(%s %s, %s)\n", 2335 AF_STR(af), pii->pii_name, 2336 pr_addr(af, tg->tg_address, abuf, sizeof (abuf))); 2337 } 2338 2339 /* 2340 * Target must be in the list of targets for this phyint 2341 * instance. 2342 */ 2343 assert(pii->pii_targets == tg || tg->tg_prev != NULL); 2344 2345 /* 2346 * Reset all references to 'tg' in the probe information 2347 * for this phyint. 2348 */ 2349 reset_pii_probes(pii, tg); 2350 2351 /* 2352 * Remove this target from the list of targets of this 2353 * phyint instance. 2354 */ 2355 if (tg->tg_prev == NULL) { 2356 pii->pii_targets = tg->tg_next; 2357 } else { 2358 tg->tg_prev->tg_next = tg->tg_next; 2359 } 2360 2361 if (tg->tg_next != NULL) 2362 tg->tg_next->tg_prev = tg->tg_prev; 2363 2364 tg->tg_next = NULL; 2365 tg->tg_prev = NULL; 2366 2367 if (tg->tg_status == TG_ACTIVE) 2368 pii->pii_ntargets--; 2369 2370 /* 2371 * Adjust the next target to probe, if it points to 2372 * to the currently deleted target. 2373 */ 2374 if (pii->pii_target_next == tg) 2375 pii->pii_target_next = target_first(pii); 2376 2377 if (pii->pii_rtt_target_next == tg) 2378 pii->pii_rtt_target_next = target_first(pii); 2379 2380 free(tg); 2381 2382 /* 2383 * The number of active targets pii_ntargets == 0 iff 2384 * the next active target pii->pii_target_next == NULL 2385 */ 2386 if (pii->pii_ntargets != 0) { 2387 assert(pii->pii_target_next != NULL); 2388 assert(pii->pii_rtt_target_next != NULL); 2389 assert(pii->pii_target_next->tg_status == TG_ACTIVE); 2390 assert(pii->pii_rtt_target_next->tg_status == TG_ACTIVE); 2391 return; 2392 } 2393 2394 /* At this point, we don't have any active targets. */ 2395 assert(pii->pii_target_next == NULL); 2396 assert(pii->pii_rtt_target_next == NULL); 2397 2398 if (pii->pii_targets_are_routers) { 2399 /* 2400 * Activate any TG_SLOW or TG_DEAD router targets, 2401 * since we don't have any other targets 2402 */ 2403 target_activate_all(pii); 2404 2405 if (pii->pii_ntargets != 0) { 2406 assert(pii->pii_target_next != NULL); 2407 assert(pii->pii_rtt_target_next != NULL); 2408 assert(pii->pii_target_next->tg_status == TG_ACTIVE); 2409 assert(pii->pii_rtt_target_next->tg_status == 2410 TG_ACTIVE); 2411 return; 2412 } 2413 } 2414 2415 /* 2416 * If we still don't have any active targets, the list must 2417 * must be really empty. There aren't even TG_SLOW or TG_DEAD 2418 * targets. Zero out the probe stats since it will not be 2419 * relevant any longer. 2420 */ 2421 assert(pii->pii_targets == NULL); 2422 pii->pii_targets_are_routers = _B_FALSE; 2423 clear_pii_probe_stats(pii); 2424 pii_other = phyint_inst_other(pii); 2425 2426 /* 2427 * If there are no targets on both instances and the interface would 2428 * otherwise be considered PI_RUNNING, go back to PI_NOTARGETS state, 2429 * since we cannot probe this phyint any more. For more details, 2430 * please see phyint state diagram in mpd_probe.c. 2431 */ 2432 if (!PROBE_CAPABLE(pii_other) && LINK_UP(pii->pii_phyint) && 2433 pii->pii_phyint->pi_state != PI_OFFLINE) 2434 phyint_chstate(pii->pii_phyint, PI_NOTARGETS); 2435 } 2436 2437 /* 2438 * Flush the target list of every phyint in the group, if the list 2439 * is a host target list. This is called if group failure is suspected. 2440 * If all targets have failed, multicast will subsequently discover new 2441 * targets. Else it is a group failure. 2442 * Note: This function is a no-op if the list is a router target list. 2443 */ 2444 static void 2445 target_flush_hosts(struct phyint_group *pg) 2446 { 2447 struct phyint *pi; 2448 struct phyint_instance *pii; 2449 2450 if (debug & D_TARGET) 2451 logdebug("target_flush_hosts(%s)\n", pg->pg_name); 2452 2453 for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) { 2454 pii = pi->pi_v4; 2455 if (pii != NULL && !pii->pii_targets_are_routers) { 2456 /* 2457 * Delete all the targets. When the list becomes 2458 * empty, target_delete() will set pii->pii_targets 2459 * to NULL. 2460 */ 2461 while (pii->pii_targets != NULL) 2462 target_delete(pii->pii_targets); 2463 } 2464 pii = pi->pi_v6; 2465 if (pii != NULL && !pii->pii_targets_are_routers) { 2466 /* 2467 * Delete all the targets. When the list becomes 2468 * empty, target_delete() will set pii->pii_targets 2469 * to NULL. 2470 */ 2471 while (pii->pii_targets != NULL) 2472 target_delete(pii->pii_targets); 2473 } 2474 } 2475 } 2476 2477 /* 2478 * Reset all references to 'target' in the probe info, as this target is 2479 * being deleted. The pr_target field is guaranteed to be non-null if 2480 * pr_status is PR_UNACKED. So we change the pr_status to PR_LOST, so that 2481 * pr_target will not be accessed unconditionally. 2482 */ 2483 static void 2484 reset_pii_probes(struct phyint_instance *pii, struct target *tg) 2485 { 2486 int i; 2487 2488 for (i = 0; i < PROBE_STATS_COUNT; i++) { 2489 if (pii->pii_probes[i].pr_target == tg) { 2490 if (pii->pii_probes[i].pr_status == PR_UNACKED) { 2491 probe_chstate(&pii->pii_probes[i], pii, 2492 PR_LOST); 2493 } 2494 pii->pii_probes[i].pr_target = NULL; 2495 } 2496 } 2497 2498 } 2499 2500 /* 2501 * Clear the probe statistics array. 2502 */ 2503 void 2504 clear_pii_probe_stats(struct phyint_instance *pii) 2505 { 2506 bzero(pii->pii_probes, sizeof (struct probe_stats) * PROBE_STATS_COUNT); 2507 /* Reset the next probe index in the probe stats array */ 2508 pii->pii_probe_next = 0; 2509 } 2510 2511 static void 2512 target_print(struct target *tg) 2513 { 2514 char abuf[INET6_ADDRSTRLEN]; 2515 char buf[128]; 2516 char buf2[128]; 2517 int af; 2518 int i; 2519 2520 af = tg->tg_phyint_inst->pii_af; 2521 2522 logdebug("Target on %s %s addr %s\n" 2523 "status %d rtt_sa %lld rtt_sd %lld crtt %d tg_in_use %d\n", 2524 AF_STR(af), tg->tg_phyint_inst->pii_name, 2525 pr_addr(af, tg->tg_address, abuf, sizeof (abuf)), 2526 tg->tg_status, tg->tg_rtt_sa, tg->tg_rtt_sd, 2527 tg->tg_crtt, tg->tg_in_use); 2528 2529 buf[0] = '\0'; 2530 for (i = 0; i < tg->tg_num_deferred; i++) { 2531 (void) snprintf(buf2, sizeof (buf2), " %dms", 2532 tg->tg_deferred[i]); 2533 (void) strlcat(buf, buf2, sizeof (buf)); 2534 } 2535 logdebug("deferred rtts:%s\n", buf); 2536 } 2537 2538 void 2539 phyint_inst_print_all(void) 2540 { 2541 struct phyint_instance *pii; 2542 2543 for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) { 2544 phyint_inst_print(pii); 2545 } 2546 } 2547 2548 /* 2549 * Compare two prefixes that have the same prefix length. 2550 * Fails if the prefix length is unreasonable. 2551 */ 2552 boolean_t 2553 prefix_equal(struct in6_addr p1, struct in6_addr p2, uint_t prefix_len) 2554 { 2555 uchar_t mask; 2556 int j; 2557 2558 if (prefix_len > IPV6_ABITS) 2559 return (_B_FALSE); 2560 2561 for (j = 0; prefix_len > 8; prefix_len -= 8, j++) 2562 if (p1.s6_addr[j] != p2.s6_addr[j]) 2563 return (_B_FALSE); 2564 2565 /* Make the N leftmost bits one */ 2566 mask = 0xff << (8 - prefix_len); 2567 if ((p1.s6_addr[j] & mask) != (p2.s6_addr[j] & mask)) 2568 return (_B_FALSE); 2569 2570 return (_B_TRUE); 2571 } 2572 2573 /* 2574 * Get the number of UP logints on phyint `pi'. 2575 */ 2576 static int 2577 logint_upcount(struct phyint *pi) 2578 { 2579 struct logint *li; 2580 int count = 0; 2581 2582 if (pi->pi_v4 != NULL) { 2583 for (li = pi->pi_v4->pii_logint; li != NULL; li = li->li_next) { 2584 if (li->li_flags & IFF_UP) 2585 count++; 2586 } 2587 } 2588 2589 if (pi->pi_v6 != NULL) { 2590 for (li = pi->pi_v6->pii_logint; li != NULL; li = li->li_next) { 2591 if (li->li_flags & IFF_UP) 2592 count++; 2593 } 2594 } 2595 2596 return (count); 2597 } 2598 2599 /* 2600 * Get the phyint instance with the other (IPv4 / IPv6) protocol 2601 */ 2602 struct phyint_instance * 2603 phyint_inst_other(struct phyint_instance *pii) 2604 { 2605 if (pii->pii_af == AF_INET) 2606 return (pii->pii_phyint->pi_v6); 2607 else 2608 return (pii->pii_phyint->pi_v4); 2609 } 2610 2611 /* 2612 * Check whether a phyint is functioning. 2613 */ 2614 static boolean_t 2615 phyint_is_functioning(struct phyint *pi) 2616 { 2617 if (pi->pi_state == PI_RUNNING) 2618 return (_B_TRUE); 2619 return (pi->pi_state == PI_NOTARGETS && !(pi->pi_flags & IFF_FAILED)); 2620 } 2621 2622 /* 2623 * Check whether a phyint is usable. 2624 */ 2625 static boolean_t 2626 phyint_is_usable(struct phyint *pi) 2627 { 2628 if (logint_upcount(pi) == 0) 2629 return (_B_FALSE); 2630 return (phyint_is_functioning(pi)); 2631 } 2632 2633 /* 2634 * Post an EC_IPMP sysevent of subclass `subclass' and attributes `nvl'. 2635 * Before sending the event, it prepends the current version of the IPMP 2636 * sysevent API. Returns 0 on success, -1 on failure (in either case, 2637 * `nvl' is freed). 2638 */ 2639 static int 2640 post_event(const char *subclass, nvlist_t *nvl) 2641 { 2642 static evchan_t *evchp = NULL; 2643 2644 /* 2645 * Initialize the event channel if we haven't already done so. 2646 */ 2647 if (evchp == NULL) { 2648 errno = sysevent_evc_bind(IPMP_EVENT_CHAN, &evchp, EVCH_CREAT); 2649 if (errno != 0) { 2650 logerr("cannot create event channel `%s': %s\n", 2651 IPMP_EVENT_CHAN, strerror(errno)); 2652 goto failed; 2653 } 2654 } 2655 2656 errno = nvlist_add_uint32(nvl, IPMP_EVENT_VERSION, 2657 IPMP_EVENT_CUR_VERSION); 2658 if (errno != 0) { 2659 logerr("cannot create `%s' event: %s", subclass, 2660 strerror(errno)); 2661 goto failed; 2662 } 2663 2664 errno = sysevent_evc_publish(evchp, EC_IPMP, subclass, "com.sun", 2665 "in.mpathd", nvl, EVCH_NOSLEEP); 2666 if (errno != 0) { 2667 logerr("cannot send `%s' event: %s\n", subclass, 2668 strerror(errno)); 2669 goto failed; 2670 } 2671 2672 nvlist_free(nvl); 2673 return (0); 2674 failed: 2675 nvlist_free(nvl); 2676 return (-1); 2677 } 2678 2679 /* 2680 * Return the external IPMP state associated with phyint `pi'. 2681 */ 2682 static ipmp_if_state_t 2683 ifstate(struct phyint *pi) 2684 { 2685 switch (pi->pi_state) { 2686 case PI_NOTARGETS: 2687 if (pi->pi_flags & IFF_FAILED) 2688 return (IPMP_IF_FAILED); 2689 return (IPMP_IF_UNKNOWN); 2690 2691 case PI_OFFLINE: 2692 return (IPMP_IF_OFFLINE); 2693 2694 case PI_FAILED: 2695 return (IPMP_IF_FAILED); 2696 2697 case PI_RUNNING: 2698 return (IPMP_IF_OK); 2699 } 2700 2701 logerr("ifstate: unknown state %d; aborting\n", pi->pi_state); 2702 abort(); 2703 /* NOTREACHED */ 2704 } 2705 2706 /* 2707 * Return the external IPMP interface type associated with phyint `pi'. 2708 */ 2709 static ipmp_if_type_t 2710 iftype(struct phyint *pi) 2711 { 2712 if (pi->pi_flags & IFF_STANDBY) 2713 return (IPMP_IF_STANDBY); 2714 else 2715 return (IPMP_IF_NORMAL); 2716 } 2717 2718 /* 2719 * Return the external IPMP link state associated with phyint `pi'. 2720 */ 2721 static ipmp_if_linkstate_t 2722 iflinkstate(struct phyint *pi) 2723 { 2724 if (!(pi->pi_notes & (DL_NOTE_LINK_UP|DL_NOTE_LINK_DOWN))) 2725 return (IPMP_LINK_UNKNOWN); 2726 2727 return (LINK_DOWN(pi) ? IPMP_LINK_DOWN : IPMP_LINK_UP); 2728 } 2729 2730 /* 2731 * Return the external IPMP probe state associated with phyint `pi'. 2732 */ 2733 static ipmp_if_probestate_t 2734 ifprobestate(struct phyint *pi) 2735 { 2736 if (!PROBE_ENABLED(pi->pi_v4) && !PROBE_ENABLED(pi->pi_v6)) 2737 return (IPMP_PROBE_DISABLED); 2738 2739 if (pi->pi_state == PI_FAILED) 2740 return (IPMP_PROBE_FAILED); 2741 2742 if (!PROBE_CAPABLE(pi->pi_v4) && !PROBE_CAPABLE(pi->pi_v6)) 2743 return (IPMP_PROBE_UNKNOWN); 2744 2745 return (IPMP_PROBE_OK); 2746 } 2747 2748 /* 2749 * Return the external IPMP target mode associated with phyint instance `pii'. 2750 */ 2751 static ipmp_if_targmode_t 2752 iftargmode(struct phyint_instance *pii) 2753 { 2754 if (!PROBE_ENABLED(pii)) 2755 return (IPMP_TARG_DISABLED); 2756 else if (pii->pii_targets_are_routers) 2757 return (IPMP_TARG_ROUTES); 2758 else 2759 return (IPMP_TARG_MULTICAST); 2760 } 2761 2762 /* 2763 * Return the external IPMP flags associated with phyint `pi'. 2764 */ 2765 static ipmp_if_flags_t 2766 ifflags(struct phyint *pi) 2767 { 2768 ipmp_if_flags_t flags = 0; 2769 2770 if (logint_upcount(pi) == 0) 2771 flags |= IPMP_IFFLAG_DOWN; 2772 if (pi->pi_flags & IFF_INACTIVE) 2773 flags |= IPMP_IFFLAG_INACTIVE; 2774 if (pi->pi_hwaddrdup) 2775 flags |= IPMP_IFFLAG_HWADDRDUP; 2776 if (phyint_is_functioning(pi) && flags == 0) 2777 flags |= IPMP_IFFLAG_ACTIVE; 2778 2779 return (flags); 2780 } 2781 2782 /* 2783 * Store the test address used on phyint instance `pii' in `ssp'. If there's 2784 * no test address, 0.0.0.0 is stored. 2785 */ 2786 static struct sockaddr_storage * 2787 iftestaddr(struct phyint_instance *pii, struct sockaddr_storage *ssp) 2788 { 2789 if (PROBE_ENABLED(pii)) 2790 addr2storage(pii->pii_af, &pii->pii_probe_logint->li_addr, ssp); 2791 else 2792 addr2storage(AF_INET6, &in6addr_any, ssp); 2793 2794 return (ssp); 2795 } 2796 2797 /* 2798 * Return the external IPMP group state associated with phyint group `pg'. 2799 */ 2800 static ipmp_group_state_t 2801 groupstate(struct phyint_group *pg) 2802 { 2803 switch (pg->pg_state) { 2804 case PG_FAILED: 2805 return (IPMP_GROUP_FAILED); 2806 case PG_DEGRADED: 2807 return (IPMP_GROUP_DEGRADED); 2808 case PG_OK: 2809 return (IPMP_GROUP_OK); 2810 } 2811 2812 logerr("groupstate: unknown state %d; aborting\n", pg->pg_state); 2813 abort(); 2814 /* NOTREACHED */ 2815 } 2816 2817 /* 2818 * Return the external IPMP probe state associated with probe `ps'. 2819 */ 2820 static ipmp_probe_state_t 2821 probestate(struct probe_stats *ps) 2822 { 2823 switch (ps->pr_status) { 2824 case PR_UNUSED: 2825 case PR_LOST: 2826 return (IPMP_PROBE_LOST); 2827 case PR_UNACKED: 2828 return (IPMP_PROBE_SENT); 2829 case PR_ACKED: 2830 return (IPMP_PROBE_ACKED); 2831 } 2832 2833 logerr("probestate: unknown state %d; aborting\n", ps->pr_status); 2834 abort(); 2835 /* NOTREACHED */ 2836 } 2837 2838 /* 2839 * Generate an ESC_IPMP_PROBE_STATE sysevent for the probe described by `pr' 2840 * on phyint instance `pii'. Returns 0 on success, -1 on failure. 2841 */ 2842 int 2843 probe_state_event(struct probe_stats *pr, struct phyint_instance *pii) 2844 { 2845 nvlist_t *nvl; 2846 hrtime_t proc_time = 0, recv_time = 0; 2847 struct sockaddr_storage ss; 2848 struct target *tg = pr->pr_target; 2849 2850 errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 2851 if (errno != 0) { 2852 logperror("cannot create `interface change' event"); 2853 return (-1); 2854 } 2855 2856 errno = nvlist_add_uint32(nvl, IPMP_PROBE_ID, pr->pr_id); 2857 if (errno != 0) 2858 goto failed; 2859 2860 errno = nvlist_add_string(nvl, IPMP_IF_NAME, pii->pii_phyint->pi_name); 2861 if (errno != 0) 2862 goto failed; 2863 2864 errno = nvlist_add_uint32(nvl, IPMP_PROBE_STATE, probestate(pr)); 2865 if (errno != 0) 2866 goto failed; 2867 2868 errno = nvlist_add_hrtime(nvl, IPMP_PROBE_START_TIME, 2869 pr->pr_hrtime_start); 2870 if (errno != 0) 2871 goto failed; 2872 2873 errno = nvlist_add_hrtime(nvl, IPMP_PROBE_SENT_TIME, 2874 pr->pr_hrtime_sent); 2875 if (errno != 0) 2876 goto failed; 2877 2878 if (pr->pr_status == PR_ACKED) { 2879 recv_time = pr->pr_hrtime_ackrecv; 2880 proc_time = pr->pr_hrtime_ackproc; 2881 } 2882 2883 errno = nvlist_add_hrtime(nvl, IPMP_PROBE_ACKRECV_TIME, recv_time); 2884 if (errno != 0) 2885 goto failed; 2886 2887 errno = nvlist_add_hrtime(nvl, IPMP_PROBE_ACKPROC_TIME, proc_time); 2888 if (errno != 0) 2889 goto failed; 2890 2891 if (tg != NULL) 2892 addr2storage(pii->pii_af, &tg->tg_address, &ss); 2893 else 2894 addr2storage(pii->pii_af, &in6addr_any, &ss); 2895 2896 errno = nvlist_add_byte_array(nvl, IPMP_PROBE_TARGET, (uchar_t *)&ss, 2897 sizeof (ss)); 2898 if (errno != 0) 2899 goto failed; 2900 2901 errno = nvlist_add_int64(nvl, IPMP_PROBE_TARGET_RTTAVG, 2902 tg->tg_rtt_sa / 8); 2903 if (errno != 0) 2904 goto failed; 2905 2906 errno = nvlist_add_int64(nvl, IPMP_PROBE_TARGET_RTTDEV, 2907 tg->tg_rtt_sd / 4); 2908 if (errno != 0) 2909 goto failed; 2910 2911 return (post_event(ESC_IPMP_PROBE_STATE, nvl)); 2912 failed: 2913 logperror("cannot create `probe state' event"); 2914 nvlist_free(nvl); 2915 return (-1); 2916 } 2917 2918 /* 2919 * Generate an ESC_IPMP_GROUP_STATE sysevent for phyint group `pg'. 2920 * Returns 0 on success, -1 on failure. 2921 */ 2922 static int 2923 phyint_group_state_event(struct phyint_group *pg) 2924 { 2925 nvlist_t *nvl; 2926 2927 errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 2928 if (errno != 0) { 2929 logperror("cannot create `group state change' event"); 2930 return (-1); 2931 } 2932 2933 errno = nvlist_add_string(nvl, IPMP_GROUP_NAME, pg->pg_name); 2934 if (errno != 0) 2935 goto failed; 2936 2937 errno = nvlist_add_uint64(nvl, IPMP_GROUP_SIGNATURE, pg->pg_sig); 2938 if (errno != 0) 2939 goto failed; 2940 2941 errno = nvlist_add_uint32(nvl, IPMP_GROUP_STATE, groupstate(pg)); 2942 if (errno != 0) 2943 goto failed; 2944 2945 return (post_event(ESC_IPMP_GROUP_STATE, nvl)); 2946 failed: 2947 logperror("cannot create `group state change' event"); 2948 nvlist_free(nvl); 2949 return (-1); 2950 } 2951 2952 /* 2953 * Generate an ESC_IPMP_GROUP_CHANGE sysevent of type `op' for phyint group 2954 * `pg'. Returns 0 on success, -1 on failure. 2955 */ 2956 static int 2957 phyint_group_change_event(struct phyint_group *pg, ipmp_group_op_t op) 2958 { 2959 nvlist_t *nvl; 2960 2961 errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 2962 if (errno != 0) { 2963 logperror("cannot create `group change' event"); 2964 return (-1); 2965 } 2966 2967 errno = nvlist_add_string(nvl, IPMP_GROUP_NAME, pg->pg_name); 2968 if (errno != 0) 2969 goto failed; 2970 2971 errno = nvlist_add_uint64(nvl, IPMP_GROUP_SIGNATURE, pg->pg_sig); 2972 if (errno != 0) 2973 goto failed; 2974 2975 errno = nvlist_add_uint64(nvl, IPMP_GROUPLIST_SIGNATURE, 2976 phyint_grouplistsig); 2977 if (errno != 0) 2978 goto failed; 2979 2980 errno = nvlist_add_uint32(nvl, IPMP_GROUP_OPERATION, op); 2981 if (errno != 0) 2982 goto failed; 2983 2984 return (post_event(ESC_IPMP_GROUP_CHANGE, nvl)); 2985 failed: 2986 logperror("cannot create `group change' event"); 2987 nvlist_free(nvl); 2988 return (-1); 2989 } 2990 2991 /* 2992 * Generate an ESC_IPMP_GROUP_MEMBER_CHANGE sysevent for phyint `pi' in 2993 * group `pg'. Returns 0 on success, -1 on failure. 2994 */ 2995 static int 2996 phyint_group_member_event(struct phyint_group *pg, struct phyint *pi, 2997 ipmp_if_op_t op) 2998 { 2999 nvlist_t *nvl; 3000 3001 errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 3002 if (errno != 0) { 3003 logperror("cannot create `group member change' event"); 3004 return (-1); 3005 } 3006 3007 errno = nvlist_add_string(nvl, IPMP_GROUP_NAME, pg->pg_name); 3008 if (errno != 0) 3009 goto failed; 3010 3011 errno = nvlist_add_uint64(nvl, IPMP_GROUP_SIGNATURE, pg->pg_sig); 3012 if (errno != 0) 3013 goto failed; 3014 3015 errno = nvlist_add_uint32(nvl, IPMP_IF_OPERATION, op); 3016 if (errno != 0) 3017 goto failed; 3018 3019 errno = nvlist_add_string(nvl, IPMP_IF_NAME, pi->pi_name); 3020 if (errno != 0) 3021 goto failed; 3022 3023 errno = nvlist_add_uint32(nvl, IPMP_IF_TYPE, iftype(pi)); 3024 if (errno != 0) 3025 goto failed; 3026 3027 errno = nvlist_add_uint32(nvl, IPMP_IF_STATE, ifstate(pi)); 3028 if (errno != 0) 3029 goto failed; 3030 3031 return (post_event(ESC_IPMP_GROUP_MEMBER_CHANGE, nvl)); 3032 failed: 3033 logperror("cannot create `group member change' event"); 3034 nvlist_free(nvl); 3035 return (-1); 3036 3037 } 3038 3039 /* 3040 * Generate an ESC_IPMP_IF_CHANGE sysevent for phyint `pi' in group `pg'. 3041 * Returns 0 on success, -1 on failure. 3042 */ 3043 static int 3044 phyint_state_event(struct phyint_group *pg, struct phyint *pi) 3045 { 3046 nvlist_t *nvl; 3047 3048 errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 3049 if (errno != 0) { 3050 logperror("cannot create `interface change' event"); 3051 return (-1); 3052 } 3053 3054 errno = nvlist_add_string(nvl, IPMP_GROUP_NAME, pg->pg_name); 3055 if (errno != 0) 3056 goto failed; 3057 3058 errno = nvlist_add_uint64(nvl, IPMP_GROUP_SIGNATURE, pg->pg_sig); 3059 if (errno != 0) 3060 goto failed; 3061 3062 errno = nvlist_add_string(nvl, IPMP_IF_NAME, pi->pi_name); 3063 if (errno != 0) 3064 goto failed; 3065 3066 errno = nvlist_add_uint32(nvl, IPMP_IF_TYPE, iftype(pi)); 3067 if (errno != 0) 3068 goto failed; 3069 3070 errno = nvlist_add_uint32(nvl, IPMP_IF_STATE, ifstate(pi)); 3071 if (errno != 0) 3072 goto failed; 3073 3074 return (post_event(ESC_IPMP_IF_CHANGE, nvl)); 3075 failed: 3076 logperror("cannot create `interface change' event"); 3077 nvlist_free(nvl); 3078 return (-1); 3079 3080 } 3081 3082 /* 3083 * Generate a signature for use. The signature is conceptually divided 3084 * into two pieces: a random 16-bit "generation number" and a 48-bit 3085 * monotonically increasing integer. The generation number protects 3086 * against stale updates to entities (e.g., IPMP groups) that have been 3087 * deleted and since recreated. 3088 */ 3089 static uint64_t 3090 gensig(void) 3091 { 3092 static int seeded = 0; 3093 3094 if (seeded == 0) { 3095 srand48((long)gethrtime()); 3096 seeded++; 3097 } 3098 3099 return ((uint64_t)lrand48() << 48 | 1); 3100 } 3101 3102 /* 3103 * Store the information associated with group `grname' into a dynamically 3104 * allocated structure pointed to by `*grinfopp'. Returns an IPMP error code. 3105 */ 3106 unsigned int 3107 getgroupinfo(const char *grname, ipmp_groupinfo_t **grinfopp) 3108 { 3109 struct phyint *pi; 3110 struct phyint_group *pg; 3111 char (*ifs)[LIFNAMSIZ]; 3112 unsigned int i, j; 3113 unsigned int nif = 0, naddr = 0; 3114 lifgroupinfo_t lifgr; 3115 addrlist_t *addrp; 3116 struct sockaddr_storage *addrs; 3117 int fdt = 0; 3118 3119 pg = phyint_group_lookup(grname); 3120 if (pg == NULL) 3121 return (IPMP_EUNKGROUP); 3122 3123 /* 3124 * Tally up the number of interfaces, allocate an array to hold them, 3125 * and insert their names into the array. While we're at it, if any 3126 * interface is actually enabled to send probes, save the group fdt. 3127 */ 3128 for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) 3129 nif++; 3130 3131 ifs = alloca(nif * sizeof (*ifs)); 3132 for (i = 0, pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext, i++) { 3133 assert(i < nif); 3134 (void) strlcpy(ifs[i], pi->pi_name, LIFNAMSIZ); 3135 if (PROBE_ENABLED(pi->pi_v4) || PROBE_ENABLED(pi->pi_v6)) 3136 fdt = pg->pg_fdt; 3137 } 3138 assert(i == nif); 3139 3140 /* 3141 * If this is the anonymous group, there's no other information to 3142 * collect (since there's no IPMP interface). 3143 */ 3144 if (pg == phyint_anongroup) { 3145 *grinfopp = ipmp_groupinfo_create(pg->pg_name, pg->pg_sig, fdt, 3146 groupstate(pg), nif, ifs, "", "", "", "", 0, NULL); 3147 return (*grinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS); 3148 } 3149 3150 /* 3151 * Grab some additional information about the group from the kernel. 3152 * (NOTE: since SIOCGLIFGROUPINFO does not look up by interface name, 3153 * we can use ifsock_v4 even for a V6-only group.) 3154 */ 3155 (void) strlcpy(lifgr.gi_grname, grname, LIFGRNAMSIZ); 3156 if (ioctl(ifsock_v4, SIOCGLIFGROUPINFO, &lifgr) == -1) { 3157 if (errno == ENOENT) 3158 return (IPMP_EUNKGROUP); 3159 3160 logperror("getgroupinfo: SIOCGLIFGROUPINFO"); 3161 return (IPMP_FAILURE); 3162 } 3163 3164 /* 3165 * Tally up the number of data addresses, allocate an array to hold 3166 * them, and insert their values into the array. 3167 */ 3168 for (addrp = pg->pg_addrs; addrp != NULL; addrp = addrp->al_next) 3169 naddr++; 3170 3171 addrs = alloca(naddr * sizeof (*addrs)); 3172 i = 0; 3173 for (addrp = pg->pg_addrs; addrp != NULL; addrp = addrp->al_next) { 3174 /* 3175 * It's possible to have duplicate addresses (if some are 3176 * down). Weed the dups out to avoid confusing consumers. 3177 * (If groups start having tons of addresses, we'll need a 3178 * better algorithm here.) 3179 */ 3180 for (j = 0; j < i; j++) { 3181 if (sockaddrcmp(&addrs[j], &addrp->al_addr)) 3182 break; 3183 } 3184 if (j == i) { 3185 assert(i < naddr); 3186 addrs[i++] = addrp->al_addr; 3187 } 3188 } 3189 naddr = i; 3190 3191 *grinfopp = ipmp_groupinfo_create(pg->pg_name, pg->pg_sig, fdt, 3192 groupstate(pg), nif, ifs, lifgr.gi_grifname, lifgr.gi_m4ifname, 3193 lifgr.gi_m6ifname, lifgr.gi_bcifname, naddr, addrs); 3194 return (*grinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS); 3195 } 3196 3197 /* 3198 * Store the target information associated with phyint instance `pii' into a 3199 * dynamically allocated structure pointed to by `*targinfopp'. Returns an 3200 * IPMP error code. 3201 */ 3202 unsigned int 3203 gettarginfo(struct phyint_instance *pii, const char *name, 3204 ipmp_targinfo_t **targinfopp) 3205 { 3206 uint_t ntarg = 0; 3207 struct target *tg; 3208 struct sockaddr_storage ss; 3209 struct sockaddr_storage *targs = NULL; 3210 3211 if (PROBE_CAPABLE(pii)) { 3212 targs = alloca(pii->pii_ntargets * sizeof (*targs)); 3213 tg = pii->pii_target_next; 3214 do { 3215 if (tg->tg_status == TG_ACTIVE) { 3216 assert(ntarg < pii->pii_ntargets); 3217 addr2storage(pii->pii_af, &tg->tg_address, 3218 &targs[ntarg++]); 3219 } 3220 if ((tg = tg->tg_next) == NULL) 3221 tg = pii->pii_targets; 3222 } while (tg != pii->pii_target_next); 3223 3224 assert(ntarg == pii->pii_ntargets); 3225 } 3226 3227 *targinfopp = ipmp_targinfo_create(name, iftestaddr(pii, &ss), 3228 iftargmode(pii), ntarg, targs); 3229 return (*targinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS); 3230 } 3231 3232 /* 3233 * Store the information associated with interface `ifname' into a dynamically 3234 * allocated structure pointed to by `*ifinfopp'. Returns an IPMP error code. 3235 */ 3236 unsigned int 3237 getifinfo(const char *ifname, ipmp_ifinfo_t **ifinfopp) 3238 { 3239 int retval; 3240 struct phyint *pi; 3241 ipmp_targinfo_t *targinfo4; 3242 ipmp_targinfo_t *targinfo6; 3243 3244 pi = phyint_lookup(ifname); 3245 if (pi == NULL) 3246 return (IPMP_EUNKIF); 3247 3248 if ((retval = gettarginfo(pi->pi_v4, pi->pi_name, &targinfo4)) != 0 || 3249 (retval = gettarginfo(pi->pi_v6, pi->pi_name, &targinfo6)) != 0) 3250 goto out; 3251 3252 *ifinfopp = ipmp_ifinfo_create(pi->pi_name, pi->pi_group->pg_name, 3253 ifstate(pi), iftype(pi), iflinkstate(pi), ifprobestate(pi), 3254 ifflags(pi), targinfo4, targinfo6); 3255 retval = (*ifinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS); 3256 out: 3257 if (targinfo4 != NULL) 3258 ipmp_freetarginfo(targinfo4); 3259 if (targinfo6 != NULL) 3260 ipmp_freetarginfo(targinfo6); 3261 return (retval); 3262 } 3263 3264 /* 3265 * Store the current list of IPMP groups into a dynamically allocated 3266 * structure pointed to by `*grlistpp'. Returns an IPMP error code. 3267 */ 3268 unsigned int 3269 getgrouplist(ipmp_grouplist_t **grlistpp) 3270 { 3271 struct phyint_group *pg; 3272 char (*groups)[LIFGRNAMSIZ]; 3273 unsigned int i, ngroup; 3274 3275 /* 3276 * Tally up the number of groups, allocate an array to hold them, and 3277 * insert their names into the array. 3278 */ 3279 for (ngroup = 0, pg = phyint_groups; pg != NULL; pg = pg->pg_next) 3280 ngroup++; 3281 3282 groups = alloca(ngroup * sizeof (*groups)); 3283 for (i = 0, pg = phyint_groups; pg != NULL; pg = pg->pg_next, i++) { 3284 assert(i < ngroup); 3285 (void) strlcpy(groups[i], pg->pg_name, LIFGRNAMSIZ); 3286 } 3287 assert(i == ngroup); 3288 3289 *grlistpp = ipmp_grouplist_create(phyint_grouplistsig, ngroup, groups); 3290 return (*grlistpp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS); 3291 } 3292 3293 /* 3294 * Store the address information for `ssp' (in group `grname') into a 3295 * dynamically allocated structure pointed to by `*adinfopp'. Returns an IPMP 3296 * error code. (We'd call this function getaddrinfo(), but it would conflict 3297 * with getaddrinfo(3SOCKET)). 3298 */ 3299 unsigned int 3300 getgraddrinfo(const char *grname, struct sockaddr_storage *ssp, 3301 ipmp_addrinfo_t **adinfopp) 3302 { 3303 int ifsock; 3304 addrlist_t *addrp, *addrmatchp = NULL; 3305 ipmp_addr_state_t state; 3306 const char *binding = ""; 3307 struct lifreq lifr; 3308 struct phyint_group *pg; 3309 3310 if ((pg = phyint_group_lookup(grname)) == NULL) 3311 return (IPMP_EUNKADDR); 3312 3313 /* 3314 * Walk through the data addresses, and find a match. Note that since 3315 * some of the addresses may be down, more than one may match. We 3316 * prefer an up address (if one exists). 3317 */ 3318 for (addrp = pg->pg_addrs; addrp != NULL; addrp = addrp->al_next) { 3319 if (sockaddrcmp(ssp, &addrp->al_addr)) { 3320 addrmatchp = addrp; 3321 if (addrmatchp->al_flags & IFF_UP) 3322 break; 3323 } 3324 } 3325 3326 if (addrmatchp == NULL) 3327 return (IPMP_EUNKADDR); 3328 3329 state = (addrmatchp->al_flags & IFF_UP) ? IPMP_ADDR_UP : IPMP_ADDR_DOWN; 3330 if (state == IPMP_ADDR_UP) { 3331 ifsock = (ssp->ss_family == AF_INET) ? ifsock_v4 : ifsock_v6; 3332 (void) strlcpy(lifr.lifr_name, addrmatchp->al_name, LIFNAMSIZ); 3333 if (ioctl(ifsock, SIOCGLIFBINDING, &lifr) >= 0) 3334 binding = lifr.lifr_binding; 3335 } 3336 3337 *adinfopp = ipmp_addrinfo_create(ssp, state, pg->pg_name, binding); 3338 return (*adinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS); 3339 } 3340 3341 /* 3342 * Store a snapshot of the IPMP subsystem into a dynamically allocated 3343 * structure pointed to by `*snapp'. Returns an IPMP error code. 3344 */ 3345 unsigned int 3346 getsnap(ipmp_snap_t **snapp) 3347 { 3348 ipmp_grouplist_t *grlistp; 3349 ipmp_groupinfo_t *grinfop; 3350 ipmp_addrinfo_t *adinfop; 3351 ipmp_addrlist_t *adlistp; 3352 ipmp_ifinfo_t *ifinfop; 3353 ipmp_snap_t *snap; 3354 struct phyint *pi; 3355 unsigned int i, j; 3356 int retval; 3357 3358 snap = ipmp_snap_create(); 3359 if (snap == NULL) 3360 return (IPMP_ENOMEM); 3361 3362 /* 3363 * Add group list. 3364 */ 3365 retval = getgrouplist(&snap->sn_grlistp); 3366 if (retval != IPMP_SUCCESS) 3367 goto failed; 3368 3369 /* 3370 * Add information for each group in the list, along with all of its 3371 * data addresses. 3372 */ 3373 grlistp = snap->sn_grlistp; 3374 for (i = 0; i < grlistp->gl_ngroup; i++) { 3375 retval = getgroupinfo(grlistp->gl_groups[i], &grinfop); 3376 if (retval != IPMP_SUCCESS) 3377 goto failed; 3378 3379 retval = ipmp_snap_addgroupinfo(snap, grinfop); 3380 if (retval != IPMP_SUCCESS) { 3381 ipmp_freegroupinfo(grinfop); 3382 goto failed; 3383 } 3384 3385 adlistp = grinfop->gr_adlistp; 3386 for (j = 0; j < adlistp->al_naddr; j++) { 3387 retval = getgraddrinfo(grinfop->gr_name, 3388 &adlistp->al_addrs[j], &adinfop); 3389 if (retval != IPMP_SUCCESS) 3390 goto failed; 3391 3392 retval = ipmp_snap_addaddrinfo(snap, adinfop); 3393 if (retval != IPMP_SUCCESS) { 3394 ipmp_freeaddrinfo(adinfop); 3395 goto failed; 3396 } 3397 } 3398 } 3399 3400 /* 3401 * Add information for each configured phyint. 3402 */ 3403 for (pi = phyints; pi != NULL; pi = pi->pi_next) { 3404 retval = getifinfo(pi->pi_name, &ifinfop); 3405 if (retval != IPMP_SUCCESS) 3406 goto failed; 3407 3408 retval = ipmp_snap_addifinfo(snap, ifinfop); 3409 if (retval != IPMP_SUCCESS) { 3410 ipmp_freeifinfo(ifinfop); 3411 goto failed; 3412 } 3413 } 3414 3415 *snapp = snap; 3416 return (IPMP_SUCCESS); 3417 failed: 3418 ipmp_snap_free(snap); 3419 return (retval); 3420 } 3421