1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include "mpd_defs.h" 27 #include "mpd_tables.h" 28 29 /* 30 * Global list of phyints, phyint instances, phyint groups and the anonymous 31 * group; the latter is initialized in phyint_init(). 32 */ 33 struct phyint *phyints = NULL; 34 struct phyint_instance *phyint_instances = NULL; 35 struct phyint_group *phyint_groups = NULL; 36 struct phyint_group *phyint_anongroup; 37 38 /* 39 * Grouplist signature; initialized in phyint_init(). 40 */ 41 static uint64_t phyint_grouplistsig; 42 43 static void phyint_inst_insert(struct phyint_instance *pii); 44 static void phyint_inst_print(struct phyint_instance *pii); 45 46 static void phyint_insert(struct phyint *pi, struct phyint_group *pg); 47 static void phyint_delete(struct phyint *pi); 48 static boolean_t phyint_is_usable(struct phyint *pi); 49 50 static void logint_print(struct logint *li); 51 static void logint_insert(struct phyint_instance *pii, struct logint *li); 52 static struct logint *logint_lookup(struct phyint_instance *pii, char *li_name); 53 54 static void target_print(struct target *tg); 55 static void target_insert(struct phyint_instance *pii, struct target *tg); 56 static struct target *target_first(struct phyint_instance *pii); 57 static struct target *target_select_best(struct phyint_instance *pii); 58 static void target_flush_hosts(struct phyint_group *pg); 59 60 static void reset_pii_probes(struct phyint_instance *pii, struct target *tg); 61 62 static boolean_t phyint_inst_v6_sockinit(struct phyint_instance *pii); 63 static boolean_t phyint_inst_v4_sockinit(struct phyint_instance *pii); 64 65 static int phyint_state_event(struct phyint_group *pg, struct phyint *pi); 66 static int phyint_group_state_event(struct phyint_group *pg); 67 static int phyint_group_change_event(struct phyint_group *pg, ipmp_group_op_t); 68 static int phyint_group_member_event(struct phyint_group *pg, struct phyint *pi, 69 ipmp_if_op_t op); 70 71 static int logint_upcount(struct phyint *pi); 72 static uint64_t gensig(void); 73 74 /* Initialize any per-file global state. Returns 0 on success, -1 on failure */ 75 int 76 phyint_init(void) 77 { 78 phyint_grouplistsig = gensig(); 79 if (track_all_phyints) { 80 phyint_anongroup = phyint_group_create(""); 81 if (phyint_anongroup == NULL) 82 return (-1); 83 phyint_group_insert(phyint_anongroup); 84 } 85 return (0); 86 } 87 88 /* Return the phyint with the given name */ 89 struct phyint * 90 phyint_lookup(const char *name) 91 { 92 struct phyint *pi; 93 94 if (debug & D_PHYINT) 95 logdebug("phyint_lookup(%s)\n", name); 96 97 for (pi = phyints; pi != NULL; pi = pi->pi_next) { 98 if (strncmp(pi->pi_name, name, sizeof (pi->pi_name)) == 0) 99 break; 100 } 101 return (pi); 102 } 103 104 /* 105 * Lookup a phyint in the group that has the same hardware address as `pi', or 106 * NULL if there's none. If `online_only' is set, then only online phyints 107 * are considered when matching. Otherwise, phyints that had been offlined 108 * due to a duplicate hardware address will also be considered. 109 */ 110 static struct phyint * 111 phyint_lookup_hwaddr(struct phyint *pi, boolean_t online_only) 112 { 113 struct phyint *pi2; 114 115 if (pi->pi_group == phyint_anongroup) 116 return (NULL); 117 118 for (pi2 = pi->pi_group->pg_phyint; pi2 != NULL; pi2 = pi2->pi_pgnext) { 119 if (pi2 == pi) 120 continue; 121 122 /* 123 * NOTE: even when online_only is B_FALSE, we ignore phyints 124 * that are administratively offline (rather than offline 125 * because they're dups); when they're brought back online, 126 * they'll be flagged as dups if need be. 127 */ 128 if (pi2->pi_state == PI_OFFLINE && 129 (online_only || !pi2->pi_hwaddrdup)) 130 continue; 131 132 if (pi2->pi_hwaddrlen == pi->pi_hwaddrlen && 133 bcmp(pi2->pi_hwaddr, pi->pi_hwaddr, pi->pi_hwaddrlen) == 0) 134 return (pi2); 135 } 136 return (NULL); 137 } 138 139 /* 140 * Respond to DLPI notifications. Currently, this only processes physical 141 * address changes for the phyint passed via `arg' by onlining or offlining 142 * phyints in the group. 143 */ 144 /* ARGSUSED */ 145 static void 146 phyint_link_notify(dlpi_handle_t dh, dlpi_notifyinfo_t *dnip, void *arg) 147 { 148 struct phyint *pi = arg; 149 struct phyint *oduppi = NULL, *duppi = NULL; 150 151 assert((dnip->dni_note & pi->pi_notes) != 0); 152 153 if (dnip->dni_note != DL_NOTE_PHYS_ADDR) 154 return; 155 156 assert(dnip->dni_physaddrlen <= DLPI_PHYSADDR_MAX); 157 158 /* 159 * If our hardware address hasn't changed, there's nothing to do. 160 */ 161 if (pi->pi_hwaddrlen == dnip->dni_physaddrlen && 162 bcmp(pi->pi_hwaddr, dnip->dni_physaddr, pi->pi_hwaddrlen) == 0) 163 return; 164 165 oduppi = phyint_lookup_hwaddr(pi, _B_FALSE); 166 pi->pi_hwaddrlen = dnip->dni_physaddrlen; 167 (void) memcpy(pi->pi_hwaddr, dnip->dni_physaddr, pi->pi_hwaddrlen); 168 duppi = phyint_lookup_hwaddr(pi, _B_FALSE); 169 170 if (oduppi != NULL || pi->pi_hwaddrdup) { 171 /* 172 * Our old hardware address was a duplicate. If we'd been 173 * offlined because of it, and our new hardware address is not 174 * a duplicate, then bring us online. Otherwise, `oduppi' 175 * must've been the one brought offline; bring it online. 176 */ 177 if (pi->pi_hwaddrdup) { 178 if (duppi == NULL) 179 (void) phyint_undo_offline(pi); 180 } else { 181 assert(oduppi->pi_hwaddrdup); 182 (void) phyint_undo_offline(oduppi); 183 } 184 } 185 186 if (duppi != NULL && !pi->pi_hwaddrdup) { 187 /* 188 * Our new hardware address was a duplicate and we're not 189 * yet flagged as a duplicate; bring us offline. 190 */ 191 pi->pi_hwaddrdup = _B_TRUE; 192 (void) phyint_offline(pi, 0); 193 } 194 } 195 196 /* 197 * Initialize information about the underlying link for `pi', and set us 198 * up to be notified about future changes. Returns _B_TRUE on success. 199 */ 200 boolean_t 201 phyint_link_init(struct phyint *pi) 202 { 203 int retval; 204 uint_t notes; 205 const char *errmsg; 206 dlpi_notifyid_t id; 207 208 pi->pi_notes = 0; 209 retval = dlpi_open(pi->pi_name, &pi->pi_dh, 0); 210 if (retval != DLPI_SUCCESS) { 211 pi->pi_dh = NULL; 212 errmsg = "cannot open"; 213 goto failed; 214 } 215 216 pi->pi_hwaddrlen = DLPI_PHYSADDR_MAX; 217 retval = dlpi_get_physaddr(pi->pi_dh, DL_CURR_PHYS_ADDR, pi->pi_hwaddr, 218 &pi->pi_hwaddrlen); 219 if (retval != DLPI_SUCCESS) { 220 errmsg = "cannot get hardware address"; 221 goto failed; 222 } 223 224 /* 225 * Check if the link supports DLPI link state notifications. For 226 * historical reasons, the actual changes are tracked through routing 227 * sockets, so we immediately disable the notification upon success. 228 */ 229 notes = DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN; 230 retval = dlpi_enabnotify(pi->pi_dh, notes, phyint_link_notify, pi, &id); 231 if (retval == DLPI_SUCCESS) { 232 (void) dlpi_disabnotify(pi->pi_dh, id, NULL); 233 pi->pi_notes |= notes; 234 } 235 236 /* 237 * Enable notification of hardware address changes to keep pi_hwaddr 238 * up-to-date and track if we need to offline/undo-offline phyints. 239 */ 240 notes = DL_NOTE_PHYS_ADDR; 241 retval = dlpi_enabnotify(pi->pi_dh, notes, phyint_link_notify, pi, &id); 242 if (retval == DLPI_SUCCESS && poll_add(dlpi_fd(pi->pi_dh)) == 0) 243 pi->pi_notes |= notes; 244 245 return (_B_TRUE); 246 failed: 247 logerr("%s: %s: %s\n", pi->pi_name, errmsg, dlpi_strerror(retval)); 248 if (pi->pi_dh != NULL) { 249 dlpi_close(pi->pi_dh); 250 pi->pi_dh = NULL; 251 } 252 return (_B_FALSE); 253 } 254 255 /* 256 * Close use of link on `pi'. 257 */ 258 void 259 phyint_link_close(struct phyint *pi) 260 { 261 if (pi->pi_notes & DL_NOTE_PHYS_ADDR) { 262 (void) poll_remove(dlpi_fd(pi->pi_dh)); 263 pi->pi_notes &= ~DL_NOTE_PHYS_ADDR; 264 } 265 266 /* 267 * NOTE: we don't clear pi_notes here so that iflinkstate() can still 268 * properly report the link state even when offline (which is possible 269 * since we use IFF_RUNNING to track link state). 270 */ 271 dlpi_close(pi->pi_dh); 272 pi->pi_dh = NULL; 273 } 274 275 /* Return the phyint instance with the given name and the given family */ 276 struct phyint_instance * 277 phyint_inst_lookup(int af, char *name) 278 { 279 struct phyint *pi; 280 281 if (debug & D_PHYINT) 282 logdebug("phyint_inst_lookup(%s %s)\n", AF_STR(af), name); 283 284 assert(af == AF_INET || af == AF_INET6); 285 286 pi = phyint_lookup(name); 287 if (pi == NULL) 288 return (NULL); 289 290 return (PHYINT_INSTANCE(pi, af)); 291 } 292 293 struct phyint_group * 294 phyint_group_lookup(const char *pg_name) 295 { 296 struct phyint_group *pg; 297 298 if (debug & D_PHYINT) 299 logdebug("phyint_group_lookup(%s)\n", pg_name); 300 301 for (pg = phyint_groups; pg != NULL; pg = pg->pg_next) { 302 if (strncmp(pg->pg_name, pg_name, sizeof (pg->pg_name)) == 0) 303 break; 304 } 305 return (pg); 306 } 307 308 /* 309 * Insert the phyint in the linked list of all phyints. If the phyint belongs 310 * to some group, insert it in the phyint group list. 311 */ 312 static void 313 phyint_insert(struct phyint *pi, struct phyint_group *pg) 314 { 315 if (debug & D_PHYINT) 316 logdebug("phyint_insert(%s '%s')\n", pi->pi_name, pg->pg_name); 317 318 /* Insert the phyint at the head of the 'all phyints' list */ 319 pi->pi_next = phyints; 320 pi->pi_prev = NULL; 321 if (phyints != NULL) 322 phyints->pi_prev = pi; 323 phyints = pi; 324 325 /* 326 * Insert the phyint at the head of the 'phyint_group members' list 327 * of the phyint group to which it belongs. 328 */ 329 pi->pi_pgnext = NULL; 330 pi->pi_pgprev = NULL; 331 pi->pi_group = pg; 332 333 pi->pi_pgnext = pg->pg_phyint; 334 if (pi->pi_pgnext != NULL) 335 pi->pi_pgnext->pi_pgprev = pi; 336 pg->pg_phyint = pi; 337 338 /* Refresh the group state now that this phyint has been added */ 339 phyint_group_refresh_state(pg); 340 341 pg->pg_sig++; 342 (void) phyint_group_member_event(pg, pi, IPMP_IF_ADD); 343 } 344 345 /* Insert the phyint instance in the linked list of all phyint instances. */ 346 static void 347 phyint_inst_insert(struct phyint_instance *pii) 348 { 349 if (debug & D_PHYINT) { 350 logdebug("phyint_inst_insert(%s %s)\n", 351 AF_STR(pii->pii_af), pii->pii_name); 352 } 353 354 /* 355 * Insert the phyint at the head of the 'all phyint instances' list. 356 */ 357 pii->pii_next = phyint_instances; 358 pii->pii_prev = NULL; 359 if (phyint_instances != NULL) 360 phyint_instances->pii_prev = pii; 361 phyint_instances = pii; 362 } 363 364 /* 365 * Create a new phyint with the given parameters. Also insert it into 366 * the list of all phyints and the list of phyint group members by calling 367 * phyint_insert(). 368 */ 369 static struct phyint * 370 phyint_create(char *pi_name, struct phyint_group *pg, uint_t ifindex, 371 uint64_t flags) 372 { 373 struct phyint *pi; 374 375 pi = calloc(1, sizeof (struct phyint)); 376 if (pi == NULL) { 377 logperror("phyint_create: calloc"); 378 return (NULL); 379 } 380 381 /* 382 * Record the phyint values. 383 */ 384 (void) strlcpy(pi->pi_name, pi_name, sizeof (pi->pi_name)); 385 pi->pi_taddrthresh = getcurrentsec() + TESTADDR_CONF_TIME; 386 pi->pi_ifindex = ifindex; 387 pi->pi_icmpid = htons(((getpid() & 0xFF) << 8) | (ifindex & 0xFF)); 388 389 pi->pi_state = PI_INIT; 390 pi->pi_flags = PHYINT_FLAGS(flags); 391 392 /* 393 * Initialize the link state. The link state is initialized to 394 * up, so that if the link is down when IPMP starts monitoring 395 * the interface, it will appear as though there has been a 396 * transition from the link up to link down. This avoids 397 * having to treat this situation as a special case. 398 */ 399 INIT_LINK_STATE(pi); 400 401 if (!phyint_link_init(pi)) { 402 free(pi); 403 return (NULL); 404 } 405 406 /* 407 * Insert the phyint in the list of all phyints, and the 408 * list of phyint group members 409 */ 410 phyint_insert(pi, pg); 411 412 /* 413 * If the interface is offline, we set the state to PI_OFFLINE. 414 * Otherwise, optimistically consider this interface running. Later 415 * (in process_link_state_changes()), we will adjust this to match the 416 * current state of the link. Further, if test addresses are 417 * subsequently assigned, we will transition to PI_NOTARGETS and then 418 * to either PI_RUNNING or PI_FAILED depending on the probe results. 419 */ 420 if (flags & IFF_OFFLINE) 421 phyint_chstate(pi, PI_OFFLINE); 422 else 423 phyint_transition_to_running(pi); /* calls phyint_chstate() */ 424 425 return (pi); 426 } 427 428 /* 429 * Create a new phyint instance belonging to the phyint 'pi' and address 430 * family 'af'. Also insert it into the list of all phyint instances by 431 * calling phyint_inst_insert(). 432 */ 433 static struct phyint_instance * 434 phyint_inst_create(struct phyint *pi, int af) 435 { 436 struct phyint_instance *pii; 437 438 pii = calloc(1, sizeof (struct phyint_instance)); 439 if (pii == NULL) { 440 logperror("phyint_inst_create: calloc"); 441 return (NULL); 442 } 443 444 /* 445 * Attach the phyint instance to the phyint. 446 * Set the back pointers as well 447 */ 448 pii->pii_phyint = pi; 449 if (af == AF_INET) 450 pi->pi_v4 = pii; 451 else 452 pi->pi_v6 = pii; 453 454 pii->pii_in_use = 1; 455 pii->pii_probe_sock = -1; 456 pii->pii_snxt = 1; 457 pii->pii_af = af; 458 pii->pii_fd_hrtime = gethrtime() + 459 (FAILURE_DETECTION_QP * (hrtime_t)NANOSEC); 460 pii->pii_flags = pi->pi_flags; 461 462 /* Insert the phyint instance in the list of all phyint instances. */ 463 phyint_inst_insert(pii); 464 return (pii); 465 } 466 467 /* 468 * Change the state of phyint `pi' to state `state'. 469 */ 470 void 471 phyint_chstate(struct phyint *pi, enum pi_state state) 472 { 473 /* 474 * To simplify things, some callers always set a given state 475 * regardless of the previous state of the phyint (e.g., setting 476 * PI_RUNNING when it's already set). We shouldn't bother 477 * generating an event or consuming a signature for these, since 478 * the actual state of the interface is unchanged. 479 */ 480 if (pi->pi_state == state) 481 return; 482 483 pi->pi_state = state; 484 phyint_changed(pi); 485 } 486 487 /* 488 * Note that `pi' has changed state. 489 */ 490 void 491 phyint_changed(struct phyint *pi) 492 { 493 pi->pi_group->pg_sig++; 494 (void) phyint_state_event(pi->pi_group, pi); 495 } 496 497 /* 498 * Insert the phyint group in the linked list of all phyint groups 499 * at the head of the list 500 */ 501 void 502 phyint_group_insert(struct phyint_group *pg) 503 { 504 pg->pg_next = phyint_groups; 505 pg->pg_prev = NULL; 506 if (phyint_groups != NULL) 507 phyint_groups->pg_prev = pg; 508 phyint_groups = pg; 509 510 phyint_grouplistsig++; 511 (void) phyint_group_change_event(pg, IPMP_GROUP_ADD); 512 } 513 514 /* 515 * Create a new phyint group called 'name'. 516 */ 517 struct phyint_group * 518 phyint_group_create(const char *name) 519 { 520 struct phyint_group *pg; 521 522 if (debug & D_PHYINT) 523 logdebug("phyint_group_create(%s)\n", name); 524 525 pg = calloc(1, sizeof (struct phyint_group)); 526 if (pg == NULL) { 527 logperror("phyint_group_create: calloc"); 528 return (NULL); 529 } 530 531 (void) strlcpy(pg->pg_name, name, sizeof (pg->pg_name)); 532 pg->pg_sig = gensig(); 533 pg->pg_fdt = user_failure_detection_time; 534 pg->pg_probeint = user_probe_interval; 535 pg->pg_in_use = _B_TRUE; 536 537 /* 538 * Normal groups always start in the PG_FAILED state since they 539 * have no active interfaces. In contrast, anonymous groups are 540 * heterogeneous and thus always PG_OK. 541 */ 542 pg->pg_state = (name[0] == '\0' ? PG_OK : PG_FAILED); 543 544 return (pg); 545 } 546 547 /* 548 * Change the state of the phyint group `pg' to state `state'. 549 */ 550 void 551 phyint_group_chstate(struct phyint_group *pg, enum pg_state state) 552 { 553 assert(pg != phyint_anongroup); 554 555 /* 556 * To simplify things, some callers always set a given state 557 * regardless of the previous state of the group (e.g., setting 558 * PG_DEGRADED when it's already set). We shouldn't bother 559 * generating an event or consuming a signature for these, since 560 * the actual state of the group is unchanged. 561 */ 562 if (pg->pg_state == state) 563 return; 564 565 pg->pg_state = state; 566 567 switch (state) { 568 case PG_FAILED: 569 /* 570 * We can never know with certainty that a group has 571 * failed. It is possible that all known targets have 572 * failed simultaneously, and new targets have come up 573 * instead. If the targets are routers then router 574 * discovery will kick in, and we will see the new routers 575 * thru routing socket messages. But if the targets are 576 * hosts, we have to discover it by multicast. So flush 577 * all the host targets. The next probe will send out a 578 * multicast echo request. If this is a group failure, we 579 * will still not see any response, otherwise the group 580 * will be repaired after we get NUM_PROBE_REPAIRS 581 * consecutive unicast replies on any phyint. 582 */ 583 target_flush_hosts(pg); 584 break; 585 586 case PG_OK: 587 case PG_DEGRADED: 588 break; 589 590 default: 591 logerr("phyint_group_chstate: invalid group state %d; " 592 "aborting\n", state); 593 abort(); 594 } 595 596 pg->pg_sig++; 597 (void) phyint_group_state_event(pg); 598 } 599 600 /* 601 * Create a new phyint instance and initialize it from the values supplied by 602 * the kernel. Always check for ENXIO before logging any error, because the 603 * interface could have vanished after completion of SIOCGLIFCONF. 604 * Return values: 605 * pointer to the phyint instance on success 606 * NULL on failure Eg. if the phyint instance is not found in the kernel 607 */ 608 struct phyint_instance * 609 phyint_inst_init_from_k(int af, char *pi_name) 610 { 611 char pg_name[LIFNAMSIZ + 1]; 612 int ifsock; 613 uint_t ifindex; 614 uint64_t flags; 615 struct lifreq lifr; 616 struct phyint *pi; 617 struct phyint_instance *pii; 618 boolean_t pi_created; 619 struct phyint_group *pg; 620 621 retry: 622 pii = NULL; 623 pi = NULL; 624 pg = NULL; 625 pi_created = _B_FALSE; 626 627 if (debug & D_PHYINT) { 628 logdebug("phyint_inst_init_from_k(%s %s)\n", 629 AF_STR(af), pi_name); 630 } 631 632 assert(af == AF_INET || af == AF_INET6); 633 634 /* Get the socket for doing ioctls */ 635 ifsock = (af == AF_INET) ? ifsock_v4 : ifsock_v6; 636 637 /* 638 * Get the interface flags. Ignore virtual interfaces, IPMP 639 * meta-interfaces, point-to-point interfaces, and interfaces 640 * that can't support multicast. 641 */ 642 (void) strlcpy(lifr.lifr_name, pi_name, sizeof (lifr.lifr_name)); 643 if (ioctl(ifsock, SIOCGLIFFLAGS, (char *)&lifr) < 0) { 644 if (errno != ENXIO) { 645 logperror("phyint_inst_init_from_k:" 646 " ioctl (get flags)"); 647 } 648 return (NULL); 649 } 650 flags = lifr.lifr_flags; 651 if (!(flags & IFF_MULTICAST) || 652 (flags & (IFF_VIRTUAL|IFF_IPMP|IFF_POINTOPOINT))) 653 return (NULL); 654 655 /* 656 * Get the ifindex for recording later in our tables, in case we need 657 * to create a new phyint. 658 */ 659 if (ioctl(ifsock, SIOCGLIFINDEX, (char *)&lifr) < 0) { 660 if (errno != ENXIO) { 661 logperror("phyint_inst_init_from_k: " 662 " ioctl (get lifindex)"); 663 } 664 return (NULL); 665 } 666 ifindex = lifr.lifr_index; 667 668 /* 669 * Get the phyint group name of this phyint, from the kernel. 670 */ 671 if (ioctl(ifsock, SIOCGLIFGROUPNAME, (char *)&lifr) < 0) { 672 if (errno != ENXIO) { 673 logperror("phyint_inst_init_from_k: " 674 "ioctl (get group name)"); 675 } 676 return (NULL); 677 } 678 (void) strlcpy(pg_name, lifr.lifr_groupname, sizeof (pg_name)); 679 680 /* 681 * If the phyint is not part of any group, pg_name is the 682 * null string. If 'track_all_phyints' is false, there is no 683 * need to create a phyint. 684 */ 685 if (pg_name[0] == '\0' && !track_all_phyints) { 686 /* 687 * If the IFF_FAILED, IFF_INACTIVE, or IFF_OFFLINE flags are 688 * set, reset them. These flags shouldn't be set if in.mpathd 689 * isn't tracking the interface. 690 */ 691 if ((flags & (IFF_FAILED | IFF_INACTIVE | IFF_OFFLINE))) { 692 lifr.lifr_flags = flags & 693 ~(IFF_FAILED | IFF_INACTIVE | IFF_OFFLINE); 694 if (ioctl(ifsock, SIOCSLIFFLAGS, (char *)&lifr) < 0) { 695 if (errno != ENXIO) { 696 logperror("phyint_inst_init_from_k:" 697 " ioctl (set flags)"); 698 } 699 } 700 } 701 return (NULL); 702 } 703 704 /* 705 * We need to create a new phyint instance. We may also need to 706 * create the group if e.g. the SIOCGLIFCONF loop in initifs() found 707 * an underlying interface before it found its IPMP meta-interface. 708 * Note that we keep any created groups even if phyint_inst_from_k() 709 * fails since a group's existence is not dependent on the ability of 710 * in.mpathd to the track the group's interfaces. 711 */ 712 if ((pg = phyint_group_lookup(pg_name)) == NULL) { 713 if ((pg = phyint_group_create(pg_name)) == NULL) { 714 logerr("phyint_inst_init_from_k: cannot create group " 715 "%s\n", pg_name); 716 return (NULL); 717 } 718 phyint_group_insert(pg); 719 } 720 721 /* 722 * Lookup the phyint. If the phyint does not exist create it. 723 */ 724 pi = phyint_lookup(pi_name); 725 if (pi == NULL) { 726 pi = phyint_create(pi_name, pg, ifindex, flags); 727 if (pi == NULL) { 728 logerr("phyint_inst_init_from_k:" 729 " unable to create phyint %s\n", pi_name); 730 return (NULL); 731 } 732 pi_created = _B_TRUE; 733 } else { 734 /* The phyint exists already. */ 735 assert(pi_created == _B_FALSE); 736 /* 737 * Normally we should see consistent values for the IPv4 and 738 * IPv6 instances, for phyint properties. If we don't, it 739 * means things have changed underneath us, and we should 740 * resync our tables with the kernel. Check whether the 741 * interface index has changed. If so, it is most likely 742 * the interface has been unplumbed and replumbed, 743 * while we are yet to update our tables. Do it now. 744 */ 745 if (pi->pi_ifindex != ifindex) { 746 phyint_inst_delete(PHYINT_INSTANCE(pi, AF_OTHER(af))); 747 goto retry; 748 } 749 assert(PHYINT_INSTANCE(pi, af) == NULL); 750 751 /* 752 * If the group name seen by the IPv4 and IPv6 instances 753 * are different, it is most likely the groupname has 754 * changed, while we are yet to update our tables. Do it now. 755 */ 756 if (strcmp(pi->pi_group->pg_name, pg_name) != 0) { 757 phyint_inst_delete(PHYINT_INSTANCE(pi, 758 AF_OTHER(af))); 759 goto retry; 760 } 761 } 762 763 /* 764 * Create a new phyint instance, corresponding to the 'af' 765 * passed in. 766 */ 767 pii = phyint_inst_create(pi, af); 768 if (pii == NULL) { 769 logerr("phyint_inst_init_from_k: unable to create" 770 "phyint inst %s\n", pi->pi_name); 771 if (pi_created) 772 phyint_delete(pi); 773 774 return (NULL); 775 } 776 777 if (pi_created) { 778 /* 779 * If this phyint does not have a unique hardware address in its 780 * group, offline it. (The change_pif_flags() implementation 781 * requires that we defer this until after the phyint_instance 782 * is created.) 783 */ 784 if (phyint_lookup_hwaddr(pi, _B_TRUE) != NULL) { 785 pi->pi_hwaddrdup = _B_TRUE; 786 (void) phyint_offline(pi, 0); 787 } 788 } 789 790 return (pii); 791 } 792 793 /* 794 * Bind pii_probe_sock to the address associated with pii_probe_logint. 795 * This socket will be used for sending and receiving ICMP/ICMPv6 probes to 796 * targets. Do the common part in this function, and complete the 797 * initializations by calling the protocol specific functions 798 * phyint_inst_v{4,6}_sockinit() respectively. 799 * 800 * Return values: _B_TRUE/_B_FALSE for success or failure respectively. 801 */ 802 boolean_t 803 phyint_inst_sockinit(struct phyint_instance *pii) 804 { 805 boolean_t success; 806 struct phyint_group *pg; 807 808 if (debug & D_PHYINT) { 809 logdebug("phyint_inst_sockinit(%s %s)\n", 810 AF_STR(pii->pii_af), pii->pii_name); 811 } 812 813 assert(pii->pii_probe_logint != NULL); 814 assert(pii->pii_probe_logint->li_flags & IFF_UP); 815 assert(pii->pii_probe_logint->li_flags & IFF_NOFAILOVER); 816 assert(pii->pii_af == AF_INET || pii->pii_af == AF_INET6); 817 818 /* 819 * If the socket is already bound, close pii_probe_sock 820 */ 821 if (pii->pii_probe_sock != -1) 822 close_probe_socket(pii, _B_TRUE); 823 824 /* 825 * If the phyint is not part of a named group and track_all_phyints is 826 * false, simply return. 827 */ 828 pg = pii->pii_phyint->pi_group; 829 if (pg == phyint_anongroup && !track_all_phyints) { 830 if (debug & D_PHYINT) 831 logdebug("phyint_inst_sockinit: no group\n"); 832 return (_B_FALSE); 833 } 834 835 /* 836 * Initialize the socket by calling the protocol specific function. 837 * If it succeeds, add the socket to the poll list. 838 */ 839 if (pii->pii_af == AF_INET6) 840 success = phyint_inst_v6_sockinit(pii); 841 else 842 success = phyint_inst_v4_sockinit(pii); 843 844 if (success && (poll_add(pii->pii_probe_sock) == 0)) 845 return (_B_TRUE); 846 847 /* Something failed, cleanup and return false */ 848 if (pii->pii_probe_sock != -1) 849 close_probe_socket(pii, _B_FALSE); 850 851 return (_B_FALSE); 852 } 853 854 /* 855 * IPv6 specific part in initializing the pii_probe_sock. This socket is 856 * used to send/receive ICMPv6 probe packets. 857 */ 858 static boolean_t 859 phyint_inst_v6_sockinit(struct phyint_instance *pii) 860 { 861 icmp6_filter_t filter; 862 int hopcount = 1; 863 int off = 0; 864 int on = 1; 865 struct sockaddr_in6 testaddr; 866 int flags; 867 868 /* 869 * Open a raw socket with ICMPv6 protocol. 870 * 871 * Use IPV6_BOUND_IF to make sure that probes are sent and received on 872 * the specified phyint only. Bind to the test address to ensure that 873 * the responses are sent to the specified phyint. 874 * 875 * Set the hopcount to 1 so that probe packets are not routed. 876 * Disable multicast loopback. Set the receive filter to 877 * receive only ICMPv6 echo replies. 878 */ 879 pii->pii_probe_sock = socket(pii->pii_af, SOCK_RAW, IPPROTO_ICMPV6); 880 if (pii->pii_probe_sock < 0) { 881 logperror_pii(pii, "phyint_inst_v6_sockinit: socket"); 882 return (_B_FALSE); 883 } 884 885 /* 886 * Probes must not block in case of lower layer issues. 887 */ 888 if ((flags = fcntl(pii->pii_probe_sock, F_GETFL, 0)) == -1) { 889 logperror_pii(pii, "phyint_inst_v6_sockinit: fcntl" 890 " F_GETFL"); 891 return (_B_FALSE); 892 } 893 if (fcntl(pii->pii_probe_sock, F_SETFL, 894 flags | O_NONBLOCK) == -1) { 895 logperror_pii(pii, "phyint_inst_v6_sockinit: fcntl" 896 " F_SETFL O_NONBLOCK"); 897 return (_B_FALSE); 898 } 899 900 bzero(&testaddr, sizeof (testaddr)); 901 testaddr.sin6_family = AF_INET6; 902 testaddr.sin6_port = 0; 903 testaddr.sin6_addr = pii->pii_probe_logint->li_addr; 904 905 if (bind(pii->pii_probe_sock, (struct sockaddr *)&testaddr, 906 sizeof (testaddr)) < 0) { 907 logperror_pii(pii, "phyint_inst_v6_sockinit: IPv6 bind"); 908 return (_B_FALSE); 909 } 910 911 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_MULTICAST_IF, 912 (char *)&pii->pii_ifindex, sizeof (uint_t)) < 0) { 913 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 914 " IPV6_MULTICAST_IF"); 915 return (_B_FALSE); 916 } 917 918 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_BOUND_IF, 919 &pii->pii_ifindex, sizeof (uint_t)) < 0) { 920 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 921 " IPV6_BOUND_IF"); 922 return (_B_FALSE); 923 } 924 925 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_UNICAST_HOPS, 926 (char *)&hopcount, sizeof (hopcount)) < 0) { 927 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 928 " IPV6_UNICAST_HOPS"); 929 return (_B_FALSE); 930 } 931 932 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_MULTICAST_HOPS, 933 (char *)&hopcount, sizeof (hopcount)) < 0) { 934 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 935 " IPV6_MULTICAST_HOPS"); 936 return (_B_FALSE); 937 } 938 939 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_MULTICAST_LOOP, 940 (char *)&off, sizeof (off)) < 0) { 941 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 942 " IPV6_MULTICAST_LOOP"); 943 return (_B_FALSE); 944 } 945 946 /* 947 * Filter out so that we only receive ICMP echo replies 948 */ 949 ICMP6_FILTER_SETBLOCKALL(&filter); 950 ICMP6_FILTER_SETPASS(ICMP6_ECHO_REPLY, &filter); 951 952 if (setsockopt(pii->pii_probe_sock, IPPROTO_ICMPV6, ICMP6_FILTER, 953 (char *)&filter, sizeof (filter)) < 0) { 954 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 955 " ICMP6_FILTER"); 956 return (_B_FALSE); 957 } 958 959 /* Enable receipt of hoplimit */ 960 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, 961 &on, sizeof (on)) < 0) { 962 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 963 " IPV6_RECVHOPLIMIT"); 964 return (_B_FALSE); 965 } 966 967 /* Enable receipt of timestamp */ 968 if (setsockopt(pii->pii_probe_sock, SOL_SOCKET, SO_TIMESTAMP, 969 &on, sizeof (on)) < 0) { 970 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 971 " SO_TIMESTAMP"); 972 return (_B_FALSE); 973 } 974 975 return (_B_TRUE); 976 } 977 978 /* 979 * IPv4 specific part in initializing the pii_probe_sock. This socket is 980 * used to send/receive ICMPv4 probe packets. 981 */ 982 static boolean_t 983 phyint_inst_v4_sockinit(struct phyint_instance *pii) 984 { 985 struct sockaddr_in testaddr; 986 char char_off = 0; 987 int ttl = 1; 988 char char_ttl = 1; 989 int on = 1; 990 int flags; 991 992 /* 993 * Open a raw socket with ICMPv4 protocol. 994 * 995 * Use IP_BOUND_IF to make sure that probes are sent and received on 996 * the specified phyint only. Bind to the test address to ensure that 997 * the responses are sent to the specified phyint. 998 * 999 * Set the ttl to 1 so that probe packets are not routed. 1000 * Disable multicast loopback. Enable receipt of timestamp. 1001 */ 1002 pii->pii_probe_sock = socket(pii->pii_af, SOCK_RAW, IPPROTO_ICMP); 1003 if (pii->pii_probe_sock < 0) { 1004 logperror_pii(pii, "phyint_inst_v4_sockinit: socket"); 1005 return (_B_FALSE); 1006 } 1007 1008 /* 1009 * Probes must not block in case of lower layer issues. 1010 */ 1011 if ((flags = fcntl(pii->pii_probe_sock, F_GETFL, 0)) == -1) { 1012 logperror_pii(pii, "phyint_inst_v4_sockinit: fcntl" 1013 " F_GETFL"); 1014 return (_B_FALSE); 1015 } 1016 if (fcntl(pii->pii_probe_sock, F_SETFL, 1017 flags | O_NONBLOCK) == -1) { 1018 logperror_pii(pii, "phyint_inst_v4_sockinit: fcntl" 1019 " F_SETFL O_NONBLOCK"); 1020 return (_B_FALSE); 1021 } 1022 1023 bzero(&testaddr, sizeof (testaddr)); 1024 testaddr.sin_family = AF_INET; 1025 testaddr.sin_port = 0; 1026 IN6_V4MAPPED_TO_INADDR(&pii->pii_probe_logint->li_addr, 1027 &testaddr.sin_addr); 1028 1029 if (bind(pii->pii_probe_sock, (struct sockaddr *)&testaddr, 1030 sizeof (testaddr)) < 0) { 1031 logperror_pii(pii, "phyint_inst_v4_sockinit: IPv4 bind"); 1032 return (_B_FALSE); 1033 } 1034 1035 if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_BOUND_IF, 1036 &pii->pii_ifindex, sizeof (uint_t)) < 0) { 1037 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1038 " IP_BOUND_IF"); 1039 return (_B_FALSE); 1040 } 1041 1042 if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_MULTICAST_IF, 1043 (char *)&testaddr.sin_addr, sizeof (struct in_addr)) < 0) { 1044 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1045 " IP_MULTICAST_IF"); 1046 return (_B_FALSE); 1047 } 1048 1049 if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_TTL, 1050 (char *)&ttl, sizeof (ttl)) < 0) { 1051 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1052 " IP_TTL"); 1053 return (_B_FALSE); 1054 } 1055 1056 if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_MULTICAST_LOOP, 1057 (char *)&char_off, sizeof (char_off)) == -1) { 1058 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1059 " IP_MULTICAST_LOOP"); 1060 return (_B_FALSE); 1061 } 1062 1063 if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_MULTICAST_TTL, 1064 (char *)&char_ttl, sizeof (char_ttl)) == -1) { 1065 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1066 " IP_MULTICAST_TTL"); 1067 return (_B_FALSE); 1068 } 1069 1070 if (setsockopt(pii->pii_probe_sock, SOL_SOCKET, SO_TIMESTAMP, &on, 1071 sizeof (on)) < 0) { 1072 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1073 " SO_TIMESTAMP"); 1074 return (_B_FALSE); 1075 } 1076 1077 return (_B_TRUE); 1078 } 1079 1080 /* 1081 * Remove the phyint group from the list of 'all phyint groups' 1082 * and free it. 1083 */ 1084 void 1085 phyint_group_delete(struct phyint_group *pg) 1086 { 1087 /* 1088 * The anonymous group always exists, even when empty. 1089 */ 1090 if (pg == phyint_anongroup) 1091 return; 1092 1093 if (debug & D_PHYINT) 1094 logdebug("phyint_group_delete('%s')\n", pg->pg_name); 1095 1096 /* 1097 * The phyint group must be empty, and must not have any phyints. 1098 * The phyint group must be in the list of all phyint groups 1099 */ 1100 assert(pg->pg_phyint == NULL); 1101 assert(phyint_groups == pg || pg->pg_prev != NULL); 1102 1103 if (pg->pg_prev != NULL) 1104 pg->pg_prev->pg_next = pg->pg_next; 1105 else 1106 phyint_groups = pg->pg_next; 1107 1108 if (pg->pg_next != NULL) 1109 pg->pg_next->pg_prev = pg->pg_prev; 1110 1111 pg->pg_next = NULL; 1112 pg->pg_prev = NULL; 1113 1114 phyint_grouplistsig++; 1115 (void) phyint_group_change_event(pg, IPMP_GROUP_REMOVE); 1116 1117 addrlist_free(&pg->pg_addrs); 1118 free(pg); 1119 } 1120 1121 /* 1122 * Refresh the state of `pg' based on its current members. 1123 */ 1124 void 1125 phyint_group_refresh_state(struct phyint_group *pg) 1126 { 1127 enum pg_state state; 1128 enum pg_state origstate = pg->pg_state; 1129 struct phyint *pi, *usablepi; 1130 uint_t nif = 0, nusable = 0; 1131 1132 /* 1133 * Anonymous groups never change state. 1134 */ 1135 if (pg == phyint_anongroup) 1136 return; 1137 1138 for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) { 1139 nif++; 1140 if (phyint_is_usable(pi)) { 1141 nusable++; 1142 usablepi = pi; 1143 } 1144 } 1145 1146 if (nusable == 0) 1147 state = PG_FAILED; 1148 else if (nif == nusable) 1149 state = PG_OK; 1150 else 1151 state = PG_DEGRADED; 1152 1153 phyint_group_chstate(pg, state); 1154 1155 /* 1156 * If we're shutting down, skip logging messages since otherwise our 1157 * shutdown housecleaning will make us report that groups are unusable. 1158 */ 1159 if (cleanup_started) 1160 return; 1161 1162 /* 1163 * NOTE: We use pg_failmsg_printed rather than origstate since 1164 * otherwise at startup we'll log a "now usable" message when the 1165 * first usable phyint is added to an empty group. 1166 */ 1167 if (state != PG_FAILED && pg->pg_failmsg_printed) { 1168 assert(origstate == PG_FAILED); 1169 logerr("At least 1 IP interface (%s) in group %s is now " 1170 "usable\n", usablepi->pi_name, pg->pg_name); 1171 pg->pg_failmsg_printed = _B_FALSE; 1172 } else if (origstate != PG_FAILED && state == PG_FAILED) { 1173 logerr("All IP interfaces in group %s are now unusable\n", 1174 pg->pg_name); 1175 pg->pg_failmsg_printed = _B_TRUE; 1176 } 1177 } 1178 1179 /* 1180 * Extract information from the kernel about the desired phyint. 1181 * Look only for properties of the phyint and not properties of logints. 1182 * Take appropriate action on the changes. 1183 * Return codes: 1184 * PI_OK 1185 * The phyint exists in the kernel and matches our knowledge 1186 * of the phyint. 1187 * PI_DELETED 1188 * The phyint has vanished in the kernel. 1189 * PI_IFINDEX_CHANGED 1190 * The phyint's interface index has changed. 1191 * Ask the caller to delete and recreate the phyint. 1192 * PI_IOCTL_ERROR 1193 * Some ioctl error. Don't change anything. 1194 * PI_GROUP_CHANGED 1195 * The phyint has changed group. 1196 */ 1197 int 1198 phyint_inst_update_from_k(struct phyint_instance *pii) 1199 { 1200 struct lifreq lifr; 1201 int ifsock; 1202 struct phyint *pi; 1203 1204 pi = pii->pii_phyint; 1205 1206 if (debug & D_PHYINT) { 1207 logdebug("phyint_inst_update_from_k(%s %s)\n", 1208 AF_STR(pii->pii_af), pi->pi_name); 1209 } 1210 1211 /* 1212 * Get the ifindex from the kernel, for comparison with the 1213 * value in our tables. 1214 */ 1215 (void) strncpy(lifr.lifr_name, pi->pi_name, sizeof (lifr.lifr_name)); 1216 lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0'; 1217 1218 ifsock = (pii->pii_af == AF_INET) ? ifsock_v4 : ifsock_v6; 1219 if (ioctl(ifsock, SIOCGLIFINDEX, &lifr) < 0) { 1220 if (errno == ENXIO) { 1221 return (PI_DELETED); 1222 } else { 1223 logperror_pii(pii, "phyint_inst_update_from_k:" 1224 " ioctl (get lifindex)"); 1225 return (PI_IOCTL_ERROR); 1226 } 1227 } 1228 1229 if (lifr.lifr_index != pi->pi_ifindex) { 1230 /* 1231 * The index has changed. Most likely the interface has 1232 * been unplumbed and replumbed. Ask the caller to take 1233 * appropriate action. 1234 */ 1235 if (debug & D_PHYINT) { 1236 logdebug("phyint_inst_update_from_k:" 1237 " old index %d new index %d\n", 1238 pi->pi_ifindex, lifr.lifr_index); 1239 } 1240 return (PI_IFINDEX_CHANGED); 1241 } 1242 1243 /* 1244 * Get the group name from the kernel, for comparison with 1245 * the value in our tables. 1246 */ 1247 if (ioctl(ifsock, SIOCGLIFGROUPNAME, &lifr) < 0) { 1248 if (errno == ENXIO) { 1249 return (PI_DELETED); 1250 } else { 1251 logperror_pii(pii, "phyint_inst_update_from_k:" 1252 " ioctl (get groupname)"); 1253 return (PI_IOCTL_ERROR); 1254 } 1255 } 1256 1257 /* 1258 * If the phyint has changed group i.e. if the phyint group name 1259 * returned by the kernel is different, ask the caller to delete 1260 * and recreate the phyint in the right group 1261 */ 1262 if (strcmp(lifr.lifr_groupname, pi->pi_group->pg_name) != 0) { 1263 /* Groupname has changed */ 1264 if (debug & D_PHYINT) { 1265 logdebug("phyint_inst_update_from_k:" 1266 " groupname change\n"); 1267 } 1268 return (PI_GROUP_CHANGED); 1269 } 1270 1271 /* 1272 * Get the current phyint flags from the kernel, and determine what 1273 * flags have changed by comparing against our tables. Note that the 1274 * IFF_INACTIVE processing in initifs() relies on this call to ensure 1275 * that IFF_INACTIVE is really still set on the interface. 1276 */ 1277 if (ioctl(ifsock, SIOCGLIFFLAGS, &lifr) < 0) { 1278 if (errno == ENXIO) { 1279 return (PI_DELETED); 1280 } else { 1281 logperror_pii(pii, "phyint_inst_update_from_k: " 1282 " ioctl (get flags)"); 1283 return (PI_IOCTL_ERROR); 1284 } 1285 } 1286 1287 pi->pi_flags = PHYINT_FLAGS(lifr.lifr_flags); 1288 if (pi->pi_v4 != NULL) 1289 pi->pi_v4->pii_flags = pi->pi_flags; 1290 if (pi->pi_v6 != NULL) 1291 pi->pi_v6->pii_flags = pi->pi_flags; 1292 1293 /* 1294 * Make sure the IFF_FAILED flag is set if and only if we think 1295 * the interface should be failed. 1296 */ 1297 if (pi->pi_flags & IFF_FAILED) { 1298 if (pi->pi_state == PI_RUNNING) 1299 (void) change_pif_flags(pi, 0, IFF_FAILED); 1300 } else { 1301 if (pi->pi_state == PI_FAILED) 1302 (void) change_pif_flags(pi, IFF_FAILED, IFF_INACTIVE); 1303 } 1304 1305 /* No change in phyint status */ 1306 return (PI_OK); 1307 } 1308 1309 /* 1310 * Delete the phyint. Remove it from the list of all phyints, and the 1311 * list of phyint group members. 1312 */ 1313 static void 1314 phyint_delete(struct phyint *pi) 1315 { 1316 struct phyint *pi2; 1317 struct phyint_group *pg = pi->pi_group; 1318 1319 if (debug & D_PHYINT) 1320 logdebug("phyint_delete(%s)\n", pi->pi_name); 1321 1322 /* Both IPv4 and IPv6 phyint instances must have been deleted. */ 1323 assert(pi->pi_v4 == NULL && pi->pi_v6 == NULL); 1324 1325 /* 1326 * The phyint must belong to a group. 1327 */ 1328 assert(pg->pg_phyint == pi || pi->pi_pgprev != NULL); 1329 1330 /* The phyint must be in the list of all phyints */ 1331 assert(phyints == pi || pi->pi_prev != NULL); 1332 1333 /* Remove the phyint from the phyint group list */ 1334 pg->pg_sig++; 1335 (void) phyint_group_member_event(pg, pi, IPMP_IF_REMOVE); 1336 1337 if (pi->pi_pgprev == NULL) { 1338 /* Phyint is the 1st in the phyint group list */ 1339 pg->pg_phyint = pi->pi_pgnext; 1340 } else { 1341 pi->pi_pgprev->pi_pgnext = pi->pi_pgnext; 1342 } 1343 if (pi->pi_pgnext != NULL) 1344 pi->pi_pgnext->pi_pgprev = pi->pi_pgprev; 1345 pi->pi_pgnext = NULL; 1346 pi->pi_pgprev = NULL; 1347 1348 /* Refresh the group state now that this phyint has been removed */ 1349 phyint_group_refresh_state(pg); 1350 1351 /* Remove the phyint from the global list of phyints */ 1352 if (pi->pi_prev == NULL) { 1353 /* Phyint is the 1st in the list */ 1354 phyints = pi->pi_next; 1355 } else { 1356 pi->pi_prev->pi_next = pi->pi_next; 1357 } 1358 if (pi->pi_next != NULL) 1359 pi->pi_next->pi_prev = pi->pi_prev; 1360 pi->pi_next = NULL; 1361 pi->pi_prev = NULL; 1362 1363 /* 1364 * See if another phyint in the group had been offlined because 1365 * it was a dup of `pi' -- and if so, online it. 1366 */ 1367 if (!pi->pi_hwaddrdup && 1368 (pi2 = phyint_lookup_hwaddr(pi, _B_FALSE)) != NULL) { 1369 assert(pi2->pi_hwaddrdup); 1370 (void) phyint_undo_offline(pi2); 1371 } 1372 phyint_link_close(pi); 1373 free(pi); 1374 } 1375 1376 /* 1377 * Offline phyint `pi' if at least `minred' usable interfaces remain in the 1378 * group. Returns an IPMP error code. 1379 */ 1380 int 1381 phyint_offline(struct phyint *pi, uint_t minred) 1382 { 1383 boolean_t was_active; 1384 unsigned int nusable = 0; 1385 struct phyint *pi2; 1386 struct phyint_group *pg = pi->pi_group; 1387 1388 /* 1389 * Verify that enough usable interfaces in the group would remain. 1390 * As a special case, if the group has failed, allow any non-offline 1391 * phyints to be offlined. 1392 */ 1393 if (pg != phyint_anongroup) { 1394 for (pi2 = pg->pg_phyint; pi2 != NULL; pi2 = pi2->pi_pgnext) { 1395 if (pi2 == pi) 1396 continue; 1397 if (phyint_is_usable(pi2) || 1398 (GROUP_FAILED(pg) && pi2->pi_state != PI_OFFLINE)) 1399 nusable++; 1400 } 1401 } 1402 if (nusable < minred) 1403 return (IPMP_EMINRED); 1404 1405 was_active = ((pi->pi_flags & IFF_INACTIVE) == 0); 1406 1407 if (!change_pif_flags(pi, IFF_OFFLINE, IFF_INACTIVE)) 1408 return (IPMP_FAILURE); 1409 1410 /* 1411 * The interface is now offline, so stop probing it. Note that 1412 * if_mpadm(1M) will down the test addresses, after receiving a 1413 * success reply from us. The routing socket message will then make us 1414 * close the socket used for sending probes. But it is more logical 1415 * that an offlined interface must not be probed, even if it has test 1416 * addresses. 1417 * 1418 * NOTE: stop_probing() also sets PI_OFFLINE. 1419 */ 1420 stop_probing(pi); 1421 1422 /* 1423 * If we're offlining the phyint because it has a duplicate hardware 1424 * address, print a warning -- and leave the link open so that we can 1425 * be notified of hardware address changes that make it usable again. 1426 * Otherwise, close the link so that we won't prevent a detach. 1427 */ 1428 if (pi->pi_hwaddrdup) { 1429 logerr("IP interface %s has a hardware address which is not " 1430 "unique in group %s; offlining\n", pi->pi_name, 1431 pg->pg_name); 1432 } else { 1433 phyint_link_close(pi); 1434 } 1435 1436 /* 1437 * If this phyint was preventing another phyint with a duplicate 1438 * hardware address from being online, bring that one online now. 1439 */ 1440 if (!pi->pi_hwaddrdup && 1441 (pi2 = phyint_lookup_hwaddr(pi, _B_FALSE)) != NULL) { 1442 assert(pi2->pi_hwaddrdup); 1443 (void) phyint_undo_offline(pi2); 1444 } 1445 1446 /* 1447 * If this interface was active, try to activate another INACTIVE 1448 * interface in the group. 1449 */ 1450 if (was_active) 1451 phyint_activate_another(pi); 1452 1453 return (IPMP_SUCCESS); 1454 } 1455 1456 /* 1457 * Undo a previous offline of `pi'. Returns an IPMP error code. 1458 */ 1459 int 1460 phyint_undo_offline(struct phyint *pi) 1461 { 1462 if (pi->pi_state != PI_OFFLINE) { 1463 errno = EINVAL; 1464 return (IPMP_FAILURE); 1465 } 1466 1467 /* 1468 * If necessary, reinitialize our link information and verify that its 1469 * hardware address is still unique across the group. 1470 */ 1471 if (pi->pi_dh == NULL && !phyint_link_init(pi)) { 1472 errno = EIO; 1473 return (IPMP_FAILURE); 1474 } 1475 1476 if (phyint_lookup_hwaddr(pi, _B_TRUE) != NULL) { 1477 pi->pi_hwaddrdup = _B_TRUE; 1478 return (IPMP_EHWADDRDUP); 1479 } 1480 1481 if (pi->pi_hwaddrdup) { 1482 logerr("IP interface %s now has a unique hardware address in " 1483 "group %s; onlining\n", pi->pi_name, pi->pi_group->pg_name); 1484 pi->pi_hwaddrdup = _B_FALSE; 1485 } 1486 1487 if (!change_pif_flags(pi, 0, IFF_OFFLINE)) 1488 return (IPMP_FAILURE); 1489 1490 /* 1491 * While the interface was offline, it may have failed (e.g. the link 1492 * may have gone down). phyint_inst_check_for_failure() will have 1493 * already set pi_flags with IFF_FAILED, so we can use that to decide 1494 * whether the phyint should transition to running. Note that after 1495 * we transition to running, we will start sending probes again (if 1496 * test addresses are configured), which may also reveal that the 1497 * interface is in fact failed. 1498 */ 1499 if (pi->pi_flags & IFF_FAILED) { 1500 phyint_chstate(pi, PI_FAILED); 1501 } else { 1502 /* calls phyint_chstate() */ 1503 phyint_transition_to_running(pi); 1504 } 1505 1506 /* 1507 * Give the requestor time to configure test addresses before 1508 * complaining that they're missing. 1509 */ 1510 pi->pi_taddrthresh = getcurrentsec() + TESTADDR_CONF_TIME; 1511 1512 return (IPMP_SUCCESS); 1513 } 1514 1515 /* 1516 * Delete (unlink and free), the phyint instance. 1517 */ 1518 void 1519 phyint_inst_delete(struct phyint_instance *pii) 1520 { 1521 struct phyint *pi = pii->pii_phyint; 1522 1523 assert(pi != NULL); 1524 1525 if (debug & D_PHYINT) { 1526 logdebug("phyint_inst_delete(%s %s)\n", 1527 AF_STR(pii->pii_af), pi->pi_name); 1528 } 1529 1530 /* 1531 * If the phyint instance has associated probe targets 1532 * delete all the targets 1533 */ 1534 while (pii->pii_targets != NULL) 1535 target_delete(pii->pii_targets); 1536 1537 /* 1538 * Delete all the logints associated with this phyint 1539 * instance. 1540 */ 1541 while (pii->pii_logint != NULL) 1542 logint_delete(pii->pii_logint); 1543 1544 /* 1545 * Close the socket used to send probes to targets from this phyint. 1546 */ 1547 if (pii->pii_probe_sock != -1) 1548 close_probe_socket(pii, _B_TRUE); 1549 1550 /* 1551 * Phyint instance must be in the list of all phyint instances. 1552 * Remove phyint instance from the global list of phyint instances. 1553 */ 1554 assert(phyint_instances == pii || pii->pii_prev != NULL); 1555 if (pii->pii_prev == NULL) { 1556 /* Phyint is the 1st in the list */ 1557 phyint_instances = pii->pii_next; 1558 } else { 1559 pii->pii_prev->pii_next = pii->pii_next; 1560 } 1561 if (pii->pii_next != NULL) 1562 pii->pii_next->pii_prev = pii->pii_prev; 1563 pii->pii_next = NULL; 1564 pii->pii_prev = NULL; 1565 1566 /* 1567 * Reset the phyint instance pointer in the phyint. 1568 * If this is the last phyint instance (being deleted) on this 1569 * phyint, then delete the phyint. 1570 */ 1571 if (pii->pii_af == AF_INET) 1572 pi->pi_v4 = NULL; 1573 else 1574 pi->pi_v6 = NULL; 1575 1576 if (pi->pi_v4 == NULL && pi->pi_v6 == NULL) 1577 phyint_delete(pi); 1578 1579 free(pii); 1580 } 1581 1582 static void 1583 phyint_inst_print(struct phyint_instance *pii) 1584 { 1585 struct logint *li; 1586 struct target *tg; 1587 char abuf[INET6_ADDRSTRLEN]; 1588 int most_recent; 1589 int i; 1590 1591 if (pii->pii_phyint == NULL) { 1592 logdebug("pii->pi_phyint NULL can't print\n"); 1593 return; 1594 } 1595 1596 logdebug("\nPhyint instance: %s %s index %u state %x flags %llx " 1597 "sock %x in_use %d\n", 1598 AF_STR(pii->pii_af), pii->pii_name, pii->pii_ifindex, 1599 pii->pii_state, pii->pii_phyint->pi_flags, pii->pii_probe_sock, 1600 pii->pii_in_use); 1601 1602 for (li = pii->pii_logint; li != NULL; li = li->li_next) 1603 logint_print(li); 1604 1605 logdebug("\n"); 1606 for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) 1607 target_print(tg); 1608 1609 if (pii->pii_targets == NULL) 1610 logdebug("pi_targets NULL\n"); 1611 1612 if (pii->pii_target_next != NULL) { 1613 logdebug("pi_target_next %s %s\n", AF_STR(pii->pii_af), 1614 pr_addr(pii->pii_af, pii->pii_target_next->tg_address, 1615 abuf, sizeof (abuf))); 1616 } else { 1617 logdebug("pi_target_next NULL\n"); 1618 } 1619 1620 if (pii->pii_rtt_target_next != NULL) { 1621 logdebug("pi_rtt_target_next %s %s\n", AF_STR(pii->pii_af), 1622 pr_addr(pii->pii_af, pii->pii_rtt_target_next->tg_address, 1623 abuf, sizeof (abuf))); 1624 } else { 1625 logdebug("pi_rtt_target_next NULL\n"); 1626 } 1627 1628 if (pii->pii_targets != NULL) { 1629 most_recent = PROBE_INDEX_PREV(pii->pii_probe_next); 1630 1631 i = most_recent; 1632 do { 1633 if (pii->pii_probes[i].pr_target != NULL) { 1634 logdebug("#%d target %s ", i, 1635 pr_addr(pii->pii_af, 1636 pii->pii_probes[i].pr_target->tg_address, 1637 abuf, sizeof (abuf))); 1638 } else { 1639 logdebug("#%d target NULL ", i); 1640 } 1641 logdebug("time_start %lld status %d " 1642 "time_ackproc %lld time_lost %u", 1643 pii->pii_probes[i].pr_hrtime_start, 1644 pii->pii_probes[i].pr_status, 1645 pii->pii_probes[i].pr_hrtime_ackproc, 1646 pii->pii_probes[i].pr_time_lost); 1647 i = PROBE_INDEX_PREV(i); 1648 } while (i != most_recent); 1649 } 1650 } 1651 1652 /* 1653 * Lookup a logint based on the logical interface name, on the given 1654 * phyint instance. 1655 */ 1656 static struct logint * 1657 logint_lookup(struct phyint_instance *pii, char *name) 1658 { 1659 struct logint *li; 1660 1661 if (debug & D_LOGINT) { 1662 logdebug("logint_lookup(%s, %s)\n", 1663 AF_STR(pii->pii_af), name); 1664 } 1665 1666 for (li = pii->pii_logint; li != NULL; li = li->li_next) { 1667 if (strncmp(name, li->li_name, sizeof (li->li_name)) == 0) 1668 break; 1669 } 1670 return (li); 1671 } 1672 1673 /* 1674 * Insert a logint at the head of the list of logints of the given 1675 * phyint instance 1676 */ 1677 static void 1678 logint_insert(struct phyint_instance *pii, struct logint *li) 1679 { 1680 li->li_next = pii->pii_logint; 1681 li->li_prev = NULL; 1682 if (pii->pii_logint != NULL) 1683 pii->pii_logint->li_prev = li; 1684 pii->pii_logint = li; 1685 li->li_phyint_inst = pii; 1686 } 1687 1688 /* 1689 * Create a new named logint, on the specified phyint instance. 1690 */ 1691 static struct logint * 1692 logint_create(struct phyint_instance *pii, char *name) 1693 { 1694 struct logint *li; 1695 1696 if (debug & D_LOGINT) { 1697 logdebug("logint_create(%s %s %s)\n", 1698 AF_STR(pii->pii_af), pii->pii_name, name); 1699 } 1700 1701 li = calloc(1, sizeof (struct logint)); 1702 if (li == NULL) { 1703 logperror("logint_create: calloc"); 1704 return (NULL); 1705 } 1706 1707 (void) strncpy(li->li_name, name, sizeof (li->li_name)); 1708 li->li_name[sizeof (li->li_name) - 1] = '\0'; 1709 logint_insert(pii, li); 1710 return (li); 1711 } 1712 1713 /* 1714 * Initialize the logint based on the data returned by the kernel. 1715 */ 1716 void 1717 logint_init_from_k(struct phyint_instance *pii, char *li_name) 1718 { 1719 int ifsock; 1720 uint64_t flags; 1721 uint64_t saved_flags; 1722 struct logint *li; 1723 struct lifreq lifr; 1724 struct in6_addr test_subnet; 1725 struct in6_addr testaddr; 1726 int test_subnet_len; 1727 struct sockaddr_in6 *sin6; 1728 struct sockaddr_in *sin; 1729 char abuf[INET6_ADDRSTRLEN]; 1730 boolean_t ptp = _B_FALSE; 1731 struct in6_addr tgaddr; 1732 1733 if (debug & D_LOGINT) { 1734 logdebug("logint_init_from_k(%s %s)\n", 1735 AF_STR(pii->pii_af), li_name); 1736 } 1737 1738 /* Get the socket for doing ioctls */ 1739 ifsock = (pii->pii_af == AF_INET) ? ifsock_v4 : ifsock_v6; 1740 1741 /* 1742 * Get the flags from the kernel. Also serves as a check whether 1743 * the logical still exists. If it doesn't exist, no need to proceed 1744 * any further. li_in_use will make the caller clean up the logint 1745 */ 1746 (void) strncpy(lifr.lifr_name, li_name, sizeof (lifr.lifr_name)); 1747 lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0'; 1748 if (ioctl(ifsock, SIOCGLIFFLAGS, (char *)&lifr) < 0) { 1749 /* Interface may have vanished */ 1750 if (errno != ENXIO) { 1751 logperror_pii(pii, "logint_init_from_k: " 1752 "ioctl (get flags)"); 1753 } 1754 return; 1755 } 1756 1757 flags = lifr.lifr_flags; 1758 1759 /* 1760 * Verified the logint exists. Now lookup the logint in our tables. 1761 * If it does not exist, create a new logint. 1762 */ 1763 li = logint_lookup(pii, li_name); 1764 if (li == NULL) { 1765 li = logint_create(pii, li_name); 1766 if (li == NULL) { 1767 /* 1768 * Pretend the interface does not exist 1769 * in the kernel 1770 */ 1771 return; 1772 } 1773 } 1774 1775 /* 1776 * Update li->li_flags with the new flags, after saving the old 1777 * value. This is used later to check what flags has changed and 1778 * take any action 1779 */ 1780 saved_flags = li->li_flags; 1781 li->li_flags = flags; 1782 1783 /* 1784 * Get the address, prefix, prefixlength and update the logint. 1785 * Check if anything has changed. If the logint used for the 1786 * test address has changed, take suitable action. 1787 */ 1788 if (ioctl(ifsock, SIOCGLIFADDR, (char *)&lifr) < 0) { 1789 /* Interface may have vanished */ 1790 if (errno != ENXIO) { 1791 logperror_li(li, "logint_init_from_k: (get addr)"); 1792 } 1793 goto error; 1794 } 1795 1796 if (pii->pii_af == AF_INET) { 1797 sin = (struct sockaddr_in *)&lifr.lifr_addr; 1798 IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &testaddr); 1799 } else { 1800 sin6 = (struct sockaddr_in6 *)&lifr.lifr_addr; 1801 testaddr = sin6->sin6_addr; 1802 } 1803 1804 if (ioctl(ifsock, SIOCGLIFSUBNET, (char *)&lifr) < 0) { 1805 /* Interface may have vanished */ 1806 if (errno != ENXIO) 1807 logperror_li(li, "logint_init_from_k: (get subnet)"); 1808 goto error; 1809 } 1810 if (lifr.lifr_subnet.ss_family == AF_INET6) { 1811 sin6 = (struct sockaddr_in6 *)&lifr.lifr_subnet; 1812 test_subnet = sin6->sin6_addr; 1813 test_subnet_len = lifr.lifr_addrlen; 1814 } else { 1815 sin = (struct sockaddr_in *)&lifr.lifr_subnet; 1816 IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &test_subnet); 1817 test_subnet_len = lifr.lifr_addrlen + (IPV6_ABITS - IP_ABITS); 1818 } 1819 1820 /* 1821 * If this is the logint corresponding to the test address used for 1822 * sending probes, then if anything significant has changed we need to 1823 * determine the test address again. We ignore changes to the 1824 * IFF_FAILED and IFF_RUNNING flags since those happen as a matter of 1825 * course. 1826 */ 1827 if (pii->pii_probe_logint == li) { 1828 if (((li->li_flags ^ saved_flags) & 1829 ~(IFF_FAILED | IFF_RUNNING)) != 0 || 1830 !IN6_ARE_ADDR_EQUAL(&testaddr, &li->li_addr) || 1831 (!ptp && !IN6_ARE_ADDR_EQUAL(&test_subnet, 1832 &li->li_subnet)) || 1833 (!ptp && test_subnet_len != li->li_subnet_len) || 1834 (ptp && !IN6_ARE_ADDR_EQUAL(&tgaddr, &li->li_dstaddr))) { 1835 /* 1836 * Something significant that affects the testaddress 1837 * has changed. Redo the testaddress selection later on 1838 * in select_test_ifs(). For now do the cleanup and 1839 * set pii_probe_logint to NULL. 1840 */ 1841 if (pii->pii_probe_sock != -1) 1842 close_probe_socket(pii, _B_TRUE); 1843 pii->pii_probe_logint = NULL; 1844 } 1845 } 1846 1847 1848 /* Update the logint with the values obtained from the kernel. */ 1849 li->li_addr = testaddr; 1850 li->li_in_use = 1; 1851 if (ptp) { 1852 li->li_dstaddr = tgaddr; 1853 li->li_subnet_len = (pii->pii_af == AF_INET) ? 1854 IP_ABITS : IPV6_ABITS; 1855 } else { 1856 li->li_subnet = test_subnet; 1857 li->li_subnet_len = test_subnet_len; 1858 } 1859 1860 if (debug & D_LOGINT) 1861 logint_print(li); 1862 1863 return; 1864 1865 error: 1866 logerr("logint_init_from_k: IGNORED %s %s %s addr %s\n", 1867 AF_STR(pii->pii_af), pii->pii_name, li->li_name, 1868 pr_addr(pii->pii_af, testaddr, abuf, sizeof (abuf))); 1869 logint_delete(li); 1870 } 1871 1872 /* 1873 * Delete (unlink and free) a logint. 1874 */ 1875 void 1876 logint_delete(struct logint *li) 1877 { 1878 struct phyint_instance *pii; 1879 1880 pii = li->li_phyint_inst; 1881 assert(pii != NULL); 1882 1883 if (debug & D_LOGINT) { 1884 int af; 1885 char abuf[INET6_ADDRSTRLEN]; 1886 1887 af = pii->pii_af; 1888 logdebug("logint_delete(%s %s %s/%u)\n", 1889 AF_STR(af), li->li_name, 1890 pr_addr(af, li->li_addr, abuf, sizeof (abuf)), 1891 li->li_subnet_len); 1892 } 1893 1894 /* logint must be in the list of logints */ 1895 assert(pii->pii_logint == li || li->li_prev != NULL); 1896 1897 /* Remove the logint from the list of logints */ 1898 if (li->li_prev == NULL) { 1899 /* logint is the 1st in the list */ 1900 pii->pii_logint = li->li_next; 1901 } else { 1902 li->li_prev->li_next = li->li_next; 1903 } 1904 if (li->li_next != NULL) 1905 li->li_next->li_prev = li->li_prev; 1906 li->li_next = NULL; 1907 li->li_prev = NULL; 1908 1909 /* 1910 * If this logint is also being used for probing, then close the 1911 * associated socket, if it exists. 1912 */ 1913 if (pii->pii_probe_logint == li) { 1914 if (pii->pii_probe_sock != -1) 1915 close_probe_socket(pii, _B_TRUE); 1916 pii->pii_probe_logint = NULL; 1917 } 1918 1919 free(li); 1920 } 1921 1922 static void 1923 logint_print(struct logint *li) 1924 { 1925 char abuf[INET6_ADDRSTRLEN]; 1926 int af = li->li_phyint_inst->pii_af; 1927 1928 logdebug("logint: %s %s addr %s/%u", AF_STR(af), li->li_name, 1929 pr_addr(af, li->li_addr, abuf, sizeof (abuf)), li->li_subnet_len); 1930 1931 logdebug("\tFlags: %llx in_use %d\n", li->li_flags, li->li_in_use); 1932 } 1933 1934 char * 1935 pr_addr(int af, struct in6_addr addr, char *abuf, int len) 1936 { 1937 struct in_addr addr_v4; 1938 1939 if (af == AF_INET) { 1940 IN6_V4MAPPED_TO_INADDR(&addr, &addr_v4); 1941 (void) inet_ntop(AF_INET, (void *)&addr_v4, abuf, len); 1942 } else { 1943 (void) inet_ntop(AF_INET6, (void *)&addr, abuf, len); 1944 } 1945 return (abuf); 1946 } 1947 1948 /* 1949 * Fill in the sockaddr_storage pointed to by `ssp' with the IP address 1950 * represented by the [`af',`addr'] pair. Needed because in.mpathd internally 1951 * stores all addresses as in6_addrs, but we don't want to expose that. 1952 */ 1953 void 1954 addr2storage(int af, const struct in6_addr *addr, struct sockaddr_storage *ssp) 1955 { 1956 struct sockaddr_in *sinp = (struct sockaddr_in *)ssp; 1957 struct sockaddr_in6 *sin6p = (struct sockaddr_in6 *)ssp; 1958 1959 assert(af == AF_INET || af == AF_INET6); 1960 1961 switch (af) { 1962 case AF_INET: 1963 (void) memset(sinp, 0, sizeof (*sinp)); 1964 sinp->sin_family = AF_INET; 1965 IN6_V4MAPPED_TO_INADDR(addr, &sinp->sin_addr); 1966 break; 1967 case AF_INET6: 1968 (void) memset(sin6p, 0, sizeof (*sin6p)); 1969 sin6p->sin6_family = AF_INET6; 1970 sin6p->sin6_addr = *addr; 1971 break; 1972 } 1973 } 1974 1975 /* Lookup target on its address */ 1976 struct target * 1977 target_lookup(struct phyint_instance *pii, struct in6_addr addr) 1978 { 1979 struct target *tg; 1980 1981 if (debug & D_TARGET) { 1982 char abuf[INET6_ADDRSTRLEN]; 1983 1984 logdebug("target_lookup(%s %s): addr %s\n", 1985 AF_STR(pii->pii_af), pii->pii_name, 1986 pr_addr(pii->pii_af, addr, abuf, sizeof (abuf))); 1987 } 1988 1989 for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) { 1990 if (IN6_ARE_ADDR_EQUAL(&tg->tg_address, &addr)) 1991 break; 1992 } 1993 return (tg); 1994 } 1995 1996 /* 1997 * Find and return the next active target, for the next probe. 1998 * If no active targets are available, return NULL. 1999 */ 2000 struct target * 2001 target_next(struct target *tg) 2002 { 2003 struct phyint_instance *pii = tg->tg_phyint_inst; 2004 struct target *marker = tg; 2005 hrtime_t now; 2006 2007 now = gethrtime(); 2008 2009 /* 2010 * Target must be in the list of targets for this phyint 2011 * instance. 2012 */ 2013 assert(pii->pii_targets == tg || tg->tg_prev != NULL); 2014 assert(pii->pii_targets != NULL); 2015 2016 /* Return the next active target */ 2017 do { 2018 /* 2019 * Go to the next target. If we hit the end, 2020 * reset the ptr to the head 2021 */ 2022 tg = tg->tg_next; 2023 if (tg == NULL) 2024 tg = pii->pii_targets; 2025 2026 assert(TG_STATUS_VALID(tg->tg_status)); 2027 2028 switch (tg->tg_status) { 2029 case TG_ACTIVE: 2030 return (tg); 2031 2032 case TG_UNUSED: 2033 assert(pii->pii_targets_are_routers); 2034 if (pii->pii_ntargets < MAX_PROBE_TARGETS) { 2035 /* 2036 * Bubble up the unused target to active 2037 */ 2038 tg->tg_status = TG_ACTIVE; 2039 pii->pii_ntargets++; 2040 return (tg); 2041 } 2042 break; 2043 2044 case TG_SLOW: 2045 assert(pii->pii_targets_are_routers); 2046 if (tg->tg_latime + MIN_RECOVERY_TIME < now) { 2047 /* 2048 * Bubble up the slow target to unused 2049 */ 2050 tg->tg_status = TG_UNUSED; 2051 } 2052 break; 2053 2054 case TG_DEAD: 2055 assert(pii->pii_targets_are_routers); 2056 if (tg->tg_latime + MIN_RECOVERY_TIME < now) { 2057 /* 2058 * Bubble up the dead target to slow 2059 */ 2060 tg->tg_status = TG_SLOW; 2061 tg->tg_latime = now; 2062 } 2063 break; 2064 } 2065 2066 } while (tg != marker); 2067 2068 return (NULL); 2069 } 2070 2071 /* 2072 * Select the best available target, that is not already TG_ACTIVE, 2073 * for the caller. The caller will determine whether it wants to 2074 * make the returned target TG_ACTIVE. 2075 * The selection order is as follows. 2076 * 1. pick a TG_UNSED target, if it exists. 2077 * 2. else pick a TG_SLOW target that has recovered, if it exists 2078 * 3. else pick any TG_SLOW target, if it exists 2079 * 4. else pick a TG_DEAD target that has recovered, if it exists 2080 * 5. else pick any TG_DEAD target, if it exists 2081 * 6. else return null 2082 */ 2083 static struct target * 2084 target_select_best(struct phyint_instance *pii) 2085 { 2086 struct target *tg; 2087 struct target *slow = NULL; 2088 struct target *dead = NULL; 2089 struct target *slow_recovered = NULL; 2090 struct target *dead_recovered = NULL; 2091 hrtime_t now; 2092 2093 now = gethrtime(); 2094 2095 for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) { 2096 assert(TG_STATUS_VALID(tg->tg_status)); 2097 2098 switch (tg->tg_status) { 2099 case TG_UNUSED: 2100 return (tg); 2101 2102 case TG_SLOW: 2103 if (tg->tg_latime + MIN_RECOVERY_TIME < now) { 2104 slow_recovered = tg; 2105 /* 2106 * Promote the slow_recovered to unused 2107 */ 2108 tg->tg_status = TG_UNUSED; 2109 } else { 2110 slow = tg; 2111 } 2112 break; 2113 2114 case TG_DEAD: 2115 if (tg->tg_latime + MIN_RECOVERY_TIME < now) { 2116 dead_recovered = tg; 2117 /* 2118 * Promote the dead_recovered to slow 2119 */ 2120 tg->tg_status = TG_SLOW; 2121 tg->tg_latime = now; 2122 } else { 2123 dead = tg; 2124 } 2125 break; 2126 2127 default: 2128 break; 2129 } 2130 } 2131 2132 if (slow_recovered != NULL) 2133 return (slow_recovered); 2134 else if (slow != NULL) 2135 return (slow); 2136 else if (dead_recovered != NULL) 2137 return (dead_recovered); 2138 else 2139 return (dead); 2140 } 2141 2142 /* 2143 * Some target was deleted. If we don't have even MIN_PROBE_TARGETS 2144 * that are active, pick the next best below. 2145 */ 2146 static void 2147 target_activate_all(struct phyint_instance *pii) 2148 { 2149 struct target *tg; 2150 2151 assert(pii->pii_ntargets == 0); 2152 assert(pii->pii_target_next == NULL); 2153 assert(pii->pii_rtt_target_next == NULL); 2154 assert(pii->pii_targets_are_routers); 2155 2156 while (pii->pii_ntargets < MIN_PROBE_TARGETS) { 2157 tg = target_select_best(pii); 2158 if (tg == NULL) { 2159 /* We are out of targets */ 2160 return; 2161 } 2162 2163 assert(TG_STATUS_VALID(tg->tg_status)); 2164 assert(tg->tg_status != TG_ACTIVE); 2165 tg->tg_status = TG_ACTIVE; 2166 pii->pii_ntargets++; 2167 if (pii->pii_target_next == NULL) { 2168 pii->pii_target_next = tg; 2169 pii->pii_rtt_target_next = tg; 2170 } 2171 } 2172 } 2173 2174 static struct target * 2175 target_first(struct phyint_instance *pii) 2176 { 2177 struct target *tg; 2178 2179 for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) { 2180 assert(TG_STATUS_VALID(tg->tg_status)); 2181 if (tg->tg_status == TG_ACTIVE) 2182 break; 2183 } 2184 2185 return (tg); 2186 } 2187 2188 /* 2189 * Create a default target entry. 2190 */ 2191 void 2192 target_create(struct phyint_instance *pii, struct in6_addr addr, 2193 boolean_t is_router) 2194 { 2195 struct target *tg; 2196 struct phyint *pi; 2197 struct logint *li; 2198 2199 if (debug & D_TARGET) { 2200 char abuf[INET6_ADDRSTRLEN]; 2201 2202 logdebug("target_create(%s %s, %s)\n", 2203 AF_STR(pii->pii_af), pii->pii_name, 2204 pr_addr(pii->pii_af, addr, abuf, sizeof (abuf))); 2205 } 2206 2207 /* 2208 * If the test address is not yet initialized, do not add 2209 * any target, since we cannot determine whether the target 2210 * belongs to the same subnet as the test address. 2211 */ 2212 li = pii->pii_probe_logint; 2213 if (li == NULL) 2214 return; 2215 2216 /* 2217 * If there are multiple subnets associated with an interface, then 2218 * add the target to this phyint instance only if it belongs to the 2219 * same subnet as the test address. This assures us that we will 2220 * be able to reach this target through our routing table. 2221 */ 2222 if (!prefix_equal(li->li_subnet, addr, li->li_subnet_len)) 2223 return; 2224 2225 if (pii->pii_targets != NULL) { 2226 assert(pii->pii_ntargets <= MAX_PROBE_TARGETS); 2227 if (is_router) { 2228 if (!pii->pii_targets_are_routers) { 2229 /* 2230 * Prefer router over hosts. Using hosts is a 2231 * fallback mechanism, hence delete all host 2232 * targets. 2233 */ 2234 while (pii->pii_targets != NULL) 2235 target_delete(pii->pii_targets); 2236 } 2237 } else { 2238 /* 2239 * Routers take precedence over hosts. If this 2240 * is a router list and we are trying to add a 2241 * host, just return. If this is a host list 2242 * and if we have sufficient targets, just return 2243 */ 2244 if (pii->pii_targets_are_routers || 2245 pii->pii_ntargets == MAX_PROBE_TARGETS) 2246 return; 2247 } 2248 } 2249 2250 tg = calloc(1, sizeof (struct target)); 2251 if (tg == NULL) { 2252 logperror("target_create: calloc"); 2253 return; 2254 } 2255 2256 tg->tg_phyint_inst = pii; 2257 tg->tg_address = addr; 2258 tg->tg_in_use = 1; 2259 tg->tg_rtt_sa = -1; 2260 tg->tg_num_deferred = 0; 2261 2262 /* 2263 * If this is the first target, set 'pii_targets_are_routers' 2264 * The list of targets is either a list of hosts or list or 2265 * routers, but not a mix. 2266 */ 2267 if (pii->pii_targets == NULL) { 2268 assert(pii->pii_ntargets == 0); 2269 assert(pii->pii_target_next == NULL); 2270 assert(pii->pii_rtt_target_next == NULL); 2271 pii->pii_targets_are_routers = is_router ? 1 : 0; 2272 } 2273 2274 if (pii->pii_ntargets == MAX_PROBE_TARGETS) { 2275 assert(pii->pii_targets_are_routers); 2276 assert(pii->pii_target_next != NULL); 2277 assert(pii->pii_rtt_target_next != NULL); 2278 tg->tg_status = TG_UNUSED; 2279 } else { 2280 if (pii->pii_ntargets == 0) { 2281 assert(pii->pii_target_next == NULL); 2282 pii->pii_target_next = tg; 2283 pii->pii_rtt_target_next = tg; 2284 } 2285 pii->pii_ntargets++; 2286 tg->tg_status = TG_ACTIVE; 2287 } 2288 2289 target_insert(pii, tg); 2290 2291 /* 2292 * Change state to PI_RUNNING if this phyint instance is capable of 2293 * sending and receiving probes -- that is, if we know of at least 1 2294 * target, and this phyint instance is probe-capable. For more 2295 * details, see the phyint state diagram in mpd_probe.c. 2296 */ 2297 pi = pii->pii_phyint; 2298 if (pi->pi_state == PI_NOTARGETS && PROBE_CAPABLE(pii)) { 2299 if (pi->pi_flags & IFF_FAILED) 2300 phyint_chstate(pi, PI_FAILED); 2301 else 2302 phyint_chstate(pi, PI_RUNNING); 2303 } 2304 } 2305 2306 /* 2307 * Add the target address named by `addr' to phyint instance `pii' if it does 2308 * not already exist. If the target is a router, `is_router' should be set to 2309 * B_TRUE. 2310 */ 2311 void 2312 target_add(struct phyint_instance *pii, struct in6_addr addr, 2313 boolean_t is_router) 2314 { 2315 struct target *tg; 2316 2317 if (pii == NULL) 2318 return; 2319 2320 tg = target_lookup(pii, addr); 2321 2322 /* 2323 * If the target does not exist, create it; target_create() will set 2324 * tg_in_use to true. Even if it exists already, if it's a router 2325 * target and we'd previously learned of it through multicast, then we 2326 * need to recreate it as a router target. Otherwise, just set 2327 * tg_in_use to to true so that init_router_targets() won't delete it. 2328 */ 2329 if (tg == NULL || (is_router && !pii->pii_targets_are_routers)) 2330 target_create(pii, addr, is_router); 2331 else if (is_router) 2332 tg->tg_in_use = 1; 2333 } 2334 2335 /* 2336 * Insert target at head of linked list of targets for the associated 2337 * phyint instance 2338 */ 2339 static void 2340 target_insert(struct phyint_instance *pii, struct target *tg) 2341 { 2342 tg->tg_next = pii->pii_targets; 2343 tg->tg_prev = NULL; 2344 if (tg->tg_next != NULL) 2345 tg->tg_next->tg_prev = tg; 2346 pii->pii_targets = tg; 2347 } 2348 2349 /* 2350 * Delete a target (unlink and free). 2351 */ 2352 void 2353 target_delete(struct target *tg) 2354 { 2355 int af; 2356 struct phyint_instance *pii; 2357 struct phyint_instance *pii_other; 2358 2359 pii = tg->tg_phyint_inst; 2360 af = pii->pii_af; 2361 2362 if (debug & D_TARGET) { 2363 char abuf[INET6_ADDRSTRLEN]; 2364 2365 logdebug("target_delete(%s %s, %s)\n", 2366 AF_STR(af), pii->pii_name, 2367 pr_addr(af, tg->tg_address, abuf, sizeof (abuf))); 2368 } 2369 2370 /* 2371 * Target must be in the list of targets for this phyint 2372 * instance. 2373 */ 2374 assert(pii->pii_targets == tg || tg->tg_prev != NULL); 2375 2376 /* 2377 * Reset all references to 'tg' in the probe information 2378 * for this phyint. 2379 */ 2380 reset_pii_probes(pii, tg); 2381 2382 /* 2383 * Remove this target from the list of targets of this 2384 * phyint instance. 2385 */ 2386 if (tg->tg_prev == NULL) { 2387 pii->pii_targets = tg->tg_next; 2388 } else { 2389 tg->tg_prev->tg_next = tg->tg_next; 2390 } 2391 2392 if (tg->tg_next != NULL) 2393 tg->tg_next->tg_prev = tg->tg_prev; 2394 2395 tg->tg_next = NULL; 2396 tg->tg_prev = NULL; 2397 2398 if (tg->tg_status == TG_ACTIVE) 2399 pii->pii_ntargets--; 2400 2401 /* 2402 * Adjust the next target to probe, if it points to 2403 * to the currently deleted target. 2404 */ 2405 if (pii->pii_target_next == tg) 2406 pii->pii_target_next = target_first(pii); 2407 2408 if (pii->pii_rtt_target_next == tg) 2409 pii->pii_rtt_target_next = target_first(pii); 2410 2411 free(tg); 2412 2413 /* 2414 * The number of active targets pii_ntargets == 0 iff 2415 * the next active target pii->pii_target_next == NULL 2416 */ 2417 if (pii->pii_ntargets != 0) { 2418 assert(pii->pii_target_next != NULL); 2419 assert(pii->pii_rtt_target_next != NULL); 2420 assert(pii->pii_target_next->tg_status == TG_ACTIVE); 2421 assert(pii->pii_rtt_target_next->tg_status == TG_ACTIVE); 2422 return; 2423 } 2424 2425 /* At this point, we don't have any active targets. */ 2426 assert(pii->pii_target_next == NULL); 2427 assert(pii->pii_rtt_target_next == NULL); 2428 2429 if (pii->pii_targets_are_routers) { 2430 /* 2431 * Activate any TG_SLOW or TG_DEAD router targets, 2432 * since we don't have any other targets 2433 */ 2434 target_activate_all(pii); 2435 2436 if (pii->pii_ntargets != 0) { 2437 assert(pii->pii_target_next != NULL); 2438 assert(pii->pii_rtt_target_next != NULL); 2439 assert(pii->pii_target_next->tg_status == TG_ACTIVE); 2440 assert(pii->pii_rtt_target_next->tg_status == 2441 TG_ACTIVE); 2442 return; 2443 } 2444 } 2445 2446 /* 2447 * If we still don't have any active targets, the list must 2448 * must be really empty. There aren't even TG_SLOW or TG_DEAD 2449 * targets. Zero out the probe stats since it will not be 2450 * relevant any longer. 2451 */ 2452 assert(pii->pii_targets == NULL); 2453 pii->pii_targets_are_routers = _B_FALSE; 2454 clear_pii_probe_stats(pii); 2455 pii_other = phyint_inst_other(pii); 2456 2457 /* 2458 * If there are no targets on both instances and the interface would 2459 * otherwise be considered PI_RUNNING, go back to PI_NOTARGETS state, 2460 * since we cannot probe this phyint any more. For more details, 2461 * please see phyint state diagram in mpd_probe.c. 2462 */ 2463 if (!PROBE_CAPABLE(pii_other) && LINK_UP(pii->pii_phyint) && 2464 pii->pii_phyint->pi_state != PI_OFFLINE) 2465 phyint_chstate(pii->pii_phyint, PI_NOTARGETS); 2466 } 2467 2468 /* 2469 * Flush the target list of every phyint in the group, if the list 2470 * is a host target list. This is called if group failure is suspected. 2471 * If all targets have failed, multicast will subsequently discover new 2472 * targets. Else it is a group failure. 2473 * Note: This function is a no-op if the list is a router target list. 2474 */ 2475 static void 2476 target_flush_hosts(struct phyint_group *pg) 2477 { 2478 struct phyint *pi; 2479 struct phyint_instance *pii; 2480 2481 if (debug & D_TARGET) 2482 logdebug("target_flush_hosts(%s)\n", pg->pg_name); 2483 2484 for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) { 2485 pii = pi->pi_v4; 2486 if (pii != NULL && !pii->pii_targets_are_routers) { 2487 /* 2488 * Delete all the targets. When the list becomes 2489 * empty, target_delete() will set pii->pii_targets 2490 * to NULL. 2491 */ 2492 while (pii->pii_targets != NULL) 2493 target_delete(pii->pii_targets); 2494 } 2495 pii = pi->pi_v6; 2496 if (pii != NULL && !pii->pii_targets_are_routers) { 2497 /* 2498 * Delete all the targets. When the list becomes 2499 * empty, target_delete() will set pii->pii_targets 2500 * to NULL. 2501 */ 2502 while (pii->pii_targets != NULL) 2503 target_delete(pii->pii_targets); 2504 } 2505 } 2506 } 2507 2508 /* 2509 * Reset all references to 'target' in the probe info, as this target is 2510 * being deleted. The pr_target field is guaranteed to be non-null if 2511 * pr_status is PR_UNACKED. So we change the pr_status to PR_LOST, so that 2512 * pr_target will not be accessed unconditionally. 2513 */ 2514 static void 2515 reset_pii_probes(struct phyint_instance *pii, struct target *tg) 2516 { 2517 int i; 2518 2519 for (i = 0; i < PROBE_STATS_COUNT; i++) { 2520 if (pii->pii_probes[i].pr_target == tg) { 2521 if (pii->pii_probes[i].pr_status == PR_UNACKED) { 2522 probe_chstate(&pii->pii_probes[i], pii, 2523 PR_LOST); 2524 } 2525 pii->pii_probes[i].pr_target = NULL; 2526 } 2527 } 2528 2529 } 2530 2531 /* 2532 * Clear the probe statistics array. 2533 */ 2534 void 2535 clear_pii_probe_stats(struct phyint_instance *pii) 2536 { 2537 bzero(pii->pii_probes, sizeof (struct probe_stats) * PROBE_STATS_COUNT); 2538 /* Reset the next probe index in the probe stats array */ 2539 pii->pii_probe_next = 0; 2540 } 2541 2542 static void 2543 target_print(struct target *tg) 2544 { 2545 char abuf[INET6_ADDRSTRLEN]; 2546 char buf[128]; 2547 char buf2[128]; 2548 int af; 2549 int i; 2550 2551 af = tg->tg_phyint_inst->pii_af; 2552 2553 logdebug("Target on %s %s addr %s\n" 2554 "status %d rtt_sa %lld rtt_sd %lld crtt %d tg_in_use %d\n", 2555 AF_STR(af), tg->tg_phyint_inst->pii_name, 2556 pr_addr(af, tg->tg_address, abuf, sizeof (abuf)), 2557 tg->tg_status, tg->tg_rtt_sa, tg->tg_rtt_sd, 2558 tg->tg_crtt, tg->tg_in_use); 2559 2560 buf[0] = '\0'; 2561 for (i = 0; i < tg->tg_num_deferred; i++) { 2562 (void) snprintf(buf2, sizeof (buf2), " %dms", 2563 tg->tg_deferred[i]); 2564 (void) strlcat(buf, buf2, sizeof (buf)); 2565 } 2566 logdebug("deferred rtts:%s\n", buf); 2567 } 2568 2569 void 2570 phyint_inst_print_all(void) 2571 { 2572 struct phyint_instance *pii; 2573 2574 for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) { 2575 phyint_inst_print(pii); 2576 } 2577 } 2578 2579 /* 2580 * Compare two prefixes that have the same prefix length. 2581 * Fails if the prefix length is unreasonable. 2582 */ 2583 boolean_t 2584 prefix_equal(struct in6_addr p1, struct in6_addr p2, uint_t prefix_len) 2585 { 2586 uchar_t mask; 2587 int j; 2588 2589 if (prefix_len > IPV6_ABITS) 2590 return (_B_FALSE); 2591 2592 for (j = 0; prefix_len > 8; prefix_len -= 8, j++) 2593 if (p1.s6_addr[j] != p2.s6_addr[j]) 2594 return (_B_FALSE); 2595 2596 /* Make the N leftmost bits one */ 2597 mask = 0xff << (8 - prefix_len); 2598 if ((p1.s6_addr[j] & mask) != (p2.s6_addr[j] & mask)) 2599 return (_B_FALSE); 2600 2601 return (_B_TRUE); 2602 } 2603 2604 /* 2605 * Get the number of UP logints on phyint `pi'. 2606 */ 2607 static int 2608 logint_upcount(struct phyint *pi) 2609 { 2610 struct logint *li; 2611 int count = 0; 2612 2613 if (pi->pi_v4 != NULL) { 2614 for (li = pi->pi_v4->pii_logint; li != NULL; li = li->li_next) { 2615 if (li->li_flags & IFF_UP) 2616 count++; 2617 } 2618 } 2619 2620 if (pi->pi_v6 != NULL) { 2621 for (li = pi->pi_v6->pii_logint; li != NULL; li = li->li_next) { 2622 if (li->li_flags & IFF_UP) 2623 count++; 2624 } 2625 } 2626 2627 return (count); 2628 } 2629 2630 /* 2631 * Get the phyint instance with the other (IPv4 / IPv6) protocol 2632 */ 2633 struct phyint_instance * 2634 phyint_inst_other(struct phyint_instance *pii) 2635 { 2636 if (pii->pii_af == AF_INET) 2637 return (pii->pii_phyint->pi_v6); 2638 else 2639 return (pii->pii_phyint->pi_v4); 2640 } 2641 2642 /* 2643 * Check whether a phyint is functioning. 2644 */ 2645 static boolean_t 2646 phyint_is_functioning(struct phyint *pi) 2647 { 2648 if (pi->pi_state == PI_RUNNING) 2649 return (_B_TRUE); 2650 return (pi->pi_state == PI_NOTARGETS && !(pi->pi_flags & IFF_FAILED)); 2651 } 2652 2653 /* 2654 * Check whether a phyint is usable. 2655 */ 2656 static boolean_t 2657 phyint_is_usable(struct phyint *pi) 2658 { 2659 if (logint_upcount(pi) == 0) 2660 return (_B_FALSE); 2661 return (phyint_is_functioning(pi)); 2662 } 2663 2664 /* 2665 * Post an EC_IPMP sysevent of subclass `subclass' and attributes `nvl'. 2666 * Before sending the event, it prepends the current version of the IPMP 2667 * sysevent API. Returns 0 on success, -1 on failure (in either case, 2668 * `nvl' is freed). 2669 */ 2670 static int 2671 post_event(const char *subclass, nvlist_t *nvl) 2672 { 2673 static evchan_t *evchp = NULL; 2674 2675 /* 2676 * Initialize the event channel if we haven't already done so. 2677 */ 2678 if (evchp == NULL) { 2679 errno = sysevent_evc_bind(IPMP_EVENT_CHAN, &evchp, EVCH_CREAT); 2680 if (errno != 0) { 2681 logerr("cannot create event channel `%s': %s\n", 2682 IPMP_EVENT_CHAN, strerror(errno)); 2683 goto failed; 2684 } 2685 } 2686 2687 errno = nvlist_add_uint32(nvl, IPMP_EVENT_VERSION, 2688 IPMP_EVENT_CUR_VERSION); 2689 if (errno != 0) { 2690 logerr("cannot create `%s' event: %s", subclass, 2691 strerror(errno)); 2692 goto failed; 2693 } 2694 2695 errno = sysevent_evc_publish(evchp, EC_IPMP, subclass, "com.sun", 2696 "in.mpathd", nvl, EVCH_NOSLEEP); 2697 if (errno != 0) { 2698 logerr("cannot send `%s' event: %s\n", subclass, 2699 strerror(errno)); 2700 goto failed; 2701 } 2702 2703 nvlist_free(nvl); 2704 return (0); 2705 failed: 2706 nvlist_free(nvl); 2707 return (-1); 2708 } 2709 2710 /* 2711 * Return the external IPMP state associated with phyint `pi'. 2712 */ 2713 static ipmp_if_state_t 2714 ifstate(struct phyint *pi) 2715 { 2716 switch (pi->pi_state) { 2717 case PI_INIT: 2718 return (IPMP_IF_UNKNOWN); 2719 2720 case PI_NOTARGETS: 2721 if (pi->pi_flags & IFF_FAILED) 2722 return (IPMP_IF_FAILED); 2723 return (IPMP_IF_UNKNOWN); 2724 2725 case PI_OFFLINE: 2726 return (IPMP_IF_OFFLINE); 2727 2728 case PI_FAILED: 2729 return (IPMP_IF_FAILED); 2730 2731 case PI_RUNNING: 2732 return (IPMP_IF_OK); 2733 } 2734 2735 logerr("ifstate: unknown state %d; aborting\n", pi->pi_state); 2736 abort(); 2737 /* NOTREACHED */ 2738 } 2739 2740 /* 2741 * Return the external IPMP interface type associated with phyint `pi'. 2742 */ 2743 static ipmp_if_type_t 2744 iftype(struct phyint *pi) 2745 { 2746 if (pi->pi_flags & IFF_STANDBY) 2747 return (IPMP_IF_STANDBY); 2748 else 2749 return (IPMP_IF_NORMAL); 2750 } 2751 2752 /* 2753 * Return the external IPMP link state associated with phyint `pi'. 2754 */ 2755 static ipmp_if_linkstate_t 2756 iflinkstate(struct phyint *pi) 2757 { 2758 if (!(pi->pi_notes & (DL_NOTE_LINK_UP|DL_NOTE_LINK_DOWN))) 2759 return (IPMP_LINK_UNKNOWN); 2760 2761 return (LINK_DOWN(pi) ? IPMP_LINK_DOWN : IPMP_LINK_UP); 2762 } 2763 2764 /* 2765 * Return the external IPMP probe state associated with phyint `pi'. 2766 */ 2767 static ipmp_if_probestate_t 2768 ifprobestate(struct phyint *pi) 2769 { 2770 if (!PROBE_ENABLED(pi->pi_v4) && !PROBE_ENABLED(pi->pi_v6)) 2771 return (IPMP_PROBE_DISABLED); 2772 2773 if (pi->pi_state == PI_FAILED) 2774 return (IPMP_PROBE_FAILED); 2775 2776 if (!PROBE_CAPABLE(pi->pi_v4) && !PROBE_CAPABLE(pi->pi_v6)) 2777 return (IPMP_PROBE_UNKNOWN); 2778 2779 return (IPMP_PROBE_OK); 2780 } 2781 2782 /* 2783 * Return the external IPMP target mode associated with phyint instance `pii'. 2784 */ 2785 static ipmp_if_targmode_t 2786 iftargmode(struct phyint_instance *pii) 2787 { 2788 if (!PROBE_ENABLED(pii)) 2789 return (IPMP_TARG_DISABLED); 2790 else if (pii->pii_targets_are_routers) 2791 return (IPMP_TARG_ROUTES); 2792 else 2793 return (IPMP_TARG_MULTICAST); 2794 } 2795 2796 /* 2797 * Return the external IPMP flags associated with phyint `pi'. 2798 */ 2799 static ipmp_if_flags_t 2800 ifflags(struct phyint *pi) 2801 { 2802 ipmp_if_flags_t flags = 0; 2803 2804 if (logint_upcount(pi) == 0) 2805 flags |= IPMP_IFFLAG_DOWN; 2806 if (pi->pi_flags & IFF_INACTIVE) 2807 flags |= IPMP_IFFLAG_INACTIVE; 2808 if (pi->pi_hwaddrdup) 2809 flags |= IPMP_IFFLAG_HWADDRDUP; 2810 if (phyint_is_functioning(pi) && flags == 0) 2811 flags |= IPMP_IFFLAG_ACTIVE; 2812 2813 return (flags); 2814 } 2815 2816 /* 2817 * Store the test address used on phyint instance `pii' in `ssp'. If there's 2818 * no test address, 0.0.0.0 is stored. 2819 */ 2820 static struct sockaddr_storage * 2821 iftestaddr(struct phyint_instance *pii, struct sockaddr_storage *ssp) 2822 { 2823 if (PROBE_ENABLED(pii)) 2824 addr2storage(pii->pii_af, &pii->pii_probe_logint->li_addr, ssp); 2825 else 2826 addr2storage(AF_INET6, &in6addr_any, ssp); 2827 2828 return (ssp); 2829 } 2830 2831 /* 2832 * Return the external IPMP group state associated with phyint group `pg'. 2833 */ 2834 static ipmp_group_state_t 2835 groupstate(struct phyint_group *pg) 2836 { 2837 switch (pg->pg_state) { 2838 case PG_FAILED: 2839 return (IPMP_GROUP_FAILED); 2840 case PG_DEGRADED: 2841 return (IPMP_GROUP_DEGRADED); 2842 case PG_OK: 2843 return (IPMP_GROUP_OK); 2844 } 2845 2846 logerr("groupstate: unknown state %d; aborting\n", pg->pg_state); 2847 abort(); 2848 /* NOTREACHED */ 2849 } 2850 2851 /* 2852 * Return the external IPMP probe state associated with probe `ps'. 2853 */ 2854 static ipmp_probe_state_t 2855 probestate(struct probe_stats *ps) 2856 { 2857 switch (ps->pr_status) { 2858 case PR_UNUSED: 2859 case PR_LOST: 2860 return (IPMP_PROBE_LOST); 2861 case PR_UNACKED: 2862 return (IPMP_PROBE_SENT); 2863 case PR_ACKED: 2864 return (IPMP_PROBE_ACKED); 2865 } 2866 2867 logerr("probestate: unknown state %d; aborting\n", ps->pr_status); 2868 abort(); 2869 /* NOTREACHED */ 2870 } 2871 2872 /* 2873 * Generate an ESC_IPMP_PROBE_STATE sysevent for the probe described by `pr' 2874 * on phyint instance `pii'. Returns 0 on success, -1 on failure. 2875 */ 2876 int 2877 probe_state_event(struct probe_stats *pr, struct phyint_instance *pii) 2878 { 2879 nvlist_t *nvl; 2880 hrtime_t proc_time = 0, recv_time = 0; 2881 struct sockaddr_storage ss; 2882 struct target *tg = pr->pr_target; 2883 int64_t rttavg, rttdev; 2884 2885 errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 2886 if (errno != 0) { 2887 logperror("cannot create `interface change' event"); 2888 return (-1); 2889 } 2890 2891 errno = nvlist_add_uint32(nvl, IPMP_PROBE_ID, pr->pr_id); 2892 if (errno != 0) 2893 goto failed; 2894 2895 errno = nvlist_add_string(nvl, IPMP_IF_NAME, pii->pii_phyint->pi_name); 2896 if (errno != 0) 2897 goto failed; 2898 2899 errno = nvlist_add_uint32(nvl, IPMP_PROBE_STATE, probestate(pr)); 2900 if (errno != 0) 2901 goto failed; 2902 2903 errno = nvlist_add_hrtime(nvl, IPMP_PROBE_START_TIME, 2904 pr->pr_hrtime_start); 2905 if (errno != 0) 2906 goto failed; 2907 2908 errno = nvlist_add_hrtime(nvl, IPMP_PROBE_SENT_TIME, 2909 pr->pr_hrtime_sent); 2910 if (errno != 0) 2911 goto failed; 2912 2913 if (pr->pr_status == PR_ACKED) { 2914 recv_time = pr->pr_hrtime_ackrecv; 2915 proc_time = pr->pr_hrtime_ackproc; 2916 } 2917 2918 errno = nvlist_add_hrtime(nvl, IPMP_PROBE_ACKRECV_TIME, recv_time); 2919 if (errno != 0) 2920 goto failed; 2921 2922 errno = nvlist_add_hrtime(nvl, IPMP_PROBE_ACKPROC_TIME, proc_time); 2923 if (errno != 0) 2924 goto failed; 2925 2926 if (tg != NULL) 2927 addr2storage(pii->pii_af, &tg->tg_address, &ss); 2928 else 2929 addr2storage(pii->pii_af, &in6addr_any, &ss); 2930 2931 errno = nvlist_add_byte_array(nvl, IPMP_PROBE_TARGET, (uchar_t *)&ss, 2932 sizeof (ss)); 2933 if (errno != 0) 2934 goto failed; 2935 2936 rttavg = (tg != NULL) ? (tg->tg_rtt_sa / 8) : 0; 2937 errno = nvlist_add_int64(nvl, IPMP_PROBE_TARGET_RTTAVG, rttavg); 2938 if (errno != 0) 2939 goto failed; 2940 2941 rttdev = (tg != NULL) ? (tg->tg_rtt_sd / 4) : 0; 2942 errno = nvlist_add_int64(nvl, IPMP_PROBE_TARGET_RTTDEV, rttdev); 2943 if (errno != 0) 2944 goto failed; 2945 2946 return (post_event(ESC_IPMP_PROBE_STATE, nvl)); 2947 failed: 2948 logperror("cannot create `probe state' event"); 2949 nvlist_free(nvl); 2950 return (-1); 2951 } 2952 2953 /* 2954 * Generate an ESC_IPMP_GROUP_STATE sysevent for phyint group `pg'. 2955 * Returns 0 on success, -1 on failure. 2956 */ 2957 static int 2958 phyint_group_state_event(struct phyint_group *pg) 2959 { 2960 nvlist_t *nvl; 2961 2962 errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 2963 if (errno != 0) { 2964 logperror("cannot create `group state change' event"); 2965 return (-1); 2966 } 2967 2968 errno = nvlist_add_string(nvl, IPMP_GROUP_NAME, pg->pg_name); 2969 if (errno != 0) 2970 goto failed; 2971 2972 errno = nvlist_add_uint64(nvl, IPMP_GROUP_SIGNATURE, pg->pg_sig); 2973 if (errno != 0) 2974 goto failed; 2975 2976 errno = nvlist_add_uint32(nvl, IPMP_GROUP_STATE, groupstate(pg)); 2977 if (errno != 0) 2978 goto failed; 2979 2980 return (post_event(ESC_IPMP_GROUP_STATE, nvl)); 2981 failed: 2982 logperror("cannot create `group state change' event"); 2983 nvlist_free(nvl); 2984 return (-1); 2985 } 2986 2987 /* 2988 * Generate an ESC_IPMP_GROUP_CHANGE sysevent of type `op' for phyint group 2989 * `pg'. Returns 0 on success, -1 on failure. 2990 */ 2991 static int 2992 phyint_group_change_event(struct phyint_group *pg, ipmp_group_op_t op) 2993 { 2994 nvlist_t *nvl; 2995 2996 errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 2997 if (errno != 0) { 2998 logperror("cannot create `group change' event"); 2999 return (-1); 3000 } 3001 3002 errno = nvlist_add_string(nvl, IPMP_GROUP_NAME, pg->pg_name); 3003 if (errno != 0) 3004 goto failed; 3005 3006 errno = nvlist_add_uint64(nvl, IPMP_GROUP_SIGNATURE, pg->pg_sig); 3007 if (errno != 0) 3008 goto failed; 3009 3010 errno = nvlist_add_uint64(nvl, IPMP_GROUPLIST_SIGNATURE, 3011 phyint_grouplistsig); 3012 if (errno != 0) 3013 goto failed; 3014 3015 errno = nvlist_add_uint32(nvl, IPMP_GROUP_OPERATION, op); 3016 if (errno != 0) 3017 goto failed; 3018 3019 return (post_event(ESC_IPMP_GROUP_CHANGE, nvl)); 3020 failed: 3021 logperror("cannot create `group change' event"); 3022 nvlist_free(nvl); 3023 return (-1); 3024 } 3025 3026 /* 3027 * Generate an ESC_IPMP_GROUP_MEMBER_CHANGE sysevent for phyint `pi' in 3028 * group `pg'. Returns 0 on success, -1 on failure. 3029 */ 3030 static int 3031 phyint_group_member_event(struct phyint_group *pg, struct phyint *pi, 3032 ipmp_if_op_t op) 3033 { 3034 nvlist_t *nvl; 3035 3036 errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 3037 if (errno != 0) { 3038 logperror("cannot create `group member change' event"); 3039 return (-1); 3040 } 3041 3042 errno = nvlist_add_string(nvl, IPMP_GROUP_NAME, pg->pg_name); 3043 if (errno != 0) 3044 goto failed; 3045 3046 errno = nvlist_add_uint64(nvl, IPMP_GROUP_SIGNATURE, pg->pg_sig); 3047 if (errno != 0) 3048 goto failed; 3049 3050 errno = nvlist_add_uint32(nvl, IPMP_IF_OPERATION, op); 3051 if (errno != 0) 3052 goto failed; 3053 3054 errno = nvlist_add_string(nvl, IPMP_IF_NAME, pi->pi_name); 3055 if (errno != 0) 3056 goto failed; 3057 3058 errno = nvlist_add_uint32(nvl, IPMP_IF_TYPE, iftype(pi)); 3059 if (errno != 0) 3060 goto failed; 3061 3062 errno = nvlist_add_uint32(nvl, IPMP_IF_STATE, ifstate(pi)); 3063 if (errno != 0) 3064 goto failed; 3065 3066 return (post_event(ESC_IPMP_GROUP_MEMBER_CHANGE, nvl)); 3067 failed: 3068 logperror("cannot create `group member change' event"); 3069 nvlist_free(nvl); 3070 return (-1); 3071 3072 } 3073 3074 /* 3075 * Generate an ESC_IPMP_IF_CHANGE sysevent for phyint `pi' in group `pg'. 3076 * Returns 0 on success, -1 on failure. 3077 */ 3078 static int 3079 phyint_state_event(struct phyint_group *pg, struct phyint *pi) 3080 { 3081 nvlist_t *nvl; 3082 3083 errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 3084 if (errno != 0) { 3085 logperror("cannot create `interface change' event"); 3086 return (-1); 3087 } 3088 3089 errno = nvlist_add_string(nvl, IPMP_GROUP_NAME, pg->pg_name); 3090 if (errno != 0) 3091 goto failed; 3092 3093 errno = nvlist_add_uint64(nvl, IPMP_GROUP_SIGNATURE, pg->pg_sig); 3094 if (errno != 0) 3095 goto failed; 3096 3097 errno = nvlist_add_string(nvl, IPMP_IF_NAME, pi->pi_name); 3098 if (errno != 0) 3099 goto failed; 3100 3101 errno = nvlist_add_uint32(nvl, IPMP_IF_TYPE, iftype(pi)); 3102 if (errno != 0) 3103 goto failed; 3104 3105 errno = nvlist_add_uint32(nvl, IPMP_IF_STATE, ifstate(pi)); 3106 if (errno != 0) 3107 goto failed; 3108 3109 return (post_event(ESC_IPMP_IF_CHANGE, nvl)); 3110 failed: 3111 logperror("cannot create `interface change' event"); 3112 nvlist_free(nvl); 3113 return (-1); 3114 3115 } 3116 3117 /* 3118 * Generate a signature for use. The signature is conceptually divided 3119 * into two pieces: a random 16-bit "generation number" and a 48-bit 3120 * monotonically increasing integer. The generation number protects 3121 * against stale updates to entities (e.g., IPMP groups) that have been 3122 * deleted and since recreated. 3123 */ 3124 static uint64_t 3125 gensig(void) 3126 { 3127 static int seeded = 0; 3128 3129 if (seeded == 0) { 3130 srand48((long)gethrtime()); 3131 seeded++; 3132 } 3133 3134 return ((uint64_t)lrand48() << 48 | 1); 3135 } 3136 3137 /* 3138 * Store the information associated with group `grname' into a dynamically 3139 * allocated structure pointed to by `*grinfopp'. Returns an IPMP error code. 3140 */ 3141 unsigned int 3142 getgroupinfo(const char *grname, ipmp_groupinfo_t **grinfopp) 3143 { 3144 struct phyint *pi; 3145 struct phyint_group *pg; 3146 char (*ifs)[LIFNAMSIZ]; 3147 unsigned int i, j; 3148 unsigned int nif = 0, naddr = 0; 3149 lifgroupinfo_t lifgr; 3150 addrlist_t *addrp; 3151 struct sockaddr_storage *addrs; 3152 int fdt = 0; 3153 3154 pg = phyint_group_lookup(grname); 3155 if (pg == NULL) 3156 return (IPMP_EUNKGROUP); 3157 3158 /* 3159 * Tally up the number of interfaces, allocate an array to hold them, 3160 * and insert their names into the array. While we're at it, if any 3161 * interface is actually enabled to send probes, save the group fdt. 3162 */ 3163 for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) 3164 nif++; 3165 3166 ifs = alloca(nif * sizeof (*ifs)); 3167 for (i = 0, pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext, i++) { 3168 assert(i < nif); 3169 (void) strlcpy(ifs[i], pi->pi_name, LIFNAMSIZ); 3170 if (PROBE_ENABLED(pi->pi_v4) || PROBE_ENABLED(pi->pi_v6)) 3171 fdt = pg->pg_fdt; 3172 } 3173 assert(i == nif); 3174 3175 /* 3176 * If this is the anonymous group, there's no other information to 3177 * collect (since there's no IPMP interface). 3178 */ 3179 if (pg == phyint_anongroup) { 3180 *grinfopp = ipmp_groupinfo_create(pg->pg_name, pg->pg_sig, fdt, 3181 groupstate(pg), nif, ifs, "", "", "", "", 0, NULL); 3182 return (*grinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS); 3183 } 3184 3185 /* 3186 * Grab some additional information about the group from the kernel. 3187 * (NOTE: since SIOCGLIFGROUPINFO does not look up by interface name, 3188 * we can use ifsock_v4 even for a V6-only group.) 3189 */ 3190 (void) strlcpy(lifgr.gi_grname, grname, LIFGRNAMSIZ); 3191 if (ioctl(ifsock_v4, SIOCGLIFGROUPINFO, &lifgr) == -1) { 3192 if (errno == ENOENT) 3193 return (IPMP_EUNKGROUP); 3194 3195 logperror("getgroupinfo: SIOCGLIFGROUPINFO"); 3196 return (IPMP_FAILURE); 3197 } 3198 3199 /* 3200 * Tally up the number of data addresses, allocate an array to hold 3201 * them, and insert their values into the array. 3202 */ 3203 for (addrp = pg->pg_addrs; addrp != NULL; addrp = addrp->al_next) 3204 naddr++; 3205 3206 addrs = alloca(naddr * sizeof (*addrs)); 3207 i = 0; 3208 for (addrp = pg->pg_addrs; addrp != NULL; addrp = addrp->al_next) { 3209 /* 3210 * It's possible to have duplicate addresses (if some are 3211 * down). Weed the dups out to avoid confusing consumers. 3212 * (If groups start having tons of addresses, we'll need a 3213 * better algorithm here.) 3214 */ 3215 for (j = 0; j < i; j++) { 3216 if (sockaddrcmp(&addrs[j], &addrp->al_addr)) 3217 break; 3218 } 3219 if (j == i) { 3220 assert(i < naddr); 3221 addrs[i++] = addrp->al_addr; 3222 } 3223 } 3224 naddr = i; 3225 3226 *grinfopp = ipmp_groupinfo_create(pg->pg_name, pg->pg_sig, fdt, 3227 groupstate(pg), nif, ifs, lifgr.gi_grifname, lifgr.gi_m4ifname, 3228 lifgr.gi_m6ifname, lifgr.gi_bcifname, naddr, addrs); 3229 return (*grinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS); 3230 } 3231 3232 /* 3233 * Store the target information associated with phyint instance `pii' into a 3234 * dynamically allocated structure pointed to by `*targinfopp'. Returns an 3235 * IPMP error code. 3236 */ 3237 unsigned int 3238 gettarginfo(struct phyint_instance *pii, const char *name, 3239 ipmp_targinfo_t **targinfopp) 3240 { 3241 uint_t ntarg = 0; 3242 struct target *tg; 3243 struct sockaddr_storage ss; 3244 struct sockaddr_storage *targs = NULL; 3245 3246 if (PROBE_CAPABLE(pii)) { 3247 targs = alloca(pii->pii_ntargets * sizeof (*targs)); 3248 tg = pii->pii_target_next; 3249 do { 3250 if (tg->tg_status == TG_ACTIVE) { 3251 assert(ntarg < pii->pii_ntargets); 3252 addr2storage(pii->pii_af, &tg->tg_address, 3253 &targs[ntarg++]); 3254 } 3255 if ((tg = tg->tg_next) == NULL) 3256 tg = pii->pii_targets; 3257 } while (tg != pii->pii_target_next); 3258 3259 assert(ntarg == pii->pii_ntargets); 3260 } 3261 3262 *targinfopp = ipmp_targinfo_create(name, iftestaddr(pii, &ss), 3263 iftargmode(pii), ntarg, targs); 3264 return (*targinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS); 3265 } 3266 3267 /* 3268 * Store the information associated with interface `ifname' into a dynamically 3269 * allocated structure pointed to by `*ifinfopp'. Returns an IPMP error code. 3270 */ 3271 unsigned int 3272 getifinfo(const char *ifname, ipmp_ifinfo_t **ifinfopp) 3273 { 3274 int retval; 3275 struct phyint *pi; 3276 ipmp_targinfo_t *targinfo4; 3277 ipmp_targinfo_t *targinfo6; 3278 3279 pi = phyint_lookup(ifname); 3280 if (pi == NULL) 3281 return (IPMP_EUNKIF); 3282 3283 if ((retval = gettarginfo(pi->pi_v4, pi->pi_name, &targinfo4)) != 0 || 3284 (retval = gettarginfo(pi->pi_v6, pi->pi_name, &targinfo6)) != 0) 3285 goto out; 3286 3287 *ifinfopp = ipmp_ifinfo_create(pi->pi_name, pi->pi_group->pg_name, 3288 ifstate(pi), iftype(pi), iflinkstate(pi), ifprobestate(pi), 3289 ifflags(pi), targinfo4, targinfo6); 3290 retval = (*ifinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS); 3291 out: 3292 if (targinfo4 != NULL) 3293 ipmp_freetarginfo(targinfo4); 3294 if (targinfo6 != NULL) 3295 ipmp_freetarginfo(targinfo6); 3296 return (retval); 3297 } 3298 3299 /* 3300 * Store the current list of IPMP groups into a dynamically allocated 3301 * structure pointed to by `*grlistpp'. Returns an IPMP error code. 3302 */ 3303 unsigned int 3304 getgrouplist(ipmp_grouplist_t **grlistpp) 3305 { 3306 struct phyint_group *pg; 3307 char (*groups)[LIFGRNAMSIZ]; 3308 unsigned int i, ngroup; 3309 3310 /* 3311 * Tally up the number of groups, allocate an array to hold them, and 3312 * insert their names into the array. 3313 */ 3314 for (ngroup = 0, pg = phyint_groups; pg != NULL; pg = pg->pg_next) 3315 ngroup++; 3316 3317 groups = alloca(ngroup * sizeof (*groups)); 3318 for (i = 0, pg = phyint_groups; pg != NULL; pg = pg->pg_next, i++) { 3319 assert(i < ngroup); 3320 (void) strlcpy(groups[i], pg->pg_name, LIFGRNAMSIZ); 3321 } 3322 assert(i == ngroup); 3323 3324 *grlistpp = ipmp_grouplist_create(phyint_grouplistsig, ngroup, groups); 3325 return (*grlistpp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS); 3326 } 3327 3328 /* 3329 * Store the address information for `ssp' (in group `grname') into a 3330 * dynamically allocated structure pointed to by `*adinfopp'. Returns an IPMP 3331 * error code. (We'd call this function getaddrinfo(), but it would conflict 3332 * with getaddrinfo(3SOCKET)). 3333 */ 3334 unsigned int 3335 getgraddrinfo(const char *grname, struct sockaddr_storage *ssp, 3336 ipmp_addrinfo_t **adinfopp) 3337 { 3338 int ifsock; 3339 addrlist_t *addrp, *addrmatchp = NULL; 3340 ipmp_addr_state_t state; 3341 const char *binding = ""; 3342 struct lifreq lifr; 3343 struct phyint_group *pg; 3344 3345 if ((pg = phyint_group_lookup(grname)) == NULL) 3346 return (IPMP_EUNKADDR); 3347 3348 /* 3349 * Walk through the data addresses, and find a match. Note that since 3350 * some of the addresses may be down, more than one may match. We 3351 * prefer an up address (if one exists). 3352 */ 3353 for (addrp = pg->pg_addrs; addrp != NULL; addrp = addrp->al_next) { 3354 if (sockaddrcmp(ssp, &addrp->al_addr)) { 3355 addrmatchp = addrp; 3356 if (addrmatchp->al_flags & IFF_UP) 3357 break; 3358 } 3359 } 3360 3361 if (addrmatchp == NULL) 3362 return (IPMP_EUNKADDR); 3363 3364 state = (addrmatchp->al_flags & IFF_UP) ? IPMP_ADDR_UP : IPMP_ADDR_DOWN; 3365 if (state == IPMP_ADDR_UP) { 3366 ifsock = (ssp->ss_family == AF_INET) ? ifsock_v4 : ifsock_v6; 3367 (void) strlcpy(lifr.lifr_name, addrmatchp->al_name, LIFNAMSIZ); 3368 if (ioctl(ifsock, SIOCGLIFBINDING, &lifr) >= 0) 3369 binding = lifr.lifr_binding; 3370 } 3371 3372 *adinfopp = ipmp_addrinfo_create(ssp, state, pg->pg_name, binding); 3373 return (*adinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS); 3374 } 3375 3376 /* 3377 * Store a snapshot of the IPMP subsystem into a dynamically allocated 3378 * structure pointed to by `*snapp'. Returns an IPMP error code. 3379 */ 3380 unsigned int 3381 getsnap(ipmp_snap_t **snapp) 3382 { 3383 ipmp_grouplist_t *grlistp; 3384 ipmp_groupinfo_t *grinfop; 3385 ipmp_addrinfo_t *adinfop; 3386 ipmp_addrlist_t *adlistp; 3387 ipmp_ifinfo_t *ifinfop; 3388 ipmp_snap_t *snap; 3389 struct phyint *pi; 3390 unsigned int i, j; 3391 int retval; 3392 3393 snap = ipmp_snap_create(); 3394 if (snap == NULL) 3395 return (IPMP_ENOMEM); 3396 3397 /* 3398 * Add group list. 3399 */ 3400 retval = getgrouplist(&snap->sn_grlistp); 3401 if (retval != IPMP_SUCCESS) 3402 goto failed; 3403 3404 /* 3405 * Add information for each group in the list, along with all of its 3406 * data addresses. 3407 */ 3408 grlistp = snap->sn_grlistp; 3409 for (i = 0; i < grlistp->gl_ngroup; i++) { 3410 retval = getgroupinfo(grlistp->gl_groups[i], &grinfop); 3411 if (retval != IPMP_SUCCESS) 3412 goto failed; 3413 3414 retval = ipmp_snap_addgroupinfo(snap, grinfop); 3415 if (retval != IPMP_SUCCESS) { 3416 ipmp_freegroupinfo(grinfop); 3417 goto failed; 3418 } 3419 3420 adlistp = grinfop->gr_adlistp; 3421 for (j = 0; j < adlistp->al_naddr; j++) { 3422 retval = getgraddrinfo(grinfop->gr_name, 3423 &adlistp->al_addrs[j], &adinfop); 3424 if (retval != IPMP_SUCCESS) 3425 goto failed; 3426 3427 retval = ipmp_snap_addaddrinfo(snap, adinfop); 3428 if (retval != IPMP_SUCCESS) { 3429 ipmp_freeaddrinfo(adinfop); 3430 goto failed; 3431 } 3432 } 3433 } 3434 3435 /* 3436 * Add information for each configured phyint. 3437 */ 3438 for (pi = phyints; pi != NULL; pi = pi->pi_next) { 3439 retval = getifinfo(pi->pi_name, &ifinfop); 3440 if (retval != IPMP_SUCCESS) 3441 goto failed; 3442 3443 retval = ipmp_snap_addifinfo(snap, ifinfop); 3444 if (retval != IPMP_SUCCESS) { 3445 ipmp_freeifinfo(ifinfop); 3446 goto failed; 3447 } 3448 } 3449 3450 *snapp = snap; 3451 return (IPMP_SUCCESS); 3452 failed: 3453 ipmp_snap_free(snap); 3454 return (retval); 3455 } 3456