1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include "mpd_defs.h" 27 #include "mpd_tables.h" 28 29 /* 30 * Global list of phyints, phyint instances, phyint groups and the anonymous 31 * group; the latter is initialized in phyint_init(). 32 */ 33 struct phyint *phyints = NULL; 34 struct phyint_instance *phyint_instances = NULL; 35 struct phyint_group *phyint_groups = NULL; 36 struct phyint_group *phyint_anongroup; 37 38 /* 39 * Grouplist signature; initialized in phyint_init(). 
40 */ 41 static uint64_t phyint_grouplistsig; 42 43 static void phyint_inst_insert(struct phyint_instance *pii); 44 static void phyint_inst_print(struct phyint_instance *pii); 45 46 static void phyint_insert(struct phyint *pi, struct phyint_group *pg); 47 static void phyint_delete(struct phyint *pi); 48 static boolean_t phyint_is_usable(struct phyint *pi); 49 50 static void logint_print(struct logint *li); 51 static void logint_insert(struct phyint_instance *pii, struct logint *li); 52 static struct logint *logint_lookup(struct phyint_instance *pii, char *li_name); 53 54 static void target_print(struct target *tg); 55 static void target_insert(struct phyint_instance *pii, struct target *tg); 56 static struct target *target_first(struct phyint_instance *pii); 57 static struct target *target_select_best(struct phyint_instance *pii); 58 static void target_flush_hosts(struct phyint_group *pg); 59 60 static void reset_pii_probes(struct phyint_instance *pii, struct target *tg); 61 62 static boolean_t phyint_inst_v6_sockinit(struct phyint_instance *pii); 63 static boolean_t phyint_inst_v4_sockinit(struct phyint_instance *pii); 64 65 static int phyint_state_event(struct phyint_group *pg, struct phyint *pi); 66 static int phyint_group_state_event(struct phyint_group *pg); 67 static int phyint_group_change_event(struct phyint_group *pg, ipmp_group_op_t); 68 static int phyint_group_member_event(struct phyint_group *pg, struct phyint *pi, 69 ipmp_if_op_t op); 70 71 static int logint_upcount(struct phyint *pi); 72 static uint64_t gensig(void); 73 74 /* Initialize any per-file global state. 
Returns 0 on success, -1 on failure */ 75 int 76 phyint_init(void) 77 { 78 phyint_grouplistsig = gensig(); 79 if (track_all_phyints) { 80 phyint_anongroup = phyint_group_create(""); 81 if (phyint_anongroup == NULL) 82 return (-1); 83 phyint_group_insert(phyint_anongroup); 84 } 85 return (0); 86 } 87 88 /* Return the phyint with the given name */ 89 struct phyint * 90 phyint_lookup(const char *name) 91 { 92 struct phyint *pi; 93 94 if (debug & D_PHYINT) 95 logdebug("phyint_lookup(%s)\n", name); 96 97 for (pi = phyints; pi != NULL; pi = pi->pi_next) { 98 if (strncmp(pi->pi_name, name, sizeof (pi->pi_name)) == 0) 99 break; 100 } 101 return (pi); 102 } 103 104 /* 105 * Lookup a phyint in the group that has the same hardware address as `pi', or 106 * NULL if there's none. If `online_only' is set, then only online phyints 107 * are considered when matching. Otherwise, phyints that had been offlined 108 * due to a duplicate hardware address will also be considered. 109 */ 110 static struct phyint * 111 phyint_lookup_hwaddr(struct phyint *pi, boolean_t online_only) 112 { 113 struct phyint *pi2; 114 115 if (pi->pi_group == phyint_anongroup) 116 return (NULL); 117 118 for (pi2 = pi->pi_group->pg_phyint; pi2 != NULL; pi2 = pi2->pi_pgnext) { 119 if (pi2 == pi) 120 continue; 121 122 /* 123 * NOTE: even when online_only is B_FALSE, we ignore phyints 124 * that are administratively offline (rather than offline 125 * because they're dups); when they're brought back online, 126 * they'll be flagged as dups if need be. 127 */ 128 if (pi2->pi_state == PI_OFFLINE && 129 (online_only || !pi2->pi_hwaddrdup)) 130 continue; 131 132 if (pi2->pi_hwaddrlen == pi->pi_hwaddrlen && 133 bcmp(pi2->pi_hwaddr, pi->pi_hwaddr, pi->pi_hwaddrlen) == 0) 134 return (pi2); 135 } 136 return (NULL); 137 } 138 139 /* 140 * Respond to DLPI notifications. Currently, this only processes physical 141 * address changes for the phyint passed via `arg' by onlining or offlining 142 * phyints in the group. 
143 */ 144 /* ARGSUSED */ 145 static void 146 phyint_link_notify(dlpi_handle_t dh, dlpi_notifyinfo_t *dnip, void *arg) 147 { 148 struct phyint *pi = arg; 149 struct phyint *oduppi = NULL, *duppi = NULL; 150 151 assert((dnip->dni_note & pi->pi_notes) != 0); 152 153 if (dnip->dni_note != DL_NOTE_PHYS_ADDR) 154 return; 155 156 assert(dnip->dni_physaddrlen <= DLPI_PHYSADDR_MAX); 157 158 /* 159 * If our hardware address hasn't changed, there's nothing to do. 160 */ 161 if (pi->pi_hwaddrlen == dnip->dni_physaddrlen && 162 bcmp(pi->pi_hwaddr, dnip->dni_physaddr, pi->pi_hwaddrlen) == 0) 163 return; 164 165 oduppi = phyint_lookup_hwaddr(pi, _B_FALSE); 166 pi->pi_hwaddrlen = dnip->dni_physaddrlen; 167 (void) memcpy(pi->pi_hwaddr, dnip->dni_physaddr, pi->pi_hwaddrlen); 168 duppi = phyint_lookup_hwaddr(pi, _B_FALSE); 169 170 if (oduppi != NULL || pi->pi_hwaddrdup) { 171 /* 172 * Our old hardware address was a duplicate. If we'd been 173 * offlined because of it, and our new hardware address is not 174 * a duplicate, then bring us online. Otherwise, `oduppi' 175 * must've been the one brought offline; bring it online. 176 */ 177 if (pi->pi_hwaddrdup) { 178 if (duppi == NULL) 179 (void) phyint_undo_offline(pi); 180 } else { 181 assert(oduppi->pi_hwaddrdup); 182 (void) phyint_undo_offline(oduppi); 183 } 184 } 185 186 if (duppi != NULL && !pi->pi_hwaddrdup) { 187 /* 188 * Our new hardware address was a duplicate and we're not 189 * yet flagged as a duplicate; bring us offline. 190 */ 191 pi->pi_hwaddrdup = _B_TRUE; 192 (void) phyint_offline(pi, 0); 193 } 194 } 195 196 /* 197 * Initialize information about the underlying link for `pi', and set us 198 * up to be notified about future changes. Returns _B_TRUE on success. 
199 */ 200 boolean_t 201 phyint_link_init(struct phyint *pi) 202 { 203 int retval; 204 uint_t notes; 205 const char *errmsg; 206 dlpi_notifyid_t id; 207 208 pi->pi_notes = 0; 209 retval = dlpi_open(pi->pi_name, &pi->pi_dh, 0); 210 if (retval != DLPI_SUCCESS) { 211 pi->pi_dh = NULL; 212 errmsg = "cannot open"; 213 goto failed; 214 } 215 216 pi->pi_hwaddrlen = DLPI_PHYSADDR_MAX; 217 retval = dlpi_get_physaddr(pi->pi_dh, DL_CURR_PHYS_ADDR, pi->pi_hwaddr, 218 &pi->pi_hwaddrlen); 219 if (retval != DLPI_SUCCESS) { 220 errmsg = "cannot get hardware address"; 221 goto failed; 222 } 223 224 retval = dlpi_bind(pi->pi_dh, DLPI_ANY_SAP, NULL); 225 if (retval != DLPI_SUCCESS) { 226 errmsg = "cannot bind to DLPI_ANY_SAP"; 227 goto failed; 228 } 229 230 /* 231 * Check if the link supports DLPI link state notifications. For 232 * historical reasons, the actual changes are tracked through routing 233 * sockets, so we immediately disable the notification upon success. 234 */ 235 notes = DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN; 236 retval = dlpi_enabnotify(pi->pi_dh, notes, phyint_link_notify, pi, &id); 237 if (retval == DLPI_SUCCESS) { 238 (void) dlpi_disabnotify(pi->pi_dh, id, NULL); 239 pi->pi_notes |= notes; 240 } 241 242 /* 243 * Enable notification of hardware address changes to keep pi_hwaddr 244 * up-to-date and track if we need to offline/undo-offline phyints. 245 */ 246 notes = DL_NOTE_PHYS_ADDR; 247 retval = dlpi_enabnotify(pi->pi_dh, notes, phyint_link_notify, pi, &id); 248 if (retval == DLPI_SUCCESS && poll_add(dlpi_fd(pi->pi_dh)) == 0) 249 pi->pi_notes |= notes; 250 251 return (_B_TRUE); 252 failed: 253 logerr("%s: %s: %s\n", pi->pi_name, errmsg, dlpi_strerror(retval)); 254 if (pi->pi_dh != NULL) { 255 dlpi_close(pi->pi_dh); 256 pi->pi_dh = NULL; 257 } 258 return (_B_FALSE); 259 } 260 261 /* 262 * Close use of link on `pi'. 
263 */ 264 void 265 phyint_link_close(struct phyint *pi) 266 { 267 if (pi->pi_notes & DL_NOTE_PHYS_ADDR) { 268 (void) poll_remove(dlpi_fd(pi->pi_dh)); 269 pi->pi_notes &= ~DL_NOTE_PHYS_ADDR; 270 } 271 272 /* 273 * NOTE: we don't clear pi_notes here so that iflinkstate() can still 274 * properly report the link state even when offline (which is possible 275 * since we use IFF_RUNNING to track link state). 276 */ 277 dlpi_close(pi->pi_dh); 278 pi->pi_dh = NULL; 279 } 280 281 /* Return the phyint instance with the given name and the given family */ 282 struct phyint_instance * 283 phyint_inst_lookup(int af, char *name) 284 { 285 struct phyint *pi; 286 287 if (debug & D_PHYINT) 288 logdebug("phyint_inst_lookup(%s %s)\n", AF_STR(af), name); 289 290 assert(af == AF_INET || af == AF_INET6); 291 292 pi = phyint_lookup(name); 293 if (pi == NULL) 294 return (NULL); 295 296 return (PHYINT_INSTANCE(pi, af)); 297 } 298 299 struct phyint_group * 300 phyint_group_lookup(const char *pg_name) 301 { 302 struct phyint_group *pg; 303 304 if (debug & D_PHYINT) 305 logdebug("phyint_group_lookup(%s)\n", pg_name); 306 307 for (pg = phyint_groups; pg != NULL; pg = pg->pg_next) { 308 if (strncmp(pg->pg_name, pg_name, sizeof (pg->pg_name)) == 0) 309 break; 310 } 311 return (pg); 312 } 313 314 /* 315 * Insert the phyint in the linked list of all phyints. If the phyint belongs 316 * to some group, insert it in the phyint group list. 317 */ 318 static void 319 phyint_insert(struct phyint *pi, struct phyint_group *pg) 320 { 321 if (debug & D_PHYINT) 322 logdebug("phyint_insert(%s '%s')\n", pi->pi_name, pg->pg_name); 323 324 /* Insert the phyint at the head of the 'all phyints' list */ 325 pi->pi_next = phyints; 326 pi->pi_prev = NULL; 327 if (phyints != NULL) 328 phyints->pi_prev = pi; 329 phyints = pi; 330 331 /* 332 * Insert the phyint at the head of the 'phyint_group members' list 333 * of the phyint group to which it belongs. 
334 */ 335 pi->pi_pgnext = NULL; 336 pi->pi_pgprev = NULL; 337 pi->pi_group = pg; 338 339 pi->pi_pgnext = pg->pg_phyint; 340 if (pi->pi_pgnext != NULL) 341 pi->pi_pgnext->pi_pgprev = pi; 342 pg->pg_phyint = pi; 343 344 /* Refresh the group state now that this phyint has been added */ 345 phyint_group_refresh_state(pg); 346 347 pg->pg_sig++; 348 (void) phyint_group_member_event(pg, pi, IPMP_IF_ADD); 349 } 350 351 /* Insert the phyint instance in the linked list of all phyint instances. */ 352 static void 353 phyint_inst_insert(struct phyint_instance *pii) 354 { 355 if (debug & D_PHYINT) { 356 logdebug("phyint_inst_insert(%s %s)\n", 357 AF_STR(pii->pii_af), pii->pii_name); 358 } 359 360 /* 361 * Insert the phyint at the head of the 'all phyint instances' list. 362 */ 363 pii->pii_next = phyint_instances; 364 pii->pii_prev = NULL; 365 if (phyint_instances != NULL) 366 phyint_instances->pii_prev = pii; 367 phyint_instances = pii; 368 } 369 370 /* 371 * Create a new phyint with the given parameters. Also insert it into 372 * the list of all phyints and the list of phyint group members by calling 373 * phyint_insert(). 374 */ 375 static struct phyint * 376 phyint_create(char *pi_name, struct phyint_group *pg, uint_t ifindex, 377 uint64_t flags) 378 { 379 struct phyint *pi; 380 381 pi = calloc(1, sizeof (struct phyint)); 382 if (pi == NULL) { 383 logperror("phyint_create: calloc"); 384 return (NULL); 385 } 386 387 /* 388 * Record the phyint values. 389 */ 390 (void) strlcpy(pi->pi_name, pi_name, sizeof (pi->pi_name)); 391 pi->pi_taddrthresh = getcurrentsec() + TESTADDR_CONF_TIME; 392 pi->pi_ifindex = ifindex; 393 pi->pi_icmpid = htons(((getpid() & 0xFF) << 8) | (ifindex & 0xFF)); 394 395 /* 396 * If the interface is offline, we set the state to PI_OFFLINE. 397 * Otherwise, we optimistically start in the PI_RUNNING state. Later 398 * (in process_link_state_changes()), we will adjust this to match the 399 * current state of the link. 
Further, if test addresses are 400 * subsequently assigned, we will transition to PI_NOTARGETS and then 401 * to either PI_RUNNING or PI_FAILED depending on the probe results. 402 */ 403 pi->pi_state = (flags & IFF_OFFLINE) ? PI_OFFLINE : PI_RUNNING; 404 pi->pi_flags = PHYINT_FLAGS(flags); 405 406 /* 407 * Initialise the link state. The link state is initialised to 408 * up, so that if the link is down when IPMP starts monitoring 409 * the interface, it will appear as though there has been a 410 * transition from the link up to link down. This avoids 411 * having to treat this situation as a special case. 412 */ 413 INIT_LINK_STATE(pi); 414 415 if (!phyint_link_init(pi)) { 416 free(pi); 417 return (NULL); 418 } 419 420 /* 421 * Insert the phyint in the list of all phyints, and the 422 * list of phyint group members 423 */ 424 phyint_insert(pi, pg); 425 426 return (pi); 427 } 428 429 /* 430 * Create a new phyint instance belonging to the phyint 'pi' and address 431 * family 'af'. Also insert it into the list of all phyint instances by 432 * calling phyint_inst_insert(). 433 */ 434 static struct phyint_instance * 435 phyint_inst_create(struct phyint *pi, int af) 436 { 437 struct phyint_instance *pii; 438 439 pii = calloc(1, sizeof (struct phyint_instance)); 440 if (pii == NULL) { 441 logperror("phyint_inst_create: calloc"); 442 return (NULL); 443 } 444 445 /* 446 * Attach the phyint instance to the phyint. 447 * Set the back pointers as well 448 */ 449 pii->pii_phyint = pi; 450 if (af == AF_INET) 451 pi->pi_v4 = pii; 452 else 453 pi->pi_v6 = pii; 454 455 pii->pii_in_use = 1; 456 pii->pii_probe_sock = -1; 457 pii->pii_snxt = 1; 458 pii->pii_af = af; 459 pii->pii_fd_hrtime = gethrtime() + 460 (FAILURE_DETECTION_QP * (hrtime_t)NANOSEC); 461 pii->pii_flags = pi->pi_flags; 462 463 /* Insert the phyint instance in the list of all phyint instances. */ 464 phyint_inst_insert(pii); 465 return (pii); 466 } 467 468 /* 469 * Change the state of phyint `pi' to state `state'. 
470 */ 471 void 472 phyint_chstate(struct phyint *pi, enum pi_state state) 473 { 474 /* 475 * To simplify things, some callers always set a given state 476 * regardless of the previous state of the phyint (e.g., setting 477 * PI_RUNNING when it's already set). We shouldn't bother 478 * generating an event or consuming a signature for these, since 479 * the actual state of the interface is unchanged. 480 */ 481 if (pi->pi_state == state) 482 return; 483 484 pi->pi_state = state; 485 phyint_changed(pi); 486 } 487 488 /* 489 * Note that `pi' has changed state. 490 */ 491 void 492 phyint_changed(struct phyint *pi) 493 { 494 pi->pi_group->pg_sig++; 495 (void) phyint_state_event(pi->pi_group, pi); 496 } 497 498 /* 499 * Insert the phyint group in the linked list of all phyint groups 500 * at the head of the list 501 */ 502 void 503 phyint_group_insert(struct phyint_group *pg) 504 { 505 pg->pg_next = phyint_groups; 506 pg->pg_prev = NULL; 507 if (phyint_groups != NULL) 508 phyint_groups->pg_prev = pg; 509 phyint_groups = pg; 510 511 phyint_grouplistsig++; 512 (void) phyint_group_change_event(pg, IPMP_GROUP_ADD); 513 } 514 515 /* 516 * Create a new phyint group called 'name'. 517 */ 518 struct phyint_group * 519 phyint_group_create(const char *name) 520 { 521 struct phyint_group *pg; 522 523 if (debug & D_PHYINT) 524 logdebug("phyint_group_create(%s)\n", name); 525 526 pg = calloc(1, sizeof (struct phyint_group)); 527 if (pg == NULL) { 528 logperror("phyint_group_create: calloc"); 529 return (NULL); 530 } 531 532 (void) strlcpy(pg->pg_name, name, sizeof (pg->pg_name)); 533 pg->pg_sig = gensig(); 534 pg->pg_fdt = user_failure_detection_time; 535 pg->pg_probeint = user_probe_interval; 536 pg->pg_in_use = _B_TRUE; 537 538 /* 539 * Normal groups always start in the PG_FAILED state since they 540 * have no active interfaces. In contrast, anonymous groups are 541 * heterogeneous and thus always PG_OK. 542 */ 543 pg->pg_state = (name[0] == '\0' ? 
PG_OK : PG_FAILED); 544 545 return (pg); 546 } 547 548 /* 549 * Change the state of the phyint group `pg' to state `state'. 550 */ 551 void 552 phyint_group_chstate(struct phyint_group *pg, enum pg_state state) 553 { 554 assert(pg != phyint_anongroup); 555 556 /* 557 * To simplify things, some callers always set a given state 558 * regardless of the previous state of the group (e.g., setting 559 * PG_DEGRADED when it's already set). We shouldn't bother 560 * generating an event or consuming a signature for these, since 561 * the actual state of the group is unchanged. 562 */ 563 if (pg->pg_state == state) 564 return; 565 566 pg->pg_state = state; 567 568 switch (state) { 569 case PG_FAILED: 570 /* 571 * We can never know with certainty that a group has 572 * failed. It is possible that all known targets have 573 * failed simultaneously, and new targets have come up 574 * instead. If the targets are routers then router 575 * discovery will kick in, and we will see the new routers 576 * thru routing socket messages. But if the targets are 577 * hosts, we have to discover it by multicast. So flush 578 * all the host targets. The next probe will send out a 579 * multicast echo request. If this is a group failure, we 580 * will still not see any response, otherwise the group 581 * will be repaired after we get NUM_PROBE_REPAIRS 582 * consecutive unicast replies on any phyint. 583 */ 584 target_flush_hosts(pg); 585 break; 586 587 case PG_OK: 588 case PG_DEGRADED: 589 break; 590 591 default: 592 logerr("phyint_group_chstate: invalid group state %d; " 593 "aborting\n", state); 594 abort(); 595 } 596 597 pg->pg_sig++; 598 (void) phyint_group_state_event(pg); 599 } 600 601 /* 602 * Create a new phyint instance and initialize it from the values supplied by 603 * the kernel. Always check for ENXIO before logging any error, because the 604 * interface could have vanished after completion of SIOCGLIFCONF. 
605 * Return values: 606 * pointer to the phyint instance on success 607 * NULL on failure Eg. if the phyint instance is not found in the kernel 608 */ 609 struct phyint_instance * 610 phyint_inst_init_from_k(int af, char *pi_name) 611 { 612 char pg_name[LIFNAMSIZ + 1]; 613 int ifsock; 614 uint_t ifindex; 615 uint64_t flags; 616 struct lifreq lifr; 617 struct phyint *pi; 618 struct phyint_instance *pii; 619 boolean_t pi_created; 620 struct phyint_group *pg; 621 622 retry: 623 pii = NULL; 624 pi = NULL; 625 pg = NULL; 626 pi_created = _B_FALSE; 627 628 if (debug & D_PHYINT) { 629 logdebug("phyint_inst_init_from_k(%s %s)\n", 630 AF_STR(af), pi_name); 631 } 632 633 assert(af == AF_INET || af == AF_INET6); 634 635 /* Get the socket for doing ioctls */ 636 ifsock = (af == AF_INET) ? ifsock_v4 : ifsock_v6; 637 638 /* 639 * Get the interface flags. Ignore virtual interfaces, IPMP 640 * meta-interfaces, point-to-point interfaces, and interfaces 641 * that can't support multicast. 642 */ 643 (void) strlcpy(lifr.lifr_name, pi_name, sizeof (lifr.lifr_name)); 644 if (ioctl(ifsock, SIOCGLIFFLAGS, (char *)&lifr) < 0) { 645 if (errno != ENXIO) { 646 logperror("phyint_inst_init_from_k:" 647 " ioctl (get flags)"); 648 } 649 return (NULL); 650 } 651 flags = lifr.lifr_flags; 652 if (!(flags & IFF_MULTICAST) || 653 (flags & (IFF_VIRTUAL|IFF_IPMP|IFF_POINTOPOINT))) 654 return (NULL); 655 656 /* 657 * Get the ifindex for recording later in our tables, in case we need 658 * to create a new phyint. 659 */ 660 if (ioctl(ifsock, SIOCGLIFINDEX, (char *)&lifr) < 0) { 661 if (errno != ENXIO) { 662 logperror("phyint_inst_init_from_k: " 663 " ioctl (get lifindex)"); 664 } 665 return (NULL); 666 } 667 ifindex = lifr.lifr_index; 668 669 /* 670 * Get the phyint group name of this phyint, from the kernel. 
671 */ 672 if (ioctl(ifsock, SIOCGLIFGROUPNAME, (char *)&lifr) < 0) { 673 if (errno != ENXIO) { 674 logperror("phyint_inst_init_from_k: " 675 "ioctl (get group name)"); 676 } 677 return (NULL); 678 } 679 (void) strlcpy(pg_name, lifr.lifr_groupname, sizeof (pg_name)); 680 681 /* 682 * If the phyint is not part of any group, pg_name is the 683 * null string. If 'track_all_phyints' is false, there is no 684 * need to create a phyint. 685 */ 686 if (pg_name[0] == '\0' && !track_all_phyints) { 687 /* 688 * If the IFF_FAILED, IFF_INACTIVE, or IFF_OFFLINE flags are 689 * set, reset them. These flags shouldn't be set if in.mpathd 690 * isn't tracking the interface. 691 */ 692 if ((flags & (IFF_FAILED | IFF_INACTIVE | IFF_OFFLINE))) { 693 lifr.lifr_flags = flags & 694 ~(IFF_FAILED | IFF_INACTIVE | IFF_OFFLINE); 695 if (ioctl(ifsock, SIOCSLIFFLAGS, (char *)&lifr) < 0) { 696 if (errno != ENXIO) { 697 logperror("phyint_inst_init_from_k:" 698 " ioctl (set flags)"); 699 } 700 } 701 } 702 return (NULL); 703 } 704 705 /* 706 * We need to create a new phyint instance. We may also need to 707 * create the group if e.g. the SIOCGLIFCONF loop in initifs() found 708 * an underlying interface before it found its IPMP meta-interface. 709 * Note that we keep any created groups even if phyint_inst_from_k() 710 * fails since a group's existence is not dependent on the ability of 711 * in.mpathd to the track the group's interfaces. 712 */ 713 if ((pg = phyint_group_lookup(pg_name)) == NULL) { 714 if ((pg = phyint_group_create(pg_name)) == NULL) { 715 logerr("phyint_inst_init_from_k: cannot create group " 716 "%s\n", pg_name); 717 return (NULL); 718 } 719 phyint_group_insert(pg); 720 } 721 722 /* 723 * Lookup the phyint. If the phyint does not exist create it. 
724 */ 725 pi = phyint_lookup(pi_name); 726 if (pi == NULL) { 727 pi = phyint_create(pi_name, pg, ifindex, flags); 728 if (pi == NULL) { 729 logerr("phyint_inst_init_from_k:" 730 " unable to create phyint %s\n", pi_name); 731 return (NULL); 732 } 733 pi_created = _B_TRUE; 734 } else { 735 /* The phyint exists already. */ 736 assert(pi_created == _B_FALSE); 737 /* 738 * Normally we should see consistent values for the IPv4 and 739 * IPv6 instances, for phyint properties. If we don't, it 740 * means things have changed underneath us, and we should 741 * resync our tables with the kernel. Check whether the 742 * interface index has changed. If so, it is most likely 743 * the interface has been unplumbed and replumbed, 744 * while we are yet to update our tables. Do it now. 745 */ 746 if (pi->pi_ifindex != ifindex) { 747 phyint_inst_delete(PHYINT_INSTANCE(pi, AF_OTHER(af))); 748 goto retry; 749 } 750 assert(PHYINT_INSTANCE(pi, af) == NULL); 751 752 /* 753 * If the group name seen by the IPv4 and IPv6 instances 754 * are different, it is most likely the groupname has 755 * changed, while we are yet to update our tables. Do it now. 756 */ 757 if (strcmp(pi->pi_group->pg_name, pg_name) != 0) { 758 phyint_inst_delete(PHYINT_INSTANCE(pi, 759 AF_OTHER(af))); 760 goto retry; 761 } 762 } 763 764 /* 765 * Create a new phyint instance, corresponding to the 'af' 766 * passed in. 767 */ 768 pii = phyint_inst_create(pi, af); 769 if (pii == NULL) { 770 logerr("phyint_inst_init_from_k: unable to create" 771 "phyint inst %s\n", pi->pi_name); 772 if (pi_created) 773 phyint_delete(pi); 774 775 return (NULL); 776 } 777 778 if (pi_created) { 779 /* 780 * If this phyint does not have a unique hardware address in its 781 * group, offline it. (The change_pif_flags() implementation 782 * requires that we defer this until after the phyint_instance 783 * is created.) 
784 */ 785 if (phyint_lookup_hwaddr(pi, _B_TRUE) != NULL) { 786 pi->pi_hwaddrdup = _B_TRUE; 787 (void) phyint_offline(pi, 0); 788 } 789 } 790 791 return (pii); 792 } 793 794 /* 795 * Bind pii_probe_sock to the address associated with pii_probe_logint. 796 * This socket will be used for sending and receiving ICMP/ICMPv6 probes to 797 * targets. Do the common part in this function, and complete the 798 * initializations by calling the protocol specific functions 799 * phyint_inst_v{4,6}_sockinit() respectively. 800 * 801 * Return values: _B_TRUE/_B_FALSE for success or failure respectively. 802 */ 803 boolean_t 804 phyint_inst_sockinit(struct phyint_instance *pii) 805 { 806 boolean_t success; 807 struct phyint_group *pg; 808 809 if (debug & D_PHYINT) { 810 logdebug("phyint_inst_sockinit(%s %s)\n", 811 AF_STR(pii->pii_af), pii->pii_name); 812 } 813 814 assert(pii->pii_probe_logint != NULL); 815 assert(pii->pii_probe_logint->li_flags & IFF_UP); 816 assert(pii->pii_probe_logint->li_flags & IFF_NOFAILOVER); 817 assert(pii->pii_af == AF_INET || pii->pii_af == AF_INET6); 818 819 /* 820 * If the socket is already bound, close pii_probe_sock 821 */ 822 if (pii->pii_probe_sock != -1) 823 close_probe_socket(pii, _B_TRUE); 824 825 /* 826 * If the phyint is not part of a named group and track_all_phyints is 827 * false, simply return. 828 */ 829 pg = pii->pii_phyint->pi_group; 830 if (pg == phyint_anongroup && !track_all_phyints) { 831 if (debug & D_PHYINT) 832 logdebug("phyint_inst_sockinit: no group\n"); 833 return (_B_FALSE); 834 } 835 836 /* 837 * Initialize the socket by calling the protocol specific function. 838 * If it succeeds, add the socket to the poll list. 
839 */ 840 if (pii->pii_af == AF_INET6) 841 success = phyint_inst_v6_sockinit(pii); 842 else 843 success = phyint_inst_v4_sockinit(pii); 844 845 if (success && (poll_add(pii->pii_probe_sock) == 0)) 846 return (_B_TRUE); 847 848 /* Something failed, cleanup and return false */ 849 if (pii->pii_probe_sock != -1) 850 close_probe_socket(pii, _B_FALSE); 851 852 return (_B_FALSE); 853 } 854 855 /* 856 * IPv6 specific part in initializing the pii_probe_sock. This socket is 857 * used to send/receive ICMPv6 probe packets. 858 */ 859 static boolean_t 860 phyint_inst_v6_sockinit(struct phyint_instance *pii) 861 { 862 icmp6_filter_t filter; 863 int hopcount = 1; 864 int off = 0; 865 int on = 1; 866 struct sockaddr_in6 testaddr; 867 868 /* 869 * Open a raw socket with ICMPv6 protocol. 870 * 871 * Use IPV6_BOUND_IF to make sure that probes are sent and received on 872 * the specified phyint only. Bind to the test address to ensure that 873 * the responses are sent to the specified phyint. 874 * 875 * Set the hopcount to 1 so that probe packets are not routed. 876 * Disable multicast loopback. Set the receive filter to 877 * receive only ICMPv6 echo replies. 
878 */ 879 pii->pii_probe_sock = socket(pii->pii_af, SOCK_RAW, IPPROTO_ICMPV6); 880 if (pii->pii_probe_sock < 0) { 881 logperror_pii(pii, "phyint_inst_v6_sockinit: socket"); 882 return (_B_FALSE); 883 } 884 885 bzero(&testaddr, sizeof (testaddr)); 886 testaddr.sin6_family = AF_INET6; 887 testaddr.sin6_port = 0; 888 testaddr.sin6_addr = pii->pii_probe_logint->li_addr; 889 890 if (bind(pii->pii_probe_sock, (struct sockaddr *)&testaddr, 891 sizeof (testaddr)) < 0) { 892 logperror_pii(pii, "phyint_inst_v6_sockinit: IPv6 bind"); 893 return (_B_FALSE); 894 } 895 896 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_MULTICAST_IF, 897 (char *)&pii->pii_ifindex, sizeof (uint_t)) < 0) { 898 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 899 " IPV6_MULTICAST_IF"); 900 return (_B_FALSE); 901 } 902 903 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_BOUND_IF, 904 &pii->pii_ifindex, sizeof (uint_t)) < 0) { 905 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 906 " IPV6_BOUND_IF"); 907 return (_B_FALSE); 908 } 909 910 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_UNICAST_HOPS, 911 (char *)&hopcount, sizeof (hopcount)) < 0) { 912 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 913 " IPV6_UNICAST_HOPS"); 914 return (_B_FALSE); 915 } 916 917 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_MULTICAST_HOPS, 918 (char *)&hopcount, sizeof (hopcount)) < 0) { 919 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 920 " IPV6_MULTICAST_HOPS"); 921 return (_B_FALSE); 922 } 923 924 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_MULTICAST_LOOP, 925 (char *)&off, sizeof (off)) < 0) { 926 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 927 " IPV6_MULTICAST_LOOP"); 928 return (_B_FALSE); 929 } 930 931 /* 932 * Filter out so that we only receive ICMP echo replies 933 */ 934 ICMP6_FILTER_SETBLOCKALL(&filter); 935 ICMP6_FILTER_SETPASS(ICMP6_ECHO_REPLY, &filter); 936 937 if (setsockopt(pii->pii_probe_sock, IPPROTO_ICMPV6, 
ICMP6_FILTER, 938 (char *)&filter, sizeof (filter)) < 0) { 939 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 940 " ICMP6_FILTER"); 941 return (_B_FALSE); 942 } 943 944 /* Enable receipt of hoplimit */ 945 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, 946 &on, sizeof (on)) < 0) { 947 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 948 " IPV6_RECVHOPLIMIT"); 949 return (_B_FALSE); 950 } 951 952 /* Enable receipt of timestamp */ 953 if (setsockopt(pii->pii_probe_sock, SOL_SOCKET, SO_TIMESTAMP, 954 &on, sizeof (on)) < 0) { 955 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 956 " SO_TIMESTAMP"); 957 return (_B_FALSE); 958 } 959 960 return (_B_TRUE); 961 } 962 963 /* 964 * IPv4 specific part in initializing the pii_probe_sock. This socket is 965 * used to send/receive ICMPv4 probe packets. 966 */ 967 static boolean_t 968 phyint_inst_v4_sockinit(struct phyint_instance *pii) 969 { 970 struct sockaddr_in testaddr; 971 char char_off = 0; 972 int ttl = 1; 973 char char_ttl = 1; 974 int on = 1; 975 976 /* 977 * Open a raw socket with ICMPv4 protocol. 978 * 979 * Use IP_BOUND_IF to make sure that probes are sent and received on 980 * the specified phyint only. Bind to the test address to ensure that 981 * the responses are sent to the specified phyint. 982 * 983 * Set the ttl to 1 so that probe packets are not routed. 984 * Disable multicast loopback. Enable receipt of timestamp. 
985 */ 986 pii->pii_probe_sock = socket(pii->pii_af, SOCK_RAW, IPPROTO_ICMP); 987 if (pii->pii_probe_sock < 0) { 988 logperror_pii(pii, "phyint_inst_v4_sockinit: socket"); 989 return (_B_FALSE); 990 } 991 992 bzero(&testaddr, sizeof (testaddr)); 993 testaddr.sin_family = AF_INET; 994 testaddr.sin_port = 0; 995 IN6_V4MAPPED_TO_INADDR(&pii->pii_probe_logint->li_addr, 996 &testaddr.sin_addr); 997 998 if (bind(pii->pii_probe_sock, (struct sockaddr *)&testaddr, 999 sizeof (testaddr)) < 0) { 1000 logperror_pii(pii, "phyint_inst_v4_sockinit: IPv4 bind"); 1001 return (_B_FALSE); 1002 } 1003 1004 if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_BOUND_IF, 1005 &pii->pii_ifindex, sizeof (uint_t)) < 0) { 1006 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1007 " IP_BOUND_IF"); 1008 return (_B_FALSE); 1009 } 1010 1011 if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_MULTICAST_IF, 1012 (char *)&testaddr.sin_addr, sizeof (struct in_addr)) < 0) { 1013 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1014 " IP_MULTICAST_IF"); 1015 return (_B_FALSE); 1016 } 1017 1018 if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_TTL, 1019 (char *)&ttl, sizeof (ttl)) < 0) { 1020 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1021 " IP_TTL"); 1022 return (_B_FALSE); 1023 } 1024 1025 if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_MULTICAST_LOOP, 1026 (char *)&char_off, sizeof (char_off)) == -1) { 1027 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1028 " IP_MULTICAST_LOOP"); 1029 return (_B_FALSE); 1030 } 1031 1032 if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_MULTICAST_TTL, 1033 (char *)&char_ttl, sizeof (char_ttl)) == -1) { 1034 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1035 " IP_MULTICAST_TTL"); 1036 return (_B_FALSE); 1037 } 1038 1039 if (setsockopt(pii->pii_probe_sock, SOL_SOCKET, SO_TIMESTAMP, &on, 1040 sizeof (on)) < 0) { 1041 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1042 " SO_TIMESTAMP"); 1043 return 
(_B_FALSE); 1044 } 1045 1046 return (_B_TRUE); 1047 } 1048 1049 /* 1050 * Remove the phyint group from the list of 'all phyint groups' 1051 * and free it. 1052 */ 1053 void 1054 phyint_group_delete(struct phyint_group *pg) 1055 { 1056 /* 1057 * The anonymous group always exists, even when empty. 1058 */ 1059 if (pg == phyint_anongroup) 1060 return; 1061 1062 if (debug & D_PHYINT) 1063 logdebug("phyint_group_delete('%s')\n", pg->pg_name); 1064 1065 /* 1066 * The phyint group must be empty, and must not have any phyints. 1067 * The phyint group must be in the list of all phyint groups 1068 */ 1069 assert(pg->pg_phyint == NULL); 1070 assert(phyint_groups == pg || pg->pg_prev != NULL); 1071 1072 if (pg->pg_prev != NULL) 1073 pg->pg_prev->pg_next = pg->pg_next; 1074 else 1075 phyint_groups = pg->pg_next; 1076 1077 if (pg->pg_next != NULL) 1078 pg->pg_next->pg_prev = pg->pg_prev; 1079 1080 pg->pg_next = NULL; 1081 pg->pg_prev = NULL; 1082 1083 phyint_grouplistsig++; 1084 (void) phyint_group_change_event(pg, IPMP_GROUP_REMOVE); 1085 1086 addrlist_free(&pg->pg_addrs); 1087 free(pg); 1088 } 1089 1090 /* 1091 * Refresh the state of `pg' based on its current members. 1092 */ 1093 void 1094 phyint_group_refresh_state(struct phyint_group *pg) 1095 { 1096 enum pg_state state; 1097 enum pg_state origstate = pg->pg_state; 1098 struct phyint *pi, *usablepi; 1099 uint_t nif = 0, nusable = 0; 1100 1101 /* 1102 * Anonymous groups never change state. 
1103 */ 1104 if (pg == phyint_anongroup) 1105 return; 1106 1107 for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) { 1108 nif++; 1109 if (phyint_is_usable(pi)) { 1110 nusable++; 1111 usablepi = pi; 1112 } 1113 } 1114 1115 if (nusable == 0) 1116 state = PG_FAILED; 1117 else if (nif == nusable) 1118 state = PG_OK; 1119 else 1120 state = PG_DEGRADED; 1121 1122 phyint_group_chstate(pg, state); 1123 1124 /* 1125 * If we're shutting down, skip logging messages since otherwise our 1126 * shutdown housecleaning will make us report that groups are unusable. 1127 */ 1128 if (cleanup_started) 1129 return; 1130 1131 /* 1132 * NOTE: We use pg_failmsg_printed rather than origstate since 1133 * otherwise at startup we'll log a "now usable" message when the 1134 * first usable phyint is added to an empty group. 1135 */ 1136 if (state != PG_FAILED && pg->pg_failmsg_printed) { 1137 assert(origstate == PG_FAILED); 1138 logerr("At least 1 IP interface (%s) in group %s is now " 1139 "usable\n", usablepi->pi_name, pg->pg_name); 1140 pg->pg_failmsg_printed = _B_FALSE; 1141 } else if (origstate != PG_FAILED && state == PG_FAILED) { 1142 logerr("All IP interfaces in group %s are now unusable\n", 1143 pg->pg_name); 1144 pg->pg_failmsg_printed = _B_TRUE; 1145 } 1146 } 1147 1148 /* 1149 * Extract information from the kernel about the desired phyint. 1150 * Look only for properties of the phyint and not properties of logints. 1151 * Take appropriate action on the changes. 1152 * Return codes: 1153 * PI_OK 1154 * The phyint exists in the kernel and matches our knowledge 1155 * of the phyint. 1156 * PI_DELETED 1157 * The phyint has vanished in the kernel. 1158 * PI_IFINDEX_CHANGED 1159 * The phyint's interface index has changed. 1160 * Ask the caller to delete and recreate the phyint. 1161 * PI_IOCTL_ERROR 1162 * Some ioctl error. Don't change anything. 1163 * PI_GROUP_CHANGED 1164 * The phyint has changed group. 
1165 */ 1166 int 1167 phyint_inst_update_from_k(struct phyint_instance *pii) 1168 { 1169 struct lifreq lifr; 1170 int ifsock; 1171 struct phyint *pi; 1172 1173 pi = pii->pii_phyint; 1174 1175 if (debug & D_PHYINT) { 1176 logdebug("phyint_inst_update_from_k(%s %s)\n", 1177 AF_STR(pii->pii_af), pi->pi_name); 1178 } 1179 1180 /* 1181 * Get the ifindex from the kernel, for comparison with the 1182 * value in our tables. 1183 */ 1184 (void) strncpy(lifr.lifr_name, pi->pi_name, sizeof (lifr.lifr_name)); 1185 lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0'; 1186 1187 ifsock = (pii->pii_af == AF_INET) ? ifsock_v4 : ifsock_v6; 1188 if (ioctl(ifsock, SIOCGLIFINDEX, &lifr) < 0) { 1189 if (errno == ENXIO) { 1190 return (PI_DELETED); 1191 } else { 1192 logperror_pii(pii, "phyint_inst_update_from_k:" 1193 " ioctl (get lifindex)"); 1194 return (PI_IOCTL_ERROR); 1195 } 1196 } 1197 1198 if (lifr.lifr_index != pi->pi_ifindex) { 1199 /* 1200 * The index has changed. Most likely the interface has 1201 * been unplumbed and replumbed. Ask the caller to take 1202 * appropriate action. 1203 */ 1204 if (debug & D_PHYINT) { 1205 logdebug("phyint_inst_update_from_k:" 1206 " old index %d new index %d\n", 1207 pi->pi_ifindex, lifr.lifr_index); 1208 } 1209 return (PI_IFINDEX_CHANGED); 1210 } 1211 1212 /* 1213 * Get the group name from the kernel, for comparison with 1214 * the value in our tables. 1215 */ 1216 if (ioctl(ifsock, SIOCGLIFGROUPNAME, &lifr) < 0) { 1217 if (errno == ENXIO) { 1218 return (PI_DELETED); 1219 } else { 1220 logperror_pii(pii, "phyint_inst_update_from_k:" 1221 " ioctl (get groupname)"); 1222 return (PI_IOCTL_ERROR); 1223 } 1224 } 1225 1226 /* 1227 * If the phyint has changed group i.e. 
if the phyint group name 1228 * returned by the kernel is different, ask the caller to delete 1229 * and recreate the phyint in the right group 1230 */ 1231 if (strcmp(lifr.lifr_groupname, pi->pi_group->pg_name) != 0) { 1232 /* Groupname has changed */ 1233 if (debug & D_PHYINT) { 1234 logdebug("phyint_inst_update_from_k:" 1235 " groupname change\n"); 1236 } 1237 return (PI_GROUP_CHANGED); 1238 } 1239 1240 /* 1241 * Get the current phyint flags from the kernel, and determine what 1242 * flags have changed by comparing against our tables. Note that the 1243 * IFF_INACTIVE processing in initifs() relies on this call to ensure 1244 * that IFF_INACTIVE is really still set on the interface. 1245 */ 1246 if (ioctl(ifsock, SIOCGLIFFLAGS, &lifr) < 0) { 1247 if (errno == ENXIO) { 1248 return (PI_DELETED); 1249 } else { 1250 logperror_pii(pii, "phyint_inst_update_from_k: " 1251 " ioctl (get flags)"); 1252 return (PI_IOCTL_ERROR); 1253 } 1254 } 1255 1256 pi->pi_flags = PHYINT_FLAGS(lifr.lifr_flags); 1257 if (pi->pi_v4 != NULL) 1258 pi->pi_v4->pii_flags = pi->pi_flags; 1259 if (pi->pi_v6 != NULL) 1260 pi->pi_v6->pii_flags = pi->pi_flags; 1261 1262 /* 1263 * Make sure the IFF_FAILED flag is set if and only if we think 1264 * the interface should be failed. 1265 */ 1266 if (pi->pi_flags & IFF_FAILED) { 1267 if (pi->pi_state == PI_RUNNING) 1268 (void) change_pif_flags(pi, 0, IFF_FAILED); 1269 } else { 1270 if (pi->pi_state == PI_FAILED) 1271 (void) change_pif_flags(pi, IFF_FAILED, IFF_INACTIVE); 1272 } 1273 1274 /* No change in phyint status */ 1275 return (PI_OK); 1276 } 1277 1278 /* 1279 * Delete the phyint. Remove it from the list of all phyints, and the 1280 * list of phyint group members. 
1281 */ 1282 static void 1283 phyint_delete(struct phyint *pi) 1284 { 1285 struct phyint *pi2; 1286 struct phyint_group *pg = pi->pi_group; 1287 1288 if (debug & D_PHYINT) 1289 logdebug("phyint_delete(%s)\n", pi->pi_name); 1290 1291 /* Both IPv4 and IPv6 phyint instances must have been deleted. */ 1292 assert(pi->pi_v4 == NULL && pi->pi_v6 == NULL); 1293 1294 /* 1295 * The phyint must belong to a group. 1296 */ 1297 assert(pg->pg_phyint == pi || pi->pi_pgprev != NULL); 1298 1299 /* The phyint must be in the list of all phyints */ 1300 assert(phyints == pi || pi->pi_prev != NULL); 1301 1302 /* Remove the phyint from the phyint group list */ 1303 pg->pg_sig++; 1304 (void) phyint_group_member_event(pg, pi, IPMP_IF_REMOVE); 1305 1306 if (pi->pi_pgprev == NULL) { 1307 /* Phyint is the 1st in the phyint group list */ 1308 pg->pg_phyint = pi->pi_pgnext; 1309 } else { 1310 pi->pi_pgprev->pi_pgnext = pi->pi_pgnext; 1311 } 1312 if (pi->pi_pgnext != NULL) 1313 pi->pi_pgnext->pi_pgprev = pi->pi_pgprev; 1314 pi->pi_pgnext = NULL; 1315 pi->pi_pgprev = NULL; 1316 1317 /* Refresh the group state now that this phyint has been removed */ 1318 phyint_group_refresh_state(pg); 1319 1320 /* Remove the phyint from the global list of phyints */ 1321 if (pi->pi_prev == NULL) { 1322 /* Phyint is the 1st in the list */ 1323 phyints = pi->pi_next; 1324 } else { 1325 pi->pi_prev->pi_next = pi->pi_next; 1326 } 1327 if (pi->pi_next != NULL) 1328 pi->pi_next->pi_prev = pi->pi_prev; 1329 pi->pi_next = NULL; 1330 pi->pi_prev = NULL; 1331 1332 /* 1333 * See if another phyint in the group had been offlined because 1334 * it was a dup of `pi' -- and if so, online it. 1335 */ 1336 if (!pi->pi_hwaddrdup && 1337 (pi2 = phyint_lookup_hwaddr(pi, _B_FALSE)) != NULL) { 1338 assert(pi2->pi_hwaddrdup); 1339 (void) phyint_undo_offline(pi2); 1340 } 1341 phyint_link_close(pi); 1342 free(pi); 1343 } 1344 1345 /* 1346 * Offline phyint `pi' if at least `minred' usable interfaces remain in the 1347 * group. 
Returns an IPMP error code. 1348 */ 1349 int 1350 phyint_offline(struct phyint *pi, uint_t minred) 1351 { 1352 unsigned int nusable = 0; 1353 struct phyint *pi2; 1354 struct phyint_group *pg = pi->pi_group; 1355 1356 /* 1357 * Verify that enough usable interfaces in the group would remain. 1358 * As a special case, if the group has failed, allow any non-offline 1359 * phyints to be offlined. 1360 */ 1361 if (pg != phyint_anongroup) { 1362 for (pi2 = pg->pg_phyint; pi2 != NULL; pi2 = pi2->pi_pgnext) { 1363 if (pi2 == pi) 1364 continue; 1365 if (phyint_is_usable(pi2) || 1366 (GROUP_FAILED(pg) && pi2->pi_state != PI_OFFLINE)) 1367 nusable++; 1368 } 1369 } 1370 if (nusable < minred) 1371 return (IPMP_EMINRED); 1372 1373 if (!change_pif_flags(pi, IFF_OFFLINE, 0)) 1374 return (IPMP_FAILURE); 1375 1376 /* 1377 * The interface is now offline, so stop probing it. Note that 1378 * if_mpadm(1M) will down the test addresses, after receiving a 1379 * success reply from us. The routing socket message will then make us 1380 * close the socket used for sending probes. But it is more logical 1381 * that an offlined interface must not be probed, even if it has test 1382 * addresses. 1383 * 1384 * NOTE: stop_probing() also sets PI_OFFLINE. 1385 */ 1386 stop_probing(pi); 1387 1388 /* 1389 * If we're offlining the phyint because it has a duplicate hardware 1390 * address, print a warning -- and leave the link open so that we can 1391 * be notified of hardware address changes that make it usable again. 1392 * Otherwise, close the link so that we won't prevent a detach. 1393 */ 1394 if (pi->pi_hwaddrdup) { 1395 logerr("IP interface %s has a hardware address which is not " 1396 "unique in group %s; offlining\n", pi->pi_name, 1397 pg->pg_name); 1398 } else { 1399 phyint_link_close(pi); 1400 } 1401 1402 /* 1403 * If this phyint was preventing another phyint with a duplicate 1404 * hardware address from being online, bring that one online now. 
1405 */ 1406 if (!pi->pi_hwaddrdup && 1407 (pi2 = phyint_lookup_hwaddr(pi, _B_FALSE)) != NULL) { 1408 assert(pi2->pi_hwaddrdup); 1409 (void) phyint_undo_offline(pi2); 1410 } 1411 1412 /* 1413 * If this interface was active, try to activate another INACTIVE 1414 * interface in the group. 1415 */ 1416 if (!(pi->pi_flags & IFF_INACTIVE)) 1417 phyint_activate_another(pi); 1418 1419 return (IPMP_SUCCESS); 1420 } 1421 1422 /* 1423 * Undo a previous offline of `pi'. Returns an IPMP error code. 1424 */ 1425 int 1426 phyint_undo_offline(struct phyint *pi) 1427 { 1428 if (pi->pi_state != PI_OFFLINE) { 1429 errno = EINVAL; 1430 return (IPMP_FAILURE); 1431 } 1432 1433 /* 1434 * If necessary, reinitialize our link information and verify that its 1435 * hardware address is still unique across the group. 1436 */ 1437 if (pi->pi_dh == NULL && !phyint_link_init(pi)) { 1438 errno = EIO; 1439 return (IPMP_FAILURE); 1440 } 1441 1442 if (phyint_lookup_hwaddr(pi, _B_TRUE) != NULL) { 1443 pi->pi_hwaddrdup = _B_TRUE; 1444 return (IPMP_EHWADDRDUP); 1445 } 1446 1447 if (pi->pi_hwaddrdup) { 1448 logerr("IP interface %s now has a unique hardware address in " 1449 "group %s; onlining\n", pi->pi_name, pi->pi_group->pg_name); 1450 pi->pi_hwaddrdup = _B_FALSE; 1451 } 1452 1453 if (!change_pif_flags(pi, 0, IFF_OFFLINE)) 1454 return (IPMP_FAILURE); 1455 1456 /* 1457 * While the interface was offline, it may have failed (e.g. the link 1458 * may have gone down). phyint_inst_check_for_failure() will have 1459 * already set pi_flags with IFF_FAILED, so we can use that to decide 1460 * whether the phyint should transition to running. Note that after 1461 * we transition to running, we will start sending probes again (if 1462 * test addresses are configured), which may also reveal that the 1463 * interface is in fact failed. 
1464 */ 1465 if (pi->pi_flags & IFF_FAILED) { 1466 phyint_chstate(pi, PI_FAILED); 1467 } else { 1468 /* calls phyint_chstate() */ 1469 phyint_transition_to_running(pi); 1470 } 1471 1472 /* 1473 * Give the requestor time to configure test addresses before 1474 * complaining that they're missing. 1475 */ 1476 pi->pi_taddrthresh = getcurrentsec() + TESTADDR_CONF_TIME; 1477 1478 return (IPMP_SUCCESS); 1479 } 1480 1481 /* 1482 * Delete (unlink and free), the phyint instance. 1483 */ 1484 void 1485 phyint_inst_delete(struct phyint_instance *pii) 1486 { 1487 struct phyint *pi = pii->pii_phyint; 1488 1489 assert(pi != NULL); 1490 1491 if (debug & D_PHYINT) { 1492 logdebug("phyint_inst_delete(%s %s)\n", 1493 AF_STR(pii->pii_af), pi->pi_name); 1494 } 1495 1496 /* 1497 * If the phyint instance has associated probe targets 1498 * delete all the targets 1499 */ 1500 while (pii->pii_targets != NULL) 1501 target_delete(pii->pii_targets); 1502 1503 /* 1504 * Delete all the logints associated with this phyint 1505 * instance. 1506 */ 1507 while (pii->pii_logint != NULL) 1508 logint_delete(pii->pii_logint); 1509 1510 /* 1511 * Close the socket used to send probes to targets from this phyint. 1512 */ 1513 if (pii->pii_probe_sock != -1) 1514 close_probe_socket(pii, _B_TRUE); 1515 1516 /* 1517 * Phyint instance must be in the list of all phyint instances. 1518 * Remove phyint instance from the global list of phyint instances. 1519 */ 1520 assert(phyint_instances == pii || pii->pii_prev != NULL); 1521 if (pii->pii_prev == NULL) { 1522 /* Phyint is the 1st in the list */ 1523 phyint_instances = pii->pii_next; 1524 } else { 1525 pii->pii_prev->pii_next = pii->pii_next; 1526 } 1527 if (pii->pii_next != NULL) 1528 pii->pii_next->pii_prev = pii->pii_prev; 1529 pii->pii_next = NULL; 1530 pii->pii_prev = NULL; 1531 1532 /* 1533 * Reset the phyint instance pointer in the phyint. 1534 * If this is the last phyint instance (being deleted) on this 1535 * phyint, then delete the phyint. 
1536 */ 1537 if (pii->pii_af == AF_INET) 1538 pi->pi_v4 = NULL; 1539 else 1540 pi->pi_v6 = NULL; 1541 1542 if (pi->pi_v4 == NULL && pi->pi_v6 == NULL) 1543 phyint_delete(pi); 1544 1545 free(pii); 1546 } 1547 1548 static void 1549 phyint_inst_print(struct phyint_instance *pii) 1550 { 1551 struct logint *li; 1552 struct target *tg; 1553 char abuf[INET6_ADDRSTRLEN]; 1554 int most_recent; 1555 int i; 1556 1557 if (pii->pii_phyint == NULL) { 1558 logdebug("pii->pi_phyint NULL can't print\n"); 1559 return; 1560 } 1561 1562 logdebug("\nPhyint instance: %s %s index %u state %x flags %llx " 1563 "sock %x in_use %d\n", 1564 AF_STR(pii->pii_af), pii->pii_name, pii->pii_ifindex, 1565 pii->pii_state, pii->pii_phyint->pi_flags, pii->pii_probe_sock, 1566 pii->pii_in_use); 1567 1568 for (li = pii->pii_logint; li != NULL; li = li->li_next) 1569 logint_print(li); 1570 1571 logdebug("\n"); 1572 for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) 1573 target_print(tg); 1574 1575 if (pii->pii_targets == NULL) 1576 logdebug("pi_targets NULL\n"); 1577 1578 if (pii->pii_target_next != NULL) { 1579 logdebug("pi_target_next %s %s\n", AF_STR(pii->pii_af), 1580 pr_addr(pii->pii_af, pii->pii_target_next->tg_address, 1581 abuf, sizeof (abuf))); 1582 } else { 1583 logdebug("pi_target_next NULL\n"); 1584 } 1585 1586 if (pii->pii_rtt_target_next != NULL) { 1587 logdebug("pi_rtt_target_next %s %s\n", AF_STR(pii->pii_af), 1588 pr_addr(pii->pii_af, pii->pii_rtt_target_next->tg_address, 1589 abuf, sizeof (abuf))); 1590 } else { 1591 logdebug("pi_rtt_target_next NULL\n"); 1592 } 1593 1594 if (pii->pii_targets != NULL) { 1595 most_recent = PROBE_INDEX_PREV(pii->pii_probe_next); 1596 1597 i = most_recent; 1598 do { 1599 if (pii->pii_probes[i].pr_target != NULL) { 1600 logdebug("#%d target %s ", i, 1601 pr_addr(pii->pii_af, 1602 pii->pii_probes[i].pr_target->tg_address, 1603 abuf, sizeof (abuf))); 1604 } else { 1605 logdebug("#%d target NULL ", i); 1606 } 1607 logdebug("time_start %lld status %d " 
1608 "time_ackproc %lld time_lost %u", 1609 pii->pii_probes[i].pr_hrtime_start, 1610 pii->pii_probes[i].pr_status, 1611 pii->pii_probes[i].pr_hrtime_ackproc, 1612 pii->pii_probes[i].pr_time_lost); 1613 i = PROBE_INDEX_PREV(i); 1614 } while (i != most_recent); 1615 } 1616 } 1617 1618 /* 1619 * Lookup a logint based on the logical interface name, on the given 1620 * phyint instance. 1621 */ 1622 static struct logint * 1623 logint_lookup(struct phyint_instance *pii, char *name) 1624 { 1625 struct logint *li; 1626 1627 if (debug & D_LOGINT) { 1628 logdebug("logint_lookup(%s, %s)\n", 1629 AF_STR(pii->pii_af), name); 1630 } 1631 1632 for (li = pii->pii_logint; li != NULL; li = li->li_next) { 1633 if (strncmp(name, li->li_name, sizeof (li->li_name)) == 0) 1634 break; 1635 } 1636 return (li); 1637 } 1638 1639 /* 1640 * Insert a logint at the head of the list of logints of the given 1641 * phyint instance 1642 */ 1643 static void 1644 logint_insert(struct phyint_instance *pii, struct logint *li) 1645 { 1646 li->li_next = pii->pii_logint; 1647 li->li_prev = NULL; 1648 if (pii->pii_logint != NULL) 1649 pii->pii_logint->li_prev = li; 1650 pii->pii_logint = li; 1651 li->li_phyint_inst = pii; 1652 } 1653 1654 /* 1655 * Create a new named logint, on the specified phyint instance. 1656 */ 1657 static struct logint * 1658 logint_create(struct phyint_instance *pii, char *name) 1659 { 1660 struct logint *li; 1661 1662 if (debug & D_LOGINT) { 1663 logdebug("logint_create(%s %s %s)\n", 1664 AF_STR(pii->pii_af), pii->pii_name, name); 1665 } 1666 1667 li = calloc(1, sizeof (struct logint)); 1668 if (li == NULL) { 1669 logperror("logint_create: calloc"); 1670 return (NULL); 1671 } 1672 1673 (void) strncpy(li->li_name, name, sizeof (li->li_name)); 1674 li->li_name[sizeof (li->li_name) - 1] = '\0'; 1675 logint_insert(pii, li); 1676 return (li); 1677 } 1678 1679 /* 1680 * Initialize the logint based on the data returned by the kernel. 
1681 */ 1682 void 1683 logint_init_from_k(struct phyint_instance *pii, char *li_name) 1684 { 1685 int ifsock; 1686 uint64_t flags; 1687 uint64_t saved_flags; 1688 struct logint *li; 1689 struct lifreq lifr; 1690 struct in6_addr test_subnet; 1691 struct in6_addr testaddr; 1692 int test_subnet_len; 1693 struct sockaddr_in6 *sin6; 1694 struct sockaddr_in *sin; 1695 char abuf[INET6_ADDRSTRLEN]; 1696 boolean_t ptp = _B_FALSE; 1697 struct in6_addr tgaddr; 1698 1699 if (debug & D_LOGINT) { 1700 logdebug("logint_init_from_k(%s %s)\n", 1701 AF_STR(pii->pii_af), li_name); 1702 } 1703 1704 /* Get the socket for doing ioctls */ 1705 ifsock = (pii->pii_af == AF_INET) ? ifsock_v4 : ifsock_v6; 1706 1707 /* 1708 * Get the flags from the kernel. Also serves as a check whether 1709 * the logical still exists. If it doesn't exist, no need to proceed 1710 * any further. li_in_use will make the caller clean up the logint 1711 */ 1712 (void) strncpy(lifr.lifr_name, li_name, sizeof (lifr.lifr_name)); 1713 lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0'; 1714 if (ioctl(ifsock, SIOCGLIFFLAGS, (char *)&lifr) < 0) { 1715 /* Interface may have vanished */ 1716 if (errno != ENXIO) { 1717 logperror_pii(pii, "logint_init_from_k: " 1718 "ioctl (get flags)"); 1719 } 1720 return; 1721 } 1722 1723 flags = lifr.lifr_flags; 1724 1725 /* 1726 * Verified the logint exists. Now lookup the logint in our tables. 1727 * If it does not exist, create a new logint. 1728 */ 1729 li = logint_lookup(pii, li_name); 1730 if (li == NULL) { 1731 li = logint_create(pii, li_name); 1732 if (li == NULL) { 1733 /* 1734 * Pretend the interface does not exist 1735 * in the kernel 1736 */ 1737 return; 1738 } 1739 } 1740 1741 /* 1742 * Update li->li_flags with the new flags, after saving the old 1743 * value. 
This is used later to check what flags has changed and 1744 * take any action 1745 */ 1746 saved_flags = li->li_flags; 1747 li->li_flags = flags; 1748 1749 /* 1750 * Get the address, prefix, prefixlength and update the logint. 1751 * Check if anything has changed. If the logint used for the 1752 * test address has changed, take suitable action. 1753 */ 1754 if (ioctl(ifsock, SIOCGLIFADDR, (char *)&lifr) < 0) { 1755 /* Interface may have vanished */ 1756 if (errno != ENXIO) { 1757 logperror_li(li, "logint_init_from_k: (get addr)"); 1758 } 1759 goto error; 1760 } 1761 1762 if (pii->pii_af == AF_INET) { 1763 sin = (struct sockaddr_in *)&lifr.lifr_addr; 1764 IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &testaddr); 1765 } else { 1766 sin6 = (struct sockaddr_in6 *)&lifr.lifr_addr; 1767 testaddr = sin6->sin6_addr; 1768 } 1769 1770 if (ioctl(ifsock, SIOCGLIFSUBNET, (char *)&lifr) < 0) { 1771 /* Interface may have vanished */ 1772 if (errno != ENXIO) 1773 logperror_li(li, "logint_init_from_k: (get subnet)"); 1774 goto error; 1775 } 1776 if (lifr.lifr_subnet.ss_family == AF_INET6) { 1777 sin6 = (struct sockaddr_in6 *)&lifr.lifr_subnet; 1778 test_subnet = sin6->sin6_addr; 1779 test_subnet_len = lifr.lifr_addrlen; 1780 } else { 1781 sin = (struct sockaddr_in *)&lifr.lifr_subnet; 1782 IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &test_subnet); 1783 test_subnet_len = lifr.lifr_addrlen + (IPV6_ABITS - IP_ABITS); 1784 } 1785 1786 /* 1787 * If this is the logint corresponding to the test address used for 1788 * sending probes, then if anything significant has changed we need to 1789 * determine the test address again. We ignore changes to the 1790 * IFF_FAILED and IFF_RUNNING flags since those happen as a matter of 1791 * course. 
1792 */ 1793 if (pii->pii_probe_logint == li) { 1794 if (((li->li_flags ^ saved_flags) & 1795 ~(IFF_FAILED | IFF_RUNNING)) != 0 || 1796 !IN6_ARE_ADDR_EQUAL(&testaddr, &li->li_addr) || 1797 (!ptp && !IN6_ARE_ADDR_EQUAL(&test_subnet, 1798 &li->li_subnet)) || 1799 (!ptp && test_subnet_len != li->li_subnet_len) || 1800 (ptp && !IN6_ARE_ADDR_EQUAL(&tgaddr, &li->li_dstaddr))) { 1801 /* 1802 * Something significant that affects the testaddress 1803 * has changed. Redo the testaddress selection later on 1804 * in select_test_ifs(). For now do the cleanup and 1805 * set pii_probe_logint to NULL. 1806 */ 1807 if (pii->pii_probe_sock != -1) 1808 close_probe_socket(pii, _B_TRUE); 1809 pii->pii_probe_logint = NULL; 1810 } 1811 } 1812 1813 1814 /* Update the logint with the values obtained from the kernel. */ 1815 li->li_addr = testaddr; 1816 li->li_in_use = 1; 1817 if (ptp) { 1818 li->li_dstaddr = tgaddr; 1819 li->li_subnet_len = (pii->pii_af == AF_INET) ? 1820 IP_ABITS : IPV6_ABITS; 1821 } else { 1822 li->li_subnet = test_subnet; 1823 li->li_subnet_len = test_subnet_len; 1824 } 1825 1826 if (debug & D_LOGINT) 1827 logint_print(li); 1828 1829 return; 1830 1831 error: 1832 logerr("logint_init_from_k: IGNORED %s %s %s addr %s\n", 1833 AF_STR(pii->pii_af), pii->pii_name, li->li_name, 1834 pr_addr(pii->pii_af, testaddr, abuf, sizeof (abuf))); 1835 logint_delete(li); 1836 } 1837 1838 /* 1839 * Delete (unlink and free) a logint. 
1840 */ 1841 void 1842 logint_delete(struct logint *li) 1843 { 1844 struct phyint_instance *pii; 1845 1846 pii = li->li_phyint_inst; 1847 assert(pii != NULL); 1848 1849 if (debug & D_LOGINT) { 1850 int af; 1851 char abuf[INET6_ADDRSTRLEN]; 1852 1853 af = pii->pii_af; 1854 logdebug("logint_delete(%s %s %s/%u)\n", 1855 AF_STR(af), li->li_name, 1856 pr_addr(af, li->li_addr, abuf, sizeof (abuf)), 1857 li->li_subnet_len); 1858 } 1859 1860 /* logint must be in the list of logints */ 1861 assert(pii->pii_logint == li || li->li_prev != NULL); 1862 1863 /* Remove the logint from the list of logints */ 1864 if (li->li_prev == NULL) { 1865 /* logint is the 1st in the list */ 1866 pii->pii_logint = li->li_next; 1867 } else { 1868 li->li_prev->li_next = li->li_next; 1869 } 1870 if (li->li_next != NULL) 1871 li->li_next->li_prev = li->li_prev; 1872 li->li_next = NULL; 1873 li->li_prev = NULL; 1874 1875 /* 1876 * If this logint is also being used for probing, then close the 1877 * associated socket, if it exists. 
1878 */ 1879 if (pii->pii_probe_logint == li) { 1880 if (pii->pii_probe_sock != -1) 1881 close_probe_socket(pii, _B_TRUE); 1882 pii->pii_probe_logint = NULL; 1883 } 1884 1885 free(li); 1886 } 1887 1888 static void 1889 logint_print(struct logint *li) 1890 { 1891 char abuf[INET6_ADDRSTRLEN]; 1892 int af = li->li_phyint_inst->pii_af; 1893 1894 logdebug("logint: %s %s addr %s/%u", AF_STR(af), li->li_name, 1895 pr_addr(af, li->li_addr, abuf, sizeof (abuf)), li->li_subnet_len); 1896 1897 logdebug("\tFlags: %llx in_use %d\n", li->li_flags, li->li_in_use); 1898 } 1899 1900 char * 1901 pr_addr(int af, struct in6_addr addr, char *abuf, int len) 1902 { 1903 struct in_addr addr_v4; 1904 1905 if (af == AF_INET) { 1906 IN6_V4MAPPED_TO_INADDR(&addr, &addr_v4); 1907 (void) inet_ntop(AF_INET, (void *)&addr_v4, abuf, len); 1908 } else { 1909 (void) inet_ntop(AF_INET6, (void *)&addr, abuf, len); 1910 } 1911 return (abuf); 1912 } 1913 1914 /* 1915 * Fill in the sockaddr_storage pointed to by `ssp' with the IP address 1916 * represented by the [`af',`addr'] pair. Needed because in.mpathd internally 1917 * stores all addresses as in6_addrs, but we don't want to expose that. 
1918 */ 1919 void 1920 addr2storage(int af, const struct in6_addr *addr, struct sockaddr_storage *ssp) 1921 { 1922 struct sockaddr_in *sinp = (struct sockaddr_in *)ssp; 1923 struct sockaddr_in6 *sin6p = (struct sockaddr_in6 *)ssp; 1924 1925 assert(af == AF_INET || af == AF_INET6); 1926 1927 switch (af) { 1928 case AF_INET: 1929 (void) memset(sinp, 0, sizeof (*sinp)); 1930 sinp->sin_family = AF_INET; 1931 IN6_V4MAPPED_TO_INADDR(addr, &sinp->sin_addr); 1932 break; 1933 case AF_INET6: 1934 (void) memset(sin6p, 0, sizeof (*sin6p)); 1935 sin6p->sin6_family = AF_INET6; 1936 sin6p->sin6_addr = *addr; 1937 break; 1938 } 1939 } 1940 1941 /* Lookup target on its address */ 1942 struct target * 1943 target_lookup(struct phyint_instance *pii, struct in6_addr addr) 1944 { 1945 struct target *tg; 1946 1947 if (debug & D_TARGET) { 1948 char abuf[INET6_ADDRSTRLEN]; 1949 1950 logdebug("target_lookup(%s %s): addr %s\n", 1951 AF_STR(pii->pii_af), pii->pii_name, 1952 pr_addr(pii->pii_af, addr, abuf, sizeof (abuf))); 1953 } 1954 1955 for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) { 1956 if (IN6_ARE_ADDR_EQUAL(&tg->tg_address, &addr)) 1957 break; 1958 } 1959 return (tg); 1960 } 1961 1962 /* 1963 * Find and return the next active target, for the next probe. 1964 * If no active targets are available, return NULL. 1965 */ 1966 struct target * 1967 target_next(struct target *tg) 1968 { 1969 struct phyint_instance *pii = tg->tg_phyint_inst; 1970 struct target *marker = tg; 1971 hrtime_t now; 1972 1973 now = gethrtime(); 1974 1975 /* 1976 * Target must be in the list of targets for this phyint 1977 * instance. 1978 */ 1979 assert(pii->pii_targets == tg || tg->tg_prev != NULL); 1980 assert(pii->pii_targets != NULL); 1981 1982 /* Return the next active target */ 1983 do { 1984 /* 1985 * Go to the next target. 
If we hit the end, 1986 * reset the ptr to the head 1987 */ 1988 tg = tg->tg_next; 1989 if (tg == NULL) 1990 tg = pii->pii_targets; 1991 1992 assert(TG_STATUS_VALID(tg->tg_status)); 1993 1994 switch (tg->tg_status) { 1995 case TG_ACTIVE: 1996 return (tg); 1997 1998 case TG_UNUSED: 1999 assert(pii->pii_targets_are_routers); 2000 if (pii->pii_ntargets < MAX_PROBE_TARGETS) { 2001 /* 2002 * Bubble up the unused target to active 2003 */ 2004 tg->tg_status = TG_ACTIVE; 2005 pii->pii_ntargets++; 2006 return (tg); 2007 } 2008 break; 2009 2010 case TG_SLOW: 2011 assert(pii->pii_targets_are_routers); 2012 if (tg->tg_latime + MIN_RECOVERY_TIME < now) { 2013 /* 2014 * Bubble up the slow target to unused 2015 */ 2016 tg->tg_status = TG_UNUSED; 2017 } 2018 break; 2019 2020 case TG_DEAD: 2021 assert(pii->pii_targets_are_routers); 2022 if (tg->tg_latime + MIN_RECOVERY_TIME < now) { 2023 /* 2024 * Bubble up the dead target to slow 2025 */ 2026 tg->tg_status = TG_SLOW; 2027 tg->tg_latime = now; 2028 } 2029 break; 2030 } 2031 2032 } while (tg != marker); 2033 2034 return (NULL); 2035 } 2036 2037 /* 2038 * Select the best available target, that is not already TG_ACTIVE, 2039 * for the caller. The caller will determine whether it wants to 2040 * make the returned target TG_ACTIVE. 2041 * The selection order is as follows. 2042 * 1. pick a TG_UNSED target, if it exists. 2043 * 2. else pick a TG_SLOW target that has recovered, if it exists 2044 * 3. else pick any TG_SLOW target, if it exists 2045 * 4. else pick a TG_DEAD target that has recovered, if it exists 2046 * 5. else pick any TG_DEAD target, if it exists 2047 * 6. 
else return null
 */
static struct target *
target_select_best(struct phyint_instance *pii)
{
	struct target *tg;
	struct target *recovered_slow = NULL;
	struct target *any_slow = NULL;
	struct target *recovered_dead = NULL;
	struct target *any_dead = NULL;
	hrtime_t now = gethrtime();

	/*
	 * One pass over the list.  An unused target wins immediately.
	 * Otherwise remember the last candidate seen in each category; a
	 * slow or dead target found to have recovered is also promoted one
	 * step (slow to unused, dead to slow) as a side effect.
	 */
	for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) {
		assert(TG_STATUS_VALID(tg->tg_status));

		if (tg->tg_status == TG_UNUSED)
			return (tg);

		if (tg->tg_status == TG_SLOW) {
			if (tg->tg_latime + MIN_RECOVERY_TIME < now) {
				/* Promote the recovered slow target */
				recovered_slow = tg;
				tg->tg_status = TG_UNUSED;
			} else {
				any_slow = tg;
			}
		} else if (tg->tg_status == TG_DEAD) {
			if (tg->tg_latime + MIN_RECOVERY_TIME < now) {
				/* Promote the recovered dead target */
				recovered_dead = tg;
				tg->tg_status = TG_SLOW;
				tg->tg_latime = now;
			} else {
				any_dead = tg;
			}
		}
	}

	/* No unused target was found: fall back in order of preference. */
	if (recovered_slow != NULL)
		return (recovered_slow);
	if (any_slow != NULL)
		return (any_slow);
	if (recovered_dead != NULL)
		return (recovered_dead);
	return (any_dead);
}

/*
 * Some target was deleted. If we don't have even MIN_PROBE_TARGETS
 * that are active, pick the next best below.
2111 */ 2112 static void 2113 target_activate_all(struct phyint_instance *pii) 2114 { 2115 struct target *tg; 2116 2117 assert(pii->pii_ntargets == 0); 2118 assert(pii->pii_target_next == NULL); 2119 assert(pii->pii_rtt_target_next == NULL); 2120 assert(pii->pii_targets_are_routers); 2121 2122 while (pii->pii_ntargets < MIN_PROBE_TARGETS) { 2123 tg = target_select_best(pii); 2124 if (tg == NULL) { 2125 /* We are out of targets */ 2126 return; 2127 } 2128 2129 assert(TG_STATUS_VALID(tg->tg_status)); 2130 assert(tg->tg_status != TG_ACTIVE); 2131 tg->tg_status = TG_ACTIVE; 2132 pii->pii_ntargets++; 2133 if (pii->pii_target_next == NULL) { 2134 pii->pii_target_next = tg; 2135 pii->pii_rtt_target_next = tg; 2136 } 2137 } 2138 } 2139 2140 static struct target * 2141 target_first(struct phyint_instance *pii) 2142 { 2143 struct target *tg; 2144 2145 for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) { 2146 assert(TG_STATUS_VALID(tg->tg_status)); 2147 if (tg->tg_status == TG_ACTIVE) 2148 break; 2149 } 2150 2151 return (tg); 2152 } 2153 2154 /* 2155 * Create a default target entry. 2156 */ 2157 void 2158 target_create(struct phyint_instance *pii, struct in6_addr addr, 2159 boolean_t is_router) 2160 { 2161 struct target *tg; 2162 struct phyint *pi; 2163 struct logint *li; 2164 2165 if (debug & D_TARGET) { 2166 char abuf[INET6_ADDRSTRLEN]; 2167 2168 logdebug("target_create(%s %s, %s)\n", 2169 AF_STR(pii->pii_af), pii->pii_name, 2170 pr_addr(pii->pii_af, addr, abuf, sizeof (abuf))); 2171 } 2172 2173 /* 2174 * If the test address is not yet initialized, do not add 2175 * any target, since we cannot determine whether the target 2176 * belongs to the same subnet as the test address. 2177 */ 2178 li = pii->pii_probe_logint; 2179 if (li == NULL) 2180 return; 2181 2182 /* 2183 * If there are multiple subnets associated with an interface, then 2184 * add the target to this phyint instance only if it belongs to the 2185 * same subnet as the test address. 
This assures us that we will 2186 * be able to reach this target through our routing table. 2187 */ 2188 if (!prefix_equal(li->li_subnet, addr, li->li_subnet_len)) 2189 return; 2190 2191 if (pii->pii_targets != NULL) { 2192 assert(pii->pii_ntargets <= MAX_PROBE_TARGETS); 2193 if (is_router) { 2194 if (!pii->pii_targets_are_routers) { 2195 /* 2196 * Prefer router over hosts. Using hosts is a 2197 * fallback mechanism, hence delete all host 2198 * targets. 2199 */ 2200 while (pii->pii_targets != NULL) 2201 target_delete(pii->pii_targets); 2202 } 2203 } else { 2204 /* 2205 * Routers take precedence over hosts. If this 2206 * is a router list and we are trying to add a 2207 * host, just return. If this is a host list 2208 * and if we have sufficient targets, just return 2209 */ 2210 if (pii->pii_targets_are_routers || 2211 pii->pii_ntargets == MAX_PROBE_TARGETS) 2212 return; 2213 } 2214 } 2215 2216 tg = calloc(1, sizeof (struct target)); 2217 if (tg == NULL) { 2218 logperror("target_create: calloc"); 2219 return; 2220 } 2221 2222 tg->tg_phyint_inst = pii; 2223 tg->tg_address = addr; 2224 tg->tg_in_use = 1; 2225 tg->tg_rtt_sa = -1; 2226 tg->tg_num_deferred = 0; 2227 2228 /* 2229 * If this is the first target, set 'pii_targets_are_routers' 2230 * The list of targets is either a list of hosts or list or 2231 * routers, but not a mix. 2232 */ 2233 if (pii->pii_targets == NULL) { 2234 assert(pii->pii_ntargets == 0); 2235 assert(pii->pii_target_next == NULL); 2236 assert(pii->pii_rtt_target_next == NULL); 2237 pii->pii_targets_are_routers = is_router ? 
1 : 0; 2238 } 2239 2240 if (pii->pii_ntargets == MAX_PROBE_TARGETS) { 2241 assert(pii->pii_targets_are_routers); 2242 assert(pii->pii_target_next != NULL); 2243 assert(pii->pii_rtt_target_next != NULL); 2244 tg->tg_status = TG_UNUSED; 2245 } else { 2246 if (pii->pii_ntargets == 0) { 2247 assert(pii->pii_target_next == NULL); 2248 pii->pii_target_next = tg; 2249 pii->pii_rtt_target_next = tg; 2250 } 2251 pii->pii_ntargets++; 2252 tg->tg_status = TG_ACTIVE; 2253 } 2254 2255 target_insert(pii, tg); 2256 2257 /* 2258 * Change state to PI_RUNNING if this phyint instance is capable of 2259 * sending and receiving probes -- that is, if we know of at least 1 2260 * target, and this phyint instance is probe-capable. For more 2261 * details, see the phyint state diagram in mpd_probe.c. 2262 */ 2263 pi = pii->pii_phyint; 2264 if (pi->pi_state == PI_NOTARGETS && PROBE_CAPABLE(pii)) { 2265 if (pi->pi_flags & IFF_FAILED) 2266 phyint_chstate(pi, PI_FAILED); 2267 else 2268 phyint_chstate(pi, PI_RUNNING); 2269 } 2270 } 2271 2272 /* 2273 * Add the target address named by `addr' to phyint instance `pii' if it does 2274 * not already exist. If the target is a router, `is_router' should be set to 2275 * B_TRUE. 2276 */ 2277 void 2278 target_add(struct phyint_instance *pii, struct in6_addr addr, 2279 boolean_t is_router) 2280 { 2281 struct target *tg; 2282 2283 if (pii == NULL) 2284 return; 2285 2286 tg = target_lookup(pii, addr); 2287 2288 /* 2289 * If the target does not exist, create it; target_create() will set 2290 * tg_in_use to true. Even if it exists already, if it's a router 2291 * target and we'd previously learned of it through multicast, then we 2292 * need to recreate it as a router target. Otherwise, just set 2293 * tg_in_use to to true so that init_router_targets() won't delete it. 
2294 */ 2295 if (tg == NULL || (is_router && !pii->pii_targets_are_routers)) 2296 target_create(pii, addr, is_router); 2297 else if (is_router) 2298 tg->tg_in_use = 1; 2299 } 2300 2301 /* 2302 * Insert target at head of linked list of targets for the associated 2303 * phyint instance 2304 */ 2305 static void 2306 target_insert(struct phyint_instance *pii, struct target *tg) 2307 { 2308 tg->tg_next = pii->pii_targets; 2309 tg->tg_prev = NULL; 2310 if (tg->tg_next != NULL) 2311 tg->tg_next->tg_prev = tg; 2312 pii->pii_targets = tg; 2313 } 2314 2315 /* 2316 * Delete a target (unlink and free). 2317 */ 2318 void 2319 target_delete(struct target *tg) 2320 { 2321 int af; 2322 struct phyint_instance *pii; 2323 struct phyint_instance *pii_other; 2324 2325 pii = tg->tg_phyint_inst; 2326 af = pii->pii_af; 2327 2328 if (debug & D_TARGET) { 2329 char abuf[INET6_ADDRSTRLEN]; 2330 2331 logdebug("target_delete(%s %s, %s)\n", 2332 AF_STR(af), pii->pii_name, 2333 pr_addr(af, tg->tg_address, abuf, sizeof (abuf))); 2334 } 2335 2336 /* 2337 * Target must be in the list of targets for this phyint 2338 * instance. 2339 */ 2340 assert(pii->pii_targets == tg || tg->tg_prev != NULL); 2341 2342 /* 2343 * Reset all references to 'tg' in the probe information 2344 * for this phyint. 2345 */ 2346 reset_pii_probes(pii, tg); 2347 2348 /* 2349 * Remove this target from the list of targets of this 2350 * phyint instance. 2351 */ 2352 if (tg->tg_prev == NULL) { 2353 pii->pii_targets = tg->tg_next; 2354 } else { 2355 tg->tg_prev->tg_next = tg->tg_next; 2356 } 2357 2358 if (tg->tg_next != NULL) 2359 tg->tg_next->tg_prev = tg->tg_prev; 2360 2361 tg->tg_next = NULL; 2362 tg->tg_prev = NULL; 2363 2364 if (tg->tg_status == TG_ACTIVE) 2365 pii->pii_ntargets--; 2366 2367 /* 2368 * Adjust the next target to probe, if it points to 2369 * to the currently deleted target. 
2370 */ 2371 if (pii->pii_target_next == tg) 2372 pii->pii_target_next = target_first(pii); 2373 2374 if (pii->pii_rtt_target_next == tg) 2375 pii->pii_rtt_target_next = target_first(pii); 2376 2377 free(tg); 2378 2379 /* 2380 * The number of active targets pii_ntargets == 0 iff 2381 * the next active target pii->pii_target_next == NULL 2382 */ 2383 if (pii->pii_ntargets != 0) { 2384 assert(pii->pii_target_next != NULL); 2385 assert(pii->pii_rtt_target_next != NULL); 2386 assert(pii->pii_target_next->tg_status == TG_ACTIVE); 2387 assert(pii->pii_rtt_target_next->tg_status == TG_ACTIVE); 2388 return; 2389 } 2390 2391 /* At this point, we don't have any active targets. */ 2392 assert(pii->pii_target_next == NULL); 2393 assert(pii->pii_rtt_target_next == NULL); 2394 2395 if (pii->pii_targets_are_routers) { 2396 /* 2397 * Activate any TG_SLOW or TG_DEAD router targets, 2398 * since we don't have any other targets 2399 */ 2400 target_activate_all(pii); 2401 2402 if (pii->pii_ntargets != 0) { 2403 assert(pii->pii_target_next != NULL); 2404 assert(pii->pii_rtt_target_next != NULL); 2405 assert(pii->pii_target_next->tg_status == TG_ACTIVE); 2406 assert(pii->pii_rtt_target_next->tg_status == 2407 TG_ACTIVE); 2408 return; 2409 } 2410 } 2411 2412 /* 2413 * If we still don't have any active targets, the list must 2414 * must be really empty. There aren't even TG_SLOW or TG_DEAD 2415 * targets. Zero out the probe stats since it will not be 2416 * relevant any longer. 2417 */ 2418 assert(pii->pii_targets == NULL); 2419 pii->pii_targets_are_routers = _B_FALSE; 2420 clear_pii_probe_stats(pii); 2421 pii_other = phyint_inst_other(pii); 2422 2423 /* 2424 * If there are no targets on both instances and the interface would 2425 * otherwise be considered PI_RUNNING, go back to PI_NOTARGETS state, 2426 * since we cannot probe this phyint any more. For more details, 2427 * please see phyint state diagram in mpd_probe.c. 
2428 */ 2429 if (!PROBE_CAPABLE(pii_other) && LINK_UP(pii->pii_phyint) && 2430 pii->pii_phyint->pi_state != PI_OFFLINE) 2431 phyint_chstate(pii->pii_phyint, PI_NOTARGETS); 2432 } 2433 2434 /* 2435 * Flush the target list of every phyint in the group, if the list 2436 * is a host target list. This is called if group failure is suspected. 2437 * If all targets have failed, multicast will subsequently discover new 2438 * targets. Else it is a group failure. 2439 * Note: This function is a no-op if the list is a router target list. 2440 */ 2441 static void 2442 target_flush_hosts(struct phyint_group *pg) 2443 { 2444 struct phyint *pi; 2445 struct phyint_instance *pii; 2446 2447 if (debug & D_TARGET) 2448 logdebug("target_flush_hosts(%s)\n", pg->pg_name); 2449 2450 for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) { 2451 pii = pi->pi_v4; 2452 if (pii != NULL && !pii->pii_targets_are_routers) { 2453 /* 2454 * Delete all the targets. When the list becomes 2455 * empty, target_delete() will set pii->pii_targets 2456 * to NULL. 2457 */ 2458 while (pii->pii_targets != NULL) 2459 target_delete(pii->pii_targets); 2460 } 2461 pii = pi->pi_v6; 2462 if (pii != NULL && !pii->pii_targets_are_routers) { 2463 /* 2464 * Delete all the targets. When the list becomes 2465 * empty, target_delete() will set pii->pii_targets 2466 * to NULL. 2467 */ 2468 while (pii->pii_targets != NULL) 2469 target_delete(pii->pii_targets); 2470 } 2471 } 2472 } 2473 2474 /* 2475 * Reset all references to 'target' in the probe info, as this target is 2476 * being deleted. The pr_target field is guaranteed to be non-null if 2477 * pr_status is PR_UNACKED. So we change the pr_status to PR_LOST, so that 2478 * pr_target will not be accessed unconditionally. 
2479 */ 2480 static void 2481 reset_pii_probes(struct phyint_instance *pii, struct target *tg) 2482 { 2483 int i; 2484 2485 for (i = 0; i < PROBE_STATS_COUNT; i++) { 2486 if (pii->pii_probes[i].pr_target == tg) { 2487 if (pii->pii_probes[i].pr_status == PR_UNACKED) { 2488 probe_chstate(&pii->pii_probes[i], pii, 2489 PR_LOST); 2490 } 2491 pii->pii_probes[i].pr_target = NULL; 2492 } 2493 } 2494 2495 } 2496 2497 /* 2498 * Clear the probe statistics array. 2499 */ 2500 void 2501 clear_pii_probe_stats(struct phyint_instance *pii) 2502 { 2503 bzero(pii->pii_probes, sizeof (struct probe_stats) * PROBE_STATS_COUNT); 2504 /* Reset the next probe index in the probe stats array */ 2505 pii->pii_probe_next = 0; 2506 } 2507 2508 static void 2509 target_print(struct target *tg) 2510 { 2511 char abuf[INET6_ADDRSTRLEN]; 2512 char buf[128]; 2513 char buf2[128]; 2514 int af; 2515 int i; 2516 2517 af = tg->tg_phyint_inst->pii_af; 2518 2519 logdebug("Target on %s %s addr %s\n" 2520 "status %d rtt_sa %lld rtt_sd %lld crtt %d tg_in_use %d\n", 2521 AF_STR(af), tg->tg_phyint_inst->pii_name, 2522 pr_addr(af, tg->tg_address, abuf, sizeof (abuf)), 2523 tg->tg_status, tg->tg_rtt_sa, tg->tg_rtt_sd, 2524 tg->tg_crtt, tg->tg_in_use); 2525 2526 buf[0] = '\0'; 2527 for (i = 0; i < tg->tg_num_deferred; i++) { 2528 (void) snprintf(buf2, sizeof (buf2), " %dms", 2529 tg->tg_deferred[i]); 2530 (void) strlcat(buf, buf2, sizeof (buf)); 2531 } 2532 logdebug("deferred rtts:%s\n", buf); 2533 } 2534 2535 void 2536 phyint_inst_print_all(void) 2537 { 2538 struct phyint_instance *pii; 2539 2540 for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) { 2541 phyint_inst_print(pii); 2542 } 2543 } 2544 2545 /* 2546 * Compare two prefixes that have the same prefix length. 2547 * Fails if the prefix length is unreasonable. 
2548 */ 2549 boolean_t 2550 prefix_equal(struct in6_addr p1, struct in6_addr p2, uint_t prefix_len) 2551 { 2552 uchar_t mask; 2553 int j; 2554 2555 if (prefix_len > IPV6_ABITS) 2556 return (_B_FALSE); 2557 2558 for (j = 0; prefix_len > 8; prefix_len -= 8, j++) 2559 if (p1.s6_addr[j] != p2.s6_addr[j]) 2560 return (_B_FALSE); 2561 2562 /* Make the N leftmost bits one */ 2563 mask = 0xff << (8 - prefix_len); 2564 if ((p1.s6_addr[j] & mask) != (p2.s6_addr[j] & mask)) 2565 return (_B_FALSE); 2566 2567 return (_B_TRUE); 2568 } 2569 2570 /* 2571 * Get the number of UP logints on phyint `pi'. 2572 */ 2573 static int 2574 logint_upcount(struct phyint *pi) 2575 { 2576 struct logint *li; 2577 int count = 0; 2578 2579 if (pi->pi_v4 != NULL) { 2580 for (li = pi->pi_v4->pii_logint; li != NULL; li = li->li_next) { 2581 if (li->li_flags & IFF_UP) 2582 count++; 2583 } 2584 } 2585 2586 if (pi->pi_v6 != NULL) { 2587 for (li = pi->pi_v6->pii_logint; li != NULL; li = li->li_next) { 2588 if (li->li_flags & IFF_UP) 2589 count++; 2590 } 2591 } 2592 2593 return (count); 2594 } 2595 2596 /* 2597 * Get the phyint instance with the other (IPv4 / IPv6) protocol 2598 */ 2599 struct phyint_instance * 2600 phyint_inst_other(struct phyint_instance *pii) 2601 { 2602 if (pii->pii_af == AF_INET) 2603 return (pii->pii_phyint->pi_v6); 2604 else 2605 return (pii->pii_phyint->pi_v4); 2606 } 2607 2608 /* 2609 * Check whether a phyint is functioning. 2610 */ 2611 static boolean_t 2612 phyint_is_functioning(struct phyint *pi) 2613 { 2614 if (pi->pi_state == PI_RUNNING) 2615 return (_B_TRUE); 2616 return (pi->pi_state == PI_NOTARGETS && !(pi->pi_flags & IFF_FAILED)); 2617 } 2618 2619 /* 2620 * Check whether a phyint is usable. 2621 */ 2622 static boolean_t 2623 phyint_is_usable(struct phyint *pi) 2624 { 2625 if (logint_upcount(pi) == 0) 2626 return (_B_FALSE); 2627 return (phyint_is_functioning(pi)); 2628 } 2629 2630 /* 2631 * Post an EC_IPMP sysevent of subclass `subclass' and attributes `nvl'. 
2632 * Before sending the event, it prepends the current version of the IPMP 2633 * sysevent API. Returns 0 on success, -1 on failure (in either case, 2634 * `nvl' is freed). 2635 */ 2636 static int 2637 post_event(const char *subclass, nvlist_t *nvl) 2638 { 2639 static evchan_t *evchp = NULL; 2640 2641 /* 2642 * Initialize the event channel if we haven't already done so. 2643 */ 2644 if (evchp == NULL) { 2645 errno = sysevent_evc_bind(IPMP_EVENT_CHAN, &evchp, EVCH_CREAT); 2646 if (errno != 0) { 2647 logerr("cannot create event channel `%s': %s\n", 2648 IPMP_EVENT_CHAN, strerror(errno)); 2649 goto failed; 2650 } 2651 } 2652 2653 errno = nvlist_add_uint32(nvl, IPMP_EVENT_VERSION, 2654 IPMP_EVENT_CUR_VERSION); 2655 if (errno != 0) { 2656 logerr("cannot create `%s' event: %s", subclass, 2657 strerror(errno)); 2658 goto failed; 2659 } 2660 2661 errno = sysevent_evc_publish(evchp, EC_IPMP, subclass, "com.sun", 2662 "in.mpathd", nvl, EVCH_NOSLEEP); 2663 if (errno != 0) { 2664 logerr("cannot send `%s' event: %s\n", subclass, 2665 strerror(errno)); 2666 goto failed; 2667 } 2668 2669 nvlist_free(nvl); 2670 return (0); 2671 failed: 2672 nvlist_free(nvl); 2673 return (-1); 2674 } 2675 2676 /* 2677 * Return the external IPMP state associated with phyint `pi'. 2678 */ 2679 static ipmp_if_state_t 2680 ifstate(struct phyint *pi) 2681 { 2682 switch (pi->pi_state) { 2683 case PI_NOTARGETS: 2684 if (pi->pi_flags & IFF_FAILED) 2685 return (IPMP_IF_FAILED); 2686 return (IPMP_IF_UNKNOWN); 2687 2688 case PI_OFFLINE: 2689 return (IPMP_IF_OFFLINE); 2690 2691 case PI_FAILED: 2692 return (IPMP_IF_FAILED); 2693 2694 case PI_RUNNING: 2695 return (IPMP_IF_OK); 2696 } 2697 2698 logerr("ifstate: unknown state %d; aborting\n", pi->pi_state); 2699 abort(); 2700 /* NOTREACHED */ 2701 } 2702 2703 /* 2704 * Return the external IPMP interface type associated with phyint `pi'. 
2705 */ 2706 static ipmp_if_type_t 2707 iftype(struct phyint *pi) 2708 { 2709 if (pi->pi_flags & IFF_STANDBY) 2710 return (IPMP_IF_STANDBY); 2711 else 2712 return (IPMP_IF_NORMAL); 2713 } 2714 2715 /* 2716 * Return the external IPMP link state associated with phyint `pi'. 2717 */ 2718 static ipmp_if_linkstate_t 2719 iflinkstate(struct phyint *pi) 2720 { 2721 if (!(pi->pi_notes & (DL_NOTE_LINK_UP|DL_NOTE_LINK_DOWN))) 2722 return (IPMP_LINK_UNKNOWN); 2723 2724 return (LINK_DOWN(pi) ? IPMP_LINK_DOWN : IPMP_LINK_UP); 2725 } 2726 2727 /* 2728 * Return the external IPMP probe state associated with phyint `pi'. 2729 */ 2730 static ipmp_if_probestate_t 2731 ifprobestate(struct phyint *pi) 2732 { 2733 if (!PROBE_ENABLED(pi->pi_v4) && !PROBE_ENABLED(pi->pi_v6)) 2734 return (IPMP_PROBE_DISABLED); 2735 2736 if (pi->pi_state == PI_FAILED) 2737 return (IPMP_PROBE_FAILED); 2738 2739 if (!PROBE_CAPABLE(pi->pi_v4) && !PROBE_CAPABLE(pi->pi_v6)) 2740 return (IPMP_PROBE_UNKNOWN); 2741 2742 return (IPMP_PROBE_OK); 2743 } 2744 2745 /* 2746 * Return the external IPMP target mode associated with phyint instance `pii'. 2747 */ 2748 static ipmp_if_targmode_t 2749 iftargmode(struct phyint_instance *pii) 2750 { 2751 if (!PROBE_ENABLED(pii)) 2752 return (IPMP_TARG_DISABLED); 2753 else if (pii->pii_targets_are_routers) 2754 return (IPMP_TARG_ROUTES); 2755 else 2756 return (IPMP_TARG_MULTICAST); 2757 } 2758 2759 /* 2760 * Return the external IPMP flags associated with phyint `pi'. 2761 */ 2762 static ipmp_if_flags_t 2763 ifflags(struct phyint *pi) 2764 { 2765 ipmp_if_flags_t flags = 0; 2766 2767 if (logint_upcount(pi) == 0) 2768 flags |= IPMP_IFFLAG_DOWN; 2769 if (pi->pi_flags & IFF_INACTIVE) 2770 flags |= IPMP_IFFLAG_INACTIVE; 2771 if (pi->pi_hwaddrdup) 2772 flags |= IPMP_IFFLAG_HWADDRDUP; 2773 if (phyint_is_functioning(pi) && flags == 0) 2774 flags |= IPMP_IFFLAG_ACTIVE; 2775 2776 return (flags); 2777 } 2778 2779 /* 2780 * Store the test address used on phyint instance `pii' in `ssp'. 
If there's 2781 * no test address, 0.0.0.0 is stored. 2782 */ 2783 static struct sockaddr_storage * 2784 iftestaddr(struct phyint_instance *pii, struct sockaddr_storage *ssp) 2785 { 2786 if (PROBE_ENABLED(pii)) 2787 addr2storage(pii->pii_af, &pii->pii_probe_logint->li_addr, ssp); 2788 else 2789 addr2storage(AF_INET6, &in6addr_any, ssp); 2790 2791 return (ssp); 2792 } 2793 2794 /* 2795 * Return the external IPMP group state associated with phyint group `pg'. 2796 */ 2797 static ipmp_group_state_t 2798 groupstate(struct phyint_group *pg) 2799 { 2800 switch (pg->pg_state) { 2801 case PG_FAILED: 2802 return (IPMP_GROUP_FAILED); 2803 case PG_DEGRADED: 2804 return (IPMP_GROUP_DEGRADED); 2805 case PG_OK: 2806 return (IPMP_GROUP_OK); 2807 } 2808 2809 logerr("groupstate: unknown state %d; aborting\n", pg->pg_state); 2810 abort(); 2811 /* NOTREACHED */ 2812 } 2813 2814 /* 2815 * Return the external IPMP probe state associated with probe `ps'. 2816 */ 2817 static ipmp_probe_state_t 2818 probestate(struct probe_stats *ps) 2819 { 2820 switch (ps->pr_status) { 2821 case PR_UNUSED: 2822 case PR_LOST: 2823 return (IPMP_PROBE_LOST); 2824 case PR_UNACKED: 2825 return (IPMP_PROBE_SENT); 2826 case PR_ACKED: 2827 return (IPMP_PROBE_ACKED); 2828 } 2829 2830 logerr("probestate: unknown state %d; aborting\n", ps->pr_status); 2831 abort(); 2832 /* NOTREACHED */ 2833 } 2834 2835 /* 2836 * Generate an ESC_IPMP_PROBE_STATE sysevent for the probe described by `pr' 2837 * on phyint instance `pii'. Returns 0 on success, -1 on failure. 
2838 */ 2839 int 2840 probe_state_event(struct probe_stats *pr, struct phyint_instance *pii) 2841 { 2842 nvlist_t *nvl; 2843 hrtime_t proc_time = 0, recv_time = 0; 2844 struct sockaddr_storage ss; 2845 struct target *tg = pr->pr_target; 2846 2847 errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 2848 if (errno != 0) { 2849 logperror("cannot create `interface change' event"); 2850 return (-1); 2851 } 2852 2853 errno = nvlist_add_uint32(nvl, IPMP_PROBE_ID, pr->pr_id); 2854 if (errno != 0) 2855 goto failed; 2856 2857 errno = nvlist_add_string(nvl, IPMP_IF_NAME, pii->pii_phyint->pi_name); 2858 if (errno != 0) 2859 goto failed; 2860 2861 errno = nvlist_add_uint32(nvl, IPMP_PROBE_STATE, probestate(pr)); 2862 if (errno != 0) 2863 goto failed; 2864 2865 errno = nvlist_add_hrtime(nvl, IPMP_PROBE_START_TIME, 2866 pr->pr_hrtime_start); 2867 if (errno != 0) 2868 goto failed; 2869 2870 errno = nvlist_add_hrtime(nvl, IPMP_PROBE_SENT_TIME, 2871 pr->pr_hrtime_sent); 2872 if (errno != 0) 2873 goto failed; 2874 2875 if (pr->pr_status == PR_ACKED) { 2876 recv_time = pr->pr_hrtime_ackrecv; 2877 proc_time = pr->pr_hrtime_ackproc; 2878 } 2879 2880 errno = nvlist_add_hrtime(nvl, IPMP_PROBE_ACKRECV_TIME, recv_time); 2881 if (errno != 0) 2882 goto failed; 2883 2884 errno = nvlist_add_hrtime(nvl, IPMP_PROBE_ACKPROC_TIME, proc_time); 2885 if (errno != 0) 2886 goto failed; 2887 2888 if (tg != NULL) 2889 addr2storage(pii->pii_af, &tg->tg_address, &ss); 2890 else 2891 addr2storage(pii->pii_af, &in6addr_any, &ss); 2892 2893 errno = nvlist_add_byte_array(nvl, IPMP_PROBE_TARGET, (uchar_t *)&ss, 2894 sizeof (ss)); 2895 if (errno != 0) 2896 goto failed; 2897 2898 errno = nvlist_add_int64(nvl, IPMP_PROBE_TARGET_RTTAVG, 2899 tg->tg_rtt_sa / 8); 2900 if (errno != 0) 2901 goto failed; 2902 2903 errno = nvlist_add_int64(nvl, IPMP_PROBE_TARGET_RTTDEV, 2904 tg->tg_rtt_sd / 4); 2905 if (errno != 0) 2906 goto failed; 2907 2908 return (post_event(ESC_IPMP_PROBE_STATE, nvl)); 2909 failed: 2910 logperror("cannot 
create `probe state' event"); 2911 nvlist_free(nvl); 2912 return (-1); 2913 } 2914 2915 /* 2916 * Generate an ESC_IPMP_GROUP_STATE sysevent for phyint group `pg'. 2917 * Returns 0 on success, -1 on failure. 2918 */ 2919 static int 2920 phyint_group_state_event(struct phyint_group *pg) 2921 { 2922 nvlist_t *nvl; 2923 2924 errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 2925 if (errno != 0) { 2926 logperror("cannot create `group state change' event"); 2927 return (-1); 2928 } 2929 2930 errno = nvlist_add_string(nvl, IPMP_GROUP_NAME, pg->pg_name); 2931 if (errno != 0) 2932 goto failed; 2933 2934 errno = nvlist_add_uint64(nvl, IPMP_GROUP_SIGNATURE, pg->pg_sig); 2935 if (errno != 0) 2936 goto failed; 2937 2938 errno = nvlist_add_uint32(nvl, IPMP_GROUP_STATE, groupstate(pg)); 2939 if (errno != 0) 2940 goto failed; 2941 2942 return (post_event(ESC_IPMP_GROUP_STATE, nvl)); 2943 failed: 2944 logperror("cannot create `group state change' event"); 2945 nvlist_free(nvl); 2946 return (-1); 2947 } 2948 2949 /* 2950 * Generate an ESC_IPMP_GROUP_CHANGE sysevent of type `op' for phyint group 2951 * `pg'. Returns 0 on success, -1 on failure. 
2952 */ 2953 static int 2954 phyint_group_change_event(struct phyint_group *pg, ipmp_group_op_t op) 2955 { 2956 nvlist_t *nvl; 2957 2958 errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 2959 if (errno != 0) { 2960 logperror("cannot create `group change' event"); 2961 return (-1); 2962 } 2963 2964 errno = nvlist_add_string(nvl, IPMP_GROUP_NAME, pg->pg_name); 2965 if (errno != 0) 2966 goto failed; 2967 2968 errno = nvlist_add_uint64(nvl, IPMP_GROUP_SIGNATURE, pg->pg_sig); 2969 if (errno != 0) 2970 goto failed; 2971 2972 errno = nvlist_add_uint64(nvl, IPMP_GROUPLIST_SIGNATURE, 2973 phyint_grouplistsig); 2974 if (errno != 0) 2975 goto failed; 2976 2977 errno = nvlist_add_uint32(nvl, IPMP_GROUP_OPERATION, op); 2978 if (errno != 0) 2979 goto failed; 2980 2981 return (post_event(ESC_IPMP_GROUP_CHANGE, nvl)); 2982 failed: 2983 logperror("cannot create `group change' event"); 2984 nvlist_free(nvl); 2985 return (-1); 2986 } 2987 2988 /* 2989 * Generate an ESC_IPMP_GROUP_MEMBER_CHANGE sysevent for phyint `pi' in 2990 * group `pg'. Returns 0 on success, -1 on failure. 
2991 */ 2992 static int 2993 phyint_group_member_event(struct phyint_group *pg, struct phyint *pi, 2994 ipmp_if_op_t op) 2995 { 2996 nvlist_t *nvl; 2997 2998 errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 2999 if (errno != 0) { 3000 logperror("cannot create `group member change' event"); 3001 return (-1); 3002 } 3003 3004 errno = nvlist_add_string(nvl, IPMP_GROUP_NAME, pg->pg_name); 3005 if (errno != 0) 3006 goto failed; 3007 3008 errno = nvlist_add_uint64(nvl, IPMP_GROUP_SIGNATURE, pg->pg_sig); 3009 if (errno != 0) 3010 goto failed; 3011 3012 errno = nvlist_add_uint32(nvl, IPMP_IF_OPERATION, op); 3013 if (errno != 0) 3014 goto failed; 3015 3016 errno = nvlist_add_string(nvl, IPMP_IF_NAME, pi->pi_name); 3017 if (errno != 0) 3018 goto failed; 3019 3020 errno = nvlist_add_uint32(nvl, IPMP_IF_TYPE, iftype(pi)); 3021 if (errno != 0) 3022 goto failed; 3023 3024 errno = nvlist_add_uint32(nvl, IPMP_IF_STATE, ifstate(pi)); 3025 if (errno != 0) 3026 goto failed; 3027 3028 return (post_event(ESC_IPMP_GROUP_MEMBER_CHANGE, nvl)); 3029 failed: 3030 logperror("cannot create `group member change' event"); 3031 nvlist_free(nvl); 3032 return (-1); 3033 3034 } 3035 3036 /* 3037 * Generate an ESC_IPMP_IF_CHANGE sysevent for phyint `pi' in group `pg'. 3038 * Returns 0 on success, -1 on failure. 
3039 */ 3040 static int 3041 phyint_state_event(struct phyint_group *pg, struct phyint *pi) 3042 { 3043 nvlist_t *nvl; 3044 3045 errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 3046 if (errno != 0) { 3047 logperror("cannot create `interface change' event"); 3048 return (-1); 3049 } 3050 3051 errno = nvlist_add_string(nvl, IPMP_GROUP_NAME, pg->pg_name); 3052 if (errno != 0) 3053 goto failed; 3054 3055 errno = nvlist_add_uint64(nvl, IPMP_GROUP_SIGNATURE, pg->pg_sig); 3056 if (errno != 0) 3057 goto failed; 3058 3059 errno = nvlist_add_string(nvl, IPMP_IF_NAME, pi->pi_name); 3060 if (errno != 0) 3061 goto failed; 3062 3063 errno = nvlist_add_uint32(nvl, IPMP_IF_TYPE, iftype(pi)); 3064 if (errno != 0) 3065 goto failed; 3066 3067 errno = nvlist_add_uint32(nvl, IPMP_IF_STATE, ifstate(pi)); 3068 if (errno != 0) 3069 goto failed; 3070 3071 return (post_event(ESC_IPMP_IF_CHANGE, nvl)); 3072 failed: 3073 logperror("cannot create `interface change' event"); 3074 nvlist_free(nvl); 3075 return (-1); 3076 3077 } 3078 3079 /* 3080 * Generate a signature for use. The signature is conceptually divided 3081 * into two pieces: a random 16-bit "generation number" and a 48-bit 3082 * monotonically increasing integer. The generation number protects 3083 * against stale updates to entities (e.g., IPMP groups) that have been 3084 * deleted and since recreated. 3085 */ 3086 static uint64_t 3087 gensig(void) 3088 { 3089 static int seeded = 0; 3090 3091 if (seeded == 0) { 3092 srand48((long)gethrtime()); 3093 seeded++; 3094 } 3095 3096 return ((uint64_t)lrand48() << 48 | 1); 3097 } 3098 3099 /* 3100 * Store the information associated with group `grname' into a dynamically 3101 * allocated structure pointed to by `*grinfopp'. Returns an IPMP error code. 
3102 */ 3103 unsigned int 3104 getgroupinfo(const char *grname, ipmp_groupinfo_t **grinfopp) 3105 { 3106 struct phyint *pi; 3107 struct phyint_group *pg; 3108 char (*ifs)[LIFNAMSIZ]; 3109 unsigned int i, j; 3110 unsigned int nif = 0, naddr = 0; 3111 lifgroupinfo_t lifgr; 3112 addrlist_t *addrp; 3113 struct sockaddr_storage *addrs; 3114 int fdt = 0; 3115 3116 pg = phyint_group_lookup(grname); 3117 if (pg == NULL) 3118 return (IPMP_EUNKGROUP); 3119 3120 /* 3121 * Tally up the number of interfaces, allocate an array to hold them, 3122 * and insert their names into the array. While we're at it, if any 3123 * interface is actually enabled to send probes, save the group fdt. 3124 */ 3125 for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) 3126 nif++; 3127 3128 ifs = alloca(nif * sizeof (*ifs)); 3129 for (i = 0, pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext, i++) { 3130 assert(i < nif); 3131 (void) strlcpy(ifs[i], pi->pi_name, LIFNAMSIZ); 3132 if (PROBE_ENABLED(pi->pi_v4) || PROBE_ENABLED(pi->pi_v6)) 3133 fdt = pg->pg_fdt; 3134 } 3135 assert(i == nif); 3136 3137 /* 3138 * If this is the anonymous group, there's no other information to 3139 * collect (since there's no IPMP interface). 3140 */ 3141 if (pg == phyint_anongroup) { 3142 *grinfopp = ipmp_groupinfo_create(pg->pg_name, pg->pg_sig, fdt, 3143 groupstate(pg), nif, ifs, "", "", "", "", 0, NULL); 3144 return (*grinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS); 3145 } 3146 3147 /* 3148 * Grab some additional information about the group from the kernel. 3149 * (NOTE: since SIOCGLIFGROUPINFO does not look up by interface name, 3150 * we can use ifsock_v4 even for a V6-only group.) 
3151 */ 3152 (void) strlcpy(lifgr.gi_grname, grname, LIFGRNAMSIZ); 3153 if (ioctl(ifsock_v4, SIOCGLIFGROUPINFO, &lifgr) == -1) { 3154 if (errno == ENOENT) 3155 return (IPMP_EUNKGROUP); 3156 3157 logperror("getgroupinfo: SIOCGLIFGROUPINFO"); 3158 return (IPMP_FAILURE); 3159 } 3160 3161 /* 3162 * Tally up the number of data addresses, allocate an array to hold 3163 * them, and insert their values into the array. 3164 */ 3165 for (addrp = pg->pg_addrs; addrp != NULL; addrp = addrp->al_next) 3166 naddr++; 3167 3168 addrs = alloca(naddr * sizeof (*addrs)); 3169 i = 0; 3170 for (addrp = pg->pg_addrs; addrp != NULL; addrp = addrp->al_next) { 3171 /* 3172 * It's possible to have duplicate addresses (if some are 3173 * down). Weed the dups out to avoid confusing consumers. 3174 * (If groups start having tons of addresses, we'll need a 3175 * better algorithm here.) 3176 */ 3177 for (j = 0; j < i; j++) { 3178 if (sockaddrcmp(&addrs[j], &addrp->al_addr)) 3179 break; 3180 } 3181 if (j == i) { 3182 assert(i < naddr); 3183 addrs[i++] = addrp->al_addr; 3184 } 3185 } 3186 naddr = i; 3187 3188 *grinfopp = ipmp_groupinfo_create(pg->pg_name, pg->pg_sig, fdt, 3189 groupstate(pg), nif, ifs, lifgr.gi_grifname, lifgr.gi_m4ifname, 3190 lifgr.gi_m6ifname, lifgr.gi_bcifname, naddr, addrs); 3191 return (*grinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS); 3192 } 3193 3194 /* 3195 * Store the target information associated with phyint instance `pii' into a 3196 * dynamically allocated structure pointed to by `*targinfopp'. Returns an 3197 * IPMP error code. 
3198 */ 3199 unsigned int 3200 gettarginfo(struct phyint_instance *pii, const char *name, 3201 ipmp_targinfo_t **targinfopp) 3202 { 3203 uint_t ntarg = 0; 3204 struct target *tg; 3205 struct sockaddr_storage ss; 3206 struct sockaddr_storage *targs = NULL; 3207 3208 if (PROBE_CAPABLE(pii)) { 3209 targs = alloca(pii->pii_ntargets * sizeof (*targs)); 3210 tg = pii->pii_target_next; 3211 do { 3212 if (tg->tg_status == TG_ACTIVE) { 3213 assert(ntarg < pii->pii_ntargets); 3214 addr2storage(pii->pii_af, &tg->tg_address, 3215 &targs[ntarg++]); 3216 } 3217 if ((tg = tg->tg_next) == NULL) 3218 tg = pii->pii_targets; 3219 } while (tg != pii->pii_target_next); 3220 3221 assert(ntarg == pii->pii_ntargets); 3222 } 3223 3224 *targinfopp = ipmp_targinfo_create(name, iftestaddr(pii, &ss), 3225 iftargmode(pii), ntarg, targs); 3226 return (*targinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS); 3227 } 3228 3229 /* 3230 * Store the information associated with interface `ifname' into a dynamically 3231 * allocated structure pointed to by `*ifinfopp'. Returns an IPMP error code. 3232 */ 3233 unsigned int 3234 getifinfo(const char *ifname, ipmp_ifinfo_t **ifinfopp) 3235 { 3236 int retval; 3237 struct phyint *pi; 3238 ipmp_targinfo_t *targinfo4; 3239 ipmp_targinfo_t *targinfo6; 3240 3241 pi = phyint_lookup(ifname); 3242 if (pi == NULL) 3243 return (IPMP_EUNKIF); 3244 3245 if ((retval = gettarginfo(pi->pi_v4, pi->pi_name, &targinfo4)) != 0 || 3246 (retval = gettarginfo(pi->pi_v6, pi->pi_name, &targinfo6)) != 0) 3247 goto out; 3248 3249 *ifinfopp = ipmp_ifinfo_create(pi->pi_name, pi->pi_group->pg_name, 3250 ifstate(pi), iftype(pi), iflinkstate(pi), ifprobestate(pi), 3251 ifflags(pi), targinfo4, targinfo6); 3252 retval = (*ifinfopp == NULL ? 
IPMP_ENOMEM : IPMP_SUCCESS); 3253 out: 3254 if (targinfo4 != NULL) 3255 ipmp_freetarginfo(targinfo4); 3256 if (targinfo6 != NULL) 3257 ipmp_freetarginfo(targinfo6); 3258 return (retval); 3259 } 3260 3261 /* 3262 * Store the current list of IPMP groups into a dynamically allocated 3263 * structure pointed to by `*grlistpp'. Returns an IPMP error code. 3264 */ 3265 unsigned int 3266 getgrouplist(ipmp_grouplist_t **grlistpp) 3267 { 3268 struct phyint_group *pg; 3269 char (*groups)[LIFGRNAMSIZ]; 3270 unsigned int i, ngroup; 3271 3272 /* 3273 * Tally up the number of groups, allocate an array to hold them, and 3274 * insert their names into the array. 3275 */ 3276 for (ngroup = 0, pg = phyint_groups; pg != NULL; pg = pg->pg_next) 3277 ngroup++; 3278 3279 groups = alloca(ngroup * sizeof (*groups)); 3280 for (i = 0, pg = phyint_groups; pg != NULL; pg = pg->pg_next, i++) { 3281 assert(i < ngroup); 3282 (void) strlcpy(groups[i], pg->pg_name, LIFGRNAMSIZ); 3283 } 3284 assert(i == ngroup); 3285 3286 *grlistpp = ipmp_grouplist_create(phyint_grouplistsig, ngroup, groups); 3287 return (*grlistpp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS); 3288 } 3289 3290 /* 3291 * Store the address information for `ssp' (in group `grname') into a 3292 * dynamically allocated structure pointed to by `*adinfopp'. Returns an IPMP 3293 * error code. (We'd call this function getaddrinfo(), but it would conflict 3294 * with getaddrinfo(3SOCKET)). 3295 */ 3296 unsigned int 3297 getgraddrinfo(const char *grname, struct sockaddr_storage *ssp, 3298 ipmp_addrinfo_t **adinfopp) 3299 { 3300 int ifsock; 3301 addrlist_t *addrp, *addrmatchp = NULL; 3302 ipmp_addr_state_t state; 3303 const char *binding = ""; 3304 struct lifreq lifr; 3305 struct phyint_group *pg; 3306 3307 if ((pg = phyint_group_lookup(grname)) == NULL) 3308 return (IPMP_EUNKADDR); 3309 3310 /* 3311 * Walk through the data addresses, and find a match. Note that since 3312 * some of the addresses may be down, more than one may match. 
We 3313 * prefer an up address (if one exists). 3314 */ 3315 for (addrp = pg->pg_addrs; addrp != NULL; addrp = addrp->al_next) { 3316 if (sockaddrcmp(ssp, &addrp->al_addr)) { 3317 addrmatchp = addrp; 3318 if (addrmatchp->al_flags & IFF_UP) 3319 break; 3320 } 3321 } 3322 3323 if (addrmatchp == NULL) 3324 return (IPMP_EUNKADDR); 3325 3326 state = (addrmatchp->al_flags & IFF_UP) ? IPMP_ADDR_UP : IPMP_ADDR_DOWN; 3327 if (state == IPMP_ADDR_UP) { 3328 ifsock = (ssp->ss_family == AF_INET) ? ifsock_v4 : ifsock_v6; 3329 (void) strlcpy(lifr.lifr_name, addrmatchp->al_name, LIFNAMSIZ); 3330 if (ioctl(ifsock, SIOCGLIFBINDING, &lifr) >= 0) 3331 binding = lifr.lifr_binding; 3332 } 3333 3334 *adinfopp = ipmp_addrinfo_create(ssp, state, pg->pg_name, binding); 3335 return (*adinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS); 3336 } 3337 3338 /* 3339 * Store a snapshot of the IPMP subsystem into a dynamically allocated 3340 * structure pointed to by `*snapp'. Returns an IPMP error code. 3341 */ 3342 unsigned int 3343 getsnap(ipmp_snap_t **snapp) 3344 { 3345 ipmp_grouplist_t *grlistp; 3346 ipmp_groupinfo_t *grinfop; 3347 ipmp_addrinfo_t *adinfop; 3348 ipmp_addrlist_t *adlistp; 3349 ipmp_ifinfo_t *ifinfop; 3350 ipmp_snap_t *snap; 3351 struct phyint *pi; 3352 unsigned int i, j; 3353 int retval; 3354 3355 snap = ipmp_snap_create(); 3356 if (snap == NULL) 3357 return (IPMP_ENOMEM); 3358 3359 /* 3360 * Add group list. 3361 */ 3362 retval = getgrouplist(&snap->sn_grlistp); 3363 if (retval != IPMP_SUCCESS) 3364 goto failed; 3365 3366 /* 3367 * Add information for each group in the list, along with all of its 3368 * data addresses. 
3369 */ 3370 grlistp = snap->sn_grlistp; 3371 for (i = 0; i < grlistp->gl_ngroup; i++) { 3372 retval = getgroupinfo(grlistp->gl_groups[i], &grinfop); 3373 if (retval != IPMP_SUCCESS) 3374 goto failed; 3375 3376 retval = ipmp_snap_addgroupinfo(snap, grinfop); 3377 if (retval != IPMP_SUCCESS) { 3378 ipmp_freegroupinfo(grinfop); 3379 goto failed; 3380 } 3381 3382 adlistp = grinfop->gr_adlistp; 3383 for (j = 0; j < adlistp->al_naddr; j++) { 3384 retval = getgraddrinfo(grinfop->gr_name, 3385 &adlistp->al_addrs[j], &adinfop); 3386 if (retval != IPMP_SUCCESS) 3387 goto failed; 3388 3389 retval = ipmp_snap_addaddrinfo(snap, adinfop); 3390 if (retval != IPMP_SUCCESS) { 3391 ipmp_freeaddrinfo(adinfop); 3392 goto failed; 3393 } 3394 } 3395 } 3396 3397 /* 3398 * Add information for each configured phyint. 3399 */ 3400 for (pi = phyints; pi != NULL; pi = pi->pi_next) { 3401 retval = getifinfo(pi->pi_name, &ifinfop); 3402 if (retval != IPMP_SUCCESS) 3403 goto failed; 3404 3405 retval = ipmp_snap_addifinfo(snap, ifinfop); 3406 if (retval != IPMP_SUCCESS) { 3407 ipmp_freeifinfo(ifinfop); 3408 goto failed; 3409 } 3410 } 3411 3412 *snapp = snap; 3413 return (IPMP_SUCCESS); 3414 failed: 3415 ipmp_snap_free(snap); 3416 return (retval); 3417 } 3418