1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include "mpd_defs.h" 27 #include "mpd_tables.h" 28 29 /* 30 * Global list of phyints, phyint instances, phyint groups and the anonymous 31 * group; the latter is initialized in phyint_init(). 32 */ 33 struct phyint *phyints = NULL; 34 struct phyint_instance *phyint_instances = NULL; 35 struct phyint_group *phyint_groups = NULL; 36 struct phyint_group *phyint_anongroup; 37 38 /* 39 * Grouplist signature; initialized in phyint_init(). 
40 */ 41 static uint64_t phyint_grouplistsig; 42 43 static void phyint_inst_insert(struct phyint_instance *pii); 44 static void phyint_inst_print(struct phyint_instance *pii); 45 46 static void phyint_insert(struct phyint *pi, struct phyint_group *pg); 47 static void phyint_delete(struct phyint *pi); 48 static boolean_t phyint_is_usable(struct phyint *pi); 49 50 static void logint_print(struct logint *li); 51 static void logint_insert(struct phyint_instance *pii, struct logint *li); 52 static struct logint *logint_lookup(struct phyint_instance *pii, char *li_name); 53 54 static void target_print(struct target *tg); 55 static void target_insert(struct phyint_instance *pii, struct target *tg); 56 static struct target *target_first(struct phyint_instance *pii); 57 static struct target *target_select_best(struct phyint_instance *pii); 58 static void target_flush_hosts(struct phyint_group *pg); 59 60 static void reset_pii_probes(struct phyint_instance *pii, struct target *tg); 61 62 static boolean_t phyint_inst_v6_sockinit(struct phyint_instance *pii); 63 static boolean_t phyint_inst_v4_sockinit(struct phyint_instance *pii); 64 65 static int phyint_state_event(struct phyint_group *pg, struct phyint *pi); 66 static int phyint_group_state_event(struct phyint_group *pg); 67 static int phyint_group_change_event(struct phyint_group *pg, ipmp_group_op_t); 68 static int phyint_group_member_event(struct phyint_group *pg, struct phyint *pi, 69 ipmp_if_op_t op); 70 71 static int logint_upcount(struct phyint *pi); 72 static uint64_t gensig(void); 73 74 /* Initialize any per-file global state. 
Returns 0 on success, -1 on failure */ 75 int 76 phyint_init(void) 77 { 78 phyint_grouplistsig = gensig(); 79 if (track_all_phyints) { 80 phyint_anongroup = phyint_group_create(""); 81 if (phyint_anongroup == NULL) 82 return (-1); 83 phyint_group_insert(phyint_anongroup); 84 } 85 return (0); 86 } 87 88 /* Return the phyint with the given name */ 89 struct phyint * 90 phyint_lookup(const char *name) 91 { 92 struct phyint *pi; 93 94 if (debug & D_PHYINT) 95 logdebug("phyint_lookup(%s)\n", name); 96 97 for (pi = phyints; pi != NULL; pi = pi->pi_next) { 98 if (strncmp(pi->pi_name, name, sizeof (pi->pi_name)) == 0) 99 break; 100 } 101 return (pi); 102 } 103 104 /* 105 * Lookup a phyint in the group that has the same hardware address as `pi', or 106 * NULL if there's none. If `online_only' is set, then only online phyints 107 * are considered when matching. Otherwise, phyints that had been offlined 108 * due to a duplicate hardware address will also be considered. 109 */ 110 static struct phyint * 111 phyint_lookup_hwaddr(struct phyint *pi, boolean_t online_only) 112 { 113 struct phyint *pi2; 114 115 if (pi->pi_group == phyint_anongroup) 116 return (NULL); 117 118 for (pi2 = pi->pi_group->pg_phyint; pi2 != NULL; pi2 = pi2->pi_pgnext) { 119 if (pi2 == pi) 120 continue; 121 122 /* 123 * NOTE: even when online_only is B_FALSE, we ignore phyints 124 * that are administratively offline (rather than offline 125 * because they're dups); when they're brought back online, 126 * they'll be flagged as dups if need be. 127 */ 128 if (pi2->pi_state == PI_OFFLINE && 129 (online_only || !pi2->pi_hwaddrdup)) 130 continue; 131 132 if (pi2->pi_hwaddrlen == pi->pi_hwaddrlen && 133 bcmp(pi2->pi_hwaddr, pi->pi_hwaddr, pi->pi_hwaddrlen) == 0) 134 return (pi2); 135 } 136 return (NULL); 137 } 138 139 /* 140 * Respond to DLPI notifications. Currently, this only processes physical 141 * address changes for the phyint passed via `arg' by onlining or offlining 142 * phyints in the group. 
143 */ 144 /* ARGSUSED */ 145 static void 146 phyint_link_notify(dlpi_handle_t dh, dlpi_notifyinfo_t *dnip, void *arg) 147 { 148 struct phyint *pi = arg; 149 struct phyint *oduppi = NULL, *duppi = NULL; 150 151 assert((dnip->dni_note & pi->pi_notes) != 0); 152 153 if (dnip->dni_note != DL_NOTE_PHYS_ADDR) 154 return; 155 156 assert(dnip->dni_physaddrlen <= DLPI_PHYSADDR_MAX); 157 158 /* 159 * If our hardware address hasn't changed, there's nothing to do. 160 */ 161 if (pi->pi_hwaddrlen == dnip->dni_physaddrlen && 162 bcmp(pi->pi_hwaddr, dnip->dni_physaddr, pi->pi_hwaddrlen) == 0) 163 return; 164 165 oduppi = phyint_lookup_hwaddr(pi, _B_FALSE); 166 pi->pi_hwaddrlen = dnip->dni_physaddrlen; 167 (void) memcpy(pi->pi_hwaddr, dnip->dni_physaddr, pi->pi_hwaddrlen); 168 duppi = phyint_lookup_hwaddr(pi, _B_FALSE); 169 170 if (oduppi != NULL || pi->pi_hwaddrdup) { 171 /* 172 * Our old hardware address was a duplicate. If we'd been 173 * offlined because of it, and our new hardware address is not 174 * a duplicate, then bring us online. Otherwise, `oduppi' 175 * must've been the one brought offline; bring it online. 176 */ 177 if (pi->pi_hwaddrdup) { 178 if (duppi == NULL) 179 (void) phyint_undo_offline(pi); 180 } else { 181 assert(oduppi->pi_hwaddrdup); 182 (void) phyint_undo_offline(oduppi); 183 } 184 } 185 186 if (duppi != NULL && !pi->pi_hwaddrdup) { 187 /* 188 * Our new hardware address was a duplicate and we're not 189 * yet flagged as a duplicate; bring us offline. 190 */ 191 pi->pi_hwaddrdup = _B_TRUE; 192 (void) phyint_offline(pi, 0); 193 } 194 } 195 196 /* 197 * Initialize information about the underlying link for `pi', and set us 198 * up to be notified about future changes. Returns _B_TRUE on success. 
199 */ 200 boolean_t 201 phyint_link_init(struct phyint *pi) 202 { 203 int retval; 204 uint_t notes; 205 const char *errmsg; 206 dlpi_notifyid_t id; 207 208 pi->pi_notes = 0; 209 retval = dlpi_open(pi->pi_name, &pi->pi_dh, 0); 210 if (retval != DLPI_SUCCESS) { 211 pi->pi_dh = NULL; 212 errmsg = "cannot open"; 213 goto failed; 214 } 215 216 pi->pi_hwaddrlen = DLPI_PHYSADDR_MAX; 217 retval = dlpi_get_physaddr(pi->pi_dh, DL_CURR_PHYS_ADDR, pi->pi_hwaddr, 218 &pi->pi_hwaddrlen); 219 if (retval != DLPI_SUCCESS) { 220 errmsg = "cannot get hardware address"; 221 goto failed; 222 } 223 224 /* 225 * Check if the link supports DLPI link state notifications. For 226 * historical reasons, the actual changes are tracked through routing 227 * sockets, so we immediately disable the notification upon success. 228 */ 229 notes = DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN; 230 retval = dlpi_enabnotify(pi->pi_dh, notes, phyint_link_notify, pi, &id); 231 if (retval == DLPI_SUCCESS) { 232 (void) dlpi_disabnotify(pi->pi_dh, id, NULL); 233 pi->pi_notes |= notes; 234 } 235 236 /* 237 * Enable notification of hardware address changes to keep pi_hwaddr 238 * up-to-date and track if we need to offline/undo-offline phyints. 239 */ 240 notes = DL_NOTE_PHYS_ADDR; 241 retval = dlpi_enabnotify(pi->pi_dh, notes, phyint_link_notify, pi, &id); 242 if (retval == DLPI_SUCCESS && poll_add(dlpi_fd(pi->pi_dh)) == 0) 243 pi->pi_notes |= notes; 244 245 return (_B_TRUE); 246 failed: 247 logerr("%s: %s: %s\n", pi->pi_name, errmsg, dlpi_strerror(retval)); 248 if (pi->pi_dh != NULL) { 249 dlpi_close(pi->pi_dh); 250 pi->pi_dh = NULL; 251 } 252 return (_B_FALSE); 253 } 254 255 /* 256 * Close use of link on `pi'. 
257 */ 258 void 259 phyint_link_close(struct phyint *pi) 260 { 261 if (pi->pi_notes & DL_NOTE_PHYS_ADDR) { 262 (void) poll_remove(dlpi_fd(pi->pi_dh)); 263 pi->pi_notes &= ~DL_NOTE_PHYS_ADDR; 264 } 265 266 /* 267 * NOTE: we don't clear pi_notes here so that iflinkstate() can still 268 * properly report the link state even when offline (which is possible 269 * since we use IFF_RUNNING to track link state). 270 */ 271 dlpi_close(pi->pi_dh); 272 pi->pi_dh = NULL; 273 } 274 275 /* Return the phyint instance with the given name and the given family */ 276 struct phyint_instance * 277 phyint_inst_lookup(int af, char *name) 278 { 279 struct phyint *pi; 280 281 if (debug & D_PHYINT) 282 logdebug("phyint_inst_lookup(%s %s)\n", AF_STR(af), name); 283 284 assert(af == AF_INET || af == AF_INET6); 285 286 pi = phyint_lookup(name); 287 if (pi == NULL) 288 return (NULL); 289 290 return (PHYINT_INSTANCE(pi, af)); 291 } 292 293 struct phyint_group * 294 phyint_group_lookup(const char *pg_name) 295 { 296 struct phyint_group *pg; 297 298 if (debug & D_PHYINT) 299 logdebug("phyint_group_lookup(%s)\n", pg_name); 300 301 for (pg = phyint_groups; pg != NULL; pg = pg->pg_next) { 302 if (strncmp(pg->pg_name, pg_name, sizeof (pg->pg_name)) == 0) 303 break; 304 } 305 return (pg); 306 } 307 308 /* 309 * Insert the phyint in the linked list of all phyints. If the phyint belongs 310 * to some group, insert it in the phyint group list. 311 */ 312 static void 313 phyint_insert(struct phyint *pi, struct phyint_group *pg) 314 { 315 if (debug & D_PHYINT) 316 logdebug("phyint_insert(%s '%s')\n", pi->pi_name, pg->pg_name); 317 318 /* Insert the phyint at the head of the 'all phyints' list */ 319 pi->pi_next = phyints; 320 pi->pi_prev = NULL; 321 if (phyints != NULL) 322 phyints->pi_prev = pi; 323 phyints = pi; 324 325 /* 326 * Insert the phyint at the head of the 'phyint_group members' list 327 * of the phyint group to which it belongs. 
328 */ 329 pi->pi_pgnext = NULL; 330 pi->pi_pgprev = NULL; 331 pi->pi_group = pg; 332 333 pi->pi_pgnext = pg->pg_phyint; 334 if (pi->pi_pgnext != NULL) 335 pi->pi_pgnext->pi_pgprev = pi; 336 pg->pg_phyint = pi; 337 338 /* Refresh the group state now that this phyint has been added */ 339 phyint_group_refresh_state(pg); 340 341 pg->pg_sig++; 342 (void) phyint_group_member_event(pg, pi, IPMP_IF_ADD); 343 } 344 345 /* Insert the phyint instance in the linked list of all phyint instances. */ 346 static void 347 phyint_inst_insert(struct phyint_instance *pii) 348 { 349 if (debug & D_PHYINT) { 350 logdebug("phyint_inst_insert(%s %s)\n", 351 AF_STR(pii->pii_af), pii->pii_name); 352 } 353 354 /* 355 * Insert the phyint at the head of the 'all phyint instances' list. 356 */ 357 pii->pii_next = phyint_instances; 358 pii->pii_prev = NULL; 359 if (phyint_instances != NULL) 360 phyint_instances->pii_prev = pii; 361 phyint_instances = pii; 362 } 363 364 /* 365 * Create a new phyint with the given parameters. Also insert it into 366 * the list of all phyints and the list of phyint group members by calling 367 * phyint_insert(). 368 */ 369 static struct phyint * 370 phyint_create(char *pi_name, struct phyint_group *pg, uint_t ifindex, 371 uint64_t flags) 372 { 373 struct phyint *pi; 374 375 pi = calloc(1, sizeof (struct phyint)); 376 if (pi == NULL) { 377 logperror("phyint_create: calloc"); 378 return (NULL); 379 } 380 381 /* 382 * Record the phyint values. 383 */ 384 (void) strlcpy(pi->pi_name, pi_name, sizeof (pi->pi_name)); 385 pi->pi_taddrthresh = getcurrentsec() + TESTADDR_CONF_TIME; 386 pi->pi_ifindex = ifindex; 387 pi->pi_icmpid = htons(((getpid() & 0xFF) << 8) | (ifindex & 0xFF)); 388 389 pi->pi_state = PI_INIT; 390 pi->pi_flags = PHYINT_FLAGS(flags); 391 392 /* 393 * Initialize the link state. 
The link state is initialized to 394 * up, so that if the link is down when IPMP starts monitoring 395 * the interface, it will appear as though there has been a 396 * transition from the link up to link down. This avoids 397 * having to treat this situation as a special case. 398 */ 399 INIT_LINK_STATE(pi); 400 401 if (!phyint_link_init(pi)) { 402 free(pi); 403 return (NULL); 404 } 405 406 /* 407 * Insert the phyint in the list of all phyints, and the 408 * list of phyint group members 409 */ 410 phyint_insert(pi, pg); 411 412 return (pi); 413 } 414 415 /* 416 * Create a new phyint instance belonging to the phyint 'pi' and address 417 * family 'af'. Also insert it into the list of all phyint instances by 418 * calling phyint_inst_insert(). 419 */ 420 static struct phyint_instance * 421 phyint_inst_create(struct phyint *pi, int af) 422 { 423 struct phyint_instance *pii; 424 425 pii = calloc(1, sizeof (struct phyint_instance)); 426 if (pii == NULL) { 427 logperror("phyint_inst_create: calloc"); 428 return (NULL); 429 } 430 431 /* 432 * Attach the phyint instance to the phyint. 433 * Set the back pointers as well 434 */ 435 pii->pii_phyint = pi; 436 if (af == AF_INET) 437 pi->pi_v4 = pii; 438 else 439 pi->pi_v6 = pii; 440 441 pii->pii_in_use = 1; 442 pii->pii_probe_sock = -1; 443 pii->pii_snxt = 1; 444 pii->pii_af = af; 445 pii->pii_fd_hrtime = gethrtime() + 446 (FAILURE_DETECTION_QP * (hrtime_t)NANOSEC); 447 pii->pii_flags = pi->pi_flags; 448 449 /* Insert the phyint instance in the list of all phyint instances. */ 450 phyint_inst_insert(pii); 451 return (pii); 452 } 453 454 /* 455 * Change the state of phyint `pi' to state `state'. 456 */ 457 void 458 phyint_chstate(struct phyint *pi, enum pi_state state) 459 { 460 /* 461 * To simplify things, some callers always set a given state 462 * regardless of the previous state of the phyint (e.g., setting 463 * PI_RUNNING when it's already set). 
We shouldn't bother 464 * generating an event or consuming a signature for these, since 465 * the actual state of the interface is unchanged. 466 */ 467 if (pi->pi_state == state) 468 return; 469 470 pi->pi_state = state; 471 phyint_changed(pi); 472 } 473 474 /* 475 * Note that `pi' has changed state. 476 */ 477 void 478 phyint_changed(struct phyint *pi) 479 { 480 pi->pi_group->pg_sig++; 481 (void) phyint_state_event(pi->pi_group, pi); 482 } 483 484 /* 485 * Insert the phyint group in the linked list of all phyint groups 486 * at the head of the list 487 */ 488 void 489 phyint_group_insert(struct phyint_group *pg) 490 { 491 pg->pg_next = phyint_groups; 492 pg->pg_prev = NULL; 493 if (phyint_groups != NULL) 494 phyint_groups->pg_prev = pg; 495 phyint_groups = pg; 496 497 phyint_grouplistsig++; 498 (void) phyint_group_change_event(pg, IPMP_GROUP_ADD); 499 } 500 501 /* 502 * Create a new phyint group called 'name'. 503 */ 504 struct phyint_group * 505 phyint_group_create(const char *name) 506 { 507 struct phyint_group *pg; 508 509 if (debug & D_PHYINT) 510 logdebug("phyint_group_create(%s)\n", name); 511 512 pg = calloc(1, sizeof (struct phyint_group)); 513 if (pg == NULL) { 514 logperror("phyint_group_create: calloc"); 515 return (NULL); 516 } 517 518 (void) strlcpy(pg->pg_name, name, sizeof (pg->pg_name)); 519 pg->pg_sig = gensig(); 520 pg->pg_fdt = user_failure_detection_time; 521 pg->pg_probeint = user_probe_interval; 522 pg->pg_in_use = _B_TRUE; 523 524 /* 525 * Normal groups always start in the PG_FAILED state since they 526 * have no active interfaces. In contrast, anonymous groups are 527 * heterogeneous and thus always PG_OK. 528 */ 529 pg->pg_state = (name[0] == '\0' ? PG_OK : PG_FAILED); 530 531 return (pg); 532 } 533 534 /* 535 * Change the state of the phyint group `pg' to state `state'. 
536 */ 537 void 538 phyint_group_chstate(struct phyint_group *pg, enum pg_state state) 539 { 540 assert(pg != phyint_anongroup); 541 542 /* 543 * To simplify things, some callers always set a given state 544 * regardless of the previous state of the group (e.g., setting 545 * PG_DEGRADED when it's already set). We shouldn't bother 546 * generating an event or consuming a signature for these, since 547 * the actual state of the group is unchanged. 548 */ 549 if (pg->pg_state == state) 550 return; 551 552 pg->pg_state = state; 553 554 switch (state) { 555 case PG_FAILED: 556 /* 557 * We can never know with certainty that a group has 558 * failed. It is possible that all known targets have 559 * failed simultaneously, and new targets have come up 560 * instead. If the targets are routers then router 561 * discovery will kick in, and we will see the new routers 562 * thru routing socket messages. But if the targets are 563 * hosts, we have to discover it by multicast. So flush 564 * all the host targets. The next probe will send out a 565 * multicast echo request. If this is a group failure, we 566 * will still not see any response, otherwise the group 567 * will be repaired after we get NUM_PROBE_REPAIRS 568 * consecutive unicast replies on any phyint. 569 */ 570 target_flush_hosts(pg); 571 break; 572 573 case PG_OK: 574 case PG_DEGRADED: 575 break; 576 577 default: 578 logerr("phyint_group_chstate: invalid group state %d; " 579 "aborting\n", state); 580 abort(); 581 } 582 583 pg->pg_sig++; 584 (void) phyint_group_state_event(pg); 585 } 586 587 /* 588 * Create a new phyint instance and initialize it from the values supplied by 589 * the kernel. Always check for ENXIO before logging any error, because the 590 * interface could have vanished after completion of SIOCGLIFCONF. 591 * Return values: 592 * pointer to the phyint instance on success 593 * NULL on failure Eg. 
if the phyint instance is not found in the kernel
 */
struct phyint_instance *
phyint_inst_init_from_k(int af, char *pi_name)
{
	char pg_name[LIFNAMSIZ + 1];
	int ifsock;
	uint_t ifindex;
	uint64_t flags;
	struct lifreq lifr;
	struct phyint *pi;
	struct phyint_instance *pii;
	boolean_t pi_created;
	struct phyint_group *pg;

	/*
	 * We come back here after deleting a stale instance of the other
	 * address family, so all per-attempt state is reset below.
	 */
retry:
	pii = NULL;
	pi = NULL;
	pg = NULL;
	pi_created = _B_FALSE;

	if (debug & D_PHYINT) {
		logdebug("phyint_inst_init_from_k(%s %s)\n",
		    AF_STR(af), pi_name);
	}

	assert(af == AF_INET || af == AF_INET6);

	/* Get the socket for doing ioctls */
	ifsock = (af == AF_INET) ? ifsock_v4 : ifsock_v6;

	/*
	 * Get the interface flags. Ignore virtual interfaces, IPMP
	 * meta-interfaces, point-to-point interfaces, and interfaces
	 * that can't support multicast.
	 */
	(void) strlcpy(lifr.lifr_name, pi_name, sizeof (lifr.lifr_name));
	if (ioctl(ifsock, SIOCGLIFFLAGS, (char *)&lifr) < 0) {
		if (errno != ENXIO) {
			logperror("phyint_inst_init_from_k:"
			    " ioctl (get flags)");
		}
		return (NULL);
	}
	flags = lifr.lifr_flags;
	if (!(flags & IFF_MULTICAST) ||
	    (flags & (IFF_VIRTUAL|IFF_IPMP|IFF_POINTOPOINT)))
		return (NULL);

	/*
	 * Get the ifindex for recording later in our tables, in case we need
	 * to create a new phyint.
	 */
	if (ioctl(ifsock, SIOCGLIFINDEX, (char *)&lifr) < 0) {
		if (errno != ENXIO) {
			logperror("phyint_inst_init_from_k: "
			    " ioctl (get lifindex)");
		}
		return (NULL);
	}
	ifindex = lifr.lifr_index;

	/*
	 * Get the phyint group name of this phyint, from the kernel.
	 */
	if (ioctl(ifsock, SIOCGLIFGROUPNAME, (char *)&lifr) < 0) {
		if (errno != ENXIO) {
			logperror("phyint_inst_init_from_k: "
			    "ioctl (get group name)");
		}
		return (NULL);
	}
	(void) strlcpy(pg_name, lifr.lifr_groupname, sizeof (pg_name));

	/*
	 * If the phyint is not part of any group, pg_name is the
	 * null string. If 'track_all_phyints' is false, there is no
	 * need to create a phyint.
	 */
	if (pg_name[0] == '\0' && !track_all_phyints) {
		/*
		 * If the IFF_FAILED, IFF_INACTIVE, or IFF_OFFLINE flags are
		 * set, reset them. These flags shouldn't be set if in.mpathd
		 * isn't tracking the interface.
		 */
		if ((flags & (IFF_FAILED | IFF_INACTIVE | IFF_OFFLINE))) {
			lifr.lifr_flags = flags &
			    ~(IFF_FAILED | IFF_INACTIVE | IFF_OFFLINE);
			if (ioctl(ifsock, SIOCSLIFFLAGS, (char *)&lifr) < 0) {
				if (errno != ENXIO) {
					logperror("phyint_inst_init_from_k:"
					    " ioctl (set flags)");
				}
			}
		}
		return (NULL);
	}

	/*
	 * We need to create a new phyint instance. We may also need to
	 * create the group if e.g. the SIOCGLIFCONF loop in initifs() found
	 * an underlying interface before it found its IPMP meta-interface.
	 * Note that we keep any created groups even if
	 * phyint_inst_init_from_k() fails since a group's existence is not
	 * dependent on the ability of in.mpathd to track the group's
	 * interfaces.
	 */
	if ((pg = phyint_group_lookup(pg_name)) == NULL) {
		if ((pg = phyint_group_create(pg_name)) == NULL) {
			logerr("phyint_inst_init_from_k: cannot create group "
			    "%s\n", pg_name);
			return (NULL);
		}
		phyint_group_insert(pg);
	}

	/*
	 * Lookup the phyint. If the phyint does not exist create it.
	 */
	pi = phyint_lookup(pi_name);
	if (pi == NULL) {
		pi = phyint_create(pi_name, pg, ifindex, flags);
		if (pi == NULL) {
			logerr("phyint_inst_init_from_k:"
			    " unable to create phyint %s\n", pi_name);
			return (NULL);
		}
		pi_created = _B_TRUE;
	} else {
		/* The phyint exists already. */
		assert(pi_created == _B_FALSE);
		/*
		 * Normally we should see consistent values for the IPv4 and
		 * IPv6 instances, for phyint properties. If we don't, it
		 * means things have changed underneath us, and we should
		 * resync our tables with the kernel. Check whether the
		 * interface index has changed. If so, it is most likely
		 * the interface has been unplumbed and replumbed,
		 * while we are yet to update our tables. Do it now.
		 */
		if (pi->pi_ifindex != ifindex) {
			phyint_inst_delete(PHYINT_INSTANCE(pi, AF_OTHER(af)));
			goto retry;
		}
		assert(PHYINT_INSTANCE(pi, af) == NULL);

		/*
		 * If the group name seen by the IPv4 and IPv6 instances
		 * are different, it is most likely the groupname has
		 * changed, while we are yet to update our tables. Do it now.
		 */
		if (strcmp(pi->pi_group->pg_name, pg_name) != 0) {
			phyint_inst_delete(PHYINT_INSTANCE(pi,
			    AF_OTHER(af)));
			goto retry;
		}
	}

	/*
	 * Create a new phyint instance, corresponding to the 'af'
	 * passed in.
	 */
	pii = phyint_inst_create(pi, af);
	if (pii == NULL) {
		logerr("phyint_inst_init_from_k: unable to create "
		    "phyint inst %s\n", pi->pi_name);
		/* Only tear down the phyint if this call created it */
		if (pi_created)
			phyint_delete(pi);

		return (NULL);
	}

	/*
	 * NOTE: the change_pif_flags() implementation requires a phyint
	 * instance before it can function, so a number of tasks that would
	 * otherwise be done in phyint_create() are deferred to here.
	 */
	if (pi_created) {
		/*
		 * If the interface is offline, set the state to PI_OFFLINE.
		 * Otherwise, optimistically consider this interface running.
		 * Later (in process_link_state_changes()), we will adjust
		 * this to match the current state of the link. Further, if
		 * test addresses are subsequently assigned, we will
		 * transition to PI_NOTARGETS and then to either PI_RUNNING or
		 * PI_FAILED depending on the probe results.
		 */
		if (pi->pi_flags & IFF_OFFLINE) {
			phyint_chstate(pi, PI_OFFLINE);
		} else {
			/* calls phyint_chstate() */
			phyint_transition_to_running(pi);
		}

		/*
		 * If this a standby phyint, determine whether it should be
		 * IFF_INACTIVE.
		 */
		if (pi->pi_flags & IFF_STANDBY)
			phyint_standby_refresh_inactive(pi);

		/*
		 * If this phyint does not have a unique hardware address in its
		 * group, offline it.
		 */
		if (phyint_lookup_hwaddr(pi, _B_TRUE) != NULL) {
			pi->pi_hwaddrdup = _B_TRUE;
			(void) phyint_offline(pi, 0);
		}
	}

	return (pii);
}

/*
 * Bind pii_probe_sock to the address associated with pii_probe_logint.
 * This socket will be used for sending and receiving ICMP/ICMPv6 probes to
 * targets. Do the common part in this function, and complete the
 * initializations by calling the protocol specific functions
 * phyint_inst_v{4,6}_sockinit() respectively.
 *
 * Return values: _B_TRUE/_B_FALSE for success or failure respectively.
814 */ 815 boolean_t 816 phyint_inst_sockinit(struct phyint_instance *pii) 817 { 818 boolean_t success; 819 struct phyint_group *pg; 820 821 if (debug & D_PHYINT) { 822 logdebug("phyint_inst_sockinit(%s %s)\n", 823 AF_STR(pii->pii_af), pii->pii_name); 824 } 825 826 assert(pii->pii_probe_logint != NULL); 827 assert(pii->pii_probe_logint->li_flags & IFF_UP); 828 assert(pii->pii_probe_logint->li_flags & IFF_NOFAILOVER); 829 assert(pii->pii_af == AF_INET || pii->pii_af == AF_INET6); 830 831 /* 832 * If the socket is already bound, close pii_probe_sock 833 */ 834 if (pii->pii_probe_sock != -1) 835 close_probe_socket(pii, _B_TRUE); 836 837 /* 838 * If the phyint is not part of a named group and track_all_phyints is 839 * false, simply return. 840 */ 841 pg = pii->pii_phyint->pi_group; 842 if (pg == phyint_anongroup && !track_all_phyints) { 843 if (debug & D_PHYINT) 844 logdebug("phyint_inst_sockinit: no group\n"); 845 return (_B_FALSE); 846 } 847 848 /* 849 * Initialize the socket by calling the protocol specific function. 850 * If it succeeds, add the socket to the poll list. 851 */ 852 if (pii->pii_af == AF_INET6) 853 success = phyint_inst_v6_sockinit(pii); 854 else 855 success = phyint_inst_v4_sockinit(pii); 856 857 if (success && (poll_add(pii->pii_probe_sock) == 0)) 858 return (_B_TRUE); 859 860 /* Something failed, cleanup and return false */ 861 if (pii->pii_probe_sock != -1) 862 close_probe_socket(pii, _B_FALSE); 863 864 return (_B_FALSE); 865 } 866 867 /* 868 * IPv6 specific part in initializing the pii_probe_sock. This socket is 869 * used to send/receive ICMPv6 probe packets. 870 */ 871 static boolean_t 872 phyint_inst_v6_sockinit(struct phyint_instance *pii) 873 { 874 icmp6_filter_t filter; 875 int hopcount = 1; 876 int off = 0; 877 int on = 1; 878 struct sockaddr_in6 testaddr; 879 int flags; 880 881 /* 882 * Open a raw socket with ICMPv6 protocol. 883 * 884 * Use IPV6_BOUND_IF to make sure that probes are sent and received on 885 * the specified phyint only. 
Bind to the test address to ensure that 886 * the responses are sent to the specified phyint. 887 * 888 * Set the hopcount to 1 so that probe packets are not routed. 889 * Disable multicast loopback. Set the receive filter to 890 * receive only ICMPv6 echo replies. 891 */ 892 pii->pii_probe_sock = socket(pii->pii_af, SOCK_RAW, IPPROTO_ICMPV6); 893 if (pii->pii_probe_sock < 0) { 894 logperror_pii(pii, "phyint_inst_v6_sockinit: socket"); 895 return (_B_FALSE); 896 } 897 898 /* 899 * Probes must not block in case of lower layer issues. 900 */ 901 if ((flags = fcntl(pii->pii_probe_sock, F_GETFL, 0)) == -1) { 902 logperror_pii(pii, "phyint_inst_v6_sockinit: fcntl" 903 " F_GETFL"); 904 return (_B_FALSE); 905 } 906 if (fcntl(pii->pii_probe_sock, F_SETFL, 907 flags | O_NONBLOCK) == -1) { 908 logperror_pii(pii, "phyint_inst_v6_sockinit: fcntl" 909 " F_SETFL O_NONBLOCK"); 910 return (_B_FALSE); 911 } 912 913 bzero(&testaddr, sizeof (testaddr)); 914 testaddr.sin6_family = AF_INET6; 915 testaddr.sin6_port = 0; 916 testaddr.sin6_addr = pii->pii_probe_logint->li_addr; 917 918 if (bind(pii->pii_probe_sock, (struct sockaddr *)&testaddr, 919 sizeof (testaddr)) < 0) { 920 logperror_pii(pii, "phyint_inst_v6_sockinit: IPv6 bind"); 921 return (_B_FALSE); 922 } 923 924 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_MULTICAST_IF, 925 (char *)&pii->pii_ifindex, sizeof (uint_t)) < 0) { 926 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 927 " IPV6_MULTICAST_IF"); 928 return (_B_FALSE); 929 } 930 931 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_BOUND_IF, 932 &pii->pii_ifindex, sizeof (uint_t)) < 0) { 933 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 934 " IPV6_BOUND_IF"); 935 return (_B_FALSE); 936 } 937 938 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_UNICAST_HOPS, 939 (char *)&hopcount, sizeof (hopcount)) < 0) { 940 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 941 " IPV6_UNICAST_HOPS"); 942 return (_B_FALSE); 943 } 944 945 if 
(setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_MULTICAST_HOPS, 946 (char *)&hopcount, sizeof (hopcount)) < 0) { 947 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 948 " IPV6_MULTICAST_HOPS"); 949 return (_B_FALSE); 950 } 951 952 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_MULTICAST_LOOP, 953 (char *)&off, sizeof (off)) < 0) { 954 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 955 " IPV6_MULTICAST_LOOP"); 956 return (_B_FALSE); 957 } 958 959 /* 960 * Filter out so that we only receive ICMP echo replies 961 */ 962 ICMP6_FILTER_SETBLOCKALL(&filter); 963 ICMP6_FILTER_SETPASS(ICMP6_ECHO_REPLY, &filter); 964 965 if (setsockopt(pii->pii_probe_sock, IPPROTO_ICMPV6, ICMP6_FILTER, 966 (char *)&filter, sizeof (filter)) < 0) { 967 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 968 " ICMP6_FILTER"); 969 return (_B_FALSE); 970 } 971 972 /* Enable receipt of hoplimit */ 973 if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, 974 &on, sizeof (on)) < 0) { 975 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 976 " IPV6_RECVHOPLIMIT"); 977 return (_B_FALSE); 978 } 979 980 /* Enable receipt of timestamp */ 981 if (setsockopt(pii->pii_probe_sock, SOL_SOCKET, SO_TIMESTAMP, 982 &on, sizeof (on)) < 0) { 983 logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt" 984 " SO_TIMESTAMP"); 985 return (_B_FALSE); 986 } 987 988 return (_B_TRUE); 989 } 990 991 /* 992 * IPv4 specific part in initializing the pii_probe_sock. This socket is 993 * used to send/receive ICMPv4 probe packets. 994 */ 995 static boolean_t 996 phyint_inst_v4_sockinit(struct phyint_instance *pii) 997 { 998 struct sockaddr_in testaddr; 999 char char_off = 0; 1000 int ttl = 1; 1001 char char_ttl = 1; 1002 int on = 1; 1003 int flags; 1004 1005 /* 1006 * Open a raw socket with ICMPv4 protocol. 1007 * 1008 * Use IP_BOUND_IF to make sure that probes are sent and received on 1009 * the specified phyint only. 
Bind to the test address to ensure that 1010 * the responses are sent to the specified phyint. 1011 * 1012 * Set the ttl to 1 so that probe packets are not routed. 1013 * Disable multicast loopback. Enable receipt of timestamp. 1014 */ 1015 pii->pii_probe_sock = socket(pii->pii_af, SOCK_RAW, IPPROTO_ICMP); 1016 if (pii->pii_probe_sock < 0) { 1017 logperror_pii(pii, "phyint_inst_v4_sockinit: socket"); 1018 return (_B_FALSE); 1019 } 1020 1021 /* 1022 * Probes must not block in case of lower layer issues. 1023 */ 1024 if ((flags = fcntl(pii->pii_probe_sock, F_GETFL, 0)) == -1) { 1025 logperror_pii(pii, "phyint_inst_v4_sockinit: fcntl" 1026 " F_GETFL"); 1027 return (_B_FALSE); 1028 } 1029 if (fcntl(pii->pii_probe_sock, F_SETFL, 1030 flags | O_NONBLOCK) == -1) { 1031 logperror_pii(pii, "phyint_inst_v4_sockinit: fcntl" 1032 " F_SETFL O_NONBLOCK"); 1033 return (_B_FALSE); 1034 } 1035 1036 bzero(&testaddr, sizeof (testaddr)); 1037 testaddr.sin_family = AF_INET; 1038 testaddr.sin_port = 0; 1039 IN6_V4MAPPED_TO_INADDR(&pii->pii_probe_logint->li_addr, 1040 &testaddr.sin_addr); 1041 1042 if (bind(pii->pii_probe_sock, (struct sockaddr *)&testaddr, 1043 sizeof (testaddr)) < 0) { 1044 logperror_pii(pii, "phyint_inst_v4_sockinit: IPv4 bind"); 1045 return (_B_FALSE); 1046 } 1047 1048 if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_BOUND_IF, 1049 &pii->pii_ifindex, sizeof (uint_t)) < 0) { 1050 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1051 " IP_BOUND_IF"); 1052 return (_B_FALSE); 1053 } 1054 1055 if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_MULTICAST_IF, 1056 (char *)&testaddr.sin_addr, sizeof (struct in_addr)) < 0) { 1057 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1058 " IP_MULTICAST_IF"); 1059 return (_B_FALSE); 1060 } 1061 1062 if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_TTL, 1063 (char *)&ttl, sizeof (ttl)) < 0) { 1064 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1065 " IP_TTL"); 1066 return (_B_FALSE); 1067 } 1068 1069 if 
(setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_MULTICAST_LOOP, 1070 (char *)&char_off, sizeof (char_off)) == -1) { 1071 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1072 " IP_MULTICAST_LOOP"); 1073 return (_B_FALSE); 1074 } 1075 1076 if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_MULTICAST_TTL, 1077 (char *)&char_ttl, sizeof (char_ttl)) == -1) { 1078 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1079 " IP_MULTICAST_TTL"); 1080 return (_B_FALSE); 1081 } 1082 1083 if (setsockopt(pii->pii_probe_sock, SOL_SOCKET, SO_TIMESTAMP, &on, 1084 sizeof (on)) < 0) { 1085 logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt" 1086 " SO_TIMESTAMP"); 1087 return (_B_FALSE); 1088 } 1089 1090 return (_B_TRUE); 1091 } 1092 1093 /* 1094 * Remove the phyint group from the list of 'all phyint groups' 1095 * and free it. 1096 */ 1097 void 1098 phyint_group_delete(struct phyint_group *pg) 1099 { 1100 /* 1101 * The anonymous group always exists, even when empty. 1102 */ 1103 if (pg == phyint_anongroup) 1104 return; 1105 1106 if (debug & D_PHYINT) 1107 logdebug("phyint_group_delete('%s')\n", pg->pg_name); 1108 1109 /* 1110 * The phyint group must be empty, and must not have any phyints. 1111 * The phyint group must be in the list of all phyint groups 1112 */ 1113 assert(pg->pg_phyint == NULL); 1114 assert(phyint_groups == pg || pg->pg_prev != NULL); 1115 1116 if (pg->pg_prev != NULL) 1117 pg->pg_prev->pg_next = pg->pg_next; 1118 else 1119 phyint_groups = pg->pg_next; 1120 1121 if (pg->pg_next != NULL) 1122 pg->pg_next->pg_prev = pg->pg_prev; 1123 1124 pg->pg_next = NULL; 1125 pg->pg_prev = NULL; 1126 1127 phyint_grouplistsig++; 1128 (void) phyint_group_change_event(pg, IPMP_GROUP_REMOVE); 1129 1130 addrlist_free(&pg->pg_addrs); 1131 free(pg); 1132 } 1133 1134 /* 1135 * Refresh the state of `pg' based on its current members. 
1136 */ 1137 void 1138 phyint_group_refresh_state(struct phyint_group *pg) 1139 { 1140 enum pg_state state; 1141 enum pg_state origstate = pg->pg_state; 1142 struct phyint *pi, *usablepi; 1143 uint_t nif = 0, nusable = 0; 1144 1145 /* 1146 * Anonymous groups never change state. 1147 */ 1148 if (pg == phyint_anongroup) 1149 return; 1150 1151 for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) { 1152 nif++; 1153 if (phyint_is_usable(pi)) { 1154 nusable++; 1155 usablepi = pi; 1156 } 1157 } 1158 1159 if (nusable == 0) 1160 state = PG_FAILED; 1161 else if (nif == nusable) 1162 state = PG_OK; 1163 else 1164 state = PG_DEGRADED; 1165 1166 phyint_group_chstate(pg, state); 1167 1168 /* 1169 * If we're shutting down, skip logging messages since otherwise our 1170 * shutdown housecleaning will make us report that groups are unusable. 1171 */ 1172 if (cleanup_started) 1173 return; 1174 1175 /* 1176 * NOTE: We use pg_failmsg_printed rather than origstate since 1177 * otherwise at startup we'll log a "now usable" message when the 1178 * first usable phyint is added to an empty group. 1179 */ 1180 if (state != PG_FAILED && pg->pg_failmsg_printed) { 1181 assert(origstate == PG_FAILED); 1182 logerr("At least 1 IP interface (%s) in group %s is now " 1183 "usable\n", usablepi->pi_name, pg->pg_name); 1184 pg->pg_failmsg_printed = _B_FALSE; 1185 } else if (origstate != PG_FAILED && state == PG_FAILED) { 1186 logerr("All IP interfaces in group %s are now unusable\n", 1187 pg->pg_name); 1188 pg->pg_failmsg_printed = _B_TRUE; 1189 } 1190 } 1191 1192 /* 1193 * Extract information from the kernel about the desired phyint. 1194 * Look only for properties of the phyint and not properties of logints. 1195 * Take appropriate action on the changes. 1196 * Return codes: 1197 * PI_OK 1198 * The phyint exists in the kernel and matches our knowledge 1199 * of the phyint. 1200 * PI_DELETED 1201 * The phyint has vanished in the kernel. 
1202 * PI_IFINDEX_CHANGED 1203 * The phyint's interface index has changed. 1204 * Ask the caller to delete and recreate the phyint. 1205 * PI_IOCTL_ERROR 1206 * Some ioctl error. Don't change anything. 1207 * PI_GROUP_CHANGED 1208 * The phyint has changed group. 1209 */ 1210 int 1211 phyint_inst_update_from_k(struct phyint_instance *pii) 1212 { 1213 struct lifreq lifr; 1214 int ifsock; 1215 struct phyint *pi; 1216 1217 pi = pii->pii_phyint; 1218 1219 if (debug & D_PHYINT) { 1220 logdebug("phyint_inst_update_from_k(%s %s)\n", 1221 AF_STR(pii->pii_af), pi->pi_name); 1222 } 1223 1224 /* 1225 * Get the ifindex from the kernel, for comparison with the 1226 * value in our tables. 1227 */ 1228 (void) strncpy(lifr.lifr_name, pi->pi_name, sizeof (lifr.lifr_name)); 1229 lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0'; 1230 1231 ifsock = (pii->pii_af == AF_INET) ? ifsock_v4 : ifsock_v6; 1232 if (ioctl(ifsock, SIOCGLIFINDEX, &lifr) < 0) { 1233 if (errno == ENXIO) { 1234 return (PI_DELETED); 1235 } else { 1236 logperror_pii(pii, "phyint_inst_update_from_k:" 1237 " ioctl (get lifindex)"); 1238 return (PI_IOCTL_ERROR); 1239 } 1240 } 1241 1242 if (lifr.lifr_index != pi->pi_ifindex) { 1243 /* 1244 * The index has changed. Most likely the interface has 1245 * been unplumbed and replumbed. Ask the caller to take 1246 * appropriate action. 1247 */ 1248 if (debug & D_PHYINT) { 1249 logdebug("phyint_inst_update_from_k:" 1250 " old index %d new index %d\n", 1251 pi->pi_ifindex, lifr.lifr_index); 1252 } 1253 return (PI_IFINDEX_CHANGED); 1254 } 1255 1256 /* 1257 * Get the group name from the kernel, for comparison with 1258 * the value in our tables. 1259 */ 1260 if (ioctl(ifsock, SIOCGLIFGROUPNAME, &lifr) < 0) { 1261 if (errno == ENXIO) { 1262 return (PI_DELETED); 1263 } else { 1264 logperror_pii(pii, "phyint_inst_update_from_k:" 1265 " ioctl (get groupname)"); 1266 return (PI_IOCTL_ERROR); 1267 } 1268 } 1269 1270 /* 1271 * If the phyint has changed group i.e. 
if the phyint group name 1272 * returned by the kernel is different, ask the caller to delete 1273 * and recreate the phyint in the right group 1274 */ 1275 if (strcmp(lifr.lifr_groupname, pi->pi_group->pg_name) != 0) { 1276 /* Groupname has changed */ 1277 if (debug & D_PHYINT) { 1278 logdebug("phyint_inst_update_from_k:" 1279 " groupname change\n"); 1280 } 1281 return (PI_GROUP_CHANGED); 1282 } 1283 1284 /* 1285 * Get the current phyint flags from the kernel, and determine what 1286 * flags have changed by comparing against our tables. Note that the 1287 * IFF_INACTIVE processing in initifs() relies on this call to ensure 1288 * that IFF_INACTIVE is really still set on the interface. 1289 */ 1290 if (ioctl(ifsock, SIOCGLIFFLAGS, &lifr) < 0) { 1291 if (errno == ENXIO) { 1292 return (PI_DELETED); 1293 } else { 1294 logperror_pii(pii, "phyint_inst_update_from_k: " 1295 " ioctl (get flags)"); 1296 return (PI_IOCTL_ERROR); 1297 } 1298 } 1299 1300 pi->pi_flags = PHYINT_FLAGS(lifr.lifr_flags); 1301 if (pi->pi_v4 != NULL) 1302 pi->pi_v4->pii_flags = pi->pi_flags; 1303 if (pi->pi_v6 != NULL) 1304 pi->pi_v6->pii_flags = pi->pi_flags; 1305 1306 /* 1307 * Make sure the IFF_FAILED flag is set if and only if we think 1308 * the interface should be failed. 1309 */ 1310 if (pi->pi_flags & IFF_FAILED) { 1311 if (pi->pi_state == PI_RUNNING) 1312 (void) change_pif_flags(pi, 0, IFF_FAILED); 1313 } else { 1314 if (pi->pi_state == PI_FAILED) 1315 (void) change_pif_flags(pi, IFF_FAILED, IFF_INACTIVE); 1316 } 1317 1318 /* No change in phyint status */ 1319 return (PI_OK); 1320 } 1321 1322 /* 1323 * Delete the phyint. Remove it from the list of all phyints, and the 1324 * list of phyint group members. 
1325 */ 1326 static void 1327 phyint_delete(struct phyint *pi) 1328 { 1329 boolean_t active; 1330 struct phyint *pi2; 1331 struct phyint_group *pg = pi->pi_group; 1332 1333 if (debug & D_PHYINT) 1334 logdebug("phyint_delete(%s)\n", pi->pi_name); 1335 1336 /* Both IPv4 and IPv6 phyint instances must have been deleted. */ 1337 assert(pi->pi_v4 == NULL && pi->pi_v6 == NULL); 1338 1339 /* 1340 * The phyint must belong to a group. 1341 */ 1342 assert(pg->pg_phyint == pi || pi->pi_pgprev != NULL); 1343 1344 /* The phyint must be in the list of all phyints */ 1345 assert(phyints == pi || pi->pi_prev != NULL); 1346 1347 /* Remove the phyint from the phyint group list */ 1348 pg->pg_sig++; 1349 (void) phyint_group_member_event(pg, pi, IPMP_IF_REMOVE); 1350 1351 if (pi->pi_pgprev == NULL) { 1352 /* Phyint is the 1st in the phyint group list */ 1353 pg->pg_phyint = pi->pi_pgnext; 1354 } else { 1355 pi->pi_pgprev->pi_pgnext = pi->pi_pgnext; 1356 } 1357 if (pi->pi_pgnext != NULL) 1358 pi->pi_pgnext->pi_pgprev = pi->pi_pgprev; 1359 pi->pi_pgnext = NULL; 1360 pi->pi_pgprev = NULL; 1361 1362 /* Refresh the group state now that this phyint has been removed */ 1363 phyint_group_refresh_state(pg); 1364 1365 /* Remove the phyint from the global list of phyints */ 1366 if (pi->pi_prev == NULL) { 1367 /* Phyint is the 1st in the list */ 1368 phyints = pi->pi_next; 1369 } else { 1370 pi->pi_prev->pi_next = pi->pi_next; 1371 } 1372 if (pi->pi_next != NULL) 1373 pi->pi_next->pi_prev = pi->pi_prev; 1374 pi->pi_next = NULL; 1375 pi->pi_prev = NULL; 1376 1377 /* 1378 * See if another phyint in the group had been offlined because 1379 * it was a dup of `pi' -- and if so, online it. 
1380 */ 1381 if (!pi->pi_hwaddrdup && 1382 (pi2 = phyint_lookup_hwaddr(pi, _B_FALSE)) != NULL) { 1383 assert(pi2->pi_hwaddrdup); 1384 (void) phyint_undo_offline(pi2); 1385 } 1386 1387 /* 1388 * If the interface was in a named group and was either an active 1389 * standby or the last active interface, try to activate another 1390 * interface to compensate. 1391 */ 1392 if (pg != phyint_anongroup) { 1393 active = _B_FALSE; 1394 for (pi2 = pg->pg_phyint; pi2 != NULL; pi2 = pi2->pi_pgnext) { 1395 if (phyint_is_functioning(pi2) && 1396 !(pi2->pi_flags & IFF_INACTIVE)) { 1397 active = _B_TRUE; 1398 break; 1399 } 1400 } 1401 1402 if (!active || 1403 (pi->pi_flags & (IFF_STANDBY|IFF_INACTIVE)) == IFF_STANDBY) 1404 phyint_activate_another(pi); 1405 } 1406 1407 phyint_link_close(pi); 1408 free(pi); 1409 } 1410 1411 /* 1412 * Offline phyint `pi' if at least `minred' usable interfaces remain in the 1413 * group. Returns an IPMP error code. 1414 */ 1415 int 1416 phyint_offline(struct phyint *pi, uint_t minred) 1417 { 1418 boolean_t was_active; 1419 unsigned int nusable = 0; 1420 struct phyint *pi2; 1421 struct phyint_group *pg = pi->pi_group; 1422 1423 /* 1424 * Verify that enough usable interfaces in the group would remain. 1425 * As a special case, if the group has failed, allow any non-offline 1426 * phyints to be offlined. 1427 */ 1428 if (pg != phyint_anongroup) { 1429 for (pi2 = pg->pg_phyint; pi2 != NULL; pi2 = pi2->pi_pgnext) { 1430 if (pi2 == pi) 1431 continue; 1432 if (phyint_is_usable(pi2) || 1433 (GROUP_FAILED(pg) && pi2->pi_state != PI_OFFLINE)) 1434 nusable++; 1435 } 1436 } 1437 if (nusable < minred) 1438 return (IPMP_EMINRED); 1439 1440 was_active = ((pi->pi_flags & IFF_INACTIVE) == 0); 1441 1442 if (!change_pif_flags(pi, IFF_OFFLINE, IFF_INACTIVE)) 1443 return (IPMP_FAILURE); 1444 1445 /* 1446 * The interface is now offline, so stop probing it. Note that 1447 * if_mpadm(1M) will down the test addresses, after receiving a 1448 * success reply from us. 
The routing socket message will then make us 1449 * close the socket used for sending probes. But it is more logical 1450 * that an offlined interface must not be probed, even if it has test 1451 * addresses. 1452 * 1453 * NOTE: stop_probing() also sets PI_OFFLINE. 1454 */ 1455 stop_probing(pi); 1456 1457 /* 1458 * If we're offlining the phyint because it has a duplicate hardware 1459 * address, print a warning -- and leave the link open so that we can 1460 * be notified of hardware address changes that make it usable again. 1461 * Otherwise, close the link so that we won't prevent a detach. 1462 */ 1463 if (pi->pi_hwaddrdup) { 1464 logerr("IP interface %s has a hardware address which is not " 1465 "unique in group %s; offlining\n", pi->pi_name, 1466 pg->pg_name); 1467 } else { 1468 phyint_link_close(pi); 1469 } 1470 1471 /* 1472 * If this phyint was preventing another phyint with a duplicate 1473 * hardware address from being online, bring that one online now. 1474 */ 1475 if (!pi->pi_hwaddrdup && 1476 (pi2 = phyint_lookup_hwaddr(pi, _B_FALSE)) != NULL) { 1477 assert(pi2->pi_hwaddrdup); 1478 (void) phyint_undo_offline(pi2); 1479 } 1480 1481 /* 1482 * If this interface was active, try to activate another INACTIVE 1483 * interface in the group. 1484 */ 1485 if (was_active) 1486 phyint_activate_another(pi); 1487 1488 return (IPMP_SUCCESS); 1489 } 1490 1491 /* 1492 * Undo a previous offline of `pi'. Returns an IPMP error code. 1493 */ 1494 int 1495 phyint_undo_offline(struct phyint *pi) 1496 { 1497 if (pi->pi_state != PI_OFFLINE) { 1498 errno = EINVAL; 1499 return (IPMP_FAILURE); 1500 } 1501 1502 /* 1503 * If necessary, reinitialize our link information and verify that its 1504 * hardware address is still unique across the group. 
1505 */ 1506 if (pi->pi_dh == NULL && !phyint_link_init(pi)) { 1507 errno = EIO; 1508 return (IPMP_FAILURE); 1509 } 1510 1511 if (phyint_lookup_hwaddr(pi, _B_TRUE) != NULL) { 1512 pi->pi_hwaddrdup = _B_TRUE; 1513 return (IPMP_EHWADDRDUP); 1514 } 1515 1516 if (pi->pi_hwaddrdup) { 1517 logerr("IP interface %s now has a unique hardware address in " 1518 "group %s; onlining\n", pi->pi_name, pi->pi_group->pg_name); 1519 pi->pi_hwaddrdup = _B_FALSE; 1520 } 1521 1522 if (!change_pif_flags(pi, 0, IFF_OFFLINE)) 1523 return (IPMP_FAILURE); 1524 1525 /* 1526 * While the interface was offline, it may have failed (e.g. the link 1527 * may have gone down). phyint_inst_check_for_failure() will have 1528 * already set pi_flags with IFF_FAILED, so we can use that to decide 1529 * whether the phyint should transition to running. Note that after 1530 * we transition to running, we will start sending probes again (if 1531 * test addresses are configured), which may also reveal that the 1532 * interface is in fact failed. 1533 */ 1534 if (pi->pi_flags & IFF_FAILED) { 1535 phyint_chstate(pi, PI_FAILED); 1536 } else { 1537 /* calls phyint_chstate() */ 1538 phyint_transition_to_running(pi); 1539 } 1540 1541 /* 1542 * Give the requestor time to configure test addresses before 1543 * complaining that they're missing. 1544 */ 1545 pi->pi_taddrthresh = getcurrentsec() + TESTADDR_CONF_TIME; 1546 1547 return (IPMP_SUCCESS); 1548 } 1549 1550 /* 1551 * Delete (unlink and free), the phyint instance. 
1552 */ 1553 void 1554 phyint_inst_delete(struct phyint_instance *pii) 1555 { 1556 struct phyint *pi = pii->pii_phyint; 1557 1558 assert(pi != NULL); 1559 1560 if (debug & D_PHYINT) { 1561 logdebug("phyint_inst_delete(%s %s)\n", 1562 AF_STR(pii->pii_af), pi->pi_name); 1563 } 1564 1565 /* 1566 * If the phyint instance has associated probe targets 1567 * delete all the targets 1568 */ 1569 while (pii->pii_targets != NULL) 1570 target_delete(pii->pii_targets); 1571 1572 /* 1573 * Delete all the logints associated with this phyint 1574 * instance. 1575 */ 1576 while (pii->pii_logint != NULL) 1577 logint_delete(pii->pii_logint); 1578 1579 /* 1580 * Close the socket used to send probes to targets from this phyint. 1581 */ 1582 if (pii->pii_probe_sock != -1) 1583 close_probe_socket(pii, _B_TRUE); 1584 1585 /* 1586 * Phyint instance must be in the list of all phyint instances. 1587 * Remove phyint instance from the global list of phyint instances. 1588 */ 1589 assert(phyint_instances == pii || pii->pii_prev != NULL); 1590 if (pii->pii_prev == NULL) { 1591 /* Phyint is the 1st in the list */ 1592 phyint_instances = pii->pii_next; 1593 } else { 1594 pii->pii_prev->pii_next = pii->pii_next; 1595 } 1596 if (pii->pii_next != NULL) 1597 pii->pii_next->pii_prev = pii->pii_prev; 1598 pii->pii_next = NULL; 1599 pii->pii_prev = NULL; 1600 1601 /* 1602 * Reset the phyint instance pointer in the phyint. 1603 * If this is the last phyint instance (being deleted) on this 1604 * phyint, then delete the phyint. 
1605 */ 1606 if (pii->pii_af == AF_INET) 1607 pi->pi_v4 = NULL; 1608 else 1609 pi->pi_v6 = NULL; 1610 1611 if (pi->pi_v4 == NULL && pi->pi_v6 == NULL) 1612 phyint_delete(pi); 1613 1614 free(pii); 1615 } 1616 1617 static void 1618 phyint_inst_print(struct phyint_instance *pii) 1619 { 1620 struct logint *li; 1621 struct target *tg; 1622 char abuf[INET6_ADDRSTRLEN]; 1623 int most_recent; 1624 int i; 1625 1626 if (pii->pii_phyint == NULL) { 1627 logdebug("pii->pi_phyint NULL can't print\n"); 1628 return; 1629 } 1630 1631 logdebug("\nPhyint instance: %s %s index %u state %x flags %llx " 1632 "sock %x in_use %d\n", 1633 AF_STR(pii->pii_af), pii->pii_name, pii->pii_ifindex, 1634 pii->pii_state, pii->pii_phyint->pi_flags, pii->pii_probe_sock, 1635 pii->pii_in_use); 1636 1637 for (li = pii->pii_logint; li != NULL; li = li->li_next) 1638 logint_print(li); 1639 1640 logdebug("\n"); 1641 for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) 1642 target_print(tg); 1643 1644 if (pii->pii_targets == NULL) 1645 logdebug("pi_targets NULL\n"); 1646 1647 if (pii->pii_target_next != NULL) { 1648 logdebug("pi_target_next %s %s\n", AF_STR(pii->pii_af), 1649 pr_addr(pii->pii_af, pii->pii_target_next->tg_address, 1650 abuf, sizeof (abuf))); 1651 } else { 1652 logdebug("pi_target_next NULL\n"); 1653 } 1654 1655 if (pii->pii_rtt_target_next != NULL) { 1656 logdebug("pi_rtt_target_next %s %s\n", AF_STR(pii->pii_af), 1657 pr_addr(pii->pii_af, pii->pii_rtt_target_next->tg_address, 1658 abuf, sizeof (abuf))); 1659 } else { 1660 logdebug("pi_rtt_target_next NULL\n"); 1661 } 1662 1663 if (pii->pii_targets != NULL) { 1664 most_recent = PROBE_INDEX_PREV(pii->pii_probe_next); 1665 1666 i = most_recent; 1667 do { 1668 if (pii->pii_probes[i].pr_target != NULL) { 1669 logdebug("#%d target %s ", i, 1670 pr_addr(pii->pii_af, 1671 pii->pii_probes[i].pr_target->tg_address, 1672 abuf, sizeof (abuf))); 1673 } else { 1674 logdebug("#%d target NULL ", i); 1675 } 1676 logdebug("time_start %lld status %d " 
1677 "time_ackproc %lld time_lost %u", 1678 pii->pii_probes[i].pr_hrtime_start, 1679 pii->pii_probes[i].pr_status, 1680 pii->pii_probes[i].pr_hrtime_ackproc, 1681 pii->pii_probes[i].pr_time_lost); 1682 i = PROBE_INDEX_PREV(i); 1683 } while (i != most_recent); 1684 } 1685 } 1686 1687 /* 1688 * Lookup a logint based on the logical interface name, on the given 1689 * phyint instance. 1690 */ 1691 static struct logint * 1692 logint_lookup(struct phyint_instance *pii, char *name) 1693 { 1694 struct logint *li; 1695 1696 if (debug & D_LOGINT) { 1697 logdebug("logint_lookup(%s, %s)\n", 1698 AF_STR(pii->pii_af), name); 1699 } 1700 1701 for (li = pii->pii_logint; li != NULL; li = li->li_next) { 1702 if (strncmp(name, li->li_name, sizeof (li->li_name)) == 0) 1703 break; 1704 } 1705 return (li); 1706 } 1707 1708 /* 1709 * Insert a logint at the head of the list of logints of the given 1710 * phyint instance 1711 */ 1712 static void 1713 logint_insert(struct phyint_instance *pii, struct logint *li) 1714 { 1715 li->li_next = pii->pii_logint; 1716 li->li_prev = NULL; 1717 if (pii->pii_logint != NULL) 1718 pii->pii_logint->li_prev = li; 1719 pii->pii_logint = li; 1720 li->li_phyint_inst = pii; 1721 } 1722 1723 /* 1724 * Create a new named logint, on the specified phyint instance. 1725 */ 1726 static struct logint * 1727 logint_create(struct phyint_instance *pii, char *name) 1728 { 1729 struct logint *li; 1730 1731 if (debug & D_LOGINT) { 1732 logdebug("logint_create(%s %s %s)\n", 1733 AF_STR(pii->pii_af), pii->pii_name, name); 1734 } 1735 1736 li = calloc(1, sizeof (struct logint)); 1737 if (li == NULL) { 1738 logperror("logint_create: calloc"); 1739 return (NULL); 1740 } 1741 1742 (void) strncpy(li->li_name, name, sizeof (li->li_name)); 1743 li->li_name[sizeof (li->li_name) - 1] = '\0'; 1744 logint_insert(pii, li); 1745 return (li); 1746 } 1747 1748 /* 1749 * Initialize the logint based on the data returned by the kernel. 
1750 */ 1751 void 1752 logint_init_from_k(struct phyint_instance *pii, char *li_name) 1753 { 1754 int ifsock; 1755 uint64_t flags; 1756 uint64_t saved_flags; 1757 struct logint *li; 1758 struct lifreq lifr; 1759 struct in6_addr test_subnet; 1760 struct in6_addr testaddr; 1761 int test_subnet_len; 1762 struct sockaddr_in6 *sin6; 1763 struct sockaddr_in *sin; 1764 char abuf[INET6_ADDRSTRLEN]; 1765 boolean_t ptp = _B_FALSE; 1766 struct in6_addr tgaddr; 1767 1768 if (debug & D_LOGINT) { 1769 logdebug("logint_init_from_k(%s %s)\n", 1770 AF_STR(pii->pii_af), li_name); 1771 } 1772 1773 /* Get the socket for doing ioctls */ 1774 ifsock = (pii->pii_af == AF_INET) ? ifsock_v4 : ifsock_v6; 1775 1776 /* 1777 * Get the flags from the kernel. Also serves as a check whether 1778 * the logical still exists. If it doesn't exist, no need to proceed 1779 * any further. li_in_use will make the caller clean up the logint 1780 */ 1781 (void) strncpy(lifr.lifr_name, li_name, sizeof (lifr.lifr_name)); 1782 lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0'; 1783 if (ioctl(ifsock, SIOCGLIFFLAGS, (char *)&lifr) < 0) { 1784 /* Interface may have vanished */ 1785 if (errno != ENXIO) { 1786 logperror_pii(pii, "logint_init_from_k: " 1787 "ioctl (get flags)"); 1788 } 1789 return; 1790 } 1791 1792 flags = lifr.lifr_flags; 1793 1794 /* 1795 * Verified the logint exists. Now lookup the logint in our tables. 1796 * If it does not exist, create a new logint. 1797 */ 1798 li = logint_lookup(pii, li_name); 1799 if (li == NULL) { 1800 li = logint_create(pii, li_name); 1801 if (li == NULL) { 1802 /* 1803 * Pretend the interface does not exist 1804 * in the kernel 1805 */ 1806 return; 1807 } 1808 } 1809 1810 /* 1811 * Update li->li_flags with the new flags, after saving the old 1812 * value. 
This is used later to check what flags has changed and 1813 * take any action 1814 */ 1815 saved_flags = li->li_flags; 1816 li->li_flags = flags; 1817 1818 /* 1819 * Get the address, prefix, prefixlength and update the logint. 1820 * Check if anything has changed. If the logint used for the 1821 * test address has changed, take suitable action. 1822 */ 1823 if (ioctl(ifsock, SIOCGLIFADDR, (char *)&lifr) < 0) { 1824 /* Interface may have vanished */ 1825 if (errno != ENXIO) { 1826 logperror_li(li, "logint_init_from_k: (get addr)"); 1827 } 1828 goto error; 1829 } 1830 1831 if (pii->pii_af == AF_INET) { 1832 sin = (struct sockaddr_in *)&lifr.lifr_addr; 1833 IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &testaddr); 1834 } else { 1835 sin6 = (struct sockaddr_in6 *)&lifr.lifr_addr; 1836 testaddr = sin6->sin6_addr; 1837 } 1838 1839 if (ioctl(ifsock, SIOCGLIFSUBNET, (char *)&lifr) < 0) { 1840 /* Interface may have vanished */ 1841 if (errno != ENXIO) 1842 logperror_li(li, "logint_init_from_k: (get subnet)"); 1843 goto error; 1844 } 1845 if (lifr.lifr_subnet.ss_family == AF_INET6) { 1846 sin6 = (struct sockaddr_in6 *)&lifr.lifr_subnet; 1847 test_subnet = sin6->sin6_addr; 1848 test_subnet_len = lifr.lifr_addrlen; 1849 } else { 1850 sin = (struct sockaddr_in *)&lifr.lifr_subnet; 1851 IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &test_subnet); 1852 test_subnet_len = lifr.lifr_addrlen + (IPV6_ABITS - IP_ABITS); 1853 } 1854 1855 /* 1856 * If this is the logint corresponding to the test address used for 1857 * sending probes, then if anything significant has changed we need to 1858 * determine the test address again. We ignore changes to the 1859 * IFF_FAILED and IFF_RUNNING flags since those happen as a matter of 1860 * course. 
1861 */ 1862 if (pii->pii_probe_logint == li) { 1863 if (((li->li_flags ^ saved_flags) & 1864 ~(IFF_FAILED | IFF_RUNNING)) != 0 || 1865 !IN6_ARE_ADDR_EQUAL(&testaddr, &li->li_addr) || 1866 (!ptp && !IN6_ARE_ADDR_EQUAL(&test_subnet, 1867 &li->li_subnet)) || 1868 (!ptp && test_subnet_len != li->li_subnet_len) || 1869 (ptp && !IN6_ARE_ADDR_EQUAL(&tgaddr, &li->li_dstaddr))) { 1870 /* 1871 * Something significant that affects the testaddress 1872 * has changed. Redo the testaddress selection later on 1873 * in select_test_ifs(). For now do the cleanup and 1874 * set pii_probe_logint to NULL. 1875 */ 1876 if (pii->pii_probe_sock != -1) 1877 close_probe_socket(pii, _B_TRUE); 1878 pii->pii_probe_logint = NULL; 1879 } 1880 } 1881 1882 1883 /* Update the logint with the values obtained from the kernel. */ 1884 li->li_addr = testaddr; 1885 li->li_in_use = 1; 1886 if (ptp) { 1887 li->li_dstaddr = tgaddr; 1888 li->li_subnet_len = (pii->pii_af == AF_INET) ? 1889 IP_ABITS : IPV6_ABITS; 1890 } else { 1891 li->li_subnet = test_subnet; 1892 li->li_subnet_len = test_subnet_len; 1893 } 1894 1895 if (debug & D_LOGINT) 1896 logint_print(li); 1897 1898 return; 1899 1900 error: 1901 logerr("logint_init_from_k: IGNORED %s %s %s addr %s\n", 1902 AF_STR(pii->pii_af), pii->pii_name, li->li_name, 1903 pr_addr(pii->pii_af, testaddr, abuf, sizeof (abuf))); 1904 logint_delete(li); 1905 } 1906 1907 /* 1908 * Delete (unlink and free) a logint. 
1909 */ 1910 void 1911 logint_delete(struct logint *li) 1912 { 1913 struct phyint_instance *pii; 1914 1915 pii = li->li_phyint_inst; 1916 assert(pii != NULL); 1917 1918 if (debug & D_LOGINT) { 1919 int af; 1920 char abuf[INET6_ADDRSTRLEN]; 1921 1922 af = pii->pii_af; 1923 logdebug("logint_delete(%s %s %s/%u)\n", 1924 AF_STR(af), li->li_name, 1925 pr_addr(af, li->li_addr, abuf, sizeof (abuf)), 1926 li->li_subnet_len); 1927 } 1928 1929 /* logint must be in the list of logints */ 1930 assert(pii->pii_logint == li || li->li_prev != NULL); 1931 1932 /* Remove the logint from the list of logints */ 1933 if (li->li_prev == NULL) { 1934 /* logint is the 1st in the list */ 1935 pii->pii_logint = li->li_next; 1936 } else { 1937 li->li_prev->li_next = li->li_next; 1938 } 1939 if (li->li_next != NULL) 1940 li->li_next->li_prev = li->li_prev; 1941 li->li_next = NULL; 1942 li->li_prev = NULL; 1943 1944 /* 1945 * If this logint is also being used for probing, then close the 1946 * associated socket, if it exists. 
1947 */ 1948 if (pii->pii_probe_logint == li) { 1949 if (pii->pii_probe_sock != -1) 1950 close_probe_socket(pii, _B_TRUE); 1951 pii->pii_probe_logint = NULL; 1952 } 1953 1954 free(li); 1955 } 1956 1957 static void 1958 logint_print(struct logint *li) 1959 { 1960 char abuf[INET6_ADDRSTRLEN]; 1961 int af = li->li_phyint_inst->pii_af; 1962 1963 logdebug("logint: %s %s addr %s/%u", AF_STR(af), li->li_name, 1964 pr_addr(af, li->li_addr, abuf, sizeof (abuf)), li->li_subnet_len); 1965 1966 logdebug("\tFlags: %llx in_use %d\n", li->li_flags, li->li_in_use); 1967 } 1968 1969 char * 1970 pr_addr(int af, struct in6_addr addr, char *abuf, int len) 1971 { 1972 struct in_addr addr_v4; 1973 1974 if (af == AF_INET) { 1975 IN6_V4MAPPED_TO_INADDR(&addr, &addr_v4); 1976 (void) inet_ntop(AF_INET, (void *)&addr_v4, abuf, len); 1977 } else { 1978 (void) inet_ntop(AF_INET6, (void *)&addr, abuf, len); 1979 } 1980 return (abuf); 1981 } 1982 1983 /* 1984 * Fill in the sockaddr_storage pointed to by `ssp' with the IP address 1985 * represented by the [`af',`addr'] pair. Needed because in.mpathd internally 1986 * stores all addresses as in6_addrs, but we don't want to expose that. 
1987 */ 1988 void 1989 addr2storage(int af, const struct in6_addr *addr, struct sockaddr_storage *ssp) 1990 { 1991 struct sockaddr_in *sinp = (struct sockaddr_in *)ssp; 1992 struct sockaddr_in6 *sin6p = (struct sockaddr_in6 *)ssp; 1993 1994 assert(af == AF_INET || af == AF_INET6); 1995 1996 switch (af) { 1997 case AF_INET: 1998 (void) memset(sinp, 0, sizeof (*sinp)); 1999 sinp->sin_family = AF_INET; 2000 IN6_V4MAPPED_TO_INADDR(addr, &sinp->sin_addr); 2001 break; 2002 case AF_INET6: 2003 (void) memset(sin6p, 0, sizeof (*sin6p)); 2004 sin6p->sin6_family = AF_INET6; 2005 sin6p->sin6_addr = *addr; 2006 break; 2007 } 2008 } 2009 2010 /* Lookup target on its address */ 2011 struct target * 2012 target_lookup(struct phyint_instance *pii, struct in6_addr addr) 2013 { 2014 struct target *tg; 2015 2016 if (debug & D_TARGET) { 2017 char abuf[INET6_ADDRSTRLEN]; 2018 2019 logdebug("target_lookup(%s %s): addr %s\n", 2020 AF_STR(pii->pii_af), pii->pii_name, 2021 pr_addr(pii->pii_af, addr, abuf, sizeof (abuf))); 2022 } 2023 2024 for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) { 2025 if (IN6_ARE_ADDR_EQUAL(&tg->tg_address, &addr)) 2026 break; 2027 } 2028 return (tg); 2029 } 2030 2031 /* 2032 * Find and return the next active target, for the next probe. 2033 * If no active targets are available, return NULL. 2034 */ 2035 struct target * 2036 target_next(struct target *tg) 2037 { 2038 struct phyint_instance *pii = tg->tg_phyint_inst; 2039 struct target *marker = tg; 2040 hrtime_t now; 2041 2042 now = gethrtime(); 2043 2044 /* 2045 * Target must be in the list of targets for this phyint 2046 * instance. 2047 */ 2048 assert(pii->pii_targets == tg || tg->tg_prev != NULL); 2049 assert(pii->pii_targets != NULL); 2050 2051 /* Return the next active target */ 2052 do { 2053 /* 2054 * Go to the next target. 
If we hit the end, 2055 * reset the ptr to the head 2056 */ 2057 tg = tg->tg_next; 2058 if (tg == NULL) 2059 tg = pii->pii_targets; 2060 2061 assert(TG_STATUS_VALID(tg->tg_status)); 2062 2063 switch (tg->tg_status) { 2064 case TG_ACTIVE: 2065 return (tg); 2066 2067 case TG_UNUSED: 2068 assert(pii->pii_targets_are_routers); 2069 if (pii->pii_ntargets < MAX_PROBE_TARGETS) { 2070 /* 2071 * Bubble up the unused target to active 2072 */ 2073 tg->tg_status = TG_ACTIVE; 2074 pii->pii_ntargets++; 2075 return (tg); 2076 } 2077 break; 2078 2079 case TG_SLOW: 2080 assert(pii->pii_targets_are_routers); 2081 if (tg->tg_latime + MIN_RECOVERY_TIME < now) { 2082 /* 2083 * Bubble up the slow target to unused 2084 */ 2085 tg->tg_status = TG_UNUSED; 2086 } 2087 break; 2088 2089 case TG_DEAD: 2090 assert(pii->pii_targets_are_routers); 2091 if (tg->tg_latime + MIN_RECOVERY_TIME < now) { 2092 /* 2093 * Bubble up the dead target to slow 2094 */ 2095 tg->tg_status = TG_SLOW; 2096 tg->tg_latime = now; 2097 } 2098 break; 2099 } 2100 2101 } while (tg != marker); 2102 2103 return (NULL); 2104 } 2105 2106 /* 2107 * Select the best available target, that is not already TG_ACTIVE, 2108 * for the caller. The caller will determine whether it wants to 2109 * make the returned target TG_ACTIVE. 2110 * The selection order is as follows. 2111 * 1. pick a TG_UNSED target, if it exists. 2112 * 2. else pick a TG_SLOW target that has recovered, if it exists 2113 * 3. else pick any TG_SLOW target, if it exists 2114 * 4. else pick a TG_DEAD target that has recovered, if it exists 2115 * 5. else pick any TG_DEAD target, if it exists 2116 * 6. 
else return null
 */
static struct target *
target_select_best(struct phyint_instance *pii)
{
	struct target	*cand;
	struct target	*slow_any = NULL;
	struct target	*slow_ready = NULL;
	struct target	*dead_any = NULL;
	struct target	*dead_ready = NULL;
	hrtime_t	now = gethrtime();
	boolean_t	recovered;

	/*
	 * One pass over the list.  An unused target wins immediately.  For
	 * slow and dead targets, remember the last one seen in each of the
	 * "recovered" and "not recovered" categories; a recovered target is
	 * also promoted one step on the spot (slow -> unused, dead -> slow
	 * with its clock reset).
	 */
	for (cand = pii->pii_targets; cand != NULL; cand = cand->tg_next) {
		assert(TG_STATUS_VALID(cand->tg_status));

		if (cand->tg_status == TG_UNUSED)
			return (cand);

		if (cand->tg_status == TG_SLOW) {
			recovered =
			    (cand->tg_latime + MIN_RECOVERY_TIME < now);
			if (recovered) {
				slow_ready = cand;
				cand->tg_status = TG_UNUSED;
			} else {
				slow_any = cand;
			}
		} else if (cand->tg_status == TG_DEAD) {
			recovered =
			    (cand->tg_latime + MIN_RECOVERY_TIME < now);
			if (recovered) {
				dead_ready = cand;
				cand->tg_status = TG_SLOW;
				cand->tg_latime = now;
			} else {
				dead_any = cand;
			}
		}
		/* Any other status (e.g. TG_ACTIVE) is not a candidate. */
	}

	/* Apply the preference order; dead_any may itself be NULL. */
	if (slow_ready != NULL)
		return (slow_ready);
	if (slow_any != NULL)
		return (slow_any);
	if (dead_ready != NULL)
		return (dead_ready);
	return (dead_any);
}

/*
 * Some target was deleted. If we don't have even MIN_PROBE_TARGETS
 * that are active, pick the next best below.
2180 */ 2181 static void 2182 target_activate_all(struct phyint_instance *pii) 2183 { 2184 struct target *tg; 2185 2186 assert(pii->pii_ntargets == 0); 2187 assert(pii->pii_target_next == NULL); 2188 assert(pii->pii_rtt_target_next == NULL); 2189 assert(pii->pii_targets_are_routers); 2190 2191 while (pii->pii_ntargets < MIN_PROBE_TARGETS) { 2192 tg = target_select_best(pii); 2193 if (tg == NULL) { 2194 /* We are out of targets */ 2195 return; 2196 } 2197 2198 assert(TG_STATUS_VALID(tg->tg_status)); 2199 assert(tg->tg_status != TG_ACTIVE); 2200 tg->tg_status = TG_ACTIVE; 2201 pii->pii_ntargets++; 2202 if (pii->pii_target_next == NULL) { 2203 pii->pii_target_next = tg; 2204 pii->pii_rtt_target_next = tg; 2205 } 2206 } 2207 } 2208 2209 static struct target * 2210 target_first(struct phyint_instance *pii) 2211 { 2212 struct target *tg; 2213 2214 for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) { 2215 assert(TG_STATUS_VALID(tg->tg_status)); 2216 if (tg->tg_status == TG_ACTIVE) 2217 break; 2218 } 2219 2220 return (tg); 2221 } 2222 2223 /* 2224 * Create a default target entry. 2225 */ 2226 void 2227 target_create(struct phyint_instance *pii, struct in6_addr addr, 2228 boolean_t is_router) 2229 { 2230 struct target *tg; 2231 struct phyint *pi; 2232 struct logint *li; 2233 2234 if (debug & D_TARGET) { 2235 char abuf[INET6_ADDRSTRLEN]; 2236 2237 logdebug("target_create(%s %s, %s)\n", 2238 AF_STR(pii->pii_af), pii->pii_name, 2239 pr_addr(pii->pii_af, addr, abuf, sizeof (abuf))); 2240 } 2241 2242 /* 2243 * If the test address is not yet initialized, do not add 2244 * any target, since we cannot determine whether the target 2245 * belongs to the same subnet as the test address. 2246 */ 2247 li = pii->pii_probe_logint; 2248 if (li == NULL) 2249 return; 2250 2251 /* 2252 * If there are multiple subnets associated with an interface, then 2253 * add the target to this phyint instance only if it belongs to the 2254 * same subnet as the test address. 
This assures us that we will 2255 * be able to reach this target through our routing table. 2256 */ 2257 if (!prefix_equal(li->li_subnet, addr, li->li_subnet_len)) 2258 return; 2259 2260 if (pii->pii_targets != NULL) { 2261 assert(pii->pii_ntargets <= MAX_PROBE_TARGETS); 2262 if (is_router) { 2263 if (!pii->pii_targets_are_routers) { 2264 /* 2265 * Prefer router over hosts. Using hosts is a 2266 * fallback mechanism, hence delete all host 2267 * targets. 2268 */ 2269 while (pii->pii_targets != NULL) 2270 target_delete(pii->pii_targets); 2271 } 2272 } else { 2273 /* 2274 * Routers take precedence over hosts. If this 2275 * is a router list and we are trying to add a 2276 * host, just return. If this is a host list 2277 * and if we have sufficient targets, just return 2278 */ 2279 if (pii->pii_targets_are_routers || 2280 pii->pii_ntargets == MAX_PROBE_TARGETS) 2281 return; 2282 } 2283 } 2284 2285 tg = calloc(1, sizeof (struct target)); 2286 if (tg == NULL) { 2287 logperror("target_create: calloc"); 2288 return; 2289 } 2290 2291 tg->tg_phyint_inst = pii; 2292 tg->tg_address = addr; 2293 tg->tg_in_use = 1; 2294 tg->tg_rtt_sa = -1; 2295 tg->tg_num_deferred = 0; 2296 2297 /* 2298 * If this is the first target, set 'pii_targets_are_routers' 2299 * The list of targets is either a list of hosts or list or 2300 * routers, but not a mix. 2301 */ 2302 if (pii->pii_targets == NULL) { 2303 assert(pii->pii_ntargets == 0); 2304 assert(pii->pii_target_next == NULL); 2305 assert(pii->pii_rtt_target_next == NULL); 2306 pii->pii_targets_are_routers = is_router ? 
1 : 0; 2307 } 2308 2309 if (pii->pii_ntargets == MAX_PROBE_TARGETS) { 2310 assert(pii->pii_targets_are_routers); 2311 assert(pii->pii_target_next != NULL); 2312 assert(pii->pii_rtt_target_next != NULL); 2313 tg->tg_status = TG_UNUSED; 2314 } else { 2315 if (pii->pii_ntargets == 0) { 2316 assert(pii->pii_target_next == NULL); 2317 pii->pii_target_next = tg; 2318 pii->pii_rtt_target_next = tg; 2319 } 2320 pii->pii_ntargets++; 2321 tg->tg_status = TG_ACTIVE; 2322 } 2323 2324 target_insert(pii, tg); 2325 2326 /* 2327 * Change state to PI_RUNNING if this phyint instance is capable of 2328 * sending and receiving probes -- that is, if we know of at least 1 2329 * target, and this phyint instance is probe-capable. For more 2330 * details, see the phyint state diagram in mpd_probe.c. 2331 */ 2332 pi = pii->pii_phyint; 2333 if (pi->pi_state == PI_NOTARGETS && PROBE_CAPABLE(pii)) { 2334 if (pi->pi_flags & IFF_FAILED) 2335 phyint_chstate(pi, PI_FAILED); 2336 else 2337 phyint_chstate(pi, PI_RUNNING); 2338 } 2339 } 2340 2341 /* 2342 * Add the target address named by `addr' to phyint instance `pii' if it does 2343 * not already exist. If the target is a router, `is_router' should be set to 2344 * B_TRUE. 2345 */ 2346 void 2347 target_add(struct phyint_instance *pii, struct in6_addr addr, 2348 boolean_t is_router) 2349 { 2350 struct target *tg; 2351 2352 if (pii == NULL) 2353 return; 2354 2355 tg = target_lookup(pii, addr); 2356 2357 /* 2358 * If the target does not exist, create it; target_create() will set 2359 * tg_in_use to true. Even if it exists already, if it's a router 2360 * target and we'd previously learned of it through multicast, then we 2361 * need to recreate it as a router target. Otherwise, just set 2362 * tg_in_use to to true so that init_router_targets() won't delete it. 
2363 */ 2364 if (tg == NULL || (is_router && !pii->pii_targets_are_routers)) 2365 target_create(pii, addr, is_router); 2366 else if (is_router) 2367 tg->tg_in_use = 1; 2368 } 2369 2370 /* 2371 * Insert target at head of linked list of targets for the associated 2372 * phyint instance 2373 */ 2374 static void 2375 target_insert(struct phyint_instance *pii, struct target *tg) 2376 { 2377 tg->tg_next = pii->pii_targets; 2378 tg->tg_prev = NULL; 2379 if (tg->tg_next != NULL) 2380 tg->tg_next->tg_prev = tg; 2381 pii->pii_targets = tg; 2382 } 2383 2384 /* 2385 * Delete a target (unlink and free). 2386 */ 2387 void 2388 target_delete(struct target *tg) 2389 { 2390 int af; 2391 struct phyint_instance *pii; 2392 struct phyint_instance *pii_other; 2393 2394 pii = tg->tg_phyint_inst; 2395 af = pii->pii_af; 2396 2397 if (debug & D_TARGET) { 2398 char abuf[INET6_ADDRSTRLEN]; 2399 2400 logdebug("target_delete(%s %s, %s)\n", 2401 AF_STR(af), pii->pii_name, 2402 pr_addr(af, tg->tg_address, abuf, sizeof (abuf))); 2403 } 2404 2405 /* 2406 * Target must be in the list of targets for this phyint 2407 * instance. 2408 */ 2409 assert(pii->pii_targets == tg || tg->tg_prev != NULL); 2410 2411 /* 2412 * Reset all references to 'tg' in the probe information 2413 * for this phyint. 2414 */ 2415 reset_pii_probes(pii, tg); 2416 2417 /* 2418 * Remove this target from the list of targets of this 2419 * phyint instance. 2420 */ 2421 if (tg->tg_prev == NULL) { 2422 pii->pii_targets = tg->tg_next; 2423 } else { 2424 tg->tg_prev->tg_next = tg->tg_next; 2425 } 2426 2427 if (tg->tg_next != NULL) 2428 tg->tg_next->tg_prev = tg->tg_prev; 2429 2430 tg->tg_next = NULL; 2431 tg->tg_prev = NULL; 2432 2433 if (tg->tg_status == TG_ACTIVE) 2434 pii->pii_ntargets--; 2435 2436 /* 2437 * Adjust the next target to probe, if it points to 2438 * to the currently deleted target. 
2439 */ 2440 if (pii->pii_target_next == tg) 2441 pii->pii_target_next = target_first(pii); 2442 2443 if (pii->pii_rtt_target_next == tg) 2444 pii->pii_rtt_target_next = target_first(pii); 2445 2446 free(tg); 2447 2448 /* 2449 * The number of active targets pii_ntargets == 0 iff 2450 * the next active target pii->pii_target_next == NULL 2451 */ 2452 if (pii->pii_ntargets != 0) { 2453 assert(pii->pii_target_next != NULL); 2454 assert(pii->pii_rtt_target_next != NULL); 2455 assert(pii->pii_target_next->tg_status == TG_ACTIVE); 2456 assert(pii->pii_rtt_target_next->tg_status == TG_ACTIVE); 2457 return; 2458 } 2459 2460 /* At this point, we don't have any active targets. */ 2461 assert(pii->pii_target_next == NULL); 2462 assert(pii->pii_rtt_target_next == NULL); 2463 2464 if (pii->pii_targets_are_routers) { 2465 /* 2466 * Activate any TG_SLOW or TG_DEAD router targets, 2467 * since we don't have any other targets 2468 */ 2469 target_activate_all(pii); 2470 2471 if (pii->pii_ntargets != 0) { 2472 assert(pii->pii_target_next != NULL); 2473 assert(pii->pii_rtt_target_next != NULL); 2474 assert(pii->pii_target_next->tg_status == TG_ACTIVE); 2475 assert(pii->pii_rtt_target_next->tg_status == 2476 TG_ACTIVE); 2477 return; 2478 } 2479 } 2480 2481 /* 2482 * If we still don't have any active targets, the list must 2483 * must be really empty. There aren't even TG_SLOW or TG_DEAD 2484 * targets. Zero out the probe stats since it will not be 2485 * relevant any longer. 2486 */ 2487 assert(pii->pii_targets == NULL); 2488 pii->pii_targets_are_routers = _B_FALSE; 2489 clear_pii_probe_stats(pii); 2490 pii_other = phyint_inst_other(pii); 2491 2492 /* 2493 * If there are no targets on both instances and the interface would 2494 * otherwise be considered PI_RUNNING, go back to PI_NOTARGETS state, 2495 * since we cannot probe this phyint any more. For more details, 2496 * please see phyint state diagram in mpd_probe.c. 
2497 */ 2498 if (!PROBE_CAPABLE(pii_other) && LINK_UP(pii->pii_phyint) && 2499 pii->pii_phyint->pi_state != PI_OFFLINE) 2500 phyint_chstate(pii->pii_phyint, PI_NOTARGETS); 2501 } 2502 2503 /* 2504 * Flush the target list of every phyint in the group, if the list 2505 * is a host target list. This is called if group failure is suspected. 2506 * If all targets have failed, multicast will subsequently discover new 2507 * targets. Else it is a group failure. 2508 * Note: This function is a no-op if the list is a router target list. 2509 */ 2510 static void 2511 target_flush_hosts(struct phyint_group *pg) 2512 { 2513 struct phyint *pi; 2514 struct phyint_instance *pii; 2515 2516 if (debug & D_TARGET) 2517 logdebug("target_flush_hosts(%s)\n", pg->pg_name); 2518 2519 for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) { 2520 pii = pi->pi_v4; 2521 if (pii != NULL && !pii->pii_targets_are_routers) { 2522 /* 2523 * Delete all the targets. When the list becomes 2524 * empty, target_delete() will set pii->pii_targets 2525 * to NULL. 2526 */ 2527 while (pii->pii_targets != NULL) 2528 target_delete(pii->pii_targets); 2529 } 2530 pii = pi->pi_v6; 2531 if (pii != NULL && !pii->pii_targets_are_routers) { 2532 /* 2533 * Delete all the targets. When the list becomes 2534 * empty, target_delete() will set pii->pii_targets 2535 * to NULL. 2536 */ 2537 while (pii->pii_targets != NULL) 2538 target_delete(pii->pii_targets); 2539 } 2540 } 2541 } 2542 2543 /* 2544 * Reset all references to 'target' in the probe info, as this target is 2545 * being deleted. The pr_target field is guaranteed to be non-null if 2546 * pr_status is PR_UNACKED. So we change the pr_status to PR_LOST, so that 2547 * pr_target will not be accessed unconditionally. 
2548 */ 2549 static void 2550 reset_pii_probes(struct phyint_instance *pii, struct target *tg) 2551 { 2552 int i; 2553 2554 for (i = 0; i < PROBE_STATS_COUNT; i++) { 2555 if (pii->pii_probes[i].pr_target == tg) { 2556 if (pii->pii_probes[i].pr_status == PR_UNACKED) { 2557 probe_chstate(&pii->pii_probes[i], pii, 2558 PR_LOST); 2559 } 2560 pii->pii_probes[i].pr_target = NULL; 2561 } 2562 } 2563 } 2564 2565 /* 2566 * Clear the probe statistics array. 2567 */ 2568 void 2569 clear_pii_probe_stats(struct phyint_instance *pii) 2570 { 2571 bzero(pii->pii_probes, sizeof (struct probe_stats) * PROBE_STATS_COUNT); 2572 /* Reset the next probe index in the probe stats array */ 2573 pii->pii_probe_next = 0; 2574 } 2575 2576 static void 2577 target_print(struct target *tg) 2578 { 2579 char abuf[INET6_ADDRSTRLEN]; 2580 char buf[128]; 2581 char buf2[128]; 2582 int af; 2583 int i; 2584 2585 af = tg->tg_phyint_inst->pii_af; 2586 2587 logdebug("Target on %s %s addr %s\n" 2588 "status %d rtt_sa %lld rtt_sd %lld crtt %d tg_in_use %d\n", 2589 AF_STR(af), tg->tg_phyint_inst->pii_name, 2590 pr_addr(af, tg->tg_address, abuf, sizeof (abuf)), 2591 tg->tg_status, tg->tg_rtt_sa, tg->tg_rtt_sd, 2592 tg->tg_crtt, tg->tg_in_use); 2593 2594 buf[0] = '\0'; 2595 for (i = 0; i < tg->tg_num_deferred; i++) { 2596 (void) snprintf(buf2, sizeof (buf2), " %dms", 2597 tg->tg_deferred[i]); 2598 (void) strlcat(buf, buf2, sizeof (buf)); 2599 } 2600 logdebug("deferred rtts:%s\n", buf); 2601 } 2602 2603 void 2604 phyint_inst_print_all(void) 2605 { 2606 struct phyint_instance *pii; 2607 2608 for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) { 2609 phyint_inst_print(pii); 2610 } 2611 } 2612 2613 /* 2614 * Compare two prefixes that have the same prefix length. 2615 * Fails if the prefix length is unreasonable. 
2616 */ 2617 boolean_t 2618 prefix_equal(struct in6_addr p1, struct in6_addr p2, uint_t prefix_len) 2619 { 2620 uchar_t mask; 2621 int j; 2622 2623 if (prefix_len > IPV6_ABITS) 2624 return (_B_FALSE); 2625 2626 for (j = 0; prefix_len > 8; prefix_len -= 8, j++) 2627 if (p1.s6_addr[j] != p2.s6_addr[j]) 2628 return (_B_FALSE); 2629 2630 /* Make the N leftmost bits one */ 2631 mask = 0xff << (8 - prefix_len); 2632 if ((p1.s6_addr[j] & mask) != (p2.s6_addr[j] & mask)) 2633 return (_B_FALSE); 2634 2635 return (_B_TRUE); 2636 } 2637 2638 /* 2639 * Get the number of UP logints on phyint `pi'. 2640 */ 2641 static int 2642 logint_upcount(struct phyint *pi) 2643 { 2644 struct logint *li; 2645 int count = 0; 2646 2647 if (pi->pi_v4 != NULL) { 2648 for (li = pi->pi_v4->pii_logint; li != NULL; li = li->li_next) { 2649 if (li->li_flags & IFF_UP) 2650 count++; 2651 } 2652 } 2653 2654 if (pi->pi_v6 != NULL) { 2655 for (li = pi->pi_v6->pii_logint; li != NULL; li = li->li_next) { 2656 if (li->li_flags & IFF_UP) 2657 count++; 2658 } 2659 } 2660 2661 return (count); 2662 } 2663 2664 /* 2665 * Get the phyint instance with the other (IPv4 / IPv6) protocol 2666 */ 2667 struct phyint_instance * 2668 phyint_inst_other(struct phyint_instance *pii) 2669 { 2670 if (pii->pii_af == AF_INET) 2671 return (pii->pii_phyint->pi_v6); 2672 else 2673 return (pii->pii_phyint->pi_v4); 2674 } 2675 2676 /* 2677 * Check whether a phyint is functioning. 2678 */ 2679 boolean_t 2680 phyint_is_functioning(struct phyint *pi) 2681 { 2682 if (pi->pi_state == PI_RUNNING) 2683 return (_B_TRUE); 2684 return (pi->pi_state == PI_NOTARGETS && !(pi->pi_flags & IFF_FAILED)); 2685 } 2686 2687 /* 2688 * Check whether a phyint is usable. 2689 */ 2690 boolean_t 2691 phyint_is_usable(struct phyint *pi) 2692 { 2693 if (logint_upcount(pi) == 0) 2694 return (_B_FALSE); 2695 return (phyint_is_functioning(pi)); 2696 } 2697 2698 /* 2699 * Post an EC_IPMP sysevent of subclass `subclass' and attributes `nvl'. 
2700 * Before sending the event, it prepends the current version of the IPMP 2701 * sysevent API. Returns 0 on success, -1 on failure (in either case, 2702 * `nvl' is freed). 2703 */ 2704 static int 2705 post_event(const char *subclass, nvlist_t *nvl) 2706 { 2707 static evchan_t *evchp = NULL; 2708 2709 /* 2710 * Initialize the event channel if we haven't already done so. 2711 */ 2712 if (evchp == NULL) { 2713 errno = sysevent_evc_bind(IPMP_EVENT_CHAN, &evchp, EVCH_CREAT); 2714 if (errno != 0) { 2715 logerr("cannot create event channel `%s': %s\n", 2716 IPMP_EVENT_CHAN, strerror(errno)); 2717 goto failed; 2718 } 2719 } 2720 2721 errno = nvlist_add_uint32(nvl, IPMP_EVENT_VERSION, 2722 IPMP_EVENT_CUR_VERSION); 2723 if (errno != 0) { 2724 logerr("cannot create `%s' event: %s", subclass, 2725 strerror(errno)); 2726 goto failed; 2727 } 2728 2729 errno = sysevent_evc_publish(evchp, EC_IPMP, subclass, "com.sun", 2730 "in.mpathd", nvl, EVCH_NOSLEEP); 2731 if (errno != 0) { 2732 logerr("cannot send `%s' event: %s\n", subclass, 2733 strerror(errno)); 2734 goto failed; 2735 } 2736 2737 nvlist_free(nvl); 2738 return (0); 2739 failed: 2740 nvlist_free(nvl); 2741 return (-1); 2742 } 2743 2744 /* 2745 * Return the external IPMP state associated with phyint `pi'. 2746 */ 2747 static ipmp_if_state_t 2748 ifstate(struct phyint *pi) 2749 { 2750 switch (pi->pi_state) { 2751 case PI_INIT: 2752 return (IPMP_IF_UNKNOWN); 2753 2754 case PI_NOTARGETS: 2755 if (pi->pi_flags & IFF_FAILED) 2756 return (IPMP_IF_FAILED); 2757 return (IPMP_IF_UNKNOWN); 2758 2759 case PI_OFFLINE: 2760 return (IPMP_IF_OFFLINE); 2761 2762 case PI_FAILED: 2763 return (IPMP_IF_FAILED); 2764 2765 case PI_RUNNING: 2766 return (IPMP_IF_OK); 2767 } 2768 2769 logerr("ifstate: unknown state %d; aborting\n", pi->pi_state); 2770 abort(); 2771 /* NOTREACHED */ 2772 } 2773 2774 /* 2775 * Return the external IPMP interface type associated with phyint `pi'. 
2776 */ 2777 static ipmp_if_type_t 2778 iftype(struct phyint *pi) 2779 { 2780 if (pi->pi_flags & IFF_STANDBY) 2781 return (IPMP_IF_STANDBY); 2782 else 2783 return (IPMP_IF_NORMAL); 2784 } 2785 2786 /* 2787 * Return the external IPMP link state associated with phyint `pi'. 2788 */ 2789 static ipmp_if_linkstate_t 2790 iflinkstate(struct phyint *pi) 2791 { 2792 if (!(pi->pi_notes & (DL_NOTE_LINK_UP|DL_NOTE_LINK_DOWN))) 2793 return (IPMP_LINK_UNKNOWN); 2794 2795 return (LINK_DOWN(pi) ? IPMP_LINK_DOWN : IPMP_LINK_UP); 2796 } 2797 2798 /* 2799 * Return the external IPMP probe state associated with phyint `pi'. 2800 */ 2801 static ipmp_if_probestate_t 2802 ifprobestate(struct phyint *pi) 2803 { 2804 if (!PROBE_ENABLED(pi->pi_v4) && !PROBE_ENABLED(pi->pi_v6)) 2805 return (IPMP_PROBE_DISABLED); 2806 2807 if (pi->pi_state == PI_FAILED) 2808 return (IPMP_PROBE_FAILED); 2809 2810 if (!PROBE_CAPABLE(pi->pi_v4) && !PROBE_CAPABLE(pi->pi_v6)) 2811 return (IPMP_PROBE_UNKNOWN); 2812 2813 return (IPMP_PROBE_OK); 2814 } 2815 2816 /* 2817 * Return the external IPMP target mode associated with phyint instance `pii'. 2818 */ 2819 static ipmp_if_targmode_t 2820 iftargmode(struct phyint_instance *pii) 2821 { 2822 if (!PROBE_ENABLED(pii)) 2823 return (IPMP_TARG_DISABLED); 2824 else if (pii->pii_targets_are_routers) 2825 return (IPMP_TARG_ROUTES); 2826 else 2827 return (IPMP_TARG_MULTICAST); 2828 } 2829 2830 /* 2831 * Return the external IPMP flags associated with phyint `pi'. 2832 */ 2833 static ipmp_if_flags_t 2834 ifflags(struct phyint *pi) 2835 { 2836 ipmp_if_flags_t flags = 0; 2837 2838 if (logint_upcount(pi) == 0) 2839 flags |= IPMP_IFFLAG_DOWN; 2840 if (pi->pi_flags & IFF_INACTIVE) 2841 flags |= IPMP_IFFLAG_INACTIVE; 2842 if (pi->pi_hwaddrdup) 2843 flags |= IPMP_IFFLAG_HWADDRDUP; 2844 if (phyint_is_functioning(pi) && flags == 0) 2845 flags |= IPMP_IFFLAG_ACTIVE; 2846 2847 return (flags); 2848 } 2849 2850 /* 2851 * Store the test address used on phyint instance `pii' in `ssp'. 
If there's 2852 * no test address, 0.0.0.0 is stored. 2853 */ 2854 static struct sockaddr_storage * 2855 iftestaddr(struct phyint_instance *pii, struct sockaddr_storage *ssp) 2856 { 2857 if (PROBE_ENABLED(pii)) 2858 addr2storage(pii->pii_af, &pii->pii_probe_logint->li_addr, ssp); 2859 else 2860 addr2storage(AF_INET6, &in6addr_any, ssp); 2861 2862 return (ssp); 2863 } 2864 2865 /* 2866 * Return the external IPMP group state associated with phyint group `pg'. 2867 */ 2868 static ipmp_group_state_t 2869 groupstate(struct phyint_group *pg) 2870 { 2871 switch (pg->pg_state) { 2872 case PG_FAILED: 2873 return (IPMP_GROUP_FAILED); 2874 case PG_DEGRADED: 2875 return (IPMP_GROUP_DEGRADED); 2876 case PG_OK: 2877 return (IPMP_GROUP_OK); 2878 } 2879 2880 logerr("groupstate: unknown state %d; aborting\n", pg->pg_state); 2881 abort(); 2882 /* NOTREACHED */ 2883 } 2884 2885 /* 2886 * Return the external IPMP probe state associated with probe `ps'. 2887 */ 2888 static ipmp_probe_state_t 2889 probestate(struct probe_stats *ps) 2890 { 2891 switch (ps->pr_status) { 2892 case PR_UNUSED: 2893 case PR_LOST: 2894 return (IPMP_PROBE_LOST); 2895 case PR_UNACKED: 2896 return (IPMP_PROBE_SENT); 2897 case PR_ACKED: 2898 return (IPMP_PROBE_ACKED); 2899 } 2900 2901 logerr("probestate: unknown state %d; aborting\n", ps->pr_status); 2902 abort(); 2903 /* NOTREACHED */ 2904 } 2905 2906 /* 2907 * Generate an ESC_IPMP_PROBE_STATE sysevent for the probe described by `pr' 2908 * on phyint instance `pii'. Returns 0 on success, -1 on failure. 
2909 */ 2910 int 2911 probe_state_event(struct probe_stats *pr, struct phyint_instance *pii) 2912 { 2913 nvlist_t *nvl; 2914 hrtime_t proc_time = 0, recv_time = 0; 2915 struct sockaddr_storage ss; 2916 struct target *tg = pr->pr_target; 2917 int64_t rttavg, rttdev; 2918 2919 errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 2920 if (errno != 0) { 2921 logperror("cannot create `interface change' event"); 2922 return (-1); 2923 } 2924 2925 errno = nvlist_add_uint32(nvl, IPMP_PROBE_ID, pr->pr_id); 2926 if (errno != 0) 2927 goto failed; 2928 2929 errno = nvlist_add_string(nvl, IPMP_IF_NAME, pii->pii_phyint->pi_name); 2930 if (errno != 0) 2931 goto failed; 2932 2933 errno = nvlist_add_uint32(nvl, IPMP_PROBE_STATE, probestate(pr)); 2934 if (errno != 0) 2935 goto failed; 2936 2937 errno = nvlist_add_hrtime(nvl, IPMP_PROBE_START_TIME, 2938 pr->pr_hrtime_start); 2939 if (errno != 0) 2940 goto failed; 2941 2942 errno = nvlist_add_hrtime(nvl, IPMP_PROBE_SENT_TIME, 2943 pr->pr_hrtime_sent); 2944 if (errno != 0) 2945 goto failed; 2946 2947 if (pr->pr_status == PR_ACKED) { 2948 recv_time = pr->pr_hrtime_ackrecv; 2949 proc_time = pr->pr_hrtime_ackproc; 2950 } 2951 2952 errno = nvlist_add_hrtime(nvl, IPMP_PROBE_ACKRECV_TIME, recv_time); 2953 if (errno != 0) 2954 goto failed; 2955 2956 errno = nvlist_add_hrtime(nvl, IPMP_PROBE_ACKPROC_TIME, proc_time); 2957 if (errno != 0) 2958 goto failed; 2959 2960 if (tg != NULL) 2961 addr2storage(pii->pii_af, &tg->tg_address, &ss); 2962 else 2963 addr2storage(pii->pii_af, &in6addr_any, &ss); 2964 2965 errno = nvlist_add_byte_array(nvl, IPMP_PROBE_TARGET, (uchar_t *)&ss, 2966 sizeof (ss)); 2967 if (errno != 0) 2968 goto failed; 2969 2970 rttavg = (tg != NULL) ? (tg->tg_rtt_sa / 8) : 0; 2971 errno = nvlist_add_int64(nvl, IPMP_PROBE_TARGET_RTTAVG, rttavg); 2972 if (errno != 0) 2973 goto failed; 2974 2975 rttdev = (tg != NULL) ? 
(tg->tg_rtt_sd / 4) : 0; 2976 errno = nvlist_add_int64(nvl, IPMP_PROBE_TARGET_RTTDEV, rttdev); 2977 if (errno != 0) 2978 goto failed; 2979 2980 return (post_event(ESC_IPMP_PROBE_STATE, nvl)); 2981 failed: 2982 logperror("cannot create `probe state' event"); 2983 nvlist_free(nvl); 2984 return (-1); 2985 } 2986 2987 /* 2988 * Generate an ESC_IPMP_GROUP_STATE sysevent for phyint group `pg'. 2989 * Returns 0 on success, -1 on failure. 2990 */ 2991 static int 2992 phyint_group_state_event(struct phyint_group *pg) 2993 { 2994 nvlist_t *nvl; 2995 2996 errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 2997 if (errno != 0) { 2998 logperror("cannot create `group state change' event"); 2999 return (-1); 3000 } 3001 3002 errno = nvlist_add_string(nvl, IPMP_GROUP_NAME, pg->pg_name); 3003 if (errno != 0) 3004 goto failed; 3005 3006 errno = nvlist_add_uint64(nvl, IPMP_GROUP_SIGNATURE, pg->pg_sig); 3007 if (errno != 0) 3008 goto failed; 3009 3010 errno = nvlist_add_uint32(nvl, IPMP_GROUP_STATE, groupstate(pg)); 3011 if (errno != 0) 3012 goto failed; 3013 3014 return (post_event(ESC_IPMP_GROUP_STATE, nvl)); 3015 failed: 3016 logperror("cannot create `group state change' event"); 3017 nvlist_free(nvl); 3018 return (-1); 3019 } 3020 3021 /* 3022 * Generate an ESC_IPMP_GROUP_CHANGE sysevent of type `op' for phyint group 3023 * `pg'. Returns 0 on success, -1 on failure. 
3024 */ 3025 static int 3026 phyint_group_change_event(struct phyint_group *pg, ipmp_group_op_t op) 3027 { 3028 nvlist_t *nvl; 3029 3030 errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 3031 if (errno != 0) { 3032 logperror("cannot create `group change' event"); 3033 return (-1); 3034 } 3035 3036 errno = nvlist_add_string(nvl, IPMP_GROUP_NAME, pg->pg_name); 3037 if (errno != 0) 3038 goto failed; 3039 3040 errno = nvlist_add_uint64(nvl, IPMP_GROUP_SIGNATURE, pg->pg_sig); 3041 if (errno != 0) 3042 goto failed; 3043 3044 errno = nvlist_add_uint64(nvl, IPMP_GROUPLIST_SIGNATURE, 3045 phyint_grouplistsig); 3046 if (errno != 0) 3047 goto failed; 3048 3049 errno = nvlist_add_uint32(nvl, IPMP_GROUP_OPERATION, op); 3050 if (errno != 0) 3051 goto failed; 3052 3053 return (post_event(ESC_IPMP_GROUP_CHANGE, nvl)); 3054 failed: 3055 logperror("cannot create `group change' event"); 3056 nvlist_free(nvl); 3057 return (-1); 3058 } 3059 3060 /* 3061 * Generate an ESC_IPMP_GROUP_MEMBER_CHANGE sysevent for phyint `pi' in 3062 * group `pg'. Returns 0 on success, -1 on failure. 
3063 */ 3064 static int 3065 phyint_group_member_event(struct phyint_group *pg, struct phyint *pi, 3066 ipmp_if_op_t op) 3067 { 3068 nvlist_t *nvl; 3069 3070 errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 3071 if (errno != 0) { 3072 logperror("cannot create `group member change' event"); 3073 return (-1); 3074 } 3075 3076 errno = nvlist_add_string(nvl, IPMP_GROUP_NAME, pg->pg_name); 3077 if (errno != 0) 3078 goto failed; 3079 3080 errno = nvlist_add_uint64(nvl, IPMP_GROUP_SIGNATURE, pg->pg_sig); 3081 if (errno != 0) 3082 goto failed; 3083 3084 errno = nvlist_add_uint32(nvl, IPMP_IF_OPERATION, op); 3085 if (errno != 0) 3086 goto failed; 3087 3088 errno = nvlist_add_string(nvl, IPMP_IF_NAME, pi->pi_name); 3089 if (errno != 0) 3090 goto failed; 3091 3092 errno = nvlist_add_uint32(nvl, IPMP_IF_TYPE, iftype(pi)); 3093 if (errno != 0) 3094 goto failed; 3095 3096 errno = nvlist_add_uint32(nvl, IPMP_IF_STATE, ifstate(pi)); 3097 if (errno != 0) 3098 goto failed; 3099 3100 return (post_event(ESC_IPMP_GROUP_MEMBER_CHANGE, nvl)); 3101 failed: 3102 logperror("cannot create `group member change' event"); 3103 nvlist_free(nvl); 3104 return (-1); 3105 3106 } 3107 3108 /* 3109 * Generate an ESC_IPMP_IF_CHANGE sysevent for phyint `pi' in group `pg'. 3110 * Returns 0 on success, -1 on failure. 
3111 */ 3112 static int 3113 phyint_state_event(struct phyint_group *pg, struct phyint *pi) 3114 { 3115 nvlist_t *nvl; 3116 3117 errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); 3118 if (errno != 0) { 3119 logperror("cannot create `interface change' event"); 3120 return (-1); 3121 } 3122 3123 errno = nvlist_add_string(nvl, IPMP_GROUP_NAME, pg->pg_name); 3124 if (errno != 0) 3125 goto failed; 3126 3127 errno = nvlist_add_uint64(nvl, IPMP_GROUP_SIGNATURE, pg->pg_sig); 3128 if (errno != 0) 3129 goto failed; 3130 3131 errno = nvlist_add_string(nvl, IPMP_IF_NAME, pi->pi_name); 3132 if (errno != 0) 3133 goto failed; 3134 3135 errno = nvlist_add_uint32(nvl, IPMP_IF_TYPE, iftype(pi)); 3136 if (errno != 0) 3137 goto failed; 3138 3139 errno = nvlist_add_uint32(nvl, IPMP_IF_STATE, ifstate(pi)); 3140 if (errno != 0) 3141 goto failed; 3142 3143 return (post_event(ESC_IPMP_IF_CHANGE, nvl)); 3144 failed: 3145 logperror("cannot create `interface change' event"); 3146 nvlist_free(nvl); 3147 return (-1); 3148 3149 } 3150 3151 /* 3152 * Generate a signature for use. The signature is conceptually divided 3153 * into two pieces: a random 16-bit "generation number" and a 48-bit 3154 * monotonically increasing integer. The generation number protects 3155 * against stale updates to entities (e.g., IPMP groups) that have been 3156 * deleted and since recreated. 3157 */ 3158 static uint64_t 3159 gensig(void) 3160 { 3161 static int seeded = 0; 3162 3163 if (seeded == 0) { 3164 srand48((long)gethrtime()); 3165 seeded++; 3166 } 3167 3168 return ((uint64_t)lrand48() << 48 | 1); 3169 } 3170 3171 /* 3172 * Store the information associated with group `grname' into a dynamically 3173 * allocated structure pointed to by `*grinfopp'. Returns an IPMP error code. 
3174 */ 3175 unsigned int 3176 getgroupinfo(const char *grname, ipmp_groupinfo_t **grinfopp) 3177 { 3178 struct phyint *pi; 3179 struct phyint_group *pg; 3180 char (*ifs)[LIFNAMSIZ]; 3181 unsigned int i, j; 3182 unsigned int nif = 0, naddr = 0; 3183 lifgroupinfo_t lifgr; 3184 addrlist_t *addrp; 3185 struct sockaddr_storage *addrs; 3186 int fdt = 0; 3187 3188 pg = phyint_group_lookup(grname); 3189 if (pg == NULL) 3190 return (IPMP_EUNKGROUP); 3191 3192 /* 3193 * Tally up the number of interfaces, allocate an array to hold them, 3194 * and insert their names into the array. While we're at it, if any 3195 * interface is actually enabled to send probes, save the group fdt. 3196 */ 3197 for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) 3198 nif++; 3199 3200 ifs = alloca(nif * sizeof (*ifs)); 3201 for (i = 0, pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext, i++) { 3202 assert(i < nif); 3203 (void) strlcpy(ifs[i], pi->pi_name, LIFNAMSIZ); 3204 if (PROBE_ENABLED(pi->pi_v4) || PROBE_ENABLED(pi->pi_v6)) 3205 fdt = pg->pg_fdt; 3206 } 3207 assert(i == nif); 3208 3209 /* 3210 * If this is the anonymous group, there's no other information to 3211 * collect (since there's no IPMP interface). 3212 */ 3213 if (pg == phyint_anongroup) { 3214 *grinfopp = ipmp_groupinfo_create(pg->pg_name, pg->pg_sig, fdt, 3215 groupstate(pg), nif, ifs, "", "", "", "", 0, NULL); 3216 return (*grinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS); 3217 } 3218 3219 /* 3220 * Grab some additional information about the group from the kernel. 3221 * (NOTE: since SIOCGLIFGROUPINFO does not look up by interface name, 3222 * we can use ifsock_v4 even for a V6-only group.) 
3223 */ 3224 (void) strlcpy(lifgr.gi_grname, grname, LIFGRNAMSIZ); 3225 if (ioctl(ifsock_v4, SIOCGLIFGROUPINFO, &lifgr) == -1) { 3226 if (errno == ENOENT) 3227 return (IPMP_EUNKGROUP); 3228 3229 logperror("getgroupinfo: SIOCGLIFGROUPINFO"); 3230 return (IPMP_FAILURE); 3231 } 3232 3233 /* 3234 * Tally up the number of data addresses, allocate an array to hold 3235 * them, and insert their values into the array. 3236 */ 3237 for (addrp = pg->pg_addrs; addrp != NULL; addrp = addrp->al_next) 3238 naddr++; 3239 3240 addrs = alloca(naddr * sizeof (*addrs)); 3241 i = 0; 3242 for (addrp = pg->pg_addrs; addrp != NULL; addrp = addrp->al_next) { 3243 /* 3244 * It's possible to have duplicate addresses (if some are 3245 * down). Weed the dups out to avoid confusing consumers. 3246 * (If groups start having tons of addresses, we'll need a 3247 * better algorithm here.) 3248 */ 3249 for (j = 0; j < i; j++) { 3250 if (sockaddrcmp(&addrs[j], &addrp->al_addr)) 3251 break; 3252 } 3253 if (j == i) { 3254 assert(i < naddr); 3255 addrs[i++] = addrp->al_addr; 3256 } 3257 } 3258 naddr = i; 3259 3260 *grinfopp = ipmp_groupinfo_create(pg->pg_name, pg->pg_sig, fdt, 3261 groupstate(pg), nif, ifs, lifgr.gi_grifname, lifgr.gi_m4ifname, 3262 lifgr.gi_m6ifname, lifgr.gi_bcifname, naddr, addrs); 3263 return (*grinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS); 3264 } 3265 3266 /* 3267 * Store the target information associated with phyint instance `pii' into a 3268 * dynamically allocated structure pointed to by `*targinfopp'. Returns an 3269 * IPMP error code. 
3270 */ 3271 unsigned int 3272 gettarginfo(struct phyint_instance *pii, const char *name, 3273 ipmp_targinfo_t **targinfopp) 3274 { 3275 uint_t ntarg = 0; 3276 struct target *tg; 3277 struct sockaddr_storage ss; 3278 struct sockaddr_storage *targs = NULL; 3279 3280 if (PROBE_CAPABLE(pii)) { 3281 targs = alloca(pii->pii_ntargets * sizeof (*targs)); 3282 tg = pii->pii_target_next; 3283 do { 3284 if (tg->tg_status == TG_ACTIVE) { 3285 assert(ntarg < pii->pii_ntargets); 3286 addr2storage(pii->pii_af, &tg->tg_address, 3287 &targs[ntarg++]); 3288 } 3289 if ((tg = tg->tg_next) == NULL) 3290 tg = pii->pii_targets; 3291 } while (tg != pii->pii_target_next); 3292 3293 assert(ntarg == pii->pii_ntargets); 3294 } 3295 3296 *targinfopp = ipmp_targinfo_create(name, iftestaddr(pii, &ss), 3297 iftargmode(pii), ntarg, targs); 3298 return (*targinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS); 3299 } 3300 3301 /* 3302 * Store the information associated with interface `ifname' into a dynamically 3303 * allocated structure pointed to by `*ifinfopp'. Returns an IPMP error code. 3304 */ 3305 unsigned int 3306 getifinfo(const char *ifname, ipmp_ifinfo_t **ifinfopp) 3307 { 3308 int retval; 3309 struct phyint *pi; 3310 ipmp_targinfo_t *targinfo4; 3311 ipmp_targinfo_t *targinfo6; 3312 3313 pi = phyint_lookup(ifname); 3314 if (pi == NULL) 3315 return (IPMP_EUNKIF); 3316 3317 if ((retval = gettarginfo(pi->pi_v4, pi->pi_name, &targinfo4)) != 0 || 3318 (retval = gettarginfo(pi->pi_v6, pi->pi_name, &targinfo6)) != 0) 3319 goto out; 3320 3321 *ifinfopp = ipmp_ifinfo_create(pi->pi_name, pi->pi_group->pg_name, 3322 ifstate(pi), iftype(pi), iflinkstate(pi), ifprobestate(pi), 3323 ifflags(pi), targinfo4, targinfo6); 3324 retval = (*ifinfopp == NULL ? 
IPMP_ENOMEM : IPMP_SUCCESS); 3325 out: 3326 if (targinfo4 != NULL) 3327 ipmp_freetarginfo(targinfo4); 3328 if (targinfo6 != NULL) 3329 ipmp_freetarginfo(targinfo6); 3330 return (retval); 3331 } 3332 3333 /* 3334 * Store the current list of IPMP groups into a dynamically allocated 3335 * structure pointed to by `*grlistpp'. Returns an IPMP error code. 3336 */ 3337 unsigned int 3338 getgrouplist(ipmp_grouplist_t **grlistpp) 3339 { 3340 struct phyint_group *pg; 3341 char (*groups)[LIFGRNAMSIZ]; 3342 unsigned int i, ngroup; 3343 3344 /* 3345 * Tally up the number of groups, allocate an array to hold them, and 3346 * insert their names into the array. 3347 */ 3348 for (ngroup = 0, pg = phyint_groups; pg != NULL; pg = pg->pg_next) 3349 ngroup++; 3350 3351 groups = alloca(ngroup * sizeof (*groups)); 3352 for (i = 0, pg = phyint_groups; pg != NULL; pg = pg->pg_next, i++) { 3353 assert(i < ngroup); 3354 (void) strlcpy(groups[i], pg->pg_name, LIFGRNAMSIZ); 3355 } 3356 assert(i == ngroup); 3357 3358 *grlistpp = ipmp_grouplist_create(phyint_grouplistsig, ngroup, groups); 3359 return (*grlistpp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS); 3360 } 3361 3362 /* 3363 * Store the address information for `ssp' (in group `grname') into a 3364 * dynamically allocated structure pointed to by `*adinfopp'. Returns an IPMP 3365 * error code. (We'd call this function getaddrinfo(), but it would conflict 3366 * with getaddrinfo(3SOCKET)). 3367 */ 3368 unsigned int 3369 getgraddrinfo(const char *grname, struct sockaddr_storage *ssp, 3370 ipmp_addrinfo_t **adinfopp) 3371 { 3372 int ifsock; 3373 addrlist_t *addrp, *addrmatchp = NULL; 3374 ipmp_addr_state_t state; 3375 const char *binding = ""; 3376 struct lifreq lifr; 3377 struct phyint_group *pg; 3378 3379 if ((pg = phyint_group_lookup(grname)) == NULL) 3380 return (IPMP_EUNKADDR); 3381 3382 /* 3383 * Walk through the data addresses, and find a match. Note that since 3384 * some of the addresses may be down, more than one may match. 
We 3385 * prefer an up address (if one exists). 3386 */ 3387 for (addrp = pg->pg_addrs; addrp != NULL; addrp = addrp->al_next) { 3388 if (sockaddrcmp(ssp, &addrp->al_addr)) { 3389 addrmatchp = addrp; 3390 if (addrmatchp->al_flags & IFF_UP) 3391 break; 3392 } 3393 } 3394 3395 if (addrmatchp == NULL) 3396 return (IPMP_EUNKADDR); 3397 3398 state = (addrmatchp->al_flags & IFF_UP) ? IPMP_ADDR_UP : IPMP_ADDR_DOWN; 3399 if (state == IPMP_ADDR_UP) { 3400 ifsock = (ssp->ss_family == AF_INET) ? ifsock_v4 : ifsock_v6; 3401 (void) strlcpy(lifr.lifr_name, addrmatchp->al_name, LIFNAMSIZ); 3402 if (ioctl(ifsock, SIOCGLIFBINDING, &lifr) >= 0) 3403 binding = lifr.lifr_binding; 3404 } 3405 3406 *adinfopp = ipmp_addrinfo_create(ssp, state, pg->pg_name, binding); 3407 return (*adinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS); 3408 } 3409 3410 /* 3411 * Store a snapshot of the IPMP subsystem into a dynamically allocated 3412 * structure pointed to by `*snapp'. Returns an IPMP error code. 3413 */ 3414 unsigned int 3415 getsnap(ipmp_snap_t **snapp) 3416 { 3417 ipmp_grouplist_t *grlistp; 3418 ipmp_groupinfo_t *grinfop; 3419 ipmp_addrinfo_t *adinfop; 3420 ipmp_addrlist_t *adlistp; 3421 ipmp_ifinfo_t *ifinfop; 3422 ipmp_snap_t *snap; 3423 struct phyint *pi; 3424 unsigned int i, j; 3425 int retval; 3426 3427 snap = ipmp_snap_create(); 3428 if (snap == NULL) 3429 return (IPMP_ENOMEM); 3430 3431 /* 3432 * Add group list. 3433 */ 3434 retval = getgrouplist(&snap->sn_grlistp); 3435 if (retval != IPMP_SUCCESS) 3436 goto failed; 3437 3438 /* 3439 * Add information for each group in the list, along with all of its 3440 * data addresses. 
3441 */ 3442 grlistp = snap->sn_grlistp; 3443 for (i = 0; i < grlistp->gl_ngroup; i++) { 3444 retval = getgroupinfo(grlistp->gl_groups[i], &grinfop); 3445 if (retval != IPMP_SUCCESS) 3446 goto failed; 3447 3448 retval = ipmp_snap_addgroupinfo(snap, grinfop); 3449 if (retval != IPMP_SUCCESS) { 3450 ipmp_freegroupinfo(grinfop); 3451 goto failed; 3452 } 3453 3454 adlistp = grinfop->gr_adlistp; 3455 for (j = 0; j < adlistp->al_naddr; j++) { 3456 retval = getgraddrinfo(grinfop->gr_name, 3457 &adlistp->al_addrs[j], &adinfop); 3458 if (retval != IPMP_SUCCESS) 3459 goto failed; 3460 3461 retval = ipmp_snap_addaddrinfo(snap, adinfop); 3462 if (retval != IPMP_SUCCESS) { 3463 ipmp_freeaddrinfo(adinfop); 3464 goto failed; 3465 } 3466 } 3467 } 3468 3469 /* 3470 * Add information for each configured phyint. 3471 */ 3472 for (pi = phyints; pi != NULL; pi = pi->pi_next) { 3473 retval = getifinfo(pi->pi_name, &ifinfop); 3474 if (retval != IPMP_SUCCESS) 3475 goto failed; 3476 3477 retval = ipmp_snap_addifinfo(snap, ifinfop); 3478 if (retval != IPMP_SUCCESS) { 3479 ipmp_freeifinfo(ifinfop); 3480 goto failed; 3481 } 3482 } 3483 3484 *snapp = snap; 3485 return (IPMP_SUCCESS); 3486 failed: 3487 ipmp_snap_free(snap); 3488 return (retval); 3489 } 3490