1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * Copyright 2012 Milan Jurik. All rights reserved. 26 */ 27 28 #include <sys/types.h> 29 #include <sys/socket.h> 30 #include <sys/list.h> 31 #include <sys/stropts.h> 32 #include <sys/siginfo.h> 33 #include <sys/wait.h> 34 #include <arpa/inet.h> 35 #include <netinet/in.h> 36 #include <stdlib.h> 37 #include <stdio.h> 38 #include <strings.h> 39 #include <stddef.h> 40 #include <unistd.h> 41 #include <libilb.h> 42 #include <port.h> 43 #include <time.h> 44 #include <signal.h> 45 #include <assert.h> 46 #include <errno.h> 47 #include <spawn.h> 48 #include <fcntl.h> 49 #include <limits.h> 50 #include "libilb_impl.h" 51 #include "ilbd.h" 52 53 /* Global list of HC objects */ 54 list_t ilbd_hc_list; 55 56 /* Timer queue for all hc related timers. */ 57 static iu_tq_t *ilbd_hc_timer_q; 58 59 /* Indicate whether the timer needs to be updated */ 60 static boolean_t hc_timer_restarted; 61 62 static void ilbd_hc_probe_timer(iu_tq_t *, void *); 63 static ilb_status_t ilbd_hc_restart_timer(ilbd_hc_t *, ilbd_hc_srv_t *); 64 static boolean_t ilbd_run_probe(ilbd_hc_srv_t *); 65 66 #define MAX(a, b) ((a) > (b) ? (a) : (b)) 67 68 /* 69 * Number of arguments passed to a probe. argc[0] is the path name of 70 * the probe. 71 */ 72 #define HC_PROBE_ARGC 8 73 74 /* 75 * Max number of characters to be read from the output of a probe. It 76 * is long enough to read in a 64 bit integer. 77 */ 78 #define HC_MAX_PROBE_OUTPUT 24 79 80 void 81 i_ilbd_setup_hc_list(void) 82 { 83 list_create(&ilbd_hc_list, sizeof (ilbd_hc_t), 84 offsetof(ilbd_hc_t, ihc_link)); 85 } 86 87 /* 88 * Given a hc object name, return a pointer to hc object if found. 89 */ 90 ilbd_hc_t * 91 ilbd_get_hc(const char *name) 92 { 93 ilbd_hc_t *hc; 94 95 for (hc = list_head(&ilbd_hc_list); hc != NULL; 96 hc = list_next(&ilbd_hc_list, hc)) { 97 if (strcasecmp(hc->ihc_name, name) == 0) 98 return (hc); 99 } 100 return (NULL); 101 } 102 103 /* 104 * Generates an audit record for create-healthcheck, 105 * delete-healtcheck subcommands. 106 */ 107 static void 108 ilbd_audit_hc_event(const char *audit_hcname, 109 const ilb_hc_info_t *audit_hcinfo, ilbd_cmd_t cmd, 110 ilb_status_t rc, ucred_t *ucredp) 111 { 112 adt_session_data_t *ah; 113 adt_event_data_t *event; 114 au_event_t flag; 115 int audit_error; 116 117 if ((ucredp == NULL) && (cmd == ILBD_CREATE_HC)) { 118 /* 119 * we came here from the path where ilbd incorporates 120 * the configuration that is listed in SCF: 121 * i_ilbd_read_config->ilbd_walk_hc_pgs-> 122 * ->ilbd_scf_instance_walk_pg->ilbd_create_hc 123 * We skip auditing in that case 124 */ 125 logdebug("ilbd_audit_hc_event: skipping auditing"); 126 return; 127 } 128 129 if (adt_start_session(&ah, NULL, 0) != 0) { 130 logerr("ilbd_audit_hc_event: adt_start_session failed"); 131 exit(EXIT_FAILURE); 132 } 133 if (adt_set_from_ucred(ah, ucredp, ADT_NEW) != 0) { 134 (void) adt_end_session(ah); 135 logerr("ilbd_audit_rule_event: adt_set_from_ucred failed"); 136 exit(EXIT_FAILURE); 137 } 138 if (cmd == ILBD_CREATE_HC) 139 flag = ADT_ilb_create_healthcheck; 140 else if (cmd == ILBD_DESTROY_HC) 141 flag = ADT_ilb_delete_healthcheck; 142 143 if ((event = adt_alloc_event(ah, flag)) == NULL) { 144 logerr("ilbd_audit_hc_event: adt_alloc_event failed"); 145 exit(EXIT_FAILURE); 146 } 147 (void) memset((char *)event, 0, sizeof (adt_event_data_t)); 148 149 switch (cmd) { 150 case ILBD_CREATE_HC: 151 event->adt_ilb_create_healthcheck.auth_used = 152 NET_ILB_CONFIG_AUTH; 153 event->adt_ilb_create_healthcheck.hc_test = 154 (char *)audit_hcinfo->hci_test; 155 event->adt_ilb_create_healthcheck.hc_name = 156 (char *)audit_hcinfo->hci_name; 157 158 /* 159 * If the value 0 is stored, the default values are 160 * set in the kernel. User land does not know about them 161 * So if the user does not specify them, audit record 162 * will show them as 0 163 */ 164 event->adt_ilb_create_healthcheck.hc_timeout = 165 audit_hcinfo->hci_timeout; 166 event->adt_ilb_create_healthcheck.hc_count = 167 audit_hcinfo->hci_count; 168 event->adt_ilb_create_healthcheck.hc_interval = 169 audit_hcinfo->hci_interval; 170 break; 171 case ILBD_DESTROY_HC: 172 event->adt_ilb_delete_healthcheck.auth_used = 173 NET_ILB_CONFIG_AUTH; 174 event->adt_ilb_delete_healthcheck.hc_name = 175 (char *)audit_hcname; 176 break; 177 } 178 179 /* Fill in success/failure */ 180 if (rc == ILB_STATUS_OK) { 181 if (adt_put_event(event, ADT_SUCCESS, ADT_SUCCESS) != 0) { 182 logerr("ilbd_audit_hc_event: adt_put_event failed"); 183 exit(EXIT_FAILURE); 184 } 185 } else { 186 audit_error = ilberror2auditerror(rc); 187 if (adt_put_event(event, ADT_FAILURE, audit_error) != 0) { 188 logerr("ilbd_audit_hc_event: adt_put_event failed"); 189 exit(EXIT_FAILURE); 190 } 191 } 192 adt_free_event(event); 193 (void) adt_end_session(ah); 194 } 195 196 /* 197 * Given the ilb_hc_info_t passed in (from the libilb), create a hc object 198 * in ilbd. The parameter ev_port is not used, refer to comments of 199 * ilbd_create_sg() in ilbd_sg.c 200 */ 201 /* ARGSUSED */ 202 ilb_status_t 203 ilbd_create_hc(const ilb_hc_info_t *hc_info, int ev_port, 204 const struct passwd *ps, ucred_t *ucredp) 205 { 206 ilbd_hc_t *hc; 207 ilb_status_t ret = ILB_STATUS_OK; 208 209 /* 210 * ps == NULL is from the daemon when it starts and load configuration 211 * ps != NULL is from client. 212 */ 213 if (ps != NULL) { 214 ret = ilbd_check_client_config_auth(ps); 215 if (ret != ILB_STATUS_OK) { 216 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, 217 ret, ucredp); 218 return (ret); 219 } 220 } 221 222 if (hc_info->hci_name[0] == '\0') { 223 logdebug("ilbd_create_hc: missing healthcheck info"); 224 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, 225 ILB_STATUS_ENOHCINFO, ucredp); 226 return (ILB_STATUS_ENOHCINFO); 227 } 228 229 hc = ilbd_get_hc(hc_info->hci_name); 230 if (hc != NULL) { 231 logdebug("ilbd_create_hc: healthcheck name %s already" 232 " exists", hc_info->hci_name); 233 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, 234 ILB_STATUS_EEXIST, ucredp); 235 return (ILB_STATUS_EEXIST); 236 } 237 238 /* 239 * Sanity check on user supplied probe. The given path name 240 * must be a full path name (starts with '/') and is 241 * executable. 242 */ 243 if (strcasecmp(hc_info->hci_test, ILB_HC_STR_TCP) != 0 && 244 strcasecmp(hc_info->hci_test, ILB_HC_STR_UDP) != 0 && 245 strcasecmp(hc_info->hci_test, ILB_HC_STR_PING) != 0 && 246 (hc_info->hci_test[0] != '/' || 247 access(hc_info->hci_test, X_OK) == -1)) { 248 if (errno == ENOENT) { 249 logdebug("ilbd_create_hc: user script %s doesn't " 250 "exist", hc_info->hci_test); 251 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, 252 ILB_STATUS_ENOENT, ucredp); 253 return (ILB_STATUS_ENOENT); 254 } else { 255 logdebug("ilbd_create_hc: user script %s is " 256 "invalid", hc_info->hci_test); 257 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, 258 ILB_STATUS_EINVAL, ucredp); 259 return (ILB_STATUS_EINVAL); 260 } 261 } 262 263 /* Create and add the hc object */ 264 hc = calloc(1, sizeof (ilbd_hc_t)); 265 if (hc == NULL) { 266 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, 267 ILB_STATUS_ENOMEM, ucredp); 268 return (ILB_STATUS_ENOMEM); 269 } 270 (void) memcpy(&hc->ihc_info, hc_info, sizeof (ilb_hc_info_t)); 271 if (strcasecmp(hc->ihc_test, ILB_HC_STR_TCP) == 0) 272 hc->ihc_test_type = ILBD_HC_TCP; 273 else if (strcasecmp(hc->ihc_test, ILB_HC_STR_UDP) == 0) 274 hc->ihc_test_type = ILBD_HC_UDP; 275 else if (strcasecmp(hc->ihc_test, ILB_HC_STR_PING) == 0) 276 hc->ihc_test_type = ILBD_HC_PING; 277 else 278 hc->ihc_test_type = ILBD_HC_USER; 279 list_create(&hc->ihc_rules, sizeof (ilbd_hc_rule_t), 280 offsetof(ilbd_hc_rule_t, hcr_link)); 281 282 /* Update SCF */ 283 if (ps != NULL) { 284 if ((ret = ilbd_create_pg(ILBD_SCF_HC, (void *)hc)) != 285 ILB_STATUS_OK) { 286 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, 287 ret, ucredp); 288 list_destroy(&hc->ihc_rules); 289 free(hc); 290 return (ret); 291 } 292 } 293 294 /* Everything is fine, now add it to the global list. */ 295 list_insert_tail(&ilbd_hc_list, hc); 296 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, ret, ucredp); 297 return (ret); 298 } 299 300 /* 301 * Given a name of a hc object, destroy it. 302 */ 303 ilb_status_t 304 ilbd_destroy_hc(const char *hc_name, const struct passwd *ps, 305 ucred_t *ucredp) 306 { 307 ilb_status_t ret; 308 ilbd_hc_t *hc; 309 310 /* 311 * No need to check ps == NULL, daemon won't call any destroy func 312 * at start up. 313 */ 314 ret = ilbd_check_client_config_auth(ps); 315 if (ret != ILB_STATUS_OK) { 316 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC, 317 ret, ucredp); 318 return (ret); 319 } 320 321 hc = ilbd_get_hc(hc_name); 322 if (hc == NULL) { 323 logdebug("ilbd_destroy_hc: healthcheck %s does not exist", 324 hc_name); 325 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC, 326 ILB_STATUS_ENOENT, ucredp); 327 return (ILB_STATUS_ENOENT); 328 } 329 330 /* If hc is in use, cannot delete it */ 331 if (hc->ihc_rule_cnt > 0) { 332 logdebug("ilbd_destroy_hc: healthcheck %s is associated" 333 " with a rule - cannot remove", hc_name); 334 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC, 335 ILB_STATUS_INUSE, ucredp); 336 return (ILB_STATUS_INUSE); 337 } 338 339 if ((ret = ilbd_destroy_pg(ILBD_SCF_HC, hc_name)) != 340 ILB_STATUS_OK) { 341 logdebug("ilbd_destroy_hc: cannot destroy healthcheck %s " 342 "property group", hc_name); 343 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC, 344 ret, ucredp); 345 return (ret); 346 } 347 348 list_remove(&ilbd_hc_list, hc); 349 list_destroy(&hc->ihc_rules); 350 free(hc); 351 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC, ret, ucredp); 352 return (ret); 353 } 354 355 /* 356 * Given a hc object name, return its information. Used by libilb to 357 * get hc info. 358 */ 359 ilb_status_t 360 ilbd_get_hc_info(const char *hc_name, uint32_t *rbuf, size_t *rbufsz) 361 { 362 ilbd_hc_t *hc; 363 ilb_hc_info_t *hc_info; 364 ilb_comm_t *ic = (ilb_comm_t *)rbuf; 365 366 hc = ilbd_get_hc(hc_name); 367 if (hc == NULL) { 368 logdebug("%s: healthcheck %s does not exist", __func__, 369 hc_name); 370 return (ILB_STATUS_ENOENT); 371 } 372 ilbd_reply_ok(rbuf, rbufsz); 373 hc_info = (ilb_hc_info_t *)&ic->ic_data; 374 375 (void) strlcpy(hc_info->hci_name, hc->ihc_name, sizeof (hc->ihc_name)); 376 (void) strlcpy(hc_info->hci_test, hc->ihc_test, sizeof (hc->ihc_test)); 377 hc_info->hci_timeout = hc->ihc_timeout; 378 hc_info->hci_count = hc->ihc_count; 379 hc_info->hci_interval = hc->ihc_interval; 380 hc_info->hci_def_ping = hc->ihc_def_ping; 381 382 *rbufsz += sizeof (ilb_hc_info_t); 383 384 return (ILB_STATUS_OK); 385 } 386 387 static void 388 ilbd_hc_copy_srvs(uint32_t *rbuf, size_t *rbufsz, ilbd_hc_rule_t *hc_rule, 389 const char *rulename) 390 { 391 ilbd_hc_srv_t *tmp_srv; 392 ilb_hc_srv_t *dst_srv; 393 ilb_hc_rule_srv_t *srvs; 394 size_t tmp_rbufsz; 395 int i; 396 397 tmp_rbufsz = *rbufsz; 398 /* Set up the reply buffer. rbufsz will be set to the new size. */ 399 ilbd_reply_ok(rbuf, rbufsz); 400 401 /* Calculate how much space is left for holding server info. */ 402 *rbufsz += sizeof (ilb_hc_rule_srv_t); 403 tmp_rbufsz -= *rbufsz; 404 405 srvs = (ilb_hc_rule_srv_t *)&((ilb_comm_t *)rbuf)->ic_data; 406 407 tmp_srv = list_head(&hc_rule->hcr_servers); 408 for (i = 0; tmp_srv != NULL && tmp_rbufsz >= sizeof (*dst_srv); i++) { 409 dst_srv = &srvs->rs_srvs[i]; 410 411 (void) strlcpy(dst_srv->hcs_rule_name, rulename, ILB_NAMESZ); 412 (void) strlcpy(dst_srv->hcs_ID, tmp_srv->shc_sg_srv->sgs_srvID, 413 ILB_NAMESZ); 414 (void) strlcpy(dst_srv->hcs_hc_name, 415 tmp_srv->shc_hc->ihc_name, ILB_NAMESZ); 416 dst_srv->hcs_IP = tmp_srv->shc_sg_srv->sgs_addr; 417 dst_srv->hcs_fail_cnt = tmp_srv->shc_fail_cnt; 418 dst_srv->hcs_status = tmp_srv->shc_status; 419 dst_srv->hcs_rtt = tmp_srv->shc_rtt; 420 dst_srv->hcs_lasttime = tmp_srv->shc_lasttime; 421 dst_srv->hcs_nexttime = tmp_srv->shc_nexttime; 422 423 tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv); 424 tmp_rbufsz -= sizeof (*dst_srv); 425 } 426 srvs->rs_num_srvs = i; 427 *rbufsz += i * sizeof (*dst_srv); 428 } 429 430 /* 431 * Given a rule name, return the hc status of its servers. 432 */ 433 ilb_status_t 434 ilbd_get_hc_srvs(const char *rulename, uint32_t *rbuf, size_t *rbufsz) 435 { 436 ilbd_hc_t *hc; 437 ilbd_hc_rule_t *hc_rule; 438 439 for (hc = list_head(&ilbd_hc_list); hc != NULL; 440 hc = list_next(&ilbd_hc_list, hc)) { 441 for (hc_rule = list_head(&hc->ihc_rules); hc_rule != NULL; 442 hc_rule = list_next(&hc->ihc_rules, hc_rule)) { 443 if (strcasecmp(hc_rule->hcr_rule->irl_name, 444 rulename) != 0) { 445 continue; 446 } 447 ilbd_hc_copy_srvs(rbuf, rbufsz, hc_rule, rulename); 448 return (ILB_STATUS_OK); 449 } 450 } 451 return (ILB_STATUS_RULE_NO_HC); 452 } 453 454 /* 455 * Initialize the hc timer and associate the notification of timeout to 456 * the given event port. 457 */ 458 void 459 ilbd_hc_timer_init(int ev_port, ilbd_timer_event_obj_t *ev_obj) 460 { 461 struct sigevent sigev; 462 port_notify_t notify; 463 464 if ((ilbd_hc_timer_q = iu_tq_create()) == NULL) { 465 logerr("%s: cannot create hc timer queue", __func__); 466 exit(EXIT_FAILURE); 467 } 468 hc_timer_restarted = B_FALSE; 469 470 ev_obj->ev = ILBD_EVENT_TIMER; 471 ev_obj->timerid = -1; 472 473 notify.portnfy_port = ev_port; 474 notify.portnfy_user = ev_obj; 475 sigev.sigev_notify = SIGEV_PORT; 476 sigev.sigev_value.sival_ptr = ¬ify; 477 if (timer_create(CLOCK_REALTIME, &sigev, &ev_obj->timerid) == -1) { 478 logerr("%s: cannot create timer", __func__); 479 exit(EXIT_FAILURE); 480 } 481 } 482 483 /* 484 * HC timeout handler. 485 */ 486 void 487 ilbd_hc_timeout(void) 488 { 489 (void) iu_expire_timers(ilbd_hc_timer_q); 490 hc_timer_restarted = B_TRUE; 491 } 492 493 /* 494 * Set up the timer to fire at the earliest timeout. 495 */ 496 void 497 ilbd_hc_timer_update(ilbd_timer_event_obj_t *ev_obj) 498 { 499 itimerspec_t itimeout; 500 int timeout; 501 502 /* 503 * There is no change on the timer list, so no need to set up the 504 * timer again. 505 */ 506 if (!hc_timer_restarted) 507 return; 508 509 restart: 510 if ((timeout = iu_earliest_timer(ilbd_hc_timer_q)) == INFTIM) { 511 hc_timer_restarted = B_FALSE; 512 return; 513 } else if (timeout == 0) { 514 /* 515 * Handle the timeout immediately. After that (clearing all 516 * the expired timers), check to see if there are still 517 * timers running. If yes, start them. 518 */ 519 (void) iu_expire_timers(ilbd_hc_timer_q); 520 goto restart; 521 } 522 523 itimeout.it_value.tv_sec = timeout / MILLISEC + 1; 524 itimeout.it_value.tv_nsec = 0; 525 itimeout.it_interval.tv_sec = 0; 526 itimeout.it_interval.tv_nsec = 0; 527 528 /* 529 * Failure to set a timeout is "OK" since hopefully there will be 530 * other events and timer_settime() will be called again. So 531 * we will only miss some timeouts. But in the worst case, no event 532 * will happen and ilbd will get stuck... 533 */ 534 if (timer_settime(ev_obj->timerid, 0, &itimeout, NULL) == -1) 535 logerr("%s: cannot set timer", __func__); 536 hc_timer_restarted = B_FALSE; 537 } 538 539 /* 540 * Kill the probe process of a server. 541 */ 542 static void 543 ilbd_hc_kill_probe(ilbd_hc_srv_t *srv) 544 { 545 /* 546 * First dissociate the fd from the event port. It should not 547 * fail. 548 */ 549 if (port_dissociate(srv->shc_ev_port, PORT_SOURCE_FD, 550 srv->shc_child_fd) != 0) { 551 logdebug("%s: port_dissociate: %s", __func__, strerror(errno)); 552 } 553 (void) close(srv->shc_child_fd); 554 free(srv->shc_ev); 555 srv->shc_ev = NULL; 556 557 /* Then kill the probe process. */ 558 if (kill(srv->shc_child_pid, SIGKILL) != 0) { 559 logerr("%s: rule %s server %s: %s", __func__, 560 srv->shc_hc_rule->hcr_rule->irl_name, 561 srv->shc_sg_srv->sgs_srvID, strerror(errno)); 562 } 563 /* Should not fail... */ 564 if (waitpid(srv->shc_child_pid, NULL, 0) != srv->shc_child_pid) { 565 logdebug("%s: waitpid: rule %s server %s", __func__, 566 srv->shc_hc_rule->hcr_rule->irl_name, 567 srv->shc_sg_srv->sgs_srvID); 568 } 569 srv->shc_child_pid = 0; 570 } 571 572 /* 573 * Disable the server, either because the server is dead or because a timer 574 * cannot be started for this server. Note that this only affects the 575 * transient configuration, meaning only in memory. The persistent 576 * configuration is not affected. 577 */ 578 static void 579 ilbd_mark_server_disabled(ilbd_hc_srv_t *srv) 580 { 581 srv->shc_status = ILB_HCS_DISABLED; 582 583 /* Disable the server in kernel. */ 584 if (ilbd_k_Xable_server(&srv->shc_sg_srv->sgs_addr, 585 srv->shc_hc_rule->hcr_rule->irl_name, 586 stat_declare_srv_dead) != ILB_STATUS_OK) { 587 logerr("%s: cannot disable server in kernel: rule %s " 588 "server %s", __func__, 589 srv->shc_hc_rule->hcr_rule->irl_name, 590 srv->shc_sg_srv->sgs_srvID); 591 } 592 } 593 594 /* 595 * A probe fails, set the state of the server. 596 */ 597 static void 598 ilbd_set_fail_state(ilbd_hc_srv_t *srv) 599 { 600 if (++srv->shc_fail_cnt < srv->shc_hc->ihc_count) { 601 /* Probe again */ 602 ilbd_hc_probe_timer(ilbd_hc_timer_q, srv); 603 return; 604 } 605 606 logdebug("%s: rule %s server %s fails %u", __func__, 607 srv->shc_hc_rule->hcr_rule->irl_name, srv->shc_sg_srv->sgs_srvID, 608 srv->shc_fail_cnt); 609 610 /* 611 * If this is a ping test, mark the server as 612 * unreachable instead of dead. 613 */ 614 if (srv->shc_hc->ihc_test_type == ILBD_HC_PING || 615 srv->shc_state == ilbd_hc_def_pinging) { 616 srv->shc_status = ILB_HCS_UNREACH; 617 } else { 618 srv->shc_status = ILB_HCS_DEAD; 619 } 620 621 /* Disable the server in kernel. */ 622 if (ilbd_k_Xable_server(&srv->shc_sg_srv->sgs_addr, 623 srv->shc_hc_rule->hcr_rule->irl_name, stat_declare_srv_dead) != 624 ILB_STATUS_OK) { 625 logerr("%s: cannot disable server in kernel: rule %s " 626 "server %s", __func__, 627 srv->shc_hc_rule->hcr_rule->irl_name, 628 srv->shc_sg_srv->sgs_srvID); 629 } 630 631 /* Still keep probing in case the server is alive again. */ 632 if (ilbd_hc_restart_timer(srv->shc_hc, srv) != ILB_STATUS_OK) { 633 /* Only thing to do is to disable the server... */ 634 logerr("%s: cannot restart timer: rule %s server %s", __func__, 635 srv->shc_hc_rule->hcr_rule->irl_name, 636 srv->shc_sg_srv->sgs_srvID); 637 srv->shc_status = ILB_HCS_DISABLED; 638 } 639 } 640 641 /* 642 * A probe process has not returned for the ihc_timeout period, we should 643 * kill it. This function is the handler of this. 644 */ 645 /* ARGSUSED */ 646 static void 647 ilbd_hc_kill_timer(iu_tq_t *tq, void *arg) 648 { 649 ilbd_hc_srv_t *srv = (ilbd_hc_srv_t *)arg; 650 651 ilbd_hc_kill_probe(srv); 652 ilbd_set_fail_state(srv); 653 } 654 655 /* 656 * Probe timeout handler. Send out the appropriate probe. 657 */ 658 /* ARGSUSED */ 659 static void 660 ilbd_hc_probe_timer(iu_tq_t *tq, void *arg) 661 { 662 ilbd_hc_srv_t *srv = (ilbd_hc_srv_t *)arg; 663 664 /* 665 * If starting the probe fails, just pretend that the timeout has 666 * extended. 667 */ 668 if (!ilbd_run_probe(srv)) { 669 /* 670 * If we cannot restart the timer, the only thing we can do 671 * is to disable this server. Hopefully the sys admin will 672 * notice this and enable this server again later. 673 */ 674 if (ilbd_hc_restart_timer(srv->shc_hc, srv) != ILB_STATUS_OK) { 675 logerr("%s: cannot restart timer: rule %s server %s, " 676 "disabling it", __func__, 677 srv->shc_hc_rule->hcr_rule->irl_name, 678 srv->shc_sg_srv->sgs_srvID); 679 ilbd_mark_server_disabled(srv); 680 } 681 return; 682 } 683 684 /* 685 * Similar to above, if kill timer cannot be started, disable the 686 * server. 687 */ 688 if ((srv->shc_tid = iu_schedule_timer(ilbd_hc_timer_q, 689 srv->shc_hc->ihc_timeout, ilbd_hc_kill_timer, srv)) == -1) { 690 logerr("%s: cannot start kill timer: rule %s server %s, " 691 "disabling it", __func__, 692 srv->shc_hc_rule->hcr_rule->irl_name, 693 srv->shc_sg_srv->sgs_srvID); 694 ilbd_mark_server_disabled(srv); 695 } 696 hc_timer_restarted = B_TRUE; 697 } 698 699 /* Restart the periodic timer for a given server. */ 700 static ilb_status_t 701 ilbd_hc_restart_timer(ilbd_hc_t *hc, ilbd_hc_srv_t *srv) 702 { 703 int timeout; 704 705 /* Don't allow the timeout interval to be less than 1s */ 706 timeout = MAX((hc->ihc_interval >> 1) + (gethrtime() % 707 (hc->ihc_interval + 1)), 1); 708 709 /* 710 * If the probe is actually a ping probe, there is no need to 711 * do default pinging. Just skip the step. 712 */ 713 if (hc->ihc_def_ping && hc->ihc_test_type != ILBD_HC_PING) 714 srv->shc_state = ilbd_hc_def_pinging; 715 else 716 srv->shc_state = ilbd_hc_probing; 717 srv->shc_tid = iu_schedule_timer(ilbd_hc_timer_q, timeout, 718 ilbd_hc_probe_timer, srv); 719 720 if (srv->shc_tid == -1) 721 return (ILB_STATUS_TIMER); 722 srv->shc_lasttime = time(NULL); 723 srv->shc_nexttime = time(NULL) + timeout; 724 725 hc_timer_restarted = B_TRUE; 726 return (ILB_STATUS_OK); 727 } 728 729 /* Helper routine to associate a server with its hc object. */ 730 static ilb_status_t 731 ilbd_hc_srv_add(ilbd_hc_t *hc, ilbd_hc_rule_t *hc_rule, 732 const ilb_sg_srv_t *srv, int ev_port) 733 { 734 ilbd_hc_srv_t *new_srv; 735 ilb_status_t ret; 736 737 if ((new_srv = calloc(1, sizeof (ilbd_hc_srv_t))) == NULL) 738 return (ILB_STATUS_ENOMEM); 739 new_srv->shc_hc = hc; 740 new_srv->shc_hc_rule = hc_rule; 741 new_srv->shc_sg_srv = srv; 742 new_srv->shc_ev_port = ev_port; 743 new_srv->shc_tid = -1; 744 new_srv->shc_nexttime = time(NULL); 745 new_srv->shc_lasttime = new_srv->shc_nexttime; 746 747 if ((hc_rule->hcr_rule->irl_flags & ILB_FLAGS_RULE_ENABLED) && 748 ILB_IS_SRV_ENABLED(srv->sgs_flags)) { 749 new_srv->shc_status = ILB_HCS_UNINIT; 750 ret = ilbd_hc_restart_timer(hc, new_srv); 751 if (ret != ILB_STATUS_OK) { 752 free(new_srv); 753 return (ret); 754 } 755 } else { 756 new_srv->shc_status = ILB_HCS_DISABLED; 757 } 758 759 list_insert_tail(&hc_rule->hcr_servers, new_srv); 760 return (ILB_STATUS_OK); 761 } 762 763 /* Handy macro to cancel a server's timer. */ 764 #define HC_CANCEL_TIMER(srv) \ 765 { \ 766 void *arg; \ 767 int ret; \ 768 if ((srv)->shc_tid != -1) { \ 769 ret = iu_cancel_timer(ilbd_hc_timer_q, (srv)->shc_tid, &arg); \ 770 (srv)->shc_tid = -1; \ 771 assert(ret == 1); \ 772 assert(arg == (srv)); \ 773 } \ 774 hc_timer_restarted = B_TRUE; \ 775 } 776 777 /* Helper routine to dissociate a server from its hc object. */ 778 static ilb_status_t 779 ilbd_hc_srv_rem(ilbd_hc_rule_t *hc_rule, const ilb_sg_srv_t *srv) 780 { 781 ilbd_hc_srv_t *tmp_srv; 782 783 for (tmp_srv = list_head(&hc_rule->hcr_servers); tmp_srv != NULL; 784 tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv)) { 785 if (tmp_srv->shc_sg_srv == srv) { 786 list_remove(&hc_rule->hcr_servers, tmp_srv); 787 HC_CANCEL_TIMER(tmp_srv); 788 if (tmp_srv->shc_child_pid != 0) 789 ilbd_hc_kill_probe(tmp_srv); 790 free(tmp_srv); 791 return (ILB_STATUS_OK); 792 } 793 } 794 return (ILB_STATUS_ENOENT); 795 } 796 797 /* Helper routine to dissociate all servers of a rule from its hc object. */ 798 static void 799 ilbd_hc_srv_rem_all(ilbd_hc_rule_t *hc_rule) 800 { 801 ilbd_hc_srv_t *srv; 802 803 while ((srv = list_remove_head(&hc_rule->hcr_servers)) != NULL) { 804 HC_CANCEL_TIMER(srv); 805 if (srv->shc_child_pid != 0) 806 ilbd_hc_kill_probe(srv); 807 free(srv); 808 } 809 } 810 811 /* Associate a rule with its hc object. */ 812 ilb_status_t 813 ilbd_hc_associate_rule(const ilbd_rule_t *rule, int ev_port) 814 { 815 ilbd_hc_t *hc; 816 ilbd_hc_rule_t *hc_rule; 817 ilb_status_t ret; 818 ilbd_sg_t *sg; 819 ilbd_srv_t *ilbd_srv; 820 821 /* The rule is assumed to be initialized appropriately. */ 822 if ((hc = ilbd_get_hc(rule->irl_hcname)) == NULL) { 823 logdebug("ilbd_hc_associate_rule: healthcheck %s does not " 824 "exist", rule->irl_hcname); 825 return (ILB_STATUS_ENOHCINFO); 826 } 827 if ((hc->ihc_test_type == ILBD_HC_TCP && 828 rule->irl_proto != IPPROTO_TCP) || 829 (hc->ihc_test_type == ILBD_HC_UDP && 830 rule->irl_proto != IPPROTO_UDP)) { 831 return (ILB_STATUS_RULE_HC_MISMATCH); 832 } 833 if ((hc_rule = calloc(1, sizeof (ilbd_hc_rule_t))) == NULL) { 834 logdebug("ilbd_hc_associate_rule: out of memory"); 835 return (ILB_STATUS_ENOMEM); 836 } 837 838 hc_rule->hcr_rule = rule; 839 list_create(&hc_rule->hcr_servers, sizeof (ilbd_hc_srv_t), 840 offsetof(ilbd_hc_srv_t, shc_srv_link)); 841 842 /* Add all the servers. */ 843 sg = rule->irl_sg; 844 for (ilbd_srv = list_head(&sg->isg_srvlist); ilbd_srv != NULL; 845 ilbd_srv = list_next(&sg->isg_srvlist, ilbd_srv)) { 846 if ((ret = ilbd_hc_srv_add(hc, hc_rule, &ilbd_srv->isv_srv, 847 ev_port)) != ILB_STATUS_OK) { 848 /* Remove all previously added servers */ 849 ilbd_hc_srv_rem_all(hc_rule); 850 list_destroy(&hc_rule->hcr_servers); 851 free(hc_rule); 852 return (ret); 853 } 854 } 855 list_insert_tail(&hc->ihc_rules, hc_rule); 856 hc->ihc_rule_cnt++; 857 858 return (ILB_STATUS_OK); 859 } 860 861 /* Dissociate a rule from its hc object. */ 862 ilb_status_t 863 ilbd_hc_dissociate_rule(const ilbd_rule_t *rule) 864 { 865 ilbd_hc_t *hc; 866 ilbd_hc_rule_t *hc_rule; 867 868 /* The rule is assumed to be initialized appropriately. */ 869 if ((hc = ilbd_get_hc(rule->irl_hcname)) == NULL) { 870 logdebug("ilbd_hc_dissociate_rule: healthcheck %s does not " 871 "exist", rule->irl_hcname); 872 return (ILB_STATUS_ENOENT); 873 } 874 for (hc_rule = list_head(&hc->ihc_rules); hc_rule != NULL; 875 hc_rule = list_next(&hc->ihc_rules, hc_rule)) { 876 if (hc_rule->hcr_rule == rule) 877 break; 878 } 879 if (hc_rule == NULL) { 880 logdebug("ilbd_hc_dissociate_rule: rule %s is not associated " 881 "with healtcheck %s", rule->irl_hcname, hc->ihc_name); 882 return (ILB_STATUS_ENOENT); 883 } 884 ilbd_hc_srv_rem_all(hc_rule); 885 list_remove(&hc->ihc_rules, hc_rule); 886 hc->ihc_rule_cnt--; 887 list_destroy(&hc_rule->hcr_servers); 888 free(hc_rule); 889 return (ILB_STATUS_OK); 890 } 891 892 /* 893 * Given a hc object name and a rule, check to see if the rule is associated 894 * with the hc object. If it is, the hc object is returned in **hc and the 895 * ilbd_hc_rule_t is returned in **hc_rule. 896 */ 897 static boolean_t 898 ilbd_hc_check_rule(const char *hc_name, const ilbd_rule_t *rule, 899 ilbd_hc_t **hc, ilbd_hc_rule_t **hc_rule) 900 { 901 ilbd_hc_t *tmp_hc; 902 ilbd_hc_rule_t *tmp_hc_rule; 903 904 if ((tmp_hc = ilbd_get_hc(hc_name)) == NULL) 905 return (B_FALSE); 906 for (tmp_hc_rule = list_head(&tmp_hc->ihc_rules); tmp_hc_rule != NULL; 907 tmp_hc_rule = list_next(&tmp_hc->ihc_rules, tmp_hc_rule)) { 908 if (tmp_hc_rule->hcr_rule == rule) { 909 *hc = tmp_hc; 910 *hc_rule = tmp_hc_rule; 911 return (B_TRUE); 912 } 913 } 914 return (B_FALSE); 915 } 916 917 /* Associate a server with its hc object. */ 918 ilb_status_t 919 ilbd_hc_add_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv, 920 int ev_port) 921 { 922 ilbd_hc_t *hc; 923 ilbd_hc_rule_t *hc_rule; 924 925 if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule)) 926 return (ILB_STATUS_ENOENT); 927 return (ilbd_hc_srv_add(hc, hc_rule, srv, ev_port)); 928 } 929 930 /* Dissociate a server from its hc object. */ 931 ilb_status_t 932 ilbd_hc_del_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv) 933 { 934 ilbd_hc_t *hc; 935 ilbd_hc_rule_t *hc_rule; 936 937 if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule)) 938 return (ILB_STATUS_ENOENT); 939 return (ilbd_hc_srv_rem(hc_rule, srv)); 940 } 941 942 /* Helper routine to enable/disable a server's hc probe. */ 943 static ilb_status_t 944 ilbd_hc_toggle_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv, 945 boolean_t enable) 946 { 947 ilbd_hc_t *hc; 948 ilbd_hc_rule_t *hc_rule; 949 ilbd_hc_srv_t *tmp_srv; 950 ilb_status_t ret; 951 952 if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule)) 953 return (ILB_STATUS_ENOENT); 954 for (tmp_srv = list_head(&hc_rule->hcr_servers); tmp_srv != NULL; 955 tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv)) { 956 if (tmp_srv->shc_sg_srv != srv) { 957 continue; 958 } 959 if (enable) { 960 if (tmp_srv->shc_status == ILB_HCS_DISABLED) { 961 ret = ilbd_hc_restart_timer(hc, tmp_srv); 962 if (ret != ILB_STATUS_OK) { 963 logerr("%s: cannot start timers for " 964 "rule %s server %s", __func__, 965 rule->irl_name, 966 tmp_srv->shc_sg_srv->sgs_srvID); 967 return (ret); 968 } 969 /* Start from fresh... */ 970 tmp_srv->shc_status = ILB_HCS_UNINIT; 971 tmp_srv->shc_rtt = 0; 972 tmp_srv->shc_fail_cnt = 0; 973 } 974 } else { 975 if (tmp_srv->shc_status != ILB_HCS_DISABLED) { 976 tmp_srv->shc_status = ILB_HCS_DISABLED; 977 HC_CANCEL_TIMER(tmp_srv); 978 if (tmp_srv->shc_child_pid != 0) 979 ilbd_hc_kill_probe(tmp_srv); 980 } 981 } 982 return (ILB_STATUS_OK); 983 } 984 return (ILB_STATUS_ENOENT); 985 } 986 987 ilb_status_t 988 ilbd_hc_enable_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv) 989 { 990 return (ilbd_hc_toggle_server(rule, srv, B_TRUE)); 991 } 992 993 ilb_status_t 994 ilbd_hc_disable_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv) 995 { 996 return (ilbd_hc_toggle_server(rule, srv, B_FALSE)); 997 } 998 999 /* 1000 * Helper routine to enable/disable a rule's hc probe (including all its 1001 * servers). 1002 */ 1003 static ilb_status_t 1004 ilbd_hc_toggle_rule(const ilbd_rule_t *rule, boolean_t enable) 1005 { 1006 ilbd_hc_t *hc; 1007 ilbd_hc_rule_t *hc_rule; 1008 ilbd_hc_srv_t *tmp_srv; 1009 int ret; 1010 1011 if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule)) 1012 return (ILB_STATUS_ENOENT); 1013 1014 for (tmp_srv = list_head(&hc_rule->hcr_servers); tmp_srv != NULL; 1015 tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv)) { 1016 if (enable) { 1017 /* 1018 * If the server is disabled in the rule, do not 1019 * restart its timer. 1020 */ 1021 if (tmp_srv->shc_status == ILB_HCS_DISABLED && 1022 ILB_IS_SRV_ENABLED( 1023 tmp_srv->shc_sg_srv->sgs_flags)) { 1024 ret = ilbd_hc_restart_timer(hc, tmp_srv); 1025 if (ret != ILB_STATUS_OK) { 1026 logerr("%s: cannot start timers for " 1027 "rule %s server %s", __func__, 1028 rule->irl_name, 1029 tmp_srv->shc_sg_srv->sgs_srvID); 1030 goto rollback; 1031 } else { 1032 /* Start from fresh... */ 1033 tmp_srv->shc_status = ILB_HCS_UNINIT; 1034 tmp_srv->shc_rtt = 0; 1035 tmp_srv->shc_fail_cnt = 0; 1036 } 1037 } 1038 } else { 1039 if (tmp_srv->shc_status != ILB_HCS_DISABLED) { 1040 HC_CANCEL_TIMER(tmp_srv); 1041 tmp_srv->shc_status = ILB_HCS_DISABLED; 1042 if (tmp_srv->shc_child_pid != 0) 1043 ilbd_hc_kill_probe(tmp_srv); 1044 } 1045 } 1046 } 1047 return (ILB_STATUS_OK); 1048 rollback: 1049 enable = !enable; 1050 for (tmp_srv = list_prev(&hc_rule->hcr_servers, tmp_srv); 1051 tmp_srv != NULL; 1052 tmp_srv = list_prev(&hc_rule->hcr_servers, tmp_srv)) { 1053 if (enable) { 1054 if (tmp_srv->shc_status == ILB_HCS_DISABLED && 1055 ILB_IS_SRV_ENABLED( 1056 tmp_srv->shc_sg_srv->sgs_flags)) { 1057 (void) ilbd_hc_restart_timer(hc, tmp_srv); 1058 tmp_srv->shc_status = ILB_HCS_UNINIT; 1059 tmp_srv->shc_rtt = 0; 1060 tmp_srv->shc_fail_cnt = 0; 1061 } 1062 } else { 1063 if (tmp_srv->shc_status != ILB_HCS_DISABLED) { 1064 HC_CANCEL_TIMER(tmp_srv); 1065 tmp_srv->shc_status = ILB_HCS_DISABLED; 1066 if (tmp_srv->shc_child_pid != 0) 1067 ilbd_hc_kill_probe(tmp_srv); 1068 } 1069 } 1070 } 1071 return (ret); 1072 } 1073 1074 ilb_status_t 1075 ilbd_hc_enable_rule(const ilbd_rule_t *rule) 1076 { 1077 return (ilbd_hc_toggle_rule(rule, B_TRUE)); 1078 } 1079 1080 ilb_status_t 1081 ilbd_hc_disable_rule(const ilbd_rule_t *rule) 1082 { 1083 return (ilbd_hc_toggle_rule(rule, B_FALSE)); 1084 } 1085 1086 static const char * 1087 topo_2_str(ilb_topo_t topo) 1088 { 1089 switch (topo) { 1090 case ILB_TOPO_DSR: 1091 return ("DSR"); 1092 case ILB_TOPO_NAT: 1093 return ("NAT"); 1094 case ILB_TOPO_HALF_NAT: 1095 return ("HALF_NAT"); 1096 default: 1097 /* Should not happen. */ 1098 logerr("%s: unknown topology", __func__); 1099 break; 1100 } 1101 return (""); 1102 } 1103 1104 /* 1105 * Create the argument list to be passed to a hc probe command. 1106 * The passed in argv is assumed to have HC_PROBE_ARGC elements. 1107 */ 1108 static boolean_t 1109 create_argv(ilbd_hc_srv_t *srv, char *argv[]) 1110 { 1111 char buf[INET6_ADDRSTRLEN]; 1112 ilbd_rule_t const *rule; 1113 ilb_sg_srv_t const *sg_srv; 1114 struct in_addr v4_addr; 1115 in_port_t port; 1116 int i; 1117 1118 rule = srv->shc_hc_rule->hcr_rule; 1119 sg_srv = srv->shc_sg_srv; 1120 1121 if (srv->shc_state == ilbd_hc_def_pinging) { 1122 if ((argv[0] = strdup(ILB_PROBE_PING)) == NULL) 1123 return (B_FALSE); 1124 } else { 1125 switch (srv->shc_hc->ihc_test_type) { 1126 case ILBD_HC_USER: 1127 if ((argv[0] = strdup(srv->shc_hc->ihc_test)) == NULL) 1128 return (B_FALSE); 1129 break; 1130 case ILBD_HC_TCP: 1131 case ILBD_HC_UDP: 1132 if ((argv[0] = strdup(ILB_PROBE_PROTO)) == 1133 NULL) { 1134 return (B_FALSE); 1135 } 1136 break; 1137 case ILBD_HC_PING: 1138 if ((argv[0] = strdup(ILB_PROBE_PING)) == NULL) { 1139 return (B_FALSE); 1140 } 1141 break; 1142 } 1143 } 1144 1145 /* 1146 * argv[1] is the VIP. 1147 * 1148 * Right now, the VIP and the backend server addresses should be 1149 * in the same IP address family. Here we don't do that in case 1150 * this assumption is changed in future. 1151 */ 1152 if (IN6_IS_ADDR_V4MAPPED(&rule->irl_vip)) { 1153 IN6_V4MAPPED_TO_INADDR(&rule->irl_vip, &v4_addr); 1154 if (inet_ntop(AF_INET, &v4_addr, buf, sizeof (buf)) == NULL) 1155 goto cleanup; 1156 } else { 1157 if (inet_ntop(AF_INET6, &rule->irl_vip, buf, 1158 sizeof (buf)) == NULL) { 1159 goto cleanup; 1160 } 1161 } 1162 if ((argv[1] = strdup(buf)) == NULL) 1163 goto cleanup; 1164 1165 /* 1166 * argv[2] is the backend server address. 1167 */ 1168 if (IN6_IS_ADDR_V4MAPPED(&sg_srv->sgs_addr)) { 1169 IN6_V4MAPPED_TO_INADDR(&sg_srv->sgs_addr, &v4_addr); 1170 if (inet_ntop(AF_INET, &v4_addr, buf, sizeof (buf)) == NULL) 1171 goto cleanup; 1172 } else { 1173 if (inet_ntop(AF_INET6, &sg_srv->sgs_addr, buf, 1174 sizeof (buf)) == NULL) { 1175 goto cleanup; 1176 } 1177 } 1178 if ((argv[2] = strdup(buf)) == NULL) 1179 goto cleanup; 1180 1181 /* 1182 * argv[3] is the transport protocol used in the rule. 1183 */ 1184 switch (rule->irl_proto) { 1185 case IPPROTO_TCP: 1186 argv[3] = strdup("TCP"); 1187 break; 1188 case IPPROTO_UDP: 1189 argv[3] = strdup("UDP"); 1190 break; 1191 default: 1192 logerr("%s: unknown protocol", __func__); 1193 goto cleanup; 1194 } 1195 if (argv[3] == NULL) 1196 goto cleanup; 1197 1198 /* 1199 * argv[4] is the load balance mode, DSR, NAT, HALF-NAT. 1200 */ 1201 if ((argv[4] = strdup(topo_2_str(rule->irl_topo))) == NULL) 1202 goto cleanup; 1203 1204 /* 1205 * argv[5] is the port range. Right now, there should only be 1 port. 1206 */ 1207 switch (rule->irl_hcpflag) { 1208 case ILB_HCI_PROBE_FIX: 1209 port = ntohs(rule->irl_hcport); 1210 break; 1211 case ILB_HCI_PROBE_ANY: { 1212 in_port_t min, max; 1213 1214 if (ntohs(sg_srv->sgs_minport) == 0) { 1215 min = ntohs(rule->irl_minport); 1216 max = ntohs(rule->irl_maxport); 1217 } else { 1218 min = ntohs(sg_srv->sgs_minport); 1219 max = ntohs(sg_srv->sgs_maxport); 1220 } 1221 if (max > min) 1222 port = min + gethrtime() % (max - min + 1); 1223 else 1224 port = min; 1225 break; 1226 } 1227 default: 1228 logerr("%s: unknown HC flag", __func__); 1229 goto cleanup; 1230 } 1231 (void) sprintf(buf, "%d", port); 1232 if ((argv[5] = strdup(buf)) == NULL) 1233 goto cleanup; 1234 1235 /* 1236 * argv[6] is the probe timeout. 1237 */ 1238 (void) sprintf(buf, "%d", srv->shc_hc->ihc_timeout); 1239 if ((argv[6] = strdup(buf)) == NULL) 1240 goto cleanup; 1241 1242 argv[7] = NULL; 1243 return (B_TRUE); 1244 1245 cleanup: 1246 for (i = 0; i < HC_PROBE_ARGC; i++) { 1247 if (argv[i] != NULL) 1248 free(argv[i]); 1249 } 1250 return (B_FALSE); 1251 } 1252 1253 static void 1254 destroy_argv(char *argv[]) 1255 { 1256 int i; 1257 1258 for (i = 0; argv[i] != NULL; i++) 1259 free(argv[i]); 1260 } 1261 1262 /* Spawn a process to run the hc probe on the given server. */ 1263 static boolean_t 1264 ilbd_run_probe(ilbd_hc_srv_t *srv) 1265 { 1266 posix_spawn_file_actions_t fd_actions; 1267 boolean_t init_fd_actions = B_FALSE; 1268 posix_spawnattr_t attr; 1269 boolean_t init_attr = B_FALSE; 1270 sigset_t child_sigset; 1271 int fds[2]; 1272 int fdflags; 1273 pid_t pid; 1274 char *child_argv[HC_PROBE_ARGC]; 1275 ilbd_hc_probe_event_t *probe_ev; 1276 char *probe_name; 1277 1278 bzero(child_argv, HC_PROBE_ARGC * sizeof (char *)); 1279 if ((probe_ev = calloc(1, sizeof (*probe_ev))) == NULL) { 1280 logdebug("ilbd_run_probe: calloc"); 1281 return (B_FALSE); 1282 } 1283 1284 /* Set up a pipe to get output from probe command. */ 1285 if (pipe(fds) < 0) { 1286 logdebug("ilbd_run_probe: cannot create pipe"); 1287 free(probe_ev); 1288 return (B_FALSE); 1289 } 1290 /* Set our side of the pipe to be non-blocking */ 1291 if ((fdflags = fcntl(fds[0], F_GETFL, 0)) == -1) { 1292 logdebug("ilbd_run_probe: fcntl(F_GETFL)"); 1293 goto cleanup; 1294 } 1295 if (fcntl(fds[0], F_SETFL, fdflags | O_NONBLOCK) == -1) { 1296 logdebug("ilbd_run_probe: fcntl(F_SETFL)"); 1297 goto cleanup; 1298 } 1299 1300 if (posix_spawn_file_actions_init(&fd_actions) != 0) { 1301 logdebug("ilbd_run_probe: posix_spawn_file_actions_init"); 1302 goto cleanup; 1303 } 1304 init_fd_actions = B_TRUE; 1305 if (posix_spawnattr_init(&attr) != 0) { 1306 logdebug("ilbd_run_probe: posix_spawnattr_init"); 1307 goto cleanup; 1308 } 1309 init_attr = B_TRUE; 1310 if (posix_spawn_file_actions_addclose(&fd_actions, fds[0]) != 0) { 1311 logdebug("ilbd_run_probe: posix_spawn_file_actions_addclose"); 1312 goto cleanup; 1313 } 1314 if (posix_spawn_file_actions_adddup2(&fd_actions, fds[1], 1315 STDOUT_FILENO) != 0) { 1316 logdebug("ilbd_run_probe: posix_spawn_file_actions_dup2"); 1317 goto cleanup; 1318 } 1319 if (posix_spawn_file_actions_addclose(&fd_actions, fds[1]) != 0) { 1320 logdebug("ilbd_run_probe: posix_spawn_file_actions_addclose"); 1321 goto cleanup; 1322 } 1323 1324 /* Reset all signal handling of the child to default. */ 1325 (void) sigfillset(&child_sigset); 1326 if (posix_spawnattr_setsigdefault(&attr, &child_sigset) != 0) { 1327 logdebug("ilbd_run_probe: posix_spawnattr_setsigdefault"); 1328 goto cleanup; 1329 } 1330 /* Don't want SIGCHLD. */ 1331 if (posix_spawnattr_setflags(&attr, POSIX_SPAWN_NOSIGCHLD_NP| 1332 POSIX_SPAWN_SETSIGDEF) != 0) { 1333 logdebug("ilbd_run_probe: posix_spawnattr_setflags"); 1334 goto cleanup; 1335 } 1336 1337 if (!create_argv(srv, child_argv)) { 1338 logdebug("ilbd_run_probe: create_argv"); 1339 goto cleanup; 1340 } 1341 1342 /* 1343 * If we are doing default pinging or not using a user supplied 1344 * probe, we should execute our standard supplied probe. The 1345 * supplied probe command handles all types of probes. And the 1346 * type used depends on argv[0], as filled in by create_argv(). 1347 */ 1348 if (srv->shc_state == ilbd_hc_def_pinging || 1349 srv->shc_hc->ihc_test_type != ILBD_HC_USER) { 1350 probe_name = ILB_PROBE_PROTO; 1351 } else { 1352 probe_name = srv->shc_hc->ihc_test; 1353 } 1354 if (posix_spawn(&pid, probe_name, &fd_actions, &attr, child_argv, 1355 NULL) != 0) { 1356 logerr("%s: posix_spawn: %s for server %s: %s", __func__, 1357 srv->shc_hc->ihc_test, srv->shc_sg_srv->sgs_srvID, 1358 strerror(errno)); 1359 goto cleanup; 1360 } 1361 1362 (void) close(fds[1]); 1363 srv->shc_child_pid = pid; 1364 srv->shc_child_fd = fds[0]; 1365 srv->shc_ev = probe_ev; 1366 1367 probe_ev->ihp_ev = ILBD_EVENT_PROBE; 1368 probe_ev->ihp_srv = srv; 1369 probe_ev->ihp_pid = pid; 1370 if (port_associate(srv->shc_ev_port, PORT_SOURCE_FD, fds[0], 1371 POLLRDNORM, probe_ev) != 0) { 1372 /* 1373 * Need to kill the child. It will free the srv->shc_ev, 1374 * which is probe_ev. So set probe_ev to NULL. 1375 */ 1376 ilbd_hc_kill_probe(srv); 1377 probe_ev = NULL; 1378 goto cleanup; 1379 } 1380 1381 destroy_argv(child_argv); 1382 (void) posix_spawn_file_actions_destroy(&fd_actions); 1383 (void) posix_spawnattr_destroy(&attr); 1384 return (B_TRUE); 1385 1386 cleanup: 1387 destroy_argv(child_argv); 1388 if (init_fd_actions == B_TRUE) 1389 (void) posix_spawn_file_actions_destroy(&fd_actions); 1390 if (init_attr == B_TRUE) 1391 (void) posix_spawnattr_destroy(&attr); 1392 (void) close(fds[0]); 1393 (void) close(fds[1]); 1394 if (probe_ev != NULL) 1395 free(probe_ev); 1396 return (B_FALSE); 1397 } 1398 1399 /* 1400 * Called by ild_hc_probe_return() to re-associate the fd to a child to 1401 * the event port. 1402 */ 1403 static void 1404 reassociate_port(int ev_port, int fd, ilbd_hc_probe_event_t *ev) 1405 { 1406 if (port_associate(ev_port, PORT_SOURCE_FD, fd, 1407 POLLRDNORM, ev) != 0) { 1408 /* 1409 * If we cannot reassociate with the port, the only 1410 * thing we can do now is to kill the child and 1411 * do a blocking wait here... 1412 */ 1413 logdebug("%s: port_associate: %s", __func__, strerror(errno)); 1414 if (kill(ev->ihp_pid, SIGKILL) != 0) 1415 logerr("%s: kill: %s", __func__, strerror(errno)); 1416 if (waitpid(ev->ihp_pid, NULL, 0) != ev->ihp_pid) 1417 logdebug("%s: waitpid: %s", __func__, strerror(errno)); 1418 free(ev); 1419 } 1420 } 1421 1422 /* 1423 * To handle a child probe process hanging up. 1424 */ 1425 static void 1426 ilbd_hc_child_hup(int ev_port, int fd, ilbd_hc_probe_event_t *ev) 1427 { 1428 ilbd_hc_srv_t *srv; 1429 pid_t ret_pid; 1430 int ret; 1431 1432 srv = ev->ihp_srv; 1433 1434 if (!ev->ihp_done) { 1435 /* ilbd does not care about this process anymore ... */ 1436 ev->ihp_done = B_TRUE; 1437 srv->shc_ev = NULL; 1438 srv->shc_child_pid = 0; 1439 HC_CANCEL_TIMER(srv); 1440 ilbd_set_fail_state(srv); 1441 } 1442 ret_pid = waitpid(ev->ihp_pid, &ret, WNOHANG); 1443 switch (ret_pid) { 1444 case -1: 1445 logperror("ilbd_hc_child_hup: waitpid"); 1446 /* FALLTHROUGH */ 1447 case 0: 1448 /* The child has not completed the exit. Wait again. */ 1449 reassociate_port(ev_port, fd, ev); 1450 break; 1451 default: 1452 /* Right now, we just ignore the exit status. */ 1453 if (WIFEXITED(ret)) 1454 ret = WEXITSTATUS(ret); 1455 (void) close(fd); 1456 free(ev); 1457 } 1458 } 1459 1460 /* 1461 * To read the output of a child probe process. 1462 */ 1463 static void 1464 ilbd_hc_child_data(int fd, ilbd_hc_probe_event_t *ev) 1465 { 1466 ilbd_hc_srv_t *srv; 1467 char buf[HC_MAX_PROBE_OUTPUT]; 1468 int ret; 1469 int64_t rtt; 1470 1471 srv = ev->ihp_srv; 1472 1473 bzero(buf, HC_MAX_PROBE_OUTPUT); 1474 ret = read(fd, buf, HC_MAX_PROBE_OUTPUT - 1); 1475 /* Should not happen since event port should have caught this. */ 1476 assert(ret > 0); 1477 1478 /* 1479 * We expect the probe command to print out the RTT only. But 1480 * the command may misbehave and print out more than what we intend to 1481 * read in. So need to do this check below to "flush" out all the 1482 * output from the command. 1483 */ 1484 if (!ev->ihp_done) { 1485 ev->ihp_done = B_TRUE; 1486 /* We don't need to know about this event anymore. */ 1487 srv->shc_ev = NULL; 1488 srv->shc_child_pid = 0; 1489 HC_CANCEL_TIMER(srv); 1490 } else { 1491 return; 1492 } 1493 1494 rtt = strtoll(buf, NULL, 10); 1495 1496 /* 1497 * -1 means the server is dead or the probe somehow fails. Treat 1498 * them both as server is dead. 1499 */ 1500 if (rtt == -1) { 1501 ilbd_set_fail_state(srv); 1502 return; 1503 } else if (rtt > 0) { 1504 /* If the returned RTT value is not valid, just ignore it. */ 1505 if (rtt > 0 && rtt <= UINT_MAX) { 1506 /* Set rtt to be the simple smoothed average. */ 1507 if (srv->shc_rtt == 0) { 1508 srv->shc_rtt = rtt; 1509 } else { 1510 srv->shc_rtt = 3 * ((srv)->shc_rtt >> 2) + 1511 (rtt >> 2); 1512 } 1513 } 1514 1515 } 1516 1517 switch (srv->shc_state) { 1518 case ilbd_hc_def_pinging: 1519 srv->shc_state = ilbd_hc_probing; 1520 1521 /* Ping is OK, now start the probe. */ 1522 ilbd_hc_probe_timer(ilbd_hc_timer_q, srv); 1523 break; 1524 case ilbd_hc_probing: 1525 srv->shc_fail_cnt = 0; 1526 1527 /* Server is dead before, re-enable it. */ 1528 if (srv->shc_status == ILB_HCS_UNREACH || 1529 srv->shc_status == ILB_HCS_DEAD) { 1530 /* 1531 * If enabling the server in kernel fails now, 1532 * hopefully when the timer fires again later, the 1533 * enabling can be done. 1534 */ 1535 if (ilbd_k_Xable_server(&srv->shc_sg_srv->sgs_addr, 1536 srv->shc_hc_rule->hcr_rule->irl_name, 1537 stat_declare_srv_alive) != ILB_STATUS_OK) { 1538 logerr("%s: cannot enable server in kernel: " 1539 " rule %s server %s", __func__, 1540 srv->shc_hc_rule->hcr_rule->irl_name, 1541 srv->shc_sg_srv->sgs_srvID); 1542 } else { 1543 srv->shc_status = ILB_HCS_ALIVE; 1544 } 1545 } else { 1546 srv->shc_status = ILB_HCS_ALIVE; 1547 } 1548 if (ilbd_hc_restart_timer(srv->shc_hc, srv) != ILB_STATUS_OK) { 1549 logerr("%s: cannot restart timer: rule %s server %s", 1550 __func__, srv->shc_hc_rule->hcr_rule->irl_name, 1551 srv->shc_sg_srv->sgs_srvID); 1552 ilbd_mark_server_disabled(srv); 1553 } 1554 break; 1555 default: 1556 logdebug("%s: unknown state", __func__); 1557 break; 1558 } 1559 } 1560 1561 /* 1562 * Handle the return event of a child probe fd. 1563 */ 1564 void 1565 ilbd_hc_probe_return(int ev_port, int fd, int port_events, 1566 ilbd_hc_probe_event_t *ev) 1567 { 1568 /* 1569 * Note that there can be more than one events delivered to us at 1570 * the same time. So we need to check them individually. 1571 */ 1572 if (port_events & POLLRDNORM) 1573 ilbd_hc_child_data(fd, ev); 1574 1575 if (port_events & (POLLHUP|POLLERR)) { 1576 ilbd_hc_child_hup(ev_port, fd, ev); 1577 return; 1578 } 1579 1580 /* 1581 * Re-associate the fd with the port so that when the child 1582 * exits, we can reap the status. 1583 */ 1584 reassociate_port(ev_port, fd, ev); 1585 } 1586