1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/socket.h> 29 #include <sys/list.h> 30 #include <sys/stropts.h> 31 #include <sys/siginfo.h> 32 #include <sys/wait.h> 33 #include <arpa/inet.h> 34 #include <netinet/in.h> 35 #include <stdlib.h> 36 #include <stdio.h> 37 #include <strings.h> 38 #include <stddef.h> 39 #include <unistd.h> 40 #include <libilb.h> 41 #include <port.h> 42 #include <time.h> 43 #include <signal.h> 44 #include <assert.h> 45 #include <errno.h> 46 #include <spawn.h> 47 #include <fcntl.h> 48 #include <limits.h> 49 #include "libilb_impl.h" 50 #include "ilbd.h" 51 52 /* Global list of HC objects */ 53 list_t ilbd_hc_list; 54 55 /* Timer queue for all hc related timers. */ 56 static iu_tq_t *ilbd_hc_timer_q; 57 58 /* Indicate whether the timer needs to be updated */ 59 static boolean_t hc_timer_restarted; 60 61 static void ilbd_hc_probe_timer(iu_tq_t *, void *); 62 static ilb_status_t ilbd_hc_restart_timer(ilbd_hc_t *, ilbd_hc_srv_t *); 63 static boolean_t ilbd_run_probe(ilbd_hc_srv_t *); 64 65 #define MAX(a, b) ((a) > (b) ? (a) : (b)) 66 67 /* 68 * Number of arguments passed to a probe. argc[0] is the path name of 69 * the probe. 70 */ 71 #define HC_PROBE_ARGC 8 72 73 /* 74 * Max number of characters to be read from the output of a probe. It 75 * is long enough to read in a 64 bit integer. 76 */ 77 #define HC_MAX_PROBE_OUTPUT 24 78 79 void 80 i_ilbd_setup_hc_list(void) 81 { 82 list_create(&ilbd_hc_list, sizeof (ilbd_hc_t), 83 offsetof(ilbd_hc_t, ihc_link)); 84 } 85 86 /* 87 * Given a hc object name, return a pointer to hc object if found. 88 */ 89 ilbd_hc_t * 90 ilbd_get_hc(const char *name) 91 { 92 ilbd_hc_t *hc; 93 94 for (hc = list_head(&ilbd_hc_list); hc != NULL; 95 hc = list_next(&ilbd_hc_list, hc)) { 96 if (strcasecmp(hc->ihc_name, name) == 0) 97 return (hc); 98 } 99 return (NULL); 100 } 101 102 /* 103 * Generates an audit record for create-healthcheck, 104 * delete-healtcheck subcommands. 105 */ 106 static void 107 ilbd_audit_hc_event(const char *audit_hcname, 108 const ilb_hc_info_t *audit_hcinfo, ilbd_cmd_t cmd, 109 ilb_status_t rc, ucred_t *ucredp) 110 { 111 adt_session_data_t *ah; 112 adt_event_data_t *event; 113 au_event_t flag; 114 int audit_error; 115 116 if ((ucredp == NULL) && (cmd == ILBD_CREATE_HC)) { 117 /* 118 * we came here from the path where ilbd incorporates 119 * the configuration that is listed in SCF: 120 * i_ilbd_read_config->ilbd_walk_hc_pgs-> 121 * ->ilbd_scf_instance_walk_pg->ilbd_create_hc 122 * We skip auditing in that case 123 */ 124 logdebug("ilbd_audit_hc_event: skipping auditing"); 125 return; 126 } 127 128 if (adt_start_session(&ah, NULL, 0) != 0) { 129 logerr("ilbd_audit_hc_event: adt_start_session failed"); 130 exit(EXIT_FAILURE); 131 } 132 if (adt_set_from_ucred(ah, ucredp, ADT_NEW) != 0) { 133 (void) adt_end_session(ah); 134 logerr("ilbd_audit_rule_event: adt_set_from_ucred failed"); 135 exit(EXIT_FAILURE); 136 } 137 if (cmd == ILBD_CREATE_HC) 138 flag = ADT_ilb_create_healthcheck; 139 else if (cmd == ILBD_DESTROY_HC) 140 flag = ADT_ilb_delete_healthcheck; 141 142 if ((event = adt_alloc_event(ah, flag)) == NULL) { 143 logerr("ilbd_audit_hc_event: adt_alloc_event failed"); 144 exit(EXIT_FAILURE); 145 } 146 (void) memset((char *)event, 0, sizeof (adt_event_data_t)); 147 148 switch (cmd) { 149 case ILBD_CREATE_HC: 150 event->adt_ilb_create_healthcheck.auth_used = 151 NET_ILB_CONFIG_AUTH; 152 event->adt_ilb_create_healthcheck.hc_test = 153 (char *)audit_hcinfo->hci_test; 154 event->adt_ilb_create_healthcheck.hc_name = 155 (char *)audit_hcinfo->hci_name; 156 157 /* 158 * If the value 0 is stored, the default values are 159 * set in the kernel. User land does not know about them 160 * So if the user does not specify them, audit record 161 * will show them as 0 162 */ 163 event->adt_ilb_create_healthcheck.hc_timeout = 164 audit_hcinfo->hci_timeout; 165 event->adt_ilb_create_healthcheck.hc_count = 166 audit_hcinfo->hci_count; 167 event->adt_ilb_create_healthcheck.hc_interval = 168 audit_hcinfo->hci_interval; 169 break; 170 case ILBD_DESTROY_HC: 171 event->adt_ilb_delete_healthcheck.auth_used = 172 NET_ILB_CONFIG_AUTH; 173 event->adt_ilb_delete_healthcheck.hc_name = 174 (char *)audit_hcname; 175 break; 176 } 177 178 /* Fill in success/failure */ 179 if (rc == ILB_STATUS_OK) { 180 if (adt_put_event(event, ADT_SUCCESS, ADT_SUCCESS) != 0) { 181 logerr("ilbd_audit_hc_event: adt_put_event failed"); 182 exit(EXIT_FAILURE); 183 } 184 } else { 185 audit_error = ilberror2auditerror(rc); 186 if (adt_put_event(event, ADT_FAILURE, audit_error) != 0) { 187 logerr("ilbd_audit_hc_event: adt_put_event failed"); 188 exit(EXIT_FAILURE); 189 } 190 } 191 adt_free_event(event); 192 (void) adt_end_session(ah); 193 } 194 195 /* 196 * Given the ilb_hc_info_t passed in (from the libilb), create a hc object 197 * in ilbd. The parameter ev_port is not used, refer to comments of 198 * ilbd_create_sg() in ilbd_sg.c 199 */ 200 /* ARGSUSED */ 201 ilb_status_t 202 ilbd_create_hc(const ilb_hc_info_t *hc_info, int ev_port, 203 const struct passwd *ps, ucred_t *ucredp) 204 { 205 ilbd_hc_t *hc; 206 ilb_status_t ret = ILB_STATUS_OK; 207 208 /* 209 * ps == NULL is from the daemon when it starts and load configuration 210 * ps != NULL is from client. 211 */ 212 if (ps != NULL) { 213 ret = ilbd_check_client_config_auth(ps); 214 if (ret != ILB_STATUS_OK) { 215 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, 216 ret, ucredp); 217 return (ret); 218 } 219 } 220 221 if (hc_info->hci_name[0] == '\0') { 222 logdebug("ilbd_create_hc: missing healthcheck info"); 223 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, 224 ILB_STATUS_ENOHCINFO, ucredp); 225 return (ILB_STATUS_ENOHCINFO); 226 } 227 228 hc = ilbd_get_hc(hc_info->hci_name); 229 if (hc != NULL) { 230 logdebug("ilbd_create_hc: healthcheck name %s already" 231 " exists", hc_info->hci_name); 232 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, 233 ILB_STATUS_EEXIST, ucredp); 234 return (ILB_STATUS_EEXIST); 235 } 236 237 /* 238 * Sanity check on user supplied probe. The given path name 239 * must be a full path name (starts with '/') and is 240 * executable. 241 */ 242 if (strcasecmp(hc_info->hci_test, ILB_HC_STR_TCP) != 0 && 243 strcasecmp(hc_info->hci_test, ILB_HC_STR_UDP) != 0 && 244 strcasecmp(hc_info->hci_test, ILB_HC_STR_PING) != 0 && 245 (hc_info->hci_test[0] != '/' || 246 access(hc_info->hci_test, X_OK) == -1)) { 247 if (errno == ENOENT) { 248 logdebug("ilbd_create_hc: user script %s doesn't " 249 "exist", hc_info->hci_test); 250 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, 251 ILB_STATUS_ENOENT, ucredp); 252 return (ILB_STATUS_ENOENT); 253 } else { 254 logdebug("ilbd_create_hc: user script %s is " 255 "invalid", hc_info->hci_test); 256 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, 257 ILB_STATUS_EINVAL, ucredp); 258 return (ILB_STATUS_EINVAL); 259 } 260 } 261 262 /* Create and add the hc object */ 263 hc = calloc(1, sizeof (ilbd_hc_t)); 264 if (hc == NULL) { 265 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, 266 ILB_STATUS_ENOMEM, ucredp); 267 return (ILB_STATUS_ENOMEM); 268 } 269 (void) memcpy(&hc->ihc_info, hc_info, sizeof (ilb_hc_info_t)); 270 if (strcasecmp(hc->ihc_test, ILB_HC_STR_TCP) == 0) 271 hc->ihc_test_type = ILBD_HC_TCP; 272 else if (strcasecmp(hc->ihc_test, ILB_HC_STR_UDP) == 0) 273 hc->ihc_test_type = ILBD_HC_UDP; 274 else if (strcasecmp(hc->ihc_test, ILB_HC_STR_PING) == 0) 275 hc->ihc_test_type = ILBD_HC_PING; 276 else 277 hc->ihc_test_type = ILBD_HC_USER; 278 list_create(&hc->ihc_rules, sizeof (ilbd_hc_rule_t), 279 offsetof(ilbd_hc_rule_t, hcr_link)); 280 281 /* Update SCF */ 282 if (ps != NULL) { 283 if ((ret = ilbd_create_pg(ILBD_SCF_HC, (void *)hc)) != 284 ILB_STATUS_OK) { 285 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, 286 ret, ucredp); 287 free(hc); 288 return (ret); 289 } 290 } 291 292 /* Everything is fine, now add it to the global list. */ 293 list_insert_tail(&ilbd_hc_list, hc); 294 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, ret, ucredp); 295 return (ret); 296 } 297 298 /* 299 * Given a name of a hc object, destroy it. 300 */ 301 ilb_status_t 302 ilbd_destroy_hc(const char *hc_name, const struct passwd *ps, 303 ucred_t *ucredp) 304 { 305 ilb_status_t ret; 306 ilbd_hc_t *hc; 307 308 /* 309 * No need to check ps == NULL, daemon won't call any destroy func 310 * at start up. 311 */ 312 ret = ilbd_check_client_config_auth(ps); 313 if (ret != ILB_STATUS_OK) { 314 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC, 315 ret, ucredp); 316 return (ret); 317 } 318 319 hc = ilbd_get_hc(hc_name); 320 if (hc == NULL) { 321 logdebug("ilbd_destroy_hc: healthcheck %s does not exist", 322 hc_name); 323 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC, 324 ILB_STATUS_ENOENT, ucredp); 325 return (ILB_STATUS_ENOENT); 326 } 327 328 /* If hc is in use, cannot delete it */ 329 if (hc->ihc_rule_cnt > 0) { 330 logdebug("ilbd_destroy_hc: healthcheck %s is associated" 331 " with a rule - cannot remove", hc_name); 332 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC, 333 ILB_STATUS_INUSE, ucredp); 334 return (ILB_STATUS_INUSE); 335 } 336 337 if ((ret = ilbd_destroy_pg(ILBD_SCF_HC, hc_name)) != 338 ILB_STATUS_OK) { 339 logdebug("ilbd_destroy_hc: cannot destroy healthcheck %s " 340 "property group", hc_name); 341 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC, 342 ret, ucredp); 343 return (ret); 344 } 345 346 list_remove(&ilbd_hc_list, hc); 347 free(hc); 348 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC, ret, ucredp); 349 return (ret); 350 } 351 352 /* 353 * Given a hc object name, return its information. Used by libilb to 354 * get hc info. 355 */ 356 ilb_status_t 357 ilbd_get_hc_info(const char *hc_name, uint32_t *rbuf, size_t *rbufsz) 358 { 359 ilbd_hc_t *hc; 360 ilb_hc_info_t *hc_info; 361 ilb_comm_t *ic = (ilb_comm_t *)rbuf; 362 363 hc = ilbd_get_hc(hc_name); 364 if (hc == NULL) { 365 logdebug("%s: healthcheck %s does not exist", __func__, 366 hc_name); 367 return (ILB_STATUS_ENOENT); 368 } 369 ilbd_reply_ok(rbuf, rbufsz); 370 hc_info = (ilb_hc_info_t *)&ic->ic_data; 371 372 (void) strlcpy(hc_info->hci_name, hc->ihc_name, sizeof (hc->ihc_name)); 373 (void) strlcpy(hc_info->hci_test, hc->ihc_test, sizeof (hc->ihc_test)); 374 hc_info->hci_timeout = hc->ihc_timeout; 375 hc_info->hci_count = hc->ihc_count; 376 hc_info->hci_interval = hc->ihc_interval; 377 hc_info->hci_def_ping = hc->ihc_def_ping; 378 379 *rbufsz += sizeof (ilb_hc_info_t); 380 381 return (ILB_STATUS_OK); 382 } 383 384 static void 385 ilbd_hc_copy_srvs(uint32_t *rbuf, size_t *rbufsz, ilbd_hc_rule_t *hc_rule, 386 const char *rulename) 387 { 388 ilbd_hc_srv_t *tmp_srv; 389 ilb_hc_srv_t *dst_srv; 390 ilb_hc_rule_srv_t *srvs; 391 size_t tmp_rbufsz; 392 int i; 393 394 tmp_rbufsz = *rbufsz; 395 /* Set up the reply buffer. rbufsz will be set to the new size. */ 396 ilbd_reply_ok(rbuf, rbufsz); 397 398 /* Calculate how much space is left for holding server info. */ 399 *rbufsz += sizeof (ilb_hc_rule_srv_t); 400 tmp_rbufsz -= *rbufsz; 401 402 srvs = (ilb_hc_rule_srv_t *)&((ilb_comm_t *)rbuf)->ic_data; 403 404 tmp_srv = list_head(&hc_rule->hcr_servers); 405 for (i = 0; tmp_srv != NULL && tmp_rbufsz >= sizeof (*dst_srv); i++) { 406 dst_srv = &srvs->rs_srvs[i]; 407 408 (void) strlcpy(dst_srv->hcs_rule_name, rulename, ILB_NAMESZ); 409 (void) strlcpy(dst_srv->hcs_ID, tmp_srv->shc_sg_srv->sgs_srvID, 410 ILB_NAMESZ); 411 (void) strlcpy(dst_srv->hcs_hc_name, 412 tmp_srv->shc_hc->ihc_name, ILB_NAMESZ); 413 dst_srv->hcs_IP = tmp_srv->shc_sg_srv->sgs_addr; 414 dst_srv->hcs_fail_cnt = tmp_srv->shc_fail_cnt; 415 dst_srv->hcs_status = tmp_srv->shc_status; 416 dst_srv->hcs_rtt = tmp_srv->shc_rtt; 417 dst_srv->hcs_lasttime = tmp_srv->shc_lasttime; 418 dst_srv->hcs_nexttime = tmp_srv->shc_nexttime; 419 420 tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv); 421 tmp_rbufsz -= sizeof (*dst_srv); 422 } 423 srvs->rs_num_srvs = i; 424 *rbufsz += i * sizeof (*dst_srv); 425 } 426 427 /* 428 * Given a rule name, return the hc status of its servers. 429 */ 430 ilb_status_t 431 ilbd_get_hc_srvs(const char *rulename, uint32_t *rbuf, size_t *rbufsz) 432 { 433 ilbd_hc_t *hc; 434 ilbd_hc_rule_t *hc_rule; 435 436 for (hc = list_head(&ilbd_hc_list); hc != NULL; 437 hc = list_next(&ilbd_hc_list, hc)) { 438 for (hc_rule = list_head(&hc->ihc_rules); hc_rule != NULL; 439 hc_rule = list_next(&hc->ihc_rules, hc_rule)) { 440 if (strcasecmp(hc_rule->hcr_rule->irl_name, 441 rulename) != 0) { 442 continue; 443 } 444 ilbd_hc_copy_srvs(rbuf, rbufsz, hc_rule, rulename); 445 return (ILB_STATUS_OK); 446 } 447 } 448 return (ILB_STATUS_RULE_NO_HC); 449 } 450 451 /* 452 * Initialize the hc timer and associate the notification of timeout to 453 * the given event port. 454 */ 455 void 456 ilbd_hc_timer_init(int ev_port, ilbd_timer_event_obj_t *ev_obj) 457 { 458 struct sigevent sigev; 459 port_notify_t notify; 460 461 if ((ilbd_hc_timer_q = iu_tq_create()) == NULL) { 462 logerr("%s: cannot create hc timer queue", __func__); 463 exit(EXIT_FAILURE); 464 } 465 hc_timer_restarted = B_FALSE; 466 467 ev_obj->ev = ILBD_EVENT_TIMER; 468 ev_obj->timerid = -1; 469 470 notify.portnfy_port = ev_port; 471 notify.portnfy_user = ev_obj; 472 sigev.sigev_notify = SIGEV_PORT; 473 sigev.sigev_value.sival_ptr = ¬ify; 474 if (timer_create(CLOCK_REALTIME, &sigev, &ev_obj->timerid) == -1) { 475 logerr("%s: cannot create timer", __func__); 476 exit(EXIT_FAILURE); 477 } 478 } 479 480 /* 481 * HC timeout handler. 482 */ 483 void 484 ilbd_hc_timeout(void) 485 { 486 (void) iu_expire_timers(ilbd_hc_timer_q); 487 hc_timer_restarted = B_TRUE; 488 } 489 490 /* 491 * Set up the timer to fire at the earliest timeout. 492 */ 493 void 494 ilbd_hc_timer_update(ilbd_timer_event_obj_t *ev_obj) 495 { 496 itimerspec_t itimeout; 497 int timeout; 498 499 /* 500 * There is no change on the timer list, so no need to set up the 501 * timer again. 502 */ 503 if (!hc_timer_restarted) 504 return; 505 506 restart: 507 if ((timeout = iu_earliest_timer(ilbd_hc_timer_q)) == INFTIM) { 508 hc_timer_restarted = B_FALSE; 509 return; 510 } else if (timeout == 0) { 511 /* 512 * Handle the timeout immediately. After that (clearing all 513 * the expired timers), check to see if there are still 514 * timers running. If yes, start them. 515 */ 516 (void) iu_expire_timers(ilbd_hc_timer_q); 517 goto restart; 518 } 519 520 itimeout.it_value.tv_sec = timeout / MILLISEC + 1; 521 itimeout.it_value.tv_nsec = 0; 522 itimeout.it_interval.tv_sec = 0; 523 itimeout.it_interval.tv_nsec = 0; 524 525 /* 526 * Failure to set a timeout is "OK" since hopefully there will be 527 * other events and timer_settime() will be called again. So 528 * we will only miss some timeouts. But in the worst case, no event 529 * will happen and ilbd will get stuck... 530 */ 531 if (timer_settime(ev_obj->timerid, 0, &itimeout, NULL) == -1) 532 logerr("%s: cannot set timer", __func__); 533 hc_timer_restarted = B_FALSE; 534 } 535 536 /* 537 * Kill the probe process of a server. 538 */ 539 static void 540 ilbd_hc_kill_probe(ilbd_hc_srv_t *srv) 541 { 542 /* 543 * First dissociate the fd from the event port. It should not 544 * fail. 545 */ 546 if (port_dissociate(srv->shc_ev_port, PORT_SOURCE_FD, 547 srv->shc_child_fd) != 0) { 548 logdebug("%s: port_dissociate: %s", __func__, strerror(errno)); 549 } 550 (void) close(srv->shc_child_fd); 551 free(srv->shc_ev); 552 srv->shc_ev = NULL; 553 554 /* Then kill the probe process. */ 555 if (kill(srv->shc_child_pid, SIGKILL) != 0) { 556 logerr("%s: rule %s server %s: %s", __func__, 557 srv->shc_hc_rule->hcr_rule->irl_name, 558 srv->shc_sg_srv->sgs_srvID, strerror(errno)); 559 } 560 /* Should not fail... */ 561 if (waitpid(srv->shc_child_pid, NULL, 0) != srv->shc_child_pid) { 562 logdebug("%s: waitpid: rule %s server %s", __func__, 563 srv->shc_hc_rule->hcr_rule->irl_name, 564 srv->shc_sg_srv->sgs_srvID); 565 } 566 srv->shc_child_pid = 0; 567 } 568 569 /* 570 * Disable the server, either because the server is dead or because a timer 571 * cannot be started for this server. Note that this only affects the 572 * transient configuration, meaning only in memory. The persistent 573 * configuration is not affected. 574 */ 575 static void 576 ilbd_mark_server_disabled(ilbd_hc_srv_t *srv) 577 { 578 srv->shc_status = ILB_HCS_DISABLED; 579 580 /* Disable the server in kernel. */ 581 if (ilbd_k_Xable_server(&srv->shc_sg_srv->sgs_addr, 582 srv->shc_hc_rule->hcr_rule->irl_name, 583 stat_declare_srv_dead) != ILB_STATUS_OK) { 584 logerr("%s: cannot disable server in kernel: rule %s " 585 "server %s", __func__, 586 srv->shc_hc_rule->hcr_rule->irl_name, 587 srv->shc_sg_srv->sgs_srvID); 588 } 589 } 590 591 /* 592 * A probe fails, set the state of the server. 593 */ 594 static void 595 ilbd_set_fail_state(ilbd_hc_srv_t *srv) 596 { 597 if (++srv->shc_fail_cnt < srv->shc_hc->ihc_count) { 598 /* Probe again */ 599 ilbd_hc_probe_timer(ilbd_hc_timer_q, srv); 600 return; 601 } 602 603 logdebug("%s: rule %s server %s fails %u", __func__, 604 srv->shc_hc_rule->hcr_rule->irl_name, srv->shc_sg_srv->sgs_srvID, 605 srv->shc_fail_cnt); 606 607 /* 608 * If this is a ping test, mark the server as 609 * unreachable instead of dead. 610 */ 611 if (srv->shc_hc->ihc_test_type == ILBD_HC_PING || 612 srv->shc_state == ilbd_hc_def_pinging) { 613 srv->shc_status = ILB_HCS_UNREACH; 614 } else { 615 srv->shc_status = ILB_HCS_DEAD; 616 } 617 618 /* Disable the server in kernel. */ 619 if (ilbd_k_Xable_server(&srv->shc_sg_srv->sgs_addr, 620 srv->shc_hc_rule->hcr_rule->irl_name, stat_declare_srv_dead) != 621 ILB_STATUS_OK) { 622 logerr("%s: cannot disable server in kernel: rule %s " 623 "server %s", __func__, 624 srv->shc_hc_rule->hcr_rule->irl_name, 625 srv->shc_sg_srv->sgs_srvID); 626 } 627 628 /* Still keep probing in case the server is alive again. */ 629 if (ilbd_hc_restart_timer(srv->shc_hc, srv) != ILB_STATUS_OK) { 630 /* Only thing to do is to disable the server... */ 631 logerr("%s: cannot restart timer: rule %s server %s", __func__, 632 srv->shc_hc_rule->hcr_rule->irl_name, 633 srv->shc_sg_srv->sgs_srvID); 634 srv->shc_status = ILB_HCS_DISABLED; 635 } 636 } 637 638 /* 639 * A probe process has not returned for the ihc_timeout period, we should 640 * kill it. This function is the handler of this. 641 */ 642 /* ARGSUSED */ 643 static void 644 ilbd_hc_kill_timer(iu_tq_t *tq, void *arg) 645 { 646 ilbd_hc_srv_t *srv = (ilbd_hc_srv_t *)arg; 647 648 ilbd_hc_kill_probe(srv); 649 ilbd_set_fail_state(srv); 650 } 651 652 /* 653 * Probe timeout handler. Send out the appropriate probe. 654 */ 655 /* ARGSUSED */ 656 static void 657 ilbd_hc_probe_timer(iu_tq_t *tq, void *arg) 658 { 659 ilbd_hc_srv_t *srv = (ilbd_hc_srv_t *)arg; 660 661 /* 662 * If starting the probe fails, just pretend that the timeout has 663 * extended. 664 */ 665 if (!ilbd_run_probe(srv)) { 666 /* 667 * If we cannot restart the timer, the only thing we can do 668 * is to disable this server. Hopefully the sys admin will 669 * notice this and enable this server again later. 670 */ 671 if (ilbd_hc_restart_timer(srv->shc_hc, srv) != ILB_STATUS_OK) { 672 logerr("%s: cannot restart timer: rule %s server %s, " 673 "disabling it", __func__, 674 srv->shc_hc_rule->hcr_rule->irl_name, 675 srv->shc_sg_srv->sgs_srvID); 676 ilbd_mark_server_disabled(srv); 677 } 678 return; 679 } 680 681 /* 682 * Similar to above, if kill timer cannot be started, disable the 683 * server. 684 */ 685 if ((srv->shc_tid = iu_schedule_timer(ilbd_hc_timer_q, 686 srv->shc_hc->ihc_timeout, ilbd_hc_kill_timer, srv)) == -1) { 687 logerr("%s: cannot start kill timer: rule %s server %s, " 688 "disabling it", __func__, 689 srv->shc_hc_rule->hcr_rule->irl_name, 690 srv->shc_sg_srv->sgs_srvID); 691 ilbd_mark_server_disabled(srv); 692 } 693 hc_timer_restarted = B_TRUE; 694 } 695 696 /* Restart the periodic timer for a given server. */ 697 static ilb_status_t 698 ilbd_hc_restart_timer(ilbd_hc_t *hc, ilbd_hc_srv_t *srv) 699 { 700 int timeout; 701 702 /* Don't allow the timeout interval to be less than 1s */ 703 timeout = MAX((hc->ihc_interval >> 1) + (gethrtime() % 704 (hc->ihc_interval + 1)), 1); 705 706 /* 707 * If the probe is actually a ping probe, there is no need to 708 * do default pinging. Just skip the step. 709 */ 710 if (hc->ihc_def_ping && hc->ihc_test_type != ILBD_HC_PING) 711 srv->shc_state = ilbd_hc_def_pinging; 712 else 713 srv->shc_state = ilbd_hc_probing; 714 srv->shc_tid = iu_schedule_timer(ilbd_hc_timer_q, timeout, 715 ilbd_hc_probe_timer, srv); 716 717 if (srv->shc_tid == -1) 718 return (ILB_STATUS_TIMER); 719 srv->shc_lasttime = time(NULL); 720 srv->shc_nexttime = time(NULL) + timeout; 721 722 hc_timer_restarted = B_TRUE; 723 return (ILB_STATUS_OK); 724 } 725 726 /* Helper routine to associate a server with its hc object. */ 727 static ilb_status_t 728 ilbd_hc_srv_add(ilbd_hc_t *hc, ilbd_hc_rule_t *hc_rule, 729 const ilb_sg_srv_t *srv, int ev_port) 730 { 731 ilbd_hc_srv_t *new_srv; 732 ilb_status_t ret; 733 734 if ((new_srv = calloc(1, sizeof (ilbd_hc_srv_t))) == NULL) 735 return (ILB_STATUS_ENOMEM); 736 new_srv->shc_hc = hc; 737 new_srv->shc_hc_rule = hc_rule; 738 new_srv->shc_sg_srv = srv; 739 new_srv->shc_ev_port = ev_port; 740 new_srv->shc_tid = -1; 741 new_srv->shc_nexttime = time(NULL); 742 new_srv->shc_lasttime = new_srv->shc_nexttime; 743 744 if ((hc_rule->hcr_rule->irl_flags & ILB_FLAGS_RULE_ENABLED) && 745 ILB_IS_SRV_ENABLED(srv->sgs_flags)) { 746 new_srv->shc_status = ILB_HCS_UNINIT; 747 ret = ilbd_hc_restart_timer(hc, new_srv); 748 if (ret != ILB_STATUS_OK) { 749 free(new_srv); 750 return (ret); 751 } 752 } else { 753 new_srv->shc_status = ILB_HCS_DISABLED; 754 } 755 756 list_insert_tail(&hc_rule->hcr_servers, new_srv); 757 return (ILB_STATUS_OK); 758 } 759 760 /* Handy macro to cancel a server's timer. */ 761 #define HC_CANCEL_TIMER(srv) \ 762 { \ 763 void *arg; \ 764 int ret; \ 765 if ((srv)->shc_tid != -1) { \ 766 ret = iu_cancel_timer(ilbd_hc_timer_q, (srv)->shc_tid, &arg); \ 767 (srv)->shc_tid = -1; \ 768 assert(ret == 1); \ 769 assert(arg == (srv)); \ 770 } \ 771 hc_timer_restarted = B_TRUE; \ 772 } 773 774 /* Helper routine to dissociate a server from its hc object. */ 775 static ilb_status_t 776 ilbd_hc_srv_rem(ilbd_hc_rule_t *hc_rule, const ilb_sg_srv_t *srv) 777 { 778 ilbd_hc_srv_t *tmp_srv; 779 780 for (tmp_srv = list_head(&hc_rule->hcr_servers); tmp_srv != NULL; 781 tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv)) { 782 if (tmp_srv->shc_sg_srv == srv) { 783 list_remove(&hc_rule->hcr_servers, tmp_srv); 784 HC_CANCEL_TIMER(tmp_srv); 785 if (tmp_srv->shc_child_pid != 0) 786 ilbd_hc_kill_probe(tmp_srv); 787 free(tmp_srv); 788 return (ILB_STATUS_OK); 789 } 790 } 791 return (ILB_STATUS_ENOENT); 792 } 793 794 /* Helper routine to dissociate all servers of a rule from its hc object. */ 795 static void 796 ilbd_hc_srv_rem_all(ilbd_hc_rule_t *hc_rule) 797 { 798 ilbd_hc_srv_t *srv; 799 800 while ((srv = list_remove_head(&hc_rule->hcr_servers)) != NULL) { 801 HC_CANCEL_TIMER(srv); 802 if (srv->shc_child_pid != 0) 803 ilbd_hc_kill_probe(srv); 804 free(srv); 805 } 806 } 807 808 /* Associate a rule with its hc object. */ 809 ilb_status_t 810 ilbd_hc_associate_rule(const ilbd_rule_t *rule, int ev_port) 811 { 812 ilbd_hc_t *hc; 813 ilbd_hc_rule_t *hc_rule; 814 ilb_status_t ret; 815 ilbd_sg_t *sg; 816 ilbd_srv_t *ilbd_srv; 817 818 /* The rule is assumed to be initialized appropriately. */ 819 if ((hc = ilbd_get_hc(rule->irl_hcname)) == NULL) { 820 logdebug("ilbd_hc_associate_rule: healthcheck %s does not " 821 "exist", rule->irl_hcname); 822 return (ILB_STATUS_ENOHCINFO); 823 } 824 if ((hc->ihc_test_type == ILBD_HC_TCP && 825 rule->irl_proto != IPPROTO_TCP) || 826 (hc->ihc_test_type == ILBD_HC_UDP && 827 rule->irl_proto != IPPROTO_UDP)) { 828 return (ILB_STATUS_RULE_HC_MISMATCH); 829 } 830 if ((hc_rule = calloc(1, sizeof (ilbd_hc_rule_t))) == NULL) { 831 logdebug("ilbd_hc_associate_rule: out of memory"); 832 return (ILB_STATUS_ENOMEM); 833 } 834 835 hc_rule->hcr_rule = rule; 836 list_create(&hc_rule->hcr_servers, sizeof (ilbd_hc_srv_t), 837 offsetof(ilbd_hc_srv_t, shc_srv_link)); 838 839 /* Add all the servers. */ 840 sg = rule->irl_sg; 841 for (ilbd_srv = list_head(&sg->isg_srvlist); ilbd_srv != NULL; 842 ilbd_srv = list_next(&sg->isg_srvlist, ilbd_srv)) { 843 if ((ret = ilbd_hc_srv_add(hc, hc_rule, &ilbd_srv->isv_srv, 844 ev_port)) != ILB_STATUS_OK) { 845 /* Remove all previously added servers */ 846 ilbd_hc_srv_rem_all(hc_rule); 847 free(hc_rule); 848 return (ret); 849 } 850 } 851 list_insert_tail(&hc->ihc_rules, hc_rule); 852 hc->ihc_rule_cnt++; 853 854 return (ILB_STATUS_OK); 855 } 856 857 /* Dissociate a rule from its hc object. */ 858 ilb_status_t 859 ilbd_hc_dissociate_rule(const ilbd_rule_t *rule) 860 { 861 ilbd_hc_t *hc; 862 ilbd_hc_rule_t *hc_rule; 863 864 /* The rule is assumed to be initialized appropriately. */ 865 if ((hc = ilbd_get_hc(rule->irl_hcname)) == NULL) { 866 logdebug("ilbd_hc_dissociate_rule: healthcheck %s does not " 867 "exist", rule->irl_hcname); 868 return (ILB_STATUS_ENOENT); 869 } 870 for (hc_rule = list_head(&hc->ihc_rules); hc_rule != NULL; 871 hc_rule = list_next(&hc->ihc_rules, hc_rule)) { 872 if (hc_rule->hcr_rule == rule) 873 break; 874 } 875 if (hc_rule == NULL) { 876 logdebug("ilbd_hc_dissociate_rule: rule %s is not associated " 877 "with healtcheck %s", rule->irl_hcname, hc->ihc_name); 878 return (ILB_STATUS_ENOENT); 879 } 880 ilbd_hc_srv_rem_all(hc_rule); 881 list_remove(&hc->ihc_rules, hc_rule); 882 hc->ihc_rule_cnt--; 883 return (ILB_STATUS_OK); 884 } 885 886 /* 887 * Given a hc object name and a rule, check to see if the rule is associated 888 * with the hc object. If it is, the hc object is returned in **hc and the 889 * ilbd_hc_rule_t is returned in **hc_rule. 890 */ 891 static boolean_t 892 ilbd_hc_check_rule(const char *hc_name, const ilbd_rule_t *rule, 893 ilbd_hc_t **hc, ilbd_hc_rule_t **hc_rule) 894 { 895 ilbd_hc_t *tmp_hc; 896 ilbd_hc_rule_t *tmp_hc_rule; 897 898 if ((tmp_hc = ilbd_get_hc(hc_name)) == NULL) 899 return (B_FALSE); 900 for (tmp_hc_rule = list_head(&tmp_hc->ihc_rules); tmp_hc_rule != NULL; 901 tmp_hc_rule = list_next(&tmp_hc->ihc_rules, tmp_hc_rule)) { 902 if (tmp_hc_rule->hcr_rule == rule) { 903 *hc = tmp_hc; 904 *hc_rule = tmp_hc_rule; 905 return (B_TRUE); 906 } 907 } 908 return (B_FALSE); 909 } 910 911 /* Associate a server with its hc object. */ 912 ilb_status_t 913 ilbd_hc_add_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv, 914 int ev_port) 915 { 916 ilbd_hc_t *hc; 917 ilbd_hc_rule_t *hc_rule; 918 919 if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule)) 920 return (ILB_STATUS_ENOENT); 921 return (ilbd_hc_srv_add(hc, hc_rule, srv, ev_port)); 922 } 923 924 /* Dissociate a server from its hc object. */ 925 ilb_status_t 926 ilbd_hc_del_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv) 927 { 928 ilbd_hc_t *hc; 929 ilbd_hc_rule_t *hc_rule; 930 931 if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule)) 932 return (ILB_STATUS_ENOENT); 933 return (ilbd_hc_srv_rem(hc_rule, srv)); 934 } 935 936 /* Helper routine to enable/disable a server's hc probe. */ 937 static ilb_status_t 938 ilbd_hc_toggle_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv, 939 boolean_t enable) 940 { 941 ilbd_hc_t *hc; 942 ilbd_hc_rule_t *hc_rule; 943 ilbd_hc_srv_t *tmp_srv; 944 ilb_status_t ret; 945 946 if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule)) 947 return (ILB_STATUS_ENOENT); 948 for (tmp_srv = list_head(&hc_rule->hcr_servers); tmp_srv != NULL; 949 tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv)) { 950 if (tmp_srv->shc_sg_srv != srv) { 951 continue; 952 } 953 if (enable) { 954 if (tmp_srv->shc_status == ILB_HCS_DISABLED) { 955 ret = ilbd_hc_restart_timer(hc, tmp_srv); 956 if (ret != ILB_STATUS_OK) { 957 logerr("%s: cannot start timers for " 958 "rule %s server %s", __func__, 959 rule->irl_name, 960 tmp_srv->shc_sg_srv->sgs_srvID); 961 return (ret); 962 } 963 /* Start from fresh... */ 964 tmp_srv->shc_status = ILB_HCS_UNINIT; 965 tmp_srv->shc_rtt = 0; 966 tmp_srv->shc_fail_cnt = 0; 967 } 968 } else { 969 if (tmp_srv->shc_status != ILB_HCS_DISABLED) { 970 tmp_srv->shc_status = ILB_HCS_DISABLED; 971 HC_CANCEL_TIMER(tmp_srv); 972 if (tmp_srv->shc_child_pid != 0) 973 ilbd_hc_kill_probe(tmp_srv); 974 } 975 } 976 return (ILB_STATUS_OK); 977 } 978 return (ILB_STATUS_ENOENT); 979 } 980 981 ilb_status_t 982 ilbd_hc_enable_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv) 983 { 984 return (ilbd_hc_toggle_server(rule, srv, B_TRUE)); 985 } 986 987 ilb_status_t 988 ilbd_hc_disable_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv) 989 { 990 return (ilbd_hc_toggle_server(rule, srv, B_FALSE)); 991 } 992 993 /* 994 * Helper routine to enable/disable a rule's hc probe (including all its 995 * servers). 996 */ 997 static ilb_status_t 998 ilbd_hc_toggle_rule(const ilbd_rule_t *rule, boolean_t enable) 999 { 1000 ilbd_hc_t *hc; 1001 ilbd_hc_rule_t *hc_rule; 1002 ilbd_hc_srv_t *tmp_srv; 1003 int ret; 1004 1005 if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule)) 1006 return (ILB_STATUS_ENOENT); 1007 1008 for (tmp_srv = list_head(&hc_rule->hcr_servers); tmp_srv != NULL; 1009 tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv)) { 1010 if (enable) { 1011 /* 1012 * If the server is disabled in the rule, do not 1013 * restart its timer. 1014 */ 1015 if (tmp_srv->shc_status == ILB_HCS_DISABLED && 1016 ILB_IS_SRV_ENABLED( 1017 tmp_srv->shc_sg_srv->sgs_flags)) { 1018 ret = ilbd_hc_restart_timer(hc, tmp_srv); 1019 if (ret != ILB_STATUS_OK) { 1020 logerr("%s: cannot start timers for " 1021 "rule %s server %s", __func__, 1022 rule->irl_name, 1023 tmp_srv->shc_sg_srv->sgs_srvID); 1024 goto rollback; 1025 } else { 1026 /* Start from fresh... */ 1027 tmp_srv->shc_status = ILB_HCS_UNINIT; 1028 tmp_srv->shc_rtt = 0; 1029 tmp_srv->shc_fail_cnt = 0; 1030 } 1031 } 1032 } else { 1033 if (tmp_srv->shc_status != ILB_HCS_DISABLED) { 1034 HC_CANCEL_TIMER(tmp_srv); 1035 tmp_srv->shc_status = ILB_HCS_DISABLED; 1036 if (tmp_srv->shc_child_pid != 0) 1037 ilbd_hc_kill_probe(tmp_srv); 1038 } 1039 } 1040 } 1041 return (ILB_STATUS_OK); 1042 rollback: 1043 enable = !enable; 1044 for (tmp_srv = list_prev(&hc_rule->hcr_servers, tmp_srv); 1045 tmp_srv != NULL; 1046 tmp_srv = list_prev(&hc_rule->hcr_servers, tmp_srv)) { 1047 if (enable) { 1048 if (tmp_srv->shc_status == ILB_HCS_DISABLED && 1049 ILB_IS_SRV_ENABLED( 1050 tmp_srv->shc_sg_srv->sgs_flags)) { 1051 (void) ilbd_hc_restart_timer(hc, tmp_srv); 1052 tmp_srv->shc_status = ILB_HCS_UNINIT; 1053 tmp_srv->shc_rtt = 0; 1054 tmp_srv->shc_fail_cnt = 0; 1055 } 1056 } else { 1057 if (tmp_srv->shc_status != ILB_HCS_DISABLED) { 1058 HC_CANCEL_TIMER(tmp_srv); 1059 tmp_srv->shc_status = ILB_HCS_DISABLED; 1060 if (tmp_srv->shc_child_pid != 0) 1061 ilbd_hc_kill_probe(tmp_srv); 1062 } 1063 } 1064 } 1065 return (ret); 1066 } 1067 1068 ilb_status_t 1069 ilbd_hc_enable_rule(const ilbd_rule_t *rule) 1070 { 1071 return (ilbd_hc_toggle_rule(rule, B_TRUE)); 1072 } 1073 1074 ilb_status_t 1075 ilbd_hc_disable_rule(const ilbd_rule_t *rule) 1076 { 1077 return (ilbd_hc_toggle_rule(rule, B_FALSE)); 1078 } 1079 1080 static const char * 1081 topo_2_str(ilb_topo_t topo) 1082 { 1083 switch (topo) { 1084 case ILB_TOPO_DSR: 1085 return ("DSR"); 1086 break; 1087 case ILB_TOPO_NAT: 1088 return ("NAT"); 1089 break; 1090 case ILB_TOPO_HALF_NAT: 1091 return ("HALF_NAT"); 1092 break; 1093 default: 1094 /* Should not happen. */ 1095 logerr("%s: unknown topology", __func__); 1096 break; 1097 } 1098 return (""); 1099 } 1100 1101 /* 1102 * Create the argument list to be passed to a hc probe command. 1103 * The passed in argv is assumed to have HC_PROBE_ARGC elements. 1104 */ 1105 static boolean_t 1106 create_argv(ilbd_hc_srv_t *srv, char *argv[]) 1107 { 1108 char buf[INET6_ADDRSTRLEN]; 1109 ilbd_rule_t const *rule; 1110 ilb_sg_srv_t const *sg_srv; 1111 struct in_addr v4_addr; 1112 in_port_t port; 1113 int i; 1114 1115 rule = srv->shc_hc_rule->hcr_rule; 1116 sg_srv = srv->shc_sg_srv; 1117 1118 if (srv->shc_state == ilbd_hc_def_pinging) { 1119 if ((argv[0] = strdup(ILB_PROBE_PING)) == NULL) 1120 return (B_FALSE); 1121 } else { 1122 switch (srv->shc_hc->ihc_test_type) { 1123 case ILBD_HC_USER: 1124 if ((argv[0] = strdup(srv->shc_hc->ihc_test)) == NULL) 1125 return (B_FALSE); 1126 break; 1127 case ILBD_HC_TCP: 1128 case ILBD_HC_UDP: 1129 if ((argv[0] = strdup(ILB_PROBE_PROTO)) == 1130 NULL) { 1131 return (B_FALSE); 1132 } 1133 break; 1134 case ILBD_HC_PING: 1135 if ((argv[0] = strdup(ILB_PROBE_PING)) == NULL) { 1136 return (B_FALSE); 1137 } 1138 break; 1139 } 1140 } 1141 1142 /* 1143 * argv[1] is the VIP. 1144 * 1145 * Right now, the VIP and the backend server addresses should be 1146 * in the same IP address family. Here we don't do that in case 1147 * this assumption is changed in future. 1148 */ 1149 if (IN6_IS_ADDR_V4MAPPED(&rule->irl_vip)) { 1150 IN6_V4MAPPED_TO_INADDR(&rule->irl_vip, &v4_addr); 1151 if (inet_ntop(AF_INET, &v4_addr, buf, sizeof (buf)) == NULL) 1152 goto cleanup; 1153 } else { 1154 if (inet_ntop(AF_INET6, &rule->irl_vip, buf, 1155 sizeof (buf)) == NULL) { 1156 goto cleanup; 1157 } 1158 } 1159 if ((argv[1] = strdup(buf)) == NULL) 1160 goto cleanup; 1161 1162 /* 1163 * argv[2] is the backend server address. 1164 */ 1165 if (IN6_IS_ADDR_V4MAPPED(&sg_srv->sgs_addr)) { 1166 IN6_V4MAPPED_TO_INADDR(&sg_srv->sgs_addr, &v4_addr); 1167 if (inet_ntop(AF_INET, &v4_addr, buf, sizeof (buf)) == NULL) 1168 goto cleanup; 1169 } else { 1170 if (inet_ntop(AF_INET6, &sg_srv->sgs_addr, buf, 1171 sizeof (buf)) == NULL) { 1172 goto cleanup; 1173 } 1174 } 1175 if ((argv[2] = strdup(buf)) == NULL) 1176 goto cleanup; 1177 1178 /* 1179 * argv[3] is the transport protocol used in the rule. 1180 */ 1181 switch (rule->irl_proto) { 1182 case IPPROTO_TCP: 1183 argv[3] = strdup("TCP"); 1184 break; 1185 case IPPROTO_UDP: 1186 argv[3] = strdup("UDP"); 1187 break; 1188 default: 1189 logerr("%s: unknown protocol", __func__); 1190 goto cleanup; 1191 break; 1192 } 1193 if (argv[3] == NULL) 1194 goto cleanup; 1195 1196 /* 1197 * argv[4] is the load balance mode, DSR, NAT, HALF-NAT. 1198 */ 1199 if ((argv[4] = strdup(topo_2_str(rule->irl_topo))) == NULL) 1200 goto cleanup; 1201 1202 /* 1203 * argv[5] is the port range. Right now, there should only be 1 port. 1204 */ 1205 switch (rule->irl_hcpflag) { 1206 case ILB_HCI_PROBE_FIX: 1207 port = ntohs(rule->irl_hcport); 1208 break; 1209 case ILB_HCI_PROBE_ANY: { 1210 in_port_t min, max; 1211 1212 if (ntohs(sg_srv->sgs_minport) == 0) { 1213 min = ntohs(rule->irl_minport); 1214 max = ntohs(rule->irl_maxport); 1215 } else { 1216 min = ntohs(sg_srv->sgs_minport); 1217 max = ntohs(sg_srv->sgs_maxport); 1218 } 1219 if (max > min) 1220 port = min + gethrtime() % (max - min + 1); 1221 else 1222 port = min; 1223 break; 1224 } 1225 default: 1226 logerr("%s: unknown HC flag", __func__); 1227 goto cleanup; 1228 break; 1229 } 1230 (void) sprintf(buf, "%d", port); 1231 if ((argv[5] = strdup(buf)) == NULL) 1232 goto cleanup; 1233 1234 /* 1235 * argv[6] is the probe timeout. 1236 */ 1237 (void) sprintf(buf, "%d", srv->shc_hc->ihc_timeout); 1238 if ((argv[6] = strdup(buf)) == NULL) 1239 goto cleanup; 1240 1241 argv[7] = NULL; 1242 return (B_TRUE); 1243 1244 cleanup: 1245 for (i = 0; i < HC_PROBE_ARGC; i++) { 1246 if (argv[i] != NULL) 1247 free(argv[i]); 1248 } 1249 return (B_FALSE); 1250 } 1251 1252 static void 1253 destroy_argv(char *argv[]) 1254 { 1255 int i; 1256 1257 for (i = 0; argv[i] != NULL; i++) 1258 free(argv[i]); 1259 } 1260 1261 /* Spawn a process to run the hc probe on the given server. */ 1262 static boolean_t 1263 ilbd_run_probe(ilbd_hc_srv_t *srv) 1264 { 1265 posix_spawn_file_actions_t fd_actions; 1266 posix_spawnattr_t attr; 1267 sigset_t child_sigset; 1268 int fds[2]; 1269 int fdflags; 1270 pid_t pid; 1271 char *child_argv[HC_PROBE_ARGC]; 1272 ilbd_hc_probe_event_t *probe_ev; 1273 char *probe_name; 1274 1275 bzero(child_argv, HC_PROBE_ARGC * sizeof (char *)); 1276 if ((probe_ev = calloc(1, sizeof (*probe_ev))) == NULL) { 1277 logdebug("ilbd_run_probe: calloc"); 1278 return (B_FALSE); 1279 } 1280 1281 /* Set up a pipe to get output from probe command. */ 1282 if (pipe(fds) < 0) { 1283 logdebug("ilbd_run_probe: cannot create pipe"); 1284 free(probe_ev); 1285 return (B_FALSE); 1286 } 1287 /* Set our side of the pipe to be non-blocking */ 1288 if ((fdflags = fcntl(fds[0], F_GETFL, 0)) == -1) { 1289 logdebug("ilbd_run_probe: fcntl(F_GETFL)"); 1290 goto cleanup; 1291 } 1292 if (fcntl(fds[0], F_SETFL, fdflags | O_NONBLOCK) == -1) { 1293 logdebug("ilbd_run_probe: fcntl(F_SETFL)"); 1294 goto cleanup; 1295 } 1296 1297 if (posix_spawn_file_actions_init(&fd_actions) != 0) { 1298 logdebug("ilbd_run_probe: posix_spawn_file_actions_init"); 1299 goto cleanup; 1300 } 1301 if (posix_spawnattr_init(&attr) != 0) { 1302 logdebug("ilbd_run_probe: posix_spawnattr_init"); 1303 goto cleanup; 1304 } 1305 if (posix_spawn_file_actions_addclose(&fd_actions, fds[0]) != 0) { 1306 logdebug("ilbd_run_probe: posix_spawn_file_actions_addclose"); 1307 goto cleanup; 1308 } 1309 if (posix_spawn_file_actions_adddup2(&fd_actions, fds[1], 1310 STDOUT_FILENO) != 0) { 1311 logdebug("ilbd_run_probe: posix_spawn_file_actions_dup2"); 1312 goto cleanup; 1313 } 1314 if (posix_spawn_file_actions_addclose(&fd_actions, fds[1]) != 0) { 1315 logdebug("ilbd_run_probe: posix_spawn_file_actions_addclose"); 1316 goto cleanup; 1317 } 1318 1319 /* Reset all signal handling of the child to default. */ 1320 (void) sigfillset(&child_sigset); 1321 if (posix_spawnattr_setsigdefault(&attr, &child_sigset) != 0) { 1322 logdebug("ilbd_run_probe: posix_spawnattr_setsigdefault"); 1323 goto cleanup; 1324 } 1325 /* Don't want SIGCHLD. */ 1326 if (posix_spawnattr_setflags(&attr, POSIX_SPAWN_NOSIGCHLD_NP| 1327 POSIX_SPAWN_SETSIGDEF) != 0) { 1328 logdebug("ilbd_run_probe: posix_spawnattr_setflags"); 1329 goto cleanup; 1330 } 1331 1332 if (!create_argv(srv, child_argv)) { 1333 logdebug("ilbd_run_probe: create_argv"); 1334 goto cleanup; 1335 } 1336 1337 /* 1338 * If we are doing default pinging or not using a user supplied 1339 * probe, we should execute our standard supplied probe. The 1340 * supplied probe command handles all types of probes. And the 1341 * type used depends on argv[0], as filled in by create_argv(). 1342 */ 1343 if (srv->shc_state == ilbd_hc_def_pinging || 1344 srv->shc_hc->ihc_test_type != ILBD_HC_USER) { 1345 probe_name = ILB_PROBE_PROTO; 1346 } else { 1347 probe_name = srv->shc_hc->ihc_test; 1348 } 1349 if (posix_spawn(&pid, probe_name, &fd_actions, &attr, child_argv, 1350 NULL) != 0) { 1351 logerr("%s: posix_spawn: %s for server %s: %s", __func__, 1352 srv->shc_hc->ihc_test, srv->shc_sg_srv->sgs_srvID, 1353 strerror(errno)); 1354 goto cleanup; 1355 } 1356 1357 (void) close(fds[1]); 1358 destroy_argv(child_argv); 1359 srv->shc_child_pid = pid; 1360 srv->shc_child_fd = fds[0]; 1361 srv->shc_ev = probe_ev; 1362 1363 probe_ev->ihp_ev = ILBD_EVENT_PROBE; 1364 probe_ev->ihp_srv = srv; 1365 probe_ev->ihp_pid = pid; 1366 if (port_associate(srv->shc_ev_port, PORT_SOURCE_FD, fds[0], 1367 POLLRDNORM, probe_ev) != 0) { 1368 /* 1369 * Need to kill the child. It will free the srv->shc_ev, 1370 * which is probe_ev. So set probe_ev to NULL. 1371 */ 1372 ilbd_hc_kill_probe(srv); 1373 probe_ev = NULL; 1374 goto cleanup; 1375 } 1376 1377 return (B_TRUE); 1378 1379 cleanup: 1380 (void) close(fds[0]); 1381 (void) close(fds[1]); 1382 destroy_argv(child_argv); 1383 if (probe_ev != NULL) 1384 free(probe_ev); 1385 return (B_FALSE); 1386 } 1387 1388 /* 1389 * Called by ild_hc_probe_return() to re-associate the fd to a child to 1390 * the event port. 1391 */ 1392 static void 1393 reassociate_port(int ev_port, int fd, ilbd_hc_probe_event_t *ev) 1394 { 1395 if (port_associate(ev_port, PORT_SOURCE_FD, fd, 1396 POLLRDNORM, ev) != 0) { 1397 /* 1398 * If we cannot reassociate with the port, the only 1399 * thing we can do now is to kill the child and 1400 * do a blocking wait here... 1401 */ 1402 logdebug("%s: port_associate: %s", __func__, strerror(errno)); 1403 if (kill(ev->ihp_pid, SIGKILL) != 0) 1404 logerr("%s: kill: %s", __func__, strerror(errno)); 1405 if (waitpid(ev->ihp_pid, NULL, 0) != ev->ihp_pid) 1406 logdebug("%s: waitpid: %s", __func__, strerror(errno)); 1407 free(ev); 1408 } 1409 } 1410 1411 /* 1412 * To handle a child probe process hanging up. 1413 */ 1414 static void 1415 ilbd_hc_child_hup(int ev_port, int fd, ilbd_hc_probe_event_t *ev) 1416 { 1417 ilbd_hc_srv_t *srv; 1418 pid_t ret_pid; 1419 int ret; 1420 1421 srv = ev->ihp_srv; 1422 1423 if (!ev->ihp_done) { 1424 /* ilbd does not care about this process anymore ... */ 1425 ev->ihp_done = B_TRUE; 1426 srv->shc_ev = NULL; 1427 srv->shc_child_pid = 0; 1428 HC_CANCEL_TIMER(srv); 1429 ilbd_set_fail_state(srv); 1430 } 1431 ret_pid = waitpid(ev->ihp_pid, &ret, WNOHANG); 1432 switch (ret_pid) { 1433 case -1: 1434 logperror("ilbd_hc_child_hup: waitpid"); 1435 /* FALLTHROUGH */ 1436 case 0: 1437 /* The child has not completed the exit. Wait again. */ 1438 reassociate_port(ev_port, fd, ev); 1439 break; 1440 default: 1441 /* Right now, we just ignore the exit status. */ 1442 if (WIFEXITED(ret)) 1443 ret = WEXITSTATUS(ret); 1444 (void) close(fd); 1445 free(ev); 1446 } 1447 } 1448 1449 /* 1450 * To read the output of a child probe process. 1451 */ 1452 static void 1453 ilbd_hc_child_data(int fd, ilbd_hc_probe_event_t *ev) 1454 { 1455 ilbd_hc_srv_t *srv; 1456 char buf[HC_MAX_PROBE_OUTPUT]; 1457 int ret; 1458 int64_t rtt; 1459 1460 srv = ev->ihp_srv; 1461 1462 bzero(buf, HC_MAX_PROBE_OUTPUT); 1463 ret = read(fd, buf, HC_MAX_PROBE_OUTPUT - 1); 1464 /* Should not happen since event port should have caught this. */ 1465 assert(ret > 0); 1466 1467 /* 1468 * We expect the probe command to print out the RTT only. But 1469 * the command may misbehave and print out more than what we intend to 1470 * read in. So need to do this check below to "flush" out all the 1471 * output from the command. 1472 */ 1473 if (!ev->ihp_done) { 1474 ev->ihp_done = B_TRUE; 1475 /* We don't need to know about this event anymore. */ 1476 srv->shc_ev = NULL; 1477 srv->shc_child_pid = 0; 1478 HC_CANCEL_TIMER(srv); 1479 } else { 1480 return; 1481 } 1482 1483 rtt = strtoll(buf, NULL, 10); 1484 1485 /* 1486 * -1 means the server is dead or the probe somehow fails. Treat 1487 * them both as server is dead. 1488 */ 1489 if (rtt == -1) { 1490 ilbd_set_fail_state(srv); 1491 return; 1492 } else if (rtt > 0) { 1493 /* If the returned RTT value is not valid, just ignore it. */ 1494 if (rtt > 0 && rtt <= UINT_MAX) { 1495 /* Set rtt to be the simple smoothed average. */ 1496 if (srv->shc_rtt == 0) { 1497 srv->shc_rtt = rtt; 1498 } else { 1499 srv->shc_rtt = 3 * ((srv)->shc_rtt >> 2) + 1500 (rtt >> 2); 1501 } 1502 } 1503 1504 } 1505 1506 switch (srv->shc_state) { 1507 case ilbd_hc_def_pinging: 1508 srv->shc_state = ilbd_hc_probing; 1509 1510 /* Ping is OK, now start the probe. */ 1511 ilbd_hc_probe_timer(ilbd_hc_timer_q, srv); 1512 break; 1513 case ilbd_hc_probing: 1514 srv->shc_fail_cnt = 0; 1515 1516 /* Server is dead before, re-enable it. */ 1517 if (srv->shc_status == ILB_HCS_UNREACH || 1518 srv->shc_status == ILB_HCS_DEAD) { 1519 /* 1520 * If enabling the server in kernel fails now, 1521 * hopefully when the timer fires again later, the 1522 * enabling can be done. 1523 */ 1524 if (ilbd_k_Xable_server(&srv->shc_sg_srv->sgs_addr, 1525 srv->shc_hc_rule->hcr_rule->irl_name, 1526 stat_declare_srv_alive) != ILB_STATUS_OK) { 1527 logerr("%s: cannot enable server in kernel: " 1528 " rule %s server %s", __func__, 1529 srv->shc_hc_rule->hcr_rule->irl_name, 1530 srv->shc_sg_srv->sgs_srvID); 1531 } else { 1532 srv->shc_status = ILB_HCS_ALIVE; 1533 } 1534 } else { 1535 srv->shc_status = ILB_HCS_ALIVE; 1536 } 1537 if (ilbd_hc_restart_timer(srv->shc_hc, srv) != ILB_STATUS_OK) { 1538 logerr("%s: cannot restart timer: rule %s server %s", 1539 __func__, srv->shc_hc_rule->hcr_rule->irl_name, 1540 srv->shc_sg_srv->sgs_srvID); 1541 ilbd_mark_server_disabled(srv); 1542 } 1543 break; 1544 default: 1545 logdebug("%s: unknown state", __func__); 1546 break; 1547 } 1548 } 1549 1550 /* 1551 * Handle the return event of a child probe fd. 1552 */ 1553 void 1554 ilbd_hc_probe_return(int ev_port, int fd, int port_events, 1555 ilbd_hc_probe_event_t *ev) 1556 { 1557 /* 1558 * Note that there can be more than one events delivered to us at 1559 * the same time. So we need to check them individually. 1560 */ 1561 if (port_events & POLLRDNORM) 1562 ilbd_hc_child_data(fd, ev); 1563 1564 if (port_events & (POLLHUP|POLLERR)) { 1565 ilbd_hc_child_hup(ev_port, fd, ev); 1566 return; 1567 } 1568 1569 /* 1570 * Re-associate the fd with the port so that when the child 1571 * exits, we can reap the status. 1572 */ 1573 reassociate_port(ev_port, fd, ev); 1574 } 1575