1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * Copyright 2012 Milan Jurik. All rights reserved. 26 */ 27 28 #include <sys/types.h> 29 #include <sys/socket.h> 30 #include <sys/list.h> 31 #include <sys/stropts.h> 32 #include <sys/siginfo.h> 33 #include <sys/wait.h> 34 #include <arpa/inet.h> 35 #include <netinet/in.h> 36 #include <stdlib.h> 37 #include <stdio.h> 38 #include <strings.h> 39 #include <stddef.h> 40 #include <unistd.h> 41 #include <libilb.h> 42 #include <port.h> 43 #include <time.h> 44 #include <signal.h> 45 #include <assert.h> 46 #include <errno.h> 47 #include <spawn.h> 48 #include <fcntl.h> 49 #include <limits.h> 50 #include "libilb_impl.h" 51 #include "ilbd.h" 52 53 /* Global list of HC objects */ 54 list_t ilbd_hc_list; 55 56 /* Timer queue for all hc related timers. */ 57 static iu_tq_t *ilbd_hc_timer_q; 58 59 /* Indicate whether the timer needs to be updated */ 60 static boolean_t hc_timer_restarted; 61 62 static void ilbd_hc_probe_timer(iu_tq_t *, void *); 63 static ilb_status_t ilbd_hc_restart_timer(ilbd_hc_t *, ilbd_hc_srv_t *); 64 static boolean_t ilbd_run_probe(ilbd_hc_srv_t *); 65 66 #define MAX(a, b) ((a) > (b) ? (a) : (b)) 67 68 /* 69 * Number of arguments passed to a probe. argc[0] is the path name of 70 * the probe. 71 */ 72 #define HC_PROBE_ARGC 8 73 74 /* 75 * Max number of characters to be read from the output of a probe. It 76 * is long enough to read in a 64 bit integer. 77 */ 78 #define HC_MAX_PROBE_OUTPUT 24 79 80 void 81 i_ilbd_setup_hc_list(void) 82 { 83 list_create(&ilbd_hc_list, sizeof (ilbd_hc_t), 84 offsetof(ilbd_hc_t, ihc_link)); 85 } 86 87 /* 88 * Given a hc object name, return a pointer to hc object if found. 89 */ 90 ilbd_hc_t * 91 ilbd_get_hc(const char *name) 92 { 93 ilbd_hc_t *hc; 94 95 for (hc = list_head(&ilbd_hc_list); hc != NULL; 96 hc = list_next(&ilbd_hc_list, hc)) { 97 if (strcasecmp(hc->ihc_name, name) == 0) 98 return (hc); 99 } 100 return (NULL); 101 } 102 103 /* 104 * Generates an audit record for create-healthcheck, 105 * delete-healtcheck subcommands. 106 */ 107 static void 108 ilbd_audit_hc_event(const char *audit_hcname, 109 const ilb_hc_info_t *audit_hcinfo, ilbd_cmd_t cmd, 110 ilb_status_t rc, ucred_t *ucredp) 111 { 112 adt_session_data_t *ah; 113 adt_event_data_t *event; 114 au_event_t flag; 115 int audit_error; 116 117 if ((ucredp == NULL) && (cmd == ILBD_CREATE_HC)) { 118 /* 119 * we came here from the path where ilbd incorporates 120 * the configuration that is listed in SCF: 121 * i_ilbd_read_config->ilbd_walk_hc_pgs-> 122 * ->ilbd_scf_instance_walk_pg->ilbd_create_hc 123 * We skip auditing in that case 124 */ 125 logdebug("ilbd_audit_hc_event: skipping auditing"); 126 return; 127 } 128 129 if (adt_start_session(&ah, NULL, 0) != 0) { 130 logerr("ilbd_audit_hc_event: adt_start_session failed"); 131 exit(EXIT_FAILURE); 132 } 133 if (adt_set_from_ucred(ah, ucredp, ADT_NEW) != 0) { 134 (void) adt_end_session(ah); 135 logerr("ilbd_audit_rule_event: adt_set_from_ucred failed"); 136 exit(EXIT_FAILURE); 137 } 138 if (cmd == ILBD_CREATE_HC) 139 flag = ADT_ilb_create_healthcheck; 140 else if (cmd == ILBD_DESTROY_HC) 141 flag = ADT_ilb_delete_healthcheck; 142 143 if ((event = adt_alloc_event(ah, flag)) == NULL) { 144 logerr("ilbd_audit_hc_event: adt_alloc_event failed"); 145 exit(EXIT_FAILURE); 146 } 147 (void) memset((char *)event, 0, sizeof (adt_event_data_t)); 148 149 switch (cmd) { 150 case ILBD_CREATE_HC: 151 event->adt_ilb_create_healthcheck.auth_used = 152 NET_ILB_CONFIG_AUTH; 153 event->adt_ilb_create_healthcheck.hc_test = 154 (char *)audit_hcinfo->hci_test; 155 event->adt_ilb_create_healthcheck.hc_name = 156 (char *)audit_hcinfo->hci_name; 157 158 /* 159 * If the value 0 is stored, the default values are 160 * set in the kernel. User land does not know about them 161 * So if the user does not specify them, audit record 162 * will show them as 0 163 */ 164 event->adt_ilb_create_healthcheck.hc_timeout = 165 audit_hcinfo->hci_timeout; 166 event->adt_ilb_create_healthcheck.hc_count = 167 audit_hcinfo->hci_count; 168 event->adt_ilb_create_healthcheck.hc_interval = 169 audit_hcinfo->hci_interval; 170 break; 171 case ILBD_DESTROY_HC: 172 event->adt_ilb_delete_healthcheck.auth_used = 173 NET_ILB_CONFIG_AUTH; 174 event->adt_ilb_delete_healthcheck.hc_name = 175 (char *)audit_hcname; 176 break; 177 } 178 179 /* Fill in success/failure */ 180 if (rc == ILB_STATUS_OK) { 181 if (adt_put_event(event, ADT_SUCCESS, ADT_SUCCESS) != 0) { 182 logerr("ilbd_audit_hc_event: adt_put_event failed"); 183 exit(EXIT_FAILURE); 184 } 185 } else { 186 audit_error = ilberror2auditerror(rc); 187 if (adt_put_event(event, ADT_FAILURE, audit_error) != 0) { 188 logerr("ilbd_audit_hc_event: adt_put_event failed"); 189 exit(EXIT_FAILURE); 190 } 191 } 192 adt_free_event(event); 193 (void) adt_end_session(ah); 194 } 195 196 /* 197 * Given the ilb_hc_info_t passed in (from the libilb), create a hc object 198 * in ilbd. The parameter ev_port is not used, refer to comments of 199 * ilbd_create_sg() in ilbd_sg.c 200 */ 201 /* ARGSUSED */ 202 ilb_status_t 203 ilbd_create_hc(const ilb_hc_info_t *hc_info, int ev_port, 204 const struct passwd *ps, ucred_t *ucredp) 205 { 206 ilbd_hc_t *hc; 207 ilb_status_t ret = ILB_STATUS_OK; 208 209 /* 210 * ps == NULL is from the daemon when it starts and load configuration 211 * ps != NULL is from client. 212 */ 213 if (ps != NULL) { 214 ret = ilbd_check_client_config_auth(ps); 215 if (ret != ILB_STATUS_OK) { 216 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, 217 ret, ucredp); 218 return (ret); 219 } 220 } 221 222 if (hc_info->hci_name[0] == '\0') { 223 logdebug("ilbd_create_hc: missing healthcheck info"); 224 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, 225 ILB_STATUS_ENOHCINFO, ucredp); 226 return (ILB_STATUS_ENOHCINFO); 227 } 228 229 hc = ilbd_get_hc(hc_info->hci_name); 230 if (hc != NULL) { 231 logdebug("ilbd_create_hc: healthcheck name %s already" 232 " exists", hc_info->hci_name); 233 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, 234 ILB_STATUS_EEXIST, ucredp); 235 return (ILB_STATUS_EEXIST); 236 } 237 238 /* 239 * Sanity check on user supplied probe. The given path name 240 * must be a full path name (starts with '/') and is 241 * executable. 242 */ 243 if (strcasecmp(hc_info->hci_test, ILB_HC_STR_TCP) != 0 && 244 strcasecmp(hc_info->hci_test, ILB_HC_STR_UDP) != 0 && 245 strcasecmp(hc_info->hci_test, ILB_HC_STR_PING) != 0 && 246 (hc_info->hci_test[0] != '/' || 247 access(hc_info->hci_test, X_OK) == -1)) { 248 if (errno == ENOENT) { 249 logdebug("ilbd_create_hc: user script %s doesn't " 250 "exist", hc_info->hci_test); 251 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, 252 ILB_STATUS_ENOENT, ucredp); 253 return (ILB_STATUS_ENOENT); 254 } else { 255 logdebug("ilbd_create_hc: user script %s is " 256 "invalid", hc_info->hci_test); 257 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, 258 ILB_STATUS_EINVAL, ucredp); 259 return (ILB_STATUS_EINVAL); 260 } 261 } 262 263 /* Create and add the hc object */ 264 hc = calloc(1, sizeof (ilbd_hc_t)); 265 if (hc == NULL) { 266 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, 267 ILB_STATUS_ENOMEM, ucredp); 268 return (ILB_STATUS_ENOMEM); 269 } 270 (void) memcpy(&hc->ihc_info, hc_info, sizeof (ilb_hc_info_t)); 271 if (strcasecmp(hc->ihc_test, ILB_HC_STR_TCP) == 0) 272 hc->ihc_test_type = ILBD_HC_TCP; 273 else if (strcasecmp(hc->ihc_test, ILB_HC_STR_UDP) == 0) 274 hc->ihc_test_type = ILBD_HC_UDP; 275 else if (strcasecmp(hc->ihc_test, ILB_HC_STR_PING) == 0) 276 hc->ihc_test_type = ILBD_HC_PING; 277 else 278 hc->ihc_test_type = ILBD_HC_USER; 279 list_create(&hc->ihc_rules, sizeof (ilbd_hc_rule_t), 280 offsetof(ilbd_hc_rule_t, hcr_link)); 281 282 /* Update SCF */ 283 if (ps != NULL) { 284 if ((ret = ilbd_create_pg(ILBD_SCF_HC, (void *)hc)) != 285 ILB_STATUS_OK) { 286 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, 287 ret, ucredp); 288 free(hc); 289 return (ret); 290 } 291 } 292 293 /* Everything is fine, now add it to the global list. */ 294 list_insert_tail(&ilbd_hc_list, hc); 295 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, ret, ucredp); 296 return (ret); 297 } 298 299 /* 300 * Given a name of a hc object, destroy it. 301 */ 302 ilb_status_t 303 ilbd_destroy_hc(const char *hc_name, const struct passwd *ps, 304 ucred_t *ucredp) 305 { 306 ilb_status_t ret; 307 ilbd_hc_t *hc; 308 309 /* 310 * No need to check ps == NULL, daemon won't call any destroy func 311 * at start up. 312 */ 313 ret = ilbd_check_client_config_auth(ps); 314 if (ret != ILB_STATUS_OK) { 315 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC, 316 ret, ucredp); 317 return (ret); 318 } 319 320 hc = ilbd_get_hc(hc_name); 321 if (hc == NULL) { 322 logdebug("ilbd_destroy_hc: healthcheck %s does not exist", 323 hc_name); 324 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC, 325 ILB_STATUS_ENOENT, ucredp); 326 return (ILB_STATUS_ENOENT); 327 } 328 329 /* If hc is in use, cannot delete it */ 330 if (hc->ihc_rule_cnt > 0) { 331 logdebug("ilbd_destroy_hc: healthcheck %s is associated" 332 " with a rule - cannot remove", hc_name); 333 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC, 334 ILB_STATUS_INUSE, ucredp); 335 return (ILB_STATUS_INUSE); 336 } 337 338 if ((ret = ilbd_destroy_pg(ILBD_SCF_HC, hc_name)) != 339 ILB_STATUS_OK) { 340 logdebug("ilbd_destroy_hc: cannot destroy healthcheck %s " 341 "property group", hc_name); 342 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC, 343 ret, ucredp); 344 return (ret); 345 } 346 347 list_remove(&ilbd_hc_list, hc); 348 free(hc); 349 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC, ret, ucredp); 350 return (ret); 351 } 352 353 /* 354 * Given a hc object name, return its information. Used by libilb to 355 * get hc info. 356 */ 357 ilb_status_t 358 ilbd_get_hc_info(const char *hc_name, uint32_t *rbuf, size_t *rbufsz) 359 { 360 ilbd_hc_t *hc; 361 ilb_hc_info_t *hc_info; 362 ilb_comm_t *ic = (ilb_comm_t *)rbuf; 363 364 hc = ilbd_get_hc(hc_name); 365 if (hc == NULL) { 366 logdebug("%s: healthcheck %s does not exist", __func__, 367 hc_name); 368 return (ILB_STATUS_ENOENT); 369 } 370 ilbd_reply_ok(rbuf, rbufsz); 371 hc_info = (ilb_hc_info_t *)&ic->ic_data; 372 373 (void) strlcpy(hc_info->hci_name, hc->ihc_name, sizeof (hc->ihc_name)); 374 (void) strlcpy(hc_info->hci_test, hc->ihc_test, sizeof (hc->ihc_test)); 375 hc_info->hci_timeout = hc->ihc_timeout; 376 hc_info->hci_count = hc->ihc_count; 377 hc_info->hci_interval = hc->ihc_interval; 378 hc_info->hci_def_ping = hc->ihc_def_ping; 379 380 *rbufsz += sizeof (ilb_hc_info_t); 381 382 return (ILB_STATUS_OK); 383 } 384 385 static void 386 ilbd_hc_copy_srvs(uint32_t *rbuf, size_t *rbufsz, ilbd_hc_rule_t *hc_rule, 387 const char *rulename) 388 { 389 ilbd_hc_srv_t *tmp_srv; 390 ilb_hc_srv_t *dst_srv; 391 ilb_hc_rule_srv_t *srvs; 392 size_t tmp_rbufsz; 393 int i; 394 395 tmp_rbufsz = *rbufsz; 396 /* Set up the reply buffer. rbufsz will be set to the new size. */ 397 ilbd_reply_ok(rbuf, rbufsz); 398 399 /* Calculate how much space is left for holding server info. */ 400 *rbufsz += sizeof (ilb_hc_rule_srv_t); 401 tmp_rbufsz -= *rbufsz; 402 403 srvs = (ilb_hc_rule_srv_t *)&((ilb_comm_t *)rbuf)->ic_data; 404 405 tmp_srv = list_head(&hc_rule->hcr_servers); 406 for (i = 0; tmp_srv != NULL && tmp_rbufsz >= sizeof (*dst_srv); i++) { 407 dst_srv = &srvs->rs_srvs[i]; 408 409 (void) strlcpy(dst_srv->hcs_rule_name, rulename, ILB_NAMESZ); 410 (void) strlcpy(dst_srv->hcs_ID, tmp_srv->shc_sg_srv->sgs_srvID, 411 ILB_NAMESZ); 412 (void) strlcpy(dst_srv->hcs_hc_name, 413 tmp_srv->shc_hc->ihc_name, ILB_NAMESZ); 414 dst_srv->hcs_IP = tmp_srv->shc_sg_srv->sgs_addr; 415 dst_srv->hcs_fail_cnt = tmp_srv->shc_fail_cnt; 416 dst_srv->hcs_status = tmp_srv->shc_status; 417 dst_srv->hcs_rtt = tmp_srv->shc_rtt; 418 dst_srv->hcs_lasttime = tmp_srv->shc_lasttime; 419 dst_srv->hcs_nexttime = tmp_srv->shc_nexttime; 420 421 tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv); 422 tmp_rbufsz -= sizeof (*dst_srv); 423 } 424 srvs->rs_num_srvs = i; 425 *rbufsz += i * sizeof (*dst_srv); 426 } 427 428 /* 429 * Given a rule name, return the hc status of its servers. 430 */ 431 ilb_status_t 432 ilbd_get_hc_srvs(const char *rulename, uint32_t *rbuf, size_t *rbufsz) 433 { 434 ilbd_hc_t *hc; 435 ilbd_hc_rule_t *hc_rule; 436 437 for (hc = list_head(&ilbd_hc_list); hc != NULL; 438 hc = list_next(&ilbd_hc_list, hc)) { 439 for (hc_rule = list_head(&hc->ihc_rules); hc_rule != NULL; 440 hc_rule = list_next(&hc->ihc_rules, hc_rule)) { 441 if (strcasecmp(hc_rule->hcr_rule->irl_name, 442 rulename) != 0) { 443 continue; 444 } 445 ilbd_hc_copy_srvs(rbuf, rbufsz, hc_rule, rulename); 446 return (ILB_STATUS_OK); 447 } 448 } 449 return (ILB_STATUS_RULE_NO_HC); 450 } 451 452 /* 453 * Initialize the hc timer and associate the notification of timeout to 454 * the given event port. 455 */ 456 void 457 ilbd_hc_timer_init(int ev_port, ilbd_timer_event_obj_t *ev_obj) 458 { 459 struct sigevent sigev; 460 port_notify_t notify; 461 462 if ((ilbd_hc_timer_q = iu_tq_create()) == NULL) { 463 logerr("%s: cannot create hc timer queue", __func__); 464 exit(EXIT_FAILURE); 465 } 466 hc_timer_restarted = B_FALSE; 467 468 ev_obj->ev = ILBD_EVENT_TIMER; 469 ev_obj->timerid = -1; 470 471 notify.portnfy_port = ev_port; 472 notify.portnfy_user = ev_obj; 473 sigev.sigev_notify = SIGEV_PORT; 474 sigev.sigev_value.sival_ptr = ¬ify; 475 if (timer_create(CLOCK_REALTIME, &sigev, &ev_obj->timerid) == -1) { 476 logerr("%s: cannot create timer", __func__); 477 exit(EXIT_FAILURE); 478 } 479 } 480 481 /* 482 * HC timeout handler. 483 */ 484 void 485 ilbd_hc_timeout(void) 486 { 487 (void) iu_expire_timers(ilbd_hc_timer_q); 488 hc_timer_restarted = B_TRUE; 489 } 490 491 /* 492 * Set up the timer to fire at the earliest timeout. 493 */ 494 void 495 ilbd_hc_timer_update(ilbd_timer_event_obj_t *ev_obj) 496 { 497 itimerspec_t itimeout; 498 int timeout; 499 500 /* 501 * There is no change on the timer list, so no need to set up the 502 * timer again. 503 */ 504 if (!hc_timer_restarted) 505 return; 506 507 restart: 508 if ((timeout = iu_earliest_timer(ilbd_hc_timer_q)) == INFTIM) { 509 hc_timer_restarted = B_FALSE; 510 return; 511 } else if (timeout == 0) { 512 /* 513 * Handle the timeout immediately. After that (clearing all 514 * the expired timers), check to see if there are still 515 * timers running. If yes, start them. 516 */ 517 (void) iu_expire_timers(ilbd_hc_timer_q); 518 goto restart; 519 } 520 521 itimeout.it_value.tv_sec = timeout / MILLISEC + 1; 522 itimeout.it_value.tv_nsec = 0; 523 itimeout.it_interval.tv_sec = 0; 524 itimeout.it_interval.tv_nsec = 0; 525 526 /* 527 * Failure to set a timeout is "OK" since hopefully there will be 528 * other events and timer_settime() will be called again. So 529 * we will only miss some timeouts. But in the worst case, no event 530 * will happen and ilbd will get stuck... 531 */ 532 if (timer_settime(ev_obj->timerid, 0, &itimeout, NULL) == -1) 533 logerr("%s: cannot set timer", __func__); 534 hc_timer_restarted = B_FALSE; 535 } 536 537 /* 538 * Kill the probe process of a server. 539 */ 540 static void 541 ilbd_hc_kill_probe(ilbd_hc_srv_t *srv) 542 { 543 /* 544 * First dissociate the fd from the event port. It should not 545 * fail. 546 */ 547 if (port_dissociate(srv->shc_ev_port, PORT_SOURCE_FD, 548 srv->shc_child_fd) != 0) { 549 logdebug("%s: port_dissociate: %s", __func__, strerror(errno)); 550 } 551 (void) close(srv->shc_child_fd); 552 free(srv->shc_ev); 553 srv->shc_ev = NULL; 554 555 /* Then kill the probe process. */ 556 if (kill(srv->shc_child_pid, SIGKILL) != 0) { 557 logerr("%s: rule %s server %s: %s", __func__, 558 srv->shc_hc_rule->hcr_rule->irl_name, 559 srv->shc_sg_srv->sgs_srvID, strerror(errno)); 560 } 561 /* Should not fail... */ 562 if (waitpid(srv->shc_child_pid, NULL, 0) != srv->shc_child_pid) { 563 logdebug("%s: waitpid: rule %s server %s", __func__, 564 srv->shc_hc_rule->hcr_rule->irl_name, 565 srv->shc_sg_srv->sgs_srvID); 566 } 567 srv->shc_child_pid = 0; 568 } 569 570 /* 571 * Disable the server, either because the server is dead or because a timer 572 * cannot be started for this server. Note that this only affects the 573 * transient configuration, meaning only in memory. The persistent 574 * configuration is not affected. 575 */ 576 static void 577 ilbd_mark_server_disabled(ilbd_hc_srv_t *srv) 578 { 579 srv->shc_status = ILB_HCS_DISABLED; 580 581 /* Disable the server in kernel. */ 582 if (ilbd_k_Xable_server(&srv->shc_sg_srv->sgs_addr, 583 srv->shc_hc_rule->hcr_rule->irl_name, 584 stat_declare_srv_dead) != ILB_STATUS_OK) { 585 logerr("%s: cannot disable server in kernel: rule %s " 586 "server %s", __func__, 587 srv->shc_hc_rule->hcr_rule->irl_name, 588 srv->shc_sg_srv->sgs_srvID); 589 } 590 } 591 592 /* 593 * A probe fails, set the state of the server. 594 */ 595 static void 596 ilbd_set_fail_state(ilbd_hc_srv_t *srv) 597 { 598 if (++srv->shc_fail_cnt < srv->shc_hc->ihc_count) { 599 /* Probe again */ 600 ilbd_hc_probe_timer(ilbd_hc_timer_q, srv); 601 return; 602 } 603 604 logdebug("%s: rule %s server %s fails %u", __func__, 605 srv->shc_hc_rule->hcr_rule->irl_name, srv->shc_sg_srv->sgs_srvID, 606 srv->shc_fail_cnt); 607 608 /* 609 * If this is a ping test, mark the server as 610 * unreachable instead of dead. 611 */ 612 if (srv->shc_hc->ihc_test_type == ILBD_HC_PING || 613 srv->shc_state == ilbd_hc_def_pinging) { 614 srv->shc_status = ILB_HCS_UNREACH; 615 } else { 616 srv->shc_status = ILB_HCS_DEAD; 617 } 618 619 /* Disable the server in kernel. */ 620 if (ilbd_k_Xable_server(&srv->shc_sg_srv->sgs_addr, 621 srv->shc_hc_rule->hcr_rule->irl_name, stat_declare_srv_dead) != 622 ILB_STATUS_OK) { 623 logerr("%s: cannot disable server in kernel: rule %s " 624 "server %s", __func__, 625 srv->shc_hc_rule->hcr_rule->irl_name, 626 srv->shc_sg_srv->sgs_srvID); 627 } 628 629 /* Still keep probing in case the server is alive again. */ 630 if (ilbd_hc_restart_timer(srv->shc_hc, srv) != ILB_STATUS_OK) { 631 /* Only thing to do is to disable the server... */ 632 logerr("%s: cannot restart timer: rule %s server %s", __func__, 633 srv->shc_hc_rule->hcr_rule->irl_name, 634 srv->shc_sg_srv->sgs_srvID); 635 srv->shc_status = ILB_HCS_DISABLED; 636 } 637 } 638 639 /* 640 * A probe process has not returned for the ihc_timeout period, we should 641 * kill it. This function is the handler of this. 642 */ 643 /* ARGSUSED */ 644 static void 645 ilbd_hc_kill_timer(iu_tq_t *tq, void *arg) 646 { 647 ilbd_hc_srv_t *srv = (ilbd_hc_srv_t *)arg; 648 649 ilbd_hc_kill_probe(srv); 650 ilbd_set_fail_state(srv); 651 } 652 653 /* 654 * Probe timeout handler. Send out the appropriate probe. 655 */ 656 /* ARGSUSED */ 657 static void 658 ilbd_hc_probe_timer(iu_tq_t *tq, void *arg) 659 { 660 ilbd_hc_srv_t *srv = (ilbd_hc_srv_t *)arg; 661 662 /* 663 * If starting the probe fails, just pretend that the timeout has 664 * extended. 665 */ 666 if (!ilbd_run_probe(srv)) { 667 /* 668 * If we cannot restart the timer, the only thing we can do 669 * is to disable this server. Hopefully the sys admin will 670 * notice this and enable this server again later. 671 */ 672 if (ilbd_hc_restart_timer(srv->shc_hc, srv) != ILB_STATUS_OK) { 673 logerr("%s: cannot restart timer: rule %s server %s, " 674 "disabling it", __func__, 675 srv->shc_hc_rule->hcr_rule->irl_name, 676 srv->shc_sg_srv->sgs_srvID); 677 ilbd_mark_server_disabled(srv); 678 } 679 return; 680 } 681 682 /* 683 * Similar to above, if kill timer cannot be started, disable the 684 * server. 685 */ 686 if ((srv->shc_tid = iu_schedule_timer(ilbd_hc_timer_q, 687 srv->shc_hc->ihc_timeout, ilbd_hc_kill_timer, srv)) == -1) { 688 logerr("%s: cannot start kill timer: rule %s server %s, " 689 "disabling it", __func__, 690 srv->shc_hc_rule->hcr_rule->irl_name, 691 srv->shc_sg_srv->sgs_srvID); 692 ilbd_mark_server_disabled(srv); 693 } 694 hc_timer_restarted = B_TRUE; 695 } 696 697 /* Restart the periodic timer for a given server. */ 698 static ilb_status_t 699 ilbd_hc_restart_timer(ilbd_hc_t *hc, ilbd_hc_srv_t *srv) 700 { 701 int timeout; 702 703 /* Don't allow the timeout interval to be less than 1s */ 704 timeout = MAX((hc->ihc_interval >> 1) + (gethrtime() % 705 (hc->ihc_interval + 1)), 1); 706 707 /* 708 * If the probe is actually a ping probe, there is no need to 709 * do default pinging. Just skip the step. 710 */ 711 if (hc->ihc_def_ping && hc->ihc_test_type != ILBD_HC_PING) 712 srv->shc_state = ilbd_hc_def_pinging; 713 else 714 srv->shc_state = ilbd_hc_probing; 715 srv->shc_tid = iu_schedule_timer(ilbd_hc_timer_q, timeout, 716 ilbd_hc_probe_timer, srv); 717 718 if (srv->shc_tid == -1) 719 return (ILB_STATUS_TIMER); 720 srv->shc_lasttime = time(NULL); 721 srv->shc_nexttime = time(NULL) + timeout; 722 723 hc_timer_restarted = B_TRUE; 724 return (ILB_STATUS_OK); 725 } 726 727 /* Helper routine to associate a server with its hc object. */ 728 static ilb_status_t 729 ilbd_hc_srv_add(ilbd_hc_t *hc, ilbd_hc_rule_t *hc_rule, 730 const ilb_sg_srv_t *srv, int ev_port) 731 { 732 ilbd_hc_srv_t *new_srv; 733 ilb_status_t ret; 734 735 if ((new_srv = calloc(1, sizeof (ilbd_hc_srv_t))) == NULL) 736 return (ILB_STATUS_ENOMEM); 737 new_srv->shc_hc = hc; 738 new_srv->shc_hc_rule = hc_rule; 739 new_srv->shc_sg_srv = srv; 740 new_srv->shc_ev_port = ev_port; 741 new_srv->shc_tid = -1; 742 new_srv->shc_nexttime = time(NULL); 743 new_srv->shc_lasttime = new_srv->shc_nexttime; 744 745 if ((hc_rule->hcr_rule->irl_flags & ILB_FLAGS_RULE_ENABLED) && 746 ILB_IS_SRV_ENABLED(srv->sgs_flags)) { 747 new_srv->shc_status = ILB_HCS_UNINIT; 748 ret = ilbd_hc_restart_timer(hc, new_srv); 749 if (ret != ILB_STATUS_OK) { 750 free(new_srv); 751 return (ret); 752 } 753 } else { 754 new_srv->shc_status = ILB_HCS_DISABLED; 755 } 756 757 list_insert_tail(&hc_rule->hcr_servers, new_srv); 758 return (ILB_STATUS_OK); 759 } 760 761 /* Handy macro to cancel a server's timer. */ 762 #define HC_CANCEL_TIMER(srv) \ 763 { \ 764 void *arg; \ 765 int ret; \ 766 if ((srv)->shc_tid != -1) { \ 767 ret = iu_cancel_timer(ilbd_hc_timer_q, (srv)->shc_tid, &arg); \ 768 (srv)->shc_tid = -1; \ 769 assert(ret == 1); \ 770 assert(arg == (srv)); \ 771 } \ 772 hc_timer_restarted = B_TRUE; \ 773 } 774 775 /* Helper routine to dissociate a server from its hc object. */ 776 static ilb_status_t 777 ilbd_hc_srv_rem(ilbd_hc_rule_t *hc_rule, const ilb_sg_srv_t *srv) 778 { 779 ilbd_hc_srv_t *tmp_srv; 780 781 for (tmp_srv = list_head(&hc_rule->hcr_servers); tmp_srv != NULL; 782 tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv)) { 783 if (tmp_srv->shc_sg_srv == srv) { 784 list_remove(&hc_rule->hcr_servers, tmp_srv); 785 HC_CANCEL_TIMER(tmp_srv); 786 if (tmp_srv->shc_child_pid != 0) 787 ilbd_hc_kill_probe(tmp_srv); 788 free(tmp_srv); 789 return (ILB_STATUS_OK); 790 } 791 } 792 return (ILB_STATUS_ENOENT); 793 } 794 795 /* Helper routine to dissociate all servers of a rule from its hc object. */ 796 static void 797 ilbd_hc_srv_rem_all(ilbd_hc_rule_t *hc_rule) 798 { 799 ilbd_hc_srv_t *srv; 800 801 while ((srv = list_remove_head(&hc_rule->hcr_servers)) != NULL) { 802 HC_CANCEL_TIMER(srv); 803 if (srv->shc_child_pid != 0) 804 ilbd_hc_kill_probe(srv); 805 free(srv); 806 } 807 } 808 809 /* Associate a rule with its hc object. */ 810 ilb_status_t 811 ilbd_hc_associate_rule(const ilbd_rule_t *rule, int ev_port) 812 { 813 ilbd_hc_t *hc; 814 ilbd_hc_rule_t *hc_rule; 815 ilb_status_t ret; 816 ilbd_sg_t *sg; 817 ilbd_srv_t *ilbd_srv; 818 819 /* The rule is assumed to be initialized appropriately. */ 820 if ((hc = ilbd_get_hc(rule->irl_hcname)) == NULL) { 821 logdebug("ilbd_hc_associate_rule: healthcheck %s does not " 822 "exist", rule->irl_hcname); 823 return (ILB_STATUS_ENOHCINFO); 824 } 825 if ((hc->ihc_test_type == ILBD_HC_TCP && 826 rule->irl_proto != IPPROTO_TCP) || 827 (hc->ihc_test_type == ILBD_HC_UDP && 828 rule->irl_proto != IPPROTO_UDP)) { 829 return (ILB_STATUS_RULE_HC_MISMATCH); 830 } 831 if ((hc_rule = calloc(1, sizeof (ilbd_hc_rule_t))) == NULL) { 832 logdebug("ilbd_hc_associate_rule: out of memory"); 833 return (ILB_STATUS_ENOMEM); 834 } 835 836 hc_rule->hcr_rule = rule; 837 list_create(&hc_rule->hcr_servers, sizeof (ilbd_hc_srv_t), 838 offsetof(ilbd_hc_srv_t, shc_srv_link)); 839 840 /* Add all the servers. */ 841 sg = rule->irl_sg; 842 for (ilbd_srv = list_head(&sg->isg_srvlist); ilbd_srv != NULL; 843 ilbd_srv = list_next(&sg->isg_srvlist, ilbd_srv)) { 844 if ((ret = ilbd_hc_srv_add(hc, hc_rule, &ilbd_srv->isv_srv, 845 ev_port)) != ILB_STATUS_OK) { 846 /* Remove all previously added servers */ 847 ilbd_hc_srv_rem_all(hc_rule); 848 free(hc_rule); 849 return (ret); 850 } 851 } 852 list_insert_tail(&hc->ihc_rules, hc_rule); 853 hc->ihc_rule_cnt++; 854 855 return (ILB_STATUS_OK); 856 } 857 858 /* Dissociate a rule from its hc object. */ 859 ilb_status_t 860 ilbd_hc_dissociate_rule(const ilbd_rule_t *rule) 861 { 862 ilbd_hc_t *hc; 863 ilbd_hc_rule_t *hc_rule; 864 865 /* The rule is assumed to be initialized appropriately. */ 866 if ((hc = ilbd_get_hc(rule->irl_hcname)) == NULL) { 867 logdebug("ilbd_hc_dissociate_rule: healthcheck %s does not " 868 "exist", rule->irl_hcname); 869 return (ILB_STATUS_ENOENT); 870 } 871 for (hc_rule = list_head(&hc->ihc_rules); hc_rule != NULL; 872 hc_rule = list_next(&hc->ihc_rules, hc_rule)) { 873 if (hc_rule->hcr_rule == rule) 874 break; 875 } 876 if (hc_rule == NULL) { 877 logdebug("ilbd_hc_dissociate_rule: rule %s is not associated " 878 "with healtcheck %s", rule->irl_hcname, hc->ihc_name); 879 return (ILB_STATUS_ENOENT); 880 } 881 ilbd_hc_srv_rem_all(hc_rule); 882 list_remove(&hc->ihc_rules, hc_rule); 883 hc->ihc_rule_cnt--; 884 return (ILB_STATUS_OK); 885 } 886 887 /* 888 * Given a hc object name and a rule, check to see if the rule is associated 889 * with the hc object. If it is, the hc object is returned in **hc and the 890 * ilbd_hc_rule_t is returned in **hc_rule. 891 */ 892 static boolean_t 893 ilbd_hc_check_rule(const char *hc_name, const ilbd_rule_t *rule, 894 ilbd_hc_t **hc, ilbd_hc_rule_t **hc_rule) 895 { 896 ilbd_hc_t *tmp_hc; 897 ilbd_hc_rule_t *tmp_hc_rule; 898 899 if ((tmp_hc = ilbd_get_hc(hc_name)) == NULL) 900 return (B_FALSE); 901 for (tmp_hc_rule = list_head(&tmp_hc->ihc_rules); tmp_hc_rule != NULL; 902 tmp_hc_rule = list_next(&tmp_hc->ihc_rules, tmp_hc_rule)) { 903 if (tmp_hc_rule->hcr_rule == rule) { 904 *hc = tmp_hc; 905 *hc_rule = tmp_hc_rule; 906 return (B_TRUE); 907 } 908 } 909 return (B_FALSE); 910 } 911 912 /* Associate a server with its hc object. */ 913 ilb_status_t 914 ilbd_hc_add_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv, 915 int ev_port) 916 { 917 ilbd_hc_t *hc; 918 ilbd_hc_rule_t *hc_rule; 919 920 if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule)) 921 return (ILB_STATUS_ENOENT); 922 return (ilbd_hc_srv_add(hc, hc_rule, srv, ev_port)); 923 } 924 925 /* Dissociate a server from its hc object. */ 926 ilb_status_t 927 ilbd_hc_del_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv) 928 { 929 ilbd_hc_t *hc; 930 ilbd_hc_rule_t *hc_rule; 931 932 if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule)) 933 return (ILB_STATUS_ENOENT); 934 return (ilbd_hc_srv_rem(hc_rule, srv)); 935 } 936 937 /* Helper routine to enable/disable a server's hc probe. */ 938 static ilb_status_t 939 ilbd_hc_toggle_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv, 940 boolean_t enable) 941 { 942 ilbd_hc_t *hc; 943 ilbd_hc_rule_t *hc_rule; 944 ilbd_hc_srv_t *tmp_srv; 945 ilb_status_t ret; 946 947 if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule)) 948 return (ILB_STATUS_ENOENT); 949 for (tmp_srv = list_head(&hc_rule->hcr_servers); tmp_srv != NULL; 950 tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv)) { 951 if (tmp_srv->shc_sg_srv != srv) { 952 continue; 953 } 954 if (enable) { 955 if (tmp_srv->shc_status == ILB_HCS_DISABLED) { 956 ret = ilbd_hc_restart_timer(hc, tmp_srv); 957 if (ret != ILB_STATUS_OK) { 958 logerr("%s: cannot start timers for " 959 "rule %s server %s", __func__, 960 rule->irl_name, 961 tmp_srv->shc_sg_srv->sgs_srvID); 962 return (ret); 963 } 964 /* Start from fresh... */ 965 tmp_srv->shc_status = ILB_HCS_UNINIT; 966 tmp_srv->shc_rtt = 0; 967 tmp_srv->shc_fail_cnt = 0; 968 } 969 } else { 970 if (tmp_srv->shc_status != ILB_HCS_DISABLED) { 971 tmp_srv->shc_status = ILB_HCS_DISABLED; 972 HC_CANCEL_TIMER(tmp_srv); 973 if (tmp_srv->shc_child_pid != 0) 974 ilbd_hc_kill_probe(tmp_srv); 975 } 976 } 977 return (ILB_STATUS_OK); 978 } 979 return (ILB_STATUS_ENOENT); 980 } 981 982 ilb_status_t 983 ilbd_hc_enable_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv) 984 { 985 return (ilbd_hc_toggle_server(rule, srv, B_TRUE)); 986 } 987 988 ilb_status_t 989 ilbd_hc_disable_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv) 990 { 991 return (ilbd_hc_toggle_server(rule, srv, B_FALSE)); 992 } 993 994 /* 995 * Helper routine to enable/disable a rule's hc probe (including all its 996 * servers). 997 */ 998 static ilb_status_t 999 ilbd_hc_toggle_rule(const ilbd_rule_t *rule, boolean_t enable) 1000 { 1001 ilbd_hc_t *hc; 1002 ilbd_hc_rule_t *hc_rule; 1003 ilbd_hc_srv_t *tmp_srv; 1004 int ret; 1005 1006 if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule)) 1007 return (ILB_STATUS_ENOENT); 1008 1009 for (tmp_srv = list_head(&hc_rule->hcr_servers); tmp_srv != NULL; 1010 tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv)) { 1011 if (enable) { 1012 /* 1013 * If the server is disabled in the rule, do not 1014 * restart its timer. 1015 */ 1016 if (tmp_srv->shc_status == ILB_HCS_DISABLED && 1017 ILB_IS_SRV_ENABLED( 1018 tmp_srv->shc_sg_srv->sgs_flags)) { 1019 ret = ilbd_hc_restart_timer(hc, tmp_srv); 1020 if (ret != ILB_STATUS_OK) { 1021 logerr("%s: cannot start timers for " 1022 "rule %s server %s", __func__, 1023 rule->irl_name, 1024 tmp_srv->shc_sg_srv->sgs_srvID); 1025 goto rollback; 1026 } else { 1027 /* Start from fresh... */ 1028 tmp_srv->shc_status = ILB_HCS_UNINIT; 1029 tmp_srv->shc_rtt = 0; 1030 tmp_srv->shc_fail_cnt = 0; 1031 } 1032 } 1033 } else { 1034 if (tmp_srv->shc_status != ILB_HCS_DISABLED) { 1035 HC_CANCEL_TIMER(tmp_srv); 1036 tmp_srv->shc_status = ILB_HCS_DISABLED; 1037 if (tmp_srv->shc_child_pid != 0) 1038 ilbd_hc_kill_probe(tmp_srv); 1039 } 1040 } 1041 } 1042 return (ILB_STATUS_OK); 1043 rollback: 1044 enable = !enable; 1045 for (tmp_srv = list_prev(&hc_rule->hcr_servers, tmp_srv); 1046 tmp_srv != NULL; 1047 tmp_srv = list_prev(&hc_rule->hcr_servers, tmp_srv)) { 1048 if (enable) { 1049 if (tmp_srv->shc_status == ILB_HCS_DISABLED && 1050 ILB_IS_SRV_ENABLED( 1051 tmp_srv->shc_sg_srv->sgs_flags)) { 1052 (void) ilbd_hc_restart_timer(hc, tmp_srv); 1053 tmp_srv->shc_status = ILB_HCS_UNINIT; 1054 tmp_srv->shc_rtt = 0; 1055 tmp_srv->shc_fail_cnt = 0; 1056 } 1057 } else { 1058 if (tmp_srv->shc_status != ILB_HCS_DISABLED) { 1059 HC_CANCEL_TIMER(tmp_srv); 1060 tmp_srv->shc_status = ILB_HCS_DISABLED; 1061 if (tmp_srv->shc_child_pid != 0) 1062 ilbd_hc_kill_probe(tmp_srv); 1063 } 1064 } 1065 } 1066 return (ret); 1067 } 1068 1069 ilb_status_t 1070 ilbd_hc_enable_rule(const ilbd_rule_t *rule) 1071 { 1072 return (ilbd_hc_toggle_rule(rule, B_TRUE)); 1073 } 1074 1075 ilb_status_t 1076 ilbd_hc_disable_rule(const ilbd_rule_t *rule) 1077 { 1078 return (ilbd_hc_toggle_rule(rule, B_FALSE)); 1079 } 1080 1081 static const char * 1082 topo_2_str(ilb_topo_t topo) 1083 { 1084 switch (topo) { 1085 case ILB_TOPO_DSR: 1086 return ("DSR"); 1087 case ILB_TOPO_NAT: 1088 return ("NAT"); 1089 case ILB_TOPO_HALF_NAT: 1090 return ("HALF_NAT"); 1091 default: 1092 /* Should not happen. */ 1093 logerr("%s: unknown topology", __func__); 1094 break; 1095 } 1096 return (""); 1097 } 1098 1099 /* 1100 * Create the argument list to be passed to a hc probe command. 1101 * The passed in argv is assumed to have HC_PROBE_ARGC elements. 1102 */ 1103 static boolean_t 1104 create_argv(ilbd_hc_srv_t *srv, char *argv[]) 1105 { 1106 char buf[INET6_ADDRSTRLEN]; 1107 ilbd_rule_t const *rule; 1108 ilb_sg_srv_t const *sg_srv; 1109 struct in_addr v4_addr; 1110 in_port_t port; 1111 int i; 1112 1113 rule = srv->shc_hc_rule->hcr_rule; 1114 sg_srv = srv->shc_sg_srv; 1115 1116 if (srv->shc_state == ilbd_hc_def_pinging) { 1117 if ((argv[0] = strdup(ILB_PROBE_PING)) == NULL) 1118 return (B_FALSE); 1119 } else { 1120 switch (srv->shc_hc->ihc_test_type) { 1121 case ILBD_HC_USER: 1122 if ((argv[0] = strdup(srv->shc_hc->ihc_test)) == NULL) 1123 return (B_FALSE); 1124 break; 1125 case ILBD_HC_TCP: 1126 case ILBD_HC_UDP: 1127 if ((argv[0] = strdup(ILB_PROBE_PROTO)) == 1128 NULL) { 1129 return (B_FALSE); 1130 } 1131 break; 1132 case ILBD_HC_PING: 1133 if ((argv[0] = strdup(ILB_PROBE_PING)) == NULL) { 1134 return (B_FALSE); 1135 } 1136 break; 1137 } 1138 } 1139 1140 /* 1141 * argv[1] is the VIP. 1142 * 1143 * Right now, the VIP and the backend server addresses should be 1144 * in the same IP address family. Here we don't do that in case 1145 * this assumption is changed in future. 1146 */ 1147 if (IN6_IS_ADDR_V4MAPPED(&rule->irl_vip)) { 1148 IN6_V4MAPPED_TO_INADDR(&rule->irl_vip, &v4_addr); 1149 if (inet_ntop(AF_INET, &v4_addr, buf, sizeof (buf)) == NULL) 1150 goto cleanup; 1151 } else { 1152 if (inet_ntop(AF_INET6, &rule->irl_vip, buf, 1153 sizeof (buf)) == NULL) { 1154 goto cleanup; 1155 } 1156 } 1157 if ((argv[1] = strdup(buf)) == NULL) 1158 goto cleanup; 1159 1160 /* 1161 * argv[2] is the backend server address. 1162 */ 1163 if (IN6_IS_ADDR_V4MAPPED(&sg_srv->sgs_addr)) { 1164 IN6_V4MAPPED_TO_INADDR(&sg_srv->sgs_addr, &v4_addr); 1165 if (inet_ntop(AF_INET, &v4_addr, buf, sizeof (buf)) == NULL) 1166 goto cleanup; 1167 } else { 1168 if (inet_ntop(AF_INET6, &sg_srv->sgs_addr, buf, 1169 sizeof (buf)) == NULL) { 1170 goto cleanup; 1171 } 1172 } 1173 if ((argv[2] = strdup(buf)) == NULL) 1174 goto cleanup; 1175 1176 /* 1177 * argv[3] is the transport protocol used in the rule. 1178 */ 1179 switch (rule->irl_proto) { 1180 case IPPROTO_TCP: 1181 argv[3] = strdup("TCP"); 1182 break; 1183 case IPPROTO_UDP: 1184 argv[3] = strdup("UDP"); 1185 break; 1186 default: 1187 logerr("%s: unknown protocol", __func__); 1188 goto cleanup; 1189 } 1190 if (argv[3] == NULL) 1191 goto cleanup; 1192 1193 /* 1194 * argv[4] is the load balance mode, DSR, NAT, HALF-NAT. 1195 */ 1196 if ((argv[4] = strdup(topo_2_str(rule->irl_topo))) == NULL) 1197 goto cleanup; 1198 1199 /* 1200 * argv[5] is the port range. Right now, there should only be 1 port. 1201 */ 1202 switch (rule->irl_hcpflag) { 1203 case ILB_HCI_PROBE_FIX: 1204 port = ntohs(rule->irl_hcport); 1205 break; 1206 case ILB_HCI_PROBE_ANY: { 1207 in_port_t min, max; 1208 1209 if (ntohs(sg_srv->sgs_minport) == 0) { 1210 min = ntohs(rule->irl_minport); 1211 max = ntohs(rule->irl_maxport); 1212 } else { 1213 min = ntohs(sg_srv->sgs_minport); 1214 max = ntohs(sg_srv->sgs_maxport); 1215 } 1216 if (max > min) 1217 port = min + gethrtime() % (max - min + 1); 1218 else 1219 port = min; 1220 break; 1221 } 1222 default: 1223 logerr("%s: unknown HC flag", __func__); 1224 goto cleanup; 1225 } 1226 (void) sprintf(buf, "%d", port); 1227 if ((argv[5] = strdup(buf)) == NULL) 1228 goto cleanup; 1229 1230 /* 1231 * argv[6] is the probe timeout. 1232 */ 1233 (void) sprintf(buf, "%d", srv->shc_hc->ihc_timeout); 1234 if ((argv[6] = strdup(buf)) == NULL) 1235 goto cleanup; 1236 1237 argv[7] = NULL; 1238 return (B_TRUE); 1239 1240 cleanup: 1241 for (i = 0; i < HC_PROBE_ARGC; i++) { 1242 if (argv[i] != NULL) 1243 free(argv[i]); 1244 } 1245 return (B_FALSE); 1246 } 1247 1248 static void 1249 destroy_argv(char *argv[]) 1250 { 1251 int i; 1252 1253 for (i = 0; argv[i] != NULL; i++) 1254 free(argv[i]); 1255 } 1256 1257 /* Spawn a process to run the hc probe on the given server. */ 1258 static boolean_t 1259 ilbd_run_probe(ilbd_hc_srv_t *srv) 1260 { 1261 posix_spawn_file_actions_t fd_actions; 1262 posix_spawnattr_t attr; 1263 sigset_t child_sigset; 1264 int fds[2]; 1265 int fdflags; 1266 pid_t pid; 1267 char *child_argv[HC_PROBE_ARGC]; 1268 ilbd_hc_probe_event_t *probe_ev; 1269 char *probe_name; 1270 1271 bzero(child_argv, HC_PROBE_ARGC * sizeof (char *)); 1272 if ((probe_ev = calloc(1, sizeof (*probe_ev))) == NULL) { 1273 logdebug("ilbd_run_probe: calloc"); 1274 return (B_FALSE); 1275 } 1276 1277 /* Set up a pipe to get output from probe command. */ 1278 if (pipe(fds) < 0) { 1279 logdebug("ilbd_run_probe: cannot create pipe"); 1280 free(probe_ev); 1281 return (B_FALSE); 1282 } 1283 /* Set our side of the pipe to be non-blocking */ 1284 if ((fdflags = fcntl(fds[0], F_GETFL, 0)) == -1) { 1285 logdebug("ilbd_run_probe: fcntl(F_GETFL)"); 1286 goto cleanup; 1287 } 1288 if (fcntl(fds[0], F_SETFL, fdflags | O_NONBLOCK) == -1) { 1289 logdebug("ilbd_run_probe: fcntl(F_SETFL)"); 1290 goto cleanup; 1291 } 1292 1293 if (posix_spawn_file_actions_init(&fd_actions) != 0) { 1294 logdebug("ilbd_run_probe: posix_spawn_file_actions_init"); 1295 goto cleanup; 1296 } 1297 if (posix_spawnattr_init(&attr) != 0) { 1298 logdebug("ilbd_run_probe: posix_spawnattr_init"); 1299 goto cleanup; 1300 } 1301 if (posix_spawn_file_actions_addclose(&fd_actions, fds[0]) != 0) { 1302 logdebug("ilbd_run_probe: posix_spawn_file_actions_addclose"); 1303 goto cleanup; 1304 } 1305 if (posix_spawn_file_actions_adddup2(&fd_actions, fds[1], 1306 STDOUT_FILENO) != 0) { 1307 logdebug("ilbd_run_probe: posix_spawn_file_actions_dup2"); 1308 goto cleanup; 1309 } 1310 if (posix_spawn_file_actions_addclose(&fd_actions, fds[1]) != 0) { 1311 logdebug("ilbd_run_probe: posix_spawn_file_actions_addclose"); 1312 goto cleanup; 1313 } 1314 1315 /* Reset all signal handling of the child to default. */ 1316 (void) sigfillset(&child_sigset); 1317 if (posix_spawnattr_setsigdefault(&attr, &child_sigset) != 0) { 1318 logdebug("ilbd_run_probe: posix_spawnattr_setsigdefault"); 1319 goto cleanup; 1320 } 1321 /* Don't want SIGCHLD. */ 1322 if (posix_spawnattr_setflags(&attr, POSIX_SPAWN_NOSIGCHLD_NP| 1323 POSIX_SPAWN_SETSIGDEF) != 0) { 1324 logdebug("ilbd_run_probe: posix_spawnattr_setflags"); 1325 goto cleanup; 1326 } 1327 1328 if (!create_argv(srv, child_argv)) { 1329 logdebug("ilbd_run_probe: create_argv"); 1330 goto cleanup; 1331 } 1332 1333 /* 1334 * If we are doing default pinging or not using a user supplied 1335 * probe, we should execute our standard supplied probe. The 1336 * supplied probe command handles all types of probes. And the 1337 * type used depends on argv[0], as filled in by create_argv(). 1338 */ 1339 if (srv->shc_state == ilbd_hc_def_pinging || 1340 srv->shc_hc->ihc_test_type != ILBD_HC_USER) { 1341 probe_name = ILB_PROBE_PROTO; 1342 } else { 1343 probe_name = srv->shc_hc->ihc_test; 1344 } 1345 if (posix_spawn(&pid, probe_name, &fd_actions, &attr, child_argv, 1346 NULL) != 0) { 1347 logerr("%s: posix_spawn: %s for server %s: %s", __func__, 1348 srv->shc_hc->ihc_test, srv->shc_sg_srv->sgs_srvID, 1349 strerror(errno)); 1350 goto cleanup; 1351 } 1352 1353 (void) close(fds[1]); 1354 destroy_argv(child_argv); 1355 srv->shc_child_pid = pid; 1356 srv->shc_child_fd = fds[0]; 1357 srv->shc_ev = probe_ev; 1358 1359 probe_ev->ihp_ev = ILBD_EVENT_PROBE; 1360 probe_ev->ihp_srv = srv; 1361 probe_ev->ihp_pid = pid; 1362 if (port_associate(srv->shc_ev_port, PORT_SOURCE_FD, fds[0], 1363 POLLRDNORM, probe_ev) != 0) { 1364 /* 1365 * Need to kill the child. It will free the srv->shc_ev, 1366 * which is probe_ev. So set probe_ev to NULL. 1367 */ 1368 ilbd_hc_kill_probe(srv); 1369 probe_ev = NULL; 1370 goto cleanup; 1371 } 1372 1373 return (B_TRUE); 1374 1375 cleanup: 1376 (void) close(fds[0]); 1377 (void) close(fds[1]); 1378 destroy_argv(child_argv); 1379 if (probe_ev != NULL) 1380 free(probe_ev); 1381 return (B_FALSE); 1382 } 1383 1384 /* 1385 * Called by ild_hc_probe_return() to re-associate the fd to a child to 1386 * the event port. 1387 */ 1388 static void 1389 reassociate_port(int ev_port, int fd, ilbd_hc_probe_event_t *ev) 1390 { 1391 if (port_associate(ev_port, PORT_SOURCE_FD, fd, 1392 POLLRDNORM, ev) != 0) { 1393 /* 1394 * If we cannot reassociate with the port, the only 1395 * thing we can do now is to kill the child and 1396 * do a blocking wait here... 1397 */ 1398 logdebug("%s: port_associate: %s", __func__, strerror(errno)); 1399 if (kill(ev->ihp_pid, SIGKILL) != 0) 1400 logerr("%s: kill: %s", __func__, strerror(errno)); 1401 if (waitpid(ev->ihp_pid, NULL, 0) != ev->ihp_pid) 1402 logdebug("%s: waitpid: %s", __func__, strerror(errno)); 1403 free(ev); 1404 } 1405 } 1406 1407 /* 1408 * To handle a child probe process hanging up. 1409 */ 1410 static void 1411 ilbd_hc_child_hup(int ev_port, int fd, ilbd_hc_probe_event_t *ev) 1412 { 1413 ilbd_hc_srv_t *srv; 1414 pid_t ret_pid; 1415 int ret; 1416 1417 srv = ev->ihp_srv; 1418 1419 if (!ev->ihp_done) { 1420 /* ilbd does not care about this process anymore ... */ 1421 ev->ihp_done = B_TRUE; 1422 srv->shc_ev = NULL; 1423 srv->shc_child_pid = 0; 1424 HC_CANCEL_TIMER(srv); 1425 ilbd_set_fail_state(srv); 1426 } 1427 ret_pid = waitpid(ev->ihp_pid, &ret, WNOHANG); 1428 switch (ret_pid) { 1429 case -1: 1430 logperror("ilbd_hc_child_hup: waitpid"); 1431 /* FALLTHROUGH */ 1432 case 0: 1433 /* The child has not completed the exit. Wait again. */ 1434 reassociate_port(ev_port, fd, ev); 1435 break; 1436 default: 1437 /* Right now, we just ignore the exit status. */ 1438 if (WIFEXITED(ret)) 1439 ret = WEXITSTATUS(ret); 1440 (void) close(fd); 1441 free(ev); 1442 } 1443 } 1444 1445 /* 1446 * To read the output of a child probe process. 1447 */ 1448 static void 1449 ilbd_hc_child_data(int fd, ilbd_hc_probe_event_t *ev) 1450 { 1451 ilbd_hc_srv_t *srv; 1452 char buf[HC_MAX_PROBE_OUTPUT]; 1453 int ret; 1454 int64_t rtt; 1455 1456 srv = ev->ihp_srv; 1457 1458 bzero(buf, HC_MAX_PROBE_OUTPUT); 1459 ret = read(fd, buf, HC_MAX_PROBE_OUTPUT - 1); 1460 /* Should not happen since event port should have caught this. */ 1461 assert(ret > 0); 1462 1463 /* 1464 * We expect the probe command to print out the RTT only. But 1465 * the command may misbehave and print out more than what we intend to 1466 * read in. So need to do this check below to "flush" out all the 1467 * output from the command. 1468 */ 1469 if (!ev->ihp_done) { 1470 ev->ihp_done = B_TRUE; 1471 /* We don't need to know about this event anymore. */ 1472 srv->shc_ev = NULL; 1473 srv->shc_child_pid = 0; 1474 HC_CANCEL_TIMER(srv); 1475 } else { 1476 return; 1477 } 1478 1479 rtt = strtoll(buf, NULL, 10); 1480 1481 /* 1482 * -1 means the server is dead or the probe somehow fails. Treat 1483 * them both as server is dead. 1484 */ 1485 if (rtt == -1) { 1486 ilbd_set_fail_state(srv); 1487 return; 1488 } else if (rtt > 0) { 1489 /* If the returned RTT value is not valid, just ignore it. */ 1490 if (rtt > 0 && rtt <= UINT_MAX) { 1491 /* Set rtt to be the simple smoothed average. */ 1492 if (srv->shc_rtt == 0) { 1493 srv->shc_rtt = rtt; 1494 } else { 1495 srv->shc_rtt = 3 * ((srv)->shc_rtt >> 2) + 1496 (rtt >> 2); 1497 } 1498 } 1499 1500 } 1501 1502 switch (srv->shc_state) { 1503 case ilbd_hc_def_pinging: 1504 srv->shc_state = ilbd_hc_probing; 1505 1506 /* Ping is OK, now start the probe. */ 1507 ilbd_hc_probe_timer(ilbd_hc_timer_q, srv); 1508 break; 1509 case ilbd_hc_probing: 1510 srv->shc_fail_cnt = 0; 1511 1512 /* Server is dead before, re-enable it. */ 1513 if (srv->shc_status == ILB_HCS_UNREACH || 1514 srv->shc_status == ILB_HCS_DEAD) { 1515 /* 1516 * If enabling the server in kernel fails now, 1517 * hopefully when the timer fires again later, the 1518 * enabling can be done. 1519 */ 1520 if (ilbd_k_Xable_server(&srv->shc_sg_srv->sgs_addr, 1521 srv->shc_hc_rule->hcr_rule->irl_name, 1522 stat_declare_srv_alive) != ILB_STATUS_OK) { 1523 logerr("%s: cannot enable server in kernel: " 1524 " rule %s server %s", __func__, 1525 srv->shc_hc_rule->hcr_rule->irl_name, 1526 srv->shc_sg_srv->sgs_srvID); 1527 } else { 1528 srv->shc_status = ILB_HCS_ALIVE; 1529 } 1530 } else { 1531 srv->shc_status = ILB_HCS_ALIVE; 1532 } 1533 if (ilbd_hc_restart_timer(srv->shc_hc, srv) != ILB_STATUS_OK) { 1534 logerr("%s: cannot restart timer: rule %s server %s", 1535 __func__, srv->shc_hc_rule->hcr_rule->irl_name, 1536 srv->shc_sg_srv->sgs_srvID); 1537 ilbd_mark_server_disabled(srv); 1538 } 1539 break; 1540 default: 1541 logdebug("%s: unknown state", __func__); 1542 break; 1543 } 1544 } 1545 1546 /* 1547 * Handle the return event of a child probe fd. 1548 */ 1549 void 1550 ilbd_hc_probe_return(int ev_port, int fd, int port_events, 1551 ilbd_hc_probe_event_t *ev) 1552 { 1553 /* 1554 * Note that there can be more than one events delivered to us at 1555 * the same time. So we need to check them individually. 1556 */ 1557 if (port_events & POLLRDNORM) 1558 ilbd_hc_child_data(fd, ev); 1559 1560 if (port_events & (POLLHUP|POLLERR)) { 1561 ilbd_hc_child_hup(ev_port, fd, ev); 1562 return; 1563 } 1564 1565 /* 1566 * Re-associate the fd with the port so that when the child 1567 * exits, we can reap the status. 1568 */ 1569 reassociate_port(ev_port, fd, ev); 1570 } 1571