1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright 2012 Milan Jurik. All rights reserved.
26 */
27
28 #include <sys/types.h>
29 #include <sys/socket.h>
30 #include <sys/list.h>
31 #include <sys/stropts.h>
32 #include <sys/siginfo.h>
33 #include <sys/wait.h>
34 #include <arpa/inet.h>
35 #include <netinet/in.h>
36 #include <stdlib.h>
37 #include <stdio.h>
38 #include <strings.h>
39 #include <stddef.h>
40 #include <unistd.h>
41 #include <libilb.h>
42 #include <port.h>
43 #include <time.h>
44 #include <signal.h>
45 #include <assert.h>
46 #include <errno.h>
47 #include <spawn.h>
48 #include <fcntl.h>
49 #include <limits.h>
50 #include "libilb_impl.h"
51 #include "ilbd.h"
52
/*
 * Global list of HC objects.  It is created by i_ilbd_setup_hc_list()
 * and searched by name in ilbd_get_hc().
 */
list_t ilbd_hc_list;

/* Timer queue for all hc related timers. */
static iu_tq_t *ilbd_hc_timer_q;

/*
 * Indicate whether the timer needs to be updated.  It is set whenever a
 * timer in ilbd_hc_timer_q is scheduled, cancelled or expired, and it is
 * cleared again by ilbd_hc_timer_update().
 */
static boolean_t hc_timer_restarted;

static void ilbd_hc_probe_timer(iu_tq_t *, void *);
static ilb_status_t ilbd_hc_restart_timer(ilbd_hc_t *, ilbd_hc_srv_t *);
static boolean_t ilbd_run_probe(ilbd_hc_srv_t *);

/*
 * Classic two-argument maximum.  The argument that is selected is
 * evaluated twice, so do not pass expressions with side effects.
 */
#define	MAX(a, b)	((a) > (b) ? (a) : (b))

/*
 * Number of arguments passed to a probe.  argv[0] is the path name of
 * the probe.
 */
#define	HC_PROBE_ARGC	8

/*
 * Max number of characters to be read from the output of a probe.  It
 * is long enough to read in a 64 bit integer.
 */
#define	HC_MAX_PROBE_OUTPUT	24
79
80 void
i_ilbd_setup_hc_list(void)81 i_ilbd_setup_hc_list(void)
82 {
83 list_create(&ilbd_hc_list, sizeof (ilbd_hc_t),
84 offsetof(ilbd_hc_t, ihc_link));
85 }
86
87 /*
88 * Given a hc object name, return a pointer to hc object if found.
89 */
90 ilbd_hc_t *
ilbd_get_hc(const char * name)91 ilbd_get_hc(const char *name)
92 {
93 ilbd_hc_t *hc;
94
95 for (hc = list_head(&ilbd_hc_list); hc != NULL;
96 hc = list_next(&ilbd_hc_list, hc)) {
97 if (strcasecmp(hc->ihc_name, name) == 0)
98 return (hc);
99 }
100 return (NULL);
101 }
102
103 /*
104 * Generates an audit record for create-healthcheck,
105 * delete-healtcheck subcommands.
106 */
107 static void
ilbd_audit_hc_event(const char * audit_hcname,const ilb_hc_info_t * audit_hcinfo,ilbd_cmd_t cmd,ilb_status_t rc,ucred_t * ucredp)108 ilbd_audit_hc_event(const char *audit_hcname,
109 const ilb_hc_info_t *audit_hcinfo, ilbd_cmd_t cmd,
110 ilb_status_t rc, ucred_t *ucredp)
111 {
112 adt_session_data_t *ah;
113 adt_event_data_t *event;
114 au_event_t flag;
115 int audit_error;
116
117 if ((ucredp == NULL) && (cmd == ILBD_CREATE_HC)) {
118 /*
119 * we came here from the path where ilbd incorporates
120 * the configuration that is listed in SCF:
121 * i_ilbd_read_config->ilbd_walk_hc_pgs->
122 * ->ilbd_scf_instance_walk_pg->ilbd_create_hc
123 * We skip auditing in that case
124 */
125 logdebug("ilbd_audit_hc_event: skipping auditing");
126 return;
127 }
128
129 if (adt_start_session(&ah, NULL, 0) != 0) {
130 logerr("ilbd_audit_hc_event: adt_start_session failed");
131 exit(EXIT_FAILURE);
132 }
133 if (adt_set_from_ucred(ah, ucredp, ADT_NEW) != 0) {
134 (void) adt_end_session(ah);
135 logerr("ilbd_audit_rule_event: adt_set_from_ucred failed");
136 exit(EXIT_FAILURE);
137 }
138 if (cmd == ILBD_CREATE_HC)
139 flag = ADT_ilb_create_healthcheck;
140 else if (cmd == ILBD_DESTROY_HC)
141 flag = ADT_ilb_delete_healthcheck;
142
143 if ((event = adt_alloc_event(ah, flag)) == NULL) {
144 logerr("ilbd_audit_hc_event: adt_alloc_event failed");
145 exit(EXIT_FAILURE);
146 }
147 (void) memset((char *)event, 0, sizeof (adt_event_data_t));
148
149 switch (cmd) {
150 case ILBD_CREATE_HC:
151 event->adt_ilb_create_healthcheck.auth_used =
152 NET_ILB_CONFIG_AUTH;
153 event->adt_ilb_create_healthcheck.hc_test =
154 (char *)audit_hcinfo->hci_test;
155 event->adt_ilb_create_healthcheck.hc_name =
156 (char *)audit_hcinfo->hci_name;
157
158 /*
159 * If the value 0 is stored, the default values are
160 * set in the kernel. User land does not know about them
161 * So if the user does not specify them, audit record
162 * will show them as 0
163 */
164 event->adt_ilb_create_healthcheck.hc_timeout =
165 audit_hcinfo->hci_timeout;
166 event->adt_ilb_create_healthcheck.hc_count =
167 audit_hcinfo->hci_count;
168 event->adt_ilb_create_healthcheck.hc_interval =
169 audit_hcinfo->hci_interval;
170 break;
171 case ILBD_DESTROY_HC:
172 event->adt_ilb_delete_healthcheck.auth_used =
173 NET_ILB_CONFIG_AUTH;
174 event->adt_ilb_delete_healthcheck.hc_name =
175 (char *)audit_hcname;
176 break;
177 }
178
179 /* Fill in success/failure */
180 if (rc == ILB_STATUS_OK) {
181 if (adt_put_event(event, ADT_SUCCESS, ADT_SUCCESS) != 0) {
182 logerr("ilbd_audit_hc_event: adt_put_event failed");
183 exit(EXIT_FAILURE);
184 }
185 } else {
186 audit_error = ilberror2auditerror(rc);
187 if (adt_put_event(event, ADT_FAILURE, audit_error) != 0) {
188 logerr("ilbd_audit_hc_event: adt_put_event failed");
189 exit(EXIT_FAILURE);
190 }
191 }
192 adt_free_event(event);
193 (void) adt_end_session(ah);
194 }
195
196 /*
197 * Given the ilb_hc_info_t passed in (from the libilb), create a hc object
198 * in ilbd. The parameter ev_port is not used, refer to comments of
199 * ilbd_create_sg() in ilbd_sg.c
200 */
201 /* ARGSUSED */
202 ilb_status_t
ilbd_create_hc(const ilb_hc_info_t * hc_info,int ev_port,const struct passwd * ps,ucred_t * ucredp)203 ilbd_create_hc(const ilb_hc_info_t *hc_info, int ev_port,
204 const struct passwd *ps, ucred_t *ucredp)
205 {
206 ilbd_hc_t *hc;
207 ilb_status_t ret = ILB_STATUS_OK;
208
209 /*
210 * ps == NULL is from the daemon when it starts and load configuration
211 * ps != NULL is from client.
212 */
213 if (ps != NULL) {
214 ret = ilbd_check_client_config_auth(ps);
215 if (ret != ILB_STATUS_OK) {
216 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
217 ret, ucredp);
218 return (ret);
219 }
220 }
221
222 if (hc_info->hci_name[0] == '\0') {
223 logdebug("ilbd_create_hc: missing healthcheck info");
224 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
225 ILB_STATUS_ENOHCINFO, ucredp);
226 return (ILB_STATUS_ENOHCINFO);
227 }
228
229 hc = ilbd_get_hc(hc_info->hci_name);
230 if (hc != NULL) {
231 logdebug("ilbd_create_hc: healthcheck name %s already"
232 " exists", hc_info->hci_name);
233 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
234 ILB_STATUS_EEXIST, ucredp);
235 return (ILB_STATUS_EEXIST);
236 }
237
238 /*
239 * Sanity check on user supplied probe. The given path name
240 * must be a full path name (starts with '/') and is
241 * executable.
242 */
243 if (strcasecmp(hc_info->hci_test, ILB_HC_STR_TCP) != 0 &&
244 strcasecmp(hc_info->hci_test, ILB_HC_STR_UDP) != 0 &&
245 strcasecmp(hc_info->hci_test, ILB_HC_STR_PING) != 0 &&
246 (hc_info->hci_test[0] != '/' ||
247 access(hc_info->hci_test, X_OK) == -1)) {
248 if (errno == ENOENT) {
249 logdebug("ilbd_create_hc: user script %s doesn't "
250 "exist", hc_info->hci_test);
251 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
252 ILB_STATUS_ENOENT, ucredp);
253 return (ILB_STATUS_ENOENT);
254 } else {
255 logdebug("ilbd_create_hc: user script %s is "
256 "invalid", hc_info->hci_test);
257 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
258 ILB_STATUS_EINVAL, ucredp);
259 return (ILB_STATUS_EINVAL);
260 }
261 }
262
263 /* Create and add the hc object */
264 hc = calloc(1, sizeof (ilbd_hc_t));
265 if (hc == NULL) {
266 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
267 ILB_STATUS_ENOMEM, ucredp);
268 return (ILB_STATUS_ENOMEM);
269 }
270 (void) memcpy(&hc->ihc_info, hc_info, sizeof (ilb_hc_info_t));
271 if (strcasecmp(hc->ihc_test, ILB_HC_STR_TCP) == 0)
272 hc->ihc_test_type = ILBD_HC_TCP;
273 else if (strcasecmp(hc->ihc_test, ILB_HC_STR_UDP) == 0)
274 hc->ihc_test_type = ILBD_HC_UDP;
275 else if (strcasecmp(hc->ihc_test, ILB_HC_STR_PING) == 0)
276 hc->ihc_test_type = ILBD_HC_PING;
277 else
278 hc->ihc_test_type = ILBD_HC_USER;
279 list_create(&hc->ihc_rules, sizeof (ilbd_hc_rule_t),
280 offsetof(ilbd_hc_rule_t, hcr_link));
281
282 /* Update SCF */
283 if (ps != NULL) {
284 if ((ret = ilbd_create_pg(ILBD_SCF_HC, (void *)hc)) !=
285 ILB_STATUS_OK) {
286 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
287 ret, ucredp);
288 list_destroy(&hc->ihc_rules);
289 free(hc);
290 return (ret);
291 }
292 }
293
294 /* Everything is fine, now add it to the global list. */
295 list_insert_tail(&ilbd_hc_list, hc);
296 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, ret, ucredp);
297 return (ret);
298 }
299
300 /*
301 * Given a name of a hc object, destroy it.
302 */
303 ilb_status_t
ilbd_destroy_hc(const char * hc_name,const struct passwd * ps,ucred_t * ucredp)304 ilbd_destroy_hc(const char *hc_name, const struct passwd *ps,
305 ucred_t *ucredp)
306 {
307 ilb_status_t ret;
308 ilbd_hc_t *hc;
309
310 /*
311 * No need to check ps == NULL, daemon won't call any destroy func
312 * at start up.
313 */
314 ret = ilbd_check_client_config_auth(ps);
315 if (ret != ILB_STATUS_OK) {
316 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC,
317 ret, ucredp);
318 return (ret);
319 }
320
321 hc = ilbd_get_hc(hc_name);
322 if (hc == NULL) {
323 logdebug("ilbd_destroy_hc: healthcheck %s does not exist",
324 hc_name);
325 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC,
326 ILB_STATUS_ENOENT, ucredp);
327 return (ILB_STATUS_ENOENT);
328 }
329
330 /* If hc is in use, cannot delete it */
331 if (hc->ihc_rule_cnt > 0) {
332 logdebug("ilbd_destroy_hc: healthcheck %s is associated"
333 " with a rule - cannot remove", hc_name);
334 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC,
335 ILB_STATUS_INUSE, ucredp);
336 return (ILB_STATUS_INUSE);
337 }
338
339 if ((ret = ilbd_destroy_pg(ILBD_SCF_HC, hc_name)) !=
340 ILB_STATUS_OK) {
341 logdebug("ilbd_destroy_hc: cannot destroy healthcheck %s "
342 "property group", hc_name);
343 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC,
344 ret, ucredp);
345 return (ret);
346 }
347
348 list_remove(&ilbd_hc_list, hc);
349 list_destroy(&hc->ihc_rules);
350 free(hc);
351 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC, ret, ucredp);
352 return (ret);
353 }
354
355 /*
356 * Given a hc object name, return its information. Used by libilb to
357 * get hc info.
358 */
359 ilb_status_t
ilbd_get_hc_info(const char * hc_name,uint32_t * rbuf,size_t * rbufsz)360 ilbd_get_hc_info(const char *hc_name, uint32_t *rbuf, size_t *rbufsz)
361 {
362 ilbd_hc_t *hc;
363 ilb_hc_info_t *hc_info;
364 ilb_comm_t *ic = (ilb_comm_t *)rbuf;
365
366 hc = ilbd_get_hc(hc_name);
367 if (hc == NULL) {
368 logdebug("%s: healthcheck %s does not exist", __func__,
369 hc_name);
370 return (ILB_STATUS_ENOENT);
371 }
372 ilbd_reply_ok(rbuf, rbufsz);
373 hc_info = (ilb_hc_info_t *)&ic->ic_data;
374
375 (void) strlcpy(hc_info->hci_name, hc->ihc_name, sizeof (hc->ihc_name));
376 (void) strlcpy(hc_info->hci_test, hc->ihc_test, sizeof (hc->ihc_test));
377 hc_info->hci_timeout = hc->ihc_timeout;
378 hc_info->hci_count = hc->ihc_count;
379 hc_info->hci_interval = hc->ihc_interval;
380 hc_info->hci_def_ping = hc->ihc_def_ping;
381
382 *rbufsz += sizeof (ilb_hc_info_t);
383
384 return (ILB_STATUS_OK);
385 }
386
387 static void
ilbd_hc_copy_srvs(uint32_t * rbuf,size_t * rbufsz,ilbd_hc_rule_t * hc_rule,const char * rulename)388 ilbd_hc_copy_srvs(uint32_t *rbuf, size_t *rbufsz, ilbd_hc_rule_t *hc_rule,
389 const char *rulename)
390 {
391 ilbd_hc_srv_t *tmp_srv;
392 ilb_hc_srv_t *dst_srv;
393 ilb_hc_rule_srv_t *srvs;
394 size_t tmp_rbufsz;
395 int i;
396
397 tmp_rbufsz = *rbufsz;
398 /* Set up the reply buffer. rbufsz will be set to the new size. */
399 ilbd_reply_ok(rbuf, rbufsz);
400
401 /* Calculate how much space is left for holding server info. */
402 *rbufsz += sizeof (ilb_hc_rule_srv_t);
403 tmp_rbufsz -= *rbufsz;
404
405 srvs = (ilb_hc_rule_srv_t *)&((ilb_comm_t *)rbuf)->ic_data;
406
407 tmp_srv = list_head(&hc_rule->hcr_servers);
408 for (i = 0; tmp_srv != NULL && tmp_rbufsz >= sizeof (*dst_srv); i++) {
409 dst_srv = &srvs->rs_srvs[i];
410
411 (void) strlcpy(dst_srv->hcs_rule_name, rulename, ILB_NAMESZ);
412 (void) strlcpy(dst_srv->hcs_ID, tmp_srv->shc_sg_srv->sgs_srvID,
413 ILB_NAMESZ);
414 (void) strlcpy(dst_srv->hcs_hc_name,
415 tmp_srv->shc_hc->ihc_name, ILB_NAMESZ);
416 dst_srv->hcs_IP = tmp_srv->shc_sg_srv->sgs_addr;
417 dst_srv->hcs_fail_cnt = tmp_srv->shc_fail_cnt;
418 dst_srv->hcs_status = tmp_srv->shc_status;
419 dst_srv->hcs_rtt = tmp_srv->shc_rtt;
420 dst_srv->hcs_lasttime = tmp_srv->shc_lasttime;
421 dst_srv->hcs_nexttime = tmp_srv->shc_nexttime;
422
423 tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv);
424 tmp_rbufsz -= sizeof (*dst_srv);
425 }
426 srvs->rs_num_srvs = i;
427 *rbufsz += i * sizeof (*dst_srv);
428 }
429
430 /*
431 * Given a rule name, return the hc status of its servers.
432 */
433 ilb_status_t
ilbd_get_hc_srvs(const char * rulename,uint32_t * rbuf,size_t * rbufsz)434 ilbd_get_hc_srvs(const char *rulename, uint32_t *rbuf, size_t *rbufsz)
435 {
436 ilbd_hc_t *hc;
437 ilbd_hc_rule_t *hc_rule;
438
439 for (hc = list_head(&ilbd_hc_list); hc != NULL;
440 hc = list_next(&ilbd_hc_list, hc)) {
441 for (hc_rule = list_head(&hc->ihc_rules); hc_rule != NULL;
442 hc_rule = list_next(&hc->ihc_rules, hc_rule)) {
443 if (strcasecmp(hc_rule->hcr_rule->irl_name,
444 rulename) != 0) {
445 continue;
446 }
447 ilbd_hc_copy_srvs(rbuf, rbufsz, hc_rule, rulename);
448 return (ILB_STATUS_OK);
449 }
450 }
451 return (ILB_STATUS_RULE_NO_HC);
452 }
453
454 /*
455 * Initialize the hc timer and associate the notification of timeout to
456 * the given event port.
457 */
458 void
ilbd_hc_timer_init(int ev_port,ilbd_timer_event_obj_t * ev_obj)459 ilbd_hc_timer_init(int ev_port, ilbd_timer_event_obj_t *ev_obj)
460 {
461 struct sigevent sigev;
462 port_notify_t notify;
463
464 if ((ilbd_hc_timer_q = iu_tq_create()) == NULL) {
465 logerr("%s: cannot create hc timer queue", __func__);
466 exit(EXIT_FAILURE);
467 }
468 hc_timer_restarted = B_FALSE;
469
470 ev_obj->ev = ILBD_EVENT_TIMER;
471 ev_obj->timerid = -1;
472
473 notify.portnfy_port = ev_port;
474 notify.portnfy_user = ev_obj;
475 sigev.sigev_notify = SIGEV_PORT;
476 sigev.sigev_value.sival_ptr = ¬ify;
477 if (timer_create(CLOCK_REALTIME, &sigev, &ev_obj->timerid) == -1) {
478 logerr("%s: cannot create timer", __func__);
479 exit(EXIT_FAILURE);
480 }
481 }
482
483 /*
484 * HC timeout handler.
485 */
486 void
ilbd_hc_timeout(void)487 ilbd_hc_timeout(void)
488 {
489 (void) iu_expire_timers(ilbd_hc_timer_q);
490 hc_timer_restarted = B_TRUE;
491 }
492
493 /*
494 * Set up the timer to fire at the earliest timeout.
495 */
496 void
ilbd_hc_timer_update(ilbd_timer_event_obj_t * ev_obj)497 ilbd_hc_timer_update(ilbd_timer_event_obj_t *ev_obj)
498 {
499 itimerspec_t itimeout;
500 int timeout;
501
502 /*
503 * There is no change on the timer list, so no need to set up the
504 * timer again.
505 */
506 if (!hc_timer_restarted)
507 return;
508
509 restart:
510 if ((timeout = iu_earliest_timer(ilbd_hc_timer_q)) == INFTIM) {
511 hc_timer_restarted = B_FALSE;
512 return;
513 } else if (timeout == 0) {
514 /*
515 * Handle the timeout immediately. After that (clearing all
516 * the expired timers), check to see if there are still
517 * timers running. If yes, start them.
518 */
519 (void) iu_expire_timers(ilbd_hc_timer_q);
520 goto restart;
521 }
522
523 itimeout.it_value.tv_sec = timeout / MILLISEC + 1;
524 itimeout.it_value.tv_nsec = 0;
525 itimeout.it_interval.tv_sec = 0;
526 itimeout.it_interval.tv_nsec = 0;
527
528 /*
529 * Failure to set a timeout is "OK" since hopefully there will be
530 * other events and timer_settime() will be called again. So
531 * we will only miss some timeouts. But in the worst case, no event
532 * will happen and ilbd will get stuck...
533 */
534 if (timer_settime(ev_obj->timerid, 0, &itimeout, NULL) == -1)
535 logerr("%s: cannot set timer", __func__);
536 hc_timer_restarted = B_FALSE;
537 }
538
539 /*
540 * Kill the probe process of a server.
541 */
542 static void
ilbd_hc_kill_probe(ilbd_hc_srv_t * srv)543 ilbd_hc_kill_probe(ilbd_hc_srv_t *srv)
544 {
545 /*
546 * First dissociate the fd from the event port. It should not
547 * fail.
548 */
549 if (port_dissociate(srv->shc_ev_port, PORT_SOURCE_FD,
550 srv->shc_child_fd) != 0) {
551 logdebug("%s: port_dissociate: %s", __func__, strerror(errno));
552 }
553 (void) close(srv->shc_child_fd);
554 free(srv->shc_ev);
555 srv->shc_ev = NULL;
556
557 /* Then kill the probe process. */
558 if (kill(srv->shc_child_pid, SIGKILL) != 0) {
559 logerr("%s: rule %s server %s: %s", __func__,
560 srv->shc_hc_rule->hcr_rule->irl_name,
561 srv->shc_sg_srv->sgs_srvID, strerror(errno));
562 }
563 /* Should not fail... */
564 if (waitpid(srv->shc_child_pid, NULL, 0) != srv->shc_child_pid) {
565 logdebug("%s: waitpid: rule %s server %s", __func__,
566 srv->shc_hc_rule->hcr_rule->irl_name,
567 srv->shc_sg_srv->sgs_srvID);
568 }
569 srv->shc_child_pid = 0;
570 }
571
572 /*
573 * Disable the server, either because the server is dead or because a timer
574 * cannot be started for this server. Note that this only affects the
575 * transient configuration, meaning only in memory. The persistent
576 * configuration is not affected.
577 */
578 static void
ilbd_mark_server_disabled(ilbd_hc_srv_t * srv)579 ilbd_mark_server_disabled(ilbd_hc_srv_t *srv)
580 {
581 srv->shc_status = ILB_HCS_DISABLED;
582
583 /* Disable the server in kernel. */
584 if (ilbd_k_Xable_server(&srv->shc_sg_srv->sgs_addr,
585 srv->shc_hc_rule->hcr_rule->irl_name,
586 stat_declare_srv_dead) != ILB_STATUS_OK) {
587 logerr("%s: cannot disable server in kernel: rule %s "
588 "server %s", __func__,
589 srv->shc_hc_rule->hcr_rule->irl_name,
590 srv->shc_sg_srv->sgs_srvID);
591 }
592 }
593
594 /*
595 * A probe fails, set the state of the server.
596 */
597 static void
ilbd_set_fail_state(ilbd_hc_srv_t * srv)598 ilbd_set_fail_state(ilbd_hc_srv_t *srv)
599 {
600 if (++srv->shc_fail_cnt < srv->shc_hc->ihc_count) {
601 /* Probe again */
602 ilbd_hc_probe_timer(ilbd_hc_timer_q, srv);
603 return;
604 }
605
606 logdebug("%s: rule %s server %s fails %u", __func__,
607 srv->shc_hc_rule->hcr_rule->irl_name, srv->shc_sg_srv->sgs_srvID,
608 srv->shc_fail_cnt);
609
610 /*
611 * If this is a ping test, mark the server as
612 * unreachable instead of dead.
613 */
614 if (srv->shc_hc->ihc_test_type == ILBD_HC_PING ||
615 srv->shc_state == ilbd_hc_def_pinging) {
616 srv->shc_status = ILB_HCS_UNREACH;
617 } else {
618 srv->shc_status = ILB_HCS_DEAD;
619 }
620
621 /* Disable the server in kernel. */
622 if (ilbd_k_Xable_server(&srv->shc_sg_srv->sgs_addr,
623 srv->shc_hc_rule->hcr_rule->irl_name, stat_declare_srv_dead) !=
624 ILB_STATUS_OK) {
625 logerr("%s: cannot disable server in kernel: rule %s "
626 "server %s", __func__,
627 srv->shc_hc_rule->hcr_rule->irl_name,
628 srv->shc_sg_srv->sgs_srvID);
629 }
630
631 /* Still keep probing in case the server is alive again. */
632 if (ilbd_hc_restart_timer(srv->shc_hc, srv) != ILB_STATUS_OK) {
633 /* Only thing to do is to disable the server... */
634 logerr("%s: cannot restart timer: rule %s server %s", __func__,
635 srv->shc_hc_rule->hcr_rule->irl_name,
636 srv->shc_sg_srv->sgs_srvID);
637 srv->shc_status = ILB_HCS_DISABLED;
638 }
639 }
640
641 /*
642 * A probe process has not returned for the ihc_timeout period, we should
643 * kill it. This function is the handler of this.
644 */
645 /* ARGSUSED */
646 static void
ilbd_hc_kill_timer(iu_tq_t * tq,void * arg)647 ilbd_hc_kill_timer(iu_tq_t *tq, void *arg)
648 {
649 ilbd_hc_srv_t *srv = (ilbd_hc_srv_t *)arg;
650
651 ilbd_hc_kill_probe(srv);
652 ilbd_set_fail_state(srv);
653 }
654
655 /*
656 * Probe timeout handler. Send out the appropriate probe.
657 */
658 /* ARGSUSED */
659 static void
ilbd_hc_probe_timer(iu_tq_t * tq,void * arg)660 ilbd_hc_probe_timer(iu_tq_t *tq, void *arg)
661 {
662 ilbd_hc_srv_t *srv = (ilbd_hc_srv_t *)arg;
663
664 /*
665 * If starting the probe fails, just pretend that the timeout has
666 * extended.
667 */
668 if (!ilbd_run_probe(srv)) {
669 /*
670 * If we cannot restart the timer, the only thing we can do
671 * is to disable this server. Hopefully the sys admin will
672 * notice this and enable this server again later.
673 */
674 if (ilbd_hc_restart_timer(srv->shc_hc, srv) != ILB_STATUS_OK) {
675 logerr("%s: cannot restart timer: rule %s server %s, "
676 "disabling it", __func__,
677 srv->shc_hc_rule->hcr_rule->irl_name,
678 srv->shc_sg_srv->sgs_srvID);
679 ilbd_mark_server_disabled(srv);
680 }
681 return;
682 }
683
684 /*
685 * Similar to above, if kill timer cannot be started, disable the
686 * server.
687 */
688 if ((srv->shc_tid = iu_schedule_timer(ilbd_hc_timer_q,
689 srv->shc_hc->ihc_timeout, ilbd_hc_kill_timer, srv)) == -1) {
690 logerr("%s: cannot start kill timer: rule %s server %s, "
691 "disabling it", __func__,
692 srv->shc_hc_rule->hcr_rule->irl_name,
693 srv->shc_sg_srv->sgs_srvID);
694 ilbd_mark_server_disabled(srv);
695 }
696 hc_timer_restarted = B_TRUE;
697 }
698
699 /* Restart the periodic timer for a given server. */
700 static ilb_status_t
ilbd_hc_restart_timer(ilbd_hc_t * hc,ilbd_hc_srv_t * srv)701 ilbd_hc_restart_timer(ilbd_hc_t *hc, ilbd_hc_srv_t *srv)
702 {
703 int timeout;
704
705 /* Don't allow the timeout interval to be less than 1s */
706 timeout = MAX((hc->ihc_interval >> 1) + (gethrtime() %
707 (hc->ihc_interval + 1)), 1);
708
709 /*
710 * If the probe is actually a ping probe, there is no need to
711 * do default pinging. Just skip the step.
712 */
713 if (hc->ihc_def_ping && hc->ihc_test_type != ILBD_HC_PING)
714 srv->shc_state = ilbd_hc_def_pinging;
715 else
716 srv->shc_state = ilbd_hc_probing;
717 srv->shc_tid = iu_schedule_timer(ilbd_hc_timer_q, timeout,
718 ilbd_hc_probe_timer, srv);
719
720 if (srv->shc_tid == -1)
721 return (ILB_STATUS_TIMER);
722 srv->shc_lasttime = time(NULL);
723 srv->shc_nexttime = time(NULL) + timeout;
724
725 hc_timer_restarted = B_TRUE;
726 return (ILB_STATUS_OK);
727 }
728
729 /* Helper routine to associate a server with its hc object. */
730 static ilb_status_t
ilbd_hc_srv_add(ilbd_hc_t * hc,ilbd_hc_rule_t * hc_rule,const ilb_sg_srv_t * srv,int ev_port)731 ilbd_hc_srv_add(ilbd_hc_t *hc, ilbd_hc_rule_t *hc_rule,
732 const ilb_sg_srv_t *srv, int ev_port)
733 {
734 ilbd_hc_srv_t *new_srv;
735 ilb_status_t ret;
736
737 if ((new_srv = calloc(1, sizeof (ilbd_hc_srv_t))) == NULL)
738 return (ILB_STATUS_ENOMEM);
739 new_srv->shc_hc = hc;
740 new_srv->shc_hc_rule = hc_rule;
741 new_srv->shc_sg_srv = srv;
742 new_srv->shc_ev_port = ev_port;
743 new_srv->shc_tid = -1;
744 new_srv->shc_nexttime = time(NULL);
745 new_srv->shc_lasttime = new_srv->shc_nexttime;
746
747 if ((hc_rule->hcr_rule->irl_flags & ILB_FLAGS_RULE_ENABLED) &&
748 ILB_IS_SRV_ENABLED(srv->sgs_flags)) {
749 new_srv->shc_status = ILB_HCS_UNINIT;
750 ret = ilbd_hc_restart_timer(hc, new_srv);
751 if (ret != ILB_STATUS_OK) {
752 free(new_srv);
753 return (ret);
754 }
755 } else {
756 new_srv->shc_status = ILB_HCS_DISABLED;
757 }
758
759 list_insert_tail(&hc_rule->hcr_servers, new_srv);
760 return (ILB_STATUS_OK);
761 }
762
/*
 * Handy macro to cancel a server's timer.
 *
 * If the server has a timer pending (shc_tid != -1), the timer is removed
 * from ilbd_hc_timer_q and shc_tid is reset to -1.  hc_timer_restarted is
 * set in any case.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement, also in an unbraced if/else.  Its locals carry an
 * hct_ prefix so that they cannot shadow a caller's variable that is
 * passed in as srv.  Note that srv is evaluated more than once.
 */
#define	HC_CANCEL_TIMER(srv)						\
do {									\
	void *hct_arg;							\
	int hct_ret;							\
	if ((srv)->shc_tid != -1) {					\
		hct_ret = iu_cancel_timer(ilbd_hc_timer_q,		\
		    (srv)->shc_tid, &hct_arg);				\
		(srv)->shc_tid = -1;					\
		assert(hct_ret == 1);					\
		assert(hct_arg == (srv));				\
	}								\
	hc_timer_restarted = B_TRUE;					\
} while (0)
776
777 /* Helper routine to dissociate a server from its hc object. */
778 static ilb_status_t
ilbd_hc_srv_rem(ilbd_hc_rule_t * hc_rule,const ilb_sg_srv_t * srv)779 ilbd_hc_srv_rem(ilbd_hc_rule_t *hc_rule, const ilb_sg_srv_t *srv)
780 {
781 ilbd_hc_srv_t *tmp_srv;
782
783 for (tmp_srv = list_head(&hc_rule->hcr_servers); tmp_srv != NULL;
784 tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv)) {
785 if (tmp_srv->shc_sg_srv == srv) {
786 list_remove(&hc_rule->hcr_servers, tmp_srv);
787 HC_CANCEL_TIMER(tmp_srv);
788 if (tmp_srv->shc_child_pid != 0)
789 ilbd_hc_kill_probe(tmp_srv);
790 free(tmp_srv);
791 return (ILB_STATUS_OK);
792 }
793 }
794 return (ILB_STATUS_ENOENT);
795 }
796
797 /* Helper routine to dissociate all servers of a rule from its hc object. */
798 static void
ilbd_hc_srv_rem_all(ilbd_hc_rule_t * hc_rule)799 ilbd_hc_srv_rem_all(ilbd_hc_rule_t *hc_rule)
800 {
801 ilbd_hc_srv_t *srv;
802
803 while ((srv = list_remove_head(&hc_rule->hcr_servers)) != NULL) {
804 HC_CANCEL_TIMER(srv);
805 if (srv->shc_child_pid != 0)
806 ilbd_hc_kill_probe(srv);
807 free(srv);
808 }
809 }
810
811 /* Associate a rule with its hc object. */
812 ilb_status_t
ilbd_hc_associate_rule(const ilbd_rule_t * rule,int ev_port)813 ilbd_hc_associate_rule(const ilbd_rule_t *rule, int ev_port)
814 {
815 ilbd_hc_t *hc;
816 ilbd_hc_rule_t *hc_rule;
817 ilb_status_t ret;
818 ilbd_sg_t *sg;
819 ilbd_srv_t *ilbd_srv;
820
821 /* The rule is assumed to be initialized appropriately. */
822 if ((hc = ilbd_get_hc(rule->irl_hcname)) == NULL) {
823 logdebug("ilbd_hc_associate_rule: healthcheck %s does not "
824 "exist", rule->irl_hcname);
825 return (ILB_STATUS_ENOHCINFO);
826 }
827 if ((hc->ihc_test_type == ILBD_HC_TCP &&
828 rule->irl_proto != IPPROTO_TCP) ||
829 (hc->ihc_test_type == ILBD_HC_UDP &&
830 rule->irl_proto != IPPROTO_UDP)) {
831 return (ILB_STATUS_RULE_HC_MISMATCH);
832 }
833 if ((hc_rule = calloc(1, sizeof (ilbd_hc_rule_t))) == NULL) {
834 logdebug("ilbd_hc_associate_rule: out of memory");
835 return (ILB_STATUS_ENOMEM);
836 }
837
838 hc_rule->hcr_rule = rule;
839 list_create(&hc_rule->hcr_servers, sizeof (ilbd_hc_srv_t),
840 offsetof(ilbd_hc_srv_t, shc_srv_link));
841
842 /* Add all the servers. */
843 sg = rule->irl_sg;
844 for (ilbd_srv = list_head(&sg->isg_srvlist); ilbd_srv != NULL;
845 ilbd_srv = list_next(&sg->isg_srvlist, ilbd_srv)) {
846 if ((ret = ilbd_hc_srv_add(hc, hc_rule, &ilbd_srv->isv_srv,
847 ev_port)) != ILB_STATUS_OK) {
848 /* Remove all previously added servers */
849 ilbd_hc_srv_rem_all(hc_rule);
850 list_destroy(&hc_rule->hcr_servers);
851 free(hc_rule);
852 return (ret);
853 }
854 }
855 list_insert_tail(&hc->ihc_rules, hc_rule);
856 hc->ihc_rule_cnt++;
857
858 return (ILB_STATUS_OK);
859 }
860
861 /* Dissociate a rule from its hc object. */
862 ilb_status_t
ilbd_hc_dissociate_rule(const ilbd_rule_t * rule)863 ilbd_hc_dissociate_rule(const ilbd_rule_t *rule)
864 {
865 ilbd_hc_t *hc;
866 ilbd_hc_rule_t *hc_rule;
867
868 /* The rule is assumed to be initialized appropriately. */
869 if ((hc = ilbd_get_hc(rule->irl_hcname)) == NULL) {
870 logdebug("ilbd_hc_dissociate_rule: healthcheck %s does not "
871 "exist", rule->irl_hcname);
872 return (ILB_STATUS_ENOENT);
873 }
874 for (hc_rule = list_head(&hc->ihc_rules); hc_rule != NULL;
875 hc_rule = list_next(&hc->ihc_rules, hc_rule)) {
876 if (hc_rule->hcr_rule == rule)
877 break;
878 }
879 if (hc_rule == NULL) {
880 logdebug("ilbd_hc_dissociate_rule: rule %s is not associated "
881 "with healtcheck %s", rule->irl_hcname, hc->ihc_name);
882 return (ILB_STATUS_ENOENT);
883 }
884 ilbd_hc_srv_rem_all(hc_rule);
885 list_remove(&hc->ihc_rules, hc_rule);
886 hc->ihc_rule_cnt--;
887 list_destroy(&hc_rule->hcr_servers);
888 free(hc_rule);
889 return (ILB_STATUS_OK);
890 }
891
892 /*
893 * Given a hc object name and a rule, check to see if the rule is associated
894 * with the hc object. If it is, the hc object is returned in **hc and the
895 * ilbd_hc_rule_t is returned in **hc_rule.
896 */
897 static boolean_t
ilbd_hc_check_rule(const char * hc_name,const ilbd_rule_t * rule,ilbd_hc_t ** hc,ilbd_hc_rule_t ** hc_rule)898 ilbd_hc_check_rule(const char *hc_name, const ilbd_rule_t *rule,
899 ilbd_hc_t **hc, ilbd_hc_rule_t **hc_rule)
900 {
901 ilbd_hc_t *tmp_hc;
902 ilbd_hc_rule_t *tmp_hc_rule;
903
904 if ((tmp_hc = ilbd_get_hc(hc_name)) == NULL)
905 return (B_FALSE);
906 for (tmp_hc_rule = list_head(&tmp_hc->ihc_rules); tmp_hc_rule != NULL;
907 tmp_hc_rule = list_next(&tmp_hc->ihc_rules, tmp_hc_rule)) {
908 if (tmp_hc_rule->hcr_rule == rule) {
909 *hc = tmp_hc;
910 *hc_rule = tmp_hc_rule;
911 return (B_TRUE);
912 }
913 }
914 return (B_FALSE);
915 }
916
917 /* Associate a server with its hc object. */
918 ilb_status_t
ilbd_hc_add_server(const ilbd_rule_t * rule,const ilb_sg_srv_t * srv,int ev_port)919 ilbd_hc_add_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv,
920 int ev_port)
921 {
922 ilbd_hc_t *hc;
923 ilbd_hc_rule_t *hc_rule;
924
925 if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule))
926 return (ILB_STATUS_ENOENT);
927 return (ilbd_hc_srv_add(hc, hc_rule, srv, ev_port));
928 }
929
930 /* Dissociate a server from its hc object. */
931 ilb_status_t
ilbd_hc_del_server(const ilbd_rule_t * rule,const ilb_sg_srv_t * srv)932 ilbd_hc_del_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv)
933 {
934 ilbd_hc_t *hc;
935 ilbd_hc_rule_t *hc_rule;
936
937 if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule))
938 return (ILB_STATUS_ENOENT);
939 return (ilbd_hc_srv_rem(hc_rule, srv));
940 }
941
942 /* Helper routine to enable/disable a server's hc probe. */
943 static ilb_status_t
ilbd_hc_toggle_server(const ilbd_rule_t * rule,const ilb_sg_srv_t * srv,boolean_t enable)944 ilbd_hc_toggle_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv,
945 boolean_t enable)
946 {
947 ilbd_hc_t *hc;
948 ilbd_hc_rule_t *hc_rule;
949 ilbd_hc_srv_t *tmp_srv;
950 ilb_status_t ret;
951
952 if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule))
953 return (ILB_STATUS_ENOENT);
954 for (tmp_srv = list_head(&hc_rule->hcr_servers); tmp_srv != NULL;
955 tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv)) {
956 if (tmp_srv->shc_sg_srv != srv) {
957 continue;
958 }
959 if (enable) {
960 if (tmp_srv->shc_status == ILB_HCS_DISABLED) {
961 ret = ilbd_hc_restart_timer(hc, tmp_srv);
962 if (ret != ILB_STATUS_OK) {
963 logerr("%s: cannot start timers for "
964 "rule %s server %s", __func__,
965 rule->irl_name,
966 tmp_srv->shc_sg_srv->sgs_srvID);
967 return (ret);
968 }
969 /* Start from fresh... */
970 tmp_srv->shc_status = ILB_HCS_UNINIT;
971 tmp_srv->shc_rtt = 0;
972 tmp_srv->shc_fail_cnt = 0;
973 }
974 } else {
975 if (tmp_srv->shc_status != ILB_HCS_DISABLED) {
976 tmp_srv->shc_status = ILB_HCS_DISABLED;
977 HC_CANCEL_TIMER(tmp_srv);
978 if (tmp_srv->shc_child_pid != 0)
979 ilbd_hc_kill_probe(tmp_srv);
980 }
981 }
982 return (ILB_STATUS_OK);
983 }
984 return (ILB_STATUS_ENOENT);
985 }
986
987 ilb_status_t
ilbd_hc_enable_server(const ilbd_rule_t * rule,const ilb_sg_srv_t * srv)988 ilbd_hc_enable_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv)
989 {
990 return (ilbd_hc_toggle_server(rule, srv, B_TRUE));
991 }
992
993 ilb_status_t
ilbd_hc_disable_server(const ilbd_rule_t * rule,const ilb_sg_srv_t * srv)994 ilbd_hc_disable_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv)
995 {
996 return (ilbd_hc_toggle_server(rule, srv, B_FALSE));
997 }
998
999 /*
1000 * Helper routine to enable/disable a rule's hc probe (including all its
1001 * servers).
1002 */
1003 static ilb_status_t
ilbd_hc_toggle_rule(const ilbd_rule_t * rule,boolean_t enable)1004 ilbd_hc_toggle_rule(const ilbd_rule_t *rule, boolean_t enable)
1005 {
1006 ilbd_hc_t *hc;
1007 ilbd_hc_rule_t *hc_rule;
1008 ilbd_hc_srv_t *tmp_srv;
1009 int ret;
1010
1011 if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule))
1012 return (ILB_STATUS_ENOENT);
1013
1014 for (tmp_srv = list_head(&hc_rule->hcr_servers); tmp_srv != NULL;
1015 tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv)) {
1016 if (enable) {
1017 /*
1018 * If the server is disabled in the rule, do not
1019 * restart its timer.
1020 */
1021 if (tmp_srv->shc_status == ILB_HCS_DISABLED &&
1022 ILB_IS_SRV_ENABLED(
1023 tmp_srv->shc_sg_srv->sgs_flags)) {
1024 ret = ilbd_hc_restart_timer(hc, tmp_srv);
1025 if (ret != ILB_STATUS_OK) {
1026 logerr("%s: cannot start timers for "
1027 "rule %s server %s", __func__,
1028 rule->irl_name,
1029 tmp_srv->shc_sg_srv->sgs_srvID);
1030 goto rollback;
1031 } else {
1032 /* Start from fresh... */
1033 tmp_srv->shc_status = ILB_HCS_UNINIT;
1034 tmp_srv->shc_rtt = 0;
1035 tmp_srv->shc_fail_cnt = 0;
1036 }
1037 }
1038 } else {
1039 if (tmp_srv->shc_status != ILB_HCS_DISABLED) {
1040 HC_CANCEL_TIMER(tmp_srv);
1041 tmp_srv->shc_status = ILB_HCS_DISABLED;
1042 if (tmp_srv->shc_child_pid != 0)
1043 ilbd_hc_kill_probe(tmp_srv);
1044 }
1045 }
1046 }
1047 return (ILB_STATUS_OK);
1048 rollback:
1049 enable = !enable;
1050 for (tmp_srv = list_prev(&hc_rule->hcr_servers, tmp_srv);
1051 tmp_srv != NULL;
1052 tmp_srv = list_prev(&hc_rule->hcr_servers, tmp_srv)) {
1053 if (enable) {
1054 if (tmp_srv->shc_status == ILB_HCS_DISABLED &&
1055 ILB_IS_SRV_ENABLED(
1056 tmp_srv->shc_sg_srv->sgs_flags)) {
1057 (void) ilbd_hc_restart_timer(hc, tmp_srv);
1058 tmp_srv->shc_status = ILB_HCS_UNINIT;
1059 tmp_srv->shc_rtt = 0;
1060 tmp_srv->shc_fail_cnt = 0;
1061 }
1062 } else {
1063 if (tmp_srv->shc_status != ILB_HCS_DISABLED) {
1064 HC_CANCEL_TIMER(tmp_srv);
1065 tmp_srv->shc_status = ILB_HCS_DISABLED;
1066 if (tmp_srv->shc_child_pid != 0)
1067 ilbd_hc_kill_probe(tmp_srv);
1068 }
1069 }
1070 }
1071 return (ret);
1072 }
1073
1074 ilb_status_t
ilbd_hc_enable_rule(const ilbd_rule_t * rule)1075 ilbd_hc_enable_rule(const ilbd_rule_t *rule)
1076 {
1077 return (ilbd_hc_toggle_rule(rule, B_TRUE));
1078 }
1079
1080 ilb_status_t
ilbd_hc_disable_rule(const ilbd_rule_t * rule)1081 ilbd_hc_disable_rule(const ilbd_rule_t *rule)
1082 {
1083 return (ilbd_hc_toggle_rule(rule, B_FALSE));
1084 }
1085
1086 static const char *
topo_2_str(ilb_topo_t topo)1087 topo_2_str(ilb_topo_t topo)
1088 {
1089 switch (topo) {
1090 case ILB_TOPO_DSR:
1091 return ("DSR");
1092 case ILB_TOPO_NAT:
1093 return ("NAT");
1094 case ILB_TOPO_HALF_NAT:
1095 return ("HALF_NAT");
1096 default:
1097 /* Should not happen. */
1098 logerr("%s: unknown topology", __func__);
1099 break;
1100 }
1101 return ("");
1102 }
1103
1104 /*
1105 * Create the argument list to be passed to a hc probe command.
1106 * The passed in argv is assumed to have HC_PROBE_ARGC elements.
1107 */
1108 static boolean_t
create_argv(ilbd_hc_srv_t * srv,char * argv[])1109 create_argv(ilbd_hc_srv_t *srv, char *argv[])
1110 {
1111 char buf[INET6_ADDRSTRLEN];
1112 ilbd_rule_t const *rule;
1113 ilb_sg_srv_t const *sg_srv;
1114 struct in_addr v4_addr;
1115 in_port_t port;
1116 int i;
1117
1118 rule = srv->shc_hc_rule->hcr_rule;
1119 sg_srv = srv->shc_sg_srv;
1120
1121 if (srv->shc_state == ilbd_hc_def_pinging) {
1122 if ((argv[0] = strdup(ILB_PROBE_PING)) == NULL)
1123 return (B_FALSE);
1124 } else {
1125 switch (srv->shc_hc->ihc_test_type) {
1126 case ILBD_HC_USER:
1127 if ((argv[0] = strdup(srv->shc_hc->ihc_test)) == NULL)
1128 return (B_FALSE);
1129 break;
1130 case ILBD_HC_TCP:
1131 case ILBD_HC_UDP:
1132 if ((argv[0] = strdup(ILB_PROBE_PROTO)) ==
1133 NULL) {
1134 return (B_FALSE);
1135 }
1136 break;
1137 case ILBD_HC_PING:
1138 if ((argv[0] = strdup(ILB_PROBE_PING)) == NULL) {
1139 return (B_FALSE);
1140 }
1141 break;
1142 }
1143 }
1144
1145 /*
1146 * argv[1] is the VIP.
1147 *
1148 * Right now, the VIP and the backend server addresses should be
1149 * in the same IP address family. Here we don't do that in case
1150 * this assumption is changed in future.
1151 */
1152 if (IN6_IS_ADDR_V4MAPPED(&rule->irl_vip)) {
1153 IN6_V4MAPPED_TO_INADDR(&rule->irl_vip, &v4_addr);
1154 if (inet_ntop(AF_INET, &v4_addr, buf, sizeof (buf)) == NULL)
1155 goto cleanup;
1156 } else {
1157 if (inet_ntop(AF_INET6, &rule->irl_vip, buf,
1158 sizeof (buf)) == NULL) {
1159 goto cleanup;
1160 }
1161 }
1162 if ((argv[1] = strdup(buf)) == NULL)
1163 goto cleanup;
1164
1165 /*
1166 * argv[2] is the backend server address.
1167 */
1168 if (IN6_IS_ADDR_V4MAPPED(&sg_srv->sgs_addr)) {
1169 IN6_V4MAPPED_TO_INADDR(&sg_srv->sgs_addr, &v4_addr);
1170 if (inet_ntop(AF_INET, &v4_addr, buf, sizeof (buf)) == NULL)
1171 goto cleanup;
1172 } else {
1173 if (inet_ntop(AF_INET6, &sg_srv->sgs_addr, buf,
1174 sizeof (buf)) == NULL) {
1175 goto cleanup;
1176 }
1177 }
1178 if ((argv[2] = strdup(buf)) == NULL)
1179 goto cleanup;
1180
1181 /*
1182 * argv[3] is the transport protocol used in the rule.
1183 */
1184 switch (rule->irl_proto) {
1185 case IPPROTO_TCP:
1186 argv[3] = strdup("TCP");
1187 break;
1188 case IPPROTO_UDP:
1189 argv[3] = strdup("UDP");
1190 break;
1191 default:
1192 logerr("%s: unknown protocol", __func__);
1193 goto cleanup;
1194 }
1195 if (argv[3] == NULL)
1196 goto cleanup;
1197
1198 /*
1199 * argv[4] is the load balance mode, DSR, NAT, HALF-NAT.
1200 */
1201 if ((argv[4] = strdup(topo_2_str(rule->irl_topo))) == NULL)
1202 goto cleanup;
1203
1204 /*
1205 * argv[5] is the port range. Right now, there should only be 1 port.
1206 */
1207 switch (rule->irl_hcpflag) {
1208 case ILB_HCI_PROBE_FIX:
1209 port = ntohs(rule->irl_hcport);
1210 break;
1211 case ILB_HCI_PROBE_ANY: {
1212 in_port_t min, max;
1213
1214 if (ntohs(sg_srv->sgs_minport) == 0) {
1215 min = ntohs(rule->irl_minport);
1216 max = ntohs(rule->irl_maxport);
1217 } else {
1218 min = ntohs(sg_srv->sgs_minport);
1219 max = ntohs(sg_srv->sgs_maxport);
1220 }
1221 if (max > min)
1222 port = min + gethrtime() % (max - min + 1);
1223 else
1224 port = min;
1225 break;
1226 }
1227 default:
1228 logerr("%s: unknown HC flag", __func__);
1229 goto cleanup;
1230 }
1231 (void) sprintf(buf, "%d", port);
1232 if ((argv[5] = strdup(buf)) == NULL)
1233 goto cleanup;
1234
1235 /*
1236 * argv[6] is the probe timeout.
1237 */
1238 (void) sprintf(buf, "%d", srv->shc_hc->ihc_timeout);
1239 if ((argv[6] = strdup(buf)) == NULL)
1240 goto cleanup;
1241
1242 argv[7] = NULL;
1243 return (B_TRUE);
1244
1245 cleanup:
1246 for (i = 0; i < HC_PROBE_ARGC; i++) {
1247 if (argv[i] != NULL)
1248 free(argv[i]);
1249 }
1250 return (B_FALSE);
1251 }
1252
/*
 * Free every string of a NULL terminated argument vector.  Each slot is
 * reset to NULL once its string has been freed, so no stale pointer is
 * left in the caller's array and a repeated call on the same vector is
 * harmless.  The array itself is not freed.
 */
static void
destroy_argv(char *argv[])
{
	int i;

	for (i = 0; argv[i] != NULL; i++) {
		free(argv[i]);
		argv[i] = NULL;
	}
}
1261
1262 /* Spawn a process to run the hc probe on the given server. */
1263 static boolean_t
ilbd_run_probe(ilbd_hc_srv_t * srv)1264 ilbd_run_probe(ilbd_hc_srv_t *srv)
1265 {
1266 posix_spawn_file_actions_t fd_actions;
1267 boolean_t init_fd_actions = B_FALSE;
1268 posix_spawnattr_t attr;
1269 boolean_t init_attr = B_FALSE;
1270 sigset_t child_sigset;
1271 int fds[2];
1272 int fdflags;
1273 pid_t pid;
1274 char *child_argv[HC_PROBE_ARGC];
1275 ilbd_hc_probe_event_t *probe_ev;
1276 char *probe_name;
1277
1278 bzero(child_argv, HC_PROBE_ARGC * sizeof (char *));
1279 if ((probe_ev = calloc(1, sizeof (*probe_ev))) == NULL) {
1280 logdebug("ilbd_run_probe: calloc");
1281 return (B_FALSE);
1282 }
1283
1284 /* Set up a pipe to get output from probe command. */
1285 if (pipe(fds) < 0) {
1286 logdebug("ilbd_run_probe: cannot create pipe");
1287 free(probe_ev);
1288 return (B_FALSE);
1289 }
1290 /* Set our side of the pipe to be non-blocking */
1291 if ((fdflags = fcntl(fds[0], F_GETFL, 0)) == -1) {
1292 logdebug("ilbd_run_probe: fcntl(F_GETFL)");
1293 goto cleanup;
1294 }
1295 if (fcntl(fds[0], F_SETFL, fdflags | O_NONBLOCK) == -1) {
1296 logdebug("ilbd_run_probe: fcntl(F_SETFL)");
1297 goto cleanup;
1298 }
1299
1300 if (posix_spawn_file_actions_init(&fd_actions) != 0) {
1301 logdebug("ilbd_run_probe: posix_spawn_file_actions_init");
1302 goto cleanup;
1303 }
1304 init_fd_actions = B_TRUE;
1305 if (posix_spawnattr_init(&attr) != 0) {
1306 logdebug("ilbd_run_probe: posix_spawnattr_init");
1307 goto cleanup;
1308 }
1309 init_attr = B_TRUE;
1310 if (posix_spawn_file_actions_addclose(&fd_actions, fds[0]) != 0) {
1311 logdebug("ilbd_run_probe: posix_spawn_file_actions_addclose");
1312 goto cleanup;
1313 }
1314 if (posix_spawn_file_actions_adddup2(&fd_actions, fds[1],
1315 STDOUT_FILENO) != 0) {
1316 logdebug("ilbd_run_probe: posix_spawn_file_actions_dup2");
1317 goto cleanup;
1318 }
1319 if (posix_spawn_file_actions_addclose(&fd_actions, fds[1]) != 0) {
1320 logdebug("ilbd_run_probe: posix_spawn_file_actions_addclose");
1321 goto cleanup;
1322 }
1323
1324 /* Reset all signal handling of the child to default. */
1325 (void) sigfillset(&child_sigset);
1326 if (posix_spawnattr_setsigdefault(&attr, &child_sigset) != 0) {
1327 logdebug("ilbd_run_probe: posix_spawnattr_setsigdefault");
1328 goto cleanup;
1329 }
1330 /* Don't want SIGCHLD. */
1331 if (posix_spawnattr_setflags(&attr, POSIX_SPAWN_NOSIGCHLD_NP|
1332 POSIX_SPAWN_SETSIGDEF) != 0) {
1333 logdebug("ilbd_run_probe: posix_spawnattr_setflags");
1334 goto cleanup;
1335 }
1336
1337 if (!create_argv(srv, child_argv)) {
1338 logdebug("ilbd_run_probe: create_argv");
1339 goto cleanup;
1340 }
1341
1342 /*
1343 * If we are doing default pinging or not using a user supplied
1344 * probe, we should execute our standard supplied probe. The
1345 * supplied probe command handles all types of probes. And the
1346 * type used depends on argv[0], as filled in by create_argv().
1347 */
1348 if (srv->shc_state == ilbd_hc_def_pinging ||
1349 srv->shc_hc->ihc_test_type != ILBD_HC_USER) {
1350 probe_name = ILB_PROBE_PROTO;
1351 } else {
1352 probe_name = srv->shc_hc->ihc_test;
1353 }
1354 if (posix_spawn(&pid, probe_name, &fd_actions, &attr, child_argv,
1355 NULL) != 0) {
1356 logerr("%s: posix_spawn: %s for server %s: %s", __func__,
1357 srv->shc_hc->ihc_test, srv->shc_sg_srv->sgs_srvID,
1358 strerror(errno));
1359 goto cleanup;
1360 }
1361
1362 (void) close(fds[1]);
1363 srv->shc_child_pid = pid;
1364 srv->shc_child_fd = fds[0];
1365 srv->shc_ev = probe_ev;
1366
1367 probe_ev->ihp_ev = ILBD_EVENT_PROBE;
1368 probe_ev->ihp_srv = srv;
1369 probe_ev->ihp_pid = pid;
1370 if (port_associate(srv->shc_ev_port, PORT_SOURCE_FD, fds[0],
1371 POLLRDNORM, probe_ev) != 0) {
1372 /*
1373 * Need to kill the child. It will free the srv->shc_ev,
1374 * which is probe_ev. So set probe_ev to NULL.
1375 */
1376 ilbd_hc_kill_probe(srv);
1377 probe_ev = NULL;
1378 goto cleanup;
1379 }
1380
1381 destroy_argv(child_argv);
1382 (void) posix_spawn_file_actions_destroy(&fd_actions);
1383 (void) posix_spawnattr_destroy(&attr);
1384 return (B_TRUE);
1385
1386 cleanup:
1387 destroy_argv(child_argv);
1388 if (init_fd_actions == B_TRUE)
1389 (void) posix_spawn_file_actions_destroy(&fd_actions);
1390 if (init_attr == B_TRUE)
1391 (void) posix_spawnattr_destroy(&attr);
1392 (void) close(fds[0]);
1393 (void) close(fds[1]);
1394 if (probe_ev != NULL)
1395 free(probe_ev);
1396 return (B_FALSE);
1397 }
1398
1399 /*
1400 * Called by ild_hc_probe_return() to re-associate the fd to a child to
1401 * the event port.
1402 */
1403 static void
reassociate_port(int ev_port,int fd,ilbd_hc_probe_event_t * ev)1404 reassociate_port(int ev_port, int fd, ilbd_hc_probe_event_t *ev)
1405 {
1406 if (port_associate(ev_port, PORT_SOURCE_FD, fd,
1407 POLLRDNORM, ev) != 0) {
1408 /*
1409 * If we cannot reassociate with the port, the only
1410 * thing we can do now is to kill the child and
1411 * do a blocking wait here...
1412 */
1413 logdebug("%s: port_associate: %s", __func__, strerror(errno));
1414 if (kill(ev->ihp_pid, SIGKILL) != 0)
1415 logerr("%s: kill: %s", __func__, strerror(errno));
1416 if (waitpid(ev->ihp_pid, NULL, 0) != ev->ihp_pid)
1417 logdebug("%s: waitpid: %s", __func__, strerror(errno));
1418 free(ev);
1419 }
1420 }
1421
1422 /*
1423 * To handle a child probe process hanging up.
1424 */
1425 static void
ilbd_hc_child_hup(int ev_port,int fd,ilbd_hc_probe_event_t * ev)1426 ilbd_hc_child_hup(int ev_port, int fd, ilbd_hc_probe_event_t *ev)
1427 {
1428 ilbd_hc_srv_t *srv;
1429 pid_t ret_pid;
1430 int ret;
1431
1432 srv = ev->ihp_srv;
1433
1434 if (!ev->ihp_done) {
1435 /* ilbd does not care about this process anymore ... */
1436 ev->ihp_done = B_TRUE;
1437 srv->shc_ev = NULL;
1438 srv->shc_child_pid = 0;
1439 HC_CANCEL_TIMER(srv);
1440 ilbd_set_fail_state(srv);
1441 }
1442 ret_pid = waitpid(ev->ihp_pid, &ret, WNOHANG);
1443 switch (ret_pid) {
1444 case -1:
1445 logperror("ilbd_hc_child_hup: waitpid");
1446 /* FALLTHROUGH */
1447 case 0:
1448 /* The child has not completed the exit. Wait again. */
1449 reassociate_port(ev_port, fd, ev);
1450 break;
1451 default:
1452 /* Right now, we just ignore the exit status. */
1453 if (WIFEXITED(ret))
1454 ret = WEXITSTATUS(ret);
1455 (void) close(fd);
1456 free(ev);
1457 }
1458 }
1459
1460 /*
1461 * To read the output of a child probe process.
1462 */
1463 static void
ilbd_hc_child_data(int fd,ilbd_hc_probe_event_t * ev)1464 ilbd_hc_child_data(int fd, ilbd_hc_probe_event_t *ev)
1465 {
1466 ilbd_hc_srv_t *srv;
1467 char buf[HC_MAX_PROBE_OUTPUT];
1468 int ret;
1469 int64_t rtt;
1470
1471 srv = ev->ihp_srv;
1472
1473 bzero(buf, HC_MAX_PROBE_OUTPUT);
1474 ret = read(fd, buf, HC_MAX_PROBE_OUTPUT - 1);
1475 /* Should not happen since event port should have caught this. */
1476 assert(ret > 0);
1477
1478 /*
1479 * We expect the probe command to print out the RTT only. But
1480 * the command may misbehave and print out more than what we intend to
1481 * read in. So need to do this check below to "flush" out all the
1482 * output from the command.
1483 */
1484 if (!ev->ihp_done) {
1485 ev->ihp_done = B_TRUE;
1486 /* We don't need to know about this event anymore. */
1487 srv->shc_ev = NULL;
1488 srv->shc_child_pid = 0;
1489 HC_CANCEL_TIMER(srv);
1490 } else {
1491 return;
1492 }
1493
1494 rtt = strtoll(buf, NULL, 10);
1495
1496 /*
1497 * -1 means the server is dead or the probe somehow fails. Treat
1498 * them both as server is dead.
1499 */
1500 if (rtt == -1) {
1501 ilbd_set_fail_state(srv);
1502 return;
1503 } else if (rtt > 0) {
1504 /* If the returned RTT value is not valid, just ignore it. */
1505 if (rtt > 0 && rtt <= UINT_MAX) {
1506 /* Set rtt to be the simple smoothed average. */
1507 if (srv->shc_rtt == 0) {
1508 srv->shc_rtt = rtt;
1509 } else {
1510 srv->shc_rtt = 3 * ((srv)->shc_rtt >> 2) +
1511 (rtt >> 2);
1512 }
1513 }
1514
1515 }
1516
1517 switch (srv->shc_state) {
1518 case ilbd_hc_def_pinging:
1519 srv->shc_state = ilbd_hc_probing;
1520
1521 /* Ping is OK, now start the probe. */
1522 ilbd_hc_probe_timer(ilbd_hc_timer_q, srv);
1523 break;
1524 case ilbd_hc_probing:
1525 srv->shc_fail_cnt = 0;
1526
1527 /* Server is dead before, re-enable it. */
1528 if (srv->shc_status == ILB_HCS_UNREACH ||
1529 srv->shc_status == ILB_HCS_DEAD) {
1530 /*
1531 * If enabling the server in kernel fails now,
1532 * hopefully when the timer fires again later, the
1533 * enabling can be done.
1534 */
1535 if (ilbd_k_Xable_server(&srv->shc_sg_srv->sgs_addr,
1536 srv->shc_hc_rule->hcr_rule->irl_name,
1537 stat_declare_srv_alive) != ILB_STATUS_OK) {
1538 logerr("%s: cannot enable server in kernel: "
1539 " rule %s server %s", __func__,
1540 srv->shc_hc_rule->hcr_rule->irl_name,
1541 srv->shc_sg_srv->sgs_srvID);
1542 } else {
1543 srv->shc_status = ILB_HCS_ALIVE;
1544 }
1545 } else {
1546 srv->shc_status = ILB_HCS_ALIVE;
1547 }
1548 if (ilbd_hc_restart_timer(srv->shc_hc, srv) != ILB_STATUS_OK) {
1549 logerr("%s: cannot restart timer: rule %s server %s",
1550 __func__, srv->shc_hc_rule->hcr_rule->irl_name,
1551 srv->shc_sg_srv->sgs_srvID);
1552 ilbd_mark_server_disabled(srv);
1553 }
1554 break;
1555 default:
1556 logdebug("%s: unknown state", __func__);
1557 break;
1558 }
1559 }
1560
1561 /*
1562 * Handle the return event of a child probe fd.
1563 */
1564 void
ilbd_hc_probe_return(int ev_port,int fd,int port_events,ilbd_hc_probe_event_t * ev)1565 ilbd_hc_probe_return(int ev_port, int fd, int port_events,
1566 ilbd_hc_probe_event_t *ev)
1567 {
1568 /*
1569 * Note that there can be more than one events delivered to us at
1570 * the same time. So we need to check them individually.
1571 */
1572 if (port_events & POLLRDNORM)
1573 ilbd_hc_child_data(fd, ev);
1574
1575 if (port_events & (POLLHUP|POLLERR)) {
1576 ilbd_hc_child_hup(ev_port, fd, ev);
1577 return;
1578 }
1579
1580 /*
1581 * Re-associate the fd with the port so that when the child
1582 * exits, we can reap the status.
1583 */
1584 reassociate_port(ev_port, fd, ev);
1585 }
1586