1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/socket.h> 29 #include <sys/time.h> 30 31 #include <netinet/in_systm.h> 32 #include <netinet/in.h> 33 #include <netinet/ip.h> 34 #include <netinet/ip6.h> 35 #include <arpa/inet.h> 36 #include <netinet/tcp.h> 37 #include <netinet/ip_icmp.h> 38 #include <netinet/icmp6.h> 39 #include <netinet/udp.h> 40 #include <netdb.h> 41 #include <unistd.h> 42 #include <stdio.h> 43 #include <stdlib.h> 44 #include <strings.h> 45 #include <errno.h> 46 #include <limits.h> 47 #include <signal.h> 48 #include <libgen.h> 49 #include <fcntl.h> 50 51 /* 52 * The following values are what ilbd will set argv[0] to. This determines 53 * what type of probe to send out. 54 */ 55 #define PROBE_PING "ilb_ping" 56 #define PROBE_PROTO "ilb_probe" 57 58 /* The transport protocol to use in the probe. Value of argv[3]. */ 59 #define PROTO_TCP "TCP" 60 #define PROTO_UDP "UDP" 61 62 enum probe_type { ping_probe, tcp_probe, udp_probe }; 63 64 /* Load balance mode. Value of argv[4]. */ 65 #define MODE_DSR "DSR" 66 #define MODE_NAT "NAT" 67 #define MODE_HALF_NAT "HALF_NAT" 68 69 enum lb_mode { dsr, nat, half_nat }; 70 71 /* Number of arguments to the command from ilbd. */ 72 #define PROG_ARGC 7 73 74 /* Size of buffer used to receive ICMP packet */ 75 #define RECV_PKT_SZ 256 76 77 /* 78 * Struct to store the probe info (most is passed in using the argv[] array to 79 * the command given by ilbd). The argv[] contains the following. 80 * 81 * argv[0] is either PROBE_PING or PROBE_PROTO 82 * argv[1] is the VIP 83 * argv[2] is the backend server address 84 * argv[3] is the transport protocol used in the rule 85 * argv[4] is the load balance mode, "DSR", "NAT", "HALF-NAT" 86 * argv[5] is the probe port 87 * argv[6] is the probe timeout 88 * 89 * The following three fields are used in sending ICMP ECHO probe. 90 * 91 * echo_id is the ID set in the probe 92 * echo_seq is the sequence set in the probe 93 * echo_cookie is the random number data in a probe 94 * lport is the local port (in network byte order) used to send the probe 95 */ 96 typedef struct { 97 enum probe_type probe; 98 struct in6_addr vip; /* argv[1] */ 99 struct in6_addr srv_addr; /* argv[2] */ 100 int proto; /* argv[3] */ 101 enum lb_mode mode; /* argv[4] */ 102 in_port_t port; /* argv[5] */ 103 uint32_t timeout; /* argv[6] */ 104 105 uint16_t echo_id; 106 uint16_t echo_seq; 107 uint32_t echo_cookie; 108 in_port_t lport; 109 } probe_param_t; 110 111 /* Global variable to indicate whether a timeout means success. */ 112 static boolean_t timeout_is_good; 113 114 /* SIGALRM handler */ 115 /* ARGSUSED */ 116 static void 117 probe_exit(int s) 118 { 119 if (timeout_is_good) { 120 (void) printf("0"); 121 exit(0); 122 } else { 123 (void) printf("-1"); 124 exit(255); 125 } 126 } 127 128 /* 129 * Checksum routine for Internet Protocol family headers (C Version) 130 * (copied from ping.c) 131 */ 132 static ushort_t 133 in_cksum(ushort_t *addr, int len) 134 { 135 int nleft = len; 136 ushort_t *w = addr; 137 ushort_t answer; 138 ushort_t odd_byte = 0; 139 int sum = 0; 140 141 /* 142 * Our algorithm is simple, using a 32 bit accumulator (sum), 143 * we add sequential 16 bit words to it, and at the end, fold 144 * back all the carry bits from the top 16 bits into the lower 145 * 16 bits. 146 */ 147 while (nleft > 1) { 148 sum += *w++; 149 nleft -= 2; 150 } 151 152 /* mop up an odd byte, if necessary */ 153 if (nleft == 1) { 154 *(uchar_t *)(&odd_byte) = *(uchar_t *)w; 155 sum += odd_byte; 156 } 157 158 /* 159 * add back carry outs from top 16 bits to low 16 bits 160 */ 161 sum = (sum >> 16) + (sum & 0xffff); /* add hi 16 to low 16 */ 162 sum += (sum >> 16); /* add carry */ 163 answer = ~sum; /* truncate to 16 bits */ 164 return (answer); 165 } 166 167 /* It is assumed that argv[] contains PROBE_ARGC arguments. */ 168 static boolean_t 169 parse_probe_param(char *argv[], probe_param_t *param) 170 { 171 int32_t port; 172 int64_t timeout; 173 struct in_addr v4addr; 174 175 if (strcmp(basename(argv[0]), PROBE_PING) == 0) { 176 param->probe = ping_probe; 177 } else { 178 if (strcmp(basename(argv[0]), PROBE_PROTO) != 0) 179 return (B_FALSE); 180 181 if (strcasecmp(argv[3], PROTO_TCP) == 0) { 182 param->probe = tcp_probe; 183 param->proto = IPPROTO_TCP; 184 } else if (strcasecmp(argv[3], PROTO_UDP) == 0) { 185 param->probe = udp_probe; 186 param->proto = IPPROTO_UDP; 187 } else { 188 return (B_FALSE); 189 } 190 } 191 192 if (strchr(argv[1], ':') != NULL) { 193 if (inet_pton(AF_INET6, argv[1], ¶m->vip) == 0) 194 return (B_FALSE); 195 } else if (strchr(argv[1], '.') != NULL) { 196 if (inet_pton(AF_INET, argv[1], &v4addr) == 0) 197 return (B_FALSE); 198 IN6_INADDR_TO_V4MAPPED(&v4addr, ¶m->vip); 199 } else { 200 return (B_FALSE); 201 } 202 203 /* 204 * The address family of vip and srv_addr should be the same for 205 * now. But in future, we may allow them to be different... So 206 * we don't do a check here. 207 */ 208 if (strchr(argv[2], ':') != NULL) { 209 if (inet_pton(AF_INET6, argv[2], ¶m->srv_addr) == 0) 210 return (B_FALSE); 211 } else if (strchr(argv[2], '.') != NULL) { 212 if (inet_pton(AF_INET, argv[2], &v4addr) == 0) 213 return (B_FALSE); 214 IN6_INADDR_TO_V4MAPPED(&v4addr, ¶m->srv_addr); 215 } else { 216 return (B_FALSE); 217 } 218 219 if (strcasecmp(argv[4], MODE_DSR) == 0) 220 param->mode = dsr; 221 else if (strcasecmp(argv[4], MODE_NAT) == 0) 222 param->mode = nat; 223 else if (strcasecmp(argv[4], MODE_HALF_NAT) == 0) 224 param->mode = half_nat; 225 else 226 return (B_FALSE); 227 228 if ((port = atoi(argv[5])) <= 0 || port > USHRT_MAX) 229 return (B_FALSE); 230 param->port = port; 231 232 if ((timeout = strtoll(argv[6], NULL, 10)) <= 0 || timeout > UINT_MAX) 233 return (B_FALSE); 234 param->timeout = timeout; 235 236 return (B_TRUE); 237 } 238 239 /* 240 * Set up the destination address to be used to send a probe based on 241 * param. 242 */ 243 static int 244 set_sockaddr(struct sockaddr_storage *addr, socklen_t *addr_len, 245 void **next_hop, probe_param_t *param) 246 { 247 int af; 248 struct in6_addr *param_addr; 249 struct sockaddr_in *v4_addr; 250 struct sockaddr_in6 *v6_addr; 251 boolean_t nh = B_FALSE; 252 253 switch (param->mode) { 254 case dsr: 255 param_addr = ¶m->vip; 256 nh = B_TRUE; 257 break; 258 case nat: 259 case half_nat: 260 param_addr = ¶m->srv_addr; 261 break; 262 } 263 if (IN6_IS_ADDR_V4MAPPED(param_addr)) { 264 af = AF_INET; 265 v4_addr = (struct sockaddr_in *)addr; 266 IN6_V4MAPPED_TO_INADDR(param_addr, &v4_addr->sin_addr); 267 v4_addr->sin_family = AF_INET; 268 v4_addr->sin_port = htons(param->port); 269 270 *addr_len = sizeof (*v4_addr); 271 } else { 272 af = AF_INET6; 273 v6_addr = (struct sockaddr_in6 *)addr; 274 v6_addr->sin6_family = AF_INET6; 275 v6_addr->sin6_addr = *param_addr; 276 v6_addr->sin6_port = htons(param->port); 277 v6_addr->sin6_flowinfo = 0; 278 v6_addr->sin6_scope_id = 0; 279 280 *addr_len = sizeof (*v6_addr); 281 } 282 283 if (!nh) { 284 *next_hop = NULL; 285 return (af); 286 } 287 288 if (af == AF_INET) { 289 ipaddr_t *nh_addr; 290 291 nh_addr = malloc(sizeof (ipaddr_t)); 292 IN6_V4MAPPED_TO_IPADDR(¶m->srv_addr, *nh_addr); 293 *next_hop = nh_addr; 294 } else { 295 struct sockaddr_in6 *nh_addr; 296 297 nh_addr = malloc(sizeof (*nh_addr)); 298 nh_addr->sin6_family = AF_INET6; 299 nh_addr->sin6_addr = param->srv_addr; 300 nh_addr->sin6_flowinfo = 0; 301 nh_addr->sin6_scope_id = 0; 302 *next_hop = nh_addr; 303 } 304 305 return (af); 306 } 307 308 /* 309 * Use TCP to check if the peer server is alive. Create a TCP socket and 310 * then call connect() to reach the peer server. If connect() does not 311 * return within the timeout period, the SIGALRM handler will be invoked 312 * and tell ilbd that the peer server is not alive. 313 */ 314 static int 315 tcp_query(probe_param_t *param) 316 { 317 int ret; 318 int sd, af; 319 struct sockaddr_storage dst_addr; 320 socklen_t dst_addr_len; 321 void *next_hop; 322 hrtime_t start, end; 323 uint32_t rtt; 324 325 ret = 0; 326 next_hop = NULL; 327 328 af = set_sockaddr(&dst_addr, &dst_addr_len, &next_hop, param); 329 330 if ((sd = socket(af, SOCK_STREAM, param->proto)) == -1) 331 return (-1); 332 333 /* DSR mode, need to set the next hop */ 334 if (next_hop != NULL) { 335 if (af == AF_INET) { 336 if (setsockopt(sd, IPPROTO_IP, IP_NEXTHOP, next_hop, 337 sizeof (ipaddr_t)) < 0) { 338 ret = -1; 339 goto out; 340 } 341 } else { 342 if (setsockopt(sd, IPPROTO_IPV6, IPV6_NEXTHOP, 343 next_hop, sizeof (struct sockaddr_in6)) < 0) { 344 ret = -1; 345 goto out; 346 } 347 } 348 } 349 350 timeout_is_good = B_FALSE; 351 (void) alarm(param->timeout); 352 start = gethrtime(); 353 if (connect(sd, (struct sockaddr *)&dst_addr, dst_addr_len) != 0) { 354 ret = -1; 355 goto out; 356 } 357 end = gethrtime(); 358 359 rtt = (end - start) / (NANOSEC / MICROSEC); 360 if (rtt == 0) 361 rtt = 1; 362 (void) printf("%u", rtt); 363 364 out: 365 (void) close(sd); 366 return (ret); 367 } 368 369 /* 370 * Check if the ICMP packet is a port unreachable message in respnsed to 371 * our probe. Return -1 if no, 0 if yes. 372 */ 373 static int 374 check_icmp_unreach_v4(struct icmp *icmph, probe_param_t *param) 375 { 376 struct udphdr *udph; 377 struct ip *iph; 378 379 if (icmph->icmp_type != ICMP_UNREACH) 380 return (-1); 381 if (icmph->icmp_code != ICMP_UNREACH_PORT) 382 return (-1); 383 384 /* LINTED E_BAD_PTR_CAST_ALIGN */ 385 iph = (struct ip *)((char *)icmph + ICMP_MINLEN); 386 if (iph->ip_p != IPPROTO_UDP) 387 return (-1); 388 389 /* LINTED E_BAD_PTR_CAST_ALIGN */ 390 udph = (struct udphdr *)((char *)iph + (iph->ip_hl << 2)); 391 if (udph->uh_dport != htons(param->port)) 392 return (-1); 393 if (udph->uh_sport != param->lport) 394 return (-1); 395 396 /* All matched, it is a response to the probe we sent. */ 397 return (0); 398 } 399 400 /* 401 * Check if the ICMP packet is a reply to our echo request. Need to match 402 * the ID and sequence. 403 */ 404 static int 405 check_icmp_echo_v4(struct icmp *icmph, probe_param_t *param) 406 { 407 uint32_t cookie; 408 in_port_t port; 409 410 if (icmph->icmp_type != ICMP_ECHOREPLY) 411 return (-1); 412 if (icmph->icmp_id != param->echo_id) 413 return (-1); 414 if (icmph->icmp_seq != param->echo_seq) 415 return (-1); 416 417 bcopy(icmph->icmp_data, &cookie, sizeof (cookie)); 418 if (cookie != param->echo_cookie) 419 return (-1); 420 bcopy(icmph->icmp_data + sizeof (cookie), &port, sizeof (port)); 421 if (port != param->port) 422 return (-1); 423 424 /* All matched, it is a response to the echo we sent. */ 425 return (0); 426 } 427 428 /* Verify if an ICMP packet is what we expect. */ 429 static int 430 check_icmp_v4(char *buf, ssize_t rcvd, probe_param_t *param) 431 { 432 struct ip *iph; 433 struct icmp *icmph; 434 435 /* 436 * We can dereference the length field without worry since the stack 437 * should not have sent up the packet if it is smaller than a normal 438 * ICMPv4 packet. 439 */ 440 /* LINTED E_BAD_PTR_CAST_ALIGN */ 441 iph = (struct ip *)buf; 442 /* LINTED E_BAD_PTR_CAST_ALIGN */ 443 icmph = (struct icmp *)((char *)iph + (iph->ip_hl << 2)); 444 445 /* 446 * If we sent an UDP probe, check if the packet is a port 447 * unreachable message in response to our probe. 448 * 449 * If we sent an ICMP echo request, check if the packet is a reply 450 * to our echo request. 451 */ 452 if (param->probe == udp_probe) { 453 /* Is the packet large enough for further checking? */ 454 if (rcvd < 2 * sizeof (struct ip) + ICMP_MINLEN + 455 sizeof (struct udphdr)) { 456 return (-1); 457 } 458 return (check_icmp_unreach_v4(icmph, param)); 459 } else { 460 if (rcvd < sizeof (struct ip) + ICMP_MINLEN) 461 return (-1); 462 return (check_icmp_echo_v4(icmph, param)); 463 } 464 } 465 466 /* 467 * Check if the ICMPv6 packet is a port unreachable message in respnsed to 468 * our probe. Return -1 if no, 0 if yes. 469 */ 470 static int 471 check_icmp_unreach_v6(icmp6_t *icmp6h, probe_param_t *param) 472 { 473 ip6_t *ip6h; 474 struct udphdr *udph; 475 476 if (icmp6h->icmp6_type != ICMP6_DST_UNREACH) 477 return (-1); 478 if (icmp6h->icmp6_code != ICMP6_DST_UNREACH_NOPORT) 479 return (-1); 480 481 /* LINTED E_BAD_PTR_CAST_ALIGN */ 482 ip6h = (ip6_t *)((char *)icmp6h + ICMP6_MINLEN); 483 if (ip6h->ip6_nxt != IPPROTO_UDP) 484 return (-1); 485 486 udph = (struct udphdr *)(ip6h + 1); 487 488 if (udph->uh_dport != htons(param->port)) 489 return (-1); 490 if (udph->uh_sport != param->lport) 491 return (-1); 492 493 return (0); 494 } 495 496 /* 497 * Check if the ICMPv6 packet is a reply to our echo request. Need to match 498 * the ID and sequence. 499 */ 500 static int 501 check_icmp_echo_v6(icmp6_t *icmp6h, probe_param_t *param) 502 { 503 char *tmp; 504 uint32_t cookie; 505 in_port_t port; 506 507 if (icmp6h->icmp6_type != ICMP6_ECHO_REPLY) 508 return (-1); 509 if (icmp6h->icmp6_id != param->echo_id) 510 return (-1); 511 if (icmp6h->icmp6_seq != param->echo_seq) 512 return (-1); 513 tmp = (char *)icmp6h + ICMP6_MINLEN; 514 bcopy(tmp, &cookie, sizeof (cookie)); 515 if (cookie != param->echo_cookie) 516 return (-1); 517 tmp += sizeof (cookie); 518 bcopy(tmp, &port, sizeof (port)); 519 if (port != param->port) 520 return (-1); 521 522 /* All matched, it is a response to the echo we sent. */ 523 return (0); 524 } 525 526 /* Verify if an ICMPv6 packet is what we expect. */ 527 static int 528 check_icmp_v6(char *buf, ssize_t rcvd, probe_param_t *param) 529 { 530 icmp6_t *icmp6h; 531 532 /* LINTED E_BAD_PTR_CAST_ALIGN */ 533 icmp6h = (icmp6_t *)(buf); 534 535 /* 536 * If we sent an UDP probe, check if the packet is a port 537 * unreachable message. 538 * 539 * If we sent an ICMPv6 echo request, check if the packet is a reply. 540 */ 541 if (param->probe == udp_probe) { 542 /* Is the packet large enough for further checking? */ 543 if (rcvd < sizeof (ip6_t) + ICMP6_MINLEN + 544 sizeof (struct udphdr)) { 545 return (-1); 546 } 547 return (check_icmp_unreach_v6(icmp6h, param)); 548 } else { 549 if (rcvd < ICMP6_MINLEN) 550 return (-1); 551 return (check_icmp_echo_v6(icmp6h, param)); 552 } 553 } 554 555 /* 556 * Wait for an ICMP reply indefinitely. If we get what we expect, return 0. 557 * If an error happnes, return -1. 558 */ 559 static int 560 wait_icmp_reply(int af, int recv_sd, struct sockaddr_storage *exp_from, 561 probe_param_t *param) 562 { 563 char buf[RECV_PKT_SZ]; 564 socklen_t from_len; 565 ssize_t rcvd; 566 int ret; 567 568 for (;;) { 569 if (af == AF_INET) { 570 struct sockaddr_in v4_from; 571 572 from_len = sizeof (v4_from); 573 if ((rcvd = recvfrom(recv_sd, buf, RECV_PKT_SZ, 0, 574 (struct sockaddr *)&v4_from, &from_len)) < 0) { 575 ret = -1; 576 break; 577 } 578 579 /* Packet not from our peer, ignore it. */ 580 if ((((struct sockaddr_in *)exp_from)->sin_addr.s_addr) 581 != v4_from.sin_addr.s_addr) { 582 continue; 583 } 584 if (check_icmp_v4(buf, rcvd, param) == 0) { 585 ret = 0; 586 break; 587 } 588 } else { 589 struct sockaddr_in6 v6_from; 590 591 from_len = sizeof (struct sockaddr_in6); 592 if ((rcvd = recvfrom(recv_sd, buf, RECV_PKT_SZ, 0, 593 (struct sockaddr *)&v6_from, &from_len)) < 0) { 594 ret = -1; 595 break; 596 } 597 598 if (!IN6_ARE_ADDR_EQUAL(&(v6_from.sin6_addr), 599 &((struct sockaddr_in6 *)exp_from)->sin6_addr)) { 600 continue; 601 } 602 if (check_icmp_v6(buf, rcvd, param) == 0) { 603 ret = 0; 604 break; 605 } 606 } 607 } 608 return (ret); 609 } 610 611 /* Return the local port used (network byte order) in a socket. */ 612 static int 613 get_lport(int sd, in_port_t *lport) 614 { 615 struct sockaddr_storage addr; 616 socklen_t addr_sz; 617 618 addr_sz = sizeof (addr); 619 if (getsockname(sd, (struct sockaddr *)&addr, &addr_sz) != 0) 620 return (-1); 621 if (addr.ss_family == AF_INET) 622 *lport = ((struct sockaddr_in *)&addr)->sin_port; 623 else 624 *lport = ((struct sockaddr_in6 *)&addr)->sin6_port; 625 return (0); 626 } 627 628 /* 629 * Use UDP to check if the peer server is alive. Send a 0 length UDP packet 630 * to the peer server. If there is no one listening, the peer IP stack 631 * should send back a port unreachable ICMP(v4/v6) packet. If the peer 632 * server is alive, there should be no response. So if we get SIGALRM, 633 * the peer is alive. 634 */ 635 static int 636 udp_query(probe_param_t *param) 637 { 638 int ret; 639 int send_sd, recv_sd, af; 640 struct sockaddr_storage dst_addr; 641 socklen_t addr_len; 642 void *next_hop; 643 char buf[1]; 644 struct itimerval timeout; 645 uint64_t tm; 646 647 ret = 0; 648 next_hop = NULL; 649 650 af = set_sockaddr(&dst_addr, &addr_len, &next_hop, param); 651 652 if ((send_sd = socket(af, SOCK_DGRAM, param->proto)) == -1) 653 return (-1); 654 if ((recv_sd = socket(af, SOCK_RAW, (af == AF_INET) ? IPPROTO_ICMP : 655 IPPROTO_ICMPV6)) == -1) { 656 return (-1); 657 } 658 659 /* DSR mode, need to set the next hop */ 660 if (next_hop != NULL) { 661 if (af == AF_INET) { 662 if (setsockopt(send_sd, IPPROTO_IP, IP_NEXTHOP, 663 next_hop, sizeof (ipaddr_t)) < 0) { 664 ret = -1; 665 goto out; 666 } 667 } else { 668 if (setsockopt(send_sd, IPPROTO_IPV6, IPV6_NEXTHOP, 669 next_hop, sizeof (struct sockaddr_in6)) < 0) { 670 ret = -1; 671 goto out; 672 } 673 } 674 } 675 676 /* 677 * If ilbd asks us to wait at most t, we will wait for at most 678 * t', which is 3/4 of t. If we wait for too long, ilbd may 679 * timeout and kill us. 680 */ 681 timeout.it_interval.tv_sec = 0; 682 timeout.it_interval.tv_usec = 0; 683 tm = (param->timeout * MICROSEC >> 2) * 3; 684 if (tm > MICROSEC) { 685 timeout.it_value.tv_sec = tm / MICROSEC; 686 timeout.it_value.tv_usec = tm - (timeout.it_value.tv_sec * 687 MICROSEC); 688 } else { 689 timeout.it_value.tv_sec = 0; 690 timeout.it_value.tv_usec = tm; 691 } 692 timeout_is_good = B_TRUE; 693 if (setitimer(ITIMER_REAL, &timeout, NULL) != 0) { 694 ret = -1; 695 goto out; 696 } 697 698 if (sendto(send_sd, buf, 0, 0, (struct sockaddr *)&dst_addr, 699 addr_len) != 0) { 700 ret = -1; 701 goto out; 702 } 703 if ((ret = get_lport(send_sd, ¶m->lport)) != 0) 704 goto out; 705 706 /* 707 * If the server app is listening, we should not get back a 708 * response. So if wait_icmp_reply() returns, either there 709 * is an error or we get back something. 710 */ 711 (void) wait_icmp_reply(af, recv_sd, &dst_addr, param); 712 ret = -1; 713 714 out: 715 (void) close(send_sd); 716 (void) close(recv_sd); 717 return (ret); 718 } 719 720 /* 721 * Size (in uint32_t) of the ping packet to be sent to server. It includes 722 * a cookie (random number) + the target port. The cookie and port are used 723 * for matching ping request since there can be many such ping packets sent 724 * to different servers from the same source address and using the same VIP. 725 * The last two bytes are for padding. 726 * 727 */ 728 #define PING_PKT_LEN \ 729 ((ICMP_MINLEN + 2 * sizeof (uint32_t)) / sizeof (uint32_t)) 730 731 /* 732 * Try to get a random number from the pseudo random number device 733 * /dev/urandom. If there is any error, return (uint32_t)gethrtime() 734 * as a back up. 735 */ 736 static uint32_t 737 get_random(void) 738 { 739 int fd; 740 uint32_t num; 741 742 if ((fd = open("/dev/urandom", O_RDONLY)) == -1) 743 return ((uint32_t)gethrtime()); 744 745 if (read(fd, &num, sizeof (num)) != sizeof (num)) 746 num = ((uint32_t)gethrtime()); 747 748 (void) close(fd); 749 return (num); 750 } 751 752 /* 753 * Use ICMP(v4/v6) echo request to check if the peer server machine is 754 * reachable. Send a echo request and expect to get back a echo reply. 755 */ 756 static int 757 ping_query(probe_param_t *param) 758 { 759 int ret; 760 int sd, af; 761 struct sockaddr_storage dst_addr; 762 socklen_t dst_addr_len; 763 void *next_hop; 764 hrtime_t start, end; 765 uint32_t rtt; 766 uint32_t buf[PING_PKT_LEN]; 767 struct icmp *icmph; 768 769 ret = 0; 770 next_hop = NULL; 771 772 af = set_sockaddr(&dst_addr, &dst_addr_len, &next_hop, param); 773 774 if ((sd = socket(af, SOCK_RAW, (af == AF_INET) ? IPPROTO_ICMP : 775 IPPROTO_ICMPV6)) == -1) { 776 return (-1); 777 } 778 779 /* DSR mode, need to set the next hop */ 780 if (next_hop != NULL) { 781 if (af == AF_INET) { 782 if (setsockopt(sd, IPPROTO_IP, IP_NEXTHOP, next_hop, 783 sizeof (ipaddr_t)) < 0) { 784 ret = -1; 785 goto out; 786 } 787 } else { 788 if (setsockopt(sd, IPPROTO_IPV6, IPV6_NEXTHOP, 789 next_hop, sizeof (struct sockaddr_in6)) < 0) { 790 ret = -1; 791 goto out; 792 } 793 } 794 } 795 796 bzero(buf, sizeof (buf)); 797 icmph = (struct icmp *)buf; 798 icmph->icmp_type = af == AF_INET ? ICMP_ECHO : ICMP6_ECHO_REQUEST; 799 icmph->icmp_code = 0; 800 icmph->icmp_cksum = 0; 801 icmph->icmp_id = htons(gethrtime() % USHRT_MAX); 802 icmph->icmp_seq = htons(gethrtime() % USHRT_MAX); 803 804 param->echo_cookie = get_random(); 805 bcopy(¶m->echo_cookie, icmph->icmp_data, 806 sizeof (param->echo_cookie)); 807 bcopy(¶m->port, icmph->icmp_data + sizeof (param->echo_cookie), 808 sizeof (param->port)); 809 icmph->icmp_cksum = in_cksum((ushort_t *)buf, sizeof (buf)); 810 param->echo_id = icmph->icmp_id; 811 param->echo_seq = icmph->icmp_seq; 812 813 timeout_is_good = B_FALSE; 814 (void) alarm(param->timeout); 815 start = gethrtime(); 816 if (sendto(sd, buf, sizeof (buf), 0, (struct sockaddr *)&dst_addr, 817 dst_addr_len) != sizeof (buf)) { 818 ret = -1; 819 goto out; 820 } 821 if (wait_icmp_reply(af, sd, &dst_addr, param) != 0) { 822 ret = -1; 823 goto out; 824 } 825 end = gethrtime(); 826 827 rtt = (end - start) / (NANOSEC / MICROSEC); 828 if (rtt == 0) 829 rtt = 1; 830 (void) printf("%u", rtt); 831 832 out: 833 (void) close(sd); 834 return (ret); 835 } 836 837 int 838 main(int argc, char *argv[]) 839 { 840 probe_param_t param; 841 int ret; 842 843 /* ilbd should pass in PROG_ARGC parameters. */ 844 if (argc != PROG_ARGC) { 845 (void) printf("-1"); 846 return (-1); 847 } 848 849 if (signal(SIGALRM, probe_exit) == SIG_ERR) { 850 (void) printf("-1"); 851 return (-1); 852 } 853 854 if (!parse_probe_param(argv, ¶m)) { 855 (void) printf("-1"); 856 return (-1); 857 } 858 859 switch (param.probe) { 860 case ping_probe: 861 ret = ping_query(¶m); 862 break; 863 case tcp_probe: 864 ret = tcp_query(¶m); 865 break; 866 case udp_probe: 867 ret = udp_query(¶m); 868 break; 869 } 870 871 if (ret == -1) 872 (void) printf("-1"); 873 874 return (ret); 875 } 876