1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #include <sys/types.h>
28 #include <sys/socket.h>
29 #include <sys/time.h>
30
31 #include <netinet/in_systm.h>
32 #include <netinet/in.h>
33 #include <netinet/ip.h>
34 #include <netinet/ip6.h>
35 #include <arpa/inet.h>
36 #include <netinet/tcp.h>
37 #include <netinet/ip_icmp.h>
38 #include <netinet/icmp6.h>
39 #include <netinet/udp.h>
40 #include <netdb.h>
41 #include <unistd.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <strings.h>
45 #include <errno.h>
46 #include <limits.h>
47 #include <signal.h>
48 #include <libgen.h>
49 #include <fcntl.h>
50
/*
 * ilbd selects the kind of probe through the name it gives the program in
 * argv[0]: PROBE_PING asks for an ICMP echo probe, PROBE_PROTO for a probe
 * that uses the transport protocol named in argv[3].
 */
#define	PROBE_PING	"ilb_ping"
#define	PROBE_PROTO	"ilb_probe"

/* Accepted values of argv[3], the transport protocol used in the probe. */
#define	PROTO_TCP	"TCP"
#define	PROTO_UDP	"UDP"

/* The kind of probe to send. */
enum probe_type {
	ping_probe,
	tcp_probe,
	udp_probe
};

/* Accepted values of argv[4], the load balance mode. */
#define	MODE_DSR	"DSR"
#define	MODE_NAT	"NAT"
#define	MODE_HALF_NAT	"HALF_NAT"

/* The load balance mode of the rule being probed. */
enum lb_mode {
	dsr,
	nat,
	half_nat
};

/* Number of argv[] entries (argv[0] included) that ilbd passes in. */
#define	PROG_ARGC	7

/* Size, in bytes, of the buffer used to receive an ICMP packet. */
#define	RECV_PKT_SZ	256
76
77 /*
78 * Struct to store the probe info (most is passed in using the argv[] array to
79 * the command given by ilbd). The argv[] contains the following.
80 *
81 * argv[0] is either PROBE_PING or PROBE_PROTO
82 * argv[1] is the VIP
83 * argv[2] is the backend server address
84 * argv[3] is the transport protocol used in the rule
85 * argv[4] is the load balance mode, "DSR", "NAT", "HALF-NAT"
86 * argv[5] is the probe port
87 * argv[6] is the probe timeout
88 *
89 * The following three fields are used in sending ICMP ECHO probe.
90 *
91 * echo_id is the ID set in the probe
92 * echo_seq is the sequence set in the probe
93 * echo_cookie is the random number data in a probe
94 * lport is the local port (in network byte order) used to send the probe
95 */
96 typedef struct {
97 enum probe_type probe;
98 struct in6_addr vip; /* argv[1] */
99 struct in6_addr srv_addr; /* argv[2] */
100 int proto; /* argv[3] */
101 enum lb_mode mode; /* argv[4] */
102 in_port_t port; /* argv[5] */
103 uint32_t timeout; /* argv[6] */
104
105 uint16_t echo_id;
106 uint16_t echo_seq;
107 uint32_t echo_cookie;
108 in_port_t lport;
109 } probe_param_t;
110
111 /* Global variable to indicate whether a timeout means success. */
112 static boolean_t timeout_is_good;
113
114 /* SIGALRM handler */
115 /* ARGSUSED */
116 static void
probe_exit(int s)117 probe_exit(int s)
118 {
119 if (timeout_is_good) {
120 (void) printf("0");
121 exit(0);
122 } else {
123 (void) printf("-1");
124 exit(255);
125 }
126 }
127
128 /*
129 * Checksum routine for Internet Protocol family headers (C Version)
130 * (copied from ping.c)
131 */
132 static ushort_t
in_cksum(ushort_t * addr,int len)133 in_cksum(ushort_t *addr, int len)
134 {
135 int nleft = len;
136 ushort_t *w = addr;
137 ushort_t answer;
138 ushort_t odd_byte = 0;
139 int sum = 0;
140
141 /*
142 * Our algorithm is simple, using a 32 bit accumulator (sum),
143 * we add sequential 16 bit words to it, and at the end, fold
144 * back all the carry bits from the top 16 bits into the lower
145 * 16 bits.
146 */
147 while (nleft > 1) {
148 sum += *w++;
149 nleft -= 2;
150 }
151
152 /* mop up an odd byte, if necessary */
153 if (nleft == 1) {
154 *(uchar_t *)(&odd_byte) = *(uchar_t *)w;
155 sum += odd_byte;
156 }
157
158 /*
159 * add back carry outs from top 16 bits to low 16 bits
160 */
161 sum = (sum >> 16) + (sum & 0xffff); /* add hi 16 to low 16 */
162 sum += (sum >> 16); /* add carry */
163 answer = ~sum; /* truncate to 16 bits */
164 return (answer);
165 }
166
167 /* It is assumed that argv[] contains PROBE_ARGC arguments. */
168 static boolean_t
parse_probe_param(char * argv[],probe_param_t * param)169 parse_probe_param(char *argv[], probe_param_t *param)
170 {
171 int32_t port;
172 int64_t timeout;
173 struct in_addr v4addr;
174
175 if (strcmp(basename(argv[0]), PROBE_PING) == 0) {
176 param->probe = ping_probe;
177 } else {
178 if (strcmp(basename(argv[0]), PROBE_PROTO) != 0)
179 return (B_FALSE);
180
181 if (strcasecmp(argv[3], PROTO_TCP) == 0) {
182 param->probe = tcp_probe;
183 param->proto = IPPROTO_TCP;
184 } else if (strcasecmp(argv[3], PROTO_UDP) == 0) {
185 param->probe = udp_probe;
186 param->proto = IPPROTO_UDP;
187 } else {
188 return (B_FALSE);
189 }
190 }
191
192 if (strchr(argv[1], ':') != NULL) {
193 if (inet_pton(AF_INET6, argv[1], ¶m->vip) == 0)
194 return (B_FALSE);
195 } else if (strchr(argv[1], '.') != NULL) {
196 if (inet_pton(AF_INET, argv[1], &v4addr) == 0)
197 return (B_FALSE);
198 IN6_INADDR_TO_V4MAPPED(&v4addr, ¶m->vip);
199 } else {
200 return (B_FALSE);
201 }
202
203 /*
204 * The address family of vip and srv_addr should be the same for
205 * now. But in future, we may allow them to be different... So
206 * we don't do a check here.
207 */
208 if (strchr(argv[2], ':') != NULL) {
209 if (inet_pton(AF_INET6, argv[2], ¶m->srv_addr) == 0)
210 return (B_FALSE);
211 } else if (strchr(argv[2], '.') != NULL) {
212 if (inet_pton(AF_INET, argv[2], &v4addr) == 0)
213 return (B_FALSE);
214 IN6_INADDR_TO_V4MAPPED(&v4addr, ¶m->srv_addr);
215 } else {
216 return (B_FALSE);
217 }
218
219 if (strcasecmp(argv[4], MODE_DSR) == 0)
220 param->mode = dsr;
221 else if (strcasecmp(argv[4], MODE_NAT) == 0)
222 param->mode = nat;
223 else if (strcasecmp(argv[4], MODE_HALF_NAT) == 0)
224 param->mode = half_nat;
225 else
226 return (B_FALSE);
227
228 if ((port = atoi(argv[5])) <= 0 || port > USHRT_MAX)
229 return (B_FALSE);
230 param->port = port;
231
232 if ((timeout = strtoll(argv[6], NULL, 10)) <= 0 || timeout > UINT_MAX)
233 return (B_FALSE);
234 param->timeout = timeout;
235
236 return (B_TRUE);
237 }
238
239 /*
240 * Set up the destination address to be used to send a probe based on
241 * param.
242 */
243 static int
set_sockaddr(struct sockaddr_storage * addr,socklen_t * addr_len,void ** next_hop,probe_param_t * param)244 set_sockaddr(struct sockaddr_storage *addr, socklen_t *addr_len,
245 void **next_hop, probe_param_t *param)
246 {
247 int af;
248 struct in6_addr *param_addr;
249 struct sockaddr_in *v4_addr;
250 struct sockaddr_in6 *v6_addr;
251 boolean_t nh = B_FALSE;
252
253 switch (param->mode) {
254 case dsr:
255 param_addr = ¶m->vip;
256 nh = B_TRUE;
257 break;
258 case nat:
259 case half_nat:
260 param_addr = ¶m->srv_addr;
261 break;
262 }
263 if (IN6_IS_ADDR_V4MAPPED(param_addr)) {
264 af = AF_INET;
265 v4_addr = (struct sockaddr_in *)addr;
266 IN6_V4MAPPED_TO_INADDR(param_addr, &v4_addr->sin_addr);
267 v4_addr->sin_family = AF_INET;
268 v4_addr->sin_port = htons(param->port);
269
270 *addr_len = sizeof (*v4_addr);
271 } else {
272 af = AF_INET6;
273 v6_addr = (struct sockaddr_in6 *)addr;
274 v6_addr->sin6_family = AF_INET6;
275 v6_addr->sin6_addr = *param_addr;
276 v6_addr->sin6_port = htons(param->port);
277 v6_addr->sin6_flowinfo = 0;
278 v6_addr->sin6_scope_id = 0;
279
280 *addr_len = sizeof (*v6_addr);
281 }
282
283 if (!nh) {
284 *next_hop = NULL;
285 return (af);
286 }
287
288 if (af == AF_INET) {
289 ipaddr_t *nh_addr;
290
291 nh_addr = malloc(sizeof (ipaddr_t));
292 IN6_V4MAPPED_TO_IPADDR(¶m->srv_addr, *nh_addr);
293 *next_hop = nh_addr;
294 } else {
295 struct sockaddr_in6 *nh_addr;
296
297 nh_addr = malloc(sizeof (*nh_addr));
298 nh_addr->sin6_family = AF_INET6;
299 nh_addr->sin6_addr = param->srv_addr;
300 nh_addr->sin6_flowinfo = 0;
301 nh_addr->sin6_scope_id = 0;
302 *next_hop = nh_addr;
303 }
304
305 return (af);
306 }
307
308 /*
309 * Use TCP to check if the peer server is alive. Create a TCP socket and
310 * then call connect() to reach the peer server. If connect() does not
311 * return within the timeout period, the SIGALRM handler will be invoked
312 * and tell ilbd that the peer server is not alive.
313 */
314 static int
tcp_query(probe_param_t * param)315 tcp_query(probe_param_t *param)
316 {
317 int ret;
318 int sd, af;
319 struct sockaddr_storage dst_addr;
320 socklen_t dst_addr_len;
321 void *next_hop;
322 hrtime_t start, end;
323 uint32_t rtt;
324
325 ret = 0;
326 next_hop = NULL;
327
328 af = set_sockaddr(&dst_addr, &dst_addr_len, &next_hop, param);
329
330 if ((sd = socket(af, SOCK_STREAM, param->proto)) == -1)
331 return (-1);
332
333 /* DSR mode, need to set the next hop */
334 if (next_hop != NULL) {
335 if (af == AF_INET) {
336 if (setsockopt(sd, IPPROTO_IP, IP_NEXTHOP, next_hop,
337 sizeof (ipaddr_t)) < 0) {
338 ret = -1;
339 goto out;
340 }
341 } else {
342 if (setsockopt(sd, IPPROTO_IPV6, IPV6_NEXTHOP,
343 next_hop, sizeof (struct sockaddr_in6)) < 0) {
344 ret = -1;
345 goto out;
346 }
347 }
348 }
349
350 timeout_is_good = B_FALSE;
351 (void) alarm(param->timeout);
352 start = gethrtime();
353 if (connect(sd, (struct sockaddr *)&dst_addr, dst_addr_len) != 0) {
354 ret = -1;
355 goto out;
356 }
357 end = gethrtime();
358
359 rtt = (end - start) / (NANOSEC / MICROSEC);
360 if (rtt == 0)
361 rtt = 1;
362 (void) printf("%u", rtt);
363
364 out:
365 (void) close(sd);
366 return (ret);
367 }
368
369 /*
370 * Check if the ICMP packet is a port unreachable message in respnsed to
371 * our probe. Return -1 if no, 0 if yes.
372 */
373 static int
check_icmp_unreach_v4(struct icmp * icmph,probe_param_t * param)374 check_icmp_unreach_v4(struct icmp *icmph, probe_param_t *param)
375 {
376 struct udphdr *udph;
377 struct ip *iph;
378
379 if (icmph->icmp_type != ICMP_UNREACH)
380 return (-1);
381 if (icmph->icmp_code != ICMP_UNREACH_PORT)
382 return (-1);
383
384 /* LINTED E_BAD_PTR_CAST_ALIGN */
385 iph = (struct ip *)((char *)icmph + ICMP_MINLEN);
386 if (iph->ip_p != IPPROTO_UDP)
387 return (-1);
388
389 /* LINTED E_BAD_PTR_CAST_ALIGN */
390 udph = (struct udphdr *)((char *)iph + (iph->ip_hl << 2));
391 if (udph->uh_dport != htons(param->port))
392 return (-1);
393 if (udph->uh_sport != param->lport)
394 return (-1);
395
396 /* All matched, it is a response to the probe we sent. */
397 return (0);
398 }
399
400 /*
401 * Check if the ICMP packet is a reply to our echo request. Need to match
402 * the ID and sequence.
403 */
404 static int
check_icmp_echo_v4(struct icmp * icmph,probe_param_t * param)405 check_icmp_echo_v4(struct icmp *icmph, probe_param_t *param)
406 {
407 uint32_t cookie;
408 in_port_t port;
409
410 if (icmph->icmp_type != ICMP_ECHOREPLY)
411 return (-1);
412 if (icmph->icmp_id != param->echo_id)
413 return (-1);
414 if (icmph->icmp_seq != param->echo_seq)
415 return (-1);
416
417 bcopy(icmph->icmp_data, &cookie, sizeof (cookie));
418 if (cookie != param->echo_cookie)
419 return (-1);
420 bcopy(icmph->icmp_data + sizeof (cookie), &port, sizeof (port));
421 if (port != param->port)
422 return (-1);
423
424 /* All matched, it is a response to the echo we sent. */
425 return (0);
426 }
427
428 /* Verify if an ICMP packet is what we expect. */
429 static int
check_icmp_v4(char * buf,ssize_t rcvd,probe_param_t * param)430 check_icmp_v4(char *buf, ssize_t rcvd, probe_param_t *param)
431 {
432 struct ip *iph;
433 struct icmp *icmph;
434
435 /*
436 * We can dereference the length field without worry since the stack
437 * should not have sent up the packet if it is smaller than a normal
438 * ICMPv4 packet.
439 */
440 /* LINTED E_BAD_PTR_CAST_ALIGN */
441 iph = (struct ip *)buf;
442 /* LINTED E_BAD_PTR_CAST_ALIGN */
443 icmph = (struct icmp *)((char *)iph + (iph->ip_hl << 2));
444
445 /*
446 * If we sent an UDP probe, check if the packet is a port
447 * unreachable message in response to our probe.
448 *
449 * If we sent an ICMP echo request, check if the packet is a reply
450 * to our echo request.
451 */
452 if (param->probe == udp_probe) {
453 /* Is the packet large enough for further checking? */
454 if (rcvd < 2 * sizeof (struct ip) + ICMP_MINLEN +
455 sizeof (struct udphdr)) {
456 return (-1);
457 }
458 return (check_icmp_unreach_v4(icmph, param));
459 } else {
460 if (rcvd < sizeof (struct ip) + ICMP_MINLEN)
461 return (-1);
462 return (check_icmp_echo_v4(icmph, param));
463 }
464 }
465
466 /*
467 * Check if the ICMPv6 packet is a port unreachable message in respnsed to
468 * our probe. Return -1 if no, 0 if yes.
469 */
470 static int
check_icmp_unreach_v6(icmp6_t * icmp6h,probe_param_t * param)471 check_icmp_unreach_v6(icmp6_t *icmp6h, probe_param_t *param)
472 {
473 ip6_t *ip6h;
474 struct udphdr *udph;
475
476 if (icmp6h->icmp6_type != ICMP6_DST_UNREACH)
477 return (-1);
478 if (icmp6h->icmp6_code != ICMP6_DST_UNREACH_NOPORT)
479 return (-1);
480
481 /* LINTED E_BAD_PTR_CAST_ALIGN */
482 ip6h = (ip6_t *)((char *)icmp6h + ICMP6_MINLEN);
483 if (ip6h->ip6_nxt != IPPROTO_UDP)
484 return (-1);
485
486 udph = (struct udphdr *)(ip6h + 1);
487
488 if (udph->uh_dport != htons(param->port))
489 return (-1);
490 if (udph->uh_sport != param->lport)
491 return (-1);
492
493 return (0);
494 }
495
496 /*
497 * Check if the ICMPv6 packet is a reply to our echo request. Need to match
498 * the ID and sequence.
499 */
500 static int
check_icmp_echo_v6(icmp6_t * icmp6h,probe_param_t * param)501 check_icmp_echo_v6(icmp6_t *icmp6h, probe_param_t *param)
502 {
503 char *tmp;
504 uint32_t cookie;
505 in_port_t port;
506
507 if (icmp6h->icmp6_type != ICMP6_ECHO_REPLY)
508 return (-1);
509 if (icmp6h->icmp6_id != param->echo_id)
510 return (-1);
511 if (icmp6h->icmp6_seq != param->echo_seq)
512 return (-1);
513 tmp = (char *)icmp6h + ICMP6_MINLEN;
514 bcopy(tmp, &cookie, sizeof (cookie));
515 if (cookie != param->echo_cookie)
516 return (-1);
517 tmp += sizeof (cookie);
518 bcopy(tmp, &port, sizeof (port));
519 if (port != param->port)
520 return (-1);
521
522 /* All matched, it is a response to the echo we sent. */
523 return (0);
524 }
525
526 /* Verify if an ICMPv6 packet is what we expect. */
527 static int
check_icmp_v6(char * buf,ssize_t rcvd,probe_param_t * param)528 check_icmp_v6(char *buf, ssize_t rcvd, probe_param_t *param)
529 {
530 icmp6_t *icmp6h;
531
532 /* LINTED E_BAD_PTR_CAST_ALIGN */
533 icmp6h = (icmp6_t *)(buf);
534
535 /*
536 * If we sent an UDP probe, check if the packet is a port
537 * unreachable message.
538 *
539 * If we sent an ICMPv6 echo request, check if the packet is a reply.
540 */
541 if (param->probe == udp_probe) {
542 /* Is the packet large enough for further checking? */
543 if (rcvd < sizeof (ip6_t) + ICMP6_MINLEN +
544 sizeof (struct udphdr)) {
545 return (-1);
546 }
547 return (check_icmp_unreach_v6(icmp6h, param));
548 } else {
549 if (rcvd < ICMP6_MINLEN)
550 return (-1);
551 return (check_icmp_echo_v6(icmp6h, param));
552 }
553 }
554
555 /*
556 * Wait for an ICMP reply indefinitely. If we get what we expect, return 0.
557 * If an error happnes, return -1.
558 */
559 static int
wait_icmp_reply(int af,int recv_sd,struct sockaddr_storage * exp_from,probe_param_t * param)560 wait_icmp_reply(int af, int recv_sd, struct sockaddr_storage *exp_from,
561 probe_param_t *param)
562 {
563 char buf[RECV_PKT_SZ];
564 socklen_t from_len;
565 ssize_t rcvd;
566 int ret;
567
568 for (;;) {
569 if (af == AF_INET) {
570 struct sockaddr_in v4_from;
571
572 from_len = sizeof (v4_from);
573 if ((rcvd = recvfrom(recv_sd, buf, RECV_PKT_SZ, 0,
574 (struct sockaddr *)&v4_from, &from_len)) < 0) {
575 ret = -1;
576 break;
577 }
578
579 /* Packet not from our peer, ignore it. */
580 if ((((struct sockaddr_in *)exp_from)->sin_addr.s_addr)
581 != v4_from.sin_addr.s_addr) {
582 continue;
583 }
584 if (check_icmp_v4(buf, rcvd, param) == 0) {
585 ret = 0;
586 break;
587 }
588 } else {
589 struct sockaddr_in6 v6_from;
590
591 from_len = sizeof (struct sockaddr_in6);
592 if ((rcvd = recvfrom(recv_sd, buf, RECV_PKT_SZ, 0,
593 (struct sockaddr *)&v6_from, &from_len)) < 0) {
594 ret = -1;
595 break;
596 }
597
598 if (!IN6_ARE_ADDR_EQUAL(&(v6_from.sin6_addr),
599 &((struct sockaddr_in6 *)exp_from)->sin6_addr)) {
600 continue;
601 }
602 if (check_icmp_v6(buf, rcvd, param) == 0) {
603 ret = 0;
604 break;
605 }
606 }
607 }
608 return (ret);
609 }
610
/*
 * Store the local port of socket sd, in network byte order, in *lport.
 * Return 0 on success. Return -1, leaving *lport untouched, if the local
 * address of the socket cannot be obtained.
 */
static int
get_lport(int sd, in_port_t *lport)
{
	struct sockaddr_storage local;
	socklen_t local_len = sizeof (local);

	if (getsockname(sd, (struct sockaddr *)&local, &local_len) != 0)
		return (-1);

	switch (local.ss_family) {
	case AF_INET:
		*lport = ((struct sockaddr_in *)&local)->sin_port;
		break;
	default:
		/* Anything that is not IPv4 is treated as IPv6. */
		*lport = ((struct sockaddr_in6 *)&local)->sin6_port;
		break;
	}

	return (0);
}
627
628 /*
629 * Use UDP to check if the peer server is alive. Send a 0 length UDP packet
630 * to the peer server. If there is no one listening, the peer IP stack
631 * should send back a port unreachable ICMP(v4/v6) packet. If the peer
632 * server is alive, there should be no response. So if we get SIGALRM,
633 * the peer is alive.
634 */
635 static int
udp_query(probe_param_t * param)636 udp_query(probe_param_t *param)
637 {
638 int ret;
639 int send_sd, recv_sd, af;
640 struct sockaddr_storage dst_addr;
641 socklen_t addr_len;
642 void *next_hop;
643 char buf[1];
644 struct itimerval timeout;
645 uint64_t tm;
646
647 ret = 0;
648 next_hop = NULL;
649
650 af = set_sockaddr(&dst_addr, &addr_len, &next_hop, param);
651
652 if ((send_sd = socket(af, SOCK_DGRAM, param->proto)) == -1)
653 return (-1);
654 if ((recv_sd = socket(af, SOCK_RAW, (af == AF_INET) ? IPPROTO_ICMP :
655 IPPROTO_ICMPV6)) == -1) {
656 return (-1);
657 }
658
659 /* DSR mode, need to set the next hop */
660 if (next_hop != NULL) {
661 if (af == AF_INET) {
662 if (setsockopt(send_sd, IPPROTO_IP, IP_NEXTHOP,
663 next_hop, sizeof (ipaddr_t)) < 0) {
664 ret = -1;
665 goto out;
666 }
667 } else {
668 if (setsockopt(send_sd, IPPROTO_IPV6, IPV6_NEXTHOP,
669 next_hop, sizeof (struct sockaddr_in6)) < 0) {
670 ret = -1;
671 goto out;
672 }
673 }
674 }
675
676 /*
677 * If ilbd asks us to wait at most t, we will wait for at most
678 * t', which is 3/4 of t. If we wait for too long, ilbd may
679 * timeout and kill us.
680 */
681 timeout.it_interval.tv_sec = 0;
682 timeout.it_interval.tv_usec = 0;
683 tm = (param->timeout * MICROSEC >> 2) * 3;
684 if (tm > MICROSEC) {
685 timeout.it_value.tv_sec = tm / MICROSEC;
686 timeout.it_value.tv_usec = tm - (timeout.it_value.tv_sec *
687 MICROSEC);
688 } else {
689 timeout.it_value.tv_sec = 0;
690 timeout.it_value.tv_usec = tm;
691 }
692 timeout_is_good = B_TRUE;
693 if (setitimer(ITIMER_REAL, &timeout, NULL) != 0) {
694 ret = -1;
695 goto out;
696 }
697
698 if (sendto(send_sd, buf, 0, 0, (struct sockaddr *)&dst_addr,
699 addr_len) != 0) {
700 ret = -1;
701 goto out;
702 }
703 if ((ret = get_lport(send_sd, ¶m->lport)) != 0)
704 goto out;
705
706 /*
707 * If the server app is listening, we should not get back a
708 * response. So if wait_icmp_reply() returns, either there
709 * is an error or we get back something.
710 */
711 (void) wait_icmp_reply(af, recv_sd, &dst_addr, param);
712 ret = -1;
713
714 out:
715 (void) close(send_sd);
716 (void) close(recv_sd);
717 return (ret);
718 }
719
/*
 * Size, counted in uint32_t units, of the ping packet to be sent to a
 * server: the ICMP header followed by two uint32_t worth of data. The data
 * holds a cookie (a random number) and then the target port; both are used
 * for matching a reply to its request, since there can be many such ping
 * packets sent to different servers from the same source address and using
 * the same VIP. The last two bytes of the data are padding.
 */
#define	PING_PKT_LEN	\
	((ICMP_MINLEN + sizeof (uint32_t) + sizeof (uint32_t)) / \
	sizeof (uint32_t))
730
731 /*
732 * Try to get a random number from the pseudo random number device
733 * /dev/urandom. If there is any error, return (uint32_t)gethrtime()
734 * as a back up.
735 */
736 static uint32_t
get_random(void)737 get_random(void)
738 {
739 int fd;
740 uint32_t num;
741
742 if ((fd = open("/dev/urandom", O_RDONLY)) == -1)
743 return ((uint32_t)gethrtime());
744
745 if (read(fd, &num, sizeof (num)) != sizeof (num))
746 num = ((uint32_t)gethrtime());
747
748 (void) close(fd);
749 return (num);
750 }
751
752 /*
753 * Use ICMP(v4/v6) echo request to check if the peer server machine is
754 * reachable. Send a echo request and expect to get back a echo reply.
755 */
756 static int
ping_query(probe_param_t * param)757 ping_query(probe_param_t *param)
758 {
759 int ret;
760 int sd, af;
761 struct sockaddr_storage dst_addr;
762 socklen_t dst_addr_len;
763 void *next_hop;
764 hrtime_t start, end;
765 uint32_t rtt;
766 uint32_t buf[PING_PKT_LEN];
767 struct icmp *icmph;
768
769 ret = 0;
770 next_hop = NULL;
771
772 af = set_sockaddr(&dst_addr, &dst_addr_len, &next_hop, param);
773
774 if ((sd = socket(af, SOCK_RAW, (af == AF_INET) ? IPPROTO_ICMP :
775 IPPROTO_ICMPV6)) == -1) {
776 return (-1);
777 }
778
779 /* DSR mode, need to set the next hop */
780 if (next_hop != NULL) {
781 if (af == AF_INET) {
782 if (setsockopt(sd, IPPROTO_IP, IP_NEXTHOP, next_hop,
783 sizeof (ipaddr_t)) < 0) {
784 ret = -1;
785 goto out;
786 }
787 } else {
788 if (setsockopt(sd, IPPROTO_IPV6, IPV6_NEXTHOP,
789 next_hop, sizeof (struct sockaddr_in6)) < 0) {
790 ret = -1;
791 goto out;
792 }
793 }
794 }
795
796 bzero(buf, sizeof (buf));
797 icmph = (struct icmp *)buf;
798 icmph->icmp_type = af == AF_INET ? ICMP_ECHO : ICMP6_ECHO_REQUEST;
799 icmph->icmp_code = 0;
800 icmph->icmp_cksum = 0;
801 icmph->icmp_id = htons(gethrtime() % USHRT_MAX);
802 icmph->icmp_seq = htons(gethrtime() % USHRT_MAX);
803
804 param->echo_cookie = get_random();
805 bcopy(¶m->echo_cookie, icmph->icmp_data,
806 sizeof (param->echo_cookie));
807 bcopy(¶m->port, icmph->icmp_data + sizeof (param->echo_cookie),
808 sizeof (param->port));
809 icmph->icmp_cksum = in_cksum((ushort_t *)buf, sizeof (buf));
810 param->echo_id = icmph->icmp_id;
811 param->echo_seq = icmph->icmp_seq;
812
813 timeout_is_good = B_FALSE;
814 (void) alarm(param->timeout);
815 start = gethrtime();
816 if (sendto(sd, buf, sizeof (buf), 0, (struct sockaddr *)&dst_addr,
817 dst_addr_len) != sizeof (buf)) {
818 ret = -1;
819 goto out;
820 }
821 if (wait_icmp_reply(af, sd, &dst_addr, param) != 0) {
822 ret = -1;
823 goto out;
824 }
825 end = gethrtime();
826
827 rtt = (end - start) / (NANOSEC / MICROSEC);
828 if (rtt == 0)
829 rtt = 1;
830 (void) printf("%u", rtt);
831
832 out:
833 (void) close(sd);
834 return (ret);
835 }
836
837 int
main(int argc,char * argv[])838 main(int argc, char *argv[])
839 {
840 probe_param_t param;
841 int ret;
842
843 /* ilbd should pass in PROG_ARGC parameters. */
844 if (argc != PROG_ARGC) {
845 (void) printf("-1");
846 return (-1);
847 }
848
849 if (signal(SIGALRM, probe_exit) == SIG_ERR) {
850 (void) printf("-1");
851 return (-1);
852 }
853
854 if (!parse_probe_param(argv, ¶m)) {
855 (void) printf("-1");
856 return (-1);
857 }
858
859 switch (param.probe) {
860 case ping_probe:
861 ret = ping_query(¶m);
862 break;
863 case tcp_probe:
864 ret = tcp_query(¶m);
865 break;
866 case udp_probe:
867 ret = udp_query(¶m);
868 break;
869 }
870
871 if (ret == -1)
872 (void) printf("-1");
873
874 return (ret);
875 }
876