xref: /titanic_52/usr/src/lib/libslp/clib/slp_net.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * Module for all network transactions. SLP messages can be multicast,
31  * unicast over UDP, or unicast over TCP; this module provides routines
32  * for all three. TCP transactions are handled by a single dedicated
33  * thread, while multicast and UDP unicast messages are sent by the
34  * calling thread.
35  *
36  * slp_uc_tcp_send:	enqueues a message on the TCP transaction thread's
37  *				queue.
38  * slp_tcp_wait:	blocks until all TCP-enqueued transactions for
39  *				a given SLP handle are complete
40  * slp_uc_udp_send:	unicasts a message using a datagram
41  * slp_mc_send:		multicasts a message
42  */
43 
44 /*
45  * todo: correct multicast interfaces;
46  */
47 
48 #include <stdio.h>
49 #include <stdlib.h>
50 #include <syslog.h>
51 #include <sys/types.h>
52 #include <sys/socket.h>
53 #include <arpa/inet.h>
54 #include <errno.h>
55 #include <unistd.h>
56 #include <time.h>
57 #include <string.h>
58 #include <slp-internal.h>
59 #include <slp_net_utils.h>
60 
61 /*
62  * TCP thread particulars
63  */
64 static SLPBoolean tcp_thr_running = SLP_FALSE;
65 static slp_queue_t *tcp_q;
66 static int tcp_sockfd;
67 static mutex_t start_lock = DEFAULTMUTEX;
68 
69 /* Used to pass arguments to the TCP thread, via 'tcp_q' */
70 struct tcp_rqst {
71 	slp_handle_impl_t *hp;
72 	slp_target_t *target;
73 	const char *scopes;
74 	SLPBoolean free_target;
75 	unsigned short xid;
76 };
77 
78 /* Used to keep track of broadcast interfaces */
79 struct bc_ifs {
80 	struct sockaddr_in *sin;
81 	int num_ifs;
82 };
83 
84 /*
85  * Private utility routines
86  */
87 static SLPError start_tcp_thr();
88 static void tcp_thread();
89 static SLPError make_header(slp_handle_impl_t *, char *, const char *);
90 static void udp_make_msghdr(struct sockaddr_in *, struct iovec *, int,
91 			    struct msghdr *);
92 static SLPError make_mc_target(slp_handle_impl_t *,
93 				struct sockaddr_in *, char *,
94 				struct pollfd **, nfds_t *, struct bc_ifs *);
95 static SLPError make_bc_target(slp_handle_impl_t *, struct in_addr *,
96 				int, struct bc_ifs *);
97 static SLPError mc_sendmsg(struct pollfd *, struct msghdr *,
98 				struct bc_ifs *);
99 static SLPError bc_sendmsg(struct pollfd *, struct msghdr *, struct bc_ifs *);
100 static void mc_recvmsg(struct pollfd *, nfds_t, slp_handle_impl_t *,
101 			const char *, char *, void **, unsigned long long,
102 			unsigned long long, unsigned long long *,
103 			int *, int *, int);
104 static void free_pfds(struct pollfd *, nfds_t);
105 static void tcp_handoff(slp_handle_impl_t *, const char *,
106 			struct sockaddr_in *, unsigned short);
107 static unsigned long long now_millis();
108 static int wait_for_response(unsigned long long, int *,
109 				unsigned long long, unsigned long long *,
110 				struct pollfd [], nfds_t);
111 static int add2pr_list(slp_msg_t *, struct sockaddr_in *, void **);
112 static void free_pr_node(void *, VISIT, int, void *);
113 
114 /*
115  * Unicasts a message using TCP. 'target' is a targets list
116  * containing DAs corresponding to 'scopes'. 'free_target' directs
117  * tcp_thread to free the target list when finished; this is useful
118  * when a target needs to be synthesised by another message thread
119  * (such as slp_mc_send for tcp_handoffs). If this message is a
120  * retransmission due to a large reply, 'xid' should be the same as for
121  * the original message.
122  *
123  * This call returns as soon as the message has been enqueued on 'tcp_q'.
124  * Callers interested in knowing when the transaction has completed
125  * should call slp_tcp_wait with the same SLP handle.
126  */
127 void slp_uc_tcp_send(slp_handle_impl_t *hp, slp_target_t *target,
128 			const char *scopes, SLPBoolean free_target,
129 			unsigned short xid) {
130 	struct tcp_rqst *rqst;
131 
132 	/* initialize TCP vars in handle, if necessary */
133 	if (!hp->tcp_lock) {
134 		if (!(hp->tcp_lock = malloc(sizeof (*(hp->tcp_lock))))) {
135 			slp_err(LOG_CRIT, 0, "slp_uc_tcp_send",
136 				"out of memory");
137 			return;
138 		}
139 		(void) mutex_init(hp->tcp_lock, NULL, NULL);
140 	}
141 	if (!hp->tcp_wait) {
142 		if (!(hp->tcp_wait = malloc(sizeof (*(hp->tcp_wait))))) {
143 			slp_err(LOG_CRIT, 0, "slp_uc_tcp_send",
144 				"out of memory");
145 			return;
146 		}
147 		(void) cond_init(hp->tcp_wait, NULL, NULL);
148 	}
149 	(void) mutex_lock(hp->tcp_lock);
150 	(hp->tcp_ref_cnt)++;
151 	(void) mutex_unlock(hp->tcp_lock);
152 
153 	/* start TCP thread, if not already running */
154 	if (!tcp_thr_running)
155 		if (start_tcp_thr() != SLP_OK)
156 			return;
157 
158 	/* create and enqueue the request */
159 	if (!(rqst = malloc(sizeof (*rqst)))) {
160 		slp_err(LOG_CRIT, 0, "slp_uc_tcp_send", "out of memory");
161 		return;
162 	}
163 	rqst->hp = hp;
164 	rqst->target = target;
165 	rqst->scopes = scopes;
166 	rqst->free_target = free_target;
167 	rqst->xid = xid;
168 	(void) slp_enqueue(tcp_q, rqst);
169 }
170 
171 /*
172  * Wait for TCP to complete, if a transaction corresponding to this
173  * SLP handle is pending. If none are pending, returns immediately.
174  */
175 void slp_tcp_wait(slp_handle_impl_t *hp) {
176 	(void) mutex_lock(hp->tcp_lock);
177 	while (hp->tcp_ref_cnt > 0)
178 		(void) cond_wait(hp->tcp_wait, hp->tcp_lock);
179 	(void) mutex_unlock(hp->tcp_lock);
180 }
181 
182 /*
183  * Unicasts a message using datagrams. 'target' should contain a
184  * list of DAs corresponding to 'scopes'.
185  *
186  * This call does not return until the transaction has completed. It
187  * may handoff a message to the TCP thread if necessary, but will not
188  * wait for that transaction to complete. Hence callers should always
189  * invoke slp_tcp_wait before cleaning up resources.
190  */
191 void slp_uc_udp_send(slp_handle_impl_t *hp, slp_target_t *target,
192 			const char *scopes) {
193 	slp_target_t *ctarg;
194 	struct sockaddr_in *sin;
195 	struct msghdr msg[1];
196 	char header[SLP_DEFAULT_SENDMTU];
197 	int sockfd;
198 	size_t mtu;
199 	SLPBoolean use_tcp;
200 	struct pollfd pfd[1];
201 	unsigned long long now, sent;
202 	char *reply = NULL;
203 
204 	use_tcp = SLP_FALSE;
205 	/* build the header and iovec */
206 	if (make_header(hp, header, scopes) != SLP_OK)
207 		return;
208 
209 	mtu = slp_get_mtu();
210 
211 	/* walk targets list until we either succeed or run out of targets */
212 	for (ctarg = target; ctarg; ctarg = slp_next_failover(ctarg)) {
213 		char *state;
214 		const char *timeouts;
215 		int timeout;
216 
217 		sin = (struct sockaddr_in *)slp_get_target_sin(ctarg);
218 
219 		/* make the socket, msghdr and reply buf */
220 		if ((sockfd = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
221 			slp_err(LOG_CRIT, 0, "slp_uc_udp_send",
222 				"could not create socket: %s",
223 				strerror(errno));
224 			return;
225 		}
226 		pfd[0].fd = sockfd;
227 		pfd[0].events = POLLRDNORM;
228 
229 		udp_make_msghdr(sin, hp->msg.iov, hp->msg.iovlen, msg);
230 		if (!reply && !(reply = malloc(mtu))) {
231 			(void) close(sockfd);
232 			slp_err(LOG_CRIT, 0, "slp_uc_udp_send",
233 				"out of memory");
234 			return;
235 		}
236 
237 		/* timeout loop */
238 		timeouts = SLPGetProperty(SLP_CONFIG_DATAGRAMTIMEOUTS);
239 		state = (char *)timeouts;
240 		for (timeout = slp_get_next_onlist(&state);
241 			timeout != -1 &&
242 			!hp->cancel;
243 			timeout = slp_get_next_onlist(&state)) {
244 			int pollerr;
245 
246 			if (sendmsg(sockfd, msg, 0) < 0) {
247 				slp_err(LOG_CRIT, 0, "slp_uc_udp_send",
248 					"sendmsg failed: %s", strerror(errno));
249 				continue; /* try again */
250 			}
251 			sent = now_millis();
252 
253 			pollerr = wait_for_response(
254 				0, &timeout, sent, &now, pfd, 1);
255 
256 			if (pollerr == 0)
257 				/* timeout */
258 				continue;
259 			if (pollerr < 0)
260 				break;
261 
262 			/* only using one fd, so no need to scan pfd */
263 			if (recvfrom(sockfd, reply, mtu, 0, NULL, NULL) < 0) {
264 				/* if reply overflows, hand off to TCP */
265 				if (errno == ENOMEM) {
266 					free(reply); reply = NULL;
267 					use_tcp = SLP_TRUE;
268 					break;
269 				}
270 				slp_err(LOG_CRIT, 0, "slp_uc_udp_send",
271 					"recvfrom failed: %s",
272 					strerror(errno));
273 			} else {
274 				/* success -- but check error code */
275 				slp_proto_err errcode = slp_get_errcode(reply);
276 				switch (errcode) {
277 				case SLP_MSG_PARSE_ERROR:
278 				case SLP_VER_NOT_SUPPORTED:
279 				case SLP_SICK_DA:
280 				case SLP_DA_BUSY_NOW:
281 				case SLP_OPTION_NOT_UNDERSTOOD:
282 				case SLP_RQST_NOT_SUPPORTED: {
283 				    char addrbuf[INET6_ADDRSTRLEN], *cname;
284 
285 				    cname = slp_ntop(addrbuf, INET6_ADDRSTRLEN,
286 					(const void *) &(sin->sin_addr));
287 				    cname = cname ? cname : "[invalid addr]";
288 
289 				    /* drop it */
290 				    slp_err(LOG_INFO, 0,
291 				"DA %s returned error code %d; dropping reply",
292 							cname, errcode);
293 				    free(reply); reply = NULL;
294 				}
295 				}
296 			}
297 			break;
298 		}
299 		if (timeout != -1)
300 			/* success or cancel */
301 			break;
302 		/* else failure */
303 		slp_mark_target_failed(ctarg);
304 	}
305 	(void) close(sockfd);
306 	if (!ctarg || hp->cancel) {
307 		/* failed all attempts or canceled by consumer */
308 		if (reply) free(reply);
309 		return;
310 	}
311 	/* success or tcp handoff */
312 	if (reply) {
313 		if (slp_get_overflow(reply))
314 			use_tcp = SLP_TRUE;
315 		else
316 			slp_mark_target_used(ctarg);
317 		(void) slp_enqueue(hp->q, reply);
318 	}
319 	if (use_tcp)
320 		slp_uc_tcp_send(
321 			hp, ctarg, scopes, SLP_FALSE, slp_get_xid(header));
322 }
323 
324 /*
325  * Multicasts (or broadcasts) a message, using multicast convergance
326  * to collect results. Large replies will cause the message to be handed
327  * off to the TCP thread.
328  *
329  * This call does not return until the transaction is complete. It does
330  * not, however, wait until pending TCP transactions are complete, so
331  * callers should always invoke slp_tcp_wait before cleaning up any
332  * resources.
333  */
334 void slp_mc_send(slp_handle_impl_t *hp, const char *scopes) {
335 	char header[SLP_DEFAULT_SENDMTU], *state;
336 	const char *timeouts;
337 	struct sockaddr_in sin[1];
338 	struct msghdr msg[1];
339 	int maxwait, timeout, noresults, anyresults;
340 	unsigned long long final_to, now, sent;
341 	struct pollfd *pfd;
342 	nfds_t nfds;
343 	void *collator = NULL;
344 	struct bc_ifs bcifs;
345 
346 	/* build the header and iovec */
347 	if (make_header(hp, header, scopes) != SLP_OK)
348 		return;
349 
350 	(void) memset(sin, 0, sizeof (sin));
351 	if (make_mc_target(hp, sin, header, &pfd, &nfds, &bcifs) != SLP_OK)
352 		return;
353 	udp_make_msghdr(sin, hp->msg.iov, hp->msg.iovlen, msg);
354 
355 	maxwait = slp_get_mcmaxwait();
356 	maxwait = maxwait ? maxwait : SLP_DEFAULT_MAXWAIT;
357 
358 	/* set the final timeout */
359 	now = now_millis();
360 	final_to = now + maxwait;
361 
362 	/* timeout prep and loop */
363 	timeouts = SLPGetProperty(SLP_CONFIG_MULTICASTTIMEOUTS);
364 	state = (char *)timeouts;
365 	noresults = anyresults = 0;
366 
367 	for (timeout = slp_get_next_onlist(&state);
368 		timeout != -1 &&
369 		now < final_to &&
370 		noresults < 2 &&
371 		!hp->cancel;
372 		timeout = slp_get_next_onlist(&state)) {
373 
374 		/* send msg */
375 		if (mc_sendmsg(pfd, msg, &bcifs) != SLP_OK) {
376 			continue; /* try again */
377 		}
378 		sent = now_millis();
379 
380 		/* receive results */
381 		mc_recvmsg(pfd, nfds, hp, scopes, header, &collator, final_to,
382 			sent, &now, &noresults, &anyresults, timeout);
383 
384 		if (!anyresults)
385 			noresults++;
386 		anyresults = 0;
387 	}
388 	/* clean up PR list collator */
389 	if (collator)
390 		slp_twalk(collator, free_pr_node, 0, NULL);
391 
392 	/* close all fds in pfd */
393 	free_pfds(pfd, nfds);
394 
395 	/* free broadcast addrs, if used */
396 	if (bcifs.sin) free(bcifs.sin);
397 }
398 
399 /*
400  * Private net helper routines
401  */
402 
403 /*
404  * Starts the tcp_thread and allocates any necessary resources.
405  */
406 static SLPError start_tcp_thr() {
407 	SLPError err;
408 	int terr;
409 
410 	(void) mutex_lock(&start_lock);
411 	/* make sure someone else hasn't already intialized the thread */
412 	if (tcp_thr_running) {
413 		(void) mutex_unlock(&start_lock);
414 		return (SLP_OK);
415 	}
416 
417 	/* create the tcp queue */
418 	if (!(tcp_q = slp_new_queue(&err))) {
419 		(void) mutex_unlock(&start_lock);
420 		return (err);
421 	}
422 
423 	/* start the tcp thread */
424 	if ((terr = thr_create(0, NULL, (void *(*)(void *)) tcp_thread,
425 				NULL, 0, NULL)) != 0) {
426 	    slp_err(LOG_CRIT, 0, "start_tcp_thr",
427 		    "could not start thread: %s", strerror(terr));
428 	    (void) mutex_unlock(&start_lock);
429 	    return (SLP_INTERNAL_SYSTEM_ERROR);
430 	}
431 
432 	tcp_thr_running = SLP_TRUE;
433 	(void) mutex_unlock(&start_lock);
434 	return (SLP_OK);
435 }
436 
437 /*
438  * Called by the tcp thread to shut itself down. The queue must be
439  * empty (and should be, since the tcp thread will only shut itself
440  * down if nothing has been put in its queue for the timeout period).
441  */
442 static void end_tcp_thr() {
443 	(void) mutex_lock(&start_lock);
444 
445 	tcp_thr_running = SLP_FALSE;
446 	slp_destroy_queue(tcp_q);
447 
448 	(void) mutex_unlock(&start_lock);
449 	thr_exit(NULL);
450 }
451 
452 /*
453  * The thread of control for the TCP thread. This sits in a loop, waiting
454  * on 'tcp_q' for new messages. If no message appear after 30 seconds,
455  * this thread cleans up resources and shuts itself down.
456  */
457 static void tcp_thread() {
458 	struct tcp_rqst *rqst;
459 	char *reply, header[SLP_DEFAULT_SENDMTU];
460 	timestruc_t to[1];
461 	to->tv_nsec = 0;
462 
463 	for (;;) {
464 		slp_target_t *ctarg, *targets;
465 		slp_handle_impl_t *hp;
466 		const char *scopes;
467 		struct sockaddr_in *sin;
468 		SLPBoolean free_target, etimed;
469 		unsigned short xid;
470 
471 		/* set idle shutdown timeout */
472 		to->tv_sec = time(NULL) + 30;
473 		/* get the next request from the tcp queue */
474 		if (!(rqst = slp_dequeue_timed(tcp_q, to, &etimed))) {
475 			if (!etimed)
476 				continue;
477 			else
478 				end_tcp_thr();
479 		}
480 
481 		hp = rqst->hp;
482 		scopes = rqst->scopes;
483 		targets = rqst->target;
484 		free_target = rqst->free_target;
485 		xid = rqst->xid;
486 		free(rqst);
487 		reply = NULL;
488 
489 		/* Check if this handle has been cancelled */
490 		if (hp->cancel)
491 			goto transaction_complete;
492 
493 		/* build the header and iovec */
494 		if (make_header(hp, header, scopes) != SLP_OK) {
495 			if (free_target) slp_free_target(targets);
496 			continue;
497 		}
498 		if (xid)
499 			slp_set_xid(header, xid);
500 
501 	/* walk targets list until we either succeed or run out of targets */
502 		for (ctarg = targets;
503 			ctarg && !hp->cancel;
504 			ctarg = slp_next_failover(ctarg)) {
505 
506 			sin = (struct sockaddr_in *)slp_get_target_sin(ctarg);
507 
508 			/* create the socket */
509 			if ((tcp_sockfd = socket(AF_INET, SOCK_STREAM, 0))
510 			    < 0) {
511 				slp_err(LOG_CRIT, 0, "tcp_thread",
512 					"could not create socket: %s",
513 					strerror(errno));
514 				ctarg = NULL;
515 				break;
516 			}
517 
518 			/* connect to target */
519 			if (connect(tcp_sockfd, (struct sockaddr *)sin,
520 				    sizeof (*sin)) < 0) {
521 				slp_err(LOG_INFO, 0, "tcp_thread",
522 					"could not connect, error = %s",
523 					strerror(errno));
524 				goto failed;
525 			}
526 
527 			/* send the message and read the reply */
528 			if (writev(tcp_sockfd, hp->msg.iov, hp->msg.iovlen)
529 			    == -1) {
530 				slp_err(LOG_INFO, 0, "tcp_thread",
531 					"could not send, error = %s",
532 					strerror(errno));
533 				goto failed;
534 			}
535 
536 			/* if success, break out of failover loop */
537 			if ((slp_tcp_read(tcp_sockfd, &reply)) == SLP_OK) {
538 				(void) close(tcp_sockfd);
539 				break;
540 			}
541 
542 		/* else if timed out, mark target failed and try next one */
543 failed:
544 			(void) close(tcp_sockfd);
545 			slp_mark_target_failed(ctarg);
546 		}
547 
548 		if (hp->cancel) {
549 			if (reply) {
550 				free(reply);
551 			}
552 		} else if (ctarg) {
553 			/* success */
554 			(void) slp_enqueue(hp->q, reply);
555 			slp_mark_target_used(ctarg);
556 		}
557 
558 	/* If all TCP transactions on this handle are complete, send notice */
559 transaction_complete:
560 		(void) mutex_lock(hp->tcp_lock);
561 		if (--(hp->tcp_ref_cnt) == 0)
562 			(void) cond_signal(hp->tcp_wait);
563 		(void) mutex_unlock(hp->tcp_lock);
564 
565 		if (free_target)
566 			slp_free_target(targets);
567 	}
568 }
569 
570 /*
571  * Performs a full read for TCP replies, dynamically allocating a
572  * buffer large enough to hold the reply.
573  */
574 SLPError slp_tcp_read(int sockfd, char **reply) {
575 	char lenbuf[5], *p;
576 	size_t nleft;
577 	ssize_t nread;
578 	unsigned int len;
579 
580 	/* find out how long the reply is */
581 	nleft = 5;
582 	p = lenbuf;
583 	while (nleft != 0) {
584 		if ((nread = read(sockfd, p, 5)) < 0) {
585 			if (errno == EINTR)
586 				nread = 0;
587 			else
588 				return (SLP_NETWORK_ERROR);
589 		} else if (nread == 0)
590 			/* shouldn't hit EOF here */
591 			return (SLP_NETWORK_ERROR);
592 		nleft -= nread;
593 		p += nread;
594 	}
595 
596 	len = slp_get_length(lenbuf);
597 
598 	/* allocate space for the reply, and copy in what we've already read */
599 	/* This buffer gets freed by a msg-specific unpacking routine later */
600 	if (!(*reply = malloc(len))) {
601 		slp_err(LOG_CRIT, 0, "tcp_read", "out of memory");
602 		return (SLP_MEMORY_ALLOC_FAILED);
603 	}
604 	(void) memcpy(*reply, lenbuf, 5);
605 
606 	/* read the rest of the message */
607 	nleft = len - 5;
608 	p = *reply + 5;
609 	while (nleft != 0) {
610 		if ((nread = read(sockfd, p, nleft)) < 0) {
611 			if (errno == EINTR)
612 				nread = 0;
613 			else {
614 				free(*reply);
615 				return (SLP_NETWORK_ERROR);
616 			}
617 		} else if (nread == 0)
618 			/*
619 			 * shouldn't hit EOF here, but perhaps we've
620 			 * gotten something useful, so return OK.
621 			 */
622 			return (SLP_OK);
623 
624 		nleft -= nread;
625 		p += nread;
626 	}
627 
628 	return (SLP_OK);
629 }
630 
631 /*
632  * Lays in a SLP header for this message into the scatter / gather
633  * array 'iov'. 'header' is the buffer used to contain the header,
634  * and must contain enough space. 'scopes' should contain a string
635  * with the scopes to be used for this message.
636  */
637 static SLPError make_header(slp_handle_impl_t *hp, char *header,
638 			    const char *scopes) {
639 	SLPError err;
640 	size_t msgLen, off;
641 	int i;
642 	size_t mtu;
643 	unsigned short slen = (unsigned short)strlen(scopes);
644 
645 	mtu = slp_get_mtu();
646 	msgLen = slp_hdrlang_length(hp);
647 	hp->msg.iov[0].iov_base = header;
648 	hp->msg.iov[0].iov_len = msgLen;	/* now the length of the hdr */
649 
650 	/* use the remaining buffer in header for the prlist */
651 	hp->msg.prlist->iov_base = header + msgLen;
652 
653 	for (i = 1; i < hp->msg.iovlen; i++) {
654 		msgLen += hp->msg.iov[i].iov_len;
655 	}
656 	msgLen += slen;
657 
658 	off = 0;
659 	if ((err = slp_add_header(hp->locale, header, mtu,
660 					hp->fid, msgLen, &off)) != SLP_OK)
661 		return (err);
662 
663 	/* start out with empty prlist */
664 	hp->msg.prlist->iov_len = 0;
665 
666 	/* store the scope string len into the space provided by the caller */
667 	off = 0;
668 	if ((err = slp_add_sht((char *)hp->msg.scopeslen.iov_base,
669 				2, slen, &off)) != SLP_OK) {
670 		return (err);
671 	}
672 	hp->msg.scopes->iov_base = (caddr_t)scopes;
673 	hp->msg.scopes->iov_len = slen;
674 
675 	return (SLP_OK);
676 }
677 
678 /*
679  * Populates a struct msghdr suitable for use with sendmsg.
680  */
681 static void udp_make_msghdr(struct sockaddr_in *sin, struct iovec *iov,
682 			    int iovlen, struct msghdr *msg) {
683 	msg->msg_name = (caddr_t)sin;
684 	msg->msg_namelen = 16;
685 	msg->msg_iov = iov;
686 	msg->msg_iovlen = iovlen;
687 	msg->msg_accrights = NULL;
688 	msg->msg_accrightslen = 0;
689 }
690 
691 /*
692  * Sets the address on 'sin', sets the flag in the message header,
693  * and creates an array of pollfds for all interfaces we need to
694  * use. If we need to use only broadcast, and net.slp.interfaces
695  * is set, fills bcifs with an array of subnet broadcast addresses
696  * to which we should send. Returns err != SLP_OK only on catastrophic
697  * error.
698  */
699 static SLPError make_mc_target(slp_handle_impl_t *hp,
700 				struct sockaddr_in *sin, char *header,
701 				struct pollfd **fds, nfds_t *nfds,
702 				struct bc_ifs *bcifs) {
703 
704 	unsigned char ttl = slp_get_multicastTTL();
705 	char *ifs_string;
706 	SLPBoolean have_valid_if = SLP_FALSE;
707 	SLPBoolean use_broadcast = slp_get_usebroadcast();
708 	int fd, i, num_givenifs;
709 	struct in_addr *given_ifs = NULL;
710 	nfds_t nfd_i;
711 
712 	sin->sin_port = htons(SLP_PORT);
713 	sin->sin_family = AF_INET;
714 	slp_set_mcast(header);
715 
716 	/* Get the desired multicast interfaces, if set */
717 	bcifs->sin = NULL;
718 	*fds = NULL;
719 	if ((ifs_string = (char *)SLPGetProperty(
720 		SLP_CONFIG_INTERFACES)) != NULL && *ifs_string) {
721 
722 		char *p, *tstate;
723 
724 		/* count the number of IFs given */
725 		p = strchr(ifs_string, ',');
726 		for (num_givenifs = 1; p; num_givenifs++) {
727 			p = strchr(p + 1, ',');
728 		}
729 
730 		/* copy the given IFs into an array for easier processing */
731 		if (!(given_ifs = calloc(num_givenifs, sizeof (*given_ifs)))) {
732 			slp_err(LOG_CRIT, 0, "make_mc_target",
733 						"out of memory");
734 			return (SLP_MEMORY_ALLOC_FAILED);
735 		}
736 
737 		i = 0;
738 		/* strtok_r will destructively modify, so make a copy first */
739 		if (!(ifs_string = strdup(ifs_string))) {
740 			slp_err(LOG_CRIT, 0, "make_mc_target",
741 						"out of memory");
742 			free(given_ifs);
743 			return (SLP_MEMORY_ALLOC_FAILED);
744 		}
745 		for (
746 			p = strtok_r(ifs_string, ",", &tstate);
747 			p;
748 			p = strtok_r(NULL, ",", &tstate)) {
749 
750 			if (slp_pton(p, &(given_ifs[i])) < 1) {
751 				/* skip */
752 				num_givenifs--;
753 				continue;
754 			}
755 			i++;
756 		}
757 		*nfds = num_givenifs;
758 		free(ifs_string);
759 
760 		/* allocate a pollfd array for all interfaces */
761 		if (!(*fds = calloc(num_givenifs, sizeof (**fds)))) {
762 			slp_err(LOG_CRIT, 0, "make_mc_target",
763 						"out of memory");
764 			free(ifs_string);
765 			free(given_ifs);
766 			return (SLP_MEMORY_ALLOC_FAILED);
767 		}
768 
769 		/* lay the given interfaces into the pollfd array */
770 		for (i = 0; i < num_givenifs; i++) {
771 
772 			/* create a socket to bind to this interface */
773 			if ((fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
774 				slp_err(LOG_CRIT, 0, "make_mc_target",
775 						"could not create socket: %s",
776 						strerror(errno));
777 				free_pfds(*fds, *nfds);
778 				return (SLP_INTERNAL_SYSTEM_ERROR);
779 			}
780 
781 			/* fill in the pollfd structure */
782 			(*fds)[i].fd = fd;
783 			(*fds)[i].events |= POLLRDNORM;
784 
785 			if (use_broadcast) {
786 				struct sockaddr_in bcsin[1];
787 
788 				(void) memcpy(
789 					&(bcsin->sin_addr), &(given_ifs[i]),
790 					sizeof (bcsin->sin_addr));
791 				bcsin->sin_family = AF_INET;
792 				bcsin->sin_port = 0;
793 
794 				/* bind fd to interface */
795 				if (bind(fd, (struct sockaddr *)bcsin,
796 						sizeof (*bcsin)) == 0) {
797 					continue;
798 				}
799 				/* else fallthru to default (multicast) */
800 				slp_err(LOG_INFO, 0, "make_mc_target",
801 				"could not set broadcast interface: %s",
802 					strerror(errno));
803 			}
804 			/* else use multicast */
805 			if (setsockopt(fd, IPPROTO_IP, IP_MULTICAST_IF,
806 					&(given_ifs[i]), sizeof (given_ifs[i]))
807 					< 0) {
808 
809 					slp_err(LOG_INFO, 0, "make_mc_target",
810 				"could not set multicast interface: %s",
811 							strerror(errno));
812 					continue;
813 			}
814 
815 			have_valid_if = SLP_TRUE;
816 		}
817 
818 		if (use_broadcast) {
819 		    SLPError err;
820 
821 		    if ((err = make_bc_target(
822 					hp, given_ifs, num_givenifs, bcifs))
823 			!= SLP_OK) {
824 
825 			if (err == SLP_MEMORY_ALLOC_FAILED) {
826 			    /* the only thing which is really a showstopper */
827 			    return (err);
828 			}
829 
830 			/* else no valid interfaces */
831 			have_valid_if = SLP_FALSE;
832 		    }
833 		}
834 		free(given_ifs);
835 	}
836 
837 	if (!have_valid_if) {
838 		if (*fds && !have_valid_if) {
839 			/* couldn't process net.slp.interfaces property */
840 			free(*fds);
841 		}
842 
843 		/* bind to default interface */
844 		if (!(*fds = calloc(1, sizeof (**fds)))) {
845 			slp_err(LOG_CRIT, 0, "make_mc_target",
846 						"out of memory");
847 			return (SLP_MEMORY_ALLOC_FAILED);
848 		}
849 
850 		if ((fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
851 			slp_err(LOG_CRIT, 0, "make_mc_target",
852 						"could not create socket: %s",
853 						strerror(errno));
854 			free(*fds);
855 			return (SLP_INTERNAL_SYSTEM_ERROR);
856 		}
857 
858 		(**fds).fd = fd;
859 		(**fds).events |= POLLRDNORM;
860 		*nfds = 1;
861 	}
862 
863 	/* set required options on all configured fds */
864 	for (nfd_i = 0; nfd_i < *nfds; nfd_i++) {
865 		if (use_broadcast) {
866 			const int on = 1;
867 			if (setsockopt((*fds)[nfd_i].fd, SOL_SOCKET,
868 					SO_BROADCAST,
869 					(void *) &on, sizeof (on)) < 0) {
870 				slp_err(LOG_CRIT, 0, "make_mc_target",
871 					"could not enable broadcast: %s",
872 					strerror(errno));
873 			}
874 		} else {
875 			if (setsockopt((*fds)[nfd_i].fd, IPPROTO_IP,
876 					IP_MULTICAST_TTL, &ttl, 1) < 0) {
877 				slp_err(LOG_CRIT, 0, "make_mc_target",
878 					    "could not set multicast TTL: %s",
879 					    strerror(errno));
880 			}
881 		}
882 	}
883 
884 	if (use_broadcast) {
885 	    sin->sin_addr.s_addr = INADDR_BROADCAST;
886 	} else {
887 		sin->sin_addr.s_addr = SLP_MULTICAST_ADDRESS;
888 	}
889 
890 	return (SLP_OK);
891 }
892 
893 /*
894  * Obtains the subnet broadcast address for each interface specified
895  * in net.slp.interfaces, and fill bcifs->sin with an array of these
896  * addresses.
897  */
898 static SLPError make_bc_target(slp_handle_impl_t *hp,
899 				struct in_addr *given_ifs,
900 				int num_givenifs, struct bc_ifs *bcifs) {
901 	SLPError err;
902 	int i;
903 
904 	if ((err = slp_broadcast_addrs(hp, given_ifs, num_givenifs,
905 					&(bcifs->sin), &(bcifs->num_ifs)))
906 	    != SLP_OK) {
907 	    return (err);
908 	}
909 
910 	/* set SLP port on each sockaddr_in */
911 	for (i = 0; i < bcifs->num_ifs; i++) {
912 		bcifs->sin[i].sin_port = htons(SLP_PORT);
913 	}
914 
915 	return (SLP_OK);
916 }
917 
918 /*
919  * Sends msg on 1st fd in fds for multicast, or on all interfaces
920  * specified in net.slp.interfaces for broadcast. Returns SLP_OK if
921  * msg was sent successfully on at least one interface; otherwise
922  * returns SLP_NETWORK_ERROR if msg was not sent on any interfaces.
923  */
924 static SLPError mc_sendmsg(struct pollfd *fds,
925 				struct msghdr *msg, struct bc_ifs *bcifs) {
926 
927 	if (slp_get_usebroadcast()) {
928 	    char *ifs = (char *)SLPGetProperty(SLP_CONFIG_INTERFACES);
929 
930 	    /* hand off to broadcast-specific send function */
931 	    if (ifs && *ifs && bc_sendmsg(fds, msg, bcifs) == SLP_OK) {
932 		return (SLP_OK);
933 	    }
934 
935 		/*
936 		 * else  no ifs given, or bc_sendmsg failed, so send on
937 		 * general broadcast addr (255.255.255.255). This will
938 		 * cause the message to be sent on all interfaces. The
939 		 * address will have been set in make_mc_target.
940 		 */
941 	}
942 
943 	/*
944 	 * Send only on one interface -- let routing take care of
945 	 * sending the message everywhere it needs to go. Sending
946 	 * on more than one interface can cause nasty routing loops.
947 	 * Note that this approach doesn't work with partitioned
948 	 * networks.
949 	 */
950 	if (sendmsg(fds[0].fd, msg, 0) < 0) {
951 		slp_err(LOG_CRIT, 0, "mc_sendmsg",
952 			"sendmsg failed: %s", strerror(errno));
953 		return (SLP_NETWORK_ERROR);
954 	}
955 
956 	return (SLP_OK);
957 }
958 
959 /*
960  * Send msg to each subnet broadcast address in bcifs->sin. Note
961  * that we can send on any fd (regardless of which interface to which
962  * it is bound), since the kernel will take care of routing for us.
963  * Returns err != SLP_OK only if no message was sent on any interface.
964  */
965 static SLPError bc_sendmsg(struct pollfd *fds, struct msghdr *msg,
966 				struct bc_ifs *bcifs) {
967 	int i;
968 	SLPBoolean sent_one = SLP_FALSE;
969 
970 	for (i = 0; i < bcifs->num_ifs; i++) {
971 		msg->msg_name = (caddr_t)&(bcifs->sin[i]);
972 
973 		if (sendmsg(fds[0].fd, msg, 0) < 0) {
974 			slp_err(LOG_CRIT, 0, "bc_sendmsg",
975 				"sendmsg failed: %s", strerror(errno));
976 			continue;
977 		}
978 		sent_one = SLP_TRUE;
979 	}
980 	return (sent_one ? SLP_OK : SLP_NETWORK_ERROR);
981 }
982 
983 /*
 * This is where the bulk of the multicast convergence algorithm resides.
985  * mc_recvmsg() waits for data to be ready on any fd in pfd, iterates
986  * through pfd and reads data from ready fd's. It also checks timeouts
987  * and user-cancels.
988  *
989  * Parameters:
990  *   pfd	IN	an array of pollfd structs containing fds to poll
991  *   nfds	IN	number of elements in pfd
992  *   hp		IN	SLPHandle from originating call
993  *   scopes	IN	scopes to use for this message
994  *   header	IN	the SLP message header for this message
995  *   collator	IN/OUT	btree collator for PR list
996  *   final_to	IN	final timeout
997  *   sent	IN	time when message was sent
 *   now	IN/OUT	set to current time at beginning of convergence
999  *   noresults	OUT	set to 0 if any results are received
1000  *   anyresults	OUT	set to true if any results are received
1001  *   timeout	IN	time for this convergence iteration
1002  *
 * Returns only if an error has occurred, or if either this retransmit
1004  * timeout or the final timeout has expired, or if hp->cancel becomes true.
1005  */
1006 static void mc_recvmsg(struct pollfd *pfd, nfds_t nfds, slp_handle_impl_t *hp,
1007 			const char *scopes, char *header, void **collator,
1008 			unsigned long long final_to,
1009 			unsigned long long sent,
1010 			unsigned long long *now,
1011 			int *noresults, int *anyresults, int timeout) {
1012 	char *reply = NULL;
1013 	nfds_t i;
1014 	struct sockaddr_in responder;
1015 	int pollerr;
1016 	socklen_t addrlen = sizeof (responder);
1017 	size_t mtu = slp_get_mtu();
1018 
1019 	for (; !hp->cancel; ) {
1020 	    /* wait until we can read something */
1021 	    pollerr = wait_for_response(
1022 				final_to, &timeout, sent, now, pfd, nfds);
1023 	    if (pollerr == 0)
1024 		/* timeout */
1025 		goto cleanup;
1026 	    if (pollerr < 0)
1027 		/* error */
1028 		goto cleanup;
1029 
1030 	    /* iterate through all fds to find one with data to read */
1031 	    for (i = 0; !hp->cancel && i < nfds; i++) {
1032 
1033 		if (pfd[i].fd < 0 ||
1034 		    !(pfd[i].revents & (POLLRDNORM | POLLERR))) {
1035 
1036 		    /* unused fd or unwanted event */
1037 		    continue;
1038 		}
1039 
1040 		/* alloc reply buffer */
1041 		if (!reply && !(reply = malloc(mtu))) {
1042 		    slp_err(LOG_CRIT, 0, "mc_revcmsg", "out of memory");
1043 		    return;
1044 	    }
1045 		if (recvfrom(pfd[i].fd, reply, mtu, 0,
1046 				(struct sockaddr *)&responder,
1047 				(int *)&addrlen) < 0) {
1048 
1049 		    /* if reply overflows, hand off to TCP */
1050 		    if (errno == ENOMEM) {
1051 			free(reply); reply = NULL;
1052 			tcp_handoff(hp, scopes,
1053 					&responder, slp_get_xid(header));
1054 			continue;
1055 		    }
1056 
1057 		    /* else something nasty happened */
1058 		    slp_err(LOG_CRIT, 0, "mc_recvmsg",
1059 					"recvfrom failed: %s",
1060 					strerror(errno));
1061 		    continue;
1062 		} else {
1063 		    /* success */
1064 		    if (slp_get_overflow(reply)) {
1065 			tcp_handoff(hp, scopes,
1066 					&responder, slp_get_xid(header));
1067 		    }
1068 			/*
1069 			 * Add to the PR list. If this responder has already
1070 			 * answered, it doesn't count.
1071 			 */
1072 		    if (add2pr_list(&(hp->msg), &responder, collator)) {
1073 			(void) slp_enqueue(hp->q, reply);
1074 			*noresults = 0;
1075 			*anyresults = 1;
1076 			reply = NULL;
1077 		    }
1078 
1079 		    /* if we've exceeded maxwait, break out */
1080 		    *now = now_millis();
1081 		    if (*now > final_to)
1082 			goto cleanup;
1083 
1084 		} /* end successful receive */
1085 
1086 	    } /* end fd iteration */
1087 
1088 	    /* reset poll's timeout */
1089 	    timeout = timeout - (int)(*now - sent);
1090 	    if (timeout <= 0) {
1091 		goto cleanup;
1092 	    }
1093 
1094 	} /* end main poll loop */
1095 
1096 cleanup:
1097 	if (reply) {
1098 	    free(reply);
1099 	}
1100 }
1101 
/*
 * Releases a pollfd array: every entry holding a descriptor greater
 * than zero is closed (entries with fd <= 0 are treated as unused and
 * skipped), then the array itself is freed.
 */
static void free_pfds(struct pollfd *pfds, nfds_t nfds) {
	struct pollfd *cur = pfds;
	nfds_t remaining;

	for (remaining = nfds; remaining > 0; remaining--, cur++) {
		if (cur->fd > 0) {
			(void) close(cur->fd);
		}
	}

	free(pfds);
}
1118 
1119 /*
1120  * Hands off a message to the TCP thread, fabricating a new target
1121  * from 'sin'. 'xid' will be used to create the XID for the TCP message.
1122  */
1123 static void tcp_handoff(slp_handle_impl_t *hp, const char *scopes,
1124 			struct sockaddr_in *sin, unsigned short xid) {
1125 	slp_target_t *target;
1126 
1127 	target = slp_fabricate_target(sin);
1128 	slp_uc_tcp_send(hp, target, scopes, SLP_TRUE, xid);
1129 }
1130 
/*
 * Reads the wall clock with gettimeofday() and returns it expressed
 * in milliseconds.
 */
static unsigned long long now_millis() {
	struct timeval tv;
	unsigned long long from_secs, from_usecs;

	(void) gettimeofday(&tv, NULL);

	/* widen before scaling so the multiplication is done in 64 bits */
	from_secs = (unsigned long long) tv.tv_sec * 1000;
	from_usecs = tv.tv_usec / 1000;

	return (from_secs + from_usecs);
}
1143 
/*
 * A wrapper around poll which waits until a reply comes in. This will
 * wait no longer than '*timeout' milliseconds before returning. poll can
 * return even if no data is on the pipe and no timeout has occurred
 * (EINTR or EAGAIN), so the additional parameters are used to break out
 * of the wait loop once the timeout value has been exceeded. 'final_to'
 * is ignored if it is 0.
 *
 * When poll is interrupted, only the time spent in that poll call is
 * deducted from '*timeout' before polling again, so repeated
 * interruptions never count the same interval twice. 'sent' is no longer
 * consulted; it is retained so that the interface is unchanged.
 *
 * returns:	< 0 on error
 *		0 on timeout
 *		> 0 on success (i.e. ready to read data).
 * side effects: 'now' is set to the time when poll last returned;
 *		'*timeout' is reduced by time lost to interrupted polls.
 */
/*ARGSUSED*/
static int wait_for_response(
	unsigned long long final_to,
	int *timeout,
	unsigned long long sent,
	unsigned long long *now,
	struct pollfd pfd[], nfds_t nfds) {

	unsigned long long started;
	int elapsed, pollerr, saved_errno;

	/* wait until we can read something */
	for (;;) {
		started = now_millis();
		pollerr = poll(pfd, nfds, *timeout);
		/* capture errno before any other library call can alter it */
		saved_errno = errno;
		*now = now_millis();

		/* > 0: ready to read, 0: timeout */
		if (pollerr >= 0)
			return (pollerr);

		/* a genuine error */
		if (saved_errno != EAGAIN && saved_errno != EINTR) {
			slp_err(LOG_INFO, 0, "wait for response",
				"poll error: %s",
				strerror(saved_errno));
			return (pollerr);
		}

		/* interrupted: poll again for whatever time is left */
		elapsed = (*now > started) ? (int)(*now - started) : 0;
		if ((final_to != 0 && *now > final_to) ||
		    elapsed > *timeout) {
			return (0);
		}
		*timeout -= elapsed;
	}
}
1200 
1201 /*
1202  * Adds the cname of the host whose address is in 'sin' to this message's
1203  * previous responder list. The message is contained in 'msg'.
1204  * 'collator' contains the complete previous responder list, so that
1205  * even if the PR list in the message overflows and must be truncated,
1206  * the function can still correctly determine if we have heard from this
1207  * host before.
1208  *
1209  * returns:	1 if this is the first time we've heard from this host
1210  *		0 is this is a duplicate reply
1211  */
1212 static int add2pr_list(
1213 	slp_msg_t *msg,
1214 	struct sockaddr_in *sin,
1215 	void **collator) {
1216 
1217 	char **res, *cname, *p, *header;
1218 	size_t mtu;
1219 	size_t len, off, namelen;
1220 	unsigned short prlen;
1221 
1222 	/* Attempt to resolve the responder's IP address to its host name */
1223 	if (!(cname = slp_gethostbyaddr((char *)&(sin->sin_addr),
1224 					sizeof (sin->sin_addr))))
1225 		return (0);
1226 
1227 	res = slp_tsearch(
1228 		cname, collator,
1229 		(int (*)(const void *, const void *)) strcasecmp);
1230 	if (*res != cname) {
1231 		/* duplicate */
1232 		slp_err(LOG_INFO, 0, "add2pr_list",
1233 			"drop PR ignored by host: %s",
1234 			cname);
1235 		free(cname);
1236 		return (0);
1237 	}
1238 
1239 	/* new responder: add to the msg PR list if there is room */
1240 	mtu = slp_get_mtu();
1241 
1242 	header = msg->iov[0].iov_base;
1243 	len = slp_get_length(header);
1244 
1245 	namelen = strlen(cname);
1246 	if ((namelen + 2 + len) >= mtu)
1247 		return (1);	/* no room */
1248 
1249 	/* else  there is enough room */
1250 	prlen = (unsigned short)msg->prlist->iov_len;
1251 	p = msg->prlist->iov_base + prlen;
1252 	*p = 0;
1253 
1254 	if (prlen) {
1255 		namelen++;	/* add the ',' */
1256 		(void) strcat(p, ",");
1257 	}
1258 	(void) strcat(p, cname);
1259 
1260 	/* update msg and pr list length */
1261 	len += namelen;
1262 	slp_set_length(header, len);
1263 	prlen += (unsigned short)namelen;
1264 	off = 0;
1265 	(void) slp_add_sht(msg->prlistlen.iov_base, 2, prlen, &off);
1266 	msg->prlist->iov_len += namelen;
1267 
1268 	return (1);
1269 }
1270 
/*
 * Visitor applied to each node while walking the previous responder
 * tree. On the last visit to a node (endorder, or leaf for nodes
 * without children) it frees the string the node refers to and then
 * the node itself; every other visit is ignored.
 */
/*ARGSUSED2*/
static void free_pr_node(void *node, VISIT order, int level, void *cookie) {
	char **slot;

	if (order != endorder && order != leaf) {
		return;
	}

	slot = (char **)node;
	free(*slot);
	free(node);
}
1283