xref: /freebsd/contrib/unbound/util/netevent.c (revision d9f0ce31900a48d1a2bfc1c8c86f79d1e831451a)
1 /*
2  * util/netevent.c - event notification
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  *
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  *
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 /**
37  * \file
38  *
39  * This file contains event notification functions.
40  */
41 #include "config.h"
42 #include "util/netevent.h"
43 #include "util/log.h"
44 #include "util/net_help.h"
45 #include "util/fptr_wlist.h"
46 #include "sldns/pkthdr.h"
47 #include "sldns/sbuffer.h"
48 #include "dnstap/dnstap.h"
49 #ifdef HAVE_OPENSSL_SSL_H
50 #include <openssl/ssl.h>
51 #endif
52 #ifdef HAVE_OPENSSL_ERR_H
53 #include <openssl/err.h>
54 #endif
55 
56 /* -------- Start of local definitions -------- */
/**
 * if CMSG_ALIGN is not defined on this platform, a workaround.
 * A platform-private alignment macro is preferred when one exists;
 * otherwise lengths are rounded up to a multiple of sizeof(long).
 */
#ifndef CMSG_ALIGN
#  ifdef __CMSG_ALIGN
#    define CMSG_ALIGN(n) __CMSG_ALIGN(n)
#  elif defined(_CMSG_DATA_ALIGN)
     /* test for the very macro that is expanded, and keep CMSG_ALIGN
      * function-like just as in the other branches */
#    define CMSG_ALIGN(n) _CMSG_DATA_ALIGN(n)
#  else
#    define CMSG_ALIGN(len) (((len)+sizeof(long)-1) & ~(sizeof(long)-1))
#  endif
#endif

/** if CMSG_LEN is not defined on this platform, a workaround:
 * the aligned header size plus the payload length */
#ifndef CMSG_LEN
#  define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr))+(len))
#endif

/** if CMSG_SPACE is not defined on this platform, a workaround:
 * the aligned payload length plus the aligned header size */
#ifndef CMSG_SPACE
#  ifdef _CMSG_HDR_ALIGN
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+_CMSG_HDR_ALIGN(sizeof(struct cmsghdr)))
#  else
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+CMSG_ALIGN(sizeof(struct cmsghdr)))
#  endif
#endif
81 
/** Timeout, in seconds, applied while reading or writing a TCP query */
#define TCP_QUERY_TIMEOUT 120

/** number of UDP reads to perform per read indication from select;
 * only a single read is done when nonblocking sockets are broken */
#ifdef NONBLOCKING_IS_BROKEN
#define NUM_UDP_PER_SELECT 1
#else
#define NUM_UDP_PER_SELECT 100
#endif
91 
92 /* We define libevent structures here to hide the libevent stuff. */
93 
94 #ifdef USE_MINI_EVENT
95 #  ifdef USE_WINSOCK
96 #    include "util/winsock_event.h"
97 #  else
98 #    include "util/mini_event.h"
99 #  endif /* USE_WINSOCK */
100 #else /* USE_MINI_EVENT */
101    /* we use libevent */
102 #  ifdef HAVE_EVENT_H
103 #    include <event.h>
104 #  else
105 #    include "event2/event.h"
106 #    include "event2/event_struct.h"
107 #    include "event2/event_compat.h"
108 #  endif
109 #endif /* USE_MINI_EVENT */
110 
111 /**
112  * The internal event structure for keeping libevent info for the event.
113  * Possibly other structures (list, tree) this is part of.
114  */
115 struct internal_event {
116 	/** the comm base */
117 	struct comm_base* base;
118 	/** libevent event type, alloced here */
119 	struct event ev;
120 };
121 
122 /**
123  * Internal base structure, so that every thread has its own events.
124  */
125 struct internal_base {
126 	/** libevent event_base type. */
127 	struct event_base* base;
128 	/** seconds time pointer points here */
129 	time_t secs;
130 	/** timeval with current time */
131 	struct timeval now;
132 	/** the event used for slow_accept timeouts */
133 	struct event slow_accept;
134 	/** true if slow_accept is enabled */
135 	int slow_accept_enabled;
136 };
137 
138 /**
139  * Internal timer structure, to store timer event in.
140  */
141 struct internal_timer {
142 	/** the comm base */
143 	struct comm_base* base;
144 	/** libevent event type, alloced here */
145 	struct event ev;
146 	/** is timer enabled */
147 	uint8_t enabled;
148 };
149 
150 /**
151  * Internal signal structure, to store signal event in.
152  */
153 struct internal_signal {
154 	/** libevent event type, alloced here */
155 	struct event ev;
156 	/** next in signal list */
157 	struct internal_signal* next;
158 };
159 
160 /** create a tcp handler with a parent */
161 static struct comm_point* comm_point_create_tcp_handler(
162 	struct comm_base *base, struct comm_point* parent, size_t bufsize,
163         comm_point_callback_t* callback, void* callback_arg);
164 
165 /* -------- End of local definitions -------- */
166 
167 #ifdef USE_MINI_EVENT
168 /** minievent updates the time when it blocks. */
169 #define comm_base_now(x) /* nothing to do */
170 #else /* !USE_MINI_EVENT */
171 /** fillup the time values in the event base */
172 static void
173 comm_base_now(struct comm_base* b)
174 {
175 	if(gettimeofday(&b->eb->now, NULL) < 0) {
176 		log_err("gettimeofday: %s", strerror(errno));
177 	}
178 	b->eb->secs = (time_t)b->eb->now.tv_sec;
179 }
180 #endif /* USE_MINI_EVENT */
181 
182 struct comm_base*
183 comm_base_create(int sigs)
184 {
185 	struct comm_base* b = (struct comm_base*)calloc(1,
186 		sizeof(struct comm_base));
187 	if(!b)
188 		return NULL;
189 	b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
190 	if(!b->eb) {
191 		free(b);
192 		return NULL;
193 	}
194 #ifdef USE_MINI_EVENT
195 	(void)sigs;
196 	/* use mini event time-sharing feature */
197 	b->eb->base = event_init(&b->eb->secs, &b->eb->now);
198 #else
199 #  if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP)
200 	/* libev */
201 	if(sigs)
202 		b->eb->base=(struct event_base *)ev_default_loop(EVFLAG_AUTO);
203 	else
204 		b->eb->base=(struct event_base *)ev_loop_new(EVFLAG_AUTO);
205 #  else
206 	(void)sigs;
207 #    ifdef HAVE_EVENT_BASE_NEW
208 	b->eb->base = event_base_new();
209 #    else
210 	b->eb->base = event_init();
211 #    endif
212 #  endif
213 #endif
214 	if(!b->eb->base) {
215 		free(b->eb);
216 		free(b);
217 		return NULL;
218 	}
219 	comm_base_now(b);
220 	/* avoid event_get_method call which causes crashes even when
221 	 * not printing, because its result is passed */
222 	verbose(VERB_ALGO,
223 #if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP)
224 		"libev"
225 #elif defined(USE_MINI_EVENT)
226 		"event "
227 #else
228 		"libevent "
229 #endif
230 		"%s uses %s method.",
231 		event_get_version(),
232 #ifdef HAVE_EVENT_BASE_GET_METHOD
233 		event_base_get_method(b->eb->base)
234 #else
235 		"not_obtainable"
236 #endif
237 	);
238 	return b;
239 }
240 
241 struct comm_base*
242 comm_base_create_event(struct event_base* base)
243 {
244 	struct comm_base* b = (struct comm_base*)calloc(1,
245 		sizeof(struct comm_base));
246 	if(!b)
247 		return NULL;
248 	b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
249 	if(!b->eb) {
250 		free(b);
251 		return NULL;
252 	}
253 	b->eb->base = base;
254 	comm_base_now(b);
255 	return b;
256 }
257 
258 void
259 comm_base_delete(struct comm_base* b)
260 {
261 	if(!b)
262 		return;
263 	if(b->eb->slow_accept_enabled) {
264 		if(event_del(&b->eb->slow_accept) != 0) {
265 			log_err("could not event_del slow_accept");
266 		}
267 	}
268 #ifdef USE_MINI_EVENT
269 	event_base_free(b->eb->base);
270 #elif defined(HAVE_EVENT_BASE_FREE) && defined(HAVE_EVENT_BASE_ONCE)
271 	/* only libevent 1.2+ has it, but in 1.2 it is broken -
272 	   assertion fails on signal handling ev that is not deleted
273  	   in libevent 1.3c (event_base_once appears) this is fixed. */
274 	event_base_free(b->eb->base);
275 #endif /* HAVE_EVENT_BASE_FREE and HAVE_EVENT_BASE_ONCE */
276 	b->eb->base = NULL;
277 	free(b->eb);
278 	free(b);
279 }
280 
281 void
282 comm_base_delete_no_base(struct comm_base* b)
283 {
284 	if(!b)
285 		return;
286 	if(b->eb->slow_accept_enabled) {
287 		if(event_del(&b->eb->slow_accept) != 0) {
288 			log_err("could not event_del slow_accept");
289 		}
290 	}
291 	b->eb->base = NULL;
292 	free(b->eb);
293 	free(b);
294 }
295 
296 void
297 comm_base_timept(struct comm_base* b, time_t** tt, struct timeval** tv)
298 {
299 	*tt = &b->eb->secs;
300 	*tv = &b->eb->now;
301 }
302 
303 void
304 comm_base_dispatch(struct comm_base* b)
305 {
306 	int retval;
307 	retval = event_base_dispatch(b->eb->base);
308 	if(retval != 0) {
309 		fatal_exit("event_dispatch returned error %d, "
310 			"errno is %s", retval, strerror(errno));
311 	}
312 }
313 
314 void comm_base_exit(struct comm_base* b)
315 {
316 	if(event_base_loopexit(b->eb->base, NULL) != 0) {
317 		log_err("Could not loopexit");
318 	}
319 }
320 
321 void comm_base_set_slow_accept_handlers(struct comm_base* b,
322 	void (*stop_acc)(void*), void (*start_acc)(void*), void* arg)
323 {
324 	b->stop_accept = stop_acc;
325 	b->start_accept = start_acc;
326 	b->cb_arg = arg;
327 }
328 
329 struct event_base* comm_base_internal(struct comm_base* b)
330 {
331 	return b->eb->base;
332 }
333 
334 /** see if errno for udp has to be logged or not uses globals */
335 static int
336 udp_send_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
337 {
338 	/* do not log transient errors (unless high verbosity) */
339 #if defined(ENETUNREACH) || defined(EHOSTDOWN) || defined(EHOSTUNREACH) || defined(ENETDOWN)
340 	switch(errno) {
341 #  ifdef ENETUNREACH
342 		case ENETUNREACH:
343 #  endif
344 #  ifdef EHOSTDOWN
345 		case EHOSTDOWN:
346 #  endif
347 #  ifdef EHOSTUNREACH
348 		case EHOSTUNREACH:
349 #  endif
350 #  ifdef ENETDOWN
351 		case ENETDOWN:
352 #  endif
353 			if(verbosity < VERB_ALGO)
354 				return 0;
355 		default:
356 			break;
357 	}
358 #endif
359 	/* permission denied is gotten for every send if the
360 	 * network is disconnected (on some OS), squelch it */
361 	if( ((errno == EPERM)
362 #  ifdef EADDRNOTAVAIL
363 		/* 'Cannot assign requested address' also when disconnected */
364 		|| (errno == EADDRNOTAVAIL)
365 #  endif
366 		) && verbosity < VERB_DETAIL)
367 		return 0;
368 	/* squelch errors where people deploy AAAA ::ffff:bla for
369 	 * authority servers, which we try for intranets. */
370 	if(errno == EINVAL && addr_is_ip4mapped(
371 		(struct sockaddr_storage*)addr, addrlen) &&
372 		verbosity < VERB_DETAIL)
373 		return 0;
374 	/* SO_BROADCAST sockopt can give access to 255.255.255.255,
375 	 * but a dns cache does not need it. */
376 	if(errno == EACCES && addr_is_broadcast(
377 		(struct sockaddr_storage*)addr, addrlen) &&
378 		verbosity < VERB_DETAIL)
379 		return 0;
380 	return 1;
381 }
382 
/**
 * Decide whether the errno of a failed TCP connect is worth logging.
 * The same filter as for failed UDP sends is applied.
 * @param addr: address that was connected to.
 * @param addrlen: length of addr.
 * @return 0 to keep the error out of the logs, 1 to log it.
 */
int tcp_connect_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
{
	int wants_log = udp_send_errno_needs_log(addr, addrlen);
	return wants_log;
}
387 
388 /* send a UDP reply */
389 int
390 comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet,
391 	struct sockaddr* addr, socklen_t addrlen)
392 {
393 	ssize_t sent;
394 	log_assert(c->fd != -1);
395 #ifdef UNBOUND_DEBUG
396 	if(sldns_buffer_remaining(packet) == 0)
397 		log_err("error: send empty UDP packet");
398 #endif
399 	log_assert(addr && addrlen > 0);
400 	sent = sendto(c->fd, (void*)sldns_buffer_begin(packet),
401 		sldns_buffer_remaining(packet), 0,
402 		addr, addrlen);
403 	if(sent == -1) {
404 		/* try again and block, waiting for IO to complete,
405 		 * we want to send the answer, and we will wait for
406 		 * the ethernet interface buffer to have space. */
407 #ifndef USE_WINSOCK
408 		if(errno == EAGAIN ||
409 #  ifdef EWOULDBLOCK
410 			errno == EWOULDBLOCK ||
411 #  endif
412 			errno == ENOBUFS) {
413 #else
414 		if(WSAGetLastError() == WSAEINPROGRESS ||
415 			WSAGetLastError() == WSAENOBUFS ||
416 			WSAGetLastError() == WSAEWOULDBLOCK) {
417 #endif
418 			int e;
419 			fd_set_block(c->fd);
420 			sent = sendto(c->fd, (void*)sldns_buffer_begin(packet),
421 				sldns_buffer_remaining(packet), 0,
422 				addr, addrlen);
423 			e = errno;
424 			fd_set_nonblock(c->fd);
425 			errno = e;
426 		}
427 	}
428 	if(sent == -1) {
429 		if(!udp_send_errno_needs_log(addr, addrlen))
430 			return 0;
431 #ifndef USE_WINSOCK
432 		verbose(VERB_OPS, "sendto failed: %s", strerror(errno));
433 #else
434 		verbose(VERB_OPS, "sendto failed: %s",
435 			wsa_strerror(WSAGetLastError()));
436 #endif
437 		log_addr(VERB_OPS, "remote address is",
438 			(struct sockaddr_storage*)addr, addrlen);
439 		return 0;
440 	} else if((size_t)sent != sldns_buffer_remaining(packet)) {
441 		log_err("sent %d in place of %d bytes",
442 			(int)sent, (int)sldns_buffer_remaining(packet));
443 		return 0;
444 	}
445 	return 1;
446 }
447 
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && (defined(HAVE_RECVMSG) || defined(HAVE_SENDMSG))
/**
 * Format an address for the ancillary debug output.
 * On conversion failure a fixed error text is stored instead; the result
 * is always terminated at the last byte of dst.
 * @param af: address family passed to inet_ntop.
 * @param src: the address to format.
 * @param dst: output string buffer.
 * @param dstlen: size of dst in bytes.
 */
static void
p_ancil_addr(int af, const void* src, char* dst, size_t dstlen)
{
	if(inet_ntop(af, src, dst, (socklen_t)dstlen) == 0) {
		(void)strlcpy(dst, "(inet_ntop error)", dstlen);
	}
	dst[dstlen-1]=0;
}

/**
 * print debug ancillary info
 * @param str: text printed in front of the info.
 * @param r: reply whose srctype and pktinfo are printed.
 */
static void p_ancil(const char* str, struct comm_reply* r)
{
	switch(r->srctype) {
	case 6: {
		char v6str[1024];
		p_ancil_addr(AF_INET6, &r->pktinfo.v6info.ipi6_addr,
			v6str, sizeof(v6str));
		log_info("%s: %s %d", str, v6str,
			r->pktinfo.v6info.ipi6_ifindex);
		break;
	}
	case 4: {
#ifdef IP_PKTINFO
		char addrstr[1024], specstr[1024];
		p_ancil_addr(AF_INET, &r->pktinfo.v4info.ipi_addr,
			addrstr, sizeof(addrstr));
#ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST
		p_ancil_addr(AF_INET, &r->pktinfo.v4info.ipi_spec_dst,
			specstr, sizeof(specstr));
#else
		specstr[0]=0;
#endif
		log_info("%s: %d %s %s", str, r->pktinfo.v4info.ipi_ifindex,
			addrstr, specstr);
#elif defined(IP_RECVDSTADDR)
		char addrstr[1024];
		p_ancil_addr(AF_INET, &r->pktinfo.v4addr,
			addrstr, sizeof(addrstr));
		log_info("%s: %s", str, addrstr);
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
		break;
	}
	default:
		log_info("%s: unknown srctype %d", str, r->srctype);
		break;
	}
}
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG||HAVE_SENDMSG */
495 
496 /** send a UDP reply over specified interface*/
497 static int
498 comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet,
499 	struct sockaddr* addr, socklen_t addrlen, struct comm_reply* r)
500 {
501 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_SENDMSG)
502 	ssize_t sent;
503 	struct msghdr msg;
504 	struct iovec iov[1];
505 	char control[256];
506 #ifndef S_SPLINT_S
507 	struct cmsghdr *cmsg;
508 #endif /* S_SPLINT_S */
509 
510 	log_assert(c->fd != -1);
511 #ifdef UNBOUND_DEBUG
512 	if(sldns_buffer_remaining(packet) == 0)
513 		log_err("error: send empty UDP packet");
514 #endif
515 	log_assert(addr && addrlen > 0);
516 
517 	msg.msg_name = addr;
518 	msg.msg_namelen = addrlen;
519 	iov[0].iov_base = sldns_buffer_begin(packet);
520 	iov[0].iov_len = sldns_buffer_remaining(packet);
521 	msg.msg_iov = iov;
522 	msg.msg_iovlen = 1;
523 	msg.msg_control = control;
524 #ifndef S_SPLINT_S
525 	msg.msg_controllen = sizeof(control);
526 #endif /* S_SPLINT_S */
527 	msg.msg_flags = 0;
528 
529 #ifndef S_SPLINT_S
530 	cmsg = CMSG_FIRSTHDR(&msg);
531 	if(r->srctype == 4) {
532 #ifdef IP_PKTINFO
533 		void* cmsg_data;
534 		msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo));
535 		log_assert(msg.msg_controllen <= sizeof(control));
536 		cmsg->cmsg_level = IPPROTO_IP;
537 		cmsg->cmsg_type = IP_PKTINFO;
538 		memmove(CMSG_DATA(cmsg), &r->pktinfo.v4info,
539 			sizeof(struct in_pktinfo));
540 		/* unset the ifindex to not bypass the routing tables */
541 		cmsg_data = CMSG_DATA(cmsg);
542 		((struct in_pktinfo *) cmsg_data)->ipi_ifindex = 0;
543 		cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
544 #elif defined(IP_SENDSRCADDR)
545 		msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr));
546 		log_assert(msg.msg_controllen <= sizeof(control));
547 		cmsg->cmsg_level = IPPROTO_IP;
548 		cmsg->cmsg_type = IP_SENDSRCADDR;
549 		memmove(CMSG_DATA(cmsg), &r->pktinfo.v4addr,
550 			sizeof(struct in_addr));
551 		cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
552 #else
553 		verbose(VERB_ALGO, "no IP_PKTINFO or IP_SENDSRCADDR");
554 		msg.msg_control = NULL;
555 #endif /* IP_PKTINFO or IP_SENDSRCADDR */
556 	} else if(r->srctype == 6) {
557 		void* cmsg_data;
558 		msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
559 		log_assert(msg.msg_controllen <= sizeof(control));
560 		cmsg->cmsg_level = IPPROTO_IPV6;
561 		cmsg->cmsg_type = IPV6_PKTINFO;
562 		memmove(CMSG_DATA(cmsg), &r->pktinfo.v6info,
563 			sizeof(struct in6_pktinfo));
564 		/* unset the ifindex to not bypass the routing tables */
565 		cmsg_data = CMSG_DATA(cmsg);
566 		((struct in6_pktinfo *) cmsg_data)->ipi6_ifindex = 0;
567 		cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
568 	} else {
569 		/* try to pass all 0 to use default route */
570 		msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
571 		log_assert(msg.msg_controllen <= sizeof(control));
572 		cmsg->cmsg_level = IPPROTO_IPV6;
573 		cmsg->cmsg_type = IPV6_PKTINFO;
574 		memset(CMSG_DATA(cmsg), 0, sizeof(struct in6_pktinfo));
575 		cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
576 	}
577 #endif /* S_SPLINT_S */
578 	if(verbosity >= VERB_ALGO)
579 		p_ancil("send_udp over interface", r);
580 	sent = sendmsg(c->fd, &msg, 0);
581 	if(sent == -1) {
582 		/* try again and block, waiting for IO to complete,
583 		 * we want to send the answer, and we will wait for
584 		 * the ethernet interface buffer to have space. */
585 #ifndef USE_WINSOCK
586 		if(errno == EAGAIN ||
587 #  ifdef EWOULDBLOCK
588 			errno == EWOULDBLOCK ||
589 #  endif
590 			errno == ENOBUFS) {
591 #else
592 		if(WSAGetLastError() == WSAEINPROGRESS ||
593 			WSAGetLastError() == WSAENOBUFS ||
594 			WSAGetLastError() == WSAEWOULDBLOCK) {
595 #endif
596 			int e;
597 			fd_set_block(c->fd);
598 			sent = sendmsg(c->fd, &msg, 0);
599 			e = errno;
600 			fd_set_nonblock(c->fd);
601 			errno = e;
602 		}
603 	}
604 	if(sent == -1) {
605 		if(!udp_send_errno_needs_log(addr, addrlen))
606 			return 0;
607 		verbose(VERB_OPS, "sendmsg failed: %s", strerror(errno));
608 		log_addr(VERB_OPS, "remote address is",
609 			(struct sockaddr_storage*)addr, addrlen);
610 #ifdef __NetBSD__
611 		/* netbsd 7 has IP_PKTINFO for recv but not send */
612 		if(errno == EINVAL && r->srctype == 4)
613 			log_err("sendmsg: No support for sendmsg(IP_PKTINFO). "
614 				"Please disable interface-automatic");
615 #endif
616 		return 0;
617 	} else if((size_t)sent != sldns_buffer_remaining(packet)) {
618 		log_err("sent %d in place of %d bytes",
619 			(int)sent, (int)sldns_buffer_remaining(packet));
620 		return 0;
621 	}
622 	return 1;
623 #else
624 	(void)c;
625 	(void)packet;
626 	(void)addr;
627 	(void)addrlen;
628 	(void)r;
629 	log_err("sendmsg: IPV6_PKTINFO not supported");
630 	return 0;
631 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_SENDMSG */
632 }
633 
/**
 * Event callback for a UDP socket that is read with ancillary data.
 * Reads up to NUM_UDP_PER_SELECT datagrams with recvmsg, records the
 * packet info from the control messages in the reply and calls the
 * commpoint callback for each; a nonzero callback result sends the buffer
 * back at once via comm_point_send_udp_msg_if.
 * Without IPV6_PKTINFO and recvmsg support this exits fatally.
 * @param fd: the socket that became readable.
 * @param event: event flags; nothing is done unless EV_READ is set.
 * @param arg: the commpoint (struct comm_point*) of type comm_udp.
 */
void
comm_point_udp_ancil_callback(int fd, short event, void* arg)
{
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
	struct comm_reply rep;
	struct msghdr msg;
	struct iovec iov[1];
	ssize_t rcv;
	/* the CMSG_ macros access this buffer as struct cmsghdr, the union
	 * gives the bytes the alignment that such access requires */
	union {
		struct cmsghdr hdr;
		char buf[256];
	} ancil;
	int i;
#ifndef S_SPLINT_S
	struct cmsghdr* cmsg;
#endif /* S_SPLINT_S */

	rep.c = (struct comm_point*)arg;
	log_assert(rep.c->type == comm_udp);

	if(!(event&EV_READ))
		return;
	log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
	comm_base_now(rep.c->ev->base);
	for(i=0; i<NUM_UDP_PER_SELECT; i++) {
		sldns_buffer_clear(rep.c->buffer);
		rep.addrlen = (socklen_t)sizeof(rep.addr);
		log_assert(fd != -1);
		log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
		msg.msg_name = &rep.addr;
		msg.msg_namelen = (socklen_t)sizeof(rep.addr);
		iov[0].iov_base = sldns_buffer_begin(rep.c->buffer);
		iov[0].iov_len = sldns_buffer_remaining(rep.c->buffer);
		msg.msg_iov = iov;
		msg.msg_iovlen = 1;
		msg.msg_control = ancil.buf;
#ifndef S_SPLINT_S
		msg.msg_controllen = sizeof(ancil.buf);
#endif /* S_SPLINT_S */
		msg.msg_flags = 0;
		rcv = recvmsg(fd, &msg, 0);
		if(rcv == -1) {
			if(errno != EAGAIN && errno != EINTR) {
				log_err("recvmsg failed: %s", strerror(errno));
			}
			return;
		}
		rep.addrlen = msg.msg_namelen;
		sldns_buffer_skip(rep.c->buffer, rcv);
		sldns_buffer_flip(rep.c->buffer);
		/* stays 0 if no packet info control message is present */
		rep.srctype = 0;
#ifndef S_SPLINT_S
		for(cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
			cmsg = CMSG_NXTHDR(&msg, cmsg)) {
			if( cmsg->cmsg_level == IPPROTO_IPV6 &&
				cmsg->cmsg_type == IPV6_PKTINFO) {
				rep.srctype = 6;
				memmove(&rep.pktinfo.v6info, CMSG_DATA(cmsg),
					sizeof(struct in6_pktinfo));
				break;
#ifdef IP_PKTINFO
			} else if( cmsg->cmsg_level == IPPROTO_IP &&
				cmsg->cmsg_type == IP_PKTINFO) {
				rep.srctype = 4;
				memmove(&rep.pktinfo.v4info, CMSG_DATA(cmsg),
					sizeof(struct in_pktinfo));
				break;
#elif defined(IP_RECVDSTADDR)
			} else if( cmsg->cmsg_level == IPPROTO_IP &&
				cmsg->cmsg_type == IP_RECVDSTADDR) {
				rep.srctype = 4;
				memmove(&rep.pktinfo.v4addr, CMSG_DATA(cmsg),
					sizeof(struct in_addr));
				break;
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
			}
		}
		if(verbosity >= VERB_ALGO)
			p_ancil("receive_udp on interface", &rep);
#endif /* S_SPLINT_S */
		fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
		if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
			/* send back immediate reply */
			(void)comm_point_send_udp_msg_if(rep.c, rep.c->buffer,
				(struct sockaddr*)&rep.addr, rep.addrlen, &rep);
		}
		/* commpoint closed (fd is -1) or its fd was replaced during
		 * the callback: stop reading from the fd of this event, the
		 * same test as comm_point_udp_callback uses */
		if(rep.c->fd != fd)
			break;
	}
#else
	(void)fd;
	(void)event;
	(void)arg;
	fatal_exit("recvmsg: No support for IPV6_PKTINFO. "
		"Please disable interface-automatic");
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */
}
728 
729 void
730 comm_point_udp_callback(int fd, short event, void* arg)
731 {
732 	struct comm_reply rep;
733 	ssize_t rcv;
734 	int i;
735 
736 	rep.c = (struct comm_point*)arg;
737 	log_assert(rep.c->type == comm_udp);
738 
739 	if(!(event&EV_READ))
740 		return;
741 	log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
742 	comm_base_now(rep.c->ev->base);
743 	for(i=0; i<NUM_UDP_PER_SELECT; i++) {
744 		sldns_buffer_clear(rep.c->buffer);
745 		rep.addrlen = (socklen_t)sizeof(rep.addr);
746 		log_assert(fd != -1);
747 		log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
748 		rcv = recvfrom(fd, (void*)sldns_buffer_begin(rep.c->buffer),
749 			sldns_buffer_remaining(rep.c->buffer), 0,
750 			(struct sockaddr*)&rep.addr, &rep.addrlen);
751 		if(rcv == -1) {
752 #ifndef USE_WINSOCK
753 			if(errno != EAGAIN && errno != EINTR)
754 				log_err("recvfrom %d failed: %s",
755 					fd, strerror(errno));
756 #else
757 			if(WSAGetLastError() != WSAEINPROGRESS &&
758 				WSAGetLastError() != WSAECONNRESET &&
759 				WSAGetLastError()!= WSAEWOULDBLOCK)
760 				log_err("recvfrom failed: %s",
761 					wsa_strerror(WSAGetLastError()));
762 #endif
763 			return;
764 		}
765 		sldns_buffer_skip(rep.c->buffer, rcv);
766 		sldns_buffer_flip(rep.c->buffer);
767 		rep.srctype = 0;
768 		fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
769 		if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
770 			/* send back immediate reply */
771 			(void)comm_point_send_udp_msg(rep.c, rep.c->buffer,
772 				(struct sockaddr*)&rep.addr, rep.addrlen);
773 		}
774 		if(rep.c->fd != fd) /* commpoint closed to -1 or reused for
775 		another UDP port. Note rep.c cannot be reused with TCP fd. */
776 			break;
777 	}
778 }
779 
780 /** Use a new tcp handler for new query fd, set to read query */
781 static void
782 setup_tcp_handler(struct comm_point* c, int fd)
783 {
784 	log_assert(c->type == comm_tcp);
785 	log_assert(c->fd == -1);
786 	sldns_buffer_clear(c->buffer);
787 	c->tcp_is_reading = 1;
788 	c->tcp_byte_count = 0;
789 	comm_point_start_listening(c, fd, TCP_QUERY_TIMEOUT);
790 }
791 
792 void comm_base_handle_slow_accept(int ATTR_UNUSED(fd),
793 	short ATTR_UNUSED(event), void* arg)
794 {
795 	struct comm_base* b = (struct comm_base*)arg;
796 	/* timeout for the slow accept, re-enable accepts again */
797 	if(b->start_accept) {
798 		verbose(VERB_ALGO, "wait is over, slow accept disabled");
799 		fptr_ok(fptr_whitelist_start_accept(b->start_accept));
800 		(*b->start_accept)(b->cb_arg);
801 		b->eb->slow_accept_enabled = 0;
802 	}
803 }
804 
805 int comm_point_perform_accept(struct comm_point* c,
806 	struct sockaddr_storage* addr, socklen_t* addrlen)
807 {
808 	int new_fd;
809 	*addrlen = (socklen_t)sizeof(*addr);
810 	new_fd = accept(c->fd, (struct sockaddr*)addr, addrlen);
811 	if(new_fd == -1) {
812 #ifndef USE_WINSOCK
813 		/* EINTR is signal interrupt. others are closed connection. */
814 		if(	errno == EINTR || errno == EAGAIN
815 #ifdef EWOULDBLOCK
816 			|| errno == EWOULDBLOCK
817 #endif
818 #ifdef ECONNABORTED
819 			|| errno == ECONNABORTED
820 #endif
821 #ifdef EPROTO
822 			|| errno == EPROTO
823 #endif /* EPROTO */
824 			)
825 			return -1;
826 #if defined(ENFILE) && defined(EMFILE)
827 		if(errno == ENFILE || errno == EMFILE) {
828 			/* out of file descriptors, likely outside of our
829 			 * control. stop accept() calls for some time */
830 			if(c->ev->base->stop_accept) {
831 				struct comm_base* b = c->ev->base;
832 				struct timeval tv;
833 				verbose(VERB_ALGO, "out of file descriptors: "
834 					"slow accept");
835 				b->eb->slow_accept_enabled = 1;
836 				fptr_ok(fptr_whitelist_stop_accept(
837 					b->stop_accept));
838 				(*b->stop_accept)(b->cb_arg);
839 				/* set timeout, no mallocs */
840 				tv.tv_sec = NETEVENT_SLOW_ACCEPT_TIME/1000;
841 				tv.tv_usec = NETEVENT_SLOW_ACCEPT_TIME%1000;
842 				event_set(&b->eb->slow_accept, -1, EV_TIMEOUT,
843 					comm_base_handle_slow_accept, b);
844 				if(event_base_set(b->eb->base,
845 					&b->eb->slow_accept) != 0) {
846 					/* we do not want to log here, because
847 					 * that would spam the logfiles.
848 					 * error: "event_base_set failed." */
849 				}
850 				if(event_add(&b->eb->slow_accept, &tv) != 0) {
851 					/* we do not want to log here,
852 					 * error: "event_add failed." */
853 				}
854 			}
855 			return -1;
856 		}
857 #endif
858 		log_err_addr("accept failed", strerror(errno), addr, *addrlen);
859 #else /* USE_WINSOCK */
860 		if(WSAGetLastError() == WSAEINPROGRESS ||
861 			WSAGetLastError() == WSAECONNRESET)
862 			return -1;
863 		if(WSAGetLastError() == WSAEWOULDBLOCK) {
864 			winsock_tcp_wouldblock(&c->ev->ev, EV_READ);
865 			return -1;
866 		}
867 		log_err_addr("accept failed", wsa_strerror(WSAGetLastError()),
868 			addr, *addrlen);
869 #endif
870 		return -1;
871 	}
872 	fd_set_nonblock(new_fd);
873 	return new_fd;
874 }
875 
876 #ifdef USE_WINSOCK
877 static long win_bio_cb(BIO *b, int oper, const char* ATTR_UNUSED(argp),
878         int ATTR_UNUSED(argi), long argl, long retvalue)
879 {
880 	verbose(VERB_ALGO, "bio_cb %d, %s %s %s", oper,
881 		(oper&BIO_CB_RETURN)?"return":"before",
882 		(oper&BIO_CB_READ)?"read":((oper&BIO_CB_WRITE)?"write":"other"),
883 		WSAGetLastError()==WSAEWOULDBLOCK?"wsawb":"");
884 	/* on windows, check if previous operation caused EWOULDBLOCK */
885 	if( (oper == (BIO_CB_READ|BIO_CB_RETURN) && argl == 0) ||
886 		(oper == (BIO_CB_GETS|BIO_CB_RETURN) && argl == 0)) {
887 		if(WSAGetLastError() == WSAEWOULDBLOCK)
888 			winsock_tcp_wouldblock((struct event*)
889 				BIO_get_callback_arg(b), EV_READ);
890 	}
891 	if( (oper == (BIO_CB_WRITE|BIO_CB_RETURN) && argl == 0) ||
892 		(oper == (BIO_CB_PUTS|BIO_CB_RETURN) && argl == 0)) {
893 		if(WSAGetLastError() == WSAEWOULDBLOCK)
894 			winsock_tcp_wouldblock((struct event*)
895 				BIO_get_callback_arg(b), EV_WRITE);
896 	}
897 	/* return original return value */
898 	return retvalue;
899 }
900 
901 /** set win bio callbacks for nonblocking operations */
902 void
903 comm_point_tcp_win_bio_cb(struct comm_point* c, void* thessl)
904 {
905 	SSL* ssl = (SSL*)thessl;
906 	/* set them both just in case, but usually they are the same BIO */
907 	BIO_set_callback(SSL_get_rbio(ssl), &win_bio_cb);
908 	BIO_set_callback_arg(SSL_get_rbio(ssl), (char*)&c->ev->ev);
909 	BIO_set_callback(SSL_get_wbio(ssl), &win_bio_cb);
910 	BIO_set_callback_arg(SSL_get_wbio(ssl), (char*)&c->ev->ev);
911 }
912 #endif
913 
914 void
915 comm_point_tcp_accept_callback(int fd, short event, void* arg)
916 {
917 	struct comm_point* c = (struct comm_point*)arg, *c_hdl;
918 	int new_fd;
919 	log_assert(c->type == comm_tcp_accept);
920 	if(!(event & EV_READ)) {
921 		log_info("ignoring tcp accept event %d", (int)event);
922 		return;
923 	}
924 	comm_base_now(c->ev->base);
925 	/* find free tcp handler. */
926 	if(!c->tcp_free) {
927 		log_warn("accepted too many tcp, connections full");
928 		return;
929 	}
930 	/* accept incoming connection. */
931 	c_hdl = c->tcp_free;
932 	log_assert(fd != -1);
933 	new_fd = comm_point_perform_accept(c, &c_hdl->repinfo.addr,
934 		&c_hdl->repinfo.addrlen);
935 	if(new_fd == -1)
936 		return;
937 	if(c->ssl) {
938 		c_hdl->ssl = incoming_ssl_fd(c->ssl, new_fd);
939 		if(!c_hdl->ssl) {
940 			c_hdl->fd = new_fd;
941 			comm_point_close(c_hdl);
942 			return;
943 		}
944 		c_hdl->ssl_shake_state = comm_ssl_shake_read;
945 #ifdef USE_WINSOCK
946 		comm_point_tcp_win_bio_cb(c_hdl, c_hdl->ssl);
947 #endif
948 	}
949 
950 	/* grab the tcp handler buffers */
951 	c->cur_tcp_count++;
952 	c->tcp_free = c_hdl->tcp_free;
953 	if(!c->tcp_free) {
954 		/* stop accepting incoming queries for now. */
955 		comm_point_stop_listening(c);
956 	}
957 	setup_tcp_handler(c_hdl, new_fd);
958 }
959 
960 /** Make tcp handler free for next assignment */
961 static void
962 reclaim_tcp_handler(struct comm_point* c)
963 {
964 	log_assert(c->type == comm_tcp);
965 	if(c->ssl) {
966 #ifdef HAVE_SSL
967 		SSL_shutdown(c->ssl);
968 		SSL_free(c->ssl);
969 		c->ssl = NULL;
970 #endif
971 	}
972 	comm_point_close(c);
973 	if(c->tcp_parent) {
974 		c->tcp_parent->cur_tcp_count--;
975 		c->tcp_free = c->tcp_parent->tcp_free;
976 		c->tcp_parent->tcp_free = c;
977 		if(!c->tcp_free) {
978 			/* re-enable listening on accept socket */
979 			comm_point_start_listening(c->tcp_parent, -1, -1);
980 		}
981 	}
982 }
983 
984 /** do the callback when writing is done */
985 static void
986 tcp_callback_writer(struct comm_point* c)
987 {
988 	log_assert(c->type == comm_tcp);
989 	sldns_buffer_clear(c->buffer);
990 	if(c->tcp_do_toggle_rw)
991 		c->tcp_is_reading = 1;
992 	c->tcp_byte_count = 0;
993 	/* switch from listening(write) to listening(read) */
994 	comm_point_stop_listening(c);
995 	comm_point_start_listening(c, -1, -1);
996 }
997 
998 /** do the callback when reading is done */
999 static void
1000 tcp_callback_reader(struct comm_point* c)
1001 {
1002 	log_assert(c->type == comm_tcp || c->type == comm_local);
1003 	sldns_buffer_flip(c->buffer);
1004 	if(c->tcp_do_toggle_rw)
1005 		c->tcp_is_reading = 0;
1006 	c->tcp_byte_count = 0;
1007 	if(c->type == comm_tcp)
1008 		comm_point_stop_listening(c);
1009 	fptr_ok(fptr_whitelist_comm_point(c->callback));
1010 	if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
1011 		comm_point_start_listening(c, -1, TCP_QUERY_TIMEOUT);
1012 	}
1013 }
1014 
/**
 * Continue the ssl handshake on a comm point.
 * @param c: comm point with an ssl connection.
 * @return 0 if the connection must be closed, 1 otherwise. When 1 is
 *	returned and ssl_shake_state is not comm_ssl_shake_none, the
 *	handshake still waits for an event.
 */
#ifdef HAVE_SSL
static int
ssl_handshake(struct comm_point* c)
{
	int rc, ssl_err;

	/* resume after a read or write that had to wait for the other
	 * direction */
	switch(c->ssl_shake_state) {
	case comm_ssl_shake_hs_read:
		/* read condition satisfied back to writing */
		comm_point_listen_for_rw(c, 1, 1);
		c->ssl_shake_state = comm_ssl_shake_none;
		return 1;
	case comm_ssl_shake_hs_write:
		/* write condition satisfied, back to reading */
		comm_point_listen_for_rw(c, 1, 0);
		c->ssl_shake_state = comm_ssl_shake_none;
		return 1;
	default:
		break;
	}

	ERR_clear_error();
	rc = SSL_do_handshake(c->ssl);
	if(rc == 1) {
		/* this is where peer verification could take place */
		log_addr(VERB_ALGO, "SSL DNS connection", &c->repinfo.addr,
			c->repinfo.addrlen);

		/* setup listen rw correctly */
		if(!c->tcp_is_reading) {
			comm_point_listen_for_rw(c, 1, 1);
		} else if(c->ssl_shake_state != comm_ssl_shake_read) {
			comm_point_listen_for_rw(c, 1, 0);
		}
		c->ssl_shake_state = comm_ssl_shake_none;
		return 1;
	}

	/* handshake not (yet) complete */
	ssl_err = SSL_get_error(c->ssl, rc);
	if(ssl_err == SSL_ERROR_WANT_READ) {
		if(c->ssl_shake_state != comm_ssl_shake_read) {
			c->ssl_shake_state = comm_ssl_shake_read;
			comm_point_listen_for_rw(c, 1, 0);
		}
		return 1;
	}
	if(ssl_err == SSL_ERROR_WANT_WRITE) {
		if(c->ssl_shake_state != comm_ssl_shake_write) {
			c->ssl_shake_state = comm_ssl_shake_write;
			comm_point_listen_for_rw(c, 0, 1);
		}
		return 1;
	}
	if(rc == 0) {
		return 0; /* closed */
	}
	if(ssl_err == SSL_ERROR_SYSCALL) {
		/* SYSCALL and errno==0 means closed uncleanly */
		if(errno != 0)
			log_err("SSL_handshake syscall: %s",
				strerror(errno));
		return 0;
	}
	log_crypto_err("ssl handshake failed");
	log_addr(1, "ssl handshake failed", &c->repinfo.addr,
		c->repinfo.addrlen);
	return 0;
}
#endif /* HAVE_SSL */
1080 
#ifdef HAVE_SSL
/**
 * Map a failed SSL_read (r <= 0) onto the return value of the read handler.
 * @param c: comm point with the ssl connection.
 * @param r: the value that SSL_read returned.
 * @return 0 if the connection must be closed, 1 to keep it and wait.
 */
static int
ssl_read_failed(struct comm_point* c, int r)
{
	switch(SSL_get_error(c->ssl, r)) {
	case SSL_ERROR_ZERO_RETURN:
		return 0; /* shutdown, closed */
	case SSL_ERROR_WANT_READ:
		return 1; /* read more later */
	case SSL_ERROR_WANT_WRITE:
		/* wait for the write condition before reading on */
		c->ssl_shake_state = comm_ssl_shake_hs_write;
		comm_point_listen_for_rw(c, 0, 1);
		return 1;
	case SSL_ERROR_SYSCALL:
		if(errno != 0)
			log_err("SSL_read syscall: %s",
				strerror(errno));
		return 0;
	default:
		break;
	}
	log_crypto_err("could not SSL_read");
	return 0;
}
#endif /* HAVE_SSL */

/**
 * ssl read callback on TCP.
 * Finishes a pending handshake first, then reads the two byte length
 * prefix and the message itself; tcp_callback_reader() is called once the
 * buffer holds the complete message.
 * @param c: comm point with the ssl connection.
 * @return 0 if the connection must be closed, 1 otherwise. Always 0 when
 *	compiled without HAVE_SSL.
 */
static int
ssl_handle_read(struct comm_point* c)
{
#ifdef HAVE_SSL
	int r;
	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}
	if(c->tcp_byte_count < sizeof(uint16_t)) {
		uint16_t msglen;
		/* read length bytes */
		ERR_clear_error();
		r = SSL_read(c->ssl,
			(void*)sldns_buffer_at(c->buffer, c->tcp_byte_count),
			(int)(sizeof(uint16_t) - c->tcp_byte_count));
		if(r <= 0)
			return ssl_read_failed(c, r);
		c->tcp_byte_count += r;
		if(c->tcp_byte_count != sizeof(uint16_t))
			return 1;
		/* length prefix complete, validate it */
		msglen = sldns_buffer_read_u16_at(c->buffer, 0);
		if(msglen > sldns_buffer_capacity(c->buffer)) {
			verbose(VERB_QUERY, "ssl: dropped larger than buffer");
			return 0;
		}
		sldns_buffer_set_limit(c->buffer, msglen);
		if(sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
			verbose(VERB_QUERY, "ssl: dropped bogus too short.");
			return 0;
		}
		verbose(VERB_ALGO, "Reading ssl tcp query of length %d",
			(int)sldns_buffer_limit(c->buffer));
	}
	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
		(int)sldns_buffer_remaining(c->buffer));
	if(r <= 0)
		return ssl_read_failed(c, r);
	sldns_buffer_skip(c->buffer, (ssize_t)r);
	if(sldns_buffer_remaining(c->buffer) <= 0) {
		tcp_callback_reader(c);
	}
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
1167 
#ifdef HAVE_SSL
/**
 * Map a failed SSL_write (r <= 0) onto the return value of the write
 * handler.
 * @param c: comm point with the ssl connection.
 * @param r: the value that SSL_write returned.
 * @return 0 if the connection must be closed, 1 to keep it and wait.
 */
static int
ssl_write_failed(struct comm_point* c, int r)
{
	switch(SSL_get_error(c->ssl, r)) {
	case SSL_ERROR_ZERO_RETURN:
		return 0; /* closed */
	case SSL_ERROR_WANT_READ:
		c->ssl_shake_state = comm_ssl_shake_read;
		comm_point_listen_for_rw(c, 1, 0);
		return 1; /* wait for read condition */
	case SSL_ERROR_WANT_WRITE:
		return 1; /* write more later */
	case SSL_ERROR_SYSCALL:
		if(errno != 0)
			log_err("SSL_write syscall: %s",
				strerror(errno));
		return 0;
	default:
		break;
	}
	log_crypto_err("could not SSL_write");
	return 0;
}
#endif /* HAVE_SSL */

/**
 * ssl write callback on TCP.
 * Finishes a pending handshake first, then writes the two byte length
 * prefix followed by the buffer contents; tcp_callback_writer() is called
 * once everything has been written.
 * @param c: comm point with the ssl connection.
 * @return 0 if the connection must be closed, 1 otherwise. Always 0 when
 *	compiled without HAVE_SSL.
 */
static int
ssl_handle_write(struct comm_point* c)
{
#ifdef HAVE_SSL
	int r;
	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}
	/* ignore return, if fails we may simply block */
	(void)SSL_set_mode(c->ssl, SSL_MODE_ENABLE_PARTIAL_WRITE);
	if(c->tcp_byte_count < sizeof(uint16_t)) {
		/* length prefix in network byte order */
		uint16_t len = htons(sldns_buffer_limit(c->buffer));
		uint8_t* prefix = (uint8_t*)&len;
		ERR_clear_error();
		r = SSL_write(c->ssl, (void*)(prefix + c->tcp_byte_count),
			(int)(sizeof(uint16_t)-c->tcp_byte_count));
		if(r <= 0)
			return ssl_write_failed(c, r);
		c->tcp_byte_count += r;
		if(c->tcp_byte_count < sizeof(uint16_t))
			return 1;
		sldns_buffer_set_position(c->buffer, c->tcp_byte_count -
			sizeof(uint16_t));
		if(sldns_buffer_remaining(c->buffer) == 0) {
			tcp_callback_writer(c);
			return 1;
		}
	}
	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer),
		(int)sldns_buffer_remaining(c->buffer));
	if(r <= 0)
		return ssl_write_failed(c, r);
	sldns_buffer_skip(c->buffer, (ssize_t)r);

	if(sldns_buffer_remaining(c->buffer) == 0) {
		tcp_callback_writer(c);
	}
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
1251 
1252 /** handle ssl tcp connection with dns contents */
1253 static int
1254 ssl_handle_it(struct comm_point* c)
1255 {
1256 	if(c->tcp_is_reading)
1257 		return ssl_handle_read(c);
1258 	return ssl_handle_write(c);
1259 }
1260 
1261 /** Handle tcp reading callback.
1262  * @param fd: file descriptor of socket.
1263  * @param c: comm point to read from into buffer.
1264  * @param short_ok: if true, very short packets are OK (for comm_local).
1265  * @return: 0 on error
1266  */
1267 static int
1268 comm_point_tcp_handle_read(int fd, struct comm_point* c, int short_ok)
1269 {
1270 	ssize_t r;
1271 	log_assert(c->type == comm_tcp || c->type == comm_local);
1272 	if(c->ssl)
1273 		return ssl_handle_it(c);
1274 	if(!c->tcp_is_reading)
1275 		return 0;
1276 
1277 	log_assert(fd != -1);
1278 	if(c->tcp_byte_count < sizeof(uint16_t)) {
1279 		/* read length bytes */
1280 		r = recv(fd,(void*)sldns_buffer_at(c->buffer,c->tcp_byte_count),
1281 			sizeof(uint16_t)-c->tcp_byte_count, 0);
1282 		if(r == 0)
1283 			return 0;
1284 		else if(r == -1) {
1285 #ifndef USE_WINSOCK
1286 			if(errno == EINTR || errno == EAGAIN)
1287 				return 1;
1288 #ifdef ECONNRESET
1289 			if(errno == ECONNRESET && verbosity < 2)
1290 				return 0; /* silence reset by peer */
1291 #endif
1292 			log_err_addr("read (in tcp s)", strerror(errno),
1293 				&c->repinfo.addr, c->repinfo.addrlen);
1294 #else /* USE_WINSOCK */
1295 			if(WSAGetLastError() == WSAECONNRESET)
1296 				return 0;
1297 			if(WSAGetLastError() == WSAEINPROGRESS)
1298 				return 1;
1299 			if(WSAGetLastError() == WSAEWOULDBLOCK) {
1300 				winsock_tcp_wouldblock(&c->ev->ev, EV_READ);
1301 				return 1;
1302 			}
1303 			log_err_addr("read (in tcp s)",
1304 				wsa_strerror(WSAGetLastError()),
1305 				&c->repinfo.addr, c->repinfo.addrlen);
1306 #endif
1307 			return 0;
1308 		}
1309 		c->tcp_byte_count += r;
1310 		if(c->tcp_byte_count != sizeof(uint16_t))
1311 			return 1;
1312 		if(sldns_buffer_read_u16_at(c->buffer, 0) >
1313 			sldns_buffer_capacity(c->buffer)) {
1314 			verbose(VERB_QUERY, "tcp: dropped larger than buffer");
1315 			return 0;
1316 		}
1317 		sldns_buffer_set_limit(c->buffer,
1318 			sldns_buffer_read_u16_at(c->buffer, 0));
1319 		if(!short_ok &&
1320 			sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
1321 			verbose(VERB_QUERY, "tcp: dropped bogus too short.");
1322 			return 0;
1323 		}
1324 		verbose(VERB_ALGO, "Reading tcp query of length %d",
1325 			(int)sldns_buffer_limit(c->buffer));
1326 	}
1327 
1328 	log_assert(sldns_buffer_remaining(c->buffer) > 0);
1329 	r = recv(fd, (void*)sldns_buffer_current(c->buffer),
1330 		sldns_buffer_remaining(c->buffer), 0);
1331 	if(r == 0) {
1332 		return 0;
1333 	} else if(r == -1) {
1334 #ifndef USE_WINSOCK
1335 		if(errno == EINTR || errno == EAGAIN)
1336 			return 1;
1337 		log_err_addr("read (in tcp r)", strerror(errno),
1338 			&c->repinfo.addr, c->repinfo.addrlen);
1339 #else /* USE_WINSOCK */
1340 		if(WSAGetLastError() == WSAECONNRESET)
1341 			return 0;
1342 		if(WSAGetLastError() == WSAEINPROGRESS)
1343 			return 1;
1344 		if(WSAGetLastError() == WSAEWOULDBLOCK) {
1345 			winsock_tcp_wouldblock(&c->ev->ev, EV_READ);
1346 			return 1;
1347 		}
1348 		log_err_addr("read (in tcp r)",
1349 			wsa_strerror(WSAGetLastError()),
1350 			&c->repinfo.addr, c->repinfo.addrlen);
1351 #endif
1352 		return 0;
1353 	}
1354 	sldns_buffer_skip(c->buffer, r);
1355 	if(sldns_buffer_remaining(c->buffer) <= 0) {
1356 		tcp_callback_reader(c);
1357 	}
1358 	return 1;
1359 }
1360 
1361 /**
1362  * Handle tcp writing callback.
1363  * @param fd: file descriptor of socket.
1364  * @param c: comm point to write buffer out of.
1365  * @return: 0 on error
1366  */
1367 static int
1368 comm_point_tcp_handle_write(int fd, struct comm_point* c)
1369 {
1370 	ssize_t r;
1371 	log_assert(c->type == comm_tcp);
1372 	if(c->tcp_is_reading && !c->ssl)
1373 		return 0;
1374 	log_assert(fd != -1);
1375 	if(c->tcp_byte_count == 0 && c->tcp_check_nb_connect) {
1376 		/* check for pending error from nonblocking connect */
1377 		/* from Stevens, unix network programming, vol1, 3rd ed, p450*/
1378 		int error = 0;
1379 		socklen_t len = (socklen_t)sizeof(error);
1380 		if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error,
1381 			&len) < 0){
1382 #ifndef USE_WINSOCK
1383 			error = errno; /* on solaris errno is error */
1384 #else /* USE_WINSOCK */
1385 			error = WSAGetLastError();
1386 #endif
1387 		}
1388 #ifndef USE_WINSOCK
1389 #if defined(EINPROGRESS) && defined(EWOULDBLOCK)
1390 		if(error == EINPROGRESS || error == EWOULDBLOCK)
1391 			return 1; /* try again later */
1392 		else
1393 #endif
1394 		if(error != 0 && verbosity < 2)
1395 			return 0; /* silence lots of chatter in the logs */
1396                 else if(error != 0) {
1397 			log_err_addr("tcp connect", strerror(error),
1398 				&c->repinfo.addr, c->repinfo.addrlen);
1399 #else /* USE_WINSOCK */
1400 		/* examine error */
1401 		if(error == WSAEINPROGRESS)
1402 			return 1;
1403 		else if(error == WSAEWOULDBLOCK) {
1404 			winsock_tcp_wouldblock(&c->ev->ev, EV_WRITE);
1405 			return 1;
1406 		} else if(error != 0 && verbosity < 2)
1407 			return 0;
1408 		else if(error != 0) {
1409 			log_err_addr("tcp connect", wsa_strerror(error),
1410 				&c->repinfo.addr, c->repinfo.addrlen);
1411 #endif /* USE_WINSOCK */
1412 			return 0;
1413 		}
1414 	}
1415 	if(c->ssl)
1416 		return ssl_handle_it(c);
1417 
1418 	if(c->tcp_byte_count < sizeof(uint16_t)) {
1419 		uint16_t len = htons(sldns_buffer_limit(c->buffer));
1420 #ifdef HAVE_WRITEV
1421 		struct iovec iov[2];
1422 		iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
1423 		iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
1424 		iov[1].iov_base = sldns_buffer_begin(c->buffer);
1425 		iov[1].iov_len = sldns_buffer_limit(c->buffer);
1426 		log_assert(iov[0].iov_len > 0);
1427 		log_assert(iov[1].iov_len > 0);
1428 		r = writev(fd, iov, 2);
1429 #else /* HAVE_WRITEV */
1430 		r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_byte_count),
1431 			sizeof(uint16_t)-c->tcp_byte_count, 0);
1432 #endif /* HAVE_WRITEV */
1433 		if(r == -1) {
1434 #ifndef USE_WINSOCK
1435 #  ifdef EPIPE
1436                 	if(errno == EPIPE && verbosity < 2)
1437                         	return 0; /* silence 'broken pipe' */
1438   #endif
1439 			if(errno == EINTR || errno == EAGAIN)
1440 				return 1;
1441 #  ifdef HAVE_WRITEV
1442 			log_err_addr("tcp writev", strerror(errno),
1443 				&c->repinfo.addr, c->repinfo.addrlen);
1444 #  else /* HAVE_WRITEV */
1445 			log_err_addr("tcp send s", strerror(errno),
1446 				&c->repinfo.addr, c->repinfo.addrlen);
1447 #  endif /* HAVE_WRITEV */
1448 #else
1449 			if(WSAGetLastError() == WSAENOTCONN)
1450 				return 1;
1451 			if(WSAGetLastError() == WSAEINPROGRESS)
1452 				return 1;
1453 			if(WSAGetLastError() == WSAEWOULDBLOCK) {
1454 				winsock_tcp_wouldblock(&c->ev->ev, EV_WRITE);
1455 				return 1;
1456 			}
1457 			log_err_addr("tcp send s",
1458 				wsa_strerror(WSAGetLastError()),
1459 				&c->repinfo.addr, c->repinfo.addrlen);
1460 #endif
1461 			return 0;
1462 		}
1463 		c->tcp_byte_count += r;
1464 		if(c->tcp_byte_count < sizeof(uint16_t))
1465 			return 1;
1466 		sldns_buffer_set_position(c->buffer, c->tcp_byte_count -
1467 			sizeof(uint16_t));
1468 		if(sldns_buffer_remaining(c->buffer) == 0) {
1469 			tcp_callback_writer(c);
1470 			return 1;
1471 		}
1472 	}
1473 	log_assert(sldns_buffer_remaining(c->buffer) > 0);
1474 	r = send(fd, (void*)sldns_buffer_current(c->buffer),
1475 		sldns_buffer_remaining(c->buffer), 0);
1476 	if(r == -1) {
1477 #ifndef USE_WINSOCK
1478 		if(errno == EINTR || errno == EAGAIN)
1479 			return 1;
1480 		log_err_addr("tcp send r", strerror(errno),
1481 			&c->repinfo.addr, c->repinfo.addrlen);
1482 #else
1483 		if(WSAGetLastError() == WSAEINPROGRESS)
1484 			return 1;
1485 		if(WSAGetLastError() == WSAEWOULDBLOCK) {
1486 			winsock_tcp_wouldblock(&c->ev->ev, EV_WRITE);
1487 			return 1;
1488 		}
1489 		log_err_addr("tcp send r", wsa_strerror(WSAGetLastError()),
1490 			&c->repinfo.addr, c->repinfo.addrlen);
1491 #endif
1492 		return 0;
1493 	}
1494 	sldns_buffer_skip(c->buffer, r);
1495 
1496 	if(sldns_buffer_remaining(c->buffer) == 0) {
1497 		tcp_callback_writer(c);
1498 	}
1499 
1500 	return 1;
1501 }
1502 
1503 void
1504 comm_point_tcp_handle_callback(int fd, short event, void* arg)
1505 {
1506 	struct comm_point* c = (struct comm_point*)arg;
1507 	log_assert(c->type == comm_tcp);
1508 	comm_base_now(c->ev->base);
1509 
1510 	if(event&EV_READ) {
1511 		if(!comm_point_tcp_handle_read(fd, c, 0)) {
1512 			reclaim_tcp_handler(c);
1513 			if(!c->tcp_do_close) {
1514 				fptr_ok(fptr_whitelist_comm_point(
1515 					c->callback));
1516 				(void)(*c->callback)(c, c->cb_arg,
1517 					NETEVENT_CLOSED, NULL);
1518 			}
1519 		}
1520 		return;
1521 	}
1522 	if(event&EV_WRITE) {
1523 		if(!comm_point_tcp_handle_write(fd, c)) {
1524 			reclaim_tcp_handler(c);
1525 			if(!c->tcp_do_close) {
1526 				fptr_ok(fptr_whitelist_comm_point(
1527 					c->callback));
1528 				(void)(*c->callback)(c, c->cb_arg,
1529 					NETEVENT_CLOSED, NULL);
1530 			}
1531 		}
1532 		return;
1533 	}
1534 	if(event&EV_TIMEOUT) {
1535 		verbose(VERB_QUERY, "tcp took too long, dropped");
1536 		reclaim_tcp_handler(c);
1537 		if(!c->tcp_do_close) {
1538 			fptr_ok(fptr_whitelist_comm_point(c->callback));
1539 			(void)(*c->callback)(c, c->cb_arg,
1540 				NETEVENT_TIMEOUT, NULL);
1541 		}
1542 		return;
1543 	}
1544 	log_err("Ignored event %d for tcphdl.", event);
1545 }
1546 
1547 void comm_point_local_handle_callback(int fd, short event, void* arg)
1548 {
1549 	struct comm_point* c = (struct comm_point*)arg;
1550 	log_assert(c->type == comm_local);
1551 	comm_base_now(c->ev->base);
1552 
1553 	if(event&EV_READ) {
1554 		if(!comm_point_tcp_handle_read(fd, c, 1)) {
1555 			fptr_ok(fptr_whitelist_comm_point(c->callback));
1556 			(void)(*c->callback)(c, c->cb_arg, NETEVENT_CLOSED,
1557 				NULL);
1558 		}
1559 		return;
1560 	}
1561 	log_err("Ignored event %d for localhdl.", event);
1562 }
1563 
1564 void comm_point_raw_handle_callback(int ATTR_UNUSED(fd),
1565 	short event, void* arg)
1566 {
1567 	struct comm_point* c = (struct comm_point*)arg;
1568 	int err = NETEVENT_NOERROR;
1569 	log_assert(c->type == comm_raw);
1570 	comm_base_now(c->ev->base);
1571 
1572 	if(event&EV_TIMEOUT)
1573 		err = NETEVENT_TIMEOUT;
1574 	fptr_ok(fptr_whitelist_comm_point_raw(c->callback));
1575 	(void)(*c->callback)(c, c->cb_arg, err, NULL);
1576 }
1577 
1578 struct comm_point*
1579 comm_point_create_udp(struct comm_base *base, int fd, sldns_buffer* buffer,
1580 	comm_point_callback_t* callback, void* callback_arg)
1581 {
1582 	struct comm_point* c = (struct comm_point*)calloc(1,
1583 		sizeof(struct comm_point));
1584 	short evbits;
1585 	if(!c)
1586 		return NULL;
1587 	c->ev = (struct internal_event*)calloc(1,
1588 		sizeof(struct internal_event));
1589 	if(!c->ev) {
1590 		free(c);
1591 		return NULL;
1592 	}
1593 	c->ev->base = base;
1594 	c->fd = fd;
1595 	c->buffer = buffer;
1596 	c->timeout = NULL;
1597 	c->tcp_is_reading = 0;
1598 	c->tcp_byte_count = 0;
1599 	c->tcp_parent = NULL;
1600 	c->max_tcp_count = 0;
1601 	c->cur_tcp_count = 0;
1602 	c->tcp_handlers = NULL;
1603 	c->tcp_free = NULL;
1604 	c->type = comm_udp;
1605 	c->tcp_do_close = 0;
1606 	c->do_not_close = 0;
1607 	c->tcp_do_toggle_rw = 0;
1608 	c->tcp_check_nb_connect = 0;
1609 	c->inuse = 0;
1610 	c->callback = callback;
1611 	c->cb_arg = callback_arg;
1612 	evbits = EV_READ | EV_PERSIST;
1613 	/* libevent stuff */
1614 	event_set(&c->ev->ev, c->fd, evbits, comm_point_udp_callback, c);
1615 	if(event_base_set(base->eb->base, &c->ev->ev) != 0) {
1616 		log_err("could not baseset udp event");
1617 		comm_point_delete(c);
1618 		return NULL;
1619 	}
1620 	if(fd!=-1 && event_add(&c->ev->ev, c->timeout) != 0 ) {
1621 		log_err("could not add udp event");
1622 		comm_point_delete(c);
1623 		return NULL;
1624 	}
1625 	return c;
1626 }
1627 
1628 struct comm_point*
1629 comm_point_create_udp_ancil(struct comm_base *base, int fd,
1630 	sldns_buffer* buffer,
1631 	comm_point_callback_t* callback, void* callback_arg)
1632 {
1633 	struct comm_point* c = (struct comm_point*)calloc(1,
1634 		sizeof(struct comm_point));
1635 	short evbits;
1636 	if(!c)
1637 		return NULL;
1638 	c->ev = (struct internal_event*)calloc(1,
1639 		sizeof(struct internal_event));
1640 	if(!c->ev) {
1641 		free(c);
1642 		return NULL;
1643 	}
1644 	c->ev->base = base;
1645 	c->fd = fd;
1646 	c->buffer = buffer;
1647 	c->timeout = NULL;
1648 	c->tcp_is_reading = 0;
1649 	c->tcp_byte_count = 0;
1650 	c->tcp_parent = NULL;
1651 	c->max_tcp_count = 0;
1652 	c->cur_tcp_count = 0;
1653 	c->tcp_handlers = NULL;
1654 	c->tcp_free = NULL;
1655 	c->type = comm_udp;
1656 	c->tcp_do_close = 0;
1657 	c->do_not_close = 0;
1658 	c->inuse = 0;
1659 	c->tcp_do_toggle_rw = 0;
1660 	c->tcp_check_nb_connect = 0;
1661 	c->callback = callback;
1662 	c->cb_arg = callback_arg;
1663 	evbits = EV_READ | EV_PERSIST;
1664 	/* libevent stuff */
1665 	event_set(&c->ev->ev, c->fd, evbits, comm_point_udp_ancil_callback, c);
1666 	if(event_base_set(base->eb->base, &c->ev->ev) != 0) {
1667 		log_err("could not baseset udp event");
1668 		comm_point_delete(c);
1669 		return NULL;
1670 	}
1671 	if(fd!=-1 && event_add(&c->ev->ev, c->timeout) != 0 ) {
1672 		log_err("could not add udp event");
1673 		comm_point_delete(c);
1674 		return NULL;
1675 	}
1676 	return c;
1677 }
1678 
1679 static struct comm_point*
1680 comm_point_create_tcp_handler(struct comm_base *base,
1681 	struct comm_point* parent, size_t bufsize,
1682         comm_point_callback_t* callback, void* callback_arg)
1683 {
1684 	struct comm_point* c = (struct comm_point*)calloc(1,
1685 		sizeof(struct comm_point));
1686 	short evbits;
1687 	if(!c)
1688 		return NULL;
1689 	c->ev = (struct internal_event*)calloc(1,
1690 		sizeof(struct internal_event));
1691 	if(!c->ev) {
1692 		free(c);
1693 		return NULL;
1694 	}
1695 	c->ev->base = base;
1696 	c->fd = -1;
1697 	c->buffer = sldns_buffer_new(bufsize);
1698 	if(!c->buffer) {
1699 		free(c->ev);
1700 		free(c);
1701 		return NULL;
1702 	}
1703 	c->timeout = (struct timeval*)malloc(sizeof(struct timeval));
1704 	if(!c->timeout) {
1705 		sldns_buffer_free(c->buffer);
1706 		free(c->ev);
1707 		free(c);
1708 		return NULL;
1709 	}
1710 	c->tcp_is_reading = 0;
1711 	c->tcp_byte_count = 0;
1712 	c->tcp_parent = parent;
1713 	c->max_tcp_count = 0;
1714 	c->cur_tcp_count = 0;
1715 	c->tcp_handlers = NULL;
1716 	c->tcp_free = NULL;
1717 	c->type = comm_tcp;
1718 	c->tcp_do_close = 0;
1719 	c->do_not_close = 0;
1720 	c->tcp_do_toggle_rw = 1;
1721 	c->tcp_check_nb_connect = 0;
1722 	c->repinfo.c = c;
1723 	c->callback = callback;
1724 	c->cb_arg = callback_arg;
1725 	/* add to parent free list */
1726 	c->tcp_free = parent->tcp_free;
1727 	parent->tcp_free = c;
1728 	/* libevent stuff */
1729 	evbits = EV_PERSIST | EV_READ | EV_TIMEOUT;
1730 	event_set(&c->ev->ev, c->fd, evbits, comm_point_tcp_handle_callback, c);
1731 	if(event_base_set(base->eb->base, &c->ev->ev) != 0)
1732 	{
1733 		log_err("could not basetset tcphdl event");
1734 		parent->tcp_free = c->tcp_free;
1735 		free(c->ev);
1736 		free(c);
1737 		return NULL;
1738 	}
1739 	return c;
1740 }
1741 
1742 struct comm_point*
1743 comm_point_create_tcp(struct comm_base *base, int fd, int num, size_t bufsize,
1744         comm_point_callback_t* callback, void* callback_arg)
1745 {
1746 	struct comm_point* c = (struct comm_point*)calloc(1,
1747 		sizeof(struct comm_point));
1748 	short evbits;
1749 	int i;
1750 	/* first allocate the TCP accept listener */
1751 	if(!c)
1752 		return NULL;
1753 	c->ev = (struct internal_event*)calloc(1,
1754 		sizeof(struct internal_event));
1755 	if(!c->ev) {
1756 		free(c);
1757 		return NULL;
1758 	}
1759 	c->ev->base = base;
1760 	c->fd = fd;
1761 	c->buffer = NULL;
1762 	c->timeout = NULL;
1763 	c->tcp_is_reading = 0;
1764 	c->tcp_byte_count = 0;
1765 	c->tcp_parent = NULL;
1766 	c->max_tcp_count = num;
1767 	c->cur_tcp_count = 0;
1768 	c->tcp_handlers = (struct comm_point**)calloc((size_t)num,
1769 		sizeof(struct comm_point*));
1770 	if(!c->tcp_handlers) {
1771 		free(c->ev);
1772 		free(c);
1773 		return NULL;
1774 	}
1775 	c->tcp_free = NULL;
1776 	c->type = comm_tcp_accept;
1777 	c->tcp_do_close = 0;
1778 	c->do_not_close = 0;
1779 	c->tcp_do_toggle_rw = 0;
1780 	c->tcp_check_nb_connect = 0;
1781 	c->callback = NULL;
1782 	c->cb_arg = NULL;
1783 	evbits = EV_READ | EV_PERSIST;
1784 	/* libevent stuff */
1785 	event_set(&c->ev->ev, c->fd, evbits, comm_point_tcp_accept_callback, c);
1786 	if(event_base_set(base->eb->base, &c->ev->ev) != 0 ||
1787 		event_add(&c->ev->ev, c->timeout) != 0 )
1788 	{
1789 		log_err("could not add tcpacc event");
1790 		comm_point_delete(c);
1791 		return NULL;
1792 	}
1793 
1794 	/* now prealloc the tcp handlers */
1795 	for(i=0; i<num; i++) {
1796 		c->tcp_handlers[i] = comm_point_create_tcp_handler(base,
1797 			c, bufsize, callback, callback_arg);
1798 		if(!c->tcp_handlers[i]) {
1799 			comm_point_delete(c);
1800 			return NULL;
1801 		}
1802 	}
1803 
1804 	return c;
1805 }
1806 
1807 struct comm_point*
1808 comm_point_create_tcp_out(struct comm_base *base, size_t bufsize,
1809         comm_point_callback_t* callback, void* callback_arg)
1810 {
1811 	struct comm_point* c = (struct comm_point*)calloc(1,
1812 		sizeof(struct comm_point));
1813 	short evbits;
1814 	if(!c)
1815 		return NULL;
1816 	c->ev = (struct internal_event*)calloc(1,
1817 		sizeof(struct internal_event));
1818 	if(!c->ev) {
1819 		free(c);
1820 		return NULL;
1821 	}
1822 	c->ev->base = base;
1823 	c->fd = -1;
1824 	c->buffer = sldns_buffer_new(bufsize);
1825 	if(!c->buffer) {
1826 		free(c->ev);
1827 		free(c);
1828 		return NULL;
1829 	}
1830 	c->timeout = NULL;
1831 	c->tcp_is_reading = 0;
1832 	c->tcp_byte_count = 0;
1833 	c->tcp_parent = NULL;
1834 	c->max_tcp_count = 0;
1835 	c->cur_tcp_count = 0;
1836 	c->tcp_handlers = NULL;
1837 	c->tcp_free = NULL;
1838 	c->type = comm_tcp;
1839 	c->tcp_do_close = 0;
1840 	c->do_not_close = 0;
1841 	c->tcp_do_toggle_rw = 1;
1842 	c->tcp_check_nb_connect = 1;
1843 	c->repinfo.c = c;
1844 	c->callback = callback;
1845 	c->cb_arg = callback_arg;
1846 	evbits = EV_PERSIST | EV_WRITE;
1847 	event_set(&c->ev->ev, c->fd, evbits, comm_point_tcp_handle_callback, c);
1848 	if(event_base_set(base->eb->base, &c->ev->ev) != 0)
1849 	{
1850 		log_err("could not basetset tcpout event");
1851 		sldns_buffer_free(c->buffer);
1852 		free(c->ev);
1853 		free(c);
1854 		return NULL;
1855 	}
1856 
1857 	return c;
1858 }
1859 
1860 struct comm_point*
1861 comm_point_create_local(struct comm_base *base, int fd, size_t bufsize,
1862         comm_point_callback_t* callback, void* callback_arg)
1863 {
1864 	struct comm_point* c = (struct comm_point*)calloc(1,
1865 		sizeof(struct comm_point));
1866 	short evbits;
1867 	if(!c)
1868 		return NULL;
1869 	c->ev = (struct internal_event*)calloc(1,
1870 		sizeof(struct internal_event));
1871 	if(!c->ev) {
1872 		free(c);
1873 		return NULL;
1874 	}
1875 	c->ev->base = base;
1876 	c->fd = fd;
1877 	c->buffer = sldns_buffer_new(bufsize);
1878 	if(!c->buffer) {
1879 		free(c->ev);
1880 		free(c);
1881 		return NULL;
1882 	}
1883 	c->timeout = NULL;
1884 	c->tcp_is_reading = 1;
1885 	c->tcp_byte_count = 0;
1886 	c->tcp_parent = NULL;
1887 	c->max_tcp_count = 0;
1888 	c->cur_tcp_count = 0;
1889 	c->tcp_handlers = NULL;
1890 	c->tcp_free = NULL;
1891 	c->type = comm_local;
1892 	c->tcp_do_close = 0;
1893 	c->do_not_close = 1;
1894 	c->tcp_do_toggle_rw = 0;
1895 	c->tcp_check_nb_connect = 0;
1896 	c->callback = callback;
1897 	c->cb_arg = callback_arg;
1898 	/* libevent stuff */
1899 	evbits = EV_PERSIST | EV_READ;
1900 	event_set(&c->ev->ev, c->fd, evbits, comm_point_local_handle_callback,
1901 		c);
1902 	if(event_base_set(base->eb->base, &c->ev->ev) != 0 ||
1903 		event_add(&c->ev->ev, c->timeout) != 0 )
1904 	{
1905 		log_err("could not add localhdl event");
1906 		free(c->ev);
1907 		free(c);
1908 		return NULL;
1909 	}
1910 	return c;
1911 }
1912 
1913 struct comm_point*
1914 comm_point_create_raw(struct comm_base* base, int fd, int writing,
1915 	comm_point_callback_t* callback, void* callback_arg)
1916 {
1917 	struct comm_point* c = (struct comm_point*)calloc(1,
1918 		sizeof(struct comm_point));
1919 	short evbits;
1920 	if(!c)
1921 		return NULL;
1922 	c->ev = (struct internal_event*)calloc(1,
1923 		sizeof(struct internal_event));
1924 	if(!c->ev) {
1925 		free(c);
1926 		return NULL;
1927 	}
1928 	c->ev->base = base;
1929 	c->fd = fd;
1930 	c->buffer = NULL;
1931 	c->timeout = NULL;
1932 	c->tcp_is_reading = 0;
1933 	c->tcp_byte_count = 0;
1934 	c->tcp_parent = NULL;
1935 	c->max_tcp_count = 0;
1936 	c->cur_tcp_count = 0;
1937 	c->tcp_handlers = NULL;
1938 	c->tcp_free = NULL;
1939 	c->type = comm_raw;
1940 	c->tcp_do_close = 0;
1941 	c->do_not_close = 1;
1942 	c->tcp_do_toggle_rw = 0;
1943 	c->tcp_check_nb_connect = 0;
1944 	c->callback = callback;
1945 	c->cb_arg = callback_arg;
1946 	/* libevent stuff */
1947 	if(writing)
1948 		evbits = EV_PERSIST | EV_WRITE;
1949 	else 	evbits = EV_PERSIST | EV_READ;
1950 	event_set(&c->ev->ev, c->fd, evbits, comm_point_raw_handle_callback,
1951 		c);
1952 	if(event_base_set(base->eb->base, &c->ev->ev) != 0 ||
1953 		event_add(&c->ev->ev, c->timeout) != 0 )
1954 	{
1955 		log_err("could not add rawhdl event");
1956 		free(c->ev);
1957 		free(c);
1958 		return NULL;
1959 	}
1960 	return c;
1961 }
1962 
1963 void
1964 comm_point_close(struct comm_point* c)
1965 {
1966 	if(!c)
1967 		return;
1968 	if(c->fd != -1)
1969 		if(event_del(&c->ev->ev) != 0) {
1970 			log_err("could not event_del on close");
1971 		}
1972 	/* close fd after removing from event lists, or epoll.. is messed up */
1973 	if(c->fd != -1 && !c->do_not_close) {
1974 		verbose(VERB_ALGO, "close fd %d", c->fd);
1975 #ifndef USE_WINSOCK
1976 		close(c->fd);
1977 #else
1978 		closesocket(c->fd);
1979 #endif
1980 	}
1981 	c->fd = -1;
1982 }
1983 
1984 void
1985 comm_point_delete(struct comm_point* c)
1986 {
1987 	if(!c)
1988 		return;
1989 	if(c->type == comm_tcp && c->ssl) {
1990 #ifdef HAVE_SSL
1991 		SSL_shutdown(c->ssl);
1992 		SSL_free(c->ssl);
1993 #endif
1994 	}
1995 	comm_point_close(c);
1996 	if(c->tcp_handlers) {
1997 		int i;
1998 		for(i=0; i<c->max_tcp_count; i++)
1999 			comm_point_delete(c->tcp_handlers[i]);
2000 		free(c->tcp_handlers);
2001 	}
2002 	free(c->timeout);
2003 	if(c->type == comm_tcp || c->type == comm_local)
2004 		sldns_buffer_free(c->buffer);
2005 	free(c->ev);
2006 	free(c);
2007 }
2008 
2009 void
2010 comm_point_send_reply(struct comm_reply *repinfo)
2011 {
2012 	log_assert(repinfo && repinfo->c);
2013 	if(repinfo->c->type == comm_udp) {
2014 		if(repinfo->srctype)
2015 			comm_point_send_udp_msg_if(repinfo->c,
2016 			repinfo->c->buffer, (struct sockaddr*)&repinfo->addr,
2017 			repinfo->addrlen, repinfo);
2018 		else
2019 			comm_point_send_udp_msg(repinfo->c, repinfo->c->buffer,
2020 			(struct sockaddr*)&repinfo->addr, repinfo->addrlen);
2021 #ifdef USE_DNSTAP
2022 		if(repinfo->c->dtenv != NULL &&
2023 		   repinfo->c->dtenv->log_client_response_messages)
2024 			dt_msg_send_client_response(repinfo->c->dtenv,
2025 			&repinfo->addr, repinfo->c->type, repinfo->c->buffer);
2026 #endif
2027 	} else {
2028 #ifdef USE_DNSTAP
2029 		if(repinfo->c->tcp_parent->dtenv != NULL &&
2030 		   repinfo->c->tcp_parent->dtenv->log_client_response_messages)
2031 			dt_msg_send_client_response(repinfo->c->tcp_parent->dtenv,
2032 			&repinfo->addr, repinfo->c->type, repinfo->c->buffer);
2033 #endif
2034 		comm_point_start_listening(repinfo->c, -1, TCP_QUERY_TIMEOUT);
2035 	}
2036 }
2037 
2038 void
2039 comm_point_drop_reply(struct comm_reply* repinfo)
2040 {
2041 	if(!repinfo)
2042 		return;
2043 	log_assert(repinfo && repinfo->c);
2044 	log_assert(repinfo->c->type != comm_tcp_accept);
2045 	if(repinfo->c->type == comm_udp)
2046 		return;
2047 	reclaim_tcp_handler(repinfo->c);
2048 }
2049 
2050 void
2051 comm_point_stop_listening(struct comm_point* c)
2052 {
2053 	verbose(VERB_ALGO, "comm point stop listening %d", c->fd);
2054 	if(event_del(&c->ev->ev) != 0) {
2055 		log_err("event_del error to stoplisten");
2056 	}
2057 }
2058 
2059 void
2060 comm_point_start_listening(struct comm_point* c, int newfd, int sec)
2061 {
2062 	verbose(VERB_ALGO, "comm point start listening %d",
2063 		c->fd==-1?newfd:c->fd);
2064 	if(c->type == comm_tcp_accept && !c->tcp_free) {
2065 		/* no use to start listening no free slots. */
2066 		return;
2067 	}
2068 	if(sec != -1 && sec != 0) {
2069 		if(!c->timeout) {
2070 			c->timeout = (struct timeval*)malloc(sizeof(
2071 				struct timeval));
2072 			if(!c->timeout) {
2073 				log_err("cpsl: malloc failed. No net read.");
2074 				return;
2075 			}
2076 		}
2077 		c->ev->ev.ev_events |= EV_TIMEOUT;
2078 #ifndef S_SPLINT_S /* splint fails on struct timeval. */
2079 		c->timeout->tv_sec = sec;
2080 		c->timeout->tv_usec = 0;
2081 #endif /* S_SPLINT_S */
2082 	}
2083 	if(c->type == comm_tcp) {
2084 		c->ev->ev.ev_events &= ~(EV_READ|EV_WRITE);
2085 		if(c->tcp_is_reading)
2086 			c->ev->ev.ev_events |= EV_READ;
2087 		else	c->ev->ev.ev_events |= EV_WRITE;
2088 	}
2089 	if(newfd != -1) {
2090 		if(c->fd != -1) {
2091 #ifndef USE_WINSOCK
2092 			close(c->fd);
2093 #else
2094 			closesocket(c->fd);
2095 #endif
2096 		}
2097 		c->fd = newfd;
2098 		c->ev->ev.ev_fd = c->fd;
2099 	}
2100 	if(event_add(&c->ev->ev, sec==0?NULL:c->timeout) != 0) {
2101 		log_err("event_add failed. in cpsl.");
2102 	}
2103 }
2104 
2105 void comm_point_listen_for_rw(struct comm_point* c, int rd, int wr)
2106 {
2107 	verbose(VERB_ALGO, "comm point listen_for_rw %d %d", c->fd, wr);
2108 	if(event_del(&c->ev->ev) != 0) {
2109 		log_err("event_del error to cplf");
2110 	}
2111 	c->ev->ev.ev_events &= ~(EV_READ|EV_WRITE);
2112 	if(rd) c->ev->ev.ev_events |= EV_READ;
2113 	if(wr) c->ev->ev.ev_events |= EV_WRITE;
2114 	if(event_add(&c->ev->ev, c->timeout) != 0) {
2115 		log_err("event_add failed. in cplf.");
2116 	}
2117 }
2118 
2119 size_t comm_point_get_mem(struct comm_point* c)
2120 {
2121 	size_t s;
2122 	if(!c)
2123 		return 0;
2124 	s = sizeof(*c) + sizeof(*c->ev);
2125 	if(c->timeout)
2126 		s += sizeof(*c->timeout);
2127 	if(c->type == comm_tcp || c->type == comm_local)
2128 		s += sizeof(*c->buffer) + sldns_buffer_capacity(c->buffer);
2129 	if(c->type == comm_tcp_accept) {
2130 		int i;
2131 		for(i=0; i<c->max_tcp_count; i++)
2132 			s += comm_point_get_mem(c->tcp_handlers[i]);
2133 	}
2134 	return s;
2135 }
2136 
2137 struct comm_timer*
2138 comm_timer_create(struct comm_base* base, void (*cb)(void*), void* cb_arg)
2139 {
2140 	struct comm_timer *tm = (struct comm_timer*)calloc(1,
2141 		sizeof(struct comm_timer));
2142 	if(!tm)
2143 		return NULL;
2144 	tm->ev_timer = (struct internal_timer*)calloc(1,
2145 		sizeof(struct internal_timer));
2146 	if(!tm->ev_timer) {
2147 		log_err("malloc failed");
2148 		free(tm);
2149 		return NULL;
2150 	}
2151 	tm->ev_timer->base = base;
2152 	tm->callback = cb;
2153 	tm->cb_arg = cb_arg;
2154 	event_set(&tm->ev_timer->ev, -1, EV_TIMEOUT,
2155 		comm_timer_callback, tm);
2156 	if(event_base_set(base->eb->base, &tm->ev_timer->ev) != 0) {
2157 		log_err("timer_create: event_base_set failed.");
2158 		free(tm->ev_timer);
2159 		free(tm);
2160 		return NULL;
2161 	}
2162 	return tm;
2163 }
2164 
2165 void
2166 comm_timer_disable(struct comm_timer* timer)
2167 {
2168 	if(!timer)
2169 		return;
2170 	evtimer_del(&timer->ev_timer->ev);
2171 	timer->ev_timer->enabled = 0;
2172 }
2173 
2174 void
2175 comm_timer_set(struct comm_timer* timer, struct timeval* tv)
2176 {
2177 	log_assert(tv);
2178 	if(timer->ev_timer->enabled)
2179 		comm_timer_disable(timer);
2180 	event_set(&timer->ev_timer->ev, -1, EV_TIMEOUT,
2181 		comm_timer_callback, timer);
2182 	if(event_base_set(timer->ev_timer->base->eb->base,
2183 		&timer->ev_timer->ev) != 0)
2184 		log_err("comm_timer_set: set_base failed.");
2185 	if(evtimer_add(&timer->ev_timer->ev, tv) != 0)
2186 		log_err("comm_timer_set: evtimer_add failed.");
2187 	timer->ev_timer->enabled = 1;
2188 }
2189 
2190 void
2191 comm_timer_delete(struct comm_timer* timer)
2192 {
2193 	if(!timer)
2194 		return;
2195 	comm_timer_disable(timer);
2196 	free(timer->ev_timer);
2197 	free(timer);
2198 }
2199 
2200 void
2201 comm_timer_callback(int ATTR_UNUSED(fd), short event, void* arg)
2202 {
2203 	struct comm_timer* tm = (struct comm_timer*)arg;
2204 	if(!(event&EV_TIMEOUT))
2205 		return;
2206 	comm_base_now(tm->ev_timer->base);
2207 	tm->ev_timer->enabled = 0;
2208 	fptr_ok(fptr_whitelist_comm_timer(tm->callback));
2209 	(*tm->callback)(tm->cb_arg);
2210 }
2211 
2212 int
2213 comm_timer_is_set(struct comm_timer* timer)
2214 {
2215 	return (int)timer->ev_timer->enabled;
2216 }
2217 
2218 size_t
2219 comm_timer_get_mem(struct comm_timer* timer)
2220 {
2221 	return sizeof(*timer) + sizeof(struct internal_timer);
2222 }
2223 
2224 struct comm_signal*
2225 comm_signal_create(struct comm_base* base,
2226         void (*callback)(int, void*), void* cb_arg)
2227 {
2228 	struct comm_signal* com = (struct comm_signal*)malloc(
2229 		sizeof(struct comm_signal));
2230 	if(!com) {
2231 		log_err("malloc failed");
2232 		return NULL;
2233 	}
2234 	com->base = base;
2235 	com->callback = callback;
2236 	com->cb_arg = cb_arg;
2237 	com->ev_signal = NULL;
2238 	return com;
2239 }
2240 
2241 void
2242 comm_signal_callback(int sig, short event, void* arg)
2243 {
2244 	struct comm_signal* comsig = (struct comm_signal*)arg;
2245 	if(!(event & EV_SIGNAL))
2246 		return;
2247 	comm_base_now(comsig->base);
2248 	fptr_ok(fptr_whitelist_comm_signal(comsig->callback));
2249 	(*comsig->callback)(sig, comsig->cb_arg);
2250 }
2251 
2252 int
2253 comm_signal_bind(struct comm_signal* comsig, int sig)
2254 {
2255 	struct internal_signal* entry = (struct internal_signal*)calloc(1,
2256 		sizeof(struct internal_signal));
2257 	if(!entry) {
2258 		log_err("malloc failed");
2259 		return 0;
2260 	}
2261 	log_assert(comsig);
2262 	/* add signal event */
2263 	signal_set(&entry->ev, sig, comm_signal_callback, comsig);
2264 	if(event_base_set(comsig->base->eb->base, &entry->ev) != 0) {
2265 		log_err("Could not set signal base");
2266 		free(entry);
2267 		return 0;
2268 	}
2269 	if(signal_add(&entry->ev, NULL) != 0) {
2270 		log_err("Could not add signal handler");
2271 		free(entry);
2272 		return 0;
2273 	}
2274 	/* link into list */
2275 	entry->next = comsig->ev_signal;
2276 	comsig->ev_signal = entry;
2277 	return 1;
2278 }
2279 
2280 void
2281 comm_signal_delete(struct comm_signal* comsig)
2282 {
2283 	struct internal_signal* p, *np;
2284 	if(!comsig)
2285 		return;
2286 	p=comsig->ev_signal;
2287 	while(p) {
2288 		np = p->next;
2289 		signal_del(&p->ev);
2290 		free(p);
2291 		p = np;
2292 	}
2293 	free(comsig);
2294 }
2295