xref: /freebsd/contrib/unbound/util/netevent.c (revision 313376588638950ba1e93c403dd8c97bc52fd3a2)
1 /*
2  * util/netevent.c - event notification
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  *
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  *
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 /**
37  * \file
38  *
39  * This file contains event notification functions.
40  */
41 #include "config.h"
42 #include "util/netevent.h"
43 #include "util/log.h"
44 #include "util/net_help.h"
45 #include "util/fptr_wlist.h"
46 #include "ldns/pkthdr.h"
47 #include "ldns/sbuffer.h"
48 #ifdef HAVE_OPENSSL_SSL_H
49 #include <openssl/ssl.h>
50 #endif
51 #ifdef HAVE_OPENSSL_ERR_H
52 #include <openssl/err.h>
53 #endif
54 
55 /* -------- Start of local definitions -------- */
/**
 * Fallback when CMSG_ALIGN is not defined on this platform: reuse
 * _CMSG_DATA_ALIGN if the platform has it, otherwise round the length
 * up to the next multiple of sizeof(long).
 */
#ifndef CMSG_ALIGN
#  ifdef _CMSG_DATA_ALIGN
#    define CMSG_ALIGN _CMSG_DATA_ALIGN
#  else
#    define CMSG_ALIGN(len) (((len)+sizeof(long)-1) & ~(sizeof(long)-1))
#  endif
#endif

/**
 * Fallback when CMSG_LEN is not defined on this platform: the aligned
 * size of the cmsghdr plus the (unpadded) payload length.
 */
#ifndef CMSG_LEN
#  define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr))+(len))
#endif

/**
 * Fallback when CMSG_SPACE is not defined on this platform: the aligned
 * payload length plus the aligned cmsghdr size; the header is aligned with
 * _CMSG_HDR_ALIGN when the platform provides it, else with CMSG_ALIGN.
 */
#ifndef CMSG_SPACE
#  ifdef _CMSG_HDR_ALIGN
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+_CMSG_HDR_ALIGN(sizeof(struct cmsghdr)))
#  else
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+CMSG_ALIGN(sizeof(struct cmsghdr)))
#  endif
#endif
78 
/**
 * The TCP reading or writing query timeout in seconds.
 * Passed as the timeout argument to comm_point_start_listening() when a
 * TCP handler is (re)armed.
 */
#define TCP_QUERY_TIMEOUT 120

#ifndef NONBLOCKING_IS_BROKEN
/**
 * Number of UDP reads to perform per read indication from select.
 * This is the upper bound of the receive loops in the UDP callbacks.
 */
#define NUM_UDP_PER_SELECT 100
#else
/* with NONBLOCKING_IS_BROKEN defined, only one read is attempted per
 * read indication */
#define NUM_UDP_PER_SELECT 1
#endif
88 
89 /* We define libevent structures here to hide the libevent stuff. */
90 
91 #ifdef USE_MINI_EVENT
92 #  ifdef USE_WINSOCK
93 #    include "util/winsock_event.h"
94 #  else
95 #    include "util/mini_event.h"
96 #  endif /* USE_WINSOCK */
97 #else /* USE_MINI_EVENT */
98    /* we use libevent */
99 #  ifdef HAVE_EVENT_H
100 #    include <event.h>
101 #  else
102 #    include "event2/event.h"
103 #    include "event2/event_struct.h"
104 #    include "event2/event_compat.h"
105 #  endif
106 #endif /* USE_MINI_EVENT */
107 
/**
 * The internal event structure for keeping libevent info for the event.
 * Possibly other structures (list, tree) this is part of.
 * The event is embedded by value, so no separate allocation is made for it.
 */
struct internal_event {
	/** the comm base */
	struct comm_base* base;
	/** libevent event type, alloced here */
	struct event ev;
};
118 
/**
 * Internal base structure, so that every thread has its own events.
 * Holds the event_base, the cached time values handed out by
 * comm_base_timept(), and the timer used to pause accept() calls.
 */
struct internal_base {
	/** libevent event_base type. */
	struct event_base* base;
	/** seconds time pointer points here */
	time_t secs;
	/** timeval with current time */
	struct timeval now;
	/** the event used for slow_accept timeouts */
	struct event slow_accept;
	/** true if slow_accept is enabled; checked on delete of the base to
	 * decide whether the slow_accept event has to be removed */
	int slow_accept_enabled;
};
134 
/**
 * Internal timer structure, to store timer event in.
 * The libevent event is embedded by value.
 */
struct internal_timer {
	/** the comm base */
	struct comm_base* base;
	/** libevent event type, alloced here */
	struct event ev;
	/** is timer enabled */
	uint8_t enabled;
};
146 
/**
 * Internal signal structure, to store signal event in.
 * Entries are chained into a singly linked list through the next pointer.
 */
struct internal_signal {
	/** libevent event type, alloced here */
	struct event ev;
	/** next in signal list */
	struct internal_signal* next;
};
156 
/**
 * Create a tcp handler with a parent.
 * Forward declaration; the definition is further down in this file.
 * @param base: the comm base for the handler.
 * @param parent: the parent comm point of the handler.
 * @param bufsize: buffer size for the handler.
 * @param callback: callback function of the handler.
 * @param callback_arg: user argument for the callback.
 * @return the new comm point.
 */
static struct comm_point* comm_point_create_tcp_handler(
	struct comm_base *base, struct comm_point* parent, size_t bufsize,
        comm_point_callback_t* callback, void* callback_arg);
161 
162 /* -------- End of local definitions -------- */
163 
164 #ifdef USE_MINI_EVENT
165 /** minievent updates the time when it blocks. */
166 #define comm_base_now(x) /* nothing to do */
167 #else /* !USE_MINI_EVENT */
168 /** fillup the time values in the event base */
169 static void
170 comm_base_now(struct comm_base* b)
171 {
172 	if(gettimeofday(&b->eb->now, NULL) < 0) {
173 		log_err("gettimeofday: %s", strerror(errno));
174 	}
175 	b->eb->secs = (time_t)b->eb->now.tv_sec;
176 }
177 #endif /* USE_MINI_EVENT */
178 
179 struct comm_base*
180 comm_base_create(int sigs)
181 {
182 	struct comm_base* b = (struct comm_base*)calloc(1,
183 		sizeof(struct comm_base));
184 	if(!b)
185 		return NULL;
186 	b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
187 	if(!b->eb) {
188 		free(b);
189 		return NULL;
190 	}
191 #ifdef USE_MINI_EVENT
192 	(void)sigs;
193 	/* use mini event time-sharing feature */
194 	b->eb->base = event_init(&b->eb->secs, &b->eb->now);
195 #else
196 #  if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP)
197 	/* libev */
198 	if(sigs)
199 		b->eb->base=(struct event_base *)ev_default_loop(EVFLAG_AUTO);
200 	else
201 		b->eb->base=(struct event_base *)ev_loop_new(EVFLAG_AUTO);
202 #  else
203 	(void)sigs;
204 #    ifdef HAVE_EVENT_BASE_NEW
205 	b->eb->base = event_base_new();
206 #    else
207 	b->eb->base = event_init();
208 #    endif
209 #  endif
210 #endif
211 	if(!b->eb->base) {
212 		free(b->eb);
213 		free(b);
214 		return NULL;
215 	}
216 	comm_base_now(b);
217 	/* avoid event_get_method call which causes crashes even when
218 	 * not printing, because its result is passed */
219 	verbose(VERB_ALGO,
220 #if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP)
221 		"libev"
222 #elif defined(USE_MINI_EVENT)
223 		"event "
224 #else
225 		"libevent "
226 #endif
227 		"%s uses %s method.",
228 		event_get_version(),
229 #ifdef HAVE_EVENT_BASE_GET_METHOD
230 		event_base_get_method(b->eb->base)
231 #else
232 		"not_obtainable"
233 #endif
234 	);
235 	return b;
236 }
237 
238 struct comm_base*
239 comm_base_create_event(struct event_base* base)
240 {
241 	struct comm_base* b = (struct comm_base*)calloc(1,
242 		sizeof(struct comm_base));
243 	if(!b)
244 		return NULL;
245 	b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
246 	if(!b->eb) {
247 		free(b);
248 		return NULL;
249 	}
250 	b->eb->base = base;
251 	comm_base_now(b);
252 	return b;
253 }
254 
255 void
256 comm_base_delete(struct comm_base* b)
257 {
258 	if(!b)
259 		return;
260 	if(b->eb->slow_accept_enabled) {
261 		if(event_del(&b->eb->slow_accept) != 0) {
262 			log_err("could not event_del slow_accept");
263 		}
264 	}
265 #ifdef USE_MINI_EVENT
266 	event_base_free(b->eb->base);
267 #elif defined(HAVE_EVENT_BASE_FREE) && defined(HAVE_EVENT_BASE_ONCE)
268 	/* only libevent 1.2+ has it, but in 1.2 it is broken -
269 	   assertion fails on signal handling ev that is not deleted
270  	   in libevent 1.3c (event_base_once appears) this is fixed. */
271 	event_base_free(b->eb->base);
272 #endif /* HAVE_EVENT_BASE_FREE and HAVE_EVENT_BASE_ONCE */
273 	b->eb->base = NULL;
274 	free(b->eb);
275 	free(b);
276 }
277 
278 void
279 comm_base_delete_no_base(struct comm_base* b)
280 {
281 	if(!b)
282 		return;
283 	if(b->eb->slow_accept_enabled) {
284 		if(event_del(&b->eb->slow_accept) != 0) {
285 			log_err("could not event_del slow_accept");
286 		}
287 	}
288 	b->eb->base = NULL;
289 	free(b->eb);
290 	free(b);
291 }
292 
293 void
294 comm_base_timept(struct comm_base* b, time_t** tt, struct timeval** tv)
295 {
296 	*tt = &b->eb->secs;
297 	*tv = &b->eb->now;
298 }
299 
300 void
301 comm_base_dispatch(struct comm_base* b)
302 {
303 	int retval;
304 	retval = event_base_dispatch(b->eb->base);
305 	if(retval != 0) {
306 		fatal_exit("event_dispatch returned error %d, "
307 			"errno is %s", retval, strerror(errno));
308 	}
309 }
310 
311 void comm_base_exit(struct comm_base* b)
312 {
313 	if(event_base_loopexit(b->eb->base, NULL) != 0) {
314 		log_err("Could not loopexit");
315 	}
316 }
317 
318 void comm_base_set_slow_accept_handlers(struct comm_base* b,
319 	void (*stop_acc)(void*), void (*start_acc)(void*), void* arg)
320 {
321 	b->stop_accept = stop_acc;
322 	b->start_accept = start_acc;
323 	b->cb_arg = arg;
324 }
325 
326 struct event_base* comm_base_internal(struct comm_base* b)
327 {
328 	return b->eb->base;
329 }
330 
331 /** see if errno for udp has to be logged or not uses globals */
332 static int
333 udp_send_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
334 {
335 	/* do not log transient errors (unless high verbosity) */
336 #if defined(ENETUNREACH) || defined(EHOSTDOWN) || defined(EHOSTUNREACH) || defined(ENETDOWN)
337 	switch(errno) {
338 #  ifdef ENETUNREACH
339 		case ENETUNREACH:
340 #  endif
341 #  ifdef EHOSTDOWN
342 		case EHOSTDOWN:
343 #  endif
344 #  ifdef EHOSTUNREACH
345 		case EHOSTUNREACH:
346 #  endif
347 #  ifdef ENETDOWN
348 		case ENETDOWN:
349 #  endif
350 			if(verbosity < VERB_ALGO)
351 				return 0;
352 		default:
353 			break;
354 	}
355 #endif
356 	/* permission denied is gotten for every send if the
357 	 * network is disconnected (on some OS), squelch it */
358 	if(errno == EPERM && verbosity < VERB_DETAIL)
359 		return 0;
360 	/* squelch errors where people deploy AAAA ::ffff:bla for
361 	 * authority servers, which we try for intranets. */
362 	if(errno == EINVAL && addr_is_ip4mapped(
363 		(struct sockaddr_storage*)addr, addrlen) &&
364 		verbosity < VERB_DETAIL)
365 		return 0;
366 	/* SO_BROADCAST sockopt can give access to 255.255.255.255,
367 	 * but a dns cache does not need it. */
368 	if(errno == EACCES && addr_is_broadcast(
369 		(struct sockaddr_storage*)addr, addrlen) &&
370 		verbosity < VERB_DETAIL)
371 		return 0;
372 	return 1;
373 }
374 
/**
 * See if the errno of a failed tcp connect has to be logged; applies the
 * same rules as for a failed udp send.
 */
int tcp_connect_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
{
	int needs_log = udp_send_errno_needs_log(addr, addrlen);

	return needs_log;
}
379 
380 /* send a UDP reply */
381 int
382 comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet,
383 	struct sockaddr* addr, socklen_t addrlen)
384 {
385 	ssize_t sent;
386 	log_assert(c->fd != -1);
387 #ifdef UNBOUND_DEBUG
388 	if(sldns_buffer_remaining(packet) == 0)
389 		log_err("error: send empty UDP packet");
390 #endif
391 	log_assert(addr && addrlen > 0);
392 	sent = sendto(c->fd, (void*)sldns_buffer_begin(packet),
393 		sldns_buffer_remaining(packet), 0,
394 		addr, addrlen);
395 	if(sent == -1) {
396 		if(!udp_send_errno_needs_log(addr, addrlen))
397 			return 0;
398 #ifndef USE_WINSOCK
399 		verbose(VERB_OPS, "sendto failed: %s", strerror(errno));
400 #else
401 		verbose(VERB_OPS, "sendto failed: %s",
402 			wsa_strerror(WSAGetLastError()));
403 #endif
404 		log_addr(VERB_OPS, "remote address is",
405 			(struct sockaddr_storage*)addr, addrlen);
406 		return 0;
407 	} else if((size_t)sent != sldns_buffer_remaining(packet)) {
408 		log_err("sent %d in place of %d bytes",
409 			(int)sent, (int)sldns_buffer_remaining(packet));
410 		return 0;
411 	}
412 	return 1;
413 }
414 
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && (defined(HAVE_RECVMSG) || defined(HAVE_SENDMSG))
/**
 * Print debug ancillary info.
 * @param str: prefix for the log line.
 * @param r: reply info; srctype selects which pktinfo member is printed.
 */
static void p_ancil(const char* str, struct comm_reply* r)
{
	switch(r->srctype) {
	case 6: {
		char abuf[1024];
		if(inet_ntop(AF_INET6, &r->pktinfo.v6info.ipi6_addr,
			abuf, (socklen_t)sizeof(abuf)) == 0) {
			(void)strlcpy(abuf, "(inet_ntop error)", sizeof(abuf));
		}
		abuf[sizeof(abuf)-1]=0;
		log_info("%s: %s %d", str, abuf,
			r->pktinfo.v6info.ipi6_ifindex);
		break;
	}
	case 4: {
#ifdef IP_PKTINFO
		char abuf[1024], specbuf[1024];
		if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_addr,
			abuf, (socklen_t)sizeof(abuf)) == 0) {
			(void)strlcpy(abuf, "(inet_ntop error)", sizeof(abuf));
		}
		abuf[sizeof(abuf)-1]=0;
#ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST
		if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_spec_dst,
			specbuf, (socklen_t)sizeof(specbuf)) == 0) {
			(void)strlcpy(specbuf, "(inet_ntop error)",
				sizeof(specbuf));
		}
		specbuf[sizeof(specbuf)-1]=0;
#else
		specbuf[0]=0;
#endif
		log_info("%s: %d %s %s", str, r->pktinfo.v4info.ipi_ifindex,
			abuf, specbuf);
#elif defined(IP_RECVDSTADDR)
		char abuf[1024];
		if(inet_ntop(AF_INET, &r->pktinfo.v4addr,
			abuf, (socklen_t)sizeof(abuf)) == 0) {
			(void)strlcpy(abuf, "(inet_ntop error)", sizeof(abuf));
		}
		abuf[sizeof(abuf)-1]=0;
		log_info("%s: %s", str, abuf);
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
		break;
	}
	default:
		log_info("%s: unknown srctype %d", str, r->srctype);
		break;
	}
}
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG||HAVE_SENDMSG */
462 
463 /** send a UDP reply over specified interface*/
464 static int
465 comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet,
466 	struct sockaddr* addr, socklen_t addrlen, struct comm_reply* r)
467 {
468 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_SENDMSG)
469 	ssize_t sent;
470 	struct msghdr msg;
471 	struct iovec iov[1];
472 	char control[256];
473 #ifndef S_SPLINT_S
474 	struct cmsghdr *cmsg;
475 #endif /* S_SPLINT_S */
476 
477 	log_assert(c->fd != -1);
478 #ifdef UNBOUND_DEBUG
479 	if(sldns_buffer_remaining(packet) == 0)
480 		log_err("error: send empty UDP packet");
481 #endif
482 	log_assert(addr && addrlen > 0);
483 
484 	msg.msg_name = addr;
485 	msg.msg_namelen = addrlen;
486 	iov[0].iov_base = sldns_buffer_begin(packet);
487 	iov[0].iov_len = sldns_buffer_remaining(packet);
488 	msg.msg_iov = iov;
489 	msg.msg_iovlen = 1;
490 	msg.msg_control = control;
491 #ifndef S_SPLINT_S
492 	msg.msg_controllen = sizeof(control);
493 #endif /* S_SPLINT_S */
494 	msg.msg_flags = 0;
495 
496 #ifndef S_SPLINT_S
497 	cmsg = CMSG_FIRSTHDR(&msg);
498 	if(r->srctype == 4) {
499 #ifdef IP_PKTINFO
500 		msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo));
501 		log_assert(msg.msg_controllen <= sizeof(control));
502 		cmsg->cmsg_level = IPPROTO_IP;
503 		cmsg->cmsg_type = IP_PKTINFO;
504 		memmove(CMSG_DATA(cmsg), &r->pktinfo.v4info,
505 			sizeof(struct in_pktinfo));
506 		cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
507 #elif defined(IP_SENDSRCADDR)
508 		msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr));
509 		log_assert(msg.msg_controllen <= sizeof(control));
510 		cmsg->cmsg_level = IPPROTO_IP;
511 		cmsg->cmsg_type = IP_SENDSRCADDR;
512 		memmove(CMSG_DATA(cmsg), &r->pktinfo.v4addr,
513 			sizeof(struct in_addr));
514 		cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
515 #else
516 		verbose(VERB_ALGO, "no IP_PKTINFO or IP_SENDSRCADDR");
517 		msg.msg_control = NULL;
518 #endif /* IP_PKTINFO or IP_SENDSRCADDR */
519 	} else if(r->srctype == 6) {
520 		msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
521 		log_assert(msg.msg_controllen <= sizeof(control));
522 		cmsg->cmsg_level = IPPROTO_IPV6;
523 		cmsg->cmsg_type = IPV6_PKTINFO;
524 		memmove(CMSG_DATA(cmsg), &r->pktinfo.v6info,
525 			sizeof(struct in6_pktinfo));
526 		cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
527 	} else {
528 		/* try to pass all 0 to use default route */
529 		msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
530 		log_assert(msg.msg_controllen <= sizeof(control));
531 		cmsg->cmsg_level = IPPROTO_IPV6;
532 		cmsg->cmsg_type = IPV6_PKTINFO;
533 		memset(CMSG_DATA(cmsg), 0, sizeof(struct in6_pktinfo));
534 		cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
535 	}
536 #endif /* S_SPLINT_S */
537 	if(verbosity >= VERB_ALGO)
538 		p_ancil("send_udp over interface", r);
539 	sent = sendmsg(c->fd, &msg, 0);
540 	if(sent == -1) {
541 		if(!udp_send_errno_needs_log(addr, addrlen))
542 			return 0;
543 		verbose(VERB_OPS, "sendmsg failed: %s", strerror(errno));
544 		log_addr(VERB_OPS, "remote address is",
545 			(struct sockaddr_storage*)addr, addrlen);
546 		return 0;
547 	} else if((size_t)sent != sldns_buffer_remaining(packet)) {
548 		log_err("sent %d in place of %d bytes",
549 			(int)sent, (int)sldns_buffer_remaining(packet));
550 		return 0;
551 	}
552 	return 1;
553 #else
554 	(void)c;
555 	(void)packet;
556 	(void)addr;
557 	(void)addrlen;
558 	(void)r;
559 	log_err("sendmsg: IPV6_PKTINFO not supported");
560 	return 0;
561 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_SENDMSG */
562 }
563 
/**
 * Event callback for a UDP comm point that receives with recvmsg, so that
 * the ancillary data of the packet (pktinfo) is available for the reply.
 * Reads up to NUM_UDP_PER_SELECT packets per invocation; for every packet
 * the comm point callback is called and, if that returns nonzero, the
 * buffer is sent back with comm_point_send_udp_msg_if.
 * @param fd: the socket that is readable.
 * @param event: event flags, only EV_READ is acted upon.
 * @param arg: the comm point.
 */
void
comm_point_udp_ancil_callback(int fd, short event, void* arg)
{
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
	struct comm_reply rep;
	struct msghdr msg;
	struct iovec iov[1];
	ssize_t rcv;
	/* the union makes the ancillary buffer suitably aligned for the
	 * struct cmsghdr that the CMSG macros access inside it */
	union {
		struct cmsghdr hdr;
		char buf[256];
	} ancil;
	int i;
#ifndef S_SPLINT_S
	struct cmsghdr* cmsg;
#endif /* S_SPLINT_S */

	rep.c = (struct comm_point*)arg;
	log_assert(rep.c->type == comm_udp);

	if(!(event&EV_READ))
		return;
	log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
	comm_base_now(rep.c->ev->base);
	for(i=0; i<NUM_UDP_PER_SELECT; i++) {
		sldns_buffer_clear(rep.c->buffer);
		rep.addrlen = (socklen_t)sizeof(rep.addr);
		log_assert(fd != -1);
		log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
		msg.msg_name = &rep.addr;
		msg.msg_namelen = (socklen_t)sizeof(rep.addr);
		iov[0].iov_base = sldns_buffer_begin(rep.c->buffer);
		iov[0].iov_len = sldns_buffer_remaining(rep.c->buffer);
		msg.msg_iov = iov;
		msg.msg_iovlen = 1;
		msg.msg_control = ancil.buf;
#ifndef S_SPLINT_S
		msg.msg_controllen = sizeof(ancil.buf);
#endif /* S_SPLINT_S */
		msg.msg_flags = 0;
		rcv = recvmsg(fd, &msg, 0);
		if(rcv == -1) {
			if(errno != EAGAIN && errno != EINTR) {
				log_err("recvmsg failed: %s", strerror(errno));
			}
			return;
		}
		rep.addrlen = msg.msg_namelen;
		sldns_buffer_skip(rep.c->buffer, rcv);
		sldns_buffer_flip(rep.c->buffer);
		/* stays 0 if no pktinfo is found in the ancillary data */
		rep.srctype = 0;
#ifndef S_SPLINT_S
		for(cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
			cmsg = CMSG_NXTHDR(&msg, cmsg)) {
			if( cmsg->cmsg_level == IPPROTO_IPV6 &&
				cmsg->cmsg_type == IPV6_PKTINFO) {
				rep.srctype = 6;
				memmove(&rep.pktinfo.v6info, CMSG_DATA(cmsg),
					sizeof(struct in6_pktinfo));
				break;
#ifdef IP_PKTINFO
			} else if( cmsg->cmsg_level == IPPROTO_IP &&
				cmsg->cmsg_type == IP_PKTINFO) {
				rep.srctype = 4;
				memmove(&rep.pktinfo.v4info, CMSG_DATA(cmsg),
					sizeof(struct in_pktinfo));
				break;
#elif defined(IP_RECVDSTADDR)
			} else if( cmsg->cmsg_level == IPPROTO_IP &&
				cmsg->cmsg_type == IP_RECVDSTADDR) {
				rep.srctype = 4;
				memmove(&rep.pktinfo.v4addr, CMSG_DATA(cmsg),
					sizeof(struct in_addr));
				break;
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
			}
		}
		if(verbosity >= VERB_ALGO)
			p_ancil("receive_udp on interface", &rep);
#endif /* S_SPLINT_S */
		fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
		if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
			/* send back immediate reply */
			(void)comm_point_send_udp_msg_if(rep.c, rep.c->buffer,
				(struct sockaddr*)&rep.addr, rep.addrlen, &rep);
		}
		if(rep.c->fd != fd) /* commpoint closed to -1 or reused for
		another UDP port; do not keep reading from the old fd. */
			break;
	}
#else
	(void)fd;
	(void)event;
	(void)arg;
	fatal_exit("recvmsg: No support for IPV6_PKTINFO. "
		"Please disable interface-automatic");
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */
}
658 
659 void
660 comm_point_udp_callback(int fd, short event, void* arg)
661 {
662 	struct comm_reply rep;
663 	ssize_t rcv;
664 	int i;
665 
666 	rep.c = (struct comm_point*)arg;
667 	log_assert(rep.c->type == comm_udp);
668 
669 	if(!(event&EV_READ))
670 		return;
671 	log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
672 	comm_base_now(rep.c->ev->base);
673 	for(i=0; i<NUM_UDP_PER_SELECT; i++) {
674 		sldns_buffer_clear(rep.c->buffer);
675 		rep.addrlen = (socklen_t)sizeof(rep.addr);
676 		log_assert(fd != -1);
677 		log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
678 		rcv = recvfrom(fd, (void*)sldns_buffer_begin(rep.c->buffer),
679 			sldns_buffer_remaining(rep.c->buffer), 0,
680 			(struct sockaddr*)&rep.addr, &rep.addrlen);
681 		if(rcv == -1) {
682 #ifndef USE_WINSOCK
683 			if(errno != EAGAIN && errno != EINTR)
684 				log_err("recvfrom %d failed: %s",
685 					fd, strerror(errno));
686 #else
687 			if(WSAGetLastError() != WSAEINPROGRESS &&
688 				WSAGetLastError() != WSAECONNRESET &&
689 				WSAGetLastError()!= WSAEWOULDBLOCK)
690 				log_err("recvfrom failed: %s",
691 					wsa_strerror(WSAGetLastError()));
692 #endif
693 			return;
694 		}
695 		sldns_buffer_skip(rep.c->buffer, rcv);
696 		sldns_buffer_flip(rep.c->buffer);
697 		rep.srctype = 0;
698 		fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
699 		if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
700 			/* send back immediate reply */
701 			(void)comm_point_send_udp_msg(rep.c, rep.c->buffer,
702 				(struct sockaddr*)&rep.addr, rep.addrlen);
703 		}
704 		if(rep.c->fd != fd) /* commpoint closed to -1 or reused for
705 		another UDP port. Note rep.c cannot be reused with TCP fd. */
706 			break;
707 	}
708 }
709 
710 /** Use a new tcp handler for new query fd, set to read query */
711 static void
712 setup_tcp_handler(struct comm_point* c, int fd)
713 {
714 	log_assert(c->type == comm_tcp);
715 	log_assert(c->fd == -1);
716 	sldns_buffer_clear(c->buffer);
717 	c->tcp_is_reading = 1;
718 	c->tcp_byte_count = 0;
719 	comm_point_start_listening(c, fd, TCP_QUERY_TIMEOUT);
720 }
721 
722 void comm_base_handle_slow_accept(int ATTR_UNUSED(fd),
723 	short ATTR_UNUSED(event), void* arg)
724 {
725 	struct comm_base* b = (struct comm_base*)arg;
726 	/* timeout for the slow accept, re-enable accepts again */
727 	if(b->start_accept) {
728 		verbose(VERB_ALGO, "wait is over, slow accept disabled");
729 		fptr_ok(fptr_whitelist_start_accept(b->start_accept));
730 		(*b->start_accept)(b->cb_arg);
731 		b->eb->slow_accept_enabled = 0;
732 	}
733 }
734 
735 int comm_point_perform_accept(struct comm_point* c,
736 	struct sockaddr_storage* addr, socklen_t* addrlen)
737 {
738 	int new_fd;
739 	*addrlen = (socklen_t)sizeof(*addr);
740 	new_fd = accept(c->fd, (struct sockaddr*)addr, addrlen);
741 	if(new_fd == -1) {
742 #ifndef USE_WINSOCK
743 		/* EINTR is signal interrupt. others are closed connection. */
744 		if(	errno == EINTR || errno == EAGAIN
745 #ifdef EWOULDBLOCK
746 			|| errno == EWOULDBLOCK
747 #endif
748 #ifdef ECONNABORTED
749 			|| errno == ECONNABORTED
750 #endif
751 #ifdef EPROTO
752 			|| errno == EPROTO
753 #endif /* EPROTO */
754 			)
755 			return -1;
756 #if defined(ENFILE) && defined(EMFILE)
757 		if(errno == ENFILE || errno == EMFILE) {
758 			/* out of file descriptors, likely outside of our
759 			 * control. stop accept() calls for some time */
760 			if(c->ev->base->stop_accept) {
761 				struct comm_base* b = c->ev->base;
762 				struct timeval tv;
763 				verbose(VERB_ALGO, "out of file descriptors: "
764 					"slow accept");
765 				b->eb->slow_accept_enabled = 1;
766 				fptr_ok(fptr_whitelist_stop_accept(
767 					b->stop_accept));
768 				(*b->stop_accept)(b->cb_arg);
769 				/* set timeout, no mallocs */
770 				tv.tv_sec = NETEVENT_SLOW_ACCEPT_TIME/1000;
771 				tv.tv_usec = NETEVENT_SLOW_ACCEPT_TIME%1000;
772 				event_set(&b->eb->slow_accept, -1, EV_TIMEOUT,
773 					comm_base_handle_slow_accept, b);
774 				if(event_base_set(b->eb->base,
775 					&b->eb->slow_accept) != 0) {
776 					/* we do not want to log here, because
777 					 * that would spam the logfiles.
778 					 * error: "event_base_set failed." */
779 				}
780 				if(event_add(&b->eb->slow_accept, &tv) != 0) {
781 					/* we do not want to log here,
782 					 * error: "event_add failed." */
783 				}
784 			}
785 			return -1;
786 		}
787 #endif
788 		log_err("accept failed: %s", strerror(errno));
789 #else /* USE_WINSOCK */
790 		if(WSAGetLastError() == WSAEINPROGRESS ||
791 			WSAGetLastError() == WSAECONNRESET)
792 			return -1;
793 		if(WSAGetLastError() == WSAEWOULDBLOCK) {
794 			winsock_tcp_wouldblock(&c->ev->ev, EV_READ);
795 			return -1;
796 		}
797 		log_err("accept failed: %s", wsa_strerror(WSAGetLastError()));
798 #endif
799 		log_addr(0, "remote address is", addr, *addrlen);
800 		return -1;
801 	}
802 	fd_set_nonblock(new_fd);
803 	return new_fd;
804 }
805 
806 #ifdef USE_WINSOCK
807 static long win_bio_cb(BIO *b, int oper, const char* ATTR_UNUSED(argp),
808         int ATTR_UNUSED(argi), long argl, long retvalue)
809 {
810 	verbose(VERB_ALGO, "bio_cb %d, %s %s %s", oper,
811 		(oper&BIO_CB_RETURN)?"return":"before",
812 		(oper&BIO_CB_READ)?"read":((oper&BIO_CB_WRITE)?"write":"other"),
813 		WSAGetLastError()==WSAEWOULDBLOCK?"wsawb":"");
814 	/* on windows, check if previous operation caused EWOULDBLOCK */
815 	if( (oper == (BIO_CB_READ|BIO_CB_RETURN) && argl == 0) ||
816 		(oper == (BIO_CB_GETS|BIO_CB_RETURN) && argl == 0)) {
817 		if(WSAGetLastError() == WSAEWOULDBLOCK)
818 			winsock_tcp_wouldblock((struct event*)
819 				BIO_get_callback_arg(b), EV_READ);
820 	}
821 	if( (oper == (BIO_CB_WRITE|BIO_CB_RETURN) && argl == 0) ||
822 		(oper == (BIO_CB_PUTS|BIO_CB_RETURN) && argl == 0)) {
823 		if(WSAGetLastError() == WSAEWOULDBLOCK)
824 			winsock_tcp_wouldblock((struct event*)
825 				BIO_get_callback_arg(b), EV_WRITE);
826 	}
827 	/* return original return value */
828 	return retvalue;
829 }
830 
831 /** set win bio callbacks for nonblocking operations */
832 void
833 comm_point_tcp_win_bio_cb(struct comm_point* c, void* thessl)
834 {
835 	SSL* ssl = (SSL*)thessl;
836 	/* set them both just in case, but usually they are the same BIO */
837 	BIO_set_callback(SSL_get_rbio(ssl), &win_bio_cb);
838 	BIO_set_callback_arg(SSL_get_rbio(ssl), (char*)&c->ev->ev);
839 	BIO_set_callback(SSL_get_wbio(ssl), &win_bio_cb);
840 	BIO_set_callback_arg(SSL_get_wbio(ssl), (char*)&c->ev->ev);
841 }
842 #endif
843 
844 void
845 comm_point_tcp_accept_callback(int fd, short event, void* arg)
846 {
847 	struct comm_point* c = (struct comm_point*)arg, *c_hdl;
848 	int new_fd;
849 	log_assert(c->type == comm_tcp_accept);
850 	if(!(event & EV_READ)) {
851 		log_info("ignoring tcp accept event %d", (int)event);
852 		return;
853 	}
854 	comm_base_now(c->ev->base);
855 	/* find free tcp handler. */
856 	if(!c->tcp_free) {
857 		log_warn("accepted too many tcp, connections full");
858 		return;
859 	}
860 	/* accept incoming connection. */
861 	c_hdl = c->tcp_free;
862 	log_assert(fd != -1);
863 	new_fd = comm_point_perform_accept(c, &c_hdl->repinfo.addr,
864 		&c_hdl->repinfo.addrlen);
865 	if(new_fd == -1)
866 		return;
867 	if(c->ssl) {
868 		c_hdl->ssl = incoming_ssl_fd(c->ssl, new_fd);
869 		if(!c_hdl->ssl) {
870 			c_hdl->fd = new_fd;
871 			comm_point_close(c_hdl);
872 			return;
873 		}
874 		c_hdl->ssl_shake_state = comm_ssl_shake_read;
875 #ifdef USE_WINSOCK
876 		comm_point_tcp_win_bio_cb(c_hdl, c_hdl->ssl);
877 #endif
878 	}
879 
880 	/* grab the tcp handler buffers */
881 	c->tcp_free = c_hdl->tcp_free;
882 	if(!c->tcp_free) {
883 		/* stop accepting incoming queries for now. */
884 		comm_point_stop_listening(c);
885 	}
886 	/* addr is dropped. Not needed for tcp reply. */
887 	setup_tcp_handler(c_hdl, new_fd);
888 }
889 
890 /** Make tcp handler free for next assignment */
891 static void
892 reclaim_tcp_handler(struct comm_point* c)
893 {
894 	log_assert(c->type == comm_tcp);
895 	if(c->ssl) {
896 #ifdef HAVE_SSL
897 		SSL_shutdown(c->ssl);
898 		SSL_free(c->ssl);
899 		c->ssl = NULL;
900 #endif
901 	}
902 	comm_point_close(c);
903 	if(c->tcp_parent) {
904 		c->tcp_free = c->tcp_parent->tcp_free;
905 		c->tcp_parent->tcp_free = c;
906 		if(!c->tcp_free) {
907 			/* re-enable listening on accept socket */
908 			comm_point_start_listening(c->tcp_parent, -1, -1);
909 		}
910 	}
911 }
912 
913 /** do the callback when writing is done */
914 static void
915 tcp_callback_writer(struct comm_point* c)
916 {
917 	log_assert(c->type == comm_tcp);
918 	sldns_buffer_clear(c->buffer);
919 	if(c->tcp_do_toggle_rw)
920 		c->tcp_is_reading = 1;
921 	c->tcp_byte_count = 0;
922 	/* switch from listening(write) to listening(read) */
923 	comm_point_stop_listening(c);
924 	comm_point_start_listening(c, -1, -1);
925 }
926 
927 /** do the callback when reading is done */
928 static void
929 tcp_callback_reader(struct comm_point* c)
930 {
931 	log_assert(c->type == comm_tcp || c->type == comm_local);
932 	sldns_buffer_flip(c->buffer);
933 	if(c->tcp_do_toggle_rw)
934 		c->tcp_is_reading = 0;
935 	c->tcp_byte_count = 0;
936 	if(c->type == comm_tcp)
937 		comm_point_stop_listening(c);
938 	fptr_ok(fptr_whitelist_comm_point(c->callback));
939 	if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
940 		comm_point_start_listening(c, -1, TCP_QUERY_TIMEOUT);
941 	}
942 }
943 
/**
 * Continue ssl handshake.
 * Driven by c->ssl_shake_state: the hs_read and hs_write states only
 * restore the event registration and reset the state to none; in the
 * other states SSL_do_handshake is called and the event registration is
 * adjusted to what the SSL library reports it is waiting for.
 * @param c: the comm point with the ssl connection.
 * @return 0 if the connection is closed or failed, 1 to continue; the
 *	caller inspects ssl_shake_state to see if the handshake is done.
 */
#ifdef HAVE_SSL
static int
ssl_handshake(struct comm_point* c)
{
	int r;
	if(c->ssl_shake_state == comm_ssl_shake_hs_read) {
		/* read condition satisfied back to writing */
		comm_point_listen_for_rw(c, 1, 1);
		c->ssl_shake_state = comm_ssl_shake_none;
		return 1;
	}
	if(c->ssl_shake_state == comm_ssl_shake_hs_write) {
		/* write condition satisfied, back to reading */
		comm_point_listen_for_rw(c, 1, 0);
		c->ssl_shake_state = comm_ssl_shake_none;
		return 1;
	}

	/* clear the error queue so SSL_get_error below is about this call */
	ERR_clear_error();
	r = SSL_do_handshake(c->ssl);
	if(r != 1) {
		int want = SSL_get_error(c->ssl, r);
		if(want == SSL_ERROR_WANT_READ) {
			/* already registered for read: nothing to change */
			if(c->ssl_shake_state == comm_ssl_shake_read)
				return 1;
			c->ssl_shake_state = comm_ssl_shake_read;
			comm_point_listen_for_rw(c, 1, 0);
			return 1;
		} else if(want == SSL_ERROR_WANT_WRITE) {
			/* already registered for write: nothing to change */
			if(c->ssl_shake_state == comm_ssl_shake_write)
				return 1;
			c->ssl_shake_state = comm_ssl_shake_write;
			comm_point_listen_for_rw(c, 0, 1);
			return 1;
		} else if(r == 0) {
			return 0; /* closed */
		} else if(want == SSL_ERROR_SYSCALL) {
			/* SYSCALL and errno==0 means closed uncleanly */
			if(errno != 0)
				log_err("SSL_handshake syscall: %s",
					strerror(errno));
			return 0;
		} else {
			log_crypto_err("ssl handshake failed");
			log_addr(1, "ssl handshake failed", &c->repinfo.addr,
				c->repinfo.addrlen);
			return 0;
		}
	}
	/* this is where peer verification could take place */
	log_addr(VERB_ALGO, "SSL DNS connection", &c->repinfo.addr,
		c->repinfo.addrlen);

	/* setup listen rw correctly */
	if(c->tcp_is_reading) {
		/* in shake_read state the read registration is already there */
		if(c->ssl_shake_state != comm_ssl_shake_read)
			comm_point_listen_for_rw(c, 1, 0);
	} else {
		comm_point_listen_for_rw(c, 1, 1);
	}
	/* handshake is complete */
	c->ssl_shake_state = comm_ssl_shake_none;
	return 1;
}
#endif /* HAVE_SSL */
1009 
/**
 * ssl read callback on TCP.
 * Finishes a pending handshake first.  Then reads the two byte length
 * prefix (possibly over several calls), checks it against the buffer
 * capacity and LDNS_HEADER_SIZE, and reads the message body.  When the
 * buffer has no room remaining, tcp_callback_reader is called.
 * @param c: comm point with an ssl object.
 * @return 0 if the connection has to be closed, 1 otherwise.
 *	Without HAVE_SSL this always returns 0.
 */
static int
ssl_handle_read(struct comm_point* c)
{
#ifdef HAVE_SSL
	int got;
	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		/* handshake still in progress, wait for the next event */
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}
	if(c->tcp_byte_count < sizeof(uint16_t)) {
		size_t msglen;
		/* read length bytes */
		ERR_clear_error();
		got = SSL_read(c->ssl,
			(void*)sldns_buffer_at(c->buffer, c->tcp_byte_count),
			(int)(sizeof(uint16_t) - c->tcp_byte_count));
		if(got <= 0) {
			switch(SSL_get_error(c->ssl, got)) {
			case SSL_ERROR_ZERO_RETURN:
				return 0; /* shutdown, closed */
			case SSL_ERROR_WANT_READ:
				return 1; /* read more later */
			case SSL_ERROR_WANT_WRITE:
				c->ssl_shake_state = comm_ssl_shake_hs_write;
				comm_point_listen_for_rw(c, 0, 1);
				return 1;
			case SSL_ERROR_SYSCALL:
				if(errno != 0)
					log_err("SSL_read syscall: %s",
						strerror(errno));
				return 0;
			default:
				break;
			}
			log_crypto_err("could not SSL_read");
			return 0;
		}
		c->tcp_byte_count += got;
		if(c->tcp_byte_count != sizeof(uint16_t))
			return 1;
		/* length prefix complete: validate it */
		msglen = sldns_buffer_read_u16_at(c->buffer, 0);
		if(msglen > sldns_buffer_capacity(c->buffer)) {
			verbose(VERB_QUERY, "ssl: dropped larger than buffer");
			return 0;
		}
		sldns_buffer_set_limit(c->buffer, msglen);
		if(sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
			verbose(VERB_QUERY, "ssl: dropped bogus too short.");
			return 0;
		}
		verbose(VERB_ALGO, "Reading ssl tcp query of length %d",
			(int)sldns_buffer_limit(c->buffer));
	}
	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	got = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
		(int)sldns_buffer_remaining(c->buffer));
	if(got <= 0) {
		switch(SSL_get_error(c->ssl, got)) {
		case SSL_ERROR_ZERO_RETURN:
			return 0; /* shutdown, closed */
		case SSL_ERROR_WANT_READ:
			return 1; /* read more later */
		case SSL_ERROR_WANT_WRITE:
			c->ssl_shake_state = comm_ssl_shake_hs_write;
			comm_point_listen_for_rw(c, 0, 1);
			return 1;
		case SSL_ERROR_SYSCALL:
			if(errno != 0)
				log_err("SSL_read syscall: %s",
					strerror(errno));
			return 0;
		default:
			break;
		}
		log_crypto_err("could not SSL_read");
		return 0;
	}
	sldns_buffer_skip(c->buffer, (ssize_t)got);
	if(sldns_buffer_remaining(c->buffer) <= 0) {
		/* whole message is in, deliver it */
		tcp_callback_reader(c);
	}
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
1096 
/**
 * ssl write callback on TCP.
 * Finishes a pending handshake first.  Then writes the two byte length
 * prefix (possibly over several calls) followed by the buffer contents.
 * When nothing remains to be written, tcp_callback_writer is called.
 * @param c: comm point with an ssl object.
 * @return 0 if the connection has to be closed, 1 otherwise.
 *	Without HAVE_SSL this always returns 0.
 */
static int
ssl_handle_write(struct comm_point* c)
{
#ifdef HAVE_SSL
	int put;
	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		/* handshake still in progress, wait for the next event */
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}
	/* ignore return, if fails we may simply block */
	(void)SSL_set_mode(c->ssl, SSL_MODE_ENABLE_PARTIAL_WRITE);
	if(c->tcp_byte_count < sizeof(uint16_t)) {
		/* length prefix in network byte order */
		uint16_t netlen = htons(sldns_buffer_limit(c->buffer));
		uint8_t* from = ((uint8_t*)&netlen) + c->tcp_byte_count;
		ERR_clear_error();
		put = SSL_write(c->ssl, (void*)from,
			(int)(sizeof(uint16_t) - c->tcp_byte_count));
		if(put <= 0) {
			switch(SSL_get_error(c->ssl, put)) {
			case SSL_ERROR_ZERO_RETURN:
				return 0; /* closed */
			case SSL_ERROR_WANT_READ:
				c->ssl_shake_state = comm_ssl_shake_read;
				comm_point_listen_for_rw(c, 1, 0);
				return 1; /* wait for read condition */
			case SSL_ERROR_WANT_WRITE:
				return 1; /* write more later */
			case SSL_ERROR_SYSCALL:
				if(errno != 0)
					log_err("SSL_write syscall: %s",
						strerror(errno));
				return 0;
			default:
				break;
			}
			log_crypto_err("could not SSL_write");
			return 0;
		}
		c->tcp_byte_count += put;
		if(c->tcp_byte_count < sizeof(uint16_t))
			return 1;
		sldns_buffer_set_position(c->buffer, c->tcp_byte_count -
			sizeof(uint16_t));
		if(sldns_buffer_remaining(c->buffer) == 0) {
			tcp_callback_writer(c);
			return 1;
		}
	}
	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	put = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer),
		(int)sldns_buffer_remaining(c->buffer));
	if(put <= 0) {
		switch(SSL_get_error(c->ssl, put)) {
		case SSL_ERROR_ZERO_RETURN:
			return 0; /* closed */
		case SSL_ERROR_WANT_READ:
			c->ssl_shake_state = comm_ssl_shake_read;
			comm_point_listen_for_rw(c, 1, 0);
			return 1; /* wait for read condition */
		case SSL_ERROR_WANT_WRITE:
			return 1; /* write more later */
		case SSL_ERROR_SYSCALL:
			if(errno != 0)
				log_err("SSL_write syscall: %s",
					strerror(errno));
			return 0;
		default:
			break;
		}
		log_crypto_err("could not SSL_write");
		return 0;
	}
	sldns_buffer_skip(c->buffer, (ssize_t)put);

	if(sldns_buffer_remaining(c->buffer) == 0) {
		/* everything has been sent */
		tcp_callback_writer(c);
	}
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
1180 
1181 /** handle ssl tcp connection with dns contents */
1182 static int
1183 ssl_handle_it(struct comm_point* c)
1184 {
1185 	if(c->tcp_is_reading)
1186 		return ssl_handle_read(c);
1187 	return ssl_handle_write(c);
1188 }
1189 
1190 /** Handle tcp reading callback.
1191  * @param fd: file descriptor of socket.
1192  * @param c: comm point to read from into buffer.
1193  * @param short_ok: if true, very short packets are OK (for comm_local).
1194  * @return: 0 on error
1195  */
1196 static int
1197 comm_point_tcp_handle_read(int fd, struct comm_point* c, int short_ok)
1198 {
1199 	ssize_t r;
1200 	log_assert(c->type == comm_tcp || c->type == comm_local);
1201 	if(c->ssl)
1202 		return ssl_handle_it(c);
1203 	if(!c->tcp_is_reading)
1204 		return 0;
1205 
1206 	log_assert(fd != -1);
1207 	if(c->tcp_byte_count < sizeof(uint16_t)) {
1208 		/* read length bytes */
1209 		r = recv(fd,(void*)sldns_buffer_at(c->buffer,c->tcp_byte_count),
1210 			sizeof(uint16_t)-c->tcp_byte_count, 0);
1211 		if(r == 0)
1212 			return 0;
1213 		else if(r == -1) {
1214 #ifndef USE_WINSOCK
1215 			if(errno == EINTR || errno == EAGAIN)
1216 				return 1;
1217 #ifdef ECONNRESET
1218 			if(errno == ECONNRESET && verbosity < 2)
1219 				return 0; /* silence reset by peer */
1220 #endif
1221 			log_err("read (in tcp s): %s", strerror(errno));
1222 #else /* USE_WINSOCK */
1223 			if(WSAGetLastError() == WSAECONNRESET)
1224 				return 0;
1225 			if(WSAGetLastError() == WSAEINPROGRESS)
1226 				return 1;
1227 			if(WSAGetLastError() == WSAEWOULDBLOCK) {
1228 				winsock_tcp_wouldblock(&c->ev->ev, EV_READ);
1229 				return 1;
1230 			}
1231 			log_err("read (in tcp s): %s",
1232 				wsa_strerror(WSAGetLastError()));
1233 #endif
1234 			log_addr(0, "remote address is", &c->repinfo.addr,
1235 				c->repinfo.addrlen);
1236 			return 0;
1237 		}
1238 		c->tcp_byte_count += r;
1239 		if(c->tcp_byte_count != sizeof(uint16_t))
1240 			return 1;
1241 		if(sldns_buffer_read_u16_at(c->buffer, 0) >
1242 			sldns_buffer_capacity(c->buffer)) {
1243 			verbose(VERB_QUERY, "tcp: dropped larger than buffer");
1244 			return 0;
1245 		}
1246 		sldns_buffer_set_limit(c->buffer,
1247 			sldns_buffer_read_u16_at(c->buffer, 0));
1248 		if(!short_ok &&
1249 			sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
1250 			verbose(VERB_QUERY, "tcp: dropped bogus too short.");
1251 			return 0;
1252 		}
1253 		verbose(VERB_ALGO, "Reading tcp query of length %d",
1254 			(int)sldns_buffer_limit(c->buffer));
1255 	}
1256 
1257 	log_assert(sldns_buffer_remaining(c->buffer) > 0);
1258 	r = recv(fd, (void*)sldns_buffer_current(c->buffer),
1259 		sldns_buffer_remaining(c->buffer), 0);
1260 	if(r == 0) {
1261 		return 0;
1262 	} else if(r == -1) {
1263 #ifndef USE_WINSOCK
1264 		if(errno == EINTR || errno == EAGAIN)
1265 			return 1;
1266 		log_err("read (in tcp r): %s", strerror(errno));
1267 #else /* USE_WINSOCK */
1268 		if(WSAGetLastError() == WSAECONNRESET)
1269 			return 0;
1270 		if(WSAGetLastError() == WSAEINPROGRESS)
1271 			return 1;
1272 		if(WSAGetLastError() == WSAEWOULDBLOCK) {
1273 			winsock_tcp_wouldblock(&c->ev->ev, EV_READ);
1274 			return 1;
1275 		}
1276 		log_err("read (in tcp r): %s",
1277 			wsa_strerror(WSAGetLastError()));
1278 #endif
1279 		log_addr(0, "remote address is", &c->repinfo.addr,
1280 			c->repinfo.addrlen);
1281 		return 0;
1282 	}
1283 	sldns_buffer_skip(c->buffer, r);
1284 	if(sldns_buffer_remaining(c->buffer) <= 0) {
1285 		tcp_callback_reader(c);
1286 	}
1287 	return 1;
1288 }
1289 
1290 /**
1291  * Handle tcp writing callback.
1292  * @param fd: file descriptor of socket.
1293  * @param c: comm point to write buffer out of.
1294  * @return: 0 on error
1295  */
1296 static int
1297 comm_point_tcp_handle_write(int fd, struct comm_point* c)
1298 {
1299 	ssize_t r;
1300 	log_assert(c->type == comm_tcp);
1301 	if(c->tcp_is_reading && !c->ssl)
1302 		return 0;
1303 	log_assert(fd != -1);
1304 	if(c->tcp_byte_count == 0 && c->tcp_check_nb_connect) {
1305 		/* check for pending error from nonblocking connect */
1306 		/* from Stevens, unix network programming, vol1, 3rd ed, p450*/
1307 		int error = 0;
1308 		socklen_t len = (socklen_t)sizeof(error);
1309 		if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error,
1310 			&len) < 0){
1311 #ifndef USE_WINSOCK
1312 			error = errno; /* on solaris errno is error */
1313 #else /* USE_WINSOCK */
1314 			error = WSAGetLastError();
1315 #endif
1316 		}
1317 #ifndef USE_WINSOCK
1318 #if defined(EINPROGRESS) && defined(EWOULDBLOCK)
1319 		if(error == EINPROGRESS || error == EWOULDBLOCK)
1320 			return 1; /* try again later */
1321 		else
1322 #endif
1323 		if(error != 0 && verbosity < 2)
1324 			return 0; /* silence lots of chatter in the logs */
1325                 else if(error != 0) {
1326 			log_err("tcp connect: %s", strerror(error));
1327 #else /* USE_WINSOCK */
1328 		/* examine error */
1329 		if(error == WSAEINPROGRESS)
1330 			return 1;
1331 		else if(error == WSAEWOULDBLOCK) {
1332 			winsock_tcp_wouldblock(&c->ev->ev, EV_WRITE);
1333 			return 1;
1334 		} else if(error != 0 && verbosity < 2)
1335 			return 0;
1336 		else if(error != 0) {
1337 			log_err("tcp connect: %s", wsa_strerror(error));
1338 #endif /* USE_WINSOCK */
1339 			log_addr(0, "remote address is", &c->repinfo.addr,
1340 				c->repinfo.addrlen);
1341 			return 0;
1342 		}
1343 	}
1344 	if(c->ssl)
1345 		return ssl_handle_it(c);
1346 
1347 	if(c->tcp_byte_count < sizeof(uint16_t)) {
1348 		uint16_t len = htons(sldns_buffer_limit(c->buffer));
1349 #ifdef HAVE_WRITEV
1350 		struct iovec iov[2];
1351 		iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
1352 		iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
1353 		iov[1].iov_base = sldns_buffer_begin(c->buffer);
1354 		iov[1].iov_len = sldns_buffer_limit(c->buffer);
1355 		log_assert(iov[0].iov_len > 0);
1356 		log_assert(iov[1].iov_len > 0);
1357 		r = writev(fd, iov, 2);
1358 #else /* HAVE_WRITEV */
1359 		r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_byte_count),
1360 			sizeof(uint16_t)-c->tcp_byte_count, 0);
1361 #endif /* HAVE_WRITEV */
1362 		if(r == -1) {
1363 #ifndef USE_WINSOCK
1364 #ifdef EPIPE
1365                 	if(errno == EPIPE && verbosity < 2)
1366                         	return 0; /* silence 'broken pipe' */
1367 #endif
1368 			if(errno == EINTR || errno == EAGAIN)
1369 				return 1;
1370 			log_err("tcp writev: %s", strerror(errno));
1371 #else
1372 			if(WSAGetLastError() == WSAENOTCONN)
1373 				return 1;
1374 			if(WSAGetLastError() == WSAEINPROGRESS)
1375 				return 1;
1376 			if(WSAGetLastError() == WSAEWOULDBLOCK) {
1377 				winsock_tcp_wouldblock(&c->ev->ev, EV_WRITE);
1378 				return 1;
1379 			}
1380 			log_err("tcp send s: %s",
1381 				wsa_strerror(WSAGetLastError()));
1382 #endif
1383 			log_addr(0, "remote address is", &c->repinfo.addr,
1384 				c->repinfo.addrlen);
1385 			return 0;
1386 		}
1387 		c->tcp_byte_count += r;
1388 		if(c->tcp_byte_count < sizeof(uint16_t))
1389 			return 1;
1390 		sldns_buffer_set_position(c->buffer, c->tcp_byte_count -
1391 			sizeof(uint16_t));
1392 		if(sldns_buffer_remaining(c->buffer) == 0) {
1393 			tcp_callback_writer(c);
1394 			return 1;
1395 		}
1396 	}
1397 	log_assert(sldns_buffer_remaining(c->buffer) > 0);
1398 	r = send(fd, (void*)sldns_buffer_current(c->buffer),
1399 		sldns_buffer_remaining(c->buffer), 0);
1400 	if(r == -1) {
1401 #ifndef USE_WINSOCK
1402 		if(errno == EINTR || errno == EAGAIN)
1403 			return 1;
1404 		log_err("tcp send r: %s", strerror(errno));
1405 #else
1406 		if(WSAGetLastError() == WSAEINPROGRESS)
1407 			return 1;
1408 		if(WSAGetLastError() == WSAEWOULDBLOCK) {
1409 			winsock_tcp_wouldblock(&c->ev->ev, EV_WRITE);
1410 			return 1;
1411 		}
1412 		log_err("tcp send r: %s",
1413 			wsa_strerror(WSAGetLastError()));
1414 #endif
1415 		log_addr(0, "remote address is", &c->repinfo.addr,
1416 			c->repinfo.addrlen);
1417 		return 0;
1418 	}
1419 	sldns_buffer_skip(c->buffer, r);
1420 
1421 	if(sldns_buffer_remaining(c->buffer) == 0) {
1422 		tcp_callback_writer(c);
1423 	}
1424 
1425 	return 1;
1426 }
1427 
1428 void
1429 comm_point_tcp_handle_callback(int fd, short event, void* arg)
1430 {
1431 	struct comm_point* c = (struct comm_point*)arg;
1432 	log_assert(c->type == comm_tcp);
1433 	comm_base_now(c->ev->base);
1434 
1435 	if(event&EV_READ) {
1436 		if(!comm_point_tcp_handle_read(fd, c, 0)) {
1437 			reclaim_tcp_handler(c);
1438 			if(!c->tcp_do_close) {
1439 				fptr_ok(fptr_whitelist_comm_point(
1440 					c->callback));
1441 				(void)(*c->callback)(c, c->cb_arg,
1442 					NETEVENT_CLOSED, NULL);
1443 			}
1444 		}
1445 		return;
1446 	}
1447 	if(event&EV_WRITE) {
1448 		if(!comm_point_tcp_handle_write(fd, c)) {
1449 			reclaim_tcp_handler(c);
1450 			if(!c->tcp_do_close) {
1451 				fptr_ok(fptr_whitelist_comm_point(
1452 					c->callback));
1453 				(void)(*c->callback)(c, c->cb_arg,
1454 					NETEVENT_CLOSED, NULL);
1455 			}
1456 		}
1457 		return;
1458 	}
1459 	if(event&EV_TIMEOUT) {
1460 		verbose(VERB_QUERY, "tcp took too long, dropped");
1461 		reclaim_tcp_handler(c);
1462 		if(!c->tcp_do_close) {
1463 			fptr_ok(fptr_whitelist_comm_point(c->callback));
1464 			(void)(*c->callback)(c, c->cb_arg,
1465 				NETEVENT_TIMEOUT, NULL);
1466 		}
1467 		return;
1468 	}
1469 	log_err("Ignored event %d for tcphdl.", event);
1470 }
1471 
1472 void comm_point_local_handle_callback(int fd, short event, void* arg)
1473 {
1474 	struct comm_point* c = (struct comm_point*)arg;
1475 	log_assert(c->type == comm_local);
1476 	comm_base_now(c->ev->base);
1477 
1478 	if(event&EV_READ) {
1479 		if(!comm_point_tcp_handle_read(fd, c, 1)) {
1480 			fptr_ok(fptr_whitelist_comm_point(c->callback));
1481 			(void)(*c->callback)(c, c->cb_arg, NETEVENT_CLOSED,
1482 				NULL);
1483 		}
1484 		return;
1485 	}
1486 	log_err("Ignored event %d for localhdl.", event);
1487 }
1488 
1489 void comm_point_raw_handle_callback(int ATTR_UNUSED(fd),
1490 	short event, void* arg)
1491 {
1492 	struct comm_point* c = (struct comm_point*)arg;
1493 	int err = NETEVENT_NOERROR;
1494 	log_assert(c->type == comm_raw);
1495 	comm_base_now(c->ev->base);
1496 
1497 	if(event&EV_TIMEOUT)
1498 		err = NETEVENT_TIMEOUT;
1499 	fptr_ok(fptr_whitelist_comm_point_raw(c->callback));
1500 	(void)(*c->callback)(c, c->cb_arg, err, NULL);
1501 }
1502 
1503 struct comm_point*
1504 comm_point_create_udp(struct comm_base *base, int fd, sldns_buffer* buffer,
1505 	comm_point_callback_t* callback, void* callback_arg)
1506 {
1507 	struct comm_point* c = (struct comm_point*)calloc(1,
1508 		sizeof(struct comm_point));
1509 	short evbits;
1510 	if(!c)
1511 		return NULL;
1512 	c->ev = (struct internal_event*)calloc(1,
1513 		sizeof(struct internal_event));
1514 	if(!c->ev) {
1515 		free(c);
1516 		return NULL;
1517 	}
1518 	c->ev->base = base;
1519 	c->fd = fd;
1520 	c->buffer = buffer;
1521 	c->timeout = NULL;
1522 	c->tcp_is_reading = 0;
1523 	c->tcp_byte_count = 0;
1524 	c->tcp_parent = NULL;
1525 	c->max_tcp_count = 0;
1526 	c->tcp_handlers = NULL;
1527 	c->tcp_free = NULL;
1528 	c->type = comm_udp;
1529 	c->tcp_do_close = 0;
1530 	c->do_not_close = 0;
1531 	c->tcp_do_toggle_rw = 0;
1532 	c->tcp_check_nb_connect = 0;
1533 	c->inuse = 0;
1534 	c->callback = callback;
1535 	c->cb_arg = callback_arg;
1536 	evbits = EV_READ | EV_PERSIST;
1537 	/* libevent stuff */
1538 	event_set(&c->ev->ev, c->fd, evbits, comm_point_udp_callback, c);
1539 	if(event_base_set(base->eb->base, &c->ev->ev) != 0) {
1540 		log_err("could not baseset udp event");
1541 		comm_point_delete(c);
1542 		return NULL;
1543 	}
1544 	if(fd!=-1 && event_add(&c->ev->ev, c->timeout) != 0 ) {
1545 		log_err("could not add udp event");
1546 		comm_point_delete(c);
1547 		return NULL;
1548 	}
1549 	return c;
1550 }
1551 
1552 struct comm_point*
1553 comm_point_create_udp_ancil(struct comm_base *base, int fd,
1554 	sldns_buffer* buffer,
1555 	comm_point_callback_t* callback, void* callback_arg)
1556 {
1557 	struct comm_point* c = (struct comm_point*)calloc(1,
1558 		sizeof(struct comm_point));
1559 	short evbits;
1560 	if(!c)
1561 		return NULL;
1562 	c->ev = (struct internal_event*)calloc(1,
1563 		sizeof(struct internal_event));
1564 	if(!c->ev) {
1565 		free(c);
1566 		return NULL;
1567 	}
1568 	c->ev->base = base;
1569 	c->fd = fd;
1570 	c->buffer = buffer;
1571 	c->timeout = NULL;
1572 	c->tcp_is_reading = 0;
1573 	c->tcp_byte_count = 0;
1574 	c->tcp_parent = NULL;
1575 	c->max_tcp_count = 0;
1576 	c->tcp_handlers = NULL;
1577 	c->tcp_free = NULL;
1578 	c->type = comm_udp;
1579 	c->tcp_do_close = 0;
1580 	c->do_not_close = 0;
1581 	c->inuse = 0;
1582 	c->tcp_do_toggle_rw = 0;
1583 	c->tcp_check_nb_connect = 0;
1584 	c->callback = callback;
1585 	c->cb_arg = callback_arg;
1586 	evbits = EV_READ | EV_PERSIST;
1587 	/* libevent stuff */
1588 	event_set(&c->ev->ev, c->fd, evbits, comm_point_udp_ancil_callback, c);
1589 	if(event_base_set(base->eb->base, &c->ev->ev) != 0) {
1590 		log_err("could not baseset udp event");
1591 		comm_point_delete(c);
1592 		return NULL;
1593 	}
1594 	if(fd!=-1 && event_add(&c->ev->ev, c->timeout) != 0 ) {
1595 		log_err("could not add udp event");
1596 		comm_point_delete(c);
1597 		return NULL;
1598 	}
1599 	return c;
1600 }
1601 
1602 static struct comm_point*
1603 comm_point_create_tcp_handler(struct comm_base *base,
1604 	struct comm_point* parent, size_t bufsize,
1605         comm_point_callback_t* callback, void* callback_arg)
1606 {
1607 	struct comm_point* c = (struct comm_point*)calloc(1,
1608 		sizeof(struct comm_point));
1609 	short evbits;
1610 	if(!c)
1611 		return NULL;
1612 	c->ev = (struct internal_event*)calloc(1,
1613 		sizeof(struct internal_event));
1614 	if(!c->ev) {
1615 		free(c);
1616 		return NULL;
1617 	}
1618 	c->ev->base = base;
1619 	c->fd = -1;
1620 	c->buffer = sldns_buffer_new(bufsize);
1621 	if(!c->buffer) {
1622 		free(c->ev);
1623 		free(c);
1624 		return NULL;
1625 	}
1626 	c->timeout = (struct timeval*)malloc(sizeof(struct timeval));
1627 	if(!c->timeout) {
1628 		sldns_buffer_free(c->buffer);
1629 		free(c->ev);
1630 		free(c);
1631 		return NULL;
1632 	}
1633 	c->tcp_is_reading = 0;
1634 	c->tcp_byte_count = 0;
1635 	c->tcp_parent = parent;
1636 	c->max_tcp_count = 0;
1637 	c->tcp_handlers = NULL;
1638 	c->tcp_free = NULL;
1639 	c->type = comm_tcp;
1640 	c->tcp_do_close = 0;
1641 	c->do_not_close = 0;
1642 	c->tcp_do_toggle_rw = 1;
1643 	c->tcp_check_nb_connect = 0;
1644 	c->repinfo.c = c;
1645 	c->callback = callback;
1646 	c->cb_arg = callback_arg;
1647 	/* add to parent free list */
1648 	c->tcp_free = parent->tcp_free;
1649 	parent->tcp_free = c;
1650 	/* libevent stuff */
1651 	evbits = EV_PERSIST | EV_READ | EV_TIMEOUT;
1652 	event_set(&c->ev->ev, c->fd, evbits, comm_point_tcp_handle_callback, c);
1653 	if(event_base_set(base->eb->base, &c->ev->ev) != 0)
1654 	{
1655 		log_err("could not basetset tcphdl event");
1656 		parent->tcp_free = c->tcp_free;
1657 		free(c->ev);
1658 		free(c);
1659 		return NULL;
1660 	}
1661 	return c;
1662 }
1663 
1664 struct comm_point*
1665 comm_point_create_tcp(struct comm_base *base, int fd, int num, size_t bufsize,
1666         comm_point_callback_t* callback, void* callback_arg)
1667 {
1668 	struct comm_point* c = (struct comm_point*)calloc(1,
1669 		sizeof(struct comm_point));
1670 	short evbits;
1671 	int i;
1672 	/* first allocate the TCP accept listener */
1673 	if(!c)
1674 		return NULL;
1675 	c->ev = (struct internal_event*)calloc(1,
1676 		sizeof(struct internal_event));
1677 	if(!c->ev) {
1678 		free(c);
1679 		return NULL;
1680 	}
1681 	c->ev->base = base;
1682 	c->fd = fd;
1683 	c->buffer = NULL;
1684 	c->timeout = NULL;
1685 	c->tcp_is_reading = 0;
1686 	c->tcp_byte_count = 0;
1687 	c->tcp_parent = NULL;
1688 	c->max_tcp_count = num;
1689 	c->tcp_handlers = (struct comm_point**)calloc((size_t)num,
1690 		sizeof(struct comm_point*));
1691 	if(!c->tcp_handlers) {
1692 		free(c->ev);
1693 		free(c);
1694 		return NULL;
1695 	}
1696 	c->tcp_free = NULL;
1697 	c->type = comm_tcp_accept;
1698 	c->tcp_do_close = 0;
1699 	c->do_not_close = 0;
1700 	c->tcp_do_toggle_rw = 0;
1701 	c->tcp_check_nb_connect = 0;
1702 	c->callback = NULL;
1703 	c->cb_arg = NULL;
1704 	evbits = EV_READ | EV_PERSIST;
1705 	/* libevent stuff */
1706 	event_set(&c->ev->ev, c->fd, evbits, comm_point_tcp_accept_callback, c);
1707 	if(event_base_set(base->eb->base, &c->ev->ev) != 0 ||
1708 		event_add(&c->ev->ev, c->timeout) != 0 )
1709 	{
1710 		log_err("could not add tcpacc event");
1711 		comm_point_delete(c);
1712 		return NULL;
1713 	}
1714 
1715 	/* now prealloc the tcp handlers */
1716 	for(i=0; i<num; i++) {
1717 		c->tcp_handlers[i] = comm_point_create_tcp_handler(base,
1718 			c, bufsize, callback, callback_arg);
1719 		if(!c->tcp_handlers[i]) {
1720 			comm_point_delete(c);
1721 			return NULL;
1722 		}
1723 	}
1724 
1725 	return c;
1726 }
1727 
1728 struct comm_point*
1729 comm_point_create_tcp_out(struct comm_base *base, size_t bufsize,
1730         comm_point_callback_t* callback, void* callback_arg)
1731 {
1732 	struct comm_point* c = (struct comm_point*)calloc(1,
1733 		sizeof(struct comm_point));
1734 	short evbits;
1735 	if(!c)
1736 		return NULL;
1737 	c->ev = (struct internal_event*)calloc(1,
1738 		sizeof(struct internal_event));
1739 	if(!c->ev) {
1740 		free(c);
1741 		return NULL;
1742 	}
1743 	c->ev->base = base;
1744 	c->fd = -1;
1745 	c->buffer = sldns_buffer_new(bufsize);
1746 	if(!c->buffer) {
1747 		free(c->ev);
1748 		free(c);
1749 		return NULL;
1750 	}
1751 	c->timeout = NULL;
1752 	c->tcp_is_reading = 0;
1753 	c->tcp_byte_count = 0;
1754 	c->tcp_parent = NULL;
1755 	c->max_tcp_count = 0;
1756 	c->tcp_handlers = NULL;
1757 	c->tcp_free = NULL;
1758 	c->type = comm_tcp;
1759 	c->tcp_do_close = 0;
1760 	c->do_not_close = 0;
1761 	c->tcp_do_toggle_rw = 1;
1762 	c->tcp_check_nb_connect = 1;
1763 	c->repinfo.c = c;
1764 	c->callback = callback;
1765 	c->cb_arg = callback_arg;
1766 	evbits = EV_PERSIST | EV_WRITE;
1767 	event_set(&c->ev->ev, c->fd, evbits, comm_point_tcp_handle_callback, c);
1768 	if(event_base_set(base->eb->base, &c->ev->ev) != 0)
1769 	{
1770 		log_err("could not basetset tcpout event");
1771 		sldns_buffer_free(c->buffer);
1772 		free(c->ev);
1773 		free(c);
1774 		return NULL;
1775 	}
1776 
1777 	return c;
1778 }
1779 
1780 struct comm_point*
1781 comm_point_create_local(struct comm_base *base, int fd, size_t bufsize,
1782         comm_point_callback_t* callback, void* callback_arg)
1783 {
1784 	struct comm_point* c = (struct comm_point*)calloc(1,
1785 		sizeof(struct comm_point));
1786 	short evbits;
1787 	if(!c)
1788 		return NULL;
1789 	c->ev = (struct internal_event*)calloc(1,
1790 		sizeof(struct internal_event));
1791 	if(!c->ev) {
1792 		free(c);
1793 		return NULL;
1794 	}
1795 	c->ev->base = base;
1796 	c->fd = fd;
1797 	c->buffer = sldns_buffer_new(bufsize);
1798 	if(!c->buffer) {
1799 		free(c->ev);
1800 		free(c);
1801 		return NULL;
1802 	}
1803 	c->timeout = NULL;
1804 	c->tcp_is_reading = 1;
1805 	c->tcp_byte_count = 0;
1806 	c->tcp_parent = NULL;
1807 	c->max_tcp_count = 0;
1808 	c->tcp_handlers = NULL;
1809 	c->tcp_free = NULL;
1810 	c->type = comm_local;
1811 	c->tcp_do_close = 0;
1812 	c->do_not_close = 1;
1813 	c->tcp_do_toggle_rw = 0;
1814 	c->tcp_check_nb_connect = 0;
1815 	c->callback = callback;
1816 	c->cb_arg = callback_arg;
1817 	/* libevent stuff */
1818 	evbits = EV_PERSIST | EV_READ;
1819 	event_set(&c->ev->ev, c->fd, evbits, comm_point_local_handle_callback,
1820 		c);
1821 	if(event_base_set(base->eb->base, &c->ev->ev) != 0 ||
1822 		event_add(&c->ev->ev, c->timeout) != 0 )
1823 	{
1824 		log_err("could not add localhdl event");
1825 		free(c->ev);
1826 		free(c);
1827 		return NULL;
1828 	}
1829 	return c;
1830 }
1831 
1832 struct comm_point*
1833 comm_point_create_raw(struct comm_base* base, int fd, int writing,
1834 	comm_point_callback_t* callback, void* callback_arg)
1835 {
1836 	struct comm_point* c = (struct comm_point*)calloc(1,
1837 		sizeof(struct comm_point));
1838 	short evbits;
1839 	if(!c)
1840 		return NULL;
1841 	c->ev = (struct internal_event*)calloc(1,
1842 		sizeof(struct internal_event));
1843 	if(!c->ev) {
1844 		free(c);
1845 		return NULL;
1846 	}
1847 	c->ev->base = base;
1848 	c->fd = fd;
1849 	c->buffer = NULL;
1850 	c->timeout = NULL;
1851 	c->tcp_is_reading = 0;
1852 	c->tcp_byte_count = 0;
1853 	c->tcp_parent = NULL;
1854 	c->max_tcp_count = 0;
1855 	c->tcp_handlers = NULL;
1856 	c->tcp_free = NULL;
1857 	c->type = comm_raw;
1858 	c->tcp_do_close = 0;
1859 	c->do_not_close = 1;
1860 	c->tcp_do_toggle_rw = 0;
1861 	c->tcp_check_nb_connect = 0;
1862 	c->callback = callback;
1863 	c->cb_arg = callback_arg;
1864 	/* libevent stuff */
1865 	if(writing)
1866 		evbits = EV_PERSIST | EV_WRITE;
1867 	else 	evbits = EV_PERSIST | EV_READ;
1868 	event_set(&c->ev->ev, c->fd, evbits, comm_point_raw_handle_callback,
1869 		c);
1870 	if(event_base_set(base->eb->base, &c->ev->ev) != 0 ||
1871 		event_add(&c->ev->ev, c->timeout) != 0 )
1872 	{
1873 		log_err("could not add rawhdl event");
1874 		free(c->ev);
1875 		free(c);
1876 		return NULL;
1877 	}
1878 	return c;
1879 }
1880 
1881 void
1882 comm_point_close(struct comm_point* c)
1883 {
1884 	if(!c)
1885 		return;
1886 	if(c->fd != -1)
1887 		if(event_del(&c->ev->ev) != 0) {
1888 			log_err("could not event_del on close");
1889 		}
1890 	/* close fd after removing from event lists, or epoll.. is messed up */
1891 	if(c->fd != -1 && !c->do_not_close) {
1892 		verbose(VERB_ALGO, "close fd %d", c->fd);
1893 #ifndef USE_WINSOCK
1894 		close(c->fd);
1895 #else
1896 		closesocket(c->fd);
1897 #endif
1898 	}
1899 	c->fd = -1;
1900 }
1901 
1902 void
1903 comm_point_delete(struct comm_point* c)
1904 {
1905 	if(!c)
1906 		return;
1907 	if(c->type == comm_tcp && c->ssl) {
1908 #ifdef HAVE_SSL
1909 		SSL_shutdown(c->ssl);
1910 		SSL_free(c->ssl);
1911 #endif
1912 	}
1913 	comm_point_close(c);
1914 	if(c->tcp_handlers) {
1915 		int i;
1916 		for(i=0; i<c->max_tcp_count; i++)
1917 			comm_point_delete(c->tcp_handlers[i]);
1918 		free(c->tcp_handlers);
1919 	}
1920 	free(c->timeout);
1921 	if(c->type == comm_tcp || c->type == comm_local)
1922 		sldns_buffer_free(c->buffer);
1923 	free(c->ev);
1924 	free(c);
1925 }
1926 
1927 void
1928 comm_point_send_reply(struct comm_reply *repinfo)
1929 {
1930 	log_assert(repinfo && repinfo->c);
1931 	if(repinfo->c->type == comm_udp) {
1932 		if(repinfo->srctype)
1933 			comm_point_send_udp_msg_if(repinfo->c,
1934 			repinfo->c->buffer, (struct sockaddr*)&repinfo->addr,
1935 			repinfo->addrlen, repinfo);
1936 		else
1937 			comm_point_send_udp_msg(repinfo->c, repinfo->c->buffer,
1938 			(struct sockaddr*)&repinfo->addr, repinfo->addrlen);
1939 	} else {
1940 		comm_point_start_listening(repinfo->c, -1, TCP_QUERY_TIMEOUT);
1941 	}
1942 }
1943 
1944 void
1945 comm_point_drop_reply(struct comm_reply* repinfo)
1946 {
1947 	if(!repinfo)
1948 		return;
1949 	log_assert(repinfo && repinfo->c);
1950 	log_assert(repinfo->c->type != comm_tcp_accept);
1951 	if(repinfo->c->type == comm_udp)
1952 		return;
1953 	reclaim_tcp_handler(repinfo->c);
1954 }
1955 
1956 void
1957 comm_point_stop_listening(struct comm_point* c)
1958 {
1959 	verbose(VERB_ALGO, "comm point stop listening %d", c->fd);
1960 	if(event_del(&c->ev->ev) != 0) {
1961 		log_err("event_del error to stoplisten");
1962 	}
1963 }
1964 
1965 void
1966 comm_point_start_listening(struct comm_point* c, int newfd, int sec)
1967 {
1968 	verbose(VERB_ALGO, "comm point start listening %d",
1969 		c->fd==-1?newfd:c->fd);
1970 	if(c->type == comm_tcp_accept && !c->tcp_free) {
1971 		/* no use to start listening no free slots. */
1972 		return;
1973 	}
1974 	if(sec != -1 && sec != 0) {
1975 		if(!c->timeout) {
1976 			c->timeout = (struct timeval*)malloc(sizeof(
1977 				struct timeval));
1978 			if(!c->timeout) {
1979 				log_err("cpsl: malloc failed. No net read.");
1980 				return;
1981 			}
1982 		}
1983 		c->ev->ev.ev_events |= EV_TIMEOUT;
1984 #ifndef S_SPLINT_S /* splint fails on struct timeval. */
1985 		c->timeout->tv_sec = sec;
1986 		c->timeout->tv_usec = 0;
1987 #endif /* S_SPLINT_S */
1988 	}
1989 	if(c->type == comm_tcp) {
1990 		c->ev->ev.ev_events &= ~(EV_READ|EV_WRITE);
1991 		if(c->tcp_is_reading)
1992 			c->ev->ev.ev_events |= EV_READ;
1993 		else	c->ev->ev.ev_events |= EV_WRITE;
1994 	}
1995 	if(newfd != -1) {
1996 		if(c->fd != -1) {
1997 #ifndef USE_WINSOCK
1998 			close(c->fd);
1999 #else
2000 			closesocket(c->fd);
2001 #endif
2002 		}
2003 		c->fd = newfd;
2004 		c->ev->ev.ev_fd = c->fd;
2005 	}
2006 	if(event_add(&c->ev->ev, sec==0?NULL:c->timeout) != 0) {
2007 		log_err("event_add failed. in cpsl.");
2008 	}
2009 }
2010 
2011 void comm_point_listen_for_rw(struct comm_point* c, int rd, int wr)
2012 {
2013 	verbose(VERB_ALGO, "comm point listen_for_rw %d %d", c->fd, wr);
2014 	if(event_del(&c->ev->ev) != 0) {
2015 		log_err("event_del error to cplf");
2016 	}
2017 	c->ev->ev.ev_events &= ~(EV_READ|EV_WRITE);
2018 	if(rd) c->ev->ev.ev_events |= EV_READ;
2019 	if(wr) c->ev->ev.ev_events |= EV_WRITE;
2020 	if(event_add(&c->ev->ev, c->timeout) != 0) {
2021 		log_err("event_add failed. in cplf.");
2022 	}
2023 }
2024 
2025 size_t comm_point_get_mem(struct comm_point* c)
2026 {
2027 	size_t s;
2028 	if(!c)
2029 		return 0;
2030 	s = sizeof(*c) + sizeof(*c->ev);
2031 	if(c->timeout)
2032 		s += sizeof(*c->timeout);
2033 	if(c->type == comm_tcp || c->type == comm_local)
2034 		s += sizeof(*c->buffer) + sldns_buffer_capacity(c->buffer);
2035 	if(c->type == comm_tcp_accept) {
2036 		int i;
2037 		for(i=0; i<c->max_tcp_count; i++)
2038 			s += comm_point_get_mem(c->tcp_handlers[i]);
2039 	}
2040 	return s;
2041 }
2042 
2043 struct comm_timer*
2044 comm_timer_create(struct comm_base* base, void (*cb)(void*), void* cb_arg)
2045 {
2046 	struct comm_timer *tm = (struct comm_timer*)calloc(1,
2047 		sizeof(struct comm_timer));
2048 	if(!tm)
2049 		return NULL;
2050 	tm->ev_timer = (struct internal_timer*)calloc(1,
2051 		sizeof(struct internal_timer));
2052 	if(!tm->ev_timer) {
2053 		log_err("malloc failed");
2054 		free(tm);
2055 		return NULL;
2056 	}
2057 	tm->ev_timer->base = base;
2058 	tm->callback = cb;
2059 	tm->cb_arg = cb_arg;
2060 	event_set(&tm->ev_timer->ev, -1, EV_TIMEOUT,
2061 		comm_timer_callback, tm);
2062 	if(event_base_set(base->eb->base, &tm->ev_timer->ev) != 0) {
2063 		log_err("timer_create: event_base_set failed.");
2064 		free(tm->ev_timer);
2065 		free(tm);
2066 		return NULL;
2067 	}
2068 	return tm;
2069 }
2070 
2071 void
2072 comm_timer_disable(struct comm_timer* timer)
2073 {
2074 	if(!timer)
2075 		return;
2076 	evtimer_del(&timer->ev_timer->ev);
2077 	timer->ev_timer->enabled = 0;
2078 }
2079 
2080 void
2081 comm_timer_set(struct comm_timer* timer, struct timeval* tv)
2082 {
2083 	log_assert(tv);
2084 	if(timer->ev_timer->enabled)
2085 		comm_timer_disable(timer);
2086 	event_set(&timer->ev_timer->ev, -1, EV_TIMEOUT,
2087 		comm_timer_callback, timer);
2088 	if(event_base_set(timer->ev_timer->base->eb->base,
2089 		&timer->ev_timer->ev) != 0)
2090 		log_err("comm_timer_set: set_base failed.");
2091 	if(evtimer_add(&timer->ev_timer->ev, tv) != 0)
2092 		log_err("comm_timer_set: evtimer_add failed.");
2093 	timer->ev_timer->enabled = 1;
2094 }
2095 
2096 void
2097 comm_timer_delete(struct comm_timer* timer)
2098 {
2099 	if(!timer)
2100 		return;
2101 	comm_timer_disable(timer);
2102 	free(timer->ev_timer);
2103 	free(timer);
2104 }
2105 
2106 void
2107 comm_timer_callback(int ATTR_UNUSED(fd), short event, void* arg)
2108 {
2109 	struct comm_timer* tm = (struct comm_timer*)arg;
2110 	if(!(event&EV_TIMEOUT))
2111 		return;
2112 	comm_base_now(tm->ev_timer->base);
2113 	tm->ev_timer->enabled = 0;
2114 	fptr_ok(fptr_whitelist_comm_timer(tm->callback));
2115 	(*tm->callback)(tm->cb_arg);
2116 }
2117 
2118 int
2119 comm_timer_is_set(struct comm_timer* timer)
2120 {
2121 	return (int)timer->ev_timer->enabled;
2122 }
2123 
2124 size_t
2125 comm_timer_get_mem(struct comm_timer* timer)
2126 {
2127 	return sizeof(*timer) + sizeof(struct internal_timer);
2128 }
2129 
2130 struct comm_signal*
2131 comm_signal_create(struct comm_base* base,
2132         void (*callback)(int, void*), void* cb_arg)
2133 {
2134 	struct comm_signal* com = (struct comm_signal*)malloc(
2135 		sizeof(struct comm_signal));
2136 	if(!com) {
2137 		log_err("malloc failed");
2138 		return NULL;
2139 	}
2140 	com->base = base;
2141 	com->callback = callback;
2142 	com->cb_arg = cb_arg;
2143 	com->ev_signal = NULL;
2144 	return com;
2145 }
2146 
2147 void
2148 comm_signal_callback(int sig, short event, void* arg)
2149 {
2150 	struct comm_signal* comsig = (struct comm_signal*)arg;
2151 	if(!(event & EV_SIGNAL))
2152 		return;
2153 	comm_base_now(comsig->base);
2154 	fptr_ok(fptr_whitelist_comm_signal(comsig->callback));
2155 	(*comsig->callback)(sig, comsig->cb_arg);
2156 }
2157 
2158 int
2159 comm_signal_bind(struct comm_signal* comsig, int sig)
2160 {
2161 	struct internal_signal* entry = (struct internal_signal*)calloc(1,
2162 		sizeof(struct internal_signal));
2163 	if(!entry) {
2164 		log_err("malloc failed");
2165 		return 0;
2166 	}
2167 	log_assert(comsig);
2168 	/* add signal event */
2169 	signal_set(&entry->ev, sig, comm_signal_callback, comsig);
2170 	if(event_base_set(comsig->base->eb->base, &entry->ev) != 0) {
2171 		log_err("Could not set signal base");
2172 		free(entry);
2173 		return 0;
2174 	}
2175 	if(signal_add(&entry->ev, NULL) != 0) {
2176 		log_err("Could not add signal handler");
2177 		free(entry);
2178 		return 0;
2179 	}
2180 	/* link into list */
2181 	entry->next = comsig->ev_signal;
2182 	comsig->ev_signal = entry;
2183 	return 1;
2184 }
2185 
2186 void
2187 comm_signal_delete(struct comm_signal* comsig)
2188 {
2189 	struct internal_signal* p, *np;
2190 	if(!comsig)
2191 		return;
2192 	p=comsig->ev_signal;
2193 	while(p) {
2194 		np = p->next;
2195 		signal_del(&p->ev);
2196 		free(p);
2197 		p = np;
2198 	}
2199 	free(comsig);
2200 }
2201