1 /*
2 * util/netevent.c - event notification
3 *
4 * Copyright (c) 2007, NLnet Labs. All rights reserved.
5 *
6 * This software is open source.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
14 *
15 * Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
18 *
19 * Neither the name of the NLNET LABS nor the names of its contributors may
20 * be used to endorse or promote products derived from this software without
21 * specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */
35
36 /**
37 * \file
38 *
39 * This file contains event notification functions.
40 */
41 #include "config.h"
42 #include "util/netevent.h"
43 #include "util/ub_event.h"
44 #include "util/log.h"
45 #include "util/net_help.h"
46 #include "util/tcp_conn_limit.h"
47 #include "util/fptr_wlist.h"
48 #include "util/proxy_protocol.h"
49 #include "util/timeval_func.h"
50 #include "sldns/pkthdr.h"
51 #include "sldns/sbuffer.h"
52 #include "sldns/str2wire.h"
53 #include "dnstap/dnstap.h"
54 #include "dnscrypt/dnscrypt.h"
55 #include "services/listen_dnsport.h"
56 #include "util/random.h"
57 #ifdef HAVE_SYS_TYPES_H
58 #include <sys/types.h>
59 #endif
60 #ifdef HAVE_SYS_SOCKET_H
61 #include <sys/socket.h>
62 #endif
63 #ifdef HAVE_NETDB_H
64 #include <netdb.h>
65 #endif
66 #ifdef HAVE_POLL_H
67 #include <poll.h>
68 #endif
69
70 #ifdef HAVE_OPENSSL_SSL_H
71 #include <openssl/ssl.h>
72 #endif
73 #ifdef HAVE_OPENSSL_ERR_H
74 #include <openssl/err.h>
75 #endif
76
77 #ifdef HAVE_NGTCP2
78 #include <ngtcp2/ngtcp2.h>
79 #include <ngtcp2/ngtcp2_crypto.h>
80 #endif
81
82 #ifdef HAVE_LINUX_NET_TSTAMP_H
83 #include <linux/net_tstamp.h>
84 #endif
85
86 /* -------- Start of local definitions -------- */
/** if CMSG_ALIGN is not defined on this platform, a workaround.
 * The platform's own alignment helper is preferred when one is visible;
 * otherwise lengths are rounded up to a multiple of sizeof(long).
 * The second branch tests _CMSG_DATA_ALIGN, the macro it actually uses,
 * so that the alias can never be created for a macro that is absent. */
#ifndef CMSG_ALIGN
#  ifdef __CMSG_ALIGN
#    define CMSG_ALIGN(n) __CMSG_ALIGN(n)
#  elif defined(_CMSG_DATA_ALIGN)
#    define CMSG_ALIGN(n) _CMSG_DATA_ALIGN(n)
#  else
#    define CMSG_ALIGN(len) (((len)+sizeof(long)-1) & ~(sizeof(long)-1))
#  endif
#endif
97
/** if CMSG_LEN is not defined on this platform, a workaround.
 * Expands to the aligned size of struct cmsghdr plus len payload bytes;
 * nothing is added after the payload. */
#ifndef CMSG_LEN
#  define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr))+(len))
#endif

/** if CMSG_SPACE is not defined on this platform, a workaround.
 * Expands to the aligned payload size plus the aligned header size.
 * The header is aligned with _CMSG_HDR_ALIGN when the platform has it,
 * and with CMSG_ALIGN otherwise. */
#ifndef CMSG_SPACE
#  ifdef _CMSG_HDR_ALIGN
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+_CMSG_HDR_ALIGN(sizeof(struct cmsghdr)))
#  else
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+CMSG_ALIGN(sizeof(struct cmsghdr)))
#  endif
#endif
111
/** The TCP writing query timeout in milliseconds */
#define TCP_QUERY_TIMEOUT 120000
/** The minimum actual TCP timeout to use, regardless of what we advertise,
 * in msec */
#define TCP_QUERY_TIMEOUT_MINIMUM 200

#ifndef NONBLOCKING_IS_BROKEN
/** number of UDP reads to perform per read indication from select */
#define NUM_UDP_PER_SELECT 100
#else
/** with NONBLOCKING_IS_BROKEN set, only one UDP read is performed per
 * read indication */
#define NUM_UDP_PER_SELECT 1
#endif

/** timeout in millisec to wait for write to unblock, packets dropped after.
 * One tenth of this value is also the base of the backoff sleep in the
 * UDP send retry loops. */
#define SEND_BLOCKED_WAIT_TIMEOUT 200
/** max number of times to wait for write to unblock, packets dropped after.*/
#define SEND_BLOCKED_MAX_RETRY 5
129
/** Let's make timestamping code cleaner and redefine SO_TIMESTAMP*.
 * Each definition below is only a fallback for when the system headers
 * do not provide the option name.
 * NOTE(review): 29, 35 and 37 look like the Linux socket option
 * numbers - confirm before relying on them on another platform. */
#ifndef SO_TIMESTAMP
#define SO_TIMESTAMP 29
#endif
#ifndef SO_TIMESTAMPNS
#define SO_TIMESTAMPNS 35
#endif
#ifndef SO_TIMESTAMPING
#define SO_TIMESTAMPING 37
#endif
/**
 * The internal event structure for keeping ub_event info for the event.
 * Possibly other structures (list, tree) this is part of.
 * Code in this file reaches it through the ev member of a comm_point.
 */
struct internal_event {
	/** the comm base; the UDP send routines use it to reach the
	 * per-base log rate limit state */
	struct comm_base* base;
	/** ub_event event type; this is what comm_point_internal returns */
	struct ub_event* ev;
};
150
/**
 * Internal base structure, so that every thread has its own events.
 * It is allocated zeroed by the comm_base create functions and freed by
 * the comm_base delete functions.
 */
struct internal_base {
	/** ub_event event_base type. */
	struct ub_event_base* base;
	/** seconds time pointer points here; its address is passed to
	 * ub_default_event_base and handed out by comm_base_timept */
	time_t secs;
	/** timeval with current time; its address is handed out together
	 * with that of secs */
	struct timeval now;
	/** the event used for slow_accept timeouts */
	struct ub_event* slow_accept;
	/** true if slow_accept is enabled; when set, the delete functions
	 * remove and free the slow_accept event */
	int slow_accept_enabled;
	/** last log time for slow logging of file descriptor errors */
	time_t last_slow_log;
	/** last log time for slow logging of write wait failures; compared
	 * with secs to rate limit the "send udp blocked" message */
	time_t last_writewait_log;
};
170
/**
 * Internal timer structure, to store timer event in.
 * NOTE(review): super is the first member, presumably so that a pointer
 * to the comm_timer can be converted back to this structure - confirm
 * against the timer functions before moving members around.
 */
struct internal_timer {
	/** the super struct from which derived */
	struct comm_timer super;
	/** the comm base */
	struct comm_base* base;
	/** ub_event event type */
	struct ub_event* ev;
	/** is timer enabled */
	uint8_t enabled;
};
184
/**
 * Internal signal structure, to store signal event in.
 * The next member chains these entries into a singly linked list.
 */
struct internal_signal {
	/** ub_event event type */
	struct ub_event* ev;
	/** next in signal list */
	struct internal_signal* next;
};
194
/** create a tcp handler with a parent.
 * Forward declaration of a file-local function; the definition is not
 * part of the code that precedes this point. */
static struct comm_point* comm_point_create_tcp_handler(
	struct comm_base *base, struct comm_point* parent, size_t bufsize,
	struct sldns_buffer* spoolbuf, comm_point_callback_type* callback,
	void* callback_arg, struct unbound_socket* socket);
200
201 /* -------- End of local definitions -------- */
202
203 struct comm_base*
comm_base_create(int sigs)204 comm_base_create(int sigs)
205 {
206 struct comm_base* b = (struct comm_base*)calloc(1,
207 sizeof(struct comm_base));
208 const char *evnm="event", *evsys="", *evmethod="";
209
210 if(!b)
211 return NULL;
212 b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
213 if(!b->eb) {
214 free(b);
215 return NULL;
216 }
217 b->eb->base = ub_default_event_base(sigs, &b->eb->secs, &b->eb->now);
218 if(!b->eb->base) {
219 free(b->eb);
220 free(b);
221 return NULL;
222 }
223 ub_comm_base_now(b);
224 ub_get_event_sys(b->eb->base, &evnm, &evsys, &evmethod);
225 verbose(VERB_ALGO, "%s %s uses %s method.", evnm, evsys, evmethod);
226 return b;
227 }
228
229 struct comm_base*
comm_base_create_event(struct ub_event_base * base)230 comm_base_create_event(struct ub_event_base* base)
231 {
232 struct comm_base* b = (struct comm_base*)calloc(1,
233 sizeof(struct comm_base));
234 if(!b)
235 return NULL;
236 b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
237 if(!b->eb) {
238 free(b);
239 return NULL;
240 }
241 b->eb->base = base;
242 ub_comm_base_now(b);
243 return b;
244 }
245
246 void
comm_base_delete(struct comm_base * b)247 comm_base_delete(struct comm_base* b)
248 {
249 if(!b)
250 return;
251 if(b->eb->slow_accept_enabled) {
252 if(ub_event_del(b->eb->slow_accept) != 0) {
253 log_err("could not event_del slow_accept");
254 }
255 ub_event_free(b->eb->slow_accept);
256 }
257 ub_event_base_free(b->eb->base);
258 b->eb->base = NULL;
259 free(b->eb);
260 free(b);
261 }
262
263 void
comm_base_delete_no_base(struct comm_base * b)264 comm_base_delete_no_base(struct comm_base* b)
265 {
266 if(!b)
267 return;
268 if(b->eb->slow_accept_enabled) {
269 if(ub_event_del(b->eb->slow_accept) != 0) {
270 log_err("could not event_del slow_accept");
271 }
272 ub_event_free(b->eb->slow_accept);
273 }
274 b->eb->base = NULL;
275 free(b->eb);
276 free(b);
277 }
278
279 void
comm_base_timept(struct comm_base * b,time_t ** tt,struct timeval ** tv)280 comm_base_timept(struct comm_base* b, time_t** tt, struct timeval** tv)
281 {
282 *tt = &b->eb->secs;
283 *tv = &b->eb->now;
284 }
285
286 void
comm_base_dispatch(struct comm_base * b)287 comm_base_dispatch(struct comm_base* b)
288 {
289 int retval;
290 retval = ub_event_base_dispatch(b->eb->base);
291 if(retval < 0) {
292 fatal_exit("event_dispatch returned error %d, "
293 "errno is %s", retval, strerror(errno));
294 }
295 }
296
comm_base_exit(struct comm_base * b)297 void comm_base_exit(struct comm_base* b)
298 {
299 if(ub_event_base_loopexit(b->eb->base) != 0) {
300 log_err("Could not loopexit");
301 }
302 }
303
comm_base_set_slow_accept_handlers(struct comm_base * b,void (* stop_acc)(void *),void (* start_acc)(void *),void * arg)304 void comm_base_set_slow_accept_handlers(struct comm_base* b,
305 void (*stop_acc)(void*), void (*start_acc)(void*), void* arg)
306 {
307 b->stop_accept = stop_acc;
308 b->start_accept = start_acc;
309 b->cb_arg = arg;
310 }
311
comm_base_internal(struct comm_base * b)312 struct ub_event_base* comm_base_internal(struct comm_base* b)
313 {
314 return b->eb->base;
315 }
316
comm_point_internal(struct comm_point * c)317 struct ub_event* comm_point_internal(struct comm_point* c)
318 {
319 return c->ev->ev;
320 }
321
322 /** see if errno for udp has to be logged or not uses globals */
323 static int
udp_send_errno_needs_log(struct sockaddr * addr,socklen_t addrlen)324 udp_send_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
325 {
326 /* do not log transient errors (unless high verbosity) */
327 #if defined(ENETUNREACH) || defined(EHOSTDOWN) || defined(EHOSTUNREACH) || defined(ENETDOWN)
328 switch(errno) {
329 # ifdef ENETUNREACH
330 case ENETUNREACH:
331 # endif
332 # ifdef EHOSTDOWN
333 case EHOSTDOWN:
334 # endif
335 # ifdef EHOSTUNREACH
336 case EHOSTUNREACH:
337 # endif
338 # ifdef ENETDOWN
339 case ENETDOWN:
340 # endif
341 case EPERM:
342 case EACCES:
343 if(verbosity < VERB_ALGO)
344 return 0;
345 break;
346 default:
347 break;
348 }
349 #endif
350 /* permission denied is gotten for every send if the
351 * network is disconnected (on some OS), squelch it */
352 if( ((errno == EPERM)
353 # ifdef EADDRNOTAVAIL
354 /* 'Cannot assign requested address' also when disconnected */
355 || (errno == EADDRNOTAVAIL)
356 # endif
357 ) && verbosity < VERB_ALGO)
358 return 0;
359 # ifdef EADDRINUSE
360 /* If SO_REUSEADDR is set, we could try to connect to the same server
361 * from the same source port twice. */
362 if(errno == EADDRINUSE && verbosity < VERB_DETAIL)
363 return 0;
364 # endif
365 /* squelch errors where people deploy AAAA ::ffff:bla for
366 * authority servers, which we try for intranets. */
367 if(errno == EINVAL && addr_is_ip4mapped(
368 (struct sockaddr_storage*)addr, addrlen) &&
369 verbosity < VERB_DETAIL)
370 return 0;
371 /* SO_BROADCAST sockopt can give access to 255.255.255.255,
372 * but a dns cache does not need it. */
373 if(errno == EACCES && addr_is_broadcast(
374 (struct sockaddr_storage*)addr, addrlen) &&
375 verbosity < VERB_DETAIL)
376 return 0;
377 # ifdef ENOTCONN
378 /* For 0.0.0.0, ::0 targets it can return that socket is not connected.
379 * This can be ignored, and the address skipped. It remains
380 * possible to send there for completeness in configuration. */
381 if(errno == ENOTCONN && addr_is_any(
382 (struct sockaddr_storage*)addr, addrlen) &&
383 verbosity < VERB_DETAIL)
384 return 0;
385 # endif
386 return 1;
387 }
388
/**
 * See if the errno of a failed tcp connect has to be logged.
 * The decision is delegated to the udp send check, with the same
 * arguments.
 * @param addr: destination address of the failed connect.
 * @param addrlen: length of addr.
 * @return 0 when the error is squelched, 1 when it has to be logged.
 */
int tcp_connect_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
{
	int needs_log = udp_send_errno_needs_log(addr, addrlen);

	return needs_log;
}
393
/**
 * Send a UDP reply.
 * The packet is sent with sendto() to addr, or with send() when the
 * socket is connected. When the send fails with a "try again" style
 * error, it is retried up to SEND_BLOCKED_MAX_RETRY times; where poll is
 * available each retry first waits for the socket to become writable.
 * @param c: comm point whose fd is used for sending; the fd must not
 *	be -1.
 * @param packet: buffer with the data to send, from its begin up to
 *	sldns_buffer_remaining(packet) bytes.
 * @param addr: destination address; must be set, also for a connected
 *	socket, because it is used in the log messages.
 * @param addrlen: length of addr, must be larger than 0.
 * @param is_connected: if true send() is used, otherwise sendto().
 * @return 1 if the whole packet was sent, 0 if it was dropped or the
 *	send failed.
 */
int
comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet,
	struct sockaddr* addr, socklen_t addrlen, int is_connected)
{
	ssize_t sent;
	log_assert(c->fd != -1);
#ifdef UNBOUND_DEBUG
	if(sldns_buffer_remaining(packet) == 0)
		log_err("error: send empty UDP packet");
#endif
	log_assert(addr && addrlen > 0);
	if(!is_connected) {
		sent = sendto(c->fd, (void*)sldns_buffer_begin(packet),
			sldns_buffer_remaining(packet), 0,
			addr, addrlen);
	} else {
		sent = send(c->fd, (void*)sldns_buffer_begin(packet),
			sldns_buffer_remaining(packet), 0);
	}
	if(sent == -1) {
		/* try again and block, waiting for IO to complete,
		 * we want to send the answer, and we will wait for
		 * the ethernet interface buffer to have space. */
#ifndef USE_WINSOCK
		if(errno == EAGAIN || errno == EINTR ||
#  ifdef EWOULDBLOCK
			errno == EWOULDBLOCK ||
#  endif
			errno == ENOBUFS) {
#else
		if(WSAGetLastError() == WSAEINPROGRESS ||
			WSAGetLastError() == WSAEINTR ||
			WSAGetLastError() == WSAENOBUFS ||
			WSAGetLastError() == WSAEWOULDBLOCK) {
#endif
			int retries = 0;
			/* if we set the fd blocking, other threads suddenly
			 * have a blocking fd that they operate on */
			/* the loop ends when a send succeeds, the retries
			 * are used up, or the error is no longer one of the
			 * retryable ones */
			while(sent == -1 && retries < SEND_BLOCKED_MAX_RETRY && (
#ifndef USE_WINSOCK
				errno == EAGAIN || errno == EINTR ||
#  ifdef EWOULDBLOCK
				errno == EWOULDBLOCK ||
#  endif
				errno == ENOBUFS
#else
				WSAGetLastError() == WSAEINPROGRESS ||
				WSAGetLastError() == WSAEINTR ||
				WSAGetLastError() == WSAENOBUFS ||
				WSAGetLastError() == WSAEWOULDBLOCK
#endif
			)) {
#if defined(HAVE_POLL) || defined(USE_WINSOCK)
				/* remember the send error now, because the
				 * poll call below can overwrite errno */
				int send_nobufs = (
#ifndef USE_WINSOCK
					errno == ENOBUFS
#else
					WSAGetLastError() == WSAENOBUFS
#endif
				);
				struct pollfd p;
				int pret;
				memset(&p, 0, sizeof(p));
				p.fd = c->fd;
				p.events = POLLOUT
#ifndef USE_WINSOCK
					| POLLERR | POLLHUP
#endif
					;
#  ifndef USE_WINSOCK
				pret = poll(&p, 1, SEND_BLOCKED_WAIT_TIMEOUT);
#  else
				pret = WSAPoll(&p, 1,
					SEND_BLOCKED_WAIT_TIMEOUT);
#  endif
				if(pret == 0) {
					/* timer expired */
					/* the message is logged at most once
					 * per SLOW_LOG_TIME */
					struct comm_base* b = c->ev->base;
					if(b->eb->last_writewait_log+SLOW_LOG_TIME <=
						b->eb->secs) {
						b->eb->last_writewait_log = b->eb->secs;
						verbose(VERB_OPS, "send udp blocked "
							"for long, dropping packet.");
					}
					return 0;
				} else if(pret < 0 &&
#ifndef USE_WINSOCK
					errno != EAGAIN && errno != EINTR &&
#  ifdef EWOULDBLOCK
					errno != EWOULDBLOCK &&
#  endif
					errno != ENOMEM && errno != ENOBUFS
#else
					WSAGetLastError() != WSAEINPROGRESS &&
					WSAGetLastError() != WSAEINTR &&
					WSAGetLastError() != WSAENOBUFS &&
					WSAGetLastError() != WSAEWOULDBLOCK
#endif
					) {
					/* poll failed with an error that is
					 * not retryable, give up */
					log_err("poll udp out failed: %s",
						sock_strerror(errno));
					return 0;
				} else if((pret < 0 &&
#ifndef USE_WINSOCK
					( errno == ENOBUFS  /* Maybe some systems */
					|| errno == ENOMEM  /* Linux */
					|| errno == EAGAIN)  /* Macos, solaris, openbsd */
#else
					WSAGetLastError() == WSAENOBUFS
#endif
					) || (send_nobufs && retries > 0)) {
					/* ENOBUFS/ENOMEM/EAGAIN, and poll
					 * returned without
					 * a timeout. Or the retried send call
					 * returned ENOBUFS/ENOMEM/EAGAIN.
					 * It is good to wait a bit for the
					 * error to clear. */
					/* The timeout is 20*(2^(retries+1)),
					 * it increases exponentially, starting
					 * at 40 msec. After 5 tries, 1240 msec
					 * have passed in total, when poll
					 * returned the error, and 1200 msec
					 * when send returned the errors. */
#ifndef USE_WINSOCK
					pret = poll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1));
#else
					Sleep((SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1));
					pret = 0;
#endif
					/* a failed sleep is only logged, the
					 * send is retried regardless */
					if(pret < 0
#ifndef USE_WINSOCK
						&& errno != EAGAIN && errno != EINTR &&
#  ifdef EWOULDBLOCK
						errno != EWOULDBLOCK &&
#  endif
						errno != ENOMEM && errno != ENOBUFS
#else
						/* Sleep does not error */
#endif
					) {
						log_err("poll udp out timer failed: %s",
							sock_strerror(errno));
					}
				}
#endif /* defined(HAVE_POLL) || defined(USE_WINSOCK) */
				retries++;
				if (!is_connected) {
					sent = sendto(c->fd, (void*)sldns_buffer_begin(packet),
						sldns_buffer_remaining(packet), 0,
						addr, addrlen);
				} else {
					sent = send(c->fd, (void*)sldns_buffer_begin(packet),
						sldns_buffer_remaining(packet), 0);
				}
			}
		}
	}
	if(sent == -1) {
		/* the final failure; some errors are not worth a log line */
		if(!udp_send_errno_needs_log(addr, addrlen))
			return 0;
		if (!is_connected) {
			verbose(VERB_OPS, "sendto failed: %s", sock_strerror(errno));
		} else {
			verbose(VERB_OPS, "send failed: %s", sock_strerror(errno));
		}
		if(addr)
			log_addr(VERB_OPS, "remote address is",
				(struct sockaddr_storage*)addr, addrlen);
		return 0;
	} else if((size_t)sent != sldns_buffer_remaining(packet)) {
		/* a partial send counts as a failure */
		log_err("sent %d in place of %d bytes",
			(int)sent, (int)sldns_buffer_remaining(packet));
		return 0;
	}
	return 1;
}
571
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && (defined(HAVE_RECVMSG) || defined(HAVE_SENDMSG))
/**
 * Print debug ancillary info.
 * Logs the packet info stored in the reply with log_info: address and
 * interface index for srctype 6, interface index and address(es) for
 * srctype 4. Any other srctype is reported as unknown.
 * The interface indexes are cast to int before they are printed with %d,
 * because the type of the index members differs between platforms (the
 * IPv6 one is an unsigned int) and a variadic argument has to match its
 * conversion specification.
 * @param str: prefix for the log line.
 * @param r: reply of which srctype and pktinfo are printed.
 */
static void p_ancil(const char* str, struct comm_reply* r)
{
	if(r->srctype != 4 && r->srctype != 6) {
		log_info("%s: unknown srctype %d", str, r->srctype);
		return;
	}

	if(r->srctype == 6) {
#ifdef IPV6_PKTINFO
		char buf[1024];
		if(inet_ntop(AF_INET6, &r->pktinfo.v6info.ipi6_addr,
			buf, (socklen_t)sizeof(buf)) == NULL) {
			(void)strlcpy(buf, "(inet_ntop error)", sizeof(buf));
		}
		buf[sizeof(buf)-1]=0;
		log_info("%s: %s %d", str, buf,
			(int)r->pktinfo.v6info.ipi6_ifindex);
#endif
	} else if(r->srctype == 4) {
#ifdef IP_PKTINFO
		char buf1[1024], buf2[1024];
		if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_addr,
			buf1, (socklen_t)sizeof(buf1)) == NULL) {
			(void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1));
		}
		buf1[sizeof(buf1)-1]=0;
#ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST
		if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_spec_dst,
			buf2, (socklen_t)sizeof(buf2)) == NULL) {
			(void)strlcpy(buf2, "(inet_ntop error)", sizeof(buf2));
		}
		buf2[sizeof(buf2)-1]=0;
#else
		/* no ipi_spec_dst member, print an empty string for it */
		buf2[0]=0;
#endif
		log_info("%s: %d %s %s", str,
			(int)r->pktinfo.v4info.ipi_ifindex, buf1, buf2);
#elif defined(IP_RECVDSTADDR)
		char buf1[1024];
		if(inet_ntop(AF_INET, &r->pktinfo.v4addr,
			buf1, (socklen_t)sizeof(buf1)) == NULL) {
			(void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1));
		}
		buf1[sizeof(buf1)-1]=0;
		log_info("%s: %s", str, buf1);
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
	}
}
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG||HAVE_SENDMSG */
622
623 /** send a UDP reply over specified interface*/
624 static int
625 comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet,
626 struct sockaddr* addr, socklen_t addrlen, struct comm_reply* r)
627 {
628 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_SENDMSG)
629 ssize_t sent;
630 struct msghdr msg;
631 struct iovec iov[1];
632 union {
633 struct cmsghdr hdr;
634 char buf[256];
635 } control;
636 #ifndef S_SPLINT_S
637 struct cmsghdr *cmsg;
638 #endif /* S_SPLINT_S */
639
640 log_assert(c->fd != -1);
641 #ifdef UNBOUND_DEBUG
642 if(sldns_buffer_remaining(packet) == 0)
643 log_err("error: send empty UDP packet");
644 #endif
645 log_assert(addr && addrlen > 0);
646
647 msg.msg_name = addr;
648 msg.msg_namelen = addrlen;
649 iov[0].iov_base = sldns_buffer_begin(packet);
650 iov[0].iov_len = sldns_buffer_remaining(packet);
651 msg.msg_iov = iov;
652 msg.msg_iovlen = 1;
653 msg.msg_control = control.buf;
654 #ifndef S_SPLINT_S
655 msg.msg_controllen = sizeof(control.buf);
656 #endif /* S_SPLINT_S */
657 msg.msg_flags = 0;
658
659 #ifndef S_SPLINT_S
660 cmsg = CMSG_FIRSTHDR(&msg);
661 if(r->srctype == 4) {
662 #ifdef IP_PKTINFO
663 void* cmsg_data;
664 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo));
665 log_assert(msg.msg_controllen <= sizeof(control.buf));
666 cmsg->cmsg_level = IPPROTO_IP;
667 cmsg->cmsg_type = IP_PKTINFO;
668 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4info,
669 sizeof(struct in_pktinfo));
670 /* unset the ifindex to not bypass the routing tables */
671 cmsg_data = CMSG_DATA(cmsg);
672 ((struct in_pktinfo *) cmsg_data)->ipi_ifindex = 0;
673 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
674 /* zero the padding bytes inserted by the CMSG_LEN */
675 if(sizeof(struct in_pktinfo) < cmsg->cmsg_len)
676 memset(((uint8_t*)(CMSG_DATA(cmsg))) +
677 sizeof(struct in_pktinfo), 0, cmsg->cmsg_len
678 - sizeof(struct in_pktinfo));
679 #elif defined(IP_SENDSRCADDR)
680 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr));
681 log_assert(msg.msg_controllen <= sizeof(control.buf));
682 cmsg->cmsg_level = IPPROTO_IP;
683 cmsg->cmsg_type = IP_SENDSRCADDR;
684 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4addr,
685 sizeof(struct in_addr));
686 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
687 /* zero the padding bytes inserted by the CMSG_LEN */
688 if(sizeof(struct in_addr) < cmsg->cmsg_len)
689 memset(((uint8_t*)(CMSG_DATA(cmsg))) +
690 sizeof(struct in_addr), 0, cmsg->cmsg_len
691 - sizeof(struct in_addr));
692 #else
693 verbose(VERB_ALGO, "no IP_PKTINFO or IP_SENDSRCADDR");
694 msg.msg_control = NULL;
695 #endif /* IP_PKTINFO or IP_SENDSRCADDR */
696 } else if(r->srctype == 6) {
697 void* cmsg_data;
698 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
699 log_assert(msg.msg_controllen <= sizeof(control.buf));
700 cmsg->cmsg_level = IPPROTO_IPV6;
701 cmsg->cmsg_type = IPV6_PKTINFO;
702 memmove(CMSG_DATA(cmsg), &r->pktinfo.v6info,
703 sizeof(struct in6_pktinfo));
704 /* unset the ifindex to not bypass the routing tables */
705 cmsg_data = CMSG_DATA(cmsg);
706 ((struct in6_pktinfo *) cmsg_data)->ipi6_ifindex = 0;
707 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
708 /* zero the padding bytes inserted by the CMSG_LEN */
709 if(sizeof(struct in6_pktinfo) < cmsg->cmsg_len)
710 memset(((uint8_t*)(CMSG_DATA(cmsg))) +
711 sizeof(struct in6_pktinfo), 0, cmsg->cmsg_len
712 - sizeof(struct in6_pktinfo));
713 } else {
714 /* try to pass all 0 to use default route */
715 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
716 log_assert(msg.msg_controllen <= sizeof(control.buf));
717 cmsg->cmsg_level = IPPROTO_IPV6;
718 cmsg->cmsg_type = IPV6_PKTINFO;
719 memset(CMSG_DATA(cmsg), 0, sizeof(struct in6_pktinfo));
720 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
721 /* zero the padding bytes inserted by the CMSG_LEN */
722 if(sizeof(struct in6_pktinfo) < cmsg->cmsg_len)
723 memset(((uint8_t*)(CMSG_DATA(cmsg))) +
724 sizeof(struct in6_pktinfo), 0, cmsg->cmsg_len
725 - sizeof(struct in6_pktinfo));
726 }
727 #endif /* S_SPLINT_S */
728 if(verbosity >= VERB_ALGO && r->srctype != 0)
729 p_ancil("send_udp over interface", r);
730 sent = sendmsg(c->fd, &msg, 0);
731 if(sent == -1) {
732 /* try again and block, waiting for IO to complete,
733 * we want to send the answer, and we will wait for
734 * the ethernet interface buffer to have space. */
735 #ifndef USE_WINSOCK
736 if(errno == EAGAIN || errno == EINTR ||
737 # ifdef EWOULDBLOCK
738 errno == EWOULDBLOCK ||
739 # endif
740 errno == ENOBUFS) {
741 #else
742 if(WSAGetLastError() == WSAEINPROGRESS ||
743 WSAGetLastError() == WSAEINTR ||
744 WSAGetLastError() == WSAENOBUFS ||
745 WSAGetLastError() == WSAEWOULDBLOCK) {
746 #endif
747 int retries = 0;
748 while(sent == -1 && retries < SEND_BLOCKED_MAX_RETRY && (
749 #ifndef USE_WINSOCK
750 errno == EAGAIN || errno == EINTR ||
751 # ifdef EWOULDBLOCK
752 errno == EWOULDBLOCK ||
753 # endif
754 errno == ENOBUFS
755 #else
756 WSAGetLastError() == WSAEINPROGRESS ||
757 WSAGetLastError() == WSAEINTR ||
758 WSAGetLastError() == WSAENOBUFS ||
759 WSAGetLastError() == WSAEWOULDBLOCK
760 #endif
761 )) {
762 #if defined(HAVE_POLL) || defined(USE_WINSOCK)
763 int send_nobufs = (
764 #ifndef USE_WINSOCK
765 errno == ENOBUFS
766 #else
767 WSAGetLastError() == WSAENOBUFS
768 #endif
769 );
770 struct pollfd p;
771 int pret;
772 memset(&p, 0, sizeof(p));
773 p.fd = c->fd;
774 p.events = POLLOUT
775 #ifndef USE_WINSOCK
776 | POLLERR | POLLHUP
777 #endif
778 ;
779 # ifndef USE_WINSOCK
780 pret = poll(&p, 1, SEND_BLOCKED_WAIT_TIMEOUT);
781 # else
782 pret = WSAPoll(&p, 1,
783 SEND_BLOCKED_WAIT_TIMEOUT);
784 # endif
785 if(pret == 0) {
786 /* timer expired */
787 struct comm_base* b = c->ev->base;
788 if(b->eb->last_writewait_log+SLOW_LOG_TIME <=
789 b->eb->secs) {
790 b->eb->last_writewait_log = b->eb->secs;
791 verbose(VERB_OPS, "send udp blocked "
792 "for long, dropping packet.");
793 }
794 return 0;
795 } else if(pret < 0 &&
796 #ifndef USE_WINSOCK
797 errno != EAGAIN && errno != EINTR &&
798 # ifdef EWOULDBLOCK
799 errno != EWOULDBLOCK &&
800 # endif
801 errno != ENOMEM && errno != ENOBUFS
802 #else
803 WSAGetLastError() != WSAEINPROGRESS &&
804 WSAGetLastError() != WSAEINTR &&
805 WSAGetLastError() != WSAENOBUFS &&
806 WSAGetLastError() != WSAEWOULDBLOCK
807 #endif
808 ) {
809 log_err("poll udp out failed: %s",
810 sock_strerror(errno));
811 return 0;
812 } else if((pret < 0 &&
813 #ifndef USE_WINSOCK
814 ( errno == ENOBUFS /* Maybe some systems */
815 || errno == ENOMEM /* Linux */
816 || errno == EAGAIN) /* Macos, solaris, openbsd */
817 #else
818 WSAGetLastError() == WSAENOBUFS
819 #endif
820 ) || (send_nobufs && retries > 0)) {
821 /* ENOBUFS/ENOMEM/EAGAIN, and poll
822 * returned without
823 * a timeout. Or the retried send call
824 * returned ENOBUFS/ENOMEM/EAGAIN.
825 * It is good to wait a bit for the
826 * error to clear. */
827 /* The timeout is 20*(2^(retries+1)),
828 * it increases exponentially, starting
829 * at 40 msec. After 5 tries, 1240 msec
830 * have passed in total, when poll
831 * returned the error, and 1200 msec
832 * when send returned the errors. */
833 #ifndef USE_WINSOCK
834 pret = poll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1));
835 #else
836 Sleep((SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1));
837 pret = 0;
838 #endif
839 if(pret < 0
840 #ifndef USE_WINSOCK
841 && errno != EAGAIN && errno != EINTR &&
842 # ifdef EWOULDBLOCK
843 errno != EWOULDBLOCK &&
844 # endif
845 errno != ENOMEM && errno != ENOBUFS
846 #else /* USE_WINSOCK */
847 /* Sleep does not error */
848 #endif
849 ) {
850 log_err("poll udp out timer failed: %s",
851 sock_strerror(errno));
852 }
853 }
854 #endif /* defined(HAVE_POLL) || defined(USE_WINSOCK) */
855 retries++;
856 sent = sendmsg(c->fd, &msg, 0);
857 }
858 }
859 }
860 if(sent == -1) {
861 if(!udp_send_errno_needs_log(addr, addrlen))
862 return 0;
863 verbose(VERB_OPS, "sendmsg failed: %s", strerror(errno));
864 log_addr(VERB_OPS, "remote address is",
865 (struct sockaddr_storage*)addr, addrlen);
866 #ifdef __NetBSD__
867 /* netbsd 7 has IP_PKTINFO for recv but not send */
868 if(errno == EINVAL && r->srctype == 4)
869 log_err("sendmsg: No support for sendmsg(IP_PKTINFO). "
870 "Please disable interface-automatic");
871 #endif
872 return 0;
873 } else if((size_t)sent != sldns_buffer_remaining(packet)) {
874 log_err("sent %d in place of %d bytes",
875 (int)sent, (int)sldns_buffer_remaining(packet));
876 return 0;
877 }
878 return 1;
879 #else
880 (void)c;
881 (void)packet;
882 (void)addr;
883 (void)addrlen;
884 (void)r;
885 log_err("sendmsg: IPV6_PKTINFO not supported");
886 return 0;
887 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_SENDMSG */
888 }
889
890 /** return true is UDP receive error needs to be logged */
891 static int udp_recv_needs_log(int err)
892 {
893 switch(err) {
894 case EACCES: /* some hosts send ICMP 'Permission Denied' */
895 #ifndef USE_WINSOCK
896 case ECONNREFUSED:
897 # ifdef ENETUNREACH
898 case ENETUNREACH:
899 # endif
900 # ifdef EHOSTDOWN
901 case EHOSTDOWN:
902 # endif
903 # ifdef EHOSTUNREACH
904 case EHOSTUNREACH:
905 # endif
906 # ifdef ENETDOWN
907 case ENETDOWN:
908 # endif
909 #else /* USE_WINSOCK */
910 case WSAECONNREFUSED:
911 case WSAENETUNREACH:
912 case WSAEHOSTDOWN:
913 case WSAEHOSTUNREACH:
914 case WSAENETDOWN:
915 #endif
916 if(verbosity >= VERB_ALGO)
917 return 1;
918 return 0;
919 default:
920 break;
921 }
922 return 1;
923 }
924
925 /** Parses the PROXYv2 header from buf and updates the comm_reply struct.
926 * Returns 1 on success, 0 on failure. */
927 static int consume_pp2_header(struct sldns_buffer* buf, struct comm_reply* rep,
928 int stream) {
929 size_t size;
930 struct pp2_header *header;
931 int err = pp2_read_header(sldns_buffer_begin(buf),
932 sldns_buffer_remaining(buf));
933 if(err) return 0;
934 header = (struct pp2_header*)sldns_buffer_begin(buf);
935 size = PP2_HEADER_SIZE + ntohs(header->len);
936 if((header->ver_cmd & 0xF) == PP2_CMD_LOCAL) {
937 /* A connection from the proxy itself.
938 * No need to do anything with addresses. */
939 goto done;
940 }
941 if(header->fam_prot == PP2_UNSPEC_UNSPEC) {
942 /* Unspecified family and protocol. This could be used for
943 * health checks by proxies.
944 * No need to do anything with addresses. */
945 goto done;
946 }
947 /* Read the proxied address */
948 switch(header->fam_prot) {
949 case PP2_INET_STREAM:
950 case PP2_INET_DGRAM:
951 {
952 struct sockaddr_in* addr =
953 (struct sockaddr_in*)&rep->client_addr;
954 addr->sin_family = AF_INET;
955 addr->sin_addr.s_addr = header->addr.addr4.src_addr;
956 addr->sin_port = header->addr.addr4.src_port;
957 rep->client_addrlen = (socklen_t)sizeof(struct sockaddr_in);
958 }
959 /* Ignore the destination address; it should be us. */
960 break;
961 case PP2_INET6_STREAM:
962 case PP2_INET6_DGRAM:
963 {
964 struct sockaddr_in6* addr =
965 (struct sockaddr_in6*)&rep->client_addr;
966 memset(addr, 0, sizeof(*addr));
967 addr->sin6_family = AF_INET6;
968 memcpy(&addr->sin6_addr,
969 header->addr.addr6.src_addr, 16);
970 addr->sin6_port = header->addr.addr6.src_port;
971 rep->client_addrlen = (socklen_t)sizeof(struct sockaddr_in6);
972 }
973 /* Ignore the destination address; it should be us. */
974 break;
975 default:
976 log_err("proxy_protocol: unsupported family and "
977 "protocol 0x%x", (int)header->fam_prot);
978 return 0;
979 }
980 rep->is_proxied = 1;
981 done:
982 if(!stream) {
983 /* We are reading a whole packet;
984 * Move the rest of the data to overwrite the PROXYv2 header */
985 /* XXX can we do better to avoid memmove? */
986 memmove(header, ((char*)header)+size,
987 sldns_buffer_limit(buf)-size);
988 sldns_buffer_set_limit(buf, sldns_buffer_limit(buf)-size);
989 }
990 return 1;
991 }
992
993 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
994 void
995 comm_point_udp_ancil_callback(int fd, short event, void* arg)
996 {
997 struct comm_reply rep;
998 struct msghdr msg;
999 struct iovec iov[1];
1000 ssize_t rcv;
1001 union {
1002 struct cmsghdr hdr;
1003 char buf[256];
1004 } ancil;
1005 int i;
1006 #ifndef S_SPLINT_S
1007 struct cmsghdr* cmsg;
1008 #endif /* S_SPLINT_S */
1009 #ifdef HAVE_LINUX_NET_TSTAMP_H
1010 struct timespec *ts;
1011 #endif /* HAVE_LINUX_NET_TSTAMP_H */
1012
1013 rep.c = (struct comm_point*)arg;
1014 log_assert(rep.c->type == comm_udp);
1015
1016 if(!(event&UB_EV_READ))
1017 return;
1018 log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
1019 ub_comm_base_now(rep.c->ev->base);
1020 for(i=0; i<NUM_UDP_PER_SELECT; i++) {
1021 sldns_buffer_clear(rep.c->buffer);
1022 timeval_clear(&rep.c->recv_tv);
1023 rep.remote_addrlen = (socklen_t)sizeof(rep.remote_addr);
1024 log_assert(fd != -1);
1025 log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
1026 msg.msg_name = &rep.remote_addr;
1027 msg.msg_namelen = (socklen_t)sizeof(rep.remote_addr);
1028 iov[0].iov_base = sldns_buffer_begin(rep.c->buffer);
1029 iov[0].iov_len = sldns_buffer_remaining(rep.c->buffer);
1030 msg.msg_iov = iov;
1031 msg.msg_iovlen = 1;
1032 msg.msg_control = ancil.buf;
1033 #ifndef S_SPLINT_S
1034 msg.msg_controllen = sizeof(ancil.buf);
1035 #endif /* S_SPLINT_S */
1036 msg.msg_flags = 0;
1037 rcv = recvmsg(fd, &msg, MSG_DONTWAIT);
1038 if(rcv == -1) {
1039 if(errno != EAGAIN && errno != EINTR
1040 && udp_recv_needs_log(errno)) {
1041 log_err("recvmsg failed: %s", strerror(errno));
1042 }
1043 return;
1044 }
1045 rep.remote_addrlen = msg.msg_namelen;
1046 sldns_buffer_skip(rep.c->buffer, rcv);
1047 sldns_buffer_flip(rep.c->buffer);
1048 rep.srctype = 0;
1049 rep.is_proxied = 0;
1050 #ifndef S_SPLINT_S
1051 for(cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
1052 cmsg = CMSG_NXTHDR(&msg, cmsg)) {
1053 if( cmsg->cmsg_level == IPPROTO_IPV6 &&
1054 cmsg->cmsg_type == IPV6_PKTINFO) {
1055 rep.srctype = 6;
1056 memmove(&rep.pktinfo.v6info, CMSG_DATA(cmsg),
1057 sizeof(struct in6_pktinfo));
1058 break;
1059 #ifdef IP_PKTINFO
1060 } else if( cmsg->cmsg_level == IPPROTO_IP &&
1061 cmsg->cmsg_type == IP_PKTINFO) {
1062 rep.srctype = 4;
1063 memmove(&rep.pktinfo.v4info, CMSG_DATA(cmsg),
1064 sizeof(struct in_pktinfo));
1065 break;
1066 #elif defined(IP_RECVDSTADDR)
1067 } else if( cmsg->cmsg_level == IPPROTO_IP &&
1068 cmsg->cmsg_type == IP_RECVDSTADDR) {
1069 rep.srctype = 4;
1070 memmove(&rep.pktinfo.v4addr, CMSG_DATA(cmsg),
1071 sizeof(struct in_addr));
1072 break;
1073 #endif /* IP_PKTINFO or IP_RECVDSTADDR */
1074 #ifdef HAVE_LINUX_NET_TSTAMP_H
1075 } else if( cmsg->cmsg_level == SOL_SOCKET &&
1076 cmsg->cmsg_type == SO_TIMESTAMPNS) {
1077 ts = (struct timespec *)CMSG_DATA(cmsg);
1078 TIMESPEC_TO_TIMEVAL(&rep.c->recv_tv, ts);
1079 } else if( cmsg->cmsg_level == SOL_SOCKET &&
1080 cmsg->cmsg_type == SO_TIMESTAMPING) {
1081 ts = (struct timespec *)CMSG_DATA(cmsg);
1082 TIMESPEC_TO_TIMEVAL(&rep.c->recv_tv, ts);
1083 } else if( cmsg->cmsg_level == SOL_SOCKET &&
1084 cmsg->cmsg_type == SO_TIMESTAMP) {
1085 memmove(&rep.c->recv_tv, CMSG_DATA(cmsg), sizeof(struct timeval));
1086 #elif defined(SO_TIMESTAMP) && defined(SCM_TIMESTAMP)
1087 } else if( cmsg->cmsg_level == SOL_SOCKET &&
1088 cmsg->cmsg_type == SCM_TIMESTAMP) {
1089 /* FreeBSD and also Linux. */
1090 memmove(&rep.c->recv_tv, CMSG_DATA(cmsg), sizeof(struct timeval));
1091 #endif /* HAVE_LINUX_NET_TSTAMP_H */
1092 }
1093 }
1094
1095 if(verbosity >= VERB_ALGO && rep.srctype != 0)
1096 p_ancil("receive_udp on interface", &rep);
1097 #endif /* S_SPLINT_S */
1098
1099 if(rep.c->pp2_enabled && !consume_pp2_header(rep.c->buffer,
1100 &rep, 0)) {
1101 log_err("proxy_protocol: could not consume PROXYv2 header");
1102 return;
1103 }
1104 if(!rep.is_proxied) {
1105 rep.client_addrlen = rep.remote_addrlen;
1106 memmove(&rep.client_addr, &rep.remote_addr,
1107 rep.remote_addrlen);
1108 }
1109
1110 fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
1111 if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
1112 /* send back immediate reply */
1113 struct sldns_buffer *buffer;
1114 #ifdef USE_DNSCRYPT
1115 buffer = rep.c->dnscrypt_buffer;
1116 #else
1117 buffer = rep.c->buffer;
1118 #endif
1119 (void)comm_point_send_udp_msg_if(rep.c, buffer,
1120 (struct sockaddr*)&rep.remote_addr,
1121 rep.remote_addrlen, &rep);
1122 }
1123 if(!rep.c || rep.c->fd == -1) /* commpoint closed */
1124 break;
1125 }
1126 }
1127 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */
1128
1129 void
1130 comm_point_udp_callback(int fd, short event, void* arg)
1131 {
1132 struct comm_reply rep;
1133 ssize_t rcv;
1134 int i;
1135 struct sldns_buffer *buffer;
1136
1137 rep.c = (struct comm_point*)arg;
1138 log_assert(rep.c->type == comm_udp);
1139
1140 if(!(event&UB_EV_READ))
1141 return;
1142 log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
1143 ub_comm_base_now(rep.c->ev->base);
1144 for(i=0; i<NUM_UDP_PER_SELECT; i++) {
1145 sldns_buffer_clear(rep.c->buffer);
1146 rep.remote_addrlen = (socklen_t)sizeof(rep.remote_addr);
1147 log_assert(fd != -1);
1148 log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
1149 rcv = recvfrom(fd, (void*)sldns_buffer_begin(rep.c->buffer),
1150 sldns_buffer_remaining(rep.c->buffer), MSG_DONTWAIT,
1151 (struct sockaddr*)&rep.remote_addr, &rep.remote_addrlen);
1152 if(rcv == -1) {
1153 #ifndef USE_WINSOCK
1154 if(errno != EAGAIN && errno != EINTR
1155 && udp_recv_needs_log(errno))
1156 log_err("recvfrom %d failed: %s",
1157 fd, strerror(errno));
1158 #else
1159 if(WSAGetLastError() != WSAEINPROGRESS &&
1160 WSAGetLastError() != WSAECONNRESET &&
1161 WSAGetLastError()!= WSAEWOULDBLOCK &&
1162 udp_recv_needs_log(WSAGetLastError()))
1163 log_err("recvfrom failed: %s",
1164 wsa_strerror(WSAGetLastError()));
1165 #endif
1166 return;
1167 }
1168 sldns_buffer_skip(rep.c->buffer, rcv);
1169 sldns_buffer_flip(rep.c->buffer);
1170 rep.srctype = 0;
1171 rep.is_proxied = 0;
1172
1173 if(rep.c->pp2_enabled && !consume_pp2_header(rep.c->buffer,
1174 &rep, 0)) {
1175 log_err("proxy_protocol: could not consume PROXYv2 header");
1176 return;
1177 }
1178 if(!rep.is_proxied) {
1179 rep.client_addrlen = rep.remote_addrlen;
1180 memmove(&rep.client_addr, &rep.remote_addr,
1181 rep.remote_addrlen);
1182 }
1183
1184 fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
1185 if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
1186 /* send back immediate reply */
1187 #ifdef USE_DNSCRYPT
1188 buffer = rep.c->dnscrypt_buffer;
1189 #else
1190 buffer = rep.c->buffer;
1191 #endif
1192 (void)comm_point_send_udp_msg(rep.c, buffer,
1193 (struct sockaddr*)&rep.remote_addr,
1194 rep.remote_addrlen, 0);
1195 }
1196 if(!rep.c || rep.c->fd != fd) /* commpoint closed to -1 or reused for
1197 another UDP port. Note rep.c cannot be reused with TCP fd. */
1198 break;
1199 }
1200 }
1201
1202 #ifdef HAVE_NGTCP2
1203 void
1204 doq_pkt_addr_init(struct doq_pkt_addr* paddr)
1205 {
1206 paddr->addrlen = (socklen_t)sizeof(paddr->addr);
1207 paddr->localaddrlen = (socklen_t)sizeof(paddr->localaddr);
1208 paddr->ifindex = 0;
1209 }
1210
/**
 * Set the ecn value on the socket for the next transmission.
 * Uses IPV6_TCLASS for AF_INET6 and IP_TOS for every other family.
 * A setsockopt failure is logged and otherwise ignored.
 * @param fd: the socket.
 * @param family: address family of the destination.
 * @param ecn: value to store in the socket option.
 */
static void
doq_set_ecn(int fd, int family, uint32_t ecn)
{
	unsigned int tos = ecn;
	if(family != AF_INET6) {
		if(setsockopt(fd, IPPROTO_IP, IP_TOS, &tos,
			(socklen_t)sizeof(tos)) == -1) {
			log_err("setsockopt(.. IP_TOS ..): %s",
				strerror(errno));
		}
	} else {
		if(setsockopt(fd, IPPROTO_IPV6, IPV6_TCLASS, &tos,
			(socklen_t)sizeof(tos)) == -1) {
			log_err("setsockopt(.. IPV6_TCLASS ..): %s",
				strerror(errno));
		}
	}
}
1230
1231 /** set the local address in the control ancillary data */
1232 static void
1233 doq_set_localaddr_cmsg(struct msghdr* msg, size_t control_size,
1234 struct doq_addr_storage* localaddr, socklen_t localaddrlen,
1235 int ifindex)
1236 {
1237 #ifndef S_SPLINT_S
1238 struct cmsghdr* cmsg;
1239 #endif /* S_SPLINT_S */
1240 #ifndef S_SPLINT_S
1241 cmsg = CMSG_FIRSTHDR(msg);
1242 if(localaddr->sockaddr.in.sin_family == AF_INET) {
1243 #ifdef IP_PKTINFO
1244 struct sockaddr_in* sa = (struct sockaddr_in*)localaddr;
1245 struct in_pktinfo v4info;
1246 log_assert(localaddrlen >= sizeof(struct sockaddr_in));
1247 msg->msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo));
1248 memset(msg->msg_control, 0, msg->msg_controllen);
1249 log_assert(msg->msg_controllen <= control_size);
1250 cmsg->cmsg_level = IPPROTO_IP;
1251 cmsg->cmsg_type = IP_PKTINFO;
1252 memset(&v4info, 0, sizeof(v4info));
1253 # ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST
1254 memmove(&v4info.ipi_spec_dst, &sa->sin_addr,
1255 sizeof(struct in_addr));
1256 # else
1257 memmove(&v4info.ipi_addr, &sa->sin_addr,
1258 sizeof(struct in_addr));
1259 # endif
1260 v4info.ipi_ifindex = ifindex;
1261 memmove(CMSG_DATA(cmsg), &v4info, sizeof(struct in_pktinfo));
1262 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
1263 #elif defined(IP_SENDSRCADDR)
1264 struct sockaddr_in* sa= (struct sockaddr_in*)localaddr;
1265 log_assert(localaddrlen >= sizeof(struct sockaddr_in));
1266 msg->msg_controllen = CMSG_SPACE(sizeof(struct in_addr));
1267 memset(msg->msg_control, 0, msg->msg_controllen);
1268 log_assert(msg->msg_controllen <= control_size);
1269 cmsg->cmsg_level = IPPROTO_IP;
1270 cmsg->cmsg_type = IP_SENDSRCADDR;
1271 memmove(CMSG_DATA(cmsg), &sa->sin_addr,
1272 sizeof(struct in_addr));
1273 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
1274 #endif
1275 } else {
1276 struct sockaddr_in6* sa6 = (struct sockaddr_in6*)localaddr;
1277 struct in6_pktinfo v6info;
1278 log_assert(localaddrlen >= sizeof(struct sockaddr_in6));
1279 msg->msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
1280 memset(msg->msg_control, 0, msg->msg_controllen);
1281 log_assert(msg->msg_controllen <= control_size);
1282 cmsg->cmsg_level = IPPROTO_IPV6;
1283 cmsg->cmsg_type = IPV6_PKTINFO;
1284 memset(&v6info, 0, sizeof(v6info));
1285 memmove(&v6info.ipi6_addr, &sa6->sin6_addr,
1286 sizeof(struct in6_addr));
1287 v6info.ipi6_ifindex = ifindex;
1288 memmove(CMSG_DATA(cmsg), &v6info, sizeof(struct in6_pktinfo));
1289 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
1290 }
1291 #endif /* S_SPLINT_S */
1292 /* Ignore unused variables, if no assertions are compiled. */
1293 (void)localaddrlen;
1294 (void)control_size;
1295 }
1296
1297 /** write address and port into strings */
1298 static int
1299 doq_print_addr_port(struct doq_addr_storage* addr, socklen_t addrlen,
1300 char* host, size_t hostlen, char* port, size_t portlen)
1301 {
1302 if(addr->sockaddr.in.sin_family == AF_INET) {
1303 struct sockaddr_in* sa = (struct sockaddr_in*)addr;
1304 log_assert(addrlen >= sizeof(*sa));
1305 if(inet_ntop(sa->sin_family, &sa->sin_addr, host,
1306 (socklen_t)hostlen) == 0) {
1307 log_hex("inet_ntop error: address", &sa->sin_addr,
1308 sizeof(sa->sin_addr));
1309 return 0;
1310 }
1311 snprintf(port, portlen, "%u", (unsigned)ntohs(sa->sin_port));
1312 } else if(addr->sockaddr.in.sin_family == AF_INET6) {
1313 struct sockaddr_in6* sa6 = (struct sockaddr_in6*)addr;
1314 log_assert(addrlen >= sizeof(*sa6));
1315 if(inet_ntop(sa6->sin6_family, &sa6->sin6_addr, host,
1316 (socklen_t)hostlen) == 0) {
1317 log_hex("inet_ntop error: address", &sa6->sin6_addr,
1318 sizeof(sa6->sin6_addr));
1319 return 0;
1320 }
1321 snprintf(port, portlen, "%u", (unsigned)ntohs(sa6->sin6_port));
1322 }
1323 return 1;
1324 }
1325
1326 /** doq store the blocked packet when write has blocked */
1327 static void
1328 doq_store_blocked_pkt(struct comm_point* c, struct doq_pkt_addr* paddr,
1329 uint32_t ecn)
1330 {
1331 if(c->doq_socket->have_blocked_pkt)
1332 return; /* should not happen that we write when there is
1333 already a blocked write, but if so, drop it. */
1334 if(sldns_buffer_limit(c->doq_socket->pkt_buf) >
1335 sldns_buffer_capacity(c->doq_socket->blocked_pkt))
1336 return; /* impossibly large, drop packet. impossible because
1337 pkt_buf and blocked_pkt are the same size. */
1338 c->doq_socket->have_blocked_pkt = 1;
1339 c->doq_socket->blocked_pkt_pi.ecn = ecn;
1340 memcpy(c->doq_socket->blocked_paddr, paddr,
1341 sizeof(*c->doq_socket->blocked_paddr));
1342 sldns_buffer_clear(c->doq_socket->blocked_pkt);
1343 sldns_buffer_write(c->doq_socket->blocked_pkt,
1344 sldns_buffer_begin(c->doq_socket->pkt_buf),
1345 sldns_buffer_limit(c->doq_socket->pkt_buf));
1346 sldns_buffer_flip(c->doq_socket->blocked_pkt);
1347 }
1348
1349 void
1350 doq_send_pkt(struct comm_point* c, struct doq_pkt_addr* paddr, uint32_t ecn)
1351 {
1352 struct msghdr msg;
1353 struct iovec iov[1];
1354 union {
1355 struct cmsghdr hdr;
1356 char buf[256];
1357 } control;
1358 ssize_t ret;
1359 iov[0].iov_base = sldns_buffer_begin(c->doq_socket->pkt_buf);
1360 iov[0].iov_len = sldns_buffer_limit(c->doq_socket->pkt_buf);
1361 memset(&msg, 0, sizeof(msg));
1362 msg.msg_name = (void*)&paddr->addr;
1363 msg.msg_namelen = paddr->addrlen;
1364 msg.msg_iov = iov;
1365 msg.msg_iovlen = 1;
1366 msg.msg_control = control.buf;
1367 #ifndef S_SPLINT_S
1368 msg.msg_controllen = sizeof(control.buf);
1369 #endif /* S_SPLINT_S */
1370 msg.msg_flags = 0;
1371
1372 doq_set_localaddr_cmsg(&msg, sizeof(control.buf), &paddr->localaddr,
1373 paddr->localaddrlen, paddr->ifindex);
1374 doq_set_ecn(c->fd, paddr->addr.sockaddr.in.sin_family, ecn);
1375
1376 for(;;) {
1377 ret = sendmsg(c->fd, &msg, MSG_DONTWAIT);
1378 if(ret == -1 && errno == EINTR)
1379 continue;
1380 break;
1381 }
1382 if(ret == -1) {
1383 #ifndef USE_WINSOCK
1384 if(errno == EAGAIN ||
1385 # ifdef EWOULDBLOCK
1386 errno == EWOULDBLOCK ||
1387 # endif
1388 errno == ENOBUFS)
1389 #else
1390 if(WSAGetLastError() == WSAEINPROGRESS ||
1391 WSAGetLastError() == WSAENOBUFS ||
1392 WSAGetLastError() == WSAEWOULDBLOCK)
1393 #endif
1394 {
1395 /* udp send has blocked */
1396 doq_store_blocked_pkt(c, paddr, ecn);
1397 return;
1398 }
1399 if(!udp_send_errno_needs_log((void*)&paddr->addr,
1400 paddr->addrlen))
1401 return;
1402 if(verbosity >= VERB_OPS) {
1403 char host[256], port[32];
1404 if(doq_print_addr_port(&paddr->addr, paddr->addrlen,
1405 host, sizeof(host), port, sizeof(port))) {
1406 verbose(VERB_OPS, "doq sendmsg to %s %s "
1407 "failed: %s", host, port,
1408 strerror(errno));
1409 } else {
1410 verbose(VERB_OPS, "doq sendmsg failed: %s",
1411 strerror(errno));
1412 }
1413 }
1414 return;
1415 } else if(ret != (ssize_t)sldns_buffer_limit(c->doq_socket->pkt_buf)) {
1416 char host[256], port[32];
1417 if(doq_print_addr_port(&paddr->addr, paddr->addrlen, host,
1418 sizeof(host), port, sizeof(port))) {
1419 log_err("doq sendmsg to %s %s failed: "
1420 "sent %d in place of %d bytes",
1421 host, port, (int)ret,
1422 (int)sldns_buffer_limit(c->doq_socket->pkt_buf));
1423 } else {
1424 log_err("doq sendmsg failed: "
1425 "sent %d in place of %d bytes",
1426 (int)ret, (int)sldns_buffer_limit(c->doq_socket->pkt_buf));
1427 }
1428 return;
1429 }
1430 }
1431
1432 /** fetch port number */
1433 static int
1434 doq_sockaddr_get_port(struct doq_addr_storage* addr)
1435 {
1436 if(addr->sockaddr.in.sin_family == AF_INET) {
1437 struct sockaddr_in* sa = (struct sockaddr_in*)addr;
1438 return ntohs(sa->sin_port);
1439 } else if(addr->sockaddr.in.sin_family == AF_INET6) {
1440 struct sockaddr_in6* sa6 = (struct sockaddr_in6*)addr;
1441 return ntohs(sa6->sin6_port);
1442 }
1443 return 0;
1444 }
1445
1446 /** get local address from ancillary data headers */
1447 static int
1448 doq_get_localaddr_cmsg(struct comm_point* c, struct doq_pkt_addr* paddr,
1449 int* pkt_continue, struct msghdr* msg)
1450 {
1451 #ifndef S_SPLINT_S
1452 struct cmsghdr* cmsg;
1453 #endif /* S_SPLINT_S */
1454
1455 memset(&paddr->localaddr, 0, sizeof(paddr->localaddr));
1456 #ifndef S_SPLINT_S
1457 for(cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL;
1458 cmsg = CMSG_NXTHDR(msg, cmsg)) {
1459 if( cmsg->cmsg_level == IPPROTO_IPV6 &&
1460 cmsg->cmsg_type == IPV6_PKTINFO) {
1461 struct in6_pktinfo* v6info =
1462 (struct in6_pktinfo*)CMSG_DATA(cmsg);
1463 struct sockaddr_in6* sa= (struct sockaddr_in6*)
1464 &paddr->localaddr;
1465 struct sockaddr_in6* rema = (struct sockaddr_in6*)
1466 &paddr->addr;
1467 if(rema->sin6_family != AF_INET6) {
1468 log_err("doq cmsg family mismatch cmsg is ip6");
1469 *pkt_continue = 1;
1470 return 0;
1471 }
1472 sa->sin6_family = AF_INET6;
1473 sa->sin6_port = htons(doq_sockaddr_get_port(
1474 (void*)c->socket->addr));
1475 paddr->ifindex = v6info->ipi6_ifindex;
1476 memmove(&sa->sin6_addr, &v6info->ipi6_addr,
1477 sizeof(struct in6_addr));
1478 paddr->localaddrlen = sizeof(struct sockaddr_in6);
1479 break;
1480 #ifdef IP_PKTINFO
1481 } else if( cmsg->cmsg_level == IPPROTO_IP &&
1482 cmsg->cmsg_type == IP_PKTINFO) {
1483 struct in_pktinfo* v4info =
1484 (struct in_pktinfo*)CMSG_DATA(cmsg);
1485 struct sockaddr_in* sa= (struct sockaddr_in*)
1486 &paddr->localaddr;
1487 struct sockaddr_in* rema = (struct sockaddr_in*)
1488 &paddr->addr;
1489 if(rema->sin_family != AF_INET) {
1490 log_err("doq cmsg family mismatch cmsg is ip4");
1491 *pkt_continue = 1;
1492 return 0;
1493 }
1494 sa->sin_family = AF_INET;
1495 sa->sin_port = htons(doq_sockaddr_get_port(
1496 (void*)c->socket->addr));
1497 paddr->ifindex = v4info->ipi_ifindex;
1498 memmove(&sa->sin_addr, &v4info->ipi_addr,
1499 sizeof(struct in_addr));
1500 paddr->localaddrlen = sizeof(struct sockaddr_in);
1501 break;
1502 #elif defined(IP_RECVDSTADDR)
1503 } else if( cmsg->cmsg_level == IPPROTO_IP &&
1504 cmsg->cmsg_type == IP_RECVDSTADDR) {
1505 struct sockaddr_in* sa= (struct sockaddr_in*)
1506 &paddr->localaddr;
1507 struct sockaddr_in* rema = (struct sockaddr_in*)
1508 &paddr->addr;
1509 if(rema->sin_family != AF_INET) {
1510 log_err("doq cmsg family mismatch cmsg is ip4");
1511 *pkt_continue = 1;
1512 return 0;
1513 }
1514 sa->sin_family = AF_INET;
1515 sa->sin_port = htons(doq_sockaddr_get_port(
1516 (void*)c->socket->addr));
1517 paddr->ifindex = 0;
1518 memmove(&sa.sin_addr, CMSG_DATA(cmsg),
1519 sizeof(struct in_addr));
1520 paddr->localaddrlen = sizeof(struct sockaddr_in);
1521 break;
1522 #endif /* IP_PKTINFO or IP_RECVDSTADDR */
1523 }
1524 }
1525 #endif /* S_SPLINT_S */
1526
1527 return 1;
1528 }
1529
/**
 * Get the packet ecn information from the ancillary data of a message.
 * For AF_INET6 the IPV6_TCLASS control message is looked up, for every
 * other family the IP_TOS control message. The payload is read with the
 * size that its cmsg_len indicates: an int when it holds at least an int,
 * otherwise a single byte. Reading only the first byte of an int sized
 * payload would give 0 on a big endian host. Control messages that are
 * too short to hold a payload are skipped.
 * @param msg: message header with the control data.
 * @param family: address family of the packet.
 * @return the value from the first matching control message, or 0 if
 *	there is none.
 */
static uint32_t
msghdr_get_ecn(struct msghdr* msg, int family)
{
#ifndef S_SPLINT_S
	struct cmsghdr* cmsg;
	int level = IPPROTO_IP;
	int type = IP_TOS;
	if(family == AF_INET6) {
		level = IPPROTO_IPV6;
		type = IPV6_TCLASS;
	}
	for(cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL;
		cmsg = CMSG_NXTHDR(msg, cmsg)) {
		if(cmsg->cmsg_level != level || cmsg->cmsg_type != type)
			continue;
		if(cmsg->cmsg_len >= CMSG_LEN(sizeof(int))) {
			/* copy out, the payload need not be aligned */
			int tos = 0;
			memcpy(&tos, CMSG_DATA(cmsg), sizeof(tos));
			return (uint32_t)tos;
		}
		if(cmsg->cmsg_len >= CMSG_LEN(sizeof(uint8_t))) {
			uint8_t* ecn = (uint8_t*)CMSG_DATA(cmsg);
			return *ecn;
		}
	}
#else
	(void)msg;
	(void)family;
#endif /* S_SPLINT_S */
	return 0;
}
1560
1561 /** receive packet for DoQ on UDP. get ancillary data for addresses,
1562 * return false if failed and the callback can stop receiving UDP packets
1563 * if pkt_continue is false. */
1564 static int
1565 doq_recv(struct comm_point* c, struct doq_pkt_addr* paddr, int* pkt_continue,
1566 struct ngtcp2_pkt_info* pi)
1567 {
1568 struct msghdr msg;
1569 struct iovec iov[1];
1570 ssize_t rcv;
1571 union {
1572 struct cmsghdr hdr;
1573 char buf[256];
1574 } ancil;
1575
1576 msg.msg_name = &paddr->addr;
1577 msg.msg_namelen = (socklen_t)sizeof(paddr->addr);
1578 iov[0].iov_base = sldns_buffer_begin(c->doq_socket->pkt_buf);
1579 iov[0].iov_len = sldns_buffer_remaining(c->doq_socket->pkt_buf);
1580 msg.msg_iov = iov;
1581 msg.msg_iovlen = 1;
1582 msg.msg_control = ancil.buf;
1583 #ifndef S_SPLINT_S
1584 msg.msg_controllen = sizeof(ancil.buf);
1585 #endif /* S_SPLINT_S */
1586 msg.msg_flags = 0;
1587
1588 rcv = recvmsg(c->fd, &msg, MSG_DONTWAIT);
1589 if(rcv == -1) {
1590 if(errno != EAGAIN && errno != EINTR
1591 && udp_recv_needs_log(errno)) {
1592 log_err("recvmsg failed for doq: %s", strerror(errno));
1593 }
1594 *pkt_continue = 0;
1595 return 0;
1596 }
1597
1598 paddr->addrlen = msg.msg_namelen;
1599 sldns_buffer_skip(c->doq_socket->pkt_buf, rcv);
1600 sldns_buffer_flip(c->doq_socket->pkt_buf);
1601 if(!doq_get_localaddr_cmsg(c, paddr, pkt_continue, &msg))
1602 return 0;
1603 pi->ecn = msghdr_get_ecn(&msg, paddr->addr.sockaddr.in.sin_family);
1604 return 1;
1605 }
1606
1607 /** send the version negotiation for doq. scid and dcid are flipped around
1608 * to send back to the client. */
1609 static void
1610 doq_send_version_negotiation(struct comm_point* c, struct doq_pkt_addr* paddr,
1611 const uint8_t* dcid, size_t dcidlen, const uint8_t* scid,
1612 size_t scidlen)
1613 {
1614 uint32_t versions[2];
1615 size_t versions_len = 0;
1616 ngtcp2_ssize ret;
1617 uint8_t unused_random;
1618
1619 /* fill the array with supported versions */
1620 versions[0] = NGTCP2_PROTO_VER_V1;
1621 versions_len = 1;
1622 unused_random = ub_random_max(c->doq_socket->rnd, 256);
1623 sldns_buffer_clear(c->doq_socket->pkt_buf);
1624 ret = ngtcp2_pkt_write_version_negotiation(
1625 sldns_buffer_begin(c->doq_socket->pkt_buf),
1626 sldns_buffer_capacity(c->doq_socket->pkt_buf), unused_random,
1627 dcid, dcidlen, scid, scidlen, versions, versions_len);
1628 if(ret < 0) {
1629 log_err("ngtcp2_pkt_write_version_negotiation failed: %s",
1630 ngtcp2_strerror(ret));
1631 return;
1632 }
1633 sldns_buffer_set_position(c->doq_socket->pkt_buf, ret);
1634 sldns_buffer_flip(c->doq_socket->pkt_buf);
1635 doq_send_pkt(c, paddr, 0);
1636 }
1637
1638 /** Find the doq_conn object by remote address and dcid */
1639 static struct doq_conn*
1640 doq_conn_find(struct doq_table* table, struct doq_addr_storage* addr,
1641 socklen_t addrlen, struct doq_addr_storage* localaddr,
1642 socklen_t localaddrlen, int ifindex, const uint8_t* dcid,
1643 size_t dcidlen)
1644 {
1645 struct rbnode_type* node;
1646 struct doq_conn key;
1647 memset(&key.node, 0, sizeof(key.node));
1648 key.node.key = &key;
1649 memmove(&key.key.paddr.addr, addr, addrlen);
1650 key.key.paddr.addrlen = addrlen;
1651 memmove(&key.key.paddr.localaddr, localaddr, localaddrlen);
1652 key.key.paddr.localaddrlen = localaddrlen;
1653 key.key.paddr.ifindex = ifindex;
1654 key.key.dcid = (void*)dcid;
1655 key.key.dcidlen = dcidlen;
1656 node = rbtree_search(table->conn_tree, &key);
1657 if(node)
1658 return (struct doq_conn*)node->key;
1659 return NULL;
1660 }
1661
1662 /** find the doq_con by the connection id */
1663 static struct doq_conn*
1664 doq_conn_find_by_id(struct doq_table* table, const uint8_t* dcid,
1665 size_t dcidlen)
1666 {
1667 struct doq_conid* conid;
1668 lock_rw_rdlock(&table->conid_lock);
1669 conid = doq_conid_find(table, dcid, dcidlen);
1670 if(conid) {
1671 /* make a copy of the key */
1672 struct doq_conn* conn;
1673 struct doq_conn_key key = conid->key;
1674 uint8_t cid[NGTCP2_MAX_CIDLEN];
1675 log_assert(conid->key.dcidlen <= NGTCP2_MAX_CIDLEN);
1676 memcpy(cid, conid->key.dcid, conid->key.dcidlen);
1677 key.dcid = cid;
1678 lock_rw_unlock(&table->conid_lock);
1679
1680 /* now that the conid lock is released, look up the conn */
1681 lock_rw_rdlock(&table->lock);
1682 conn = doq_conn_find(table, &key.paddr.addr,
1683 key.paddr.addrlen, &key.paddr.localaddr,
1684 key.paddr.localaddrlen, key.paddr.ifindex, key.dcid,
1685 key.dcidlen);
1686 if(!conn) {
1687 /* The connection got deleted between the conid lookup
1688 * and the connection lock grab, it no longer exists,
1689 * so return null. */
1690 lock_rw_unlock(&table->lock);
1691 return NULL;
1692 }
1693 lock_basic_lock(&conn->lock);
1694 if(conn->is_deleted) {
1695 lock_rw_unlock(&table->lock);
1696 lock_basic_unlock(&conn->lock);
1697 return NULL;
1698 }
1699 lock_rw_unlock(&table->lock);
1700 return conn;
1701 }
1702 lock_rw_unlock(&table->conid_lock);
1703 return NULL;
1704 }
1705
1706 /** Find the doq_conn, by addr or by connection id */
1707 static struct doq_conn*
1708 doq_conn_find_by_addr_or_cid(struct doq_table* table,
1709 struct doq_pkt_addr* paddr, const uint8_t* dcid, size_t dcidlen)
1710 {
1711 struct doq_conn* conn;
1712 lock_rw_rdlock(&table->lock);
1713 conn = doq_conn_find(table, &paddr->addr, paddr->addrlen,
1714 &paddr->localaddr, paddr->localaddrlen, paddr->ifindex,
1715 dcid, dcidlen);
1716 if(conn && conn->is_deleted) {
1717 conn = NULL;
1718 }
1719 if(conn) {
1720 lock_basic_lock(&conn->lock);
1721 lock_rw_unlock(&table->lock);
1722 verbose(VERB_ALGO, "doq: found connection by address, dcid");
1723 } else {
1724 lock_rw_unlock(&table->lock);
1725 conn = doq_conn_find_by_id(table, dcid, dcidlen);
1726 if(conn) {
1727 verbose(VERB_ALGO, "doq: found connection by dcid");
1728 }
1729 }
1730 return conn;
1731 }
1732
1733 /** decode doq packet header, false on handled or failure, true to continue
1734 * to process the packet */
1735 static int
1736 doq_decode_pkt_header_negotiate(struct comm_point* c,
1737 struct doq_pkt_addr* paddr, struct doq_conn** conn)
1738 {
1739 #ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1740 struct ngtcp2_version_cid vc;
1741 #else
1742 uint32_t version;
1743 const uint8_t *dcid, *scid;
1744 size_t dcidlen, scidlen;
1745 #endif
1746 int rv;
1747
1748 #ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1749 rv = ngtcp2_pkt_decode_version_cid(&vc,
1750 sldns_buffer_begin(c->doq_socket->pkt_buf),
1751 sldns_buffer_limit(c->doq_socket->pkt_buf),
1752 c->doq_socket->sv_scidlen);
1753 #else
1754 rv = ngtcp2_pkt_decode_version_cid(&version, &dcid, &dcidlen,
1755 &scid, &scidlen, sldns_buffer_begin(c->doq_socket->pkt_buf),
1756 sldns_buffer_limit(c->doq_socket->pkt_buf), c->doq_socket->sv_scidlen);
1757 #endif
1758 if(rv != 0) {
1759 if(rv == NGTCP2_ERR_VERSION_NEGOTIATION) {
1760 /* send the version negotiation */
1761 doq_send_version_negotiation(c, paddr,
1762 #ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1763 vc.scid, vc.scidlen, vc.dcid, vc.dcidlen
1764 #else
1765 scid, scidlen, dcid, dcidlen
1766 #endif
1767 );
1768 return 0;
1769 }
1770 verbose(VERB_ALGO, "doq: could not decode version "
1771 "and CID from QUIC packet header: %s",
1772 ngtcp2_strerror(rv));
1773 return 0;
1774 }
1775
1776 if(verbosity >= VERB_ALGO) {
1777 verbose(VERB_ALGO, "ngtcp2_pkt_decode_version_cid packet has "
1778 "QUIC protocol version %u", (unsigned)
1779 #ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1780 vc.
1781 #endif
1782 version
1783 );
1784 log_hex("dcid",
1785 #ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1786 (void*)vc.dcid, vc.dcidlen
1787 #else
1788 (void*)dcid, dcidlen
1789 #endif
1790 );
1791 log_hex("scid",
1792 #ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1793 (void*)vc.scid, vc.scidlen
1794 #else
1795 (void*)scid, scidlen
1796 #endif
1797 );
1798 }
1799 *conn = doq_conn_find_by_addr_or_cid(c->doq_socket->table, paddr,
1800 #ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1801 vc.dcid, vc.dcidlen
1802 #else
1803 dcid, dcidlen
1804 #endif
1805 );
1806 if(*conn)
1807 (*conn)->doq_socket = c->doq_socket;
1808 return 1;
1809 }
1810
/**
 * Fill the cid structure with random data.
 * @param cid: the connection id that is initialised.
 * @param datalen: number of random bytes; capped at the size of the local
 *	32 byte buffer.
 * @param rnd: random state to draw from.
 */
static void doq_cid_randfill(struct ngtcp2_cid* cid, size_t datalen,
	struct ub_randstate* rnd)
{
	uint8_t bytes[32];
	size_t len = datalen;
	if(len > sizeof(bytes))
		len = sizeof(bytes);
	doq_fill_rand(rnd, bytes, len);
	ngtcp2_cid_init(cid, bytes, len);
}
1821
1822 /** send retry packet for doq connection. */
1823 static void
1824 doq_send_retry(struct comm_point* c, struct doq_pkt_addr* paddr,
1825 struct ngtcp2_pkt_hd* hd)
1826 {
1827 char host[256], port[32];
1828 struct ngtcp2_cid scid;
1829 uint8_t token[NGTCP2_CRYPTO_MAX_RETRY_TOKENLEN];
1830 ngtcp2_tstamp ts;
1831 ngtcp2_ssize tokenlen, ret;
1832
1833 if(!doq_print_addr_port(&paddr->addr, paddr->addrlen, host,
1834 sizeof(host), port, sizeof(port))) {
1835 log_err("doq_send_retry failed");
1836 return;
1837 }
1838 verbose(VERB_ALGO, "doq: sending retry packet to %s %s", host, port);
1839
1840 /* the server chosen source connection ID */
1841 scid.datalen = c->doq_socket->sv_scidlen;
1842 doq_cid_randfill(&scid, scid.datalen, c->doq_socket->rnd);
1843
1844 ts = doq_get_timestamp_nanosec();
1845
1846 tokenlen = ngtcp2_crypto_generate_retry_token(token,
1847 c->doq_socket->static_secret, c->doq_socket->static_secret_len,
1848 hd->version, (void*)&paddr->addr, paddr->addrlen, &scid,
1849 &hd->dcid, ts);
1850 if(tokenlen < 0) {
1851 log_err("ngtcp2_crypto_generate_retry_token failed: %s",
1852 ngtcp2_strerror(tokenlen));
1853 return;
1854 }
1855
1856 sldns_buffer_clear(c->doq_socket->pkt_buf);
1857 ret = ngtcp2_crypto_write_retry(sldns_buffer_begin(c->doq_socket->pkt_buf),
1858 sldns_buffer_capacity(c->doq_socket->pkt_buf), hd->version,
1859 &hd->scid, &scid, &hd->dcid, token, tokenlen);
1860 if(ret < 0) {
1861 log_err("ngtcp2_crypto_write_retry failed: %s",
1862 ngtcp2_strerror(ret));
1863 return;
1864 }
1865 sldns_buffer_set_position(c->doq_socket->pkt_buf, ret);
1866 sldns_buffer_flip(c->doq_socket->pkt_buf);
1867 doq_send_pkt(c, paddr, 0);
1868 }
1869
1870 /** doq send stateless connection close */
1871 static void
1872 doq_send_stateless_connection_close(struct comm_point* c,
1873 struct doq_pkt_addr* paddr, struct ngtcp2_pkt_hd* hd,
1874 uint64_t error_code)
1875 {
1876 ngtcp2_ssize ret;
1877 sldns_buffer_clear(c->doq_socket->pkt_buf);
1878 ret = ngtcp2_crypto_write_connection_close(
1879 sldns_buffer_begin(c->doq_socket->pkt_buf),
1880 sldns_buffer_capacity(c->doq_socket->pkt_buf), hd->version, &hd->scid,
1881 &hd->dcid, error_code, NULL, 0);
1882 if(ret < 0) {
1883 log_err("ngtcp2_crypto_write_connection_close failed: %s",
1884 ngtcp2_strerror(ret));
1885 return;
1886 }
1887 sldns_buffer_set_position(c->doq_socket->pkt_buf, ret);
1888 sldns_buffer_flip(c->doq_socket->pkt_buf);
1889 doq_send_pkt(c, paddr, 0);
1890 }
1891
1892 /** doq verify retry token, false on failure */
1893 static int
1894 doq_verify_retry_token(struct comm_point* c, struct doq_pkt_addr* paddr,
1895 struct ngtcp2_cid* ocid, struct ngtcp2_pkt_hd* hd)
1896 {
1897 char host[256], port[32];
1898 ngtcp2_tstamp ts;
1899 if(!doq_print_addr_port(&paddr->addr, paddr->addrlen, host,
1900 sizeof(host), port, sizeof(port))) {
1901 log_err("doq_verify_retry_token failed");
1902 return 0;
1903 }
1904 ts = doq_get_timestamp_nanosec();
1905 verbose(VERB_ALGO, "doq: verifying retry token from %s %s", host,
1906 port);
1907 if(ngtcp2_crypto_verify_retry_token(ocid,
1908 #ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
1909 hd->token, hd->tokenlen,
1910 #else
1911 hd->token.base, hd->token.len,
1912 #endif
1913 c->doq_socket->static_secret,
1914 c->doq_socket->static_secret_len, hd->version,
1915 (void*)&paddr->addr, paddr->addrlen, &hd->dcid,
1916 10*NGTCP2_SECONDS, ts) != 0) {
1917 verbose(VERB_ALGO, "doq: could not verify retry token "
1918 "from %s %s", host, port);
1919 return 0;
1920 }
1921 verbose(VERB_ALGO, "doq: verified retry token from %s %s", host, port);
1922 return 1;
1923 }
1924
1925 /** doq verify token, false on failure */
1926 static int
1927 doq_verify_token(struct comm_point* c, struct doq_pkt_addr* paddr,
1928 struct ngtcp2_pkt_hd* hd)
1929 {
1930 char host[256], port[32];
1931 ngtcp2_tstamp ts;
1932 if(!doq_print_addr_port(&paddr->addr, paddr->addrlen, host,
1933 sizeof(host), port, sizeof(port))) {
1934 log_err("doq_verify_token failed");
1935 return 0;
1936 }
1937 ts = doq_get_timestamp_nanosec();
1938 verbose(VERB_ALGO, "doq: verifying token from %s %s", host, port);
1939 if(ngtcp2_crypto_verify_regular_token(
1940 #ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
1941 hd->token, hd->tokenlen,
1942 #else
1943 hd->token.base, hd->token.len,
1944 #endif
1945 c->doq_socket->static_secret, c->doq_socket->static_secret_len,
1946 (void*)&paddr->addr, paddr->addrlen, 3600*NGTCP2_SECONDS,
1947 ts) != 0) {
1948 verbose(VERB_ALGO, "doq: could not verify token from %s %s",
1949 host, port);
1950 return 0;
1951 }
1952 verbose(VERB_ALGO, "doq: verified token from %s %s", host, port);
1953 return 1;
1954 }
1955
1956 /** delete and remove from the lookup tree the doq_conn connection */
1957 static void
1958 doq_delete_connection(struct comm_point* c, struct doq_conn* conn)
1959 {
1960 struct doq_conn copy;
1961 uint8_t cid[NGTCP2_MAX_CIDLEN];
1962 rbnode_type* node;
1963 if(!conn)
1964 return;
1965 /* Copy the key and set it deleted. */
1966 conn->is_deleted = 1;
1967 doq_conn_write_disable(conn);
1968 copy.key = conn->key;
1969 log_assert(conn->key.dcidlen <= NGTCP2_MAX_CIDLEN);
1970 memcpy(cid, conn->key.dcid, conn->key.dcidlen);
1971 copy.key.dcid = cid;
1972 copy.node.key = ©
1973 lock_basic_unlock(&conn->lock);
1974
1975 /* Now get the table lock to delete it from the tree */
1976 lock_rw_wrlock(&c->doq_socket->table->lock);
1977 node = rbtree_delete(c->doq_socket->table->conn_tree, copy.node.key);
1978 if(node) {
1979 conn = (struct doq_conn*)node->key;
1980 lock_basic_lock(&conn->lock);
1981 doq_conn_write_list_remove(c->doq_socket->table, conn);
1982 if(conn->timer.timer_in_list) {
1983 /* Remove timer from list first, because finding the
1984 * rbnode element of the setlist of same timeouts
1985 * needs tree lookup. Edit the tree structure after
1986 * that lookup. */
1987 doq_timer_list_remove(c->doq_socket->table,
1988 &conn->timer);
1989 }
1990 if(conn->timer.timer_in_tree)
1991 doq_timer_tree_remove(c->doq_socket->table,
1992 &conn->timer);
1993 }
1994 lock_rw_unlock(&c->doq_socket->table->lock);
1995 if(node) {
1996 lock_basic_unlock(&conn->lock);
1997 doq_table_quic_size_subtract(c->doq_socket->table,
1998 sizeof(*conn)+conn->key.dcidlen);
1999 doq_conn_delete(conn, c->doq_socket->table);
2000 }
2001 }
2002
2003 /** create and setup a new doq connection, to a new destination, or with
2004 * a new dcid. It has a new set of streams. It is inserted in the lookup tree.
2005 * Returns NULL on failure. */
2006 static struct doq_conn*
2007 doq_setup_new_conn(struct comm_point* c, struct doq_pkt_addr* paddr,
2008 struct ngtcp2_pkt_hd* hd, struct ngtcp2_cid* ocid)
2009 {
2010 struct doq_conn* conn;
2011 if(!doq_table_quic_size_available(c->doq_socket->table,
2012 c->doq_socket->cfg, sizeof(*conn)+hd->dcid.datalen
2013 + sizeof(struct doq_stream)
2014 + 100 /* estimated input query */
2015 + 1200 /* estimated output query */)) {
2016 verbose(VERB_ALGO, "doq: no mem available for new connection");
2017 doq_send_stateless_connection_close(c, paddr, hd,
2018 NGTCP2_CONNECTION_REFUSED);
2019 return NULL;
2020 }
2021 conn = doq_conn_create(c, paddr, hd->dcid.data, hd->dcid.datalen,
2022 hd->version);
2023 if(!conn) {
2024 log_err("doq: could not allocate doq_conn");
2025 return NULL;
2026 }
2027 lock_rw_wrlock(&c->doq_socket->table->lock);
2028 lock_basic_lock(&conn->lock);
2029 if(!rbtree_insert(c->doq_socket->table->conn_tree, &conn->node)) {
2030 lock_rw_unlock(&c->doq_socket->table->lock);
2031 log_err("doq: duplicate connection");
2032 /* conn has no entry in writelist, and no timer yet. */
2033 lock_basic_unlock(&conn->lock);
2034 doq_conn_delete(conn, c->doq_socket->table);
2035 return NULL;
2036 }
2037 lock_rw_unlock(&c->doq_socket->table->lock);
2038 doq_table_quic_size_add(c->doq_socket->table,
2039 sizeof(*conn)+conn->key.dcidlen);
2040 verbose(VERB_ALGO, "doq: created new connection");
2041
2042 /* the scid and dcid switch meaning from the accepted client
2043 * connection to the server connection. The 'source' and 'destination'
2044 * meaning is reversed. */
2045 if(!doq_conn_setup(conn, hd->scid.data, hd->scid.datalen,
2046 (ocid?ocid->data:NULL), (ocid?ocid->datalen:0),
2047 #ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
2048 hd->token, hd->tokenlen
2049 #else
2050 hd->token.base, hd->token.len
2051 #endif
2052 )) {
2053 log_err("doq: could not set up connection");
2054 doq_delete_connection(c, conn);
2055 return NULL;
2056 }
2057 return conn;
2058 }
2059
2060 /** perform doq address validation */
2061 static int
2062 doq_address_validation(struct comm_point* c, struct doq_pkt_addr* paddr,
2063 struct ngtcp2_pkt_hd* hd, struct ngtcp2_cid* ocid,
2064 struct ngtcp2_cid** pocid)
2065 {
2066 #ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
2067 const uint8_t* token = hd->token;
2068 size_t tokenlen = hd->tokenlen;
2069 #else
2070 const uint8_t* token = hd->token.base;
2071 size_t tokenlen = hd->token.len;
2072 #endif
2073 verbose(VERB_ALGO, "doq stateless address validation");
2074
2075 if(tokenlen == 0 || token == NULL) {
2076 doq_send_retry(c, paddr, hd);
2077 return 0;
2078 }
2079 if(token[0] != NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY &&
2080 hd->dcid.datalen < NGTCP2_MIN_INITIAL_DCIDLEN) {
2081 doq_send_stateless_connection_close(c, paddr, hd,
2082 NGTCP2_INVALID_TOKEN);
2083 return 0;
2084 }
2085 if(token[0] == NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY) {
2086 if(!doq_verify_retry_token(c, paddr, ocid, hd)) {
2087 doq_send_stateless_connection_close(c, paddr, hd,
2088 NGTCP2_INVALID_TOKEN);
2089 return 0;
2090 }
2091 *pocid = ocid;
2092 } else if(token[0] == NGTCP2_CRYPTO_TOKEN_MAGIC_REGULAR) {
2093 if(!doq_verify_token(c, paddr, hd)) {
2094 doq_send_retry(c, paddr, hd);
2095 return 0;
2096 }
2097 #ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
2098 hd->token = NULL;
2099 hd->tokenlen = 0;
2100 #else
2101 hd->token.base = NULL;
2102 hd->token.len = 0;
2103 #endif
2104 } else {
2105 verbose(VERB_ALGO, "doq address validation: unrecognised "
2106 "token in hd.token.base with magic byte 0x%2.2x",
2107 (int)token[0]);
2108 if(c->doq_socket->validate_addr) {
2109 doq_send_retry(c, paddr, hd);
2110 return 0;
2111 }
2112 #ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
2113 hd->token = NULL;
2114 hd->tokenlen = 0;
2115 #else
2116 hd->token.base = NULL;
2117 hd->token.len = 0;
2118 #endif
2119 }
2120 return 1;
2121 }
2122
2123 /** the doq accept, returns false if no further processing of content */
2124 static int
2125 doq_accept(struct comm_point* c, struct doq_pkt_addr* paddr,
2126 struct doq_conn** conn, struct ngtcp2_pkt_info* pi)
2127 {
2128 int rv;
2129 struct ngtcp2_pkt_hd hd;
2130 struct ngtcp2_cid ocid, *pocid=NULL;
2131 int err_retry;
2132 memset(&hd, 0, sizeof(hd));
2133 rv = ngtcp2_accept(&hd, sldns_buffer_begin(c->doq_socket->pkt_buf),
2134 sldns_buffer_limit(c->doq_socket->pkt_buf));
2135 if(rv != 0) {
2136 if(rv == NGTCP2_ERR_RETRY) {
2137 doq_send_retry(c, paddr, &hd);
2138 return 0;
2139 }
2140 log_err("doq: initial packet failed, ngtcp2_accept failed: %s",
2141 ngtcp2_strerror(rv));
2142 return 0;
2143 }
2144 if(c->doq_socket->validate_addr ||
2145 #ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
2146 hd.tokenlen
2147 #else
2148 hd.token.len
2149 #endif
2150 ) {
2151 if(!doq_address_validation(c, paddr, &hd, &ocid, &pocid))
2152 return 0;
2153 }
2154 *conn = doq_setup_new_conn(c, paddr, &hd, pocid);
2155 if(!*conn)
2156 return 0;
2157 (*conn)->doq_socket = c->doq_socket;
2158 if(!doq_conn_recv(c, paddr, *conn, pi, &err_retry, NULL)) {
2159 if(err_retry)
2160 doq_send_retry(c, paddr, &hd);
2161 doq_delete_connection(c, *conn);
2162 *conn = NULL;
2163 return 0;
2164 }
2165 return 1;
2166 }
2167
2168 /** doq pickup a timer to wait for for the worker. If any timer exists. */
2169 static void
2170 doq_pickup_timer(struct comm_point* c)
2171 {
2172 struct doq_timer* t;
2173 struct timeval tv;
2174 int have_time = 0;
2175 memset(&tv, 0, sizeof(tv));
2176
2177 lock_rw_wrlock(&c->doq_socket->table->lock);
2178 RBTREE_FOR(t, struct doq_timer*, c->doq_socket->table->timer_tree) {
2179 if(t->worker_doq_socket == NULL ||
2180 t->worker_doq_socket == c->doq_socket) {
2181 /* pick up this element */
2182 t->worker_doq_socket = c->doq_socket;
2183 have_time = 1;
2184 memcpy(&tv, &t->time, sizeof(tv));
2185 break;
2186 }
2187 }
2188 lock_rw_unlock(&c->doq_socket->table->lock);
2189
2190 if(have_time) {
2191 struct timeval rel;
2192 timeval_subtract(&rel, &tv, c->doq_socket->now_tv);
2193 comm_timer_set(c->doq_socket->timer, &rel);
2194 memcpy(&c->doq_socket->marked_time, &tv,
2195 sizeof(c->doq_socket->marked_time));
2196 verbose(VERB_ALGO, "doq pickup timer at %d.%6.6d in %d.%6.6d",
2197 (int)tv.tv_sec, (int)tv.tv_usec, (int)rel.tv_sec,
2198 (int)rel.tv_usec);
2199 } else {
2200 if(comm_timer_is_set(c->doq_socket->timer))
2201 comm_timer_disable(c->doq_socket->timer);
2202 memset(&c->doq_socket->marked_time, 0,
2203 sizeof(c->doq_socket->marked_time));
2204 verbose(VERB_ALGO, "doq timer disabled");
2205 }
2206 }
2207
2208 /** doq done with connection, release locks and setup timer and write */
2209 static void
2210 doq_done_setup_timer_and_write(struct comm_point* c, struct doq_conn* conn)
2211 {
2212 struct doq_conn copy;
2213 uint8_t cid[NGTCP2_MAX_CIDLEN];
2214 rbnode_type* node;
2215 struct timeval new_tv;
2216 int write_change = 0, timer_change = 0;
2217
2218 /* No longer in callbacks, so the pointer to doq_socket is back
2219 * to NULL. */
2220 conn->doq_socket = NULL;
2221
2222 if(doq_conn_check_timer(conn, &new_tv))
2223 timer_change = 1;
2224 if( (conn->write_interest && !conn->on_write_list) ||
2225 (!conn->write_interest && conn->on_write_list))
2226 write_change = 1;
2227
2228 if(!timer_change && !write_change) {
2229 /* Nothing to do. */
2230 lock_basic_unlock(&conn->lock);
2231 return;
2232 }
2233
2234 /* The table lock is needed to change the write list and timer tree.
2235 * So the connection lock is release and then the connection is
2236 * looked up again. */
2237 copy.key = conn->key;
2238 log_assert(conn->key.dcidlen <= NGTCP2_MAX_CIDLEN);
2239 memcpy(cid, conn->key.dcid, conn->key.dcidlen);
2240 copy.key.dcid = cid;
2241 copy.node.key = ©
2242 lock_basic_unlock(&conn->lock);
2243
2244 lock_rw_wrlock(&c->doq_socket->table->lock);
2245 node = rbtree_search(c->doq_socket->table->conn_tree, copy.node.key);
2246 if(!node) {
2247 lock_rw_unlock(&c->doq_socket->table->lock);
2248 /* Must have been deleted in the mean time. */
2249 return;
2250 }
2251 conn = (struct doq_conn*)node->key;
2252 lock_basic_lock(&conn->lock);
2253 if(conn->is_deleted) {
2254 /* It is deleted now. */
2255 lock_rw_unlock(&c->doq_socket->table->lock);
2256 lock_basic_unlock(&conn->lock);
2257 return;
2258 }
2259
2260 if(write_change) {
2261 /* Edit the write lists, we are holding the table.lock and can
2262 * edit the list first,last and also prev,next and on_list
2263 * elements in the doq_conn structures. */
2264 doq_conn_set_write_list(c->doq_socket->table, conn);
2265 }
2266 if(timer_change) {
2267 doq_timer_set(c->doq_socket->table, &conn->timer,
2268 c->doq_socket, &new_tv);
2269 }
2270 lock_rw_unlock(&c->doq_socket->table->lock);
2271 lock_basic_unlock(&conn->lock);
2272 }
2273
2274 /** doq done with connection callbacks, release locks and setup write */
2275 static void
2276 doq_done_with_conn_cb(struct comm_point* c, struct doq_conn* conn)
2277 {
2278 struct doq_conn copy;
2279 uint8_t cid[NGTCP2_MAX_CIDLEN];
2280 rbnode_type* node;
2281
2282 /* no longer in callbacks, so the pointer to doq_socket is back
2283 * to NULL. */
2284 conn->doq_socket = NULL;
2285
2286 if( (conn->write_interest && conn->on_write_list) ||
2287 (!conn->write_interest && !conn->on_write_list)) {
2288 /* The connection already has the required write list
2289 * status. */
2290 lock_basic_unlock(&conn->lock);
2291 return;
2292 }
2293
2294 /* To edit the write list of connections we have to hold the table
2295 * lock, so we release the connection and then look it up again. */
2296 copy.key = conn->key;
2297 log_assert(conn->key.dcidlen <= NGTCP2_MAX_CIDLEN);
2298 memcpy(cid, conn->key.dcid, conn->key.dcidlen);
2299 copy.key.dcid = cid;
2300 copy.node.key = ©
2301 lock_basic_unlock(&conn->lock);
2302
2303 lock_rw_wrlock(&c->doq_socket->table->lock);
2304 node = rbtree_search(c->doq_socket->table->conn_tree, copy.node.key);
2305 if(!node) {
2306 lock_rw_unlock(&c->doq_socket->table->lock);
2307 /* must have been deleted in the mean time */
2308 return;
2309 }
2310 conn = (struct doq_conn*)node->key;
2311 lock_basic_lock(&conn->lock);
2312 if(conn->is_deleted) {
2313 /* it is deleted now. */
2314 lock_rw_unlock(&c->doq_socket->table->lock);
2315 lock_basic_unlock(&conn->lock);
2316 return;
2317 }
2318
2319 /* edit the write lists, we are holding the table.lock and can
2320 * edit the list first,last and also prev,next and on_list elements
2321 * in the doq_conn structures. */
2322 doq_conn_set_write_list(c->doq_socket->table, conn);
2323 lock_rw_unlock(&c->doq_socket->table->lock);
2324 lock_basic_unlock(&conn->lock);
2325 }
2326
2327 /** doq count the length of the write list */
2328 static size_t
2329 doq_write_list_length(struct comm_point* c)
2330 {
2331 size_t count = 0;
2332 struct doq_conn* conn;
2333 lock_rw_rdlock(&c->doq_socket->table->lock);
2334 conn = c->doq_socket->table->write_list_first;
2335 while(conn) {
2336 count++;
2337 conn = conn->write_next;
2338 }
2339 lock_rw_unlock(&c->doq_socket->table->lock);
2340 return count;
2341 }
2342
2343 /** doq pop the first element from the write list to have write events */
2344 static struct doq_conn*
2345 doq_pop_write_conn(struct comm_point* c)
2346 {
2347 struct doq_conn* conn;
2348 lock_rw_wrlock(&c->doq_socket->table->lock);
2349 conn = doq_table_pop_first(c->doq_socket->table);
2350 while(conn && conn->is_deleted) {
2351 lock_basic_unlock(&conn->lock);
2352 conn = doq_table_pop_first(c->doq_socket->table);
2353 }
2354 lock_rw_unlock(&c->doq_socket->table->lock);
2355 if(conn)
2356 conn->doq_socket = c->doq_socket;
2357 return conn;
2358 }
2359
/**
 * The connection is done with the write callbacks, release it.
 * @param c: comm point with the doq socket.
 * @param conn: the connection.
 * @param delete_it: if true the connection is deleted, otherwise its timer
 *	and write list status are set up.
 */
static void
doq_done_with_write_cb(struct comm_point* c, struct doq_conn* conn,
	int delete_it)
{
	if(!delete_it) {
		doq_done_setup_timer_and_write(c, conn);
		return;
	}
	doq_delete_connection(c, conn);
}
2371
2372 /** see if the doq socket wants to write packets */
2373 static int
2374 doq_socket_want_write(struct comm_point* c)
2375 {
2376 int want_write = 0;
2377 if(c->doq_socket->have_blocked_pkt)
2378 return 1;
2379 lock_rw_rdlock(&c->doq_socket->table->lock);
2380 if(c->doq_socket->table->write_list_first)
2381 want_write = 1;
2382 lock_rw_unlock(&c->doq_socket->table->lock);
2383 return want_write;
2384 }
2385
2386 /** enable write event for the doq server socket fd */
2387 static void
2388 doq_socket_write_enable(struct comm_point* c)
2389 {
2390 verbose(VERB_ALGO, "doq socket want write");
2391 if(c->doq_socket->event_has_write)
2392 return;
2393 comm_point_listen_for_rw(c, 1, 1);
2394 c->doq_socket->event_has_write = 1;
2395 }
2396
2397 /** disable write event for the doq server socket fd */
2398 static void
2399 doq_socket_write_disable(struct comm_point* c)
2400 {
2401 verbose(VERB_ALGO, "doq socket want no write");
2402 if(!c->doq_socket->event_has_write)
2403 return;
2404 comm_point_listen_for_rw(c, 1, 0);
2405 c->doq_socket->event_has_write = 0;
2406 }
2407
2408 /** write blocked packet, if possible. returns false if failed, again. */
2409 static int
2410 doq_write_blocked_pkt(struct comm_point* c)
2411 {
2412 struct doq_pkt_addr paddr;
2413 if(!c->doq_socket->have_blocked_pkt)
2414 return 1;
2415 c->doq_socket->have_blocked_pkt = 0;
2416 if(sldns_buffer_limit(c->doq_socket->blocked_pkt) >
2417 sldns_buffer_remaining(c->doq_socket->pkt_buf))
2418 return 1; /* impossibly large, drop it.
2419 impossible since pkt_buf is same size as blocked_pkt buf. */
2420 sldns_buffer_clear(c->doq_socket->pkt_buf);
2421 sldns_buffer_write(c->doq_socket->pkt_buf,
2422 sldns_buffer_begin(c->doq_socket->blocked_pkt),
2423 sldns_buffer_limit(c->doq_socket->blocked_pkt));
2424 sldns_buffer_flip(c->doq_socket->pkt_buf);
2425 memcpy(&paddr, c->doq_socket->blocked_paddr, sizeof(paddr));
2426 doq_send_pkt(c, &paddr, c->doq_socket->blocked_pkt_pi.ecn);
2427 if(c->doq_socket->have_blocked_pkt)
2428 return 0;
2429 return 1;
2430 }
2431
2432 /** doq find a timer that timeouted and return the conn, locked. */
2433 static struct doq_conn*
2434 doq_timer_timeout_conn(struct doq_server_socket* doq_socket)
2435 {
2436 struct doq_conn* conn = NULL;
2437 struct rbnode_type* node;
2438 lock_rw_wrlock(&doq_socket->table->lock);
2439 node = rbtree_first(doq_socket->table->timer_tree);
2440 if(node && node != RBTREE_NULL) {
2441 struct doq_timer* t = (struct doq_timer*)node;
2442 conn = t->conn;
2443
2444 /* If now < timer then no further timeouts in tree. */
2445 if(timeval_smaller(doq_socket->now_tv, &t->time)) {
2446 lock_rw_unlock(&doq_socket->table->lock);
2447 return NULL;
2448 }
2449
2450 lock_basic_lock(&conn->lock);
2451 conn->doq_socket = doq_socket;
2452
2453 /* Now that the timer is fired, remove it. */
2454 doq_timer_unset(doq_socket->table, t);
2455 lock_rw_unlock(&doq_socket->table->lock);
2456 return conn;
2457 }
2458 lock_rw_unlock(&doq_socket->table->lock);
2459 return NULL;
2460 }
2461
2462 /** doq timer erase the marker that said which timer the worker uses. */
2463 static void
2464 doq_timer_erase_marker(struct doq_server_socket* doq_socket)
2465 {
2466 struct doq_timer* t;
2467 lock_rw_wrlock(&doq_socket->table->lock);
2468 t = doq_timer_find_time(doq_socket->table, &doq_socket->marked_time);
2469 if(t && t->worker_doq_socket == doq_socket)
2470 t->worker_doq_socket = NULL;
2471 lock_rw_unlock(&doq_socket->table->lock);
2472 memset(&doq_socket->marked_time, 0, sizeof(doq_socket->marked_time));
2473 }
2474
2475 void
2476 doq_timer_cb(void* arg)
2477 {
2478 struct doq_server_socket* doq_socket = (struct doq_server_socket*)arg;
2479 struct doq_conn* conn;
2480 verbose(VERB_ALGO, "doq timer callback");
2481
2482 doq_timer_erase_marker(doq_socket);
2483
2484 while((conn = doq_timer_timeout_conn(doq_socket)) != NULL) {
2485 if(conn->is_deleted ||
2486 #ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD
2487 ngtcp2_conn_in_closing_period(conn->conn) ||
2488 #else
2489 ngtcp2_conn_is_in_closing_period(conn->conn) ||
2490 #endif
2491 #ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD
2492 ngtcp2_conn_in_draining_period(conn->conn)
2493 #else
2494 ngtcp2_conn_is_in_draining_period(conn->conn)
2495 #endif
2496 ) {
2497 if(verbosity >= VERB_ALGO) {
2498 char remotestr[256];
2499 addr_to_str((void*)&conn->key.paddr.addr,
2500 conn->key.paddr.addrlen, remotestr,
2501 sizeof(remotestr));
2502 verbose(VERB_ALGO, "doq conn %s is deleted "
2503 "after timeout", remotestr);
2504 }
2505 doq_delete_connection(doq_socket->cp, conn);
2506 continue;
2507 }
2508 if(!doq_conn_handle_timeout(conn))
2509 doq_delete_connection(doq_socket->cp, conn);
2510 else doq_done_setup_timer_and_write(doq_socket->cp, conn);
2511 }
2512
2513 if(doq_socket_want_write(doq_socket->cp))
2514 doq_socket_write_enable(doq_socket->cp);
2515 else doq_socket_write_disable(doq_socket->cp);
2516 doq_pickup_timer(doq_socket->cp);
2517 }
2518
2519 void
2520 comm_point_doq_callback(int fd, short event, void* arg)
2521 {
2522 struct comm_point* c;
2523 struct doq_pkt_addr paddr;
2524 int i, pkt_continue, err_drop;
2525 struct doq_conn* conn;
2526 struct ngtcp2_pkt_info pi;
2527 size_t count, num_len;
2528
2529 c = (struct comm_point*)arg;
2530 log_assert(c->type == comm_doq);
2531
2532 log_assert(c && c->doq_socket->pkt_buf && c->fd == fd);
2533 ub_comm_base_now(c->ev->base);
2534
2535 /* see if there is a blocked packet, and send that if possible.
2536 * do not attempt to read yet, even if possible, that would just
2537 * push more answers in reply to those read packets onto the list
2538 * of written replies. First attempt to clear the write content out.
2539 * That keeps the memory usage from bloating up. */
2540 if(c->doq_socket->have_blocked_pkt) {
2541 if(!doq_write_blocked_pkt(c)) {
2542 /* this write has also blocked, attempt to write
2543 * later. Make sure the event listens to write
2544 * events. */
2545 if(!c->doq_socket->event_has_write)
2546 doq_socket_write_enable(c);
2547 doq_pickup_timer(c);
2548 return;
2549 }
2550 }
2551
2552 /* see if there is write interest */
2553 count = 0;
2554 num_len = doq_write_list_length(c);
2555 while((conn = doq_pop_write_conn(c)) != NULL) {
2556 if(conn->is_deleted ||
2557 #ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD
2558 ngtcp2_conn_in_closing_period(conn->conn) ||
2559 #else
2560 ngtcp2_conn_is_in_closing_period(conn->conn) ||
2561 #endif
2562 #ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD
2563 ngtcp2_conn_in_draining_period(conn->conn)
2564 #else
2565 ngtcp2_conn_is_in_draining_period(conn->conn)
2566 #endif
2567 ) {
2568 conn->doq_socket = NULL;
2569 lock_basic_unlock(&conn->lock);
2570 if(c->doq_socket->have_blocked_pkt) {
2571 if(!c->doq_socket->event_has_write)
2572 doq_socket_write_enable(c);
2573 doq_pickup_timer(c);
2574 return;
2575 }
2576 if(++count > num_len*2)
2577 break;
2578 continue;
2579 }
2580 if(verbosity >= VERB_ALGO) {
2581 char remotestr[256];
2582 addr_to_str((void*)&conn->key.paddr.addr,
2583 conn->key.paddr.addrlen, remotestr,
2584 sizeof(remotestr));
2585 verbose(VERB_ALGO, "doq write connection %s %d",
2586 remotestr, doq_sockaddr_get_port(
2587 &conn->key.paddr.addr));
2588 }
2589 if(doq_conn_write_streams(c, conn, &err_drop))
2590 err_drop = 0;
2591 doq_done_with_write_cb(c, conn, err_drop);
2592 if(c->doq_socket->have_blocked_pkt) {
2593 if(!c->doq_socket->event_has_write)
2594 doq_socket_write_enable(c);
2595 doq_pickup_timer(c);
2596 return;
2597 }
2598 /* Stop overly long write lists that are created
2599 * while we are processing. Do those next time there
2600 * is a write callback. Stops long loops, and keeps
2601 * fair for other events. */
2602 if(++count > num_len*2)
2603 break;
2604 }
2605
2606 /* check for data to read */
2607 if((event&UB_EV_READ)!=0)
2608 for(i=0; i<NUM_UDP_PER_SELECT; i++) {
2609 /* there may be a blocked write packet and if so, stop
2610 * reading because the reply cannot get written. The
2611 * blocked packet could be written during the conn_recv
2612 * handling of replies, or for a connection close. */
2613 if(c->doq_socket->have_blocked_pkt) {
2614 if(!c->doq_socket->event_has_write)
2615 doq_socket_write_enable(c);
2616 doq_pickup_timer(c);
2617 return;
2618 }
2619 sldns_buffer_clear(c->doq_socket->pkt_buf);
2620 doq_pkt_addr_init(&paddr);
2621 log_assert(fd != -1);
2622 log_assert(sldns_buffer_remaining(c->doq_socket->pkt_buf) > 0);
2623 if(!doq_recv(c, &paddr, &pkt_continue, &pi)) {
2624 if(pkt_continue)
2625 continue;
2626 break;
2627 }
2628
2629 /* handle incoming packet from remote addr to localaddr */
2630 if(verbosity >= VERB_ALGO) {
2631 char remotestr[256], localstr[256];
2632 addr_to_str((void*)&paddr.addr, paddr.addrlen,
2633 remotestr, sizeof(remotestr));
2634 addr_to_str((void*)&paddr.localaddr,
2635 paddr.localaddrlen, localstr,
2636 sizeof(localstr));
2637 log_info("incoming doq packet from %s port %d on "
2638 "%s port %d ifindex %d",
2639 remotestr, doq_sockaddr_get_port(&paddr.addr),
2640 localstr,
2641 doq_sockaddr_get_port(&paddr.localaddr),
2642 paddr.ifindex);
2643 log_info("doq_recv length %d ecn 0x%x",
2644 (int)sldns_buffer_limit(c->doq_socket->pkt_buf),
2645 (int)pi.ecn);
2646 }
2647
2648 if(sldns_buffer_limit(c->doq_socket->pkt_buf) == 0)
2649 continue;
2650
2651 conn = NULL;
2652 if(!doq_decode_pkt_header_negotiate(c, &paddr, &conn))
2653 continue;
2654 if(!conn) {
2655 if(!doq_accept(c, &paddr, &conn, &pi))
2656 continue;
2657 if(!doq_conn_write_streams(c, conn, NULL)) {
2658 doq_delete_connection(c, conn);
2659 continue;
2660 }
2661 doq_done_setup_timer_and_write(c, conn);
2662 continue;
2663 }
2664 if(
2665 #ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD
2666 ngtcp2_conn_in_closing_period(conn->conn)
2667 #else
2668 ngtcp2_conn_is_in_closing_period(conn->conn)
2669 #endif
2670 ) {
2671 if(!doq_conn_send_close(c, conn)) {
2672 doq_delete_connection(c, conn);
2673 } else {
2674 doq_done_setup_timer_and_write(c, conn);
2675 }
2676 continue;
2677 }
2678 if(
2679 #ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD
2680 ngtcp2_conn_in_draining_period(conn->conn)
2681 #else
2682 ngtcp2_conn_is_in_draining_period(conn->conn)
2683 #endif
2684 ) {
2685 doq_done_setup_timer_and_write(c, conn);
2686 continue;
2687 }
2688 if(!doq_conn_recv(c, &paddr, conn, &pi, NULL, &err_drop)) {
2689 /* The receive failed, and if it also failed to send
2690 * a close, drop the connection. That means it is not
2691 * in the closing period. */
2692 if(err_drop) {
2693 doq_delete_connection(c, conn);
2694 } else {
2695 doq_done_setup_timer_and_write(c, conn);
2696 }
2697 continue;
2698 }
2699 if(!doq_conn_write_streams(c, conn, &err_drop)) {
2700 if(err_drop) {
2701 doq_delete_connection(c, conn);
2702 } else {
2703 doq_done_setup_timer_and_write(c, conn);
2704 }
2705 continue;
2706 }
2707 doq_done_setup_timer_and_write(c, conn);
2708 }
2709
2710 /* see if we want to have more write events */
2711 verbose(VERB_ALGO, "doq check write enable");
2712 if(doq_socket_want_write(c))
2713 doq_socket_write_enable(c);
2714 else doq_socket_write_disable(c);
2715 doq_pickup_timer(c);
2716 }
2717
2718 /** create new doq server socket structure */
2719 static struct doq_server_socket*
2720 doq_server_socket_create(struct doq_table* table, struct ub_randstate* rnd,
2721 const void* quic_sslctx, struct comm_point* c, struct comm_base* base,
2722 struct config_file* cfg)
2723 {
2724 size_t doq_buffer_size = 4096; /* bytes buffer size, for one packet. */
2725 struct doq_server_socket* doq_socket;
2726 log_assert(table != NULL);
2727 doq_socket = calloc(1, sizeof(*doq_socket));
2728 if(!doq_socket) {
2729 return NULL;
2730 }
2731 doq_socket->table = table;
2732 doq_socket->rnd = rnd;
2733 doq_socket->validate_addr = 1;
2734 /* the doq_socket has its own copy of the static secret, as
2735 * well as other config values, so that they do not need table.lock */
2736 doq_socket->static_secret_len = table->static_secret_len;
2737 doq_socket->static_secret = memdup(table->static_secret,
2738 table->static_secret_len);
2739 if(!doq_socket->static_secret) {
2740 free(doq_socket);
2741 return NULL;
2742 }
2743 doq_socket->ctx = (SSL_CTX*)quic_sslctx;
2744 doq_socket->idle_timeout = table->idle_timeout;
2745 doq_socket->sv_scidlen = table->sv_scidlen;
2746 doq_socket->cp = c;
2747 doq_socket->pkt_buf = sldns_buffer_new(doq_buffer_size);
2748 if(!doq_socket->pkt_buf) {
2749 free(doq_socket->static_secret);
2750 free(doq_socket);
2751 return NULL;
2752 }
2753 doq_socket->blocked_pkt = sldns_buffer_new(
2754 sldns_buffer_capacity(doq_socket->pkt_buf));
2755 if(!doq_socket->pkt_buf) {
2756 free(doq_socket->static_secret);
2757 sldns_buffer_free(doq_socket->pkt_buf);
2758 free(doq_socket);
2759 return NULL;
2760 }
2761 doq_socket->blocked_paddr = calloc(1,
2762 sizeof(*doq_socket->blocked_paddr));
2763 if(!doq_socket->blocked_paddr) {
2764 free(doq_socket->static_secret);
2765 sldns_buffer_free(doq_socket->pkt_buf);
2766 sldns_buffer_free(doq_socket->blocked_pkt);
2767 free(doq_socket);
2768 return NULL;
2769 }
2770 doq_socket->timer = comm_timer_create(base, doq_timer_cb, doq_socket);
2771 if(!doq_socket->timer) {
2772 free(doq_socket->static_secret);
2773 sldns_buffer_free(doq_socket->pkt_buf);
2774 sldns_buffer_free(doq_socket->blocked_pkt);
2775 free(doq_socket->blocked_paddr);
2776 free(doq_socket);
2777 return NULL;
2778 }
2779 memset(&doq_socket->marked_time, 0, sizeof(doq_socket->marked_time));
2780 comm_base_timept(base, &doq_socket->now_tt, &doq_socket->now_tv);
2781 doq_socket->cfg = cfg;
2782 return doq_socket;
2783 }
2784
2785 /** delete doq server socket structure */
2786 static void
2787 doq_server_socket_delete(struct doq_server_socket* doq_socket)
2788 {
2789 if(!doq_socket)
2790 return;
2791 free(doq_socket->static_secret);
2792 #ifndef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT
2793 free(doq_socket->quic_method);
2794 #endif
2795 sldns_buffer_free(doq_socket->pkt_buf);
2796 sldns_buffer_free(doq_socket->blocked_pkt);
2797 free(doq_socket->blocked_paddr);
2798 comm_timer_delete(doq_socket->timer);
2799 free(doq_socket);
2800 }
2801
2802 /** find repinfo in the doq table */
2803 static struct doq_conn*
2804 doq_lookup_repinfo(struct doq_table* table, struct comm_reply* repinfo)
2805 {
2806 struct doq_conn* conn;
2807 struct doq_conn_key key;
2808 log_assert(table != NULL);
2809 doq_conn_key_from_repinfo(&key, repinfo);
2810 lock_rw_rdlock(&table->lock);
2811 conn = doq_conn_find(table, &key.paddr.addr,
2812 key.paddr.addrlen, &key.paddr.localaddr,
2813 key.paddr.localaddrlen, key.paddr.ifindex, key.dcid,
2814 key.dcidlen);
2815 if(conn) {
2816 lock_basic_lock(&conn->lock);
2817 lock_rw_unlock(&table->lock);
2818 return conn;
2819 }
2820 lock_rw_unlock(&table->lock);
2821 return NULL;
2822 }
2823
2824 /** doq find connection and stream. From inside callbacks from worker. */
2825 static int
2826 doq_lookup_conn_stream(struct comm_reply* repinfo, struct comm_point* c,
2827 struct doq_conn** conn, struct doq_stream** stream)
2828 {
2829 log_assert(c->doq_socket);
2830 if(c->doq_socket->current_conn) {
2831 *conn = c->doq_socket->current_conn;
2832 } else {
2833 *conn = doq_lookup_repinfo(c->doq_socket->table, repinfo);
2834 if((*conn) && (*conn)->is_deleted) {
2835 lock_basic_unlock(&(*conn)->lock);
2836 *conn = NULL;
2837 }
2838 if(*conn) {
2839 (*conn)->doq_socket = c->doq_socket;
2840 }
2841 }
2842 if(!*conn) {
2843 *stream = NULL;
2844 return 0;
2845 }
2846 *stream = doq_stream_find(*conn, repinfo->doq_streamid);
2847 if(!*stream) {
2848 if(!c->doq_socket->current_conn) {
2849 /* Not inside callbacks, we have our own lock on conn.
2850 * Release it. */
2851 lock_basic_unlock(&(*conn)->lock);
2852 }
2853 return 0;
2854 }
2855 if((*stream)->is_closed) {
2856 /* stream is closed, ignore reply or drop */
2857 if(!c->doq_socket->current_conn) {
2858 /* Not inside callbacks, we have our own lock on conn.
2859 * Release it. */
2860 lock_basic_unlock(&(*conn)->lock);
2861 }
2862 return 0;
2863 }
2864 return 1;
2865 }
2866
2867 /** doq send a reply from a comm reply */
2868 static void
2869 doq_socket_send_reply(struct comm_reply* repinfo)
2870 {
2871 struct doq_conn* conn;
2872 struct doq_stream* stream;
2873 log_assert(repinfo->c->type == comm_doq);
2874 if(!doq_lookup_conn_stream(repinfo, repinfo->c, &conn, &stream)) {
2875 verbose(VERB_ALGO, "doq: send_reply but %s is gone",
2876 (conn?"stream":"connection"));
2877 /* No stream, it may have been closed. */
2878 /* Drop the reply, it cannot be sent. */
2879 return;
2880 }
2881 if(!doq_stream_send_reply(conn, stream, repinfo->c->buffer))
2882 doq_stream_close(conn, stream, 1);
2883 if(!repinfo->c->doq_socket->current_conn) {
2884 /* Not inside callbacks, we have our own lock on conn.
2885 * Release it. */
2886 doq_done_with_conn_cb(repinfo->c, conn);
2887 /* since we sent a reply, or closed it, the assumption is
2888 * that there is something to write, so enable write event.
2889 * It waits until the write event happens to write the
2890 * streams with answers, this allows some answers to be
2891 * answered before the event loop reaches the doq fd, in
2892 * repinfo->c->fd, and that collates answers. That would
2893 * not happen if we write doq packets right now. */
2894 doq_socket_write_enable(repinfo->c);
2895 }
2896 }
2897
2898 /** doq drop a reply from a comm reply */
2899 static void
2900 doq_socket_drop_reply(struct comm_reply* repinfo)
2901 {
2902 struct doq_conn* conn;
2903 struct doq_stream* stream;
2904 log_assert(repinfo->c->type == comm_doq);
2905 if(!doq_lookup_conn_stream(repinfo, repinfo->c, &conn, &stream)) {
2906 verbose(VERB_ALGO, "doq: drop_reply but %s is gone",
2907 (conn?"stream":"connection"));
2908 /* The connection or stream is already gone. */
2909 return;
2910 }
2911 doq_stream_close(conn, stream, 1);
2912 if(!repinfo->c->doq_socket->current_conn) {
2913 /* Not inside callbacks, we have our own lock on conn.
2914 * Release it. */
2915 doq_done_with_conn_cb(repinfo->c, conn);
2916 doq_socket_write_enable(repinfo->c);
2917 }
2918 }
2919 #endif /* HAVE_NGTCP2 */
2920
2921 int adjusted_tcp_timeout(struct comm_point* c)
2922 {
2923 if(c->tcp_timeout_msec < TCP_QUERY_TIMEOUT_MINIMUM)
2924 return TCP_QUERY_TIMEOUT_MINIMUM;
2925 return c->tcp_timeout_msec;
2926 }
2927
2928 /** Use a new tcp handler for new query fd, set to read query */
2929 static void
2930 setup_tcp_handler(struct comm_point* c, int fd, int cur, int max)
2931 {
2932 int handler_usage;
2933 log_assert(c->type == comm_tcp || c->type == comm_http);
2934 log_assert(c->fd == -1);
2935 sldns_buffer_clear(c->buffer);
2936 #ifdef USE_DNSCRYPT
2937 if (c->dnscrypt)
2938 sldns_buffer_clear(c->dnscrypt_buffer);
2939 #endif
2940 c->tcp_is_reading = 1;
2941 c->tcp_byte_count = 0;
2942 c->tcp_keepalive = 0;
2943 /* if more than half the tcp handlers are in use, use a shorter
2944 * timeout for this TCP connection, we need to make space for
2945 * other connections to be able to get attention */
2946 /* If > 50% TCP handler structures in use, set timeout to 1/100th
2947 * configured value.
2948 * If > 65%TCP handler structures in use, set to 1/500th configured
2949 * value.
2950 * If > 80% TCP handler structures in use, set to 0.
2951 *
2952 * If the timeout to use falls below 200 milliseconds, an actual
2953 * timeout of 200ms is used.
2954 */
2955 handler_usage = (cur * 100) / max;
2956 if(handler_usage > 50 && handler_usage <= 65)
2957 c->tcp_timeout_msec /= 100;
2958 else if (handler_usage > 65 && handler_usage <= 80)
2959 c->tcp_timeout_msec /= 500;
2960 else if (handler_usage > 80)
2961 c->tcp_timeout_msec = 0;
2962 comm_point_start_listening(c, fd, adjusted_tcp_timeout(c));
2963 }
2964
2965 void comm_base_handle_slow_accept(int ATTR_UNUSED(fd),
2966 short ATTR_UNUSED(event), void* arg)
2967 {
2968 struct comm_base* b = (struct comm_base*)arg;
2969 /* timeout for the slow accept, re-enable accepts again */
2970 if(b->start_accept) {
2971 verbose(VERB_ALGO, "wait is over, slow accept disabled");
2972 fptr_ok(fptr_whitelist_start_accept(b->start_accept));
2973 (*b->start_accept)(b->cb_arg);
2974 b->eb->slow_accept_enabled = 0;
2975 }
2976 }
2977
2978 int comm_point_perform_accept(struct comm_point* c,
2979 struct sockaddr_storage* addr, socklen_t* addrlen)
2980 {
2981 int new_fd;
2982 *addrlen = (socklen_t)sizeof(*addr);
2983 #ifndef HAVE_ACCEPT4
2984 new_fd = accept(c->fd, (struct sockaddr*)addr, addrlen);
2985 #else
2986 /* SOCK_NONBLOCK saves extra calls to fcntl for the same result */
2987 new_fd = accept4(c->fd, (struct sockaddr*)addr, addrlen, SOCK_NONBLOCK);
2988 #endif
2989 if(new_fd == -1) {
2990 #ifndef USE_WINSOCK
2991 /* EINTR is signal interrupt. others are closed connection. */
2992 if( errno == EINTR || errno == EAGAIN
2993 #ifdef EWOULDBLOCK
2994 || errno == EWOULDBLOCK
2995 #endif
2996 #ifdef ECONNABORTED
2997 || errno == ECONNABORTED
2998 #endif
2999 #ifdef EPROTO
3000 || errno == EPROTO
3001 #endif /* EPROTO */
3002 )
3003 return -1;
3004 #if defined(ENFILE) && defined(EMFILE)
3005 if(errno == ENFILE || errno == EMFILE) {
3006 /* out of file descriptors, likely outside of our
3007 * control. stop accept() calls for some time */
3008 if(c->ev->base->stop_accept) {
3009 struct comm_base* b = c->ev->base;
3010 struct timeval tv;
3011 verbose(VERB_ALGO, "out of file descriptors: "
3012 "slow accept");
3013 ub_comm_base_now(b);
3014 if(b->eb->last_slow_log+SLOW_LOG_TIME <=
3015 b->eb->secs) {
3016 b->eb->last_slow_log = b->eb->secs;
3017 verbose(VERB_OPS, "accept failed, "
3018 "slow down accept for %d "
3019 "msec: %s",
3020 NETEVENT_SLOW_ACCEPT_TIME,
3021 sock_strerror(errno));
3022 }
3023 b->eb->slow_accept_enabled = 1;
3024 fptr_ok(fptr_whitelist_stop_accept(
3025 b->stop_accept));
3026 (*b->stop_accept)(b->cb_arg);
3027 /* set timeout, no mallocs */
3028 tv.tv_sec = NETEVENT_SLOW_ACCEPT_TIME/1000;
3029 tv.tv_usec = (NETEVENT_SLOW_ACCEPT_TIME%1000)*1000;
3030 b->eb->slow_accept = ub_event_new(b->eb->base,
3031 -1, UB_EV_TIMEOUT,
3032 comm_base_handle_slow_accept, b);
3033 if(b->eb->slow_accept == NULL) {
3034 /* we do not want to log here, because
3035 * that would spam the logfiles.
3036 * error: "event_base_set failed." */
3037 }
3038 else if(ub_event_add(b->eb->slow_accept, &tv)
3039 != 0) {
3040 /* we do not want to log here,
3041 * error: "event_add failed." */
3042 }
3043 } else {
3044 log_err("accept, with no slow down, "
3045 "failed: %s", sock_strerror(errno));
3046 }
3047 return -1;
3048 }
3049 #endif
3050 #else /* USE_WINSOCK */
3051 if(WSAGetLastError() == WSAEINPROGRESS ||
3052 WSAGetLastError() == WSAECONNRESET)
3053 return -1;
3054 if(WSAGetLastError() == WSAEWOULDBLOCK) {
3055 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
3056 return -1;
3057 }
3058 #endif
3059 log_err_addr("accept failed", sock_strerror(errno), addr,
3060 *addrlen);
3061 return -1;
3062 }
3063 if(c->tcp_conn_limit && c->type == comm_tcp_accept) {
3064 c->tcl_addr = tcl_addr_lookup(c->tcp_conn_limit, addr, *addrlen);
3065 if(!tcl_new_connection(c->tcl_addr)) {
3066 if(verbosity >= 3)
3067 log_err_addr("accept rejected",
3068 "connection limit exceeded", addr, *addrlen);
3069 sock_close(new_fd);
3070 return -1;
3071 }
3072 }
3073 #ifndef HAVE_ACCEPT4
3074 fd_set_nonblock(new_fd);
3075 #endif
3076 return new_fd;
3077 }
3078
3079 #ifdef USE_WINSOCK
3080 static long win_bio_cb(BIO *b, int oper, const char* ATTR_UNUSED(argp),
3081 #ifdef HAVE_BIO_SET_CALLBACK_EX
3082 size_t ATTR_UNUSED(len),
3083 #endif
3084 int ATTR_UNUSED(argi), long argl,
3085 #ifndef HAVE_BIO_SET_CALLBACK_EX
3086 long retvalue
3087 #else
3088 int retvalue, size_t* ATTR_UNUSED(processed)
3089 #endif
3090 )
3091 {
3092 int wsa_err = WSAGetLastError(); /* store errcode before it is gone */
3093 verbose(VERB_ALGO, "bio_cb %d, %s %s %s", oper,
3094 (oper&BIO_CB_RETURN)?"return":"before",
3095 (oper&BIO_CB_READ)?"read":((oper&BIO_CB_WRITE)?"write":"other"),
3096 wsa_err==WSAEWOULDBLOCK?"wsawb":"");
3097 /* on windows, check if previous operation caused EWOULDBLOCK */
3098 if( (oper == (BIO_CB_READ|BIO_CB_RETURN) && argl == 0) ||
3099 (oper == (BIO_CB_GETS|BIO_CB_RETURN) && argl == 0)) {
3100 if(wsa_err == WSAEWOULDBLOCK)
3101 ub_winsock_tcp_wouldblock((struct ub_event*)
3102 BIO_get_callback_arg(b), UB_EV_READ);
3103 }
3104 if( (oper == (BIO_CB_WRITE|BIO_CB_RETURN) && argl == 0) ||
3105 (oper == (BIO_CB_PUTS|BIO_CB_RETURN) && argl == 0)) {
3106 if(wsa_err == WSAEWOULDBLOCK)
3107 ub_winsock_tcp_wouldblock((struct ub_event*)
3108 BIO_get_callback_arg(b), UB_EV_WRITE);
3109 }
3110 /* return original return value */
3111 return retvalue;
3112 }
3113
3114 /** set win bio callbacks for nonblocking operations */
3115 void
3116 comm_point_tcp_win_bio_cb(struct comm_point* c, void* thessl)
3117 {
3118 SSL* ssl = (SSL*)thessl;
3119 /* set them both just in case, but usually they are the same BIO */
3120 #ifdef HAVE_BIO_SET_CALLBACK_EX
3121 BIO_set_callback_ex(SSL_get_rbio(ssl), &win_bio_cb);
3122 #else
3123 BIO_set_callback(SSL_get_rbio(ssl), &win_bio_cb);
3124 #endif
3125 BIO_set_callback_arg(SSL_get_rbio(ssl), (char*)c->ev->ev);
3126 #ifdef HAVE_BIO_SET_CALLBACK_EX
3127 BIO_set_callback_ex(SSL_get_wbio(ssl), &win_bio_cb);
3128 #else
3129 BIO_set_callback(SSL_get_wbio(ssl), &win_bio_cb);
3130 #endif
3131 BIO_set_callback_arg(SSL_get_wbio(ssl), (char*)c->ev->ev);
3132 }
3133 #endif
3134
3135 #ifdef HAVE_NGHTTP2
3136 /** Create http2 session server. Per connection, after TCP accepted.*/
3137 static int http2_session_server_create(struct http2_session* h2_session)
3138 {
3139 log_assert(h2_session->callbacks);
3140 h2_session->is_drop = 0;
3141 if(nghttp2_session_server_new(&h2_session->session,
3142 h2_session->callbacks,
3143 h2_session) == NGHTTP2_ERR_NOMEM) {
3144 log_err("failed to create nghttp2 session server");
3145 return 0;
3146 }
3147
3148 return 1;
3149 }
3150
3151 /** Submit http2 setting to session. Once per session. */
3152 static int http2_submit_settings(struct http2_session* h2_session)
3153 {
3154 int ret;
3155 nghttp2_settings_entry settings[1] = {
3156 {NGHTTP2_SETTINGS_MAX_CONCURRENT_STREAMS,
3157 h2_session->c->http2_max_streams}};
3158
3159 ret = nghttp2_submit_settings(h2_session->session, NGHTTP2_FLAG_NONE,
3160 settings, 1);
3161 if(ret) {
3162 verbose(VERB_QUERY, "http2: submit_settings failed, "
3163 "error: %s", nghttp2_strerror(ret));
3164 return 0;
3165 }
3166 return 1;
3167 }
3168 #endif /* HAVE_NGHTTP2 */
3169
3170 #ifdef HAVE_NGHTTP2
3171 /** Delete http2 stream. After session delete or stream close callback */
3172 static void http2_stream_delete(struct http2_session* h2_session,
3173 struct http2_stream* h2_stream)
3174 {
3175 if(h2_stream->mesh_state) {
3176 mesh_state_remove_reply(h2_stream->mesh, h2_stream->mesh_state,
3177 h2_session->c);
3178 h2_stream->mesh_state = NULL;
3179 }
3180 http2_req_stream_clear(h2_stream);
3181 free(h2_stream);
3182 }
3183 #endif /* HAVE_NGHTTP2 */
3184
/**
 * Delete the http2 session server. After closing connection.
 * Deletes the nghttp2 session and all streams in the stream list, and
 * resets the drop flags and the current stream of the comm point.
 * Does nothing without nghttp2 support.
 * @param h2_session: the http2 session; the structure itself is kept.
 */
static void http2_session_server_delete(struct http2_session* h2_session)
{
#ifdef HAVE_NGHTTP2
	struct http2_stream* cur;
	struct http2_stream* following;

	nghttp2_session_del(h2_session->session); /* NULL input is fine */
	h2_session->session = NULL;

	/* the next pointer is saved before the stream is freed */
	cur = h2_session->first_stream;
	while(cur != NULL) {
		following = cur->next;
		http2_stream_delete(h2_session, cur);
		cur = following;
	}
	h2_session->first_stream = NULL;

	h2_session->is_drop = 0;
	h2_session->postpone_drop = 0;
	h2_session->c->h2_stream = NULL;
#endif
	(void)h2_session;
}
3204
3205 void
3206 comm_point_tcp_accept_callback(int fd, short event, void* arg)
3207 {
3208 struct comm_point* c = (struct comm_point*)arg, *c_hdl;
3209 int new_fd;
3210 log_assert(c->type == comm_tcp_accept);
3211 if(!(event & UB_EV_READ)) {
3212 log_info("ignoring tcp accept event %d", (int)event);
3213 return;
3214 }
3215 ub_comm_base_now(c->ev->base);
3216 /* find free tcp handler. */
3217 if(!c->tcp_free) {
3218 log_warn("accepted too many tcp, connections full");
3219 return;
3220 }
3221 /* accept incoming connection. */
3222 c_hdl = c->tcp_free;
3223 /* Should not happen: inconsistent tcp_free state in
3224 * accept_callback. */
3225 log_assert(c_hdl->is_in_tcp_free);
3226 /* clear leftover flags from previous use, and then set the
3227 * correct event base for the event structure for libevent */
3228 ub_event_free(c_hdl->ev->ev);
3229 c_hdl->ev->ev = NULL;
3230 if((c_hdl->type == comm_tcp && c_hdl->tcp_req_info) ||
3231 c_hdl->type == comm_local || c_hdl->type == comm_raw)
3232 c_hdl->tcp_do_toggle_rw = 0;
3233 else c_hdl->tcp_do_toggle_rw = 1;
3234
3235 if(c_hdl->type == comm_http) {
3236 #ifdef HAVE_NGHTTP2
3237 if(!c_hdl->h2_session ||
3238 !http2_session_server_create(c_hdl->h2_session)) {
3239 log_warn("failed to create nghttp2");
3240 return;
3241 }
3242 if(!c_hdl->h2_session ||
3243 !http2_submit_settings(c_hdl->h2_session)) {
3244 log_warn("failed to submit http2 settings");
3245 if(c_hdl->h2_session)
3246 http2_session_server_delete(c_hdl->h2_session);
3247 return;
3248 }
3249 if(!c->ssl) {
3250 c_hdl->tcp_do_toggle_rw = 0;
3251 c_hdl->use_h2 = 1;
3252 }
3253 #endif
3254 c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1,
3255 UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT,
3256 comm_point_http_handle_callback, c_hdl);
3257 } else {
3258 c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1,
3259 UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT,
3260 comm_point_tcp_handle_callback, c_hdl);
3261 }
3262 if(!c_hdl->ev->ev) {
3263 log_warn("could not ub_event_new, dropped tcp");
3264 #ifdef HAVE_NGHTTP2
3265 if(c_hdl->type == comm_http && c_hdl->h2_session)
3266 http2_session_server_delete(c_hdl->h2_session);
3267 #endif
3268 return;
3269 }
3270 log_assert(fd != -1);
3271 (void)fd;
3272 new_fd = comm_point_perform_accept(c, &c_hdl->repinfo.remote_addr,
3273 &c_hdl->repinfo.remote_addrlen);
3274 if(new_fd == -1) {
3275 #ifdef HAVE_NGHTTP2
3276 if(c_hdl->type == comm_http && c_hdl->h2_session)
3277 http2_session_server_delete(c_hdl->h2_session);
3278 #endif
3279 return;
3280 }
3281 /* Copy remote_address to client_address.
3282 * Simplest way/time for streams to do that. */
3283 c_hdl->repinfo.client_addrlen = c_hdl->repinfo.remote_addrlen;
3284 memmove(&c_hdl->repinfo.client_addr,
3285 &c_hdl->repinfo.remote_addr,
3286 c_hdl->repinfo.remote_addrlen);
3287 if(c->ssl) {
3288 c_hdl->ssl = incoming_ssl_fd(c->ssl, new_fd);
3289 if(!c_hdl->ssl) {
3290 c_hdl->fd = new_fd;
3291 comm_point_close(c_hdl);
3292 return;
3293 }
3294 c_hdl->ssl_shake_state = comm_ssl_shake_read;
3295 #ifdef USE_WINSOCK
3296 comm_point_tcp_win_bio_cb(c_hdl, c_hdl->ssl);
3297 #endif
3298 }
3299
3300 /* Paranoia: Check that the state has not changed from above: */
3301 /* Should not happen: tcp_free state changed within accept_callback. */
3302 log_assert(c_hdl == c->tcp_free);
3303 log_assert(c_hdl->is_in_tcp_free);
3304 /* grab the tcp handler buffers */
3305 c->cur_tcp_count++;
3306 c->tcp_free = c_hdl->tcp_free;
3307 c_hdl->tcp_free = NULL;
3308 c_hdl->is_in_tcp_free = 0;
3309 if(!c->tcp_free) {
3310 /* stop accepting incoming queries for now. */
3311 comm_point_stop_listening(c);
3312 }
3313 setup_tcp_handler(c_hdl, new_fd, c->cur_tcp_count, c->max_tcp_count);
3314 }
3315
3316 /** Make tcp handler free for next assignment */
3317 static void
3318 reclaim_tcp_handler(struct comm_point* c)
3319 {
3320 log_assert(c->type == comm_tcp);
3321 if(c->ssl) {
3322 #ifdef HAVE_SSL
3323 SSL_shutdown(c->ssl);
3324 SSL_free(c->ssl);
3325 c->ssl = NULL;
3326 #endif
3327 }
3328 comm_point_close(c);
3329 if(c->tcp_parent && !c->is_in_tcp_free) {
3330 /* Should not happen: bad tcp_free state in reclaim_tcp. */
3331 log_assert(c->tcp_free == NULL);
3332 log_assert(c->tcp_parent->cur_tcp_count > 0);
3333 c->tcp_parent->cur_tcp_count--;
3334 c->tcp_free = c->tcp_parent->tcp_free;
3335 c->tcp_parent->tcp_free = c;
3336 c->is_in_tcp_free = 1;
3337 if(!c->tcp_free) {
3338 /* re-enable listening on accept socket */
3339 comm_point_start_listening(c->tcp_parent, -1, -1);
3340 }
3341 }
3342 c->tcp_more_read_again = NULL;
3343 c->tcp_more_write_again = NULL;
3344 c->tcp_byte_count = 0;
3345 c->pp2_header_state = pp2_header_none;
3346 sldns_buffer_clear(c->buffer);
3347 }
3348
3349 /** do the callback when writing is done */
3350 static void
3351 tcp_callback_writer(struct comm_point* c)
3352 {
3353 log_assert(c->type == comm_tcp);
3354 if(!c->tcp_write_and_read) {
3355 sldns_buffer_clear(c->buffer);
3356 c->tcp_byte_count = 0;
3357 }
3358 if(c->tcp_do_toggle_rw)
3359 c->tcp_is_reading = 1;
3360 /* switch from listening(write) to listening(read) */
3361 if(c->tcp_req_info) {
3362 tcp_req_info_handle_writedone(c->tcp_req_info);
3363 } else {
3364 comm_point_stop_listening(c);
3365 if(c->tcp_write_and_read) {
3366 fptr_ok(fptr_whitelist_comm_point(c->callback));
3367 if( (*c->callback)(c, c->cb_arg, NETEVENT_PKT_WRITTEN,
3368 &c->repinfo) ) {
3369 comm_point_start_listening(c, -1,
3370 adjusted_tcp_timeout(c));
3371 }
3372 } else {
3373 comm_point_start_listening(c, -1,
3374 adjusted_tcp_timeout(c));
3375 }
3376 }
3377 }
3378
3379 /** do the callback when reading is done */
3380 static void
3381 tcp_callback_reader(struct comm_point* c)
3382 {
3383 log_assert(c->type == comm_tcp || c->type == comm_local);
3384 sldns_buffer_flip(c->buffer);
3385 if(c->tcp_do_toggle_rw)
3386 c->tcp_is_reading = 0;
3387 c->tcp_byte_count = 0;
3388 if(c->tcp_req_info) {
3389 tcp_req_info_handle_readdone(c->tcp_req_info);
3390 } else {
3391 if(c->type == comm_tcp)
3392 comm_point_stop_listening(c);
3393 fptr_ok(fptr_whitelist_comm_point(c->callback));
3394 if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
3395 comm_point_start_listening(c, -1,
3396 adjusted_tcp_timeout(c));
3397 }
3398 }
3399 }
3400
3401 #ifdef HAVE_SSL
3402 /** true if the ssl handshake error has to be squelched from the logs */
3403 int
3404 squelch_err_ssl_handshake(unsigned long err)
3405 {
3406 if(verbosity >= VERB_QUERY)
3407 return 0; /* only squelch on low verbosity */
3408 if(ERR_GET_LIB(err) == ERR_LIB_SSL &&
3409 (ERR_GET_REASON(err) == SSL_R_HTTPS_PROXY_REQUEST ||
3410 ERR_GET_REASON(err) == SSL_R_HTTP_REQUEST ||
3411 ERR_GET_REASON(err) == SSL_R_WRONG_VERSION_NUMBER ||
3412 ERR_GET_REASON(err) == SSL_R_SSLV3_ALERT_BAD_CERTIFICATE
3413 #ifdef SSL_F_TLS_POST_PROCESS_CLIENT_HELLO
3414 || ERR_GET_REASON(err) == SSL_R_NO_SHARED_CIPHER
3415 #endif
3416 #ifdef SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO
3417 || ERR_GET_REASON(err) == SSL_R_UNKNOWN_PROTOCOL
3418 || ERR_GET_REASON(err) == SSL_R_UNSUPPORTED_PROTOCOL
3419 # ifdef SSL_R_VERSION_TOO_LOW
3420 || ERR_GET_REASON(err) == SSL_R_VERSION_TOO_LOW
3421 # endif
3422 #endif
3423 ))
3424 return 1;
3425 return 0;
3426 }
3427 #endif /* HAVE_SSL */
3428
3429 /** continue ssl handshake */
3430 #ifdef HAVE_SSL
3431 static int
3432 ssl_handshake(struct comm_point* c)
3433 {
3434 int r;
3435 if(c->ssl_shake_state == comm_ssl_shake_hs_read) {
3436 /* read condition satisfied back to writing */
3437 comm_point_listen_for_rw(c, 0, 1);
3438 c->ssl_shake_state = comm_ssl_shake_none;
3439 return 1;
3440 }
3441 if(c->ssl_shake_state == comm_ssl_shake_hs_write) {
3442 /* write condition satisfied, back to reading */
3443 comm_point_listen_for_rw(c, 1, 0);
3444 c->ssl_shake_state = comm_ssl_shake_none;
3445 return 1;
3446 }
3447
3448 ERR_clear_error();
3449 r = SSL_do_handshake(c->ssl);
3450 if(r != 1) {
3451 int want = SSL_get_error(c->ssl, r);
3452 if(want == SSL_ERROR_WANT_READ) {
3453 if(c->ssl_shake_state == comm_ssl_shake_read)
3454 return 1;
3455 c->ssl_shake_state = comm_ssl_shake_read;
3456 comm_point_listen_for_rw(c, 1, 0);
3457 return 1;
3458 } else if(want == SSL_ERROR_WANT_WRITE) {
3459 if(c->ssl_shake_state == comm_ssl_shake_write)
3460 return 1;
3461 c->ssl_shake_state = comm_ssl_shake_write;
3462 comm_point_listen_for_rw(c, 0, 1);
3463 return 1;
3464 } else if(r == 0) {
3465 return 0; /* closed */
3466 } else if(want == SSL_ERROR_SYSCALL) {
3467 /* SYSCALL and errno==0 means closed uncleanly */
3468 #ifdef EPIPE
3469 if(errno == EPIPE && verbosity < 2)
3470 return 0; /* silence 'broken pipe' */
3471 #endif
3472 #ifdef ECONNRESET
3473 if(errno == ECONNRESET && verbosity < 2)
3474 return 0; /* silence reset by peer */
3475 #endif
3476 if(!tcp_connect_errno_needs_log(
3477 (struct sockaddr*)&c->repinfo.remote_addr,
3478 c->repinfo.remote_addrlen))
3479 return 0; /* silence connect failures that
3480 show up because after connect this is the
3481 first system call that accesses the socket */
3482 if(errno != 0)
3483 log_err("SSL_handshake syscall: %s",
3484 strerror(errno));
3485 return 0;
3486 } else {
3487 unsigned long err = ERR_get_error();
3488 if(!squelch_err_ssl_handshake(err)) {
3489 long vr;
3490 log_crypto_err_io_code("ssl handshake failed",
3491 want, err);
3492 if((vr=SSL_get_verify_result(c->ssl)) != 0)
3493 log_err("ssl handshake cert error: %s",
3494 X509_verify_cert_error_string(
3495 vr));
3496 log_addr(VERB_OPS, "ssl handshake failed",
3497 &c->repinfo.remote_addr,
3498 c->repinfo.remote_addrlen);
3499 }
3500 return 0;
3501 }
3502 }
3503 /* this is where peer verification could take place */
3504 if((SSL_get_verify_mode(c->ssl)&SSL_VERIFY_PEER)) {
3505 /* verification */
3506 if(SSL_get_verify_result(c->ssl) == X509_V_OK) {
3507 #ifdef HAVE_SSL_GET1_PEER_CERTIFICATE
3508 X509* x = SSL_get1_peer_certificate(c->ssl);
3509 #else
3510 X509* x = SSL_get_peer_certificate(c->ssl);
3511 #endif
3512 if(!x) {
3513 log_addr(VERB_ALGO, "SSL connection failed: "
3514 "no certificate",
3515 &c->repinfo.remote_addr,
3516 c->repinfo.remote_addrlen);
3517 return 0;
3518 }
3519 log_cert(VERB_ALGO, "peer certificate", x);
3520 #ifdef HAVE_SSL_GET0_PEERNAME
3521 if(SSL_get0_peername(c->ssl)) {
3522 char buf[255];
3523 snprintf(buf, sizeof(buf), "SSL connection "
3524 "to %s authenticated",
3525 SSL_get0_peername(c->ssl));
3526 log_addr(VERB_ALGO, buf, &c->repinfo.remote_addr,
3527 c->repinfo.remote_addrlen);
3528 } else {
3529 #endif
3530 log_addr(VERB_ALGO, "SSL connection "
3531 "authenticated", &c->repinfo.remote_addr,
3532 c->repinfo.remote_addrlen);
3533 #ifdef HAVE_SSL_GET0_PEERNAME
3534 }
3535 #endif
3536 X509_free(x);
3537 } else {
3538 #ifdef HAVE_SSL_GET1_PEER_CERTIFICATE
3539 X509* x = SSL_get1_peer_certificate(c->ssl);
3540 #else
3541 X509* x = SSL_get_peer_certificate(c->ssl);
3542 #endif
3543 if(x) {
3544 log_cert(VERB_ALGO, "peer certificate", x);
3545 X509_free(x);
3546 }
3547 log_addr(VERB_ALGO, "SSL connection failed: "
3548 "failed to authenticate",
3549 &c->repinfo.remote_addr,
3550 c->repinfo.remote_addrlen);
3551 return 0;
3552 }
3553 } else {
3554 /* unauthenticated, the verify peer flag was not set
3555 * in c->ssl when the ssl object was created from ssl_ctx */
3556 log_addr(VERB_ALGO, "SSL connection", &c->repinfo.remote_addr,
3557 c->repinfo.remote_addrlen);
3558 }
3559
3560 #ifdef HAVE_SSL_GET0_ALPN_SELECTED
3561 /* check if http2 use is negotiated */
3562 if(c->type == comm_http && c->h2_session) {
3563 const unsigned char *alpn;
3564 unsigned int alpnlen = 0;
3565 SSL_get0_alpn_selected(c->ssl, &alpn, &alpnlen);
3566 if(alpnlen == 2 && memcmp("h2", alpn, 2) == 0) {
3567 /* connection upgraded to HTTP2 */
3568 c->tcp_do_toggle_rw = 0;
3569 c->use_h2 = 1;
3570 } else {
3571 verbose(VERB_ALGO, "client doesn't support HTTP/2");
3572 return 0;
3573 }
3574 }
3575 #endif
3576
3577 /* setup listen rw correctly */
3578 if(c->tcp_is_reading) {
3579 if(c->ssl_shake_state != comm_ssl_shake_read)
3580 comm_point_listen_for_rw(c, 1, 0);
3581 } else {
3582 comm_point_listen_for_rw(c, 0, 1);
3583 }
3584 c->ssl_shake_state = comm_ssl_shake_none;
3585 return 1;
3586 }
3587 #endif /* HAVE_SSL */
3588
#ifdef HAVE_SSL
/**
 * Handle an SSL_read call that did not return data (result <= 0).
 * Shared by all the SSL_read calls in ssl_handle_read.
 * @param c: the comm point that was read from.
 * @param r: the return value of SSL_read.
 * @return the value for ssl_handle_read to return: 1 when the read has
 *	to be tried again later, 0 when the connection is closed or
 *	failed. For a clean shutdown with tcp_req_info the result of
 *	tcp_req_info_handle_read_close is returned.
 */
static int
ssl_handle_read_error(struct comm_point* c, int r)
{
	int want = SSL_get_error(c->ssl, r);
	if(want == SSL_ERROR_ZERO_RETURN) {
		if(c->tcp_req_info)
			return tcp_req_info_handle_read_close(c->tcp_req_info);
		return 0; /* shutdown, closed */
	} else if(want == SSL_ERROR_WANT_READ) {
#ifdef USE_WINSOCK
		ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
#endif
		return 1; /* read more later */
	} else if(want == SSL_ERROR_WANT_WRITE) {
		/* SSL needs to write first; ssl_handshake switches back
		 * to reading when the write condition is satisfied */
		c->ssl_shake_state = comm_ssl_shake_hs_write;
		comm_point_listen_for_rw(c, 0, 1);
		return 1;
	} else if(want == SSL_ERROR_SYSCALL) {
#ifdef ECONNRESET
		if(errno == ECONNRESET && verbosity < 2)
			return 0; /* silence reset by peer */
#endif
		if(errno != 0)
			log_err("SSL_read syscall: %s",
				strerror(errno));
		return 0;
	}
	log_crypto_err_io("could not SSL_read", want);
	return 0;
}
#endif /* HAVE_SSL */

/**
 * ssl read callback on TCP.
 * Continues the handshake if that is not done yet. Then, if PROXYv2 is
 * enabled, first reads and consumes the PROXYv2 header. After that reads
 * the two length bytes and the DNS message into c->buffer, and calls
 * tcp_callback_reader when the message is complete.
 * @param c: the comm point with the SSL connection.
 * @return 0 if the connection has to be closed, 1 otherwise. Without
 *	SSL support it returns 0.
 */
static int
ssl_handle_read(struct comm_point* c)
{
#ifdef HAVE_SSL
	int r;
	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}
	if(c->pp2_enabled && c->pp2_header_state != pp2_header_done) {
		struct pp2_header* header = NULL;
		size_t want_read_size = 0;
		size_t current_read_size = 0;
		if(c->pp2_header_state == pp2_header_none) {
			/* first the fixed size part of the header */
			want_read_size = PP2_HEADER_SIZE;
			if(sldns_buffer_remaining(c->buffer)<want_read_size) {
				log_err_addr("proxy_protocol: not enough "
					"buffer size to read PROXYv2 header", "",
					&c->repinfo.remote_addr,
					c->repinfo.remote_addrlen);
				return 0;
			}
			verbose(VERB_ALGO, "proxy_protocol: reading fixed "
				"part of PROXYv2 header (len %lu)",
				(unsigned long)want_read_size);
			current_read_size = want_read_size;
			if(c->tcp_byte_count < current_read_size) {
				ERR_clear_error();
				r = SSL_read(c->ssl, (void*)sldns_buffer_at(
					c->buffer, c->tcp_byte_count),
					(int)(current_read_size -
					c->tcp_byte_count));
				if(r <= 0)
					return ssl_handle_read_error(c, r);
				c->tcp_byte_count += r;
				sldns_buffer_skip(c->buffer, r);
				if(c->tcp_byte_count != current_read_size) return 1;
				c->pp2_header_state = pp2_header_init;
			}
		}
		if(c->pp2_header_state == pp2_header_init) {
			/* the fixed part is in the buffer, check it and
			 * read the variable part, its length is in the
			 * fixed part */
			int err;
			err = pp2_read_header(
				sldns_buffer_begin(c->buffer),
				sldns_buffer_limit(c->buffer));
			if(err) {
				log_err("proxy_protocol: could not parse "
					"PROXYv2 header (%s)",
					pp_lookup_error(err));
				return 0;
			}
			header = (struct pp2_header*)sldns_buffer_begin(c->buffer);
			want_read_size = ntohs(header->len);
			if(sldns_buffer_limit(c->buffer) <
				PP2_HEADER_SIZE + want_read_size) {
				log_err_addr("proxy_protocol: not enough "
					"buffer size to read PROXYv2 header", "",
					&c->repinfo.remote_addr,
					c->repinfo.remote_addrlen);
				return 0;
			}
			verbose(VERB_ALGO, "proxy_protocol: reading variable "
				"part of PROXYv2 header (len %lu)",
				(unsigned long)want_read_size);
			current_read_size = PP2_HEADER_SIZE + want_read_size;
			if(want_read_size == 0) {
				/* nothing more to read; header is complete */
				c->pp2_header_state = pp2_header_done;
			} else if(c->tcp_byte_count < current_read_size) {
				ERR_clear_error();
				r = SSL_read(c->ssl, (void*)sldns_buffer_at(
					c->buffer, c->tcp_byte_count),
					(int)(current_read_size -
					c->tcp_byte_count));
				if(r <= 0)
					return ssl_handle_read_error(c, r);
				c->tcp_byte_count += r;
				sldns_buffer_skip(c->buffer, r);
				if(c->tcp_byte_count != current_read_size) return 1;
				c->pp2_header_state = pp2_header_done;
			}
		}
		if(c->pp2_header_state != pp2_header_done || !header) {
			log_err_addr("proxy_protocol: wrong state for the "
				"PROXYv2 header", "", &c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
			return 0;
		}
		sldns_buffer_flip(c->buffer);
		if(!consume_pp2_header(c->buffer, &c->repinfo, 1)) {
			log_err_addr("proxy_protocol: could not consume "
				"PROXYv2 header", "", &c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
			return 0;
		}
		verbose(VERB_ALGO, "proxy_protocol: successful read of "
			"PROXYv2 header");
		/* Clear and reset the buffer to read the following
		 * DNS packet(s). */
		sldns_buffer_clear(c->buffer);
		c->tcp_byte_count = 0;
		return 1;
	}
	if(c->tcp_byte_count < sizeof(uint16_t)) {
		/* read length bytes */
		ERR_clear_error();
		r = SSL_read(c->ssl, (void*)sldns_buffer_at(c->buffer,
			c->tcp_byte_count), (int)(sizeof(uint16_t) -
			c->tcp_byte_count));
		if(r <= 0)
			return ssl_handle_read_error(c, r);
		c->tcp_byte_count += r;
		if(c->tcp_byte_count < sizeof(uint16_t))
			return 1;
		/* the length is complete, the message must fit in the
		 * buffer and must hold at least a DNS header */
		if(sldns_buffer_read_u16_at(c->buffer, 0) >
			sldns_buffer_capacity(c->buffer)) {
			verbose(VERB_QUERY, "ssl: dropped larger than buffer");
			return 0;
		}
		sldns_buffer_set_limit(c->buffer,
			sldns_buffer_read_u16_at(c->buffer, 0));
		if(sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
			verbose(VERB_QUERY, "ssl: dropped bogus too short.");
			return 0;
		}
		sldns_buffer_skip(c->buffer, (ssize_t)(c->tcp_byte_count-sizeof(uint16_t)));
		verbose(VERB_ALGO, "Reading ssl tcp query of length %d",
			(int)sldns_buffer_limit(c->buffer));
	}
	if(sldns_buffer_remaining(c->buffer) > 0) {
		/* read (more of) the message itself */
		ERR_clear_error();
		r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
			(int)sldns_buffer_remaining(c->buffer));
		if(r <= 0)
			return ssl_handle_read_error(c, r);
		sldns_buffer_skip(c->buffer, (ssize_t)r);
	}
	if(sldns_buffer_remaining(c->buffer) <= 0) {
		/* the message is complete */
		tcp_callback_reader(c);
	}
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
3841
3842 /** ssl write callback on TCP */
3843 static int
3844 ssl_handle_write(struct comm_point* c)
3845 {
3846 #ifdef HAVE_SSL
3847 int r;
3848 if(c->ssl_shake_state != comm_ssl_shake_none) {
3849 if(!ssl_handshake(c))
3850 return 0;
3851 if(c->ssl_shake_state != comm_ssl_shake_none)
3852 return 1;
3853 }
3854 /* ignore return, if fails we may simply block */
3855 (void)SSL_set_mode(c->ssl, (long)SSL_MODE_ENABLE_PARTIAL_WRITE);
3856 if((c->tcp_write_and_read?c->tcp_write_byte_count:c->tcp_byte_count) < sizeof(uint16_t)) {
3857 uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(c->buffer));
3858 ERR_clear_error();
3859 if(c->tcp_write_and_read) {
3860 if(c->tcp_write_pkt_len + 2 < LDNS_RR_BUF_SIZE) {
3861 /* combine the tcp length and the query for
3862 * write, this emulates writev */
3863 uint8_t buf[LDNS_RR_BUF_SIZE];
3864 memmove(buf, &len, sizeof(uint16_t));
3865 memmove(buf+sizeof(uint16_t),
3866 c->tcp_write_pkt,
3867 c->tcp_write_pkt_len);
3868 r = SSL_write(c->ssl,
3869 (void*)(buf+c->tcp_write_byte_count),
3870 c->tcp_write_pkt_len + 2 -
3871 c->tcp_write_byte_count);
3872 } else {
3873 r = SSL_write(c->ssl,
3874 (void*)(((uint8_t*)&len)+c->tcp_write_byte_count),
3875 (int)(sizeof(uint16_t)-c->tcp_write_byte_count));
3876 }
3877 } else if(sizeof(uint16_t)+sldns_buffer_remaining(c->buffer) <
3878 LDNS_RR_BUF_SIZE) {
3879 /* combine the tcp length and the query for write,
3880 * this emulates writev */
3881 uint8_t buf[LDNS_RR_BUF_SIZE];
3882 memmove(buf, &len, sizeof(uint16_t));
3883 memmove(buf+sizeof(uint16_t),
3884 sldns_buffer_current(c->buffer),
3885 sldns_buffer_remaining(c->buffer));
3886 r = SSL_write(c->ssl, (void*)(buf+c->tcp_byte_count),
3887 (int)(sizeof(uint16_t)+
3888 sldns_buffer_remaining(c->buffer)
3889 - c->tcp_byte_count));
3890 } else {
3891 r = SSL_write(c->ssl,
3892 (void*)(((uint8_t*)&len)+c->tcp_byte_count),
3893 (int)(sizeof(uint16_t)-c->tcp_byte_count));
3894 }
3895 if(r <= 0) {
3896 int want = SSL_get_error(c->ssl, r);
3897 if(want == SSL_ERROR_ZERO_RETURN) {
3898 return 0; /* closed */
3899 } else if(want == SSL_ERROR_WANT_READ) {
3900 c->ssl_shake_state = comm_ssl_shake_hs_read;
3901 comm_point_listen_for_rw(c, 1, 0);
3902 return 1; /* wait for read condition */
3903 } else if(want == SSL_ERROR_WANT_WRITE) {
3904 #ifdef USE_WINSOCK
3905 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
3906 #endif
3907 return 1; /* write more later */
3908 } else if(want == SSL_ERROR_SYSCALL) {
3909 #ifdef EPIPE
3910 if(errno == EPIPE && verbosity < 2)
3911 return 0; /* silence 'broken pipe' */
3912 #endif
3913 if(errno != 0)
3914 log_err("SSL_write syscall: %s",
3915 strerror(errno));
3916 return 0;
3917 }
3918 log_crypto_err_io("could not SSL_write", want);
3919 return 0;
3920 }
3921 if(c->tcp_write_and_read) {
3922 c->tcp_write_byte_count += r;
3923 if(c->tcp_write_byte_count < sizeof(uint16_t))
3924 return 1;
3925 } else {
3926 c->tcp_byte_count += r;
3927 if(c->tcp_byte_count < sizeof(uint16_t))
3928 return 1;
3929 sldns_buffer_set_position(c->buffer, c->tcp_byte_count -
3930 sizeof(uint16_t));
3931 }
3932 if((!c->tcp_write_and_read && sldns_buffer_remaining(c->buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
3933 tcp_callback_writer(c);
3934 return 1;
3935 }
3936 }
3937 log_assert(c->tcp_write_and_read || sldns_buffer_remaining(c->buffer) > 0);
3938 log_assert(!c->tcp_write_and_read || c->tcp_write_byte_count < c->tcp_write_pkt_len + 2);
3939 ERR_clear_error();
3940 if(c->tcp_write_and_read) {
3941 r = SSL_write(c->ssl, (void*)(c->tcp_write_pkt + c->tcp_write_byte_count - 2),
3942 (int)(c->tcp_write_pkt_len + 2 - c->tcp_write_byte_count));
3943 } else {
3944 r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer),
3945 (int)sldns_buffer_remaining(c->buffer));
3946 }
3947 if(r <= 0) {
3948 int want = SSL_get_error(c->ssl, r);
3949 if(want == SSL_ERROR_ZERO_RETURN) {
3950 return 0; /* closed */
3951 } else if(want == SSL_ERROR_WANT_READ) {
3952 c->ssl_shake_state = comm_ssl_shake_hs_read;
3953 comm_point_listen_for_rw(c, 1, 0);
3954 return 1; /* wait for read condition */
3955 } else if(want == SSL_ERROR_WANT_WRITE) {
3956 #ifdef USE_WINSOCK
3957 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
3958 #endif
3959 return 1; /* write more later */
3960 } else if(want == SSL_ERROR_SYSCALL) {
3961 #ifdef EPIPE
3962 if(errno == EPIPE && verbosity < 2)
3963 return 0; /* silence 'broken pipe' */
3964 #endif
3965 if(errno != 0)
3966 log_err("SSL_write syscall: %s",
3967 strerror(errno));
3968 return 0;
3969 }
3970 log_crypto_err_io("could not SSL_write", want);
3971 return 0;
3972 }
3973 if(c->tcp_write_and_read) {
3974 c->tcp_write_byte_count += r;
3975 } else {
3976 sldns_buffer_skip(c->buffer, (ssize_t)r);
3977 }
3978
3979 if((!c->tcp_write_and_read && sldns_buffer_remaining(c->buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
3980 tcp_callback_writer(c);
3981 }
3982 return 1;
3983 #else
3984 (void)c;
3985 return 0;
3986 #endif /* HAVE_SSL */
3987 }
3988
3989 /** handle ssl tcp connection with dns contents */
3990 static int
3991 ssl_handle_it(struct comm_point* c, int is_write)
3992 {
3993 /* handle case where renegotiation wants read during write call
3994 * or write during read calls */
3995 if(is_write && c->ssl_shake_state == comm_ssl_shake_hs_write)
3996 return ssl_handle_read(c);
3997 else if(!is_write && c->ssl_shake_state == comm_ssl_shake_hs_read)
3998 return ssl_handle_write(c);
3999 /* handle read events for read operation and write events for a
4000 * write operation */
4001 else if(!is_write)
4002 return ssl_handle_read(c);
4003 return ssl_handle_write(c);
4004 }
4005
/**
 * Handle tcp reading callback.
 * Reads with nonblocking recv calls, in this order: the PROXYv2 header
 * (only when pp2_enabled is set and the header is not done yet), the two
 * byte length prefix, and the message of that length. When no space remains
 * in the buffer after reading, tcp_callback_reader() is called.
 * If the comm point has an ssl object, the work is passed to
 * ssl_handle_it() instead.
 * @param fd: file descriptor of socket.
 * @param c: comm point to read from into buffer.
 * @param short_ok: if true, very short packets are OK (for comm_local).
 * @return: 0 on error, 1 if the caller can continue (also when more data
 *	has to be awaited). When recv returns 0 and the comm point has a
 *	tcp_req_info, the result of tcp_req_info_handle_read_close() is
 *	returned.
 */
static int
comm_point_tcp_handle_read(int fd, struct comm_point* c, int short_ok)
{
	ssize_t r;
	/* set via the recv_error_initial label; selects the extra set of
	 * silenced errors and the "tcp initial" log text below */
	int recv_initial = 0;
	log_assert(c->type == comm_tcp || c->type == comm_local);
	if(c->ssl)
		return ssl_handle_it(c, 0);
	/* a read event while not reading and not in write-and-read mode
	 * is reported as failure */
	if(!c->tcp_is_reading && !c->tcp_write_and_read)
		return 0;

	log_assert(fd != -1);
	if(c->pp2_enabled && c->pp2_header_state != pp2_header_done) {
		/* header is only assigned in the pp2_header_init step of
		 * this call; it stays NULL if that step is not reached */
		struct pp2_header* header = NULL;
		size_t want_read_size = 0;
		size_t current_read_size = 0;
		if(c->pp2_header_state == pp2_header_none) {
			/* step 1: the fixed size part of the header */
			want_read_size = PP2_HEADER_SIZE;
			if(sldns_buffer_remaining(c->buffer)<want_read_size) {
				log_err_addr("proxy_protocol: not enough "
					"buffer size to read PROXYv2 header", "",
					&c->repinfo.remote_addr,
					c->repinfo.remote_addrlen);
				return 0;
			}
			verbose(VERB_ALGO, "proxy_protocol: reading fixed "
				"part of PROXYv2 header (len %lu)",
				(unsigned long)want_read_size);
			current_read_size = want_read_size;
			if(c->tcp_byte_count < current_read_size) {
				/* tcp_byte_count tracks how much of the
				 * header is in the buffer already */
				r = recv(fd, (void*)sldns_buffer_at(c->buffer,
					c->tcp_byte_count),
					current_read_size-c->tcp_byte_count, MSG_DONTWAIT);
				if(r == 0) {
					if(c->tcp_req_info)
						return tcp_req_info_handle_read_close(c->tcp_req_info);
					return 0;
				} else if(r == -1) {
					goto recv_error_initial;
				}
				c->tcp_byte_count += r;
				sldns_buffer_skip(c->buffer, r);
				/* partial read, come back on the next event */
				if(c->tcp_byte_count != current_read_size) return 1;
				c->pp2_header_state = pp2_header_init;
			}
		}
		if(c->pp2_header_state == pp2_header_init) {
			/* step 2: check the fixed part and read the variable
			 * part, its size is in the len field of the header */
			int err;
			err = pp2_read_header(
				sldns_buffer_begin(c->buffer),
				sldns_buffer_limit(c->buffer));
			if(err) {
				log_err("proxy_protocol: could not parse "
					"PROXYv2 header (%s)",
					pp_lookup_error(err));
				return 0;
			}
			header = (struct pp2_header*)sldns_buffer_begin(c->buffer);
			want_read_size = ntohs(header->len);
			if(sldns_buffer_limit(c->buffer) <
				PP2_HEADER_SIZE + want_read_size) {
				log_err_addr("proxy_protocol: not enough "
					"buffer size to read PROXYv2 header", "",
					&c->repinfo.remote_addr,
					c->repinfo.remote_addrlen);
				return 0;
			}
			verbose(VERB_ALGO, "proxy_protocol: reading variable "
				"part of PROXYv2 header (len %lu)",
				(unsigned long)want_read_size);
			current_read_size = PP2_HEADER_SIZE + want_read_size;
			if(want_read_size == 0) {
				/* nothing more to read; header is complete */
				c->pp2_header_state = pp2_header_done;
			} else if(c->tcp_byte_count < current_read_size) {
				r = recv(fd, (void*)sldns_buffer_at(c->buffer,
					c->tcp_byte_count),
					current_read_size-c->tcp_byte_count, MSG_DONTWAIT);
				if(r == 0) {
					if(c->tcp_req_info)
						return tcp_req_info_handle_read_close(c->tcp_req_info);
					return 0;
				} else if(r == -1) {
					goto recv_error;
				}
				c->tcp_byte_count += r;
				sldns_buffer_skip(c->buffer, r);
				/* partial read, come back on the next event */
				if(c->tcp_byte_count != current_read_size) return 1;
				c->pp2_header_state = pp2_header_done;
			}
		}
		/* both steps above have to have completed in this call */
		if(c->pp2_header_state != pp2_header_done || !header) {
			log_err_addr("proxy_protocol: wrong state for the "
				"PROXYv2 header", "", &c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
			return 0;
		}
		sldns_buffer_flip(c->buffer);
		if(!consume_pp2_header(c->buffer, &c->repinfo, 1)) {
			log_err_addr("proxy_protocol: could not consume "
				"PROXYv2 header", "", &c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
			return 0;
		}
		verbose(VERB_ALGO, "proxy_protocol: successful read of "
			"PROXYv2 header");
		/* Clear and reset the buffer to read the following
		 * DNS packet(s). */
		sldns_buffer_clear(c->buffer);
		c->tcp_byte_count = 0;
		return 1;
	}

	if(c->tcp_byte_count < sizeof(uint16_t)) {
		/* read length bytes */
		r = recv(fd,(void*)sldns_buffer_at(c->buffer,c->tcp_byte_count),
			sizeof(uint16_t)-c->tcp_byte_count, MSG_DONTWAIT);
		if(r == 0) {
			if(c->tcp_req_info)
				return tcp_req_info_handle_read_close(c->tcp_req_info);
			return 0;
		} else if(r == -1) {
			/* with PROXYv2 enabled this recv is handled as a
			 * regular read error, not as an initial one */
			if(c->pp2_enabled) goto recv_error;
			goto recv_error_initial;
		}
		c->tcp_byte_count += r;
		/* length prefix not complete yet, wait for the rest */
		if(c->tcp_byte_count != sizeof(uint16_t))
			return 1;
		if(sldns_buffer_read_u16_at(c->buffer, 0) >
			sldns_buffer_capacity(c->buffer)) {
			verbose(VERB_QUERY, "tcp: dropped larger than buffer");
			return 0;
		}
		/* the buffer limit becomes the announced message length */
		sldns_buffer_set_limit(c->buffer,
			sldns_buffer_read_u16_at(c->buffer, 0));
		if(!short_ok &&
			sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
			verbose(VERB_QUERY, "tcp: dropped bogus too short.");
			return 0;
		}
		verbose(VERB_ALGO, "Reading tcp query of length %d",
			(int)sldns_buffer_limit(c->buffer));
	}

	/* read (more of) the message itself */
	if(sldns_buffer_remaining(c->buffer) == 0)
		log_err("in comm_point_tcp_handle_read buffer_remaining is "
			"not > 0 as expected, continuing with (harmless) 0 "
			"length recv");
	r = recv(fd, (void*)sldns_buffer_current(c->buffer),
		sldns_buffer_remaining(c->buffer), MSG_DONTWAIT);
	if(r == 0) {
		if(c->tcp_req_info)
			return tcp_req_info_handle_read_close(c->tcp_req_info);
		return 0;
	} else if(r == -1) {
		goto recv_error;
	}
	sldns_buffer_skip(c->buffer, r);
	/* nothing remaining in the buffer: the message is complete */
	if(sldns_buffer_remaining(c->buffer) <= 0) {
		tcp_callback_reader(c);
	}
	return 1;

	/* Error handling shared by the recv calls above. Entering at
	 * recv_error_initial silences more error codes at low verbosity. */
recv_error_initial:
	recv_initial = 1;
recv_error:
#ifndef USE_WINSOCK
	/* not an error, try again on a later event */
	if(errno == EINTR || errno == EAGAIN)
		return 1;
#ifdef ECONNRESET
	if(errno == ECONNRESET && verbosity < 2)
		return 0; /* silence reset by peer */
#endif
	if(recv_initial) {
#ifdef ECONNREFUSED
		if(errno == ECONNREFUSED && verbosity < 2)
			return 0; /* silence connection refused */
#endif
#ifdef ENETUNREACH
		if(errno == ENETUNREACH && verbosity < 2)
			return 0; /* silence it */
#endif
#ifdef EHOSTDOWN
		if(errno == EHOSTDOWN && verbosity < 2)
			return 0; /* silence it */
#endif
#ifdef EHOSTUNREACH
		if(errno == EHOSTUNREACH && verbosity < 2)
			return 0; /* silence it */
#endif
#ifdef ENETDOWN
		if(errno == ENETDOWN && verbosity < 2)
			return 0; /* silence it */
#endif
#ifdef EACCES
		if(errno == EACCES && verbosity < 2)
			return 0; /* silence it */
#endif
#ifdef ENOTCONN
		/* logged at every verbosity level, with a hint about TFO */
		if(errno == ENOTCONN) {
			log_err_addr("read (in tcp initial) failed and this "
				"could be because TCP Fast Open is "
				"enabled [--disable-tfo-client "
				"--disable-tfo-server] but does not "
				"work", sock_strerror(errno),
				&c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
			return 0;
		}
#endif
	}
#else /* USE_WINSOCK */
	if(recv_initial) {
		if(WSAGetLastError() == WSAECONNREFUSED && verbosity < 2)
			return 0;
		if(WSAGetLastError() == WSAEHOSTDOWN && verbosity < 2)
			return 0;
		if(WSAGetLastError() == WSAEHOSTUNREACH && verbosity < 2)
			return 0;
		if(WSAGetLastError() == WSAENETDOWN && verbosity < 2)
			return 0;
		if(WSAGetLastError() == WSAENETUNREACH && verbosity < 2)
			return 0;
	}
	/* reset by peer is never logged here, whatever the verbosity */
	if(WSAGetLastError() == WSAECONNRESET)
		return 0;
	if(WSAGetLastError() == WSAEINPROGRESS)
		return 1;
	if(WSAGetLastError() == WSAEWOULDBLOCK) {
		ub_winsock_tcp_wouldblock(c->ev->ev,
			UB_EV_READ);
		return 1;
	}
#endif
	/* every error that was not handled above is logged */
	log_err_addr((recv_initial?"read (in tcp initial)":"read (in tcp)"),
		sock_strerror(errno), &c->repinfo.remote_addr,
		c->repinfo.remote_addrlen);
	return 0;
}
4252
/**
 * Handle tcp writing callback.
 * Writes the two byte length prefix and then the message. When
 * tcp_write_and_read is set the message is tcp_write_pkt (progress counted
 * in tcp_write_byte_count), otherwise it is the buffer (progress counted in
 * tcp_byte_count and the buffer position). With USE_DNSCRYPT the buffer is
 * the dnscrypt_buffer of the comm point. tcp_callback_writer() is called
 * when the whole message has been written.
 * If the comm point has an ssl object, the work is passed to
 * ssl_handle_it() after the nonblocking connect check.
 * @param fd: file descriptor of socket.
 * @param c: comm point to write buffer out of.
 * @return: 0 on error, 1 if the caller can continue (also when the write
 *	has to be attempted again later).
 */
static int
comm_point_tcp_handle_write(int fd, struct comm_point* c)
{
	ssize_t r;
	struct sldns_buffer *buffer;
	log_assert(c->type == comm_tcp);
#ifdef USE_DNSCRYPT
	buffer = c->dnscrypt_buffer;
#else
	buffer = c->buffer;
#endif
	/* a write event while reading is a failure, except for ssl and
	 * for write-and-read mode */
	if(c->tcp_is_reading && !c->ssl && !c->tcp_write_and_read)
		return 0;
	log_assert(fd != -1);
	/* only when nothing has been written yet for this message */
	if(((!c->tcp_write_and_read && c->tcp_byte_count == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == 0)) && c->tcp_check_nb_connect) {
		/* check for pending error from nonblocking connect */
		/* from Stevens, unix network programming, vol1, 3rd ed, p450*/
		int error = 0;
		socklen_t len = (socklen_t)sizeof(error);
		if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error,
			&len) < 0){
#ifndef USE_WINSOCK
			error = errno; /* on solaris errno is error */
#else /* USE_WINSOCK */
			error = WSAGetLastError();
#endif
		}
		/* note: the two platform branches below share the closing
		 * brace and the return 0 that follow the #endif */
#ifndef USE_WINSOCK
#if defined(EINPROGRESS) && defined(EWOULDBLOCK)
		if(error == EINPROGRESS || error == EWOULDBLOCK)
			return 1; /* try again later */
		else
#endif
		if(error != 0 && verbosity < 2)
			return 0; /* silence lots of chatter in the logs */
		else if(error != 0) {
			log_err_addr("tcp connect", strerror(error),
				&c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
#else /* USE_WINSOCK */
		/* examine error */
		if(error == WSAEINPROGRESS)
			return 1;
		else if(error == WSAEWOULDBLOCK) {
			ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
			return 1;
		} else if(error != 0 && verbosity < 2)
			return 0;
		else if(error != 0) {
			log_err_addr("tcp connect", wsa_strerror(error),
				&c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
#endif /* USE_WINSOCK */
			return 0;
		}
	}
	if(c->ssl)
		return ssl_handle_it(c, 1);

#ifdef USE_MSG_FASTOPEN
	/* Only try this on first use of a connection that uses tfo,
	   otherwise fall through to normal write */
	/* Also, TFO support on WINDOWS not implemented at the moment */
	if(c->tcp_do_fastopen == 1) {
		/* this form of sendmsg() does both a connect() and send() so need to
		   look for various flavours of error*/
		uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(buffer));
		struct msghdr msg;
		struct iovec iov[2];
		/* cleared here, so the fastopen attempt is made once */
		c->tcp_do_fastopen = 0;
		memset(&msg, 0, sizeof(msg));
		/* iov[0] is the unsent part of the length prefix,
		 * iov[1] is the whole message */
		if(c->tcp_write_and_read) {
			iov[0].iov_base = (uint8_t*)&len + c->tcp_write_byte_count;
			iov[0].iov_len = sizeof(uint16_t) - c->tcp_write_byte_count;
			iov[1].iov_base = c->tcp_write_pkt;
			iov[1].iov_len = c->tcp_write_pkt_len;
		} else {
			iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
			iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
			iov[1].iov_base = sldns_buffer_begin(buffer);
			iov[1].iov_len = sldns_buffer_limit(buffer);
		}
		log_assert(iov[0].iov_len > 0);
		msg.msg_name = &c->repinfo.remote_addr;
		msg.msg_namelen = c->repinfo.remote_addrlen;
		msg.msg_iov = iov;
		msg.msg_iovlen = 2;
		r = sendmsg(fd, &msg, MSG_FASTOPEN);
		if (r == -1) {
#if defined(EINPROGRESS) && defined(EWOULDBLOCK)
			/* Handshake is underway, maybe because no TFO cookie available.
			   Come back to write the message*/
			if(errno == EINPROGRESS || errno == EWOULDBLOCK)
				return 1;
#endif
			if(errno == EINTR || errno == EAGAIN)
				return 1;
			/* Not handling EISCONN here as shouldn't ever hit that case.*/
			/* EPIPE, EOPNOTSUPP and 0 are not fatal here, they
			 * lead to the connect() attempt further down */
			if(errno != EPIPE
#ifdef EOPNOTSUPP
				/* if /proc/sys/net/ipv4/tcp_fastopen is
				 * disabled on Linux, sendmsg may return
				 * 'Operation not supported', if so
				 * fallthrough to ordinary connect. */
				&& errno != EOPNOTSUPP
#endif
				&& errno != 0) {
				if(verbosity < 2)
					return 0; /* silence lots of chatter in the logs */
				log_err_addr("tcp sendmsg", strerror(errno),
					&c->repinfo.remote_addr,
					c->repinfo.remote_addrlen);
				return 0;
			}
			verbose(VERB_ALGO, "tcp sendmsg for fastopen failed (with %s), try normal connect", strerror(errno));
			/* fallthrough to nonFASTOPEN
			 * (MSG_FASTOPEN on Linux 3 produces EPIPE)
			 * we need to perform connect() */
			if(connect(fd, (struct sockaddr *)&c->repinfo.remote_addr,
				c->repinfo.remote_addrlen) == -1) {
#ifdef EINPROGRESS
				if(errno == EINPROGRESS)
					return 1; /* wait until connect done*/
#endif
#ifdef USE_WINSOCK
				if(WSAGetLastError() == WSAEINPROGRESS ||
					WSAGetLastError() == WSAEWOULDBLOCK)
					return 1; /* wait until connect done*/
#endif
				if(tcp_connect_errno_needs_log(
					(struct sockaddr *)&c->repinfo.remote_addr,
					c->repinfo.remote_addrlen)) {
					log_err_addr("outgoing tcp: connect after EPIPE for fastopen",
						strerror(errno),
						&c->repinfo.remote_addr,
						c->repinfo.remote_addrlen);
				}
				return 0;
			}
			/* connect() succeeded at once, continue with the
			 * normal write below */

		} else {
			/* sendmsg wrote r bytes, account for them; the
			 * counters include the two length bytes */
			if(c->tcp_write_and_read) {
				c->tcp_write_byte_count += r;
				if(c->tcp_write_byte_count < sizeof(uint16_t))
					return 1;
			} else {
				c->tcp_byte_count += r;
				if(c->tcp_byte_count < sizeof(uint16_t))
					return 1;
				/* what was written beyond the length prefix
				 * came from the start of the buffer */
				sldns_buffer_set_position(buffer, c->tcp_byte_count -
					sizeof(uint16_t));
			}
			if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
				tcp_callback_writer(c);
				return 1;
			}
		}
	}
#endif /* USE_MSG_FASTOPEN */

	/* the length prefix has not been written completely yet */
	if((c->tcp_write_and_read?c->tcp_write_byte_count:c->tcp_byte_count) < sizeof(uint16_t)) {
		uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(buffer));
#ifdef HAVE_WRITEV
		/* write the rest of the length prefix and the message
		 * with one call */
		struct iovec iov[2];
		if(c->tcp_write_and_read) {
			iov[0].iov_base = (uint8_t*)&len + c->tcp_write_byte_count;
			iov[0].iov_len = sizeof(uint16_t) - c->tcp_write_byte_count;
			iov[1].iov_base = c->tcp_write_pkt;
			iov[1].iov_len = c->tcp_write_pkt_len;
		} else {
			iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
			iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
			iov[1].iov_base = sldns_buffer_begin(buffer);
			iov[1].iov_len = sldns_buffer_limit(buffer);
		}
		log_assert(iov[0].iov_len > 0);
		r = writev(fd, iov, 2);
#else /* HAVE_WRITEV */
		/* without writev only the length bytes are sent here, the
		 * message follows with the send call further down */
		if(c->tcp_write_and_read) {
			r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_write_byte_count),
				sizeof(uint16_t)-c->tcp_write_byte_count, 0);
		} else {
			r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_byte_count),
				sizeof(uint16_t)-c->tcp_byte_count, 0);
		}
#endif /* HAVE_WRITEV */
		if(r == -1) {
#ifndef USE_WINSOCK
# ifdef EPIPE
			if(errno == EPIPE && verbosity < 2)
				return 0; /* silence 'broken pipe' */
#endif
			if(errno == EINTR || errno == EAGAIN)
				return 1;
#ifdef ECONNRESET
			if(errno == ECONNRESET && verbosity < 2)
				return 0; /* silence reset by peer */
#endif
# ifdef HAVE_WRITEV
			log_err_addr("tcp writev", strerror(errno),
				&c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
# else /* HAVE_WRITEV */
			log_err_addr("tcp send s", strerror(errno),
				&c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
# endif /* HAVE_WRITEV */
#else
			if(WSAGetLastError() == WSAENOTCONN)
				return 1;
			if(WSAGetLastError() == WSAEINPROGRESS)
				return 1;
			if(WSAGetLastError() == WSAEWOULDBLOCK) {
				ub_winsock_tcp_wouldblock(c->ev->ev,
					UB_EV_WRITE);
				return 1;
			}
			if(WSAGetLastError() == WSAECONNRESET && verbosity < 2)
				return 0; /* silence reset by peer */
			log_err_addr("tcp send s",
				wsa_strerror(WSAGetLastError()),
				&c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
#endif
			return 0;
		}
		/* account for the r bytes that were written; the counters
		 * include the two length bytes */
		if(c->tcp_write_and_read) {
			c->tcp_write_byte_count += r;
			if(c->tcp_write_byte_count < sizeof(uint16_t))
				return 1;
		} else {
			c->tcp_byte_count += r;
			if(c->tcp_byte_count < sizeof(uint16_t))
				return 1;
			sldns_buffer_set_position(buffer, c->tcp_byte_count -
				sizeof(uint16_t));
		}
		if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
			tcp_callback_writer(c);
			return 1;
		}
	}
	/* the length prefix is out, part of the message is left to write */
	log_assert(c->tcp_write_and_read || sldns_buffer_remaining(buffer) > 0);
	log_assert(!c->tcp_write_and_read || c->tcp_write_byte_count < c->tcp_write_pkt_len + 2);
	if(c->tcp_write_and_read) {
		/* tcp_write_byte_count includes the two length bytes,
		 * hence the -2 for the offset into the packet */
		r = send(fd, (void*)(c->tcp_write_pkt + c->tcp_write_byte_count - 2),
			c->tcp_write_pkt_len + 2 - c->tcp_write_byte_count, 0);
	} else {
		r = send(fd, (void*)sldns_buffer_current(buffer),
			sldns_buffer_remaining(buffer), 0);
	}
	if(r == -1) {
#ifndef USE_WINSOCK
		if(errno == EINTR || errno == EAGAIN)
			return 1;
#ifdef ECONNRESET
		if(errno == ECONNRESET && verbosity < 2)
			return 0; /* silence reset by peer */
#endif
#else
		if(WSAGetLastError() == WSAEINPROGRESS)
			return 1;
		if(WSAGetLastError() == WSAEWOULDBLOCK) {
			ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
			return 1;
		}
		if(WSAGetLastError() == WSAECONNRESET && verbosity < 2)
			return 0; /* silence reset by peer */
#endif
		log_err_addr("tcp send r", sock_strerror(errno),
			&c->repinfo.remote_addr,
			c->repinfo.remote_addrlen);
		return 0;
	}
	if(c->tcp_write_and_read) {
		c->tcp_write_byte_count += r;
	} else {
		sldns_buffer_skip(buffer, r);
	}

	/* everything written: signal the writer callback */
	if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
		tcp_callback_writer(c);
	}

	return 1;
}
4545
4546 /** read again to drain buffers when there could be more to read, returns 0
4547 * on failure which means the comm point is closed. */
4548 static int
4549 tcp_req_info_read_again(int fd, struct comm_point* c)
4550 {
4551 while(c->tcp_req_info->read_again) {
4552 int r;
4553 c->tcp_req_info->read_again = 0;
4554 if(c->tcp_is_reading)
4555 r = comm_point_tcp_handle_read(fd, c, 0);
4556 else r = comm_point_tcp_handle_write(fd, c);
4557 if(!r) {
4558 reclaim_tcp_handler(c);
4559 if(!c->tcp_do_close) {
4560 fptr_ok(fptr_whitelist_comm_point(
4561 c->callback));
4562 (void)(*c->callback)(c, c->cb_arg,
4563 NETEVENT_CLOSED, NULL);
4564 }
4565 return 0;
4566 }
4567 }
4568 return 1;
4569 }
4570
4571 /** read again to drain buffers when there could be more to read */
4572 static void
4573 tcp_more_read_again(int fd, struct comm_point* c)
4574 {
4575 /* if the packet is done, but another one could be waiting on
4576 * the connection, the callback signals this, and we try again */
4577 /* this continues until the read routines get EAGAIN or so,
4578 * and thus does not call the callback, and the bool is 0 */
4579 int* moreread = c->tcp_more_read_again;
4580 while(moreread && *moreread) {
4581 *moreread = 0;
4582 if(!comm_point_tcp_handle_read(fd, c, 0)) {
4583 reclaim_tcp_handler(c);
4584 if(!c->tcp_do_close) {
4585 fptr_ok(fptr_whitelist_comm_point(
4586 c->callback));
4587 (void)(*c->callback)(c, c->cb_arg,
4588 NETEVENT_CLOSED, NULL);
4589 }
4590 return;
4591 }
4592 }
4593 }
4594
4595 /** write again to fill up when there could be more to write */
4596 static void
4597 tcp_more_write_again(int fd, struct comm_point* c)
4598 {
4599 /* if the packet is done, but another is waiting to be written,
4600 * the callback signals it and we try again. */
4601 /* this continues until the write routines get EAGAIN or so,
4602 * and thus does not call the callback, and the bool is 0 */
4603 int* morewrite = c->tcp_more_write_again;
4604 while(morewrite && *morewrite) {
4605 *morewrite = 0;
4606 if(!comm_point_tcp_handle_write(fd, c)) {
4607 reclaim_tcp_handler(c);
4608 if(!c->tcp_do_close) {
4609 fptr_ok(fptr_whitelist_comm_point(
4610 c->callback));
4611 (void)(*c->callback)(c, c->cb_arg,
4612 NETEVENT_CLOSED, NULL);
4613 }
4614 return;
4615 }
4616 }
4617 }
4618
4619 void
4620 comm_point_tcp_handle_callback(int fd, short event, void* arg)
4621 {
4622 struct comm_point* c = (struct comm_point*)arg;
4623 log_assert(c->type == comm_tcp);
4624 ub_comm_base_now(c->ev->base);
4625
4626 if(c->fd == -1 || c->fd != fd)
4627 return; /* duplicate event, but commpoint closed. */
4628
4629 #ifdef USE_DNSCRYPT
4630 /* Initialize if this is a dnscrypt socket */
4631 if(c->tcp_parent) {
4632 c->dnscrypt = c->tcp_parent->dnscrypt;
4633 }
4634 if(c->dnscrypt && c->dnscrypt_buffer == c->buffer) {
4635 c->dnscrypt_buffer = sldns_buffer_new(sldns_buffer_capacity(c->buffer));
4636 if(!c->dnscrypt_buffer) {
4637 log_err("Could not allocate dnscrypt buffer");
4638 reclaim_tcp_handler(c);
4639 if(!c->tcp_do_close) {
4640 fptr_ok(fptr_whitelist_comm_point(
4641 c->callback));
4642 (void)(*c->callback)(c, c->cb_arg,
4643 NETEVENT_CLOSED, NULL);
4644 }
4645 return;
4646 }
4647 }
4648 #endif
4649
4650 if((event&UB_EV_TIMEOUT)) {
4651 verbose(VERB_QUERY, "tcp took too long, dropped");
4652 reclaim_tcp_handler(c);
4653 if(!c->tcp_do_close) {
4654 fptr_ok(fptr_whitelist_comm_point(c->callback));
4655 (void)(*c->callback)(c, c->cb_arg,
4656 NETEVENT_TIMEOUT, NULL);
4657 }
4658 return;
4659 }
4660 if((event&UB_EV_READ)
4661 #ifdef USE_MSG_FASTOPEN
4662 && !(c->tcp_do_fastopen && (event&UB_EV_WRITE))
4663 #endif
4664 ) {
4665 int has_tcpq = (c->tcp_req_info != NULL);
4666 int* moreread = c->tcp_more_read_again;
4667 if(!comm_point_tcp_handle_read(fd, c, 0)) {
4668 reclaim_tcp_handler(c);
4669 if(!c->tcp_do_close) {
4670 fptr_ok(fptr_whitelist_comm_point(
4671 c->callback));
4672 (void)(*c->callback)(c, c->cb_arg,
4673 NETEVENT_CLOSED, NULL);
4674 }
4675 return;
4676 }
4677 if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again) {
4678 if(!tcp_req_info_read_again(fd, c))
4679 return;
4680 }
4681 if(moreread && *moreread)
4682 tcp_more_read_again(fd, c);
4683 return;
4684 }
4685 if((event&UB_EV_WRITE)) {
4686 int has_tcpq = (c->tcp_req_info != NULL);
4687 int* morewrite = c->tcp_more_write_again;
4688 if(!comm_point_tcp_handle_write(fd, c)) {
4689 reclaim_tcp_handler(c);
4690 if(!c->tcp_do_close) {
4691 fptr_ok(fptr_whitelist_comm_point(
4692 c->callback));
4693 (void)(*c->callback)(c, c->cb_arg,
4694 NETEVENT_CLOSED, NULL);
4695 }
4696 return;
4697 }
4698 if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again) {
4699 if(!tcp_req_info_read_again(fd, c))
4700 return;
4701 }
4702 if(morewrite && *morewrite)
4703 tcp_more_write_again(fd, c);
4704 return;
4705 }
4706 log_err("Ignored event %d for tcphdl.", event);
4707 }
4708
4709 /** Make http handler free for next assignment */
4710 static void
4711 reclaim_http_handler(struct comm_point* c)
4712 {
4713 log_assert(c->type == comm_http);
4714 if(c->ssl) {
4715 #ifdef HAVE_SSL
4716 SSL_shutdown(c->ssl);
4717 SSL_free(c->ssl);
4718 c->ssl = NULL;
4719 #endif
4720 }
4721 comm_point_close(c);
4722 if(c->tcp_parent && !c->is_in_tcp_free) {
4723 /* Should not happen: bad tcp_free state in reclaim_http. */
4724 log_assert(c->tcp_free == NULL);
4725 log_assert(c->tcp_parent->cur_tcp_count > 0);
4726 c->tcp_parent->cur_tcp_count--;
4727 c->tcp_free = c->tcp_parent->tcp_free;
4728 c->tcp_parent->tcp_free = c;
4729 c->is_in_tcp_free = 1;
4730 if(!c->tcp_free) {
4731 /* re-enable listening on accept socket */
4732 comm_point_start_listening(c->tcp_parent, -1, -1);
4733 }
4734 }
4735 }
4736
/**
 * Read more data for http (with ssl), into the buffer at its current
 * position. The buffer position is moved past the data that was read.
 * @param c: comm point with ssl connection.
 * @return 0 on failure or when the connection was shut down, 1 if data
 *	was read or if the read has to be attempted again later.
 *	Without HAVE_SSL it returns 0.
 */
static int
ssl_http_read_more(struct comm_point* c)
{
#ifdef HAVE_SSL
	int nread;
	int sslerr;
	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	nread = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
		(int)sldns_buffer_remaining(c->buffer));
	if(nread > 0) {
		verbose(VERB_ALGO, "ssl http read more skip to %d + %d",
			(int)sldns_buffer_position(c->buffer), (int)nread);
		sldns_buffer_skip(c->buffer, (ssize_t)nread);
		return 1;
	}
	sslerr = SSL_get_error(c->ssl, nread);
	switch(sslerr) {
	case SSL_ERROR_ZERO_RETURN:
		/* shutdown, closed */
		return 0;
	case SSL_ERROR_WANT_READ:
		/* read more later */
		return 1;
	case SSL_ERROR_WANT_WRITE:
		/* ssl wants to write; listen for a write event */
		c->ssl_shake_state = comm_ssl_shake_hs_write;
		comm_point_listen_for_rw(c, 0, 1);
		return 1;
	case SSL_ERROR_SYSCALL:
#ifdef ECONNRESET
		/* silence reset by peer */
		if(errno == ECONNRESET && verbosity < 2)
			return 0;
#endif
		if(errno != 0)
			log_err("SSL_read syscall: %s", strerror(errno));
		return 0;
	default:
		break;
	}
	log_crypto_err_io("could not SSL_read", sslerr);
	return 0;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
4779
4780 /** read more data for http */
4781 static int
4782 http_read_more(int fd, struct comm_point* c)
4783 {
4784 ssize_t r;
4785 log_assert(sldns_buffer_remaining(c->buffer) > 0);
4786 r = recv(fd, (void*)sldns_buffer_current(c->buffer),
4787 sldns_buffer_remaining(c->buffer), MSG_DONTWAIT);
4788 if(r == 0) {
4789 return 0;
4790 } else if(r == -1) {
4791 #ifndef USE_WINSOCK
4792 if(errno == EINTR || errno == EAGAIN)
4793 return 1;
4794 #else /* USE_WINSOCK */
4795 if(WSAGetLastError() == WSAECONNRESET)
4796 return 0;
4797 if(WSAGetLastError() == WSAEINPROGRESS)
4798 return 1;
4799 if(WSAGetLastError() == WSAEWOULDBLOCK) {
4800 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
4801 return 1;
4802 }
4803 #endif
4804 log_err_addr("read (in http r)", sock_strerror(errno),
4805 &c->repinfo.remote_addr, c->repinfo.remote_addrlen);
4806 return 0;
4807 }
4808 verbose(VERB_ALGO, "http read more skip to %d + %d",
4809 (int)sldns_buffer_position(c->buffer), (int)r);
4810 sldns_buffer_skip(c->buffer, r);
4811 return 1;
4812 }
4813
4814 /** return true if http header has been read (one line complete) */
4815 static int
4816 http_header_done(sldns_buffer* buf)
4817 {
4818 size_t i;
4819 for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) {
4820 /* there was a \r before the \n, but we ignore that */
4821 if((char)sldns_buffer_read_u8_at(buf, i) == '\n')
4822 return 1;
4823 }
4824 return 0;
4825 }
4826
4827 /** return character string into buffer for header line, moves buffer
4828 * past that line and puts zero terminator into linefeed-newline */
4829 static char*
4830 http_header_line(sldns_buffer* buf)
4831 {
4832 char* result = (char*)sldns_buffer_current(buf);
4833 size_t i;
4834 for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) {
4835 /* terminate the string on the \r */
4836 if((char)sldns_buffer_read_u8_at(buf, i) == '\r')
4837 sldns_buffer_write_u8_at(buf, i, 0);
4838 /* terminate on the \n and skip past the it and done */
4839 if((char)sldns_buffer_read_u8_at(buf, i) == '\n') {
4840 sldns_buffer_write_u8_at(buf, i, 0);
4841 sldns_buffer_set_position(buf, i+1);
4842 return result;
4843 }
4844 }
4845 return NULL;
4846 }
4847
4848 /** move unread buffer to start and clear rest for putting the rest into it */
4849 static void
4850 http_moveover_buffer(sldns_buffer* buf)
4851 {
4852 size_t pos = sldns_buffer_position(buf);
4853 size_t len = sldns_buffer_remaining(buf);
4854 sldns_buffer_clear(buf);
4855 memmove(sldns_buffer_begin(buf), sldns_buffer_at(buf, pos), len);
4856 sldns_buffer_set_position(buf, len);
4857 }
4858
4859 /** a http header is complete, process it */
4860 static int
4861 http_process_initial_header(struct comm_point* c)
4862 {
4863 char* line = http_header_line(c->buffer);
4864 if(!line) return 1;
4865 verbose(VERB_ALGO, "http header: %s", line);
4866 if(strncasecmp(line, "HTTP/1.1 ", 9) == 0) {
4867 /* check returncode */
4868 if(line[9] != '2') {
4869 verbose(VERB_ALGO, "http bad status %s", line+9);
4870 return 0;
4871 }
4872 } else if(strncasecmp(line, "Content-Length: ", 16) == 0) {
4873 if(!c->http_is_chunked) {
4874 char* end = NULL;
4875 long long cl;
4876 errno = 0;
4877 cl = strtoll(line+16, &end, 10);
4878 if(end == line+16 || errno != 0 || cl < 0) {
4879 verbose(VERB_ALGO, "http invalid Content-Length: " ARG_LL "d", cl);
4880 return 0; /* reject */
4881 }
4882 c->tcp_byte_count = (size_t)cl;
4883 }
4884 } else if(strncasecmp(line, "Transfer-Encoding: chunked", 19+7) == 0) {
4885 c->tcp_byte_count = 0;
4886 c->http_is_chunked = 1;
4887 } else if(line[0] == 0) {
4888 /* end of initial headers */
4889 c->http_in_headers = 0;
4890 if(c->http_is_chunked)
4891 c->http_in_chunk_headers = 1;
4892 /* remove header text from front of buffer
4893 * the buffer is going to be used to return the data segment
4894 * itself and we don't want the header to get returned
4895 * prepended with it */
4896 http_moveover_buffer(c->buffer);
4897 sldns_buffer_flip(c->buffer);
4898 return 1;
4899 }
4900 /* ignore other headers */
4901 return 1;
4902 }
4903
4904 /** a chunk header is complete, process it, return 0=fail, 1=continue next
4905 * header line, 2=done with chunked transfer*/
4906 static int
4907 http_process_chunk_header(struct comm_point* c)
4908 {
4909 char* line = http_header_line(c->buffer);
4910 if(!line) return 1;
4911 if(c->http_in_chunk_headers == 3) {
4912 verbose(VERB_ALGO, "http chunk trailer: %s", line);
4913 /* are we done ? */
4914 if(line[0] == 0 && c->tcp_byte_count == 0) {
4915 /* callback of http reader when NETEVENT_DONE,
4916 * end of data, with no data in buffer */
4917 sldns_buffer_set_position(c->buffer, 0);
4918 sldns_buffer_set_limit(c->buffer, 0);
4919 fptr_ok(fptr_whitelist_comm_point(c->callback));
4920 (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL);
4921 /* return that we are done */
4922 return 2;
4923 }
4924 if(line[0] == 0) {
4925 /* continue with header of the next chunk */
4926 c->http_in_chunk_headers = 1;
4927 /* remove header text from front of buffer */
4928 http_moveover_buffer(c->buffer);
4929 sldns_buffer_flip(c->buffer);
4930 return 1;
4931 }
4932 /* ignore further trail headers */
4933 return 1;
4934 }
4935 verbose(VERB_ALGO, "http chunk header: %s", line);
4936 if(c->http_in_chunk_headers == 1) {
4937 /* read chunked start line */
4938 char* end = NULL;
4939 long chunk_sz;
4940 errno = 0;
4941 chunk_sz = strtol(line, &end, 16);
4942 if(end == line || errno != 0 || chunk_sz < 0) {
4943 verbose(VERB_ALGO, "http invalid chunk size: %ld",
4944 chunk_sz);
4945 return 0;
4946 }
4947 c->tcp_byte_count = (size_t)chunk_sz;
4948 c->http_in_chunk_headers = 0;
4949 /* remove header text from front of buffer */
4950 http_moveover_buffer(c->buffer);
4951 sldns_buffer_flip(c->buffer);
4952 if(c->tcp_byte_count == 0) {
4953 /* done with chunks, process chunk_trailer lines */
4954 c->http_in_chunk_headers = 3;
4955 }
4956 return 1;
4957 }
4958 /* ignore other headers */
4959 return 1;
4960 }
4961
4962 /** handle nonchunked data segment, 0=fail, 1=wait */
4963 static int
4964 http_nonchunk_segment(struct comm_point* c)
4965 {
4966 /* c->buffer at position..limit has new data we read in.
4967 * the buffer itself is full of nonchunked data.
4968 * we are looking to read tcp_byte_count more data
4969 * and then the transfer is done. */
4970 size_t remainbufferlen;
4971 size_t got_now = sldns_buffer_limit(c->buffer);
4972 if(c->tcp_byte_count <= got_now) {
4973 /* done, this is the last data fragment */
4974 c->http_stored = 0;
4975 sldns_buffer_set_position(c->buffer, 0);
4976 fptr_ok(fptr_whitelist_comm_point(c->callback));
4977 (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL);
4978 return 1;
4979 }
4980 /* if we have the buffer space,
4981 * read more data collected into the buffer */
4982 remainbufferlen = sldns_buffer_capacity(c->buffer) -
4983 sldns_buffer_limit(c->buffer);
4984 if(remainbufferlen+got_now >= c->tcp_byte_count ||
4985 remainbufferlen >= (size_t)(c->ssl?16384:2048)) {
4986 size_t total = sldns_buffer_limit(c->buffer);
4987 sldns_buffer_clear(c->buffer);
4988 sldns_buffer_set_position(c->buffer, total);
4989 c->http_stored = total;
4990 /* return and wait to read more */
4991 return 1;
4992 }
4993 /* call callback with this data amount, then
4994 * wait for more */
4995 c->tcp_byte_count -= got_now;
4996 c->http_stored = 0;
4997 sldns_buffer_set_position(c->buffer, 0);
4998 fptr_ok(fptr_whitelist_comm_point(c->callback));
4999 (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL);
5000 /* c->callback has to buffer_clear(c->buffer). */
5001 /* return and wait to read more */
5002 return 1;
5003 }
5004
/**
 * Handle chunked data segment.
 * c->tcp_byte_count is the number of bytes still expected for the current
 * chunk, c->http_stored is the amount of data in the buffer that was
 * already accounted for by an earlier call.
 * When the chunk is complete, the bytes past the end of the chunk are
 * parked in c->http_temp, the chunk data is handed to the callback, and the
 * parked bytes are put back in c->buffer for trailer/next chunk processing.
 * @param c: comm point.
 * @return 0=fail, 1=wait, 2=process more. No path in this function
 *	returns 0.
 */
static int
http_chunked_segment(struct comm_point* c)
{
	/* the c->buffer has from position..limit new data we read. */
	/* the current chunk has length tcp_byte_count.
	 * once we read that read more chunk headers.
	 */
	size_t remainbufferlen;
	/* bytes in the buffer that were not counted before */
	size_t got_now = sldns_buffer_limit(c->buffer) - c->http_stored;
	verbose(VERB_ALGO, "http_chunked_segment: got now %d, tcpbytcount %d, http_stored %d, buffer pos %d, buffer limit %d", (int)got_now, (int)c->tcp_byte_count, (int)c->http_stored, (int)sldns_buffer_position(c->buffer), (int)sldns_buffer_limit(c->buffer));
	if(c->tcp_byte_count <= got_now) {
		/* the chunk has completed (with perhaps some extra data
		 * from next chunk header and next chunk) */
		/* save the surplus data, past the end of this chunk, into
		 * the temp buffer */
		size_t fraglen;
		struct comm_reply repinfo;
		c->http_stored = 0;
		/* move position to the end of the chunk data */
		sldns_buffer_skip(c->buffer, (ssize_t)c->tcp_byte_count);
		sldns_buffer_clear(c->http_temp);
		sldns_buffer_write(c->http_temp,
			sldns_buffer_current(c->buffer),
			sldns_buffer_remaining(c->buffer));
		sldns_buffer_flip(c->http_temp);

		/* callback with this fragment; the buffer is limited to
		 * the data up to the end of the chunk */
		fraglen = sldns_buffer_position(c->buffer);
		sldns_buffer_set_position(c->buffer, 0);
		sldns_buffer_set_limit(c->buffer, fraglen);
		/* a copy of the reply info is passed to the callback and
		 * its c member is inspected afterwards */
		repinfo = c->repinfo;
		fptr_ok(fptr_whitelist_comm_point(c->callback));
		(void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &repinfo);
		/* c->callback has to buffer_clear(). */

		/* is commpoint deleted? */
		if(!repinfo.c) {
			return 1;
		}
		/* copy waiting info back from the temp buffer */
		sldns_buffer_clear(c->buffer);
		sldns_buffer_write(c->buffer,
			sldns_buffer_begin(c->http_temp),
			sldns_buffer_remaining(c->http_temp));
		sldns_buffer_flip(c->buffer);
		/* process end of chunk trailer header lines, until
		 * an empty line */
		c->http_in_chunk_headers = 3;
		/* process more data in buffer (if any) */
		return 2;
	}
	/* chunk not complete yet, account for the bytes received now */
	c->tcp_byte_count -= got_now;

	/* if we have the buffer space,
	 * read more data collected into the buffer */
	remainbufferlen = sldns_buffer_capacity(c->buffer) -
		sldns_buffer_limit(c->buffer);
	if(remainbufferlen >= c->tcp_byte_count ||
		remainbufferlen >= 2048) {
		size_t total = sldns_buffer_limit(c->buffer);
		/* keep the data and continue writing after it; remember
		 * in http_stored how much has been counted already */
		sldns_buffer_clear(c->buffer);
		sldns_buffer_set_position(c->buffer, total);
		c->http_stored = total;
		/* return and wait to read more */
		return 1;
	}

	/* callback of http reader for a new part of the data */
	c->http_stored = 0;
	sldns_buffer_set_position(c->buffer, 0);
	fptr_ok(fptr_whitelist_comm_point(c->callback));
	(void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL);
	/* c->callback has to buffer_clear(c->buffer). */
	/* return and wait to read more */
	return 1;
}
5080
5081 #ifdef HAVE_NGHTTP2
5082 /** Create new http2 session. Called when creating handling comm point. */
5083 static struct http2_session* http2_session_create(struct comm_point* c)
5084 {
5085 struct http2_session* session = calloc(1, sizeof(*session));
5086 if(!session) {
5087 log_err("malloc failure while creating http2 session");
5088 return NULL;
5089 }
5090 session->c = c;
5091
5092 return session;
5093 }
5094 #endif
5095
/**
 * Delete http2 session. After closing connection or on error.
 * Releases the nghttp2 callbacks object, if one was created, and the
 * session structure itself. Does nothing without nghttp2 support.
 * @param h2_session: session to delete. NULL is accepted and ignored.
 */
static void http2_session_delete(struct http2_session* h2_session)
{
#ifdef HAVE_NGHTTP2
	/* tolerate NULL, like free(), so that error paths can call this
	 * without checking first */
	if(!h2_session)
		return;
	if(h2_session->callbacks)
		nghttp2_session_callbacks_del(h2_session->callbacks);
	free(h2_session);
#else
	(void)h2_session;
#endif
}
5107
5108 #ifdef HAVE_NGHTTP2
5109 struct http2_stream* http2_stream_create(int32_t stream_id)
5110 {
5111 struct http2_stream* h2_stream = calloc(1, sizeof(*h2_stream));
5112 if(!h2_stream) {
5113 log_err("malloc failure while creating http2 stream");
5114 return NULL;
5115 }
5116 h2_stream->stream_id = stream_id;
5117 return h2_stream;
5118 }
5119 #endif
5120
5121 void http2_stream_add_meshstate(struct http2_stream* h2_stream,
5122 struct mesh_area* mesh, struct mesh_state* m)
5123 {
5124 h2_stream->mesh = mesh;
5125 h2_stream->mesh_state = m;
5126 }
5127
5128 void http2_stream_remove_mesh_state(struct http2_stream* h2_stream)
5129 {
5130 if(!h2_stream)
5131 return;
5132 h2_stream->mesh_state = NULL;
5133 }
5134
5135 #ifdef HAVE_NGHTTP2
5136 void http2_session_add_stream(struct http2_session* h2_session,
5137 struct http2_stream* h2_stream)
5138 {
5139 if(h2_session->first_stream)
5140 h2_session->first_stream->prev = h2_stream;
5141 h2_stream->next = h2_session->first_stream;
5142 h2_session->first_stream = h2_stream;
5143 }
5144
5145 /** remove stream from session linked list. After stream close callback or
5146 * closing connection */
5147 static void http2_session_remove_stream(struct http2_session* h2_session,
5148 struct http2_stream* h2_stream)
5149 {
5150 if(h2_stream->prev)
5151 h2_stream->prev->next = h2_stream->next;
5152 else
5153 h2_session->first_stream = h2_stream->next;
5154 if(h2_stream->next)
5155 h2_stream->next->prev = h2_stream->prev;
5156
5157 }
5158
5159 int http2_stream_close_cb(nghttp2_session* ATTR_UNUSED(session),
5160 int32_t stream_id, uint32_t ATTR_UNUSED(error_code), void* cb_arg)
5161 {
5162 struct http2_stream* h2_stream;
5163 struct http2_session* h2_session = (struct http2_session*)cb_arg;
5164 if(!(h2_stream = nghttp2_session_get_stream_user_data(
5165 h2_session->session, stream_id))) {
5166 return 0;
5167 }
5168 http2_session_remove_stream(h2_session, h2_stream);
5169 http2_stream_delete(h2_session, h2_stream);
5170 return 0;
5171 }
5172
5173 ssize_t http2_recv_cb(nghttp2_session* ATTR_UNUSED(session), uint8_t* buf,
5174 size_t len, int ATTR_UNUSED(flags), void* cb_arg)
5175 {
5176 struct http2_session* h2_session = (struct http2_session*)cb_arg;
5177 ssize_t ret;
5178
5179 log_assert(h2_session->c->type == comm_http);
5180 log_assert(h2_session->c->h2_session);
5181 if(++h2_session->reads_count > h2_session->c->http2_max_streams) {
5182 /* We are somewhat arbitrarily capping the amount of
5183 * consecutive reads on the HTTP2 session to the number of max
5184 * allowed streams.
5185 * When we reach the cap, error out with NGHTTP2_ERR_WOULDBLOCK
5186 * to signal nghttp2_session_recv() to stop reading for now. */
5187 h2_session->reads_count = 0;
5188 return NGHTTP2_ERR_WOULDBLOCK;
5189 }
5190
5191 #ifdef HAVE_SSL
5192 if(h2_session->c->ssl) {
5193 int r;
5194 ERR_clear_error();
5195 r = SSL_read(h2_session->c->ssl, buf, len);
5196 if(r <= 0) {
5197 int want = SSL_get_error(h2_session->c->ssl, r);
5198 if(want == SSL_ERROR_ZERO_RETURN) {
5199 return NGHTTP2_ERR_EOF;
5200 } else if(want == SSL_ERROR_WANT_READ) {
5201 return NGHTTP2_ERR_WOULDBLOCK;
5202 } else if(want == SSL_ERROR_WANT_WRITE) {
5203 h2_session->c->ssl_shake_state = comm_ssl_shake_hs_write;
5204 comm_point_listen_for_rw(h2_session->c, 0, 1);
5205 return NGHTTP2_ERR_WOULDBLOCK;
5206 } else if(want == SSL_ERROR_SYSCALL) {
5207 #ifdef ECONNRESET
5208 if(errno == ECONNRESET && verbosity < 2)
5209 return NGHTTP2_ERR_CALLBACK_FAILURE;
5210 #endif
5211 if(errno != 0)
5212 log_err("SSL_read syscall: %s",
5213 strerror(errno));
5214 return NGHTTP2_ERR_CALLBACK_FAILURE;
5215 }
5216 log_crypto_err_io("could not SSL_read", want);
5217 return NGHTTP2_ERR_CALLBACK_FAILURE;
5218 }
5219 return r;
5220 }
5221 #endif /* HAVE_SSL */
5222
5223 ret = recv(h2_session->c->fd, (void*)buf, len, MSG_DONTWAIT);
5224 if(ret == 0) {
5225 return NGHTTP2_ERR_EOF;
5226 } else if(ret < 0) {
5227 #ifndef USE_WINSOCK
5228 if(errno == EINTR || errno == EAGAIN)
5229 return NGHTTP2_ERR_WOULDBLOCK;
5230 #ifdef ECONNRESET
5231 if(errno == ECONNRESET && verbosity < 2)
5232 return NGHTTP2_ERR_CALLBACK_FAILURE;
5233 #endif
5234 log_err_addr("could not http2 recv: %s", strerror(errno),
5235 &h2_session->c->repinfo.remote_addr,
5236 h2_session->c->repinfo.remote_addrlen);
5237 #else /* USE_WINSOCK */
5238 if(WSAGetLastError() == WSAECONNRESET)
5239 return NGHTTP2_ERR_CALLBACK_FAILURE;
5240 if(WSAGetLastError() == WSAEINPROGRESS)
5241 return NGHTTP2_ERR_WOULDBLOCK;
5242 if(WSAGetLastError() == WSAEWOULDBLOCK) {
5243 ub_winsock_tcp_wouldblock(h2_session->c->ev->ev,
5244 UB_EV_READ);
5245 return NGHTTP2_ERR_WOULDBLOCK;
5246 }
5247 log_err_addr("could not http2 recv: %s",
5248 wsa_strerror(WSAGetLastError()),
5249 &h2_session->c->repinfo.remote_addr,
5250 h2_session->c->repinfo.remote_addrlen);
5251 #endif
5252 return NGHTTP2_ERR_CALLBACK_FAILURE;
5253 }
5254 return ret;
5255 }
5256 #endif /* HAVE_NGHTTP2 */
5257
/**
 * Handle http2 read: let nghttp2 receive from the session, then decide
 * whether to switch to writing, continue reading, or close.
 * Without nghttp2 support this always fails.
 * @param fd: unused.
 * @param c: comm point with the http2 session.
 * @return 0 on error or when the connection can be closed, 1 otherwise.
 */
static int
comm_point_http2_handle_read(int ATTR_UNUSED(fd), struct comm_point* c)
{
#ifdef HAVE_NGHTTP2
	int rv;
	log_assert(c->h2_session);

	/* reading until recv cb returns NGHTTP2_ERR_WOULDBLOCK */
	rv = nghttp2_session_recv(c->h2_session->session);
	if(rv != 0) {
		int quiet = (rv == NGHTTP2_ERR_EOF ||
			rv == NGHTTP2_ERR_CALLBACK_FAILURE);
		if(!quiet) {
			char a[256];
			addr_to_str(&c->repinfo.remote_addr,
				c->repinfo.remote_addrlen, a, sizeof(a));
			verbose(VERB_QUERY, "http2: session_recv from %s failed, "
				"error: %s", a, nghttp2_strerror(rv));
		}
		return 0;
	}

	if(!nghttp2_session_want_write(c->h2_session->session)) {
		/* nothing to send; if the session also does not want to
		 * read, the connection can be closed */
		return nghttp2_session_want_read(c->h2_session->session)
			? 1 : 0;
	}

	/* there is something to send, switch the comm point to writing */
	c->tcp_is_reading = 0;
	comm_point_stop_listening(c);
	comm_point_start_listening(c, -1, adjusted_tcp_timeout(c));
	return 1;
#else
	(void)c;
	return 0;
#endif
}
5291
/**
 * Handle http reading callback.
 * Finishes a pending SSL handshake first, hands over to the http2 reader
 * when c->use_h2 is set, and otherwise reads more data and runs the
 * HTTP/1.x state machine (initial headers, chunk headers, data segments)
 * over the bytes in c->buffer.
 * @param fd: file descriptor of socket.
 * @param c: comm point to read from into buffer.
 * @return: 0 on error
 */
static int
comm_point_http_handle_read(int fd, struct comm_point* c)
{
	log_assert(c->type == comm_http);
	log_assert(fd != -1);

	/* if we are in ssl handshake, handle SSL handshake */
#ifdef HAVE_SSL
	if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		/* handshake not finished yet, wait for more events */
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}
#endif /* HAVE_SSL */

	/* the comm point is in writing mode, nothing to read now */
	if(!c->tcp_is_reading)
		return 1;

	if(c->use_h2) {
		return comm_point_http2_handle_read(fd, c);
	}

	/* http version is <= http/1.1 */

	if(c->http_min_version >= http_version_2) {
		/* HTTP/2 failed, not allowed to use lower version. */
		return 0;
	}

	/* read more data */
	if(c->ssl) {
		if(!ssl_http_read_more(c))
			return 0;
	} else {
		if(!http_read_more(fd, c))
			return 0;
	}

	if(c->http_stored >= sldns_buffer_position(c->buffer)) {
		/* read did not work but we wanted more data, there is
		 * no bytes to process now. */
		return 1;
	}
	sldns_buffer_flip(c->buffer);
	/* if we are partway in a segment of data, position us at the point
	 * where we left off previously */
	if(c->http_stored < sldns_buffer_limit(c->buffer))
		sldns_buffer_set_position(c->buffer, c->http_stored);
	else	sldns_buffer_set_position(c->buffer, sldns_buffer_limit(c->buffer));

	/* process as long as there are unprocessed bytes in the buffer */
	while(sldns_buffer_remaining(c->buffer) > 0) {
		/* Handle HTTP/1.x data */
		/* if we are reading headers, read more headers */
		if(c->http_in_headers || c->http_in_chunk_headers) {
			/* if header is done, process the header */
			if(!http_header_done(c->buffer)) {
				/* copy remaining data to front of buffer
				 * and set rest for writing into it */
				http_moveover_buffer(c->buffer);
				/* return and wait to read more */
				return 1;
			}
			if(!c->http_in_chunk_headers) {
				/* process initial headers */
				if(!http_process_initial_header(c))
					return 0;
			} else {
				/* process chunk headers */
				int r = http_process_chunk_header(c);
				if(r == 0) return 0;
				if(r == 2) return 1; /* done */
				/* r == 1, continue */
			}
			/* see if we have more to process */
			continue;
		}

		if(!c->http_is_chunked) {
			/* if we are reading nonchunks, process that*/
			return http_nonchunk_segment(c);
		} else {
			/* if we are reading chunks, read the chunk */
			int r = http_chunked_segment(c);
			if(r == 0) return 0;
			if(r == 1) return 1;
			/* r == 2, process more data in the buffer */
			continue;
		}
	}
	/* broke out of the loop; could not process header instead need
	 * to read more */
	/* moveover any remaining data and read more data */
	http_moveover_buffer(c->buffer);
	/* return and wait to read more */
	return 1;
}
5394
/**
 * check pending connect for http.
 * Fetches the pending socket error with getsockopt(SO_ERROR) and
 * classifies it.
 * Note that the if/else chain below is split over the preprocessor
 * branches: both branches open a block that is closed after the #endif.
 * @param fd: the socket.
 * @param c: comm point, its remote address is used for the log message.
 * @return 0 on error (logged only when verbosity >= 2), 1 when the connect
 *	is still in progress and has to be tried again later, 2 when there
 *	is no error and processing of the socket can continue.
 */
static int
http_check_connect(int fd, struct comm_point* c)
{
	/* check for pending error from nonblocking connect */
	/* from Stevens, unix network programming, vol1, 3rd ed, p450*/
	int error = 0;
	socklen_t len = (socklen_t)sizeof(error);
	if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error,
		&len) < 0){
		/* getsockopt itself failed, use its error code instead */
#ifndef USE_WINSOCK
		error = errno; /* on solaris errno is error */
#else /* USE_WINSOCK */
		error = WSAGetLastError();
#endif
	}
#ifndef USE_WINSOCK
#if defined(EINPROGRESS) && defined(EWOULDBLOCK)
	if(error == EINPROGRESS || error == EWOULDBLOCK)
		return 1; /* try again later */
	else
#endif
	if(error != 0 && verbosity < 2)
		return 0; /* silence lots of chatter in the logs */
	else if(error != 0) {
		log_err_addr("http connect", strerror(error),
			&c->repinfo.remote_addr, c->repinfo.remote_addrlen);
#else /* USE_WINSOCK */
	/* examine error */
	if(error == WSAEINPROGRESS)
		return 1;
	else if(error == WSAEWOULDBLOCK) {
		ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
		return 1;
	} else if(error != 0 && verbosity < 2)
		return 0;
	else if(error != 0) {
		log_err_addr("http connect", wsa_strerror(error),
			&c->repinfo.remote_addr, c->repinfo.remote_addrlen);
#endif /* USE_WINSOCK */
		/* shared tail of the logged-error block of both branches */
		return 0;
	}
	/* keep on processing this socket */
	return 2;
}
5440
/**
 * write more data for http (with ssl).
 * Writes the remaining bytes of c->buffer with SSL_write and advances the
 * buffer position by the amount written.
 * Without SSL support this always fails.
 * @param c: comm point with ssl object and buffer to write.
 * @return 0 on error or closed connection, 1 on success or when the write
 *	has to be retried later.
 */
static int
ssl_http_write_more(struct comm_point* c)
{
#ifdef HAVE_SSL
	int written, want;
	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	written = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer),
		(int)sldns_buffer_remaining(c->buffer));
	if(written > 0) {
		sldns_buffer_skip(c->buffer, (ssize_t)written);
		return 1;
	}

	want = SSL_get_error(c->ssl, written);
	switch(want) {
	case SSL_ERROR_ZERO_RETURN:
		return 0; /* closed */
	case SSL_ERROR_WANT_READ:
		c->ssl_shake_state = comm_ssl_shake_hs_read;
		comm_point_listen_for_rw(c, 1, 0);
		return 1; /* wait for read condition */
	case SSL_ERROR_WANT_WRITE:
		return 1; /* write more later */
	case SSL_ERROR_SYSCALL:
#ifdef EPIPE
		if(errno == EPIPE && verbosity < 2)
			return 0; /* silence 'broken pipe' */
#endif
		if(errno != 0)
			log_err("SSL_write syscall: %s",
				strerror(errno));
		return 0;
	default:
		break;
	}
	log_crypto_err_io("could not SSL_write", want);
	return 0;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
5481
5482 /** write more data for http */
5483 static int
5484 http_write_more(int fd, struct comm_point* c)
5485 {
5486 ssize_t r;
5487 log_assert(sldns_buffer_remaining(c->buffer) > 0);
5488 r = send(fd, (void*)sldns_buffer_current(c->buffer),
5489 sldns_buffer_remaining(c->buffer), 0);
5490 if(r == -1) {
5491 #ifndef USE_WINSOCK
5492 if(errno == EINTR || errno == EAGAIN)
5493 return 1;
5494 #else
5495 if(WSAGetLastError() == WSAEINPROGRESS)
5496 return 1;
5497 if(WSAGetLastError() == WSAEWOULDBLOCK) {
5498 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
5499 return 1;
5500 }
5501 #endif
5502 log_err_addr("http send r", sock_strerror(errno),
5503 &c->repinfo.remote_addr, c->repinfo.remote_addrlen);
5504 return 0;
5505 }
5506 sldns_buffer_skip(c->buffer, r);
5507 return 1;
5508 }
5509
5510 #ifdef HAVE_NGHTTP2
5511 ssize_t http2_send_cb(nghttp2_session* ATTR_UNUSED(session), const uint8_t* buf,
5512 size_t len, int ATTR_UNUSED(flags), void* cb_arg)
5513 {
5514 ssize_t ret;
5515 struct http2_session* h2_session = (struct http2_session*)cb_arg;
5516 log_assert(h2_session->c->type == comm_http);
5517 log_assert(h2_session->c->h2_session);
5518
5519 #ifdef HAVE_SSL
5520 if(h2_session->c->ssl) {
5521 int r;
5522 ERR_clear_error();
5523 r = SSL_write(h2_session->c->ssl, buf, len);
5524 if(r <= 0) {
5525 int want = SSL_get_error(h2_session->c->ssl, r);
5526 if(want == SSL_ERROR_ZERO_RETURN) {
5527 return NGHTTP2_ERR_CALLBACK_FAILURE;
5528 } else if(want == SSL_ERROR_WANT_READ) {
5529 h2_session->c->ssl_shake_state = comm_ssl_shake_hs_read;
5530 comm_point_listen_for_rw(h2_session->c, 1, 0);
5531 return NGHTTP2_ERR_WOULDBLOCK;
5532 } else if(want == SSL_ERROR_WANT_WRITE) {
5533 return NGHTTP2_ERR_WOULDBLOCK;
5534 } else if(want == SSL_ERROR_SYSCALL) {
5535 #ifdef EPIPE
5536 if(errno == EPIPE && verbosity < 2)
5537 return NGHTTP2_ERR_CALLBACK_FAILURE;
5538 #endif
5539 if(errno != 0)
5540 log_err("SSL_write syscall: %s",
5541 strerror(errno));
5542 return NGHTTP2_ERR_CALLBACK_FAILURE;
5543 }
5544 log_crypto_err_io("could not SSL_write", want);
5545 return NGHTTP2_ERR_CALLBACK_FAILURE;
5546 }
5547 return r;
5548 }
5549 #endif /* HAVE_SSL */
5550
5551 ret = send(h2_session->c->fd, (void*)buf, len, 0);
5552 if(ret == 0) {
5553 return NGHTTP2_ERR_CALLBACK_FAILURE;
5554 } else if(ret < 0) {
5555 #ifndef USE_WINSOCK
5556 if(errno == EINTR || errno == EAGAIN)
5557 return NGHTTP2_ERR_WOULDBLOCK;
5558 #ifdef EPIPE
5559 if(errno == EPIPE && verbosity < 2)
5560 return NGHTTP2_ERR_CALLBACK_FAILURE;
5561 #endif
5562 #ifdef ECONNRESET
5563 if(errno == ECONNRESET && verbosity < 2)
5564 return NGHTTP2_ERR_CALLBACK_FAILURE;
5565 #endif
5566 log_err_addr("could not http2 write: %s", strerror(errno),
5567 &h2_session->c->repinfo.remote_addr,
5568 h2_session->c->repinfo.remote_addrlen);
5569 #else /* USE_WINSOCK */
5570 if(WSAGetLastError() == WSAENOTCONN)
5571 return NGHTTP2_ERR_WOULDBLOCK;
5572 if(WSAGetLastError() == WSAEINPROGRESS)
5573 return NGHTTP2_ERR_WOULDBLOCK;
5574 if(WSAGetLastError() == WSAEWOULDBLOCK) {
5575 ub_winsock_tcp_wouldblock(h2_session->c->ev->ev,
5576 UB_EV_WRITE);
5577 return NGHTTP2_ERR_WOULDBLOCK;
5578 }
5579 if(WSAGetLastError() == WSAECONNRESET && verbosity < 2)
5580 return NGHTTP2_ERR_CALLBACK_FAILURE;
5581 log_err_addr("could not http2 write: %s",
5582 wsa_strerror(WSAGetLastError()),
5583 &h2_session->c->repinfo.remote_addr,
5584 h2_session->c->repinfo.remote_addrlen);
5585 #endif
5586 return NGHTTP2_ERR_CALLBACK_FAILURE;
5587 }
5588 return ret;
5589 }
5590 #endif /* HAVE_NGHTTP2 */
5591
/**
 * Handle http2 writing: let nghttp2 send pending data, then decide whether
 * to switch to reading, continue writing, or close.
 * Without nghttp2 support this always fails.
 * @param fd: unused.
 * @param c: comm point with the http2 session.
 * @return 0 on error or when the connection can be closed, 1 otherwise.
 */
static int
comm_point_http2_handle_write(int ATTR_UNUSED(fd), struct comm_point* c)
{
#ifdef HAVE_NGHTTP2
	int rv;
	log_assert(c->h2_session);

	rv = nghttp2_session_send(c->h2_session->session);
	if(rv != 0) {
		verbose(VERB_QUERY, "http2: session_send failed, "
			"error: %s", nghttp2_strerror(rv));
		return 0;
	}

	if(!nghttp2_session_want_read(c->h2_session->session)) {
		/* nothing to read; if the session also does not want to
		 * write, the connection can be closed */
		return nghttp2_session_want_write(c->h2_session->session)
			? 1 : 0;
	}

	/* the session wants to read, switch the comm point to reading */
	c->tcp_is_reading = 1;
	comm_point_stop_listening(c);
	comm_point_start_listening(c, -1, adjusted_tcp_timeout(c));
	return 1;
#else
	(void)c;
	return 0;
#endif
}
5619
5620 /**
5621 * Handle http writing callback.
5622 * @param fd: file descriptor of socket.
5623 * @param c: comm point to write buffer out of.
5624 * @return: 0 on error
5625 */
5626 static int
5627 comm_point_http_handle_write(int fd, struct comm_point* c)
5628 {
5629 log_assert(c->type == comm_http);
5630 log_assert(fd != -1);
5631
5632 /* check pending connect errors, if that fails, we wait for more,
5633 * or we can continue to write contents */
5634 if(c->tcp_check_nb_connect) {
5635 int r = http_check_connect(fd, c);
5636 if(r == 0) return 0;
5637 if(r == 1) return 1;
5638 c->tcp_check_nb_connect = 0;
5639 }
5640 /* if we are in ssl handshake, handle SSL handshake */
5641 #ifdef HAVE_SSL
5642 if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) {
5643 if(!ssl_handshake(c))
5644 return 0;
5645 if(c->ssl_shake_state != comm_ssl_shake_none)
5646 return 1;
5647 }
5648 #endif /* HAVE_SSL */
5649 if(c->tcp_is_reading)
5650 return 1;
5651
5652 if(c->use_h2) {
5653 return comm_point_http2_handle_write(fd, c);
5654 }
5655
5656 /* http version is <= http/1.1 */
5657
5658 if(c->http_min_version >= http_version_2) {
5659 /* HTTP/2 failed, not allowed to use lower version. */
5660 return 0;
5661 }
5662
5663 /* if we are writing, write more */
5664 if(c->ssl) {
5665 if(!ssl_http_write_more(c))
5666 return 0;
5667 } else {
5668 if(!http_write_more(fd, c))
5669 return 0;
5670 }
5671
5672 /* we write a single buffer contents, that can contain
5673 * the http request, and then flip to read the results */
5674 /* see if write is done */
5675 if(sldns_buffer_remaining(c->buffer) == 0) {
5676 sldns_buffer_clear(c->buffer);
5677 if(c->tcp_do_toggle_rw)
5678 c->tcp_is_reading = 1;
5679 c->tcp_byte_count = 0;
5680 /* switch from listening(write) to listening(read) */
5681 comm_point_stop_listening(c);
5682 comm_point_start_listening(c, -1, -1);
5683 }
5684 return 1;
5685 }
5686
5687 void
5688 comm_point_http_handle_callback(int fd, short event, void* arg)
5689 {
5690 struct comm_point* c = (struct comm_point*)arg;
5691 log_assert(c->type == comm_http);
5692 ub_comm_base_now(c->ev->base);
5693
5694 if((event&UB_EV_TIMEOUT)) {
5695 verbose(VERB_QUERY, "http took too long, dropped");
5696 reclaim_http_handler(c);
5697 if(!c->tcp_do_close) {
5698 fptr_ok(fptr_whitelist_comm_point(c->callback));
5699 (void)(*c->callback)(c, c->cb_arg,
5700 NETEVENT_TIMEOUT, NULL);
5701 }
5702 return;
5703 }
5704 if((event&UB_EV_READ)) {
5705 if(!comm_point_http_handle_read(fd, c)) {
5706 reclaim_http_handler(c);
5707 if(!c->tcp_do_close) {
5708 fptr_ok(fptr_whitelist_comm_point(
5709 c->callback));
5710 (void)(*c->callback)(c, c->cb_arg,
5711 NETEVENT_CLOSED, NULL);
5712 }
5713 }
5714 return;
5715 }
5716 if((event&UB_EV_WRITE)) {
5717 if(!comm_point_http_handle_write(fd, c)) {
5718 reclaim_http_handler(c);
5719 if(!c->tcp_do_close) {
5720 fptr_ok(fptr_whitelist_comm_point(
5721 c->callback));
5722 (void)(*c->callback)(c, c->cb_arg,
5723 NETEVENT_CLOSED, NULL);
5724 }
5725 }
5726 return;
5727 }
5728 log_err("Ignored event %d for httphdl.", event);
5729 }
5730
5731 void comm_point_local_handle_callback(int fd, short event, void* arg)
5732 {
5733 struct comm_point* c = (struct comm_point*)arg;
5734 log_assert(c->type == comm_local);
5735 ub_comm_base_now(c->ev->base);
5736
5737 if((event&UB_EV_READ)) {
5738 if(!comm_point_tcp_handle_read(fd, c, 1)) {
5739 fptr_ok(fptr_whitelist_comm_point(c->callback));
5740 (void)(*c->callback)(c, c->cb_arg, NETEVENT_CLOSED,
5741 NULL);
5742 }
5743 return;
5744 }
5745 log_err("Ignored event %d for localhdl.", event);
5746 }
5747
5748 void comm_point_raw_handle_callback(int ATTR_UNUSED(fd),
5749 short event, void* arg)
5750 {
5751 struct comm_point* c = (struct comm_point*)arg;
5752 int err = NETEVENT_NOERROR;
5753 log_assert(c->type == comm_raw);
5754 ub_comm_base_now(c->ev->base);
5755
5756 if((event&UB_EV_TIMEOUT))
5757 err = NETEVENT_TIMEOUT;
5758 fptr_ok(fptr_whitelist_comm_point_raw(c->callback));
5759 (void)(*c->callback)(c, c->cb_arg, err, NULL);
5760 }
5761
5762 struct comm_point*
5763 comm_point_create_udp(struct comm_base *base, int fd, sldns_buffer* buffer,
5764 int pp2_enabled, comm_point_callback_type* callback,
5765 void* callback_arg, struct unbound_socket* socket)
5766 {
5767 struct comm_point* c = (struct comm_point*)calloc(1,
5768 sizeof(struct comm_point));
5769 short evbits;
5770 if(!c)
5771 return NULL;
5772 c->ev = (struct internal_event*)calloc(1,
5773 sizeof(struct internal_event));
5774 if(!c->ev) {
5775 free(c);
5776 return NULL;
5777 }
5778 c->ev->base = base;
5779 c->fd = fd;
5780 c->buffer = buffer;
5781 c->timeout = NULL;
5782 c->tcp_is_reading = 0;
5783 c->tcp_byte_count = 0;
5784 c->tcp_parent = NULL;
5785 c->max_tcp_count = 0;
5786 c->cur_tcp_count = 0;
5787 c->tcp_handlers = NULL;
5788 c->tcp_free = NULL;
5789 c->is_in_tcp_free = 0;
5790 c->type = comm_udp;
5791 c->tcp_do_close = 0;
5792 c->do_not_close = 0;
5793 c->tcp_do_toggle_rw = 0;
5794 c->tcp_check_nb_connect = 0;
5795 #ifdef USE_MSG_FASTOPEN
5796 c->tcp_do_fastopen = 0;
5797 #endif
5798 #ifdef USE_DNSCRYPT
5799 c->dnscrypt = 0;
5800 c->dnscrypt_buffer = buffer;
5801 #endif
5802 c->inuse = 0;
5803 c->callback = callback;
5804 c->cb_arg = callback_arg;
5805 c->socket = socket;
5806 c->pp2_enabled = pp2_enabled;
5807 c->pp2_header_state = pp2_header_none;
5808 evbits = UB_EV_READ | UB_EV_PERSIST;
5809 /* ub_event stuff */
5810 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
5811 comm_point_udp_callback, c);
5812 if(c->ev->ev == NULL) {
5813 log_err("could not baseset udp event");
5814 comm_point_delete(c);
5815 return NULL;
5816 }
5817 if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
5818 log_err("could not add udp event");
5819 comm_point_delete(c);
5820 return NULL;
5821 }
5822 c->event_added = 1;
5823 return c;
5824 }
5825
5826 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
5827 struct comm_point*
5828 comm_point_create_udp_ancil(struct comm_base *base, int fd,
5829 sldns_buffer* buffer, int pp2_enabled,
5830 comm_point_callback_type* callback, void* callback_arg, struct unbound_socket* socket)
5831 {
5832 struct comm_point* c = (struct comm_point*)calloc(1,
5833 sizeof(struct comm_point));
5834 short evbits;
5835 if(!c)
5836 return NULL;
5837 c->ev = (struct internal_event*)calloc(1,
5838 sizeof(struct internal_event));
5839 if(!c->ev) {
5840 free(c);
5841 return NULL;
5842 }
5843 c->ev->base = base;
5844 c->fd = fd;
5845 c->buffer = buffer;
5846 c->timeout = NULL;
5847 c->tcp_is_reading = 0;
5848 c->tcp_byte_count = 0;
5849 c->tcp_parent = NULL;
5850 c->max_tcp_count = 0;
5851 c->cur_tcp_count = 0;
5852 c->tcp_handlers = NULL;
5853 c->tcp_free = NULL;
5854 c->is_in_tcp_free = 0;
5855 c->type = comm_udp;
5856 c->tcp_do_close = 0;
5857 c->do_not_close = 0;
5858 #ifdef USE_DNSCRYPT
5859 c->dnscrypt = 0;
5860 c->dnscrypt_buffer = buffer;
5861 #endif
5862 c->inuse = 0;
5863 c->tcp_do_toggle_rw = 0;
5864 c->tcp_check_nb_connect = 0;
5865 #ifdef USE_MSG_FASTOPEN
5866 c->tcp_do_fastopen = 0;
5867 #endif
5868 c->callback = callback;
5869 c->cb_arg = callback_arg;
5870 c->socket = socket;
5871 c->pp2_enabled = pp2_enabled;
5872 c->pp2_header_state = pp2_header_none;
5873 evbits = UB_EV_READ | UB_EV_PERSIST;
5874 /* ub_event stuff */
5875 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
5876 comm_point_udp_ancil_callback, c);
5877 if(c->ev->ev == NULL) {
5878 log_err("could not baseset udp event");
5879 comm_point_delete(c);
5880 return NULL;
5881 }
5882 if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
5883 log_err("could not add udp event");
5884 comm_point_delete(c);
5885 return NULL;
5886 }
5887 c->event_added = 1;
5888 return c;
5889 }
5890 #endif
5891
5892 struct comm_point*
5893 comm_point_create_doq(struct comm_base *base, int fd, sldns_buffer* buffer,
5894 comm_point_callback_type* callback, void* callback_arg,
5895 struct unbound_socket* socket, struct doq_table* table,
5896 struct ub_randstate* rnd, const void* quic_sslctx,
5897 struct config_file* cfg)
5898 {
5899 #ifdef HAVE_NGTCP2
5900 struct comm_point* c = (struct comm_point*)calloc(1,
5901 sizeof(struct comm_point));
5902 short evbits;
5903 log_assert(table != NULL);
5904 if(!c)
5905 return NULL;
5906 c->ev = (struct internal_event*)calloc(1,
5907 sizeof(struct internal_event));
5908 if(!c->ev) {
5909 free(c);
5910 return NULL;
5911 }
5912 c->ev->base = base;
5913 c->fd = fd;
5914 c->buffer = buffer;
5915 c->timeout = NULL;
5916 c->tcp_is_reading = 0;
5917 c->tcp_byte_count = 0;
5918 c->tcp_parent = NULL;
5919 c->max_tcp_count = 0;
5920 c->cur_tcp_count = 0;
5921 c->tcp_handlers = NULL;
5922 c->tcp_free = NULL;
5923 c->is_in_tcp_free = 0;
5924 c->type = comm_doq;
5925 c->tcp_do_close = 0;
5926 c->do_not_close = 0;
5927 c->tcp_do_toggle_rw = 0;
5928 c->tcp_check_nb_connect = 0;
5929 #ifdef USE_MSG_FASTOPEN
5930 c->tcp_do_fastopen = 0;
5931 #endif
5932 #ifdef USE_DNSCRYPT
5933 c->dnscrypt = 0;
5934 c->dnscrypt_buffer = NULL;
5935 #endif
5936 c->doq_socket = doq_server_socket_create(table, rnd, quic_sslctx, c,
5937 base, cfg);
5938 if(!c->doq_socket) {
5939 log_err("could not create doq comm_point");
5940 comm_point_delete(c);
5941 return NULL;
5942 }
5943 c->inuse = 0;
5944 c->callback = callback;
5945 c->cb_arg = callback_arg;
5946 c->socket = socket;
5947 c->pp2_enabled = 0;
5948 c->pp2_header_state = pp2_header_none;
5949 evbits = UB_EV_READ | UB_EV_PERSIST;
5950 /* ub_event stuff */
5951 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
5952 comm_point_doq_callback, c);
5953 if(c->ev->ev == NULL) {
5954 log_err("could not baseset udp event");
5955 comm_point_delete(c);
5956 return NULL;
5957 }
5958 if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
5959 log_err("could not add udp event");
5960 comm_point_delete(c);
5961 return NULL;
5962 }
5963 c->event_added = 1;
5964 return c;
5965 #else
5966 /* no libngtcp2, so no QUIC support */
5967 (void)base;
5968 (void)buffer;
5969 (void)callback;
5970 (void)callback_arg;
5971 (void)socket;
5972 (void)rnd;
5973 (void)table;
5974 (void)quic_sslctx;
5975 (void)cfg;
5976 sock_close(fd);
5977 return NULL;
5978 #endif /* HAVE_NGTCP2 */
5979 }
5980
5981 static struct comm_point*
5982 comm_point_create_tcp_handler(struct comm_base *base,
5983 struct comm_point* parent, size_t bufsize,
5984 struct sldns_buffer* spoolbuf, comm_point_callback_type* callback,
5985 void* callback_arg, struct unbound_socket* socket)
5986 {
5987 struct comm_point* c = (struct comm_point*)calloc(1,
5988 sizeof(struct comm_point));
5989 short evbits;
5990 if(!c)
5991 return NULL;
5992 c->ev = (struct internal_event*)calloc(1,
5993 sizeof(struct internal_event));
5994 if(!c->ev) {
5995 free(c);
5996 return NULL;
5997 }
5998 c->ev->base = base;
5999 c->fd = -1;
6000 c->buffer = sldns_buffer_new(bufsize);
6001 if(!c->buffer) {
6002 free(c->ev);
6003 free(c);
6004 return NULL;
6005 }
6006 c->timeout = (struct timeval*)malloc(sizeof(struct timeval));
6007 if(!c->timeout) {
6008 sldns_buffer_free(c->buffer);
6009 free(c->ev);
6010 free(c);
6011 return NULL;
6012 }
6013 c->tcp_is_reading = 0;
6014 c->tcp_byte_count = 0;
6015 c->tcp_parent = parent;
6016 c->tcp_timeout_msec = parent->tcp_timeout_msec;
6017 c->tcp_conn_limit = parent->tcp_conn_limit;
6018 c->tcl_addr = NULL;
6019 c->tcp_keepalive = 0;
6020 c->max_tcp_count = 0;
6021 c->cur_tcp_count = 0;
6022 c->tcp_handlers = NULL;
6023 c->tcp_free = NULL;
6024 c->is_in_tcp_free = 0;
6025 c->type = comm_tcp;
6026 c->tcp_do_close = 0;
6027 c->do_not_close = 0;
6028 c->tcp_do_toggle_rw = 1;
6029 c->tcp_check_nb_connect = 0;
6030 #ifdef USE_MSG_FASTOPEN
6031 c->tcp_do_fastopen = 0;
6032 #endif
6033 #ifdef USE_DNSCRYPT
6034 c->dnscrypt = 0;
6035 /* We don't know just yet if this is a dnscrypt channel. Allocation
6036 * will be done when handling the callback. */
6037 c->dnscrypt_buffer = c->buffer;
6038 #endif
6039 c->repinfo.c = c;
6040 c->callback = callback;
6041 c->cb_arg = callback_arg;
6042 c->socket = socket;
6043 c->pp2_enabled = parent->pp2_enabled;
6044 c->pp2_header_state = pp2_header_none;
6045 if(spoolbuf) {
6046 c->tcp_req_info = tcp_req_info_create(spoolbuf);
6047 if(!c->tcp_req_info) {
6048 log_err("could not create tcp commpoint");
6049 sldns_buffer_free(c->buffer);
6050 free(c->timeout);
6051 free(c->ev);
6052 free(c);
6053 return NULL;
6054 }
6055 c->tcp_req_info->cp = c;
6056 c->tcp_do_close = 1;
6057 c->tcp_do_toggle_rw = 0;
6058 }
6059 /* add to parent free list */
6060 c->tcp_free = parent->tcp_free;
6061 parent->tcp_free = c;
6062 c->is_in_tcp_free = 1;
6063 /* ub_event stuff */
6064 evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT;
6065 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6066 comm_point_tcp_handle_callback, c);
6067 if(c->ev->ev == NULL)
6068 {
6069 log_err("could not basetset tcphdl event");
6070 parent->tcp_free = c->tcp_free;
6071 tcp_req_info_delete(c->tcp_req_info);
6072 sldns_buffer_free(c->buffer);
6073 free(c->timeout);
6074 free(c->ev);
6075 free(c);
6076 return NULL;
6077 }
6078 return c;
6079 }
6080
6081 static struct comm_point*
6082 comm_point_create_http_handler(struct comm_base *base,
6083 struct comm_point* parent, size_t bufsize, int harden_large_queries,
6084 uint32_t http_max_streams, char* http_endpoint,
6085 comm_point_callback_type* callback, void* callback_arg,
6086 struct unbound_socket* socket)
6087 {
6088 struct comm_point* c = (struct comm_point*)calloc(1,
6089 sizeof(struct comm_point));
6090 short evbits;
6091 if(!c)
6092 return NULL;
6093 c->ev = (struct internal_event*)calloc(1,
6094 sizeof(struct internal_event));
6095 if(!c->ev) {
6096 free(c);
6097 return NULL;
6098 }
6099 c->ev->base = base;
6100 c->fd = -1;
6101 c->buffer = sldns_buffer_new(bufsize);
6102 if(!c->buffer) {
6103 free(c->ev);
6104 free(c);
6105 return NULL;
6106 }
6107 c->timeout = (struct timeval*)malloc(sizeof(struct timeval));
6108 if(!c->timeout) {
6109 sldns_buffer_free(c->buffer);
6110 free(c->ev);
6111 free(c);
6112 return NULL;
6113 }
6114 c->tcp_is_reading = 0;
6115 c->tcp_byte_count = 0;
6116 c->tcp_parent = parent;
6117 c->tcp_timeout_msec = parent->tcp_timeout_msec;
6118 c->tcp_conn_limit = parent->tcp_conn_limit;
6119 c->tcl_addr = NULL;
6120 c->tcp_keepalive = 0;
6121 c->max_tcp_count = 0;
6122 c->cur_tcp_count = 0;
6123 c->tcp_handlers = NULL;
6124 c->tcp_free = NULL;
6125 c->is_in_tcp_free = 0;
6126 c->type = comm_http;
6127 c->tcp_do_close = 1;
6128 c->do_not_close = 0;
6129 c->tcp_do_toggle_rw = 1; /* will be set to 0 after http2 upgrade */
6130 c->tcp_check_nb_connect = 0;
6131 #ifdef USE_MSG_FASTOPEN
6132 c->tcp_do_fastopen = 0;
6133 #endif
6134 #ifdef USE_DNSCRYPT
6135 c->dnscrypt = 0;
6136 c->dnscrypt_buffer = NULL;
6137 #endif
6138 c->repinfo.c = c;
6139 c->callback = callback;
6140 c->cb_arg = callback_arg;
6141 c->socket = socket;
6142 c->pp2_enabled = 0;
6143 c->pp2_header_state = pp2_header_none;
6144
6145 c->http_min_version = http_version_2;
6146 c->http2_stream_max_qbuffer_size = bufsize;
6147 if(harden_large_queries && bufsize > 512)
6148 c->http2_stream_max_qbuffer_size = 512;
6149 c->http2_max_streams = http_max_streams;
6150 if(!(c->http_endpoint = strdup(http_endpoint))) {
6151 log_err("could not strdup http_endpoint");
6152 sldns_buffer_free(c->buffer);
6153 free(c->timeout);
6154 free(c->ev);
6155 free(c);
6156 return NULL;
6157 }
6158 c->use_h2 = 0;
6159 #ifdef HAVE_NGHTTP2
6160 if(!(c->h2_session = http2_session_create(c))) {
6161 log_err("could not create http2 session");
6162 free(c->http_endpoint);
6163 sldns_buffer_free(c->buffer);
6164 free(c->timeout);
6165 free(c->ev);
6166 free(c);
6167 return NULL;
6168 }
6169 if(!(c->h2_session->callbacks = http2_req_callbacks_create())) {
6170 log_err("could not create http2 callbacks");
6171 http2_session_delete(c->h2_session);
6172 free(c->http_endpoint);
6173 sldns_buffer_free(c->buffer);
6174 free(c->timeout);
6175 free(c->ev);
6176 free(c);
6177 return NULL;
6178 }
6179 #endif
6180
6181 /* add to parent free list */
6182 c->tcp_free = parent->tcp_free;
6183 parent->tcp_free = c;
6184 c->is_in_tcp_free = 1;
6185 /* ub_event stuff */
6186 evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT;
6187 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6188 comm_point_http_handle_callback, c);
6189 if(c->ev->ev == NULL)
6190 {
6191 log_err("could not set http handler event");
6192 parent->tcp_free = c->tcp_free;
6193 http2_session_delete(c->h2_session);
6194 sldns_buffer_free(c->buffer);
6195 free(c->timeout);
6196 free(c->ev);
6197 free(c);
6198 return NULL;
6199 }
6200 return c;
6201 }
6202
6203 struct comm_point*
6204 comm_point_create_tcp(struct comm_base *base, int fd, int num,
6205 int idle_timeout, int harden_large_queries,
6206 uint32_t http_max_streams, char* http_endpoint,
6207 struct tcl_list* tcp_conn_limit, size_t bufsize,
6208 struct sldns_buffer* spoolbuf, enum listen_type port_type,
6209 int pp2_enabled, comm_point_callback_type* callback,
6210 void* callback_arg, struct unbound_socket* socket)
6211 {
6212 struct comm_point* c = (struct comm_point*)calloc(1,
6213 sizeof(struct comm_point));
6214 short evbits;
6215 int i;
6216 /* first allocate the TCP accept listener */
6217 if(!c)
6218 return NULL;
6219 c->ev = (struct internal_event*)calloc(1,
6220 sizeof(struct internal_event));
6221 if(!c->ev) {
6222 free(c);
6223 return NULL;
6224 }
6225 c->ev->base = base;
6226 c->fd = fd;
6227 c->buffer = NULL;
6228 c->timeout = NULL;
6229 c->tcp_is_reading = 0;
6230 c->tcp_byte_count = 0;
6231 c->tcp_timeout_msec = idle_timeout;
6232 c->tcp_conn_limit = tcp_conn_limit;
6233 c->tcl_addr = NULL;
6234 c->tcp_keepalive = 0;
6235 c->tcp_parent = NULL;
6236 c->max_tcp_count = num;
6237 c->cur_tcp_count = 0;
6238 c->tcp_handlers = (struct comm_point**)calloc((size_t)num,
6239 sizeof(struct comm_point*));
6240 if(!c->tcp_handlers) {
6241 free(c->ev);
6242 free(c);
6243 return NULL;
6244 }
6245 c->tcp_free = NULL;
6246 c->is_in_tcp_free = 0;
6247 c->type = comm_tcp_accept;
6248 c->tcp_do_close = 0;
6249 c->do_not_close = 0;
6250 c->tcp_do_toggle_rw = 0;
6251 c->tcp_check_nb_connect = 0;
6252 #ifdef USE_MSG_FASTOPEN
6253 c->tcp_do_fastopen = 0;
6254 #endif
6255 #ifdef USE_DNSCRYPT
6256 c->dnscrypt = 0;
6257 c->dnscrypt_buffer = NULL;
6258 #endif
6259 c->callback = NULL;
6260 c->cb_arg = NULL;
6261 c->socket = socket;
6262 c->pp2_enabled = (port_type==listen_type_http?0:pp2_enabled);
6263 c->pp2_header_state = pp2_header_none;
6264 evbits = UB_EV_READ | UB_EV_PERSIST;
6265 /* ub_event stuff */
6266 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6267 comm_point_tcp_accept_callback, c);
6268 if(c->ev->ev == NULL) {
6269 log_err("could not baseset tcpacc event");
6270 comm_point_delete(c);
6271 return NULL;
6272 }
6273 if (ub_event_add(c->ev->ev, c->timeout) != 0) {
6274 log_err("could not add tcpacc event");
6275 comm_point_delete(c);
6276 return NULL;
6277 }
6278 c->event_added = 1;
6279 /* now prealloc the handlers */
6280 for(i=0; i<num; i++) {
6281 if(port_type == listen_type_tcp ||
6282 port_type == listen_type_ssl ||
6283 port_type == listen_type_tcp_dnscrypt) {
6284 c->tcp_handlers[i] = comm_point_create_tcp_handler(base,
6285 c, bufsize, spoolbuf, callback, callback_arg, socket);
6286 } else if(port_type == listen_type_http) {
6287 c->tcp_handlers[i] = comm_point_create_http_handler(
6288 base, c, bufsize, harden_large_queries,
6289 http_max_streams, http_endpoint,
6290 callback, callback_arg, socket);
6291 }
6292 else {
6293 log_err("could not create tcp handler, unknown listen "
6294 "type");
6295 return NULL;
6296 }
6297 if(!c->tcp_handlers[i]) {
6298 comm_point_delete(c);
6299 return NULL;
6300 }
6301 }
6302
6303 return c;
6304 }
6305
6306 struct comm_point*
6307 comm_point_create_tcp_out(struct comm_base *base, size_t bufsize,
6308 comm_point_callback_type* callback, void* callback_arg)
6309 {
6310 struct comm_point* c = (struct comm_point*)calloc(1,
6311 sizeof(struct comm_point));
6312 short evbits;
6313 if(!c)
6314 return NULL;
6315 c->ev = (struct internal_event*)calloc(1,
6316 sizeof(struct internal_event));
6317 if(!c->ev) {
6318 free(c);
6319 return NULL;
6320 }
6321 c->ev->base = base;
6322 c->fd = -1;
6323 c->buffer = sldns_buffer_new(bufsize);
6324 if(!c->buffer) {
6325 free(c->ev);
6326 free(c);
6327 return NULL;
6328 }
6329 c->timeout = NULL;
6330 c->tcp_is_reading = 0;
6331 c->tcp_byte_count = 0;
6332 c->tcp_timeout_msec = TCP_QUERY_TIMEOUT;
6333 c->tcp_conn_limit = NULL;
6334 c->tcl_addr = NULL;
6335 c->tcp_keepalive = 0;
6336 c->tcp_parent = NULL;
6337 c->max_tcp_count = 0;
6338 c->cur_tcp_count = 0;
6339 c->tcp_handlers = NULL;
6340 c->tcp_free = NULL;
6341 c->is_in_tcp_free = 0;
6342 c->type = comm_tcp;
6343 c->tcp_do_close = 0;
6344 c->do_not_close = 0;
6345 c->tcp_do_toggle_rw = 1;
6346 c->tcp_check_nb_connect = 1;
6347 #ifdef USE_MSG_FASTOPEN
6348 c->tcp_do_fastopen = 1;
6349 #endif
6350 #ifdef USE_DNSCRYPT
6351 c->dnscrypt = 0;
6352 c->dnscrypt_buffer = c->buffer;
6353 #endif
6354 c->repinfo.c = c;
6355 c->callback = callback;
6356 c->cb_arg = callback_arg;
6357 c->pp2_enabled = 0;
6358 c->pp2_header_state = pp2_header_none;
6359 evbits = UB_EV_PERSIST | UB_EV_WRITE;
6360 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6361 comm_point_tcp_handle_callback, c);
6362 if(c->ev->ev == NULL)
6363 {
6364 log_err("could not baseset tcpout event");
6365 sldns_buffer_free(c->buffer);
6366 free(c->ev);
6367 free(c);
6368 return NULL;
6369 }
6370
6371 return c;
6372 }
6373
6374 struct comm_point*
6375 comm_point_create_http_out(struct comm_base *base, size_t bufsize,
6376 comm_point_callback_type* callback, void* callback_arg,
6377 sldns_buffer* temp)
6378 {
6379 struct comm_point* c = (struct comm_point*)calloc(1,
6380 sizeof(struct comm_point));
6381 short evbits;
6382 if(!c)
6383 return NULL;
6384 c->ev = (struct internal_event*)calloc(1,
6385 sizeof(struct internal_event));
6386 if(!c->ev) {
6387 free(c);
6388 return NULL;
6389 }
6390 c->ev->base = base;
6391 c->fd = -1;
6392 c->buffer = sldns_buffer_new(bufsize);
6393 if(!c->buffer) {
6394 free(c->ev);
6395 free(c);
6396 return NULL;
6397 }
6398 c->timeout = NULL;
6399 c->tcp_is_reading = 0;
6400 c->tcp_byte_count = 0;
6401 c->tcp_parent = NULL;
6402 c->max_tcp_count = 0;
6403 c->cur_tcp_count = 0;
6404 c->tcp_handlers = NULL;
6405 c->tcp_free = NULL;
6406 c->is_in_tcp_free = 0;
6407 c->type = comm_http;
6408 c->tcp_do_close = 0;
6409 c->do_not_close = 0;
6410 c->tcp_do_toggle_rw = 1;
6411 c->tcp_check_nb_connect = 1;
6412 c->http_in_headers = 1;
6413 c->http_in_chunk_headers = 0;
6414 c->http_is_chunked = 0;
6415 c->http_temp = temp;
6416 #ifdef USE_MSG_FASTOPEN
6417 c->tcp_do_fastopen = 1;
6418 #endif
6419 #ifdef USE_DNSCRYPT
6420 c->dnscrypt = 0;
6421 c->dnscrypt_buffer = c->buffer;
6422 #endif
6423 c->repinfo.c = c;
6424 c->callback = callback;
6425 c->cb_arg = callback_arg;
6426 c->pp2_enabled = 0;
6427 c->pp2_header_state = pp2_header_none;
6428 evbits = UB_EV_PERSIST | UB_EV_WRITE;
6429 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6430 comm_point_http_handle_callback, c);
6431 if(c->ev->ev == NULL)
6432 {
6433 log_err("could not baseset tcpout event");
6434 #ifdef HAVE_SSL
6435 SSL_free(c->ssl);
6436 #endif
6437 sldns_buffer_free(c->buffer);
6438 free(c->ev);
6439 free(c);
6440 return NULL;
6441 }
6442
6443 return c;
6444 }
6445
6446 struct comm_point*
6447 comm_point_create_local(struct comm_base *base, int fd, size_t bufsize,
6448 comm_point_callback_type* callback, void* callback_arg)
6449 {
6450 struct comm_point* c = (struct comm_point*)calloc(1,
6451 sizeof(struct comm_point));
6452 short evbits;
6453 if(!c)
6454 return NULL;
6455 c->ev = (struct internal_event*)calloc(1,
6456 sizeof(struct internal_event));
6457 if(!c->ev) {
6458 free(c);
6459 return NULL;
6460 }
6461 c->ev->base = base;
6462 c->fd = fd;
6463 c->buffer = sldns_buffer_new(bufsize);
6464 if(!c->buffer) {
6465 free(c->ev);
6466 free(c);
6467 return NULL;
6468 }
6469 c->timeout = NULL;
6470 c->tcp_is_reading = 1;
6471 c->tcp_byte_count = 0;
6472 c->tcp_parent = NULL;
6473 c->max_tcp_count = 0;
6474 c->cur_tcp_count = 0;
6475 c->tcp_handlers = NULL;
6476 c->tcp_free = NULL;
6477 c->is_in_tcp_free = 0;
6478 c->type = comm_local;
6479 c->tcp_do_close = 0;
6480 c->do_not_close = 1;
6481 c->tcp_do_toggle_rw = 0;
6482 c->tcp_check_nb_connect = 0;
6483 #ifdef USE_MSG_FASTOPEN
6484 c->tcp_do_fastopen = 0;
6485 #endif
6486 #ifdef USE_DNSCRYPT
6487 c->dnscrypt = 0;
6488 c->dnscrypt_buffer = c->buffer;
6489 #endif
6490 c->callback = callback;
6491 c->cb_arg = callback_arg;
6492 c->pp2_enabled = 0;
6493 c->pp2_header_state = pp2_header_none;
6494 /* ub_event stuff */
6495 evbits = UB_EV_PERSIST | UB_EV_READ;
6496 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6497 comm_point_local_handle_callback, c);
6498 if(c->ev->ev == NULL) {
6499 log_err("could not baseset localhdl event");
6500 free(c->ev);
6501 free(c);
6502 return NULL;
6503 }
6504 if (ub_event_add(c->ev->ev, c->timeout) != 0) {
6505 log_err("could not add localhdl event");
6506 ub_event_free(c->ev->ev);
6507 free(c->ev);
6508 free(c);
6509 return NULL;
6510 }
6511 c->event_added = 1;
6512 return c;
6513 }
6514
6515 struct comm_point*
6516 comm_point_create_raw(struct comm_base* base, int fd, int writing,
6517 comm_point_callback_type* callback, void* callback_arg)
6518 {
6519 struct comm_point* c = (struct comm_point*)calloc(1,
6520 sizeof(struct comm_point));
6521 short evbits;
6522 if(!c)
6523 return NULL;
6524 c->ev = (struct internal_event*)calloc(1,
6525 sizeof(struct internal_event));
6526 if(!c->ev) {
6527 free(c);
6528 return NULL;
6529 }
6530 c->ev->base = base;
6531 c->fd = fd;
6532 c->buffer = NULL;
6533 c->timeout = NULL;
6534 c->tcp_is_reading = 0;
6535 c->tcp_byte_count = 0;
6536 c->tcp_parent = NULL;
6537 c->max_tcp_count = 0;
6538 c->cur_tcp_count = 0;
6539 c->tcp_handlers = NULL;
6540 c->tcp_free = NULL;
6541 c->is_in_tcp_free = 0;
6542 c->type = comm_raw;
6543 c->tcp_do_close = 0;
6544 c->do_not_close = 1;
6545 c->tcp_do_toggle_rw = 0;
6546 c->tcp_check_nb_connect = 0;
6547 #ifdef USE_MSG_FASTOPEN
6548 c->tcp_do_fastopen = 0;
6549 #endif
6550 #ifdef USE_DNSCRYPT
6551 c->dnscrypt = 0;
6552 c->dnscrypt_buffer = c->buffer;
6553 #endif
6554 c->callback = callback;
6555 c->cb_arg = callback_arg;
6556 c->pp2_enabled = 0;
6557 c->pp2_header_state = pp2_header_none;
6558 /* ub_event stuff */
6559 if(writing)
6560 evbits = UB_EV_PERSIST | UB_EV_WRITE;
6561 else evbits = UB_EV_PERSIST | UB_EV_READ;
6562 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6563 comm_point_raw_handle_callback, c);
6564 if(c->ev->ev == NULL) {
6565 log_err("could not baseset rawhdl event");
6566 free(c->ev);
6567 free(c);
6568 return NULL;
6569 }
6570 if (ub_event_add(c->ev->ev, c->timeout) != 0) {
6571 log_err("could not add rawhdl event");
6572 ub_event_free(c->ev->ev);
6573 free(c->ev);
6574 free(c);
6575 return NULL;
6576 }
6577 c->event_added = 1;
6578 return c;
6579 }
6580
6581 void
6582 comm_point_close(struct comm_point* c)
6583 {
6584 if(!c)
6585 return;
6586 if(c->fd != -1) {
6587 verbose(5, "comm_point_close of %d: event_del", c->fd);
6588 if(c->event_added) {
6589 if(ub_event_del(c->ev->ev) != 0) {
6590 log_err("could not event_del on close");
6591 }
6592 c->event_added = 0;
6593 }
6594 }
6595 tcl_close_connection(c->tcl_addr);
6596 if(c->tcp_req_info)
6597 tcp_req_info_clear(c->tcp_req_info);
6598 if(c->h2_session)
6599 http2_session_server_delete(c->h2_session);
6600 /* stop the comm point from reading or writing after it is closed. */
6601 if(c->tcp_more_read_again && *c->tcp_more_read_again)
6602 *c->tcp_more_read_again = 0;
6603 if(c->tcp_more_write_again && *c->tcp_more_write_again)
6604 *c->tcp_more_write_again = 0;
6605
6606 /* close fd after removing from event lists, or epoll.. is messed up */
6607 if(c->fd != -1 && !c->do_not_close) {
6608 #ifdef USE_WINSOCK
6609 if(c->type == comm_tcp || c->type == comm_http) {
6610 /* delete sticky events for the fd, it gets closed */
6611 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
6612 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
6613 }
6614 #endif
6615 verbose(VERB_ALGO, "close fd %d", c->fd);
6616 sock_close(c->fd);
6617 }
6618 c->fd = -1;
6619 }
6620
6621 void
6622 comm_point_delete(struct comm_point* c)
6623 {
6624 if(!c)
6625 return;
6626 if((c->type == comm_tcp || c->type == comm_http) && c->ssl) {
6627 #ifdef HAVE_SSL
6628 SSL_shutdown(c->ssl);
6629 SSL_free(c->ssl);
6630 #endif
6631 }
6632 if(c->type == comm_http && c->http_endpoint) {
6633 free(c->http_endpoint);
6634 c->http_endpoint = NULL;
6635 }
6636 comm_point_close(c);
6637 if(c->tcp_handlers) {
6638 int i;
6639 for(i=0; i<c->max_tcp_count; i++)
6640 comm_point_delete(c->tcp_handlers[i]);
6641 free(c->tcp_handlers);
6642 }
6643 free(c->timeout);
6644 if(c->type == comm_tcp || c->type == comm_local || c->type == comm_http) {
6645 sldns_buffer_free(c->buffer);
6646 #ifdef USE_DNSCRYPT
6647 if(c->dnscrypt && c->dnscrypt_buffer != c->buffer) {
6648 sldns_buffer_free(c->dnscrypt_buffer);
6649 }
6650 #endif
6651 if(c->tcp_req_info) {
6652 tcp_req_info_delete(c->tcp_req_info);
6653 }
6654 if(c->h2_session) {
6655 http2_session_delete(c->h2_session);
6656 }
6657 }
6658 #ifdef HAVE_NGTCP2
6659 if(c->doq_socket)
6660 doq_server_socket_delete(c->doq_socket);
6661 #endif
6662 ub_event_free(c->ev->ev);
6663 free(c->ev);
6664 free(c);
6665 }
6666
#ifdef USE_DNSTAP
/**
 * Log the local address and the client address at VERB_ALGO, and pass the
 * reply in buffer to dnstap as a client response message.
 * @param addr: local address the reply is sent from; handed to dnstap
 *	cast to a sockaddr_storage pointer.
 */
static void
send_reply_dnstap(struct dt_env* dtenv,
	struct sockaddr* addr, socklen_t addrlen,
	struct sockaddr_storage* client_addr, socklen_t client_addrlen,
	enum comm_point_type type, void* ssl, sldns_buffer* buffer)
{
	struct sockaddr_storage* local = (struct sockaddr_storage*)addr;

	log_addr(VERB_ALGO, "from local addr", (void*)addr, addrlen);
	log_addr(VERB_ALGO, "response to client", client_addr, client_addrlen);
	dt_msg_send_client_response(dtenv, client_addr, local, type, ssl,
		buffer);
}
#endif
6680
6681 void
6682 comm_point_send_reply(struct comm_reply *repinfo)
6683 {
6684 struct sldns_buffer* buffer;
6685 log_assert(repinfo && repinfo->c);
6686 #ifdef USE_DNSCRYPT
6687 buffer = repinfo->c->dnscrypt_buffer;
6688 if(!dnsc_handle_uncurved_request(repinfo)) {
6689 return;
6690 }
6691 #else
6692 buffer = repinfo->c->buffer;
6693 #endif
6694 if(repinfo->c->type == comm_udp) {
6695 if(repinfo->srctype)
6696 comm_point_send_udp_msg_if(repinfo->c, buffer,
6697 (struct sockaddr*)&repinfo->remote_addr,
6698 repinfo->remote_addrlen, repinfo);
6699 else
6700 comm_point_send_udp_msg(repinfo->c, buffer,
6701 (struct sockaddr*)&repinfo->remote_addr,
6702 repinfo->remote_addrlen, 0);
6703 #ifdef USE_DNSTAP
6704 /*
6705 * sending src (client)/dst (local service) addresses over
6706 * DNSTAP from udp callback
6707 */
6708 if(repinfo->c->dtenv != NULL && repinfo->c->dtenv->log_client_response_messages) {
6709 send_reply_dnstap(repinfo->c->dtenv,
6710 repinfo->c->socket->addr,
6711 repinfo->c->socket->addrlen,
6712 &repinfo->client_addr, repinfo->client_addrlen,
6713 repinfo->c->type, repinfo->c->ssl,
6714 repinfo->c->buffer);
6715 }
6716 #endif
6717 } else {
6718 #ifdef USE_DNSTAP
6719 struct dt_env* dtenv =
6720 #ifdef HAVE_NGTCP2
6721 repinfo->c->doq_socket
6722 ?repinfo->c->dtenv:
6723 #endif
6724 repinfo->c->tcp_parent->dtenv;
6725 struct sldns_buffer* dtbuffer = repinfo->c->tcp_req_info
6726 ?repinfo->c->tcp_req_info->spool_buffer
6727 :repinfo->c->buffer;
6728 #ifdef USE_DNSCRYPT
6729 if(repinfo->c->dnscrypt && repinfo->is_dnscrypted)
6730 dtbuffer = repinfo->c->buffer;
6731 #endif
6732 /*
6733 * sending src (client)/dst (local service) addresses over
6734 * DNSTAP from other callbacks
6735 */
6736 if(dtenv != NULL && dtenv->log_client_response_messages) {
6737 send_reply_dnstap(dtenv,
6738 repinfo->c->socket->addr,
6739 repinfo->c->socket->addrlen,
6740 &repinfo->client_addr, repinfo->client_addrlen,
6741 repinfo->c->type, repinfo->c->ssl,
6742 dtbuffer);
6743 }
6744 #endif
6745 if(repinfo->c->tcp_req_info) {
6746 tcp_req_info_send_reply(repinfo->c->tcp_req_info);
6747 } else if(repinfo->c->use_h2) {
6748 if(!http2_submit_dns_response(repinfo->c->h2_session)) {
6749 return;
6750 }
6751 repinfo->c->h2_stream = NULL;
6752 repinfo->c->tcp_is_reading = 0;
6753 comm_point_stop_listening(repinfo->c);
6754 comm_point_start_listening(repinfo->c, -1,
6755 adjusted_tcp_timeout(repinfo->c));
6756 return;
6757 #ifdef HAVE_NGTCP2
6758 } else if(repinfo->c->doq_socket) {
6759 doq_socket_send_reply(repinfo);
6760 #endif
6761 } else {
6762 comm_point_start_listening(repinfo->c, -1,
6763 adjusted_tcp_timeout(repinfo->c));
6764 }
6765 }
6766 }
6767
6768 void
6769 comm_point_drop_reply(struct comm_reply* repinfo)
6770 {
6771 if(!repinfo)
6772 return;
6773 log_assert(repinfo->c);
6774 log_assert(repinfo->c->type != comm_tcp_accept);
6775 if(repinfo->c->type == comm_udp)
6776 return;
6777 if(repinfo->c->tcp_req_info)
6778 repinfo->c->tcp_req_info->is_drop = 1;
6779 if(repinfo->c->type == comm_http) {
6780 if(repinfo->c->h2_session) {
6781 repinfo->c->h2_session->is_drop = 1;
6782 if(!repinfo->c->h2_session->postpone_drop)
6783 reclaim_http_handler(repinfo->c);
6784 return;
6785 }
6786 reclaim_http_handler(repinfo->c);
6787 return;
6788 #ifdef HAVE_NGTCP2
6789 } else if(repinfo->c->doq_socket) {
6790 doq_socket_drop_reply(repinfo);
6791 return;
6792 #endif
6793 }
6794 reclaim_tcp_handler(repinfo->c);
6795 }
6796
6797 void
6798 comm_point_stop_listening(struct comm_point* c)
6799 {
6800 verbose(VERB_ALGO, "comm point stop listening %d", c->fd);
6801 if(c->event_added) {
6802 if(ub_event_del(c->ev->ev) != 0) {
6803 log_err("event_del error to stoplisten");
6804 }
6805 c->event_added = 0;
6806 }
6807 }
6808
6809 void
6810 comm_point_start_listening(struct comm_point* c, int newfd, int msec)
6811 {
6812 verbose(VERB_ALGO, "comm point start listening %d (%d msec)",
6813 c->fd==-1?newfd:c->fd, msec);
6814 if(c->type == comm_tcp_accept && !c->tcp_free) {
6815 /* no use to start listening no free slots. */
6816 return;
6817 }
6818 if(c->event_added) {
6819 if(ub_event_del(c->ev->ev) != 0) {
6820 log_err("event_del error to startlisten");
6821 }
6822 c->event_added = 0;
6823 }
6824 if(msec != -1 && msec != 0) {
6825 if(!c->timeout) {
6826 c->timeout = (struct timeval*)malloc(sizeof(
6827 struct timeval));
6828 if(!c->timeout) {
6829 log_err("cpsl: malloc failed. No net read.");
6830 return;
6831 }
6832 }
6833 ub_event_add_bits(c->ev->ev, UB_EV_TIMEOUT);
6834 #ifndef S_SPLINT_S /* splint fails on struct timeval. */
6835 c->timeout->tv_sec = msec/1000;
6836 c->timeout->tv_usec = (msec%1000)*1000;
6837 #endif /* S_SPLINT_S */
6838 } else {
6839 if(msec == 0 || !c->timeout) {
6840 ub_event_del_bits(c->ev->ev, UB_EV_TIMEOUT);
6841 }
6842 }
6843 if(c->type == comm_tcp || c->type == comm_http) {
6844 ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
6845 if(c->tcp_write_and_read) {
6846 verbose(5, "startlistening %d mode rw", (newfd==-1?c->fd:newfd));
6847 ub_event_add_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
6848 } else if(c->tcp_is_reading) {
6849 verbose(5, "startlistening %d mode r", (newfd==-1?c->fd:newfd));
6850 ub_event_add_bits(c->ev->ev, UB_EV_READ);
6851 } else {
6852 verbose(5, "startlistening %d mode w", (newfd==-1?c->fd:newfd));
6853 ub_event_add_bits(c->ev->ev, UB_EV_WRITE);
6854 }
6855 }
6856 if(newfd != -1) {
6857 if(c->fd != -1 && c->fd != newfd) {
6858 verbose(5, "cpsl close of fd %d for %d", c->fd, newfd);
6859 sock_close(c->fd);
6860 }
6861 c->fd = newfd;
6862 ub_event_set_fd(c->ev->ev, c->fd);
6863 }
6864 if(ub_event_add(c->ev->ev, msec==0?NULL:c->timeout) != 0) {
6865 log_err("event_add failed. in cpsl.");
6866 return;
6867 }
6868 c->event_added = 1;
6869 }
6870
6871 void comm_point_listen_for_rw(struct comm_point* c, int rd, int wr)
6872 {
6873 verbose(VERB_ALGO, "comm point listen_for_rw %d %d", c->fd, wr);
6874 if(c->event_added) {
6875 if(ub_event_del(c->ev->ev) != 0) {
6876 log_err("event_del error to cplf");
6877 }
6878 c->event_added = 0;
6879 }
6880 if(!c->timeout) {
6881 ub_event_del_bits(c->ev->ev, UB_EV_TIMEOUT);
6882 }
6883 ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
6884 if(rd) ub_event_add_bits(c->ev->ev, UB_EV_READ);
6885 if(wr) ub_event_add_bits(c->ev->ev, UB_EV_WRITE);
6886 if(ub_event_add(c->ev->ev, c->timeout) != 0) {
6887 log_err("event_add failed. in cplf.");
6888 return;
6889 }
6890 c->event_added = 1;
6891 }
6892
6893 size_t comm_point_get_mem(struct comm_point* c)
6894 {
6895 size_t s;
6896 if(!c)
6897 return 0;
6898 s = sizeof(*c) + sizeof(*c->ev);
6899 if(c->timeout)
6900 s += sizeof(*c->timeout);
6901 if(c->type == comm_tcp || c->type == comm_local) {
6902 s += sizeof(*c->buffer) + sldns_buffer_capacity(c->buffer);
6903 #ifdef USE_DNSCRYPT
6904 s += sizeof(*c->dnscrypt_buffer);
6905 if(c->buffer != c->dnscrypt_buffer) {
6906 s += sldns_buffer_capacity(c->dnscrypt_buffer);
6907 }
6908 #endif
6909 }
6910 if(c->type == comm_tcp_accept) {
6911 int i;
6912 for(i=0; i<c->max_tcp_count; i++)
6913 s += comm_point_get_mem(c->tcp_handlers[i]);
6914 }
6915 return s;
6916 }
6917
6918 struct comm_timer*
6919 comm_timer_create(struct comm_base* base, void (*cb)(void*), void* cb_arg)
6920 {
6921 struct internal_timer *tm = (struct internal_timer*)calloc(1,
6922 sizeof(struct internal_timer));
6923 if(!tm) {
6924 log_err("malloc failed");
6925 return NULL;
6926 }
6927 tm->super.ev_timer = tm;
6928 tm->base = base;
6929 tm->super.callback = cb;
6930 tm->super.cb_arg = cb_arg;
6931 tm->ev = ub_event_new(base->eb->base, -1, UB_EV_TIMEOUT,
6932 comm_timer_callback, &tm->super);
6933 if(tm->ev == NULL) {
6934 log_err("timer_create: event_base_set failed.");
6935 free(tm);
6936 return NULL;
6937 }
6938 return &tm->super;
6939 }
6940
6941 void
6942 comm_timer_disable(struct comm_timer* timer)
6943 {
6944 if(!timer)
6945 return;
6946 ub_timer_del(timer->ev_timer->ev);
6947 timer->ev_timer->enabled = 0;
6948 }
6949
6950 void
6951 comm_timer_set(struct comm_timer* timer, struct timeval* tv)
6952 {
6953 log_assert(tv);
6954 if(timer->ev_timer->enabled)
6955 comm_timer_disable(timer);
6956 if(ub_timer_add(timer->ev_timer->ev, timer->ev_timer->base->eb->base,
6957 comm_timer_callback, timer, tv) != 0)
6958 log_err("comm_timer_set: evtimer_add failed.");
6959 timer->ev_timer->enabled = 1;
6960 }
6961
6962 void
6963 comm_timer_delete(struct comm_timer* timer)
6964 {
6965 if(!timer)
6966 return;
6967 comm_timer_disable(timer);
6968 /* Free the sub struct timer->ev_timer derived from the super struct timer.
6969 * i.e. assert(timer == timer->ev_timer)
6970 */
6971 ub_event_free(timer->ev_timer->ev);
6972 free(timer->ev_timer);
6973 }
6974
6975 void
6976 comm_timer_callback(int ATTR_UNUSED(fd), short event, void* arg)
6977 {
6978 struct comm_timer* tm = (struct comm_timer*)arg;
6979 if(!(event&UB_EV_TIMEOUT))
6980 return;
6981 ub_comm_base_now(tm->ev_timer->base);
6982 tm->ev_timer->enabled = 0;
6983 fptr_ok(fptr_whitelist_comm_timer(tm->callback));
6984 (*tm->callback)(tm->cb_arg);
6985 }
6986
6987 int
6988 comm_timer_is_set(struct comm_timer* timer)
6989 {
6990 return (int)timer->ev_timer->enabled;
6991 }
6992
6993 size_t
6994 comm_timer_get_mem(struct comm_timer* timer)
6995 {
6996 if(!timer) return 0;
6997 return sizeof(struct internal_timer);
6998 }
6999
7000 struct comm_signal*
7001 comm_signal_create(struct comm_base* base,
7002 void (*callback)(int, void*), void* cb_arg)
7003 {
7004 struct comm_signal* com = (struct comm_signal*)malloc(
7005 sizeof(struct comm_signal));
7006 if(!com) {
7007 log_err("malloc failed");
7008 return NULL;
7009 }
7010 com->base = base;
7011 com->callback = callback;
7012 com->cb_arg = cb_arg;
7013 com->ev_signal = NULL;
7014 return com;
7015 }
7016
7017 void
7018 comm_signal_callback(int sig, short event, void* arg)
7019 {
7020 struct comm_signal* comsig = (struct comm_signal*)arg;
7021 if(!(event & UB_EV_SIGNAL))
7022 return;
7023 ub_comm_base_now(comsig->base);
7024 fptr_ok(fptr_whitelist_comm_signal(comsig->callback));
7025 (*comsig->callback)(sig, comsig->cb_arg);
7026 }
7027
7028 int
7029 comm_signal_bind(struct comm_signal* comsig, int sig)
7030 {
7031 struct internal_signal* entry = (struct internal_signal*)calloc(1,
7032 sizeof(struct internal_signal));
7033 if(!entry) {
7034 log_err("malloc failed");
7035 return 0;
7036 }
7037 log_assert(comsig);
7038 /* add signal event */
7039 entry->ev = ub_signal_new(comsig->base->eb->base, sig,
7040 comm_signal_callback, comsig);
7041 if(entry->ev == NULL) {
7042 log_err("Could not create signal event");
7043 free(entry);
7044 return 0;
7045 }
7046 if(ub_signal_add(entry->ev, NULL) != 0) {
7047 log_err("Could not add signal handler");
7048 ub_event_free(entry->ev);
7049 free(entry);
7050 return 0;
7051 }
7052 /* link into list */
7053 entry->next = comsig->ev_signal;
7054 comsig->ev_signal = entry;
7055 return 1;
7056 }
7057
7058 void
7059 comm_signal_delete(struct comm_signal* comsig)
7060 {
7061 struct internal_signal* p, *np;
7062 if(!comsig)
7063 return;
7064 p=comsig->ev_signal;
7065 while(p) {
7066 np = p->next;
7067 ub_signal_del(p->ev);
7068 ub_event_free(p->ev);
7069 free(p);
7070 p = np;
7071 }
7072 free(comsig);
7073 }
7074