1 /*
2 * services/listen_dnsport.c - listen on port 53 for incoming DNS queries.
3 *
4 * Copyright (c) 2007, NLnet Labs. All rights reserved.
5 *
6 * This software is open source.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
14 *
15 * Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
18 *
19 * Neither the name of the NLNET LABS nor the names of its contributors may
20 * be used to endorse or promote products derived from this software without
21 * specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */
35
36 /**
37 * \file
38 *
39 * This file has functions to get queries from clients.
40 */
41 #include "config.h"
42 #ifdef HAVE_SYS_TYPES_H
43 # include <sys/types.h>
44 #endif
45 #include <sys/time.h>
46 #include <limits.h>
47 #ifdef USE_TCP_FASTOPEN
48 #include <netinet/tcp.h>
49 #endif
50 #include <ctype.h>
51 #include "services/listen_dnsport.h"
52 #include "services/outside_network.h"
53 #include "util/netevent.h"
54 #include "util/log.h"
55 #include "util/config_file.h"
56 #include "util/net_help.h"
57 #include "sldns/sbuffer.h"
58 #include "sldns/parseutil.h"
59 #include "sldns/wire2str.h"
60 #include "services/mesh.h"
61 #include "util/fptr_wlist.h"
62 #include "util/locks.h"
63 #include "util/timeval_func.h"
64
65 #ifdef HAVE_NETDB_H
66 #include <netdb.h>
67 #endif
68 #include <fcntl.h>
69
70 #ifdef HAVE_SYS_UN_H
71 #include <sys/un.h>
72 #endif
73
74 #ifdef HAVE_SYSTEMD
75 #include <systemd/sd-daemon.h>
76 #endif
77
78 #ifdef HAVE_IFADDRS_H
79 #include <ifaddrs.h>
80 #endif
81 #ifdef HAVE_NET_IF_H
82 #include <net/if.h>
83 #endif
84
85 #ifdef HAVE_TIME_H
86 #include <time.h>
87 #endif
88 #include <sys/time.h>
89
90 #ifdef HAVE_NGTCP2
91 #include <ngtcp2/ngtcp2.h>
92 #include <ngtcp2/ngtcp2_crypto.h>
93 #ifdef HAVE_NGTCP2_NGTCP2_CRYPTO_OSSL_H
94 #include <ngtcp2/ngtcp2_crypto_ossl.h>
95 #elif defined(HAVE_NGTCP2_NGTCP2_CRYPTO_QUICTLS_H)
96 #include <ngtcp2/ngtcp2_crypto_quictls.h>
97 #elif defined(HAVE_NGTCP2_NGTCP2_CRYPTO_OPENSSL_H)
98 #include <ngtcp2/ngtcp2_crypto_openssl.h>
99 #define MAKE_QUIC_METHOD 1
100 #endif
101 #endif
102
103 #ifdef HAVE_OPENSSL_SSL_H
104 #include <openssl/ssl.h>
105 #endif
106
107 #ifdef HAVE_LINUX_NET_TSTAMP_H
108 #include <linux/net_tstamp.h>
109 #endif
110
111 /** number of queued TCP connections for listen() */
112 #define TCP_BACKLOG 256
113
114 #ifndef THREADS_DISABLED
115 /** lock on the counter of stream buffer memory */
116 static lock_basic_type stream_wait_count_lock;
117 /** lock on the counter of HTTP2 query buffer memory */
118 static lock_basic_type http2_query_buffer_count_lock;
119 /** lock on the counter of HTTP2 response buffer memory */
120 static lock_basic_type http2_response_buffer_count_lock;
121 #endif
122 /** size (in bytes) of stream wait buffers */
123 static size_t stream_wait_count = 0;
124 /** is the lock initialised for stream wait buffers */
125 static int stream_wait_lock_inited = 0;
126 /** size (in bytes) of HTTP2 query buffers */
127 static size_t http2_query_buffer_count = 0;
128 /** is the lock initialised for HTTP2 query buffers */
129 static int http2_query_buffer_lock_inited = 0;
130 /** size (in bytes) of HTTP2 response buffers */
131 static size_t http2_response_buffer_count = 0;
132 /** is the lock initialised for HTTP2 response buffers */
133 static int http2_response_buffer_lock_inited = 0;
134
135 /**
136 * Debug print of the getaddrinfo returned address.
137 * @param addr: the address returned.
138 * @param additional: additional text that describes the type of socket,
139 * or NULL for no text.
140 */
141 static void
verbose_print_addr(struct addrinfo * addr,const char * additional)142 verbose_print_addr(struct addrinfo *addr, const char* additional)
143 {
144 if(verbosity >= VERB_ALGO) {
145 char buf[100];
146 void* sinaddr = &((struct sockaddr_in*)addr->ai_addr)->sin_addr;
147 #ifdef INET6
148 if(addr->ai_family == AF_INET6)
149 sinaddr = &((struct sockaddr_in6*)addr->ai_addr)->
150 sin6_addr;
151 #endif /* INET6 */
152 if(inet_ntop(addr->ai_family, sinaddr, buf,
153 (socklen_t)sizeof(buf)) == 0) {
154 (void)strlcpy(buf, "(null)", sizeof(buf));
155 }
156 buf[sizeof(buf)-1] = 0;
157 verbose(VERB_ALGO, "creating %s%s socket %s %d%s%s",
158 addr->ai_socktype==SOCK_DGRAM?"udp":
159 addr->ai_socktype==SOCK_STREAM?"tcp":"otherproto",
160 addr->ai_family==AF_INET?"4":
161 addr->ai_family==AF_INET6?"6":
162 "_otherfam", buf,
163 ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port),
164 (additional?" ":""), (additional?additional:""));
165 }
166 }
167
168 void
verbose_print_unbound_socket(struct unbound_socket * ub_sock)169 verbose_print_unbound_socket(struct unbound_socket* ub_sock)
170 {
171 if(verbosity >= VERB_ALGO) {
172 char buf[256];
173 log_info("listing of unbound_socket structure:");
174 addr_to_str((void*)ub_sock->addr, ub_sock->addrlen, buf,
175 sizeof(buf));
176 log_info("%s s is: %d, fam is: %s, acl: %s", buf, ub_sock->s,
177 ub_sock->fam == AF_INET?"AF_INET":"AF_INET6",
178 ub_sock->acl?"yes":"no");
179 }
180 }
181
#ifdef HAVE_SYSTEMD
/**
 * Find a socket that was passed in by systemd socket activation.
 * Returns the first passed descriptor for which sd_is_socket() accepts
 * the given family, socktype and listen state. The address and path are
 * not compared with the descriptor; they are only used in the error
 * message when no descriptor matches.
 * @param family: address family to look for.
 * @param socktype: socket type to look for.
 * @param listen: listening state passed to sd_is_socket(); use -1 for UDP.
 * @param addr: address for the error message, or NULL to log path instead.
 * @param addrlen: length of addr.
 * @param path: text for the error message when addr is NULL.
 * @return the file descriptor, or -1 if none was found or on error.
 */
static int
systemd_get_activated(int family, int socktype, int listen,
		      struct sockaddr *addr, socklen_t addrlen,
		      const char *path)
{
	int i = 0;
	int r = 0;
	int s = -1;
	const char* listen_pid, *listen_fds;

	/* We should use "listen" option only for stream protocols. For UDP it should be -1 */

	if((r = sd_booted()) < 1) {
		if(r == 0)
			log_warn("systemd is not running");
		else
			log_err("systemd sd_booted(): %s", strerror(-r));
		return -1;
	}

	/* only the presence of these variables is checked here */
	listen_pid = getenv("LISTEN_PID");
	listen_fds = getenv("LISTEN_FDS");

	if (!listen_pid) {
		log_warn("Systemd mandatory ENV variable is not defined: LISTEN_PID");
		return -1;
	}

	if (!listen_fds) {
		log_warn("Systemd mandatory ENV variable is not defined: LISTEN_FDS");
		return -1;
	}

	if((r = sd_listen_fds(0)) < 1) {
		if(r == 0)
			log_warn("systemd: did not return socket, check unit configuration");
		else
			log_err("systemd sd_listen_fds(): %s", strerror(-r));
		return -1;
	}

	/* r is the number of passed descriptors, take the first match */
	for(i = 0; i < r; i++) {
		if(sd_is_socket(SD_LISTEN_FDS_START + i, family, socktype, listen)) {
			s = SD_LISTEN_FDS_START + i;
			break;
		}
	}
	if (s == -1) {
		if (addr)
			log_err_addr("systemd sd_listen_fds()",
				     "no such socket",
				     (struct sockaddr_storage *)addr, addrlen);
		else
			log_err("systemd sd_listen_fds(): %s", path);
	}
	return s;
}
#endif
241
242 int
create_udp_sock(int family,int socktype,struct sockaddr * addr,socklen_t addrlen,int v6only,int * inuse,int * noproto,int rcv,int snd,int listen,int * reuseport,int transparent,int freebind,int use_systemd,int dscp)243 create_udp_sock(int family, int socktype, struct sockaddr* addr,
244 socklen_t addrlen, int v6only, int* inuse, int* noproto,
245 int rcv, int snd, int listen, int* reuseport, int transparent,
246 int freebind, int use_systemd, int dscp)
247 {
248 int s;
249 char* err;
250 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_USE_MIN_MTU) || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined (SO_BINDANY)
251 int on=1;
252 #endif
253 #ifdef IPV6_MTU
254 int mtu = IPV6_MIN_MTU;
255 #endif
256 #if !defined(SO_RCVBUFFORCE) && !defined(SO_RCVBUF)
257 (void)rcv;
258 #endif
259 #if !defined(SO_SNDBUFFORCE) && !defined(SO_SNDBUF)
260 (void)snd;
261 #endif
262 #ifndef IPV6_V6ONLY
263 (void)v6only;
264 #endif
265 #if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY)
266 (void)transparent;
267 #endif
268 #if !defined(IP_FREEBIND)
269 (void)freebind;
270 #endif
271 #ifdef HAVE_SYSTEMD
272 int got_fd_from_systemd = 0;
273
274 if (!use_systemd
275 || (use_systemd
276 && (s = systemd_get_activated(family, socktype, -1, addr,
277 addrlen, NULL)) == -1)) {
278 #else
279 (void)use_systemd;
280 #endif
281 if((s = socket(family, socktype, 0)) == -1) {
282 *inuse = 0;
283 #ifndef USE_WINSOCK
284 if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
285 *noproto = 1;
286 return -1;
287 }
288 #else
289 if(WSAGetLastError() == WSAEAFNOSUPPORT ||
290 WSAGetLastError() == WSAEPROTONOSUPPORT) {
291 *noproto = 1;
292 return -1;
293 }
294 #endif
295 log_err("can't create socket: %s", sock_strerror(errno));
296 *noproto = 0;
297 return -1;
298 }
299 #ifdef HAVE_SYSTEMD
300 } else {
301 got_fd_from_systemd = 1;
302 }
303 #endif
304 if(listen) {
305 #ifdef SO_REUSEADDR
306 if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
307 (socklen_t)sizeof(on)) < 0) {
308 log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
309 sock_strerror(errno));
310 #ifndef USE_WINSOCK
311 if(errno != ENOSYS) {
312 close(s);
313 *noproto = 0;
314 *inuse = 0;
315 return -1;
316 }
317 #else
318 closesocket(s);
319 *noproto = 0;
320 *inuse = 0;
321 return -1;
322 #endif
323 }
324 #endif /* SO_REUSEADDR */
325 #ifdef SO_REUSEPORT
326 # ifdef SO_REUSEPORT_LB
327 /* on FreeBSD 12 we have SO_REUSEPORT_LB that does loadbalance
328 * like SO_REUSEPORT on Linux. This is what the users want
329 * with the config option in unbound.conf; if we actually
330 * need local address and port reuse they'll also need to
331 * have SO_REUSEPORT set for them, assume it was _LB they want.
332 */
333 if (reuseport && *reuseport &&
334 setsockopt(s, SOL_SOCKET, SO_REUSEPORT_LB, (void*)&on,
335 (socklen_t)sizeof(on)) < 0) {
336 #ifdef ENOPROTOOPT
337 if(errno != ENOPROTOOPT || verbosity >= 3)
338 log_warn("setsockopt(.. SO_REUSEPORT_LB ..) failed: %s",
339 strerror(errno));
340 #endif
341 /* this option is not essential, we can continue */
342 *reuseport = 0;
343 }
344 # else /* no SO_REUSEPORT_LB */
345
346 /* try to set SO_REUSEPORT so that incoming
347 * queries are distributed evenly among the receiving threads.
348 * Each thread must have its own socket bound to the same port,
349 * with SO_REUSEPORT set on each socket.
350 */
351 if (reuseport && *reuseport &&
352 setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
353 (socklen_t)sizeof(on)) < 0) {
354 #ifdef ENOPROTOOPT
355 if(errno != ENOPROTOOPT || verbosity >= 3)
356 log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
357 strerror(errno));
358 #endif
359 /* this option is not essential, we can continue */
360 *reuseport = 0;
361 }
362 # endif /* SO_REUSEPORT_LB */
363 #else
364 (void)reuseport;
365 #endif /* defined(SO_REUSEPORT) */
366 #ifdef IP_TRANSPARENT
367 if (transparent &&
368 setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
369 (socklen_t)sizeof(on)) < 0) {
370 log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
371 strerror(errno));
372 }
373 #elif defined(IP_BINDANY)
374 if (transparent &&
375 setsockopt(s, (family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP),
376 (family == AF_INET6? IPV6_BINDANY:IP_BINDANY),
377 (void*)&on, (socklen_t)sizeof(on)) < 0) {
378 log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s",
379 (family==AF_INET6?"V6":""), strerror(errno));
380 }
381 #elif defined(SO_BINDANY)
382 if (transparent &&
383 setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on,
384 (socklen_t)sizeof(on)) < 0) {
385 log_warn("setsockopt(.. SO_BINDANY ..) failed: %s",
386 strerror(errno));
387 }
388 #endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */
389 }
390 #ifdef IP_FREEBIND
391 if(freebind &&
392 setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on,
393 (socklen_t)sizeof(on)) < 0) {
394 log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s",
395 strerror(errno));
396 }
397 #endif /* IP_FREEBIND */
398 if(rcv) {
399 #ifdef SO_RCVBUF
400 int got;
401 socklen_t slen = (socklen_t)sizeof(got);
402 # ifdef SO_RCVBUFFORCE
403 /* Linux specific: try to use root permission to override
404 * system limits on rcvbuf. The limit is stored in
405 * /proc/sys/net/core/rmem_max or sysctl net.core.rmem_max */
406 if(setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv,
407 (socklen_t)sizeof(rcv)) < 0) {
408 if(errno != EPERM) {
409 log_err("setsockopt(..., SO_RCVBUFFORCE, "
410 "...) failed: %s", sock_strerror(errno));
411 sock_close(s);
412 *noproto = 0;
413 *inuse = 0;
414 return -1;
415 }
416 # endif /* SO_RCVBUFFORCE */
417 if(setsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv,
418 (socklen_t)sizeof(rcv)) < 0) {
419 log_err("setsockopt(..., SO_RCVBUF, "
420 "...) failed: %s", sock_strerror(errno));
421 sock_close(s);
422 *noproto = 0;
423 *inuse = 0;
424 return -1;
425 }
426 /* check if we got the right thing or if system
427 * reduced to some system max. Warn if so */
428 if(getsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&got,
429 &slen) >= 0 && got < rcv/2) {
430 log_warn("so-rcvbuf %u was not granted. "
431 "Got %u. To fix: start with "
432 "root permissions(linux) or sysctl "
433 "bigger net.core.rmem_max(linux) or "
434 "kern.ipc.maxsockbuf(bsd) values.",
435 (unsigned)rcv, (unsigned)got);
436 }
437 # ifdef SO_RCVBUFFORCE
438 }
439 # endif
440 #endif /* SO_RCVBUF */
441 }
442 /* first do RCVBUF as the receive buffer is more important */
443 if(snd) {
444 #ifdef SO_SNDBUF
445 int got;
446 socklen_t slen = (socklen_t)sizeof(got);
447 # ifdef SO_SNDBUFFORCE
448 /* Linux specific: try to use root permission to override
449 * system limits on sndbuf. The limit is stored in
450 * /proc/sys/net/core/wmem_max or sysctl net.core.wmem_max */
451 if(setsockopt(s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd,
452 (socklen_t)sizeof(snd)) < 0) {
453 if(errno != EPERM && errno != ENOBUFS) {
454 log_err("setsockopt(..., SO_SNDBUFFORCE, "
455 "...) failed: %s", sock_strerror(errno));
456 sock_close(s);
457 *noproto = 0;
458 *inuse = 0;
459 return -1;
460 }
461 if(errno != EPERM) {
462 verbose(VERB_ALGO, "setsockopt(..., SO_SNDBUFFORCE, "
463 "...) was not granted: %s", sock_strerror(errno));
464 }
465 # endif /* SO_SNDBUFFORCE */
466 if(setsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&snd,
467 (socklen_t)sizeof(snd)) < 0) {
468 if(errno != ENOSYS && errno != ENOBUFS) {
469 log_err("setsockopt(..., SO_SNDBUF, "
470 "...) failed: %s", sock_strerror(errno));
471 sock_close(s);
472 *noproto = 0;
473 *inuse = 0;
474 return -1;
475 }
476 log_warn("setsockopt(..., SO_SNDBUF, "
477 "...) was not granted: %s", sock_strerror(errno));
478 }
479 /* check if we got the right thing or if system
480 * reduced to some system max. Warn if so */
481 if(getsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&got,
482 &slen) >= 0 && got < snd/2) {
483 log_warn("so-sndbuf %u was not granted. "
484 "Got %u. To fix: start with "
485 "root permissions(linux) or sysctl "
486 "bigger net.core.wmem_max(linux) or "
487 "kern.ipc.maxsockbuf(bsd) values. or "
488 "set so-sndbuf: 0 (use system value).",
489 (unsigned)snd, (unsigned)got);
490 }
491 # ifdef SO_SNDBUFFORCE
492 }
493 # endif
494 #endif /* SO_SNDBUF */
495 }
496 err = set_ip_dscp(s, family, dscp);
497 if(err != NULL)
498 log_warn("error setting IP DiffServ codepoint %d on UDP socket: %s", dscp, err);
499 if(family == AF_INET6) {
500 # if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
501 int omit6_set = 0;
502 int action;
503 # endif
504 # if defined(IPV6_V6ONLY)
505 if(v6only
506 # ifdef HAVE_SYSTEMD
507 /* Systemd wants to control if the socket is v6 only
508 * or both, with BindIPv6Only=default, ipv6-only or
509 * both in systemd.socket, so it is not set here. */
510 && !got_fd_from_systemd
511 # endif
512 ) {
513 int val=(v6only==2)?0:1;
514 if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
515 (void*)&val, (socklen_t)sizeof(val)) < 0) {
516 log_err("setsockopt(..., IPV6_V6ONLY"
517 ", ...) failed: %s", sock_strerror(errno));
518 sock_close(s);
519 *noproto = 0;
520 *inuse = 0;
521 return -1;
522 }
523 }
524 # endif
525 # if defined(IPV6_USE_MIN_MTU)
526 /*
527 * There is no fragmentation of IPv6 datagrams
528 * during forwarding in the network. Therefore
529 * we do not send UDP datagrams larger than
530 * the minimum IPv6 MTU of 1280 octets. The
531 * EDNS0 message length can be larger if the
532 * network stack supports IPV6_USE_MIN_MTU.
533 */
534 if (setsockopt(s, IPPROTO_IPV6, IPV6_USE_MIN_MTU,
535 (void*)&on, (socklen_t)sizeof(on)) < 0) {
536 log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
537 "...) failed: %s", sock_strerror(errno));
538 sock_close(s);
539 *noproto = 0;
540 *inuse = 0;
541 return -1;
542 }
543 # elif defined(IPV6_MTU)
544 # ifndef USE_WINSOCK
545 /*
546 * On Linux, to send no larger than 1280, the PMTUD is
547 * disabled by default for datagrams anyway, so we set
548 * the MTU to use.
549 */
550 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU,
551 (void*)&mtu, (socklen_t)sizeof(mtu)) < 0) {
552 log_err("setsockopt(..., IPV6_MTU, ...) failed: %s",
553 sock_strerror(errno));
554 sock_close(s);
555 *noproto = 0;
556 *inuse = 0;
557 return -1;
558 }
559 # elif defined(IPV6_USER_MTU)
560 /* As later versions of the mingw crosscompiler define
561 * IPV6_MTU, do the same for windows but use IPV6_USER_MTU
562 * instead which is writable; IPV6_MTU is readonly there. */
563 if (setsockopt(s, IPPROTO_IPV6, IPV6_USER_MTU,
564 (void*)&mtu, (socklen_t)sizeof(mtu)) < 0) {
565 if (WSAGetLastError() != WSAENOPROTOOPT) {
566 log_err("setsockopt(..., IPV6_USER_MTU, ...) failed: %s",
567 wsa_strerror(WSAGetLastError()));
568 sock_close(s);
569 *noproto = 0;
570 *inuse = 0;
571 return -1;
572 }
573 }
574 # endif /* USE_WINSOCK */
575 # endif /* IPv6 MTU */
576 # if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
577 # if defined(IP_PMTUDISC_OMIT)
578 action = IP_PMTUDISC_OMIT;
579 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
580 &action, (socklen_t)sizeof(action)) < 0) {
581
582 if (errno != EINVAL) {
583 log_err("setsockopt(..., IPV6_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s",
584 strerror(errno));
585 sock_close(s);
586 *noproto = 0;
587 *inuse = 0;
588 return -1;
589 }
590 }
591 else
592 {
593 omit6_set = 1;
594 }
595 # endif
596 if (omit6_set == 0) {
597 action = IP_PMTUDISC_DONT;
598 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
599 &action, (socklen_t)sizeof(action)) < 0) {
600 log_err("setsockopt(..., IPV6_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s",
601 strerror(errno));
602 sock_close(s);
603 *noproto = 0;
604 *inuse = 0;
605 return -1;
606 }
607 }
608 # endif /* IPV6_MTU_DISCOVER */
609 } else if(family == AF_INET) {
610 # if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
611 /* linux 3.15 has IP_PMTUDISC_OMIT, Hannes Frederic Sowa made it so that
612 * PMTU information is not accepted, but fragmentation is allowed
613 * if and only if the packet size exceeds the outgoing interface MTU
614 * (and also uses the interface mtu to determine the size of the packets).
615 * So there won't be any EMSGSIZE error. Against DNS fragmentation attacks.
616 * FreeBSD already has same semantics without setting the option. */
617 int omit_set = 0;
618 int action;
619 # if defined(IP_PMTUDISC_OMIT)
620 action = IP_PMTUDISC_OMIT;
621 if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
622 &action, (socklen_t)sizeof(action)) < 0) {
623
624 if (errno != EINVAL) {
625 log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s",
626 strerror(errno));
627 sock_close(s);
628 *noproto = 0;
629 *inuse = 0;
630 return -1;
631 }
632 }
633 else
634 {
635 omit_set = 1;
636 }
637 # endif
638 if (omit_set == 0) {
639 action = IP_PMTUDISC_DONT;
640 if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
641 &action, (socklen_t)sizeof(action)) < 0) {
642 log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s",
643 strerror(errno));
644 sock_close(s);
645 *noproto = 0;
646 *inuse = 0;
647 return -1;
648 }
649 }
650 # elif defined(IP_DONTFRAG) && !defined(__APPLE__)
651 /* the IP_DONTFRAG option if defined in the 11.0 OSX headers,
652 * but does not work on that version, so we exclude it */
653 /* a nonzero value disables fragmentation, according to
654 * docs.oracle.com for ip(4). */
655 int off = 1;
656 if (setsockopt(s, IPPROTO_IP, IP_DONTFRAG,
657 &off, (socklen_t)sizeof(off)) < 0) {
658 log_err("setsockopt(..., IP_DONTFRAG, ...) failed: %s",
659 strerror(errno));
660 sock_close(s);
661 *noproto = 0;
662 *inuse = 0;
663 return -1;
664 }
665 # endif /* IPv4 MTU */
666 }
667 if(
668 #ifdef HAVE_SYSTEMD
669 !got_fd_from_systemd &&
670 #endif
671 bind(s, (struct sockaddr*)addr, addrlen) != 0) {
672 *noproto = 0;
673 *inuse = 0;
674 #ifndef USE_WINSOCK
675 #ifdef EADDRINUSE
676 *inuse = (errno == EADDRINUSE);
677 /* detect freebsd jail with no ipv6 permission */
678 if(family==AF_INET6 && errno==EINVAL)
679 *noproto = 1;
680 else if(errno != EADDRINUSE &&
681 !(errno == EACCES && verbosity < 4 && !listen)
682 #ifdef EADDRNOTAVAIL
683 && !(errno == EADDRNOTAVAIL && verbosity < 4 && !listen)
684 #endif
685 ) {
686 log_err_addr("can't bind socket", strerror(errno),
687 (struct sockaddr_storage*)addr, addrlen);
688 }
689 #endif /* EADDRINUSE */
690 #else /* USE_WINSOCK */
691 if(WSAGetLastError() != WSAEADDRINUSE &&
692 WSAGetLastError() != WSAEADDRNOTAVAIL &&
693 !(WSAGetLastError() == WSAEACCES && verbosity < 4 && !listen)) {
694 log_err_addr("can't bind socket",
695 wsa_strerror(WSAGetLastError()),
696 (struct sockaddr_storage*)addr, addrlen);
697 }
698 #endif /* USE_WINSOCK */
699 sock_close(s);
700 return -1;
701 }
702 if(!fd_set_nonblock(s)) {
703 *noproto = 0;
704 *inuse = 0;
705 sock_close(s);
706 return -1;
707 }
708 return s;
709 }
710
711 int
create_tcp_accept_sock(struct addrinfo * addr,int v6only,int * noproto,int * reuseport,int transparent,int mss,int nodelay,int freebind,int use_systemd,int dscp,const char * additional)712 create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto,
713 int* reuseport, int transparent, int mss, int nodelay, int freebind,
714 int use_systemd, int dscp, const char* additional)
715 {
716 int s = -1;
717 char* err;
718 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) \
719 || defined(IPV6_V6ONLY) || defined(IP_TRANSPARENT) \
720 || defined(IP_BINDANY) || defined(IP_FREEBIND) \
721 || defined(SO_BINDANY) || defined(TCP_NODELAY)
722 int on = 1;
723 #endif
724 #ifdef HAVE_SYSTEMD
725 int got_fd_from_systemd = 0;
726 #endif
727 #ifdef USE_TCP_FASTOPEN
728 int qlen;
729 #endif
730 #if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY)
731 (void)transparent;
732 #endif
733 #if !defined(IP_FREEBIND)
734 (void)freebind;
735 #endif
736 verbose_print_addr(addr, additional);
737 *noproto = 0;
738 #ifdef HAVE_SYSTEMD
739 if (!use_systemd ||
740 (use_systemd
741 && (s = systemd_get_activated(addr->ai_family, addr->ai_socktype, 1,
742 addr->ai_addr, addr->ai_addrlen,
743 NULL)) == -1)) {
744 #else
745 (void)use_systemd;
746 #endif
747 if((s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) {
748 #ifndef USE_WINSOCK
749 if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
750 *noproto = 1;
751 return -1;
752 }
753 #else
754 if(WSAGetLastError() == WSAEAFNOSUPPORT ||
755 WSAGetLastError() == WSAEPROTONOSUPPORT) {
756 *noproto = 1;
757 return -1;
758 }
759 #endif
760 log_err("can't create socket: %s", sock_strerror(errno));
761 return -1;
762 }
763 if(nodelay) {
764 #if defined(IPPROTO_TCP) && defined(TCP_NODELAY)
765 if(setsockopt(s, IPPROTO_TCP, TCP_NODELAY, (void*)&on,
766 (socklen_t)sizeof(on)) < 0) {
767 #ifndef USE_WINSOCK
768 log_err(" setsockopt(.. TCP_NODELAY ..) failed: %s",
769 strerror(errno));
770 #else
771 log_err(" setsockopt(.. TCP_NODELAY ..) failed: %s",
772 wsa_strerror(WSAGetLastError()));
773 #endif
774 }
775 #else
776 log_warn(" setsockopt(TCP_NODELAY) unsupported");
777 #endif /* defined(IPPROTO_TCP) && defined(TCP_NODELAY) */
778 }
779 if (mss > 0) {
780 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG)
781 if(setsockopt(s, IPPROTO_TCP, TCP_MAXSEG, (void*)&mss,
782 (socklen_t)sizeof(mss)) < 0) {
783 log_err(" setsockopt(.. TCP_MAXSEG ..) failed: %s",
784 sock_strerror(errno));
785 } else {
786 verbose(VERB_ALGO,
787 " tcp socket mss set to %d", mss);
788 }
789 #else
790 log_warn(" setsockopt(TCP_MAXSEG) unsupported");
791 #endif /* defined(IPPROTO_TCP) && defined(TCP_MAXSEG) */
792 }
793 #ifdef HAVE_SYSTEMD
794 } else {
795 got_fd_from_systemd = 1;
796 }
797 #endif
798 #ifdef SO_REUSEADDR
799 if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
800 (socklen_t)sizeof(on)) < 0) {
801 log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
802 sock_strerror(errno));
803 sock_close(s);
804 return -1;
805 }
806 #endif /* SO_REUSEADDR */
807 #ifdef IP_FREEBIND
808 if (freebind && setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on,
809 (socklen_t)sizeof(on)) < 0) {
810 log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s",
811 strerror(errno));
812 }
813 #endif /* IP_FREEBIND */
814 #ifdef SO_REUSEPORT
815 /* try to set SO_REUSEPORT so that incoming
816 * connections are distributed evenly among the receiving threads.
817 * Each thread must have its own socket bound to the same port,
818 * with SO_REUSEPORT set on each socket.
819 */
820 if (reuseport && *reuseport &&
821 setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
822 (socklen_t)sizeof(on)) < 0) {
823 #ifdef ENOPROTOOPT
824 if(errno != ENOPROTOOPT || verbosity >= 3)
825 log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
826 strerror(errno));
827 #endif
828 /* this option is not essential, we can continue */
829 *reuseport = 0;
830 }
831 #else
832 (void)reuseport;
833 #endif /* defined(SO_REUSEPORT) */
834 #if defined(IPV6_V6ONLY)
835 if(addr->ai_family == AF_INET6 && v6only
836 # ifdef HAVE_SYSTEMD
837 /* Systemd wants to control if the socket is v6 only
838 * or both, with BindIPv6Only=default, ipv6-only or
839 * both in systemd.socket, so it is not set here. */
840 && !got_fd_from_systemd
841 # endif
842 ) {
843 if(setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
844 (void*)&on, (socklen_t)sizeof(on)) < 0) {
845 log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
846 sock_strerror(errno));
847 sock_close(s);
848 return -1;
849 }
850 }
851 #else
852 (void)v6only;
853 #endif /* IPV6_V6ONLY */
854 #ifdef IP_TRANSPARENT
855 if (transparent &&
856 setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
857 (socklen_t)sizeof(on)) < 0) {
858 log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
859 strerror(errno));
860 }
861 #elif defined(IP_BINDANY)
862 if (transparent &&
863 setsockopt(s, (addr->ai_family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP),
864 (addr->ai_family == AF_INET6? IPV6_BINDANY:IP_BINDANY),
865 (void*)&on, (socklen_t)sizeof(on)) < 0) {
866 log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s",
867 (addr->ai_family==AF_INET6?"V6":""), strerror(errno));
868 }
869 #elif defined(SO_BINDANY)
870 if (transparent &&
871 setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on, (socklen_t)
872 sizeof(on)) < 0) {
873 log_warn("setsockopt(.. SO_BINDANY ..) failed: %s",
874 strerror(errno));
875 }
876 #endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */
877 err = set_ip_dscp(s, addr->ai_family, dscp);
878 if(err != NULL)
879 log_warn("error setting IP DiffServ codepoint %d on TCP socket: %s", dscp, err);
880 if(
881 #ifdef HAVE_SYSTEMD
882 !got_fd_from_systemd &&
883 #endif
884 bind(s, addr->ai_addr, addr->ai_addrlen) != 0) {
885 #ifndef USE_WINSOCK
886 /* detect freebsd jail with no ipv6 permission */
887 if(addr->ai_family==AF_INET6 && errno==EINVAL)
888 *noproto = 1;
889 else {
890 log_err_addr("can't bind socket", strerror(errno),
891 (struct sockaddr_storage*)addr->ai_addr,
892 addr->ai_addrlen);
893 }
894 #else
895 log_err_addr("can't bind socket",
896 wsa_strerror(WSAGetLastError()),
897 (struct sockaddr_storage*)addr->ai_addr,
898 addr->ai_addrlen);
899 #endif
900 sock_close(s);
901 return -1;
902 }
903 if(!fd_set_nonblock(s)) {
904 sock_close(s);
905 return -1;
906 }
907 if(listen(s, TCP_BACKLOG) == -1) {
908 log_err("can't listen: %s", sock_strerror(errno));
909 sock_close(s);
910 return -1;
911 }
912 #ifdef USE_TCP_FASTOPEN
913 /* qlen specifies how many outstanding TFO requests to allow. Limit is a defense
914 against IP spoofing attacks as suggested in RFC7413 */
915 #ifdef __APPLE__
916 /* OS X implementation only supports qlen of 1 via this call. Actual
917 value is configured by the net.inet.tcp.fastopen_backlog kernel param. */
918 qlen = 1;
919 #else
920 /* 5 is recommended on linux */
921 qlen = 5;
922 #endif
923 if ((setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN, &qlen,
924 sizeof(qlen))) == -1 ) {
925 #ifdef ENOPROTOOPT
926 /* squelch ENOPROTOOPT: freebsd server mode with kernel support
927 disabled, except when verbosity enabled for debugging */
928 if(errno != ENOPROTOOPT || verbosity >= 3) {
929 #endif
930 if(errno == EPERM) {
931 log_warn("Setting TCP Fast Open as server failed: %s ; this could likely be because sysctl net.inet.tcp.fastopen.enabled, net.inet.tcp.fastopen.server_enable, or net.ipv4.tcp_fastopen is disabled", strerror(errno));
932 } else {
933 log_err("Setting TCP Fast Open as server failed: %s", strerror(errno));
934 }
935 #ifdef ENOPROTOOPT
936 }
937 #endif
938 }
939 #endif
940 return s;
941 }
942
/**
 * Set the DSCP (DiffServ codepoint) on a socket.
 * The value is shifted left by two bits before it is written to the
 * socket option: IPV6_TCLASS for AF_INET6, IP_TOS for every other family.
 * @param socket: the socket to set the option on.
 * @param addrfamily: address family of the socket.
 * @param dscp: the DSCP value; 0 means nothing is set.
 * @return NULL on success (or when dscp is 0), otherwise an error string.
 */
char*
set_ip_dscp(int socket, int addrfamily, int dscp)
{
	int ds;

	if(dscp == 0)
		return NULL;
	/* the DSCP field is the upper six bits of the option value */
	ds = dscp << 2;
	switch(addrfamily) {
	case AF_INET6:
	#ifdef IPV6_TCLASS
		if(setsockopt(socket, IPPROTO_IPV6, IPV6_TCLASS, (void*)&ds,
			sizeof(ds)) < 0)
			return sock_strerror(errno);
		break;
	#else
		return "IPV6_TCLASS not defined on this system";
	#endif
	default:
		if(setsockopt(socket, IPPROTO_IP, IP_TOS, (void*)&ds, sizeof(ds)) < 0)
			return sock_strerror(errno);
		break;
	}
	return NULL;
}
968
/**
 * Create a listening local (unix domain) stream socket on a path.
 * With systemd support compiled in and use_systemd set, a socket passed
 * by systemd is returned when available. Otherwise an existing file at
 * the path is unlinked, and a new socket is bound, made nonblocking and
 * set to listen.
 * @param path: file system path of the socket. It is copied into the
 * 	socket address with strlcpy, bounded by the size of sun_path.
 * @param noproto: set to 1 only when local sockets are not supported by
 * 	this build; otherwise it is not written.
 * @param use_systemd: if nonzero, try to get the socket from systemd.
 * @return the socket, or -1 on failure.
 */
int
create_local_accept_sock(const char *path, int* noproto, int use_systemd)
{
#ifdef HAVE_SYSTEMD
	int ret;

	if (use_systemd && (ret = systemd_get_activated(AF_LOCAL, SOCK_STREAM, 1, NULL, 0, path)) != -1)
		return ret;
	else {
#endif
#ifdef HAVE_SYS_UN_H
	int s;
	struct sockaddr_un usock;
#ifndef HAVE_SYSTEMD
	(void)use_systemd;
#endif

	verbose(VERB_ALGO, "creating unix socket %s", path);
#ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
	/* this member exists on BSDs, not Linux */
	usock.sun_len = (unsigned)sizeof(usock);
#endif
	usock.sun_family = AF_LOCAL;
	/* length is 92-108, 104 on FreeBSD */
	(void)strlcpy(usock.sun_path, path, sizeof(usock.sun_path));

	if ((s = socket(AF_LOCAL, SOCK_STREAM, 0)) == -1) {
		log_err("Cannot create local socket %s (%s)",
			path, strerror(errno));
		return -1;
	}

	/* remove a stale socket file; a missing file is not an error */
	if (unlink(path) && errno != ENOENT) {
		/* The socket already exists and cannot be removed */
		log_err("Cannot remove old local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (bind(s, (struct sockaddr *)&usock,
		(socklen_t)sizeof(struct sockaddr_un)) == -1) {
		log_err("Cannot bind local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (!fd_set_nonblock(s)) {
		log_err("Cannot set non-blocking mode");
		goto err;
	}

	if (listen(s, TCP_BACKLOG) == -1) {
		log_err("can't listen: %s", strerror(errno));
		goto err;
	}

	(void)noproto; /*unused*/
	return s;

err:
	sock_close(s);
	return -1;

#ifdef HAVE_SYSTEMD
	}
#endif
#else
	(void)use_systemd;
	(void)path;
	log_err("Local sockets are not supported");
	*noproto = 1;
	return -1;
#endif
}
1043
1044
/**
 * Create socket from getaddrinfo results.
 * Looks up ifname and port with getaddrinfo, creates a UDP socket (for
 * SOCK_DGRAM) or a TCP accept socket (any other stype) for the first
 * result and records the listening address in ub_sock.
 * @param stype: socket type, stored in hints->ai_socktype.
 * @param ifname: interface address text, NULL is passed on to getaddrinfo.
 * @param port: port number.
 * @param hints: getaddrinfo hints; ai_socktype is overwritten.
 * @param v6only: passed on to the socket creation routine.
 * @param noip6: set to 1 if the socket could not be made because the
 *	IPv6 protocol is not available, 0 otherwise.
 * @param rcv: receive buffer size, used for UDP.
 * @param snd: send buffer size, used for UDP.
 * @param reuseport: passed on to the socket creation routine.
 * @param transparent: passed on to the socket creation routine.
 * @param tcp_mss: passed on for TCP sockets.
 * @param nodelay: passed on for TCP sockets.
 * @param freebind: passed on to the socket creation routine.
 * @param use_systemd: passed on to the socket creation routine.
 * @param dscp: passed on to the socket creation routine.
 * @param ub_sock: filled in with a malloced copy of the address, its
 *	length, the fd, the address family from hints, and a NULL acl.
 *	The address copy is also made when socket creation failed, so the
 *	caller has to free ub_sock->addr on a -1 return as well.
 * @param additional: text passed on for verbose output.
 * @return the socket fd or -1 on failure.
 */
static int
make_sock(int stype, const char* ifname, int port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind,
	int use_systemd, int dscp, struct unbound_socket* ub_sock,
	const char* additional)
{
	struct addrinfo *res = NULL;
	int r, s, inuse, noproto;
	char portbuf[32];
	snprintf(portbuf, sizeof(portbuf), "%d", port);
	hints->ai_socktype = stype;
	*noip6 = 0;
	if((r=getaddrinfo(ifname, portbuf, hints, &res)) != 0 || !res) {
#ifdef USE_WINSOCK
		if(r == EAI_NONAME && hints->ai_family == AF_INET6){
			*noip6 = 1; /* 'Host not found' for IP6 on winXP */
			return -1;
		}
#endif
		log_err("node %s:%s getaddrinfo: %s %s",
			ifname?ifname:"default", portbuf, gai_strerror(r),
#ifdef EAI_SYSTEM
			(r==EAI_SYSTEM?(char*)strerror(errno):"")
#else
			""
#endif
		);
		return -1;
	}
	/* the address is used by the socket creation below, so it has to
	 * be validated before that, not after */
	if(!res->ai_addr) {
		log_err("getaddrinfo returned no address");
		freeaddrinfo(res);
		return -1;
	}
	if(stype == SOCK_DGRAM) {
		verbose_print_addr(res, additional);
		s = create_udp_sock(res->ai_family, res->ai_socktype,
			(struct sockaddr*)res->ai_addr, res->ai_addrlen,
			v6only, &inuse, &noproto, (int)rcv, (int)snd, 1,
			reuseport, transparent, freebind, use_systemd, dscp);
		if(s == -1 && inuse) {
			log_err("bind: address already in use");
		} else if(s == -1 && noproto && hints->ai_family == AF_INET6){
			*noip6 = 1;
		}
	} else {
		s = create_tcp_accept_sock(res, v6only, &noproto, reuseport,
			transparent, tcp_mss, nodelay, freebind, use_systemd,
			dscp, additional);
		if(s == -1 && noproto && hints->ai_family == AF_INET6){
			*noip6 = 1;
		}
	}

	ub_sock->addr = memdup(res->ai_addr, res->ai_addrlen);
	ub_sock->addrlen = res->ai_addrlen;
	if(!ub_sock->addr) {
		log_err("out of memory: allocate listening address");
		freeaddrinfo(res);
		/* s is -1 if the socket creation above failed */
		if(s != -1)
			sock_close(s);
		return -1;
	}
	freeaddrinfo(res);

	ub_sock->s = s;
	ub_sock->fam = hints->ai_family;
	ub_sock->acl = NULL;

	return s;
}
1120
/**
 * Make socket and first see if ifname contains port override info.
 * An ifname of the form "address@port" overrides the port argument with
 * the number after the first '@'; the part before the '@' is used as the
 * interface text. The port text is parsed like atoi did before: leading
 * digits are used and trailing characters are ignored, but the value has
 * to be in the range 1-65535. All other arguments are passed on to
 * make_sock unchanged. A NULL ifname is passed on as is.
 * @return the socket fd or -1 on failure; on a malformed ifname *noip6 is
 *	set to 0 and the error is logged.
 */
static int
make_sock_port(int stype, const char* ifname, int port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind,
	int use_systemd, int dscp, struct unbound_socket* ub_sock,
	const char* additional)
{
	const char* at = ifname ? strchr(ifname, '@') : NULL;
	if(at) {
		/* override port with ifspec@port */
		long override_port;
		char newif[128];
		size_t iflen = (size_t)(at - ifname);
		if(iflen >= sizeof(newif)) {
			log_err("ifname too long: %s", ifname);
			*noip6 = 0;
			return -1;
		}
		/* strtol clamps on overflow, which the range check rejects */
		override_port = strtol(at+1, NULL, 10);
		if(override_port <= 0 || override_port > 65535) {
			log_err("invalid portnumber in interface: %s", ifname);
			*noip6 = 0;
			return -1;
		}
		memcpy(newif, ifname, iflen);
		newif[iflen] = 0;
		return make_sock(stype, newif, (int)override_port, hints,
			v6only, noip6, rcv, snd, reuseport, transparent,
			tcp_mss, nodelay, freebind, use_systemd, dscp,
			ub_sock, additional);
	}
	return make_sock(stype, ifname, port, hints, v6only, noip6, rcv, snd,
		reuseport, transparent, tcp_mss, nodelay, freebind, use_systemd,
		dscp, ub_sock, additional);
}
1155
1156 /**
1157 * Add port to open ports list.
1158 * @param list: list head. changed.
1159 * @param s: fd.
1160 * @param ftype: if fd is UDP.
1161 * @param pp2_enabled: if PROXYv2 is enabled for this port.
1162 * @param ub_sock: socket with address.
1163 * @return false on failure. list in unchanged then.
1164 */
1165 static int
port_insert(struct listen_port ** list,int s,enum listen_type ftype,int pp2_enabled,struct unbound_socket * ub_sock)1166 port_insert(struct listen_port** list, int s, enum listen_type ftype,
1167 int pp2_enabled, struct unbound_socket* ub_sock)
1168 {
1169 struct listen_port* item = (struct listen_port*)malloc(
1170 sizeof(struct listen_port));
1171 if(!item)
1172 return 0;
1173 item->next = *list;
1174 item->fd = s;
1175 item->ftype = ftype;
1176 item->pp2_enabled = pp2_enabled;
1177 item->socket = ub_sock;
1178 *list = item;
1179 return 1;
1180 }
1181
/**
 * Set fd to receive software timestamps.
 * Uses SO_TIMESTAMPNS when the linux net_tstamp header is available,
 * else SO_TIMESTAMP when that is defined.
 * @param s: the socket fd.
 * @return 1 on success, 0 on failure or when there is no platform
 *	support (an error is logged in both cases).
 */
static int
set_recvtimestamp(int s)
{
#ifdef HAVE_LINUX_NET_TSTAMP_H
	int flags = SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE;
	if(setsockopt(s, SOL_SOCKET, SO_TIMESTAMPNS, (void*)&flags,
		(socklen_t)sizeof(flags)) >= 0)
		return 1;
	log_err("setsockopt(..., SO_TIMESTAMPNS, ...) failed: %s",
		strerror(errno));
	return 0;
#elif defined(SO_TIMESTAMP) && defined(SCM_TIMESTAMP)
	/* FreeBSD and also Linux. */
	int enable = 1;
	if(setsockopt(s, SOL_SOCKET, SO_TIMESTAMP, (void*)&enable,
		(socklen_t)sizeof(enable)) >= 0)
		return 1;
	log_err("setsockopt(..., SO_TIMESTAMP, ...) failed: %s",
		strerror(errno));
	return 0;
#else
	log_err("packets timestamping is not supported on this platform");
	(void)s;
	return 0;
#endif
}
1209
/**
 * Set fd to receive source address packet info.
 * Which socket option is used depends on what the platform defines.
 * @param s: the socket fd.
 * @param family: AF_INET6 or AF_INET; for other values nothing is set.
 * @return 0 if the option could not be set or is not available for the
 *	family (an error is logged), 1 otherwise.
 */
static int
set_recvpktinfo(int s, int family)
{
#if defined(IPV6_RECVPKTINFO) || defined(IPV6_PKTINFO) || (defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)) || defined(IP_PKTINFO)
	int enable = 1;
#else
	(void)s;
#endif
	switch(family) {
	case AF_INET6:
#if defined(IPV6_RECVPKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IPV6_RECVPKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#elif defined(IPV6_PKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_PKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IPV6_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#else
		log_err("no IPV6_RECVPKTINFO and IPV6_PKTINFO options, please "
			"disable interface-automatic or do-ip6 in config");
		return 0;
#endif /* defined IPV6_RECVPKTINFO */

	case AF_INET:
#if defined(IP_PKTINFO)
		if(setsockopt(s, IPPROTO_IP, IP_PKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IP_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#elif defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)
		if(setsockopt(s, IPPROTO_IP, IP_RECVDSTADDR,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IP_RECVDSTADDR, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#else
		log_err("no IP_SENDSRCADDR or IP_PKTINFO option, please disable "
			"interface-automatic or do-ip4 in config");
		return 0;
#endif /* IP_PKTINFO */

	default:
		/* other families: nothing to set */
		break;
	}
	return 1;
}
1264
1265 /**
1266 * Helper for ports_open. Creates one interface (or NULL for default).
1267 * @param ifname: The interface ip address.
1268 * @param do_auto: use automatic interface detection.
1269 * If enabled, then ifname must be the wildcard name.
1270 * @param do_udp: if udp should be used.
1271 * @param do_tcp: if tcp should be used.
1272 * @param hints: for getaddrinfo. family and flags have to be set by caller.
1273 * @param port: Port number to use.
1274 * @param list: list of open ports, appended to, changed to point to list head.
1275 * @param rcv: receive buffer size for UDP
1276 * @param snd: send buffer size for UDP
1277 * @param ssl_port: ssl service port number
1278 * @param tls_additional_port: list of additional ssl service port numbers.
1279 * @param https_port: DoH service port number
1280 * @param proxy_protocol_port: list of PROXYv2 port numbers.
1281 * @param reuseport: try to set SO_REUSEPORT if nonNULL and true.
1282 * set to false on exit if reuseport failed due to no kernel support.
1283 * @param transparent: set IP_TRANSPARENT socket option.
1284 * @param tcp_mss: maximum segment size of tcp socket. default if zero.
1285 * @param freebind: set IP_FREEBIND socket option.
1286 * @param http2_nodelay: set TCP_NODELAY on HTTP/2 connection
1287 * @param use_systemd: if true, fetch sockets from systemd.
1288 * @param dnscrypt_port: dnscrypt service port number
1289 * @param dscp: DSCP to use.
1290 * @param quic_port: dns over quic port number.
1291 * @param http_notls_downstream: if no tls is used for https downstream.
1292 * @param sock_queue_timeout: the sock_queue_timeout from config. Seconds to
1293 * wait to discard if UDP packets have waited for long in the socket
1294 * buffer.
1295 * @return: returns false on error.
1296 */
1297 static int
ports_create_if(const char * ifname,int do_auto,int do_udp,int do_tcp,struct addrinfo * hints,int port,struct listen_port ** list,size_t rcv,size_t snd,int ssl_port,struct config_strlist * tls_additional_port,int https_port,struct config_strlist * proxy_protocol_port,int * reuseport,int transparent,int tcp_mss,int freebind,int http2_nodelay,int use_systemd,int dnscrypt_port,int dscp,int quic_port,int http_notls_downstream,int sock_queue_timeout)1298 ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp,
1299 struct addrinfo *hints, int port, struct listen_port** list,
1300 size_t rcv, size_t snd, int ssl_port,
1301 struct config_strlist* tls_additional_port, int https_port,
1302 struct config_strlist* proxy_protocol_port,
1303 int* reuseport, int transparent, int tcp_mss, int freebind,
1304 int http2_nodelay, int use_systemd, int dnscrypt_port, int dscp,
1305 int quic_port, int http_notls_downstream, int sock_queue_timeout)
1306 {
1307 int s, noip6=0;
1308 int is_ssl = if_is_ssl(ifname, port, ssl_port, tls_additional_port);
1309 int is_https = if_is_https(ifname, port, https_port);
1310 int is_dnscrypt = if_is_dnscrypt(ifname, port, dnscrypt_port);
1311 int is_pp2 = if_is_pp2(ifname, port, proxy_protocol_port);
1312 int is_doq = if_is_quic(ifname, port, quic_port);
1313 /* Always set TCP_NODELAY on TLS connection as it speeds up the TLS
1314 * handshake. DoH had already such option so we respect it.
1315 * Otherwise the server waits before sending more handshake data for
1316 * the client ACK (Nagle's algorithm), which is delayed because the
1317 * client waits for more data before ACKing (delayed ACK). */
1318 int nodelay = is_https?http2_nodelay:is_ssl;
1319 struct unbound_socket* ub_sock;
1320 const char* add = NULL;
1321
1322 if(!do_udp && !do_tcp)
1323 return 0;
1324
1325 if(is_pp2) {
1326 if(is_dnscrypt) {
1327 fatal_exit("PROXYv2 and DNSCrypt combination not "
1328 "supported!");
1329 } else if(is_https) {
1330 fatal_exit("PROXYv2 and DoH combination not "
1331 "supported!");
1332 } else if(is_doq) {
1333 fatal_exit("PROXYv2 and DoQ combination not "
1334 "supported!");
1335 }
1336 }
1337
1338 /* Check if both UDP and TCP ports should be open.
1339 * In the case of encrypted channels, probably an unencrypted channel
1340 * at the same port is not desired. */
1341 if((is_ssl || is_https) && !is_doq) do_udp = do_auto = 0;
1342 if((is_doq) && !(is_https || is_ssl)) do_tcp = 0;
1343
1344 if(do_auto) {
1345 ub_sock = calloc(1, sizeof(struct unbound_socket));
1346 if(!ub_sock)
1347 return 0;
1348 if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
1349 &noip6, rcv, snd, reuseport, transparent,
1350 tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock,
1351 (is_dnscrypt?"udpancil_dnscrypt":"udpancil"))) == -1) {
1352 free(ub_sock->addr);
1353 free(ub_sock);
1354 if(noip6) {
1355 log_warn("IPv6 protocol not available");
1356 return 1;
1357 }
1358 return 0;
1359 }
1360 /* getting source addr packet info is highly non-portable */
1361 if(!set_recvpktinfo(s, hints->ai_family)) {
1362 sock_close(s);
1363 free(ub_sock->addr);
1364 free(ub_sock);
1365 return 0;
1366 }
1367 if (sock_queue_timeout && !set_recvtimestamp(s)) {
1368 log_warn("socket timestamping is not available");
1369 }
1370 if(!port_insert(list, s, is_dnscrypt
1371 ?listen_type_udpancil_dnscrypt:listen_type_udpancil,
1372 is_pp2, ub_sock)) {
1373 sock_close(s);
1374 free(ub_sock->addr);
1375 free(ub_sock);
1376 return 0;
1377 }
1378 } else if(do_udp) {
1379 enum listen_type udp_port_type;
1380 ub_sock = calloc(1, sizeof(struct unbound_socket));
1381 if(!ub_sock)
1382 return 0;
1383 if(is_dnscrypt) {
1384 udp_port_type = listen_type_udp_dnscrypt;
1385 add = "dnscrypt";
1386 } else if(is_doq) {
1387 udp_port_type = listen_type_doq;
1388 add = "doq";
1389 if(if_listens_on(ifname, port, 53, NULL)) {
1390 log_err("DNS over QUIC is strictly not "
1391 "allowed on port 53 as per RFC 9250. "
1392 "Port 53 is for DNS datagrams. Error "
1393 "for interface '%s'.", ifname);
1394 free(ub_sock->addr);
1395 free(ub_sock);
1396 return 0;
1397 }
1398 } else {
1399 udp_port_type = listen_type_udp;
1400 add = NULL;
1401 }
1402 /* regular udp socket */
1403 if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
1404 &noip6, rcv, snd, reuseport, transparent,
1405 tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock,
1406 add)) == -1) {
1407 free(ub_sock->addr);
1408 free(ub_sock);
1409 if(noip6) {
1410 log_warn("IPv6 protocol not available");
1411 return 1;
1412 }
1413 return 0;
1414 }
1415 if(udp_port_type == listen_type_doq) {
1416 if(!set_recvpktinfo(s, hints->ai_family)) {
1417 sock_close(s);
1418 free(ub_sock->addr);
1419 free(ub_sock);
1420 return 0;
1421 }
1422 }
1423 if(udp_port_type == listen_type_udp && sock_queue_timeout)
1424 udp_port_type = listen_type_udpancil;
1425 if (sock_queue_timeout) {
1426 if(!set_recvtimestamp(s)) {
1427 log_warn("socket timestamping is not available");
1428 } else {
1429 if(udp_port_type == listen_type_udp)
1430 udp_port_type = listen_type_udpancil;
1431 }
1432 }
1433 if(!port_insert(list, s, udp_port_type, is_pp2, ub_sock)) {
1434 sock_close(s);
1435 free(ub_sock->addr);
1436 free(ub_sock);
1437 return 0;
1438 }
1439 }
1440 if(do_tcp) {
1441 enum listen_type port_type;
1442 ub_sock = calloc(1, sizeof(struct unbound_socket));
1443 if(!ub_sock)
1444 return 0;
1445 if(is_ssl) {
1446 port_type = listen_type_ssl;
1447 add = "tls";
1448 } else if(is_https) {
1449 port_type = listen_type_http;
1450 add = "https";
1451 if(http_notls_downstream)
1452 add = "http";
1453 } else if(is_dnscrypt) {
1454 port_type = listen_type_tcp_dnscrypt;
1455 add = "dnscrypt";
1456 } else {
1457 port_type = listen_type_tcp;
1458 add = NULL;
1459 }
1460 if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1,
1461 &noip6, 0, 0, reuseport, transparent, tcp_mss, nodelay,
1462 freebind, use_systemd, dscp, ub_sock, add)) == -1) {
1463 free(ub_sock->addr);
1464 free(ub_sock);
1465 if(noip6) {
1466 /*log_warn("IPv6 protocol not available");*/
1467 return 1;
1468 }
1469 return 0;
1470 }
1471 if(is_ssl)
1472 verbose(VERB_ALGO, "setup TCP for SSL service");
1473 if(!port_insert(list, s, port_type, is_pp2, ub_sock)) {
1474 sock_close(s);
1475 free(ub_sock->addr);
1476 free(ub_sock);
1477 return 0;
1478 }
1479 }
1480 return 1;
1481 }
1482
1483 /**
1484 * Add items to commpoint list in front.
1485 * @param c: commpoint to add.
1486 * @param front: listen struct.
1487 * @return: false on failure.
1488 */
1489 static int
listen_cp_insert(struct comm_point * c,struct listen_dnsport * front)1490 listen_cp_insert(struct comm_point* c, struct listen_dnsport* front)
1491 {
1492 struct listen_list* item = (struct listen_list*)malloc(
1493 sizeof(struct listen_list));
1494 if(!item)
1495 return 0;
1496 item->com = c;
1497 item->next = front->cps;
1498 front->cps = item;
1499 return 1;
1500 }
1501
listen_setup_locks(void)1502 void listen_setup_locks(void)
1503 {
1504 if(!stream_wait_lock_inited) {
1505 lock_basic_init(&stream_wait_count_lock);
1506 stream_wait_lock_inited = 1;
1507 }
1508 if(!http2_query_buffer_lock_inited) {
1509 lock_basic_init(&http2_query_buffer_count_lock);
1510 http2_query_buffer_lock_inited = 1;
1511 }
1512 if(!http2_response_buffer_lock_inited) {
1513 lock_basic_init(&http2_response_buffer_count_lock);
1514 http2_response_buffer_lock_inited = 1;
1515 }
1516 }
1517
listen_desetup_locks(void)1518 void listen_desetup_locks(void)
1519 {
1520 if(stream_wait_lock_inited) {
1521 stream_wait_lock_inited = 0;
1522 lock_basic_destroy(&stream_wait_count_lock);
1523 }
1524 if(http2_query_buffer_lock_inited) {
1525 http2_query_buffer_lock_inited = 0;
1526 lock_basic_destroy(&http2_query_buffer_count_lock);
1527 }
1528 if(http2_response_buffer_lock_inited) {
1529 http2_response_buffer_lock_inited = 0;
1530 lock_basic_destroy(&http2_response_buffer_count_lock);
1531 }
1532 }
1533
1534 struct listen_dnsport*
listen_create(struct comm_base * base,struct listen_port * ports,size_t bufsize,int tcp_accept_count,int tcp_idle_timeout,int harden_large_queries,uint32_t http_max_streams,char * http_endpoint,int http_notls,struct tcl_list * tcp_conn_limit,void * dot_sslctx,void * doh_sslctx,void * quic_sslctx,struct dt_env * dtenv,struct doq_table * doq_table,struct ub_randstate * rnd,struct config_file * cfg,comm_point_callback_type * cb,void * cb_arg)1535 listen_create(struct comm_base* base, struct listen_port* ports,
1536 size_t bufsize, int tcp_accept_count, int tcp_idle_timeout,
1537 int harden_large_queries, uint32_t http_max_streams,
1538 char* http_endpoint, int http_notls, struct tcl_list* tcp_conn_limit,
1539 void* dot_sslctx, void* doh_sslctx, void* quic_sslctx,
1540 struct dt_env* dtenv,
1541 struct doq_table* doq_table,
1542 struct ub_randstate* rnd,struct config_file* cfg,
1543 comm_point_callback_type* cb, void *cb_arg)
1544 {
1545 struct listen_dnsport* front = (struct listen_dnsport*)
1546 malloc(sizeof(struct listen_dnsport));
1547 if(!front)
1548 return NULL;
1549 front->cps = NULL;
1550 front->udp_buff = sldns_buffer_new(bufsize);
1551 #ifdef USE_DNSCRYPT
1552 front->dnscrypt_udp_buff = NULL;
1553 #endif
1554 if(!front->udp_buff) {
1555 free(front);
1556 return NULL;
1557 }
1558
1559 /* create comm points as needed */
1560 while(ports) {
1561 struct comm_point* cp = NULL;
1562 if(ports->ftype == listen_type_udp ||
1563 ports->ftype == listen_type_udp_dnscrypt) {
1564 cp = comm_point_create_udp(base, ports->fd,
1565 front->udp_buff, ports->pp2_enabled, cb,
1566 cb_arg, ports->socket);
1567 } else if(ports->ftype == listen_type_doq && doq_table) {
1568 #ifndef HAVE_NGTCP2
1569 log_warn("Unbound is not compiled with "
1570 "ngtcp2. This is required to use DNS "
1571 "over QUIC.");
1572 #endif
1573 cp = comm_point_create_doq(base, ports->fd,
1574 front->udp_buff, cb, cb_arg, ports->socket,
1575 doq_table, rnd, quic_sslctx, cfg);
1576 } else if(ports->ftype == listen_type_tcp ||
1577 ports->ftype == listen_type_tcp_dnscrypt) {
1578 cp = comm_point_create_tcp(base, ports->fd,
1579 tcp_accept_count, tcp_idle_timeout,
1580 harden_large_queries, 0, NULL,
1581 tcp_conn_limit, bufsize, front->udp_buff,
1582 ports->ftype, ports->pp2_enabled, cb, cb_arg,
1583 ports->socket);
1584 } else if(ports->ftype == listen_type_ssl ||
1585 ports->ftype == listen_type_http) {
1586 cp = comm_point_create_tcp(base, ports->fd,
1587 tcp_accept_count, tcp_idle_timeout,
1588 harden_large_queries,
1589 http_max_streams, http_endpoint,
1590 tcp_conn_limit, bufsize, front->udp_buff,
1591 ports->ftype, ports->pp2_enabled, cb, cb_arg,
1592 ports->socket);
1593 if(ports->ftype == listen_type_http) {
1594 if(!doh_sslctx && !http_notls) {
1595 log_warn("HTTPS port configured, but "
1596 "no TLS tls-service-key or "
1597 "tls-service-pem set");
1598 }
1599 #ifndef HAVE_SSL_CTX_SET_ALPN_SELECT_CB
1600 if(!http_notls) {
1601 log_warn("Unbound is not compiled "
1602 "with an OpenSSL version "
1603 "supporting ALPN "
1604 "(OpenSSL >= 1.0.2). This "
1605 "is required to use "
1606 "DNS-over-HTTPS");
1607 }
1608 #endif
1609 #ifndef HAVE_NGHTTP2_NGHTTP2_H
1610 log_warn("Unbound is not compiled with "
1611 "nghttp2. This is required to use "
1612 "DNS-over-HTTPS.");
1613 #endif
1614 }
1615 } else if(ports->ftype == listen_type_udpancil ||
1616 ports->ftype == listen_type_udpancil_dnscrypt) {
1617 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
1618 cp = comm_point_create_udp_ancil(base, ports->fd,
1619 front->udp_buff, ports->pp2_enabled, cb,
1620 cb_arg, ports->socket);
1621 #else
1622 log_warn("This system does not support UDP ancillary data.");
1623 #endif
1624 }
1625 if(!cp) {
1626 log_err("can't create commpoint");
1627 listen_delete(front);
1628 return NULL;
1629 }
1630 if((http_notls && ports->ftype == listen_type_http) ||
1631 (ports->ftype == listen_type_tcp) ||
1632 (ports->ftype == listen_type_udp) ||
1633 (ports->ftype == listen_type_udpancil) ||
1634 (ports->ftype == listen_type_tcp_dnscrypt) ||
1635 (ports->ftype == listen_type_udp_dnscrypt) ||
1636 (ports->ftype == listen_type_udpancil_dnscrypt)) {
1637 cp->ssl = NULL;
1638 } else if(ports->ftype == listen_type_doq) {
1639 cp->ssl = quic_sslctx;
1640 } else if(ports->ftype == listen_type_http) {
1641 cp->ssl = doh_sslctx;
1642 } else {
1643 cp->ssl = dot_sslctx;
1644 }
1645 cp->dtenv = dtenv;
1646 cp->do_not_close = 1;
1647 #ifdef USE_DNSCRYPT
1648 if (ports->ftype == listen_type_udp_dnscrypt ||
1649 ports->ftype == listen_type_tcp_dnscrypt ||
1650 ports->ftype == listen_type_udpancil_dnscrypt) {
1651 cp->dnscrypt = 1;
1652 cp->dnscrypt_buffer = sldns_buffer_new(bufsize);
1653 if(!cp->dnscrypt_buffer) {
1654 log_err("can't alloc dnscrypt_buffer");
1655 comm_point_delete(cp);
1656 listen_delete(front);
1657 return NULL;
1658 }
1659 front->dnscrypt_udp_buff = cp->dnscrypt_buffer;
1660 }
1661 #endif
1662 if(!listen_cp_insert(cp, front)) {
1663 log_err("malloc failed");
1664 comm_point_delete(cp);
1665 listen_delete(front);
1666 return NULL;
1667 }
1668 ports = ports->next;
1669 }
1670 if(!front->cps) {
1671 log_err("Could not open sockets to accept queries.");
1672 listen_delete(front);
1673 return NULL;
1674 }
1675
1676 return front;
1677 }
1678
1679 void
listen_list_delete(struct listen_list * list)1680 listen_list_delete(struct listen_list* list)
1681 {
1682 struct listen_list *p = list, *pn;
1683 while(p) {
1684 pn = p->next;
1685 comm_point_delete(p->com);
1686 free(p);
1687 p = pn;
1688 }
1689 }
1690
1691 void
listen_delete(struct listen_dnsport * front)1692 listen_delete(struct listen_dnsport* front)
1693 {
1694 if(!front)
1695 return;
1696 listen_list_delete(front->cps);
1697 #ifdef USE_DNSCRYPT
1698 if(front->dnscrypt_udp_buff &&
1699 front->udp_buff != front->dnscrypt_udp_buff) {
1700 sldns_buffer_free(front->dnscrypt_udp_buff);
1701 }
1702 #endif
1703 sldns_buffer_free(front->udp_buff);
1704 free(front);
1705 }
1706
#ifdef HAVE_GETIFADDRS
/**
 * Append a malloced copy of a string to a malloced string array.
 * @param ip_addresses: the array, reallocated to hold one more item.
 * @param ip_addresses_size: number of items, incremented on success.
 * @param str: the string to copy.
 * @return 0 if out of memory (logged), 1 on success.
 */
static int
ifa_list_append(char ***ip_addresses, int *ip_addresses_size, const char *str)
{
	void *grown = realloc(*ip_addresses,
		sizeof(char *) * (*ip_addresses_size + 1));
	if(!grown) {
		log_err("realloc failed: out of memory");
		return 0;
	}
	*ip_addresses = grown;
	(*ip_addresses)[*ip_addresses_size] = strdup(str);
	if(!(*ip_addresses)[*ip_addresses_size]) {
		log_err("strdup failed: out of memory");
		return 0;
	}
	(*ip_addresses_size)++;
	return 1;
}

/**
 * Replace an interface name by the addresses of that interface.
 * The part of search_ifa before the last '@' (or all of it, if there is
 * no '@') is compared with the names in the interface list. For every
 * IPv4 address (and IPv6 address, if compiled with INET6) of a matching
 * interface, the address text is appended to the array; for IPv6 a
 * "%name" part is added when if_indextoname gives a name for the scope
 * id. The "@..." part of search_ifa, if any, is put after the address.
 * If nothing was appended, a copy of search_ifa itself is appended.
 * @param ifas: interface list to search.
 * @param search_ifa: the configured interface text.
 * @param ip_addresses: malloced string array that is appended to.
 * @param ip_addresses_size: number of items in the array, updated.
 * @return 0 on failure (inet_ntop failed or out of memory), 1 otherwise.
 */
static int
resolve_ifa_name(struct ifaddrs *ifas, const char *search_ifa, char ***ip_addresses, int *ip_addresses_size)
{
	struct ifaddrs *cur;
	int size_at_entry = *ip_addresses_size;
	/* split search_ifa in a name and an optional "@..." suffix */
	const char *at = strrchr(search_ifa, '@');
	const char *suffix = at ? at : "";
	size_t name_len = at ? (size_t)(at - search_ifa) : strlen(search_ifa);

	for(cur = ifas; cur != NULL; cur = cur->ifa_next) {
		sa_family_t family;
#ifdef INET6      /* |   address ip    | % |  ifa name  | @ |  port  | nul */
		char addr_buf[INET6_ADDRSTRLEN + 1 + IF_NAMESIZE + 1 + 16 + 1];
#else
		char addr_buf[INET_ADDRSTRLEN + 1 + 16 + 1];
#endif

		/* the interface name has to match exactly */
		if(strlen(cur->ifa_name) != name_len ||
			strncmp(cur->ifa_name, search_ifa, name_len) != 0)
			continue;
		if(cur->ifa_addr == NULL)
			continue;

		family = cur->ifa_addr->sa_family;
		if(family == AF_INET) {
			char a4[INET_ADDRSTRLEN + 1];
			struct sockaddr_in *in4 = (struct sockaddr_in *)
				cur->ifa_addr;
			if(!inet_ntop(family, &in4->sin_addr, a4, sizeof(a4))) {
				log_err("inet_ntop failed");
				return 0;
			}
			snprintf(addr_buf, sizeof(addr_buf), "%s%s",
				a4, suffix);
		}
#ifdef INET6
		else if(family == AF_INET6) {
			struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)
				cur->ifa_addr;
			char a6[INET6_ADDRSTRLEN + 1];
			char scope_name[IF_NAMESIZE + 1];
			scope_name[0] = 0;
			if(!inet_ntop(family, &in6->sin6_addr, a6, sizeof(a6))) {
				log_err("inet_ntop failed");
				return 0;
			}
			(void)if_indextoname(in6->sin6_scope_id,
				(char *)scope_name);
			if(scope_name[0] != 0) {
				snprintf(addr_buf, sizeof(addr_buf),
					"%s%%%s%s", a6, scope_name, suffix);
			} else {
				snprintf(addr_buf, sizeof(addr_buf), "%s%s",
					a6, suffix);
			}
		}
#endif
		else {
			/* other address families are skipped */
			continue;
		}
		verbose(4, "interface %s has address %s", search_ifa, addr_buf);

		if(!ifa_list_append(ip_addresses, ip_addresses_size, addr_buf))
			return 0;
	}

	/* nothing found for it: keep the configured text as it is */
	if(*ip_addresses_size == size_at_entry) {
		if(!ifa_list_append(ip_addresses, ip_addresses_size,
			search_ifa))
			return 0;
	}
	return 1;
}
#endif /* HAVE_GETIFADDRS */
1810
resolve_interface_names(char ** ifs,int num_ifs,struct config_strlist * list,char *** resif,int * num_resif)1811 int resolve_interface_names(char** ifs, int num_ifs,
1812 struct config_strlist* list, char*** resif, int* num_resif)
1813 {
1814 #ifdef HAVE_GETIFADDRS
1815 struct ifaddrs *addrs = NULL;
1816 if(num_ifs == 0 && list == NULL) {
1817 *resif = NULL;
1818 *num_resif = 0;
1819 return 1;
1820 }
1821 if(getifaddrs(&addrs) == -1) {
1822 log_err("failed to list interfaces: getifaddrs: %s",
1823 strerror(errno));
1824 freeifaddrs(addrs);
1825 return 0;
1826 }
1827 if(ifs) {
1828 int i;
1829 for(i=0; i<num_ifs; i++) {
1830 if(!resolve_ifa_name(addrs, ifs[i], resif, num_resif)) {
1831 freeifaddrs(addrs);
1832 config_del_strarray(*resif, *num_resif);
1833 *resif = NULL;
1834 *num_resif = 0;
1835 return 0;
1836 }
1837 }
1838 }
1839 if(list) {
1840 struct config_strlist* p;
1841 for(p = list; p; p = p->next) {
1842 if(!resolve_ifa_name(addrs, p->str, resif, num_resif)) {
1843 freeifaddrs(addrs);
1844 config_del_strarray(*resif, *num_resif);
1845 *resif = NULL;
1846 *num_resif = 0;
1847 return 0;
1848 }
1849 }
1850 }
1851 freeifaddrs(addrs);
1852 return 1;
1853 #else
1854 struct config_strlist* p;
1855 if(num_ifs == 0 && list == NULL) {
1856 *resif = NULL;
1857 *num_resif = 0;
1858 return 1;
1859 }
1860 *num_resif = num_ifs;
1861 for(p = list; p; p = p->next) {
1862 (*num_resif)++;
1863 }
1864 *resif = calloc(*num_resif, sizeof(**resif));
1865 if(!*resif) {
1866 log_err("out of memory");
1867 return 0;
1868 }
1869 if(ifs) {
1870 int i;
1871 for(i=0; i<num_ifs; i++) {
1872 (*resif)[i] = strdup(ifs[i]);
1873 if(!((*resif)[i])) {
1874 log_err("out of memory");
1875 config_del_strarray(*resif, *num_resif);
1876 *resif = NULL;
1877 *num_resif = 0;
1878 return 0;
1879 }
1880 }
1881 }
1882 if(list) {
1883 int idx = num_ifs;
1884 for(p = list; p; p = p->next) {
1885 (*resif)[idx] = strdup(p->str);
1886 if(!((*resif)[idx])) {
1887 log_err("out of memory");
1888 config_del_strarray(*resif, *num_resif);
1889 *resif = NULL;
1890 *num_resif = 0;
1891 return 0;
1892 }
1893 idx++;
1894 }
1895 }
1896 return 1;
1897 #endif /* HAVE_GETIFADDRS */
1898 }
1899
1900 struct listen_port*
listening_ports_open(struct config_file * cfg,char ** ifs,int num_ifs,int * reuseport)1901 listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs,
1902 int* reuseport)
1903 {
1904 struct listen_port* list = NULL;
1905 struct addrinfo hints;
1906 int i, do_ip4, do_ip6;
1907 int do_tcp, do_auto;
1908 do_ip4 = cfg->do_ip4;
1909 do_ip6 = cfg->do_ip6;
1910 do_tcp = cfg->do_tcp;
1911 do_auto = cfg->if_automatic && cfg->do_udp;
1912 if(cfg->incoming_num_tcp == 0)
1913 do_tcp = 0;
1914
1915 /* getaddrinfo */
1916 memset(&hints, 0, sizeof(hints));
1917 hints.ai_flags = AI_PASSIVE;
1918 /* no name lookups on our listening ports */
1919 if(num_ifs > 0)
1920 hints.ai_flags |= AI_NUMERICHOST;
1921 hints.ai_family = AF_UNSPEC;
1922 #ifndef INET6
1923 do_ip6 = 0;
1924 #endif
1925 if(!do_ip4 && !do_ip6) {
1926 return NULL;
1927 }
1928 /* create ip4 and ip6 ports so that return addresses are nice. */
1929 if(do_auto || num_ifs == 0) {
1930 if(do_auto && cfg->if_automatic_ports &&
1931 cfg->if_automatic_ports[0]!=0) {
1932 char* now = cfg->if_automatic_ports;
1933 while(now && *now) {
1934 char* after;
1935 int extraport;
1936 while(isspace((unsigned char)*now))
1937 now++;
1938 if(!*now)
1939 break;
1940 after = now;
1941 extraport = (int)strtol(now, &after, 10);
1942 if(extraport < 0 || extraport > 65535) {
1943 log_err("interface-automatic-ports port number out of range, at position %d of '%s'", (int)(now-cfg->if_automatic_ports)+1, cfg->if_automatic_ports);
1944 listening_ports_free(list);
1945 return NULL;
1946 }
1947 if(extraport == 0 && now == after) {
1948 log_err("interface-automatic-ports could not be parsed, at position %d of '%s'", (int)(now-cfg->if_automatic_ports)+1, cfg->if_automatic_ports);
1949 listening_ports_free(list);
1950 return NULL;
1951 }
1952 now = after;
1953 if(do_ip6) {
1954 hints.ai_family = AF_INET6;
1955 if(!ports_create_if("::0",
1956 do_auto, cfg->do_udp, do_tcp,
1957 &hints, extraport, &list,
1958 cfg->so_rcvbuf, cfg->so_sndbuf,
1959 cfg->ssl_port, cfg->tls_additional_port,
1960 cfg->https_port,
1961 cfg->proxy_protocol_port,
1962 reuseport, cfg->ip_transparent,
1963 cfg->tcp_mss, cfg->ip_freebind,
1964 cfg->http_nodelay, cfg->use_systemd,
1965 cfg->dnscrypt_port, cfg->ip_dscp,
1966 cfg->quic_port, cfg->http_notls_downstream,
1967 cfg->sock_queue_timeout)) {
1968 listening_ports_free(list);
1969 return NULL;
1970 }
1971 }
1972 if(do_ip4) {
1973 hints.ai_family = AF_INET;
1974 if(!ports_create_if("0.0.0.0",
1975 do_auto, cfg->do_udp, do_tcp,
1976 &hints, extraport, &list,
1977 cfg->so_rcvbuf, cfg->so_sndbuf,
1978 cfg->ssl_port, cfg->tls_additional_port,
1979 cfg->https_port,
1980 cfg->proxy_protocol_port,
1981 reuseport, cfg->ip_transparent,
1982 cfg->tcp_mss, cfg->ip_freebind,
1983 cfg->http_nodelay, cfg->use_systemd,
1984 cfg->dnscrypt_port, cfg->ip_dscp,
1985 cfg->quic_port, cfg->http_notls_downstream,
1986 cfg->sock_queue_timeout)) {
1987 listening_ports_free(list);
1988 return NULL;
1989 }
1990 }
1991 }
1992 return list;
1993 }
1994 if(do_ip6) {
1995 hints.ai_family = AF_INET6;
1996 if(!ports_create_if(do_auto?"::0":"::1",
1997 do_auto, cfg->do_udp, do_tcp,
1998 &hints, cfg->port, &list,
1999 cfg->so_rcvbuf, cfg->so_sndbuf,
2000 cfg->ssl_port, cfg->tls_additional_port,
2001 cfg->https_port, cfg->proxy_protocol_port,
2002 reuseport, cfg->ip_transparent,
2003 cfg->tcp_mss, cfg->ip_freebind,
2004 cfg->http_nodelay, cfg->use_systemd,
2005 cfg->dnscrypt_port, cfg->ip_dscp,
2006 cfg->quic_port, cfg->http_notls_downstream,
2007 cfg->sock_queue_timeout)) {
2008 listening_ports_free(list);
2009 return NULL;
2010 }
2011 }
2012 if(do_ip4) {
2013 hints.ai_family = AF_INET;
2014 if(!ports_create_if(do_auto?"0.0.0.0":"127.0.0.1",
2015 do_auto, cfg->do_udp, do_tcp,
2016 &hints, cfg->port, &list,
2017 cfg->so_rcvbuf, cfg->so_sndbuf,
2018 cfg->ssl_port, cfg->tls_additional_port,
2019 cfg->https_port, cfg->proxy_protocol_port,
2020 reuseport, cfg->ip_transparent,
2021 cfg->tcp_mss, cfg->ip_freebind,
2022 cfg->http_nodelay, cfg->use_systemd,
2023 cfg->dnscrypt_port, cfg->ip_dscp,
2024 cfg->quic_port, cfg->http_notls_downstream,
2025 cfg->sock_queue_timeout)) {
2026 listening_ports_free(list);
2027 return NULL;
2028 }
2029 }
2030 } else for(i = 0; i<num_ifs; i++) {
2031 if(str_is_ip6(ifs[i])) {
2032 if(!do_ip6)
2033 continue;
2034 hints.ai_family = AF_INET6;
2035 if(!ports_create_if(ifs[i], 0, cfg->do_udp,
2036 do_tcp, &hints, cfg->port, &list,
2037 cfg->so_rcvbuf, cfg->so_sndbuf,
2038 cfg->ssl_port, cfg->tls_additional_port,
2039 cfg->https_port, cfg->proxy_protocol_port,
2040 reuseport, cfg->ip_transparent,
2041 cfg->tcp_mss, cfg->ip_freebind,
2042 cfg->http_nodelay, cfg->use_systemd,
2043 cfg->dnscrypt_port, cfg->ip_dscp,
2044 cfg->quic_port, cfg->http_notls_downstream,
2045 cfg->sock_queue_timeout)) {
2046 listening_ports_free(list);
2047 return NULL;
2048 }
2049 } else {
2050 if(!do_ip4)
2051 continue;
2052 hints.ai_family = AF_INET;
2053 if(!ports_create_if(ifs[i], 0, cfg->do_udp,
2054 do_tcp, &hints, cfg->port, &list,
2055 cfg->so_rcvbuf, cfg->so_sndbuf,
2056 cfg->ssl_port, cfg->tls_additional_port,
2057 cfg->https_port, cfg->proxy_protocol_port,
2058 reuseport, cfg->ip_transparent,
2059 cfg->tcp_mss, cfg->ip_freebind,
2060 cfg->http_nodelay, cfg->use_systemd,
2061 cfg->dnscrypt_port, cfg->ip_dscp,
2062 cfg->quic_port, cfg->http_notls_downstream,
2063 cfg->sock_queue_timeout)) {
2064 listening_ports_free(list);
2065 return NULL;
2066 }
2067 }
2068 }
2069
2070 return list;
2071 }
2072
listening_ports_free(struct listen_port * list)2073 void listening_ports_free(struct listen_port* list)
2074 {
2075 struct listen_port* nx;
2076 while(list) {
2077 nx = list->next;
2078 if(list->fd != -1) {
2079 sock_close(list->fd);
2080 }
2081 /* rc_ports don't have ub_socket */
2082 if(list->socket) {
2083 free(list->socket->addr);
2084 free(list->socket);
2085 }
2086 free(list);
2087 list = nx;
2088 }
2089 }
2090
listen_get_mem(struct listen_dnsport * listen)2091 size_t listen_get_mem(struct listen_dnsport* listen)
2092 {
2093 struct listen_list* p;
2094 size_t s = sizeof(*listen) + sizeof(*listen->base) +
2095 sizeof(*listen->udp_buff) +
2096 sldns_buffer_capacity(listen->udp_buff);
2097 #ifdef USE_DNSCRYPT
2098 s += sizeof(*listen->dnscrypt_udp_buff);
2099 if(listen->udp_buff != listen->dnscrypt_udp_buff){
2100 s += sldns_buffer_capacity(listen->dnscrypt_udp_buff);
2101 }
2102 #endif
2103 for(p = listen->cps; p; p = p->next) {
2104 s += sizeof(*p);
2105 s += comm_point_get_mem(p->com);
2106 }
2107 return s;
2108 }
2109
listen_stop_accept(struct listen_dnsport * listen)2110 void listen_stop_accept(struct listen_dnsport* listen)
2111 {
2112 /* do not stop the ones that have no tcp_free list
2113 * (they have already stopped listening) */
2114 struct listen_list* p;
2115 for(p=listen->cps; p; p=p->next) {
2116 if(p->com->type == comm_tcp_accept &&
2117 p->com->tcp_free != NULL) {
2118 comm_point_stop_listening(p->com);
2119 }
2120 }
2121 }
2122
listen_start_accept(struct listen_dnsport * listen)2123 void listen_start_accept(struct listen_dnsport* listen)
2124 {
2125 /* do not start the ones that have no tcp_free list, it is no
2126 * use to listen to them because they have no free tcp handlers */
2127 struct listen_list* p;
2128 for(p=listen->cps; p; p=p->next) {
2129 if(p->com->type == comm_tcp_accept &&
2130 p->com->tcp_free != NULL) {
2131 comm_point_start_listening(p->com, -1, -1);
2132 }
2133 }
2134 }
2135
2136 struct tcp_req_info*
tcp_req_info_create(struct sldns_buffer * spoolbuf)2137 tcp_req_info_create(struct sldns_buffer* spoolbuf)
2138 {
2139 struct tcp_req_info* req = (struct tcp_req_info*)malloc(sizeof(*req));
2140 if(!req) {
2141 log_err("malloc failure for new stream outoforder processing structure");
2142 return NULL;
2143 }
2144 memset(req, 0, sizeof(*req));
2145 req->spool_buffer = spoolbuf;
2146 return req;
2147 }
2148
/**
 * Delete a tcp_req_info: clear its lists and free the structure.
 * @param req: structure to delete, NULL is ignored.
 */
void
tcp_req_info_delete(struct tcp_req_info* req)
{
	if(req != NULL) {
		tcp_req_info_clear(req);
		/* req->cp points back to the commpoint that owns this
		 * struct and called delete on us, it is not freed here.
		 * The spool_buffer is the shared udp buffer and is not
		 * deleted here either. */
		free(req);
	}
}
2159
tcp_req_info_clear(struct tcp_req_info * req)2160 void tcp_req_info_clear(struct tcp_req_info* req)
2161 {
2162 struct tcp_req_open_item* open, *nopen;
2163 struct tcp_req_done_item* item, *nitem;
2164 if(!req) return;
2165
2166 /* free outstanding request mesh reply entries */
2167 open = req->open_req_list;
2168 while(open) {
2169 nopen = open->next;
2170 mesh_state_remove_reply(open->mesh, open->mesh_state, req->cp);
2171 free(open);
2172 open = nopen;
2173 }
2174 req->open_req_list = NULL;
2175 req->num_open_req = 0;
2176
2177 /* free pending writable result packets */
2178 item = req->done_req_list;
2179 while(item) {
2180 nitem = item->next;
2181 lock_basic_lock(&stream_wait_count_lock);
2182 stream_wait_count -= (sizeof(struct tcp_req_done_item)
2183 +item->len);
2184 lock_basic_unlock(&stream_wait_count_lock);
2185 free(item->buf);
2186 free(item);
2187 item = nitem;
2188 }
2189 req->done_req_list = NULL;
2190 req->num_done_req = 0;
2191 req->read_is_closed = 0;
2192 }
2193
2194 void
tcp_req_info_remove_mesh_state(struct tcp_req_info * req,struct mesh_state * m)2195 tcp_req_info_remove_mesh_state(struct tcp_req_info* req, struct mesh_state* m)
2196 {
2197 struct tcp_req_open_item* open, *prev = NULL;
2198 if(!req || !m) return;
2199 open = req->open_req_list;
2200 while(open) {
2201 if(open->mesh_state == m) {
2202 struct tcp_req_open_item* next;
2203 if(prev) prev->next = open->next;
2204 else req->open_req_list = open->next;
2205 /* caller has to manage the mesh state reply entry */
2206 next = open->next;
2207 free(open);
2208 req->num_open_req --;
2209
2210 /* prev = prev; */
2211 open = next;
2212 continue;
2213 }
2214 prev = open;
2215 open = open->next;
2216 }
2217 }
2218
2219 /** setup listening for read or write */
2220 static void
tcp_req_info_setup_listen(struct tcp_req_info * req)2221 tcp_req_info_setup_listen(struct tcp_req_info* req)
2222 {
2223 int wr = 0;
2224 int rd = 0;
2225
2226 if(req->cp->tcp_byte_count != 0) {
2227 /* cannot change, halfway through */
2228 return;
2229 }
2230
2231 if(!req->cp->tcp_is_reading)
2232 wr = 1;
2233 if(!req->read_is_closed)
2234 rd = 1;
2235
2236 if(wr) {
2237 req->cp->tcp_is_reading = 0;
2238 comm_point_stop_listening(req->cp);
2239 comm_point_start_listening(req->cp, -1,
2240 adjusted_tcp_timeout(req->cp));
2241 } else if(rd) {
2242 req->cp->tcp_is_reading = 1;
2243 comm_point_stop_listening(req->cp);
2244 comm_point_start_listening(req->cp, -1,
2245 adjusted_tcp_timeout(req->cp));
2246 /* and also read it (from SSL stack buffers), so
2247 * no event read event is expected since the remainder of
2248 * the TLS frame is sitting in the buffers. */
2249 req->read_again = 1;
2250 } else {
2251 comm_point_stop_listening(req->cp);
2252 comm_point_start_listening(req->cp, -1,
2253 adjusted_tcp_timeout(req->cp));
2254 comm_point_listen_for_rw(req->cp, 0, 0);
2255 }
2256 }
2257
2258 /** remove first item from list of pending results */
2259 static struct tcp_req_done_item*
tcp_req_info_pop_done(struct tcp_req_info * req)2260 tcp_req_info_pop_done(struct tcp_req_info* req)
2261 {
2262 struct tcp_req_done_item* item;
2263 log_assert(req->num_done_req > 0 && req->done_req_list);
2264 item = req->done_req_list;
2265 lock_basic_lock(&stream_wait_count_lock);
2266 stream_wait_count -= (sizeof(struct tcp_req_done_item)+item->len);
2267 lock_basic_unlock(&stream_wait_count_lock);
2268 req->done_req_list = req->done_req_list->next;
2269 req->num_done_req --;
2270 return item;
2271 }
2272
2273 /** Send given buffer and setup to write */
2274 static void
tcp_req_info_start_write_buf(struct tcp_req_info * req,uint8_t * buf,size_t len)2275 tcp_req_info_start_write_buf(struct tcp_req_info* req, uint8_t* buf,
2276 size_t len)
2277 {
2278 sldns_buffer_clear(req->cp->buffer);
2279 sldns_buffer_write(req->cp->buffer, buf, len);
2280 sldns_buffer_flip(req->cp->buffer);
2281
2282 req->cp->tcp_is_reading = 0; /* we are now writing */
2283 }
2284
2285 /** pick up the next result and start writing it to the channel */
2286 static void
tcp_req_pickup_next_result(struct tcp_req_info * req)2287 tcp_req_pickup_next_result(struct tcp_req_info* req)
2288 {
2289 if(req->num_done_req > 0) {
2290 /* unlist the done item from the list of pending results */
2291 struct tcp_req_done_item* item = tcp_req_info_pop_done(req);
2292 tcp_req_info_start_write_buf(req, item->buf, item->len);
2293 free(item->buf);
2294 free(item);
2295 }
2296 }
2297
2298 /** the read channel has closed */
2299 int
tcp_req_info_handle_read_close(struct tcp_req_info * req)2300 tcp_req_info_handle_read_close(struct tcp_req_info* req)
2301 {
2302 verbose(VERB_ALGO, "tcp channel read side closed %d", req->cp->fd);
2303 /* RFC 7766 6.2.4 says to drop pending replies when client closes. */
2304 return 0; /* drop connection */
2305 }
2306
2307 void
tcp_req_info_handle_writedone(struct tcp_req_info * req)2308 tcp_req_info_handle_writedone(struct tcp_req_info* req)
2309 {
2310 /* back to reading state, we finished this write event */
2311 sldns_buffer_clear(req->cp->buffer);
2312 if(req->num_done_req == 0 && req->read_is_closed) {
2313 /* no more to write and nothing to read, close it */
2314 comm_point_drop_reply(&req->cp->repinfo);
2315 return;
2316 }
2317 req->cp->tcp_is_reading = 1;
2318 /* see if another result needs writing */
2319 tcp_req_pickup_next_result(req);
2320
2321 /* see if there is more to write, if not stop_listening for writing */
2322 /* see if new requests are allowed, if so, start_listening
2323 * for reading */
2324 tcp_req_info_setup_listen(req);
2325 }
2326
2327 void
tcp_req_info_handle_readdone(struct tcp_req_info * req)2328 tcp_req_info_handle_readdone(struct tcp_req_info* req)
2329 {
2330 struct comm_point* c = req->cp;
2331
2332 /* we want to read up several requests, unless there are
2333 * pending answers */
2334
2335 req->is_drop = 0;
2336 req->is_reply = 0;
2337 req->in_worker_handle = 1;
2338 sldns_buffer_set_limit(req->spool_buffer, 0);
2339 /* handle the current request */
2340 /* this calls the worker handle request routine that could give
2341 * a cache response, or localdata response, or drop the reply,
2342 * or schedule a mesh entry for later */
2343 fptr_ok(fptr_whitelist_comm_point(c->callback));
2344 if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
2345 req->in_worker_handle = 0;
2346 /* there is an answer, put it up. It is already in the
2347 * c->buffer, just send it. */
2348 /* since we were just reading a query, the channel is
2349 * clear to write to */
2350 send_it:
2351 c->tcp_is_reading = 0;
2352 comm_point_stop_listening(c);
2353 comm_point_start_listening(c, -1, adjusted_tcp_timeout(c));
2354 return;
2355 }
2356 req->in_worker_handle = 0;
2357 /* it should be waiting in the mesh for recursion.
2358 * If mesh failed to add a new entry and called commpoint_drop_reply.
2359 * Then the mesh state has been cleared. */
2360 if(req->is_drop) {
2361 /* the reply has been dropped, stream has been closed. */
2362 return;
2363 }
2364 /* If mesh failed(mallocfail) and called commpoint_send_reply with
2365 * something like servfail then we pick up that reply below. */
2366 if(req->is_reply) {
2367 goto send_it;
2368 }
2369
2370 sldns_buffer_clear(c->buffer);
2371 /* if pending answers, pick up an answer and start sending it */
2372 tcp_req_pickup_next_result(req);
2373
2374 /* if answers pending, start sending answers */
2375 /* read more requests if we can have more requests */
2376 tcp_req_info_setup_listen(req);
2377 }
2378
2379 int
tcp_req_info_add_meshstate(struct tcp_req_info * req,struct mesh_area * mesh,struct mesh_state * m)2380 tcp_req_info_add_meshstate(struct tcp_req_info* req,
2381 struct mesh_area* mesh, struct mesh_state* m)
2382 {
2383 struct tcp_req_open_item* item;
2384 log_assert(req && mesh && m);
2385 item = (struct tcp_req_open_item*)malloc(sizeof(*item));
2386 if(!item) return 0;
2387 item->next = req->open_req_list;
2388 item->mesh = mesh;
2389 item->mesh_state = m;
2390 req->open_req_list = item;
2391 req->num_open_req++;
2392 return 1;
2393 }
2394
2395 /** Add a result to the result list. At the end. */
2396 static int
tcp_req_info_add_result(struct tcp_req_info * req,uint8_t * buf,size_t len)2397 tcp_req_info_add_result(struct tcp_req_info* req, uint8_t* buf, size_t len)
2398 {
2399 struct tcp_req_done_item* last = NULL;
2400 struct tcp_req_done_item* item;
2401 size_t space;
2402
2403 /* see if we have space */
2404 space = sizeof(struct tcp_req_done_item) + len;
2405 lock_basic_lock(&stream_wait_count_lock);
2406 if(stream_wait_count + space > stream_wait_max) {
2407 lock_basic_unlock(&stream_wait_count_lock);
2408 verbose(VERB_ALGO, "drop stream reply, no space left, in stream-wait-size");
2409 return 0;
2410 }
2411 stream_wait_count += space;
2412 lock_basic_unlock(&stream_wait_count_lock);
2413
2414 /* find last element */
2415 last = req->done_req_list;
2416 while(last && last->next)
2417 last = last->next;
2418
2419 /* create new element */
2420 item = (struct tcp_req_done_item*)malloc(sizeof(*item));
2421 if(!item) {
2422 log_err("malloc failure, for stream result list");
2423 return 0;
2424 }
2425 item->next = NULL;
2426 item->len = len;
2427 item->buf = memdup(buf, len);
2428 if(!item->buf) {
2429 free(item);
2430 log_err("malloc failure, adding reply to stream result list");
2431 return 0;
2432 }
2433
2434 /* link in */
2435 if(last) last->next = item;
2436 else req->done_req_list = item;
2437 req->num_done_req++;
2438 return 1;
2439 }
2440
2441 void
tcp_req_info_send_reply(struct tcp_req_info * req)2442 tcp_req_info_send_reply(struct tcp_req_info* req)
2443 {
2444 if(req->in_worker_handle) {
2445 /* reply from mesh is in the spool_buffer */
2446 /* copy now, so that the spool buffer is free for other tasks
2447 * before the callback is done */
2448 sldns_buffer_clear(req->cp->buffer);
2449 sldns_buffer_write(req->cp->buffer,
2450 sldns_buffer_begin(req->spool_buffer),
2451 sldns_buffer_limit(req->spool_buffer));
2452 sldns_buffer_flip(req->cp->buffer);
2453 req->is_reply = 1;
2454 return;
2455 }
2456 /* now that the query has been handled, that mesh_reply entry
2457 * should be removed, from the tcp_req_info list,
2458 * the mesh state cleanup removes then with region_cleanup and
2459 * replies_sent true. */
2460 /* see if we can send it straight away (we are not doing
2461 * anything else). If so, copy to buffer and start */
2462 if(req->cp->tcp_is_reading && req->cp->tcp_byte_count == 0) {
2463 /* buffer is free, and was ready to read new query into,
2464 * but we are now going to use it to send this answer */
2465 tcp_req_info_start_write_buf(req,
2466 sldns_buffer_begin(req->spool_buffer),
2467 sldns_buffer_limit(req->spool_buffer));
2468 /* switch to listen to write events */
2469 comm_point_stop_listening(req->cp);
2470 comm_point_start_listening(req->cp, -1,
2471 adjusted_tcp_timeout(req->cp));
2472 return;
2473 }
2474 /* queue up the answer behind the others already pending */
2475 if(!tcp_req_info_add_result(req, sldns_buffer_begin(req->spool_buffer),
2476 sldns_buffer_limit(req->spool_buffer))) {
2477 /* drop the connection, we are out of resources */
2478 comm_point_drop_reply(&req->cp->repinfo);
2479 }
2480 }
2481
tcp_req_info_get_stream_buffer_size(void)2482 size_t tcp_req_info_get_stream_buffer_size(void)
2483 {
2484 size_t s;
2485 if(!stream_wait_lock_inited)
2486 return stream_wait_count;
2487 lock_basic_lock(&stream_wait_count_lock);
2488 s = stream_wait_count;
2489 lock_basic_unlock(&stream_wait_count_lock);
2490 return s;
2491 }
2492
http2_get_query_buffer_size(void)2493 size_t http2_get_query_buffer_size(void)
2494 {
2495 size_t s;
2496 if(!http2_query_buffer_lock_inited)
2497 return http2_query_buffer_count;
2498 lock_basic_lock(&http2_query_buffer_count_lock);
2499 s = http2_query_buffer_count;
2500 lock_basic_unlock(&http2_query_buffer_count_lock);
2501 return s;
2502 }
2503
http2_get_response_buffer_size(void)2504 size_t http2_get_response_buffer_size(void)
2505 {
2506 size_t s;
2507 if(!http2_response_buffer_lock_inited)
2508 return http2_response_buffer_count;
2509 lock_basic_lock(&http2_response_buffer_count_lock);
2510 s = http2_response_buffer_count;
2511 lock_basic_unlock(&http2_response_buffer_count_lock);
2512 return s;
2513 }
2514
2515 #ifdef HAVE_NGHTTP2
2516 /** nghttp2 callback. Used to copy response from rbuffer to nghttp2 session */
http2_submit_response_read_callback(nghttp2_session * ATTR_UNUSED (session),int32_t stream_id,uint8_t * buf,size_t length,uint32_t * data_flags,nghttp2_data_source * source,void * ATTR_UNUSED (cb_arg))2517 static ssize_t http2_submit_response_read_callback(
2518 nghttp2_session* ATTR_UNUSED(session),
2519 int32_t stream_id, uint8_t* buf, size_t length, uint32_t* data_flags,
2520 nghttp2_data_source* source, void* ATTR_UNUSED(cb_arg))
2521 {
2522 struct http2_stream* h2_stream;
2523 struct http2_session* h2_session = source->ptr;
2524 size_t copylen = length;
2525 if(!(h2_stream = nghttp2_session_get_stream_user_data(
2526 h2_session->session, stream_id))) {
2527 verbose(VERB_QUERY, "http2: cannot get stream data, closing "
2528 "stream");
2529 return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
2530 }
2531 if(!h2_stream->rbuffer ||
2532 sldns_buffer_remaining(h2_stream->rbuffer) == 0) {
2533 verbose(VERB_QUERY, "http2: cannot submit buffer. No data "
2534 "available in rbuffer");
2535 /* rbuffer will be free'd in frame close cb */
2536 return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
2537 }
2538
2539 if(copylen > sldns_buffer_remaining(h2_stream->rbuffer))
2540 copylen = sldns_buffer_remaining(h2_stream->rbuffer);
2541 if(copylen > SSIZE_MAX)
2542 copylen = SSIZE_MAX; /* will probably never happen */
2543
2544 memcpy(buf, sldns_buffer_current(h2_stream->rbuffer), copylen);
2545 sldns_buffer_skip(h2_stream->rbuffer, copylen);
2546
2547 if(sldns_buffer_remaining(h2_stream->rbuffer) == 0) {
2548 *data_flags |= NGHTTP2_DATA_FLAG_EOF;
2549 lock_basic_lock(&http2_response_buffer_count_lock);
2550 http2_response_buffer_count -=
2551 sldns_buffer_capacity(h2_stream->rbuffer);
2552 lock_basic_unlock(&http2_response_buffer_count_lock);
2553 sldns_buffer_free(h2_stream->rbuffer);
2554 h2_stream->rbuffer = NULL;
2555 }
2556
2557 return copylen;
2558 }
2559
2560 /**
2561 * Send RST_STREAM frame for stream.
2562 * @param h2_session: http2 session to submit frame to
2563 * @param h2_stream: http2 stream containing frame ID to use in RST_STREAM
2564 * @return 0 on error, 1 otherwise
2565 */
http2_submit_rst_stream(struct http2_session * h2_session,struct http2_stream * h2_stream)2566 static int http2_submit_rst_stream(struct http2_session* h2_session,
2567 struct http2_stream* h2_stream)
2568 {
2569 int ret = nghttp2_submit_rst_stream(h2_session->session,
2570 NGHTTP2_FLAG_NONE, h2_stream->stream_id,
2571 NGHTTP2_INTERNAL_ERROR);
2572 if(ret) {
2573 verbose(VERB_QUERY, "http2: nghttp2_submit_rst_stream failed, "
2574 "error: %s", nghttp2_strerror(ret));
2575 return 0;
2576 }
2577 return 1;
2578 }
2579
2580 /**
2581 * DNS response ready to be submitted to nghttp2, to be prepared for sending
2582 * out. Response is stored in c->buffer. Copy to rbuffer because the c->buffer
2583 * might be used before this will be sent out.
2584 * @param h2_session: http2 session, containing c->buffer which contains answer
2585 * @return 0 on error, 1 otherwise
2586 */
http2_submit_dns_response(struct http2_session * h2_session)2587 int http2_submit_dns_response(struct http2_session* h2_session)
2588 {
2589 int ret;
2590 nghttp2_data_provider data_prd;
2591 char status[4];
2592 nghttp2_nv headers[3];
2593 struct http2_stream* h2_stream = h2_session->c->h2_stream;
2594 size_t rlen;
2595 char rlen_str[32];
2596
2597 if(h2_stream->rbuffer) {
2598 log_err("http2 submit response error: rbuffer already "
2599 "exists");
2600 return 0;
2601 }
2602 if(sldns_buffer_remaining(h2_session->c->buffer) == 0) {
2603 log_err("http2 submit response error: c->buffer not complete");
2604 return 0;
2605 }
2606
2607 if(snprintf(status, 4, "%d", h2_stream->status) != 3) {
2608 verbose(VERB_QUERY, "http2: submit response error: "
2609 "invalid status");
2610 return 0;
2611 }
2612
2613 rlen = sldns_buffer_remaining(h2_session->c->buffer);
2614 snprintf(rlen_str, sizeof(rlen_str), "%u", (unsigned)rlen);
2615
2616 lock_basic_lock(&http2_response_buffer_count_lock);
2617 if(http2_response_buffer_count + rlen > http2_response_buffer_max) {
2618 lock_basic_unlock(&http2_response_buffer_count_lock);
2619 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, "
2620 "in https-response-buffer-size");
2621 return http2_submit_rst_stream(h2_session, h2_stream);
2622 }
2623 http2_response_buffer_count += rlen;
2624 lock_basic_unlock(&http2_response_buffer_count_lock);
2625
2626 if(!(h2_stream->rbuffer = sldns_buffer_new(rlen))) {
2627 lock_basic_lock(&http2_response_buffer_count_lock);
2628 http2_response_buffer_count -= rlen;
2629 lock_basic_unlock(&http2_response_buffer_count_lock);
2630 log_err("http2 submit response error: malloc failure");
2631 return 0;
2632 }
2633
2634 headers[0].name = (uint8_t*)":status";
2635 headers[0].namelen = 7;
2636 headers[0].value = (uint8_t*)status;
2637 headers[0].valuelen = 3;
2638 headers[0].flags = NGHTTP2_NV_FLAG_NONE;
2639
2640 headers[1].name = (uint8_t*)"content-type";
2641 headers[1].namelen = 12;
2642 headers[1].value = (uint8_t*)"application/dns-message";
2643 headers[1].valuelen = 23;
2644 headers[1].flags = NGHTTP2_NV_FLAG_NONE;
2645
2646 headers[2].name = (uint8_t*)"content-length";
2647 headers[2].namelen = 14;
2648 headers[2].value = (uint8_t*)rlen_str;
2649 headers[2].valuelen = strlen(rlen_str);
2650 headers[2].flags = NGHTTP2_NV_FLAG_NONE;
2651
2652 sldns_buffer_write(h2_stream->rbuffer,
2653 sldns_buffer_current(h2_session->c->buffer),
2654 sldns_buffer_remaining(h2_session->c->buffer));
2655 sldns_buffer_flip(h2_stream->rbuffer);
2656
2657 data_prd.source.ptr = h2_session;
2658 data_prd.read_callback = http2_submit_response_read_callback;
2659 ret = nghttp2_submit_response(h2_session->session, h2_stream->stream_id,
2660 headers, 3, &data_prd);
2661 if(ret) {
2662 verbose(VERB_QUERY, "http2: set_stream_user_data failed, "
2663 "error: %s", nghttp2_strerror(ret));
2664 return 0;
2665 }
2666 return 1;
2667 }
2668 #else
/** Variant for builds where HAVE_NGHTTP2 is not defined: the argument is
 * ignored and 0 is always returned. */
int http2_submit_dns_response(void* ATTR_UNUSED(v))
{
	return 0;
}
2673 #endif
2674
2675 #ifdef HAVE_NGHTTP2
2676 /** HTTP status to descriptive string */
http_status_to_str(enum http_status s)2677 static char* http_status_to_str(enum http_status s)
2678 {
2679 switch(s) {
2680 case HTTP_STATUS_OK:
2681 return "OK";
2682 case HTTP_STATUS_BAD_REQUEST:
2683 return "Bad Request";
2684 case HTTP_STATUS_NOT_FOUND:
2685 return "Not Found";
2686 case HTTP_STATUS_PAYLOAD_TOO_LARGE:
2687 return "Payload Too Large";
2688 case HTTP_STATUS_URI_TOO_LONG:
2689 return "URI Too Long";
2690 case HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE:
2691 return "Unsupported Media Type";
2692 case HTTP_STATUS_NOT_IMPLEMENTED:
2693 return "Not Implemented";
2694 }
2695 return "Status Unknown";
2696 }
2697
2698 /** nghttp2 callback. Used to copy error message to nghttp2 session */
http2_submit_error_read_callback(nghttp2_session * ATTR_UNUSED (session),int32_t stream_id,uint8_t * buf,size_t length,uint32_t * data_flags,nghttp2_data_source * source,void * ATTR_UNUSED (cb_arg))2699 static ssize_t http2_submit_error_read_callback(
2700 nghttp2_session* ATTR_UNUSED(session),
2701 int32_t stream_id, uint8_t* buf, size_t length, uint32_t* data_flags,
2702 nghttp2_data_source* source, void* ATTR_UNUSED(cb_arg))
2703 {
2704 struct http2_stream* h2_stream;
2705 struct http2_session* h2_session = source->ptr;
2706 char* msg;
2707 if(!(h2_stream = nghttp2_session_get_stream_user_data(
2708 h2_session->session, stream_id))) {
2709 verbose(VERB_QUERY, "http2: cannot get stream data, closing "
2710 "stream");
2711 return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
2712 }
2713 *data_flags |= NGHTTP2_DATA_FLAG_EOF;
2714 msg = http_status_to_str(h2_stream->status);
2715 if(length < strlen(msg))
2716 return 0; /* not worth trying over multiple frames */
2717 memcpy(buf, msg, strlen(msg));
2718 return strlen(msg);
2719
2720 }
2721
2722 /**
2723 * HTTP error response ready to be submitted to nghttp2, to be prepared for
2724 * sending out. Message body will contain descriptive string for HTTP status.
2725 * @param h2_session: http2 session to submit to
2726 * @param h2_stream: http2 stream containing HTTP status to use for error
2727 * @return 0 on error, 1 otherwise
2728 */
http2_submit_error(struct http2_session * h2_session,struct http2_stream * h2_stream)2729 static int http2_submit_error(struct http2_session* h2_session,
2730 struct http2_stream* h2_stream)
2731 {
2732 int ret;
2733 char status[4];
2734 nghttp2_data_provider data_prd;
2735 nghttp2_nv headers[1]; /* will be copied by nghttp */
2736 if(snprintf(status, 4, "%d", h2_stream->status) != 3) {
2737 verbose(VERB_QUERY, "http2: submit error failed, "
2738 "invalid status");
2739 return 0;
2740 }
2741 headers[0].name = (uint8_t*)":status";
2742 headers[0].namelen = 7;
2743 headers[0].value = (uint8_t*)status;
2744 headers[0].valuelen = 3;
2745 headers[0].flags = NGHTTP2_NV_FLAG_NONE;
2746
2747 data_prd.source.ptr = h2_session;
2748 data_prd.read_callback = http2_submit_error_read_callback;
2749
2750 ret = nghttp2_submit_response(h2_session->session, h2_stream->stream_id,
2751 headers, 1, &data_prd);
2752 if(ret) {
2753 verbose(VERB_QUERY, "http2: submit error failed, "
2754 "error: %s", nghttp2_strerror(ret));
2755 return 0;
2756 }
2757 return 1;
2758 }
2759
2760 /**
2761 * Start query handling. Query is stored in the stream, and will be free'd here.
2762 * @param h2_session: http2 session, containing comm point
2763 * @param h2_stream: stream containing buffered query
2764 * @return: -1 on error, 1 if answer is stored in c->buffer, 0 if there is no
2765 * reply available (yet).
2766 */
http2_query_read_done(struct http2_session * h2_session,struct http2_stream * h2_stream)2767 static int http2_query_read_done(struct http2_session* h2_session,
2768 struct http2_stream* h2_stream)
2769 {
2770 log_assert(h2_stream->qbuffer);
2771
2772 if(h2_session->c->h2_stream) {
2773 verbose(VERB_ALGO, "http2_query_read_done failure: shared "
2774 "buffer already assigned to stream");
2775 return -1;
2776 }
2777
2778 /* the c->buffer might be used by mesh_send_reply and no be cleard
2779 * need to be cleared before use */
2780 sldns_buffer_clear(h2_session->c->buffer);
2781 if(sldns_buffer_remaining(h2_session->c->buffer) <
2782 sldns_buffer_remaining(h2_stream->qbuffer)) {
2783 /* qbuffer will be free'd in frame close cb */
2784 sldns_buffer_clear(h2_session->c->buffer);
2785 verbose(VERB_ALGO, "http2_query_read_done failure: can't fit "
2786 "qbuffer in c->buffer");
2787 return -1;
2788 }
2789
2790 sldns_buffer_write(h2_session->c->buffer,
2791 sldns_buffer_current(h2_stream->qbuffer),
2792 sldns_buffer_remaining(h2_stream->qbuffer));
2793
2794 lock_basic_lock(&http2_query_buffer_count_lock);
2795 http2_query_buffer_count -= sldns_buffer_capacity(h2_stream->qbuffer);
2796 lock_basic_unlock(&http2_query_buffer_count_lock);
2797 sldns_buffer_free(h2_stream->qbuffer);
2798 h2_stream->qbuffer = NULL;
2799
2800 sldns_buffer_flip(h2_session->c->buffer);
2801 h2_session->c->h2_stream = h2_stream;
2802 fptr_ok(fptr_whitelist_comm_point(h2_session->c->callback));
2803 if((*h2_session->c->callback)(h2_session->c, h2_session->c->cb_arg,
2804 NETEVENT_NOERROR, &h2_session->c->repinfo)) {
2805 return 1; /* answer in c->buffer */
2806 }
2807 sldns_buffer_clear(h2_session->c->buffer);
2808 h2_session->c->h2_stream = NULL;
2809 return 0; /* mesh state added, or dropped */
2810 }
2811
2812 /** nghttp2 callback. Used to check if the received frame indicates the end of a
2813 * stream. Gather collected request data and start query handling. */
http2_req_frame_recv_cb(nghttp2_session * session,const nghttp2_frame * frame,void * cb_arg)2814 static int http2_req_frame_recv_cb(nghttp2_session* session,
2815 const nghttp2_frame* frame, void* cb_arg)
2816 {
2817 struct http2_session* h2_session = (struct http2_session*)cb_arg;
2818 struct http2_stream* h2_stream;
2819 int query_read_done;
2820
2821 if((frame->hd.type != NGHTTP2_DATA &&
2822 frame->hd.type != NGHTTP2_HEADERS) ||
2823 !(frame->hd.flags & NGHTTP2_FLAG_END_STREAM)) {
2824 return 0;
2825 }
2826
2827 if(!(h2_stream = nghttp2_session_get_stream_user_data(
2828 session, frame->hd.stream_id)))
2829 return 0;
2830
2831 if(h2_stream->invalid_endpoint) {
2832 h2_stream->status = HTTP_STATUS_NOT_FOUND;
2833 goto submit_http_error;
2834 }
2835
2836 if(h2_stream->invalid_content_type) {
2837 h2_stream->status = HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE;
2838 goto submit_http_error;
2839 }
2840
2841 if(h2_stream->http_method != HTTP_METHOD_GET &&
2842 h2_stream->http_method != HTTP_METHOD_POST) {
2843 h2_stream->status = HTTP_STATUS_NOT_IMPLEMENTED;
2844 goto submit_http_error;
2845 }
2846
2847 if(h2_stream->query_too_large) {
2848 if(h2_stream->http_method == HTTP_METHOD_POST)
2849 h2_stream->status = HTTP_STATUS_PAYLOAD_TOO_LARGE;
2850 else
2851 h2_stream->status = HTTP_STATUS_URI_TOO_LONG;
2852 goto submit_http_error;
2853 }
2854
2855 if(!h2_stream->qbuffer) {
2856 h2_stream->status = HTTP_STATUS_BAD_REQUEST;
2857 goto submit_http_error;
2858 }
2859
2860 if(h2_stream->status) {
2861 submit_http_error:
2862 verbose(VERB_QUERY, "http2 request invalid, returning :status="
2863 "%d", h2_stream->status);
2864 if(!http2_submit_error(h2_session, h2_stream)) {
2865 return NGHTTP2_ERR_CALLBACK_FAILURE;
2866 }
2867 return 0;
2868 }
2869 h2_stream->status = HTTP_STATUS_OK;
2870
2871 sldns_buffer_flip(h2_stream->qbuffer);
2872 h2_session->postpone_drop = 1;
2873 query_read_done = http2_query_read_done(h2_session, h2_stream);
2874 h2_session->postpone_drop = 0;
2875 if(query_read_done < 0)
2876 return NGHTTP2_ERR_CALLBACK_FAILURE;
2877 else if(!query_read_done) {
2878 if(h2_session->is_drop) {
2879 /* connection needs to be closed. Return failure to make
2880 * sure no other action are taken anymore on comm point.
2881 * failure will result in reclaiming (and closing)
2882 * of comm point. */
2883 verbose(VERB_QUERY, "http2 query dropped in worker cb");
2884 return NGHTTP2_ERR_CALLBACK_FAILURE;
2885 }
2886 /* nothing to submit right now, query added to mesh. */
2887 return 0;
2888 }
2889 if(!http2_submit_dns_response(h2_session)) {
2890 sldns_buffer_clear(h2_session->c->buffer);
2891 h2_session->c->h2_stream = NULL;
2892 return NGHTTP2_ERR_CALLBACK_FAILURE;
2893 }
2894 verbose(VERB_QUERY, "http2 query submitted to session");
2895 sldns_buffer_clear(h2_session->c->buffer);
2896 h2_session->c->h2_stream = NULL;
2897 return 0;
2898 }
2899
2900 /** nghttp2 callback. Used to detect start of new streams. */
http2_req_begin_headers_cb(nghttp2_session * session,const nghttp2_frame * frame,void * cb_arg)2901 static int http2_req_begin_headers_cb(nghttp2_session* session,
2902 const nghttp2_frame* frame, void* cb_arg)
2903 {
2904 struct http2_session* h2_session = (struct http2_session*)cb_arg;
2905 struct http2_stream* h2_stream;
2906 int ret;
2907 if(frame->hd.type != NGHTTP2_HEADERS ||
2908 frame->headers.cat != NGHTTP2_HCAT_REQUEST) {
2909 /* only interested in request headers */
2910 return 0;
2911 }
2912 if(!(h2_stream = http2_stream_create(frame->hd.stream_id))) {
2913 log_err("malloc failure while creating http2 stream");
2914 return NGHTTP2_ERR_CALLBACK_FAILURE;
2915 }
2916 http2_session_add_stream(h2_session, h2_stream);
2917 ret = nghttp2_session_set_stream_user_data(session,
2918 frame->hd.stream_id, h2_stream);
2919 if(ret) {
2920 /* stream does not exist */
2921 verbose(VERB_QUERY, "http2: set_stream_user_data failed, "
2922 "error: %s", nghttp2_strerror(ret));
2923 return NGHTTP2_ERR_CALLBACK_FAILURE;
2924 }
2925
2926 return 0;
2927 }
2928
2929 /**
2930 * base64url decode, store in qbuffer
2931 * @param h2_session: http2 session
2932 * @param h2_stream: http2 stream
2933 * @param start: start of the base64 string
2934 * @param length: length of the base64 string
2935 * @return: 0 on error, 1 otherwise. query will be stored in h2_stream->qbuffer,
2936 * buffer will be NULL is unparseble.
2937 */
http2_buffer_uri_query(struct http2_session * h2_session,struct http2_stream * h2_stream,const uint8_t * start,size_t length)2938 static int http2_buffer_uri_query(struct http2_session* h2_session,
2939 struct http2_stream* h2_stream, const uint8_t* start, size_t length)
2940 {
2941 size_t expectb64len;
2942 int b64len;
2943 if(h2_stream->http_method == HTTP_METHOD_POST)
2944 return 1;
2945 if(length == 0)
2946 return 1;
2947 if(h2_stream->qbuffer) {
2948 verbose(VERB_ALGO, "http2_req_header fail, "
2949 "qbuffer already set");
2950 return 0;
2951 }
2952
2953 /* calculate size, might be a bit bigger than the real
2954 * decoded buffer size */
2955 expectb64len = sldns_b64_pton_calculate_size(length);
2956 log_assert(expectb64len > 0);
2957 if(expectb64len >
2958 h2_session->c->http2_stream_max_qbuffer_size) {
2959 h2_stream->query_too_large = 1;
2960 return 1;
2961 }
2962
2963 lock_basic_lock(&http2_query_buffer_count_lock);
2964 if(http2_query_buffer_count + expectb64len > http2_query_buffer_max) {
2965 lock_basic_unlock(&http2_query_buffer_count_lock);
2966 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, "
2967 "in http2-query-buffer-size");
2968 return http2_submit_rst_stream(h2_session, h2_stream);
2969 }
2970 http2_query_buffer_count += expectb64len;
2971 lock_basic_unlock(&http2_query_buffer_count_lock);
2972 if(!(h2_stream->qbuffer = sldns_buffer_new(expectb64len))) {
2973 lock_basic_lock(&http2_query_buffer_count_lock);
2974 http2_query_buffer_count -= expectb64len;
2975 lock_basic_unlock(&http2_query_buffer_count_lock);
2976 log_err("http2_req_header fail, qbuffer "
2977 "malloc failure");
2978 return 0;
2979 }
2980
2981 if(sldns_b64_contains_nonurl((char const*)start, length)) {
2982 char buf[65536+4];
2983 verbose(VERB_ALGO, "HTTP2 stream contains wrong b64 encoding");
2984 /* copy to the scratch buffer temporarily to terminate the
2985 * string with a zero */
2986 if(length+1 > sizeof(buf)) {
2987 /* too long */
2988 lock_basic_lock(&http2_query_buffer_count_lock);
2989 http2_query_buffer_count -= expectb64len;
2990 lock_basic_unlock(&http2_query_buffer_count_lock);
2991 sldns_buffer_free(h2_stream->qbuffer);
2992 h2_stream->qbuffer = NULL;
2993 return 1;
2994 }
2995 memmove(buf, start, length);
2996 buf[length] = 0;
2997 if(!(b64len = sldns_b64_pton(buf, sldns_buffer_current(
2998 h2_stream->qbuffer), expectb64len)) || b64len < 0) {
2999 lock_basic_lock(&http2_query_buffer_count_lock);
3000 http2_query_buffer_count -= expectb64len;
3001 lock_basic_unlock(&http2_query_buffer_count_lock);
3002 sldns_buffer_free(h2_stream->qbuffer);
3003 h2_stream->qbuffer = NULL;
3004 return 1;
3005 }
3006 } else {
3007 if(!(b64len = sldns_b64url_pton(
3008 (char const *)start, length,
3009 sldns_buffer_current(h2_stream->qbuffer),
3010 expectb64len)) || b64len < 0) {
3011 lock_basic_lock(&http2_query_buffer_count_lock);
3012 http2_query_buffer_count -= expectb64len;
3013 lock_basic_unlock(&http2_query_buffer_count_lock);
3014 sldns_buffer_free(h2_stream->qbuffer);
3015 h2_stream->qbuffer = NULL;
3016 /* return without error, method can be an
3017 * unknown POST */
3018 return 1;
3019 }
3020 }
3021 sldns_buffer_skip(h2_stream->qbuffer, (size_t)b64len);
3022 return 1;
3023 }
3024
3025 /** nghttp2 callback. Used to parse headers from HEADER frames. */
http2_req_header_cb(nghttp2_session * session,const nghttp2_frame * frame,const uint8_t * name,size_t namelen,const uint8_t * value,size_t valuelen,uint8_t ATTR_UNUSED (flags),void * cb_arg)3026 static int http2_req_header_cb(nghttp2_session* session,
3027 const nghttp2_frame* frame, const uint8_t* name, size_t namelen,
3028 const uint8_t* value, size_t valuelen, uint8_t ATTR_UNUSED(flags),
3029 void* cb_arg)
3030 {
3031 struct http2_stream* h2_stream = NULL;
3032 struct http2_session* h2_session = (struct http2_session*)cb_arg;
3033 /* nghttp2 deals with CONTINUATION frames and provides them as part of
3034 * the HEADER */
3035 if(frame->hd.type != NGHTTP2_HEADERS ||
3036 frame->headers.cat != NGHTTP2_HCAT_REQUEST) {
3037 /* only interested in request headers */
3038 return 0;
3039 }
3040 if(!(h2_stream = nghttp2_session_get_stream_user_data(session,
3041 frame->hd.stream_id)))
3042 return 0;
3043
3044 /* earlier checks already indicate we can stop handling this query */
3045 if(h2_stream->http_method == HTTP_METHOD_UNSUPPORTED ||
3046 h2_stream->invalid_content_type ||
3047 h2_stream->invalid_endpoint)
3048 return 0;
3049
3050
3051 /* nghttp2 performs some sanity checks in the headers, including:
3052 * name and value are guaranteed to be null terminated
3053 * name is guaranteed to be lowercase
3054 * content-length value is guaranteed to contain digits
3055 */
3056
3057 if(!h2_stream->http_method && namelen == 7 &&
3058 memcmp(":method", name, namelen) == 0) {
3059 /* Case insensitive check on :method value to be on the safe
3060 * side. I failed to find text about case sensitivity in specs.
3061 */
3062 if(valuelen == 3 && strcasecmp("GET", (const char*)value) == 0)
3063 h2_stream->http_method = HTTP_METHOD_GET;
3064 else if(valuelen == 4 &&
3065 strcasecmp("POST", (const char*)value) == 0) {
3066 h2_stream->http_method = HTTP_METHOD_POST;
3067 if(h2_stream->qbuffer) {
3068 /* POST method uses query from DATA frames */
3069 lock_basic_lock(&http2_query_buffer_count_lock);
3070 http2_query_buffer_count -=
3071 sldns_buffer_capacity(h2_stream->qbuffer);
3072 lock_basic_unlock(&http2_query_buffer_count_lock);
3073 sldns_buffer_free(h2_stream->qbuffer);
3074 h2_stream->qbuffer = NULL;
3075 }
3076 } else
3077 h2_stream->http_method = HTTP_METHOD_UNSUPPORTED;
3078 return 0;
3079 }
3080 if(namelen == 5 && memcmp(":path", name, namelen) == 0) {
3081 /* :path may contain DNS query, depending on method. Method might
3082 * not be known yet here, so check after finishing receiving
3083 * stream. */
3084 #define HTTP_QUERY_PARAM "?dns="
3085 size_t el = strlen(h2_session->c->http_endpoint);
3086 size_t qpl = strlen(HTTP_QUERY_PARAM);
3087
3088 if(valuelen < el || memcmp(h2_session->c->http_endpoint,
3089 value, el) != 0) {
3090 h2_stream->invalid_endpoint = 1;
3091 return 0;
3092 }
3093 /* larger than endpoint only allowed if it is for the query
3094 * parameter */
3095 if(valuelen <= el+qpl ||
3096 memcmp(HTTP_QUERY_PARAM, value+el, qpl) != 0) {
3097 if(valuelen != el)
3098 h2_stream->invalid_endpoint = 1;
3099 return 0;
3100 }
3101
3102 if(!http2_buffer_uri_query(h2_session, h2_stream,
3103 value+(el+qpl), valuelen-(el+qpl))) {
3104 return NGHTTP2_ERR_CALLBACK_FAILURE;
3105 }
3106 return 0;
3107 }
3108 /* Content type is a SHOULD (rfc7231#section-3.1.1.5) when using POST,
3109 * and not needed when using GET. Don't enforce.
3110 * If set only allow lowercase "application/dns-message".
3111 *
3112 * Clients SHOULD (rfc8484#section-4.1) set an accept header, but MUST
3113 * be able to handle "application/dns-message". Since that is the only
3114 * content-type supported we can ignore the accept header.
3115 */
3116 if((namelen == 12 && memcmp("content-type", name, namelen) == 0)) {
3117 if(valuelen != 23 || memcmp("application/dns-message", value,
3118 valuelen) != 0) {
3119 h2_stream->invalid_content_type = 1;
3120 }
3121 }
3122
3123 /* Only interested in content-lentg for POST (on not yet known) method.
3124 */
3125 if((!h2_stream->http_method ||
3126 h2_stream->http_method == HTTP_METHOD_POST) &&
3127 !h2_stream->content_length && namelen == 14 &&
3128 memcmp("content-length", name, namelen) == 0) {
3129 if(valuelen > 5) {
3130 h2_stream->query_too_large = 1;
3131 return 0;
3132 }
3133 /* guaranteed to only contain digits and be null terminated */
3134 h2_stream->content_length = atoi((const char*)value);
3135 if(h2_stream->content_length >
3136 h2_session->c->http2_stream_max_qbuffer_size) {
3137 h2_stream->query_too_large = 1;
3138 return 0;
3139 }
3140 }
3141 return 0;
3142 }
3143
3144 /** nghttp2 callback. Used to get data from DATA frames, which can contain
3145 * queries in POST requests. */
http2_req_data_chunk_recv_cb(nghttp2_session * ATTR_UNUSED (session),uint8_t ATTR_UNUSED (flags),int32_t stream_id,const uint8_t * data,size_t len,void * cb_arg)3146 static int http2_req_data_chunk_recv_cb(nghttp2_session* ATTR_UNUSED(session),
3147 uint8_t ATTR_UNUSED(flags), int32_t stream_id, const uint8_t* data,
3148 size_t len, void* cb_arg)
3149 {
3150 struct http2_session* h2_session = (struct http2_session*)cb_arg;
3151 struct http2_stream* h2_stream;
3152 size_t qlen = 0;
3153
3154 if(!(h2_stream = nghttp2_session_get_stream_user_data(
3155 h2_session->session, stream_id))) {
3156 return 0;
3157 }
3158
3159 if(h2_stream->query_too_large)
3160 return 0;
3161
3162 if(!h2_stream->qbuffer) {
3163 if(h2_stream->content_length) {
3164 if(h2_stream->content_length < len)
3165 /* getting more data in DATA frame than
3166 * advertised in content-length header. */
3167 return NGHTTP2_ERR_CALLBACK_FAILURE;
3168 qlen = h2_stream->content_length;
3169 } else if(len <= h2_session->c->http2_stream_max_qbuffer_size) {
3170 /* setting this to msg-buffer-size can result in a lot
3171 * of memory consumption. Most queries should fit in a
3172 * single DATA frame, and most POST queries will
3173 * contain content-length which does not impose this
3174 * limit. */
3175 qlen = len;
3176 }
3177 }
3178 if(!h2_stream->qbuffer && qlen) {
3179 lock_basic_lock(&http2_query_buffer_count_lock);
3180 if(http2_query_buffer_count + qlen > http2_query_buffer_max) {
3181 lock_basic_unlock(&http2_query_buffer_count_lock);
3182 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, "
3183 "in http2-query-buffer-size");
3184 return http2_submit_rst_stream(h2_session, h2_stream);
3185 }
3186 http2_query_buffer_count += qlen;
3187 lock_basic_unlock(&http2_query_buffer_count_lock);
3188 if(!(h2_stream->qbuffer = sldns_buffer_new(qlen))) {
3189 lock_basic_lock(&http2_query_buffer_count_lock);
3190 http2_query_buffer_count -= qlen;
3191 lock_basic_unlock(&http2_query_buffer_count_lock);
3192 }
3193 }
3194
3195 if(!h2_stream->qbuffer ||
3196 sldns_buffer_remaining(h2_stream->qbuffer) < len) {
3197 verbose(VERB_ALGO, "http2 data_chunk_recv failed. Not enough "
3198 "buffer space for POST query. Can happen on multi "
3199 "frame requests without content-length header");
3200 h2_stream->query_too_large = 1;
3201 return 0;
3202 }
3203
3204 sldns_buffer_write(h2_stream->qbuffer, data, len);
3205
3206 return 0;
3207 }
3208
http2_req_stream_clear(struct http2_stream * h2_stream)3209 void http2_req_stream_clear(struct http2_stream* h2_stream)
3210 {
3211 if(h2_stream->qbuffer) {
3212 lock_basic_lock(&http2_query_buffer_count_lock);
3213 http2_query_buffer_count -=
3214 sldns_buffer_capacity(h2_stream->qbuffer);
3215 lock_basic_unlock(&http2_query_buffer_count_lock);
3216 sldns_buffer_free(h2_stream->qbuffer);
3217 h2_stream->qbuffer = NULL;
3218 }
3219 if(h2_stream->rbuffer) {
3220 lock_basic_lock(&http2_response_buffer_count_lock);
3221 http2_response_buffer_count -=
3222 sldns_buffer_capacity(h2_stream->rbuffer);
3223 lock_basic_unlock(&http2_response_buffer_count_lock);
3224 sldns_buffer_free(h2_stream->rbuffer);
3225 h2_stream->rbuffer = NULL;
3226 }
3227 }
3228
http2_req_callbacks_create(void)3229 nghttp2_session_callbacks* http2_req_callbacks_create(void)
3230 {
3231 nghttp2_session_callbacks *callbacks;
3232 if(nghttp2_session_callbacks_new(&callbacks) == NGHTTP2_ERR_NOMEM) {
3233 log_err("failed to initialize nghttp2 callback");
3234 return NULL;
3235 }
3236 /* reception of header block started, used to create h2_stream */
3237 nghttp2_session_callbacks_set_on_begin_headers_callback(callbacks,
3238 http2_req_begin_headers_cb);
3239 /* complete frame received, used to get data from stream if frame
3240 * has end stream flag, and start processing query */
3241 nghttp2_session_callbacks_set_on_frame_recv_callback(callbacks,
3242 http2_req_frame_recv_cb);
3243 /* get request info from headers */
3244 nghttp2_session_callbacks_set_on_header_callback(callbacks,
3245 http2_req_header_cb);
3246 /* get data from DATA frames, containing POST query */
3247 nghttp2_session_callbacks_set_on_data_chunk_recv_callback(callbacks,
3248 http2_req_data_chunk_recv_cb);
3249
3250 /* generic HTTP2 callbacks */
3251 nghttp2_session_callbacks_set_recv_callback(callbacks, http2_recv_cb);
3252 nghttp2_session_callbacks_set_send_callback(callbacks, http2_send_cb);
3253 nghttp2_session_callbacks_set_on_stream_close_callback(callbacks,
3254 http2_stream_close_cb);
3255
3256 return callbacks;
3257 }
3258 #endif /* HAVE_NGHTTP2 */
3259
3260 #ifdef HAVE_NGTCP2
3261 struct doq_table*
doq_table_create(struct config_file * cfg,struct ub_randstate * rnd)3262 doq_table_create(struct config_file* cfg, struct ub_randstate* rnd)
3263 {
3264 struct doq_table* table;
3265
3266 if (!cfg->quic_port)
3267 return NULL;
3268 table = calloc(1, sizeof(*table));
3269 if(!table)
3270 return NULL;
3271 #ifdef USE_NGTCP2_CRYPTO_OSSL
3272 /* Initialize the ossl crypto, it is harmless to call twice,
3273 * and this is before use of doq connections. */
3274 if(ngtcp2_crypto_ossl_init() != 0) {
3275 log_err("ngtcp2_crypto_ossl_init failed");
3276 free(table);
3277 return NULL;
3278 }
3279 #elif defined(HAVE_NGTCP2_CRYPTO_QUICTLS_INIT)
3280 if(ngtcp2_crypto_quictls_init() != 0) {
3281 log_err("ngtcp2_crypto_quictls_init failed");
3282 free(table);
3283 return NULL;
3284 }
3285 #endif
3286 table->idle_timeout = ((uint64_t)cfg->tcp_idle_timeout)*
3287 NGTCP2_MILLISECONDS;
3288 table->sv_scidlen = 16;
3289 table->static_secret_len = 16;
3290 table->static_secret = malloc(table->static_secret_len);
3291 if(!table->static_secret) {
3292 free(table);
3293 return NULL;
3294 }
3295 doq_fill_rand(rnd, table->static_secret, table->static_secret_len);
3296 table->conn_tree = rbtree_create(doq_conn_cmp);
3297 if(!table->conn_tree) {
3298 free(table->static_secret);
3299 free(table);
3300 return NULL;
3301 }
3302 table->conid_tree = rbtree_create(doq_conid_cmp);
3303 if(!table->conid_tree) {
3304 free(table->static_secret);
3305 free(table->conn_tree);
3306 free(table);
3307 return NULL;
3308 }
3309 table->timer_tree = rbtree_create(doq_timer_cmp);
3310 if(!table->timer_tree) {
3311 free(table->static_secret);
3312 free(table->conn_tree);
3313 free(table->conid_tree);
3314 free(table);
3315 return NULL;
3316 }
3317 lock_rw_init(&table->lock);
3318 lock_rw_init(&table->conid_lock);
3319 lock_basic_init(&table->size_lock);
3320 lock_protect(&table->lock, &table->static_secret,
3321 sizeof(table->static_secret));
3322 lock_protect(&table->lock, &table->static_secret_len,
3323 sizeof(table->static_secret_len));
3324 lock_protect(&table->lock, table->static_secret,
3325 table->static_secret_len);
3326 lock_protect(&table->lock, &table->sv_scidlen,
3327 sizeof(table->sv_scidlen));
3328 lock_protect(&table->lock, &table->idle_timeout,
3329 sizeof(table->idle_timeout));
3330 lock_protect(&table->lock, &table->conn_tree, sizeof(table->conn_tree));
3331 lock_protect(&table->lock, table->conn_tree, sizeof(*table->conn_tree));
3332 lock_protect(&table->conid_lock, table->conid_tree,
3333 sizeof(*table->conid_tree));
3334 lock_protect(&table->lock, table->timer_tree,
3335 sizeof(*table->timer_tree));
3336 lock_protect(&table->size_lock, &table->current_size,
3337 sizeof(table->current_size));
3338 return table;
3339 }
3340
3341 /** delete elements from the connection tree */
3342 static void
conn_tree_del(rbnode_type * node,void * arg)3343 conn_tree_del(rbnode_type* node, void* arg)
3344 {
3345 struct doq_table* table = (struct doq_table*)arg;
3346 struct doq_conn* conn;
3347 if(!node || !table)
3348 return;
3349 conn = (struct doq_conn*)node->key;
3350 if(conn->timer.timer_in_list) {
3351 /* Remove timer from list first, because finding the rbnode
3352 * element of the setlist of same timeouts needs tree lookup.
3353 * Edit the tree structure after that lookup. */
3354 doq_timer_list_remove(conn->table, &conn->timer);
3355 }
3356 if(conn->timer.timer_in_tree)
3357 doq_timer_tree_remove(conn->table, &conn->timer);
3358 doq_table_quic_size_subtract(table, sizeof(*conn)+conn->key.dcidlen);
3359 doq_conn_delete(conn, table);
3360 }
3361
3362 /** delete elements from the connection id tree */
3363 static void
conid_tree_del(rbnode_type * node,void * ATTR_UNUSED (arg))3364 conid_tree_del(rbnode_type* node, void* ATTR_UNUSED(arg))
3365 {
3366 if(!node)
3367 return;
3368 doq_conid_delete((struct doq_conid*)node->key);
3369 }
3370
3371 void
doq_table_delete(struct doq_table * table)3372 doq_table_delete(struct doq_table* table)
3373 {
3374 if(!table)
3375 return;
3376 lock_rw_destroy(&table->lock);
3377 free(table->static_secret);
3378 if(table->conn_tree) {
3379 traverse_postorder(table->conn_tree, conn_tree_del, table);
3380 free(table->conn_tree);
3381 }
3382 lock_rw_destroy(&table->conid_lock);
3383 if(table->conid_tree) {
3384 /* The tree should be empty, because the doq_conn_delete calls
3385 * above should have also removed their conid elements. */
3386 traverse_postorder(table->conid_tree, conid_tree_del, NULL);
3387 free(table->conid_tree);
3388 }
3389 lock_basic_destroy(&table->size_lock);
3390 if(table->timer_tree) {
3391 /* The tree should be empty, because the conn_tree_del calls
3392 * above should also have removed them. Also the doq_timer
3393 * is part of the doq_conn struct, so is already freed. */
3394 free(table->timer_tree);
3395 }
3396 table->write_list_first = NULL;
3397 table->write_list_last = NULL;
3398 free(table);
3399 }
3400
3401 struct doq_timer*
doq_timer_find_time(struct doq_table * table,struct timeval * tv)3402 doq_timer_find_time(struct doq_table* table, struct timeval* tv)
3403 {
3404 struct doq_timer key;
3405 struct rbnode_type* node;
3406 log_assert(table != NULL);
3407 memset(&key, 0, sizeof(key));
3408 key.time.tv_sec = tv->tv_sec;
3409 key.time.tv_usec = tv->tv_usec;
3410 node = rbtree_search(table->timer_tree, &key);
3411 if(node)
3412 return (struct doq_timer*)node->key;
3413 return NULL;
3414 }
3415
3416 void
doq_timer_tree_remove(struct doq_table * table,struct doq_timer * timer)3417 doq_timer_tree_remove(struct doq_table* table, struct doq_timer* timer)
3418 {
3419 if(!timer->timer_in_tree)
3420 return;
3421 rbtree_delete(table->timer_tree, timer);
3422 timer->timer_in_tree = 0;
3423 /* This item could have more timers in the same set. */
3424 if(timer->setlist_first) {
3425 struct doq_timer* rb_timer = timer->setlist_first;
3426 /* del first element from setlist */
3427 if(rb_timer->setlist_next)
3428 rb_timer->setlist_next->setlist_prev = NULL;
3429 else
3430 timer->setlist_last = NULL;
3431 timer->setlist_first = rb_timer->setlist_next;
3432 rb_timer->setlist_prev = NULL;
3433 rb_timer->setlist_next = NULL;
3434 rb_timer->timer_in_list = 0;
3435 /* insert it into the tree as new rb element */
3436 memset(&rb_timer->node, 0, sizeof(rb_timer->node));
3437 rb_timer->node.key = rb_timer;
3438 rbtree_insert(table->timer_tree, &rb_timer->node);
3439 rb_timer->timer_in_tree = 1;
3440 /* the setlist, if any remainder, moves to the rb element */
3441 rb_timer->setlist_first = timer->setlist_first;
3442 rb_timer->setlist_last = timer->setlist_last;
3443 timer->setlist_first = NULL;
3444 timer->setlist_last = NULL;
3445 rb_timer->worker_doq_socket = timer->worker_doq_socket;
3446 }
3447 timer->worker_doq_socket = NULL;
3448 }
3449
3450 void
doq_timer_list_remove(struct doq_table * table,struct doq_timer * timer)3451 doq_timer_list_remove(struct doq_table* table, struct doq_timer* timer)
3452 {
3453 struct doq_timer* rb_timer;
3454 if(!timer->timer_in_list)
3455 return;
3456 /* The item in the rbtree has the list start and end. */
3457 rb_timer = doq_timer_find_time(table, &timer->time);
3458 if(rb_timer) {
3459 if(timer->setlist_prev)
3460 timer->setlist_prev->setlist_next = timer->setlist_next;
3461 else
3462 rb_timer->setlist_first = timer->setlist_next;
3463 if(timer->setlist_next)
3464 timer->setlist_next->setlist_prev = timer->setlist_prev;
3465 else
3466 rb_timer->setlist_last = timer->setlist_prev;
3467 timer->setlist_prev = NULL;
3468 timer->setlist_next = NULL;
3469 }
3470 timer->timer_in_list = 0;
3471 }
3472
3473 /** doq append timer to setlist */
3474 static void
doq_timer_list_append(struct doq_timer * rb_timer,struct doq_timer * timer)3475 doq_timer_list_append(struct doq_timer* rb_timer, struct doq_timer* timer)
3476 {
3477 log_assert(timer->timer_in_list == 0);
3478 timer->timer_in_list = 1;
3479 timer->setlist_next = NULL;
3480 timer->setlist_prev = rb_timer->setlist_last;
3481 if(rb_timer->setlist_last)
3482 rb_timer->setlist_last->setlist_next = timer;
3483 else
3484 rb_timer->setlist_first = timer;
3485 rb_timer->setlist_last = timer;
3486 }
3487
3488 void
doq_timer_unset(struct doq_table * table,struct doq_timer * timer)3489 doq_timer_unset(struct doq_table* table, struct doq_timer* timer)
3490 {
3491 if(timer->timer_in_list) {
3492 /* Remove timer from list first, because finding the rbnode
3493 * element of the setlist of same timeouts needs tree lookup.
3494 * Edit the tree structure after that lookup. */
3495 doq_timer_list_remove(table, timer);
3496 }
3497 if(timer->timer_in_tree)
3498 doq_timer_tree_remove(table, timer);
3499 timer->worker_doq_socket = NULL;
3500 }
3501
doq_timer_set(struct doq_table * table,struct doq_timer * timer,struct doq_server_socket * worker_doq_socket,struct timeval * tv)3502 void doq_timer_set(struct doq_table* table, struct doq_timer* timer,
3503 struct doq_server_socket* worker_doq_socket, struct timeval* tv)
3504 {
3505 struct doq_timer* rb_timer;
3506 if(verbosity >= VERB_ALGO && timer->conn) {
3507 char a[256];
3508 struct timeval rel;
3509 addr_to_str((void*)&timer->conn->key.paddr.addr,
3510 timer->conn->key.paddr.addrlen, a, sizeof(a));
3511 timeval_subtract(&rel, tv, worker_doq_socket->now_tv);
3512 verbose(VERB_ALGO, "doq %s timer set %d.%6.6d in %d.%6.6d",
3513 a, (int)tv->tv_sec, (int)tv->tv_usec,
3514 (int)rel.tv_sec, (int)rel.tv_usec);
3515 }
3516 if(timer->timer_in_tree || timer->timer_in_list) {
3517 if(timer->time.tv_sec == tv->tv_sec &&
3518 timer->time.tv_usec == tv->tv_usec)
3519 return; /* already set on that time */
3520 doq_timer_unset(table, timer);
3521 }
3522 timer->time.tv_sec = tv->tv_sec;
3523 timer->time.tv_usec = tv->tv_usec;
3524 rb_timer = doq_timer_find_time(table, tv);
3525 if(rb_timer) {
3526 /* There is a timeout already with this value. Timer is
3527 * added to the setlist. */
3528 doq_timer_list_append(rb_timer, timer);
3529 } else {
3530 /* There is no timeout with this value. Make timer a new
3531 * tree element. */
3532 memset(&timer->node, 0, sizeof(timer->node));
3533 timer->node.key = timer;
3534 rbtree_insert(table->timer_tree, &timer->node);
3535 timer->timer_in_tree = 1;
3536 timer->setlist_first = NULL;
3537 timer->setlist_last = NULL;
3538 timer->worker_doq_socket = worker_doq_socket;
3539 }
3540 }
3541
3542 struct doq_conn*
doq_conn_create(struct comm_point * c,struct doq_pkt_addr * paddr,const uint8_t * dcid,size_t dcidlen,uint32_t version)3543 doq_conn_create(struct comm_point* c, struct doq_pkt_addr* paddr,
3544 const uint8_t* dcid, size_t dcidlen, uint32_t version)
3545 {
3546 struct doq_conn* conn = calloc(1, sizeof(*conn));
3547 if(!conn)
3548 return NULL;
3549 conn->node.key = conn;
3550 conn->doq_socket = c->doq_socket;
3551 conn->table = c->doq_socket->table;
3552 memmove(&conn->key.paddr.addr, &paddr->addr, paddr->addrlen);
3553 conn->key.paddr.addrlen = paddr->addrlen;
3554 memmove(&conn->key.paddr.localaddr, &paddr->localaddr,
3555 paddr->localaddrlen);
3556 conn->key.paddr.localaddrlen = paddr->localaddrlen;
3557 conn->key.paddr.ifindex = paddr->ifindex;
3558 conn->key.dcid = memdup((void*)dcid, dcidlen);
3559 if(!conn->key.dcid) {
3560 free(conn);
3561 return NULL;
3562 }
3563 conn->key.dcidlen = dcidlen;
3564 conn->version = version;
3565 #ifdef HAVE_NGTCP2_CCERR_DEFAULT
3566 ngtcp2_ccerr_default(&conn->ccerr);
3567 #else
3568 ngtcp2_connection_close_error_default(&conn->last_error);
3569 #endif
3570 rbtree_init(&conn->stream_tree, &doq_stream_cmp);
3571 conn->timer.conn = conn;
3572 lock_basic_init(&conn->lock);
3573 lock_protect(&conn->lock, &conn->key, sizeof(conn->key));
3574 lock_protect(&conn->lock, &conn->doq_socket, sizeof(conn->doq_socket));
3575 lock_protect(&conn->lock, &conn->table, sizeof(conn->table));
3576 lock_protect(&conn->lock, &conn->is_deleted, sizeof(conn->is_deleted));
3577 lock_protect(&conn->lock, &conn->version, sizeof(conn->version));
3578 lock_protect(&conn->lock, &conn->conn, sizeof(conn->conn));
3579 lock_protect(&conn->lock, &conn->conid_list, sizeof(conn->conid_list));
3580 #ifdef HAVE_NGTCP2_CCERR_DEFAULT
3581 lock_protect(&conn->lock, &conn->ccerr, sizeof(conn->ccerr));
3582 #else
3583 lock_protect(&conn->lock, &conn->last_error, sizeof(conn->last_error));
3584 #endif
3585 lock_protect(&conn->lock, &conn->tls_alert, sizeof(conn->tls_alert));
3586 lock_protect(&conn->lock, &conn->ssl, sizeof(conn->ssl));
3587 lock_protect(&conn->lock, &conn->close_pkt, sizeof(conn->close_pkt));
3588 lock_protect(&conn->lock, &conn->close_pkt_len, sizeof(conn->close_pkt_len));
3589 lock_protect(&conn->lock, &conn->close_ecn, sizeof(conn->close_ecn));
3590 lock_protect(&conn->lock, &conn->stream_tree, sizeof(conn->stream_tree));
3591 lock_protect(&conn->lock, &conn->stream_write_first, sizeof(conn->stream_write_first));
3592 lock_protect(&conn->lock, &conn->stream_write_last, sizeof(conn->stream_write_last));
3593 lock_protect(&conn->lock, &conn->write_interest, sizeof(conn->write_interest));
3594 lock_protect(&conn->lock, &conn->on_write_list, sizeof(conn->on_write_list));
3595 lock_protect(&conn->lock, &conn->write_prev, sizeof(conn->write_prev));
3596 lock_protect(&conn->lock, &conn->write_next, sizeof(conn->write_next));
3597 return conn;
3598 }
3599
3600 /** delete stream tree node */
3601 static void
stream_tree_del(rbnode_type * node,void * arg)3602 stream_tree_del(rbnode_type* node, void* arg)
3603 {
3604 struct doq_table* table = (struct doq_table*)arg;
3605 struct doq_stream* stream;
3606 if(!node)
3607 return;
3608 stream = (struct doq_stream*)node;
3609 if(stream->in)
3610 doq_table_quic_size_subtract(table, stream->inlen);
3611 if(stream->out)
3612 doq_table_quic_size_subtract(table, stream->outlen);
3613 doq_table_quic_size_subtract(table, sizeof(*stream));
3614 doq_stream_delete(stream);
3615 }
3616
3617 void
doq_conn_delete(struct doq_conn * conn,struct doq_table * table)3618 doq_conn_delete(struct doq_conn* conn, struct doq_table* table)
3619 {
3620 if(!conn)
3621 return;
3622 lock_basic_destroy(&conn->lock);
3623 lock_rw_wrlock(&conn->table->conid_lock);
3624 doq_conn_clear_conids(conn);
3625 lock_rw_unlock(&conn->table->conid_lock);
3626 /* Remove the app data from ngtcp2 before SSL_free of conn->ssl,
3627 * because the ngtcp2 conn is deleted. */
3628 SSL_set_app_data(conn->ssl, NULL);
3629 if(conn->stream_tree.count != 0) {
3630 traverse_postorder(&conn->stream_tree, stream_tree_del, table);
3631 }
3632 free(conn->key.dcid);
3633 SSL_free(conn->ssl);
3634 #ifdef USE_NGTCP2_CRYPTO_OSSL
3635 ngtcp2_crypto_ossl_ctx_del(conn->ossl_ctx);
3636 #endif
3637 ngtcp2_conn_del(conn->conn);
3638 free(conn->close_pkt);
3639 free(conn);
3640 }
3641
3642 int
doq_conn_cmp(const void * key1,const void * key2)3643 doq_conn_cmp(const void* key1, const void* key2)
3644 {
3645 struct doq_conn* c = (struct doq_conn*)key1;
3646 struct doq_conn* d = (struct doq_conn*)key2;
3647 int r;
3648 /* Compared in the order destination address, then
3649 * local address, ifindex and then dcid.
3650 * So that for a search for findlessorequal for the destination
3651 * address will find connections to that address, with different
3652 * dcids.
3653 * Also a printout in sorted order prints the connections by IP
3654 * address of destination, and then a number of them depending on the
3655 * dcids. */
3656 if(c->key.paddr.addrlen != d->key.paddr.addrlen) {
3657 if(c->key.paddr.addrlen < d->key.paddr.addrlen)
3658 return -1;
3659 return 1;
3660 }
3661 if((r=memcmp(&c->key.paddr.addr, &d->key.paddr.addr,
3662 c->key.paddr.addrlen))!=0)
3663 return r;
3664 if(c->key.paddr.localaddrlen != d->key.paddr.localaddrlen) {
3665 if(c->key.paddr.localaddrlen < d->key.paddr.localaddrlen)
3666 return -1;
3667 return 1;
3668 }
3669 if((r=memcmp(&c->key.paddr.localaddr, &d->key.paddr.localaddr,
3670 c->key.paddr.localaddrlen))!=0)
3671 return r;
3672 if(c->key.paddr.ifindex != d->key.paddr.ifindex) {
3673 if(c->key.paddr.ifindex < d->key.paddr.ifindex)
3674 return -1;
3675 return 1;
3676 }
3677 if(c->key.dcidlen != d->key.dcidlen) {
3678 if(c->key.dcidlen < d->key.dcidlen)
3679 return -1;
3680 return 1;
3681 }
3682 if((r=memcmp(c->key.dcid, d->key.dcid, c->key.dcidlen))!=0)
3683 return r;
3684 return 0;
3685 }
3686
doq_conid_cmp(const void * key1,const void * key2)3687 int doq_conid_cmp(const void* key1, const void* key2)
3688 {
3689 struct doq_conid* c = (struct doq_conid*)key1;
3690 struct doq_conid* d = (struct doq_conid*)key2;
3691 if(c->cidlen != d->cidlen) {
3692 if(c->cidlen < d->cidlen)
3693 return -1;
3694 return 1;
3695 }
3696 return memcmp(c->cid, d->cid, c->cidlen);
3697 }
3698
doq_timer_cmp(const void * key1,const void * key2)3699 int doq_timer_cmp(const void* key1, const void* key2)
3700 {
3701 struct doq_timer* e = (struct doq_timer*)key1;
3702 struct doq_timer* f = (struct doq_timer*)key2;
3703 if(e->time.tv_sec < f->time.tv_sec)
3704 return -1;
3705 if(e->time.tv_sec > f->time.tv_sec)
3706 return 1;
3707 if(e->time.tv_usec < f->time.tv_usec)
3708 return -1;
3709 if(e->time.tv_usec > f->time.tv_usec)
3710 return 1;
3711 return 0;
3712 }
3713
doq_stream_cmp(const void * key1,const void * key2)3714 int doq_stream_cmp(const void* key1, const void* key2)
3715 {
3716 struct doq_stream* c = (struct doq_stream*)key1;
3717 struct doq_stream* d = (struct doq_stream*)key2;
3718 if(c->stream_id != d->stream_id) {
3719 if(c->stream_id < d->stream_id)
3720 return -1;
3721 return 1;
3722 }
3723 return 0;
3724 }
3725
3726 /** doq store a local address in repinfo */
3727 static void
doq_repinfo_store_localaddr(struct comm_reply * repinfo,struct doq_addr_storage * localaddr,socklen_t localaddrlen)3728 doq_repinfo_store_localaddr(struct comm_reply* repinfo,
3729 struct doq_addr_storage* localaddr, socklen_t localaddrlen)
3730 {
3731 /* use the pktinfo that we have for ancillary udp data otherwise,
3732 * this saves space for a sockaddr */
3733 memset(&repinfo->pktinfo, 0, sizeof(repinfo->pktinfo));
3734 if(addr_is_ip6((void*)localaddr, localaddrlen)) {
3735 #ifdef IPV6_PKTINFO
3736 struct sockaddr_in6* sa6 = (struct sockaddr_in6*)localaddr;
3737 memmove(&repinfo->pktinfo.v6info.ipi6_addr,
3738 &sa6->sin6_addr, sizeof(struct in6_addr));
3739 repinfo->doq_srcport = sa6->sin6_port;
3740 #endif
3741 repinfo->srctype = 6;
3742 } else {
3743 #ifdef IP_PKTINFO
3744 struct sockaddr_in* sa = (struct sockaddr_in*)localaddr;
3745 memmove(&repinfo->pktinfo.v4info.ipi_addr,
3746 &sa->sin_addr, sizeof(struct in_addr));
3747 repinfo->doq_srcport = sa->sin_port;
3748 #elif defined(IP_RECVDSTADDR)
3749 struct sockaddr_in* sa = (struct sockaddr_in*)localaddr;
3750 memmove(&repinfo->pktinfo.v4addr, &sa->sin_addr,
3751 sizeof(struct in_addr));
3752 repinfo->doq_srcport = sa->sin_port;
3753 #endif
3754 repinfo->srctype = 4;
3755 }
3756 }
3757
3758 /** doq retrieve localaddr from repinfo */
3759 static void
doq_repinfo_retrieve_localaddr(struct comm_reply * repinfo,struct doq_addr_storage * localaddr,socklen_t * localaddrlen)3760 doq_repinfo_retrieve_localaddr(struct comm_reply* repinfo,
3761 struct doq_addr_storage* localaddr, socklen_t* localaddrlen)
3762 {
3763 if(repinfo->srctype == 6) {
3764 #ifdef IPV6_PKTINFO
3765 struct sockaddr_in6* sa6 = (struct sockaddr_in6*)localaddr;
3766 *localaddrlen = (socklen_t)sizeof(struct sockaddr_in6);
3767 memset(sa6, 0, *localaddrlen);
3768 sa6->sin6_family = AF_INET6;
3769 memmove(&sa6->sin6_addr, &repinfo->pktinfo.v6info.ipi6_addr,
3770 sizeof(struct in6_addr));
3771 sa6->sin6_port = repinfo->doq_srcport;
3772 #endif
3773 } else {
3774 #ifdef IP_PKTINFO
3775 struct sockaddr_in* sa = (struct sockaddr_in*)localaddr;
3776 *localaddrlen = (socklen_t)sizeof(struct sockaddr_in);
3777 memset(sa, 0, *localaddrlen);
3778 sa->sin_family = AF_INET;
3779 memmove(&sa->sin_addr, &repinfo->pktinfo.v4info.ipi_addr,
3780 sizeof(struct in_addr));
3781 sa->sin_port = repinfo->doq_srcport;
3782 #elif defined(IP_RECVDSTADDR)
3783 struct sockaddr_in* sa = (struct sockaddr_in*)localaddr;
3784 *localaddrlen = (socklen_t)sizeof(struct sockaddr_in);
3785 memset(sa, 0, *localaddrlen);
3786 sa->sin_family = AF_INET;
3787 memmove(&sa->sin_addr, &repinfo->pktinfo.v4addr,
3788 sizeof(struct in_addr));
3789 sa->sin_port = repinfo->doq_srcport;
3790 #endif
3791 }
3792 }
3793
3794 /** doq write a connection key into repinfo, false if it does not fit */
3795 static int
doq_conn_key_store_repinfo(struct doq_conn_key * key,struct comm_reply * repinfo)3796 doq_conn_key_store_repinfo(struct doq_conn_key* key,
3797 struct comm_reply* repinfo)
3798 {
3799 repinfo->is_proxied = 0;
3800 repinfo->doq_ifindex = key->paddr.ifindex;
3801 repinfo->remote_addrlen = key->paddr.addrlen;
3802 memmove(&repinfo->remote_addr, &key->paddr.addr,
3803 repinfo->remote_addrlen);
3804 repinfo->client_addrlen = key->paddr.addrlen;
3805 memmove(&repinfo->client_addr, &key->paddr.addr,
3806 repinfo->client_addrlen);
3807 doq_repinfo_store_localaddr(repinfo, &key->paddr.localaddr,
3808 key->paddr.localaddrlen);
3809 if(key->dcidlen > sizeof(repinfo->doq_dcid))
3810 return 0;
3811 repinfo->doq_dcidlen = key->dcidlen;
3812 memmove(repinfo->doq_dcid, key->dcid, key->dcidlen);
3813 return 1;
3814 }
3815
3816 void
doq_conn_key_from_repinfo(struct doq_conn_key * key,struct comm_reply * repinfo)3817 doq_conn_key_from_repinfo(struct doq_conn_key* key, struct comm_reply* repinfo)
3818 {
3819 key->paddr.ifindex = repinfo->doq_ifindex;
3820 key->paddr.addrlen = repinfo->remote_addrlen;
3821 memmove(&key->paddr.addr, &repinfo->remote_addr,
3822 repinfo->remote_addrlen);
3823 doq_repinfo_retrieve_localaddr(repinfo, &key->paddr.localaddr,
3824 &key->paddr.localaddrlen);
3825 key->dcidlen = repinfo->doq_dcidlen;
3826 key->dcid = repinfo->doq_dcid;
3827 }
3828
3829 /** doq add a stream to the connection */
3830 static void
doq_conn_add_stream(struct doq_conn * conn,struct doq_stream * stream)3831 doq_conn_add_stream(struct doq_conn* conn, struct doq_stream* stream)
3832 {
3833 (void)rbtree_insert(&conn->stream_tree, &stream->node);
3834 }
3835
3836 /** doq delete a stream from the connection */
3837 static void
doq_conn_del_stream(struct doq_conn * conn,struct doq_stream * stream)3838 doq_conn_del_stream(struct doq_conn* conn, struct doq_stream* stream)
3839 {
3840 (void)rbtree_delete(&conn->stream_tree, &stream->node);
3841 }
3842
3843 /** doq create new stream */
3844 static struct doq_stream*
doq_stream_create(int64_t stream_id)3845 doq_stream_create(int64_t stream_id)
3846 {
3847 struct doq_stream* stream = calloc(1, sizeof(*stream));
3848 if(!stream)
3849 return NULL;
3850 stream->node.key = stream;
3851 stream->stream_id = stream_id;
3852 return stream;
3853 }
3854
doq_stream_delete(struct doq_stream * stream)3855 void doq_stream_delete(struct doq_stream* stream)
3856 {
3857 if(!stream)
3858 return;
3859 free(stream->in);
3860 free(stream->out);
3861 free(stream);
3862 }
3863
3864 struct doq_stream*
doq_stream_find(struct doq_conn * conn,int64_t stream_id)3865 doq_stream_find(struct doq_conn* conn, int64_t stream_id)
3866 {
3867 rbnode_type* node;
3868 struct doq_stream key;
3869 key.node.key = &key;
3870 key.stream_id = stream_id;
3871 node = rbtree_search(&conn->stream_tree, &key);
3872 if(node)
3873 return (struct doq_stream*)node->key;
3874 return NULL;
3875 }
3876
3877 /** doq put stream on the conn write list */
3878 static void
doq_stream_on_write_list(struct doq_conn * conn,struct doq_stream * stream)3879 doq_stream_on_write_list(struct doq_conn* conn, struct doq_stream* stream)
3880 {
3881 if(stream->on_write_list)
3882 return;
3883 stream->write_prev = conn->stream_write_last;
3884 if(conn->stream_write_last)
3885 conn->stream_write_last->write_next = stream;
3886 else
3887 conn->stream_write_first = stream;
3888 conn->stream_write_last = stream;
3889 stream->write_next = NULL;
3890 stream->on_write_list = 1;
3891 }
3892
3893 /** doq remove stream from the conn write list */
3894 static void
doq_stream_off_write_list(struct doq_conn * conn,struct doq_stream * stream)3895 doq_stream_off_write_list(struct doq_conn* conn, struct doq_stream* stream)
3896 {
3897 if(!stream->on_write_list)
3898 return;
3899 if(stream->write_next)
3900 stream->write_next->write_prev = stream->write_prev;
3901 else conn->stream_write_last = stream->write_prev;
3902 if(stream->write_prev)
3903 stream->write_prev->write_next = stream->write_next;
3904 else conn->stream_write_first = stream->write_next;
3905 stream->write_prev = NULL;
3906 stream->write_next = NULL;
3907 stream->on_write_list = 0;
3908 }
3909
3910 /** doq stream remove in buffer */
3911 static void
doq_stream_remove_in_buffer(struct doq_stream * stream,struct doq_table * table)3912 doq_stream_remove_in_buffer(struct doq_stream* stream, struct doq_table* table)
3913 {
3914 if(stream->in) {
3915 doq_table_quic_size_subtract(table, stream->inlen);
3916 free(stream->in);
3917 stream->in = NULL;
3918 stream->inlen = 0;
3919 }
3920 }
3921
3922 /** doq stream remove out buffer */
3923 static void
doq_stream_remove_out_buffer(struct doq_stream * stream,struct doq_table * table)3924 doq_stream_remove_out_buffer(struct doq_stream* stream,
3925 struct doq_table* table)
3926 {
3927 if(stream->out) {
3928 doq_table_quic_size_subtract(table, stream->outlen);
3929 free(stream->out);
3930 stream->out = NULL;
3931 stream->outlen = 0;
3932 }
3933 }
3934
3935 int
doq_stream_close(struct doq_conn * conn,struct doq_stream * stream,int send_shutdown)3936 doq_stream_close(struct doq_conn* conn, struct doq_stream* stream,
3937 int send_shutdown)
3938 {
3939 int ret;
3940 if(stream->is_closed)
3941 return 1;
3942 stream->is_closed = 1;
3943 doq_stream_off_write_list(conn, stream);
3944 if(send_shutdown) {
3945 verbose(VERB_ALGO, "doq: shutdown stream_id %d with app_error_code %d",
3946 (int)stream->stream_id, (int)DOQ_APP_ERROR_CODE);
3947 ret = ngtcp2_conn_shutdown_stream(conn->conn,
3948 #ifdef HAVE_NGTCP2_CONN_SHUTDOWN_STREAM4
3949 0,
3950 #endif
3951 stream->stream_id, DOQ_APP_ERROR_CODE);
3952 if(ret != 0) {
3953 log_err("doq ngtcp2_conn_shutdown_stream %d failed: %s",
3954 (int)stream->stream_id, ngtcp2_strerror(ret));
3955 return 0;
3956 }
3957 doq_conn_write_enable(conn);
3958 }
3959 verbose(VERB_ALGO, "doq: conn extend max streams bidi by 1");
3960 ngtcp2_conn_extend_max_streams_bidi(conn->conn, 1);
3961 doq_conn_write_enable(conn);
3962 doq_stream_remove_in_buffer(stream, conn->doq_socket->table);
3963 doq_stream_remove_out_buffer(stream, conn->doq_socket->table);
3964 doq_table_quic_size_subtract(conn->doq_socket->table, sizeof(*stream));
3965 doq_conn_del_stream(conn, stream);
3966 doq_stream_delete(stream);
3967 return 1;
3968 }
3969
3970 /** doq stream pick up answer data from buffer */
3971 static int
doq_stream_pickup_answer(struct doq_stream * stream,struct sldns_buffer * buf)3972 doq_stream_pickup_answer(struct doq_stream* stream, struct sldns_buffer* buf)
3973 {
3974 stream->is_answer_available = 1;
3975 if(stream->out) {
3976 free(stream->out);
3977 stream->out = NULL;
3978 stream->outlen = 0;
3979 }
3980 stream->nwrite = 0;
3981 stream->outlen = sldns_buffer_limit(buf);
3982 /* For quic the output bytes have to stay allocated and available,
3983 * for potential resends, until the remote end has acknowledged them.
3984 * This includes the tcplen start uint16_t, in outlen_wire. */
3985 stream->outlen_wire = htons(stream->outlen);
3986 stream->out = memdup(sldns_buffer_begin(buf), sldns_buffer_limit(buf));
3987 if(!stream->out) {
3988 log_err("doq could not send answer: out of memory");
3989 return 0;
3990 }
3991 return 1;
3992 }
3993
3994 int
doq_stream_send_reply(struct doq_conn * conn,struct doq_stream * stream,struct sldns_buffer * buf)3995 doq_stream_send_reply(struct doq_conn* conn, struct doq_stream* stream,
3996 struct sldns_buffer* buf)
3997 {
3998 if(verbosity >= VERB_ALGO) {
3999 char* s = sldns_wire2str_pkt(sldns_buffer_begin(buf),
4000 sldns_buffer_limit(buf));
4001 verbose(VERB_ALGO, "doq stream %d response\n%s",
4002 (int)stream->stream_id, (s?s:"null"));
4003 free(s);
4004 }
4005 if(stream->out)
4006 doq_table_quic_size_subtract(conn->doq_socket->table,
4007 stream->outlen);
4008 if(!doq_stream_pickup_answer(stream, buf))
4009 return 0;
4010 doq_table_quic_size_add(conn->doq_socket->table, stream->outlen);
4011 doq_stream_on_write_list(conn, stream);
4012 doq_conn_write_enable(conn);
4013 return 1;
4014 }
4015
4016 /** doq stream data length has completed, allocations can be done. False on
4017 * allocation failure. */
4018 static int
doq_stream_datalen_complete(struct doq_stream * stream,struct doq_table * table)4019 doq_stream_datalen_complete(struct doq_stream* stream, struct doq_table* table)
4020 {
4021 if(stream->inlen > 1024*1024) {
4022 log_err("doq stream in length too large %d",
4023 (int)stream->inlen);
4024 return 0;
4025 }
4026 stream->in = calloc(1, stream->inlen);
4027 if(!stream->in) {
4028 log_err("doq could not read stream, calloc failed: "
4029 "out of memory");
4030 return 0;
4031 }
4032 doq_table_quic_size_add(table, stream->inlen);
4033 return 1;
4034 }
4035
4036 /** doq stream data is complete, the input data has been received. */
4037 static int
doq_stream_data_complete(struct doq_conn * conn,struct doq_stream * stream)4038 doq_stream_data_complete(struct doq_conn* conn, struct doq_stream* stream)
4039 {
4040 struct comm_point* c;
4041 if(verbosity >= VERB_ALGO) {
4042 char* s = sldns_wire2str_pkt(stream->in, stream->inlen);
4043 char a[128];
4044 addr_to_str((void*)&conn->key.paddr.addr,
4045 conn->key.paddr.addrlen, a, sizeof(a));
4046 verbose(VERB_ALGO, "doq %s stream %d incoming query\n%s",
4047 a, (int)stream->stream_id, (s?s:"null"));
4048 free(s);
4049 }
4050 stream->is_query_complete = 1;
4051 c = conn->doq_socket->cp;
4052 if(!stream->in) {
4053 verbose(VERB_ALGO, "doq_stream_data_complete: no in buffer");
4054 return 0;
4055 }
4056 if(stream->inlen > sldns_buffer_capacity(c->buffer)) {
4057 verbose(VERB_ALGO, "doq_stream_data_complete: query too long");
4058 return 0;
4059 }
4060 sldns_buffer_clear(c->buffer);
4061 sldns_buffer_write(c->buffer, stream->in, stream->inlen);
4062 sldns_buffer_flip(c->buffer);
4063 c->repinfo.c = c;
4064 if(!doq_conn_key_store_repinfo(&conn->key, &c->repinfo)) {
4065 verbose(VERB_ALGO, "doq_stream_data_complete: connection "
4066 "DCID too long");
4067 return 0;
4068 }
4069 c->repinfo.doq_streamid = stream->stream_id;
4070 conn->doq_socket->current_conn = conn;
4071 fptr_ok(fptr_whitelist_comm_point(c->callback));
4072 if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo)) {
4073 conn->doq_socket->current_conn = NULL;
4074 if(!doq_stream_send_reply(conn, stream, c->buffer)) {
4075 verbose(VERB_ALGO, "doq: failed to send_reply");
4076 return 0;
4077 }
4078 return 1;
4079 }
4080 conn->doq_socket->current_conn = NULL;
4081 return 1;
4082 }
4083
4084 /** doq receive data for a stream, more bytes of the incoming data */
4085 static int
doq_stream_recv_data(struct doq_stream * stream,const uint8_t * data,size_t datalen,int * recv_done,struct doq_table * table)4086 doq_stream_recv_data(struct doq_stream* stream, const uint8_t* data,
4087 size_t datalen, int* recv_done, struct doq_table* table)
4088 {
4089 int got_data = 0;
4090 /* read the tcplength uint16_t at the start */
4091 if(stream->nread < 2) {
4092 uint16_t tcplen = 0;
4093 size_t todolen = 2 - stream->nread;
4094
4095 if(stream->nread > 0) {
4096 /* put in the already read byte if there is one */
4097 tcplen = stream->inlen;
4098 }
4099 if(datalen < todolen)
4100 todolen = datalen;
4101 memmove(((uint8_t*)&tcplen)+stream->nread, data, todolen);
4102 stream->nread += todolen;
4103 data += todolen;
4104 datalen -= todolen;
4105 if(stream->nread == 2) {
4106 /* the initial length value is completed */
4107 stream->inlen = ntohs(tcplen);
4108 if(!doq_stream_datalen_complete(stream, table))
4109 return 0;
4110 } else {
4111 /* store for later */
4112 stream->inlen = tcplen;
4113 return 1;
4114 }
4115 }
4116 /* if there are more data bytes */
4117 if(datalen > 0) {
4118 size_t to_write = datalen;
4119 if(stream->nread-2 > stream->inlen) {
4120 verbose(VERB_ALGO, "doq stream buffer too small");
4121 return 0;
4122 }
4123 if(datalen > stream->inlen - (stream->nread-2))
4124 to_write = stream->inlen - (stream->nread-2);
4125 if(to_write > 0) {
4126 if(!stream->in) {
4127 verbose(VERB_ALGO, "doq: stream has "
4128 "no buffer");
4129 return 0;
4130 }
4131 memmove(stream->in+(stream->nread-2), data, to_write);
4132 stream->nread += to_write;
4133 data += to_write;
4134 datalen -= to_write;
4135 got_data = 1;
4136 }
4137 }
4138 /* Are there extra bytes received after the end? If so, log them. */
4139 if(datalen > 0) {
4140 if(verbosity >= VERB_ALGO)
4141 log_hex("doq stream has extra bytes received after end",
4142 (void*)data, datalen);
4143 }
4144 /* Is the input data complete? */
4145 if(got_data && stream->nread >= stream->inlen+2) {
4146 if(!stream->in) {
4147 verbose(VERB_ALGO, "doq: completed stream has "
4148 "no buffer");
4149 return 0;
4150 }
4151 *recv_done = 1;
4152 }
4153 return 1;
4154 }
4155
4156 /** doq receive FIN for a stream. No more bytes are going to arrive. */
4157 static int
doq_stream_recv_fin(struct doq_conn * conn,struct doq_stream * stream,int recv_done)4158 doq_stream_recv_fin(struct doq_conn* conn, struct doq_stream* stream, int
4159 recv_done)
4160 {
4161 if(!stream->is_query_complete && !recv_done) {
4162 verbose(VERB_ALGO, "doq: stream recv FIN, but is "
4163 "not complete, have %d of %d bytes",
4164 ((int)stream->nread)-2, (int)stream->inlen);
4165 if(!doq_stream_close(conn, stream, 1))
4166 return 0;
4167 }
4168 return 1;
4169 }
4170
/**
 * Fill a buffer with random bytes.
 * Every byte is the low 8 bits of a separate ub_random call.
 * @param rnd: the random state to draw from.
 * @param buf: the buffer to fill.
 * @param len: number of bytes to write.
 */
void doq_fill_rand(struct ub_randstate* rnd, uint8_t* buf, size_t len)
{
	size_t pos = 0;
	while(pos < len) {
		buf[pos] = ub_random(rnd)&0xff;
		pos++;
	}
}
4177
4178 /** generate new connection id, checks for duplicates.
4179 * caller must hold lock on conid tree. */
4180 static int
doq_conn_generate_new_conid(struct doq_conn * conn,uint8_t * data,size_t datalen)4181 doq_conn_generate_new_conid(struct doq_conn* conn, uint8_t* data,
4182 size_t datalen)
4183 {
4184 int max_try = 100;
4185 int i;
4186 for(i=0; i<max_try; i++) {
4187 doq_fill_rand(conn->doq_socket->rnd, data, datalen);
4188 if(!doq_conid_find(conn->table, data, datalen)) {
4189 /* Found an unused connection id. */
4190 return 1;
4191 }
4192 }
4193 verbose(VERB_ALGO, "doq_conn_generate_new_conid failed: could not "
4194 "generate random unused connection id value in %d attempts.",
4195 max_try);
4196 return 0;
4197 }
4198
4199 /** ngtcp2 rand callback function */
4200 static void
doq_rand_cb(uint8_t * dest,size_t destlen,const ngtcp2_rand_ctx * rand_ctx)4201 doq_rand_cb(uint8_t* dest, size_t destlen, const ngtcp2_rand_ctx* rand_ctx)
4202 {
4203 struct ub_randstate* rnd = (struct ub_randstate*)
4204 rand_ctx->native_handle;
4205 doq_fill_rand(rnd, dest, destlen);
4206 }
4207
4208 /** ngtcp2 get_new_connection_id callback function */
4209 static int
doq_get_new_connection_id_cb(ngtcp2_conn * ATTR_UNUSED (conn),ngtcp2_cid * cid,uint8_t * token,size_t cidlen,void * user_data)4210 doq_get_new_connection_id_cb(ngtcp2_conn* ATTR_UNUSED(conn), ngtcp2_cid* cid,
4211 uint8_t* token, size_t cidlen, void* user_data)
4212 {
4213 struct doq_conn* doq_conn = (struct doq_conn*)user_data;
4214 /* Lock the conid tree, so we can check for duplicates while
4215 * generating the id, and then insert it, whilst keeping the tree
4216 * locked against other modifications, guaranteeing uniqueness. */
4217 lock_rw_wrlock(&doq_conn->table->conid_lock);
4218 if(!doq_conn_generate_new_conid(doq_conn, cid->data, cidlen)) {
4219 lock_rw_unlock(&doq_conn->table->conid_lock);
4220 return NGTCP2_ERR_CALLBACK_FAILURE;
4221 }
4222 cid->datalen = cidlen;
4223 if(ngtcp2_crypto_generate_stateless_reset_token(token,
4224 doq_conn->doq_socket->static_secret,
4225 doq_conn->doq_socket->static_secret_len, cid) != 0) {
4226 lock_rw_unlock(&doq_conn->table->conid_lock);
4227 return NGTCP2_ERR_CALLBACK_FAILURE;
4228 }
4229 if(!doq_conn_associate_conid(doq_conn, cid->data, cid->datalen)) {
4230 lock_rw_unlock(&doq_conn->table->conid_lock);
4231 return NGTCP2_ERR_CALLBACK_FAILURE;
4232 }
4233 lock_rw_unlock(&doq_conn->table->conid_lock);
4234 return 0;
4235 }
4236
4237 /** ngtcp2 remove_connection_id callback function */
4238 static int
doq_remove_connection_id_cb(ngtcp2_conn * ATTR_UNUSED (conn),const ngtcp2_cid * cid,void * user_data)4239 doq_remove_connection_id_cb(ngtcp2_conn* ATTR_UNUSED(conn),
4240 const ngtcp2_cid* cid, void* user_data)
4241 {
4242 struct doq_conn* doq_conn = (struct doq_conn*)user_data;
4243 lock_rw_wrlock(&doq_conn->table->conid_lock);
4244 doq_conn_dissociate_conid(doq_conn, cid->data, cid->datalen);
4245 lock_rw_unlock(&doq_conn->table->conid_lock);
4246 return 0;
4247 }
4248
4249 /** doq submit a new token */
4250 static int
doq_submit_new_token(struct doq_conn * conn)4251 doq_submit_new_token(struct doq_conn* conn)
4252 {
4253 uint8_t token[NGTCP2_CRYPTO_MAX_REGULAR_TOKENLEN];
4254 ngtcp2_ssize tokenlen;
4255 int ret;
4256 const ngtcp2_path* path = ngtcp2_conn_get_path(conn->conn);
4257 ngtcp2_tstamp ts = doq_get_timestamp_nanosec();
4258
4259 tokenlen = ngtcp2_crypto_generate_regular_token(token,
4260 conn->doq_socket->static_secret,
4261 conn->doq_socket->static_secret_len, path->remote.addr,
4262 path->remote.addrlen, ts);
4263 if(tokenlen < 0) {
4264 log_err("doq ngtcp2_crypto_generate_regular_token failed");
4265 return 1;
4266 }
4267
4268 verbose(VERB_ALGO, "doq submit new token");
4269 ret = ngtcp2_conn_submit_new_token(conn->conn, token, tokenlen);
4270 if(ret != 0) {
4271 log_err("doq ngtcp2_conn_submit_new_token failed: %s",
4272 ngtcp2_strerror(ret));
4273 return 0;
4274 }
4275 return 1;
4276 }
4277
4278 /** ngtcp2 handshake_completed callback function */
4279 static int
doq_handshake_completed_cb(ngtcp2_conn * ATTR_UNUSED (conn),void * user_data)4280 doq_handshake_completed_cb(ngtcp2_conn* ATTR_UNUSED(conn), void* user_data)
4281 {
4282 struct doq_conn* doq_conn = (struct doq_conn*)user_data;
4283 verbose(VERB_ALGO, "doq handshake_completed callback");
4284 verbose(VERB_ALGO, "ngtcp2_conn_get_max_data_left is %d",
4285 (int)ngtcp2_conn_get_max_data_left(doq_conn->conn));
4286 #ifdef HAVE_NGTCP2_CONN_GET_MAX_LOCAL_STREAMS_UNI
4287 verbose(VERB_ALGO, "ngtcp2_conn_get_max_local_streams_uni is %d",
4288 (int)ngtcp2_conn_get_max_local_streams_uni(doq_conn->conn));
4289 #endif
4290 verbose(VERB_ALGO, "ngtcp2_conn_get_streams_uni_left is %d",
4291 (int)ngtcp2_conn_get_streams_uni_left(doq_conn->conn));
4292 verbose(VERB_ALGO, "ngtcp2_conn_get_streams_bidi_left is %d",
4293 (int)ngtcp2_conn_get_streams_bidi_left(doq_conn->conn));
4294 verbose(VERB_ALGO, "negotiated cipher name is %s",
4295 SSL_get_cipher_name(doq_conn->ssl));
4296 if(verbosity > VERB_ALGO) {
4297 const unsigned char* alpn = NULL;
4298 unsigned int alpnlen = 0;
4299 char alpnstr[128];
4300 SSL_get0_alpn_selected(doq_conn->ssl, &alpn, &alpnlen);
4301 if(alpnlen > sizeof(alpnstr)-1)
4302 alpnlen = sizeof(alpnstr)-1;
4303 memmove(alpnstr, alpn, alpnlen);
4304 alpnstr[alpnlen]=0;
4305 verbose(VERB_ALGO, "negotiated ALPN is '%s'", alpnstr);
4306 }
4307
4308 if(!doq_submit_new_token(doq_conn))
4309 return -1;
4310 return 0;
4311 }
4312
4313 /** ngtcp2 stream_open callback function */
4314 static int
doq_stream_open_cb(ngtcp2_conn * ATTR_UNUSED (conn),int64_t stream_id,void * user_data)4315 doq_stream_open_cb(ngtcp2_conn* ATTR_UNUSED(conn), int64_t stream_id,
4316 void* user_data)
4317 {
4318 struct doq_conn* doq_conn = (struct doq_conn*)user_data;
4319 struct doq_stream* stream;
4320 verbose(VERB_ALGO, "doq new stream %x", (int)stream_id);
4321 if(doq_stream_find(doq_conn, stream_id)) {
4322 verbose(VERB_ALGO, "doq: stream with this id already exists");
4323 return 0;
4324 }
4325 if(stream_id != 0 && stream_id != 4 && /* allow one stream on a new connection */
4326 !doq_table_quic_size_available(doq_conn->doq_socket->table,
4327 doq_conn->doq_socket->cfg, sizeof(*stream)
4328 + 100 /* estimated query in */
4329 + 512 /* estimated response out */
4330 )) {
4331 int rv;
4332 verbose(VERB_ALGO, "doq: no mem for new stream");
4333 rv = ngtcp2_conn_shutdown_stream(doq_conn->conn,
4334 #ifdef HAVE_NGTCP2_CONN_SHUTDOWN_STREAM4
4335 0,
4336 #endif
4337 stream_id, NGTCP2_CONNECTION_REFUSED);
4338 if(rv != 0) {
4339 log_err("ngtcp2_conn_shutdown_stream failed: %s",
4340 ngtcp2_strerror(rv));
4341 return NGTCP2_ERR_CALLBACK_FAILURE;
4342 }
4343 return 0;
4344 }
4345 stream = doq_stream_create(stream_id);
4346 if(!stream) {
4347 log_err("doq: could not doq_stream_create: out of memory");
4348 return NGTCP2_ERR_CALLBACK_FAILURE;
4349 }
4350 doq_table_quic_size_add(doq_conn->doq_socket->table, sizeof(*stream));
4351 doq_conn_add_stream(doq_conn, stream);
4352 return 0;
4353 }
4354
4355 /** ngtcp2 recv_stream_data callback function */
4356 static int
doq_recv_stream_data_cb(ngtcp2_conn * ATTR_UNUSED (conn),uint32_t flags,int64_t stream_id,uint64_t offset,const uint8_t * data,size_t datalen,void * user_data,void * ATTR_UNUSED (stream_user_data))4357 doq_recv_stream_data_cb(ngtcp2_conn* ATTR_UNUSED(conn), uint32_t flags,
4358 int64_t stream_id, uint64_t offset, const uint8_t* data,
4359 size_t datalen, void* user_data, void* ATTR_UNUSED(stream_user_data))
4360 {
4361 int recv_done = 0;
4362 struct doq_conn* doq_conn = (struct doq_conn*)user_data;
4363 struct doq_stream* stream;
4364 verbose(VERB_ALGO, "doq recv stream data stream id %d offset %d "
4365 "datalen %d%s%s", (int)stream_id, (int)offset, (int)datalen,
4366 ((flags&NGTCP2_STREAM_DATA_FLAG_FIN)!=0?" FIN":""),
4367 #ifdef NGTCP2_STREAM_DATA_FLAG_0RTT
4368 ((flags&NGTCP2_STREAM_DATA_FLAG_0RTT)!=0?" 0RTT":"")
4369 #else
4370 ((flags&NGTCP2_STREAM_DATA_FLAG_EARLY)!=0?" EARLY":"")
4371 #endif
4372 );
4373 stream = doq_stream_find(doq_conn, stream_id);
4374 if(!stream) {
4375 verbose(VERB_ALGO, "doq: received stream data for "
4376 "unknown stream %d", (int)stream_id);
4377 return 0;
4378 }
4379 if(stream->is_closed) {
4380 verbose(VERB_ALGO, "doq: stream is closed, ignore recv data");
4381 return 0;
4382 }
4383 if(datalen != 0) {
4384 if(!doq_stream_recv_data(stream, data, datalen, &recv_done,
4385 doq_conn->doq_socket->table))
4386 return NGTCP2_ERR_CALLBACK_FAILURE;
4387 }
4388 if((flags&NGTCP2_STREAM_DATA_FLAG_FIN)!=0) {
4389 if(!doq_stream_recv_fin(doq_conn, stream, recv_done))
4390 return NGTCP2_ERR_CALLBACK_FAILURE;
4391 }
4392 ngtcp2_conn_extend_max_stream_offset(doq_conn->conn, stream_id,
4393 datalen);
4394 ngtcp2_conn_extend_max_offset(doq_conn->conn, datalen);
4395 if(recv_done) {
4396 if(!doq_stream_data_complete(doq_conn, stream))
4397 return NGTCP2_ERR_CALLBACK_FAILURE;
4398 }
4399 return 0;
4400 }
4401
4402 /** ngtcp2 stream_close callback function */
4403 static int
doq_stream_close_cb(ngtcp2_conn * ATTR_UNUSED (conn),uint32_t flags,int64_t stream_id,uint64_t app_error_code,void * user_data,void * ATTR_UNUSED (stream_user_data))4404 doq_stream_close_cb(ngtcp2_conn* ATTR_UNUSED(conn), uint32_t flags,
4405 int64_t stream_id, uint64_t app_error_code, void* user_data,
4406 void* ATTR_UNUSED(stream_user_data))
4407 {
4408 struct doq_conn* doq_conn = (struct doq_conn*)user_data;
4409 struct doq_stream* stream;
4410 if((flags&NGTCP2_STREAM_CLOSE_FLAG_APP_ERROR_CODE_SET)!=0)
4411 verbose(VERB_ALGO, "doq stream close for stream id %d %sapp_error_code %d",
4412 (int)stream_id,
4413 (((flags&NGTCP2_STREAM_CLOSE_FLAG_APP_ERROR_CODE_SET)!=0)?
4414 "APP_ERROR_CODE_SET ":""),
4415 (int)app_error_code);
4416 else
4417 verbose(VERB_ALGO, "doq stream close for stream id %d",
4418 (int)stream_id);
4419
4420 stream = doq_stream_find(doq_conn, stream_id);
4421 if(!stream) {
4422 verbose(VERB_ALGO, "doq: stream close for "
4423 "unknown stream %d", (int)stream_id);
4424 return 0;
4425 }
4426 if(!doq_stream_close(doq_conn, stream, 0))
4427 return NGTCP2_ERR_CALLBACK_FAILURE;
4428 return 0;
4429 }
4430
4431 /** ngtcp2 stream_reset callback function */
4432 static int
doq_stream_reset_cb(ngtcp2_conn * ATTR_UNUSED (conn),int64_t stream_id,uint64_t final_size,uint64_t app_error_code,void * user_data,void * ATTR_UNUSED (stream_user_data))4433 doq_stream_reset_cb(ngtcp2_conn* ATTR_UNUSED(conn), int64_t stream_id,
4434 uint64_t final_size, uint64_t app_error_code, void* user_data,
4435 void* ATTR_UNUSED(stream_user_data))
4436 {
4437 struct doq_conn* doq_conn = (struct doq_conn*)user_data;
4438 struct doq_stream* stream;
4439 verbose(VERB_ALGO, "doq stream reset for stream id %d final_size %d "
4440 "app_error_code %d", (int)stream_id, (int)final_size,
4441 (int)app_error_code);
4442
4443 stream = doq_stream_find(doq_conn, stream_id);
4444 if(!stream) {
4445 verbose(VERB_ALGO, "doq: stream reset for "
4446 "unknown stream %d", (int)stream_id);
4447 return 0;
4448 }
4449 if(!doq_stream_close(doq_conn, stream, 0))
4450 return NGTCP2_ERR_CALLBACK_FAILURE;
4451 return 0;
4452 }
4453
4454 /** ngtcp2 acked_stream_data_offset callback function */
4455 static int
doq_acked_stream_data_offset_cb(ngtcp2_conn * ATTR_UNUSED (conn),int64_t stream_id,uint64_t offset,uint64_t datalen,void * user_data,void * ATTR_UNUSED (stream_user_data))4456 doq_acked_stream_data_offset_cb(ngtcp2_conn* ATTR_UNUSED(conn),
4457 int64_t stream_id, uint64_t offset, uint64_t datalen, void* user_data,
4458 void* ATTR_UNUSED(stream_user_data))
4459 {
4460 struct doq_conn* doq_conn = (struct doq_conn*)user_data;
4461 struct doq_stream* stream;
4462 verbose(VERB_ALGO, "doq stream acked data for stream id %d offset %d "
4463 "datalen %d", (int)stream_id, (int)offset, (int)datalen);
4464
4465 stream = doq_stream_find(doq_conn, stream_id);
4466 if(!stream) {
4467 verbose(VERB_ALGO, "doq: stream acked data for "
4468 "unknown stream %d", (int)stream_id);
4469 return 0;
4470 }
4471 /* Acked the data from [offset .. offset+datalen). */
4472 if(stream->is_closed)
4473 return 0;
4474 if(offset+datalen >= stream->outlen) {
4475 doq_stream_remove_in_buffer(stream,
4476 doq_conn->doq_socket->table);
4477 doq_stream_remove_out_buffer(stream,
4478 doq_conn->doq_socket->table);
4479 }
4480 return 0;
4481 }
4482
4483 /** ngtc2p log_printf callback function */
4484 static void
doq_log_printf_cb(void * ATTR_UNUSED (user_data),const char * fmt,...)4485 doq_log_printf_cb(void* ATTR_UNUSED(user_data), const char* fmt, ...)
4486 {
4487 char buf[1024];
4488 va_list ap;
4489 va_start(ap, fmt);
4490 vsnprintf(buf, sizeof(buf), fmt, ap);
4491 verbose(VERB_ALGO, "libngtcp2: %s", buf);
4492 va_end(ap);
4493 }
4494
4495 #ifdef MAKE_QUIC_METHOD
4496 /** the doq application tx key callback, false on failure */
4497 static int
doq_application_tx_key_cb(struct doq_conn * conn)4498 doq_application_tx_key_cb(struct doq_conn* conn)
4499 {
4500 verbose(VERB_ALGO, "doq application tx key cb");
4501 /* The server does not want to open streams to the client,
4502 * the client instead initiates by opening bidi streams. */
4503 verbose(VERB_ALGO, "doq ngtcp2_conn_get_max_data_left is %d",
4504 (int)ngtcp2_conn_get_max_data_left(conn->conn));
4505 #ifdef HAVE_NGTCP2_CONN_GET_MAX_LOCAL_STREAMS_UNI
4506 verbose(VERB_ALGO, "doq ngtcp2_conn_get_max_local_streams_uni is %d",
4507 (int)ngtcp2_conn_get_max_local_streams_uni(conn->conn));
4508 #endif
4509 verbose(VERB_ALGO, "doq ngtcp2_conn_get_streams_uni_left is %d",
4510 (int)ngtcp2_conn_get_streams_uni_left(conn->conn));
4511 verbose(VERB_ALGO, "doq ngtcp2_conn_get_streams_bidi_left is %d",
4512 (int)ngtcp2_conn_get_streams_bidi_left(conn->conn));
4513 return 1;
4514 }
4515
4516 /** quic_method set_encryption_secrets function */
4517 static int
doq_set_encryption_secrets(SSL * ssl,OSSL_ENCRYPTION_LEVEL ossl_level,const uint8_t * read_secret,const uint8_t * write_secret,size_t secret_len)4518 doq_set_encryption_secrets(SSL *ssl, OSSL_ENCRYPTION_LEVEL ossl_level,
4519 const uint8_t *read_secret, const uint8_t *write_secret,
4520 size_t secret_len)
4521 {
4522 struct doq_conn* doq_conn = (struct doq_conn*)SSL_get_app_data(ssl);
4523 #ifdef HAVE_NGTCP2_ENCRYPTION_LEVEL
4524 ngtcp2_encryption_level
4525 #else
4526 ngtcp2_crypto_level
4527 #endif
4528 level =
4529 #ifdef USE_NGTCP2_CRYPTO_OSSL
4530 ngtcp2_crypto_ossl_from_ossl_encryption_level(ossl_level);
4531 #elif defined(HAVE_NGTCP2_CRYPTO_QUICTLS_FROM_OSSL_ENCRYPTION_LEVEL)
4532 ngtcp2_crypto_quictls_from_ossl_encryption_level(ossl_level);
4533 #else
4534 ngtcp2_crypto_openssl_from_ossl_encryption_level(ossl_level);
4535 #endif
4536
4537 if(read_secret) {
4538 verbose(VERB_ALGO, "doq: ngtcp2_crypto_derive_and_install_rx_key for level %d ossl %d", (int)level, (int)ossl_level);
4539 if(ngtcp2_crypto_derive_and_install_rx_key(doq_conn->conn,
4540 NULL, NULL, NULL, level, read_secret, secret_len)
4541 != 0) {
4542 log_err("ngtcp2_crypto_derive_and_install_rx_key "
4543 "failed");
4544 return 0;
4545 }
4546 }
4547
4548 if(write_secret) {
4549 verbose(VERB_ALGO, "doq: ngtcp2_crypto_derive_and_install_tx_key for level %d ossl %d", (int)level, (int)ossl_level);
4550 if(ngtcp2_crypto_derive_and_install_tx_key(doq_conn->conn,
4551 NULL, NULL, NULL, level, write_secret, secret_len)
4552 != 0) {
4553 log_err("ngtcp2_crypto_derive_and_install_tx_key "
4554 "failed");
4555 return 0;
4556 }
4557 if(level == NGTCP2_CRYPTO_LEVEL_APPLICATION) {
4558 if(!doq_application_tx_key_cb(doq_conn))
4559 return 0;
4560 }
4561 }
4562 return 1;
4563 }
4564
4565 /** quic_method add_handshake_data function */
4566 static int
doq_add_handshake_data(SSL * ssl,OSSL_ENCRYPTION_LEVEL ossl_level,const uint8_t * data,size_t len)4567 doq_add_handshake_data(SSL *ssl, OSSL_ENCRYPTION_LEVEL ossl_level,
4568 const uint8_t *data, size_t len)
4569 {
4570 struct doq_conn* doq_conn = (struct doq_conn*)SSL_get_app_data(ssl);
4571 #ifdef HAVE_NGTCP2_ENCRYPTION_LEVEL
4572 ngtcp2_encryption_level
4573 #else
4574 ngtcp2_crypto_level
4575 #endif
4576 level =
4577 #ifdef USE_NGTCP2_CRYPTO_OSSL
4578 ngtcp2_crypto_ossl_from_ossl_encryption_level(ossl_level);
4579 #elif defined(HAVE_NGTCP2_CRYPTO_QUICTLS_FROM_OSSL_ENCRYPTION_LEVEL)
4580 ngtcp2_crypto_quictls_from_ossl_encryption_level(ossl_level);
4581 #else
4582 ngtcp2_crypto_openssl_from_ossl_encryption_level(ossl_level);
4583 #endif
4584 int rv;
4585
4586 verbose(VERB_ALGO, "doq_add_handshake_data: "
4587 "ngtcp2_con_submit_crypto_data level %d", (int)level);
4588 rv = ngtcp2_conn_submit_crypto_data(doq_conn->conn, level, data, len);
4589 if(rv != 0) {
4590 log_err("ngtcp2_conn_submit_crypto_data failed: %s",
4591 ngtcp2_strerror(rv));
4592 ngtcp2_conn_set_tls_error(doq_conn->conn, rv);
4593 return 0;
4594 }
4595 return 1;
4596 }
4597
4598 /** quic_method flush_flight function */
4599 static int
doq_flush_flight(SSL * ATTR_UNUSED (ssl))4600 doq_flush_flight(SSL* ATTR_UNUSED(ssl))
4601 {
4602 return 1;
4603 }
4604
4605 /** quic_method send_alert function */
4606 static int
doq_send_alert(SSL * ssl,enum ssl_encryption_level_t ATTR_UNUSED (level),uint8_t alert)4607 doq_send_alert(SSL *ssl, enum ssl_encryption_level_t ATTR_UNUSED(level),
4608 uint8_t alert)
4609 {
4610 struct doq_conn* doq_conn = (struct doq_conn*)SSL_get_app_data(ssl);
4611 doq_conn->tls_alert = alert;
4612 return 1;
4613 }
4614 #endif /* MAKE_QUIC_METHOD */
4615
4616 /** ALPN select callback for the doq SSL context */
4617 static int
doq_alpn_select_cb(SSL * ATTR_UNUSED (ssl),const unsigned char ** out,unsigned char * outlen,const unsigned char * in,unsigned int inlen,void * ATTR_UNUSED (arg))4618 doq_alpn_select_cb(SSL* ATTR_UNUSED(ssl), const unsigned char** out,
4619 unsigned char* outlen, const unsigned char* in, unsigned int inlen,
4620 void* ATTR_UNUSED(arg))
4621 {
4622 /* select "doq" */
4623 int ret = SSL_select_next_proto((void*)out, outlen,
4624 (const unsigned char*)"\x03""doq", 4, in, inlen);
4625 if(ret == OPENSSL_NPN_NEGOTIATED)
4626 return SSL_TLSEXT_ERR_OK;
4627 verbose(VERB_ALGO, "doq alpn_select_cb: ALPN from client does "
4628 "not have 'doq'");
4629 return SSL_TLSEXT_ERR_ALERT_FATAL;
4630 }
4631
/**
 * Create the SSL context for the DoQ server.
 * The context is restricted to TLS 1.3, gets the ALPN select callback
 * (when available), the certificate chain and the private key, and
 * optionally a file with CA certificates to verify client certificates.
 * @param key: file name of the private key, in PEM format. Must be set.
 * @param pem: file name of the certificate chain. Must be set.
 * @param verifypem: file name with certificates to verify clients with.
 *	When NULL or empty, no client verification is set up.
 * @return the SSL_CTX, as a void pointer, or NULL on failure. Also NULL
 *	when compiled without HAVE_NGTCP2.
 */
void* quic_sslctx_create(char* key, char* pem, char* verifypem)
{
#ifdef HAVE_NGTCP2
	char* sid_ctx = "unbound server";
#ifdef MAKE_QUIC_METHOD
	SSL_QUIC_METHOD* quic_method;
#endif
	SSL_CTX* ctx = SSL_CTX_new(TLS_server_method());
	if(ctx == NULL) {
		log_crypto_err("Could not SSL_CTX_new");
		return NULL;
	}
	/* from here on, every failure leaves via the failed label, that
	 * frees the context */
	if(key == NULL || key[0] == '\0') {
		log_err("doq: error, no tls-service-key file specified");
		goto failed;
	}
	if(pem == NULL || pem[0] == '\0') {
		log_err("doq: error, no tls-service-pem file specified");
		goto failed;
	}

	SSL_CTX_set_options(ctx,
		(SSL_OP_ALL & ~SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS) |
		SSL_OP_SINGLE_ECDH_USE |
		SSL_OP_CIPHER_SERVER_PREFERENCE |
		SSL_OP_NO_ANTI_REPLAY);
	SSL_CTX_set_mode(ctx, SSL_MODE_RELEASE_BUFFERS);
	/* min and max are the same: only TLS 1.3 */
	SSL_CTX_set_min_proto_version(ctx, TLS1_3_VERSION);
	SSL_CTX_set_max_proto_version(ctx, TLS1_3_VERSION);
#ifdef HAVE_SSL_CTX_SET_ALPN_SELECT_CB
	SSL_CTX_set_alpn_select_cb(ctx, doq_alpn_select_cb, NULL);
#endif
	SSL_CTX_set_default_verify_paths(ctx);

	/* the certificate chain and the key that belongs to it */
	if(!SSL_CTX_use_certificate_chain_file(ctx, pem)) {
		log_err("doq: error for cert file: %s", pem);
		log_crypto_err("doq: error in "
			"SSL_CTX_use_certificate_chain_file");
		goto failed;
	}
	if(!SSL_CTX_use_PrivateKey_file(ctx, key, SSL_FILETYPE_PEM)) {
		log_err("doq: error for private key file: %s", key);
		log_crypto_err("doq: error in SSL_CTX_use_PrivateKey_file");
		goto failed;
	}
	if(!SSL_CTX_check_private_key(ctx)) {
		log_err("doq: error for key file: %s", key);
		log_crypto_err("doq: error in SSL_CTX_check_private_key");
		goto failed;
	}
	SSL_CTX_set_session_id_context(ctx, (void*)sid_ctx, strlen(sid_ctx));

	/* verification of client certificates, if a file is configured */
	if(verifypem != NULL && verifypem[0] != '\0') {
		if(!SSL_CTX_load_verify_locations(ctx, verifypem, NULL)) {
			log_err("doq: error for verify pem file: %s",
				verifypem);
			log_crypto_err("doq: error in "
				"SSL_CTX_load_verify_locations");
			goto failed;
		}
		SSL_CTX_set_client_CA_list(ctx, SSL_load_client_CA_file(
			verifypem));
		SSL_CTX_set_verify(ctx, SSL_VERIFY_PEER|
			SSL_VERIFY_CLIENT_ONCE|
			SSL_VERIFY_FAIL_IF_NO_PEER_CERT, NULL);
	}

	SSL_CTX_set_max_early_data(ctx, 0xffffffff);
#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT
	if(ngtcp2_crypto_quictls_configure_server_context(ctx) != 0) {
		log_err("ngtcp2_crypto_quictls_configure_server_context failed");
		goto failed;
	}
#elif defined(MAKE_QUIC_METHOD)
	/* The quic_method has to stay valid for as long as the SSL_CTX
	 * exists, so it is allocated. It is freed with the
	 * doq_server_socket. */
	quic_method = calloc(1, sizeof(SSL_QUIC_METHOD));
	if(quic_method == NULL) {
		log_err("calloc failed: out of memory");
		goto failed;
	}
	/* NOTE(review): doq_socket is not a parameter or a local variable
	 * of this function; confirm where it comes from in builds that
	 * have MAKE_QUIC_METHOD. */
	doq_socket->quic_method = quic_method;
	quic_method->set_encryption_secrets = doq_set_encryption_secrets;
	quic_method->add_handshake_data = doq_add_handshake_data;
	quic_method->flush_flight = doq_flush_flight;
	quic_method->send_alert = doq_send_alert;
	SSL_CTX_set_quic_method(ctx, doq_socket->quic_method);
#endif
	return ctx;

failed:
	SSL_CTX_free(ctx);
	return NULL;
#else /* HAVE_NGTCP2 */
	(void)key; (void)pem; (void)verifypem;
	return NULL;
#endif /* HAVE_NGTCP2 */
}
4732
4733 /** Get the ngtcp2_conn from ssl userdata of type ngtcp2_conn_ref */
doq_conn_ref_get_conn(ngtcp2_crypto_conn_ref * conn_ref)4734 static ngtcp2_conn* doq_conn_ref_get_conn(ngtcp2_crypto_conn_ref* conn_ref)
4735 {
4736 struct doq_conn* conn = (struct doq_conn*)conn_ref->user_data;
4737 return conn->conn;
4738 }
4739
4740 /** create new SSL session for server connection */
4741 static SSL*
doq_ssl_server_setup(SSL_CTX * ctx,struct doq_conn * conn)4742 doq_ssl_server_setup(SSL_CTX* ctx, struct doq_conn* conn)
4743 {
4744 #ifdef USE_NGTCP2_CRYPTO_OSSL
4745 int ret;
4746 #endif
4747 SSL* ssl = SSL_new(ctx);
4748 if(!ssl) {
4749 log_crypto_err("doq: SSL_new failed");
4750 return NULL;
4751 }
4752 #ifdef USE_NGTCP2_CRYPTO_OSSL
4753 if((ret=ngtcp2_crypto_ossl_ctx_new(&conn->ossl_ctx, NULL)) != 0) {
4754 log_err("doq: ngtcp2_crypto_ossl_ctx_new failed: %s",
4755 ngtcp2_strerror(ret));
4756 SSL_free(ssl);
4757 return NULL;
4758 }
4759 ngtcp2_crypto_ossl_ctx_set_ssl(conn->ossl_ctx, ssl);
4760 if(ngtcp2_crypto_ossl_configure_server_session(ssl) != 0) {
4761 log_err("doq: ngtcp2_crypto_ossl_configure_server_session failed");
4762 SSL_free(ssl);
4763 return NULL;
4764 }
4765 #endif
4766 #if defined(USE_NGTCP2_CRYPTO_OSSL) || defined(HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT)
4767 conn->conn_ref.get_conn = &doq_conn_ref_get_conn;
4768 conn->conn_ref.user_data = conn;
4769 SSL_set_app_data(ssl, &conn->conn_ref);
4770 #else
4771 SSL_set_app_data(ssl, conn);
4772 #endif
4773 SSL_set_accept_state(ssl);
4774 #ifdef USE_NGTCP2_CRYPTO_OSSL
4775 SSL_set_quic_tls_early_data_enabled(ssl, 1);
4776 #else
4777 SSL_set_quic_early_data_enabled(ssl, 1);
4778 #endif
4779 return ssl;
4780 }
4781
4782 int
doq_conn_setup(struct doq_conn * conn,uint8_t * scid,size_t scidlen,uint8_t * ocid,size_t ocidlen,const uint8_t * token,size_t tokenlen)4783 doq_conn_setup(struct doq_conn* conn, uint8_t* scid, size_t scidlen,
4784 uint8_t* ocid, size_t ocidlen, const uint8_t* token, size_t tokenlen)
4785 {
4786 int rv;
4787 struct ngtcp2_cid dcid, sv_scid, scid_cid;
4788 struct ngtcp2_path path;
4789 struct ngtcp2_callbacks callbacks;
4790 struct ngtcp2_settings settings;
4791 struct ngtcp2_transport_params params;
4792 memset(&dcid, 0, sizeof(dcid));
4793 memset(&sv_scid, 0, sizeof(sv_scid));
4794 memset(&scid_cid, 0, sizeof(scid_cid));
4795 memset(&path, 0, sizeof(path));
4796 memset(&callbacks, 0, sizeof(callbacks));
4797 memset(&settings, 0, sizeof(settings));
4798 memset(¶ms, 0, sizeof(params));
4799
4800 ngtcp2_cid_init(&scid_cid, scid, scidlen);
4801 ngtcp2_cid_init(&dcid, conn->key.dcid, conn->key.dcidlen);
4802
4803 path.remote.addr = (struct sockaddr*)&conn->key.paddr.addr;
4804 path.remote.addrlen = conn->key.paddr.addrlen;
4805 path.local.addr = (struct sockaddr*)&conn->key.paddr.localaddr;
4806 path.local.addrlen = conn->key.paddr.localaddrlen;
4807
4808 callbacks.recv_client_initial = ngtcp2_crypto_recv_client_initial_cb;
4809 callbacks.recv_crypto_data = ngtcp2_crypto_recv_crypto_data_cb;
4810 callbacks.encrypt = ngtcp2_crypto_encrypt_cb;
4811 callbacks.decrypt = ngtcp2_crypto_decrypt_cb;
4812 callbacks.hp_mask = ngtcp2_crypto_hp_mask;
4813 callbacks.update_key = ngtcp2_crypto_update_key_cb;
4814 callbacks.delete_crypto_aead_ctx =
4815 ngtcp2_crypto_delete_crypto_aead_ctx_cb;
4816 callbacks.delete_crypto_cipher_ctx =
4817 ngtcp2_crypto_delete_crypto_cipher_ctx_cb;
4818 callbacks.get_path_challenge_data =
4819 ngtcp2_crypto_get_path_challenge_data_cb;
4820 callbacks.version_negotiation = ngtcp2_crypto_version_negotiation_cb;
4821 callbacks.rand = doq_rand_cb;
4822 callbacks.get_new_connection_id = doq_get_new_connection_id_cb;
4823 callbacks.remove_connection_id = doq_remove_connection_id_cb;
4824 callbacks.handshake_completed = doq_handshake_completed_cb;
4825 callbacks.stream_open = doq_stream_open_cb;
4826 callbacks.stream_close = doq_stream_close_cb;
4827 callbacks.stream_reset = doq_stream_reset_cb;
4828 callbacks.acked_stream_data_offset = doq_acked_stream_data_offset_cb;
4829 callbacks.recv_stream_data = doq_recv_stream_data_cb;
4830
4831 ngtcp2_settings_default(&settings);
4832 if(verbosity >= VERB_ALGO) {
4833 settings.log_printf = doq_log_printf_cb;
4834 }
4835 settings.rand_ctx.native_handle = conn->doq_socket->rnd;
4836 settings.initial_ts = doq_get_timestamp_nanosec();
4837 settings.max_stream_window = 6*1024*1024;
4838 settings.max_window = 6*1024*1024;
4839 #ifdef HAVE_STRUCT_NGTCP2_SETTINGS_TOKENLEN
4840 settings.token = (void*)token;
4841 settings.tokenlen = tokenlen;
4842 #else
4843 settings.token.base = (void*)token;
4844 settings.token.len = tokenlen;
4845 #endif
4846
4847 ngtcp2_transport_params_default(¶ms);
4848 params.max_idle_timeout = conn->doq_socket->idle_timeout;
4849 params.active_connection_id_limit = 7;
4850 params.initial_max_stream_data_bidi_local = 256*1024;
4851 params.initial_max_stream_data_bidi_remote = 256*1024;
4852 params.initial_max_data = 1024*1024;
4853 /* DoQ uses bidi streams, so we allow 0 uni streams. */
4854 params.initial_max_streams_uni = 0;
4855 /* Initial max on number of bidi streams the remote end can open.
4856 * That is the number of queries it can make, at first. */
4857 params.initial_max_streams_bidi = 10;
4858 if(ocid) {
4859 ngtcp2_cid_init(¶ms.original_dcid, ocid, ocidlen);
4860 ngtcp2_cid_init(¶ms.retry_scid, conn->key.dcid,
4861 conn->key.dcidlen);
4862 params.retry_scid_present = 1;
4863 } else {
4864 ngtcp2_cid_init(¶ms.original_dcid, conn->key.dcid,
4865 conn->key.dcidlen);
4866 }
4867 #ifdef HAVE_STRUCT_NGTCP2_TRANSPORT_PARAMS_ORIGINAL_DCID_PRESENT
4868 params.original_dcid_present = 1;
4869 #endif
4870 doq_fill_rand(conn->doq_socket->rnd, params.stateless_reset_token,
4871 sizeof(params.stateless_reset_token));
4872 sv_scid.datalen = conn->doq_socket->sv_scidlen;
4873 lock_rw_wrlock(&conn->table->conid_lock);
4874 if(!doq_conn_generate_new_conid(conn, sv_scid.data, sv_scid.datalen)) {
4875 lock_rw_unlock(&conn->table->conid_lock);
4876 return 0;
4877 }
4878
4879 rv = ngtcp2_conn_server_new(&conn->conn, &scid_cid, &sv_scid, &path,
4880 conn->version, &callbacks, &settings, ¶ms, NULL, conn);
4881 if(rv != 0) {
4882 lock_rw_unlock(&conn->table->conid_lock);
4883 log_err("ngtcp2_conn_server_new failed: %s",
4884 ngtcp2_strerror(rv));
4885 return 0;
4886 }
4887 if(!doq_conn_setup_conids(conn)) {
4888 lock_rw_unlock(&conn->table->conid_lock);
4889 log_err("doq_conn_setup_conids failed: out of memory");
4890 return 0;
4891 }
4892 lock_rw_unlock(&conn->table->conid_lock);
4893 conn->ssl = doq_ssl_server_setup((SSL_CTX*)conn->doq_socket->ctx,
4894 conn);
4895 if(!conn->ssl) {
4896 log_err("doq_ssl_server_setup failed");
4897 return 0;
4898 }
4899 #ifdef USE_NGTCP2_CRYPTO_OSSL
4900 ngtcp2_conn_set_tls_native_handle(conn->conn, conn->ossl_ctx);
4901 #else
4902 ngtcp2_conn_set_tls_native_handle(conn->conn, conn->ssl);
4903 #endif
4904 doq_conn_write_enable(conn);
4905 return 1;
4906 }
4907
4908 struct doq_conid*
doq_conid_find(struct doq_table * table,const uint8_t * data,size_t datalen)4909 doq_conid_find(struct doq_table* table, const uint8_t* data, size_t datalen)
4910 {
4911 struct rbnode_type* node;
4912 struct doq_conid key;
4913 key.node.key = &key;
4914 key.cid = (void*)data;
4915 key.cidlen = datalen;
4916 log_assert(table != NULL);
4917 node = rbtree_search(table->conid_tree, &key);
4918 if(node)
4919 return (struct doq_conid*)node->key;
4920 return NULL;
4921 }
4922
4923 /** insert conid in the conid list */
4924 static void
doq_conid_list_insert(struct doq_conn * conn,struct doq_conid * conid)4925 doq_conid_list_insert(struct doq_conn* conn, struct doq_conid* conid)
4926 {
4927 conid->prev = NULL;
4928 conid->next = conn->conid_list;
4929 if(conn->conid_list)
4930 conn->conid_list->prev = conid;
4931 conn->conid_list = conid;
4932 }
4933
4934 /** remove conid from the conid list */
4935 static void
doq_conid_list_remove(struct doq_conn * conn,struct doq_conid * conid)4936 doq_conid_list_remove(struct doq_conn* conn, struct doq_conid* conid)
4937 {
4938 if(conid->prev)
4939 conid->prev->next = conid->next;
4940 else conn->conid_list = conid->next;
4941 if(conid->next)
4942 conid->next->prev = conid->prev;
4943 }
4944
4945 /** create a doq_conid */
4946 static struct doq_conid*
doq_conid_create(uint8_t * data,size_t datalen,struct doq_conn_key * key)4947 doq_conid_create(uint8_t* data, size_t datalen, struct doq_conn_key* key)
4948 {
4949 struct doq_conid* conid;
4950 conid = calloc(1, sizeof(*conid));
4951 if(!conid)
4952 return NULL;
4953 conid->cid = memdup(data, datalen);
4954 if(!conid->cid) {
4955 free(conid);
4956 return NULL;
4957 }
4958 conid->cidlen = datalen;
4959 conid->node.key = conid;
4960 conid->key = *key;
4961 conid->key.dcid = memdup(key->dcid, key->dcidlen);
4962 if(!conid->key.dcid) {
4963 free(conid->cid);
4964 free(conid);
4965 return NULL;
4966 }
4967 return conid;
4968 }
4969
4970 void
doq_conid_delete(struct doq_conid * conid)4971 doq_conid_delete(struct doq_conid* conid)
4972 {
4973 if(!conid)
4974 return;
4975 free(conid->key.dcid);
4976 free(conid->cid);
4977 free(conid);
4978 }
4979
4980 /** return true if the conid is for the conn. */
4981 static int
conid_is_for_conn(struct doq_conn * conn,struct doq_conid * conid)4982 conid_is_for_conn(struct doq_conn* conn, struct doq_conid* conid)
4983 {
4984 if(conid->key.dcidlen == conn->key.dcidlen &&
4985 memcmp(conid->key.dcid, conn->key.dcid, conid->key.dcidlen)==0
4986 && conid->key.paddr.addrlen == conn->key.paddr.addrlen &&
4987 memcmp(&conid->key.paddr.addr, &conn->key.paddr.addr,
4988 conid->key.paddr.addrlen) == 0 &&
4989 conid->key.paddr.localaddrlen == conn->key.paddr.localaddrlen &&
4990 memcmp(&conid->key.paddr.localaddr, &conn->key.paddr.localaddr,
4991 conid->key.paddr.localaddrlen) == 0 &&
4992 conid->key.paddr.ifindex == conn->key.paddr.ifindex)
4993 return 1;
4994 return 0;
4995 }
4996
4997 int
doq_conn_associate_conid(struct doq_conn * conn,uint8_t * data,size_t datalen)4998 doq_conn_associate_conid(struct doq_conn* conn, uint8_t* data, size_t datalen)
4999 {
5000 struct doq_conid* conid;
5001 conid = doq_conid_find(conn->table, data, datalen);
5002 if(conid && !conid_is_for_conn(conn, conid)) {
5003 verbose(VERB_ALGO, "doq connection id already exists for "
5004 "another doq_conn. Ignoring second connection id.");
5005 /* Already exists to another conn, ignore it.
5006 * This works, in that the conid is listed in the doq_conn
5007 * conid_list element, and removed from there. So our conid
5008 * tree and list are fine, when created and removed.
5009 * The tree now does not have the lookup element pointing
5010 * to this connection. */
5011 return 1;
5012 }
5013 if(conid)
5014 return 1; /* already inserted */
5015 conid = doq_conid_create(data, datalen, &conn->key);
5016 if(!conid)
5017 return 0;
5018 doq_conid_list_insert(conn, conid);
5019 (void)rbtree_insert(conn->table->conid_tree, &conid->node);
5020 return 1;
5021 }
5022
5023 void
doq_conn_dissociate_conid(struct doq_conn * conn,const uint8_t * data,size_t datalen)5024 doq_conn_dissociate_conid(struct doq_conn* conn, const uint8_t* data,
5025 size_t datalen)
5026 {
5027 struct doq_conid* conid;
5028 conid = doq_conid_find(conn->table, data, datalen);
5029 if(conid && !conid_is_for_conn(conn, conid))
5030 return;
5031 if(conid) {
5032 (void)rbtree_delete(conn->table->conid_tree,
5033 conid->node.key);
5034 doq_conid_list_remove(conn, conid);
5035 doq_conid_delete(conid);
5036 }
5037 }
5038
5039 /** associate the scid array and also the dcid.
5040 * caller must hold the locks on conn and doq_table.conid_lock. */
5041 static int
doq_conn_setup_id_array_and_dcid(struct doq_conn * conn,struct ngtcp2_cid * scids,size_t num_scid)5042 doq_conn_setup_id_array_and_dcid(struct doq_conn* conn,
5043 struct ngtcp2_cid* scids, size_t num_scid)
5044 {
5045 size_t i;
5046 for(i=0; i<num_scid; i++) {
5047 if(!doq_conn_associate_conid(conn, scids[i].data,
5048 scids[i].datalen))
5049 return 0;
5050 }
5051 if(!doq_conn_associate_conid(conn, conn->key.dcid, conn->key.dcidlen))
5052 return 0;
5053 return 1;
5054 }
5055
5056 int
doq_conn_setup_conids(struct doq_conn * conn)5057 doq_conn_setup_conids(struct doq_conn* conn)
5058 {
5059 size_t num_scid =
5060 #ifndef HAVE_NGTCP2_CONN_GET_NUM_SCID
5061 ngtcp2_conn_get_scid(conn->conn, NULL);
5062 #else
5063 ngtcp2_conn_get_num_scid(conn->conn);
5064 #endif
5065 if(num_scid <= 4) {
5066 struct ngtcp2_cid ids[4];
5067 /* Usually there are not that many scids when just accepted,
5068 * like only 2. */
5069 ngtcp2_conn_get_scid(conn->conn, ids);
5070 return doq_conn_setup_id_array_and_dcid(conn, ids, num_scid);
5071 } else {
5072 struct ngtcp2_cid *scids = calloc(num_scid,
5073 sizeof(struct ngtcp2_cid));
5074 if(!scids)
5075 return 0;
5076 ngtcp2_conn_get_scid(conn->conn, scids);
5077 if(!doq_conn_setup_id_array_and_dcid(conn, scids, num_scid)) {
5078 free(scids);
5079 return 0;
5080 }
5081 free(scids);
5082 }
5083 return 1;
5084 }
5085
5086 void
doq_conn_clear_conids(struct doq_conn * conn)5087 doq_conn_clear_conids(struct doq_conn* conn)
5088 {
5089 struct doq_conid* p, *next;
5090 if(!conn)
5091 return;
5092 p = conn->conid_list;
5093 while(p) {
5094 next = p->next;
5095 (void)rbtree_delete(conn->table->conid_tree, p->node.key);
5096 doq_conid_delete(p);
5097 p = next;
5098 }
5099 conn->conid_list = NULL;
5100 }
5101
doq_get_timestamp_nanosec(void)5102 ngtcp2_tstamp doq_get_timestamp_nanosec(void)
5103 {
5104 #ifdef CLOCK_REALTIME
5105 struct timespec tp;
5106 memset(&tp, 0, sizeof(tp));
5107 /* Get a nanosecond time, that can be compared with the event base. */
5108 if(clock_gettime(CLOCK_REALTIME, &tp) == -1) {
5109 log_err("clock_gettime failed: %s", strerror(errno));
5110 }
5111 return ((uint64_t)tp.tv_sec)*((uint64_t)1000000000) +
5112 ((uint64_t)tp.tv_nsec);
5113 #else
5114 struct timeval tv;
5115 if(gettimeofday(&tv, NULL) < 0) {
5116 log_err("gettimeofday failed: %s", strerror(errno));
5117 }
5118 return ((uint64_t)tv.tv_sec)*((uint64_t)1000000000) +
5119 ((uint64_t)tv.tv_usec)*((uint64_t)1000);
5120 #endif /* CLOCK_REALTIME */
5121 }
5122
5123 /** doq start the closing period for the connection. */
5124 static int
doq_conn_start_closing_period(struct comm_point * c,struct doq_conn * conn)5125 doq_conn_start_closing_period(struct comm_point* c, struct doq_conn* conn)
5126 {
5127 struct ngtcp2_path_storage ps;
5128 struct ngtcp2_pkt_info pi;
5129 ngtcp2_ssize ret;
5130 if(!conn)
5131 return 1;
5132 if(
5133 #ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD
5134 ngtcp2_conn_in_closing_period(conn->conn)
5135 #else
5136 ngtcp2_conn_is_in_closing_period(conn->conn)
5137 #endif
5138 )
5139 return 1;
5140 if(
5141 #ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD
5142 ngtcp2_conn_in_draining_period(conn->conn)
5143 #else
5144 ngtcp2_conn_is_in_draining_period(conn->conn)
5145 #endif
5146 ) {
5147 doq_conn_write_disable(conn);
5148 return 1;
5149 }
5150 ngtcp2_path_storage_zero(&ps);
5151 sldns_buffer_clear(c->doq_socket->pkt_buf);
5152 /* the call to ngtcp2_conn_write_connection_close causes the
5153 * conn to be closed. It is now in the closing period. */
5154 ret = ngtcp2_conn_write_connection_close(conn->conn, &ps.path,
5155 &pi, sldns_buffer_begin(c->doq_socket->pkt_buf),
5156 sldns_buffer_remaining(c->doq_socket->pkt_buf),
5157 #ifdef HAVE_NGTCP2_CCERR_DEFAULT
5158 &conn->ccerr
5159 #else
5160 &conn->last_error
5161 #endif
5162 , doq_get_timestamp_nanosec());
5163 if(ret < 0) {
5164 log_err("doq ngtcp2_conn_write_connection_close failed: %s",
5165 ngtcp2_strerror(ret));
5166 return 0;
5167 }
5168 if(ret == 0) {
5169 return 0;
5170 }
5171 sldns_buffer_set_position(c->doq_socket->pkt_buf, ret);
5172 sldns_buffer_flip(c->doq_socket->pkt_buf);
5173
5174 /* The close packet is allocated, because it may have to be repeated.
5175 * When incoming packets have this connection dcid. */
5176 conn->close_pkt = memdup(sldns_buffer_begin(c->doq_socket->pkt_buf),
5177 sldns_buffer_limit(c->doq_socket->pkt_buf));
5178 if(!conn->close_pkt) {
5179 log_err("doq: could not allocate close packet: out of memory");
5180 return 0;
5181 }
5182 conn->close_pkt_len = sldns_buffer_limit(c->doq_socket->pkt_buf);
5183 conn->close_ecn = pi.ecn;
5184 return 1;
5185 }
5186
5187 /** doq send the close packet for the connection, perhaps again. */
5188 int
doq_conn_send_close(struct comm_point * c,struct doq_conn * conn)5189 doq_conn_send_close(struct comm_point* c, struct doq_conn* conn)
5190 {
5191 if(!conn)
5192 return 0;
5193 if(!conn->close_pkt)
5194 return 0;
5195 if(conn->close_pkt_len > sldns_buffer_capacity(c->doq_socket->pkt_buf))
5196 return 0;
5197 sldns_buffer_clear(c->doq_socket->pkt_buf);
5198 sldns_buffer_write(c->doq_socket->pkt_buf, conn->close_pkt, conn->close_pkt_len);
5199 sldns_buffer_flip(c->doq_socket->pkt_buf);
5200 verbose(VERB_ALGO, "doq send connection close");
5201 doq_send_pkt(c, &conn->key.paddr, conn->close_ecn);
5202 doq_conn_write_disable(conn);
5203 return 1;
5204 }
5205
5206 /** doq close the connection on error. If it returns a failure, it
5207 * does not wait to send a close, and the connection can be dropped. */
5208 static int
doq_conn_close_error(struct comm_point * c,struct doq_conn * conn)5209 doq_conn_close_error(struct comm_point* c, struct doq_conn* conn)
5210 {
5211 #ifdef HAVE_NGTCP2_CCERR_DEFAULT
5212 if(conn->ccerr.type == NGTCP2_CCERR_TYPE_IDLE_CLOSE)
5213 return 0;
5214 #else
5215 if(conn->last_error.type ==
5216 NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT_IDLE_CLOSE)
5217 return 0;
5218 #endif
5219 if(!doq_conn_start_closing_period(c, conn))
5220 return 0;
5221 if(
5222 #ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD
5223 ngtcp2_conn_in_draining_period(conn->conn)
5224 #else
5225 ngtcp2_conn_is_in_draining_period(conn->conn)
5226 #endif
5227 ) {
5228 doq_conn_write_disable(conn);
5229 return 1;
5230 }
5231 doq_conn_write_enable(conn);
5232 if(!doq_conn_send_close(c, conn))
5233 return 0;
5234 return 1;
5235 }
5236
5237 int
doq_conn_recv(struct comm_point * c,struct doq_pkt_addr * paddr,struct doq_conn * conn,struct ngtcp2_pkt_info * pi,int * err_retry,int * err_drop)5238 doq_conn_recv(struct comm_point* c, struct doq_pkt_addr* paddr,
5239 struct doq_conn* conn, struct ngtcp2_pkt_info* pi, int* err_retry,
5240 int* err_drop)
5241 {
5242 int ret;
5243 ngtcp2_tstamp ts;
5244 struct ngtcp2_path path;
5245 memset(&path, 0, sizeof(path));
5246 path.remote.addr = (struct sockaddr*)&paddr->addr;
5247 path.remote.addrlen = paddr->addrlen;
5248 path.local.addr = (struct sockaddr*)&paddr->localaddr;
5249 path.local.addrlen = paddr->localaddrlen;
5250 ts = doq_get_timestamp_nanosec();
5251
5252 ret = ngtcp2_conn_read_pkt(conn->conn, &path, pi,
5253 sldns_buffer_begin(c->doq_socket->pkt_buf),
5254 sldns_buffer_limit(c->doq_socket->pkt_buf), ts);
5255 if(ret != 0) {
5256 if(err_retry)
5257 *err_retry = 0;
5258 if(err_drop)
5259 *err_drop = 0;
5260 if(ret == NGTCP2_ERR_DRAINING) {
5261 verbose(VERB_ALGO, "ngtcp2_conn_read_pkt returned %s",
5262 ngtcp2_strerror(ret));
5263 doq_conn_write_disable(conn);
5264 return 0;
5265 } else if(ret == NGTCP2_ERR_DROP_CONN) {
5266 verbose(VERB_ALGO, "ngtcp2_conn_read_pkt returned %s",
5267 ngtcp2_strerror(ret));
5268 if(err_drop)
5269 *err_drop = 1;
5270 return 0;
5271 } else if(ret == NGTCP2_ERR_RETRY) {
5272 verbose(VERB_ALGO, "ngtcp2_conn_read_pkt returned %s",
5273 ngtcp2_strerror(ret));
5274 if(err_retry)
5275 *err_retry = 1;
5276 if(err_drop)
5277 *err_drop = 1;
5278 return 0;
5279 } else if(ret == NGTCP2_ERR_CRYPTO) {
5280 if(
5281 #ifdef HAVE_NGTCP2_CCERR_DEFAULT
5282 !conn->ccerr.error_code
5283 #else
5284 !conn->last_error.error_code
5285 #endif
5286 ) {
5287 /* in picotls the tls alert may need to be
5288 * copied, but this is with openssl. And there
5289 * is conn->tls_alert. */
5290 #ifdef HAVE_NGTCP2_CCERR_DEFAULT
5291 ngtcp2_ccerr_set_tls_alert(&conn->ccerr,
5292 conn->tls_alert, NULL, 0);
5293 #else
5294 ngtcp2_connection_close_error_set_transport_error_tls_alert(
5295 &conn->last_error, conn->tls_alert,
5296 NULL, 0);
5297 #endif
5298 }
5299 } else {
5300 if(
5301 #ifdef HAVE_NGTCP2_CCERR_DEFAULT
5302 !conn->ccerr.error_code
5303 #else
5304 !conn->last_error.error_code
5305 #endif
5306 ) {
5307 #ifdef HAVE_NGTCP2_CCERR_DEFAULT
5308 ngtcp2_ccerr_set_liberr(&conn->ccerr, ret,
5309 NULL, 0);
5310 #else
5311 ngtcp2_connection_close_error_set_transport_error_liberr(
5312 &conn->last_error, ret, NULL, 0);
5313 #endif
5314 }
5315 }
5316 log_err("ngtcp2_conn_read_pkt failed: %s",
5317 ngtcp2_strerror(ret));
5318 if(!doq_conn_close_error(c, conn)) {
5319 if(err_drop)
5320 *err_drop = 1;
5321 }
5322 return 0;
5323 }
5324 doq_conn_write_enable(conn);
5325 return 1;
5326 }
5327
/**
 * The write for the doq stream is done.
 * The stream is taken off the write list of the connection.
 * @param conn: the connection.
 * @param stream: the stream that has been written.
 */
static void
doq_stream_write_is_done(struct doq_conn* conn, struct doq_stream* stream)
{
	/* The buffer is not deallocated here, because it may be needed
	 * for resends. */
	doq_stream_off_write_list(conn, stream);
}
5335
/**
 * Write packets for the connection, with data from the streams that are
 * on the stream write list.
 * Every iteration of the loop asks ngtcp2 to write one packet into the
 * packet buffer of the doq socket, and sends it. The stream data is the
 * two byte outlen_wire followed by the out buffer; stream->nwrite counts
 * the bytes that ngtcp2 has accepted, including those two bytes.
 * The loop stops when ngtcp2 writes no packet, when the doq socket has a
 * blocked packet, or after max_packets packets.
 * @param c: the comm point, its doq_socket has the packet buffer.
 * @param conn: the connection.
 * @param err_drop: if not NULL, set on failure: 1 if the close on error
 *	failed, otherwise 0.
 * @return 1 on success, 0 on failure; on failure the error is stored in
 *	the connection and doq_conn_close_error is called.
 */
int
doq_conn_write_streams(struct comm_point* c, struct doq_conn* conn,
	int* err_drop)
{
	struct doq_stream* stream = conn->stream_write_first;
	ngtcp2_path_storage ps;
	ngtcp2_tstamp ts = doq_get_timestamp_nanosec();
	size_t num_packets = 0, max_packets = 65535;
	ngtcp2_path_storage_zero(&ps);

	for(;;) {
		int64_t stream_id;
		uint32_t flags = 0;
		ngtcp2_pkt_info pi;
		ngtcp2_vec datav[2];
		size_t datav_count = 0;
		ngtcp2_ssize ret, ndatalen = 0;
		int fin;

		if(stream) {
			/* data to send */
			verbose(VERB_ALGO, "doq: doq_conn write stream %d",
				(int)stream->stream_id);
			stream_id = stream->stream_id;
			fin = 1;
			if(stream->nwrite < 2) {
				/* the remainder of the two byte length,
				 * followed by the whole out buffer */
				datav[0].base = ((uint8_t*)&stream->
					outlen_wire) + stream->nwrite;
				datav[0].len = 2 - stream->nwrite;
				datav[1].base = stream->out;
				datav[1].len = stream->outlen;
				datav_count = 2;
			} else {
				/* the length has been written, continue in
				 * the out buffer; nwrite-2 is the offset */
				datav[0].base = stream->out +
					(stream->nwrite-2);
				datav[0].len = stream->outlen -
					(stream->nwrite-2);
				datav_count = 1;
			}
		} else {
			/* no data to send */
			verbose(VERB_ALGO, "doq: doq_conn write stream -1");
			stream_id = -1;
			fin = 0;
			datav[0].base = NULL;
			datav[0].len = 0;
			datav_count = 1;
		}

		/* if more streams, set it to write more */
		if(stream && stream->write_next)
			flags |= NGTCP2_WRITE_STREAM_FLAG_MORE;
		if(fin)
			flags |= NGTCP2_WRITE_STREAM_FLAG_FIN;

		sldns_buffer_clear(c->doq_socket->pkt_buf);
		ret = ngtcp2_conn_writev_stream(conn->conn, &ps.path, &pi,
			sldns_buffer_begin(c->doq_socket->pkt_buf),
			sldns_buffer_remaining(c->doq_socket->pkt_buf),
			&ndatalen, flags, stream_id, datav, datav_count, ts);
		if(ret < 0) {
			if(ret == NGTCP2_ERR_WRITE_MORE) {
				/* account for the data that was accepted
				 * and continue with the next stream, no
				 * packet is sent in this iteration */
				verbose(VERB_ALGO, "doq: write more, ndatalen %d", (int)ndatalen);
				if(stream) {
					if(ndatalen >= 0)
						stream->nwrite += ndatalen;
					if(stream->nwrite >= stream->outlen+2)
						doq_stream_write_is_done(
							conn, stream);
					/* NOTE(review): write_next is read
					 * after the stream may have been
					 * taken off the write list; confirm
					 * that doq_stream_off_write_list
					 * keeps it usable here. */
					stream = stream->write_next;
				}
				continue;
			} else if(ret == NGTCP2_ERR_STREAM_DATA_BLOCKED) {
				/* the connection is closed with an
				 * application error */
				verbose(VERB_ALGO, "doq: ngtcp2_conn_writev_stream returned NGTCP2_ERR_STREAM_DATA_BLOCKED");
#ifdef HAVE_NGTCP2_CCERR_DEFAULT
				ngtcp2_ccerr_set_application_error(
					&conn->ccerr, -1, NULL, 0);
#else
				ngtcp2_connection_close_error_set_application_error(&conn->last_error, -1, NULL, 0);
#endif
				if(err_drop)
					*err_drop = 0;
				if(!doq_conn_close_error(c, conn)) {
					if(err_drop)
						*err_drop = 1;
				}
				return 0;
			} else if(ret == NGTCP2_ERR_STREAM_SHUT_WR) {
				/* the connection is closed with an
				 * application error */
				verbose(VERB_ALGO, "doq: ngtcp2_conn_writev_stream returned NGTCP2_ERR_STREAM_SHUT_WR");
#ifdef HAVE_NGTCP2_CCERR_DEFAULT
				ngtcp2_ccerr_set_application_error(
					&conn->ccerr, -1, NULL, 0);
#else
				ngtcp2_connection_close_error_set_application_error(&conn->last_error, -1, NULL, 0);
#endif
				if(err_drop)
					*err_drop = 0;
				if(!doq_conn_close_error(c, conn)) {
					if(err_drop)
						*err_drop = 1;
				}
				return 0;
			}

			/* other errors: the connection is closed with the
			 * library error */
			log_err("doq: ngtcp2_conn_writev_stream failed: %s",
				ngtcp2_strerror(ret));
#ifdef HAVE_NGTCP2_CCERR_DEFAULT
			ngtcp2_ccerr_set_liberr(&conn->ccerr, ret, NULL, 0);
#else
			ngtcp2_connection_close_error_set_transport_error_liberr(
				&conn->last_error, ret, NULL, 0);
#endif
			if(err_drop)
				*err_drop = 0;
			if(!doq_conn_close_error(c, conn)) {
				if(err_drop)
					*err_drop = 1;
			}
			return 0;
		}
		verbose(VERB_ALGO, "doq: writev_stream pkt size %d ndatawritten %d",
			(int)ret, (int)ndatalen);

		/* account for the stream data that was accepted */
		if(ndatalen >= 0 && stream) {
			stream->nwrite += ndatalen;
			if(stream->nwrite >= stream->outlen+2)
				doq_stream_write_is_done(conn, stream);
		}
		if(ret == 0) {
			/* congestion limited */
			doq_conn_write_disable(conn);
			ngtcp2_conn_update_pkt_tx_time(conn->conn, ts);
			return 1;
		}
		/* ret is the size of the packet in the buffer, send it */
		sldns_buffer_set_position(c->doq_socket->pkt_buf, ret);
		sldns_buffer_flip(c->doq_socket->pkt_buf);
		doq_send_pkt(c, &conn->key.paddr, pi.ecn);

		/* stop if the socket has a blocked packet */
		if(c->doq_socket->have_blocked_pkt)
			break;
		/* limit on the number of packets for one call */
		if(++num_packets == max_packets)
			break;
		if(stream)
			stream = stream->write_next;
	}
	ngtcp2_conn_update_pkt_tx_time(conn->conn, ts);
	return 1;
}
5484
5485 void
doq_conn_write_enable(struct doq_conn * conn)5486 doq_conn_write_enable(struct doq_conn* conn)
5487 {
5488 conn->write_interest = 1;
5489 }
5490
5491 void
doq_conn_write_disable(struct doq_conn * conn)5492 doq_conn_write_disable(struct doq_conn* conn)
5493 {
5494 conn->write_interest = 0;
5495 }
5496
5497 /** doq append the connection to the write list */
5498 static void
doq_conn_write_list_append(struct doq_table * table,struct doq_conn * conn)5499 doq_conn_write_list_append(struct doq_table* table, struct doq_conn* conn)
5500 {
5501 if(conn->on_write_list)
5502 return;
5503 conn->write_prev = table->write_list_last;
5504 if(table->write_list_last)
5505 table->write_list_last->write_next = conn;
5506 else table->write_list_first = conn;
5507 conn->write_next = NULL;
5508 table->write_list_last = conn;
5509 conn->on_write_list = 1;
5510 }
5511
5512 void
doq_conn_write_list_remove(struct doq_table * table,struct doq_conn * conn)5513 doq_conn_write_list_remove(struct doq_table* table, struct doq_conn* conn)
5514 {
5515 if(!conn->on_write_list)
5516 return;
5517 if(conn->write_next)
5518 conn->write_next->write_prev = conn->write_prev;
5519 else table->write_list_last = conn->write_prev;
5520 if(conn->write_prev)
5521 conn->write_prev->write_next = conn->write_next;
5522 else table->write_list_first = conn->write_next;
5523 conn->write_prev = NULL;
5524 conn->write_next = NULL;
5525 conn->on_write_list = 0;
5526 }
5527
5528 void
doq_conn_set_write_list(struct doq_table * table,struct doq_conn * conn)5529 doq_conn_set_write_list(struct doq_table* table, struct doq_conn* conn)
5530 {
5531 if(conn->write_interest && conn->on_write_list)
5532 return;
5533 if(!conn->write_interest && !conn->on_write_list)
5534 return;
5535 if(conn->write_interest)
5536 doq_conn_write_list_append(table, conn);
5537 else doq_conn_write_list_remove(table, conn);
5538 }
5539
5540 struct doq_conn*
doq_table_pop_first(struct doq_table * table)5541 doq_table_pop_first(struct doq_table* table)
5542 {
5543 struct doq_conn* conn = table->write_list_first;
5544 if(!conn)
5545 return NULL;
5546 lock_basic_lock(&conn->lock);
5547 table->write_list_first = conn->write_next;
5548 if(conn->write_next)
5549 conn->write_next->write_prev = NULL;
5550 else table->write_list_last = NULL;
5551 conn->write_next = NULL;
5552 conn->write_prev = NULL;
5553 conn->on_write_list = 0;
5554 return conn;
5555 }
5556
5557 int
doq_conn_check_timer(struct doq_conn * conn,struct timeval * tv)5558 doq_conn_check_timer(struct doq_conn* conn, struct timeval* tv)
5559 {
5560 ngtcp2_tstamp expiry = ngtcp2_conn_get_expiry(conn->conn);
5561 ngtcp2_tstamp now = doq_get_timestamp_nanosec();
5562 ngtcp2_tstamp t;
5563
5564 if(expiry <= now) {
5565 /* The timer has already expired, add with zero timeout.
5566 * This should call the callback straight away. Calling it
5567 * from the event callbacks is cleaner than calling it here,
5568 * because then it is always called with the same locks and
5569 * so on. This routine only has the conn.lock. */
5570 t = now;
5571 } else {
5572 t = expiry;
5573 }
5574
5575 /* convert to timeval */
5576 memset(tv, 0, sizeof(*tv));
5577 tv->tv_sec = t / NGTCP2_SECONDS;
5578 tv->tv_usec = (t / NGTCP2_MICROSECONDS)%1000000;
5579
5580 /* If we already have a timer, is it the right value? */
5581 if(conn->timer.timer_in_tree || conn->timer.timer_in_list) {
5582 if(conn->timer.time.tv_sec == tv->tv_sec &&
5583 conn->timer.time.tv_usec == tv->tv_usec)
5584 return 0;
5585 }
5586 return 1;
5587 }
5588
5589 /* doq print connection log */
5590 static void
doq_conn_log_line(struct doq_conn * conn,char * s)5591 doq_conn_log_line(struct doq_conn* conn, char* s)
5592 {
5593 char remotestr[256], localstr[256];
5594 addr_to_str((void*)&conn->key.paddr.addr, conn->key.paddr.addrlen,
5595 remotestr, sizeof(remotestr));
5596 addr_to_str((void*)&conn->key.paddr.localaddr,
5597 conn->key.paddr.localaddrlen, localstr, sizeof(localstr));
5598 log_info("doq conn %s %s %s", remotestr, localstr, s);
5599 }
5600
5601 int
doq_conn_handle_timeout(struct doq_conn * conn)5602 doq_conn_handle_timeout(struct doq_conn* conn)
5603 {
5604 ngtcp2_tstamp now = doq_get_timestamp_nanosec();
5605 int rv;
5606
5607 if(verbosity >= VERB_ALGO)
5608 doq_conn_log_line(conn, "timeout");
5609
5610 rv = ngtcp2_conn_handle_expiry(conn->conn, now);
5611 if(rv != 0) {
5612 verbose(VERB_ALGO, "ngtcp2_conn_handle_expiry failed: %s",
5613 ngtcp2_strerror(rv));
5614 #ifdef HAVE_NGTCP2_CCERR_DEFAULT
5615 ngtcp2_ccerr_set_liberr(&conn->ccerr, rv, NULL, 0);
5616 #else
5617 ngtcp2_connection_close_error_set_transport_error_liberr(
5618 &conn->last_error, rv, NULL, 0);
5619 #endif
5620 if(!doq_conn_close_error(conn->doq_socket->cp, conn)) {
5621 /* failed, return for deletion */
5622 return 0;
5623 }
5624 return 1;
5625 }
5626 doq_conn_write_enable(conn);
5627 if(!doq_conn_write_streams(conn->doq_socket->cp, conn, NULL)) {
5628 /* failed, return for deletion. */
5629 return 0;
5630 }
5631 return 1;
5632 }
5633
5634 void
doq_table_quic_size_add(struct doq_table * table,size_t add)5635 doq_table_quic_size_add(struct doq_table* table, size_t add)
5636 {
5637 lock_basic_lock(&table->size_lock);
5638 table->current_size += add;
5639 lock_basic_unlock(&table->size_lock);
5640 }
5641
5642 void
doq_table_quic_size_subtract(struct doq_table * table,size_t subtract)5643 doq_table_quic_size_subtract(struct doq_table* table, size_t subtract)
5644 {
5645 lock_basic_lock(&table->size_lock);
5646 if(table->current_size < subtract)
5647 table->current_size = 0;
5648 else table->current_size -= subtract;
5649 lock_basic_unlock(&table->size_lock);
5650 }
5651
5652 int
doq_table_quic_size_available(struct doq_table * table,struct config_file * cfg,size_t mem)5653 doq_table_quic_size_available(struct doq_table* table,
5654 struct config_file* cfg, size_t mem)
5655 {
5656 size_t cur;
5657 if (!table)
5658 return 0;
5659 lock_basic_lock(&table->size_lock);
5660 cur = table->current_size;
5661 lock_basic_unlock(&table->size_lock);
5662
5663 if(cur + mem > cfg->quic_size)
5664 return 0;
5665 return 1;
5666 }
5667
doq_table_quic_size_get(struct doq_table * table)5668 size_t doq_table_quic_size_get(struct doq_table* table)
5669 {
5670 size_t sz;
5671 if(!table)
5672 return 0;
5673 lock_basic_lock(&table->size_lock);
5674 sz = table->current_size;
5675 lock_basic_unlock(&table->size_lock);
5676 return sz;
5677 }
5678 #endif /* HAVE_NGTCP2 */
5679