1 /* 2 * services/listen_dnsport.c - listen on port 53 for incoming DNS queries. 3 * 4 * Copyright (c) 2007, NLnet Labs. All rights reserved. 5 * 6 * This software is open source. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * Redistributions of source code must retain the above copyright notice, 13 * this list of conditions and the following disclaimer. 14 * 15 * Redistributions in binary form must reproduce the above copyright notice, 16 * this list of conditions and the following disclaimer in the documentation 17 * and/or other materials provided with the distribution. 18 * 19 * Neither the name of the NLNET LABS nor the names of its contributors may 20 * be used to endorse or promote products derived from this software without 21 * specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36 /** 37 * \file 38 * 39 * This file has functions to get queries from clients. 
40 */ 41 #include "config.h" 42 #ifdef HAVE_SYS_TYPES_H 43 # include <sys/types.h> 44 #endif 45 #include <sys/time.h> 46 #include <limits.h> 47 #ifdef USE_TCP_FASTOPEN 48 #include <netinet/tcp.h> 49 #endif 50 #include <ctype.h> 51 #include "services/listen_dnsport.h" 52 #include "services/outside_network.h" 53 #include "util/netevent.h" 54 #include "util/log.h" 55 #include "util/config_file.h" 56 #include "util/net_help.h" 57 #include "sldns/sbuffer.h" 58 #include "sldns/parseutil.h" 59 #include "sldns/wire2str.h" 60 #include "services/mesh.h" 61 #include "util/fptr_wlist.h" 62 #include "util/locks.h" 63 #include "util/timeval_func.h" 64 65 #ifdef HAVE_NETDB_H 66 #include <netdb.h> 67 #endif 68 #include <fcntl.h> 69 70 #ifdef HAVE_SYS_UN_H 71 #include <sys/un.h> 72 #endif 73 74 #ifdef HAVE_SYSTEMD 75 #include <systemd/sd-daemon.h> 76 #endif 77 78 #ifdef HAVE_IFADDRS_H 79 #include <ifaddrs.h> 80 #endif 81 #ifdef HAVE_NET_IF_H 82 #include <net/if.h> 83 #endif 84 85 #ifdef HAVE_TIME_H 86 #include <time.h> 87 #endif 88 #include <sys/time.h> 89 90 #ifdef HAVE_NGTCP2 91 #include <ngtcp2/ngtcp2.h> 92 #include <ngtcp2/ngtcp2_crypto.h> 93 #ifdef HAVE_NGTCP2_NGTCP2_CRYPTO_QUICTLS_H 94 #include <ngtcp2/ngtcp2_crypto_quictls.h> 95 #else 96 #include <ngtcp2/ngtcp2_crypto_openssl.h> 97 #endif 98 #endif 99 100 #ifdef HAVE_OPENSSL_SSL_H 101 #include <openssl/ssl.h> 102 #endif 103 104 #ifdef HAVE_LINUX_NET_TSTAMP_H 105 #include <linux/net_tstamp.h> 106 #endif 107 108 /** number of queued TCP connections for listen() */ 109 #define TCP_BACKLOG 256 110 111 #ifndef THREADS_DISABLED 112 /** lock on the counter of stream buffer memory */ 113 static lock_basic_type stream_wait_count_lock; 114 /** lock on the counter of HTTP2 query buffer memory */ 115 static lock_basic_type http2_query_buffer_count_lock; 116 /** lock on the counter of HTTP2 response buffer memory */ 117 static lock_basic_type http2_response_buffer_count_lock; 118 #endif 119 /** size (in bytes) of stream wait buffers */ 
120 static size_t stream_wait_count = 0; 121 /** is the lock initialised for stream wait buffers */ 122 static int stream_wait_lock_inited = 0; 123 /** size (in bytes) of HTTP2 query buffers */ 124 static size_t http2_query_buffer_count = 0; 125 /** is the lock initialised for HTTP2 query buffers */ 126 static int http2_query_buffer_lock_inited = 0; 127 /** size (in bytes) of HTTP2 response buffers */ 128 static size_t http2_response_buffer_count = 0; 129 /** is the lock initialised for HTTP2 response buffers */ 130 static int http2_response_buffer_lock_inited = 0; 131 132 /** 133 * Debug print of the getaddrinfo returned address. 134 * @param addr: the address returned. 135 * @param additional: additional text that describes the type of socket, 136 * or NULL for no text. 137 */ 138 static void 139 verbose_print_addr(struct addrinfo *addr, const char* additional) 140 { 141 if(verbosity >= VERB_ALGO) { 142 char buf[100]; 143 void* sinaddr = &((struct sockaddr_in*)addr->ai_addr)->sin_addr; 144 #ifdef INET6 145 if(addr->ai_family == AF_INET6) 146 sinaddr = &((struct sockaddr_in6*)addr->ai_addr)-> 147 sin6_addr; 148 #endif /* INET6 */ 149 if(inet_ntop(addr->ai_family, sinaddr, buf, 150 (socklen_t)sizeof(buf)) == 0) { 151 (void)strlcpy(buf, "(null)", sizeof(buf)); 152 } 153 buf[sizeof(buf)-1] = 0; 154 verbose(VERB_ALGO, "creating %s%s socket %s %d%s%s", 155 addr->ai_socktype==SOCK_DGRAM?"udp": 156 addr->ai_socktype==SOCK_STREAM?"tcp":"otherproto", 157 addr->ai_family==AF_INET?"4": 158 addr->ai_family==AF_INET6?"6": 159 "_otherfam", buf, 160 ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port), 161 (additional?" 
":""), (additional?additional:"")); 162 } 163 } 164 165 void 166 verbose_print_unbound_socket(struct unbound_socket* ub_sock) 167 { 168 if(verbosity >= VERB_ALGO) { 169 char buf[256]; 170 log_info("listing of unbound_socket structure:"); 171 addr_to_str((void*)ub_sock->addr, ub_sock->addrlen, buf, 172 sizeof(buf)); 173 log_info("%s s is: %d, fam is: %s, acl: %s", buf, ub_sock->s, 174 ub_sock->fam == AF_INET?"AF_INET":"AF_INET6", 175 ub_sock->acl?"yes":"no"); 176 } 177 } 178 179 #ifdef HAVE_SYSTEMD 180 static int 181 systemd_get_activated(int family, int socktype, int listen, 182 struct sockaddr *addr, socklen_t addrlen, 183 const char *path) 184 { 185 int i = 0; 186 int r = 0; 187 int s = -1; 188 const char* listen_pid, *listen_fds; 189 190 /* We should use "listen" option only for stream protocols. For UDP it should be -1 */ 191 192 if((r = sd_booted()) < 1) { 193 if(r == 0) 194 log_warn("systemd is not running"); 195 else 196 log_err("systemd sd_booted(): %s", strerror(-r)); 197 return -1; 198 } 199 200 listen_pid = getenv("LISTEN_PID"); 201 listen_fds = getenv("LISTEN_FDS"); 202 203 if (!listen_pid) { 204 log_warn("Systemd mandatory ENV variable is not defined: LISTEN_PID"); 205 return -1; 206 } 207 208 if (!listen_fds) { 209 log_warn("Systemd mandatory ENV variable is not defined: LISTEN_FDS"); 210 return -1; 211 } 212 213 if((r = sd_listen_fds(0)) < 1) { 214 if(r == 0) 215 log_warn("systemd: did not return socket, check unit configuration"); 216 else 217 log_err("systemd sd_listen_fds(): %s", strerror(-r)); 218 return -1; 219 } 220 221 for(i = 0; i < r; i++) { 222 if(sd_is_socket(SD_LISTEN_FDS_START + i, family, socktype, listen)) { 223 s = SD_LISTEN_FDS_START + i; 224 break; 225 } 226 } 227 if (s == -1) { 228 if (addr) 229 log_err_addr("systemd sd_listen_fds()", 230 "no such socket", 231 (struct sockaddr_storage *)addr, addrlen); 232 else 233 log_err("systemd sd_listen_fds(): %s", path); 234 } 235 return s; 236 } 237 #endif 238 239 int 240 create_udp_sock(int 
family, int socktype, struct sockaddr* addr, 241 socklen_t addrlen, int v6only, int* inuse, int* noproto, 242 int rcv, int snd, int listen, int* reuseport, int transparent, 243 int freebind, int use_systemd, int dscp) 244 { 245 int s; 246 char* err; 247 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_USE_MIN_MTU) || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined (SO_BINDANY) 248 int on=1; 249 #endif 250 #ifdef IPV6_MTU 251 int mtu = IPV6_MIN_MTU; 252 #endif 253 #if !defined(SO_RCVBUFFORCE) && !defined(SO_RCVBUF) 254 (void)rcv; 255 #endif 256 #if !defined(SO_SNDBUFFORCE) && !defined(SO_SNDBUF) 257 (void)snd; 258 #endif 259 #ifndef IPV6_V6ONLY 260 (void)v6only; 261 #endif 262 #if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY) 263 (void)transparent; 264 #endif 265 #if !defined(IP_FREEBIND) 266 (void)freebind; 267 #endif 268 #ifdef HAVE_SYSTEMD 269 int got_fd_from_systemd = 0; 270 271 if (!use_systemd 272 || (use_systemd 273 && (s = systemd_get_activated(family, socktype, -1, addr, 274 addrlen, NULL)) == -1)) { 275 #else 276 (void)use_systemd; 277 #endif 278 if((s = socket(family, socktype, 0)) == -1) { 279 *inuse = 0; 280 #ifndef USE_WINSOCK 281 if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) { 282 *noproto = 1; 283 return -1; 284 } 285 #else 286 if(WSAGetLastError() == WSAEAFNOSUPPORT || 287 WSAGetLastError() == WSAEPROTONOSUPPORT) { 288 *noproto = 1; 289 return -1; 290 } 291 #endif 292 log_err("can't create socket: %s", sock_strerror(errno)); 293 *noproto = 0; 294 return -1; 295 } 296 #ifdef HAVE_SYSTEMD 297 } else { 298 got_fd_from_systemd = 1; 299 } 300 #endif 301 if(listen) { 302 #ifdef SO_REUSEADDR 303 if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on, 304 (socklen_t)sizeof(on)) < 0) { 305 log_err("setsockopt(.. SO_REUSEADDR ..) 
failed: %s", 306 sock_strerror(errno)); 307 #ifndef USE_WINSOCK 308 if(errno != ENOSYS) { 309 close(s); 310 *noproto = 0; 311 *inuse = 0; 312 return -1; 313 } 314 #else 315 closesocket(s); 316 *noproto = 0; 317 *inuse = 0; 318 return -1; 319 #endif 320 } 321 #endif /* SO_REUSEADDR */ 322 #ifdef SO_REUSEPORT 323 # ifdef SO_REUSEPORT_LB 324 /* on FreeBSD 12 we have SO_REUSEPORT_LB that does loadbalance 325 * like SO_REUSEPORT on Linux. This is what the users want 326 * with the config option in unbound.conf; if we actually 327 * need local address and port reuse they'll also need to 328 * have SO_REUSEPORT set for them, assume it was _LB they want. 329 */ 330 if (reuseport && *reuseport && 331 setsockopt(s, SOL_SOCKET, SO_REUSEPORT_LB, (void*)&on, 332 (socklen_t)sizeof(on)) < 0) { 333 #ifdef ENOPROTOOPT 334 if(errno != ENOPROTOOPT || verbosity >= 3) 335 log_warn("setsockopt(.. SO_REUSEPORT_LB ..) failed: %s", 336 strerror(errno)); 337 #endif 338 /* this option is not essential, we can continue */ 339 *reuseport = 0; 340 } 341 # else /* no SO_REUSEPORT_LB */ 342 343 /* try to set SO_REUSEPORT so that incoming 344 * queries are distributed evenly among the receiving threads. 345 * Each thread must have its own socket bound to the same port, 346 * with SO_REUSEPORT set on each socket. 347 */ 348 if (reuseport && *reuseport && 349 setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on, 350 (socklen_t)sizeof(on)) < 0) { 351 #ifdef ENOPROTOOPT 352 if(errno != ENOPROTOOPT || verbosity >= 3) 353 log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s", 354 strerror(errno)); 355 #endif 356 /* this option is not essential, we can continue */ 357 *reuseport = 0; 358 } 359 # endif /* SO_REUSEPORT_LB */ 360 #else 361 (void)reuseport; 362 #endif /* defined(SO_REUSEPORT) */ 363 #ifdef IP_TRANSPARENT 364 if (transparent && 365 setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on, 366 (socklen_t)sizeof(on)) < 0) { 367 log_warn("setsockopt(.. IP_TRANSPARENT ..) 
failed: %s", 368 strerror(errno)); 369 } 370 #elif defined(IP_BINDANY) 371 if (transparent && 372 setsockopt(s, (family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP), 373 (family == AF_INET6? IPV6_BINDANY:IP_BINDANY), 374 (void*)&on, (socklen_t)sizeof(on)) < 0) { 375 log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s", 376 (family==AF_INET6?"V6":""), strerror(errno)); 377 } 378 #elif defined(SO_BINDANY) 379 if (transparent && 380 setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on, 381 (socklen_t)sizeof(on)) < 0) { 382 log_warn("setsockopt(.. SO_BINDANY ..) failed: %s", 383 strerror(errno)); 384 } 385 #endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */ 386 } 387 #ifdef IP_FREEBIND 388 if(freebind && 389 setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on, 390 (socklen_t)sizeof(on)) < 0) { 391 log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s", 392 strerror(errno)); 393 } 394 #endif /* IP_FREEBIND */ 395 if(rcv) { 396 #ifdef SO_RCVBUF 397 int got; 398 socklen_t slen = (socklen_t)sizeof(got); 399 # ifdef SO_RCVBUFFORCE 400 /* Linux specific: try to use root permission to override 401 * system limits on rcvbuf. The limit is stored in 402 * /proc/sys/net/core/rmem_max or sysctl net.core.rmem_max */ 403 if(setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv, 404 (socklen_t)sizeof(rcv)) < 0) { 405 if(errno != EPERM) { 406 log_err("setsockopt(..., SO_RCVBUFFORCE, " 407 "...) failed: %s", sock_strerror(errno)); 408 sock_close(s); 409 *noproto = 0; 410 *inuse = 0; 411 return -1; 412 } 413 # endif /* SO_RCVBUFFORCE */ 414 if(setsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv, 415 (socklen_t)sizeof(rcv)) < 0) { 416 log_err("setsockopt(..., SO_RCVBUF, " 417 "...) failed: %s", sock_strerror(errno)); 418 sock_close(s); 419 *noproto = 0; 420 *inuse = 0; 421 return -1; 422 } 423 /* check if we got the right thing or if system 424 * reduced to some system max. 
Warn if so */ 425 if(getsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&got, 426 &slen) >= 0 && got < rcv/2) { 427 log_warn("so-rcvbuf %u was not granted. " 428 "Got %u. To fix: start with " 429 "root permissions(linux) or sysctl " 430 "bigger net.core.rmem_max(linux) or " 431 "kern.ipc.maxsockbuf(bsd) values.", 432 (unsigned)rcv, (unsigned)got); 433 } 434 # ifdef SO_RCVBUFFORCE 435 } 436 # endif 437 #endif /* SO_RCVBUF */ 438 } 439 /* first do RCVBUF as the receive buffer is more important */ 440 if(snd) { 441 #ifdef SO_SNDBUF 442 int got; 443 socklen_t slen = (socklen_t)sizeof(got); 444 # ifdef SO_SNDBUFFORCE 445 /* Linux specific: try to use root permission to override 446 * system limits on sndbuf. The limit is stored in 447 * /proc/sys/net/core/wmem_max or sysctl net.core.wmem_max */ 448 if(setsockopt(s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd, 449 (socklen_t)sizeof(snd)) < 0) { 450 if(errno != EPERM) { 451 log_err("setsockopt(..., SO_SNDBUFFORCE, " 452 "...) failed: %s", sock_strerror(errno)); 453 sock_close(s); 454 *noproto = 0; 455 *inuse = 0; 456 return -1; 457 } 458 # endif /* SO_SNDBUFFORCE */ 459 if(setsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&snd, 460 (socklen_t)sizeof(snd)) < 0) { 461 log_err("setsockopt(..., SO_SNDBUF, " 462 "...) failed: %s", sock_strerror(errno)); 463 sock_close(s); 464 *noproto = 0; 465 *inuse = 0; 466 return -1; 467 } 468 /* check if we got the right thing or if system 469 * reduced to some system max. Warn if so */ 470 if(getsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&got, 471 &slen) >= 0 && got < snd/2) { 472 log_warn("so-sndbuf %u was not granted. " 473 "Got %u. 
To fix: start with " 474 "root permissions(linux) or sysctl " 475 "bigger net.core.wmem_max(linux) or " 476 "kern.ipc.maxsockbuf(bsd) values.", 477 (unsigned)snd, (unsigned)got); 478 } 479 # ifdef SO_SNDBUFFORCE 480 } 481 # endif 482 #endif /* SO_SNDBUF */ 483 } 484 err = set_ip_dscp(s, family, dscp); 485 if(err != NULL) 486 log_warn("error setting IP DiffServ codepoint %d on UDP socket: %s", dscp, err); 487 if(family == AF_INET6) { 488 # if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) 489 int omit6_set = 0; 490 int action; 491 # endif 492 # if defined(IPV6_V6ONLY) 493 if(v6only 494 # ifdef HAVE_SYSTEMD 495 /* Systemd wants to control if the socket is v6 only 496 * or both, with BindIPv6Only=default, ipv6-only or 497 * both in systemd.socket, so it is not set here. */ 498 && !got_fd_from_systemd 499 # endif 500 ) { 501 int val=(v6only==2)?0:1; 502 if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, 503 (void*)&val, (socklen_t)sizeof(val)) < 0) { 504 log_err("setsockopt(..., IPV6_V6ONLY" 505 ", ...) failed: %s", sock_strerror(errno)); 506 sock_close(s); 507 *noproto = 0; 508 *inuse = 0; 509 return -1; 510 } 511 } 512 # endif 513 # if defined(IPV6_USE_MIN_MTU) 514 /* 515 * There is no fragmentation of IPv6 datagrams 516 * during forwarding in the network. Therefore 517 * we do not send UDP datagrams larger than 518 * the minimum IPv6 MTU of 1280 octets. The 519 * EDNS0 message length can be larger if the 520 * network stack supports IPV6_USE_MIN_MTU. 521 */ 522 if (setsockopt(s, IPPROTO_IPV6, IPV6_USE_MIN_MTU, 523 (void*)&on, (socklen_t)sizeof(on)) < 0) { 524 log_err("setsockopt(..., IPV6_USE_MIN_MTU, " 525 "...) failed: %s", sock_strerror(errno)); 526 sock_close(s); 527 *noproto = 0; 528 *inuse = 0; 529 return -1; 530 } 531 # elif defined(IPV6_MTU) 532 # ifndef USE_WINSOCK 533 /* 534 * On Linux, to send no larger than 1280, the PMTUD is 535 * disabled by default for datagrams anyway, so we set 536 * the MTU to use. 
537 */ 538 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU, 539 (void*)&mtu, (socklen_t)sizeof(mtu)) < 0) { 540 log_err("setsockopt(..., IPV6_MTU, ...) failed: %s", 541 sock_strerror(errno)); 542 sock_close(s); 543 *noproto = 0; 544 *inuse = 0; 545 return -1; 546 } 547 # elif defined(IPV6_USER_MTU) 548 /* As later versions of the mingw crosscompiler define 549 * IPV6_MTU, do the same for windows but use IPV6_USER_MTU 550 * instead which is writable; IPV6_MTU is readonly there. */ 551 if (setsockopt(s, IPPROTO_IPV6, IPV6_USER_MTU, 552 (void*)&mtu, (socklen_t)sizeof(mtu)) < 0) { 553 if (WSAGetLastError() != WSAENOPROTOOPT) { 554 log_err("setsockopt(..., IPV6_USER_MTU, ...) failed: %s", 555 wsa_strerror(WSAGetLastError())); 556 sock_close(s); 557 *noproto = 0; 558 *inuse = 0; 559 return -1; 560 } 561 } 562 # endif /* USE_WINSOCK */ 563 # endif /* IPv6 MTU */ 564 # if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) 565 # if defined(IP_PMTUDISC_OMIT) 566 action = IP_PMTUDISC_OMIT; 567 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU_DISCOVER, 568 &action, (socklen_t)sizeof(action)) < 0) { 569 570 if (errno != EINVAL) { 571 log_err("setsockopt(..., IPV6_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s", 572 strerror(errno)); 573 sock_close(s); 574 *noproto = 0; 575 *inuse = 0; 576 return -1; 577 } 578 } 579 else 580 { 581 omit6_set = 1; 582 } 583 # endif 584 if (omit6_set == 0) { 585 action = IP_PMTUDISC_DONT; 586 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU_DISCOVER, 587 &action, (socklen_t)sizeof(action)) < 0) { 588 log_err("setsockopt(..., IPV6_MTU_DISCOVER, IP_PMTUDISC_DONT...) 
failed: %s", 589 strerror(errno)); 590 sock_close(s); 591 *noproto = 0; 592 *inuse = 0; 593 return -1; 594 } 595 } 596 # endif /* IPV6_MTU_DISCOVER */ 597 } else if(family == AF_INET) { 598 # if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) 599 /* linux 3.15 has IP_PMTUDISC_OMIT, Hannes Frederic Sowa made it so that 600 * PMTU information is not accepted, but fragmentation is allowed 601 * if and only if the packet size exceeds the outgoing interface MTU 602 * (and also uses the interface mtu to determine the size of the packets). 603 * So there won't be any EMSGSIZE error. Against DNS fragmentation attacks. 604 * FreeBSD already has same semantics without setting the option. */ 605 int omit_set = 0; 606 int action; 607 # if defined(IP_PMTUDISC_OMIT) 608 action = IP_PMTUDISC_OMIT; 609 if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER, 610 &action, (socklen_t)sizeof(action)) < 0) { 611 612 if (errno != EINVAL) { 613 log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s", 614 strerror(errno)); 615 sock_close(s); 616 *noproto = 0; 617 *inuse = 0; 618 return -1; 619 } 620 } 621 else 622 { 623 omit_set = 1; 624 } 625 # endif 626 if (omit_set == 0) { 627 action = IP_PMTUDISC_DONT; 628 if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER, 629 &action, (socklen_t)sizeof(action)) < 0) { 630 log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s", 631 strerror(errno)); 632 sock_close(s); 633 *noproto = 0; 634 *inuse = 0; 635 return -1; 636 } 637 } 638 # elif defined(IP_DONTFRAG) && !defined(__APPLE__) 639 /* the IP_DONTFRAG option if defined in the 11.0 OSX headers, 640 * but does not work on that version, so we exclude it */ 641 /* a nonzero value disables fragmentation, according to 642 * docs.oracle.com for ip(4). */ 643 int off = 1; 644 if (setsockopt(s, IPPROTO_IP, IP_DONTFRAG, 645 &off, (socklen_t)sizeof(off)) < 0) { 646 log_err("setsockopt(..., IP_DONTFRAG, ...) 
failed: %s", 647 strerror(errno)); 648 sock_close(s); 649 *noproto = 0; 650 *inuse = 0; 651 return -1; 652 } 653 # endif /* IPv4 MTU */ 654 } 655 if( 656 #ifdef HAVE_SYSTEMD 657 !got_fd_from_systemd && 658 #endif 659 bind(s, (struct sockaddr*)addr, addrlen) != 0) { 660 *noproto = 0; 661 *inuse = 0; 662 #ifndef USE_WINSOCK 663 #ifdef EADDRINUSE 664 *inuse = (errno == EADDRINUSE); 665 /* detect freebsd jail with no ipv6 permission */ 666 if(family==AF_INET6 && errno==EINVAL) 667 *noproto = 1; 668 else if(errno != EADDRINUSE && 669 !(errno == EACCES && verbosity < 4 && !listen) 670 #ifdef EADDRNOTAVAIL 671 && !(errno == EADDRNOTAVAIL && verbosity < 4 && !listen) 672 #endif 673 ) { 674 log_err_addr("can't bind socket", strerror(errno), 675 (struct sockaddr_storage*)addr, addrlen); 676 } 677 #endif /* EADDRINUSE */ 678 #else /* USE_WINSOCK */ 679 if(WSAGetLastError() != WSAEADDRINUSE && 680 WSAGetLastError() != WSAEADDRNOTAVAIL && 681 !(WSAGetLastError() == WSAEACCES && verbosity < 4 && !listen)) { 682 log_err_addr("can't bind socket", 683 wsa_strerror(WSAGetLastError()), 684 (struct sockaddr_storage*)addr, addrlen); 685 } 686 #endif /* USE_WINSOCK */ 687 sock_close(s); 688 return -1; 689 } 690 if(!fd_set_nonblock(s)) { 691 *noproto = 0; 692 *inuse = 0; 693 sock_close(s); 694 return -1; 695 } 696 return s; 697 } 698 699 int 700 create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto, 701 int* reuseport, int transparent, int mss, int nodelay, int freebind, 702 int use_systemd, int dscp, const char* additional) 703 { 704 int s = -1; 705 char* err; 706 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) \ 707 || defined(IPV6_V6ONLY) || defined(IP_TRANSPARENT) \ 708 || defined(IP_BINDANY) || defined(IP_FREEBIND) \ 709 || defined(SO_BINDANY) || defined(TCP_NODELAY) 710 int on = 1; 711 #endif 712 #ifdef HAVE_SYSTEMD 713 int got_fd_from_systemd = 0; 714 #endif 715 #ifdef USE_TCP_FASTOPEN 716 int qlen; 717 #endif 718 #if !defined(IP_TRANSPARENT) && 
!defined(IP_BINDANY) && !defined(SO_BINDANY) 719 (void)transparent; 720 #endif 721 #if !defined(IP_FREEBIND) 722 (void)freebind; 723 #endif 724 verbose_print_addr(addr, additional); 725 *noproto = 0; 726 #ifdef HAVE_SYSTEMD 727 if (!use_systemd || 728 (use_systemd 729 && (s = systemd_get_activated(addr->ai_family, addr->ai_socktype, 1, 730 addr->ai_addr, addr->ai_addrlen, 731 NULL)) == -1)) { 732 #else 733 (void)use_systemd; 734 #endif 735 if((s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) { 736 #ifndef USE_WINSOCK 737 if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) { 738 *noproto = 1; 739 return -1; 740 } 741 #else 742 if(WSAGetLastError() == WSAEAFNOSUPPORT || 743 WSAGetLastError() == WSAEPROTONOSUPPORT) { 744 *noproto = 1; 745 return -1; 746 } 747 #endif 748 log_err("can't create socket: %s", sock_strerror(errno)); 749 return -1; 750 } 751 if(nodelay) { 752 #if defined(IPPROTO_TCP) && defined(TCP_NODELAY) 753 if(setsockopt(s, IPPROTO_TCP, TCP_NODELAY, (void*)&on, 754 (socklen_t)sizeof(on)) < 0) { 755 #ifndef USE_WINSOCK 756 log_err(" setsockopt(.. TCP_NODELAY ..) failed: %s", 757 strerror(errno)); 758 #else 759 log_err(" setsockopt(.. TCP_NODELAY ..) failed: %s", 760 wsa_strerror(WSAGetLastError())); 761 #endif 762 } 763 #else 764 log_warn(" setsockopt(TCP_NODELAY) unsupported"); 765 #endif /* defined(IPPROTO_TCP) && defined(TCP_NODELAY) */ 766 } 767 if (mss > 0) { 768 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG) 769 if(setsockopt(s, IPPROTO_TCP, TCP_MAXSEG, (void*)&mss, 770 (socklen_t)sizeof(mss)) < 0) { 771 log_err(" setsockopt(.. TCP_MAXSEG ..) 
failed: %s", 772 sock_strerror(errno)); 773 } else { 774 verbose(VERB_ALGO, 775 " tcp socket mss set to %d", mss); 776 } 777 #else 778 log_warn(" setsockopt(TCP_MAXSEG) unsupported"); 779 #endif /* defined(IPPROTO_TCP) && defined(TCP_MAXSEG) */ 780 } 781 #ifdef HAVE_SYSTEMD 782 } else { 783 got_fd_from_systemd = 1; 784 } 785 #endif 786 #ifdef SO_REUSEADDR 787 if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on, 788 (socklen_t)sizeof(on)) < 0) { 789 log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s", 790 sock_strerror(errno)); 791 sock_close(s); 792 return -1; 793 } 794 #endif /* SO_REUSEADDR */ 795 #ifdef IP_FREEBIND 796 if (freebind && setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on, 797 (socklen_t)sizeof(on)) < 0) { 798 log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s", 799 strerror(errno)); 800 } 801 #endif /* IP_FREEBIND */ 802 #ifdef SO_REUSEPORT 803 /* try to set SO_REUSEPORT so that incoming 804 * connections are distributed evenly among the receiving threads. 805 * Each thread must have its own socket bound to the same port, 806 * with SO_REUSEPORT set on each socket. 807 */ 808 if (reuseport && *reuseport && 809 setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on, 810 (socklen_t)sizeof(on)) < 0) { 811 #ifdef ENOPROTOOPT 812 if(errno != ENOPROTOOPT || verbosity >= 3) 813 log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s", 814 strerror(errno)); 815 #endif 816 /* this option is not essential, we can continue */ 817 *reuseport = 0; 818 } 819 #else 820 (void)reuseport; 821 #endif /* defined(SO_REUSEPORT) */ 822 #if defined(IPV6_V6ONLY) 823 if(addr->ai_family == AF_INET6 && v6only 824 # ifdef HAVE_SYSTEMD 825 /* Systemd wants to control if the socket is v6 only 826 * or both, with BindIPv6Only=default, ipv6-only or 827 * both in systemd.socket, so it is not set here. 
*/ 828 && !got_fd_from_systemd 829 # endif 830 ) { 831 if(setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, 832 (void*)&on, (socklen_t)sizeof(on)) < 0) { 833 log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s", 834 sock_strerror(errno)); 835 sock_close(s); 836 return -1; 837 } 838 } 839 #else 840 (void)v6only; 841 #endif /* IPV6_V6ONLY */ 842 #ifdef IP_TRANSPARENT 843 if (transparent && 844 setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on, 845 (socklen_t)sizeof(on)) < 0) { 846 log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s", 847 strerror(errno)); 848 } 849 #elif defined(IP_BINDANY) 850 if (transparent && 851 setsockopt(s, (addr->ai_family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP), 852 (addr->ai_family == AF_INET6? IPV6_BINDANY:IP_BINDANY), 853 (void*)&on, (socklen_t)sizeof(on)) < 0) { 854 log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s", 855 (addr->ai_family==AF_INET6?"V6":""), strerror(errno)); 856 } 857 #elif defined(SO_BINDANY) 858 if (transparent && 859 setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on, (socklen_t) 860 sizeof(on)) < 0) { 861 log_warn("setsockopt(.. SO_BINDANY ..) 
failed: %s", 862 strerror(errno)); 863 } 864 #endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */ 865 err = set_ip_dscp(s, addr->ai_family, dscp); 866 if(err != NULL) 867 log_warn("error setting IP DiffServ codepoint %d on TCP socket: %s", dscp, err); 868 if( 869 #ifdef HAVE_SYSTEMD 870 !got_fd_from_systemd && 871 #endif 872 bind(s, addr->ai_addr, addr->ai_addrlen) != 0) { 873 #ifndef USE_WINSOCK 874 /* detect freebsd jail with no ipv6 permission */ 875 if(addr->ai_family==AF_INET6 && errno==EINVAL) 876 *noproto = 1; 877 else { 878 log_err_addr("can't bind socket", strerror(errno), 879 (struct sockaddr_storage*)addr->ai_addr, 880 addr->ai_addrlen); 881 } 882 #else 883 log_err_addr("can't bind socket", 884 wsa_strerror(WSAGetLastError()), 885 (struct sockaddr_storage*)addr->ai_addr, 886 addr->ai_addrlen); 887 #endif 888 sock_close(s); 889 return -1; 890 } 891 if(!fd_set_nonblock(s)) { 892 sock_close(s); 893 return -1; 894 } 895 if(listen(s, TCP_BACKLOG) == -1) { 896 log_err("can't listen: %s", sock_strerror(errno)); 897 sock_close(s); 898 return -1; 899 } 900 #ifdef USE_TCP_FASTOPEN 901 /* qlen specifies how many outstanding TFO requests to allow. Limit is a defense 902 against IP spoofing attacks as suggested in RFC7413 */ 903 #ifdef __APPLE__ 904 /* OS X implementation only supports qlen of 1 via this call. Actual 905 value is configured by the net.inet.tcp.fastopen_backlog kernel parm. 
*/ 906 qlen = 1; 907 #else 908 /* 5 is recommended on linux */ 909 qlen = 5; 910 #endif 911 if ((setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN, &qlen, 912 sizeof(qlen))) == -1 ) { 913 #ifdef ENOPROTOOPT 914 /* squelch ENOPROTOOPT: freebsd server mode with kernel support 915 disabled, except when verbosity enabled for debugging */ 916 if(errno != ENOPROTOOPT || verbosity >= 3) { 917 #endif 918 if(errno == EPERM) { 919 log_warn("Setting TCP Fast Open as server failed: %s ; this could likely be because sysctl net.inet.tcp.fastopen.enabled, net.inet.tcp.fastopen.server_enable, or net.ipv4.tcp_fastopen is disabled", strerror(errno)); 920 } else { 921 log_err("Setting TCP Fast Open as server failed: %s", strerror(errno)); 922 } 923 #ifdef ENOPROTOOPT 924 } 925 #endif 926 } 927 #endif 928 return s; 929 } 930 931 char* 932 set_ip_dscp(int socket, int addrfamily, int dscp) 933 { 934 int ds; 935 936 if(dscp == 0) 937 return NULL; 938 ds = dscp << 2; 939 switch(addrfamily) { 940 case AF_INET6: 941 #ifdef IPV6_TCLASS 942 if(setsockopt(socket, IPPROTO_IPV6, IPV6_TCLASS, (void*)&ds, 943 sizeof(ds)) < 0) 944 return sock_strerror(errno); 945 break; 946 #else 947 return "IPV6_TCLASS not defined on this system"; 948 #endif 949 default: 950 if(setsockopt(socket, IPPROTO_IP, IP_TOS, (void*)&ds, sizeof(ds)) < 0) 951 return sock_strerror(errno); 952 break; 953 } 954 return NULL; 955 } 956 957 int 958 create_local_accept_sock(const char *path, int* noproto, int use_systemd) 959 { 960 #ifdef HAVE_SYSTEMD 961 int ret; 962 963 if (use_systemd && (ret = systemd_get_activated(AF_LOCAL, SOCK_STREAM, 1, NULL, 0, path)) != -1) 964 return ret; 965 else { 966 #endif 967 #ifdef HAVE_SYS_UN_H 968 int s; 969 struct sockaddr_un usock; 970 #ifndef HAVE_SYSTEMD 971 (void)use_systemd; 972 #endif 973 974 verbose(VERB_ALGO, "creating unix socket %s", path); 975 #ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN 976 /* this member exists on BSDs, not Linux */ 977 usock.sun_len = (unsigned)sizeof(usock); 978 #endif 979 
usock.sun_family = AF_LOCAL;
	/* length is 92-108, 104 on FreeBSD */
	(void)strlcpy(usock.sun_path, path, sizeof(usock.sun_path));

	if ((s = socket(AF_LOCAL, SOCK_STREAM, 0)) == -1) {
		log_err("Cannot create local socket %s (%s)",
			path, strerror(errno));
		return -1;
	}

	if (unlink(path) && errno != ENOENT) {
		/* The socket already exists and cannot be removed */
		log_err("Cannot remove old local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (bind(s, (struct sockaddr *)&usock,
		(socklen_t)sizeof(struct sockaddr_un)) == -1) {
		log_err("Cannot bind local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (!fd_set_nonblock(s)) {
		log_err("Cannot set non-blocking mode");
		goto err;
	}

	if (listen(s, TCP_BACKLOG) == -1) {
		log_err("can't listen: %s", strerror(errno));
		goto err;
	}

	(void)noproto; /*unused*/
	return s;

err:
	sock_close(s);
	return -1;

#ifdef HAVE_SYSTEMD
	}
#endif
#else
	(void)use_systemd;
	(void)path;
	log_err("Local sockets are not supported");
	*noproto = 1;
	return -1;
#endif
}


/**
 * Create socket from getaddrinfo results.
 * Resolves ifname and port with getaddrinfo, creates a UDP socket (for
 * SOCK_DGRAM) or a TCP accept socket (otherwise) on the first result, and
 * stores a copy of the listening address in ub_sock.
 * @param stype: socket type, assigned to hints->ai_socktype.
 * @param ifname: node name for getaddrinfo, may be NULL.
 * @param port: port number, printed in decimal as the service name.
 * @param hints: getaddrinfo hints; ai_socktype is overwritten here.
 * @param noip6: set to 0 on entry; set to 1 if socket creation reports
 *	that the protocol is unavailable and the hints family is AF_INET6.
 * @param ub_sock: on success filled with a malloced copy of the address,
 *	its length, the fd, and the address family; acl is set to NULL.
 * @return the socket fd, or -1 on failure. On failure nothing is stored
 *	in ub_sock.
 */
static int
make_sock(int stype, const char* ifname, int port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind,
	int use_systemd, int dscp, struct unbound_socket* ub_sock,
	const char* additional)
{
	struct addrinfo *res = NULL;
	int r, s, inuse, noproto;
	char portbuf[32];
	snprintf(portbuf, sizeof(portbuf), "%d", port);
	hints->ai_socktype = stype;
	*noip6 = 0;
	if((r=getaddrinfo(ifname, portbuf, hints, &res)) != 0 || !res) {
#ifdef USE_WINSOCK
		if(r == EAI_NONAME && hints->ai_family == AF_INET6){
			*noip6 = 1; /* 'Host not found' for IP6 on winXP */
			return -1;
		}
#endif
		log_err("node %s:%s getaddrinfo: %s %s",
			ifname?ifname:"default", portbuf, gai_strerror(r),
#ifdef EAI_SYSTEM
			(r==EAI_SYSTEM?(char*)strerror(errno):"")
#else
			""
#endif
		);
		return -1;
	}
	/* Check the address before it is handed to the socket creation
	 * routines, so that no socket needs to be cleaned up for it. */
	if(!res->ai_addr) {
		log_err("getaddrinfo returned no address");
		freeaddrinfo(res);
		return -1;
	}
	if(stype == SOCK_DGRAM) {
		verbose_print_addr(res, additional);
		s = create_udp_sock(res->ai_family, res->ai_socktype,
			(struct sockaddr*)res->ai_addr, res->ai_addrlen,
			v6only, &inuse, &noproto, (int)rcv, (int)snd, 1,
			reuseport, transparent, freebind, use_systemd, dscp);
		if(s == -1 && inuse) {
			log_err("bind: address already in use");
		} else if(s == -1 && noproto && hints->ai_family == AF_INET6){
			*noip6 = 1;
		}
	} else {
		s = create_tcp_accept_sock(res, v6only, &noproto, reuseport,
			transparent, tcp_mss, nodelay, freebind, use_systemd,
			dscp, additional);
		if(s == -1 && noproto && hints->ai_family == AF_INET6){
			*noip6 = 1;
		}
	}
	if(s == -1) {
		/* no socket: nothing to record, and nothing to close */
		freeaddrinfo(res);
		return -1;
	}

	ub_sock->addr = memdup(res->ai_addr, res->ai_addrlen);
	ub_sock->addrlen = res->ai_addrlen;
	freeaddrinfo(res);
	if(!ub_sock->addr) {
		log_err("out of memory: allocate listening address");
		sock_close(s);
		return -1;
	}

	ub_sock->s = s;
	ub_sock->fam = hints->ai_family;
	ub_sock->acl = NULL;

	return s;
}

/**
 * Make socket and first see if ifname contains port override info.
 * An ifname of the form "ifspec@port" is split at the first '@': the part
 * before it is used as node name and the number after it replaces the
 * port argument. Otherwise the arguments are passed to make_sock as is.
 * @return the socket fd, or -1 on failure (noip6 is then set to 0 when the
 *	ifname itself was rejected).
 */
static int
make_sock_port(int stype, const char* ifname, int port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind,
	int use_systemd, int dscp, struct unbound_socket* ub_sock,
	const char* additional)
{
	/* make_sock accepts a NULL ifname, so do not search in it then */
	const char* at = ifname ? strchr(ifname, '@') : NULL;
	if(at) {
		/* override port with ifspec@port */
		int ifport;
		char newif[128];
		if((size_t)(at-ifname) >= sizeof(newif)) {
			log_err("ifname too long: %s", ifname);
			*noip6 = 0;
			return -1;
		}
		ifport = atoi(at+1);
		if(ifport <= 0 || ifport > 65535) {
			log_err("invalid portnumber in interface: %s", ifname);
			*noip6 = 0;
			return -1;
		}
		(void)strlcpy(newif, ifname, sizeof(newif));
		newif[at-ifname] = 0;
		return make_sock(stype, newif, ifport, hints, v6only, noip6,
			rcv, snd, reuseport, transparent, tcp_mss, nodelay,
			freebind, use_systemd, dscp, ub_sock, additional);
	}
	return make_sock(stype, ifname, port, hints, v6only, noip6, rcv, snd,
		reuseport, transparent, tcp_mss, nodelay, freebind, use_systemd,
		dscp, ub_sock, additional);
}

/**
 * Add port to open ports list.
 * @param list: list head. changed.
 * @param s: fd.
 * @param ftype: if fd is UDP.
 * @param pp2_enabled: if PROXYv2 is enabled for this port.
 * @param ub_sock: socket with address.
 * @return false on failure. list in unchanged then.
 */
static int
port_insert(struct listen_port** list, int s, enum listen_type ftype,
	int pp2_enabled, struct unbound_socket* ub_sock)
{
	struct listen_port* item = (struct listen_port*)malloc(
		sizeof(struct listen_port));
	if(!item)
		return 0;
	/* new item becomes the head of the list */
	item->next = *list;
	item->fd = s;
	item->ftype = ftype;
	item->pp2_enabled = pp2_enabled;
	item->socket = ub_sock;
	*list = item;
	return 1;
}

/** set fd to receive software timestamps; returns false if not possible */
static int
set_recvtimestamp(int s)
{
#ifdef HAVE_LINUX_NET_TSTAMP_H
	int opt = SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE;
	if (setsockopt(s, SOL_SOCKET, SO_TIMESTAMPNS, (void*)&opt,
		(socklen_t)sizeof(opt)) < 0) {
		log_err("setsockopt(..., SO_TIMESTAMPNS, ...) failed: %s",
			strerror(errno));
		return 0;
	}
	return 1;
#else
	log_err("packets timestamping is not supported on this platform");
	(void)s;
	return 0;
#endif
}

/**
 * Set fd to receive source address packet info.
 * @param s: the socket.
 * @param family: AF_INET6 or AF_INET; other values are accepted untouched.
 * @return false if the option could not be set or is not available.
 */
static int
set_recvpktinfo(int s, int family)
{
#if defined(IPV6_RECVPKTINFO) || defined(IPV6_PKTINFO) || (defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)) || defined(IP_PKTINFO)
	int on = 1;
#else
	(void)s;
#endif
	if(family == AF_INET6) {
#           ifdef IPV6_RECVPKTINFO
		if(setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IPV6_RECVPKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#           elif defined(IPV6_PKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_PKTINFO,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IPV6_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#           else
		log_err("no IPV6_RECVPKTINFO and IPV6_PKTINFO options, please "
			"disable interface-automatic or do-ip6 in config");
		return 0;
#           endif /* defined IPV6_RECVPKTINFO */

	} else if(family == AF_INET) {
#           ifdef IP_PKTINFO
		if(setsockopt(s, IPPROTO_IP, IP_PKTINFO,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IP_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#           elif defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)
		if(setsockopt(s, IPPROTO_IP, IP_RECVDSTADDR,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IP_RECVDSTADDR, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#           else
		log_err("no IP_SENDSRCADDR or IP_PKTINFO option, please disable "
			"interface-automatic or do-ip4 in config");
		return 0;
#           endif /* IP_PKTINFO */

	}
	return 1;
}

/**
 * Helper for ports_open. Creates one interface (or NULL for default).
 * @param ifname: The interface ip address.
 * @param do_auto: use automatic interface detection.
 * 	If enabled, then ifname must be the wildcard name.
 * @param do_udp: if udp should be used.
 * @param do_tcp: if tcp should be used.
 * @param hints: for getaddrinfo. family and flags have to be set by caller.
 * @param port: Port number to use.
 * @param list: list of open ports, appended to, changed to point to list head.
 * @param rcv: receive buffer size for UDP
 * @param snd: send buffer size for UDP
 * @param ssl_port: ssl service port number
 * @param tls_additional_port: list of additional ssl service port numbers.
 * @param https_port: DoH service port number
 * @param proxy_protocol_port: list of PROXYv2 port numbers.
 * @param reuseport: try to set SO_REUSEPORT if nonNULL and true.
 * 	set to false on exit if reuseport failed due to no kernel support.
 * @param transparent: set IP_TRANSPARENT socket option.
 * @param tcp_mss: maximum segment size of tcp socket. default if zero.
 * @param freebind: set IP_FREEBIND socket option.
 * @param http2_nodelay: set TCP_NODELAY on HTTP/2 connection
 * @param use_systemd: if true, fetch sockets from systemd.
 * @param dnscrypt_port: dnscrypt service port number
 * @param dscp: DSCP to use.
 * @param quic_port: dns over quic port number.
 * @param http_notls_downstream: if no tls is used for https downstream.
 * @param sock_queue_timeout: the sock_queue_timeout from config. Seconds to
 * 	wait to discard if UDP packets have waited for long in the socket
 * 	buffer.
 * @return: returns false on error.
 */
static int
ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp,
	struct addrinfo *hints, int port, struct listen_port** list,
	size_t rcv, size_t snd, int ssl_port,
	struct config_strlist* tls_additional_port, int https_port,
	struct config_strlist* proxy_protocol_port,
	int* reuseport, int transparent, int tcp_mss, int freebind,
	int http2_nodelay, int use_systemd, int dnscrypt_port, int dscp,
	int quic_port, int http_notls_downstream, int sock_queue_timeout)
{
	int s, noip6=0;
	int is_ssl = if_is_ssl(ifname, port, ssl_port, tls_additional_port);
	int is_https = if_is_https(ifname, port, https_port);
	int is_dnscrypt = if_is_dnscrypt(ifname, port, dnscrypt_port);
	int is_pp2 = if_is_pp2(ifname, port, proxy_protocol_port);
	int is_doq = if_is_quic(ifname, port, quic_port);
	/* Always set TCP_NODELAY on TLS connection as it speeds up the TLS
	 * handshake. DoH had already such option so we respect it.
	 * Otherwise the server waits before sending more handshake data for
	 * the client ACK (Nagle's algorithm), which is delayed because the
	 * client waits for more data before ACKing (delayed ACK). */
	int nodelay = is_https?http2_nodelay:is_ssl;
	struct unbound_socket* ub_sock;
	const char* add = NULL;

	if(!do_udp && !do_tcp)
		return 0;

	if(is_pp2) {
		if(is_dnscrypt) {
			fatal_exit("PROXYv2 and DNSCrypt combination not "
				"supported!");
		} else if(is_https) {
			fatal_exit("PROXYv2 and DoH combination not "
				"supported!");
		} else if(is_doq) {
			fatal_exit("PROXYv2 and DoQ combination not "
				"supported!");
		}
	}

	/* Check if both UDP and TCP ports should be open.
	 * In the case of encrypted channels, probably an unencrypted channel
	 * at the same port is not desired. */
	if((is_ssl || is_https) && !is_doq) do_udp = do_auto = 0;
	if((is_doq) && !(is_https || is_ssl)) do_tcp = 0;

	if(do_auto) {
		ub_sock = calloc(1, sizeof(struct unbound_socket));
		if(!ub_sock)
			return 0;
		if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
			&noip6, rcv, snd, reuseport, transparent,
			tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock,
			(is_dnscrypt?"udpancil_dnscrypt":"udpancil"))) == -1) {
			free(ub_sock->addr);
			free(ub_sock);
			if(noip6) {
				log_warn("IPv6 protocol not available");
				return 1;
			}
			return 0;
		}
		/* getting source addr packet info is highly non-portable */
		if(!set_recvpktinfo(s, hints->ai_family)) {
			sock_close(s);
			free(ub_sock->addr);
			free(ub_sock);
			return 0;
		}
		if (sock_queue_timeout && !set_recvtimestamp(s)) {
			log_warn("socket timestamping is not available");
		}
		if(!port_insert(list, s, is_dnscrypt
			?listen_type_udpancil_dnscrypt:listen_type_udpancil,
			is_pp2, ub_sock)) {
			sock_close(s);
			free(ub_sock->addr);
			free(ub_sock);
			return 0;
		}
	} else if(do_udp) {
		enum listen_type udp_port_type;
		ub_sock = calloc(1, sizeof(struct unbound_socket));
		if(!ub_sock)
			return 0;
		if(is_dnscrypt) {
			udp_port_type = listen_type_udp_dnscrypt;
			add = "dnscrypt";
		} else if(is_doq) {
			udp_port_type = listen_type_doq;
			add = "doq";
			if(if_listens_on(ifname, port, 53, NULL)) {
				log_err("DNS over QUIC is strictly not "
					"allowed on port 53 as per RFC 9250. "
					"Port 53 is for DNS datagrams. Error "
					"for interface '%s'.", ifname);
				free(ub_sock->addr);
				free(ub_sock);
				return 0;
			}
		} else {
			udp_port_type = listen_type_udp;
			add = NULL;
		}
		/* regular udp socket */
		if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
			&noip6, rcv, snd, reuseport, transparent,
			tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock,
			add)) == -1) {
			free(ub_sock->addr);
			free(ub_sock);
			if(noip6) {
				log_warn("IPv6 protocol not available");
				return 1;
			}
			return 0;
		}
		if(udp_port_type == listen_type_doq) {
			if(!set_recvpktinfo(s, hints->ai_family)) {
				sock_close(s);
				free(ub_sock->addr);
				free(ub_sock);
				return 0;
			}
		}
		if(sock_queue_timeout) {
			/* a plain udp port is handled as udpancil when the
			 * queue timeout is configured, also if the timestamp
			 * option itself cannot be set on the socket */
			if(udp_port_type == listen_type_udp)
				udp_port_type = listen_type_udpancil;
			if(!set_recvtimestamp(s)) {
				log_warn("socket timestamping is not available");
			}
		}
		if(!port_insert(list, s, udp_port_type, is_pp2, ub_sock)) {
			sock_close(s);
			free(ub_sock->addr);
			free(ub_sock);
			return 0;
		}
	}
	if(do_tcp) {
		enum listen_type port_type;
		ub_sock = calloc(1, sizeof(struct unbound_socket));
		if(!ub_sock)
			return 0;
		if(is_ssl) {
			port_type = listen_type_ssl;
			add = "tls";
		} else if(is_https) {
			port_type = listen_type_http;
			add = "https";
			if(http_notls_downstream)
				add = "http";
		} else if(is_dnscrypt) {
			port_type = listen_type_tcp_dnscrypt;
			add = "dnscrypt";
		} else {
			port_type = listen_type_tcp;
			add = NULL;
		}
		if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1,
			&noip6, 0, 0, reuseport, transparent, tcp_mss, nodelay,
			freebind, use_systemd, dscp, ub_sock, add)) == -1) {
			free(ub_sock->addr);
			free(ub_sock);
			if(noip6) {
				/*log_warn("IPv6 protocol not available");*/
				return 1;
			}
			return 0;
		}
		if(is_ssl)
			verbose(VERB_ALGO, "setup TCP for SSL service");
		if(!port_insert(list, s, port_type, is_pp2, ub_sock)) {
			sock_close(s);
			free(ub_sock->addr);
			free(ub_sock);
			return 0;
		}
	}
	return 1;
}

/**
 * Add items to commpoint list in front.
 * @param c: commpoint to add.
 * @param front: listen struct.
 * @return: false on failure.
 */
static int
listen_cp_insert(struct comm_point* c, struct listen_dnsport* front)
{
	struct listen_list* item = (struct listen_list*)malloc(
		sizeof(struct listen_list));
	if(!item)
		return 0;
	item->com = c;
	item->next = front->cps;
	front->cps = item;
	return 1;
}

/* Initialise the buffer-counter locks; each lock is initialised only if
 * its inited flag is not yet set. */
void listen_setup_locks(void)
{
	if(!stream_wait_lock_inited) {
		lock_basic_init(&stream_wait_count_lock);
		stream_wait_lock_inited = 1;
	}
	if(!http2_query_buffer_lock_inited) {
		lock_basic_init(&http2_query_buffer_count_lock);
		http2_query_buffer_lock_inited = 1;
	}
	if(!http2_response_buffer_lock_inited) {
		lock_basic_init(&http2_response_buffer_count_lock);
		http2_response_buffer_lock_inited = 1;
	}
}

/* Destroy the buffer-counter locks that were initialised, and clear their
 * inited flags. */
void listen_desetup_locks(void)
{
	if(stream_wait_lock_inited) {
		stream_wait_lock_inited = 0;
		lock_basic_destroy(&stream_wait_count_lock);
	}
	if(http2_query_buffer_lock_inited) {
		http2_query_buffer_lock_inited = 0;
		lock_basic_destroy(&http2_query_buffer_count_lock);
	}
	if(http2_response_buffer_lock_inited) {
		http2_response_buffer_lock_inited = 0;
		lock_basic_destroy(&http2_response_buffer_count_lock);
	}
}

/* Create the listening structure: allocates the shared udp buffer and one
 * comm point per entry of the ports list, selected on the port's ftype.
 * Returns NULL on allocation failure, when a comm point cannot be created,
 * or when the ports list yields no comm points at all. */
struct listen_dnsport*
listen_create(struct comm_base* base, struct listen_port* ports,
	size_t bufsize, int tcp_accept_count, int tcp_idle_timeout,
	int harden_large_queries, uint32_t http_max_streams,
	char* http_endpoint, int http_notls, struct tcl_list* tcp_conn_limit,
	void* dot_sslctx, void* doh_sslctx, void* quic_sslctx,
	struct dt_env* dtenv,
	struct doq_table* doq_table,
	struct ub_randstate* rnd,struct config_file* cfg,
	comm_point_callback_type* cb, void *cb_arg)
{
	struct listen_dnsport* front = (struct listen_dnsport*)
		malloc(sizeof(struct listen_dnsport));
	if(!front)
		return NULL;
	front->cps = NULL;
	front->udp_buff = sldns_buffer_new(bufsize);
#ifdef USE_DNSCRYPT
	front->dnscrypt_udp_buff = NULL;
#endif
	if(!front->udp_buff) {
		free(front);
		return NULL;
	}

	/* create comm points as needed */
	while(ports) {
		struct comm_point* cp = NULL;
		if(ports->ftype == listen_type_udp ||
		   ports->ftype == listen_type_udp_dnscrypt) {
			cp = comm_point_create_udp(base, ports->fd,
				front->udp_buff, ports->pp2_enabled, cb,
				cb_arg, ports->socket);
		} else if(ports->ftype == listen_type_doq) {
#ifndef HAVE_NGTCP2
			log_warn("Unbound is not compiled with "
				"ngtcp2. This is required to use DNS "
				"over QUIC.");
#endif
			cp = comm_point_create_doq(base, ports->fd,
				front->udp_buff, cb, cb_arg, ports->socket,
				doq_table, rnd, quic_sslctx, cfg);
		} else if(ports->ftype == listen_type_tcp ||
				ports->ftype == listen_type_tcp_dnscrypt) {
			cp = comm_point_create_tcp(base, ports->fd,
				tcp_accept_count, tcp_idle_timeout,
				harden_large_queries, 0, NULL,
				tcp_conn_limit, bufsize, front->udp_buff,
				ports->ftype, ports->pp2_enabled, cb, cb_arg,
				ports->socket);
		} else if(ports->ftype == listen_type_ssl ||
			ports->ftype == listen_type_http) {
			cp = comm_point_create_tcp(base, ports->fd,
				tcp_accept_count, tcp_idle_timeout,
				harden_large_queries,
				http_max_streams, http_endpoint,
				tcp_conn_limit, bufsize, front->udp_buff,
				ports->ftype, ports->pp2_enabled, cb, cb_arg,
				ports->socket);
			if(ports->ftype == listen_type_http) {
				if(!doh_sslctx && !http_notls) {
					log_warn("HTTPS port configured, but "
						"no TLS tls-service-key or "
						"tls-service-pem set");
				}
#ifndef HAVE_SSL_CTX_SET_ALPN_SELECT_CB
				if(!http_notls) {
					log_warn("Unbound is not compiled "
						"with an OpenSSL version "
						"supporting ALPN "
						"(OpenSSL >= 1.0.2). This "
						"is required to use "
						"DNS-over-HTTPS");
				}
#endif
#ifndef HAVE_NGHTTP2_NGHTTP2_H
				log_warn("Unbound is not compiled with "
					"nghttp2. This is required to use "
					"DNS-over-HTTPS.");
#endif
			}
		} else if(ports->ftype == listen_type_udpancil ||
				  ports->ftype == listen_type_udpancil_dnscrypt) {
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
			cp = comm_point_create_udp_ancil(base, ports->fd,
				front->udp_buff, ports->pp2_enabled, cb,
				cb_arg, ports->socket);
#else
			log_warn("This system does not support UDP ancilliary data.");
#endif
		}
		if(!cp) {
			log_err("can't create commpoint");
			listen_delete(front);
			return NULL;
		}
		/* pick the TLS context for the port type; none for the
		 * unencrypted and dnscrypt types */
		if((http_notls && ports->ftype == listen_type_http) ||
			(ports->ftype == listen_type_tcp) ||
			(ports->ftype == listen_type_udp) ||
			(ports->ftype == listen_type_udpancil) ||
			(ports->ftype == listen_type_tcp_dnscrypt) ||
			(ports->ftype == listen_type_udp_dnscrypt) ||
			(ports->ftype == listen_type_udpancil_dnscrypt)) {
			cp->ssl = NULL;
		} else if(ports->ftype == listen_type_doq) {
			cp->ssl = quic_sslctx;
		} else if(ports->ftype == listen_type_http) {
			cp->ssl = doh_sslctx;
		} else {
			cp->ssl = dot_sslctx;
		}
		cp->dtenv = dtenv;
		cp->do_not_close = 1;
#ifdef USE_DNSCRYPT
		if (ports->ftype == listen_type_udp_dnscrypt ||
			ports->ftype == listen_type_tcp_dnscrypt ||
			ports->ftype == listen_type_udpancil_dnscrypt) {
			cp->dnscrypt = 1;
			cp->dnscrypt_buffer = sldns_buffer_new(bufsize);
			if(!cp->dnscrypt_buffer) {
				log_err("can't alloc dnscrypt_buffer");
				comm_point_delete(cp);
				listen_delete(front);
				return NULL;
			}
			front->dnscrypt_udp_buff = cp->dnscrypt_buffer;
		}
#endif
		if(!listen_cp_insert(cp, front)) {
			log_err("malloc failed");
			comm_point_delete(cp);
			listen_delete(front);
			return NULL;
		}
		ports = ports->next;
	}
	if(!front->cps) {
		log_err("Could not open sockets to accept queries.");
		listen_delete(front);
		return NULL;
	}

	return front;
}

/* Delete every comm point on the list and the list items themselves. */
void
listen_list_delete(struct listen_list* list)
{
	struct listen_list *p = list, *pn;
	while(p) {
		pn = p->next;
		comm_point_delete(p->com);
		free(p);
		p = pn;
	}
}

/* Delete the listening structure, its comm points and its buffers.
 * A NULL argument is ignored. */
void
listen_delete(struct listen_dnsport* front)
{
	if(!front)
		return;
	listen_list_delete(front->cps);
#ifdef USE_DNSCRYPT
	if(front->dnscrypt_udp_buff &&
		front->udp_buff != front->dnscrypt_udp_buff) {
		sldns_buffer_free(front->dnscrypt_udp_buff);
	}
#endif
	sldns_buffer_free(front->udp_buff);
	free(front);
}

#ifdef HAVE_GETIFADDRS
/**
 * Append a strdup of str to a malloced array of strings.
 * @param ip_addresses: the array, grown by one slot with realloc.
 * @param ip_addresses_size: number of strings in the array, incremented
 *	only when the string has been stored.
 * @param str: string to copy.
 * @return false on allocation failure (an error is logged).
 */
static int
ifa_strarray_append(char ***ip_addresses, int *ip_addresses_size,
	const char *str)
{
	void *tmpbuf = realloc(*ip_addresses,
		sizeof(char *) * (*ip_addresses_size + 1));
	if(!tmpbuf) {
		log_err("realloc failed: out of memory");
		return 0;
	}
	*ip_addresses = tmpbuf;
	(*ip_addresses)[*ip_addresses_size] = strdup(str);
	if(!(*ip_addresses)[*ip_addresses_size]) {
		log_err("strdup failed: out of memory");
		return 0;
	}
	(*ip_addresses_size)++;
	return 1;
}

/**
 * Resolve one configured interface entry to address strings.
 * Every entry of ifas whose name equals search_ifa (up to an optional
 * '@port' suffix) and that has an IPv4 or IPv6 address is appended as
 * "address[%scope][@port]". If nothing matched, search_ifa itself is
 * appended unchanged.
 * @return false on failure (inet_ntop or allocation).
 */
static int
resolve_ifa_name(struct ifaddrs *ifas, const char *search_ifa, char ***ip_addresses, int *ip_addresses_size)
{
	struct ifaddrs *ifa;
	int last_ip_addresses_size = *ip_addresses_size;

	for(ifa = ifas; ifa != NULL; ifa = ifa->ifa_next) {
		sa_family_t family;
		const char* atsign;
#ifdef INET6      /* |   address ip                           | % |  ifa name  | @ |  port  | nul */
		char addr_buf[INET6_ADDRSTRLEN + 1 + IF_NAMESIZE + 1 + 16 + 1];
#else
		char addr_buf[INET_ADDRSTRLEN + 1 + 16 + 1];
#endif

		if((atsign=strrchr(search_ifa, '@')) != NULL) {
			/* compare only the part in front of the '@' */
			if(strlen(ifa->ifa_name) != (size_t)(atsign-search_ifa)
			   || strncmp(ifa->ifa_name, search_ifa,
			   atsign-search_ifa) != 0)
				continue;
		} else {
			if(strcmp(ifa->ifa_name, search_ifa) != 0)
				continue;
			atsign = "";
		}

		if(ifa->ifa_addr == NULL)
			continue;

		family = ifa->ifa_addr->sa_family;
		if(family == AF_INET) {
			char a4[INET_ADDRSTRLEN + 1];
			struct sockaddr_in *in4 = (struct sockaddr_in *)
				ifa->ifa_addr;
			if(!inet_ntop(family, &in4->sin_addr, a4, sizeof(a4))) {
				log_err("inet_ntop failed");
				return 0;
			}
			snprintf(addr_buf, sizeof(addr_buf), "%s%s",
				a4, atsign);
		}
#ifdef INET6
		else if(family == AF_INET6) {
			struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)
				ifa->ifa_addr;
			char a6[INET6_ADDRSTRLEN + 1];
			char if_index_name[IF_NAMESIZE + 1];
			if_index_name[0] = 0;
			if(!inet_ntop(family, &in6->sin6_addr, a6, sizeof(a6))) {
				log_err("inet_ntop failed");
				return 0;
			}
			(void)if_indextoname(in6->sin6_scope_id,
				(char *)if_index_name);
			if (strlen(if_index_name) != 0) {
				snprintf(addr_buf, sizeof(addr_buf),
					"%s%%%s%s", a6, if_index_name, atsign);
			} else {
				snprintf(addr_buf, sizeof(addr_buf), "%s%s",
					a6, atsign);
			}
		}
#endif
		else {
			continue;
		}
		verbose(4, "interface %s has address %s", search_ifa, addr_buf);

		if(!ifa_strarray_append(ip_addresses, ip_addresses_size,
			addr_buf))
			return 0;
	}

	if (*ip_addresses_size == last_ip_addresses_size) {
		/* no interface matched, keep the configured string */
		if(!ifa_strarray_append(ip_addresses, ip_addresses_size,
			search_ifa))
			return 0;
	}
	return 1;
}
#endif /* HAVE_GETIFADDRS */

/* Resolve the interface names in ifs and list into a malloced array of
 * malloced strings, returned in resif with its length in num_resif.
 * Returns false on failure, and resif is then NULL with num_resif 0. */
int resolve_interface_names(char** ifs, int num_ifs,
	struct config_strlist* list, char*** resif, int* num_resif)
{
#ifdef HAVE_GETIFADDRS
	struct ifaddrs *addrs = NULL;
	/* resif is an output; start from an empty array, like the code
	 * for platforms without getifaddrs does */
	*resif = NULL;
	*num_resif = 0;
	if(num_ifs == 0 && list == NULL) {
		return 1;
	}
	if(getifaddrs(&addrs) == -1) {
		log_err("failed to list interfaces: getifaddrs: %s",
			strerror(errno));
		if(addrs)
			freeifaddrs(addrs);
		return 0;
	}
	if(ifs) {
		int i;
		for(i=0; i<num_ifs; i++) {
			if(!resolve_ifa_name(addrs, ifs[i], resif, num_resif)) {
				freeifaddrs(addrs);
				config_del_strarray(*resif, *num_resif);
				*resif = NULL;
				*num_resif = 0;
				return 0;
			}
		}
	}
	if(list) {
		struct config_strlist* p;
		for(p = list; p; p = p->next) {
			if(!resolve_ifa_name(addrs, p->str, resif, num_resif)) {
				freeifaddrs(addrs);
				config_del_strarray(*resif, *num_resif);
				*resif = NULL;
				*num_resif = 0;
				return 0;
			}
		}
	}
	freeifaddrs(addrs);
	return 1;
#else
	struct config_strlist* p;
	if(num_ifs == 0 && list == NULL) {
		*resif = NULL;
		*num_resif = 0;
		return 1;
	}
	*num_resif = num_ifs;
	for(p = list; p; p = p->next) {
		(*num_resif)++;
	}
	*resif = calloc(*num_resif, sizeof(**resif));
	if(!*resif) {
		log_err("out of memory");
		return 0;
	}
	if(ifs) {
		int i;
		for(i=0; i<num_ifs; i++) {
			(*resif)[i] = strdup(ifs[i]);
			if(!((*resif)[i])) {
				log_err("out of memory");
				config_del_strarray(*resif, *num_resif);
				*resif = NULL;
				*num_resif = 0;
				return 0;
			}
		}
	}
	if(list) {
		int idx = num_ifs;
		for(p = list; p; p = p->next) {
			(*resif)[idx] = strdup(p->str);
			if(!((*resif)[idx])) {
				log_err("out of memory");
				config_del_strarray(*resif, *num_resif);
				*resif = NULL;
				*num_resif = 0;
				return 0;
			}
			idx++;
		}
	}
	return 1;
#endif /* HAVE_GETIFADDRS */
}

/**
 * Call ports_create_if for one interface and address family, with the
 * socket options taken from the config.
 * @param cfg: config with the port and socket settings.
 * @param family: address family, stored in hints->ai_family.
 * @param ifname: interface address string.
 * @param do_auto: passed as do_auto to ports_create_if.
 * @param do_tcp: passed as do_tcp to ports_create_if.
 * @param hints: getaddrinfo hints; ai_family is overwritten.
 * @param port: port number to open.
 * @param list: list of open ports, appended to.
 * @param reuseport: passed through to ports_create_if.
 * @return the result of ports_create_if, false on error.
 */
static int
ports_create_if_cfg(struct config_file* cfg, int family, const char* ifname,
	int do_auto, int do_tcp, struct addrinfo* hints, int port,
	struct listen_port** list, int* reuseport)
{
	hints->ai_family = family;
	return ports_create_if(ifname, do_auto, cfg->do_udp, do_tcp,
		hints, port, list,
		cfg->so_rcvbuf, cfg->so_sndbuf,
		cfg->ssl_port, cfg->tls_additional_port,
		cfg->https_port, cfg->proxy_protocol_port,
		reuseport, cfg->ip_transparent,
		cfg->tcp_mss, cfg->ip_freebind,
		cfg->http_nodelay, cfg->use_systemd,
		cfg->dnscrypt_port, cfg->ip_dscp,
		cfg->quic_port, cfg->http_notls_downstream,
		cfg->sock_queue_timeout);
}

/* Open the listening ports described by the config and the (resolved)
 * interface list ifs. Returns the list of opened ports, or NULL on error
 * or when both IPv4 and IPv6 are disabled. */
struct listen_port*
listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs,
	int* reuseport)
{
	struct listen_port* list = NULL;
	struct addrinfo hints;
	int i, do_ip4, do_ip6;
	int do_tcp, do_auto;
	do_ip4 = cfg->do_ip4;
	do_ip6 = cfg->do_ip6;
	do_tcp = cfg->do_tcp;
	do_auto = cfg->if_automatic && cfg->do_udp;
	if(cfg->incoming_num_tcp == 0)
		do_tcp = 0;

	/* getaddrinfo */
	memset(&hints, 0, sizeof(hints));
	hints.ai_flags = AI_PASSIVE;
	/* no name lookups on our listening ports */
	if(num_ifs > 0)
		hints.ai_flags |= AI_NUMERICHOST;
	hints.ai_family = AF_UNSPEC;
#ifndef INET6
	do_ip6 = 0;
#endif
	if(!do_ip4 && !do_ip6) {
		return NULL;
	}
	/* create ip4 and ip6 ports so that return addresses are nice. */
	if(do_auto || num_ifs == 0) {
		if(do_auto && cfg->if_automatic_ports &&
			cfg->if_automatic_ports[0]!=0) {
			/* open the wildcard address on every port of the
			 * whitespace separated list */
			char* now = cfg->if_automatic_ports;
			while(now && *now) {
				char* after;
				int extraport;
				while(isspace((unsigned char)*now))
					now++;
				if(!*now)
					break;
				after = now;
				extraport = (int)strtol(now, &after, 10);
				if(extraport < 0 || extraport > 65535) {
					log_err("interface-automatic-ports port number out of range, at position %d of '%s'", (int)(now-cfg->if_automatic_ports)+1, cfg->if_automatic_ports);
					goto fail;
				}
				if(extraport == 0 && now == after) {
					log_err("interface-automatic-ports could not be parsed, at position %d of '%s'", (int)(now-cfg->if_automatic_ports)+1, cfg->if_automatic_ports);
					goto fail;
				}
				now = after;
				if(do_ip6 && !ports_create_if_cfg(cfg,
					AF_INET6, "::0", do_auto, do_tcp,
					&hints, extraport, &list, reuseport))
					goto fail;
				if(do_ip4 && !ports_create_if_cfg(cfg,
					AF_INET, "0.0.0.0", do_auto, do_tcp,
					&hints, extraport, &list, reuseport))
					goto fail;
			}
			return list;
		}
		if(do_ip6 && !ports_create_if_cfg(cfg, AF_INET6,
			do_auto?"::0":"::1", do_auto, do_tcp,
			&hints, cfg->port, &list, reuseport))
			goto fail;
		if(do_ip4 && !ports_create_if_cfg(cfg, AF_INET,
			do_auto?"0.0.0.0":"127.0.0.1", do_auto, do_tcp,
			&hints, cfg->port, &list, reuseport))
			goto fail;
	} else for(i = 0; i<num_ifs; i++) {
		if(str_is_ip6(ifs[i])) {
			if(!do_ip6)
				continue;
			if(!ports_create_if_cfg(cfg, AF_INET6, ifs[i], 0,
				do_tcp, &hints, cfg->port, &list, reuseport))
				goto fail;
		} else {
			if(!do_ip4)
				continue;
			if(!ports_create_if_cfg(cfg, AF_INET, ifs[i], 0,
				do_tcp, &hints, cfg->port, &list, reuseport))
				goto fail;
		}
	}

	return list;

fail:
	/* close and free what was opened so far */
	listening_ports_free(list);
	return NULL;
}

/* Close the sockets on the list and free the list items together with
 * their socket address structures. */
void listening_ports_free(struct listen_port* list)
{
	struct listen_port* nx;
	while(list) {
		nx = list->next;
		if(list->fd != -1) {
			sock_close(list->fd);
		}
		/* rc_ports don't have ub_socket */
		if(list->socket) {
			free(list->socket->addr);
			free(list->socket);
		}
		free(list);
		list = nx;
	}
}

/* Sum the memory in use by the listening structure, its buffers and its
 * comm points. */
size_t listen_get_mem(struct listen_dnsport* listen)
{
	struct listen_list* p;
	size_t s = sizeof(*listen) + sizeof(*listen->base) +
		sizeof(*listen->udp_buff) +
		sldns_buffer_capacity(listen->udp_buff);
#ifdef USE_DNSCRYPT
	s += sizeof(*listen->dnscrypt_udp_buff);
	/* dnscrypt_udp_buff stays NULL if no dnscrypt port was created,
	 * see listen_create; listen_delete tests for that as well */
	if(listen->dnscrypt_udp_buff &&
		listen->udp_buff != listen->dnscrypt_udp_buff){
		s += sldns_buffer_capacity(listen->dnscrypt_udp_buff);
	}
#endif
	for(p = listen->cps; p; p = p->next) {
		s += sizeof(*p);
		s += comm_point_get_mem(p->com);
	}
	return s;
}

void listen_stop_accept(struct listen_dnsport* listen)
{
	/* do not stop the ones that have no tcp_free list
	 * (they have already stopped listening) */
	struct listen_list* p;
	for(p=listen->cps; p; p=p->next) {
		if(p->com->type == comm_tcp_accept &&
			p->com->tcp_free != NULL) {
			comm_point_stop_listening(p->com);
		}
	}
}

void listen_start_accept(struct listen_dnsport* listen)
{
	/* do not start the ones that have no tcp_free list, it is no
	 * use to listen to them because they have no free tcp handlers */
	struct listen_list* p;
	for(p=listen->cps; p; p=p->next) {
		if(p->com->type == comm_tcp_accept &&
			p->com->tcp_free != NULL) {
			comm_point_start_listening(p->com, -1, -1);
		}
	}
}

/* Allocate a zeroed tcp_req_info that spools through spoolbuf.
 * Returns NULL on allocation failure (an error is logged). */
struct tcp_req_info*
tcp_req_info_create(struct sldns_buffer* spoolbuf)
{
	struct tcp_req_info* req = (struct tcp_req_info*)calloc(1,
		sizeof(*req));
	if(!req) {
		log_err("malloc failure for new stream outoforder processing structure");
		return NULL;
	}
	req->spool_buffer = spoolbuf;
	return req;
}

/* Clear and free the tcp_req_info. A NULL argument is ignored. */
void
tcp_req_info_delete(struct tcp_req_info* req)
{
	if(!req) return;
	tcp_req_info_clear(req);
	/* cp is pointer back to commpoint that owns this struct and
	 * called delete on us */
	/* spool_buffer is shared udp buffer, not deleted here */
	free(req);
}

/* Drop all open requests and pending results of the tcp_req_info and
 * reset its counters. A NULL argument is ignored. */
void tcp_req_info_clear(struct tcp_req_info* req)
{
	struct tcp_req_open_item* open, *nopen;
	struct tcp_req_done_item* item, *nitem;
	if(!req) return;

	/* free outstanding request mesh reply entries */
	open = req->open_req_list;
	while(open) {
		nopen = open->next;
		mesh_state_remove_reply(open->mesh, open->mesh_state, req->cp);
		free(open);
		open = nopen;
	}
	req->open_req_list = NULL;
	req->num_open_req = 0;

	/* free pending writable result packets */
	item = req->done_req_list;
	while(item) {
		nitem = item->next;
		lock_basic_lock(&stream_wait_count_lock);
		stream_wait_count -= (sizeof(struct tcp_req_done_item)
			+item->len);
		lock_basic_unlock(&stream_wait_count_lock);
		free(item->buf);
		free(item);
		item = nitem;
	}
	req->done_req_list = NULL;
	req->num_done_req = 0;
	req->read_is_closed = 0;
}

/* Remove every open request item that refers to mesh state m from the
 * list. NULL arguments are ignored. */
void
tcp_req_info_remove_mesh_state(struct tcp_req_info* req, struct mesh_state* m)
{
	struct tcp_req_open_item* open, *prev = NULL;
	if(!req || !m) return;
	open = req->open_req_list;
	while(open) {
		if(open->mesh_state == m) {
			struct tcp_req_open_item* next;
			if(prev) prev->next = open->next;
			else req->open_req_list = open->next;
			/* caller has to manage the mesh state reply entry */
			next = open->next;
			free(open);
			req->num_open_req --;

			/* prev = prev; */
			open = next;
			continue;
		}
		prev = open;
		open = open->next;
	}
}

/** setup listening for read or write */
static void
tcp_req_info_setup_listen(struct tcp_req_info* req)
{
	int wr = 0;
	int rd = 0;

	if(req->cp->tcp_byte_count != 0) {
		/* cannot change, halfway through */
		return;
	}

	if(!req->cp->tcp_is_reading)
		wr = 1;
	if(!req->read_is_closed)
		rd = 1;

	if(wr) {
		req->cp->tcp_is_reading = 0;
		comm_point_stop_listening(req->cp);
		comm_point_start_listening(req->cp, -1,
			adjusted_tcp_timeout(req->cp));
	} else if(rd) {
		req->cp->tcp_is_reading = 1;
		comm_point_stop_listening(req->cp);
		comm_point_start_listening(req->cp, -1,
			adjusted_tcp_timeout(req->cp));
		/* and also read it (from SSL stack buffers), so
		 * no event read event is expected since the remainder of
		 * the TLS frame is sitting in the buffers. */
		req->read_again = 1;
	} else {
		comm_point_stop_listening(req->cp);
		comm_point_start_listening(req->cp, -1,
			adjusted_tcp_timeout(req->cp));
		comm_point_listen_for_rw(req->cp, 0, 0);
	}
}

/** remove first item from list of pending results */
static struct tcp_req_done_item*
tcp_req_info_pop_done(struct tcp_req_info* req)
{
	struct tcp_req_done_item* item;
	log_assert(req->num_done_req > 0 && req->done_req_list);
	item = req->done_req_list;
	lock_basic_lock(&stream_wait_count_lock);
	stream_wait_count -= (sizeof(struct tcp_req_done_item)+item->len);
	lock_basic_unlock(&stream_wait_count_lock);
	req->done_req_list = req->done_req_list->next;
	req->num_done_req --;
	return item;
}

/** Send given buffer and setup to write */
static void
tcp_req_info_start_write_buf(struct tcp_req_info* req, uint8_t* buf,
	size_t len)
{
	sldns_buffer_clear(req->cp->buffer);
	sldns_buffer_write(req->cp->buffer, buf, len);
	sldns_buffer_flip(req->cp->buffer);

	req->cp->tcp_is_reading = 0; /* we are now writing */
}

/** pick up the next result and start writing it to the channel */
static void
tcp_req_pickup_next_result(struct tcp_req_info* req)
{
	if(req->num_done_req > 0) {
		/* unlist the done item from the list of pending results */
		struct tcp_req_done_item* item = tcp_req_info_pop_done(req);
		tcp_req_info_start_write_buf(req, item->buf, item->len);
		free(item->buf);
		free(item);
	}
}

/** the read channel has closed */
int
tcp_req_info_handle_read_close(struct tcp_req_info* req)
{
	verbose(VERB_ALGO, "tcp channel read side closed %d", req->cp->fd);
2282 /* reset byte count for (potential) partial read */ 2283 req->cp->tcp_byte_count = 0; 2284 /* if we still have results to write, pick up next and write it */ 2285 if(req->num_done_req != 0) { 2286 tcp_req_pickup_next_result(req); 2287 tcp_req_info_setup_listen(req); 2288 return 1; 2289 } 2290 /* if nothing to do, this closes the connection */ 2291 if(req->num_open_req == 0 && req->num_done_req == 0) 2292 return 0; 2293 /* otherwise, we must be waiting for dns resolve, wait with timeout */ 2294 req->read_is_closed = 1; 2295 tcp_req_info_setup_listen(req); 2296 return 1; 2297 } 2298 2299 void 2300 tcp_req_info_handle_writedone(struct tcp_req_info* req) 2301 { 2302 /* back to reading state, we finished this write event */ 2303 sldns_buffer_clear(req->cp->buffer); 2304 if(req->num_done_req == 0 && req->read_is_closed) { 2305 /* no more to write and nothing to read, close it */ 2306 comm_point_drop_reply(&req->cp->repinfo); 2307 return; 2308 } 2309 req->cp->tcp_is_reading = 1; 2310 /* see if another result needs writing */ 2311 tcp_req_pickup_next_result(req); 2312 2313 /* see if there is more to write, if not stop_listening for writing */ 2314 /* see if new requests are allowed, if so, start_listening 2315 * for reading */ 2316 tcp_req_info_setup_listen(req); 2317 } 2318 2319 void 2320 tcp_req_info_handle_readdone(struct tcp_req_info* req) 2321 { 2322 struct comm_point* c = req->cp; 2323 2324 /* we want to read up several requests, unless there are 2325 * pending answers */ 2326 2327 req->is_drop = 0; 2328 req->is_reply = 0; 2329 req->in_worker_handle = 1; 2330 sldns_buffer_set_limit(req->spool_buffer, 0); 2331 /* handle the current request */ 2332 /* this calls the worker handle request routine that could give 2333 * a cache response, or localdata response, or drop the reply, 2334 * or schedule a mesh entry for later */ 2335 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2336 if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) { 2337 
req->in_worker_handle = 0; 2338 /* there is an answer, put it up. It is already in the 2339 * c->buffer, just send it. */ 2340 /* since we were just reading a query, the channel is 2341 * clear to write to */ 2342 send_it: 2343 c->tcp_is_reading = 0; 2344 comm_point_stop_listening(c); 2345 comm_point_start_listening(c, -1, adjusted_tcp_timeout(c)); 2346 return; 2347 } 2348 req->in_worker_handle = 0; 2349 /* it should be waiting in the mesh for recursion. 2350 * If mesh failed to add a new entry and called commpoint_drop_reply. 2351 * Then the mesh state has been cleared. */ 2352 if(req->is_drop) { 2353 /* the reply has been dropped, stream has been closed. */ 2354 return; 2355 } 2356 /* If mesh failed(mallocfail) and called commpoint_send_reply with 2357 * something like servfail then we pick up that reply below. */ 2358 if(req->is_reply) { 2359 goto send_it; 2360 } 2361 2362 sldns_buffer_clear(c->buffer); 2363 /* if pending answers, pick up an answer and start sending it */ 2364 tcp_req_pickup_next_result(req); 2365 2366 /* if answers pending, start sending answers */ 2367 /* read more requests if we can have more requests */ 2368 tcp_req_info_setup_listen(req); 2369 } 2370 2371 int 2372 tcp_req_info_add_meshstate(struct tcp_req_info* req, 2373 struct mesh_area* mesh, struct mesh_state* m) 2374 { 2375 struct tcp_req_open_item* item; 2376 log_assert(req && mesh && m); 2377 item = (struct tcp_req_open_item*)malloc(sizeof(*item)); 2378 if(!item) return 0; 2379 item->next = req->open_req_list; 2380 item->mesh = mesh; 2381 item->mesh_state = m; 2382 req->open_req_list = item; 2383 req->num_open_req++; 2384 return 1; 2385 } 2386 2387 /** Add a result to the result list. At the end. 
*/ 2388 static int 2389 tcp_req_info_add_result(struct tcp_req_info* req, uint8_t* buf, size_t len) 2390 { 2391 struct tcp_req_done_item* last = NULL; 2392 struct tcp_req_done_item* item; 2393 size_t space; 2394 2395 /* see if we have space */ 2396 space = sizeof(struct tcp_req_done_item) + len; 2397 lock_basic_lock(&stream_wait_count_lock); 2398 if(stream_wait_count + space > stream_wait_max) { 2399 lock_basic_unlock(&stream_wait_count_lock); 2400 verbose(VERB_ALGO, "drop stream reply, no space left, in stream-wait-size"); 2401 return 0; 2402 } 2403 stream_wait_count += space; 2404 lock_basic_unlock(&stream_wait_count_lock); 2405 2406 /* find last element */ 2407 last = req->done_req_list; 2408 while(last && last->next) 2409 last = last->next; 2410 2411 /* create new element */ 2412 item = (struct tcp_req_done_item*)malloc(sizeof(*item)); 2413 if(!item) { 2414 log_err("malloc failure, for stream result list"); 2415 return 0; 2416 } 2417 item->next = NULL; 2418 item->len = len; 2419 item->buf = memdup(buf, len); 2420 if(!item->buf) { 2421 free(item); 2422 log_err("malloc failure, adding reply to stream result list"); 2423 return 0; 2424 } 2425 2426 /* link in */ 2427 if(last) last->next = item; 2428 else req->done_req_list = item; 2429 req->num_done_req++; 2430 return 1; 2431 } 2432 2433 void 2434 tcp_req_info_send_reply(struct tcp_req_info* req) 2435 { 2436 if(req->in_worker_handle) { 2437 /* reply from mesh is in the spool_buffer */ 2438 /* copy now, so that the spool buffer is free for other tasks 2439 * before the callback is done */ 2440 sldns_buffer_clear(req->cp->buffer); 2441 sldns_buffer_write(req->cp->buffer, 2442 sldns_buffer_begin(req->spool_buffer), 2443 sldns_buffer_limit(req->spool_buffer)); 2444 sldns_buffer_flip(req->cp->buffer); 2445 req->is_reply = 1; 2446 return; 2447 } 2448 /* now that the query has been handled, that mesh_reply entry 2449 * should be removed, from the tcp_req_info list, 2450 * the mesh state cleanup removes then with 
region_cleanup and 2451 * replies_sent true. */ 2452 /* see if we can send it straight away (we are not doing 2453 * anything else). If so, copy to buffer and start */ 2454 if(req->cp->tcp_is_reading && req->cp->tcp_byte_count == 0) { 2455 /* buffer is free, and was ready to read new query into, 2456 * but we are now going to use it to send this answer */ 2457 tcp_req_info_start_write_buf(req, 2458 sldns_buffer_begin(req->spool_buffer), 2459 sldns_buffer_limit(req->spool_buffer)); 2460 /* switch to listen to write events */ 2461 comm_point_stop_listening(req->cp); 2462 comm_point_start_listening(req->cp, -1, 2463 adjusted_tcp_timeout(req->cp)); 2464 return; 2465 } 2466 /* queue up the answer behind the others already pending */ 2467 if(!tcp_req_info_add_result(req, sldns_buffer_begin(req->spool_buffer), 2468 sldns_buffer_limit(req->spool_buffer))) { 2469 /* drop the connection, we are out of resources */ 2470 comm_point_drop_reply(&req->cp->repinfo); 2471 } 2472 } 2473 2474 size_t tcp_req_info_get_stream_buffer_size(void) 2475 { 2476 size_t s; 2477 if(!stream_wait_lock_inited) 2478 return stream_wait_count; 2479 lock_basic_lock(&stream_wait_count_lock); 2480 s = stream_wait_count; 2481 lock_basic_unlock(&stream_wait_count_lock); 2482 return s; 2483 } 2484 2485 size_t http2_get_query_buffer_size(void) 2486 { 2487 size_t s; 2488 if(!http2_query_buffer_lock_inited) 2489 return http2_query_buffer_count; 2490 lock_basic_lock(&http2_query_buffer_count_lock); 2491 s = http2_query_buffer_count; 2492 lock_basic_unlock(&http2_query_buffer_count_lock); 2493 return s; 2494 } 2495 2496 size_t http2_get_response_buffer_size(void) 2497 { 2498 size_t s; 2499 if(!http2_response_buffer_lock_inited) 2500 return http2_response_buffer_count; 2501 lock_basic_lock(&http2_response_buffer_count_lock); 2502 s = http2_response_buffer_count; 2503 lock_basic_unlock(&http2_response_buffer_count_lock); 2504 return s; 2505 } 2506 2507 #ifdef HAVE_NGHTTP2 2508 /** nghttp2 callback. 
Used to copy response from rbuffer to nghttp2 session.
 * Copies at most length bytes per call. Once the rbuffer has been fully
 * consumed the EOF data flag is set, the response buffer counter is lowered
 * by the rbuffer capacity and the rbuffer is freed.
 * Returns the number of bytes copied, or a temporal callback failure if
 * the stream or its data is not available. */
static ssize_t http2_submit_response_read_callback(
	nghttp2_session* ATTR_UNUSED(session),
	int32_t stream_id, uint8_t* buf, size_t length, uint32_t* data_flags,
	nghttp2_data_source* source, void* ATTR_UNUSED(cb_arg))
{
	struct http2_session* h2_session = source->ptr;
	struct http2_stream* h2_stream;
	size_t avail;
	size_t copylen;

	h2_stream = nghttp2_session_get_stream_user_data(
		h2_session->session, stream_id);
	if(!h2_stream) {
		verbose(VERB_QUERY, "http2: cannot get stream data, closing "
			"stream");
		return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
	}

	avail = 0;
	if(h2_stream->rbuffer)
		avail = sldns_buffer_remaining(h2_stream->rbuffer);
	if(avail == 0) {
		verbose(VERB_QUERY, "http2: cannot submit buffer. No data "
			"available in rbuffer");
		/* rbuffer will be free'd in frame close cb */
		return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
	}

	/* hand over what fits, bounded by what the return type can hold */
	copylen = (length > avail) ? avail : length;
	if(copylen > SSIZE_MAX)
		copylen = SSIZE_MAX; /* will probably never happen */

	memcpy(buf, sldns_buffer_current(h2_stream->rbuffer), copylen);
	sldns_buffer_skip(h2_stream->rbuffer, copylen);

	if(sldns_buffer_remaining(h2_stream->rbuffer) == 0) {
		/* that was the last part of the response */
		*data_flags |= NGHTTP2_DATA_FLAG_EOF;
		lock_basic_lock(&http2_response_buffer_count_lock);
		http2_response_buffer_count -=
			sldns_buffer_capacity(h2_stream->rbuffer);
		lock_basic_unlock(&http2_response_buffer_count_lock);
		sldns_buffer_free(h2_stream->rbuffer);
		h2_stream->rbuffer = NULL;
	}

	return (ssize_t)copylen;
}

/**
 * Send RST_STREAM frame for stream.
2554 * @param h2_session: http2 session to submit frame to 2555 * @param h2_stream: http2 stream containing frame ID to use in RST_STREAM 2556 * @return 0 on error, 1 otherwise 2557 */ 2558 static int http2_submit_rst_stream(struct http2_session* h2_session, 2559 struct http2_stream* h2_stream) 2560 { 2561 int ret = nghttp2_submit_rst_stream(h2_session->session, 2562 NGHTTP2_FLAG_NONE, h2_stream->stream_id, 2563 NGHTTP2_INTERNAL_ERROR); 2564 if(ret) { 2565 verbose(VERB_QUERY, "http2: nghttp2_submit_rst_stream failed, " 2566 "error: %s", nghttp2_strerror(ret)); 2567 return 0; 2568 } 2569 return 1; 2570 } 2571 2572 /** 2573 * DNS response ready to be submitted to nghttp2, to be prepared for sending 2574 * out. Response is stored in c->buffer. Copy to rbuffer because the c->buffer 2575 * might be used before this will be sent out. 2576 * @param h2_session: http2 session, containing c->buffer which contains answer 2577 * @return 0 on error, 1 otherwise 2578 */ 2579 int http2_submit_dns_response(struct http2_session* h2_session) 2580 { 2581 int ret; 2582 nghttp2_data_provider data_prd; 2583 char status[4]; 2584 nghttp2_nv headers[3]; 2585 struct http2_stream* h2_stream = h2_session->c->h2_stream; 2586 size_t rlen; 2587 char rlen_str[32]; 2588 2589 if(h2_stream->rbuffer) { 2590 log_err("http2 submit response error: rbuffer already " 2591 "exists"); 2592 return 0; 2593 } 2594 if(sldns_buffer_remaining(h2_session->c->buffer) == 0) { 2595 log_err("http2 submit response error: c->buffer not complete"); 2596 return 0; 2597 } 2598 2599 if(snprintf(status, 4, "%d", h2_stream->status) != 3) { 2600 verbose(VERB_QUERY, "http2: submit response error: " 2601 "invalid status"); 2602 return 0; 2603 } 2604 2605 rlen = sldns_buffer_remaining(h2_session->c->buffer); 2606 snprintf(rlen_str, sizeof(rlen_str), "%u", (unsigned)rlen); 2607 2608 lock_basic_lock(&http2_response_buffer_count_lock); 2609 if(http2_response_buffer_count + rlen > http2_response_buffer_max) { 2610 
lock_basic_unlock(&http2_response_buffer_count_lock); 2611 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, " 2612 "in https-response-buffer-size"); 2613 return http2_submit_rst_stream(h2_session, h2_stream); 2614 } 2615 http2_response_buffer_count += rlen; 2616 lock_basic_unlock(&http2_response_buffer_count_lock); 2617 2618 if(!(h2_stream->rbuffer = sldns_buffer_new(rlen))) { 2619 lock_basic_lock(&http2_response_buffer_count_lock); 2620 http2_response_buffer_count -= rlen; 2621 lock_basic_unlock(&http2_response_buffer_count_lock); 2622 log_err("http2 submit response error: malloc failure"); 2623 return 0; 2624 } 2625 2626 headers[0].name = (uint8_t*)":status"; 2627 headers[0].namelen = 7; 2628 headers[0].value = (uint8_t*)status; 2629 headers[0].valuelen = 3; 2630 headers[0].flags = NGHTTP2_NV_FLAG_NONE; 2631 2632 headers[1].name = (uint8_t*)"content-type"; 2633 headers[1].namelen = 12; 2634 headers[1].value = (uint8_t*)"application/dns-message"; 2635 headers[1].valuelen = 23; 2636 headers[1].flags = NGHTTP2_NV_FLAG_NONE; 2637 2638 headers[2].name = (uint8_t*)"content-length"; 2639 headers[2].namelen = 14; 2640 headers[2].value = (uint8_t*)rlen_str; 2641 headers[2].valuelen = strlen(rlen_str); 2642 headers[2].flags = NGHTTP2_NV_FLAG_NONE; 2643 2644 sldns_buffer_write(h2_stream->rbuffer, 2645 sldns_buffer_current(h2_session->c->buffer), 2646 sldns_buffer_remaining(h2_session->c->buffer)); 2647 sldns_buffer_flip(h2_stream->rbuffer); 2648 2649 data_prd.source.ptr = h2_session; 2650 data_prd.read_callback = http2_submit_response_read_callback; 2651 ret = nghttp2_submit_response(h2_session->session, h2_stream->stream_id, 2652 headers, 3, &data_prd); 2653 if(ret) { 2654 verbose(VERB_QUERY, "http2: set_stream_user_data failed, " 2655 "error: %s", nghttp2_strerror(ret)); 2656 return 0; 2657 } 2658 return 1; 2659 } 2660 #else 2661 int http2_submit_dns_response(void* ATTR_UNUSED(v)) 2662 { 2663 return 0; 2664 } 2665 #endif 2666 2667 #ifdef HAVE_NGHTTP2 2668 /** HTTP 
status to descriptive string */ 2669 static char* http_status_to_str(enum http_status s) 2670 { 2671 switch(s) { 2672 case HTTP_STATUS_OK: 2673 return "OK"; 2674 case HTTP_STATUS_BAD_REQUEST: 2675 return "Bad Request"; 2676 case HTTP_STATUS_NOT_FOUND: 2677 return "Not Found"; 2678 case HTTP_STATUS_PAYLOAD_TOO_LARGE: 2679 return "Payload Too Large"; 2680 case HTTP_STATUS_URI_TOO_LONG: 2681 return "URI Too Long"; 2682 case HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE: 2683 return "Unsupported Media Type"; 2684 case HTTP_STATUS_NOT_IMPLEMENTED: 2685 return "Not Implemented"; 2686 } 2687 return "Status Unknown"; 2688 } 2689 2690 /** nghttp2 callback. Used to copy error message to nghttp2 session */ 2691 static ssize_t http2_submit_error_read_callback( 2692 nghttp2_session* ATTR_UNUSED(session), 2693 int32_t stream_id, uint8_t* buf, size_t length, uint32_t* data_flags, 2694 nghttp2_data_source* source, void* ATTR_UNUSED(cb_arg)) 2695 { 2696 struct http2_stream* h2_stream; 2697 struct http2_session* h2_session = source->ptr; 2698 char* msg; 2699 if(!(h2_stream = nghttp2_session_get_stream_user_data( 2700 h2_session->session, stream_id))) { 2701 verbose(VERB_QUERY, "http2: cannot get stream data, closing " 2702 "stream"); 2703 return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE; 2704 } 2705 *data_flags |= NGHTTP2_DATA_FLAG_EOF; 2706 msg = http_status_to_str(h2_stream->status); 2707 if(length < strlen(msg)) 2708 return 0; /* not worth trying over multiple frames */ 2709 memcpy(buf, msg, strlen(msg)); 2710 return strlen(msg); 2711 2712 } 2713 2714 /** 2715 * HTTP error response ready to be submitted to nghttp2, to be prepared for 2716 * sending out. Message body will contain descriptive string for HTTP status. 
2717 * @param h2_session: http2 session to submit to 2718 * @param h2_stream: http2 stream containing HTTP status to use for error 2719 * @return 0 on error, 1 otherwise 2720 */ 2721 static int http2_submit_error(struct http2_session* h2_session, 2722 struct http2_stream* h2_stream) 2723 { 2724 int ret; 2725 char status[4]; 2726 nghttp2_data_provider data_prd; 2727 nghttp2_nv headers[1]; /* will be copied by nghttp */ 2728 if(snprintf(status, 4, "%d", h2_stream->status) != 3) { 2729 verbose(VERB_QUERY, "http2: submit error failed, " 2730 "invalid status"); 2731 return 0; 2732 } 2733 headers[0].name = (uint8_t*)":status"; 2734 headers[0].namelen = 7; 2735 headers[0].value = (uint8_t*)status; 2736 headers[0].valuelen = 3; 2737 headers[0].flags = NGHTTP2_NV_FLAG_NONE; 2738 2739 data_prd.source.ptr = h2_session; 2740 data_prd.read_callback = http2_submit_error_read_callback; 2741 2742 ret = nghttp2_submit_response(h2_session->session, h2_stream->stream_id, 2743 headers, 1, &data_prd); 2744 if(ret) { 2745 verbose(VERB_QUERY, "http2: submit error failed, " 2746 "error: %s", nghttp2_strerror(ret)); 2747 return 0; 2748 } 2749 return 1; 2750 } 2751 2752 /** 2753 * Start query handling. Query is stored in the stream, and will be free'd here. 2754 * @param h2_session: http2 session, containing comm point 2755 * @param h2_stream: stream containing buffered query 2756 * @return: -1 on error, 1 if answer is stored in c->buffer, 0 if there is no 2757 * reply available (yet). 
2758 */ 2759 static int http2_query_read_done(struct http2_session* h2_session, 2760 struct http2_stream* h2_stream) 2761 { 2762 log_assert(h2_stream->qbuffer); 2763 2764 if(h2_session->c->h2_stream) { 2765 verbose(VERB_ALGO, "http2_query_read_done failure: shared " 2766 "buffer already assigned to stream"); 2767 return -1; 2768 } 2769 2770 /* the c->buffer might be used by mesh_send_reply and no be cleard 2771 * need to be cleared before use */ 2772 sldns_buffer_clear(h2_session->c->buffer); 2773 if(sldns_buffer_remaining(h2_session->c->buffer) < 2774 sldns_buffer_remaining(h2_stream->qbuffer)) { 2775 /* qbuffer will be free'd in frame close cb */ 2776 sldns_buffer_clear(h2_session->c->buffer); 2777 verbose(VERB_ALGO, "http2_query_read_done failure: can't fit " 2778 "qbuffer in c->buffer"); 2779 return -1; 2780 } 2781 2782 sldns_buffer_write(h2_session->c->buffer, 2783 sldns_buffer_current(h2_stream->qbuffer), 2784 sldns_buffer_remaining(h2_stream->qbuffer)); 2785 2786 lock_basic_lock(&http2_query_buffer_count_lock); 2787 http2_query_buffer_count -= sldns_buffer_capacity(h2_stream->qbuffer); 2788 lock_basic_unlock(&http2_query_buffer_count_lock); 2789 sldns_buffer_free(h2_stream->qbuffer); 2790 h2_stream->qbuffer = NULL; 2791 2792 sldns_buffer_flip(h2_session->c->buffer); 2793 h2_session->c->h2_stream = h2_stream; 2794 fptr_ok(fptr_whitelist_comm_point(h2_session->c->callback)); 2795 if((*h2_session->c->callback)(h2_session->c, h2_session->c->cb_arg, 2796 NETEVENT_NOERROR, &h2_session->c->repinfo)) { 2797 return 1; /* answer in c->buffer */ 2798 } 2799 sldns_buffer_clear(h2_session->c->buffer); 2800 h2_session->c->h2_stream = NULL; 2801 return 0; /* mesh state added, or dropped */ 2802 } 2803 2804 /** nghttp2 callback. Used to check if the received frame indicates the end of a 2805 * stream. Gather collected request data and start query handling. 
*/ 2806 static int http2_req_frame_recv_cb(nghttp2_session* session, 2807 const nghttp2_frame* frame, void* cb_arg) 2808 { 2809 struct http2_session* h2_session = (struct http2_session*)cb_arg; 2810 struct http2_stream* h2_stream; 2811 int query_read_done; 2812 2813 if((frame->hd.type != NGHTTP2_DATA && 2814 frame->hd.type != NGHTTP2_HEADERS) || 2815 !(frame->hd.flags & NGHTTP2_FLAG_END_STREAM)) { 2816 return 0; 2817 } 2818 2819 if(!(h2_stream = nghttp2_session_get_stream_user_data( 2820 session, frame->hd.stream_id))) 2821 return 0; 2822 2823 if(h2_stream->invalid_endpoint) { 2824 h2_stream->status = HTTP_STATUS_NOT_FOUND; 2825 goto submit_http_error; 2826 } 2827 2828 if(h2_stream->invalid_content_type) { 2829 h2_stream->status = HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE; 2830 goto submit_http_error; 2831 } 2832 2833 if(h2_stream->http_method != HTTP_METHOD_GET && 2834 h2_stream->http_method != HTTP_METHOD_POST) { 2835 h2_stream->status = HTTP_STATUS_NOT_IMPLEMENTED; 2836 goto submit_http_error; 2837 } 2838 2839 if(h2_stream->query_too_large) { 2840 if(h2_stream->http_method == HTTP_METHOD_POST) 2841 h2_stream->status = HTTP_STATUS_PAYLOAD_TOO_LARGE; 2842 else 2843 h2_stream->status = HTTP_STATUS_URI_TOO_LONG; 2844 goto submit_http_error; 2845 } 2846 2847 if(!h2_stream->qbuffer) { 2848 h2_stream->status = HTTP_STATUS_BAD_REQUEST; 2849 goto submit_http_error; 2850 } 2851 2852 if(h2_stream->status) { 2853 submit_http_error: 2854 verbose(VERB_QUERY, "http2 request invalid, returning :status=" 2855 "%d", h2_stream->status); 2856 if(!http2_submit_error(h2_session, h2_stream)) { 2857 return NGHTTP2_ERR_CALLBACK_FAILURE; 2858 } 2859 return 0; 2860 } 2861 h2_stream->status = HTTP_STATUS_OK; 2862 2863 sldns_buffer_flip(h2_stream->qbuffer); 2864 h2_session->postpone_drop = 1; 2865 query_read_done = http2_query_read_done(h2_session, h2_stream); 2866 if(query_read_done < 0) 2867 return NGHTTP2_ERR_CALLBACK_FAILURE; 2868 else if(!query_read_done) { 2869 if(h2_session->is_drop) { 2870 
/* connection needs to be closed. Return failure to make 2871 * sure no other action are taken anymore on comm point. 2872 * failure will result in reclaiming (and closing) 2873 * of comm point. */ 2874 verbose(VERB_QUERY, "http2 query dropped in worker cb"); 2875 h2_session->postpone_drop = 0; 2876 return NGHTTP2_ERR_CALLBACK_FAILURE; 2877 } 2878 /* nothing to submit right now, query added to mesh. */ 2879 h2_session->postpone_drop = 0; 2880 return 0; 2881 } 2882 if(!http2_submit_dns_response(h2_session)) { 2883 sldns_buffer_clear(h2_session->c->buffer); 2884 h2_session->c->h2_stream = NULL; 2885 return NGHTTP2_ERR_CALLBACK_FAILURE; 2886 } 2887 verbose(VERB_QUERY, "http2 query submitted to session"); 2888 sldns_buffer_clear(h2_session->c->buffer); 2889 h2_session->c->h2_stream = NULL; 2890 return 0; 2891 } 2892 2893 /** nghttp2 callback. Used to detect start of new streams. */ 2894 static int http2_req_begin_headers_cb(nghttp2_session* session, 2895 const nghttp2_frame* frame, void* cb_arg) 2896 { 2897 struct http2_session* h2_session = (struct http2_session*)cb_arg; 2898 struct http2_stream* h2_stream; 2899 int ret; 2900 if(frame->hd.type != NGHTTP2_HEADERS || 2901 frame->headers.cat != NGHTTP2_HCAT_REQUEST) { 2902 /* only interested in request headers */ 2903 return 0; 2904 } 2905 if(!(h2_stream = http2_stream_create(frame->hd.stream_id))) { 2906 log_err("malloc failure while creating http2 stream"); 2907 return NGHTTP2_ERR_CALLBACK_FAILURE; 2908 } 2909 http2_session_add_stream(h2_session, h2_stream); 2910 ret = nghttp2_session_set_stream_user_data(session, 2911 frame->hd.stream_id, h2_stream); 2912 if(ret) { 2913 /* stream does not exist */ 2914 verbose(VERB_QUERY, "http2: set_stream_user_data failed, " 2915 "error: %s", nghttp2_strerror(ret)); 2916 return NGHTTP2_ERR_CALLBACK_FAILURE; 2917 } 2918 2919 return 0; 2920 } 2921 2922 /** 2923 * base64url decode, store in qbuffer 2924 * @param h2_session: http2 session 2925 * @param h2_stream: http2 stream 2926 * 
@param start: start of the base64 string 2927 * @param length: length of the base64 string 2928 * @return: 0 on error, 1 otherwise. query will be stored in h2_stream->qbuffer, 2929 * buffer will be NULL is unparseble. 2930 */ 2931 static int http2_buffer_uri_query(struct http2_session* h2_session, 2932 struct http2_stream* h2_stream, const uint8_t* start, size_t length) 2933 { 2934 size_t expectb64len; 2935 int b64len; 2936 if(h2_stream->http_method == HTTP_METHOD_POST) 2937 return 1; 2938 if(length == 0) 2939 return 1; 2940 if(h2_stream->qbuffer) { 2941 verbose(VERB_ALGO, "http2_req_header fail, " 2942 "qbuffer already set"); 2943 return 0; 2944 } 2945 2946 /* calculate size, might be a bit bigger than the real 2947 * decoded buffer size */ 2948 expectb64len = sldns_b64_pton_calculate_size(length); 2949 log_assert(expectb64len > 0); 2950 if(expectb64len > 2951 h2_session->c->http2_stream_max_qbuffer_size) { 2952 h2_stream->query_too_large = 1; 2953 return 1; 2954 } 2955 2956 lock_basic_lock(&http2_query_buffer_count_lock); 2957 if(http2_query_buffer_count + expectb64len > http2_query_buffer_max) { 2958 lock_basic_unlock(&http2_query_buffer_count_lock); 2959 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, " 2960 "in http2-query-buffer-size"); 2961 return http2_submit_rst_stream(h2_session, h2_stream); 2962 } 2963 http2_query_buffer_count += expectb64len; 2964 lock_basic_unlock(&http2_query_buffer_count_lock); 2965 if(!(h2_stream->qbuffer = sldns_buffer_new(expectb64len))) { 2966 lock_basic_lock(&http2_query_buffer_count_lock); 2967 http2_query_buffer_count -= expectb64len; 2968 lock_basic_unlock(&http2_query_buffer_count_lock); 2969 log_err("http2_req_header fail, qbuffer " 2970 "malloc failure"); 2971 return 0; 2972 } 2973 2974 if(sldns_b64_contains_nonurl((char const*)start, length)) { 2975 char buf[65536+4]; 2976 verbose(VERB_ALGO, "HTTP2 stream contains wrong b64 encoding"); 2977 /* copy to the scratch buffer temporarily to terminate the 2978 * string 
with a zero */ 2979 if(length+1 > sizeof(buf)) { 2980 /* too long */ 2981 lock_basic_lock(&http2_query_buffer_count_lock); 2982 http2_query_buffer_count -= expectb64len; 2983 lock_basic_unlock(&http2_query_buffer_count_lock); 2984 sldns_buffer_free(h2_stream->qbuffer); 2985 h2_stream->qbuffer = NULL; 2986 return 1; 2987 } 2988 memmove(buf, start, length); 2989 buf[length] = 0; 2990 if(!(b64len = sldns_b64_pton(buf, sldns_buffer_current( 2991 h2_stream->qbuffer), expectb64len)) || b64len < 0) { 2992 lock_basic_lock(&http2_query_buffer_count_lock); 2993 http2_query_buffer_count -= expectb64len; 2994 lock_basic_unlock(&http2_query_buffer_count_lock); 2995 sldns_buffer_free(h2_stream->qbuffer); 2996 h2_stream->qbuffer = NULL; 2997 return 1; 2998 } 2999 } else { 3000 if(!(b64len = sldns_b64url_pton( 3001 (char const *)start, length, 3002 sldns_buffer_current(h2_stream->qbuffer), 3003 expectb64len)) || b64len < 0) { 3004 lock_basic_lock(&http2_query_buffer_count_lock); 3005 http2_query_buffer_count -= expectb64len; 3006 lock_basic_unlock(&http2_query_buffer_count_lock); 3007 sldns_buffer_free(h2_stream->qbuffer); 3008 h2_stream->qbuffer = NULL; 3009 /* return without error, method can be an 3010 * unknown POST */ 3011 return 1; 3012 } 3013 } 3014 sldns_buffer_skip(h2_stream->qbuffer, (size_t)b64len); 3015 return 1; 3016 } 3017 3018 /** nghttp2 callback. Used to parse headers from HEADER frames. 
*/ 3019 static int http2_req_header_cb(nghttp2_session* session, 3020 const nghttp2_frame* frame, const uint8_t* name, size_t namelen, 3021 const uint8_t* value, size_t valuelen, uint8_t ATTR_UNUSED(flags), 3022 void* cb_arg) 3023 { 3024 struct http2_stream* h2_stream = NULL; 3025 struct http2_session* h2_session = (struct http2_session*)cb_arg; 3026 /* nghttp2 deals with CONTINUATION frames and provides them as part of 3027 * the HEADER */ 3028 if(frame->hd.type != NGHTTP2_HEADERS || 3029 frame->headers.cat != NGHTTP2_HCAT_REQUEST) { 3030 /* only interested in request headers */ 3031 return 0; 3032 } 3033 if(!(h2_stream = nghttp2_session_get_stream_user_data(session, 3034 frame->hd.stream_id))) 3035 return 0; 3036 3037 /* earlier checks already indicate we can stop handling this query */ 3038 if(h2_stream->http_method == HTTP_METHOD_UNSUPPORTED || 3039 h2_stream->invalid_content_type || 3040 h2_stream->invalid_endpoint) 3041 return 0; 3042 3043 3044 /* nghttp2 performs some sanity checks in the headers, including: 3045 * name and value are guaranteed to be null terminated 3046 * name is guaranteed to be lowercase 3047 * content-length value is guaranteed to contain digits 3048 */ 3049 3050 if(!h2_stream->http_method && namelen == 7 && 3051 memcmp(":method", name, namelen) == 0) { 3052 /* Case insensitive check on :method value to be on the safe 3053 * side. I failed to find text about case sensitivity in specs. 
3054 */ 3055 if(valuelen == 3 && strcasecmp("GET", (const char*)value) == 0) 3056 h2_stream->http_method = HTTP_METHOD_GET; 3057 else if(valuelen == 4 && 3058 strcasecmp("POST", (const char*)value) == 0) { 3059 h2_stream->http_method = HTTP_METHOD_POST; 3060 if(h2_stream->qbuffer) { 3061 /* POST method uses query from DATA frames */ 3062 lock_basic_lock(&http2_query_buffer_count_lock); 3063 http2_query_buffer_count -= 3064 sldns_buffer_capacity(h2_stream->qbuffer); 3065 lock_basic_unlock(&http2_query_buffer_count_lock); 3066 sldns_buffer_free(h2_stream->qbuffer); 3067 h2_stream->qbuffer = NULL; 3068 } 3069 } else 3070 h2_stream->http_method = HTTP_METHOD_UNSUPPORTED; 3071 return 0; 3072 } 3073 if(namelen == 5 && memcmp(":path", name, namelen) == 0) { 3074 /* :path may contain DNS query, depending on method. Method might 3075 * not be known yet here, so check after finishing receiving 3076 * stream. */ 3077 #define HTTP_QUERY_PARAM "?dns=" 3078 size_t el = strlen(h2_session->c->http_endpoint); 3079 size_t qpl = strlen(HTTP_QUERY_PARAM); 3080 3081 if(valuelen < el || memcmp(h2_session->c->http_endpoint, 3082 value, el) != 0) { 3083 h2_stream->invalid_endpoint = 1; 3084 return 0; 3085 } 3086 /* larger than endpoint only allowed if it is for the query 3087 * parameter */ 3088 if(valuelen <= el+qpl || 3089 memcmp(HTTP_QUERY_PARAM, value+el, qpl) != 0) { 3090 if(valuelen != el) 3091 h2_stream->invalid_endpoint = 1; 3092 return 0; 3093 } 3094 3095 if(!http2_buffer_uri_query(h2_session, h2_stream, 3096 value+(el+qpl), valuelen-(el+qpl))) { 3097 return NGHTTP2_ERR_CALLBACK_FAILURE; 3098 } 3099 return 0; 3100 } 3101 /* Content type is a SHOULD (rfc7231#section-3.1.1.5) when using POST, 3102 * and not needed when using GET. Don't enfore. 3103 * If set only allow lowercase "application/dns-message". 3104 * 3105 * Clients SHOULD (rfc8484#section-4.1) set an accept header, but MUST 3106 * be able to handle "application/dns-message". 
Since that is the only 3107 * content-type supported we can ignore the accept header. 3108 */ 3109 if((namelen == 12 && memcmp("content-type", name, namelen) == 0)) { 3110 if(valuelen != 23 || memcmp("application/dns-message", value, 3111 valuelen) != 0) { 3112 h2_stream->invalid_content_type = 1; 3113 } 3114 } 3115 3116 /* Only interested in content-lentg for POST (on not yet known) method. 3117 */ 3118 if((!h2_stream->http_method || 3119 h2_stream->http_method == HTTP_METHOD_POST) && 3120 !h2_stream->content_length && namelen == 14 && 3121 memcmp("content-length", name, namelen) == 0) { 3122 if(valuelen > 5) { 3123 h2_stream->query_too_large = 1; 3124 return 0; 3125 } 3126 /* guaranteed to only contain digits and be null terminated */ 3127 h2_stream->content_length = atoi((const char*)value); 3128 if(h2_stream->content_length > 3129 h2_session->c->http2_stream_max_qbuffer_size) { 3130 h2_stream->query_too_large = 1; 3131 return 0; 3132 } 3133 } 3134 return 0; 3135 } 3136 3137 /** nghttp2 callback. Used to get data from DATA frames, which can contain 3138 * queries in POST requests. */ 3139 static int http2_req_data_chunk_recv_cb(nghttp2_session* ATTR_UNUSED(session), 3140 uint8_t ATTR_UNUSED(flags), int32_t stream_id, const uint8_t* data, 3141 size_t len, void* cb_arg) 3142 { 3143 struct http2_session* h2_session = (struct http2_session*)cb_arg; 3144 struct http2_stream* h2_stream; 3145 size_t qlen = 0; 3146 3147 if(!(h2_stream = nghttp2_session_get_stream_user_data( 3148 h2_session->session, stream_id))) { 3149 return 0; 3150 } 3151 3152 if(h2_stream->query_too_large) 3153 return 0; 3154 3155 if(!h2_stream->qbuffer) { 3156 if(h2_stream->content_length) { 3157 if(h2_stream->content_length < len) 3158 /* getting more data in DATA frame than 3159 * advertised in content-length header. 
*/ 3160 return NGHTTP2_ERR_CALLBACK_FAILURE; 3161 qlen = h2_stream->content_length; 3162 } else if(len <= h2_session->c->http2_stream_max_qbuffer_size) { 3163 /* setting this to msg-buffer-size can result in a lot 3164 * of memory consuption. Most queries should fit in a 3165 * single DATA frame, and most POST queries will 3166 * contain content-length which does not impose this 3167 * limit. */ 3168 qlen = len; 3169 } 3170 } 3171 if(!h2_stream->qbuffer && qlen) { 3172 lock_basic_lock(&http2_query_buffer_count_lock); 3173 if(http2_query_buffer_count + qlen > http2_query_buffer_max) { 3174 lock_basic_unlock(&http2_query_buffer_count_lock); 3175 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, " 3176 "in http2-query-buffer-size"); 3177 return http2_submit_rst_stream(h2_session, h2_stream); 3178 } 3179 http2_query_buffer_count += qlen; 3180 lock_basic_unlock(&http2_query_buffer_count_lock); 3181 if(!(h2_stream->qbuffer = sldns_buffer_new(qlen))) { 3182 lock_basic_lock(&http2_query_buffer_count_lock); 3183 http2_query_buffer_count -= qlen; 3184 lock_basic_unlock(&http2_query_buffer_count_lock); 3185 } 3186 } 3187 3188 if(!h2_stream->qbuffer || 3189 sldns_buffer_remaining(h2_stream->qbuffer) < len) { 3190 verbose(VERB_ALGO, "http2 data_chunck_recv failed. Not enough " 3191 "buffer space for POST query. 
Can happen on multi " 3192 "frame requests without content-length header"); 3193 h2_stream->query_too_large = 1; 3194 return 0; 3195 } 3196 3197 sldns_buffer_write(h2_stream->qbuffer, data, len); 3198 3199 return 0; 3200 } 3201 3202 void http2_req_stream_clear(struct http2_stream* h2_stream) 3203 { 3204 if(h2_stream->qbuffer) { 3205 lock_basic_lock(&http2_query_buffer_count_lock); 3206 http2_query_buffer_count -= 3207 sldns_buffer_capacity(h2_stream->qbuffer); 3208 lock_basic_unlock(&http2_query_buffer_count_lock); 3209 sldns_buffer_free(h2_stream->qbuffer); 3210 h2_stream->qbuffer = NULL; 3211 } 3212 if(h2_stream->rbuffer) { 3213 lock_basic_lock(&http2_response_buffer_count_lock); 3214 http2_response_buffer_count -= 3215 sldns_buffer_capacity(h2_stream->rbuffer); 3216 lock_basic_unlock(&http2_response_buffer_count_lock); 3217 sldns_buffer_free(h2_stream->rbuffer); 3218 h2_stream->rbuffer = NULL; 3219 } 3220 } 3221 3222 nghttp2_session_callbacks* http2_req_callbacks_create(void) 3223 { 3224 nghttp2_session_callbacks *callbacks; 3225 if(nghttp2_session_callbacks_new(&callbacks) == NGHTTP2_ERR_NOMEM) { 3226 log_err("failed to initialize nghttp2 callback"); 3227 return NULL; 3228 } 3229 /* reception of header block started, used to create h2_stream */ 3230 nghttp2_session_callbacks_set_on_begin_headers_callback(callbacks, 3231 http2_req_begin_headers_cb); 3232 /* complete frame received, used to get data from stream if frame 3233 * has end stream flag, and start processing query */ 3234 nghttp2_session_callbacks_set_on_frame_recv_callback(callbacks, 3235 http2_req_frame_recv_cb); 3236 /* get request info from headers */ 3237 nghttp2_session_callbacks_set_on_header_callback(callbacks, 3238 http2_req_header_cb); 3239 /* get data from DATA frames, containing POST query */ 3240 nghttp2_session_callbacks_set_on_data_chunk_recv_callback(callbacks, 3241 http2_req_data_chunk_recv_cb); 3242 3243 /* generic HTTP2 callbacks */ 3244 
nghttp2_session_callbacks_set_recv_callback(callbacks, http2_recv_cb); 3245 nghttp2_session_callbacks_set_send_callback(callbacks, http2_send_cb); 3246 nghttp2_session_callbacks_set_on_stream_close_callback(callbacks, 3247 http2_stream_close_cb); 3248 3249 return callbacks; 3250 } 3251 #endif /* HAVE_NGHTTP2 */ 3252 3253 #ifdef HAVE_NGTCP2 3254 struct doq_table* 3255 doq_table_create(struct config_file* cfg, struct ub_randstate* rnd) 3256 { 3257 struct doq_table* table = calloc(1, sizeof(*table)); 3258 if(!table) 3259 return NULL; 3260 table->idle_timeout = ((uint64_t)cfg->tcp_idle_timeout)* 3261 NGTCP2_MILLISECONDS; 3262 table->sv_scidlen = 16; 3263 table->static_secret_len = 16; 3264 table->static_secret = malloc(table->static_secret_len); 3265 if(!table->static_secret) { 3266 free(table); 3267 return NULL; 3268 } 3269 doq_fill_rand(rnd, table->static_secret, table->static_secret_len); 3270 table->conn_tree = rbtree_create(doq_conn_cmp); 3271 if(!table->conn_tree) { 3272 free(table->static_secret); 3273 free(table); 3274 return NULL; 3275 } 3276 table->conid_tree = rbtree_create(doq_conid_cmp); 3277 if(!table->conid_tree) { 3278 free(table->static_secret); 3279 free(table->conn_tree); 3280 free(table); 3281 return NULL; 3282 } 3283 table->timer_tree = rbtree_create(doq_timer_cmp); 3284 if(!table->timer_tree) { 3285 free(table->static_secret); 3286 free(table->conn_tree); 3287 free(table->conid_tree); 3288 free(table); 3289 return NULL; 3290 } 3291 lock_rw_init(&table->lock); 3292 lock_rw_init(&table->conid_lock); 3293 lock_basic_init(&table->size_lock); 3294 lock_protect(&table->lock, &table->static_secret, 3295 sizeof(table->static_secret)); 3296 lock_protect(&table->lock, &table->static_secret_len, 3297 sizeof(table->static_secret_len)); 3298 lock_protect(&table->lock, table->static_secret, 3299 table->static_secret_len); 3300 lock_protect(&table->lock, &table->sv_scidlen, 3301 sizeof(table->sv_scidlen)); 3302 lock_protect(&table->lock, &table->idle_timeout, 
3303 sizeof(table->idle_timeout)); 3304 lock_protect(&table->lock, &table->conn_tree, sizeof(table->conn_tree)); 3305 lock_protect(&table->lock, table->conn_tree, sizeof(*table->conn_tree)); 3306 lock_protect(&table->conid_lock, table->conid_tree, 3307 sizeof(*table->conid_tree)); 3308 lock_protect(&table->lock, table->timer_tree, 3309 sizeof(*table->timer_tree)); 3310 lock_protect(&table->size_lock, &table->current_size, 3311 sizeof(table->current_size)); 3312 return table; 3313 } 3314 3315 /** delete elements from the connection tree */ 3316 static void 3317 conn_tree_del(rbnode_type* node, void* arg) 3318 { 3319 struct doq_table* table = (struct doq_table*)arg; 3320 struct doq_conn* conn; 3321 if(!node) 3322 return; 3323 conn = (struct doq_conn*)node->key; 3324 if(conn->timer.timer_in_list) { 3325 /* Remove timer from list first, because finding the rbnode 3326 * element of the setlist of same timeouts needs tree lookup. 3327 * Edit the tree structure after that lookup. */ 3328 doq_timer_list_remove(conn->table, &conn->timer); 3329 } 3330 if(conn->timer.timer_in_tree) 3331 doq_timer_tree_remove(conn->table, &conn->timer); 3332 doq_table_quic_size_subtract(table, sizeof(*conn)+conn->key.dcidlen); 3333 doq_conn_delete(conn, table); 3334 } 3335 3336 /** delete elements from the connection id tree */ 3337 static void 3338 conid_tree_del(rbnode_type* node, void* ATTR_UNUSED(arg)) 3339 { 3340 if(!node) 3341 return; 3342 doq_conid_delete((struct doq_conid*)node->key); 3343 } 3344 3345 void 3346 doq_table_delete(struct doq_table* table) 3347 { 3348 if(!table) 3349 return; 3350 lock_rw_destroy(&table->lock); 3351 free(table->static_secret); 3352 if(table->conn_tree) { 3353 traverse_postorder(table->conn_tree, conn_tree_del, table); 3354 free(table->conn_tree); 3355 } 3356 lock_rw_destroy(&table->conid_lock); 3357 if(table->conid_tree) { 3358 /* The tree should be empty, because the doq_conn_delete calls 3359 * above should have also removed their conid elements. 
*/ 3360 traverse_postorder(table->conid_tree, conid_tree_del, NULL); 3361 free(table->conid_tree); 3362 } 3363 lock_basic_destroy(&table->size_lock); 3364 if(table->timer_tree) { 3365 /* The tree should be empty, because the conn_tree_del calls 3366 * above should also have removed them. Also the doq_timer 3367 * is part of the doq_conn struct, so is already freed. */ 3368 free(table->timer_tree); 3369 } 3370 table->write_list_first = NULL; 3371 table->write_list_last = NULL; 3372 free(table); 3373 } 3374 3375 struct doq_timer* 3376 doq_timer_find_time(struct doq_table* table, struct timeval* tv) 3377 { 3378 struct doq_timer key; 3379 struct rbnode_type* node; 3380 memset(&key, 0, sizeof(key)); 3381 key.time.tv_sec = tv->tv_sec; 3382 key.time.tv_usec = tv->tv_usec; 3383 node = rbtree_search(table->timer_tree, &key); 3384 if(node) 3385 return (struct doq_timer*)node->key; 3386 return NULL; 3387 } 3388 3389 void 3390 doq_timer_tree_remove(struct doq_table* table, struct doq_timer* timer) 3391 { 3392 if(!timer->timer_in_tree) 3393 return; 3394 rbtree_delete(table->timer_tree, timer); 3395 timer->timer_in_tree = 0; 3396 /* This item could have more timers in the same set. 
*/ 3397 if(timer->setlist_first) { 3398 struct doq_timer* rb_timer = timer->setlist_first; 3399 /* del first element from setlist */ 3400 if(rb_timer->setlist_next) 3401 rb_timer->setlist_next->setlist_prev = NULL; 3402 else 3403 timer->setlist_last = NULL; 3404 timer->setlist_first = rb_timer->setlist_next; 3405 rb_timer->setlist_prev = NULL; 3406 rb_timer->setlist_next = NULL; 3407 rb_timer->timer_in_list = 0; 3408 /* insert it into the tree as new rb element */ 3409 memset(&rb_timer->node, 0, sizeof(rb_timer->node)); 3410 rb_timer->node.key = rb_timer; 3411 rbtree_insert(table->timer_tree, &rb_timer->node); 3412 rb_timer->timer_in_tree = 1; 3413 /* the setlist, if any remainder, moves to the rb element */ 3414 rb_timer->setlist_first = timer->setlist_first; 3415 rb_timer->setlist_last = timer->setlist_last; 3416 timer->setlist_first = NULL; 3417 timer->setlist_last = NULL; 3418 rb_timer->worker_doq_socket = timer->worker_doq_socket; 3419 } 3420 timer->worker_doq_socket = NULL; 3421 } 3422 3423 void 3424 doq_timer_list_remove(struct doq_table* table, struct doq_timer* timer) 3425 { 3426 struct doq_timer* rb_timer; 3427 if(!timer->timer_in_list) 3428 return; 3429 /* The item in the rbtree has the list start and end. 
*/ 3430 rb_timer = doq_timer_find_time(table, &timer->time); 3431 if(rb_timer) { 3432 if(timer->setlist_prev) 3433 timer->setlist_prev->setlist_next = timer->setlist_next; 3434 else 3435 rb_timer->setlist_first = timer->setlist_next; 3436 if(timer->setlist_next) 3437 timer->setlist_next->setlist_prev = timer->setlist_prev; 3438 else 3439 rb_timer->setlist_last = timer->setlist_prev; 3440 timer->setlist_prev = NULL; 3441 timer->setlist_next = NULL; 3442 } 3443 timer->timer_in_list = 0; 3444 } 3445 3446 /** doq append timer to setlist */ 3447 static void 3448 doq_timer_list_append(struct doq_timer* rb_timer, struct doq_timer* timer) 3449 { 3450 log_assert(timer->timer_in_list == 0); 3451 timer->timer_in_list = 1; 3452 timer->setlist_next = NULL; 3453 timer->setlist_prev = rb_timer->setlist_last; 3454 if(rb_timer->setlist_last) 3455 rb_timer->setlist_last->setlist_next = timer; 3456 else 3457 rb_timer->setlist_first = timer; 3458 rb_timer->setlist_last = timer; 3459 } 3460 3461 void 3462 doq_timer_unset(struct doq_table* table, struct doq_timer* timer) 3463 { 3464 if(timer->timer_in_list) { 3465 /* Remove timer from list first, because finding the rbnode 3466 * element of the setlist of same timeouts needs tree lookup. 3467 * Edit the tree structure after that lookup. 
*/ 3468 doq_timer_list_remove(table, timer); 3469 } 3470 if(timer->timer_in_tree) 3471 doq_timer_tree_remove(table, timer); 3472 timer->worker_doq_socket = NULL; 3473 } 3474 3475 void doq_timer_set(struct doq_table* table, struct doq_timer* timer, 3476 struct doq_server_socket* worker_doq_socket, struct timeval* tv) 3477 { 3478 struct doq_timer* rb_timer; 3479 if(verbosity >= VERB_ALGO && timer->conn) { 3480 char a[256]; 3481 struct timeval rel; 3482 addr_to_str((void*)&timer->conn->key.paddr.addr, 3483 timer->conn->key.paddr.addrlen, a, sizeof(a)); 3484 timeval_subtract(&rel, tv, worker_doq_socket->now_tv); 3485 verbose(VERB_ALGO, "doq %s timer set %d.%6.6d in %d.%6.6d", 3486 a, (int)tv->tv_sec, (int)tv->tv_usec, 3487 (int)rel.tv_sec, (int)rel.tv_usec); 3488 } 3489 if(timer->timer_in_tree || timer->timer_in_list) { 3490 if(timer->time.tv_sec == tv->tv_sec && 3491 timer->time.tv_usec == tv->tv_usec) 3492 return; /* already set on that time */ 3493 doq_timer_unset(table, timer); 3494 } 3495 timer->time.tv_sec = tv->tv_sec; 3496 timer->time.tv_usec = tv->tv_usec; 3497 rb_timer = doq_timer_find_time(table, tv); 3498 if(rb_timer) { 3499 /* There is a timeout already with this value. Timer is 3500 * added to the setlist. */ 3501 doq_timer_list_append(rb_timer, timer); 3502 } else { 3503 /* There is no timeout with this value. Make timer a new 3504 * tree element. 
*/ 3505 memset(&timer->node, 0, sizeof(timer->node)); 3506 timer->node.key = timer; 3507 rbtree_insert(table->timer_tree, &timer->node); 3508 timer->timer_in_tree = 1; 3509 timer->setlist_first = NULL; 3510 timer->setlist_last = NULL; 3511 timer->worker_doq_socket = worker_doq_socket; 3512 } 3513 } 3514 3515 struct doq_conn* 3516 doq_conn_create(struct comm_point* c, struct doq_pkt_addr* paddr, 3517 const uint8_t* dcid, size_t dcidlen, uint32_t version) 3518 { 3519 struct doq_conn* conn = calloc(1, sizeof(*conn)); 3520 if(!conn) 3521 return NULL; 3522 conn->node.key = conn; 3523 conn->doq_socket = c->doq_socket; 3524 conn->table = c->doq_socket->table; 3525 memmove(&conn->key.paddr.addr, &paddr->addr, paddr->addrlen); 3526 conn->key.paddr.addrlen = paddr->addrlen; 3527 memmove(&conn->key.paddr.localaddr, &paddr->localaddr, 3528 paddr->localaddrlen); 3529 conn->key.paddr.localaddrlen = paddr->localaddrlen; 3530 conn->key.paddr.ifindex = paddr->ifindex; 3531 conn->key.dcid = memdup((void*)dcid, dcidlen); 3532 if(!conn->key.dcid) { 3533 free(conn); 3534 return NULL; 3535 } 3536 conn->key.dcidlen = dcidlen; 3537 conn->version = version; 3538 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 3539 ngtcp2_ccerr_default(&conn->ccerr); 3540 #else 3541 ngtcp2_connection_close_error_default(&conn->last_error); 3542 #endif 3543 rbtree_init(&conn->stream_tree, &doq_stream_cmp); 3544 conn->timer.conn = conn; 3545 lock_basic_init(&conn->lock); 3546 lock_protect(&conn->lock, &conn->key, sizeof(conn->key)); 3547 lock_protect(&conn->lock, &conn->doq_socket, sizeof(conn->doq_socket)); 3548 lock_protect(&conn->lock, &conn->table, sizeof(conn->table)); 3549 lock_protect(&conn->lock, &conn->is_deleted, sizeof(conn->is_deleted)); 3550 lock_protect(&conn->lock, &conn->version, sizeof(conn->version)); 3551 lock_protect(&conn->lock, &conn->conn, sizeof(conn->conn)); 3552 lock_protect(&conn->lock, &conn->conid_list, sizeof(conn->conid_list)); 3553 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 3554 
lock_protect(&conn->lock, &conn->ccerr, sizeof(conn->ccerr)); 3555 #else 3556 lock_protect(&conn->lock, &conn->last_error, sizeof(conn->last_error)); 3557 #endif 3558 lock_protect(&conn->lock, &conn->tls_alert, sizeof(conn->tls_alert)); 3559 lock_protect(&conn->lock, &conn->ssl, sizeof(conn->ssl)); 3560 lock_protect(&conn->lock, &conn->close_pkt, sizeof(conn->close_pkt)); 3561 lock_protect(&conn->lock, &conn->close_pkt_len, sizeof(conn->close_pkt_len)); 3562 lock_protect(&conn->lock, &conn->close_ecn, sizeof(conn->close_ecn)); 3563 lock_protect(&conn->lock, &conn->stream_tree, sizeof(conn->stream_tree)); 3564 lock_protect(&conn->lock, &conn->stream_write_first, sizeof(conn->stream_write_first)); 3565 lock_protect(&conn->lock, &conn->stream_write_last, sizeof(conn->stream_write_last)); 3566 lock_protect(&conn->lock, &conn->write_interest, sizeof(conn->write_interest)); 3567 lock_protect(&conn->lock, &conn->on_write_list, sizeof(conn->on_write_list)); 3568 lock_protect(&conn->lock, &conn->write_prev, sizeof(conn->write_prev)); 3569 lock_protect(&conn->lock, &conn->write_next, sizeof(conn->write_next)); 3570 return conn; 3571 } 3572 3573 /** delete stream tree node */ 3574 static void 3575 stream_tree_del(rbnode_type* node, void* arg) 3576 { 3577 struct doq_table* table = (struct doq_table*)arg; 3578 struct doq_stream* stream; 3579 if(!node) 3580 return; 3581 stream = (struct doq_stream*)node; 3582 if(stream->in) 3583 doq_table_quic_size_subtract(table, stream->inlen); 3584 if(stream->out) 3585 doq_table_quic_size_subtract(table, stream->outlen); 3586 doq_table_quic_size_subtract(table, sizeof(*stream)); 3587 doq_stream_delete(stream); 3588 } 3589 3590 void 3591 doq_conn_delete(struct doq_conn* conn, struct doq_table* table) 3592 { 3593 if(!conn) 3594 return; 3595 lock_basic_destroy(&conn->lock); 3596 lock_rw_wrlock(&conn->table->conid_lock); 3597 doq_conn_clear_conids(conn); 3598 lock_rw_unlock(&conn->table->conid_lock); 3599 ngtcp2_conn_del(conn->conn); 3600 
if(conn->stream_tree.count != 0) { 3601 traverse_postorder(&conn->stream_tree, stream_tree_del, table); 3602 } 3603 free(conn->key.dcid); 3604 SSL_free(conn->ssl); 3605 free(conn->close_pkt); 3606 free(conn); 3607 } 3608 3609 int 3610 doq_conn_cmp(const void* key1, const void* key2) 3611 { 3612 struct doq_conn* c = (struct doq_conn*)key1; 3613 struct doq_conn* d = (struct doq_conn*)key2; 3614 int r; 3615 /* Compared in the order destination address, then 3616 * local address, ifindex and then dcid. 3617 * So that for a search for findlessorequal for the destination 3618 * address will find connections to that address, with different 3619 * dcids. 3620 * Also a printout in sorted order prints the connections by IP 3621 * address of destination, and then a number of them depending on the 3622 * dcids. */ 3623 if(c->key.paddr.addrlen != d->key.paddr.addrlen) { 3624 if(c->key.paddr.addrlen < d->key.paddr.addrlen) 3625 return -1; 3626 return 1; 3627 } 3628 if((r=memcmp(&c->key.paddr.addr, &d->key.paddr.addr, 3629 c->key.paddr.addrlen))!=0) 3630 return r; 3631 if(c->key.paddr.localaddrlen != d->key.paddr.localaddrlen) { 3632 if(c->key.paddr.localaddrlen < d->key.paddr.localaddrlen) 3633 return -1; 3634 return 1; 3635 } 3636 if((r=memcmp(&c->key.paddr.localaddr, &d->key.paddr.localaddr, 3637 c->key.paddr.localaddrlen))!=0) 3638 return r; 3639 if(c->key.paddr.ifindex != d->key.paddr.ifindex) { 3640 if(c->key.paddr.ifindex < d->key.paddr.ifindex) 3641 return -1; 3642 return 1; 3643 } 3644 if(c->key.dcidlen != d->key.dcidlen) { 3645 if(c->key.dcidlen < d->key.dcidlen) 3646 return -1; 3647 return 1; 3648 } 3649 if((r=memcmp(c->key.dcid, d->key.dcid, c->key.dcidlen))!=0) 3650 return r; 3651 return 0; 3652 } 3653 3654 int doq_conid_cmp(const void* key1, const void* key2) 3655 { 3656 struct doq_conid* c = (struct doq_conid*)key1; 3657 struct doq_conid* d = (struct doq_conid*)key2; 3658 if(c->cidlen != d->cidlen) { 3659 if(c->cidlen < d->cidlen) 3660 return -1; 3661 return 1; 
3662 } 3663 return memcmp(c->cid, d->cid, c->cidlen); 3664 } 3665 3666 int doq_timer_cmp(const void* key1, const void* key2) 3667 { 3668 struct doq_timer* e = (struct doq_timer*)key1; 3669 struct doq_timer* f = (struct doq_timer*)key2; 3670 if(e->time.tv_sec < f->time.tv_sec) 3671 return -1; 3672 if(e->time.tv_sec > f->time.tv_sec) 3673 return 1; 3674 if(e->time.tv_usec < f->time.tv_usec) 3675 return -1; 3676 if(e->time.tv_usec > f->time.tv_usec) 3677 return 1; 3678 return 0; 3679 } 3680 3681 int doq_stream_cmp(const void* key1, const void* key2) 3682 { 3683 struct doq_stream* c = (struct doq_stream*)key1; 3684 struct doq_stream* d = (struct doq_stream*)key2; 3685 if(c->stream_id != d->stream_id) { 3686 if(c->stream_id < d->stream_id) 3687 return -1; 3688 return 1; 3689 } 3690 return 0; 3691 } 3692 3693 /** doq store a local address in repinfo */ 3694 static void 3695 doq_repinfo_store_localaddr(struct comm_reply* repinfo, 3696 struct doq_addr_storage* localaddr, socklen_t localaddrlen) 3697 { 3698 /* use the pktinfo that we have for ancillary udp data otherwise, 3699 * this saves space for a sockaddr */ 3700 memset(&repinfo->pktinfo, 0, sizeof(repinfo->pktinfo)); 3701 if(addr_is_ip6((void*)localaddr, localaddrlen)) { 3702 #ifdef IPV6_PKTINFO 3703 struct sockaddr_in6* sa6 = (struct sockaddr_in6*)localaddr; 3704 memmove(&repinfo->pktinfo.v6info.ipi6_addr, 3705 &sa6->sin6_addr, sizeof(struct in6_addr)); 3706 repinfo->doq_srcport = sa6->sin6_port; 3707 #endif 3708 repinfo->srctype = 6; 3709 } else { 3710 #ifdef IP_PKTINFO 3711 struct sockaddr_in* sa = (struct sockaddr_in*)localaddr; 3712 memmove(&repinfo->pktinfo.v4info.ipi_addr, 3713 &sa->sin_addr, sizeof(struct in_addr)); 3714 repinfo->doq_srcport = sa->sin_port; 3715 #elif defined(IP_RECVDSTADDR) 3716 struct sockaddr_in* sa = (struct sockaddr_in*)localaddr; 3717 memmove(&repinfo->pktinfo.v4addr, &sa->sin_addr, 3718 sizeof(struct in_addr)); 3719 repinfo->doq_srcport = sa->sin_port; 3720 #endif 3721 repinfo->srctype 
= 4; 3722 } 3723 } 3724 3725 /** doq retrieve localaddr from repinfo */ 3726 static void 3727 doq_repinfo_retrieve_localaddr(struct comm_reply* repinfo, 3728 struct doq_addr_storage* localaddr, socklen_t* localaddrlen) 3729 { 3730 if(repinfo->srctype == 6) { 3731 #ifdef IPV6_PKTINFO 3732 struct sockaddr_in6* sa6 = (struct sockaddr_in6*)localaddr; 3733 *localaddrlen = (socklen_t)sizeof(struct sockaddr_in6); 3734 memset(sa6, 0, *localaddrlen); 3735 sa6->sin6_family = AF_INET6; 3736 memmove(&sa6->sin6_addr, &repinfo->pktinfo.v6info.ipi6_addr, 3737 *localaddrlen); 3738 sa6->sin6_port = repinfo->doq_srcport; 3739 #endif 3740 } else { 3741 #ifdef IP_PKTINFO 3742 struct sockaddr_in* sa = (struct sockaddr_in*)localaddr; 3743 *localaddrlen = (socklen_t)sizeof(struct sockaddr_in); 3744 memset(sa, 0, *localaddrlen); 3745 sa->sin_family = AF_INET; 3746 memmove(&sa->sin_addr, &repinfo->pktinfo.v4info.ipi_addr, 3747 *localaddrlen); 3748 sa->sin_port = repinfo->doq_srcport; 3749 #elif defined(IP_RECVDSTADDR) 3750 struct sockaddr_in* sa = (struct sockaddr_in*)localaddr; 3751 *localaddrlen = (socklen_t)sizeof(struct sockaddr_in); 3752 memset(sa, 0, *localaddrlen); 3753 sa->sin_family = AF_INET; 3754 memmove(&sa->sin_addr, &repinfo->pktinfo.v4addr, 3755 sizeof(struct in_addr)); 3756 sa->sin_port = repinfo->doq_srcport; 3757 #endif 3758 } 3759 } 3760 3761 /** doq write a connection key into repinfo, false if it does not fit */ 3762 static int 3763 doq_conn_key_store_repinfo(struct doq_conn_key* key, 3764 struct comm_reply* repinfo) 3765 { 3766 repinfo->is_proxied = 0; 3767 repinfo->doq_ifindex = key->paddr.ifindex; 3768 repinfo->remote_addrlen = key->paddr.addrlen; 3769 memmove(&repinfo->remote_addr, &key->paddr.addr, 3770 repinfo->remote_addrlen); 3771 repinfo->client_addrlen = key->paddr.addrlen; 3772 memmove(&repinfo->client_addr, &key->paddr.addr, 3773 repinfo->client_addrlen); 3774 doq_repinfo_store_localaddr(repinfo, &key->paddr.localaddr, 3775 key->paddr.localaddrlen); 3776 
if(key->dcidlen > sizeof(repinfo->doq_dcid)) 3777 return 0; 3778 repinfo->doq_dcidlen = key->dcidlen; 3779 memmove(repinfo->doq_dcid, key->dcid, key->dcidlen); 3780 return 1; 3781 } 3782 3783 void 3784 doq_conn_key_from_repinfo(struct doq_conn_key* key, struct comm_reply* repinfo) 3785 { 3786 key->paddr.ifindex = repinfo->doq_ifindex; 3787 key->paddr.addrlen = repinfo->remote_addrlen; 3788 memmove(&key->paddr.addr, &repinfo->remote_addr, 3789 repinfo->remote_addrlen); 3790 doq_repinfo_retrieve_localaddr(repinfo, &key->paddr.localaddr, 3791 &key->paddr.localaddrlen); 3792 key->dcidlen = repinfo->doq_dcidlen; 3793 key->dcid = repinfo->doq_dcid; 3794 } 3795 3796 /** doq add a stream to the connection */ 3797 static void 3798 doq_conn_add_stream(struct doq_conn* conn, struct doq_stream* stream) 3799 { 3800 (void)rbtree_insert(&conn->stream_tree, &stream->node); 3801 } 3802 3803 /** doq delete a stream from the connection */ 3804 static void 3805 doq_conn_del_stream(struct doq_conn* conn, struct doq_stream* stream) 3806 { 3807 (void)rbtree_delete(&conn->stream_tree, &stream->node); 3808 } 3809 3810 /** doq create new stream */ 3811 static struct doq_stream* 3812 doq_stream_create(int64_t stream_id) 3813 { 3814 struct doq_stream* stream = calloc(1, sizeof(*stream)); 3815 if(!stream) 3816 return NULL; 3817 stream->node.key = stream; 3818 stream->stream_id = stream_id; 3819 return stream; 3820 } 3821 3822 void doq_stream_delete(struct doq_stream* stream) 3823 { 3824 if(!stream) 3825 return; 3826 free(stream->in); 3827 free(stream->out); 3828 free(stream); 3829 } 3830 3831 struct doq_stream* 3832 doq_stream_find(struct doq_conn* conn, int64_t stream_id) 3833 { 3834 rbnode_type* node; 3835 struct doq_stream key; 3836 key.node.key = &key; 3837 key.stream_id = stream_id; 3838 node = rbtree_search(&conn->stream_tree, &key); 3839 if(node) 3840 return (struct doq_stream*)node->key; 3841 return NULL; 3842 } 3843 3844 /** doq put stream on the conn write list */ 3845 static void 
3846 doq_stream_on_write_list(struct doq_conn* conn, struct doq_stream* stream) 3847 { 3848 if(stream->on_write_list) 3849 return; 3850 stream->write_prev = conn->stream_write_last; 3851 if(conn->stream_write_last) 3852 conn->stream_write_last->write_next = stream; 3853 else 3854 conn->stream_write_first = stream; 3855 conn->stream_write_last = stream; 3856 stream->write_next = NULL; 3857 stream->on_write_list = 1; 3858 } 3859 3860 /** doq remove stream from the conn write list */ 3861 static void 3862 doq_stream_off_write_list(struct doq_conn* conn, struct doq_stream* stream) 3863 { 3864 if(!stream->on_write_list) 3865 return; 3866 if(stream->write_next) 3867 stream->write_next->write_prev = stream->write_prev; 3868 else conn->stream_write_last = stream->write_prev; 3869 if(stream->write_prev) 3870 stream->write_prev->write_next = stream->write_next; 3871 else conn->stream_write_first = stream->write_next; 3872 stream->write_prev = NULL; 3873 stream->write_next = NULL; 3874 stream->on_write_list = 0; 3875 } 3876 3877 /** doq stream remove in buffer */ 3878 static void 3879 doq_stream_remove_in_buffer(struct doq_stream* stream, struct doq_table* table) 3880 { 3881 if(stream->in) { 3882 doq_table_quic_size_subtract(table, stream->inlen); 3883 free(stream->in); 3884 stream->in = NULL; 3885 stream->inlen = 0; 3886 } 3887 } 3888 3889 /** doq stream remove out buffer */ 3890 static void 3891 doq_stream_remove_out_buffer(struct doq_stream* stream, 3892 struct doq_table* table) 3893 { 3894 if(stream->out) { 3895 doq_table_quic_size_subtract(table, stream->outlen); 3896 free(stream->out); 3897 stream->out = NULL; 3898 stream->outlen = 0; 3899 } 3900 } 3901 3902 int 3903 doq_stream_close(struct doq_conn* conn, struct doq_stream* stream, 3904 int send_shutdown) 3905 { 3906 int ret; 3907 if(stream->is_closed) 3908 return 1; 3909 stream->is_closed = 1; 3910 doq_stream_off_write_list(conn, stream); 3911 if(send_shutdown) { 3912 verbose(VERB_ALGO, "doq: shutdown stream_id %d 
with app_error_code %d", 3913 (int)stream->stream_id, (int)DOQ_APP_ERROR_CODE); 3914 ret = ngtcp2_conn_shutdown_stream(conn->conn, 3915 #ifdef HAVE_NGTCP2_CONN_SHUTDOWN_STREAM4 3916 0, 3917 #endif 3918 stream->stream_id, DOQ_APP_ERROR_CODE); 3919 if(ret != 0) { 3920 log_err("doq ngtcp2_conn_shutdown_stream %d failed: %s", 3921 (int)stream->stream_id, ngtcp2_strerror(ret)); 3922 return 0; 3923 } 3924 doq_conn_write_enable(conn); 3925 } 3926 verbose(VERB_ALGO, "doq: conn extend max streams bidi by 1"); 3927 ngtcp2_conn_extend_max_streams_bidi(conn->conn, 1); 3928 doq_conn_write_enable(conn); 3929 doq_stream_remove_in_buffer(stream, conn->doq_socket->table); 3930 doq_stream_remove_out_buffer(stream, conn->doq_socket->table); 3931 doq_table_quic_size_subtract(conn->doq_socket->table, sizeof(*stream)); 3932 doq_conn_del_stream(conn, stream); 3933 doq_stream_delete(stream); 3934 return 1; 3935 } 3936 3937 /** doq stream pick up answer data from buffer */ 3938 static int 3939 doq_stream_pickup_answer(struct doq_stream* stream, struct sldns_buffer* buf) 3940 { 3941 stream->is_answer_available = 1; 3942 if(stream->out) { 3943 free(stream->out); 3944 stream->out = NULL; 3945 stream->outlen = 0; 3946 } 3947 stream->nwrite = 0; 3948 stream->outlen = sldns_buffer_limit(buf); 3949 /* For quic the output bytes have to stay allocated and available, 3950 * for potential resends, until the remote end has acknowledged them. 3951 * This includes the tcplen start uint16_t, in outlen_wire. 
*/ 3952 stream->outlen_wire = htons(stream->outlen); 3953 stream->out = memdup(sldns_buffer_begin(buf), sldns_buffer_limit(buf)); 3954 if(!stream->out) { 3955 log_err("doq could not send answer: out of memory"); 3956 return 0; 3957 } 3958 return 1; 3959 } 3960 3961 int 3962 doq_stream_send_reply(struct doq_conn* conn, struct doq_stream* stream, 3963 struct sldns_buffer* buf) 3964 { 3965 if(verbosity >= VERB_ALGO) { 3966 char* s = sldns_wire2str_pkt(sldns_buffer_begin(buf), 3967 sldns_buffer_limit(buf)); 3968 verbose(VERB_ALGO, "doq stream %d response\n%s", 3969 (int)stream->stream_id, (s?s:"null")); 3970 free(s); 3971 } 3972 if(stream->out) 3973 doq_table_quic_size_subtract(conn->doq_socket->table, 3974 stream->outlen); 3975 if(!doq_stream_pickup_answer(stream, buf)) 3976 return 0; 3977 doq_table_quic_size_add(conn->doq_socket->table, stream->outlen); 3978 doq_stream_on_write_list(conn, stream); 3979 doq_conn_write_enable(conn); 3980 return 1; 3981 } 3982 3983 /** doq stream data length has completed, allocations can be done. False on 3984 * allocation failure. */ 3985 static int 3986 doq_stream_datalen_complete(struct doq_stream* stream, struct doq_table* table) 3987 { 3988 if(stream->inlen > 1024*1024) { 3989 log_err("doq stream in length too large %d", 3990 (int)stream->inlen); 3991 return 0; 3992 } 3993 stream->in = calloc(1, stream->inlen); 3994 if(!stream->in) { 3995 log_err("doq could not read stream, calloc failed: " 3996 "out of memory"); 3997 return 0; 3998 } 3999 doq_table_quic_size_add(table, stream->inlen); 4000 return 1; 4001 } 4002 4003 /** doq stream data is complete, the input data has been received. 
*/ 4004 static int 4005 doq_stream_data_complete(struct doq_conn* conn, struct doq_stream* stream) 4006 { 4007 struct comm_point* c; 4008 if(verbosity >= VERB_ALGO) { 4009 char* s = sldns_wire2str_pkt(stream->in, stream->inlen); 4010 char a[128]; 4011 addr_to_str((void*)&conn->key.paddr.addr, 4012 conn->key.paddr.addrlen, a, sizeof(a)); 4013 verbose(VERB_ALGO, "doq %s stream %d incoming query\n%s", 4014 a, (int)stream->stream_id, (s?s:"null")); 4015 free(s); 4016 } 4017 stream->is_query_complete = 1; 4018 c = conn->doq_socket->cp; 4019 if(!stream->in) { 4020 verbose(VERB_ALGO, "doq_stream_data_complete: no in buffer"); 4021 return 0; 4022 } 4023 if(stream->inlen > sldns_buffer_capacity(c->buffer)) { 4024 verbose(VERB_ALGO, "doq_stream_data_complete: query too long"); 4025 return 0; 4026 } 4027 sldns_buffer_clear(c->buffer); 4028 sldns_buffer_write(c->buffer, stream->in, stream->inlen); 4029 sldns_buffer_flip(c->buffer); 4030 c->repinfo.c = c; 4031 if(!doq_conn_key_store_repinfo(&conn->key, &c->repinfo)) { 4032 verbose(VERB_ALGO, "doq_stream_data_complete: connection " 4033 "DCID too long"); 4034 return 0; 4035 } 4036 c->repinfo.doq_streamid = stream->stream_id; 4037 conn->doq_socket->current_conn = conn; 4038 fptr_ok(fptr_whitelist_comm_point(c->callback)); 4039 if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo)) { 4040 conn->doq_socket->current_conn = NULL; 4041 if(!doq_stream_send_reply(conn, stream, c->buffer)) { 4042 verbose(VERB_ALGO, "doq: failed to send_reply"); 4043 return 0; 4044 } 4045 return 1; 4046 } 4047 conn->doq_socket->current_conn = NULL; 4048 return 1; 4049 } 4050 4051 /** doq receive data for a stream, more bytes of the incoming data */ 4052 static int 4053 doq_stream_recv_data(struct doq_stream* stream, const uint8_t* data, 4054 size_t datalen, int* recv_done, struct doq_table* table) 4055 { 4056 int got_data = 0; 4057 /* read the tcplength uint16_t at the start */ 4058 if(stream->nread < 2) { 4059 uint16_t tcplen = 0; 4060 size_t 
todolen = 2 - stream->nread; 4061 4062 if(stream->nread > 0) { 4063 /* put in the already read byte if there is one */ 4064 tcplen = stream->inlen; 4065 } 4066 if(datalen < todolen) 4067 todolen = datalen; 4068 memmove(((uint8_t*)&tcplen)+stream->nread, data, todolen); 4069 stream->nread += todolen; 4070 data += todolen; 4071 datalen -= todolen; 4072 if(stream->nread == 2) { 4073 /* the initial length value is completed */ 4074 stream->inlen = ntohs(tcplen); 4075 if(!doq_stream_datalen_complete(stream, table)) 4076 return 0; 4077 } else { 4078 /* store for later */ 4079 stream->inlen = tcplen; 4080 return 1; 4081 } 4082 } 4083 /* if there are more data bytes */ 4084 if(datalen > 0) { 4085 size_t to_write = datalen; 4086 if(stream->nread-2 > stream->inlen) { 4087 verbose(VERB_ALGO, "doq stream buffer too small"); 4088 return 0; 4089 } 4090 if(datalen > stream->inlen - (stream->nread-2)) 4091 to_write = stream->inlen - (stream->nread-2); 4092 if(to_write > 0) { 4093 if(!stream->in) { 4094 verbose(VERB_ALGO, "doq: stream has " 4095 "no buffer"); 4096 return 0; 4097 } 4098 memmove(stream->in+(stream->nread-2), data, to_write); 4099 stream->nread += to_write; 4100 data += to_write; 4101 datalen -= to_write; 4102 got_data = 1; 4103 } 4104 } 4105 /* Are there extra bytes received after the end? If so, log them. */ 4106 if(datalen > 0) { 4107 if(verbosity >= VERB_ALGO) 4108 log_hex("doq stream has extra bytes received after end", 4109 (void*)data, datalen); 4110 } 4111 /* Is the input data complete? */ 4112 if(got_data && stream->nread >= stream->inlen+2) { 4113 if(!stream->in) { 4114 verbose(VERB_ALGO, "doq: completed stream has " 4115 "no buffer"); 4116 return 0; 4117 } 4118 *recv_done = 1; 4119 } 4120 return 1; 4121 } 4122 4123 /** doq receive FIN for a stream. No more bytes are going to arrive. 
*/ 4124 static int 4125 doq_stream_recv_fin(struct doq_conn* conn, struct doq_stream* stream, int 4126 recv_done) 4127 { 4128 if(!stream->is_query_complete && !recv_done) { 4129 verbose(VERB_ALGO, "doq: stream recv FIN, but is " 4130 "not complete, have %d of %d bytes", 4131 ((int)stream->nread)-2, (int)stream->inlen); 4132 if(!doq_stream_close(conn, stream, 1)) 4133 return 0; 4134 } 4135 return 1; 4136 } 4137 4138 void doq_fill_rand(struct ub_randstate* rnd, uint8_t* buf, size_t len) 4139 { 4140 size_t i; 4141 for(i=0; i<len; i++) 4142 buf[i] = ub_random(rnd)&0xff; 4143 } 4144 4145 /** generate new connection id, checks for duplicates. 4146 * caller must hold lock on conid tree. */ 4147 static int 4148 doq_conn_generate_new_conid(struct doq_conn* conn, uint8_t* data, 4149 size_t datalen) 4150 { 4151 int max_try = 100; 4152 int i; 4153 for(i=0; i<max_try; i++) { 4154 doq_fill_rand(conn->doq_socket->rnd, data, datalen); 4155 if(!doq_conid_find(conn->table, data, datalen)) { 4156 /* Found an unused connection id. 
*/ 4157 return 1; 4158 } 4159 } 4160 verbose(VERB_ALGO, "doq_conn_generate_new_conid failed: could not " 4161 "generate random unused connection id value in %d attempts.", 4162 max_try); 4163 return 0; 4164 } 4165 4166 /** ngtcp2 rand callback function */ 4167 static void 4168 doq_rand_cb(uint8_t* dest, size_t destlen, const ngtcp2_rand_ctx* rand_ctx) 4169 { 4170 struct ub_randstate* rnd = (struct ub_randstate*) 4171 rand_ctx->native_handle; 4172 doq_fill_rand(rnd, dest, destlen); 4173 } 4174 4175 /** ngtcp2 get_new_connection_id callback function */ 4176 static int 4177 doq_get_new_connection_id_cb(ngtcp2_conn* ATTR_UNUSED(conn), ngtcp2_cid* cid, 4178 uint8_t* token, size_t cidlen, void* user_data) 4179 { 4180 struct doq_conn* doq_conn = (struct doq_conn*)user_data; 4181 /* Lock the conid tree, so we can check for duplicates while 4182 * generating the id, and then insert it, whilst keeping the tree 4183 * locked against other modifications, guaranteeing uniqueness. */ 4184 lock_rw_wrlock(&doq_conn->table->conid_lock); 4185 if(!doq_conn_generate_new_conid(doq_conn, cid->data, cidlen)) { 4186 lock_rw_unlock(&doq_conn->table->conid_lock); 4187 return NGTCP2_ERR_CALLBACK_FAILURE; 4188 } 4189 cid->datalen = cidlen; 4190 if(ngtcp2_crypto_generate_stateless_reset_token(token, 4191 doq_conn->doq_socket->static_secret, 4192 doq_conn->doq_socket->static_secret_len, cid) != 0) { 4193 lock_rw_unlock(&doq_conn->table->conid_lock); 4194 return NGTCP2_ERR_CALLBACK_FAILURE; 4195 } 4196 if(!doq_conn_associate_conid(doq_conn, cid->data, cid->datalen)) { 4197 lock_rw_unlock(&doq_conn->table->conid_lock); 4198 return NGTCP2_ERR_CALLBACK_FAILURE; 4199 } 4200 lock_rw_unlock(&doq_conn->table->conid_lock); 4201 return 0; 4202 } 4203 4204 /** ngtcp2 remove_connection_id callback function */ 4205 static int 4206 doq_remove_connection_id_cb(ngtcp2_conn* ATTR_UNUSED(conn), 4207 const ngtcp2_cid* cid, void* user_data) 4208 { 4209 struct doq_conn* doq_conn = (struct doq_conn*)user_data; 4210 
lock_rw_wrlock(&doq_conn->table->conid_lock); 4211 doq_conn_dissociate_conid(doq_conn, cid->data, cid->datalen); 4212 lock_rw_unlock(&doq_conn->table->conid_lock); 4213 return 0; 4214 } 4215 4216 /** doq submit a new token */ 4217 static int 4218 doq_submit_new_token(struct doq_conn* conn) 4219 { 4220 uint8_t token[NGTCP2_CRYPTO_MAX_REGULAR_TOKENLEN]; 4221 ngtcp2_ssize tokenlen; 4222 int ret; 4223 const ngtcp2_path* path = ngtcp2_conn_get_path(conn->conn); 4224 ngtcp2_tstamp ts = doq_get_timestamp_nanosec(); 4225 4226 tokenlen = ngtcp2_crypto_generate_regular_token(token, 4227 conn->doq_socket->static_secret, 4228 conn->doq_socket->static_secret_len, path->remote.addr, 4229 path->remote.addrlen, ts); 4230 if(tokenlen < 0) { 4231 log_err("doq ngtcp2_crypto_generate_regular_token failed"); 4232 return 1; 4233 } 4234 4235 verbose(VERB_ALGO, "doq submit new token"); 4236 ret = ngtcp2_conn_submit_new_token(conn->conn, token, tokenlen); 4237 if(ret != 0) { 4238 log_err("doq ngtcp2_conn_submit_new_token failed: %s", 4239 ngtcp2_strerror(ret)); 4240 return 0; 4241 } 4242 return 1; 4243 } 4244 4245 /** ngtcp2 handshake_completed callback function */ 4246 static int 4247 doq_handshake_completed_cb(ngtcp2_conn* ATTR_UNUSED(conn), void* user_data) 4248 { 4249 struct doq_conn* doq_conn = (struct doq_conn*)user_data; 4250 verbose(VERB_ALGO, "doq handshake_completed callback"); 4251 verbose(VERB_ALGO, "ngtcp2_conn_get_max_data_left is %d", 4252 (int)ngtcp2_conn_get_max_data_left(doq_conn->conn)); 4253 #ifdef HAVE_NGTCP2_CONN_GET_MAX_LOCAL_STREAMS_UNI 4254 verbose(VERB_ALGO, "ngtcp2_conn_get_max_local_streams_uni is %d", 4255 (int)ngtcp2_conn_get_max_local_streams_uni(doq_conn->conn)); 4256 #endif 4257 verbose(VERB_ALGO, "ngtcp2_conn_get_streams_uni_left is %d", 4258 (int)ngtcp2_conn_get_streams_uni_left(doq_conn->conn)); 4259 verbose(VERB_ALGO, "ngtcp2_conn_get_streams_bidi_left is %d", 4260 (int)ngtcp2_conn_get_streams_bidi_left(doq_conn->conn)); 4261 verbose(VERB_ALGO, 
"negotiated cipher name is %s", 4262 SSL_get_cipher_name(doq_conn->ssl)); 4263 if(verbosity > VERB_ALGO) { 4264 const unsigned char* alpn = NULL; 4265 unsigned int alpnlen = 0; 4266 char alpnstr[128]; 4267 SSL_get0_alpn_selected(doq_conn->ssl, &alpn, &alpnlen); 4268 if(alpnlen > sizeof(alpnstr)-1) 4269 alpnlen = sizeof(alpnstr)-1; 4270 memmove(alpnstr, alpn, alpnlen); 4271 alpnstr[alpnlen]=0; 4272 verbose(VERB_ALGO, "negotiated ALPN is '%s'", alpnstr); 4273 } 4274 4275 if(!doq_submit_new_token(doq_conn)) 4276 return -1; 4277 return 0; 4278 } 4279 4280 /** ngtcp2 stream_open callback function */ 4281 static int 4282 doq_stream_open_cb(ngtcp2_conn* ATTR_UNUSED(conn), int64_t stream_id, 4283 void* user_data) 4284 { 4285 struct doq_conn* doq_conn = (struct doq_conn*)user_data; 4286 struct doq_stream* stream; 4287 verbose(VERB_ALGO, "doq new stream %x", (int)stream_id); 4288 if(doq_stream_find(doq_conn, stream_id)) { 4289 verbose(VERB_ALGO, "doq: stream with this id already exists"); 4290 return 0; 4291 } 4292 if(stream_id != 0 && stream_id != 4 && /* allow one stream on a new connection */ 4293 !doq_table_quic_size_available(doq_conn->doq_socket->table, 4294 doq_conn->doq_socket->cfg, sizeof(*stream) 4295 + 100 /* estimated query in */ 4296 + 512 /* estimated response out */ 4297 )) { 4298 int rv; 4299 verbose(VERB_ALGO, "doq: no mem for new stream"); 4300 rv = ngtcp2_conn_shutdown_stream(doq_conn->conn, 4301 #ifdef HAVE_NGTCP2_CONN_SHUTDOWN_STREAM4 4302 0, 4303 #endif 4304 stream_id, NGTCP2_CONNECTION_REFUSED); 4305 if(rv != 0) { 4306 log_err("ngtcp2_conn_shutdown_stream failed: %s", 4307 ngtcp2_strerror(rv)); 4308 return NGTCP2_ERR_CALLBACK_FAILURE; 4309 } 4310 return 0; 4311 } 4312 stream = doq_stream_create(stream_id); 4313 if(!stream) { 4314 log_err("doq: could not doq_stream_create: out of memory"); 4315 return NGTCP2_ERR_CALLBACK_FAILURE; 4316 } 4317 doq_table_quic_size_add(doq_conn->doq_socket->table, sizeof(*stream)); 4318 doq_conn_add_stream(doq_conn, 
stream); 4319 return 0; 4320 } 4321 4322 /** ngtcp2 recv_stream_data callback function */ 4323 static int 4324 doq_recv_stream_data_cb(ngtcp2_conn* ATTR_UNUSED(conn), uint32_t flags, 4325 int64_t stream_id, uint64_t offset, const uint8_t* data, 4326 size_t datalen, void* user_data, void* ATTR_UNUSED(stream_user_data)) 4327 { 4328 int recv_done = 0; 4329 struct doq_conn* doq_conn = (struct doq_conn*)user_data; 4330 struct doq_stream* stream; 4331 verbose(VERB_ALGO, "doq recv stream data stream id %d offset %d " 4332 "datalen %d%s%s", (int)stream_id, (int)offset, (int)datalen, 4333 ((flags&NGTCP2_STREAM_DATA_FLAG_FIN)!=0?" FIN":""), 4334 #ifdef NGTCP2_STREAM_DATA_FLAG_0RTT 4335 ((flags&NGTCP2_STREAM_DATA_FLAG_0RTT)!=0?" 0RTT":"") 4336 #else 4337 ((flags&NGTCP2_STREAM_DATA_FLAG_EARLY)!=0?" EARLY":"") 4338 #endif 4339 ); 4340 stream = doq_stream_find(doq_conn, stream_id); 4341 if(!stream) { 4342 verbose(VERB_ALGO, "doq: received stream data for " 4343 "unknown stream %d", (int)stream_id); 4344 return 0; 4345 } 4346 if(stream->is_closed) { 4347 verbose(VERB_ALGO, "doq: stream is closed, ignore recv data"); 4348 return 0; 4349 } 4350 if(datalen != 0) { 4351 if(!doq_stream_recv_data(stream, data, datalen, &recv_done, 4352 doq_conn->doq_socket->table)) 4353 return NGTCP2_ERR_CALLBACK_FAILURE; 4354 } 4355 if((flags&NGTCP2_STREAM_DATA_FLAG_FIN)!=0) { 4356 if(!doq_stream_recv_fin(doq_conn, stream, recv_done)) 4357 return NGTCP2_ERR_CALLBACK_FAILURE; 4358 } 4359 ngtcp2_conn_extend_max_stream_offset(doq_conn->conn, stream_id, 4360 datalen); 4361 ngtcp2_conn_extend_max_offset(doq_conn->conn, datalen); 4362 if(recv_done) { 4363 if(!doq_stream_data_complete(doq_conn, stream)) 4364 return NGTCP2_ERR_CALLBACK_FAILURE; 4365 } 4366 return 0; 4367 } 4368 4369 /** ngtcp2 stream_close callback function */ 4370 static int 4371 doq_stream_close_cb(ngtcp2_conn* ATTR_UNUSED(conn), uint32_t flags, 4372 int64_t stream_id, uint64_t app_error_code, void* user_data, 4373 void* 
ATTR_UNUSED(stream_user_data)) 4374 { 4375 struct doq_conn* doq_conn = (struct doq_conn*)user_data; 4376 struct doq_stream* stream; 4377 if((flags&NGTCP2_STREAM_CLOSE_FLAG_APP_ERROR_CODE_SET)!=0) 4378 verbose(VERB_ALGO, "doq stream close for stream id %d %sapp_error_code %d", 4379 (int)stream_id, 4380 (((flags&NGTCP2_STREAM_CLOSE_FLAG_APP_ERROR_CODE_SET)!=0)? 4381 "APP_ERROR_CODE_SET ":""), 4382 (int)app_error_code); 4383 else 4384 verbose(VERB_ALGO, "doq stream close for stream id %d", 4385 (int)stream_id); 4386 4387 stream = doq_stream_find(doq_conn, stream_id); 4388 if(!stream) { 4389 verbose(VERB_ALGO, "doq: stream close for " 4390 "unknown stream %d", (int)stream_id); 4391 return 0; 4392 } 4393 if(!doq_stream_close(doq_conn, stream, 0)) 4394 return NGTCP2_ERR_CALLBACK_FAILURE; 4395 return 0; 4396 } 4397 4398 /** ngtcp2 stream_reset callback function */ 4399 static int 4400 doq_stream_reset_cb(ngtcp2_conn* ATTR_UNUSED(conn), int64_t stream_id, 4401 uint64_t final_size, uint64_t app_error_code, void* user_data, 4402 void* ATTR_UNUSED(stream_user_data)) 4403 { 4404 struct doq_conn* doq_conn = (struct doq_conn*)user_data; 4405 struct doq_stream* stream; 4406 verbose(VERB_ALGO, "doq stream reset for stream id %d final_size %d " 4407 "app_error_code %d", (int)stream_id, (int)final_size, 4408 (int)app_error_code); 4409 4410 stream = doq_stream_find(doq_conn, stream_id); 4411 if(!stream) { 4412 verbose(VERB_ALGO, "doq: stream reset for " 4413 "unknown stream %d", (int)stream_id); 4414 return 0; 4415 } 4416 if(!doq_stream_close(doq_conn, stream, 0)) 4417 return NGTCP2_ERR_CALLBACK_FAILURE; 4418 return 0; 4419 } 4420 4421 /** ngtcp2 acked_stream_data_offset callback function */ 4422 static int 4423 doq_acked_stream_data_offset_cb(ngtcp2_conn* ATTR_UNUSED(conn), 4424 int64_t stream_id, uint64_t offset, uint64_t datalen, void* user_data, 4425 void* ATTR_UNUSED(stream_user_data)) 4426 { 4427 struct doq_conn* doq_conn = (struct doq_conn*)user_data; 4428 struct doq_stream* 
stream; 4429 verbose(VERB_ALGO, "doq stream acked data for stream id %d offset %d " 4430 "datalen %d", (int)stream_id, (int)offset, (int)datalen); 4431 4432 stream = doq_stream_find(doq_conn, stream_id); 4433 if(!stream) { 4434 verbose(VERB_ALGO, "doq: stream acked data for " 4435 "unknown stream %d", (int)stream_id); 4436 return 0; 4437 } 4438 /* Acked the data from [offset .. offset+datalen). */ 4439 if(stream->is_closed) 4440 return 0; 4441 if(offset+datalen >= stream->outlen) { 4442 doq_stream_remove_in_buffer(stream, 4443 doq_conn->doq_socket->table); 4444 doq_stream_remove_out_buffer(stream, 4445 doq_conn->doq_socket->table); 4446 } 4447 return 0; 4448 } 4449 4450 /** ngtc2p log_printf callback function */ 4451 static void 4452 doq_log_printf_cb(void* ATTR_UNUSED(user_data), const char* fmt, ...) 4453 { 4454 char buf[1024]; 4455 va_list ap; 4456 va_start(ap, fmt); 4457 vsnprintf(buf, sizeof(buf), fmt, ap); 4458 verbose(VERB_ALGO, "libngtcp2: %s", buf); 4459 va_end(ap); 4460 } 4461 4462 #ifndef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT 4463 /** the doq application tx key callback, false on failure */ 4464 static int 4465 doq_application_tx_key_cb(struct doq_conn* conn) 4466 { 4467 verbose(VERB_ALGO, "doq application tx key cb"); 4468 /* The server does not want to open streams to the client, 4469 * the client instead initiates by opening bidi streams. 
*/ 4470 verbose(VERB_ALGO, "doq ngtcp2_conn_get_max_data_left is %d", 4471 (int)ngtcp2_conn_get_max_data_left(conn->conn)); 4472 #ifdef HAVE_NGTCP2_CONN_GET_MAX_LOCAL_STREAMS_UNI 4473 verbose(VERB_ALGO, "doq ngtcp2_conn_get_max_local_streams_uni is %d", 4474 (int)ngtcp2_conn_get_max_local_streams_uni(conn->conn)); 4475 #endif 4476 verbose(VERB_ALGO, "doq ngtcp2_conn_get_streams_uni_left is %d", 4477 (int)ngtcp2_conn_get_streams_uni_left(conn->conn)); 4478 verbose(VERB_ALGO, "doq ngtcp2_conn_get_streams_bidi_left is %d", 4479 (int)ngtcp2_conn_get_streams_bidi_left(conn->conn)); 4480 return 1; 4481 } 4482 4483 /** quic_method set_encryption_secrets function */ 4484 static int 4485 doq_set_encryption_secrets(SSL *ssl, OSSL_ENCRYPTION_LEVEL ossl_level, 4486 const uint8_t *read_secret, const uint8_t *write_secret, 4487 size_t secret_len) 4488 { 4489 struct doq_conn* doq_conn = (struct doq_conn*)SSL_get_app_data(ssl); 4490 #ifdef HAVE_NGTCP2_ENCRYPTION_LEVEL 4491 ngtcp2_encryption_level 4492 #else 4493 ngtcp2_crypto_level 4494 #endif 4495 level = 4496 #ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_FROM_OSSL_ENCRYPTION_LEVEL 4497 ngtcp2_crypto_quictls_from_ossl_encryption_level(ossl_level); 4498 #else 4499 ngtcp2_crypto_openssl_from_ossl_encryption_level(ossl_level); 4500 #endif 4501 4502 if(read_secret) { 4503 verbose(VERB_ALGO, "doq: ngtcp2_crypto_derive_and_install_rx_key for level %d ossl %d", (int)level, (int)ossl_level); 4504 if(ngtcp2_crypto_derive_and_install_rx_key(doq_conn->conn, 4505 NULL, NULL, NULL, level, read_secret, secret_len) 4506 != 0) { 4507 log_err("ngtcp2_crypto_derive_and_install_rx_key " 4508 "failed"); 4509 return 0; 4510 } 4511 } 4512 4513 if(write_secret) { 4514 verbose(VERB_ALGO, "doq: ngtcp2_crypto_derive_and_install_tx_key for level %d ossl %d", (int)level, (int)ossl_level); 4515 if(ngtcp2_crypto_derive_and_install_tx_key(doq_conn->conn, 4516 NULL, NULL, NULL, level, write_secret, secret_len) 4517 != 0) { 4518 
log_err("ngtcp2_crypto_derive_and_install_tx_key " 4519 "failed"); 4520 return 0; 4521 } 4522 if(level == NGTCP2_CRYPTO_LEVEL_APPLICATION) { 4523 if(!doq_application_tx_key_cb(doq_conn)) 4524 return 0; 4525 } 4526 } 4527 return 1; 4528 } 4529 4530 /** quic_method add_handshake_data function */ 4531 static int 4532 doq_add_handshake_data(SSL *ssl, OSSL_ENCRYPTION_LEVEL ossl_level, 4533 const uint8_t *data, size_t len) 4534 { 4535 struct doq_conn* doq_conn = (struct doq_conn*)SSL_get_app_data(ssl); 4536 #ifdef HAVE_NGTCP2_ENCRYPTION_LEVEL 4537 ngtcp2_encryption_level 4538 #else 4539 ngtcp2_crypto_level 4540 #endif 4541 level = 4542 #ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_FROM_OSSL_ENCRYPTION_LEVEL 4543 ngtcp2_crypto_quictls_from_ossl_encryption_level(ossl_level); 4544 #else 4545 ngtcp2_crypto_openssl_from_ossl_encryption_level(ossl_level); 4546 #endif 4547 int rv; 4548 4549 verbose(VERB_ALGO, "doq_add_handshake_data: " 4550 "ngtcp2_con_submit_crypto_data level %d", (int)level); 4551 rv = ngtcp2_conn_submit_crypto_data(doq_conn->conn, level, data, len); 4552 if(rv != 0) { 4553 log_err("ngtcp2_conn_submit_crypto_data failed: %s", 4554 ngtcp2_strerror(rv)); 4555 ngtcp2_conn_set_tls_error(doq_conn->conn, rv); 4556 return 0; 4557 } 4558 return 1; 4559 } 4560 4561 /** quic_method flush_flight function */ 4562 static int 4563 doq_flush_flight(SSL* ATTR_UNUSED(ssl)) 4564 { 4565 return 1; 4566 } 4567 4568 /** quic_method send_alert function */ 4569 static int 4570 doq_send_alert(SSL *ssl, enum ssl_encryption_level_t ATTR_UNUSED(level), 4571 uint8_t alert) 4572 { 4573 struct doq_conn* doq_conn = (struct doq_conn*)SSL_get_app_data(ssl); 4574 doq_conn->tls_alert = alert; 4575 return 1; 4576 } 4577 #endif /* HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT */ 4578 4579 /** ALPN select callback for the doq SSL context */ 4580 static int 4581 doq_alpn_select_cb(SSL* ATTR_UNUSED(ssl), const unsigned char** out, 4582 unsigned char* outlen, const unsigned char* in, unsigned int inlen, 
4583 void* ATTR_UNUSED(arg)) 4584 { 4585 /* select "doq" */ 4586 int ret = SSL_select_next_proto((void*)out, outlen, 4587 (const unsigned char*)"\x03""doq", 4, in, inlen); 4588 if(ret == OPENSSL_NPN_NEGOTIATED) 4589 return SSL_TLSEXT_ERR_OK; 4590 verbose(VERB_ALGO, "doq alpn_select_cb: ALPN from client does " 4591 "not have 'doq'"); 4592 return SSL_TLSEXT_ERR_ALERT_FATAL; 4593 } 4594 4595 void* quic_sslctx_create(char* key, char* pem, char* verifypem) 4596 { 4597 #ifdef HAVE_NGTCP2 4598 char* sid_ctx = "unbound server"; 4599 #ifndef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT 4600 SSL_QUIC_METHOD* quic_method; 4601 #endif 4602 SSL_CTX* ctx = SSL_CTX_new(TLS_server_method()); 4603 if(!ctx) { 4604 log_crypto_err("Could not SSL_CTX_new"); 4605 return NULL; 4606 } 4607 if(!key || key[0] == 0) { 4608 log_err("doq: error, no tls-service-key file specified"); 4609 SSL_CTX_free(ctx); 4610 return NULL; 4611 } 4612 if(!pem || pem[0] == 0) { 4613 log_err("doq: error, no tls-service-pem file specified"); 4614 SSL_CTX_free(ctx); 4615 return NULL; 4616 } 4617 SSL_CTX_set_options(ctx, 4618 (SSL_OP_ALL & ~SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS) | 4619 SSL_OP_SINGLE_ECDH_USE | 4620 SSL_OP_CIPHER_SERVER_PREFERENCE | 4621 SSL_OP_NO_ANTI_REPLAY); 4622 SSL_CTX_set_mode(ctx, SSL_MODE_RELEASE_BUFFERS); 4623 SSL_CTX_set_min_proto_version(ctx, TLS1_3_VERSION); 4624 SSL_CTX_set_max_proto_version(ctx, TLS1_3_VERSION); 4625 #ifdef HAVE_SSL_CTX_SET_ALPN_SELECT_CB 4626 SSL_CTX_set_alpn_select_cb(ctx, doq_alpn_select_cb, NULL); 4627 #endif 4628 SSL_CTX_set_default_verify_paths(ctx); 4629 if(!SSL_CTX_use_certificate_chain_file(ctx, pem)) { 4630 log_err("doq: error for cert file: %s", pem); 4631 log_crypto_err("doq: error in " 4632 "SSL_CTX_use_certificate_chain_file"); 4633 SSL_CTX_free(ctx); 4634 return NULL; 4635 } 4636 if(!SSL_CTX_use_PrivateKey_file(ctx, key, SSL_FILETYPE_PEM)) { 4637 log_err("doq: error for private key file: %s", key); 4638 log_crypto_err("doq: error in 
SSL_CTX_use_PrivateKey_file"); 4639 SSL_CTX_free(ctx); 4640 return NULL; 4641 } 4642 if(!SSL_CTX_check_private_key(ctx)) { 4643 log_err("doq: error for key file: %s", key); 4644 log_crypto_err("doq: error in SSL_CTX_check_private_key"); 4645 SSL_CTX_free(ctx); 4646 return NULL; 4647 } 4648 SSL_CTX_set_session_id_context(ctx, (void*)sid_ctx, strlen(sid_ctx)); 4649 if(verifypem && verifypem[0]) { 4650 if(!SSL_CTX_load_verify_locations(ctx, verifypem, NULL)) { 4651 log_err("doq: error for verify pem file: %s", 4652 verifypem); 4653 log_crypto_err("doq: error in " 4654 "SSL_CTX_load_verify_locations"); 4655 SSL_CTX_free(ctx); 4656 return NULL; 4657 } 4658 SSL_CTX_set_client_CA_list(ctx, SSL_load_client_CA_file( 4659 verifypem)); 4660 SSL_CTX_set_verify(ctx, SSL_VERIFY_PEER| 4661 SSL_VERIFY_CLIENT_ONCE| 4662 SSL_VERIFY_FAIL_IF_NO_PEER_CERT, NULL); 4663 } 4664 4665 SSL_CTX_set_max_early_data(ctx, 0xffffffff); 4666 #ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT 4667 if(ngtcp2_crypto_quictls_configure_server_context(ctx) != 0) { 4668 log_err("ngtcp2_crypto_quictls_configure_server_context failed"); 4669 SSL_CTX_free(ctx); 4670 return NULL; 4671 } 4672 #else /* HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT */ 4673 /* The quic_method needs to remain valid during the SSL_CTX 4674 * lifetime, so we allocate it. It is freed with the 4675 * doq_server_socket. 
*/ 4676 quic_method = calloc(1, sizeof(SSL_QUIC_METHOD)); 4677 if(!quic_method) { 4678 log_err("calloc failed: out of memory"); 4679 SSL_CTX_free(ctx); 4680 return NULL; 4681 } 4682 doq_socket->quic_method = quic_method; 4683 quic_method->set_encryption_secrets = doq_set_encryption_secrets; 4684 quic_method->add_handshake_data = doq_add_handshake_data; 4685 quic_method->flush_flight = doq_flush_flight; 4686 quic_method->send_alert = doq_send_alert; 4687 SSL_CTX_set_quic_method(ctx, doq_socket->quic_method); 4688 #endif 4689 return ctx; 4690 #else /* HAVE_NGTCP2 */ 4691 (void)key; (void)pem; (void)verifypem; 4692 return NULL; 4693 #endif /* HAVE_NGTCP2 */ 4694 } 4695 4696 /** Get the ngtcp2_conn from ssl userdata of type ngtcp2_conn_ref */ 4697 static ngtcp2_conn* doq_conn_ref_get_conn(ngtcp2_crypto_conn_ref* conn_ref) 4698 { 4699 struct doq_conn* conn = (struct doq_conn*)conn_ref->user_data; 4700 return conn->conn; 4701 } 4702 4703 /** create new SSL session for server connection */ 4704 static SSL* 4705 doq_ssl_server_setup(SSL_CTX* ctx, struct doq_conn* conn) 4706 { 4707 SSL* ssl = SSL_new(ctx); 4708 if(!ssl) { 4709 log_crypto_err("doq: SSL_new failed"); 4710 return NULL; 4711 } 4712 #ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT 4713 conn->conn_ref.get_conn = &doq_conn_ref_get_conn; 4714 conn->conn_ref.user_data = conn; 4715 SSL_set_app_data(ssl, &conn->conn_ref); 4716 #else 4717 SSL_set_app_data(ssl, conn); 4718 #endif 4719 SSL_set_accept_state(ssl); 4720 SSL_set_quic_early_data_enabled(ssl, 1); 4721 return ssl; 4722 } 4723 4724 int 4725 doq_conn_setup(struct doq_conn* conn, uint8_t* scid, size_t scidlen, 4726 uint8_t* ocid, size_t ocidlen, const uint8_t* token, size_t tokenlen) 4727 { 4728 int rv; 4729 struct ngtcp2_cid dcid, sv_scid, scid_cid; 4730 struct ngtcp2_path path; 4731 struct ngtcp2_callbacks callbacks; 4732 struct ngtcp2_settings settings; 4733 struct ngtcp2_transport_params params; 4734 memset(&dcid, 0, sizeof(dcid)); 4735 
memset(&sv_scid, 0, sizeof(sv_scid)); 4736 memset(&scid_cid, 0, sizeof(scid_cid)); 4737 memset(&path, 0, sizeof(path)); 4738 memset(&callbacks, 0, sizeof(callbacks)); 4739 memset(&settings, 0, sizeof(settings)); 4740 memset(¶ms, 0, sizeof(params)); 4741 4742 ngtcp2_cid_init(&scid_cid, scid, scidlen); 4743 ngtcp2_cid_init(&dcid, conn->key.dcid, conn->key.dcidlen); 4744 4745 path.remote.addr = (struct sockaddr*)&conn->key.paddr.addr; 4746 path.remote.addrlen = conn->key.paddr.addrlen; 4747 path.local.addr = (struct sockaddr*)&conn->key.paddr.localaddr; 4748 path.local.addrlen = conn->key.paddr.localaddrlen; 4749 4750 callbacks.recv_client_initial = ngtcp2_crypto_recv_client_initial_cb; 4751 callbacks.recv_crypto_data = ngtcp2_crypto_recv_crypto_data_cb; 4752 callbacks.encrypt = ngtcp2_crypto_encrypt_cb; 4753 callbacks.decrypt = ngtcp2_crypto_decrypt_cb; 4754 callbacks.hp_mask = ngtcp2_crypto_hp_mask; 4755 callbacks.update_key = ngtcp2_crypto_update_key_cb; 4756 callbacks.delete_crypto_aead_ctx = 4757 ngtcp2_crypto_delete_crypto_aead_ctx_cb; 4758 callbacks.delete_crypto_cipher_ctx = 4759 ngtcp2_crypto_delete_crypto_cipher_ctx_cb; 4760 callbacks.get_path_challenge_data = 4761 ngtcp2_crypto_get_path_challenge_data_cb; 4762 callbacks.version_negotiation = ngtcp2_crypto_version_negotiation_cb; 4763 callbacks.rand = doq_rand_cb; 4764 callbacks.get_new_connection_id = doq_get_new_connection_id_cb; 4765 callbacks.remove_connection_id = doq_remove_connection_id_cb; 4766 callbacks.handshake_completed = doq_handshake_completed_cb; 4767 callbacks.stream_open = doq_stream_open_cb; 4768 callbacks.stream_close = doq_stream_close_cb; 4769 callbacks.stream_reset = doq_stream_reset_cb; 4770 callbacks.acked_stream_data_offset = doq_acked_stream_data_offset_cb; 4771 callbacks.recv_stream_data = doq_recv_stream_data_cb; 4772 4773 ngtcp2_settings_default(&settings); 4774 if(verbosity >= VERB_ALGO) { 4775 settings.log_printf = doq_log_printf_cb; 4776 } 4777 settings.rand_ctx.native_handle 
= conn->doq_socket->rnd; 4778 settings.initial_ts = doq_get_timestamp_nanosec(); 4779 settings.max_stream_window = 6*1024*1024; 4780 settings.max_window = 6*1024*1024; 4781 #ifdef HAVE_STRUCT_NGTCP2_SETTINGS_TOKENLEN 4782 settings.token = (void*)token; 4783 settings.tokenlen = tokenlen; 4784 #else 4785 settings.token.base = (void*)token; 4786 settings.token.len = tokenlen; 4787 #endif 4788 4789 ngtcp2_transport_params_default(¶ms); 4790 params.max_idle_timeout = conn->doq_socket->idle_timeout; 4791 params.active_connection_id_limit = 7; 4792 params.initial_max_stream_data_bidi_local = 256*1024; 4793 params.initial_max_stream_data_bidi_remote = 256*1024; 4794 params.initial_max_data = 1024*1024; 4795 /* DoQ uses bidi streams, so we allow 0 uni streams. */ 4796 params.initial_max_streams_uni = 0; 4797 /* Initial max on number of bidi streams the remote end can open. 4798 * That is the number of queries it can make, at first. */ 4799 params.initial_max_streams_bidi = 10; 4800 if(ocid) { 4801 ngtcp2_cid_init(¶ms.original_dcid, ocid, ocidlen); 4802 ngtcp2_cid_init(¶ms.retry_scid, conn->key.dcid, 4803 conn->key.dcidlen); 4804 params.retry_scid_present = 1; 4805 } else { 4806 ngtcp2_cid_init(¶ms.original_dcid, conn->key.dcid, 4807 conn->key.dcidlen); 4808 } 4809 #ifdef HAVE_STRUCT_NGTCP2_TRANSPORT_PARAMS_ORIGINAL_DCID_PRESENT 4810 params.original_dcid_present = 1; 4811 #endif 4812 doq_fill_rand(conn->doq_socket->rnd, params.stateless_reset_token, 4813 sizeof(params.stateless_reset_token)); 4814 sv_scid.datalen = conn->doq_socket->sv_scidlen; 4815 lock_rw_wrlock(&conn->table->conid_lock); 4816 if(!doq_conn_generate_new_conid(conn, sv_scid.data, sv_scid.datalen)) { 4817 lock_rw_unlock(&conn->table->conid_lock); 4818 return 0; 4819 } 4820 4821 rv = ngtcp2_conn_server_new(&conn->conn, &scid_cid, &sv_scid, &path, 4822 conn->version, &callbacks, &settings, ¶ms, NULL, conn); 4823 if(rv != 0) { 4824 lock_rw_unlock(&conn->table->conid_lock); 4825 log_err("ngtcp2_conn_server_new 
failed: %s", 4826 ngtcp2_strerror(rv)); 4827 return 0; 4828 } 4829 if(!doq_conn_setup_conids(conn)) { 4830 lock_rw_unlock(&conn->table->conid_lock); 4831 log_err("doq_conn_setup_conids failed: out of memory"); 4832 return 0; 4833 } 4834 lock_rw_unlock(&conn->table->conid_lock); 4835 conn->ssl = doq_ssl_server_setup((SSL_CTX*)conn->doq_socket->ctx, 4836 conn); 4837 if(!conn->ssl) { 4838 log_err("doq_ssl_server_setup failed"); 4839 return 0; 4840 } 4841 ngtcp2_conn_set_tls_native_handle(conn->conn, conn->ssl); 4842 doq_conn_write_enable(conn); 4843 return 1; 4844 } 4845 4846 struct doq_conid* 4847 doq_conid_find(struct doq_table* table, const uint8_t* data, size_t datalen) 4848 { 4849 struct rbnode_type* node; 4850 struct doq_conid key; 4851 key.node.key = &key; 4852 key.cid = (void*)data; 4853 key.cidlen = datalen; 4854 node = rbtree_search(table->conid_tree, &key); 4855 if(node) 4856 return (struct doq_conid*)node->key; 4857 return NULL; 4858 } 4859 4860 /** insert conid in the conid list */ 4861 static void 4862 doq_conid_list_insert(struct doq_conn* conn, struct doq_conid* conid) 4863 { 4864 conid->prev = NULL; 4865 conid->next = conn->conid_list; 4866 if(conn->conid_list) 4867 conn->conid_list->prev = conid; 4868 conn->conid_list = conid; 4869 } 4870 4871 /** remove conid from the conid list */ 4872 static void 4873 doq_conid_list_remove(struct doq_conn* conn, struct doq_conid* conid) 4874 { 4875 if(conid->prev) 4876 conid->prev->next = conid->next; 4877 else conn->conid_list = conid->next; 4878 if(conid->next) 4879 conid->next->prev = conid->prev; 4880 } 4881 4882 /** create a doq_conid */ 4883 static struct doq_conid* 4884 doq_conid_create(uint8_t* data, size_t datalen, struct doq_conn_key* key) 4885 { 4886 struct doq_conid* conid; 4887 conid = calloc(1, sizeof(*conid)); 4888 if(!conid) 4889 return NULL; 4890 conid->cid = memdup(data, datalen); 4891 if(!conid->cid) { 4892 free(conid); 4893 return NULL; 4894 } 4895 conid->cidlen = datalen; 4896 conid->node.key 
= conid; 4897 conid->key = *key; 4898 conid->key.dcid = memdup(key->dcid, key->dcidlen); 4899 if(!conid->key.dcid) { 4900 free(conid->cid); 4901 free(conid); 4902 return NULL; 4903 } 4904 return conid; 4905 } 4906 4907 void 4908 doq_conid_delete(struct doq_conid* conid) 4909 { 4910 if(!conid) 4911 return; 4912 free(conid->key.dcid); 4913 free(conid->cid); 4914 free(conid); 4915 } 4916 4917 /** return true if the conid is for the conn. */ 4918 static int 4919 conid_is_for_conn(struct doq_conn* conn, struct doq_conid* conid) 4920 { 4921 if(conid->key.dcidlen == conn->key.dcidlen && 4922 memcmp(conid->key.dcid, conn->key.dcid, conid->key.dcidlen)==0 4923 && conid->key.paddr.addrlen == conn->key.paddr.addrlen && 4924 memcmp(&conid->key.paddr.addr, &conn->key.paddr.addr, 4925 conid->key.paddr.addrlen) == 0 && 4926 conid->key.paddr.localaddrlen == conn->key.paddr.localaddrlen && 4927 memcmp(&conid->key.paddr.localaddr, &conn->key.paddr.localaddr, 4928 conid->key.paddr.localaddrlen) == 0 && 4929 conid->key.paddr.ifindex == conn->key.paddr.ifindex) 4930 return 1; 4931 return 0; 4932 } 4933 4934 int 4935 doq_conn_associate_conid(struct doq_conn* conn, uint8_t* data, size_t datalen) 4936 { 4937 struct doq_conid* conid; 4938 conid = doq_conid_find(conn->table, data, datalen); 4939 if(conid && !conid_is_for_conn(conn, conid)) { 4940 verbose(VERB_ALGO, "doq connection id already exists for " 4941 "another doq_conn. Ignoring second connection id."); 4942 /* Already exists to another conn, ignore it. 4943 * This works, in that the conid is listed in the doq_conn 4944 * conid_list element, and removed from there. So our conid 4945 * tree and list are fine, when created and removed. 4946 * The tree now does not have the lookup element pointing 4947 * to this connection. 
*/ 4948 return 1; 4949 } 4950 if(conid) 4951 return 1; /* already inserted */ 4952 conid = doq_conid_create(data, datalen, &conn->key); 4953 if(!conid) 4954 return 0; 4955 doq_conid_list_insert(conn, conid); 4956 (void)rbtree_insert(conn->table->conid_tree, &conid->node); 4957 return 1; 4958 } 4959 4960 void 4961 doq_conn_dissociate_conid(struct doq_conn* conn, const uint8_t* data, 4962 size_t datalen) 4963 { 4964 struct doq_conid* conid; 4965 conid = doq_conid_find(conn->table, data, datalen); 4966 if(conid && !conid_is_for_conn(conn, conid)) 4967 return; 4968 if(conid) { 4969 (void)rbtree_delete(conn->table->conid_tree, 4970 conid->node.key); 4971 doq_conid_list_remove(conn, conid); 4972 doq_conid_delete(conid); 4973 } 4974 } 4975 4976 /** associate the scid array and also the dcid. 4977 * caller must hold the locks on conn and doq_table.conid_lock. */ 4978 static int 4979 doq_conn_setup_id_array_and_dcid(struct doq_conn* conn, 4980 struct ngtcp2_cid* scids, size_t num_scid) 4981 { 4982 size_t i; 4983 for(i=0; i<num_scid; i++) { 4984 if(!doq_conn_associate_conid(conn, scids[i].data, 4985 scids[i].datalen)) 4986 return 0; 4987 } 4988 if(!doq_conn_associate_conid(conn, conn->key.dcid, conn->key.dcidlen)) 4989 return 0; 4990 return 1; 4991 } 4992 4993 int 4994 doq_conn_setup_conids(struct doq_conn* conn) 4995 { 4996 size_t num_scid = 4997 #ifndef HAVE_NGTCP2_CONN_GET_NUM_SCID 4998 ngtcp2_conn_get_scid(conn->conn, NULL); 4999 #else 5000 ngtcp2_conn_get_num_scid(conn->conn); 5001 #endif 5002 if(num_scid <= 4) { 5003 struct ngtcp2_cid ids[4]; 5004 /* Usually there are not that many scids when just accepted, 5005 * like only 2. 
*/ 5006 ngtcp2_conn_get_scid(conn->conn, ids); 5007 return doq_conn_setup_id_array_and_dcid(conn, ids, num_scid); 5008 } else { 5009 struct ngtcp2_cid *scids = calloc(num_scid, 5010 sizeof(struct ngtcp2_cid)); 5011 if(!scids) 5012 return 0; 5013 ngtcp2_conn_get_scid(conn->conn, scids); 5014 if(!doq_conn_setup_id_array_and_dcid(conn, scids, num_scid)) { 5015 free(scids); 5016 return 0; 5017 } 5018 free(scids); 5019 } 5020 return 1; 5021 } 5022 5023 void 5024 doq_conn_clear_conids(struct doq_conn* conn) 5025 { 5026 struct doq_conid* p, *next; 5027 if(!conn) 5028 return; 5029 p = conn->conid_list; 5030 while(p) { 5031 next = p->next; 5032 (void)rbtree_delete(conn->table->conid_tree, p->node.key); 5033 doq_conid_delete(p); 5034 p = next; 5035 } 5036 conn->conid_list = NULL; 5037 } 5038 5039 ngtcp2_tstamp doq_get_timestamp_nanosec(void) 5040 { 5041 #ifdef CLOCK_REALTIME 5042 struct timespec tp; 5043 memset(&tp, 0, sizeof(tp)); 5044 /* Get a nanosecond time, that can be compared with the event base. */ 5045 if(clock_gettime(CLOCK_REALTIME, &tp) == -1) { 5046 log_err("clock_gettime failed: %s", strerror(errno)); 5047 } 5048 return ((uint64_t)tp.tv_sec)*((uint64_t)1000000000) + 5049 ((uint64_t)tp.tv_nsec); 5050 #else 5051 struct timeval tv; 5052 if(gettimeofday(&tv, NULL) < 0) { 5053 log_err("gettimeofday failed: %s", strerror(errno)); 5054 } 5055 return ((uint64_t)tv.tv_sec)*((uint64_t)1000000000) + 5056 ((uint64_t)tv.tv_usec)*((uint64_t)1000); 5057 #endif /* CLOCK_REALTIME */ 5058 } 5059 5060 /** doq start the closing period for the connection. 
*/ 5061 static int 5062 doq_conn_start_closing_period(struct comm_point* c, struct doq_conn* conn) 5063 { 5064 struct ngtcp2_path_storage ps; 5065 struct ngtcp2_pkt_info pi; 5066 ngtcp2_ssize ret; 5067 if(!conn) 5068 return 1; 5069 if( 5070 #ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD 5071 ngtcp2_conn_in_closing_period(conn->conn) 5072 #else 5073 ngtcp2_conn_is_in_closing_period(conn->conn) 5074 #endif 5075 ) 5076 return 1; 5077 if( 5078 #ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD 5079 ngtcp2_conn_in_draining_period(conn->conn) 5080 #else 5081 ngtcp2_conn_is_in_draining_period(conn->conn) 5082 #endif 5083 ) { 5084 doq_conn_write_disable(conn); 5085 return 1; 5086 } 5087 ngtcp2_path_storage_zero(&ps); 5088 sldns_buffer_clear(c->doq_socket->pkt_buf); 5089 /* the call to ngtcp2_conn_write_connection_close causes the 5090 * conn to be closed. It is now in the closing period. */ 5091 ret = ngtcp2_conn_write_connection_close(conn->conn, &ps.path, 5092 &pi, sldns_buffer_begin(c->doq_socket->pkt_buf), 5093 sldns_buffer_remaining(c->doq_socket->pkt_buf), 5094 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 5095 &conn->ccerr 5096 #else 5097 &conn->last_error 5098 #endif 5099 , doq_get_timestamp_nanosec()); 5100 if(ret < 0) { 5101 log_err("doq ngtcp2_conn_write_connection_close failed: %s", 5102 ngtcp2_strerror(ret)); 5103 return 0; 5104 } 5105 if(ret == 0) { 5106 return 0; 5107 } 5108 sldns_buffer_set_position(c->doq_socket->pkt_buf, ret); 5109 sldns_buffer_flip(c->doq_socket->pkt_buf); 5110 5111 /* The close packet is allocated, because it may have to be repeated. 5112 * When incoming packets have this connection dcid. 
*/ 5113 conn->close_pkt = memdup(sldns_buffer_begin(c->doq_socket->pkt_buf), 5114 sldns_buffer_limit(c->doq_socket->pkt_buf)); 5115 if(!conn->close_pkt) { 5116 log_err("doq: could not allocate close packet: out of memory"); 5117 return 0; 5118 } 5119 conn->close_pkt_len = sldns_buffer_limit(c->doq_socket->pkt_buf); 5120 conn->close_ecn = pi.ecn; 5121 return 1; 5122 } 5123 5124 /** doq send the close packet for the connection, perhaps again. */ 5125 int 5126 doq_conn_send_close(struct comm_point* c, struct doq_conn* conn) 5127 { 5128 if(!conn) 5129 return 0; 5130 if(!conn->close_pkt) 5131 return 0; 5132 if(conn->close_pkt_len > sldns_buffer_capacity(c->doq_socket->pkt_buf)) 5133 return 0; 5134 sldns_buffer_clear(c->doq_socket->pkt_buf); 5135 sldns_buffer_write(c->doq_socket->pkt_buf, conn->close_pkt, conn->close_pkt_len); 5136 sldns_buffer_flip(c->doq_socket->pkt_buf); 5137 verbose(VERB_ALGO, "doq send connection close"); 5138 doq_send_pkt(c, &conn->key.paddr, conn->close_ecn); 5139 doq_conn_write_disable(conn); 5140 return 1; 5141 } 5142 5143 /** doq close the connection on error. If it returns a failure, it 5144 * does not wait to send a close, and the connection can be dropped. 
*/ 5145 static int 5146 doq_conn_close_error(struct comm_point* c, struct doq_conn* conn) 5147 { 5148 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 5149 if(conn->ccerr.type == NGTCP2_CCERR_TYPE_IDLE_CLOSE) 5150 return 0; 5151 #else 5152 if(conn->last_error.type == 5153 NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT_IDLE_CLOSE) 5154 return 0; 5155 #endif 5156 if(!doq_conn_start_closing_period(c, conn)) 5157 return 0; 5158 if( 5159 #ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD 5160 ngtcp2_conn_in_draining_period(conn->conn) 5161 #else 5162 ngtcp2_conn_is_in_draining_period(conn->conn) 5163 #endif 5164 ) { 5165 doq_conn_write_disable(conn); 5166 return 1; 5167 } 5168 doq_conn_write_enable(conn); 5169 if(!doq_conn_send_close(c, conn)) 5170 return 0; 5171 return 1; 5172 } 5173 5174 int 5175 doq_conn_recv(struct comm_point* c, struct doq_pkt_addr* paddr, 5176 struct doq_conn* conn, struct ngtcp2_pkt_info* pi, int* err_retry, 5177 int* err_drop) 5178 { 5179 int ret; 5180 ngtcp2_tstamp ts; 5181 struct ngtcp2_path path; 5182 memset(&path, 0, sizeof(path)); 5183 path.remote.addr = (struct sockaddr*)&paddr->addr; 5184 path.remote.addrlen = paddr->addrlen; 5185 path.local.addr = (struct sockaddr*)&paddr->localaddr; 5186 path.local.addrlen = paddr->localaddrlen; 5187 ts = doq_get_timestamp_nanosec(); 5188 5189 ret = ngtcp2_conn_read_pkt(conn->conn, &path, pi, 5190 sldns_buffer_begin(c->doq_socket->pkt_buf), 5191 sldns_buffer_limit(c->doq_socket->pkt_buf), ts); 5192 if(ret != 0) { 5193 if(err_retry) 5194 *err_retry = 0; 5195 if(err_drop) 5196 *err_drop = 0; 5197 if(ret == NGTCP2_ERR_DRAINING) { 5198 verbose(VERB_ALGO, "ngtcp2_conn_read_pkt returned %s", 5199 ngtcp2_strerror(ret)); 5200 doq_conn_write_disable(conn); 5201 return 0; 5202 } else if(ret == NGTCP2_ERR_DROP_CONN) { 5203 verbose(VERB_ALGO, "ngtcp2_conn_read_pkt returned %s", 5204 ngtcp2_strerror(ret)); 5205 if(err_drop) 5206 *err_drop = 1; 5207 return 0; 5208 } else if(ret == NGTCP2_ERR_RETRY) { 5209 verbose(VERB_ALGO, 
"ngtcp2_conn_read_pkt returned %s", 5210 ngtcp2_strerror(ret)); 5211 if(err_retry) 5212 *err_retry = 1; 5213 if(err_drop) 5214 *err_drop = 1; 5215 return 0; 5216 } else if(ret == NGTCP2_ERR_CRYPTO) { 5217 if( 5218 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 5219 !conn->ccerr.error_code 5220 #else 5221 !conn->last_error.error_code 5222 #endif 5223 ) { 5224 /* in picotls the tls alert may need to be 5225 * copied, but this is with openssl. And there 5226 * is conn->tls_alert. */ 5227 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 5228 ngtcp2_ccerr_set_tls_alert(&conn->ccerr, 5229 conn->tls_alert, NULL, 0); 5230 #else 5231 ngtcp2_connection_close_error_set_transport_error_tls_alert( 5232 &conn->last_error, conn->tls_alert, 5233 NULL, 0); 5234 #endif 5235 } 5236 } else { 5237 if( 5238 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 5239 !conn->ccerr.error_code 5240 #else 5241 !conn->last_error.error_code 5242 #endif 5243 ) { 5244 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 5245 ngtcp2_ccerr_set_liberr(&conn->ccerr, ret, 5246 NULL, 0); 5247 #else 5248 ngtcp2_connection_close_error_set_transport_error_liberr( 5249 &conn->last_error, ret, NULL, 0); 5250 #endif 5251 } 5252 } 5253 log_err("ngtcp2_conn_read_pkt failed: %s", 5254 ngtcp2_strerror(ret)); 5255 if(!doq_conn_close_error(c, conn)) { 5256 if(err_drop) 5257 *err_drop = 1; 5258 } 5259 return 0; 5260 } 5261 doq_conn_write_enable(conn); 5262 return 1; 5263 } 5264 5265 /** doq stream write is done */ 5266 static void 5267 doq_stream_write_is_done(struct doq_conn* conn, struct doq_stream* stream) 5268 { 5269 /* Cannot deallocate, the buffer may be needed for resends. 
*/ 5270 doq_stream_off_write_list(conn, stream); 5271 } 5272 5273 int 5274 doq_conn_write_streams(struct comm_point* c, struct doq_conn* conn, 5275 int* err_drop) 5276 { 5277 struct doq_stream* stream = conn->stream_write_first; 5278 ngtcp2_path_storage ps; 5279 ngtcp2_tstamp ts = doq_get_timestamp_nanosec(); 5280 size_t num_packets = 0, max_packets = 65535; 5281 ngtcp2_path_storage_zero(&ps); 5282 5283 for(;;) { 5284 int64_t stream_id; 5285 uint32_t flags = 0; 5286 ngtcp2_pkt_info pi; 5287 ngtcp2_vec datav[2]; 5288 size_t datav_count = 0; 5289 ngtcp2_ssize ret, ndatalen = 0; 5290 int fin; 5291 5292 if(stream) { 5293 /* data to send */ 5294 verbose(VERB_ALGO, "doq: doq_conn write stream %d", 5295 (int)stream->stream_id); 5296 stream_id = stream->stream_id; 5297 fin = 1; 5298 if(stream->nwrite < 2) { 5299 datav[0].base = ((uint8_t*)&stream-> 5300 outlen_wire) + stream->nwrite; 5301 datav[0].len = 2 - stream->nwrite; 5302 datav[1].base = stream->out; 5303 datav[1].len = stream->outlen; 5304 datav_count = 2; 5305 } else { 5306 datav[0].base = stream->out + 5307 (stream->nwrite-2); 5308 datav[0].len = stream->outlen - 5309 (stream->nwrite-2); 5310 datav_count = 1; 5311 } 5312 } else { 5313 /* no data to send */ 5314 verbose(VERB_ALGO, "doq: doq_conn write stream -1"); 5315 stream_id = -1; 5316 fin = 0; 5317 datav[0].base = NULL; 5318 datav[0].len = 0; 5319 datav_count = 1; 5320 } 5321 5322 /* if more streams, set it to write more */ 5323 if(stream && stream->write_next) 5324 flags |= NGTCP2_WRITE_STREAM_FLAG_MORE; 5325 if(fin) 5326 flags |= NGTCP2_WRITE_STREAM_FLAG_FIN; 5327 5328 sldns_buffer_clear(c->doq_socket->pkt_buf); 5329 ret = ngtcp2_conn_writev_stream(conn->conn, &ps.path, &pi, 5330 sldns_buffer_begin(c->doq_socket->pkt_buf), 5331 sldns_buffer_remaining(c->doq_socket->pkt_buf), 5332 &ndatalen, flags, stream_id, datav, datav_count, ts); 5333 if(ret < 0) { 5334 if(ret == NGTCP2_ERR_WRITE_MORE) { 5335 verbose(VERB_ALGO, "doq: write more, ndatalen %d", 
(int)ndatalen); 5336 if(stream) { 5337 if(ndatalen >= 0) 5338 stream->nwrite += ndatalen; 5339 if(stream->nwrite >= stream->outlen+2) 5340 doq_stream_write_is_done( 5341 conn, stream); 5342 stream = stream->write_next; 5343 } 5344 continue; 5345 } else if(ret == NGTCP2_ERR_STREAM_DATA_BLOCKED) { 5346 verbose(VERB_ALGO, "doq: ngtcp2_conn_writev_stream returned NGTCP2_ERR_STREAM_DATA_BLOCKED"); 5347 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 5348 ngtcp2_ccerr_set_application_error( 5349 &conn->ccerr, -1, NULL, 0); 5350 #else 5351 ngtcp2_connection_close_error_set_application_error(&conn->last_error, -1, NULL, 0); 5352 #endif 5353 if(err_drop) 5354 *err_drop = 0; 5355 if(!doq_conn_close_error(c, conn)) { 5356 if(err_drop) 5357 *err_drop = 1; 5358 } 5359 return 0; 5360 } else if(ret == NGTCP2_ERR_STREAM_SHUT_WR) { 5361 verbose(VERB_ALGO, "doq: ngtcp2_conn_writev_stream returned NGTCP2_ERR_STREAM_SHUT_WR"); 5362 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 5363 ngtcp2_ccerr_set_application_error( 5364 &conn->ccerr, -1, NULL, 0); 5365 #else 5366 ngtcp2_connection_close_error_set_application_error(&conn->last_error, -1, NULL, 0); 5367 #endif 5368 if(err_drop) 5369 *err_drop = 0; 5370 if(!doq_conn_close_error(c, conn)) { 5371 if(err_drop) 5372 *err_drop = 1; 5373 } 5374 return 0; 5375 } 5376 5377 log_err("doq: ngtcp2_conn_writev_stream failed: %s", 5378 ngtcp2_strerror(ret)); 5379 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 5380 ngtcp2_ccerr_set_liberr(&conn->ccerr, ret, NULL, 0); 5381 #else 5382 ngtcp2_connection_close_error_set_transport_error_liberr( 5383 &conn->last_error, ret, NULL, 0); 5384 #endif 5385 if(err_drop) 5386 *err_drop = 0; 5387 if(!doq_conn_close_error(c, conn)) { 5388 if(err_drop) 5389 *err_drop = 1; 5390 } 5391 return 0; 5392 } 5393 verbose(VERB_ALGO, "doq: writev_stream pkt size %d ndatawritten %d", 5394 (int)ret, (int)ndatalen); 5395 5396 if(ndatalen >= 0 && stream) { 5397 stream->nwrite += ndatalen; 5398 if(stream->nwrite >= stream->outlen+2) 5399 doq_stream_write_is_done(conn, 
stream); 5400 } 5401 if(ret == 0) { 5402 /* congestion limited */ 5403 doq_conn_write_disable(conn); 5404 ngtcp2_conn_update_pkt_tx_time(conn->conn, ts); 5405 return 1; 5406 } 5407 sldns_buffer_set_position(c->doq_socket->pkt_buf, ret); 5408 sldns_buffer_flip(c->doq_socket->pkt_buf); 5409 doq_send_pkt(c, &conn->key.paddr, pi.ecn); 5410 5411 if(c->doq_socket->have_blocked_pkt) 5412 break; 5413 if(++num_packets == max_packets) 5414 break; 5415 if(stream) 5416 stream = stream->write_next; 5417 } 5418 ngtcp2_conn_update_pkt_tx_time(conn->conn, ts); 5419 return 1; 5420 } 5421 5422 void 5423 doq_conn_write_enable(struct doq_conn* conn) 5424 { 5425 conn->write_interest = 1; 5426 } 5427 5428 void 5429 doq_conn_write_disable(struct doq_conn* conn) 5430 { 5431 conn->write_interest = 0; 5432 } 5433 5434 /** doq append the connection to the write list */ 5435 static void 5436 doq_conn_write_list_append(struct doq_table* table, struct doq_conn* conn) 5437 { 5438 if(conn->on_write_list) 5439 return; 5440 conn->write_prev = table->write_list_last; 5441 if(table->write_list_last) 5442 table->write_list_last->write_next = conn; 5443 else table->write_list_first = conn; 5444 conn->write_next = NULL; 5445 table->write_list_last = conn; 5446 conn->on_write_list = 1; 5447 } 5448 5449 void 5450 doq_conn_write_list_remove(struct doq_table* table, struct doq_conn* conn) 5451 { 5452 if(!conn->on_write_list) 5453 return; 5454 if(conn->write_next) 5455 conn->write_next->write_prev = conn->write_prev; 5456 else table->write_list_last = conn->write_prev; 5457 if(conn->write_prev) 5458 conn->write_prev->write_next = conn->write_next; 5459 else table->write_list_first = conn->write_next; 5460 conn->write_prev = NULL; 5461 conn->write_next = NULL; 5462 conn->on_write_list = 0; 5463 } 5464 5465 void 5466 doq_conn_set_write_list(struct doq_table* table, struct doq_conn* conn) 5467 { 5468 if(conn->write_interest && conn->on_write_list) 5469 return; 5470 if(!conn->write_interest && 
!conn->on_write_list) 5471 return; 5472 if(conn->write_interest) 5473 doq_conn_write_list_append(table, conn); 5474 else doq_conn_write_list_remove(table, conn); 5475 } 5476 5477 struct doq_conn* 5478 doq_table_pop_first(struct doq_table* table) 5479 { 5480 struct doq_conn* conn = table->write_list_first; 5481 if(!conn) 5482 return NULL; 5483 lock_basic_lock(&conn->lock); 5484 table->write_list_first = conn->write_next; 5485 if(conn->write_next) 5486 conn->write_next->write_prev = NULL; 5487 else table->write_list_last = NULL; 5488 conn->write_next = NULL; 5489 conn->write_prev = NULL; 5490 conn->on_write_list = 0; 5491 return conn; 5492 } 5493 5494 int 5495 doq_conn_check_timer(struct doq_conn* conn, struct timeval* tv) 5496 { 5497 ngtcp2_tstamp expiry = ngtcp2_conn_get_expiry(conn->conn); 5498 ngtcp2_tstamp now = doq_get_timestamp_nanosec(); 5499 ngtcp2_tstamp t; 5500 5501 if(expiry <= now) { 5502 /* The timer has already expired, add with zero timeout. 5503 * This should call the callback straight away. Calling it 5504 * from the event callbacks is cleaner than calling it here, 5505 * because then it is always called with the same locks and 5506 * so on. This routine only has the conn.lock. */ 5507 t = now; 5508 } else { 5509 t = expiry; 5510 } 5511 5512 /* convert to timeval */ 5513 memset(tv, 0, sizeof(*tv)); 5514 tv->tv_sec = t / NGTCP2_SECONDS; 5515 tv->tv_usec = (t / NGTCP2_MICROSECONDS)%1000000; 5516 5517 /* If we already have a timer, is it the right value? 
*/ 5518 if(conn->timer.timer_in_tree || conn->timer.timer_in_list) { 5519 if(conn->timer.time.tv_sec == tv->tv_sec && 5520 conn->timer.time.tv_usec == tv->tv_usec) 5521 return 0; 5522 } 5523 return 1; 5524 } 5525 5526 /* doq print connection log */ 5527 static void 5528 doq_conn_log_line(struct doq_conn* conn, char* s) 5529 { 5530 char remotestr[256], localstr[256]; 5531 addr_to_str((void*)&conn->key.paddr.addr, conn->key.paddr.addrlen, 5532 remotestr, sizeof(remotestr)); 5533 addr_to_str((void*)&conn->key.paddr.localaddr, 5534 conn->key.paddr.localaddrlen, localstr, sizeof(localstr)); 5535 log_info("doq conn %s %s %s", remotestr, localstr, s); 5536 } 5537 5538 int 5539 doq_conn_handle_timeout(struct doq_conn* conn) 5540 { 5541 ngtcp2_tstamp now = doq_get_timestamp_nanosec(); 5542 int rv; 5543 5544 if(verbosity >= VERB_ALGO) 5545 doq_conn_log_line(conn, "timeout"); 5546 5547 rv = ngtcp2_conn_handle_expiry(conn->conn, now); 5548 if(rv != 0) { 5549 verbose(VERB_ALGO, "ngtcp2_conn_handle_expiry failed: %s", 5550 ngtcp2_strerror(rv)); 5551 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 5552 ngtcp2_ccerr_set_liberr(&conn->ccerr, rv, NULL, 0); 5553 #else 5554 ngtcp2_connection_close_error_set_transport_error_liberr( 5555 &conn->last_error, rv, NULL, 0); 5556 #endif 5557 if(!doq_conn_close_error(conn->doq_socket->cp, conn)) { 5558 /* failed, return for deletion */ 5559 return 0; 5560 } 5561 return 1; 5562 } 5563 doq_conn_write_enable(conn); 5564 if(!doq_conn_write_streams(conn->doq_socket->cp, conn, NULL)) { 5565 /* failed, return for deletion. 
*/ 5566 return 0; 5567 } 5568 return 1; 5569 } 5570 5571 void 5572 doq_table_quic_size_add(struct doq_table* table, size_t add) 5573 { 5574 lock_basic_lock(&table->size_lock); 5575 table->current_size += add; 5576 lock_basic_unlock(&table->size_lock); 5577 } 5578 5579 void 5580 doq_table_quic_size_subtract(struct doq_table* table, size_t subtract) 5581 { 5582 lock_basic_lock(&table->size_lock); 5583 if(table->current_size < subtract) 5584 table->current_size = 0; 5585 else table->current_size -= subtract; 5586 lock_basic_unlock(&table->size_lock); 5587 } 5588 5589 int 5590 doq_table_quic_size_available(struct doq_table* table, 5591 struct config_file* cfg, size_t mem) 5592 { 5593 size_t cur; 5594 lock_basic_lock(&table->size_lock); 5595 cur = table->current_size; 5596 lock_basic_unlock(&table->size_lock); 5597 5598 if(cur + mem > cfg->quic_size) 5599 return 0; 5600 return 1; 5601 } 5602 5603 size_t doq_table_quic_size_get(struct doq_table* table) 5604 { 5605 size_t sz; 5606 if(!table) 5607 return 0; 5608 lock_basic_lock(&table->size_lock); 5609 sz = table->current_size; 5610 lock_basic_unlock(&table->size_lock); 5611 return sz; 5612 } 5613 #endif /* HAVE_NGTCP2 */ 5614