1 /* 2 * util/netevent.c - event notification 3 * 4 * Copyright (c) 2007, NLnet Labs. All rights reserved. 5 * 6 * This software is open source. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * Redistributions of source code must retain the above copyright notice, 13 * this list of conditions and the following disclaimer. 14 * 15 * Redistributions in binary form must reproduce the above copyright notice, 16 * this list of conditions and the following disclaimer in the documentation 17 * and/or other materials provided with the distribution. 18 * 19 * Neither the name of the NLNET LABS nor the names of its contributors may 20 * be used to endorse or promote products derived from this software without 21 * specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36 /** 37 * \file 38 * 39 * This file contains event notification functions. 
40 */ 41 #include "config.h" 42 #include "util/netevent.h" 43 #include "util/ub_event.h" 44 #include "util/log.h" 45 #include "util/net_help.h" 46 #include "util/tcp_conn_limit.h" 47 #include "util/fptr_wlist.h" 48 #include "util/proxy_protocol.h" 49 #include "util/timeval_func.h" 50 #include "sldns/pkthdr.h" 51 #include "sldns/sbuffer.h" 52 #include "sldns/str2wire.h" 53 #include "dnstap/dnstap.h" 54 #include "dnscrypt/dnscrypt.h" 55 #include "services/listen_dnsport.h" 56 #include "util/random.h" 57 #ifdef HAVE_SYS_TYPES_H 58 #include <sys/types.h> 59 #endif 60 #ifdef HAVE_SYS_SOCKET_H 61 #include <sys/socket.h> 62 #endif 63 #ifdef HAVE_NETDB_H 64 #include <netdb.h> 65 #endif 66 #ifdef HAVE_POLL_H 67 #include <poll.h> 68 #endif 69 70 #ifdef HAVE_OPENSSL_SSL_H 71 #include <openssl/ssl.h> 72 #endif 73 #ifdef HAVE_OPENSSL_ERR_H 74 #include <openssl/err.h> 75 #endif 76 77 #ifdef HAVE_NGTCP2 78 #include <ngtcp2/ngtcp2.h> 79 #include <ngtcp2/ngtcp2_crypto.h> 80 #endif 81 82 #ifdef HAVE_LINUX_NET_TSTAMP_H 83 #include <linux/net_tstamp.h> 84 #endif 85 86 /* -------- Start of local definitions -------- */ 87 /** if CMSG_ALIGN is not defined on this platform, a workaround */ 88 #ifndef CMSG_ALIGN 89 # ifdef __CMSG_ALIGN 90 # define CMSG_ALIGN(n) __CMSG_ALIGN(n) 91 # elif defined(CMSG_DATA_ALIGN) 92 # define CMSG_ALIGN _CMSG_DATA_ALIGN 93 # else 94 # define CMSG_ALIGN(len) (((len)+sizeof(long)-1) & ~(sizeof(long)-1)) 95 # endif 96 #endif 97 98 /** if CMSG_LEN is not defined on this platform, a workaround */ 99 #ifndef CMSG_LEN 100 # define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr))+(len)) 101 #endif 102 103 /** if CMSG_SPACE is not defined on this platform, a workaround */ 104 #ifndef CMSG_SPACE 105 # ifdef _CMSG_HDR_ALIGN 106 # define CMSG_SPACE(l) (CMSG_ALIGN(l)+_CMSG_HDR_ALIGN(sizeof(struct cmsghdr))) 107 # else 108 # define CMSG_SPACE(l) (CMSG_ALIGN(l)+CMSG_ALIGN(sizeof(struct cmsghdr))) 109 # endif 110 #endif 111 112 /** The TCP writing query timeout in 
milliseconds */ 113 #define TCP_QUERY_TIMEOUT 120000 114 /** The minimum actual TCP timeout to use, regardless of what we advertise, 115 * in msec */ 116 #define TCP_QUERY_TIMEOUT_MINIMUM 200 117 118 #ifndef NONBLOCKING_IS_BROKEN 119 /** number of UDP reads to perform per read indication from select */ 120 #define NUM_UDP_PER_SELECT 100 121 #else 122 #define NUM_UDP_PER_SELECT 1 123 #endif 124 125 /** timeout in millisec to wait for write to unblock, packets dropped after.*/ 126 #define SEND_BLOCKED_WAIT_TIMEOUT 200 127 /** max number of times to wait for write to unblock, packets dropped after.*/ 128 #define SEND_BLOCKED_MAX_RETRY 5 129 130 /** Let's make timestamping code cleaner and redefine SO_TIMESTAMP* */ 131 #ifndef SO_TIMESTAMP 132 #define SO_TIMESTAMP 29 133 #endif 134 #ifndef SO_TIMESTAMPNS 135 #define SO_TIMESTAMPNS 35 136 #endif 137 #ifndef SO_TIMESTAMPING 138 #define SO_TIMESTAMPING 37 139 #endif 140 /** 141 * The internal event structure for keeping ub_event info for the event. 142 * Possibly other structures (list, tree) this is part of. 143 */ 144 struct internal_event { 145 /** the comm base */ 146 struct comm_base* base; 147 /** ub_event event type */ 148 struct ub_event* ev; 149 }; 150 151 /** 152 * Internal base structure, so that every thread has its own events. 153 */ 154 struct internal_base { 155 /** ub_event event_base type. */ 156 struct ub_event_base* base; 157 /** seconds time pointer points here */ 158 time_t secs; 159 /** timeval with current time */ 160 struct timeval now; 161 /** the event used for slow_accept timeouts */ 162 struct ub_event* slow_accept; 163 /** true if slow_accept is enabled */ 164 int slow_accept_enabled; 165 /** last log time for slow logging of file descriptor errors */ 166 time_t last_slow_log; 167 /** last log time for slow logging of write wait failures */ 168 time_t last_writewait_log; 169 }; 170 171 /** 172 * Internal timer structure, to store timer event in. 
173 */ 174 struct internal_timer { 175 /** the super struct from which derived */ 176 struct comm_timer super; 177 /** the comm base */ 178 struct comm_base* base; 179 /** ub_event event type */ 180 struct ub_event* ev; 181 /** is timer enabled */ 182 uint8_t enabled; 183 }; 184 185 /** 186 * Internal signal structure, to store signal event in. 187 */ 188 struct internal_signal { 189 /** ub_event event type */ 190 struct ub_event* ev; 191 /** next in signal list */ 192 struct internal_signal* next; 193 }; 194 195 /** create a tcp handler with a parent */ 196 static struct comm_point* comm_point_create_tcp_handler( 197 struct comm_base *base, struct comm_point* parent, size_t bufsize, 198 struct sldns_buffer* spoolbuf, comm_point_callback_type* callback, 199 void* callback_arg, struct unbound_socket* socket); 200 201 /* -------- End of local definitions -------- */ 202 203 struct comm_base* 204 comm_base_create(int sigs) 205 { 206 struct comm_base* b = (struct comm_base*)calloc(1, 207 sizeof(struct comm_base)); 208 const char *evnm="event", *evsys="", *evmethod=""; 209 210 if(!b) 211 return NULL; 212 b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base)); 213 if(!b->eb) { 214 free(b); 215 return NULL; 216 } 217 b->eb->base = ub_default_event_base(sigs, &b->eb->secs, &b->eb->now); 218 if(!b->eb->base) { 219 free(b->eb); 220 free(b); 221 return NULL; 222 } 223 ub_comm_base_now(b); 224 ub_get_event_sys(b->eb->base, &evnm, &evsys, &evmethod); 225 verbose(VERB_ALGO, "%s %s uses %s method.", evnm, evsys, evmethod); 226 return b; 227 } 228 229 struct comm_base* 230 comm_base_create_event(struct ub_event_base* base) 231 { 232 struct comm_base* b = (struct comm_base*)calloc(1, 233 sizeof(struct comm_base)); 234 if(!b) 235 return NULL; 236 b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base)); 237 if(!b->eb) { 238 free(b); 239 return NULL; 240 } 241 b->eb->base = base; 242 ub_comm_base_now(b); 243 return b; 244 } 245 246 void 247 
comm_base_delete(struct comm_base* b) 248 { 249 if(!b) 250 return; 251 if(b->eb->slow_accept_enabled) { 252 if(ub_event_del(b->eb->slow_accept) != 0) { 253 log_err("could not event_del slow_accept"); 254 } 255 ub_event_free(b->eb->slow_accept); 256 } 257 ub_event_base_free(b->eb->base); 258 b->eb->base = NULL; 259 free(b->eb); 260 free(b); 261 } 262 263 void 264 comm_base_delete_no_base(struct comm_base* b) 265 { 266 if(!b) 267 return; 268 if(b->eb->slow_accept_enabled) { 269 if(ub_event_del(b->eb->slow_accept) != 0) { 270 log_err("could not event_del slow_accept"); 271 } 272 ub_event_free(b->eb->slow_accept); 273 } 274 b->eb->base = NULL; 275 free(b->eb); 276 free(b); 277 } 278 279 void 280 comm_base_timept(struct comm_base* b, time_t** tt, struct timeval** tv) 281 { 282 *tt = &b->eb->secs; 283 *tv = &b->eb->now; 284 } 285 286 void 287 comm_base_dispatch(struct comm_base* b) 288 { 289 int retval; 290 retval = ub_event_base_dispatch(b->eb->base); 291 if(retval < 0) { 292 fatal_exit("event_dispatch returned error %d, " 293 "errno is %s", retval, strerror(errno)); 294 } 295 } 296 297 void comm_base_exit(struct comm_base* b) 298 { 299 if(ub_event_base_loopexit(b->eb->base) != 0) { 300 log_err("Could not loopexit"); 301 } 302 } 303 304 void comm_base_set_slow_accept_handlers(struct comm_base* b, 305 void (*stop_acc)(void*), void (*start_acc)(void*), void* arg) 306 { 307 b->stop_accept = stop_acc; 308 b->start_accept = start_acc; 309 b->cb_arg = arg; 310 } 311 312 struct ub_event_base* comm_base_internal(struct comm_base* b) 313 { 314 return b->eb->base; 315 } 316 317 /** see if errno for udp has to be logged or not uses globals */ 318 static int 319 udp_send_errno_needs_log(struct sockaddr* addr, socklen_t addrlen) 320 { 321 /* do not log transient errors (unless high verbosity) */ 322 #if defined(ENETUNREACH) || defined(EHOSTDOWN) || defined(EHOSTUNREACH) || defined(ENETDOWN) 323 switch(errno) { 324 # ifdef ENETUNREACH 325 case ENETUNREACH: 326 # endif 327 # ifdef 
EHOSTDOWN 328 case EHOSTDOWN: 329 # endif 330 # ifdef EHOSTUNREACH 331 case EHOSTUNREACH: 332 # endif 333 # ifdef ENETDOWN 334 case ENETDOWN: 335 # endif 336 case EPERM: 337 case EACCES: 338 if(verbosity < VERB_ALGO) 339 return 0; 340 break; 341 default: 342 break; 343 } 344 #endif 345 /* permission denied is gotten for every send if the 346 * network is disconnected (on some OS), squelch it */ 347 if( ((errno == EPERM) 348 # ifdef EADDRNOTAVAIL 349 /* 'Cannot assign requested address' also when disconnected */ 350 || (errno == EADDRNOTAVAIL) 351 # endif 352 ) && verbosity < VERB_ALGO) 353 return 0; 354 # ifdef EADDRINUSE 355 /* If SO_REUSEADDR is set, we could try to connect to the same server 356 * from the same source port twice. */ 357 if(errno == EADDRINUSE && verbosity < VERB_DETAIL) 358 return 0; 359 # endif 360 /* squelch errors where people deploy AAAA ::ffff:bla for 361 * authority servers, which we try for intranets. */ 362 if(errno == EINVAL && addr_is_ip4mapped( 363 (struct sockaddr_storage*)addr, addrlen) && 364 verbosity < VERB_DETAIL) 365 return 0; 366 /* SO_BROADCAST sockopt can give access to 255.255.255.255, 367 * but a dns cache does not need it. 
*/ 368 if(errno == EACCES && addr_is_broadcast( 369 (struct sockaddr_storage*)addr, addrlen) && 370 verbosity < VERB_DETAIL) 371 return 0; 372 return 1; 373 } 374 375 int tcp_connect_errno_needs_log(struct sockaddr* addr, socklen_t addrlen) 376 { 377 return udp_send_errno_needs_log(addr, addrlen); 378 } 379 380 /* send a UDP reply */ 381 int 382 comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet, 383 struct sockaddr* addr, socklen_t addrlen, int is_connected) 384 { 385 ssize_t sent; 386 log_assert(c->fd != -1); 387 #ifdef UNBOUND_DEBUG 388 if(sldns_buffer_remaining(packet) == 0) 389 log_err("error: send empty UDP packet"); 390 #endif 391 log_assert(addr && addrlen > 0); 392 if(!is_connected) { 393 sent = sendto(c->fd, (void*)sldns_buffer_begin(packet), 394 sldns_buffer_remaining(packet), 0, 395 addr, addrlen); 396 } else { 397 sent = send(c->fd, (void*)sldns_buffer_begin(packet), 398 sldns_buffer_remaining(packet), 0); 399 } 400 if(sent == -1) { 401 /* try again and block, waiting for IO to complete, 402 * we want to send the answer, and we will wait for 403 * the ethernet interface buffer to have space. 
*/ 404 #ifndef USE_WINSOCK 405 if(errno == EAGAIN || errno == EINTR || 406 # ifdef EWOULDBLOCK 407 errno == EWOULDBLOCK || 408 # endif 409 errno == ENOBUFS) { 410 #else 411 if(WSAGetLastError() == WSAEINPROGRESS || 412 WSAGetLastError() == WSAEINTR || 413 WSAGetLastError() == WSAENOBUFS || 414 WSAGetLastError() == WSAEWOULDBLOCK) { 415 #endif 416 int retries = 0; 417 /* if we set the fd blocking, other threads suddenly 418 * have a blocking fd that they operate on */ 419 while(sent == -1 && retries < SEND_BLOCKED_MAX_RETRY && ( 420 #ifndef USE_WINSOCK 421 errno == EAGAIN || errno == EINTR || 422 # ifdef EWOULDBLOCK 423 errno == EWOULDBLOCK || 424 # endif 425 errno == ENOBUFS 426 #else 427 WSAGetLastError() == WSAEINPROGRESS || 428 WSAGetLastError() == WSAEINTR || 429 WSAGetLastError() == WSAENOBUFS || 430 WSAGetLastError() == WSAEWOULDBLOCK 431 #endif 432 )) { 433 #if defined(HAVE_POLL) || defined(USE_WINSOCK) 434 int send_nobufs = ( 435 #ifndef USE_WINSOCK 436 errno == ENOBUFS 437 #else 438 WSAGetLastError() == WSAENOBUFS 439 #endif 440 ); 441 struct pollfd p; 442 int pret; 443 memset(&p, 0, sizeof(p)); 444 p.fd = c->fd; 445 p.events = POLLOUT | POLLERR | POLLHUP; 446 # ifndef USE_WINSOCK 447 pret = poll(&p, 1, SEND_BLOCKED_WAIT_TIMEOUT); 448 # else 449 pret = WSAPoll(&p, 1, 450 SEND_BLOCKED_WAIT_TIMEOUT); 451 # endif 452 if(pret == 0) { 453 /* timer expired */ 454 struct comm_base* b = c->ev->base; 455 if(b->eb->last_writewait_log+SLOW_LOG_TIME <= 456 b->eb->secs) { 457 b->eb->last_writewait_log = b->eb->secs; 458 verbose(VERB_OPS, "send udp blocked " 459 "for long, dropping packet."); 460 } 461 return 0; 462 } else if(pret < 0 && 463 #ifndef USE_WINSOCK 464 errno != EAGAIN && errno != EINTR && 465 # ifdef EWOULDBLOCK 466 errno != EWOULDBLOCK && 467 # endif 468 errno != ENOBUFS 469 #else 470 WSAGetLastError() != WSAEINPROGRESS && 471 WSAGetLastError() != WSAEINTR && 472 WSAGetLastError() != WSAENOBUFS && 473 WSAGetLastError() != WSAEWOULDBLOCK 474 #endif 475 ) { 
476 log_err("poll udp out failed: %s", 477 sock_strerror(errno)); 478 return 0; 479 } else if((pret < 0 && 480 #ifndef USE_WINSOCK 481 errno == ENOBUFS 482 #else 483 WSAGetLastError() == WSAENOBUFS 484 #endif 485 ) || (send_nobufs && retries > 0)) { 486 /* ENOBUFS, and poll returned without 487 * a timeout. Or the retried send call 488 * returned ENOBUFS. It is good to 489 * wait a bit for the error to clear. */ 490 /* The timeout is 20*(2^(retries+1)), 491 * it increases exponentially, starting 492 * at 40 msec. After 5 tries, 1240 msec 493 * have passed in total, when poll 494 * returned the error, and 1200 msec 495 * when send returned the errors. */ 496 #ifndef USE_WINSOCK 497 pret = poll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1)); 498 #else 499 pret = WSAPoll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1)); 500 #endif 501 if(pret < 0 && 502 #ifndef USE_WINSOCK 503 errno != EAGAIN && errno != EINTR && 504 # ifdef EWOULDBLOCK 505 errno != EWOULDBLOCK && 506 # endif 507 errno != ENOBUFS 508 #else 509 WSAGetLastError() != WSAEINPROGRESS && 510 WSAGetLastError() != WSAEINTR && 511 WSAGetLastError() != WSAENOBUFS && 512 WSAGetLastError() != WSAEWOULDBLOCK 513 #endif 514 ) { 515 log_err("poll udp out timer failed: %s", 516 sock_strerror(errno)); 517 } 518 } 519 #endif /* defined(HAVE_POLL) || defined(USE_WINSOCK) */ 520 retries++; 521 if (!is_connected) { 522 sent = sendto(c->fd, (void*)sldns_buffer_begin(packet), 523 sldns_buffer_remaining(packet), 0, 524 addr, addrlen); 525 } else { 526 sent = send(c->fd, (void*)sldns_buffer_begin(packet), 527 sldns_buffer_remaining(packet), 0); 528 } 529 } 530 } 531 } 532 if(sent == -1) { 533 if(!udp_send_errno_needs_log(addr, addrlen)) 534 return 0; 535 if (!is_connected) { 536 verbose(VERB_OPS, "sendto failed: %s", sock_strerror(errno)); 537 } else { 538 verbose(VERB_OPS, "send failed: %s", sock_strerror(errno)); 539 } 540 if(addr) 541 log_addr(VERB_OPS, "remote address is", 542 (struct sockaddr_storage*)addr, 
addrlen); 543 return 0; 544 } else if((size_t)sent != sldns_buffer_remaining(packet)) { 545 log_err("sent %d in place of %d bytes", 546 (int)sent, (int)sldns_buffer_remaining(packet)); 547 return 0; 548 } 549 return 1; 550 } 551 552 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && (defined(HAVE_RECVMSG) || defined(HAVE_SENDMSG)) 553 /** print debug ancillary info */ 554 static void p_ancil(const char* str, struct comm_reply* r) 555 { 556 if(r->srctype != 4 && r->srctype != 6) { 557 log_info("%s: unknown srctype %d", str, r->srctype); 558 return; 559 } 560 561 if(r->srctype == 6) { 562 #ifdef IPV6_PKTINFO 563 char buf[1024]; 564 if(inet_ntop(AF_INET6, &r->pktinfo.v6info.ipi6_addr, 565 buf, (socklen_t)sizeof(buf)) == 0) { 566 (void)strlcpy(buf, "(inet_ntop error)", sizeof(buf)); 567 } 568 buf[sizeof(buf)-1]=0; 569 log_info("%s: %s %d", str, buf, r->pktinfo.v6info.ipi6_ifindex); 570 #endif 571 } else if(r->srctype == 4) { 572 #ifdef IP_PKTINFO 573 char buf1[1024], buf2[1024]; 574 if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_addr, 575 buf1, (socklen_t)sizeof(buf1)) == 0) { 576 (void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1)); 577 } 578 buf1[sizeof(buf1)-1]=0; 579 #ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST 580 if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_spec_dst, 581 buf2, (socklen_t)sizeof(buf2)) == 0) { 582 (void)strlcpy(buf2, "(inet_ntop error)", sizeof(buf2)); 583 } 584 buf2[sizeof(buf2)-1]=0; 585 #else 586 buf2[0]=0; 587 #endif 588 log_info("%s: %d %s %s", str, r->pktinfo.v4info.ipi_ifindex, 589 buf1, buf2); 590 #elif defined(IP_RECVDSTADDR) 591 char buf1[1024]; 592 if(inet_ntop(AF_INET, &r->pktinfo.v4addr, 593 buf1, (socklen_t)sizeof(buf1)) == 0) { 594 (void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1)); 595 } 596 buf1[sizeof(buf1)-1]=0; 597 log_info("%s: %s", str, buf1); 598 #endif /* IP_PKTINFO or PI_RECVDSTDADDR */ 599 } 600 } 601 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG||HAVE_SENDMSG */ 602 603 /** send a UDP reply over specified 
interface*/ 604 static int 605 comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet, 606 struct sockaddr* addr, socklen_t addrlen, struct comm_reply* r) 607 { 608 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_SENDMSG) 609 ssize_t sent; 610 struct msghdr msg; 611 struct iovec iov[1]; 612 union { 613 struct cmsghdr hdr; 614 char buf[256]; 615 } control; 616 #ifndef S_SPLINT_S 617 struct cmsghdr *cmsg; 618 #endif /* S_SPLINT_S */ 619 620 log_assert(c->fd != -1); 621 #ifdef UNBOUND_DEBUG 622 if(sldns_buffer_remaining(packet) == 0) 623 log_err("error: send empty UDP packet"); 624 #endif 625 log_assert(addr && addrlen > 0); 626 627 msg.msg_name = addr; 628 msg.msg_namelen = addrlen; 629 iov[0].iov_base = sldns_buffer_begin(packet); 630 iov[0].iov_len = sldns_buffer_remaining(packet); 631 msg.msg_iov = iov; 632 msg.msg_iovlen = 1; 633 msg.msg_control = control.buf; 634 #ifndef S_SPLINT_S 635 msg.msg_controllen = sizeof(control.buf); 636 #endif /* S_SPLINT_S */ 637 msg.msg_flags = 0; 638 639 #ifndef S_SPLINT_S 640 cmsg = CMSG_FIRSTHDR(&msg); 641 if(r->srctype == 4) { 642 #ifdef IP_PKTINFO 643 void* cmsg_data; 644 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo)); 645 log_assert(msg.msg_controllen <= sizeof(control.buf)); 646 cmsg->cmsg_level = IPPROTO_IP; 647 cmsg->cmsg_type = IP_PKTINFO; 648 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4info, 649 sizeof(struct in_pktinfo)); 650 /* unset the ifindex to not bypass the routing tables */ 651 cmsg_data = CMSG_DATA(cmsg); 652 ((struct in_pktinfo *) cmsg_data)->ipi_ifindex = 0; 653 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo)); 654 /* zero the padding bytes inserted by the CMSG_LEN */ 655 if(sizeof(struct in_pktinfo) < cmsg->cmsg_len) 656 memset(((uint8_t*)(CMSG_DATA(cmsg))) + 657 sizeof(struct in_pktinfo), 0, cmsg->cmsg_len 658 - sizeof(struct in_pktinfo)); 659 #elif defined(IP_SENDSRCADDR) 660 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr)); 661 log_assert(msg.msg_controllen 
<= sizeof(control.buf)); 662 cmsg->cmsg_level = IPPROTO_IP; 663 cmsg->cmsg_type = IP_SENDSRCADDR; 664 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4addr, 665 sizeof(struct in_addr)); 666 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr)); 667 /* zero the padding bytes inserted by the CMSG_LEN */ 668 if(sizeof(struct in_addr) < cmsg->cmsg_len) 669 memset(((uint8_t*)(CMSG_DATA(cmsg))) + 670 sizeof(struct in_addr), 0, cmsg->cmsg_len 671 - sizeof(struct in_addr)); 672 #else 673 verbose(VERB_ALGO, "no IP_PKTINFO or IP_SENDSRCADDR"); 674 msg.msg_control = NULL; 675 #endif /* IP_PKTINFO or IP_SENDSRCADDR */ 676 } else if(r->srctype == 6) { 677 void* cmsg_data; 678 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo)); 679 log_assert(msg.msg_controllen <= sizeof(control.buf)); 680 cmsg->cmsg_level = IPPROTO_IPV6; 681 cmsg->cmsg_type = IPV6_PKTINFO; 682 memmove(CMSG_DATA(cmsg), &r->pktinfo.v6info, 683 sizeof(struct in6_pktinfo)); 684 /* unset the ifindex to not bypass the routing tables */ 685 cmsg_data = CMSG_DATA(cmsg); 686 ((struct in6_pktinfo *) cmsg_data)->ipi6_ifindex = 0; 687 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo)); 688 /* zero the padding bytes inserted by the CMSG_LEN */ 689 if(sizeof(struct in6_pktinfo) < cmsg->cmsg_len) 690 memset(((uint8_t*)(CMSG_DATA(cmsg))) + 691 sizeof(struct in6_pktinfo), 0, cmsg->cmsg_len 692 - sizeof(struct in6_pktinfo)); 693 } else { 694 /* try to pass all 0 to use default route */ 695 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo)); 696 log_assert(msg.msg_controllen <= sizeof(control.buf)); 697 cmsg->cmsg_level = IPPROTO_IPV6; 698 cmsg->cmsg_type = IPV6_PKTINFO; 699 memset(CMSG_DATA(cmsg), 0, sizeof(struct in6_pktinfo)); 700 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo)); 701 /* zero the padding bytes inserted by the CMSG_LEN */ 702 if(sizeof(struct in6_pktinfo) < cmsg->cmsg_len) 703 memset(((uint8_t*)(CMSG_DATA(cmsg))) + 704 sizeof(struct in6_pktinfo), 0, cmsg->cmsg_len 705 - sizeof(struct in6_pktinfo)); 
706 } 707 #endif /* S_SPLINT_S */ 708 if(verbosity >= VERB_ALGO && r->srctype != 0) 709 p_ancil("send_udp over interface", r); 710 sent = sendmsg(c->fd, &msg, 0); 711 if(sent == -1) { 712 /* try again and block, waiting for IO to complete, 713 * we want to send the answer, and we will wait for 714 * the ethernet interface buffer to have space. */ 715 #ifndef USE_WINSOCK 716 if(errno == EAGAIN || errno == EINTR || 717 # ifdef EWOULDBLOCK 718 errno == EWOULDBLOCK || 719 # endif 720 errno == ENOBUFS) { 721 #else 722 if(WSAGetLastError() == WSAEINPROGRESS || 723 WSAGetLastError() == WSAEINTR || 724 WSAGetLastError() == WSAENOBUFS || 725 WSAGetLastError() == WSAEWOULDBLOCK) { 726 #endif 727 int retries = 0; 728 while(sent == -1 && retries < SEND_BLOCKED_MAX_RETRY && ( 729 #ifndef USE_WINSOCK 730 errno == EAGAIN || errno == EINTR || 731 # ifdef EWOULDBLOCK 732 errno == EWOULDBLOCK || 733 # endif 734 errno == ENOBUFS 735 #else 736 WSAGetLastError() == WSAEINPROGRESS || 737 WSAGetLastError() == WSAEINTR || 738 WSAGetLastError() == WSAENOBUFS || 739 WSAGetLastError() == WSAEWOULDBLOCK 740 #endif 741 )) { 742 #if defined(HAVE_POLL) || defined(USE_WINSOCK) 743 int send_nobufs = ( 744 #ifndef USE_WINSOCK 745 errno == ENOBUFS 746 #else 747 WSAGetLastError() == WSAENOBUFS 748 #endif 749 ); 750 struct pollfd p; 751 int pret; 752 memset(&p, 0, sizeof(p)); 753 p.fd = c->fd; 754 p.events = POLLOUT | POLLERR | POLLHUP; 755 # ifndef USE_WINSOCK 756 pret = poll(&p, 1, SEND_BLOCKED_WAIT_TIMEOUT); 757 # else 758 pret = WSAPoll(&p, 1, 759 SEND_BLOCKED_WAIT_TIMEOUT); 760 # endif 761 if(pret == 0) { 762 /* timer expired */ 763 struct comm_base* b = c->ev->base; 764 if(b->eb->last_writewait_log+SLOW_LOG_TIME <= 765 b->eb->secs) { 766 b->eb->last_writewait_log = b->eb->secs; 767 verbose(VERB_OPS, "send udp blocked " 768 "for long, dropping packet."); 769 } 770 return 0; 771 } else if(pret < 0 && 772 #ifndef USE_WINSOCK 773 errno != EAGAIN && errno != EINTR && 774 # ifdef EWOULDBLOCK 775 errno 
!= EWOULDBLOCK && 776 # endif 777 errno != ENOBUFS 778 #else 779 WSAGetLastError() != WSAEINPROGRESS && 780 WSAGetLastError() != WSAEINTR && 781 WSAGetLastError() != WSAENOBUFS && 782 WSAGetLastError() != WSAEWOULDBLOCK 783 #endif 784 ) { 785 log_err("poll udp out failed: %s", 786 sock_strerror(errno)); 787 return 0; 788 } else if((pret < 0 && 789 #ifndef USE_WINSOCK 790 errno == ENOBUFS 791 #else 792 WSAGetLastError() == WSAENOBUFS 793 #endif 794 ) || (send_nobufs && retries > 0)) { 795 /* ENOBUFS, and poll returned without 796 * a timeout. Or the retried send call 797 * returned ENOBUFS. It is good to 798 * wait a bit for the error to clear. */ 799 /* The timeout is 20*(2^(retries+1)), 800 * it increases exponentially, starting 801 * at 40 msec. After 5 tries, 1240 msec 802 * have passed in total, when poll 803 * returned the error, and 1200 msec 804 * when send returned the errors. */ 805 #ifndef USE_WINSOCK 806 pret = poll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1)); 807 #else 808 pret = WSAPoll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1)); 809 #endif 810 if(pret < 0 && 811 #ifndef USE_WINSOCK 812 errno != EAGAIN && errno != EINTR && 813 # ifdef EWOULDBLOCK 814 errno != EWOULDBLOCK && 815 # endif 816 errno != ENOBUFS 817 #else 818 WSAGetLastError() != WSAEINPROGRESS && 819 WSAGetLastError() != WSAEINTR && 820 WSAGetLastError() != WSAENOBUFS && 821 WSAGetLastError() != WSAEWOULDBLOCK 822 #endif 823 ) { 824 log_err("poll udp out timer failed: %s", 825 sock_strerror(errno)); 826 } 827 } 828 #endif /* defined(HAVE_POLL) || defined(USE_WINSOCK) */ 829 retries++; 830 sent = sendmsg(c->fd, &msg, 0); 831 } 832 } 833 } 834 if(sent == -1) { 835 if(!udp_send_errno_needs_log(addr, addrlen)) 836 return 0; 837 verbose(VERB_OPS, "sendmsg failed: %s", strerror(errno)); 838 log_addr(VERB_OPS, "remote address is", 839 (struct sockaddr_storage*)addr, addrlen); 840 #ifdef __NetBSD__ 841 /* netbsd 7 has IP_PKTINFO for recv but not send */ 842 if(errno == EINVAL 
&& r->srctype == 4) 843 log_err("sendmsg: No support for sendmsg(IP_PKTINFO). " 844 "Please disable interface-automatic"); 845 #endif 846 return 0; 847 } else if((size_t)sent != sldns_buffer_remaining(packet)) { 848 log_err("sent %d in place of %d bytes", 849 (int)sent, (int)sldns_buffer_remaining(packet)); 850 return 0; 851 } 852 return 1; 853 #else 854 (void)c; 855 (void)packet; 856 (void)addr; 857 (void)addrlen; 858 (void)r; 859 log_err("sendmsg: IPV6_PKTINFO not supported"); 860 return 0; 861 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_SENDMSG */ 862 } 863 864 /** return true is UDP receive error needs to be logged */ 865 static int udp_recv_needs_log(int err) 866 { 867 switch(err) { 868 case EACCES: /* some hosts send ICMP 'Permission Denied' */ 869 #ifndef USE_WINSOCK 870 case ECONNREFUSED: 871 # ifdef ENETUNREACH 872 case ENETUNREACH: 873 # endif 874 # ifdef EHOSTDOWN 875 case EHOSTDOWN: 876 # endif 877 # ifdef EHOSTUNREACH 878 case EHOSTUNREACH: 879 # endif 880 # ifdef ENETDOWN 881 case ENETDOWN: 882 # endif 883 #else /* USE_WINSOCK */ 884 case WSAECONNREFUSED: 885 case WSAENETUNREACH: 886 case WSAEHOSTDOWN: 887 case WSAEHOSTUNREACH: 888 case WSAENETDOWN: 889 #endif 890 if(verbosity >= VERB_ALGO) 891 return 1; 892 return 0; 893 default: 894 break; 895 } 896 return 1; 897 } 898 899 /** Parses the PROXYv2 header from buf and updates the comm_reply struct. 900 * Returns 1 on success, 0 on failure. */ 901 static int consume_pp2_header(struct sldns_buffer* buf, struct comm_reply* rep, 902 int stream) { 903 size_t size; 904 struct pp2_header *header; 905 int err = pp2_read_header(sldns_buffer_begin(buf), 906 sldns_buffer_remaining(buf)); 907 if(err) return 0; 908 header = (struct pp2_header*)sldns_buffer_begin(buf); 909 size = PP2_HEADER_SIZE + ntohs(header->len); 910 if((header->ver_cmd & 0xF) == PP2_CMD_LOCAL) { 911 /* A connection from the proxy itself. 912 * No need to do anything with addresses. 
*/ 913 goto done; 914 } 915 if(header->fam_prot == PP2_UNSPEC_UNSPEC) { 916 /* Unspecified family and protocol. This could be used for 917 * health checks by proxies. 918 * No need to do anything with addresses. */ 919 goto done; 920 } 921 /* Read the proxied address */ 922 switch(header->fam_prot) { 923 case PP2_INET_STREAM: 924 case PP2_INET_DGRAM: 925 { 926 struct sockaddr_in* addr = 927 (struct sockaddr_in*)&rep->client_addr; 928 addr->sin_family = AF_INET; 929 addr->sin_addr.s_addr = header->addr.addr4.src_addr; 930 addr->sin_port = header->addr.addr4.src_port; 931 rep->client_addrlen = (socklen_t)sizeof(struct sockaddr_in); 932 } 933 /* Ignore the destination address; it should be us. */ 934 break; 935 case PP2_INET6_STREAM: 936 case PP2_INET6_DGRAM: 937 { 938 struct sockaddr_in6* addr = 939 (struct sockaddr_in6*)&rep->client_addr; 940 memset(addr, 0, sizeof(*addr)); 941 addr->sin6_family = AF_INET6; 942 memcpy(&addr->sin6_addr, 943 header->addr.addr6.src_addr, 16); 944 addr->sin6_port = header->addr.addr6.src_port; 945 rep->client_addrlen = (socklen_t)sizeof(struct sockaddr_in6); 946 } 947 /* Ignore the destination address; it should be us. */ 948 break; 949 default: 950 log_err("proxy_protocol: unsupported family and " 951 "protocol 0x%x", (int)header->fam_prot); 952 return 0; 953 } 954 rep->is_proxied = 1; 955 done: 956 if(!stream) { 957 /* We are reading a whole packet; 958 * Move the rest of the data to overwrite the PROXYv2 header */ 959 /* XXX can we do better to avoid memmove? 
*/ 960 memmove(header, ((char*)header)+size, 961 sldns_buffer_limit(buf)-size); 962 sldns_buffer_set_limit(buf, sldns_buffer_limit(buf)-size); 963 } 964 return 1; 965 } 966 967 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG) 968 void 969 comm_point_udp_ancil_callback(int fd, short event, void* arg) 970 { 971 struct comm_reply rep; 972 struct msghdr msg; 973 struct iovec iov[1]; 974 ssize_t rcv; 975 union { 976 struct cmsghdr hdr; 977 char buf[256]; 978 } ancil; 979 int i; 980 #ifndef S_SPLINT_S 981 struct cmsghdr* cmsg; 982 #endif /* S_SPLINT_S */ 983 #ifdef HAVE_LINUX_NET_TSTAMP_H 984 struct timespec *ts; 985 #endif /* HAVE_LINUX_NET_TSTAMP_H */ 986 987 rep.c = (struct comm_point*)arg; 988 log_assert(rep.c->type == comm_udp); 989 990 if(!(event&UB_EV_READ)) 991 return; 992 log_assert(rep.c && rep.c->buffer && rep.c->fd == fd); 993 ub_comm_base_now(rep.c->ev->base); 994 for(i=0; i<NUM_UDP_PER_SELECT; i++) { 995 sldns_buffer_clear(rep.c->buffer); 996 timeval_clear(&rep.c->recv_tv); 997 rep.remote_addrlen = (socklen_t)sizeof(rep.remote_addr); 998 log_assert(fd != -1); 999 log_assert(sldns_buffer_remaining(rep.c->buffer) > 0); 1000 msg.msg_name = &rep.remote_addr; 1001 msg.msg_namelen = (socklen_t)sizeof(rep.remote_addr); 1002 iov[0].iov_base = sldns_buffer_begin(rep.c->buffer); 1003 iov[0].iov_len = sldns_buffer_remaining(rep.c->buffer); 1004 msg.msg_iov = iov; 1005 msg.msg_iovlen = 1; 1006 msg.msg_control = ancil.buf; 1007 #ifndef S_SPLINT_S 1008 msg.msg_controllen = sizeof(ancil.buf); 1009 #endif /* S_SPLINT_S */ 1010 msg.msg_flags = 0; 1011 rcv = recvmsg(fd, &msg, MSG_DONTWAIT); 1012 if(rcv == -1) { 1013 if(errno != EAGAIN && errno != EINTR 1014 && udp_recv_needs_log(errno)) { 1015 log_err("recvmsg failed: %s", strerror(errno)); 1016 } 1017 return; 1018 } 1019 rep.remote_addrlen = msg.msg_namelen; 1020 sldns_buffer_skip(rep.c->buffer, rcv); 1021 sldns_buffer_flip(rep.c->buffer); 1022 rep.srctype = 0; 1023 rep.is_proxied = 0; 1024 #ifndef 
S_SPLINT_S 1025 for(cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; 1026 cmsg = CMSG_NXTHDR(&msg, cmsg)) { 1027 if( cmsg->cmsg_level == IPPROTO_IPV6 && 1028 cmsg->cmsg_type == IPV6_PKTINFO) { 1029 rep.srctype = 6; 1030 memmove(&rep.pktinfo.v6info, CMSG_DATA(cmsg), 1031 sizeof(struct in6_pktinfo)); 1032 break; 1033 #ifdef IP_PKTINFO 1034 } else if( cmsg->cmsg_level == IPPROTO_IP && 1035 cmsg->cmsg_type == IP_PKTINFO) { 1036 rep.srctype = 4; 1037 memmove(&rep.pktinfo.v4info, CMSG_DATA(cmsg), 1038 sizeof(struct in_pktinfo)); 1039 break; 1040 #elif defined(IP_RECVDSTADDR) 1041 } else if( cmsg->cmsg_level == IPPROTO_IP && 1042 cmsg->cmsg_type == IP_RECVDSTADDR) { 1043 rep.srctype = 4; 1044 memmove(&rep.pktinfo.v4addr, CMSG_DATA(cmsg), 1045 sizeof(struct in_addr)); 1046 break; 1047 #endif /* IP_PKTINFO or IP_RECVDSTADDR */ 1048 #ifdef HAVE_LINUX_NET_TSTAMP_H 1049 } else if( cmsg->cmsg_level == SOL_SOCKET && 1050 cmsg->cmsg_type == SO_TIMESTAMPNS) { 1051 ts = (struct timespec *)CMSG_DATA(cmsg); 1052 TIMESPEC_TO_TIMEVAL(&rep.c->recv_tv, ts); 1053 } else if( cmsg->cmsg_level == SOL_SOCKET && 1054 cmsg->cmsg_type == SO_TIMESTAMPING) { 1055 ts = (struct timespec *)CMSG_DATA(cmsg); 1056 TIMESPEC_TO_TIMEVAL(&rep.c->recv_tv, ts); 1057 } else if( cmsg->cmsg_level == SOL_SOCKET && 1058 cmsg->cmsg_type == SO_TIMESTAMP) { 1059 memmove(&rep.c->recv_tv, CMSG_DATA(cmsg), sizeof(struct timeval)); 1060 #endif /* HAVE_LINUX_NET_TSTAMP_H */ 1061 } 1062 } 1063 1064 if(verbosity >= VERB_ALGO && rep.srctype != 0) 1065 p_ancil("receive_udp on interface", &rep); 1066 #endif /* S_SPLINT_S */ 1067 1068 if(rep.c->pp2_enabled && !consume_pp2_header(rep.c->buffer, 1069 &rep, 0)) { 1070 log_err("proxy_protocol: could not consume PROXYv2 header"); 1071 return; 1072 } 1073 if(!rep.is_proxied) { 1074 rep.client_addrlen = rep.remote_addrlen; 1075 memmove(&rep.client_addr, &rep.remote_addr, 1076 rep.remote_addrlen); 1077 } 1078 1079 fptr_ok(fptr_whitelist_comm_point(rep.c->callback)); 1080 
if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) { 1081 /* send back immediate reply */ 1082 struct sldns_buffer *buffer; 1083 #ifdef USE_DNSCRYPT 1084 buffer = rep.c->dnscrypt_buffer; 1085 #else 1086 buffer = rep.c->buffer; 1087 #endif 1088 (void)comm_point_send_udp_msg_if(rep.c, buffer, 1089 (struct sockaddr*)&rep.remote_addr, 1090 rep.remote_addrlen, &rep); 1091 } 1092 if(!rep.c || rep.c->fd == -1) /* commpoint closed */ 1093 break; 1094 } 1095 } 1096 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */ 1097 1098 void 1099 comm_point_udp_callback(int fd, short event, void* arg) 1100 { 1101 struct comm_reply rep; 1102 ssize_t rcv; 1103 int i; 1104 struct sldns_buffer *buffer; 1105 1106 rep.c = (struct comm_point*)arg; 1107 log_assert(rep.c->type == comm_udp); 1108 1109 if(!(event&UB_EV_READ)) 1110 return; 1111 log_assert(rep.c && rep.c->buffer && rep.c->fd == fd); 1112 ub_comm_base_now(rep.c->ev->base); 1113 for(i=0; i<NUM_UDP_PER_SELECT; i++) { 1114 sldns_buffer_clear(rep.c->buffer); 1115 rep.remote_addrlen = (socklen_t)sizeof(rep.remote_addr); 1116 log_assert(fd != -1); 1117 log_assert(sldns_buffer_remaining(rep.c->buffer) > 0); 1118 rcv = recvfrom(fd, (void*)sldns_buffer_begin(rep.c->buffer), 1119 sldns_buffer_remaining(rep.c->buffer), MSG_DONTWAIT, 1120 (struct sockaddr*)&rep.remote_addr, &rep.remote_addrlen); 1121 if(rcv == -1) { 1122 #ifndef USE_WINSOCK 1123 if(errno != EAGAIN && errno != EINTR 1124 && udp_recv_needs_log(errno)) 1125 log_err("recvfrom %d failed: %s", 1126 fd, strerror(errno)); 1127 #else 1128 if(WSAGetLastError() != WSAEINPROGRESS && 1129 WSAGetLastError() != WSAECONNRESET && 1130 WSAGetLastError()!= WSAEWOULDBLOCK && 1131 udp_recv_needs_log(WSAGetLastError())) 1132 log_err("recvfrom failed: %s", 1133 wsa_strerror(WSAGetLastError())); 1134 #endif 1135 return; 1136 } 1137 sldns_buffer_skip(rep.c->buffer, rcv); 1138 sldns_buffer_flip(rep.c->buffer); 1139 rep.srctype = 0; 1140 rep.is_proxied = 0; 1141 1142 
if(rep.c->pp2_enabled && !consume_pp2_header(rep.c->buffer, 1143 &rep, 0)) { 1144 log_err("proxy_protocol: could not consume PROXYv2 header"); 1145 return; 1146 } 1147 if(!rep.is_proxied) { 1148 rep.client_addrlen = rep.remote_addrlen; 1149 memmove(&rep.client_addr, &rep.remote_addr, 1150 rep.remote_addrlen); 1151 } 1152 1153 fptr_ok(fptr_whitelist_comm_point(rep.c->callback)); 1154 if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) { 1155 /* send back immediate reply */ 1156 #ifdef USE_DNSCRYPT 1157 buffer = rep.c->dnscrypt_buffer; 1158 #else 1159 buffer = rep.c->buffer; 1160 #endif 1161 (void)comm_point_send_udp_msg(rep.c, buffer, 1162 (struct sockaddr*)&rep.remote_addr, 1163 rep.remote_addrlen, 0); 1164 } 1165 if(!rep.c || rep.c->fd != fd) /* commpoint closed to -1 or reused for 1166 another UDP port. Note rep.c cannot be reused with TCP fd. */ 1167 break; 1168 } 1169 } 1170 1171 #ifdef HAVE_NGTCP2 1172 void 1173 doq_pkt_addr_init(struct doq_pkt_addr* paddr) 1174 { 1175 paddr->addrlen = (socklen_t)sizeof(paddr->addr); 1176 paddr->localaddrlen = (socklen_t)sizeof(paddr->localaddr); 1177 paddr->ifindex = 0; 1178 } 1179 1180 /** set the ecn on the transmission */ 1181 static void 1182 doq_set_ecn(int fd, int family, uint32_t ecn) 1183 { 1184 unsigned int val = ecn; 1185 if(family == AF_INET6) { 1186 if(setsockopt(fd, IPPROTO_IPV6, IPV6_TCLASS, &val, 1187 (socklen_t)sizeof(val)) == -1) { 1188 log_err("setsockopt(.. IPV6_TCLASS ..): %s", 1189 strerror(errno)); 1190 } 1191 return; 1192 } 1193 if(setsockopt(fd, IPPROTO_IP, IP_TOS, &val, 1194 (socklen_t)sizeof(val)) == -1) { 1195 log_err("setsockopt(.. 
IP_TOS ..): %s", 1196 strerror(errno)); 1197 } 1198 } 1199 1200 /** set the local address in the control ancillary data */ 1201 static void 1202 doq_set_localaddr_cmsg(struct msghdr* msg, size_t control_size, 1203 struct doq_addr_storage* localaddr, socklen_t localaddrlen, 1204 int ifindex) 1205 { 1206 #ifndef S_SPLINT_S 1207 struct cmsghdr* cmsg; 1208 #endif /* S_SPLINT_S */ 1209 #ifndef S_SPLINT_S 1210 cmsg = CMSG_FIRSTHDR(msg); 1211 if(localaddr->sockaddr.in.sin_family == AF_INET) { 1212 #ifdef IP_PKTINFO 1213 struct sockaddr_in* sa = (struct sockaddr_in*)localaddr; 1214 struct in_pktinfo v4info; 1215 log_assert(localaddrlen >= sizeof(struct sockaddr_in)); 1216 msg->msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo)); 1217 memset(msg->msg_control, 0, msg->msg_controllen); 1218 log_assert(msg->msg_controllen <= control_size); 1219 cmsg->cmsg_level = IPPROTO_IP; 1220 cmsg->cmsg_type = IP_PKTINFO; 1221 memset(&v4info, 0, sizeof(v4info)); 1222 # ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST 1223 memmove(&v4info.ipi_spec_dst, &sa->sin_addr, 1224 sizeof(struct in_addr)); 1225 # else 1226 memmove(&v4info.ipi_addr, &sa->sin_addr, 1227 sizeof(struct in_addr)); 1228 # endif 1229 v4info.ipi_ifindex = ifindex; 1230 memmove(CMSG_DATA(cmsg), &v4info, sizeof(struct in_pktinfo)); 1231 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo)); 1232 #elif defined(IP_SENDSRCADDR) 1233 struct sockaddr_in* sa= (struct sockaddr_in*)localaddr; 1234 log_assert(localaddrlen >= sizeof(struct sockaddr_in)); 1235 msg->msg_controllen = CMSG_SPACE(sizeof(struct in_addr)); 1236 memset(msg->msg_control, 0, msg->msg_controllen); 1237 log_assert(msg->msg_controllen <= control_size); 1238 cmsg->cmsg_level = IPPROTO_IP; 1239 cmsg->cmsg_type = IP_SENDSRCADDR; 1240 memmove(CMSG_DATA(cmsg), &sa->sin_addr, 1241 sizeof(struct in_addr)); 1242 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr)); 1243 #endif 1244 } else { 1245 struct sockaddr_in6* sa6 = (struct sockaddr_in6*)localaddr; 1246 struct in6_pktinfo 
v6info; 1247 log_assert(localaddrlen >= sizeof(struct sockaddr_in6)); 1248 msg->msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo)); 1249 memset(msg->msg_control, 0, msg->msg_controllen); 1250 log_assert(msg->msg_controllen <= control_size); 1251 cmsg->cmsg_level = IPPROTO_IPV6; 1252 cmsg->cmsg_type = IPV6_PKTINFO; 1253 memset(&v6info, 0, sizeof(v6info)); 1254 memmove(&v6info.ipi6_addr, &sa6->sin6_addr, 1255 sizeof(struct in6_addr)); 1256 v6info.ipi6_ifindex = ifindex; 1257 memmove(CMSG_DATA(cmsg), &v6info, sizeof(struct in6_pktinfo)); 1258 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo)); 1259 } 1260 #endif /* S_SPLINT_S */ 1261 /* Ignore unused variables, if no assertions are compiled. */ 1262 (void)localaddrlen; 1263 (void)control_size; 1264 } 1265 1266 /** write address and port into strings */ 1267 static int 1268 doq_print_addr_port(struct doq_addr_storage* addr, socklen_t addrlen, 1269 char* host, size_t hostlen, char* port, size_t portlen) 1270 { 1271 if(addr->sockaddr.in.sin_family == AF_INET) { 1272 struct sockaddr_in* sa = (struct sockaddr_in*)addr; 1273 log_assert(addrlen >= sizeof(*sa)); 1274 if(inet_ntop(sa->sin_family, &sa->sin_addr, host, 1275 (socklen_t)hostlen) == 0) { 1276 log_hex("inet_ntop error: address", &sa->sin_addr, 1277 sizeof(sa->sin_addr)); 1278 return 0; 1279 } 1280 snprintf(port, portlen, "%u", (unsigned)ntohs(sa->sin_port)); 1281 } else if(addr->sockaddr.in.sin_family == AF_INET6) { 1282 struct sockaddr_in6* sa6 = (struct sockaddr_in6*)addr; 1283 log_assert(addrlen >= sizeof(*sa6)); 1284 if(inet_ntop(sa6->sin6_family, &sa6->sin6_addr, host, 1285 (socklen_t)hostlen) == 0) { 1286 log_hex("inet_ntop error: address", &sa6->sin6_addr, 1287 sizeof(sa6->sin6_addr)); 1288 return 0; 1289 } 1290 snprintf(port, portlen, "%u", (unsigned)ntohs(sa6->sin6_port)); 1291 } 1292 return 1; 1293 } 1294 1295 /** doq store the blocked packet when write has blocked */ 1296 static void 1297 doq_store_blocked_pkt(struct comm_point* c, struct 
doq_pkt_addr* paddr, 1298 uint32_t ecn) 1299 { 1300 if(c->doq_socket->have_blocked_pkt) 1301 return; /* should not happen that we write when there is 1302 already a blocked write, but if so, drop it. */ 1303 if(sldns_buffer_limit(c->doq_socket->pkt_buf) > 1304 sldns_buffer_capacity(c->doq_socket->blocked_pkt)) 1305 return; /* impossibly large, drop packet. impossible because 1306 pkt_buf and blocked_pkt are the same size. */ 1307 c->doq_socket->have_blocked_pkt = 1; 1308 c->doq_socket->blocked_pkt_pi.ecn = ecn; 1309 memcpy(c->doq_socket->blocked_paddr, paddr, 1310 sizeof(*c->doq_socket->blocked_paddr)); 1311 sldns_buffer_clear(c->doq_socket->blocked_pkt); 1312 sldns_buffer_write(c->doq_socket->blocked_pkt, 1313 sldns_buffer_begin(c->doq_socket->pkt_buf), 1314 sldns_buffer_limit(c->doq_socket->pkt_buf)); 1315 sldns_buffer_flip(c->doq_socket->blocked_pkt); 1316 } 1317 1318 void 1319 doq_send_pkt(struct comm_point* c, struct doq_pkt_addr* paddr, uint32_t ecn) 1320 { 1321 struct msghdr msg; 1322 struct iovec iov[1]; 1323 union { 1324 struct cmsghdr hdr; 1325 char buf[256]; 1326 } control; 1327 ssize_t ret; 1328 iov[0].iov_base = sldns_buffer_begin(c->doq_socket->pkt_buf); 1329 iov[0].iov_len = sldns_buffer_limit(c->doq_socket->pkt_buf); 1330 memset(&msg, 0, sizeof(msg)); 1331 msg.msg_name = (void*)&paddr->addr; 1332 msg.msg_namelen = paddr->addrlen; 1333 msg.msg_iov = iov; 1334 msg.msg_iovlen = 1; 1335 msg.msg_control = control.buf; 1336 #ifndef S_SPLINT_S 1337 msg.msg_controllen = sizeof(control.buf); 1338 #endif /* S_SPLINT_S */ 1339 msg.msg_flags = 0; 1340 1341 doq_set_localaddr_cmsg(&msg, sizeof(control.buf), &paddr->localaddr, 1342 paddr->localaddrlen, paddr->ifindex); 1343 doq_set_ecn(c->fd, paddr->addr.sockaddr.in.sin_family, ecn); 1344 1345 for(;;) { 1346 ret = sendmsg(c->fd, &msg, MSG_DONTWAIT); 1347 if(ret == -1 && errno == EINTR) 1348 continue; 1349 break; 1350 } 1351 if(ret == -1) { 1352 #ifndef USE_WINSOCK 1353 if(errno == EAGAIN || 1354 # ifdef 
EWOULDBLOCK 1355 errno == EWOULDBLOCK || 1356 # endif 1357 errno == ENOBUFS) 1358 #else 1359 if(WSAGetLastError() == WSAEINPROGRESS || 1360 WSAGetLastError() == WSAENOBUFS || 1361 WSAGetLastError() == WSAEWOULDBLOCK) 1362 #endif 1363 { 1364 /* udp send has blocked */ 1365 doq_store_blocked_pkt(c, paddr, ecn); 1366 return; 1367 } 1368 if(!udp_send_errno_needs_log((void*)&paddr->addr, 1369 paddr->addrlen)) 1370 return; 1371 if(verbosity >= VERB_OPS) { 1372 char host[256], port[32]; 1373 if(doq_print_addr_port(&paddr->addr, paddr->addrlen, 1374 host, sizeof(host), port, sizeof(port))) { 1375 verbose(VERB_OPS, "doq sendmsg to %s %s " 1376 "failed: %s", host, port, 1377 strerror(errno)); 1378 } else { 1379 verbose(VERB_OPS, "doq sendmsg failed: %s", 1380 strerror(errno)); 1381 } 1382 } 1383 return; 1384 } else if(ret != (ssize_t)sldns_buffer_limit(c->doq_socket->pkt_buf)) { 1385 char host[256], port[32]; 1386 if(doq_print_addr_port(&paddr->addr, paddr->addrlen, host, 1387 sizeof(host), port, sizeof(port))) { 1388 log_err("doq sendmsg to %s %s failed: " 1389 "sent %d in place of %d bytes", 1390 host, port, (int)ret, 1391 (int)sldns_buffer_limit(c->doq_socket->pkt_buf)); 1392 } else { 1393 log_err("doq sendmsg failed: " 1394 "sent %d in place of %d bytes", 1395 (int)ret, (int)sldns_buffer_limit(c->doq_socket->pkt_buf)); 1396 } 1397 return; 1398 } 1399 } 1400 1401 /** fetch port number */ 1402 static int 1403 doq_sockaddr_get_port(struct doq_addr_storage* addr) 1404 { 1405 if(addr->sockaddr.in.sin_family == AF_INET) { 1406 struct sockaddr_in* sa = (struct sockaddr_in*)addr; 1407 return ntohs(sa->sin_port); 1408 } else if(addr->sockaddr.in.sin_family == AF_INET6) { 1409 struct sockaddr_in6* sa6 = (struct sockaddr_in6*)addr; 1410 return ntohs(sa6->sin6_port); 1411 } 1412 return 0; 1413 } 1414 1415 /** get local address from ancillary data headers */ 1416 static int 1417 doq_get_localaddr_cmsg(struct comm_point* c, struct doq_pkt_addr* paddr, 1418 int* pkt_continue, struct 
msghdr* msg) 1419 { 1420 #ifndef S_SPLINT_S 1421 struct cmsghdr* cmsg; 1422 #endif /* S_SPLINT_S */ 1423 1424 memset(&paddr->localaddr, 0, sizeof(paddr->localaddr)); 1425 #ifndef S_SPLINT_S 1426 for(cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL; 1427 cmsg = CMSG_NXTHDR(msg, cmsg)) { 1428 if( cmsg->cmsg_level == IPPROTO_IPV6 && 1429 cmsg->cmsg_type == IPV6_PKTINFO) { 1430 struct in6_pktinfo* v6info = 1431 (struct in6_pktinfo*)CMSG_DATA(cmsg); 1432 struct sockaddr_in6* sa= (struct sockaddr_in6*) 1433 &paddr->localaddr; 1434 struct sockaddr_in6* rema = (struct sockaddr_in6*) 1435 &paddr->addr; 1436 if(rema->sin6_family != AF_INET6) { 1437 log_err("doq cmsg family mismatch cmsg is ip6"); 1438 *pkt_continue = 1; 1439 return 0; 1440 } 1441 sa->sin6_family = AF_INET6; 1442 sa->sin6_port = htons(doq_sockaddr_get_port( 1443 (void*)c->socket->addr)); 1444 paddr->ifindex = v6info->ipi6_ifindex; 1445 memmove(&sa->sin6_addr, &v6info->ipi6_addr, 1446 sizeof(struct in6_addr)); 1447 paddr->localaddrlen = sizeof(struct sockaddr_in6); 1448 break; 1449 #ifdef IP_PKTINFO 1450 } else if( cmsg->cmsg_level == IPPROTO_IP && 1451 cmsg->cmsg_type == IP_PKTINFO) { 1452 struct in_pktinfo* v4info = 1453 (struct in_pktinfo*)CMSG_DATA(cmsg); 1454 struct sockaddr_in* sa= (struct sockaddr_in*) 1455 &paddr->localaddr; 1456 struct sockaddr_in* rema = (struct sockaddr_in*) 1457 &paddr->addr; 1458 if(rema->sin_family != AF_INET) { 1459 log_err("doq cmsg family mismatch cmsg is ip4"); 1460 *pkt_continue = 1; 1461 return 0; 1462 } 1463 sa->sin_family = AF_INET; 1464 sa->sin_port = htons(doq_sockaddr_get_port( 1465 (void*)c->socket->addr)); 1466 paddr->ifindex = v4info->ipi_ifindex; 1467 memmove(&sa->sin_addr, &v4info->ipi_addr, 1468 sizeof(struct in_addr)); 1469 paddr->localaddrlen = sizeof(struct sockaddr_in); 1470 break; 1471 #elif defined(IP_RECVDSTADDR) 1472 } else if( cmsg->cmsg_level == IPPROTO_IP && 1473 cmsg->cmsg_type == IP_RECVDSTADDR) { 1474 struct sockaddr_in* sa= (struct sockaddr_in*) 1475 
&paddr->localaddr; 1476 struct sockaddr_in* rema = (struct sockaddr_in*) 1477 &paddr->addr; 1478 if(rema->sin_family != AF_INET) { 1479 log_err("doq cmsg family mismatch cmsg is ip4"); 1480 *pkt_continue = 1; 1481 return 0; 1482 } 1483 sa->sin_family = AF_INET; 1484 sa->sin_port = htons(doq_sockaddr_get_port( 1485 (void*)c->socket->addr)); 1486 paddr->ifindex = 0; 1487 memmove(&sa.sin_addr, CMSG_DATA(cmsg), 1488 sizeof(struct in_addr)); 1489 paddr->localaddrlen = sizeof(struct sockaddr_in); 1490 break; 1491 #endif /* IP_PKTINFO or IP_RECVDSTADDR */ 1492 } 1493 } 1494 #endif /* S_SPLINT_S */ 1495 1496 return 1; 1497 } 1498 1499 /** get packet ecn information */ 1500 static uint32_t 1501 msghdr_get_ecn(struct msghdr* msg, int family) 1502 { 1503 #ifndef S_SPLINT_S 1504 struct cmsghdr* cmsg; 1505 if(family == AF_INET6) { 1506 for(cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL; 1507 cmsg = CMSG_NXTHDR(msg, cmsg)) { 1508 if(cmsg->cmsg_level == IPPROTO_IPV6 && 1509 cmsg->cmsg_type == IPV6_TCLASS && 1510 cmsg->cmsg_len != 0) { 1511 uint8_t* ecn = (uint8_t*)CMSG_DATA(cmsg); 1512 return *ecn; 1513 } 1514 } 1515 return 0; 1516 } 1517 for(cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL; 1518 cmsg = CMSG_NXTHDR(msg, cmsg)) { 1519 if(cmsg->cmsg_level == IPPROTO_IP && 1520 cmsg->cmsg_type == IP_TOS && 1521 cmsg->cmsg_len != 0) { 1522 uint8_t* ecn = (uint8_t*)CMSG_DATA(cmsg); 1523 return *ecn; 1524 } 1525 } 1526 #endif /* S_SPLINT_S */ 1527 return 0; 1528 } 1529 1530 /** receive packet for DoQ on UDP. get ancillary data for addresses, 1531 * return false if failed and the callback can stop receiving UDP packets 1532 * if pkt_continue is false. 
*/ 1533 static int 1534 doq_recv(struct comm_point* c, struct doq_pkt_addr* paddr, int* pkt_continue, 1535 struct ngtcp2_pkt_info* pi) 1536 { 1537 struct msghdr msg; 1538 struct iovec iov[1]; 1539 ssize_t rcv; 1540 union { 1541 struct cmsghdr hdr; 1542 char buf[256]; 1543 } ancil; 1544 1545 msg.msg_name = &paddr->addr; 1546 msg.msg_namelen = (socklen_t)sizeof(paddr->addr); 1547 iov[0].iov_base = sldns_buffer_begin(c->doq_socket->pkt_buf); 1548 iov[0].iov_len = sldns_buffer_remaining(c->doq_socket->pkt_buf); 1549 msg.msg_iov = iov; 1550 msg.msg_iovlen = 1; 1551 msg.msg_control = ancil.buf; 1552 #ifndef S_SPLINT_S 1553 msg.msg_controllen = sizeof(ancil.buf); 1554 #endif /* S_SPLINT_S */ 1555 msg.msg_flags = 0; 1556 1557 rcv = recvmsg(c->fd, &msg, MSG_DONTWAIT); 1558 if(rcv == -1) { 1559 if(errno != EAGAIN && errno != EINTR 1560 && udp_recv_needs_log(errno)) { 1561 log_err("recvmsg failed for doq: %s", strerror(errno)); 1562 } 1563 *pkt_continue = 0; 1564 return 0; 1565 } 1566 1567 paddr->addrlen = msg.msg_namelen; 1568 sldns_buffer_skip(c->doq_socket->pkt_buf, rcv); 1569 sldns_buffer_flip(c->doq_socket->pkt_buf); 1570 if(!doq_get_localaddr_cmsg(c, paddr, pkt_continue, &msg)) 1571 return 0; 1572 pi->ecn = msghdr_get_ecn(&msg, paddr->addr.sockaddr.in.sin_family); 1573 return 1; 1574 } 1575 1576 /** send the version negotiation for doq. scid and dcid are flipped around 1577 * to send back to the client. 
*/ 1578 static void 1579 doq_send_version_negotiation(struct comm_point* c, struct doq_pkt_addr* paddr, 1580 const uint8_t* dcid, size_t dcidlen, const uint8_t* scid, 1581 size_t scidlen) 1582 { 1583 uint32_t versions[2]; 1584 size_t versions_len = 0; 1585 ngtcp2_ssize ret; 1586 uint8_t unused_random; 1587 1588 /* fill the array with supported versions */ 1589 versions[0] = NGTCP2_PROTO_VER_V1; 1590 versions_len = 1; 1591 unused_random = ub_random_max(c->doq_socket->rnd, 256); 1592 sldns_buffer_clear(c->doq_socket->pkt_buf); 1593 ret = ngtcp2_pkt_write_version_negotiation( 1594 sldns_buffer_begin(c->doq_socket->pkt_buf), 1595 sldns_buffer_capacity(c->doq_socket->pkt_buf), unused_random, 1596 dcid, dcidlen, scid, scidlen, versions, versions_len); 1597 if(ret < 0) { 1598 log_err("ngtcp2_pkt_write_version_negotiation failed: %s", 1599 ngtcp2_strerror(ret)); 1600 return; 1601 } 1602 sldns_buffer_set_position(c->doq_socket->pkt_buf, ret); 1603 sldns_buffer_flip(c->doq_socket->pkt_buf); 1604 doq_send_pkt(c, paddr, 0); 1605 } 1606 1607 /** Find the doq_conn object by remote address and dcid */ 1608 static struct doq_conn* 1609 doq_conn_find(struct doq_table* table, struct doq_addr_storage* addr, 1610 socklen_t addrlen, struct doq_addr_storage* localaddr, 1611 socklen_t localaddrlen, int ifindex, const uint8_t* dcid, 1612 size_t dcidlen) 1613 { 1614 struct rbnode_type* node; 1615 struct doq_conn key; 1616 memset(&key.node, 0, sizeof(key.node)); 1617 key.node.key = &key; 1618 memmove(&key.key.paddr.addr, addr, addrlen); 1619 key.key.paddr.addrlen = addrlen; 1620 memmove(&key.key.paddr.localaddr, localaddr, localaddrlen); 1621 key.key.paddr.localaddrlen = localaddrlen; 1622 key.key.paddr.ifindex = ifindex; 1623 key.key.dcid = (void*)dcid; 1624 key.key.dcidlen = dcidlen; 1625 node = rbtree_search(table->conn_tree, &key); 1626 if(node) 1627 return (struct doq_conn*)node->key; 1628 return NULL; 1629 } 1630 1631 /** find the doq_con by the connection id */ 1632 static struct 
doq_conn* 1633 doq_conn_find_by_id(struct doq_table* table, const uint8_t* dcid, 1634 size_t dcidlen) 1635 { 1636 struct doq_conid* conid; 1637 lock_rw_rdlock(&table->conid_lock); 1638 conid = doq_conid_find(table, dcid, dcidlen); 1639 if(conid) { 1640 /* make a copy of the key */ 1641 struct doq_conn* conn; 1642 struct doq_conn_key key = conid->key; 1643 uint8_t cid[NGTCP2_MAX_CIDLEN]; 1644 log_assert(conid->key.dcidlen <= NGTCP2_MAX_CIDLEN); 1645 memcpy(cid, conid->key.dcid, conid->key.dcidlen); 1646 key.dcid = cid; 1647 lock_rw_unlock(&table->conid_lock); 1648 1649 /* now that the conid lock is released, look up the conn */ 1650 lock_rw_rdlock(&table->lock); 1651 conn = doq_conn_find(table, &key.paddr.addr, 1652 key.paddr.addrlen, &key.paddr.localaddr, 1653 key.paddr.localaddrlen, key.paddr.ifindex, key.dcid, 1654 key.dcidlen); 1655 if(!conn) { 1656 /* The connection got deleted between the conid lookup 1657 * and the connection lock grab, it no longer exists, 1658 * so return null. */ 1659 lock_rw_unlock(&table->lock); 1660 return NULL; 1661 } 1662 lock_basic_lock(&conn->lock); 1663 if(conn->is_deleted) { 1664 lock_rw_unlock(&table->lock); 1665 lock_basic_unlock(&conn->lock); 1666 return NULL; 1667 } 1668 lock_rw_unlock(&table->lock); 1669 return conn; 1670 } 1671 lock_rw_unlock(&table->conid_lock); 1672 return NULL; 1673 } 1674 1675 /** Find the doq_conn, by addr or by connection id */ 1676 static struct doq_conn* 1677 doq_conn_find_by_addr_or_cid(struct doq_table* table, 1678 struct doq_pkt_addr* paddr, const uint8_t* dcid, size_t dcidlen) 1679 { 1680 struct doq_conn* conn; 1681 lock_rw_rdlock(&table->lock); 1682 conn = doq_conn_find(table, &paddr->addr, paddr->addrlen, 1683 &paddr->localaddr, paddr->localaddrlen, paddr->ifindex, 1684 dcid, dcidlen); 1685 if(conn && conn->is_deleted) { 1686 conn = NULL; 1687 } 1688 if(conn) { 1689 lock_basic_lock(&conn->lock); 1690 lock_rw_unlock(&table->lock); 1691 verbose(VERB_ALGO, "doq: found connection by address, 
dcid"); 1692 } else { 1693 lock_rw_unlock(&table->lock); 1694 conn = doq_conn_find_by_id(table, dcid, dcidlen); 1695 if(conn) { 1696 verbose(VERB_ALGO, "doq: found connection by dcid"); 1697 } 1698 } 1699 return conn; 1700 } 1701 1702 /** decode doq packet header, false on handled or failure, true to continue 1703 * to process the packet */ 1704 static int 1705 doq_decode_pkt_header_negotiate(struct comm_point* c, 1706 struct doq_pkt_addr* paddr, struct doq_conn** conn) 1707 { 1708 #ifdef HAVE_STRUCT_NGTCP2_VERSION_CID 1709 struct ngtcp2_version_cid vc; 1710 #else 1711 uint32_t version; 1712 const uint8_t *dcid, *scid; 1713 size_t dcidlen, scidlen; 1714 #endif 1715 int rv; 1716 1717 #ifdef HAVE_STRUCT_NGTCP2_VERSION_CID 1718 rv = ngtcp2_pkt_decode_version_cid(&vc, 1719 sldns_buffer_begin(c->doq_socket->pkt_buf), 1720 sldns_buffer_limit(c->doq_socket->pkt_buf), 1721 c->doq_socket->sv_scidlen); 1722 #else 1723 rv = ngtcp2_pkt_decode_version_cid(&version, &dcid, &dcidlen, 1724 &scid, &scidlen, sldns_buffer_begin(c->doq_socket->pkt_buf), 1725 sldns_buffer_limit(c->doq_socket->pkt_buf), c->doq_socket->sv_scidlen); 1726 #endif 1727 if(rv != 0) { 1728 if(rv == NGTCP2_ERR_VERSION_NEGOTIATION) { 1729 /* send the version negotiation */ 1730 doq_send_version_negotiation(c, paddr, 1731 #ifdef HAVE_STRUCT_NGTCP2_VERSION_CID 1732 vc.scid, vc.scidlen, vc.dcid, vc.dcidlen 1733 #else 1734 scid, scidlen, dcid, dcidlen 1735 #endif 1736 ); 1737 return 0; 1738 } 1739 verbose(VERB_ALGO, "doq: could not decode version " 1740 "and CID from QUIC packet header: %s", 1741 ngtcp2_strerror(rv)); 1742 return 0; 1743 } 1744 1745 if(verbosity >= VERB_ALGO) { 1746 verbose(VERB_ALGO, "ngtcp2_pkt_decode_version_cid packet has " 1747 "QUIC protocol version %u", (unsigned) 1748 #ifdef HAVE_STRUCT_NGTCP2_VERSION_CID 1749 vc. 
1750 #endif 1751 version 1752 ); 1753 log_hex("dcid", 1754 #ifdef HAVE_STRUCT_NGTCP2_VERSION_CID 1755 (void*)vc.dcid, vc.dcidlen 1756 #else 1757 (void*)dcid, dcidlen 1758 #endif 1759 ); 1760 log_hex("scid", 1761 #ifdef HAVE_STRUCT_NGTCP2_VERSION_CID 1762 (void*)vc.scid, vc.scidlen 1763 #else 1764 (void*)scid, scidlen 1765 #endif 1766 ); 1767 } 1768 *conn = doq_conn_find_by_addr_or_cid(c->doq_socket->table, paddr, 1769 #ifdef HAVE_STRUCT_NGTCP2_VERSION_CID 1770 vc.dcid, vc.dcidlen 1771 #else 1772 dcid, dcidlen 1773 #endif 1774 ); 1775 if(*conn) 1776 (*conn)->doq_socket = c->doq_socket; 1777 return 1; 1778 } 1779 1780 /** fill cid structure with random data */ 1781 static void doq_cid_randfill(struct ngtcp2_cid* cid, size_t datalen, 1782 struct ub_randstate* rnd) 1783 { 1784 uint8_t buf[32]; 1785 if(datalen > sizeof(buf)) 1786 datalen = sizeof(buf); 1787 doq_fill_rand(rnd, buf, datalen); 1788 ngtcp2_cid_init(cid, buf, datalen); 1789 } 1790 1791 /** send retry packet for doq connection. */ 1792 static void 1793 doq_send_retry(struct comm_point* c, struct doq_pkt_addr* paddr, 1794 struct ngtcp2_pkt_hd* hd) 1795 { 1796 char host[256], port[32]; 1797 struct ngtcp2_cid scid; 1798 uint8_t token[NGTCP2_CRYPTO_MAX_RETRY_TOKENLEN]; 1799 ngtcp2_tstamp ts; 1800 ngtcp2_ssize tokenlen, ret; 1801 1802 if(!doq_print_addr_port(&paddr->addr, paddr->addrlen, host, 1803 sizeof(host), port, sizeof(port))) { 1804 log_err("doq_send_retry failed"); 1805 return; 1806 } 1807 verbose(VERB_ALGO, "doq: sending retry packet to %s %s", host, port); 1808 1809 /* the server chosen source connection ID */ 1810 scid.datalen = c->doq_socket->sv_scidlen; 1811 doq_cid_randfill(&scid, scid.datalen, c->doq_socket->rnd); 1812 1813 ts = doq_get_timestamp_nanosec(); 1814 1815 tokenlen = ngtcp2_crypto_generate_retry_token(token, 1816 c->doq_socket->static_secret, c->doq_socket->static_secret_len, 1817 hd->version, (void*)&paddr->addr, paddr->addrlen, &scid, 1818 &hd->dcid, ts); 1819 if(tokenlen < 0) { 1820 
log_err("ngtcp2_crypto_generate_retry_token failed: %s", 1821 ngtcp2_strerror(tokenlen)); 1822 return; 1823 } 1824 1825 sldns_buffer_clear(c->doq_socket->pkt_buf); 1826 ret = ngtcp2_crypto_write_retry(sldns_buffer_begin(c->doq_socket->pkt_buf), 1827 sldns_buffer_capacity(c->doq_socket->pkt_buf), hd->version, 1828 &hd->scid, &scid, &hd->dcid, token, tokenlen); 1829 if(ret < 0) { 1830 log_err("ngtcp2_crypto_write_retry failed: %s", 1831 ngtcp2_strerror(ret)); 1832 return; 1833 } 1834 sldns_buffer_set_position(c->doq_socket->pkt_buf, ret); 1835 sldns_buffer_flip(c->doq_socket->pkt_buf); 1836 doq_send_pkt(c, paddr, 0); 1837 } 1838 1839 /** doq send stateless connection close */ 1840 static void 1841 doq_send_stateless_connection_close(struct comm_point* c, 1842 struct doq_pkt_addr* paddr, struct ngtcp2_pkt_hd* hd, 1843 uint64_t error_code) 1844 { 1845 ngtcp2_ssize ret; 1846 sldns_buffer_clear(c->doq_socket->pkt_buf); 1847 ret = ngtcp2_crypto_write_connection_close( 1848 sldns_buffer_begin(c->doq_socket->pkt_buf), 1849 sldns_buffer_capacity(c->doq_socket->pkt_buf), hd->version, &hd->scid, 1850 &hd->dcid, error_code, NULL, 0); 1851 if(ret < 0) { 1852 log_err("ngtcp2_crypto_write_connection_close failed: %s", 1853 ngtcp2_strerror(ret)); 1854 return; 1855 } 1856 sldns_buffer_set_position(c->doq_socket->pkt_buf, ret); 1857 sldns_buffer_flip(c->doq_socket->pkt_buf); 1858 doq_send_pkt(c, paddr, 0); 1859 } 1860 1861 /** doq verify retry token, false on failure */ 1862 static int 1863 doq_verify_retry_token(struct comm_point* c, struct doq_pkt_addr* paddr, 1864 struct ngtcp2_cid* ocid, struct ngtcp2_pkt_hd* hd) 1865 { 1866 char host[256], port[32]; 1867 ngtcp2_tstamp ts; 1868 if(!doq_print_addr_port(&paddr->addr, paddr->addrlen, host, 1869 sizeof(host), port, sizeof(port))) { 1870 log_err("doq_verify_retry_token failed"); 1871 return 0; 1872 } 1873 ts = doq_get_timestamp_nanosec(); 1874 verbose(VERB_ALGO, "doq: verifying retry token from %s %s", host, 1875 port); 1876 
if(ngtcp2_crypto_verify_retry_token(ocid, 1877 #ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN 1878 hd->token, hd->tokenlen, 1879 #else 1880 hd->token.base, hd->token.len, 1881 #endif 1882 c->doq_socket->static_secret, 1883 c->doq_socket->static_secret_len, hd->version, 1884 (void*)&paddr->addr, paddr->addrlen, &hd->dcid, 1885 10*NGTCP2_SECONDS, ts) != 0) { 1886 verbose(VERB_ALGO, "doq: could not verify retry token " 1887 "from %s %s", host, port); 1888 return 0; 1889 } 1890 verbose(VERB_ALGO, "doq: verified retry token from %s %s", host, port); 1891 return 1; 1892 } 1893 1894 /** doq verify token, false on failure */ 1895 static int 1896 doq_verify_token(struct comm_point* c, struct doq_pkt_addr* paddr, 1897 struct ngtcp2_pkt_hd* hd) 1898 { 1899 char host[256], port[32]; 1900 ngtcp2_tstamp ts; 1901 if(!doq_print_addr_port(&paddr->addr, paddr->addrlen, host, 1902 sizeof(host), port, sizeof(port))) { 1903 log_err("doq_verify_token failed"); 1904 return 0; 1905 } 1906 ts = doq_get_timestamp_nanosec(); 1907 verbose(VERB_ALGO, "doq: verifying token from %s %s", host, port); 1908 if(ngtcp2_crypto_verify_regular_token( 1909 #ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN 1910 hd->token, hd->tokenlen, 1911 #else 1912 hd->token.base, hd->token.len, 1913 #endif 1914 c->doq_socket->static_secret, c->doq_socket->static_secret_len, 1915 (void*)&paddr->addr, paddr->addrlen, 3600*NGTCP2_SECONDS, 1916 ts) != 0) { 1917 verbose(VERB_ALGO, "doq: could not verify token from %s %s", 1918 host, port); 1919 return 0; 1920 } 1921 verbose(VERB_ALGO, "doq: verified token from %s %s", host, port); 1922 return 1; 1923 } 1924 1925 /** delete and remove from the lookup tree the doq_conn connection */ 1926 static void 1927 doq_delete_connection(struct comm_point* c, struct doq_conn* conn) 1928 { 1929 struct doq_conn copy; 1930 uint8_t cid[NGTCP2_MAX_CIDLEN]; 1931 rbnode_type* node; 1932 if(!conn) 1933 return; 1934 /* Copy the key and set it deleted. 
*/ 1935 conn->is_deleted = 1; 1936 doq_conn_write_disable(conn); 1937 copy.key = conn->key; 1938 log_assert(conn->key.dcidlen <= NGTCP2_MAX_CIDLEN); 1939 memcpy(cid, conn->key.dcid, conn->key.dcidlen); 1940 copy.key.dcid = cid; 1941 copy.node.key = © 1942 lock_basic_unlock(&conn->lock); 1943 1944 /* Now get the table lock to delete it from the tree */ 1945 lock_rw_wrlock(&c->doq_socket->table->lock); 1946 node = rbtree_delete(c->doq_socket->table->conn_tree, copy.node.key); 1947 if(node) { 1948 conn = (struct doq_conn*)node->key; 1949 lock_basic_lock(&conn->lock); 1950 doq_conn_write_list_remove(c->doq_socket->table, conn); 1951 if(conn->timer.timer_in_list) { 1952 /* Remove timer from list first, because finding the 1953 * rbnode element of the setlist of same timeouts 1954 * needs tree lookup. Edit the tree structure after 1955 * that lookup. */ 1956 doq_timer_list_remove(c->doq_socket->table, 1957 &conn->timer); 1958 } 1959 if(conn->timer.timer_in_tree) 1960 doq_timer_tree_remove(c->doq_socket->table, 1961 &conn->timer); 1962 } 1963 lock_rw_unlock(&c->doq_socket->table->lock); 1964 if(node) { 1965 lock_basic_unlock(&conn->lock); 1966 doq_table_quic_size_subtract(c->doq_socket->table, 1967 sizeof(*conn)+conn->key.dcidlen); 1968 doq_conn_delete(conn, c->doq_socket->table); 1969 } 1970 } 1971 1972 /** create and setup a new doq connection, to a new destination, or with 1973 * a new dcid. It has a new set of streams. It is inserted in the lookup tree. 1974 * Returns NULL on failure. 
*/ 1975 static struct doq_conn* 1976 doq_setup_new_conn(struct comm_point* c, struct doq_pkt_addr* paddr, 1977 struct ngtcp2_pkt_hd* hd, struct ngtcp2_cid* ocid) 1978 { 1979 struct doq_conn* conn; 1980 if(!doq_table_quic_size_available(c->doq_socket->table, 1981 c->doq_socket->cfg, sizeof(*conn)+hd->dcid.datalen 1982 + sizeof(struct doq_stream) 1983 + 100 /* estimated input query */ 1984 + 1200 /* estimated output query */)) { 1985 verbose(VERB_ALGO, "doq: no mem available for new connection"); 1986 doq_send_stateless_connection_close(c, paddr, hd, 1987 NGTCP2_CONNECTION_REFUSED); 1988 return NULL; 1989 } 1990 conn = doq_conn_create(c, paddr, hd->dcid.data, hd->dcid.datalen, 1991 hd->version); 1992 if(!conn) { 1993 log_err("doq: could not allocate doq_conn"); 1994 return NULL; 1995 } 1996 lock_rw_wrlock(&c->doq_socket->table->lock); 1997 lock_basic_lock(&conn->lock); 1998 if(!rbtree_insert(c->doq_socket->table->conn_tree, &conn->node)) { 1999 lock_rw_unlock(&c->doq_socket->table->lock); 2000 log_err("doq: duplicate connection"); 2001 /* conn has no entry in writelist, and no timer yet. */ 2002 lock_basic_unlock(&conn->lock); 2003 doq_conn_delete(conn, c->doq_socket->table); 2004 return NULL; 2005 } 2006 lock_rw_unlock(&c->doq_socket->table->lock); 2007 doq_table_quic_size_add(c->doq_socket->table, 2008 sizeof(*conn)+conn->key.dcidlen); 2009 verbose(VERB_ALGO, "doq: created new connection"); 2010 2011 /* the scid and dcid switch meaning from the accepted client 2012 * connection to the server connection. The 'source' and 'destination' 2013 * meaning is reversed. 
*/ 2014 if(!doq_conn_setup(conn, hd->scid.data, hd->scid.datalen, 2015 (ocid?ocid->data:NULL), (ocid?ocid->datalen:0), 2016 #ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN 2017 hd->token, hd->tokenlen 2018 #else 2019 hd->token.base, hd->token.len 2020 #endif 2021 )) { 2022 log_err("doq: could not set up connection"); 2023 doq_delete_connection(c, conn); 2024 return NULL; 2025 } 2026 return conn; 2027 } 2028 2029 /** perform doq address validation */ 2030 static int 2031 doq_address_validation(struct comm_point* c, struct doq_pkt_addr* paddr, 2032 struct ngtcp2_pkt_hd* hd, struct ngtcp2_cid* ocid, 2033 struct ngtcp2_cid** pocid) 2034 { 2035 #ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN 2036 const uint8_t* token = hd->token; 2037 size_t tokenlen = hd->tokenlen; 2038 #else 2039 const uint8_t* token = hd->token.base; 2040 size_t tokenlen = hd->token.len; 2041 #endif 2042 verbose(VERB_ALGO, "doq stateless address validation"); 2043 2044 if(tokenlen == 0 || token == NULL) { 2045 doq_send_retry(c, paddr, hd); 2046 return 0; 2047 } 2048 if(token[0] != NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY && 2049 hd->dcid.datalen < NGTCP2_MIN_INITIAL_DCIDLEN) { 2050 doq_send_stateless_connection_close(c, paddr, hd, 2051 NGTCP2_INVALID_TOKEN); 2052 return 0; 2053 } 2054 if(token[0] == NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY) { 2055 if(!doq_verify_retry_token(c, paddr, ocid, hd)) { 2056 doq_send_stateless_connection_close(c, paddr, hd, 2057 NGTCP2_INVALID_TOKEN); 2058 return 0; 2059 } 2060 *pocid = ocid; 2061 } else if(token[0] == NGTCP2_CRYPTO_TOKEN_MAGIC_REGULAR) { 2062 if(!doq_verify_token(c, paddr, hd)) { 2063 doq_send_retry(c, paddr, hd); 2064 return 0; 2065 } 2066 #ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN 2067 hd->token = NULL; 2068 hd->tokenlen = 0; 2069 #else 2070 hd->token.base = NULL; 2071 hd->token.len = 0; 2072 #endif 2073 } else { 2074 verbose(VERB_ALGO, "doq address validation: unrecognised " 2075 "token in hd.token.base with magic byte 0x%2.2x", 2076 (int)token[0]); 2077 
if(c->doq_socket->validate_addr) { 2078 doq_send_retry(c, paddr, hd); 2079 return 0; 2080 } 2081 #ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN 2082 hd->token = NULL; 2083 hd->tokenlen = 0; 2084 #else 2085 hd->token.base = NULL; 2086 hd->token.len = 0; 2087 #endif 2088 } 2089 return 1; 2090 } 2091 2092 /** the doq accept, returns false if no further processing of content */ 2093 static int 2094 doq_accept(struct comm_point* c, struct doq_pkt_addr* paddr, 2095 struct doq_conn** conn, struct ngtcp2_pkt_info* pi) 2096 { 2097 int rv; 2098 struct ngtcp2_pkt_hd hd; 2099 struct ngtcp2_cid ocid, *pocid=NULL; 2100 int err_retry; 2101 memset(&hd, 0, sizeof(hd)); 2102 rv = ngtcp2_accept(&hd, sldns_buffer_begin(c->doq_socket->pkt_buf), 2103 sldns_buffer_limit(c->doq_socket->pkt_buf)); 2104 if(rv != 0) { 2105 if(rv == NGTCP2_ERR_RETRY) { 2106 doq_send_retry(c, paddr, &hd); 2107 return 0; 2108 } 2109 log_err("doq: initial packet failed, ngtcp2_accept failed: %s", 2110 ngtcp2_strerror(rv)); 2111 return 0; 2112 } 2113 if(c->doq_socket->validate_addr || 2114 #ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN 2115 hd.tokenlen 2116 #else 2117 hd.token.len 2118 #endif 2119 ) { 2120 if(!doq_address_validation(c, paddr, &hd, &ocid, &pocid)) 2121 return 0; 2122 } 2123 *conn = doq_setup_new_conn(c, paddr, &hd, pocid); 2124 if(!*conn) 2125 return 0; 2126 (*conn)->doq_socket = c->doq_socket; 2127 if(!doq_conn_recv(c, paddr, *conn, pi, &err_retry, NULL)) { 2128 if(err_retry) 2129 doq_send_retry(c, paddr, &hd); 2130 doq_delete_connection(c, *conn); 2131 *conn = NULL; 2132 return 0; 2133 } 2134 return 1; 2135 } 2136 2137 /** doq pickup a timer to wait for for the worker. If any timer exists. 
*/ 2138 static void 2139 doq_pickup_timer(struct comm_point* c) 2140 { 2141 struct doq_timer* t; 2142 struct timeval tv; 2143 int have_time = 0; 2144 memset(&tv, 0, sizeof(tv)); 2145 2146 lock_rw_wrlock(&c->doq_socket->table->lock); 2147 RBTREE_FOR(t, struct doq_timer*, c->doq_socket->table->timer_tree) { 2148 if(t->worker_doq_socket == NULL || 2149 t->worker_doq_socket == c->doq_socket) { 2150 /* pick up this element */ 2151 t->worker_doq_socket = c->doq_socket; 2152 have_time = 1; 2153 memcpy(&tv, &t->time, sizeof(tv)); 2154 break; 2155 } 2156 } 2157 lock_rw_unlock(&c->doq_socket->table->lock); 2158 2159 if(have_time) { 2160 struct timeval rel; 2161 timeval_subtract(&rel, &tv, c->doq_socket->now_tv); 2162 comm_timer_set(c->doq_socket->timer, &rel); 2163 memcpy(&c->doq_socket->marked_time, &tv, 2164 sizeof(c->doq_socket->marked_time)); 2165 verbose(VERB_ALGO, "doq pickup timer at %d.%6.6d in %d.%6.6d", 2166 (int)tv.tv_sec, (int)tv.tv_usec, (int)rel.tv_sec, 2167 (int)rel.tv_usec); 2168 } else { 2169 if(comm_timer_is_set(c->doq_socket->timer)) 2170 comm_timer_disable(c->doq_socket->timer); 2171 memset(&c->doq_socket->marked_time, 0, 2172 sizeof(c->doq_socket->marked_time)); 2173 verbose(VERB_ALGO, "doq timer disabled"); 2174 } 2175 } 2176 2177 /** doq done with connection, release locks and setup timer and write */ 2178 static void 2179 doq_done_setup_timer_and_write(struct comm_point* c, struct doq_conn* conn) 2180 { 2181 struct doq_conn copy; 2182 uint8_t cid[NGTCP2_MAX_CIDLEN]; 2183 rbnode_type* node; 2184 struct timeval new_tv; 2185 int write_change = 0, timer_change = 0; 2186 2187 /* No longer in callbacks, so the pointer to doq_socket is back 2188 * to NULL. 
*/ 2189 conn->doq_socket = NULL; 2190 2191 if(doq_conn_check_timer(conn, &new_tv)) 2192 timer_change = 1; 2193 if( (conn->write_interest && !conn->on_write_list) || 2194 (!conn->write_interest && conn->on_write_list)) 2195 write_change = 1; 2196 2197 if(!timer_change && !write_change) { 2198 /* Nothing to do. */ 2199 lock_basic_unlock(&conn->lock); 2200 return; 2201 } 2202 2203 /* The table lock is needed to change the write list and timer tree. 2204 * So the connection lock is release and then the connection is 2205 * looked up again. */ 2206 copy.key = conn->key; 2207 log_assert(conn->key.dcidlen <= NGTCP2_MAX_CIDLEN); 2208 memcpy(cid, conn->key.dcid, conn->key.dcidlen); 2209 copy.key.dcid = cid; 2210 copy.node.key = © 2211 lock_basic_unlock(&conn->lock); 2212 2213 lock_rw_wrlock(&c->doq_socket->table->lock); 2214 node = rbtree_search(c->doq_socket->table->conn_tree, copy.node.key); 2215 if(!node) { 2216 lock_rw_unlock(&c->doq_socket->table->lock); 2217 /* Must have been deleted in the mean time. */ 2218 return; 2219 } 2220 conn = (struct doq_conn*)node->key; 2221 lock_basic_lock(&conn->lock); 2222 if(conn->is_deleted) { 2223 /* It is deleted now. */ 2224 lock_rw_unlock(&c->doq_socket->table->lock); 2225 lock_basic_unlock(&conn->lock); 2226 return; 2227 } 2228 2229 if(write_change) { 2230 /* Edit the write lists, we are holding the table.lock and can 2231 * edit the list first,last and also prev,next and on_list 2232 * elements in the doq_conn structures. 
*/ 2233 doq_conn_set_write_list(c->doq_socket->table, conn); 2234 } 2235 if(timer_change) { 2236 doq_timer_set(c->doq_socket->table, &conn->timer, 2237 c->doq_socket, &new_tv); 2238 } 2239 lock_rw_unlock(&c->doq_socket->table->lock); 2240 lock_basic_unlock(&conn->lock); 2241 } 2242 2243 /** doq done with connection callbacks, release locks and setup write */ 2244 static void 2245 doq_done_with_conn_cb(struct comm_point* c, struct doq_conn* conn) 2246 { 2247 struct doq_conn copy; 2248 uint8_t cid[NGTCP2_MAX_CIDLEN]; 2249 rbnode_type* node; 2250 2251 /* no longer in callbacks, so the pointer to doq_socket is back 2252 * to NULL. */ 2253 conn->doq_socket = NULL; 2254 2255 if( (conn->write_interest && conn->on_write_list) || 2256 (!conn->write_interest && !conn->on_write_list)) { 2257 /* The connection already has the required write list 2258 * status. */ 2259 lock_basic_unlock(&conn->lock); 2260 return; 2261 } 2262 2263 /* To edit the write list of connections we have to hold the table 2264 * lock, so we release the connection and then look it up again. */ 2265 copy.key = conn->key; 2266 log_assert(conn->key.dcidlen <= NGTCP2_MAX_CIDLEN); 2267 memcpy(cid, conn->key.dcid, conn->key.dcidlen); 2268 copy.key.dcid = cid; 2269 copy.node.key = © 2270 lock_basic_unlock(&conn->lock); 2271 2272 lock_rw_wrlock(&c->doq_socket->table->lock); 2273 node = rbtree_search(c->doq_socket->table->conn_tree, copy.node.key); 2274 if(!node) { 2275 lock_rw_unlock(&c->doq_socket->table->lock); 2276 /* must have been deleted in the mean time */ 2277 return; 2278 } 2279 conn = (struct doq_conn*)node->key; 2280 lock_basic_lock(&conn->lock); 2281 if(conn->is_deleted) { 2282 /* it is deleted now. */ 2283 lock_rw_unlock(&c->doq_socket->table->lock); 2284 lock_basic_unlock(&conn->lock); 2285 return; 2286 } 2287 2288 /* edit the write lists, we are holding the table.lock and can 2289 * edit the list first,last and also prev,next and on_list elements 2290 * in the doq_conn structures. 
*/ 2291 doq_conn_set_write_list(c->doq_socket->table, conn); 2292 lock_rw_unlock(&c->doq_socket->table->lock); 2293 lock_basic_unlock(&conn->lock); 2294 } 2295 2296 /** doq count the length of the write list */ 2297 static size_t 2298 doq_write_list_length(struct comm_point* c) 2299 { 2300 size_t count = 0; 2301 struct doq_conn* conn; 2302 lock_rw_rdlock(&c->doq_socket->table->lock); 2303 conn = c->doq_socket->table->write_list_first; 2304 while(conn) { 2305 count++; 2306 conn = conn->write_next; 2307 } 2308 lock_rw_unlock(&c->doq_socket->table->lock); 2309 return count; 2310 } 2311 2312 /** doq pop the first element from the write list to have write events */ 2313 static struct doq_conn* 2314 doq_pop_write_conn(struct comm_point* c) 2315 { 2316 struct doq_conn* conn; 2317 lock_rw_wrlock(&c->doq_socket->table->lock); 2318 conn = doq_table_pop_first(c->doq_socket->table); 2319 while(conn && conn->is_deleted) { 2320 lock_basic_unlock(&conn->lock); 2321 conn = doq_table_pop_first(c->doq_socket->table); 2322 } 2323 lock_rw_unlock(&c->doq_socket->table->lock); 2324 if(conn) 2325 conn->doq_socket = c->doq_socket; 2326 return conn; 2327 } 2328 2329 /** doq the connection is done with write callbacks, release it. 
*/ 2330 static void 2331 doq_done_with_write_cb(struct comm_point* c, struct doq_conn* conn, 2332 int delete_it) 2333 { 2334 if(delete_it) { 2335 doq_delete_connection(c, conn); 2336 return; 2337 } 2338 doq_done_setup_timer_and_write(c, conn); 2339 } 2340 2341 /** see if the doq socket wants to write packets */ 2342 static int 2343 doq_socket_want_write(struct comm_point* c) 2344 { 2345 int want_write = 0; 2346 if(c->doq_socket->have_blocked_pkt) 2347 return 1; 2348 lock_rw_rdlock(&c->doq_socket->table->lock); 2349 if(c->doq_socket->table->write_list_first) 2350 want_write = 1; 2351 lock_rw_unlock(&c->doq_socket->table->lock); 2352 return want_write; 2353 } 2354 2355 /** enable write event for the doq server socket fd */ 2356 static void 2357 doq_socket_write_enable(struct comm_point* c) 2358 { 2359 verbose(VERB_ALGO, "doq socket want write"); 2360 if(c->doq_socket->event_has_write) 2361 return; 2362 comm_point_listen_for_rw(c, 1, 1); 2363 c->doq_socket->event_has_write = 1; 2364 } 2365 2366 /** disable write event for the doq server socket fd */ 2367 static void 2368 doq_socket_write_disable(struct comm_point* c) 2369 { 2370 verbose(VERB_ALGO, "doq socket want no write"); 2371 if(!c->doq_socket->event_has_write) 2372 return; 2373 comm_point_listen_for_rw(c, 1, 0); 2374 c->doq_socket->event_has_write = 0; 2375 } 2376 2377 /** write blocked packet, if possible. returns false if failed, again. */ 2378 static int 2379 doq_write_blocked_pkt(struct comm_point* c) 2380 { 2381 struct doq_pkt_addr paddr; 2382 if(!c->doq_socket->have_blocked_pkt) 2383 return 1; 2384 c->doq_socket->have_blocked_pkt = 0; 2385 if(sldns_buffer_limit(c->doq_socket->blocked_pkt) > 2386 sldns_buffer_remaining(c->doq_socket->pkt_buf)) 2387 return 1; /* impossibly large, drop it. 2388 impossible since pkt_buf is same size as blocked_pkt buf. 
*/ 2389 sldns_buffer_clear(c->doq_socket->pkt_buf); 2390 sldns_buffer_write(c->doq_socket->pkt_buf, 2391 sldns_buffer_begin(c->doq_socket->blocked_pkt), 2392 sldns_buffer_limit(c->doq_socket->blocked_pkt)); 2393 sldns_buffer_flip(c->doq_socket->pkt_buf); 2394 memcpy(&paddr, c->doq_socket->blocked_paddr, sizeof(paddr)); 2395 doq_send_pkt(c, &paddr, c->doq_socket->blocked_pkt_pi.ecn); 2396 if(c->doq_socket->have_blocked_pkt) 2397 return 0; 2398 return 1; 2399 } 2400 2401 /** doq find a timer that timeouted and return the conn, locked. */ 2402 static struct doq_conn* 2403 doq_timer_timeout_conn(struct doq_server_socket* doq_socket) 2404 { 2405 struct doq_conn* conn = NULL; 2406 struct rbnode_type* node; 2407 lock_rw_wrlock(&doq_socket->table->lock); 2408 node = rbtree_first(doq_socket->table->timer_tree); 2409 if(node && node != RBTREE_NULL) { 2410 struct doq_timer* t = (struct doq_timer*)node; 2411 conn = t->conn; 2412 2413 /* If now < timer then no further timeouts in tree. */ 2414 if(timeval_smaller(doq_socket->now_tv, &t->time)) { 2415 lock_rw_unlock(&doq_socket->table->lock); 2416 return NULL; 2417 } 2418 2419 lock_basic_lock(&conn->lock); 2420 conn->doq_socket = doq_socket; 2421 2422 /* Now that the timer is fired, remove it. */ 2423 doq_timer_unset(doq_socket->table, t); 2424 lock_rw_unlock(&doq_socket->table->lock); 2425 return conn; 2426 } 2427 lock_rw_unlock(&doq_socket->table->lock); 2428 return NULL; 2429 } 2430 2431 /** doq timer erase the marker that said which timer the worker uses. 
*/ 2432 static void 2433 doq_timer_erase_marker(struct doq_server_socket* doq_socket) 2434 { 2435 struct doq_timer* t; 2436 lock_rw_wrlock(&doq_socket->table->lock); 2437 t = doq_timer_find_time(doq_socket->table, &doq_socket->marked_time); 2438 if(t && t->worker_doq_socket == doq_socket) 2439 t->worker_doq_socket = NULL; 2440 lock_rw_unlock(&doq_socket->table->lock); 2441 memset(&doq_socket->marked_time, 0, sizeof(doq_socket->marked_time)); 2442 } 2443 2444 void 2445 doq_timer_cb(void* arg) 2446 { 2447 struct doq_server_socket* doq_socket = (struct doq_server_socket*)arg; 2448 struct doq_conn* conn; 2449 verbose(VERB_ALGO, "doq timer callback"); 2450 2451 doq_timer_erase_marker(doq_socket); 2452 2453 while((conn = doq_timer_timeout_conn(doq_socket)) != NULL) { 2454 if(conn->is_deleted || 2455 #ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD 2456 ngtcp2_conn_in_closing_period(conn->conn) || 2457 #else 2458 ngtcp2_conn_is_in_closing_period(conn->conn) || 2459 #endif 2460 #ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD 2461 ngtcp2_conn_in_draining_period(conn->conn) 2462 #else 2463 ngtcp2_conn_is_in_draining_period(conn->conn) 2464 #endif 2465 ) { 2466 if(verbosity >= VERB_ALGO) { 2467 char remotestr[256]; 2468 addr_to_str((void*)&conn->key.paddr.addr, 2469 conn->key.paddr.addrlen, remotestr, 2470 sizeof(remotestr)); 2471 verbose(VERB_ALGO, "doq conn %s is deleted " 2472 "after timeout", remotestr); 2473 } 2474 doq_delete_connection(doq_socket->cp, conn); 2475 continue; 2476 } 2477 if(!doq_conn_handle_timeout(conn)) 2478 doq_delete_connection(doq_socket->cp, conn); 2479 else doq_done_setup_timer_and_write(doq_socket->cp, conn); 2480 } 2481 2482 if(doq_socket_want_write(doq_socket->cp)) 2483 doq_socket_write_enable(doq_socket->cp); 2484 else doq_socket_write_disable(doq_socket->cp); 2485 doq_pickup_timer(doq_socket->cp); 2486 } 2487 2488 void 2489 comm_point_doq_callback(int fd, short event, void* arg) 2490 { 2491 struct comm_point* c; 2492 struct doq_pkt_addr paddr; 2493 int i, 
pkt_continue, err_drop; 2494 struct doq_conn* conn; 2495 struct ngtcp2_pkt_info pi; 2496 size_t count, num_len; 2497 2498 c = (struct comm_point*)arg; 2499 log_assert(c->type == comm_doq); 2500 2501 log_assert(c && c->doq_socket->pkt_buf && c->fd == fd); 2502 ub_comm_base_now(c->ev->base); 2503 2504 /* see if there is a blocked packet, and send that if possible. 2505 * do not attempt to read yet, even if possible, that would just 2506 * push more answers in reply to those read packets onto the list 2507 * of written replies. First attempt to clear the write content out. 2508 * That keeps the memory usage from bloating up. */ 2509 if(c->doq_socket->have_blocked_pkt) { 2510 if(!doq_write_blocked_pkt(c)) { 2511 /* this write has also blocked, attempt to write 2512 * later. Make sure the event listens to write 2513 * events. */ 2514 if(!c->doq_socket->event_has_write) 2515 doq_socket_write_enable(c); 2516 doq_pickup_timer(c); 2517 return; 2518 } 2519 } 2520 2521 /* see if there is write interest */ 2522 count = 0; 2523 num_len = doq_write_list_length(c); 2524 while((conn = doq_pop_write_conn(c)) != NULL) { 2525 if(conn->is_deleted || 2526 #ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD 2527 ngtcp2_conn_in_closing_period(conn->conn) || 2528 #else 2529 ngtcp2_conn_is_in_closing_period(conn->conn) || 2530 #endif 2531 #ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD 2532 ngtcp2_conn_in_draining_period(conn->conn) 2533 #else 2534 ngtcp2_conn_is_in_draining_period(conn->conn) 2535 #endif 2536 ) { 2537 conn->doq_socket = NULL; 2538 lock_basic_unlock(&conn->lock); 2539 if(c->doq_socket->have_blocked_pkt) { 2540 if(!c->doq_socket->event_has_write) 2541 doq_socket_write_enable(c); 2542 doq_pickup_timer(c); 2543 return; 2544 } 2545 if(++count > num_len*2) 2546 break; 2547 continue; 2548 } 2549 if(verbosity >= VERB_ALGO) { 2550 char remotestr[256]; 2551 addr_to_str((void*)&conn->key.paddr.addr, 2552 conn->key.paddr.addrlen, remotestr, 2553 sizeof(remotestr)); 2554 verbose(VERB_ALGO, "doq 
write connection %s %d", 2555 remotestr, doq_sockaddr_get_port( 2556 &conn->key.paddr.addr)); 2557 } 2558 if(doq_conn_write_streams(c, conn, &err_drop)) 2559 err_drop = 0; 2560 doq_done_with_write_cb(c, conn, err_drop); 2561 if(c->doq_socket->have_blocked_pkt) { 2562 if(!c->doq_socket->event_has_write) 2563 doq_socket_write_enable(c); 2564 doq_pickup_timer(c); 2565 return; 2566 } 2567 /* Stop overly long write lists that are created 2568 * while we are processing. Do those next time there 2569 * is a write callback. Stops long loops, and keeps 2570 * fair for other events. */ 2571 if(++count > num_len*2) 2572 break; 2573 } 2574 2575 /* check for data to read */ 2576 if((event&UB_EV_READ)!=0) 2577 for(i=0; i<NUM_UDP_PER_SELECT; i++) { 2578 /* there may be a blocked write packet and if so, stop 2579 * reading because the reply cannot get written. The 2580 * blocked packet could be written during the conn_recv 2581 * handling of replies, or for a connection close. */ 2582 if(c->doq_socket->have_blocked_pkt) { 2583 if(!c->doq_socket->event_has_write) 2584 doq_socket_write_enable(c); 2585 doq_pickup_timer(c); 2586 return; 2587 } 2588 sldns_buffer_clear(c->doq_socket->pkt_buf); 2589 doq_pkt_addr_init(&paddr); 2590 log_assert(fd != -1); 2591 log_assert(sldns_buffer_remaining(c->doq_socket->pkt_buf) > 0); 2592 if(!doq_recv(c, &paddr, &pkt_continue, &pi)) { 2593 if(pkt_continue) 2594 continue; 2595 break; 2596 } 2597 2598 /* handle incoming packet from remote addr to localaddr */ 2599 if(verbosity >= VERB_ALGO) { 2600 char remotestr[256], localstr[256]; 2601 addr_to_str((void*)&paddr.addr, paddr.addrlen, 2602 remotestr, sizeof(remotestr)); 2603 addr_to_str((void*)&paddr.localaddr, 2604 paddr.localaddrlen, localstr, 2605 sizeof(localstr)); 2606 log_info("incoming doq packet from %s port %d on " 2607 "%s port %d ifindex %d", 2608 remotestr, doq_sockaddr_get_port(&paddr.addr), 2609 localstr, 2610 doq_sockaddr_get_port(&paddr.localaddr), 2611 paddr.ifindex); 2612 
log_info("doq_recv length %d ecn 0x%x", 2613 (int)sldns_buffer_limit(c->doq_socket->pkt_buf), 2614 (int)pi.ecn); 2615 } 2616 2617 if(sldns_buffer_limit(c->doq_socket->pkt_buf) == 0) 2618 continue; 2619 2620 conn = NULL; 2621 if(!doq_decode_pkt_header_negotiate(c, &paddr, &conn)) 2622 continue; 2623 if(!conn) { 2624 if(!doq_accept(c, &paddr, &conn, &pi)) 2625 continue; 2626 if(!doq_conn_write_streams(c, conn, NULL)) { 2627 doq_delete_connection(c, conn); 2628 continue; 2629 } 2630 doq_done_setup_timer_and_write(c, conn); 2631 continue; 2632 } 2633 if( 2634 #ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD 2635 ngtcp2_conn_in_closing_period(conn->conn) 2636 #else 2637 ngtcp2_conn_is_in_closing_period(conn->conn) 2638 #endif 2639 ) { 2640 if(!doq_conn_send_close(c, conn)) { 2641 doq_delete_connection(c, conn); 2642 } else { 2643 doq_done_setup_timer_and_write(c, conn); 2644 } 2645 continue; 2646 } 2647 if( 2648 #ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD 2649 ngtcp2_conn_in_draining_period(conn->conn) 2650 #else 2651 ngtcp2_conn_is_in_draining_period(conn->conn) 2652 #endif 2653 ) { 2654 doq_done_setup_timer_and_write(c, conn); 2655 continue; 2656 } 2657 if(!doq_conn_recv(c, &paddr, conn, &pi, NULL, &err_drop)) { 2658 /* The receive failed, and if it also failed to send 2659 * a close, drop the connection. That means it is not 2660 * in the closing period. 
*/ 2661 if(err_drop) { 2662 doq_delete_connection(c, conn); 2663 } else { 2664 doq_done_setup_timer_and_write(c, conn); 2665 } 2666 continue; 2667 } 2668 if(!doq_conn_write_streams(c, conn, &err_drop)) { 2669 if(err_drop) { 2670 doq_delete_connection(c, conn); 2671 } else { 2672 doq_done_setup_timer_and_write(c, conn); 2673 } 2674 continue; 2675 } 2676 doq_done_setup_timer_and_write(c, conn); 2677 } 2678 2679 /* see if we want to have more write events */ 2680 verbose(VERB_ALGO, "doq check write enable"); 2681 if(doq_socket_want_write(c)) 2682 doq_socket_write_enable(c); 2683 else doq_socket_write_disable(c); 2684 doq_pickup_timer(c); 2685 } 2686 2687 /** create new doq server socket structure */ 2688 static struct doq_server_socket* 2689 doq_server_socket_create(struct doq_table* table, struct ub_randstate* rnd, 2690 const char* ssl_service_key, const char* ssl_service_pem, 2691 struct comm_point* c, struct comm_base* base, struct config_file* cfg) 2692 { 2693 size_t doq_buffer_size = 4096; /* bytes buffer size, for one packet. 
*/ 2694 struct doq_server_socket* doq_socket; 2695 doq_socket = calloc(1, sizeof(*doq_socket)); 2696 if(!doq_socket) { 2697 return NULL; 2698 } 2699 doq_socket->table = table; 2700 doq_socket->rnd = rnd; 2701 doq_socket->validate_addr = 1; 2702 if(ssl_service_key == NULL || ssl_service_key[0]==0) { 2703 log_err("doq server socket create: no tls-service-key"); 2704 free(doq_socket); 2705 return NULL; 2706 } 2707 if(ssl_service_pem == NULL || ssl_service_pem[0]==0) { 2708 log_err("doq server socket create: no tls-service-pem"); 2709 free(doq_socket); 2710 return NULL; 2711 } 2712 doq_socket->ssl_service_key = strdup(ssl_service_key); 2713 if(!doq_socket->ssl_service_key) { 2714 free(doq_socket); 2715 return NULL; 2716 } 2717 doq_socket->ssl_service_pem = strdup(ssl_service_pem); 2718 if(!doq_socket->ssl_service_pem) { 2719 free(doq_socket->ssl_service_key); 2720 free(doq_socket); 2721 return NULL; 2722 } 2723 doq_socket->ssl_verify_pem = NULL; 2724 /* the doq_socket has its own copy of the static secret, as 2725 * well as other config values, so that they do not need table.lock */ 2726 doq_socket->static_secret_len = table->static_secret_len; 2727 doq_socket->static_secret = memdup(table->static_secret, 2728 table->static_secret_len); 2729 if(!doq_socket->static_secret) { 2730 free(doq_socket->ssl_service_key); 2731 free(doq_socket->ssl_service_pem); 2732 free(doq_socket->ssl_verify_pem); 2733 free(doq_socket); 2734 return NULL; 2735 } 2736 if(!doq_socket_setup_ctx(doq_socket)) { 2737 free(doq_socket->ssl_service_key); 2738 free(doq_socket->ssl_service_pem); 2739 free(doq_socket->ssl_verify_pem); 2740 free(doq_socket->static_secret); 2741 free(doq_socket); 2742 return NULL; 2743 } 2744 doq_socket->idle_timeout = table->idle_timeout; 2745 doq_socket->sv_scidlen = table->sv_scidlen; 2746 doq_socket->cp = c; 2747 doq_socket->pkt_buf = sldns_buffer_new(doq_buffer_size); 2748 if(!doq_socket->pkt_buf) { 2749 free(doq_socket->ssl_service_key); 2750 
free(doq_socket->ssl_service_pem); 2751 free(doq_socket->ssl_verify_pem); 2752 free(doq_socket->static_secret); 2753 SSL_CTX_free(doq_socket->ctx); 2754 free(doq_socket); 2755 return NULL; 2756 } 2757 doq_socket->blocked_pkt = sldns_buffer_new( 2758 sldns_buffer_capacity(doq_socket->pkt_buf)); 2759 if(!doq_socket->pkt_buf) { 2760 free(doq_socket->ssl_service_key); 2761 free(doq_socket->ssl_service_pem); 2762 free(doq_socket->ssl_verify_pem); 2763 free(doq_socket->static_secret); 2764 SSL_CTX_free(doq_socket->ctx); 2765 sldns_buffer_free(doq_socket->pkt_buf); 2766 free(doq_socket); 2767 return NULL; 2768 } 2769 doq_socket->blocked_paddr = calloc(1, 2770 sizeof(*doq_socket->blocked_paddr)); 2771 if(!doq_socket->blocked_paddr) { 2772 free(doq_socket->ssl_service_key); 2773 free(doq_socket->ssl_service_pem); 2774 free(doq_socket->ssl_verify_pem); 2775 free(doq_socket->static_secret); 2776 SSL_CTX_free(doq_socket->ctx); 2777 sldns_buffer_free(doq_socket->pkt_buf); 2778 sldns_buffer_free(doq_socket->blocked_pkt); 2779 free(doq_socket); 2780 return NULL; 2781 } 2782 doq_socket->timer = comm_timer_create(base, doq_timer_cb, doq_socket); 2783 if(!doq_socket->timer) { 2784 free(doq_socket->ssl_service_key); 2785 free(doq_socket->ssl_service_pem); 2786 free(doq_socket->ssl_verify_pem); 2787 free(doq_socket->static_secret); 2788 SSL_CTX_free(doq_socket->ctx); 2789 sldns_buffer_free(doq_socket->pkt_buf); 2790 sldns_buffer_free(doq_socket->blocked_pkt); 2791 free(doq_socket->blocked_paddr); 2792 free(doq_socket); 2793 return NULL; 2794 } 2795 memset(&doq_socket->marked_time, 0, sizeof(doq_socket->marked_time)); 2796 comm_base_timept(base, &doq_socket->now_tt, &doq_socket->now_tv); 2797 doq_socket->cfg = cfg; 2798 return doq_socket; 2799 } 2800 2801 /** delete doq server socket structure */ 2802 static void 2803 doq_server_socket_delete(struct doq_server_socket* doq_socket) 2804 { 2805 if(!doq_socket) 2806 return; 2807 free(doq_socket->static_secret); 2808 
SSL_CTX_free(doq_socket->ctx); 2809 #ifndef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT 2810 free(doq_socket->quic_method); 2811 #endif 2812 free(doq_socket->ssl_service_key); 2813 free(doq_socket->ssl_service_pem); 2814 free(doq_socket->ssl_verify_pem); 2815 sldns_buffer_free(doq_socket->pkt_buf); 2816 sldns_buffer_free(doq_socket->blocked_pkt); 2817 free(doq_socket->blocked_paddr); 2818 comm_timer_delete(doq_socket->timer); 2819 free(doq_socket); 2820 } 2821 2822 /** find repinfo in the doq table */ 2823 static struct doq_conn* 2824 doq_lookup_repinfo(struct doq_table* table, struct comm_reply* repinfo) 2825 { 2826 struct doq_conn* conn; 2827 struct doq_conn_key key; 2828 doq_conn_key_from_repinfo(&key, repinfo); 2829 lock_rw_rdlock(&table->lock); 2830 conn = doq_conn_find(table, &key.paddr.addr, 2831 key.paddr.addrlen, &key.paddr.localaddr, 2832 key.paddr.localaddrlen, key.paddr.ifindex, key.dcid, 2833 key.dcidlen); 2834 if(conn) { 2835 lock_basic_lock(&conn->lock); 2836 lock_rw_unlock(&table->lock); 2837 return conn; 2838 } 2839 lock_rw_unlock(&table->lock); 2840 return NULL; 2841 } 2842 2843 /** doq find connection and stream. From inside callbacks from worker. */ 2844 static int 2845 doq_lookup_conn_stream(struct comm_reply* repinfo, struct comm_point* c, 2846 struct doq_conn** conn, struct doq_stream** stream) 2847 { 2848 if(c->doq_socket->current_conn) { 2849 *conn = c->doq_socket->current_conn; 2850 } else { 2851 *conn = doq_lookup_repinfo(c->doq_socket->table, repinfo); 2852 if((*conn) && (*conn)->is_deleted) { 2853 lock_basic_unlock(&(*conn)->lock); 2854 *conn = NULL; 2855 } 2856 if(*conn) { 2857 (*conn)->doq_socket = c->doq_socket; 2858 } 2859 } 2860 if(!*conn) { 2861 *stream = NULL; 2862 return 0; 2863 } 2864 *stream = doq_stream_find(*conn, repinfo->doq_streamid); 2865 if(!*stream) { 2866 if(!c->doq_socket->current_conn) { 2867 /* Not inside callbacks, we have our own lock on conn. 2868 * Release it. 
*/ 2869 lock_basic_unlock(&(*conn)->lock); 2870 } 2871 return 0; 2872 } 2873 if((*stream)->is_closed) { 2874 /* stream is closed, ignore reply or drop */ 2875 if(!c->doq_socket->current_conn) { 2876 /* Not inside callbacks, we have our own lock on conn. 2877 * Release it. */ 2878 lock_basic_unlock(&(*conn)->lock); 2879 } 2880 return 0; 2881 } 2882 return 1; 2883 } 2884 2885 /** doq send a reply from a comm reply */ 2886 static void 2887 doq_socket_send_reply(struct comm_reply* repinfo) 2888 { 2889 struct doq_conn* conn; 2890 struct doq_stream* stream; 2891 log_assert(repinfo->c->type == comm_doq); 2892 if(!doq_lookup_conn_stream(repinfo, repinfo->c, &conn, &stream)) { 2893 verbose(VERB_ALGO, "doq: send_reply but %s is gone", 2894 (conn?"stream":"connection")); 2895 /* No stream, it may have been closed. */ 2896 /* Drop the reply, it cannot be sent. */ 2897 return; 2898 } 2899 if(!doq_stream_send_reply(conn, stream, repinfo->c->buffer)) 2900 doq_stream_close(conn, stream, 1); 2901 if(!repinfo->c->doq_socket->current_conn) { 2902 /* Not inside callbacks, we have our own lock on conn. 2903 * Release it. */ 2904 doq_done_with_conn_cb(repinfo->c, conn); 2905 /* since we sent a reply, or closed it, the assumption is 2906 * that there is something to write, so enable write event. 2907 * It waits until the write event happens to write the 2908 * streams with answers, this allows some answers to be 2909 * answered before the event loop reaches the doq fd, in 2910 * repinfo->c->fd, and that collates answers. That would 2911 * not happen if we write doq packets right now. 
*/ 2912 doq_socket_write_enable(repinfo->c); 2913 } 2914 } 2915 2916 /** doq drop a reply from a comm reply */ 2917 static void 2918 doq_socket_drop_reply(struct comm_reply* repinfo) 2919 { 2920 struct doq_conn* conn; 2921 struct doq_stream* stream; 2922 log_assert(repinfo->c->type == comm_doq); 2923 if(!doq_lookup_conn_stream(repinfo, repinfo->c, &conn, &stream)) { 2924 verbose(VERB_ALGO, "doq: drop_reply but %s is gone", 2925 (conn?"stream":"connection")); 2926 /* The connection or stream is already gone. */ 2927 return; 2928 } 2929 doq_stream_close(conn, stream, 1); 2930 if(!repinfo->c->doq_socket->current_conn) { 2931 /* Not inside callbacks, we have our own lock on conn. 2932 * Release it. */ 2933 doq_done_with_conn_cb(repinfo->c, conn); 2934 doq_socket_write_enable(repinfo->c); 2935 } 2936 } 2937 #endif /* HAVE_NGTCP2 */ 2938 2939 int adjusted_tcp_timeout(struct comm_point* c) 2940 { 2941 if(c->tcp_timeout_msec < TCP_QUERY_TIMEOUT_MINIMUM) 2942 return TCP_QUERY_TIMEOUT_MINIMUM; 2943 return c->tcp_timeout_msec; 2944 } 2945 2946 /** Use a new tcp handler for new query fd, set to read query */ 2947 static void 2948 setup_tcp_handler(struct comm_point* c, int fd, int cur, int max) 2949 { 2950 int handler_usage; 2951 log_assert(c->type == comm_tcp || c->type == comm_http); 2952 log_assert(c->fd == -1); 2953 sldns_buffer_clear(c->buffer); 2954 #ifdef USE_DNSCRYPT 2955 if (c->dnscrypt) 2956 sldns_buffer_clear(c->dnscrypt_buffer); 2957 #endif 2958 c->tcp_is_reading = 1; 2959 c->tcp_byte_count = 0; 2960 c->tcp_keepalive = 0; 2961 /* if more than half the tcp handlers are in use, use a shorter 2962 * timeout for this TCP connection, we need to make space for 2963 * other connections to be able to get attention */ 2964 /* If > 50% TCP handler structures in use, set timeout to 1/100th 2965 * configured value. 2966 * If > 65%TCP handler structures in use, set to 1/500th configured 2967 * value. 2968 * If > 80% TCP handler structures in use, set to 0. 
2969 * 2970 * If the timeout to use falls below 200 milliseconds, an actual 2971 * timeout of 200ms is used. 2972 */ 2973 handler_usage = (cur * 100) / max; 2974 if(handler_usage > 50 && handler_usage <= 65) 2975 c->tcp_timeout_msec /= 100; 2976 else if (handler_usage > 65 && handler_usage <= 80) 2977 c->tcp_timeout_msec /= 500; 2978 else if (handler_usage > 80) 2979 c->tcp_timeout_msec = 0; 2980 comm_point_start_listening(c, fd, adjusted_tcp_timeout(c)); 2981 } 2982 2983 void comm_base_handle_slow_accept(int ATTR_UNUSED(fd), 2984 short ATTR_UNUSED(event), void* arg) 2985 { 2986 struct comm_base* b = (struct comm_base*)arg; 2987 /* timeout for the slow accept, re-enable accepts again */ 2988 if(b->start_accept) { 2989 verbose(VERB_ALGO, "wait is over, slow accept disabled"); 2990 fptr_ok(fptr_whitelist_start_accept(b->start_accept)); 2991 (*b->start_accept)(b->cb_arg); 2992 b->eb->slow_accept_enabled = 0; 2993 } 2994 } 2995 2996 int comm_point_perform_accept(struct comm_point* c, 2997 struct sockaddr_storage* addr, socklen_t* addrlen) 2998 { 2999 int new_fd; 3000 *addrlen = (socklen_t)sizeof(*addr); 3001 #ifndef HAVE_ACCEPT4 3002 new_fd = accept(c->fd, (struct sockaddr*)addr, addrlen); 3003 #else 3004 /* SOCK_NONBLOCK saves extra calls to fcntl for the same result */ 3005 new_fd = accept4(c->fd, (struct sockaddr*)addr, addrlen, SOCK_NONBLOCK); 3006 #endif 3007 if(new_fd == -1) { 3008 #ifndef USE_WINSOCK 3009 /* EINTR is signal interrupt. others are closed connection. */ 3010 if( errno == EINTR || errno == EAGAIN 3011 #ifdef EWOULDBLOCK 3012 || errno == EWOULDBLOCK 3013 #endif 3014 #ifdef ECONNABORTED 3015 || errno == ECONNABORTED 3016 #endif 3017 #ifdef EPROTO 3018 || errno == EPROTO 3019 #endif /* EPROTO */ 3020 ) 3021 return -1; 3022 #if defined(ENFILE) && defined(EMFILE) 3023 if(errno == ENFILE || errno == EMFILE) { 3024 /* out of file descriptors, likely outside of our 3025 * control. 
stop accept() calls for some time */ 3026 if(c->ev->base->stop_accept) { 3027 struct comm_base* b = c->ev->base; 3028 struct timeval tv; 3029 verbose(VERB_ALGO, "out of file descriptors: " 3030 "slow accept"); 3031 ub_comm_base_now(b); 3032 if(b->eb->last_slow_log+SLOW_LOG_TIME <= 3033 b->eb->secs) { 3034 b->eb->last_slow_log = b->eb->secs; 3035 verbose(VERB_OPS, "accept failed, " 3036 "slow down accept for %d " 3037 "msec: %s", 3038 NETEVENT_SLOW_ACCEPT_TIME, 3039 sock_strerror(errno)); 3040 } 3041 b->eb->slow_accept_enabled = 1; 3042 fptr_ok(fptr_whitelist_stop_accept( 3043 b->stop_accept)); 3044 (*b->stop_accept)(b->cb_arg); 3045 /* set timeout, no mallocs */ 3046 tv.tv_sec = NETEVENT_SLOW_ACCEPT_TIME/1000; 3047 tv.tv_usec = (NETEVENT_SLOW_ACCEPT_TIME%1000)*1000; 3048 b->eb->slow_accept = ub_event_new(b->eb->base, 3049 -1, UB_EV_TIMEOUT, 3050 comm_base_handle_slow_accept, b); 3051 if(b->eb->slow_accept == NULL) { 3052 /* we do not want to log here, because 3053 * that would spam the logfiles. 3054 * error: "event_base_set failed." */ 3055 } 3056 else if(ub_event_add(b->eb->slow_accept, &tv) 3057 != 0) { 3058 /* we do not want to log here, 3059 * error: "event_add failed." 
*/ 3060 } 3061 } else { 3062 log_err("accept, with no slow down, " 3063 "failed: %s", sock_strerror(errno)); 3064 } 3065 return -1; 3066 } 3067 #endif 3068 #else /* USE_WINSOCK */ 3069 if(WSAGetLastError() == WSAEINPROGRESS || 3070 WSAGetLastError() == WSAECONNRESET) 3071 return -1; 3072 if(WSAGetLastError() == WSAEWOULDBLOCK) { 3073 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 3074 return -1; 3075 } 3076 #endif 3077 log_err_addr("accept failed", sock_strerror(errno), addr, 3078 *addrlen); 3079 return -1; 3080 } 3081 if(c->tcp_conn_limit && c->type == comm_tcp_accept) { 3082 c->tcl_addr = tcl_addr_lookup(c->tcp_conn_limit, addr, *addrlen); 3083 if(!tcl_new_connection(c->tcl_addr)) { 3084 if(verbosity >= 3) 3085 log_err_addr("accept rejected", 3086 "connection limit exceeded", addr, *addrlen); 3087 close(new_fd); 3088 return -1; 3089 } 3090 } 3091 #ifndef HAVE_ACCEPT4 3092 fd_set_nonblock(new_fd); 3093 #endif 3094 return new_fd; 3095 } 3096 3097 #ifdef USE_WINSOCK 3098 static long win_bio_cb(BIO *b, int oper, const char* ATTR_UNUSED(argp), 3099 #ifdef HAVE_BIO_SET_CALLBACK_EX 3100 size_t ATTR_UNUSED(len), 3101 #endif 3102 int ATTR_UNUSED(argi), long argl, 3103 #ifndef HAVE_BIO_SET_CALLBACK_EX 3104 long retvalue 3105 #else 3106 int retvalue, size_t* ATTR_UNUSED(processed) 3107 #endif 3108 ) 3109 { 3110 int wsa_err = WSAGetLastError(); /* store errcode before it is gone */ 3111 verbose(VERB_ALGO, "bio_cb %d, %s %s %s", oper, 3112 (oper&BIO_CB_RETURN)?"return":"before", 3113 (oper&BIO_CB_READ)?"read":((oper&BIO_CB_WRITE)?"write":"other"), 3114 wsa_err==WSAEWOULDBLOCK?"wsawb":""); 3115 /* on windows, check if previous operation caused EWOULDBLOCK */ 3116 if( (oper == (BIO_CB_READ|BIO_CB_RETURN) && argl == 0) || 3117 (oper == (BIO_CB_GETS|BIO_CB_RETURN) && argl == 0)) { 3118 if(wsa_err == WSAEWOULDBLOCK) 3119 ub_winsock_tcp_wouldblock((struct ub_event*) 3120 BIO_get_callback_arg(b), UB_EV_READ); 3121 } 3122 if( (oper == (BIO_CB_WRITE|BIO_CB_RETURN) && argl == 0) || 
3123 (oper == (BIO_CB_PUTS|BIO_CB_RETURN) && argl == 0)) { 3124 if(wsa_err == WSAEWOULDBLOCK) 3125 ub_winsock_tcp_wouldblock((struct ub_event*) 3126 BIO_get_callback_arg(b), UB_EV_WRITE); 3127 } 3128 /* return original return value */ 3129 return retvalue; 3130 } 3131 3132 /** set win bio callbacks for nonblocking operations */ 3133 void 3134 comm_point_tcp_win_bio_cb(struct comm_point* c, void* thessl) 3135 { 3136 SSL* ssl = (SSL*)thessl; 3137 /* set them both just in case, but usually they are the same BIO */ 3138 #ifdef HAVE_BIO_SET_CALLBACK_EX 3139 BIO_set_callback_ex(SSL_get_rbio(ssl), &win_bio_cb); 3140 #else 3141 BIO_set_callback(SSL_get_rbio(ssl), &win_bio_cb); 3142 #endif 3143 BIO_set_callback_arg(SSL_get_rbio(ssl), (char*)c->ev->ev); 3144 #ifdef HAVE_BIO_SET_CALLBACK_EX 3145 BIO_set_callback_ex(SSL_get_wbio(ssl), &win_bio_cb); 3146 #else 3147 BIO_set_callback(SSL_get_wbio(ssl), &win_bio_cb); 3148 #endif 3149 BIO_set_callback_arg(SSL_get_wbio(ssl), (char*)c->ev->ev); 3150 } 3151 #endif 3152 3153 #ifdef HAVE_NGHTTP2 3154 /** Create http2 session server. Per connection, after TCP accepted.*/ 3155 static int http2_session_server_create(struct http2_session* h2_session) 3156 { 3157 log_assert(h2_session->callbacks); 3158 h2_session->is_drop = 0; 3159 if(nghttp2_session_server_new(&h2_session->session, 3160 h2_session->callbacks, 3161 h2_session) == NGHTTP2_ERR_NOMEM) { 3162 log_err("failed to create nghttp2 session server"); 3163 return 0; 3164 } 3165 3166 return 1; 3167 } 3168 3169 /** Submit http2 setting to session. Once per session. 
*/ 3170 static int http2_submit_settings(struct http2_session* h2_session) 3171 { 3172 int ret; 3173 nghttp2_settings_entry settings[1] = { 3174 {NGHTTP2_SETTINGS_MAX_CONCURRENT_STREAMS, 3175 h2_session->c->http2_max_streams}}; 3176 3177 ret = nghttp2_submit_settings(h2_session->session, NGHTTP2_FLAG_NONE, 3178 settings, 1); 3179 if(ret) { 3180 verbose(VERB_QUERY, "http2: submit_settings failed, " 3181 "error: %s", nghttp2_strerror(ret)); 3182 return 0; 3183 } 3184 return 1; 3185 } 3186 #endif /* HAVE_NGHTTP2 */ 3187 3188 3189 void 3190 comm_point_tcp_accept_callback(int fd, short event, void* arg) 3191 { 3192 struct comm_point* c = (struct comm_point*)arg, *c_hdl; 3193 int new_fd; 3194 log_assert(c->type == comm_tcp_accept); 3195 if(!(event & UB_EV_READ)) { 3196 log_info("ignoring tcp accept event %d", (int)event); 3197 return; 3198 } 3199 ub_comm_base_now(c->ev->base); 3200 /* find free tcp handler. */ 3201 if(!c->tcp_free) { 3202 log_warn("accepted too many tcp, connections full"); 3203 return; 3204 } 3205 /* accept incoming connection. 
*/ 3206 c_hdl = c->tcp_free; 3207 /* clear leftover flags from previous use, and then set the 3208 * correct event base for the event structure for libevent */ 3209 ub_event_free(c_hdl->ev->ev); 3210 c_hdl->ev->ev = NULL; 3211 if((c_hdl->type == comm_tcp && c_hdl->tcp_req_info) || 3212 c_hdl->type == comm_local || c_hdl->type == comm_raw) 3213 c_hdl->tcp_do_toggle_rw = 0; 3214 else c_hdl->tcp_do_toggle_rw = 1; 3215 3216 if(c_hdl->type == comm_http) { 3217 #ifdef HAVE_NGHTTP2 3218 if(!c_hdl->h2_session || 3219 !http2_session_server_create(c_hdl->h2_session)) { 3220 log_warn("failed to create nghttp2"); 3221 return; 3222 } 3223 if(!c_hdl->h2_session || 3224 !http2_submit_settings(c_hdl->h2_session)) { 3225 log_warn("failed to submit http2 settings"); 3226 return; 3227 } 3228 if(!c->ssl) { 3229 c_hdl->tcp_do_toggle_rw = 0; 3230 c_hdl->use_h2 = 1; 3231 } 3232 #endif 3233 c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1, 3234 UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT, 3235 comm_point_http_handle_callback, c_hdl); 3236 } else { 3237 c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1, 3238 UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT, 3239 comm_point_tcp_handle_callback, c_hdl); 3240 } 3241 if(!c_hdl->ev->ev) { 3242 log_warn("could not ub_event_new, dropped tcp"); 3243 return; 3244 } 3245 log_assert(fd != -1); 3246 (void)fd; 3247 new_fd = comm_point_perform_accept(c, &c_hdl->repinfo.remote_addr, 3248 &c_hdl->repinfo.remote_addrlen); 3249 if(new_fd == -1) 3250 return; 3251 /* Copy remote_address to client_address. 3252 * Simplest way/time for streams to do that. 
*/ 3253 c_hdl->repinfo.client_addrlen = c_hdl->repinfo.remote_addrlen; 3254 memmove(&c_hdl->repinfo.client_addr, 3255 &c_hdl->repinfo.remote_addr, 3256 c_hdl->repinfo.remote_addrlen); 3257 if(c->ssl) { 3258 c_hdl->ssl = incoming_ssl_fd(c->ssl, new_fd); 3259 if(!c_hdl->ssl) { 3260 c_hdl->fd = new_fd; 3261 comm_point_close(c_hdl); 3262 return; 3263 } 3264 c_hdl->ssl_shake_state = comm_ssl_shake_read; 3265 #ifdef USE_WINSOCK 3266 comm_point_tcp_win_bio_cb(c_hdl, c_hdl->ssl); 3267 #endif 3268 } 3269 3270 /* grab the tcp handler buffers */ 3271 c->cur_tcp_count++; 3272 c->tcp_free = c_hdl->tcp_free; 3273 c_hdl->tcp_free = NULL; 3274 if(!c->tcp_free) { 3275 /* stop accepting incoming queries for now. */ 3276 comm_point_stop_listening(c); 3277 } 3278 setup_tcp_handler(c_hdl, new_fd, c->cur_tcp_count, c->max_tcp_count); 3279 } 3280 3281 /** Make tcp handler free for next assignment */ 3282 static void 3283 reclaim_tcp_handler(struct comm_point* c) 3284 { 3285 log_assert(c->type == comm_tcp); 3286 if(c->ssl) { 3287 #ifdef HAVE_SSL 3288 SSL_shutdown(c->ssl); 3289 SSL_free(c->ssl); 3290 c->ssl = NULL; 3291 #endif 3292 } 3293 comm_point_close(c); 3294 if(c->tcp_parent) { 3295 if(c != c->tcp_parent->tcp_free) { 3296 c->tcp_parent->cur_tcp_count--; 3297 c->tcp_free = c->tcp_parent->tcp_free; 3298 c->tcp_parent->tcp_free = c; 3299 } 3300 if(!c->tcp_free) { 3301 /* re-enable listening on accept socket */ 3302 comm_point_start_listening(c->tcp_parent, -1, -1); 3303 } 3304 } 3305 c->tcp_more_read_again = NULL; 3306 c->tcp_more_write_again = NULL; 3307 c->tcp_byte_count = 0; 3308 c->pp2_header_state = pp2_header_none; 3309 sldns_buffer_clear(c->buffer); 3310 } 3311 3312 /** do the callback when writing is done */ 3313 static void 3314 tcp_callback_writer(struct comm_point* c) 3315 { 3316 log_assert(c->type == comm_tcp); 3317 if(!c->tcp_write_and_read) { 3318 sldns_buffer_clear(c->buffer); 3319 c->tcp_byte_count = 0; 3320 } 3321 if(c->tcp_do_toggle_rw) 3322 c->tcp_is_reading = 1; 3323 
/* switch from listening(write) to listening(read) */ 3324 if(c->tcp_req_info) { 3325 tcp_req_info_handle_writedone(c->tcp_req_info); 3326 } else { 3327 comm_point_stop_listening(c); 3328 if(c->tcp_write_and_read) { 3329 fptr_ok(fptr_whitelist_comm_point(c->callback)); 3330 if( (*c->callback)(c, c->cb_arg, NETEVENT_PKT_WRITTEN, 3331 &c->repinfo) ) { 3332 comm_point_start_listening(c, -1, 3333 adjusted_tcp_timeout(c)); 3334 } 3335 } else { 3336 comm_point_start_listening(c, -1, 3337 adjusted_tcp_timeout(c)); 3338 } 3339 } 3340 } 3341 3342 /** do the callback when reading is done */ 3343 static void 3344 tcp_callback_reader(struct comm_point* c) 3345 { 3346 log_assert(c->type == comm_tcp || c->type == comm_local); 3347 sldns_buffer_flip(c->buffer); 3348 if(c->tcp_do_toggle_rw) 3349 c->tcp_is_reading = 0; 3350 c->tcp_byte_count = 0; 3351 if(c->tcp_req_info) { 3352 tcp_req_info_handle_readdone(c->tcp_req_info); 3353 } else { 3354 if(c->type == comm_tcp) 3355 comm_point_stop_listening(c); 3356 fptr_ok(fptr_whitelist_comm_point(c->callback)); 3357 if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) { 3358 comm_point_start_listening(c, -1, 3359 adjusted_tcp_timeout(c)); 3360 } 3361 } 3362 } 3363 3364 #ifdef HAVE_SSL 3365 /** true if the ssl handshake error has to be squelched from the logs */ 3366 int 3367 squelch_err_ssl_handshake(unsigned long err) 3368 { 3369 if(verbosity >= VERB_QUERY) 3370 return 0; /* only squelch on low verbosity */ 3371 if(ERR_GET_LIB(err) == ERR_LIB_SSL && 3372 (ERR_GET_REASON(err) == SSL_R_HTTPS_PROXY_REQUEST || 3373 ERR_GET_REASON(err) == SSL_R_HTTP_REQUEST || 3374 ERR_GET_REASON(err) == SSL_R_WRONG_VERSION_NUMBER || 3375 ERR_GET_REASON(err) == SSL_R_SSLV3_ALERT_BAD_CERTIFICATE 3376 #ifdef SSL_F_TLS_POST_PROCESS_CLIENT_HELLO 3377 || ERR_GET_REASON(err) == SSL_R_NO_SHARED_CIPHER 3378 #endif 3379 #ifdef SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO 3380 || ERR_GET_REASON(err) == SSL_R_UNKNOWN_PROTOCOL 3381 || ERR_GET_REASON(err) == 
SSL_R_UNSUPPORTED_PROTOCOL 3382 # ifdef SSL_R_VERSION_TOO_LOW 3383 || ERR_GET_REASON(err) == SSL_R_VERSION_TOO_LOW 3384 # endif 3385 #endif 3386 )) 3387 return 1; 3388 return 0; 3389 } 3390 #endif /* HAVE_SSL */ 3391 3392 /** continue ssl handshake */ 3393 #ifdef HAVE_SSL 3394 static int 3395 ssl_handshake(struct comm_point* c) 3396 { 3397 int r; 3398 if(c->ssl_shake_state == comm_ssl_shake_hs_read) { 3399 /* read condition satisfied back to writing */ 3400 comm_point_listen_for_rw(c, 0, 1); 3401 c->ssl_shake_state = comm_ssl_shake_none; 3402 return 1; 3403 } 3404 if(c->ssl_shake_state == comm_ssl_shake_hs_write) { 3405 /* write condition satisfied, back to reading */ 3406 comm_point_listen_for_rw(c, 1, 0); 3407 c->ssl_shake_state = comm_ssl_shake_none; 3408 return 1; 3409 } 3410 3411 ERR_clear_error(); 3412 r = SSL_do_handshake(c->ssl); 3413 if(r != 1) { 3414 int want = SSL_get_error(c->ssl, r); 3415 if(want == SSL_ERROR_WANT_READ) { 3416 if(c->ssl_shake_state == comm_ssl_shake_read) 3417 return 1; 3418 c->ssl_shake_state = comm_ssl_shake_read; 3419 comm_point_listen_for_rw(c, 1, 0); 3420 return 1; 3421 } else if(want == SSL_ERROR_WANT_WRITE) { 3422 if(c->ssl_shake_state == comm_ssl_shake_write) 3423 return 1; 3424 c->ssl_shake_state = comm_ssl_shake_write; 3425 comm_point_listen_for_rw(c, 0, 1); 3426 return 1; 3427 } else if(r == 0) { 3428 return 0; /* closed */ 3429 } else if(want == SSL_ERROR_SYSCALL) { 3430 /* SYSCALL and errno==0 means closed uncleanly */ 3431 #ifdef EPIPE 3432 if(errno == EPIPE && verbosity < 2) 3433 return 0; /* silence 'broken pipe' */ 3434 #endif 3435 #ifdef ECONNRESET 3436 if(errno == ECONNRESET && verbosity < 2) 3437 return 0; /* silence reset by peer */ 3438 #endif 3439 if(!tcp_connect_errno_needs_log( 3440 (struct sockaddr*)&c->repinfo.remote_addr, 3441 c->repinfo.remote_addrlen)) 3442 return 0; /* silence connect failures that 3443 show up because after connect this is the 3444 first system call that accesses the socket */ 3445 
if(errno != 0) 3446 log_err("SSL_handshake syscall: %s", 3447 strerror(errno)); 3448 return 0; 3449 } else { 3450 unsigned long err = ERR_get_error(); 3451 if(!squelch_err_ssl_handshake(err)) { 3452 long vr; 3453 log_crypto_err_io_code("ssl handshake failed", 3454 want, err); 3455 if((vr=SSL_get_verify_result(c->ssl)) != 0) 3456 log_err("ssl handshake cert error: %s", 3457 X509_verify_cert_error_string( 3458 vr)); 3459 log_addr(VERB_OPS, "ssl handshake failed", 3460 &c->repinfo.remote_addr, 3461 c->repinfo.remote_addrlen); 3462 } 3463 return 0; 3464 } 3465 } 3466 /* this is where peer verification could take place */ 3467 if((SSL_get_verify_mode(c->ssl)&SSL_VERIFY_PEER)) { 3468 /* verification */ 3469 if(SSL_get_verify_result(c->ssl) == X509_V_OK) { 3470 #ifdef HAVE_SSL_GET1_PEER_CERTIFICATE 3471 X509* x = SSL_get1_peer_certificate(c->ssl); 3472 #else 3473 X509* x = SSL_get_peer_certificate(c->ssl); 3474 #endif 3475 if(!x) { 3476 log_addr(VERB_ALGO, "SSL connection failed: " 3477 "no certificate", 3478 &c->repinfo.remote_addr, 3479 c->repinfo.remote_addrlen); 3480 return 0; 3481 } 3482 log_cert(VERB_ALGO, "peer certificate", x); 3483 #ifdef HAVE_SSL_GET0_PEERNAME 3484 if(SSL_get0_peername(c->ssl)) { 3485 char buf[255]; 3486 snprintf(buf, sizeof(buf), "SSL connection " 3487 "to %s authenticated", 3488 SSL_get0_peername(c->ssl)); 3489 log_addr(VERB_ALGO, buf, &c->repinfo.remote_addr, 3490 c->repinfo.remote_addrlen); 3491 } else { 3492 #endif 3493 log_addr(VERB_ALGO, "SSL connection " 3494 "authenticated", &c->repinfo.remote_addr, 3495 c->repinfo.remote_addrlen); 3496 #ifdef HAVE_SSL_GET0_PEERNAME 3497 } 3498 #endif 3499 X509_free(x); 3500 } else { 3501 #ifdef HAVE_SSL_GET1_PEER_CERTIFICATE 3502 X509* x = SSL_get1_peer_certificate(c->ssl); 3503 #else 3504 X509* x = SSL_get_peer_certificate(c->ssl); 3505 #endif 3506 if(x) { 3507 log_cert(VERB_ALGO, "peer certificate", x); 3508 X509_free(x); 3509 } 3510 log_addr(VERB_ALGO, "SSL connection failed: " 3511 "failed to 
authenticate", 3512 &c->repinfo.remote_addr, 3513 c->repinfo.remote_addrlen); 3514 return 0; 3515 } 3516 } else { 3517 /* unauthenticated, the verify peer flag was not set 3518 * in c->ssl when the ssl object was created from ssl_ctx */ 3519 log_addr(VERB_ALGO, "SSL connection", &c->repinfo.remote_addr, 3520 c->repinfo.remote_addrlen); 3521 } 3522 3523 #ifdef HAVE_SSL_GET0_ALPN_SELECTED 3524 /* check if http2 use is negotiated */ 3525 if(c->type == comm_http && c->h2_session) { 3526 const unsigned char *alpn; 3527 unsigned int alpnlen = 0; 3528 SSL_get0_alpn_selected(c->ssl, &alpn, &alpnlen); 3529 if(alpnlen == 2 && memcmp("h2", alpn, 2) == 0) { 3530 /* connection upgraded to HTTP2 */ 3531 c->tcp_do_toggle_rw = 0; 3532 c->use_h2 = 1; 3533 } else { 3534 verbose(VERB_ALGO, "client doesn't support HTTP/2"); 3535 return 0; 3536 } 3537 } 3538 #endif 3539 3540 /* setup listen rw correctly */ 3541 if(c->tcp_is_reading) { 3542 if(c->ssl_shake_state != comm_ssl_shake_read) 3543 comm_point_listen_for_rw(c, 1, 0); 3544 } else { 3545 comm_point_listen_for_rw(c, 0, 1); 3546 } 3547 c->ssl_shake_state = comm_ssl_shake_none; 3548 return 1; 3549 } 3550 #endif /* HAVE_SSL */ 3551 3552 /** ssl read callback on TCP */ 3553 static int 3554 ssl_handle_read(struct comm_point* c) 3555 { 3556 #ifdef HAVE_SSL 3557 int r; 3558 if(c->ssl_shake_state != comm_ssl_shake_none) { 3559 if(!ssl_handshake(c)) 3560 return 0; 3561 if(c->ssl_shake_state != comm_ssl_shake_none) 3562 return 1; 3563 } 3564 if(c->pp2_enabled && c->pp2_header_state != pp2_header_done) { 3565 struct pp2_header* header = NULL; 3566 size_t want_read_size = 0; 3567 size_t current_read_size = 0; 3568 if(c->pp2_header_state == pp2_header_none) { 3569 want_read_size = PP2_HEADER_SIZE; 3570 if(sldns_buffer_remaining(c->buffer)<want_read_size) { 3571 log_err_addr("proxy_protocol: not enough " 3572 "buffer size to read PROXYv2 header", "", 3573 &c->repinfo.remote_addr, 3574 c->repinfo.remote_addrlen); 3575 return 0; 3576 } 3577 
verbose(VERB_ALGO, "proxy_protocol: reading fixed " 3578 "part of PROXYv2 header (len %lu)", 3579 (unsigned long)want_read_size); 3580 current_read_size = want_read_size; 3581 if(c->tcp_byte_count < current_read_size) { 3582 ERR_clear_error(); 3583 if((r=SSL_read(c->ssl, (void*)sldns_buffer_at( 3584 c->buffer, c->tcp_byte_count), 3585 current_read_size - 3586 c->tcp_byte_count)) <= 0) { 3587 int want = SSL_get_error(c->ssl, r); 3588 if(want == SSL_ERROR_ZERO_RETURN) { 3589 if(c->tcp_req_info) 3590 return tcp_req_info_handle_read_close(c->tcp_req_info); 3591 return 0; /* shutdown, closed */ 3592 } else if(want == SSL_ERROR_WANT_READ) { 3593 #ifdef USE_WINSOCK 3594 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 3595 #endif 3596 return 1; /* read more later */ 3597 } else if(want == SSL_ERROR_WANT_WRITE) { 3598 c->ssl_shake_state = comm_ssl_shake_hs_write; 3599 comm_point_listen_for_rw(c, 0, 1); 3600 return 1; 3601 } else if(want == SSL_ERROR_SYSCALL) { 3602 #ifdef ECONNRESET 3603 if(errno == ECONNRESET && verbosity < 2) 3604 return 0; /* silence reset by peer */ 3605 #endif 3606 if(errno != 0) 3607 log_err("SSL_read syscall: %s", 3608 strerror(errno)); 3609 return 0; 3610 } 3611 log_crypto_err_io("could not SSL_read", 3612 want); 3613 return 0; 3614 } 3615 c->tcp_byte_count += r; 3616 sldns_buffer_skip(c->buffer, r); 3617 if(c->tcp_byte_count != current_read_size) return 1; 3618 c->pp2_header_state = pp2_header_init; 3619 } 3620 } 3621 if(c->pp2_header_state == pp2_header_init) { 3622 int err; 3623 err = pp2_read_header( 3624 sldns_buffer_begin(c->buffer), 3625 sldns_buffer_limit(c->buffer)); 3626 if(err) { 3627 log_err("proxy_protocol: could not parse " 3628 "PROXYv2 header (%s)", 3629 pp_lookup_error(err)); 3630 return 0; 3631 } 3632 header = (struct pp2_header*)sldns_buffer_begin(c->buffer); 3633 want_read_size = ntohs(header->len); 3634 if(sldns_buffer_limit(c->buffer) < 3635 PP2_HEADER_SIZE + want_read_size) { 3636 log_err_addr("proxy_protocol: not enough " 
3637 "buffer size to read PROXYv2 header", "", 3638 &c->repinfo.remote_addr, 3639 c->repinfo.remote_addrlen); 3640 return 0; 3641 } 3642 verbose(VERB_ALGO, "proxy_protocol: reading variable " 3643 "part of PROXYv2 header (len %lu)", 3644 (unsigned long)want_read_size); 3645 current_read_size = PP2_HEADER_SIZE + want_read_size; 3646 if(want_read_size == 0) { 3647 /* nothing more to read; header is complete */ 3648 c->pp2_header_state = pp2_header_done; 3649 } else if(c->tcp_byte_count < current_read_size) { 3650 ERR_clear_error(); 3651 if((r=SSL_read(c->ssl, (void*)sldns_buffer_at( 3652 c->buffer, c->tcp_byte_count), 3653 current_read_size - 3654 c->tcp_byte_count)) <= 0) { 3655 int want = SSL_get_error(c->ssl, r); 3656 if(want == SSL_ERROR_ZERO_RETURN) { 3657 if(c->tcp_req_info) 3658 return tcp_req_info_handle_read_close(c->tcp_req_info); 3659 return 0; /* shutdown, closed */ 3660 } else if(want == SSL_ERROR_WANT_READ) { 3661 #ifdef USE_WINSOCK 3662 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 3663 #endif 3664 return 1; /* read more later */ 3665 } else if(want == SSL_ERROR_WANT_WRITE) { 3666 c->ssl_shake_state = comm_ssl_shake_hs_write; 3667 comm_point_listen_for_rw(c, 0, 1); 3668 return 1; 3669 } else if(want == SSL_ERROR_SYSCALL) { 3670 #ifdef ECONNRESET 3671 if(errno == ECONNRESET && verbosity < 2) 3672 return 0; /* silence reset by peer */ 3673 #endif 3674 if(errno != 0) 3675 log_err("SSL_read syscall: %s", 3676 strerror(errno)); 3677 return 0; 3678 } 3679 log_crypto_err_io("could not SSL_read", 3680 want); 3681 return 0; 3682 } 3683 c->tcp_byte_count += r; 3684 sldns_buffer_skip(c->buffer, r); 3685 if(c->tcp_byte_count != current_read_size) return 1; 3686 c->pp2_header_state = pp2_header_done; 3687 } 3688 } 3689 if(c->pp2_header_state != pp2_header_done || !header) { 3690 log_err_addr("proxy_protocol: wrong state for the " 3691 "PROXYv2 header", "", &c->repinfo.remote_addr, 3692 c->repinfo.remote_addrlen); 3693 return 0; 3694 } 3695 
sldns_buffer_flip(c->buffer); 3696 if(!consume_pp2_header(c->buffer, &c->repinfo, 1)) { 3697 log_err_addr("proxy_protocol: could not consume " 3698 "PROXYv2 header", "", &c->repinfo.remote_addr, 3699 c->repinfo.remote_addrlen); 3700 return 0; 3701 } 3702 verbose(VERB_ALGO, "proxy_protocol: successful read of " 3703 "PROXYv2 header"); 3704 /* Clear and reset the buffer to read the following 3705 * DNS packet(s). */ 3706 sldns_buffer_clear(c->buffer); 3707 c->tcp_byte_count = 0; 3708 return 1; 3709 } 3710 if(c->tcp_byte_count < sizeof(uint16_t)) { 3711 /* read length bytes */ 3712 ERR_clear_error(); 3713 if((r=SSL_read(c->ssl, (void*)sldns_buffer_at(c->buffer, 3714 c->tcp_byte_count), (int)(sizeof(uint16_t) - 3715 c->tcp_byte_count))) <= 0) { 3716 int want = SSL_get_error(c->ssl, r); 3717 if(want == SSL_ERROR_ZERO_RETURN) { 3718 if(c->tcp_req_info) 3719 return tcp_req_info_handle_read_close(c->tcp_req_info); 3720 return 0; /* shutdown, closed */ 3721 } else if(want == SSL_ERROR_WANT_READ) { 3722 #ifdef USE_WINSOCK 3723 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 3724 #endif 3725 return 1; /* read more later */ 3726 } else if(want == SSL_ERROR_WANT_WRITE) { 3727 c->ssl_shake_state = comm_ssl_shake_hs_write; 3728 comm_point_listen_for_rw(c, 0, 1); 3729 return 1; 3730 } else if(want == SSL_ERROR_SYSCALL) { 3731 #ifdef ECONNRESET 3732 if(errno == ECONNRESET && verbosity < 2) 3733 return 0; /* silence reset by peer */ 3734 #endif 3735 if(errno != 0) 3736 log_err("SSL_read syscall: %s", 3737 strerror(errno)); 3738 return 0; 3739 } 3740 log_crypto_err_io("could not SSL_read", want); 3741 return 0; 3742 } 3743 c->tcp_byte_count += r; 3744 if(c->tcp_byte_count < sizeof(uint16_t)) 3745 return 1; 3746 if(sldns_buffer_read_u16_at(c->buffer, 0) > 3747 sldns_buffer_capacity(c->buffer)) { 3748 verbose(VERB_QUERY, "ssl: dropped larger than buffer"); 3749 return 0; 3750 } 3751 sldns_buffer_set_limit(c->buffer, 3752 sldns_buffer_read_u16_at(c->buffer, 0)); 3753 
if(sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) { 3754 verbose(VERB_QUERY, "ssl: dropped bogus too short."); 3755 return 0; 3756 } 3757 sldns_buffer_skip(c->buffer, (ssize_t)(c->tcp_byte_count-sizeof(uint16_t))); 3758 verbose(VERB_ALGO, "Reading ssl tcp query of length %d", 3759 (int)sldns_buffer_limit(c->buffer)); 3760 } 3761 if(sldns_buffer_remaining(c->buffer) > 0) { 3762 ERR_clear_error(); 3763 r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer), 3764 (int)sldns_buffer_remaining(c->buffer)); 3765 if(r <= 0) { 3766 int want = SSL_get_error(c->ssl, r); 3767 if(want == SSL_ERROR_ZERO_RETURN) { 3768 if(c->tcp_req_info) 3769 return tcp_req_info_handle_read_close(c->tcp_req_info); 3770 return 0; /* shutdown, closed */ 3771 } else if(want == SSL_ERROR_WANT_READ) { 3772 #ifdef USE_WINSOCK 3773 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 3774 #endif 3775 return 1; /* read more later */ 3776 } else if(want == SSL_ERROR_WANT_WRITE) { 3777 c->ssl_shake_state = comm_ssl_shake_hs_write; 3778 comm_point_listen_for_rw(c, 0, 1); 3779 return 1; 3780 } else if(want == SSL_ERROR_SYSCALL) { 3781 #ifdef ECONNRESET 3782 if(errno == ECONNRESET && verbosity < 2) 3783 return 0; /* silence reset by peer */ 3784 #endif 3785 if(errno != 0) 3786 log_err("SSL_read syscall: %s", 3787 strerror(errno)); 3788 return 0; 3789 } 3790 log_crypto_err_io("could not SSL_read", want); 3791 return 0; 3792 } 3793 sldns_buffer_skip(c->buffer, (ssize_t)r); 3794 } 3795 if(sldns_buffer_remaining(c->buffer) <= 0) { 3796 tcp_callback_reader(c); 3797 } 3798 return 1; 3799 #else 3800 (void)c; 3801 return 0; 3802 #endif /* HAVE_SSL */ 3803 } 3804 3805 /** ssl write callback on TCP */ 3806 static int 3807 ssl_handle_write(struct comm_point* c) 3808 { 3809 #ifdef HAVE_SSL 3810 int r; 3811 if(c->ssl_shake_state != comm_ssl_shake_none) { 3812 if(!ssl_handshake(c)) 3813 return 0; 3814 if(c->ssl_shake_state != comm_ssl_shake_none) 3815 return 1; 3816 } 3817 /* ignore return, if fails we may simply 
block */ 3818 (void)SSL_set_mode(c->ssl, (long)SSL_MODE_ENABLE_PARTIAL_WRITE); 3819 if((c->tcp_write_and_read?c->tcp_write_byte_count:c->tcp_byte_count) < sizeof(uint16_t)) { 3820 uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(c->buffer)); 3821 ERR_clear_error(); 3822 if(c->tcp_write_and_read) { 3823 if(c->tcp_write_pkt_len + 2 < LDNS_RR_BUF_SIZE) { 3824 /* combine the tcp length and the query for 3825 * write, this emulates writev */ 3826 uint8_t buf[LDNS_RR_BUF_SIZE]; 3827 memmove(buf, &len, sizeof(uint16_t)); 3828 memmove(buf+sizeof(uint16_t), 3829 c->tcp_write_pkt, 3830 c->tcp_write_pkt_len); 3831 r = SSL_write(c->ssl, 3832 (void*)(buf+c->tcp_write_byte_count), 3833 c->tcp_write_pkt_len + 2 - 3834 c->tcp_write_byte_count); 3835 } else { 3836 r = SSL_write(c->ssl, 3837 (void*)(((uint8_t*)&len)+c->tcp_write_byte_count), 3838 (int)(sizeof(uint16_t)-c->tcp_write_byte_count)); 3839 } 3840 } else if(sizeof(uint16_t)+sldns_buffer_remaining(c->buffer) < 3841 LDNS_RR_BUF_SIZE) { 3842 /* combine the tcp length and the query for write, 3843 * this emulates writev */ 3844 uint8_t buf[LDNS_RR_BUF_SIZE]; 3845 memmove(buf, &len, sizeof(uint16_t)); 3846 memmove(buf+sizeof(uint16_t), 3847 sldns_buffer_current(c->buffer), 3848 sldns_buffer_remaining(c->buffer)); 3849 r = SSL_write(c->ssl, (void*)(buf+c->tcp_byte_count), 3850 (int)(sizeof(uint16_t)+ 3851 sldns_buffer_remaining(c->buffer) 3852 - c->tcp_byte_count)); 3853 } else { 3854 r = SSL_write(c->ssl, 3855 (void*)(((uint8_t*)&len)+c->tcp_byte_count), 3856 (int)(sizeof(uint16_t)-c->tcp_byte_count)); 3857 } 3858 if(r <= 0) { 3859 int want = SSL_get_error(c->ssl, r); 3860 if(want == SSL_ERROR_ZERO_RETURN) { 3861 return 0; /* closed */ 3862 } else if(want == SSL_ERROR_WANT_READ) { 3863 c->ssl_shake_state = comm_ssl_shake_hs_read; 3864 comm_point_listen_for_rw(c, 1, 0); 3865 return 1; /* wait for read condition */ 3866 } else if(want == SSL_ERROR_WANT_WRITE) { 3867 #ifdef USE_WINSOCK 3868 
ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 3869 #endif 3870 return 1; /* write more later */ 3871 } else if(want == SSL_ERROR_SYSCALL) { 3872 #ifdef EPIPE 3873 if(errno == EPIPE && verbosity < 2) 3874 return 0; /* silence 'broken pipe' */ 3875 #endif 3876 if(errno != 0) 3877 log_err("SSL_write syscall: %s", 3878 strerror(errno)); 3879 return 0; 3880 } 3881 log_crypto_err_io("could not SSL_write", want); 3882 return 0; 3883 } 3884 if(c->tcp_write_and_read) { 3885 c->tcp_write_byte_count += r; 3886 if(c->tcp_write_byte_count < sizeof(uint16_t)) 3887 return 1; 3888 } else { 3889 c->tcp_byte_count += r; 3890 if(c->tcp_byte_count < sizeof(uint16_t)) 3891 return 1; 3892 sldns_buffer_set_position(c->buffer, c->tcp_byte_count - 3893 sizeof(uint16_t)); 3894 } 3895 if((!c->tcp_write_and_read && sldns_buffer_remaining(c->buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) { 3896 tcp_callback_writer(c); 3897 return 1; 3898 } 3899 } 3900 log_assert(c->tcp_write_and_read || sldns_buffer_remaining(c->buffer) > 0); 3901 log_assert(!c->tcp_write_and_read || c->tcp_write_byte_count < c->tcp_write_pkt_len + 2); 3902 ERR_clear_error(); 3903 if(c->tcp_write_and_read) { 3904 r = SSL_write(c->ssl, (void*)(c->tcp_write_pkt + c->tcp_write_byte_count - 2), 3905 (int)(c->tcp_write_pkt_len + 2 - c->tcp_write_byte_count)); 3906 } else { 3907 r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer), 3908 (int)sldns_buffer_remaining(c->buffer)); 3909 } 3910 if(r <= 0) { 3911 int want = SSL_get_error(c->ssl, r); 3912 if(want == SSL_ERROR_ZERO_RETURN) { 3913 return 0; /* closed */ 3914 } else if(want == SSL_ERROR_WANT_READ) { 3915 c->ssl_shake_state = comm_ssl_shake_hs_read; 3916 comm_point_listen_for_rw(c, 1, 0); 3917 return 1; /* wait for read condition */ 3918 } else if(want == SSL_ERROR_WANT_WRITE) { 3919 #ifdef USE_WINSOCK 3920 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 3921 #endif 3922 return 1; /* write more later */ 3923 } 
else if(want == SSL_ERROR_SYSCALL) { 3924 #ifdef EPIPE 3925 if(errno == EPIPE && verbosity < 2) 3926 return 0; /* silence 'broken pipe' */ 3927 #endif 3928 if(errno != 0) 3929 log_err("SSL_write syscall: %s", 3930 strerror(errno)); 3931 return 0; 3932 } 3933 log_crypto_err_io("could not SSL_write", want); 3934 return 0; 3935 } 3936 if(c->tcp_write_and_read) { 3937 c->tcp_write_byte_count += r; 3938 } else { 3939 sldns_buffer_skip(c->buffer, (ssize_t)r); 3940 } 3941 3942 if((!c->tcp_write_and_read && sldns_buffer_remaining(c->buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) { 3943 tcp_callback_writer(c); 3944 } 3945 return 1; 3946 #else 3947 (void)c; 3948 return 0; 3949 #endif /* HAVE_SSL */ 3950 } 3951 3952 /** handle ssl tcp connection with dns contents */ 3953 static int 3954 ssl_handle_it(struct comm_point* c, int is_write) 3955 { 3956 /* handle case where renegotiation wants read during write call 3957 * or write during read calls */ 3958 if(is_write && c->ssl_shake_state == comm_ssl_shake_hs_write) 3959 return ssl_handle_read(c); 3960 else if(!is_write && c->ssl_shake_state == comm_ssl_shake_hs_read) 3961 return ssl_handle_write(c); 3962 /* handle read events for read operation and write events for a 3963 * write operation */ 3964 else if(!is_write) 3965 return ssl_handle_read(c); 3966 return ssl_handle_write(c); 3967 } 3968 3969 /** 3970 * Handle tcp reading callback. 3971 * @param fd: file descriptor of socket. 3972 * @param c: comm point to read from into buffer. 3973 * @param short_ok: if true, very short packets are OK (for comm_local). 
 * @return: 0 on error
 */
static int
comm_point_tcp_handle_read(int fd, struct comm_point* c, int short_ok)
{
	ssize_t r;
	/* becomes 1 when the error path is entered via recv_error_initial;
	 * it enables the extra errno filtering and the "tcp initial" log
	 * text at the end of this function */
	int recv_initial = 0;
	log_assert(c->type == comm_tcp || c->type == comm_local);
	/* connections with an ssl object are handed to ssl_handle_it */
	if(c->ssl)
		return ssl_handle_it(c, 0);
	/* not in a reading state: report failure to the caller */
	if(!c->tcp_is_reading && !c->tcp_write_and_read)
		return 0;

	log_assert(fd != -1);
	/* PROXYv2 header, read before any DNS data. pp2_header_state moves
	 * none -> init (fixed part read) -> done (variable part read).
	 * While in this branch tcp_byte_count counts header bytes received. */
	if(c->pp2_enabled && c->pp2_header_state != pp2_header_done) {
		struct pp2_header* header = NULL;
		size_t want_read_size = 0;
		size_t current_read_size = 0;
		if(c->pp2_header_state == pp2_header_none) {
			/* step 1: the fixed PP2_HEADER_SIZE bytes */
			want_read_size = PP2_HEADER_SIZE;
			if(sldns_buffer_remaining(c->buffer)<want_read_size) {
				log_err_addr("proxy_protocol: not enough "
					"buffer size to read PROXYv2 header", "",
					&c->repinfo.remote_addr,
					c->repinfo.remote_addrlen);
				return 0;
			}
			verbose(VERB_ALGO, "proxy_protocol: reading fixed "
				"part of PROXYv2 header (len %lu)",
				(unsigned long)want_read_size);
			current_read_size = want_read_size;
			if(c->tcp_byte_count < current_read_size) {
				/* append after the bytes already received */
				r = recv(fd, (void*)sldns_buffer_at(c->buffer,
					c->tcp_byte_count),
					current_read_size-c->tcp_byte_count, MSG_DONTWAIT);
				if(r == 0) {
					/* peer closed the connection */
					if(c->tcp_req_info)
						return tcp_req_info_handle_read_close(c->tcp_req_info);
					return 0;
				} else if(r == -1) {
					goto recv_error_initial;
				}
				c->tcp_byte_count += r;
				sldns_buffer_skip(c->buffer, r);
				/* partial read: wait for the next read event */
				if(c->tcp_byte_count != current_read_size) return 1;
				c->pp2_header_state = pp2_header_init;
			}
		}
		if(c->pp2_header_state == pp2_header_init) {
			/* step 2: validate the fixed part, then read the
			 * header->len bytes that follow it */
			int err;
			err = pp2_read_header(
				sldns_buffer_begin(c->buffer),
				sldns_buffer_limit(c->buffer));
			if(err) {
				log_err("proxy_protocol: could not parse "
					"PROXYv2 header (%s)",
					pp_lookup_error(err));
				return 0;
			}
			header = (struct pp2_header*)sldns_buffer_begin(c->buffer);
			want_read_size = ntohs(header->len);
			if(sldns_buffer_limit(c->buffer) <
				PP2_HEADER_SIZE + want_read_size) {
				log_err_addr("proxy_protocol: not enough "
					"buffer size to read PROXYv2 header", "",
					&c->repinfo.remote_addr,
					c->repinfo.remote_addrlen);
				return 0;
			}
			verbose(VERB_ALGO, "proxy_protocol: reading variable "
				"part of PROXYv2 header (len %lu)",
				(unsigned long)want_read_size);
			current_read_size = PP2_HEADER_SIZE + want_read_size;
			if(want_read_size == 0) {
				/* nothing more to read; header is complete */
				c->pp2_header_state = pp2_header_done;
			} else if(c->tcp_byte_count < current_read_size) {
				r = recv(fd, (void*)sldns_buffer_at(c->buffer,
					c->tcp_byte_count),
					current_read_size-c->tcp_byte_count, MSG_DONTWAIT);
				if(r == 0) {
					if(c->tcp_req_info)
						return tcp_req_info_handle_read_close(c->tcp_req_info);
					return 0;
				} else if(r == -1) {
					goto recv_error;
				}
				c->tcp_byte_count += r;
				sldns_buffer_skip(c->buffer, r);
				/* partial read: state stays pp2_header_init, so
				 * the next call re-parses the fixed part and
				 * continues reading here */
				if(c->tcp_byte_count != current_read_size) return 1;
				c->pp2_header_state = pp2_header_done;
			}
		}
		/* header is only set in the pp2_header_init branch above */
		if(c->pp2_header_state != pp2_header_done || !header) {
			log_err_addr("proxy_protocol: wrong state for the "
				"PROXYv2 header", "", &c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
			return 0;
		}
		sldns_buffer_flip(c->buffer);
		if(!consume_pp2_header(c->buffer, &c->repinfo, 1)) {
			log_err_addr("proxy_protocol: could not consume "
				"PROXYv2 header", "", &c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
			return 0;
		}
		verbose(VERB_ALGO, "proxy_protocol: successful read of "
			"PROXYv2 header");
		/* Clear and reset the buffer to read the following
		 * DNS packet(s). */
		sldns_buffer_clear(c->buffer);
		c->tcp_byte_count = 0;
		return 1;
	}

	if(c->tcp_byte_count < sizeof(uint16_t)) {
		/* read length bytes */
		r = recv(fd,(void*)sldns_buffer_at(c->buffer,c->tcp_byte_count),
			sizeof(uint16_t)-c->tcp_byte_count, MSG_DONTWAIT);
		if(r == 0) {
			if(c->tcp_req_info)
				return tcp_req_info_handle_read_close(c->tcp_req_info);
			return 0;
		} else if(r == -1) {
			/* with PROXYv2 enabled this is not treated as the
			 * initial read of the connection */
			if(c->pp2_enabled) goto recv_error;
			goto recv_error_initial;
		}
		c->tcp_byte_count += r;
		if(c->tcp_byte_count != sizeof(uint16_t))
			return 1;
		/* both length bytes are in: the 16-bit value at offset 0 is
		 * the message length and becomes the buffer limit */
		if(sldns_buffer_read_u16_at(c->buffer, 0) >
			sldns_buffer_capacity(c->buffer)) {
			verbose(VERB_QUERY, "tcp: dropped larger than buffer");
			return 0;
		}
		sldns_buffer_set_limit(c->buffer,
			sldns_buffer_read_u16_at(c->buffer, 0));
		/* unless short_ok, refuse messages shorter than a DNS header */
		if(!short_ok &&
			sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
			verbose(VERB_QUERY, "tcp: dropped bogus too short.");
			return 0;
		}
		verbose(VERB_ALGO, "Reading tcp query of length %d",
			(int)sldns_buffer_limit(c->buffer));
	}

	/* read (more of) the message body into the buffer */
	if(sldns_buffer_remaining(c->buffer) == 0)
		log_err("in comm_point_tcp_handle_read buffer_remaining is "
			"not > 0 as expected, continuing with (harmless) 0 "
			"length recv");
	r = recv(fd, (void*)sldns_buffer_current(c->buffer),
		sldns_buffer_remaining(c->buffer), MSG_DONTWAIT);
	if(r == 0) {
		if(c->tcp_req_info)
			return tcp_req_info_handle_read_close(c->tcp_req_info);
		return 0;
	} else if(r == -1) {
		goto recv_error;
	}
	sldns_buffer_skip(c->buffer, r);
	/* buffer filled up to its limit: hand over to the reader callback */
	if(sldns_buffer_remaining(c->buffer) <= 0) {
		tcp_callback_reader(c);
	}
	return 1;

	/* shared recv() failure handling; recv_error_initial falls through
	 * into recv_error after setting the flag */
recv_error_initial:
	recv_initial = 1;
recv_error:
#ifndef USE_WINSOCK
	/* transient conditions: keep the connection and retry later */
	if(errno == EINTR || errno == EAGAIN)
		return 1;
#ifdef ECONNRESET
	if(errno == ECONNRESET && verbosity < 2)
		return 0; /* silence reset by peer */
#endif
	if(recv_initial) {
		/* errors that are not logged at verbosity < 2 when they
		 * happen on the initial read */
#ifdef ECONNREFUSED
		if(errno == ECONNREFUSED && verbosity < 2)
			return 0; /* silence connection refused */
#endif
#ifdef ENETUNREACH
		if(errno == ENETUNREACH && verbosity < 2)
			return 0; /* silence it */
#endif
#ifdef EHOSTDOWN
		if(errno == EHOSTDOWN && verbosity < 2)
			return 0; /* silence it */
#endif
#ifdef EHOSTUNREACH
		if(errno == EHOSTUNREACH && verbosity < 2)
			return 0; /* silence it */
#endif
#ifdef ENETDOWN
		if(errno == ENETDOWN && verbosity < 2)
			return 0; /* silence it */
#endif
#ifdef EACCES
		if(errno == EACCES && verbosity < 2)
			return 0; /* silence it */
#endif
#ifdef ENOTCONN
		/* always logged, with a hint about TCP Fast Open */
		if(errno == ENOTCONN) {
			log_err_addr("read (in tcp initial) failed and this "
				"could be because TCP Fast Open is "
				"enabled [--disable-tfo-client "
				"--disable-tfo-server] but does not "
				"work", sock_strerror(errno),
				&c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
			return 0;
		}
#endif
	}
#else /* USE_WINSOCK */
	if(recv_initial) {
		if(WSAGetLastError() == WSAECONNREFUSED && verbosity < 2)
			return 0;
		if(WSAGetLastError() == WSAEHOSTDOWN && verbosity < 2)
			return 0;
		if(WSAGetLastError() == WSAEHOSTUNREACH && verbosity < 2)
			return 0;
		if(WSAGetLastError() == WSAENETDOWN && verbosity < 2)
			return 0;
		if(WSAGetLastError() == WSAENETUNREACH && verbosity < 2)
			return 0;
	}
	if(WSAGetLastError() == WSAECONNRESET)
		return 0;
	if(WSAGetLastError() == WSAEINPROGRESS)
		return 1;
	if(WSAGetLastError() == WSAEWOULDBLOCK) {
		ub_winsock_tcp_wouldblock(c->ev->ev,
			UB_EV_READ);
		return 1;
	}
#endif
	/* anything not filtered above is logged and closes the connection */
	log_err_addr((recv_initial?"read (in tcp initial)":"read (in tcp)"),
		sock_strerror(errno), &c->repinfo.remote_addr,
		c->repinfo.remote_addrlen);
	return 0;
}

/**
 * Handle tcp writing callback.
 * Writes the 2-byte length prefix followed by the message. Progress is kept
 * in c->tcp_write_byte_count when c->tcp_write_and_read is set, and in
 * c->tcp_byte_count plus the buffer position otherwise.
 * @param fd: file descriptor of socket.
 * @param c: comm point to write buffer out of.
 * @return: 0 on error
 */
static int
comm_point_tcp_handle_write(int fd, struct comm_point* c)
{
	ssize_t r;
	struct sldns_buffer *buffer;
	log_assert(c->type == comm_tcp);
#ifdef USE_DNSCRYPT
	buffer = c->dnscrypt_buffer;
#else
	buffer = c->buffer;
#endif
	if(c->tcp_is_reading && !c->ssl && !c->tcp_write_and_read)
		return 0;
	log_assert(fd != -1);
	/* nothing written yet and a nonblocking connect was started:
	 * fetch its result first */
	if(((!c->tcp_write_and_read && c->tcp_byte_count == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == 0)) && c->tcp_check_nb_connect) {
		/* check for pending error from nonblocking connect */
		/* from Stevens, unix network programming, vol1, 3rd ed, p450*/
		int error = 0;
		socklen_t len = (socklen_t)sizeof(error);
		if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error,
			&len) < 0){
#ifndef USE_WINSOCK
			error = errno; /* on solaris errno is error */
#else /* USE_WINSOCK */
			error = WSAGetLastError();
#endif
		}
#ifndef USE_WINSOCK
#if defined(EINPROGRESS) && defined(EWOULDBLOCK)
		if(error == EINPROGRESS || error == EWOULDBLOCK)
			return 1; /* try again later */
		else
#endif
		if(error != 0 && verbosity < 2)
			return 0; /* silence lots of chatter in the logs */
		else if(error != 0) {
			log_err_addr("tcp connect", strerror(error),
				&c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
#else /* USE_WINSOCK */
		/* examine error */
		if(error == WSAEINPROGRESS)
			return 1;
		else if(error == WSAEWOULDBLOCK) {
			ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
			return 1;
		} else if(error != 0 && verbosity < 2)
			return 0;
		else if(error != 0) {
			log_err_addr("tcp connect", wsa_strerror(error),
				&c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
#endif /* USE_WINSOCK */
			/* shared tail of both platform branches above */
			return 0;
		}
	}
	if(c->ssl)
		return ssl_handle_it(c, 1);

#ifdef USE_MSG_FASTOPEN
	/* Only try this on first use of a connection that uses tfo,
	   otherwise fall through to normal write */
	/* Also, TFO support on WINDOWS not implemented at the moment */
	if(c->tcp_do_fastopen == 1) {
		/* this form of sendmsg() does both a connect() and send() so need to
		   look for various flavours of error*/
		uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(buffer));
		struct msghdr msg;
		struct iovec iov[2];
		/* one attempt only; later calls use the normal write path */
		c->tcp_do_fastopen = 0;
		memset(&msg, 0, sizeof(msg));
		/* iov[0] is the unsent part of the length prefix,
		 * iov[1] is the message itself */
		if(c->tcp_write_and_read) {
			iov[0].iov_base = (uint8_t*)&len + c->tcp_write_byte_count;
			iov[0].iov_len = sizeof(uint16_t) - c->tcp_write_byte_count;
			iov[1].iov_base = c->tcp_write_pkt;
			iov[1].iov_len = c->tcp_write_pkt_len;
		} else {
			iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
			iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
			iov[1].iov_base = sldns_buffer_begin(buffer);
			iov[1].iov_len = sldns_buffer_limit(buffer);
		}
		log_assert(iov[0].iov_len > 0);
		msg.msg_name = &c->repinfo.remote_addr;
		msg.msg_namelen = c->repinfo.remote_addrlen;
		msg.msg_iov = iov;
		msg.msg_iovlen = 2;
		r = sendmsg(fd, &msg, MSG_FASTOPEN);
		if (r == -1) {
#if defined(EINPROGRESS) && defined(EWOULDBLOCK)
			/* Handshake is underway, maybe because no TFO cookie available.
			   Come back to write the message*/
			if(errno == EINPROGRESS || errno == EWOULDBLOCK)
				return 1;
#endif
			if(errno == EINTR || errno == EAGAIN)
				return 1;
			/* Not handling EISCONN here as shouldn't ever hit that case.*/
			if(errno != EPIPE
#ifdef EOPNOTSUPP
				/* if /proc/sys/net/ipv4/tcp_fastopen is
				 * disabled on Linux, sendmsg may return
				 * 'Operation not supported', if so
				 * fallthrough to ordinary connect. */
				&& errno != EOPNOTSUPP
#endif
				&& errno != 0) {
				if(verbosity < 2)
					return 0; /* silence lots of chatter in the logs */
				log_err_addr("tcp sendmsg", strerror(errno),
					&c->repinfo.remote_addr,
					c->repinfo.remote_addrlen);
				return 0;
			}
			verbose(VERB_ALGO, "tcp sendmsg for fastopen failed (with %s), try normal connect", strerror(errno));
			/* fallthrough to nonFASTOPEN
			 * (MSG_FASTOPEN on Linux 3 produces EPIPE)
			 * we need to perform connect() */
			if(connect(fd, (struct sockaddr *)&c->repinfo.remote_addr,
				c->repinfo.remote_addrlen) == -1) {
#ifdef EINPROGRESS
				if(errno == EINPROGRESS)
					return 1; /* wait until connect done*/
#endif
#ifdef USE_WINSOCK
				if(WSAGetLastError() == WSAEINPROGRESS ||
					WSAGetLastError() == WSAEWOULDBLOCK)
					return 1; /* wait until connect done*/
#endif
				if(tcp_connect_errno_needs_log(
					(struct sockaddr *)&c->repinfo.remote_addr,
					c->repinfo.remote_addrlen)) {
					log_err_addr("outgoing tcp: connect after EPIPE for fastopen",
						strerror(errno),
						&c->repinfo.remote_addr,
						c->repinfo.remote_addrlen);
				}
				return 0;
			}

		} else {
			/* sendmsg wrote r bytes: account for them */
			if(c->tcp_write_and_read) {
				c->tcp_write_byte_count += r;
				if(c->tcp_write_byte_count < sizeof(uint16_t))
					return 1;
			} else {
				c->tcp_byte_count += r;
				if(c->tcp_byte_count < sizeof(uint16_t))
					return 1;
				/* bytes beyond the length prefix are message
				 * bytes already sent */
				sldns_buffer_set_position(buffer, c->tcp_byte_count -
					sizeof(uint16_t));
			}
			if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
				tcp_callback_writer(c);
				return 1;
			}
		}
	}
#endif /* USE_MSG_FASTOPEN */

	/* length prefix not (completely) written yet */
	if((c->tcp_write_and_read?c->tcp_write_byte_count:c->tcp_byte_count) < sizeof(uint16_t)) {
		uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(buffer));
#ifdef HAVE_WRITEV
		/* with writev the prefix and the message go out in one call */
		struct iovec iov[2];
		if(c->tcp_write_and_read) {
			iov[0].iov_base = (uint8_t*)&len + c->tcp_write_byte_count;
			iov[0].iov_len = sizeof(uint16_t) - c->tcp_write_byte_count;
			iov[1].iov_base = c->tcp_write_pkt;
			iov[1].iov_len = c->tcp_write_pkt_len;
		} else {
			iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
			iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
			iov[1].iov_base = sldns_buffer_begin(buffer);
			iov[1].iov_len = sldns_buffer_limit(buffer);
		}
		log_assert(iov[0].iov_len > 0);
		r = writev(fd, iov, 2);
#else /* HAVE_WRITEV */
		/* without writev only the length prefix is sent here */
		if(c->tcp_write_and_read) {
			r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_write_byte_count),
				sizeof(uint16_t)-c->tcp_write_byte_count, 0);
		} else {
			r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_byte_count),
				sizeof(uint16_t)-c->tcp_byte_count, 0);
		}
#endif /* HAVE_WRITEV */
		if(r == -1) {
#ifndef USE_WINSOCK
# ifdef EPIPE
			if(errno == EPIPE && verbosity < 2)
				return 0; /* silence 'broken pipe' */
#endif
			if(errno == EINTR || errno == EAGAIN)
				return 1;
#ifdef ECONNRESET
			if(errno == ECONNRESET && verbosity < 2)
				return 0; /* silence reset by peer */
#endif
# ifdef HAVE_WRITEV
			log_err_addr("tcp writev", strerror(errno),
				&c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
# else /* HAVE_WRITEV */
			log_err_addr("tcp send s", strerror(errno),
				&c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
# endif /* HAVE_WRITEV */
#else
			if(WSAGetLastError() == WSAENOTCONN)
				return 1;
			if(WSAGetLastError() == WSAEINPROGRESS)
				return 1;
			if(WSAGetLastError() == WSAEWOULDBLOCK) {
				ub_winsock_tcp_wouldblock(c->ev->ev,
					UB_EV_WRITE);
				return 1;
			}
			if(WSAGetLastError() == WSAECONNRESET && verbosity < 2)
				return 0; /* silence reset by peer */
			log_err_addr("tcp send s",
				wsa_strerror(WSAGetLastError()),
				&c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
#endif
			return 0;
		}
		if(c->tcp_write_and_read) {
			c->tcp_write_byte_count += r;
			if(c->tcp_write_byte_count < sizeof(uint16_t))
				return 1;
		} else {
			c->tcp_byte_count += r;
			if(c->tcp_byte_count < sizeof(uint16_t))
				return 1;
			sldns_buffer_set_position(buffer, c->tcp_byte_count -
				sizeof(uint16_t));
		}
		if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
			tcp_callback_writer(c);
			return 1;
		}
	}
	/* length prefix is out; send the remaining message bytes */
	log_assert(c->tcp_write_and_read || sldns_buffer_remaining(buffer) > 0);
	log_assert(!c->tcp_write_and_read || c->tcp_write_byte_count < c->tcp_write_pkt_len + 2);
	if(c->tcp_write_and_read) {
		/* tcp_write_byte_count includes the 2 prefix bytes, hence -2 */
		r = send(fd, (void*)(c->tcp_write_pkt + c->tcp_write_byte_count - 2),
			c->tcp_write_pkt_len + 2 - c->tcp_write_byte_count, 0);
	} else {
		r = send(fd, (void*)sldns_buffer_current(buffer),
			sldns_buffer_remaining(buffer), 0);
	}
	if(r == -1) {
#ifndef USE_WINSOCK
		if(errno == EINTR || errno == EAGAIN)
			return 1;
#ifdef ECONNRESET
		if(errno == ECONNRESET && verbosity < 2)
			return 0; /* silence reset by peer */
#endif
#else
		if(WSAGetLastError() == WSAEINPROGRESS)
			return 1;
		if(WSAGetLastError() == WSAEWOULDBLOCK) {
			ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
			return 1;
		}
		if(WSAGetLastError() == WSAECONNRESET && verbosity < 2)
			return 0; /* silence reset by peer */
#endif
		log_err_addr("tcp send r", sock_strerror(errno),
			&c->repinfo.remote_addr,
			c->repinfo.remote_addrlen);
		return 0;
	}
	if(c->tcp_write_and_read) {
		c->tcp_write_byte_count += r;
	} else {
		sldns_buffer_skip(buffer, r);
	}

	/* everything written: notify the writer callback */
	if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
		tcp_callback_writer(c);
	}

	return 1;
}

/** read again to drain buffers when there could be more to read, returns 0
 * on failure which means the comm point is closed. */
static int
tcp_req_info_read_again(int fd, struct comm_point* c)
{
	while(c->tcp_req_info->read_again) {
		int r;
		/* cleared before the call; the loop repeats only if the
		 * flag is found set again afterwards */
		c->tcp_req_info->read_again = 0;
		if(c->tcp_is_reading)
			r = comm_point_tcp_handle_read(fd, c, 0);
		else 	r = comm_point_tcp_handle_write(fd, c);
		if(!r) {
			reclaim_tcp_handler(c);
			if(!c->tcp_do_close) {
				fptr_ok(fptr_whitelist_comm_point(
					c->callback));
				(void)(*c->callback)(c, c->cb_arg,
					NETEVENT_CLOSED, NULL);
			}
			return 0;
		}
	}
	return 1;
}

/** read again to drain buffers when there could be more to read */
static void
tcp_more_read_again(int fd, struct comm_point* c)
{
	/* if the packet is done, but another one could be waiting on
	 * the connection, the callback signals this, and we try again */
	/* this continues until the read routines get EAGAIN or so,
	 * and thus does not call the callback, and the bool is 0 */
	int* moreread = c->tcp_more_read_again;
	while(moreread && *moreread) {
		*moreread = 0;
		if(!comm_point_tcp_handle_read(fd, c, 0)) {
			reclaim_tcp_handler(c);
			if(!c->tcp_do_close) {
				fptr_ok(fptr_whitelist_comm_point(
					c->callback));
				(void)(*c->callback)(c, c->cb_arg,
					NETEVENT_CLOSED, NULL);
			}
			return;
		}
	}
}

/** write again to fill up when there could be more to write */
static void
tcp_more_write_again(int fd, struct comm_point* c)
{
	/* if the packet is done, but another is waiting to be written,
	 * the callback signals it and we try again. */
	/* this continues until the write routines get EAGAIN or so,
	 * and thus does not call the callback, and the bool is 0 */
	int* morewrite = c->tcp_more_write_again;
	while(morewrite && *morewrite) {
		*morewrite = 0;
		if(!comm_point_tcp_handle_write(fd, c)) {
			reclaim_tcp_handler(c);
			if(!c->tcp_do_close) {
				fptr_ok(fptr_whitelist_comm_point(
					c->callback));
				(void)(*c->callback)(c, c->cb_arg,
					NETEVENT_CLOSED, NULL);
			}
			return;
		}
	}
}

/* Event callback for a comm_tcp comm point. Handles, in this order and
 * at most one per call: timeout, read readiness, write readiness.
 * A failed read or write reclaims the handler and, unless tcp_do_close
 * is set, calls the user callback with NETEVENT_CLOSED. */
void
comm_point_tcp_handle_callback(int fd, short event, void* arg)
{
	struct comm_point* c = (struct comm_point*)arg;
	log_assert(c->type == comm_tcp);
	ub_comm_base_now(c->ev->base);

	if(c->fd == -1 || c->fd != fd)
		return; /* duplicate event, but commpoint closed. */

#ifdef USE_DNSCRYPT
	/* Initialize if this is a dnscrypt socket */
	if(c->tcp_parent) {
		c->dnscrypt = c->tcp_parent->dnscrypt;
	}
	/* give the comm point its own dnscrypt buffer if it still shares
	 * c->buffer */
	if(c->dnscrypt && c->dnscrypt_buffer == c->buffer) {
		c->dnscrypt_buffer = sldns_buffer_new(sldns_buffer_capacity(c->buffer));
		if(!c->dnscrypt_buffer) {
			log_err("Could not allocate dnscrypt buffer");
			reclaim_tcp_handler(c);
			if(!c->tcp_do_close) {
				fptr_ok(fptr_whitelist_comm_point(
					c->callback));
				(void)(*c->callback)(c, c->cb_arg,
					NETEVENT_CLOSED, NULL);
			}
			return;
		}
	}
#endif

	if(event&UB_EV_TIMEOUT) {
		verbose(VERB_QUERY, "tcp took too long, dropped");
		reclaim_tcp_handler(c);
		if(!c->tcp_do_close) {
			fptr_ok(fptr_whitelist_comm_point(c->callback));
			(void)(*c->callback)(c, c->cb_arg,
				NETEVENT_TIMEOUT, NULL);
		}
		return;
	}
	if(event&UB_EV_READ
#ifdef USE_MSG_FASTOPEN
		/* with a pending fastopen attempt and a write event, skip
		 * the read so the write branch below runs */
		&& !(c->tcp_do_fastopen && (event&UB_EV_WRITE))
#endif
		) {
		/* sampled before the read; c may be changed by callbacks */
		int has_tcpq = (c->tcp_req_info != NULL);
		int* moreread = c->tcp_more_read_again;
		if(!comm_point_tcp_handle_read(fd, c, 0)) {
			reclaim_tcp_handler(c);
			if(!c->tcp_do_close) {
				fptr_ok(fptr_whitelist_comm_point(
					c->callback));
				(void)(*c->callback)(c, c->cb_arg,
					NETEVENT_CLOSED, NULL);
			}
			return;
		}
		if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again) {
			if(!tcp_req_info_read_again(fd, c))
				return;
		}
		if(moreread && *moreread)
			tcp_more_read_again(fd, c);
		return;
	}
	if(event&UB_EV_WRITE) {
		int has_tcpq = (c->tcp_req_info != NULL);
		int* morewrite = c->tcp_more_write_again;
		if(!comm_point_tcp_handle_write(fd, c)) {
			reclaim_tcp_handler(c);
			if(!c->tcp_do_close) {
				fptr_ok(fptr_whitelist_comm_point(
					c->callback));
				(void)(*c->callback)(c, c->cb_arg,
					NETEVENT_CLOSED, NULL);
			}
			return;
		}
		if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again) {
			if(!tcp_req_info_read_again(fd, c))
				return;
		}
		if(morewrite && *morewrite)
			tcp_more_write_again(fd, c);
		return;
	}
	log_err("Ignored event %d for tcphdl.", event);
}

/** Make http handler free for next assignment */
static void
reclaim_http_handler(struct comm_point* c)
{
	log_assert(c->type == comm_http);
	if(c->ssl) {
#ifdef HAVE_SSL
		SSL_shutdown(c->ssl);
		SSL_free(c->ssl);
		c->ssl = NULL;
#endif
	}
	comm_point_close(c);
	if(c->tcp_parent) {
		/* push c on the parent's free list, unless it is already
		 * the head of that list */
		if(c != c->tcp_parent->tcp_free) {
			c->tcp_parent->cur_tcp_count--;
			c->tcp_free = c->tcp_parent->tcp_free;
			c->tcp_parent->tcp_free = c;
		}
		if(!c->tcp_free) {
			/* re-enable listening on accept socket */
			comm_point_start_listening(c->tcp_parent, -1, -1);
		}
	}
}

/** read more data for http (with ssl), returns 0 to close the connection,
 * 1 to continue (data read, or wait for the next event) */
static int
ssl_http_read_more(struct comm_point* c)
{
#ifdef HAVE_SSL
	int r;
	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
		(int)sldns_buffer_remaining(c->buffer));
	if(r <= 0) {
		int want = SSL_get_error(c->ssl, r);
		if(want == SSL_ERROR_ZERO_RETURN) {
			return 0; /* shutdown, closed */
		} else if(want == SSL_ERROR_WANT_READ) {
			return 1; /* read more later */
		} else if(want == SSL_ERROR_WANT_WRITE) {
			/* switch to listening for write events */
			c->ssl_shake_state = comm_ssl_shake_hs_write;
			comm_point_listen_for_rw(c, 0, 1);
			return 1;
		} else if(want == SSL_ERROR_SYSCALL) {
#ifdef ECONNRESET
			if(errno == ECONNRESET && verbosity < 2)
				return 0; /* silence reset by peer */
#endif
			if(errno != 0)
				log_err("SSL_read syscall: %s",
					strerror(errno));
			return 0;
		}
		log_crypto_err_io("could not SSL_read", want);
		return 0;
	}
	verbose(VERB_ALGO, "ssl http read more skip to %d + %d",
		(int)sldns_buffer_position(c->buffer), (int)r);
	sldns_buffer_skip(c->buffer, (ssize_t)r);
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}

/** read more data for http, returns 0 to close the connection,
 * 1 to continue (data read, or wait for the next event) */
static int
http_read_more(int fd, struct comm_point* c)
{
	ssize_t r;
	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	r = recv(fd, (void*)sldns_buffer_current(c->buffer),
		sldns_buffer_remaining(c->buffer), MSG_DONTWAIT);
	if(r == 0) {
		/* connection closed by peer */
		return 0;
	} else if(r == -1) {
#ifndef USE_WINSOCK
		if(errno == EINTR || errno == EAGAIN)
			return 1;
#else /* USE_WINSOCK */
		if(WSAGetLastError() == WSAECONNRESET)
			return 0;
		if(WSAGetLastError() == WSAEINPROGRESS)
			return 1;
		if(WSAGetLastError() == WSAEWOULDBLOCK) {
			ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
			return 1;
		}
#endif
		log_err_addr("read (in http r)", sock_strerror(errno),
			&c->repinfo.remote_addr, c->repinfo.remote_addrlen);
		return 0;
	}
	verbose(VERB_ALGO, "http read more skip to %d + %d",
		(int)sldns_buffer_position(c->buffer), (int)r);
	sldns_buffer_skip(c->buffer, r);
	return 1;
}

/** return true if http header has been read (one line complete) */
static int
http_header_done(sldns_buffer* buf)
{
	size_t i;
	/* scan from position to limit for a newline */
	for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) {
		/* there was a \r before the \n, but we ignore that */
		if((char)sldns_buffer_read_u8_at(buf, i) == '\n')
			return 1;
	}
	return 0;
}

/** return character string into buffer for header line, moves buffer
 * past that line and overwrites the carriage return and newline with
 * zero terminators. Returns NULL if no newline is found before the limit
 * (carriage returns seen on the way have then already been zeroed). */
static char*
http_header_line(sldns_buffer* buf)
{
	char* result = (char*)sldns_buffer_current(buf);
	size_t i;
	for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) {
		/* terminate the string on the \r */
		if((char)sldns_buffer_read_u8_at(buf, i) == '\r')
			sldns_buffer_write_u8_at(buf, i, 0);
		/* terminate on the \n, skip past it and done */
		if((char)sldns_buffer_read_u8_at(buf, i) == '\n') {
			sldns_buffer_write_u8_at(buf, i, 0);
			sldns_buffer_set_position(buf, i+1);
			return result;
		}
	}
	return NULL;
}

/** move unread buffer to start and clear rest for putting the rest into it */
static void
http_moveover_buffer(sldns_buffer* buf)
{
	/* capture position and remaining length before the clear */
	size_t pos = sldns_buffer_position(buf);
	size_t len = sldns_buffer_remaining(buf);
	sldns_buffer_clear(buf);
	/* source and destination may overlap, hence memmove */
	memmove(sldns_buffer_begin(buf), sldns_buffer_at(buf, pos), len);
	sldns_buffer_set_position(buf, len);
}

/** a http header is complete, process it. Returns 0 on failure (status
 * line whose code does not start with '2'), 1 otherwise. */
static int
http_process_initial_header(struct comm_point* c)
{
	char* line = http_header_line(c->buffer);
	if(!line) return 1;
	verbose(VERB_ALGO, "http header: %s", line);
	if(strncasecmp(line, "HTTP/1.1 ", 9) == 0) {
		/* check returncode */
		if(line[9] != '2') {
			verbose(VERB_ALGO, "http bad status %s", line+9);
			return 0;
		}
	} else if(strncasecmp(line, "Content-Length: ", 16) == 0) {
		/* NOTE(review): atoi reports no errors and a negative
		 * value would wrap in the size_t cast - confirm whether
		 * validation is wanted here */
		if(!c->http_is_chunked)
			c->tcp_byte_count = (size_t)atoi(line+16);
	} else if(strncasecmp(line, "Transfer-Encoding: chunked", 19+7) == 0) {
		c->tcp_byte_count = 0;
		c->http_is_chunked = 1;
	} else if(line[0] == 0) {
		/* end of initial headers */
		c->http_in_headers = 0;
		if(c->http_is_chunked)
			c->http_in_chunk_headers = 1;
		/* remove header text from front of buffer
		 * the buffer is going to be used to return the data segment
		 * itself and we don't want the header to get returned
		 * prepended with it */
		http_moveover_buffer(c->buffer);
		sldns_buffer_flip(c->buffer);
		return 1;
	}
	/* ignore other headers */
	return 1;
}

/** a chunk header is complete, process it, return 0=fail, 1=continue next
 * header line, 2=done with chunked transfer*/
static int
http_process_chunk_header(struct comm_point* c)
{
	char* line = http_header_line(c->buffer);
	if(!line) return 1;
	/* http_in_chunk_headers: 1 = expecting the chunk size line,
	 * 3 = reading trailer lines after a chunk */
	if(c->http_in_chunk_headers == 3) {
		verbose(VERB_ALGO, "http chunk trailer: %s", line);
		/* are we done ? */
		if(line[0] == 0 && c->tcp_byte_count == 0) {
			/* callback of http reader when NETEVENT_DONE,
			 * end of data, with no data in buffer */
			sldns_buffer_set_position(c->buffer, 0);
			sldns_buffer_set_limit(c->buffer, 0);
			fptr_ok(fptr_whitelist_comm_point(c->callback));
			(void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL);
			/* return that we are done */
			return 2;
		}
		if(line[0] == 0) {
			/* continue with header of the next chunk */
			c->http_in_chunk_headers = 1;
			/* remove header text from front of buffer */
			http_moveover_buffer(c->buffer);
			sldns_buffer_flip(c->buffer);
			return 1;
		}
		/* ignore further trail headers */
		return 1;
	}
	verbose(VERB_ALGO, "http chunk header: %s", line);
	if(c->http_in_chunk_headers == 1) {
		/* read chunked start line */
		char* end = NULL;
		/* chunk size is hexadecimal.
		 * NOTE(review): a negative or out-of-range strtol result
		 * is not rejected before the size_t cast - confirm */
		c->tcp_byte_count = (size_t)strtol(line, &end, 16);
		if(end == line)
			return 0;
		c->http_in_chunk_headers = 0;
		/* remove header text from front of buffer */
		http_moveover_buffer(c->buffer);
		sldns_buffer_flip(c->buffer);
		if(c->tcp_byte_count == 0) {
			/* done with chunks, process chunk_trailer lines */
			c->http_in_chunk_headers = 3;
		}
		return 1;
	}
	/* ignore other headers */
	return 1;
}

/** handle nonchunked data segment, 0=fail, 1=wait */
static int
http_nonchunk_segment(struct comm_point* c)
{
	/* c->buffer at position..limit has new data we read in.
	 * the buffer itself is full of nonchunked data.
	 * we are looking to read tcp_byte_count more data
	 * and then the transfer is done. */
	size_t remainbufferlen;
	size_t got_now = sldns_buffer_limit(c->buffer);
	if(c->tcp_byte_count <= got_now) {
		/* done, this is the last data fragment */
		c->http_stored = 0;
		sldns_buffer_set_position(c->buffer, 0);
		fptr_ok(fptr_whitelist_comm_point(c->callback));
		(void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL);
		return 1;
	}
	/* if we have the buffer space,
	 * read more data collected into the buffer */
	remainbufferlen = sldns_buffer_capacity(c->buffer) -
		sldns_buffer_limit(c->buffer);
	if(remainbufferlen+got_now >= c->tcp_byte_count ||
		remainbufferlen >= (size_t)(c->ssl?16384:2048)) {
		/* keep what is stored and continue appending after it */
		size_t total = sldns_buffer_limit(c->buffer);
		sldns_buffer_clear(c->buffer);
		sldns_buffer_set_position(c->buffer, total);
		c->http_stored = total;
		/* return and wait to read more */
		return 1;
	}
	/* call callback with this data amount, then
	 * wait for more */
	c->tcp_byte_count -= got_now;
	c->http_stored = 0;
	sldns_buffer_set_position(c->buffer, 0);
	fptr_ok(fptr_whitelist_comm_point(c->callback));
	(void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL);
	/* c->callback has to buffer_clear(c->buffer). */
	/* return and wait to read more */
	return 1;
}

/** handle chunked data segment, return 0=fail, 1=wait, 2=process more */
static int
http_chunked_segment(struct comm_point* c)
{
	/* the c->buffer has from position..limit new data we read. */
	/* the current chunk has length tcp_byte_count.
	 * once we read that read more chunk headers.
	 */
	size_t remainbufferlen;
	/* bytes in the buffer beyond what was stored on earlier calls */
	size_t got_now = sldns_buffer_limit(c->buffer) - c->http_stored;
	verbose(VERB_ALGO, "http_chunked_segment: got now %d, tcpbytcount %d, http_stored %d, buffer pos %d, buffer limit %d", (int)got_now, (int)c->tcp_byte_count, (int)c->http_stored, (int)sldns_buffer_position(c->buffer), (int)sldns_buffer_limit(c->buffer));
	if(c->tcp_byte_count <= got_now) {
		/* the chunk has completed (with perhaps some extra data
		 * from next chunk header and next chunk) */
		/* save too much info into temp buffer */
		size_t fraglen;
		struct comm_reply repinfo;
		c->http_stored = 0;
		sldns_buffer_skip(c->buffer, (ssize_t)c->tcp_byte_count);
		sldns_buffer_clear(c->http_temp);
		sldns_buffer_write(c->http_temp,
			sldns_buffer_current(c->buffer),
			sldns_buffer_remaining(c->buffer));
		sldns_buffer_flip(c->http_temp);

		/* callback with this fragment */
		fraglen = sldns_buffer_position(c->buffer);
		sldns_buffer_set_position(c->buffer, 0);
		sldns_buffer_set_limit(c->buffer, fraglen);
		/* pass a copy of the reply info; its c member is inspected
		 * after the callback returns */
		repinfo = c->repinfo;
		fptr_ok(fptr_whitelist_comm_point(c->callback));
		(void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &repinfo);
		/* c->callback has to buffer_clear(). */

		/* is commpoint deleted? */
		if(!repinfo.c) {
			return 1;
		}
		/* copy waiting info */
		sldns_buffer_clear(c->buffer);
		sldns_buffer_write(c->buffer,
			sldns_buffer_begin(c->http_temp),
			sldns_buffer_remaining(c->http_temp));
		sldns_buffer_flip(c->buffer);
		/* process end of chunk trailer header lines, until
		 * an empty line */
		c->http_in_chunk_headers = 3;
		/* process more data in buffer (if any) */
		return 2;
	}
	c->tcp_byte_count -= got_now;

	/* if we have the buffer space,
	 * read more data collected into the buffer */
	remainbufferlen = sldns_buffer_capacity(c->buffer) -
		sldns_buffer_limit(c->buffer);
	if(remainbufferlen >= c->tcp_byte_count ||
		remainbufferlen >= 2048) {
		size_t total = sldns_buffer_limit(c->buffer);
		sldns_buffer_clear(c->buffer);
		sldns_buffer_set_position(c->buffer, total);
		c->http_stored = total;
		/* return and wait to read more */
		return 1;
	}

	/* callback of http reader for a new part of the data */
	c->http_stored = 0;
	sldns_buffer_set_position(c->buffer, 0);
	fptr_ok(fptr_whitelist_comm_point(c->callback));
	(void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL);
	/* c->callback has to buffer_clear(c->buffer). */
	/* return and wait to read more */
	return 1;
}

#ifdef HAVE_NGHTTP2
/** Create new http2 session. Called when creating handling comm point.
 * Returns a zero-initialised session with its c member set, or NULL on
 * allocation failure. */
static struct http2_session* http2_session_create(struct comm_point* c)
{
	struct http2_session* session = calloc(1, sizeof(*session));
	if(!session) {
		log_err("malloc failure while creating http2 session");
		return NULL;
	}
	session->c = c;

	return session;
}
#endif

/** Delete http2 session.
After closing connection or on error */ 5043 static void http2_session_delete(struct http2_session* h2_session) 5044 { 5045 #ifdef HAVE_NGHTTP2 5046 if(h2_session->callbacks) 5047 nghttp2_session_callbacks_del(h2_session->callbacks); 5048 free(h2_session); 5049 #else 5050 (void)h2_session; 5051 #endif 5052 } 5053 5054 #ifdef HAVE_NGHTTP2 5055 struct http2_stream* http2_stream_create(int32_t stream_id) 5056 { 5057 struct http2_stream* h2_stream = calloc(1, sizeof(*h2_stream)); 5058 if(!h2_stream) { 5059 log_err("malloc failure while creating http2 stream"); 5060 return NULL; 5061 } 5062 h2_stream->stream_id = stream_id; 5063 return h2_stream; 5064 } 5065 5066 /** Delete http2 stream. After session delete or stream close callback */ 5067 static void http2_stream_delete(struct http2_session* h2_session, 5068 struct http2_stream* h2_stream) 5069 { 5070 if(h2_stream->mesh_state) { 5071 mesh_state_remove_reply(h2_stream->mesh, h2_stream->mesh_state, 5072 h2_session->c); 5073 h2_stream->mesh_state = NULL; 5074 } 5075 http2_req_stream_clear(h2_stream); 5076 free(h2_stream); 5077 } 5078 #endif 5079 5080 void http2_stream_add_meshstate(struct http2_stream* h2_stream, 5081 struct mesh_area* mesh, struct mesh_state* m) 5082 { 5083 h2_stream->mesh = mesh; 5084 h2_stream->mesh_state = m; 5085 } 5086 5087 void http2_stream_remove_mesh_state(struct http2_stream* h2_stream) 5088 { 5089 if(!h2_stream) 5090 return; 5091 h2_stream->mesh_state = NULL; 5092 } 5093 5094 /** delete http2 session server. After closing connection. 
*/ 5095 static void http2_session_server_delete(struct http2_session* h2_session) 5096 { 5097 #ifdef HAVE_NGHTTP2 5098 struct http2_stream* h2_stream, *next; 5099 nghttp2_session_del(h2_session->session); /* NULL input is fine */ 5100 h2_session->session = NULL; 5101 for(h2_stream = h2_session->first_stream; h2_stream;) { 5102 next = h2_stream->next; 5103 http2_stream_delete(h2_session, h2_stream); 5104 h2_stream = next; 5105 } 5106 h2_session->first_stream = NULL; 5107 h2_session->is_drop = 0; 5108 h2_session->postpone_drop = 0; 5109 h2_session->c->h2_stream = NULL; 5110 #endif 5111 (void)h2_session; 5112 } 5113 5114 #ifdef HAVE_NGHTTP2 5115 void http2_session_add_stream(struct http2_session* h2_session, 5116 struct http2_stream* h2_stream) 5117 { 5118 if(h2_session->first_stream) 5119 h2_session->first_stream->prev = h2_stream; 5120 h2_stream->next = h2_session->first_stream; 5121 h2_session->first_stream = h2_stream; 5122 } 5123 5124 /** remove stream from session linked list. After stream close callback or 5125 * closing connection */ 5126 static void http2_session_remove_stream(struct http2_session* h2_session, 5127 struct http2_stream* h2_stream) 5128 { 5129 if(h2_stream->prev) 5130 h2_stream->prev->next = h2_stream->next; 5131 else 5132 h2_session->first_stream = h2_stream->next; 5133 if(h2_stream->next) 5134 h2_stream->next->prev = h2_stream->prev; 5135 5136 } 5137 5138 int http2_stream_close_cb(nghttp2_session* ATTR_UNUSED(session), 5139 int32_t stream_id, uint32_t ATTR_UNUSED(error_code), void* cb_arg) 5140 { 5141 struct http2_stream* h2_stream; 5142 struct http2_session* h2_session = (struct http2_session*)cb_arg; 5143 if(!(h2_stream = nghttp2_session_get_stream_user_data( 5144 h2_session->session, stream_id))) { 5145 return 0; 5146 } 5147 http2_session_remove_stream(h2_session, h2_stream); 5148 http2_stream_delete(h2_session, h2_stream); 5149 return 0; 5150 } 5151 5152 ssize_t http2_recv_cb(nghttp2_session* ATTR_UNUSED(session), uint8_t* buf, 5153 
size_t len, int ATTR_UNUSED(flags), void* cb_arg) 5154 { 5155 struct http2_session* h2_session = (struct http2_session*)cb_arg; 5156 ssize_t ret; 5157 5158 log_assert(h2_session->c->type == comm_http); 5159 log_assert(h2_session->c->h2_session); 5160 5161 #ifdef HAVE_SSL 5162 if(h2_session->c->ssl) { 5163 int r; 5164 ERR_clear_error(); 5165 r = SSL_read(h2_session->c->ssl, buf, len); 5166 if(r <= 0) { 5167 int want = SSL_get_error(h2_session->c->ssl, r); 5168 if(want == SSL_ERROR_ZERO_RETURN) { 5169 return NGHTTP2_ERR_EOF; 5170 } else if(want == SSL_ERROR_WANT_READ) { 5171 return NGHTTP2_ERR_WOULDBLOCK; 5172 } else if(want == SSL_ERROR_WANT_WRITE) { 5173 h2_session->c->ssl_shake_state = comm_ssl_shake_hs_write; 5174 comm_point_listen_for_rw(h2_session->c, 0, 1); 5175 return NGHTTP2_ERR_WOULDBLOCK; 5176 } else if(want == SSL_ERROR_SYSCALL) { 5177 #ifdef ECONNRESET 5178 if(errno == ECONNRESET && verbosity < 2) 5179 return NGHTTP2_ERR_CALLBACK_FAILURE; 5180 #endif 5181 if(errno != 0) 5182 log_err("SSL_read syscall: %s", 5183 strerror(errno)); 5184 return NGHTTP2_ERR_CALLBACK_FAILURE; 5185 } 5186 log_crypto_err_io("could not SSL_read", want); 5187 return NGHTTP2_ERR_CALLBACK_FAILURE; 5188 } 5189 return r; 5190 } 5191 #endif /* HAVE_SSL */ 5192 5193 ret = recv(h2_session->c->fd, buf, len, MSG_DONTWAIT); 5194 if(ret == 0) { 5195 return NGHTTP2_ERR_EOF; 5196 } else if(ret < 0) { 5197 #ifndef USE_WINSOCK 5198 if(errno == EINTR || errno == EAGAIN) 5199 return NGHTTP2_ERR_WOULDBLOCK; 5200 #ifdef ECONNRESET 5201 if(errno == ECONNRESET && verbosity < 2) 5202 return NGHTTP2_ERR_CALLBACK_FAILURE; 5203 #endif 5204 log_err_addr("could not http2 recv: %s", strerror(errno), 5205 &h2_session->c->repinfo.remote_addr, 5206 h2_session->c->repinfo.remote_addrlen); 5207 #else /* USE_WINSOCK */ 5208 if(WSAGetLastError() == WSAECONNRESET) 5209 return NGHTTP2_ERR_CALLBACK_FAILURE; 5210 if(WSAGetLastError() == WSAEINPROGRESS) 5211 return NGHTTP2_ERR_WOULDBLOCK; 5212 if(WSAGetLastError() == 
WSAEWOULDBLOCK) { 5213 ub_winsock_tcp_wouldblock(h2_session->c->ev->ev, 5214 UB_EV_READ); 5215 return NGHTTP2_ERR_WOULDBLOCK; 5216 } 5217 log_err_addr("could not http2 recv: %s", 5218 wsa_strerror(WSAGetLastError()), 5219 &h2_session->c->repinfo.remote_addr, 5220 h2_session->c->repinfo.remote_addrlen); 5221 #endif 5222 return NGHTTP2_ERR_CALLBACK_FAILURE; 5223 } 5224 return ret; 5225 } 5226 #endif /* HAVE_NGHTTP2 */ 5227 5228 /** Handle http2 read */ 5229 static int 5230 comm_point_http2_handle_read(int ATTR_UNUSED(fd), struct comm_point* c) 5231 { 5232 #ifdef HAVE_NGHTTP2 5233 int ret; 5234 log_assert(c->h2_session); 5235 5236 /* reading until recv cb returns NGHTTP2_ERR_WOULDBLOCK */ 5237 ret = nghttp2_session_recv(c->h2_session->session); 5238 if(ret) { 5239 if(ret != NGHTTP2_ERR_EOF && 5240 ret != NGHTTP2_ERR_CALLBACK_FAILURE) { 5241 char a[256]; 5242 addr_to_str(&c->repinfo.remote_addr, 5243 c->repinfo.remote_addrlen, a, sizeof(a)); 5244 verbose(VERB_QUERY, "http2: session_recv from %s failed, " 5245 "error: %s", a, nghttp2_strerror(ret)); 5246 } 5247 return 0; 5248 } 5249 if(nghttp2_session_want_write(c->h2_session->session)) { 5250 c->tcp_is_reading = 0; 5251 comm_point_stop_listening(c); 5252 comm_point_start_listening(c, -1, adjusted_tcp_timeout(c)); 5253 } else if(!nghttp2_session_want_read(c->h2_session->session)) 5254 return 0; /* connection can be closed */ 5255 return 1; 5256 #else 5257 (void)c; 5258 return 0; 5259 #endif 5260 } 5261 5262 /** 5263 * Handle http reading callback. 5264 * @param fd: file descriptor of socket. 5265 * @param c: comm point to read from into buffer. 
5266 * @return: 0 on error 5267 */ 5268 static int 5269 comm_point_http_handle_read(int fd, struct comm_point* c) 5270 { 5271 log_assert(c->type == comm_http); 5272 log_assert(fd != -1); 5273 5274 /* if we are in ssl handshake, handle SSL handshake */ 5275 #ifdef HAVE_SSL 5276 if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) { 5277 if(!ssl_handshake(c)) 5278 return 0; 5279 if(c->ssl_shake_state != comm_ssl_shake_none) 5280 return 1; 5281 } 5282 #endif /* HAVE_SSL */ 5283 5284 if(!c->tcp_is_reading) 5285 return 1; 5286 5287 if(c->use_h2) { 5288 return comm_point_http2_handle_read(fd, c); 5289 } 5290 5291 /* http version is <= http/1.1 */ 5292 5293 if(c->http_min_version >= http_version_2) { 5294 /* HTTP/2 failed, not allowed to use lower version. */ 5295 return 0; 5296 } 5297 5298 /* read more data */ 5299 if(c->ssl) { 5300 if(!ssl_http_read_more(c)) 5301 return 0; 5302 } else { 5303 if(!http_read_more(fd, c)) 5304 return 0; 5305 } 5306 5307 if(c->http_stored >= sldns_buffer_position(c->buffer)) { 5308 /* read did not work but we wanted more data, there is 5309 * no bytes to process now. 
*/ 5310 return 1; 5311 } 5312 sldns_buffer_flip(c->buffer); 5313 /* if we are partway in a segment of data, position us at the point 5314 * where we left off previously */ 5315 if(c->http_stored < sldns_buffer_limit(c->buffer)) 5316 sldns_buffer_set_position(c->buffer, c->http_stored); 5317 else sldns_buffer_set_position(c->buffer, sldns_buffer_limit(c->buffer)); 5318 5319 while(sldns_buffer_remaining(c->buffer) > 0) { 5320 /* Handle HTTP/1.x data */ 5321 /* if we are reading headers, read more headers */ 5322 if(c->http_in_headers || c->http_in_chunk_headers) { 5323 /* if header is done, process the header */ 5324 if(!http_header_done(c->buffer)) { 5325 /* copy remaining data to front of buffer 5326 * and set rest for writing into it */ 5327 http_moveover_buffer(c->buffer); 5328 /* return and wait to read more */ 5329 return 1; 5330 } 5331 if(!c->http_in_chunk_headers) { 5332 /* process initial headers */ 5333 if(!http_process_initial_header(c)) 5334 return 0; 5335 } else { 5336 /* process chunk headers */ 5337 int r = http_process_chunk_header(c); 5338 if(r == 0) return 0; 5339 if(r == 2) return 1; /* done */ 5340 /* r == 1, continue */ 5341 } 5342 /* see if we have more to process */ 5343 continue; 5344 } 5345 5346 if(!c->http_is_chunked) { 5347 /* if we are reading nonchunks, process that*/ 5348 return http_nonchunk_segment(c); 5349 } else { 5350 /* if we are reading chunks, read the chunk */ 5351 int r = http_chunked_segment(c); 5352 if(r == 0) return 0; 5353 if(r == 1) return 1; 5354 continue; 5355 } 5356 } 5357 /* broke out of the loop; could not process header instead need 5358 * to read more */ 5359 /* moveover any remaining data and read more data */ 5360 http_moveover_buffer(c->buffer); 5361 /* return and wait to read more */ 5362 return 1; 5363 } 5364 5365 /** check pending connect for http */ 5366 static int 5367 http_check_connect(int fd, struct comm_point* c) 5368 { 5369 /* check for pending error from nonblocking connect */ 5370 /* from Stevens, 
unix network programming, vol1, 3rd ed, p450*/ 5371 int error = 0; 5372 socklen_t len = (socklen_t)sizeof(error); 5373 if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error, 5374 &len) < 0){ 5375 #ifndef USE_WINSOCK 5376 error = errno; /* on solaris errno is error */ 5377 #else /* USE_WINSOCK */ 5378 error = WSAGetLastError(); 5379 #endif 5380 } 5381 #ifndef USE_WINSOCK 5382 #if defined(EINPROGRESS) && defined(EWOULDBLOCK) 5383 if(error == EINPROGRESS || error == EWOULDBLOCK) 5384 return 1; /* try again later */ 5385 else 5386 #endif 5387 if(error != 0 && verbosity < 2) 5388 return 0; /* silence lots of chatter in the logs */ 5389 else if(error != 0) { 5390 log_err_addr("http connect", strerror(error), 5391 &c->repinfo.remote_addr, c->repinfo.remote_addrlen); 5392 #else /* USE_WINSOCK */ 5393 /* examine error */ 5394 if(error == WSAEINPROGRESS) 5395 return 1; 5396 else if(error == WSAEWOULDBLOCK) { 5397 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 5398 return 1; 5399 } else if(error != 0 && verbosity < 2) 5400 return 0; 5401 else if(error != 0) { 5402 log_err_addr("http connect", wsa_strerror(error), 5403 &c->repinfo.remote_addr, c->repinfo.remote_addrlen); 5404 #endif /* USE_WINSOCK */ 5405 return 0; 5406 } 5407 /* keep on processing this socket */ 5408 return 2; 5409 } 5410 5411 /** write more data for http (with ssl) */ 5412 static int 5413 ssl_http_write_more(struct comm_point* c) 5414 { 5415 #ifdef HAVE_SSL 5416 int r; 5417 log_assert(sldns_buffer_remaining(c->buffer) > 0); 5418 ERR_clear_error(); 5419 r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer), 5420 (int)sldns_buffer_remaining(c->buffer)); 5421 if(r <= 0) { 5422 int want = SSL_get_error(c->ssl, r); 5423 if(want == SSL_ERROR_ZERO_RETURN) { 5424 return 0; /* closed */ 5425 } else if(want == SSL_ERROR_WANT_READ) { 5426 c->ssl_shake_state = comm_ssl_shake_hs_read; 5427 comm_point_listen_for_rw(c, 1, 0); 5428 return 1; /* wait for read condition */ 5429 } else if(want == 
SSL_ERROR_WANT_WRITE) { 5430 return 1; /* write more later */ 5431 } else if(want == SSL_ERROR_SYSCALL) { 5432 #ifdef EPIPE 5433 if(errno == EPIPE && verbosity < 2) 5434 return 0; /* silence 'broken pipe' */ 5435 #endif 5436 if(errno != 0) 5437 log_err("SSL_write syscall: %s", 5438 strerror(errno)); 5439 return 0; 5440 } 5441 log_crypto_err_io("could not SSL_write", want); 5442 return 0; 5443 } 5444 sldns_buffer_skip(c->buffer, (ssize_t)r); 5445 return 1; 5446 #else 5447 (void)c; 5448 return 0; 5449 #endif /* HAVE_SSL */ 5450 } 5451 5452 /** write more data for http */ 5453 static int 5454 http_write_more(int fd, struct comm_point* c) 5455 { 5456 ssize_t r; 5457 log_assert(sldns_buffer_remaining(c->buffer) > 0); 5458 r = send(fd, (void*)sldns_buffer_current(c->buffer), 5459 sldns_buffer_remaining(c->buffer), 0); 5460 if(r == -1) { 5461 #ifndef USE_WINSOCK 5462 if(errno == EINTR || errno == EAGAIN) 5463 return 1; 5464 #else 5465 if(WSAGetLastError() == WSAEINPROGRESS) 5466 return 1; 5467 if(WSAGetLastError() == WSAEWOULDBLOCK) { 5468 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 5469 return 1; 5470 } 5471 #endif 5472 log_err_addr("http send r", sock_strerror(errno), 5473 &c->repinfo.remote_addr, c->repinfo.remote_addrlen); 5474 return 0; 5475 } 5476 sldns_buffer_skip(c->buffer, r); 5477 return 1; 5478 } 5479 5480 #ifdef HAVE_NGHTTP2 5481 ssize_t http2_send_cb(nghttp2_session* ATTR_UNUSED(session), const uint8_t* buf, 5482 size_t len, int ATTR_UNUSED(flags), void* cb_arg) 5483 { 5484 ssize_t ret; 5485 struct http2_session* h2_session = (struct http2_session*)cb_arg; 5486 log_assert(h2_session->c->type == comm_http); 5487 log_assert(h2_session->c->h2_session); 5488 5489 #ifdef HAVE_SSL 5490 if(h2_session->c->ssl) { 5491 int r; 5492 ERR_clear_error(); 5493 r = SSL_write(h2_session->c->ssl, buf, len); 5494 if(r <= 0) { 5495 int want = SSL_get_error(h2_session->c->ssl, r); 5496 if(want == SSL_ERROR_ZERO_RETURN) { 5497 return NGHTTP2_ERR_CALLBACK_FAILURE; 5498 } else 
if(want == SSL_ERROR_WANT_READ) { 5499 h2_session->c->ssl_shake_state = comm_ssl_shake_hs_read; 5500 comm_point_listen_for_rw(h2_session->c, 1, 0); 5501 return NGHTTP2_ERR_WOULDBLOCK; 5502 } else if(want == SSL_ERROR_WANT_WRITE) { 5503 return NGHTTP2_ERR_WOULDBLOCK; 5504 } else if(want == SSL_ERROR_SYSCALL) { 5505 #ifdef EPIPE 5506 if(errno == EPIPE && verbosity < 2) 5507 return NGHTTP2_ERR_CALLBACK_FAILURE; 5508 #endif 5509 if(errno != 0) 5510 log_err("SSL_write syscall: %s", 5511 strerror(errno)); 5512 return NGHTTP2_ERR_CALLBACK_FAILURE; 5513 } 5514 log_crypto_err_io("could not SSL_write", want); 5515 return NGHTTP2_ERR_CALLBACK_FAILURE; 5516 } 5517 return r; 5518 } 5519 #endif /* HAVE_SSL */ 5520 5521 ret = send(h2_session->c->fd, buf, len, 0); 5522 if(ret == 0) { 5523 return NGHTTP2_ERR_CALLBACK_FAILURE; 5524 } else if(ret < 0) { 5525 #ifndef USE_WINSOCK 5526 if(errno == EINTR || errno == EAGAIN) 5527 return NGHTTP2_ERR_WOULDBLOCK; 5528 #ifdef EPIPE 5529 if(errno == EPIPE && verbosity < 2) 5530 return NGHTTP2_ERR_CALLBACK_FAILURE; 5531 #endif 5532 #ifdef ECONNRESET 5533 if(errno == ECONNRESET && verbosity < 2) 5534 return NGHTTP2_ERR_CALLBACK_FAILURE; 5535 #endif 5536 log_err_addr("could not http2 write: %s", strerror(errno), 5537 &h2_session->c->repinfo.remote_addr, 5538 h2_session->c->repinfo.remote_addrlen); 5539 #else /* USE_WINSOCK */ 5540 if(WSAGetLastError() == WSAENOTCONN) 5541 return NGHTTP2_ERR_WOULDBLOCK; 5542 if(WSAGetLastError() == WSAEINPROGRESS) 5543 return NGHTTP2_ERR_WOULDBLOCK; 5544 if(WSAGetLastError() == WSAEWOULDBLOCK) { 5545 ub_winsock_tcp_wouldblock(h2_session->c->ev->ev, 5546 UB_EV_WRITE); 5547 return NGHTTP2_ERR_WOULDBLOCK; 5548 } 5549 if(WSAGetLastError() == WSAECONNRESET && verbosity < 2) 5550 return NGHTTP2_ERR_CALLBACK_FAILURE; 5551 log_err_addr("could not http2 write: %s", 5552 wsa_strerror(WSAGetLastError()), 5553 &h2_session->c->repinfo.remote_addr, 5554 h2_session->c->repinfo.remote_addrlen); 5555 #endif 5556 return 
NGHTTP2_ERR_CALLBACK_FAILURE; 5557 } 5558 return ret; 5559 } 5560 #endif /* HAVE_NGHTTP2 */ 5561 5562 /** Handle http2 writing */ 5563 static int 5564 comm_point_http2_handle_write(int ATTR_UNUSED(fd), struct comm_point* c) 5565 { 5566 #ifdef HAVE_NGHTTP2 5567 int ret; 5568 log_assert(c->h2_session); 5569 5570 ret = nghttp2_session_send(c->h2_session->session); 5571 if(ret) { 5572 verbose(VERB_QUERY, "http2: session_send failed, " 5573 "error: %s", nghttp2_strerror(ret)); 5574 return 0; 5575 } 5576 5577 if(nghttp2_session_want_read(c->h2_session->session)) { 5578 c->tcp_is_reading = 1; 5579 comm_point_stop_listening(c); 5580 comm_point_start_listening(c, -1, adjusted_tcp_timeout(c)); 5581 } else if(!nghttp2_session_want_write(c->h2_session->session)) 5582 return 0; /* connection can be closed */ 5583 return 1; 5584 #else 5585 (void)c; 5586 return 0; 5587 #endif 5588 } 5589 5590 /** 5591 * Handle http writing callback. 5592 * @param fd: file descriptor of socket. 5593 * @param c: comm point to write buffer out of. 
5594 * @return: 0 on error 5595 */ 5596 static int 5597 comm_point_http_handle_write(int fd, struct comm_point* c) 5598 { 5599 log_assert(c->type == comm_http); 5600 log_assert(fd != -1); 5601 5602 /* check pending connect errors, if that fails, we wait for more, 5603 * or we can continue to write contents */ 5604 if(c->tcp_check_nb_connect) { 5605 int r = http_check_connect(fd, c); 5606 if(r == 0) return 0; 5607 if(r == 1) return 1; 5608 c->tcp_check_nb_connect = 0; 5609 } 5610 /* if we are in ssl handshake, handle SSL handshake */ 5611 #ifdef HAVE_SSL 5612 if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) { 5613 if(!ssl_handshake(c)) 5614 return 0; 5615 if(c->ssl_shake_state != comm_ssl_shake_none) 5616 return 1; 5617 } 5618 #endif /* HAVE_SSL */ 5619 if(c->tcp_is_reading) 5620 return 1; 5621 5622 if(c->use_h2) { 5623 return comm_point_http2_handle_write(fd, c); 5624 } 5625 5626 /* http version is <= http/1.1 */ 5627 5628 if(c->http_min_version >= http_version_2) { 5629 /* HTTP/2 failed, not allowed to use lower version. 
*/ 5630 return 0; 5631 } 5632 5633 /* if we are writing, write more */ 5634 if(c->ssl) { 5635 if(!ssl_http_write_more(c)) 5636 return 0; 5637 } else { 5638 if(!http_write_more(fd, c)) 5639 return 0; 5640 } 5641 5642 /* we write a single buffer contents, that can contain 5643 * the http request, and then flip to read the results */ 5644 /* see if write is done */ 5645 if(sldns_buffer_remaining(c->buffer) == 0) { 5646 sldns_buffer_clear(c->buffer); 5647 if(c->tcp_do_toggle_rw) 5648 c->tcp_is_reading = 1; 5649 c->tcp_byte_count = 0; 5650 /* switch from listening(write) to listening(read) */ 5651 comm_point_stop_listening(c); 5652 comm_point_start_listening(c, -1, -1); 5653 } 5654 return 1; 5655 } 5656 5657 void 5658 comm_point_http_handle_callback(int fd, short event, void* arg) 5659 { 5660 struct comm_point* c = (struct comm_point*)arg; 5661 log_assert(c->type == comm_http); 5662 ub_comm_base_now(c->ev->base); 5663 5664 if(event&UB_EV_TIMEOUT) { 5665 verbose(VERB_QUERY, "http took too long, dropped"); 5666 reclaim_http_handler(c); 5667 if(!c->tcp_do_close) { 5668 fptr_ok(fptr_whitelist_comm_point(c->callback)); 5669 (void)(*c->callback)(c, c->cb_arg, 5670 NETEVENT_TIMEOUT, NULL); 5671 } 5672 return; 5673 } 5674 if(event&UB_EV_READ) { 5675 if(!comm_point_http_handle_read(fd, c)) { 5676 reclaim_http_handler(c); 5677 if(!c->tcp_do_close) { 5678 fptr_ok(fptr_whitelist_comm_point( 5679 c->callback)); 5680 (void)(*c->callback)(c, c->cb_arg, 5681 NETEVENT_CLOSED, NULL); 5682 } 5683 } 5684 return; 5685 } 5686 if(event&UB_EV_WRITE) { 5687 if(!comm_point_http_handle_write(fd, c)) { 5688 reclaim_http_handler(c); 5689 if(!c->tcp_do_close) { 5690 fptr_ok(fptr_whitelist_comm_point( 5691 c->callback)); 5692 (void)(*c->callback)(c, c->cb_arg, 5693 NETEVENT_CLOSED, NULL); 5694 } 5695 } 5696 return; 5697 } 5698 log_err("Ignored event %d for httphdl.", event); 5699 } 5700 5701 void comm_point_local_handle_callback(int fd, short event, void* arg) 5702 { 5703 struct comm_point* c = 
(struct comm_point*)arg; 5704 log_assert(c->type == comm_local); 5705 ub_comm_base_now(c->ev->base); 5706 5707 if(event&UB_EV_READ) { 5708 if(!comm_point_tcp_handle_read(fd, c, 1)) { 5709 fptr_ok(fptr_whitelist_comm_point(c->callback)); 5710 (void)(*c->callback)(c, c->cb_arg, NETEVENT_CLOSED, 5711 NULL); 5712 } 5713 return; 5714 } 5715 log_err("Ignored event %d for localhdl.", event); 5716 } 5717 5718 void comm_point_raw_handle_callback(int ATTR_UNUSED(fd), 5719 short event, void* arg) 5720 { 5721 struct comm_point* c = (struct comm_point*)arg; 5722 int err = NETEVENT_NOERROR; 5723 log_assert(c->type == comm_raw); 5724 ub_comm_base_now(c->ev->base); 5725 5726 if(event&UB_EV_TIMEOUT) 5727 err = NETEVENT_TIMEOUT; 5728 fptr_ok(fptr_whitelist_comm_point_raw(c->callback)); 5729 (void)(*c->callback)(c, c->cb_arg, err, NULL); 5730 } 5731 5732 struct comm_point* 5733 comm_point_create_udp(struct comm_base *base, int fd, sldns_buffer* buffer, 5734 int pp2_enabled, comm_point_callback_type* callback, 5735 void* callback_arg, struct unbound_socket* socket) 5736 { 5737 struct comm_point* c = (struct comm_point*)calloc(1, 5738 sizeof(struct comm_point)); 5739 short evbits; 5740 if(!c) 5741 return NULL; 5742 c->ev = (struct internal_event*)calloc(1, 5743 sizeof(struct internal_event)); 5744 if(!c->ev) { 5745 free(c); 5746 return NULL; 5747 } 5748 c->ev->base = base; 5749 c->fd = fd; 5750 c->buffer = buffer; 5751 c->timeout = NULL; 5752 c->tcp_is_reading = 0; 5753 c->tcp_byte_count = 0; 5754 c->tcp_parent = NULL; 5755 c->max_tcp_count = 0; 5756 c->cur_tcp_count = 0; 5757 c->tcp_handlers = NULL; 5758 c->tcp_free = NULL; 5759 c->type = comm_udp; 5760 c->tcp_do_close = 0; 5761 c->do_not_close = 0; 5762 c->tcp_do_toggle_rw = 0; 5763 c->tcp_check_nb_connect = 0; 5764 #ifdef USE_MSG_FASTOPEN 5765 c->tcp_do_fastopen = 0; 5766 #endif 5767 #ifdef USE_DNSCRYPT 5768 c->dnscrypt = 0; 5769 c->dnscrypt_buffer = buffer; 5770 #endif 5771 c->inuse = 0; 5772 c->callback = callback; 5773 c->cb_arg 
= callback_arg; 5774 c->socket = socket; 5775 c->pp2_enabled = pp2_enabled; 5776 c->pp2_header_state = pp2_header_none; 5777 evbits = UB_EV_READ | UB_EV_PERSIST; 5778 /* ub_event stuff */ 5779 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 5780 comm_point_udp_callback, c); 5781 if(c->ev->ev == NULL) { 5782 log_err("could not baseset udp event"); 5783 comm_point_delete(c); 5784 return NULL; 5785 } 5786 if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) { 5787 log_err("could not add udp event"); 5788 comm_point_delete(c); 5789 return NULL; 5790 } 5791 c->event_added = 1; 5792 return c; 5793 } 5794 5795 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG) 5796 struct comm_point* 5797 comm_point_create_udp_ancil(struct comm_base *base, int fd, 5798 sldns_buffer* buffer, int pp2_enabled, 5799 comm_point_callback_type* callback, void* callback_arg, struct unbound_socket* socket) 5800 { 5801 struct comm_point* c = (struct comm_point*)calloc(1, 5802 sizeof(struct comm_point)); 5803 short evbits; 5804 if(!c) 5805 return NULL; 5806 c->ev = (struct internal_event*)calloc(1, 5807 sizeof(struct internal_event)); 5808 if(!c->ev) { 5809 free(c); 5810 return NULL; 5811 } 5812 c->ev->base = base; 5813 c->fd = fd; 5814 c->buffer = buffer; 5815 c->timeout = NULL; 5816 c->tcp_is_reading = 0; 5817 c->tcp_byte_count = 0; 5818 c->tcp_parent = NULL; 5819 c->max_tcp_count = 0; 5820 c->cur_tcp_count = 0; 5821 c->tcp_handlers = NULL; 5822 c->tcp_free = NULL; 5823 c->type = comm_udp; 5824 c->tcp_do_close = 0; 5825 c->do_not_close = 0; 5826 #ifdef USE_DNSCRYPT 5827 c->dnscrypt = 0; 5828 c->dnscrypt_buffer = buffer; 5829 #endif 5830 c->inuse = 0; 5831 c->tcp_do_toggle_rw = 0; 5832 c->tcp_check_nb_connect = 0; 5833 #ifdef USE_MSG_FASTOPEN 5834 c->tcp_do_fastopen = 0; 5835 #endif 5836 c->callback = callback; 5837 c->cb_arg = callback_arg; 5838 c->socket = socket; 5839 c->pp2_enabled = pp2_enabled; 5840 c->pp2_header_state = pp2_header_none; 5841 evbits = 
UB_EV_READ | UB_EV_PERSIST; 5842 /* ub_event stuff */ 5843 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 5844 comm_point_udp_ancil_callback, c); 5845 if(c->ev->ev == NULL) { 5846 log_err("could not baseset udp event"); 5847 comm_point_delete(c); 5848 return NULL; 5849 } 5850 if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) { 5851 log_err("could not add udp event"); 5852 comm_point_delete(c); 5853 return NULL; 5854 } 5855 c->event_added = 1; 5856 return c; 5857 } 5858 #endif 5859 5860 struct comm_point* 5861 comm_point_create_doq(struct comm_base *base, int fd, sldns_buffer* buffer, 5862 comm_point_callback_type* callback, void* callback_arg, 5863 struct unbound_socket* socket, struct doq_table* table, 5864 struct ub_randstate* rnd, const char* ssl_service_key, 5865 const char* ssl_service_pem, struct config_file* cfg) 5866 { 5867 #ifdef HAVE_NGTCP2 5868 struct comm_point* c = (struct comm_point*)calloc(1, 5869 sizeof(struct comm_point)); 5870 short evbits; 5871 if(!c) 5872 return NULL; 5873 c->ev = (struct internal_event*)calloc(1, 5874 sizeof(struct internal_event)); 5875 if(!c->ev) { 5876 free(c); 5877 return NULL; 5878 } 5879 c->ev->base = base; 5880 c->fd = fd; 5881 c->buffer = buffer; 5882 c->timeout = NULL; 5883 c->tcp_is_reading = 0; 5884 c->tcp_byte_count = 0; 5885 c->tcp_parent = NULL; 5886 c->max_tcp_count = 0; 5887 c->cur_tcp_count = 0; 5888 c->tcp_handlers = NULL; 5889 c->tcp_free = NULL; 5890 c->type = comm_doq; 5891 c->tcp_do_close = 0; 5892 c->do_not_close = 0; 5893 c->tcp_do_toggle_rw = 0; 5894 c->tcp_check_nb_connect = 0; 5895 #ifdef USE_MSG_FASTOPEN 5896 c->tcp_do_fastopen = 0; 5897 #endif 5898 #ifdef USE_DNSCRYPT 5899 c->dnscrypt = 0; 5900 c->dnscrypt_buffer = NULL; 5901 #endif 5902 #ifdef HAVE_NGTCP2 5903 c->doq_socket = doq_server_socket_create(table, rnd, ssl_service_key, 5904 ssl_service_pem, c, base, cfg); 5905 if(!c->doq_socket) { 5906 log_err("could not create doq comm_point"); 5907 comm_point_delete(c); 5908 return NULL; 
5909 } 5910 #endif 5911 c->inuse = 0; 5912 c->callback = callback; 5913 c->cb_arg = callback_arg; 5914 c->socket = socket; 5915 c->pp2_enabled = 0; 5916 c->pp2_header_state = pp2_header_none; 5917 evbits = UB_EV_READ | UB_EV_PERSIST; 5918 /* ub_event stuff */ 5919 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 5920 comm_point_doq_callback, c); 5921 if(c->ev->ev == NULL) { 5922 log_err("could not baseset udp event"); 5923 comm_point_delete(c); 5924 return NULL; 5925 } 5926 if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) { 5927 log_err("could not add udp event"); 5928 comm_point_delete(c); 5929 return NULL; 5930 } 5931 c->event_added = 1; 5932 return c; 5933 #else 5934 /* no libngtcp2, so no QUIC support */ 5935 (void)base; 5936 (void)buffer; 5937 (void)callback; 5938 (void)callback_arg; 5939 (void)socket; 5940 (void)rnd; 5941 (void)table; 5942 (void)ssl_service_key; 5943 (void)ssl_service_pem; 5944 (void)cfg; 5945 sock_close(fd); 5946 return NULL; 5947 #endif /* HAVE_NGTCP2 */ 5948 } 5949 5950 static struct comm_point* 5951 comm_point_create_tcp_handler(struct comm_base *base, 5952 struct comm_point* parent, size_t bufsize, 5953 struct sldns_buffer* spoolbuf, comm_point_callback_type* callback, 5954 void* callback_arg, struct unbound_socket* socket) 5955 { 5956 struct comm_point* c = (struct comm_point*)calloc(1, 5957 sizeof(struct comm_point)); 5958 short evbits; 5959 if(!c) 5960 return NULL; 5961 c->ev = (struct internal_event*)calloc(1, 5962 sizeof(struct internal_event)); 5963 if(!c->ev) { 5964 free(c); 5965 return NULL; 5966 } 5967 c->ev->base = base; 5968 c->fd = -1; 5969 c->buffer = sldns_buffer_new(bufsize); 5970 if(!c->buffer) { 5971 free(c->ev); 5972 free(c); 5973 return NULL; 5974 } 5975 c->timeout = (struct timeval*)malloc(sizeof(struct timeval)); 5976 if(!c->timeout) { 5977 sldns_buffer_free(c->buffer); 5978 free(c->ev); 5979 free(c); 5980 return NULL; 5981 } 5982 c->tcp_is_reading = 0; 5983 c->tcp_byte_count = 0; 5984 c->tcp_parent = 
parent; 5985 c->tcp_timeout_msec = parent->tcp_timeout_msec; 5986 c->tcp_conn_limit = parent->tcp_conn_limit; 5987 c->tcl_addr = NULL; 5988 c->tcp_keepalive = 0; 5989 c->max_tcp_count = 0; 5990 c->cur_tcp_count = 0; 5991 c->tcp_handlers = NULL; 5992 c->tcp_free = NULL; 5993 c->type = comm_tcp; 5994 c->tcp_do_close = 0; 5995 c->do_not_close = 0; 5996 c->tcp_do_toggle_rw = 1; 5997 c->tcp_check_nb_connect = 0; 5998 #ifdef USE_MSG_FASTOPEN 5999 c->tcp_do_fastopen = 0; 6000 #endif 6001 #ifdef USE_DNSCRYPT 6002 c->dnscrypt = 0; 6003 /* We don't know just yet if this is a dnscrypt channel. Allocation 6004 * will be done when handling the callback. */ 6005 c->dnscrypt_buffer = c->buffer; 6006 #endif 6007 c->repinfo.c = c; 6008 c->callback = callback; 6009 c->cb_arg = callback_arg; 6010 c->socket = socket; 6011 c->pp2_enabled = parent->pp2_enabled; 6012 c->pp2_header_state = pp2_header_none; 6013 if(spoolbuf) { 6014 c->tcp_req_info = tcp_req_info_create(spoolbuf); 6015 if(!c->tcp_req_info) { 6016 log_err("could not create tcp commpoint"); 6017 sldns_buffer_free(c->buffer); 6018 free(c->timeout); 6019 free(c->ev); 6020 free(c); 6021 return NULL; 6022 } 6023 c->tcp_req_info->cp = c; 6024 c->tcp_do_close = 1; 6025 c->tcp_do_toggle_rw = 0; 6026 } 6027 /* add to parent free list */ 6028 c->tcp_free = parent->tcp_free; 6029 parent->tcp_free = c; 6030 /* ub_event stuff */ 6031 evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT; 6032 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 6033 comm_point_tcp_handle_callback, c); 6034 if(c->ev->ev == NULL) 6035 { 6036 log_err("could not basetset tcphdl event"); 6037 parent->tcp_free = c->tcp_free; 6038 tcp_req_info_delete(c->tcp_req_info); 6039 sldns_buffer_free(c->buffer); 6040 free(c->timeout); 6041 free(c->ev); 6042 free(c); 6043 return NULL; 6044 } 6045 return c; 6046 } 6047 6048 static struct comm_point* 6049 comm_point_create_http_handler(struct comm_base *base, 6050 struct comm_point* parent, size_t bufsize, int 
harden_large_queries, 6051 uint32_t http_max_streams, char* http_endpoint, 6052 comm_point_callback_type* callback, void* callback_arg, 6053 struct unbound_socket* socket) 6054 { 6055 struct comm_point* c = (struct comm_point*)calloc(1, 6056 sizeof(struct comm_point)); 6057 short evbits; 6058 if(!c) 6059 return NULL; 6060 c->ev = (struct internal_event*)calloc(1, 6061 sizeof(struct internal_event)); 6062 if(!c->ev) { 6063 free(c); 6064 return NULL; 6065 } 6066 c->ev->base = base; 6067 c->fd = -1; 6068 c->buffer = sldns_buffer_new(bufsize); 6069 if(!c->buffer) { 6070 free(c->ev); 6071 free(c); 6072 return NULL; 6073 } 6074 c->timeout = (struct timeval*)malloc(sizeof(struct timeval)); 6075 if(!c->timeout) { 6076 sldns_buffer_free(c->buffer); 6077 free(c->ev); 6078 free(c); 6079 return NULL; 6080 } 6081 c->tcp_is_reading = 0; 6082 c->tcp_byte_count = 0; 6083 c->tcp_parent = parent; 6084 c->tcp_timeout_msec = parent->tcp_timeout_msec; 6085 c->tcp_conn_limit = parent->tcp_conn_limit; 6086 c->tcl_addr = NULL; 6087 c->tcp_keepalive = 0; 6088 c->max_tcp_count = 0; 6089 c->cur_tcp_count = 0; 6090 c->tcp_handlers = NULL; 6091 c->tcp_free = NULL; 6092 c->type = comm_http; 6093 c->tcp_do_close = 1; 6094 c->do_not_close = 0; 6095 c->tcp_do_toggle_rw = 1; /* will be set to 0 after http2 upgrade */ 6096 c->tcp_check_nb_connect = 0; 6097 #ifdef USE_MSG_FASTOPEN 6098 c->tcp_do_fastopen = 0; 6099 #endif 6100 #ifdef USE_DNSCRYPT 6101 c->dnscrypt = 0; 6102 c->dnscrypt_buffer = NULL; 6103 #endif 6104 c->repinfo.c = c; 6105 c->callback = callback; 6106 c->cb_arg = callback_arg; 6107 c->socket = socket; 6108 c->pp2_enabled = 0; 6109 c->pp2_header_state = pp2_header_none; 6110 6111 c->http_min_version = http_version_2; 6112 c->http2_stream_max_qbuffer_size = bufsize; 6113 if(harden_large_queries && bufsize > 512) 6114 c->http2_stream_max_qbuffer_size = 512; 6115 c->http2_max_streams = http_max_streams; 6116 if(!(c->http_endpoint = strdup(http_endpoint))) { 6117 log_err("could not strdup 
http_endpoint"); 6118 sldns_buffer_free(c->buffer); 6119 free(c->timeout); 6120 free(c->ev); 6121 free(c); 6122 return NULL; 6123 } 6124 c->use_h2 = 0; 6125 #ifdef HAVE_NGHTTP2 6126 if(!(c->h2_session = http2_session_create(c))) { 6127 log_err("could not create http2 session"); 6128 free(c->http_endpoint); 6129 sldns_buffer_free(c->buffer); 6130 free(c->timeout); 6131 free(c->ev); 6132 free(c); 6133 return NULL; 6134 } 6135 if(!(c->h2_session->callbacks = http2_req_callbacks_create())) { 6136 log_err("could not create http2 callbacks"); 6137 http2_session_delete(c->h2_session); 6138 free(c->http_endpoint); 6139 sldns_buffer_free(c->buffer); 6140 free(c->timeout); 6141 free(c->ev); 6142 free(c); 6143 return NULL; 6144 } 6145 #endif 6146 6147 /* add to parent free list */ 6148 c->tcp_free = parent->tcp_free; 6149 parent->tcp_free = c; 6150 /* ub_event stuff */ 6151 evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT; 6152 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 6153 comm_point_http_handle_callback, c); 6154 if(c->ev->ev == NULL) 6155 { 6156 log_err("could not set http handler event"); 6157 parent->tcp_free = c->tcp_free; 6158 http2_session_delete(c->h2_session); 6159 sldns_buffer_free(c->buffer); 6160 free(c->timeout); 6161 free(c->ev); 6162 free(c); 6163 return NULL; 6164 } 6165 return c; 6166 } 6167 6168 struct comm_point* 6169 comm_point_create_tcp(struct comm_base *base, int fd, int num, 6170 int idle_timeout, int harden_large_queries, 6171 uint32_t http_max_streams, char* http_endpoint, 6172 struct tcl_list* tcp_conn_limit, size_t bufsize, 6173 struct sldns_buffer* spoolbuf, enum listen_type port_type, 6174 int pp2_enabled, comm_point_callback_type* callback, 6175 void* callback_arg, struct unbound_socket* socket) 6176 { 6177 struct comm_point* c = (struct comm_point*)calloc(1, 6178 sizeof(struct comm_point)); 6179 short evbits; 6180 int i; 6181 /* first allocate the TCP accept listener */ 6182 if(!c) 6183 return NULL; 6184 c->ev = (struct 
internal_event*)calloc(1, 6185 sizeof(struct internal_event)); 6186 if(!c->ev) { 6187 free(c); 6188 return NULL; 6189 } 6190 c->ev->base = base; 6191 c->fd = fd; 6192 c->buffer = NULL; 6193 c->timeout = NULL; 6194 c->tcp_is_reading = 0; 6195 c->tcp_byte_count = 0; 6196 c->tcp_timeout_msec = idle_timeout; 6197 c->tcp_conn_limit = tcp_conn_limit; 6198 c->tcl_addr = NULL; 6199 c->tcp_keepalive = 0; 6200 c->tcp_parent = NULL; 6201 c->max_tcp_count = num; 6202 c->cur_tcp_count = 0; 6203 c->tcp_handlers = (struct comm_point**)calloc((size_t)num, 6204 sizeof(struct comm_point*)); 6205 if(!c->tcp_handlers) { 6206 free(c->ev); 6207 free(c); 6208 return NULL; 6209 } 6210 c->tcp_free = NULL; 6211 c->type = comm_tcp_accept; 6212 c->tcp_do_close = 0; 6213 c->do_not_close = 0; 6214 c->tcp_do_toggle_rw = 0; 6215 c->tcp_check_nb_connect = 0; 6216 #ifdef USE_MSG_FASTOPEN 6217 c->tcp_do_fastopen = 0; 6218 #endif 6219 #ifdef USE_DNSCRYPT 6220 c->dnscrypt = 0; 6221 c->dnscrypt_buffer = NULL; 6222 #endif 6223 c->callback = NULL; 6224 c->cb_arg = NULL; 6225 c->socket = socket; 6226 c->pp2_enabled = (port_type==listen_type_http?0:pp2_enabled); 6227 c->pp2_header_state = pp2_header_none; 6228 evbits = UB_EV_READ | UB_EV_PERSIST; 6229 /* ub_event stuff */ 6230 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 6231 comm_point_tcp_accept_callback, c); 6232 if(c->ev->ev == NULL) { 6233 log_err("could not baseset tcpacc event"); 6234 comm_point_delete(c); 6235 return NULL; 6236 } 6237 if (ub_event_add(c->ev->ev, c->timeout) != 0) { 6238 log_err("could not add tcpacc event"); 6239 comm_point_delete(c); 6240 return NULL; 6241 } 6242 c->event_added = 1; 6243 /* now prealloc the handlers */ 6244 for(i=0; i<num; i++) { 6245 if(port_type == listen_type_tcp || 6246 port_type == listen_type_ssl || 6247 port_type == listen_type_tcp_dnscrypt) { 6248 c->tcp_handlers[i] = comm_point_create_tcp_handler(base, 6249 c, bufsize, spoolbuf, callback, callback_arg, socket); 6250 } else if(port_type == 
listen_type_http) { 6251 c->tcp_handlers[i] = comm_point_create_http_handler( 6252 base, c, bufsize, harden_large_queries, 6253 http_max_streams, http_endpoint, 6254 callback, callback_arg, socket); 6255 } 6256 else { 6257 log_err("could not create tcp handler, unknown listen " 6258 "type"); 6259 return NULL; 6260 } 6261 if(!c->tcp_handlers[i]) { 6262 comm_point_delete(c); 6263 return NULL; 6264 } 6265 } 6266 6267 return c; 6268 } 6269 6270 struct comm_point* 6271 comm_point_create_tcp_out(struct comm_base *base, size_t bufsize, 6272 comm_point_callback_type* callback, void* callback_arg) 6273 { 6274 struct comm_point* c = (struct comm_point*)calloc(1, 6275 sizeof(struct comm_point)); 6276 short evbits; 6277 if(!c) 6278 return NULL; 6279 c->ev = (struct internal_event*)calloc(1, 6280 sizeof(struct internal_event)); 6281 if(!c->ev) { 6282 free(c); 6283 return NULL; 6284 } 6285 c->ev->base = base; 6286 c->fd = -1; 6287 c->buffer = sldns_buffer_new(bufsize); 6288 if(!c->buffer) { 6289 free(c->ev); 6290 free(c); 6291 return NULL; 6292 } 6293 c->timeout = NULL; 6294 c->tcp_is_reading = 0; 6295 c->tcp_byte_count = 0; 6296 c->tcp_timeout_msec = TCP_QUERY_TIMEOUT; 6297 c->tcp_conn_limit = NULL; 6298 c->tcl_addr = NULL; 6299 c->tcp_keepalive = 0; 6300 c->tcp_parent = NULL; 6301 c->max_tcp_count = 0; 6302 c->cur_tcp_count = 0; 6303 c->tcp_handlers = NULL; 6304 c->tcp_free = NULL; 6305 c->type = comm_tcp; 6306 c->tcp_do_close = 0; 6307 c->do_not_close = 0; 6308 c->tcp_do_toggle_rw = 1; 6309 c->tcp_check_nb_connect = 1; 6310 #ifdef USE_MSG_FASTOPEN 6311 c->tcp_do_fastopen = 1; 6312 #endif 6313 #ifdef USE_DNSCRYPT 6314 c->dnscrypt = 0; 6315 c->dnscrypt_buffer = c->buffer; 6316 #endif 6317 c->repinfo.c = c; 6318 c->callback = callback; 6319 c->cb_arg = callback_arg; 6320 c->pp2_enabled = 0; 6321 c->pp2_header_state = pp2_header_none; 6322 evbits = UB_EV_PERSIST | UB_EV_WRITE; 6323 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 6324 comm_point_tcp_handle_callback, c); 
6325 if(c->ev->ev == NULL) 6326 { 6327 log_err("could not baseset tcpout event"); 6328 sldns_buffer_free(c->buffer); 6329 free(c->ev); 6330 free(c); 6331 return NULL; 6332 } 6333 6334 return c; 6335 } 6336 6337 struct comm_point* 6338 comm_point_create_http_out(struct comm_base *base, size_t bufsize, 6339 comm_point_callback_type* callback, void* callback_arg, 6340 sldns_buffer* temp) 6341 { 6342 struct comm_point* c = (struct comm_point*)calloc(1, 6343 sizeof(struct comm_point)); 6344 short evbits; 6345 if(!c) 6346 return NULL; 6347 c->ev = (struct internal_event*)calloc(1, 6348 sizeof(struct internal_event)); 6349 if(!c->ev) { 6350 free(c); 6351 return NULL; 6352 } 6353 c->ev->base = base; 6354 c->fd = -1; 6355 c->buffer = sldns_buffer_new(bufsize); 6356 if(!c->buffer) { 6357 free(c->ev); 6358 free(c); 6359 return NULL; 6360 } 6361 c->timeout = NULL; 6362 c->tcp_is_reading = 0; 6363 c->tcp_byte_count = 0; 6364 c->tcp_parent = NULL; 6365 c->max_tcp_count = 0; 6366 c->cur_tcp_count = 0; 6367 c->tcp_handlers = NULL; 6368 c->tcp_free = NULL; 6369 c->type = comm_http; 6370 c->tcp_do_close = 0; 6371 c->do_not_close = 0; 6372 c->tcp_do_toggle_rw = 1; 6373 c->tcp_check_nb_connect = 1; 6374 c->http_in_headers = 1; 6375 c->http_in_chunk_headers = 0; 6376 c->http_is_chunked = 0; 6377 c->http_temp = temp; 6378 #ifdef USE_MSG_FASTOPEN 6379 c->tcp_do_fastopen = 1; 6380 #endif 6381 #ifdef USE_DNSCRYPT 6382 c->dnscrypt = 0; 6383 c->dnscrypt_buffer = c->buffer; 6384 #endif 6385 c->repinfo.c = c; 6386 c->callback = callback; 6387 c->cb_arg = callback_arg; 6388 c->pp2_enabled = 0; 6389 c->pp2_header_state = pp2_header_none; 6390 evbits = UB_EV_PERSIST | UB_EV_WRITE; 6391 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 6392 comm_point_http_handle_callback, c); 6393 if(c->ev->ev == NULL) 6394 { 6395 log_err("could not baseset tcpout event"); 6396 #ifdef HAVE_SSL 6397 SSL_free(c->ssl); 6398 #endif 6399 sldns_buffer_free(c->buffer); 6400 free(c->ev); 6401 free(c); 6402 return 
NULL; 6403 } 6404 6405 return c; 6406 } 6407 6408 struct comm_point* 6409 comm_point_create_local(struct comm_base *base, int fd, size_t bufsize, 6410 comm_point_callback_type* callback, void* callback_arg) 6411 { 6412 struct comm_point* c = (struct comm_point*)calloc(1, 6413 sizeof(struct comm_point)); 6414 short evbits; 6415 if(!c) 6416 return NULL; 6417 c->ev = (struct internal_event*)calloc(1, 6418 sizeof(struct internal_event)); 6419 if(!c->ev) { 6420 free(c); 6421 return NULL; 6422 } 6423 c->ev->base = base; 6424 c->fd = fd; 6425 c->buffer = sldns_buffer_new(bufsize); 6426 if(!c->buffer) { 6427 free(c->ev); 6428 free(c); 6429 return NULL; 6430 } 6431 c->timeout = NULL; 6432 c->tcp_is_reading = 1; 6433 c->tcp_byte_count = 0; 6434 c->tcp_parent = NULL; 6435 c->max_tcp_count = 0; 6436 c->cur_tcp_count = 0; 6437 c->tcp_handlers = NULL; 6438 c->tcp_free = NULL; 6439 c->type = comm_local; 6440 c->tcp_do_close = 0; 6441 c->do_not_close = 1; 6442 c->tcp_do_toggle_rw = 0; 6443 c->tcp_check_nb_connect = 0; 6444 #ifdef USE_MSG_FASTOPEN 6445 c->tcp_do_fastopen = 0; 6446 #endif 6447 #ifdef USE_DNSCRYPT 6448 c->dnscrypt = 0; 6449 c->dnscrypt_buffer = c->buffer; 6450 #endif 6451 c->callback = callback; 6452 c->cb_arg = callback_arg; 6453 c->pp2_enabled = 0; 6454 c->pp2_header_state = pp2_header_none; 6455 /* ub_event stuff */ 6456 evbits = UB_EV_PERSIST | UB_EV_READ; 6457 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 6458 comm_point_local_handle_callback, c); 6459 if(c->ev->ev == NULL) { 6460 log_err("could not baseset localhdl event"); 6461 free(c->ev); 6462 free(c); 6463 return NULL; 6464 } 6465 if (ub_event_add(c->ev->ev, c->timeout) != 0) { 6466 log_err("could not add localhdl event"); 6467 ub_event_free(c->ev->ev); 6468 free(c->ev); 6469 free(c); 6470 return NULL; 6471 } 6472 c->event_added = 1; 6473 return c; 6474 } 6475 6476 struct comm_point* 6477 comm_point_create_raw(struct comm_base* base, int fd, int writing, 6478 comm_point_callback_type* callback, 
void* callback_arg) 6479 { 6480 struct comm_point* c = (struct comm_point*)calloc(1, 6481 sizeof(struct comm_point)); 6482 short evbits; 6483 if(!c) 6484 return NULL; 6485 c->ev = (struct internal_event*)calloc(1, 6486 sizeof(struct internal_event)); 6487 if(!c->ev) { 6488 free(c); 6489 return NULL; 6490 } 6491 c->ev->base = base; 6492 c->fd = fd; 6493 c->buffer = NULL; 6494 c->timeout = NULL; 6495 c->tcp_is_reading = 0; 6496 c->tcp_byte_count = 0; 6497 c->tcp_parent = NULL; 6498 c->max_tcp_count = 0; 6499 c->cur_tcp_count = 0; 6500 c->tcp_handlers = NULL; 6501 c->tcp_free = NULL; 6502 c->type = comm_raw; 6503 c->tcp_do_close = 0; 6504 c->do_not_close = 1; 6505 c->tcp_do_toggle_rw = 0; 6506 c->tcp_check_nb_connect = 0; 6507 #ifdef USE_MSG_FASTOPEN 6508 c->tcp_do_fastopen = 0; 6509 #endif 6510 #ifdef USE_DNSCRYPT 6511 c->dnscrypt = 0; 6512 c->dnscrypt_buffer = c->buffer; 6513 #endif 6514 c->callback = callback; 6515 c->cb_arg = callback_arg; 6516 c->pp2_enabled = 0; 6517 c->pp2_header_state = pp2_header_none; 6518 /* ub_event stuff */ 6519 if(writing) 6520 evbits = UB_EV_PERSIST | UB_EV_WRITE; 6521 else evbits = UB_EV_PERSIST | UB_EV_READ; 6522 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 6523 comm_point_raw_handle_callback, c); 6524 if(c->ev->ev == NULL) { 6525 log_err("could not baseset rawhdl event"); 6526 free(c->ev); 6527 free(c); 6528 return NULL; 6529 } 6530 if (ub_event_add(c->ev->ev, c->timeout) != 0) { 6531 log_err("could not add rawhdl event"); 6532 ub_event_free(c->ev->ev); 6533 free(c->ev); 6534 free(c); 6535 return NULL; 6536 } 6537 c->event_added = 1; 6538 return c; 6539 } 6540 6541 void 6542 comm_point_close(struct comm_point* c) 6543 { 6544 if(!c) 6545 return; 6546 if(c->fd != -1) { 6547 verbose(5, "comm_point_close of %d: event_del", c->fd); 6548 if(c->event_added) { 6549 if(ub_event_del(c->ev->ev) != 0) { 6550 log_err("could not event_del on close"); 6551 } 6552 c->event_added = 0; 6553 } 6554 } 6555 tcl_close_connection(c->tcl_addr); 
6556 if(c->tcp_req_info) 6557 tcp_req_info_clear(c->tcp_req_info); 6558 if(c->h2_session) 6559 http2_session_server_delete(c->h2_session); 6560 /* stop the comm point from reading or writing after it is closed. */ 6561 if(c->tcp_more_read_again && *c->tcp_more_read_again) 6562 *c->tcp_more_read_again = 0; 6563 if(c->tcp_more_write_again && *c->tcp_more_write_again) 6564 *c->tcp_more_write_again = 0; 6565 6566 /* close fd after removing from event lists, or epoll.. is messed up */ 6567 if(c->fd != -1 && !c->do_not_close) { 6568 #ifdef USE_WINSOCK 6569 if(c->type == comm_tcp || c->type == comm_http) { 6570 /* delete sticky events for the fd, it gets closed */ 6571 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 6572 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 6573 } 6574 #endif 6575 verbose(VERB_ALGO, "close fd %d", c->fd); 6576 sock_close(c->fd); 6577 } 6578 c->fd = -1; 6579 } 6580 6581 void 6582 comm_point_delete(struct comm_point* c) 6583 { 6584 if(!c) 6585 return; 6586 if((c->type == comm_tcp || c->type == comm_http) && c->ssl) { 6587 #ifdef HAVE_SSL 6588 SSL_shutdown(c->ssl); 6589 SSL_free(c->ssl); 6590 #endif 6591 } 6592 if(c->type == comm_http && c->http_endpoint) { 6593 free(c->http_endpoint); 6594 c->http_endpoint = NULL; 6595 } 6596 comm_point_close(c); 6597 if(c->tcp_handlers) { 6598 int i; 6599 for(i=0; i<c->max_tcp_count; i++) 6600 comm_point_delete(c->tcp_handlers[i]); 6601 free(c->tcp_handlers); 6602 } 6603 free(c->timeout); 6604 if(c->type == comm_tcp || c->type == comm_local || c->type == comm_http) { 6605 sldns_buffer_free(c->buffer); 6606 #ifdef USE_DNSCRYPT 6607 if(c->dnscrypt && c->dnscrypt_buffer != c->buffer) { 6608 sldns_buffer_free(c->dnscrypt_buffer); 6609 } 6610 #endif 6611 if(c->tcp_req_info) { 6612 tcp_req_info_delete(c->tcp_req_info); 6613 } 6614 if(c->h2_session) { 6615 http2_session_delete(c->h2_session); 6616 } 6617 } 6618 #ifdef HAVE_NGTCP2 6619 if(c->doq_socket) 6620 doq_server_socket_delete(c->doq_socket); 6621 #endif 6622 
ub_event_free(c->ev->ev); 6623 free(c->ev); 6624 free(c); 6625 } 6626 6627 #ifdef USE_DNSTAP 6628 static void 6629 send_reply_dnstap(struct dt_env* dtenv, 6630 struct sockaddr* addr, socklen_t addrlen, 6631 struct sockaddr_storage* client_addr, socklen_t client_addrlen, 6632 enum comm_point_type type, void* ssl, sldns_buffer* buffer) 6633 { 6634 log_addr(VERB_ALGO, "from local addr", (void*)addr, addrlen); 6635 log_addr(VERB_ALGO, "response to client", client_addr, client_addrlen); 6636 dt_msg_send_client_response(dtenv, client_addr, 6637 (struct sockaddr_storage*)addr, type, ssl, buffer); 6638 } 6639 #endif 6640 6641 void 6642 comm_point_send_reply(struct comm_reply *repinfo) 6643 { 6644 struct sldns_buffer* buffer; 6645 log_assert(repinfo && repinfo->c); 6646 #ifdef USE_DNSCRYPT 6647 buffer = repinfo->c->dnscrypt_buffer; 6648 if(!dnsc_handle_uncurved_request(repinfo)) { 6649 return; 6650 } 6651 #else 6652 buffer = repinfo->c->buffer; 6653 #endif 6654 if(repinfo->c->type == comm_udp) { 6655 if(repinfo->srctype) 6656 comm_point_send_udp_msg_if(repinfo->c, buffer, 6657 (struct sockaddr*)&repinfo->remote_addr, 6658 repinfo->remote_addrlen, repinfo); 6659 else 6660 comm_point_send_udp_msg(repinfo->c, buffer, 6661 (struct sockaddr*)&repinfo->remote_addr, 6662 repinfo->remote_addrlen, 0); 6663 #ifdef USE_DNSTAP 6664 /* 6665 * sending src (client)/dst (local service) addresses over 6666 * DNSTAP from udp callback 6667 */ 6668 if(repinfo->c->dtenv != NULL && repinfo->c->dtenv->log_client_response_messages) { 6669 send_reply_dnstap(repinfo->c->dtenv, 6670 repinfo->c->socket->addr, 6671 repinfo->c->socket->addrlen, 6672 &repinfo->client_addr, repinfo->client_addrlen, 6673 repinfo->c->type, repinfo->c->ssl, 6674 repinfo->c->buffer); 6675 } 6676 #endif 6677 } else { 6678 #ifdef USE_DNSTAP 6679 struct dt_env* dtenv = 6680 #ifdef HAVE_NGTCP2 6681 repinfo->c->doq_socket 6682 ?repinfo->c->dtenv: 6683 #endif 6684 repinfo->c->tcp_parent->dtenv; 6685 struct sldns_buffer* dtbuffer = 
repinfo->c->tcp_req_info 6686 ?repinfo->c->tcp_req_info->spool_buffer 6687 :repinfo->c->buffer; 6688 #ifdef USE_DNSCRYPT 6689 if(repinfo->c->dnscrypt && repinfo->is_dnscrypted) 6690 dtbuffer = repinfo->c->buffer; 6691 #endif 6692 /* 6693 * sending src (client)/dst (local service) addresses over 6694 * DNSTAP from other callbacks 6695 */ 6696 if(dtenv != NULL && dtenv->log_client_response_messages) { 6697 send_reply_dnstap(dtenv, 6698 repinfo->c->socket->addr, 6699 repinfo->c->socket->addrlen, 6700 &repinfo->client_addr, repinfo->client_addrlen, 6701 repinfo->c->type, repinfo->c->ssl, 6702 dtbuffer); 6703 } 6704 #endif 6705 if(repinfo->c->tcp_req_info) { 6706 tcp_req_info_send_reply(repinfo->c->tcp_req_info); 6707 } else if(repinfo->c->use_h2) { 6708 if(!http2_submit_dns_response(repinfo->c->h2_session)) { 6709 comm_point_drop_reply(repinfo); 6710 return; 6711 } 6712 repinfo->c->h2_stream = NULL; 6713 repinfo->c->tcp_is_reading = 0; 6714 comm_point_stop_listening(repinfo->c); 6715 comm_point_start_listening(repinfo->c, -1, 6716 adjusted_tcp_timeout(repinfo->c)); 6717 return; 6718 #ifdef HAVE_NGTCP2 6719 } else if(repinfo->c->doq_socket) { 6720 doq_socket_send_reply(repinfo); 6721 #endif 6722 } else { 6723 comm_point_start_listening(repinfo->c, -1, 6724 adjusted_tcp_timeout(repinfo->c)); 6725 } 6726 } 6727 } 6728 6729 void 6730 comm_point_drop_reply(struct comm_reply* repinfo) 6731 { 6732 if(!repinfo) 6733 return; 6734 log_assert(repinfo->c); 6735 log_assert(repinfo->c->type != comm_tcp_accept); 6736 if(repinfo->c->type == comm_udp) 6737 return; 6738 if(repinfo->c->tcp_req_info) 6739 repinfo->c->tcp_req_info->is_drop = 1; 6740 if(repinfo->c->type == comm_http) { 6741 if(repinfo->c->h2_session) { 6742 repinfo->c->h2_session->is_drop = 1; 6743 if(!repinfo->c->h2_session->postpone_drop) 6744 reclaim_http_handler(repinfo->c); 6745 return; 6746 } 6747 reclaim_http_handler(repinfo->c); 6748 return; 6749 #ifdef HAVE_NGTCP2 6750 } else if(repinfo->c->type == comm_doq) { 6751 
doq_socket_drop_reply(repinfo); 6752 return; 6753 #endif 6754 } 6755 reclaim_tcp_handler(repinfo->c); 6756 } 6757 6758 void 6759 comm_point_stop_listening(struct comm_point* c) 6760 { 6761 verbose(VERB_ALGO, "comm point stop listening %d", c->fd); 6762 if(c->event_added) { 6763 if(ub_event_del(c->ev->ev) != 0) { 6764 log_err("event_del error to stoplisten"); 6765 } 6766 c->event_added = 0; 6767 } 6768 } 6769 6770 void 6771 comm_point_start_listening(struct comm_point* c, int newfd, int msec) 6772 { 6773 verbose(VERB_ALGO, "comm point start listening %d (%d msec)", 6774 c->fd==-1?newfd:c->fd, msec); 6775 if(c->type == comm_tcp_accept && !c->tcp_free) { 6776 /* no use to start listening no free slots. */ 6777 return; 6778 } 6779 if(c->event_added) { 6780 if(ub_event_del(c->ev->ev) != 0) { 6781 log_err("event_del error to startlisten"); 6782 } 6783 c->event_added = 0; 6784 } 6785 if(msec != -1 && msec != 0) { 6786 if(!c->timeout) { 6787 c->timeout = (struct timeval*)malloc(sizeof( 6788 struct timeval)); 6789 if(!c->timeout) { 6790 log_err("cpsl: malloc failed. No net read."); 6791 return; 6792 } 6793 } 6794 ub_event_add_bits(c->ev->ev, UB_EV_TIMEOUT); 6795 #ifndef S_SPLINT_S /* splint fails on struct timeval. 
*/ 6796 c->timeout->tv_sec = msec/1000; 6797 c->timeout->tv_usec = (msec%1000)*1000; 6798 #endif /* S_SPLINT_S */ 6799 } else { 6800 if(msec == 0 || !c->timeout) { 6801 ub_event_del_bits(c->ev->ev, UB_EV_TIMEOUT); 6802 } 6803 } 6804 if(c->type == comm_tcp || c->type == comm_http) { 6805 ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE); 6806 if(c->tcp_write_and_read) { 6807 verbose(5, "startlistening %d mode rw", (newfd==-1?c->fd:newfd)); 6808 ub_event_add_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE); 6809 } else if(c->tcp_is_reading) { 6810 verbose(5, "startlistening %d mode r", (newfd==-1?c->fd:newfd)); 6811 ub_event_add_bits(c->ev->ev, UB_EV_READ); 6812 } else { 6813 verbose(5, "startlistening %d mode w", (newfd==-1?c->fd:newfd)); 6814 ub_event_add_bits(c->ev->ev, UB_EV_WRITE); 6815 } 6816 } 6817 if(newfd != -1) { 6818 if(c->fd != -1 && c->fd != newfd) { 6819 verbose(5, "cpsl close of fd %d for %d", c->fd, newfd); 6820 sock_close(c->fd); 6821 } 6822 c->fd = newfd; 6823 ub_event_set_fd(c->ev->ev, c->fd); 6824 } 6825 if(ub_event_add(c->ev->ev, msec==0?NULL:c->timeout) != 0) { 6826 log_err("event_add failed. in cpsl."); 6827 return; 6828 } 6829 c->event_added = 1; 6830 } 6831 6832 void comm_point_listen_for_rw(struct comm_point* c, int rd, int wr) 6833 { 6834 verbose(VERB_ALGO, "comm point listen_for_rw %d %d", c->fd, wr); 6835 if(c->event_added) { 6836 if(ub_event_del(c->ev->ev) != 0) { 6837 log_err("event_del error to cplf"); 6838 } 6839 c->event_added = 0; 6840 } 6841 if(!c->timeout) { 6842 ub_event_del_bits(c->ev->ev, UB_EV_TIMEOUT); 6843 } 6844 ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE); 6845 if(rd) ub_event_add_bits(c->ev->ev, UB_EV_READ); 6846 if(wr) ub_event_add_bits(c->ev->ev, UB_EV_WRITE); 6847 if(ub_event_add(c->ev->ev, c->timeout) != 0) { 6848 log_err("event_add failed. 
in cplf."); 6849 return; 6850 } 6851 c->event_added = 1; 6852 } 6853 6854 size_t comm_point_get_mem(struct comm_point* c) 6855 { 6856 size_t s; 6857 if(!c) 6858 return 0; 6859 s = sizeof(*c) + sizeof(*c->ev); 6860 if(c->timeout) 6861 s += sizeof(*c->timeout); 6862 if(c->type == comm_tcp || c->type == comm_local) { 6863 s += sizeof(*c->buffer) + sldns_buffer_capacity(c->buffer); 6864 #ifdef USE_DNSCRYPT 6865 s += sizeof(*c->dnscrypt_buffer); 6866 if(c->buffer != c->dnscrypt_buffer) { 6867 s += sldns_buffer_capacity(c->dnscrypt_buffer); 6868 } 6869 #endif 6870 } 6871 if(c->type == comm_tcp_accept) { 6872 int i; 6873 for(i=0; i<c->max_tcp_count; i++) 6874 s += comm_point_get_mem(c->tcp_handlers[i]); 6875 } 6876 return s; 6877 } 6878 6879 struct comm_timer* 6880 comm_timer_create(struct comm_base* base, void (*cb)(void*), void* cb_arg) 6881 { 6882 struct internal_timer *tm = (struct internal_timer*)calloc(1, 6883 sizeof(struct internal_timer)); 6884 if(!tm) { 6885 log_err("malloc failed"); 6886 return NULL; 6887 } 6888 tm->super.ev_timer = tm; 6889 tm->base = base; 6890 tm->super.callback = cb; 6891 tm->super.cb_arg = cb_arg; 6892 tm->ev = ub_event_new(base->eb->base, -1, UB_EV_TIMEOUT, 6893 comm_timer_callback, &tm->super); 6894 if(tm->ev == NULL) { 6895 log_err("timer_create: event_base_set failed."); 6896 free(tm); 6897 return NULL; 6898 } 6899 return &tm->super; 6900 } 6901 6902 void 6903 comm_timer_disable(struct comm_timer* timer) 6904 { 6905 if(!timer) 6906 return; 6907 ub_timer_del(timer->ev_timer->ev); 6908 timer->ev_timer->enabled = 0; 6909 } 6910 6911 void 6912 comm_timer_set(struct comm_timer* timer, struct timeval* tv) 6913 { 6914 log_assert(tv); 6915 if(timer->ev_timer->enabled) 6916 comm_timer_disable(timer); 6917 if(ub_timer_add(timer->ev_timer->ev, timer->ev_timer->base->eb->base, 6918 comm_timer_callback, timer, tv) != 0) 6919 log_err("comm_timer_set: evtimer_add failed."); 6920 timer->ev_timer->enabled = 1; 6921 } 6922 6923 void 6924 
comm_timer_delete(struct comm_timer* timer) 6925 { 6926 if(!timer) 6927 return; 6928 comm_timer_disable(timer); 6929 /* Free the sub struct timer->ev_timer derived from the super struct timer. 6930 * i.e. assert(timer == timer->ev_timer) 6931 */ 6932 ub_event_free(timer->ev_timer->ev); 6933 free(timer->ev_timer); 6934 } 6935 6936 void 6937 comm_timer_callback(int ATTR_UNUSED(fd), short event, void* arg) 6938 { 6939 struct comm_timer* tm = (struct comm_timer*)arg; 6940 if(!(event&UB_EV_TIMEOUT)) 6941 return; 6942 ub_comm_base_now(tm->ev_timer->base); 6943 tm->ev_timer->enabled = 0; 6944 fptr_ok(fptr_whitelist_comm_timer(tm->callback)); 6945 (*tm->callback)(tm->cb_arg); 6946 } 6947 6948 int 6949 comm_timer_is_set(struct comm_timer* timer) 6950 { 6951 return (int)timer->ev_timer->enabled; 6952 } 6953 6954 size_t 6955 comm_timer_get_mem(struct comm_timer* ATTR_UNUSED(timer)) 6956 { 6957 return sizeof(struct internal_timer); 6958 } 6959 6960 struct comm_signal* 6961 comm_signal_create(struct comm_base* base, 6962 void (*callback)(int, void*), void* cb_arg) 6963 { 6964 struct comm_signal* com = (struct comm_signal*)malloc( 6965 sizeof(struct comm_signal)); 6966 if(!com) { 6967 log_err("malloc failed"); 6968 return NULL; 6969 } 6970 com->base = base; 6971 com->callback = callback; 6972 com->cb_arg = cb_arg; 6973 com->ev_signal = NULL; 6974 return com; 6975 } 6976 6977 void 6978 comm_signal_callback(int sig, short event, void* arg) 6979 { 6980 struct comm_signal* comsig = (struct comm_signal*)arg; 6981 if(!(event & UB_EV_SIGNAL)) 6982 return; 6983 ub_comm_base_now(comsig->base); 6984 fptr_ok(fptr_whitelist_comm_signal(comsig->callback)); 6985 (*comsig->callback)(sig, comsig->cb_arg); 6986 } 6987 6988 int 6989 comm_signal_bind(struct comm_signal* comsig, int sig) 6990 { 6991 struct internal_signal* entry = (struct internal_signal*)calloc(1, 6992 sizeof(struct internal_signal)); 6993 if(!entry) { 6994 log_err("malloc failed"); 6995 return 0; 6996 } 6997 log_assert(comsig); 
6998 /* add signal event */ 6999 entry->ev = ub_signal_new(comsig->base->eb->base, sig, 7000 comm_signal_callback, comsig); 7001 if(entry->ev == NULL) { 7002 log_err("Could not create signal event"); 7003 free(entry); 7004 return 0; 7005 } 7006 if(ub_signal_add(entry->ev, NULL) != 0) { 7007 log_err("Could not add signal handler"); 7008 ub_event_free(entry->ev); 7009 free(entry); 7010 return 0; 7011 } 7012 /* link into list */ 7013 entry->next = comsig->ev_signal; 7014 comsig->ev_signal = entry; 7015 return 1; 7016 } 7017 7018 void 7019 comm_signal_delete(struct comm_signal* comsig) 7020 { 7021 struct internal_signal* p, *np; 7022 if(!comsig) 7023 return; 7024 p=comsig->ev_signal; 7025 while(p) { 7026 np = p->next; 7027 ub_signal_del(p->ev); 7028 ub_event_free(p->ev); 7029 free(p); 7030 p = np; 7031 } 7032 free(comsig); 7033 } 7034