1 /* 2 * util/netevent.c - event notification 3 * 4 * Copyright (c) 2007, NLnet Labs. All rights reserved. 5 * 6 * This software is open source. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * Redistributions of source code must retain the above copyright notice, 13 * this list of conditions and the following disclaimer. 14 * 15 * Redistributions in binary form must reproduce the above copyright notice, 16 * this list of conditions and the following disclaimer in the documentation 17 * and/or other materials provided with the distribution. 18 * 19 * Neither the name of the NLNET LABS nor the names of its contributors may 20 * be used to endorse or promote products derived from this software without 21 * specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36 /** 37 * \file 38 * 39 * This file contains event notification functions. 40 */ 41 #include "config.h" 42 #include "util/netevent.h" 43 #include "util/ub_event.h" 44 #include "util/log.h" 45 #include "util/net_help.h" 46 #include "util/tcp_conn_limit.h" 47 #include "util/fptr_wlist.h" 48 #include "util/proxy_protocol.h" 49 #include "util/timeval_func.h" 50 #include "sldns/pkthdr.h" 51 #include "sldns/sbuffer.h" 52 #include "sldns/str2wire.h" 53 #include "dnstap/dnstap.h" 54 #include "dnscrypt/dnscrypt.h" 55 #include "services/listen_dnsport.h" 56 #ifdef HAVE_SYS_TYPES_H 57 #include <sys/types.h> 58 #endif 59 #ifdef HAVE_SYS_SOCKET_H 60 #include <sys/socket.h> 61 #endif 62 #ifdef HAVE_NETDB_H 63 #include <netdb.h> 64 #endif 65 #ifdef HAVE_POLL_H 66 #include <poll.h> 67 #endif 68 69 #ifdef HAVE_OPENSSL_SSL_H 70 #include <openssl/ssl.h> 71 #endif 72 #ifdef HAVE_OPENSSL_ERR_H 73 #include <openssl/err.h> 74 #endif 75 #ifdef HAVE_LINUX_NET_TSTAMP_H 76 #include <linux/net_tstamp.h> 77 #endif 78 /* -------- Start of local definitions -------- */ 79 /** if CMSG_ALIGN is not defined on this platform, a workaround */ 80 #ifndef CMSG_ALIGN 81 # ifdef __CMSG_ALIGN 82 # define CMSG_ALIGN(n) __CMSG_ALIGN(n) 83 # elif defined(CMSG_DATA_ALIGN) 84 # define CMSG_ALIGN _CMSG_DATA_ALIGN 85 # else 86 # define CMSG_ALIGN(len) (((len)+sizeof(long)-1) & ~(sizeof(long)-1)) 87 # endif 88 #endif 89 90 /** if CMSG_LEN is not defined on this platform, a workaround */ 91 #ifndef CMSG_LEN 92 # define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr))+(len)) 93 #endif 94 95 /** if CMSG_SPACE is not defined on this platform, a workaround */ 96 #ifndef CMSG_SPACE 97 # ifdef _CMSG_HDR_ALIGN 98 # define CMSG_SPACE(l) (CMSG_ALIGN(l)+_CMSG_HDR_ALIGN(sizeof(struct cmsghdr))) 99 # else 100 # define CMSG_SPACE(l) (CMSG_ALIGN(l)+CMSG_ALIGN(sizeof(struct cmsghdr))) 101 # endif 102 #endif 103 104 /** The TCP writing query timeout in milliseconds */ 105 #define TCP_QUERY_TIMEOUT 120000 106 /** The minimum actual TCP timeout to use, regardless of what we advertise, 107 * in msec */ 108 #define TCP_QUERY_TIMEOUT_MINIMUM 200 109 110 #ifndef NONBLOCKING_IS_BROKEN 111 /** number of UDP reads to perform per read indication from select */ 112 #define NUM_UDP_PER_SELECT 100 113 #else 114 #define NUM_UDP_PER_SELECT 1 115 #endif 116 117 /** timeout in millisec to wait for write to unblock, packets dropped after.*/ 118 #define SEND_BLOCKED_WAIT_TIMEOUT 200 119 /** max number of times to wait for write to unblock, packets dropped after.*/ 120 #define SEND_BLOCKED_MAX_RETRY 5 121 122 /** Let's make timestamping code cleaner and redefine SO_TIMESTAMP* */ 123 #ifndef SO_TIMESTAMP 124 #define SO_TIMESTAMP 29 125 #endif 126 #ifndef SO_TIMESTAMPNS 127 #define SO_TIMESTAMPNS 35 128 #endif 129 #ifndef SO_TIMESTAMPING 130 #define SO_TIMESTAMPING 37 131 #endif 132 /** 133 * The internal event structure for keeping ub_event info for the event. 134 * Possibly other structures (list, tree) this is part of. 135 */ 136 struct internal_event { 137 /** the comm base */ 138 struct comm_base* base; 139 /** ub_event event type */ 140 struct ub_event* ev; 141 }; 142 143 /** 144 * Internal base structure, so that every thread has its own events. 145 */ 146 struct internal_base { 147 /** ub_event event_base type. */ 148 struct ub_event_base* base; 149 /** seconds time pointer points here */ 150 time_t secs; 151 /** timeval with current time */ 152 struct timeval now; 153 /** the event used for slow_accept timeouts */ 154 struct ub_event* slow_accept; 155 /** true if slow_accept is enabled */ 156 int slow_accept_enabled; 157 /** last log time for slow logging of file descriptor errors */ 158 time_t last_slow_log; 159 /** last log time for slow logging of write wait failures */ 160 time_t last_writewait_log; 161 }; 162 163 /** 164 * Internal timer structure, to store timer event in. 165 */ 166 struct internal_timer { 167 /** the super struct from which derived */ 168 struct comm_timer super; 169 /** the comm base */ 170 struct comm_base* base; 171 /** ub_event event type */ 172 struct ub_event* ev; 173 /** is timer enabled */ 174 uint8_t enabled; 175 }; 176 177 /** 178 * Internal signal structure, to store signal event in. 179 */ 180 struct internal_signal { 181 /** ub_event event type */ 182 struct ub_event* ev; 183 /** next in signal list */ 184 struct internal_signal* next; 185 }; 186 187 /** create a tcp handler with a parent */ 188 static struct comm_point* comm_point_create_tcp_handler( 189 struct comm_base *base, struct comm_point* parent, size_t bufsize, 190 struct sldns_buffer* spoolbuf, comm_point_callback_type* callback, 191 void* callback_arg, struct unbound_socket* socket); 192 193 /* -------- End of local definitions -------- */ 194 195 struct comm_base* 196 comm_base_create(int sigs) 197 { 198 struct comm_base* b = (struct comm_base*)calloc(1, 199 sizeof(struct comm_base)); 200 const char *evnm="event", *evsys="", *evmethod=""; 201 202 if(!b) 203 return NULL; 204 b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base)); 205 if(!b->eb) { 206 free(b); 207 return NULL; 208 } 209 b->eb->base = ub_default_event_base(sigs, &b->eb->secs, &b->eb->now); 210 if(!b->eb->base) { 211 free(b->eb); 212 free(b); 213 return NULL; 214 } 215 ub_comm_base_now(b); 216 ub_get_event_sys(b->eb->base, &evnm, &evsys, &evmethod); 217 verbose(VERB_ALGO, "%s %s uses %s method.", evnm, evsys, evmethod); 218 return b; 219 } 220 221 struct comm_base* 222 comm_base_create_event(struct ub_event_base* base) 223 { 224 struct comm_base* b = (struct comm_base*)calloc(1, 225 sizeof(struct comm_base)); 226 if(!b) 227 return NULL; 228 b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base)); 229 if(!b->eb) { 230 free(b); 231 return NULL; 232 } 233 b->eb->base = base; 234 ub_comm_base_now(b); 235 return b; 236 } 237 238 void 239 comm_base_delete(struct comm_base* b) 240 { 241 if(!b) 242 return; 243 if(b->eb->slow_accept_enabled) { 244 if(ub_event_del(b->eb->slow_accept) != 0) { 245 log_err("could not event_del slow_accept"); 246 } 247 ub_event_free(b->eb->slow_accept); 248 } 249 ub_event_base_free(b->eb->base); 250 b->eb->base = NULL; 251 free(b->eb); 252 free(b); 253 } 254 255 void 256 comm_base_delete_no_base(struct comm_base* b) 257 { 258 if(!b) 259 return; 260 if(b->eb->slow_accept_enabled) { 261 if(ub_event_del(b->eb->slow_accept) != 0) { 262 log_err("could not event_del slow_accept"); 263 } 264 ub_event_free(b->eb->slow_accept); 265 } 266 b->eb->base = NULL; 267 free(b->eb); 268 free(b); 269 } 270 271 void 272 comm_base_timept(struct comm_base* b, time_t** tt, struct timeval** tv) 273 { 274 *tt = &b->eb->secs; 275 *tv = &b->eb->now; 276 } 277 278 void 279 comm_base_dispatch(struct comm_base* b) 280 { 281 int retval; 282 retval = ub_event_base_dispatch(b->eb->base); 283 if(retval < 0) { 284 fatal_exit("event_dispatch returned error %d, " 285 "errno is %s", retval, strerror(errno)); 286 } 287 } 288 289 void comm_base_exit(struct comm_base* b) 290 { 291 if(ub_event_base_loopexit(b->eb->base) != 0) { 292 log_err("Could not loopexit"); 293 } 294 } 295 296 void comm_base_set_slow_accept_handlers(struct comm_base* b, 297 void (*stop_acc)(void*), void (*start_acc)(void*), void* arg) 298 { 299 b->stop_accept = stop_acc; 300 b->start_accept = start_acc; 301 b->cb_arg = arg; 302 } 303 304 struct ub_event_base* comm_base_internal(struct comm_base* b) 305 { 306 return b->eb->base; 307 } 308 309 /** see if errno for udp has to be logged or not uses globals */ 310 static int 311 udp_send_errno_needs_log(struct sockaddr* addr, socklen_t addrlen) 312 { 313 /* do not log transient errors (unless high verbosity) */ 314 #if defined(ENETUNREACH) || defined(EHOSTDOWN) || defined(EHOSTUNREACH) || defined(ENETDOWN) 315 switch(errno) { 316 # ifdef ENETUNREACH 317 case ENETUNREACH: 318 # endif 319 # ifdef EHOSTDOWN 320 case EHOSTDOWN: 321 # endif 322 # ifdef EHOSTUNREACH 323 case EHOSTUNREACH: 324 # endif 325 # ifdef ENETDOWN 326 case ENETDOWN: 327 # endif 328 case EPERM: 329 case EACCES: 330 if(verbosity < VERB_ALGO) 331 return 0; 332 break; 333 default: 334 break; 335 } 336 #endif 337 /* permission denied is gotten for every send if the 338 * network is disconnected (on some OS), squelch it */ 339 if( ((errno == EPERM) 340 # ifdef EADDRNOTAVAIL 341 /* 'Cannot assign requested address' also when disconnected */ 342 || (errno == EADDRNOTAVAIL) 343 # endif 344 ) && verbosity < VERB_ALGO) 345 return 0; 346 # ifdef EADDRINUSE 347 /* If SO_REUSEADDR is set, we could try to connect to the same server 348 * from the same source port twice. */ 349 if(errno == EADDRINUSE && verbosity < VERB_DETAIL) 350 return 0; 351 # endif 352 /* squelch errors where people deploy AAAA ::ffff:bla for 353 * authority servers, which we try for intranets. */ 354 if(errno == EINVAL && addr_is_ip4mapped( 355 (struct sockaddr_storage*)addr, addrlen) && 356 verbosity < VERB_DETAIL) 357 return 0; 358 /* SO_BROADCAST sockopt can give access to 255.255.255.255, 359 * but a dns cache does not need it. */ 360 if(errno == EACCES && addr_is_broadcast( 361 (struct sockaddr_storage*)addr, addrlen) && 362 verbosity < VERB_DETAIL) 363 return 0; 364 return 1; 365 } 366 367 int tcp_connect_errno_needs_log(struct sockaddr* addr, socklen_t addrlen) 368 { 369 return udp_send_errno_needs_log(addr, addrlen); 370 } 371 372 /* send a UDP reply */ 373 int 374 comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet, 375 struct sockaddr* addr, socklen_t addrlen, int is_connected) 376 { 377 ssize_t sent; 378 log_assert(c->fd != -1); 379 #ifdef UNBOUND_DEBUG 380 if(sldns_buffer_remaining(packet) == 0) 381 log_err("error: send empty UDP packet"); 382 #endif 383 log_assert(addr && addrlen > 0); 384 if(!is_connected) { 385 sent = sendto(c->fd, (void*)sldns_buffer_begin(packet), 386 sldns_buffer_remaining(packet), 0, 387 addr, addrlen); 388 } else { 389 sent = send(c->fd, (void*)sldns_buffer_begin(packet), 390 sldns_buffer_remaining(packet), 0); 391 } 392 if(sent == -1) { 393 /* try again and block, waiting for IO to complete, 394 * we want to send the answer, and we will wait for 395 * the ethernet interface buffer to have space. */ 396 #ifndef USE_WINSOCK 397 if(errno == EAGAIN || errno == EINTR || 398 # ifdef EWOULDBLOCK 399 errno == EWOULDBLOCK || 400 # endif 401 errno == ENOBUFS) { 402 #else 403 if(WSAGetLastError() == WSAEINPROGRESS || 404 WSAGetLastError() == WSAEINTR || 405 WSAGetLastError() == WSAENOBUFS || 406 WSAGetLastError() == WSAEWOULDBLOCK) { 407 #endif 408 int retries = 0; 409 /* if we set the fd blocking, other threads suddenly 410 * have a blocking fd that they operate on */ 411 while(sent == -1 && retries < SEND_BLOCKED_MAX_RETRY && ( 412 #ifndef USE_WINSOCK 413 errno == EAGAIN || errno == EINTR || 414 # ifdef EWOULDBLOCK 415 errno == EWOULDBLOCK || 416 # endif 417 errno == ENOBUFS 418 #else 419 WSAGetLastError() == WSAEINPROGRESS || 420 WSAGetLastError() == WSAEINTR || 421 WSAGetLastError() == WSAENOBUFS || 422 WSAGetLastError() == WSAEWOULDBLOCK 423 #endif 424 )) { 425 #if defined(HAVE_POLL) || defined(USE_WINSOCK) 426 int send_nobufs = ( 427 #ifndef USE_WINSOCK 428 errno == ENOBUFS 429 #else 430 WSAGetLastError() == WSAENOBUFS 431 #endif 432 ); 433 struct pollfd p; 434 int pret; 435 memset(&p, 0, sizeof(p)); 436 p.fd = c->fd; 437 p.events = POLLOUT | POLLERR | POLLHUP; 438 # ifndef USE_WINSOCK 439 pret = poll(&p, 1, SEND_BLOCKED_WAIT_TIMEOUT); 440 # else 441 pret = WSAPoll(&p, 1, 442 SEND_BLOCKED_WAIT_TIMEOUT); 443 # endif 444 if(pret == 0) { 445 /* timer expired */ 446 struct comm_base* b = c->ev->base; 447 if(b->eb->last_writewait_log+SLOW_LOG_TIME <= 448 b->eb->secs) { 449 b->eb->last_writewait_log = b->eb->secs; 450 verbose(VERB_OPS, "send udp blocked " 451 "for long, dropping packet."); 452 } 453 return 0; 454 } else if(pret < 0 && 455 #ifndef USE_WINSOCK 456 errno != EAGAIN && errno != EINTR && 457 # ifdef EWOULDBLOCK 458 errno != EWOULDBLOCK && 459 # endif 460 errno != ENOBUFS 461 #else 462 WSAGetLastError() != WSAEINPROGRESS && 463 WSAGetLastError() != WSAEINTR && 464 WSAGetLastError() != WSAENOBUFS && 465 WSAGetLastError() != WSAEWOULDBLOCK 466 #endif 467 ) { 468 log_err("poll udp out failed: %s", 469 sock_strerror(errno)); 470 return 0; 471 } else if((pret < 0 && 472 #ifndef USE_WINSOCK 473 errno == ENOBUFS 474 #else 475 WSAGetLastError() == WSAENOBUFS 476 #endif 477 ) || (send_nobufs && retries > 0)) { 478 /* ENOBUFS, and poll returned without 479 * a timeout. Or the retried send call 480 * returned ENOBUFS. It is good to 481 * wait a bit for the error to clear. */ 482 /* The timeout is 20*(2^(retries+1)), 483 * it increases exponentially, starting 484 * at 40 msec. After 5 tries, 1240 msec 485 * have passed in total, when poll 486 * returned the error, and 1200 msec 487 * when send returned the errors. */ 488 #ifndef USE_WINSOCK 489 pret = poll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1)); 490 #else 491 pret = WSAPoll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1)); 492 #endif 493 if(pret < 0 && 494 #ifndef USE_WINSOCK 495 errno != EAGAIN && errno != EINTR && 496 # ifdef EWOULDBLOCK 497 errno != EWOULDBLOCK && 498 # endif 499 errno != ENOBUFS 500 #else 501 WSAGetLastError() != WSAEINPROGRESS && 502 WSAGetLastError() != WSAEINTR && 503 WSAGetLastError() != WSAENOBUFS && 504 WSAGetLastError() != WSAEWOULDBLOCK 505 #endif 506 ) { 507 log_err("poll udp out timer failed: %s", 508 sock_strerror(errno)); 509 } 510 } 511 #endif /* defined(HAVE_POLL) || defined(USE_WINSOCK) */ 512 retries++; 513 if (!is_connected) { 514 sent = sendto(c->fd, (void*)sldns_buffer_begin(packet), 515 sldns_buffer_remaining(packet), 0, 516 addr, addrlen); 517 } else { 518 sent = send(c->fd, (void*)sldns_buffer_begin(packet), 519 sldns_buffer_remaining(packet), 0); 520 } 521 } 522 } 523 } 524 if(sent == -1) { 525 if(!udp_send_errno_needs_log(addr, addrlen)) 526 return 0; 527 if (!is_connected) { 528 verbose(VERB_OPS, "sendto failed: %s", sock_strerror(errno)); 529 } else { 530 verbose(VERB_OPS, "send failed: %s", sock_strerror(errno)); 531 } 532 if(addr) 533 log_addr(VERB_OPS, "remote address is", 534 (struct sockaddr_storage*)addr, addrlen); 535 return 0; 536 } else if((size_t)sent != sldns_buffer_remaining(packet)) { 537 log_err("sent %d in place of %d bytes", 538 (int)sent, (int)sldns_buffer_remaining(packet)); 539 return 0; 540 } 541 return 1; 542 } 543 544 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && (defined(HAVE_RECVMSG) || defined(HAVE_SENDMSG)) 545 /** print debug ancillary info */ 546 static void p_ancil(const char* str, struct comm_reply* r) 547 { 548 if(r->srctype != 4 && r->srctype != 6) { 549 log_info("%s: unknown srctype %d", str, r->srctype); 550 return; 551 } 552 553 if(r->srctype == 6) { 554 #ifdef IPV6_PKTINFO 555 char buf[1024]; 556 if(inet_ntop(AF_INET6, &r->pktinfo.v6info.ipi6_addr, 557 buf, (socklen_t)sizeof(buf)) == 0) { 558 (void)strlcpy(buf, "(inet_ntop error)", sizeof(buf)); 559 } 560 buf[sizeof(buf)-1]=0; 561 log_info("%s: %s %d", str, buf, r->pktinfo.v6info.ipi6_ifindex); 562 #endif 563 } else if(r->srctype == 4) { 564 #ifdef IP_PKTINFO 565 char buf1[1024], buf2[1024]; 566 if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_addr, 567 buf1, (socklen_t)sizeof(buf1)) == 0) { 568 (void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1)); 569 } 570 buf1[sizeof(buf1)-1]=0; 571 #ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST 572 if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_spec_dst, 573 buf2, (socklen_t)sizeof(buf2)) == 0) { 574 (void)strlcpy(buf2, "(inet_ntop error)", sizeof(buf2)); 575 } 576 buf2[sizeof(buf2)-1]=0; 577 #else 578 buf2[0]=0; 579 #endif 580 log_info("%s: %d %s %s", str, r->pktinfo.v4info.ipi_ifindex, 581 buf1, buf2); 582 #elif defined(IP_RECVDSTADDR) 583 char buf1[1024]; 584 if(inet_ntop(AF_INET, &r->pktinfo.v4addr, 585 buf1, (socklen_t)sizeof(buf1)) == 0) { 586 (void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1)); 587 } 588 buf1[sizeof(buf1)-1]=0; 589 log_info("%s: %s", str, buf1); 590 #endif /* IP_PKTINFO or PI_RECVDSTDADDR */ 591 } 592 } 593 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG||HAVE_SENDMSG */ 594 595 /** send a UDP reply over specified interface*/ 596 static int 597 comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet, 598 struct sockaddr* addr, socklen_t addrlen, struct comm_reply* r) 599 { 600 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_SENDMSG) 601 ssize_t sent; 602 struct msghdr msg; 603 struct iovec iov[1]; 604 union { 605 struct cmsghdr hdr; 606 char buf[256]; 607 } control; 608 #ifndef S_SPLINT_S 609 struct cmsghdr *cmsg; 610 #endif /* S_SPLINT_S */ 611 612 log_assert(c->fd != -1); 613 #ifdef UNBOUND_DEBUG 614 if(sldns_buffer_remaining(packet) == 0) 615 log_err("error: send empty UDP packet"); 616 #endif 617 log_assert(addr && addrlen > 0); 618 619 msg.msg_name = addr; 620 msg.msg_namelen = addrlen; 621 iov[0].iov_base = sldns_buffer_begin(packet); 622 iov[0].iov_len = sldns_buffer_remaining(packet); 623 msg.msg_iov = iov; 624 msg.msg_iovlen = 1; 625 msg.msg_control = control.buf; 626 #ifndef S_SPLINT_S 627 msg.msg_controllen = sizeof(control.buf); 628 #endif /* S_SPLINT_S */ 629 msg.msg_flags = 0; 630 631 #ifndef S_SPLINT_S 632 cmsg = CMSG_FIRSTHDR(&msg); 633 if(r->srctype == 4) { 634 #ifdef IP_PKTINFO 635 void* cmsg_data; 636 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo)); 637 log_assert(msg.msg_controllen <= sizeof(control.buf)); 638 cmsg->cmsg_level = IPPROTO_IP; 639 cmsg->cmsg_type = IP_PKTINFO; 640 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4info, 641 sizeof(struct in_pktinfo)); 642 /* unset the ifindex to not bypass the routing tables */ 643 cmsg_data = CMSG_DATA(cmsg); 644 ((struct in_pktinfo *) cmsg_data)->ipi_ifindex = 0; 645 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo)); 646 /* zero the padding bytes inserted by the CMSG_LEN */ 647 if(sizeof(struct in_pktinfo) < cmsg->cmsg_len) 648 memset(((uint8_t*)(CMSG_DATA(cmsg))) + 649 sizeof(struct in_pktinfo), 0, cmsg->cmsg_len 650 - sizeof(struct in_pktinfo)); 651 #elif defined(IP_SENDSRCADDR) 652 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr)); 653 log_assert(msg.msg_controllen <= sizeof(control.buf)); 654 cmsg->cmsg_level = IPPROTO_IP; 655 cmsg->cmsg_type = IP_SENDSRCADDR; 656 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4addr, 657 sizeof(struct in_addr)); 658 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr)); 659 /* zero the padding bytes inserted by the CMSG_LEN */ 660 if(sizeof(struct in_addr) < cmsg->cmsg_len) 661 memset(((uint8_t*)(CMSG_DATA(cmsg))) + 662 sizeof(struct in_addr), 0, cmsg->cmsg_len 663 - sizeof(struct in_addr)); 664 #else 665 verbose(VERB_ALGO, "no IP_PKTINFO or IP_SENDSRCADDR"); 666 msg.msg_control = NULL; 667 #endif /* IP_PKTINFO or IP_SENDSRCADDR */ 668 } else if(r->srctype == 6) { 669 void* cmsg_data; 670 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo)); 671 log_assert(msg.msg_controllen <= sizeof(control.buf)); 672 cmsg->cmsg_level = IPPROTO_IPV6; 673 cmsg->cmsg_type = IPV6_PKTINFO; 674 memmove(CMSG_DATA(cmsg), &r->pktinfo.v6info, 675 sizeof(struct in6_pktinfo)); 676 /* unset the ifindex to not bypass the routing tables */ 677 cmsg_data = CMSG_DATA(cmsg); 678 ((struct in6_pktinfo *) cmsg_data)->ipi6_ifindex = 0; 679 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo)); 680 /* zero the padding bytes inserted by the CMSG_LEN */ 681 if(sizeof(struct in6_pktinfo) < cmsg->cmsg_len) 682 memset(((uint8_t*)(CMSG_DATA(cmsg))) + 683 sizeof(struct in6_pktinfo), 0, cmsg->cmsg_len 684 - sizeof(struct in6_pktinfo)); 685 } else { 686 /* try to pass all 0 to use default route */ 687 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo)); 688 log_assert(msg.msg_controllen <= sizeof(control.buf)); 689 cmsg->cmsg_level = IPPROTO_IPV6; 690 cmsg->cmsg_type = IPV6_PKTINFO; 691 memset(CMSG_DATA(cmsg), 0, sizeof(struct in6_pktinfo)); 692 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo)); 693 /* zero the padding bytes inserted by the CMSG_LEN */ 694 if(sizeof(struct in6_pktinfo) < cmsg->cmsg_len) 695 memset(((uint8_t*)(CMSG_DATA(cmsg))) + 696 sizeof(struct in6_pktinfo), 0, cmsg->cmsg_len 697 - sizeof(struct in6_pktinfo)); 698 } 699 #endif /* S_SPLINT_S */ 700 if(verbosity >= VERB_ALGO && r->srctype != 0) 701 p_ancil("send_udp over interface", r); 702 sent = sendmsg(c->fd, &msg, 0); 703 if(sent == -1) { 704 /* try again and block, waiting for IO to complete, 705 * we want to send the answer, and we will wait for 706 * the ethernet interface buffer to have space. */ 707 #ifndef USE_WINSOCK 708 if(errno == EAGAIN || errno == EINTR || 709 # ifdef EWOULDBLOCK 710 errno == EWOULDBLOCK || 711 # endif 712 errno == ENOBUFS) { 713 #else 714 if(WSAGetLastError() == WSAEINPROGRESS || 715 WSAGetLastError() == WSAEINTR || 716 WSAGetLastError() == WSAENOBUFS || 717 WSAGetLastError() == WSAEWOULDBLOCK) { 718 #endif 719 int retries = 0; 720 while(sent == -1 && retries < SEND_BLOCKED_MAX_RETRY && ( 721 #ifndef USE_WINSOCK 722 errno == EAGAIN || errno == EINTR || 723 # ifdef EWOULDBLOCK 724 errno == EWOULDBLOCK || 725 # endif 726 errno == ENOBUFS 727 #else 728 WSAGetLastError() == WSAEINPROGRESS || 729 WSAGetLastError() == WSAEINTR || 730 WSAGetLastError() == WSAENOBUFS || 731 WSAGetLastError() == WSAEWOULDBLOCK 732 #endif 733 )) { 734 #if defined(HAVE_POLL) || defined(USE_WINSOCK) 735 int send_nobufs = ( 736 #ifndef USE_WINSOCK 737 errno == ENOBUFS 738 #else 739 WSAGetLastError() == WSAENOBUFS 740 #endif 741 ); 742 struct pollfd p; 743 int pret; 744 memset(&p, 0, sizeof(p)); 745 p.fd = c->fd; 746 p.events = POLLOUT | POLLERR | POLLHUP; 747 # ifndef USE_WINSOCK 748 pret = poll(&p, 1, SEND_BLOCKED_WAIT_TIMEOUT); 749 # else 750 pret = WSAPoll(&p, 1, 751 SEND_BLOCKED_WAIT_TIMEOUT); 752 # endif 753 if(pret == 0) { 754 /* timer expired */ 755 struct comm_base* b = c->ev->base; 756 if(b->eb->last_writewait_log+SLOW_LOG_TIME <= 757 b->eb->secs) { 758 b->eb->last_writewait_log = b->eb->secs; 759 verbose(VERB_OPS, "send udp blocked " 760 "for long, dropping packet."); 761 } 762 return 0; 763 } else if(pret < 0 && 764 #ifndef USE_WINSOCK 765 errno != EAGAIN && errno != EINTR && 766 # ifdef EWOULDBLOCK 767 errno != EWOULDBLOCK && 768 # endif 769 errno != ENOBUFS 770 #else 771 WSAGetLastError() != WSAEINPROGRESS && 772 WSAGetLastError() != WSAEINTR && 773 WSAGetLastError() != WSAENOBUFS && 774 WSAGetLastError() != WSAEWOULDBLOCK 775 #endif 776 ) { 777 log_err("poll udp out failed: %s", 778 sock_strerror(errno)); 779 return 0; 780 } else if((pret < 0 && 781 #ifndef USE_WINSOCK 782 errno == ENOBUFS 783 #else 784 WSAGetLastError() == WSAENOBUFS 785 #endif 786 ) || (send_nobufs && retries > 0)) { 787 /* ENOBUFS, and poll returned without 788 * a timeout. Or the retried send call 789 * returned ENOBUFS. It is good to 790 * wait a bit for the error to clear. */ 791 /* The timeout is 20*(2^(retries+1)), 792 * it increases exponentially, starting 793 * at 40 msec. After 5 tries, 1240 msec 794 * have passed in total, when poll 795 * returned the error, and 1200 msec 796 * when send returned the errors. */ 797 #ifndef USE_WINSOCK 798 pret = poll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1)); 799 #else 800 pret = WSAPoll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1)); 801 #endif 802 if(pret < 0 && 803 #ifndef USE_WINSOCK 804 errno != EAGAIN && errno != EINTR && 805 # ifdef EWOULDBLOCK 806 errno != EWOULDBLOCK && 807 # endif 808 errno != ENOBUFS 809 #else 810 WSAGetLastError() != WSAEINPROGRESS && 811 WSAGetLastError() != WSAEINTR && 812 WSAGetLastError() != WSAENOBUFS && 813 WSAGetLastError() != WSAEWOULDBLOCK 814 #endif 815 ) { 816 log_err("poll udp out timer failed: %s", 817 sock_strerror(errno)); 818 } 819 } 820 #endif /* defined(HAVE_POLL) || defined(USE_WINSOCK) */ 821 retries++; 822 sent = sendmsg(c->fd, &msg, 0); 823 } 824 } 825 } 826 if(sent == -1) { 827 if(!udp_send_errno_needs_log(addr, addrlen)) 828 return 0; 829 verbose(VERB_OPS, "sendmsg failed: %s", strerror(errno)); 830 log_addr(VERB_OPS, "remote address is", 831 (struct sockaddr_storage*)addr, addrlen); 832 #ifdef __NetBSD__ 833 /* netbsd 7 has IP_PKTINFO for recv but not send */ 834 if(errno == EINVAL && r->srctype == 4) 835 log_err("sendmsg: No support for sendmsg(IP_PKTINFO). " 836 "Please disable interface-automatic"); 837 #endif 838 return 0; 839 } else if((size_t)sent != sldns_buffer_remaining(packet)) { 840 log_err("sent %d in place of %d bytes", 841 (int)sent, (int)sldns_buffer_remaining(packet)); 842 return 0; 843 } 844 return 1; 845 #else 846 (void)c; 847 (void)packet; 848 (void)addr; 849 (void)addrlen; 850 (void)r; 851 log_err("sendmsg: IPV6_PKTINFO not supported"); 852 return 0; 853 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_SENDMSG */ 854 } 855 856 /** return true is UDP receive error needs to be logged */ 857 static int udp_recv_needs_log(int err) 858 { 859 switch(err) { 860 case EACCES: /* some hosts send ICMP 'Permission Denied' */ 861 #ifndef USE_WINSOCK 862 case ECONNREFUSED: 863 # ifdef ENETUNREACH 864 case ENETUNREACH: 865 # endif 866 # ifdef EHOSTDOWN 867 case EHOSTDOWN: 868 # endif 869 # ifdef EHOSTUNREACH 870 case EHOSTUNREACH: 871 # endif 872 # ifdef ENETDOWN 873 case ENETDOWN: 874 # endif 875 #else /* USE_WINSOCK */ 876 case WSAECONNREFUSED: 877 case WSAENETUNREACH: 878 case WSAEHOSTDOWN: 879 case WSAEHOSTUNREACH: 880 case WSAENETDOWN: 881 #endif 882 if(verbosity >= VERB_ALGO) 883 return 1; 884 return 0; 885 default: 886 break; 887 } 888 return 1; 889 } 890 891 /** Parses the PROXYv2 header from buf and updates the comm_reply struct. 892 * Returns 1 on success, 0 on failure. */ 893 static int consume_pp2_header(struct sldns_buffer* buf, struct comm_reply* rep, 894 int stream) { 895 size_t size; 896 struct pp2_header *header; 897 int err = pp2_read_header(sldns_buffer_begin(buf), 898 sldns_buffer_remaining(buf)); 899 if(err) return 0; 900 header = (struct pp2_header*)sldns_buffer_begin(buf); 901 size = PP2_HEADER_SIZE + ntohs(header->len); 902 if((header->ver_cmd & 0xF) == PP2_CMD_LOCAL) { 903 /* A connection from the proxy itself. 904 * No need to do anything with addresses. */ 905 goto done; 906 } 907 if(header->fam_prot == PP2_UNSPEC_UNSPEC) { 908 /* Unspecified family and protocol. This could be used for 909 * health checks by proxies. 910 * No need to do anything with addresses. */ 911 goto done; 912 } 913 /* Read the proxied address */ 914 switch(header->fam_prot) { 915 case PP2_INET_STREAM: 916 case PP2_INET_DGRAM: 917 { 918 struct sockaddr_in* addr = 919 (struct sockaddr_in*)&rep->client_addr; 920 addr->sin_family = AF_INET; 921 addr->sin_addr.s_addr = header->addr.addr4.src_addr; 922 addr->sin_port = header->addr.addr4.src_port; 923 rep->client_addrlen = (socklen_t)sizeof(struct sockaddr_in); 924 } 925 /* Ignore the destination address; it should be us. */ 926 break; 927 case PP2_INET6_STREAM: 928 case PP2_INET6_DGRAM: 929 { 930 struct sockaddr_in6* addr = 931 (struct sockaddr_in6*)&rep->client_addr; 932 memset(addr, 0, sizeof(*addr)); 933 addr->sin6_family = AF_INET6; 934 memcpy(&addr->sin6_addr, 935 header->addr.addr6.src_addr, 16); 936 addr->sin6_port = header->addr.addr6.src_port; 937 rep->client_addrlen = (socklen_t)sizeof(struct sockaddr_in6); 938 } 939 /* Ignore the destination address; it should be us. */ 940 break; 941 default: 942 log_err("proxy_protocol: unsupported family and " 943 "protocol 0x%x", (int)header->fam_prot); 944 return 0; 945 } 946 rep->is_proxied = 1; 947 done: 948 if(!stream) { 949 /* We are reading a whole packet; 950 * Move the rest of the data to overwrite the PROXYv2 header */ 951 /* XXX can we do better to avoid memmove? */ 952 memmove(header, ((char*)header)+size, 953 sldns_buffer_limit(buf)-size); 954 sldns_buffer_set_limit(buf, sldns_buffer_limit(buf)-size); 955 } 956 return 1; 957 } 958 959 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG) 960 void 961 comm_point_udp_ancil_callback(int fd, short event, void* arg) 962 { 963 struct comm_reply rep; 964 struct msghdr msg; 965 struct iovec iov[1]; 966 ssize_t rcv; 967 union { 968 struct cmsghdr hdr; 969 char buf[256]; 970 } ancil; 971 int i; 972 #ifndef S_SPLINT_S 973 struct cmsghdr* cmsg; 974 #endif /* S_SPLINT_S */ 975 #ifdef HAVE_LINUX_NET_TSTAMP_H 976 struct timespec *ts; 977 #endif /* HAVE_LINUX_NET_TSTAMP_H */ 978 979 rep.c = (struct comm_point*)arg; 980 log_assert(rep.c->type == comm_udp); 981 982 if(!(event&UB_EV_READ)) 983 return; 984 log_assert(rep.c && rep.c->buffer && rep.c->fd == fd); 985 ub_comm_base_now(rep.c->ev->base); 986 for(i=0; i<NUM_UDP_PER_SELECT; i++) { 987 sldns_buffer_clear(rep.c->buffer); 988 timeval_clear(&rep.c->recv_tv); 989 rep.remote_addrlen = (socklen_t)sizeof(rep.remote_addr); 990 log_assert(fd != -1); 991 log_assert(sldns_buffer_remaining(rep.c->buffer) > 0); 992 msg.msg_name = &rep.remote_addr; 993 msg.msg_namelen = (socklen_t)sizeof(rep.remote_addr); 994 iov[0].iov_base = sldns_buffer_begin(rep.c->buffer); 995 iov[0].iov_len = sldns_buffer_remaining(rep.c->buffer); 996 msg.msg_iov = iov; 997 msg.msg_iovlen = 1; 998 msg.msg_control = ancil.buf; 999 #ifndef S_SPLINT_S 1000 msg.msg_controllen = sizeof(ancil.buf); 1001 #endif /* S_SPLINT_S */ 1002 msg.msg_flags = 0; 1003 rcv = recvmsg(fd, &msg, MSG_DONTWAIT); 1004 if(rcv == -1) { 1005 if(errno != EAGAIN && errno != EINTR 1006 && udp_recv_needs_log(errno)) { 1007 log_err("recvmsg failed: %s", strerror(errno)); 1008 } 1009 return; 1010 } 1011 rep.remote_addrlen = msg.msg_namelen; 1012 sldns_buffer_skip(rep.c->buffer, rcv); 1013 sldns_buffer_flip(rep.c->buffer); 1014 rep.srctype = 0; 1015 rep.is_proxied = 0; 1016 #ifndef S_SPLINT_S 1017 for(cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; 1018 cmsg = CMSG_NXTHDR(&msg, cmsg)) { 1019 if( cmsg->cmsg_level == IPPROTO_IPV6 && 1020 cmsg->cmsg_type == IPV6_PKTINFO) { 1021 rep.srctype = 6; 1022 memmove(&rep.pktinfo.v6info, CMSG_DATA(cmsg), 1023 sizeof(struct in6_pktinfo)); 1024 break; 1025 #ifdef IP_PKTINFO 1026 } else if( cmsg->cmsg_level == IPPROTO_IP && 1027 cmsg->cmsg_type == IP_PKTINFO) { 1028 rep.srctype = 4; 1029 memmove(&rep.pktinfo.v4info, CMSG_DATA(cmsg), 1030 sizeof(struct in_pktinfo)); 1031 break; 1032 #elif defined(IP_RECVDSTADDR) 1033 } else if( cmsg->cmsg_level == IPPROTO_IP && 1034 cmsg->cmsg_type == IP_RECVDSTADDR) { 1035 rep.srctype = 4; 1036 memmove(&rep.pktinfo.v4addr, CMSG_DATA(cmsg), 1037 sizeof(struct in_addr)); 1038 break; 1039 #endif /* IP_PKTINFO or IP_RECVDSTADDR */ 1040 #ifdef HAVE_LINUX_NET_TSTAMP_H 1041 } else if( cmsg->cmsg_level == SOL_SOCKET && 1042 cmsg->cmsg_type == SO_TIMESTAMPNS) { 1043 ts = (struct timespec *)CMSG_DATA(cmsg); 1044 TIMESPEC_TO_TIMEVAL(&rep.c->recv_tv, ts); 1045 } else if( cmsg->cmsg_level == SOL_SOCKET && 1046 cmsg->cmsg_type == SO_TIMESTAMPING) { 1047 ts = (struct timespec *)CMSG_DATA(cmsg); 1048 TIMESPEC_TO_TIMEVAL(&rep.c->recv_tv, ts); 1049 } else if( cmsg->cmsg_level == SOL_SOCKET && 1050 cmsg->cmsg_type == SO_TIMESTAMP) { 1051 memmove(&rep.c->recv_tv, CMSG_DATA(cmsg), sizeof(struct timeval)); 1052 #endif /* HAVE_LINUX_NET_TSTAMP_H */ 1053 } 1054 } 1055 1056 if(verbosity >= VERB_ALGO && rep.srctype != 0) 1057 p_ancil("receive_udp on interface", &rep); 1058 #endif /* S_SPLINT_S */ 1059 1060 if(rep.c->pp2_enabled && !consume_pp2_header(rep.c->buffer, 1061 &rep, 0)) { 1062 log_err("proxy_protocol: could not consume PROXYv2 header"); 1063 return; 1064 } 1065 if(!rep.is_proxied) { 1066 rep.client_addrlen = rep.remote_addrlen; 1067 memmove(&rep.client_addr, &rep.remote_addr, 1068 rep.remote_addrlen); 1069 } 1070 1071 fptr_ok(fptr_whitelist_comm_point(rep.c->callback)); 1072 if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) { 1073 /* send back immediate reply */ 1074 struct sldns_buffer *buffer; 1075 #ifdef USE_DNSCRYPT 1076 buffer = rep.c->dnscrypt_buffer; 1077 #else 1078 buffer = rep.c->buffer; 1079 #endif 1080 (void)comm_point_send_udp_msg_if(rep.c, buffer, 1081 (struct sockaddr*)&rep.remote_addr, 1082 rep.remote_addrlen, &rep); 1083 } 1084 if(!rep.c || rep.c->fd == -1) /* commpoint closed */ 1085 break; 1086 } 1087 } 1088 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */ 1089 1090 void 1091 comm_point_udp_callback(int fd, short event, void* arg) 1092 { 1093 struct comm_reply rep; 1094 ssize_t rcv; 1095 int i; 1096 struct sldns_buffer *buffer; 1097 1098 rep.c = (struct comm_point*)arg; 1099 log_assert(rep.c->type == comm_udp); 1100 1101 if(!(event&UB_EV_READ)) 1102 return; 1103 log_assert(rep.c && rep.c->buffer && rep.c->fd == fd); 1104 ub_comm_base_now(rep.c->ev->base); 1105 for(i=0; i<NUM_UDP_PER_SELECT; i++) { 1106 sldns_buffer_clear(rep.c->buffer); 1107 rep.remote_addrlen = (socklen_t)sizeof(rep.remote_addr); 1108 log_assert(fd != -1); 1109 log_assert(sldns_buffer_remaining(rep.c->buffer) > 0); 1110 rcv = recvfrom(fd, (void*)sldns_buffer_begin(rep.c->buffer), 1111 sldns_buffer_remaining(rep.c->buffer), MSG_DONTWAIT, 1112 (struct sockaddr*)&rep.remote_addr, &rep.remote_addrlen); 1113 if(rcv == -1) { 1114 #ifndef USE_WINSOCK 1115 if(errno != EAGAIN && errno != EINTR 1116 && udp_recv_needs_log(errno)) 1117 log_err("recvfrom %d failed: %s", 1118 fd, strerror(errno)); 1119 #else 1120 if(WSAGetLastError() != WSAEINPROGRESS && 1121 WSAGetLastError() != WSAECONNRESET && 1122 WSAGetLastError()!= WSAEWOULDBLOCK && 1123 udp_recv_needs_log(WSAGetLastError())) 1124 log_err("recvfrom failed: %s", 1125 wsa_strerror(WSAGetLastError())); 1126 #endif 1127 return; 1128 } 1129 sldns_buffer_skip(rep.c->buffer, rcv); 1130 sldns_buffer_flip(rep.c->buffer); 1131 rep.srctype = 0; 1132 rep.is_proxied = 0; 1133 1134 if(rep.c->pp2_enabled && !consume_pp2_header(rep.c->buffer, 1135 &rep, 0)) { 1136 log_err("proxy_protocol: could not consume PROXYv2 header"); 1137 return; 1138 } 1139 if(!rep.is_proxied) { 1140 rep.client_addrlen = rep.remote_addrlen; 1141 memmove(&rep.client_addr, &rep.remote_addr, 1142 rep.remote_addrlen); 1143 } 1144 1145 fptr_ok(fptr_whitelist_comm_point(rep.c->callback)); 1146 if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) { 1147 /* send back immediate reply */ 1148 #ifdef USE_DNSCRYPT 1149 buffer = rep.c->dnscrypt_buffer; 1150 #else 1151 buffer = rep.c->buffer; 1152 #endif 1153 (void)comm_point_send_udp_msg(rep.c, buffer, 1154 (struct sockaddr*)&rep.remote_addr, 1155 rep.remote_addrlen, 0); 1156 } 1157 if(!rep.c || rep.c->fd != fd) /* commpoint closed to -1 or reused for 1158 another UDP port. Note rep.c cannot be reused with TCP fd. */ 1159 break; 1160 } 1161 } 1162 1163 int adjusted_tcp_timeout(struct comm_point* c) 1164 { 1165 if(c->tcp_timeout_msec < TCP_QUERY_TIMEOUT_MINIMUM) 1166 return TCP_QUERY_TIMEOUT_MINIMUM; 1167 return c->tcp_timeout_msec; 1168 } 1169 1170 /** Use a new tcp handler for new query fd, set to read query */ 1171 static void 1172 setup_tcp_handler(struct comm_point* c, int fd, int cur, int max) 1173 { 1174 int handler_usage; 1175 log_assert(c->type == comm_tcp || c->type == comm_http); 1176 log_assert(c->fd == -1); 1177 sldns_buffer_clear(c->buffer); 1178 #ifdef USE_DNSCRYPT 1179 if (c->dnscrypt) 1180 sldns_buffer_clear(c->dnscrypt_buffer); 1181 #endif 1182 c->tcp_is_reading = 1; 1183 c->tcp_byte_count = 0; 1184 c->tcp_keepalive = 0; 1185 /* if more than half the tcp handlers are in use, use a shorter 1186 * timeout for this TCP connection, we need to make space for 1187 * other connections to be able to get attention */ 1188 /* If > 50% TCP handler structures in use, set timeout to 1/100th 1189 * configured value. 1190 * If > 65%TCP handler structures in use, set to 1/500th configured 1191 * value. 1192 * If > 80% TCP handler structures in use, set to 0. 1193 * 1194 * If the timeout to use falls below 200 milliseconds, an actual 1195 * timeout of 200ms is used. 1196 */ 1197 handler_usage = (cur * 100) / max; 1198 if(handler_usage > 50 && handler_usage <= 65) 1199 c->tcp_timeout_msec /= 100; 1200 else if (handler_usage > 65 && handler_usage <= 80) 1201 c->tcp_timeout_msec /= 500; 1202 else if (handler_usage > 80) 1203 c->tcp_timeout_msec = 0; 1204 comm_point_start_listening(c, fd, adjusted_tcp_timeout(c)); 1205 } 1206 1207 void comm_base_handle_slow_accept(int ATTR_UNUSED(fd), 1208 short ATTR_UNUSED(event), void* arg) 1209 { 1210 struct comm_base* b = (struct comm_base*)arg; 1211 /* timeout for the slow accept, re-enable accepts again */ 1212 if(b->start_accept) { 1213 verbose(VERB_ALGO, "wait is over, slow accept disabled"); 1214 fptr_ok(fptr_whitelist_start_accept(b->start_accept)); 1215 (*b->start_accept)(b->cb_arg); 1216 b->eb->slow_accept_enabled = 0; 1217 } 1218 } 1219 1220 int comm_point_perform_accept(struct comm_point* c, 1221 struct sockaddr_storage* addr, socklen_t* addrlen) 1222 { 1223 int new_fd; 1224 *addrlen = (socklen_t)sizeof(*addr); 1225 #ifndef HAVE_ACCEPT4 1226 new_fd = accept(c->fd, (struct sockaddr*)addr, addrlen); 1227 #else 1228 /* SOCK_NONBLOCK saves extra calls to fcntl for the same result */ 1229 new_fd = accept4(c->fd, (struct sockaddr*)addr, addrlen, SOCK_NONBLOCK); 1230 #endif 1231 if(new_fd == -1) { 1232 #ifndef USE_WINSOCK 1233 /* EINTR is signal interrupt. others are closed connection. */ 1234 if( errno == EINTR || errno == EAGAIN 1235 #ifdef EWOULDBLOCK 1236 || errno == EWOULDBLOCK 1237 #endif 1238 #ifdef ECONNABORTED 1239 || errno == ECONNABORTED 1240 #endif 1241 #ifdef EPROTO 1242 || errno == EPROTO 1243 #endif /* EPROTO */ 1244 ) 1245 return -1; 1246 #if defined(ENFILE) && defined(EMFILE) 1247 if(errno == ENFILE || errno == EMFILE) { 1248 /* out of file descriptors, likely outside of our 1249 * control. stop accept() calls for some time */ 1250 if(c->ev->base->stop_accept) { 1251 struct comm_base* b = c->ev->base; 1252 struct timeval tv; 1253 verbose(VERB_ALGO, "out of file descriptors: " 1254 "slow accept"); 1255 ub_comm_base_now(b); 1256 if(b->eb->last_slow_log+SLOW_LOG_TIME <= 1257 b->eb->secs) { 1258 b->eb->last_slow_log = b->eb->secs; 1259 verbose(VERB_OPS, "accept failed, " 1260 "slow down accept for %d " 1261 "msec: %s", 1262 NETEVENT_SLOW_ACCEPT_TIME, 1263 sock_strerror(errno)); 1264 } 1265 b->eb->slow_accept_enabled = 1; 1266 fptr_ok(fptr_whitelist_stop_accept( 1267 b->stop_accept)); 1268 (*b->stop_accept)(b->cb_arg); 1269 /* set timeout, no mallocs */ 1270 tv.tv_sec = NETEVENT_SLOW_ACCEPT_TIME/1000; 1271 tv.tv_usec = (NETEVENT_SLOW_ACCEPT_TIME%1000)*1000; 1272 b->eb->slow_accept = ub_event_new(b->eb->base, 1273 -1, UB_EV_TIMEOUT, 1274 comm_base_handle_slow_accept, b); 1275 if(b->eb->slow_accept == NULL) { 1276 /* we do not want to log here, because 1277 * that would spam the logfiles. 1278 * error: "event_base_set failed." */ 1279 } 1280 else if(ub_event_add(b->eb->slow_accept, &tv) 1281 != 0) { 1282 /* we do not want to log here, 1283 * error: "event_add failed." */ 1284 } 1285 } else { 1286 log_err("accept, with no slow down, " 1287 "failed: %s", sock_strerror(errno)); 1288 } 1289 return -1; 1290 } 1291 #endif 1292 #else /* USE_WINSOCK */ 1293 if(WSAGetLastError() == WSAEINPROGRESS || 1294 WSAGetLastError() == WSAECONNRESET) 1295 return -1; 1296 if(WSAGetLastError() == WSAEWOULDBLOCK) { 1297 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 1298 return -1; 1299 } 1300 #endif 1301 log_err_addr("accept failed", sock_strerror(errno), addr, 1302 *addrlen); 1303 return -1; 1304 } 1305 if(c->tcp_conn_limit && c->type == comm_tcp_accept) { 1306 c->tcl_addr = tcl_addr_lookup(c->tcp_conn_limit, addr, *addrlen); 1307 if(!tcl_new_connection(c->tcl_addr)) { 1308 if(verbosity >= 3) 1309 log_err_addr("accept rejected", 1310 "connection limit exceeded", addr, *addrlen); 1311 close(new_fd); 1312 return -1; 1313 } 1314 } 1315 #ifndef HAVE_ACCEPT4 1316 fd_set_nonblock(new_fd); 1317 #endif 1318 return new_fd; 1319 } 1320 1321 #ifdef USE_WINSOCK 1322 static long win_bio_cb(BIO *b, int oper, const char* ATTR_UNUSED(argp), 1323 #ifdef HAVE_BIO_SET_CALLBACK_EX 1324 size_t ATTR_UNUSED(len), 1325 #endif 1326 int ATTR_UNUSED(argi), long argl, 1327 #ifndef HAVE_BIO_SET_CALLBACK_EX 1328 long retvalue 1329 #else 1330 int retvalue, size_t* ATTR_UNUSED(processed) 1331 #endif 1332 ) 1333 { 1334 int wsa_err = WSAGetLastError(); /* store errcode before it is gone */ 1335 verbose(VERB_ALGO, "bio_cb %d, %s %s %s", oper, 1336 (oper&BIO_CB_RETURN)?"return":"before", 1337 (oper&BIO_CB_READ)?"read":((oper&BIO_CB_WRITE)?"write":"other"), 1338 wsa_err==WSAEWOULDBLOCK?"wsawb":""); 1339 /* on windows, check if previous operation caused EWOULDBLOCK */ 1340 if( (oper == (BIO_CB_READ|BIO_CB_RETURN) && argl == 0) || 1341 (oper == (BIO_CB_GETS|BIO_CB_RETURN) && argl == 0)) { 1342 if(wsa_err == WSAEWOULDBLOCK) 1343 ub_winsock_tcp_wouldblock((struct ub_event*) 1344 BIO_get_callback_arg(b), UB_EV_READ); 1345 } 1346 if( (oper == (BIO_CB_WRITE|BIO_CB_RETURN) && argl == 0) || 1347 (oper == (BIO_CB_PUTS|BIO_CB_RETURN) && argl == 0)) { 1348 if(wsa_err == WSAEWOULDBLOCK) 1349 ub_winsock_tcp_wouldblock((struct ub_event*) 1350 BIO_get_callback_arg(b), UB_EV_WRITE); 1351 } 1352 /* return original return value */ 1353 return retvalue; 1354 } 1355 1356 /** set win bio callbacks for nonblocking operations */ 1357 void 1358 comm_point_tcp_win_bio_cb(struct comm_point* c, void* thessl) 1359 { 1360 SSL* ssl = (SSL*)thessl; 1361 /* set them both just in case, but usually they are the same BIO */ 1362 #ifdef HAVE_BIO_SET_CALLBACK_EX 1363 BIO_set_callback_ex(SSL_get_rbio(ssl), &win_bio_cb); 1364 #else 1365 BIO_set_callback(SSL_get_rbio(ssl), &win_bio_cb); 1366 #endif 1367 BIO_set_callback_arg(SSL_get_rbio(ssl), (char*)c->ev->ev); 1368 #ifdef HAVE_BIO_SET_CALLBACK_EX 1369 BIO_set_callback_ex(SSL_get_wbio(ssl), &win_bio_cb); 1370 #else 1371 BIO_set_callback(SSL_get_wbio(ssl), &win_bio_cb); 1372 #endif 1373 BIO_set_callback_arg(SSL_get_wbio(ssl), (char*)c->ev->ev); 1374 } 1375 #endif 1376 1377 #ifdef HAVE_NGHTTP2 1378 /** Create http2 session server. Per connection, after TCP accepted.*/ 1379 static int http2_session_server_create(struct http2_session* h2_session) 1380 { 1381 log_assert(h2_session->callbacks); 1382 h2_session->is_drop = 0; 1383 if(nghttp2_session_server_new(&h2_session->session, 1384 h2_session->callbacks, 1385 h2_session) == NGHTTP2_ERR_NOMEM) { 1386 log_err("failed to create nghttp2 session server"); 1387 return 0; 1388 } 1389 1390 return 1; 1391 } 1392 1393 /** Submit http2 setting to session. Once per session. */ 1394 static int http2_submit_settings(struct http2_session* h2_session) 1395 { 1396 int ret; 1397 nghttp2_settings_entry settings[1] = { 1398 {NGHTTP2_SETTINGS_MAX_CONCURRENT_STREAMS, 1399 h2_session->c->http2_max_streams}}; 1400 1401 ret = nghttp2_submit_settings(h2_session->session, NGHTTP2_FLAG_NONE, 1402 settings, 1); 1403 if(ret) { 1404 verbose(VERB_QUERY, "http2: submit_settings failed, " 1405 "error: %s", nghttp2_strerror(ret)); 1406 return 0; 1407 } 1408 return 1; 1409 } 1410 #endif /* HAVE_NGHTTP2 */ 1411 1412 1413 void 1414 comm_point_tcp_accept_callback(int fd, short event, void* arg) 1415 { 1416 struct comm_point* c = (struct comm_point*)arg, *c_hdl; 1417 int new_fd; 1418 log_assert(c->type == comm_tcp_accept); 1419 if(!(event & UB_EV_READ)) { 1420 log_info("ignoring tcp accept event %d", (int)event); 1421 return; 1422 } 1423 ub_comm_base_now(c->ev->base); 1424 /* find free tcp handler. */ 1425 if(!c->tcp_free) { 1426 log_warn("accepted too many tcp, connections full"); 1427 return; 1428 } 1429 /* accept incoming connection. */ 1430 c_hdl = c->tcp_free; 1431 /* clear leftover flags from previous use, and then set the 1432 * correct event base for the event structure for libevent */ 1433 ub_event_free(c_hdl->ev->ev); 1434 c_hdl->ev->ev = NULL; 1435 if((c_hdl->type == comm_tcp && c_hdl->tcp_req_info) || 1436 c_hdl->type == comm_local || c_hdl->type == comm_raw) 1437 c_hdl->tcp_do_toggle_rw = 0; 1438 else c_hdl->tcp_do_toggle_rw = 1; 1439 1440 if(c_hdl->type == comm_http) { 1441 #ifdef HAVE_NGHTTP2 1442 if(!c_hdl->h2_session || 1443 !http2_session_server_create(c_hdl->h2_session)) { 1444 log_warn("failed to create nghttp2"); 1445 return; 1446 } 1447 if(!c_hdl->h2_session || 1448 !http2_submit_settings(c_hdl->h2_session)) { 1449 log_warn("failed to submit http2 settings"); 1450 return; 1451 } 1452 if(!c->ssl) { 1453 c_hdl->tcp_do_toggle_rw = 0; 1454 c_hdl->use_h2 = 1; 1455 } 1456 #endif 1457 c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1, 1458 UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT, 1459 comm_point_http_handle_callback, c_hdl); 1460 } else { 1461 c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1, 1462 UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT, 1463 comm_point_tcp_handle_callback, c_hdl); 1464 } 1465 if(!c_hdl->ev->ev) { 1466 log_warn("could not ub_event_new, dropped tcp"); 1467 return; 1468 } 1469 log_assert(fd != -1); 1470 (void)fd; 1471 new_fd = comm_point_perform_accept(c, &c_hdl->repinfo.remote_addr, 1472 &c_hdl->repinfo.remote_addrlen); 1473 if(new_fd == -1) 1474 return; 1475 /* Copy remote_address to client_address. 1476 * Simplest way/time for streams to do that. */ 1477 c_hdl->repinfo.client_addrlen = c_hdl->repinfo.remote_addrlen; 1478 memmove(&c_hdl->repinfo.client_addr, 1479 &c_hdl->repinfo.remote_addr, 1480 c_hdl->repinfo.remote_addrlen); 1481 if(c->ssl) { 1482 c_hdl->ssl = incoming_ssl_fd(c->ssl, new_fd); 1483 if(!c_hdl->ssl) { 1484 c_hdl->fd = new_fd; 1485 comm_point_close(c_hdl); 1486 return; 1487 } 1488 c_hdl->ssl_shake_state = comm_ssl_shake_read; 1489 #ifdef USE_WINSOCK 1490 comm_point_tcp_win_bio_cb(c_hdl, c_hdl->ssl); 1491 #endif 1492 } 1493 1494 /* grab the tcp handler buffers */ 1495 c->cur_tcp_count++; 1496 c->tcp_free = c_hdl->tcp_free; 1497 c_hdl->tcp_free = NULL; 1498 if(!c->tcp_free) { 1499 /* stop accepting incoming queries for now. */ 1500 comm_point_stop_listening(c); 1501 } 1502 setup_tcp_handler(c_hdl, new_fd, c->cur_tcp_count, c->max_tcp_count); 1503 } 1504 1505 /** Make tcp handler free for next assignment */ 1506 static void 1507 reclaim_tcp_handler(struct comm_point* c) 1508 { 1509 log_assert(c->type == comm_tcp); 1510 if(c->ssl) { 1511 #ifdef HAVE_SSL 1512 SSL_shutdown(c->ssl); 1513 SSL_free(c->ssl); 1514 c->ssl = NULL; 1515 #endif 1516 } 1517 comm_point_close(c); 1518 if(c->tcp_parent) { 1519 if(c != c->tcp_parent->tcp_free) { 1520 c->tcp_parent->cur_tcp_count--; 1521 c->tcp_free = c->tcp_parent->tcp_free; 1522 c->tcp_parent->tcp_free = c; 1523 } 1524 if(!c->tcp_free) { 1525 /* re-enable listening on accept socket */ 1526 comm_point_start_listening(c->tcp_parent, -1, -1); 1527 } 1528 } 1529 c->tcp_more_read_again = NULL; 1530 c->tcp_more_write_again = NULL; 1531 c->tcp_byte_count = 0; 1532 c->pp2_header_state = pp2_header_none; 1533 sldns_buffer_clear(c->buffer); 1534 } 1535 1536 /** do the callback when writing is done */ 1537 static void 1538 tcp_callback_writer(struct comm_point* c) 1539 { 1540 log_assert(c->type == comm_tcp); 1541 if(!c->tcp_write_and_read) { 1542 sldns_buffer_clear(c->buffer); 1543 c->tcp_byte_count = 0; 1544 } 1545 if(c->tcp_do_toggle_rw) 1546 c->tcp_is_reading = 1; 1547 /* switch from listening(write) to listening(read) */ 1548 if(c->tcp_req_info) { 1549 tcp_req_info_handle_writedone(c->tcp_req_info); 1550 } else { 1551 comm_point_stop_listening(c); 1552 if(c->tcp_write_and_read) { 1553 fptr_ok(fptr_whitelist_comm_point(c->callback)); 1554 if( (*c->callback)(c, c->cb_arg, NETEVENT_PKT_WRITTEN, 1555 &c->repinfo) ) { 1556 comm_point_start_listening(c, -1, 1557 adjusted_tcp_timeout(c)); 1558 } 1559 } else { 1560 comm_point_start_listening(c, -1, 1561 adjusted_tcp_timeout(c)); 1562 } 1563 } 1564 } 1565 1566 /** do the callback when reading is done */ 1567 static void 1568 tcp_callback_reader(struct comm_point* c) 1569 { 1570 log_assert(c->type == comm_tcp || c->type == comm_local); 1571 sldns_buffer_flip(c->buffer); 1572 if(c->tcp_do_toggle_rw) 1573 c->tcp_is_reading = 0; 1574 c->tcp_byte_count = 0; 1575 if(c->tcp_req_info) { 1576 tcp_req_info_handle_readdone(c->tcp_req_info); 1577 } else { 1578 if(c->type == comm_tcp) 1579 comm_point_stop_listening(c); 1580 fptr_ok(fptr_whitelist_comm_point(c->callback)); 1581 if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) { 1582 comm_point_start_listening(c, -1, 1583 adjusted_tcp_timeout(c)); 1584 } 1585 } 1586 } 1587 1588 #ifdef HAVE_SSL 1589 /** true if the ssl handshake error has to be squelched from the logs */ 1590 int 1591 squelch_err_ssl_handshake(unsigned long err) 1592 { 1593 if(verbosity >= VERB_QUERY) 1594 return 0; /* only squelch on low verbosity */ 1595 if(ERR_GET_LIB(err) == ERR_LIB_SSL && 1596 (ERR_GET_REASON(err) == SSL_R_HTTPS_PROXY_REQUEST || 1597 ERR_GET_REASON(err) == SSL_R_HTTP_REQUEST || 1598 ERR_GET_REASON(err) == SSL_R_WRONG_VERSION_NUMBER || 1599 ERR_GET_REASON(err) == SSL_R_SSLV3_ALERT_BAD_CERTIFICATE 1600 #ifdef SSL_F_TLS_POST_PROCESS_CLIENT_HELLO 1601 || ERR_GET_REASON(err) == SSL_R_NO_SHARED_CIPHER 1602 #endif 1603 #ifdef SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO 1604 || ERR_GET_REASON(err) == SSL_R_UNKNOWN_PROTOCOL 1605 || ERR_GET_REASON(err) == SSL_R_UNSUPPORTED_PROTOCOL 1606 # ifdef SSL_R_VERSION_TOO_LOW 1607 || ERR_GET_REASON(err) == SSL_R_VERSION_TOO_LOW 1608 # endif 1609 #endif 1610 )) 1611 return 1; 1612 return 0; 1613 } 1614 #endif /* HAVE_SSL */ 1615 1616 /** continue ssl handshake */ 1617 #ifdef HAVE_SSL 1618 static int 1619 ssl_handshake(struct comm_point* c) 1620 { 1621 int r; 1622 if(c->ssl_shake_state == comm_ssl_shake_hs_read) { 1623 /* read condition satisfied back to writing */ 1624 comm_point_listen_for_rw(c, 0, 1); 1625 c->ssl_shake_state = comm_ssl_shake_none; 1626 return 1; 1627 } 1628 if(c->ssl_shake_state == comm_ssl_shake_hs_write) { 1629 /* write condition satisfied, back to reading */ 1630 comm_point_listen_for_rw(c, 1, 0); 1631 c->ssl_shake_state = comm_ssl_shake_none; 1632 return 1; 1633 } 1634 1635 ERR_clear_error(); 1636 r = SSL_do_handshake(c->ssl); 1637 if(r != 1) { 1638 int want = SSL_get_error(c->ssl, r); 1639 if(want == SSL_ERROR_WANT_READ) { 1640 if(c->ssl_shake_state == comm_ssl_shake_read) 1641 return 1; 1642 c->ssl_shake_state = comm_ssl_shake_read; 1643 comm_point_listen_for_rw(c, 1, 0); 1644 return 1; 1645 } else if(want == SSL_ERROR_WANT_WRITE) { 1646 if(c->ssl_shake_state == comm_ssl_shake_write) 1647 return 1; 1648 c->ssl_shake_state = comm_ssl_shake_write; 1649 comm_point_listen_for_rw(c, 0, 1); 1650 return 1; 1651 } else if(r == 0) { 1652 return 0; /* closed */ 1653 } else if(want == SSL_ERROR_SYSCALL) { 1654 /* SYSCALL and errno==0 means closed uncleanly */ 1655 #ifdef EPIPE 1656 if(errno == EPIPE && verbosity < 2) 1657 return 0; /* silence 'broken pipe' */ 1658 #endif 1659 #ifdef ECONNRESET 1660 if(errno == ECONNRESET && verbosity < 2) 1661 return 0; /* silence reset by peer */ 1662 #endif 1663 if(!tcp_connect_errno_needs_log( 1664 (struct sockaddr*)&c->repinfo.remote_addr, 1665 c->repinfo.remote_addrlen)) 1666 return 0; /* silence connect failures that 1667 show up because after connect this is the 1668 first system call that accesses the socket */ 1669 if(errno != 0) 1670 log_err("SSL_handshake syscall: %s", 1671 strerror(errno)); 1672 return 0; 1673 } else { 1674 unsigned long err = ERR_get_error(); 1675 if(!squelch_err_ssl_handshake(err)) { 1676 long vr; 1677 log_crypto_err_io_code("ssl handshake failed", 1678 want, err); 1679 if((vr=SSL_get_verify_result(c->ssl)) != 0) 1680 log_err("ssl handshake cert error: %s", 1681 X509_verify_cert_error_string( 1682 vr)); 1683 log_addr(VERB_OPS, "ssl handshake failed", 1684 &c->repinfo.remote_addr, 1685 c->repinfo.remote_addrlen); 1686 } 1687 return 0; 1688 } 1689 } 1690 /* this is where peer verification could take place */ 1691 if((SSL_get_verify_mode(c->ssl)&SSL_VERIFY_PEER)) { 1692 /* verification */ 1693 if(SSL_get_verify_result(c->ssl) == X509_V_OK) { 1694 #ifdef HAVE_SSL_GET1_PEER_CERTIFICATE 1695 X509* x = SSL_get1_peer_certificate(c->ssl); 1696 #else 1697 X509* x = SSL_get_peer_certificate(c->ssl); 1698 #endif 1699 if(!x) { 1700 log_addr(VERB_ALGO, "SSL connection failed: " 1701 "no certificate", 1702 &c->repinfo.remote_addr, 1703 c->repinfo.remote_addrlen); 1704 return 0; 1705 } 1706 log_cert(VERB_ALGO, "peer certificate", x); 1707 #ifdef HAVE_SSL_GET0_PEERNAME 1708 if(SSL_get0_peername(c->ssl)) { 1709 char buf[255]; 1710 snprintf(buf, sizeof(buf), "SSL connection " 1711 "to %s authenticated", 1712 SSL_get0_peername(c->ssl)); 1713 log_addr(VERB_ALGO, buf, &c->repinfo.remote_addr, 1714 c->repinfo.remote_addrlen); 1715 } else { 1716 #endif 1717 log_addr(VERB_ALGO, "SSL connection " 1718 "authenticated", &c->repinfo.remote_addr, 1719 c->repinfo.remote_addrlen); 1720 #ifdef HAVE_SSL_GET0_PEERNAME 1721 } 1722 #endif 1723 X509_free(x); 1724 } else { 1725 #ifdef HAVE_SSL_GET1_PEER_CERTIFICATE 1726 X509* x = SSL_get1_peer_certificate(c->ssl); 1727 #else 1728 X509* x = SSL_get_peer_certificate(c->ssl); 1729 #endif 1730 if(x) { 1731 log_cert(VERB_ALGO, "peer certificate", x); 1732 X509_free(x); 1733 } 1734 log_addr(VERB_ALGO, "SSL connection failed: " 1735 "failed to authenticate", 1736 &c->repinfo.remote_addr, 1737 c->repinfo.remote_addrlen); 1738 return 0; 1739 } 1740 } else { 1741 /* unauthenticated, the verify peer flag was not set 1742 * in c->ssl when the ssl object was created from ssl_ctx */ 1743 log_addr(VERB_ALGO, "SSL connection", &c->repinfo.remote_addr, 1744 c->repinfo.remote_addrlen); 1745 } 1746 1747 #ifdef HAVE_SSL_GET0_ALPN_SELECTED 1748 /* check if http2 use is negotiated */ 1749 if(c->type == comm_http && c->h2_session) { 1750 const unsigned char *alpn; 1751 unsigned int alpnlen = 0; 1752 SSL_get0_alpn_selected(c->ssl, &alpn, &alpnlen); 1753 if(alpnlen == 2 && memcmp("h2", alpn, 2) == 0) { 1754 /* connection upgraded to HTTP2 */ 1755 c->tcp_do_toggle_rw = 0; 1756 c->use_h2 = 1; 1757 } else { 1758 verbose(VERB_ALGO, "client doesn't support HTTP/2"); 1759 return 0; 1760 } 1761 } 1762 #endif 1763 1764 /* setup listen rw correctly */ 1765 if(c->tcp_is_reading) { 1766 if(c->ssl_shake_state != comm_ssl_shake_read) 1767 comm_point_listen_for_rw(c, 1, 0); 1768 } else { 1769 comm_point_listen_for_rw(c, 0, 1); 1770 } 1771 c->ssl_shake_state = comm_ssl_shake_none; 1772 return 1; 1773 } 1774 #endif /* HAVE_SSL */ 1775 1776 /** ssl read callback on TCP */ 1777 static int 1778 ssl_handle_read(struct comm_point* c) 1779 { 1780 #ifdef HAVE_SSL 1781 int r; 1782 if(c->ssl_shake_state != comm_ssl_shake_none) { 1783 if(!ssl_handshake(c)) 1784 return 0; 1785 if(c->ssl_shake_state != comm_ssl_shake_none) 1786 return 1; 1787 } 1788 if(c->pp2_enabled && c->pp2_header_state != pp2_header_done) { 1789 struct pp2_header* header = NULL; 1790 size_t want_read_size = 0; 1791 size_t current_read_size = 0; 1792 if(c->pp2_header_state == pp2_header_none) { 1793 want_read_size = PP2_HEADER_SIZE; 1794 if(sldns_buffer_remaining(c->buffer)<want_read_size) { 1795 log_err_addr("proxy_protocol: not enough " 1796 "buffer size to read PROXYv2 header", "", 1797 &c->repinfo.remote_addr, 1798 c->repinfo.remote_addrlen); 1799 return 0; 1800 } 1801 verbose(VERB_ALGO, "proxy_protocol: reading fixed " 1802 "part of PROXYv2 header (len %lu)", 1803 (unsigned long)want_read_size); 1804 current_read_size = want_read_size; 1805 if(c->tcp_byte_count < current_read_size) { 1806 ERR_clear_error(); 1807 if((r=SSL_read(c->ssl, (void*)sldns_buffer_at( 1808 c->buffer, c->tcp_byte_count), 1809 current_read_size - 1810 c->tcp_byte_count)) <= 0) { 1811 int want = SSL_get_error(c->ssl, r); 1812 if(want == SSL_ERROR_ZERO_RETURN) { 1813 if(c->tcp_req_info) 1814 return tcp_req_info_handle_read_close(c->tcp_req_info); 1815 return 0; /* shutdown, closed */ 1816 } else if(want == SSL_ERROR_WANT_READ) { 1817 #ifdef USE_WINSOCK 1818 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 1819 #endif 1820 return 1; /* read more later */ 1821 } else if(want == SSL_ERROR_WANT_WRITE) { 1822 c->ssl_shake_state = comm_ssl_shake_hs_write; 1823 comm_point_listen_for_rw(c, 0, 1); 1824 return 1; 1825 } else if(want == SSL_ERROR_SYSCALL) { 1826 #ifdef ECONNRESET 1827 if(errno == ECONNRESET && verbosity < 2) 1828 return 0; /* silence reset by peer */ 1829 #endif 1830 if(errno != 0) 1831 log_err("SSL_read syscall: %s", 1832 strerror(errno)); 1833 return 0; 1834 } 1835 log_crypto_err_io("could not SSL_read", 1836 want); 1837 return 0; 1838 } 1839 c->tcp_byte_count += r; 1840 sldns_buffer_skip(c->buffer, r); 1841 if(c->tcp_byte_count != current_read_size) return 1; 1842 c->pp2_header_state = pp2_header_init; 1843 } 1844 } 1845 if(c->pp2_header_state == pp2_header_init) { 1846 int err; 1847 err = pp2_read_header( 1848 sldns_buffer_begin(c->buffer), 1849 sldns_buffer_limit(c->buffer)); 1850 if(err) { 1851 log_err("proxy_protocol: could not parse " 1852 "PROXYv2 header (%s)", 1853 pp_lookup_error(err)); 1854 return 0; 1855 } 1856 header = (struct pp2_header*)sldns_buffer_begin(c->buffer); 1857 want_read_size = ntohs(header->len); 1858 if(sldns_buffer_limit(c->buffer) < 1859 PP2_HEADER_SIZE + want_read_size) { 1860 log_err_addr("proxy_protocol: not enough " 1861 "buffer size to read PROXYv2 header", "", 1862 &c->repinfo.remote_addr, 1863 c->repinfo.remote_addrlen); 1864 return 0; 1865 } 1866 verbose(VERB_ALGO, "proxy_protocol: reading variable " 1867 "part of PROXYv2 header (len %lu)", 1868 (unsigned long)want_read_size); 1869 current_read_size = PP2_HEADER_SIZE + want_read_size; 1870 if(want_read_size == 0) { 1871 /* nothing more to read; header is complete */ 1872 c->pp2_header_state = pp2_header_done; 1873 } else if(c->tcp_byte_count < current_read_size) { 1874 ERR_clear_error(); 1875 if((r=SSL_read(c->ssl, (void*)sldns_buffer_at( 1876 c->buffer, c->tcp_byte_count), 1877 current_read_size - 1878 c->tcp_byte_count)) <= 0) { 1879 int want = SSL_get_error(c->ssl, r); 1880 if(want == SSL_ERROR_ZERO_RETURN) { 1881 if(c->tcp_req_info) 1882 return tcp_req_info_handle_read_close(c->tcp_req_info); 1883 return 0; /* shutdown, closed */ 1884 } else if(want == SSL_ERROR_WANT_READ) { 1885 #ifdef USE_WINSOCK 1886 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 1887 #endif 1888 return 1; /* read more later */ 1889 } else if(want == SSL_ERROR_WANT_WRITE) { 1890 c->ssl_shake_state = comm_ssl_shake_hs_write; 1891 comm_point_listen_for_rw(c, 0, 1); 1892 return 1; 1893 } else if(want == SSL_ERROR_SYSCALL) { 1894 #ifdef ECONNRESET 1895 if(errno == ECONNRESET && verbosity < 2) 1896 return 0; /* silence reset by peer */ 1897 #endif 1898 if(errno != 0) 1899 log_err("SSL_read syscall: %s", 1900 strerror(errno)); 1901 return 0; 1902 } 1903 log_crypto_err_io("could not SSL_read", 1904 want); 1905 return 0; 1906 } 1907 c->tcp_byte_count += r; 1908 sldns_buffer_skip(c->buffer, r); 1909 if(c->tcp_byte_count != current_read_size) return 1; 1910 c->pp2_header_state = pp2_header_done; 1911 } 1912 } 1913 if(c->pp2_header_state != pp2_header_done || !header) { 1914 log_err_addr("proxy_protocol: wrong state for the " 1915 "PROXYv2 header", "", &c->repinfo.remote_addr, 1916 c->repinfo.remote_addrlen); 1917 return 0; 1918 } 1919 sldns_buffer_flip(c->buffer); 1920 if(!consume_pp2_header(c->buffer, &c->repinfo, 1)) { 1921 log_err_addr("proxy_protocol: could not consume " 1922 "PROXYv2 header", "", &c->repinfo.remote_addr, 1923 c->repinfo.remote_addrlen); 1924 return 0; 1925 } 1926 verbose(VERB_ALGO, "proxy_protocol: successful read of " 1927 "PROXYv2 header"); 1928 /* Clear and reset the buffer to read the following 1929 * DNS packet(s). */ 1930 sldns_buffer_clear(c->buffer); 1931 c->tcp_byte_count = 0; 1932 return 1; 1933 } 1934 if(c->tcp_byte_count < sizeof(uint16_t)) { 1935 /* read length bytes */ 1936 ERR_clear_error(); 1937 if((r=SSL_read(c->ssl, (void*)sldns_buffer_at(c->buffer, 1938 c->tcp_byte_count), (int)(sizeof(uint16_t) - 1939 c->tcp_byte_count))) <= 0) { 1940 int want = SSL_get_error(c->ssl, r); 1941 if(want == SSL_ERROR_ZERO_RETURN) { 1942 if(c->tcp_req_info) 1943 return tcp_req_info_handle_read_close(c->tcp_req_info); 1944 return 0; /* shutdown, closed */ 1945 } else if(want == SSL_ERROR_WANT_READ) { 1946 #ifdef USE_WINSOCK 1947 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 1948 #endif 1949 return 1; /* read more later */ 1950 } else if(want == SSL_ERROR_WANT_WRITE) { 1951 c->ssl_shake_state = comm_ssl_shake_hs_write; 1952 comm_point_listen_for_rw(c, 0, 1); 1953 return 1; 1954 } else if(want == SSL_ERROR_SYSCALL) { 1955 #ifdef ECONNRESET 1956 if(errno == ECONNRESET && verbosity < 2) 1957 return 0; /* silence reset by peer */ 1958 #endif 1959 if(errno != 0) 1960 log_err("SSL_read syscall: %s", 1961 strerror(errno)); 1962 return 0; 1963 } 1964 log_crypto_err_io("could not SSL_read", want); 1965 return 0; 1966 } 1967 c->tcp_byte_count += r; 1968 if(c->tcp_byte_count < sizeof(uint16_t)) 1969 return 1; 1970 if(sldns_buffer_read_u16_at(c->buffer, 0) > 1971 sldns_buffer_capacity(c->buffer)) { 1972 verbose(VERB_QUERY, "ssl: dropped larger than buffer"); 1973 return 0; 1974 } 1975 sldns_buffer_set_limit(c->buffer, 1976 sldns_buffer_read_u16_at(c->buffer, 0)); 1977 if(sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) { 1978 verbose(VERB_QUERY, "ssl: dropped bogus too short."); 1979 return 0; 1980 } 1981 sldns_buffer_skip(c->buffer, (ssize_t)(c->tcp_byte_count-sizeof(uint16_t))); 1982 verbose(VERB_ALGO, "Reading ssl tcp query of length %d", 1983 (int)sldns_buffer_limit(c->buffer)); 1984 } 1985 if(sldns_buffer_remaining(c->buffer) > 0) { 1986 ERR_clear_error(); 1987 r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer), 1988 (int)sldns_buffer_remaining(c->buffer)); 1989 if(r <= 0) { 1990 int want = SSL_get_error(c->ssl, r); 1991 if(want == SSL_ERROR_ZERO_RETURN) { 1992 if(c->tcp_req_info) 1993 return tcp_req_info_handle_read_close(c->tcp_req_info); 1994 return 0; /* shutdown, closed */ 1995 } else if(want == SSL_ERROR_WANT_READ) { 1996 #ifdef USE_WINSOCK 1997 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 1998 #endif 1999 return 1; /* read more later */ 2000 } else if(want == SSL_ERROR_WANT_WRITE) { 2001 c->ssl_shake_state = comm_ssl_shake_hs_write; 2002 comm_point_listen_for_rw(c, 0, 1); 2003 return 1; 2004 } else if(want == SSL_ERROR_SYSCALL) { 2005 #ifdef ECONNRESET 2006 if(errno == ECONNRESET && verbosity < 2) 2007 return 0; /* silence reset by peer */ 2008 #endif 2009 if(errno != 0) 2010 log_err("SSL_read syscall: %s", 2011 strerror(errno)); 2012 return 0; 2013 } 2014 log_crypto_err_io("could not SSL_read", want); 2015 return 0; 2016 } 2017 sldns_buffer_skip(c->buffer, (ssize_t)r); 2018 } 2019 if(sldns_buffer_remaining(c->buffer) <= 0) { 2020 tcp_callback_reader(c); 2021 } 2022 return 1; 2023 #else 2024 (void)c; 2025 return 0; 2026 #endif /* HAVE_SSL */ 2027 } 2028 2029 /** ssl write callback on TCP */ 2030 static int 2031 ssl_handle_write(struct comm_point* c) 2032 { 2033 #ifdef HAVE_SSL 2034 int r; 2035 if(c->ssl_shake_state != comm_ssl_shake_none) { 2036 if(!ssl_handshake(c)) 2037 return 0; 2038 if(c->ssl_shake_state != comm_ssl_shake_none) 2039 return 1; 2040 } 2041 /* ignore return, if fails we may simply block */ 2042 (void)SSL_set_mode(c->ssl, (long)SSL_MODE_ENABLE_PARTIAL_WRITE); 2043 if((c->tcp_write_and_read?c->tcp_write_byte_count:c->tcp_byte_count) < sizeof(uint16_t)) { 2044 uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(c->buffer)); 2045 ERR_clear_error(); 2046 if(c->tcp_write_and_read) { 2047 if(c->tcp_write_pkt_len + 2 < LDNS_RR_BUF_SIZE) { 2048 /* combine the tcp length and the query for 2049 * write, this emulates writev */ 2050 uint8_t buf[LDNS_RR_BUF_SIZE]; 2051 memmove(buf, &len, sizeof(uint16_t)); 2052 memmove(buf+sizeof(uint16_t), 2053 c->tcp_write_pkt, 2054 c->tcp_write_pkt_len); 2055 r = SSL_write(c->ssl, 2056 (void*)(buf+c->tcp_write_byte_count), 2057 c->tcp_write_pkt_len + 2 - 2058 c->tcp_write_byte_count); 2059 } else { 2060 r = SSL_write(c->ssl, 2061 (void*)(((uint8_t*)&len)+c->tcp_write_byte_count), 2062 (int)(sizeof(uint16_t)-c->tcp_write_byte_count)); 2063 } 2064 } else if(sizeof(uint16_t)+sldns_buffer_remaining(c->buffer) < 2065 LDNS_RR_BUF_SIZE) { 2066 /* combine the tcp length and the query for write, 2067 * this emulates writev */ 2068 uint8_t buf[LDNS_RR_BUF_SIZE]; 2069 memmove(buf, &len, sizeof(uint16_t)); 2070 memmove(buf+sizeof(uint16_t), 2071 sldns_buffer_current(c->buffer), 2072 sldns_buffer_remaining(c->buffer)); 2073 r = SSL_write(c->ssl, (void*)(buf+c->tcp_byte_count), 2074 (int)(sizeof(uint16_t)+ 2075 sldns_buffer_remaining(c->buffer) 2076 - c->tcp_byte_count)); 2077 } else { 2078 r = SSL_write(c->ssl, 2079 (void*)(((uint8_t*)&len)+c->tcp_byte_count), 2080 (int)(sizeof(uint16_t)-c->tcp_byte_count)); 2081 } 2082 if(r <= 0) { 2083 int want = SSL_get_error(c->ssl, r); 2084 if(want == SSL_ERROR_ZERO_RETURN) { 2085 return 0; /* closed */ 2086 } else if(want == SSL_ERROR_WANT_READ) { 2087 c->ssl_shake_state = comm_ssl_shake_hs_read; 2088 comm_point_listen_for_rw(c, 1, 0); 2089 return 1; /* wait for read condition */ 2090 } else if(want == SSL_ERROR_WANT_WRITE) { 2091 #ifdef USE_WINSOCK 2092 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 2093 #endif 2094 return 1; /* write more later */ 2095 } else if(want == SSL_ERROR_SYSCALL) { 2096 #ifdef EPIPE 2097 if(errno == EPIPE && verbosity < 2) 2098 return 0; /* silence 'broken pipe' */ 2099 #endif 2100 if(errno != 0) 2101 log_err("SSL_write syscall: %s", 2102 strerror(errno)); 2103 return 0; 2104 } 2105 log_crypto_err_io("could not SSL_write", want); 2106 return 0; 2107 } 2108 if(c->tcp_write_and_read) { 2109 c->tcp_write_byte_count += r; 2110 if(c->tcp_write_byte_count < sizeof(uint16_t)) 2111 return 1; 2112 } else { 2113 c->tcp_byte_count += r; 2114 if(c->tcp_byte_count < sizeof(uint16_t)) 2115 return 1; 2116 sldns_buffer_set_position(c->buffer, c->tcp_byte_count - 2117 sizeof(uint16_t)); 2118 } 2119 if((!c->tcp_write_and_read && sldns_buffer_remaining(c->buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) { 2120 tcp_callback_writer(c); 2121 return 1; 2122 } 2123 } 2124 log_assert(c->tcp_write_and_read || sldns_buffer_remaining(c->buffer) > 0); 2125 log_assert(!c->tcp_write_and_read || c->tcp_write_byte_count < c->tcp_write_pkt_len + 2); 2126 ERR_clear_error(); 2127 if(c->tcp_write_and_read) { 2128 r = SSL_write(c->ssl, (void*)(c->tcp_write_pkt + c->tcp_write_byte_count - 2), 2129 (int)(c->tcp_write_pkt_len + 2 - c->tcp_write_byte_count)); 2130 } else { 2131 r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer), 2132 (int)sldns_buffer_remaining(c->buffer)); 2133 } 2134 if(r <= 0) { 2135 int want = SSL_get_error(c->ssl, r); 2136 if(want == SSL_ERROR_ZERO_RETURN) { 2137 return 0; /* closed */ 2138 } else if(want == SSL_ERROR_WANT_READ) { 2139 c->ssl_shake_state = comm_ssl_shake_hs_read; 2140 comm_point_listen_for_rw(c, 1, 0); 2141 return 1; /* wait for read condition */ 2142 } else if(want == SSL_ERROR_WANT_WRITE) { 2143 #ifdef USE_WINSOCK 2144 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 2145 #endif 2146 return 1; /* write more later */ 2147 } else if(want == SSL_ERROR_SYSCALL) { 2148 #ifdef EPIPE 2149 if(errno == EPIPE && verbosity < 2) 2150 return 0; /* silence 'broken pipe' */ 2151 #endif 2152 if(errno != 0) 2153 log_err("SSL_write syscall: %s", 2154 strerror(errno)); 2155 return 0; 2156 } 2157 log_crypto_err_io("could not SSL_write", want); 2158 return 0; 2159 } 2160 if(c->tcp_write_and_read) { 2161 c->tcp_write_byte_count += r; 2162 } else { 2163 sldns_buffer_skip(c->buffer, (ssize_t)r); 2164 } 2165 2166 if((!c->tcp_write_and_read && sldns_buffer_remaining(c->buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) { 2167 tcp_callback_writer(c); 2168 } 2169 return 1; 2170 #else 2171 (void)c; 2172 return 0; 2173 #endif /* HAVE_SSL */ 2174 } 2175 2176 /** handle ssl tcp connection with dns contents */ 2177 static int 2178 ssl_handle_it(struct comm_point* c, int is_write) 2179 { 2180 /* handle case where renegotiation wants read during write call 2181 * or write during read calls */ 2182 if(is_write && c->ssl_shake_state == comm_ssl_shake_hs_write) 2183 return ssl_handle_read(c); 2184 else if(!is_write && c->ssl_shake_state == comm_ssl_shake_hs_read) 2185 return ssl_handle_write(c); 2186 /* handle read events for read operation and write events for a 2187 * write operation */ 2188 else if(!is_write) 2189 return ssl_handle_read(c); 2190 return ssl_handle_write(c); 2191 } 2192 2193 /** 2194 * Handle tcp reading callback. 2195 * @param fd: file descriptor of socket. 2196 * @param c: comm point to read from into buffer. 2197 * @param short_ok: if true, very short packets are OK (for comm_local). 2198 * @return: 0 on error 2199 */ 2200 static int 2201 comm_point_tcp_handle_read(int fd, struct comm_point* c, int short_ok) 2202 { 2203 ssize_t r; 2204 int recv_initial = 0; 2205 log_assert(c->type == comm_tcp || c->type == comm_local); 2206 if(c->ssl) 2207 return ssl_handle_it(c, 0); 2208 if(!c->tcp_is_reading && !c->tcp_write_and_read) 2209 return 0; 2210 2211 log_assert(fd != -1); 2212 if(c->pp2_enabled && c->pp2_header_state != pp2_header_done) { 2213 struct pp2_header* header = NULL; 2214 size_t want_read_size = 0; 2215 size_t current_read_size = 0; 2216 if(c->pp2_header_state == pp2_header_none) { 2217 want_read_size = PP2_HEADER_SIZE; 2218 if(sldns_buffer_remaining(c->buffer)<want_read_size) { 2219 log_err_addr("proxy_protocol: not enough " 2220 "buffer size to read PROXYv2 header", "", 2221 &c->repinfo.remote_addr, 2222 c->repinfo.remote_addrlen); 2223 return 0; 2224 } 2225 verbose(VERB_ALGO, "proxy_protocol: reading fixed " 2226 "part of PROXYv2 header (len %lu)", 2227 (unsigned long)want_read_size); 2228 current_read_size = want_read_size; 2229 if(c->tcp_byte_count < current_read_size) { 2230 r = recv(fd, (void*)sldns_buffer_at(c->buffer, 2231 c->tcp_byte_count), 2232 current_read_size-c->tcp_byte_count, MSG_DONTWAIT); 2233 if(r == 0) { 2234 if(c->tcp_req_info) 2235 return tcp_req_info_handle_read_close(c->tcp_req_info); 2236 return 0; 2237 } else if(r == -1) { 2238 goto recv_error_initial; 2239 } 2240 c->tcp_byte_count += r; 2241 sldns_buffer_skip(c->buffer, r); 2242 if(c->tcp_byte_count != current_read_size) return 1; 2243 c->pp2_header_state = pp2_header_init; 2244 } 2245 } 2246 if(c->pp2_header_state == pp2_header_init) { 2247 int err; 2248 err = pp2_read_header( 2249 sldns_buffer_begin(c->buffer), 2250 sldns_buffer_limit(c->buffer)); 2251 if(err) { 2252 log_err("proxy_protocol: could not parse " 2253 "PROXYv2 header (%s)", 2254 pp_lookup_error(err)); 2255 return 0; 2256 } 2257 header = (struct pp2_header*)sldns_buffer_begin(c->buffer); 2258 want_read_size = ntohs(header->len); 2259 if(sldns_buffer_limit(c->buffer) < 2260 PP2_HEADER_SIZE + want_read_size) { 2261 log_err_addr("proxy_protocol: not enough " 2262 "buffer size to read PROXYv2 header", "", 2263 &c->repinfo.remote_addr, 2264 c->repinfo.remote_addrlen); 2265 return 0; 2266 } 2267 verbose(VERB_ALGO, "proxy_protocol: reading variable " 2268 "part of PROXYv2 header (len %lu)", 2269 (unsigned long)want_read_size); 2270 current_read_size = PP2_HEADER_SIZE + want_read_size; 2271 if(want_read_size == 0) { 2272 /* nothing more to read; header is complete */ 2273 c->pp2_header_state = pp2_header_done; 2274 } else if(c->tcp_byte_count < current_read_size) { 2275 r = recv(fd, (void*)sldns_buffer_at(c->buffer, 2276 c->tcp_byte_count), 2277 current_read_size-c->tcp_byte_count, MSG_DONTWAIT); 2278 if(r == 0) { 2279 if(c->tcp_req_info) 2280 return tcp_req_info_handle_read_close(c->tcp_req_info); 2281 return 0; 2282 } else if(r == -1) { 2283 goto recv_error; 2284 } 2285 c->tcp_byte_count += r; 2286 sldns_buffer_skip(c->buffer, r); 2287 if(c->tcp_byte_count != current_read_size) return 1; 2288 c->pp2_header_state = pp2_header_done; 2289 } 2290 } 2291 if(c->pp2_header_state != pp2_header_done || !header) { 2292 log_err_addr("proxy_protocol: wrong state for the " 2293 "PROXYv2 header", "", &c->repinfo.remote_addr, 2294 c->repinfo.remote_addrlen); 2295 return 0; 2296 } 2297 sldns_buffer_flip(c->buffer); 2298 if(!consume_pp2_header(c->buffer, &c->repinfo, 1)) { 2299 log_err_addr("proxy_protocol: could not consume " 2300 "PROXYv2 header", "", &c->repinfo.remote_addr, 2301 c->repinfo.remote_addrlen); 2302 return 0; 2303 } 2304 verbose(VERB_ALGO, "proxy_protocol: successful read of " 2305 "PROXYv2 header"); 2306 /* Clear and reset the buffer to read the following 2307 * DNS packet(s). */ 2308 sldns_buffer_clear(c->buffer); 2309 c->tcp_byte_count = 0; 2310 return 1; 2311 } 2312 2313 if(c->tcp_byte_count < sizeof(uint16_t)) { 2314 /* read length bytes */ 2315 r = recv(fd,(void*)sldns_buffer_at(c->buffer,c->tcp_byte_count), 2316 sizeof(uint16_t)-c->tcp_byte_count, MSG_DONTWAIT); 2317 if(r == 0) { 2318 if(c->tcp_req_info) 2319 return tcp_req_info_handle_read_close(c->tcp_req_info); 2320 return 0; 2321 } else if(r == -1) { 2322 if(c->pp2_enabled) goto recv_error; 2323 goto recv_error_initial; 2324 } 2325 c->tcp_byte_count += r; 2326 if(c->tcp_byte_count != sizeof(uint16_t)) 2327 return 1; 2328 if(sldns_buffer_read_u16_at(c->buffer, 0) > 2329 sldns_buffer_capacity(c->buffer)) { 2330 verbose(VERB_QUERY, "tcp: dropped larger than buffer"); 2331 return 0; 2332 } 2333 sldns_buffer_set_limit(c->buffer, 2334 sldns_buffer_read_u16_at(c->buffer, 0)); 2335 if(!short_ok && 2336 sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) { 2337 verbose(VERB_QUERY, "tcp: dropped bogus too short."); 2338 return 0; 2339 } 2340 verbose(VERB_ALGO, "Reading tcp query of length %d", 2341 (int)sldns_buffer_limit(c->buffer)); 2342 } 2343 2344 if(sldns_buffer_remaining(c->buffer) == 0) 2345 log_err("in comm_point_tcp_handle_read buffer_remaining is " 2346 "not > 0 as expected, continuing with (harmless) 0 " 2347 "length recv"); 2348 r = recv(fd, (void*)sldns_buffer_current(c->buffer), 2349 sldns_buffer_remaining(c->buffer), MSG_DONTWAIT); 2350 if(r == 0) { 2351 if(c->tcp_req_info) 2352 return tcp_req_info_handle_read_close(c->tcp_req_info); 2353 return 0; 2354 } else if(r == -1) { 2355 goto recv_error; 2356 } 2357 sldns_buffer_skip(c->buffer, r); 2358 if(sldns_buffer_remaining(c->buffer) <= 0) { 2359 tcp_callback_reader(c); 2360 } 2361 return 1; 2362 2363 recv_error_initial: 2364 recv_initial = 1; 2365 recv_error: 2366 #ifndef USE_WINSOCK 2367 if(errno == EINTR || errno == EAGAIN) 2368 return 1; 2369 #ifdef ECONNRESET 2370 if(errno == ECONNRESET && verbosity < 2) 2371 return 0; /* silence reset by peer */ 2372 #endif 2373 if(recv_initial) { 2374 #ifdef ECONNREFUSED 2375 if(errno == ECONNREFUSED && verbosity < 2) 2376 return 0; /* silence reset by peer */ 2377 #endif 2378 #ifdef ENETUNREACH 2379 if(errno == ENETUNREACH && verbosity < 2) 2380 return 0; /* silence it */ 2381 #endif 2382 #ifdef EHOSTDOWN 2383 if(errno == EHOSTDOWN && verbosity < 2) 2384 return 0; /* silence it */ 2385 #endif 2386 #ifdef EHOSTUNREACH 2387 if(errno == EHOSTUNREACH && verbosity < 2) 2388 return 0; /* silence it */ 2389 #endif 2390 #ifdef ENETDOWN 2391 if(errno == ENETDOWN && verbosity < 2) 2392 return 0; /* silence it */ 2393 #endif 2394 #ifdef EACCES 2395 if(errno == EACCES && verbosity < 2) 2396 return 0; /* silence it */ 2397 #endif 2398 #ifdef ENOTCONN 2399 if(errno == ENOTCONN) { 2400 log_err_addr("read (in tcp initial) failed and this " 2401 "could be because TCP Fast Open is " 2402 "enabled [--disable-tfo-client " 2403 "--disable-tfo-server] but does not " 2404 "work", sock_strerror(errno), 2405 &c->repinfo.remote_addr, 2406 c->repinfo.remote_addrlen); 2407 return 0; 2408 } 2409 #endif 2410 } 2411 #else /* USE_WINSOCK */ 2412 if(recv_initial) { 2413 if(WSAGetLastError() == WSAECONNREFUSED && verbosity < 2) 2414 return 0; 2415 if(WSAGetLastError() == WSAEHOSTDOWN && verbosity < 2) 2416 return 0; 2417 if(WSAGetLastError() == WSAEHOSTUNREACH && verbosity < 2) 2418 return 0; 2419 if(WSAGetLastError() == WSAENETDOWN && verbosity < 2) 2420 return 0; 2421 if(WSAGetLastError() == WSAENETUNREACH && verbosity < 2) 2422 return 0; 2423 } 2424 if(WSAGetLastError() == WSAECONNRESET) 2425 return 0; 2426 if(WSAGetLastError() == WSAEINPROGRESS) 2427 return 1; 2428 if(WSAGetLastError() == WSAEWOULDBLOCK) { 2429 ub_winsock_tcp_wouldblock(c->ev->ev, 2430 UB_EV_READ); 2431 return 1; 2432 } 2433 #endif 2434 log_err_addr((recv_initial?"read (in tcp initial)":"read (in tcp)"), 2435 sock_strerror(errno), &c->repinfo.remote_addr, 2436 c->repinfo.remote_addrlen); 2437 return 0; 2438 } 2439 2440 /** 2441 * Handle tcp writing callback. 2442 * @param fd: file descriptor of socket. 2443 * @param c: comm point to write buffer out of. 2444 * @return: 0 on error 2445 */ 2446 static int 2447 comm_point_tcp_handle_write(int fd, struct comm_point* c) 2448 { 2449 ssize_t r; 2450 struct sldns_buffer *buffer; 2451 log_assert(c->type == comm_tcp); 2452 #ifdef USE_DNSCRYPT 2453 buffer = c->dnscrypt_buffer; 2454 #else 2455 buffer = c->buffer; 2456 #endif 2457 if(c->tcp_is_reading && !c->ssl && !c->tcp_write_and_read) 2458 return 0; 2459 log_assert(fd != -1); 2460 if(((!c->tcp_write_and_read && c->tcp_byte_count == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == 0)) && c->tcp_check_nb_connect) { 2461 /* check for pending error from nonblocking connect */ 2462 /* from Stevens, unix network programming, vol1, 3rd ed, p450*/ 2463 int error = 0; 2464 socklen_t len = (socklen_t)sizeof(error); 2465 if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error, 2466 &len) < 0){ 2467 #ifndef USE_WINSOCK 2468 error = errno; /* on solaris errno is error */ 2469 #else /* USE_WINSOCK */ 2470 error = WSAGetLastError(); 2471 #endif 2472 } 2473 #ifndef USE_WINSOCK 2474 #if defined(EINPROGRESS) && defined(EWOULDBLOCK) 2475 if(error == EINPROGRESS || error == EWOULDBLOCK) 2476 return 1; /* try again later */ 2477 else 2478 #endif 2479 if(error != 0 && verbosity < 2) 2480 return 0; /* silence lots of chatter in the logs */ 2481 else if(error != 0) { 2482 log_err_addr("tcp connect", strerror(error), 2483 &c->repinfo.remote_addr, 2484 c->repinfo.remote_addrlen); 2485 #else /* USE_WINSOCK */ 2486 /* examine error */ 2487 if(error == WSAEINPROGRESS) 2488 return 1; 2489 else if(error == WSAEWOULDBLOCK) { 2490 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 2491 return 1; 2492 } else if(error != 0 && verbosity < 2) 2493 return 0; 2494 else if(error != 0) { 2495 log_err_addr("tcp connect", wsa_strerror(error), 2496 &c->repinfo.remote_addr, 2497 c->repinfo.remote_addrlen); 2498 #endif /* USE_WINSOCK */ 2499 return 0; 2500 } 2501 } 2502 if(c->ssl) 2503 return ssl_handle_it(c, 1); 2504 2505 #ifdef USE_MSG_FASTOPEN 2506 /* Only try this on first use of a connection that uses tfo, 2507 otherwise fall through to normal write */ 2508 /* Also, TFO support on WINDOWS not implemented at the moment */ 2509 if(c->tcp_do_fastopen == 1) { 2510 /* this form of sendmsg() does both a connect() and send() so need to 2511 look for various flavours of error*/ 2512 uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(buffer)); 2513 struct msghdr msg; 2514 struct iovec iov[2]; 2515 c->tcp_do_fastopen = 0; 2516 memset(&msg, 0, sizeof(msg)); 2517 if(c->tcp_write_and_read) { 2518 iov[0].iov_base = (uint8_t*)&len + c->tcp_write_byte_count; 2519 iov[0].iov_len = sizeof(uint16_t) - c->tcp_write_byte_count; 2520 iov[1].iov_base = c->tcp_write_pkt; 2521 iov[1].iov_len = c->tcp_write_pkt_len; 2522 } else { 2523 iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count; 2524 iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count; 2525 iov[1].iov_base = sldns_buffer_begin(buffer); 2526 iov[1].iov_len = sldns_buffer_limit(buffer); 2527 } 2528 log_assert(iov[0].iov_len > 0); 2529 msg.msg_name = &c->repinfo.remote_addr; 2530 msg.msg_namelen = c->repinfo.remote_addrlen; 2531 msg.msg_iov = iov; 2532 msg.msg_iovlen = 2; 2533 r = sendmsg(fd, &msg, MSG_FASTOPEN); 2534 if (r == -1) { 2535 #if defined(EINPROGRESS) && defined(EWOULDBLOCK) 2536 /* Handshake is underway, maybe because no TFO cookie available. 2537 Come back to write the message*/ 2538 if(errno == EINPROGRESS || errno == EWOULDBLOCK) 2539 return 1; 2540 #endif 2541 if(errno == EINTR || errno == EAGAIN) 2542 return 1; 2543 /* Not handling EISCONN here as shouldn't ever hit that case.*/ 2544 if(errno != EPIPE 2545 #ifdef EOPNOTSUPP 2546 /* if /proc/sys/net/ipv4/tcp_fastopen is 2547 * disabled on Linux, sendmsg may return 2548 * 'Operation not supported', if so 2549 * fallthrough to ordinary connect. */ 2550 && errno != EOPNOTSUPP 2551 #endif 2552 && errno != 0) { 2553 if(verbosity < 2) 2554 return 0; /* silence lots of chatter in the logs */ 2555 log_err_addr("tcp sendmsg", strerror(errno), 2556 &c->repinfo.remote_addr, 2557 c->repinfo.remote_addrlen); 2558 return 0; 2559 } 2560 verbose(VERB_ALGO, "tcp sendmsg for fastopen failed (with %s), try normal connect", strerror(errno)); 2561 /* fallthrough to nonFASTOPEN 2562 * (MSG_FASTOPEN on Linux 3 produces EPIPE) 2563 * we need to perform connect() */ 2564 if(connect(fd, (struct sockaddr *)&c->repinfo.remote_addr, 2565 c->repinfo.remote_addrlen) == -1) { 2566 #ifdef EINPROGRESS 2567 if(errno == EINPROGRESS) 2568 return 1; /* wait until connect done*/ 2569 #endif 2570 #ifdef USE_WINSOCK 2571 if(WSAGetLastError() == WSAEINPROGRESS || 2572 WSAGetLastError() == WSAEWOULDBLOCK) 2573 return 1; /* wait until connect done*/ 2574 #endif 2575 if(tcp_connect_errno_needs_log( 2576 (struct sockaddr *)&c->repinfo.remote_addr, 2577 c->repinfo.remote_addrlen)) { 2578 log_err_addr("outgoing tcp: connect after EPIPE for fastopen", 2579 strerror(errno), 2580 &c->repinfo.remote_addr, 2581 c->repinfo.remote_addrlen); 2582 } 2583 return 0; 2584 } 2585 2586 } else { 2587 if(c->tcp_write_and_read) { 2588 c->tcp_write_byte_count += r; 2589 if(c->tcp_write_byte_count < sizeof(uint16_t)) 2590 return 1; 2591 } else { 2592 c->tcp_byte_count += r; 2593 if(c->tcp_byte_count < sizeof(uint16_t)) 2594 return 1; 2595 sldns_buffer_set_position(buffer, c->tcp_byte_count - 2596 sizeof(uint16_t)); 2597 } 2598 if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) { 2599 tcp_callback_writer(c); 2600 return 1; 2601 } 2602 } 2603 } 2604 #endif /* USE_MSG_FASTOPEN */ 2605 2606 if((c->tcp_write_and_read?c->tcp_write_byte_count:c->tcp_byte_count) < sizeof(uint16_t)) { 2607 uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(buffer)); 2608 #ifdef HAVE_WRITEV 2609 struct iovec iov[2]; 2610 if(c->tcp_write_and_read) { 2611 iov[0].iov_base = (uint8_t*)&len + c->tcp_write_byte_count; 2612 iov[0].iov_len = sizeof(uint16_t) - c->tcp_write_byte_count; 2613 iov[1].iov_base = c->tcp_write_pkt; 2614 iov[1].iov_len = c->tcp_write_pkt_len; 2615 } else { 2616 iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count; 2617 iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count; 2618 iov[1].iov_base = sldns_buffer_begin(buffer); 2619 iov[1].iov_len = sldns_buffer_limit(buffer); 2620 } 2621 log_assert(iov[0].iov_len > 0); 2622 r = writev(fd, iov, 2); 2623 #else /* HAVE_WRITEV */ 2624 if(c->tcp_write_and_read) { 2625 r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_write_byte_count), 2626 sizeof(uint16_t)-c->tcp_write_byte_count, 0); 2627 } else { 2628 r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_byte_count), 2629 sizeof(uint16_t)-c->tcp_byte_count, 0); 2630 } 2631 #endif /* HAVE_WRITEV */ 2632 if(r == -1) { 2633 #ifndef USE_WINSOCK 2634 # ifdef EPIPE 2635 if(errno == EPIPE && verbosity < 2) 2636 return 0; /* silence 'broken pipe' */ 2637 #endif 2638 if(errno == EINTR || errno == EAGAIN) 2639 return 1; 2640 #ifdef ECONNRESET 2641 if(errno == ECONNRESET && verbosity < 2) 2642 return 0; /* silence reset by peer */ 2643 #endif 2644 # ifdef HAVE_WRITEV 2645 log_err_addr("tcp writev", strerror(errno), 2646 &c->repinfo.remote_addr, 2647 c->repinfo.remote_addrlen); 2648 # else /* HAVE_WRITEV */ 2649 log_err_addr("tcp send s", strerror(errno), 2650 &c->repinfo.remote_addr, 2651 c->repinfo.remote_addrlen); 2652 # endif /* HAVE_WRITEV */ 2653 #else 2654 if(WSAGetLastError() == WSAENOTCONN) 2655 return 1; 2656 if(WSAGetLastError() == WSAEINPROGRESS) 2657 return 1; 2658 if(WSAGetLastError() == WSAEWOULDBLOCK) { 2659 ub_winsock_tcp_wouldblock(c->ev->ev, 2660 UB_EV_WRITE); 2661 return 1; 2662 } 2663 if(WSAGetLastError() == WSAECONNRESET && verbosity < 2) 2664 return 0; /* silence reset by peer */ 2665 log_err_addr("tcp send s", 2666 wsa_strerror(WSAGetLastError()), 2667 &c->repinfo.remote_addr, 2668 c->repinfo.remote_addrlen); 2669 #endif 2670 return 0; 2671 } 2672 if(c->tcp_write_and_read) { 2673 c->tcp_write_byte_count += r; 2674 if(c->tcp_write_byte_count < sizeof(uint16_t)) 2675 return 1; 2676 } else { 2677 c->tcp_byte_count += r; 2678 if(c->tcp_byte_count < sizeof(uint16_t)) 2679 return 1; 2680 sldns_buffer_set_position(buffer, c->tcp_byte_count - 2681 sizeof(uint16_t)); 2682 } 2683 if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) { 2684 tcp_callback_writer(c); 2685 return 1; 2686 } 2687 } 2688 log_assert(c->tcp_write_and_read || sldns_buffer_remaining(buffer) > 0); 2689 log_assert(!c->tcp_write_and_read || c->tcp_write_byte_count < c->tcp_write_pkt_len + 2); 2690 if(c->tcp_write_and_read) { 2691 r = send(fd, (void*)(c->tcp_write_pkt + c->tcp_write_byte_count - 2), 2692 c->tcp_write_pkt_len + 2 - c->tcp_write_byte_count, 0); 2693 } else { 2694 r = send(fd, (void*)sldns_buffer_current(buffer), 2695 sldns_buffer_remaining(buffer), 0); 2696 } 2697 if(r == -1) { 2698 #ifndef USE_WINSOCK 2699 if(errno == EINTR || errno == EAGAIN) 2700 return 1; 2701 #ifdef ECONNRESET 2702 if(errno == ECONNRESET && verbosity < 2) 2703 return 0; /* silence reset by peer */ 2704 #endif 2705 #else 2706 if(WSAGetLastError() == WSAEINPROGRESS) 2707 return 1; 2708 if(WSAGetLastError() == WSAEWOULDBLOCK) { 2709 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 2710 return 1; 2711 } 2712 if(WSAGetLastError() == WSAECONNRESET && verbosity < 2) 2713 return 0; /* silence reset by peer */ 2714 #endif 2715 log_err_addr("tcp send r", sock_strerror(errno), 2716 &c->repinfo.remote_addr, 2717 c->repinfo.remote_addrlen); 2718 return 0; 2719 } 2720 if(c->tcp_write_and_read) { 2721 c->tcp_write_byte_count += r; 2722 } else { 2723 sldns_buffer_skip(buffer, r); 2724 } 2725 2726 if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) { 2727 tcp_callback_writer(c); 2728 } 2729 2730 return 1; 2731 } 2732 2733 /** read again to drain buffers when there could be more to read, returns 0 2734 * on failure which means the comm point is closed. */ 2735 static int 2736 tcp_req_info_read_again(int fd, struct comm_point* c) 2737 { 2738 while(c->tcp_req_info->read_again) { 2739 int r; 2740 c->tcp_req_info->read_again = 0; 2741 if(c->tcp_is_reading) 2742 r = comm_point_tcp_handle_read(fd, c, 0); 2743 else r = comm_point_tcp_handle_write(fd, c); 2744 if(!r) { 2745 reclaim_tcp_handler(c); 2746 if(!c->tcp_do_close) { 2747 fptr_ok(fptr_whitelist_comm_point( 2748 c->callback)); 2749 (void)(*c->callback)(c, c->cb_arg, 2750 NETEVENT_CLOSED, NULL); 2751 } 2752 return 0; 2753 } 2754 } 2755 return 1; 2756 } 2757 2758 /** read again to drain buffers when there could be more to read */ 2759 static void 2760 tcp_more_read_again(int fd, struct comm_point* c) 2761 { 2762 /* if the packet is done, but another one could be waiting on 2763 * the connection, the callback signals this, and we try again */ 2764 /* this continues until the read routines get EAGAIN or so, 2765 * and thus does not call the callback, and the bool is 0 */ 2766 int* moreread = c->tcp_more_read_again; 2767 while(moreread && *moreread) { 2768 *moreread = 0; 2769 if(!comm_point_tcp_handle_read(fd, c, 0)) { 2770 reclaim_tcp_handler(c); 2771 if(!c->tcp_do_close) { 2772 fptr_ok(fptr_whitelist_comm_point( 2773 c->callback)); 2774 (void)(*c->callback)(c, c->cb_arg, 2775 NETEVENT_CLOSED, NULL); 2776 } 2777 return; 2778 } 2779 } 2780 } 2781 2782 /** write again to fill up when there could be more to write */ 2783 static void 2784 tcp_more_write_again(int fd, struct comm_point* c) 2785 { 2786 /* if the packet is done, but another is waiting to be written, 2787 * the callback signals it and we try again. */ 2788 /* this continues until the write routines get EAGAIN or so, 2789 * and thus does not call the callback, and the bool is 0 */ 2790 int* morewrite = c->tcp_more_write_again; 2791 while(morewrite && *morewrite) { 2792 *morewrite = 0; 2793 if(!comm_point_tcp_handle_write(fd, c)) { 2794 reclaim_tcp_handler(c); 2795 if(!c->tcp_do_close) { 2796 fptr_ok(fptr_whitelist_comm_point( 2797 c->callback)); 2798 (void)(*c->callback)(c, c->cb_arg, 2799 NETEVENT_CLOSED, NULL); 2800 } 2801 return; 2802 } 2803 } 2804 } 2805 2806 void 2807 comm_point_tcp_handle_callback(int fd, short event, void* arg) 2808 { 2809 struct comm_point* c = (struct comm_point*)arg; 2810 log_assert(c->type == comm_tcp); 2811 ub_comm_base_now(c->ev->base); 2812 2813 if(c->fd == -1 || c->fd != fd) 2814 return; /* duplicate event, but commpoint closed. */ 2815 2816 #ifdef USE_DNSCRYPT 2817 /* Initialize if this is a dnscrypt socket */ 2818 if(c->tcp_parent) { 2819 c->dnscrypt = c->tcp_parent->dnscrypt; 2820 } 2821 if(c->dnscrypt && c->dnscrypt_buffer == c->buffer) { 2822 c->dnscrypt_buffer = sldns_buffer_new(sldns_buffer_capacity(c->buffer)); 2823 if(!c->dnscrypt_buffer) { 2824 log_err("Could not allocate dnscrypt buffer"); 2825 reclaim_tcp_handler(c); 2826 if(!c->tcp_do_close) { 2827 fptr_ok(fptr_whitelist_comm_point( 2828 c->callback)); 2829 (void)(*c->callback)(c, c->cb_arg, 2830 NETEVENT_CLOSED, NULL); 2831 } 2832 return; 2833 } 2834 } 2835 #endif 2836 2837 if(event&UB_EV_TIMEOUT) { 2838 verbose(VERB_QUERY, "tcp took too long, dropped"); 2839 reclaim_tcp_handler(c); 2840 if(!c->tcp_do_close) { 2841 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2842 (void)(*c->callback)(c, c->cb_arg, 2843 NETEVENT_TIMEOUT, NULL); 2844 } 2845 return; 2846 } 2847 if(event&UB_EV_READ 2848 #ifdef USE_MSG_FASTOPEN 2849 && !(c->tcp_do_fastopen && (event&UB_EV_WRITE)) 2850 #endif 2851 ) { 2852 int has_tcpq = (c->tcp_req_info != NULL); 2853 int* moreread = c->tcp_more_read_again; 2854 if(!comm_point_tcp_handle_read(fd, c, 0)) { 2855 reclaim_tcp_handler(c); 2856 if(!c->tcp_do_close) { 2857 fptr_ok(fptr_whitelist_comm_point( 2858 c->callback)); 2859 (void)(*c->callback)(c, c->cb_arg, 2860 NETEVENT_CLOSED, NULL); 2861 } 2862 return; 2863 } 2864 if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again) { 2865 if(!tcp_req_info_read_again(fd, c)) 2866 return; 2867 } 2868 if(moreread && *moreread) 2869 tcp_more_read_again(fd, c); 2870 return; 2871 } 2872 if(event&UB_EV_WRITE) { 2873 int has_tcpq = (c->tcp_req_info != NULL); 2874 int* morewrite = c->tcp_more_write_again; 2875 if(!comm_point_tcp_handle_write(fd, c)) { 2876 reclaim_tcp_handler(c); 2877 if(!c->tcp_do_close) { 2878 fptr_ok(fptr_whitelist_comm_point( 2879 c->callback)); 2880 (void)(*c->callback)(c, c->cb_arg, 2881 NETEVENT_CLOSED, NULL); 2882 } 2883 return; 2884 } 2885 if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again) { 2886 if(!tcp_req_info_read_again(fd, c)) 2887 return; 2888 } 2889 if(morewrite && *morewrite) 2890 tcp_more_write_again(fd, c); 2891 return; 2892 } 2893 log_err("Ignored event %d for tcphdl.", event); 2894 } 2895 2896 /** Make http handler free for next assignment */ 2897 static void 2898 reclaim_http_handler(struct comm_point* c) 2899 { 2900 log_assert(c->type == comm_http); 2901 if(c->ssl) { 2902 #ifdef HAVE_SSL 2903 SSL_shutdown(c->ssl); 2904 SSL_free(c->ssl); 2905 c->ssl = NULL; 2906 #endif 2907 } 2908 comm_point_close(c); 2909 if(c->tcp_parent) { 2910 if(c != c->tcp_parent->tcp_free) { 2911 c->tcp_parent->cur_tcp_count--; 2912 c->tcp_free = c->tcp_parent->tcp_free; 2913 c->tcp_parent->tcp_free = c; 2914 } 2915 if(!c->tcp_free) { 2916 /* re-enable listening on accept socket */ 2917 comm_point_start_listening(c->tcp_parent, -1, -1); 2918 } 2919 } 2920 } 2921 2922 /** read more data for http (with ssl) */ 2923 static int 2924 ssl_http_read_more(struct comm_point* c) 2925 { 2926 #ifdef HAVE_SSL 2927 int r; 2928 log_assert(sldns_buffer_remaining(c->buffer) > 0); 2929 ERR_clear_error(); 2930 r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer), 2931 (int)sldns_buffer_remaining(c->buffer)); 2932 if(r <= 0) { 2933 int want = SSL_get_error(c->ssl, r); 2934 if(want == SSL_ERROR_ZERO_RETURN) { 2935 return 0; /* shutdown, closed */ 2936 } else if(want == SSL_ERROR_WANT_READ) { 2937 return 1; /* read more later */ 2938 } else if(want == SSL_ERROR_WANT_WRITE) { 2939 c->ssl_shake_state = comm_ssl_shake_hs_write; 2940 comm_point_listen_for_rw(c, 0, 1); 2941 return 1; 2942 } else if(want == SSL_ERROR_SYSCALL) { 2943 #ifdef ECONNRESET 2944 if(errno == ECONNRESET && verbosity < 2) 2945 return 0; /* silence reset by peer */ 2946 #endif 2947 if(errno != 0) 2948 log_err("SSL_read syscall: %s", 2949 strerror(errno)); 2950 return 0; 2951 } 2952 log_crypto_err_io("could not SSL_read", want); 2953 return 0; 2954 } 2955 verbose(VERB_ALGO, "ssl http read more skip to %d + %d", 2956 (int)sldns_buffer_position(c->buffer), (int)r); 2957 sldns_buffer_skip(c->buffer, (ssize_t)r); 2958 return 1; 2959 #else 2960 (void)c; 2961 return 0; 2962 #endif /* HAVE_SSL */ 2963 } 2964 2965 /** read more data for http */ 2966 static int 2967 http_read_more(int fd, struct comm_point* c) 2968 { 2969 ssize_t r; 2970 log_assert(sldns_buffer_remaining(c->buffer) > 0); 2971 r = recv(fd, (void*)sldns_buffer_current(c->buffer), 2972 sldns_buffer_remaining(c->buffer), MSG_DONTWAIT); 2973 if(r == 0) { 2974 return 0; 2975 } else if(r == -1) { 2976 #ifndef USE_WINSOCK 2977 if(errno == EINTR || errno == EAGAIN) 2978 return 1; 2979 #else /* USE_WINSOCK */ 2980 if(WSAGetLastError() == WSAECONNRESET) 2981 return 0; 2982 if(WSAGetLastError() == WSAEINPROGRESS) 2983 return 1; 2984 if(WSAGetLastError() == WSAEWOULDBLOCK) { 2985 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 2986 return 1; 2987 } 2988 #endif 2989 log_err_addr("read (in http r)", sock_strerror(errno), 2990 &c->repinfo.remote_addr, c->repinfo.remote_addrlen); 2991 return 0; 2992 } 2993 verbose(VERB_ALGO, "http read more skip to %d + %d", 2994 (int)sldns_buffer_position(c->buffer), (int)r); 2995 sldns_buffer_skip(c->buffer, r); 2996 return 1; 2997 } 2998 2999 /** return true if http header has been read (one line complete) */ 3000 static int 3001 http_header_done(sldns_buffer* buf) 3002 { 3003 size_t i; 3004 for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) { 3005 /* there was a \r before the \n, but we ignore that */ 3006 if((char)sldns_buffer_read_u8_at(buf, i) == '\n') 3007 return 1; 3008 } 3009 return 0; 3010 } 3011 3012 /** return character string into buffer for header line, moves buffer 3013 * past that line and puts zero terminator into linefeed-newline */ 3014 static char* 3015 http_header_line(sldns_buffer* buf) 3016 { 3017 char* result = (char*)sldns_buffer_current(buf); 3018 size_t i; 3019 for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) { 3020 /* terminate the string on the \r */ 3021 if((char)sldns_buffer_read_u8_at(buf, i) == '\r') 3022 sldns_buffer_write_u8_at(buf, i, 0); 3023 /* terminate on the \n and skip past the it and done */ 3024 if((char)sldns_buffer_read_u8_at(buf, i) == '\n') { 3025 sldns_buffer_write_u8_at(buf, i, 0); 3026 sldns_buffer_set_position(buf, i+1); 3027 return result; 3028 } 3029 } 3030 return NULL; 3031 } 3032 3033 /** move unread buffer to start and clear rest for putting the rest into it */ 3034 static void 3035 http_moveover_buffer(sldns_buffer* buf) 3036 { 3037 size_t pos = sldns_buffer_position(buf); 3038 size_t len = sldns_buffer_remaining(buf); 3039 sldns_buffer_clear(buf); 3040 memmove(sldns_buffer_begin(buf), sldns_buffer_at(buf, pos), len); 3041 sldns_buffer_set_position(buf, len); 3042 } 3043 3044 /** a http header is complete, process it */ 3045 static int 3046 http_process_initial_header(struct comm_point* c) 3047 { 3048 char* line = http_header_line(c->buffer); 3049 if(!line) return 1; 3050 verbose(VERB_ALGO, "http header: %s", line); 3051 if(strncasecmp(line, "HTTP/1.1 ", 9) == 0) { 3052 /* check returncode */ 3053 if(line[9] != '2') { 3054 verbose(VERB_ALGO, "http bad status %s", line+9); 3055 return 0; 3056 } 3057 } else if(strncasecmp(line, "Content-Length: ", 16) == 0) { 3058 if(!c->http_is_chunked) 3059 c->tcp_byte_count = (size_t)atoi(line+16); 3060 } else if(strncasecmp(line, "Transfer-Encoding: chunked", 19+7) == 0) { 3061 c->tcp_byte_count = 0; 3062 c->http_is_chunked = 1; 3063 } else if(line[0] == 0) { 3064 /* end of initial headers */ 3065 c->http_in_headers = 0; 3066 if(c->http_is_chunked) 3067 c->http_in_chunk_headers = 1; 3068 /* remove header text from front of buffer 3069 * the buffer is going to be used to return the data segment 3070 * itself and we don't want the header to get returned 3071 * prepended with it */ 3072 http_moveover_buffer(c->buffer); 3073 sldns_buffer_flip(c->buffer); 3074 return 1; 3075 } 3076 /* ignore other headers */ 3077 return 1; 3078 } 3079 3080 /** a chunk header is complete, process it, return 0=fail, 1=continue next 3081 * header line, 2=done with chunked transfer*/ 3082 static int 3083 http_process_chunk_header(struct comm_point* c) 3084 { 3085 char* line = http_header_line(c->buffer); 3086 if(!line) return 1; 3087 if(c->http_in_chunk_headers == 3) { 3088 verbose(VERB_ALGO, "http chunk trailer: %s", line); 3089 /* are we done ? */ 3090 if(line[0] == 0 && c->tcp_byte_count == 0) { 3091 /* callback of http reader when NETEVENT_DONE, 3092 * end of data, with no data in buffer */ 3093 sldns_buffer_set_position(c->buffer, 0); 3094 sldns_buffer_set_limit(c->buffer, 0); 3095 fptr_ok(fptr_whitelist_comm_point(c->callback)); 3096 (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL); 3097 /* return that we are done */ 3098 return 2; 3099 } 3100 if(line[0] == 0) { 3101 /* continue with header of the next chunk */ 3102 c->http_in_chunk_headers = 1; 3103 /* remove header text from front of buffer */ 3104 http_moveover_buffer(c->buffer); 3105 sldns_buffer_flip(c->buffer); 3106 return 1; 3107 } 3108 /* ignore further trail headers */ 3109 return 1; 3110 } 3111 verbose(VERB_ALGO, "http chunk header: %s", line); 3112 if(c->http_in_chunk_headers == 1) { 3113 /* read chunked start line */ 3114 char* end = NULL; 3115 c->tcp_byte_count = (size_t)strtol(line, &end, 16); 3116 if(end == line) 3117 return 0; 3118 c->http_in_chunk_headers = 0; 3119 /* remove header text from front of buffer */ 3120 http_moveover_buffer(c->buffer); 3121 sldns_buffer_flip(c->buffer); 3122 if(c->tcp_byte_count == 0) { 3123 /* done with chunks, process chunk_trailer lines */ 3124 c->http_in_chunk_headers = 3; 3125 } 3126 return 1; 3127 } 3128 /* ignore other headers */ 3129 return 1; 3130 } 3131 3132 /** handle nonchunked data segment, 0=fail, 1=wait */ 3133 static int 3134 http_nonchunk_segment(struct comm_point* c) 3135 { 3136 /* c->buffer at position..limit has new data we read in. 3137 * the buffer itself is full of nonchunked data. 3138 * we are looking to read tcp_byte_count more data 3139 * and then the transfer is done. */ 3140 size_t remainbufferlen; 3141 size_t got_now = sldns_buffer_limit(c->buffer); 3142 if(c->tcp_byte_count <= got_now) { 3143 /* done, this is the last data fragment */ 3144 c->http_stored = 0; 3145 sldns_buffer_set_position(c->buffer, 0); 3146 fptr_ok(fptr_whitelist_comm_point(c->callback)); 3147 (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL); 3148 return 1; 3149 } 3150 /* if we have the buffer space, 3151 * read more data collected into the buffer */ 3152 remainbufferlen = sldns_buffer_capacity(c->buffer) - 3153 sldns_buffer_limit(c->buffer); 3154 if(remainbufferlen+got_now >= c->tcp_byte_count || 3155 remainbufferlen >= (size_t)(c->ssl?16384:2048)) { 3156 size_t total = sldns_buffer_limit(c->buffer); 3157 sldns_buffer_clear(c->buffer); 3158 sldns_buffer_set_position(c->buffer, total); 3159 c->http_stored = total; 3160 /* return and wait to read more */ 3161 return 1; 3162 } 3163 /* call callback with this data amount, then 3164 * wait for more */ 3165 c->tcp_byte_count -= got_now; 3166 c->http_stored = 0; 3167 sldns_buffer_set_position(c->buffer, 0); 3168 fptr_ok(fptr_whitelist_comm_point(c->callback)); 3169 (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL); 3170 /* c->callback has to buffer_clear(c->buffer). */ 3171 /* return and wait to read more */ 3172 return 1; 3173 } 3174 3175 /** handle chunked data segment, return 0=fail, 1=wait, 2=process more */ 3176 static int 3177 http_chunked_segment(struct comm_point* c) 3178 { 3179 /* the c->buffer has from position..limit new data we read. */ 3180 /* the current chunk has length tcp_byte_count. 3181 * once we read that read more chunk headers. 3182 */ 3183 size_t remainbufferlen; 3184 size_t got_now = sldns_buffer_limit(c->buffer) - c->http_stored; 3185 verbose(VERB_ALGO, "http_chunked_segment: got now %d, tcpbytcount %d, http_stored %d, buffer pos %d, buffer limit %d", (int)got_now, (int)c->tcp_byte_count, (int)c->http_stored, (int)sldns_buffer_position(c->buffer), (int)sldns_buffer_limit(c->buffer)); 3186 if(c->tcp_byte_count <= got_now) { 3187 /* the chunk has completed (with perhaps some extra data 3188 * from next chunk header and next chunk) */ 3189 /* save too much info into temp buffer */ 3190 size_t fraglen; 3191 struct comm_reply repinfo; 3192 c->http_stored = 0; 3193 sldns_buffer_skip(c->buffer, (ssize_t)c->tcp_byte_count); 3194 sldns_buffer_clear(c->http_temp); 3195 sldns_buffer_write(c->http_temp, 3196 sldns_buffer_current(c->buffer), 3197 sldns_buffer_remaining(c->buffer)); 3198 sldns_buffer_flip(c->http_temp); 3199 3200 /* callback with this fragment */ 3201 fraglen = sldns_buffer_position(c->buffer); 3202 sldns_buffer_set_position(c->buffer, 0); 3203 sldns_buffer_set_limit(c->buffer, fraglen); 3204 repinfo = c->repinfo; 3205 fptr_ok(fptr_whitelist_comm_point(c->callback)); 3206 (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &repinfo); 3207 /* c->callback has to buffer_clear(). */ 3208 3209 /* is commpoint deleted? */ 3210 if(!repinfo.c) { 3211 return 1; 3212 } 3213 /* copy waiting info */ 3214 sldns_buffer_clear(c->buffer); 3215 sldns_buffer_write(c->buffer, 3216 sldns_buffer_begin(c->http_temp), 3217 sldns_buffer_remaining(c->http_temp)); 3218 sldns_buffer_flip(c->buffer); 3219 /* process end of chunk trailer header lines, until 3220 * an empty line */ 3221 c->http_in_chunk_headers = 3; 3222 /* process more data in buffer (if any) */ 3223 return 2; 3224 } 3225 c->tcp_byte_count -= got_now; 3226 3227 /* if we have the buffer space, 3228 * read more data collected into the buffer */ 3229 remainbufferlen = sldns_buffer_capacity(c->buffer) - 3230 sldns_buffer_limit(c->buffer); 3231 if(remainbufferlen >= c->tcp_byte_count || 3232 remainbufferlen >= 2048) { 3233 size_t total = sldns_buffer_limit(c->buffer); 3234 sldns_buffer_clear(c->buffer); 3235 sldns_buffer_set_position(c->buffer, total); 3236 c->http_stored = total; 3237 /* return and wait to read more */ 3238 return 1; 3239 } 3240 3241 /* callback of http reader for a new part of the data */ 3242 c->http_stored = 0; 3243 sldns_buffer_set_position(c->buffer, 0); 3244 fptr_ok(fptr_whitelist_comm_point(c->callback)); 3245 (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL); 3246 /* c->callback has to buffer_clear(c->buffer). */ 3247 /* return and wait to read more */ 3248 return 1; 3249 } 3250 3251 #ifdef HAVE_NGHTTP2 3252 /** Create new http2 session. Called when creating handling comm point. */ 3253 static struct http2_session* http2_session_create(struct comm_point* c) 3254 { 3255 struct http2_session* session = calloc(1, sizeof(*session)); 3256 if(!session) { 3257 log_err("malloc failure while creating http2 session"); 3258 return NULL; 3259 } 3260 session->c = c; 3261 3262 return session; 3263 } 3264 #endif 3265 3266 /** Delete http2 session. After closing connection or on error */ 3267 static void http2_session_delete(struct http2_session* h2_session) 3268 { 3269 #ifdef HAVE_NGHTTP2 3270 if(h2_session->callbacks) 3271 nghttp2_session_callbacks_del(h2_session->callbacks); 3272 free(h2_session); 3273 #else 3274 (void)h2_session; 3275 #endif 3276 } 3277 3278 #ifdef HAVE_NGHTTP2 3279 struct http2_stream* http2_stream_create(int32_t stream_id) 3280 { 3281 struct http2_stream* h2_stream = calloc(1, sizeof(*h2_stream)); 3282 if(!h2_stream) { 3283 log_err("malloc failure while creating http2 stream"); 3284 return NULL; 3285 } 3286 h2_stream->stream_id = stream_id; 3287 return h2_stream; 3288 } 3289 3290 /** Delete http2 stream. After session delete or stream close callback */ 3291 static void http2_stream_delete(struct http2_session* h2_session, 3292 struct http2_stream* h2_stream) 3293 { 3294 if(h2_stream->mesh_state) { 3295 mesh_state_remove_reply(h2_stream->mesh, h2_stream->mesh_state, 3296 h2_session->c); 3297 h2_stream->mesh_state = NULL; 3298 } 3299 http2_req_stream_clear(h2_stream); 3300 free(h2_stream); 3301 } 3302 #endif 3303 3304 void http2_stream_add_meshstate(struct http2_stream* h2_stream, 3305 struct mesh_area* mesh, struct mesh_state* m) 3306 { 3307 h2_stream->mesh = mesh; 3308 h2_stream->mesh_state = m; 3309 } 3310 3311 void http2_stream_remove_mesh_state(struct http2_stream* h2_stream) 3312 { 3313 if(!h2_stream) 3314 return; 3315 h2_stream->mesh_state = NULL; 3316 } 3317 3318 /** delete http2 session server. After closing connection. */ 3319 static void http2_session_server_delete(struct http2_session* h2_session) 3320 { 3321 #ifdef HAVE_NGHTTP2 3322 struct http2_stream* h2_stream, *next; 3323 nghttp2_session_del(h2_session->session); /* NULL input is fine */ 3324 h2_session->session = NULL; 3325 for(h2_stream = h2_session->first_stream; h2_stream;) { 3326 next = h2_stream->next; 3327 http2_stream_delete(h2_session, h2_stream); 3328 h2_stream = next; 3329 } 3330 h2_session->first_stream = NULL; 3331 h2_session->is_drop = 0; 3332 h2_session->postpone_drop = 0; 3333 h2_session->c->h2_stream = NULL; 3334 #endif 3335 (void)h2_session; 3336 } 3337 3338 #ifdef HAVE_NGHTTP2 3339 void http2_session_add_stream(struct http2_session* h2_session, 3340 struct http2_stream* h2_stream) 3341 { 3342 if(h2_session->first_stream) 3343 h2_session->first_stream->prev = h2_stream; 3344 h2_stream->next = h2_session->first_stream; 3345 h2_session->first_stream = h2_stream; 3346 } 3347 3348 /** remove stream from session linked list. After stream close callback or 3349 * closing connection */ 3350 static void http2_session_remove_stream(struct http2_session* h2_session, 3351 struct http2_stream* h2_stream) 3352 { 3353 if(h2_stream->prev) 3354 h2_stream->prev->next = h2_stream->next; 3355 else 3356 h2_session->first_stream = h2_stream->next; 3357 if(h2_stream->next) 3358 h2_stream->next->prev = h2_stream->prev; 3359 3360 } 3361 3362 int http2_stream_close_cb(nghttp2_session* ATTR_UNUSED(session), 3363 int32_t stream_id, uint32_t ATTR_UNUSED(error_code), void* cb_arg) 3364 { 3365 struct http2_stream* h2_stream; 3366 struct http2_session* h2_session = (struct http2_session*)cb_arg; 3367 if(!(h2_stream = nghttp2_session_get_stream_user_data( 3368 h2_session->session, stream_id))) { 3369 return 0; 3370 } 3371 http2_session_remove_stream(h2_session, h2_stream); 3372 http2_stream_delete(h2_session, h2_stream); 3373 return 0; 3374 } 3375 3376 ssize_t http2_recv_cb(nghttp2_session* ATTR_UNUSED(session), uint8_t* buf, 3377 size_t len, int ATTR_UNUSED(flags), void* cb_arg) 3378 { 3379 struct http2_session* h2_session = (struct http2_session*)cb_arg; 3380 ssize_t ret; 3381 3382 log_assert(h2_session->c->type == comm_http); 3383 log_assert(h2_session->c->h2_session); 3384 3385 #ifdef HAVE_SSL 3386 if(h2_session->c->ssl) { 3387 int r; 3388 ERR_clear_error(); 3389 r = SSL_read(h2_session->c->ssl, buf, len); 3390 if(r <= 0) { 3391 int want = SSL_get_error(h2_session->c->ssl, r); 3392 if(want == SSL_ERROR_ZERO_RETURN) { 3393 return NGHTTP2_ERR_EOF; 3394 } else if(want == SSL_ERROR_WANT_READ) { 3395 return NGHTTP2_ERR_WOULDBLOCK; 3396 } else if(want == SSL_ERROR_WANT_WRITE) { 3397 h2_session->c->ssl_shake_state = comm_ssl_shake_hs_write; 3398 comm_point_listen_for_rw(h2_session->c, 0, 1); 3399 return NGHTTP2_ERR_WOULDBLOCK; 3400 } else if(want == SSL_ERROR_SYSCALL) { 3401 #ifdef ECONNRESET 3402 if(errno == ECONNRESET && verbosity < 2) 3403 return NGHTTP2_ERR_CALLBACK_FAILURE; 3404 #endif 3405 if(errno != 0) 3406 log_err("SSL_read syscall: %s", 3407 strerror(errno)); 3408 return NGHTTP2_ERR_CALLBACK_FAILURE; 3409 } 3410 log_crypto_err_io("could not SSL_read", want); 3411 return NGHTTP2_ERR_CALLBACK_FAILURE; 3412 } 3413 return r; 3414 } 3415 #endif /* HAVE_SSL */ 3416 3417 ret = recv(h2_session->c->fd, buf, len, MSG_DONTWAIT); 3418 if(ret == 0) { 3419 return NGHTTP2_ERR_EOF; 3420 } else if(ret < 0) { 3421 #ifndef USE_WINSOCK 3422 if(errno == EINTR || errno == EAGAIN) 3423 return NGHTTP2_ERR_WOULDBLOCK; 3424 #ifdef ECONNRESET 3425 if(errno == ECONNRESET && verbosity < 2) 3426 return NGHTTP2_ERR_CALLBACK_FAILURE; 3427 #endif 3428 log_err_addr("could not http2 recv: %s", strerror(errno), 3429 &h2_session->c->repinfo.remote_addr, 3430 h2_session->c->repinfo.remote_addrlen); 3431 #else /* USE_WINSOCK */ 3432 if(WSAGetLastError() == WSAECONNRESET) 3433 return NGHTTP2_ERR_CALLBACK_FAILURE; 3434 if(WSAGetLastError() == WSAEINPROGRESS) 3435 return NGHTTP2_ERR_WOULDBLOCK; 3436 if(WSAGetLastError() == WSAEWOULDBLOCK) { 3437 ub_winsock_tcp_wouldblock(h2_session->c->ev->ev, 3438 UB_EV_READ); 3439 return NGHTTP2_ERR_WOULDBLOCK; 3440 } 3441 log_err_addr("could not http2 recv: %s", 3442 wsa_strerror(WSAGetLastError()), 3443 &h2_session->c->repinfo.remote_addr, 3444 h2_session->c->repinfo.remote_addrlen); 3445 #endif 3446 return NGHTTP2_ERR_CALLBACK_FAILURE; 3447 } 3448 return ret; 3449 } 3450 #endif /* HAVE_NGHTTP2 */ 3451 3452 /** Handle http2 read */ 3453 static int 3454 comm_point_http2_handle_read(int ATTR_UNUSED(fd), struct comm_point* c) 3455 { 3456 #ifdef HAVE_NGHTTP2 3457 int ret; 3458 log_assert(c->h2_session); 3459 3460 /* reading until recv cb returns NGHTTP2_ERR_WOULDBLOCK */ 3461 ret = nghttp2_session_recv(c->h2_session->session); 3462 if(ret) { 3463 if(ret != NGHTTP2_ERR_EOF && 3464 ret != NGHTTP2_ERR_CALLBACK_FAILURE) { 3465 char a[256]; 3466 addr_to_str(&c->repinfo.remote_addr, 3467 c->repinfo.remote_addrlen, a, sizeof(a)); 3468 verbose(VERB_QUERY, "http2: session_recv from %s failed, " 3469 "error: %s", a, nghttp2_strerror(ret)); 3470 } 3471 return 0; 3472 } 3473 if(nghttp2_session_want_write(c->h2_session->session)) { 3474 c->tcp_is_reading = 0; 3475 comm_point_stop_listening(c); 3476 comm_point_start_listening(c, -1, adjusted_tcp_timeout(c)); 3477 } else if(!nghttp2_session_want_read(c->h2_session->session)) 3478 return 0; /* connection can be closed */ 3479 return 1; 3480 #else 3481 (void)c; 3482 return 0; 3483 #endif 3484 } 3485 3486 /** 3487 * Handle http reading callback. 3488 * @param fd: file descriptor of socket. 3489 * @param c: comm point to read from into buffer. 3490 * @return: 0 on error 3491 */ 3492 static int 3493 comm_point_http_handle_read(int fd, struct comm_point* c) 3494 { 3495 log_assert(c->type == comm_http); 3496 log_assert(fd != -1); 3497 3498 /* if we are in ssl handshake, handle SSL handshake */ 3499 #ifdef HAVE_SSL 3500 if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) { 3501 if(!ssl_handshake(c)) 3502 return 0; 3503 if(c->ssl_shake_state != comm_ssl_shake_none) 3504 return 1; 3505 } 3506 #endif /* HAVE_SSL */ 3507 3508 if(!c->tcp_is_reading) 3509 return 1; 3510 3511 if(c->use_h2) { 3512 return comm_point_http2_handle_read(fd, c); 3513 } 3514 3515 /* http version is <= http/1.1 */ 3516 3517 if(c->http_min_version >= http_version_2) { 3518 /* HTTP/2 failed, not allowed to use lower version. */ 3519 return 0; 3520 } 3521 3522 /* read more data */ 3523 if(c->ssl) { 3524 if(!ssl_http_read_more(c)) 3525 return 0; 3526 } else { 3527 if(!http_read_more(fd, c)) 3528 return 0; 3529 } 3530 3531 if(c->http_stored >= sldns_buffer_position(c->buffer)) { 3532 /* read did not work but we wanted more data, there is 3533 * no bytes to process now. */ 3534 return 1; 3535 } 3536 sldns_buffer_flip(c->buffer); 3537 /* if we are partway in a segment of data, position us at the point 3538 * where we left off previously */ 3539 if(c->http_stored < sldns_buffer_limit(c->buffer)) 3540 sldns_buffer_set_position(c->buffer, c->http_stored); 3541 else sldns_buffer_set_position(c->buffer, sldns_buffer_limit(c->buffer)); 3542 3543 while(sldns_buffer_remaining(c->buffer) > 0) { 3544 /* Handle HTTP/1.x data */ 3545 /* if we are reading headers, read more headers */ 3546 if(c->http_in_headers || c->http_in_chunk_headers) { 3547 /* if header is done, process the header */ 3548 if(!http_header_done(c->buffer)) { 3549 /* copy remaining data to front of buffer 3550 * and set rest for writing into it */ 3551 http_moveover_buffer(c->buffer); 3552 /* return and wait to read more */ 3553 return 1; 3554 } 3555 if(!c->http_in_chunk_headers) { 3556 /* process initial headers */ 3557 if(!http_process_initial_header(c)) 3558 return 0; 3559 } else { 3560 /* process chunk headers */ 3561 int r = http_process_chunk_header(c); 3562 if(r == 0) return 0; 3563 if(r == 2) return 1; /* done */ 3564 /* r == 1, continue */ 3565 } 3566 /* see if we have more to process */ 3567 continue; 3568 } 3569 3570 if(!c->http_is_chunked) { 3571 /* if we are reading nonchunks, process that*/ 3572 return http_nonchunk_segment(c); 3573 } else { 3574 /* if we are reading chunks, read the chunk */ 3575 int r = http_chunked_segment(c); 3576 if(r == 0) return 0; 3577 if(r == 1) return 1; 3578 continue; 3579 } 3580 } 3581 /* broke out of the loop; could not process header instead need 3582 * to read more */ 3583 /* moveover any remaining data and read more data */ 3584 http_moveover_buffer(c->buffer); 3585 /* return and wait to read more */ 3586 return 1; 3587 } 3588 3589 /** check pending connect for http */ 3590 static int 3591 http_check_connect(int fd, struct comm_point* c) 3592 { 3593 /* check for pending error from nonblocking connect */ 3594 /* from Stevens, unix network programming, vol1, 3rd ed, p450*/ 3595 int error = 0; 3596 socklen_t len = (socklen_t)sizeof(error); 3597 if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error, 3598 &len) < 0){ 3599 #ifndef USE_WINSOCK 3600 error = errno; /* on solaris errno is error */ 3601 #else /* USE_WINSOCK */ 3602 error = WSAGetLastError(); 3603 #endif 3604 } 3605 #ifndef USE_WINSOCK 3606 #if defined(EINPROGRESS) && defined(EWOULDBLOCK) 3607 if(error == EINPROGRESS || error == EWOULDBLOCK) 3608 return 1; /* try again later */ 3609 else 3610 #endif 3611 if(error != 0 && verbosity < 2) 3612 return 0; /* silence lots of chatter in the logs */ 3613 else if(error != 0) { 3614 log_err_addr("http connect", strerror(error), 3615 &c->repinfo.remote_addr, c->repinfo.remote_addrlen); 3616 #else /* USE_WINSOCK */ 3617 /* examine error */ 3618 if(error == WSAEINPROGRESS) 3619 return 1; 3620 else if(error == WSAEWOULDBLOCK) { 3621 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 3622 return 1; 3623 } else if(error != 0 && verbosity < 2) 3624 return 0; 3625 else if(error != 0) { 3626 log_err_addr("http connect", wsa_strerror(error), 3627 &c->repinfo.remote_addr, c->repinfo.remote_addrlen); 3628 #endif /* USE_WINSOCK */ 3629 return 0; 3630 } 3631 /* keep on processing this socket */ 3632 return 2; 3633 } 3634 3635 /** write more data for http (with ssl) */ 3636 static int 3637 ssl_http_write_more(struct comm_point* c) 3638 { 3639 #ifdef HAVE_SSL 3640 int r; 3641 log_assert(sldns_buffer_remaining(c->buffer) > 0); 3642 ERR_clear_error(); 3643 r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer), 3644 (int)sldns_buffer_remaining(c->buffer)); 3645 if(r <= 0) { 3646 int want = SSL_get_error(c->ssl, r); 3647 if(want == SSL_ERROR_ZERO_RETURN) { 3648 return 0; /* closed */ 3649 } else if(want == SSL_ERROR_WANT_READ) { 3650 c->ssl_shake_state = comm_ssl_shake_hs_read; 3651 comm_point_listen_for_rw(c, 1, 0); 3652 return 1; /* wait for read condition */ 3653 } else if(want == SSL_ERROR_WANT_WRITE) { 3654 return 1; /* write more later */ 3655 } else if(want == SSL_ERROR_SYSCALL) { 3656 #ifdef EPIPE 3657 if(errno == EPIPE && verbosity < 2) 3658 return 0; /* silence 'broken pipe' */ 3659 #endif 3660 if(errno != 0) 3661 log_err("SSL_write syscall: %s", 3662 strerror(errno)); 3663 return 0; 3664 } 3665 log_crypto_err_io("could not SSL_write", want); 3666 return 0; 3667 } 3668 sldns_buffer_skip(c->buffer, (ssize_t)r); 3669 return 1; 3670 #else 3671 (void)c; 3672 return 0; 3673 #endif /* HAVE_SSL */ 3674 } 3675 3676 /** write more data for http */ 3677 static int 3678 http_write_more(int fd, struct comm_point* c) 3679 { 3680 ssize_t r; 3681 log_assert(sldns_buffer_remaining(c->buffer) > 0); 3682 r = send(fd, (void*)sldns_buffer_current(c->buffer), 3683 sldns_buffer_remaining(c->buffer), 0); 3684 if(r == -1) { 3685 #ifndef USE_WINSOCK 3686 if(errno == EINTR || errno == EAGAIN) 3687 return 1; 3688 #else 3689 if(WSAGetLastError() == WSAEINPROGRESS) 3690 return 1; 3691 if(WSAGetLastError() == WSAEWOULDBLOCK) { 3692 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 3693 return 1; 3694 } 3695 #endif 3696 log_err_addr("http send r", sock_strerror(errno), 3697 &c->repinfo.remote_addr, c->repinfo.remote_addrlen); 3698 return 0; 3699 } 3700 sldns_buffer_skip(c->buffer, r); 3701 return 1; 3702 } 3703 3704 #ifdef HAVE_NGHTTP2 3705 ssize_t http2_send_cb(nghttp2_session* ATTR_UNUSED(session), const uint8_t* buf, 3706 size_t len, int ATTR_UNUSED(flags), void* cb_arg) 3707 { 3708 ssize_t ret; 3709 struct http2_session* h2_session = (struct http2_session*)cb_arg; 3710 log_assert(h2_session->c->type == comm_http); 3711 log_assert(h2_session->c->h2_session); 3712 3713 #ifdef HAVE_SSL 3714 if(h2_session->c->ssl) { 3715 int r; 3716 ERR_clear_error(); 3717 r = SSL_write(h2_session->c->ssl, buf, len); 3718 if(r <= 0) { 3719 int want = SSL_get_error(h2_session->c->ssl, r); 3720 if(want == SSL_ERROR_ZERO_RETURN) { 3721 return NGHTTP2_ERR_CALLBACK_FAILURE; 3722 } else if(want == SSL_ERROR_WANT_READ) { 3723 h2_session->c->ssl_shake_state = comm_ssl_shake_hs_read; 3724 comm_point_listen_for_rw(h2_session->c, 1, 0); 3725 return NGHTTP2_ERR_WOULDBLOCK; 3726 } else if(want == SSL_ERROR_WANT_WRITE) { 3727 return NGHTTP2_ERR_WOULDBLOCK; 3728 } else if(want == SSL_ERROR_SYSCALL) { 3729 #ifdef EPIPE 3730 if(errno == EPIPE && verbosity < 2) 3731 return NGHTTP2_ERR_CALLBACK_FAILURE; 3732 #endif 3733 if(errno != 0) 3734 log_err("SSL_write syscall: %s", 3735 strerror(errno)); 3736 return NGHTTP2_ERR_CALLBACK_FAILURE; 3737 } 3738 log_crypto_err_io("could not SSL_write", want); 3739 return NGHTTP2_ERR_CALLBACK_FAILURE; 3740 } 3741 return r; 3742 } 3743 #endif /* HAVE_SSL */ 3744 3745 ret = send(h2_session->c->fd, buf, len, 0); 3746 if(ret == 0) { 3747 return NGHTTP2_ERR_CALLBACK_FAILURE; 3748 } else if(ret < 0) { 3749 #ifndef USE_WINSOCK 3750 if(errno == EINTR || errno == EAGAIN) 3751 return NGHTTP2_ERR_WOULDBLOCK; 3752 #ifdef EPIPE 3753 if(errno == EPIPE && verbosity < 2) 3754 return NGHTTP2_ERR_CALLBACK_FAILURE; 3755 #endif 3756 #ifdef ECONNRESET 3757 if(errno == ECONNRESET && verbosity < 2) 3758 return NGHTTP2_ERR_CALLBACK_FAILURE; 3759 #endif 3760 log_err_addr("could not http2 write: %s", strerror(errno), 3761 &h2_session->c->repinfo.remote_addr, 3762 h2_session->c->repinfo.remote_addrlen); 3763 #else /* USE_WINSOCK */ 3764 if(WSAGetLastError() == WSAENOTCONN) 3765 return NGHTTP2_ERR_WOULDBLOCK; 3766 if(WSAGetLastError() == WSAEINPROGRESS) 3767 return NGHTTP2_ERR_WOULDBLOCK; 3768 if(WSAGetLastError() == WSAEWOULDBLOCK) { 3769 ub_winsock_tcp_wouldblock(h2_session->c->ev->ev, 3770 UB_EV_WRITE); 3771 return NGHTTP2_ERR_WOULDBLOCK; 3772 } 3773 if(WSAGetLastError() == WSAECONNRESET && verbosity < 2) 3774 return NGHTTP2_ERR_CALLBACK_FAILURE; 3775 log_err_addr("could not http2 write: %s", 3776 wsa_strerror(WSAGetLastError()), 3777 &h2_session->c->repinfo.remote_addr, 3778 h2_session->c->repinfo.remote_addrlen); 3779 #endif 3780 return NGHTTP2_ERR_CALLBACK_FAILURE; 3781 } 3782 return ret; 3783 } 3784 #endif /* HAVE_NGHTTP2 */ 3785 3786 /** Handle http2 writing */ 3787 static int 3788 comm_point_http2_handle_write(int ATTR_UNUSED(fd), struct comm_point* c) 3789 { 3790 #ifdef HAVE_NGHTTP2 3791 int ret; 3792 log_assert(c->h2_session); 3793 3794 ret = nghttp2_session_send(c->h2_session->session); 3795 if(ret) { 3796 verbose(VERB_QUERY, "http2: session_send failed, " 3797 "error: %s", nghttp2_strerror(ret)); 3798 return 0; 3799 } 3800 3801 if(nghttp2_session_want_read(c->h2_session->session)) { 3802 c->tcp_is_reading = 1; 3803 comm_point_stop_listening(c); 3804 comm_point_start_listening(c, -1, adjusted_tcp_timeout(c)); 3805 } else if(!nghttp2_session_want_write(c->h2_session->session)) 3806 return 0; /* connection can be closed */ 3807 return 1; 3808 #else 3809 (void)c; 3810 return 0; 3811 #endif 3812 } 3813 3814 /** 3815 * Handle http writing callback. 3816 * @param fd: file descriptor of socket. 3817 * @param c: comm point to write buffer out of. 3818 * @return: 0 on error 3819 */ 3820 static int 3821 comm_point_http_handle_write(int fd, struct comm_point* c) 3822 { 3823 log_assert(c->type == comm_http); 3824 log_assert(fd != -1); 3825 3826 /* check pending connect errors, if that fails, we wait for more, 3827 * or we can continue to write contents */ 3828 if(c->tcp_check_nb_connect) { 3829 int r = http_check_connect(fd, c); 3830 if(r == 0) return 0; 3831 if(r == 1) return 1; 3832 c->tcp_check_nb_connect = 0; 3833 } 3834 /* if we are in ssl handshake, handle SSL handshake */ 3835 #ifdef HAVE_SSL 3836 if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) { 3837 if(!ssl_handshake(c)) 3838 return 0; 3839 if(c->ssl_shake_state != comm_ssl_shake_none) 3840 return 1; 3841 } 3842 #endif /* HAVE_SSL */ 3843 if(c->tcp_is_reading) 3844 return 1; 3845 3846 if(c->use_h2) { 3847 return comm_point_http2_handle_write(fd, c); 3848 } 3849 3850 /* http version is <= http/1.1 */ 3851 3852 if(c->http_min_version >= http_version_2) { 3853 /* HTTP/2 failed, not allowed to use lower version. */ 3854 return 0; 3855 } 3856 3857 /* if we are writing, write more */ 3858 if(c->ssl) { 3859 if(!ssl_http_write_more(c)) 3860 return 0; 3861 } else { 3862 if(!http_write_more(fd, c)) 3863 return 0; 3864 } 3865 3866 /* we write a single buffer contents, that can contain 3867 * the http request, and then flip to read the results */ 3868 /* see if write is done */ 3869 if(sldns_buffer_remaining(c->buffer) == 0) { 3870 sldns_buffer_clear(c->buffer); 3871 if(c->tcp_do_toggle_rw) 3872 c->tcp_is_reading = 1; 3873 c->tcp_byte_count = 0; 3874 /* switch from listening(write) to listening(read) */ 3875 comm_point_stop_listening(c); 3876 comm_point_start_listening(c, -1, -1); 3877 } 3878 return 1; 3879 } 3880 3881 void 3882 comm_point_http_handle_callback(int fd, short event, void* arg) 3883 { 3884 struct comm_point* c = (struct comm_point*)arg; 3885 log_assert(c->type == comm_http); 3886 ub_comm_base_now(c->ev->base); 3887 3888 if(event&UB_EV_TIMEOUT) { 3889 verbose(VERB_QUERY, "http took too long, dropped"); 3890 reclaim_http_handler(c); 3891 if(!c->tcp_do_close) { 3892 fptr_ok(fptr_whitelist_comm_point(c->callback)); 3893 (void)(*c->callback)(c, c->cb_arg, 3894 NETEVENT_TIMEOUT, NULL); 3895 } 3896 return; 3897 } 3898 if(event&UB_EV_READ) { 3899 if(!comm_point_http_handle_read(fd, c)) { 3900 reclaim_http_handler(c); 3901 if(!c->tcp_do_close) { 3902 fptr_ok(fptr_whitelist_comm_point( 3903 c->callback)); 3904 (void)(*c->callback)(c, c->cb_arg, 3905 NETEVENT_CLOSED, NULL); 3906 } 3907 } 3908 return; 3909 } 3910 if(event&UB_EV_WRITE) { 3911 if(!comm_point_http_handle_write(fd, c)) { 3912 reclaim_http_handler(c); 3913 if(!c->tcp_do_close) { 3914 fptr_ok(fptr_whitelist_comm_point( 3915 c->callback)); 3916 (void)(*c->callback)(c, c->cb_arg, 3917 NETEVENT_CLOSED, NULL); 3918 } 3919 } 3920 return; 3921 } 3922 log_err("Ignored event %d for httphdl.", event); 3923 } 3924 3925 void comm_point_local_handle_callback(int fd, short event, void* arg) 3926 { 3927 struct comm_point* c = (struct comm_point*)arg; 3928 log_assert(c->type == comm_local); 3929 ub_comm_base_now(c->ev->base); 3930 3931 if(event&UB_EV_READ) { 3932 if(!comm_point_tcp_handle_read(fd, c, 1)) { 3933 fptr_ok(fptr_whitelist_comm_point(c->callback)); 3934 (void)(*c->callback)(c, c->cb_arg, NETEVENT_CLOSED, 3935 NULL); 3936 } 3937 return; 3938 } 3939 log_err("Ignored event %d for localhdl.", event); 3940 } 3941 3942 void comm_point_raw_handle_callback(int ATTR_UNUSED(fd), 3943 short event, void* arg) 3944 { 3945 struct comm_point* c = (struct comm_point*)arg; 3946 int err = NETEVENT_NOERROR; 3947 log_assert(c->type == comm_raw); 3948 ub_comm_base_now(c->ev->base); 3949 3950 if(event&UB_EV_TIMEOUT) 3951 err = NETEVENT_TIMEOUT; 3952 fptr_ok(fptr_whitelist_comm_point_raw(c->callback)); 3953 (void)(*c->callback)(c, c->cb_arg, err, NULL); 3954 } 3955 3956 struct comm_point* 3957 comm_point_create_udp(struct comm_base *base, int fd, sldns_buffer* buffer, 3958 int pp2_enabled, comm_point_callback_type* callback, 3959 void* callback_arg, struct unbound_socket* socket) 3960 { 3961 struct comm_point* c = (struct comm_point*)calloc(1, 3962 sizeof(struct comm_point)); 3963 short evbits; 3964 if(!c) 3965 return NULL; 3966 c->ev = (struct internal_event*)calloc(1, 3967 sizeof(struct internal_event)); 3968 if(!c->ev) { 3969 free(c); 3970 return NULL; 3971 } 3972 c->ev->base = base; 3973 c->fd = fd; 3974 c->buffer = buffer; 3975 c->timeout = NULL; 3976 c->tcp_is_reading = 0; 3977 c->tcp_byte_count = 0; 3978 c->tcp_parent = NULL; 3979 c->max_tcp_count = 0; 3980 c->cur_tcp_count = 0; 3981 c->tcp_handlers = NULL; 3982 c->tcp_free = NULL; 3983 c->type = comm_udp; 3984 c->tcp_do_close = 0; 3985 c->do_not_close = 0; 3986 c->tcp_do_toggle_rw = 0; 3987 c->tcp_check_nb_connect = 0; 3988 #ifdef USE_MSG_FASTOPEN 3989 c->tcp_do_fastopen = 0; 3990 #endif 3991 #ifdef USE_DNSCRYPT 3992 c->dnscrypt = 0; 3993 c->dnscrypt_buffer = buffer; 3994 #endif 3995 c->inuse = 0; 3996 c->callback = callback; 3997 c->cb_arg = callback_arg; 3998 c->socket = socket; 3999 c->pp2_enabled = pp2_enabled; 4000 c->pp2_header_state = pp2_header_none; 4001 evbits = UB_EV_READ | UB_EV_PERSIST; 4002 /* ub_event stuff */ 4003 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 4004 comm_point_udp_callback, c); 4005 if(c->ev->ev == NULL) { 4006 log_err("could not baseset udp event"); 4007 comm_point_delete(c); 4008 return NULL; 4009 } 4010 if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) { 4011 log_err("could not add udp event"); 4012 comm_point_delete(c); 4013 return NULL; 4014 } 4015 c->event_added = 1; 4016 return c; 4017 } 4018 4019 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG) 4020 struct comm_point* 4021 comm_point_create_udp_ancil(struct comm_base *base, int fd, 4022 sldns_buffer* buffer, int pp2_enabled, 4023 comm_point_callback_type* callback, void* callback_arg, struct unbound_socket* socket) 4024 { 4025 struct comm_point* c = (struct comm_point*)calloc(1, 4026 sizeof(struct comm_point)); 4027 short evbits; 4028 if(!c) 4029 return NULL; 4030 c->ev = (struct internal_event*)calloc(1, 4031 sizeof(struct internal_event)); 4032 if(!c->ev) { 4033 free(c); 4034 return NULL; 4035 } 4036 c->ev->base = base; 4037 c->fd = fd; 4038 c->buffer = buffer; 4039 c->timeout = NULL; 4040 c->tcp_is_reading = 0; 4041 c->tcp_byte_count = 0; 4042 c->tcp_parent = NULL; 4043 c->max_tcp_count = 0; 4044 c->cur_tcp_count = 0; 4045 c->tcp_handlers = NULL; 4046 c->tcp_free = NULL; 4047 c->type = comm_udp; 4048 c->tcp_do_close = 0; 4049 c->do_not_close = 0; 4050 #ifdef USE_DNSCRYPT 4051 c->dnscrypt = 0; 4052 c->dnscrypt_buffer = buffer; 4053 #endif 4054 c->inuse = 0; 4055 c->tcp_do_toggle_rw = 0; 4056 c->tcp_check_nb_connect = 0; 4057 #ifdef USE_MSG_FASTOPEN 4058 c->tcp_do_fastopen = 0; 4059 #endif 4060 c->callback = callback; 4061 c->cb_arg = callback_arg; 4062 c->socket = socket; 4063 c->pp2_enabled = pp2_enabled; 4064 c->pp2_header_state = pp2_header_none; 4065 evbits = UB_EV_READ | UB_EV_PERSIST; 4066 /* ub_event stuff */ 4067 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 4068 comm_point_udp_ancil_callback, c); 4069 if(c->ev->ev == NULL) { 4070 log_err("could not baseset udp event"); 4071 comm_point_delete(c); 4072 return NULL; 4073 } 4074 if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) { 4075 log_err("could not add udp event"); 4076 comm_point_delete(c); 4077 return NULL; 4078 } 4079 c->event_added = 1; 4080 return c; 4081 } 4082 #endif 4083 4084 static struct comm_point* 4085 comm_point_create_tcp_handler(struct comm_base *base, 4086 struct comm_point* parent, size_t bufsize, 4087 struct sldns_buffer* spoolbuf, comm_point_callback_type* callback, 4088 void* callback_arg, struct unbound_socket* socket) 4089 { 4090 struct comm_point* c = (struct comm_point*)calloc(1, 4091 sizeof(struct comm_point)); 4092 short evbits; 4093 if(!c) 4094 return NULL; 4095 c->ev = (struct internal_event*)calloc(1, 4096 sizeof(struct internal_event)); 4097 if(!c->ev) { 4098 free(c); 4099 return NULL; 4100 } 4101 c->ev->base = base; 4102 c->fd = -1; 4103 c->buffer = sldns_buffer_new(bufsize); 4104 if(!c->buffer) { 4105 free(c->ev); 4106 free(c); 4107 return NULL; 4108 } 4109 c->timeout = (struct timeval*)malloc(sizeof(struct timeval)); 4110 if(!c->timeout) { 4111 sldns_buffer_free(c->buffer); 4112 free(c->ev); 4113 free(c); 4114 return NULL; 4115 } 4116 c->tcp_is_reading = 0; 4117 c->tcp_byte_count = 0; 4118 c->tcp_parent = parent; 4119 c->tcp_timeout_msec = parent->tcp_timeout_msec; 4120 c->tcp_conn_limit = parent->tcp_conn_limit; 4121 c->tcl_addr = NULL; 4122 c->tcp_keepalive = 0; 4123 c->max_tcp_count = 0; 4124 c->cur_tcp_count = 0; 4125 c->tcp_handlers = NULL; 4126 c->tcp_free = NULL; 4127 c->type = comm_tcp; 4128 c->tcp_do_close = 0; 4129 c->do_not_close = 0; 4130 c->tcp_do_toggle_rw = 1; 4131 c->tcp_check_nb_connect = 0; 4132 #ifdef USE_MSG_FASTOPEN 4133 c->tcp_do_fastopen = 0; 4134 #endif 4135 #ifdef USE_DNSCRYPT 4136 c->dnscrypt = 0; 4137 /* We don't know just yet if this is a dnscrypt channel. Allocation 4138 * will be done when handling the callback. */ 4139 c->dnscrypt_buffer = c->buffer; 4140 #endif 4141 c->repinfo.c = c; 4142 c->callback = callback; 4143 c->cb_arg = callback_arg; 4144 c->socket = socket; 4145 c->pp2_enabled = parent->pp2_enabled; 4146 c->pp2_header_state = pp2_header_none; 4147 if(spoolbuf) { 4148 c->tcp_req_info = tcp_req_info_create(spoolbuf); 4149 if(!c->tcp_req_info) { 4150 log_err("could not create tcp commpoint"); 4151 sldns_buffer_free(c->buffer); 4152 free(c->timeout); 4153 free(c->ev); 4154 free(c); 4155 return NULL; 4156 } 4157 c->tcp_req_info->cp = c; 4158 c->tcp_do_close = 1; 4159 c->tcp_do_toggle_rw = 0; 4160 } 4161 /* add to parent free list */ 4162 c->tcp_free = parent->tcp_free; 4163 parent->tcp_free = c; 4164 /* ub_event stuff */ 4165 evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT; 4166 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 4167 comm_point_tcp_handle_callback, c); 4168 if(c->ev->ev == NULL) 4169 { 4170 log_err("could not basetset tcphdl event"); 4171 parent->tcp_free = c->tcp_free; 4172 tcp_req_info_delete(c->tcp_req_info); 4173 sldns_buffer_free(c->buffer); 4174 free(c->timeout); 4175 free(c->ev); 4176 free(c); 4177 return NULL; 4178 } 4179 return c; 4180 } 4181 4182 static struct comm_point* 4183 comm_point_create_http_handler(struct comm_base *base, 4184 struct comm_point* parent, size_t bufsize, int harden_large_queries, 4185 uint32_t http_max_streams, char* http_endpoint, 4186 comm_point_callback_type* callback, void* callback_arg, 4187 struct unbound_socket* socket) 4188 { 4189 struct comm_point* c = (struct comm_point*)calloc(1, 4190 sizeof(struct comm_point)); 4191 short evbits; 4192 if(!c) 4193 return NULL; 4194 c->ev = (struct internal_event*)calloc(1, 4195 sizeof(struct internal_event)); 4196 if(!c->ev) { 4197 free(c); 4198 return NULL; 4199 } 4200 c->ev->base = base; 4201 c->fd = -1; 4202 c->buffer = sldns_buffer_new(bufsize); 4203 if(!c->buffer) { 4204 free(c->ev); 4205 free(c); 4206 return NULL; 4207 } 4208 c->timeout = (struct timeval*)malloc(sizeof(struct timeval)); 4209 if(!c->timeout) { 4210 sldns_buffer_free(c->buffer); 4211 free(c->ev); 4212 free(c); 4213 return NULL; 4214 } 4215 c->tcp_is_reading = 0; 4216 c->tcp_byte_count = 0; 4217 c->tcp_parent = parent; 4218 c->tcp_timeout_msec = parent->tcp_timeout_msec; 4219 c->tcp_conn_limit = parent->tcp_conn_limit; 4220 c->tcl_addr = NULL; 4221 c->tcp_keepalive = 0; 4222 c->max_tcp_count = 0; 4223 c->cur_tcp_count = 0; 4224 c->tcp_handlers = NULL; 4225 c->tcp_free = NULL; 4226 c->type = comm_http; 4227 c->tcp_do_close = 1; 4228 c->do_not_close = 0; 4229 c->tcp_do_toggle_rw = 1; /* will be set to 0 after http2 upgrade */ 4230 c->tcp_check_nb_connect = 0; 4231 #ifdef USE_MSG_FASTOPEN 4232 c->tcp_do_fastopen = 0; 4233 #endif 4234 #ifdef USE_DNSCRYPT 4235 c->dnscrypt = 0; 4236 c->dnscrypt_buffer = NULL; 4237 #endif 4238 c->repinfo.c = c; 4239 c->callback = callback; 4240 c->cb_arg = callback_arg; 4241 c->socket = socket; 4242 c->pp2_enabled = 0; 4243 c->pp2_header_state = pp2_header_none; 4244 4245 c->http_min_version = http_version_2; 4246 c->http2_stream_max_qbuffer_size = bufsize; 4247 if(harden_large_queries && bufsize > 512) 4248 c->http2_stream_max_qbuffer_size = 512; 4249 c->http2_max_streams = http_max_streams; 4250 if(!(c->http_endpoint = strdup(http_endpoint))) { 4251 log_err("could not strdup http_endpoint"); 4252 sldns_buffer_free(c->buffer); 4253 free(c->timeout); 4254 free(c->ev); 4255 free(c); 4256 return NULL; 4257 } 4258 c->use_h2 = 0; 4259 #ifdef HAVE_NGHTTP2 4260 if(!(c->h2_session = http2_session_create(c))) { 4261 log_err("could not create http2 session"); 4262 free(c->http_endpoint); 4263 sldns_buffer_free(c->buffer); 4264 free(c->timeout); 4265 free(c->ev); 4266 free(c); 4267 return NULL; 4268 } 4269 if(!(c->h2_session->callbacks = http2_req_callbacks_create())) { 4270 log_err("could not create http2 callbacks"); 4271 http2_session_delete(c->h2_session); 4272 free(c->http_endpoint); 4273 sldns_buffer_free(c->buffer); 4274 free(c->timeout); 4275 free(c->ev); 4276 free(c); 4277 return NULL; 4278 } 4279 #endif 4280 4281 /* add to parent free list */ 4282 c->tcp_free = parent->tcp_free; 4283 parent->tcp_free = c; 4284 /* ub_event stuff */ 4285 evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT; 4286 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 4287 comm_point_http_handle_callback, c); 4288 if(c->ev->ev == NULL) 4289 { 4290 log_err("could not set http handler event"); 4291 parent->tcp_free = c->tcp_free; 4292 http2_session_delete(c->h2_session); 4293 sldns_buffer_free(c->buffer); 4294 free(c->timeout); 4295 free(c->ev); 4296 free(c); 4297 return NULL; 4298 } 4299 return c; 4300 } 4301 4302 struct comm_point* 4303 comm_point_create_tcp(struct comm_base *base, int fd, int num, 4304 int idle_timeout, int harden_large_queries, 4305 uint32_t http_max_streams, char* http_endpoint, 4306 struct tcl_list* tcp_conn_limit, size_t bufsize, 4307 struct sldns_buffer* spoolbuf, enum listen_type port_type, 4308 int pp2_enabled, comm_point_callback_type* callback, 4309 void* callback_arg, struct unbound_socket* socket) 4310 { 4311 struct comm_point* c = (struct comm_point*)calloc(1, 4312 sizeof(struct comm_point)); 4313 short evbits; 4314 int i; 4315 /* first allocate the TCP accept listener */ 4316 if(!c) 4317 return NULL; 4318 c->ev = (struct internal_event*)calloc(1, 4319 sizeof(struct internal_event)); 4320 if(!c->ev) { 4321 free(c); 4322 return NULL; 4323 } 4324 c->ev->base = base; 4325 c->fd = fd; 4326 c->buffer = NULL; 4327 c->timeout = NULL; 4328 c->tcp_is_reading = 0; 4329 c->tcp_byte_count = 0; 4330 c->tcp_timeout_msec = idle_timeout; 4331 c->tcp_conn_limit = tcp_conn_limit; 4332 c->tcl_addr = NULL; 4333 c->tcp_keepalive = 0; 4334 c->tcp_parent = NULL; 4335 c->max_tcp_count = num; 4336 c->cur_tcp_count = 0; 4337 c->tcp_handlers = (struct comm_point**)calloc((size_t)num, 4338 sizeof(struct comm_point*)); 4339 if(!c->tcp_handlers) { 4340 free(c->ev); 4341 free(c); 4342 return NULL; 4343 } 4344 c->tcp_free = NULL; 4345 c->type = comm_tcp_accept; 4346 c->tcp_do_close = 0; 4347 c->do_not_close = 0; 4348 c->tcp_do_toggle_rw = 0; 4349 c->tcp_check_nb_connect = 0; 4350 #ifdef USE_MSG_FASTOPEN 4351 c->tcp_do_fastopen = 0; 4352 #endif 4353 #ifdef USE_DNSCRYPT 4354 c->dnscrypt = 0; 4355 c->dnscrypt_buffer = NULL; 4356 #endif 4357 c->callback = NULL; 4358 c->cb_arg = NULL; 4359 c->socket = socket; 4360 c->pp2_enabled = (port_type==listen_type_http?0:pp2_enabled); 4361 c->pp2_header_state = pp2_header_none; 4362 evbits = UB_EV_READ | UB_EV_PERSIST; 4363 /* ub_event stuff */ 4364 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 4365 comm_point_tcp_accept_callback, c); 4366 if(c->ev->ev == NULL) { 4367 log_err("could not baseset tcpacc event"); 4368 comm_point_delete(c); 4369 return NULL; 4370 } 4371 if (ub_event_add(c->ev->ev, c->timeout) != 0) { 4372 log_err("could not add tcpacc event"); 4373 comm_point_delete(c); 4374 return NULL; 4375 } 4376 c->event_added = 1; 4377 /* now prealloc the handlers */ 4378 for(i=0; i<num; i++) { 4379 if(port_type == listen_type_tcp || 4380 port_type == listen_type_ssl || 4381 port_type == listen_type_tcp_dnscrypt) { 4382 c->tcp_handlers[i] = comm_point_create_tcp_handler(base, 4383 c, bufsize, spoolbuf, callback, callback_arg, socket); 4384 } else if(port_type == listen_type_http) { 4385 c->tcp_handlers[i] = comm_point_create_http_handler( 4386 base, c, bufsize, harden_large_queries, 4387 http_max_streams, http_endpoint, 4388 callback, callback_arg, socket); 4389 } 4390 else { 4391 log_err("could not create tcp handler, unknown listen " 4392 "type"); 4393 return NULL; 4394 } 4395 if(!c->tcp_handlers[i]) { 4396 comm_point_delete(c); 4397 return NULL; 4398 } 4399 } 4400 4401 return c; 4402 } 4403 4404 struct comm_point* 4405 comm_point_create_tcp_out(struct comm_base *base, size_t bufsize, 4406 comm_point_callback_type* callback, void* callback_arg) 4407 { 4408 struct comm_point* c = (struct comm_point*)calloc(1, 4409 sizeof(struct comm_point)); 4410 short evbits; 4411 if(!c) 4412 return NULL; 4413 c->ev = (struct internal_event*)calloc(1, 4414 sizeof(struct internal_event)); 4415 if(!c->ev) { 4416 free(c); 4417 return NULL; 4418 } 4419 c->ev->base = base; 4420 c->fd = -1; 4421 c->buffer = sldns_buffer_new(bufsize); 4422 if(!c->buffer) { 4423 free(c->ev); 4424 free(c); 4425 return NULL; 4426 } 4427 c->timeout = NULL; 4428 c->tcp_is_reading = 0; 4429 c->tcp_byte_count = 0; 4430 c->tcp_timeout_msec = TCP_QUERY_TIMEOUT; 4431 c->tcp_conn_limit = NULL; 4432 c->tcl_addr = NULL; 4433 c->tcp_keepalive = 0; 4434 c->tcp_parent = NULL; 4435 c->max_tcp_count = 0; 4436 c->cur_tcp_count = 0; 4437 c->tcp_handlers = NULL; 4438 c->tcp_free = NULL; 4439 c->type = comm_tcp; 4440 c->tcp_do_close = 0; 4441 c->do_not_close = 0; 4442 c->tcp_do_toggle_rw = 1; 4443 c->tcp_check_nb_connect = 1; 4444 #ifdef USE_MSG_FASTOPEN 4445 c->tcp_do_fastopen = 1; 4446 #endif 4447 #ifdef USE_DNSCRYPT 4448 c->dnscrypt = 0; 4449 c->dnscrypt_buffer = c->buffer; 4450 #endif 4451 c->repinfo.c = c; 4452 c->callback = callback; 4453 c->cb_arg = callback_arg; 4454 c->pp2_enabled = 0; 4455 c->pp2_header_state = pp2_header_none; 4456 evbits = UB_EV_PERSIST | UB_EV_WRITE; 4457 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 4458 comm_point_tcp_handle_callback, c); 4459 if(c->ev->ev == NULL) 4460 { 4461 log_err("could not baseset tcpout event"); 4462 sldns_buffer_free(c->buffer); 4463 free(c->ev); 4464 free(c); 4465 return NULL; 4466 } 4467 4468 return c; 4469 } 4470 4471 struct comm_point* 4472 comm_point_create_http_out(struct comm_base *base, size_t bufsize, 4473 comm_point_callback_type* callback, void* callback_arg, 4474 sldns_buffer* temp) 4475 { 4476 struct comm_point* c = (struct comm_point*)calloc(1, 4477 sizeof(struct comm_point)); 4478 short evbits; 4479 if(!c) 4480 return NULL; 4481 c->ev = (struct internal_event*)calloc(1, 4482 sizeof(struct internal_event)); 4483 if(!c->ev) { 4484 free(c); 4485 return NULL; 4486 } 4487 c->ev->base = base; 4488 c->fd = -1; 4489 c->buffer = sldns_buffer_new(bufsize); 4490 if(!c->buffer) { 4491 free(c->ev); 4492 free(c); 4493 return NULL; 4494 } 4495 c->timeout = NULL; 4496 c->tcp_is_reading = 0; 4497 c->tcp_byte_count = 0; 4498 c->tcp_parent = NULL; 4499 c->max_tcp_count = 0; 4500 c->cur_tcp_count = 0; 4501 c->tcp_handlers = NULL; 4502 c->tcp_free = NULL; 4503 c->type = comm_http; 4504 c->tcp_do_close = 0; 4505 c->do_not_close = 0; 4506 c->tcp_do_toggle_rw = 1; 4507 c->tcp_check_nb_connect = 1; 4508 c->http_in_headers = 1; 4509 c->http_in_chunk_headers = 0; 4510 c->http_is_chunked = 0; 4511 c->http_temp = temp; 4512 #ifdef USE_MSG_FASTOPEN 4513 c->tcp_do_fastopen = 1; 4514 #endif 4515 #ifdef USE_DNSCRYPT 4516 c->dnscrypt = 0; 4517 c->dnscrypt_buffer = c->buffer; 4518 #endif 4519 c->repinfo.c = c; 4520 c->callback = callback; 4521 c->cb_arg = callback_arg; 4522 c->pp2_enabled = 0; 4523 c->pp2_header_state = pp2_header_none; 4524 evbits = UB_EV_PERSIST | UB_EV_WRITE; 4525 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 4526 comm_point_http_handle_callback, c); 4527 if(c->ev->ev == NULL) 4528 { 4529 log_err("could not baseset tcpout event"); 4530 #ifdef HAVE_SSL 4531 SSL_free(c->ssl); 4532 #endif 4533 sldns_buffer_free(c->buffer); 4534 free(c->ev); 4535 free(c); 4536 return NULL; 4537 } 4538 4539 return c; 4540 } 4541 4542 struct comm_point* 4543 comm_point_create_local(struct comm_base *base, int fd, size_t bufsize, 4544 comm_point_callback_type* callback, void* callback_arg) 4545 { 4546 struct comm_point* c = (struct comm_point*)calloc(1, 4547 sizeof(struct comm_point)); 4548 short evbits; 4549 if(!c) 4550 return NULL; 4551 c->ev = (struct internal_event*)calloc(1, 4552 sizeof(struct internal_event)); 4553 if(!c->ev) { 4554 free(c); 4555 return NULL; 4556 } 4557 c->ev->base = base; 4558 c->fd = fd; 4559 c->buffer = sldns_buffer_new(bufsize); 4560 if(!c->buffer) { 4561 free(c->ev); 4562 free(c); 4563 return NULL; 4564 } 4565 c->timeout = NULL; 4566 c->tcp_is_reading = 1; 4567 c->tcp_byte_count = 0; 4568 c->tcp_parent = NULL; 4569 c->max_tcp_count = 0; 4570 c->cur_tcp_count = 0; 4571 c->tcp_handlers = NULL; 4572 c->tcp_free = NULL; 4573 c->type = comm_local; 4574 c->tcp_do_close = 0; 4575 c->do_not_close = 1; 4576 c->tcp_do_toggle_rw = 0; 4577 c->tcp_check_nb_connect = 0; 4578 #ifdef USE_MSG_FASTOPEN 4579 c->tcp_do_fastopen = 0; 4580 #endif 4581 #ifdef USE_DNSCRYPT 4582 c->dnscrypt = 0; 4583 c->dnscrypt_buffer = c->buffer; 4584 #endif 4585 c->callback = callback; 4586 c->cb_arg = callback_arg; 4587 c->pp2_enabled = 0; 4588 c->pp2_header_state = pp2_header_none; 4589 /* ub_event stuff */ 4590 evbits = UB_EV_PERSIST | UB_EV_READ; 4591 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 4592 comm_point_local_handle_callback, c); 4593 if(c->ev->ev == NULL) { 4594 log_err("could not baseset localhdl event"); 4595 free(c->ev); 4596 free(c); 4597 return NULL; 4598 } 4599 if (ub_event_add(c->ev->ev, c->timeout) != 0) { 4600 log_err("could not add localhdl event"); 4601 ub_event_free(c->ev->ev); 4602 free(c->ev); 4603 free(c); 4604 return NULL; 4605 } 4606 c->event_added = 1; 4607 return c; 4608 } 4609 4610 struct comm_point* 4611 comm_point_create_raw(struct comm_base* base, int fd, int writing, 4612 comm_point_callback_type* callback, void* callback_arg) 4613 { 4614 struct comm_point* c = (struct comm_point*)calloc(1, 4615 sizeof(struct comm_point)); 4616 short evbits; 4617 if(!c) 4618 return NULL; 4619 c->ev = (struct internal_event*)calloc(1, 4620 sizeof(struct internal_event)); 4621 if(!c->ev) { 4622 free(c); 4623 return NULL; 4624 } 4625 c->ev->base = base; 4626 c->fd = fd; 4627 c->buffer = NULL; 4628 c->timeout = NULL; 4629 c->tcp_is_reading = 0; 4630 c->tcp_byte_count = 0; 4631 c->tcp_parent = NULL; 4632 c->max_tcp_count = 0; 4633 c->cur_tcp_count = 0; 4634 c->tcp_handlers = NULL; 4635 c->tcp_free = NULL; 4636 c->type = comm_raw; 4637 c->tcp_do_close = 0; 4638 c->do_not_close = 1; 4639 c->tcp_do_toggle_rw = 0; 4640 c->tcp_check_nb_connect = 0; 4641 #ifdef USE_MSG_FASTOPEN 4642 c->tcp_do_fastopen = 0; 4643 #endif 4644 #ifdef USE_DNSCRYPT 4645 c->dnscrypt = 0; 4646 c->dnscrypt_buffer = c->buffer; 4647 #endif 4648 c->callback = callback; 4649 c->cb_arg = callback_arg; 4650 c->pp2_enabled = 0; 4651 c->pp2_header_state = pp2_header_none; 4652 /* ub_event stuff */ 4653 if(writing) 4654 evbits = UB_EV_PERSIST | UB_EV_WRITE; 4655 else evbits = UB_EV_PERSIST | UB_EV_READ; 4656 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 4657 comm_point_raw_handle_callback, c); 4658 if(c->ev->ev == NULL) { 4659 log_err("could not baseset rawhdl event"); 4660 free(c->ev); 4661 free(c); 4662 return NULL; 4663 } 4664 if (ub_event_add(c->ev->ev, c->timeout) != 0) { 4665 log_err("could not add rawhdl event"); 4666 ub_event_free(c->ev->ev); 4667 free(c->ev); 4668 free(c); 4669 return NULL; 4670 } 4671 c->event_added = 1; 4672 return c; 4673 } 4674 4675 void 4676 comm_point_close(struct comm_point* c) 4677 { 4678 if(!c) 4679 return; 4680 if(c->fd != -1) { 4681 verbose(5, "comm_point_close of %d: event_del", c->fd); 4682 if(c->event_added) { 4683 if(ub_event_del(c->ev->ev) != 0) { 4684 log_err("could not event_del on close"); 4685 } 4686 c->event_added = 0; 4687 } 4688 } 4689 tcl_close_connection(c->tcl_addr); 4690 if(c->tcp_req_info) 4691 tcp_req_info_clear(c->tcp_req_info); 4692 if(c->h2_session) 4693 http2_session_server_delete(c->h2_session); 4694 /* stop the comm point from reading or writing after it is closed. */ 4695 if(c->tcp_more_read_again && *c->tcp_more_read_again) 4696 *c->tcp_more_read_again = 0; 4697 if(c->tcp_more_write_again && *c->tcp_more_write_again) 4698 *c->tcp_more_write_again = 0; 4699 4700 /* close fd after removing from event lists, or epoll.. is messed up */ 4701 if(c->fd != -1 && !c->do_not_close) { 4702 #ifdef USE_WINSOCK 4703 if(c->type == comm_tcp || c->type == comm_http) { 4704 /* delete sticky events for the fd, it gets closed */ 4705 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 4706 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 4707 } 4708 #endif 4709 verbose(VERB_ALGO, "close fd %d", c->fd); 4710 sock_close(c->fd); 4711 } 4712 c->fd = -1; 4713 } 4714 4715 void 4716 comm_point_delete(struct comm_point* c) 4717 { 4718 if(!c) 4719 return; 4720 if((c->type == comm_tcp || c->type == comm_http) && c->ssl) { 4721 #ifdef HAVE_SSL 4722 SSL_shutdown(c->ssl); 4723 SSL_free(c->ssl); 4724 #endif 4725 } 4726 if(c->type == comm_http && c->http_endpoint) { 4727 free(c->http_endpoint); 4728 c->http_endpoint = NULL; 4729 } 4730 comm_point_close(c); 4731 if(c->tcp_handlers) { 4732 int i; 4733 for(i=0; i<c->max_tcp_count; i++) 4734 comm_point_delete(c->tcp_handlers[i]); 4735 free(c->tcp_handlers); 4736 } 4737 free(c->timeout); 4738 if(c->type == comm_tcp || c->type == comm_local || c->type == comm_http) { 4739 sldns_buffer_free(c->buffer); 4740 #ifdef USE_DNSCRYPT 4741 if(c->dnscrypt && c->dnscrypt_buffer != c->buffer) { 4742 sldns_buffer_free(c->dnscrypt_buffer); 4743 } 4744 #endif 4745 if(c->tcp_req_info) { 4746 tcp_req_info_delete(c->tcp_req_info); 4747 } 4748 if(c->h2_session) { 4749 http2_session_delete(c->h2_session); 4750 } 4751 } 4752 ub_event_free(c->ev->ev); 4753 free(c->ev); 4754 free(c); 4755 } 4756 4757 void 4758 comm_point_send_reply(struct comm_reply *repinfo) 4759 { 4760 struct sldns_buffer* buffer; 4761 log_assert(repinfo && repinfo->c); 4762 #ifdef USE_DNSCRYPT 4763 buffer = repinfo->c->dnscrypt_buffer; 4764 if(!dnsc_handle_uncurved_request(repinfo)) { 4765 return; 4766 } 4767 #else 4768 buffer = repinfo->c->buffer; 4769 #endif 4770 if(repinfo->c->type == comm_udp) { 4771 if(repinfo->srctype) 4772 comm_point_send_udp_msg_if(repinfo->c, buffer, 4773 (struct sockaddr*)&repinfo->remote_addr, 4774 repinfo->remote_addrlen, repinfo); 4775 else 4776 comm_point_send_udp_msg(repinfo->c, buffer, 4777 (struct sockaddr*)&repinfo->remote_addr, 4778 repinfo->remote_addrlen, 0); 4779 #ifdef USE_DNSTAP 4780 /* 4781 * sending src (client)/dst (local service) addresses over DNSTAP from udp callback 4782 */ 4783 if(repinfo->c->dtenv != NULL && repinfo->c->dtenv->log_client_response_messages) { 4784 log_addr(VERB_ALGO, "from local addr", (void*)repinfo->c->socket->addr, repinfo->c->socket->addrlen); 4785 log_addr(VERB_ALGO, "response to client", &repinfo->client_addr, repinfo->client_addrlen); 4786 dt_msg_send_client_response(repinfo->c->dtenv, &repinfo->client_addr, (void*)repinfo->c->socket->addr, repinfo->c->type, repinfo->c->ssl, repinfo->c->buffer); 4787 } 4788 #endif 4789 } else { 4790 #ifdef USE_DNSTAP 4791 /* 4792 * sending src (client)/dst (local service) addresses over DNSTAP from TCP callback 4793 */ 4794 if(repinfo->c->tcp_parent->dtenv != NULL && repinfo->c->tcp_parent->dtenv->log_client_response_messages) { 4795 log_addr(VERB_ALGO, "from local addr", (void*)repinfo->c->socket->addr, repinfo->c->socket->addrlen); 4796 log_addr(VERB_ALGO, "response to client", &repinfo->client_addr, repinfo->client_addrlen); 4797 dt_msg_send_client_response(repinfo->c->tcp_parent->dtenv, &repinfo->client_addr, (void*)repinfo->c->socket->addr, repinfo->c->type, repinfo->c->ssl, 4798 ( repinfo->c->tcp_req_info? repinfo->c->tcp_req_info->spool_buffer: repinfo->c->buffer )); 4799 } 4800 #endif 4801 if(repinfo->c->tcp_req_info) { 4802 tcp_req_info_send_reply(repinfo->c->tcp_req_info); 4803 } else if(repinfo->c->use_h2) { 4804 if(!http2_submit_dns_response(repinfo->c->h2_session)) { 4805 comm_point_drop_reply(repinfo); 4806 return; 4807 } 4808 repinfo->c->h2_stream = NULL; 4809 repinfo->c->tcp_is_reading = 0; 4810 comm_point_stop_listening(repinfo->c); 4811 comm_point_start_listening(repinfo->c, -1, 4812 adjusted_tcp_timeout(repinfo->c)); 4813 return; 4814 } else { 4815 comm_point_start_listening(repinfo->c, -1, 4816 adjusted_tcp_timeout(repinfo->c)); 4817 } 4818 } 4819 } 4820 4821 void 4822 comm_point_drop_reply(struct comm_reply* repinfo) 4823 { 4824 if(!repinfo) 4825 return; 4826 log_assert(repinfo->c); 4827 log_assert(repinfo->c->type != comm_tcp_accept); 4828 if(repinfo->c->type == comm_udp) 4829 return; 4830 if(repinfo->c->tcp_req_info) 4831 repinfo->c->tcp_req_info->is_drop = 1; 4832 if(repinfo->c->type == comm_http) { 4833 if(repinfo->c->h2_session) { 4834 repinfo->c->h2_session->is_drop = 1; 4835 if(!repinfo->c->h2_session->postpone_drop) 4836 reclaim_http_handler(repinfo->c); 4837 return; 4838 } 4839 reclaim_http_handler(repinfo->c); 4840 return; 4841 } 4842 reclaim_tcp_handler(repinfo->c); 4843 } 4844 4845 void 4846 comm_point_stop_listening(struct comm_point* c) 4847 { 4848 verbose(VERB_ALGO, "comm point stop listening %d", c->fd); 4849 if(c->event_added) { 4850 if(ub_event_del(c->ev->ev) != 0) { 4851 log_err("event_del error to stoplisten"); 4852 } 4853 c->event_added = 0; 4854 } 4855 } 4856 4857 void 4858 comm_point_start_listening(struct comm_point* c, int newfd, int msec) 4859 { 4860 verbose(VERB_ALGO, "comm point start listening %d (%d msec)", 4861 c->fd==-1?newfd:c->fd, msec); 4862 if(c->type == comm_tcp_accept && !c->tcp_free) { 4863 /* no use to start listening no free slots. */ 4864 return; 4865 } 4866 if(c->event_added) { 4867 if(ub_event_del(c->ev->ev) != 0) { 4868 log_err("event_del error to startlisten"); 4869 } 4870 c->event_added = 0; 4871 } 4872 if(msec != -1 && msec != 0) { 4873 if(!c->timeout) { 4874 c->timeout = (struct timeval*)malloc(sizeof( 4875 struct timeval)); 4876 if(!c->timeout) { 4877 log_err("cpsl: malloc failed. No net read."); 4878 return; 4879 } 4880 } 4881 ub_event_add_bits(c->ev->ev, UB_EV_TIMEOUT); 4882 #ifndef S_SPLINT_S /* splint fails on struct timeval. */ 4883 c->timeout->tv_sec = msec/1000; 4884 c->timeout->tv_usec = (msec%1000)*1000; 4885 #endif /* S_SPLINT_S */ 4886 } else { 4887 if(msec == 0 || !c->timeout) { 4888 ub_event_del_bits(c->ev->ev, UB_EV_TIMEOUT); 4889 } 4890 } 4891 if(c->type == comm_tcp || c->type == comm_http) { 4892 ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE); 4893 if(c->tcp_write_and_read) { 4894 verbose(5, "startlistening %d mode rw", (newfd==-1?c->fd:newfd)); 4895 ub_event_add_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE); 4896 } else if(c->tcp_is_reading) { 4897 verbose(5, "startlistening %d mode r", (newfd==-1?c->fd:newfd)); 4898 ub_event_add_bits(c->ev->ev, UB_EV_READ); 4899 } else { 4900 verbose(5, "startlistening %d mode w", (newfd==-1?c->fd:newfd)); 4901 ub_event_add_bits(c->ev->ev, UB_EV_WRITE); 4902 } 4903 } 4904 if(newfd != -1) { 4905 if(c->fd != -1 && c->fd != newfd) { 4906 verbose(5, "cpsl close of fd %d for %d", c->fd, newfd); 4907 sock_close(c->fd); 4908 } 4909 c->fd = newfd; 4910 ub_event_set_fd(c->ev->ev, c->fd); 4911 } 4912 if(ub_event_add(c->ev->ev, msec==0?NULL:c->timeout) != 0) { 4913 log_err("event_add failed. in cpsl."); 4914 return; 4915 } 4916 c->event_added = 1; 4917 } 4918 4919 void comm_point_listen_for_rw(struct comm_point* c, int rd, int wr) 4920 { 4921 verbose(VERB_ALGO, "comm point listen_for_rw %d %d", c->fd, wr); 4922 if(c->event_added) { 4923 if(ub_event_del(c->ev->ev) != 0) { 4924 log_err("event_del error to cplf"); 4925 } 4926 c->event_added = 0; 4927 } 4928 if(!c->timeout) { 4929 ub_event_del_bits(c->ev->ev, UB_EV_TIMEOUT); 4930 } 4931 ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE); 4932 if(rd) ub_event_add_bits(c->ev->ev, UB_EV_READ); 4933 if(wr) ub_event_add_bits(c->ev->ev, UB_EV_WRITE); 4934 if(ub_event_add(c->ev->ev, c->timeout) != 0) { 4935 log_err("event_add failed. in cplf."); 4936 return; 4937 } 4938 c->event_added = 1; 4939 } 4940 4941 size_t comm_point_get_mem(struct comm_point* c) 4942 { 4943 size_t s; 4944 if(!c) 4945 return 0; 4946 s = sizeof(*c) + sizeof(*c->ev); 4947 if(c->timeout) 4948 s += sizeof(*c->timeout); 4949 if(c->type == comm_tcp || c->type == comm_local) { 4950 s += sizeof(*c->buffer) + sldns_buffer_capacity(c->buffer); 4951 #ifdef USE_DNSCRYPT 4952 s += sizeof(*c->dnscrypt_buffer); 4953 if(c->buffer != c->dnscrypt_buffer) { 4954 s += sldns_buffer_capacity(c->dnscrypt_buffer); 4955 } 4956 #endif 4957 } 4958 if(c->type == comm_tcp_accept) { 4959 int i; 4960 for(i=0; i<c->max_tcp_count; i++) 4961 s += comm_point_get_mem(c->tcp_handlers[i]); 4962 } 4963 return s; 4964 } 4965 4966 struct comm_timer* 4967 comm_timer_create(struct comm_base* base, void (*cb)(void*), void* cb_arg) 4968 { 4969 struct internal_timer *tm = (struct internal_timer*)calloc(1, 4970 sizeof(struct internal_timer)); 4971 if(!tm) { 4972 log_err("malloc failed"); 4973 return NULL; 4974 } 4975 tm->super.ev_timer = tm; 4976 tm->base = base; 4977 tm->super.callback = cb; 4978 tm->super.cb_arg = cb_arg; 4979 tm->ev = ub_event_new(base->eb->base, -1, UB_EV_TIMEOUT, 4980 comm_timer_callback, &tm->super); 4981 if(tm->ev == NULL) { 4982 log_err("timer_create: event_base_set failed."); 4983 free(tm); 4984 return NULL; 4985 } 4986 return &tm->super; 4987 } 4988 4989 void 4990 comm_timer_disable(struct comm_timer* timer) 4991 { 4992 if(!timer) 4993 return; 4994 ub_timer_del(timer->ev_timer->ev); 4995 timer->ev_timer->enabled = 0; 4996 } 4997 4998 void 4999 comm_timer_set(struct comm_timer* timer, struct timeval* tv) 5000 { 5001 log_assert(tv); 5002 if(timer->ev_timer->enabled) 5003 comm_timer_disable(timer); 5004 if(ub_timer_add(timer->ev_timer->ev, timer->ev_timer->base->eb->base, 5005 comm_timer_callback, timer, tv) != 0) 5006 log_err("comm_timer_set: evtimer_add failed."); 5007 timer->ev_timer->enabled = 1; 5008 } 5009 5010 void 5011 comm_timer_delete(struct comm_timer* timer) 5012 { 5013 if(!timer) 5014 return; 5015 comm_timer_disable(timer); 5016 /* Free the sub struct timer->ev_timer derived from the super struct timer. 5017 * i.e. assert(timer == timer->ev_timer) 5018 */ 5019 ub_event_free(timer->ev_timer->ev); 5020 free(timer->ev_timer); 5021 } 5022 5023 void 5024 comm_timer_callback(int ATTR_UNUSED(fd), short event, void* arg) 5025 { 5026 struct comm_timer* tm = (struct comm_timer*)arg; 5027 if(!(event&UB_EV_TIMEOUT)) 5028 return; 5029 ub_comm_base_now(tm->ev_timer->base); 5030 tm->ev_timer->enabled = 0; 5031 fptr_ok(fptr_whitelist_comm_timer(tm->callback)); 5032 (*tm->callback)(tm->cb_arg); 5033 } 5034 5035 int 5036 comm_timer_is_set(struct comm_timer* timer) 5037 { 5038 return (int)timer->ev_timer->enabled; 5039 } 5040 5041 size_t 5042 comm_timer_get_mem(struct comm_timer* ATTR_UNUSED(timer)) 5043 { 5044 return sizeof(struct internal_timer); 5045 } 5046 5047 struct comm_signal* 5048 comm_signal_create(struct comm_base* base, 5049 void (*callback)(int, void*), void* cb_arg) 5050 { 5051 struct comm_signal* com = (struct comm_signal*)malloc( 5052 sizeof(struct comm_signal)); 5053 if(!com) { 5054 log_err("malloc failed"); 5055 return NULL; 5056 } 5057 com->base = base; 5058 com->callback = callback; 5059 com->cb_arg = cb_arg; 5060 com->ev_signal = NULL; 5061 return com; 5062 } 5063 5064 void 5065 comm_signal_callback(int sig, short event, void* arg) 5066 { 5067 struct comm_signal* comsig = (struct comm_signal*)arg; 5068 if(!(event & UB_EV_SIGNAL)) 5069 return; 5070 ub_comm_base_now(comsig->base); 5071 fptr_ok(fptr_whitelist_comm_signal(comsig->callback)); 5072 (*comsig->callback)(sig, comsig->cb_arg); 5073 } 5074 5075 int 5076 comm_signal_bind(struct comm_signal* comsig, int sig) 5077 { 5078 struct internal_signal* entry = (struct internal_signal*)calloc(1, 5079 sizeof(struct internal_signal)); 5080 if(!entry) { 5081 log_err("malloc failed"); 5082 return 0; 5083 } 5084 log_assert(comsig); 5085 /* add signal event */ 5086 entry->ev = ub_signal_new(comsig->base->eb->base, sig, 5087 comm_signal_callback, comsig); 5088 if(entry->ev == NULL) { 5089 log_err("Could not create signal event"); 5090 free(entry); 5091 return 0; 5092 } 5093 if(ub_signal_add(entry->ev, NULL) != 0) { 5094 log_err("Could not add signal handler"); 5095 ub_event_free(entry->ev); 5096 free(entry); 5097 return 0; 5098 } 5099 /* link into list */ 5100 entry->next = comsig->ev_signal; 5101 comsig->ev_signal = entry; 5102 return 1; 5103 } 5104 5105 void 5106 comm_signal_delete(struct comm_signal* comsig) 5107 { 5108 struct internal_signal* p, *np; 5109 if(!comsig) 5110 return; 5111 p=comsig->ev_signal; 5112 while(p) { 5113 np = p->next; 5114 ub_signal_del(p->ev); 5115 ub_event_free(p->ev); 5116 free(p); 5117 p = np; 5118 } 5119 free(comsig); 5120 } 5121