1 /* 2 * util/netevent.c - event notification 3 * 4 * Copyright (c) 2007, NLnet Labs. All rights reserved. 5 * 6 * This software is open source. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * Redistributions of source code must retain the above copyright notice, 13 * this list of conditions and the following disclaimer. 14 * 15 * Redistributions in binary form must reproduce the above copyright notice, 16 * this list of conditions and the following disclaimer in the documentation 17 * and/or other materials provided with the distribution. 18 * 19 * Neither the name of the NLNET LABS nor the names of its contributors may 20 * be used to endorse or promote products derived from this software without 21 * specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36 /** 37 * \file 38 * 39 * This file contains event notification functions. 
40 */ 41 #include "config.h" 42 #include "util/netevent.h" 43 #include "util/ub_event.h" 44 #include "util/log.h" 45 #include "util/net_help.h" 46 #include "util/tcp_conn_limit.h" 47 #include "util/fptr_wlist.h" 48 #include "sldns/pkthdr.h" 49 #include "sldns/sbuffer.h" 50 #include "sldns/str2wire.h" 51 #include "dnstap/dnstap.h" 52 #include "dnscrypt/dnscrypt.h" 53 #include "services/listen_dnsport.h" 54 #ifdef HAVE_SYS_TYPES_H 55 #include <sys/types.h> 56 #endif 57 #ifdef HAVE_SYS_SOCKET_H 58 #include <sys/socket.h> 59 #endif 60 #ifdef HAVE_NETDB_H 61 #include <netdb.h> 62 #endif 63 64 #ifdef HAVE_OPENSSL_SSL_H 65 #include <openssl/ssl.h> 66 #endif 67 #ifdef HAVE_OPENSSL_ERR_H 68 #include <openssl/err.h> 69 #endif 70 71 /* -------- Start of local definitions -------- */ 72 /** if CMSG_ALIGN is not defined on this platform, a workaround */ 73 #ifndef CMSG_ALIGN 74 # ifdef __CMSG_ALIGN 75 # define CMSG_ALIGN(n) __CMSG_ALIGN(n) 76 # elif defined(CMSG_DATA_ALIGN) 77 # define CMSG_ALIGN _CMSG_DATA_ALIGN 78 # else 79 # define CMSG_ALIGN(len) (((len)+sizeof(long)-1) & ~(sizeof(long)-1)) 80 # endif 81 #endif 82 83 /** if CMSG_LEN is not defined on this platform, a workaround */ 84 #ifndef CMSG_LEN 85 # define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr))+(len)) 86 #endif 87 88 /** if CMSG_SPACE is not defined on this platform, a workaround */ 89 #ifndef CMSG_SPACE 90 # ifdef _CMSG_HDR_ALIGN 91 # define CMSG_SPACE(l) (CMSG_ALIGN(l)+_CMSG_HDR_ALIGN(sizeof(struct cmsghdr))) 92 # else 93 # define CMSG_SPACE(l) (CMSG_ALIGN(l)+CMSG_ALIGN(sizeof(struct cmsghdr))) 94 # endif 95 #endif 96 97 /** The TCP writing query timeout in milliseconds */ 98 #define TCP_QUERY_TIMEOUT 120000 99 /** The minimum actual TCP timeout to use, regardless of what we advertise, 100 * in msec */ 101 #define TCP_QUERY_TIMEOUT_MINIMUM 200 102 103 #ifndef NONBLOCKING_IS_BROKEN 104 /** number of UDP reads to perform per read indication from select */ 105 #define NUM_UDP_PER_SELECT 100 106 #else 107 
#define NUM_UDP_PER_SELECT 1 108 #endif 109 110 /** 111 * The internal event structure for keeping ub_event info for the event. 112 * Possibly other structures (list, tree) this is part of. 113 */ 114 struct internal_event { 115 /** the comm base */ 116 struct comm_base* base; 117 /** ub_event event type */ 118 struct ub_event* ev; 119 }; 120 121 /** 122 * Internal base structure, so that every thread has its own events. 123 */ 124 struct internal_base { 125 /** ub_event event_base type. */ 126 struct ub_event_base* base; 127 /** seconds time pointer points here */ 128 time_t secs; 129 /** timeval with current time */ 130 struct timeval now; 131 /** the event used for slow_accept timeouts */ 132 struct ub_event* slow_accept; 133 /** true if slow_accept is enabled */ 134 int slow_accept_enabled; 135 }; 136 137 /** 138 * Internal timer structure, to store timer event in. 139 */ 140 struct internal_timer { 141 /** the super struct from which derived */ 142 struct comm_timer super; 143 /** the comm base */ 144 struct comm_base* base; 145 /** ub_event event type */ 146 struct ub_event* ev; 147 /** is timer enabled */ 148 uint8_t enabled; 149 }; 150 151 /** 152 * Internal signal structure, to store signal event in. 
153 */ 154 struct internal_signal { 155 /** ub_event event type */ 156 struct ub_event* ev; 157 /** next in signal list */ 158 struct internal_signal* next; 159 }; 160 161 /** create a tcp handler with a parent */ 162 static struct comm_point* comm_point_create_tcp_handler( 163 struct comm_base *base, struct comm_point* parent, size_t bufsize, 164 struct sldns_buffer* spoolbuf, comm_point_callback_type* callback, 165 void* callback_arg, struct unbound_socket* socket); 166 167 /* -------- End of local definitions -------- */ 168 169 struct comm_base* 170 comm_base_create(int sigs) 171 { 172 struct comm_base* b = (struct comm_base*)calloc(1, 173 sizeof(struct comm_base)); 174 const char *evnm="event", *evsys="", *evmethod=""; 175 176 if(!b) 177 return NULL; 178 b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base)); 179 if(!b->eb) { 180 free(b); 181 return NULL; 182 } 183 b->eb->base = ub_default_event_base(sigs, &b->eb->secs, &b->eb->now); 184 if(!b->eb->base) { 185 free(b->eb); 186 free(b); 187 return NULL; 188 } 189 ub_comm_base_now(b); 190 ub_get_event_sys(b->eb->base, &evnm, &evsys, &evmethod); 191 verbose(VERB_ALGO, "%s %s uses %s method.", evnm, evsys, evmethod); 192 return b; 193 } 194 195 struct comm_base* 196 comm_base_create_event(struct ub_event_base* base) 197 { 198 struct comm_base* b = (struct comm_base*)calloc(1, 199 sizeof(struct comm_base)); 200 if(!b) 201 return NULL; 202 b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base)); 203 if(!b->eb) { 204 free(b); 205 return NULL; 206 } 207 b->eb->base = base; 208 ub_comm_base_now(b); 209 return b; 210 } 211 212 void 213 comm_base_delete(struct comm_base* b) 214 { 215 if(!b) 216 return; 217 if(b->eb->slow_accept_enabled) { 218 if(ub_event_del(b->eb->slow_accept) != 0) { 219 log_err("could not event_del slow_accept"); 220 } 221 ub_event_free(b->eb->slow_accept); 222 } 223 ub_event_base_free(b->eb->base); 224 b->eb->base = NULL; 225 free(b->eb); 226 free(b); 227 } 228 229 void 
230 comm_base_delete_no_base(struct comm_base* b) 231 { 232 if(!b) 233 return; 234 if(b->eb->slow_accept_enabled) { 235 if(ub_event_del(b->eb->slow_accept) != 0) { 236 log_err("could not event_del slow_accept"); 237 } 238 ub_event_free(b->eb->slow_accept); 239 } 240 b->eb->base = NULL; 241 free(b->eb); 242 free(b); 243 } 244 245 void 246 comm_base_timept(struct comm_base* b, time_t** tt, struct timeval** tv) 247 { 248 *tt = &b->eb->secs; 249 *tv = &b->eb->now; 250 } 251 252 void 253 comm_base_dispatch(struct comm_base* b) 254 { 255 int retval; 256 retval = ub_event_base_dispatch(b->eb->base); 257 if(retval < 0) { 258 fatal_exit("event_dispatch returned error %d, " 259 "errno is %s", retval, strerror(errno)); 260 } 261 } 262 263 void comm_base_exit(struct comm_base* b) 264 { 265 if(ub_event_base_loopexit(b->eb->base) != 0) { 266 log_err("Could not loopexit"); 267 } 268 } 269 270 void comm_base_set_slow_accept_handlers(struct comm_base* b, 271 void (*stop_acc)(void*), void (*start_acc)(void*), void* arg) 272 { 273 b->stop_accept = stop_acc; 274 b->start_accept = start_acc; 275 b->cb_arg = arg; 276 } 277 278 struct ub_event_base* comm_base_internal(struct comm_base* b) 279 { 280 return b->eb->base; 281 } 282 283 /** see if errno for udp has to be logged or not uses globals */ 284 static int 285 udp_send_errno_needs_log(struct sockaddr* addr, socklen_t addrlen) 286 { 287 /* do not log transient errors (unless high verbosity) */ 288 #if defined(ENETUNREACH) || defined(EHOSTDOWN) || defined(EHOSTUNREACH) || defined(ENETDOWN) 289 switch(errno) { 290 # ifdef ENETUNREACH 291 case ENETUNREACH: 292 # endif 293 # ifdef EHOSTDOWN 294 case EHOSTDOWN: 295 # endif 296 # ifdef EHOSTUNREACH 297 case EHOSTUNREACH: 298 # endif 299 # ifdef ENETDOWN 300 case ENETDOWN: 301 # endif 302 case EPERM: 303 case EACCES: 304 if(verbosity < VERB_ALGO) 305 return 0; 306 default: 307 break; 308 } 309 #endif 310 /* permission denied is gotten for every send if the 311 * network is disconnected (on 
some OS), squelch it */ 312 if( ((errno == EPERM) 313 # ifdef EADDRNOTAVAIL 314 /* 'Cannot assign requested address' also when disconnected */ 315 || (errno == EADDRNOTAVAIL) 316 # endif 317 ) && verbosity < VERB_ALGO) 318 return 0; 319 # ifdef EADDRINUSE 320 /* If SO_REUSEADDR is set, we could try to connect to the same server 321 * from the same source port twice. */ 322 if(errno == EADDRINUSE && verbosity < VERB_DETAIL) 323 return 0; 324 # endif 325 /* squelch errors where people deploy AAAA ::ffff:bla for 326 * authority servers, which we try for intranets. */ 327 if(errno == EINVAL && addr_is_ip4mapped( 328 (struct sockaddr_storage*)addr, addrlen) && 329 verbosity < VERB_DETAIL) 330 return 0; 331 /* SO_BROADCAST sockopt can give access to 255.255.255.255, 332 * but a dns cache does not need it. */ 333 if(errno == EACCES && addr_is_broadcast( 334 (struct sockaddr_storage*)addr, addrlen) && 335 verbosity < VERB_DETAIL) 336 return 0; 337 return 1; 338 } 339 340 int tcp_connect_errno_needs_log(struct sockaddr* addr, socklen_t addrlen) 341 { 342 return udp_send_errno_needs_log(addr, addrlen); 343 } 344 345 /* send a UDP reply */ 346 int 347 comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet, 348 struct sockaddr* addr, socklen_t addrlen, int is_connected) 349 { 350 ssize_t sent; 351 log_assert(c->fd != -1); 352 #ifdef UNBOUND_DEBUG 353 if(sldns_buffer_remaining(packet) == 0) 354 log_err("error: send empty UDP packet"); 355 #endif 356 log_assert(addr && addrlen > 0); 357 if(!is_connected) { 358 sent = sendto(c->fd, (void*)sldns_buffer_begin(packet), 359 sldns_buffer_remaining(packet), 0, 360 addr, addrlen); 361 } else { 362 sent = send(c->fd, (void*)sldns_buffer_begin(packet), 363 sldns_buffer_remaining(packet), 0); 364 } 365 if(sent == -1) { 366 /* try again and block, waiting for IO to complete, 367 * we want to send the answer, and we will wait for 368 * the ethernet interface buffer to have space. 
*/ 369 #ifndef USE_WINSOCK 370 if(errno == EAGAIN || 371 # ifdef EWOULDBLOCK 372 errno == EWOULDBLOCK || 373 # endif 374 errno == ENOBUFS) { 375 #else 376 if(WSAGetLastError() == WSAEINPROGRESS || 377 WSAGetLastError() == WSAENOBUFS || 378 WSAGetLastError() == WSAEWOULDBLOCK) { 379 #endif 380 int e; 381 fd_set_block(c->fd); 382 if (!is_connected) { 383 sent = sendto(c->fd, (void*)sldns_buffer_begin(packet), 384 sldns_buffer_remaining(packet), 0, 385 addr, addrlen); 386 } else { 387 sent = send(c->fd, (void*)sldns_buffer_begin(packet), 388 sldns_buffer_remaining(packet), 0); 389 } 390 e = errno; 391 fd_set_nonblock(c->fd); 392 errno = e; 393 } 394 } 395 if(sent == -1) { 396 if(!udp_send_errno_needs_log(addr, addrlen)) 397 return 0; 398 if (!is_connected) { 399 verbose(VERB_OPS, "sendto failed: %s", sock_strerror(errno)); 400 } else { 401 verbose(VERB_OPS, "send failed: %s", sock_strerror(errno)); 402 } 403 if(addr) 404 log_addr(VERB_OPS, "remote address is", 405 (struct sockaddr_storage*)addr, addrlen); 406 return 0; 407 } else if((size_t)sent != sldns_buffer_remaining(packet)) { 408 log_err("sent %d in place of %d bytes", 409 (int)sent, (int)sldns_buffer_remaining(packet)); 410 return 0; 411 } 412 return 1; 413 } 414 415 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && (defined(HAVE_RECVMSG) || defined(HAVE_SENDMSG)) 416 /** print debug ancillary info */ 417 static void p_ancil(const char* str, struct comm_reply* r) 418 { 419 if(r->srctype != 4 && r->srctype != 6) { 420 log_info("%s: unknown srctype %d", str, r->srctype); 421 return; 422 } 423 424 if(r->srctype == 6) { 425 #ifdef IPV6_PKTINFO 426 char buf[1024]; 427 if(inet_ntop(AF_INET6, &r->pktinfo.v6info.ipi6_addr, 428 buf, (socklen_t)sizeof(buf)) == 0) { 429 (void)strlcpy(buf, "(inet_ntop error)", sizeof(buf)); 430 } 431 buf[sizeof(buf)-1]=0; 432 log_info("%s: %s %d", str, buf, r->pktinfo.v6info.ipi6_ifindex); 433 #endif 434 } else if(r->srctype == 4) { 435 #ifdef IP_PKTINFO 436 char buf1[1024], buf2[1024]; 
437 if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_addr, 438 buf1, (socklen_t)sizeof(buf1)) == 0) { 439 (void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1)); 440 } 441 buf1[sizeof(buf1)-1]=0; 442 #ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST 443 if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_spec_dst, 444 buf2, (socklen_t)sizeof(buf2)) == 0) { 445 (void)strlcpy(buf2, "(inet_ntop error)", sizeof(buf2)); 446 } 447 buf2[sizeof(buf2)-1]=0; 448 #else 449 buf2[0]=0; 450 #endif 451 log_info("%s: %d %s %s", str, r->pktinfo.v4info.ipi_ifindex, 452 buf1, buf2); 453 #elif defined(IP_RECVDSTADDR) 454 char buf1[1024]; 455 if(inet_ntop(AF_INET, &r->pktinfo.v4addr, 456 buf1, (socklen_t)sizeof(buf1)) == 0) { 457 (void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1)); 458 } 459 buf1[sizeof(buf1)-1]=0; 460 log_info("%s: %s", str, buf1); 461 #endif /* IP_PKTINFO or PI_RECVDSTDADDR */ 462 } 463 } 464 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG||HAVE_SENDMSG */ 465 466 /** send a UDP reply over specified interface*/ 467 static int 468 comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet, 469 struct sockaddr* addr, socklen_t addrlen, struct comm_reply* r) 470 { 471 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_SENDMSG) 472 ssize_t sent; 473 struct msghdr msg; 474 struct iovec iov[1]; 475 union { 476 struct cmsghdr hdr; 477 char buf[256]; 478 } control; 479 #ifndef S_SPLINT_S 480 struct cmsghdr *cmsg; 481 #endif /* S_SPLINT_S */ 482 483 log_assert(c->fd != -1); 484 #ifdef UNBOUND_DEBUG 485 if(sldns_buffer_remaining(packet) == 0) 486 log_err("error: send empty UDP packet"); 487 #endif 488 log_assert(addr && addrlen > 0); 489 490 msg.msg_name = addr; 491 msg.msg_namelen = addrlen; 492 iov[0].iov_base = sldns_buffer_begin(packet); 493 iov[0].iov_len = sldns_buffer_remaining(packet); 494 msg.msg_iov = iov; 495 msg.msg_iovlen = 1; 496 msg.msg_control = control.buf; 497 #ifndef S_SPLINT_S 498 msg.msg_controllen = sizeof(control.buf); 499 #endif /* S_SPLINT_S 
*/ 500 msg.msg_flags = 0; 501 502 #ifndef S_SPLINT_S 503 cmsg = CMSG_FIRSTHDR(&msg); 504 if(r->srctype == 4) { 505 #ifdef IP_PKTINFO 506 void* cmsg_data; 507 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo)); 508 log_assert(msg.msg_controllen <= sizeof(control.buf)); 509 cmsg->cmsg_level = IPPROTO_IP; 510 cmsg->cmsg_type = IP_PKTINFO; 511 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4info, 512 sizeof(struct in_pktinfo)); 513 /* unset the ifindex to not bypass the routing tables */ 514 cmsg_data = CMSG_DATA(cmsg); 515 ((struct in_pktinfo *) cmsg_data)->ipi_ifindex = 0; 516 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo)); 517 #elif defined(IP_SENDSRCADDR) 518 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr)); 519 log_assert(msg.msg_controllen <= sizeof(control.buf)); 520 cmsg->cmsg_level = IPPROTO_IP; 521 cmsg->cmsg_type = IP_SENDSRCADDR; 522 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4addr, 523 sizeof(struct in_addr)); 524 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr)); 525 #else 526 verbose(VERB_ALGO, "no IP_PKTINFO or IP_SENDSRCADDR"); 527 msg.msg_control = NULL; 528 #endif /* IP_PKTINFO or IP_SENDSRCADDR */ 529 } else if(r->srctype == 6) { 530 void* cmsg_data; 531 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo)); 532 log_assert(msg.msg_controllen <= sizeof(control.buf)); 533 cmsg->cmsg_level = IPPROTO_IPV6; 534 cmsg->cmsg_type = IPV6_PKTINFO; 535 memmove(CMSG_DATA(cmsg), &r->pktinfo.v6info, 536 sizeof(struct in6_pktinfo)); 537 /* unset the ifindex to not bypass the routing tables */ 538 cmsg_data = CMSG_DATA(cmsg); 539 ((struct in6_pktinfo *) cmsg_data)->ipi6_ifindex = 0; 540 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo)); 541 } else { 542 /* try to pass all 0 to use default route */ 543 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo)); 544 log_assert(msg.msg_controllen <= sizeof(control.buf)); 545 cmsg->cmsg_level = IPPROTO_IPV6; 546 cmsg->cmsg_type = IPV6_PKTINFO; 547 memset(CMSG_DATA(cmsg), 0, sizeof(struct 
in6_pktinfo)); 548 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo)); 549 } 550 #endif /* S_SPLINT_S */ 551 if(verbosity >= VERB_ALGO) 552 p_ancil("send_udp over interface", r); 553 sent = sendmsg(c->fd, &msg, 0); 554 if(sent == -1) { 555 /* try again and block, waiting for IO to complete, 556 * we want to send the answer, and we will wait for 557 * the ethernet interface buffer to have space. */ 558 #ifndef USE_WINSOCK 559 if(errno == EAGAIN || 560 # ifdef EWOULDBLOCK 561 errno == EWOULDBLOCK || 562 # endif 563 errno == ENOBUFS) { 564 #else 565 if(WSAGetLastError() == WSAEINPROGRESS || 566 WSAGetLastError() == WSAENOBUFS || 567 WSAGetLastError() == WSAEWOULDBLOCK) { 568 #endif 569 int e; 570 fd_set_block(c->fd); 571 sent = sendmsg(c->fd, &msg, 0); 572 e = errno; 573 fd_set_nonblock(c->fd); 574 errno = e; 575 } 576 } 577 if(sent == -1) { 578 if(!udp_send_errno_needs_log(addr, addrlen)) 579 return 0; 580 verbose(VERB_OPS, "sendmsg failed: %s", strerror(errno)); 581 log_addr(VERB_OPS, "remote address is", 582 (struct sockaddr_storage*)addr, addrlen); 583 #ifdef __NetBSD__ 584 /* netbsd 7 has IP_PKTINFO for recv but not send */ 585 if(errno == EINVAL && r->srctype == 4) 586 log_err("sendmsg: No support for sendmsg(IP_PKTINFO). 
" 587 "Please disable interface-automatic"); 588 #endif 589 return 0; 590 } else if((size_t)sent != sldns_buffer_remaining(packet)) { 591 log_err("sent %d in place of %d bytes", 592 (int)sent, (int)sldns_buffer_remaining(packet)); 593 return 0; 594 } 595 return 1; 596 #else 597 (void)c; 598 (void)packet; 599 (void)addr; 600 (void)addrlen; 601 (void)r; 602 log_err("sendmsg: IPV6_PKTINFO not supported"); 603 return 0; 604 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_SENDMSG */ 605 } 606 607 /** return true is UDP receive error needs to be logged */ 608 static int udp_recv_needs_log(int err) 609 { 610 switch(err) { 611 case EACCES: /* some hosts send ICMP 'Permission Denied' */ 612 #ifndef USE_WINSOCK 613 case ECONNREFUSED: 614 # ifdef ENETUNREACH 615 case ENETUNREACH: 616 # endif 617 # ifdef EHOSTDOWN 618 case EHOSTDOWN: 619 # endif 620 # ifdef EHOSTUNREACH 621 case EHOSTUNREACH: 622 # endif 623 # ifdef ENETDOWN 624 case ENETDOWN: 625 # endif 626 #else /* USE_WINSOCK */ 627 case WSAECONNREFUSED: 628 case WSAENETUNREACH: 629 case WSAEHOSTDOWN: 630 case WSAEHOSTUNREACH: 631 case WSAENETDOWN: 632 #endif 633 if(verbosity >= VERB_ALGO) 634 return 1; 635 return 0; 636 default: 637 break; 638 } 639 return 1; 640 } 641 642 void 643 comm_point_udp_ancil_callback(int fd, short event, void* arg) 644 { 645 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG) 646 struct comm_reply rep; 647 struct msghdr msg; 648 struct iovec iov[1]; 649 ssize_t rcv; 650 union { 651 struct cmsghdr hdr; 652 char buf[256]; 653 } ancil; 654 int i; 655 #ifndef S_SPLINT_S 656 struct cmsghdr* cmsg; 657 #endif /* S_SPLINT_S */ 658 659 rep.c = (struct comm_point*)arg; 660 log_assert(rep.c->type == comm_udp); 661 662 if(!(event&UB_EV_READ)) 663 return; 664 log_assert(rep.c && rep.c->buffer && rep.c->fd == fd); 665 ub_comm_base_now(rep.c->ev->base); 666 for(i=0; i<NUM_UDP_PER_SELECT; i++) { 667 sldns_buffer_clear(rep.c->buffer); 668 rep.addrlen = (socklen_t)sizeof(rep.addr); 669 
log_assert(fd != -1); 670 log_assert(sldns_buffer_remaining(rep.c->buffer) > 0); 671 msg.msg_name = &rep.addr; 672 msg.msg_namelen = (socklen_t)sizeof(rep.addr); 673 iov[0].iov_base = sldns_buffer_begin(rep.c->buffer); 674 iov[0].iov_len = sldns_buffer_remaining(rep.c->buffer); 675 msg.msg_iov = iov; 676 msg.msg_iovlen = 1; 677 msg.msg_control = ancil.buf; 678 #ifndef S_SPLINT_S 679 msg.msg_controllen = sizeof(ancil.buf); 680 #endif /* S_SPLINT_S */ 681 msg.msg_flags = 0; 682 rcv = recvmsg(fd, &msg, 0); 683 if(rcv == -1) { 684 if(errno != EAGAIN && errno != EINTR 685 && udp_recv_needs_log(errno)) { 686 log_err("recvmsg failed: %s", strerror(errno)); 687 } 688 return; 689 } 690 rep.addrlen = msg.msg_namelen; 691 sldns_buffer_skip(rep.c->buffer, rcv); 692 sldns_buffer_flip(rep.c->buffer); 693 rep.srctype = 0; 694 #ifndef S_SPLINT_S 695 for(cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; 696 cmsg = CMSG_NXTHDR(&msg, cmsg)) { 697 if( cmsg->cmsg_level == IPPROTO_IPV6 && 698 cmsg->cmsg_type == IPV6_PKTINFO) { 699 rep.srctype = 6; 700 memmove(&rep.pktinfo.v6info, CMSG_DATA(cmsg), 701 sizeof(struct in6_pktinfo)); 702 break; 703 #ifdef IP_PKTINFO 704 } else if( cmsg->cmsg_level == IPPROTO_IP && 705 cmsg->cmsg_type == IP_PKTINFO) { 706 rep.srctype = 4; 707 memmove(&rep.pktinfo.v4info, CMSG_DATA(cmsg), 708 sizeof(struct in_pktinfo)); 709 break; 710 #elif defined(IP_RECVDSTADDR) 711 } else if( cmsg->cmsg_level == IPPROTO_IP && 712 cmsg->cmsg_type == IP_RECVDSTADDR) { 713 rep.srctype = 4; 714 memmove(&rep.pktinfo.v4addr, CMSG_DATA(cmsg), 715 sizeof(struct in_addr)); 716 break; 717 #endif /* IP_PKTINFO or IP_RECVDSTADDR */ 718 } 719 } 720 if(verbosity >= VERB_ALGO) 721 p_ancil("receive_udp on interface", &rep); 722 #endif /* S_SPLINT_S */ 723 fptr_ok(fptr_whitelist_comm_point(rep.c->callback)); 724 if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) { 725 /* send back immediate reply */ 726 (void)comm_point_send_udp_msg_if(rep.c, rep.c->buffer, 727 (struct 
sockaddr*)&rep.addr, rep.addrlen, &rep); 728 } 729 if(!rep.c || rep.c->fd == -1) /* commpoint closed */ 730 break; 731 } 732 #else 733 (void)fd; 734 (void)event; 735 (void)arg; 736 fatal_exit("recvmsg: No support for IPV6_PKTINFO; IP_PKTINFO or IP_RECVDSTADDR. " 737 "Please disable interface-automatic"); 738 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */ 739 } 740 741 void 742 comm_point_udp_callback(int fd, short event, void* arg) 743 { 744 struct comm_reply rep; 745 ssize_t rcv; 746 int i; 747 struct sldns_buffer *buffer; 748 749 rep.c = (struct comm_point*)arg; 750 log_assert(rep.c->type == comm_udp); 751 752 if(!(event&UB_EV_READ)) 753 return; 754 log_assert(rep.c && rep.c->buffer && rep.c->fd == fd); 755 ub_comm_base_now(rep.c->ev->base); 756 for(i=0; i<NUM_UDP_PER_SELECT; i++) { 757 sldns_buffer_clear(rep.c->buffer); 758 rep.addrlen = (socklen_t)sizeof(rep.addr); 759 log_assert(fd != -1); 760 log_assert(sldns_buffer_remaining(rep.c->buffer) > 0); 761 rcv = recvfrom(fd, (void*)sldns_buffer_begin(rep.c->buffer), 762 sldns_buffer_remaining(rep.c->buffer), 0, 763 (struct sockaddr*)&rep.addr, &rep.addrlen); 764 if(rcv == -1) { 765 #ifndef USE_WINSOCK 766 if(errno != EAGAIN && errno != EINTR 767 && udp_recv_needs_log(errno)) 768 log_err("recvfrom %d failed: %s", 769 fd, strerror(errno)); 770 #else 771 if(WSAGetLastError() != WSAEINPROGRESS && 772 WSAGetLastError() != WSAECONNRESET && 773 WSAGetLastError()!= WSAEWOULDBLOCK && 774 udp_recv_needs_log(WSAGetLastError())) 775 log_err("recvfrom failed: %s", 776 wsa_strerror(WSAGetLastError())); 777 #endif 778 return; 779 } 780 sldns_buffer_skip(rep.c->buffer, rcv); 781 sldns_buffer_flip(rep.c->buffer); 782 rep.srctype = 0; 783 fptr_ok(fptr_whitelist_comm_point(rep.c->callback)); 784 if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) { 785 /* send back immediate reply */ 786 #ifdef USE_DNSCRYPT 787 buffer = rep.c->dnscrypt_buffer; 788 #else 789 buffer = rep.c->buffer; 790 #endif 791 
(void)comm_point_send_udp_msg(rep.c, buffer, 792 (struct sockaddr*)&rep.addr, rep.addrlen, 0); 793 } 794 if(!rep.c || rep.c->fd != fd) /* commpoint closed to -1 or reused for 795 another UDP port. Note rep.c cannot be reused with TCP fd. */ 796 break; 797 } 798 } 799 800 int adjusted_tcp_timeout(struct comm_point* c) 801 { 802 if(c->tcp_timeout_msec < TCP_QUERY_TIMEOUT_MINIMUM) 803 return TCP_QUERY_TIMEOUT_MINIMUM; 804 return c->tcp_timeout_msec; 805 } 806 807 /** Use a new tcp handler for new query fd, set to read query */ 808 static void 809 setup_tcp_handler(struct comm_point* c, int fd, int cur, int max) 810 { 811 int handler_usage; 812 log_assert(c->type == comm_tcp || c->type == comm_http); 813 log_assert(c->fd == -1); 814 sldns_buffer_clear(c->buffer); 815 #ifdef USE_DNSCRYPT 816 if (c->dnscrypt) 817 sldns_buffer_clear(c->dnscrypt_buffer); 818 #endif 819 c->tcp_is_reading = 1; 820 c->tcp_byte_count = 0; 821 c->tcp_keepalive = 0; 822 /* if more than half the tcp handlers are in use, use a shorter 823 * timeout for this TCP connection, we need to make space for 824 * other connections to be able to get attention */ 825 /* If > 50% TCP handler structures in use, set timeout to 1/100th 826 * configured value. 827 * If > 65%TCP handler structures in use, set to 1/500th configured 828 * value. 829 * If > 80% TCP handler structures in use, set to 0. 830 * 831 * If the timeout to use falls below 200 milliseconds, an actual 832 * timeout of 200ms is used. 
833 */ 834 handler_usage = (cur * 100) / max; 835 if(handler_usage > 50 && handler_usage <= 65) 836 c->tcp_timeout_msec /= 100; 837 else if (handler_usage > 65 && handler_usage <= 80) 838 c->tcp_timeout_msec /= 500; 839 else if (handler_usage > 80) 840 c->tcp_timeout_msec = 0; 841 comm_point_start_listening(c, fd, adjusted_tcp_timeout(c)); 842 } 843 844 void comm_base_handle_slow_accept(int ATTR_UNUSED(fd), 845 short ATTR_UNUSED(event), void* arg) 846 { 847 struct comm_base* b = (struct comm_base*)arg; 848 /* timeout for the slow accept, re-enable accepts again */ 849 if(b->start_accept) { 850 verbose(VERB_ALGO, "wait is over, slow accept disabled"); 851 fptr_ok(fptr_whitelist_start_accept(b->start_accept)); 852 (*b->start_accept)(b->cb_arg); 853 b->eb->slow_accept_enabled = 0; 854 } 855 } 856 857 int comm_point_perform_accept(struct comm_point* c, 858 struct sockaddr_storage* addr, socklen_t* addrlen) 859 { 860 int new_fd; 861 *addrlen = (socklen_t)sizeof(*addr); 862 #ifndef HAVE_ACCEPT4 863 new_fd = accept(c->fd, (struct sockaddr*)addr, addrlen); 864 #else 865 /* SOCK_NONBLOCK saves extra calls to fcntl for the same result */ 866 new_fd = accept4(c->fd, (struct sockaddr*)addr, addrlen, SOCK_NONBLOCK); 867 #endif 868 if(new_fd == -1) { 869 #ifndef USE_WINSOCK 870 /* EINTR is signal interrupt. others are closed connection. */ 871 if( errno == EINTR || errno == EAGAIN 872 #ifdef EWOULDBLOCK 873 || errno == EWOULDBLOCK 874 #endif 875 #ifdef ECONNABORTED 876 || errno == ECONNABORTED 877 #endif 878 #ifdef EPROTO 879 || errno == EPROTO 880 #endif /* EPROTO */ 881 ) 882 return -1; 883 #if defined(ENFILE) && defined(EMFILE) 884 if(errno == ENFILE || errno == EMFILE) { 885 /* out of file descriptors, likely outside of our 886 * control. 
stop accept() calls for some time */ 887 if(c->ev->base->stop_accept) { 888 struct comm_base* b = c->ev->base; 889 struct timeval tv; 890 verbose(VERB_ALGO, "out of file descriptors: " 891 "slow accept"); 892 b->eb->slow_accept_enabled = 1; 893 fptr_ok(fptr_whitelist_stop_accept( 894 b->stop_accept)); 895 (*b->stop_accept)(b->cb_arg); 896 /* set timeout, no mallocs */ 897 tv.tv_sec = NETEVENT_SLOW_ACCEPT_TIME/1000; 898 tv.tv_usec = (NETEVENT_SLOW_ACCEPT_TIME%1000)*1000; 899 b->eb->slow_accept = ub_event_new(b->eb->base, 900 -1, UB_EV_TIMEOUT, 901 comm_base_handle_slow_accept, b); 902 if(b->eb->slow_accept == NULL) { 903 /* we do not want to log here, because 904 * that would spam the logfiles. 905 * error: "event_base_set failed." */ 906 } 907 else if(ub_event_add(b->eb->slow_accept, &tv) 908 != 0) { 909 /* we do not want to log here, 910 * error: "event_add failed." */ 911 } 912 } 913 return -1; 914 } 915 #endif 916 #else /* USE_WINSOCK */ 917 if(WSAGetLastError() == WSAEINPROGRESS || 918 WSAGetLastError() == WSAECONNRESET) 919 return -1; 920 if(WSAGetLastError() == WSAEWOULDBLOCK) { 921 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 922 return -1; 923 } 924 #endif 925 log_err_addr("accept failed", sock_strerror(errno), addr, 926 *addrlen); 927 return -1; 928 } 929 if(c->tcp_conn_limit && c->type == comm_tcp_accept) { 930 c->tcl_addr = tcl_addr_lookup(c->tcp_conn_limit, addr, *addrlen); 931 if(!tcl_new_connection(c->tcl_addr)) { 932 if(verbosity >= 3) 933 log_err_addr("accept rejected", 934 "connection limit exceeded", addr, *addrlen); 935 close(new_fd); 936 return -1; 937 } 938 } 939 #ifndef HAVE_ACCEPT4 940 fd_set_nonblock(new_fd); 941 #endif 942 return new_fd; 943 } 944 945 #ifdef USE_WINSOCK 946 static long win_bio_cb(BIO *b, int oper, const char* ATTR_UNUSED(argp), 947 #ifdef HAVE_BIO_SET_CALLBACK_EX 948 size_t ATTR_UNUSED(len), 949 #endif 950 int ATTR_UNUSED(argi), long argl, 951 #ifndef HAVE_BIO_SET_CALLBACK_EX 952 long retvalue 953 #else 954 int 
retvalue, size_t* ATTR_UNUSED(processed) 955 #endif 956 ) 957 { 958 int wsa_err = WSAGetLastError(); /* store errcode before it is gone */ 959 verbose(VERB_ALGO, "bio_cb %d, %s %s %s", oper, 960 (oper&BIO_CB_RETURN)?"return":"before", 961 (oper&BIO_CB_READ)?"read":((oper&BIO_CB_WRITE)?"write":"other"), 962 wsa_err==WSAEWOULDBLOCK?"wsawb":""); 963 /* on windows, check if previous operation caused EWOULDBLOCK */ 964 if( (oper == (BIO_CB_READ|BIO_CB_RETURN) && argl == 0) || 965 (oper == (BIO_CB_GETS|BIO_CB_RETURN) && argl == 0)) { 966 if(wsa_err == WSAEWOULDBLOCK) 967 ub_winsock_tcp_wouldblock((struct ub_event*) 968 BIO_get_callback_arg(b), UB_EV_READ); 969 } 970 if( (oper == (BIO_CB_WRITE|BIO_CB_RETURN) && argl == 0) || 971 (oper == (BIO_CB_PUTS|BIO_CB_RETURN) && argl == 0)) { 972 if(wsa_err == WSAEWOULDBLOCK) 973 ub_winsock_tcp_wouldblock((struct ub_event*) 974 BIO_get_callback_arg(b), UB_EV_WRITE); 975 } 976 /* return original return value */ 977 return retvalue; 978 } 979 980 /** set win bio callbacks for nonblocking operations */ 981 void 982 comm_point_tcp_win_bio_cb(struct comm_point* c, void* thessl) 983 { 984 SSL* ssl = (SSL*)thessl; 985 /* set them both just in case, but usually they are the same BIO */ 986 #ifdef HAVE_BIO_SET_CALLBACK_EX 987 BIO_set_callback_ex(SSL_get_rbio(ssl), &win_bio_cb); 988 #else 989 BIO_set_callback(SSL_get_rbio(ssl), &win_bio_cb); 990 #endif 991 BIO_set_callback_arg(SSL_get_rbio(ssl), (char*)c->ev->ev); 992 #ifdef HAVE_BIO_SET_CALLBACK_EX 993 BIO_set_callback_ex(SSL_get_wbio(ssl), &win_bio_cb); 994 #else 995 BIO_set_callback(SSL_get_wbio(ssl), &win_bio_cb); 996 #endif 997 BIO_set_callback_arg(SSL_get_wbio(ssl), (char*)c->ev->ev); 998 } 999 #endif 1000 1001 #ifdef HAVE_NGHTTP2 1002 /** Create http2 session server. 
Per connection, after TCP accepted.*/ 1003 static int http2_session_server_create(struct http2_session* h2_session) 1004 { 1005 log_assert(h2_session->callbacks); 1006 h2_session->is_drop = 0; 1007 if(nghttp2_session_server_new(&h2_session->session, 1008 h2_session->callbacks, 1009 h2_session) == NGHTTP2_ERR_NOMEM) { 1010 log_err("failed to create nghttp2 session server"); 1011 return 0; 1012 } 1013 1014 return 1; 1015 } 1016 1017 /** Submit http2 setting to session. Once per session. */ 1018 static int http2_submit_settings(struct http2_session* h2_session) 1019 { 1020 int ret; 1021 nghttp2_settings_entry settings[1] = { 1022 {NGHTTP2_SETTINGS_MAX_CONCURRENT_STREAMS, 1023 h2_session->c->http2_max_streams}}; 1024 1025 ret = nghttp2_submit_settings(h2_session->session, NGHTTP2_FLAG_NONE, 1026 settings, 1); 1027 if(ret) { 1028 verbose(VERB_QUERY, "http2: submit_settings failed, " 1029 "error: %s", nghttp2_strerror(ret)); 1030 return 0; 1031 } 1032 return 1; 1033 } 1034 #endif /* HAVE_NGHTTP2 */ 1035 1036 1037 void 1038 comm_point_tcp_accept_callback(int fd, short event, void* arg) 1039 { 1040 struct comm_point* c = (struct comm_point*)arg, *c_hdl; 1041 int new_fd; 1042 log_assert(c->type == comm_tcp_accept); 1043 if(!(event & UB_EV_READ)) { 1044 log_info("ignoring tcp accept event %d", (int)event); 1045 return; 1046 } 1047 ub_comm_base_now(c->ev->base); 1048 /* find free tcp handler. */ 1049 if(!c->tcp_free) { 1050 log_warn("accepted too many tcp, connections full"); 1051 return; 1052 } 1053 /* accept incoming connection. 
*/ 1054 c_hdl = c->tcp_free; 1055 /* clear leftover flags from previous use, and then set the 1056 * correct event base for the event structure for libevent */ 1057 ub_event_free(c_hdl->ev->ev); 1058 c_hdl->ev->ev = NULL; 1059 if((c_hdl->type == comm_tcp && c_hdl->tcp_req_info) || 1060 c_hdl->type == comm_local || c_hdl->type == comm_raw) 1061 c_hdl->tcp_do_toggle_rw = 0; 1062 else c_hdl->tcp_do_toggle_rw = 1; 1063 1064 if(c_hdl->type == comm_http) { 1065 #ifdef HAVE_NGHTTP2 1066 if(!c_hdl->h2_session || 1067 !http2_session_server_create(c_hdl->h2_session)) { 1068 log_warn("failed to create nghttp2"); 1069 return; 1070 } 1071 if(!c_hdl->h2_session || 1072 !http2_submit_settings(c_hdl->h2_session)) { 1073 log_warn("failed to submit http2 settings"); 1074 return; 1075 } 1076 if(!c->ssl) { 1077 c_hdl->tcp_do_toggle_rw = 0; 1078 c_hdl->use_h2 = 1; 1079 } 1080 #endif 1081 c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1, 1082 UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT, 1083 comm_point_http_handle_callback, c_hdl); 1084 } else { 1085 c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1, 1086 UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT, 1087 comm_point_tcp_handle_callback, c_hdl); 1088 } 1089 if(!c_hdl->ev->ev) { 1090 log_warn("could not ub_event_new, dropped tcp"); 1091 return; 1092 } 1093 log_assert(fd != -1); 1094 (void)fd; 1095 new_fd = comm_point_perform_accept(c, &c_hdl->repinfo.addr, 1096 &c_hdl->repinfo.addrlen); 1097 if(new_fd == -1) 1098 return; 1099 if(c->ssl) { 1100 c_hdl->ssl = incoming_ssl_fd(c->ssl, new_fd); 1101 if(!c_hdl->ssl) { 1102 c_hdl->fd = new_fd; 1103 comm_point_close(c_hdl); 1104 return; 1105 } 1106 c_hdl->ssl_shake_state = comm_ssl_shake_read; 1107 #ifdef USE_WINSOCK 1108 comm_point_tcp_win_bio_cb(c_hdl, c_hdl->ssl); 1109 #endif 1110 } 1111 1112 /* grab the tcp handler buffers */ 1113 c->cur_tcp_count++; 1114 c->tcp_free = c_hdl->tcp_free; 1115 c_hdl->tcp_free = NULL; 1116 if(!c->tcp_free) { 1117 /* stop accepting incoming queries 
for now. */ 1118 comm_point_stop_listening(c); 1119 } 1120 setup_tcp_handler(c_hdl, new_fd, c->cur_tcp_count, c->max_tcp_count); 1121 } 1122 1123 /** Make tcp handler free for next assignment */ 1124 static void 1125 reclaim_tcp_handler(struct comm_point* c) 1126 { 1127 log_assert(c->type == comm_tcp); 1128 if(c->ssl) { 1129 #ifdef HAVE_SSL 1130 SSL_shutdown(c->ssl); 1131 SSL_free(c->ssl); 1132 c->ssl = NULL; 1133 #endif 1134 } 1135 comm_point_close(c); 1136 if(c->tcp_parent) { 1137 if(c != c->tcp_parent->tcp_free) { 1138 c->tcp_parent->cur_tcp_count--; 1139 c->tcp_free = c->tcp_parent->tcp_free; 1140 c->tcp_parent->tcp_free = c; 1141 } 1142 if(!c->tcp_free) { 1143 /* re-enable listening on accept socket */ 1144 comm_point_start_listening(c->tcp_parent, -1, -1); 1145 } 1146 } 1147 c->tcp_more_read_again = NULL; 1148 c->tcp_more_write_again = NULL; 1149 c->tcp_byte_count = 0; 1150 sldns_buffer_clear(c->buffer); 1151 } 1152 1153 /** do the callback when writing is done */ 1154 static void 1155 tcp_callback_writer(struct comm_point* c) 1156 { 1157 log_assert(c->type == comm_tcp); 1158 if(!c->tcp_write_and_read) { 1159 sldns_buffer_clear(c->buffer); 1160 c->tcp_byte_count = 0; 1161 } 1162 if(c->tcp_do_toggle_rw) 1163 c->tcp_is_reading = 1; 1164 /* switch from listening(write) to listening(read) */ 1165 if(c->tcp_req_info) { 1166 tcp_req_info_handle_writedone(c->tcp_req_info); 1167 } else { 1168 comm_point_stop_listening(c); 1169 if(c->tcp_write_and_read) { 1170 fptr_ok(fptr_whitelist_comm_point(c->callback)); 1171 if( (*c->callback)(c, c->cb_arg, NETEVENT_PKT_WRITTEN, 1172 &c->repinfo) ) { 1173 comm_point_start_listening(c, -1, 1174 adjusted_tcp_timeout(c)); 1175 } 1176 } else { 1177 comm_point_start_listening(c, -1, 1178 adjusted_tcp_timeout(c)); 1179 } 1180 } 1181 } 1182 1183 /** do the callback when reading is done */ 1184 static void 1185 tcp_callback_reader(struct comm_point* c) 1186 { 1187 log_assert(c->type == comm_tcp || c->type == comm_local); 1188 
sldns_buffer_flip(c->buffer); 1189 if(c->tcp_do_toggle_rw) 1190 c->tcp_is_reading = 0; 1191 c->tcp_byte_count = 0; 1192 if(c->tcp_req_info) { 1193 tcp_req_info_handle_readdone(c->tcp_req_info); 1194 } else { 1195 if(c->type == comm_tcp) 1196 comm_point_stop_listening(c); 1197 fptr_ok(fptr_whitelist_comm_point(c->callback)); 1198 if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) { 1199 comm_point_start_listening(c, -1, 1200 adjusted_tcp_timeout(c)); 1201 } 1202 } 1203 } 1204 1205 #ifdef HAVE_SSL 1206 /** true if the ssl handshake error has to be squelched from the logs */ 1207 int 1208 squelch_err_ssl_handshake(unsigned long err) 1209 { 1210 if(verbosity >= VERB_QUERY) 1211 return 0; /* only squelch on low verbosity */ 1212 /* this is very specific, we could filter on ERR_GET_REASON() 1213 * (the third element in ERR_PACK) */ 1214 if(err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTPS_PROXY_REQUEST) || 1215 err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTP_REQUEST) || 1216 err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_WRONG_VERSION_NUMBER) || 1217 err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_READ_BYTES, SSL_R_SSLV3_ALERT_BAD_CERTIFICATE) 1218 #ifdef SSL_F_TLS_POST_PROCESS_CLIENT_HELLO 1219 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_POST_PROCESS_CLIENT_HELLO, SSL_R_NO_SHARED_CIPHER) 1220 #endif 1221 #ifdef SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO 1222 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNKNOWN_PROTOCOL) 1223 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNSUPPORTED_PROTOCOL) 1224 # ifdef SSL_R_VERSION_TOO_LOW 1225 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_VERSION_TOO_LOW) 1226 # endif 1227 #endif 1228 ) 1229 return 1; 1230 return 0; 1231 } 1232 #endif /* HAVE_SSL */ 1233 1234 /** continue ssl handshake */ 1235 #ifdef HAVE_SSL 1236 static int 1237 ssl_handshake(struct comm_point* c) 1238 { 1239 int r; 
1240 if(c->ssl_shake_state == comm_ssl_shake_hs_read) { 1241 /* read condition satisfied back to writing */ 1242 comm_point_listen_for_rw(c, 0, 1); 1243 c->ssl_shake_state = comm_ssl_shake_none; 1244 return 1; 1245 } 1246 if(c->ssl_shake_state == comm_ssl_shake_hs_write) { 1247 /* write condition satisfied, back to reading */ 1248 comm_point_listen_for_rw(c, 1, 0); 1249 c->ssl_shake_state = comm_ssl_shake_none; 1250 return 1; 1251 } 1252 1253 ERR_clear_error(); 1254 r = SSL_do_handshake(c->ssl); 1255 if(r != 1) { 1256 int want = SSL_get_error(c->ssl, r); 1257 if(want == SSL_ERROR_WANT_READ) { 1258 if(c->ssl_shake_state == comm_ssl_shake_read) 1259 return 1; 1260 c->ssl_shake_state = comm_ssl_shake_read; 1261 comm_point_listen_for_rw(c, 1, 0); 1262 return 1; 1263 } else if(want == SSL_ERROR_WANT_WRITE) { 1264 if(c->ssl_shake_state == comm_ssl_shake_write) 1265 return 1; 1266 c->ssl_shake_state = comm_ssl_shake_write; 1267 comm_point_listen_for_rw(c, 0, 1); 1268 return 1; 1269 } else if(r == 0) { 1270 return 0; /* closed */ 1271 } else if(want == SSL_ERROR_SYSCALL) { 1272 /* SYSCALL and errno==0 means closed uncleanly */ 1273 #ifdef EPIPE 1274 if(errno == EPIPE && verbosity < 2) 1275 return 0; /* silence 'broken pipe' */ 1276 #endif 1277 #ifdef ECONNRESET 1278 if(errno == ECONNRESET && verbosity < 2) 1279 return 0; /* silence reset by peer */ 1280 #endif 1281 if(errno != 0) 1282 log_err("SSL_handshake syscall: %s", 1283 strerror(errno)); 1284 return 0; 1285 } else { 1286 unsigned long err = ERR_get_error(); 1287 if(!squelch_err_ssl_handshake(err)) { 1288 log_crypto_err_code("ssl handshake failed", err); 1289 log_addr(VERB_OPS, "ssl handshake failed", &c->repinfo.addr, 1290 c->repinfo.addrlen); 1291 } 1292 return 0; 1293 } 1294 } 1295 /* this is where peer verification could take place */ 1296 if((SSL_get_verify_mode(c->ssl)&SSL_VERIFY_PEER)) { 1297 /* verification */ 1298 if(SSL_get_verify_result(c->ssl) == X509_V_OK) { 1299 #ifdef HAVE_SSL_GET1_PEER_CERTIFICATE 1300 
X509* x = SSL_get1_peer_certificate(c->ssl); 1301 #else 1302 X509* x = SSL_get_peer_certificate(c->ssl); 1303 #endif 1304 if(!x) { 1305 log_addr(VERB_ALGO, "SSL connection failed: " 1306 "no certificate", 1307 &c->repinfo.addr, c->repinfo.addrlen); 1308 return 0; 1309 } 1310 log_cert(VERB_ALGO, "peer certificate", x); 1311 #ifdef HAVE_SSL_GET0_PEERNAME 1312 if(SSL_get0_peername(c->ssl)) { 1313 char buf[255]; 1314 snprintf(buf, sizeof(buf), "SSL connection " 1315 "to %s authenticated", 1316 SSL_get0_peername(c->ssl)); 1317 log_addr(VERB_ALGO, buf, &c->repinfo.addr, 1318 c->repinfo.addrlen); 1319 } else { 1320 #endif 1321 log_addr(VERB_ALGO, "SSL connection " 1322 "authenticated", &c->repinfo.addr, 1323 c->repinfo.addrlen); 1324 #ifdef HAVE_SSL_GET0_PEERNAME 1325 } 1326 #endif 1327 X509_free(x); 1328 } else { 1329 #ifdef HAVE_SSL_GET1_PEER_CERTIFICATE 1330 X509* x = SSL_get1_peer_certificate(c->ssl); 1331 #else 1332 X509* x = SSL_get_peer_certificate(c->ssl); 1333 #endif 1334 if(x) { 1335 log_cert(VERB_ALGO, "peer certificate", x); 1336 X509_free(x); 1337 } 1338 log_addr(VERB_ALGO, "SSL connection failed: " 1339 "failed to authenticate", 1340 &c->repinfo.addr, c->repinfo.addrlen); 1341 return 0; 1342 } 1343 } else { 1344 /* unauthenticated, the verify peer flag was not set 1345 * in c->ssl when the ssl object was created from ssl_ctx */ 1346 log_addr(VERB_ALGO, "SSL connection", &c->repinfo.addr, 1347 c->repinfo.addrlen); 1348 } 1349 1350 #ifdef HAVE_SSL_GET0_ALPN_SELECTED 1351 /* check if http2 use is negotiated */ 1352 if(c->type == comm_http && c->h2_session) { 1353 const unsigned char *alpn; 1354 unsigned int alpnlen = 0; 1355 SSL_get0_alpn_selected(c->ssl, &alpn, &alpnlen); 1356 if(alpnlen == 2 && memcmp("h2", alpn, 2) == 0) { 1357 /* connection upgraded to HTTP2 */ 1358 c->tcp_do_toggle_rw = 0; 1359 c->use_h2 = 1; 1360 } 1361 } 1362 #endif 1363 1364 /* setup listen rw correctly */ 1365 if(c->tcp_is_reading) { 1366 if(c->ssl_shake_state != comm_ssl_shake_read) 
1367 comm_point_listen_for_rw(c, 1, 0); 1368 } else { 1369 comm_point_listen_for_rw(c, 0, 1); 1370 } 1371 c->ssl_shake_state = comm_ssl_shake_none; 1372 return 1; 1373 } 1374 #endif /* HAVE_SSL */ 1375 1376 /** ssl read callback on TCP */ 1377 static int 1378 ssl_handle_read(struct comm_point* c) 1379 { 1380 #ifdef HAVE_SSL 1381 int r; 1382 if(c->ssl_shake_state != comm_ssl_shake_none) { 1383 if(!ssl_handshake(c)) 1384 return 0; 1385 if(c->ssl_shake_state != comm_ssl_shake_none) 1386 return 1; 1387 } 1388 if(c->tcp_byte_count < sizeof(uint16_t)) { 1389 /* read length bytes */ 1390 ERR_clear_error(); 1391 if((r=SSL_read(c->ssl, (void*)sldns_buffer_at(c->buffer, 1392 c->tcp_byte_count), (int)(sizeof(uint16_t) - 1393 c->tcp_byte_count))) <= 0) { 1394 int want = SSL_get_error(c->ssl, r); 1395 if(want == SSL_ERROR_ZERO_RETURN) { 1396 if(c->tcp_req_info) 1397 return tcp_req_info_handle_read_close(c->tcp_req_info); 1398 return 0; /* shutdown, closed */ 1399 } else if(want == SSL_ERROR_WANT_READ) { 1400 #ifdef USE_WINSOCK 1401 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 1402 #endif 1403 return 1; /* read more later */ 1404 } else if(want == SSL_ERROR_WANT_WRITE) { 1405 c->ssl_shake_state = comm_ssl_shake_hs_write; 1406 comm_point_listen_for_rw(c, 0, 1); 1407 return 1; 1408 } else if(want == SSL_ERROR_SYSCALL) { 1409 #ifdef ECONNRESET 1410 if(errno == ECONNRESET && verbosity < 2) 1411 return 0; /* silence reset by peer */ 1412 #endif 1413 if(errno != 0) 1414 log_err("SSL_read syscall: %s", 1415 strerror(errno)); 1416 return 0; 1417 } 1418 log_crypto_err("could not SSL_read"); 1419 return 0; 1420 } 1421 c->tcp_byte_count += r; 1422 if(c->tcp_byte_count < sizeof(uint16_t)) 1423 return 1; 1424 if(sldns_buffer_read_u16_at(c->buffer, 0) > 1425 sldns_buffer_capacity(c->buffer)) { 1426 verbose(VERB_QUERY, "ssl: dropped larger than buffer"); 1427 return 0; 1428 } 1429 sldns_buffer_set_limit(c->buffer, 1430 sldns_buffer_read_u16_at(c->buffer, 0)); 1431 
if(sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) { 1432 verbose(VERB_QUERY, "ssl: dropped bogus too short."); 1433 return 0; 1434 } 1435 sldns_buffer_skip(c->buffer, (ssize_t)(c->tcp_byte_count-sizeof(uint16_t))); 1436 verbose(VERB_ALGO, "Reading ssl tcp query of length %d", 1437 (int)sldns_buffer_limit(c->buffer)); 1438 } 1439 if(sldns_buffer_remaining(c->buffer) > 0) { 1440 ERR_clear_error(); 1441 r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer), 1442 (int)sldns_buffer_remaining(c->buffer)); 1443 if(r <= 0) { 1444 int want = SSL_get_error(c->ssl, r); 1445 if(want == SSL_ERROR_ZERO_RETURN) { 1446 if(c->tcp_req_info) 1447 return tcp_req_info_handle_read_close(c->tcp_req_info); 1448 return 0; /* shutdown, closed */ 1449 } else if(want == SSL_ERROR_WANT_READ) { 1450 #ifdef USE_WINSOCK 1451 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 1452 #endif 1453 return 1; /* read more later */ 1454 } else if(want == SSL_ERROR_WANT_WRITE) { 1455 c->ssl_shake_state = comm_ssl_shake_hs_write; 1456 comm_point_listen_for_rw(c, 0, 1); 1457 return 1; 1458 } else if(want == SSL_ERROR_SYSCALL) { 1459 #ifdef ECONNRESET 1460 if(errno == ECONNRESET && verbosity < 2) 1461 return 0; /* silence reset by peer */ 1462 #endif 1463 if(errno != 0) 1464 log_err("SSL_read syscall: %s", 1465 strerror(errno)); 1466 return 0; 1467 } 1468 log_crypto_err("could not SSL_read"); 1469 return 0; 1470 } 1471 sldns_buffer_skip(c->buffer, (ssize_t)r); 1472 } 1473 if(sldns_buffer_remaining(c->buffer) <= 0) { 1474 tcp_callback_reader(c); 1475 } 1476 return 1; 1477 #else 1478 (void)c; 1479 return 0; 1480 #endif /* HAVE_SSL */ 1481 } 1482 1483 /** ssl write callback on TCP */ 1484 static int 1485 ssl_handle_write(struct comm_point* c) 1486 { 1487 #ifdef HAVE_SSL 1488 int r; 1489 if(c->ssl_shake_state != comm_ssl_shake_none) { 1490 if(!ssl_handshake(c)) 1491 return 0; 1492 if(c->ssl_shake_state != comm_ssl_shake_none) 1493 return 1; 1494 } 1495 /* ignore return, if fails we may simply block */ 
1496 (void)SSL_set_mode(c->ssl, (long)SSL_MODE_ENABLE_PARTIAL_WRITE); 1497 if((c->tcp_write_and_read?c->tcp_write_byte_count:c->tcp_byte_count) < sizeof(uint16_t)) { 1498 uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(c->buffer)); 1499 ERR_clear_error(); 1500 if(c->tcp_write_and_read) { 1501 if(c->tcp_write_pkt_len + 2 < LDNS_RR_BUF_SIZE) { 1502 /* combine the tcp length and the query for 1503 * write, this emulates writev */ 1504 uint8_t buf[LDNS_RR_BUF_SIZE]; 1505 memmove(buf, &len, sizeof(uint16_t)); 1506 memmove(buf+sizeof(uint16_t), 1507 c->tcp_write_pkt, 1508 c->tcp_write_pkt_len); 1509 r = SSL_write(c->ssl, 1510 (void*)(buf+c->tcp_write_byte_count), 1511 c->tcp_write_pkt_len + 2 - 1512 c->tcp_write_byte_count); 1513 } else { 1514 r = SSL_write(c->ssl, 1515 (void*)(((uint8_t*)&len)+c->tcp_write_byte_count), 1516 (int)(sizeof(uint16_t)-c->tcp_write_byte_count)); 1517 } 1518 } else if(sizeof(uint16_t)+sldns_buffer_remaining(c->buffer) < 1519 LDNS_RR_BUF_SIZE) { 1520 /* combine the tcp length and the query for write, 1521 * this emulates writev */ 1522 uint8_t buf[LDNS_RR_BUF_SIZE]; 1523 memmove(buf, &len, sizeof(uint16_t)); 1524 memmove(buf+sizeof(uint16_t), 1525 sldns_buffer_current(c->buffer), 1526 sldns_buffer_remaining(c->buffer)); 1527 r = SSL_write(c->ssl, (void*)(buf+c->tcp_byte_count), 1528 (int)(sizeof(uint16_t)+ 1529 sldns_buffer_remaining(c->buffer) 1530 - c->tcp_byte_count)); 1531 } else { 1532 r = SSL_write(c->ssl, 1533 (void*)(((uint8_t*)&len)+c->tcp_byte_count), 1534 (int)(sizeof(uint16_t)-c->tcp_byte_count)); 1535 } 1536 if(r <= 0) { 1537 int want = SSL_get_error(c->ssl, r); 1538 if(want == SSL_ERROR_ZERO_RETURN) { 1539 return 0; /* closed */ 1540 } else if(want == SSL_ERROR_WANT_READ) { 1541 c->ssl_shake_state = comm_ssl_shake_hs_read; 1542 comm_point_listen_for_rw(c, 1, 0); 1543 return 1; /* wait for read condition */ 1544 } else if(want == SSL_ERROR_WANT_WRITE) { 1545 #ifdef USE_WINSOCK 1546 
ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 1547 #endif 1548 return 1; /* write more later */ 1549 } else if(want == SSL_ERROR_SYSCALL) { 1550 #ifdef EPIPE 1551 if(errno == EPIPE && verbosity < 2) 1552 return 0; /* silence 'broken pipe' */ 1553 #endif 1554 if(errno != 0) 1555 log_err("SSL_write syscall: %s", 1556 strerror(errno)); 1557 return 0; 1558 } 1559 log_crypto_err("could not SSL_write"); 1560 return 0; 1561 } 1562 if(c->tcp_write_and_read) { 1563 c->tcp_write_byte_count += r; 1564 if(c->tcp_write_byte_count < sizeof(uint16_t)) 1565 return 1; 1566 } else { 1567 c->tcp_byte_count += r; 1568 if(c->tcp_byte_count < sizeof(uint16_t)) 1569 return 1; 1570 sldns_buffer_set_position(c->buffer, c->tcp_byte_count - 1571 sizeof(uint16_t)); 1572 } 1573 if((!c->tcp_write_and_read && sldns_buffer_remaining(c->buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) { 1574 tcp_callback_writer(c); 1575 return 1; 1576 } 1577 } 1578 log_assert(c->tcp_write_and_read || sldns_buffer_remaining(c->buffer) > 0); 1579 log_assert(!c->tcp_write_and_read || c->tcp_write_byte_count < c->tcp_write_pkt_len + 2); 1580 ERR_clear_error(); 1581 if(c->tcp_write_and_read) { 1582 r = SSL_write(c->ssl, (void*)(c->tcp_write_pkt + c->tcp_write_byte_count - 2), 1583 (int)(c->tcp_write_pkt_len + 2 - c->tcp_write_byte_count)); 1584 } else { 1585 r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer), 1586 (int)sldns_buffer_remaining(c->buffer)); 1587 } 1588 if(r <= 0) { 1589 int want = SSL_get_error(c->ssl, r); 1590 if(want == SSL_ERROR_ZERO_RETURN) { 1591 return 0; /* closed */ 1592 } else if(want == SSL_ERROR_WANT_READ) { 1593 c->ssl_shake_state = comm_ssl_shake_hs_read; 1594 comm_point_listen_for_rw(c, 1, 0); 1595 return 1; /* wait for read condition */ 1596 } else if(want == SSL_ERROR_WANT_WRITE) { 1597 #ifdef USE_WINSOCK 1598 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 1599 #endif 1600 return 1; /* write more later */ 1601 } else 
if(want == SSL_ERROR_SYSCALL) { 1602 #ifdef EPIPE 1603 if(errno == EPIPE && verbosity < 2) 1604 return 0; /* silence 'broken pipe' */ 1605 #endif 1606 if(errno != 0) 1607 log_err("SSL_write syscall: %s", 1608 strerror(errno)); 1609 return 0; 1610 } 1611 log_crypto_err("could not SSL_write"); 1612 return 0; 1613 } 1614 if(c->tcp_write_and_read) { 1615 c->tcp_write_byte_count += r; 1616 } else { 1617 sldns_buffer_skip(c->buffer, (ssize_t)r); 1618 } 1619 1620 if((!c->tcp_write_and_read && sldns_buffer_remaining(c->buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) { 1621 tcp_callback_writer(c); 1622 } 1623 return 1; 1624 #else 1625 (void)c; 1626 return 0; 1627 #endif /* HAVE_SSL */ 1628 } 1629 1630 /** handle ssl tcp connection with dns contents */ 1631 static int 1632 ssl_handle_it(struct comm_point* c, int is_write) 1633 { 1634 /* handle case where renegotiation wants read during write call 1635 * or write during read calls */ 1636 if(is_write && c->ssl_shake_state == comm_ssl_shake_hs_write) 1637 return ssl_handle_read(c); 1638 else if(!is_write && c->ssl_shake_state == comm_ssl_shake_hs_read) 1639 return ssl_handle_write(c); 1640 /* handle read events for read operation and write events for a 1641 * write operation */ 1642 else if(!is_write) 1643 return ssl_handle_read(c); 1644 return ssl_handle_write(c); 1645 } 1646 1647 /** Handle tcp reading callback. 1648 * @param fd: file descriptor of socket. 1649 * @param c: comm point to read from into buffer. 1650 * @param short_ok: if true, very short packets are OK (for comm_local). 
* @return: 0 on error
 */
static int
comm_point_tcp_handle_read(int fd, struct comm_point* c, int short_ok)
{
	ssize_t r;
	log_assert(c->type == comm_tcp || c->type == comm_local);
	/* TLS connections are handled completely by the ssl routines */
	if(c->ssl)
		return ssl_handle_it(c, 0);
	if(!c->tcp_is_reading && !c->tcp_write_and_read)
		return 0;

	log_assert(fd != -1);
	if(c->tcp_byte_count < sizeof(uint16_t)) {
		/* read length bytes */
		r = recv(fd,(void*)sldns_buffer_at(c->buffer,c->tcp_byte_count),
			sizeof(uint16_t)-c->tcp_byte_count, 0);
		if(r == 0) {
			if(c->tcp_req_info)
				return tcp_req_info_handle_read_close(c->tcp_req_info);
			return 0;
		} else if(r == -1) {
#ifndef USE_WINSOCK
			if(errno == EINTR || errno == EAGAIN)
				return 1;
#ifdef ECONNRESET
			if(errno == ECONNRESET && verbosity < 2)
				return 0; /* silence reset by peer */
#endif
#ifdef ECONNREFUSED
			if(errno == ECONNREFUSED && verbosity < 2)
				return 0; /* silence reset by peer */
#endif
#ifdef ENETUNREACH
			if(errno == ENETUNREACH && verbosity < 2)
				return 0; /* silence it */
#endif
#ifdef EHOSTDOWN
			if(errno == EHOSTDOWN && verbosity < 2)
				return 0; /* silence it */
#endif
#ifdef EHOSTUNREACH
			if(errno == EHOSTUNREACH && verbosity < 2)
				return 0; /* silence it */
#endif
#ifdef ENETDOWN
			if(errno == ENETDOWN && verbosity < 2)
				return 0; /* silence it */
#endif
#ifdef EACCES
			if(errno == EACCES && verbosity < 2)
				return 0; /* silence it */
#endif
#ifdef ENOTCONN
			if(errno == ENOTCONN) {
				log_err_addr("read (in tcp s) failed and this could be because TCP Fast Open is enabled [--disable-tfo-client --disable-tfo-server] but does not work", sock_strerror(errno),
					&c->repinfo.addr, c->repinfo.addrlen);
				return 0;
			}
#endif
#else /* USE_WINSOCK */
			if(WSAGetLastError() == WSAECONNREFUSED && verbosity < 2)
				return 0;
			if(WSAGetLastError() == WSAEHOSTDOWN && verbosity < 2)
				return 0;
			if(WSAGetLastError() == WSAEHOSTUNREACH && verbosity < 2)
				return 0;
			if(WSAGetLastError() == WSAENETDOWN && verbosity < 2)
				return 0;
			if(WSAGetLastError() == WSAENETUNREACH && verbosity < 2)
				return 0;
			if(WSAGetLastError() == WSAECONNRESET)
				return 0;
			if(WSAGetLastError() == WSAEINPROGRESS)
				return 1;
			if(WSAGetLastError() == WSAEWOULDBLOCK) {
				ub_winsock_tcp_wouldblock(c->ev->ev,
					UB_EV_READ);
				return 1;
			}
#endif
			log_err_addr("read (in tcp s)", sock_strerror(errno),
				&c->repinfo.addr, c->repinfo.addrlen);
			return 0;
		}
		c->tcp_byte_count += r;
		if(c->tcp_byte_count != sizeof(uint16_t))
			return 1;
		/* the length prefix is complete, check it before use */
		if(sldns_buffer_read_u16_at(c->buffer, 0) >
			sldns_buffer_capacity(c->buffer)) {
			verbose(VERB_QUERY, "tcp: dropped larger than buffer");
			return 0;
		}
		sldns_buffer_set_limit(c->buffer,
			sldns_buffer_read_u16_at(c->buffer, 0));
		if(!short_ok &&
			sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
			verbose(VERB_QUERY, "tcp: dropped bogus too short.");
			return 0;
		}
		verbose(VERB_ALGO, "Reading tcp query of length %d",
			(int)sldns_buffer_limit(c->buffer));
	}

	if(sldns_buffer_remaining(c->buffer) == 0)
		log_err("in comm_point_tcp_handle_read buffer_remaining is not > 0 as expected, continuing with (harmless) 0 length recv");
	r = recv(fd, (void*)sldns_buffer_current(c->buffer),
		sldns_buffer_remaining(c->buffer), 0);
	if(r == 0) {
		if(c->tcp_req_info)
			return tcp_req_info_handle_read_close(c->tcp_req_info);
		return 0;
	} else if(r == -1) {
		/* NOTE(review): unlike the length read above, the non-winsock
		 * path here does not silence ECONNRESET at low verbosity;
		 * confirm whether that difference is intended. */
#ifndef USE_WINSOCK
		if(errno == EINTR || errno == EAGAIN)
			return 1;
#else /* USE_WINSOCK */
		if(WSAGetLastError() == WSAECONNRESET)
			return 0;
		if(WSAGetLastError() == WSAEINPROGRESS)
			return 1;
		if(WSAGetLastError() == WSAEWOULDBLOCK) {
			ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
			return 1;
		}
#endif
		log_err_addr("read (in tcp r)", sock_strerror(errno),
			&c->repinfo.addr, c->repinfo.addrlen);
		return 0;
	}
	sldns_buffer_skip(c->buffer, r);
	/* message complete, hand it to the reader callback */
	if(sldns_buffer_remaining(c->buffer) <= 0) {
		tcp_callback_reader(c);
	}
	return 1;
}

/**
 * Handle tcp writing callback.
 * Checks the result of a nonblocking connect on the first write, hands
 * TLS connections to ssl_handle_it, optionally attempts a TCP fast open
 * send, and otherwise writes the two byte length prefix followed by the
 * message. In write-and-read mode the data comes from c->tcp_write_pkt
 * with progress in c->tcp_write_byte_count, otherwise from the buffer with
 * progress in c->tcp_byte_count. tcp_callback_writer is called when the
 * message is completely written.
 * @param fd: file descriptor of socket.
 * @param c: comm point to write buffer out of.
 * @return: 0 on error
 */
static int
comm_point_tcp_handle_write(int fd, struct comm_point* c)
{
	ssize_t r;
	struct sldns_buffer *buffer;
	log_assert(c->type == comm_tcp);
#ifdef USE_DNSCRYPT
	buffer = c->dnscrypt_buffer;
#else
	buffer = c->buffer;
#endif
	if(c->tcp_is_reading && !c->ssl && !c->tcp_write_and_read)
		return 0;
	log_assert(fd != -1);
	if(((!c->tcp_write_and_read && c->tcp_byte_count == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == 0)) && c->tcp_check_nb_connect) {
		/* check for pending error from nonblocking connect */
		/* from Stevens, unix network programming, vol1, 3rd ed, p450*/
		int error = 0;
		socklen_t len = (socklen_t)sizeof(error);
		if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error,
			&len) < 0){
#ifndef USE_WINSOCK
			error = errno; /* on solaris errno is error */
#else /* USE_WINSOCK */
			error = WSAGetLastError();
#endif
		}
		/* both preprocessor arms below end inside the same
		 * 'else if(error != 0) {' block, closed after the #endif */
#ifndef USE_WINSOCK
#if defined(EINPROGRESS) && defined(EWOULDBLOCK)
		if(error == EINPROGRESS || error == EWOULDBLOCK)
			return 1; /* try again later */
		else
#endif
		if(error != 0 && verbosity < 2)
			return 0; /* silence lots of chatter in the logs */
		else if(error != 0) {
			log_err_addr("tcp connect", strerror(error),
				&c->repinfo.addr, c->repinfo.addrlen);
#else /* USE_WINSOCK */
		/* examine error */
		if(error == WSAEINPROGRESS)
			return 1;
		else if(error == WSAEWOULDBLOCK) {
			ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
			return 1;
		} else if(error != 0 && verbosity < 2)
			return 0;
		else if(error != 0) {
			log_err_addr("tcp connect", wsa_strerror(error),
				&c->repinfo.addr, c->repinfo.addrlen);
#endif /* USE_WINSOCK */
			return 0;
		}
	}
	if(c->ssl)
		return ssl_handle_it(c, 1);

#ifdef USE_MSG_FASTOPEN
	/* Only try this on first use of a connection that uses tfo,
	   otherwise fall through to normal write */
	/* Also, TFO support on WINDOWS not implemented at the moment */
	if(c->tcp_do_fastopen == 1) {
		/* this form of sendmsg() does both a connect() and send() so need to
		   look for various flavours of error*/
		uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(buffer));
		struct msghdr msg;
		struct iovec iov[2];
		c->tcp_do_fastopen = 0;
		memset(&msg, 0, sizeof(msg));
		/* iov[0] is the (rest of the) length prefix, iov[1] the
		 * message */
		if(c->tcp_write_and_read) {
			iov[0].iov_base = (uint8_t*)&len + c->tcp_write_byte_count;
			iov[0].iov_len = sizeof(uint16_t) - c->tcp_write_byte_count;
			iov[1].iov_base = c->tcp_write_pkt;
			iov[1].iov_len = c->tcp_write_pkt_len;
		} else {
			iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
			iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
			iov[1].iov_base = sldns_buffer_begin(buffer);
			iov[1].iov_len = sldns_buffer_limit(buffer);
		}
		log_assert(iov[0].iov_len > 0);
		msg.msg_name = &c->repinfo.addr;
		msg.msg_namelen = c->repinfo.addrlen;
		msg.msg_iov = iov;
		msg.msg_iovlen = 2;
		r = sendmsg(fd, &msg, MSG_FASTOPEN);
		if (r == -1) {
#if defined(EINPROGRESS) && defined(EWOULDBLOCK)
			/* Handshake is underway, maybe because no TFO cookie available.
			   Come back to write the message*/
			if(errno == EINPROGRESS || errno == EWOULDBLOCK)
				return 1;
#endif
			if(errno == EINTR || errno == EAGAIN)
				return 1;
			/* Not handling EISCONN here as shouldn't ever hit that case.*/
			if(errno != EPIPE
#ifdef EOPNOTSUPP
				/* if /proc/sys/net/ipv4/tcp_fastopen is
				 * disabled on Linux, sendmsg may return
				 * 'Operation not supported', if so
				 * fallthrough to ordinary connect. */
				&& errno != EOPNOTSUPP
#endif
				&& errno != 0) {
				if(verbosity < 2)
					return 0; /* silence lots of chatter in the logs */
				log_err_addr("tcp sendmsg", strerror(errno),
					&c->repinfo.addr, c->repinfo.addrlen);
				return 0;
			}
			verbose(VERB_ALGO, "tcp sendmsg for fastopen failed (with %s), try normal connect", strerror(errno));
			/* fallthrough to nonFASTOPEN
			 * (MSG_FASTOPEN on Linux 3 produces EPIPE)
			 * we need to perform connect() */
			if(connect(fd, (struct sockaddr *)&c->repinfo.addr, c->repinfo.addrlen) == -1) {
#ifdef EINPROGRESS
				if(errno == EINPROGRESS)
					return 1; /* wait until connect done*/
#endif
#ifdef USE_WINSOCK
				if(WSAGetLastError() == WSAEINPROGRESS ||
					WSAGetLastError() == WSAEWOULDBLOCK)
					return 1; /* wait until connect done*/
#endif
				if(tcp_connect_errno_needs_log(
					(struct sockaddr *)&c->repinfo.addr, c->repinfo.addrlen)) {
					log_err_addr("outgoing tcp: connect after EPIPE for fastopen",
						strerror(errno), &c->repinfo.addr, c->repinfo.addrlen);
				}
				return 0;
			}

		} else {
			if(c->tcp_write_and_read) {
				c->tcp_write_byte_count += r;
				if(c->tcp_write_byte_count < sizeof(uint16_t))
					return 1;
			} else {
				c->tcp_byte_count += r;
				if(c->tcp_byte_count < sizeof(uint16_t))
					return 1;
				sldns_buffer_set_position(buffer, c->tcp_byte_count -
					sizeof(uint16_t));
			}
			if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
				tcp_callback_writer(c);
				return 1;
			}
		}
	}
#endif /* USE_MSG_FASTOPEN */

	if((c->tcp_write_and_read?c->tcp_write_byte_count:c->tcp_byte_count) < sizeof(uint16_t)) {
		/* the length prefix is not completely written yet */
		uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(buffer));
#ifdef HAVE_WRITEV
		struct iovec iov[2];
		if(c->tcp_write_and_read) {
			iov[0].iov_base = (uint8_t*)&len + c->tcp_write_byte_count;
			iov[0].iov_len = sizeof(uint16_t) - c->tcp_write_byte_count;
			iov[1].iov_base = c->tcp_write_pkt;
			iov[1].iov_len = c->tcp_write_pkt_len;
		} else {
			iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
			iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
			iov[1].iov_base = sldns_buffer_begin(buffer);
			iov[1].iov_len = sldns_buffer_limit(buffer);
		}
		log_assert(iov[0].iov_len > 0);
		r = writev(fd, iov, 2);
#else /* HAVE_WRITEV */
		if(c->tcp_write_and_read) {
			r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_write_byte_count),
				sizeof(uint16_t)-c->tcp_write_byte_count, 0);
		} else {
			r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_byte_count),
				sizeof(uint16_t)-c->tcp_byte_count, 0);
		}
#endif /* HAVE_WRITEV */
		if(r == -1) {
#ifndef USE_WINSOCK
#  ifdef EPIPE
			if(errno == EPIPE && verbosity < 2)
				return 0; /* silence 'broken pipe' */
#endif
			if(errno == EINTR || errno == EAGAIN)
				return 1;
#ifdef ECONNRESET
			if(errno == ECONNRESET && verbosity < 2)
				return 0; /* silence reset by peer */
#endif
#  ifdef HAVE_WRITEV
			log_err_addr("tcp writev", strerror(errno),
				&c->repinfo.addr, c->repinfo.addrlen);
#  else /* HAVE_WRITEV */
			log_err_addr("tcp send s", strerror(errno),
				&c->repinfo.addr, c->repinfo.addrlen);
#  endif /* HAVE_WRITEV */
#else
			if(WSAGetLastError() == WSAENOTCONN)
				return 1;
			if(WSAGetLastError() == WSAEINPROGRESS)
				return 1;
			if(WSAGetLastError() == WSAEWOULDBLOCK) {
				ub_winsock_tcp_wouldblock(c->ev->ev,
					UB_EV_WRITE);
				return 1;
			}
			if(WSAGetLastError() == WSAECONNRESET && verbosity < 2)
				return 0; /* silence reset by peer */
			log_err_addr("tcp send s",
				wsa_strerror(WSAGetLastError()),
				&c->repinfo.addr, c->repinfo.addrlen);
#endif
			return 0;
		}
		if(c->tcp_write_and_read) {
			c->tcp_write_byte_count += r;
			if(c->tcp_write_byte_count < sizeof(uint16_t))
				return 1;
		} else {
			c->tcp_byte_count += r;
			if(c->tcp_byte_count < sizeof(uint16_t))
				return 1;
			/* bytes written beyond the length prefix are message
			 * bytes, move the buffer position along */
			sldns_buffer_set_position(buffer, c->tcp_byte_count -
				sizeof(uint16_t));
		}
		if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
			tcp_callback_writer(c);
			return 1;
		}
	}
	log_assert(c->tcp_write_and_read || sldns_buffer_remaining(buffer) > 0);
	log_assert(!c->tcp_write_and_read || c->tcp_write_byte_count < c->tcp_write_pkt_len + 2);
	if(c->tcp_write_and_read) {
		/* tcp_write_byte_count includes the 2 length bytes */
		r = send(fd, (void*)(c->tcp_write_pkt + c->tcp_write_byte_count - 2),
			c->tcp_write_pkt_len + 2 - c->tcp_write_byte_count, 0);
	} else {
		r = send(fd, (void*)sldns_buffer_current(buffer),
			sldns_buffer_remaining(buffer), 0);
	}
	if(r == -1) {
#ifndef USE_WINSOCK
		if(errno == EINTR || errno == EAGAIN)
			return 1;
#ifdef ECONNRESET
		if(errno == ECONNRESET && verbosity < 2)
			return 0; /* silence reset by peer */
#endif
#else
		if(WSAGetLastError() == WSAEINPROGRESS)
			return 1;
		if(WSAGetLastError() == WSAEWOULDBLOCK) {
			ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
			return 1;
		}
		if(WSAGetLastError() == WSAECONNRESET && verbosity < 2)
			return 0; /* silence reset by peer */
#endif
		log_err_addr("tcp send r", sock_strerror(errno),
			&c->repinfo.addr, c->repinfo.addrlen);
		return 0;
	}
	if(c->tcp_write_and_read) {
		c->tcp_write_byte_count += r;
	} else {
		sldns_buffer_skip(buffer, r);
	}

	if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
		tcp_callback_writer(c);
	}

	return 1;
}

/**
 * read again to drain buffers when there could be more to read.
 * Loops while the read_again flag of the tcp_req_info is set; the flag is
 * cleared before every round, so the loop only continues when the read or
 * write routine sets it again. On failure the handler is reclaimed and,
 * unless tcp_do_close is set, the callback is called with NETEVENT_CLOSED.
 * @param fd: file descriptor of socket.
 * @param c: comm point with tcp_req_info set.
 */
static void
tcp_req_info_read_again(int fd, struct comm_point* c)
{
	while(c->tcp_req_info->read_again) {
		int r;
		c->tcp_req_info->read_again = 0;
		if(c->tcp_is_reading)
			r = comm_point_tcp_handle_read(fd, c, 0);
		else 	r = comm_point_tcp_handle_write(fd, c);
		if(!r) {
			reclaim_tcp_handler(c);
			if(!c->tcp_do_close) {
				fptr_ok(fptr_whitelist_comm_point(
					c->callback));
				(void)(*c->callback)(c, c->cb_arg,
					NETEVENT_CLOSED, NULL);
			}
			return;
		}
	}
}

/**
 * read again to drain buffers when there could be more to read.
 * Uses the bool that c->tcp_more_read_again points to. On a read failure
 * the handler is reclaimed and, unless tcp_do_close is set, the callback
 * is called with NETEVENT_CLOSED.
 * @param fd: file descriptor of socket.
 * @param c: comm point to read from.
 */
static void
tcp_more_read_again(int fd, struct comm_point* c)
{
	/* if the packet is done, but another one could be waiting on
	 * the connection, the callback signals this, and we try again */
	/* this continues until the read routines get EAGAIN or so,
	 * and thus does not call the callback, and the bool is 0 */
	int* moreread = c->tcp_more_read_again;
	while(moreread && *moreread) {
		*moreread = 0;
		if(!comm_point_tcp_handle_read(fd, c, 0)) {
			reclaim_tcp_handler(c);
			if(!c->tcp_do_close) {
				fptr_ok(fptr_whitelist_comm_point(
					c->callback));
				(void)(*c->callback)(c, c->cb_arg,
					NETEVENT_CLOSED, NULL);
			}
			return;
		}
	}
}

/**
 * write again to fill up when there could be more to write.
 * Uses the bool that c->tcp_more_write_again points to. On a write
 * failure the handler is reclaimed and, unless tcp_do_close is set, the
 * callback is called with NETEVENT_CLOSED.
 * @param fd: file descriptor of socket.
 * @param c: comm point to write to.
 */
static void
tcp_more_write_again(int fd, struct comm_point* c)
{
	/* if the packet is done, but another is waiting to be written,
	 * the callback signals it and we try again. */
	/* this continues until the write routines get EAGAIN or so,
	 * and thus does not call the callback, and the bool is 0 */
	int* morewrite = c->tcp_more_write_again;
	while(morewrite && *morewrite) {
		*morewrite = 0;
		if(!comm_point_tcp_handle_write(fd, c)) {
			reclaim_tcp_handler(c);
			if(!c->tcp_do_close) {
				fptr_ok(fptr_whitelist_comm_point(
					c->callback));
				(void)(*c->callback)(c, c->cb_arg,
					NETEVENT_CLOSED, NULL);
			}
			return;
		}
	}
}

void
comm_point_tcp_handle_callback(int fd, short event, void* arg)
{
	struct comm_point* c = (struct comm_point*)arg;
	log_assert(c->type == comm_tcp);
	ub_comm_base_now(c->ev->base);

#ifdef USE_DNSCRYPT
	/* Initialize if this is a dnscrypt socket */
	if(c->tcp_parent) {
		c->dnscrypt = c->tcp_parent->dnscrypt;
	}
	if(c->dnscrypt && c->dnscrypt_buffer == c->buffer) {
		c->dnscrypt_buffer = sldns_buffer_new(sldns_buffer_capacity(c->buffer));
		if(!c->dnscrypt_buffer) {
			log_err("Could not allocate dnscrypt buffer");
			reclaim_tcp_handler(c);
			if(!c->tcp_do_close) {
				fptr_ok(fptr_whitelist_comm_point(
					c->callback));
				(void)(*c->callback)(c, c->cb_arg,
					NETEVENT_CLOSED, NULL);
			}
			return;
		}
	}
#endif

	if(event&UB_EV_TIMEOUT) {
		verbose(VERB_QUERY, "tcp took too long, dropped");
		reclaim_tcp_handler(c);
		if(!c->tcp_do_close) {
			fptr_ok(fptr_whitelist_comm_point(c->callback));
			(void)(*c->callback)(c, c->cb_arg,
				NETEVENT_TIMEOUT, NULL);
		}
		return;
	}
	if(event&UB_EV_READ
#ifdef USE_MSG_FASTOPEN
		&& !(c->tcp_do_fastopen && (event&UB_EV_WRITE))
#endif
		) {
		int has_tcpq = (c->tcp_req_info != NULL);
		int*
moreread = c->tcp_more_read_again; 2186 if(!comm_point_tcp_handle_read(fd, c, 0)) { 2187 reclaim_tcp_handler(c); 2188 if(!c->tcp_do_close) { 2189 fptr_ok(fptr_whitelist_comm_point( 2190 c->callback)); 2191 (void)(*c->callback)(c, c->cb_arg, 2192 NETEVENT_CLOSED, NULL); 2193 } 2194 return; 2195 } 2196 if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again) 2197 tcp_req_info_read_again(fd, c); 2198 if(moreread && *moreread) 2199 tcp_more_read_again(fd, c); 2200 return; 2201 } 2202 if(event&UB_EV_WRITE) { 2203 int has_tcpq = (c->tcp_req_info != NULL); 2204 int* morewrite = c->tcp_more_write_again; 2205 if(!comm_point_tcp_handle_write(fd, c)) { 2206 reclaim_tcp_handler(c); 2207 if(!c->tcp_do_close) { 2208 fptr_ok(fptr_whitelist_comm_point( 2209 c->callback)); 2210 (void)(*c->callback)(c, c->cb_arg, 2211 NETEVENT_CLOSED, NULL); 2212 } 2213 return; 2214 } 2215 if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again) 2216 tcp_req_info_read_again(fd, c); 2217 if(morewrite && *morewrite) 2218 tcp_more_write_again(fd, c); 2219 return; 2220 } 2221 log_err("Ignored event %d for tcphdl.", event); 2222 } 2223 2224 /** Make http handler free for next assignment */ 2225 static void 2226 reclaim_http_handler(struct comm_point* c) 2227 { 2228 log_assert(c->type == comm_http); 2229 if(c->ssl) { 2230 #ifdef HAVE_SSL 2231 SSL_shutdown(c->ssl); 2232 SSL_free(c->ssl); 2233 c->ssl = NULL; 2234 #endif 2235 } 2236 comm_point_close(c); 2237 if(c->tcp_parent) { 2238 if(c != c->tcp_parent->tcp_free) { 2239 c->tcp_parent->cur_tcp_count--; 2240 c->tcp_free = c->tcp_parent->tcp_free; 2241 c->tcp_parent->tcp_free = c; 2242 } 2243 if(!c->tcp_free) { 2244 /* re-enable listening on accept socket */ 2245 comm_point_start_listening(c->tcp_parent, -1, -1); 2246 } 2247 } 2248 } 2249 2250 /** read more data for http (with ssl) */ 2251 static int 2252 ssl_http_read_more(struct comm_point* c) 2253 { 2254 #ifdef HAVE_SSL 2255 int r; 2256 log_assert(sldns_buffer_remaining(c->buffer) > 0); 2257 
ERR_clear_error(); 2258 r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer), 2259 (int)sldns_buffer_remaining(c->buffer)); 2260 if(r <= 0) { 2261 int want = SSL_get_error(c->ssl, r); 2262 if(want == SSL_ERROR_ZERO_RETURN) { 2263 return 0; /* shutdown, closed */ 2264 } else if(want == SSL_ERROR_WANT_READ) { 2265 return 1; /* read more later */ 2266 } else if(want == SSL_ERROR_WANT_WRITE) { 2267 c->ssl_shake_state = comm_ssl_shake_hs_write; 2268 comm_point_listen_for_rw(c, 0, 1); 2269 return 1; 2270 } else if(want == SSL_ERROR_SYSCALL) { 2271 #ifdef ECONNRESET 2272 if(errno == ECONNRESET && verbosity < 2) 2273 return 0; /* silence reset by peer */ 2274 #endif 2275 if(errno != 0) 2276 log_err("SSL_read syscall: %s", 2277 strerror(errno)); 2278 return 0; 2279 } 2280 log_crypto_err("could not SSL_read"); 2281 return 0; 2282 } 2283 verbose(VERB_ALGO, "ssl http read more skip to %d + %d", 2284 (int)sldns_buffer_position(c->buffer), (int)r); 2285 sldns_buffer_skip(c->buffer, (ssize_t)r); 2286 return 1; 2287 #else 2288 (void)c; 2289 return 0; 2290 #endif /* HAVE_SSL */ 2291 } 2292 2293 /** read more data for http */ 2294 static int 2295 http_read_more(int fd, struct comm_point* c) 2296 { 2297 ssize_t r; 2298 log_assert(sldns_buffer_remaining(c->buffer) > 0); 2299 r = recv(fd, (void*)sldns_buffer_current(c->buffer), 2300 sldns_buffer_remaining(c->buffer), 0); 2301 if(r == 0) { 2302 return 0; 2303 } else if(r == -1) { 2304 #ifndef USE_WINSOCK 2305 if(errno == EINTR || errno == EAGAIN) 2306 return 1; 2307 #else /* USE_WINSOCK */ 2308 if(WSAGetLastError() == WSAECONNRESET) 2309 return 0; 2310 if(WSAGetLastError() == WSAEINPROGRESS) 2311 return 1; 2312 if(WSAGetLastError() == WSAEWOULDBLOCK) { 2313 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 2314 return 1; 2315 } 2316 #endif 2317 log_err_addr("read (in http r)", sock_strerror(errno), 2318 &c->repinfo.addr, c->repinfo.addrlen); 2319 return 0; 2320 } 2321 verbose(VERB_ALGO, "http read more skip to %d + %d", 2322 
(int)sldns_buffer_position(c->buffer), (int)r); 2323 sldns_buffer_skip(c->buffer, r); 2324 return 1; 2325 } 2326 2327 /** return true if http header has been read (one line complete) */ 2328 static int 2329 http_header_done(sldns_buffer* buf) 2330 { 2331 size_t i; 2332 for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) { 2333 /* there was a \r before the \n, but we ignore that */ 2334 if((char)sldns_buffer_read_u8_at(buf, i) == '\n') 2335 return 1; 2336 } 2337 return 0; 2338 } 2339 2340 /** return character string into buffer for header line, moves buffer 2341 * past that line and puts zero terminator into linefeed-newline */ 2342 static char* 2343 http_header_line(sldns_buffer* buf) 2344 { 2345 char* result = (char*)sldns_buffer_current(buf); 2346 size_t i; 2347 for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) { 2348 /* terminate the string on the \r */ 2349 if((char)sldns_buffer_read_u8_at(buf, i) == '\r') 2350 sldns_buffer_write_u8_at(buf, i, 0); 2351 /* terminate on the \n and skip past the it and done */ 2352 if((char)sldns_buffer_read_u8_at(buf, i) == '\n') { 2353 sldns_buffer_write_u8_at(buf, i, 0); 2354 sldns_buffer_set_position(buf, i+1); 2355 return result; 2356 } 2357 } 2358 return NULL; 2359 } 2360 2361 /** move unread buffer to start and clear rest for putting the rest into it */ 2362 static void 2363 http_moveover_buffer(sldns_buffer* buf) 2364 { 2365 size_t pos = sldns_buffer_position(buf); 2366 size_t len = sldns_buffer_remaining(buf); 2367 sldns_buffer_clear(buf); 2368 memmove(sldns_buffer_begin(buf), sldns_buffer_at(buf, pos), len); 2369 sldns_buffer_set_position(buf, len); 2370 } 2371 2372 /** a http header is complete, process it */ 2373 static int 2374 http_process_initial_header(struct comm_point* c) 2375 { 2376 char* line = http_header_line(c->buffer); 2377 if(!line) return 1; 2378 verbose(VERB_ALGO, "http header: %s", line); 2379 if(strncasecmp(line, "HTTP/1.1 ", 9) == 0) { 2380 /* check returncode */ 2381 
if(line[9] != '2') { 2382 verbose(VERB_ALGO, "http bad status %s", line+9); 2383 return 0; 2384 } 2385 } else if(strncasecmp(line, "Content-Length: ", 16) == 0) { 2386 if(!c->http_is_chunked) 2387 c->tcp_byte_count = (size_t)atoi(line+16); 2388 } else if(strncasecmp(line, "Transfer-Encoding: chunked", 19+7) == 0) { 2389 c->tcp_byte_count = 0; 2390 c->http_is_chunked = 1; 2391 } else if(line[0] == 0) { 2392 /* end of initial headers */ 2393 c->http_in_headers = 0; 2394 if(c->http_is_chunked) 2395 c->http_in_chunk_headers = 1; 2396 /* remove header text from front of buffer 2397 * the buffer is going to be used to return the data segment 2398 * itself and we don't want the header to get returned 2399 * prepended with it */ 2400 http_moveover_buffer(c->buffer); 2401 sldns_buffer_flip(c->buffer); 2402 return 1; 2403 } 2404 /* ignore other headers */ 2405 return 1; 2406 } 2407 2408 /** a chunk header is complete, process it, return 0=fail, 1=continue next 2409 * header line, 2=done with chunked transfer*/ 2410 static int 2411 http_process_chunk_header(struct comm_point* c) 2412 { 2413 char* line = http_header_line(c->buffer); 2414 if(!line) return 1; 2415 if(c->http_in_chunk_headers == 3) { 2416 verbose(VERB_ALGO, "http chunk trailer: %s", line); 2417 /* are we done ? 
*/ 2418 if(line[0] == 0 && c->tcp_byte_count == 0) { 2419 /* callback of http reader when NETEVENT_DONE, 2420 * end of data, with no data in buffer */ 2421 sldns_buffer_set_position(c->buffer, 0); 2422 sldns_buffer_set_limit(c->buffer, 0); 2423 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2424 (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL); 2425 /* return that we are done */ 2426 return 2; 2427 } 2428 if(line[0] == 0) { 2429 /* continue with header of the next chunk */ 2430 c->http_in_chunk_headers = 1; 2431 /* remove header text from front of buffer */ 2432 http_moveover_buffer(c->buffer); 2433 sldns_buffer_flip(c->buffer); 2434 return 1; 2435 } 2436 /* ignore further trail headers */ 2437 return 1; 2438 } 2439 verbose(VERB_ALGO, "http chunk header: %s", line); 2440 if(c->http_in_chunk_headers == 1) { 2441 /* read chunked start line */ 2442 char* end = NULL; 2443 c->tcp_byte_count = (size_t)strtol(line, &end, 16); 2444 if(end == line) 2445 return 0; 2446 c->http_in_chunk_headers = 0; 2447 /* remove header text from front of buffer */ 2448 http_moveover_buffer(c->buffer); 2449 sldns_buffer_flip(c->buffer); 2450 if(c->tcp_byte_count == 0) { 2451 /* done with chunks, process chunk_trailer lines */ 2452 c->http_in_chunk_headers = 3; 2453 } 2454 return 1; 2455 } 2456 /* ignore other headers */ 2457 return 1; 2458 } 2459 2460 /** handle nonchunked data segment, 0=fail, 1=wait */ 2461 static int 2462 http_nonchunk_segment(struct comm_point* c) 2463 { 2464 /* c->buffer at position..limit has new data we read in. 2465 * the buffer itself is full of nonchunked data. 2466 * we are looking to read tcp_byte_count more data 2467 * and then the transfer is done. 
*/ 2468 size_t remainbufferlen; 2469 size_t got_now = sldns_buffer_limit(c->buffer); 2470 if(c->tcp_byte_count <= got_now) { 2471 /* done, this is the last data fragment */ 2472 c->http_stored = 0; 2473 sldns_buffer_set_position(c->buffer, 0); 2474 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2475 (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL); 2476 return 1; 2477 } 2478 /* if we have the buffer space, 2479 * read more data collected into the buffer */ 2480 remainbufferlen = sldns_buffer_capacity(c->buffer) - 2481 sldns_buffer_limit(c->buffer); 2482 if(remainbufferlen+got_now >= c->tcp_byte_count || 2483 remainbufferlen >= (c->ssl?16384:2048)) { 2484 size_t total = sldns_buffer_limit(c->buffer); 2485 sldns_buffer_clear(c->buffer); 2486 sldns_buffer_set_position(c->buffer, total); 2487 c->http_stored = total; 2488 /* return and wait to read more */ 2489 return 1; 2490 } 2491 /* call callback with this data amount, then 2492 * wait for more */ 2493 c->tcp_byte_count -= got_now; 2494 c->http_stored = 0; 2495 sldns_buffer_set_position(c->buffer, 0); 2496 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2497 (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL); 2498 /* c->callback has to buffer_clear(c->buffer). */ 2499 /* return and wait to read more */ 2500 return 1; 2501 } 2502 2503 /** handle chunked data segment, return 0=fail, 1=wait, 2=process more */ 2504 static int 2505 http_chunked_segment(struct comm_point* c) 2506 { 2507 /* the c->buffer has from position..limit new data we read. */ 2508 /* the current chunk has length tcp_byte_count. 2509 * once we read that read more chunk headers. 
2510 */ 2511 size_t remainbufferlen; 2512 size_t got_now = sldns_buffer_limit(c->buffer) - c->http_stored; 2513 verbose(VERB_ALGO, "http_chunked_segment: got now %d, tcpbytcount %d, http_stored %d, buffer pos %d, buffer limit %d", (int)got_now, (int)c->tcp_byte_count, (int)c->http_stored, (int)sldns_buffer_position(c->buffer), (int)sldns_buffer_limit(c->buffer)); 2514 if(c->tcp_byte_count <= got_now) { 2515 /* the chunk has completed (with perhaps some extra data 2516 * from next chunk header and next chunk) */ 2517 /* save too much info into temp buffer */ 2518 size_t fraglen; 2519 struct comm_reply repinfo; 2520 c->http_stored = 0; 2521 sldns_buffer_skip(c->buffer, (ssize_t)c->tcp_byte_count); 2522 sldns_buffer_clear(c->http_temp); 2523 sldns_buffer_write(c->http_temp, 2524 sldns_buffer_current(c->buffer), 2525 sldns_buffer_remaining(c->buffer)); 2526 sldns_buffer_flip(c->http_temp); 2527 2528 /* callback with this fragment */ 2529 fraglen = sldns_buffer_position(c->buffer); 2530 sldns_buffer_set_position(c->buffer, 0); 2531 sldns_buffer_set_limit(c->buffer, fraglen); 2532 repinfo = c->repinfo; 2533 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2534 (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &repinfo); 2535 /* c->callback has to buffer_clear(). */ 2536 2537 /* is commpoint deleted? 
*/ 2538 if(!repinfo.c) { 2539 return 1; 2540 } 2541 /* copy waiting info */ 2542 sldns_buffer_clear(c->buffer); 2543 sldns_buffer_write(c->buffer, 2544 sldns_buffer_begin(c->http_temp), 2545 sldns_buffer_remaining(c->http_temp)); 2546 sldns_buffer_flip(c->buffer); 2547 /* process end of chunk trailer header lines, until 2548 * an empty line */ 2549 c->http_in_chunk_headers = 3; 2550 /* process more data in buffer (if any) */ 2551 return 2; 2552 } 2553 c->tcp_byte_count -= got_now; 2554 2555 /* if we have the buffer space, 2556 * read more data collected into the buffer */ 2557 remainbufferlen = sldns_buffer_capacity(c->buffer) - 2558 sldns_buffer_limit(c->buffer); 2559 if(remainbufferlen >= c->tcp_byte_count || 2560 remainbufferlen >= 2048) { 2561 size_t total = sldns_buffer_limit(c->buffer); 2562 sldns_buffer_clear(c->buffer); 2563 sldns_buffer_set_position(c->buffer, total); 2564 c->http_stored = total; 2565 /* return and wait to read more */ 2566 return 1; 2567 } 2568 2569 /* callback of http reader for a new part of the data */ 2570 c->http_stored = 0; 2571 sldns_buffer_set_position(c->buffer, 0); 2572 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2573 (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL); 2574 /* c->callback has to buffer_clear(c->buffer). */ 2575 /* return and wait to read more */ 2576 return 1; 2577 } 2578 2579 #ifdef HAVE_NGHTTP2 2580 /** Create new http2 session. Called when creating handling comm point. */ 2581 static struct http2_session* http2_session_create(struct comm_point* c) 2582 { 2583 struct http2_session* session = calloc(1, sizeof(*session)); 2584 if(!session) { 2585 log_err("malloc failure while creating http2 session"); 2586 return NULL; 2587 } 2588 session->c = c; 2589 2590 return session; 2591 } 2592 #endif 2593 2594 /** Delete http2 session. 
After closing connection or on error */ 2595 static void http2_session_delete(struct http2_session* h2_session) 2596 { 2597 #ifdef HAVE_NGHTTP2 2598 if(h2_session->callbacks) 2599 nghttp2_session_callbacks_del(h2_session->callbacks); 2600 free(h2_session); 2601 #else 2602 (void)h2_session; 2603 #endif 2604 } 2605 2606 #ifdef HAVE_NGHTTP2 2607 struct http2_stream* http2_stream_create(int32_t stream_id) 2608 { 2609 struct http2_stream* h2_stream = calloc(1, sizeof(*h2_stream)); 2610 if(!h2_stream) { 2611 log_err("malloc failure while creating http2 stream"); 2612 return NULL; 2613 } 2614 h2_stream->stream_id = stream_id; 2615 return h2_stream; 2616 } 2617 2618 /** Delete http2 stream. After session delete or stream close callback */ 2619 static void http2_stream_delete(struct http2_session* h2_session, 2620 struct http2_stream* h2_stream) 2621 { 2622 if(h2_stream->mesh_state) { 2623 mesh_state_remove_reply(h2_stream->mesh, h2_stream->mesh_state, 2624 h2_session->c); 2625 h2_stream->mesh_state = NULL; 2626 } 2627 http2_req_stream_clear(h2_stream); 2628 free(h2_stream); 2629 } 2630 #endif 2631 2632 void http2_stream_add_meshstate(struct http2_stream* h2_stream, 2633 struct mesh_area* mesh, struct mesh_state* m) 2634 { 2635 h2_stream->mesh = mesh; 2636 h2_stream->mesh_state = m; 2637 } 2638 2639 /** delete http2 session server. After closing connection. 
*/ 2640 static void http2_session_server_delete(struct http2_session* h2_session) 2641 { 2642 #ifdef HAVE_NGHTTP2 2643 struct http2_stream* h2_stream, *next; 2644 nghttp2_session_del(h2_session->session); /* NULL input is fine */ 2645 h2_session->session = NULL; 2646 for(h2_stream = h2_session->first_stream; h2_stream;) { 2647 next = h2_stream->next; 2648 http2_stream_delete(h2_session, h2_stream); 2649 h2_stream = next; 2650 } 2651 h2_session->first_stream = NULL; 2652 h2_session->is_drop = 0; 2653 h2_session->postpone_drop = 0; 2654 h2_session->c->h2_stream = NULL; 2655 #endif 2656 (void)h2_session; 2657 } 2658 2659 #ifdef HAVE_NGHTTP2 2660 void http2_session_add_stream(struct http2_session* h2_session, 2661 struct http2_stream* h2_stream) 2662 { 2663 if(h2_session->first_stream) 2664 h2_session->first_stream->prev = h2_stream; 2665 h2_stream->next = h2_session->first_stream; 2666 h2_session->first_stream = h2_stream; 2667 } 2668 2669 /** remove stream from session linked list. After stream close callback or 2670 * closing connection */ 2671 static void http2_session_remove_stream(struct http2_session* h2_session, 2672 struct http2_stream* h2_stream) 2673 { 2674 if(h2_stream->prev) 2675 h2_stream->prev->next = h2_stream->next; 2676 else 2677 h2_session->first_stream = h2_stream->next; 2678 if(h2_stream->next) 2679 h2_stream->next->prev = h2_stream->prev; 2680 2681 } 2682 2683 int http2_stream_close_cb(nghttp2_session* ATTR_UNUSED(session), 2684 int32_t stream_id, uint32_t ATTR_UNUSED(error_code), void* cb_arg) 2685 { 2686 struct http2_stream* h2_stream; 2687 struct http2_session* h2_session = (struct http2_session*)cb_arg; 2688 if(!(h2_stream = nghttp2_session_get_stream_user_data( 2689 h2_session->session, stream_id))) { 2690 return 0; 2691 } 2692 http2_session_remove_stream(h2_session, h2_stream); 2693 http2_stream_delete(h2_session, h2_stream); 2694 return 0; 2695 } 2696 2697 ssize_t http2_recv_cb(nghttp2_session* ATTR_UNUSED(session), uint8_t* buf, 2698 
size_t len, int ATTR_UNUSED(flags), void* cb_arg) 2699 { 2700 struct http2_session* h2_session = (struct http2_session*)cb_arg; 2701 ssize_t ret; 2702 2703 log_assert(h2_session->c->type == comm_http); 2704 log_assert(h2_session->c->h2_session); 2705 2706 #ifdef HAVE_SSL 2707 if(h2_session->c->ssl) { 2708 int r; 2709 ERR_clear_error(); 2710 r = SSL_read(h2_session->c->ssl, buf, len); 2711 if(r <= 0) { 2712 int want = SSL_get_error(h2_session->c->ssl, r); 2713 if(want == SSL_ERROR_ZERO_RETURN) { 2714 return NGHTTP2_ERR_EOF; 2715 } else if(want == SSL_ERROR_WANT_READ) { 2716 return NGHTTP2_ERR_WOULDBLOCK; 2717 } else if(want == SSL_ERROR_WANT_WRITE) { 2718 h2_session->c->ssl_shake_state = comm_ssl_shake_hs_write; 2719 comm_point_listen_for_rw(h2_session->c, 0, 1); 2720 return NGHTTP2_ERR_WOULDBLOCK; 2721 } else if(want == SSL_ERROR_SYSCALL) { 2722 #ifdef ECONNRESET 2723 if(errno == ECONNRESET && verbosity < 2) 2724 return NGHTTP2_ERR_CALLBACK_FAILURE; 2725 #endif 2726 if(errno != 0) 2727 log_err("SSL_read syscall: %s", 2728 strerror(errno)); 2729 return NGHTTP2_ERR_CALLBACK_FAILURE; 2730 } 2731 log_crypto_err("could not SSL_read"); 2732 return NGHTTP2_ERR_CALLBACK_FAILURE; 2733 } 2734 return r; 2735 } 2736 #endif /* HAVE_SSL */ 2737 2738 ret = recv(h2_session->c->fd, buf, len, 0); 2739 if(ret == 0) { 2740 return NGHTTP2_ERR_EOF; 2741 } else if(ret < 0) { 2742 #ifndef USE_WINSOCK 2743 if(errno == EINTR || errno == EAGAIN) 2744 return NGHTTP2_ERR_WOULDBLOCK; 2745 #ifdef ECONNRESET 2746 if(errno == ECONNRESET && verbosity < 2) 2747 return NGHTTP2_ERR_CALLBACK_FAILURE; 2748 #endif 2749 log_err_addr("could not http2 recv: %s", strerror(errno), 2750 &h2_session->c->repinfo.addr, 2751 h2_session->c->repinfo.addrlen); 2752 #else /* USE_WINSOCK */ 2753 if(WSAGetLastError() == WSAECONNRESET) 2754 return NGHTTP2_ERR_CALLBACK_FAILURE; 2755 if(WSAGetLastError() == WSAEINPROGRESS) 2756 return NGHTTP2_ERR_WOULDBLOCK; 2757 if(WSAGetLastError() == WSAEWOULDBLOCK) { 2758 
ub_winsock_tcp_wouldblock(h2_session->c->ev->ev, 2759 UB_EV_READ); 2760 return NGHTTP2_ERR_WOULDBLOCK; 2761 } 2762 log_err_addr("could not http2 recv: %s", 2763 wsa_strerror(WSAGetLastError()), 2764 &h2_session->c->repinfo.addr, 2765 h2_session->c->repinfo.addrlen); 2766 #endif 2767 return NGHTTP2_ERR_CALLBACK_FAILURE; 2768 } 2769 return ret; 2770 } 2771 #endif /* HAVE_NGHTTP2 */ 2772 2773 /** Handle http2 read */ 2774 static int 2775 comm_point_http2_handle_read(int ATTR_UNUSED(fd), struct comm_point* c) 2776 { 2777 #ifdef HAVE_NGHTTP2 2778 int ret; 2779 log_assert(c->h2_session); 2780 2781 /* reading until recv cb returns NGHTTP2_ERR_WOULDBLOCK */ 2782 ret = nghttp2_session_recv(c->h2_session->session); 2783 if(ret) { 2784 if(ret != NGHTTP2_ERR_EOF && 2785 ret != NGHTTP2_ERR_CALLBACK_FAILURE) { 2786 char a[256]; 2787 addr_to_str(&c->repinfo.addr, c->repinfo.addrlen, 2788 a, sizeof(a)); 2789 verbose(VERB_QUERY, "http2: session_recv from %s failed, " 2790 "error: %s", a, nghttp2_strerror(ret)); 2791 } 2792 return 0; 2793 } 2794 if(nghttp2_session_want_write(c->h2_session->session)) { 2795 c->tcp_is_reading = 0; 2796 comm_point_stop_listening(c); 2797 comm_point_start_listening(c, -1, adjusted_tcp_timeout(c)); 2798 } else if(!nghttp2_session_want_read(c->h2_session->session)) 2799 return 0; /* connection can be closed */ 2800 return 1; 2801 #else 2802 (void)c; 2803 return 0; 2804 #endif 2805 } 2806 2807 /** 2808 * Handle http reading callback. 2809 * @param fd: file descriptor of socket. 2810 * @param c: comm point to read from into buffer. 
2811 * @return: 0 on error 2812 */ 2813 static int 2814 comm_point_http_handle_read(int fd, struct comm_point* c) 2815 { 2816 log_assert(c->type == comm_http); 2817 log_assert(fd != -1); 2818 2819 /* if we are in ssl handshake, handle SSL handshake */ 2820 #ifdef HAVE_SSL 2821 if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) { 2822 if(!ssl_handshake(c)) 2823 return 0; 2824 if(c->ssl_shake_state != comm_ssl_shake_none) 2825 return 1; 2826 } 2827 #endif /* HAVE_SSL */ 2828 2829 if(!c->tcp_is_reading) 2830 return 1; 2831 2832 if(c->use_h2) { 2833 return comm_point_http2_handle_read(fd, c); 2834 } 2835 2836 /* http version is <= http/1.1 */ 2837 2838 if(c->http_min_version >= http_version_2) { 2839 /* HTTP/2 failed, not allowed to use lower version. */ 2840 return 0; 2841 } 2842 2843 /* read more data */ 2844 if(c->ssl) { 2845 if(!ssl_http_read_more(c)) 2846 return 0; 2847 } else { 2848 if(!http_read_more(fd, c)) 2849 return 0; 2850 } 2851 2852 if(c->http_stored >= sldns_buffer_position(c->buffer)) { 2853 /* read did not work but we wanted more data, there is 2854 * no bytes to process now. 
*/ 2855 return 1; 2856 } 2857 sldns_buffer_flip(c->buffer); 2858 /* if we are partway in a segment of data, position us at the point 2859 * where we left off previously */ 2860 if(c->http_stored < sldns_buffer_limit(c->buffer)) 2861 sldns_buffer_set_position(c->buffer, c->http_stored); 2862 else sldns_buffer_set_position(c->buffer, sldns_buffer_limit(c->buffer)); 2863 2864 while(sldns_buffer_remaining(c->buffer) > 0) { 2865 /* Handle HTTP/1.x data */ 2866 /* if we are reading headers, read more headers */ 2867 if(c->http_in_headers || c->http_in_chunk_headers) { 2868 /* if header is done, process the header */ 2869 if(!http_header_done(c->buffer)) { 2870 /* copy remaining data to front of buffer 2871 * and set rest for writing into it */ 2872 http_moveover_buffer(c->buffer); 2873 /* return and wait to read more */ 2874 return 1; 2875 } 2876 if(!c->http_in_chunk_headers) { 2877 /* process initial headers */ 2878 if(!http_process_initial_header(c)) 2879 return 0; 2880 } else { 2881 /* process chunk headers */ 2882 int r = http_process_chunk_header(c); 2883 if(r == 0) return 0; 2884 if(r == 2) return 1; /* done */ 2885 /* r == 1, continue */ 2886 } 2887 /* see if we have more to process */ 2888 continue; 2889 } 2890 2891 if(!c->http_is_chunked) { 2892 /* if we are reading nonchunks, process that*/ 2893 return http_nonchunk_segment(c); 2894 } else { 2895 /* if we are reading chunks, read the chunk */ 2896 int r = http_chunked_segment(c); 2897 if(r == 0) return 0; 2898 if(r == 1) return 1; 2899 continue; 2900 } 2901 } 2902 /* broke out of the loop; could not process header instead need 2903 * to read more */ 2904 /* moveover any remaining data and read more data */ 2905 http_moveover_buffer(c->buffer); 2906 /* return and wait to read more */ 2907 return 1; 2908 } 2909 2910 /** check pending connect for http */ 2911 static int 2912 http_check_connect(int fd, struct comm_point* c) 2913 { 2914 /* check for pending error from nonblocking connect */ 2915 /* from Stevens, 
unix network programming, vol1, 3rd ed, p450*/ 2916 int error = 0; 2917 socklen_t len = (socklen_t)sizeof(error); 2918 if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error, 2919 &len) < 0){ 2920 #ifndef USE_WINSOCK 2921 error = errno; /* on solaris errno is error */ 2922 #else /* USE_WINSOCK */ 2923 error = WSAGetLastError(); 2924 #endif 2925 } 2926 #ifndef USE_WINSOCK 2927 #if defined(EINPROGRESS) && defined(EWOULDBLOCK) 2928 if(error == EINPROGRESS || error == EWOULDBLOCK) 2929 return 1; /* try again later */ 2930 else 2931 #endif 2932 if(error != 0 && verbosity < 2) 2933 return 0; /* silence lots of chatter in the logs */ 2934 else if(error != 0) { 2935 log_err_addr("http connect", strerror(error), 2936 &c->repinfo.addr, c->repinfo.addrlen); 2937 #else /* USE_WINSOCK */ 2938 /* examine error */ 2939 if(error == WSAEINPROGRESS) 2940 return 1; 2941 else if(error == WSAEWOULDBLOCK) { 2942 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 2943 return 1; 2944 } else if(error != 0 && verbosity < 2) 2945 return 0; 2946 else if(error != 0) { 2947 log_err_addr("http connect", wsa_strerror(error), 2948 &c->repinfo.addr, c->repinfo.addrlen); 2949 #endif /* USE_WINSOCK */ 2950 return 0; 2951 } 2952 /* keep on processing this socket */ 2953 return 2; 2954 } 2955 2956 /** write more data for http (with ssl) */ 2957 static int 2958 ssl_http_write_more(struct comm_point* c) 2959 { 2960 #ifdef HAVE_SSL 2961 int r; 2962 log_assert(sldns_buffer_remaining(c->buffer) > 0); 2963 ERR_clear_error(); 2964 r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer), 2965 (int)sldns_buffer_remaining(c->buffer)); 2966 if(r <= 0) { 2967 int want = SSL_get_error(c->ssl, r); 2968 if(want == SSL_ERROR_ZERO_RETURN) { 2969 return 0; /* closed */ 2970 } else if(want == SSL_ERROR_WANT_READ) { 2971 c->ssl_shake_state = comm_ssl_shake_hs_read; 2972 comm_point_listen_for_rw(c, 1, 0); 2973 return 1; /* wait for read condition */ 2974 } else if(want == SSL_ERROR_WANT_WRITE) { 2975 return 1; /* write 
more later */ 2976 } else if(want == SSL_ERROR_SYSCALL) { 2977 #ifdef EPIPE 2978 if(errno == EPIPE && verbosity < 2) 2979 return 0; /* silence 'broken pipe' */ 2980 #endif 2981 if(errno != 0) 2982 log_err("SSL_write syscall: %s", 2983 strerror(errno)); 2984 return 0; 2985 } 2986 log_crypto_err("could not SSL_write"); 2987 return 0; 2988 } 2989 sldns_buffer_skip(c->buffer, (ssize_t)r); 2990 return 1; 2991 #else 2992 (void)c; 2993 return 0; 2994 #endif /* HAVE_SSL */ 2995 } 2996 2997 /** write more data for http */ 2998 static int 2999 http_write_more(int fd, struct comm_point* c) 3000 { 3001 ssize_t r; 3002 log_assert(sldns_buffer_remaining(c->buffer) > 0); 3003 r = send(fd, (void*)sldns_buffer_current(c->buffer), 3004 sldns_buffer_remaining(c->buffer), 0); 3005 if(r == -1) { 3006 #ifndef USE_WINSOCK 3007 if(errno == EINTR || errno == EAGAIN) 3008 return 1; 3009 #else 3010 if(WSAGetLastError() == WSAEINPROGRESS) 3011 return 1; 3012 if(WSAGetLastError() == WSAEWOULDBLOCK) { 3013 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 3014 return 1; 3015 } 3016 #endif 3017 log_err_addr("http send r", sock_strerror(errno), 3018 &c->repinfo.addr, c->repinfo.addrlen); 3019 return 0; 3020 } 3021 sldns_buffer_skip(c->buffer, r); 3022 return 1; 3023 } 3024 3025 #ifdef HAVE_NGHTTP2 3026 ssize_t http2_send_cb(nghttp2_session* ATTR_UNUSED(session), const uint8_t* buf, 3027 size_t len, int ATTR_UNUSED(flags), void* cb_arg) 3028 { 3029 ssize_t ret; 3030 struct http2_session* h2_session = (struct http2_session*)cb_arg; 3031 log_assert(h2_session->c->type == comm_http); 3032 log_assert(h2_session->c->h2_session); 3033 3034 #ifdef HAVE_SSL 3035 if(h2_session->c->ssl) { 3036 int r; 3037 ERR_clear_error(); 3038 r = SSL_write(h2_session->c->ssl, buf, len); 3039 if(r <= 0) { 3040 int want = SSL_get_error(h2_session->c->ssl, r); 3041 if(want == SSL_ERROR_ZERO_RETURN) { 3042 return NGHTTP2_ERR_CALLBACK_FAILURE; 3043 } else if(want == SSL_ERROR_WANT_READ) { 3044 h2_session->c->ssl_shake_state 
= comm_ssl_shake_hs_read; 3045 comm_point_listen_for_rw(h2_session->c, 1, 0); 3046 return NGHTTP2_ERR_WOULDBLOCK; 3047 } else if(want == SSL_ERROR_WANT_WRITE) { 3048 return NGHTTP2_ERR_WOULDBLOCK; 3049 } else if(want == SSL_ERROR_SYSCALL) { 3050 #ifdef EPIPE 3051 if(errno == EPIPE && verbosity < 2) 3052 return NGHTTP2_ERR_CALLBACK_FAILURE; 3053 #endif 3054 if(errno != 0) 3055 log_err("SSL_write syscall: %s", 3056 strerror(errno)); 3057 return NGHTTP2_ERR_CALLBACK_FAILURE; 3058 } 3059 log_crypto_err("could not SSL_write"); 3060 return NGHTTP2_ERR_CALLBACK_FAILURE; 3061 } 3062 return r; 3063 } 3064 #endif /* HAVE_SSL */ 3065 3066 ret = send(h2_session->c->fd, buf, len, 0); 3067 if(ret == 0) { 3068 return NGHTTP2_ERR_CALLBACK_FAILURE; 3069 } else if(ret < 0) { 3070 #ifndef USE_WINSOCK 3071 if(errno == EINTR || errno == EAGAIN) 3072 return NGHTTP2_ERR_WOULDBLOCK; 3073 #ifdef EPIPE 3074 if(errno == EPIPE && verbosity < 2) 3075 return NGHTTP2_ERR_CALLBACK_FAILURE; 3076 #endif 3077 #ifdef ECONNRESET 3078 if(errno == ECONNRESET && verbosity < 2) 3079 return NGHTTP2_ERR_CALLBACK_FAILURE; 3080 #endif 3081 log_err_addr("could not http2 write: %s", strerror(errno), 3082 &h2_session->c->repinfo.addr, 3083 h2_session->c->repinfo.addrlen); 3084 #else /* USE_WINSOCK */ 3085 if(WSAGetLastError() == WSAENOTCONN) 3086 return NGHTTP2_ERR_WOULDBLOCK; 3087 if(WSAGetLastError() == WSAEINPROGRESS) 3088 return NGHTTP2_ERR_WOULDBLOCK; 3089 if(WSAGetLastError() == WSAEWOULDBLOCK) { 3090 ub_winsock_tcp_wouldblock(h2_session->c->ev->ev, 3091 UB_EV_WRITE); 3092 return NGHTTP2_ERR_WOULDBLOCK; 3093 } 3094 if(WSAGetLastError() == WSAECONNRESET && verbosity < 2) 3095 return NGHTTP2_ERR_CALLBACK_FAILURE; 3096 log_err_addr("could not http2 write: %s", 3097 wsa_strerror(WSAGetLastError()), 3098 &h2_session->c->repinfo.addr, 3099 h2_session->c->repinfo.addrlen); 3100 #endif 3101 return NGHTTP2_ERR_CALLBACK_FAILURE; 3102 } 3103 return ret; 3104 } 3105 #endif /* HAVE_NGHTTP2 */ 3106 3107 /** Handle http2 
writing */ 3108 static int 3109 comm_point_http2_handle_write(int ATTR_UNUSED(fd), struct comm_point* c) 3110 { 3111 #ifdef HAVE_NGHTTP2 3112 int ret; 3113 log_assert(c->h2_session); 3114 3115 ret = nghttp2_session_send(c->h2_session->session); 3116 if(ret) { 3117 verbose(VERB_QUERY, "http2: session_send failed, " 3118 "error: %s", nghttp2_strerror(ret)); 3119 return 0; 3120 } 3121 3122 if(nghttp2_session_want_read(c->h2_session->session)) { 3123 c->tcp_is_reading = 1; 3124 comm_point_stop_listening(c); 3125 comm_point_start_listening(c, -1, adjusted_tcp_timeout(c)); 3126 } else if(!nghttp2_session_want_write(c->h2_session->session)) 3127 return 0; /* connection can be closed */ 3128 return 1; 3129 #else 3130 (void)c; 3131 return 0; 3132 #endif 3133 } 3134 3135 /** 3136 * Handle http writing callback. 3137 * @param fd: file descriptor of socket. 3138 * @param c: comm point to write buffer out of. 3139 * @return: 0 on error 3140 */ 3141 static int 3142 comm_point_http_handle_write(int fd, struct comm_point* c) 3143 { 3144 log_assert(c->type == comm_http); 3145 log_assert(fd != -1); 3146 3147 /* check pending connect errors, if that fails, we wait for more, 3148 * or we can continue to write contents */ 3149 if(c->tcp_check_nb_connect) { 3150 int r = http_check_connect(fd, c); 3151 if(r == 0) return 0; 3152 if(r == 1) return 1; 3153 c->tcp_check_nb_connect = 0; 3154 } 3155 /* if we are in ssl handshake, handle SSL handshake */ 3156 #ifdef HAVE_SSL 3157 if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) { 3158 if(!ssl_handshake(c)) 3159 return 0; 3160 if(c->ssl_shake_state != comm_ssl_shake_none) 3161 return 1; 3162 } 3163 #endif /* HAVE_SSL */ 3164 if(c->tcp_is_reading) 3165 return 1; 3166 3167 if(c->use_h2) { 3168 return comm_point_http2_handle_write(fd, c); 3169 } 3170 3171 /* http version is <= http/1.1 */ 3172 3173 if(c->http_min_version >= http_version_2) { 3174 /* HTTP/2 failed, not allowed to use lower version. 
*/ 3175 return 0; 3176 } 3177 3178 /* if we are writing, write more */ 3179 if(c->ssl) { 3180 if(!ssl_http_write_more(c)) 3181 return 0; 3182 } else { 3183 if(!http_write_more(fd, c)) 3184 return 0; 3185 } 3186 3187 /* we write a single buffer contents, that can contain 3188 * the http request, and then flip to read the results */ 3189 /* see if write is done */ 3190 if(sldns_buffer_remaining(c->buffer) == 0) { 3191 sldns_buffer_clear(c->buffer); 3192 if(c->tcp_do_toggle_rw) 3193 c->tcp_is_reading = 1; 3194 c->tcp_byte_count = 0; 3195 /* switch from listening(write) to listening(read) */ 3196 comm_point_stop_listening(c); 3197 comm_point_start_listening(c, -1, -1); 3198 } 3199 return 1; 3200 } 3201 3202 void 3203 comm_point_http_handle_callback(int fd, short event, void* arg) 3204 { 3205 struct comm_point* c = (struct comm_point*)arg; 3206 log_assert(c->type == comm_http); 3207 ub_comm_base_now(c->ev->base); 3208 3209 if(event&UB_EV_TIMEOUT) { 3210 verbose(VERB_QUERY, "http took too long, dropped"); 3211 reclaim_http_handler(c); 3212 if(!c->tcp_do_close) { 3213 fptr_ok(fptr_whitelist_comm_point(c->callback)); 3214 (void)(*c->callback)(c, c->cb_arg, 3215 NETEVENT_TIMEOUT, NULL); 3216 } 3217 return; 3218 } 3219 if(event&UB_EV_READ) { 3220 if(!comm_point_http_handle_read(fd, c)) { 3221 reclaim_http_handler(c); 3222 if(!c->tcp_do_close) { 3223 fptr_ok(fptr_whitelist_comm_point( 3224 c->callback)); 3225 (void)(*c->callback)(c, c->cb_arg, 3226 NETEVENT_CLOSED, NULL); 3227 } 3228 } 3229 return; 3230 } 3231 if(event&UB_EV_WRITE) { 3232 if(!comm_point_http_handle_write(fd, c)) { 3233 reclaim_http_handler(c); 3234 if(!c->tcp_do_close) { 3235 fptr_ok(fptr_whitelist_comm_point( 3236 c->callback)); 3237 (void)(*c->callback)(c, c->cb_arg, 3238 NETEVENT_CLOSED, NULL); 3239 } 3240 } 3241 return; 3242 } 3243 log_err("Ignored event %d for httphdl.", event); 3244 } 3245 3246 void comm_point_local_handle_callback(int fd, short event, void* arg) 3247 { 3248 struct comm_point* c = 
(struct comm_point*)arg; 3249 log_assert(c->type == comm_local); 3250 ub_comm_base_now(c->ev->base); 3251 3252 if(event&UB_EV_READ) { 3253 if(!comm_point_tcp_handle_read(fd, c, 1)) { 3254 fptr_ok(fptr_whitelist_comm_point(c->callback)); 3255 (void)(*c->callback)(c, c->cb_arg, NETEVENT_CLOSED, 3256 NULL); 3257 } 3258 return; 3259 } 3260 log_err("Ignored event %d for localhdl.", event); 3261 } 3262 3263 void comm_point_raw_handle_callback(int ATTR_UNUSED(fd), 3264 short event, void* arg) 3265 { 3266 struct comm_point* c = (struct comm_point*)arg; 3267 int err = NETEVENT_NOERROR; 3268 log_assert(c->type == comm_raw); 3269 ub_comm_base_now(c->ev->base); 3270 3271 if(event&UB_EV_TIMEOUT) 3272 err = NETEVENT_TIMEOUT; 3273 fptr_ok(fptr_whitelist_comm_point_raw(c->callback)); 3274 (void)(*c->callback)(c, c->cb_arg, err, NULL); 3275 } 3276 3277 struct comm_point* 3278 comm_point_create_udp(struct comm_base *base, int fd, sldns_buffer* buffer, 3279 comm_point_callback_type* callback, void* callback_arg, struct unbound_socket* socket) 3280 { 3281 struct comm_point* c = (struct comm_point*)calloc(1, 3282 sizeof(struct comm_point)); 3283 short evbits; 3284 if(!c) 3285 return NULL; 3286 c->ev = (struct internal_event*)calloc(1, 3287 sizeof(struct internal_event)); 3288 if(!c->ev) { 3289 free(c); 3290 return NULL; 3291 } 3292 c->ev->base = base; 3293 c->fd = fd; 3294 c->buffer = buffer; 3295 c->timeout = NULL; 3296 c->tcp_is_reading = 0; 3297 c->tcp_byte_count = 0; 3298 c->tcp_parent = NULL; 3299 c->max_tcp_count = 0; 3300 c->cur_tcp_count = 0; 3301 c->tcp_handlers = NULL; 3302 c->tcp_free = NULL; 3303 c->type = comm_udp; 3304 c->tcp_do_close = 0; 3305 c->do_not_close = 0; 3306 c->tcp_do_toggle_rw = 0; 3307 c->tcp_check_nb_connect = 0; 3308 #ifdef USE_MSG_FASTOPEN 3309 c->tcp_do_fastopen = 0; 3310 #endif 3311 #ifdef USE_DNSCRYPT 3312 c->dnscrypt = 0; 3313 c->dnscrypt_buffer = buffer; 3314 #endif 3315 c->inuse = 0; 3316 c->callback = callback; 3317 c->cb_arg = callback_arg; 3318 
c->socket = socket; 3319 evbits = UB_EV_READ | UB_EV_PERSIST; 3320 /* ub_event stuff */ 3321 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 3322 comm_point_udp_callback, c); 3323 if(c->ev->ev == NULL) { 3324 log_err("could not baseset udp event"); 3325 comm_point_delete(c); 3326 return NULL; 3327 } 3328 if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) { 3329 log_err("could not add udp event"); 3330 comm_point_delete(c); 3331 return NULL; 3332 } 3333 c->event_added = 1; 3334 return c; 3335 } 3336 3337 struct comm_point* 3338 comm_point_create_udp_ancil(struct comm_base *base, int fd, 3339 sldns_buffer* buffer, 3340 comm_point_callback_type* callback, void* callback_arg, struct unbound_socket* socket) 3341 { 3342 struct comm_point* c = (struct comm_point*)calloc(1, 3343 sizeof(struct comm_point)); 3344 short evbits; 3345 if(!c) 3346 return NULL; 3347 c->ev = (struct internal_event*)calloc(1, 3348 sizeof(struct internal_event)); 3349 if(!c->ev) { 3350 free(c); 3351 return NULL; 3352 } 3353 c->ev->base = base; 3354 c->fd = fd; 3355 c->buffer = buffer; 3356 c->timeout = NULL; 3357 c->tcp_is_reading = 0; 3358 c->tcp_byte_count = 0; 3359 c->tcp_parent = NULL; 3360 c->max_tcp_count = 0; 3361 c->cur_tcp_count = 0; 3362 c->tcp_handlers = NULL; 3363 c->tcp_free = NULL; 3364 c->type = comm_udp; 3365 c->tcp_do_close = 0; 3366 c->do_not_close = 0; 3367 #ifdef USE_DNSCRYPT 3368 c->dnscrypt = 0; 3369 c->dnscrypt_buffer = buffer; 3370 #endif 3371 c->inuse = 0; 3372 c->tcp_do_toggle_rw = 0; 3373 c->tcp_check_nb_connect = 0; 3374 #ifdef USE_MSG_FASTOPEN 3375 c->tcp_do_fastopen = 0; 3376 #endif 3377 c->callback = callback; 3378 c->cb_arg = callback_arg; 3379 c->socket = socket; 3380 evbits = UB_EV_READ | UB_EV_PERSIST; 3381 /* ub_event stuff */ 3382 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 3383 comm_point_udp_ancil_callback, c); 3384 if(c->ev->ev == NULL) { 3385 log_err("could not baseset udp event"); 3386 comm_point_delete(c); 3387 return NULL; 3388 } 
3389 if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) { 3390 log_err("could not add udp event"); 3391 comm_point_delete(c); 3392 return NULL; 3393 } 3394 c->event_added = 1; 3395 return c; 3396 } 3397 3398 static struct comm_point* 3399 comm_point_create_tcp_handler(struct comm_base *base, 3400 struct comm_point* parent, size_t bufsize, 3401 struct sldns_buffer* spoolbuf, comm_point_callback_type* callback, 3402 void* callback_arg, struct unbound_socket* socket) 3403 { 3404 struct comm_point* c = (struct comm_point*)calloc(1, 3405 sizeof(struct comm_point)); 3406 short evbits; 3407 if(!c) 3408 return NULL; 3409 c->ev = (struct internal_event*)calloc(1, 3410 sizeof(struct internal_event)); 3411 if(!c->ev) { 3412 free(c); 3413 return NULL; 3414 } 3415 c->ev->base = base; 3416 c->fd = -1; 3417 c->buffer = sldns_buffer_new(bufsize); 3418 if(!c->buffer) { 3419 free(c->ev); 3420 free(c); 3421 return NULL; 3422 } 3423 c->timeout = (struct timeval*)malloc(sizeof(struct timeval)); 3424 if(!c->timeout) { 3425 sldns_buffer_free(c->buffer); 3426 free(c->ev); 3427 free(c); 3428 return NULL; 3429 } 3430 c->tcp_is_reading = 0; 3431 c->tcp_byte_count = 0; 3432 c->tcp_parent = parent; 3433 c->tcp_timeout_msec = parent->tcp_timeout_msec; 3434 c->tcp_conn_limit = parent->tcp_conn_limit; 3435 c->tcl_addr = NULL; 3436 c->tcp_keepalive = 0; 3437 c->max_tcp_count = 0; 3438 c->cur_tcp_count = 0; 3439 c->tcp_handlers = NULL; 3440 c->tcp_free = NULL; 3441 c->type = comm_tcp; 3442 c->tcp_do_close = 0; 3443 c->do_not_close = 0; 3444 c->tcp_do_toggle_rw = 1; 3445 c->tcp_check_nb_connect = 0; 3446 #ifdef USE_MSG_FASTOPEN 3447 c->tcp_do_fastopen = 0; 3448 #endif 3449 #ifdef USE_DNSCRYPT 3450 c->dnscrypt = 0; 3451 /* We don't know just yet if this is a dnscrypt channel. Allocation 3452 * will be done when handling the callback. 
*/ 3453 c->dnscrypt_buffer = c->buffer; 3454 #endif 3455 c->repinfo.c = c; 3456 c->callback = callback; 3457 c->cb_arg = callback_arg; 3458 c->socket = socket; 3459 if(spoolbuf) { 3460 c->tcp_req_info = tcp_req_info_create(spoolbuf); 3461 if(!c->tcp_req_info) { 3462 log_err("could not create tcp commpoint"); 3463 sldns_buffer_free(c->buffer); 3464 free(c->timeout); 3465 free(c->ev); 3466 free(c); 3467 return NULL; 3468 } 3469 c->tcp_req_info->cp = c; 3470 c->tcp_do_close = 1; 3471 c->tcp_do_toggle_rw = 0; 3472 } 3473 /* add to parent free list */ 3474 c->tcp_free = parent->tcp_free; 3475 parent->tcp_free = c; 3476 /* ub_event stuff */ 3477 evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT; 3478 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 3479 comm_point_tcp_handle_callback, c); 3480 if(c->ev->ev == NULL) 3481 { 3482 log_err("could not basetset tcphdl event"); 3483 parent->tcp_free = c->tcp_free; 3484 tcp_req_info_delete(c->tcp_req_info); 3485 sldns_buffer_free(c->buffer); 3486 free(c->timeout); 3487 free(c->ev); 3488 free(c); 3489 return NULL; 3490 } 3491 return c; 3492 } 3493 3494 static struct comm_point* 3495 comm_point_create_http_handler(struct comm_base *base, 3496 struct comm_point* parent, size_t bufsize, int harden_large_queries, 3497 uint32_t http_max_streams, char* http_endpoint, 3498 comm_point_callback_type* callback, void* callback_arg, 3499 struct unbound_socket* socket) 3500 { 3501 struct comm_point* c = (struct comm_point*)calloc(1, 3502 sizeof(struct comm_point)); 3503 short evbits; 3504 if(!c) 3505 return NULL; 3506 c->ev = (struct internal_event*)calloc(1, 3507 sizeof(struct internal_event)); 3508 if(!c->ev) { 3509 free(c); 3510 return NULL; 3511 } 3512 c->ev->base = base; 3513 c->fd = -1; 3514 c->buffer = sldns_buffer_new(bufsize); 3515 if(!c->buffer) { 3516 free(c->ev); 3517 free(c); 3518 return NULL; 3519 } 3520 c->timeout = (struct timeval*)malloc(sizeof(struct timeval)); 3521 if(!c->timeout) { 3522 
sldns_buffer_free(c->buffer); 3523 free(c->ev); 3524 free(c); 3525 return NULL; 3526 } 3527 c->tcp_is_reading = 0; 3528 c->tcp_byte_count = 0; 3529 c->tcp_parent = parent; 3530 c->tcp_timeout_msec = parent->tcp_timeout_msec; 3531 c->tcp_conn_limit = parent->tcp_conn_limit; 3532 c->tcl_addr = NULL; 3533 c->tcp_keepalive = 0; 3534 c->max_tcp_count = 0; 3535 c->cur_tcp_count = 0; 3536 c->tcp_handlers = NULL; 3537 c->tcp_free = NULL; 3538 c->type = comm_http; 3539 c->tcp_do_close = 1; 3540 c->do_not_close = 0; 3541 c->tcp_do_toggle_rw = 1; /* will be set to 0 after http2 upgrade */ 3542 c->tcp_check_nb_connect = 0; 3543 #ifdef USE_MSG_FASTOPEN 3544 c->tcp_do_fastopen = 0; 3545 #endif 3546 #ifdef USE_DNSCRYPT 3547 c->dnscrypt = 0; 3548 c->dnscrypt_buffer = NULL; 3549 #endif 3550 c->repinfo.c = c; 3551 c->callback = callback; 3552 c->cb_arg = callback_arg; 3553 c->socket = socket; 3554 3555 c->http_min_version = http_version_2; 3556 c->http2_stream_max_qbuffer_size = bufsize; 3557 if(harden_large_queries && bufsize > 512) 3558 c->http2_stream_max_qbuffer_size = 512; 3559 c->http2_max_streams = http_max_streams; 3560 if(!(c->http_endpoint = strdup(http_endpoint))) { 3561 log_err("could not strdup http_endpoint"); 3562 sldns_buffer_free(c->buffer); 3563 free(c->timeout); 3564 free(c->ev); 3565 free(c); 3566 return NULL; 3567 } 3568 c->use_h2 = 0; 3569 #ifdef HAVE_NGHTTP2 3570 if(!(c->h2_session = http2_session_create(c))) { 3571 log_err("could not create http2 session"); 3572 free(c->http_endpoint); 3573 sldns_buffer_free(c->buffer); 3574 free(c->timeout); 3575 free(c->ev); 3576 free(c); 3577 return NULL; 3578 } 3579 if(!(c->h2_session->callbacks = http2_req_callbacks_create())) { 3580 log_err("could not create http2 callbacks"); 3581 http2_session_delete(c->h2_session); 3582 free(c->http_endpoint); 3583 sldns_buffer_free(c->buffer); 3584 free(c->timeout); 3585 free(c->ev); 3586 free(c); 3587 return NULL; 3588 } 3589 #endif 3590 3591 /* add to parent free list */ 3592 
c->tcp_free = parent->tcp_free; 3593 parent->tcp_free = c; 3594 /* ub_event stuff */ 3595 evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT; 3596 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 3597 comm_point_http_handle_callback, c); 3598 if(c->ev->ev == NULL) 3599 { 3600 log_err("could not set http handler event"); 3601 parent->tcp_free = c->tcp_free; 3602 http2_session_delete(c->h2_session); 3603 sldns_buffer_free(c->buffer); 3604 free(c->timeout); 3605 free(c->ev); 3606 free(c); 3607 return NULL; 3608 } 3609 return c; 3610 } 3611 3612 struct comm_point* 3613 comm_point_create_tcp(struct comm_base *base, int fd, int num, 3614 int idle_timeout, int harden_large_queries, 3615 uint32_t http_max_streams, char* http_endpoint, 3616 struct tcl_list* tcp_conn_limit, size_t bufsize, 3617 struct sldns_buffer* spoolbuf, enum listen_type port_type, 3618 comm_point_callback_type* callback, void* callback_arg, struct unbound_socket* socket) 3619 { 3620 struct comm_point* c = (struct comm_point*)calloc(1, 3621 sizeof(struct comm_point)); 3622 short evbits; 3623 int i; 3624 /* first allocate the TCP accept listener */ 3625 if(!c) 3626 return NULL; 3627 c->ev = (struct internal_event*)calloc(1, 3628 sizeof(struct internal_event)); 3629 if(!c->ev) { 3630 free(c); 3631 return NULL; 3632 } 3633 c->ev->base = base; 3634 c->fd = fd; 3635 c->buffer = NULL; 3636 c->timeout = NULL; 3637 c->tcp_is_reading = 0; 3638 c->tcp_byte_count = 0; 3639 c->tcp_timeout_msec = idle_timeout; 3640 c->tcp_conn_limit = tcp_conn_limit; 3641 c->tcl_addr = NULL; 3642 c->tcp_keepalive = 0; 3643 c->tcp_parent = NULL; 3644 c->max_tcp_count = num; 3645 c->cur_tcp_count = 0; 3646 c->tcp_handlers = (struct comm_point**)calloc((size_t)num, 3647 sizeof(struct comm_point*)); 3648 if(!c->tcp_handlers) { 3649 free(c->ev); 3650 free(c); 3651 return NULL; 3652 } 3653 c->tcp_free = NULL; 3654 c->type = comm_tcp_accept; 3655 c->tcp_do_close = 0; 3656 c->do_not_close = 0; 3657 c->tcp_do_toggle_rw = 0; 3658 
c->tcp_check_nb_connect = 0; 3659 #ifdef USE_MSG_FASTOPEN 3660 c->tcp_do_fastopen = 0; 3661 #endif 3662 #ifdef USE_DNSCRYPT 3663 c->dnscrypt = 0; 3664 c->dnscrypt_buffer = NULL; 3665 #endif 3666 c->callback = NULL; 3667 c->cb_arg = NULL; 3668 c->socket = socket; 3669 evbits = UB_EV_READ | UB_EV_PERSIST; 3670 /* ub_event stuff */ 3671 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 3672 comm_point_tcp_accept_callback, c); 3673 if(c->ev->ev == NULL) { 3674 log_err("could not baseset tcpacc event"); 3675 comm_point_delete(c); 3676 return NULL; 3677 } 3678 if (ub_event_add(c->ev->ev, c->timeout) != 0) { 3679 log_err("could not add tcpacc event"); 3680 comm_point_delete(c); 3681 return NULL; 3682 } 3683 c->event_added = 1; 3684 /* now prealloc the handlers */ 3685 for(i=0; i<num; i++) { 3686 if(port_type == listen_type_tcp || 3687 port_type == listen_type_ssl || 3688 port_type == listen_type_tcp_dnscrypt) { 3689 c->tcp_handlers[i] = comm_point_create_tcp_handler(base, 3690 c, bufsize, spoolbuf, callback, callback_arg, socket); 3691 } else if(port_type == listen_type_http) { 3692 c->tcp_handlers[i] = comm_point_create_http_handler( 3693 base, c, bufsize, harden_large_queries, 3694 http_max_streams, http_endpoint, 3695 callback, callback_arg, socket); 3696 } 3697 else { 3698 log_err("could not create tcp handler, unknown listen " 3699 "type"); 3700 return NULL; 3701 } 3702 if(!c->tcp_handlers[i]) { 3703 comm_point_delete(c); 3704 return NULL; 3705 } 3706 } 3707 3708 return c; 3709 } 3710 3711 struct comm_point* 3712 comm_point_create_tcp_out(struct comm_base *base, size_t bufsize, 3713 comm_point_callback_type* callback, void* callback_arg) 3714 { 3715 struct comm_point* c = (struct comm_point*)calloc(1, 3716 sizeof(struct comm_point)); 3717 short evbits; 3718 if(!c) 3719 return NULL; 3720 c->ev = (struct internal_event*)calloc(1, 3721 sizeof(struct internal_event)); 3722 if(!c->ev) { 3723 free(c); 3724 return NULL; 3725 } 3726 c->ev->base = base; 3727 c->fd = -1; 
3728 c->buffer = sldns_buffer_new(bufsize); 3729 if(!c->buffer) { 3730 free(c->ev); 3731 free(c); 3732 return NULL; 3733 } 3734 c->timeout = NULL; 3735 c->tcp_is_reading = 0; 3736 c->tcp_byte_count = 0; 3737 c->tcp_timeout_msec = TCP_QUERY_TIMEOUT; 3738 c->tcp_conn_limit = NULL; 3739 c->tcl_addr = NULL; 3740 c->tcp_keepalive = 0; 3741 c->tcp_parent = NULL; 3742 c->max_tcp_count = 0; 3743 c->cur_tcp_count = 0; 3744 c->tcp_handlers = NULL; 3745 c->tcp_free = NULL; 3746 c->type = comm_tcp; 3747 c->tcp_do_close = 0; 3748 c->do_not_close = 0; 3749 c->tcp_do_toggle_rw = 1; 3750 c->tcp_check_nb_connect = 1; 3751 #ifdef USE_MSG_FASTOPEN 3752 c->tcp_do_fastopen = 1; 3753 #endif 3754 #ifdef USE_DNSCRYPT 3755 c->dnscrypt = 0; 3756 c->dnscrypt_buffer = c->buffer; 3757 #endif 3758 c->repinfo.c = c; 3759 c->callback = callback; 3760 c->cb_arg = callback_arg; 3761 evbits = UB_EV_PERSIST | UB_EV_WRITE; 3762 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 3763 comm_point_tcp_handle_callback, c); 3764 if(c->ev->ev == NULL) 3765 { 3766 log_err("could not baseset tcpout event"); 3767 sldns_buffer_free(c->buffer); 3768 free(c->ev); 3769 free(c); 3770 return NULL; 3771 } 3772 3773 return c; 3774 } 3775 3776 struct comm_point* 3777 comm_point_create_http_out(struct comm_base *base, size_t bufsize, 3778 comm_point_callback_type* callback, void* callback_arg, 3779 sldns_buffer* temp) 3780 { 3781 struct comm_point* c = (struct comm_point*)calloc(1, 3782 sizeof(struct comm_point)); 3783 short evbits; 3784 if(!c) 3785 return NULL; 3786 c->ev = (struct internal_event*)calloc(1, 3787 sizeof(struct internal_event)); 3788 if(!c->ev) { 3789 free(c); 3790 return NULL; 3791 } 3792 c->ev->base = base; 3793 c->fd = -1; 3794 c->buffer = sldns_buffer_new(bufsize); 3795 if(!c->buffer) { 3796 free(c->ev); 3797 free(c); 3798 return NULL; 3799 } 3800 c->timeout = NULL; 3801 c->tcp_is_reading = 0; 3802 c->tcp_byte_count = 0; 3803 c->tcp_parent = NULL; 3804 c->max_tcp_count = 0; 3805 c->cur_tcp_count 
= 0; 3806 c->tcp_handlers = NULL; 3807 c->tcp_free = NULL; 3808 c->type = comm_http; 3809 c->tcp_do_close = 0; 3810 c->do_not_close = 0; 3811 c->tcp_do_toggle_rw = 1; 3812 c->tcp_check_nb_connect = 1; 3813 c->http_in_headers = 1; 3814 c->http_in_chunk_headers = 0; 3815 c->http_is_chunked = 0; 3816 c->http_temp = temp; 3817 #ifdef USE_MSG_FASTOPEN 3818 c->tcp_do_fastopen = 1; 3819 #endif 3820 #ifdef USE_DNSCRYPT 3821 c->dnscrypt = 0; 3822 c->dnscrypt_buffer = c->buffer; 3823 #endif 3824 c->repinfo.c = c; 3825 c->callback = callback; 3826 c->cb_arg = callback_arg; 3827 evbits = UB_EV_PERSIST | UB_EV_WRITE; 3828 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 3829 comm_point_http_handle_callback, c); 3830 if(c->ev->ev == NULL) 3831 { 3832 log_err("could not baseset tcpout event"); 3833 #ifdef HAVE_SSL 3834 SSL_free(c->ssl); 3835 #endif 3836 sldns_buffer_free(c->buffer); 3837 free(c->ev); 3838 free(c); 3839 return NULL; 3840 } 3841 3842 return c; 3843 } 3844 3845 struct comm_point* 3846 comm_point_create_local(struct comm_base *base, int fd, size_t bufsize, 3847 comm_point_callback_type* callback, void* callback_arg) 3848 { 3849 struct comm_point* c = (struct comm_point*)calloc(1, 3850 sizeof(struct comm_point)); 3851 short evbits; 3852 if(!c) 3853 return NULL; 3854 c->ev = (struct internal_event*)calloc(1, 3855 sizeof(struct internal_event)); 3856 if(!c->ev) { 3857 free(c); 3858 return NULL; 3859 } 3860 c->ev->base = base; 3861 c->fd = fd; 3862 c->buffer = sldns_buffer_new(bufsize); 3863 if(!c->buffer) { 3864 free(c->ev); 3865 free(c); 3866 return NULL; 3867 } 3868 c->timeout = NULL; 3869 c->tcp_is_reading = 1; 3870 c->tcp_byte_count = 0; 3871 c->tcp_parent = NULL; 3872 c->max_tcp_count = 0; 3873 c->cur_tcp_count = 0; 3874 c->tcp_handlers = NULL; 3875 c->tcp_free = NULL; 3876 c->type = comm_local; 3877 c->tcp_do_close = 0; 3878 c->do_not_close = 1; 3879 c->tcp_do_toggle_rw = 0; 3880 c->tcp_check_nb_connect = 0; 3881 #ifdef USE_MSG_FASTOPEN 3882 
c->tcp_do_fastopen = 0; 3883 #endif 3884 #ifdef USE_DNSCRYPT 3885 c->dnscrypt = 0; 3886 c->dnscrypt_buffer = c->buffer; 3887 #endif 3888 c->callback = callback; 3889 c->cb_arg = callback_arg; 3890 /* ub_event stuff */ 3891 evbits = UB_EV_PERSIST | UB_EV_READ; 3892 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 3893 comm_point_local_handle_callback, c); 3894 if(c->ev->ev == NULL) { 3895 log_err("could not baseset localhdl event"); 3896 free(c->ev); 3897 free(c); 3898 return NULL; 3899 } 3900 if (ub_event_add(c->ev->ev, c->timeout) != 0) { 3901 log_err("could not add localhdl event"); 3902 ub_event_free(c->ev->ev); 3903 free(c->ev); 3904 free(c); 3905 return NULL; 3906 } 3907 c->event_added = 1; 3908 return c; 3909 } 3910 3911 struct comm_point* 3912 comm_point_create_raw(struct comm_base* base, int fd, int writing, 3913 comm_point_callback_type* callback, void* callback_arg) 3914 { 3915 struct comm_point* c = (struct comm_point*)calloc(1, 3916 sizeof(struct comm_point)); 3917 short evbits; 3918 if(!c) 3919 return NULL; 3920 c->ev = (struct internal_event*)calloc(1, 3921 sizeof(struct internal_event)); 3922 if(!c->ev) { 3923 free(c); 3924 return NULL; 3925 } 3926 c->ev->base = base; 3927 c->fd = fd; 3928 c->buffer = NULL; 3929 c->timeout = NULL; 3930 c->tcp_is_reading = 0; 3931 c->tcp_byte_count = 0; 3932 c->tcp_parent = NULL; 3933 c->max_tcp_count = 0; 3934 c->cur_tcp_count = 0; 3935 c->tcp_handlers = NULL; 3936 c->tcp_free = NULL; 3937 c->type = comm_raw; 3938 c->tcp_do_close = 0; 3939 c->do_not_close = 1; 3940 c->tcp_do_toggle_rw = 0; 3941 c->tcp_check_nb_connect = 0; 3942 #ifdef USE_MSG_FASTOPEN 3943 c->tcp_do_fastopen = 0; 3944 #endif 3945 #ifdef USE_DNSCRYPT 3946 c->dnscrypt = 0; 3947 c->dnscrypt_buffer = c->buffer; 3948 #endif 3949 c->callback = callback; 3950 c->cb_arg = callback_arg; 3951 /* ub_event stuff */ 3952 if(writing) 3953 evbits = UB_EV_PERSIST | UB_EV_WRITE; 3954 else evbits = UB_EV_PERSIST | UB_EV_READ; 3955 c->ev->ev = 
/**
 * Close a comm point: unregister its event, release per-connection
 * state and close the fd.
 * The comm point itself stays allocated; its fd is -1 afterwards.
 * @param c: comm point to close; NULL is ignored.
 */
void
comm_point_close(struct comm_point* c)
{
	if(!c)
		return;
	if(c->fd != -1) {
		verbose(5, "comm_point_close of %d: event_del", c->fd);
		/* only delete an event that was really added */
		if(c->event_added) {
			if(ub_event_del(c->ev->ev) != 0) {
				log_err("could not event_del on close");
			}
			c->event_added = 0;
		}
	}
	tcl_close_connection(c->tcl_addr);
	if(c->tcp_req_info)
		tcp_req_info_clear(c->tcp_req_info);
	if(c->h2_session)
		http2_session_server_delete(c->h2_session);

	/* close fd after removing from event lists, or epoll.. is messed up */
	if(c->fd != -1 && !c->do_not_close) {
#ifdef USE_WINSOCK
		if(c->type == comm_tcp || c->type == comm_http) {
			/* delete sticky events for the fd, it gets closed */
			ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
			ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
		}
#endif
		verbose(VERB_ALGO, "close fd %d", c->fd);
		sock_close(c->fd);
	}
	/* also reset when do_not_close kept the descriptor open */
	c->fd = -1;
}

/**
 * Close and free a comm point, including the handlers of an accept
 * comm point (deleted recursively).
 * @param c: comm point to delete; NULL is ignored.
 */
void
comm_point_delete(struct comm_point* c)
{
	if(!c)
		return;
	/* SSL state goes first, before comm_point_close() closes the fd */
	if((c->type == comm_tcp || c->type == comm_http) && c->ssl) {
#ifdef HAVE_SSL
		SSL_shutdown(c->ssl);
		SSL_free(c->ssl);
#endif
	}
	if(c->type == comm_http && c->http_endpoint) {
		free(c->http_endpoint);
		c->http_endpoint = NULL;
	}
	comm_point_close(c);
	if(c->tcp_handlers) {
		int i;
		/* entries may be NULL; comm_point_delete ignores those */
		for(i=0; i<c->max_tcp_count; i++)
			comm_point_delete(c->tcp_handlers[i]);
		free(c->tcp_handlers);
	}
	free(c->timeout);
	/* only these types have their buffer freed here */
	if(c->type == comm_tcp || c->type == comm_local || c->type == comm_http) {
		sldns_buffer_free(c->buffer);
#ifdef USE_DNSCRYPT
		/* free the dnscrypt buffer only if it is a separate one */
		if(c->dnscrypt && c->dnscrypt_buffer != c->buffer) {
			sldns_buffer_free(c->dnscrypt_buffer);
		}
#endif
		if(c->tcp_req_info) {
			tcp_req_info_delete(c->tcp_req_info);
		}
		if(c->h2_session) {
			http2_session_delete(c->h2_session);
		}
	}
	ub_event_free(c->ev->ev);
	free(c->ev);
	free(c);
}

/**
 * Send the reply for a query on the comm point it arrived on.
 * For comm_udp the buffer is sent immediately. For other types the
 * reply is handed to the tcp request info, submitted as an http2
 * response, or the comm point is set listening again with the
 * adjusted tcp timeout.
 * @param repinfo: reply info; repinfo and repinfo->c must be set.
 */
void
comm_point_send_reply(struct comm_reply *repinfo)
{
	struct sldns_buffer* buffer;
	log_assert(repinfo && repinfo->c);
#ifdef USE_DNSCRYPT
	buffer = repinfo->c->dnscrypt_buffer;
	/* a zero return means there is nothing to send */
	if(!dnsc_handle_uncurved_request(repinfo)) {
		return;
	}
#else
	buffer = repinfo->c->buffer;
#endif
	if(repinfo->c->type == comm_udp) {
		/* a nonzero srctype selects the send routine that is also
		 * handed repinfo itself */
		if(repinfo->srctype)
			comm_point_send_udp_msg_if(repinfo->c,
			buffer, (struct sockaddr*)&repinfo->addr,
			repinfo->addrlen, repinfo);
		else
			comm_point_send_udp_msg(repinfo->c, buffer,
			(struct sockaddr*)&repinfo->addr, repinfo->addrlen, 0);
#ifdef USE_DNSTAP
		/*
		 * sending src (client)/dst (local service) addresses over DNSTAP from udp callback
		 */
		if(repinfo->c->dtenv != NULL && repinfo->c->dtenv->log_client_response_messages) {
			log_addr(VERB_ALGO, "from local addr", (void*)repinfo->c->socket->addr->ai_addr, repinfo->c->socket->addr->ai_addrlen);
			log_addr(VERB_ALGO, "response to client", &repinfo->addr, repinfo->addrlen);
			dt_msg_send_client_response(repinfo->c->dtenv, &repinfo->addr, (void*)repinfo->c->socket->addr->ai_addr, repinfo->c->type, repinfo->c->buffer);
		}
#endif
	} else {
#ifdef USE_DNSTAP
		/*
		 * sending src (client)/dst (local service) addresses over DNSTAP from TCP callback
		 */
		/* NOTE(review): tcp_parent is dereferenced without a NULL
		 * check; this presumably relies on replies only being sent
		 * on handlers that have an accept parent - confirm. */
		if(repinfo->c->tcp_parent->dtenv != NULL && repinfo->c->tcp_parent->dtenv->log_client_response_messages) {
			log_addr(VERB_ALGO, "from local addr", (void*)repinfo->c->socket->addr->ai_addr, repinfo->c->socket->addr->ai_addrlen);
			log_addr(VERB_ALGO, "response to client", &repinfo->addr, repinfo->addrlen);
			dt_msg_send_client_response(repinfo->c->tcp_parent->dtenv, &repinfo->addr, (void*)repinfo->c->socket->addr->ai_addr, repinfo->c->type,
				( repinfo->c->tcp_req_info? repinfo->c->tcp_req_info->spool_buffer: repinfo->c->buffer ));
		}
#endif
		if(repinfo->c->tcp_req_info) {
			tcp_req_info_send_reply(repinfo->c->tcp_req_info);
		} else if(repinfo->c->use_h2) {
			/* a response that cannot be submitted is dropped */
			if(!http2_submit_dns_response(repinfo->c->h2_session)) {
				comm_point_drop_reply(repinfo);
				return;
			}
			repinfo->c->h2_stream = NULL;
			/* switch the comm point to writing */
			repinfo->c->tcp_is_reading = 0;
			comm_point_stop_listening(repinfo->c);
			comm_point_start_listening(repinfo->c, -1,
				adjusted_tcp_timeout(repinfo->c));
			return;
		} else {
			comm_point_start_listening(repinfo->c, -1,
				adjusted_tcp_timeout(repinfo->c));
		}
	}
}

/**
 * Drop a reply instead of sending it.
 * Nothing is done for comm_udp. For other types the handler is
 * reclaimed; with an http2 session the reclaim is skipped while
 * postpone_drop is set and only is_drop is recorded.
 * @param repinfo: reply info; NULL is ignored.
 */
void
comm_point_drop_reply(struct comm_reply* repinfo)
{
	if(!repinfo)
		return;
	log_assert(repinfo->c);
	log_assert(repinfo->c->type != comm_tcp_accept);
	if(repinfo->c->type == comm_udp)
		return;
	if(repinfo->c->tcp_req_info)
		repinfo->c->tcp_req_info->is_drop = 1;
	if(repinfo->c->type == comm_http) {
		if(repinfo->c->h2_session) {
			repinfo->c->h2_session->is_drop = 1;
			if(!repinfo->c->h2_session->postpone_drop)
				reclaim_http_handler(repinfo->c);
			return;
		}
		reclaim_http_handler(repinfo->c);
		return;
	}
	reclaim_tcp_handler(repinfo->c);
}

/**
 * Stop listening: delete the event if it is currently added.
 * The fd and the comm point stay as they are.
 * @param c: comm point.
 */
void
comm_point_stop_listening(struct comm_point* c)
{
	verbose(VERB_ALGO, "comm point stop listening %d", c->fd);
	if(c->event_added) {
		if(ub_event_del(c->ev->ev) != 0) {
			log_err("event_del error to stoplisten");
		}
		c->event_added = 0;
	}
}
*/ 4157 return; 4158 } 4159 if(c->event_added) { 4160 if(ub_event_del(c->ev->ev) != 0) { 4161 log_err("event_del error to startlisten"); 4162 } 4163 c->event_added = 0; 4164 } 4165 if(msec != -1 && msec != 0) { 4166 if(!c->timeout) { 4167 c->timeout = (struct timeval*)malloc(sizeof( 4168 struct timeval)); 4169 if(!c->timeout) { 4170 log_err("cpsl: malloc failed. No net read."); 4171 return; 4172 } 4173 } 4174 ub_event_add_bits(c->ev->ev, UB_EV_TIMEOUT); 4175 #ifndef S_SPLINT_S /* splint fails on struct timeval. */ 4176 c->timeout->tv_sec = msec/1000; 4177 c->timeout->tv_usec = (msec%1000)*1000; 4178 #endif /* S_SPLINT_S */ 4179 } else { 4180 if(msec == 0 || !c->timeout) { 4181 ub_event_del_bits(c->ev->ev, UB_EV_TIMEOUT); 4182 } 4183 } 4184 if(c->type == comm_tcp || c->type == comm_http) { 4185 ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE); 4186 if(c->tcp_write_and_read) { 4187 verbose(5, "startlistening %d mode rw", (newfd==-1?c->fd:newfd)); 4188 ub_event_add_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE); 4189 } else if(c->tcp_is_reading) { 4190 verbose(5, "startlistening %d mode r", (newfd==-1?c->fd:newfd)); 4191 ub_event_add_bits(c->ev->ev, UB_EV_READ); 4192 } else { 4193 verbose(5, "startlistening %d mode w", (newfd==-1?c->fd:newfd)); 4194 ub_event_add_bits(c->ev->ev, UB_EV_WRITE); 4195 } 4196 } 4197 if(newfd != -1) { 4198 if(c->fd != -1 && c->fd != newfd) { 4199 verbose(5, "cpsl close of fd %d for %d", c->fd, newfd); 4200 sock_close(c->fd); 4201 } 4202 c->fd = newfd; 4203 ub_event_set_fd(c->ev->ev, c->fd); 4204 } 4205 if(ub_event_add(c->ev->ev, msec==0?NULL:c->timeout) != 0) { 4206 log_err("event_add failed. 
in cpsl."); 4207 return; 4208 } 4209 c->event_added = 1; 4210 } 4211 4212 void comm_point_listen_for_rw(struct comm_point* c, int rd, int wr) 4213 { 4214 verbose(VERB_ALGO, "comm point listen_for_rw %d %d", c->fd, wr); 4215 if(c->event_added) { 4216 if(ub_event_del(c->ev->ev) != 0) { 4217 log_err("event_del error to cplf"); 4218 } 4219 c->event_added = 0; 4220 } 4221 if(!c->timeout) { 4222 ub_event_del_bits(c->ev->ev, UB_EV_TIMEOUT); 4223 } 4224 ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE); 4225 if(rd) ub_event_add_bits(c->ev->ev, UB_EV_READ); 4226 if(wr) ub_event_add_bits(c->ev->ev, UB_EV_WRITE); 4227 if(ub_event_add(c->ev->ev, c->timeout) != 0) { 4228 log_err("event_add failed. in cplf."); 4229 return; 4230 } 4231 c->event_added = 1; 4232 } 4233 4234 size_t comm_point_get_mem(struct comm_point* c) 4235 { 4236 size_t s; 4237 if(!c) 4238 return 0; 4239 s = sizeof(*c) + sizeof(*c->ev); 4240 if(c->timeout) 4241 s += sizeof(*c->timeout); 4242 if(c->type == comm_tcp || c->type == comm_local) { 4243 s += sizeof(*c->buffer) + sldns_buffer_capacity(c->buffer); 4244 #ifdef USE_DNSCRYPT 4245 s += sizeof(*c->dnscrypt_buffer); 4246 if(c->buffer != c->dnscrypt_buffer) { 4247 s += sldns_buffer_capacity(c->dnscrypt_buffer); 4248 } 4249 #endif 4250 } 4251 if(c->type == comm_tcp_accept) { 4252 int i; 4253 for(i=0; i<c->max_tcp_count; i++) 4254 s += comm_point_get_mem(c->tcp_handlers[i]); 4255 } 4256 return s; 4257 } 4258 4259 struct comm_timer* 4260 comm_timer_create(struct comm_base* base, void (*cb)(void*), void* cb_arg) 4261 { 4262 struct internal_timer *tm = (struct internal_timer*)calloc(1, 4263 sizeof(struct internal_timer)); 4264 if(!tm) { 4265 log_err("malloc failed"); 4266 return NULL; 4267 } 4268 tm->super.ev_timer = tm; 4269 tm->base = base; 4270 tm->super.callback = cb; 4271 tm->super.cb_arg = cb_arg; 4272 tm->ev = ub_event_new(base->eb->base, -1, UB_EV_TIMEOUT, 4273 comm_timer_callback, &tm->super); 4274 if(tm->ev == NULL) { 4275 log_err("timer_create: 
event_base_set failed."); 4276 free(tm); 4277 return NULL; 4278 } 4279 return &tm->super; 4280 } 4281 4282 void 4283 comm_timer_disable(struct comm_timer* timer) 4284 { 4285 if(!timer) 4286 return; 4287 ub_timer_del(timer->ev_timer->ev); 4288 timer->ev_timer->enabled = 0; 4289 } 4290 4291 void 4292 comm_timer_set(struct comm_timer* timer, struct timeval* tv) 4293 { 4294 log_assert(tv); 4295 if(timer->ev_timer->enabled) 4296 comm_timer_disable(timer); 4297 if(ub_timer_add(timer->ev_timer->ev, timer->ev_timer->base->eb->base, 4298 comm_timer_callback, timer, tv) != 0) 4299 log_err("comm_timer_set: evtimer_add failed."); 4300 timer->ev_timer->enabled = 1; 4301 } 4302 4303 void 4304 comm_timer_delete(struct comm_timer* timer) 4305 { 4306 if(!timer) 4307 return; 4308 comm_timer_disable(timer); 4309 /* Free the sub struct timer->ev_timer derived from the super struct timer. 4310 * i.e. assert(timer == timer->ev_timer) 4311 */ 4312 ub_event_free(timer->ev_timer->ev); 4313 free(timer->ev_timer); 4314 } 4315 4316 void 4317 comm_timer_callback(int ATTR_UNUSED(fd), short event, void* arg) 4318 { 4319 struct comm_timer* tm = (struct comm_timer*)arg; 4320 if(!(event&UB_EV_TIMEOUT)) 4321 return; 4322 ub_comm_base_now(tm->ev_timer->base); 4323 tm->ev_timer->enabled = 0; 4324 fptr_ok(fptr_whitelist_comm_timer(tm->callback)); 4325 (*tm->callback)(tm->cb_arg); 4326 } 4327 4328 int 4329 comm_timer_is_set(struct comm_timer* timer) 4330 { 4331 return (int)timer->ev_timer->enabled; 4332 } 4333 4334 size_t 4335 comm_timer_get_mem(struct comm_timer* ATTR_UNUSED(timer)) 4336 { 4337 return sizeof(struct internal_timer); 4338 } 4339 4340 struct comm_signal* 4341 comm_signal_create(struct comm_base* base, 4342 void (*callback)(int, void*), void* cb_arg) 4343 { 4344 struct comm_signal* com = (struct comm_signal*)malloc( 4345 sizeof(struct comm_signal)); 4346 if(!com) { 4347 log_err("malloc failed"); 4348 return NULL; 4349 } 4350 com->base = base; 4351 com->callback = callback; 4352 com->cb_arg 
= cb_arg; 4353 com->ev_signal = NULL; 4354 return com; 4355 } 4356 4357 void 4358 comm_signal_callback(int sig, short event, void* arg) 4359 { 4360 struct comm_signal* comsig = (struct comm_signal*)arg; 4361 if(!(event & UB_EV_SIGNAL)) 4362 return; 4363 ub_comm_base_now(comsig->base); 4364 fptr_ok(fptr_whitelist_comm_signal(comsig->callback)); 4365 (*comsig->callback)(sig, comsig->cb_arg); 4366 } 4367 4368 int 4369 comm_signal_bind(struct comm_signal* comsig, int sig) 4370 { 4371 struct internal_signal* entry = (struct internal_signal*)calloc(1, 4372 sizeof(struct internal_signal)); 4373 if(!entry) { 4374 log_err("malloc failed"); 4375 return 0; 4376 } 4377 log_assert(comsig); 4378 /* add signal event */ 4379 entry->ev = ub_signal_new(comsig->base->eb->base, sig, 4380 comm_signal_callback, comsig); 4381 if(entry->ev == NULL) { 4382 log_err("Could not create signal event"); 4383 free(entry); 4384 return 0; 4385 } 4386 if(ub_signal_add(entry->ev, NULL) != 0) { 4387 log_err("Could not add signal handler"); 4388 ub_event_free(entry->ev); 4389 free(entry); 4390 return 0; 4391 } 4392 /* link into list */ 4393 entry->next = comsig->ev_signal; 4394 comsig->ev_signal = entry; 4395 return 1; 4396 } 4397 4398 void 4399 comm_signal_delete(struct comm_signal* comsig) 4400 { 4401 struct internal_signal* p, *np; 4402 if(!comsig) 4403 return; 4404 p=comsig->ev_signal; 4405 while(p) { 4406 np = p->next; 4407 ub_signal_del(p->ev); 4408 ub_event_free(p->ev); 4409 free(p); 4410 p = np; 4411 } 4412 free(comsig); 4413 } 4414