1 /* 2 * util/netevent.c - event notification 3 * 4 * Copyright (c) 2007, NLnet Labs. All rights reserved. 5 * 6 * This software is open source. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * Redistributions of source code must retain the above copyright notice, 13 * this list of conditions and the following disclaimer. 14 * 15 * Redistributions in binary form must reproduce the above copyright notice, 16 * this list of conditions and the following disclaimer in the documentation 17 * and/or other materials provided with the distribution. 18 * 19 * Neither the name of the NLNET LABS nor the names of its contributors may 20 * be used to endorse or promote products derived from this software without 21 * specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36 /** 37 * \file 38 * 39 * This file contains event notification functions. 
40 */ 41 #include "config.h" 42 #include "util/netevent.h" 43 #include "util/ub_event.h" 44 #include "util/log.h" 45 #include "util/net_help.h" 46 #include "util/tcp_conn_limit.h" 47 #include "util/fptr_wlist.h" 48 #include "sldns/pkthdr.h" 49 #include "sldns/sbuffer.h" 50 #include "sldns/str2wire.h" 51 #include "dnstap/dnstap.h" 52 #include "dnscrypt/dnscrypt.h" 53 #include "services/listen_dnsport.h" 54 #ifdef HAVE_SYS_TYPES_H 55 #include <sys/types.h> 56 #endif 57 #ifdef HAVE_SYS_SOCKET_H 58 #include <sys/socket.h> 59 #endif 60 #ifdef HAVE_NETDB_H 61 #include <netdb.h> 62 #endif 63 64 #ifdef HAVE_OPENSSL_SSL_H 65 #include <openssl/ssl.h> 66 #endif 67 #ifdef HAVE_OPENSSL_ERR_H 68 #include <openssl/err.h> 69 #endif 70 71 /* -------- Start of local definitions -------- */ 72 /** if CMSG_ALIGN is not defined on this platform, a workaround */ 73 #ifndef CMSG_ALIGN 74 # ifdef __CMSG_ALIGN 75 # define CMSG_ALIGN(n) __CMSG_ALIGN(n) 76 # elif defined(CMSG_DATA_ALIGN) 77 # define CMSG_ALIGN _CMSG_DATA_ALIGN 78 # else 79 # define CMSG_ALIGN(len) (((len)+sizeof(long)-1) & ~(sizeof(long)-1)) 80 # endif 81 #endif 82 83 /** if CMSG_LEN is not defined on this platform, a workaround */ 84 #ifndef CMSG_LEN 85 # define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr))+(len)) 86 #endif 87 88 /** if CMSG_SPACE is not defined on this platform, a workaround */ 89 #ifndef CMSG_SPACE 90 # ifdef _CMSG_HDR_ALIGN 91 # define CMSG_SPACE(l) (CMSG_ALIGN(l)+_CMSG_HDR_ALIGN(sizeof(struct cmsghdr))) 92 # else 93 # define CMSG_SPACE(l) (CMSG_ALIGN(l)+CMSG_ALIGN(sizeof(struct cmsghdr))) 94 # endif 95 #endif 96 97 /** The TCP writing query timeout in milliseconds */ 98 #define TCP_QUERY_TIMEOUT 120000 99 /** The minimum actual TCP timeout to use, regardless of what we advertise, 100 * in msec */ 101 #define TCP_QUERY_TIMEOUT_MINIMUM 200 102 103 #ifndef NONBLOCKING_IS_BROKEN 104 /** number of UDP reads to perform per read indication from select */ 105 #define NUM_UDP_PER_SELECT 100 106 #else 107 
#define NUM_UDP_PER_SELECT 1 108 #endif 109 110 /** 111 * The internal event structure for keeping ub_event info for the event. 112 * Possibly other structures (list, tree) this is part of. 113 */ 114 struct internal_event { 115 /** the comm base */ 116 struct comm_base* base; 117 /** ub_event event type */ 118 struct ub_event* ev; 119 }; 120 121 /** 122 * Internal base structure, so that every thread has its own events. 123 */ 124 struct internal_base { 125 /** ub_event event_base type. */ 126 struct ub_event_base* base; 127 /** seconds time pointer points here */ 128 time_t secs; 129 /** timeval with current time */ 130 struct timeval now; 131 /** the event used for slow_accept timeouts */ 132 struct ub_event* slow_accept; 133 /** true if slow_accept is enabled */ 134 int slow_accept_enabled; 135 }; 136 137 /** 138 * Internal timer structure, to store timer event in. 139 */ 140 struct internal_timer { 141 /** the super struct from which derived */ 142 struct comm_timer super; 143 /** the comm base */ 144 struct comm_base* base; 145 /** ub_event event type */ 146 struct ub_event* ev; 147 /** is timer enabled */ 148 uint8_t enabled; 149 }; 150 151 /** 152 * Internal signal structure, to store signal event in. 
153 */ 154 struct internal_signal { 155 /** ub_event event type */ 156 struct ub_event* ev; 157 /** next in signal list */ 158 struct internal_signal* next; 159 }; 160 161 /** create a tcp handler with a parent */ 162 static struct comm_point* comm_point_create_tcp_handler( 163 struct comm_base *base, struct comm_point* parent, size_t bufsize, 164 struct sldns_buffer* spoolbuf, comm_point_callback_type* callback, 165 void* callback_arg, struct unbound_socket* socket); 166 167 /* -------- End of local definitions -------- */ 168 169 struct comm_base* 170 comm_base_create(int sigs) 171 { 172 struct comm_base* b = (struct comm_base*)calloc(1, 173 sizeof(struct comm_base)); 174 const char *evnm="event", *evsys="", *evmethod=""; 175 176 if(!b) 177 return NULL; 178 b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base)); 179 if(!b->eb) { 180 free(b); 181 return NULL; 182 } 183 b->eb->base = ub_default_event_base(sigs, &b->eb->secs, &b->eb->now); 184 if(!b->eb->base) { 185 free(b->eb); 186 free(b); 187 return NULL; 188 } 189 ub_comm_base_now(b); 190 ub_get_event_sys(b->eb->base, &evnm, &evsys, &evmethod); 191 verbose(VERB_ALGO, "%s %s uses %s method.", evnm, evsys, evmethod); 192 return b; 193 } 194 195 struct comm_base* 196 comm_base_create_event(struct ub_event_base* base) 197 { 198 struct comm_base* b = (struct comm_base*)calloc(1, 199 sizeof(struct comm_base)); 200 if(!b) 201 return NULL; 202 b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base)); 203 if(!b->eb) { 204 free(b); 205 return NULL; 206 } 207 b->eb->base = base; 208 ub_comm_base_now(b); 209 return b; 210 } 211 212 void 213 comm_base_delete(struct comm_base* b) 214 { 215 if(!b) 216 return; 217 if(b->eb->slow_accept_enabled) { 218 if(ub_event_del(b->eb->slow_accept) != 0) { 219 log_err("could not event_del slow_accept"); 220 } 221 ub_event_free(b->eb->slow_accept); 222 } 223 ub_event_base_free(b->eb->base); 224 b->eb->base = NULL; 225 free(b->eb); 226 free(b); 227 } 228 229 void 
230 comm_base_delete_no_base(struct comm_base* b) 231 { 232 if(!b) 233 return; 234 if(b->eb->slow_accept_enabled) { 235 if(ub_event_del(b->eb->slow_accept) != 0) { 236 log_err("could not event_del slow_accept"); 237 } 238 ub_event_free(b->eb->slow_accept); 239 } 240 b->eb->base = NULL; 241 free(b->eb); 242 free(b); 243 } 244 245 void 246 comm_base_timept(struct comm_base* b, time_t** tt, struct timeval** tv) 247 { 248 *tt = &b->eb->secs; 249 *tv = &b->eb->now; 250 } 251 252 void 253 comm_base_dispatch(struct comm_base* b) 254 { 255 int retval; 256 retval = ub_event_base_dispatch(b->eb->base); 257 if(retval < 0) { 258 fatal_exit("event_dispatch returned error %d, " 259 "errno is %s", retval, strerror(errno)); 260 } 261 } 262 263 void comm_base_exit(struct comm_base* b) 264 { 265 if(ub_event_base_loopexit(b->eb->base) != 0) { 266 log_err("Could not loopexit"); 267 } 268 } 269 270 void comm_base_set_slow_accept_handlers(struct comm_base* b, 271 void (*stop_acc)(void*), void (*start_acc)(void*), void* arg) 272 { 273 b->stop_accept = stop_acc; 274 b->start_accept = start_acc; 275 b->cb_arg = arg; 276 } 277 278 struct ub_event_base* comm_base_internal(struct comm_base* b) 279 { 280 return b->eb->base; 281 } 282 283 /** see if errno for udp has to be logged or not uses globals */ 284 static int 285 udp_send_errno_needs_log(struct sockaddr* addr, socklen_t addrlen) 286 { 287 /* do not log transient errors (unless high verbosity) */ 288 #if defined(ENETUNREACH) || defined(EHOSTDOWN) || defined(EHOSTUNREACH) || defined(ENETDOWN) 289 switch(errno) { 290 # ifdef ENETUNREACH 291 case ENETUNREACH: 292 # endif 293 # ifdef EHOSTDOWN 294 case EHOSTDOWN: 295 # endif 296 # ifdef EHOSTUNREACH 297 case EHOSTUNREACH: 298 # endif 299 # ifdef ENETDOWN 300 case ENETDOWN: 301 # endif 302 case EPERM: 303 case EACCES: 304 if(verbosity < VERB_ALGO) 305 return 0; 306 default: 307 break; 308 } 309 #endif 310 /* permission denied is gotten for every send if the 311 * network is disconnected (on 
some OS), squelch it */ 312 if( ((errno == EPERM) 313 # ifdef EADDRNOTAVAIL 314 /* 'Cannot assign requested address' also when disconnected */ 315 || (errno == EADDRNOTAVAIL) 316 # endif 317 ) && verbosity < VERB_ALGO) 318 return 0; 319 # ifdef EADDRINUSE 320 /* If SO_REUSEADDR is set, we could try to connect to the same server 321 * from the same source port twice. */ 322 if(errno == EADDRINUSE && verbosity < VERB_DETAIL) 323 return 0; 324 # endif 325 /* squelch errors where people deploy AAAA ::ffff:bla for 326 * authority servers, which we try for intranets. */ 327 if(errno == EINVAL && addr_is_ip4mapped( 328 (struct sockaddr_storage*)addr, addrlen) && 329 verbosity < VERB_DETAIL) 330 return 0; 331 /* SO_BROADCAST sockopt can give access to 255.255.255.255, 332 * but a dns cache does not need it. */ 333 if(errno == EACCES && addr_is_broadcast( 334 (struct sockaddr_storage*)addr, addrlen) && 335 verbosity < VERB_DETAIL) 336 return 0; 337 return 1; 338 } 339 340 int tcp_connect_errno_needs_log(struct sockaddr* addr, socklen_t addrlen) 341 { 342 return udp_send_errno_needs_log(addr, addrlen); 343 } 344 345 /* send a UDP reply */ 346 int 347 comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet, 348 struct sockaddr* addr, socklen_t addrlen, int is_connected) 349 { 350 ssize_t sent; 351 log_assert(c->fd != -1); 352 #ifdef UNBOUND_DEBUG 353 if(sldns_buffer_remaining(packet) == 0) 354 log_err("error: send empty UDP packet"); 355 #endif 356 log_assert(addr && addrlen > 0); 357 if(!is_connected) { 358 sent = sendto(c->fd, (void*)sldns_buffer_begin(packet), 359 sldns_buffer_remaining(packet), 0, 360 addr, addrlen); 361 } else { 362 sent = send(c->fd, (void*)sldns_buffer_begin(packet), 363 sldns_buffer_remaining(packet), 0); 364 } 365 if(sent == -1) { 366 /* try again and block, waiting for IO to complete, 367 * we want to send the answer, and we will wait for 368 * the ethernet interface buffer to have space. 
*/ 369 #ifndef USE_WINSOCK 370 if(errno == EAGAIN || 371 # ifdef EWOULDBLOCK 372 errno == EWOULDBLOCK || 373 # endif 374 errno == ENOBUFS) { 375 #else 376 if(WSAGetLastError() == WSAEINPROGRESS || 377 WSAGetLastError() == WSAENOBUFS || 378 WSAGetLastError() == WSAEWOULDBLOCK) { 379 #endif 380 int e; 381 fd_set_block(c->fd); 382 if (!is_connected) { 383 sent = sendto(c->fd, (void*)sldns_buffer_begin(packet), 384 sldns_buffer_remaining(packet), 0, 385 addr, addrlen); 386 } else { 387 sent = send(c->fd, (void*)sldns_buffer_begin(packet), 388 sldns_buffer_remaining(packet), 0); 389 } 390 e = errno; 391 fd_set_nonblock(c->fd); 392 errno = e; 393 } 394 } 395 if(sent == -1) { 396 if(!udp_send_errno_needs_log(addr, addrlen)) 397 return 0; 398 if (!is_connected) { 399 verbose(VERB_OPS, "sendto failed: %s", sock_strerror(errno)); 400 } else { 401 verbose(VERB_OPS, "send failed: %s", sock_strerror(errno)); 402 } 403 if(addr) 404 log_addr(VERB_OPS, "remote address is", 405 (struct sockaddr_storage*)addr, addrlen); 406 return 0; 407 } else if((size_t)sent != sldns_buffer_remaining(packet)) { 408 log_err("sent %d in place of %d bytes", 409 (int)sent, (int)sldns_buffer_remaining(packet)); 410 return 0; 411 } 412 return 1; 413 } 414 415 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && (defined(HAVE_RECVMSG) || defined(HAVE_SENDMSG)) 416 /** print debug ancillary info */ 417 static void p_ancil(const char* str, struct comm_reply* r) 418 { 419 if(r->srctype != 4 && r->srctype != 6) { 420 log_info("%s: unknown srctype %d", str, r->srctype); 421 return; 422 } 423 424 if(r->srctype == 6) { 425 #ifdef IPV6_PKTINFO 426 char buf[1024]; 427 if(inet_ntop(AF_INET6, &r->pktinfo.v6info.ipi6_addr, 428 buf, (socklen_t)sizeof(buf)) == 0) { 429 (void)strlcpy(buf, "(inet_ntop error)", sizeof(buf)); 430 } 431 buf[sizeof(buf)-1]=0; 432 log_info("%s: %s %d", str, buf, r->pktinfo.v6info.ipi6_ifindex); 433 #endif 434 } else if(r->srctype == 4) { 435 #ifdef IP_PKTINFO 436 char buf1[1024], buf2[1024]; 
437 if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_addr, 438 buf1, (socklen_t)sizeof(buf1)) == 0) { 439 (void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1)); 440 } 441 buf1[sizeof(buf1)-1]=0; 442 #ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST 443 if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_spec_dst, 444 buf2, (socklen_t)sizeof(buf2)) == 0) { 445 (void)strlcpy(buf2, "(inet_ntop error)", sizeof(buf2)); 446 } 447 buf2[sizeof(buf2)-1]=0; 448 #else 449 buf2[0]=0; 450 #endif 451 log_info("%s: %d %s %s", str, r->pktinfo.v4info.ipi_ifindex, 452 buf1, buf2); 453 #elif defined(IP_RECVDSTADDR) 454 char buf1[1024]; 455 if(inet_ntop(AF_INET, &r->pktinfo.v4addr, 456 buf1, (socklen_t)sizeof(buf1)) == 0) { 457 (void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1)); 458 } 459 buf1[sizeof(buf1)-1]=0; 460 log_info("%s: %s", str, buf1); 461 #endif /* IP_PKTINFO or PI_RECVDSTDADDR */ 462 } 463 } 464 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG||HAVE_SENDMSG */ 465 466 /** send a UDP reply over specified interface*/ 467 static int 468 comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet, 469 struct sockaddr* addr, socklen_t addrlen, struct comm_reply* r) 470 { 471 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_SENDMSG) 472 ssize_t sent; 473 struct msghdr msg; 474 struct iovec iov[1]; 475 union { 476 struct cmsghdr hdr; 477 char buf[256]; 478 } control; 479 #ifndef S_SPLINT_S 480 struct cmsghdr *cmsg; 481 #endif /* S_SPLINT_S */ 482 483 log_assert(c->fd != -1); 484 #ifdef UNBOUND_DEBUG 485 if(sldns_buffer_remaining(packet) == 0) 486 log_err("error: send empty UDP packet"); 487 #endif 488 log_assert(addr && addrlen > 0); 489 490 msg.msg_name = addr; 491 msg.msg_namelen = addrlen; 492 iov[0].iov_base = sldns_buffer_begin(packet); 493 iov[0].iov_len = sldns_buffer_remaining(packet); 494 msg.msg_iov = iov; 495 msg.msg_iovlen = 1; 496 msg.msg_control = control.buf; 497 #ifndef S_SPLINT_S 498 msg.msg_controllen = sizeof(control.buf); 499 #endif /* S_SPLINT_S 
*/ 500 msg.msg_flags = 0; 501 502 #ifndef S_SPLINT_S 503 cmsg = CMSG_FIRSTHDR(&msg); 504 if(r->srctype == 4) { 505 #ifdef IP_PKTINFO 506 void* cmsg_data; 507 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo)); 508 log_assert(msg.msg_controllen <= sizeof(control.buf)); 509 cmsg->cmsg_level = IPPROTO_IP; 510 cmsg->cmsg_type = IP_PKTINFO; 511 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4info, 512 sizeof(struct in_pktinfo)); 513 /* unset the ifindex to not bypass the routing tables */ 514 cmsg_data = CMSG_DATA(cmsg); 515 ((struct in_pktinfo *) cmsg_data)->ipi_ifindex = 0; 516 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo)); 517 #elif defined(IP_SENDSRCADDR) 518 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr)); 519 log_assert(msg.msg_controllen <= sizeof(control.buf)); 520 cmsg->cmsg_level = IPPROTO_IP; 521 cmsg->cmsg_type = IP_SENDSRCADDR; 522 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4addr, 523 sizeof(struct in_addr)); 524 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr)); 525 #else 526 verbose(VERB_ALGO, "no IP_PKTINFO or IP_SENDSRCADDR"); 527 msg.msg_control = NULL; 528 #endif /* IP_PKTINFO or IP_SENDSRCADDR */ 529 } else if(r->srctype == 6) { 530 void* cmsg_data; 531 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo)); 532 log_assert(msg.msg_controllen <= sizeof(control.buf)); 533 cmsg->cmsg_level = IPPROTO_IPV6; 534 cmsg->cmsg_type = IPV6_PKTINFO; 535 memmove(CMSG_DATA(cmsg), &r->pktinfo.v6info, 536 sizeof(struct in6_pktinfo)); 537 /* unset the ifindex to not bypass the routing tables */ 538 cmsg_data = CMSG_DATA(cmsg); 539 ((struct in6_pktinfo *) cmsg_data)->ipi6_ifindex = 0; 540 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo)); 541 } else { 542 /* try to pass all 0 to use default route */ 543 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo)); 544 log_assert(msg.msg_controllen <= sizeof(control.buf)); 545 cmsg->cmsg_level = IPPROTO_IPV6; 546 cmsg->cmsg_type = IPV6_PKTINFO; 547 memset(CMSG_DATA(cmsg), 0, sizeof(struct 
in6_pktinfo)); 548 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo)); 549 } 550 #endif /* S_SPLINT_S */ 551 if(verbosity >= VERB_ALGO) 552 p_ancil("send_udp over interface", r); 553 sent = sendmsg(c->fd, &msg, 0); 554 if(sent == -1) { 555 /* try again and block, waiting for IO to complete, 556 * we want to send the answer, and we will wait for 557 * the ethernet interface buffer to have space. */ 558 #ifndef USE_WINSOCK 559 if(errno == EAGAIN || 560 # ifdef EWOULDBLOCK 561 errno == EWOULDBLOCK || 562 # endif 563 errno == ENOBUFS) { 564 #else 565 if(WSAGetLastError() == WSAEINPROGRESS || 566 WSAGetLastError() == WSAENOBUFS || 567 WSAGetLastError() == WSAEWOULDBLOCK) { 568 #endif 569 int e; 570 fd_set_block(c->fd); 571 sent = sendmsg(c->fd, &msg, 0); 572 e = errno; 573 fd_set_nonblock(c->fd); 574 errno = e; 575 } 576 } 577 if(sent == -1) { 578 if(!udp_send_errno_needs_log(addr, addrlen)) 579 return 0; 580 verbose(VERB_OPS, "sendmsg failed: %s", strerror(errno)); 581 log_addr(VERB_OPS, "remote address is", 582 (struct sockaddr_storage*)addr, addrlen); 583 #ifdef __NetBSD__ 584 /* netbsd 7 has IP_PKTINFO for recv but not send */ 585 if(errno == EINVAL && r->srctype == 4) 586 log_err("sendmsg: No support for sendmsg(IP_PKTINFO). 
" 587 "Please disable interface-automatic"); 588 #endif 589 return 0; 590 } else if((size_t)sent != sldns_buffer_remaining(packet)) { 591 log_err("sent %d in place of %d bytes", 592 (int)sent, (int)sldns_buffer_remaining(packet)); 593 return 0; 594 } 595 return 1; 596 #else 597 (void)c; 598 (void)packet; 599 (void)addr; 600 (void)addrlen; 601 (void)r; 602 log_err("sendmsg: IPV6_PKTINFO not supported"); 603 return 0; 604 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_SENDMSG */ 605 } 606 607 /** return true is UDP receive error needs to be logged */ 608 static int udp_recv_needs_log(int err) 609 { 610 switch(err) { 611 case EACCES: /* some hosts send ICMP 'Permission Denied' */ 612 #ifndef USE_WINSOCK 613 case ECONNREFUSED: 614 # ifdef ENETUNREACH 615 case ENETUNREACH: 616 # endif 617 # ifdef EHOSTDOWN 618 case EHOSTDOWN: 619 # endif 620 # ifdef EHOSTUNREACH 621 case EHOSTUNREACH: 622 # endif 623 # ifdef ENETDOWN 624 case ENETDOWN: 625 # endif 626 #else /* USE_WINSOCK */ 627 case WSAECONNREFUSED: 628 case WSAENETUNREACH: 629 case WSAEHOSTDOWN: 630 case WSAEHOSTUNREACH: 631 case WSAENETDOWN: 632 #endif 633 if(verbosity >= VERB_ALGO) 634 return 1; 635 return 0; 636 default: 637 break; 638 } 639 return 1; 640 } 641 642 void 643 comm_point_udp_ancil_callback(int fd, short event, void* arg) 644 { 645 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG) 646 struct comm_reply rep; 647 struct msghdr msg; 648 struct iovec iov[1]; 649 ssize_t rcv; 650 union { 651 struct cmsghdr hdr; 652 char buf[256]; 653 } ancil; 654 int i; 655 #ifndef S_SPLINT_S 656 struct cmsghdr* cmsg; 657 #endif /* S_SPLINT_S */ 658 659 rep.c = (struct comm_point*)arg; 660 log_assert(rep.c->type == comm_udp); 661 662 if(!(event&UB_EV_READ)) 663 return; 664 log_assert(rep.c && rep.c->buffer && rep.c->fd == fd); 665 ub_comm_base_now(rep.c->ev->base); 666 for(i=0; i<NUM_UDP_PER_SELECT; i++) { 667 sldns_buffer_clear(rep.c->buffer); 668 rep.addrlen = (socklen_t)sizeof(rep.addr); 669 
log_assert(fd != -1); 670 log_assert(sldns_buffer_remaining(rep.c->buffer) > 0); 671 msg.msg_name = &rep.addr; 672 msg.msg_namelen = (socklen_t)sizeof(rep.addr); 673 iov[0].iov_base = sldns_buffer_begin(rep.c->buffer); 674 iov[0].iov_len = sldns_buffer_remaining(rep.c->buffer); 675 msg.msg_iov = iov; 676 msg.msg_iovlen = 1; 677 msg.msg_control = ancil.buf; 678 #ifndef S_SPLINT_S 679 msg.msg_controllen = sizeof(ancil.buf); 680 #endif /* S_SPLINT_S */ 681 msg.msg_flags = 0; 682 rcv = recvmsg(fd, &msg, 0); 683 if(rcv == -1) { 684 if(errno != EAGAIN && errno != EINTR 685 && udp_recv_needs_log(errno)) { 686 log_err("recvmsg failed: %s", strerror(errno)); 687 } 688 return; 689 } 690 rep.addrlen = msg.msg_namelen; 691 sldns_buffer_skip(rep.c->buffer, rcv); 692 sldns_buffer_flip(rep.c->buffer); 693 rep.srctype = 0; 694 #ifndef S_SPLINT_S 695 for(cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; 696 cmsg = CMSG_NXTHDR(&msg, cmsg)) { 697 if( cmsg->cmsg_level == IPPROTO_IPV6 && 698 cmsg->cmsg_type == IPV6_PKTINFO) { 699 rep.srctype = 6; 700 memmove(&rep.pktinfo.v6info, CMSG_DATA(cmsg), 701 sizeof(struct in6_pktinfo)); 702 break; 703 #ifdef IP_PKTINFO 704 } else if( cmsg->cmsg_level == IPPROTO_IP && 705 cmsg->cmsg_type == IP_PKTINFO) { 706 rep.srctype = 4; 707 memmove(&rep.pktinfo.v4info, CMSG_DATA(cmsg), 708 sizeof(struct in_pktinfo)); 709 break; 710 #elif defined(IP_RECVDSTADDR) 711 } else if( cmsg->cmsg_level == IPPROTO_IP && 712 cmsg->cmsg_type == IP_RECVDSTADDR) { 713 rep.srctype = 4; 714 memmove(&rep.pktinfo.v4addr, CMSG_DATA(cmsg), 715 sizeof(struct in_addr)); 716 break; 717 #endif /* IP_PKTINFO or IP_RECVDSTADDR */ 718 } 719 } 720 if(verbosity >= VERB_ALGO) 721 p_ancil("receive_udp on interface", &rep); 722 #endif /* S_SPLINT_S */ 723 fptr_ok(fptr_whitelist_comm_point(rep.c->callback)); 724 if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) { 725 /* send back immediate reply */ 726 (void)comm_point_send_udp_msg_if(rep.c, rep.c->buffer, 727 (struct 
sockaddr*)&rep.addr, rep.addrlen, &rep); 728 } 729 if(!rep.c || rep.c->fd == -1) /* commpoint closed */ 730 break; 731 } 732 #else 733 (void)fd; 734 (void)event; 735 (void)arg; 736 fatal_exit("recvmsg: No support for IPV6_PKTINFO; IP_PKTINFO or IP_RECVDSTADDR. " 737 "Please disable interface-automatic"); 738 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */ 739 } 740 741 void 742 comm_point_udp_callback(int fd, short event, void* arg) 743 { 744 struct comm_reply rep; 745 ssize_t rcv; 746 int i; 747 struct sldns_buffer *buffer; 748 749 rep.c = (struct comm_point*)arg; 750 log_assert(rep.c->type == comm_udp); 751 752 if(!(event&UB_EV_READ)) 753 return; 754 log_assert(rep.c && rep.c->buffer && rep.c->fd == fd); 755 ub_comm_base_now(rep.c->ev->base); 756 for(i=0; i<NUM_UDP_PER_SELECT; i++) { 757 sldns_buffer_clear(rep.c->buffer); 758 rep.addrlen = (socklen_t)sizeof(rep.addr); 759 log_assert(fd != -1); 760 log_assert(sldns_buffer_remaining(rep.c->buffer) > 0); 761 rcv = recvfrom(fd, (void*)sldns_buffer_begin(rep.c->buffer), 762 sldns_buffer_remaining(rep.c->buffer), 0, 763 (struct sockaddr*)&rep.addr, &rep.addrlen); 764 if(rcv == -1) { 765 #ifndef USE_WINSOCK 766 if(errno != EAGAIN && errno != EINTR 767 && udp_recv_needs_log(errno)) 768 log_err("recvfrom %d failed: %s", 769 fd, strerror(errno)); 770 #else 771 if(WSAGetLastError() != WSAEINPROGRESS && 772 WSAGetLastError() != WSAECONNRESET && 773 WSAGetLastError()!= WSAEWOULDBLOCK && 774 udp_recv_needs_log(WSAGetLastError())) 775 log_err("recvfrom failed: %s", 776 wsa_strerror(WSAGetLastError())); 777 #endif 778 return; 779 } 780 sldns_buffer_skip(rep.c->buffer, rcv); 781 sldns_buffer_flip(rep.c->buffer); 782 rep.srctype = 0; 783 fptr_ok(fptr_whitelist_comm_point(rep.c->callback)); 784 if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) { 785 /* send back immediate reply */ 786 #ifdef USE_DNSCRYPT 787 buffer = rep.c->dnscrypt_buffer; 788 #else 789 buffer = rep.c->buffer; 790 #endif 791 
(void)comm_point_send_udp_msg(rep.c, buffer, 792 (struct sockaddr*)&rep.addr, rep.addrlen, 0); 793 } 794 if(!rep.c || rep.c->fd != fd) /* commpoint closed to -1 or reused for 795 another UDP port. Note rep.c cannot be reused with TCP fd. */ 796 break; 797 } 798 } 799 800 int adjusted_tcp_timeout(struct comm_point* c) 801 { 802 if(c->tcp_timeout_msec < TCP_QUERY_TIMEOUT_MINIMUM) 803 return TCP_QUERY_TIMEOUT_MINIMUM; 804 return c->tcp_timeout_msec; 805 } 806 807 /** Use a new tcp handler for new query fd, set to read query */ 808 static void 809 setup_tcp_handler(struct comm_point* c, int fd, int cur, int max) 810 { 811 int handler_usage; 812 log_assert(c->type == comm_tcp || c->type == comm_http); 813 log_assert(c->fd == -1); 814 sldns_buffer_clear(c->buffer); 815 #ifdef USE_DNSCRYPT 816 if (c->dnscrypt) 817 sldns_buffer_clear(c->dnscrypt_buffer); 818 #endif 819 c->tcp_is_reading = 1; 820 c->tcp_byte_count = 0; 821 c->tcp_keepalive = 0; 822 /* if more than half the tcp handlers are in use, use a shorter 823 * timeout for this TCP connection, we need to make space for 824 * other connections to be able to get attention */ 825 /* If > 50% TCP handler structures in use, set timeout to 1/100th 826 * configured value. 827 * If > 65%TCP handler structures in use, set to 1/500th configured 828 * value. 829 * If > 80% TCP handler structures in use, set to 0. 830 * 831 * If the timeout to use falls below 200 milliseconds, an actual 832 * timeout of 200ms is used. 
833 */ 834 handler_usage = (cur * 100) / max; 835 if(handler_usage > 50 && handler_usage <= 65) 836 c->tcp_timeout_msec /= 100; 837 else if (handler_usage > 65 && handler_usage <= 80) 838 c->tcp_timeout_msec /= 500; 839 else if (handler_usage > 80) 840 c->tcp_timeout_msec = 0; 841 comm_point_start_listening(c, fd, adjusted_tcp_timeout(c)); 842 } 843 844 void comm_base_handle_slow_accept(int ATTR_UNUSED(fd), 845 short ATTR_UNUSED(event), void* arg) 846 { 847 struct comm_base* b = (struct comm_base*)arg; 848 /* timeout for the slow accept, re-enable accepts again */ 849 if(b->start_accept) { 850 verbose(VERB_ALGO, "wait is over, slow accept disabled"); 851 fptr_ok(fptr_whitelist_start_accept(b->start_accept)); 852 (*b->start_accept)(b->cb_arg); 853 b->eb->slow_accept_enabled = 0; 854 } 855 } 856 857 int comm_point_perform_accept(struct comm_point* c, 858 struct sockaddr_storage* addr, socklen_t* addrlen) 859 { 860 int new_fd; 861 *addrlen = (socklen_t)sizeof(*addr); 862 #ifndef HAVE_ACCEPT4 863 new_fd = accept(c->fd, (struct sockaddr*)addr, addrlen); 864 #else 865 /* SOCK_NONBLOCK saves extra calls to fcntl for the same result */ 866 new_fd = accept4(c->fd, (struct sockaddr*)addr, addrlen, SOCK_NONBLOCK); 867 #endif 868 if(new_fd == -1) { 869 #ifndef USE_WINSOCK 870 /* EINTR is signal interrupt. others are closed connection. */ 871 if( errno == EINTR || errno == EAGAIN 872 #ifdef EWOULDBLOCK 873 || errno == EWOULDBLOCK 874 #endif 875 #ifdef ECONNABORTED 876 || errno == ECONNABORTED 877 #endif 878 #ifdef EPROTO 879 || errno == EPROTO 880 #endif /* EPROTO */ 881 ) 882 return -1; 883 #if defined(ENFILE) && defined(EMFILE) 884 if(errno == ENFILE || errno == EMFILE) { 885 /* out of file descriptors, likely outside of our 886 * control. 
stop accept() calls for some time */ 887 if(c->ev->base->stop_accept) { 888 struct comm_base* b = c->ev->base; 889 struct timeval tv; 890 verbose(VERB_ALGO, "out of file descriptors: " 891 "slow accept"); 892 b->eb->slow_accept_enabled = 1; 893 fptr_ok(fptr_whitelist_stop_accept( 894 b->stop_accept)); 895 (*b->stop_accept)(b->cb_arg); 896 /* set timeout, no mallocs */ 897 tv.tv_sec = NETEVENT_SLOW_ACCEPT_TIME/1000; 898 tv.tv_usec = (NETEVENT_SLOW_ACCEPT_TIME%1000)*1000; 899 b->eb->slow_accept = ub_event_new(b->eb->base, 900 -1, UB_EV_TIMEOUT, 901 comm_base_handle_slow_accept, b); 902 if(b->eb->slow_accept == NULL) { 903 /* we do not want to log here, because 904 * that would spam the logfiles. 905 * error: "event_base_set failed." */ 906 } 907 else if(ub_event_add(b->eb->slow_accept, &tv) 908 != 0) { 909 /* we do not want to log here, 910 * error: "event_add failed." */ 911 } 912 } 913 return -1; 914 } 915 #endif 916 #else /* USE_WINSOCK */ 917 if(WSAGetLastError() == WSAEINPROGRESS || 918 WSAGetLastError() == WSAECONNRESET) 919 return -1; 920 if(WSAGetLastError() == WSAEWOULDBLOCK) { 921 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 922 return -1; 923 } 924 #endif 925 log_err_addr("accept failed", sock_strerror(errno), addr, 926 *addrlen); 927 return -1; 928 } 929 if(c->tcp_conn_limit && c->type == comm_tcp_accept) { 930 c->tcl_addr = tcl_addr_lookup(c->tcp_conn_limit, addr, *addrlen); 931 if(!tcl_new_connection(c->tcl_addr)) { 932 if(verbosity >= 3) 933 log_err_addr("accept rejected", 934 "connection limit exceeded", addr, *addrlen); 935 close(new_fd); 936 return -1; 937 } 938 } 939 #ifndef HAVE_ACCEPT4 940 fd_set_nonblock(new_fd); 941 #endif 942 return new_fd; 943 } 944 945 #ifdef USE_WINSOCK 946 static long win_bio_cb(BIO *b, int oper, const char* ATTR_UNUSED(argp), 947 #ifdef HAVE_BIO_SET_CALLBACK_EX 948 size_t ATTR_UNUSED(len), 949 #endif 950 int ATTR_UNUSED(argi), long argl, 951 #ifndef HAVE_BIO_SET_CALLBACK_EX 952 long retvalue 953 #else 954 int 
retvalue, size_t* ATTR_UNUSED(processed) 955 #endif 956 ) 957 { 958 int wsa_err = WSAGetLastError(); /* store errcode before it is gone */ 959 verbose(VERB_ALGO, "bio_cb %d, %s %s %s", oper, 960 (oper&BIO_CB_RETURN)?"return":"before", 961 (oper&BIO_CB_READ)?"read":((oper&BIO_CB_WRITE)?"write":"other"), 962 wsa_err==WSAEWOULDBLOCK?"wsawb":""); 963 /* on windows, check if previous operation caused EWOULDBLOCK */ 964 if( (oper == (BIO_CB_READ|BIO_CB_RETURN) && argl == 0) || 965 (oper == (BIO_CB_GETS|BIO_CB_RETURN) && argl == 0)) { 966 if(wsa_err == WSAEWOULDBLOCK) 967 ub_winsock_tcp_wouldblock((struct ub_event*) 968 BIO_get_callback_arg(b), UB_EV_READ); 969 } 970 if( (oper == (BIO_CB_WRITE|BIO_CB_RETURN) && argl == 0) || 971 (oper == (BIO_CB_PUTS|BIO_CB_RETURN) && argl == 0)) { 972 if(wsa_err == WSAEWOULDBLOCK) 973 ub_winsock_tcp_wouldblock((struct ub_event*) 974 BIO_get_callback_arg(b), UB_EV_WRITE); 975 } 976 /* return original return value */ 977 return retvalue; 978 } 979 980 /** set win bio callbacks for nonblocking operations */ 981 void 982 comm_point_tcp_win_bio_cb(struct comm_point* c, void* thessl) 983 { 984 SSL* ssl = (SSL*)thessl; 985 /* set them both just in case, but usually they are the same BIO */ 986 #ifdef HAVE_BIO_SET_CALLBACK_EX 987 BIO_set_callback_ex(SSL_get_rbio(ssl), &win_bio_cb); 988 #else 989 BIO_set_callback(SSL_get_rbio(ssl), &win_bio_cb); 990 #endif 991 BIO_set_callback_arg(SSL_get_rbio(ssl), (char*)c->ev->ev); 992 #ifdef HAVE_BIO_SET_CALLBACK_EX 993 BIO_set_callback_ex(SSL_get_wbio(ssl), &win_bio_cb); 994 #else 995 BIO_set_callback(SSL_get_wbio(ssl), &win_bio_cb); 996 #endif 997 BIO_set_callback_arg(SSL_get_wbio(ssl), (char*)c->ev->ev); 998 } 999 #endif 1000 1001 #ifdef HAVE_NGHTTP2 1002 /** Create http2 session server. 
Per connection, after TCP accepted.*/ 1003 static int http2_session_server_create(struct http2_session* h2_session) 1004 { 1005 log_assert(h2_session->callbacks); 1006 h2_session->is_drop = 0; 1007 if(nghttp2_session_server_new(&h2_session->session, 1008 h2_session->callbacks, 1009 h2_session) == NGHTTP2_ERR_NOMEM) { 1010 log_err("failed to create nghttp2 session server"); 1011 return 0; 1012 } 1013 1014 return 1; 1015 } 1016 1017 /** Submit http2 setting to session. Once per session. */ 1018 static int http2_submit_settings(struct http2_session* h2_session) 1019 { 1020 int ret; 1021 nghttp2_settings_entry settings[1] = { 1022 {NGHTTP2_SETTINGS_MAX_CONCURRENT_STREAMS, 1023 h2_session->c->http2_max_streams}}; 1024 1025 ret = nghttp2_submit_settings(h2_session->session, NGHTTP2_FLAG_NONE, 1026 settings, 1); 1027 if(ret) { 1028 verbose(VERB_QUERY, "http2: submit_settings failed, " 1029 "error: %s", nghttp2_strerror(ret)); 1030 return 0; 1031 } 1032 return 1; 1033 } 1034 #endif /* HAVE_NGHTTP2 */ 1035 1036 1037 void 1038 comm_point_tcp_accept_callback(int fd, short event, void* arg) 1039 { 1040 struct comm_point* c = (struct comm_point*)arg, *c_hdl; 1041 int new_fd; 1042 log_assert(c->type == comm_tcp_accept); 1043 if(!(event & UB_EV_READ)) { 1044 log_info("ignoring tcp accept event %d", (int)event); 1045 return; 1046 } 1047 ub_comm_base_now(c->ev->base); 1048 /* find free tcp handler. */ 1049 if(!c->tcp_free) { 1050 log_warn("accepted too many tcp, connections full"); 1051 return; 1052 } 1053 /* accept incoming connection. 
*/ 1054 c_hdl = c->tcp_free; 1055 /* clear leftover flags from previous use, and then set the 1056 * correct event base for the event structure for libevent */ 1057 ub_event_free(c_hdl->ev->ev); 1058 c_hdl->ev->ev = NULL; 1059 if((c_hdl->type == comm_tcp && c_hdl->tcp_req_info) || 1060 c_hdl->type == comm_local || c_hdl->type == comm_raw) 1061 c_hdl->tcp_do_toggle_rw = 0; 1062 else c_hdl->tcp_do_toggle_rw = 1; 1063 1064 if(c_hdl->type == comm_http) { 1065 #ifdef HAVE_NGHTTP2 1066 if(!c_hdl->h2_session || 1067 !http2_session_server_create(c_hdl->h2_session)) { 1068 log_warn("failed to create nghttp2"); 1069 return; 1070 } 1071 if(!c_hdl->h2_session || 1072 !http2_submit_settings(c_hdl->h2_session)) { 1073 log_warn("failed to submit http2 settings"); 1074 return; 1075 } 1076 if(!c->ssl) { 1077 c_hdl->tcp_do_toggle_rw = 0; 1078 c_hdl->use_h2 = 1; 1079 } 1080 #endif 1081 c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1, 1082 UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT, 1083 comm_point_http_handle_callback, c_hdl); 1084 } else { 1085 c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1, 1086 UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT, 1087 comm_point_tcp_handle_callback, c_hdl); 1088 } 1089 if(!c_hdl->ev->ev) { 1090 log_warn("could not ub_event_new, dropped tcp"); 1091 return; 1092 } 1093 log_assert(fd != -1); 1094 (void)fd; 1095 new_fd = comm_point_perform_accept(c, &c_hdl->repinfo.addr, 1096 &c_hdl->repinfo.addrlen); 1097 if(new_fd == -1) 1098 return; 1099 if(c->ssl) { 1100 c_hdl->ssl = incoming_ssl_fd(c->ssl, new_fd); 1101 if(!c_hdl->ssl) { 1102 c_hdl->fd = new_fd; 1103 comm_point_close(c_hdl); 1104 return; 1105 } 1106 c_hdl->ssl_shake_state = comm_ssl_shake_read; 1107 #ifdef USE_WINSOCK 1108 comm_point_tcp_win_bio_cb(c_hdl, c_hdl->ssl); 1109 #endif 1110 } 1111 1112 /* grab the tcp handler buffers */ 1113 c->cur_tcp_count++; 1114 c->tcp_free = c_hdl->tcp_free; 1115 c_hdl->tcp_free = NULL; 1116 if(!c->tcp_free) { 1117 /* stop accepting incoming queries 
for now. */ 1118 comm_point_stop_listening(c); 1119 } 1120 setup_tcp_handler(c_hdl, new_fd, c->cur_tcp_count, c->max_tcp_count); 1121 } 1122 1123 /** Make tcp handler free for next assignment */ 1124 static void 1125 reclaim_tcp_handler(struct comm_point* c) 1126 { 1127 log_assert(c->type == comm_tcp); 1128 if(c->ssl) { 1129 #ifdef HAVE_SSL 1130 SSL_shutdown(c->ssl); 1131 SSL_free(c->ssl); 1132 c->ssl = NULL; 1133 #endif 1134 } 1135 comm_point_close(c); 1136 if(c->tcp_parent) { 1137 if(c != c->tcp_parent->tcp_free) { 1138 c->tcp_parent->cur_tcp_count--; 1139 c->tcp_free = c->tcp_parent->tcp_free; 1140 c->tcp_parent->tcp_free = c; 1141 } 1142 if(!c->tcp_free) { 1143 /* re-enable listening on accept socket */ 1144 comm_point_start_listening(c->tcp_parent, -1, -1); 1145 } 1146 } 1147 c->tcp_more_read_again = NULL; 1148 c->tcp_more_write_again = NULL; 1149 } 1150 1151 /** do the callback when writing is done */ 1152 static void 1153 tcp_callback_writer(struct comm_point* c) 1154 { 1155 log_assert(c->type == comm_tcp); 1156 if(!c->tcp_write_and_read) { 1157 sldns_buffer_clear(c->buffer); 1158 c->tcp_byte_count = 0; 1159 } 1160 if(c->tcp_do_toggle_rw) 1161 c->tcp_is_reading = 1; 1162 /* switch from listening(write) to listening(read) */ 1163 if(c->tcp_req_info) { 1164 tcp_req_info_handle_writedone(c->tcp_req_info); 1165 } else { 1166 comm_point_stop_listening(c); 1167 if(c->tcp_write_and_read) { 1168 fptr_ok(fptr_whitelist_comm_point(c->callback)); 1169 if( (*c->callback)(c, c->cb_arg, NETEVENT_PKT_WRITTEN, 1170 &c->repinfo) ) { 1171 comm_point_start_listening(c, -1, 1172 adjusted_tcp_timeout(c)); 1173 } 1174 } else { 1175 comm_point_start_listening(c, -1, 1176 adjusted_tcp_timeout(c)); 1177 } 1178 } 1179 } 1180 1181 /** do the callback when reading is done */ 1182 static void 1183 tcp_callback_reader(struct comm_point* c) 1184 { 1185 log_assert(c->type == comm_tcp || c->type == comm_local); 1186 sldns_buffer_flip(c->buffer); 1187 if(c->tcp_do_toggle_rw) 1188 
#ifdef HAVE_SSL
/**
 * True if the ssl handshake error has to be squelched from the logs.
 * Only squelches when verbosity is below VERB_QUERY.
 *
 * The error is matched on its library (ERR_LIB_SSL) and its reason code.
 * The function code is deliberately not part of the match: it is not
 * set by OpenSSL 3.0 and later, and the SSL_F_ macros that are needed to
 * spell it out are not available in builds without deprecated API.
 * @param err: error code from the OpenSSL error queue (ERR_get_error).
 * @return 1 if the error should not be logged, 0 if it should be logged.
 */
int
squelch_err_ssl_handshake(unsigned long err)
{
	int reason;
	if(verbosity >= VERB_QUERY)
		return 0; /* only squelch on low verbosity */
	if(ERR_GET_LIB(err) != ERR_LIB_SSL)
		return 0; /* only errors from the ssl library are squelched */
	reason = ERR_GET_REASON(err);
	if(reason == SSL_R_HTTPS_PROXY_REQUEST ||
		reason == SSL_R_HTTP_REQUEST ||
		reason == SSL_R_WRONG_VERSION_NUMBER ||
		reason == SSL_R_SSLV3_ALERT_BAD_CERTIFICATE
#ifdef SSL_R_NO_SHARED_CIPHER
		|| reason == SSL_R_NO_SHARED_CIPHER
#endif
#ifdef SSL_R_UNKNOWN_PROTOCOL
		|| reason == SSL_R_UNKNOWN_PROTOCOL
#endif
#ifdef SSL_R_UNSUPPORTED_PROTOCOL
		|| reason == SSL_R_UNSUPPORTED_PROTOCOL
#endif
#ifdef SSL_R_VERSION_TOO_LOW
		|| reason == SSL_R_VERSION_TOO_LOW
#endif
		)
		return 1;
	return 0;
}
#endif /* HAVE_SSL */
read condition satisfied back to writing */ 1240 comm_point_listen_for_rw(c, 0, 1); 1241 c->ssl_shake_state = comm_ssl_shake_none; 1242 return 1; 1243 } 1244 if(c->ssl_shake_state == comm_ssl_shake_hs_write) { 1245 /* write condition satisfied, back to reading */ 1246 comm_point_listen_for_rw(c, 1, 0); 1247 c->ssl_shake_state = comm_ssl_shake_none; 1248 return 1; 1249 } 1250 1251 ERR_clear_error(); 1252 r = SSL_do_handshake(c->ssl); 1253 if(r != 1) { 1254 int want = SSL_get_error(c->ssl, r); 1255 if(want == SSL_ERROR_WANT_READ) { 1256 if(c->ssl_shake_state == comm_ssl_shake_read) 1257 return 1; 1258 c->ssl_shake_state = comm_ssl_shake_read; 1259 comm_point_listen_for_rw(c, 1, 0); 1260 return 1; 1261 } else if(want == SSL_ERROR_WANT_WRITE) { 1262 if(c->ssl_shake_state == comm_ssl_shake_write) 1263 return 1; 1264 c->ssl_shake_state = comm_ssl_shake_write; 1265 comm_point_listen_for_rw(c, 0, 1); 1266 return 1; 1267 } else if(r == 0) { 1268 return 0; /* closed */ 1269 } else if(want == SSL_ERROR_SYSCALL) { 1270 /* SYSCALL and errno==0 means closed uncleanly */ 1271 #ifdef EPIPE 1272 if(errno == EPIPE && verbosity < 2) 1273 return 0; /* silence 'broken pipe' */ 1274 #endif 1275 #ifdef ECONNRESET 1276 if(errno == ECONNRESET && verbosity < 2) 1277 return 0; /* silence reset by peer */ 1278 #endif 1279 if(errno != 0) 1280 log_err("SSL_handshake syscall: %s", 1281 strerror(errno)); 1282 return 0; 1283 } else { 1284 unsigned long err = ERR_get_error(); 1285 if(!squelch_err_ssl_handshake(err)) { 1286 log_crypto_err_code("ssl handshake failed", err); 1287 log_addr(VERB_OPS, "ssl handshake failed", &c->repinfo.addr, 1288 c->repinfo.addrlen); 1289 } 1290 return 0; 1291 } 1292 } 1293 /* this is where peer verification could take place */ 1294 if((SSL_get_verify_mode(c->ssl)&SSL_VERIFY_PEER)) { 1295 /* verification */ 1296 if(SSL_get_verify_result(c->ssl) == X509_V_OK) { 1297 #ifdef HAVE_SSL_GET1_PEER_CERTIFICATE 1298 X509* x = SSL_get1_peer_certificate(c->ssl); 1299 #else 1300 
X509* x = SSL_get_peer_certificate(c->ssl); 1301 #endif 1302 if(!x) { 1303 log_addr(VERB_ALGO, "SSL connection failed: " 1304 "no certificate", 1305 &c->repinfo.addr, c->repinfo.addrlen); 1306 return 0; 1307 } 1308 log_cert(VERB_ALGO, "peer certificate", x); 1309 #ifdef HAVE_SSL_GET0_PEERNAME 1310 if(SSL_get0_peername(c->ssl)) { 1311 char buf[255]; 1312 snprintf(buf, sizeof(buf), "SSL connection " 1313 "to %s authenticated", 1314 SSL_get0_peername(c->ssl)); 1315 log_addr(VERB_ALGO, buf, &c->repinfo.addr, 1316 c->repinfo.addrlen); 1317 } else { 1318 #endif 1319 log_addr(VERB_ALGO, "SSL connection " 1320 "authenticated", &c->repinfo.addr, 1321 c->repinfo.addrlen); 1322 #ifdef HAVE_SSL_GET0_PEERNAME 1323 } 1324 #endif 1325 X509_free(x); 1326 } else { 1327 #ifdef HAVE_SSL_GET1_PEER_CERTIFICATE 1328 X509* x = SSL_get1_peer_certificate(c->ssl); 1329 #else 1330 X509* x = SSL_get_peer_certificate(c->ssl); 1331 #endif 1332 if(x) { 1333 log_cert(VERB_ALGO, "peer certificate", x); 1334 X509_free(x); 1335 } 1336 log_addr(VERB_ALGO, "SSL connection failed: " 1337 "failed to authenticate", 1338 &c->repinfo.addr, c->repinfo.addrlen); 1339 return 0; 1340 } 1341 } else { 1342 /* unauthenticated, the verify peer flag was not set 1343 * in c->ssl when the ssl object was created from ssl_ctx */ 1344 log_addr(VERB_ALGO, "SSL connection", &c->repinfo.addr, 1345 c->repinfo.addrlen); 1346 } 1347 1348 #ifdef HAVE_SSL_GET0_ALPN_SELECTED 1349 /* check if http2 use is negotiated */ 1350 if(c->type == comm_http && c->h2_session) { 1351 const unsigned char *alpn; 1352 unsigned int alpnlen = 0; 1353 SSL_get0_alpn_selected(c->ssl, &alpn, &alpnlen); 1354 if(alpnlen == 2 && memcmp("h2", alpn, 2) == 0) { 1355 /* connection upgraded to HTTP2 */ 1356 c->tcp_do_toggle_rw = 0; 1357 c->use_h2 = 1; 1358 } 1359 } 1360 #endif 1361 1362 /* setup listen rw correctly */ 1363 if(c->tcp_is_reading) { 1364 if(c->ssl_shake_state != comm_ssl_shake_read) 1365 comm_point_listen_for_rw(c, 1, 0); 1366 } else { 1367 
#ifdef HAVE_SSL
/**
 * Handle an SSL_read call that returned zero or less.
 * @param c: comm point with the ssl connection.
 * @param r: the return value of SSL_read.
 * @return the value for ssl_handle_read to return; 1 to wait for a
 *	later read or write event, 0 to drop the connection, or on a
 *	clean shutdown with a tcp_req_info, what its read_close handler
 *	returns.
 */
static int
ssl_handle_read_failure(struct comm_point* c, int r)
{
	switch(SSL_get_error(c->ssl, r)) {
	case SSL_ERROR_ZERO_RETURN:
		/* shutdown, closed */
		if(!c->tcp_req_info)
			return 0;
		return tcp_req_info_handle_read_close(c->tcp_req_info);
	case SSL_ERROR_WANT_READ:
#ifdef USE_WINSOCK
		ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
#endif
		return 1; /* read more later */
	case SSL_ERROR_WANT_WRITE:
		/* the read needs a write event before it can continue */
		c->ssl_shake_state = comm_ssl_shake_hs_write;
		comm_point_listen_for_rw(c, 0, 1);
		return 1;
	case SSL_ERROR_SYSCALL:
#ifdef ECONNRESET
		if(errno == ECONNRESET && verbosity < 2)
			return 0; /* silence reset by peer */
#endif
		if(errno != 0)
			log_err("SSL_read syscall: %s",
				strerror(errno));
		return 0;
	default:
		break;
	}
	log_crypto_err("could not SSL_read");
	return 0;
}
#endif /* HAVE_SSL */

/**
 * ssl read callback on TCP.
 * Continues the handshake if that is not done yet. Then reads the two
 * byte length, checks it against the buffer capacity and the DNS header
 * size, and reads the message of that length. When the message is
 * complete tcp_callback_reader is called.
 * @param c: comm point with the ssl connection.
 * @return 0 if the connection has to be dropped, 1 otherwise.
 *	Without HAVE_SSL it always returns 0.
 */
static int
ssl_handle_read(struct comm_point* c)
{
#ifdef HAVE_SSL
	int got;
	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(ssl_handshake(c) == 0)
			return 0;
		/* the handshake is still underway, wait for more events */
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}
	if(c->tcp_byte_count < sizeof(uint16_t)) {
		uint16_t pktlen;
		/* read length bytes */
		ERR_clear_error();
		got = SSL_read(c->ssl,
			(void*)sldns_buffer_at(c->buffer, c->tcp_byte_count),
			(int)(sizeof(uint16_t) - c->tcp_byte_count));
		if(got <= 0)
			return ssl_handle_read_failure(c, got);
		c->tcp_byte_count += got;
		if(c->tcp_byte_count < sizeof(uint16_t))
			return 1;
		pktlen = sldns_buffer_read_u16_at(c->buffer, 0);
		if(pktlen > sldns_buffer_capacity(c->buffer)) {
			verbose(VERB_QUERY, "ssl: dropped larger than buffer");
			return 0;
		}
		sldns_buffer_set_limit(c->buffer, pktlen);
		if(sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
			verbose(VERB_QUERY, "ssl: dropped bogus too short.");
			return 0;
		}
		sldns_buffer_skip(c->buffer,
			(ssize_t)(c->tcp_byte_count-sizeof(uint16_t)));
		verbose(VERB_ALGO, "Reading ssl tcp query of length %d",
			(int)sldns_buffer_limit(c->buffer));
	}
	if(sldns_buffer_remaining(c->buffer) > 0) {
		/* read the message contents */
		ERR_clear_error();
		got = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
			(int)sldns_buffer_remaining(c->buffer));
		if(got <= 0)
			return ssl_handle_read_failure(c, got);
		sldns_buffer_skip(c->buffer, (ssize_t)got);
	}
	if(sldns_buffer_remaining(c->buffer) == 0)
		tcp_callback_reader(c);
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
if((c->tcp_write_and_read?c->tcp_write_byte_count:c->tcp_byte_count) < sizeof(uint16_t)) { 1496 uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(c->buffer)); 1497 ERR_clear_error(); 1498 if(c->tcp_write_and_read) { 1499 if(c->tcp_write_pkt_len + 2 < LDNS_RR_BUF_SIZE) { 1500 /* combine the tcp length and the query for 1501 * write, this emulates writev */ 1502 uint8_t buf[LDNS_RR_BUF_SIZE]; 1503 memmove(buf, &len, sizeof(uint16_t)); 1504 memmove(buf+sizeof(uint16_t), 1505 c->tcp_write_pkt, 1506 c->tcp_write_pkt_len); 1507 r = SSL_write(c->ssl, 1508 (void*)(buf+c->tcp_write_byte_count), 1509 c->tcp_write_pkt_len + 2 - 1510 c->tcp_write_byte_count); 1511 } else { 1512 r = SSL_write(c->ssl, 1513 (void*)(((uint8_t*)&len)+c->tcp_write_byte_count), 1514 (int)(sizeof(uint16_t)-c->tcp_write_byte_count)); 1515 } 1516 } else if(sizeof(uint16_t)+sldns_buffer_remaining(c->buffer) < 1517 LDNS_RR_BUF_SIZE) { 1518 /* combine the tcp length and the query for write, 1519 * this emulates writev */ 1520 uint8_t buf[LDNS_RR_BUF_SIZE]; 1521 memmove(buf, &len, sizeof(uint16_t)); 1522 memmove(buf+sizeof(uint16_t), 1523 sldns_buffer_current(c->buffer), 1524 sldns_buffer_remaining(c->buffer)); 1525 r = SSL_write(c->ssl, (void*)(buf+c->tcp_byte_count), 1526 (int)(sizeof(uint16_t)+ 1527 sldns_buffer_remaining(c->buffer) 1528 - c->tcp_byte_count)); 1529 } else { 1530 r = SSL_write(c->ssl, 1531 (void*)(((uint8_t*)&len)+c->tcp_byte_count), 1532 (int)(sizeof(uint16_t)-c->tcp_byte_count)); 1533 } 1534 if(r <= 0) { 1535 int want = SSL_get_error(c->ssl, r); 1536 if(want == SSL_ERROR_ZERO_RETURN) { 1537 return 0; /* closed */ 1538 } else if(want == SSL_ERROR_WANT_READ) { 1539 c->ssl_shake_state = comm_ssl_shake_hs_read; 1540 comm_point_listen_for_rw(c, 1, 0); 1541 return 1; /* wait for read condition */ 1542 } else if(want == SSL_ERROR_WANT_WRITE) { 1543 #ifdef USE_WINSOCK 1544 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 1545 #endif 1546 return 1; /* write 
more later */ 1547 } else if(want == SSL_ERROR_SYSCALL) { 1548 #ifdef EPIPE 1549 if(errno == EPIPE && verbosity < 2) 1550 return 0; /* silence 'broken pipe' */ 1551 #endif 1552 if(errno != 0) 1553 log_err("SSL_write syscall: %s", 1554 strerror(errno)); 1555 return 0; 1556 } 1557 log_crypto_err("could not SSL_write"); 1558 return 0; 1559 } 1560 if(c->tcp_write_and_read) { 1561 c->tcp_write_byte_count += r; 1562 if(c->tcp_write_byte_count < sizeof(uint16_t)) 1563 return 1; 1564 } else { 1565 c->tcp_byte_count += r; 1566 if(c->tcp_byte_count < sizeof(uint16_t)) 1567 return 1; 1568 sldns_buffer_set_position(c->buffer, c->tcp_byte_count - 1569 sizeof(uint16_t)); 1570 } 1571 if((!c->tcp_write_and_read && sldns_buffer_remaining(c->buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) { 1572 tcp_callback_writer(c); 1573 return 1; 1574 } 1575 } 1576 log_assert(c->tcp_write_and_read || sldns_buffer_remaining(c->buffer) > 0); 1577 log_assert(!c->tcp_write_and_read || c->tcp_write_byte_count < c->tcp_write_pkt_len + 2); 1578 ERR_clear_error(); 1579 if(c->tcp_write_and_read) { 1580 r = SSL_write(c->ssl, (void*)(c->tcp_write_pkt + c->tcp_write_byte_count - 2), 1581 (int)(c->tcp_write_pkt_len + 2 - c->tcp_write_byte_count)); 1582 } else { 1583 r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer), 1584 (int)sldns_buffer_remaining(c->buffer)); 1585 } 1586 if(r <= 0) { 1587 int want = SSL_get_error(c->ssl, r); 1588 if(want == SSL_ERROR_ZERO_RETURN) { 1589 return 0; /* closed */ 1590 } else if(want == SSL_ERROR_WANT_READ) { 1591 c->ssl_shake_state = comm_ssl_shake_hs_read; 1592 comm_point_listen_for_rw(c, 1, 0); 1593 return 1; /* wait for read condition */ 1594 } else if(want == SSL_ERROR_WANT_WRITE) { 1595 #ifdef USE_WINSOCK 1596 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 1597 #endif 1598 return 1; /* write more later */ 1599 } else if(want == SSL_ERROR_SYSCALL) { 1600 #ifdef EPIPE 1601 if(errno == EPIPE && verbosity < 2) 
1602 return 0; /* silence 'broken pipe' */ 1603 #endif 1604 if(errno != 0) 1605 log_err("SSL_write syscall: %s", 1606 strerror(errno)); 1607 return 0; 1608 } 1609 log_crypto_err("could not SSL_write"); 1610 return 0; 1611 } 1612 if(c->tcp_write_and_read) { 1613 c->tcp_write_byte_count += r; 1614 } else { 1615 sldns_buffer_skip(c->buffer, (ssize_t)r); 1616 } 1617 1618 if((!c->tcp_write_and_read && sldns_buffer_remaining(c->buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) { 1619 tcp_callback_writer(c); 1620 } 1621 return 1; 1622 #else 1623 (void)c; 1624 return 0; 1625 #endif /* HAVE_SSL */ 1626 } 1627 1628 /** handle ssl tcp connection with dns contents */ 1629 static int 1630 ssl_handle_it(struct comm_point* c, int is_write) 1631 { 1632 /* handle case where renegotiation wants read during write call 1633 * or write during read calls */ 1634 if(is_write && c->ssl_shake_state == comm_ssl_shake_hs_write) 1635 return ssl_handle_read(c); 1636 else if(!is_write && c->ssl_shake_state == comm_ssl_shake_hs_read) 1637 return ssl_handle_write(c); 1638 /* handle read events for read operation and write events for a 1639 * write operation */ 1640 else if(!is_write) 1641 return ssl_handle_read(c); 1642 return ssl_handle_write(c); 1643 } 1644 1645 /** Handle tcp reading callback. 1646 * @param fd: file descriptor of socket. 1647 * @param c: comm point to read from into buffer. 1648 * @param short_ok: if true, very short packets are OK (for comm_local). 
1649 * @return: 0 on error 1650 */ 1651 static int 1652 comm_point_tcp_handle_read(int fd, struct comm_point* c, int short_ok) 1653 { 1654 ssize_t r; 1655 log_assert(c->type == comm_tcp || c->type == comm_local); 1656 if(c->ssl) 1657 return ssl_handle_it(c, 0); 1658 if(!c->tcp_is_reading && !c->tcp_write_and_read) 1659 return 0; 1660 1661 log_assert(fd != -1); 1662 if(c->tcp_byte_count < sizeof(uint16_t)) { 1663 /* read length bytes */ 1664 r = recv(fd,(void*)sldns_buffer_at(c->buffer,c->tcp_byte_count), 1665 sizeof(uint16_t)-c->tcp_byte_count, 0); 1666 if(r == 0) { 1667 if(c->tcp_req_info) 1668 return tcp_req_info_handle_read_close(c->tcp_req_info); 1669 return 0; 1670 } else if(r == -1) { 1671 #ifndef USE_WINSOCK 1672 if(errno == EINTR || errno == EAGAIN) 1673 return 1; 1674 #ifdef ECONNRESET 1675 if(errno == ECONNRESET && verbosity < 2) 1676 return 0; /* silence reset by peer */ 1677 #endif 1678 #ifdef ECONNREFUSED 1679 if(errno == ECONNREFUSED && verbosity < 2) 1680 return 0; /* silence reset by peer */ 1681 #endif 1682 #ifdef ENETUNREACH 1683 if(errno == ENETUNREACH && verbosity < 2) 1684 return 0; /* silence it */ 1685 #endif 1686 #ifdef EHOSTDOWN 1687 if(errno == EHOSTDOWN && verbosity < 2) 1688 return 0; /* silence it */ 1689 #endif 1690 #ifdef EHOSTUNREACH 1691 if(errno == EHOSTUNREACH && verbosity < 2) 1692 return 0; /* silence it */ 1693 #endif 1694 #ifdef ENETDOWN 1695 if(errno == ENETDOWN && verbosity < 2) 1696 return 0; /* silence it */ 1697 #endif 1698 #ifdef EACCES 1699 if(errno == EACCES && verbosity < 2) 1700 return 0; /* silence it */ 1701 #endif 1702 #ifdef ENOTCONN 1703 if(errno == ENOTCONN) { 1704 log_err_addr("read (in tcp s) failed and this could be because TCP Fast Open is enabled [--disable-tfo-client --disable-tfo-server] but does not work", sock_strerror(errno), 1705 &c->repinfo.addr, c->repinfo.addrlen); 1706 return 0; 1707 } 1708 #endif 1709 #else /* USE_WINSOCK */ 1710 if(WSAGetLastError() == WSAECONNREFUSED && verbosity < 2) 1711 
return 0; 1712 if(WSAGetLastError() == WSAEHOSTDOWN && verbosity < 2) 1713 return 0; 1714 if(WSAGetLastError() == WSAEHOSTUNREACH && verbosity < 2) 1715 return 0; 1716 if(WSAGetLastError() == WSAENETDOWN && verbosity < 2) 1717 return 0; 1718 if(WSAGetLastError() == WSAENETUNREACH && verbosity < 2) 1719 return 0; 1720 if(WSAGetLastError() == WSAECONNRESET) 1721 return 0; 1722 if(WSAGetLastError() == WSAEINPROGRESS) 1723 return 1; 1724 if(WSAGetLastError() == WSAEWOULDBLOCK) { 1725 ub_winsock_tcp_wouldblock(c->ev->ev, 1726 UB_EV_READ); 1727 return 1; 1728 } 1729 #endif 1730 log_err_addr("read (in tcp s)", sock_strerror(errno), 1731 &c->repinfo.addr, c->repinfo.addrlen); 1732 return 0; 1733 } 1734 c->tcp_byte_count += r; 1735 if(c->tcp_byte_count != sizeof(uint16_t)) 1736 return 1; 1737 if(sldns_buffer_read_u16_at(c->buffer, 0) > 1738 sldns_buffer_capacity(c->buffer)) { 1739 verbose(VERB_QUERY, "tcp: dropped larger than buffer"); 1740 return 0; 1741 } 1742 sldns_buffer_set_limit(c->buffer, 1743 sldns_buffer_read_u16_at(c->buffer, 0)); 1744 if(!short_ok && 1745 sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) { 1746 verbose(VERB_QUERY, "tcp: dropped bogus too short."); 1747 return 0; 1748 } 1749 verbose(VERB_ALGO, "Reading tcp query of length %d", 1750 (int)sldns_buffer_limit(c->buffer)); 1751 } 1752 1753 if(sldns_buffer_remaining(c->buffer) == 0) 1754 log_err("in comm_point_tcp_handle_read buffer_remaining is not > 0 as expected, continuing with (harmless) 0 length recv"); 1755 r = recv(fd, (void*)sldns_buffer_current(c->buffer), 1756 sldns_buffer_remaining(c->buffer), 0); 1757 if(r == 0) { 1758 if(c->tcp_req_info) 1759 return tcp_req_info_handle_read_close(c->tcp_req_info); 1760 return 0; 1761 } else if(r == -1) { 1762 #ifndef USE_WINSOCK 1763 if(errno == EINTR || errno == EAGAIN) 1764 return 1; 1765 #else /* USE_WINSOCK */ 1766 if(WSAGetLastError() == WSAECONNRESET) 1767 return 0; 1768 if(WSAGetLastError() == WSAEINPROGRESS) 1769 return 1; 1770 if(WSAGetLastError() 
== WSAEWOULDBLOCK) { 1771 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 1772 return 1; 1773 } 1774 #endif 1775 log_err_addr("read (in tcp r)", sock_strerror(errno), 1776 &c->repinfo.addr, c->repinfo.addrlen); 1777 return 0; 1778 } 1779 sldns_buffer_skip(c->buffer, r); 1780 if(sldns_buffer_remaining(c->buffer) <= 0) { 1781 tcp_callback_reader(c); 1782 } 1783 return 1; 1784 } 1785 1786 /** 1787 * Handle tcp writing callback. 1788 * @param fd: file descriptor of socket. 1789 * @param c: comm point to write buffer out of. 1790 * @return: 0 on error 1791 */ 1792 static int 1793 comm_point_tcp_handle_write(int fd, struct comm_point* c) 1794 { 1795 ssize_t r; 1796 struct sldns_buffer *buffer; 1797 log_assert(c->type == comm_tcp); 1798 #ifdef USE_DNSCRYPT 1799 buffer = c->dnscrypt_buffer; 1800 #else 1801 buffer = c->buffer; 1802 #endif 1803 if(c->tcp_is_reading && !c->ssl && !c->tcp_write_and_read) 1804 return 0; 1805 log_assert(fd != -1); 1806 if(((!c->tcp_write_and_read && c->tcp_byte_count == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == 0)) && c->tcp_check_nb_connect) { 1807 /* check for pending error from nonblocking connect */ 1808 /* from Stevens, unix network programming, vol1, 3rd ed, p450*/ 1809 int error = 0; 1810 socklen_t len = (socklen_t)sizeof(error); 1811 if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error, 1812 &len) < 0){ 1813 #ifndef USE_WINSOCK 1814 error = errno; /* on solaris errno is error */ 1815 #else /* USE_WINSOCK */ 1816 error = WSAGetLastError(); 1817 #endif 1818 } 1819 #ifndef USE_WINSOCK 1820 #if defined(EINPROGRESS) && defined(EWOULDBLOCK) 1821 if(error == EINPROGRESS || error == EWOULDBLOCK) 1822 return 1; /* try again later */ 1823 else 1824 #endif 1825 if(error != 0 && verbosity < 2) 1826 return 0; /* silence lots of chatter in the logs */ 1827 else if(error != 0) { 1828 log_err_addr("tcp connect", strerror(error), 1829 &c->repinfo.addr, c->repinfo.addrlen); 1830 #else /* USE_WINSOCK */ 1831 /* examine error */ 1832 
if(error == WSAEINPROGRESS) 1833 return 1; 1834 else if(error == WSAEWOULDBLOCK) { 1835 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 1836 return 1; 1837 } else if(error != 0 && verbosity < 2) 1838 return 0; 1839 else if(error != 0) { 1840 log_err_addr("tcp connect", wsa_strerror(error), 1841 &c->repinfo.addr, c->repinfo.addrlen); 1842 #endif /* USE_WINSOCK */ 1843 return 0; 1844 } 1845 } 1846 if(c->ssl) 1847 return ssl_handle_it(c, 1); 1848 1849 #ifdef USE_MSG_FASTOPEN 1850 /* Only try this on first use of a connection that uses tfo, 1851 otherwise fall through to normal write */ 1852 /* Also, TFO support on WINDOWS not implemented at the moment */ 1853 if(c->tcp_do_fastopen == 1) { 1854 /* this form of sendmsg() does both a connect() and send() so need to 1855 look for various flavours of error*/ 1856 uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(buffer)); 1857 struct msghdr msg; 1858 struct iovec iov[2]; 1859 c->tcp_do_fastopen = 0; 1860 memset(&msg, 0, sizeof(msg)); 1861 if(c->tcp_write_and_read) { 1862 iov[0].iov_base = (uint8_t*)&len + c->tcp_write_byte_count; 1863 iov[0].iov_len = sizeof(uint16_t) - c->tcp_write_byte_count; 1864 iov[1].iov_base = c->tcp_write_pkt; 1865 iov[1].iov_len = c->tcp_write_pkt_len; 1866 } else { 1867 iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count; 1868 iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count; 1869 iov[1].iov_base = sldns_buffer_begin(buffer); 1870 iov[1].iov_len = sldns_buffer_limit(buffer); 1871 } 1872 log_assert(iov[0].iov_len > 0); 1873 msg.msg_name = &c->repinfo.addr; 1874 msg.msg_namelen = c->repinfo.addrlen; 1875 msg.msg_iov = iov; 1876 msg.msg_iovlen = 2; 1877 r = sendmsg(fd, &msg, MSG_FASTOPEN); 1878 if (r == -1) { 1879 #if defined(EINPROGRESS) && defined(EWOULDBLOCK) 1880 /* Handshake is underway, maybe because no TFO cookie available. 
1881 Come back to write the message*/ 1882 if(errno == EINPROGRESS || errno == EWOULDBLOCK) 1883 return 1; 1884 #endif 1885 if(errno == EINTR || errno == EAGAIN) 1886 return 1; 1887 /* Not handling EISCONN here as shouldn't ever hit that case.*/ 1888 if(errno != EPIPE 1889 #ifdef EOPNOTSUPP 1890 /* if /proc/sys/net/ipv4/tcp_fastopen is 1891 * disabled on Linux, sendmsg may return 1892 * 'Operation not supported', if so 1893 * fallthrough to ordinary connect. */ 1894 && errno != EOPNOTSUPP 1895 #endif 1896 && errno != 0) { 1897 if(verbosity < 2) 1898 return 0; /* silence lots of chatter in the logs */ 1899 log_err_addr("tcp sendmsg", strerror(errno), 1900 &c->repinfo.addr, c->repinfo.addrlen); 1901 return 0; 1902 } 1903 verbose(VERB_ALGO, "tcp sendmsg for fastopen failed (with %s), try normal connect", strerror(errno)); 1904 /* fallthrough to nonFASTOPEN 1905 * (MSG_FASTOPEN on Linux 3 produces EPIPE) 1906 * we need to perform connect() */ 1907 if(connect(fd, (struct sockaddr *)&c->repinfo.addr, c->repinfo.addrlen) == -1) { 1908 #ifdef EINPROGRESS 1909 if(errno == EINPROGRESS) 1910 return 1; /* wait until connect done*/ 1911 #endif 1912 #ifdef USE_WINSOCK 1913 if(WSAGetLastError() == WSAEINPROGRESS || 1914 WSAGetLastError() == WSAEWOULDBLOCK) 1915 return 1; /* wait until connect done*/ 1916 #endif 1917 if(tcp_connect_errno_needs_log( 1918 (struct sockaddr *)&c->repinfo.addr, c->repinfo.addrlen)) { 1919 log_err_addr("outgoing tcp: connect after EPIPE for fastopen", 1920 strerror(errno), &c->repinfo.addr, c->repinfo.addrlen); 1921 } 1922 return 0; 1923 } 1924 1925 } else { 1926 if(c->tcp_write_and_read) { 1927 c->tcp_write_byte_count += r; 1928 if(c->tcp_write_byte_count < sizeof(uint16_t)) 1929 return 1; 1930 } else { 1931 c->tcp_byte_count += r; 1932 if(c->tcp_byte_count < sizeof(uint16_t)) 1933 return 1; 1934 sldns_buffer_set_position(buffer, c->tcp_byte_count - 1935 sizeof(uint16_t)); 1936 } 1937 if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) 
|| (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) { 1938 tcp_callback_writer(c); 1939 return 1; 1940 } 1941 } 1942 } 1943 #endif /* USE_MSG_FASTOPEN */ 1944 1945 if((c->tcp_write_and_read?c->tcp_write_byte_count:c->tcp_byte_count) < sizeof(uint16_t)) { 1946 uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(buffer)); 1947 #ifdef HAVE_WRITEV 1948 struct iovec iov[2]; 1949 if(c->tcp_write_and_read) { 1950 iov[0].iov_base = (uint8_t*)&len + c->tcp_write_byte_count; 1951 iov[0].iov_len = sizeof(uint16_t) - c->tcp_write_byte_count; 1952 iov[1].iov_base = c->tcp_write_pkt; 1953 iov[1].iov_len = c->tcp_write_pkt_len; 1954 } else { 1955 iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count; 1956 iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count; 1957 iov[1].iov_base = sldns_buffer_begin(buffer); 1958 iov[1].iov_len = sldns_buffer_limit(buffer); 1959 } 1960 log_assert(iov[0].iov_len > 0); 1961 r = writev(fd, iov, 2); 1962 #else /* HAVE_WRITEV */ 1963 if(c->tcp_write_and_read) { 1964 r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_write_byte_count), 1965 sizeof(uint16_t)-c->tcp_write_byte_count, 0); 1966 } else { 1967 r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_byte_count), 1968 sizeof(uint16_t)-c->tcp_byte_count, 0); 1969 } 1970 #endif /* HAVE_WRITEV */ 1971 if(r == -1) { 1972 #ifndef USE_WINSOCK 1973 # ifdef EPIPE 1974 if(errno == EPIPE && verbosity < 2) 1975 return 0; /* silence 'broken pipe' */ 1976 #endif 1977 if(errno == EINTR || errno == EAGAIN) 1978 return 1; 1979 #ifdef ECONNRESET 1980 if(errno == ECONNRESET && verbosity < 2) 1981 return 0; /* silence reset by peer */ 1982 #endif 1983 # ifdef HAVE_WRITEV 1984 log_err_addr("tcp writev", strerror(errno), 1985 &c->repinfo.addr, c->repinfo.addrlen); 1986 # else /* HAVE_WRITEV */ 1987 log_err_addr("tcp send s", strerror(errno), 1988 &c->repinfo.addr, c->repinfo.addrlen); 1989 # endif /* HAVE_WRITEV */ 1990 #else 1991 if(WSAGetLastError() == WSAENOTCONN) 
1992 return 1; 1993 if(WSAGetLastError() == WSAEINPROGRESS) 1994 return 1; 1995 if(WSAGetLastError() == WSAEWOULDBLOCK) { 1996 ub_winsock_tcp_wouldblock(c->ev->ev, 1997 UB_EV_WRITE); 1998 return 1; 1999 } 2000 if(WSAGetLastError() == WSAECONNRESET && verbosity < 2) 2001 return 0; /* silence reset by peer */ 2002 log_err_addr("tcp send s", 2003 wsa_strerror(WSAGetLastError()), 2004 &c->repinfo.addr, c->repinfo.addrlen); 2005 #endif 2006 return 0; 2007 } 2008 if(c->tcp_write_and_read) { 2009 c->tcp_write_byte_count += r; 2010 if(c->tcp_write_byte_count < sizeof(uint16_t)) 2011 return 1; 2012 } else { 2013 c->tcp_byte_count += r; 2014 if(c->tcp_byte_count < sizeof(uint16_t)) 2015 return 1; 2016 sldns_buffer_set_position(buffer, c->tcp_byte_count - 2017 sizeof(uint16_t)); 2018 } 2019 if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) { 2020 tcp_callback_writer(c); 2021 return 1; 2022 } 2023 } 2024 log_assert(c->tcp_write_and_read || sldns_buffer_remaining(buffer) > 0); 2025 log_assert(!c->tcp_write_and_read || c->tcp_write_byte_count < c->tcp_write_pkt_len + 2); 2026 if(c->tcp_write_and_read) { 2027 r = send(fd, (void*)(c->tcp_write_pkt + c->tcp_write_byte_count - 2), 2028 c->tcp_write_pkt_len + 2 - c->tcp_write_byte_count, 0); 2029 } else { 2030 r = send(fd, (void*)sldns_buffer_current(buffer), 2031 sldns_buffer_remaining(buffer), 0); 2032 } 2033 if(r == -1) { 2034 #ifndef USE_WINSOCK 2035 if(errno == EINTR || errno == EAGAIN) 2036 return 1; 2037 #ifdef ECONNRESET 2038 if(errno == ECONNRESET && verbosity < 2) 2039 return 0; /* silence reset by peer */ 2040 #endif 2041 #else 2042 if(WSAGetLastError() == WSAEINPROGRESS) 2043 return 1; 2044 if(WSAGetLastError() == WSAEWOULDBLOCK) { 2045 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 2046 return 1; 2047 } 2048 if(WSAGetLastError() == WSAECONNRESET && verbosity < 2) 2049 return 0; /* silence reset by peer */ 2050 #endif 
2051 log_err_addr("tcp send r", sock_strerror(errno), 2052 &c->repinfo.addr, c->repinfo.addrlen); 2053 return 0; 2054 } 2055 if(c->tcp_write_and_read) { 2056 c->tcp_write_byte_count += r; 2057 } else { 2058 sldns_buffer_skip(buffer, r); 2059 } 2060 2061 if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) { 2062 tcp_callback_writer(c); 2063 } 2064 2065 return 1; 2066 } 2067 2068 /** read again to drain buffers when there could be more to read */ 2069 static void 2070 tcp_req_info_read_again(int fd, struct comm_point* c) 2071 { 2072 while(c->tcp_req_info->read_again) { 2073 int r; 2074 c->tcp_req_info->read_again = 0; 2075 if(c->tcp_is_reading) 2076 r = comm_point_tcp_handle_read(fd, c, 0); 2077 else r = comm_point_tcp_handle_write(fd, c); 2078 if(!r) { 2079 reclaim_tcp_handler(c); 2080 if(!c->tcp_do_close) { 2081 fptr_ok(fptr_whitelist_comm_point( 2082 c->callback)); 2083 (void)(*c->callback)(c, c->cb_arg, 2084 NETEVENT_CLOSED, NULL); 2085 } 2086 return; 2087 } 2088 } 2089 } 2090 2091 /** read again to drain buffers when there could be more to read */ 2092 static void 2093 tcp_more_read_again(int fd, struct comm_point* c) 2094 { 2095 /* if the packet is done, but another one could be waiting on 2096 * the connection, the callback signals this, and we try again */ 2097 /* this continues until the read routines get EAGAIN or so, 2098 * and thus does not call the callback, and the bool is 0 */ 2099 int* moreread = c->tcp_more_read_again; 2100 while(moreread && *moreread) { 2101 *moreread = 0; 2102 if(!comm_point_tcp_handle_read(fd, c, 0)) { 2103 reclaim_tcp_handler(c); 2104 if(!c->tcp_do_close) { 2105 fptr_ok(fptr_whitelist_comm_point( 2106 c->callback)); 2107 (void)(*c->callback)(c, c->cb_arg, 2108 NETEVENT_CLOSED, NULL); 2109 } 2110 return; 2111 } 2112 } 2113 } 2114 2115 /** write again to fill up when there could be more to write */ 2116 static void 2117 
tcp_more_write_again(int fd, struct comm_point* c) 2118 { 2119 /* if the packet is done, but another is waiting to be written, 2120 * the callback signals it and we try again. */ 2121 /* this continues until the write routines get EAGAIN or so, 2122 * and thus does not call the callback, and the bool is 0 */ 2123 int* morewrite = c->tcp_more_write_again; 2124 while(morewrite && *morewrite) { 2125 *morewrite = 0; 2126 if(!comm_point_tcp_handle_write(fd, c)) { 2127 reclaim_tcp_handler(c); 2128 if(!c->tcp_do_close) { 2129 fptr_ok(fptr_whitelist_comm_point( 2130 c->callback)); 2131 (void)(*c->callback)(c, c->cb_arg, 2132 NETEVENT_CLOSED, NULL); 2133 } 2134 return; 2135 } 2136 } 2137 } 2138 2139 void 2140 comm_point_tcp_handle_callback(int fd, short event, void* arg) 2141 { 2142 struct comm_point* c = (struct comm_point*)arg; 2143 log_assert(c->type == comm_tcp); 2144 ub_comm_base_now(c->ev->base); 2145 2146 #ifdef USE_DNSCRYPT 2147 /* Initialize if this is a dnscrypt socket */ 2148 if(c->tcp_parent) { 2149 c->dnscrypt = c->tcp_parent->dnscrypt; 2150 } 2151 if(c->dnscrypt && c->dnscrypt_buffer == c->buffer) { 2152 c->dnscrypt_buffer = sldns_buffer_new(sldns_buffer_capacity(c->buffer)); 2153 if(!c->dnscrypt_buffer) { 2154 log_err("Could not allocate dnscrypt buffer"); 2155 reclaim_tcp_handler(c); 2156 if(!c->tcp_do_close) { 2157 fptr_ok(fptr_whitelist_comm_point( 2158 c->callback)); 2159 (void)(*c->callback)(c, c->cb_arg, 2160 NETEVENT_CLOSED, NULL); 2161 } 2162 return; 2163 } 2164 } 2165 #endif 2166 2167 if(event&UB_EV_TIMEOUT) { 2168 verbose(VERB_QUERY, "tcp took too long, dropped"); 2169 reclaim_tcp_handler(c); 2170 if(!c->tcp_do_close) { 2171 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2172 (void)(*c->callback)(c, c->cb_arg, 2173 NETEVENT_TIMEOUT, NULL); 2174 } 2175 return; 2176 } 2177 if(event&UB_EV_READ 2178 #ifdef USE_MSG_FASTOPEN 2179 && !(c->tcp_do_fastopen && (event&UB_EV_WRITE)) 2180 #endif 2181 ) { 2182 int has_tcpq = (c->tcp_req_info != NULL); 2183 int* 
moreread = c->tcp_more_read_again; 2184 if(!comm_point_tcp_handle_read(fd, c, 0)) { 2185 reclaim_tcp_handler(c); 2186 if(!c->tcp_do_close) { 2187 fptr_ok(fptr_whitelist_comm_point( 2188 c->callback)); 2189 (void)(*c->callback)(c, c->cb_arg, 2190 NETEVENT_CLOSED, NULL); 2191 } 2192 return; 2193 } 2194 if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again) 2195 tcp_req_info_read_again(fd, c); 2196 if(moreread && *moreread) 2197 tcp_more_read_again(fd, c); 2198 return; 2199 } 2200 if(event&UB_EV_WRITE) { 2201 int has_tcpq = (c->tcp_req_info != NULL); 2202 int* morewrite = c->tcp_more_write_again; 2203 if(!comm_point_tcp_handle_write(fd, c)) { 2204 reclaim_tcp_handler(c); 2205 if(!c->tcp_do_close) { 2206 fptr_ok(fptr_whitelist_comm_point( 2207 c->callback)); 2208 (void)(*c->callback)(c, c->cb_arg, 2209 NETEVENT_CLOSED, NULL); 2210 } 2211 return; 2212 } 2213 if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again) 2214 tcp_req_info_read_again(fd, c); 2215 if(morewrite && *morewrite) 2216 tcp_more_write_again(fd, c); 2217 return; 2218 } 2219 log_err("Ignored event %d for tcphdl.", event); 2220 } 2221 2222 /** Make http handler free for next assignment */ 2223 static void 2224 reclaim_http_handler(struct comm_point* c) 2225 { 2226 log_assert(c->type == comm_http); 2227 if(c->ssl) { 2228 #ifdef HAVE_SSL 2229 SSL_shutdown(c->ssl); 2230 SSL_free(c->ssl); 2231 c->ssl = NULL; 2232 #endif 2233 } 2234 comm_point_close(c); 2235 if(c->tcp_parent) { 2236 if(c != c->tcp_parent->tcp_free) { 2237 c->tcp_parent->cur_tcp_count--; 2238 c->tcp_free = c->tcp_parent->tcp_free; 2239 c->tcp_parent->tcp_free = c; 2240 } 2241 if(!c->tcp_free) { 2242 /* re-enable listening on accept socket */ 2243 comm_point_start_listening(c->tcp_parent, -1, -1); 2244 } 2245 } 2246 } 2247 2248 /** read more data for http (with ssl) */ 2249 static int 2250 ssl_http_read_more(struct comm_point* c) 2251 { 2252 #ifdef HAVE_SSL 2253 int r; 2254 log_assert(sldns_buffer_remaining(c->buffer) > 0); 2255 
ERR_clear_error(); 2256 r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer), 2257 (int)sldns_buffer_remaining(c->buffer)); 2258 if(r <= 0) { 2259 int want = SSL_get_error(c->ssl, r); 2260 if(want == SSL_ERROR_ZERO_RETURN) { 2261 return 0; /* shutdown, closed */ 2262 } else if(want == SSL_ERROR_WANT_READ) { 2263 return 1; /* read more later */ 2264 } else if(want == SSL_ERROR_WANT_WRITE) { 2265 c->ssl_shake_state = comm_ssl_shake_hs_write; 2266 comm_point_listen_for_rw(c, 0, 1); 2267 return 1; 2268 } else if(want == SSL_ERROR_SYSCALL) { 2269 #ifdef ECONNRESET 2270 if(errno == ECONNRESET && verbosity < 2) 2271 return 0; /* silence reset by peer */ 2272 #endif 2273 if(errno != 0) 2274 log_err("SSL_read syscall: %s", 2275 strerror(errno)); 2276 return 0; 2277 } 2278 log_crypto_err("could not SSL_read"); 2279 return 0; 2280 } 2281 verbose(VERB_ALGO, "ssl http read more skip to %d + %d", 2282 (int)sldns_buffer_position(c->buffer), (int)r); 2283 sldns_buffer_skip(c->buffer, (ssize_t)r); 2284 return 1; 2285 #else 2286 (void)c; 2287 return 0; 2288 #endif /* HAVE_SSL */ 2289 } 2290 2291 /** read more data for http */ 2292 static int 2293 http_read_more(int fd, struct comm_point* c) 2294 { 2295 ssize_t r; 2296 log_assert(sldns_buffer_remaining(c->buffer) > 0); 2297 r = recv(fd, (void*)sldns_buffer_current(c->buffer), 2298 sldns_buffer_remaining(c->buffer), 0); 2299 if(r == 0) { 2300 return 0; 2301 } else if(r == -1) { 2302 #ifndef USE_WINSOCK 2303 if(errno == EINTR || errno == EAGAIN) 2304 return 1; 2305 #else /* USE_WINSOCK */ 2306 if(WSAGetLastError() == WSAECONNRESET) 2307 return 0; 2308 if(WSAGetLastError() == WSAEINPROGRESS) 2309 return 1; 2310 if(WSAGetLastError() == WSAEWOULDBLOCK) { 2311 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 2312 return 1; 2313 } 2314 #endif 2315 log_err_addr("read (in http r)", sock_strerror(errno), 2316 &c->repinfo.addr, c->repinfo.addrlen); 2317 return 0; 2318 } 2319 verbose(VERB_ALGO, "http read more skip to %d + %d", 2320 
(int)sldns_buffer_position(c->buffer), (int)r); 2321 sldns_buffer_skip(c->buffer, r); 2322 return 1; 2323 } 2324 2325 /** return true if http header has been read (one line complete) */ 2326 static int 2327 http_header_done(sldns_buffer* buf) 2328 { 2329 size_t i; 2330 for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) { 2331 /* there was a \r before the \n, but we ignore that */ 2332 if((char)sldns_buffer_read_u8_at(buf, i) == '\n') 2333 return 1; 2334 } 2335 return 0; 2336 } 2337 2338 /** return character string into buffer for header line, moves buffer 2339 * past that line and puts zero terminator into linefeed-newline */ 2340 static char* 2341 http_header_line(sldns_buffer* buf) 2342 { 2343 char* result = (char*)sldns_buffer_current(buf); 2344 size_t i; 2345 for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) { 2346 /* terminate the string on the \r */ 2347 if((char)sldns_buffer_read_u8_at(buf, i) == '\r') 2348 sldns_buffer_write_u8_at(buf, i, 0); 2349 /* terminate on the \n and skip past the it and done */ 2350 if((char)sldns_buffer_read_u8_at(buf, i) == '\n') { 2351 sldns_buffer_write_u8_at(buf, i, 0); 2352 sldns_buffer_set_position(buf, i+1); 2353 return result; 2354 } 2355 } 2356 return NULL; 2357 } 2358 2359 /** move unread buffer to start and clear rest for putting the rest into it */ 2360 static void 2361 http_moveover_buffer(sldns_buffer* buf) 2362 { 2363 size_t pos = sldns_buffer_position(buf); 2364 size_t len = sldns_buffer_remaining(buf); 2365 sldns_buffer_clear(buf); 2366 memmove(sldns_buffer_begin(buf), sldns_buffer_at(buf, pos), len); 2367 sldns_buffer_set_position(buf, len); 2368 } 2369 2370 /** a http header is complete, process it */ 2371 static int 2372 http_process_initial_header(struct comm_point* c) 2373 { 2374 char* line = http_header_line(c->buffer); 2375 if(!line) return 1; 2376 verbose(VERB_ALGO, "http header: %s", line); 2377 if(strncasecmp(line, "HTTP/1.1 ", 9) == 0) { 2378 /* check returncode */ 2379 
if(line[9] != '2') { 2380 verbose(VERB_ALGO, "http bad status %s", line+9); 2381 return 0; 2382 } 2383 } else if(strncasecmp(line, "Content-Length: ", 16) == 0) { 2384 if(!c->http_is_chunked) 2385 c->tcp_byte_count = (size_t)atoi(line+16); 2386 } else if(strncasecmp(line, "Transfer-Encoding: chunked", 19+7) == 0) { 2387 c->tcp_byte_count = 0; 2388 c->http_is_chunked = 1; 2389 } else if(line[0] == 0) { 2390 /* end of initial headers */ 2391 c->http_in_headers = 0; 2392 if(c->http_is_chunked) 2393 c->http_in_chunk_headers = 1; 2394 /* remove header text from front of buffer 2395 * the buffer is going to be used to return the data segment 2396 * itself and we don't want the header to get returned 2397 * prepended with it */ 2398 http_moveover_buffer(c->buffer); 2399 sldns_buffer_flip(c->buffer); 2400 return 1; 2401 } 2402 /* ignore other headers */ 2403 return 1; 2404 } 2405 2406 /** a chunk header is complete, process it, return 0=fail, 1=continue next 2407 * header line, 2=done with chunked transfer*/ 2408 static int 2409 http_process_chunk_header(struct comm_point* c) 2410 { 2411 char* line = http_header_line(c->buffer); 2412 if(!line) return 1; 2413 if(c->http_in_chunk_headers == 3) { 2414 verbose(VERB_ALGO, "http chunk trailer: %s", line); 2415 /* are we done ? 
*/ 2416 if(line[0] == 0 && c->tcp_byte_count == 0) { 2417 /* callback of http reader when NETEVENT_DONE, 2418 * end of data, with no data in buffer */ 2419 sldns_buffer_set_position(c->buffer, 0); 2420 sldns_buffer_set_limit(c->buffer, 0); 2421 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2422 (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL); 2423 /* return that we are done */ 2424 return 2; 2425 } 2426 if(line[0] == 0) { 2427 /* continue with header of the next chunk */ 2428 c->http_in_chunk_headers = 1; 2429 /* remove header text from front of buffer */ 2430 http_moveover_buffer(c->buffer); 2431 sldns_buffer_flip(c->buffer); 2432 return 1; 2433 } 2434 /* ignore further trail headers */ 2435 return 1; 2436 } 2437 verbose(VERB_ALGO, "http chunk header: %s", line); 2438 if(c->http_in_chunk_headers == 1) { 2439 /* read chunked start line */ 2440 char* end = NULL; 2441 c->tcp_byte_count = (size_t)strtol(line, &end, 16); 2442 if(end == line) 2443 return 0; 2444 c->http_in_chunk_headers = 0; 2445 /* remove header text from front of buffer */ 2446 http_moveover_buffer(c->buffer); 2447 sldns_buffer_flip(c->buffer); 2448 if(c->tcp_byte_count == 0) { 2449 /* done with chunks, process chunk_trailer lines */ 2450 c->http_in_chunk_headers = 3; 2451 } 2452 return 1; 2453 } 2454 /* ignore other headers */ 2455 return 1; 2456 } 2457 2458 /** handle nonchunked data segment, 0=fail, 1=wait */ 2459 static int 2460 http_nonchunk_segment(struct comm_point* c) 2461 { 2462 /* c->buffer at position..limit has new data we read in. 2463 * the buffer itself is full of nonchunked data. 2464 * we are looking to read tcp_byte_count more data 2465 * and then the transfer is done. 
*/ 2466 size_t remainbufferlen; 2467 size_t got_now = sldns_buffer_limit(c->buffer); 2468 if(c->tcp_byte_count <= got_now) { 2469 /* done, this is the last data fragment */ 2470 c->http_stored = 0; 2471 sldns_buffer_set_position(c->buffer, 0); 2472 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2473 (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL); 2474 return 1; 2475 } 2476 /* if we have the buffer space, 2477 * read more data collected into the buffer */ 2478 remainbufferlen = sldns_buffer_capacity(c->buffer) - 2479 sldns_buffer_limit(c->buffer); 2480 if(remainbufferlen+got_now >= c->tcp_byte_count || 2481 remainbufferlen >= (c->ssl?16384:2048)) { 2482 size_t total = sldns_buffer_limit(c->buffer); 2483 sldns_buffer_clear(c->buffer); 2484 sldns_buffer_set_position(c->buffer, total); 2485 c->http_stored = total; 2486 /* return and wait to read more */ 2487 return 1; 2488 } 2489 /* call callback with this data amount, then 2490 * wait for more */ 2491 c->tcp_byte_count -= got_now; 2492 c->http_stored = 0; 2493 sldns_buffer_set_position(c->buffer, 0); 2494 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2495 (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL); 2496 /* c->callback has to buffer_clear(c->buffer). */ 2497 /* return and wait to read more */ 2498 return 1; 2499 } 2500 2501 /** handle chunked data segment, return 0=fail, 1=wait, 2=process more */ 2502 static int 2503 http_chunked_segment(struct comm_point* c) 2504 { 2505 /* the c->buffer has from position..limit new data we read. */ 2506 /* the current chunk has length tcp_byte_count. 2507 * once we read that read more chunk headers. 
2508 */ 2509 size_t remainbufferlen; 2510 size_t got_now = sldns_buffer_limit(c->buffer) - c->http_stored; 2511 verbose(VERB_ALGO, "http_chunked_segment: got now %d, tcpbytcount %d, http_stored %d, buffer pos %d, buffer limit %d", (int)got_now, (int)c->tcp_byte_count, (int)c->http_stored, (int)sldns_buffer_position(c->buffer), (int)sldns_buffer_limit(c->buffer)); 2512 if(c->tcp_byte_count <= got_now) { 2513 /* the chunk has completed (with perhaps some extra data 2514 * from next chunk header and next chunk) */ 2515 /* save too much info into temp buffer */ 2516 size_t fraglen; 2517 struct comm_reply repinfo; 2518 c->http_stored = 0; 2519 sldns_buffer_skip(c->buffer, (ssize_t)c->tcp_byte_count); 2520 sldns_buffer_clear(c->http_temp); 2521 sldns_buffer_write(c->http_temp, 2522 sldns_buffer_current(c->buffer), 2523 sldns_buffer_remaining(c->buffer)); 2524 sldns_buffer_flip(c->http_temp); 2525 2526 /* callback with this fragment */ 2527 fraglen = sldns_buffer_position(c->buffer); 2528 sldns_buffer_set_position(c->buffer, 0); 2529 sldns_buffer_set_limit(c->buffer, fraglen); 2530 repinfo = c->repinfo; 2531 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2532 (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &repinfo); 2533 /* c->callback has to buffer_clear(). */ 2534 2535 /* is commpoint deleted? 
*/ 2536 if(!repinfo.c) { 2537 return 1; 2538 } 2539 /* copy waiting info */ 2540 sldns_buffer_clear(c->buffer); 2541 sldns_buffer_write(c->buffer, 2542 sldns_buffer_begin(c->http_temp), 2543 sldns_buffer_remaining(c->http_temp)); 2544 sldns_buffer_flip(c->buffer); 2545 /* process end of chunk trailer header lines, until 2546 * an empty line */ 2547 c->http_in_chunk_headers = 3; 2548 /* process more data in buffer (if any) */ 2549 return 2; 2550 } 2551 c->tcp_byte_count -= got_now; 2552 2553 /* if we have the buffer space, 2554 * read more data collected into the buffer */ 2555 remainbufferlen = sldns_buffer_capacity(c->buffer) - 2556 sldns_buffer_limit(c->buffer); 2557 if(remainbufferlen >= c->tcp_byte_count || 2558 remainbufferlen >= 2048) { 2559 size_t total = sldns_buffer_limit(c->buffer); 2560 sldns_buffer_clear(c->buffer); 2561 sldns_buffer_set_position(c->buffer, total); 2562 c->http_stored = total; 2563 /* return and wait to read more */ 2564 return 1; 2565 } 2566 2567 /* callback of http reader for a new part of the data */ 2568 c->http_stored = 0; 2569 sldns_buffer_set_position(c->buffer, 0); 2570 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2571 (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL); 2572 /* c->callback has to buffer_clear(c->buffer). */ 2573 /* return and wait to read more */ 2574 return 1; 2575 } 2576 2577 #ifdef HAVE_NGHTTP2 2578 /** Create new http2 session. Called when creating handling comm point. */ 2579 static struct http2_session* http2_session_create(struct comm_point* c) 2580 { 2581 struct http2_session* session = calloc(1, sizeof(*session)); 2582 if(!session) { 2583 log_err("malloc failure while creating http2 session"); 2584 return NULL; 2585 } 2586 session->c = c; 2587 2588 return session; 2589 } 2590 #endif 2591 2592 /** Delete http2 session. 
After closing connection or on error */ 2593 static void http2_session_delete(struct http2_session* h2_session) 2594 { 2595 #ifdef HAVE_NGHTTP2 2596 if(h2_session->callbacks) 2597 nghttp2_session_callbacks_del(h2_session->callbacks); 2598 free(h2_session); 2599 #else 2600 (void)h2_session; 2601 #endif 2602 } 2603 2604 #ifdef HAVE_NGHTTP2 2605 struct http2_stream* http2_stream_create(int32_t stream_id) 2606 { 2607 struct http2_stream* h2_stream = calloc(1, sizeof(*h2_stream)); 2608 if(!h2_stream) { 2609 log_err("malloc failure while creating http2 stream"); 2610 return NULL; 2611 } 2612 h2_stream->stream_id = stream_id; 2613 return h2_stream; 2614 } 2615 2616 /** Delete http2 stream. After session delete or stream close callback */ 2617 static void http2_stream_delete(struct http2_session* h2_session, 2618 struct http2_stream* h2_stream) 2619 { 2620 if(h2_stream->mesh_state) { 2621 mesh_state_remove_reply(h2_stream->mesh, h2_stream->mesh_state, 2622 h2_session->c); 2623 h2_stream->mesh_state = NULL; 2624 } 2625 http2_req_stream_clear(h2_stream); 2626 free(h2_stream); 2627 } 2628 #endif 2629 2630 void http2_stream_add_meshstate(struct http2_stream* h2_stream, 2631 struct mesh_area* mesh, struct mesh_state* m) 2632 { 2633 h2_stream->mesh = mesh; 2634 h2_stream->mesh_state = m; 2635 } 2636 2637 /** delete http2 session server. After closing connection. 
*/ 2638 static void http2_session_server_delete(struct http2_session* h2_session) 2639 { 2640 #ifdef HAVE_NGHTTP2 2641 struct http2_stream* h2_stream, *next; 2642 nghttp2_session_del(h2_session->session); /* NULL input is fine */ 2643 h2_session->session = NULL; 2644 for(h2_stream = h2_session->first_stream; h2_stream;) { 2645 next = h2_stream->next; 2646 http2_stream_delete(h2_session, h2_stream); 2647 h2_stream = next; 2648 } 2649 h2_session->first_stream = NULL; 2650 h2_session->is_drop = 0; 2651 h2_session->postpone_drop = 0; 2652 h2_session->c->h2_stream = NULL; 2653 #endif 2654 (void)h2_session; 2655 } 2656 2657 #ifdef HAVE_NGHTTP2 2658 void http2_session_add_stream(struct http2_session* h2_session, 2659 struct http2_stream* h2_stream) 2660 { 2661 if(h2_session->first_stream) 2662 h2_session->first_stream->prev = h2_stream; 2663 h2_stream->next = h2_session->first_stream; 2664 h2_session->first_stream = h2_stream; 2665 } 2666 2667 /** remove stream from session linked list. After stream close callback or 2668 * closing connection */ 2669 static void http2_session_remove_stream(struct http2_session* h2_session, 2670 struct http2_stream* h2_stream) 2671 { 2672 if(h2_stream->prev) 2673 h2_stream->prev->next = h2_stream->next; 2674 else 2675 h2_session->first_stream = h2_stream->next; 2676 if(h2_stream->next) 2677 h2_stream->next->prev = h2_stream->prev; 2678 2679 } 2680 2681 int http2_stream_close_cb(nghttp2_session* ATTR_UNUSED(session), 2682 int32_t stream_id, uint32_t ATTR_UNUSED(error_code), void* cb_arg) 2683 { 2684 struct http2_stream* h2_stream; 2685 struct http2_session* h2_session = (struct http2_session*)cb_arg; 2686 if(!(h2_stream = nghttp2_session_get_stream_user_data( 2687 h2_session->session, stream_id))) { 2688 return 0; 2689 } 2690 http2_session_remove_stream(h2_session, h2_stream); 2691 http2_stream_delete(h2_session, h2_stream); 2692 return 0; 2693 } 2694 2695 ssize_t http2_recv_cb(nghttp2_session* ATTR_UNUSED(session), uint8_t* buf, 2696 
size_t len, int ATTR_UNUSED(flags), void* cb_arg) 2697 { 2698 struct http2_session* h2_session = (struct http2_session*)cb_arg; 2699 ssize_t ret; 2700 2701 log_assert(h2_session->c->type == comm_http); 2702 log_assert(h2_session->c->h2_session); 2703 2704 #ifdef HAVE_SSL 2705 if(h2_session->c->ssl) { 2706 int r; 2707 ERR_clear_error(); 2708 r = SSL_read(h2_session->c->ssl, buf, len); 2709 if(r <= 0) { 2710 int want = SSL_get_error(h2_session->c->ssl, r); 2711 if(want == SSL_ERROR_ZERO_RETURN) { 2712 return NGHTTP2_ERR_EOF; 2713 } else if(want == SSL_ERROR_WANT_READ) { 2714 return NGHTTP2_ERR_WOULDBLOCK; 2715 } else if(want == SSL_ERROR_WANT_WRITE) { 2716 h2_session->c->ssl_shake_state = comm_ssl_shake_hs_write; 2717 comm_point_listen_for_rw(h2_session->c, 0, 1); 2718 return NGHTTP2_ERR_WOULDBLOCK; 2719 } else if(want == SSL_ERROR_SYSCALL) { 2720 #ifdef ECONNRESET 2721 if(errno == ECONNRESET && verbosity < 2) 2722 return NGHTTP2_ERR_CALLBACK_FAILURE; 2723 #endif 2724 if(errno != 0) 2725 log_err("SSL_read syscall: %s", 2726 strerror(errno)); 2727 return NGHTTP2_ERR_CALLBACK_FAILURE; 2728 } 2729 log_crypto_err("could not SSL_read"); 2730 return NGHTTP2_ERR_CALLBACK_FAILURE; 2731 } 2732 return r; 2733 } 2734 #endif /* HAVE_SSL */ 2735 2736 ret = recv(h2_session->c->fd, buf, len, 0); 2737 if(ret == 0) { 2738 return NGHTTP2_ERR_EOF; 2739 } else if(ret < 0) { 2740 #ifndef USE_WINSOCK 2741 if(errno == EINTR || errno == EAGAIN) 2742 return NGHTTP2_ERR_WOULDBLOCK; 2743 #ifdef ECONNRESET 2744 if(errno == ECONNRESET && verbosity < 2) 2745 return NGHTTP2_ERR_CALLBACK_FAILURE; 2746 #endif 2747 log_err_addr("could not http2 recv: %s", strerror(errno), 2748 &h2_session->c->repinfo.addr, 2749 h2_session->c->repinfo.addrlen); 2750 #else /* USE_WINSOCK */ 2751 if(WSAGetLastError() == WSAECONNRESET) 2752 return NGHTTP2_ERR_CALLBACK_FAILURE; 2753 if(WSAGetLastError() == WSAEINPROGRESS) 2754 return NGHTTP2_ERR_WOULDBLOCK; 2755 if(WSAGetLastError() == WSAEWOULDBLOCK) { 2756 
ub_winsock_tcp_wouldblock(h2_session->c->ev->ev, 2757 UB_EV_READ); 2758 return NGHTTP2_ERR_WOULDBLOCK; 2759 } 2760 log_err_addr("could not http2 recv: %s", 2761 wsa_strerror(WSAGetLastError()), 2762 &h2_session->c->repinfo.addr, 2763 h2_session->c->repinfo.addrlen); 2764 #endif 2765 return NGHTTP2_ERR_CALLBACK_FAILURE; 2766 } 2767 return ret; 2768 } 2769 #endif /* HAVE_NGHTTP2 */ 2770 2771 /** Handle http2 read */ 2772 static int 2773 comm_point_http2_handle_read(int ATTR_UNUSED(fd), struct comm_point* c) 2774 { 2775 #ifdef HAVE_NGHTTP2 2776 int ret; 2777 log_assert(c->h2_session); 2778 2779 /* reading until recv cb returns NGHTTP2_ERR_WOULDBLOCK */ 2780 ret = nghttp2_session_recv(c->h2_session->session); 2781 if(ret) { 2782 if(ret != NGHTTP2_ERR_EOF && 2783 ret != NGHTTP2_ERR_CALLBACK_FAILURE) { 2784 char a[256]; 2785 addr_to_str(&c->repinfo.addr, c->repinfo.addrlen, 2786 a, sizeof(a)); 2787 verbose(VERB_QUERY, "http2: session_recv from %s failed, " 2788 "error: %s", a, nghttp2_strerror(ret)); 2789 } 2790 return 0; 2791 } 2792 if(nghttp2_session_want_write(c->h2_session->session)) { 2793 c->tcp_is_reading = 0; 2794 comm_point_stop_listening(c); 2795 comm_point_start_listening(c, -1, adjusted_tcp_timeout(c)); 2796 } else if(!nghttp2_session_want_read(c->h2_session->session)) 2797 return 0; /* connection can be closed */ 2798 return 1; 2799 #else 2800 (void)c; 2801 return 0; 2802 #endif 2803 } 2804 2805 /** 2806 * Handle http reading callback. 2807 * @param fd: file descriptor of socket. 2808 * @param c: comm point to read from into buffer. 
2809 * @return: 0 on error 2810 */ 2811 static int 2812 comm_point_http_handle_read(int fd, struct comm_point* c) 2813 { 2814 log_assert(c->type == comm_http); 2815 log_assert(fd != -1); 2816 2817 /* if we are in ssl handshake, handle SSL handshake */ 2818 #ifdef HAVE_SSL 2819 if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) { 2820 if(!ssl_handshake(c)) 2821 return 0; 2822 if(c->ssl_shake_state != comm_ssl_shake_none) 2823 return 1; 2824 } 2825 #endif /* HAVE_SSL */ 2826 2827 if(!c->tcp_is_reading) 2828 return 1; 2829 2830 if(c->use_h2) { 2831 return comm_point_http2_handle_read(fd, c); 2832 } 2833 2834 /* http version is <= http/1.1 */ 2835 2836 if(c->http_min_version >= http_version_2) { 2837 /* HTTP/2 failed, not allowed to use lower version. */ 2838 return 0; 2839 } 2840 2841 /* read more data */ 2842 if(c->ssl) { 2843 if(!ssl_http_read_more(c)) 2844 return 0; 2845 } else { 2846 if(!http_read_more(fd, c)) 2847 return 0; 2848 } 2849 2850 if(c->http_stored >= sldns_buffer_position(c->buffer)) { 2851 /* read did not work but we wanted more data, there is 2852 * no bytes to process now. 
*/ 2853 return 1; 2854 } 2855 sldns_buffer_flip(c->buffer); 2856 /* if we are partway in a segment of data, position us at the point 2857 * where we left off previously */ 2858 if(c->http_stored < sldns_buffer_limit(c->buffer)) 2859 sldns_buffer_set_position(c->buffer, c->http_stored); 2860 else sldns_buffer_set_position(c->buffer, sldns_buffer_limit(c->buffer)); 2861 2862 while(sldns_buffer_remaining(c->buffer) > 0) { 2863 /* Handle HTTP/1.x data */ 2864 /* if we are reading headers, read more headers */ 2865 if(c->http_in_headers || c->http_in_chunk_headers) { 2866 /* if header is done, process the header */ 2867 if(!http_header_done(c->buffer)) { 2868 /* copy remaining data to front of buffer 2869 * and set rest for writing into it */ 2870 http_moveover_buffer(c->buffer); 2871 /* return and wait to read more */ 2872 return 1; 2873 } 2874 if(!c->http_in_chunk_headers) { 2875 /* process initial headers */ 2876 if(!http_process_initial_header(c)) 2877 return 0; 2878 } else { 2879 /* process chunk headers */ 2880 int r = http_process_chunk_header(c); 2881 if(r == 0) return 0; 2882 if(r == 2) return 1; /* done */ 2883 /* r == 1, continue */ 2884 } 2885 /* see if we have more to process */ 2886 continue; 2887 } 2888 2889 if(!c->http_is_chunked) { 2890 /* if we are reading nonchunks, process that*/ 2891 return http_nonchunk_segment(c); 2892 } else { 2893 /* if we are reading chunks, read the chunk */ 2894 int r = http_chunked_segment(c); 2895 if(r == 0) return 0; 2896 if(r == 1) return 1; 2897 continue; 2898 } 2899 } 2900 /* broke out of the loop; could not process header instead need 2901 * to read more */ 2902 /* moveover any remaining data and read more data */ 2903 http_moveover_buffer(c->buffer); 2904 /* return and wait to read more */ 2905 return 1; 2906 } 2907 2908 /** check pending connect for http */ 2909 static int 2910 http_check_connect(int fd, struct comm_point* c) 2911 { 2912 /* check for pending error from nonblocking connect */ 2913 /* from Stevens, 
unix network programming, vol1, 3rd ed, p450*/ 2914 int error = 0; 2915 socklen_t len = (socklen_t)sizeof(error); 2916 if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error, 2917 &len) < 0){ 2918 #ifndef USE_WINSOCK 2919 error = errno; /* on solaris errno is error */ 2920 #else /* USE_WINSOCK */ 2921 error = WSAGetLastError(); 2922 #endif 2923 } 2924 #ifndef USE_WINSOCK 2925 #if defined(EINPROGRESS) && defined(EWOULDBLOCK) 2926 if(error == EINPROGRESS || error == EWOULDBLOCK) 2927 return 1; /* try again later */ 2928 else 2929 #endif 2930 if(error != 0 && verbosity < 2) 2931 return 0; /* silence lots of chatter in the logs */ 2932 else if(error != 0) { 2933 log_err_addr("http connect", strerror(error), 2934 &c->repinfo.addr, c->repinfo.addrlen); 2935 #else /* USE_WINSOCK */ 2936 /* examine error */ 2937 if(error == WSAEINPROGRESS) 2938 return 1; 2939 else if(error == WSAEWOULDBLOCK) { 2940 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 2941 return 1; 2942 } else if(error != 0 && verbosity < 2) 2943 return 0; 2944 else if(error != 0) { 2945 log_err_addr("http connect", wsa_strerror(error), 2946 &c->repinfo.addr, c->repinfo.addrlen); 2947 #endif /* USE_WINSOCK */ 2948 return 0; 2949 } 2950 /* keep on processing this socket */ 2951 return 2; 2952 } 2953 2954 /** write more data for http (with ssl) */ 2955 static int 2956 ssl_http_write_more(struct comm_point* c) 2957 { 2958 #ifdef HAVE_SSL 2959 int r; 2960 log_assert(sldns_buffer_remaining(c->buffer) > 0); 2961 ERR_clear_error(); 2962 r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer), 2963 (int)sldns_buffer_remaining(c->buffer)); 2964 if(r <= 0) { 2965 int want = SSL_get_error(c->ssl, r); 2966 if(want == SSL_ERROR_ZERO_RETURN) { 2967 return 0; /* closed */ 2968 } else if(want == SSL_ERROR_WANT_READ) { 2969 c->ssl_shake_state = comm_ssl_shake_hs_read; 2970 comm_point_listen_for_rw(c, 1, 0); 2971 return 1; /* wait for read condition */ 2972 } else if(want == SSL_ERROR_WANT_WRITE) { 2973 return 1; /* write 
more later */ 2974 } else if(want == SSL_ERROR_SYSCALL) { 2975 #ifdef EPIPE 2976 if(errno == EPIPE && verbosity < 2) 2977 return 0; /* silence 'broken pipe' */ 2978 #endif 2979 if(errno != 0) 2980 log_err("SSL_write syscall: %s", 2981 strerror(errno)); 2982 return 0; 2983 } 2984 log_crypto_err("could not SSL_write"); 2985 return 0; 2986 } 2987 sldns_buffer_skip(c->buffer, (ssize_t)r); 2988 return 1; 2989 #else 2990 (void)c; 2991 return 0; 2992 #endif /* HAVE_SSL */ 2993 } 2994 2995 /** write more data for http */ 2996 static int 2997 http_write_more(int fd, struct comm_point* c) 2998 { 2999 ssize_t r; 3000 log_assert(sldns_buffer_remaining(c->buffer) > 0); 3001 r = send(fd, (void*)sldns_buffer_current(c->buffer), 3002 sldns_buffer_remaining(c->buffer), 0); 3003 if(r == -1) { 3004 #ifndef USE_WINSOCK 3005 if(errno == EINTR || errno == EAGAIN) 3006 return 1; 3007 #else 3008 if(WSAGetLastError() == WSAEINPROGRESS) 3009 return 1; 3010 if(WSAGetLastError() == WSAEWOULDBLOCK) { 3011 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 3012 return 1; 3013 } 3014 #endif 3015 log_err_addr("http send r", sock_strerror(errno), 3016 &c->repinfo.addr, c->repinfo.addrlen); 3017 return 0; 3018 } 3019 sldns_buffer_skip(c->buffer, r); 3020 return 1; 3021 } 3022 3023 #ifdef HAVE_NGHTTP2 3024 ssize_t http2_send_cb(nghttp2_session* ATTR_UNUSED(session), const uint8_t* buf, 3025 size_t len, int ATTR_UNUSED(flags), void* cb_arg) 3026 { 3027 ssize_t ret; 3028 struct http2_session* h2_session = (struct http2_session*)cb_arg; 3029 log_assert(h2_session->c->type == comm_http); 3030 log_assert(h2_session->c->h2_session); 3031 3032 #ifdef HAVE_SSL 3033 if(h2_session->c->ssl) { 3034 int r; 3035 ERR_clear_error(); 3036 r = SSL_write(h2_session->c->ssl, buf, len); 3037 if(r <= 0) { 3038 int want = SSL_get_error(h2_session->c->ssl, r); 3039 if(want == SSL_ERROR_ZERO_RETURN) { 3040 return NGHTTP2_ERR_CALLBACK_FAILURE; 3041 } else if(want == SSL_ERROR_WANT_READ) { 3042 h2_session->c->ssl_shake_state 
= comm_ssl_shake_hs_read; 3043 comm_point_listen_for_rw(h2_session->c, 1, 0); 3044 return NGHTTP2_ERR_WOULDBLOCK; 3045 } else if(want == SSL_ERROR_WANT_WRITE) { 3046 return NGHTTP2_ERR_WOULDBLOCK; 3047 } else if(want == SSL_ERROR_SYSCALL) { 3048 #ifdef EPIPE 3049 if(errno == EPIPE && verbosity < 2) 3050 return NGHTTP2_ERR_CALLBACK_FAILURE; 3051 #endif 3052 if(errno != 0) 3053 log_err("SSL_write syscall: %s", 3054 strerror(errno)); 3055 return NGHTTP2_ERR_CALLBACK_FAILURE; 3056 } 3057 log_crypto_err("could not SSL_write"); 3058 return NGHTTP2_ERR_CALLBACK_FAILURE; 3059 } 3060 return r; 3061 } 3062 #endif /* HAVE_SSL */ 3063 3064 ret = send(h2_session->c->fd, buf, len, 0); 3065 if(ret == 0) { 3066 return NGHTTP2_ERR_CALLBACK_FAILURE; 3067 } else if(ret < 0) { 3068 #ifndef USE_WINSOCK 3069 if(errno == EINTR || errno == EAGAIN) 3070 return NGHTTP2_ERR_WOULDBLOCK; 3071 #ifdef EPIPE 3072 if(errno == EPIPE && verbosity < 2) 3073 return NGHTTP2_ERR_CALLBACK_FAILURE; 3074 #endif 3075 #ifdef ECONNRESET 3076 if(errno == ECONNRESET && verbosity < 2) 3077 return NGHTTP2_ERR_CALLBACK_FAILURE; 3078 #endif 3079 log_err_addr("could not http2 write: %s", strerror(errno), 3080 &h2_session->c->repinfo.addr, 3081 h2_session->c->repinfo.addrlen); 3082 #else /* USE_WINSOCK */ 3083 if(WSAGetLastError() == WSAENOTCONN) 3084 return NGHTTP2_ERR_WOULDBLOCK; 3085 if(WSAGetLastError() == WSAEINPROGRESS) 3086 return NGHTTP2_ERR_WOULDBLOCK; 3087 if(WSAGetLastError() == WSAEWOULDBLOCK) { 3088 ub_winsock_tcp_wouldblock(h2_session->c->ev->ev, 3089 UB_EV_WRITE); 3090 return NGHTTP2_ERR_WOULDBLOCK; 3091 } 3092 if(WSAGetLastError() == WSAECONNRESET && verbosity < 2) 3093 return NGHTTP2_ERR_CALLBACK_FAILURE; 3094 log_err_addr("could not http2 write: %s", 3095 wsa_strerror(WSAGetLastError()), 3096 &h2_session->c->repinfo.addr, 3097 h2_session->c->repinfo.addrlen); 3098 #endif 3099 return NGHTTP2_ERR_CALLBACK_FAILURE; 3100 } 3101 return ret; 3102 } 3103 #endif /* HAVE_NGHTTP2 */ 3104 3105 /** Handle http2 
writing */ 3106 static int 3107 comm_point_http2_handle_write(int ATTR_UNUSED(fd), struct comm_point* c) 3108 { 3109 #ifdef HAVE_NGHTTP2 3110 int ret; 3111 log_assert(c->h2_session); 3112 3113 ret = nghttp2_session_send(c->h2_session->session); 3114 if(ret) { 3115 verbose(VERB_QUERY, "http2: session_send failed, " 3116 "error: %s", nghttp2_strerror(ret)); 3117 return 0; 3118 } 3119 3120 if(nghttp2_session_want_read(c->h2_session->session)) { 3121 c->tcp_is_reading = 1; 3122 comm_point_stop_listening(c); 3123 comm_point_start_listening(c, -1, adjusted_tcp_timeout(c)); 3124 } else if(!nghttp2_session_want_write(c->h2_session->session)) 3125 return 0; /* connection can be closed */ 3126 return 1; 3127 #else 3128 (void)c; 3129 return 0; 3130 #endif 3131 } 3132 3133 /** 3134 * Handle http writing callback. 3135 * @param fd: file descriptor of socket. 3136 * @param c: comm point to write buffer out of. 3137 * @return: 0 on error 3138 */ 3139 static int 3140 comm_point_http_handle_write(int fd, struct comm_point* c) 3141 { 3142 log_assert(c->type == comm_http); 3143 log_assert(fd != -1); 3144 3145 /* check pending connect errors, if that fails, we wait for more, 3146 * or we can continue to write contents */ 3147 if(c->tcp_check_nb_connect) { 3148 int r = http_check_connect(fd, c); 3149 if(r == 0) return 0; 3150 if(r == 1) return 1; 3151 c->tcp_check_nb_connect = 0; 3152 } 3153 /* if we are in ssl handshake, handle SSL handshake */ 3154 #ifdef HAVE_SSL 3155 if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) { 3156 if(!ssl_handshake(c)) 3157 return 0; 3158 if(c->ssl_shake_state != comm_ssl_shake_none) 3159 return 1; 3160 } 3161 #endif /* HAVE_SSL */ 3162 if(c->tcp_is_reading) 3163 return 1; 3164 3165 if(c->use_h2) { 3166 return comm_point_http2_handle_write(fd, c); 3167 } 3168 3169 /* http version is <= http/1.1 */ 3170 3171 if(c->http_min_version >= http_version_2) { 3172 /* HTTP/2 failed, not allowed to use lower version. 
*/ 3173 return 0; 3174 } 3175 3176 /* if we are writing, write more */ 3177 if(c->ssl) { 3178 if(!ssl_http_write_more(c)) 3179 return 0; 3180 } else { 3181 if(!http_write_more(fd, c)) 3182 return 0; 3183 } 3184 3185 /* we write a single buffer contents, that can contain 3186 * the http request, and then flip to read the results */ 3187 /* see if write is done */ 3188 if(sldns_buffer_remaining(c->buffer) == 0) { 3189 sldns_buffer_clear(c->buffer); 3190 if(c->tcp_do_toggle_rw) 3191 c->tcp_is_reading = 1; 3192 c->tcp_byte_count = 0; 3193 /* switch from listening(write) to listening(read) */ 3194 comm_point_stop_listening(c); 3195 comm_point_start_listening(c, -1, -1); 3196 } 3197 return 1; 3198 } 3199 3200 void 3201 comm_point_http_handle_callback(int fd, short event, void* arg) 3202 { 3203 struct comm_point* c = (struct comm_point*)arg; 3204 log_assert(c->type == comm_http); 3205 ub_comm_base_now(c->ev->base); 3206 3207 if(event&UB_EV_TIMEOUT) { 3208 verbose(VERB_QUERY, "http took too long, dropped"); 3209 reclaim_http_handler(c); 3210 if(!c->tcp_do_close) { 3211 fptr_ok(fptr_whitelist_comm_point(c->callback)); 3212 (void)(*c->callback)(c, c->cb_arg, 3213 NETEVENT_TIMEOUT, NULL); 3214 } 3215 return; 3216 } 3217 if(event&UB_EV_READ) { 3218 if(!comm_point_http_handle_read(fd, c)) { 3219 reclaim_http_handler(c); 3220 if(!c->tcp_do_close) { 3221 fptr_ok(fptr_whitelist_comm_point( 3222 c->callback)); 3223 (void)(*c->callback)(c, c->cb_arg, 3224 NETEVENT_CLOSED, NULL); 3225 } 3226 } 3227 return; 3228 } 3229 if(event&UB_EV_WRITE) { 3230 if(!comm_point_http_handle_write(fd, c)) { 3231 reclaim_http_handler(c); 3232 if(!c->tcp_do_close) { 3233 fptr_ok(fptr_whitelist_comm_point( 3234 c->callback)); 3235 (void)(*c->callback)(c, c->cb_arg, 3236 NETEVENT_CLOSED, NULL); 3237 } 3238 } 3239 return; 3240 } 3241 log_err("Ignored event %d for httphdl.", event); 3242 } 3243 3244 void comm_point_local_handle_callback(int fd, short event, void* arg) 3245 { 3246 struct comm_point* c = 
(struct comm_point*)arg; 3247 log_assert(c->type == comm_local); 3248 ub_comm_base_now(c->ev->base); 3249 3250 if(event&UB_EV_READ) { 3251 if(!comm_point_tcp_handle_read(fd, c, 1)) { 3252 fptr_ok(fptr_whitelist_comm_point(c->callback)); 3253 (void)(*c->callback)(c, c->cb_arg, NETEVENT_CLOSED, 3254 NULL); 3255 } 3256 return; 3257 } 3258 log_err("Ignored event %d for localhdl.", event); 3259 } 3260 3261 void comm_point_raw_handle_callback(int ATTR_UNUSED(fd), 3262 short event, void* arg) 3263 { 3264 struct comm_point* c = (struct comm_point*)arg; 3265 int err = NETEVENT_NOERROR; 3266 log_assert(c->type == comm_raw); 3267 ub_comm_base_now(c->ev->base); 3268 3269 if(event&UB_EV_TIMEOUT) 3270 err = NETEVENT_TIMEOUT; 3271 fptr_ok(fptr_whitelist_comm_point_raw(c->callback)); 3272 (void)(*c->callback)(c, c->cb_arg, err, NULL); 3273 } 3274 3275 struct comm_point* 3276 comm_point_create_udp(struct comm_base *base, int fd, sldns_buffer* buffer, 3277 comm_point_callback_type* callback, void* callback_arg, struct unbound_socket* socket) 3278 { 3279 struct comm_point* c = (struct comm_point*)calloc(1, 3280 sizeof(struct comm_point)); 3281 short evbits; 3282 if(!c) 3283 return NULL; 3284 c->ev = (struct internal_event*)calloc(1, 3285 sizeof(struct internal_event)); 3286 if(!c->ev) { 3287 free(c); 3288 return NULL; 3289 } 3290 c->ev->base = base; 3291 c->fd = fd; 3292 c->buffer = buffer; 3293 c->timeout = NULL; 3294 c->tcp_is_reading = 0; 3295 c->tcp_byte_count = 0; 3296 c->tcp_parent = NULL; 3297 c->max_tcp_count = 0; 3298 c->cur_tcp_count = 0; 3299 c->tcp_handlers = NULL; 3300 c->tcp_free = NULL; 3301 c->type = comm_udp; 3302 c->tcp_do_close = 0; 3303 c->do_not_close = 0; 3304 c->tcp_do_toggle_rw = 0; 3305 c->tcp_check_nb_connect = 0; 3306 #ifdef USE_MSG_FASTOPEN 3307 c->tcp_do_fastopen = 0; 3308 #endif 3309 #ifdef USE_DNSCRYPT 3310 c->dnscrypt = 0; 3311 c->dnscrypt_buffer = buffer; 3312 #endif 3313 c->inuse = 0; 3314 c->callback = callback; 3315 c->cb_arg = callback_arg; 3316 
c->socket = socket; 3317 evbits = UB_EV_READ | UB_EV_PERSIST; 3318 /* ub_event stuff */ 3319 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 3320 comm_point_udp_callback, c); 3321 if(c->ev->ev == NULL) { 3322 log_err("could not baseset udp event"); 3323 comm_point_delete(c); 3324 return NULL; 3325 } 3326 if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) { 3327 log_err("could not add udp event"); 3328 comm_point_delete(c); 3329 return NULL; 3330 } 3331 c->event_added = 1; 3332 return c; 3333 } 3334 3335 struct comm_point* 3336 comm_point_create_udp_ancil(struct comm_base *base, int fd, 3337 sldns_buffer* buffer, 3338 comm_point_callback_type* callback, void* callback_arg, struct unbound_socket* socket) 3339 { 3340 struct comm_point* c = (struct comm_point*)calloc(1, 3341 sizeof(struct comm_point)); 3342 short evbits; 3343 if(!c) 3344 return NULL; 3345 c->ev = (struct internal_event*)calloc(1, 3346 sizeof(struct internal_event)); 3347 if(!c->ev) { 3348 free(c); 3349 return NULL; 3350 } 3351 c->ev->base = base; 3352 c->fd = fd; 3353 c->buffer = buffer; 3354 c->timeout = NULL; 3355 c->tcp_is_reading = 0; 3356 c->tcp_byte_count = 0; 3357 c->tcp_parent = NULL; 3358 c->max_tcp_count = 0; 3359 c->cur_tcp_count = 0; 3360 c->tcp_handlers = NULL; 3361 c->tcp_free = NULL; 3362 c->type = comm_udp; 3363 c->tcp_do_close = 0; 3364 c->do_not_close = 0; 3365 #ifdef USE_DNSCRYPT 3366 c->dnscrypt = 0; 3367 c->dnscrypt_buffer = buffer; 3368 #endif 3369 c->inuse = 0; 3370 c->tcp_do_toggle_rw = 0; 3371 c->tcp_check_nb_connect = 0; 3372 #ifdef USE_MSG_FASTOPEN 3373 c->tcp_do_fastopen = 0; 3374 #endif 3375 c->callback = callback; 3376 c->cb_arg = callback_arg; 3377 c->socket = socket; 3378 evbits = UB_EV_READ | UB_EV_PERSIST; 3379 /* ub_event stuff */ 3380 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 3381 comm_point_udp_ancil_callback, c); 3382 if(c->ev->ev == NULL) { 3383 log_err("could not baseset udp event"); 3384 comm_point_delete(c); 3385 return NULL; 3386 } 
3387 if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) { 3388 log_err("could not add udp event"); 3389 comm_point_delete(c); 3390 return NULL; 3391 } 3392 c->event_added = 1; 3393 return c; 3394 } 3395 3396 static struct comm_point* 3397 comm_point_create_tcp_handler(struct comm_base *base, 3398 struct comm_point* parent, size_t bufsize, 3399 struct sldns_buffer* spoolbuf, comm_point_callback_type* callback, 3400 void* callback_arg, struct unbound_socket* socket) 3401 { 3402 struct comm_point* c = (struct comm_point*)calloc(1, 3403 sizeof(struct comm_point)); 3404 short evbits; 3405 if(!c) 3406 return NULL; 3407 c->ev = (struct internal_event*)calloc(1, 3408 sizeof(struct internal_event)); 3409 if(!c->ev) { 3410 free(c); 3411 return NULL; 3412 } 3413 c->ev->base = base; 3414 c->fd = -1; 3415 c->buffer = sldns_buffer_new(bufsize); 3416 if(!c->buffer) { 3417 free(c->ev); 3418 free(c); 3419 return NULL; 3420 } 3421 c->timeout = (struct timeval*)malloc(sizeof(struct timeval)); 3422 if(!c->timeout) { 3423 sldns_buffer_free(c->buffer); 3424 free(c->ev); 3425 free(c); 3426 return NULL; 3427 } 3428 c->tcp_is_reading = 0; 3429 c->tcp_byte_count = 0; 3430 c->tcp_parent = parent; 3431 c->tcp_timeout_msec = parent->tcp_timeout_msec; 3432 c->tcp_conn_limit = parent->tcp_conn_limit; 3433 c->tcl_addr = NULL; 3434 c->tcp_keepalive = 0; 3435 c->max_tcp_count = 0; 3436 c->cur_tcp_count = 0; 3437 c->tcp_handlers = NULL; 3438 c->tcp_free = NULL; 3439 c->type = comm_tcp; 3440 c->tcp_do_close = 0; 3441 c->do_not_close = 0; 3442 c->tcp_do_toggle_rw = 1; 3443 c->tcp_check_nb_connect = 0; 3444 #ifdef USE_MSG_FASTOPEN 3445 c->tcp_do_fastopen = 0; 3446 #endif 3447 #ifdef USE_DNSCRYPT 3448 c->dnscrypt = 0; 3449 /* We don't know just yet if this is a dnscrypt channel. Allocation 3450 * will be done when handling the callback. 
*/ 3451 c->dnscrypt_buffer = c->buffer; 3452 #endif 3453 c->repinfo.c = c; 3454 c->callback = callback; 3455 c->cb_arg = callback_arg; 3456 c->socket = socket; 3457 if(spoolbuf) { 3458 c->tcp_req_info = tcp_req_info_create(spoolbuf); 3459 if(!c->tcp_req_info) { 3460 log_err("could not create tcp commpoint"); 3461 sldns_buffer_free(c->buffer); 3462 free(c->timeout); 3463 free(c->ev); 3464 free(c); 3465 return NULL; 3466 } 3467 c->tcp_req_info->cp = c; 3468 c->tcp_do_close = 1; 3469 c->tcp_do_toggle_rw = 0; 3470 } 3471 /* add to parent free list */ 3472 c->tcp_free = parent->tcp_free; 3473 parent->tcp_free = c; 3474 /* ub_event stuff */ 3475 evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT; 3476 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 3477 comm_point_tcp_handle_callback, c); 3478 if(c->ev->ev == NULL) 3479 { 3480 log_err("could not basetset tcphdl event"); 3481 parent->tcp_free = c->tcp_free; 3482 tcp_req_info_delete(c->tcp_req_info); 3483 sldns_buffer_free(c->buffer); 3484 free(c->timeout); 3485 free(c->ev); 3486 free(c); 3487 return NULL; 3488 } 3489 return c; 3490 } 3491 3492 static struct comm_point* 3493 comm_point_create_http_handler(struct comm_base *base, 3494 struct comm_point* parent, size_t bufsize, int harden_large_queries, 3495 uint32_t http_max_streams, char* http_endpoint, 3496 comm_point_callback_type* callback, void* callback_arg, 3497 struct unbound_socket* socket) 3498 { 3499 struct comm_point* c = (struct comm_point*)calloc(1, 3500 sizeof(struct comm_point)); 3501 short evbits; 3502 if(!c) 3503 return NULL; 3504 c->ev = (struct internal_event*)calloc(1, 3505 sizeof(struct internal_event)); 3506 if(!c->ev) { 3507 free(c); 3508 return NULL; 3509 } 3510 c->ev->base = base; 3511 c->fd = -1; 3512 c->buffer = sldns_buffer_new(bufsize); 3513 if(!c->buffer) { 3514 free(c->ev); 3515 free(c); 3516 return NULL; 3517 } 3518 c->timeout = (struct timeval*)malloc(sizeof(struct timeval)); 3519 if(!c->timeout) { 3520 
sldns_buffer_free(c->buffer); 3521 free(c->ev); 3522 free(c); 3523 return NULL; 3524 } 3525 c->tcp_is_reading = 0; 3526 c->tcp_byte_count = 0; 3527 c->tcp_parent = parent; 3528 c->tcp_timeout_msec = parent->tcp_timeout_msec; 3529 c->tcp_conn_limit = parent->tcp_conn_limit; 3530 c->tcl_addr = NULL; 3531 c->tcp_keepalive = 0; 3532 c->max_tcp_count = 0; 3533 c->cur_tcp_count = 0; 3534 c->tcp_handlers = NULL; 3535 c->tcp_free = NULL; 3536 c->type = comm_http; 3537 c->tcp_do_close = 1; 3538 c->do_not_close = 0; 3539 c->tcp_do_toggle_rw = 1; /* will be set to 0 after http2 upgrade */ 3540 c->tcp_check_nb_connect = 0; 3541 #ifdef USE_MSG_FASTOPEN 3542 c->tcp_do_fastopen = 0; 3543 #endif 3544 #ifdef USE_DNSCRYPT 3545 c->dnscrypt = 0; 3546 c->dnscrypt_buffer = NULL; 3547 #endif 3548 c->repinfo.c = c; 3549 c->callback = callback; 3550 c->cb_arg = callback_arg; 3551 c->socket = socket; 3552 3553 c->http_min_version = http_version_2; 3554 c->http2_stream_max_qbuffer_size = bufsize; 3555 if(harden_large_queries && bufsize > 512) 3556 c->http2_stream_max_qbuffer_size = 512; 3557 c->http2_max_streams = http_max_streams; 3558 if(!(c->http_endpoint = strdup(http_endpoint))) { 3559 log_err("could not strdup http_endpoint"); 3560 sldns_buffer_free(c->buffer); 3561 free(c->timeout); 3562 free(c->ev); 3563 free(c); 3564 return NULL; 3565 } 3566 c->use_h2 = 0; 3567 #ifdef HAVE_NGHTTP2 3568 if(!(c->h2_session = http2_session_create(c))) { 3569 log_err("could not create http2 session"); 3570 free(c->http_endpoint); 3571 sldns_buffer_free(c->buffer); 3572 free(c->timeout); 3573 free(c->ev); 3574 free(c); 3575 return NULL; 3576 } 3577 if(!(c->h2_session->callbacks = http2_req_callbacks_create())) { 3578 log_err("could not create http2 callbacks"); 3579 http2_session_delete(c->h2_session); 3580 free(c->http_endpoint); 3581 sldns_buffer_free(c->buffer); 3582 free(c->timeout); 3583 free(c->ev); 3584 free(c); 3585 return NULL; 3586 } 3587 #endif 3588 3589 /* add to parent free list */ 3590 
c->tcp_free = parent->tcp_free; 3591 parent->tcp_free = c; 3592 /* ub_event stuff */ 3593 evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT; 3594 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 3595 comm_point_http_handle_callback, c); 3596 if(c->ev->ev == NULL) 3597 { 3598 log_err("could not set http handler event"); 3599 parent->tcp_free = c->tcp_free; 3600 http2_session_delete(c->h2_session); 3601 sldns_buffer_free(c->buffer); 3602 free(c->timeout); 3603 free(c->ev); 3604 free(c); 3605 return NULL; 3606 } 3607 return c; 3608 } 3609 3610 struct comm_point* 3611 comm_point_create_tcp(struct comm_base *base, int fd, int num, 3612 int idle_timeout, int harden_large_queries, 3613 uint32_t http_max_streams, char* http_endpoint, 3614 struct tcl_list* tcp_conn_limit, size_t bufsize, 3615 struct sldns_buffer* spoolbuf, enum listen_type port_type, 3616 comm_point_callback_type* callback, void* callback_arg, struct unbound_socket* socket) 3617 { 3618 struct comm_point* c = (struct comm_point*)calloc(1, 3619 sizeof(struct comm_point)); 3620 short evbits; 3621 int i; 3622 /* first allocate the TCP accept listener */ 3623 if(!c) 3624 return NULL; 3625 c->ev = (struct internal_event*)calloc(1, 3626 sizeof(struct internal_event)); 3627 if(!c->ev) { 3628 free(c); 3629 return NULL; 3630 } 3631 c->ev->base = base; 3632 c->fd = fd; 3633 c->buffer = NULL; 3634 c->timeout = NULL; 3635 c->tcp_is_reading = 0; 3636 c->tcp_byte_count = 0; 3637 c->tcp_timeout_msec = idle_timeout; 3638 c->tcp_conn_limit = tcp_conn_limit; 3639 c->tcl_addr = NULL; 3640 c->tcp_keepalive = 0; 3641 c->tcp_parent = NULL; 3642 c->max_tcp_count = num; 3643 c->cur_tcp_count = 0; 3644 c->tcp_handlers = (struct comm_point**)calloc((size_t)num, 3645 sizeof(struct comm_point*)); 3646 if(!c->tcp_handlers) { 3647 free(c->ev); 3648 free(c); 3649 return NULL; 3650 } 3651 c->tcp_free = NULL; 3652 c->type = comm_tcp_accept; 3653 c->tcp_do_close = 0; 3654 c->do_not_close = 0; 3655 c->tcp_do_toggle_rw = 0; 3656 
c->tcp_check_nb_connect = 0; 3657 #ifdef USE_MSG_FASTOPEN 3658 c->tcp_do_fastopen = 0; 3659 #endif 3660 #ifdef USE_DNSCRYPT 3661 c->dnscrypt = 0; 3662 c->dnscrypt_buffer = NULL; 3663 #endif 3664 c->callback = NULL; 3665 c->cb_arg = NULL; 3666 c->socket = socket; 3667 evbits = UB_EV_READ | UB_EV_PERSIST; 3668 /* ub_event stuff */ 3669 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 3670 comm_point_tcp_accept_callback, c); 3671 if(c->ev->ev == NULL) { 3672 log_err("could not baseset tcpacc event"); 3673 comm_point_delete(c); 3674 return NULL; 3675 } 3676 if (ub_event_add(c->ev->ev, c->timeout) != 0) { 3677 log_err("could not add tcpacc event"); 3678 comm_point_delete(c); 3679 return NULL; 3680 } 3681 c->event_added = 1; 3682 /* now prealloc the handlers */ 3683 for(i=0; i<num; i++) { 3684 if(port_type == listen_type_tcp || 3685 port_type == listen_type_ssl || 3686 port_type == listen_type_tcp_dnscrypt) { 3687 c->tcp_handlers[i] = comm_point_create_tcp_handler(base, 3688 c, bufsize, spoolbuf, callback, callback_arg, socket); 3689 } else if(port_type == listen_type_http) { 3690 c->tcp_handlers[i] = comm_point_create_http_handler( 3691 base, c, bufsize, harden_large_queries, 3692 http_max_streams, http_endpoint, 3693 callback, callback_arg, socket); 3694 } 3695 else { 3696 log_err("could not create tcp handler, unknown listen " 3697 "type"); 3698 return NULL; 3699 } 3700 if(!c->tcp_handlers[i]) { 3701 comm_point_delete(c); 3702 return NULL; 3703 } 3704 } 3705 3706 return c; 3707 } 3708 3709 struct comm_point* 3710 comm_point_create_tcp_out(struct comm_base *base, size_t bufsize, 3711 comm_point_callback_type* callback, void* callback_arg) 3712 { 3713 struct comm_point* c = (struct comm_point*)calloc(1, 3714 sizeof(struct comm_point)); 3715 short evbits; 3716 if(!c) 3717 return NULL; 3718 c->ev = (struct internal_event*)calloc(1, 3719 sizeof(struct internal_event)); 3720 if(!c->ev) { 3721 free(c); 3722 return NULL; 3723 } 3724 c->ev->base = base; 3725 c->fd = -1; 
3726 c->buffer = sldns_buffer_new(bufsize); 3727 if(!c->buffer) { 3728 free(c->ev); 3729 free(c); 3730 return NULL; 3731 } 3732 c->timeout = NULL; 3733 c->tcp_is_reading = 0; 3734 c->tcp_byte_count = 0; 3735 c->tcp_timeout_msec = TCP_QUERY_TIMEOUT; 3736 c->tcp_conn_limit = NULL; 3737 c->tcl_addr = NULL; 3738 c->tcp_keepalive = 0; 3739 c->tcp_parent = NULL; 3740 c->max_tcp_count = 0; 3741 c->cur_tcp_count = 0; 3742 c->tcp_handlers = NULL; 3743 c->tcp_free = NULL; 3744 c->type = comm_tcp; 3745 c->tcp_do_close = 0; 3746 c->do_not_close = 0; 3747 c->tcp_do_toggle_rw = 1; 3748 c->tcp_check_nb_connect = 1; 3749 #ifdef USE_MSG_FASTOPEN 3750 c->tcp_do_fastopen = 1; 3751 #endif 3752 #ifdef USE_DNSCRYPT 3753 c->dnscrypt = 0; 3754 c->dnscrypt_buffer = c->buffer; 3755 #endif 3756 c->repinfo.c = c; 3757 c->callback = callback; 3758 c->cb_arg = callback_arg; 3759 evbits = UB_EV_PERSIST | UB_EV_WRITE; 3760 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 3761 comm_point_tcp_handle_callback, c); 3762 if(c->ev->ev == NULL) 3763 { 3764 log_err("could not baseset tcpout event"); 3765 sldns_buffer_free(c->buffer); 3766 free(c->ev); 3767 free(c); 3768 return NULL; 3769 } 3770 3771 return c; 3772 } 3773 3774 struct comm_point* 3775 comm_point_create_http_out(struct comm_base *base, size_t bufsize, 3776 comm_point_callback_type* callback, void* callback_arg, 3777 sldns_buffer* temp) 3778 { 3779 struct comm_point* c = (struct comm_point*)calloc(1, 3780 sizeof(struct comm_point)); 3781 short evbits; 3782 if(!c) 3783 return NULL; 3784 c->ev = (struct internal_event*)calloc(1, 3785 sizeof(struct internal_event)); 3786 if(!c->ev) { 3787 free(c); 3788 return NULL; 3789 } 3790 c->ev->base = base; 3791 c->fd = -1; 3792 c->buffer = sldns_buffer_new(bufsize); 3793 if(!c->buffer) { 3794 free(c->ev); 3795 free(c); 3796 return NULL; 3797 } 3798 c->timeout = NULL; 3799 c->tcp_is_reading = 0; 3800 c->tcp_byte_count = 0; 3801 c->tcp_parent = NULL; 3802 c->max_tcp_count = 0; 3803 c->cur_tcp_count 
= 0; 3804 c->tcp_handlers = NULL; 3805 c->tcp_free = NULL; 3806 c->type = comm_http; 3807 c->tcp_do_close = 0; 3808 c->do_not_close = 0; 3809 c->tcp_do_toggle_rw = 1; 3810 c->tcp_check_nb_connect = 1; 3811 c->http_in_headers = 1; 3812 c->http_in_chunk_headers = 0; 3813 c->http_is_chunked = 0; 3814 c->http_temp = temp; 3815 #ifdef USE_MSG_FASTOPEN 3816 c->tcp_do_fastopen = 1; 3817 #endif 3818 #ifdef USE_DNSCRYPT 3819 c->dnscrypt = 0; 3820 c->dnscrypt_buffer = c->buffer; 3821 #endif 3822 c->repinfo.c = c; 3823 c->callback = callback; 3824 c->cb_arg = callback_arg; 3825 evbits = UB_EV_PERSIST | UB_EV_WRITE; 3826 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 3827 comm_point_http_handle_callback, c); 3828 if(c->ev->ev == NULL) 3829 { 3830 log_err("could not baseset tcpout event"); 3831 #ifdef HAVE_SSL 3832 SSL_free(c->ssl); 3833 #endif 3834 sldns_buffer_free(c->buffer); 3835 free(c->ev); 3836 free(c); 3837 return NULL; 3838 } 3839 3840 return c; 3841 } 3842 3843 struct comm_point* 3844 comm_point_create_local(struct comm_base *base, int fd, size_t bufsize, 3845 comm_point_callback_type* callback, void* callback_arg) 3846 { 3847 struct comm_point* c = (struct comm_point*)calloc(1, 3848 sizeof(struct comm_point)); 3849 short evbits; 3850 if(!c) 3851 return NULL; 3852 c->ev = (struct internal_event*)calloc(1, 3853 sizeof(struct internal_event)); 3854 if(!c->ev) { 3855 free(c); 3856 return NULL; 3857 } 3858 c->ev->base = base; 3859 c->fd = fd; 3860 c->buffer = sldns_buffer_new(bufsize); 3861 if(!c->buffer) { 3862 free(c->ev); 3863 free(c); 3864 return NULL; 3865 } 3866 c->timeout = NULL; 3867 c->tcp_is_reading = 1; 3868 c->tcp_byte_count = 0; 3869 c->tcp_parent = NULL; 3870 c->max_tcp_count = 0; 3871 c->cur_tcp_count = 0; 3872 c->tcp_handlers = NULL; 3873 c->tcp_free = NULL; 3874 c->type = comm_local; 3875 c->tcp_do_close = 0; 3876 c->do_not_close = 1; 3877 c->tcp_do_toggle_rw = 0; 3878 c->tcp_check_nb_connect = 0; 3879 #ifdef USE_MSG_FASTOPEN 3880 
c->tcp_do_fastopen = 0; 3881 #endif 3882 #ifdef USE_DNSCRYPT 3883 c->dnscrypt = 0; 3884 c->dnscrypt_buffer = c->buffer; 3885 #endif 3886 c->callback = callback; 3887 c->cb_arg = callback_arg; 3888 /* ub_event stuff */ 3889 evbits = UB_EV_PERSIST | UB_EV_READ; 3890 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 3891 comm_point_local_handle_callback, c); 3892 if(c->ev->ev == NULL) { 3893 log_err("could not baseset localhdl event"); 3894 free(c->ev); 3895 free(c); 3896 return NULL; 3897 } 3898 if (ub_event_add(c->ev->ev, c->timeout) != 0) { 3899 log_err("could not add localhdl event"); 3900 ub_event_free(c->ev->ev); 3901 free(c->ev); 3902 free(c); 3903 return NULL; 3904 } 3905 c->event_added = 1; 3906 return c; 3907 } 3908 3909 struct comm_point* 3910 comm_point_create_raw(struct comm_base* base, int fd, int writing, 3911 comm_point_callback_type* callback, void* callback_arg) 3912 { 3913 struct comm_point* c = (struct comm_point*)calloc(1, 3914 sizeof(struct comm_point)); 3915 short evbits; 3916 if(!c) 3917 return NULL; 3918 c->ev = (struct internal_event*)calloc(1, 3919 sizeof(struct internal_event)); 3920 if(!c->ev) { 3921 free(c); 3922 return NULL; 3923 } 3924 c->ev->base = base; 3925 c->fd = fd; 3926 c->buffer = NULL; 3927 c->timeout = NULL; 3928 c->tcp_is_reading = 0; 3929 c->tcp_byte_count = 0; 3930 c->tcp_parent = NULL; 3931 c->max_tcp_count = 0; 3932 c->cur_tcp_count = 0; 3933 c->tcp_handlers = NULL; 3934 c->tcp_free = NULL; 3935 c->type = comm_raw; 3936 c->tcp_do_close = 0; 3937 c->do_not_close = 1; 3938 c->tcp_do_toggle_rw = 0; 3939 c->tcp_check_nb_connect = 0; 3940 #ifdef USE_MSG_FASTOPEN 3941 c->tcp_do_fastopen = 0; 3942 #endif 3943 #ifdef USE_DNSCRYPT 3944 c->dnscrypt = 0; 3945 c->dnscrypt_buffer = c->buffer; 3946 #endif 3947 c->callback = callback; 3948 c->cb_arg = callback_arg; 3949 /* ub_event stuff */ 3950 if(writing) 3951 evbits = UB_EV_PERSIST | UB_EV_WRITE; 3952 else evbits = UB_EV_PERSIST | UB_EV_READ; 3953 c->ev->ev = 
ub_event_new(base->eb->base, c->fd, evbits, 3954 comm_point_raw_handle_callback, c); 3955 if(c->ev->ev == NULL) { 3956 log_err("could not baseset rawhdl event"); 3957 free(c->ev); 3958 free(c); 3959 return NULL; 3960 } 3961 if (ub_event_add(c->ev->ev, c->timeout) != 0) { 3962 log_err("could not add rawhdl event"); 3963 ub_event_free(c->ev->ev); 3964 free(c->ev); 3965 free(c); 3966 return NULL; 3967 } 3968 c->event_added = 1; 3969 return c; 3970 } 3971 3972 void 3973 comm_point_close(struct comm_point* c) 3974 { 3975 if(!c) 3976 return; 3977 if(c->fd != -1) { 3978 verbose(5, "comm_point_close of %d: event_del", c->fd); 3979 if(c->event_added) { 3980 if(ub_event_del(c->ev->ev) != 0) { 3981 log_err("could not event_del on close"); 3982 } 3983 c->event_added = 0; 3984 } 3985 } 3986 tcl_close_connection(c->tcl_addr); 3987 if(c->tcp_req_info) 3988 tcp_req_info_clear(c->tcp_req_info); 3989 if(c->h2_session) 3990 http2_session_server_delete(c->h2_session); 3991 3992 /* close fd after removing from event lists, or epoll.. 
is messed up */ 3993 if(c->fd != -1 && !c->do_not_close) { 3994 #ifdef USE_WINSOCK 3995 if(c->type == comm_tcp || c->type == comm_http) { 3996 /* delete sticky events for the fd, it gets closed */ 3997 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 3998 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 3999 } 4000 #endif 4001 verbose(VERB_ALGO, "close fd %d", c->fd); 4002 sock_close(c->fd); 4003 } 4004 c->fd = -1; 4005 } 4006 4007 void 4008 comm_point_delete(struct comm_point* c) 4009 { 4010 if(!c) 4011 return; 4012 if((c->type == comm_tcp || c->type == comm_http) && c->ssl) { 4013 #ifdef HAVE_SSL 4014 SSL_shutdown(c->ssl); 4015 SSL_free(c->ssl); 4016 #endif 4017 } 4018 if(c->type == comm_http && c->http_endpoint) { 4019 free(c->http_endpoint); 4020 c->http_endpoint = NULL; 4021 } 4022 comm_point_close(c); 4023 if(c->tcp_handlers) { 4024 int i; 4025 for(i=0; i<c->max_tcp_count; i++) 4026 comm_point_delete(c->tcp_handlers[i]); 4027 free(c->tcp_handlers); 4028 } 4029 free(c->timeout); 4030 if(c->type == comm_tcp || c->type == comm_local || c->type == comm_http) { 4031 sldns_buffer_free(c->buffer); 4032 #ifdef USE_DNSCRYPT 4033 if(c->dnscrypt && c->dnscrypt_buffer != c->buffer) { 4034 sldns_buffer_free(c->dnscrypt_buffer); 4035 } 4036 #endif 4037 if(c->tcp_req_info) { 4038 tcp_req_info_delete(c->tcp_req_info); 4039 } 4040 if(c->h2_session) { 4041 http2_session_delete(c->h2_session); 4042 } 4043 } 4044 ub_event_free(c->ev->ev); 4045 free(c->ev); 4046 free(c); 4047 } 4048 4049 void 4050 comm_point_send_reply(struct comm_reply *repinfo) 4051 { 4052 struct sldns_buffer* buffer; 4053 log_assert(repinfo && repinfo->c); 4054 #ifdef USE_DNSCRYPT 4055 buffer = repinfo->c->dnscrypt_buffer; 4056 if(!dnsc_handle_uncurved_request(repinfo)) { 4057 return; 4058 } 4059 #else 4060 buffer = repinfo->c->buffer; 4061 #endif 4062 if(repinfo->c->type == comm_udp) { 4063 if(repinfo->srctype) 4064 comm_point_send_udp_msg_if(repinfo->c, 4065 buffer, (struct sockaddr*)&repinfo->addr, 4066 
repinfo->addrlen, repinfo); 4067 else 4068 comm_point_send_udp_msg(repinfo->c, buffer, 4069 (struct sockaddr*)&repinfo->addr, repinfo->addrlen, 0); 4070 #ifdef USE_DNSTAP 4071 /* 4072 * sending src (client)/dst (local service) addresses over DNSTAP from udp callback 4073 */ 4074 if(repinfo->c->dtenv != NULL && repinfo->c->dtenv->log_client_response_messages) { 4075 log_addr(VERB_ALGO, "from local addr", (void*)repinfo->c->socket->addr->ai_addr, repinfo->c->socket->addr->ai_addrlen); 4076 log_addr(VERB_ALGO, "response to client", &repinfo->addr, repinfo->addrlen); 4077 dt_msg_send_client_response(repinfo->c->dtenv, &repinfo->addr, (void*)repinfo->c->socket->addr->ai_addr, repinfo->c->type, repinfo->c->buffer); 4078 } 4079 #endif 4080 } else { 4081 #ifdef USE_DNSTAP 4082 /* 4083 * sending src (client)/dst (local service) addresses over DNSTAP from TCP callback 4084 */ 4085 if(repinfo->c->tcp_parent->dtenv != NULL && repinfo->c->tcp_parent->dtenv->log_client_response_messages) { 4086 log_addr(VERB_ALGO, "from local addr", (void*)repinfo->c->socket->addr->ai_addr, repinfo->c->socket->addr->ai_addrlen); 4087 log_addr(VERB_ALGO, "response to client", &repinfo->addr, repinfo->addrlen); 4088 dt_msg_send_client_response(repinfo->c->tcp_parent->dtenv, &repinfo->addr, (void*)repinfo->c->socket->addr->ai_addr, repinfo->c->type, 4089 ( repinfo->c->tcp_req_info? 
repinfo->c->tcp_req_info->spool_buffer: repinfo->c->buffer )); 4090 } 4091 #endif 4092 if(repinfo->c->tcp_req_info) { 4093 tcp_req_info_send_reply(repinfo->c->tcp_req_info); 4094 } else if(repinfo->c->use_h2) { 4095 if(!http2_submit_dns_response(repinfo->c->h2_session)) { 4096 comm_point_drop_reply(repinfo); 4097 return; 4098 } 4099 repinfo->c->h2_stream = NULL; 4100 repinfo->c->tcp_is_reading = 0; 4101 comm_point_stop_listening(repinfo->c); 4102 comm_point_start_listening(repinfo->c, -1, 4103 adjusted_tcp_timeout(repinfo->c)); 4104 return; 4105 } else { 4106 comm_point_start_listening(repinfo->c, -1, 4107 adjusted_tcp_timeout(repinfo->c)); 4108 } 4109 } 4110 } 4111 4112 void 4113 comm_point_drop_reply(struct comm_reply* repinfo) 4114 { 4115 if(!repinfo) 4116 return; 4117 log_assert(repinfo->c); 4118 log_assert(repinfo->c->type != comm_tcp_accept); 4119 if(repinfo->c->type == comm_udp) 4120 return; 4121 if(repinfo->c->tcp_req_info) 4122 repinfo->c->tcp_req_info->is_drop = 1; 4123 if(repinfo->c->type == comm_http) { 4124 if(repinfo->c->h2_session) { 4125 repinfo->c->h2_session->is_drop = 1; 4126 if(!repinfo->c->h2_session->postpone_drop) 4127 reclaim_http_handler(repinfo->c); 4128 return; 4129 } 4130 reclaim_http_handler(repinfo->c); 4131 return; 4132 } 4133 reclaim_tcp_handler(repinfo->c); 4134 } 4135 4136 void 4137 comm_point_stop_listening(struct comm_point* c) 4138 { 4139 verbose(VERB_ALGO, "comm point stop listening %d", c->fd); 4140 if(c->event_added) { 4141 if(ub_event_del(c->ev->ev) != 0) { 4142 log_err("event_del error to stoplisten"); 4143 } 4144 c->event_added = 0; 4145 } 4146 } 4147 4148 void 4149 comm_point_start_listening(struct comm_point* c, int newfd, int msec) 4150 { 4151 verbose(VERB_ALGO, "comm point start listening %d (%d msec)", 4152 c->fd==-1?newfd:c->fd, msec); 4153 if(c->type == comm_tcp_accept && !c->tcp_free) { 4154 /* no use to start listening no free slots. 
*/ 4155 return; 4156 } 4157 if(c->event_added) { 4158 if(ub_event_del(c->ev->ev) != 0) { 4159 log_err("event_del error to startlisten"); 4160 } 4161 c->event_added = 0; 4162 } 4163 if(msec != -1 && msec != 0) { 4164 if(!c->timeout) { 4165 c->timeout = (struct timeval*)malloc(sizeof( 4166 struct timeval)); 4167 if(!c->timeout) { 4168 log_err("cpsl: malloc failed. No net read."); 4169 return; 4170 } 4171 } 4172 ub_event_add_bits(c->ev->ev, UB_EV_TIMEOUT); 4173 #ifndef S_SPLINT_S /* splint fails on struct timeval. */ 4174 c->timeout->tv_sec = msec/1000; 4175 c->timeout->tv_usec = (msec%1000)*1000; 4176 #endif /* S_SPLINT_S */ 4177 } else { 4178 if(msec == 0 || !c->timeout) { 4179 ub_event_del_bits(c->ev->ev, UB_EV_TIMEOUT); 4180 } 4181 } 4182 if(c->type == comm_tcp || c->type == comm_http) { 4183 ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE); 4184 if(c->tcp_write_and_read) { 4185 verbose(5, "startlistening %d mode rw", (newfd==-1?c->fd:newfd)); 4186 ub_event_add_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE); 4187 } else if(c->tcp_is_reading) { 4188 verbose(5, "startlistening %d mode r", (newfd==-1?c->fd:newfd)); 4189 ub_event_add_bits(c->ev->ev, UB_EV_READ); 4190 } else { 4191 verbose(5, "startlistening %d mode w", (newfd==-1?c->fd:newfd)); 4192 ub_event_add_bits(c->ev->ev, UB_EV_WRITE); 4193 } 4194 } 4195 if(newfd != -1) { 4196 if(c->fd != -1 && c->fd != newfd) { 4197 verbose(5, "cpsl close of fd %d for %d", c->fd, newfd); 4198 sock_close(c->fd); 4199 } 4200 c->fd = newfd; 4201 ub_event_set_fd(c->ev->ev, c->fd); 4202 } 4203 if(ub_event_add(c->ev->ev, msec==0?NULL:c->timeout) != 0) { 4204 log_err("event_add failed. 
in cpsl."); 4205 return; 4206 } 4207 c->event_added = 1; 4208 } 4209 4210 void comm_point_listen_for_rw(struct comm_point* c, int rd, int wr) 4211 { 4212 verbose(VERB_ALGO, "comm point listen_for_rw %d %d", c->fd, wr); 4213 if(c->event_added) { 4214 if(ub_event_del(c->ev->ev) != 0) { 4215 log_err("event_del error to cplf"); 4216 } 4217 c->event_added = 0; 4218 } 4219 if(!c->timeout) { 4220 ub_event_del_bits(c->ev->ev, UB_EV_TIMEOUT); 4221 } 4222 ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE); 4223 if(rd) ub_event_add_bits(c->ev->ev, UB_EV_READ); 4224 if(wr) ub_event_add_bits(c->ev->ev, UB_EV_WRITE); 4225 if(ub_event_add(c->ev->ev, c->timeout) != 0) { 4226 log_err("event_add failed. in cplf."); 4227 return; 4228 } 4229 c->event_added = 1; 4230 } 4231 4232 size_t comm_point_get_mem(struct comm_point* c) 4233 { 4234 size_t s; 4235 if(!c) 4236 return 0; 4237 s = sizeof(*c) + sizeof(*c->ev); 4238 if(c->timeout) 4239 s += sizeof(*c->timeout); 4240 if(c->type == comm_tcp || c->type == comm_local) { 4241 s += sizeof(*c->buffer) + sldns_buffer_capacity(c->buffer); 4242 #ifdef USE_DNSCRYPT 4243 s += sizeof(*c->dnscrypt_buffer); 4244 if(c->buffer != c->dnscrypt_buffer) { 4245 s += sldns_buffer_capacity(c->dnscrypt_buffer); 4246 } 4247 #endif 4248 } 4249 if(c->type == comm_tcp_accept) { 4250 int i; 4251 for(i=0; i<c->max_tcp_count; i++) 4252 s += comm_point_get_mem(c->tcp_handlers[i]); 4253 } 4254 return s; 4255 } 4256 4257 struct comm_timer* 4258 comm_timer_create(struct comm_base* base, void (*cb)(void*), void* cb_arg) 4259 { 4260 struct internal_timer *tm = (struct internal_timer*)calloc(1, 4261 sizeof(struct internal_timer)); 4262 if(!tm) { 4263 log_err("malloc failed"); 4264 return NULL; 4265 } 4266 tm->super.ev_timer = tm; 4267 tm->base = base; 4268 tm->super.callback = cb; 4269 tm->super.cb_arg = cb_arg; 4270 tm->ev = ub_event_new(base->eb->base, -1, UB_EV_TIMEOUT, 4271 comm_timer_callback, &tm->super); 4272 if(tm->ev == NULL) { 4273 log_err("timer_create: 
event_base_set failed."); 4274 free(tm); 4275 return NULL; 4276 } 4277 return &tm->super; 4278 } 4279 4280 void 4281 comm_timer_disable(struct comm_timer* timer) 4282 { 4283 if(!timer) 4284 return; 4285 ub_timer_del(timer->ev_timer->ev); 4286 timer->ev_timer->enabled = 0; 4287 } 4288 4289 void 4290 comm_timer_set(struct comm_timer* timer, struct timeval* tv) 4291 { 4292 log_assert(tv); 4293 if(timer->ev_timer->enabled) 4294 comm_timer_disable(timer); 4295 if(ub_timer_add(timer->ev_timer->ev, timer->ev_timer->base->eb->base, 4296 comm_timer_callback, timer, tv) != 0) 4297 log_err("comm_timer_set: evtimer_add failed."); 4298 timer->ev_timer->enabled = 1; 4299 } 4300 4301 void 4302 comm_timer_delete(struct comm_timer* timer) 4303 { 4304 if(!timer) 4305 return; 4306 comm_timer_disable(timer); 4307 /* Free the sub struct timer->ev_timer derived from the super struct timer. 4308 * i.e. assert(timer == timer->ev_timer) 4309 */ 4310 ub_event_free(timer->ev_timer->ev); 4311 free(timer->ev_timer); 4312 } 4313 4314 void 4315 comm_timer_callback(int ATTR_UNUSED(fd), short event, void* arg) 4316 { 4317 struct comm_timer* tm = (struct comm_timer*)arg; 4318 if(!(event&UB_EV_TIMEOUT)) 4319 return; 4320 ub_comm_base_now(tm->ev_timer->base); 4321 tm->ev_timer->enabled = 0; 4322 fptr_ok(fptr_whitelist_comm_timer(tm->callback)); 4323 (*tm->callback)(tm->cb_arg); 4324 } 4325 4326 int 4327 comm_timer_is_set(struct comm_timer* timer) 4328 { 4329 return (int)timer->ev_timer->enabled; 4330 } 4331 4332 size_t 4333 comm_timer_get_mem(struct comm_timer* ATTR_UNUSED(timer)) 4334 { 4335 return sizeof(struct internal_timer); 4336 } 4337 4338 struct comm_signal* 4339 comm_signal_create(struct comm_base* base, 4340 void (*callback)(int, void*), void* cb_arg) 4341 { 4342 struct comm_signal* com = (struct comm_signal*)malloc( 4343 sizeof(struct comm_signal)); 4344 if(!com) { 4345 log_err("malloc failed"); 4346 return NULL; 4347 } 4348 com->base = base; 4349 com->callback = callback; 4350 com->cb_arg 
= cb_arg; 4351 com->ev_signal = NULL; 4352 return com; 4353 } 4354 4355 void 4356 comm_signal_callback(int sig, short event, void* arg) 4357 { 4358 struct comm_signal* comsig = (struct comm_signal*)arg; 4359 if(!(event & UB_EV_SIGNAL)) 4360 return; 4361 ub_comm_base_now(comsig->base); 4362 fptr_ok(fptr_whitelist_comm_signal(comsig->callback)); 4363 (*comsig->callback)(sig, comsig->cb_arg); 4364 } 4365 4366 int 4367 comm_signal_bind(struct comm_signal* comsig, int sig) 4368 { 4369 struct internal_signal* entry = (struct internal_signal*)calloc(1, 4370 sizeof(struct internal_signal)); 4371 if(!entry) { 4372 log_err("malloc failed"); 4373 return 0; 4374 } 4375 log_assert(comsig); 4376 /* add signal event */ 4377 entry->ev = ub_signal_new(comsig->base->eb->base, sig, 4378 comm_signal_callback, comsig); 4379 if(entry->ev == NULL) { 4380 log_err("Could not create signal event"); 4381 free(entry); 4382 return 0; 4383 } 4384 if(ub_signal_add(entry->ev, NULL) != 0) { 4385 log_err("Could not add signal handler"); 4386 ub_event_free(entry->ev); 4387 free(entry); 4388 return 0; 4389 } 4390 /* link into list */ 4391 entry->next = comsig->ev_signal; 4392 comsig->ev_signal = entry; 4393 return 1; 4394 } 4395 4396 void 4397 comm_signal_delete(struct comm_signal* comsig) 4398 { 4399 struct internal_signal* p, *np; 4400 if(!comsig) 4401 return; 4402 p=comsig->ev_signal; 4403 while(p) { 4404 np = p->next; 4405 ub_signal_del(p->ev); 4406 ub_event_free(p->ev); 4407 free(p); 4408 p = np; 4409 } 4410 free(comsig); 4411 } 4412