1 /* 2 * util/netevent.c - event notification 3 * 4 * Copyright (c) 2007, NLnet Labs. All rights reserved. 5 * 6 * This software is open source. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * Redistributions of source code must retain the above copyright notice, 13 * this list of conditions and the following disclaimer. 14 * 15 * Redistributions in binary form must reproduce the above copyright notice, 16 * this list of conditions and the following disclaimer in the documentation 17 * and/or other materials provided with the distribution. 18 * 19 * Neither the name of the NLNET LABS nor the names of its contributors may 20 * be used to endorse or promote products derived from this software without 21 * specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36 /** 37 * \file 38 * 39 * This file contains event notification functions. 40 */ 41 #include "config.h" 42 #include "util/netevent.h" 43 #include "util/ub_event.h" 44 #include "util/log.h" 45 #include "util/net_help.h" 46 #include "util/tcp_conn_limit.h" 47 #include "util/fptr_wlist.h" 48 #include "util/proxy_protocol.h" 49 #include "sldns/pkthdr.h" 50 #include "sldns/sbuffer.h" 51 #include "sldns/str2wire.h" 52 #include "dnstap/dnstap.h" 53 #include "dnscrypt/dnscrypt.h" 54 #include "services/listen_dnsport.h" 55 #ifdef HAVE_SYS_TYPES_H 56 #include <sys/types.h> 57 #endif 58 #ifdef HAVE_SYS_SOCKET_H 59 #include <sys/socket.h> 60 #endif 61 #ifdef HAVE_NETDB_H 62 #include <netdb.h> 63 #endif 64 #ifdef HAVE_POLL_H 65 #include <poll.h> 66 #endif 67 68 #ifdef HAVE_OPENSSL_SSL_H 69 #include <openssl/ssl.h> 70 #endif 71 #ifdef HAVE_OPENSSL_ERR_H 72 #include <openssl/err.h> 73 #endif 74 75 /* -------- Start of local definitions -------- */ 76 /** if CMSG_ALIGN is not defined on this platform, a workaround */ 77 #ifndef CMSG_ALIGN 78 # ifdef __CMSG_ALIGN 79 # define CMSG_ALIGN(n) __CMSG_ALIGN(n) 80 # elif defined(CMSG_DATA_ALIGN) 81 # define CMSG_ALIGN _CMSG_DATA_ALIGN 82 # else 83 # define CMSG_ALIGN(len) (((len)+sizeof(long)-1) & ~(sizeof(long)-1)) 84 # endif 85 #endif 86 87 /** if CMSG_LEN is not defined on this platform, a workaround */ 88 #ifndef CMSG_LEN 89 # define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr))+(len)) 90 #endif 91 92 /** if CMSG_SPACE is not defined on this platform, a workaround */ 93 #ifndef CMSG_SPACE 94 # ifdef _CMSG_HDR_ALIGN 95 # define CMSG_SPACE(l) (CMSG_ALIGN(l)+_CMSG_HDR_ALIGN(sizeof(struct cmsghdr))) 96 # else 97 # define CMSG_SPACE(l) (CMSG_ALIGN(l)+CMSG_ALIGN(sizeof(struct cmsghdr))) 98 # endif 99 #endif 100 101 /** The TCP writing query timeout in milliseconds */ 102 #define TCP_QUERY_TIMEOUT 120000 103 /** The minimum actual TCP timeout to use, regardless of what we advertise, 104 * in msec */ 105 #define TCP_QUERY_TIMEOUT_MINIMUM 200 106 107 #ifndef NONBLOCKING_IS_BROKEN 108 /** number of UDP reads to perform per read indication from select */ 109 #define NUM_UDP_PER_SELECT 100 110 #else 111 #define NUM_UDP_PER_SELECT 1 112 #endif 113 114 /** timeout in millisec to wait for write to unblock, packets dropped after.*/ 115 #define SEND_BLOCKED_WAIT_TIMEOUT 200 116 117 /** 118 * The internal event structure for keeping ub_event info for the event. 119 * Possibly other structures (list, tree) this is part of. 120 */ 121 struct internal_event { 122 /** the comm base */ 123 struct comm_base* base; 124 /** ub_event event type */ 125 struct ub_event* ev; 126 }; 127 128 /** 129 * Internal base structure, so that every thread has its own events. 130 */ 131 struct internal_base { 132 /** ub_event event_base type. */ 133 struct ub_event_base* base; 134 /** seconds time pointer points here */ 135 time_t secs; 136 /** timeval with current time */ 137 struct timeval now; 138 /** the event used for slow_accept timeouts */ 139 struct ub_event* slow_accept; 140 /** true if slow_accept is enabled */ 141 int slow_accept_enabled; 142 /** last log time for slow logging of file descriptor errors */ 143 time_t last_slow_log; 144 /** last log time for slow logging of write wait failures */ 145 time_t last_writewait_log; 146 }; 147 148 /** 149 * Internal timer structure, to store timer event in. 150 */ 151 struct internal_timer { 152 /** the super struct from which derived */ 153 struct comm_timer super; 154 /** the comm base */ 155 struct comm_base* base; 156 /** ub_event event type */ 157 struct ub_event* ev; 158 /** is timer enabled */ 159 uint8_t enabled; 160 }; 161 162 /** 163 * Internal signal structure, to store signal event in. 164 */ 165 struct internal_signal { 166 /** ub_event event type */ 167 struct ub_event* ev; 168 /** next in signal list */ 169 struct internal_signal* next; 170 }; 171 172 /** create a tcp handler with a parent */ 173 static struct comm_point* comm_point_create_tcp_handler( 174 struct comm_base *base, struct comm_point* parent, size_t bufsize, 175 struct sldns_buffer* spoolbuf, comm_point_callback_type* callback, 176 void* callback_arg, struct unbound_socket* socket); 177 178 /* -------- End of local definitions -------- */ 179 180 struct comm_base* 181 comm_base_create(int sigs) 182 { 183 struct comm_base* b = (struct comm_base*)calloc(1, 184 sizeof(struct comm_base)); 185 const char *evnm="event", *evsys="", *evmethod=""; 186 187 if(!b) 188 return NULL; 189 b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base)); 190 if(!b->eb) { 191 free(b); 192 return NULL; 193 } 194 b->eb->base = ub_default_event_base(sigs, &b->eb->secs, &b->eb->now); 195 if(!b->eb->base) { 196 free(b->eb); 197 free(b); 198 return NULL; 199 } 200 ub_comm_base_now(b); 201 ub_get_event_sys(b->eb->base, &evnm, &evsys, &evmethod); 202 verbose(VERB_ALGO, "%s %s uses %s method.", evnm, evsys, evmethod); 203 return b; 204 } 205 206 struct comm_base* 207 comm_base_create_event(struct ub_event_base* base) 208 { 209 struct comm_base* b = (struct comm_base*)calloc(1, 210 sizeof(struct comm_base)); 211 if(!b) 212 return NULL; 213 b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base)); 214 if(!b->eb) { 215 free(b); 216 return NULL; 217 } 218 b->eb->base = base; 219 ub_comm_base_now(b); 220 return b; 221 } 222 223 void 224 comm_base_delete(struct comm_base* b) 225 { 226 if(!b) 227 return; 228 if(b->eb->slow_accept_enabled) { 229 if(ub_event_del(b->eb->slow_accept) != 0) { 230 log_err("could not event_del slow_accept"); 231 } 232 ub_event_free(b->eb->slow_accept); 233 } 234 ub_event_base_free(b->eb->base); 235 b->eb->base = NULL; 236 free(b->eb); 237 free(b); 238 } 239 240 void 241 comm_base_delete_no_base(struct comm_base* b) 242 { 243 if(!b) 244 return; 245 if(b->eb->slow_accept_enabled) { 246 if(ub_event_del(b->eb->slow_accept) != 0) { 247 log_err("could not event_del slow_accept"); 248 } 249 ub_event_free(b->eb->slow_accept); 250 } 251 b->eb->base = NULL; 252 free(b->eb); 253 free(b); 254 } 255 256 void 257 comm_base_timept(struct comm_base* b, time_t** tt, struct timeval** tv) 258 { 259 *tt = &b->eb->secs; 260 *tv = &b->eb->now; 261 } 262 263 void 264 comm_base_dispatch(struct comm_base* b) 265 { 266 int retval; 267 retval = ub_event_base_dispatch(b->eb->base); 268 if(retval < 0) { 269 fatal_exit("event_dispatch returned error %d, " 270 "errno is %s", retval, strerror(errno)); 271 } 272 } 273 274 void comm_base_exit(struct comm_base* b) 275 { 276 if(ub_event_base_loopexit(b->eb->base) != 0) { 277 log_err("Could not loopexit"); 278 } 279 } 280 281 void comm_base_set_slow_accept_handlers(struct comm_base* b, 282 void (*stop_acc)(void*), void (*start_acc)(void*), void* arg) 283 { 284 b->stop_accept = stop_acc; 285 b->start_accept = start_acc; 286 b->cb_arg = arg; 287 } 288 289 struct ub_event_base* comm_base_internal(struct comm_base* b) 290 { 291 return b->eb->base; 292 } 293 294 /** see if errno for udp has to be logged or not uses globals */ 295 static int 296 udp_send_errno_needs_log(struct sockaddr* addr, socklen_t addrlen) 297 { 298 /* do not log transient errors (unless high verbosity) */ 299 #if defined(ENETUNREACH) || defined(EHOSTDOWN) || defined(EHOSTUNREACH) || defined(ENETDOWN) 300 switch(errno) { 301 # ifdef ENETUNREACH 302 case ENETUNREACH: 303 # endif 304 # ifdef EHOSTDOWN 305 case EHOSTDOWN: 306 # endif 307 # ifdef EHOSTUNREACH 308 case EHOSTUNREACH: 309 # endif 310 # ifdef ENETDOWN 311 case ENETDOWN: 312 # endif 313 case EPERM: 314 case EACCES: 315 if(verbosity < VERB_ALGO) 316 return 0; 317 default: 318 break; 319 } 320 #endif 321 /* permission denied is gotten for every send if the 322 * network is disconnected (on some OS), squelch it */ 323 if( ((errno == EPERM) 324 # ifdef EADDRNOTAVAIL 325 /* 'Cannot assign requested address' also when disconnected */ 326 || (errno == EADDRNOTAVAIL) 327 # endif 328 ) && verbosity < VERB_ALGO) 329 return 0; 330 # ifdef EADDRINUSE 331 /* If SO_REUSEADDR is set, we could try to connect to the same server 332 * from the same source port twice. */ 333 if(errno == EADDRINUSE && verbosity < VERB_DETAIL) 334 return 0; 335 # endif 336 /* squelch errors where people deploy AAAA ::ffff:bla for 337 * authority servers, which we try for intranets. */ 338 if(errno == EINVAL && addr_is_ip4mapped( 339 (struct sockaddr_storage*)addr, addrlen) && 340 verbosity < VERB_DETAIL) 341 return 0; 342 /* SO_BROADCAST sockopt can give access to 255.255.255.255, 343 * but a dns cache does not need it. */ 344 if(errno == EACCES && addr_is_broadcast( 345 (struct sockaddr_storage*)addr, addrlen) && 346 verbosity < VERB_DETAIL) 347 return 0; 348 return 1; 349 } 350 351 int tcp_connect_errno_needs_log(struct sockaddr* addr, socklen_t addrlen) 352 { 353 return udp_send_errno_needs_log(addr, addrlen); 354 } 355 356 /* send a UDP reply */ 357 int 358 comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet, 359 struct sockaddr* addr, socklen_t addrlen, int is_connected) 360 { 361 ssize_t sent; 362 log_assert(c->fd != -1); 363 #ifdef UNBOUND_DEBUG 364 if(sldns_buffer_remaining(packet) == 0) 365 log_err("error: send empty UDP packet"); 366 #endif 367 log_assert(addr && addrlen > 0); 368 if(!is_connected) { 369 sent = sendto(c->fd, (void*)sldns_buffer_begin(packet), 370 sldns_buffer_remaining(packet), 0, 371 addr, addrlen); 372 } else { 373 sent = send(c->fd, (void*)sldns_buffer_begin(packet), 374 sldns_buffer_remaining(packet), 0); 375 } 376 if(sent == -1) { 377 /* try again and block, waiting for IO to complete, 378 * we want to send the answer, and we will wait for 379 * the ethernet interface buffer to have space. */ 380 #ifndef USE_WINSOCK 381 if(errno == EAGAIN || errno == EINTR || 382 # ifdef EWOULDBLOCK 383 errno == EWOULDBLOCK || 384 # endif 385 errno == ENOBUFS) { 386 #else 387 if(WSAGetLastError() == WSAEINPROGRESS || 388 WSAGetLastError() == WSAEINTR || 389 WSAGetLastError() == WSAENOBUFS || 390 WSAGetLastError() == WSAEWOULDBLOCK) { 391 #endif 392 /* if we set the fd blocking, other threads suddenly 393 * have a blocking fd that they operate on */ 394 while(sent == -1 && ( 395 #ifndef USE_WINSOCK 396 errno == EAGAIN || errno == EINTR || 397 # ifdef EWOULDBLOCK 398 errno == EWOULDBLOCK || 399 # endif 400 errno == ENOBUFS 401 #else 402 WSAGetLastError() == WSAEINPROGRESS || 403 WSAGetLastError() == WSAEINTR || 404 WSAGetLastError() == WSAENOBUFS || 405 WSAGetLastError() == WSAEWOULDBLOCK 406 #endif 407 )) { 408 #if defined(HAVE_POLL) || defined(USE_WINSOCK) 409 struct pollfd p; 410 int pret; 411 memset(&p, 0, sizeof(p)); 412 p.fd = c->fd; 413 p.events = POLLOUT | POLLERR | POLLHUP; 414 # ifndef USE_WINSOCK 415 pret = poll(&p, 1, SEND_BLOCKED_WAIT_TIMEOUT); 416 # else 417 pret = WSAPoll(&p, 1, 418 SEND_BLOCKED_WAIT_TIMEOUT); 419 # endif 420 if(pret == 0) { 421 /* timer expired */ 422 struct comm_base* b = c->ev->base; 423 if(b->eb->last_writewait_log+SLOW_LOG_TIME <= 424 b->eb->secs) { 425 b->eb->last_writewait_log = b->eb->secs; 426 verbose(VERB_OPS, "send udp blocked " 427 "for long, dropping packet."); 428 } 429 return 0; 430 } else if(pret < 0 && 431 #ifndef USE_WINSOCK 432 errno != EAGAIN && errno != EINTR && 433 # ifdef EWOULDBLOCK 434 errno != EWOULDBLOCK && 435 # endif 436 errno != ENOBUFS 437 #else 438 WSAGetLastError() != WSAEINPROGRESS && 439 WSAGetLastError() != WSAEINTR && 440 WSAGetLastError() != WSAENOBUFS && 441 WSAGetLastError() != WSAEWOULDBLOCK 442 #endif 443 ) { 444 log_err("poll udp out failed: %s", 445 sock_strerror(errno)); 446 return 0; 447 } 448 #endif /* defined(HAVE_POLL) || defined(USE_WINSOCK) */ 449 if (!is_connected) { 450 sent = sendto(c->fd, (void*)sldns_buffer_begin(packet), 451 sldns_buffer_remaining(packet), 0, 452 addr, addrlen); 453 } else { 454 sent = send(c->fd, (void*)sldns_buffer_begin(packet), 455 sldns_buffer_remaining(packet), 0); 456 } 457 } 458 } 459 } 460 if(sent == -1) { 461 if(!udp_send_errno_needs_log(addr, addrlen)) 462 return 0; 463 if (!is_connected) { 464 verbose(VERB_OPS, "sendto failed: %s", sock_strerror(errno)); 465 } else { 466 verbose(VERB_OPS, "send failed: %s", sock_strerror(errno)); 467 } 468 if(addr) 469 log_addr(VERB_OPS, "remote address is", 470 (struct sockaddr_storage*)addr, addrlen); 471 return 0; 472 } else if((size_t)sent != sldns_buffer_remaining(packet)) { 473 log_err("sent %d in place of %d bytes", 474 (int)sent, (int)sldns_buffer_remaining(packet)); 475 return 0; 476 } 477 return 1; 478 } 479 480 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && (defined(HAVE_RECVMSG) || defined(HAVE_SENDMSG)) 481 /** print debug ancillary info */ 482 static void p_ancil(const char* str, struct comm_reply* r) 483 { 484 if(r->srctype != 4 && r->srctype != 6) { 485 log_info("%s: unknown srctype %d", str, r->srctype); 486 return; 487 } 488 489 if(r->srctype == 6) { 490 #ifdef IPV6_PKTINFO 491 char buf[1024]; 492 if(inet_ntop(AF_INET6, &r->pktinfo.v6info.ipi6_addr, 493 buf, (socklen_t)sizeof(buf)) == 0) { 494 (void)strlcpy(buf, "(inet_ntop error)", sizeof(buf)); 495 } 496 buf[sizeof(buf)-1]=0; 497 log_info("%s: %s %d", str, buf, r->pktinfo.v6info.ipi6_ifindex); 498 #endif 499 } else if(r->srctype == 4) { 500 #ifdef IP_PKTINFO 501 char buf1[1024], buf2[1024]; 502 if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_addr, 503 buf1, (socklen_t)sizeof(buf1)) == 0) { 504 (void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1)); 505 } 506 buf1[sizeof(buf1)-1]=0; 507 #ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST 508 if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_spec_dst, 509 buf2, (socklen_t)sizeof(buf2)) == 0) { 510 (void)strlcpy(buf2, "(inet_ntop error)", sizeof(buf2)); 511 } 512 buf2[sizeof(buf2)-1]=0; 513 #else 514 buf2[0]=0; 515 #endif 516 log_info("%s: %d %s %s", str, r->pktinfo.v4info.ipi_ifindex, 517 buf1, buf2); 518 #elif defined(IP_RECVDSTADDR) 519 char buf1[1024]; 520 if(inet_ntop(AF_INET, &r->pktinfo.v4addr, 521 buf1, (socklen_t)sizeof(buf1)) == 0) { 522 (void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1)); 523 } 524 buf1[sizeof(buf1)-1]=0; 525 log_info("%s: %s", str, buf1); 526 #endif /* IP_PKTINFO or PI_RECVDSTDADDR */ 527 } 528 } 529 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG||HAVE_SENDMSG */ 530 531 /** send a UDP reply over specified interface*/ 532 static int 533 comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet, 534 struct sockaddr* addr, socklen_t addrlen, struct comm_reply* r) 535 { 536 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_SENDMSG) 537 ssize_t sent; 538 struct msghdr msg; 539 struct iovec iov[1]; 540 union { 541 struct cmsghdr hdr; 542 char buf[256]; 543 } control; 544 #ifndef S_SPLINT_S 545 struct cmsghdr *cmsg; 546 #endif /* S_SPLINT_S */ 547 548 log_assert(c->fd != -1); 549 #ifdef UNBOUND_DEBUG 550 if(sldns_buffer_remaining(packet) == 0) 551 log_err("error: send empty UDP packet"); 552 #endif 553 log_assert(addr && addrlen > 0); 554 555 msg.msg_name = addr; 556 msg.msg_namelen = addrlen; 557 iov[0].iov_base = sldns_buffer_begin(packet); 558 iov[0].iov_len = sldns_buffer_remaining(packet); 559 msg.msg_iov = iov; 560 msg.msg_iovlen = 1; 561 msg.msg_control = control.buf; 562 #ifndef S_SPLINT_S 563 msg.msg_controllen = sizeof(control.buf); 564 #endif /* S_SPLINT_S */ 565 msg.msg_flags = 0; 566 567 #ifndef S_SPLINT_S 568 cmsg = CMSG_FIRSTHDR(&msg); 569 if(r->srctype == 4) { 570 #ifdef IP_PKTINFO 571 void* cmsg_data; 572 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo)); 573 log_assert(msg.msg_controllen <= sizeof(control.buf)); 574 cmsg->cmsg_level = IPPROTO_IP; 575 cmsg->cmsg_type = IP_PKTINFO; 576 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4info, 577 sizeof(struct in_pktinfo)); 578 /* unset the ifindex to not bypass the routing tables */ 579 cmsg_data = CMSG_DATA(cmsg); 580 ((struct in_pktinfo *) cmsg_data)->ipi_ifindex = 0; 581 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo)); 582 #elif defined(IP_SENDSRCADDR) 583 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr)); 584 log_assert(msg.msg_controllen <= sizeof(control.buf)); 585 cmsg->cmsg_level = IPPROTO_IP; 586 cmsg->cmsg_type = IP_SENDSRCADDR; 587 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4addr, 588 sizeof(struct in_addr)); 589 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr)); 590 #else 591 verbose(VERB_ALGO, "no IP_PKTINFO or IP_SENDSRCADDR"); 592 msg.msg_control = NULL; 593 #endif /* IP_PKTINFO or IP_SENDSRCADDR */ 594 } else if(r->srctype == 6) { 595 void* cmsg_data; 596 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo)); 597 log_assert(msg.msg_controllen <= sizeof(control.buf)); 598 cmsg->cmsg_level = IPPROTO_IPV6; 599 cmsg->cmsg_type = IPV6_PKTINFO; 600 memmove(CMSG_DATA(cmsg), &r->pktinfo.v6info, 601 sizeof(struct in6_pktinfo)); 602 /* unset the ifindex to not bypass the routing tables */ 603 cmsg_data = CMSG_DATA(cmsg); 604 ((struct in6_pktinfo *) cmsg_data)->ipi6_ifindex = 0; 605 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo)); 606 } else { 607 /* try to pass all 0 to use default route */ 608 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo)); 609 log_assert(msg.msg_controllen <= sizeof(control.buf)); 610 cmsg->cmsg_level = IPPROTO_IPV6; 611 cmsg->cmsg_type = IPV6_PKTINFO; 612 memset(CMSG_DATA(cmsg), 0, sizeof(struct in6_pktinfo)); 613 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo)); 614 } 615 #endif /* S_SPLINT_S */ 616 if(verbosity >= VERB_ALGO) 617 p_ancil("send_udp over interface", r); 618 sent = sendmsg(c->fd, &msg, 0); 619 if(sent == -1) { 620 /* try again and block, waiting for IO to complete, 621 * we want to send the answer, and we will wait for 622 * the ethernet interface buffer to have space. */ 623 #ifndef USE_WINSOCK 624 if(errno == EAGAIN || errno == EINTR || 625 # ifdef EWOULDBLOCK 626 errno == EWOULDBLOCK || 627 # endif 628 errno == ENOBUFS) { 629 #else 630 if(WSAGetLastError() == WSAEINPROGRESS || 631 WSAGetLastError() == WSAEINTR || 632 WSAGetLastError() == WSAENOBUFS || 633 WSAGetLastError() == WSAEWOULDBLOCK) { 634 #endif 635 while(sent == -1 && ( 636 #ifndef USE_WINSOCK 637 errno == EAGAIN || errno == EINTR || 638 # ifdef EWOULDBLOCK 639 errno == EWOULDBLOCK || 640 # endif 641 errno == ENOBUFS 642 #else 643 WSAGetLastError() == WSAEINPROGRESS || 644 WSAGetLastError() == WSAEINTR || 645 WSAGetLastError() == WSAENOBUFS || 646 WSAGetLastError() == WSAEWOULDBLOCK 647 #endif 648 )) { 649 #if defined(HAVE_POLL) || defined(USE_WINSOCK) 650 struct pollfd p; 651 int pret; 652 memset(&p, 0, sizeof(p)); 653 p.fd = c->fd; 654 p.events = POLLOUT | POLLERR | POLLHUP; 655 # ifndef USE_WINSOCK 656 pret = poll(&p, 1, SEND_BLOCKED_WAIT_TIMEOUT); 657 # else 658 pret = WSAPoll(&p, 1, 659 SEND_BLOCKED_WAIT_TIMEOUT); 660 # endif 661 if(pret == 0) { 662 /* timer expired */ 663 struct comm_base* b = c->ev->base; 664 if(b->eb->last_writewait_log+SLOW_LOG_TIME <= 665 b->eb->secs) { 666 b->eb->last_writewait_log = b->eb->secs; 667 verbose(VERB_OPS, "send udp blocked " 668 "for long, dropping packet."); 669 } 670 return 0; 671 } else if(pret < 0 && 672 #ifndef USE_WINSOCK 673 errno != EAGAIN && errno != EINTR && 674 # ifdef EWOULDBLOCK 675 errno != EWOULDBLOCK && 676 # endif 677 errno != ENOBUFS 678 #else 679 WSAGetLastError() != WSAEINPROGRESS && 680 WSAGetLastError() != WSAEINTR && 681 WSAGetLastError() != WSAENOBUFS && 682 WSAGetLastError() != WSAEWOULDBLOCK 683 #endif 684 ) { 685 log_err("poll udp out failed: %s", 686 sock_strerror(errno)); 687 return 0; 688 } 689 #endif /* defined(HAVE_POLL) || defined(USE_WINSOCK) */ 690 sent = sendmsg(c->fd, &msg, 0); 691 } 692 } 693 } 694 if(sent == -1) { 695 if(!udp_send_errno_needs_log(addr, addrlen)) 696 return 0; 697 verbose(VERB_OPS, "sendmsg failed: %s", strerror(errno)); 698 log_addr(VERB_OPS, "remote address is", 699 (struct sockaddr_storage*)addr, addrlen); 700 #ifdef __NetBSD__ 701 /* netbsd 7 has IP_PKTINFO for recv but not send */ 702 if(errno == EINVAL && r->srctype == 4) 703 log_err("sendmsg: No support for sendmsg(IP_PKTINFO). " 704 "Please disable interface-automatic"); 705 #endif 706 return 0; 707 } else if((size_t)sent != sldns_buffer_remaining(packet)) { 708 log_err("sent %d in place of %d bytes", 709 (int)sent, (int)sldns_buffer_remaining(packet)); 710 return 0; 711 } 712 return 1; 713 #else 714 (void)c; 715 (void)packet; 716 (void)addr; 717 (void)addrlen; 718 (void)r; 719 log_err("sendmsg: IPV6_PKTINFO not supported"); 720 return 0; 721 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_SENDMSG */ 722 } 723 724 /** return true is UDP receive error needs to be logged */ 725 static int udp_recv_needs_log(int err) 726 { 727 switch(err) { 728 case EACCES: /* some hosts send ICMP 'Permission Denied' */ 729 #ifndef USE_WINSOCK 730 case ECONNREFUSED: 731 # ifdef ENETUNREACH 732 case ENETUNREACH: 733 # endif 734 # ifdef EHOSTDOWN 735 case EHOSTDOWN: 736 # endif 737 # ifdef EHOSTUNREACH 738 case EHOSTUNREACH: 739 # endif 740 # ifdef ENETDOWN 741 case ENETDOWN: 742 # endif 743 #else /* USE_WINSOCK */ 744 case WSAECONNREFUSED: 745 case WSAENETUNREACH: 746 case WSAEHOSTDOWN: 747 case WSAEHOSTUNREACH: 748 case WSAENETDOWN: 749 #endif 750 if(verbosity >= VERB_ALGO) 751 return 1; 752 return 0; 753 default: 754 break; 755 } 756 return 1; 757 } 758 759 /** Parses the PROXYv2 header from buf and updates the comm_reply struct. 760 * Returns 1 on success, 0 on failure. */ 761 static int consume_pp2_header(struct sldns_buffer* buf, struct comm_reply* rep, 762 int stream) { 763 size_t size; 764 struct pp2_header *header = pp2_read_header(buf); 765 if(header == NULL) return 0; 766 size = PP2_HEADER_SIZE + ntohs(header->len); 767 if((header->ver_cmd & 0xF) == PP2_CMD_LOCAL) { 768 /* A connection from the proxy itself. 769 * No need to do anything with addresses. */ 770 goto done; 771 } 772 if(header->fam_prot == 0x00) { 773 /* Unspecified family and protocol. This could be used for 774 * health checks by proxies. 775 * No need to do anything with addresses. */ 776 goto done; 777 } 778 /* Read the proxied address */ 779 switch(header->fam_prot) { 780 case 0x11: /* AF_INET|STREAM */ 781 case 0x12: /* AF_INET|DGRAM */ 782 { 783 struct sockaddr_in* addr = 784 (struct sockaddr_in*)&rep->client_addr; 785 addr->sin_family = AF_INET; 786 addr->sin_addr.s_addr = header->addr.addr4.src_addr; 787 addr->sin_port = header->addr.addr4.src_port; 788 rep->client_addrlen = (socklen_t)sizeof(struct sockaddr_in); 789 } 790 /* Ignore the destination address; it should be us. */ 791 break; 792 case 0x21: /* AF_INET6|STREAM */ 793 case 0x22: /* AF_INET6|DGRAM */ 794 { 795 struct sockaddr_in6* addr = 796 (struct sockaddr_in6*)&rep->client_addr; 797 memset(addr, 0, sizeof(*addr)); 798 addr->sin6_family = AF_INET6; 799 memcpy(&addr->sin6_addr, 800 header->addr.addr6.src_addr, 16); 801 addr->sin6_port = header->addr.addr6.src_port; 802 rep->client_addrlen = (socklen_t)sizeof(struct sockaddr_in6); 803 } 804 /* Ignore the destination address; it should be us. */ 805 break; 806 } 807 rep->is_proxied = 1; 808 done: 809 if(!stream) { 810 /* We are reading a whole packet; 811 * Move the rest of the data to overwrite the PROXYv2 header */ 812 /* XXX can we do better to avoid memmove? */ 813 memmove(header, ((char*)header)+size, 814 sldns_buffer_limit(buf)-size); 815 sldns_buffer_set_limit(buf, sldns_buffer_limit(buf)-size); 816 } 817 return 1; 818 } 819 820 void 821 comm_point_udp_ancil_callback(int fd, short event, void* arg) 822 { 823 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG) 824 struct comm_reply rep; 825 struct msghdr msg; 826 struct iovec iov[1]; 827 ssize_t rcv; 828 union { 829 struct cmsghdr hdr; 830 char buf[256]; 831 } ancil; 832 int i; 833 #ifndef S_SPLINT_S 834 struct cmsghdr* cmsg; 835 #endif /* S_SPLINT_S */ 836 837 rep.c = (struct comm_point*)arg; 838 log_assert(rep.c->type == comm_udp); 839 840 if(!(event&UB_EV_READ)) 841 return; 842 log_assert(rep.c && rep.c->buffer && rep.c->fd == fd); 843 ub_comm_base_now(rep.c->ev->base); 844 for(i=0; i<NUM_UDP_PER_SELECT; i++) { 845 sldns_buffer_clear(rep.c->buffer); 846 rep.remote_addrlen = (socklen_t)sizeof(rep.remote_addr); 847 log_assert(fd != -1); 848 log_assert(sldns_buffer_remaining(rep.c->buffer) > 0); 849 msg.msg_name = &rep.remote_addr; 850 msg.msg_namelen = (socklen_t)sizeof(rep.remote_addr); 851 iov[0].iov_base = sldns_buffer_begin(rep.c->buffer); 852 iov[0].iov_len = sldns_buffer_remaining(rep.c->buffer); 853 msg.msg_iov = iov; 854 msg.msg_iovlen = 1; 855 msg.msg_control = ancil.buf; 856 #ifndef S_SPLINT_S 857 msg.msg_controllen = sizeof(ancil.buf); 858 #endif /* S_SPLINT_S */ 859 msg.msg_flags = 0; 860 rcv = recvmsg(fd, &msg, MSG_DONTWAIT); 861 if(rcv == -1) { 862 if(errno != EAGAIN && errno != EINTR 863 && udp_recv_needs_log(errno)) { 864 log_err("recvmsg failed: %s", strerror(errno)); 865 } 866 return; 867 } 868 rep.remote_addrlen = msg.msg_namelen; 869 sldns_buffer_skip(rep.c->buffer, rcv); 870 sldns_buffer_flip(rep.c->buffer); 871 rep.srctype = 0; 872 rep.is_proxied = 0; 873 #ifndef S_SPLINT_S 874 for(cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; 875 cmsg = CMSG_NXTHDR(&msg, cmsg)) { 876 if( cmsg->cmsg_level == IPPROTO_IPV6 && 877 cmsg->cmsg_type == IPV6_PKTINFO) { 878 rep.srctype = 6; 879 memmove(&rep.pktinfo.v6info, CMSG_DATA(cmsg), 880 sizeof(struct in6_pktinfo)); 881 break; 882 #ifdef IP_PKTINFO 883 } else if( cmsg->cmsg_level == IPPROTO_IP && 884 cmsg->cmsg_type == IP_PKTINFO) { 885 rep.srctype = 4; 886 memmove(&rep.pktinfo.v4info, CMSG_DATA(cmsg), 887 sizeof(struct in_pktinfo)); 888 break; 889 #elif defined(IP_RECVDSTADDR) 890 } else if( cmsg->cmsg_level == IPPROTO_IP && 891 cmsg->cmsg_type == IP_RECVDSTADDR) { 892 rep.srctype = 4; 893 memmove(&rep.pktinfo.v4addr, CMSG_DATA(cmsg), 894 sizeof(struct in_addr)); 895 break; 896 #endif /* IP_PKTINFO or IP_RECVDSTADDR */ 897 } 898 } 899 if(verbosity >= VERB_ALGO) 900 p_ancil("receive_udp on interface", &rep); 901 #endif /* S_SPLINT_S */ 902 903 if(rep.c->pp2_enabled && !consume_pp2_header(rep.c->buffer, 904 &rep, 0)) { 905 log_err("proxy_protocol: could not consume PROXYv2 header"); 906 return; 907 } 908 if(!rep.is_proxied) { 909 rep.client_addrlen = rep.remote_addrlen; 910 memmove(&rep.client_addr, &rep.remote_addr, 911 rep.remote_addrlen); 912 } 913 914 fptr_ok(fptr_whitelist_comm_point(rep.c->callback)); 915 if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) { 916 /* send back immediate reply */ 917 (void)comm_point_send_udp_msg_if(rep.c, rep.c->buffer, 918 (struct sockaddr*)&rep.remote_addr, 919 rep.remote_addrlen, &rep); 920 } 921 if(!rep.c || rep.c->fd == -1) /* commpoint closed */ 922 break; 923 } 924 #else 925 (void)fd; 926 (void)event; 927 (void)arg; 928 fatal_exit("recvmsg: No support for IPV6_PKTINFO; IP_PKTINFO or IP_RECVDSTADDR. " 929 "Please disable interface-automatic"); 930 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */ 931 } 932 933 void 934 comm_point_udp_callback(int fd, short event, void* arg) 935 { 936 struct comm_reply rep; 937 ssize_t rcv; 938 int i; 939 struct sldns_buffer *buffer; 940 941 rep.c = (struct comm_point*)arg; 942 log_assert(rep.c->type == comm_udp); 943 944 if(!(event&UB_EV_READ)) 945 return; 946 log_assert(rep.c && rep.c->buffer && rep.c->fd == fd); 947 ub_comm_base_now(rep.c->ev->base); 948 for(i=0; i<NUM_UDP_PER_SELECT; i++) { 949 sldns_buffer_clear(rep.c->buffer); 950 rep.remote_addrlen = (socklen_t)sizeof(rep.remote_addr); 951 log_assert(fd != -1); 952 log_assert(sldns_buffer_remaining(rep.c->buffer) > 0); 953 rcv = recvfrom(fd, (void*)sldns_buffer_begin(rep.c->buffer), 954 sldns_buffer_remaining(rep.c->buffer), MSG_DONTWAIT, 955 (struct sockaddr*)&rep.remote_addr, &rep.remote_addrlen); 956 if(rcv == -1) { 957 #ifndef USE_WINSOCK 958 if(errno != EAGAIN && errno != EINTR 959 && udp_recv_needs_log(errno)) 960 log_err("recvfrom %d failed: %s", 961 fd, strerror(errno)); 962 #else 963 if(WSAGetLastError() != WSAEINPROGRESS && 964 WSAGetLastError() != WSAECONNRESET && 965 WSAGetLastError()!= WSAEWOULDBLOCK && 966 udp_recv_needs_log(WSAGetLastError())) 967 log_err("recvfrom failed: %s", 968 wsa_strerror(WSAGetLastError())); 969 #endif 970 return; 971 } 972 sldns_buffer_skip(rep.c->buffer, rcv); 973 sldns_buffer_flip(rep.c->buffer); 974 rep.srctype = 0; 975 rep.is_proxied = 0; 976 977 if(rep.c->pp2_enabled && !consume_pp2_header(rep.c->buffer, 978 &rep, 0)) { 979 log_err("proxy_protocol: could not consume PROXYv2 header"); 980 return; 981 } 982 if(!rep.is_proxied) { 983 rep.client_addrlen = rep.remote_addrlen; 984 memmove(&rep.client_addr, &rep.remote_addr, 985 rep.remote_addrlen); 986 } 987 988 fptr_ok(fptr_whitelist_comm_point(rep.c->callback)); 989 if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) { 990 /* send back immediate reply */ 991 #ifdef USE_DNSCRYPT 992 buffer = rep.c->dnscrypt_buffer; 993 #else 994 buffer = rep.c->buffer; 995 #endif 996 (void)comm_point_send_udp_msg(rep.c, buffer, 997 (struct sockaddr*)&rep.remote_addr, 998 rep.remote_addrlen, 0); 999 } 1000 if(!rep.c || rep.c->fd != fd) /* commpoint closed to -1 or reused for 1001 another UDP port. Note rep.c cannot be reused with TCP fd. */ 1002 break; 1003 } 1004 } 1005 1006 int adjusted_tcp_timeout(struct comm_point* c) 1007 { 1008 if(c->tcp_timeout_msec < TCP_QUERY_TIMEOUT_MINIMUM) 1009 return TCP_QUERY_TIMEOUT_MINIMUM; 1010 return c->tcp_timeout_msec; 1011 } 1012 1013 /** Use a new tcp handler for new query fd, set to read query */ 1014 static void 1015 setup_tcp_handler(struct comm_point* c, int fd, int cur, int max) 1016 { 1017 int handler_usage; 1018 log_assert(c->type == comm_tcp || c->type == comm_http); 1019 log_assert(c->fd == -1); 1020 sldns_buffer_clear(c->buffer); 1021 #ifdef USE_DNSCRYPT 1022 if (c->dnscrypt) 1023 sldns_buffer_clear(c->dnscrypt_buffer); 1024 #endif 1025 c->tcp_is_reading = 1; 1026 c->tcp_byte_count = 0; 1027 c->tcp_keepalive = 0; 1028 /* if more than half the tcp handlers are in use, use a shorter 1029 * timeout for this TCP connection, we need to make space for 1030 * other connections to be able to get attention */ 1031 /* If > 50% TCP handler structures in use, set timeout to 1/100th 1032 * configured value. 1033 * If > 65%TCP handler structures in use, set to 1/500th configured 1034 * value. 1035 * If > 80% TCP handler structures in use, set to 0. 1036 * 1037 * If the timeout to use falls below 200 milliseconds, an actual 1038 * timeout of 200ms is used. 1039 */ 1040 handler_usage = (cur * 100) / max; 1041 if(handler_usage > 50 && handler_usage <= 65) 1042 c->tcp_timeout_msec /= 100; 1043 else if (handler_usage > 65 && handler_usage <= 80) 1044 c->tcp_timeout_msec /= 500; 1045 else if (handler_usage > 80) 1046 c->tcp_timeout_msec = 0; 1047 comm_point_start_listening(c, fd, adjusted_tcp_timeout(c)); 1048 } 1049 1050 void comm_base_handle_slow_accept(int ATTR_UNUSED(fd), 1051 short ATTR_UNUSED(event), void* arg) 1052 { 1053 struct comm_base* b = (struct comm_base*)arg; 1054 /* timeout for the slow accept, re-enable accepts again */ 1055 if(b->start_accept) { 1056 verbose(VERB_ALGO, "wait is over, slow accept disabled"); 1057 fptr_ok(fptr_whitelist_start_accept(b->start_accept)); 1058 (*b->start_accept)(b->cb_arg); 1059 b->eb->slow_accept_enabled = 0; 1060 } 1061 } 1062 1063 int comm_point_perform_accept(struct comm_point* c, 1064 struct sockaddr_storage* addr, socklen_t* addrlen) 1065 { 1066 int new_fd; 1067 *addrlen = (socklen_t)sizeof(*addr); 1068 #ifndef HAVE_ACCEPT4 1069 new_fd = accept(c->fd, (struct sockaddr*)addr, addrlen); 1070 #else 1071 /* SOCK_NONBLOCK saves extra calls to fcntl for the same result */ 1072 new_fd = accept4(c->fd, (struct sockaddr*)addr, addrlen, SOCK_NONBLOCK); 1073 #endif 1074 if(new_fd == -1) { 1075 #ifndef USE_WINSOCK 1076 /* EINTR is signal interrupt. others are closed connection. */ 1077 if( errno == EINTR || errno == EAGAIN 1078 #ifdef EWOULDBLOCK 1079 || errno == EWOULDBLOCK 1080 #endif 1081 #ifdef ECONNABORTED 1082 || errno == ECONNABORTED 1083 #endif 1084 #ifdef EPROTO 1085 || errno == EPROTO 1086 #endif /* EPROTO */ 1087 ) 1088 return -1; 1089 #if defined(ENFILE) && defined(EMFILE) 1090 if(errno == ENFILE || errno == EMFILE) { 1091 /* out of file descriptors, likely outside of our 1092 * control. stop accept() calls for some time */ 1093 if(c->ev->base->stop_accept) { 1094 struct comm_base* b = c->ev->base; 1095 struct timeval tv; 1096 verbose(VERB_ALGO, "out of file descriptors: " 1097 "slow accept"); 1098 ub_comm_base_now(b); 1099 if(b->eb->last_slow_log+SLOW_LOG_TIME <= 1100 b->eb->secs) { 1101 b->eb->last_slow_log = b->eb->secs; 1102 verbose(VERB_OPS, "accept failed, " 1103 "slow down accept for %d " 1104 "msec: %s", 1105 NETEVENT_SLOW_ACCEPT_TIME, 1106 sock_strerror(errno)); 1107 } 1108 b->eb->slow_accept_enabled = 1; 1109 fptr_ok(fptr_whitelist_stop_accept( 1110 b->stop_accept)); 1111 (*b->stop_accept)(b->cb_arg); 1112 /* set timeout, no mallocs */ 1113 tv.tv_sec = NETEVENT_SLOW_ACCEPT_TIME/1000; 1114 tv.tv_usec = (NETEVENT_SLOW_ACCEPT_TIME%1000)*1000; 1115 b->eb->slow_accept = ub_event_new(b->eb->base, 1116 -1, UB_EV_TIMEOUT, 1117 comm_base_handle_slow_accept, b); 1118 if(b->eb->slow_accept == NULL) { 1119 /* we do not want to log here, because 1120 * that would spam the logfiles. 1121 * error: "event_base_set failed." */ 1122 } 1123 else if(ub_event_add(b->eb->slow_accept, &tv) 1124 != 0) { 1125 /* we do not want to log here, 1126 * error: "event_add failed." */ 1127 } 1128 } else { 1129 log_err("accept, with no slow down, " 1130 "failed: %s", sock_strerror(errno)); 1131 } 1132 return -1; 1133 } 1134 #endif 1135 #else /* USE_WINSOCK */ 1136 if(WSAGetLastError() == WSAEINPROGRESS || 1137 WSAGetLastError() == WSAECONNRESET) 1138 return -1; 1139 if(WSAGetLastError() == WSAEWOULDBLOCK) { 1140 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 1141 return -1; 1142 } 1143 #endif 1144 log_err_addr("accept failed", sock_strerror(errno), addr, 1145 *addrlen); 1146 return -1; 1147 } 1148 if(c->tcp_conn_limit && c->type == comm_tcp_accept) { 1149 c->tcl_addr = tcl_addr_lookup(c->tcp_conn_limit, addr, *addrlen); 1150 if(!tcl_new_connection(c->tcl_addr)) { 1151 if(verbosity >= 3) 1152 log_err_addr("accept rejected", 1153 "connection limit exceeded", addr, *addrlen); 1154 close(new_fd); 1155 return -1; 1156 } 1157 } 1158 #ifndef HAVE_ACCEPT4 1159 fd_set_nonblock(new_fd); 1160 #endif 1161 return new_fd; 1162 } 1163 1164 #ifdef USE_WINSOCK 1165 static long win_bio_cb(BIO *b, int oper, const char* ATTR_UNUSED(argp), 1166 #ifdef HAVE_BIO_SET_CALLBACK_EX 1167 size_t ATTR_UNUSED(len), 1168 #endif 1169 int ATTR_UNUSED(argi), long argl, 1170 #ifndef HAVE_BIO_SET_CALLBACK_EX 1171 long retvalue 1172 #else 1173 int retvalue, size_t* ATTR_UNUSED(processed) 1174 #endif 1175 ) 1176 { 1177 int wsa_err = WSAGetLastError(); /* store errcode before it is gone */ 1178 verbose(VERB_ALGO, "bio_cb %d, %s %s %s", oper, 1179 (oper&BIO_CB_RETURN)?"return":"before", 1180 (oper&BIO_CB_READ)?"read":((oper&BIO_CB_WRITE)?"write":"other"), 1181 wsa_err==WSAEWOULDBLOCK?"wsawb":""); 1182 /* on windows, check if previous operation caused EWOULDBLOCK */ 1183 if( (oper == (BIO_CB_READ|BIO_CB_RETURN) && argl == 0) || 1184 (oper == (BIO_CB_GETS|BIO_CB_RETURN) && argl == 0)) { 1185 if(wsa_err == WSAEWOULDBLOCK) 1186 ub_winsock_tcp_wouldblock((struct ub_event*) 1187 BIO_get_callback_arg(b), UB_EV_READ); 1188 } 1189 if( (oper == (BIO_CB_WRITE|BIO_CB_RETURN) && argl == 0) || 1190 (oper == (BIO_CB_PUTS|BIO_CB_RETURN) && argl == 0)) { 1191 if(wsa_err == WSAEWOULDBLOCK) 1192 ub_winsock_tcp_wouldblock((struct ub_event*) 1193 BIO_get_callback_arg(b), UB_EV_WRITE); 1194 } 1195 /* return original return value */ 1196 return retvalue; 1197 } 1198 1199 /** set win bio callbacks for nonblocking operations */ 1200 void 1201 comm_point_tcp_win_bio_cb(struct comm_point* c, void* thessl) 1202 { 1203 SSL* ssl = (SSL*)thessl; 1204 /* set them both just in case, but usually they are the same BIO */ 1205 #ifdef HAVE_BIO_SET_CALLBACK_EX 1206 BIO_set_callback_ex(SSL_get_rbio(ssl), &win_bio_cb); 1207 #else 1208 BIO_set_callback(SSL_get_rbio(ssl), &win_bio_cb); 1209 #endif 1210 BIO_set_callback_arg(SSL_get_rbio(ssl), (char*)c->ev->ev); 1211 #ifdef HAVE_BIO_SET_CALLBACK_EX 1212 BIO_set_callback_ex(SSL_get_wbio(ssl), &win_bio_cb); 1213 #else 1214 BIO_set_callback(SSL_get_wbio(ssl), &win_bio_cb); 1215 #endif 1216 BIO_set_callback_arg(SSL_get_wbio(ssl), (char*)c->ev->ev); 1217 } 1218 #endif 1219 1220 #ifdef HAVE_NGHTTP2 1221 /** Create http2 session server. Per connection, after TCP accepted.*/ 1222 static int http2_session_server_create(struct http2_session* h2_session) 1223 { 1224 log_assert(h2_session->callbacks); 1225 h2_session->is_drop = 0; 1226 if(nghttp2_session_server_new(&h2_session->session, 1227 h2_session->callbacks, 1228 h2_session) == NGHTTP2_ERR_NOMEM) { 1229 log_err("failed to create nghttp2 session server"); 1230 return 0; 1231 } 1232 1233 return 1; 1234 } 1235 1236 /** Submit http2 setting to session. Once per session. */ 1237 static int http2_submit_settings(struct http2_session* h2_session) 1238 { 1239 int ret; 1240 nghttp2_settings_entry settings[1] = { 1241 {NGHTTP2_SETTINGS_MAX_CONCURRENT_STREAMS, 1242 h2_session->c->http2_max_streams}}; 1243 1244 ret = nghttp2_submit_settings(h2_session->session, NGHTTP2_FLAG_NONE, 1245 settings, 1); 1246 if(ret) { 1247 verbose(VERB_QUERY, "http2: submit_settings failed, " 1248 "error: %s", nghttp2_strerror(ret)); 1249 return 0; 1250 } 1251 return 1; 1252 } 1253 #endif /* HAVE_NGHTTP2 */ 1254 1255 1256 void 1257 comm_point_tcp_accept_callback(int fd, short event, void* arg) 1258 { 1259 struct comm_point* c = (struct comm_point*)arg, *c_hdl; 1260 int new_fd; 1261 log_assert(c->type == comm_tcp_accept); 1262 if(!(event & UB_EV_READ)) { 1263 log_info("ignoring tcp accept event %d", (int)event); 1264 return; 1265 } 1266 ub_comm_base_now(c->ev->base); 1267 /* find free tcp handler. */ 1268 if(!c->tcp_free) { 1269 log_warn("accepted too many tcp, connections full"); 1270 return; 1271 } 1272 /* accept incoming connection. */ 1273 c_hdl = c->tcp_free; 1274 /* clear leftover flags from previous use, and then set the 1275 * correct event base for the event structure for libevent */ 1276 ub_event_free(c_hdl->ev->ev); 1277 c_hdl->ev->ev = NULL; 1278 if((c_hdl->type == comm_tcp && c_hdl->tcp_req_info) || 1279 c_hdl->type == comm_local || c_hdl->type == comm_raw) 1280 c_hdl->tcp_do_toggle_rw = 0; 1281 else c_hdl->tcp_do_toggle_rw = 1; 1282 1283 if(c_hdl->type == comm_http) { 1284 #ifdef HAVE_NGHTTP2 1285 if(!c_hdl->h2_session || 1286 !http2_session_server_create(c_hdl->h2_session)) { 1287 log_warn("failed to create nghttp2"); 1288 return; 1289 } 1290 if(!c_hdl->h2_session || 1291 !http2_submit_settings(c_hdl->h2_session)) { 1292 log_warn("failed to submit http2 settings"); 1293 return; 1294 } 1295 if(!c->ssl) { 1296 c_hdl->tcp_do_toggle_rw = 0; 1297 c_hdl->use_h2 = 1; 1298 } 1299 #endif 1300 c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1, 1301 UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT, 1302 comm_point_http_handle_callback, c_hdl); 1303 } else { 1304 c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1, 1305 UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT, 1306 comm_point_tcp_handle_callback, c_hdl); 1307 } 1308 if(!c_hdl->ev->ev) { 1309 log_warn("could not ub_event_new, dropped tcp"); 1310 return; 1311 } 1312 log_assert(fd != -1); 1313 (void)fd; 1314 new_fd = comm_point_perform_accept(c, &c_hdl->repinfo.remote_addr, 1315 &c_hdl->repinfo.remote_addrlen); 1316 if(new_fd == -1) 1317 return; 1318 /* Copy remote_address to client_address. 1319 * Simplest way/time for streams to do that. */ 1320 c_hdl->repinfo.client_addrlen = c_hdl->repinfo.remote_addrlen; 1321 memmove(&c_hdl->repinfo.client_addr, 1322 &c_hdl->repinfo.remote_addr, 1323 c_hdl->repinfo.remote_addrlen); 1324 if(c->ssl) { 1325 c_hdl->ssl = incoming_ssl_fd(c->ssl, new_fd); 1326 if(!c_hdl->ssl) { 1327 c_hdl->fd = new_fd; 1328 comm_point_close(c_hdl); 1329 return; 1330 } 1331 c_hdl->ssl_shake_state = comm_ssl_shake_read; 1332 #ifdef USE_WINSOCK 1333 comm_point_tcp_win_bio_cb(c_hdl, c_hdl->ssl); 1334 #endif 1335 } 1336 1337 /* grab the tcp handler buffers */ 1338 c->cur_tcp_count++; 1339 c->tcp_free = c_hdl->tcp_free; 1340 c_hdl->tcp_free = NULL; 1341 if(!c->tcp_free) { 1342 /* stop accepting incoming queries for now. */ 1343 comm_point_stop_listening(c); 1344 } 1345 setup_tcp_handler(c_hdl, new_fd, c->cur_tcp_count, c->max_tcp_count); 1346 } 1347 1348 /** Make tcp handler free for next assignment */ 1349 static void 1350 reclaim_tcp_handler(struct comm_point* c) 1351 { 1352 log_assert(c->type == comm_tcp); 1353 if(c->ssl) { 1354 #ifdef HAVE_SSL 1355 SSL_shutdown(c->ssl); 1356 SSL_free(c->ssl); 1357 c->ssl = NULL; 1358 #endif 1359 } 1360 comm_point_close(c); 1361 if(c->tcp_parent) { 1362 if(c != c->tcp_parent->tcp_free) { 1363 c->tcp_parent->cur_tcp_count--; 1364 c->tcp_free = c->tcp_parent->tcp_free; 1365 c->tcp_parent->tcp_free = c; 1366 } 1367 if(!c->tcp_free) { 1368 /* re-enable listening on accept socket */ 1369 comm_point_start_listening(c->tcp_parent, -1, -1); 1370 } 1371 } 1372 c->tcp_more_read_again = NULL; 1373 c->tcp_more_write_again = NULL; 1374 c->tcp_byte_count = 0; 1375 c->pp2_header_state = pp2_header_none; 1376 sldns_buffer_clear(c->buffer); 1377 } 1378 1379 /** do the callback when writing is done */ 1380 static void 1381 tcp_callback_writer(struct comm_point* c) 1382 { 1383 log_assert(c->type == comm_tcp); 1384 if(!c->tcp_write_and_read) { 1385 sldns_buffer_clear(c->buffer); 1386 c->tcp_byte_count = 0; 1387 } 1388 if(c->tcp_do_toggle_rw) 1389 c->tcp_is_reading = 1; 1390 /* switch from listening(write) to listening(read) */ 1391 if(c->tcp_req_info) { 1392 tcp_req_info_handle_writedone(c->tcp_req_info); 1393 } else { 1394 comm_point_stop_listening(c); 1395 if(c->tcp_write_and_read) { 1396 fptr_ok(fptr_whitelist_comm_point(c->callback)); 1397 if( (*c->callback)(c, c->cb_arg, NETEVENT_PKT_WRITTEN, 1398 &c->repinfo) ) { 1399 comm_point_start_listening(c, -1, 1400 adjusted_tcp_timeout(c)); 1401 } 1402 } else { 1403 comm_point_start_listening(c, -1, 1404 adjusted_tcp_timeout(c)); 1405 } 1406 } 1407 } 1408 1409 /** do the callback when reading is done */ 1410 static void 1411 tcp_callback_reader(struct comm_point* c) 1412 { 1413 log_assert(c->type == comm_tcp || c->type == comm_local); 1414 sldns_buffer_flip(c->buffer); 1415 if(c->tcp_do_toggle_rw) 1416 c->tcp_is_reading = 0; 1417 c->tcp_byte_count = 0; 1418 if(c->tcp_req_info) { 1419 tcp_req_info_handle_readdone(c->tcp_req_info); 1420 } else { 1421 if(c->type == comm_tcp) 1422 comm_point_stop_listening(c); 1423 fptr_ok(fptr_whitelist_comm_point(c->callback)); 1424 if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) { 1425 comm_point_start_listening(c, -1, 1426 adjusted_tcp_timeout(c)); 1427 } 1428 } 1429 } 1430 1431 #ifdef HAVE_SSL 1432 /** true if the ssl handshake error has to be squelched from the logs */ 1433 int 1434 squelch_err_ssl_handshake(unsigned long err) 1435 { 1436 if(verbosity >= VERB_QUERY) 1437 return 0; /* only squelch on low verbosity */ 1438 if(ERR_GET_LIB(err) == ERR_LIB_SSL && 1439 (ERR_GET_REASON(err) == SSL_R_HTTPS_PROXY_REQUEST || 1440 ERR_GET_REASON(err) == SSL_R_HTTP_REQUEST || 1441 ERR_GET_REASON(err) == SSL_R_WRONG_VERSION_NUMBER || 1442 ERR_GET_REASON(err) == SSL_R_SSLV3_ALERT_BAD_CERTIFICATE 1443 #ifdef SSL_F_TLS_POST_PROCESS_CLIENT_HELLO 1444 || ERR_GET_REASON(err) == SSL_R_NO_SHARED_CIPHER 1445 #endif 1446 #ifdef SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO 1447 || ERR_GET_REASON(err) == SSL_R_UNKNOWN_PROTOCOL 1448 || ERR_GET_REASON(err) == SSL_R_UNSUPPORTED_PROTOCOL 1449 # ifdef SSL_R_VERSION_TOO_LOW 1450 || ERR_GET_REASON(err) == SSL_R_VERSION_TOO_LOW 1451 # endif 1452 #endif 1453 )) 1454 return 1; 1455 return 0; 1456 } 1457 #endif /* HAVE_SSL */ 1458 1459 /** continue ssl handshake */ 1460 #ifdef HAVE_SSL 1461 static int 1462 ssl_handshake(struct comm_point* c) 1463 { 1464 int r; 1465 if(c->ssl_shake_state == comm_ssl_shake_hs_read) { 1466 /* read condition satisfied back to writing */ 1467 comm_point_listen_for_rw(c, 0, 1); 1468 c->ssl_shake_state = comm_ssl_shake_none; 1469 return 1; 1470 } 1471 if(c->ssl_shake_state == comm_ssl_shake_hs_write) { 1472 /* write condition satisfied, back to reading */ 1473 comm_point_listen_for_rw(c, 1, 0); 1474 c->ssl_shake_state = comm_ssl_shake_none; 1475 return 1; 1476 } 1477 1478 ERR_clear_error(); 1479 r = SSL_do_handshake(c->ssl); 1480 if(r != 1) { 1481 int want = SSL_get_error(c->ssl, r); 1482 if(want == SSL_ERROR_WANT_READ) { 1483 if(c->ssl_shake_state == comm_ssl_shake_read) 1484 return 1; 1485 c->ssl_shake_state = comm_ssl_shake_read; 1486 comm_point_listen_for_rw(c, 1, 0); 1487 return 1; 1488 } else if(want == SSL_ERROR_WANT_WRITE) { 1489 if(c->ssl_shake_state == comm_ssl_shake_write) 1490 return 1; 1491 c->ssl_shake_state = comm_ssl_shake_write; 1492 comm_point_listen_for_rw(c, 0, 1); 1493 return 1; 1494 } else if(r == 0) { 1495 return 0; /* closed */ 1496 } else if(want == SSL_ERROR_SYSCALL) { 1497 /* SYSCALL and errno==0 means closed uncleanly */ 1498 #ifdef EPIPE 1499 if(errno == EPIPE && verbosity < 2) 1500 return 0; /* silence 'broken pipe' */ 1501 #endif 1502 #ifdef ECONNRESET 1503 if(errno == ECONNRESET && verbosity < 2) 1504 return 0; /* silence reset by peer */ 1505 #endif 1506 if(!tcp_connect_errno_needs_log( 1507 (struct sockaddr*)&c->repinfo.remote_addr, 1508 c->repinfo.remote_addrlen)) 1509 return 0; /* silence connect failures that 1510 show up because after connect this is the 1511 first system call that accesses the socket */ 1512 if(errno != 0) 1513 log_err("SSL_handshake syscall: %s", 1514 strerror(errno)); 1515 return 0; 1516 } else { 1517 unsigned long err = ERR_get_error(); 1518 if(!squelch_err_ssl_handshake(err)) { 1519 log_crypto_err_code("ssl handshake failed", err); 1520 log_addr(VERB_OPS, "ssl handshake failed", 1521 &c->repinfo.remote_addr, 1522 c->repinfo.remote_addrlen); 1523 } 1524 return 0; 1525 } 1526 } 1527 /* this is where peer verification could take place */ 1528 if((SSL_get_verify_mode(c->ssl)&SSL_VERIFY_PEER)) { 1529 /* verification */ 1530 if(SSL_get_verify_result(c->ssl) == X509_V_OK) { 1531 #ifdef HAVE_SSL_GET1_PEER_CERTIFICATE 1532 X509* x = SSL_get1_peer_certificate(c->ssl); 1533 #else 1534 X509* x = SSL_get_peer_certificate(c->ssl); 1535 #endif 1536 if(!x) { 1537 log_addr(VERB_ALGO, "SSL connection failed: " 1538 "no certificate", 1539 &c->repinfo.remote_addr, 1540 c->repinfo.remote_addrlen); 1541 return 0; 1542 } 1543 log_cert(VERB_ALGO, "peer certificate", x); 1544 #ifdef HAVE_SSL_GET0_PEERNAME 1545 if(SSL_get0_peername(c->ssl)) { 1546 char buf[255]; 1547 snprintf(buf, sizeof(buf), "SSL connection " 1548 "to %s authenticated", 1549 SSL_get0_peername(c->ssl)); 1550 log_addr(VERB_ALGO, buf, &c->repinfo.remote_addr, 1551 c->repinfo.remote_addrlen); 1552 } else { 1553 #endif 1554 log_addr(VERB_ALGO, "SSL connection " 1555 "authenticated", &c->repinfo.remote_addr, 1556 c->repinfo.remote_addrlen); 1557 #ifdef HAVE_SSL_GET0_PEERNAME 1558 } 1559 #endif 1560 X509_free(x); 1561 } else { 1562 #ifdef HAVE_SSL_GET1_PEER_CERTIFICATE 1563 X509* x = SSL_get1_peer_certificate(c->ssl); 1564 #else 1565 X509* x = SSL_get_peer_certificate(c->ssl); 1566 #endif 1567 if(x) { 1568 log_cert(VERB_ALGO, "peer certificate", x); 1569 X509_free(x); 1570 } 1571 log_addr(VERB_ALGO, "SSL connection failed: " 1572 "failed to authenticate", 1573 &c->repinfo.remote_addr, 1574 c->repinfo.remote_addrlen); 1575 return 0; 1576 } 1577 } else { 1578 /* unauthenticated, the verify peer flag was not set 1579 * in c->ssl when the ssl object was created from ssl_ctx */ 1580 log_addr(VERB_ALGO, "SSL connection", &c->repinfo.remote_addr, 1581 c->repinfo.remote_addrlen); 1582 } 1583 1584 #ifdef HAVE_SSL_GET0_ALPN_SELECTED 1585 /* check if http2 use is negotiated */ 1586 if(c->type == comm_http && c->h2_session) { 1587 const unsigned char *alpn; 1588 unsigned int alpnlen = 0; 1589 SSL_get0_alpn_selected(c->ssl, &alpn, &alpnlen); 1590 if(alpnlen == 2 && memcmp("h2", alpn, 2) == 0) { 1591 /* connection upgraded to HTTP2 */ 1592 c->tcp_do_toggle_rw = 0; 1593 c->use_h2 = 1; 1594 } 1595 } 1596 #endif 1597 1598 /* setup listen rw correctly */ 1599 if(c->tcp_is_reading) { 1600 if(c->ssl_shake_state != comm_ssl_shake_read) 1601 comm_point_listen_for_rw(c, 1, 0); 1602 } else { 1603 comm_point_listen_for_rw(c, 0, 1); 1604 } 1605 c->ssl_shake_state = comm_ssl_shake_none; 1606 return 1; 1607 } 1608 #endif /* HAVE_SSL */ 1609 1610 /** ssl read callback on TCP */ 1611 static int 1612 ssl_handle_read(struct comm_point* c) 1613 { 1614 #ifdef HAVE_SSL 1615 int r; 1616 if(c->ssl_shake_state != comm_ssl_shake_none) { 1617 if(!ssl_handshake(c)) 1618 return 0; 1619 if(c->ssl_shake_state != comm_ssl_shake_none) 1620 return 1; 1621 } 1622 if(c->pp2_enabled && c->pp2_header_state != pp2_header_done) { 1623 struct pp2_header* header = NULL; 1624 size_t want_read_size = 0; 1625 size_t current_read_size = 0; 1626 if(c->pp2_header_state == pp2_header_none) { 1627 want_read_size = PP2_HEADER_SIZE; 1628 if(sldns_buffer_remaining(c->buffer)<want_read_size) { 1629 log_err_addr("proxy_protocol: not enough " 1630 "buffer size to read PROXYv2 header", "", 1631 &c->repinfo.remote_addr, 1632 c->repinfo.remote_addrlen); 1633 return 0; 1634 } 1635 verbose(VERB_ALGO, "proxy_protocol: reading fixed " 1636 "part of PROXYv2 header (len %lu)", 1637 (unsigned long)want_read_size); 1638 current_read_size = want_read_size; 1639 if(c->tcp_byte_count < current_read_size) { 1640 ERR_clear_error(); 1641 if((r=SSL_read(c->ssl, (void*)sldns_buffer_at( 1642 c->buffer, c->tcp_byte_count), 1643 current_read_size - 1644 c->tcp_byte_count)) <= 0) { 1645 int want = SSL_get_error(c->ssl, r); 1646 if(want == SSL_ERROR_ZERO_RETURN) { 1647 if(c->tcp_req_info) 1648 return tcp_req_info_handle_read_close(c->tcp_req_info); 1649 return 0; /* shutdown, closed */ 1650 } else if(want == SSL_ERROR_WANT_READ) { 1651 #ifdef USE_WINSOCK 1652 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 1653 #endif 1654 return 1; /* read more later */ 1655 } else if(want == SSL_ERROR_WANT_WRITE) { 1656 c->ssl_shake_state = comm_ssl_shake_hs_write; 1657 comm_point_listen_for_rw(c, 0, 1); 1658 return 1; 1659 } else if(want == SSL_ERROR_SYSCALL) { 1660 #ifdef ECONNRESET 1661 if(errno == ECONNRESET && verbosity < 2) 1662 return 0; /* silence reset by peer */ 1663 #endif 1664 if(errno != 0) 1665 log_err("SSL_read syscall: %s", 1666 strerror(errno)); 1667 return 0; 1668 } 1669 log_crypto_err("could not SSL_read"); 1670 return 0; 1671 } 1672 c->tcp_byte_count += r; 1673 if(c->tcp_byte_count != current_read_size) return 1; 1674 c->pp2_header_state = pp2_header_init; 1675 } 1676 } 1677 if(c->pp2_header_state == pp2_header_init) { 1678 header = pp2_read_header(c->buffer); 1679 if(!header) { 1680 log_err("proxy_protocol: could not parse " 1681 "PROXYv2 header"); 1682 return 0; 1683 } 1684 want_read_size = ntohs(header->len); 1685 if(sldns_buffer_remaining(c->buffer) < 1686 PP2_HEADER_SIZE + want_read_size) { 1687 log_err_addr("proxy_protocol: not enough " 1688 "buffer size to read PROXYv2 header", "", 1689 &c->repinfo.remote_addr, 1690 c->repinfo.remote_addrlen); 1691 return 0; 1692 } 1693 verbose(VERB_ALGO, "proxy_protocol: reading variable " 1694 "part of PROXYv2 header (len %lu)", 1695 (unsigned long)want_read_size); 1696 current_read_size = PP2_HEADER_SIZE + want_read_size; 1697 if(want_read_size == 0) { 1698 /* nothing more to read; header is complete */ 1699 c->pp2_header_state = pp2_header_done; 1700 } else if(c->tcp_byte_count < current_read_size) { 1701 ERR_clear_error(); 1702 if((r=SSL_read(c->ssl, (void*)sldns_buffer_at( 1703 c->buffer, c->tcp_byte_count), 1704 current_read_size - 1705 c->tcp_byte_count)) <= 0) { 1706 int want = SSL_get_error(c->ssl, r); 1707 if(want == SSL_ERROR_ZERO_RETURN) { 1708 if(c->tcp_req_info) 1709 return tcp_req_info_handle_read_close(c->tcp_req_info); 1710 return 0; /* shutdown, closed */ 1711 } else if(want == SSL_ERROR_WANT_READ) { 1712 #ifdef USE_WINSOCK 1713 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 1714 #endif 1715 return 1; /* read more later */ 1716 } else if(want == SSL_ERROR_WANT_WRITE) { 1717 c->ssl_shake_state = comm_ssl_shake_hs_write; 1718 comm_point_listen_for_rw(c, 0, 1); 1719 return 1; 1720 } else if(want == SSL_ERROR_SYSCALL) { 1721 #ifdef ECONNRESET 1722 if(errno == ECONNRESET && verbosity < 2) 1723 return 0; /* silence reset by peer */ 1724 #endif 1725 if(errno != 0) 1726 log_err("SSL_read syscall: %s", 1727 strerror(errno)); 1728 return 0; 1729 } 1730 log_crypto_err("could not SSL_read"); 1731 return 0; 1732 } 1733 c->tcp_byte_count += r; 1734 if(c->tcp_byte_count != current_read_size) return 1; 1735 c->pp2_header_state = pp2_header_done; 1736 } 1737 } 1738 if(c->pp2_header_state != pp2_header_done || !header) { 1739 log_err_addr("proxy_protocol: wrong state for the " 1740 "PROXYv2 header", "", &c->repinfo.remote_addr, 1741 c->repinfo.remote_addrlen); 1742 return 0; 1743 } 1744 if(!consume_pp2_header(c->buffer, &c->repinfo, 1)) { 1745 log_err_addr("proxy_protocol: could not consume " 1746 "PROXYv2 header", "", &c->repinfo.remote_addr, 1747 c->repinfo.remote_addrlen); 1748 return 0; 1749 } 1750 verbose(VERB_ALGO, "proxy_protocol: successful read of " 1751 "PROXYv2 header"); 1752 /* Clear and reset the buffer to read the following 1753 * DNS packet(s). */ 1754 sldns_buffer_clear(c->buffer); 1755 c->tcp_byte_count = 0; 1756 return 1; 1757 } 1758 if(c->tcp_byte_count < sizeof(uint16_t)) { 1759 /* read length bytes */ 1760 ERR_clear_error(); 1761 if((r=SSL_read(c->ssl, (void*)sldns_buffer_at(c->buffer, 1762 c->tcp_byte_count), (int)(sizeof(uint16_t) - 1763 c->tcp_byte_count))) <= 0) { 1764 int want = SSL_get_error(c->ssl, r); 1765 if(want == SSL_ERROR_ZERO_RETURN) { 1766 if(c->tcp_req_info) 1767 return tcp_req_info_handle_read_close(c->tcp_req_info); 1768 return 0; /* shutdown, closed */ 1769 } else if(want == SSL_ERROR_WANT_READ) { 1770 #ifdef USE_WINSOCK 1771 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 1772 #endif 1773 return 1; /* read more later */ 1774 } else if(want == SSL_ERROR_WANT_WRITE) { 1775 c->ssl_shake_state = comm_ssl_shake_hs_write; 1776 comm_point_listen_for_rw(c, 0, 1); 1777 return 1; 1778 } else if(want == SSL_ERROR_SYSCALL) { 1779 #ifdef ECONNRESET 1780 if(errno == ECONNRESET && verbosity < 2) 1781 return 0; /* silence reset by peer */ 1782 #endif 1783 if(errno != 0) 1784 log_err("SSL_read syscall: %s", 1785 strerror(errno)); 1786 return 0; 1787 } 1788 log_crypto_err("could not SSL_read"); 1789 return 0; 1790 } 1791 c->tcp_byte_count += r; 1792 if(c->tcp_byte_count < sizeof(uint16_t)) 1793 return 1; 1794 if(sldns_buffer_read_u16_at(c->buffer, 0) > 1795 sldns_buffer_capacity(c->buffer)) { 1796 verbose(VERB_QUERY, "ssl: dropped larger than buffer"); 1797 return 0; 1798 } 1799 sldns_buffer_set_limit(c->buffer, 1800 sldns_buffer_read_u16_at(c->buffer, 0)); 1801 if(sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) { 1802 verbose(VERB_QUERY, "ssl: dropped bogus too short."); 1803 return 0; 1804 } 1805 sldns_buffer_skip(c->buffer, (ssize_t)(c->tcp_byte_count-sizeof(uint16_t))); 1806 verbose(VERB_ALGO, "Reading ssl tcp query of length %d", 1807 (int)sldns_buffer_limit(c->buffer)); 1808 } 1809 if(sldns_buffer_remaining(c->buffer) > 0) { 1810 ERR_clear_error(); 1811 r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer), 1812 (int)sldns_buffer_remaining(c->buffer)); 1813 if(r <= 0) { 1814 int want = SSL_get_error(c->ssl, r); 1815 if(want == SSL_ERROR_ZERO_RETURN) { 1816 if(c->tcp_req_info) 1817 return tcp_req_info_handle_read_close(c->tcp_req_info); 1818 return 0; /* shutdown, closed */ 1819 } else if(want == SSL_ERROR_WANT_READ) { 1820 #ifdef USE_WINSOCK 1821 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 1822 #endif 1823 return 1; /* read more later */ 1824 } else if(want == SSL_ERROR_WANT_WRITE) { 1825 c->ssl_shake_state = comm_ssl_shake_hs_write; 1826 comm_point_listen_for_rw(c, 0, 1); 1827 return 1; 1828 } else if(want == SSL_ERROR_SYSCALL) { 1829 #ifdef ECONNRESET 1830 if(errno == ECONNRESET && verbosity < 2) 1831 return 0; /* silence reset by peer */ 1832 #endif 1833 if(errno != 0) 1834 log_err("SSL_read syscall: %s", 1835 strerror(errno)); 1836 return 0; 1837 } 1838 log_crypto_err("could not SSL_read"); 1839 return 0; 1840 } 1841 sldns_buffer_skip(c->buffer, (ssize_t)r); 1842 } 1843 if(sldns_buffer_remaining(c->buffer) <= 0) { 1844 tcp_callback_reader(c); 1845 } 1846 return 1; 1847 #else 1848 (void)c; 1849 return 0; 1850 #endif /* HAVE_SSL */ 1851 } 1852 1853 /** ssl write callback on TCP */ 1854 static int 1855 ssl_handle_write(struct comm_point* c) 1856 { 1857 #ifdef HAVE_SSL 1858 int r; 1859 if(c->ssl_shake_state != comm_ssl_shake_none) { 1860 if(!ssl_handshake(c)) 1861 return 0; 1862 if(c->ssl_shake_state != comm_ssl_shake_none) 1863 return 1; 1864 } 1865 /* ignore return, if fails we may simply block */ 1866 (void)SSL_set_mode(c->ssl, (long)SSL_MODE_ENABLE_PARTIAL_WRITE); 1867 if((c->tcp_write_and_read?c->tcp_write_byte_count:c->tcp_byte_count) < sizeof(uint16_t)) { 1868 uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(c->buffer)); 1869 ERR_clear_error(); 1870 if(c->tcp_write_and_read) { 1871 if(c->tcp_write_pkt_len + 2 < LDNS_RR_BUF_SIZE) { 1872 /* combine the tcp length and the query for 1873 * write, this emulates writev */ 1874 uint8_t buf[LDNS_RR_BUF_SIZE]; 1875 memmove(buf, &len, sizeof(uint16_t)); 1876 memmove(buf+sizeof(uint16_t), 1877 c->tcp_write_pkt, 1878 c->tcp_write_pkt_len); 1879 r = SSL_write(c->ssl, 1880 (void*)(buf+c->tcp_write_byte_count), 1881 c->tcp_write_pkt_len + 2 - 1882 c->tcp_write_byte_count); 1883 } else { 1884 r = SSL_write(c->ssl, 1885 (void*)(((uint8_t*)&len)+c->tcp_write_byte_count), 1886 (int)(sizeof(uint16_t)-c->tcp_write_byte_count)); 1887 } 1888 } else if(sizeof(uint16_t)+sldns_buffer_remaining(c->buffer) < 1889 LDNS_RR_BUF_SIZE) { 1890 /* combine the tcp length and the query for write, 1891 * this emulates writev */ 1892 uint8_t buf[LDNS_RR_BUF_SIZE]; 1893 memmove(buf, &len, sizeof(uint16_t)); 1894 memmove(buf+sizeof(uint16_t), 1895 sldns_buffer_current(c->buffer), 1896 sldns_buffer_remaining(c->buffer)); 1897 r = SSL_write(c->ssl, (void*)(buf+c->tcp_byte_count), 1898 (int)(sizeof(uint16_t)+ 1899 sldns_buffer_remaining(c->buffer) 1900 - c->tcp_byte_count)); 1901 } else { 1902 r = SSL_write(c->ssl, 1903 (void*)(((uint8_t*)&len)+c->tcp_byte_count), 1904 (int)(sizeof(uint16_t)-c->tcp_byte_count)); 1905 } 1906 if(r <= 0) { 1907 int want = SSL_get_error(c->ssl, r); 1908 if(want == SSL_ERROR_ZERO_RETURN) { 1909 return 0; /* closed */ 1910 } else if(want == SSL_ERROR_WANT_READ) { 1911 c->ssl_shake_state = comm_ssl_shake_hs_read; 1912 comm_point_listen_for_rw(c, 1, 0); 1913 return 1; /* wait for read condition */ 1914 } else if(want == SSL_ERROR_WANT_WRITE) { 1915 #ifdef USE_WINSOCK 1916 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 1917 #endif 1918 return 1; /* write more later */ 1919 } else if(want == SSL_ERROR_SYSCALL) { 1920 #ifdef EPIPE 1921 if(errno == EPIPE && verbosity < 2) 1922 return 0; /* silence 'broken pipe' */ 1923 #endif 1924 if(errno != 0) 1925 log_err("SSL_write syscall: %s", 1926 strerror(errno)); 1927 return 0; 1928 } 1929 log_crypto_err("could not SSL_write"); 1930 return 0; 1931 } 1932 if(c->tcp_write_and_read) { 1933 c->tcp_write_byte_count += r; 1934 if(c->tcp_write_byte_count < sizeof(uint16_t)) 1935 return 1; 1936 } else { 1937 c->tcp_byte_count += r; 1938 if(c->tcp_byte_count < sizeof(uint16_t)) 1939 return 1; 1940 sldns_buffer_set_position(c->buffer, c->tcp_byte_count - 1941 sizeof(uint16_t)); 1942 } 1943 if((!c->tcp_write_and_read && sldns_buffer_remaining(c->buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) { 1944 tcp_callback_writer(c); 1945 return 1; 1946 } 1947 } 1948 log_assert(c->tcp_write_and_read || sldns_buffer_remaining(c->buffer) > 0); 1949 log_assert(!c->tcp_write_and_read || c->tcp_write_byte_count < c->tcp_write_pkt_len + 2); 1950 ERR_clear_error(); 1951 if(c->tcp_write_and_read) { 1952 r = SSL_write(c->ssl, (void*)(c->tcp_write_pkt + c->tcp_write_byte_count - 2), 1953 (int)(c->tcp_write_pkt_len + 2 - c->tcp_write_byte_count)); 1954 } else { 1955 r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer), 1956 (int)sldns_buffer_remaining(c->buffer)); 1957 } 1958 if(r <= 0) { 1959 int want = SSL_get_error(c->ssl, r); 1960 if(want == SSL_ERROR_ZERO_RETURN) { 1961 return 0; /* closed */ 1962 } else if(want == SSL_ERROR_WANT_READ) { 1963 c->ssl_shake_state = comm_ssl_shake_hs_read; 1964 comm_point_listen_for_rw(c, 1, 0); 1965 return 1; /* wait for read condition */ 1966 } else if(want == SSL_ERROR_WANT_WRITE) { 1967 #ifdef USE_WINSOCK 1968 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 1969 #endif 1970 return 1; /* write more later */ 1971 } else if(want == SSL_ERROR_SYSCALL) { 1972 #ifdef EPIPE 1973 if(errno == EPIPE && verbosity < 2) 1974 return 0; /* silence 'broken pipe' */ 1975 #endif 1976 if(errno != 0) 1977 log_err("SSL_write syscall: %s", 1978 strerror(errno)); 1979 return 0; 1980 } 1981 log_crypto_err("could not SSL_write"); 1982 return 0; 1983 } 1984 if(c->tcp_write_and_read) { 1985 c->tcp_write_byte_count += r; 1986 } else { 1987 sldns_buffer_skip(c->buffer, (ssize_t)r); 1988 } 1989 1990 if((!c->tcp_write_and_read && sldns_buffer_remaining(c->buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) { 1991 tcp_callback_writer(c); 1992 } 1993 return 1; 1994 #else 1995 (void)c; 1996 return 0; 1997 #endif /* HAVE_SSL */ 1998 } 1999 2000 /** handle ssl tcp connection with dns contents */ 2001 static int 2002 ssl_handle_it(struct comm_point* c, int is_write) 2003 { 2004 /* handle case where renegotiation wants read during write call 2005 * or write during read calls */ 2006 if(is_write && c->ssl_shake_state == comm_ssl_shake_hs_write) 2007 return ssl_handle_read(c); 2008 else if(!is_write && c->ssl_shake_state == comm_ssl_shake_hs_read) 2009 return ssl_handle_write(c); 2010 /* handle read events for read operation and write events for a 2011 * write operation */ 2012 else if(!is_write) 2013 return ssl_handle_read(c); 2014 return ssl_handle_write(c); 2015 } 2016 2017 /** 2018 * Handle tcp reading callback. 2019 * @param fd: file descriptor of socket. 2020 * @param c: comm point to read from into buffer. 2021 * @param short_ok: if true, very short packets are OK (for comm_local). 2022 * @return: 0 on error 2023 */ 2024 static int 2025 comm_point_tcp_handle_read(int fd, struct comm_point* c, int short_ok) 2026 { 2027 ssize_t r; 2028 int recv_initial = 0; 2029 log_assert(c->type == comm_tcp || c->type == comm_local); 2030 if(c->ssl) 2031 return ssl_handle_it(c, 0); 2032 if(!c->tcp_is_reading && !c->tcp_write_and_read) 2033 return 0; 2034 2035 log_assert(fd != -1); 2036 if(c->pp2_enabled && c->pp2_header_state != pp2_header_done) { 2037 struct pp2_header* header = NULL; 2038 size_t want_read_size = 0; 2039 size_t current_read_size = 0; 2040 if(c->pp2_header_state == pp2_header_none) { 2041 want_read_size = PP2_HEADER_SIZE; 2042 if(sldns_buffer_remaining(c->buffer)<want_read_size) { 2043 log_err_addr("proxy_protocol: not enough " 2044 "buffer size to read PROXYv2 header", "", 2045 &c->repinfo.remote_addr, 2046 c->repinfo.remote_addrlen); 2047 return 0; 2048 } 2049 verbose(VERB_ALGO, "proxy_protocol: reading fixed " 2050 "part of PROXYv2 header (len %lu)", 2051 (unsigned long)want_read_size); 2052 current_read_size = want_read_size; 2053 if(c->tcp_byte_count < current_read_size) { 2054 r = recv(fd, (void*)sldns_buffer_at(c->buffer, 2055 c->tcp_byte_count), 2056 current_read_size-c->tcp_byte_count, MSG_DONTWAIT); 2057 if(r == 0) { 2058 if(c->tcp_req_info) 2059 return tcp_req_info_handle_read_close(c->tcp_req_info); 2060 return 0; 2061 } else if(r == -1) { 2062 goto recv_error_initial; 2063 } 2064 c->tcp_byte_count += r; 2065 if(c->tcp_byte_count != current_read_size) return 1; 2066 c->pp2_header_state = pp2_header_init; 2067 } 2068 } 2069 if(c->pp2_header_state == pp2_header_init) { 2070 header = pp2_read_header(c->buffer); 2071 if(!header) { 2072 log_err("proxy_protocol: could not parse " 2073 "PROXYv2 header"); 2074 return 0; 2075 } 2076 want_read_size = ntohs(header->len); 2077 if(sldns_buffer_remaining(c->buffer) < 2078 PP2_HEADER_SIZE + want_read_size) { 2079 log_err_addr("proxy_protocol: not enough " 2080 "buffer size to read PROXYv2 header", "", 2081 &c->repinfo.remote_addr, 2082 c->repinfo.remote_addrlen); 2083 return 0; 2084 } 2085 verbose(VERB_ALGO, "proxy_protocol: reading variable " 2086 "part of PROXYv2 header (len %lu)", 2087 (unsigned long)want_read_size); 2088 current_read_size = PP2_HEADER_SIZE + want_read_size; 2089 if(want_read_size == 0) { 2090 /* nothing more to read; header is complete */ 2091 c->pp2_header_state = pp2_header_done; 2092 } else if(c->tcp_byte_count < current_read_size) { 2093 r = recv(fd, (void*)sldns_buffer_at(c->buffer, 2094 c->tcp_byte_count), 2095 current_read_size-c->tcp_byte_count, MSG_DONTWAIT); 2096 if(r == 0) { 2097 if(c->tcp_req_info) 2098 return tcp_req_info_handle_read_close(c->tcp_req_info); 2099 return 0; 2100 } else if(r == -1) { 2101 goto recv_error; 2102 } 2103 c->tcp_byte_count += r; 2104 if(c->tcp_byte_count != current_read_size) return 1; 2105 c->pp2_header_state = pp2_header_done; 2106 } 2107 } 2108 if(c->pp2_header_state != pp2_header_done || !header) { 2109 log_err_addr("proxy_protocol: wrong state for the " 2110 "PROXYv2 header", "", &c->repinfo.remote_addr, 2111 c->repinfo.remote_addrlen); 2112 return 0; 2113 } 2114 if(!consume_pp2_header(c->buffer, &c->repinfo, 1)) { 2115 log_err_addr("proxy_protocol: could not consume " 2116 "PROXYv2 header", "", &c->repinfo.remote_addr, 2117 c->repinfo.remote_addrlen); 2118 return 0; 2119 } 2120 verbose(VERB_ALGO, "proxy_protocol: successful read of " 2121 "PROXYv2 header"); 2122 /* Clear and reset the buffer to read the following 2123 * DNS packet(s). */ 2124 sldns_buffer_clear(c->buffer); 2125 c->tcp_byte_count = 0; 2126 return 1; 2127 } 2128 2129 if(c->tcp_byte_count < sizeof(uint16_t)) { 2130 /* read length bytes */ 2131 r = recv(fd,(void*)sldns_buffer_at(c->buffer,c->tcp_byte_count), 2132 sizeof(uint16_t)-c->tcp_byte_count, MSG_DONTWAIT); 2133 if(r == 0) { 2134 if(c->tcp_req_info) 2135 return tcp_req_info_handle_read_close(c->tcp_req_info); 2136 return 0; 2137 } else if(r == -1) { 2138 if(c->pp2_enabled) goto recv_error; 2139 goto recv_error_initial; 2140 } 2141 c->tcp_byte_count += r; 2142 if(c->tcp_byte_count != sizeof(uint16_t)) 2143 return 1; 2144 if(sldns_buffer_read_u16_at(c->buffer, 0) > 2145 sldns_buffer_capacity(c->buffer)) { 2146 verbose(VERB_QUERY, "tcp: dropped larger than buffer"); 2147 return 0; 2148 } 2149 sldns_buffer_set_limit(c->buffer, 2150 sldns_buffer_read_u16_at(c->buffer, 0)); 2151 if(!short_ok && 2152 sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) { 2153 verbose(VERB_QUERY, "tcp: dropped bogus too short."); 2154 return 0; 2155 } 2156 verbose(VERB_ALGO, "Reading tcp query of length %d", 2157 (int)sldns_buffer_limit(c->buffer)); 2158 } 2159 2160 if(sldns_buffer_remaining(c->buffer) == 0) 2161 log_err("in comm_point_tcp_handle_read buffer_remaining is " 2162 "not > 0 as expected, continuing with (harmless) 0 " 2163 "length recv"); 2164 r = recv(fd, (void*)sldns_buffer_current(c->buffer), 2165 sldns_buffer_remaining(c->buffer), MSG_DONTWAIT); 2166 if(r == 0) { 2167 if(c->tcp_req_info) 2168 return tcp_req_info_handle_read_close(c->tcp_req_info); 2169 return 0; 2170 } else if(r == -1) { 2171 goto recv_error; 2172 } 2173 sldns_buffer_skip(c->buffer, r); 2174 if(sldns_buffer_remaining(c->buffer) <= 0) { 2175 tcp_callback_reader(c); 2176 } 2177 return 1; 2178 2179 recv_error_initial: 2180 recv_initial = 1; 2181 recv_error: 2182 #ifndef USE_WINSOCK 2183 if(errno == EINTR || errno == EAGAIN) 2184 return 1; 2185 if(recv_initial) { 2186 #ifdef ECONNRESET 2187 if(errno == ECONNRESET && verbosity < 2) 2188 return 0; /* silence reset by peer */ 2189 #endif 2190 #ifdef ECONNREFUSED 2191 if(errno == ECONNREFUSED && verbosity < 2) 2192 return 0; /* silence reset by peer */ 2193 #endif 2194 #ifdef ENETUNREACH 2195 if(errno == ENETUNREACH && verbosity < 2) 2196 return 0; /* silence it */ 2197 #endif 2198 #ifdef EHOSTDOWN 2199 if(errno == EHOSTDOWN && verbosity < 2) 2200 return 0; /* silence it */ 2201 #endif 2202 #ifdef EHOSTUNREACH 2203 if(errno == EHOSTUNREACH && verbosity < 2) 2204 return 0; /* silence it */ 2205 #endif 2206 #ifdef ENETDOWN 2207 if(errno == ENETDOWN && verbosity < 2) 2208 return 0; /* silence it */ 2209 #endif 2210 #ifdef EACCES 2211 if(errno == EACCES && verbosity < 2) 2212 return 0; /* silence it */ 2213 #endif 2214 #ifdef ENOTCONN 2215 if(errno == ENOTCONN) { 2216 log_err_addr("read (in tcp s) failed and this " 2217 "could be because TCP Fast Open is " 2218 "enabled [--disable-tfo-client " 2219 "--disable-tfo-server] but does not " 2220 "work", sock_strerror(errno), 2221 &c->repinfo.remote_addr, 2222 c->repinfo.remote_addrlen); 2223 return 0; 2224 } 2225 #endif 2226 } 2227 #else /* USE_WINSOCK */ 2228 if(recv_initial) { 2229 if(WSAGetLastError() == WSAECONNREFUSED && verbosity < 2) 2230 return 0; 2231 if(WSAGetLastError() == WSAEHOSTDOWN && verbosity < 2) 2232 return 0; 2233 if(WSAGetLastError() == WSAEHOSTUNREACH && verbosity < 2) 2234 return 0; 2235 if(WSAGetLastError() == WSAENETDOWN && verbosity < 2) 2236 return 0; 2237 if(WSAGetLastError() == WSAENETUNREACH && verbosity < 2) 2238 return 0; 2239 } 2240 if(WSAGetLastError() == WSAECONNRESET) 2241 return 0; 2242 if(WSAGetLastError() == WSAEINPROGRESS) 2243 return 1; 2244 if(WSAGetLastError() == WSAEWOULDBLOCK) { 2245 ub_winsock_tcp_wouldblock(c->ev->ev, 2246 UB_EV_READ); 2247 return 1; 2248 } 2249 #endif 2250 log_err_addr("read (in tcp s)", sock_strerror(errno), 2251 &c->repinfo.remote_addr, c->repinfo.remote_addrlen); 2252 return 0; 2253 } 2254 2255 /** 2256 * Handle tcp writing callback. 2257 * @param fd: file descriptor of socket. 2258 * @param c: comm point to write buffer out of. 2259 * @return: 0 on error 2260 */ 2261 static int 2262 comm_point_tcp_handle_write(int fd, struct comm_point* c) 2263 { 2264 ssize_t r; 2265 struct sldns_buffer *buffer; 2266 log_assert(c->type == comm_tcp); 2267 #ifdef USE_DNSCRYPT 2268 buffer = c->dnscrypt_buffer; 2269 #else 2270 buffer = c->buffer; 2271 #endif 2272 if(c->tcp_is_reading && !c->ssl && !c->tcp_write_and_read) 2273 return 0; 2274 log_assert(fd != -1); 2275 if(((!c->tcp_write_and_read && c->tcp_byte_count == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == 0)) && c->tcp_check_nb_connect) { 2276 /* check for pending error from nonblocking connect */ 2277 /* from Stevens, unix network programming, vol1, 3rd ed, p450*/ 2278 int error = 0; 2279 socklen_t len = (socklen_t)sizeof(error); 2280 if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error, 2281 &len) < 0){ 2282 #ifndef USE_WINSOCK 2283 error = errno; /* on solaris errno is error */ 2284 #else /* USE_WINSOCK */ 2285 error = WSAGetLastError(); 2286 #endif 2287 } 2288 #ifndef USE_WINSOCK 2289 #if defined(EINPROGRESS) && defined(EWOULDBLOCK) 2290 if(error == EINPROGRESS || error == EWOULDBLOCK) 2291 return 1; /* try again later */ 2292 else 2293 #endif 2294 if(error != 0 && verbosity < 2) 2295 return 0; /* silence lots of chatter in the logs */ 2296 else if(error != 0) { 2297 log_err_addr("tcp connect", strerror(error), 2298 &c->repinfo.remote_addr, 2299 c->repinfo.remote_addrlen); 2300 #else /* USE_WINSOCK */ 2301 /* examine error */ 2302 if(error == WSAEINPROGRESS) 2303 return 1; 2304 else if(error == WSAEWOULDBLOCK) { 2305 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 2306 return 1; 2307 } else if(error != 0 && verbosity < 2) 2308 return 0; 2309 else if(error != 0) { 2310 log_err_addr("tcp connect", wsa_strerror(error), 2311 &c->repinfo.remote_addr, 2312 c->repinfo.remote_addrlen); 2313 #endif /* USE_WINSOCK */ 2314 return 0; 2315 } 2316 } 2317 if(c->ssl) 2318 return ssl_handle_it(c, 1); 2319 2320 #ifdef USE_MSG_FASTOPEN 2321 /* Only try this on first use of a connection that uses tfo, 2322 otherwise fall through to normal write */ 2323 /* Also, TFO support on WINDOWS not implemented at the moment */ 2324 if(c->tcp_do_fastopen == 1) { 2325 /* this form of sendmsg() does both a connect() and send() so need to 2326 look for various flavours of error*/ 2327 uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(buffer)); 2328 struct msghdr msg; 2329 struct iovec iov[2]; 2330 c->tcp_do_fastopen = 0; 2331 memset(&msg, 0, sizeof(msg)); 2332 if(c->tcp_write_and_read) { 2333 iov[0].iov_base = (uint8_t*)&len + c->tcp_write_byte_count; 2334 iov[0].iov_len = sizeof(uint16_t) - c->tcp_write_byte_count; 2335 iov[1].iov_base = c->tcp_write_pkt; 2336 iov[1].iov_len = c->tcp_write_pkt_len; 2337 } else { 2338 iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count; 2339 iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count; 2340 iov[1].iov_base = sldns_buffer_begin(buffer); 2341 iov[1].iov_len = sldns_buffer_limit(buffer); 2342 } 2343 log_assert(iov[0].iov_len > 0); 2344 msg.msg_name = &c->repinfo.remote_addr; 2345 msg.msg_namelen = c->repinfo.remote_addrlen; 2346 msg.msg_iov = iov; 2347 msg.msg_iovlen = 2; 2348 r = sendmsg(fd, &msg, MSG_FASTOPEN); 2349 if (r == -1) { 2350 #if defined(EINPROGRESS) && defined(EWOULDBLOCK) 2351 /* Handshake is underway, maybe because no TFO cookie available. 2352 Come back to write the message*/ 2353 if(errno == EINPROGRESS || errno == EWOULDBLOCK) 2354 return 1; 2355 #endif 2356 if(errno == EINTR || errno == EAGAIN) 2357 return 1; 2358 /* Not handling EISCONN here as shouldn't ever hit that case.*/ 2359 if(errno != EPIPE 2360 #ifdef EOPNOTSUPP 2361 /* if /proc/sys/net/ipv4/tcp_fastopen is 2362 * disabled on Linux, sendmsg may return 2363 * 'Operation not supported', if so 2364 * fallthrough to ordinary connect. */ 2365 && errno != EOPNOTSUPP 2366 #endif 2367 && errno != 0) { 2368 if(verbosity < 2) 2369 return 0; /* silence lots of chatter in the logs */ 2370 log_err_addr("tcp sendmsg", strerror(errno), 2371 &c->repinfo.remote_addr, 2372 c->repinfo.remote_addrlen); 2373 return 0; 2374 } 2375 verbose(VERB_ALGO, "tcp sendmsg for fastopen failed (with %s), try normal connect", strerror(errno)); 2376 /* fallthrough to nonFASTOPEN 2377 * (MSG_FASTOPEN on Linux 3 produces EPIPE) 2378 * we need to perform connect() */ 2379 if(connect(fd, (struct sockaddr *)&c->repinfo.remote_addr, 2380 c->repinfo.remote_addrlen) == -1) { 2381 #ifdef EINPROGRESS 2382 if(errno == EINPROGRESS) 2383 return 1; /* wait until connect done*/ 2384 #endif 2385 #ifdef USE_WINSOCK 2386 if(WSAGetLastError() == WSAEINPROGRESS || 2387 WSAGetLastError() == WSAEWOULDBLOCK) 2388 return 1; /* wait until connect done*/ 2389 #endif 2390 if(tcp_connect_errno_needs_log( 2391 (struct sockaddr *)&c->repinfo.remote_addr, 2392 c->repinfo.remote_addrlen)) { 2393 log_err_addr("outgoing tcp: connect after EPIPE for fastopen", 2394 strerror(errno), 2395 &c->repinfo.remote_addr, 2396 c->repinfo.remote_addrlen); 2397 } 2398 return 0; 2399 } 2400 2401 } else { 2402 if(c->tcp_write_and_read) { 2403 c->tcp_write_byte_count += r; 2404 if(c->tcp_write_byte_count < sizeof(uint16_t)) 2405 return 1; 2406 } else { 2407 c->tcp_byte_count += r; 2408 if(c->tcp_byte_count < sizeof(uint16_t)) 2409 return 1; 2410 sldns_buffer_set_position(buffer, c->tcp_byte_count - 2411 sizeof(uint16_t)); 2412 } 2413 if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) { 2414 tcp_callback_writer(c); 2415 return 1; 2416 } 2417 } 2418 } 2419 #endif /* USE_MSG_FASTOPEN */ 2420 2421 if((c->tcp_write_and_read?c->tcp_write_byte_count:c->tcp_byte_count) < sizeof(uint16_t)) { 2422 uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(buffer)); 2423 #ifdef HAVE_WRITEV 2424 struct iovec iov[2]; 2425 if(c->tcp_write_and_read) { 2426 iov[0].iov_base = (uint8_t*)&len + c->tcp_write_byte_count; 2427 iov[0].iov_len = sizeof(uint16_t) - c->tcp_write_byte_count; 2428 iov[1].iov_base = c->tcp_write_pkt; 2429 iov[1].iov_len = c->tcp_write_pkt_len; 2430 } else { 2431 iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count; 2432 iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count; 2433 iov[1].iov_base = sldns_buffer_begin(buffer); 2434 iov[1].iov_len = sldns_buffer_limit(buffer); 2435 } 2436 log_assert(iov[0].iov_len > 0); 2437 r = writev(fd, iov, 2); 2438 #else /* HAVE_WRITEV */ 2439 if(c->tcp_write_and_read) { 2440 r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_write_byte_count), 2441 sizeof(uint16_t)-c->tcp_write_byte_count, 0); 2442 } else { 2443 r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_byte_count), 2444 sizeof(uint16_t)-c->tcp_byte_count, 0); 2445 } 2446 #endif /* HAVE_WRITEV */ 2447 if(r == -1) { 2448 #ifndef USE_WINSOCK 2449 # ifdef EPIPE 2450 if(errno == EPIPE && verbosity < 2) 2451 return 0; /* silence 'broken pipe' */ 2452 #endif 2453 if(errno == EINTR || errno == EAGAIN) 2454 return 1; 2455 #ifdef ECONNRESET 2456 if(errno == ECONNRESET && verbosity < 2) 2457 return 0; /* silence reset by peer */ 2458 #endif 2459 # ifdef HAVE_WRITEV 2460 log_err_addr("tcp writev", strerror(errno), 2461 &c->repinfo.remote_addr, 2462 c->repinfo.remote_addrlen); 2463 # else /* HAVE_WRITEV */ 2464 log_err_addr("tcp send s", strerror(errno), 2465 &c->repinfo.remote_addr, 2466 c->repinfo.remote_addrlen); 2467 # endif /* HAVE_WRITEV */ 2468 #else 2469 if(WSAGetLastError() == WSAENOTCONN) 2470 return 1; 2471 if(WSAGetLastError() == WSAEINPROGRESS) 2472 return 1; 2473 if(WSAGetLastError() == WSAEWOULDBLOCK) { 2474 ub_winsock_tcp_wouldblock(c->ev->ev, 2475 UB_EV_WRITE); 2476 return 1; 2477 } 2478 if(WSAGetLastError() == WSAECONNRESET && verbosity < 2) 2479 return 0; /* silence reset by peer */ 2480 log_err_addr("tcp send s", 2481 wsa_strerror(WSAGetLastError()), 2482 &c->repinfo.remote_addr, 2483 c->repinfo.remote_addrlen); 2484 #endif 2485 return 0; 2486 } 2487 if(c->tcp_write_and_read) { 2488 c->tcp_write_byte_count += r; 2489 if(c->tcp_write_byte_count < sizeof(uint16_t)) 2490 return 1; 2491 } else { 2492 c->tcp_byte_count += r; 2493 if(c->tcp_byte_count < sizeof(uint16_t)) 2494 return 1; 2495 sldns_buffer_set_position(buffer, c->tcp_byte_count - 2496 sizeof(uint16_t)); 2497 } 2498 if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) { 2499 tcp_callback_writer(c); 2500 return 1; 2501 } 2502 } 2503 log_assert(c->tcp_write_and_read || sldns_buffer_remaining(buffer) > 0); 2504 log_assert(!c->tcp_write_and_read || c->tcp_write_byte_count < c->tcp_write_pkt_len + 2); 2505 if(c->tcp_write_and_read) { 2506 r = send(fd, (void*)(c->tcp_write_pkt + c->tcp_write_byte_count - 2), 2507 c->tcp_write_pkt_len + 2 - c->tcp_write_byte_count, 0); 2508 } else { 2509 r = send(fd, (void*)sldns_buffer_current(buffer), 2510 sldns_buffer_remaining(buffer), 0); 2511 } 2512 if(r == -1) { 2513 #ifndef USE_WINSOCK 2514 if(errno == EINTR || errno == EAGAIN) 2515 return 1; 2516 #ifdef ECONNRESET 2517 if(errno == ECONNRESET && verbosity < 2) 2518 return 0; /* silence reset by peer */ 2519 #endif 2520 #else 2521 if(WSAGetLastError() == WSAEINPROGRESS) 2522 return 1; 2523 if(WSAGetLastError() == WSAEWOULDBLOCK) { 2524 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 2525 return 1; 2526 } 2527 if(WSAGetLastError() == WSAECONNRESET && verbosity < 2) 2528 return 0; /* silence reset by peer */ 2529 #endif 2530 log_err_addr("tcp send r", sock_strerror(errno), 2531 &c->repinfo.remote_addr, 2532 c->repinfo.remote_addrlen); 2533 return 0; 2534 } 2535 if(c->tcp_write_and_read) { 2536 c->tcp_write_byte_count += r; 2537 } else { 2538 sldns_buffer_skip(buffer, r); 2539 } 2540 2541 if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) { 2542 tcp_callback_writer(c); 2543 } 2544 2545 return 1; 2546 } 2547 2548 /** read again to drain buffers when there could be more to read, returns 0 2549 * on failure which means the comm point is closed. */ 2550 static int 2551 tcp_req_info_read_again(int fd, struct comm_point* c) 2552 { 2553 while(c->tcp_req_info->read_again) { 2554 int r; 2555 c->tcp_req_info->read_again = 0; 2556 if(c->tcp_is_reading) 2557 r = comm_point_tcp_handle_read(fd, c, 0); 2558 else r = comm_point_tcp_handle_write(fd, c); 2559 if(!r) { 2560 reclaim_tcp_handler(c); 2561 if(!c->tcp_do_close) { 2562 fptr_ok(fptr_whitelist_comm_point( 2563 c->callback)); 2564 (void)(*c->callback)(c, c->cb_arg, 2565 NETEVENT_CLOSED, NULL); 2566 } 2567 return 0; 2568 } 2569 } 2570 return 1; 2571 } 2572 2573 /** read again to drain buffers when there could be more to read */ 2574 static void 2575 tcp_more_read_again(int fd, struct comm_point* c) 2576 { 2577 /* if the packet is done, but another one could be waiting on 2578 * the connection, the callback signals this, and we try again */ 2579 /* this continues until the read routines get EAGAIN or so, 2580 * and thus does not call the callback, and the bool is 0 */ 2581 int* moreread = c->tcp_more_read_again; 2582 while(moreread && *moreread) { 2583 *moreread = 0; 2584 if(!comm_point_tcp_handle_read(fd, c, 0)) { 2585 reclaim_tcp_handler(c); 2586 if(!c->tcp_do_close) { 2587 fptr_ok(fptr_whitelist_comm_point( 2588 c->callback)); 2589 (void)(*c->callback)(c, c->cb_arg, 2590 NETEVENT_CLOSED, NULL); 2591 } 2592 return; 2593 } 2594 } 2595 } 2596 2597 /** write again to fill up when there could be more to write */ 2598 static void 2599 tcp_more_write_again(int fd, struct comm_point* c) 2600 { 2601 /* if the packet is done, but another is waiting to be written, 2602 * the callback signals it and we try again. */ 2603 /* this continues until the write routines get EAGAIN or so, 2604 * and thus does not call the callback, and the bool is 0 */ 2605 int* morewrite = c->tcp_more_write_again; 2606 while(morewrite && *morewrite) { 2607 *morewrite = 0; 2608 if(!comm_point_tcp_handle_write(fd, c)) { 2609 reclaim_tcp_handler(c); 2610 if(!c->tcp_do_close) { 2611 fptr_ok(fptr_whitelist_comm_point( 2612 c->callback)); 2613 (void)(*c->callback)(c, c->cb_arg, 2614 NETEVENT_CLOSED, NULL); 2615 } 2616 return; 2617 } 2618 } 2619 } 2620 2621 void 2622 comm_point_tcp_handle_callback(int fd, short event, void* arg) 2623 { 2624 struct comm_point* c = (struct comm_point*)arg; 2625 log_assert(c->type == comm_tcp); 2626 ub_comm_base_now(c->ev->base); 2627 2628 if(c->fd == -1 || c->fd != fd) 2629 return; /* duplicate event, but commpoint closed. */ 2630 2631 #ifdef USE_DNSCRYPT 2632 /* Initialize if this is a dnscrypt socket */ 2633 if(c->tcp_parent) { 2634 c->dnscrypt = c->tcp_parent->dnscrypt; 2635 } 2636 if(c->dnscrypt && c->dnscrypt_buffer == c->buffer) { 2637 c->dnscrypt_buffer = sldns_buffer_new(sldns_buffer_capacity(c->buffer)); 2638 if(!c->dnscrypt_buffer) { 2639 log_err("Could not allocate dnscrypt buffer"); 2640 reclaim_tcp_handler(c); 2641 if(!c->tcp_do_close) { 2642 fptr_ok(fptr_whitelist_comm_point( 2643 c->callback)); 2644 (void)(*c->callback)(c, c->cb_arg, 2645 NETEVENT_CLOSED, NULL); 2646 } 2647 return; 2648 } 2649 } 2650 #endif 2651 2652 if(event&UB_EV_TIMEOUT) { 2653 verbose(VERB_QUERY, "tcp took too long, dropped"); 2654 reclaim_tcp_handler(c); 2655 if(!c->tcp_do_close) { 2656 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2657 (void)(*c->callback)(c, c->cb_arg, 2658 NETEVENT_TIMEOUT, NULL); 2659 } 2660 return; 2661 } 2662 if(event&UB_EV_READ 2663 #ifdef USE_MSG_FASTOPEN 2664 && !(c->tcp_do_fastopen && (event&UB_EV_WRITE)) 2665 #endif 2666 ) { 2667 int has_tcpq = (c->tcp_req_info != NULL); 2668 int* moreread = c->tcp_more_read_again; 2669 if(!comm_point_tcp_handle_read(fd, c, 0)) { 2670 reclaim_tcp_handler(c); 2671 if(!c->tcp_do_close) { 2672 fptr_ok(fptr_whitelist_comm_point( 2673 c->callback)); 2674 (void)(*c->callback)(c, c->cb_arg, 2675 NETEVENT_CLOSED, NULL); 2676 } 2677 return; 2678 } 2679 if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again) { 2680 if(!tcp_req_info_read_again(fd, c)) 2681 return; 2682 } 2683 if(moreread && *moreread) 2684 tcp_more_read_again(fd, c); 2685 return; 2686 } 2687 if(event&UB_EV_WRITE) { 2688 int has_tcpq = (c->tcp_req_info != NULL); 2689 int* morewrite = c->tcp_more_write_again; 2690 if(!comm_point_tcp_handle_write(fd, c)) { 2691 reclaim_tcp_handler(c); 2692 if(!c->tcp_do_close) { 2693 fptr_ok(fptr_whitelist_comm_point( 2694 c->callback)); 2695 (void)(*c->callback)(c, c->cb_arg, 2696 NETEVENT_CLOSED, NULL); 2697 } 2698 return; 2699 } 2700 if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again) { 2701 if(!tcp_req_info_read_again(fd, c)) 2702 return; 2703 } 2704 if(morewrite && *morewrite) 2705 tcp_more_write_again(fd, c); 2706 return; 2707 } 2708 log_err("Ignored event %d for tcphdl.", event); 2709 } 2710 2711 /** Make http handler free for next assignment */ 2712 static void 2713 reclaim_http_handler(struct comm_point* c) 2714 { 2715 log_assert(c->type == comm_http); 2716 if(c->ssl) { 2717 #ifdef HAVE_SSL 2718 SSL_shutdown(c->ssl); 2719 SSL_free(c->ssl); 2720 c->ssl = NULL; 2721 #endif 2722 } 2723 comm_point_close(c); 2724 if(c->tcp_parent) { 2725 if(c != c->tcp_parent->tcp_free) { 2726 c->tcp_parent->cur_tcp_count--; 2727 c->tcp_free = c->tcp_parent->tcp_free; 2728 c->tcp_parent->tcp_free = c; 2729 } 2730 if(!c->tcp_free) { 2731 /* re-enable listening on accept socket */ 2732 comm_point_start_listening(c->tcp_parent, -1, -1); 2733 } 2734 } 2735 } 2736 2737 /** read more data for http (with ssl) */ 2738 static int 2739 ssl_http_read_more(struct comm_point* c) 2740 { 2741 #ifdef HAVE_SSL 2742 int r; 2743 log_assert(sldns_buffer_remaining(c->buffer) > 0); 2744 ERR_clear_error(); 2745 r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer), 2746 (int)sldns_buffer_remaining(c->buffer)); 2747 if(r <= 0) { 2748 int want = SSL_get_error(c->ssl, r); 2749 if(want == SSL_ERROR_ZERO_RETURN) { 2750 return 0; /* shutdown, closed */ 2751 } else if(want == SSL_ERROR_WANT_READ) { 2752 return 1; /* read more later */ 2753 } else if(want == SSL_ERROR_WANT_WRITE) { 2754 c->ssl_shake_state = comm_ssl_shake_hs_write; 2755 comm_point_listen_for_rw(c, 0, 1); 2756 return 1; 2757 } else if(want == SSL_ERROR_SYSCALL) { 2758 #ifdef ECONNRESET 2759 if(errno == ECONNRESET && verbosity < 2) 2760 return 0; /* silence reset by peer */ 2761 #endif 2762 if(errno != 0) 2763 log_err("SSL_read syscall: %s", 2764 strerror(errno)); 2765 return 0; 2766 } 2767 log_crypto_err("could not SSL_read"); 2768 return 0; 2769 } 2770 verbose(VERB_ALGO, "ssl http read more skip to %d + %d", 2771 (int)sldns_buffer_position(c->buffer), (int)r); 2772 sldns_buffer_skip(c->buffer, (ssize_t)r); 2773 return 1; 2774 #else 2775 (void)c; 2776 return 0; 2777 #endif /* HAVE_SSL */ 2778 } 2779 2780 /** read more data for http */ 2781 static int 2782 http_read_more(int fd, struct comm_point* c) 2783 { 2784 ssize_t r; 2785 log_assert(sldns_buffer_remaining(c->buffer) > 0); 2786 r = recv(fd, (void*)sldns_buffer_current(c->buffer), 2787 sldns_buffer_remaining(c->buffer), MSG_DONTWAIT); 2788 if(r == 0) { 2789 return 0; 2790 } else if(r == -1) { 2791 #ifndef USE_WINSOCK 2792 if(errno == EINTR || errno == EAGAIN) 2793 return 1; 2794 #else /* USE_WINSOCK */ 2795 if(WSAGetLastError() == WSAECONNRESET) 2796 return 0; 2797 if(WSAGetLastError() == WSAEINPROGRESS) 2798 return 1; 2799 if(WSAGetLastError() == WSAEWOULDBLOCK) { 2800 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 2801 return 1; 2802 } 2803 #endif 2804 log_err_addr("read (in http r)", sock_strerror(errno), 2805 &c->repinfo.remote_addr, c->repinfo.remote_addrlen); 2806 return 0; 2807 } 2808 verbose(VERB_ALGO, "http read more skip to %d + %d", 2809 (int)sldns_buffer_position(c->buffer), (int)r); 2810 sldns_buffer_skip(c->buffer, r); 2811 return 1; 2812 } 2813 2814 /** return true if http header has been read (one line complete) */ 2815 static int 2816 http_header_done(sldns_buffer* buf) 2817 { 2818 size_t i; 2819 for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) { 2820 /* there was a \r before the \n, but we ignore that */ 2821 if((char)sldns_buffer_read_u8_at(buf, i) == '\n') 2822 return 1; 2823 } 2824 return 0; 2825 } 2826 2827 /** return character string into buffer for header line, moves buffer 2828 * past that line and puts zero terminator into linefeed-newline */ 2829 static char* 2830 http_header_line(sldns_buffer* buf) 2831 { 2832 char* result = (char*)sldns_buffer_current(buf); 2833 size_t i; 2834 for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) { 2835 /* terminate the string on the \r */ 2836 if((char)sldns_buffer_read_u8_at(buf, i) == '\r') 2837 sldns_buffer_write_u8_at(buf, i, 0); 2838 /* terminate on the \n and skip past the it and done */ 2839 if((char)sldns_buffer_read_u8_at(buf, i) == '\n') { 2840 sldns_buffer_write_u8_at(buf, i, 0); 2841 sldns_buffer_set_position(buf, i+1); 2842 return result; 2843 } 2844 } 2845 return NULL; 2846 } 2847 2848 /** move unread buffer to start and clear rest for putting the rest into it */ 2849 static void 2850 http_moveover_buffer(sldns_buffer* buf) 2851 { 2852 size_t pos = sldns_buffer_position(buf); 2853 size_t len = sldns_buffer_remaining(buf); 2854 sldns_buffer_clear(buf); 2855 memmove(sldns_buffer_begin(buf), sldns_buffer_at(buf, pos), len); 2856 sldns_buffer_set_position(buf, len); 2857 } 2858 2859 /** a http header is complete, process it */ 2860 static int 2861 http_process_initial_header(struct comm_point* c) 2862 { 2863 char* line = http_header_line(c->buffer); 2864 if(!line) return 1; 2865 verbose(VERB_ALGO, "http header: %s", line); 2866 if(strncasecmp(line, "HTTP/1.1 ", 9) == 0) { 2867 /* check returncode */ 2868 if(line[9] != '2') { 2869 verbose(VERB_ALGO, "http bad status %s", line+9); 2870 return 0; 2871 } 2872 } else if(strncasecmp(line, "Content-Length: ", 16) == 0) { 2873 if(!c->http_is_chunked) 2874 c->tcp_byte_count = (size_t)atoi(line+16); 2875 } else if(strncasecmp(line, "Transfer-Encoding: chunked", 19+7) == 0) { 2876 c->tcp_byte_count = 0; 2877 c->http_is_chunked = 1; 2878 } else if(line[0] == 0) { 2879 /* end of initial headers */ 2880 c->http_in_headers = 0; 2881 if(c->http_is_chunked) 2882 c->http_in_chunk_headers = 1; 2883 /* remove header text from front of buffer 2884 * the buffer is going to be used to return the data segment 2885 * itself and we don't want the header to get returned 2886 * prepended with it */ 2887 http_moveover_buffer(c->buffer); 2888 sldns_buffer_flip(c->buffer); 2889 return 1; 2890 } 2891 /* ignore other headers */ 2892 return 1; 2893 } 2894 2895 /** a chunk header is complete, process it, return 0=fail, 1=continue next 2896 * header line, 2=done with chunked transfer*/ 2897 static int 2898 http_process_chunk_header(struct comm_point* c) 2899 { 2900 char* line = http_header_line(c->buffer); 2901 if(!line) return 1; 2902 if(c->http_in_chunk_headers == 3) { 2903 verbose(VERB_ALGO, "http chunk trailer: %s", line); 2904 /* are we done ? */ 2905 if(line[0] == 0 && c->tcp_byte_count == 0) { 2906 /* callback of http reader when NETEVENT_DONE, 2907 * end of data, with no data in buffer */ 2908 sldns_buffer_set_position(c->buffer, 0); 2909 sldns_buffer_set_limit(c->buffer, 0); 2910 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2911 (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL); 2912 /* return that we are done */ 2913 return 2; 2914 } 2915 if(line[0] == 0) { 2916 /* continue with header of the next chunk */ 2917 c->http_in_chunk_headers = 1; 2918 /* remove header text from front of buffer */ 2919 http_moveover_buffer(c->buffer); 2920 sldns_buffer_flip(c->buffer); 2921 return 1; 2922 } 2923 /* ignore further trail headers */ 2924 return 1; 2925 } 2926 verbose(VERB_ALGO, "http chunk header: %s", line); 2927 if(c->http_in_chunk_headers == 1) { 2928 /* read chunked start line */ 2929 char* end = NULL; 2930 c->tcp_byte_count = (size_t)strtol(line, &end, 16); 2931 if(end == line) 2932 return 0; 2933 c->http_in_chunk_headers = 0; 2934 /* remove header text from front of buffer */ 2935 http_moveover_buffer(c->buffer); 2936 sldns_buffer_flip(c->buffer); 2937 if(c->tcp_byte_count == 0) { 2938 /* done with chunks, process chunk_trailer lines */ 2939 c->http_in_chunk_headers = 3; 2940 } 2941 return 1; 2942 } 2943 /* ignore other headers */ 2944 return 1; 2945 } 2946 2947 /** handle nonchunked data segment, 0=fail, 1=wait */ 2948 static int 2949 http_nonchunk_segment(struct comm_point* c) 2950 { 2951 /* c->buffer at position..limit has new data we read in. 2952 * the buffer itself is full of nonchunked data. 2953 * we are looking to read tcp_byte_count more data 2954 * and then the transfer is done. */ 2955 size_t remainbufferlen; 2956 size_t got_now = sldns_buffer_limit(c->buffer); 2957 if(c->tcp_byte_count <= got_now) { 2958 /* done, this is the last data fragment */ 2959 c->http_stored = 0; 2960 sldns_buffer_set_position(c->buffer, 0); 2961 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2962 (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL); 2963 return 1; 2964 } 2965 /* if we have the buffer space, 2966 * read more data collected into the buffer */ 2967 remainbufferlen = sldns_buffer_capacity(c->buffer) - 2968 sldns_buffer_limit(c->buffer); 2969 if(remainbufferlen+got_now >= c->tcp_byte_count || 2970 remainbufferlen >= (size_t)(c->ssl?16384:2048)) { 2971 size_t total = sldns_buffer_limit(c->buffer); 2972 sldns_buffer_clear(c->buffer); 2973 sldns_buffer_set_position(c->buffer, total); 2974 c->http_stored = total; 2975 /* return and wait to read more */ 2976 return 1; 2977 } 2978 /* call callback with this data amount, then 2979 * wait for more */ 2980 c->tcp_byte_count -= got_now; 2981 c->http_stored = 0; 2982 sldns_buffer_set_position(c->buffer, 0); 2983 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2984 (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL); 2985 /* c->callback has to buffer_clear(c->buffer). */ 2986 /* return and wait to read more */ 2987 return 1; 2988 } 2989 2990 /** handle chunked data segment, return 0=fail, 1=wait, 2=process more */ 2991 static int 2992 http_chunked_segment(struct comm_point* c) 2993 { 2994 /* the c->buffer has from position..limit new data we read. */ 2995 /* the current chunk has length tcp_byte_count. 2996 * once we read that read more chunk headers. 2997 */ 2998 size_t remainbufferlen; 2999 size_t got_now = sldns_buffer_limit(c->buffer) - c->http_stored; 3000 verbose(VERB_ALGO, "http_chunked_segment: got now %d, tcpbytcount %d, http_stored %d, buffer pos %d, buffer limit %d", (int)got_now, (int)c->tcp_byte_count, (int)c->http_stored, (int)sldns_buffer_position(c->buffer), (int)sldns_buffer_limit(c->buffer)); 3001 if(c->tcp_byte_count <= got_now) { 3002 /* the chunk has completed (with perhaps some extra data 3003 * from next chunk header and next chunk) */ 3004 /* save too much info into temp buffer */ 3005 size_t fraglen; 3006 struct comm_reply repinfo; 3007 c->http_stored = 0; 3008 sldns_buffer_skip(c->buffer, (ssize_t)c->tcp_byte_count); 3009 sldns_buffer_clear(c->http_temp); 3010 sldns_buffer_write(c->http_temp, 3011 sldns_buffer_current(c->buffer), 3012 sldns_buffer_remaining(c->buffer)); 3013 sldns_buffer_flip(c->http_temp); 3014 3015 /* callback with this fragment */ 3016 fraglen = sldns_buffer_position(c->buffer); 3017 sldns_buffer_set_position(c->buffer, 0); 3018 sldns_buffer_set_limit(c->buffer, fraglen); 3019 repinfo = c->repinfo; 3020 fptr_ok(fptr_whitelist_comm_point(c->callback)); 3021 (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &repinfo); 3022 /* c->callback has to buffer_clear(). */ 3023 3024 /* is commpoint deleted? */ 3025 if(!repinfo.c) { 3026 return 1; 3027 } 3028 /* copy waiting info */ 3029 sldns_buffer_clear(c->buffer); 3030 sldns_buffer_write(c->buffer, 3031 sldns_buffer_begin(c->http_temp), 3032 sldns_buffer_remaining(c->http_temp)); 3033 sldns_buffer_flip(c->buffer); 3034 /* process end of chunk trailer header lines, until 3035 * an empty line */ 3036 c->http_in_chunk_headers = 3; 3037 /* process more data in buffer (if any) */ 3038 return 2; 3039 } 3040 c->tcp_byte_count -= got_now; 3041 3042 /* if we have the buffer space, 3043 * read more data collected into the buffer */ 3044 remainbufferlen = sldns_buffer_capacity(c->buffer) - 3045 sldns_buffer_limit(c->buffer); 3046 if(remainbufferlen >= c->tcp_byte_count || 3047 remainbufferlen >= 2048) { 3048 size_t total = sldns_buffer_limit(c->buffer); 3049 sldns_buffer_clear(c->buffer); 3050 sldns_buffer_set_position(c->buffer, total); 3051 c->http_stored = total; 3052 /* return and wait to read more */ 3053 return 1; 3054 } 3055 3056 /* callback of http reader for a new part of the data */ 3057 c->http_stored = 0; 3058 sldns_buffer_set_position(c->buffer, 0); 3059 fptr_ok(fptr_whitelist_comm_point(c->callback)); 3060 (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL); 3061 /* c->callback has to buffer_clear(c->buffer). */ 3062 /* return and wait to read more */ 3063 return 1; 3064 } 3065 3066 #ifdef HAVE_NGHTTP2 3067 /** Create new http2 session. Called when creating handling comm point. */ 3068 static struct http2_session* http2_session_create(struct comm_point* c) 3069 { 3070 struct http2_session* session = calloc(1, sizeof(*session)); 3071 if(!session) { 3072 log_err("malloc failure while creating http2 session"); 3073 return NULL; 3074 } 3075 session->c = c; 3076 3077 return session; 3078 } 3079 #endif 3080 3081 /** Delete http2 session. After closing connection or on error */ 3082 static void http2_session_delete(struct http2_session* h2_session) 3083 { 3084 #ifdef HAVE_NGHTTP2 3085 if(h2_session->callbacks) 3086 nghttp2_session_callbacks_del(h2_session->callbacks); 3087 free(h2_session); 3088 #else 3089 (void)h2_session; 3090 #endif 3091 } 3092 3093 #ifdef HAVE_NGHTTP2 3094 struct http2_stream* http2_stream_create(int32_t stream_id) 3095 { 3096 struct http2_stream* h2_stream = calloc(1, sizeof(*h2_stream)); 3097 if(!h2_stream) { 3098 log_err("malloc failure while creating http2 stream"); 3099 return NULL; 3100 } 3101 h2_stream->stream_id = stream_id; 3102 return h2_stream; 3103 } 3104 3105 /** Delete http2 stream. After session delete or stream close callback */ 3106 static void http2_stream_delete(struct http2_session* h2_session, 3107 struct http2_stream* h2_stream) 3108 { 3109 if(h2_stream->mesh_state) { 3110 mesh_state_remove_reply(h2_stream->mesh, h2_stream->mesh_state, 3111 h2_session->c); 3112 h2_stream->mesh_state = NULL; 3113 } 3114 http2_req_stream_clear(h2_stream); 3115 free(h2_stream); 3116 } 3117 #endif 3118 3119 void http2_stream_add_meshstate(struct http2_stream* h2_stream, 3120 struct mesh_area* mesh, struct mesh_state* m) 3121 { 3122 h2_stream->mesh = mesh; 3123 h2_stream->mesh_state = m; 3124 } 3125 3126 /** delete http2 session server. After closing connection. */ 3127 static void http2_session_server_delete(struct http2_session* h2_session) 3128 { 3129 #ifdef HAVE_NGHTTP2 3130 struct http2_stream* h2_stream, *next; 3131 nghttp2_session_del(h2_session->session); /* NULL input is fine */ 3132 h2_session->session = NULL; 3133 for(h2_stream = h2_session->first_stream; h2_stream;) { 3134 next = h2_stream->next; 3135 http2_stream_delete(h2_session, h2_stream); 3136 h2_stream = next; 3137 } 3138 h2_session->first_stream = NULL; 3139 h2_session->is_drop = 0; 3140 h2_session->postpone_drop = 0; 3141 h2_session->c->h2_stream = NULL; 3142 #endif 3143 (void)h2_session; 3144 } 3145 3146 #ifdef HAVE_NGHTTP2 3147 void http2_session_add_stream(struct http2_session* h2_session, 3148 struct http2_stream* h2_stream) 3149 { 3150 if(h2_session->first_stream) 3151 h2_session->first_stream->prev = h2_stream; 3152 h2_stream->next = h2_session->first_stream; 3153 h2_session->first_stream = h2_stream; 3154 } 3155 3156 /** remove stream from session linked list. After stream close callback or 3157 * closing connection */ 3158 static void http2_session_remove_stream(struct http2_session* h2_session, 3159 struct http2_stream* h2_stream) 3160 { 3161 if(h2_stream->prev) 3162 h2_stream->prev->next = h2_stream->next; 3163 else 3164 h2_session->first_stream = h2_stream->next; 3165 if(h2_stream->next) 3166 h2_stream->next->prev = h2_stream->prev; 3167 3168 } 3169 3170 int http2_stream_close_cb(nghttp2_session* ATTR_UNUSED(session), 3171 int32_t stream_id, uint32_t ATTR_UNUSED(error_code), void* cb_arg) 3172 { 3173 struct http2_stream* h2_stream; 3174 struct http2_session* h2_session = (struct http2_session*)cb_arg; 3175 if(!(h2_stream = nghttp2_session_get_stream_user_data( 3176 h2_session->session, stream_id))) { 3177 return 0; 3178 } 3179 http2_session_remove_stream(h2_session, h2_stream); 3180 http2_stream_delete(h2_session, h2_stream); 3181 return 0; 3182 } 3183 3184 ssize_t http2_recv_cb(nghttp2_session* ATTR_UNUSED(session), uint8_t* buf, 3185 size_t len, int ATTR_UNUSED(flags), void* cb_arg) 3186 { 3187 struct http2_session* h2_session = (struct http2_session*)cb_arg; 3188 ssize_t ret; 3189 3190 log_assert(h2_session->c->type == comm_http); 3191 log_assert(h2_session->c->h2_session); 3192 3193 #ifdef HAVE_SSL 3194 if(h2_session->c->ssl) { 3195 int r; 3196 ERR_clear_error(); 3197 r = SSL_read(h2_session->c->ssl, buf, len); 3198 if(r <= 0) { 3199 int want = SSL_get_error(h2_session->c->ssl, r); 3200 if(want == SSL_ERROR_ZERO_RETURN) { 3201 return NGHTTP2_ERR_EOF; 3202 } else if(want == SSL_ERROR_WANT_READ) { 3203 return NGHTTP2_ERR_WOULDBLOCK; 3204 } else if(want == SSL_ERROR_WANT_WRITE) { 3205 h2_session->c->ssl_shake_state = comm_ssl_shake_hs_write; 3206 comm_point_listen_for_rw(h2_session->c, 0, 1); 3207 return NGHTTP2_ERR_WOULDBLOCK; 3208 } else if(want == SSL_ERROR_SYSCALL) { 3209 #ifdef ECONNRESET 3210 if(errno == ECONNRESET && verbosity < 2) 3211 return NGHTTP2_ERR_CALLBACK_FAILURE; 3212 #endif 3213 if(errno != 0) 3214 log_err("SSL_read syscall: %s", 3215 strerror(errno)); 3216 return NGHTTP2_ERR_CALLBACK_FAILURE; 3217 } 3218 log_crypto_err("could not SSL_read"); 3219 return NGHTTP2_ERR_CALLBACK_FAILURE; 3220 } 3221 return r; 3222 } 3223 #endif /* HAVE_SSL */ 3224 3225 ret = recv(h2_session->c->fd, buf, len, MSG_DONTWAIT); 3226 if(ret == 0) { 3227 return NGHTTP2_ERR_EOF; 3228 } else if(ret < 0) { 3229 #ifndef USE_WINSOCK 3230 if(errno == EINTR || errno == EAGAIN) 3231 return NGHTTP2_ERR_WOULDBLOCK; 3232 #ifdef ECONNRESET 3233 if(errno == ECONNRESET && verbosity < 2) 3234 return NGHTTP2_ERR_CALLBACK_FAILURE; 3235 #endif 3236 log_err_addr("could not http2 recv: %s", strerror(errno), 3237 &h2_session->c->repinfo.remote_addr, 3238 h2_session->c->repinfo.remote_addrlen); 3239 #else /* USE_WINSOCK */ 3240 if(WSAGetLastError() == WSAECONNRESET) 3241 return NGHTTP2_ERR_CALLBACK_FAILURE; 3242 if(WSAGetLastError() == WSAEINPROGRESS) 3243 return NGHTTP2_ERR_WOULDBLOCK; 3244 if(WSAGetLastError() == WSAEWOULDBLOCK) { 3245 ub_winsock_tcp_wouldblock(h2_session->c->ev->ev, 3246 UB_EV_READ); 3247 return NGHTTP2_ERR_WOULDBLOCK; 3248 } 3249 log_err_addr("could not http2 recv: %s", 3250 wsa_strerror(WSAGetLastError()), 3251 &h2_session->c->repinfo.remote_addr, 3252 h2_session->c->repinfo.remote_addrlen); 3253 #endif 3254 return NGHTTP2_ERR_CALLBACK_FAILURE; 3255 } 3256 return ret; 3257 } 3258 #endif /* HAVE_NGHTTP2 */ 3259 3260 /** Handle http2 read */ 3261 static int 3262 comm_point_http2_handle_read(int ATTR_UNUSED(fd), struct comm_point* c) 3263 { 3264 #ifdef HAVE_NGHTTP2 3265 int ret; 3266 log_assert(c->h2_session); 3267 3268 /* reading until recv cb returns NGHTTP2_ERR_WOULDBLOCK */ 3269 ret = nghttp2_session_recv(c->h2_session->session); 3270 if(ret) { 3271 if(ret != NGHTTP2_ERR_EOF && 3272 ret != NGHTTP2_ERR_CALLBACK_FAILURE) { 3273 char a[256]; 3274 addr_to_str(&c->repinfo.remote_addr, 3275 c->repinfo.remote_addrlen, a, sizeof(a)); 3276 verbose(VERB_QUERY, "http2: session_recv from %s failed, " 3277 "error: %s", a, nghttp2_strerror(ret)); 3278 } 3279 return 0; 3280 } 3281 if(nghttp2_session_want_write(c->h2_session->session)) { 3282 c->tcp_is_reading = 0; 3283 comm_point_stop_listening(c); 3284 comm_point_start_listening(c, -1, adjusted_tcp_timeout(c)); 3285 } else if(!nghttp2_session_want_read(c->h2_session->session)) 3286 return 0; /* connection can be closed */ 3287 return 1; 3288 #else 3289 (void)c; 3290 return 0; 3291 #endif 3292 } 3293 3294 /** 3295 * Handle http reading callback. 3296 * @param fd: file descriptor of socket. 3297 * @param c: comm point to read from into buffer. 3298 * @return: 0 on error 3299 */ 3300 static int 3301 comm_point_http_handle_read(int fd, struct comm_point* c) 3302 { 3303 log_assert(c->type == comm_http); 3304 log_assert(fd != -1); 3305 3306 /* if we are in ssl handshake, handle SSL handshake */ 3307 #ifdef HAVE_SSL 3308 if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) { 3309 if(!ssl_handshake(c)) 3310 return 0; 3311 if(c->ssl_shake_state != comm_ssl_shake_none) 3312 return 1; 3313 } 3314 #endif /* HAVE_SSL */ 3315 3316 if(!c->tcp_is_reading) 3317 return 1; 3318 3319 if(c->use_h2) { 3320 return comm_point_http2_handle_read(fd, c); 3321 } 3322 3323 /* http version is <= http/1.1 */ 3324 3325 if(c->http_min_version >= http_version_2) { 3326 /* HTTP/2 failed, not allowed to use lower version. */ 3327 return 0; 3328 } 3329 3330 /* read more data */ 3331 if(c->ssl) { 3332 if(!ssl_http_read_more(c)) 3333 return 0; 3334 } else { 3335 if(!http_read_more(fd, c)) 3336 return 0; 3337 } 3338 3339 if(c->http_stored >= sldns_buffer_position(c->buffer)) { 3340 /* read did not work but we wanted more data, there is 3341 * no bytes to process now. */ 3342 return 1; 3343 } 3344 sldns_buffer_flip(c->buffer); 3345 /* if we are partway in a segment of data, position us at the point 3346 * where we left off previously */ 3347 if(c->http_stored < sldns_buffer_limit(c->buffer)) 3348 sldns_buffer_set_position(c->buffer, c->http_stored); 3349 else sldns_buffer_set_position(c->buffer, sldns_buffer_limit(c->buffer)); 3350 3351 while(sldns_buffer_remaining(c->buffer) > 0) { 3352 /* Handle HTTP/1.x data */ 3353 /* if we are reading headers, read more headers */ 3354 if(c->http_in_headers || c->http_in_chunk_headers) { 3355 /* if header is done, process the header */ 3356 if(!http_header_done(c->buffer)) { 3357 /* copy remaining data to front of buffer 3358 * and set rest for writing into it */ 3359 http_moveover_buffer(c->buffer); 3360 /* return and wait to read more */ 3361 return 1; 3362 } 3363 if(!c->http_in_chunk_headers) { 3364 /* process initial headers */ 3365 if(!http_process_initial_header(c)) 3366 return 0; 3367 } else { 3368 /* process chunk headers */ 3369 int r = http_process_chunk_header(c); 3370 if(r == 0) return 0; 3371 if(r == 2) return 1; /* done */ 3372 /* r == 1, continue */ 3373 } 3374 /* see if we have more to process */ 3375 continue; 3376 } 3377 3378 if(!c->http_is_chunked) { 3379 /* if we are reading nonchunks, process that*/ 3380 return http_nonchunk_segment(c); 3381 } else { 3382 /* if we are reading chunks, read the chunk */ 3383 int r = http_chunked_segment(c); 3384 if(r == 0) return 0; 3385 if(r == 1) return 1; 3386 continue; 3387 } 3388 } 3389 /* broke out of the loop; could not process header instead need 3390 * to read more */ 3391 /* moveover any remaining data and read more data */ 3392 http_moveover_buffer(c->buffer); 3393 /* return and wait to read more */ 3394 return 1; 3395 } 3396 3397 /** check pending connect for http */ 3398 static int 3399 http_check_connect(int fd, struct comm_point* c) 3400 { 3401 /* check for pending error from nonblocking connect */ 3402 /* from Stevens, unix network programming, vol1, 3rd ed, p450*/ 3403 int error = 0; 3404 socklen_t len = (socklen_t)sizeof(error); 3405 if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error, 3406 &len) < 0){ 3407 #ifndef USE_WINSOCK 3408 error = errno; /* on solaris errno is error */ 3409 #else /* USE_WINSOCK */ 3410 error = WSAGetLastError(); 3411 #endif 3412 } 3413 #ifndef USE_WINSOCK 3414 #if defined(EINPROGRESS) && defined(EWOULDBLOCK) 3415 if(error == EINPROGRESS || error == EWOULDBLOCK) 3416 return 1; /* try again later */ 3417 else 3418 #endif 3419 if(error != 0 && verbosity < 2) 3420 return 0; /* silence lots of chatter in the logs */ 3421 else if(error != 0) { 3422 log_err_addr("http connect", strerror(error), 3423 &c->repinfo.remote_addr, c->repinfo.remote_addrlen); 3424 #else /* USE_WINSOCK */ 3425 /* examine error */ 3426 if(error == WSAEINPROGRESS) 3427 return 1; 3428 else if(error == WSAEWOULDBLOCK) { 3429 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 3430 return 1; 3431 } else if(error != 0 && verbosity < 2) 3432 return 0; 3433 else if(error != 0) { 3434 log_err_addr("http connect", wsa_strerror(error), 3435 &c->repinfo.remote_addr, c->repinfo.remote_addrlen); 3436 #endif /* USE_WINSOCK */ 3437 return 0; 3438 } 3439 /* keep on processing this socket */ 3440 return 2; 3441 } 3442 3443 /** write more data for http (with ssl) */ 3444 static int 3445 ssl_http_write_more(struct comm_point* c) 3446 { 3447 #ifdef HAVE_SSL 3448 int r; 3449 log_assert(sldns_buffer_remaining(c->buffer) > 0); 3450 ERR_clear_error(); 3451 r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer), 3452 (int)sldns_buffer_remaining(c->buffer)); 3453 if(r <= 0) { 3454 int want = SSL_get_error(c->ssl, r); 3455 if(want == SSL_ERROR_ZERO_RETURN) { 3456 return 0; /* closed */ 3457 } else if(want == SSL_ERROR_WANT_READ) { 3458 c->ssl_shake_state = comm_ssl_shake_hs_read; 3459 comm_point_listen_for_rw(c, 1, 0); 3460 return 1; /* wait for read condition */ 3461 } else if(want == SSL_ERROR_WANT_WRITE) { 3462 return 1; /* write more later */ 3463 } else if(want == SSL_ERROR_SYSCALL) { 3464 #ifdef EPIPE 3465 if(errno == EPIPE && verbosity < 2) 3466 return 0; /* silence 'broken pipe' */ 3467 #endif 3468 if(errno != 0) 3469 log_err("SSL_write syscall: %s", 3470 strerror(errno)); 3471 return 0; 3472 } 3473 log_crypto_err("could not SSL_write"); 3474 return 0; 3475 } 3476 sldns_buffer_skip(c->buffer, (ssize_t)r); 3477 return 1; 3478 #else 3479 (void)c; 3480 return 0; 3481 #endif /* HAVE_SSL */ 3482 } 3483 3484 /** write more data for http */ 3485 static int 3486 http_write_more(int fd, struct comm_point* c) 3487 { 3488 ssize_t r; 3489 log_assert(sldns_buffer_remaining(c->buffer) > 0); 3490 r = send(fd, (void*)sldns_buffer_current(c->buffer), 3491 sldns_buffer_remaining(c->buffer), 0); 3492 if(r == -1) { 3493 #ifndef USE_WINSOCK 3494 if(errno == EINTR || errno == EAGAIN) 3495 return 1; 3496 #else 3497 if(WSAGetLastError() == WSAEINPROGRESS) 3498 return 1; 3499 if(WSAGetLastError() == WSAEWOULDBLOCK) { 3500 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 3501 return 1; 3502 } 3503 #endif 3504 log_err_addr("http send r", sock_strerror(errno), 3505 &c->repinfo.remote_addr, c->repinfo.remote_addrlen); 3506 return 0; 3507 } 3508 sldns_buffer_skip(c->buffer, r); 3509 return 1; 3510 } 3511 3512 #ifdef HAVE_NGHTTP2 3513 ssize_t http2_send_cb(nghttp2_session* ATTR_UNUSED(session), const uint8_t* buf, 3514 size_t len, int ATTR_UNUSED(flags), void* cb_arg) 3515 { 3516 ssize_t ret; 3517 struct http2_session* h2_session = (struct http2_session*)cb_arg; 3518 log_assert(h2_session->c->type == comm_http); 3519 log_assert(h2_session->c->h2_session); 3520 3521 #ifdef HAVE_SSL 3522 if(h2_session->c->ssl) { 3523 int r; 3524 ERR_clear_error(); 3525 r = SSL_write(h2_session->c->ssl, buf, len); 3526 if(r <= 0) { 3527 int want = SSL_get_error(h2_session->c->ssl, r); 3528 if(want == SSL_ERROR_ZERO_RETURN) { 3529 return NGHTTP2_ERR_CALLBACK_FAILURE; 3530 } else if(want == SSL_ERROR_WANT_READ) { 3531 h2_session->c->ssl_shake_state = comm_ssl_shake_hs_read; 3532 comm_point_listen_for_rw(h2_session->c, 1, 0); 3533 return NGHTTP2_ERR_WOULDBLOCK; 3534 } else if(want == SSL_ERROR_WANT_WRITE) { 3535 return NGHTTP2_ERR_WOULDBLOCK; 3536 } else if(want == SSL_ERROR_SYSCALL) { 3537 #ifdef EPIPE 3538 if(errno == EPIPE && verbosity < 2) 3539 return NGHTTP2_ERR_CALLBACK_FAILURE; 3540 #endif 3541 if(errno != 0) 3542 log_err("SSL_write syscall: %s", 3543 strerror(errno)); 3544 return NGHTTP2_ERR_CALLBACK_FAILURE; 3545 } 3546 log_crypto_err("could not SSL_write"); 3547 return NGHTTP2_ERR_CALLBACK_FAILURE; 3548 } 3549 return r; 3550 } 3551 #endif /* HAVE_SSL */ 3552 3553 ret = send(h2_session->c->fd, buf, len, 0); 3554 if(ret == 0) { 3555 return NGHTTP2_ERR_CALLBACK_FAILURE; 3556 } else if(ret < 0) { 3557 #ifndef USE_WINSOCK 3558 if(errno == EINTR || errno == EAGAIN) 3559 return NGHTTP2_ERR_WOULDBLOCK; 3560 #ifdef EPIPE 3561 if(errno == EPIPE && verbosity < 2) 3562 return NGHTTP2_ERR_CALLBACK_FAILURE; 3563 #endif 3564 #ifdef ECONNRESET 3565 if(errno == ECONNRESET && verbosity < 2) 3566 return NGHTTP2_ERR_CALLBACK_FAILURE; 3567 #endif 3568 log_err_addr("could not http2 write: %s", strerror(errno), 3569 &h2_session->c->repinfo.remote_addr, 3570 h2_session->c->repinfo.remote_addrlen); 3571 #else /* USE_WINSOCK */ 3572 if(WSAGetLastError() == WSAENOTCONN) 3573 return NGHTTP2_ERR_WOULDBLOCK; 3574 if(WSAGetLastError() == WSAEINPROGRESS) 3575 return NGHTTP2_ERR_WOULDBLOCK; 3576 if(WSAGetLastError() == WSAEWOULDBLOCK) { 3577 ub_winsock_tcp_wouldblock(h2_session->c->ev->ev, 3578 UB_EV_WRITE); 3579 return NGHTTP2_ERR_WOULDBLOCK; 3580 } 3581 if(WSAGetLastError() == WSAECONNRESET && verbosity < 2) 3582 return NGHTTP2_ERR_CALLBACK_FAILURE; 3583 log_err_addr("could not http2 write: %s", 3584 wsa_strerror(WSAGetLastError()), 3585 &h2_session->c->repinfo.remote_addr, 3586 h2_session->c->repinfo.remote_addrlen); 3587 #endif 3588 return NGHTTP2_ERR_CALLBACK_FAILURE; 3589 } 3590 return ret; 3591 } 3592 #endif /* HAVE_NGHTTP2 */ 3593 3594 /** Handle http2 writing */ 3595 static int 3596 comm_point_http2_handle_write(int ATTR_UNUSED(fd), struct comm_point* c) 3597 { 3598 #ifdef HAVE_NGHTTP2 3599 int ret; 3600 log_assert(c->h2_session); 3601 3602 ret = nghttp2_session_send(c->h2_session->session); 3603 if(ret) { 3604 verbose(VERB_QUERY, "http2: session_send failed, " 3605 "error: %s", nghttp2_strerror(ret)); 3606 return 0; 3607 } 3608 3609 if(nghttp2_session_want_read(c->h2_session->session)) { 3610 c->tcp_is_reading = 1; 3611 comm_point_stop_listening(c); 3612 comm_point_start_listening(c, -1, adjusted_tcp_timeout(c)); 3613 } else if(!nghttp2_session_want_write(c->h2_session->session)) 3614 return 0; /* connection can be closed */ 3615 return 1; 3616 #else 3617 (void)c; 3618 return 0; 3619 #endif 3620 } 3621 3622 /** 3623 * Handle http writing callback. 3624 * @param fd: file descriptor of socket. 3625 * @param c: comm point to write buffer out of. 3626 * @return: 0 on error 3627 */ 3628 static int 3629 comm_point_http_handle_write(int fd, struct comm_point* c) 3630 { 3631 log_assert(c->type == comm_http); 3632 log_assert(fd != -1); 3633 3634 /* check pending connect errors, if that fails, we wait for more, 3635 * or we can continue to write contents */ 3636 if(c->tcp_check_nb_connect) { 3637 int r = http_check_connect(fd, c); 3638 if(r == 0) return 0; 3639 if(r == 1) return 1; 3640 c->tcp_check_nb_connect = 0; 3641 } 3642 /* if we are in ssl handshake, handle SSL handshake */ 3643 #ifdef HAVE_SSL 3644 if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) { 3645 if(!ssl_handshake(c)) 3646 return 0; 3647 if(c->ssl_shake_state != comm_ssl_shake_none) 3648 return 1; 3649 } 3650 #endif /* HAVE_SSL */ 3651 if(c->tcp_is_reading) 3652 return 1; 3653 3654 if(c->use_h2) { 3655 return comm_point_http2_handle_write(fd, c); 3656 } 3657 3658 /* http version is <= http/1.1 */ 3659 3660 if(c->http_min_version >= http_version_2) { 3661 /* HTTP/2 failed, not allowed to use lower version. */ 3662 return 0; 3663 } 3664 3665 /* if we are writing, write more */ 3666 if(c->ssl) { 3667 if(!ssl_http_write_more(c)) 3668 return 0; 3669 } else { 3670 if(!http_write_more(fd, c)) 3671 return 0; 3672 } 3673 3674 /* we write a single buffer contents, that can contain 3675 * the http request, and then flip to read the results */ 3676 /* see if write is done */ 3677 if(sldns_buffer_remaining(c->buffer) == 0) { 3678 sldns_buffer_clear(c->buffer); 3679 if(c->tcp_do_toggle_rw) 3680 c->tcp_is_reading = 1; 3681 c->tcp_byte_count = 0; 3682 /* switch from listening(write) to listening(read) */ 3683 comm_point_stop_listening(c); 3684 comm_point_start_listening(c, -1, -1); 3685 } 3686 return 1; 3687 } 3688 3689 void 3690 comm_point_http_handle_callback(int fd, short event, void* arg) 3691 { 3692 struct comm_point* c = (struct comm_point*)arg; 3693 log_assert(c->type == comm_http); 3694 ub_comm_base_now(c->ev->base); 3695 3696 if(event&UB_EV_TIMEOUT) { 3697 verbose(VERB_QUERY, "http took too long, dropped"); 3698 reclaim_http_handler(c); 3699 if(!c->tcp_do_close) { 3700 fptr_ok(fptr_whitelist_comm_point(c->callback)); 3701 (void)(*c->callback)(c, c->cb_arg, 3702 NETEVENT_TIMEOUT, NULL); 3703 } 3704 return; 3705 } 3706 if(event&UB_EV_READ) { 3707 if(!comm_point_http_handle_read(fd, c)) { 3708 reclaim_http_handler(c); 3709 if(!c->tcp_do_close) { 3710 fptr_ok(fptr_whitelist_comm_point( 3711 c->callback)); 3712 (void)(*c->callback)(c, c->cb_arg, 3713 NETEVENT_CLOSED, NULL); 3714 } 3715 } 3716 return; 3717 } 3718 if(event&UB_EV_WRITE) { 3719 if(!comm_point_http_handle_write(fd, c)) { 3720 reclaim_http_handler(c); 3721 if(!c->tcp_do_close) { 3722 fptr_ok(fptr_whitelist_comm_point( 3723 c->callback)); 3724 (void)(*c->callback)(c, c->cb_arg, 3725 NETEVENT_CLOSED, NULL); 3726 } 3727 } 3728 return; 3729 } 3730 log_err("Ignored event %d for httphdl.", event); 3731 } 3732 3733 void comm_point_local_handle_callback(int fd, short event, void* arg) 3734 { 3735 struct comm_point* c = (struct comm_point*)arg; 3736 log_assert(c->type == comm_local); 3737 ub_comm_base_now(c->ev->base); 3738 3739 if(event&UB_EV_READ) { 3740 if(!comm_point_tcp_handle_read(fd, c, 1)) { 3741 fptr_ok(fptr_whitelist_comm_point(c->callback)); 3742 (void)(*c->callback)(c, c->cb_arg, NETEVENT_CLOSED, 3743 NULL); 3744 } 3745 return; 3746 } 3747 log_err("Ignored event %d for localhdl.", event); 3748 } 3749 3750 void comm_point_raw_handle_callback(int ATTR_UNUSED(fd), 3751 short event, void* arg) 3752 { 3753 struct comm_point* c = (struct comm_point*)arg; 3754 int err = NETEVENT_NOERROR; 3755 log_assert(c->type == comm_raw); 3756 ub_comm_base_now(c->ev->base); 3757 3758 if(event&UB_EV_TIMEOUT) 3759 err = NETEVENT_TIMEOUT; 3760 fptr_ok(fptr_whitelist_comm_point_raw(c->callback)); 3761 (void)(*c->callback)(c, c->cb_arg, err, NULL); 3762 } 3763 3764 struct comm_point* 3765 comm_point_create_udp(struct comm_base *base, int fd, sldns_buffer* buffer, 3766 int pp2_enabled, comm_point_callback_type* callback, 3767 void* callback_arg, struct unbound_socket* socket) 3768 { 3769 struct comm_point* c = (struct comm_point*)calloc(1, 3770 sizeof(struct comm_point)); 3771 short evbits; 3772 if(!c) 3773 return NULL; 3774 c->ev = (struct internal_event*)calloc(1, 3775 sizeof(struct internal_event)); 3776 if(!c->ev) { 3777 free(c); 3778 return NULL; 3779 } 3780 c->ev->base = base; 3781 c->fd = fd; 3782 c->buffer = buffer; 3783 c->timeout = NULL; 3784 c->tcp_is_reading = 0; 3785 c->tcp_byte_count = 0; 3786 c->tcp_parent = NULL; 3787 c->max_tcp_count = 0; 3788 c->cur_tcp_count = 0; 3789 c->tcp_handlers = NULL; 3790 c->tcp_free = NULL; 3791 c->type = comm_udp; 3792 c->tcp_do_close = 0; 3793 c->do_not_close = 0; 3794 c->tcp_do_toggle_rw = 0; 3795 c->tcp_check_nb_connect = 0; 3796 #ifdef USE_MSG_FASTOPEN 3797 c->tcp_do_fastopen = 0; 3798 #endif 3799 #ifdef USE_DNSCRYPT 3800 c->dnscrypt = 0; 3801 c->dnscrypt_buffer = buffer; 3802 #endif 3803 c->inuse = 0; 3804 c->callback = callback; 3805 c->cb_arg = callback_arg; 3806 c->socket = socket; 3807 c->pp2_enabled = pp2_enabled; 3808 c->pp2_header_state = pp2_header_none; 3809 evbits = UB_EV_READ | UB_EV_PERSIST; 3810 /* ub_event stuff */ 3811 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 3812 comm_point_udp_callback, c); 3813 if(c->ev->ev == NULL) { 3814 log_err("could not baseset udp event"); 3815 comm_point_delete(c); 3816 return NULL; 3817 } 3818 if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) { 3819 log_err("could not add udp event"); 3820 comm_point_delete(c); 3821 return NULL; 3822 } 3823 c->event_added = 1; 3824 return c; 3825 } 3826 3827 struct comm_point* 3828 comm_point_create_udp_ancil(struct comm_base *base, int fd, 3829 sldns_buffer* buffer, int pp2_enabled, 3830 comm_point_callback_type* callback, void* callback_arg, struct unbound_socket* socket) 3831 { 3832 struct comm_point* c = (struct comm_point*)calloc(1, 3833 sizeof(struct comm_point)); 3834 short evbits; 3835 if(!c) 3836 return NULL; 3837 c->ev = (struct internal_event*)calloc(1, 3838 sizeof(struct internal_event)); 3839 if(!c->ev) { 3840 free(c); 3841 return NULL; 3842 } 3843 c->ev->base = base; 3844 c->fd = fd; 3845 c->buffer = buffer; 3846 c->timeout = NULL; 3847 c->tcp_is_reading = 0; 3848 c->tcp_byte_count = 0; 3849 c->tcp_parent = NULL; 3850 c->max_tcp_count = 0; 3851 c->cur_tcp_count = 0; 3852 c->tcp_handlers = NULL; 3853 c->tcp_free = NULL; 3854 c->type = comm_udp; 3855 c->tcp_do_close = 0; 3856 c->do_not_close = 0; 3857 #ifdef USE_DNSCRYPT 3858 c->dnscrypt = 0; 3859 c->dnscrypt_buffer = buffer; 3860 #endif 3861 c->inuse = 0; 3862 c->tcp_do_toggle_rw = 0; 3863 c->tcp_check_nb_connect = 0; 3864 #ifdef USE_MSG_FASTOPEN 3865 c->tcp_do_fastopen = 0; 3866 #endif 3867 c->callback = callback; 3868 c->cb_arg = callback_arg; 3869 c->socket = socket; 3870 c->pp2_enabled = pp2_enabled; 3871 c->pp2_header_state = pp2_header_none; 3872 evbits = UB_EV_READ | UB_EV_PERSIST; 3873 /* ub_event stuff */ 3874 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 3875 comm_point_udp_ancil_callback, c); 3876 if(c->ev->ev == NULL) { 3877 log_err("could not baseset udp event"); 3878 comm_point_delete(c); 3879 return NULL; 3880 } 3881 if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) { 3882 log_err("could not add udp event"); 3883 comm_point_delete(c); 3884 return NULL; 3885 } 3886 c->event_added = 1; 3887 return c; 3888 } 3889 3890 static struct comm_point* 3891 comm_point_create_tcp_handler(struct comm_base *base, 3892 struct comm_point* parent, size_t bufsize, 3893 struct sldns_buffer* spoolbuf, comm_point_callback_type* callback, 3894 void* callback_arg, struct unbound_socket* socket) 3895 { 3896 struct comm_point* c = (struct comm_point*)calloc(1, 3897 sizeof(struct comm_point)); 3898 short evbits; 3899 if(!c) 3900 return NULL; 3901 c->ev = (struct internal_event*)calloc(1, 3902 sizeof(struct internal_event)); 3903 if(!c->ev) { 3904 free(c); 3905 return NULL; 3906 } 3907 c->ev->base = base; 3908 c->fd = -1; 3909 c->buffer = sldns_buffer_new(bufsize); 3910 if(!c->buffer) { 3911 free(c->ev); 3912 free(c); 3913 return NULL; 3914 } 3915 c->timeout = (struct timeval*)malloc(sizeof(struct timeval)); 3916 if(!c->timeout) { 3917 sldns_buffer_free(c->buffer); 3918 free(c->ev); 3919 free(c); 3920 return NULL; 3921 } 3922 c->tcp_is_reading = 0; 3923 c->tcp_byte_count = 0; 3924 c->tcp_parent = parent; 3925 c->tcp_timeout_msec = parent->tcp_timeout_msec; 3926 c->tcp_conn_limit = parent->tcp_conn_limit; 3927 c->tcl_addr = NULL; 3928 c->tcp_keepalive = 0; 3929 c->max_tcp_count = 0; 3930 c->cur_tcp_count = 0; 3931 c->tcp_handlers = NULL; 3932 c->tcp_free = NULL; 3933 c->type = comm_tcp; 3934 c->tcp_do_close = 0; 3935 c->do_not_close = 0; 3936 c->tcp_do_toggle_rw = 1; 3937 c->tcp_check_nb_connect = 0; 3938 #ifdef USE_MSG_FASTOPEN 3939 c->tcp_do_fastopen = 0; 3940 #endif 3941 #ifdef USE_DNSCRYPT 3942 c->dnscrypt = 0; 3943 /* We don't know just yet if this is a dnscrypt channel. Allocation 3944 * will be done when handling the callback. */ 3945 c->dnscrypt_buffer = c->buffer; 3946 #endif 3947 c->repinfo.c = c; 3948 c->callback = callback; 3949 c->cb_arg = callback_arg; 3950 c->socket = socket; 3951 c->pp2_enabled = parent->pp2_enabled; 3952 c->pp2_header_state = pp2_header_none; 3953 if(spoolbuf) { 3954 c->tcp_req_info = tcp_req_info_create(spoolbuf); 3955 if(!c->tcp_req_info) { 3956 log_err("could not create tcp commpoint"); 3957 sldns_buffer_free(c->buffer); 3958 free(c->timeout); 3959 free(c->ev); 3960 free(c); 3961 return NULL; 3962 } 3963 c->tcp_req_info->cp = c; 3964 c->tcp_do_close = 1; 3965 c->tcp_do_toggle_rw = 0; 3966 } 3967 /* add to parent free list */ 3968 c->tcp_free = parent->tcp_free; 3969 parent->tcp_free = c; 3970 /* ub_event stuff */ 3971 evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT; 3972 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 3973 comm_point_tcp_handle_callback, c); 3974 if(c->ev->ev == NULL) 3975 { 3976 log_err("could not basetset tcphdl event"); 3977 parent->tcp_free = c->tcp_free; 3978 tcp_req_info_delete(c->tcp_req_info); 3979 sldns_buffer_free(c->buffer); 3980 free(c->timeout); 3981 free(c->ev); 3982 free(c); 3983 return NULL; 3984 } 3985 return c; 3986 } 3987 3988 static struct comm_point* 3989 comm_point_create_http_handler(struct comm_base *base, 3990 struct comm_point* parent, size_t bufsize, int harden_large_queries, 3991 uint32_t http_max_streams, char* http_endpoint, 3992 comm_point_callback_type* callback, void* callback_arg, 3993 struct unbound_socket* socket) 3994 { 3995 struct comm_point* c = (struct comm_point*)calloc(1, 3996 sizeof(struct comm_point)); 3997 short evbits; 3998 if(!c) 3999 return NULL; 4000 c->ev = (struct internal_event*)calloc(1, 4001 sizeof(struct internal_event)); 4002 if(!c->ev) { 4003 free(c); 4004 return NULL; 4005 } 4006 c->ev->base = base; 4007 c->fd = -1; 4008 c->buffer = sldns_buffer_new(bufsize); 4009 if(!c->buffer) { 4010 free(c->ev); 4011 free(c); 4012 return NULL; 4013 } 4014 c->timeout = (struct timeval*)malloc(sizeof(struct timeval)); 4015 if(!c->timeout) { 4016 sldns_buffer_free(c->buffer); 4017 free(c->ev); 4018 free(c); 4019 return NULL; 4020 } 4021 c->tcp_is_reading = 0; 4022 c->tcp_byte_count = 0; 4023 c->tcp_parent = parent; 4024 c->tcp_timeout_msec = parent->tcp_timeout_msec; 4025 c->tcp_conn_limit = parent->tcp_conn_limit; 4026 c->tcl_addr = NULL; 4027 c->tcp_keepalive = 0; 4028 c->max_tcp_count = 0; 4029 c->cur_tcp_count = 0; 4030 c->tcp_handlers = NULL; 4031 c->tcp_free = NULL; 4032 c->type = comm_http; 4033 c->tcp_do_close = 1; 4034 c->do_not_close = 0; 4035 c->tcp_do_toggle_rw = 1; /* will be set to 0 after http2 upgrade */ 4036 c->tcp_check_nb_connect = 0; 4037 #ifdef USE_MSG_FASTOPEN 4038 c->tcp_do_fastopen = 0; 4039 #endif 4040 #ifdef USE_DNSCRYPT 4041 c->dnscrypt = 0; 4042 c->dnscrypt_buffer = NULL; 4043 #endif 4044 c->repinfo.c = c; 4045 c->callback = callback; 4046 c->cb_arg = callback_arg; 4047 c->socket = socket; 4048 c->pp2_enabled = 0; 4049 c->pp2_header_state = pp2_header_none; 4050 4051 c->http_min_version = http_version_2; 4052 c->http2_stream_max_qbuffer_size = bufsize; 4053 if(harden_large_queries && bufsize > 512) 4054 c->http2_stream_max_qbuffer_size = 512; 4055 c->http2_max_streams = http_max_streams; 4056 if(!(c->http_endpoint = strdup(http_endpoint))) { 4057 log_err("could not strdup http_endpoint"); 4058 sldns_buffer_free(c->buffer); 4059 free(c->timeout); 4060 free(c->ev); 4061 free(c); 4062 return NULL; 4063 } 4064 c->use_h2 = 0; 4065 #ifdef HAVE_NGHTTP2 4066 if(!(c->h2_session = http2_session_create(c))) { 4067 log_err("could not create http2 session"); 4068 free(c->http_endpoint); 4069 sldns_buffer_free(c->buffer); 4070 free(c->timeout); 4071 free(c->ev); 4072 free(c); 4073 return NULL; 4074 } 4075 if(!(c->h2_session->callbacks = http2_req_callbacks_create())) { 4076 log_err("could not create http2 callbacks"); 4077 http2_session_delete(c->h2_session); 4078 free(c->http_endpoint); 4079 sldns_buffer_free(c->buffer); 4080 free(c->timeout); 4081 free(c->ev); 4082 free(c); 4083 return NULL; 4084 } 4085 #endif 4086 4087 /* add to parent free list */ 4088 c->tcp_free = parent->tcp_free; 4089 parent->tcp_free = c; 4090 /* ub_event stuff */ 4091 evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT; 4092 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 4093 comm_point_http_handle_callback, c); 4094 if(c->ev->ev == NULL) 4095 { 4096 log_err("could not set http handler event"); 4097 parent->tcp_free = c->tcp_free; 4098 http2_session_delete(c->h2_session); 4099 sldns_buffer_free(c->buffer); 4100 free(c->timeout); 4101 free(c->ev); 4102 free(c); 4103 return NULL; 4104 } 4105 return c; 4106 } 4107 4108 struct comm_point* 4109 comm_point_create_tcp(struct comm_base *base, int fd, int num, 4110 int idle_timeout, int harden_large_queries, 4111 uint32_t http_max_streams, char* http_endpoint, 4112 struct tcl_list* tcp_conn_limit, size_t bufsize, 4113 struct sldns_buffer* spoolbuf, enum listen_type port_type, 4114 int pp2_enabled, comm_point_callback_type* callback, 4115 void* callback_arg, struct unbound_socket* socket) 4116 { 4117 struct comm_point* c = (struct comm_point*)calloc(1, 4118 sizeof(struct comm_point)); 4119 short evbits; 4120 int i; 4121 /* first allocate the TCP accept listener */ 4122 if(!c) 4123 return NULL; 4124 c->ev = (struct internal_event*)calloc(1, 4125 sizeof(struct internal_event)); 4126 if(!c->ev) { 4127 free(c); 4128 return NULL; 4129 } 4130 c->ev->base = base; 4131 c->fd = fd; 4132 c->buffer = NULL; 4133 c->timeout = NULL; 4134 c->tcp_is_reading = 0; 4135 c->tcp_byte_count = 0; 4136 c->tcp_timeout_msec = idle_timeout; 4137 c->tcp_conn_limit = tcp_conn_limit; 4138 c->tcl_addr = NULL; 4139 c->tcp_keepalive = 0; 4140 c->tcp_parent = NULL; 4141 c->max_tcp_count = num; 4142 c->cur_tcp_count = 0; 4143 c->tcp_handlers = (struct comm_point**)calloc((size_t)num, 4144 sizeof(struct comm_point*)); 4145 if(!c->tcp_handlers) { 4146 free(c->ev); 4147 free(c); 4148 return NULL; 4149 } 4150 c->tcp_free = NULL; 4151 c->type = comm_tcp_accept; 4152 c->tcp_do_close = 0; 4153 c->do_not_close = 0; 4154 c->tcp_do_toggle_rw = 0; 4155 c->tcp_check_nb_connect = 0; 4156 #ifdef USE_MSG_FASTOPEN 4157 c->tcp_do_fastopen = 0; 4158 #endif 4159 #ifdef USE_DNSCRYPT 4160 c->dnscrypt = 0; 4161 c->dnscrypt_buffer = NULL; 4162 #endif 4163 c->callback = NULL; 4164 c->cb_arg = NULL; 4165 c->socket = socket; 4166 c->pp2_enabled = (port_type==listen_type_http?0:pp2_enabled); 4167 c->pp2_header_state = pp2_header_none; 4168 evbits = UB_EV_READ | UB_EV_PERSIST; 4169 /* ub_event stuff */ 4170 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 4171 comm_point_tcp_accept_callback, c); 4172 if(c->ev->ev == NULL) { 4173 log_err("could not baseset tcpacc event"); 4174 comm_point_delete(c); 4175 return NULL; 4176 } 4177 if (ub_event_add(c->ev->ev, c->timeout) != 0) { 4178 log_err("could not add tcpacc event"); 4179 comm_point_delete(c); 4180 return NULL; 4181 } 4182 c->event_added = 1; 4183 /* now prealloc the handlers */ 4184 for(i=0; i<num; i++) { 4185 if(port_type == listen_type_tcp || 4186 port_type == listen_type_ssl || 4187 port_type == listen_type_tcp_dnscrypt) { 4188 c->tcp_handlers[i] = comm_point_create_tcp_handler(base, 4189 c, bufsize, spoolbuf, callback, callback_arg, socket); 4190 } else if(port_type == listen_type_http) { 4191 c->tcp_handlers[i] = comm_point_create_http_handler( 4192 base, c, bufsize, harden_large_queries, 4193 http_max_streams, http_endpoint, 4194 callback, callback_arg, socket); 4195 } 4196 else { 4197 log_err("could not create tcp handler, unknown listen " 4198 "type"); 4199 return NULL; 4200 } 4201 if(!c->tcp_handlers[i]) { 4202 comm_point_delete(c); 4203 return NULL; 4204 } 4205 } 4206 4207 return c; 4208 } 4209 4210 struct comm_point* 4211 comm_point_create_tcp_out(struct comm_base *base, size_t bufsize, 4212 comm_point_callback_type* callback, void* callback_arg) 4213 { 4214 struct comm_point* c = (struct comm_point*)calloc(1, 4215 sizeof(struct comm_point)); 4216 short evbits; 4217 if(!c) 4218 return NULL; 4219 c->ev = (struct internal_event*)calloc(1, 4220 sizeof(struct internal_event)); 4221 if(!c->ev) { 4222 free(c); 4223 return NULL; 4224 } 4225 c->ev->base = base; 4226 c->fd = -1; 4227 c->buffer = sldns_buffer_new(bufsize); 4228 if(!c->buffer) { 4229 free(c->ev); 4230 free(c); 4231 return NULL; 4232 } 4233 c->timeout = NULL; 4234 c->tcp_is_reading = 0; 4235 c->tcp_byte_count = 0; 4236 c->tcp_timeout_msec = TCP_QUERY_TIMEOUT; 4237 c->tcp_conn_limit = NULL; 4238 c->tcl_addr = NULL; 4239 c->tcp_keepalive = 0; 4240 c->tcp_parent = NULL; 4241 c->max_tcp_count = 0; 4242 c->cur_tcp_count = 0; 4243 c->tcp_handlers = NULL; 4244 c->tcp_free = NULL; 4245 c->type = comm_tcp; 4246 c->tcp_do_close = 0; 4247 c->do_not_close = 0; 4248 c->tcp_do_toggle_rw = 1; 4249 c->tcp_check_nb_connect = 1; 4250 #ifdef USE_MSG_FASTOPEN 4251 c->tcp_do_fastopen = 1; 4252 #endif 4253 #ifdef USE_DNSCRYPT 4254 c->dnscrypt = 0; 4255 c->dnscrypt_buffer = c->buffer; 4256 #endif 4257 c->repinfo.c = c; 4258 c->callback = callback; 4259 c->cb_arg = callback_arg; 4260 c->pp2_enabled = 0; 4261 c->pp2_header_state = pp2_header_none; 4262 evbits = UB_EV_PERSIST | UB_EV_WRITE; 4263 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 4264 comm_point_tcp_handle_callback, c); 4265 if(c->ev->ev == NULL) 4266 { 4267 log_err("could not baseset tcpout event"); 4268 sldns_buffer_free(c->buffer); 4269 free(c->ev); 4270 free(c); 4271 return NULL; 4272 } 4273 4274 return c; 4275 } 4276 4277 struct comm_point* 4278 comm_point_create_http_out(struct comm_base *base, size_t bufsize, 4279 comm_point_callback_type* callback, void* callback_arg, 4280 sldns_buffer* temp) 4281 { 4282 struct comm_point* c = (struct comm_point*)calloc(1, 4283 sizeof(struct comm_point)); 4284 short evbits; 4285 if(!c) 4286 return NULL; 4287 c->ev = (struct internal_event*)calloc(1, 4288 sizeof(struct internal_event)); 4289 if(!c->ev) { 4290 free(c); 4291 return NULL; 4292 } 4293 c->ev->base = base; 4294 c->fd = -1; 4295 c->buffer = sldns_buffer_new(bufsize); 4296 if(!c->buffer) { 4297 free(c->ev); 4298 free(c); 4299 return NULL; 4300 } 4301 c->timeout = NULL; 4302 c->tcp_is_reading = 0; 4303 c->tcp_byte_count = 0; 4304 c->tcp_parent = NULL; 4305 c->max_tcp_count = 0; 4306 c->cur_tcp_count = 0; 4307 c->tcp_handlers = NULL; 4308 c->tcp_free = NULL; 4309 c->type = comm_http; 4310 c->tcp_do_close = 0; 4311 c->do_not_close = 0; 4312 c->tcp_do_toggle_rw = 1; 4313 c->tcp_check_nb_connect = 1; 4314 c->http_in_headers = 1; 4315 c->http_in_chunk_headers = 0; 4316 c->http_is_chunked = 0; 4317 c->http_temp = temp; 4318 #ifdef USE_MSG_FASTOPEN 4319 c->tcp_do_fastopen = 1; 4320 #endif 4321 #ifdef USE_DNSCRYPT 4322 c->dnscrypt = 0; 4323 c->dnscrypt_buffer = c->buffer; 4324 #endif 4325 c->repinfo.c = c; 4326 c->callback = callback; 4327 c->cb_arg = callback_arg; 4328 c->pp2_enabled = 0; 4329 c->pp2_header_state = pp2_header_none; 4330 evbits = UB_EV_PERSIST | UB_EV_WRITE; 4331 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 4332 comm_point_http_handle_callback, c); 4333 if(c->ev->ev == NULL) 4334 { 4335 log_err("could not baseset tcpout event"); 4336 #ifdef HAVE_SSL 4337 SSL_free(c->ssl); 4338 #endif 4339 sldns_buffer_free(c->buffer); 4340 free(c->ev); 4341 free(c); 4342 return NULL; 4343 } 4344 4345 return c; 4346 } 4347 4348 struct comm_point* 4349 comm_point_create_local(struct comm_base *base, int fd, size_t bufsize, 4350 comm_point_callback_type* callback, void* callback_arg) 4351 { 4352 struct comm_point* c = (struct comm_point*)calloc(1, 4353 sizeof(struct comm_point)); 4354 short evbits; 4355 if(!c) 4356 return NULL; 4357 c->ev = (struct internal_event*)calloc(1, 4358 sizeof(struct internal_event)); 4359 if(!c->ev) { 4360 free(c); 4361 return NULL; 4362 } 4363 c->ev->base = base; 4364 c->fd = fd; 4365 c->buffer = sldns_buffer_new(bufsize); 4366 if(!c->buffer) { 4367 free(c->ev); 4368 free(c); 4369 return NULL; 4370 } 4371 c->timeout = NULL; 4372 c->tcp_is_reading = 1; 4373 c->tcp_byte_count = 0; 4374 c->tcp_parent = NULL; 4375 c->max_tcp_count = 0; 4376 c->cur_tcp_count = 0; 4377 c->tcp_handlers = NULL; 4378 c->tcp_free = NULL; 4379 c->type = comm_local; 4380 c->tcp_do_close = 0; 4381 c->do_not_close = 1; 4382 c->tcp_do_toggle_rw = 0; 4383 c->tcp_check_nb_connect = 0; 4384 #ifdef USE_MSG_FASTOPEN 4385 c->tcp_do_fastopen = 0; 4386 #endif 4387 #ifdef USE_DNSCRYPT 4388 c->dnscrypt = 0; 4389 c->dnscrypt_buffer = c->buffer; 4390 #endif 4391 c->callback = callback; 4392 c->cb_arg = callback_arg; 4393 c->pp2_enabled = 0; 4394 c->pp2_header_state = pp2_header_none; 4395 /* ub_event stuff */ 4396 evbits = UB_EV_PERSIST | UB_EV_READ; 4397 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 4398 comm_point_local_handle_callback, c); 4399 if(c->ev->ev == NULL) { 4400 log_err("could not baseset localhdl event"); 4401 free(c->ev); 4402 free(c); 4403 return NULL; 4404 } 4405 if (ub_event_add(c->ev->ev, c->timeout) != 0) { 4406 log_err("could not add localhdl event"); 4407 ub_event_free(c->ev->ev); 4408 free(c->ev); 4409 free(c); 4410 return NULL; 4411 } 4412 c->event_added = 1; 4413 return c; 4414 } 4415 4416 struct comm_point* 4417 comm_point_create_raw(struct comm_base* base, int fd, int writing, 4418 comm_point_callback_type* callback, void* callback_arg) 4419 { 4420 struct comm_point* c = (struct comm_point*)calloc(1, 4421 sizeof(struct comm_point)); 4422 short evbits; 4423 if(!c) 4424 return NULL; 4425 c->ev = (struct internal_event*)calloc(1, 4426 sizeof(struct internal_event)); 4427 if(!c->ev) { 4428 free(c); 4429 return NULL; 4430 } 4431 c->ev->base = base; 4432 c->fd = fd; 4433 c->buffer = NULL; 4434 c->timeout = NULL; 4435 c->tcp_is_reading = 0; 4436 c->tcp_byte_count = 0; 4437 c->tcp_parent = NULL; 4438 c->max_tcp_count = 0; 4439 c->cur_tcp_count = 0; 4440 c->tcp_handlers = NULL; 4441 c->tcp_free = NULL; 4442 c->type = comm_raw; 4443 c->tcp_do_close = 0; 4444 c->do_not_close = 1; 4445 c->tcp_do_toggle_rw = 0; 4446 c->tcp_check_nb_connect = 0; 4447 #ifdef USE_MSG_FASTOPEN 4448 c->tcp_do_fastopen = 0; 4449 #endif 4450 #ifdef USE_DNSCRYPT 4451 c->dnscrypt = 0; 4452 c->dnscrypt_buffer = c->buffer; 4453 #endif 4454 c->callback = callback; 4455 c->cb_arg = callback_arg; 4456 c->pp2_enabled = 0; 4457 c->pp2_header_state = pp2_header_none; 4458 /* ub_event stuff */ 4459 if(writing) 4460 evbits = UB_EV_PERSIST | UB_EV_WRITE; 4461 else evbits = UB_EV_PERSIST | UB_EV_READ; 4462 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 4463 comm_point_raw_handle_callback, c); 4464 if(c->ev->ev == NULL) { 4465 log_err("could not baseset rawhdl event"); 4466 free(c->ev); 4467 free(c); 4468 return NULL; 4469 } 4470 if (ub_event_add(c->ev->ev, c->timeout) != 0) { 4471 log_err("could not add rawhdl event"); 4472 ub_event_free(c->ev->ev); 4473 free(c->ev); 4474 free(c); 4475 return NULL; 4476 } 4477 c->event_added = 1; 4478 return c; 4479 } 4480 4481 void 4482 comm_point_close(struct comm_point* c) 4483 { 4484 if(!c) 4485 return; 4486 if(c->fd != -1) { 4487 verbose(5, "comm_point_close of %d: event_del", c->fd); 4488 if(c->event_added) { 4489 if(ub_event_del(c->ev->ev) != 0) { 4490 log_err("could not event_del on close"); 4491 } 4492 c->event_added = 0; 4493 } 4494 } 4495 tcl_close_connection(c->tcl_addr); 4496 if(c->tcp_req_info) 4497 tcp_req_info_clear(c->tcp_req_info); 4498 if(c->h2_session) 4499 http2_session_server_delete(c->h2_session); 4500 /* stop the comm point from reading or writing after it is closed. */ 4501 if(c->tcp_more_read_again && *c->tcp_more_read_again) 4502 *c->tcp_more_read_again = 0; 4503 if(c->tcp_more_write_again && *c->tcp_more_write_again) 4504 *c->tcp_more_write_again = 0; 4505 4506 /* close fd after removing from event lists, or epoll.. is messed up */ 4507 if(c->fd != -1 && !c->do_not_close) { 4508 #ifdef USE_WINSOCK 4509 if(c->type == comm_tcp || c->type == comm_http) { 4510 /* delete sticky events for the fd, it gets closed */ 4511 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 4512 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 4513 } 4514 #endif 4515 verbose(VERB_ALGO, "close fd %d", c->fd); 4516 sock_close(c->fd); 4517 } 4518 c->fd = -1; 4519 } 4520 4521 void 4522 comm_point_delete(struct comm_point* c) 4523 { 4524 if(!c) 4525 return; 4526 if((c->type == comm_tcp || c->type == comm_http) && c->ssl) { 4527 #ifdef HAVE_SSL 4528 SSL_shutdown(c->ssl); 4529 SSL_free(c->ssl); 4530 #endif 4531 } 4532 if(c->type == comm_http && c->http_endpoint) { 4533 free(c->http_endpoint); 4534 c->http_endpoint = NULL; 4535 } 4536 comm_point_close(c); 4537 if(c->tcp_handlers) { 4538 int i; 4539 for(i=0; i<c->max_tcp_count; i++) 4540 comm_point_delete(c->tcp_handlers[i]); 4541 free(c->tcp_handlers); 4542 } 4543 free(c->timeout); 4544 if(c->type == comm_tcp || c->type == comm_local || c->type == comm_http) { 4545 sldns_buffer_free(c->buffer); 4546 #ifdef USE_DNSCRYPT 4547 if(c->dnscrypt && c->dnscrypt_buffer != c->buffer) { 4548 sldns_buffer_free(c->dnscrypt_buffer); 4549 } 4550 #endif 4551 if(c->tcp_req_info) { 4552 tcp_req_info_delete(c->tcp_req_info); 4553 } 4554 if(c->h2_session) { 4555 http2_session_delete(c->h2_session); 4556 } 4557 } 4558 ub_event_free(c->ev->ev); 4559 free(c->ev); 4560 free(c); 4561 } 4562 4563 void 4564 comm_point_send_reply(struct comm_reply *repinfo) 4565 { 4566 struct sldns_buffer* buffer; 4567 log_assert(repinfo && repinfo->c); 4568 #ifdef USE_DNSCRYPT 4569 buffer = repinfo->c->dnscrypt_buffer; 4570 if(!dnsc_handle_uncurved_request(repinfo)) { 4571 return; 4572 } 4573 #else 4574 buffer = repinfo->c->buffer; 4575 #endif 4576 if(repinfo->c->type == comm_udp) { 4577 if(repinfo->srctype) 4578 comm_point_send_udp_msg_if(repinfo->c, buffer, 4579 (struct sockaddr*)&repinfo->remote_addr, 4580 repinfo->remote_addrlen, repinfo); 4581 else 4582 comm_point_send_udp_msg(repinfo->c, buffer, 4583 (struct sockaddr*)&repinfo->remote_addr, 4584 repinfo->remote_addrlen, 0); 4585 #ifdef USE_DNSTAP 4586 /* 4587 * sending src (client)/dst (local service) addresses over DNSTAP from udp callback 4588 */ 4589 if(repinfo->c->dtenv != NULL && repinfo->c->dtenv->log_client_response_messages) { 4590 log_addr(VERB_ALGO, "from local addr", (void*)repinfo->c->socket->addr->ai_addr, repinfo->c->socket->addr->ai_addrlen); 4591 log_addr(VERB_ALGO, "response to client", &repinfo->client_addr, repinfo->client_addrlen); 4592 dt_msg_send_client_response(repinfo->c->dtenv, &repinfo->client_addr, (void*)repinfo->c->socket->addr->ai_addr, repinfo->c->type, repinfo->c->buffer); 4593 } 4594 #endif 4595 } else { 4596 #ifdef USE_DNSTAP 4597 /* 4598 * sending src (client)/dst (local service) addresses over DNSTAP from TCP callback 4599 */ 4600 if(repinfo->c->tcp_parent->dtenv != NULL && repinfo->c->tcp_parent->dtenv->log_client_response_messages) { 4601 log_addr(VERB_ALGO, "from local addr", (void*)repinfo->c->socket->addr->ai_addr, repinfo->c->socket->addr->ai_addrlen); 4602 log_addr(VERB_ALGO, "response to client", &repinfo->client_addr, repinfo->client_addrlen); 4603 dt_msg_send_client_response(repinfo->c->tcp_parent->dtenv, &repinfo->client_addr, (void*)repinfo->c->socket->addr->ai_addr, repinfo->c->type, 4604 ( repinfo->c->tcp_req_info? repinfo->c->tcp_req_info->spool_buffer: repinfo->c->buffer )); 4605 } 4606 #endif 4607 if(repinfo->c->tcp_req_info) { 4608 tcp_req_info_send_reply(repinfo->c->tcp_req_info); 4609 } else if(repinfo->c->use_h2) { 4610 if(!http2_submit_dns_response(repinfo->c->h2_session)) { 4611 comm_point_drop_reply(repinfo); 4612 return; 4613 } 4614 repinfo->c->h2_stream = NULL; 4615 repinfo->c->tcp_is_reading = 0; 4616 comm_point_stop_listening(repinfo->c); 4617 comm_point_start_listening(repinfo->c, -1, 4618 adjusted_tcp_timeout(repinfo->c)); 4619 return; 4620 } else { 4621 comm_point_start_listening(repinfo->c, -1, 4622 adjusted_tcp_timeout(repinfo->c)); 4623 } 4624 } 4625 } 4626 4627 void 4628 comm_point_drop_reply(struct comm_reply* repinfo) 4629 { 4630 if(!repinfo) 4631 return; 4632 log_assert(repinfo->c); 4633 log_assert(repinfo->c->type != comm_tcp_accept); 4634 if(repinfo->c->type == comm_udp) 4635 return; 4636 if(repinfo->c->tcp_req_info) 4637 repinfo->c->tcp_req_info->is_drop = 1; 4638 if(repinfo->c->type == comm_http) { 4639 if(repinfo->c->h2_session) { 4640 repinfo->c->h2_session->is_drop = 1; 4641 if(!repinfo->c->h2_session->postpone_drop) 4642 reclaim_http_handler(repinfo->c); 4643 return; 4644 } 4645 reclaim_http_handler(repinfo->c); 4646 return; 4647 } 4648 reclaim_tcp_handler(repinfo->c); 4649 } 4650 4651 void 4652 comm_point_stop_listening(struct comm_point* c) 4653 { 4654 verbose(VERB_ALGO, "comm point stop listening %d", c->fd); 4655 if(c->event_added) { 4656 if(ub_event_del(c->ev->ev) != 0) { 4657 log_err("event_del error to stoplisten"); 4658 } 4659 c->event_added = 0; 4660 } 4661 } 4662 4663 void 4664 comm_point_start_listening(struct comm_point* c, int newfd, int msec) 4665 { 4666 verbose(VERB_ALGO, "comm point start listening %d (%d msec)", 4667 c->fd==-1?newfd:c->fd, msec); 4668 if(c->type == comm_tcp_accept && !c->tcp_free) { 4669 /* no use to start listening no free slots. */ 4670 return; 4671 } 4672 if(c->event_added) { 4673 if(ub_event_del(c->ev->ev) != 0) { 4674 log_err("event_del error to startlisten"); 4675 } 4676 c->event_added = 0; 4677 } 4678 if(msec != -1 && msec != 0) { 4679 if(!c->timeout) { 4680 c->timeout = (struct timeval*)malloc(sizeof( 4681 struct timeval)); 4682 if(!c->timeout) { 4683 log_err("cpsl: malloc failed. No net read."); 4684 return; 4685 } 4686 } 4687 ub_event_add_bits(c->ev->ev, UB_EV_TIMEOUT); 4688 #ifndef S_SPLINT_S /* splint fails on struct timeval. */ 4689 c->timeout->tv_sec = msec/1000; 4690 c->timeout->tv_usec = (msec%1000)*1000; 4691 #endif /* S_SPLINT_S */ 4692 } else { 4693 if(msec == 0 || !c->timeout) { 4694 ub_event_del_bits(c->ev->ev, UB_EV_TIMEOUT); 4695 } 4696 } 4697 if(c->type == comm_tcp || c->type == comm_http) { 4698 ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE); 4699 if(c->tcp_write_and_read) { 4700 verbose(5, "startlistening %d mode rw", (newfd==-1?c->fd:newfd)); 4701 ub_event_add_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE); 4702 } else if(c->tcp_is_reading) { 4703 verbose(5, "startlistening %d mode r", (newfd==-1?c->fd:newfd)); 4704 ub_event_add_bits(c->ev->ev, UB_EV_READ); 4705 } else { 4706 verbose(5, "startlistening %d mode w", (newfd==-1?c->fd:newfd)); 4707 ub_event_add_bits(c->ev->ev, UB_EV_WRITE); 4708 } 4709 } 4710 if(newfd != -1) { 4711 if(c->fd != -1 && c->fd != newfd) { 4712 verbose(5, "cpsl close of fd %d for %d", c->fd, newfd); 4713 sock_close(c->fd); 4714 } 4715 c->fd = newfd; 4716 ub_event_set_fd(c->ev->ev, c->fd); 4717 } 4718 if(ub_event_add(c->ev->ev, msec==0?NULL:c->timeout) != 0) { 4719 log_err("event_add failed. in cpsl."); 4720 return; 4721 } 4722 c->event_added = 1; 4723 } 4724 4725 void comm_point_listen_for_rw(struct comm_point* c, int rd, int wr) 4726 { 4727 verbose(VERB_ALGO, "comm point listen_for_rw %d %d", c->fd, wr); 4728 if(c->event_added) { 4729 if(ub_event_del(c->ev->ev) != 0) { 4730 log_err("event_del error to cplf"); 4731 } 4732 c->event_added = 0; 4733 } 4734 if(!c->timeout) { 4735 ub_event_del_bits(c->ev->ev, UB_EV_TIMEOUT); 4736 } 4737 ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE); 4738 if(rd) ub_event_add_bits(c->ev->ev, UB_EV_READ); 4739 if(wr) ub_event_add_bits(c->ev->ev, UB_EV_WRITE); 4740 if(ub_event_add(c->ev->ev, c->timeout) != 0) { 4741 log_err("event_add failed. in cplf."); 4742 return; 4743 } 4744 c->event_added = 1; 4745 } 4746 4747 size_t comm_point_get_mem(struct comm_point* c) 4748 { 4749 size_t s; 4750 if(!c) 4751 return 0; 4752 s = sizeof(*c) + sizeof(*c->ev); 4753 if(c->timeout) 4754 s += sizeof(*c->timeout); 4755 if(c->type == comm_tcp || c->type == comm_local) { 4756 s += sizeof(*c->buffer) + sldns_buffer_capacity(c->buffer); 4757 #ifdef USE_DNSCRYPT 4758 s += sizeof(*c->dnscrypt_buffer); 4759 if(c->buffer != c->dnscrypt_buffer) { 4760 s += sldns_buffer_capacity(c->dnscrypt_buffer); 4761 } 4762 #endif 4763 } 4764 if(c->type == comm_tcp_accept) { 4765 int i; 4766 for(i=0; i<c->max_tcp_count; i++) 4767 s += comm_point_get_mem(c->tcp_handlers[i]); 4768 } 4769 return s; 4770 } 4771 4772 struct comm_timer* 4773 comm_timer_create(struct comm_base* base, void (*cb)(void*), void* cb_arg) 4774 { 4775 struct internal_timer *tm = (struct internal_timer*)calloc(1, 4776 sizeof(struct internal_timer)); 4777 if(!tm) { 4778 log_err("malloc failed"); 4779 return NULL; 4780 } 4781 tm->super.ev_timer = tm; 4782 tm->base = base; 4783 tm->super.callback = cb; 4784 tm->super.cb_arg = cb_arg; 4785 tm->ev = ub_event_new(base->eb->base, -1, UB_EV_TIMEOUT, 4786 comm_timer_callback, &tm->super); 4787 if(tm->ev == NULL) { 4788 log_err("timer_create: event_base_set failed."); 4789 free(tm); 4790 return NULL; 4791 } 4792 return &tm->super; 4793 } 4794 4795 void 4796 comm_timer_disable(struct comm_timer* timer) 4797 { 4798 if(!timer) 4799 return; 4800 ub_timer_del(timer->ev_timer->ev); 4801 timer->ev_timer->enabled = 0; 4802 } 4803 4804 void 4805 comm_timer_set(struct comm_timer* timer, struct timeval* tv) 4806 { 4807 log_assert(tv); 4808 if(timer->ev_timer->enabled) 4809 comm_timer_disable(timer); 4810 if(ub_timer_add(timer->ev_timer->ev, timer->ev_timer->base->eb->base, 4811 comm_timer_callback, timer, tv) != 0) 4812 log_err("comm_timer_set: evtimer_add failed."); 4813 timer->ev_timer->enabled = 1; 4814 } 4815 4816 void 4817 comm_timer_delete(struct comm_timer* timer) 4818 { 4819 if(!timer) 4820 return; 4821 comm_timer_disable(timer); 4822 /* Free the sub struct timer->ev_timer derived from the super struct timer. 4823 * i.e. assert(timer == timer->ev_timer) 4824 */ 4825 ub_event_free(timer->ev_timer->ev); 4826 free(timer->ev_timer); 4827 } 4828 4829 void 4830 comm_timer_callback(int ATTR_UNUSED(fd), short event, void* arg) 4831 { 4832 struct comm_timer* tm = (struct comm_timer*)arg; 4833 if(!(event&UB_EV_TIMEOUT)) 4834 return; 4835 ub_comm_base_now(tm->ev_timer->base); 4836 tm->ev_timer->enabled = 0; 4837 fptr_ok(fptr_whitelist_comm_timer(tm->callback)); 4838 (*tm->callback)(tm->cb_arg); 4839 } 4840 4841 int 4842 comm_timer_is_set(struct comm_timer* timer) 4843 { 4844 return (int)timer->ev_timer->enabled; 4845 } 4846 4847 size_t 4848 comm_timer_get_mem(struct comm_timer* ATTR_UNUSED(timer)) 4849 { 4850 return sizeof(struct internal_timer); 4851 } 4852 4853 struct comm_signal* 4854 comm_signal_create(struct comm_base* base, 4855 void (*callback)(int, void*), void* cb_arg) 4856 { 4857 struct comm_signal* com = (struct comm_signal*)malloc( 4858 sizeof(struct comm_signal)); 4859 if(!com) { 4860 log_err("malloc failed"); 4861 return NULL; 4862 } 4863 com->base = base; 4864 com->callback = callback; 4865 com->cb_arg = cb_arg; 4866 com->ev_signal = NULL; 4867 return com; 4868 } 4869 4870 void 4871 comm_signal_callback(int sig, short event, void* arg) 4872 { 4873 struct comm_signal* comsig = (struct comm_signal*)arg; 4874 if(!(event & UB_EV_SIGNAL)) 4875 return; 4876 ub_comm_base_now(comsig->base); 4877 fptr_ok(fptr_whitelist_comm_signal(comsig->callback)); 4878 (*comsig->callback)(sig, comsig->cb_arg); 4879 } 4880 4881 int 4882 comm_signal_bind(struct comm_signal* comsig, int sig) 4883 { 4884 struct internal_signal* entry = (struct internal_signal*)calloc(1, 4885 sizeof(struct internal_signal)); 4886 if(!entry) { 4887 log_err("malloc failed"); 4888 return 0; 4889 } 4890 log_assert(comsig); 4891 /* add signal event */ 4892 entry->ev = ub_signal_new(comsig->base->eb->base, sig, 4893 comm_signal_callback, comsig); 4894 if(entry->ev == NULL) { 4895 log_err("Could not create signal event"); 4896 free(entry); 4897 return 0; 4898 } 4899 if(ub_signal_add(entry->ev, NULL) != 0) { 4900 log_err("Could not add signal handler"); 4901 ub_event_free(entry->ev); 4902 free(entry); 4903 return 0; 4904 } 4905 /* link into list */ 4906 entry->next = comsig->ev_signal; 4907 comsig->ev_signal = entry; 4908 return 1; 4909 } 4910 4911 void 4912 comm_signal_delete(struct comm_signal* comsig) 4913 { 4914 struct internal_signal* p, *np; 4915 if(!comsig) 4916 return; 4917 p=comsig->ev_signal; 4918 while(p) { 4919 np = p->next; 4920 ub_signal_del(p->ev); 4921 ub_event_free(p->ev); 4922 free(p); 4923 p = np; 4924 } 4925 free(comsig); 4926 } 4927