1 /*- 2 * Copyright (c) 2000-2013 Dag-Erling Smørgrav 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 /* 33 * The following copyright applies to the base64 code: 34 * 35 *- 36 * Copyright 1997 Massachusetts Institute of Technology 37 * 38 * Permission to use, copy, modify, and distribute this software and 39 * its documentation for any purpose and without fee is hereby 40 * granted, provided that both the above copyright notice and this 41 * permission notice appear in all copies, that both the above 42 * copyright notice and this permission notice appear in all 43 * supporting documentation, and that the name of M.I.T. not be used 44 * in advertising or publicity pertaining to distribution of the 45 * software without specific, written prior permission. M.I.T. makes 46 * no representations about the suitability of this software for any 47 * purpose. It is provided "as is" without express or implied 48 * warranty. 49 * 50 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS 51 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, 52 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 53 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT 54 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 55 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 56 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 57 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 58 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 59 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 60 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 */ 63 64 #include <sys/param.h> 65 #include <sys/socket.h> 66 #include <sys/time.h> 67 68 #include <ctype.h> 69 #include <err.h> 70 #include <errno.h> 71 #include <locale.h> 72 #include <netdb.h> 73 #include <stdarg.h> 74 #include <stdio.h> 75 #include <stdlib.h> 76 #include <string.h> 77 #include <time.h> 78 #include <unistd.h> 79 80 #ifdef WITH_SSL 81 #include <openssl/md5.h> 82 #define MD5Init(c) MD5_Init(c) 83 #define MD5Update(c, data, len) MD5_Update(c, data, len) 84 #define MD5Final(md, c) MD5_Final(md, c) 85 #else 86 #include <md5.h> 87 #endif 88 89 #include <netinet/in.h> 90 #include <netinet/tcp.h> 91 92 #include "fetch.h" 93 #include "common.h" 94 #include "httperr.h" 95 96 /* Maximum number of redirects to follow */ 97 #define MAX_REDIRECT 20 98 99 /* Symbolic names for reply codes we care about */ 100 #define HTTP_OK 200 101 #define HTTP_PARTIAL 206 102 #define HTTP_MOVED_PERM 301 103 #define HTTP_MOVED_TEMP 302 104 #define HTTP_SEE_OTHER 303 105 #define HTTP_NOT_MODIFIED 304 106 #define HTTP_USE_PROXY 305 107 #define HTTP_TEMP_REDIRECT 307 108 #define HTTP_PERM_REDIRECT 308 109 #define HTTP_NEED_AUTH 401 110 #define HTTP_NEED_PROXY_AUTH 407 111 #define HTTP_BAD_RANGE 416 112 #define HTTP_PROTOCOL_ERROR 999 113 114 #define HTTP_REDIRECT(xyz) ((xyz) == HTTP_MOVED_PERM \ 115 || (xyz) == HTTP_MOVED_TEMP \ 116 || (xyz) == HTTP_TEMP_REDIRECT \ 117 || (xyz) == HTTP_USE_PROXY \ 118 || (xyz) == HTTP_SEE_OTHER) 119 120 #define HTTP_ERROR(xyz) ((xyz) > 400 && (xyz) < 599) 121 122 123 /***************************************************************************** 124 * I/O functions for decoding chunked streams 125 */ 126 127 struct httpio 128 { 129 conn_t *conn; /* connection */ 130 int chunked; /* chunked mode */ 131 char *buf; /* chunk buffer */ 132 size_t bufsize; /* size of chunk buffer */ 133 ssize_t buflen; /* amount of data currently in buffer */ 134 int bufpos; /* current read offset in buffer */ 135 int eof; /* end-of-file flag */ 136 int error; /* error flag */ 137 size_t chunksize; /* remaining size of current chunk */ 138 #ifndef NDEBUG 139 size_t total; 140 #endif 141 }; 142 143 /* 144 * Get next chunk header 145 */ 146 static int 147 http_new_chunk(struct httpio *io) 148 { 149 char *p; 150 151 if (fetch_getln(io->conn) == -1) 152 return (-1); 153 154 if (io->conn->buflen < 2 || !isxdigit((unsigned char)*io->conn->buf)) 155 return (-1); 156 157 for (p = io->conn->buf; *p && !isspace((unsigned char)*p); ++p) { 158 if (*p == ';') 159 break; 160 if (!isxdigit((unsigned char)*p)) 161 return (-1); 162 if (isdigit((unsigned char)*p)) { 163 io->chunksize = io->chunksize * 16 + 164 *p - '0'; 165 } else { 166 io->chunksize = io->chunksize * 16 + 167 10 + tolower((unsigned char)*p) - 'a'; 168 } 169 } 170 171 #ifndef NDEBUG 172 if (fetchDebug) { 173 io->total += io->chunksize; 174 if (io->chunksize == 0) 175 fprintf(stderr, "%s(): end of last chunk\n", __func__); 176 else 177 fprintf(stderr, "%s(): new chunk: %lu (%lu)\n", 178 __func__, (unsigned long)io->chunksize, 179 (unsigned long)io->total); 180 } 181 #endif 182 183 return (io->chunksize); 184 } 185 186 /* 187 * Grow the input buffer to at least len bytes 188 */ 189 static inline int 190 http_growbuf(struct httpio *io, size_t len) 191 { 192 char *tmp; 193 194 if (io->bufsize >= len) 195 return (0); 196 197 if ((tmp = realloc(io->buf, len)) == NULL) 198 return (-1); 199 io->buf = tmp; 200 io->bufsize = len; 201 return (0); 202 } 203 204 /* 205 * Fill the input buffer, do chunk decoding on the fly 206 */ 207 static int 208 http_fillbuf(struct httpio *io, size_t len) 209 { 210 ssize_t nbytes; 211 char ch; 212 213 if (io->error) 214 return (-1); 215 if (io->eof) 216 return (0); 217 218 if (io->chunked == 0) { 219 if (http_growbuf(io, len) == -1) 220 return (-1); 221 if ((nbytes = fetch_read(io->conn, io->buf, len)) == -1) { 222 io->error = errno; 223 return (-1); 224 } 225 io->buflen = nbytes; 226 io->bufpos = 0; 227 return (io->buflen); 228 } 229 230 if (io->chunksize == 0) { 231 switch (http_new_chunk(io)) { 232 case -1: 233 io->error = 1; 234 return (-1); 235 case 0: 236 io->eof = 1; 237 return (0); 238 } 239 } 240 241 if (len > io->chunksize) 242 len = io->chunksize; 243 if (http_growbuf(io, len) == -1) 244 return (-1); 245 if ((nbytes = fetch_read(io->conn, io->buf, len)) == -1) { 246 io->error = errno; 247 return (-1); 248 } 249 io->buflen = nbytes; 250 io->chunksize -= io->buflen; 251 252 if (io->chunksize == 0) { 253 if (fetch_read(io->conn, &ch, 1) != 1 || ch != '\r' || 254 fetch_read(io->conn, &ch, 1) != 1 || ch != '\n') 255 return (-1); 256 } 257 258 io->bufpos = 0; 259 260 return (io->buflen); 261 } 262 263 /* 264 * Read function 265 */ 266 static int 267 http_readfn(void *v, char *buf, int len) 268 { 269 struct httpio *io = (struct httpio *)v; 270 int rlen; 271 272 if (io->error) 273 return (-1); 274 if (io->eof) 275 return (0); 276 277 /* empty buffer */ 278 if (!io->buf || io->bufpos == io->buflen) { 279 if (http_fillbuf(io, len) < 1) { 280 if (io->error == EINTR) 281 io->error = 0; 282 return (-1); 283 } 284 } 285 286 rlen = io->buflen - io->bufpos; 287 if (len < rlen) 288 rlen = len; 289 memcpy(buf, io->buf + io->bufpos, rlen); 290 io->bufpos += rlen; 291 return (rlen); 292 } 293 294 /* 295 * Write function 296 */ 297 static int 298 http_writefn(void *v, const char *buf, int len) 299 { 300 struct httpio *io = (struct httpio *)v; 301 302 return (fetch_write(io->conn, buf, len)); 303 } 304 305 /* 306 * Close function 307 */ 308 static int 309 http_closefn(void *v) 310 { 311 struct httpio *io = (struct httpio *)v; 312 int r; 313 314 r = fetch_close(io->conn); 315 if (io->buf) 316 free(io->buf); 317 free(io); 318 return (r); 319 } 320 321 /* 322 * Wrap a file descriptor up 323 */ 324 static FILE * 325 http_funopen(conn_t *conn, int chunked) 326 { 327 struct httpio *io; 328 FILE *f; 329 330 if ((io = calloc(1, sizeof(*io))) == NULL) { 331 fetch_syserr(); 332 return (NULL); 333 } 334 io->conn = conn; 335 io->chunked = chunked; 336 f = funopen(io, http_readfn, http_writefn, NULL, http_closefn); 337 if (f == NULL) { 338 fetch_syserr(); 339 free(io); 340 return (NULL); 341 } 342 return (f); 343 } 344 345 346 /***************************************************************************** 347 * Helper functions for talking to the server and parsing its replies 348 */ 349 350 /* Header types */ 351 typedef enum { 352 hdr_syserror = -2, 353 hdr_error = -1, 354 hdr_end = 0, 355 hdr_unknown = 1, 356 hdr_content_length, 357 hdr_content_range, 358 hdr_last_modified, 359 hdr_location, 360 hdr_transfer_encoding, 361 hdr_www_authenticate, 362 hdr_proxy_authenticate, 363 } hdr_t; 364 365 /* Names of interesting headers */ 366 static struct { 367 hdr_t num; 368 const char *name; 369 } hdr_names[] = { 370 { hdr_content_length, "Content-Length" }, 371 { hdr_content_range, "Content-Range" }, 372 { hdr_last_modified, "Last-Modified" }, 373 { hdr_location, "Location" }, 374 { hdr_transfer_encoding, "Transfer-Encoding" }, 375 { hdr_www_authenticate, "WWW-Authenticate" }, 376 { hdr_proxy_authenticate, "Proxy-Authenticate" }, 377 { hdr_unknown, NULL }, 378 }; 379 380 /* 381 * Send a formatted line; optionally echo to terminal 382 */ 383 static int 384 http_cmd(conn_t *conn, const char *fmt, ...) 385 { 386 va_list ap; 387 size_t len; 388 char *msg; 389 int r; 390 391 va_start(ap, fmt); 392 len = vasprintf(&msg, fmt, ap); 393 va_end(ap); 394 395 if (msg == NULL) { 396 errno = ENOMEM; 397 fetch_syserr(); 398 return (-1); 399 } 400 401 r = fetch_putln(conn, msg, len); 402 free(msg); 403 404 if (r == -1) { 405 fetch_syserr(); 406 return (-1); 407 } 408 409 return (0); 410 } 411 412 /* 413 * Get and parse status line 414 */ 415 static int 416 http_get_reply(conn_t *conn) 417 { 418 char *p; 419 420 if (fetch_getln(conn) == -1) 421 return (-1); 422 /* 423 * A valid status line looks like "HTTP/m.n xyz reason" where m 424 * and n are the major and minor protocol version numbers and xyz 425 * is the reply code. 426 * Unfortunately, there are servers out there (NCSA 1.5.1, to name 427 * just one) that do not send a version number, so we can't rely 428 * on finding one, but if we do, insist on it being 1.0 or 1.1. 429 * We don't care about the reason phrase. 430 */ 431 if (strncmp(conn->buf, "HTTP", 4) != 0) 432 return (HTTP_PROTOCOL_ERROR); 433 p = conn->buf + 4; 434 if (*p == '/') { 435 if (p[1] != '1' || p[2] != '.' || (p[3] != '0' && p[3] != '1')) 436 return (HTTP_PROTOCOL_ERROR); 437 p += 4; 438 } 439 if (*p != ' ' || 440 !isdigit((unsigned char)p[1]) || 441 !isdigit((unsigned char)p[2]) || 442 !isdigit((unsigned char)p[3])) 443 return (HTTP_PROTOCOL_ERROR); 444 445 conn->err = (p[1] - '0') * 100 + (p[2] - '0') * 10 + (p[3] - '0'); 446 return (conn->err); 447 } 448 449 /* 450 * Check a header; if the type matches the given string, return a pointer 451 * to the beginning of the value. 452 */ 453 static const char * 454 http_match(const char *str, const char *hdr) 455 { 456 while (*str && *hdr && 457 tolower((unsigned char)*str++) == tolower((unsigned char)*hdr++)) 458 /* nothing */; 459 if (*str || *hdr != ':') 460 return (NULL); 461 while (*hdr && isspace((unsigned char)*++hdr)) 462 /* nothing */; 463 return (hdr); 464 } 465 466 467 /* 468 * Get the next header and return the appropriate symbolic code. We 469 * need to read one line ahead for checking for a continuation line 470 * belonging to the current header (continuation lines start with 471 * white space). 472 * 473 * We get called with a fresh line already in the conn buffer, either 474 * from the previous http_next_header() invocation, or, the first 475 * time, from a fetch_getln() performed by our caller. 476 * 477 * This stops when we encounter an empty line (we dont read beyond the header 478 * area). 479 * 480 * Note that the "headerbuf" is just a place to return the result. Its 481 * contents are not used for the next call. This means that no cleanup 482 * is needed when ie doing another connection, just call the cleanup when 483 * fully done to deallocate memory. 484 */ 485 486 /* Limit the max number of continuation lines to some reasonable value */ 487 #define HTTP_MAX_CONT_LINES 10 488 489 /* Place into which to build a header from one or several lines */ 490 typedef struct { 491 char *buf; /* buffer */ 492 size_t bufsize; /* buffer size */ 493 size_t buflen; /* length of buffer contents */ 494 } http_headerbuf_t; 495 496 static void 497 init_http_headerbuf(http_headerbuf_t *buf) 498 { 499 buf->buf = NULL; 500 buf->bufsize = 0; 501 buf->buflen = 0; 502 } 503 504 static void 505 clean_http_headerbuf(http_headerbuf_t *buf) 506 { 507 if (buf->buf) 508 free(buf->buf); 509 init_http_headerbuf(buf); 510 } 511 512 /* Remove whitespace at the end of the buffer */ 513 static void 514 http_conn_trimright(conn_t *conn) 515 { 516 while (conn->buflen && 517 isspace((unsigned char)conn->buf[conn->buflen - 1])) 518 conn->buflen--; 519 conn->buf[conn->buflen] = '\0'; 520 } 521 522 static hdr_t 523 http_next_header(conn_t *conn, http_headerbuf_t *hbuf, const char **p) 524 { 525 unsigned int i, len; 526 527 /* 528 * Have to do the stripping here because of the first line. So 529 * it's done twice for the subsequent lines. No big deal 530 */ 531 http_conn_trimright(conn); 532 if (conn->buflen == 0) 533 return (hdr_end); 534 535 /* Copy the line to the headerbuf */ 536 if (hbuf->bufsize < conn->buflen + 1) { 537 if ((hbuf->buf = realloc(hbuf->buf, conn->buflen + 1)) == NULL) 538 return (hdr_syserror); 539 hbuf->bufsize = conn->buflen + 1; 540 } 541 strcpy(hbuf->buf, conn->buf); 542 hbuf->buflen = conn->buflen; 543 544 /* 545 * Fetch possible continuation lines. Stop at 1st non-continuation 546 * and leave it in the conn buffer 547 */ 548 for (i = 0; i < HTTP_MAX_CONT_LINES; i++) { 549 if (fetch_getln(conn) == -1) 550 return (hdr_syserror); 551 552 /* 553 * Note: we carry on the idea from the previous version 554 * that a pure whitespace line is equivalent to an empty 555 * one (so it's not continuation and will be handled when 556 * we are called next) 557 */ 558 http_conn_trimright(conn); 559 if (conn->buf[0] != ' ' && conn->buf[0] != "\t"[0]) 560 break; 561 562 /* Got a continuation line. Concatenate to previous */ 563 len = hbuf->buflen + conn->buflen; 564 if (hbuf->bufsize < len + 1) { 565 len *= 2; 566 if ((hbuf->buf = realloc(hbuf->buf, len + 1)) == NULL) 567 return (hdr_syserror); 568 hbuf->bufsize = len + 1; 569 } 570 strcpy(hbuf->buf + hbuf->buflen, conn->buf); 571 hbuf->buflen += conn->buflen; 572 } 573 574 /* 575 * We could check for malformed headers but we don't really care. 576 * A valid header starts with a token immediately followed by a 577 * colon; a token is any sequence of non-control, non-whitespace 578 * characters except "()<>@,;:\\\"{}". 579 */ 580 for (i = 0; hdr_names[i].num != hdr_unknown; i++) 581 if ((*p = http_match(hdr_names[i].name, hbuf->buf)) != NULL) 582 return (hdr_names[i].num); 583 584 return (hdr_unknown); 585 } 586 587 /************************** 588 * [Proxy-]Authenticate header parsing 589 */ 590 591 /* 592 * Read doublequote-delimited string into output buffer obuf (allocated 593 * by caller, whose responsibility it is to ensure that it's big enough) 594 * cp points to the first char after the initial '"' 595 * Handles \ quoting 596 * Returns pointer to the first char after the terminating double quote, or 597 * NULL for error. 598 */ 599 static const char * 600 http_parse_headerstring(const char *cp, char *obuf) 601 { 602 for (;;) { 603 switch (*cp) { 604 case 0: /* Unterminated string */ 605 *obuf = 0; 606 return (NULL); 607 case '"': /* Ending quote */ 608 *obuf = 0; 609 return (++cp); 610 case '\\': 611 if (*++cp == 0) { 612 *obuf = 0; 613 return (NULL); 614 } 615 /* FALLTHROUGH */ 616 default: 617 *obuf++ = *cp++; 618 } 619 } 620 } 621 622 /* Http auth challenge schemes */ 623 typedef enum {HTTPAS_UNKNOWN, HTTPAS_BASIC,HTTPAS_DIGEST} http_auth_schemes_t; 624 625 /* Data holder for a Basic or Digest challenge. */ 626 typedef struct { 627 http_auth_schemes_t scheme; 628 char *realm; 629 char *qop; 630 char *nonce; 631 char *opaque; 632 char *algo; 633 int stale; 634 int nc; /* Nonce count */ 635 } http_auth_challenge_t; 636 637 static void 638 init_http_auth_challenge(http_auth_challenge_t *b) 639 { 640 b->scheme = HTTPAS_UNKNOWN; 641 b->realm = b->qop = b->nonce = b->opaque = b->algo = NULL; 642 b->stale = b->nc = 0; 643 } 644 645 static void 646 clean_http_auth_challenge(http_auth_challenge_t *b) 647 { 648 if (b->realm) 649 free(b->realm); 650 if (b->qop) 651 free(b->qop); 652 if (b->nonce) 653 free(b->nonce); 654 if (b->opaque) 655 free(b->opaque); 656 if (b->algo) 657 free(b->algo); 658 init_http_auth_challenge(b); 659 } 660 661 /* Data holder for an array of challenges offered in an http response. */ 662 #define MAX_CHALLENGES 10 663 typedef struct { 664 http_auth_challenge_t *challenges[MAX_CHALLENGES]; 665 int count; /* Number of parsed challenges in the array */ 666 int valid; /* We did parse an authenticate header */ 667 } http_auth_challenges_t; 668 669 static void 670 init_http_auth_challenges(http_auth_challenges_t *cs) 671 { 672 int i; 673 for (i = 0; i < MAX_CHALLENGES; i++) 674 cs->challenges[i] = NULL; 675 cs->count = cs->valid = 0; 676 } 677 678 static void 679 clean_http_auth_challenges(http_auth_challenges_t *cs) 680 { 681 int i; 682 /* We rely on non-zero pointers being allocated, not on the count */ 683 for (i = 0; i < MAX_CHALLENGES; i++) { 684 if (cs->challenges[i] != NULL) { 685 clean_http_auth_challenge(cs->challenges[i]); 686 free(cs->challenges[i]); 687 } 688 } 689 init_http_auth_challenges(cs); 690 } 691 692 /* 693 * Enumeration for lexical elements. Separators will be returned as their own 694 * ascii value 695 */ 696 typedef enum {HTTPHL_WORD=256, HTTPHL_STRING=257, HTTPHL_END=258, 697 HTTPHL_ERROR = 259} http_header_lex_t; 698 699 /* 700 * Determine what kind of token comes next and return possible value 701 * in buf, which is supposed to have been allocated big enough by 702 * caller. Advance input pointer and return element type. 703 */ 704 static int 705 http_header_lex(const char **cpp, char *buf) 706 { 707 size_t l; 708 /* Eat initial whitespace */ 709 *cpp += strspn(*cpp, " \t"); 710 if (**cpp == 0) 711 return (HTTPHL_END); 712 713 /* Separator ? */ 714 if (**cpp == ',' || **cpp == '=') 715 return (*((*cpp)++)); 716 717 /* String ? */ 718 if (**cpp == '"') { 719 *cpp = http_parse_headerstring(++*cpp, buf); 720 if (*cpp == NULL) 721 return (HTTPHL_ERROR); 722 return (HTTPHL_STRING); 723 } 724 725 /* Read other token, until separator or whitespace */ 726 l = strcspn(*cpp, " \t,="); 727 memcpy(buf, *cpp, l); 728 buf[l] = 0; 729 *cpp += l; 730 return (HTTPHL_WORD); 731 } 732 733 /* 734 * Read challenges from http xxx-authenticate header and accumulate them 735 * in the challenges list structure. 736 * 737 * Headers with multiple challenges are specified by rfc2617, but 738 * servers (ie: squid) often send them in separate headers instead, 739 * which in turn is forbidden by the http spec (multiple headers with 740 * the same name are only allowed for pure comma-separated lists, see 741 * rfc2616 sec 4.2). 742 * 743 * We support both approaches anyway 744 */ 745 static int 746 http_parse_authenticate(const char *cp, http_auth_challenges_t *cs) 747 { 748 int ret = -1; 749 http_header_lex_t lex; 750 char *key = malloc(strlen(cp) + 1); 751 char *value = malloc(strlen(cp) + 1); 752 char *buf = malloc(strlen(cp) + 1); 753 754 if (key == NULL || value == NULL || buf == NULL) { 755 fetch_syserr(); 756 goto out; 757 } 758 759 /* In any case we've seen the header and we set the valid bit */ 760 cs->valid = 1; 761 762 /* Need word first */ 763 lex = http_header_lex(&cp, key); 764 if (lex != HTTPHL_WORD) 765 goto out; 766 767 /* Loop on challenges */ 768 for (; cs->count < MAX_CHALLENGES; cs->count++) { 769 cs->challenges[cs->count] = 770 malloc(sizeof(http_auth_challenge_t)); 771 if (cs->challenges[cs->count] == NULL) { 772 fetch_syserr(); 773 goto out; 774 } 775 init_http_auth_challenge(cs->challenges[cs->count]); 776 if (!strcasecmp(key, "basic")) { 777 cs->challenges[cs->count]->scheme = HTTPAS_BASIC; 778 } else if (!strcasecmp(key, "digest")) { 779 cs->challenges[cs->count]->scheme = HTTPAS_DIGEST; 780 } else { 781 cs->challenges[cs->count]->scheme = HTTPAS_UNKNOWN; 782 /* 783 * Continue parsing as basic or digest may 784 * follow, and the syntax is the same for 785 * all. We'll just ignore this one when 786 * looking at the list 787 */ 788 } 789 790 /* Loop on attributes */ 791 for (;;) { 792 /* Key */ 793 lex = http_header_lex(&cp, key); 794 if (lex != HTTPHL_WORD) 795 goto out; 796 797 /* Equal sign */ 798 lex = http_header_lex(&cp, buf); 799 if (lex != '=') 800 goto out; 801 802 /* Value */ 803 lex = http_header_lex(&cp, value); 804 if (lex != HTTPHL_WORD && lex != HTTPHL_STRING) 805 goto out; 806 807 if (!strcasecmp(key, "realm")) 808 cs->challenges[cs->count]->realm = 809 strdup(value); 810 else if (!strcasecmp(key, "qop")) 811 cs->challenges[cs->count]->qop = 812 strdup(value); 813 else if (!strcasecmp(key, "nonce")) 814 cs->challenges[cs->count]->nonce = 815 strdup(value); 816 else if (!strcasecmp(key, "opaque")) 817 cs->challenges[cs->count]->opaque = 818 strdup(value); 819 else if (!strcasecmp(key, "algorithm")) 820 cs->challenges[cs->count]->algo = 821 strdup(value); 822 else if (!strcasecmp(key, "stale")) 823 cs->challenges[cs->count]->stale = 824 strcasecmp(value, "no"); 825 /* Else ignore unknown attributes */ 826 827 /* Comma or Next challenge or End */ 828 lex = http_header_lex(&cp, key); 829 /* 830 * If we get a word here, this is the beginning of the 831 * next challenge. Break the attributes loop 832 */ 833 if (lex == HTTPHL_WORD) 834 break; 835 836 if (lex == HTTPHL_END) { 837 /* End while looking for ',' is normal exit */ 838 cs->count++; 839 ret = 0; 840 goto out; 841 } 842 /* Anything else is an error */ 843 if (lex != ',') 844 goto out; 845 846 } /* End attributes loop */ 847 } /* End challenge loop */ 848 849 /* 850 * Challenges max count exceeded. This really can't happen 851 * with normal data, something's fishy -> error 852 */ 853 854 out: 855 if (key) 856 free(key); 857 if (value) 858 free(value); 859 if (buf) 860 free(buf); 861 return (ret); 862 } 863 864 865 /* 866 * Parse a last-modified header 867 */ 868 static int 869 http_parse_mtime(const char *p, time_t *mtime) 870 { 871 char locale[64], *r; 872 struct tm tm; 873 874 strncpy(locale, setlocale(LC_TIME, NULL), sizeof(locale)); 875 setlocale(LC_TIME, "C"); 876 r = strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm); 877 /* XXX should add support for date-2 and date-3 */ 878 setlocale(LC_TIME, locale); 879 if (r == NULL) 880 return (-1); 881 DEBUG(fprintf(stderr, "last modified: [%04d-%02d-%02d " 882 "%02d:%02d:%02d]\n", 883 tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, 884 tm.tm_hour, tm.tm_min, tm.tm_sec)); 885 *mtime = timegm(&tm); 886 return (0); 887 } 888 889 /* 890 * Parse a content-length header 891 */ 892 static int 893 http_parse_length(const char *p, off_t *length) 894 { 895 off_t len; 896 897 for (len = 0; *p && isdigit((unsigned char)*p); ++p) 898 len = len * 10 + (*p - '0'); 899 if (*p) 900 return (-1); 901 DEBUG(fprintf(stderr, "content length: [%lld]\n", 902 (long long)len)); 903 *length = len; 904 return (0); 905 } 906 907 /* 908 * Parse a content-range header 909 */ 910 static int 911 http_parse_range(const char *p, off_t *offset, off_t *length, off_t *size) 912 { 913 off_t first, last, len; 914 915 if (strncasecmp(p, "bytes ", 6) != 0) 916 return (-1); 917 p += 6; 918 if (*p == '*') { 919 first = last = -1; 920 ++p; 921 } else { 922 for (first = 0; *p && isdigit((unsigned char)*p); ++p) 923 first = first * 10 + *p - '0'; 924 if (*p != '-') 925 return (-1); 926 for (last = 0, ++p; *p && isdigit((unsigned char)*p); ++p) 927 last = last * 10 + *p - '0'; 928 } 929 if (first > last || *p != '/') 930 return (-1); 931 for (len = 0, ++p; *p && isdigit((unsigned char)*p); ++p) 932 len = len * 10 + *p - '0'; 933 if (*p || len < last - first + 1) 934 return (-1); 935 if (first == -1) { 936 DEBUG(fprintf(stderr, "content range: [*/%lld]\n", 937 (long long)len)); 938 *length = 0; 939 } else { 940 DEBUG(fprintf(stderr, "content range: [%lld-%lld/%lld]\n", 941 (long long)first, (long long)last, (long long)len)); 942 *length = last - first + 1; 943 } 944 *offset = first; 945 *size = len; 946 return (0); 947 } 948 949 950 /***************************************************************************** 951 * Helper functions for authorization 952 */ 953 954 /* 955 * Base64 encoding 956 */ 957 static char * 958 http_base64(const char *src) 959 { 960 static const char base64[] = 961 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 962 "abcdefghijklmnopqrstuvwxyz" 963 "0123456789+/"; 964 char *str, *dst; 965 size_t l; 966 int t, r; 967 968 l = strlen(src); 969 if ((str = malloc(((l + 2) / 3) * 4 + 1)) == NULL) 970 return (NULL); 971 dst = str; 972 r = 0; 973 974 while (l >= 3) { 975 t = (src[0] << 16) | (src[1] << 8) | src[2]; 976 dst[0] = base64[(t >> 18) & 0x3f]; 977 dst[1] = base64[(t >> 12) & 0x3f]; 978 dst[2] = base64[(t >> 6) & 0x3f]; 979 dst[3] = base64[(t >> 0) & 0x3f]; 980 src += 3; l -= 3; 981 dst += 4; r += 4; 982 } 983 984 switch (l) { 985 case 2: 986 t = (src[0] << 16) | (src[1] << 8); 987 dst[0] = base64[(t >> 18) & 0x3f]; 988 dst[1] = base64[(t >> 12) & 0x3f]; 989 dst[2] = base64[(t >> 6) & 0x3f]; 990 dst[3] = '='; 991 dst += 4; 992 r += 4; 993 break; 994 case 1: 995 t = src[0] << 16; 996 dst[0] = base64[(t >> 18) & 0x3f]; 997 dst[1] = base64[(t >> 12) & 0x3f]; 998 dst[2] = dst[3] = '='; 999 dst += 4; 1000 r += 4; 1001 break; 1002 case 0: 1003 break; 1004 } 1005 1006 *dst = 0; 1007 return (str); 1008 } 1009 1010 1011 /* 1012 * Extract authorization parameters from environment value. 1013 * The value is like scheme:realm:user:pass 1014 */ 1015 typedef struct { 1016 char *scheme; 1017 char *realm; 1018 char *user; 1019 char *password; 1020 } http_auth_params_t; 1021 1022 static void 1023 init_http_auth_params(http_auth_params_t *s) 1024 { 1025 s->scheme = s->realm = s->user = s->password = 0; 1026 } 1027 1028 static void 1029 clean_http_auth_params(http_auth_params_t *s) 1030 { 1031 if (s->scheme) 1032 free(s->scheme); 1033 if (s->realm) 1034 free(s->realm); 1035 if (s->user) 1036 free(s->user); 1037 if (s->password) 1038 free(s->password); 1039 init_http_auth_params(s); 1040 } 1041 1042 static int 1043 http_authfromenv(const char *p, http_auth_params_t *parms) 1044 { 1045 int ret = -1; 1046 char *v, *ve; 1047 char *str = strdup(p); 1048 1049 if (str == NULL) { 1050 fetch_syserr(); 1051 return (-1); 1052 } 1053 v = str; 1054 1055 if ((ve = strchr(v, ':')) == NULL) 1056 goto out; 1057 1058 *ve = 0; 1059 if ((parms->scheme = strdup(v)) == NULL) { 1060 fetch_syserr(); 1061 goto out; 1062 } 1063 v = ve + 1; 1064 1065 if ((ve = strchr(v, ':')) == NULL) 1066 goto out; 1067 1068 *ve = 0; 1069 if ((parms->realm = strdup(v)) == NULL) { 1070 fetch_syserr(); 1071 goto out; 1072 } 1073 v = ve + 1; 1074 1075 if ((ve = strchr(v, ':')) == NULL) 1076 goto out; 1077 1078 *ve = 0; 1079 if ((parms->user = strdup(v)) == NULL) { 1080 fetch_syserr(); 1081 goto out; 1082 } 1083 v = ve + 1; 1084 1085 1086 if ((parms->password = strdup(v)) == NULL) { 1087 fetch_syserr(); 1088 goto out; 1089 } 1090 ret = 0; 1091 out: 1092 if (ret == -1) 1093 clean_http_auth_params(parms); 1094 if (str) 1095 free(str); 1096 return (ret); 1097 } 1098 1099 1100 /* 1101 * Digest response: the code to compute the digest is taken from the 1102 * sample implementation in RFC2616 1103 */ 1104 #define IN const 1105 #define OUT 1106 1107 #define HASHLEN 16 1108 typedef char HASH[HASHLEN]; 1109 #define HASHHEXLEN 32 1110 typedef char HASHHEX[HASHHEXLEN+1]; 1111 1112 static const char *hexchars = "0123456789abcdef"; 1113 static void 1114 CvtHex(IN HASH Bin, OUT HASHHEX Hex) 1115 { 1116 unsigned short i; 1117 unsigned char j; 1118 1119 for (i = 0; i < HASHLEN; i++) { 1120 j = (Bin[i] >> 4) & 0xf; 1121 Hex[i*2] = hexchars[j]; 1122 j = Bin[i] & 0xf; 1123 Hex[i*2+1] = hexchars[j]; 1124 }; 1125 Hex[HASHHEXLEN] = '\0'; 1126 }; 1127 1128 /* calculate H(A1) as per spec */ 1129 static void 1130 DigestCalcHA1( 1131 IN char * pszAlg, 1132 IN char * pszUserName, 1133 IN char * pszRealm, 1134 IN char * pszPassword, 1135 IN char * pszNonce, 1136 IN char * pszCNonce, 1137 OUT HASHHEX SessionKey 1138 ) 1139 { 1140 MD5_CTX Md5Ctx; 1141 HASH HA1; 1142 1143 MD5Init(&Md5Ctx); 1144 MD5Update(&Md5Ctx, pszUserName, strlen(pszUserName)); 1145 MD5Update(&Md5Ctx, ":", 1); 1146 MD5Update(&Md5Ctx, pszRealm, strlen(pszRealm)); 1147 MD5Update(&Md5Ctx, ":", 1); 1148 MD5Update(&Md5Ctx, pszPassword, strlen(pszPassword)); 1149 MD5Final(HA1, &Md5Ctx); 1150 if (strcasecmp(pszAlg, "md5-sess") == 0) { 1151 1152 MD5Init(&Md5Ctx); 1153 MD5Update(&Md5Ctx, HA1, HASHLEN); 1154 MD5Update(&Md5Ctx, ":", 1); 1155 MD5Update(&Md5Ctx, pszNonce, strlen(pszNonce)); 1156 MD5Update(&Md5Ctx, ":", 1); 1157 MD5Update(&Md5Ctx, pszCNonce, strlen(pszCNonce)); 1158 MD5Final(HA1, &Md5Ctx); 1159 }; 1160 CvtHex(HA1, SessionKey); 1161 } 1162 1163 /* calculate request-digest/response-digest as per HTTP Digest spec */ 1164 static void 1165 DigestCalcResponse( 1166 IN HASHHEX HA1, /* H(A1) */ 1167 IN char * pszNonce, /* nonce from server */ 1168 IN char * pszNonceCount, /* 8 hex digits */ 1169 IN char * pszCNonce, /* client nonce */ 1170 IN char * pszQop, /* qop-value: "", "auth", "auth-int" */ 1171 IN char * pszMethod, /* method from the request */ 1172 IN char * pszDigestUri, /* requested URL */ 1173 IN HASHHEX HEntity, /* H(entity body) if qop="auth-int" */ 1174 OUT HASHHEX Response /* request-digest or response-digest */ 1175 ) 1176 { 1177 /* DEBUG(fprintf(stderr, 1178 "Calc: HA1[%s] Nonce[%s] qop[%s] method[%s] URI[%s]\n", 1179 HA1, pszNonce, pszQop, pszMethod, pszDigestUri));*/ 1180 MD5_CTX Md5Ctx; 1181 HASH HA2; 1182 HASH RespHash; 1183 HASHHEX HA2Hex; 1184 1185 // calculate H(A2) 1186 MD5Init(&Md5Ctx); 1187 MD5Update(&Md5Ctx, pszMethod, strlen(pszMethod)); 1188 MD5Update(&Md5Ctx, ":", 1); 1189 MD5Update(&Md5Ctx, pszDigestUri, strlen(pszDigestUri)); 1190 if (strcasecmp(pszQop, "auth-int") == 0) { 1191 MD5Update(&Md5Ctx, ":", 1); 1192 MD5Update(&Md5Ctx, HEntity, HASHHEXLEN); 1193 }; 1194 MD5Final(HA2, &Md5Ctx); 1195 CvtHex(HA2, HA2Hex); 1196 1197 // calculate response 1198 MD5Init(&Md5Ctx); 1199 MD5Update(&Md5Ctx, HA1, HASHHEXLEN); 1200 MD5Update(&Md5Ctx, ":", 1); 1201 MD5Update(&Md5Ctx, pszNonce, strlen(pszNonce)); 1202 MD5Update(&Md5Ctx, ":", 1); 1203 if (*pszQop) { 1204 MD5Update(&Md5Ctx, pszNonceCount, strlen(pszNonceCount)); 1205 MD5Update(&Md5Ctx, ":", 1); 1206 MD5Update(&Md5Ctx, pszCNonce, strlen(pszCNonce)); 1207 MD5Update(&Md5Ctx, ":", 1); 1208 MD5Update(&Md5Ctx, pszQop, strlen(pszQop)); 1209 MD5Update(&Md5Ctx, ":", 1); 1210 }; 1211 MD5Update(&Md5Ctx, HA2Hex, HASHHEXLEN); 1212 MD5Final(RespHash, &Md5Ctx); 1213 CvtHex(RespHash, Response); 1214 } 1215 1216 /* 1217 * Generate/Send a Digest authorization header 1218 * This looks like: [Proxy-]Authorization: credentials 1219 * 1220 * credentials = "Digest" digest-response 1221 * digest-response = 1#( username | realm | nonce | digest-uri 1222 * | response | [ algorithm ] | [cnonce] | 1223 * [opaque] | [message-qop] | 1224 * [nonce-count] | [auth-param] ) 1225 * username = "username" "=" username-value 1226 * username-value = quoted-string 1227 * digest-uri = "uri" "=" digest-uri-value 1228 * digest-uri-value = request-uri ; As specified by HTTP/1.1 1229 * message-qop = "qop" "=" qop-value 1230 * cnonce = "cnonce" "=" cnonce-value 1231 * cnonce-value = nonce-value 1232 * nonce-count = "nc" "=" nc-value 1233 * nc-value = 8LHEX 1234 * response = "response" "=" request-digest 1235 * request-digest = <"> 32LHEX <"> 1236 */ 1237 static int 1238 http_digest_auth(conn_t *conn, const char *hdr, http_auth_challenge_t *c, 1239 http_auth_params_t *parms, struct url *url) 1240 { 1241 int r; 1242 char noncecount[10]; 1243 char cnonce[40]; 1244 char *options = 0; 1245 1246 if (!c->realm || !c->nonce) { 1247 DEBUG(fprintf(stderr, "realm/nonce not set in challenge\n")); 1248 return(-1); 1249 } 1250 if (!c->algo) 1251 c->algo = strdup(""); 1252 1253 if (asprintf(&options, "%s%s%s%s", 1254 *c->algo? ",algorithm=" : "", c->algo, 1255 c->opaque? ",opaque=" : "", c->opaque?c->opaque:"")== -1) 1256 return (-1); 1257 1258 if (!c->qop) { 1259 c->qop = strdup(""); 1260 *noncecount = 0; 1261 *cnonce = 0; 1262 } else { 1263 c->nc++; 1264 sprintf(noncecount, "%08x", c->nc); 1265 /* We don't try very hard with the cnonce ... */ 1266 sprintf(cnonce, "%x%lx", getpid(), (unsigned long)time(0)); 1267 } 1268 1269 HASHHEX HA1; 1270 DigestCalcHA1(c->algo, parms->user, c->realm, 1271 parms->password, c->nonce, cnonce, HA1); 1272 DEBUG(fprintf(stderr, "HA1: [%s]\n", HA1)); 1273 HASHHEX digest; 1274 DigestCalcResponse(HA1, c->nonce, noncecount, cnonce, c->qop, 1275 "GET", url->doc, "", digest); 1276 1277 if (c->qop[0]) { 1278 r = http_cmd(conn, "%s: Digest username=\"%s\",realm=\"%s\"," 1279 "nonce=\"%s\",uri=\"%s\",response=\"%s\"," 1280 "qop=\"auth\", cnonce=\"%s\", nc=%s%s", 1281 hdr, parms->user, c->realm, 1282 c->nonce, url->doc, digest, 1283 cnonce, noncecount, options); 1284 } else { 1285 r = http_cmd(conn, "%s: Digest username=\"%s\",realm=\"%s\"," 1286 "nonce=\"%s\",uri=\"%s\",response=\"%s\"%s", 1287 hdr, parms->user, c->realm, 1288 c->nonce, url->doc, digest, options); 1289 } 1290 if (options) 1291 free(options); 1292 return (r); 1293 } 1294 1295 /* 1296 * Encode username and password 1297 */ 1298 static int 1299 http_basic_auth(conn_t *conn, const char *hdr, const char *usr, const char *pwd) 1300 { 1301 char *upw, *auth; 1302 int r; 1303 1304 DEBUG(fprintf(stderr, "basic: usr: [%s]\n", usr)); 1305 DEBUG(fprintf(stderr, "basic: pwd: [%s]\n", pwd)); 1306 if (asprintf(&upw, "%s:%s", usr, pwd) == -1) 1307 return (-1); 1308 auth = http_base64(upw); 1309 free(upw); 1310 if (auth == NULL) 1311 return (-1); 1312 r = http_cmd(conn, "%s: Basic %s", hdr, auth); 1313 free(auth); 1314 return (r); 1315 } 1316 1317 /* 1318 * Chose the challenge to answer and call the appropriate routine to 1319 * produce the header. 1320 */ 1321 static int 1322 http_authorize(conn_t *conn, const char *hdr, http_auth_challenges_t *cs, 1323 http_auth_params_t *parms, struct url *url) 1324 { 1325 http_auth_challenge_t *basic = NULL; 1326 http_auth_challenge_t *digest = NULL; 1327 int i; 1328 1329 /* If user or pass are null we're not happy */ 1330 if (!parms->user || !parms->password) { 1331 DEBUG(fprintf(stderr, "NULL usr or pass\n")); 1332 return (-1); 1333 } 1334 1335 /* Look for a Digest and a Basic challenge */ 1336 for (i = 0; i < cs->count; i++) { 1337 if (cs->challenges[i]->scheme == HTTPAS_BASIC) 1338 basic = cs->challenges[i]; 1339 if (cs->challenges[i]->scheme == HTTPAS_DIGEST) 1340 digest = cs->challenges[i]; 1341 } 1342 1343 /* Error if "Digest" was specified and there is no Digest challenge */ 1344 if (!digest && (parms->scheme && 1345 !strcasecmp(parms->scheme, "digest"))) { 1346 DEBUG(fprintf(stderr, 1347 "Digest auth in env, not supported by peer\n")); 1348 return (-1); 1349 } 1350 /* 1351 * If "basic" was specified in the environment, or there is no Digest 1352 * challenge, do the basic thing. Don't need a challenge for this, 1353 * so no need to check basic!=NULL 1354 */ 1355 if (!digest || (parms->scheme && !strcasecmp(parms->scheme,"basic"))) 1356 return (http_basic_auth(conn,hdr,parms->user,parms->password)); 1357 1358 /* Else, prefer digest. We just checked that it's not NULL */ 1359 return (http_digest_auth(conn, hdr, digest, parms, url)); 1360 } 1361 1362 /***************************************************************************** 1363 * Helper functions for connecting to a server or proxy 1364 */ 1365 1366 /* 1367 * Connect to the correct HTTP server or proxy. 1368 */ 1369 static conn_t * 1370 http_connect(struct url *URL, struct url *purl, const char *flags) 1371 { 1372 struct url *curl; 1373 conn_t *conn; 1374 int verbose; 1375 int af, val; 1376 1377 #ifdef INET6 1378 af = AF_UNSPEC; 1379 #else 1380 af = AF_INET; 1381 #endif 1382 1383 verbose = CHECK_FLAG('v'); 1384 if (CHECK_FLAG('4')) 1385 af = AF_INET; 1386 #ifdef INET6 1387 else if (CHECK_FLAG('6')) 1388 af = AF_INET6; 1389 #endif 1390 1391 curl = (purl != NULL) ? purl : URL; 1392 1393 if ((conn = fetch_connect(curl->host, curl->port, af, verbose)) == NULL) 1394 /* fetch_connect() has already set an error code */ 1395 return (NULL); 1396 if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0 && purl) { 1397 http_cmd(conn, "CONNECT %s:%d HTTP/1.1", 1398 URL->host, URL->port); 1399 http_cmd(conn, "Host: %s:%d", 1400 URL->host, URL->port); 1401 http_cmd(conn, ""); 1402 if (http_get_reply(conn) != HTTP_OK) { 1403 fetch_close(conn); 1404 return (NULL); 1405 } 1406 http_get_reply(conn); 1407 } 1408 if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0 && 1409 fetch_ssl(conn, URL, verbose) == -1) { 1410 fetch_close(conn); 1411 /* grrr */ 1412 errno = EAUTH; 1413 fetch_syserr(); 1414 return (NULL); 1415 } 1416 1417 val = 1; 1418 setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val, sizeof(val)); 1419 1420 return (conn); 1421 } 1422 1423 static struct url * 1424 http_get_proxy(struct url * url, const char *flags) 1425 { 1426 struct url *purl; 1427 char *p; 1428 1429 if (flags != NULL && strchr(flags, 'd') != NULL) 1430 return (NULL); 1431 if (fetch_no_proxy_match(url->host)) 1432 return (NULL); 1433 if (((p = getenv("HTTP_PROXY")) || (p = getenv("http_proxy"))) && 1434 *p && (purl = fetchParseURL(p))) { 1435 if (!*purl->scheme) 1436 strcpy(purl->scheme, SCHEME_HTTP); 1437 if (!purl->port) 1438 purl->port = fetch_default_proxy_port(purl->scheme); 1439 if (strcasecmp(purl->scheme, SCHEME_HTTP) == 0) 1440 return (purl); 1441 fetchFreeURL(purl); 1442 } 1443 return (NULL); 1444 } 1445 1446 static void 1447 http_print_html(FILE *out, FILE *in) 1448 { 1449 size_t len; 1450 char *line, *p, *q; 1451 int comment, tag; 1452 1453 comment = tag = 0; 1454 while ((line = fgetln(in, &len)) != NULL) { 1455 while (len && isspace((unsigned char)line[len - 1])) 1456 --len; 1457 for (p = q = line; q < line + len; ++q) { 1458 if (comment && *q == '-') { 1459 if (q + 2 < line + len && 1460 strcmp(q, "-->") == 0) { 1461 tag = comment = 0; 1462 q += 2; 1463 } 1464 } else if (tag && !comment && *q == '>') { 1465 p = q + 1; 1466 tag = 0; 1467 } else if (!tag && *q == '<') { 1468 if (q > p) 1469 fwrite(p, q - p, 1, out); 1470 tag = 1; 1471 if (q + 3 < line + len && 1472 strcmp(q, "<!--") == 0) { 1473 comment = 1; 1474 q += 3; 1475 } 1476 } 1477 } 1478 if (!tag && q > p) 1479 fwrite(p, q - p, 1, out); 1480 fputc('\n', out); 1481 } 1482 } 1483 1484 1485 /***************************************************************************** 1486 * Core 1487 */ 1488 1489 /* 1490 * Send a request and process the reply 1491 * 1492 * XXX This function is way too long, the do..while loop should be split 1493 * XXX off into a separate function. 1494 */ 1495 FILE * 1496 http_request(struct url *URL, const char *op, struct url_stat *us, 1497 struct url *purl, const char *flags) 1498 { 1499 char timebuf[80]; 1500 char hbuf[MAXHOSTNAMELEN + 7], *host; 1501 conn_t *conn; 1502 struct url *url, *new; 1503 int chunked, direct, ims, noredirect, verbose; 1504 int e, i, n, val; 1505 off_t offset, clength, length, size; 1506 time_t mtime; 1507 const char *p; 1508 FILE *f; 1509 hdr_t h; 1510 struct tm *timestruct; 1511 http_headerbuf_t headerbuf; 1512 http_auth_challenges_t server_challenges; 1513 http_auth_challenges_t proxy_challenges; 1514 1515 /* The following calls don't allocate anything */ 1516 init_http_headerbuf(&headerbuf); 1517 init_http_auth_challenges(&server_challenges); 1518 init_http_auth_challenges(&proxy_challenges); 1519 1520 direct = CHECK_FLAG('d'); 1521 noredirect = CHECK_FLAG('A'); 1522 verbose = CHECK_FLAG('v'); 1523 ims = CHECK_FLAG('i'); 1524 1525 if (direct && purl) { 1526 fetchFreeURL(purl); 1527 purl = NULL; 1528 } 1529 1530 /* try the provided URL first */ 1531 url = URL; 1532 1533 n = MAX_REDIRECT; 1534 i = 0; 1535 1536 e = HTTP_PROTOCOL_ERROR; 1537 do { 1538 new = NULL; 1539 chunked = 0; 1540 offset = 0; 1541 clength = -1; 1542 length = -1; 1543 size = -1; 1544 mtime = 0; 1545 1546 /* check port */ 1547 if (!url->port) 1548 url->port = fetch_default_port(url->scheme); 1549 1550 /* were we redirected to an FTP URL? */ 1551 if (purl == NULL && strcmp(url->scheme, SCHEME_FTP) == 0) { 1552 if (strcmp(op, "GET") == 0) 1553 return (ftp_request(url, "RETR", us, purl, flags)); 1554 else if (strcmp(op, "HEAD") == 0) 1555 return (ftp_request(url, "STAT", us, purl, flags)); 1556 } 1557 1558 /* connect to server or proxy */ 1559 if ((conn = http_connect(url, purl, flags)) == NULL) 1560 goto ouch; 1561 1562 host = url->host; 1563 #ifdef INET6 1564 if (strchr(url->host, ':')) { 1565 snprintf(hbuf, sizeof(hbuf), "[%s]", url->host); 1566 host = hbuf; 1567 } 1568 #endif 1569 if (url->port != fetch_default_port(url->scheme)) { 1570 if (host != hbuf) { 1571 strcpy(hbuf, host); 1572 host = hbuf; 1573 } 1574 snprintf(hbuf + strlen(hbuf), 1575 sizeof(hbuf) - strlen(hbuf), ":%d", url->port); 1576 } 1577 1578 /* send request */ 1579 if (verbose) 1580 fetch_info("requesting %s://%s%s", 1581 url->scheme, host, url->doc); 1582 if (purl && strcasecmp(URL->scheme, SCHEME_HTTPS) != 0) { 1583 http_cmd(conn, "%s %s://%s%s HTTP/1.1", 1584 op, url->scheme, host, url->doc); 1585 } else { 1586 http_cmd(conn, "%s %s HTTP/1.1", 1587 op, url->doc); 1588 } 1589 1590 if (ims && url->ims_time) { 1591 timestruct = gmtime((time_t *)&url->ims_time); 1592 (void)strftime(timebuf, 80, "%a, %d %b %Y %T GMT", 1593 timestruct); 1594 if (verbose) 1595 fetch_info("If-Modified-Since: %s", timebuf); 1596 http_cmd(conn, "If-Modified-Since: %s", timebuf); 1597 } 1598 /* virtual host */ 1599 http_cmd(conn, "Host: %s", host); 1600 1601 /* 1602 * Proxy authorization: we only send auth after we received 1603 * a 407 error. We do not first try basic anyway (changed 1604 * when support was added for digest-auth) 1605 */ 1606 if (purl && proxy_challenges.valid) { 1607 http_auth_params_t aparams; 1608 init_http_auth_params(&aparams); 1609 if (*purl->user || *purl->pwd) { 1610 aparams.user = purl->user ? 1611 strdup(purl->user) : strdup(""); 1612 aparams.password = purl->pwd? 1613 strdup(purl->pwd) : strdup(""); 1614 } else if ((p = getenv("HTTP_PROXY_AUTH")) != NULL && 1615 *p != '\0') { 1616 if (http_authfromenv(p, &aparams) < 0) { 1617 http_seterr(HTTP_NEED_PROXY_AUTH); 1618 goto ouch; 1619 } 1620 } 1621 http_authorize(conn, "Proxy-Authorization", 1622 &proxy_challenges, &aparams, url); 1623 clean_http_auth_params(&aparams); 1624 } 1625 1626 /* 1627 * Server authorization: we never send "a priori" 1628 * Basic auth, which used to be done if user/pass were 1629 * set in the url. This would be weird because we'd send the 1630 * password in the clear even if Digest is finally to be 1631 * used (it would have made more sense for the 1632 * pre-digest version to do this when Basic was specified 1633 * in the environment) 1634 */ 1635 if (server_challenges.valid) { 1636 http_auth_params_t aparams; 1637 init_http_auth_params(&aparams); 1638 if (*url->user || *url->pwd) { 1639 aparams.user = url->user ? 1640 strdup(url->user) : strdup(""); 1641 aparams.password = url->pwd ? 1642 strdup(url->pwd) : strdup(""); 1643 } else if ((p = getenv("HTTP_AUTH")) != NULL && 1644 *p != '\0') { 1645 if (http_authfromenv(p, &aparams) < 0) { 1646 http_seterr(HTTP_NEED_AUTH); 1647 goto ouch; 1648 } 1649 } else if (fetchAuthMethod && 1650 fetchAuthMethod(url) == 0) { 1651 aparams.user = url->user ? 1652 strdup(url->user) : strdup(""); 1653 aparams.password = url->pwd ? 1654 strdup(url->pwd) : strdup(""); 1655 } else { 1656 http_seterr(HTTP_NEED_AUTH); 1657 goto ouch; 1658 } 1659 http_authorize(conn, "Authorization", 1660 &server_challenges, &aparams, url); 1661 clean_http_auth_params(&aparams); 1662 } 1663 1664 /* other headers */ 1665 if ((p = getenv("HTTP_ACCEPT")) != NULL) { 1666 if (*p != '\0') 1667 http_cmd(conn, "Accept: %s", p); 1668 } else { 1669 http_cmd(conn, "Accept: */*"); 1670 } 1671 if ((p = getenv("HTTP_REFERER")) != NULL && *p != '\0') { 1672 if (strcasecmp(p, "auto") == 0) 1673 http_cmd(conn, "Referer: %s://%s%s", 1674 url->scheme, host, url->doc); 1675 else 1676 http_cmd(conn, "Referer: %s", p); 1677 } 1678 if ((p = getenv("HTTP_USER_AGENT")) != NULL && *p != '\0') 1679 http_cmd(conn, "User-Agent: %s", p); 1680 else 1681 http_cmd(conn, "User-Agent: %s " _LIBFETCH_VER, getprogname()); 1682 if (url->offset > 0) 1683 http_cmd(conn, "Range: bytes=%lld-", (long long)url->offset); 1684 http_cmd(conn, "Connection: close"); 1685 http_cmd(conn, ""); 1686 1687 /* 1688 * Force the queued request to be dispatched. Normally, one 1689 * would do this with shutdown(2) but squid proxies can be 1690 * configured to disallow such half-closed connections. To 1691 * be compatible with such configurations, fiddle with socket 1692 * options to force the pending data to be written. 1693 */ 1694 val = 0; 1695 setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val, 1696 sizeof(val)); 1697 val = 1; 1698 setsockopt(conn->sd, IPPROTO_TCP, TCP_NODELAY, &val, 1699 sizeof(val)); 1700 1701 /* get reply */ 1702 switch (http_get_reply(conn)) { 1703 case HTTP_OK: 1704 case HTTP_PARTIAL: 1705 case HTTP_NOT_MODIFIED: 1706 /* fine */ 1707 break; 1708 case HTTP_MOVED_PERM: 1709 case HTTP_MOVED_TEMP: 1710 case HTTP_SEE_OTHER: 1711 case HTTP_USE_PROXY: 1712 /* 1713 * Not so fine, but we still have to read the 1714 * headers to get the new location. 1715 */ 1716 break; 1717 case HTTP_NEED_AUTH: 1718 if (server_challenges.valid) { 1719 /* 1720 * We already sent out authorization code, 1721 * so there's nothing more we can do. 1722 */ 1723 http_seterr(conn->err); 1724 goto ouch; 1725 } 1726 /* try again, but send the password this time */ 1727 if (verbose) 1728 fetch_info("server requires authorization"); 1729 break; 1730 case HTTP_NEED_PROXY_AUTH: 1731 if (proxy_challenges.valid) { 1732 /* 1733 * We already sent our proxy 1734 * authorization code, so there's 1735 * nothing more we can do. */ 1736 http_seterr(conn->err); 1737 goto ouch; 1738 } 1739 /* try again, but send the password this time */ 1740 if (verbose) 1741 fetch_info("proxy requires authorization"); 1742 break; 1743 case HTTP_BAD_RANGE: 1744 /* 1745 * This can happen if we ask for 0 bytes because 1746 * we already have the whole file. Consider this 1747 * a success for now, and check sizes later. 1748 */ 1749 break; 1750 case HTTP_PROTOCOL_ERROR: 1751 /* fall through */ 1752 case -1: 1753 fetch_syserr(); 1754 goto ouch; 1755 default: 1756 http_seterr(conn->err); 1757 if (!verbose) 1758 goto ouch; 1759 /* fall through so we can get the full error message */ 1760 } 1761 1762 /* get headers. http_next_header expects one line readahead */ 1763 if (fetch_getln(conn) == -1) { 1764 fetch_syserr(); 1765 goto ouch; 1766 } 1767 do { 1768 switch ((h = http_next_header(conn, &headerbuf, &p))) { 1769 case hdr_syserror: 1770 fetch_syserr(); 1771 goto ouch; 1772 case hdr_error: 1773 http_seterr(HTTP_PROTOCOL_ERROR); 1774 goto ouch; 1775 case hdr_content_length: 1776 http_parse_length(p, &clength); 1777 break; 1778 case hdr_content_range: 1779 http_parse_range(p, &offset, &length, &size); 1780 break; 1781 case hdr_last_modified: 1782 http_parse_mtime(p, &mtime); 1783 break; 1784 case hdr_location: 1785 if (!HTTP_REDIRECT(conn->err)) 1786 break; 1787 /* 1788 * if the A flag is set, we don't follow 1789 * temporary redirects. 1790 */ 1791 if (noredirect && 1792 conn->err != HTTP_MOVED_PERM && 1793 conn->err != HTTP_PERM_REDIRECT && 1794 conn->err != HTTP_USE_PROXY) { 1795 n = 1; 1796 break; 1797 } 1798 if (new) 1799 free(new); 1800 if (verbose) 1801 fetch_info("%d redirect to %s", conn->err, p); 1802 if (*p == '/') 1803 /* absolute path */ 1804 new = fetchMakeURL(url->scheme, url->host, url->port, p, 1805 url->user, url->pwd); 1806 else 1807 new = fetchParseURL(p); 1808 if (new == NULL) { 1809 /* XXX should set an error code */ 1810 DEBUG(fprintf(stderr, "failed to parse new URL\n")); 1811 goto ouch; 1812 } 1813 1814 /* Only copy credentials if the host matches */ 1815 if (!strcmp(new->host, url->host) && !*new->user && !*new->pwd) { 1816 strcpy(new->user, url->user); 1817 strcpy(new->pwd, url->pwd); 1818 } 1819 new->offset = url->offset; 1820 new->length = url->length; 1821 break; 1822 case hdr_transfer_encoding: 1823 /* XXX weak test*/ 1824 chunked = (strcasecmp(p, "chunked") == 0); 1825 break; 1826 case hdr_www_authenticate: 1827 if (conn->err != HTTP_NEED_AUTH) 1828 break; 1829 if (http_parse_authenticate(p, &server_challenges) == 0) 1830 ++n; 1831 break; 1832 case hdr_proxy_authenticate: 1833 if (conn->err != HTTP_NEED_PROXY_AUTH) 1834 break; 1835 if (http_parse_authenticate(p, &proxy_challenges) == 0) 1836 ++n; 1837 break; 1838 case hdr_end: 1839 /* fall through */ 1840 case hdr_unknown: 1841 /* ignore */ 1842 break; 1843 } 1844 } while (h > hdr_end); 1845 1846 /* we need to provide authentication */ 1847 if (conn->err == HTTP_NEED_AUTH || 1848 conn->err == HTTP_NEED_PROXY_AUTH) { 1849 e = conn->err; 1850 if ((conn->err == HTTP_NEED_AUTH && 1851 !server_challenges.valid) || 1852 (conn->err == HTTP_NEED_PROXY_AUTH && 1853 !proxy_challenges.valid)) { 1854 /* 401/7 but no www/proxy-authenticate ?? */ 1855 DEBUG(fprintf(stderr, "401/7 and no auth header\n")); 1856 goto ouch; 1857 } 1858 fetch_close(conn); 1859 conn = NULL; 1860 continue; 1861 } 1862 1863 /* requested range not satisfiable */ 1864 if (conn->err == HTTP_BAD_RANGE) { 1865 if (url->offset == size && url->length == 0) { 1866 /* asked for 0 bytes; fake it */ 1867 offset = url->offset; 1868 clength = -1; 1869 conn->err = HTTP_OK; 1870 break; 1871 } else { 1872 http_seterr(conn->err); 1873 goto ouch; 1874 } 1875 } 1876 1877 /* we have a hit or an error */ 1878 if (conn->err == HTTP_OK 1879 || conn->err == HTTP_NOT_MODIFIED 1880 || conn->err == HTTP_PARTIAL 1881 || HTTP_ERROR(conn->err)) 1882 break; 1883 1884 /* all other cases: we got a redirect */ 1885 e = conn->err; 1886 clean_http_auth_challenges(&server_challenges); 1887 fetch_close(conn); 1888 conn = NULL; 1889 if (!new) { 1890 DEBUG(fprintf(stderr, "redirect with no new location\n")); 1891 break; 1892 } 1893 if (url != URL) 1894 fetchFreeURL(url); 1895 url = new; 1896 } while (++i < n); 1897 1898 /* we failed, or ran out of retries */ 1899 if (conn == NULL) { 1900 http_seterr(e); 1901 goto ouch; 1902 } 1903 1904 DEBUG(fprintf(stderr, "offset %lld, length %lld," 1905 " size %lld, clength %lld\n", 1906 (long long)offset, (long long)length, 1907 (long long)size, (long long)clength)); 1908 1909 if (conn->err == HTTP_NOT_MODIFIED) { 1910 http_seterr(HTTP_NOT_MODIFIED); 1911 return (NULL); 1912 } 1913 1914 /* check for inconsistencies */ 1915 if (clength != -1 && length != -1 && clength != length) { 1916 http_seterr(HTTP_PROTOCOL_ERROR); 1917 goto ouch; 1918 } 1919 if (clength == -1) 1920 clength = length; 1921 if (clength != -1) 1922 length = offset + clength; 1923 if (length != -1 && size != -1 && length != size) { 1924 http_seterr(HTTP_PROTOCOL_ERROR); 1925 goto ouch; 1926 } 1927 if (size == -1) 1928 size = length; 1929 1930 /* fill in stats */ 1931 if (us) { 1932 us->size = size; 1933 us->atime = us->mtime = mtime; 1934 } 1935 1936 /* too far? */ 1937 if (URL->offset > 0 && offset > URL->offset) { 1938 http_seterr(HTTP_PROTOCOL_ERROR); 1939 goto ouch; 1940 } 1941 1942 /* report back real offset and size */ 1943 URL->offset = offset; 1944 URL->length = clength; 1945 1946 /* wrap it up in a FILE */ 1947 if ((f = http_funopen(conn, chunked)) == NULL) { 1948 fetch_syserr(); 1949 goto ouch; 1950 } 1951 1952 if (url != URL) 1953 fetchFreeURL(url); 1954 if (purl) 1955 fetchFreeURL(purl); 1956 1957 if (HTTP_ERROR(conn->err)) { 1958 http_print_html(stderr, f); 1959 fclose(f); 1960 f = NULL; 1961 } 1962 clean_http_headerbuf(&headerbuf); 1963 clean_http_auth_challenges(&server_challenges); 1964 clean_http_auth_challenges(&proxy_challenges); 1965 return (f); 1966 1967 ouch: 1968 if (url != URL) 1969 fetchFreeURL(url); 1970 if (purl) 1971 fetchFreeURL(purl); 1972 if (conn != NULL) 1973 fetch_close(conn); 1974 clean_http_headerbuf(&headerbuf); 1975 clean_http_auth_challenges(&server_challenges); 1976 clean_http_auth_challenges(&proxy_challenges); 1977 return (NULL); 1978 } 1979 1980 1981 /***************************************************************************** 1982 * Entry points 1983 */ 1984 1985 /* 1986 * Retrieve and stat a file by HTTP 1987 */ 1988 FILE * 1989 fetchXGetHTTP(struct url *URL, struct url_stat *us, const char *flags) 1990 { 1991 return (http_request(URL, "GET", us, http_get_proxy(URL, flags), flags)); 1992 } 1993 1994 /* 1995 * Retrieve a file by HTTP 1996 */ 1997 FILE * 1998 fetchGetHTTP(struct url *URL, const char *flags) 1999 { 2000 return (fetchXGetHTTP(URL, NULL, flags)); 2001 } 2002 2003 /* 2004 * Store a file by HTTP 2005 */ 2006 FILE * 2007 fetchPutHTTP(struct url *URL __unused, const char *flags __unused) 2008 { 2009 warnx("fetchPutHTTP(): not implemented"); 2010 return (NULL); 2011 } 2012 2013 /* 2014 * Get an HTTP document's metadata 2015 */ 2016 int 2017 fetchStatHTTP(struct url *URL, struct url_stat *us, const char *flags) 2018 { 2019 FILE *f; 2020 2021 f = http_request(URL, "HEAD", us, http_get_proxy(URL, flags), flags); 2022 if (f == NULL) 2023 return (-1); 2024 fclose(f); 2025 return (0); 2026 } 2027 2028 /* 2029 * List a directory 2030 */ 2031 struct url_ent * 2032 fetchListHTTP(struct url *url __unused, const char *flags __unused) 2033 { 2034 warnx("fetchListHTTP(): not implemented"); 2035 return (NULL); 2036 } 2037