1 /*- 2 * Copyright (c) 2000 Dag-Erling Co�dan Sm�rgrav 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 /* 33 * The following copyright applies to the base64 code: 34 * 35 *- 36 * Copyright 1997 Massachusetts Institute of Technology 37 * 38 * Permission to use, copy, modify, and distribute this software and 39 * its documentation for any purpose and without fee is hereby 40 * granted, provided that both the above copyright notice and this 41 * permission notice appear in all copies, that both the above 42 * copyright notice and this permission notice appear in all 43 * supporting documentation, and that the name of M.I.T. not be used 44 * in advertising or publicity pertaining to distribution of the 45 * software without specific, written prior permission. M.I.T. makes 46 * no representations about the suitability of this software for any 47 * purpose. It is provided "as is" without express or implied 48 * warranty. 49 * 50 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS 51 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, 52 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 53 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT 54 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 55 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 56 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 57 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 58 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 59 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 60 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 */ 63 64 #include <sys/param.h> 65 #include <sys/socket.h> 66 67 #include <ctype.h> 68 #include <err.h> 69 #include <errno.h> 70 #include <locale.h> 71 #include <netdb.h> 72 #include <stdarg.h> 73 #include <stdio.h> 74 #include <stdlib.h> 75 #include <string.h> 76 #include <time.h> 77 #include <unistd.h> 78 79 #include "fetch.h" 80 #include "common.h" 81 #include "httperr.h" 82 83 /* Maximum number of redirects to follow */ 84 #define MAX_REDIRECT 5 85 86 /* Symbolic names for reply codes we care about */ 87 #define HTTP_OK 200 88 #define HTTP_PARTIAL 206 89 #define HTTP_MOVED_PERM 301 90 #define HTTP_MOVED_TEMP 302 91 #define HTTP_SEE_OTHER 303 92 #define HTTP_NEED_AUTH 401 93 #define HTTP_NEED_PROXY_AUTH 407 94 #define HTTP_PROTOCOL_ERROR 999 95 96 #define HTTP_REDIRECT(xyz) ((xyz) == HTTP_MOVED_PERM \ 97 || (xyz) == HTTP_MOVED_TEMP \ 98 || (xyz) == HTTP_SEE_OTHER) 99 100 #define HTTP_ERROR(xyz) ((xyz) > 400 && (xyz) < 599) 101 102 103 /***************************************************************************** 104 * I/O functions for decoding chunked streams 105 */ 106 107 struct httpio 108 { 109 conn_t *conn; /* connection */ 110 int chunked; /* chunked mode */ 111 char *buf; /* chunk buffer */ 112 size_t bufsize; /* size of chunk buffer */ 113 ssize_t buflen; /* amount of data currently in buffer */ 114 int bufpos; /* current read offset in buffer */ 115 int eof; /* end-of-file flag */ 116 int error; /* error flag */ 117 size_t chunksize; /* remaining size of current chunk */ 118 #ifndef NDEBUG 119 size_t total; 120 #endif 121 }; 122 123 /* 124 * Get next chunk header 125 */ 126 static int 127 _http_new_chunk(struct httpio *io) 128 { 129 char *p; 130 131 if (_fetch_getln(io->conn) == -1) 132 return (-1); 133 134 if (io->conn->buflen < 2 || !ishexnumber(*io->conn->buf)) 135 return (-1); 136 137 for (p = io->conn->buf; *p && !isspace(*p); ++p) { 138 if (*p == ';') 139 break; 140 if (!ishexnumber(*p)) 141 return (-1); 142 if (isdigit(*p)) { 143 io->chunksize = io->chunksize * 16 + 144 *p - '0'; 145 } else { 146 io->chunksize = io->chunksize * 16 + 147 10 + tolower(*p) - 'a'; 148 } 149 } 150 151 #ifndef NDEBUG 152 if (fetchDebug) { 153 io->total += io->chunksize; 154 if (io->chunksize == 0) 155 fprintf(stderr, "_http_fillbuf(): " 156 "end of last chunk\n"); 157 else 158 fprintf(stderr, "_http_fillbuf(): " 159 "new chunk: %lu (%lu)\n", 160 (unsigned long)io->chunksize, (unsigned long)io->total); 161 } 162 #endif 163 164 return (io->chunksize); 165 } 166 167 /* 168 * Grow the input buffer to at least len bytes 169 */ 170 static inline int 171 _http_growbuf(struct httpio *io, size_t len) 172 { 173 char *tmp; 174 175 if (io->bufsize >= len) 176 return (0); 177 178 if ((tmp = realloc(io->buf, len)) == NULL) 179 return (-1); 180 io->buf = tmp; 181 io->bufsize = len; 182 return (0); 183 } 184 185 /* 186 * Fill the input buffer, do chunk decoding on the fly 187 */ 188 static int 189 _http_fillbuf(struct httpio *io, size_t len) 190 { 191 if (io->error) 192 return (-1); 193 if (io->eof) 194 return (0); 195 196 if (io->chunked == 0) { 197 if (_http_growbuf(io, len) == -1) 198 return (-1); 199 if ((io->buflen = _fetch_read(io->conn, io->buf, len)) == -1) 200 return (-1); 201 io->bufpos = 0; 202 return (io->buflen); 203 } 204 205 if (io->chunksize == 0) { 206 switch (_http_new_chunk(io)) { 207 case -1: 208 io->error = 1; 209 return (-1); 210 case 0: 211 io->eof = 1; 212 return (0); 213 } 214 } 215 216 if (len > io->chunksize) 217 len = io->chunksize; 218 if (_http_growbuf(io, len) == -1) 219 return (-1); 220 if ((io->buflen = _fetch_read(io->conn, io->buf, len)) == -1) 221 return (-1); 222 io->chunksize -= io->buflen; 223 224 if (io->chunksize == 0) { 225 char endl[2]; 226 227 if (_fetch_read(io->conn, endl, 2) != 2 || 228 endl[0] != '\r' || endl[1] != '\n') 229 return (-1); 230 } 231 232 io->bufpos = 0; 233 234 return (io->buflen); 235 } 236 237 /* 238 * Read function 239 */ 240 static int 241 _http_readfn(void *v, char *buf, int len) 242 { 243 struct httpio *io = (struct httpio *)v; 244 int l, pos; 245 246 if (io->error) 247 return (-1); 248 if (io->eof) 249 return (0); 250 251 for (pos = 0; len > 0; pos += l, len -= l) { 252 /* empty buffer */ 253 if (!io->buf || io->bufpos == io->buflen) 254 if (_http_fillbuf(io, len) < 1) 255 break; 256 l = io->buflen - io->bufpos; 257 if (len < l) 258 l = len; 259 bcopy(io->buf + io->bufpos, buf + pos, l); 260 io->bufpos += l; 261 } 262 263 if (!pos && io->error) 264 return (-1); 265 return (pos); 266 } 267 268 /* 269 * Write function 270 */ 271 static int 272 _http_writefn(void *v, const char *buf, int len) 273 { 274 struct httpio *io = (struct httpio *)v; 275 276 return (_fetch_write(io->conn, buf, len)); 277 } 278 279 /* 280 * Close function 281 */ 282 static int 283 _http_closefn(void *v) 284 { 285 struct httpio *io = (struct httpio *)v; 286 int r; 287 288 r = _fetch_close(io->conn); 289 if (io->buf) 290 free(io->buf); 291 free(io); 292 return (r); 293 } 294 295 /* 296 * Wrap a file descriptor up 297 */ 298 static FILE * 299 _http_funopen(conn_t *conn, int chunked) 300 { 301 struct httpio *io; 302 FILE *f; 303 304 if ((io = calloc(1, sizeof *io)) == NULL) { 305 _fetch_syserr(); 306 return (NULL); 307 } 308 io->conn = conn; 309 io->chunked = chunked; 310 f = funopen(io, _http_readfn, _http_writefn, NULL, _http_closefn); 311 if (f == NULL) { 312 _fetch_syserr(); 313 free(io); 314 return (NULL); 315 } 316 return (f); 317 } 318 319 320 /***************************************************************************** 321 * Helper functions for talking to the server and parsing its replies 322 */ 323 324 /* Header types */ 325 typedef enum { 326 hdr_syserror = -2, 327 hdr_error = -1, 328 hdr_end = 0, 329 hdr_unknown = 1, 330 hdr_content_length, 331 hdr_content_range, 332 hdr_last_modified, 333 hdr_location, 334 hdr_transfer_encoding, 335 hdr_www_authenticate 336 } hdr_t; 337 338 /* Names of interesting headers */ 339 static struct { 340 hdr_t num; 341 const char *name; 342 } hdr_names[] = { 343 { hdr_content_length, "Content-Length" }, 344 { hdr_content_range, "Content-Range" }, 345 { hdr_last_modified, "Last-Modified" }, 346 { hdr_location, "Location" }, 347 { hdr_transfer_encoding, "Transfer-Encoding" }, 348 { hdr_www_authenticate, "WWW-Authenticate" }, 349 { hdr_unknown, NULL }, 350 }; 351 352 /* 353 * Send a formatted line; optionally echo to terminal 354 */ 355 static int 356 _http_cmd(conn_t *conn, const char *fmt, ...) 357 { 358 va_list ap; 359 size_t len; 360 char *msg; 361 int r; 362 363 va_start(ap, fmt); 364 len = vasprintf(&msg, fmt, ap); 365 va_end(ap); 366 367 if (msg == NULL) { 368 errno = ENOMEM; 369 _fetch_syserr(); 370 return (-1); 371 } 372 373 r = _fetch_putln(conn, msg, len); 374 free(msg); 375 376 if (r == -1) { 377 _fetch_syserr(); 378 return (-1); 379 } 380 381 return (0); 382 } 383 384 /* 385 * Get and parse status line 386 */ 387 static int 388 _http_get_reply(conn_t *conn) 389 { 390 char *p; 391 392 if (_fetch_getln(conn) == -1) 393 return (-1); 394 /* 395 * A valid status line looks like "HTTP/m.n xyz reason" where m 396 * and n are the major and minor protocol version numbers and xyz 397 * is the reply code. 398 * Unfortunately, there are servers out there (NCSA 1.5.1, to name 399 * just one) that do not send a version number, so we can't rely 400 * on finding one, but if we do, insist on it being 1.0 or 1.1. 401 * We don't care about the reason phrase. 402 */ 403 if (strncmp(conn->buf, "HTTP", 4) != 0) 404 return (HTTP_PROTOCOL_ERROR); 405 p = conn->buf + 4; 406 if (*p == '/') { 407 if (p[1] != '1' || p[2] != '.' || (p[3] != '0' && p[3] != '1')) 408 return (HTTP_PROTOCOL_ERROR); 409 p += 4; 410 } 411 if (*p != ' ' || !isdigit(p[1]) || !isdigit(p[2]) || !isdigit(p[3])) 412 return (HTTP_PROTOCOL_ERROR); 413 414 conn->err = (p[1] - '0') * 100 + (p[2] - '0') * 10 + (p[3] - '0'); 415 return (conn->err); 416 } 417 418 /* 419 * Check a header; if the type matches the given string, return a pointer 420 * to the beginning of the value. 421 */ 422 static const char * 423 _http_match(const char *str, const char *hdr) 424 { 425 while (*str && *hdr && tolower(*str++) == tolower(*hdr++)) 426 /* nothing */; 427 if (*str || *hdr != ':') 428 return (NULL); 429 while (*hdr && isspace(*++hdr)) 430 /* nothing */; 431 return (hdr); 432 } 433 434 /* 435 * Get the next header and return the appropriate symbolic code. 436 */ 437 static hdr_t 438 _http_next_header(conn_t *conn, const char **p) 439 { 440 int i; 441 442 if (_fetch_getln(conn) == -1) 443 return (hdr_syserror); 444 while (conn->buflen && isspace(conn->buf[conn->buflen - 1])) 445 conn->buflen--; 446 conn->buf[conn->buflen] = '\0'; 447 if (conn->buflen == 0) 448 return (hdr_end); 449 /* 450 * We could check for malformed headers but we don't really care. 451 * A valid header starts with a token immediately followed by a 452 * colon; a token is any sequence of non-control, non-whitespace 453 * characters except "()<>@,;:\\\"{}". 454 */ 455 for (i = 0; hdr_names[i].num != hdr_unknown; i++) 456 if ((*p = _http_match(hdr_names[i].name, conn->buf)) != NULL) 457 return (hdr_names[i].num); 458 return (hdr_unknown); 459 } 460 461 /* 462 * Parse a last-modified header 463 */ 464 static int 465 _http_parse_mtime(const char *p, time_t *mtime) 466 { 467 char locale[64], *r; 468 struct tm tm; 469 470 strncpy(locale, setlocale(LC_TIME, NULL), sizeof locale); 471 setlocale(LC_TIME, "C"); 472 r = strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm); 473 /* XXX should add support for date-2 and date-3 */ 474 setlocale(LC_TIME, locale); 475 if (r == NULL) 476 return (-1); 477 DEBUG(fprintf(stderr, "last modified: [%04d-%02d-%02d " 478 "%02d:%02d:%02d]\n", 479 tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, 480 tm.tm_hour, tm.tm_min, tm.tm_sec)); 481 *mtime = timegm(&tm); 482 return (0); 483 } 484 485 /* 486 * Parse a content-length header 487 */ 488 static int 489 _http_parse_length(const char *p, off_t *length) 490 { 491 off_t len; 492 493 for (len = 0; *p && isdigit(*p); ++p) 494 len = len * 10 + (*p - '0'); 495 if (*p) 496 return (-1); 497 DEBUG(fprintf(stderr, "content length: [%lld]\n", 498 (long long)len)); 499 *length = len; 500 return (0); 501 } 502 503 /* 504 * Parse a content-range header 505 */ 506 static int 507 _http_parse_range(const char *p, off_t *offset, off_t *length, off_t *size) 508 { 509 off_t first, last, len; 510 511 if (strncasecmp(p, "bytes ", 6) != 0) 512 return (-1); 513 for (first = 0, p += 6; *p && isdigit(*p); ++p) 514 first = first * 10 + *p - '0'; 515 if (*p != '-') 516 return (-1); 517 for (last = 0, ++p; *p && isdigit(*p); ++p) 518 last = last * 10 + *p - '0'; 519 if (first > last || *p != '/') 520 return (-1); 521 for (len = 0, ++p; *p && isdigit(*p); ++p) 522 len = len * 10 + *p - '0'; 523 if (*p || len < last - first + 1) 524 return (-1); 525 DEBUG(fprintf(stderr, "content range: [%lld-%lld/%lld]\n", 526 (long long)first, (long long)last, (long long)len)); 527 *offset = first; 528 *length = last - first + 1; 529 *size = len; 530 return (0); 531 } 532 533 534 /***************************************************************************** 535 * Helper functions for authorization 536 */ 537 538 /* 539 * Base64 encoding 540 */ 541 static char * 542 _http_base64(const char *src) 543 { 544 static const char base64[] = 545 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 546 "abcdefghijklmnopqrstuvwxyz" 547 "0123456789+/"; 548 char *str, *dst; 549 size_t l; 550 int t, r; 551 552 l = strlen(src); 553 if ((str = malloc(((l + 2) / 3) * 4)) == NULL) 554 return (NULL); 555 dst = str; 556 r = 0; 557 558 while (l >= 3) { 559 t = (src[0] << 16) | (src[1] << 8) | src[2]; 560 dst[0] = base64[(t >> 18) & 0x3f]; 561 dst[1] = base64[(t >> 12) & 0x3f]; 562 dst[2] = base64[(t >> 6) & 0x3f]; 563 dst[3] = base64[(t >> 0) & 0x3f]; 564 src += 3; l -= 3; 565 dst += 4; r += 4; 566 } 567 568 switch (l) { 569 case 2: 570 t = (src[0] << 16) | (src[1] << 8); 571 dst[0] = base64[(t >> 18) & 0x3f]; 572 dst[1] = base64[(t >> 12) & 0x3f]; 573 dst[2] = base64[(t >> 6) & 0x3f]; 574 dst[3] = '='; 575 dst += 4; 576 r += 4; 577 break; 578 case 1: 579 t = src[0] << 16; 580 dst[0] = base64[(t >> 18) & 0x3f]; 581 dst[1] = base64[(t >> 12) & 0x3f]; 582 dst[2] = dst[3] = '='; 583 dst += 4; 584 r += 4; 585 break; 586 case 0: 587 break; 588 } 589 590 *dst = 0; 591 return (str); 592 } 593 594 /* 595 * Encode username and password 596 */ 597 static int 598 _http_basic_auth(conn_t *conn, const char *hdr, const char *usr, const char *pwd) 599 { 600 char *upw, *auth; 601 int r; 602 603 DEBUG(fprintf(stderr, "usr: [%s]\n", usr)); 604 DEBUG(fprintf(stderr, "pwd: [%s]\n", pwd)); 605 if (asprintf(&upw, "%s:%s", usr, pwd) == -1) 606 return (-1); 607 auth = _http_base64(upw); 608 free(upw); 609 if (auth == NULL) 610 return (-1); 611 r = _http_cmd(conn, "%s: Basic %s", hdr, auth); 612 free(auth); 613 return (r); 614 } 615 616 /* 617 * Send an authorization header 618 */ 619 static int 620 _http_authorize(conn_t *conn, const char *hdr, const char *p) 621 { 622 /* basic authorization */ 623 if (strncasecmp(p, "basic:", 6) == 0) { 624 char *user, *pwd, *str; 625 int r; 626 627 /* skip realm */ 628 for (p += 6; *p && *p != ':'; ++p) 629 /* nothing */ ; 630 if (!*p || strchr(++p, ':') == NULL) 631 return (-1); 632 if ((str = strdup(p)) == NULL) 633 return (-1); /* XXX */ 634 user = str; 635 pwd = strchr(str, ':'); 636 *pwd++ = '\0'; 637 r = _http_basic_auth(conn, hdr, user, pwd); 638 free(str); 639 return (r); 640 } 641 return (-1); 642 } 643 644 645 /***************************************************************************** 646 * Helper functions for connecting to a server or proxy 647 */ 648 649 /* 650 * Connect to the correct HTTP server or proxy. 651 */ 652 static conn_t * 653 _http_connect(struct url *URL, struct url *purl, const char *flags) 654 { 655 conn_t *conn; 656 int verbose; 657 int af; 658 659 #ifdef INET6 660 af = AF_UNSPEC; 661 #else 662 af = AF_INET; 663 #endif 664 665 verbose = CHECK_FLAG('v'); 666 if (CHECK_FLAG('4')) 667 af = AF_INET; 668 #ifdef INET6 669 else if (CHECK_FLAG('6')) 670 af = AF_INET6; 671 #endif 672 673 if (purl && strcasecmp(URL->scheme, SCHEME_HTTPS) != 0) { 674 URL = purl; 675 } else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) { 676 /* can't talk http to an ftp server */ 677 /* XXX should set an error code */ 678 return (NULL); 679 } 680 681 if ((conn = _fetch_connect(URL->host, URL->port, af, verbose)) == NULL) 682 /* _fetch_connect() has already set an error code */ 683 return (NULL); 684 if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0 && 685 _fetch_ssl(conn, verbose) == -1) { 686 _fetch_close(conn); 687 /* grrr */ 688 errno = EAUTH; 689 _fetch_syserr(); 690 return (NULL); 691 } 692 return (conn); 693 } 694 695 static struct url * 696 _http_get_proxy(void) 697 { 698 struct url *purl; 699 char *p; 700 701 if (((p = getenv("HTTP_PROXY")) || (p = getenv("http_proxy"))) && 702 (purl = fetchParseURL(p))) { 703 if (!*purl->scheme) 704 strcpy(purl->scheme, SCHEME_HTTP); 705 if (!purl->port) 706 purl->port = _fetch_default_proxy_port(purl->scheme); 707 if (strcasecmp(purl->scheme, SCHEME_HTTP) == 0) 708 return (purl); 709 fetchFreeURL(purl); 710 } 711 return (NULL); 712 } 713 714 static void 715 _http_print_html(FILE *out, FILE *in) 716 { 717 size_t len; 718 char *line, *p, *q; 719 int comment, tag; 720 721 comment = tag = 0; 722 while ((line = fgetln(in, &len)) != NULL) { 723 while (len && isspace(line[len - 1])) 724 --len; 725 for (p = q = line; q < line + len; ++q) { 726 if (comment && *q == '-') { 727 if (q + 2 < line + len && 728 strcmp(q, "-->") == 0) { 729 tag = comment = 0; 730 q += 2; 731 } 732 } else if (tag && !comment && *q == '>') { 733 p = q + 1; 734 tag = 0; 735 } else if (!tag && *q == '<') { 736 if (q > p) 737 fwrite(p, q - p, 1, out); 738 tag = 1; 739 if (q + 3 < line + len && 740 strcmp(q, "<!--") == 0) { 741 comment = 1; 742 q += 3; 743 } 744 } 745 } 746 if (!tag && q > p) 747 fwrite(p, q - p, 1, out); 748 fputc('\n', out); 749 } 750 } 751 752 753 /***************************************************************************** 754 * Core 755 */ 756 757 /* 758 * Send a request and process the reply 759 * 760 * XXX This function is way too long, the do..while loop should be split 761 * XXX off into a separate function. 762 */ 763 FILE * 764 _http_request(struct url *URL, const char *op, struct url_stat *us, 765 struct url *purl, const char *flags) 766 { 767 conn_t *conn; 768 struct url *url, *new; 769 int chunked, direct, need_auth, noredirect, verbose; 770 int e, i, n; 771 off_t offset, clength, length, size; 772 time_t mtime; 773 const char *p; 774 FILE *f; 775 hdr_t h; 776 char *host; 777 #ifdef INET6 778 char hbuf[MAXHOSTNAMELEN + 1]; 779 #endif 780 781 direct = CHECK_FLAG('d'); 782 noredirect = CHECK_FLAG('A'); 783 verbose = CHECK_FLAG('v'); 784 785 if (direct && purl) { 786 fetchFreeURL(purl); 787 purl = NULL; 788 } 789 790 /* try the provided URL first */ 791 url = URL; 792 793 /* if the A flag is set, we only get one try */ 794 n = noredirect ? 1 : MAX_REDIRECT; 795 i = 0; 796 797 e = HTTP_PROTOCOL_ERROR; 798 need_auth = 0; 799 do { 800 new = NULL; 801 chunked = 0; 802 offset = 0; 803 clength = -1; 804 length = -1; 805 size = -1; 806 mtime = 0; 807 808 /* check port */ 809 if (!url->port) 810 url->port = _fetch_default_port(url->scheme); 811 812 /* were we redirected to an FTP URL? */ 813 if (purl == NULL && strcmp(url->scheme, SCHEME_FTP) == 0) { 814 if (strcmp(op, "GET") == 0) 815 return (_ftp_request(url, "RETR", us, purl, flags)); 816 else if (strcmp(op, "HEAD") == 0) 817 return (_ftp_request(url, "STAT", us, purl, flags)); 818 } 819 820 /* connect to server or proxy */ 821 if ((conn = _http_connect(url, purl, flags)) == NULL) 822 goto ouch; 823 824 host = url->host; 825 #ifdef INET6 826 if (strchr(url->host, ':')) { 827 snprintf(hbuf, sizeof(hbuf), "[%s]", url->host); 828 host = hbuf; 829 } 830 #endif 831 832 /* send request */ 833 if (verbose) 834 _fetch_info("requesting %s://%s:%d%s", 835 url->scheme, host, url->port, url->doc); 836 if (purl) { 837 _http_cmd(conn, "%s %s://%s:%d%s HTTP/1.1", 838 op, url->scheme, host, url->port, url->doc); 839 } else { 840 _http_cmd(conn, "%s %s HTTP/1.1", 841 op, url->doc); 842 } 843 844 /* virtual host */ 845 if (url->port == _fetch_default_port(url->scheme)) 846 _http_cmd(conn, "Host: %s", host); 847 else 848 _http_cmd(conn, "Host: %s:%d", host, url->port); 849 850 /* proxy authorization */ 851 if (purl) { 852 if (*purl->user || *purl->pwd) 853 _http_basic_auth(conn, "Proxy-Authorization", 854 purl->user, purl->pwd); 855 else if ((p = getenv("HTTP_PROXY_AUTH")) != NULL && *p != '\0') 856 _http_authorize(conn, "Proxy-Authorization", p); 857 } 858 859 /* server authorization */ 860 if (need_auth || *url->user || *url->pwd) { 861 if (*url->user || *url->pwd) 862 _http_basic_auth(conn, "Authorization", url->user, url->pwd); 863 else if ((p = getenv("HTTP_AUTH")) != NULL && *p != '\0') 864 _http_authorize(conn, "Authorization", p); 865 else if (fetchAuthMethod && fetchAuthMethod(url) == 0) { 866 _http_basic_auth(conn, "Authorization", url->user, url->pwd); 867 } else { 868 _http_seterr(HTTP_NEED_AUTH); 869 goto ouch; 870 } 871 } 872 873 /* other headers */ 874 if ((p = getenv("HTTP_USER_AGENT")) != NULL && *p != '\0') 875 _http_cmd(conn, "User-Agent: %s", p); 876 else 877 _http_cmd(conn, "User-Agent: %s " _LIBFETCH_VER, getprogname()); 878 if (url->offset) 879 _http_cmd(conn, "Range: bytes=%lld-", (long long)url->offset); 880 _http_cmd(conn, "Connection: close"); 881 _http_cmd(conn, ""); 882 883 /* get reply */ 884 switch (_http_get_reply(conn)) { 885 case HTTP_OK: 886 case HTTP_PARTIAL: 887 /* fine */ 888 break; 889 case HTTP_MOVED_PERM: 890 case HTTP_MOVED_TEMP: 891 case HTTP_SEE_OTHER: 892 /* 893 * Not so fine, but we still have to read the headers to 894 * get the new location. 895 */ 896 break; 897 case HTTP_NEED_AUTH: 898 if (need_auth) { 899 /* 900 * We already sent out authorization code, so there's 901 * nothing more we can do. 902 */ 903 _http_seterr(conn->err); 904 goto ouch; 905 } 906 /* try again, but send the password this time */ 907 if (verbose) 908 _fetch_info("server requires authorization"); 909 break; 910 case HTTP_NEED_PROXY_AUTH: 911 /* 912 * If we're talking to a proxy, we already sent our proxy 913 * authorization code, so there's nothing more we can do. 914 */ 915 _http_seterr(conn->err); 916 goto ouch; 917 case HTTP_PROTOCOL_ERROR: 918 /* fall through */ 919 case -1: 920 _fetch_syserr(); 921 goto ouch; 922 default: 923 _http_seterr(conn->err); 924 if (!verbose) 925 goto ouch; 926 /* fall through so we can get the full error message */ 927 } 928 929 /* get headers */ 930 do { 931 switch ((h = _http_next_header(conn, &p))) { 932 case hdr_syserror: 933 _fetch_syserr(); 934 goto ouch; 935 case hdr_error: 936 _http_seterr(HTTP_PROTOCOL_ERROR); 937 goto ouch; 938 case hdr_content_length: 939 _http_parse_length(p, &clength); 940 break; 941 case hdr_content_range: 942 _http_parse_range(p, &offset, &length, &size); 943 break; 944 case hdr_last_modified: 945 _http_parse_mtime(p, &mtime); 946 break; 947 case hdr_location: 948 if (!HTTP_REDIRECT(conn->err)) 949 break; 950 if (new) 951 free(new); 952 if (verbose) 953 _fetch_info("%d redirect to %s", conn->err, p); 954 if (*p == '/') 955 /* absolute path */ 956 new = fetchMakeURL(url->scheme, url->host, url->port, p, 957 url->user, url->pwd); 958 else 959 new = fetchParseURL(p); 960 if (new == NULL) { 961 /* XXX should set an error code */ 962 DEBUG(fprintf(stderr, "failed to parse new URL\n")); 963 goto ouch; 964 } 965 if (!*new->user && !*new->pwd) { 966 strcpy(new->user, url->user); 967 strcpy(new->pwd, url->pwd); 968 } 969 new->offset = url->offset; 970 new->length = url->length; 971 break; 972 case hdr_transfer_encoding: 973 /* XXX weak test*/ 974 chunked = (strcasecmp(p, "chunked") == 0); 975 break; 976 case hdr_www_authenticate: 977 if (conn->err != HTTP_NEED_AUTH) 978 break; 979 /* if we were smarter, we'd check the method and realm */ 980 break; 981 case hdr_end: 982 /* fall through */ 983 case hdr_unknown: 984 /* ignore */ 985 break; 986 } 987 } while (h > hdr_end); 988 989 /* we need to provide authentication */ 990 if (conn->err == HTTP_NEED_AUTH) { 991 e = conn->err; 992 need_auth = 1; 993 _fetch_close(conn); 994 conn = NULL; 995 continue; 996 } 997 998 /* we have a hit or an error */ 999 if (conn->err == HTTP_OK || conn->err == HTTP_PARTIAL || HTTP_ERROR(conn->err)) 1000 break; 1001 1002 /* all other cases: we got a redirect */ 1003 e = conn->err; 1004 need_auth = 0; 1005 _fetch_close(conn); 1006 conn = NULL; 1007 if (!new) { 1008 DEBUG(fprintf(stderr, "redirect with no new location\n")); 1009 break; 1010 } 1011 if (url != URL) 1012 fetchFreeURL(url); 1013 url = new; 1014 } while (++i < n); 1015 1016 /* we failed, or ran out of retries */ 1017 if (conn == NULL) { 1018 _http_seterr(e); 1019 goto ouch; 1020 } 1021 1022 DEBUG(fprintf(stderr, "offset %lld, length %lld," 1023 " size %lld, clength %lld\n", 1024 (long long)offset, (long long)length, 1025 (long long)size, (long long)clength)); 1026 1027 /* check for inconsistencies */ 1028 if (clength != -1 && length != -1 && clength != length) { 1029 _http_seterr(HTTP_PROTOCOL_ERROR); 1030 goto ouch; 1031 } 1032 if (clength == -1) 1033 clength = length; 1034 if (clength != -1) 1035 length = offset + clength; 1036 if (length != -1 && size != -1 && length != size) { 1037 _http_seterr(HTTP_PROTOCOL_ERROR); 1038 goto ouch; 1039 } 1040 if (size == -1) 1041 size = length; 1042 1043 /* fill in stats */ 1044 if (us) { 1045 us->size = size; 1046 us->atime = us->mtime = mtime; 1047 } 1048 1049 /* too far? */ 1050 if (offset > URL->offset) { 1051 _http_seterr(HTTP_PROTOCOL_ERROR); 1052 goto ouch; 1053 } 1054 1055 /* report back real offset and size */ 1056 URL->offset = offset; 1057 URL->length = clength; 1058 1059 /* wrap it up in a FILE */ 1060 if ((f = _http_funopen(conn, chunked)) == NULL) { 1061 _fetch_syserr(); 1062 goto ouch; 1063 } 1064 1065 if (url != URL) 1066 fetchFreeURL(url); 1067 if (purl) 1068 fetchFreeURL(purl); 1069 1070 if (HTTP_ERROR(conn->err)) { 1071 _http_print_html(stderr, f); 1072 fclose(f); 1073 f = NULL; 1074 } 1075 1076 return (f); 1077 1078 ouch: 1079 if (url != URL) 1080 fetchFreeURL(url); 1081 if (purl) 1082 fetchFreeURL(purl); 1083 if (conn != NULL) 1084 _fetch_close(conn); 1085 return (NULL); 1086 } 1087 1088 1089 /***************************************************************************** 1090 * Entry points 1091 */ 1092 1093 /* 1094 * Retrieve and stat a file by HTTP 1095 */ 1096 FILE * 1097 fetchXGetHTTP(struct url *URL, struct url_stat *us, const char *flags) 1098 { 1099 return (_http_request(URL, "GET", us, _http_get_proxy(), flags)); 1100 } 1101 1102 /* 1103 * Retrieve a file by HTTP 1104 */ 1105 FILE * 1106 fetchGetHTTP(struct url *URL, const char *flags) 1107 { 1108 return (fetchXGetHTTP(URL, NULL, flags)); 1109 } 1110 1111 /* 1112 * Store a file by HTTP 1113 */ 1114 FILE * 1115 fetchPutHTTP(struct url *URL __unused, const char *flags __unused) 1116 { 1117 warnx("fetchPutHTTP(): not implemented"); 1118 return (NULL); 1119 } 1120 1121 /* 1122 * Get an HTTP document's metadata 1123 */ 1124 int 1125 fetchStatHTTP(struct url *URL, struct url_stat *us, const char *flags) 1126 { 1127 FILE *f; 1128 1129 if ((f = _http_request(URL, "HEAD", us, _http_get_proxy(), flags)) == NULL) 1130 return (-1); 1131 fclose(f); 1132 return (0); 1133 } 1134 1135 /* 1136 * List a directory 1137 */ 1138 struct url_ent * 1139 fetchListHTTP(struct url *url __unused, const char *flags __unused) 1140 { 1141 warnx("fetchListHTTP(): not implemented"); 1142 return (NULL); 1143 } 1144