xref: /freebsd/lib/libfetch/http.c (revision 41466b50c1d5bfd1cf6adaae547a579a75d7c04e)
1 /*-
2  * Copyright (c) 2000 Dag-Erling Coïdan Smørgrav
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 /*
33  * The following copyright applies to the base64 code:
34  *
35  *-
36  * Copyright 1997 Massachusetts Institute of Technology
37  *
38  * Permission to use, copy, modify, and distribute this software and
39  * its documentation for any purpose and without fee is hereby
40  * granted, provided that both the above copyright notice and this
41  * permission notice appear in all copies, that both the above
42  * copyright notice and this permission notice appear in all
43  * supporting documentation, and that the name of M.I.T. not be used
44  * in advertising or publicity pertaining to distribution of the
45  * software without specific, written prior permission.  M.I.T. makes
46  * no representations about the suitability of this software for any
47  * purpose.  It is provided "as is" without express or implied
48  * warranty.
49  *
50  * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
51  * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
52  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
53  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
54  * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
55  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
56  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
57  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
58  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
59  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
60  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61  * SUCH DAMAGE.
62  */
63 
64 #include <sys/param.h>
65 #include <sys/socket.h>
66 
67 #include <ctype.h>
68 #include <err.h>
69 #include <errno.h>
70 #include <locale.h>
71 #include <netdb.h>
72 #include <stdarg.h>
73 #include <stdio.h>
74 #include <stdlib.h>
75 #include <string.h>
76 #include <time.h>
77 #include <unistd.h>
78 
79 #include "fetch.h"
80 #include "common.h"
81 #include "httperr.h"
82 
extern char *__progname; /* XXX not portable */

/* Maximum number of redirects to follow */
#define MAX_REDIRECT 5

/* Symbolic names for reply codes we care about */
#define HTTP_OK			200
#define HTTP_PARTIAL		206
#define HTTP_MOVED_PERM		301
#define HTTP_MOVED_TEMP		302
#define HTTP_SEE_OTHER		303
#define HTTP_NEED_AUTH		401
/* 407 is "Proxy Authentication Required" (403 is "Forbidden") */
#define HTTP_NEED_PROXY_AUTH	407
/* not a real status: returned by _http_get_reply() for a bad status line */
#define HTTP_PROTOCOL_ERROR	999

/* True if the reply code is one of the redirects we follow */
#define HTTP_REDIRECT(xyz) ((xyz) == HTTP_MOVED_PERM \
			    || (xyz) == HTTP_MOVED_TEMP \
			    || (xyz) == HTTP_SEE_OTHER)
101 
102 
103 
104 /*****************************************************************************
105  * I/O functions for decoding chunked streams
106  */
107 
struct cookie
{
    int		 fd;		/* descriptor the chunked stream is read from */
    char	*buf;		/* heap buffer; released by the close hook */
    size_t	 b_size;	/* allocated size of buf */
    ssize_t	 b_len;		/* number of valid bytes currently in buf */
    int		 b_pos;		/* next unread position within buf */
    int		 eof;		/* set once the zero-size chunk has been seen */
    int		 error;		/* set once a chunk header failed to parse */
    size_t	 chunksize;	/* bytes of the current chunk still unread */
#ifndef NDEBUG
    size_t	 total;		/* sum of all chunk sizes, for debug output */
#endif
};
122 
123 /*
124  * Get next chunk header
125  */
126 static int
127 _http_new_chunk(struct cookie *c)
128 {
129     char *p;
130 
131     if (_fetch_getln(c->fd, &c->buf, &c->b_size, &c->b_len) == -1)
132 	return -1;
133 
134     if (c->b_len < 2 || !ishexnumber(*c->buf))
135 	return -1;
136 
137     for (p = c->buf; !isspace(*p) && *p != ';' && p < c->buf + c->b_len; ++p)
138 	if (!ishexnumber(*p))
139 	    return -1;
140 	else if (isdigit(*p))
141 	    c->chunksize = c->chunksize * 16 + *p - '0';
142 	else
143 	    c->chunksize = c->chunksize * 16 + 10 + tolower(*p) - 'a';
144 
145 #ifndef NDEBUG
146     c->total += c->chunksize;
147     if (c->chunksize == 0)
148 	fprintf(stderr, "\033[1m_http_fillbuf(): "
149 		"end of last chunk\033[m\n");
150     else
151 	fprintf(stderr, "\033[1m_http_fillbuf(): "
152 		"new chunk: %lu (%lu)\033[m\n",
153 		(unsigned long)c->chunksize, (unsigned long)c->total);
154 #endif
155 
156     return c->chunksize;
157 }
158 
159 /*
160  * Fill the input buffer, do chunk decoding on the fly
161  */
162 static int
163 _http_fillbuf(struct cookie *c)
164 {
165     if (c->error)
166 	return -1;
167     if (c->eof)
168 	return 0;
169 
170     if (c->chunksize == 0) {
171 	switch (_http_new_chunk(c)) {
172 	case -1:
173 	    c->error = 1;
174 	    return -1;
175 	case 0:
176 	    c->eof = 1;
177 	    return 0;
178 	}
179     }
180 
181     if (c->b_size < c->chunksize) {
182 	char *tmp;
183 
184 	if ((tmp = realloc(c->buf, c->chunksize)) == NULL)
185 	    return -1;
186 	c->buf = tmp;
187 	c->b_size = c->chunksize;
188     }
189 
190     if ((c->b_len = read(c->fd, c->buf, c->chunksize)) == -1)
191 	return -1;
192     c->chunksize -= c->b_len;
193 
194     if (c->chunksize == 0) {
195 	char endl[2];
196 	read(c->fd, endl, 2);
197     }
198 
199     c->b_pos = 0;
200 
201     return c->b_len;
202 }
203 
204 /*
205  * Read function
206  */
207 static int
208 _http_readfn(void *v, char *buf, int len)
209 {
210     struct cookie *c = (struct cookie *)v;
211     int l, pos;
212 
213     if (c->error)
214 	return -1;
215     if (c->eof)
216 	return 0;
217 
218     for (pos = 0; len > 0; pos += l, len -= l) {
219 	/* empty buffer */
220 	if (!c->buf || c->b_pos == c->b_len)
221 	    if (_http_fillbuf(c) < 1)
222 		break;
223 	l = c->b_len - c->b_pos;
224 	if (len < l)
225 	    l = len;
226 	bcopy(c->buf + c->b_pos, buf + pos, l);
227 	c->b_pos += l;
228     }
229 
230     if (!pos && c->error)
231 	return -1;
232     return pos;
233 }
234 
235 /*
236  * Write function
237  */
238 static int
239 _http_writefn(void *v, const char *buf, int len)
240 {
241     struct cookie *c = (struct cookie *)v;
242 
243     return write(c->fd, buf, len);
244 }
245 
246 /*
247  * Close function
248  */
249 static int
250 _http_closefn(void *v)
251 {
252     struct cookie *c = (struct cookie *)v;
253     int r;
254 
255     r = close(c->fd);
256     if (c->buf)
257 	free(c->buf);
258     free(c);
259     return r;
260 }
261 
262 /*
263  * Wrap a file descriptor up
264  */
265 static FILE *
266 _http_funopen(int fd)
267 {
268     struct cookie *c;
269     FILE *f;
270 
271     if ((c = calloc(1, sizeof *c)) == NULL) {
272 	_fetch_syserr();
273 	return NULL;
274     }
275     c->fd = fd;
276     if (!(f = funopen(c, _http_readfn, _http_writefn, NULL, _http_closefn))) {
277 	_fetch_syserr();
278 	free(c);
279 	return NULL;
280     }
281     return f;
282 }
283 
284 
285 /*****************************************************************************
286  * Helper functions for talking to the server and parsing its replies
287  */
288 
/*
 * Header types
 *
 * Values above hdr_end are headers; the header loop in _http_request()
 * keeps going for as long as it gets one of those.
 */
typedef enum {
    hdr_syserror = -2,		/* reading the header line failed */
    hdr_error = -1,
    hdr_end = 0,		/* empty line: end of the header section */
    hdr_unknown = 1,		/* a header we have no interest in */
    hdr_content_length = 2,
    hdr_content_range = 3,
    hdr_last_modified = 4,
    hdr_location = 5,
    hdr_transfer_encoding = 6,
    hdr_www_authenticate = 7
} hdr_t;

/*
 * Names of interesting headers
 *
 * The list ends with the hdr_unknown entry, whose name is NULL.
 */
static struct {
    hdr_t	 num;
    const char	*name;
} hdr_names[] = {
    { hdr_content_length,	"Content-Length" },
    { hdr_content_range,	"Content-Range" },
    { hdr_last_modified,	"Last-Modified" },
    { hdr_location,		"Location" },
    { hdr_transfer_encoding,	"Transfer-Encoding" },
    { hdr_www_authenticate,	"WWW-Authenticate" },
    { hdr_unknown,		NULL },
};

/* Line buffer shared by _http_get_reply() and _http_next_header() */
static char	*reply_buf;	/* storage managed by _fetch_getln() */
static size_t	 reply_size;	/* size argument handed to _fetch_getln() */
static size_t	 reply_length;	/* length of the line last read */
320 
/*
 * Send a formatted line
 *
 * Formats the arguments printf-style and hands the result to
 * _fetch_putln().  Returns 0 on success; on failure calls
 * _fetch_syserr() and returns -1.
 */
static int
_http_cmd(int fd, const char *fmt, ...)
{
    va_list ap;
    char *msg;
    int len, r;

    va_start(ap, fmt);
    len = vasprintf(&msg, fmt, ap);
    va_end(ap);

    /*
     * Test the return value: not every implementation sets msg to
     * NULL on failure, and the length must not be used when it is -1.
     */
    if (len < 0) {
	errno = ENOMEM;
	_fetch_syserr();
	return -1;
    }

    r = _fetch_putln(fd, msg, (size_t)len);
    free(msg);

    if (r == -1) {
	_fetch_syserr();
	return -1;
    }

    return 0;
}
352 
353 /*
354  * Get and parse status line
355  */
356 static int
357 _http_get_reply(int fd)
358 {
359     char *p;
360 
361     if (_fetch_getln(fd, &reply_buf, &reply_size, &reply_length) == -1)
362 	return -1;
363     /*
364      * A valid status line looks like "HTTP/m.n xyz reason" where m
365      * and n are the major and minor protocol version numbers and xyz
366      * is the reply code.
367      * Unfortunately, there are servers out there (NCSA 1.5.1, to name
368      * just one) that do not send a version number, so we can't rely
369      * on finding one, but if we do, insist on it being 1.0 or 1.1.
370      * We don't care about the reason phrase.
371      */
372     if (strncmp(reply_buf, "HTTP", 4) != 0)
373 	return HTTP_PROTOCOL_ERROR;
374     p = reply_buf + 4;
375     if (*p == '/') {
376 	if (p[1] != '1' || p[2] != '.' || (p[3] != '0' && p[3] != '1'))
377 	    return HTTP_PROTOCOL_ERROR;
378 	p += 4;
379     }
380     if (*p != ' '
381 	|| !isdigit(p[1])
382 	|| !isdigit(p[2])
383 	|| !isdigit(p[3]))
384 	return HTTP_PROTOCOL_ERROR;
385 
386     return ((p[1] - '0') * 100 + (p[2] - '0') * 10 + (p[3] - '0'));
387 }
388 
/*
 * Check a header; if the type matches the given string, return a
 * pointer to the beginning of the value.
 *
 * str is the header name to look for and hdr the complete header
 * line.  The name is compared without regard to case and must be
 * followed directly by a colon.  Returns NULL if the line is a
 * different header; otherwise returns a pointer to the first
 * character after the colon that is not whitespace.
 */
static const char *
_http_match(const char *str, const char *hdr)
{
    /*
     * Step forward only over characters that matched, so that a
     * mismatch on the very last character of the name is noticed.
     */
    while (*str && *hdr &&
	tolower((unsigned char)*str) == tolower((unsigned char)*hdr)) {
	++str;
	++hdr;
    }
    /* the whole name must be used up and be followed by a colon */
    if (*str || *hdr != ':')
	return NULL;
    /* step over the colon, then over any whitespace */
    do {
	++hdr;
    } while (isspace((unsigned char)*hdr));
    return hdr;
}
404 
405 /*
406  * Get the next header and return the appropriate symbolic code.
407  */
408 static hdr_t
409 _http_next_header(int fd, const char **p)
410 {
411     int i;
412 
413     if (_fetch_getln(fd, &reply_buf, &reply_size, &reply_length) == -1)
414 	return hdr_syserror;
415     while (reply_length && isspace(reply_buf[reply_length-1]))
416 	reply_length--;
417     reply_buf[reply_length] = 0;
418     if (reply_length == 0)
419 	return hdr_end;
420     /*
421      * We could check for malformed headers but we don't really care.
422      * A valid header starts with a token immediately followed by a
423      * colon; a token is any sequence of non-control, non-whitespace
424      * characters except "()<>@,;:\\\"{}".
425      */
426     for (i = 0; hdr_names[i].num != hdr_unknown; i++)
427 	if ((*p = _http_match(hdr_names[i].name, reply_buf)) != NULL)
428 	    return hdr_names[i].num;
429     return hdr_unknown;
430 }
431 
/*
 * Parse a last-modified header
 *
 * p is the header value.  Only the "Sun, 06 Nov 1994 08:49:37 GMT"
 * form is understood.  LC_TIME is switched to "C" while parsing, so
 * that day and month names are matched in English, and is restored
 * afterwards.  Returns 0 and stores the time in *mtime, or returns -1
 * if the value does not parse.
 */
static int
_http_parse_mtime(const char *p, time_t *mtime)
{
    char locale[64], *r;
    const char *current;
    struct tm tm;

    /* copy the name: it must survive the setlocale() call below */
    current = setlocale(LC_TIME, NULL);
    snprintf(locale, sizeof locale, "%s", current != NULL ? current : "C");
    setlocale(LC_TIME, "C");
    /* strptime() only fills in the fields named in the format */
    memset(&tm, 0, sizeof tm);
    r = strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm);
    /* XXX should add support for date-2 and date-3 */
    setlocale(LC_TIME, locale);
    if (r == NULL)
	return -1;
    DEBUG(fprintf(stderr, "last modified: [\033[1m%04d-%02d-%02d "
		  "%02d:%02d:%02d\033[m]\n",
		  tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
		  tm.tm_hour, tm.tm_min, tm.tm_sec));
    *mtime = timegm(&tm);
    return 0;
}
455 
/*
 * Parse a content-length header
 *
 * p is the header value, which must consist of decimal digits only.
 * Returns 0 and stores the number in *length, or returns -1 if the
 * value is empty, contains anything but digits, or does not fit in an
 * off_t.
 */
static int
_http_parse_length(const char *p, off_t *length)
{
    const unsigned long long max = ~0ULL;
    unsigned long long val;
    unsigned int digit;
    off_t len;

    /* no digits at all is not a length */
    if (!isdigit((unsigned char)*p))
	return -1;
    for (val = 0; isdigit((unsigned char)*p); ++p) {
	digit = (unsigned int)(*p - '0');
	/* stop before the accumulator wraps around */
	if (val > (max - digit) / 10)
	    return -1;
	val = val * 10 + digit;
    }
    if (*p)
	return -1;
    /* the value must survive the trip into an off_t */
    len = (off_t)val;
    if (len < 0 || (unsigned long long)len != val)
	return -1;
    DEBUG(fprintf(stderr, "content length: [\033[1m%lld\033[m]\n",
		  (long long)len));
    *length = len;
    return 0;
}
473 
/*
 * Read the decimal number at *pp into *out and move *pp past it.
 * Returns -1, leaving both untouched, if there is no digit at *pp or
 * the number does not fit in an off_t.
 */
static int
_http_range_number(const char **pp, off_t *out)
{
    const char *s = *pp;
    unsigned long long val = 0;
    unsigned int digit;
    off_t res;

    if (!isdigit((unsigned char)*s))
	return -1;
    for (; isdigit((unsigned char)*s); ++s) {
	digit = (unsigned int)(*s - '0');
	if (val > (~0ULL - digit) / 10)
	    return -1;
	val = val * 10 + digit;
    }
    res = (off_t)val;
    if (res < 0 || (unsigned long long)res != val)
	return -1;
    *pp = s;
    *out = res;
    return 0;
}

/*
 * Parse a content-range header
 *
 * p is the header value, expected to look like "bytes first-last/total".
 * Returns 0 and stores first in *offset, last - first + 1 in *length
 * and total in *size.  Returns -1, leaving the outputs untouched, if
 * the value is malformed, first is greater than last, or total is
 * smaller than the number of bytes in the range.
 */
static int
_http_parse_range(const char *p, off_t *offset, off_t *length, off_t *size)
{
    off_t first, last, len;

    if (strncasecmp(p, "bytes ", 6) != 0)
	return -1;
    p += 6;
    if (_http_range_number(&p, &first) == -1 || *p != '-')
	return -1;
    ++p;
    if (_http_range_number(&p, &last) == -1 || first > last || *p != '/')
	return -1;
    ++p;
    if (_http_range_number(&p, &len) == -1 || *p != '\0')
	return -1;
    /* same test as len < last - first + 1, minus the overflow risk */
    if (len <= last - first)
	return -1;
    DEBUG(fprintf(stderr, "content range: [\033[1m%lld-%lld/%lld\033[m]\n",
		  (long long)first, (long long)last, (long long)len));
    *offset = first;
    *length = last - first + 1;
    *size = len;
    return 0;
}
503 
504 
505 /*****************************************************************************
506  * Helper functions for authorization
507  */
508 
/*
 * Base64 encoding
 *
 * Encodes the NUL-terminated string src and returns the result as a
 * newly allocated NUL-terminated string, which the caller must free.
 * Returns NULL if the allocation fails.
 */
static char *
_http_base64(char *src)
{
    static const char base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";
    /* unsigned view, so that bytes above 0x7f shift in cleanly */
    const unsigned char *in = (const unsigned char *)src;
    char *str, *dst;
    size_t l;
    unsigned int t;

    l = strlen(src);
    /* four characters for every group of up to three bytes, plus NUL */
    if ((str = malloc(((l + 2) / 3) * 4 + 1)) == NULL)
	return NULL;
    dst = str;

    while (l >= 3) {
	t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8) | in[2];
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = base64[(t >> 0) & 0x3f];
	in += 3; l -= 3;
	dst += 4;
    }

    /* one or two bytes left over: pad the last group with '=' */
    switch (l) {
    case 2:
	t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8);
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = '=';
	dst += 4;
	break;
    case 1:
	t = (unsigned int)in[0] << 16;
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = dst[3] = '=';
	dst += 4;
	break;
    default:
	break;
    }

    *dst = '\0';
    return str;
}
564 
/*
 * Encode username and password
 *
 * Sends "<hdr>: Basic <credentials>", where the credentials are
 * "usr:pwd" run through _http_base64().  Returns what _http_cmd()
 * returns, or -1 if memory runs out first.
 */
static int
_http_basic_auth(int fd, const char *hdr, const char *usr, const char *pwd)
{
    char *plain, *encoded;
    int status;

    DEBUG(fprintf(stderr, "usr: [\033[1m%s\033[m]\n", usr));
    DEBUG(fprintf(stderr, "pwd: [\033[1m%s\033[m]\n", pwd));

    if (asprintf(&plain, "%s:%s", usr, pwd) == -1)
	return -1;
    encoded = _http_base64(plain);
    free(plain);
    if (encoded == NULL)
	return -1;

    status = _http_cmd(fd, "%s: Basic %s", hdr, encoded);
    free(encoded);
    return status;
}
586 
/*
 * Send an authorization header
 *
 * p is a specification of the form "basic:<realm>:<user>:<password>";
 * the realm is skipped and the rest is sent under the header name hdr.
 * Returns what _http_basic_auth() returns, or -1 if the specification
 * uses another scheme, is incomplete, or cannot be copied.
 */
static int
_http_authorize(int fd, const char *hdr, const char *p)
{
    char *copy, *sep;
    int status;

    /* basic authorization is the only scheme handled */
    if (strncasecmp(p, "basic:", 6) != 0)
	return -1;

    /* skip realm */
    p += 6;
    while (*p != '\0' && *p != ':')
	++p;
    if (*p == '\0')
	return -1;
    ++p;

    /* what is left must be "user:password" */
    if (strchr(p, ':') == NULL)
	return -1;
    if ((copy = strdup(p)) == NULL)
	return -1; /* XXX */

    /* cut the copy in two at the colon */
    sep = strchr(copy, ':');
    *sep = '\0';
    status = _http_basic_auth(fd, hdr, copy, sep + 1);
    free(copy);
    return status;
}
614 
615 
616 /*****************************************************************************
617  * Helper functions for connecting to a server or proxy
618  */
619 
620 /*
621  * Connect to the correct HTTP server or proxy.
622  */
623 static int
624 _http_connect(struct url *URL, struct url *purl, const char *flags)
625 {
626     int verbose;
627     int af, fd;
628 
629 #ifdef INET6
630     af = AF_UNSPEC;
631 #else
632     af = AF_INET;
633 #endif
634 
635     verbose = CHECK_FLAG('v');
636     if (CHECK_FLAG('4'))
637 	af = AF_INET;
638 #ifdef INET6
639     else if (CHECK_FLAG('6'))
640 	af = AF_INET6;
641 #endif
642 
643     if (purl) {
644 	URL = purl;
645     } else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
646 	/* can't talk http to an ftp server */
647 	/* XXX should set an error code */
648 	return -1;
649     }
650 
651     if ((fd = _fetch_connect(URL->host, URL->port, af, verbose)) == -1)
652 	/* _fetch_connect() has already set an error code */
653 	return -1;
654     return fd;
655 }
656 
657 static struct url *
658 _http_get_proxy(void)
659 {
660     struct url *purl;
661     char *p;
662 
663     if (((p = getenv("HTTP_PROXY")) || (p = getenv("http_proxy"))) &&
664 	(purl = fetchParseURL(p))) {
665 	if (!*purl->scheme)
666 	    strcpy(purl->scheme, SCHEME_HTTP);
667 	if (!purl->port)
668 	    purl->port = _fetch_default_proxy_port(purl->scheme);
669 	if (strcasecmp(purl->scheme, SCHEME_HTTP) == 0)
670 	    return purl;
671 	fetchFreeURL(purl);
672     }
673     return NULL;
674 }
675 
676 
677 /*****************************************************************************
678  * Core
679  */
680 
681 /*
682  * Send a request and process the reply
683  */
684 FILE *
685 _http_request(struct url *URL, const char *op, struct url_stat *us,
686 	      struct url *purl, const char *flags)
687 {
688     struct url *url, *new;
689     int chunked, direct, need_auth, noredirect, verbose;
690     int code, fd, i, n;
691     off_t offset, clength, length, size;
692     time_t mtime;
693     const char *p;
694     FILE *f;
695     hdr_t h;
696     char *host;
697 #ifdef INET6
698     char hbuf[MAXHOSTNAMELEN + 1];
699 #endif
700 
701     direct = CHECK_FLAG('d');
702     noredirect = CHECK_FLAG('A');
703     verbose = CHECK_FLAG('v');
704 
705     if (direct && purl) {
706 	fetchFreeURL(purl);
707 	purl = NULL;
708     }
709 
710     /* try the provided URL first */
711     url = URL;
712 
713     /* if the A flag is set, we only get one try */
714     n = noredirect ? 1 : MAX_REDIRECT;
715     i = 0;
716 
717     need_auth = 0;
718     do {
719 	new = NULL;
720 	chunked = 0;
721 	offset = 0;
722 	clength = -1;
723 	length = -1;
724 	size = -1;
725 	mtime = 0;
726 
727 	/* check port */
728 	if (!url->port)
729 	    url->port = _fetch_default_port(url->scheme);
730 
731 	/* connect to server or proxy */
732 	if ((fd = _http_connect(url, purl, flags)) == -1)
733 	    goto ouch;
734 
735 	host = url->host;
736 #ifdef INET6
737 	if (strchr(url->host, ':')) {
738 	    snprintf(hbuf, sizeof(hbuf), "[%s]", url->host);
739 	    host = hbuf;
740 	}
741 #endif
742 
743 	/* send request */
744 	if (verbose)
745 	    _fetch_info("requesting %s://%s:%d%s",
746 			url->scheme, host, url->port, url->doc);
747 	if (purl) {
748 	    _http_cmd(fd, "%s %s://%s:%d%s HTTP/1.1",
749 		      op, url->scheme, host, url->port, url->doc);
750 	} else {
751 	    _http_cmd(fd, "%s %s HTTP/1.1",
752 		      op, url->doc);
753 	}
754 
755 	/* virtual host */
756 	if (url->port == _fetch_default_port(url->scheme))
757 	    _http_cmd(fd, "Host: %s", host);
758 	else
759 	    _http_cmd(fd, "Host: %s:%d", host, url->port);
760 
761 	/* proxy authorization */
762 	if (purl) {
763 	    if (*purl->user || *purl->pwd)
764 		_http_basic_auth(fd, "Proxy-Authorization",
765 				 purl->user, purl->pwd);
766 	    else if ((p = getenv("HTTP_PROXY_AUTH")) != NULL && *p != '\0')
767 		_http_authorize(fd, "Proxy-Authorization", p);
768 	}
769 
770 	/* server authorization */
771 	if (need_auth || *url->user || *url->pwd) {
772 	    if (*url->user || *url->pwd)
773 		_http_basic_auth(fd, "Authorization", url->user, url->pwd);
774 	    else if ((p = getenv("HTTP_AUTH")) != NULL && *p != '\0')
775 		_http_authorize(fd, "Authorization", p);
776 	    else if (fetchAuthMethod && fetchAuthMethod(url) == 0) {
777 		_http_basic_auth(fd, "Authorization", url->user, url->pwd);
778 	    } else {
779 		_http_seterr(HTTP_NEED_AUTH);
780 		goto ouch;
781 	    }
782 	}
783 
784 	/* other headers */
785 	if ((p = getenv("HTTP_USER_AGENT")) != NULL && *p != '\0')
786 	    _http_cmd(fd, "User-Agent: %s", p);
787 	else
788 	    _http_cmd(fd, "User-Agent: %s " _LIBFETCH_VER, __progname);
789 	if (url->offset)
790 	    _http_cmd(fd, "Range: bytes=%lld-", (long long)url->offset);
791 	_http_cmd(fd, "Connection: close");
792 	_http_cmd(fd, "");
793 
794 	/* get reply */
795 	switch ((code = _http_get_reply(fd))) {
796 	case HTTP_OK:
797 	case HTTP_PARTIAL:
798 	    /* fine */
799 	    break;
800 	case HTTP_MOVED_PERM:
801 	case HTTP_MOVED_TEMP:
802 	    /*
803 	     * Not so fine, but we still have to read the headers to
804 	     * get the new location.
805 	     */
806 	    break;
807 	case HTTP_NEED_AUTH:
808 	    if (need_auth) {
809 		/*
810 		 * We already sent out authorization code, so there's
811 		 * nothing more we can do.
812 		 */
813 		_http_seterr(code);
814 		goto ouch;
815 	    }
816 	    /* try again, but send the password this time */
817 	    if (verbose)
818 		_fetch_info("server requires authorization");
819 	    break;
820 	case HTTP_NEED_PROXY_AUTH:
821 	    /*
822 	     * If we're talking to a proxy, we already sent our proxy
823 	     * authorization code, so there's nothing more we can do.
824 	     */
825 	    _http_seterr(code);
826 	    goto ouch;
827 	case HTTP_PROTOCOL_ERROR:
828 	    /* fall through */
829 	case -1:
830 	    _fetch_syserr();
831 	    goto ouch;
832 	default:
833 	    _http_seterr(code);
834 	    goto ouch;
835 	}
836 
837 	/* get headers */
838 	do {
839 	    switch ((h = _http_next_header(fd, &p))) {
840 	    case hdr_syserror:
841 		_fetch_syserr();
842 		goto ouch;
843 	    case hdr_error:
844 		_http_seterr(HTTP_PROTOCOL_ERROR);
845 		goto ouch;
846 	    case hdr_content_length:
847 		_http_parse_length(p, &clength);
848 		break;
849 	    case hdr_content_range:
850 		_http_parse_range(p, &offset, &length, &size);
851 		break;
852 	    case hdr_last_modified:
853 		_http_parse_mtime(p, &mtime);
854 		break;
855 	    case hdr_location:
856 		if (!HTTP_REDIRECT(code))
857 		    break;
858 		if (new)
859 		    free(new);
860 		if (verbose)
861 		    _fetch_info("%d redirect to %s", code, p);
862 		if (*p == '/')
863 		    /* absolute path */
864 		    new = fetchMakeURL(url->scheme, url->host, url->port, p,
865 				       url->user, url->pwd);
866 		else
867 		    new = fetchParseURL(p);
868 		if (new == NULL) {
869 		    /* XXX should set an error code */
870 		    DEBUG(fprintf(stderr, "failed to parse new URL\n"));
871 		    goto ouch;
872 		}
873 		if (!*new->user && !*new->pwd) {
874 		    strcpy(new->user, url->user);
875 		    strcpy(new->pwd, url->pwd);
876 		}
877 		new->offset = url->offset;
878 		new->length = url->length;
879 		break;
880 	    case hdr_transfer_encoding:
881 		/* XXX weak test*/
882 		chunked = (strcasecmp(p, "chunked") == 0);
883 		break;
884 	    case hdr_www_authenticate:
885 		if (code != HTTP_NEED_AUTH)
886 		    break;
887 		/* if we were smarter, we'd check the method and realm */
888 		break;
889 	    case hdr_end:
890 		/* fall through */
891 	    case hdr_unknown:
892 		/* ignore */
893 		break;
894 	    }
895 	} while (h > hdr_end);
896 
897 	/* we have a hit */
898 	if (code == HTTP_OK || code == HTTP_PARTIAL)
899 	    break;
900 
901 	/* we need to provide authentication */
902 	if (code == HTTP_NEED_AUTH) {
903 	    need_auth = 1;
904 	    close(fd);
905 	    fd = -1;
906 	    continue;
907 	}
908 
909 	/* all other cases: we got a redirect */
910 	need_auth = 0;
911 	close(fd);
912 	fd = -1;
913 	if (!new) {
914 	    DEBUG(fprintf(stderr, "redirect with no new location\n"));
915 	    break;
916 	}
917 	if (url != URL)
918 	    fetchFreeURL(url);
919 	url = new;
920     } while (++i < n);
921 
922     /* we failed, or ran out of retries */
923     if (fd == -1) {
924 	_http_seterr(code);
925 	goto ouch;
926     }
927 
928     DEBUG(fprintf(stderr, "offset %lld, length %lld,"
929 		  " size %lld, clength %lld\n",
930 		  (long long)offset, (long long)length,
931 		  (long long)size, (long long)clength));
932 
933     /* check for inconsistencies */
934     if (clength != -1 && length != -1 && clength != length) {
935 	_http_seterr(HTTP_PROTOCOL_ERROR);
936 	goto ouch;
937     }
938     if (clength == -1)
939 	clength = length;
940     if (clength != -1)
941 	length = offset + clength;
942     if (length != -1 && size != -1 && length != size) {
943 	_http_seterr(HTTP_PROTOCOL_ERROR);
944 	goto ouch;
945     }
946     if (size == -1)
947 	size = length;
948 
949     /* fill in stats */
950     if (us) {
951 	us->size = size;
952 	us->atime = us->mtime = mtime;
953     }
954 
955     /* too far? */
956     if (offset > URL->offset) {
957 	_http_seterr(HTTP_PROTOCOL_ERROR);
958 	goto ouch;
959     }
960 
961     /* report back real offset and size */
962     URL->offset = offset;
963     URL->length = clength;
964 
965     /* wrap it up in a FILE */
966     if ((f = chunked ? _http_funopen(fd) : fdopen(fd, "r")) == NULL) {
967 	_fetch_syserr();
968 	goto ouch;
969     }
970 
971     if (url != URL)
972 	fetchFreeURL(url);
973     if (purl)
974 	fetchFreeURL(purl);
975 
976     return f;
977 
978  ouch:
979     if (url != URL)
980 	fetchFreeURL(url);
981     if (purl)
982 	fetchFreeURL(purl);
983     if (fd != -1)
984 	close(fd);
985     return NULL;
986 }
987 
988 
989 /*****************************************************************************
990  * Entry points
991  */
992 
/*
 * Retrieve and stat a file by HTTP
 *
 * Issues a GET for URL through the proxy named in the environment, if
 * there is one, and returns whatever _http_request() returns.
 */
FILE *
fetchXGetHTTP(struct url *URL, struct url_stat *us, const char *flags)
{
    struct url *proxy = _http_get_proxy();

    return _http_request(URL, "GET", us, proxy, flags);
}
1001 
/*
 * Retrieve a file by HTTP
 *
 * Same as fetchXGetHTTP(), for callers that do not want the stats.
 */
FILE *
fetchGetHTTP(struct url *URL, const char *flags)
{
    struct url_stat *no_stat = NULL;

    return fetchXGetHTTP(URL, no_stat, flags);
}
1010 
1011 /*
1012  * Store a file by HTTP
1013  */
1014 FILE *
1015 fetchPutHTTP(struct url *URL __unused, const char *flags __unused)
1016 {
1017     warnx("fetchPutHTTP(): not implemented");
1018     return NULL;
1019 }
1020 
/*
 * Get an HTTP document's metadata
 *
 * Issues a HEAD request, which makes _http_request() fill in us, and
 * closes the resulting stream straight away.  Returns 0 on success
 * and -1 if the request fails.
 */
int
fetchStatHTTP(struct url *URL, struct url_stat *us, const char *flags)
{
    FILE *f = _http_request(URL, "HEAD", us, _http_get_proxy(), flags);

    if (f != NULL) {
	fclose(f);
	return 0;
    }
    return -1;
}
1034 
1035 /*
1036  * List a directory
1037  */
1038 struct url_ent *
1039 fetchListHTTP(struct url *url __unused, const char *flags __unused)
1040 {
1041     warnx("fetchListHTTP(): not implemented");
1042     return NULL;
1043 }
1044