xref: /freebsd/lib/libfetch/http.c (revision ee2ea5ceafed78a5bd9810beb9e3ca927180c226)
1 /*-
2  * Copyright (c) 2000 Dag-Erling Coïdan Smørgrav
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 /*
33  * The following copyright applies to the base64 code:
34  *
35  *-
36  * Copyright 1997 Massachusetts Institute of Technology
37  *
38  * Permission to use, copy, modify, and distribute this software and
39  * its documentation for any purpose and without fee is hereby
40  * granted, provided that both the above copyright notice and this
41  * permission notice appear in all copies, that both the above
42  * copyright notice and this permission notice appear in all
43  * supporting documentation, and that the name of M.I.T. not be used
44  * in advertising or publicity pertaining to distribution of the
45  * software without specific, written prior permission.  M.I.T. makes
46  * no representations about the suitability of this software for any
47  * purpose.  It is provided "as is" without express or implied
48  * warranty.
49  *
50  * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
51  * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
52  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
53  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
54  * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
55  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
56  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
57  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
58  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
59  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
60  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61  * SUCH DAMAGE.
62  */
63 
#include <sys/param.h>
#include <sys/socket.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <netdb.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
78 
79 #include "fetch.h"
80 #include "common.h"
81 #include "httperr.h"
82 
/* Maximum number of redirects to follow */
#define MAX_REDIRECT 5

/* Symbolic names for reply codes we care about */
#define HTTP_OK			200
#define HTTP_PARTIAL		206
#define HTTP_MOVED_PERM		301
#define HTTP_MOVED_TEMP		302
#define HTTP_SEE_OTHER		303
#define HTTP_NEED_AUTH		401
#define HTTP_NEED_PROXY_AUTH	407
/* Not a real status code: stands for a status line we could not parse */
#define HTTP_PROTOCOL_ERROR	999

/*
 * True for the redirect codes whose new location we follow.
 * Note that the argument is evaluated more than once.
 */
#define HTTP_REDIRECT(xyz) ((xyz) == HTTP_MOVED_PERM \
			    || (xyz) == HTTP_MOVED_TEMP \
			    || (xyz) == HTTP_SEE_OTHER)

/*
 * True for every client (4xx) and server (5xx) error code.  Both bounds
 * are inclusive so that 400 and 599 are classified as errors too.
 * Note that the argument is evaluated more than once.
 */
#define HTTP_ERROR(xyz) ((xyz) >= 400 && (xyz) <= 599)
101 
102 
103 /*****************************************************************************
104  * I/O functions for decoding chunked streams
105  */
106 
/*
 * State kept for one chunk-decoding stream; handed to funopen() as the
 * cookie and released by _http_closefn().
 */
struct cookie
{
	int		 fd;		/* descriptor that is read, written and closed */
	char		*buf;		/* holds the chunk header line, then chunk data */
	size_t		 b_size;	/* size of buf; grown to fit a whole chunk */
	ssize_t		 b_len;		/* number of bytes currently held in buf */
	int		 b_pos;		/* offset in buf of the next byte to hand out */
	int		 eof;		/* set once a zero-length chunk has been seen */
	int		 error;		/* set when a chunk header cannot be obtained */
	size_t		 chunksize;	/* bytes of the current chunk not yet read */
#ifndef NDEBUG
	size_t		 total;		/* sum of all chunk sizes, for debug output */
#endif
};
121 
122 /*
123  * Get next chunk header
124  */
125 static int
126 _http_new_chunk(struct cookie *c)
127 {
128 	char *p;
129 
130 	if (_fetch_getln(c->fd, &c->buf, &c->b_size, &c->b_len) == -1)
131 		return (-1);
132 
133 	if (c->b_len < 2 || !ishexnumber(*c->buf))
134 		return (-1);
135 
136 	for (p = c->buf; !isspace(*p) && p < c->buf + c->b_len; ++p) {
137 		if (*p == ';')
138 			break;
139 		if (!ishexnumber(*p))
140 			return (-1);
141 		if (isdigit(*p)) {
142 			c->chunksize = c->chunksize * 16 +
143 			    *p - '0';
144 		} else {
145 			c->chunksize = c->chunksize * 16 +
146 			    10 + tolower(*p) - 'a';
147 		}
148 	}
149 
150 #ifndef NDEBUG
151 	if (fetchDebug) {
152 		c->total += c->chunksize;
153 		if (c->chunksize == 0)
154 			fprintf(stderr, "_http_fillbuf(): "
155 			    "end of last chunk\n");
156 		else
157 			fprintf(stderr, "_http_fillbuf(): "
158 			    "new chunk: %lu (%lu)\n",
159 			    (unsigned long)c->chunksize, (unsigned long)c->total);
160 	}
161 #endif
162 
163 	return (c->chunksize);
164 }
165 
166 /*
167  * Fill the input buffer, do chunk decoding on the fly
168  */
169 static int
170 _http_fillbuf(struct cookie *c)
171 {
172 	if (c->error)
173 		return (-1);
174 	if (c->eof)
175 		return (0);
176 
177 	if (c->chunksize == 0) {
178 		switch (_http_new_chunk(c)) {
179 		case -1:
180 			c->error = 1;
181 			return (-1);
182 		case 0:
183 			c->eof = 1;
184 			return (0);
185 		}
186 	}
187 
188 	if (c->b_size < c->chunksize) {
189 		char *tmp;
190 
191 		if ((tmp = realloc(c->buf, c->chunksize)) == NULL)
192 			return (-1);
193 		c->buf = tmp;
194 		c->b_size = c->chunksize;
195 	}
196 
197 	if ((c->b_len = read(c->fd, c->buf, c->chunksize)) == -1)
198 		return (-1);
199 	c->chunksize -= c->b_len;
200 
201 	if (c->chunksize == 0) {
202 		char endl;
203 		if (read(c->fd, &endl, 1) == -1 ||
204 		    read(c->fd, &endl, 1) == -1)
205 			return (-1);
206 	}
207 
208 	c->b_pos = 0;
209 
210 	return (c->b_len);
211 }
212 
213 /*
214  * Read function
215  */
216 static int
217 _http_readfn(void *v, char *buf, int len)
218 {
219 	struct cookie *c = (struct cookie *)v;
220 	int l, pos;
221 
222 	if (c->error)
223 		return (-1);
224 	if (c->eof)
225 		return (0);
226 
227 	for (pos = 0; len > 0; pos += l, len -= l) {
228 		/* empty buffer */
229 		if (!c->buf || c->b_pos == c->b_len)
230 			if (_http_fillbuf(c) < 1)
231 				break;
232 		l = c->b_len - c->b_pos;
233 		if (len < l)
234 			l = len;
235 		bcopy(c->buf + c->b_pos, buf + pos, l);
236 		c->b_pos += l;
237 	}
238 
239 	if (!pos && c->error)
240 		return (-1);
241 	return (pos);
242 }
243 
244 /*
245  * Write function
246  */
247 static int
248 _http_writefn(void *v, const char *buf, int len)
249 {
250 	struct cookie *c = (struct cookie *)v;
251 
252 	return (write(c->fd, buf, len));
253 }
254 
255 /*
256  * Close function
257  */
258 static int
259 _http_closefn(void *v)
260 {
261 	struct cookie *c = (struct cookie *)v;
262 	int r;
263 
264 	r = close(c->fd);
265 	if (c->buf)
266 		free(c->buf);
267 	free(c);
268 	return (r);
269 }
270 
271 /*
272  * Wrap a file descriptor up
273  */
274 static FILE *
275 _http_funopen(int fd)
276 {
277 	struct cookie *c;
278 	FILE *f;
279 
280 	if ((c = calloc(1, sizeof *c)) == NULL) {
281 		_fetch_syserr();
282 		return (NULL);
283 	}
284 	c->fd = fd;
285 	f = funopen(c, _http_readfn, _http_writefn, NULL, _http_closefn);
286 	if (f == NULL) {
287 		_fetch_syserr();
288 		free(c);
289 		return (NULL);
290 	}
291 	return (f);
292 }
293 
294 
295 /*****************************************************************************
296  * Helper functions for talking to the server and parsing its replies
297  */
298 
/*
 * Header types
 *
 * Result codes of _http_next_header().  Values at or below hdr_end stop
 * the header loop in _http_request(); the positive values identify the
 * header that was just read.
 */
typedef enum {
	hdr_syserror = -2,	/* the header line could not be read */
	hdr_error = -1,		/* reported by _http_request() as a protocol error */
	hdr_end = 0,		/* empty line: no more headers */
	hdr_unknown = 1,	/* a header we do not interpret; also ends hdr_names[] */
	hdr_content_length,
	hdr_content_range,
	hdr_last_modified,
	hdr_location,
	hdr_transfer_encoding,
	hdr_www_authenticate
} hdr_t;
312 
/*
 * Names of interesting headers
 *
 * Lookup table scanned in order by _http_next_header(); names are
 * compared without regard to case.  The entry whose num is hdr_unknown
 * terminates the table.
 */
static struct {
	hdr_t		 num;	/* code returned when the name matches */
	const char	*name;	/* header name, without the colon */
} hdr_names[] = {
	{ hdr_content_length,		"Content-Length" },
	{ hdr_content_range,		"Content-Range" },
	{ hdr_last_modified,		"Last-Modified" },
	{ hdr_location,			"Location" },
	{ hdr_transfer_encoding,	"Transfer-Encoding" },
	{ hdr_www_authenticate,		"WWW-Authenticate" },
	{ hdr_unknown,			NULL },
};
326 
/*
 * Line buffer shared by _http_get_reply() and _http_next_header(); both
 * fill it through _fetch_getln().
 */
static char		*reply_buf;	/* the line read most recently */
static size_t		 reply_size;	/* presumably the allocated size of reply_buf; maintained by _fetch_getln() */
static size_t		 reply_length;	/* length of the line held in reply_buf */
330 
/*
 * Send a formatted line
 *
 * Formats the arguments printf-style into a temporary string and hands
 * it to _fetch_putln() for transmission on fd.
 *
 * Returns 0 on success.  Returns -1, after recording the system error,
 * if the line cannot be formatted or sent.
 */
static int
_http_cmd(int fd, const char *fmt, ...)
{
	va_list ap;
	char *msg;
	int len, r;

	va_start(ap, fmt);
	len = vasprintf(&msg, fmt, ap);
	va_end(ap);

	/*
	 * Go by the return value: not every implementation sets the
	 * output pointer to NULL when vasprintf() fails.
	 */
	if (len == -1 || msg == NULL) {
		errno = ENOMEM;
		_fetch_syserr();
		return (-1);
	}

	r = _fetch_putln(fd, msg, len);
	free(msg);

	if (r == -1) {
		_fetch_syserr();
		return (-1);
	}

	return (0);
}
362 
363 /*
364  * Get and parse status line
365  */
366 static int
367 _http_get_reply(int fd)
368 {
369 	char *p;
370 
371 	if (_fetch_getln(fd, &reply_buf, &reply_size, &reply_length) == -1)
372 		return (-1);
373 	/*
374 	 * A valid status line looks like "HTTP/m.n xyz reason" where m
375 	 * and n are the major and minor protocol version numbers and xyz
376 	 * is the reply code.
377 	 * Unfortunately, there are servers out there (NCSA 1.5.1, to name
378 	 * just one) that do not send a version number, so we can't rely
379 	 * on finding one, but if we do, insist on it being 1.0 or 1.1.
380 	 * We don't care about the reason phrase.
381 	 */
382 	if (strncmp(reply_buf, "HTTP", 4) != 0)
383 		return (HTTP_PROTOCOL_ERROR);
384 	p = reply_buf + 4;
385 	if (*p == '/') {
386 		if (p[1] != '1' || p[2] != '.' || (p[3] != '0' && p[3] != '1'))
387 			return (HTTP_PROTOCOL_ERROR);
388 		p += 4;
389 	}
390 	if (*p != ' ' || !isdigit(p[1]) || !isdigit(p[2]) || !isdigit(p[3]))
391 		return (HTTP_PROTOCOL_ERROR);
392 
393 	return ((p[1] - '0') * 100 + (p[2] - '0') * 10 + (p[3] - '0'));
394 }
395 
/*
 * Check a header; if the type matches the given string, return a pointer
 * to the beginning of the value.
 *
 * str is the header name to look for and hdr is the complete header
 * line.  The name is compared without regard to case and must be
 * followed directly by a colon.  Whitespace after the colon is skipped.
 *
 * Returns a pointer into hdr at the start of the value (possibly the
 * terminating NUL), or NULL if the line is not this header.
 */
static const char *
_http_match(const char *str, const char *hdr)
{
	/*
	 * Advance only after a successful comparison; stepping past a
	 * mismatching last character would make it look like a match.
	 */
	while (*str && *hdr &&
	    tolower((unsigned char)*str) == tolower((unsigned char)*hdr)) {
		++str;
		++hdr;
	}
	if (*str || *hdr != ':')
		return (NULL);
	++hdr;
	while (isspace((unsigned char)*hdr))
		++hdr;
	return (hdr);
}
411 
412 /*
413  * Get the next header and return the appropriate symbolic code.
414  */
415 static hdr_t
416 _http_next_header(int fd, const char **p)
417 {
418 	int i;
419 
420 	if (_fetch_getln(fd, &reply_buf, &reply_size, &reply_length) == -1)
421 		return (hdr_syserror);
422 	while (reply_length && isspace(reply_buf[reply_length-1]))
423 		reply_length--;
424 	reply_buf[reply_length] = 0;
425 	if (reply_length == 0)
426 	return (hdr_end);
427 	/*
428 	 * We could check for malformed headers but we don't really care.
429 	 * A valid header starts with a token immediately followed by a
430 	 * colon; a token is any sequence of non-control, non-whitespace
431 	 * characters except "()<>@,;:\\\"{}".
432 	 */
433 	for (i = 0; hdr_names[i].num != hdr_unknown; i++)
434 		if ((*p = _http_match(hdr_names[i].name, reply_buf)) != NULL)
435 			return (hdr_names[i].num);
436 	return (hdr_unknown);
437 }
438 
/*
 * Parse a last-modified header
 *
 * p is the header value.  The date is parsed with LC_TIME temporarily
 * switched to "C" so that day and month names are matched in English;
 * the previous locale is restored afterwards.
 *
 * Returns 0 and stores the time in *mtime on success, -1 if the current
 * locale cannot be saved or the value is not in the expected format.
 */
static int
_http_parse_mtime(const char *p, time_t *mtime)
{
	const char *cur;
	char *saved, *r;
	struct tm tm;

	/* keep a private copy: later setlocale() calls may reuse the string */
	if ((cur = setlocale(LC_TIME, NULL)) == NULL ||
	    (saved = strdup(cur)) == NULL)
		return (-1);
	setlocale(LC_TIME, "C");
	/* strptime() only fills in the fields named in the format */
	memset(&tm, 0, sizeof tm);
	r = strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm);
	/* XXX should add support for date-2 and date-3 */
	setlocale(LC_TIME, saved);
	free(saved);
	if (r == NULL)
		return (-1);
	DEBUG(fprintf(stderr, "last modified: [%04d-%02d-%02d "
		  "%02d:%02d:%02d]\n",
		  tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
		  tm.tm_hour, tm.tm_min, tm.tm_sec));
	*mtime = timegm(&tm);
	return (0);
}
462 
/*
 * Parse a content-length header
 *
 * p is the header value, which must consist of one or more decimal
 * digits and nothing else.
 *
 * Returns 0 and stores the value in *length on success, -1 if the value
 * is empty, contains anything but digits, or does not fit in an off_t.
 */
static int
_http_parse_length(const char *p, off_t *length)
{
	unsigned long long val;
	unsigned int digit;
	off_t len;

	if (!isdigit((unsigned char)*p))
		return (-1);
	/* accumulate unsigned so that overflow can be detected, not invoked */
	for (val = 0; isdigit((unsigned char)*p); ++p) {
		digit = (unsigned int)(*p - '0');
		if (val > (ULLONG_MAX - digit) / 10)
			return (-1);
		val = val * 10 + digit;
	}
	if (*p)
		return (-1);
	/* reject values that do not survive the conversion to off_t */
	len = (off_t)val;
	if (len < 0 || (unsigned long long)len != val)
		return (-1);
	DEBUG(fprintf(stderr, "content length: [%lld]\n",
	    (long long)len));
	*length = len;
	return (0);
}
480 
/*
 * Parse the run of decimal digits at *pp.
 *
 * On success stores the value in *val, advances *pp past the digits and
 * returns 0.  Returns -1, leaving *pp and *val alone, if there is no
 * digit at *pp or the value does not fit in an off_t.
 */
static int
_http_parse_range_num(const char **pp, off_t *val)
{
	const char *s;
	unsigned long long acc;
	unsigned int digit;
	off_t res;

	s = *pp;
	if (!isdigit((unsigned char)*s))
		return (-1);
	for (acc = 0; isdigit((unsigned char)*s); ++s) {
		digit = (unsigned int)(*s - '0');
		if (acc > (ULLONG_MAX - digit) / 10)
			return (-1);
		acc = acc * 10 + digit;
	}
	res = (off_t)acc;
	if (res < 0 || (unsigned long long)res != acc)
		return (-1);
	*pp = s;
	*val = res;
	return (0);
}

/*
 * Parse a content-range header
 *
 * p is the header value, expected to have the form
 * "bytes first-last/total" with first <= last < total.
 *
 * On success stores first in *offset, last - first + 1 in *length and
 * total in *size, and returns 0.  Returns -1 without touching the
 * outputs if the value has any other form or a number is out of range.
 */
static int
_http_parse_range(const char *p, off_t *offset, off_t *length, off_t *size)
{
	off_t first, last, len;

	if (strncasecmp(p, "bytes ", 6) != 0)
		return (-1);
	p += 6;
	if (_http_parse_range_num(&p, &first) == -1 || *p != '-')
		return (-1);
	++p;
	if (_http_parse_range_num(&p, &last) == -1 ||
	    first > last || *p != '/')
		return (-1);
	++p;
	if (_http_parse_range_num(&p, &len) == -1 || *p != '\0')
		return (-1);
	/*
	 * Same test as "len < last - first + 1", written so that the
	 * addition cannot overflow.
	 */
	if (len <= last - first)
		return (-1);
	DEBUG(fprintf(stderr, "content range: [%lld-%lld/%lld]\n",
	    (long long)first, (long long)last, (long long)len));
	*offset = first;
	*length = last - first + 1;
	*size = len;
	return (0);
}
510 
511 
512 /*****************************************************************************
513  * Helper functions for authorization
514  */
515 
/*
 * Base64 encoding
 *
 * Encodes the NUL-terminated string src, padding the output with '=' to
 * a multiple of four characters.
 *
 * Returns a newly allocated NUL-terminated string that the caller must
 * free(), or NULL if memory cannot be allocated.
 */
static char *
_http_base64(const char *src)
{
	static const char base64[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	    "abcdefghijklmnopqrstuvwxyz"
	    "0123456789+/";
	const unsigned char *in;
	char *str, *dst;
	size_t l;
	unsigned int t;

	l = strlen(src);
	/* four characters per group of up to three bytes, plus the NUL */
	if ((str = malloc(((l + 2) / 3) * 4 + 1)) == NULL)
		return (NULL);
	/* work on unsigned bytes so that values >= 0x80 are not sign-extended */
	in = (const unsigned char *)src;
	dst = str;

	while (l >= 3) {
		t = ((unsigned int)in[0] << 16) |
		    ((unsigned int)in[1] << 8) | in[2];
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = base64[(t >> 6) & 0x3f];
		dst[3] = base64[(t >> 0) & 0x3f];
		in += 3; l -= 3;
		dst += 4;
	}

	switch (l) {
	case 2:
		t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8);
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = base64[(t >> 6) & 0x3f];
		dst[3] = '=';
		dst += 4;
		break;
	case 1:
		t = (unsigned int)in[0] << 16;
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = dst[3] = '=';
		dst += 4;
		break;
	case 0:
		break;
	}

	*dst = 0;
	return (str);
}
571 
/*
 * Encode username and password
 *
 * Joins usr and pwd with a colon, base64-encodes the result and sends
 * it on fd as a "Basic" credential under the header name hdr.
 *
 * Returns the result of sending the header, or -1 if either temporary
 * string cannot be allocated.
 */
static int
_http_basic_auth(int fd, const char *hdr, const char *usr, const char *pwd)
{
	char *plain, *encoded;
	int rc;

	DEBUG(fprintf(stderr, "usr: [%s]\n", usr));
	DEBUG(fprintf(stderr, "pwd: [%s]\n", pwd));

	rc = -1;
	if (asprintf(&plain, "%s:%s", usr, pwd) != -1) {
		encoded = _http_base64(plain);
		free(plain);
		if (encoded != NULL) {
			rc = _http_cmd(fd, "%s: Basic %s", hdr, encoded);
			free(encoded);
		}
	}
	return (rc);
}
593 
/*
 * Send an authorization header
 *
 * p has the form "basic:realm:user:password"; the scheme is matched
 * without regard to case and the realm is skipped.  Everything after
 * the first colon that follows the user name is taken as the password.
 *
 * Returns the result of sending the header, or -1 if p names another
 * scheme, is incomplete, or memory cannot be allocated.
 */
static int
_http_authorize(int fd, const char *hdr, const char *p)
{
	const char *cred;
	char *copy, *sep;
	int rc;

	/* basic authorization is the only scheme handled */
	if (strncasecmp(p, "basic:", 6) != 0)
		return (-1);

	/* skip realm */
	cred = strchr(p + 6, ':');
	if (cred == NULL)
		return (-1);
	++cred;
	if (strchr(cred, ':') == NULL)
		return (-1);

	if ((copy = strdup(cred)) == NULL)
		return (-1); /* XXX */
	/* split the copy into user name and password */
	sep = strchr(copy, ':');
	*sep = '\0';
	rc = _http_basic_auth(fd, hdr, copy, sep + 1);
	free(copy);
	return (rc);
}
621 
622 
623 /*****************************************************************************
624  * Helper functions for connecting to a server or proxy
625  */
626 
627 /*
628  * Connect to the correct HTTP server or proxy.
629  */
630 static int
631 _http_connect(struct url *URL, struct url *purl, const char *flags)
632 {
633 	int verbose;
634 	int af, fd;
635 
636 #ifdef INET6
637 	af = AF_UNSPEC;
638 #else
639 	af = AF_INET;
640 #endif
641 
642 	verbose = CHECK_FLAG('v');
643 	if (CHECK_FLAG('4'))
644 		af = AF_INET;
645 #ifdef INET6
646 	else if (CHECK_FLAG('6'))
647 		af = AF_INET6;
648 #endif
649 
650 	if (purl) {
651 		URL = purl;
652 	} else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
653 		/* can't talk http to an ftp server */
654 		/* XXX should set an error code */
655 		return (-1);
656 	}
657 
658 	if ((fd = _fetch_connect(URL->host, URL->port, af, verbose)) == -1)
659 		/* _fetch_connect() has already set an error code */
660 		return (-1);
661 	return (fd);
662 }
663 
664 static struct url *
665 _http_get_proxy(void)
666 {
667 	struct url *purl;
668 	char *p;
669 
670 	if (((p = getenv("HTTP_PROXY")) || (p = getenv("http_proxy"))) &&
671 	    (purl = fetchParseURL(p))) {
672 		if (!*purl->scheme)
673 			strcpy(purl->scheme, SCHEME_HTTP);
674 		if (!purl->port)
675 			purl->port = _fetch_default_proxy_port(purl->scheme);
676 		if (strcasecmp(purl->scheme, SCHEME_HTTP) == 0)
677 			return (purl);
678 		fetchFreeURL(purl);
679 	}
680 	return (NULL);
681 }
682 
/*
 * Copy an HTML document from in to out, line by line, leaving out the
 * markup: everything from '<' to the matching '>' is dropped, and so is
 * everything from "<!--" to "-->".  Tags and comments may span lines.
 * Trailing whitespace is removed and each input line produces exactly
 * one output line.
 */
static void
_http_print_html(FILE *out, FILE *in)
{
	size_t len;
	char *line, *end, *p, *q;
	int comment, tag;

	comment = tag = 0;
	while ((line = fgetln(in, &len)) != NULL) {
		while (len && isspace((unsigned char)line[len - 1]))
			--len;
		end = line + len;
		/* p marks the start of text not yet written, q scans ahead */
		for (p = q = line; q < end; ++q) {
			if (comment && *q == '-') {
				/*
				 * The line is not NUL-terminated, so the
				 * marker is compared with an explicit length.
				 */
				if (end - q >= 3 &&
				    memcmp(q, "-->", 3) == 0) {
					tag = comment = 0;
					q += 2;
					/* text resumes after the comment */
					p = q + 1;
				}
			} else if (tag && !comment && *q == '>') {
				p = q + 1;
				tag = 0;
			} else if (!tag && *q == '<') {
				if (q > p)
					fwrite(p, q - p, 1, out);
				tag = 1;
				if (end - q >= 4 &&
				    memcmp(q, "<!--", 4) == 0) {
					comment = 1;
					q += 3;
				}
			}
		}
		if (!tag && q > p)
			fwrite(p, q - p, 1, out);
		fputc('\n', out);
	}
}
720 
721 
722 /*****************************************************************************
723  * Core
724  */
725 
726 /*
727  * Send a request and process the reply
728  */
729 FILE *
730 _http_request(struct url *URL, const char *op, struct url_stat *us,
731     struct url *purl, const char *flags)
732 {
733 	struct url *url, *new;
734 	int chunked, direct, need_auth, noredirect, verbose;
735 	int code, fd, i, n;
736 	off_t offset, clength, length, size;
737 	time_t mtime;
738 	const char *p;
739 	FILE *f;
740 	hdr_t h;
741 	char *host;
742 #ifdef INET6
743 	char hbuf[MAXHOSTNAMELEN + 1];
744 #endif
745 
746 	direct = CHECK_FLAG('d');
747 	noredirect = CHECK_FLAG('A');
748 	verbose = CHECK_FLAG('v');
749 
750 	if (direct && purl) {
751 		fetchFreeURL(purl);
752 		purl = NULL;
753 	}
754 
755 	/* try the provided URL first */
756 	url = URL;
757 
758 	/* if the A flag is set, we only get one try */
759 	n = noredirect ? 1 : MAX_REDIRECT;
760 	i = 0;
761 
762 	need_auth = 0;
763 	do {
764 		new = NULL;
765 		chunked = 0;
766 		offset = 0;
767 		clength = -1;
768 		length = -1;
769 		size = -1;
770 		mtime = 0;
771 
772 		/* check port */
773 		if (!url->port)
774 			url->port = _fetch_default_port(url->scheme);
775 
776 		/* were we redirected to an FTP URL? */
777 		if (purl == NULL && strcmp(url->scheme, SCHEME_FTP) == 0) {
778 			if (strcmp(op, "GET") == 0)
779 				return (_ftp_request(url, "RETR", us, purl, flags));
780 			else if (strcmp(op, "HEAD") == 0)
781 				return (_ftp_request(url, "STAT", us, purl, flags));
782 		}
783 
784 		/* connect to server or proxy */
785 		if ((fd = _http_connect(url, purl, flags)) == -1)
786 			goto ouch;
787 
788 		host = url->host;
789 #ifdef INET6
790 		if (strchr(url->host, ':')) {
791 			snprintf(hbuf, sizeof(hbuf), "[%s]", url->host);
792 			host = hbuf;
793 		}
794 #endif
795 
796 		/* send request */
797 		if (verbose)
798 			_fetch_info("requesting %s://%s:%d%s",
799 			    url->scheme, host, url->port, url->doc);
800 		if (purl) {
801 			_http_cmd(fd, "%s %s://%s:%d%s HTTP/1.1",
802 			    op, url->scheme, host, url->port, url->doc);
803 		} else {
804 			_http_cmd(fd, "%s %s HTTP/1.1",
805 			    op, url->doc);
806 		}
807 
808 		/* virtual host */
809 		if (url->port == _fetch_default_port(url->scheme))
810 			_http_cmd(fd, "Host: %s", host);
811 		else
812 			_http_cmd(fd, "Host: %s:%d", host, url->port);
813 
814 		/* proxy authorization */
815 		if (purl) {
816 			if (*purl->user || *purl->pwd)
817 				_http_basic_auth(fd, "Proxy-Authorization",
818 				    purl->user, purl->pwd);
819 			else if ((p = getenv("HTTP_PROXY_AUTH")) != NULL && *p != '\0')
820 				_http_authorize(fd, "Proxy-Authorization", p);
821 		}
822 
823 		/* server authorization */
824 		if (need_auth || *url->user || *url->pwd) {
825 			if (*url->user || *url->pwd)
826 				_http_basic_auth(fd, "Authorization", url->user, url->pwd);
827 			else if ((p = getenv("HTTP_AUTH")) != NULL && *p != '\0')
828 				_http_authorize(fd, "Authorization", p);
829 			else if (fetchAuthMethod && fetchAuthMethod(url) == 0) {
830 				_http_basic_auth(fd, "Authorization", url->user, url->pwd);
831 			} else {
832 				_http_seterr(HTTP_NEED_AUTH);
833 				goto ouch;
834 			}
835 		}
836 
837 		/* other headers */
838 		if ((p = getenv("HTTP_USER_AGENT")) != NULL && *p != '\0')
839 			_http_cmd(fd, "User-Agent: %s", p);
840 		else
841 			_http_cmd(fd, "User-Agent: %s " _LIBFETCH_VER, _getprogname());
842 		if (url->offset)
843 			_http_cmd(fd, "Range: bytes=%lld-", (long long)url->offset);
844 		_http_cmd(fd, "Connection: close");
845 		_http_cmd(fd, "");
846 
847 		/* get reply */
848 		switch ((code = _http_get_reply(fd))) {
849 		case HTTP_OK:
850 		case HTTP_PARTIAL:
851 			/* fine */
852 			break;
853 		case HTTP_MOVED_PERM:
854 		case HTTP_MOVED_TEMP:
855 		case HTTP_SEE_OTHER:
856 			/*
857 			 * Not so fine, but we still have to read the headers to
858 			 * get the new location.
859 			 */
860 			break;
861 		case HTTP_NEED_AUTH:
862 			if (need_auth) {
863 				/*
864 				 * We already sent out authorization code, so there's
865 				 * nothing more we can do.
866 				 */
867 				_http_seterr(code);
868 				goto ouch;
869 			}
870 			/* try again, but send the password this time */
871 			if (verbose)
872 				_fetch_info("server requires authorization");
873 			break;
874 		case HTTP_NEED_PROXY_AUTH:
875 			/*
876 			 * If we're talking to a proxy, we already sent our proxy
877 			 * authorization code, so there's nothing more we can do.
878 			 */
879 			_http_seterr(code);
880 			goto ouch;
881 		case HTTP_PROTOCOL_ERROR:
882 			/* fall through */
883 		case -1:
884 			_fetch_syserr();
885 			goto ouch;
886 		default:
887 			_http_seterr(code);
888 			if (!verbose)
889 				goto ouch;
890 			/* fall through so we can get the full error message */
891 		}
892 
893 		/* get headers */
894 		do {
895 			switch ((h = _http_next_header(fd, &p))) {
896 			case hdr_syserror:
897 				_fetch_syserr();
898 				goto ouch;
899 			case hdr_error:
900 				_http_seterr(HTTP_PROTOCOL_ERROR);
901 				goto ouch;
902 			case hdr_content_length:
903 				_http_parse_length(p, &clength);
904 				break;
905 			case hdr_content_range:
906 				_http_parse_range(p, &offset, &length, &size);
907 				break;
908 			case hdr_last_modified:
909 				_http_parse_mtime(p, &mtime);
910 				break;
911 			case hdr_location:
912 				if (!HTTP_REDIRECT(code))
913 					break;
914 				if (new)
915 					free(new);
916 				if (verbose)
917 					_fetch_info("%d redirect to %s", code, p);
918 				if (*p == '/')
919 					/* absolute path */
920 					new = fetchMakeURL(url->scheme, url->host, url->port, p,
921 					    url->user, url->pwd);
922 				else
923 					new = fetchParseURL(p);
924 				if (new == NULL) {
925 					/* XXX should set an error code */
926 					DEBUG(fprintf(stderr, "failed to parse new URL\n"));
927 					goto ouch;
928 				}
929 				if (!*new->user && !*new->pwd) {
930 					strcpy(new->user, url->user);
931 					strcpy(new->pwd, url->pwd);
932 				}
933 				new->offset = url->offset;
934 				new->length = url->length;
935 				break;
936 			case hdr_transfer_encoding:
937 				/* XXX weak test*/
938 				chunked = (strcasecmp(p, "chunked") == 0);
939 				break;
940 			case hdr_www_authenticate:
941 				if (code != HTTP_NEED_AUTH)
942 					break;
943 				/* if we were smarter, we'd check the method and realm */
944 				break;
945 			case hdr_end:
946 				/* fall through */
947 			case hdr_unknown:
948 				/* ignore */
949 				break;
950 			}
951 		} while (h > hdr_end);
952 
953 		/* we have a hit or an error */
954 		if (code == HTTP_OK || code == HTTP_PARTIAL || HTTP_ERROR(code))
955 			break;
956 
957 		/* we need to provide authentication */
958 		if (code == HTTP_NEED_AUTH) {
959 			need_auth = 1;
960 			close(fd);
961 			fd = -1;
962 			continue;
963 		}
964 
965 		/* all other cases: we got a redirect */
966 		need_auth = 0;
967 		close(fd);
968 		fd = -1;
969 		if (!new) {
970 			DEBUG(fprintf(stderr, "redirect with no new location\n"));
971 			break;
972 		}
973 		if (url != URL)
974 			fetchFreeURL(url);
975 		url = new;
976 	} while (++i < n);
977 
978 	/* we failed, or ran out of retries */
979 	if (fd == -1) {
980 		_http_seterr(code);
981 		goto ouch;
982 	}
983 
984 	DEBUG(fprintf(stderr, "offset %lld, length %lld,"
985 		  " size %lld, clength %lld\n",
986 		  (long long)offset, (long long)length,
987 		  (long long)size, (long long)clength));
988 
989 	/* check for inconsistencies */
990 	if (clength != -1 && length != -1 && clength != length) {
991 		_http_seterr(HTTP_PROTOCOL_ERROR);
992 		goto ouch;
993 	}
994 	if (clength == -1)
995 		clength = length;
996 	if (clength != -1)
997 		length = offset + clength;
998 	if (length != -1 && size != -1 && length != size) {
999 		_http_seterr(HTTP_PROTOCOL_ERROR);
1000 		goto ouch;
1001 	}
1002 	if (size == -1)
1003 		size = length;
1004 
1005 	/* fill in stats */
1006 	if (us) {
1007 		us->size = size;
1008 		us->atime = us->mtime = mtime;
1009 	}
1010 
1011 	/* too far? */
1012 	if (offset > URL->offset) {
1013 		_http_seterr(HTTP_PROTOCOL_ERROR);
1014 		goto ouch;
1015 	}
1016 
1017 	/* report back real offset and size */
1018 	URL->offset = offset;
1019 	URL->length = clength;
1020 
1021 	/* wrap it up in a FILE */
1022 	if ((f = chunked ? _http_funopen(fd) : fdopen(fd, "r")) == NULL) {
1023 		_fetch_syserr();
1024 		goto ouch;
1025 	}
1026 
1027 	if (url != URL)
1028 		fetchFreeURL(url);
1029 	if (purl)
1030 		fetchFreeURL(purl);
1031 
1032 	if (HTTP_ERROR(code)) {
1033 		_http_print_html(stderr, f);
1034 		fclose(f);
1035 		f = NULL;
1036 	}
1037 
1038 	return (f);
1039 
1040 ouch:
1041 	if (url != URL)
1042 		fetchFreeURL(url);
1043 	if (purl)
1044 		fetchFreeURL(purl);
1045 	if (fd != -1)
1046 		close(fd);
1047 	return (NULL);
1048 }
1049 
1050 
1051 /*****************************************************************************
1052  * Entry points
1053  */
1054 
/*
 * Retrieve and stat a file by HTTP
 *
 * Issues a GET request for URL through the proxy found in the
 * environment, if any.  Returns the stream produced by _http_request(),
 * which is NULL on failure; us is passed through to it.
 */
FILE *
fetchXGetHTTP(struct url *URL, struct url_stat *us, const char *flags)
{
	struct url *proxy;

	proxy = _http_get_proxy();
	return (_http_request(URL, "GET", us, proxy, flags));
}
1063 
/*
 * Retrieve a file by HTTP
 *
 * Same as fetchXGetHTTP() without collecting any document statistics.
 */
FILE *
fetchGetHTTP(struct url *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGetHTTP(URL, NULL, flags);
	return (stream);
}
1072 
1073 /*
1074  * Store a file by HTTP
1075  */
1076 FILE *
1077 fetchPutHTTP(struct url *URL __unused, const char *flags __unused)
1078 {
1079 	warnx("fetchPutHTTP(): not implemented");
1080 	return (NULL);
1081 }
1082 
/*
 * Get an HTTP document's metadata
 *
 * Issues a HEAD request for URL through the proxy found in the
 * environment, if any, passing us on to _http_request().  Returns 0 if
 * the request produced a stream and -1 otherwise.
 */
int
fetchStatHTTP(struct url *URL, struct url_stat *us, const char *flags)
{
	FILE *stream;

	stream = _http_request(URL, "HEAD", us, _http_get_proxy(), flags);
	if (stream != NULL) {
		/* only the headers were wanted */
		fclose(stream);
		return (0);
	}
	return (-1);
}
1096 
1097 /*
1098  * List a directory
1099  */
1100 struct url_ent *
1101 fetchListHTTP(struct url *url __unused, const char *flags __unused)
1102 {
1103 	warnx("fetchListHTTP(): not implemented");
1104 	return (NULL);
1105 }
1106