xref: /freebsd/lib/libfetch/http.c (revision 6efb30c8d0f5d094d8fcbf11f38a2731078f8735)
1 /*-
2  * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 /*
32  * The base64 code in this file is based on code from MIT fetch, which
33  * has the following copyright and license:
34  *
35  *-
36  * Copyright 1997 Massachusetts Institute of Technology
37  *
38  * Permission to use, copy, modify, and distribute this software and
39  * its documentation for any purpose and without fee is hereby
40  * granted, provided that both the above copyright notice and this
41  * permission notice appear in all copies, that both the above
42  * copyright notice and this permission notice appear in all
43  * supporting documentation, and that the name of M.I.T. not be used
44  * in advertising or publicity pertaining to distribution of the
45  * software without specific, written prior permission.	 M.I.T. makes
46  * no representations about the suitability of this software for any
47  * purpose.  It is provided "as is" without express or implied
48  * warranty.
49  *
50  * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
51  * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
52  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
53  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
54  * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
55  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
56  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
57  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
58  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
59  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
60  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61  * SUCH DAMAGE. */
62 
63 #include <sys/param.h>
64 
65 #include <err.h>
66 #include <ctype.h>
67 #include <locale.h>
68 #include <netdb.h>
69 #include <stdarg.h>
70 #include <stdio.h>
71 #include <stdlib.h>
72 #include <string.h>
73 #include <time.h>
74 #include <unistd.h>
75 
76 #include "fetch.h"
77 #include "common.h"
78 #include "httperr.h"
79 
/* Program name; sent as part of the User-Agent request header below. */
extern char *__progname;

/* Line terminator appended to every request and header line we send. */
#define ENDL "\r\n"

/* Status codes accepted as success: full body, and partial (ranged) body. */
#define HTTP_OK		200
#define HTTP_PARTIAL	206
86 
/*
 * Per-stream state handed to funopen(): wraps the real connection and
 * tracks the line buffer that body data is served from.
 */
struct cookie
{
    FILE *real_f;			/* underlying connection stream */
#define ENC_NONE 0
#define ENC_CHUNKED 1
    int encoding;			/* 1 = chunked, 0 = none */
#define HTTPCTYPELEN 59
    char content_type[HTTPCTYPELEN+1];	/* value of the Content-Type header */
    char *buf;				/* current line returned by fgetln() */
    int b_cur, eof;			/* read offset in buf; end-of-body flag */
    /*
     * b_len must be a size_t: its address is passed to fgetln(), which
     * stores a size_t through it.  With a narrower type the store would
     * spill into the neighbouring member on LP64 platforms.
     */
    size_t b_len;			/* number of valid bytes in buf */
    unsigned chunksize;			/* bytes left in the current chunk */
};
99 
/*
 * Send a formatted line to f; unless NDEBUG is defined, also echo it
 * to stderr in bold.
 *
 * Returns 0 on success, -1 if the write to f failed.
 */
static int
_http_cmd(FILE *f, char *fmt, ...)
{
    va_list ap;
    int r;

    va_start(ap, fmt);
    r = vfprintf(f, fmt, ap);
    va_end(ap);
#ifndef NDEBUG
    /*
     * A va_list is indeterminate once vfprintf() has consumed it, so
     * the echo needs its own traversal of the arguments.
     */
    va_start(ap, fmt);
    fprintf(stderr, "\033[1m>>> ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\033[m");
    va_end(ap);
#endif

    return (r < 0) ? -1 : 0;
}
119 
120 /*
121  * Fill the input buffer, do chunk decoding on the fly
122  */
123 static char *
124 _http_fillbuf(struct cookie *c)
125 {
126     char *ln;
127     unsigned int len;
128 
129     if (c->eof)
130 	return NULL;
131 
132     if (c->encoding == ENC_NONE) {
133 	c->buf = fgetln(c->real_f, &(c->b_len));
134 	c->b_cur = 0;
135     } else if (c->encoding == ENC_CHUNKED) {
136 	if (c->chunksize == 0) {
137 	    ln = fgetln(c->real_f, &len);
138 	    if (len <= 2)
139 		return NULL;
140 	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: "
141 			  "%*.*s\033[m\n", (int)len-2, (int)len-2, ln));
142 	    sscanf(ln, "%x", &(c->chunksize));
143 	    if (!c->chunksize) {
144 		DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
145 			      "end of last chunk\033[m\n"));
146 		c->eof = 1;
147 		return NULL;
148 	    }
149 	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
150 			  "new chunk: %X\033[m\n", c->chunksize));
151 	}
152 	c->buf = fgetln(c->real_f, &(c->b_len));
153 	if (c->b_len > c->chunksize)
154 	    c->b_len = c->chunksize;
155 	c->chunksize -= c->b_len;
156 	c->b_cur = 0;
157     }
158     else return NULL; /* unknown encoding */
159     return c->buf;
160 }
161 
162 /*
163  * Read function
164  */
165 static int
166 _http_readfn(struct cookie *c, char *buf, int len)
167 {
168     int l, pos = 0;
169     while (len) {
170 	/* empty buffer */
171 	if (!c->buf || (c->b_cur == c->b_len))
172 	    if (!_http_fillbuf(c))
173 		break;
174 
175 	l = c->b_len - c->b_cur;
176 	if (len < l) l = len;
177 	memcpy(buf + pos, c->buf + c->b_cur, l);
178 	c->b_cur += l;
179 	pos += l;
180 	len -= l;
181     }
182 
183     if (ferror(c->real_f))
184 	return -1;
185     else return pos;
186 }
187 
188 /*
189  * Write function
190  */
191 static int
192 _http_writefn(struct cookie *c, const char *buf, int len)
193 {
194     size_t r = fwrite(buf, 1, (size_t)len, c->real_f);
195     return r ? r : -1;
196 }
197 
198 /*
199  * Close function
200  */
201 static int
202 _http_closefn(struct cookie *c)
203 {
204     int r = fclose(c->real_f);
205     free(c);
206     return (r == EOF) ? -1 : 0;
207 }
208 
209 /*
210  * Extract content type from cookie
211  */
212 char *
213 fetchContentType(FILE *f)
214 {
215     /*
216      * We have no way of making sure this really *is* one of our cookies,
217      * so just check for a null pointer and hope for the best.
218      */
219     return f->_cookie ? (((struct cookie *)f->_cookie)->content_type) : NULL;
220 }
221 
/*
 * Base64-encode l bytes from src into dst.
 *
 * dst must have room for 4 * ((l + 2) / 3) + 1 bytes; the output is
 * padded with '=' and NUL-terminated.  Returns the number of characters
 * written, not counting the terminating NUL.
 */
int
_http_base64(char *dst, char *src, int l)
{
    static const char base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";
    /*
     * Read the input as unsigned bytes: where plain char is signed,
     * bytes >= 0x80 would be sign-extended and corrupt the neighbouring
     * sextets (and left-shifting a negative value is undefined).
     */
    const unsigned char *in = (const unsigned char *)src;
    unsigned int t;
    int r = 0;

    while (l >= 3) {
	t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8) | in[2];
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = base64[(t >> 0) & 0x3f];
	in += 3; l -= 3;
	dst += 4; r += 4;
    }

    /* encode the one or two left-over bytes, padding to four characters */
    switch (l) {
    case 2:
	t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8);
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    case 1:
	t = (unsigned int)in[0] << 16;
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    default:
	break;
    }

    *dst = 0;
    return r;
}
269 
/*
 * Build the credentials for a Basic Authorization header: the base64
 * encoding of "user:password" taken as a single string.
 *
 * Returns a malloc()ed, NUL-terminated string that the caller must
 * free(), or NULL if memory could not be allocated.
 */
char *
_http_auth(char *usr, char *pwd)
{
    size_t lu, lp, plain_len;
    char *plain, *str;

    lu = strlen(usr);
    lp = strlen(pwd);
    plain_len = lu + 1 + lp;		/* user, colon, password */

    if ((plain = malloc(plain_len + 1)) == NULL)
	return NULL;
    memcpy(plain, usr, lu);
    plain[lu] = ':';
    memcpy(plain + lu + 1, pwd, lp + 1);	/* includes the NUL */

    /* base64 emits four characters per (possibly partial) 3-byte group */
    if ((str = malloc((plain_len + 2) / 3 * 4 + 1)) != NULL)
	_http_base64(str, plain, (int)plain_len);

    free(plain);
    return str;
}
297 
298 /*
299  * Connect to server or proxy
300  */
301 FILE *
302 _http_connect(struct url *URL, char *flags)
303 {
304     int direct, sd = -1, verbose;
305     size_t len;
306     char *px;
307     FILE *f;
308 
309     direct = (flags && strchr(flags, 'd'));
310     verbose = (flags && strchr(flags, 'v'));
311 
312     /* check port */
313     if (!URL->port) {
314 	struct servent *se;
315 
316 	if (strcasecmp(URL->scheme, "ftp") == 0)
317 	    if ((se = getservbyname("ftp", "tcp")) != NULL)
318 		URL->port = ntohs(se->s_port);
319 	    else
320 		URL->port = 21;
321 	else
322 	    if ((se = getservbyname("http", "tcp")) != NULL)
323 		URL->port = ntohs(se->s_port);
324 	    else
325 		URL->port = 80;
326     }
327 
328     /* attempt to connect to proxy server */
329     if (!direct && (px = getenv("HTTP_PROXY")) != NULL) {
330 	char host[MAXHOSTNAMELEN];
331 	int port = 0;
332 
333 	/* measure length */
334 	len = strcspn(px, ":");
335 
336 	/* get port (XXX atoi is a little too tolerant perhaps?) */
337 	if (px[len] == ':') {
338 	    if (strspn(px+len+1, "0123456789") != strlen(px+len+1)
339 		|| strlen(px+len+1) > 5) {
340 		/* XXX we should emit some kind of warning */
341 	    }
342 	    port = atoi(px+len+1);
343 	    if (port < 1 || port > 65535) {
344 		/* XXX we should emit some kind of warning */
345 	    }
346 	}
347 	if (!port) {
348 #if 0
349 	    /*
350 	     * commented out, since there is currently no service name
351 	     * for HTTP proxies
352 	     */
353 	    struct servent *se;
354 
355 	    if ((se = getservbyname("xxxx", "tcp")) != NULL)
356 		port = ntohs(se->s_port);
357 	    else
358 #endif
359 		port = 3128;
360 	}
361 
362 	/* get host name */
363 	if (len >= MAXHOSTNAMELEN)
364 	    len = MAXHOSTNAMELEN - 1;
365 	strncpy(host, px, len);
366 	host[len] = 0;
367 
368 	/* connect */
369 	sd = _fetch_connect(host, port, verbose);
370     }
371 
372     /* if no proxy is configured or could be contacted, try direct */
373     if (sd == -1) {
374 	if (strcasecmp(URL->scheme, "ftp") == 0)
375 	    goto ouch;
376 	if ((sd = _fetch_connect(URL->host, URL->port, verbose)) == -1)
377 	    goto ouch;
378     }
379 
380     /* reopen as stream */
381     if ((f = fdopen(sd, "r+")) == NULL)
382 	goto ouch;
383 
384     return f;
385 
386 ouch:
387     if (sd >= 0)
388 	close(sd);
389     _http_seterr(999); /* XXX do this properly RSN */
390     return NULL;
391 }
392 
393 /*
394  * Send a HEAD or GET request
395  */
396 int
397 _http_request(FILE *f, char *op, struct url *URL, char *flags)
398 {
399     int e, verbose;
400     char *ln, *p;
401     size_t len;
402 
403     verbose = (flags && strchr(flags, 'v'));
404 
405     /* send request (proxies require absolute form, so use that) */
406     if (verbose)
407 	_fetch_info("requesting %s://%s:%d%s",
408 		    URL->scheme, URL->host, URL->port, URL->doc);
409     _http_cmd(f, "%s %s://%s:%d%s HTTP/1.1" ENDL,
410 	      op, URL->scheme, URL->host, URL->port, URL->doc);
411 
412     /* start sending headers away */
413     if (URL->user[0] || URL->pwd[0]) {
414 	char *auth_str = _http_auth(URL->user, URL->pwd);
415 	if (!auth_str)
416 	    return 999; /* XXX wrong */
417 	_http_cmd(f, "Authorization: Basic %s" ENDL, auth_str);
418 	free(auth_str);
419     }
420     _http_cmd(f, "Host: %s:%d" ENDL, URL->host, URL->port);
421     _http_cmd(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname);
422     if (URL->offset)
423 	_http_cmd(f, "Range: bytes=%lld-" ENDL, URL->offset);
424     _http_cmd(f, "Connection: close" ENDL ENDL);
425 
426     /* get response */
427     if ((ln = fgetln(f, &len)) == NULL)
428 	return 999;
429     DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n",
430 		  (int)len-2, (int)len-2, ln));
431 
432     /* we can't use strchr() and friends since ln isn't NUL-terminated */
433     p = ln;
434     while ((p < ln + len) && !isspace(*p))
435 	p++;
436     while ((p < ln + len) && !isdigit(*p))
437 	p++;
438     if (!isdigit(*p))
439 	return 999;
440 
441     e = atoi(p);
442     DEBUG(fprintf(stderr, "code:     [\033[1m%d\033[m]\n", e));
443     return e;
444 }
445 
/*
 * Check whether the header line hdr carries the field named str
 * (compared case-insensitively, and followed immediately by ':').
 *
 * Returns a pointer to the first non-blank character of the field
 * value inside hdr, or NULL if the field name does not match.
 */
char *
_http_match(char *str, char *hdr)
{
    /*
     * Advance only after a successful comparison, so that a mismatch on
     * the final character of str is not mistaken for a full match.
     */
    while (*str && *hdr &&
	   tolower((unsigned char)*str) == tolower((unsigned char)*hdr)) {
	str++;
	hdr++;
    }
    if (*str || *hdr != ':')
	return NULL;

    /* step over the colon and any white space after it */
    hdr++;
    while (*hdr && isspace((unsigned char)*hdr))
	hdr++;
    return hdr;
}
460 
461 /*
462  * Retrieve a file by HTTP
463  */
464 FILE *
465 fetchGetHTTP(struct url *URL, char *flags)
466 {
467     int e, enc = ENC_NONE, i;
468     struct cookie *c;
469     char *ln, *p, *q;
470     FILE *f, *cf;
471     size_t len;
472     off_t pos = 0;
473 
474     /* allocate cookie */
475     if ((c = calloc(1, sizeof *c)) == NULL)
476 	return NULL;
477 
478     /* connect */
479     if ((f = _http_connect(URL, flags)) == NULL) {
480 	free(c);
481 	return NULL;
482     }
483     c->real_f = f;
484 
485     e = _http_request(f, "GET", URL, flags);
486 
487     /* add code to handle redirects later */
488     if (e != (URL->offset ? HTTP_PARTIAL : HTTP_OK)) {
489 	_http_seterr(e);
490 	goto fouch;
491     }
492 
493     /* browse through header */
494     while (1) {
495 	if ((ln = fgetln(f, &len)) == NULL)
496 	    goto fouch;
497 	if ((ln[0] == '\r') || (ln[0] == '\n'))
498 	    break;
499 	while (isspace(ln[len-1]))
500 	    --len;
501 	ln[len] = '\0'; /* XXX */
502 	DEBUG(fprintf(stderr, "header:	 [\033[1m%s\033[m]\n", ln));
503 	if ((p = _http_match("Transfer-Encoding", ln)) != NULL) {
504 	    for (q = p; *q && !isspace(*q); q++)
505 		/* VOID */ ;
506 	    *q = 0;
507 	    if (strcasecmp(p, "chunked") == 0)
508 		enc = ENC_CHUNKED;
509 	    DEBUG(fprintf(stderr, "transfer encoding:  [\033[1m%s\033[m]\n", p));
510 	} else if ((p = _http_match("Content-Type", ln)) != NULL) {
511 	    for (i = 0; *p && i < HTTPCTYPELEN; p++, i++)
512 		    c->content_type[i] = *p;
513 	    do c->content_type[i--] = 0; while (isspace(c->content_type[i]));
514 	    DEBUG(fprintf(stderr, "content type: [\033[1m%s\033[m]\n",
515 			  c->content_type));
516 	} else if ((p = _http_match("Content-Range", ln)) != NULL) {
517 	    if (strncasecmp(p, "bytes ", 6) != 0)
518 		goto fouch;
519 	    p += 6;
520 	    while (*p && isdigit(*p))
521 		pos = pos * 10 + (*p++ - '0');
522 	    /* XXX wouldn't hurt to be slightly more paranoid here */
523 	    DEBUG(fprintf(stderr, "content range: [\033[1m%lld-\033[m]\n", pos));
524 	    if (pos > URL->offset)
525 		goto fouch;
526 	}
527     }
528 
529     /* only body remains */
530     c->encoding = enc;
531     cf = funopen(c,
532 		 (int (*)(void *, char *, int))_http_readfn,
533 		 (int (*)(void *, const char *, int))_http_writefn,
534 		 (fpos_t (*)(void *, fpos_t, int))NULL,
535 		 (int (*)(void *))_http_closefn);
536     if (cf == NULL)
537 	goto fouch;
538 
539     while (pos < URL->offset)
540 	if (fgetc(cf) == EOF)
541 	    goto cfouch;
542 
543     return cf;
544 
545 fouch:
546     fclose(f);
547     free(c);
548     _http_seterr(999); /* XXX do this properly RSN */
549     return NULL;
550 cfouch:
551     fclose(cf);
552     _http_seterr(999); /* XXX do this properly RSN */
553     return NULL;
554 }
555 
/*
 * Store a file by HTTP -- not implemented: prints a warning and
 * returns NULL.
 */
FILE *
fetchPutHTTP(struct url *URL, char *flags)
{
    FILE *none = NULL;

    warnx("fetchPutHTTP(): not implemented");
    return none;
}
562 
563 /*
564  * Get an HTTP document's metadata
565  */
566 int
567 fetchStatHTTP(struct url *URL, struct url_stat *us, char *flags)
568 {
569     int e;
570     size_t len;
571     char *ln, *p;
572     FILE *f;
573 
574     us->size = -1;
575     us->atime = us->mtime = 0;
576 
577     /* connect */
578     if ((f = _http_connect(URL, flags)) == NULL)
579 	return -1;
580 
581     if ((e = _http_request(f, "HEAD", URL, flags)) != HTTP_OK) {
582 	_http_seterr(e);
583 	goto ouch;
584     }
585 
586     while (1) {
587 	if ((ln = fgetln(f, &len)) == NULL)
588 	    goto fouch;
589 	if ((ln[0] == '\r') || (ln[0] == '\n'))
590 	    break;
591 	while (isspace(ln[len-1]))
592 	    --len;
593 	ln[len] = '\0'; /* XXX */
594 	DEBUG(fprintf(stderr, "header:	 [\033[1m%s\033[m]\n", ln));
595 	if ((p = _http_match("Last-Modified", ln)) != NULL) {
596 	    struct tm tm;
597 	    char locale[64];
598 
599 	    strncpy(locale, setlocale(LC_TIME, NULL), sizeof locale);
600 	    setlocale(LC_TIME, "C");
601 	    strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm);
602 	    /* XXX should add support for date-2 and date-3 */
603 	    setlocale(LC_TIME, locale);
604 	    us->atime = us->mtime = timegm(&tm);
605 	    DEBUG(fprintf(stderr, "last modified: [\033[1m%04d-%02d-%02d "
606 			  "%02d:%02d:%02d\033[m]\n",
607 			  tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
608 			  tm.tm_hour, tm.tm_min, tm.tm_sec));
609 	} else if ((p = _http_match("Content-Length", ln)) != NULL) {
610 	    us->size = 0;
611 	    while (*p && isdigit(*p))
612 		us->size = us->size * 10 + (*p++ - '0');
613 	    DEBUG(fprintf(stderr, "content length: [\033[1m%lld\033[m]\n", us->size));
614 	}
615     }
616 
617     fclose(f);
618     return 0;
619  ouch:
620     _http_seterr(999); /* XXX do this properly RSN */
621  fouch:
622     fclose(f);
623     return -1;
624 }
625 
/*
 * List a directory -- not implemented: prints a warning and returns
 * NULL.
 */
struct url_ent *
fetchListHTTP(struct url *url, char *flags)
{
    struct url_ent *none = NULL;

    warnx("fetchListHTTP(): not implemented");
    return none;
}
635