xref: /freebsd/lib/libfetch/http.c (revision c97925ad4e79dd82554760f514089a3e683147be)
1 /*-
 * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 /*
32  * The base64 code in this file is based on code from MIT fetch, which
33  * has the following copyright and license:
34  *
35  *-
36  * Copyright 1997 Massachusetts Institute of Technology
37  *
38  * Permission to use, copy, modify, and distribute this software and
39  * its documentation for any purpose and without fee is hereby
40  * granted, provided that both the above copyright notice and this
41  * permission notice appear in all copies, that both the above
42  * copyright notice and this permission notice appear in all
43  * supporting documentation, and that the name of M.I.T. not be used
44  * in advertising or publicity pertaining to distribution of the
45  * software without specific, written prior permission.	 M.I.T. makes
46  * no representations about the suitability of this software for any
47  * purpose.  It is provided "as is" without express or implied
48  * warranty.
49  *
50  * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
51  * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
52  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
53  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
54  * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
55  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
56  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
57  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
58  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
59  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
60  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61  * SUCH DAMAGE. */
62 
63 #include <sys/param.h>
64 
65 #include <err.h>
66 #include <ctype.h>
67 #include <locale.h>
68 #include <netdb.h>
69 #include <stdarg.h>
70 #include <stdio.h>
71 #include <stdlib.h>
72 #include <string.h>
73 #include <time.h>
74 #include <unistd.h>
75 
76 #include "fetch.h"
77 #include "common.h"
78 #include "httperr.h"
79 
/* Program name; sent as part of the User-Agent request header. */
extern char *__progname;

/* Line terminator appended to every request and header line we send. */
#define ENDL "\r\n"

/* Status codes accepted as success: full document and partial content. */
#define HTTP_OK		200
#define HTTP_PARTIAL	206
86 
/*
 * State attached to the stream returned by fetchGetHTTP().  It wraps
 * the stream connected to the server and keeps track of the line
 * buffer and chunk decoding state used by the read function.
 */
struct cookie
{
    FILE *real_f;			/* stream connected to the server */
#define ENC_NONE 0
#define ENC_CHUNKED 1
    int encoding;			/* 1 = chunked, 0 = none */
#define HTTPCTYPELEN 59
    char content_type[HTTPCTYPELEN+1];	/* value of Content-Type header */
    char *buf;				/* current line from fgetln() */
    int b_cur, eof;			/* read offset in buf; end flag */
    /*
     * b_len is handed to fgetln(), which stores a size_t through the
     * pointer; it must therefore really be a size_t, or the store
     * overruns into the next member on LP64 platforms.
     */
    size_t b_len;			/* number of valid bytes in buf */
    unsigned chunksize;			/* bytes left in current chunk */
};
99 
/*
 * Send a formatted line; optionally echo to terminal
 *
 * The printf-style format and arguments are written to f.  Unless
 * NDEBUG is defined, the same text is also echoed to stderr in bold.
 *
 * Returns 0 on success, or -1 if writing to f failed.
 */
static int
_http_cmd(FILE *f, char *fmt, ...)
{
    va_list ap;
    int r;

    va_start(ap, fmt);
    r = vfprintf(f, fmt, ap);
    va_end(ap);
#ifndef NDEBUG
    fprintf(stderr, "\033[1m>>> ");
    /*
     * A va_list is indeterminate once it has been handed to
     * vfprintf(), so it has to be restarted before the second pass.
     */
    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);
    fprintf(stderr, "\033[m");
#endif

    return (r < 0) ? -1 : 0;
}
119 
120 /*
121  * Fill the input buffer, do chunk decoding on the fly
122  */
123 static char *
124 _http_fillbuf(struct cookie *c)
125 {
126     char *ln;
127     unsigned int len;
128 
129     if (c->eof)
130 	return NULL;
131 
132     if (c->encoding == ENC_NONE) {
133 	c->buf = fgetln(c->real_f, &(c->b_len));
134 	c->b_cur = 0;
135     } else if (c->encoding == ENC_CHUNKED) {
136 	if (c->chunksize == 0) {
137 	    ln = fgetln(c->real_f, &len);
138 	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: "
139 			  "%*.*s\033[m\n", (int)len-2, (int)len-2, ln));
140 	    sscanf(ln, "%x", &(c->chunksize));
141 	    if (!c->chunksize) {
142 		DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
143 			      "end of last chunk\033[m\n"));
144 		c->eof = 1;
145 		return NULL;
146 	    }
147 	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
148 			  "new chunk: %X\033[m\n", c->chunksize));
149 	}
150 	c->buf = fgetln(c->real_f, &(c->b_len));
151 	if (c->b_len > c->chunksize)
152 	    c->b_len = c->chunksize;
153 	c->chunksize -= c->b_len;
154 	c->b_cur = 0;
155     }
156     else return NULL; /* unknown encoding */
157     return c->buf;
158 }
159 
160 /*
161  * Read function
162  */
163 static int
164 _http_readfn(struct cookie *c, char *buf, int len)
165 {
166     int l, pos = 0;
167     while (len) {
168 	/* empty buffer */
169 	if (!c->buf || (c->b_cur == c->b_len))
170 	    if (!_http_fillbuf(c))
171 		break;
172 
173 	l = c->b_len - c->b_cur;
174 	if (len < l) l = len;
175 	memcpy(buf + pos, c->buf + c->b_cur, l);
176 	c->b_cur += l;
177 	pos += l;
178 	len -= l;
179     }
180 
181     if (ferror(c->real_f))
182 	return -1;
183     else return pos;
184 }
185 
186 /*
187  * Write function
188  */
189 static int
190 _http_writefn(struct cookie *c, const char *buf, int len)
191 {
192     size_t r = fwrite(buf, 1, (size_t)len, c->real_f);
193     return r ? r : -1;
194 }
195 
196 /*
197  * Close function
198  */
199 static int
200 _http_closefn(struct cookie *c)
201 {
202     int r = fclose(c->real_f);
203     free(c);
204     return (r == EOF) ? -1 : 0;
205 }
206 
207 /*
208  * Extract content type from cookie
209  */
210 char *
211 fetchContentType(FILE *f)
212 {
213     /*
214      * We have no way of making sure this really *is* one of our cookies,
215      * so just check for a null pointer and hope for the best.
216      */
217     return f->_cookie ? (((struct cookie *)f->_cookie)->content_type) : NULL;
218 }
219 
/*
 * Base64 encoding
 *
 * Encodes the l bytes at src into dst, padding the final group with
 * '=' as needed, and NUL-terminates the result.  dst must have room
 * for 4 characters per started group of 3 input bytes, plus the
 * terminator.
 *
 * Returns the number of characters written, not counting the NUL.
 */
int
_http_base64(char *dst, char *src, int l)
{
    static const char base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";
    /*
     * Read the input as unsigned bytes: with a signed char, any byte
     * >= 0x80 would sign-extend and clobber the neighbouring bits.
     */
    const unsigned char *in = (const unsigned char *)src;
    unsigned int t;
    int r = 0;

    while (l >= 3) {
	t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8) | in[2];
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = base64[(t >> 0) & 0x3f];
	in += 3; l -= 3;
	dst += 4; r += 4;
    }

    /* encode the one or two leftover bytes, if any */
    switch (l) {
    case 2:
	t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8);
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    case 1:
	t = (unsigned int)in[0] << 16;
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    default:
	break;
    }

    *dst = 0;
    return r;
}
267 
/*
 * Encode username and password
 *
 * Builds the credentials for the Basic authentication scheme: the
 * string "user:password" is base64-encoded as a whole.
 *
 * Returns a malloc()ed, NUL-terminated string which the caller must
 * free, or NULL if memory could not be allocated.
 */
char *
_http_auth(char *usr, char *pwd)
{
    size_t lu, lp, plain_len;
    char *plain, *str;

    lu = strlen(usr);
    lp = strlen(pwd);
    plain_len = lu + 1 + lp;		/* user, colon, password */

    if ((plain = malloc(plain_len + 1)) == NULL)
	return NULL;
    memcpy(plain, usr, lu);
    plain[lu] = ':';
    memcpy(plain + lu + 1, pwd, lp + 1);	/* includes the NUL */

    /*
     * Base64 emits 4 characters for every started group of 3 input
     * bytes (padding included), plus one byte for the terminator.
     */
    if ((str = malloc((plain_len + 2) / 3 * 4 + 1)) == NULL) {
	free(plain);
	return NULL;
    }

    _http_base64(str, plain, (int)plain_len);
    free(plain);

    return str;
}
295 
296 /*
297  * Connect to server or proxy
298  */
299 FILE *
300 _http_connect(struct url *URL, char *flags)
301 {
302     int direct, sd = -1, verbose;
303     size_t len;
304     char *px;
305     FILE *f;
306 
307     direct = (flags && strchr(flags, 'd'));
308     verbose = (flags && strchr(flags, 'v'));
309 
310     /* check port */
311     if (!URL->port) {
312 	struct servent *se;
313 
314 	if (strcasecmp(URL->scheme, "ftp") == 0)
315 	    if ((se = getservbyname("ftp", "tcp")) != NULL)
316 		URL->port = ntohs(se->s_port);
317 	    else
318 		URL->port = 21;
319 	else
320 	    if ((se = getservbyname("http", "tcp")) != NULL)
321 		URL->port = ntohs(se->s_port);
322 	    else
323 		URL->port = 80;
324     }
325 
326     /* attempt to connect to proxy server */
327     if (!direct && (px = getenv("HTTP_PROXY")) != NULL) {
328 	char host[MAXHOSTNAMELEN];
329 	int port = 0;
330 
331 	/* measure length */
332 	len = strcspn(px, ":");
333 
334 	/* get port (XXX atoi is a little too tolerant perhaps?) */
335 	if (px[len] == ':') {
336 	    if (strspn(px+len+1, "0123456789") != strlen(px+len+1)
337 		|| strlen(px+len+1) > 5) {
338 		/* XXX we should emit some kind of warning */
339 	    }
340 	    port = atoi(px+len+1);
341 	    if (port < 1 || port > 65535) {
342 		/* XXX we should emit some kind of warning */
343 	    }
344 	}
345 	if (!port) {
346 #if 0
347 	    /*
348 	     * commented out, since there is currently no service name
349 	     * for HTTP proxies
350 	     */
351 	    struct servent *se;
352 
353 	    if ((se = getservbyname("xxxx", "tcp")) != NULL)
354 		port = ntohs(se->s_port);
355 	    else
356 #endif
357 		port = 3128;
358 	}
359 
360 	/* get host name */
361 	if (len >= MAXHOSTNAMELEN)
362 	    len = MAXHOSTNAMELEN - 1;
363 	strncpy(host, px, len);
364 	host[len] = 0;
365 
366 	/* connect */
367 	sd = _fetch_connect(host, port, verbose);
368     }
369 
370     /* if no proxy is configured or could be contacted, try direct */
371     if (sd == -1) {
372 	if (strcasecmp(URL->scheme, "ftp") == 0)
373 	    goto ouch;
374 	if ((sd = _fetch_connect(URL->host, URL->port, verbose)) == -1)
375 	    goto ouch;
376     }
377 
378     /* reopen as stream */
379     if ((f = fdopen(sd, "r+")) == NULL)
380 	goto ouch;
381 
382     return f;
383 
384 ouch:
385     if (sd >= 0)
386 	close(sd);
387     _http_seterr(999); /* XXX do this properly RSN */
388     return NULL;
389 }
390 
391 /*
392  * Send a HEAD or GET request
393  */
394 int
395 _http_request(FILE *f, char *op, struct url *URL, char *flags)
396 {
397     int e, verbose;
398     char *ln, *p;
399     size_t len;
400 
401     verbose = (flags && strchr(flags, 'v'));
402 
403     /* send request (proxies require absolute form, so use that) */
404     if (verbose)
405 	_fetch_info("requesting %s://%s:%d%s",
406 		    URL->scheme, URL->host, URL->port, URL->doc);
407     _http_cmd(f, "%s %s://%s:%d%s HTTP/1.1" ENDL,
408 	      op, URL->scheme, URL->host, URL->port, URL->doc);
409 
410     /* start sending headers away */
411     if (URL->user[0] || URL->pwd[0]) {
412 	char *auth_str = _http_auth(URL->user, URL->pwd);
413 	if (!auth_str)
414 	    return 999; /* XXX wrong */
415 	_http_cmd(f, "Authorization: Basic %s" ENDL, auth_str);
416 	free(auth_str);
417     }
418     _http_cmd(f, "Host: %s:%d" ENDL, URL->host, URL->port);
419     _http_cmd(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname);
420     if (URL->offset)
421 	_http_cmd(f, "Range: bytes=%lld-" ENDL, URL->offset);
422     _http_cmd(f, "Connection: close" ENDL ENDL);
423 
424     /* get response */
425     if ((ln = fgetln(f, &len)) == NULL)
426 	return 999;
427     DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n",
428 		  (int)len-2, (int)len-2, ln));
429 
430     /* we can't use strchr() and friends since ln isn't NUL-terminated */
431     p = ln;
432     while ((p < ln + len) && !isspace(*p))
433 	p++;
434     while ((p < ln + len) && !isdigit(*p))
435 	p++;
436     if (!isdigit(*p))
437 	return 999;
438 
439     e = atoi(p);
440     DEBUG(fprintf(stderr, "code:     [\033[1m%d\033[m]\n", e));
441     return e;
442 }
443 
/*
 * Check a header line
 *
 * Compares the header name str, ignoring case, with the beginning of
 * the NUL-terminated header line hdr.  The line matches only if the
 * whole name is immediately followed by a colon.
 *
 * Returns a pointer to the header's value (the first character after
 * the colon that is not whitespace), or NULL if the line does not
 * match.
 */
char *
_http_match(char *str, char *hdr)
{
    /*
     * Advance only past characters that actually compared equal;
     * stepping over a mismatching final character would make a name
     * that differs only in its last letter look like a match.
     */
    while (*str && *hdr &&
	   tolower((unsigned char)*str) == tolower((unsigned char)*hdr)) {
	str++;
	hdr++;
    }
    if (*str || *hdr != ':')
	return NULL;

    /* step over the colon and any whitespace before the value */
    hdr++;
    while (*hdr && isspace((unsigned char)*hdr))
	hdr++;
    return hdr;
}
458 
459 /*
460  * Retrieve a file by HTTP
461  */
462 FILE *
463 fetchGetHTTP(struct url *URL, char *flags)
464 {
465     int e, enc = ENC_NONE, i;
466     struct cookie *c;
467     char *ln, *p, *q;
468     FILE *f, *cf;
469     size_t len;
470     off_t pos = 0;
471 
472     /* allocate cookie */
473     if ((c = calloc(1, sizeof *c)) == NULL)
474 	return NULL;
475 
476     /* connect */
477     if ((f = _http_connect(URL, flags)) == NULL) {
478 	free(c);
479 	return NULL;
480     }
481     c->real_f = f;
482 
483     e = _http_request(f, "GET", URL, flags);
484 
485     /* add code to handle redirects later */
486     if (e != (URL->offset ? HTTP_PARTIAL : HTTP_OK)) {
487 	_http_seterr(e);
488 	goto fouch;
489     }
490 
491     /* browse through header */
492     while (1) {
493 	if ((ln = fgetln(f, &len)) == NULL)
494 	    goto fouch;
495 	if ((ln[0] == '\r') || (ln[0] == '\n'))
496 	    break;
497 	while (isspace(ln[len-1]))
498 	    --len;
499 	ln[len] = '\0'; /* XXX */
500 	DEBUG(fprintf(stderr, "header:	 [\033[1m%s\033[m]\n", ln));
501 	if ((p = _http_match("Transfer-Encoding", ln)) != NULL) {
502 	    for (q = p; *q && !isspace(*q); q++)
503 		/* VOID */ ;
504 	    *q = 0;
505 	    if (strcasecmp(p, "chunked") == 0)
506 		enc = ENC_CHUNKED;
507 	    DEBUG(fprintf(stderr, "transfer encoding:  [\033[1m%s\033[m]\n", p));
508 	} else if ((p = _http_match("Content-Type", ln)) != NULL) {
509 	    for (i = 0; *p && i < HTTPCTYPELEN; p++, i++)
510 		    c->content_type[i] = *p;
511 	    do c->content_type[i--] = 0; while (isspace(c->content_type[i]));
512 	    DEBUG(fprintf(stderr, "content type: [\033[1m%s\033[m]\n",
513 			  c->content_type));
514 	} else if ((p = _http_match("Content-Range", ln)) != NULL) {
515 	    if (strncasecmp(p, "bytes ", 6) != 0)
516 		goto fouch;
517 	    p += 6;
518 	    while (*p && isdigit(*p))
519 		pos = pos * 10 + (*p++ - '0');
520 	    /* XXX wouldn't hurt to be slightly more paranoid here */
521 	    DEBUG(fprintf(stderr, "content range: [\033[1m%lld-\033[m]\n", pos));
522 	    if (pos > URL->offset)
523 		goto fouch;
524 	}
525     }
526 
527     /* only body remains */
528     c->encoding = enc;
529     cf = funopen(c,
530 		 (int (*)(void *, char *, int))_http_readfn,
531 		 (int (*)(void *, const char *, int))_http_writefn,
532 		 (fpos_t (*)(void *, fpos_t, int))NULL,
533 		 (int (*)(void *))_http_closefn);
534     if (cf == NULL)
535 	goto fouch;
536 
537     while (pos < URL->offset)
538 	if (fgetc(cf) == EOF)
539 	    goto cfouch;
540 
541     return cf;
542 
543 fouch:
544     fclose(f);
545     free(c);
546     _http_seterr(999); /* XXX do this properly RSN */
547     return NULL;
548 cfouch:
549     fclose(cf);
550     _http_seterr(999); /* XXX do this properly RSN */
551     return NULL;
552 }
553 
/*
 * Store a file by HTTP
 *
 * Not implemented: prints a warning and returns NULL.  Both
 * arguments are ignored.
 */
FILE *
fetchPutHTTP(struct url *URL, char *flags)
{
    FILE *stream = NULL;

    (void)URL;
    (void)flags;

    warnx("fetchPutHTTP(): not implemented");
    return stream;
}
560 
561 /*
562  * Get an HTTP document's metadata
563  */
564 int
565 fetchStatHTTP(struct url *URL, struct url_stat *us, char *flags)
566 {
567     int e;
568     size_t len;
569     char *ln, *p;
570     FILE *f;
571 
572     us->size = -1;
573     us->atime = us->mtime = 0;
574 
575     /* connect */
576     if ((f = _http_connect(URL, flags)) == NULL)
577 	return -1;
578 
579     if ((e = _http_request(f, "HEAD", URL, flags)) != HTTP_OK) {
580 	_http_seterr(e);
581 	goto ouch;
582     }
583 
584     while (1) {
585 	if ((ln = fgetln(f, &len)) == NULL)
586 	    goto fouch;
587 	if ((ln[0] == '\r') || (ln[0] == '\n'))
588 	    break;
589 	while (isspace(ln[len-1]))
590 	    --len;
591 	ln[len] = '\0'; /* XXX */
592 	DEBUG(fprintf(stderr, "header:	 [\033[1m%s\033[m]\n", ln));
593 	if ((p = _http_match("Last-Modified", ln)) != NULL) {
594 	    struct tm tm;
595 	    char locale[64];
596 
597 	    strncpy(locale, setlocale(LC_TIME, NULL), sizeof locale);
598 	    setlocale(LC_TIME, "C");
599 	    strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm);
600 	    /* XXX should add support for date-2 and date-3 */
601 	    setlocale(LC_TIME, locale);
602 	    us->atime = us->mtime = timegm(&tm);
603 	    DEBUG(fprintf(stderr, "last modified: [\033[1m%04d-%02d-%02d "
604 			  "%02d:%02d:%02d\033[m]\n",
605 			  tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
606 			  tm.tm_hour, tm.tm_min, tm.tm_sec));
607 	} else if ((p = _http_match("Content-Length", ln)) != NULL) {
608 	    us->size = 0;
609 	    while (*p && isdigit(*p))
610 		us->size = us->size * 10 + (*p++ - '0');
611 	    DEBUG(fprintf(stderr, "content length: [\033[1m%lld\033[m]\n", us->size));
612 	}
613     }
614 
615     fclose(f);
616     return 0;
617  ouch:
618     _http_seterr(999); /* XXX do this properly RSN */
619  fouch:
620     fclose(f);
621     return -1;
622 }
623 
/*
 * List a directory
 *
 * Not implemented: prints a warning and returns NULL.  Both
 * arguments are ignored.
 */
struct url_ent *
fetchListHTTP(struct url *url, char *flags)
{
    struct url_ent *entries = NULL;

    (void)url;
    (void)flags;

    warnx("fetchListHTTP(): not implemented");
    return entries;
}
633