xref: /freebsd/lib/libfetch/http.c (revision b95b56c7a06bd268299c6d3b1104e09831e600dc)
1 /*-
 * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 /*
32  * The base64 code in this file is based on code from MIT fetch, which
33  * has the following copyright and license:
34  *
35  *-
36  * Copyright 1997 Massachusetts Institute of Technology
37  *
38  * Permission to use, copy, modify, and distribute this software and
39  * its documentation for any purpose and without fee is hereby
40  * granted, provided that both the above copyright notice and this
41  * permission notice appear in all copies, that both the above
42  * copyright notice and this permission notice appear in all
43  * supporting documentation, and that the name of M.I.T. not be used
44  * in advertising or publicity pertaining to distribution of the
45  * software without specific, written prior permission.	 M.I.T. makes
46  * no representations about the suitability of this software for any
47  * purpose.  It is provided "as is" without express or implied
48  * warranty.
49  *
50  * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
51  * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
52  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
53  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
54  * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
55  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
56  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
57  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
58  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
59  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
60  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61  * SUCH DAMAGE. */
62 
63 #include <sys/param.h>
64 
65 #include <err.h>
66 #include <ctype.h>
67 #include <netdb.h>
68 #include <stdarg.h>
69 #include <stdio.h>
70 #include <stdlib.h>
71 #include <string.h>
72 #include <unistd.h>
73 
74 #include "fetch.h"
75 #include "common.h"
76 #include "httperr.h"
77 
/* Program name; sent in the User-Agent request header by fetchGetHTTP() */
extern char *__progname;

/* Line terminator appended to every request and header line we send */
#define ENDL "\r\n"

/* Status codes fetchGetHTTP() accepts: 200 normally, 206 when an offset was requested */
#define HTTP_OK		200
#define HTTP_PARTIAL	206
84 
/*
 * State attached (via funopen) to the stream returned by fetchGetHTTP().
 * Reads on that stream are served from `buf', which is refilled one line
 * at a time from `real_f' by _http_fillbuf().
 */
struct cookie
{
    FILE *real_f;			/* underlying stream (fdopen'ed socket) */
#define ENC_NONE 0
#define ENC_CHUNKED 1
    int encoding;			/* 1 = chunked, 0 = none */
#define HTTPCTYPELEN 59
    char content_type[HTTPCTYPELEN+1];	/* NUL-terminated Content-Type value */
    char *buf;				/* current line returned by fgetln() */
    int b_cur, eof;			/* read offset in buf; end-of-body flag */
    /*
     * b_len is handed to fgetln() by address, and fgetln() stores a
     * size_t through that pointer, so it must really be a size_t.
     */
    size_t b_len;			/* number of valid bytes in buf */
    unsigned chunksize;			/* bytes left in the current chunk */
};
97 
/*
 * Send a formatted line; optionally echo to terminal
 *
 * Formats `fmt' and its arguments onto `f'.  Unless NDEBUG is defined the
 * same text is also echoed to stderr between bold on/off escape sequences.
 * Returns 0 on success, or -1 if writing to `f' failed.
 */
static int
_http_cmd(FILE *f, char *fmt, ...)
{
    va_list ap;
    int r;

    va_start(ap, fmt);
    r = vfprintf(f, fmt, ap);
    va_end(ap);

#ifndef NDEBUG
    /*
     * A va_list is indeterminate once vfprintf() has consumed it, so the
     * argument list has to be restarted before it is walked a second time.
     */
    fprintf(stderr, "\033[1m>>> ");
    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);
    fprintf(stderr, "\033[m");
#endif

    return (r < 0) ? -1 : 0;
}
117 
118 /*
119  * Fill the input buffer, do chunk decoding on the fly
120  */
121 static char *
122 _http_fillbuf(struct cookie *c)
123 {
124     char *ln;
125     unsigned int len;
126 
127     if (c->eof)
128 	return NULL;
129 
130     if (c->encoding == ENC_NONE) {
131 	c->buf = fgetln(c->real_f, &(c->b_len));
132 	c->b_cur = 0;
133     } else if (c->encoding == ENC_CHUNKED) {
134 	if (c->chunksize == 0) {
135 	    ln = fgetln(c->real_f, &len);
136 	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: "
137 			  "%*.*s\033[m\n", (int)len-2, (int)len-2, ln));
138 	    sscanf(ln, "%x", &(c->chunksize));
139 	    if (!c->chunksize) {
140 		DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
141 			      "end of last chunk\033[m\n"));
142 		c->eof = 1;
143 		return NULL;
144 	    }
145 	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
146 			  "new chunk: %X\033[m\n", c->chunksize));
147 	}
148 	c->buf = fgetln(c->real_f, &(c->b_len));
149 	if (c->b_len > c->chunksize)
150 	    c->b_len = c->chunksize;
151 	c->chunksize -= c->b_len;
152 	c->b_cur = 0;
153     }
154     else return NULL; /* unknown encoding */
155     return c->buf;
156 }
157 
158 /*
159  * Read function
160  */
161 static int
162 _http_readfn(struct cookie *c, char *buf, int len)
163 {
164     int l, pos = 0;
165     while (len) {
166 	/* empty buffer */
167 	if (!c->buf || (c->b_cur == c->b_len))
168 	    if (!_http_fillbuf(c))
169 		break;
170 
171 	l = c->b_len - c->b_cur;
172 	if (len < l) l = len;
173 	memcpy(buf + pos, c->buf + c->b_cur, l);
174 	c->b_cur += l;
175 	pos += l;
176 	len -= l;
177     }
178 
179     if (ferror(c->real_f))
180 	return -1;
181     else return pos;
182 }
183 
184 /*
185  * Write function
186  */
187 static int
188 _http_writefn(struct cookie *c, const char *buf, int len)
189 {
190     size_t r = fwrite(buf, 1, (size_t)len, c->real_f);
191     return r ? r : -1;
192 }
193 
194 /*
195  * Close function
196  */
197 static int
198 _http_closefn(struct cookie *c)
199 {
200     int r = fclose(c->real_f);
201     free(c);
202     return (r == EOF) ? -1 : 0;
203 }
204 
205 /*
206  * Extract content type from cookie
207  */
208 char *
209 fetchContentType(FILE *f)
210 {
211     /*
212      * We have no way of making sure this really *is* one of our cookies,
213      * so just check for a null pointer and hope for the best.
214      */
215     return f->_cookie ? (((struct cookie *)f->_cookie)->content_type) : NULL;
216 }
217 
/*
 * Base64 encoding
 *
 * Encodes the `l' bytes at `src' into `dst' as NUL-terminated base64,
 * padding the last group with '='.  `dst' must have room for
 * 4 * ((l + 2) / 3) + 1 bytes.  Returns the number of characters written,
 * not counting the terminating NUL.
 */
int
_http_base64(char *dst, char *src, int l)
{
    static const char base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";
    /*
     * Read the input as unsigned bytes: with a signed plain char, bytes
     * >= 0x80 would sign-extend and smear ones over the whole group.
     */
    const unsigned char *in = (const unsigned char *)src;
    unsigned int t;
    int r = 0;

    /* full groups: three input bytes become four output characters */
    while (l >= 3) {
	t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8) | in[2];
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = base64[(t >> 0) & 0x3f];
	in += 3; l -= 3;
	dst += 4; r += 4;
    }

    /* trailing one or two bytes: pad the group out to four characters */
    switch (l) {
    case 2:
	t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8);
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    case 1:
	t = (unsigned int)in[0] << 16;
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    default:
	break;
    }

    *dst = 0;
    return r;
}
265 
/*
 * Encode username and password
 *
 * Builds the credentials for an "Authorization: Basic" header, i.e. the
 * base64 encoding of the single string "usr:pwd".  Returns a malloc()ed,
 * NUL-terminated string that the caller must free(), or NULL if memory
 * could not be allocated or the credentials are too long to encode.
 */
char *
_http_auth(char *usr, char *pwd)
{
    size_t lu, lp, plain_len;
    int ilen;
    char *plain, *str;

    lu = strlen(usr);
    lp = strlen(pwd);
    plain_len = lu + 1 + lp;		/* user, colon, password */

    /* _http_base64() takes an int length; refuse what does not fit */
    ilen = (int)plain_len;
    if (ilen < 0 || (size_t)ilen != plain_len)
	return NULL;

    if ((plain = malloc(plain_len + 1)) == NULL)
	return NULL;
    memcpy(plain, usr, lu);
    plain[lu] = ':';
    memcpy(plain + lu + 1, pwd, lp + 1);	/* includes the NUL */

    /* base64 emits four characters per (possibly padded) 3-byte group */
    str = malloc((plain_len + 2) / 3 * 4 + 1);
    if (str != NULL)
	_http_base64(str, plain, ilen);

    free(plain);
    return str;
}
293 
294 /*
295  * Retrieve a file by HTTP
296  */
297 FILE *
298 fetchGetHTTP(struct url *URL, char *flags)
299 {
300     int sd = -1, e, i, enc = ENC_NONE, direct, verbose;
301     struct cookie *c;
302     char *ln, *p, *px, *q;
303     FILE *f, *cf;
304     size_t len;
305     off_t pos = 0;
306 
307     direct = (flags && strchr(flags, 'd'));
308     verbose = (flags && strchr(flags, 'v'));
309 
310     /* allocate cookie */
311     if ((c = calloc(1, sizeof *c)) == NULL)
312 	return NULL;
313 
314     /* check port */
315     if (!URL->port) {
316 	struct servent *se;
317 
318 	if ((se = getservbyname("http", "tcp")) != NULL)
319 	    URL->port = ntohs(se->s_port);
320 	else
321 	    URL->port = 80;
322     }
323 
324     /* attempt to connect to proxy server */
325     if (!direct && (px = getenv("HTTP_PROXY")) != NULL) {
326 	char host[MAXHOSTNAMELEN];
327 	int port = 0;
328 
329 	/* measure length */
330 	len = strcspn(px, ":");
331 
332 	/* get port (XXX atoi is a little too tolerant perhaps?) */
333 	if (px[len] == ':') {
334 	    if (strspn(px+len+1, "0123456789") != strlen(px+len+1)
335 		|| strlen(px+len+1) > 5) {
336 		/* XXX we should emit some kind of warning */
337 	    }
338 	    port = atoi(px+len+1);
339 	    if (port < 1 || port > 65535) {
340 		/* XXX we should emit some kind of warning */
341 	    }
342 	}
343 	if (!port) {
344 #if 0
345 	    /*
346 	     * commented out, since there is currently no service name
347 	     * for HTTP proxies
348 	     */
349 	    struct servent *se;
350 
351 	    if ((se = getservbyname("xxxx", "tcp")) != NULL)
352 		port = ntohs(se->s_port);
353 	    else
354 #endif
355 		port = 3128;
356 	}
357 
358 	/* get host name */
359 	if (len >= MAXHOSTNAMELEN)
360 	    len = MAXHOSTNAMELEN - 1;
361 	strncpy(host, px, len);
362 	host[len] = 0;
363 
364 	/* connect */
365 	sd = _fetch_connect(host, port, verbose);
366     }
367 
368     /* if no proxy is configured or could be contacted, try direct */
369     if (sd == -1) {
370 	if ((sd = _fetch_connect(URL->host, URL->port, verbose)) == -1)
371 	    goto ouch;
372     }
373 
374     /* reopen as stream */
375     if ((f = fdopen(sd, "r+")) == NULL)
376 	goto ouch;
377     c->real_f = f;
378 
379     /* send request (proxies require absolute form, so use that) */
380     if (verbose)
381 	_fetch_info("requesting http://%s:%d%s",
382 		    URL->host, URL->port, URL->doc);
383     _http_cmd(f, "GET http://%s:%d%s HTTP/1.1" ENDL,
384 	      URL->host, URL->port, URL->doc);
385 
386     /* start sending headers away */
387     if (URL->user[0] || URL->pwd[0]) {
388 	char *auth_str = _http_auth(URL->user, URL->pwd);
389 	if (!auth_str)
390 	    goto fouch;
391 	_http_cmd(f, "Authorization: Basic %s" ENDL, auth_str);
392 	free(auth_str);
393     }
394     _http_cmd(f, "Host: %s:%d" ENDL, URL->host, URL->port);
395     _http_cmd(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname);
396     if (URL->offset)
397 	_http_cmd(f, "Range: bytes=%lld-" ENDL, URL->offset);
398     _http_cmd(f, "Connection: close" ENDL ENDL);
399 
400     /* get response */
401     if ((ln = fgetln(f, &len)) == NULL)
402 	goto fouch;
403     DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n",
404 		  (int)len-2, (int)len-2, ln));
405 
406     /* we can't use strchr() and friends since ln isn't NUL-terminated */
407     p = ln;
408     while ((p < ln + len) && !isspace(*p))
409 	p++;
410     while ((p < ln + len) && !isdigit(*p))
411 	p++;
412     if (!isdigit(*p))
413 	goto fouch;
414     e = atoi(p);
415     DEBUG(fprintf(stderr, "code:     [\033[1m%d\033[m]\n", e));
416 
417     /* add code to handle redirects later */
418     if (e != (URL->offset ? HTTP_PARTIAL : HTTP_OK)) {
419 	_http_seterr(e);
420 	goto fouch;
421     }
422 
423     /* browse through header */
424     while (1) {
425 	if ((ln = fgetln(f, &len)) == NULL)
426 	    goto fouch;
427 	if ((ln[0] == '\r') || (ln[0] == '\n'))
428 	    break;
429 	DEBUG(fprintf(stderr, "header:	 [\033[1m%*.*s\033[m]\n",
430 		      (int)len-2, (int)len-2, ln));
431 #define XFERENC "Transfer-Encoding:"
432 	if (strncasecmp(ln, XFERENC, sizeof XFERENC - 1) == 0) {
433 	    p = ln + sizeof XFERENC - 1;
434 	    while ((p < ln + len) && isspace(*p))
435 		p++;
436 	    for (q = p; (q < ln + len) && !isspace(*q); q++)
437 		/* VOID */ ;
438 	    *q = 0;
439 	    if (strcasecmp(p, "chunked") == 0)
440 		enc = ENC_CHUNKED;
441 	    DEBUG(fprintf(stderr, "xferenc:  [\033[1m%s\033[m]\n", p));
442 #undef XFERENC
443 #define CONTTYPE "Content-Type:"
444 	} else if (strncasecmp(ln, CONTTYPE, sizeof CONTTYPE - 1) == 0) {
445 	    p = ln + sizeof CONTTYPE - 1;
446 	    while ((p < ln + len) && isspace(*p))
447 		p++;
448 	    for (i = 0; p < ln + len; p++)
449 		if (i < HTTPCTYPELEN)
450 		    c->content_type[i++] = *p;
451 	    do c->content_type[i--] = 0; while (isspace(c->content_type[i]));
452 	    DEBUG(fprintf(stderr, "conttype: [\033[1m%s\033[m]\n",
453 			  c->content_type));
454 #undef CONTTYPE
455 #define CONTRANGE "Content-Range:"
456 #define BYTES "bytes "
457 	} else if (strncasecmp(ln, CONTRANGE, sizeof CONTRANGE - 1) == 0) {
458 	    p = ln + sizeof CONTRANGE - 1;
459 	    while ((p < ln + len) && isspace(*p))
460 		p++;
461 	    if (strncasecmp(p, BYTES, sizeof BYTES - 1) != 0
462 		|| (p += 6) >= ln + len)
463 		goto fouch;
464 	    while ((p < ln + len) && isdigit(*p))
465 		pos = pos * 10 + (*p++ - '0');
466 	    /* XXX wouldn't hurt to be slightly more paranoid here */
467 	    DEBUG(fprintf(stderr, "contrange: [\033[1m%lld-\033[m]\n", pos));
468 	    if (pos > URL->offset)
469 		goto fouch;
470 #undef BYTES
471 #undef CONTRANGE
472 	}
473     }
474 
475     /* only body remains */
476     c->encoding = enc;
477     cf = funopen(c,
478 		 (int (*)(void *, char *, int))_http_readfn,
479 		 (int (*)(void *, const char *, int))_http_writefn,
480 		 (fpos_t (*)(void *, fpos_t, int))NULL,
481 		 (int (*)(void *))_http_closefn);
482     if (cf == NULL)
483 	goto fouch;
484 
485     while (pos < URL->offset)
486 	if (fgetc(cf) == EOF)
487 	    goto cfouch;
488 
489     return cf;
490 
491 ouch:
492     if (sd >= 0)
493 	close(sd);
494     free(c);
495     _http_seterr(999); /* XXX do this properly RSN */
496     return NULL;
497 fouch:
498     fclose(f);
499     free(c);
500     _http_seterr(999); /* XXX do this properly RSN */
501     return NULL;
502 cfouch:
503     fclose(cf);
504     _http_seterr(999); /* XXX do this properly RSN */
505     return NULL;
506 }
507 
/*
 * Placeholder: not implemented.  Emits a warning and returns NULL.
 */
FILE *
fetchPutHTTP(struct url *URL, char *flags)
{
    FILE *result = NULL;

    warnx("fetchPutHTTP(): not implemented");
    return result;
}
514 
/*
 * Get an HTTP document's metadata
 *
 * Not implemented: emits a warning and returns -1.
 */
int
fetchStatHTTP(struct url *url, struct url_stat *us, char *flags)
{
    int status = -1;

    warnx("fetchStatHTTP(): not implemented");
    return status;
}
524 
/*
 * List a directory
 *
 * Not implemented: emits a warning and returns NULL.
 */
struct url_ent *
fetchListHTTP(struct url *url, char *flags)
{
    struct url_ent *entries = NULL;

    warnx("fetchListHTTP(): not implemented");
    return entries;
}
534