xref: /freebsd/lib/libfetch/http.c (revision c807777a43ef2b59786fa8a1a35c1f154fd069e5)
1 /*-
2  * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 /*
32  * The base64 code in this file is based on code from MIT fetch, which
33  * has the following copyright and license:
34  *
35  *-
36  * Copyright 1997 Massachusetts Institute of Technology
37  *
38  * Permission to use, copy, modify, and distribute this software and
39  * its documentation for any purpose and without fee is hereby
40  * granted, provided that both the above copyright notice and this
41  * permission notice appear in all copies, that both the above
42  * copyright notice and this permission notice appear in all
43  * supporting documentation, and that the name of M.I.T. not be used
44  * in advertising or publicity pertaining to distribution of the
45  * software without specific, written prior permission.  M.I.T. makes
46  * no representations about the suitability of this software for any
47  * purpose.  It is provided "as is" without express or implied
48  * warranty.
49  *
50  * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
51  * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
52  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
53  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
54  * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
55  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
56  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
57  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
58  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
59  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
60  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61  * SUCH DAMAGE. */
62 
63 #include <sys/param.h>
64 
65 #include <err.h>
66 #include <ctype.h>
67 #include <stdarg.h>
68 #include <stdio.h>
69 #include <stdlib.h>
70 #include <string.h>
71 #include <unistd.h>
72 
73 #include "fetch.h"
74 #include "common.h"
75 #include "httperr.h"
76 
77 extern char *__progname;
78 
79 #define ENDL "\r\n"
80 
/* Values stored in struct cookie's encoding field */
#define ENC_NONE 0
#define ENC_CHUNKED 1
/* Longest Content-Type value retained, not counting the terminating NUL */
#define HTTPCTYPELEN 59

/*
 * State attached to the stream handed back by fetchGetHTTP(); the
 * read, write and close callbacks in this file all receive a pointer
 * to one of these.
 */
struct cookie
{
    FILE *real_f;			/* stream wrapped around the socket */
    int encoding;			/* 1 = chunked, 0 = none */
    char content_type[HTTPCTYPELEN+1];	/* value of the Content-Type header */
    char *buf;				/* line most recently returned by fgetln() */
    int b_cur;				/* offset of the next unread byte in buf */
    int eof;				/* set once the terminating chunk was seen */
    unsigned b_len;			/* number of usable bytes in buf */
    unsigned chunksize;			/* bytes still expected in the current chunk */
};
93 
/*
 * Send a formatted line; optionally echo to terminal
 *
 * The formatted text is written to f.  Unless NDEBUG is defined, the
 * same text is also echoed to stderr between bold-on / bold-off escape
 * sequences.
 *
 * Returns 0 on success, or -1 if writing to f failed.
 */
static int
_http_cmd(FILE *f, char *fmt, ...)
{
    va_list ap;
    int r;

    va_start(ap, fmt);
    r = vfprintf(f, fmt, ap);
    va_end(ap);

#ifndef NDEBUG
    /*
     * A va_list is indeterminate once it has been handed to
     * vfprintf(), so start a fresh traversal for the echo.
     */
    fprintf(stderr, "\033[1m>>> ");
    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);
    fprintf(stderr, "\033[m");
#endif

    return (r < 0) ? -1 : 0;
}
113 
114 /*
115  * Fill the input buffer, do chunk decoding on the fly
116  */
117 static char *
118 _http_fillbuf(struct cookie *c)
119 {
120     char *ln;
121     unsigned int len;
122 
123     if (c->eof)
124 	return NULL;
125 
126     if (c->encoding == ENC_NONE) {
127 	c->buf = fgetln(c->real_f, &(c->b_len));
128 	c->b_cur = 0;
129     } else if (c->encoding == ENC_CHUNKED) {
130 	if (c->chunksize == 0) {
131 	    ln = fgetln(c->real_f, &len);
132 	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: "
133 			  "%*.*s\033[m\n", (int)len-2, (int)len-2, ln));
134 	    sscanf(ln, "%x", &(c->chunksize));
135 	    if (!c->chunksize) {
136 		DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
137 			      "end of last chunk\033[m\n"));
138 		c->eof = 1;
139 		return NULL;
140 	    }
141 	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
142 			  "new chunk: %X\033[m\n", c->chunksize));
143 	}
144 	c->buf = fgetln(c->real_f, &(c->b_len));
145 	if (c->b_len > c->chunksize)
146 	    c->b_len = c->chunksize;
147 	c->chunksize -= c->b_len;
148 	c->b_cur = 0;
149     }
150     else return NULL; /* unknown encoding */
151     return c->buf;
152 }
153 
154 /*
155  * Read function
156  */
157 static int
158 _http_readfn(struct cookie *c, char *buf, int len)
159 {
160     int l, pos = 0;
161     while (len) {
162 	/* empty buffer */
163 	if (!c->buf || (c->b_cur == c->b_len))
164 	    if (!_http_fillbuf(c))
165 		break;
166 
167 	l = c->b_len - c->b_cur;
168 	if (len < l) l = len;
169 	memcpy(buf + pos, c->buf + c->b_cur, l);
170 	c->b_cur += l;
171 	pos += l;
172 	len -= l;
173     }
174 
175     if (ferror(c->real_f))
176 	return -1;
177     else return pos;
178 }
179 
180 /*
181  * Write function
182  */
183 static int
184 _http_writefn(struct cookie *c, const char *buf, int len)
185 {
186     size_t r = fwrite(buf, 1, (size_t)len, c->real_f);
187     return r ? r : -1;
188 }
189 
190 /*
191  * Close function
192  */
193 static int
194 _http_closefn(struct cookie *c)
195 {
196     int r = fclose(c->real_f);
197     free(c);
198     return (r == EOF) ? -1 : 0;
199 }
200 
201 /*
202  * Extract content type from cookie
203  */
204 char *
205 fetchContentType(FILE *f)
206 {
207     /*
208      * We have no way of making sure this really *is* one of our cookies,
209      * so just check for a null pointer and hope for the best.
210      */
211     return f->_cookie ? (((struct cookie *)f->_cookie)->content_type) : NULL;
212 }
213 
/*
 * Base64 encoding
 *
 * Encodes the l bytes at src into dst using the standard alphabet with
 * '=' padding, and NUL-terminates the result.  dst must have room for
 * 4 * ((l + 2) / 3) + 1 bytes.
 *
 * Returns the number of characters written, not counting the NUL.
 */
int
_http_base64(char *dst, char *src, int l)
{
    static const char base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";
    /*
     * Work on unsigned bytes: with a signed char, input bytes >= 0x80
     * would sign-extend and corrupt the packed 24-bit group.
     */
    const unsigned char *in = (const unsigned char *)src;
    unsigned int t;
    int r = 0;

    /* full groups: three input bytes become four output characters */
    while (l >= 3) {
	t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8) | in[2];
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = base64[(t >> 0) & 0x3f];
	in += 3; l -= 3;
	dst += 4; r += 4;
    }

    /* trailing one or two bytes are padded out to a full group */
    switch (l) {
    case 2:
	t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8);
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    case 1:
	t = (unsigned int)in[0] << 16;
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    default:
	break;
    }

    *dst = 0;
    return r;
}
261 
/*
 * Encode username and password
 *
 * Builds the credentials for a "Basic" Authorization header: the
 * base64 encoding of the string "usr:pwd".  (The Basic scheme encodes
 * the joined string as a whole, not each part separately.)
 *
 * Returns a malloc()ed, NUL-terminated string that the caller must
 * free(), or NULL if memory could not be allocated or the combined
 * length does not fit the int taken by _http_base64().
 */
char *
_http_auth(char *usr, char *pwd)
{
    size_t lu, lp, rawlen;
    char *raw, *str;

    lu = strlen(usr);
    lp = strlen(pwd);
    rawlen = lu + 1 + lp;	/* user name, colon, password */

    /* _http_base64() takes its length as an int */
    if ((int)rawlen < 0 || (size_t)(int)rawlen != rawlen)
	return NULL;

    if ((raw = malloc(rawlen + 1)) == NULL)
	return NULL;
    memcpy(raw, usr, lu);
    raw[lu] = ':';
    memcpy(raw + lu + 1, pwd, lp + 1);	/* copies the NUL as well */

    /* four output characters per (possibly padded) group of three, plus NUL */
    str = malloc((rawlen + 2) / 3 * 4 + 1);
    if (str != NULL)
	_http_base64(str, raw, (int)rawlen);

    free(raw);
    return str;
}
289 
290 /*
291  * Retrieve a file by HTTP
292  */
293 FILE *
294 fetchGetHTTP(struct url *URL, char *flags)
295 {
296     int sd = -1, e, i, enc = ENC_NONE, direct, verbose;
297     struct cookie *c;
298     char *ln, *p, *px, *q;
299     FILE *f, *cf;
300     size_t len;
301 
302     direct = (flags && strchr(flags, 'd'));
303     verbose = (flags && strchr(flags, 'v'));
304 
305     /* allocate cookie */
306     if ((c = calloc(1, sizeof(struct cookie))) == NULL)
307 	return NULL;
308 
309     /* check port */
310     if (!URL->port)
311 	URL->port = 80; /* default HTTP port */
312 
313     /* attempt to connect to proxy server */
314     if (!direct && (px = getenv("HTTP_PROXY")) != NULL) {
315 	char host[MAXHOSTNAMELEN];
316 	int port = 3128; /* XXX I think 3128 is default... check? */
317 
318 	/* measure length */
319 	len = strcspn(px, ":");
320 
321 	/* get port (XXX atoi is a little too tolerant perhaps?) */
322 	if (px[len] == ':')
323 	    port = atoi(px+len+1);
324 
325 	/* get host name */
326 	if (len >= MAXHOSTNAMELEN)
327 	    len = MAXHOSTNAMELEN - 1;
328 	strncpy(host, px, len);
329 	host[len] = 0;
330 
331 	/* connect */
332 	sd = _fetch_connect(host, port, verbose);
333     }
334 
335     /* if no proxy is configured or could be contacted, try direct */
336     if (sd == -1) {
337 	if ((sd = _fetch_connect(URL->host, URL->port, verbose)) == -1)
338 	    goto ouch;
339     }
340 
341     /* reopen as stream */
342     if ((f = fdopen(sd, "r+")) == NULL)
343 	goto ouch;
344     c->real_f = f;
345 
346     /* send request (proxies require absolute form, so use that) */
347     if (verbose)
348 	_fetch_info("requesting http://%s:%d%s",
349 		    URL->host, URL->port, URL->doc);
350     _http_cmd(f, "GET http://%s:%d%s HTTP/1.1" ENDL,
351 	      URL->host, URL->port, URL->doc);
352 
353     /* start sending headers away */
354     if (URL->user[0] || URL->pwd[0]) {
355 	char *auth_str = _http_auth(URL->user, URL->pwd);
356 	if (!auth_str)
357 	    goto fouch;
358 	_http_cmd(f, "Authorization: Basic %s" ENDL, auth_str);
359 	free(auth_str);
360     }
361     _http_cmd(f, "Host: %s:%d" ENDL, URL->host, URL->port);
362     _http_cmd(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname);
363     _http_cmd(f, "Connection: close" ENDL ENDL);
364 
365     /* get response */
366     if ((ln = fgetln(f, &len)) == NULL)
367 	goto fouch;
368     DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n",
369 		  (int)len-2, (int)len-2, ln));
370 
371     /* we can't use strchr() and friends since ln isn't NUL-terminated */
372     p = ln;
373     while ((p < ln + len) && !isspace(*p))
374 	p++;
375     while ((p < ln + len) && !isdigit(*p))
376 	p++;
377     if (!isdigit(*p))
378 	goto fouch;
379     e = atoi(p);
380     DEBUG(fprintf(stderr, "code:     [\033[1m%d\033[m]\n", e));
381 
382     /* add code to handle redirects later */
383     if (e != 200) {
384 	_http_seterr(e);
385 	goto fouch;
386     }
387 
388     /* browse through header */
389     while (1) {
390 	if ((ln = fgetln(f, &len)) == NULL)
391 	    goto fouch;
392 	if ((ln[0] == '\r') || (ln[0] == '\n'))
393 	    break;
394 	DEBUG(fprintf(stderr, "header:   [\033[1m%*.*s\033[m]\n",
395 		      (int)len-2, (int)len-2, ln));
396 #define XFERENC "Transfer-Encoding:"
397 	if (strncasecmp(ln, XFERENC, sizeof(XFERENC)-1) == 0) {
398 	    p = ln + sizeof(XFERENC) - 1;
399 	    while ((p < ln + len) && isspace(*p))
400 		p++;
401 	    for (q = p; (q < ln + len) && !isspace(*q); q++)
402 		/* VOID */ ;
403 	    *q = 0;
404 	    if (strcasecmp(p, "chunked") == 0)
405 		enc = ENC_CHUNKED;
406 	    DEBUG(fprintf(stderr, "xferenc:  [\033[1m%s\033[m]\n", p));
407 #undef XFERENC
408 #define CONTTYPE "Content-Type:"
409 	} else if (strncasecmp(ln, CONTTYPE, sizeof(CONTTYPE)-1) == 0) {
410 	    p = ln + sizeof(CONTTYPE) - 1;
411 	    while ((p < ln + len) && isspace(*p))
412 		p++;
413 	    for (i = 0; p < ln + len; p++)
414 		if (i < HTTPCTYPELEN)
415 		    c->content_type[i++] = *p;
416 	    do c->content_type[i--] = 0; while (isspace(c->content_type[i]));
417 	    DEBUG(fprintf(stderr, "conttype: [\033[1m%s\033[m]\n",
418 			  c->content_type));
419 #undef CONTTYPE
420 	}
421     }
422 
423     /* only body remains */
424     c->encoding = enc;
425     cf = funopen(c,
426 		 (int (*)(void *, char *, int))_http_readfn,
427 		 (int (*)(void *, const char *, int))_http_writefn,
428 		 (fpos_t (*)(void *, fpos_t, int))NULL,
429 		 (int (*)(void *))_http_closefn);
430     if (cf == NULL)
431 	goto fouch;
432     return cf;
433 
434 ouch:
435     if (sd >= 0)
436 	close(sd);
437     free(c);
438     _http_seterr(999); /* XXX do this properly RSN */
439     return NULL;
440 fouch:
441     fclose(f);
442     free(c);
443     _http_seterr(999); /* XXX do this properly RSN */
444     return NULL;
445 }
446 
/*
 * Store a file by HTTP
 *
 * Not implemented: emits a warning and returns NULL.
 */
FILE *
fetchPutHTTP(struct url *URL, char *flags)
{
    /* neither argument is looked at yet */
    (void)URL;
    (void)flags;

    warnx("fetchPutHTTP(): not implemented");
    return NULL;
}
453 
/*
 * Get an HTTP document's metadata
 *
 * Not implemented: emits a warning and returns -1 without touching us.
 */
int
fetchStatHTTP(struct url *url, struct url_stat *us, char *flags)
{
    /* none of the arguments is looked at yet */
    (void)url;
    (void)us;
    (void)flags;

    warnx("fetchStatHTTP(): not implemented");
    return -1;
}
463 
/*
 * List a directory
 *
 * Not implemented: emits a warning and returns NULL.
 */
struct url_ent *
fetchListHTTP(struct url *url, char *flags)
{
    /* neither argument is looked at yet */
    (void)url;
    (void)flags;

    warnx("fetchListHTTP(): not implemented");
    return NULL;
}
473