xref: /freebsd/lib/libfetch/http.c (revision a8445737e740901f5f2c8d24c12ef7fc8b00134e)
1 /*-
 * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  *	$Id: http.c,v 1.4 1998/07/12 22:34:40 des Exp $
29  */
30 
31 /*
32  * The base64 code in this file is based on code from MIT fetch, which
33  * has the following copyright and license:
34  *
35  *-
36  * Copyright 1997 Massachusetts Institute of Technology
37  *
38  * Permission to use, copy, modify, and distribute this software and
39  * its documentation for any purpose and without fee is hereby
40  * granted, provided that both the above copyright notice and this
41  * permission notice appear in all copies, that both the above
42  * copyright notice and this permission notice appear in all
43  * supporting documentation, and that the name of M.I.T. not be used
44  * in advertising or publicity pertaining to distribution of the
45  * software without specific, written prior permission.  M.I.T. makes
46  * no representations about the suitability of this software for any
47  * purpose.  It is provided "as is" without express or implied
48  * warranty.
49  *
50  * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
51  * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
52  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
53  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
54  * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
55  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
56  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
57  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
58  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
59  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
60  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61  * SUCH DAMAGE. */
62 
63 #include <sys/param.h>
64 #include <sys/errno.h>
65 #include <sys/socket.h>
66 #include <sys/types.h>
67 
68 #include <netinet/in.h>
69 
70 #include <err.h>
71 #include <ctype.h>
72 #include <netdb.h>
73 #include <stdarg.h>
74 #include <stdio.h>
75 #include <stdlib.h>
76 #include <string.h>
77 #include <unistd.h>
78 
79 #include "fetch.h"
80 #include "httperr.c"
81 
/*
 * DEBUG(stmt) expands to the given statement in normal builds and to an
 * empty statement when NDEBUG is defined.  Both forms are wrapped in
 * do ... while (0) so the macro can be used wherever a single statement
 * is expected.
 */
#ifdef NDEBUG
#define DEBUG(x) do { } while (0)
#else
#define DEBUG(x) do x; while (0)
#endif

/* Defined outside this file; sent as part of the User-Agent header. */
extern char *__progname;

/* Line terminator appended to every request and header line we send. */
#define ENDL "\r\n"
91 
/*
 * Per-connection state handed to funopen(); the read/write/close
 * callbacks in this file receive a pointer to it.
 */
struct cookie
{
    FILE *real_f;			/* underlying socket stream */
#define ENC_NONE 0
#define ENC_CHUNKED 1
    int encoding;			/* 1 = chunked, 0 = none */
#define HTTPCTYPELEN 59
    char content_type[HTTPCTYPELEN+1];	/* NUL-terminated Content-Type value */
    char *buf;				/* current line as returned by fgetln() */
    int b_cur, eof;			/* read offset into buf; end-of-body flag */
    /*
     * b_len receives the length reported by fgetln(), whose length
     * argument is a size_t *.  It must therefore be a size_t: with an
     * unsigned int the call would write past the field on LP64 systems.
     */
    size_t b_len;			/* number of valid bytes in buf */
    unsigned chunksize;			/* bytes left in the current chunk */
};
104 
105 /*
106  * Look up error code
107  */
108 static const char *
109 _http_errstring(int e)
110 {
111     struct httperr *p = _http_errlist;
112 
113     while ((p->num != -1) && (p->num != e))
114 	p++;
115 
116     return p->string;
117 }
118 
/*
 * Send a formatted line; optionally echo to terminal
 *
 * f	stream the formatted text is written to
 * fmt	printf-style format string, followed by its arguments
 *
 * Unless NDEBUG is defined, the same text is echoed to stderr between
 * terminal bold-on / attribute-reset escape sequences.
 *
 * Returns 0 on success, -1 if writing to f failed.
 */
static int
_http_cmd(FILE *f, char *fmt, ...)
{
    va_list ap;
    int r;

    va_start(ap, fmt);
    r = vfprintf(f, fmt, ap);
    va_end(ap);

#ifndef NDEBUG
    /*
     * A va_list is indeterminate after being passed to vfprintf(), so
     * the argument list has to be restarted for the second pass.
     */
    fprintf(stderr, "\033[1m>>> ");
    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);
    fprintf(stderr, "\033[m");
#endif

    return (r < 0) ? -1 : 0;
}
138 
139 /*
140  * Fill the input buffer, do chunk decoding on the fly
141  */
142 static char *
143 _http_fillbuf(struct cookie *c)
144 {
145     char *ln;
146     unsigned int len;
147 
148     if (c->eof)
149 	return NULL;
150 
151     if (c->encoding == ENC_NONE) {
152 	c->buf = fgetln(c->real_f, &(c->b_len));
153 	c->b_cur = 0;
154     } else if (c->encoding == ENC_CHUNKED) {
155 	if (c->chunksize == 0) {
156 	    ln = fgetln(c->real_f, &len);
157 	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: "
158 			  "%*.*s\033[m\n", (int)len-2, (int)len-2, ln));
159 	    sscanf(ln, "%x", &(c->chunksize));
160 	    if (!c->chunksize) {
161 		DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
162 			      "end of last chunk\033[m\n"));
163 		c->eof = 1;
164 		return NULL;
165 	    }
166 	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
167 			  "new chunk: %X\033[m\n", c->chunksize));
168 	}
169 	c->buf = fgetln(c->real_f, &(c->b_len));
170 	if (c->b_len > c->chunksize)
171 	    c->b_len = c->chunksize;
172 	c->chunksize -= c->b_len;
173 	c->b_cur = 0;
174     }
175     else return NULL; /* unknown encoding */
176     return c->buf;
177 }
178 
179 /*
180  * Read function
181  */
182 static int
183 _http_readfn(struct cookie *c, char *buf, int len)
184 {
185     int l, pos = 0;
186     while (len) {
187 	/* empty buffer */
188 	if (!c->buf || (c->b_cur == c->b_len))
189 	    if (!_http_fillbuf(c))
190 		break;
191 
192 	l = c->b_len - c->b_cur;
193 	if (len < l) l = len;
194 	memcpy(buf + pos, c->buf + c->b_cur, l);
195 	c->b_cur += l;
196 	pos += l;
197 	len -= l;
198     }
199 
200     if (ferror(c->real_f))
201 	return -1;
202     else return pos;
203 }
204 
205 /*
206  * Write function
207  */
208 static int
209 _http_writefn(struct cookie *c, const char *buf, int len)
210 {
211     size_t r = fwrite(buf, 1, (size_t)len, c->real_f);
212     return r ? r : -1;
213 }
214 
215 /*
216  * Close function
217  */
218 static int
219 _http_closefn(struct cookie *c)
220 {
221     int r = fclose(c->real_f);
222     free(c);
223     return (r == EOF) ? -1 : 0;
224 }
225 
226 /*
227  * Extract content type from cookie
228  */
229 char *
230 fetchContentType(FILE *f)
231 {
232     /*
233      * We have no way of making sure this really *is* one of our cookies,
234      * so just check for a null pointer and hope for the best.
235      */
236     return f->_cookie ? (((struct cookie *)f->_cookie)->content_type) : NULL;
237 }
238 
/*
 * Base64 encoding
 *
 * dst	output buffer; must have room for 4 * ((l + 2) / 3) + 1 bytes
 * src	input bytes
 * l	number of input bytes
 *
 * Writes the padded base64 form of src to dst, NUL-terminates it, and
 * returns the number of characters written (not counting the NUL).
 */
int
_http_base64(char *dst, char *src, int l)
{
    static const char base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";
    /*
     * Work on unsigned bytes: with a signed char, any input byte >= 0x80
     * would sign-extend when shifted and corrupt the neighbouring bits.
     */
    const unsigned char *in = (const unsigned char *)src;
    unsigned long t;
    int r = 0;

    /* full groups: 3 input bytes -> 4 output characters */
    while (l >= 3) {
	t = ((unsigned long)in[0] << 16) | ((unsigned long)in[1] << 8) | in[2];
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = base64[(t >> 0) & 0x3f];
	in += 3; l -= 3;
	dst += 4; r += 4;
    }

    /* trailing partial group, padded with '=' */
    switch (l) {
    case 2:
	t = ((unsigned long)in[0] << 16) | ((unsigned long)in[1] << 8);
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    case 1:
	t = (unsigned long)in[0] << 16;
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    default:
	break;
    }

    *dst = 0;
    return r;
}
286 
/*
 * Encode username and password
 *
 * Builds the credentials for an "Authorization: Basic" header: the
 * base64 encoding of the single string "usr:pwd".
 *
 * Returns a malloc()ed, NUL-terminated string that the caller must
 * free(), or NULL if memory could not be allocated or the credentials
 * are too long to encode.
 */
char *
_http_auth(char *usr, char *pwd)
{
    size_t lu, lp, rawlen;
    char *raw, *str;

    lu = strlen(usr);
    lp = strlen(pwd);
    rawlen = lu + 1 + lp;	/* user, colon, password */

    /* _http_base64() takes an int length; refuse anything larger */
    if (rawlen < lu || rawlen > (size_t)((unsigned)-1 >> 1))
	return NULL;

    if ((raw = malloc(rawlen + 1)) == NULL)
	return NULL;
    memcpy(raw, usr, lu);
    raw[lu] = ':';
    memcpy(raw + lu + 1, pwd, lp);
    raw[rawlen] = 0;

    /* base64 emits 4 characters per started group of 3 bytes, plus NUL */
    if ((str = malloc(((rawlen + 2) / 3) * 4 + 1)) == NULL) {
	free(raw);
	return NULL;
    }

    _http_base64(str, raw, (int)rawlen);
    free(raw);

    return str;
}
314 
315 /*
316  * retrieve a file by HTTP
317  */
318 FILE *
319 fetchGetHTTP(url_t *URL, char *flags)
320 {
321     int sd = -1, err, i, enc = ENC_NONE;
322     struct cookie *c;
323     char *ln, *p, *q;
324     FILE *f, *cf;
325     size_t len;
326 
327     /* allocate cookie */
328     if ((c = calloc(1, sizeof(struct cookie))) == NULL)
329 	return NULL;
330 
331     /* check port */
332     if (!URL->port)
333 	URL->port = 80; /* default HTTP port */
334 
335     /* attempt to connect to proxy server */
336     if (getenv("HTTP_PROXY")) {
337 	char *px, host[MAXHOSTNAMELEN];
338 	int port = 3128; /* XXX I think 3128 is default... check? */
339 	size_t len;
340 
341 	/* measure length */
342 	px = getenv("HTTP_PROXY");
343 	len = strcspn(px, ":");
344 
345 	/* get port (atoi is a little too tolerant perhaps?) */
346 	if (px[len] == ':')
347 	    port = atoi(px+len+1);
348 
349 	/* get host name */
350 	if (len >= MAXHOSTNAMELEN)
351 	    len = MAXHOSTNAMELEN - 1;
352 	strncpy(host, px, len);
353 	host[len] = 0;
354 
355 	/* connect */
356 	sd = fetchConnect(host, port);
357     }
358 
359     /* if no proxy is configured or could be contacted, try direct */
360     if (sd == -1) {
361 	if ((sd = fetchConnect(URL->host, URL->port)) == -1)
362 	    goto ouch;
363     }
364 
365     /* reopen as stream */
366     if ((f = fdopen(sd, "r+")) == NULL)
367 	goto ouch;
368     c->real_f = f;
369 
370     /* send request (proxies require absolute form, so use that) */
371     _http_cmd(f, "GET http://%s:%d%s HTTP/1.1" ENDL,
372 	      URL->host, URL->port, URL->doc);
373 
374     /* start sending headers away */
375     if (URL->user[0] || URL->pwd[0]) {
376 	char *auth_str = _http_auth(URL->user, URL->pwd);
377 	if (!auth_str)
378 	    goto fouch;
379 	_http_cmd(f, "Authorization: Basic %s" ENDL, auth_str);
380 	free(auth_str);
381     }
382     _http_cmd(f, "Host: %s:%d" ENDL, URL->host, URL->port);
383     _http_cmd(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname);
384     _http_cmd(f, "Connection: close" ENDL ENDL);
385 
386     /* get response */
387     if ((ln = fgetln(f, &len)) == NULL)
388 	goto fouch;
389     DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n",
390 		  (int)len-2, (int)len-2, ln));
391 
392     /* we can't use strchr() and friends since ln isn't NUL-terminated */
393     p = ln;
394     while ((p < ln + len) && !isspace(*p))
395 	p++;
396     while ((p < ln + len) && !isdigit(*p))
397 	p++;
398     if (!isdigit(*p))
399 	goto fouch;
400     err = atoi(p);
401     DEBUG(fprintf(stderr, "code:     [\033[1m%d\033[m]\n", err));
402 
403     /* add code to handle redirects later */
404     if (err != 200) {
405 	fetchLastErrCode = err;
406 	fetchLastErrText = _http_errstring(err);
407 	goto fouch;
408     }
409 
410     /* browse through header */
411     while (1) {
412 	if ((ln = fgetln(f, &len)) == NULL)
413 	    goto fouch;
414 	if ((ln[0] == '\r') || (ln[0] == '\n'))
415 	    break;
416 	DEBUG(fprintf(stderr, "header:   [\033[1m%*.*s\033[m]\n",
417 		      (int)len-2, (int)len-2, ln));
418 #define XFERENC "Transfer-Encoding:"
419 	if (strncasecmp(ln, XFERENC, sizeof(XFERENC)-1) == 0) {
420 	    p = ln + sizeof(XFERENC) - 1;
421 	    while ((p < ln + len) && isspace(*p))
422 		p++;
423 	    for (q = p; (q < ln + len) && !isspace(*q); q++)
424 		/* VOID */ ;
425 	    *q = 0;
426 	    if (strcasecmp(p, "chunked") == 0)
427 		enc = ENC_CHUNKED;
428 	    DEBUG(fprintf(stderr, "xferenc:  [\033[1m%s\033[m]\n", p));
429 #undef XFERENC
430 #define CONTTYPE "Content-Type:"
431 	} else if (strncasecmp(ln, CONTTYPE, sizeof(CONTTYPE)-1) == 0) {
432 	    p = ln + sizeof(CONTTYPE) - 1;
433 	    while ((p < ln + len) && isspace(*p))
434 		p++;
435 	    for (i = 0; p < ln + len; p++)
436 		if (i < HTTPCTYPELEN)
437 		    c->content_type[i++] = *p;
438 	    do c->content_type[i--] = 0; while (isspace(c->content_type[i]));
439 	    DEBUG(fprintf(stderr, "conttype: [\033[1m%s\033[m]\n",
440 			  c->content_type));
441 #undef CONTTYPE
442 	}
443     }
444 
445     /* only body remains */
446     c->encoding = enc;
447     cf = funopen(c,
448 		 (int (*)(void *, char *, int))_http_readfn,
449 		 (int (*)(void *, const char *, int))_http_writefn,
450 		 (fpos_t (*)(void *, fpos_t, int))NULL,
451 		 (int (*)(void *))_http_closefn);
452     if (cf == NULL)
453 	goto fouch;
454     return cf;
455 
456 ouch:
457     if (sd >= 0)
458 	close(sd);
459     free(c);
460     return NULL;
461 fouch:
462     fclose(f);
463     free(c);
464     return NULL;
465 }
466 
467 FILE *
468 fetchPutHTTP(url_t *URL, char *flags)
469 {
470     warnx("fetchPutHTTP(): not implemented");
471     return NULL;
472 }
473