xref: /freebsd/lib/libfetch/http.c (revision 842a95cc236e53afbf5e516267246c3408e0779e)
1 /*-
2  * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  *	$Id: http.c,v 1.5 1998/08/17 09:30:19 des Exp $
29  */
30 
31 /*
32  * The base64 code in this file is based on code from MIT fetch, which
33  * has the following copyright and license:
34  *
35  *-
36  * Copyright 1997 Massachusetts Institute of Technology
37  *
38  * Permission to use, copy, modify, and distribute this software and
39  * its documentation for any purpose and without fee is hereby
40  * granted, provided that both the above copyright notice and this
41  * permission notice appear in all copies, that both the above
42  * copyright notice and this permission notice appear in all
43  * supporting documentation, and that the name of M.I.T. not be used
44  * in advertising or publicity pertaining to distribution of the
45  * software without specific, written prior permission.  M.I.T. makes
46  * no representations about the suitability of this software for any
47  * purpose.  It is provided "as is" without express or implied
48  * warranty.
49  *
50  * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
51  * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
52  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
53  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
54  * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
55  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
56  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
57  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
58  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
59  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
60  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61  * SUCH DAMAGE. */
62 
63 #include <sys/param.h>
64 #include <sys/errno.h>
65 #include <sys/socket.h>
66 #include <sys/types.h>
67 
68 #include <netinet/in.h>
69 
70 #include <err.h>
71 #include <ctype.h>
72 #include <netdb.h>
73 #include <stdarg.h>
74 #include <stdio.h>
75 #include <stdlib.h>
76 #include <string.h>
77 #include <unistd.h>
78 
79 #include "fetch.h"
80 #include "common.h"
81 #include "httperr.c"
82 
/*
 * DEBUG(x) runs the statement x in debug builds and expands to an empty
 * statement when NDEBUG is defined.
 */
#ifdef NDEBUG
#define DEBUG(x) do { } while (0)
#else
#define DEBUG(x) do { x; } while (0)
#endif

/* program name; only declared here, the definition lives elsewhere */
extern char *__progname;

/* line terminator (CR LF) */
#define ENDL "\r\n"
92 
/*
 * State attached to the stream handed back to the caller: the real
 * stream being read, the transfer encoding in effect, the content type
 * taken from the response header and the line currently being consumed.
 */
struct cookie
{
    FILE *real_f;			/* underlying stream */
#define ENC_NONE 0
#define ENC_CHUNKED 1
    int encoding;			/* ENC_NONE or ENC_CHUNKED */
#define HTTPCTYPELEN 59
    char content_type[HTTPCTYPELEN+1];	/* NUL-terminated content type */
    char *buf;				/* current line from fgetln() */
    int b_cur, eof;			/* offset into buf; end-of-body flag */
    /*
     * b_len is a size_t because its address is handed to fgetln(),
     * which stores a size_t through it.
     */
    size_t b_len;			/* number of valid bytes in buf */
    unsigned chunksize;			/* bytes left in the current chunk */
};
105 
/*
 * Send a formatted line to f.  In debug builds (NDEBUG not defined) the
 * same line is also echoed to stderr between bold escape sequences.
 *
 * Returns 0 on success, or -1 if formatting or writing to f failed.
 */
static int
_http_cmd(FILE *f, char *fmt, ...)
{
    va_list ap;
    int r;

    va_start(ap, fmt);
    r = vfprintf(f, fmt, ap);
    va_end(ap);

#ifndef NDEBUG
    /*
     * A va_list is indeterminate once it has been passed to vfprintf(),
     * so the argument list is restarted for the echo instead of reused.
     */
    fprintf(stderr, "\033[1m>>> ");
    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);
    fprintf(stderr, "\033[m");
#endif

    return (r < 0) ? -1 : 0;
}
125 
126 /*
127  * Fill the input buffer, do chunk decoding on the fly
128  */
129 static char *
130 _http_fillbuf(struct cookie *c)
131 {
132     char *ln;
133     unsigned int len;
134 
135     if (c->eof)
136 	return NULL;
137 
138     if (c->encoding == ENC_NONE) {
139 	c->buf = fgetln(c->real_f, &(c->b_len));
140 	c->b_cur = 0;
141     } else if (c->encoding == ENC_CHUNKED) {
142 	if (c->chunksize == 0) {
143 	    ln = fgetln(c->real_f, &len);
144 	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: "
145 			  "%*.*s\033[m\n", (int)len-2, (int)len-2, ln));
146 	    sscanf(ln, "%x", &(c->chunksize));
147 	    if (!c->chunksize) {
148 		DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
149 			      "end of last chunk\033[m\n"));
150 		c->eof = 1;
151 		return NULL;
152 	    }
153 	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
154 			  "new chunk: %X\033[m\n", c->chunksize));
155 	}
156 	c->buf = fgetln(c->real_f, &(c->b_len));
157 	if (c->b_len > c->chunksize)
158 	    c->b_len = c->chunksize;
159 	c->chunksize -= c->b_len;
160 	c->b_cur = 0;
161     }
162     else return NULL; /* unknown encoding */
163     return c->buf;
164 }
165 
166 /*
167  * Read function
168  */
169 static int
170 _http_readfn(struct cookie *c, char *buf, int len)
171 {
172     int l, pos = 0;
173     while (len) {
174 	/* empty buffer */
175 	if (!c->buf || (c->b_cur == c->b_len))
176 	    if (!_http_fillbuf(c))
177 		break;
178 
179 	l = c->b_len - c->b_cur;
180 	if (len < l) l = len;
181 	memcpy(buf + pos, c->buf + c->b_cur, l);
182 	c->b_cur += l;
183 	pos += l;
184 	len -= l;
185     }
186 
187     if (ferror(c->real_f))
188 	return -1;
189     else return pos;
190 }
191 
192 /*
193  * Write function
194  */
195 static int
196 _http_writefn(struct cookie *c, const char *buf, int len)
197 {
198     size_t r = fwrite(buf, 1, (size_t)len, c->real_f);
199     return r ? r : -1;
200 }
201 
202 /*
203  * Close function
204  */
205 static int
206 _http_closefn(struct cookie *c)
207 {
208     int r = fclose(c->real_f);
209     free(c);
210     return (r == EOF) ? -1 : 0;
211 }
212 
213 /*
214  * Extract content type from cookie
215  */
216 char *
217 fetchContentType(FILE *f)
218 {
219     /*
220      * We have no way of making sure this really *is* one of our cookies,
221      * so just check for a null pointer and hope for the best.
222      */
223     return f->_cookie ? (((struct cookie *)f->_cookie)->content_type) : NULL;
224 }
225 
/*
 * Base64-encode the l bytes at src into dst and NUL-terminate the result.
 *
 * dst must have room for 4 * ((l + 2) / 3) + 1 bytes.  A final group of
 * one or two input bytes is padded with '='.
 *
 * Returns the number of characters written, not counting the NUL.
 */
int
_http_base64(char *dst, char *src, int l)
{
    static const char base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";
    /*
     * Read the input as unsigned bytes: with a signed plain char, bytes
     * >= 0x80 would sign-extend and spill into the neighbouring six-bit
     * groups when shifted and OR-ed together.
     */
    const unsigned char *in = (const unsigned char *)src;
    unsigned long t;
    int r = 0;

    /* full groups: three input bytes become four output characters */
    while (l >= 3) {
	t = ((unsigned long)in[0] << 16) | ((unsigned long)in[1] << 8) | in[2];
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = base64[(t >> 0) & 0x3f];
	in += 3; l -= 3;
	dst += 4; r += 4;
    }

    /* trailing partial group, padded to four characters */
    switch (l) {
    case 2:
	t = ((unsigned long)in[0] << 16) | ((unsigned long)in[1] << 8);
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    case 1:
	t = (unsigned long)in[0] << 16;
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    default:
	break;
    }

    *dst = 0;
    return r;
}
273 
/*
 * Build the credentials for a "Basic" Authorization header: the base64
 * encoding of the string "usr:pwd".
 *
 * Returns a malloc()ed, NUL-terminated string that the caller must
 * free(), or NULL if memory could not be allocated or the credentials
 * are too long to encode.
 */
char *
_http_auth(char *usr, char *pwd)
{
    /* _http_base64() takes and returns int; this is INT_MAX */
    const size_t maxlen = (unsigned int)-1 >> 1;
    size_t lu, lp, rawlen;
    char *raw, *str;

    lu = strlen(usr);
    lp = strlen(pwd);

    /* keep both the raw and the (4/3 larger) encoded length within int */
    if (lu > maxlen / 4 || lp > maxlen / 4)
	return NULL;
    rawlen = lu + 1 + lp;		/* user name, colon, password */

    if ((raw = malloc(rawlen + 1)) == NULL)
	return NULL;
    memcpy(raw, usr, lu);
    raw[lu] = ':';
    memcpy(raw + lu + 1, pwd, lp);
    raw[rawlen] = 0;

    /* base64 emits four characters per (possibly partial) 3-byte group */
    if ((str = malloc(4 * ((rawlen + 2) / 3) + 1)) == NULL) {
	free(raw);
	return NULL;
    }

    _http_base64(str, raw, (int)rawlen);	/* NUL-terminates str */
    free(raw);

    return str;
}
301 
302 /*
303  * retrieve a file by HTTP
304  */
305 FILE *
306 fetchGetHTTP(url_t *URL, char *flags)
307 {
308     int sd = -1, err, i, enc = ENC_NONE;
309     struct cookie *c;
310     char *ln, *p, *q;
311     FILE *f, *cf;
312     size_t len;
313 
314     /* allocate cookie */
315     if ((c = calloc(1, sizeof(struct cookie))) == NULL)
316 	return NULL;
317 
318     /* check port */
319     if (!URL->port)
320 	URL->port = 80; /* default HTTP port */
321 
322     /* attempt to connect to proxy server */
323     if (getenv("HTTP_PROXY")) {
324 	char *px, host[MAXHOSTNAMELEN];
325 	int port = 3128; /* XXX I think 3128 is default... check? */
326 	size_t len;
327 
328 	/* measure length */
329 	px = getenv("HTTP_PROXY");
330 	len = strcspn(px, ":");
331 
332 	/* get port (atoi is a little too tolerant perhaps?) */
333 	if (px[len] == ':')
334 	    port = atoi(px+len+1);
335 
336 	/* get host name */
337 	if (len >= MAXHOSTNAMELEN)
338 	    len = MAXHOSTNAMELEN - 1;
339 	strncpy(host, px, len);
340 	host[len] = 0;
341 
342 	/* connect */
343 	sd = fetchConnect(host, port);
344     }
345 
346     /* if no proxy is configured or could be contacted, try direct */
347     if (sd == -1) {
348 	if ((sd = fetchConnect(URL->host, URL->port)) == -1)
349 	    goto ouch;
350     }
351 
352     /* reopen as stream */
353     if ((f = fdopen(sd, "r+")) == NULL)
354 	goto ouch;
355     c->real_f = f;
356 
357     /* send request (proxies require absolute form, so use that) */
358     _http_cmd(f, "GET http://%s:%d%s HTTP/1.1" ENDL,
359 	      URL->host, URL->port, URL->doc);
360 
361     /* start sending headers away */
362     if (URL->user[0] || URL->pwd[0]) {
363 	char *auth_str = _http_auth(URL->user, URL->pwd);
364 	if (!auth_str)
365 	    goto fouch;
366 	_http_cmd(f, "Authorization: Basic %s" ENDL, auth_str);
367 	free(auth_str);
368     }
369     _http_cmd(f, "Host: %s:%d" ENDL, URL->host, URL->port);
370     _http_cmd(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname);
371     _http_cmd(f, "Connection: close" ENDL ENDL);
372 
373     /* get response */
374     if ((ln = fgetln(f, &len)) == NULL)
375 	goto fouch;
376     DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n",
377 		  (int)len-2, (int)len-2, ln));
378 
379     /* we can't use strchr() and friends since ln isn't NUL-terminated */
380     p = ln;
381     while ((p < ln + len) && !isspace(*p))
382 	p++;
383     while ((p < ln + len) && !isdigit(*p))
384 	p++;
385     if (!isdigit(*p))
386 	goto fouch;
387     err = atoi(p);
388     DEBUG(fprintf(stderr, "code:     [\033[1m%d\033[m]\n", err));
389 
390     /* add code to handle redirects later */
391     if (err != 200) {
392 	fetchLastErrCode = err;
393 	fetchLastErrText = _http_errstring(err);
394 	goto fouch;
395     }
396 
397     /* browse through header */
398     while (1) {
399 	if ((ln = fgetln(f, &len)) == NULL)
400 	    goto fouch;
401 	if ((ln[0] == '\r') || (ln[0] == '\n'))
402 	    break;
403 	DEBUG(fprintf(stderr, "header:   [\033[1m%*.*s\033[m]\n",
404 		      (int)len-2, (int)len-2, ln));
405 #define XFERENC "Transfer-Encoding:"
406 	if (strncasecmp(ln, XFERENC, sizeof(XFERENC)-1) == 0) {
407 	    p = ln + sizeof(XFERENC) - 1;
408 	    while ((p < ln + len) && isspace(*p))
409 		p++;
410 	    for (q = p; (q < ln + len) && !isspace(*q); q++)
411 		/* VOID */ ;
412 	    *q = 0;
413 	    if (strcasecmp(p, "chunked") == 0)
414 		enc = ENC_CHUNKED;
415 	    DEBUG(fprintf(stderr, "xferenc:  [\033[1m%s\033[m]\n", p));
416 #undef XFERENC
417 #define CONTTYPE "Content-Type:"
418 	} else if (strncasecmp(ln, CONTTYPE, sizeof(CONTTYPE)-1) == 0) {
419 	    p = ln + sizeof(CONTTYPE) - 1;
420 	    while ((p < ln + len) && isspace(*p))
421 		p++;
422 	    for (i = 0; p < ln + len; p++)
423 		if (i < HTTPCTYPELEN)
424 		    c->content_type[i++] = *p;
425 	    do c->content_type[i--] = 0; while (isspace(c->content_type[i]));
426 	    DEBUG(fprintf(stderr, "conttype: [\033[1m%s\033[m]\n",
427 			  c->content_type));
428 #undef CONTTYPE
429 	}
430     }
431 
432     /* only body remains */
433     c->encoding = enc;
434     cf = funopen(c,
435 		 (int (*)(void *, char *, int))_http_readfn,
436 		 (int (*)(void *, const char *, int))_http_writefn,
437 		 (fpos_t (*)(void *, fpos_t, int))NULL,
438 		 (int (*)(void *))_http_closefn);
439     if (cf == NULL)
440 	goto fouch;
441     return cf;
442 
443 ouch:
444     if (sd >= 0)
445 	close(sd);
446     free(c);
447     return NULL;
448 fouch:
449     fclose(f);
450     free(c);
451     return NULL;
452 }
453 
454 FILE *
455 fetchPutHTTP(url_t *URL, char *flags)
456 {
457     warnx("fetchPutHTTP(): not implemented");
458     return NULL;
459 }
460