xref: /freebsd/lib/libfetch/fetch.c (revision 842a95cc236e53afbf5e516267246c3408e0779e)
14ca1ab94SDag-Erling Smørgrav /*-
24ca1ab94SDag-Erling Smørgrav  * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
34ca1ab94SDag-Erling Smørgrav  * All rights reserved.
44ca1ab94SDag-Erling Smørgrav  *
54ca1ab94SDag-Erling Smørgrav  * Redistribution and use in source and binary forms, with or without
64ca1ab94SDag-Erling Smørgrav  * modification, are permitted provided that the following conditions
74ca1ab94SDag-Erling Smørgrav  * are met:
84ca1ab94SDag-Erling Smørgrav  * 1. Redistributions of source code must retain the above copyright
94ca1ab94SDag-Erling Smørgrav  *    notice, this list of conditions and the following disclaimer
104ca1ab94SDag-Erling Smørgrav  *    in this position and unchanged.
114ca1ab94SDag-Erling Smørgrav  * 2. Redistributions in binary form must reproduce the above copyright
124ca1ab94SDag-Erling Smørgrav  *    notice, this list of conditions and the following disclaimer in the
134ca1ab94SDag-Erling Smørgrav  *    documentation and/or other materials provided with the distribution.
144ca1ab94SDag-Erling Smørgrav  * 3. The name of the author may not be used to endorse or promote products
154ca1ab94SDag-Erling Smørgrav  *    derived from this software without specific prior written permission
164ca1ab94SDag-Erling Smørgrav  *
174ca1ab94SDag-Erling Smørgrav  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
184ca1ab94SDag-Erling Smørgrav  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
194ca1ab94SDag-Erling Smørgrav  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
204ca1ab94SDag-Erling Smørgrav  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
214ca1ab94SDag-Erling Smørgrav  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
224ca1ab94SDag-Erling Smørgrav  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
234ca1ab94SDag-Erling Smørgrav  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
244ca1ab94SDag-Erling Smørgrav  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
254ca1ab94SDag-Erling Smørgrav  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
264ca1ab94SDag-Erling Smørgrav  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
274ca1ab94SDag-Erling Smørgrav  *
28842a95ccSDag-Erling Smørgrav  *	$Id: fetch.c,v 1.4 1998/08/17 09:30:19 des Exp $
294ca1ab94SDag-Erling Smørgrav  */
304ca1ab94SDag-Erling Smørgrav 
314ca1ab94SDag-Erling Smørgrav #include <sys/param.h>
324ca1ab94SDag-Erling Smørgrav 
334ca1ab94SDag-Erling Smørgrav #include <ctype.h>
344ca1ab94SDag-Erling Smørgrav #include <stdio.h>
354ca1ab94SDag-Erling Smørgrav #include <stdlib.h>
364ca1ab94SDag-Erling Smørgrav #include <string.h>
374ca1ab94SDag-Erling Smørgrav 
384ca1ab94SDag-Erling Smørgrav #include "fetch.h"
394ca1ab94SDag-Erling Smørgrav 
/*
 * DEBUG(x) executes the statement x once in normal builds.  When NDEBUG
 * is defined it expands to an empty statement and x is never evaluated.
 */
#ifdef NDEBUG
#define DEBUG(x) do { } while (0)
#else
#define DEBUG(x) do { x; } while (0)
#endif

/*
 * Library-wide error code and message.  Nothing in this part of the file
 * assigns them; presumably the protocol back ends record their most
 * recent failure here -- confirm against the other libfetch sources.
 */
int fetchLastErrCode;
const char *fetchLastErrText;
484ca1ab94SDag-Erling Smørgrav 
49842a95ccSDag-Erling Smørgrav /*
50842a95ccSDag-Erling Smørgrav  * Select the appropriate protocol for the URL scheme, and return a
51842a95ccSDag-Erling Smørgrav  * read-only stream connected to the document referenced by the URL.
52842a95ccSDag-Erling Smørgrav  */
53ecc91352SDag-Erling Smørgrav FILE *
54ecc91352SDag-Erling Smørgrav fetchGet(url_t *URL, char *flags)
55ecc91352SDag-Erling Smørgrav {
56ecc91352SDag-Erling Smørgrav     if (strcasecmp(URL->scheme, "file") == 0)
57ecc91352SDag-Erling Smørgrav 	return fetchGetFile(URL, flags);
58ecc91352SDag-Erling Smørgrav     else if (strcasecmp(URL->scheme, "http") == 0)
59ecc91352SDag-Erling Smørgrav 	return fetchGetHTTP(URL, flags);
60ecc91352SDag-Erling Smørgrav     else if (strcasecmp(URL->scheme, "ftp") == 0)
61ecc91352SDag-Erling Smørgrav 	return fetchGetFTP(URL, flags);
62ecc91352SDag-Erling Smørgrav     else return NULL;
63ecc91352SDag-Erling Smørgrav 
64ecc91352SDag-Erling Smørgrav }
65ecc91352SDag-Erling Smørgrav 
66842a95ccSDag-Erling Smørgrav /*
67842a95ccSDag-Erling Smørgrav  * Select the appropriate protocol for the URL scheme, and return a
68842a95ccSDag-Erling Smørgrav  * write-only stream connected to the document referenced by the URL.
69842a95ccSDag-Erling Smørgrav  */
70ecc91352SDag-Erling Smørgrav FILE *
71ecc91352SDag-Erling Smørgrav fetchPut(url_t *URL, char *flags)
72ecc91352SDag-Erling Smørgrav {
73ecc91352SDag-Erling Smørgrav     if (strcasecmp(URL->scheme, "file") == 0)
74ecc91352SDag-Erling Smørgrav 	return fetchPutFile(URL, flags);
75ecc91352SDag-Erling Smørgrav     else if (strcasecmp(URL->scheme, "http") == 0)
76ecc91352SDag-Erling Smørgrav 	return fetchPutHTTP(URL, flags);
77ecc91352SDag-Erling Smørgrav     else if (strcasecmp(URL->scheme, "ftp") == 0)
78ecc91352SDag-Erling Smørgrav 	return fetchPutFTP(URL, flags);
79ecc91352SDag-Erling Smørgrav     else return NULL;
80ecc91352SDag-Erling Smørgrav }
81ecc91352SDag-Erling Smørgrav 
82842a95ccSDag-Erling Smørgrav /*
83842a95ccSDag-Erling Smørgrav  * Attempt to parse the given URL; if successful, call fetchGet().
84842a95ccSDag-Erling Smørgrav  */
854ca1ab94SDag-Erling Smørgrav FILE *
864ca1ab94SDag-Erling Smørgrav fetchGetURL(char *URL, char *flags)
874ca1ab94SDag-Erling Smørgrav {
884ca1ab94SDag-Erling Smørgrav     url_t *u;
894ca1ab94SDag-Erling Smørgrav     FILE *f;
904ca1ab94SDag-Erling Smørgrav 
914ca1ab94SDag-Erling Smørgrav     if ((u = fetchParseURL(URL)) == NULL)
924ca1ab94SDag-Erling Smørgrav 	return NULL;
934ca1ab94SDag-Erling Smørgrav 
94ecc91352SDag-Erling Smørgrav     f = fetchGet(u, flags);
954ca1ab94SDag-Erling Smørgrav 
96842a95ccSDag-Erling Smørgrav     free(u);
974ca1ab94SDag-Erling Smørgrav     return f;
984ca1ab94SDag-Erling Smørgrav }
994ca1ab94SDag-Erling Smørgrav 
1004ca1ab94SDag-Erling Smørgrav 
101842a95ccSDag-Erling Smørgrav /*
102842a95ccSDag-Erling Smørgrav  * Attempt to parse the given URL; if successful, call fetchPut().
103842a95ccSDag-Erling Smørgrav  */
1044ca1ab94SDag-Erling Smørgrav FILE *
1054ca1ab94SDag-Erling Smørgrav fetchPutURL(char *URL, char *flags)
1064ca1ab94SDag-Erling Smørgrav {
1074ca1ab94SDag-Erling Smørgrav     url_t *u;
1084ca1ab94SDag-Erling Smørgrav     FILE *f;
1094ca1ab94SDag-Erling Smørgrav 
1104ca1ab94SDag-Erling Smørgrav     if ((u = fetchParseURL(URL)) == NULL)
1114ca1ab94SDag-Erling Smørgrav 	return NULL;
1124ca1ab94SDag-Erling Smørgrav 
113ecc91352SDag-Erling Smørgrav     f = fetchPut(u, flags);
1144ca1ab94SDag-Erling Smørgrav 
115842a95ccSDag-Erling Smørgrav     free(u);
1164ca1ab94SDag-Erling Smørgrav     return f;
1174ca1ab94SDag-Erling Smørgrav }
1184ca1ab94SDag-Erling Smørgrav 
1194ca1ab94SDag-Erling Smørgrav /*
1204ca1ab94SDag-Erling Smørgrav  * Split an URL into components. URL syntax is:
1214ca1ab94SDag-Erling Smørgrav  * method:[//[user[:pwd]@]host[:port]]/[document]
1224ca1ab94SDag-Erling Smørgrav  * This almost, but not quite, RFC1738 URL syntax.
1234ca1ab94SDag-Erling Smørgrav  */
1244ca1ab94SDag-Erling Smørgrav url_t *
1254ca1ab94SDag-Erling Smørgrav fetchParseURL(char *URL)
1264ca1ab94SDag-Erling Smørgrav {
1274ca1ab94SDag-Erling Smørgrav     char *p, *q;
1284ca1ab94SDag-Erling Smørgrav     url_t *u;
1294ca1ab94SDag-Erling Smørgrav     int i;
1304ca1ab94SDag-Erling Smørgrav 
1314ca1ab94SDag-Erling Smørgrav     /* allocate url_t */
1324ca1ab94SDag-Erling Smørgrav     if ((u = calloc(1, sizeof(url_t))) == NULL)
1334ca1ab94SDag-Erling Smørgrav 	return NULL;
1344ca1ab94SDag-Erling Smørgrav 
1354ca1ab94SDag-Erling Smørgrav     /* scheme name */
1364ca1ab94SDag-Erling Smørgrav     for (i = 0; *URL && (*URL != ':'); URL++)
1374ca1ab94SDag-Erling Smørgrav 	if (i < URL_SCHEMELEN)
1384ca1ab94SDag-Erling Smørgrav 	    u->scheme[i++] = *URL;
1394ca1ab94SDag-Erling Smørgrav     if (!URL[0] || (URL[1] != '/'))
1404ca1ab94SDag-Erling Smørgrav 	goto ouch;
1414ca1ab94SDag-Erling Smørgrav     else URL++;
1424ca1ab94SDag-Erling Smørgrav     if (URL[1] != '/') {
1434ca1ab94SDag-Erling Smørgrav 	p = URL;
1444ca1ab94SDag-Erling Smørgrav 	goto nohost;
1454ca1ab94SDag-Erling Smørgrav     }
1464ca1ab94SDag-Erling Smørgrav     else URL += 2;
1474ca1ab94SDag-Erling Smørgrav 
1484ca1ab94SDag-Erling Smørgrav     p = strpbrk(URL, "/@");
1494ca1ab94SDag-Erling Smørgrav     if (*p == '@') {
1504ca1ab94SDag-Erling Smørgrav 	/* username */
1514ca1ab94SDag-Erling Smørgrav 	for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++)
1524ca1ab94SDag-Erling Smørgrav 	    if (i < URL_USERLEN)
1534ca1ab94SDag-Erling Smørgrav 		u->user[i++] = *q;
1544ca1ab94SDag-Erling Smørgrav 
1554ca1ab94SDag-Erling Smørgrav 	/* password */
1564ca1ab94SDag-Erling Smørgrav 	if (*q == ':')
1574ca1ab94SDag-Erling Smørgrav 	    for (q++, i = 0; (*q != ':') && (*q != '@'); q++)
1584ca1ab94SDag-Erling Smørgrav 		if (i < URL_PWDLEN)
1594ca1ab94SDag-Erling Smørgrav 		    u->pwd[i++] = *q;
1604ca1ab94SDag-Erling Smørgrav 
1614ca1ab94SDag-Erling Smørgrav 	p++;
1624ca1ab94SDag-Erling Smørgrav     } else p = URL;
1634ca1ab94SDag-Erling Smørgrav 
1644ca1ab94SDag-Erling Smørgrav     /* hostname */
1654ca1ab94SDag-Erling Smørgrav     for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
1664ca1ab94SDag-Erling Smørgrav 	if (i < MAXHOSTNAMELEN)
1674ca1ab94SDag-Erling Smørgrav 	    u->host[i++] = *p;
1684ca1ab94SDag-Erling Smørgrav 
1694ca1ab94SDag-Erling Smørgrav     /* port */
1704ca1ab94SDag-Erling Smørgrav     if (*p == ':') {
1714ca1ab94SDag-Erling Smørgrav 	for (q = ++p; *q && (*q != '/'); q++)
1724ca1ab94SDag-Erling Smørgrav 	    if (isdigit(*q))
1734ca1ab94SDag-Erling Smørgrav 		u->port = u->port * 10 + (*q - '0');
1744ca1ab94SDag-Erling Smørgrav 	    else return 0; /* invalid port */
1754ca1ab94SDag-Erling Smørgrav 	while (*p && (*p != '/'))
1764ca1ab94SDag-Erling Smørgrav 	    p++;
1774ca1ab94SDag-Erling Smørgrav     }
1784ca1ab94SDag-Erling Smørgrav 
1794ca1ab94SDag-Erling Smørgrav nohost:
1804ca1ab94SDag-Erling Smørgrav     /* document */
181842a95ccSDag-Erling Smørgrav     if (*p) {
182842a95ccSDag-Erling Smørgrav 	url_t *t;
183842a95ccSDag-Erling Smørgrav 	t = realloc(u, sizeof(*u)+strlen(p)-1);
184842a95ccSDag-Erling Smørgrav 	if (t == NULL)
1854ca1ab94SDag-Erling Smørgrav 	    goto ouch;
186842a95ccSDag-Erling Smørgrav 	u = t;
187842a95ccSDag-Erling Smørgrav 	strcpy(u->doc, p);
188842a95ccSDag-Erling Smørgrav     } else {
189842a95ccSDag-Erling Smørgrav 	u->doc[0] = '/';
190842a95ccSDag-Erling Smørgrav 	u->doc[1] = 0;
191842a95ccSDag-Erling Smørgrav     }
1924ca1ab94SDag-Erling Smørgrav 
1934ca1ab94SDag-Erling Smørgrav     DEBUG(fprintf(stderr,
1944ca1ab94SDag-Erling Smørgrav 		  "scheme:   [\033[1m%s\033[m]\n"
1954ca1ab94SDag-Erling Smørgrav 		  "user:     [\033[1m%s\033[m]\n"
1964ca1ab94SDag-Erling Smørgrav 		  "password: [\033[1m%s\033[m]\n"
1974ca1ab94SDag-Erling Smørgrav 		  "host:     [\033[1m%s\033[m]\n"
1984ca1ab94SDag-Erling Smørgrav 		  "port:     [\033[1m%d\033[m]\n"
1994ca1ab94SDag-Erling Smørgrav 		  "document: [\033[1m%s\033[m]\n",
2004ca1ab94SDag-Erling Smørgrav 		  u->scheme, u->user, u->pwd,
2014ca1ab94SDag-Erling Smørgrav 		  u->host, u->port, u->doc));
2024ca1ab94SDag-Erling Smørgrav 
2034ca1ab94SDag-Erling Smørgrav     return u;
2044ca1ab94SDag-Erling Smørgrav 
2054ca1ab94SDag-Erling Smørgrav ouch:
2064ca1ab94SDag-Erling Smørgrav     free(u);
2074ca1ab94SDag-Erling Smørgrav     return NULL;
2084ca1ab94SDag-Erling Smørgrav }
209