xref: /freebsd/lib/libfetch/fetch.c (revision d8acd8dc5cb2a0ce54609c9c1d94cff16792902a)
14ca1ab94SDag-Erling Smørgrav /*-
24ca1ab94SDag-Erling Smørgrav  * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
34ca1ab94SDag-Erling Smørgrav  * All rights reserved.
44ca1ab94SDag-Erling Smørgrav  *
54ca1ab94SDag-Erling Smørgrav  * Redistribution and use in source and binary forms, with or without
64ca1ab94SDag-Erling Smørgrav  * modification, are permitted provided that the following conditions
74ca1ab94SDag-Erling Smørgrav  * are met:
84ca1ab94SDag-Erling Smørgrav  * 1. Redistributions of source code must retain the above copyright
94ca1ab94SDag-Erling Smørgrav  *    notice, this list of conditions and the following disclaimer
104ca1ab94SDag-Erling Smørgrav  *    in this position and unchanged.
114ca1ab94SDag-Erling Smørgrav  * 2. Redistributions in binary form must reproduce the above copyright
124ca1ab94SDag-Erling Smørgrav  *    notice, this list of conditions and the following disclaimer in the
134ca1ab94SDag-Erling Smørgrav  *    documentation and/or other materials provided with the distribution.
144ca1ab94SDag-Erling Smørgrav  * 3. The name of the author may not be used to endorse or promote products
154ca1ab94SDag-Erling Smørgrav  *    derived from this software without specific prior written permission
164ca1ab94SDag-Erling Smørgrav  *
174ca1ab94SDag-Erling Smørgrav  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
184ca1ab94SDag-Erling Smørgrav  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
194ca1ab94SDag-Erling Smørgrav  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
204ca1ab94SDag-Erling Smørgrav  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
214ca1ab94SDag-Erling Smørgrav  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
224ca1ab94SDag-Erling Smørgrav  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
234ca1ab94SDag-Erling Smørgrav  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
244ca1ab94SDag-Erling Smørgrav  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
254ca1ab94SDag-Erling Smørgrav  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
264ca1ab94SDag-Erling Smørgrav  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
274ca1ab94SDag-Erling Smørgrav  *
28d8acd8dcSDag-Erling Smørgrav  *	$Id: fetch.c,v 1.5 1998/11/05 19:48:17 des Exp $
294ca1ab94SDag-Erling Smørgrav  */
304ca1ab94SDag-Erling Smørgrav 
314ca1ab94SDag-Erling Smørgrav #include <sys/param.h>
32d8acd8dcSDag-Erling Smørgrav #include <sys/errno.h>
334ca1ab94SDag-Erling Smørgrav 
344ca1ab94SDag-Erling Smørgrav #include <ctype.h>
354ca1ab94SDag-Erling Smørgrav #include <stdio.h>
364ca1ab94SDag-Erling Smørgrav #include <stdlib.h>
374ca1ab94SDag-Erling Smørgrav #include <string.h>
384ca1ab94SDag-Erling Smørgrav 
394ca1ab94SDag-Erling Smørgrav #include "fetch.h"
40d8acd8dcSDag-Erling Smørgrav #include "common.h"
414ca1ab94SDag-Erling Smørgrav 
424ca1ab94SDag-Erling Smørgrav 
43d8acd8dcSDag-Erling Smørgrav /*** Local data **************************************************************/
44d8acd8dcSDag-Erling Smørgrav 
45d8acd8dcSDag-Erling Smørgrav /*
46d8acd8dcSDag-Erling Smørgrav  * Error messages for parser errors
47d8acd8dcSDag-Erling Smørgrav  */
48d8acd8dcSDag-Erling Smørgrav #define URL_MALFORMED		1
49d8acd8dcSDag-Erling Smørgrav #define URL_BAD_SCHEME		2
50d8acd8dcSDag-Erling Smørgrav #define URL_BAD_PORT		3
51d8acd8dcSDag-Erling Smørgrav static struct fetcherr _url_errlist[] = {
52d8acd8dcSDag-Erling Smørgrav     { URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
53d8acd8dcSDag-Erling Smørgrav     { URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
54d8acd8dcSDag-Erling Smørgrav     { URL_BAD_PORT,	FETCH_URL,	"Invalid server port" },
55d8acd8dcSDag-Erling Smørgrav     { -1,		FETCH_UNKNOWN,	"Unknown parser error" }
56d8acd8dcSDag-Erling Smørgrav };
57d8acd8dcSDag-Erling Smørgrav 
58d8acd8dcSDag-Erling Smørgrav 
59d8acd8dcSDag-Erling Smørgrav /*** Public API **************************************************************/
604ca1ab94SDag-Erling Smørgrav 
61842a95ccSDag-Erling Smørgrav /*
62842a95ccSDag-Erling Smørgrav  * Select the appropriate protocol for the URL scheme, and return a
63842a95ccSDag-Erling Smørgrav  * read-only stream connected to the document referenced by the URL.
64842a95ccSDag-Erling Smørgrav  */
65ecc91352SDag-Erling Smørgrav FILE *
66d8acd8dcSDag-Erling Smørgrav fetchGet(struct url *URL, char *flags)
67ecc91352SDag-Erling Smørgrav {
68ecc91352SDag-Erling Smørgrav     if (strcasecmp(URL->scheme, "file") == 0)
69ecc91352SDag-Erling Smørgrav 	return fetchGetFile(URL, flags);
70ecc91352SDag-Erling Smørgrav     else if (strcasecmp(URL->scheme, "http") == 0)
71ecc91352SDag-Erling Smørgrav 	return fetchGetHTTP(URL, flags);
72ecc91352SDag-Erling Smørgrav     else if (strcasecmp(URL->scheme, "ftp") == 0)
73ecc91352SDag-Erling Smørgrav 	return fetchGetFTP(URL, flags);
74d8acd8dcSDag-Erling Smørgrav     else {
75d8acd8dcSDag-Erling Smørgrav 	_url_seterr(URL_BAD_SCHEME);
76d8acd8dcSDag-Erling Smørgrav 	return NULL;
77d8acd8dcSDag-Erling Smørgrav     }
78ecc91352SDag-Erling Smørgrav }
79ecc91352SDag-Erling Smørgrav 
80842a95ccSDag-Erling Smørgrav /*
81842a95ccSDag-Erling Smørgrav  * Select the appropriate protocol for the URL scheme, and return a
82842a95ccSDag-Erling Smørgrav  * write-only stream connected to the document referenced by the URL.
83842a95ccSDag-Erling Smørgrav  */
84ecc91352SDag-Erling Smørgrav FILE *
85d8acd8dcSDag-Erling Smørgrav fetchPut(struct url *URL, char *flags)
86ecc91352SDag-Erling Smørgrav {
87ecc91352SDag-Erling Smørgrav     if (strcasecmp(URL->scheme, "file") == 0)
88ecc91352SDag-Erling Smørgrav 	return fetchPutFile(URL, flags);
89ecc91352SDag-Erling Smørgrav     else if (strcasecmp(URL->scheme, "http") == 0)
90ecc91352SDag-Erling Smørgrav 	return fetchPutHTTP(URL, flags);
91ecc91352SDag-Erling Smørgrav     else if (strcasecmp(URL->scheme, "ftp") == 0)
92ecc91352SDag-Erling Smørgrav 	return fetchPutFTP(URL, flags);
93d8acd8dcSDag-Erling Smørgrav     else {
94d8acd8dcSDag-Erling Smørgrav 	_url_seterr(URL_BAD_SCHEME);
95d8acd8dcSDag-Erling Smørgrav 	return NULL;
96d8acd8dcSDag-Erling Smørgrav     }
97d8acd8dcSDag-Erling Smørgrav }
98d8acd8dcSDag-Erling Smørgrav 
99d8acd8dcSDag-Erling Smørgrav /*
100d8acd8dcSDag-Erling Smørgrav  * Select the appropriate protocol for the URL scheme, and return the
101d8acd8dcSDag-Erling Smørgrav  * size of the document referenced by the URL if it exists.
102d8acd8dcSDag-Erling Smørgrav  */
103d8acd8dcSDag-Erling Smørgrav int
104d8acd8dcSDag-Erling Smørgrav fetchStat(struct url *URL, struct url_stat *us, char *flags)
105d8acd8dcSDag-Erling Smørgrav {
106d8acd8dcSDag-Erling Smørgrav     if (strcasecmp(URL->scheme, "file") == 0)
107d8acd8dcSDag-Erling Smørgrav 	return fetchStatFile(URL, us, flags);
108d8acd8dcSDag-Erling Smørgrav     else if (strcasecmp(URL->scheme, "http") == 0)
109d8acd8dcSDag-Erling Smørgrav 	return fetchStatHTTP(URL, us, flags);
110d8acd8dcSDag-Erling Smørgrav     else if (strcasecmp(URL->scheme, "ftp") == 0)
111d8acd8dcSDag-Erling Smørgrav 	return fetchStatFTP(URL, us, flags);
112d8acd8dcSDag-Erling Smørgrav     else {
113d8acd8dcSDag-Erling Smørgrav 	_url_seterr(URL_BAD_SCHEME);
114d8acd8dcSDag-Erling Smørgrav 	return -1;
115d8acd8dcSDag-Erling Smørgrav     }
116ecc91352SDag-Erling Smørgrav }
117ecc91352SDag-Erling Smørgrav 
/*
 * Convenience wrapper around fetchGet() for a URL given as a string.
 *
 * The string is parsed with fetchParseURL(); if parsing fails, NULL is
 * returned without calling fetchGet().  Otherwise the result of
 * fetchGet() is returned, and the temporary parsed URL is freed before
 * returning.
 */
FILE *
fetchGetURL(char *URL, char *flags)
{
    struct url *parsed;
    FILE *stream;

    parsed = fetchParseURL(URL);
    if (parsed == NULL)
	return NULL;

    stream = fetchGet(parsed, flags);
    free(parsed);		/* done with the parsed URL */

    return stream;
}
1354ca1ab94SDag-Erling Smørgrav 
1364ca1ab94SDag-Erling Smørgrav 
/*
 * Convenience wrapper around fetchPut() for a URL given as a string.
 *
 * The string is parsed with fetchParseURL(); if parsing fails, NULL is
 * returned without calling fetchPut().  Otherwise the result of
 * fetchPut() is returned, and the temporary parsed URL is freed before
 * returning.
 */
FILE *
fetchPutURL(char *URL, char *flags)
{
    struct url *parsed;
    FILE *stream;

    parsed = fetchParseURL(URL);
    if (parsed == NULL)
	return NULL;

    stream = fetchPut(parsed, flags);
    free(parsed);		/* done with the parsed URL */

    return stream;
}
1544ca1ab94SDag-Erling Smørgrav 
/*
 * Convenience wrapper around fetchStat() for a URL given as a string.
 *
 * The string is parsed with fetchParseURL(); if parsing fails, -1 is
 * returned without calling fetchStat().  Otherwise the result of
 * fetchStat() is returned, and the temporary parsed URL is freed before
 * returning.
 */
int
fetchStatURL(char *URL, struct url_stat *us, char *flags)
{
    struct url *parsed;
    int status;

    parsed = fetchParseURL(URL);
    if (parsed == NULL)
	return -1;

    status = fetchStat(parsed, us, flags);
    free(parsed);		/* done with the parsed URL */

    return status;
}
172d8acd8dcSDag-Erling Smørgrav 
173d8acd8dcSDag-Erling Smørgrav /*
1744ca1ab94SDag-Erling Smørgrav  * Split an URL into components. URL syntax is:
1754ca1ab94SDag-Erling Smørgrav  * method:[//[user[:pwd]@]host[:port]]/[document]
1764ca1ab94SDag-Erling Smørgrav  * This almost, but not quite, RFC1738 URL syntax.
1774ca1ab94SDag-Erling Smørgrav  */
178d8acd8dcSDag-Erling Smørgrav struct url *
1794ca1ab94SDag-Erling Smørgrav fetchParseURL(char *URL)
1804ca1ab94SDag-Erling Smørgrav {
1814ca1ab94SDag-Erling Smørgrav     char *p, *q;
182d8acd8dcSDag-Erling Smørgrav     struct url *u;
1834ca1ab94SDag-Erling Smørgrav     int i;
1844ca1ab94SDag-Erling Smørgrav 
185d8acd8dcSDag-Erling Smørgrav     /* allocate struct url */
186d8acd8dcSDag-Erling Smørgrav     if ((u = calloc(1, sizeof(struct url))) == NULL) {
187d8acd8dcSDag-Erling Smørgrav 	errno = ENOMEM;
188d8acd8dcSDag-Erling Smørgrav 	_fetch_syserr();
1894ca1ab94SDag-Erling Smørgrav 	return NULL;
190d8acd8dcSDag-Erling Smørgrav     }
1914ca1ab94SDag-Erling Smørgrav 
1924ca1ab94SDag-Erling Smørgrav     /* scheme name */
1934ca1ab94SDag-Erling Smørgrav     for (i = 0; *URL && (*URL != ':'); URL++)
1944ca1ab94SDag-Erling Smørgrav 	if (i < URL_SCHEMELEN)
1954ca1ab94SDag-Erling Smørgrav 	    u->scheme[i++] = *URL;
196d8acd8dcSDag-Erling Smørgrav     if (!URL[0] || (URL[1] != '/')) {
197d8acd8dcSDag-Erling Smørgrav 	_url_seterr(URL_BAD_SCHEME);
1984ca1ab94SDag-Erling Smørgrav 	goto ouch;
199d8acd8dcSDag-Erling Smørgrav     }
2004ca1ab94SDag-Erling Smørgrav     else URL++;
2014ca1ab94SDag-Erling Smørgrav     if (URL[1] != '/') {
2024ca1ab94SDag-Erling Smørgrav 	p = URL;
2034ca1ab94SDag-Erling Smørgrav 	goto nohost;
2044ca1ab94SDag-Erling Smørgrav     }
2054ca1ab94SDag-Erling Smørgrav     else URL += 2;
2064ca1ab94SDag-Erling Smørgrav 
2074ca1ab94SDag-Erling Smørgrav     p = strpbrk(URL, "/@");
2084ca1ab94SDag-Erling Smørgrav     if (*p == '@') {
2094ca1ab94SDag-Erling Smørgrav 	/* username */
2104ca1ab94SDag-Erling Smørgrav 	for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++)
2114ca1ab94SDag-Erling Smørgrav 	    if (i < URL_USERLEN)
2124ca1ab94SDag-Erling Smørgrav 		u->user[i++] = *q;
2134ca1ab94SDag-Erling Smørgrav 
2144ca1ab94SDag-Erling Smørgrav 	/* password */
2154ca1ab94SDag-Erling Smørgrav 	if (*q == ':')
2164ca1ab94SDag-Erling Smørgrav 	    for (q++, i = 0; (*q != ':') && (*q != '@'); q++)
2174ca1ab94SDag-Erling Smørgrav 		if (i < URL_PWDLEN)
2184ca1ab94SDag-Erling Smørgrav 		    u->pwd[i++] = *q;
2194ca1ab94SDag-Erling Smørgrav 
2204ca1ab94SDag-Erling Smørgrav 	p++;
2214ca1ab94SDag-Erling Smørgrav     } else p = URL;
2224ca1ab94SDag-Erling Smørgrav 
2234ca1ab94SDag-Erling Smørgrav     /* hostname */
2244ca1ab94SDag-Erling Smørgrav     for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
2254ca1ab94SDag-Erling Smørgrav 	if (i < MAXHOSTNAMELEN)
2264ca1ab94SDag-Erling Smørgrav 	    u->host[i++] = *p;
2274ca1ab94SDag-Erling Smørgrav 
2284ca1ab94SDag-Erling Smørgrav     /* port */
2294ca1ab94SDag-Erling Smørgrav     if (*p == ':') {
2304ca1ab94SDag-Erling Smørgrav 	for (q = ++p; *q && (*q != '/'); q++)
2314ca1ab94SDag-Erling Smørgrav 	    if (isdigit(*q))
2324ca1ab94SDag-Erling Smørgrav 		u->port = u->port * 10 + (*q - '0');
233d8acd8dcSDag-Erling Smørgrav 	    else {
234d8acd8dcSDag-Erling Smørgrav 		/* invalid port */
235d8acd8dcSDag-Erling Smørgrav 		_url_seterr(URL_BAD_PORT);
236d8acd8dcSDag-Erling Smørgrav 		goto ouch;
237d8acd8dcSDag-Erling Smørgrav 	    }
2384ca1ab94SDag-Erling Smørgrav 	while (*p && (*p != '/'))
2394ca1ab94SDag-Erling Smørgrav 	    p++;
2404ca1ab94SDag-Erling Smørgrav     }
2414ca1ab94SDag-Erling Smørgrav 
2424ca1ab94SDag-Erling Smørgrav nohost:
2434ca1ab94SDag-Erling Smørgrav     /* document */
244842a95ccSDag-Erling Smørgrav     if (*p) {
245d8acd8dcSDag-Erling Smørgrav 	struct url *t;
246842a95ccSDag-Erling Smørgrav 	t = realloc(u, sizeof(*u)+strlen(p)-1);
247d8acd8dcSDag-Erling Smørgrav 	if (t == NULL) {
248d8acd8dcSDag-Erling Smørgrav 	    errno = ENOMEM;
249d8acd8dcSDag-Erling Smørgrav 	    _fetch_syserr();
2504ca1ab94SDag-Erling Smørgrav 	    goto ouch;
251d8acd8dcSDag-Erling Smørgrav 	}
252842a95ccSDag-Erling Smørgrav 	u = t;
253842a95ccSDag-Erling Smørgrav 	strcpy(u->doc, p);
254842a95ccSDag-Erling Smørgrav     } else {
255842a95ccSDag-Erling Smørgrav 	u->doc[0] = '/';
256842a95ccSDag-Erling Smørgrav 	u->doc[1] = 0;
257842a95ccSDag-Erling Smørgrav     }
2584ca1ab94SDag-Erling Smørgrav 
2594ca1ab94SDag-Erling Smørgrav     DEBUG(fprintf(stderr,
2604ca1ab94SDag-Erling Smørgrav 		  "scheme:   [\033[1m%s\033[m]\n"
2614ca1ab94SDag-Erling Smørgrav 		  "user:     [\033[1m%s\033[m]\n"
2624ca1ab94SDag-Erling Smørgrav 		  "password: [\033[1m%s\033[m]\n"
2634ca1ab94SDag-Erling Smørgrav 		  "host:     [\033[1m%s\033[m]\n"
2644ca1ab94SDag-Erling Smørgrav 		  "port:     [\033[1m%d\033[m]\n"
2654ca1ab94SDag-Erling Smørgrav 		  "document: [\033[1m%s\033[m]\n",
2664ca1ab94SDag-Erling Smørgrav 		  u->scheme, u->user, u->pwd,
2674ca1ab94SDag-Erling Smørgrav 		  u->host, u->port, u->doc));
2684ca1ab94SDag-Erling Smørgrav 
2694ca1ab94SDag-Erling Smørgrav     return u;
2704ca1ab94SDag-Erling Smørgrav 
2714ca1ab94SDag-Erling Smørgrav ouch:
2724ca1ab94SDag-Erling Smørgrav     free(u);
2734ca1ab94SDag-Erling Smørgrav     return NULL;
2744ca1ab94SDag-Erling Smørgrav }
275