xref: /freebsd/lib/libfetch/fetch.c (revision 0fba3a00059437a62d6ac727a35ae085dd5c6ff7)
14ca1ab94SDag-Erling Smørgrav /*-
24ca1ab94SDag-Erling Smørgrav  * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
34ca1ab94SDag-Erling Smørgrav  * All rights reserved.
44ca1ab94SDag-Erling Smørgrav  *
54ca1ab94SDag-Erling Smørgrav  * Redistribution and use in source and binary forms, with or without
64ca1ab94SDag-Erling Smørgrav  * modification, are permitted provided that the following conditions
74ca1ab94SDag-Erling Smørgrav  * are met:
84ca1ab94SDag-Erling Smørgrav  * 1. Redistributions of source code must retain the above copyright
94ca1ab94SDag-Erling Smørgrav  *    notice, this list of conditions and the following disclaimer
104ca1ab94SDag-Erling Smørgrav  *    in this position and unchanged.
114ca1ab94SDag-Erling Smørgrav  * 2. Redistributions in binary form must reproduce the above copyright
124ca1ab94SDag-Erling Smørgrav  *    notice, this list of conditions and the following disclaimer in the
134ca1ab94SDag-Erling Smørgrav  *    documentation and/or other materials provided with the distribution.
144ca1ab94SDag-Erling Smørgrav  * 3. The name of the author may not be used to endorse or promote products
154ca1ab94SDag-Erling Smørgrav  *    derived from this software without specific prior written permission
164ca1ab94SDag-Erling Smørgrav  *
174ca1ab94SDag-Erling Smørgrav  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
184ca1ab94SDag-Erling Smørgrav  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
194ca1ab94SDag-Erling Smørgrav  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
204ca1ab94SDag-Erling Smørgrav  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
214ca1ab94SDag-Erling Smørgrav  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
224ca1ab94SDag-Erling Smørgrav  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
234ca1ab94SDag-Erling Smørgrav  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
244ca1ab94SDag-Erling Smørgrav  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
254ca1ab94SDag-Erling Smørgrav  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
264ca1ab94SDag-Erling Smørgrav  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
274ca1ab94SDag-Erling Smørgrav  *
280fba3a00SDag-Erling Smørgrav  *	$Id: fetch.c,v 1.6 1998/11/06 22:14:08 des Exp $
294ca1ab94SDag-Erling Smørgrav  */
304ca1ab94SDag-Erling Smørgrav 
314ca1ab94SDag-Erling Smørgrav #include <sys/param.h>
32d8acd8dcSDag-Erling Smørgrav #include <sys/errno.h>
334ca1ab94SDag-Erling Smørgrav 
344ca1ab94SDag-Erling Smørgrav #include <ctype.h>
354ca1ab94SDag-Erling Smørgrav #include <stdio.h>
364ca1ab94SDag-Erling Smørgrav #include <stdlib.h>
374ca1ab94SDag-Erling Smørgrav #include <string.h>
384ca1ab94SDag-Erling Smørgrav 
394ca1ab94SDag-Erling Smørgrav #include "fetch.h"
40d8acd8dcSDag-Erling Smørgrav #include "common.h"
414ca1ab94SDag-Erling Smørgrav 
424ca1ab94SDag-Erling Smørgrav 
/*
 * Externally visible error code.  Nothing in this file assigns it;
 * presumably it is updated by the error helpers behind _url_seterr()
 * and _fetch_syserr() -- TODO confirm against common.c.
 */
int fetchLastErrCode;
440fba3a00SDag-Erling Smørgrav 
450fba3a00SDag-Erling Smørgrav 
46d8acd8dcSDag-Erling Smørgrav /*** Local data **************************************************************/
47d8acd8dcSDag-Erling Smørgrav 
/*
 * Error messages for parser errors
 *
 * The URL_* values are the codes handed to _url_seterr() by the
 * functions below.  Each row of _url_errlist pairs one of those codes
 * with a FETCH_* value and a human-readable message.  The last row
 * (-1, FETCH_UNKNOWN) looks like the catch-all / end-of-table entry
 * used when a code has no row of its own -- TODO confirm against the
 * lookup code and the definition of struct fetcherr in common.h.
 */
#define URL_MALFORMED		1
#define URL_BAD_SCHEME		2
#define URL_BAD_PORT		3
static struct fetcherr _url_errlist[] = {
    { URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
    { URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
    { URL_BAD_PORT,	FETCH_URL,	"Invalid server port" },
    { -1,		FETCH_UNKNOWN,	"Unknown parser error" }
};
60d8acd8dcSDag-Erling Smørgrav 
61d8acd8dcSDag-Erling Smørgrav 
62d8acd8dcSDag-Erling Smørgrav /*** Public API **************************************************************/
634ca1ab94SDag-Erling Smørgrav 
64842a95ccSDag-Erling Smørgrav /*
65842a95ccSDag-Erling Smørgrav  * Select the appropriate protocol for the URL scheme, and return a
66842a95ccSDag-Erling Smørgrav  * read-only stream connected to the document referenced by the URL.
67842a95ccSDag-Erling Smørgrav  */
68ecc91352SDag-Erling Smørgrav FILE *
69d8acd8dcSDag-Erling Smørgrav fetchGet(struct url *URL, char *flags)
70ecc91352SDag-Erling Smørgrav {
71ecc91352SDag-Erling Smørgrav     if (strcasecmp(URL->scheme, "file") == 0)
72ecc91352SDag-Erling Smørgrav 	return fetchGetFile(URL, flags);
73ecc91352SDag-Erling Smørgrav     else if (strcasecmp(URL->scheme, "http") == 0)
74ecc91352SDag-Erling Smørgrav 	return fetchGetHTTP(URL, flags);
75ecc91352SDag-Erling Smørgrav     else if (strcasecmp(URL->scheme, "ftp") == 0)
76ecc91352SDag-Erling Smørgrav 	return fetchGetFTP(URL, flags);
77d8acd8dcSDag-Erling Smørgrav     else {
78d8acd8dcSDag-Erling Smørgrav 	_url_seterr(URL_BAD_SCHEME);
79d8acd8dcSDag-Erling Smørgrav 	return NULL;
80d8acd8dcSDag-Erling Smørgrav     }
81ecc91352SDag-Erling Smørgrav }
82ecc91352SDag-Erling Smørgrav 
83842a95ccSDag-Erling Smørgrav /*
84842a95ccSDag-Erling Smørgrav  * Select the appropriate protocol for the URL scheme, and return a
85842a95ccSDag-Erling Smørgrav  * write-only stream connected to the document referenced by the URL.
86842a95ccSDag-Erling Smørgrav  */
87ecc91352SDag-Erling Smørgrav FILE *
88d8acd8dcSDag-Erling Smørgrav fetchPut(struct url *URL, char *flags)
89ecc91352SDag-Erling Smørgrav {
90ecc91352SDag-Erling Smørgrav     if (strcasecmp(URL->scheme, "file") == 0)
91ecc91352SDag-Erling Smørgrav 	return fetchPutFile(URL, flags);
92ecc91352SDag-Erling Smørgrav     else if (strcasecmp(URL->scheme, "http") == 0)
93ecc91352SDag-Erling Smørgrav 	return fetchPutHTTP(URL, flags);
94ecc91352SDag-Erling Smørgrav     else if (strcasecmp(URL->scheme, "ftp") == 0)
95ecc91352SDag-Erling Smørgrav 	return fetchPutFTP(URL, flags);
96d8acd8dcSDag-Erling Smørgrav     else {
97d8acd8dcSDag-Erling Smørgrav 	_url_seterr(URL_BAD_SCHEME);
98d8acd8dcSDag-Erling Smørgrav 	return NULL;
99d8acd8dcSDag-Erling Smørgrav     }
100d8acd8dcSDag-Erling Smørgrav }
101d8acd8dcSDag-Erling Smørgrav 
102d8acd8dcSDag-Erling Smørgrav /*
103d8acd8dcSDag-Erling Smørgrav  * Select the appropriate protocol for the URL scheme, and return the
104d8acd8dcSDag-Erling Smørgrav  * size of the document referenced by the URL if it exists.
105d8acd8dcSDag-Erling Smørgrav  */
106d8acd8dcSDag-Erling Smørgrav int
107d8acd8dcSDag-Erling Smørgrav fetchStat(struct url *URL, struct url_stat *us, char *flags)
108d8acd8dcSDag-Erling Smørgrav {
109d8acd8dcSDag-Erling Smørgrav     if (strcasecmp(URL->scheme, "file") == 0)
110d8acd8dcSDag-Erling Smørgrav 	return fetchStatFile(URL, us, flags);
111d8acd8dcSDag-Erling Smørgrav     else if (strcasecmp(URL->scheme, "http") == 0)
112d8acd8dcSDag-Erling Smørgrav 	return fetchStatHTTP(URL, us, flags);
113d8acd8dcSDag-Erling Smørgrav     else if (strcasecmp(URL->scheme, "ftp") == 0)
114d8acd8dcSDag-Erling Smørgrav 	return fetchStatFTP(URL, us, flags);
115d8acd8dcSDag-Erling Smørgrav     else {
116d8acd8dcSDag-Erling Smørgrav 	_url_seterr(URL_BAD_SCHEME);
117d8acd8dcSDag-Erling Smørgrav 	return -1;
118d8acd8dcSDag-Erling Smørgrav     }
119ecc91352SDag-Erling Smørgrav }
120ecc91352SDag-Erling Smørgrav 
/*
 * String front end to fetchGet(): parse `URL', pass the parsed form to
 * fetchGet() and release it again.  Returns NULL if the URL cannot be
 * parsed, otherwise whatever fetchGet() returned.
 */
FILE *
fetchGetURL(char *URL, char *flags)
{
    struct url *parsed;
    FILE *stream;

    parsed = fetchParseURL(URL);
    if (parsed == NULL)
	return NULL;

    stream = fetchGet(parsed, flags);
    free(parsed);

    return stream;
}
1384ca1ab94SDag-Erling Smørgrav 
1394ca1ab94SDag-Erling Smørgrav 
/*
 * String front end to fetchPut(): parse `URL', pass the parsed form to
 * fetchPut() and release it again.  Returns NULL if the URL cannot be
 * parsed, otherwise whatever fetchPut() returned.
 */
FILE *
fetchPutURL(char *URL, char *flags)
{
    struct url *parsed;
    FILE *stream;

    parsed = fetchParseURL(URL);
    if (parsed == NULL)
	return NULL;

    stream = fetchPut(parsed, flags);
    free(parsed);

    return stream;
}
1574ca1ab94SDag-Erling Smørgrav 
/*
 * String front end to fetchStat(): parse `URL', pass the parsed form
 * together with `us' and `flags' to fetchStat() and release it again.
 * Returns -1 if the URL cannot be parsed, otherwise whatever
 * fetchStat() returned.
 */
int
fetchStatURL(char *URL, struct url_stat *us, char *flags)
{
    struct url *parsed;
    int status;

    parsed = fetchParseURL(URL);
    if (parsed == NULL)
	return -1;

    status = fetchStat(parsed, us, flags);
    free(parsed);

    return status;
}
175d8acd8dcSDag-Erling Smørgrav 
176d8acd8dcSDag-Erling Smørgrav /*
1774ca1ab94SDag-Erling Smørgrav  * Split an URL into components. URL syntax is:
1784ca1ab94SDag-Erling Smørgrav  * method:[//[user[:pwd]@]host[:port]]/[document]
1794ca1ab94SDag-Erling Smørgrav  * This almost, but not quite, RFC1738 URL syntax.
1804ca1ab94SDag-Erling Smørgrav  */
181d8acd8dcSDag-Erling Smørgrav struct url *
1824ca1ab94SDag-Erling Smørgrav fetchParseURL(char *URL)
1834ca1ab94SDag-Erling Smørgrav {
1844ca1ab94SDag-Erling Smørgrav     char *p, *q;
185d8acd8dcSDag-Erling Smørgrav     struct url *u;
1864ca1ab94SDag-Erling Smørgrav     int i;
1874ca1ab94SDag-Erling Smørgrav 
188d8acd8dcSDag-Erling Smørgrav     /* allocate struct url */
189d8acd8dcSDag-Erling Smørgrav     if ((u = calloc(1, sizeof(struct url))) == NULL) {
190d8acd8dcSDag-Erling Smørgrav 	errno = ENOMEM;
191d8acd8dcSDag-Erling Smørgrav 	_fetch_syserr();
1924ca1ab94SDag-Erling Smørgrav 	return NULL;
193d8acd8dcSDag-Erling Smørgrav     }
1944ca1ab94SDag-Erling Smørgrav 
1954ca1ab94SDag-Erling Smørgrav     /* scheme name */
1964ca1ab94SDag-Erling Smørgrav     for (i = 0; *URL && (*URL != ':'); URL++)
1974ca1ab94SDag-Erling Smørgrav 	if (i < URL_SCHEMELEN)
1984ca1ab94SDag-Erling Smørgrav 	    u->scheme[i++] = *URL;
199d8acd8dcSDag-Erling Smørgrav     if (!URL[0] || (URL[1] != '/')) {
200d8acd8dcSDag-Erling Smørgrav 	_url_seterr(URL_BAD_SCHEME);
2014ca1ab94SDag-Erling Smørgrav 	goto ouch;
202d8acd8dcSDag-Erling Smørgrav     }
2034ca1ab94SDag-Erling Smørgrav     else URL++;
2044ca1ab94SDag-Erling Smørgrav     if (URL[1] != '/') {
2054ca1ab94SDag-Erling Smørgrav 	p = URL;
2064ca1ab94SDag-Erling Smørgrav 	goto nohost;
2074ca1ab94SDag-Erling Smørgrav     }
2084ca1ab94SDag-Erling Smørgrav     else URL += 2;
2094ca1ab94SDag-Erling Smørgrav 
2104ca1ab94SDag-Erling Smørgrav     p = strpbrk(URL, "/@");
2110fba3a00SDag-Erling Smørgrav     if (p && *p == '@') {
2124ca1ab94SDag-Erling Smørgrav 	/* username */
2134ca1ab94SDag-Erling Smørgrav 	for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++)
2144ca1ab94SDag-Erling Smørgrav 	    if (i < URL_USERLEN)
2154ca1ab94SDag-Erling Smørgrav 		u->user[i++] = *q;
2164ca1ab94SDag-Erling Smørgrav 
2174ca1ab94SDag-Erling Smørgrav 	/* password */
2184ca1ab94SDag-Erling Smørgrav 	if (*q == ':')
2194ca1ab94SDag-Erling Smørgrav 	    for (q++, i = 0; (*q != ':') && (*q != '@'); q++)
2204ca1ab94SDag-Erling Smørgrav 		if (i < URL_PWDLEN)
2214ca1ab94SDag-Erling Smørgrav 		    u->pwd[i++] = *q;
2224ca1ab94SDag-Erling Smørgrav 
2234ca1ab94SDag-Erling Smørgrav 	p++;
2244ca1ab94SDag-Erling Smørgrav     } else p = URL;
2254ca1ab94SDag-Erling Smørgrav 
2264ca1ab94SDag-Erling Smørgrav     /* hostname */
2274ca1ab94SDag-Erling Smørgrav     for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
2284ca1ab94SDag-Erling Smørgrav 	if (i < MAXHOSTNAMELEN)
2294ca1ab94SDag-Erling Smørgrav 	    u->host[i++] = *p;
2304ca1ab94SDag-Erling Smørgrav 
2314ca1ab94SDag-Erling Smørgrav     /* port */
2324ca1ab94SDag-Erling Smørgrav     if (*p == ':') {
2334ca1ab94SDag-Erling Smørgrav 	for (q = ++p; *q && (*q != '/'); q++)
2344ca1ab94SDag-Erling Smørgrav 	    if (isdigit(*q))
2354ca1ab94SDag-Erling Smørgrav 		u->port = u->port * 10 + (*q - '0');
236d8acd8dcSDag-Erling Smørgrav 	    else {
237d8acd8dcSDag-Erling Smørgrav 		/* invalid port */
238d8acd8dcSDag-Erling Smørgrav 		_url_seterr(URL_BAD_PORT);
239d8acd8dcSDag-Erling Smørgrav 		goto ouch;
240d8acd8dcSDag-Erling Smørgrav 	    }
2414ca1ab94SDag-Erling Smørgrav 	while (*p && (*p != '/'))
2424ca1ab94SDag-Erling Smørgrav 	    p++;
2434ca1ab94SDag-Erling Smørgrav     }
2444ca1ab94SDag-Erling Smørgrav 
2454ca1ab94SDag-Erling Smørgrav nohost:
2464ca1ab94SDag-Erling Smørgrav     /* document */
247842a95ccSDag-Erling Smørgrav     if (*p) {
248d8acd8dcSDag-Erling Smørgrav 	struct url *t;
249842a95ccSDag-Erling Smørgrav 	t = realloc(u, sizeof(*u)+strlen(p)-1);
250d8acd8dcSDag-Erling Smørgrav 	if (t == NULL) {
251d8acd8dcSDag-Erling Smørgrav 	    errno = ENOMEM;
252d8acd8dcSDag-Erling Smørgrav 	    _fetch_syserr();
2534ca1ab94SDag-Erling Smørgrav 	    goto ouch;
254d8acd8dcSDag-Erling Smørgrav 	}
255842a95ccSDag-Erling Smørgrav 	u = t;
256842a95ccSDag-Erling Smørgrav 	strcpy(u->doc, p);
257842a95ccSDag-Erling Smørgrav     } else {
258842a95ccSDag-Erling Smørgrav 	u->doc[0] = '/';
259842a95ccSDag-Erling Smørgrav 	u->doc[1] = 0;
260842a95ccSDag-Erling Smørgrav     }
2614ca1ab94SDag-Erling Smørgrav 
2624ca1ab94SDag-Erling Smørgrav     DEBUG(fprintf(stderr,
2634ca1ab94SDag-Erling Smørgrav 		  "scheme:   [\033[1m%s\033[m]\n"
2644ca1ab94SDag-Erling Smørgrav 		  "user:     [\033[1m%s\033[m]\n"
2654ca1ab94SDag-Erling Smørgrav 		  "password: [\033[1m%s\033[m]\n"
2664ca1ab94SDag-Erling Smørgrav 		  "host:     [\033[1m%s\033[m]\n"
2674ca1ab94SDag-Erling Smørgrav 		  "port:     [\033[1m%d\033[m]\n"
2684ca1ab94SDag-Erling Smørgrav 		  "document: [\033[1m%s\033[m]\n",
2694ca1ab94SDag-Erling Smørgrav 		  u->scheme, u->user, u->pwd,
2704ca1ab94SDag-Erling Smørgrav 		  u->host, u->port, u->doc));
2714ca1ab94SDag-Erling Smørgrav 
2724ca1ab94SDag-Erling Smørgrav     return u;
2734ca1ab94SDag-Erling Smørgrav 
2744ca1ab94SDag-Erling Smørgrav ouch:
2754ca1ab94SDag-Erling Smørgrav     free(u);
2764ca1ab94SDag-Erling Smørgrav     return NULL;
2774ca1ab94SDag-Erling Smørgrav }
278