xref: /freebsd/lib/libfetch/fetch.c (revision 930105c1e9a553c4faa1364d56f79f3fbdd383fb)
/*-
 * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
284ca1ab94SDag-Erling Smørgrav 
29cecb889fSMatthew Dillon #include <sys/cdefs.h>
30cecb889fSMatthew Dillon __FBSDID("$FreeBSD$");
31cecb889fSMatthew Dillon 
324ca1ab94SDag-Erling Smørgrav #include <sys/param.h>
33d8acd8dcSDag-Erling Smørgrav #include <sys/errno.h>
344ca1ab94SDag-Erling Smørgrav 
354ca1ab94SDag-Erling Smørgrav #include <ctype.h>
364ca1ab94SDag-Erling Smørgrav #include <stdio.h>
374ca1ab94SDag-Erling Smørgrav #include <stdlib.h>
384ca1ab94SDag-Erling Smørgrav #include <string.h>
394ca1ab94SDag-Erling Smørgrav 
404ca1ab94SDag-Erling Smørgrav #include "fetch.h"
41d8acd8dcSDag-Erling Smørgrav #include "common.h"
424ca1ab94SDag-Erling Smørgrav 
436490b215SDag-Erling Smørgrav auth_t	 fetchAuthMethod;
440fba3a00SDag-Erling Smørgrav int	 fetchLastErrCode;
45ba101983SDag-Erling Smørgrav char	 fetchLastErrString[MAXERRSTRING];
46fc6e9e65SDag-Erling Smørgrav int	 fetchTimeout;
47a1bb3f48SDag-Erling Smørgrav int	 fetchRestartCalls = 1;
487eb2f34dSDag-Erling Smørgrav int	 fetchDebug;
490fba3a00SDag-Erling Smørgrav 
500fba3a00SDag-Erling Smørgrav 
/*** Local data **************************************************************/
52d8acd8dcSDag-Erling Smørgrav 
53d8acd8dcSDag-Erling Smørgrav /*
54d8acd8dcSDag-Erling Smørgrav  * Error messages for parser errors
55d8acd8dcSDag-Erling Smørgrav  */
56d8acd8dcSDag-Erling Smørgrav #define URL_MALFORMED		1
57d8acd8dcSDag-Erling Smørgrav #define URL_BAD_SCHEME		2
58d8acd8dcSDag-Erling Smørgrav #define URL_BAD_PORT		3
59d8acd8dcSDag-Erling Smørgrav static struct fetcherr _url_errlist[] = {
60d8acd8dcSDag-Erling Smørgrav 	{ URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
61d8acd8dcSDag-Erling Smørgrav 	{ URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
62d8acd8dcSDag-Erling Smørgrav 	{ URL_BAD_PORT,		FETCH_URL,	"Invalid server port" },
63d8acd8dcSDag-Erling Smørgrav 	{ -1,			FETCH_UNKNOWN,	"Unknown parser error" }
64d8acd8dcSDag-Erling Smørgrav };
65d8acd8dcSDag-Erling Smørgrav 
66d8acd8dcSDag-Erling Smørgrav 
/*** Public API **************************************************************/
684ca1ab94SDag-Erling Smørgrav 
69842a95ccSDag-Erling Smørgrav /*
70842a95ccSDag-Erling Smørgrav  * Select the appropriate protocol for the URL scheme, and return a
71842a95ccSDag-Erling Smørgrav  * read-only stream connected to the document referenced by the URL.
721a5faa10SDag-Erling Smørgrav  * Also fill out the struct url_stat.
73842a95ccSDag-Erling Smørgrav  */
74ecc91352SDag-Erling Smørgrav FILE *
7538c7e4a6SArchie Cobbs fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
76ecc91352SDag-Erling Smørgrav {
77c97925adSHajimu UMEMOTO 	int direct;
78c97925adSHajimu UMEMOTO 
79d74a913bSDag-Erling Smørgrav 	direct = CHECK_FLAG('d');
8002e6bec1SDag-Erling Smørgrav 	if (us != NULL) {
8102e6bec1SDag-Erling Smørgrav 		us->size = -1;
8202e6bec1SDag-Erling Smørgrav 		us->atime = us->mtime = 0;
8302e6bec1SDag-Erling Smørgrav 	}
8459769ab1SDag-Erling Smørgrav 	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
85e19e6098SDag-Erling Smørgrav 		return (fetchXGetFile(URL, us, flags));
86111e2510SDag-Erling Smørgrav 	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
87111e2510SDag-Erling Smørgrav 		return (fetchXGetFTP(URL, us, flags));
8859769ab1SDag-Erling Smørgrav 	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
89e19e6098SDag-Erling Smørgrav 		return (fetchXGetHTTP(URL, us, flags));
90111e2510SDag-Erling Smørgrav 	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
91111e2510SDag-Erling Smørgrav 		return (fetchXGetHTTP(URL, us, flags));
92d8acd8dcSDag-Erling Smørgrav 	_url_seterr(URL_BAD_SCHEME);
93e19e6098SDag-Erling Smørgrav 	return (NULL);
94d8acd8dcSDag-Erling Smørgrav }
95ecc91352SDag-Erling Smørgrav 
/*
 * Select the appropriate protocol for the URL scheme, and return a
 * read-only stream connected to the document referenced by the URL.
 *
 * Equivalent to fetchXGet() with no struct url_stat to fill in.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
	return (fetchXGet(URL, NULL, flags));
}
1051a5faa10SDag-Erling Smørgrav 
1061a5faa10SDag-Erling Smørgrav /*
1071a5faa10SDag-Erling Smørgrav  * Select the appropriate protocol for the URL scheme, and return a
108842a95ccSDag-Erling Smørgrav  * write-only stream connected to the document referenced by the URL.
109842a95ccSDag-Erling Smørgrav  */
110ecc91352SDag-Erling Smørgrav FILE *
11138c7e4a6SArchie Cobbs fetchPut(struct url *URL, const char *flags)
112ecc91352SDag-Erling Smørgrav {
113c97925adSHajimu UMEMOTO 	int direct;
114c97925adSHajimu UMEMOTO 
115d74a913bSDag-Erling Smørgrav 	direct = CHECK_FLAG('d');
11659769ab1SDag-Erling Smørgrav 	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
117e19e6098SDag-Erling Smørgrav 		return (fetchPutFile(URL, flags));
118111e2510SDag-Erling Smørgrav 	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
119111e2510SDag-Erling Smørgrav 		return (fetchPutFTP(URL, flags));
12059769ab1SDag-Erling Smørgrav 	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
121e19e6098SDag-Erling Smørgrav 		return (fetchPutHTTP(URL, flags));
122111e2510SDag-Erling Smørgrav 	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
123111e2510SDag-Erling Smørgrav 		return (fetchPutHTTP(URL, flags));
124d8acd8dcSDag-Erling Smørgrav 	_url_seterr(URL_BAD_SCHEME);
125e19e6098SDag-Erling Smørgrav 	return (NULL);
126d8acd8dcSDag-Erling Smørgrav }
127d8acd8dcSDag-Erling Smørgrav 
128d8acd8dcSDag-Erling Smørgrav /*
129d8acd8dcSDag-Erling Smørgrav  * Select the appropriate protocol for the URL scheme, and return the
130d8acd8dcSDag-Erling Smørgrav  * size of the document referenced by the URL if it exists.
131d8acd8dcSDag-Erling Smørgrav  */
132d8acd8dcSDag-Erling Smørgrav int
13338c7e4a6SArchie Cobbs fetchStat(struct url *URL, struct url_stat *us, const char *flags)
134d8acd8dcSDag-Erling Smørgrav {
135c97925adSHajimu UMEMOTO 	int direct;
136c97925adSHajimu UMEMOTO 
137d74a913bSDag-Erling Smørgrav 	direct = CHECK_FLAG('d');
13802e6bec1SDag-Erling Smørgrav 	if (us != NULL) {
13902e6bec1SDag-Erling Smørgrav 		us->size = -1;
14002e6bec1SDag-Erling Smørgrav 		us->atime = us->mtime = 0;
14102e6bec1SDag-Erling Smørgrav 	}
14259769ab1SDag-Erling Smørgrav 	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
143e19e6098SDag-Erling Smørgrav 		return (fetchStatFile(URL, us, flags));
144e19e6098SDag-Erling Smørgrav 	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
145e19e6098SDag-Erling Smørgrav 		return (fetchStatFTP(URL, us, flags));
146111e2510SDag-Erling Smørgrav 	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
147111e2510SDag-Erling Smørgrav 		return (fetchStatHTTP(URL, us, flags));
148111e2510SDag-Erling Smørgrav 	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
149111e2510SDag-Erling Smørgrav 		return (fetchStatHTTP(URL, us, flags));
150d8acd8dcSDag-Erling Smørgrav 	_url_seterr(URL_BAD_SCHEME);
151e19e6098SDag-Erling Smørgrav 	return (-1);
152ecc91352SDag-Erling Smørgrav }
153ecc91352SDag-Erling Smørgrav 
154842a95ccSDag-Erling Smørgrav /*
155ce71b736SDag-Erling Smørgrav  * Select the appropriate protocol for the URL scheme, and return a
156ce71b736SDag-Erling Smørgrav  * list of files in the directory pointed to by the URL.
157ce71b736SDag-Erling Smørgrav  */
158ce71b736SDag-Erling Smørgrav struct url_ent *
15938c7e4a6SArchie Cobbs fetchList(struct url *URL, const char *flags)
160ce71b736SDag-Erling Smørgrav {
161c97925adSHajimu UMEMOTO 	int direct;
162c97925adSHajimu UMEMOTO 
163d74a913bSDag-Erling Smørgrav 	direct = CHECK_FLAG('d');
16459769ab1SDag-Erling Smørgrav 	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
165e19e6098SDag-Erling Smørgrav 		return (fetchListFile(URL, flags));
166e19e6098SDag-Erling Smørgrav 	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
167e19e6098SDag-Erling Smørgrav 		return (fetchListFTP(URL, flags));
168111e2510SDag-Erling Smørgrav 	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
169111e2510SDag-Erling Smørgrav 		return (fetchListHTTP(URL, flags));
170111e2510SDag-Erling Smørgrav 	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
171111e2510SDag-Erling Smørgrav 		return (fetchListHTTP(URL, flags));
172ce71b736SDag-Erling Smørgrav 	_url_seterr(URL_BAD_SCHEME);
173e19e6098SDag-Erling Smørgrav 	return (NULL);
174ce71b736SDag-Erling Smørgrav }
175ce71b736SDag-Erling Smørgrav 
/*
 * Attempt to parse the given URL; if successful, call fetchXGet().
 *
 * Returns NULL if the URL cannot be parsed, otherwise whatever
 * fetchXGet() returns.  The temporary struct url is released before
 * returning.
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *u;
	FILE *f;

	if ((u = fetchParseURL(URL)) == NULL)
		return (NULL);

	f = fetchXGet(u, us, flags);

	fetchFreeURL(u);
	return (f);
}
1934ca1ab94SDag-Erling Smørgrav 
/*
 * Attempt to parse the given URL; if successful, call fetchGet().
 *
 * Implemented as fetchXGetURL() with no struct url_stat to fill in.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
	return (fetchXGetURL(URL, NULL, flags));
}
2024ca1ab94SDag-Erling Smørgrav 
/*
 * Attempt to parse the given URL; if successful, call fetchPut().
 *
 * Returns NULL if the URL cannot be parsed, otherwise whatever
 * fetchPut() returns.  The temporary struct url is released before
 * returning.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
	struct url *u;
	FILE *f;

	if ((u = fetchParseURL(URL)) == NULL)
		return (NULL);

	f = fetchPut(u, flags);

	fetchFreeURL(u);
	return (f);
}
2204ca1ab94SDag-Erling Smørgrav 
/*
 * Attempt to parse the given URL; if successful, call fetchStat().
 *
 * Returns -1 if the URL cannot be parsed, otherwise whatever
 * fetchStat() returns.  The temporary struct url is released before
 * returning.
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *u;
	int s;

	if ((u = fetchParseURL(URL)) == NULL)
		return (-1);

	s = fetchStat(u, us, flags);

	fetchFreeURL(u);
	return (s);
}
238d8acd8dcSDag-Erling Smørgrav 
/*
 * Attempt to parse the given URL; if successful, call fetchList().
 *
 * Returns NULL if the URL cannot be parsed, otherwise whatever
 * fetchList() returns.  The temporary struct url is released before
 * returning.
 */
struct url_ent *
fetchListURL(const char *URL, const char *flags)
{
	struct url *u;
	struct url_ent *ue;

	if ((u = fetchParseURL(URL)) == NULL)
		return (NULL);

	ue = fetchList(u, flags);

	fetchFreeURL(u);
	return (ue);
}
256ce71b736SDag-Erling Smørgrav 
257ce71b736SDag-Erling Smørgrav /*
2589a964d6aSDag-Erling Smørgrav  * Make a URL
2599a964d6aSDag-Erling Smørgrav  */
2609a964d6aSDag-Erling Smørgrav struct url *
26138c7e4a6SArchie Cobbs fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
26238c7e4a6SArchie Cobbs     const char *user, const char *pwd)
2639a964d6aSDag-Erling Smørgrav {
2649a964d6aSDag-Erling Smørgrav 	struct url *u;
2659a964d6aSDag-Erling Smørgrav 
2669a964d6aSDag-Erling Smørgrav 	if (!scheme || (!host && !doc)) {
2679a964d6aSDag-Erling Smørgrav 		_url_seterr(URL_MALFORMED);
268e19e6098SDag-Erling Smørgrav 		return (NULL);
2699a964d6aSDag-Erling Smørgrav 	}
2709a964d6aSDag-Erling Smørgrav 
2719a964d6aSDag-Erling Smørgrav 	if (port < 0 || port > 65535) {
2729a964d6aSDag-Erling Smørgrav 		_url_seterr(URL_BAD_PORT);
273e19e6098SDag-Erling Smørgrav 		return (NULL);
2749a964d6aSDag-Erling Smørgrav 	}
2759a964d6aSDag-Erling Smørgrav 
2769a964d6aSDag-Erling Smørgrav 	/* allocate struct url */
277930105c1SDag-Erling Smørgrav 	if ((u = calloc(1, sizeof(*u))) == NULL) {
2789a964d6aSDag-Erling Smørgrav 		_fetch_syserr();
279e19e6098SDag-Erling Smørgrav 		return (NULL);
2809a964d6aSDag-Erling Smørgrav 	}
2819a964d6aSDag-Erling Smørgrav 
2829a964d6aSDag-Erling Smørgrav 	if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
2839a964d6aSDag-Erling Smørgrav 		_fetch_syserr();
2849a964d6aSDag-Erling Smørgrav 		free(u);
285e19e6098SDag-Erling Smørgrav 		return (NULL);
2869a964d6aSDag-Erling Smørgrav 	}
2879a964d6aSDag-Erling Smørgrav 
288930105c1SDag-Erling Smørgrav #define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x)
2899a964d6aSDag-Erling Smørgrav 	seturl(scheme);
2909a964d6aSDag-Erling Smørgrav 	seturl(host);
2919a964d6aSDag-Erling Smørgrav 	seturl(user);
2929a964d6aSDag-Erling Smørgrav 	seturl(pwd);
2939a964d6aSDag-Erling Smørgrav #undef seturl
2949a964d6aSDag-Erling Smørgrav 	u->port = port;
2959a964d6aSDag-Erling Smørgrav 
296e19e6098SDag-Erling Smørgrav 	return (u);
2979a964d6aSDag-Erling Smørgrav }
2989a964d6aSDag-Erling Smørgrav 
2999a964d6aSDag-Erling Smørgrav /*
3004ca1ab94SDag-Erling Smørgrav  * Split an URL into components. URL syntax is:
30159769ab1SDag-Erling Smørgrav  * [method:/][/[user[:pwd]@]host[:port]/][document]
3024ca1ab94SDag-Erling Smørgrav  * This almost, but not quite, RFC1738 URL syntax.
3034ca1ab94SDag-Erling Smørgrav  */
304d8acd8dcSDag-Erling Smørgrav struct url *
30538c7e4a6SArchie Cobbs fetchParseURL(const char *URL)
3064ca1ab94SDag-Erling Smørgrav {
30738c7e4a6SArchie Cobbs 	char *doc;
30838c7e4a6SArchie Cobbs 	const char *p, *q;
309d8acd8dcSDag-Erling Smørgrav 	struct url *u;
3104ca1ab94SDag-Erling Smørgrav 	int i;
3114ca1ab94SDag-Erling Smørgrav 
312d8acd8dcSDag-Erling Smørgrav 	/* allocate struct url */
313930105c1SDag-Erling Smørgrav 	if ((u = calloc(1, sizeof(*u))) == NULL) {
314d8acd8dcSDag-Erling Smørgrav 		_fetch_syserr();
315e19e6098SDag-Erling Smørgrav 		return (NULL);
316d8acd8dcSDag-Erling Smørgrav 	}
3174ca1ab94SDag-Erling Smørgrav 
3184ca1ab94SDag-Erling Smørgrav 	/* scheme name */
31959769ab1SDag-Erling Smørgrav 	if ((p = strstr(URL, ":/"))) {
320e19e6098SDag-Erling Smørgrav 		snprintf(u->scheme, URL_SCHEMELEN+1,
321e19e6098SDag-Erling Smørgrav 		    "%.*s", (int)(p - URL), URL);
32259769ab1SDag-Erling Smørgrav 		URL = ++p;
32359769ab1SDag-Erling Smørgrav 		/*
32459769ab1SDag-Erling Smørgrav 		 * Only one slash: no host, leave slash as part of document
32559769ab1SDag-Erling Smørgrav 		 * Two slashes: host follows, strip slashes
32659769ab1SDag-Erling Smørgrav 		 */
32759769ab1SDag-Erling Smørgrav 		if (URL[1] == '/')
32859769ab1SDag-Erling Smørgrav 			URL = (p += 2);
3291ba84976SDag-Erling Smørgrav 	} else {
3301ba84976SDag-Erling Smørgrav 		p = URL;
331d8acd8dcSDag-Erling Smørgrav 	}
3325b2ad516SDag-Erling Smørgrav 	if (!*URL || *URL == '/' || *URL == '.' ||
33373b3e4dfSStefan Eßer 	    (u->scheme[0] == '\0' &&
33473b3e4dfSStefan Eßer 		strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
3354ca1ab94SDag-Erling Smørgrav 		goto nohost;
3364ca1ab94SDag-Erling Smørgrav 
3374ca1ab94SDag-Erling Smørgrav 	p = strpbrk(URL, "/@");
3380fba3a00SDag-Erling Smørgrav 	if (p && *p == '@') {
3394ca1ab94SDag-Erling Smørgrav 		/* username */
3404ca1ab94SDag-Erling Smørgrav 		for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++)
3414ca1ab94SDag-Erling Smørgrav 			if (i < URL_USERLEN)
3424ca1ab94SDag-Erling Smørgrav 				u->user[i++] = *q;
3434ca1ab94SDag-Erling Smørgrav 
3444ca1ab94SDag-Erling Smørgrav 		/* password */
3454ca1ab94SDag-Erling Smørgrav 		if (*q == ':')
3464ca1ab94SDag-Erling Smørgrav 			for (q++, i = 0; (*q != ':') && (*q != '@'); q++)
3474ca1ab94SDag-Erling Smørgrav 				if (i < URL_PWDLEN)
3484ca1ab94SDag-Erling Smørgrav 					u->pwd[i++] = *q;
3494ca1ab94SDag-Erling Smørgrav 
3504ca1ab94SDag-Erling Smørgrav 		p++;
351ab39353eSDag-Erling Smørgrav 	} else {
352ab39353eSDag-Erling Smørgrav 		p = URL;
353ab39353eSDag-Erling Smørgrav 	}
3544ca1ab94SDag-Erling Smørgrav 
3554ca1ab94SDag-Erling Smørgrav 	/* hostname */
35628c645cfSHajimu UMEMOTO #ifdef INET6
35728c645cfSHajimu UMEMOTO 	if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
35828c645cfSHajimu UMEMOTO 	    (*++q == '\0' || *q == '/' || *q == ':')) {
35928c645cfSHajimu UMEMOTO 		if ((i = q - p - 2) > MAXHOSTNAMELEN)
36028c645cfSHajimu UMEMOTO 			i = MAXHOSTNAMELEN;
36128c645cfSHajimu UMEMOTO 		strncpy(u->host, ++p, i);
36228c645cfSHajimu UMEMOTO 		p = q;
36328c645cfSHajimu UMEMOTO 	} else
36428c645cfSHajimu UMEMOTO #endif
3654ca1ab94SDag-Erling Smørgrav 		for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
3664ca1ab94SDag-Erling Smørgrav 			if (i < MAXHOSTNAMELEN)
3674ca1ab94SDag-Erling Smørgrav 				u->host[i++] = *p;
3684ca1ab94SDag-Erling Smørgrav 
3694ca1ab94SDag-Erling Smørgrav 	/* port */
3704ca1ab94SDag-Erling Smørgrav 	if (*p == ':') {
3714ca1ab94SDag-Erling Smørgrav 		for (q = ++p; *q && (*q != '/'); q++)
3724ca1ab94SDag-Erling Smørgrav 			if (isdigit(*q))
3734ca1ab94SDag-Erling Smørgrav 				u->port = u->port * 10 + (*q - '0');
374d8acd8dcSDag-Erling Smørgrav 			else {
375d8acd8dcSDag-Erling Smørgrav 				/* invalid port */
376d8acd8dcSDag-Erling Smørgrav 				_url_seterr(URL_BAD_PORT);
377d8acd8dcSDag-Erling Smørgrav 				goto ouch;
378d8acd8dcSDag-Erling Smørgrav 			}
379551858f0SDag-Erling Smørgrav 		p = q;
3804ca1ab94SDag-Erling Smørgrav 	}
3814ca1ab94SDag-Erling Smørgrav 
3824ca1ab94SDag-Erling Smørgrav nohost:
3834ca1ab94SDag-Erling Smørgrav 	/* document */
38460245e42SDag-Erling Smørgrav 	if (!*p)
38560245e42SDag-Erling Smørgrav 		p = "/";
38660245e42SDag-Erling Smørgrav 
387f9c2053bSDag-Erling Smørgrav 	if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 ||
388f9c2053bSDag-Erling Smørgrav 	    strcasecmp(u->scheme, SCHEME_HTTPS) == 0) {
38923fe6d7aSDag-Erling Smørgrav 		const char hexnums[] = "0123456789abcdef";
39023fe6d7aSDag-Erling Smørgrav 
391f9c2053bSDag-Erling Smørgrav 		/* percent-escape whitespace. */
392f9c2053bSDag-Erling Smørgrav 		if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
39323fe6d7aSDag-Erling Smørgrav 			_fetch_syserr();
39423fe6d7aSDag-Erling Smørgrav 			goto ouch;
39523fe6d7aSDag-Erling Smørgrav 		}
396f9c2053bSDag-Erling Smørgrav 		u->doc = doc;
39723fe6d7aSDag-Erling Smørgrav 		while (*p != '\0') {
39823fe6d7aSDag-Erling Smørgrav 			if (!isspace(*p)) {
39923fe6d7aSDag-Erling Smørgrav 				*doc++ = *p++;
40023fe6d7aSDag-Erling Smørgrav 			} else {
40123fe6d7aSDag-Erling Smørgrav 				*doc++ = '%';
40223fe6d7aSDag-Erling Smørgrav 				*doc++ = hexnums[((unsigned int)*p) >> 4];
40323fe6d7aSDag-Erling Smørgrav 				*doc++ = hexnums[((unsigned int)*p) & 0xf];
40423fe6d7aSDag-Erling Smørgrav 				p++;
40523fe6d7aSDag-Erling Smørgrav 			}
40623fe6d7aSDag-Erling Smørgrav 		}
40723fe6d7aSDag-Erling Smørgrav 		*doc = '\0';
40823fe6d7aSDag-Erling Smørgrav 	} else if ((u->doc = strdup(p)) == NULL) {
409d8acd8dcSDag-Erling Smørgrav 		_fetch_syserr();
4104ca1ab94SDag-Erling Smørgrav 		goto ouch;
411d8acd8dcSDag-Erling Smørgrav 	}
4124ca1ab94SDag-Erling Smørgrav 
4134ca1ab94SDag-Erling Smørgrav 	DEBUG(fprintf(stderr,
414f67efa37SDag-Erling Smørgrav 		  "scheme:   [%s]\n"
415f67efa37SDag-Erling Smørgrav 		  "user:     [%s]\n"
416f67efa37SDag-Erling Smørgrav 		  "password: [%s]\n"
417f67efa37SDag-Erling Smørgrav 		  "host:     [%s]\n"
418f67efa37SDag-Erling Smørgrav 		  "port:     [%d]\n"
419f67efa37SDag-Erling Smørgrav 		  "document: [%s]\n",
4204ca1ab94SDag-Erling Smørgrav 		  u->scheme, u->user, u->pwd,
4214ca1ab94SDag-Erling Smørgrav 		  u->host, u->port, u->doc));
4224ca1ab94SDag-Erling Smørgrav 
423e19e6098SDag-Erling Smørgrav 	return (u);
4244ca1ab94SDag-Erling Smørgrav 
4254ca1ab94SDag-Erling Smørgrav ouch:
4264ca1ab94SDag-Erling Smørgrav 	free(u);
427e19e6098SDag-Erling Smørgrav 	return (NULL);
4284ca1ab94SDag-Erling Smørgrav }
42960245e42SDag-Erling Smørgrav 
43060245e42SDag-Erling Smørgrav /*
43160245e42SDag-Erling Smørgrav  * Free a URL
43260245e42SDag-Erling Smørgrav  */
43360245e42SDag-Erling Smørgrav void
43460245e42SDag-Erling Smørgrav fetchFreeURL(struct url *u)
43560245e42SDag-Erling Smørgrav {
43660245e42SDag-Erling Smørgrav 	free(u->doc);
43760245e42SDag-Erling Smørgrav 	free(u);
43860245e42SDag-Erling Smørgrav }
439