xref: /freebsd/lib/libfetch/fetch.c (revision 1ba84976606b34e81005aefcb131d48fe22a1118)
14ca1ab94SDag-Erling Smørgrav /*-
24ca1ab94SDag-Erling Smørgrav  * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
34ca1ab94SDag-Erling Smørgrav  * All rights reserved.
44ca1ab94SDag-Erling Smørgrav  *
54ca1ab94SDag-Erling Smørgrav  * Redistribution and use in source and binary forms, with or without
64ca1ab94SDag-Erling Smørgrav  * modification, are permitted provided that the following conditions
74ca1ab94SDag-Erling Smørgrav  * are met:
84ca1ab94SDag-Erling Smørgrav  * 1. Redistributions of source code must retain the above copyright
94ca1ab94SDag-Erling Smørgrav  *    notice, this list of conditions and the following disclaimer
104ca1ab94SDag-Erling Smørgrav  *    in this position and unchanged.
114ca1ab94SDag-Erling Smørgrav  * 2. Redistributions in binary form must reproduce the above copyright
124ca1ab94SDag-Erling Smørgrav  *    notice, this list of conditions and the following disclaimer in the
134ca1ab94SDag-Erling Smørgrav  *    documentation and/or other materials provided with the distribution.
144ca1ab94SDag-Erling Smørgrav  * 3. The name of the author may not be used to endorse or promote products
154ca1ab94SDag-Erling Smørgrav  *    derived from this software without specific prior written permission
164ca1ab94SDag-Erling Smørgrav  *
174ca1ab94SDag-Erling Smørgrav  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
184ca1ab94SDag-Erling Smørgrav  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
194ca1ab94SDag-Erling Smørgrav  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
204ca1ab94SDag-Erling Smørgrav  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
214ca1ab94SDag-Erling Smørgrav  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
224ca1ab94SDag-Erling Smørgrav  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
234ca1ab94SDag-Erling Smørgrav  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
244ca1ab94SDag-Erling Smørgrav  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
254ca1ab94SDag-Erling Smørgrav  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
264ca1ab94SDag-Erling Smørgrav  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
274ca1ab94SDag-Erling Smørgrav  *
287f3dea24SPeter Wemm  * $FreeBSD$
294ca1ab94SDag-Erling Smørgrav  */
304ca1ab94SDag-Erling Smørgrav 
314ca1ab94SDag-Erling Smørgrav #include <sys/param.h>
32d8acd8dcSDag-Erling Smørgrav #include <sys/errno.h>
334ca1ab94SDag-Erling Smørgrav 
344ca1ab94SDag-Erling Smørgrav #include <ctype.h>
354ca1ab94SDag-Erling Smørgrav #include <stdio.h>
364ca1ab94SDag-Erling Smørgrav #include <stdlib.h>
374ca1ab94SDag-Erling Smørgrav #include <string.h>
384ca1ab94SDag-Erling Smørgrav 
394ca1ab94SDag-Erling Smørgrav #include "fetch.h"
40d8acd8dcSDag-Erling Smørgrav #include "common.h"
414ca1ab94SDag-Erling Smørgrav 
424ca1ab94SDag-Erling Smørgrav 
430fba3a00SDag-Erling Smørgrav int	 fetchLastErrCode;
44ba101983SDag-Erling Smørgrav char	 fetchLastErrString[MAXERRSTRING];
45fc6e9e65SDag-Erling Smørgrav int	 fetchTimeout;
46a1bb3f48SDag-Erling Smørgrav int	 fetchRestartCalls = 1;
470fba3a00SDag-Erling Smørgrav 
480fba3a00SDag-Erling Smørgrav 
49d8acd8dcSDag-Erling Smørgrav /*** Local data **************************************************************/
50d8acd8dcSDag-Erling Smørgrav 
51d8acd8dcSDag-Erling Smørgrav /*
52d8acd8dcSDag-Erling Smørgrav  * Error messages for parser errors
53d8acd8dcSDag-Erling Smørgrav  */
54d8acd8dcSDag-Erling Smørgrav #define URL_MALFORMED		1
55d8acd8dcSDag-Erling Smørgrav #define URL_BAD_SCHEME		2
56d8acd8dcSDag-Erling Smørgrav #define URL_BAD_PORT		3
57d8acd8dcSDag-Erling Smørgrav static struct fetcherr _url_errlist[] = {
58d8acd8dcSDag-Erling Smørgrav     { URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
59d8acd8dcSDag-Erling Smørgrav     { URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
60d8acd8dcSDag-Erling Smørgrav     { URL_BAD_PORT,	FETCH_URL,	"Invalid server port" },
61d8acd8dcSDag-Erling Smørgrav     { -1,		FETCH_UNKNOWN,	"Unknown parser error" }
62d8acd8dcSDag-Erling Smørgrav };
63d8acd8dcSDag-Erling Smørgrav 
64d8acd8dcSDag-Erling Smørgrav 
65d8acd8dcSDag-Erling Smørgrav /*** Public API **************************************************************/
664ca1ab94SDag-Erling Smørgrav 
67842a95ccSDag-Erling Smørgrav /*
68842a95ccSDag-Erling Smørgrav  * Select the appropriate protocol for the URL scheme, and return a
69842a95ccSDag-Erling Smørgrav  * read-only stream connected to the document referenced by the URL.
701a5faa10SDag-Erling Smørgrav  * Also fill out the struct url_stat.
71842a95ccSDag-Erling Smørgrav  */
72ecc91352SDag-Erling Smørgrav FILE *
731a5faa10SDag-Erling Smørgrav fetchXGet(struct url *URL, struct url_stat *us, char *flags)
74ecc91352SDag-Erling Smørgrav {
75c97925adSHajimu UMEMOTO     int direct;
76c97925adSHajimu UMEMOTO 
77d74a913bSDag-Erling Smørgrav     direct = CHECK_FLAG('d');
7859769ab1SDag-Erling Smørgrav     if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
791a5faa10SDag-Erling Smørgrav 	return fetchXGetFile(URL, us, flags);
8059769ab1SDag-Erling Smørgrav     else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
811a5faa10SDag-Erling Smørgrav 	return fetchXGetHTTP(URL, us, flags);
8259769ab1SDag-Erling Smørgrav     else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
831a5faa10SDag-Erling Smørgrav 	return fetchXGetFTP(URL, us, flags);
84c97925adSHajimu UMEMOTO     } else {
85d8acd8dcSDag-Erling Smørgrav 	_url_seterr(URL_BAD_SCHEME);
86d8acd8dcSDag-Erling Smørgrav 	return NULL;
87d8acd8dcSDag-Erling Smørgrav     }
88ecc91352SDag-Erling Smørgrav }
89ecc91352SDag-Erling Smørgrav 
/*
 * Convenience form of fetchXGet() for callers that do not want a
 * struct url_stat: NULL is passed in its place.
 */
FILE *
fetchGet(struct url *URL, char *flags)
{
    FILE *stream;

    stream = fetchXGet(URL, NULL, flags);
    return stream;
}
991a5faa10SDag-Erling Smørgrav 
1001a5faa10SDag-Erling Smørgrav /*
1011a5faa10SDag-Erling Smørgrav  * Select the appropriate protocol for the URL scheme, and return a
102842a95ccSDag-Erling Smørgrav  * write-only stream connected to the document referenced by the URL.
103842a95ccSDag-Erling Smørgrav  */
104ecc91352SDag-Erling Smørgrav FILE *
105d8acd8dcSDag-Erling Smørgrav fetchPut(struct url *URL, char *flags)
106ecc91352SDag-Erling Smørgrav {
107c97925adSHajimu UMEMOTO     int direct;
108c97925adSHajimu UMEMOTO 
109d74a913bSDag-Erling Smørgrav     direct = CHECK_FLAG('d');
11059769ab1SDag-Erling Smørgrav     if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
111ecc91352SDag-Erling Smørgrav 	return fetchPutFile(URL, flags);
11259769ab1SDag-Erling Smørgrav     else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
113ecc91352SDag-Erling Smørgrav 	return fetchPutHTTP(URL, flags);
11459769ab1SDag-Erling Smørgrav     else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
115ecc91352SDag-Erling Smørgrav 	return fetchPutFTP(URL, flags);
116c97925adSHajimu UMEMOTO     } else {
117d8acd8dcSDag-Erling Smørgrav 	_url_seterr(URL_BAD_SCHEME);
118d8acd8dcSDag-Erling Smørgrav 	return NULL;
119d8acd8dcSDag-Erling Smørgrav     }
120d8acd8dcSDag-Erling Smørgrav }
121d8acd8dcSDag-Erling Smørgrav 
122d8acd8dcSDag-Erling Smørgrav /*
123d8acd8dcSDag-Erling Smørgrav  * Select the appropriate protocol for the URL scheme, and return the
124d8acd8dcSDag-Erling Smørgrav  * size of the document referenced by the URL if it exists.
125d8acd8dcSDag-Erling Smørgrav  */
126d8acd8dcSDag-Erling Smørgrav int
127d8acd8dcSDag-Erling Smørgrav fetchStat(struct url *URL, struct url_stat *us, char *flags)
128d8acd8dcSDag-Erling Smørgrav {
129c97925adSHajimu UMEMOTO     int direct;
130c97925adSHajimu UMEMOTO 
131d74a913bSDag-Erling Smørgrav     direct = CHECK_FLAG('d');
13259769ab1SDag-Erling Smørgrav     if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
133d8acd8dcSDag-Erling Smørgrav 	return fetchStatFile(URL, us, flags);
13459769ab1SDag-Erling Smørgrav     else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
135d8acd8dcSDag-Erling Smørgrav 	return fetchStatHTTP(URL, us, flags);
13659769ab1SDag-Erling Smørgrav     else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
137d8acd8dcSDag-Erling Smørgrav 	return fetchStatFTP(URL, us, flags);
138c97925adSHajimu UMEMOTO     } else {
139d8acd8dcSDag-Erling Smørgrav 	_url_seterr(URL_BAD_SCHEME);
140d8acd8dcSDag-Erling Smørgrav 	return -1;
141d8acd8dcSDag-Erling Smørgrav     }
142ecc91352SDag-Erling Smørgrav }
143ecc91352SDag-Erling Smørgrav 
144842a95ccSDag-Erling Smørgrav /*
145ce71b736SDag-Erling Smørgrav  * Select the appropriate protocol for the URL scheme, and return a
146ce71b736SDag-Erling Smørgrav  * list of files in the directory pointed to by the URL.
147ce71b736SDag-Erling Smørgrav  */
148ce71b736SDag-Erling Smørgrav struct url_ent *
149ce71b736SDag-Erling Smørgrav fetchList(struct url *URL, char *flags)
150ce71b736SDag-Erling Smørgrav {
151c97925adSHajimu UMEMOTO     int direct;
152c97925adSHajimu UMEMOTO 
153d74a913bSDag-Erling Smørgrav     direct = CHECK_FLAG('d');
15459769ab1SDag-Erling Smørgrav     if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
155ce71b736SDag-Erling Smørgrav 	return fetchListFile(URL, flags);
15659769ab1SDag-Erling Smørgrav     else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
157ce71b736SDag-Erling Smørgrav 	return fetchListHTTP(URL, flags);
15859769ab1SDag-Erling Smørgrav     else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
159ce71b736SDag-Erling Smørgrav 	return fetchListFTP(URL, flags);
160c97925adSHajimu UMEMOTO     } else {
161ce71b736SDag-Erling Smørgrav 	_url_seterr(URL_BAD_SCHEME);
162ce71b736SDag-Erling Smørgrav 	return NULL;
163ce71b736SDag-Erling Smørgrav     }
164ce71b736SDag-Erling Smørgrav }
165ce71b736SDag-Erling Smørgrav 
/*
 * String front end for fetchXGet(): parse URL, run fetchXGet() on the
 * result, then release the parsed URL again.
 *
 * Returns NULL when the URL cannot be parsed; otherwise whatever
 * fetchXGet() returned.
 */
FILE *
fetchXGetURL(char *URL, struct url_stat *us, char *flags)
{
    struct url *parsed;
    FILE *stream = NULL;

    parsed = fetchParseURL(URL);
    if (parsed != NULL) {
	stream = fetchXGet(parsed, us, flags);
	fetchFreeURL(parsed);
    }
    return stream;
}
1834ca1ab94SDag-Erling Smørgrav 
/*
 * Convenience form of fetchXGetURL() for callers that do not want a
 * struct url_stat: NULL is passed in its place.
 */
FILE *
fetchGetURL(char *URL, char *flags)
{
    FILE *stream;

    stream = fetchXGetURL(URL, NULL, flags);
    return stream;
}
1924ca1ab94SDag-Erling Smørgrav 
/*
 * String front end for fetchPut(): parse URL, run fetchPut() on the
 * result, then release the parsed URL again.
 *
 * Returns NULL when the URL cannot be parsed; otherwise whatever
 * fetchPut() returned.
 */
FILE *
fetchPutURL(char *URL, char *flags)
{
    struct url *parsed;
    FILE *stream = NULL;

    parsed = fetchParseURL(URL);
    if (parsed != NULL) {
	stream = fetchPut(parsed, flags);
	fetchFreeURL(parsed);
    }
    return stream;
}
2104ca1ab94SDag-Erling Smørgrav 
/*
 * String front end for fetchStat(): parse URL, run fetchStat() on the
 * result, then release the parsed URL again.
 *
 * Returns -1 when the URL cannot be parsed; otherwise whatever
 * fetchStat() returned.
 */
int
fetchStatURL(char *URL, struct url_stat *us, char *flags)
{
    struct url *parsed;
    int status = -1;

    parsed = fetchParseURL(URL);
    if (parsed != NULL) {
	status = fetchStat(parsed, us, flags);
	fetchFreeURL(parsed);
    }
    return status;
}
228d8acd8dcSDag-Erling Smørgrav 
/*
 * String front end for fetchList(): parse URL, run fetchList() on the
 * result, then release the parsed URL again.
 *
 * Returns NULL when the URL cannot be parsed; otherwise whatever
 * fetchList() returned.
 */
struct url_ent *
fetchListURL(char *URL, char *flags)
{
    struct url *parsed;
    struct url_ent *entries = NULL;

    parsed = fetchParseURL(URL);
    if (parsed != NULL) {
	entries = fetchList(parsed, flags);
	fetchFreeURL(parsed);
    }
    return entries;
}
246ce71b736SDag-Erling Smørgrav 
247ce71b736SDag-Erling Smørgrav /*
2489a964d6aSDag-Erling Smørgrav  * Make a URL
2499a964d6aSDag-Erling Smørgrav  */
2509a964d6aSDag-Erling Smørgrav struct url *
2519a964d6aSDag-Erling Smørgrav fetchMakeURL(char *scheme, char *host, int port, char *doc,
2529a964d6aSDag-Erling Smørgrav     char *user, char *pwd)
2539a964d6aSDag-Erling Smørgrav {
2549a964d6aSDag-Erling Smørgrav     struct url *u;
2559a964d6aSDag-Erling Smørgrav 
2569a964d6aSDag-Erling Smørgrav     if (!scheme || (!host && !doc)) {
2579a964d6aSDag-Erling Smørgrav 	_url_seterr(URL_MALFORMED);
2589a964d6aSDag-Erling Smørgrav 	return NULL;
2599a964d6aSDag-Erling Smørgrav     }
2609a964d6aSDag-Erling Smørgrav 
2619a964d6aSDag-Erling Smørgrav     if (port < 0 || port > 65535) {
2629a964d6aSDag-Erling Smørgrav 	_url_seterr(URL_BAD_PORT);
2639a964d6aSDag-Erling Smørgrav 	return NULL;
2649a964d6aSDag-Erling Smørgrav     }
2659a964d6aSDag-Erling Smørgrav 
2669a964d6aSDag-Erling Smørgrav     /* allocate struct url */
2679a964d6aSDag-Erling Smørgrav     if ((u = calloc(1, sizeof *u)) == NULL) {
2689a964d6aSDag-Erling Smørgrav 	_fetch_syserr();
2699a964d6aSDag-Erling Smørgrav 	return NULL;
2709a964d6aSDag-Erling Smørgrav     }
2719a964d6aSDag-Erling Smørgrav 
2729a964d6aSDag-Erling Smørgrav     if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
2739a964d6aSDag-Erling Smørgrav 	_fetch_syserr();
2749a964d6aSDag-Erling Smørgrav 	free(u);
2759a964d6aSDag-Erling Smørgrav 	return NULL;
2769a964d6aSDag-Erling Smørgrav     }
2779a964d6aSDag-Erling Smørgrav 
2789a964d6aSDag-Erling Smørgrav #define seturl(x) snprintf(u->x, sizeof u->x, "%s", x)
2799a964d6aSDag-Erling Smørgrav     seturl(scheme);
2809a964d6aSDag-Erling Smørgrav     seturl(host);
2819a964d6aSDag-Erling Smørgrav     seturl(user);
2829a964d6aSDag-Erling Smørgrav     seturl(pwd);
2839a964d6aSDag-Erling Smørgrav #undef seturl
2849a964d6aSDag-Erling Smørgrav     u->port = port;
2859a964d6aSDag-Erling Smørgrav 
2869a964d6aSDag-Erling Smørgrav     return u;
2879a964d6aSDag-Erling Smørgrav }
2889a964d6aSDag-Erling Smørgrav 
2899a964d6aSDag-Erling Smørgrav /*
2904ca1ab94SDag-Erling Smørgrav  * Split an URL into components. URL syntax is:
29159769ab1SDag-Erling Smørgrav  * [method:/][/[user[:pwd]@]host[:port]/][document]
2924ca1ab94SDag-Erling Smørgrav  * This almost, but not quite, RFC1738 URL syntax.
2934ca1ab94SDag-Erling Smørgrav  */
294d8acd8dcSDag-Erling Smørgrav struct url *
2954ca1ab94SDag-Erling Smørgrav fetchParseURL(char *URL)
2964ca1ab94SDag-Erling Smørgrav {
297f9c2053bSDag-Erling Smørgrav     char *doc, *p, *q;
298d8acd8dcSDag-Erling Smørgrav     struct url *u;
2994ca1ab94SDag-Erling Smørgrav     int i;
3004ca1ab94SDag-Erling Smørgrav 
301d8acd8dcSDag-Erling Smørgrav     /* allocate struct url */
302807c941cSDag-Erling Smørgrav     if ((u = calloc(1, sizeof *u)) == NULL) {
303d8acd8dcSDag-Erling Smørgrav 	_fetch_syserr();
3044ca1ab94SDag-Erling Smørgrav 	return NULL;
305d8acd8dcSDag-Erling Smørgrav     }
3064ca1ab94SDag-Erling Smørgrav 
3074ca1ab94SDag-Erling Smørgrav     /* scheme name */
30859769ab1SDag-Erling Smørgrav     if ((p = strstr(URL, ":/"))) {
30959769ab1SDag-Erling Smørgrav 	snprintf(u->scheme, URL_SCHEMELEN+1, "%.*s", p - URL, URL);
31059769ab1SDag-Erling Smørgrav 	URL = ++p;
31159769ab1SDag-Erling Smørgrav 	/*
31259769ab1SDag-Erling Smørgrav 	 * Only one slash: no host, leave slash as part of document
31359769ab1SDag-Erling Smørgrav 	 * Two slashes: host follows, strip slashes
31459769ab1SDag-Erling Smørgrav 	 */
31559769ab1SDag-Erling Smørgrav 	if (URL[1] == '/')
31659769ab1SDag-Erling Smørgrav 	    URL = (p += 2);
3171ba84976SDag-Erling Smørgrav     } else {
3181ba84976SDag-Erling Smørgrav 	p = URL;
319d8acd8dcSDag-Erling Smørgrav     }
32059769ab1SDag-Erling Smørgrav     if (!*URL || *URL == '/')
3214ca1ab94SDag-Erling Smørgrav 	goto nohost;
3224ca1ab94SDag-Erling Smørgrav 
3234ca1ab94SDag-Erling Smørgrav     p = strpbrk(URL, "/@");
3240fba3a00SDag-Erling Smørgrav     if (p && *p == '@') {
3254ca1ab94SDag-Erling Smørgrav 	/* username */
3264ca1ab94SDag-Erling Smørgrav 	for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++)
3274ca1ab94SDag-Erling Smørgrav 	    if (i < URL_USERLEN)
3284ca1ab94SDag-Erling Smørgrav 		u->user[i++] = *q;
3294ca1ab94SDag-Erling Smørgrav 
3304ca1ab94SDag-Erling Smørgrav 	/* password */
3314ca1ab94SDag-Erling Smørgrav 	if (*q == ':')
3324ca1ab94SDag-Erling Smørgrav 	    for (q++, i = 0; (*q != ':') && (*q != '@'); q++)
3334ca1ab94SDag-Erling Smørgrav 		if (i < URL_PWDLEN)
3344ca1ab94SDag-Erling Smørgrav 		    u->pwd[i++] = *q;
3354ca1ab94SDag-Erling Smørgrav 
3364ca1ab94SDag-Erling Smørgrav 	p++;
3374ca1ab94SDag-Erling Smørgrav     } else p = URL;
3384ca1ab94SDag-Erling Smørgrav 
3394ca1ab94SDag-Erling Smørgrav     /* hostname */
34028c645cfSHajimu UMEMOTO #ifdef INET6
34128c645cfSHajimu UMEMOTO     if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
34228c645cfSHajimu UMEMOTO 	(*++q == '\0' || *q == '/' || *q == ':')) {
34328c645cfSHajimu UMEMOTO 	if ((i = q - p - 2) > MAXHOSTNAMELEN)
34428c645cfSHajimu UMEMOTO 	    i = MAXHOSTNAMELEN;
34528c645cfSHajimu UMEMOTO 	strncpy(u->host, ++p, i);
34628c645cfSHajimu UMEMOTO 	p = q;
34728c645cfSHajimu UMEMOTO     } else
34828c645cfSHajimu UMEMOTO #endif
3494ca1ab94SDag-Erling Smørgrav 	for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
3504ca1ab94SDag-Erling Smørgrav 	    if (i < MAXHOSTNAMELEN)
3514ca1ab94SDag-Erling Smørgrav 		u->host[i++] = *p;
3524ca1ab94SDag-Erling Smørgrav 
3534ca1ab94SDag-Erling Smørgrav     /* port */
3544ca1ab94SDag-Erling Smørgrav     if (*p == ':') {
3554ca1ab94SDag-Erling Smørgrav 	for (q = ++p; *q && (*q != '/'); q++)
3564ca1ab94SDag-Erling Smørgrav 	    if (isdigit(*q))
3574ca1ab94SDag-Erling Smørgrav 		u->port = u->port * 10 + (*q - '0');
358d8acd8dcSDag-Erling Smørgrav 	    else {
359d8acd8dcSDag-Erling Smørgrav 		/* invalid port */
360d8acd8dcSDag-Erling Smørgrav 		_url_seterr(URL_BAD_PORT);
361d8acd8dcSDag-Erling Smørgrav 		goto ouch;
362d8acd8dcSDag-Erling Smørgrav 	    }
3634ca1ab94SDag-Erling Smørgrav 	while (*p && (*p != '/'))
3644ca1ab94SDag-Erling Smørgrav 	    p++;
3654ca1ab94SDag-Erling Smørgrav     }
3664ca1ab94SDag-Erling Smørgrav 
3674ca1ab94SDag-Erling Smørgrav nohost:
3684ca1ab94SDag-Erling Smørgrav     /* document */
36960245e42SDag-Erling Smørgrav     if (!*p)
37060245e42SDag-Erling Smørgrav 	p = "/";
37160245e42SDag-Erling Smørgrav 
372f9c2053bSDag-Erling Smørgrav     if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 ||
373f9c2053bSDag-Erling Smørgrav 	strcasecmp(u->scheme, SCHEME_HTTPS) == 0) {
37423fe6d7aSDag-Erling Smørgrav 	const char hexnums[] = "0123456789abcdef";
37523fe6d7aSDag-Erling Smørgrav 
376f9c2053bSDag-Erling Smørgrav 	/* percent-escape whitespace. */
377f9c2053bSDag-Erling Smørgrav 	if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
37823fe6d7aSDag-Erling Smørgrav 	    _fetch_syserr();
37923fe6d7aSDag-Erling Smørgrav 	    goto ouch;
38023fe6d7aSDag-Erling Smørgrav 	}
381f9c2053bSDag-Erling Smørgrav 	u->doc = doc;
38223fe6d7aSDag-Erling Smørgrav 	while (*p != '\0') {
38323fe6d7aSDag-Erling Smørgrav 	    if (!isspace(*p)) {
38423fe6d7aSDag-Erling Smørgrav 		*doc++ = *p++;
38523fe6d7aSDag-Erling Smørgrav             } else {
38623fe6d7aSDag-Erling Smørgrav 		*doc++ = '%';
38723fe6d7aSDag-Erling Smørgrav 		*doc++ = hexnums[((unsigned int)*p) >> 4];
38823fe6d7aSDag-Erling Smørgrav 		*doc++ = hexnums[((unsigned int)*p) & 0xf];
38923fe6d7aSDag-Erling Smørgrav 		p++;
39023fe6d7aSDag-Erling Smørgrav             }
39123fe6d7aSDag-Erling Smørgrav 	}
39223fe6d7aSDag-Erling Smørgrav 	*doc = '\0';
39323fe6d7aSDag-Erling Smørgrav     } else if ((u->doc = strdup(p)) == NULL) {
394d8acd8dcSDag-Erling Smørgrav 	_fetch_syserr();
3954ca1ab94SDag-Erling Smørgrav 	goto ouch;
396d8acd8dcSDag-Erling Smørgrav     }
3974ca1ab94SDag-Erling Smørgrav 
3984ca1ab94SDag-Erling Smørgrav     DEBUG(fprintf(stderr,
3994ca1ab94SDag-Erling Smørgrav 		  "scheme:   [\033[1m%s\033[m]\n"
4004ca1ab94SDag-Erling Smørgrav 		  "user:     [\033[1m%s\033[m]\n"
4014ca1ab94SDag-Erling Smørgrav 		  "password: [\033[1m%s\033[m]\n"
4024ca1ab94SDag-Erling Smørgrav 		  "host:     [\033[1m%s\033[m]\n"
4034ca1ab94SDag-Erling Smørgrav 		  "port:     [\033[1m%d\033[m]\n"
4044ca1ab94SDag-Erling Smørgrav 		  "document: [\033[1m%s\033[m]\n",
4054ca1ab94SDag-Erling Smørgrav 		  u->scheme, u->user, u->pwd,
4064ca1ab94SDag-Erling Smørgrav 		  u->host, u->port, u->doc));
4074ca1ab94SDag-Erling Smørgrav 
4084ca1ab94SDag-Erling Smørgrav     return u;
4094ca1ab94SDag-Erling Smørgrav 
4104ca1ab94SDag-Erling Smørgrav ouch:
4114ca1ab94SDag-Erling Smørgrav     free(u);
4124ca1ab94SDag-Erling Smørgrav     return NULL;
4134ca1ab94SDag-Erling Smørgrav }
41460245e42SDag-Erling Smørgrav 
41560245e42SDag-Erling Smørgrav /*
41660245e42SDag-Erling Smørgrav  * Free a URL
41760245e42SDag-Erling Smørgrav  */
41860245e42SDag-Erling Smørgrav void
41960245e42SDag-Erling Smørgrav fetchFreeURL(struct url *u)
42060245e42SDag-Erling Smørgrav {
42160245e42SDag-Erling Smørgrav     free(u->doc);
42260245e42SDag-Erling Smørgrav     free(u);
42360245e42SDag-Erling Smørgrav }
424