xref: /freebsd/lib/libfetch/fetch.c (revision 9bc22394d885aba91d8cd52655c1a8d183bc0ead)
14ca1ab94SDag-Erling Smørgrav /*-
2578153f1SDag-Erling Smørgrav  * Copyright (c) 1998-2004 Dag-Erling Smørgrav
34ca1ab94SDag-Erling Smørgrav  * All rights reserved.
44ca1ab94SDag-Erling Smørgrav  *
54ca1ab94SDag-Erling Smørgrav  * Redistribution and use in source and binary forms, with or without
64ca1ab94SDag-Erling Smørgrav  * modification, are permitted provided that the following conditions
74ca1ab94SDag-Erling Smørgrav  * are met:
84ca1ab94SDag-Erling Smørgrav  * 1. Redistributions of source code must retain the above copyright
94ca1ab94SDag-Erling Smørgrav  *    notice, this list of conditions and the following disclaimer
104ca1ab94SDag-Erling Smørgrav  *    in this position and unchanged.
114ca1ab94SDag-Erling Smørgrav  * 2. Redistributions in binary form must reproduce the above copyright
124ca1ab94SDag-Erling Smørgrav  *    notice, this list of conditions and the following disclaimer in the
134ca1ab94SDag-Erling Smørgrav  *    documentation and/or other materials provided with the distribution.
144ca1ab94SDag-Erling Smørgrav  * 3. The name of the author may not be used to endorse or promote products
154ca1ab94SDag-Erling Smørgrav  *    derived from this software without specific prior written permission
164ca1ab94SDag-Erling Smørgrav  *
174ca1ab94SDag-Erling Smørgrav  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
184ca1ab94SDag-Erling Smørgrav  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
194ca1ab94SDag-Erling Smørgrav  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
204ca1ab94SDag-Erling Smørgrav  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
214ca1ab94SDag-Erling Smørgrav  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
224ca1ab94SDag-Erling Smørgrav  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
234ca1ab94SDag-Erling Smørgrav  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
244ca1ab94SDag-Erling Smørgrav  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
254ca1ab94SDag-Erling Smørgrav  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
264ca1ab94SDag-Erling Smørgrav  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
274ca1ab94SDag-Erling Smørgrav  */
284ca1ab94SDag-Erling Smørgrav 
29cecb889fSMatthew Dillon #include <sys/cdefs.h>
30cecb889fSMatthew Dillon __FBSDID("$FreeBSD$");
31cecb889fSMatthew Dillon 
324ca1ab94SDag-Erling Smørgrav #include <sys/param.h>
33d8acd8dcSDag-Erling Smørgrav #include <sys/errno.h>
344ca1ab94SDag-Erling Smørgrav 
354ca1ab94SDag-Erling Smørgrav #include <ctype.h>
364ca1ab94SDag-Erling Smørgrav #include <stdio.h>
374ca1ab94SDag-Erling Smørgrav #include <stdlib.h>
384ca1ab94SDag-Erling Smørgrav #include <string.h>
394ca1ab94SDag-Erling Smørgrav 
404ca1ab94SDag-Erling Smørgrav #include "fetch.h"
41d8acd8dcSDag-Erling Smørgrav #include "common.h"
424ca1ab94SDag-Erling Smørgrav 
/*
 * Library-wide state with external linkage.  Nothing in the code of this
 * file reads or writes these objects, so the per-variable notes below are
 * inferred from the names only.
 * TODO(review): confirm the exact semantics against fetch.h and the
 * protocol back ends before relying on these descriptions.
 */
auth_t	 fetchAuthMethod;	/* presumably an authentication callback */
int	 fetchLastErrCode;	/* presumably the most recent error code */
char	 fetchLastErrString[MAXERRSTRING]; /* presumably its message text */
int	 fetchTimeout;		/* presumably an I/O timeout; starts at 0 */
int	 fetchRestartCalls = 1;	/* enabled by default; meaning not shown here */
int	 fetchDebug;		/* presumably a debug-output switch */
490fba3a00SDag-Erling Smørgrav 
500fba3a00SDag-Erling Smørgrav 
51d8acd8dcSDag-Erling Smørgrav /*** Local data **************************************************************/
52d8acd8dcSDag-Erling Smørgrav 
/*
 * Error messages for parser errors.
 *
 * The URL_* values are the codes this file hands to url_seterr().  Each
 * row of url_errlist pairs one code with a FETCH_* category and a
 * human-readable message; the final row (code -1) carries the "unknown"
 * message.
 * NOTE(review): struct fetcherr and url_seterr() are declared outside
 * this file (presumably in common.h); the -1 row looks like the
 * end-of-table / fallback entry for the lookup -- confirm there.
 */
#define URL_MALFORMED		1
#define URL_BAD_SCHEME		2
#define URL_BAD_PORT		3
static struct fetcherr url_errlist[] = {
	{ URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
	{ URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
	{ URL_BAD_PORT,		FETCH_URL,	"Invalid server port" },
	{ -1,			FETCH_UNKNOWN,	"Unknown parser error" }
};
65d8acd8dcSDag-Erling Smørgrav 
66d8acd8dcSDag-Erling Smørgrav 
67d8acd8dcSDag-Erling Smørgrav /*** Public API **************************************************************/
684ca1ab94SDag-Erling Smørgrav 
69842a95ccSDag-Erling Smørgrav /*
70842a95ccSDag-Erling Smørgrav  * Select the appropriate protocol for the URL scheme, and return a
71842a95ccSDag-Erling Smørgrav  * read-only stream connected to the document referenced by the URL.
721a5faa10SDag-Erling Smørgrav  * Also fill out the struct url_stat.
73842a95ccSDag-Erling Smørgrav  */
74ecc91352SDag-Erling Smørgrav FILE *
7538c7e4a6SArchie Cobbs fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
76ecc91352SDag-Erling Smørgrav {
77c97925adSHajimu UMEMOTO 
7802e6bec1SDag-Erling Smørgrav 	if (us != NULL) {
7902e6bec1SDag-Erling Smørgrav 		us->size = -1;
8002e6bec1SDag-Erling Smørgrav 		us->atime = us->mtime = 0;
8102e6bec1SDag-Erling Smørgrav 	}
8259769ab1SDag-Erling Smørgrav 	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
83e19e6098SDag-Erling Smørgrav 		return (fetchXGetFile(URL, us, flags));
84111e2510SDag-Erling Smørgrav 	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
85111e2510SDag-Erling Smørgrav 		return (fetchXGetFTP(URL, us, flags));
8659769ab1SDag-Erling Smørgrav 	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
87e19e6098SDag-Erling Smørgrav 		return (fetchXGetHTTP(URL, us, flags));
88111e2510SDag-Erling Smørgrav 	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
89111e2510SDag-Erling Smørgrav 		return (fetchXGetHTTP(URL, us, flags));
90a1b37df2SDag-Erling Smørgrav 	url_seterr(URL_BAD_SCHEME);
91e19e6098SDag-Erling Smørgrav 	return (NULL);
92d8acd8dcSDag-Erling Smørgrav }
93ecc91352SDag-Erling Smørgrav 
/*
 * Convenience form of fetchXGet() for callers that do not want document
 * metadata: the url_stat argument is always passed as NULL.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGet(URL, NULL, flags);
	return (stream);
}
1031a5faa10SDag-Erling Smørgrav 
1041a5faa10SDag-Erling Smørgrav /*
1051a5faa10SDag-Erling Smørgrav  * Select the appropriate protocol for the URL scheme, and return a
106842a95ccSDag-Erling Smørgrav  * write-only stream connected to the document referenced by the URL.
107842a95ccSDag-Erling Smørgrav  */
108ecc91352SDag-Erling Smørgrav FILE *
10938c7e4a6SArchie Cobbs fetchPut(struct url *URL, const char *flags)
110ecc91352SDag-Erling Smørgrav {
111c97925adSHajimu UMEMOTO 
11259769ab1SDag-Erling Smørgrav 	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
113e19e6098SDag-Erling Smørgrav 		return (fetchPutFile(URL, flags));
114111e2510SDag-Erling Smørgrav 	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
115111e2510SDag-Erling Smørgrav 		return (fetchPutFTP(URL, flags));
11659769ab1SDag-Erling Smørgrav 	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
117e19e6098SDag-Erling Smørgrav 		return (fetchPutHTTP(URL, flags));
118111e2510SDag-Erling Smørgrav 	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
119111e2510SDag-Erling Smørgrav 		return (fetchPutHTTP(URL, flags));
120a1b37df2SDag-Erling Smørgrav 	url_seterr(URL_BAD_SCHEME);
121e19e6098SDag-Erling Smørgrav 	return (NULL);
122d8acd8dcSDag-Erling Smørgrav }
123d8acd8dcSDag-Erling Smørgrav 
124d8acd8dcSDag-Erling Smørgrav /*
125d8acd8dcSDag-Erling Smørgrav  * Select the appropriate protocol for the URL scheme, and return the
126d8acd8dcSDag-Erling Smørgrav  * size of the document referenced by the URL if it exists.
127d8acd8dcSDag-Erling Smørgrav  */
128d8acd8dcSDag-Erling Smørgrav int
12938c7e4a6SArchie Cobbs fetchStat(struct url *URL, struct url_stat *us, const char *flags)
130d8acd8dcSDag-Erling Smørgrav {
131c97925adSHajimu UMEMOTO 
13202e6bec1SDag-Erling Smørgrav 	if (us != NULL) {
13302e6bec1SDag-Erling Smørgrav 		us->size = -1;
13402e6bec1SDag-Erling Smørgrav 		us->atime = us->mtime = 0;
13502e6bec1SDag-Erling Smørgrav 	}
13659769ab1SDag-Erling Smørgrav 	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
137e19e6098SDag-Erling Smørgrav 		return (fetchStatFile(URL, us, flags));
138e19e6098SDag-Erling Smørgrav 	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
139e19e6098SDag-Erling Smørgrav 		return (fetchStatFTP(URL, us, flags));
140111e2510SDag-Erling Smørgrav 	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
141111e2510SDag-Erling Smørgrav 		return (fetchStatHTTP(URL, us, flags));
142111e2510SDag-Erling Smørgrav 	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
143111e2510SDag-Erling Smørgrav 		return (fetchStatHTTP(URL, us, flags));
144a1b37df2SDag-Erling Smørgrav 	url_seterr(URL_BAD_SCHEME);
145e19e6098SDag-Erling Smørgrav 	return (-1);
146ecc91352SDag-Erling Smørgrav }
147ecc91352SDag-Erling Smørgrav 
148842a95ccSDag-Erling Smørgrav /*
149ce71b736SDag-Erling Smørgrav  * Select the appropriate protocol for the URL scheme, and return a
150ce71b736SDag-Erling Smørgrav  * list of files in the directory pointed to by the URL.
151ce71b736SDag-Erling Smørgrav  */
152ce71b736SDag-Erling Smørgrav struct url_ent *
15338c7e4a6SArchie Cobbs fetchList(struct url *URL, const char *flags)
154ce71b736SDag-Erling Smørgrav {
155c97925adSHajimu UMEMOTO 
15659769ab1SDag-Erling Smørgrav 	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
157e19e6098SDag-Erling Smørgrav 		return (fetchListFile(URL, flags));
158e19e6098SDag-Erling Smørgrav 	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
159e19e6098SDag-Erling Smørgrav 		return (fetchListFTP(URL, flags));
160111e2510SDag-Erling Smørgrav 	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
161111e2510SDag-Erling Smørgrav 		return (fetchListHTTP(URL, flags));
162111e2510SDag-Erling Smørgrav 	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
163111e2510SDag-Erling Smørgrav 		return (fetchListHTTP(URL, flags));
164a1b37df2SDag-Erling Smørgrav 	url_seterr(URL_BAD_SCHEME);
165e19e6098SDag-Erling Smørgrav 	return (NULL);
166ce71b736SDag-Erling Smørgrav }
167ce71b736SDag-Erling Smørgrav 
/*
 * String front end to fetchXGet(): parse URL, open it for reading, and
 * release the parsed form again before returning the stream.  Returns
 * NULL if the URL cannot be parsed or if fetchXGet() returns NULL.
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchXGet(parsed, us, flags);
	fetchFreeURL(parsed);

	return (stream);
}
1854ca1ab94SDag-Erling Smørgrav 
/*
 * Convenience form of fetchXGetURL() for callers that do not want
 * document metadata: the url_stat argument is always passed as NULL.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGetURL(URL, NULL, flags);
	return (stream);
}
1944ca1ab94SDag-Erling Smørgrav 
/*
 * String front end to fetchPut(): parse URL, open it for writing, and
 * release the parsed form again before returning the stream.  Returns
 * NULL if the URL cannot be parsed or if fetchPut() returns NULL.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchPut(parsed, flags);
	fetchFreeURL(parsed);

	return (stream);
}
2124ca1ab94SDag-Erling Smørgrav 
/*
 * String front end to fetchStat(): parse URL, query it, and release the
 * parsed form again.  Returns -1 if the URL cannot be parsed (us is left
 * untouched in that case), otherwise whatever fetchStat() returns.
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	int status;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (-1);

	status = fetchStat(parsed, us, flags);
	fetchFreeURL(parsed);

	return (status);
}
230d8acd8dcSDag-Erling Smørgrav 
/*
 * String front end to fetchList(): parse URL, list it, and release the
 * parsed form again before returning the listing.  Returns NULL if the
 * URL cannot be parsed or if fetchList() returns NULL.
 */
struct url_ent *
fetchListURL(const char *URL, const char *flags)
{
	struct url *parsed;
	struct url_ent *entries;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	entries = fetchList(parsed, flags);
	fetchFreeURL(parsed);

	return (entries);
}
248ce71b736SDag-Erling Smørgrav 
249ce71b736SDag-Erling Smørgrav /*
2509a964d6aSDag-Erling Smørgrav  * Make a URL
2519a964d6aSDag-Erling Smørgrav  */
2529a964d6aSDag-Erling Smørgrav struct url *
25338c7e4a6SArchie Cobbs fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
25438c7e4a6SArchie Cobbs     const char *user, const char *pwd)
2559a964d6aSDag-Erling Smørgrav {
2569a964d6aSDag-Erling Smørgrav 	struct url *u;
2579a964d6aSDag-Erling Smørgrav 
2589a964d6aSDag-Erling Smørgrav 	if (!scheme || (!host && !doc)) {
259a1b37df2SDag-Erling Smørgrav 		url_seterr(URL_MALFORMED);
260e19e6098SDag-Erling Smørgrav 		return (NULL);
2619a964d6aSDag-Erling Smørgrav 	}
2629a964d6aSDag-Erling Smørgrav 
2639a964d6aSDag-Erling Smørgrav 	if (port < 0 || port > 65535) {
264a1b37df2SDag-Erling Smørgrav 		url_seterr(URL_BAD_PORT);
265e19e6098SDag-Erling Smørgrav 		return (NULL);
2669a964d6aSDag-Erling Smørgrav 	}
2679a964d6aSDag-Erling Smørgrav 
2689a964d6aSDag-Erling Smørgrav 	/* allocate struct url */
269930105c1SDag-Erling Smørgrav 	if ((u = calloc(1, sizeof(*u))) == NULL) {
270a1b37df2SDag-Erling Smørgrav 		fetch_syserr();
271e19e6098SDag-Erling Smørgrav 		return (NULL);
2729a964d6aSDag-Erling Smørgrav 	}
2739a964d6aSDag-Erling Smørgrav 
2749a964d6aSDag-Erling Smørgrav 	if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
275a1b37df2SDag-Erling Smørgrav 		fetch_syserr();
2769a964d6aSDag-Erling Smørgrav 		free(u);
277e19e6098SDag-Erling Smørgrav 		return (NULL);
2789a964d6aSDag-Erling Smørgrav 	}
2799a964d6aSDag-Erling Smørgrav 
280930105c1SDag-Erling Smørgrav #define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x)
2819a964d6aSDag-Erling Smørgrav 	seturl(scheme);
2829a964d6aSDag-Erling Smørgrav 	seturl(host);
2839a964d6aSDag-Erling Smørgrav 	seturl(user);
2849a964d6aSDag-Erling Smørgrav 	seturl(pwd);
2859a964d6aSDag-Erling Smørgrav #undef seturl
2869a964d6aSDag-Erling Smørgrav 	u->port = port;
2879a964d6aSDag-Erling Smørgrav 
288e19e6098SDag-Erling Smørgrav 	return (u);
2899a964d6aSDag-Erling Smørgrav }
2909a964d6aSDag-Erling Smørgrav 
/*
 * Return value of the given hex digit (0-15), accepting both upper and
 * lower case, or -1 if ch is not a hex digit.
 */
static int
fetch_hexval(char ch)
{

	if (ch >= '0' && ch <= '9')
		return (ch - '0');
	else if (ch >= 'a' && ch <= 'f')
		return (ch - 'a' + 10);
	else if (ch >= 'A' && ch <= 'F')
		return (ch - 'A' + 10);
	return (-1);
}

/*
 * Decode percent-encoded URL component from src into dst, stopping at end
 * of string, or at @ or : separators.  Returns a pointer to the unhandled
 * part of the input string (null terminator, @, or :).  No terminator is
 * written to dst (it is the caller's responsibility).
 *
 * At most dlen bytes are stored in dst.  Input beyond that is still
 * scanned, so the return value always points at the separator, but the
 * surplus bytes are discarded.  A '%' that is not followed by two hex
 * digits is copied literally, and so is "%00", so that a NUL can never be
 * smuggled into the decoded output.
 */
static const char *
fetch_pctdecode(char *dst, const char *src, size_t dlen)
{
	int d1, d2;
	char c;
	const char *s;

	for (s = src; *s != '\0' && *s != '@' && *s != ':'; s++) {
		if (s[0] == '%' && (d1 = fetch_hexval(s[1])) >= 0 &&
		    (d2 = fetch_hexval(s[2])) >= 0 && (d1 > 0 || d2 > 0)) {
			c = d1 << 4 | d2;
			s += 2;
		} else {
			c = *s;
		}
		/*
		 * Only count down when a byte is actually stored.  dlen is
		 * unsigned, so decrementing it at zero would wrap around to
		 * SIZE_MAX and every later byte would be written past the
		 * end of dst.
		 */
		if (dlen > 0) {
			*dst++ = c;
			dlen--;
		}
	}
	return (s);
}
3330fa39199SEd Maste 
3340fa39199SEd Maste /*
3354ca1ab94SDag-Erling Smørgrav  * Split an URL into components. URL syntax is:
33659769ab1SDag-Erling Smørgrav  * [method:/][/[user[:pwd]@]host[:port]/][document]
3374ca1ab94SDag-Erling Smørgrav  * This almost, but not quite, RFC1738 URL syntax.
3384ca1ab94SDag-Erling Smørgrav  */
339d8acd8dcSDag-Erling Smørgrav struct url *
34038c7e4a6SArchie Cobbs fetchParseURL(const char *URL)
3414ca1ab94SDag-Erling Smørgrav {
34238c7e4a6SArchie Cobbs 	char *doc;
34338c7e4a6SArchie Cobbs 	const char *p, *q;
344d8acd8dcSDag-Erling Smørgrav 	struct url *u;
3454ca1ab94SDag-Erling Smørgrav 	int i;
3464ca1ab94SDag-Erling Smørgrav 
347d8acd8dcSDag-Erling Smørgrav 	/* allocate struct url */
348930105c1SDag-Erling Smørgrav 	if ((u = calloc(1, sizeof(*u))) == NULL) {
349a1b37df2SDag-Erling Smørgrav 		fetch_syserr();
350e19e6098SDag-Erling Smørgrav 		return (NULL);
351d8acd8dcSDag-Erling Smørgrav 	}
3524ca1ab94SDag-Erling Smørgrav 
3534ca1ab94SDag-Erling Smørgrav 	/* scheme name */
35459769ab1SDag-Erling Smørgrav 	if ((p = strstr(URL, ":/"))) {
355e19e6098SDag-Erling Smørgrav 		snprintf(u->scheme, URL_SCHEMELEN+1,
356e19e6098SDag-Erling Smørgrav 		    "%.*s", (int)(p - URL), URL);
35759769ab1SDag-Erling Smørgrav 		URL = ++p;
35859769ab1SDag-Erling Smørgrav 		/*
35959769ab1SDag-Erling Smørgrav 		 * Only one slash: no host, leave slash as part of document
36059769ab1SDag-Erling Smørgrav 		 * Two slashes: host follows, strip slashes
36159769ab1SDag-Erling Smørgrav 		 */
36259769ab1SDag-Erling Smørgrav 		if (URL[1] == '/')
36359769ab1SDag-Erling Smørgrav 			URL = (p += 2);
3641ba84976SDag-Erling Smørgrav 	} else {
3651ba84976SDag-Erling Smørgrav 		p = URL;
366d8acd8dcSDag-Erling Smørgrav 	}
3675b2ad516SDag-Erling Smørgrav 	if (!*URL || *URL == '/' || *URL == '.' ||
36873b3e4dfSStefan Eßer 	    (u->scheme[0] == '\0' &&
36973b3e4dfSStefan Eßer 		strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
3704ca1ab94SDag-Erling Smørgrav 		goto nohost;
3714ca1ab94SDag-Erling Smørgrav 
3724ca1ab94SDag-Erling Smørgrav 	p = strpbrk(URL, "/@");
3730fba3a00SDag-Erling Smørgrav 	if (p && *p == '@') {
3744ca1ab94SDag-Erling Smørgrav 		/* username */
3750fa39199SEd Maste 		q = fetch_pctdecode(u->user, URL, URL_USERLEN);
3764ca1ab94SDag-Erling Smørgrav 
3774ca1ab94SDag-Erling Smørgrav 		/* password */
3784ca1ab94SDag-Erling Smørgrav 		if (*q == ':')
379*9bc22394STim Kientzle 			q = fetch_pctdecode(u->pwd, q + 1, URL_PWDLEN);
3804ca1ab94SDag-Erling Smørgrav 
3814ca1ab94SDag-Erling Smørgrav 		p++;
382ab39353eSDag-Erling Smørgrav 	} else {
383ab39353eSDag-Erling Smørgrav 		p = URL;
384ab39353eSDag-Erling Smørgrav 	}
3854ca1ab94SDag-Erling Smørgrav 
3864ca1ab94SDag-Erling Smørgrav 	/* hostname */
38728c645cfSHajimu UMEMOTO #ifdef INET6
38828c645cfSHajimu UMEMOTO 	if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
38928c645cfSHajimu UMEMOTO 	    (*++q == '\0' || *q == '/' || *q == ':')) {
39028c645cfSHajimu UMEMOTO 		if ((i = q - p - 2) > MAXHOSTNAMELEN)
39128c645cfSHajimu UMEMOTO 			i = MAXHOSTNAMELEN;
39228c645cfSHajimu UMEMOTO 		strncpy(u->host, ++p, i);
39328c645cfSHajimu UMEMOTO 		p = q;
39428c645cfSHajimu UMEMOTO 	} else
39528c645cfSHajimu UMEMOTO #endif
3964ca1ab94SDag-Erling Smørgrav 		for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
3974ca1ab94SDag-Erling Smørgrav 			if (i < MAXHOSTNAMELEN)
3984ca1ab94SDag-Erling Smørgrav 				u->host[i++] = *p;
3994ca1ab94SDag-Erling Smørgrav 
4004ca1ab94SDag-Erling Smørgrav 	/* port */
4014ca1ab94SDag-Erling Smørgrav 	if (*p == ':') {
4024ca1ab94SDag-Erling Smørgrav 		for (q = ++p; *q && (*q != '/'); q++)
403facd9827SDag-Erling Smørgrav 			if (isdigit((unsigned char)*q))
4044ca1ab94SDag-Erling Smørgrav 				u->port = u->port * 10 + (*q - '0');
405d8acd8dcSDag-Erling Smørgrav 			else {
406d8acd8dcSDag-Erling Smørgrav 				/* invalid port */
407a1b37df2SDag-Erling Smørgrav 				url_seterr(URL_BAD_PORT);
408d8acd8dcSDag-Erling Smørgrav 				goto ouch;
409d8acd8dcSDag-Erling Smørgrav 			}
410551858f0SDag-Erling Smørgrav 		p = q;
4114ca1ab94SDag-Erling Smørgrav 	}
4124ca1ab94SDag-Erling Smørgrav 
4134ca1ab94SDag-Erling Smørgrav nohost:
4144ca1ab94SDag-Erling Smørgrav 	/* document */
41560245e42SDag-Erling Smørgrav 	if (!*p)
41660245e42SDag-Erling Smørgrav 		p = "/";
41760245e42SDag-Erling Smørgrav 
418f9c2053bSDag-Erling Smørgrav 	if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 ||
419f9c2053bSDag-Erling Smørgrav 	    strcasecmp(u->scheme, SCHEME_HTTPS) == 0) {
42023fe6d7aSDag-Erling Smørgrav 		const char hexnums[] = "0123456789abcdef";
42123fe6d7aSDag-Erling Smørgrav 
422f9c2053bSDag-Erling Smørgrav 		/* percent-escape whitespace. */
423f9c2053bSDag-Erling Smørgrav 		if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
424a1b37df2SDag-Erling Smørgrav 			fetch_syserr();
42523fe6d7aSDag-Erling Smørgrav 			goto ouch;
42623fe6d7aSDag-Erling Smørgrav 		}
427f9c2053bSDag-Erling Smørgrav 		u->doc = doc;
42823fe6d7aSDag-Erling Smørgrav 		while (*p != '\0') {
429facd9827SDag-Erling Smørgrav 			if (!isspace((unsigned char)*p)) {
43023fe6d7aSDag-Erling Smørgrav 				*doc++ = *p++;
43123fe6d7aSDag-Erling Smørgrav 			} else {
43223fe6d7aSDag-Erling Smørgrav 				*doc++ = '%';
43323fe6d7aSDag-Erling Smørgrav 				*doc++ = hexnums[((unsigned int)*p) >> 4];
43423fe6d7aSDag-Erling Smørgrav 				*doc++ = hexnums[((unsigned int)*p) & 0xf];
43523fe6d7aSDag-Erling Smørgrav 				p++;
43623fe6d7aSDag-Erling Smørgrav 			}
43723fe6d7aSDag-Erling Smørgrav 		}
43823fe6d7aSDag-Erling Smørgrav 		*doc = '\0';
43923fe6d7aSDag-Erling Smørgrav 	} else if ((u->doc = strdup(p)) == NULL) {
440a1b37df2SDag-Erling Smørgrav 		fetch_syserr();
4414ca1ab94SDag-Erling Smørgrav 		goto ouch;
442d8acd8dcSDag-Erling Smørgrav 	}
4434ca1ab94SDag-Erling Smørgrav 
4444ca1ab94SDag-Erling Smørgrav 	DEBUG(fprintf(stderr,
445f67efa37SDag-Erling Smørgrav 		  "scheme:   [%s]\n"
446f67efa37SDag-Erling Smørgrav 		  "user:     [%s]\n"
447f67efa37SDag-Erling Smørgrav 		  "password: [%s]\n"
448f67efa37SDag-Erling Smørgrav 		  "host:     [%s]\n"
449f67efa37SDag-Erling Smørgrav 		  "port:     [%d]\n"
450f67efa37SDag-Erling Smørgrav 		  "document: [%s]\n",
4514ca1ab94SDag-Erling Smørgrav 		  u->scheme, u->user, u->pwd,
4524ca1ab94SDag-Erling Smørgrav 		  u->host, u->port, u->doc));
4534ca1ab94SDag-Erling Smørgrav 
454e19e6098SDag-Erling Smørgrav 	return (u);
4554ca1ab94SDag-Erling Smørgrav 
4564ca1ab94SDag-Erling Smørgrav ouch:
4574ca1ab94SDag-Erling Smørgrav 	free(u);
458e19e6098SDag-Erling Smørgrav 	return (NULL);
4594ca1ab94SDag-Erling Smørgrav }
46060245e42SDag-Erling Smørgrav 
46160245e42SDag-Erling Smørgrav /*
46260245e42SDag-Erling Smørgrav  * Free a URL
46360245e42SDag-Erling Smørgrav  */
46460245e42SDag-Erling Smørgrav void
46560245e42SDag-Erling Smørgrav fetchFreeURL(struct url *u)
46660245e42SDag-Erling Smørgrav {
46760245e42SDag-Erling Smørgrav 	free(u->doc);
46860245e42SDag-Erling Smørgrav 	free(u);
46960245e42SDag-Erling Smørgrav }
470