xref: /freebsd/lib/libfetch/fetch.c (revision 8d9de5b10a24bd2d79ed99f139c0ac28c09b15ca)
14ca1ab94SDag-Erling Smørgrav /*-
25e53a4f9SPedro F. Giffuni  * SPDX-License-Identifier: BSD-3-Clause
35e53a4f9SPedro F. Giffuni  *
4578153f1SDag-Erling Smørgrav  * Copyright (c) 1998-2004 Dag-Erling Smørgrav
54ca1ab94SDag-Erling Smørgrav  * All rights reserved.
64ca1ab94SDag-Erling Smørgrav  *
74ca1ab94SDag-Erling Smørgrav  * Redistribution and use in source and binary forms, with or without
84ca1ab94SDag-Erling Smørgrav  * modification, are permitted provided that the following conditions
94ca1ab94SDag-Erling Smørgrav  * are met:
104ca1ab94SDag-Erling Smørgrav  * 1. Redistributions of source code must retain the above copyright
114ca1ab94SDag-Erling Smørgrav  *    notice, this list of conditions and the following disclaimer
124ca1ab94SDag-Erling Smørgrav  *    in this position and unchanged.
134ca1ab94SDag-Erling Smørgrav  * 2. Redistributions in binary form must reproduce the above copyright
144ca1ab94SDag-Erling Smørgrav  *    notice, this list of conditions and the following disclaimer in the
154ca1ab94SDag-Erling Smørgrav  *    documentation and/or other materials provided with the distribution.
164ca1ab94SDag-Erling Smørgrav  * 3. The name of the author may not be used to endorse or promote products
174ca1ab94SDag-Erling Smørgrav  *    derived from this software without specific prior written permission
184ca1ab94SDag-Erling Smørgrav  *
194ca1ab94SDag-Erling Smørgrav  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
204ca1ab94SDag-Erling Smørgrav  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
214ca1ab94SDag-Erling Smørgrav  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
224ca1ab94SDag-Erling Smørgrav  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
234ca1ab94SDag-Erling Smørgrav  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
244ca1ab94SDag-Erling Smørgrav  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
254ca1ab94SDag-Erling Smørgrav  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
264ca1ab94SDag-Erling Smørgrav  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
274ca1ab94SDag-Erling Smørgrav  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
284ca1ab94SDag-Erling Smørgrav  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
294ca1ab94SDag-Erling Smørgrav  */
304ca1ab94SDag-Erling Smørgrav 
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>

#include <netinet/in.h>

#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "fetch.h"
#include "common.h"
464ca1ab94SDag-Erling Smørgrav 
476490b215SDag-Erling Smørgrav auth_t	 fetchAuthMethod;
480fba3a00SDag-Erling Smørgrav int	 fetchLastErrCode;
49ba101983SDag-Erling Smørgrav char	 fetchLastErrString[MAXERRSTRING];
50fc6e9e65SDag-Erling Smørgrav int	 fetchTimeout;
51a1bb3f48SDag-Erling Smørgrav int	 fetchRestartCalls = 1;
527eb2f34dSDag-Erling Smørgrav int	 fetchDebug;
530fba3a00SDag-Erling Smørgrav 
540fba3a00SDag-Erling Smørgrav 
55d8acd8dcSDag-Erling Smørgrav /*** Local data **************************************************************/
56d8acd8dcSDag-Erling Smørgrav 
57d8acd8dcSDag-Erling Smørgrav /*
58d8acd8dcSDag-Erling Smørgrav  * Error messages for parser errors
59d8acd8dcSDag-Erling Smørgrav  */
60d8acd8dcSDag-Erling Smørgrav #define URL_MALFORMED		1
61d8acd8dcSDag-Erling Smørgrav #define URL_BAD_SCHEME		2
62d8acd8dcSDag-Erling Smørgrav #define URL_BAD_PORT		3
63a1b37df2SDag-Erling Smørgrav static struct fetcherr url_errlist[] = {
64d8acd8dcSDag-Erling Smørgrav 	{ URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
65d8acd8dcSDag-Erling Smørgrav 	{ URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
66d8acd8dcSDag-Erling Smørgrav 	{ URL_BAD_PORT,		FETCH_URL,	"Invalid server port" },
67d8acd8dcSDag-Erling Smørgrav 	{ -1,			FETCH_UNKNOWN,	"Unknown parser error" }
68d8acd8dcSDag-Erling Smørgrav };
69d8acd8dcSDag-Erling Smørgrav 
70d8acd8dcSDag-Erling Smørgrav 
71d8acd8dcSDag-Erling Smørgrav /*** Public API **************************************************************/
724ca1ab94SDag-Erling Smørgrav 
73842a95ccSDag-Erling Smørgrav /*
74842a95ccSDag-Erling Smørgrav  * Select the appropriate protocol for the URL scheme, and return a
75842a95ccSDag-Erling Smørgrav  * read-only stream connected to the document referenced by the URL.
761a5faa10SDag-Erling Smørgrav  * Also fill out the struct url_stat.
77842a95ccSDag-Erling Smørgrav  */
78ecc91352SDag-Erling Smørgrav FILE *
7938c7e4a6SArchie Cobbs fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
80ecc91352SDag-Erling Smørgrav {
81c97925adSHajimu UMEMOTO 
8202e6bec1SDag-Erling Smørgrav 	if (us != NULL) {
8302e6bec1SDag-Erling Smørgrav 		us->size = -1;
8402e6bec1SDag-Erling Smørgrav 		us->atime = us->mtime = 0;
8502e6bec1SDag-Erling Smørgrav 	}
86*8d9de5b1SDag-Erling Smørgrav 	if (strcmp(URL->scheme, SCHEME_FILE) == 0)
87e19e6098SDag-Erling Smørgrav 		return (fetchXGetFile(URL, us, flags));
88*8d9de5b1SDag-Erling Smørgrav 	else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
89111e2510SDag-Erling Smørgrav 		return (fetchXGetFTP(URL, us, flags));
90*8d9de5b1SDag-Erling Smørgrav 	else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
91e19e6098SDag-Erling Smørgrav 		return (fetchXGetHTTP(URL, us, flags));
92*8d9de5b1SDag-Erling Smørgrav 	else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
93111e2510SDag-Erling Smørgrav 		return (fetchXGetHTTP(URL, us, flags));
94a1b37df2SDag-Erling Smørgrav 	url_seterr(URL_BAD_SCHEME);
95e19e6098SDag-Erling Smørgrav 	return (NULL);
96d8acd8dcSDag-Erling Smørgrav }
97ecc91352SDag-Erling Smørgrav 
/*
 * Open the document referenced by URL for reading.  Identical to
 * fetchXGet() called without a struct url_stat.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGet(URL, NULL, flags);
	return (stream);
}
1071a5faa10SDag-Erling Smørgrav 
1081a5faa10SDag-Erling Smørgrav /*
1091a5faa10SDag-Erling Smørgrav  * Select the appropriate protocol for the URL scheme, and return a
110842a95ccSDag-Erling Smørgrav  * write-only stream connected to the document referenced by the URL.
111842a95ccSDag-Erling Smørgrav  */
112ecc91352SDag-Erling Smørgrav FILE *
11338c7e4a6SArchie Cobbs fetchPut(struct url *URL, const char *flags)
114ecc91352SDag-Erling Smørgrav {
115c97925adSHajimu UMEMOTO 
116*8d9de5b1SDag-Erling Smørgrav 	if (strcmp(URL->scheme, SCHEME_FILE) == 0)
117e19e6098SDag-Erling Smørgrav 		return (fetchPutFile(URL, flags));
118*8d9de5b1SDag-Erling Smørgrav 	else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
119111e2510SDag-Erling Smørgrav 		return (fetchPutFTP(URL, flags));
120*8d9de5b1SDag-Erling Smørgrav 	else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
121e19e6098SDag-Erling Smørgrav 		return (fetchPutHTTP(URL, flags));
122*8d9de5b1SDag-Erling Smørgrav 	else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
123111e2510SDag-Erling Smørgrav 		return (fetchPutHTTP(URL, flags));
124a1b37df2SDag-Erling Smørgrav 	url_seterr(URL_BAD_SCHEME);
125e19e6098SDag-Erling Smørgrav 	return (NULL);
126d8acd8dcSDag-Erling Smørgrav }
127d8acd8dcSDag-Erling Smørgrav 
128d8acd8dcSDag-Erling Smørgrav /*
129d8acd8dcSDag-Erling Smørgrav  * Select the appropriate protocol for the URL scheme, and return the
130d8acd8dcSDag-Erling Smørgrav  * size of the document referenced by the URL if it exists.
131d8acd8dcSDag-Erling Smørgrav  */
132d8acd8dcSDag-Erling Smørgrav int
13338c7e4a6SArchie Cobbs fetchStat(struct url *URL, struct url_stat *us, const char *flags)
134d8acd8dcSDag-Erling Smørgrav {
135c97925adSHajimu UMEMOTO 
13602e6bec1SDag-Erling Smørgrav 	if (us != NULL) {
13702e6bec1SDag-Erling Smørgrav 		us->size = -1;
13802e6bec1SDag-Erling Smørgrav 		us->atime = us->mtime = 0;
13902e6bec1SDag-Erling Smørgrav 	}
140*8d9de5b1SDag-Erling Smørgrav 	if (strcmp(URL->scheme, SCHEME_FILE) == 0)
141e19e6098SDag-Erling Smørgrav 		return (fetchStatFile(URL, us, flags));
142*8d9de5b1SDag-Erling Smørgrav 	else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
143e19e6098SDag-Erling Smørgrav 		return (fetchStatFTP(URL, us, flags));
144*8d9de5b1SDag-Erling Smørgrav 	else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
145111e2510SDag-Erling Smørgrav 		return (fetchStatHTTP(URL, us, flags));
146*8d9de5b1SDag-Erling Smørgrav 	else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
147111e2510SDag-Erling Smørgrav 		return (fetchStatHTTP(URL, us, flags));
148a1b37df2SDag-Erling Smørgrav 	url_seterr(URL_BAD_SCHEME);
149e19e6098SDag-Erling Smørgrav 	return (-1);
150ecc91352SDag-Erling Smørgrav }
151ecc91352SDag-Erling Smørgrav 
152842a95ccSDag-Erling Smørgrav /*
153ce71b736SDag-Erling Smørgrav  * Select the appropriate protocol for the URL scheme, and return a
154ce71b736SDag-Erling Smørgrav  * list of files in the directory pointed to by the URL.
155ce71b736SDag-Erling Smørgrav  */
156ce71b736SDag-Erling Smørgrav struct url_ent *
15738c7e4a6SArchie Cobbs fetchList(struct url *URL, const char *flags)
158ce71b736SDag-Erling Smørgrav {
159c97925adSHajimu UMEMOTO 
160*8d9de5b1SDag-Erling Smørgrav 	if (strcmp(URL->scheme, SCHEME_FILE) == 0)
161e19e6098SDag-Erling Smørgrav 		return (fetchListFile(URL, flags));
162*8d9de5b1SDag-Erling Smørgrav 	else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
163e19e6098SDag-Erling Smørgrav 		return (fetchListFTP(URL, flags));
164*8d9de5b1SDag-Erling Smørgrav 	else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
165111e2510SDag-Erling Smørgrav 		return (fetchListHTTP(URL, flags));
166*8d9de5b1SDag-Erling Smørgrav 	else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
167111e2510SDag-Erling Smørgrav 		return (fetchListHTTP(URL, flags));
168a1b37df2SDag-Erling Smørgrav 	url_seterr(URL_BAD_SCHEME);
169e19e6098SDag-Erling Smørgrav 	return (NULL);
170ce71b736SDag-Erling Smørgrav }
171ce71b736SDag-Erling Smørgrav 
/*
 * Parse the URL string and, if that succeeds, hand the result to
 * fetchXGet().  The parsed URL is released again before returning.
 * Returns NULL if parsing fails, otherwise whatever fetchXGet() returned.
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	stream = NULL;
	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		stream = fetchXGet(parsed, us, flags);
		fetchFreeURL(parsed);
	}
	return (stream);
}
1894ca1ab94SDag-Erling Smørgrav 
/*
 * Parse the URL string and open the document for reading.  Identical to
 * fetchXGetURL() called without a struct url_stat.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGetURL(URL, NULL, flags);
	return (stream);
}
1984ca1ab94SDag-Erling Smørgrav 
/*
 * Parse the URL string and, if that succeeds, hand the result to
 * fetchPut().  The parsed URL is released again before returning.
 * Returns NULL if parsing fails, otherwise whatever fetchPut() returned.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	stream = NULL;
	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		stream = fetchPut(parsed, flags);
		fetchFreeURL(parsed);
	}
	return (stream);
}
2164ca1ab94SDag-Erling Smørgrav 
2174ca1ab94SDag-Erling Smørgrav /*
218d8acd8dcSDag-Erling Smørgrav  * Attempt to parse the given URL; if successful, call fetchStat().
219d8acd8dcSDag-Erling Smørgrav  */
220d8acd8dcSDag-Erling Smørgrav int
22138c7e4a6SArchie Cobbs fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
222d8acd8dcSDag-Erling Smørgrav {
223d8acd8dcSDag-Erling Smørgrav 	struct url *u;
224d8acd8dcSDag-Erling Smørgrav 	int s;
225d8acd8dcSDag-Erling Smørgrav 
226d8acd8dcSDag-Erling Smørgrav 	if ((u = fetchParseURL(URL)) == NULL)
227e19e6098SDag-Erling Smørgrav 		return (-1);
228d8acd8dcSDag-Erling Smørgrav 
229d8acd8dcSDag-Erling Smørgrav 	s = fetchStat(u, us, flags);
230d8acd8dcSDag-Erling Smørgrav 
23160245e42SDag-Erling Smørgrav 	fetchFreeURL(u);
232e19e6098SDag-Erling Smørgrav 	return (s);
233d8acd8dcSDag-Erling Smørgrav }
234d8acd8dcSDag-Erling Smørgrav 
/*
 * Parse the URL string and, if that succeeds, hand the result to
 * fetchList().  The parsed URL is released again before returning.
 * Returns NULL if parsing fails, otherwise whatever fetchList() returned.
 */
struct url_ent *
fetchListURL(const char *URL, const char *flags)
{
	struct url *parsed;
	struct url_ent *entries;

	entries = NULL;
	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		entries = fetchList(parsed, flags);
		fetchFreeURL(parsed);
	}
	return (entries);
}
252ce71b736SDag-Erling Smørgrav 
253ce71b736SDag-Erling Smørgrav /*
2549a964d6aSDag-Erling Smørgrav  * Make a URL
2559a964d6aSDag-Erling Smørgrav  */
2569a964d6aSDag-Erling Smørgrav struct url *
25738c7e4a6SArchie Cobbs fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
25838c7e4a6SArchie Cobbs     const char *user, const char *pwd)
2599a964d6aSDag-Erling Smørgrav {
2609a964d6aSDag-Erling Smørgrav 	struct url *u;
2619a964d6aSDag-Erling Smørgrav 
2629a964d6aSDag-Erling Smørgrav 	if (!scheme || (!host && !doc)) {
263a1b37df2SDag-Erling Smørgrav 		url_seterr(URL_MALFORMED);
264e19e6098SDag-Erling Smørgrav 		return (NULL);
2659a964d6aSDag-Erling Smørgrav 	}
2669a964d6aSDag-Erling Smørgrav 
2679a964d6aSDag-Erling Smørgrav 	if (port < 0 || port > 65535) {
268a1b37df2SDag-Erling Smørgrav 		url_seterr(URL_BAD_PORT);
269e19e6098SDag-Erling Smørgrav 		return (NULL);
2709a964d6aSDag-Erling Smørgrav 	}
2719a964d6aSDag-Erling Smørgrav 
2729a964d6aSDag-Erling Smørgrav 	/* allocate struct url */
273930105c1SDag-Erling Smørgrav 	if ((u = calloc(1, sizeof(*u))) == NULL) {
274a1b37df2SDag-Erling Smørgrav 		fetch_syserr();
275e19e6098SDag-Erling Smørgrav 		return (NULL);
2769a964d6aSDag-Erling Smørgrav 	}
2775f04ebd4SDag-Erling Smørgrav 	u->netrcfd = -1;
2789a964d6aSDag-Erling Smørgrav 
2799a964d6aSDag-Erling Smørgrav 	if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
280a1b37df2SDag-Erling Smørgrav 		fetch_syserr();
2819a964d6aSDag-Erling Smørgrav 		free(u);
282e19e6098SDag-Erling Smørgrav 		return (NULL);
2839a964d6aSDag-Erling Smørgrav 	}
2849a964d6aSDag-Erling Smørgrav 
285930105c1SDag-Erling Smørgrav #define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x)
2869a964d6aSDag-Erling Smørgrav 	seturl(scheme);
2879a964d6aSDag-Erling Smørgrav 	seturl(host);
2889a964d6aSDag-Erling Smørgrav 	seturl(user);
2899a964d6aSDag-Erling Smørgrav 	seturl(pwd);
2909a964d6aSDag-Erling Smørgrav #undef seturl
2919a964d6aSDag-Erling Smørgrav 	u->port = port;
2929a964d6aSDag-Erling Smørgrav 
293e19e6098SDag-Erling Smørgrav 	return (u);
2949a964d6aSDag-Erling Smørgrav }
2959a964d6aSDag-Erling Smørgrav 
/*
 * Return the value (0-15) of the given hex digit, or -1 if ch is not a
 * hex digit.
 */
static int
fetch_hexval(char ch)
{

	if (ch >= '0' && ch <= '9')
		return (ch - '0');
	else if (ch >= 'a' && ch <= 'f')
		return (ch - 'a' + 10);
	else if (ch >= 'A' && ch <= 'F')
		return (ch - 'A' + 10);
	return (-1);
}

/*
 * Decode a percent-encoded URL component from src into dst, stopping at
 * the end of the string or at an '@' or ':' separator.
 *
 * "%XX" with two hex digits is replaced by the byte it encodes, except
 * for "%00", which is copied literally, as is any '%' not followed by
 * two hex digits.
 *
 * At most dlen bytes are stored in dst; further decoded bytes are dropped
 * while the input is still consumed up to the separator.  No terminator
 * is written to dst (that is the caller's responsibility).
 *
 * Returns a pointer to the unhandled part of the input string (the null
 * terminator, '@', or ':').
 */
static const char *
fetch_pctdecode(char *dst, const char *src, size_t dlen)
{
	int d1, d2;
	char c;
	const char *s;

	for (s = src; *s != '\0' && *s != '@' && *s != ':'; s++) {
		/* && short-circuits, so s[2] is only read if s[1] is hex */
		if (s[0] == '%' && (d1 = fetch_hexval(s[1])) >= 0 &&
		    (d2 = fetch_hexval(s[2])) >= 0 && (d1 > 0 || d2 > 0)) {
			c = (char)(d1 << 4 | d2);
			s += 2;
		} else {
			c = *s;
		}
		/*
		 * Decrement only when a byte is stored: a post-decrement in
		 * the test would wrap dlen from 0 to SIZE_MAX and let every
		 * later byte be written past the end of dst.
		 */
		if (dlen > 0) {
			*dst++ = c;
			dlen--;
		}
	}
	return (s);
}
3380fa39199SEd Maste 
3390fa39199SEd Maste /*
3404ca1ab94SDag-Erling Smørgrav  * Split an URL into components. URL syntax is:
34159769ab1SDag-Erling Smørgrav  * [method:/][/[user[:pwd]@]host[:port]/][document]
3424ca1ab94SDag-Erling Smørgrav  * This almost, but not quite, RFC1738 URL syntax.
3434ca1ab94SDag-Erling Smørgrav  */
344d8acd8dcSDag-Erling Smørgrav struct url *
34538c7e4a6SArchie Cobbs fetchParseURL(const char *URL)
3464ca1ab94SDag-Erling Smørgrav {
34738c7e4a6SArchie Cobbs 	char *doc;
34838c7e4a6SArchie Cobbs 	const char *p, *q;
349d8acd8dcSDag-Erling Smørgrav 	struct url *u;
350*8d9de5b1SDag-Erling Smørgrav 	int i, n;
3514ca1ab94SDag-Erling Smørgrav 
352d8acd8dcSDag-Erling Smørgrav 	/* allocate struct url */
353930105c1SDag-Erling Smørgrav 	if ((u = calloc(1, sizeof(*u))) == NULL) {
354a1b37df2SDag-Erling Smørgrav 		fetch_syserr();
355e19e6098SDag-Erling Smørgrav 		return (NULL);
356d8acd8dcSDag-Erling Smørgrav 	}
3575f04ebd4SDag-Erling Smørgrav 	u->netrcfd = -1;
3584ca1ab94SDag-Erling Smørgrav 
3594ca1ab94SDag-Erling Smørgrav 	/* scheme name */
36059769ab1SDag-Erling Smørgrav 	if ((p = strstr(URL, ":/"))) {
361*8d9de5b1SDag-Erling Smørgrav                 if (p - URL > URL_SCHEMELEN)
362*8d9de5b1SDag-Erling Smørgrav                         goto ouch;
363*8d9de5b1SDag-Erling Smørgrav                 for (i = 0; URL + i < p; i++)
364*8d9de5b1SDag-Erling Smørgrav                         u->scheme[i] = tolower((unsigned char)URL[i]);
36559769ab1SDag-Erling Smørgrav 		URL = ++p;
36659769ab1SDag-Erling Smørgrav 		/*
36759769ab1SDag-Erling Smørgrav 		 * Only one slash: no host, leave slash as part of document
36859769ab1SDag-Erling Smørgrav 		 * Two slashes: host follows, strip slashes
36959769ab1SDag-Erling Smørgrav 		 */
37059769ab1SDag-Erling Smørgrav 		if (URL[1] == '/')
37159769ab1SDag-Erling Smørgrav 			URL = (p += 2);
3721ba84976SDag-Erling Smørgrav 	} else {
3731ba84976SDag-Erling Smørgrav 		p = URL;
374d8acd8dcSDag-Erling Smørgrav 	}
3755b2ad516SDag-Erling Smørgrav 	if (!*URL || *URL == '/' || *URL == '.' ||
37673b3e4dfSStefan Eßer 	    (u->scheme[0] == '\0' &&
37773b3e4dfSStefan Eßer 		strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
3784ca1ab94SDag-Erling Smørgrav 		goto nohost;
3794ca1ab94SDag-Erling Smørgrav 
3804ca1ab94SDag-Erling Smørgrav 	p = strpbrk(URL, "/@");
3810fba3a00SDag-Erling Smørgrav 	if (p && *p == '@') {
3824ca1ab94SDag-Erling Smørgrav 		/* username */
3830fa39199SEd Maste 		q = fetch_pctdecode(u->user, URL, URL_USERLEN);
3844ca1ab94SDag-Erling Smørgrav 
3854ca1ab94SDag-Erling Smørgrav 		/* password */
3864ca1ab94SDag-Erling Smørgrav 		if (*q == ':')
3879bc22394STim Kientzle 			q = fetch_pctdecode(u->pwd, q + 1, URL_PWDLEN);
3884ca1ab94SDag-Erling Smørgrav 
3894ca1ab94SDag-Erling Smørgrav 		p++;
390ab39353eSDag-Erling Smørgrav 	} else {
391ab39353eSDag-Erling Smørgrav 		p = URL;
392ab39353eSDag-Erling Smørgrav 	}
3934ca1ab94SDag-Erling Smørgrav 
3944ca1ab94SDag-Erling Smørgrav 	/* hostname */
395*8d9de5b1SDag-Erling Smørgrav 	if (*p == '[') {
396*8d9de5b1SDag-Erling Smørgrav 		q = p + 1 + strspn(p + 1, ":0123456789ABCDEFabcdef");
397*8d9de5b1SDag-Erling Smørgrav 		if (*q++ != ']')
398*8d9de5b1SDag-Erling Smørgrav 			goto ouch;
39908a49957SDag-Erling Smørgrav 	} else {
400*8d9de5b1SDag-Erling Smørgrav 		/* valid characters in a DNS name */
401*8d9de5b1SDag-Erling Smørgrav 		q = p + strspn(p, "-." "0123456789"
402*8d9de5b1SDag-Erling Smørgrav 		    "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "_"
403*8d9de5b1SDag-Erling Smørgrav 		    "abcdefghijklmnopqrstuvwxyz");
40408a49957SDag-Erling Smørgrav 	}
405*8d9de5b1SDag-Erling Smørgrav 	if ((*q != '\0' && *q != '/' && *q != ':') || q - p > MAXHOSTNAMELEN)
406*8d9de5b1SDag-Erling Smørgrav 		goto ouch;
407*8d9de5b1SDag-Erling Smørgrav 	for (i = 0; p + i < q; i++)
408*8d9de5b1SDag-Erling Smørgrav 		u->host[i] = tolower((unsigned char)p[i]);
409*8d9de5b1SDag-Erling Smørgrav 	u->host[i] = '\0';
410*8d9de5b1SDag-Erling Smørgrav 	p = q;
4114ca1ab94SDag-Erling Smørgrav 
4124ca1ab94SDag-Erling Smørgrav 	/* port */
4134ca1ab94SDag-Erling Smørgrav 	if (*p == ':') {
414*8d9de5b1SDag-Erling Smørgrav 		for (n = 0, q = ++p; *q && (*q != '/'); q++) {
415*8d9de5b1SDag-Erling Smørgrav 			if (*q >= '0' && *q <= '9' && n < INT_MAX / 10) {
416*8d9de5b1SDag-Erling Smørgrav 				n = n * 10 + (*q - '0');
417*8d9de5b1SDag-Erling Smørgrav 			} else {
418d8acd8dcSDag-Erling Smørgrav 				/* invalid port */
419a1b37df2SDag-Erling Smørgrav 				url_seterr(URL_BAD_PORT);
420d8acd8dcSDag-Erling Smørgrav 				goto ouch;
421d8acd8dcSDag-Erling Smørgrav 			}
422*8d9de5b1SDag-Erling Smørgrav 		}
423*8d9de5b1SDag-Erling Smørgrav 		if (n < 1 || n > IPPORT_MAX)
424*8d9de5b1SDag-Erling Smørgrav 			goto ouch;
425*8d9de5b1SDag-Erling Smørgrav 		u->port = n;
426551858f0SDag-Erling Smørgrav 		p = q;
4274ca1ab94SDag-Erling Smørgrav 	}
4284ca1ab94SDag-Erling Smørgrav 
4294ca1ab94SDag-Erling Smørgrav nohost:
4304ca1ab94SDag-Erling Smørgrav 	/* document */
43160245e42SDag-Erling Smørgrav 	if (!*p)
43260245e42SDag-Erling Smørgrav 		p = "/";
43360245e42SDag-Erling Smørgrav 
434*8d9de5b1SDag-Erling Smørgrav 	if (strcmp(u->scheme, SCHEME_HTTP) == 0 ||
435*8d9de5b1SDag-Erling Smørgrav 	    strcmp(u->scheme, SCHEME_HTTPS) == 0) {
43623fe6d7aSDag-Erling Smørgrav 		const char hexnums[] = "0123456789abcdef";
43723fe6d7aSDag-Erling Smørgrav 
438f9c2053bSDag-Erling Smørgrav 		/* percent-escape whitespace. */
439f9c2053bSDag-Erling Smørgrav 		if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
440a1b37df2SDag-Erling Smørgrav 			fetch_syserr();
44123fe6d7aSDag-Erling Smørgrav 			goto ouch;
44223fe6d7aSDag-Erling Smørgrav 		}
443f9c2053bSDag-Erling Smørgrav 		u->doc = doc;
44423fe6d7aSDag-Erling Smørgrav 		while (*p != '\0') {
445facd9827SDag-Erling Smørgrav 			if (!isspace((unsigned char)*p)) {
44623fe6d7aSDag-Erling Smørgrav 				*doc++ = *p++;
44723fe6d7aSDag-Erling Smørgrav 			} else {
44823fe6d7aSDag-Erling Smørgrav 				*doc++ = '%';
44923fe6d7aSDag-Erling Smørgrav 				*doc++ = hexnums[((unsigned int)*p) >> 4];
45023fe6d7aSDag-Erling Smørgrav 				*doc++ = hexnums[((unsigned int)*p) & 0xf];
45123fe6d7aSDag-Erling Smørgrav 				p++;
45223fe6d7aSDag-Erling Smørgrav 			}
45323fe6d7aSDag-Erling Smørgrav 		}
45423fe6d7aSDag-Erling Smørgrav 		*doc = '\0';
45523fe6d7aSDag-Erling Smørgrav 	} else if ((u->doc = strdup(p)) == NULL) {
456a1b37df2SDag-Erling Smørgrav 		fetch_syserr();
4574ca1ab94SDag-Erling Smørgrav 		goto ouch;
458d8acd8dcSDag-Erling Smørgrav 	}
4594ca1ab94SDag-Erling Smørgrav 
460c5712d6dSDag-Erling Smørgrav 	DEBUGF("scheme:   \"%s\"\n"
46108a49957SDag-Erling Smørgrav 	    "user:     \"%s\"\n"
46208a49957SDag-Erling Smørgrav 	    "password: \"%s\"\n"
46308a49957SDag-Erling Smørgrav 	    "host:     \"%s\"\n"
46408a49957SDag-Erling Smørgrav 	    "port:     \"%d\"\n"
46508a49957SDag-Erling Smørgrav 	    "document: \"%s\"\n",
4664ca1ab94SDag-Erling Smørgrav 	    u->scheme, u->user, u->pwd,
467c5712d6dSDag-Erling Smørgrav 	    u->host, u->port, u->doc);
4684ca1ab94SDag-Erling Smørgrav 
469e19e6098SDag-Erling Smørgrav 	return (u);
4704ca1ab94SDag-Erling Smørgrav 
4714ca1ab94SDag-Erling Smørgrav ouch:
4724ca1ab94SDag-Erling Smørgrav 	free(u);
473e19e6098SDag-Erling Smørgrav 	return (NULL);
4744ca1ab94SDag-Erling Smørgrav }
47560245e42SDag-Erling Smørgrav 
47660245e42SDag-Erling Smørgrav /*
47760245e42SDag-Erling Smørgrav  * Free a URL
47860245e42SDag-Erling Smørgrav  */
47960245e42SDag-Erling Smørgrav void
48060245e42SDag-Erling Smørgrav fetchFreeURL(struct url *u)
48160245e42SDag-Erling Smørgrav {
48260245e42SDag-Erling Smørgrav 	free(u->doc);
48360245e42SDag-Erling Smørgrav 	free(u);
48460245e42SDag-Erling Smørgrav }
485