14ca1ab94SDag-Erling Smørgrav /*-
25e53a4f9SPedro F. Giffuni * SPDX-License-Identifier: BSD-3-Clause
35e53a4f9SPedro F. Giffuni *
4578153f1SDag-Erling Smørgrav * Copyright (c) 1998-2004 Dag-Erling Smørgrav
54ca1ab94SDag-Erling Smørgrav * All rights reserved.
64ca1ab94SDag-Erling Smørgrav *
74ca1ab94SDag-Erling Smørgrav * Redistribution and use in source and binary forms, with or without
84ca1ab94SDag-Erling Smørgrav * modification, are permitted provided that the following conditions
94ca1ab94SDag-Erling Smørgrav * are met:
104ca1ab94SDag-Erling Smørgrav * 1. Redistributions of source code must retain the above copyright
114ca1ab94SDag-Erling Smørgrav * notice, this list of conditions and the following disclaimer
124ca1ab94SDag-Erling Smørgrav * in this position and unchanged.
134ca1ab94SDag-Erling Smørgrav * 2. Redistributions in binary form must reproduce the above copyright
144ca1ab94SDag-Erling Smørgrav * notice, this list of conditions and the following disclaimer in the
154ca1ab94SDag-Erling Smørgrav * documentation and/or other materials provided with the distribution.
164ca1ab94SDag-Erling Smørgrav * 3. The name of the author may not be used to endorse or promote products
174ca1ab94SDag-Erling Smørgrav * derived from this software without specific prior written permission
184ca1ab94SDag-Erling Smørgrav *
194ca1ab94SDag-Erling Smørgrav * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
204ca1ab94SDag-Erling Smørgrav * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
214ca1ab94SDag-Erling Smørgrav * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
224ca1ab94SDag-Erling Smørgrav * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
234ca1ab94SDag-Erling Smørgrav * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
244ca1ab94SDag-Erling Smørgrav * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
254ca1ab94SDag-Erling Smørgrav * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
264ca1ab94SDag-Erling Smørgrav * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
274ca1ab94SDag-Erling Smørgrav * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
284ca1ab94SDag-Erling Smørgrav * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
294ca1ab94SDag-Erling Smørgrav */
304ca1ab94SDag-Erling Smørgrav
314ca1ab94SDag-Erling Smørgrav #include <sys/param.h>
324ca1ab94SDag-Erling Smørgrav
338d9de5b1SDag-Erling Smørgrav #include <netinet/in.h>
348d9de5b1SDag-Erling Smørgrav
358d9de5b1SDag-Erling Smørgrav #include <errno.h>
364ca1ab94SDag-Erling Smørgrav #include <ctype.h>
374ca1ab94SDag-Erling Smørgrav #include <stdio.h>
384ca1ab94SDag-Erling Smørgrav #include <stdlib.h>
394ca1ab94SDag-Erling Smørgrav #include <string.h>
404ca1ab94SDag-Erling Smørgrav
414ca1ab94SDag-Erling Smørgrav #include "fetch.h"
42d8acd8dcSDag-Erling Smørgrav #include "common.h"
434ca1ab94SDag-Erling Smørgrav
446490b215SDag-Erling Smørgrav auth_t fetchAuthMethod;
450fba3a00SDag-Erling Smørgrav int fetchLastErrCode;
46ba101983SDag-Erling Smørgrav char fetchLastErrString[MAXERRSTRING];
47fc6e9e65SDag-Erling Smørgrav int fetchTimeout;
48a1bb3f48SDag-Erling Smørgrav int fetchRestartCalls = 1;
497eb2f34dSDag-Erling Smørgrav int fetchDebug;
500fba3a00SDag-Erling Smørgrav
510fba3a00SDag-Erling Smørgrav
/*** Local data **************************************************************/
53d8acd8dcSDag-Erling Smørgrav
54d8acd8dcSDag-Erling Smørgrav /*
55d8acd8dcSDag-Erling Smørgrav * Error messages for parser errors
56d8acd8dcSDag-Erling Smørgrav */
57d8acd8dcSDag-Erling Smørgrav #define URL_MALFORMED 1
58d8acd8dcSDag-Erling Smørgrav #define URL_BAD_SCHEME 2
59d8acd8dcSDag-Erling Smørgrav #define URL_BAD_PORT 3
60a1b37df2SDag-Erling Smørgrav static struct fetcherr url_errlist[] = {
61d8acd8dcSDag-Erling Smørgrav { URL_MALFORMED, FETCH_URL, "Malformed URL" },
62d8acd8dcSDag-Erling Smørgrav { URL_BAD_SCHEME, FETCH_URL, "Invalid URL scheme" },
63d8acd8dcSDag-Erling Smørgrav { URL_BAD_PORT, FETCH_URL, "Invalid server port" },
64d8acd8dcSDag-Erling Smørgrav { -1, FETCH_UNKNOWN, "Unknown parser error" }
65d8acd8dcSDag-Erling Smørgrav };
66d8acd8dcSDag-Erling Smørgrav
67d8acd8dcSDag-Erling Smørgrav
/*** Public API **************************************************************/
694ca1ab94SDag-Erling Smørgrav
70842a95ccSDag-Erling Smørgrav /*
71842a95ccSDag-Erling Smørgrav * Select the appropriate protocol for the URL scheme, and return a
72842a95ccSDag-Erling Smørgrav * read-only stream connected to the document referenced by the URL.
731a5faa10SDag-Erling Smørgrav * Also fill out the struct url_stat.
74842a95ccSDag-Erling Smørgrav */
75ecc91352SDag-Erling Smørgrav FILE *
fetchXGet(struct url * URL,struct url_stat * us,const char * flags)7638c7e4a6SArchie Cobbs fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
77ecc91352SDag-Erling Smørgrav {
78c97925adSHajimu UMEMOTO
7902e6bec1SDag-Erling Smørgrav if (us != NULL) {
8002e6bec1SDag-Erling Smørgrav us->size = -1;
8102e6bec1SDag-Erling Smørgrav us->atime = us->mtime = 0;
8202e6bec1SDag-Erling Smørgrav }
838d9de5b1SDag-Erling Smørgrav if (strcmp(URL->scheme, SCHEME_FILE) == 0)
84e19e6098SDag-Erling Smørgrav return (fetchXGetFile(URL, us, flags));
858d9de5b1SDag-Erling Smørgrav else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
86111e2510SDag-Erling Smørgrav return (fetchXGetFTP(URL, us, flags));
878d9de5b1SDag-Erling Smørgrav else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
88e19e6098SDag-Erling Smørgrav return (fetchXGetHTTP(URL, us, flags));
898d9de5b1SDag-Erling Smørgrav else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
90111e2510SDag-Erling Smørgrav return (fetchXGetHTTP(URL, us, flags));
91a1b37df2SDag-Erling Smørgrav url_seterr(URL_BAD_SCHEME);
92e19e6098SDag-Erling Smørgrav return (NULL);
93d8acd8dcSDag-Erling Smørgrav }
94ecc91352SDag-Erling Smørgrav
/*
 * Convenience wrapper around fetchXGet() for callers that do not want
 * document metadata: no url_stat is supplied.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
	struct url_stat *const no_stat = NULL;

	return (fetchXGet(URL, no_stat, flags));
}
1041a5faa10SDag-Erling Smørgrav
1051a5faa10SDag-Erling Smørgrav /*
1061a5faa10SDag-Erling Smørgrav * Select the appropriate protocol for the URL scheme, and return a
107842a95ccSDag-Erling Smørgrav * write-only stream connected to the document referenced by the URL.
108842a95ccSDag-Erling Smørgrav */
109ecc91352SDag-Erling Smørgrav FILE *
fetchPut(struct url * URL,const char * flags)11038c7e4a6SArchie Cobbs fetchPut(struct url *URL, const char *flags)
111ecc91352SDag-Erling Smørgrav {
112c97925adSHajimu UMEMOTO
1138d9de5b1SDag-Erling Smørgrav if (strcmp(URL->scheme, SCHEME_FILE) == 0)
114e19e6098SDag-Erling Smørgrav return (fetchPutFile(URL, flags));
1158d9de5b1SDag-Erling Smørgrav else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
116111e2510SDag-Erling Smørgrav return (fetchPutFTP(URL, flags));
1178d9de5b1SDag-Erling Smørgrav else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
118e19e6098SDag-Erling Smørgrav return (fetchPutHTTP(URL, flags));
1198d9de5b1SDag-Erling Smørgrav else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
120111e2510SDag-Erling Smørgrav return (fetchPutHTTP(URL, flags));
121a1b37df2SDag-Erling Smørgrav url_seterr(URL_BAD_SCHEME);
122e19e6098SDag-Erling Smørgrav return (NULL);
123d8acd8dcSDag-Erling Smørgrav }
124d8acd8dcSDag-Erling Smørgrav
125d8acd8dcSDag-Erling Smørgrav /*
126d8acd8dcSDag-Erling Smørgrav * Select the appropriate protocol for the URL scheme, and return the
127d8acd8dcSDag-Erling Smørgrav * size of the document referenced by the URL if it exists.
128d8acd8dcSDag-Erling Smørgrav */
129d8acd8dcSDag-Erling Smørgrav int
fetchStat(struct url * URL,struct url_stat * us,const char * flags)13038c7e4a6SArchie Cobbs fetchStat(struct url *URL, struct url_stat *us, const char *flags)
131d8acd8dcSDag-Erling Smørgrav {
132c97925adSHajimu UMEMOTO
13302e6bec1SDag-Erling Smørgrav if (us != NULL) {
13402e6bec1SDag-Erling Smørgrav us->size = -1;
13502e6bec1SDag-Erling Smørgrav us->atime = us->mtime = 0;
13602e6bec1SDag-Erling Smørgrav }
1378d9de5b1SDag-Erling Smørgrav if (strcmp(URL->scheme, SCHEME_FILE) == 0)
138e19e6098SDag-Erling Smørgrav return (fetchStatFile(URL, us, flags));
1398d9de5b1SDag-Erling Smørgrav else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
140e19e6098SDag-Erling Smørgrav return (fetchStatFTP(URL, us, flags));
1418d9de5b1SDag-Erling Smørgrav else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
142111e2510SDag-Erling Smørgrav return (fetchStatHTTP(URL, us, flags));
1438d9de5b1SDag-Erling Smørgrav else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
144111e2510SDag-Erling Smørgrav return (fetchStatHTTP(URL, us, flags));
145a1b37df2SDag-Erling Smørgrav url_seterr(URL_BAD_SCHEME);
146e19e6098SDag-Erling Smørgrav return (-1);
147ecc91352SDag-Erling Smørgrav }
148ecc91352SDag-Erling Smørgrav
149842a95ccSDag-Erling Smørgrav /*
150ce71b736SDag-Erling Smørgrav * Select the appropriate protocol for the URL scheme, and return a
151ce71b736SDag-Erling Smørgrav * list of files in the directory pointed to by the URL.
152ce71b736SDag-Erling Smørgrav */
153ce71b736SDag-Erling Smørgrav struct url_ent *
fetchList(struct url * URL,const char * flags)15438c7e4a6SArchie Cobbs fetchList(struct url *URL, const char *flags)
155ce71b736SDag-Erling Smørgrav {
156c97925adSHajimu UMEMOTO
1578d9de5b1SDag-Erling Smørgrav if (strcmp(URL->scheme, SCHEME_FILE) == 0)
158e19e6098SDag-Erling Smørgrav return (fetchListFile(URL, flags));
1598d9de5b1SDag-Erling Smørgrav else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
160e19e6098SDag-Erling Smørgrav return (fetchListFTP(URL, flags));
1618d9de5b1SDag-Erling Smørgrav else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
162111e2510SDag-Erling Smørgrav return (fetchListHTTP(URL, flags));
1638d9de5b1SDag-Erling Smørgrav else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
164111e2510SDag-Erling Smørgrav return (fetchListHTTP(URL, flags));
165a1b37df2SDag-Erling Smørgrav url_seterr(URL_BAD_SCHEME);
166e19e6098SDag-Erling Smørgrav return (NULL);
167ce71b736SDag-Erling Smørgrav }
168ce71b736SDag-Erling Smørgrav
/*
 * String front end to fetchXGet(): parse URL, open it for reading, and
 * release the temporary parsed URL before returning.
 *
 * Returns NULL if the URL cannot be parsed; otherwise returns the result
 * of fetchXGet().
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchXGet(parsed, us, flags);
	fetchFreeURL(parsed);

	return (stream);
}
1864ca1ab94SDag-Erling Smørgrav
/*
 * String front end for reading without metadata: identical to
 * fetchXGetURL() with no url_stat supplied.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
	struct url_stat *const no_stat = NULL;

	return (fetchXGetURL(URL, no_stat, flags));
}
1954ca1ab94SDag-Erling Smørgrav
/*
 * String front end to fetchPut(): parse URL, open it for writing, and
 * release the temporary parsed URL before returning.
 *
 * Returns NULL if the URL cannot be parsed; otherwise returns the result
 * of fetchPut().
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchPut(parsed, flags);
	fetchFreeURL(parsed);

	return (stream);
}
2134ca1ab94SDag-Erling Smørgrav
2144ca1ab94SDag-Erling Smørgrav /*
215d8acd8dcSDag-Erling Smørgrav * Attempt to parse the given URL; if successful, call fetchStat().
216d8acd8dcSDag-Erling Smørgrav */
217d8acd8dcSDag-Erling Smørgrav int
fetchStatURL(const char * URL,struct url_stat * us,const char * flags)21838c7e4a6SArchie Cobbs fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
219d8acd8dcSDag-Erling Smørgrav {
220d8acd8dcSDag-Erling Smørgrav struct url *u;
221d8acd8dcSDag-Erling Smørgrav int s;
222d8acd8dcSDag-Erling Smørgrav
223d8acd8dcSDag-Erling Smørgrav if ((u = fetchParseURL(URL)) == NULL)
224e19e6098SDag-Erling Smørgrav return (-1);
225d8acd8dcSDag-Erling Smørgrav
226d8acd8dcSDag-Erling Smørgrav s = fetchStat(u, us, flags);
227d8acd8dcSDag-Erling Smørgrav
22860245e42SDag-Erling Smørgrav fetchFreeURL(u);
229e19e6098SDag-Erling Smørgrav return (s);
230d8acd8dcSDag-Erling Smørgrav }
231d8acd8dcSDag-Erling Smørgrav
232d8acd8dcSDag-Erling Smørgrav /*
233ce71b736SDag-Erling Smørgrav * Attempt to parse the given URL; if successful, call fetchList().
234ce71b736SDag-Erling Smørgrav */
235ce71b736SDag-Erling Smørgrav struct url_ent *
fetchListURL(const char * URL,const char * flags)23638c7e4a6SArchie Cobbs fetchListURL(const char *URL, const char *flags)
237ce71b736SDag-Erling Smørgrav {
238ce71b736SDag-Erling Smørgrav struct url *u;
239ce71b736SDag-Erling Smørgrav struct url_ent *ue;
240ce71b736SDag-Erling Smørgrav
241ce71b736SDag-Erling Smørgrav if ((u = fetchParseURL(URL)) == NULL)
242e19e6098SDag-Erling Smørgrav return (NULL);
243ce71b736SDag-Erling Smørgrav
244ce71b736SDag-Erling Smørgrav ue = fetchList(u, flags);
245ce71b736SDag-Erling Smørgrav
24660245e42SDag-Erling Smørgrav fetchFreeURL(u);
247e19e6098SDag-Erling Smørgrav return (ue);
248ce71b736SDag-Erling Smørgrav }
249ce71b736SDag-Erling Smørgrav
250ce71b736SDag-Erling Smørgrav /*
2519a964d6aSDag-Erling Smørgrav * Make a URL
2529a964d6aSDag-Erling Smørgrav */
2539a964d6aSDag-Erling Smørgrav struct url *
fetchMakeURL(const char * scheme,const char * host,int port,const char * doc,const char * user,const char * pwd)25438c7e4a6SArchie Cobbs fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
25538c7e4a6SArchie Cobbs const char *user, const char *pwd)
2569a964d6aSDag-Erling Smørgrav {
2579a964d6aSDag-Erling Smørgrav struct url *u;
2589a964d6aSDag-Erling Smørgrav
2599a964d6aSDag-Erling Smørgrav if (!scheme || (!host && !doc)) {
260a1b37df2SDag-Erling Smørgrav url_seterr(URL_MALFORMED);
261e19e6098SDag-Erling Smørgrav return (NULL);
2629a964d6aSDag-Erling Smørgrav }
2639a964d6aSDag-Erling Smørgrav
2649a964d6aSDag-Erling Smørgrav if (port < 0 || port > 65535) {
265a1b37df2SDag-Erling Smørgrav url_seterr(URL_BAD_PORT);
266e19e6098SDag-Erling Smørgrav return (NULL);
2679a964d6aSDag-Erling Smørgrav }
2689a964d6aSDag-Erling Smørgrav
2699a964d6aSDag-Erling Smørgrav /* allocate struct url */
270930105c1SDag-Erling Smørgrav if ((u = calloc(1, sizeof(*u))) == NULL) {
271a1b37df2SDag-Erling Smørgrav fetch_syserr();
272e19e6098SDag-Erling Smørgrav return (NULL);
2739a964d6aSDag-Erling Smørgrav }
2745f04ebd4SDag-Erling Smørgrav u->netrcfd = -1;
2759a964d6aSDag-Erling Smørgrav
2769a964d6aSDag-Erling Smørgrav if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
277a1b37df2SDag-Erling Smørgrav fetch_syserr();
2789a964d6aSDag-Erling Smørgrav free(u);
279e19e6098SDag-Erling Smørgrav return (NULL);
2809a964d6aSDag-Erling Smørgrav }
2819a964d6aSDag-Erling Smørgrav
282930105c1SDag-Erling Smørgrav #define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x)
2839a964d6aSDag-Erling Smørgrav seturl(scheme);
2849a964d6aSDag-Erling Smørgrav seturl(host);
2859a964d6aSDag-Erling Smørgrav seturl(user);
2869a964d6aSDag-Erling Smørgrav seturl(pwd);
2879a964d6aSDag-Erling Smørgrav #undef seturl
2889a964d6aSDag-Erling Smørgrav u->port = port;
2899a964d6aSDag-Erling Smørgrav
290e19e6098SDag-Erling Smørgrav return (u);
2919a964d6aSDag-Erling Smørgrav }
2929a964d6aSDag-Erling Smørgrav
/*
 * Return the numeric value (0-15) of the hexadecimal digit ch, accepting
 * both upper- and lower-case letters, or -1 if ch is not a hex digit.
 */
static int
fetch_hexval(char ch)
{

	if (ch >= '0' && ch <= '9')
		return (ch - '0');
	if (ch >= 'a' && ch <= 'f')
		return (ch - 'a' + 10);
	if (ch >= 'A' && ch <= 'F')
		return (ch - 'A' + 10);
	return (-1);
}
3080fa39199SEd Maste
/*
 * Percent-decode a URL component from src into dst.
 *
 * Decoding stops at the end of the string or at the first literal '@'
 * or ':' separator.  At most dlen bytes are stored in dst and no
 * terminator is written; that is the caller's responsibility.
 *
 * Returns a pointer to the first unconsumed input character (the null
 * terminator, '@' or ':'), or NULL if the input holds an invalid escape
 * ('%' not followed by two hex digits, or "%00") or the decoded text
 * does not fit in dlen bytes.
 */
static const char *
fetch_pctdecode(char *dst, const char *src, size_t dlen)
{
	const char *in;
	size_t used;
	int hi, lo;
	char out;

	in = src;
	used = 0;
	while (*in != '\0' && *in != '@' && *in != ':') {
		if (*in != '%') {
			/* ordinary character: copy through */
			out = *in;
			in += 1;
		} else {
			/*
			 * Look at the second digit only once the first is
			 * known to be valid, so the scan never runs past
			 * the terminator.
			 */
			hi = fetch_hexval(in[1]);
			if (hi < 0)
				return (NULL);
			lo = fetch_hexval(in[2]);
			if (lo < 0 || (hi == 0 && lo == 0))
				return (NULL);
			out = hi << 4 | lo;
			in += 3;
		}
		if (used >= dlen)
			return (NULL);
		dst[used++] = out;
	}
	return (in);
}
3400fa39199SEd Maste
3410fa39199SEd Maste /*
3424ca1ab94SDag-Erling Smørgrav * Split an URL into components. URL syntax is:
34359769ab1SDag-Erling Smørgrav * [method:/][/[user[:pwd]@]host[:port]/][document]
3444ca1ab94SDag-Erling Smørgrav * This almost, but not quite, RFC1738 URL syntax.
3454ca1ab94SDag-Erling Smørgrav */
346d8acd8dcSDag-Erling Smørgrav struct url *
fetchParseURL(const char * URL)34738c7e4a6SArchie Cobbs fetchParseURL(const char *URL)
3484ca1ab94SDag-Erling Smørgrav {
34938c7e4a6SArchie Cobbs char *doc;
35038c7e4a6SArchie Cobbs const char *p, *q;
351d8acd8dcSDag-Erling Smørgrav struct url *u;
3528d9de5b1SDag-Erling Smørgrav int i, n;
3534ca1ab94SDag-Erling Smørgrav
354d8acd8dcSDag-Erling Smørgrav /* allocate struct url */
355930105c1SDag-Erling Smørgrav if ((u = calloc(1, sizeof(*u))) == NULL) {
356a1b37df2SDag-Erling Smørgrav fetch_syserr();
357e19e6098SDag-Erling Smørgrav return (NULL);
358d8acd8dcSDag-Erling Smørgrav }
3595f04ebd4SDag-Erling Smørgrav u->netrcfd = -1;
3604ca1ab94SDag-Erling Smørgrav
3614ca1ab94SDag-Erling Smørgrav /* scheme name */
36259769ab1SDag-Erling Smørgrav if ((p = strstr(URL, ":/"))) {
3638d9de5b1SDag-Erling Smørgrav if (p - URL > URL_SCHEMELEN)
3648d9de5b1SDag-Erling Smørgrav goto ouch;
3658d9de5b1SDag-Erling Smørgrav for (i = 0; URL + i < p; i++)
3668d9de5b1SDag-Erling Smørgrav u->scheme[i] = tolower((unsigned char)URL[i]);
36759769ab1SDag-Erling Smørgrav URL = ++p;
36859769ab1SDag-Erling Smørgrav /*
36959769ab1SDag-Erling Smørgrav * Only one slash: no host, leave slash as part of document
37059769ab1SDag-Erling Smørgrav * Two slashes: host follows, strip slashes
37159769ab1SDag-Erling Smørgrav */
37259769ab1SDag-Erling Smørgrav if (URL[1] == '/')
37359769ab1SDag-Erling Smørgrav URL = (p += 2);
3741ba84976SDag-Erling Smørgrav } else {
3751ba84976SDag-Erling Smørgrav p = URL;
376d8acd8dcSDag-Erling Smørgrav }
3775b2ad516SDag-Erling Smørgrav if (!*URL || *URL == '/' || *URL == '.' ||
37873b3e4dfSStefan Eßer (u->scheme[0] == '\0' &&
37973b3e4dfSStefan Eßer strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
3804ca1ab94SDag-Erling Smørgrav goto nohost;
3814ca1ab94SDag-Erling Smørgrav
3824ca1ab94SDag-Erling Smørgrav p = strpbrk(URL, "/@");
3830fba3a00SDag-Erling Smørgrav if (p && *p == '@') {
3844ca1ab94SDag-Erling Smørgrav /* username */
3850fa39199SEd Maste q = fetch_pctdecode(u->user, URL, URL_USERLEN);
3866fb3f994SGordon Tetlow if (q == NULL)
3876fb3f994SGordon Tetlow goto ouch;
3884ca1ab94SDag-Erling Smørgrav
3894ca1ab94SDag-Erling Smørgrav /* password */
3906fb3f994SGordon Tetlow if (*q == ':') {
3919bc22394STim Kientzle q = fetch_pctdecode(u->pwd, q + 1, URL_PWDLEN);
3926fb3f994SGordon Tetlow if (q == NULL)
3936fb3f994SGordon Tetlow goto ouch;
3946fb3f994SGordon Tetlow }
3954ca1ab94SDag-Erling Smørgrav p++;
396ab39353eSDag-Erling Smørgrav } else {
397ab39353eSDag-Erling Smørgrav p = URL;
398ab39353eSDag-Erling Smørgrav }
3994ca1ab94SDag-Erling Smørgrav
4004ca1ab94SDag-Erling Smørgrav /* hostname */
4018d9de5b1SDag-Erling Smørgrav if (*p == '[') {
4028cd71a0eSKa Ho Ng q = p + 1 + strspn(p + 1, ":0123456789ABCDEFabcdef.");
4038d9de5b1SDag-Erling Smørgrav if (*q++ != ']')
4048d9de5b1SDag-Erling Smørgrav goto ouch;
40508a49957SDag-Erling Smørgrav } else {
4068d9de5b1SDag-Erling Smørgrav /* valid characters in a DNS name */
4078d9de5b1SDag-Erling Smørgrav q = p + strspn(p, "-." "0123456789"
4088d9de5b1SDag-Erling Smørgrav "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "_"
4098d9de5b1SDag-Erling Smørgrav "abcdefghijklmnopqrstuvwxyz");
41008a49957SDag-Erling Smørgrav }
4118d9de5b1SDag-Erling Smørgrav if ((*q != '\0' && *q != '/' && *q != ':') || q - p > MAXHOSTNAMELEN)
4128d9de5b1SDag-Erling Smørgrav goto ouch;
4138d9de5b1SDag-Erling Smørgrav for (i = 0; p + i < q; i++)
4148d9de5b1SDag-Erling Smørgrav u->host[i] = tolower((unsigned char)p[i]);
4158d9de5b1SDag-Erling Smørgrav u->host[i] = '\0';
4168d9de5b1SDag-Erling Smørgrav p = q;
4174ca1ab94SDag-Erling Smørgrav
4184ca1ab94SDag-Erling Smørgrav /* port */
4194ca1ab94SDag-Erling Smørgrav if (*p == ':') {
4208d9de5b1SDag-Erling Smørgrav for (n = 0, q = ++p; *q && (*q != '/'); q++) {
4218d9de5b1SDag-Erling Smørgrav if (*q >= '0' && *q <= '9' && n < INT_MAX / 10) {
4228d9de5b1SDag-Erling Smørgrav n = n * 10 + (*q - '0');
4238d9de5b1SDag-Erling Smørgrav } else {
424d8acd8dcSDag-Erling Smørgrav /* invalid port */
425a1b37df2SDag-Erling Smørgrav url_seterr(URL_BAD_PORT);
426d8acd8dcSDag-Erling Smørgrav goto ouch;
427d8acd8dcSDag-Erling Smørgrav }
4288d9de5b1SDag-Erling Smørgrav }
429fb860ed0SKa Ho Ng if (p != q && (n < 1 || n > IPPORT_MAX))
4308d9de5b1SDag-Erling Smørgrav goto ouch;
4318d9de5b1SDag-Erling Smørgrav u->port = n;
432551858f0SDag-Erling Smørgrav p = q;
4334ca1ab94SDag-Erling Smørgrav }
4344ca1ab94SDag-Erling Smørgrav
4354ca1ab94SDag-Erling Smørgrav nohost:
4364ca1ab94SDag-Erling Smørgrav /* document */
43760245e42SDag-Erling Smørgrav if (!*p)
43860245e42SDag-Erling Smørgrav p = "/";
43960245e42SDag-Erling Smørgrav
4408d9de5b1SDag-Erling Smørgrav if (strcmp(u->scheme, SCHEME_HTTP) == 0 ||
4418d9de5b1SDag-Erling Smørgrav strcmp(u->scheme, SCHEME_HTTPS) == 0) {
44223fe6d7aSDag-Erling Smørgrav const char hexnums[] = "0123456789abcdef";
44323fe6d7aSDag-Erling Smørgrav
444f9c2053bSDag-Erling Smørgrav /* percent-escape whitespace. */
445f9c2053bSDag-Erling Smørgrav if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
446a1b37df2SDag-Erling Smørgrav fetch_syserr();
44723fe6d7aSDag-Erling Smørgrav goto ouch;
44823fe6d7aSDag-Erling Smørgrav }
449f9c2053bSDag-Erling Smørgrav u->doc = doc;
450*1af7d5f3SPietro Cerutti /* fragments are reserved for client-side processing, see
451*1af7d5f3SPietro Cerutti * https://www.rfc-editor.org/rfc/rfc9110.html#section-7.1
452*1af7d5f3SPietro Cerutti */
453*1af7d5f3SPietro Cerutti while (*p != '\0' && *p != '#') {
454facd9827SDag-Erling Smørgrav if (!isspace((unsigned char)*p)) {
45523fe6d7aSDag-Erling Smørgrav *doc++ = *p++;
45623fe6d7aSDag-Erling Smørgrav } else {
45723fe6d7aSDag-Erling Smørgrav *doc++ = '%';
45823fe6d7aSDag-Erling Smørgrav *doc++ = hexnums[((unsigned int)*p) >> 4];
45923fe6d7aSDag-Erling Smørgrav *doc++ = hexnums[((unsigned int)*p) & 0xf];
46023fe6d7aSDag-Erling Smørgrav p++;
46123fe6d7aSDag-Erling Smørgrav }
46223fe6d7aSDag-Erling Smørgrav }
46323fe6d7aSDag-Erling Smørgrav *doc = '\0';
46423fe6d7aSDag-Erling Smørgrav } else if ((u->doc = strdup(p)) == NULL) {
465a1b37df2SDag-Erling Smørgrav fetch_syserr();
4664ca1ab94SDag-Erling Smørgrav goto ouch;
467d8acd8dcSDag-Erling Smørgrav }
4684ca1ab94SDag-Erling Smørgrav
469c5712d6dSDag-Erling Smørgrav DEBUGF("scheme: \"%s\"\n"
47008a49957SDag-Erling Smørgrav "user: \"%s\"\n"
47108a49957SDag-Erling Smørgrav "password: \"%s\"\n"
47208a49957SDag-Erling Smørgrav "host: \"%s\"\n"
47308a49957SDag-Erling Smørgrav "port: \"%d\"\n"
47408a49957SDag-Erling Smørgrav "document: \"%s\"\n",
4754ca1ab94SDag-Erling Smørgrav u->scheme, u->user, u->pwd,
476c5712d6dSDag-Erling Smørgrav u->host, u->port, u->doc);
4774ca1ab94SDag-Erling Smørgrav
478e19e6098SDag-Erling Smørgrav return (u);
4794ca1ab94SDag-Erling Smørgrav
4804ca1ab94SDag-Erling Smørgrav ouch:
4814ca1ab94SDag-Erling Smørgrav free(u);
482e19e6098SDag-Erling Smørgrav return (NULL);
4834ca1ab94SDag-Erling Smørgrav }
48460245e42SDag-Erling Smørgrav
48560245e42SDag-Erling Smørgrav /*
48660245e42SDag-Erling Smørgrav * Free a URL
48760245e42SDag-Erling Smørgrav */
48860245e42SDag-Erling Smørgrav void
fetchFreeURL(struct url * u)48960245e42SDag-Erling Smørgrav fetchFreeURL(struct url *u)
49060245e42SDag-Erling Smørgrav {
49160245e42SDag-Erling Smørgrav free(u->doc);
49260245e42SDag-Erling Smørgrav free(u);
49360245e42SDag-Erling Smørgrav }
494