xref: /freebsd/lib/libfetch/fetch.c (revision d59a76183470685bdf0b88013d2baad1f04f030f)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1998-2004 Dag-Erling Smørgrav
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer
12  *    in this position and unchanged.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. The name of the author may not be used to endorse or promote products
17  *    derived from this software without specific prior written permission
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #include <sys/param.h>
32 
33 #include <netinet/in.h>
34 
35 #include <errno.h>
36 #include <ctype.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 
41 #include "fetch.h"
42 #include "common.h"
43 
/*
 * Library-wide state.  None of these objects is declared static here,
 * so they all have external linkage.  fetchRestartCalls is the only
 * one with an explicit initializer (1); the others start out zeroed.
 *
 * NOTE(review): the per-variable meaning is not visible in this file;
 * the short notes below are read off the names and types only --
 * confirm against fetch.h / fetch(3) before relying on them.
 */
auth_t	 fetchAuthMethod;	/* presumably an authentication hook */
int	 fetchLastErrCode;	/* presumably the most recent error code */
char	 fetchLastErrString[MAXERRSTRING]; /* presumably its message text */
int	 fetchTimeout;		/* presumably a timeout; units not shown here */
int	 fetchRestartCalls = 1;	/* presumably a boolean; on by default */
int	 fetchDebug;		/* presumably enables debug output */
50 
51 
52 /*** Local data **************************************************************/
53 
/*
 * Error messages for parser errors.
 *
 * The URL_* values are the codes handed to url_seterr() by the
 * routines in this file.  Each table row pairs one of those codes with
 * the FETCH_URL category and a human-readable message.  The last row
 * (code -1, category FETCH_UNKNOWN) carries the "Unknown parser error"
 * text -- presumably the fallback/terminator entry for the lookup done
 * behind url_seterr(); confirm against common.c.
 */
#define URL_MALFORMED		1
#define URL_BAD_SCHEME		2
#define URL_BAD_PORT		3
static struct fetcherr url_errlist[] = {
	{ URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
	{ URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
	{ URL_BAD_PORT,		FETCH_URL,	"Invalid server port" },
	{ -1,			FETCH_UNKNOWN,	"Unknown parser error" }
};
66 
67 
68 /*** Public API **************************************************************/
69 
70 /*
71  * Select the appropriate protocol for the URL scheme, and return a
72  * read-only stream connected to the document referenced by the URL.
73  * Also fill out the struct url_stat.
74  */
75 FILE *
76 fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
77 {
78 
79 	if (us != NULL) {
80 		us->size = -1;
81 		us->atime = us->mtime = 0;
82 	}
83 	if (strcmp(URL->scheme, SCHEME_FILE) == 0)
84 		return (fetchXGetFile(URL, us, flags));
85 	else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
86 		return (fetchXGetFTP(URL, us, flags));
87 	else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
88 		return (fetchXGetHTTP(URL, us, flags));
89 	else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
90 		return (fetchXGetHTTP(URL, us, flags));
91 	url_seterr(URL_BAD_SCHEME);
92 	return (NULL);
93 }
94 
/*
 * Convenience form of fetchXGet() for callers that do not want the
 * document statistics: the url_stat argument is passed as NULL.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGet(URL, NULL, flags);
	return (stream);
}
104 
105 /*
106  * Select the appropriate protocol for the URL scheme, and return a
107  * write-only stream connected to the document referenced by the URL.
108  */
109 FILE *
110 fetchPut(struct url *URL, const char *flags)
111 {
112 
113 	if (strcmp(URL->scheme, SCHEME_FILE) == 0)
114 		return (fetchPutFile(URL, flags));
115 	else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
116 		return (fetchPutFTP(URL, flags));
117 	else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
118 		return (fetchPutHTTP(URL, flags));
119 	else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
120 		return (fetchPutHTTP(URL, flags));
121 	url_seterr(URL_BAD_SCHEME);
122 	return (NULL);
123 }
124 
125 /*
126  * Select the appropriate protocol for the URL scheme, and return the
127  * size of the document referenced by the URL if it exists.
128  */
129 int
130 fetchStat(struct url *URL, struct url_stat *us, const char *flags)
131 {
132 
133 	if (us != NULL) {
134 		us->size = -1;
135 		us->atime = us->mtime = 0;
136 	}
137 	if (strcmp(URL->scheme, SCHEME_FILE) == 0)
138 		return (fetchStatFile(URL, us, flags));
139 	else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
140 		return (fetchStatFTP(URL, us, flags));
141 	else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
142 		return (fetchStatHTTP(URL, us, flags));
143 	else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
144 		return (fetchStatHTTP(URL, us, flags));
145 	url_seterr(URL_BAD_SCHEME);
146 	return (-1);
147 }
148 
149 /*
150  * Select the appropriate protocol for the URL scheme, and return a
151  * list of files in the directory pointed to by the URL.
152  */
153 struct url_ent *
154 fetchList(struct url *URL, const char *flags)
155 {
156 
157 	if (strcmp(URL->scheme, SCHEME_FILE) == 0)
158 		return (fetchListFile(URL, flags));
159 	else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
160 		return (fetchListFTP(URL, flags));
161 	else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
162 		return (fetchListHTTP(URL, flags));
163 	else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
164 		return (fetchListHTTP(URL, flags));
165 	url_seterr(URL_BAD_SCHEME);
166 	return (NULL);
167 }
168 
/*
 * String front end to fetchXGet(): parse URL with fetchParseURL(), run
 * the request, and release the temporary struct url again.
 *
 * Returns NULL if the URL cannot be parsed; otherwise returns whatever
 * fetchXGet() returned.
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchXGet(parsed, us, flags);

	/* The parsed URL is released whether or not the request worked. */
	fetchFreeURL(parsed);

	return (stream);
}
186 
/*
 * Convenience form of fetchXGetURL() for callers that do not want the
 * document statistics: the url_stat argument is passed as NULL.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGetURL(URL, NULL, flags);
	return (stream);
}
195 
/*
 * String front end to fetchPut(): parse URL with fetchParseURL(), run
 * the request, and release the temporary struct url again.
 *
 * Returns NULL if the URL cannot be parsed; otherwise returns whatever
 * fetchPut() returned.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchPut(parsed, flags);

	/* The parsed URL is released whether or not the request worked. */
	fetchFreeURL(parsed);

	return (stream);
}
213 
/*
 * String front end to fetchStat(): parse URL with fetchParseURL(), run
 * the request, and release the temporary struct url again.
 *
 * Returns -1 if the URL cannot be parsed; otherwise returns whatever
 * fetchStat() returned.
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	int rv;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (-1);

	rv = fetchStat(parsed, us, flags);

	/* The parsed URL is released whether or not the request worked. */
	fetchFreeURL(parsed);

	return (rv);
}
231 
/*
 * String front end to fetchList(): parse URL with fetchParseURL(), run
 * the request, and release the temporary struct url again.
 *
 * Returns NULL if the URL cannot be parsed; otherwise returns whatever
 * fetchList() returned.
 */
struct url_ent *
fetchListURL(const char *URL, const char *flags)
{
	struct url *parsed;
	struct url_ent *entries;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	entries = fetchList(parsed, flags);

	/* The parsed URL is released whether or not the request worked. */
	fetchFreeURL(parsed);

	return (entries);
}
249 
250 /*
251  * Make a URL
252  */
253 struct url *
254 fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
255     const char *user, const char *pwd)
256 {
257 	struct url *u;
258 
259 	if (!scheme || (!host && !doc)) {
260 		url_seterr(URL_MALFORMED);
261 		return (NULL);
262 	}
263 
264 	if (port < 0 || port > 65535) {
265 		url_seterr(URL_BAD_PORT);
266 		return (NULL);
267 	}
268 
269 	/* allocate struct url */
270 	if ((u = calloc(1, sizeof(*u))) == NULL) {
271 		fetch_syserr();
272 		return (NULL);
273 	}
274 	u->netrcfd = -1;
275 
276 	if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
277 		fetch_syserr();
278 		free(u);
279 		return (NULL);
280 	}
281 
282 #define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x)
283 	seturl(scheme);
284 	seturl(host);
285 	seturl(user);
286 	seturl(pwd);
287 #undef seturl
288 	u->port = port;
289 
290 	return (u);
291 }
292 
/*
 * Map one hexadecimal digit character (either case) to its numeric
 * value 0..15.  Any other character yields -1.
 */
static int
fetch_hexval(char ch)
{

	switch (ch) {
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
		return (ch - '0');
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
		return (ch - 'a' + 10);
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
		return (ch - 'A' + 10);
	default:
		return (-1);
	}
}
308 
/*
 * Copy one URL component from src to dst, turning each %XX escape into
 * the byte it encodes.  Copying stops at the end of the string or at
 * the first literal '@' or ':'; a pointer to that stopping character
 * is returned.  At most dlen bytes are stored, and dst is NOT
 * NUL-terminated here -- the caller must see to that.
 *
 * NULL is returned when an escape is malformed (a '%' not followed by
 * two hex digits), when an escape encodes a zero byte ("%00"), or when
 * the decoded text does not fit in dlen bytes.
 */
static const char *
fetch_pctdecode(char *dst, const char *src, size_t dlen)
{
	const char *in = src;
	size_t room = dlen;
	int hi, lo;
	char out;

	while (*in != '\0' && *in != '@' && *in != ':') {
		if (*in != '%') {
			out = *in;
		} else {
			/*
			 * The second digit is only looked at once the
			 * first one is known to be valid, so the NUL
			 * terminator is never read past.
			 */
			hi = fetch_hexval(in[1]);
			if (hi < 0)
				return (NULL);
			lo = fetch_hexval(in[2]);
			if (lo < 0 || (hi == 0 && lo == 0))
				return (NULL);
			out = hi << 4 | lo;
			in += 2;
		}
		if (room == 0)
			return (NULL);
		room--;
		*dst++ = out;
		in++;
	}
	return (in);
}
340 
341 /*
342  * Split an URL into components. URL syntax is:
343  * [method:/][/[user[:pwd]@]host[:port]/][document]
344  * This almost, but not quite, RFC1738 URL syntax.
345  */
346 struct url *
347 fetchParseURL(const char *URL)
348 {
349 	char *doc;
350 	const char *p, *q;
351 	struct url *u;
352 	int i, n;
353 
354 	/* allocate struct url */
355 	if ((u = calloc(1, sizeof(*u))) == NULL) {
356 		fetch_syserr();
357 		return (NULL);
358 	}
359 	u->netrcfd = -1;
360 
361 	/* scheme name */
362 	if ((p = strstr(URL, ":/"))) {
363                 if (p - URL > URL_SCHEMELEN)
364                         goto ouch;
365                 for (i = 0; URL + i < p; i++)
366                         u->scheme[i] = tolower((unsigned char)URL[i]);
367 		URL = ++p;
368 		/*
369 		 * Only one slash: no host, leave slash as part of document
370 		 * Two slashes: host follows, strip slashes
371 		 */
372 		if (URL[1] == '/')
373 			URL = (p += 2);
374 	} else {
375 		p = URL;
376 	}
377 	if (!*URL || *URL == '/' || *URL == '.' ||
378 	    (u->scheme[0] == '\0' &&
379 		strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
380 		goto nohost;
381 
382 	p = strpbrk(URL, "/@");
383 	if (p && *p == '@') {
384 		/* username */
385 		q = fetch_pctdecode(u->user, URL, URL_USERLEN);
386 		if (q == NULL)
387 			goto ouch;
388 
389 		/* password */
390 		if (*q == ':') {
391 			q = fetch_pctdecode(u->pwd, q + 1, URL_PWDLEN);
392 			if (q == NULL)
393 				goto ouch;
394 		}
395 		p++;
396 	} else {
397 		p = URL;
398 	}
399 
400 	/* hostname */
401 	if (*p == '[') {
402 		q = p + 1 + strspn(p + 1, ":0123456789ABCDEFabcdef.");
403 		if (*q++ != ']')
404 			goto ouch;
405 	} else {
406 		/* valid characters in a DNS name */
407 		q = p + strspn(p, "-." "0123456789"
408 		    "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "_"
409 		    "abcdefghijklmnopqrstuvwxyz");
410 	}
411 	if ((*q != '\0' && *q != '/' && *q != ':') || q - p > MAXHOSTNAMELEN)
412 		goto ouch;
413 	for (i = 0; p + i < q; i++)
414 		u->host[i] = tolower((unsigned char)p[i]);
415 	u->host[i] = '\0';
416 	p = q;
417 
418 	/* port */
419 	if (*p == ':') {
420 		for (n = 0, q = ++p; *q && (*q != '/'); q++) {
421 			if (*q >= '0' && *q <= '9' && n < INT_MAX / 10) {
422 				n = n * 10 + (*q - '0');
423 			} else {
424 				/* invalid port */
425 				url_seterr(URL_BAD_PORT);
426 				goto ouch;
427 			}
428 		}
429 		if (p != q && (n < 1 || n > IPPORT_MAX))
430 			goto ouch;
431 		u->port = n;
432 		p = q;
433 	}
434 
435 nohost:
436 	/* document */
437 	if (!*p)
438 		p = "/";
439 
440 	if (strcmp(u->scheme, SCHEME_HTTP) == 0 ||
441 	    strcmp(u->scheme, SCHEME_HTTPS) == 0) {
442 		const char hexnums[] = "0123456789abcdef";
443 
444 		/* percent-escape whitespace. */
445 		if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
446 			fetch_syserr();
447 			goto ouch;
448 		}
449 		u->doc = doc;
450 		/* fragments are reserved for client-side processing, see
451 		 * https://www.rfc-editor.org/rfc/rfc9110.html#section-7.1
452 		 */
453 		while (*p != '\0' && *p != '#') {
454 			if (!isspace((unsigned char)*p)) {
455 				*doc++ = *p++;
456 			} else {
457 				*doc++ = '%';
458 				*doc++ = hexnums[((unsigned int)*p) >> 4];
459 				*doc++ = hexnums[((unsigned int)*p) & 0xf];
460 				p++;
461 			}
462 		}
463 		*doc = '\0';
464 	} else if ((u->doc = strdup(p)) == NULL) {
465 		fetch_syserr();
466 		goto ouch;
467 	}
468 
469 	DEBUGF("scheme:   \"%s\"\n"
470 	    "user:     \"%s\"\n"
471 	    "password: \"%s\"\n"
472 	    "host:     \"%s\"\n"
473 	    "port:     \"%d\"\n"
474 	    "document: \"%s\"\n",
475 	    u->scheme, u->user, u->pwd,
476 	    u->host, u->port, u->doc);
477 
478 	return (u);
479 
480 ouch:
481 	free(u);
482 	return (NULL);
483 }
484 
485 /*
486  * Free a URL
487  */
488 void
489 fetchFreeURL(struct url *u)
490 {
491 	free(u->doc);
492 	free(u);
493 }
494