xref: /freebsd/lib/libfetch/fetch.c (revision 3416500aef140042c64bc149cb1ec6620483bc44)
1 /*-
2  * Copyright (c) 1998-2004 Dag-Erling Smørgrav
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/errno.h>
34 
35 #include <ctype.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 
40 #include "fetch.h"
41 #include "common.h"
42 
/*
 * Library-wide variables with external linkage.  All of them start out
 * zeroed except fetchRestartCalls, which is initialised to 1.
 * NOTE(review): the per-variable notes below are inferred from the names
 * only -- confirm against fetch.h / fetch(3).
 */
auth_t	 fetchAuthMethod;	/* presumably a caller-supplied authentication hook */
int	 fetchLastErrCode;	/* presumably the code of the most recent error */
char	 fetchLastErrString[MAXERRSTRING]; /* presumably the matching message text */
int	 fetchTimeout;		/* presumably an I/O timeout; unit not visible here */
int	 fetchRestartCalls = 1;	/* presumably: retry interrupted calls when non-zero */
int	 fetchDebug;		/* presumably enables debug output when non-zero */
49 
50 
51 /*** Local data **************************************************************/
52 
/*
 * Error codes raised by the URL handling code in this file, and the table
 * that describes them.  Each row is { parser code, FETCH_* category,
 * human-readable message }.  The last row, keyed -1, carries the generic
 * "Unknown parser error" text.
 * NOTE(review): presumably url_seterr() looks a code up in this table and
 * falls back to the -1 row -- confirm in common.h.
 */
#define URL_MALFORMED		1
#define URL_BAD_SCHEME		2
#define URL_BAD_PORT		3
static struct fetcherr url_errlist[] = {
	{ URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
	{ URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
	{ URL_BAD_PORT,		FETCH_URL,	"Invalid server port" },
	{ -1,			FETCH_UNKNOWN,	"Unknown parser error" }
};
65 
66 
67 /*** Public API **************************************************************/
68 
69 /*
70  * Select the appropriate protocol for the URL scheme, and return a
71  * read-only stream connected to the document referenced by the URL.
72  * Also fill out the struct url_stat.
73  */
74 FILE *
75 fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
76 {
77 
78 	if (us != NULL) {
79 		us->size = -1;
80 		us->atime = us->mtime = 0;
81 	}
82 	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
83 		return (fetchXGetFile(URL, us, flags));
84 	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
85 		return (fetchXGetFTP(URL, us, flags));
86 	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
87 		return (fetchXGetHTTP(URL, us, flags));
88 	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
89 		return (fetchXGetHTTP(URL, us, flags));
90 	url_seterr(URL_BAD_SCHEME);
91 	return (NULL);
92 }
93 
/*
 * Convenience form of fetchXGet() for callers that do not want document
 * metadata: no url_stat structure is passed along.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
	struct url_stat *no_stat = NULL;

	return (fetchXGet(URL, no_stat, flags));
}
103 
104 /*
105  * Select the appropriate protocol for the URL scheme, and return a
106  * write-only stream connected to the document referenced by the URL.
107  */
108 FILE *
109 fetchPut(struct url *URL, const char *flags)
110 {
111 
112 	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
113 		return (fetchPutFile(URL, flags));
114 	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
115 		return (fetchPutFTP(URL, flags));
116 	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
117 		return (fetchPutHTTP(URL, flags));
118 	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
119 		return (fetchPutHTTP(URL, flags));
120 	url_seterr(URL_BAD_SCHEME);
121 	return (NULL);
122 }
123 
124 /*
125  * Select the appropriate protocol for the URL scheme, and return the
126  * size of the document referenced by the URL if it exists.
127  */
128 int
129 fetchStat(struct url *URL, struct url_stat *us, const char *flags)
130 {
131 
132 	if (us != NULL) {
133 		us->size = -1;
134 		us->atime = us->mtime = 0;
135 	}
136 	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
137 		return (fetchStatFile(URL, us, flags));
138 	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
139 		return (fetchStatFTP(URL, us, flags));
140 	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
141 		return (fetchStatHTTP(URL, us, flags));
142 	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
143 		return (fetchStatHTTP(URL, us, flags));
144 	url_seterr(URL_BAD_SCHEME);
145 	return (-1);
146 }
147 
148 /*
149  * Select the appropriate protocol for the URL scheme, and return a
150  * list of files in the directory pointed to by the URL.
151  */
152 struct url_ent *
153 fetchList(struct url *URL, const char *flags)
154 {
155 
156 	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
157 		return (fetchListFile(URL, flags));
158 	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
159 		return (fetchListFTP(URL, flags));
160 	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
161 		return (fetchListHTTP(URL, flags));
162 	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
163 		return (fetchListHTTP(URL, flags));
164 	url_seterr(URL_BAD_SCHEME);
165 	return (NULL);
166 }
167 
/*
 * String front end to fetchXGet(): parse the URL text, run the request
 * on the parsed form, then release the parsed form.  Returns NULL if
 * the text cannot be parsed; otherwise returns fetchXGet()'s result.
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	stream = NULL;
	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		stream = fetchXGet(parsed, us, flags);
		fetchFreeURL(parsed);
	}
	return (stream);
}
185 
/*
 * String front end to fetchGet(): same as fetchXGetURL() but without a
 * url_stat structure.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
	struct url_stat *no_stat = NULL;

	return (fetchXGetURL(URL, no_stat, flags));
}
194 
/*
 * String front end to fetchPut(): parse the URL text, run the request
 * on the parsed form, then release the parsed form.  Returns NULL if
 * the text cannot be parsed; otherwise returns fetchPut()'s result.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	stream = NULL;
	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		stream = fetchPut(parsed, flags);
		fetchFreeURL(parsed);
	}
	return (stream);
}
212 
/*
 * String front end to fetchStat(): parse the URL text, run the query on
 * the parsed form, then release the parsed form.  Returns -1 if the
 * text cannot be parsed; otherwise returns fetchStat()'s result.
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	int status;

	status = -1;
	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		status = fetchStat(parsed, us, flags);
		fetchFreeURL(parsed);
	}
	return (status);
}
230 
/*
 * String front end to fetchList(): parse the URL text, run the request
 * on the parsed form, then release the parsed form.  Returns NULL if
 * the text cannot be parsed; otherwise returns fetchList()'s result.
 */
struct url_ent *
fetchListURL(const char *URL, const char *flags)
{
	struct url *parsed;
	struct url_ent *entries;

	entries = NULL;
	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		entries = fetchList(parsed, flags);
		fetchFreeURL(parsed);
	}
	return (entries);
}
248 
249 /*
250  * Make a URL
251  */
252 struct url *
253 fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
254     const char *user, const char *pwd)
255 {
256 	struct url *u;
257 
258 	if (!scheme || (!host && !doc)) {
259 		url_seterr(URL_MALFORMED);
260 		return (NULL);
261 	}
262 
263 	if (port < 0 || port > 65535) {
264 		url_seterr(URL_BAD_PORT);
265 		return (NULL);
266 	}
267 
268 	/* allocate struct url */
269 	if ((u = calloc(1, sizeof(*u))) == NULL) {
270 		fetch_syserr();
271 		return (NULL);
272 	}
273 
274 	if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
275 		fetch_syserr();
276 		free(u);
277 		return (NULL);
278 	}
279 
280 #define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x)
281 	seturl(scheme);
282 	seturl(host);
283 	seturl(user);
284 	seturl(pwd);
285 #undef seturl
286 	u->port = port;
287 	u->netrcfd = -2;
288 
289 	return (u);
290 }
291 
/*
 * Return the value (0..15) of the given hex digit, or -1 if ch is not a
 * hexadecimal digit.  Both upper and lower case letters are accepted.
 */
static int
fetch_hexval(char ch)
{

	if (ch >= '0' && ch <= '9')
		return (ch - '0');
	else if (ch >= 'a' && ch <= 'f')
		return (ch - 'a' + 10);
	else if (ch >= 'A' && ch <= 'F')
		return (ch - 'A' + 10);
	return (-1);
}

/*
 * Decode a percent-encoded URL component from src into dst, stopping at
 * the end of the string or at an '@' or ':' separator.
 *
 * A "%XY" sequence with two hex digits and a non-zero value becomes the
 * single byte it encodes.  Anything else, including "%00" and a '%'
 * not followed by two hex digits, is copied through literally.
 *
 * At most dlen bytes are stored in dst; further output is silently
 * dropped, but the input is still consumed up to the separator.  No
 * terminator is written to dst (that is the caller's responsibility).
 *
 * Returns a pointer to the unhandled part of the input (the null
 * terminator, '@' or ':').
 */
static const char *
fetch_pctdecode(char *dst, const char *src, size_t dlen)
{
	int d1, d2;
	char c;
	const char *s;

	for (s = src; *s != '\0' && *s != '@' && *s != ':'; s++) {
		/*
		 * s[2] is only inspected once s[1] is known to be a hex
		 * digit, so this never reads past the terminator.
		 */
		if (s[0] == '%' && (d1 = fetch_hexval(s[1])) >= 0 &&
		    (d2 = fetch_hexval(s[2])) >= 0 && (d1 > 0 || d2 > 0)) {
			c = (char)(d1 << 4 | d2);
			s += 2;
		} else {
			c = *s;
		}
		/*
		 * Only consume buffer space when a byte is really stored.
		 * Decrementing an exhausted (zero) size_t counter would wrap
		 * it to SIZE_MAX and let every later byte overflow dst.
		 */
		if (dlen > 0) {
			*dst++ = c;
			dlen--;
		}
	}
	return (s);
}
334 
335 /*
336  * Split an URL into components. URL syntax is:
337  * [method:/][/[user[:pwd]@]host[:port]/][document]
338  * This almost, but not quite, RFC1738 URL syntax.
339  */
340 struct url *
341 fetchParseURL(const char *URL)
342 {
343 	char *doc;
344 	const char *p, *q;
345 	struct url *u;
346 	int i;
347 
348 	/* allocate struct url */
349 	if ((u = calloc(1, sizeof(*u))) == NULL) {
350 		fetch_syserr();
351 		return (NULL);
352 	}
353 	u->netrcfd = -2;
354 
355 	/* scheme name */
356 	if ((p = strstr(URL, ":/"))) {
357 		snprintf(u->scheme, URL_SCHEMELEN+1,
358 		    "%.*s", (int)(p - URL), URL);
359 		URL = ++p;
360 		/*
361 		 * Only one slash: no host, leave slash as part of document
362 		 * Two slashes: host follows, strip slashes
363 		 */
364 		if (URL[1] == '/')
365 			URL = (p += 2);
366 	} else {
367 		p = URL;
368 	}
369 	if (!*URL || *URL == '/' || *URL == '.' ||
370 	    (u->scheme[0] == '\0' &&
371 		strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
372 		goto nohost;
373 
374 	p = strpbrk(URL, "/@");
375 	if (p && *p == '@') {
376 		/* username */
377 		q = fetch_pctdecode(u->user, URL, URL_USERLEN);
378 
379 		/* password */
380 		if (*q == ':')
381 			q = fetch_pctdecode(u->pwd, q + 1, URL_PWDLEN);
382 
383 		p++;
384 	} else {
385 		p = URL;
386 	}
387 
388 	/* hostname */
389 	if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
390 	    (*++q == '\0' || *q == '/' || *q == ':')) {
391 		if ((i = q - p) > MAXHOSTNAMELEN)
392 			i = MAXHOSTNAMELEN;
393 		strncpy(u->host, p, i);
394 		p = q;
395 	} else {
396 		for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
397 			if (i < MAXHOSTNAMELEN)
398 				u->host[i++] = *p;
399 	}
400 
401 	/* port */
402 	if (*p == ':') {
403 		for (q = ++p; *q && (*q != '/'); q++)
404 			if (isdigit((unsigned char)*q))
405 				u->port = u->port * 10 + (*q - '0');
406 			else {
407 				/* invalid port */
408 				url_seterr(URL_BAD_PORT);
409 				goto ouch;
410 			}
411 		p = q;
412 	}
413 
414 nohost:
415 	/* document */
416 	if (!*p)
417 		p = "/";
418 
419 	if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 ||
420 	    strcasecmp(u->scheme, SCHEME_HTTPS) == 0) {
421 		const char hexnums[] = "0123456789abcdef";
422 
423 		/* percent-escape whitespace. */
424 		if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
425 			fetch_syserr();
426 			goto ouch;
427 		}
428 		u->doc = doc;
429 		while (*p != '\0') {
430 			if (!isspace((unsigned char)*p)) {
431 				*doc++ = *p++;
432 			} else {
433 				*doc++ = '%';
434 				*doc++ = hexnums[((unsigned int)*p) >> 4];
435 				*doc++ = hexnums[((unsigned int)*p) & 0xf];
436 				p++;
437 			}
438 		}
439 		*doc = '\0';
440 	} else if ((u->doc = strdup(p)) == NULL) {
441 		fetch_syserr();
442 		goto ouch;
443 	}
444 
445 	DEBUG(fprintf(stderr,
446 		  "scheme:   \"%s\"\n"
447 		  "user:     \"%s\"\n"
448 		  "password: \"%s\"\n"
449 		  "host:     \"%s\"\n"
450 		  "port:     \"%d\"\n"
451 		  "document: \"%s\"\n",
452 		  u->scheme, u->user, u->pwd,
453 		  u->host, u->port, u->doc));
454 
455 	return (u);
456 
457 ouch:
458 	free(u);
459 	return (NULL);
460 }
461 
462 /*
463  * Free a URL
464  */
465 void
466 fetchFreeURL(struct url *u)
467 {
468 	free(u->doc);
469 	free(u);
470 }
471