xref: /freebsd/lib/libfetch/fetch.c (revision 2be1a816b9ff69588e55be0a84cbe2a31efc0f2f)
1 /*-
2  * Copyright (c) 1998-2004 Dag-Erling Coïdan Smørgrav
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/errno.h>
34 
35 #include <ctype.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 
40 #include "fetch.h"
41 #include "common.h"
42 
/*
 * Library-wide state, defined here with external linkage.
 * All of these start out zero-initialized except fetchRestartCalls,
 * which defaults to 1.  None of them is read or written directly in
 * this file.
 * NOTE(review): the meaning of each variable is presumably documented in
 * fetch.h / fetch(3) -- confirm there before relying on the names alone.
 */
auth_t	 fetchAuthMethod;
int	 fetchLastErrCode;
char	 fetchLastErrString[MAXERRSTRING];
int	 fetchTimeout;
int	 fetchRestartCalls = 1;
int	 fetchDebug;
49 
50 
51 /*** Local data **************************************************************/
52 
/*
 * Error messages for parser errors.
 *
 * Each entry holds one of the URL_* codes defined below, a FETCH_*
 * constant and a human-readable message.  The last entry (code -1,
 * FETCH_UNKNOWN) is the fallback for codes not listed explicitly.
 * NOTE(review): this table is presumably consumed through url_seterr()
 * from common.h; the lookup itself is not visible in this file.
 */
#define URL_MALFORMED		1
#define URL_BAD_SCHEME		2
#define URL_BAD_PORT		3
static struct fetcherr url_errlist[] = {
	{ URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
	{ URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
	{ URL_BAD_PORT,		FETCH_URL,	"Invalid server port" },
	{ -1,			FETCH_UNKNOWN,	"Unknown parser error" }
};
65 
66 
67 /*** Public API **************************************************************/
68 
69 /*
70  * Select the appropriate protocol for the URL scheme, and return a
71  * read-only stream connected to the document referenced by the URL.
72  * Also fill out the struct url_stat.
73  */
74 FILE *
75 fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
76 {
77 	int direct;
78 
79 	direct = CHECK_FLAG('d');
80 	if (us != NULL) {
81 		us->size = -1;
82 		us->atime = us->mtime = 0;
83 	}
84 	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
85 		return (fetchXGetFile(URL, us, flags));
86 	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
87 		return (fetchXGetFTP(URL, us, flags));
88 	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
89 		return (fetchXGetHTTP(URL, us, flags));
90 	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
91 		return (fetchXGetHTTP(URL, us, flags));
92 	url_seterr(URL_BAD_SCHEME);
93 	return (NULL);
94 }
95 
/*
 * Same as fetchXGet(), for callers that do not want the document's
 * struct url_stat: no stat buffer is passed on.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
	struct url_stat *no_stat;

	no_stat = NULL;
	return (fetchXGet(URL, no_stat, flags));
}
105 
106 /*
107  * Select the appropriate protocol for the URL scheme, and return a
108  * write-only stream connected to the document referenced by the URL.
109  */
110 FILE *
111 fetchPut(struct url *URL, const char *flags)
112 {
113 	int direct;
114 
115 	direct = CHECK_FLAG('d');
116 	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
117 		return (fetchPutFile(URL, flags));
118 	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
119 		return (fetchPutFTP(URL, flags));
120 	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
121 		return (fetchPutHTTP(URL, flags));
122 	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
123 		return (fetchPutHTTP(URL, flags));
124 	url_seterr(URL_BAD_SCHEME);
125 	return (NULL);
126 }
127 
128 /*
129  * Select the appropriate protocol for the URL scheme, and return the
130  * size of the document referenced by the URL if it exists.
131  */
132 int
133 fetchStat(struct url *URL, struct url_stat *us, const char *flags)
134 {
135 	int direct;
136 
137 	direct = CHECK_FLAG('d');
138 	if (us != NULL) {
139 		us->size = -1;
140 		us->atime = us->mtime = 0;
141 	}
142 	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
143 		return (fetchStatFile(URL, us, flags));
144 	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
145 		return (fetchStatFTP(URL, us, flags));
146 	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
147 		return (fetchStatHTTP(URL, us, flags));
148 	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
149 		return (fetchStatHTTP(URL, us, flags));
150 	url_seterr(URL_BAD_SCHEME);
151 	return (-1);
152 }
153 
154 /*
155  * Select the appropriate protocol for the URL scheme, and return a
156  * list of files in the directory pointed to by the URL.
157  */
158 struct url_ent *
159 fetchList(struct url *URL, const char *flags)
160 {
161 	int direct;
162 
163 	direct = CHECK_FLAG('d');
164 	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
165 		return (fetchListFile(URL, flags));
166 	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
167 		return (fetchListFTP(URL, flags));
168 	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
169 		return (fetchListHTTP(URL, flags));
170 	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
171 		return (fetchListHTTP(URL, flags));
172 	url_seterr(URL_BAD_SCHEME);
173 	return (NULL);
174 }
175 
/*
 * String front end to fetchXGet(): parse the URL, fetch through the
 * parsed form, then release the parsed URL again.
 * Returns NULL if the string cannot be parsed, otherwise whatever
 * fetchXGet() returned.
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	stream = NULL;
	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		stream = fetchXGet(parsed, us, flags);
		fetchFreeURL(parsed);
	}
	return (stream);
}
193 
/*
 * Same as fetchXGetURL(), for callers that do not want the document's
 * struct url_stat: no stat buffer is passed on.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
	struct url_stat *no_stat;

	no_stat = NULL;
	return (fetchXGetURL(URL, no_stat, flags));
}
202 
/*
 * String front end to fetchPut(): parse the URL, open the write stream
 * through the parsed form, then release the parsed URL again.
 * Returns NULL if the string cannot be parsed, otherwise whatever
 * fetchPut() returned.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	stream = NULL;
	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		stream = fetchPut(parsed, flags);
		fetchFreeURL(parsed);
	}
	return (stream);
}
220 
/*
 * String front end to fetchStat(): parse the URL, stat through the
 * parsed form, then release the parsed URL again.
 * Returns -1 if the string cannot be parsed, otherwise whatever
 * fetchStat() returned.
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	int status;

	status = -1;
	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		status = fetchStat(parsed, us, flags);
		fetchFreeURL(parsed);
	}
	return (status);
}
238 
/*
 * String front end to fetchList(): parse the URL, list through the
 * parsed form, then release the parsed URL again.
 * Returns NULL if the string cannot be parsed, otherwise whatever
 * fetchList() returned.
 */
struct url_ent *
fetchListURL(const char *URL, const char *flags)
{
	struct url *parsed;
	struct url_ent *entries;

	entries = NULL;
	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		entries = fetchList(parsed, flags);
		fetchFreeURL(parsed);
	}
	return (entries);
}
256 
257 /*
258  * Make a URL
259  */
260 struct url *
261 fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
262     const char *user, const char *pwd)
263 {
264 	struct url *u;
265 
266 	if (!scheme || (!host && !doc)) {
267 		url_seterr(URL_MALFORMED);
268 		return (NULL);
269 	}
270 
271 	if (port < 0 || port > 65535) {
272 		url_seterr(URL_BAD_PORT);
273 		return (NULL);
274 	}
275 
276 	/* allocate struct url */
277 	if ((u = calloc(1, sizeof(*u))) == NULL) {
278 		fetch_syserr();
279 		return (NULL);
280 	}
281 
282 	if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
283 		fetch_syserr();
284 		free(u);
285 		return (NULL);
286 	}
287 
288 #define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x)
289 	seturl(scheme);
290 	seturl(host);
291 	seturl(user);
292 	seturl(pwd);
293 #undef seturl
294 	u->port = port;
295 
296 	return (u);
297 }
298 
299 /*
300  * Split an URL into components. URL syntax is:
301  * [method:/][/[user[:pwd]@]host[:port]/][document]
302  * This almost, but not quite, RFC1738 URL syntax.
303  */
304 struct url *
305 fetchParseURL(const char *URL)
306 {
307 	char *doc;
308 	const char *p, *q;
309 	struct url *u;
310 	int i;
311 
312 	/* allocate struct url */
313 	if ((u = calloc(1, sizeof(*u))) == NULL) {
314 		fetch_syserr();
315 		return (NULL);
316 	}
317 
318 	/* scheme name */
319 	if ((p = strstr(URL, ":/"))) {
320 		snprintf(u->scheme, URL_SCHEMELEN+1,
321 		    "%.*s", (int)(p - URL), URL);
322 		URL = ++p;
323 		/*
324 		 * Only one slash: no host, leave slash as part of document
325 		 * Two slashes: host follows, strip slashes
326 		 */
327 		if (URL[1] == '/')
328 			URL = (p += 2);
329 	} else {
330 		p = URL;
331 	}
332 	if (!*URL || *URL == '/' || *URL == '.' ||
333 	    (u->scheme[0] == '\0' &&
334 		strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
335 		goto nohost;
336 
337 	p = strpbrk(URL, "/@");
338 	if (p && *p == '@') {
339 		/* username */
340 		for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++)
341 			if (i < URL_USERLEN)
342 				u->user[i++] = *q;
343 
344 		/* password */
345 		if (*q == ':')
346 			for (q++, i = 0; (*q != ':') && (*q != '@'); q++)
347 				if (i < URL_PWDLEN)
348 					u->pwd[i++] = *q;
349 
350 		p++;
351 	} else {
352 		p = URL;
353 	}
354 
355 	/* hostname */
356 #ifdef INET6
357 	if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
358 	    (*++q == '\0' || *q == '/' || *q == ':')) {
359 		if ((i = q - p - 2) > MAXHOSTNAMELEN)
360 			i = MAXHOSTNAMELEN;
361 		strncpy(u->host, ++p, i);
362 		p = q;
363 	} else
364 #endif
365 		for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
366 			if (i < MAXHOSTNAMELEN)
367 				u->host[i++] = *p;
368 
369 	/* port */
370 	if (*p == ':') {
371 		for (q = ++p; *q && (*q != '/'); q++)
372 			if (isdigit((unsigned char)*q))
373 				u->port = u->port * 10 + (*q - '0');
374 			else {
375 				/* invalid port */
376 				url_seterr(URL_BAD_PORT);
377 				goto ouch;
378 			}
379 		p = q;
380 	}
381 
382 nohost:
383 	/* document */
384 	if (!*p)
385 		p = "/";
386 
387 	if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 ||
388 	    strcasecmp(u->scheme, SCHEME_HTTPS) == 0) {
389 		const char hexnums[] = "0123456789abcdef";
390 
391 		/* percent-escape whitespace. */
392 		if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
393 			fetch_syserr();
394 			goto ouch;
395 		}
396 		u->doc = doc;
397 		while (*p != '\0') {
398 			if (!isspace((unsigned char)*p)) {
399 				*doc++ = *p++;
400 			} else {
401 				*doc++ = '%';
402 				*doc++ = hexnums[((unsigned int)*p) >> 4];
403 				*doc++ = hexnums[((unsigned int)*p) & 0xf];
404 				p++;
405 			}
406 		}
407 		*doc = '\0';
408 	} else if ((u->doc = strdup(p)) == NULL) {
409 		fetch_syserr();
410 		goto ouch;
411 	}
412 
413 	DEBUG(fprintf(stderr,
414 		  "scheme:   [%s]\n"
415 		  "user:     [%s]\n"
416 		  "password: [%s]\n"
417 		  "host:     [%s]\n"
418 		  "port:     [%d]\n"
419 		  "document: [%s]\n",
420 		  u->scheme, u->user, u->pwd,
421 		  u->host, u->port, u->doc));
422 
423 	return (u);
424 
425 ouch:
426 	free(u);
427 	return (NULL);
428 }
429 
430 /*
431  * Free a URL
432  */
433 void
434 fetchFreeURL(struct url *u)
435 {
436 	free(u->doc);
437 	free(u);
438 }
439