xref: /freebsd/lib/libfetch/fetch.c (revision 9207b4cff7b8d483f4dd3c62266c2b58819eb7f9)
1 /*-
2  * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/errno.h>
34 
35 #include <ctype.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 
40 #include "fetch.h"
41 #include "common.h"
42 
43 auth_t	 fetchAuthMethod;	/* NOTE(review): not referenced in this file; presumably an authentication callback - confirm in fetch.h */
44 int	 fetchLastErrCode;	/* NOTE(review): presumably set by the error helpers (_url_seterr/_fetch_syserr) - confirm in common.c */
45 char	 fetchLastErrString[MAXERRSTRING];	/* NOTE(review): presumably the text matching fetchLastErrCode - confirm */
46 int	 fetchTimeout;	/* zero-initialised; not referenced in this file - units and meaning to be confirmed */
47 int	 fetchRestartCalls = 1;	/* defaults to 1; not referenced in this file - semantics to be confirmed */
48 int	 fetchDebug;	/* zero-initialised; NOTE(review): presumably consulted by the DEBUG() macro - confirm in common.h */
49 
50 
51 /*** Local data **************************************************************/
52 
53 /*
54  * Error messages for parser errors
55  */
56 #define URL_MALFORMED		1	/* passed to _url_seterr() by fetchMakeURL() */
57 #define URL_BAD_SCHEME		2	/* passed to _url_seterr() by the scheme dispatchers */
58 #define URL_BAD_PORT		3	/* passed to _url_seterr() by fetchMakeURL() and fetchParseURL() */
59 static struct fetcherr _url_errlist[] = {	/* one row per URL_* code: { code, FETCH_* category, message } */
60     { URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
61     { URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
62     { URL_BAD_PORT,	FETCH_URL,	"Invalid server port" },
63     { -1,		FETCH_UNKNOWN,	"Unknown parser error" }	/* NOTE(review): presumably the terminating catch-all row - confirm against the lookup code */
64 };
65 
66 
67 /*** Public API **************************************************************/
68 
69 /*
70  * Select the appropriate protocol for the URL scheme, and return a
71  * read-only stream connected to the document referenced by the URL.
72  * Also fill out the struct url_stat.
73  */
74 FILE *
75 fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
76 {
77     int direct;
78 
79     direct = CHECK_FLAG('d');
80     if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
81 	return fetchXGetFile(URL, us, flags);
82     else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
83 	return fetchXGetHTTP(URL, us, flags);
84     else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
85 	return fetchXGetFTP(URL, us, flags);
86     } else {
87 	_url_seterr(URL_BAD_SCHEME);
88 	return NULL;
89     }
90 }
91 
/*
 * Shorthand for fetchXGet() for callers that have no struct url_stat
 * to fill in: NULL is passed in its place.  The return value is that
 * of fetchXGet().
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
    FILE *stream;

    stream = fetchXGet(URL, NULL, flags);
    return stream;
}
101 
102 /*
103  * Select the appropriate protocol for the URL scheme, and return a
104  * write-only stream connected to the document referenced by the URL.
105  */
106 FILE *
107 fetchPut(struct url *URL, const char *flags)
108 {
109     int direct;
110 
111     direct = CHECK_FLAG('d');
112     if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
113 	return fetchPutFile(URL, flags);
114     else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
115 	return fetchPutHTTP(URL, flags);
116     else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
117 	return fetchPutFTP(URL, flags);
118     } else {
119 	_url_seterr(URL_BAD_SCHEME);
120 	return NULL;
121     }
122 }
123 
124 /*
125  * Select the appropriate protocol for the URL scheme, and return the
126  * size of the document referenced by the URL if it exists.
127  */
128 int
129 fetchStat(struct url *URL, struct url_stat *us, const char *flags)
130 {
131     int direct;
132 
133     direct = CHECK_FLAG('d');
134     if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
135 	return fetchStatFile(URL, us, flags);
136     else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
137 	return fetchStatHTTP(URL, us, flags);
138     else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
139 	return fetchStatFTP(URL, us, flags);
140     } else {
141 	_url_seterr(URL_BAD_SCHEME);
142 	return -1;
143     }
144 }
145 
146 /*
147  * Select the appropriate protocol for the URL scheme, and return a
148  * list of files in the directory pointed to by the URL.
149  */
150 struct url_ent *
151 fetchList(struct url *URL, const char *flags)
152 {
153     int direct;
154 
155     direct = CHECK_FLAG('d');
156     if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
157 	return fetchListFile(URL, flags);
158     else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
159 	return fetchListHTTP(URL, flags);
160     else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
161 	return fetchListFTP(URL, flags);
162     } else {
163 	_url_seterr(URL_BAD_SCHEME);
164 	return NULL;
165     }
166 }
167 
/*
 * String front end for fetchXGet(): parse the URL text, run the
 * request on the parsed form, then release the parsed form again.
 *
 * Returns NULL when fetchParseURL() rejects the string; otherwise
 * returns whatever fetchXGet() returned.
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
    struct url *parsed;
    FILE *stream = NULL;

    parsed = fetchParseURL(URL);
    if (parsed != NULL) {
	stream = fetchXGet(parsed, us, flags);
	/* the parsed URL is only needed for the duration of the call */
	fetchFreeURL(parsed);
    }
    return stream;
}
185 
/*
 * Shorthand for fetchXGetURL() for callers that have no struct
 * url_stat to fill in: NULL is passed in its place.  The return value
 * is that of fetchXGetURL().
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
    FILE *stream;

    stream = fetchXGetURL(URL, NULL, flags);
    return stream;
}
194 
/*
 * String front end for fetchPut(): parse the URL text, run the request
 * on the parsed form, then release the parsed form again.
 *
 * Returns NULL when fetchParseURL() rejects the string; otherwise
 * returns whatever fetchPut() returned.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
    struct url *parsed;
    FILE *stream = NULL;

    parsed = fetchParseURL(URL);
    if (parsed != NULL) {
	stream = fetchPut(parsed, flags);
	/* the parsed URL is only needed for the duration of the call */
	fetchFreeURL(parsed);
    }
    return stream;
}
212 
/*
 * String front end for fetchStat(): parse the URL text, run the
 * request on the parsed form, then release the parsed form again.
 *
 * Returns -1 when fetchParseURL() rejects the string; otherwise
 * returns whatever fetchStat() returned.
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
    struct url *parsed;
    int status = -1;

    parsed = fetchParseURL(URL);
    if (parsed != NULL) {
	status = fetchStat(parsed, us, flags);
	/* the parsed URL is only needed for the duration of the call */
	fetchFreeURL(parsed);
    }
    return status;
}
230 
/*
 * String front end for fetchList(): parse the URL text, run the
 * request on the parsed form, then release the parsed form again.
 *
 * Returns NULL when fetchParseURL() rejects the string; otherwise
 * returns whatever fetchList() returned.
 */
struct url_ent *
fetchListURL(const char *URL, const char *flags)
{
    struct url *parsed;
    struct url_ent *entries = NULL;

    parsed = fetchParseURL(URL);
    if (parsed != NULL) {
	entries = fetchList(parsed, flags);
	/* the parsed URL is only needed for the duration of the call */
	fetchFreeURL(parsed);
    }
    return entries;
}
248 
249 /*
250  * Make a URL
251  */
252 struct url *
253 fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
254     const char *user, const char *pwd)
255 {
256     struct url *u;
257 
258     if (!scheme || (!host && !doc)) {
259 	_url_seterr(URL_MALFORMED);
260 	return NULL;
261     }
262 
263     if (port < 0 || port > 65535) {
264 	_url_seterr(URL_BAD_PORT);
265 	return NULL;
266     }
267 
268     /* allocate struct url */
269     if ((u = calloc(1, sizeof *u)) == NULL) {
270 	_fetch_syserr();
271 	return NULL;
272     }
273 
274     if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
275 	_fetch_syserr();
276 	free(u);
277 	return NULL;
278     }
279 
280 #define seturl(x) snprintf(u->x, sizeof u->x, "%s", x)
281     seturl(scheme);
282     seturl(host);
283     seturl(user);
284     seturl(pwd);
285 #undef seturl
286     u->port = port;
287 
288     return u;
289 }
290 
291 /*
292  * Split an URL into components. URL syntax is:
293  * [method:/][/[user[:pwd]@]host[:port]/][document]
294  * This almost, but not quite, RFC1738 URL syntax.
295  */
296 struct url *
297 fetchParseURL(const char *URL)
298 {
299     char *doc;
300     const char *p, *q;
301     struct url *u;
302     int i;
303 
304     /* allocate struct url */
305     if ((u = calloc(1, sizeof *u)) == NULL) {
306 	_fetch_syserr();
307 	return NULL;
308     }
309 
310     /* scheme name */
311     if ((p = strstr(URL, ":/"))) {
312 	snprintf(u->scheme, URL_SCHEMELEN+1, "%.*s", (int)(p - URL), URL);
313 	URL = ++p;
314 	/*
315 	 * Only one slash: no host, leave slash as part of document
316 	 * Two slashes: host follows, strip slashes
317 	 */
318 	if (URL[1] == '/')
319 	    URL = (p += 2);
320     } else {
321 	p = URL;
322     }
323     if (!*URL || *URL == '/' || *URL == '.' ||
324 	(u->scheme[0] == '\0' &&
325     	    strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
326 	goto nohost;
327 
328     p = strpbrk(URL, "/@");
329     if (p && *p == '@') {
330 	/* username */
331 	for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++)
332 	    if (i < URL_USERLEN)
333 		u->user[i++] = *q;
334 
335 	/* password */
336 	if (*q == ':')
337 	    for (q++, i = 0; (*q != ':') && (*q != '@'); q++)
338 		if (i < URL_PWDLEN)
339 		    u->pwd[i++] = *q;
340 
341 	p++;
342     } else {
343 	p = URL;
344     }
345 
346     /* hostname */
347 #ifdef INET6
348     if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
349 	(*++q == '\0' || *q == '/' || *q == ':')) {
350 	if ((i = q - p - 2) > MAXHOSTNAMELEN)
351 	    i = MAXHOSTNAMELEN;
352 	strncpy(u->host, ++p, i);
353 	p = q;
354     } else
355 #endif
356 	for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
357 	    if (i < MAXHOSTNAMELEN)
358 		u->host[i++] = *p;
359 
360     /* port */
361     if (*p == ':') {
362 	for (q = ++p; *q && (*q != '/'); q++)
363 	    if (isdigit(*q))
364 		u->port = u->port * 10 + (*q - '0');
365 	    else {
366 		/* invalid port */
367 		_url_seterr(URL_BAD_PORT);
368 		goto ouch;
369 	    }
370 	while (*p && (*p != '/'))
371 	    p++;
372     }
373 
374 nohost:
375     /* document */
376     if (!*p)
377 	p = "/";
378 
379     if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 ||
380 	strcasecmp(u->scheme, SCHEME_HTTPS) == 0) {
381 	const char hexnums[] = "0123456789abcdef";
382 
383 	/* percent-escape whitespace. */
384 	if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
385 	    _fetch_syserr();
386 	    goto ouch;
387 	}
388 	u->doc = doc;
389 	while (*p != '\0') {
390 	    if (!isspace(*p)) {
391 		*doc++ = *p++;
392             } else {
393 		*doc++ = '%';
394 		*doc++ = hexnums[((unsigned int)*p) >> 4];
395 		*doc++ = hexnums[((unsigned int)*p) & 0xf];
396 		p++;
397             }
398 	}
399 	*doc = '\0';
400     } else if ((u->doc = strdup(p)) == NULL) {
401 	_fetch_syserr();
402 	goto ouch;
403     }
404 
405     DEBUG(fprintf(stderr,
406 		  "scheme:   [\033[1m%s\033[m]\n"
407 		  "user:     [\033[1m%s\033[m]\n"
408 		  "password: [\033[1m%s\033[m]\n"
409 		  "host:     [\033[1m%s\033[m]\n"
410 		  "port:     [\033[1m%d\033[m]\n"
411 		  "document: [\033[1m%s\033[m]\n",
412 		  u->scheme, u->user, u->pwd,
413 		  u->host, u->port, u->doc));
414 
415     return u;
416 
417 ouch:
418     free(u);
419     return NULL;
420 }
421 
422 /*
423  * Free a URL
424  */
425 void
426 fetchFreeURL(struct url *u)
427 {
428     free(u->doc);
429     free(u);
430 }
431