xref: /freebsd/lib/libfetch/fetch.c (revision 41466b50c1d5bfd1cf6adaae547a579a75d7c04e)
1 /*-
2  * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/errno.h>
34 
35 #include <ctype.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 
40 #include "fetch.h"
41 #include "common.h"
42 
43 auth_t	 fetchAuthMethod;
44 int	 fetchLastErrCode;
45 char	 fetchLastErrString[MAXERRSTRING];
46 int	 fetchTimeout;
47 int	 fetchRestartCalls = 1;
48 
49 
50 /*** Local data **************************************************************/
51 
52 /*
53  * Error messages for parser errors
54  */
55 #define URL_MALFORMED		1
56 #define URL_BAD_SCHEME		2
57 #define URL_BAD_PORT		3
58 static struct fetcherr _url_errlist[] = {
59     { URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
60     { URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
61     { URL_BAD_PORT,	FETCH_URL,	"Invalid server port" },
62     { -1,		FETCH_UNKNOWN,	"Unknown parser error" }
63 };
64 
65 
66 /*** Public API **************************************************************/
67 
68 /*
69  * Select the appropriate protocol for the URL scheme, and return a
70  * read-only stream connected to the document referenced by the URL.
71  * Also fill out the struct url_stat.
72  */
73 FILE *
74 fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
75 {
76     int direct;
77 
78     direct = CHECK_FLAG('d');
79     if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
80 	return fetchXGetFile(URL, us, flags);
81     else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
82 	return fetchXGetHTTP(URL, us, flags);
83     else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
84 	return fetchXGetFTP(URL, us, flags);
85     } else {
86 	_url_seterr(URL_BAD_SCHEME);
87 	return NULL;
88     }
89 }
90 
/*
 * Convenience form of fetchXGet() for callers that do not want the
 * struct url_stat filled in: NULL is passed in its place and the
 * result of fetchXGet() is returned unchanged.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
    struct url_stat *no_stat = NULL;

    return fetchXGet(URL, no_stat, flags);
}
100 
101 /*
102  * Select the appropriate protocol for the URL scheme, and return a
103  * write-only stream connected to the document referenced by the URL.
104  */
105 FILE *
106 fetchPut(struct url *URL, const char *flags)
107 {
108     int direct;
109 
110     direct = CHECK_FLAG('d');
111     if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
112 	return fetchPutFile(URL, flags);
113     else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
114 	return fetchPutHTTP(URL, flags);
115     else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
116 	return fetchPutFTP(URL, flags);
117     } else {
118 	_url_seterr(URL_BAD_SCHEME);
119 	return NULL;
120     }
121 }
122 
123 /*
124  * Select the appropriate protocol for the URL scheme, and return the
125  * size of the document referenced by the URL if it exists.
126  */
127 int
128 fetchStat(struct url *URL, struct url_stat *us, const char *flags)
129 {
130     int direct;
131 
132     direct = CHECK_FLAG('d');
133     if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
134 	return fetchStatFile(URL, us, flags);
135     else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
136 	return fetchStatHTTP(URL, us, flags);
137     else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
138 	return fetchStatFTP(URL, us, flags);
139     } else {
140 	_url_seterr(URL_BAD_SCHEME);
141 	return -1;
142     }
143 }
144 
145 /*
146  * Select the appropriate protocol for the URL scheme, and return a
147  * list of files in the directory pointed to by the URL.
148  */
149 struct url_ent *
150 fetchList(struct url *URL, const char *flags)
151 {
152     int direct;
153 
154     direct = CHECK_FLAG('d');
155     if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
156 	return fetchListFile(URL, flags);
157     else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
158 	return fetchListHTTP(URL, flags);
159     else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
160 	return fetchListFTP(URL, flags);
161     } else {
162 	_url_seterr(URL_BAD_SCHEME);
163 	return NULL;
164     }
165 }
166 
/*
 * String front end to fetchXGet(): parse URL with fetchParseURL(), hand
 * the parsed form to fetchXGet() together with us and flags, then free
 * the parsed form again.  Returns NULL if the URL cannot be parsed,
 * otherwise whatever fetchXGet() returned.
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
    struct url *parsed;
    FILE *stream = NULL;

    parsed = fetchParseURL(URL);
    if (parsed != NULL) {
	stream = fetchXGet(parsed, us, flags);
	fetchFreeURL(parsed);
    }
    return stream;
}
184 
/*
 * String front end for callers that do not want a struct url_stat:
 * forwards to fetchXGetURL() with NULL in its place and returns the
 * result unchanged.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
    struct url_stat *no_stat = NULL;

    return fetchXGetURL(URL, no_stat, flags);
}
193 
/*
 * String front end to fetchPut(): parse URL with fetchParseURL(), hand
 * the parsed form to fetchPut() together with flags, then free the
 * parsed form again.  Returns NULL if the URL cannot be parsed,
 * otherwise whatever fetchPut() returned.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
    struct url *parsed;
    FILE *stream = NULL;

    parsed = fetchParseURL(URL);
    if (parsed != NULL) {
	stream = fetchPut(parsed, flags);
	fetchFreeURL(parsed);
    }
    return stream;
}
211 
/*
 * String front end to fetchStat(): parse URL with fetchParseURL(), hand
 * the parsed form to fetchStat() together with us and flags, then free
 * the parsed form again.  Returns -1 if the URL cannot be parsed,
 * otherwise whatever fetchStat() returned.
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
    struct url *parsed;
    int status = -1;

    parsed = fetchParseURL(URL);
    if (parsed != NULL) {
	status = fetchStat(parsed, us, flags);
	fetchFreeURL(parsed);
    }
    return status;
}
229 
/*
 * String front end to fetchList(): parse URL with fetchParseURL(), hand
 * the parsed form to fetchList() together with flags, then free the
 * parsed form again.  Returns NULL if the URL cannot be parsed,
 * otherwise whatever fetchList() returned.
 */
struct url_ent *
fetchListURL(const char *URL, const char *flags)
{
    struct url *parsed;
    struct url_ent *entries = NULL;

    parsed = fetchParseURL(URL);
    if (parsed != NULL) {
	entries = fetchList(parsed, flags);
	fetchFreeURL(parsed);
    }
    return entries;
}
247 
248 /*
249  * Make a URL
250  */
251 struct url *
252 fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
253     const char *user, const char *pwd)
254 {
255     struct url *u;
256 
257     if (!scheme || (!host && !doc)) {
258 	_url_seterr(URL_MALFORMED);
259 	return NULL;
260     }
261 
262     if (port < 0 || port > 65535) {
263 	_url_seterr(URL_BAD_PORT);
264 	return NULL;
265     }
266 
267     /* allocate struct url */
268     if ((u = calloc(1, sizeof *u)) == NULL) {
269 	_fetch_syserr();
270 	return NULL;
271     }
272 
273     if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
274 	_fetch_syserr();
275 	free(u);
276 	return NULL;
277     }
278 
279 #define seturl(x) snprintf(u->x, sizeof u->x, "%s", x)
280     seturl(scheme);
281     seturl(host);
282     seturl(user);
283     seturl(pwd);
284 #undef seturl
285     u->port = port;
286 
287     return u;
288 }
289 
290 /*
291  * Split an URL into components. URL syntax is:
292  * [method:/][/[user[:pwd]@]host[:port]/][document]
293  * This almost, but not quite, RFC1738 URL syntax.
294  */
295 struct url *
296 fetchParseURL(const char *URL)
297 {
298     char *doc;
299     const char *p, *q;
300     struct url *u;
301     int i;
302 
303     /* allocate struct url */
304     if ((u = calloc(1, sizeof *u)) == NULL) {
305 	_fetch_syserr();
306 	return NULL;
307     }
308 
309     /* scheme name */
310     if ((p = strstr(URL, ":/"))) {
311 	snprintf(u->scheme, URL_SCHEMELEN+1, "%.*s", (int)(p - URL), URL);
312 	URL = ++p;
313 	/*
314 	 * Only one slash: no host, leave slash as part of document
315 	 * Two slashes: host follows, strip slashes
316 	 */
317 	if (URL[1] == '/')
318 	    URL = (p += 2);
319     } else {
320 	p = URL;
321     }
322     if (!*URL || *URL == '/' || *URL == '.' ||
323 	(u->scheme[0] == '\0' &&
324     	    strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
325 	goto nohost;
326 
327     p = strpbrk(URL, "/@");
328     if (p && *p == '@') {
329 	/* username */
330 	for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++)
331 	    if (i < URL_USERLEN)
332 		u->user[i++] = *q;
333 
334 	/* password */
335 	if (*q == ':')
336 	    for (q++, i = 0; (*q != ':') && (*q != '@'); q++)
337 		if (i < URL_PWDLEN)
338 		    u->pwd[i++] = *q;
339 
340 	p++;
341     } else {
342 	p = URL;
343     }
344 
345     /* hostname */
346 #ifdef INET6
347     if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
348 	(*++q == '\0' || *q == '/' || *q == ':')) {
349 	if ((i = q - p - 2) > MAXHOSTNAMELEN)
350 	    i = MAXHOSTNAMELEN;
351 	strncpy(u->host, ++p, i);
352 	p = q;
353     } else
354 #endif
355 	for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
356 	    if (i < MAXHOSTNAMELEN)
357 		u->host[i++] = *p;
358 
359     /* port */
360     if (*p == ':') {
361 	for (q = ++p; *q && (*q != '/'); q++)
362 	    if (isdigit(*q))
363 		u->port = u->port * 10 + (*q - '0');
364 	    else {
365 		/* invalid port */
366 		_url_seterr(URL_BAD_PORT);
367 		goto ouch;
368 	    }
369 	while (*p && (*p != '/'))
370 	    p++;
371     }
372 
373 nohost:
374     /* document */
375     if (!*p)
376 	p = "/";
377 
378     if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 ||
379 	strcasecmp(u->scheme, SCHEME_HTTPS) == 0) {
380 	const char hexnums[] = "0123456789abcdef";
381 
382 	/* percent-escape whitespace. */
383 	if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
384 	    _fetch_syserr();
385 	    goto ouch;
386 	}
387 	u->doc = doc;
388 	while (*p != '\0') {
389 	    if (!isspace(*p)) {
390 		*doc++ = *p++;
391             } else {
392 		*doc++ = '%';
393 		*doc++ = hexnums[((unsigned int)*p) >> 4];
394 		*doc++ = hexnums[((unsigned int)*p) & 0xf];
395 		p++;
396             }
397 	}
398 	*doc = '\0';
399     } else if ((u->doc = strdup(p)) == NULL) {
400 	_fetch_syserr();
401 	goto ouch;
402     }
403 
404     DEBUG(fprintf(stderr,
405 		  "scheme:   [\033[1m%s\033[m]\n"
406 		  "user:     [\033[1m%s\033[m]\n"
407 		  "password: [\033[1m%s\033[m]\n"
408 		  "host:     [\033[1m%s\033[m]\n"
409 		  "port:     [\033[1m%d\033[m]\n"
410 		  "document: [\033[1m%s\033[m]\n",
411 		  u->scheme, u->user, u->pwd,
412 		  u->host, u->port, u->doc));
413 
414     return u;
415 
416 ouch:
417     free(u);
418     return NULL;
419 }
420 
421 /*
422  * Free a URL
423  */
424 void
425 fetchFreeURL(struct url *u)
426 {
427     free(u->doc);
428     free(u);
429 }
430