xref: /freebsd/lib/libfetch/fetch.c (revision 1ba84976606b34e81005aefcb131d48fe22a1118)
1 /*-
2  * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 #include <sys/param.h>
32 #include <sys/errno.h>
33 
34 #include <ctype.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 
39 #include "fetch.h"
40 #include "common.h"
41 
42 
43 int	 fetchLastErrCode;
44 char	 fetchLastErrString[MAXERRSTRING];
45 int	 fetchTimeout;
46 int	 fetchRestartCalls = 1;
47 
48 
49 /*** Local data **************************************************************/
50 
51 /*
52  * Error messages for parser errors
53  */
54 #define URL_MALFORMED		1
55 #define URL_BAD_SCHEME		2
56 #define URL_BAD_PORT		3
57 static struct fetcherr _url_errlist[] = {
58     { URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
59     { URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
60     { URL_BAD_PORT,	FETCH_URL,	"Invalid server port" },
61     { -1,		FETCH_UNKNOWN,	"Unknown parser error" }
62 };
63 
64 
65 /*** Public API **************************************************************/
66 
67 /*
68  * Select the appropriate protocol for the URL scheme, and return a
69  * read-only stream connected to the document referenced by the URL.
70  * Also fill out the struct url_stat.
71  */
72 FILE *
73 fetchXGet(struct url *URL, struct url_stat *us, char *flags)
74 {
75     int direct;
76 
77     direct = CHECK_FLAG('d');
78     if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
79 	return fetchXGetFile(URL, us, flags);
80     else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
81 	return fetchXGetHTTP(URL, us, flags);
82     else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
83 	return fetchXGetFTP(URL, us, flags);
84     } else {
85 	_url_seterr(URL_BAD_SCHEME);
86 	return NULL;
87     }
88 }
89 
/*
 * Convenience form of fetchXGet() for callers that do not want a
 * struct url_stat: forwards URL and flags with a NULL stat pointer and
 * returns the resulting stream unchanged.
 */
FILE *
fetchGet(struct url *URL, char *flags)
{
    FILE *stream;

    stream = fetchXGet(URL, NULL, flags);
    return stream;
}
99 
100 /*
101  * Select the appropriate protocol for the URL scheme, and return a
102  * write-only stream connected to the document referenced by the URL.
103  */
104 FILE *
105 fetchPut(struct url *URL, char *flags)
106 {
107     int direct;
108 
109     direct = CHECK_FLAG('d');
110     if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
111 	return fetchPutFile(URL, flags);
112     else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
113 	return fetchPutHTTP(URL, flags);
114     else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
115 	return fetchPutFTP(URL, flags);
116     } else {
117 	_url_seterr(URL_BAD_SCHEME);
118 	return NULL;
119     }
120 }
121 
122 /*
123  * Select the appropriate protocol for the URL scheme, and return the
124  * size of the document referenced by the URL if it exists.
125  */
126 int
127 fetchStat(struct url *URL, struct url_stat *us, char *flags)
128 {
129     int direct;
130 
131     direct = CHECK_FLAG('d');
132     if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
133 	return fetchStatFile(URL, us, flags);
134     else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
135 	return fetchStatHTTP(URL, us, flags);
136     else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
137 	return fetchStatFTP(URL, us, flags);
138     } else {
139 	_url_seterr(URL_BAD_SCHEME);
140 	return -1;
141     }
142 }
143 
144 /*
145  * Select the appropriate protocol for the URL scheme, and return a
146  * list of files in the directory pointed to by the URL.
147  */
148 struct url_ent *
149 fetchList(struct url *URL, char *flags)
150 {
151     int direct;
152 
153     direct = CHECK_FLAG('d');
154     if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
155 	return fetchListFile(URL, flags);
156     else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
157 	return fetchListHTTP(URL, flags);
158     else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
159 	return fetchListFTP(URL, flags);
160     } else {
161 	_url_seterr(URL_BAD_SCHEME);
162 	return NULL;
163     }
164 }
165 
/*
 * String front end for fetchXGet(): parse the textual URL, run the
 * request on the parsed form, then release the parsed form before
 * returning the stream.  A URL that fails to parse yields NULL without
 * any request being made.
 */
FILE *
fetchXGetURL(char *URL, struct url_stat *us, char *flags)
{
    struct url *parsed;
    FILE *stream;

    parsed = fetchParseURL(URL);
    if (parsed == NULL)
	return NULL;

    stream = fetchXGet(parsed, us, flags);
    fetchFreeURL(parsed);

    return stream;
}
183 
/*
 * String front end for callers that do not want a struct url_stat:
 * forwards to fetchXGetURL() with a NULL stat pointer and returns its
 * result unchanged.
 */
FILE *
fetchGetURL(char *URL, char *flags)
{
    FILE *stream;

    stream = fetchXGetURL(URL, NULL, flags);
    return stream;
}
192 
/*
 * String front end for fetchPut(): parse the textual URL, run the
 * request on the parsed form, then release the parsed form before
 * returning the stream.  A URL that fails to parse yields NULL without
 * any request being made.
 */
FILE *
fetchPutURL(char *URL, char *flags)
{
    struct url *parsed;
    FILE *stream;

    parsed = fetchParseURL(URL);
    if (parsed == NULL)
	return NULL;

    stream = fetchPut(parsed, flags);
    fetchFreeURL(parsed);

    return stream;
}
210 
211 /*
212  * Attempt to parse the given URL; if successful, call fetchStat().
213  */
214 int
215 fetchStatURL(char *URL, struct url_stat *us, char *flags)
216 {
217     struct url *u;
218     int s;
219 
220     if ((u = fetchParseURL(URL)) == NULL)
221 	return -1;
222 
223     s = fetchStat(u, us, flags);
224 
225     fetchFreeURL(u);
226     return s;
227 }
228 
229 /*
230  * Attempt to parse the given URL; if successful, call fetchList().
231  */
232 struct url_ent *
233 fetchListURL(char *URL, char *flags)
234 {
235     struct url *u;
236     struct url_ent *ue;
237 
238     if ((u = fetchParseURL(URL)) == NULL)
239 	return NULL;
240 
241     ue = fetchList(u, flags);
242 
243     fetchFreeURL(u);
244     return ue;
245 }
246 
247 /*
248  * Make a URL
249  */
250 struct url *
251 fetchMakeURL(char *scheme, char *host, int port, char *doc,
252     char *user, char *pwd)
253 {
254     struct url *u;
255 
256     if (!scheme || (!host && !doc)) {
257 	_url_seterr(URL_MALFORMED);
258 	return NULL;
259     }
260 
261     if (port < 0 || port > 65535) {
262 	_url_seterr(URL_BAD_PORT);
263 	return NULL;
264     }
265 
266     /* allocate struct url */
267     if ((u = calloc(1, sizeof *u)) == NULL) {
268 	_fetch_syserr();
269 	return NULL;
270     }
271 
272     if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
273 	_fetch_syserr();
274 	free(u);
275 	return NULL;
276     }
277 
278 #define seturl(x) snprintf(u->x, sizeof u->x, "%s", x)
279     seturl(scheme);
280     seturl(host);
281     seturl(user);
282     seturl(pwd);
283 #undef seturl
284     u->port = port;
285 
286     return u;
287 }
288 
289 /*
290  * Split an URL into components. URL syntax is:
291  * [method:/][/[user[:pwd]@]host[:port]/][document]
292  * This almost, but not quite, RFC1738 URL syntax.
293  */
294 struct url *
295 fetchParseURL(char *URL)
296 {
297     char *doc, *p, *q;
298     struct url *u;
299     int i;
300 
301     /* allocate struct url */
302     if ((u = calloc(1, sizeof *u)) == NULL) {
303 	_fetch_syserr();
304 	return NULL;
305     }
306 
307     /* scheme name */
308     if ((p = strstr(URL, ":/"))) {
309 	snprintf(u->scheme, URL_SCHEMELEN+1, "%.*s", p - URL, URL);
310 	URL = ++p;
311 	/*
312 	 * Only one slash: no host, leave slash as part of document
313 	 * Two slashes: host follows, strip slashes
314 	 */
315 	if (URL[1] == '/')
316 	    URL = (p += 2);
317     } else {
318 	p = URL;
319     }
320     if (!*URL || *URL == '/')
321 	goto nohost;
322 
323     p = strpbrk(URL, "/@");
324     if (p && *p == '@') {
325 	/* username */
326 	for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++)
327 	    if (i < URL_USERLEN)
328 		u->user[i++] = *q;
329 
330 	/* password */
331 	if (*q == ':')
332 	    for (q++, i = 0; (*q != ':') && (*q != '@'); q++)
333 		if (i < URL_PWDLEN)
334 		    u->pwd[i++] = *q;
335 
336 	p++;
337     } else p = URL;
338 
339     /* hostname */
340 #ifdef INET6
341     if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
342 	(*++q == '\0' || *q == '/' || *q == ':')) {
343 	if ((i = q - p - 2) > MAXHOSTNAMELEN)
344 	    i = MAXHOSTNAMELEN;
345 	strncpy(u->host, ++p, i);
346 	p = q;
347     } else
348 #endif
349 	for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
350 	    if (i < MAXHOSTNAMELEN)
351 		u->host[i++] = *p;
352 
353     /* port */
354     if (*p == ':') {
355 	for (q = ++p; *q && (*q != '/'); q++)
356 	    if (isdigit(*q))
357 		u->port = u->port * 10 + (*q - '0');
358 	    else {
359 		/* invalid port */
360 		_url_seterr(URL_BAD_PORT);
361 		goto ouch;
362 	    }
363 	while (*p && (*p != '/'))
364 	    p++;
365     }
366 
367 nohost:
368     /* document */
369     if (!*p)
370 	p = "/";
371 
372     if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 ||
373 	strcasecmp(u->scheme, SCHEME_HTTPS) == 0) {
374 	const char hexnums[] = "0123456789abcdef";
375 
376 	/* percent-escape whitespace. */
377 	if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
378 	    _fetch_syserr();
379 	    goto ouch;
380 	}
381 	u->doc = doc;
382 	while (*p != '\0') {
383 	    if (!isspace(*p)) {
384 		*doc++ = *p++;
385             } else {
386 		*doc++ = '%';
387 		*doc++ = hexnums[((unsigned int)*p) >> 4];
388 		*doc++ = hexnums[((unsigned int)*p) & 0xf];
389 		p++;
390             }
391 	}
392 	*doc = '\0';
393     } else if ((u->doc = strdup(p)) == NULL) {
394 	_fetch_syserr();
395 	goto ouch;
396     }
397 
398     DEBUG(fprintf(stderr,
399 		  "scheme:   [\033[1m%s\033[m]\n"
400 		  "user:     [\033[1m%s\033[m]\n"
401 		  "password: [\033[1m%s\033[m]\n"
402 		  "host:     [\033[1m%s\033[m]\n"
403 		  "port:     [\033[1m%d\033[m]\n"
404 		  "document: [\033[1m%s\033[m]\n",
405 		  u->scheme, u->user, u->pwd,
406 		  u->host, u->port, u->doc));
407 
408     return u;
409 
410 ouch:
411     free(u);
412     return NULL;
413 }
414 
415 /*
416  * Free a URL
417  */
418 void
419 fetchFreeURL(struct url *u)
420 {
421     free(u->doc);
422     free(u);
423 }
424