xref: /freebsd/lib/libfetch/fetch.c (revision 38c7e4a631ce968b02e4a08944aabad9b57844e8)
1 /*-
2  * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 #include <sys/param.h>
32 #include <sys/errno.h>
33 
34 #include <ctype.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 
39 #include "fetch.h"
40 #include "common.h"
41 
42 
43 int	 fetchLastErrCode;
44 char	 fetchLastErrString[MAXERRSTRING];
45 int	 fetchTimeout;
46 int	 fetchRestartCalls = 1;
47 
48 
49 /*** Local data **************************************************************/
50 
51 /*
52  * Error messages for parser errors
53  */
54 #define URL_MALFORMED		1
55 #define URL_BAD_SCHEME		2
56 #define URL_BAD_PORT		3
57 static struct fetcherr _url_errlist[] = {
58     { URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
59     { URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
60     { URL_BAD_PORT,	FETCH_URL,	"Invalid server port" },
61     { -1,		FETCH_UNKNOWN,	"Unknown parser error" }
62 };
63 
64 
65 /*** Public API **************************************************************/
66 
67 /*
68  * Select the appropriate protocol for the URL scheme, and return a
69  * read-only stream connected to the document referenced by the URL.
70  * Also fill out the struct url_stat.
71  */
72 FILE *
73 fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
74 {
75     int direct;
76 
77     direct = CHECK_FLAG('d');
78     if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
79 	return fetchXGetFile(URL, us, flags);
80     else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
81 	return fetchXGetHTTP(URL, us, flags);
82     else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
83 	return fetchXGetFTP(URL, us, flags);
84     } else {
85 	_url_seterr(URL_BAD_SCHEME);
86 	return NULL;
87     }
88 }
89 
/*
 * Convenience form of fetchXGet() for callers that do not want the
 * document's url_stat: a NULL url_stat pointer is passed along.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
    struct url_stat *no_stat = NULL;

    return fetchXGet(URL, no_stat, flags);
}
99 
100 /*
101  * Select the appropriate protocol for the URL scheme, and return a
102  * write-only stream connected to the document referenced by the URL.
103  */
104 FILE *
105 fetchPut(struct url *URL, const char *flags)
106 {
107     int direct;
108 
109     direct = CHECK_FLAG('d');
110     if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
111 	return fetchPutFile(URL, flags);
112     else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
113 	return fetchPutHTTP(URL, flags);
114     else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
115 	return fetchPutFTP(URL, flags);
116     } else {
117 	_url_seterr(URL_BAD_SCHEME);
118 	return NULL;
119     }
120 }
121 
122 /*
123  * Select the appropriate protocol for the URL scheme, and return the
124  * size of the document referenced by the URL if it exists.
125  */
126 int
127 fetchStat(struct url *URL, struct url_stat *us, const char *flags)
128 {
129     int direct;
130 
131     direct = CHECK_FLAG('d');
132     if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
133 	return fetchStatFile(URL, us, flags);
134     else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
135 	return fetchStatHTTP(URL, us, flags);
136     else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
137 	return fetchStatFTP(URL, us, flags);
138     } else {
139 	_url_seterr(URL_BAD_SCHEME);
140 	return -1;
141     }
142 }
143 
144 /*
145  * Select the appropriate protocol for the URL scheme, and return a
146  * list of files in the directory pointed to by the URL.
147  */
148 struct url_ent *
149 fetchList(struct url *URL, const char *flags)
150 {
151     int direct;
152 
153     direct = CHECK_FLAG('d');
154     if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
155 	return fetchListFile(URL, flags);
156     else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
157 	return fetchListHTTP(URL, flags);
158     else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
159 	return fetchListFTP(URL, flags);
160     } else {
161 	_url_seterr(URL_BAD_SCHEME);
162 	return NULL;
163     }
164 }
165 
/*
 * String front end to fetchXGet(): parse the URL text, fetch through the
 * parsed form, and release the parsed form before returning.
 * Returns NULL if the text cannot be parsed; otherwise returns whatever
 * fetchXGet() returned.
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
    struct url *parsed;
    FILE *stream;

    parsed = fetchParseURL(URL);
    if (parsed == NULL)
	return NULL;

    stream = fetchXGet(parsed, us, flags);
    /* the parsed URL is only needed for the duration of the call */
    fetchFreeURL(parsed);

    return stream;
}
183 
/*
 * Convenience form of fetchXGetURL() for callers that do not want the
 * document's url_stat: a NULL url_stat pointer is passed along.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
    struct url_stat *no_stat = NULL;

    return fetchXGetURL(URL, no_stat, flags);
}
192 
/*
 * String front end to fetchPut(): parse the URL text, open the upload
 * through the parsed form, and release the parsed form before returning.
 * Returns NULL if the text cannot be parsed; otherwise returns whatever
 * fetchPut() returned.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
    struct url *parsed;
    FILE *stream;

    parsed = fetchParseURL(URL);
    if (parsed == NULL)
	return NULL;

    stream = fetchPut(parsed, flags);
    /* the parsed URL is only needed for the duration of the call */
    fetchFreeURL(parsed);

    return stream;
}
210 
/*
 * String front end to fetchStat(): parse the URL text, stat through the
 * parsed form, and release the parsed form before returning.
 * Returns -1 if the text cannot be parsed; otherwise returns whatever
 * fetchStat() returned.
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
    struct url *parsed;
    int rv;

    parsed = fetchParseURL(URL);
    if (parsed == NULL)
	return -1;

    rv = fetchStat(parsed, us, flags);
    /* the parsed URL is only needed for the duration of the call */
    fetchFreeURL(parsed);

    return rv;
}
228 
/*
 * String front end to fetchList(): parse the URL text, list through the
 * parsed form, and release the parsed form before returning.
 * Returns NULL if the text cannot be parsed; otherwise returns whatever
 * fetchList() returned.
 */
struct url_ent *
fetchListURL(const char *URL, const char *flags)
{
    struct url *parsed;
    struct url_ent *entries;

    parsed = fetchParseURL(URL);
    if (parsed == NULL)
	return NULL;

    entries = fetchList(parsed, flags);
    /* the parsed URL is only needed for the duration of the call */
    fetchFreeURL(parsed);

    return entries;
}
246 
247 /*
248  * Make a URL
249  */
250 struct url *
251 fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
252     const char *user, const char *pwd)
253 {
254     struct url *u;
255 
256     if (!scheme || (!host && !doc)) {
257 	_url_seterr(URL_MALFORMED);
258 	return NULL;
259     }
260 
261     if (port < 0 || port > 65535) {
262 	_url_seterr(URL_BAD_PORT);
263 	return NULL;
264     }
265 
266     /* allocate struct url */
267     if ((u = calloc(1, sizeof *u)) == NULL) {
268 	_fetch_syserr();
269 	return NULL;
270     }
271 
272     if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
273 	_fetch_syserr();
274 	free(u);
275 	return NULL;
276     }
277 
278 #define seturl(x) snprintf(u->x, sizeof u->x, "%s", x)
279     seturl(scheme);
280     seturl(host);
281     seturl(user);
282     seturl(pwd);
283 #undef seturl
284     u->port = port;
285 
286     return u;
287 }
288 
289 /*
290  * Split an URL into components. URL syntax is:
291  * [method:/][/[user[:pwd]@]host[:port]/][document]
292  * This almost, but not quite, RFC1738 URL syntax.
293  */
294 struct url *
295 fetchParseURL(const char *URL)
296 {
297     char *doc;
298     const char *p, *q;
299     struct url *u;
300     int i;
301 
302     /* allocate struct url */
303     if ((u = calloc(1, sizeof *u)) == NULL) {
304 	_fetch_syserr();
305 	return NULL;
306     }
307 
308     /* scheme name */
309     if ((p = strstr(URL, ":/"))) {
310 	snprintf(u->scheme, URL_SCHEMELEN+1, "%.*s", p - URL, URL);
311 	URL = ++p;
312 	/*
313 	 * Only one slash: no host, leave slash as part of document
314 	 * Two slashes: host follows, strip slashes
315 	 */
316 	if (URL[1] == '/')
317 	    URL = (p += 2);
318     } else {
319 	p = URL;
320     }
321     if (!*URL || *URL == '/')
322 	goto nohost;
323 
324     p = strpbrk(URL, "/@");
325     if (p && *p == '@') {
326 	/* username */
327 	for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++)
328 	    if (i < URL_USERLEN)
329 		u->user[i++] = *q;
330 
331 	/* password */
332 	if (*q == ':')
333 	    for (q++, i = 0; (*q != ':') && (*q != '@'); q++)
334 		if (i < URL_PWDLEN)
335 		    u->pwd[i++] = *q;
336 
337 	p++;
338     } else p = URL;
339 
340     /* hostname */
341 #ifdef INET6
342     if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
343 	(*++q == '\0' || *q == '/' || *q == ':')) {
344 	if ((i = q - p - 2) > MAXHOSTNAMELEN)
345 	    i = MAXHOSTNAMELEN;
346 	strncpy(u->host, ++p, i);
347 	p = q;
348     } else
349 #endif
350 	for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
351 	    if (i < MAXHOSTNAMELEN)
352 		u->host[i++] = *p;
353 
354     /* port */
355     if (*p == ':') {
356 	for (q = ++p; *q && (*q != '/'); q++)
357 	    if (isdigit(*q))
358 		u->port = u->port * 10 + (*q - '0');
359 	    else {
360 		/* invalid port */
361 		_url_seterr(URL_BAD_PORT);
362 		goto ouch;
363 	    }
364 	while (*p && (*p != '/'))
365 	    p++;
366     }
367 
368 nohost:
369     /* document */
370     if (!*p)
371 	p = "/";
372 
373     if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 ||
374 	strcasecmp(u->scheme, SCHEME_HTTPS) == 0) {
375 	const char hexnums[] = "0123456789abcdef";
376 
377 	/* percent-escape whitespace. */
378 	if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
379 	    _fetch_syserr();
380 	    goto ouch;
381 	}
382 	u->doc = doc;
383 	while (*p != '\0') {
384 	    if (!isspace(*p)) {
385 		*doc++ = *p++;
386             } else {
387 		*doc++ = '%';
388 		*doc++ = hexnums[((unsigned int)*p) >> 4];
389 		*doc++ = hexnums[((unsigned int)*p) & 0xf];
390 		p++;
391             }
392 	}
393 	*doc = '\0';
394     } else if ((u->doc = strdup(p)) == NULL) {
395 	_fetch_syserr();
396 	goto ouch;
397     }
398 
399     DEBUG(fprintf(stderr,
400 		  "scheme:   [\033[1m%s\033[m]\n"
401 		  "user:     [\033[1m%s\033[m]\n"
402 		  "password: [\033[1m%s\033[m]\n"
403 		  "host:     [\033[1m%s\033[m]\n"
404 		  "port:     [\033[1m%d\033[m]\n"
405 		  "document: [\033[1m%s\033[m]\n",
406 		  u->scheme, u->user, u->pwd,
407 		  u->host, u->port, u->doc));
408 
409     return u;
410 
411 ouch:
412     free(u);
413     return NULL;
414 }
415 
416 /*
417  * Free a URL
418  */
419 void
420 fetchFreeURL(struct url *u)
421 {
422     free(u->doc);
423     free(u);
424 }
425