/*-
 * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
204ca1ab94SDag-Erling Smørgrav * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 214ca1ab94SDag-Erling Smørgrav * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 224ca1ab94SDag-Erling Smørgrav * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 234ca1ab94SDag-Erling Smørgrav * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 244ca1ab94SDag-Erling Smørgrav * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 254ca1ab94SDag-Erling Smørgrav * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 264ca1ab94SDag-Erling Smørgrav * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 274ca1ab94SDag-Erling Smørgrav * 287f3dea24SPeter Wemm * $FreeBSD$ 294ca1ab94SDag-Erling Smørgrav */ 304ca1ab94SDag-Erling Smørgrav 314ca1ab94SDag-Erling Smørgrav #include <sys/param.h> 32d8acd8dcSDag-Erling Smørgrav #include <sys/errno.h> 334ca1ab94SDag-Erling Smørgrav 344ca1ab94SDag-Erling Smørgrav #include <ctype.h> 354ca1ab94SDag-Erling Smørgrav #include <stdio.h> 364ca1ab94SDag-Erling Smørgrav #include <stdlib.h> 374ca1ab94SDag-Erling Smørgrav #include <string.h> 384ca1ab94SDag-Erling Smørgrav 394ca1ab94SDag-Erling Smørgrav #include "fetch.h" 40d8acd8dcSDag-Erling Smørgrav #include "common.h" 414ca1ab94SDag-Erling Smørgrav 424ca1ab94SDag-Erling Smørgrav 430fba3a00SDag-Erling Smørgrav int fetchLastErrCode; 44fc6e9e65SDag-Erling Smørgrav int fetchTimeout; 450fba3a00SDag-Erling Smørgrav 460fba3a00SDag-Erling Smørgrav 47d8acd8dcSDag-Erling Smørgrav /*** Local data **************************************************************/ 48d8acd8dcSDag-Erling Smørgrav 49d8acd8dcSDag-Erling Smørgrav /* 50d8acd8dcSDag-Erling Smørgrav * Error messages for parser errors 51d8acd8dcSDag-Erling Smørgrav */ 52d8acd8dcSDag-Erling Smørgrav #define URL_MALFORMED 1 53d8acd8dcSDag-Erling Smørgrav #define URL_BAD_SCHEME 2 54d8acd8dcSDag-Erling Smørgrav #define URL_BAD_PORT 3 
55d8acd8dcSDag-Erling Smørgrav static struct fetcherr _url_errlist[] = { 56d8acd8dcSDag-Erling Smørgrav { URL_MALFORMED, FETCH_URL, "Malformed URL" }, 57d8acd8dcSDag-Erling Smørgrav { URL_BAD_SCHEME, FETCH_URL, "Invalid URL scheme" }, 58d8acd8dcSDag-Erling Smørgrav { URL_BAD_PORT, FETCH_URL, "Invalid server port" }, 59d8acd8dcSDag-Erling Smørgrav { -1, FETCH_UNKNOWN, "Unknown parser error" } 60d8acd8dcSDag-Erling Smørgrav }; 61d8acd8dcSDag-Erling Smørgrav 62d8acd8dcSDag-Erling Smørgrav 63d8acd8dcSDag-Erling Smørgrav /*** Public API **************************************************************/ 644ca1ab94SDag-Erling Smørgrav 65842a95ccSDag-Erling Smørgrav /* 66842a95ccSDag-Erling Smørgrav * Select the appropriate protocol for the URL scheme, and return a 67842a95ccSDag-Erling Smørgrav * read-only stream connected to the document referenced by the URL. 68842a95ccSDag-Erling Smørgrav */ 69ecc91352SDag-Erling Smørgrav FILE * 70d8acd8dcSDag-Erling Smørgrav fetchGet(struct url *URL, char *flags) 71ecc91352SDag-Erling Smørgrav { 72ecc91352SDag-Erling Smørgrav if (strcasecmp(URL->scheme, "file") == 0) 73ecc91352SDag-Erling Smørgrav return fetchGetFile(URL, flags); 74ecc91352SDag-Erling Smørgrav else if (strcasecmp(URL->scheme, "http") == 0) 75ecc91352SDag-Erling Smørgrav return fetchGetHTTP(URL, flags); 76ecc91352SDag-Erling Smørgrav else if (strcasecmp(URL->scheme, "ftp") == 0) 77ecc91352SDag-Erling Smørgrav return fetchGetFTP(URL, flags); 78d8acd8dcSDag-Erling Smørgrav else { 79d8acd8dcSDag-Erling Smørgrav _url_seterr(URL_BAD_SCHEME); 80d8acd8dcSDag-Erling Smørgrav return NULL; 81d8acd8dcSDag-Erling Smørgrav } 82ecc91352SDag-Erling Smørgrav } 83ecc91352SDag-Erling Smørgrav 84842a95ccSDag-Erling Smørgrav /* 85842a95ccSDag-Erling Smørgrav * Select the appropriate protocol for the URL scheme, and return a 86842a95ccSDag-Erling Smørgrav * write-only stream connected to the document referenced by the URL. 
87842a95ccSDag-Erling Smørgrav */ 88ecc91352SDag-Erling Smørgrav FILE * 89d8acd8dcSDag-Erling Smørgrav fetchPut(struct url *URL, char *flags) 90ecc91352SDag-Erling Smørgrav { 91ecc91352SDag-Erling Smørgrav if (strcasecmp(URL->scheme, "file") == 0) 92ecc91352SDag-Erling Smørgrav return fetchPutFile(URL, flags); 93ecc91352SDag-Erling Smørgrav else if (strcasecmp(URL->scheme, "http") == 0) 94ecc91352SDag-Erling Smørgrav return fetchPutHTTP(URL, flags); 95ecc91352SDag-Erling Smørgrav else if (strcasecmp(URL->scheme, "ftp") == 0) 96ecc91352SDag-Erling Smørgrav return fetchPutFTP(URL, flags); 97d8acd8dcSDag-Erling Smørgrav else { 98d8acd8dcSDag-Erling Smørgrav _url_seterr(URL_BAD_SCHEME); 99d8acd8dcSDag-Erling Smørgrav return NULL; 100d8acd8dcSDag-Erling Smørgrav } 101d8acd8dcSDag-Erling Smørgrav } 102d8acd8dcSDag-Erling Smørgrav 103d8acd8dcSDag-Erling Smørgrav /* 104d8acd8dcSDag-Erling Smørgrav * Select the appropriate protocol for the URL scheme, and return the 105d8acd8dcSDag-Erling Smørgrav * size of the document referenced by the URL if it exists. 
106d8acd8dcSDag-Erling Smørgrav */ 107d8acd8dcSDag-Erling Smørgrav int 108d8acd8dcSDag-Erling Smørgrav fetchStat(struct url *URL, struct url_stat *us, char *flags) 109d8acd8dcSDag-Erling Smørgrav { 110d8acd8dcSDag-Erling Smørgrav if (strcasecmp(URL->scheme, "file") == 0) 111d8acd8dcSDag-Erling Smørgrav return fetchStatFile(URL, us, flags); 112d8acd8dcSDag-Erling Smørgrav else if (strcasecmp(URL->scheme, "http") == 0) 113d8acd8dcSDag-Erling Smørgrav return fetchStatHTTP(URL, us, flags); 114d8acd8dcSDag-Erling Smørgrav else if (strcasecmp(URL->scheme, "ftp") == 0) 115d8acd8dcSDag-Erling Smørgrav return fetchStatFTP(URL, us, flags); 116d8acd8dcSDag-Erling Smørgrav else { 117d8acd8dcSDag-Erling Smørgrav _url_seterr(URL_BAD_SCHEME); 118d8acd8dcSDag-Erling Smørgrav return -1; 119d8acd8dcSDag-Erling Smørgrav } 120ecc91352SDag-Erling Smørgrav } 121ecc91352SDag-Erling Smørgrav 122842a95ccSDag-Erling Smørgrav /* 123ce71b736SDag-Erling Smørgrav * Select the appropriate protocol for the URL scheme, and return a 124ce71b736SDag-Erling Smørgrav * list of files in the directory pointed to by the URL. 
125ce71b736SDag-Erling Smørgrav */ 126ce71b736SDag-Erling Smørgrav struct url_ent * 127ce71b736SDag-Erling Smørgrav fetchList(struct url *URL, char *flags) 128ce71b736SDag-Erling Smørgrav { 129ce71b736SDag-Erling Smørgrav if (strcasecmp(URL->scheme, "file") == 0) 130ce71b736SDag-Erling Smørgrav return fetchListFile(URL, flags); 131ce71b736SDag-Erling Smørgrav else if (strcasecmp(URL->scheme, "http") == 0) 132ce71b736SDag-Erling Smørgrav return fetchListHTTP(URL, flags); 133ce71b736SDag-Erling Smørgrav else if (strcasecmp(URL->scheme, "ftp") == 0) 134ce71b736SDag-Erling Smørgrav return fetchListFTP(URL, flags); 135ce71b736SDag-Erling Smørgrav else { 136ce71b736SDag-Erling Smørgrav _url_seterr(URL_BAD_SCHEME); 137ce71b736SDag-Erling Smørgrav return NULL; 138ce71b736SDag-Erling Smørgrav } 139ce71b736SDag-Erling Smørgrav } 140ce71b736SDag-Erling Smørgrav 141ce71b736SDag-Erling Smørgrav /* 142842a95ccSDag-Erling Smørgrav * Attempt to parse the given URL; if successful, call fetchGet(). 143842a95ccSDag-Erling Smørgrav */ 1444ca1ab94SDag-Erling Smørgrav FILE * 1454ca1ab94SDag-Erling Smørgrav fetchGetURL(char *URL, char *flags) 1464ca1ab94SDag-Erling Smørgrav { 147d8acd8dcSDag-Erling Smørgrav struct url *u; 1484ca1ab94SDag-Erling Smørgrav FILE *f; 1494ca1ab94SDag-Erling Smørgrav 1504ca1ab94SDag-Erling Smørgrav if ((u = fetchParseURL(URL)) == NULL) 1514ca1ab94SDag-Erling Smørgrav return NULL; 1524ca1ab94SDag-Erling Smørgrav 153ecc91352SDag-Erling Smørgrav f = fetchGet(u, flags); 1544ca1ab94SDag-Erling Smørgrav 155842a95ccSDag-Erling Smørgrav free(u); 1564ca1ab94SDag-Erling Smørgrav return f; 1574ca1ab94SDag-Erling Smørgrav } 1584ca1ab94SDag-Erling Smørgrav 1594ca1ab94SDag-Erling Smørgrav 160842a95ccSDag-Erling Smørgrav /* 161842a95ccSDag-Erling Smørgrav * Attempt to parse the given URL; if successful, call fetchPut(). 
162842a95ccSDag-Erling Smørgrav */ 1634ca1ab94SDag-Erling Smørgrav FILE * 1644ca1ab94SDag-Erling Smørgrav fetchPutURL(char *URL, char *flags) 1654ca1ab94SDag-Erling Smørgrav { 166d8acd8dcSDag-Erling Smørgrav struct url *u; 1674ca1ab94SDag-Erling Smørgrav FILE *f; 1684ca1ab94SDag-Erling Smørgrav 1694ca1ab94SDag-Erling Smørgrav if ((u = fetchParseURL(URL)) == NULL) 1704ca1ab94SDag-Erling Smørgrav return NULL; 1714ca1ab94SDag-Erling Smørgrav 172ecc91352SDag-Erling Smørgrav f = fetchPut(u, flags); 1734ca1ab94SDag-Erling Smørgrav 174842a95ccSDag-Erling Smørgrav free(u); 1754ca1ab94SDag-Erling Smørgrav return f; 1764ca1ab94SDag-Erling Smørgrav } 1774ca1ab94SDag-Erling Smørgrav 1784ca1ab94SDag-Erling Smørgrav /* 179d8acd8dcSDag-Erling Smørgrav * Attempt to parse the given URL; if successful, call fetchStat(). 180d8acd8dcSDag-Erling Smørgrav */ 181d8acd8dcSDag-Erling Smørgrav int 182d8acd8dcSDag-Erling Smørgrav fetchStatURL(char *URL, struct url_stat *us, char *flags) 183d8acd8dcSDag-Erling Smørgrav { 184d8acd8dcSDag-Erling Smørgrav struct url *u; 185d8acd8dcSDag-Erling Smørgrav int s; 186d8acd8dcSDag-Erling Smørgrav 187d8acd8dcSDag-Erling Smørgrav if ((u = fetchParseURL(URL)) == NULL) 188d8acd8dcSDag-Erling Smørgrav return -1; 189d8acd8dcSDag-Erling Smørgrav 190d8acd8dcSDag-Erling Smørgrav s = fetchStat(u, us, flags); 191d8acd8dcSDag-Erling Smørgrav 192d8acd8dcSDag-Erling Smørgrav free(u); 193d8acd8dcSDag-Erling Smørgrav return s; 194d8acd8dcSDag-Erling Smørgrav } 195d8acd8dcSDag-Erling Smørgrav 196d8acd8dcSDag-Erling Smørgrav /* 197ce71b736SDag-Erling Smørgrav * Attempt to parse the given URL; if successful, call fetchList(). 
198ce71b736SDag-Erling Smørgrav */ 199ce71b736SDag-Erling Smørgrav struct url_ent * 200ce71b736SDag-Erling Smørgrav fetchListURL(char *URL, char *flags) 201ce71b736SDag-Erling Smørgrav { 202ce71b736SDag-Erling Smørgrav struct url *u; 203ce71b736SDag-Erling Smørgrav struct url_ent *ue; 204ce71b736SDag-Erling Smørgrav 205ce71b736SDag-Erling Smørgrav if ((u = fetchParseURL(URL)) == NULL) 206ce71b736SDag-Erling Smørgrav return NULL; 207ce71b736SDag-Erling Smørgrav 208ce71b736SDag-Erling Smørgrav ue = fetchList(u, flags); 209ce71b736SDag-Erling Smørgrav 210ce71b736SDag-Erling Smørgrav free(u); 211ce71b736SDag-Erling Smørgrav return ue; 212ce71b736SDag-Erling Smørgrav } 213ce71b736SDag-Erling Smørgrav 214ce71b736SDag-Erling Smørgrav /* 2154ca1ab94SDag-Erling Smørgrav * Split an URL into components. URL syntax is: 2164ca1ab94SDag-Erling Smørgrav * method:[//[user[:pwd]@]host[:port]]/[document] 2174ca1ab94SDag-Erling Smørgrav * This almost, but not quite, RFC1738 URL syntax. 2184ca1ab94SDag-Erling Smørgrav */ 219d8acd8dcSDag-Erling Smørgrav struct url * 2204ca1ab94SDag-Erling Smørgrav fetchParseURL(char *URL) 2214ca1ab94SDag-Erling Smørgrav { 2224ca1ab94SDag-Erling Smørgrav char *p, *q; 223d8acd8dcSDag-Erling Smørgrav struct url *u; 2244ca1ab94SDag-Erling Smørgrav int i; 2254ca1ab94SDag-Erling Smørgrav 226d8acd8dcSDag-Erling Smørgrav /* allocate struct url */ 227d8acd8dcSDag-Erling Smørgrav if ((u = calloc(1, sizeof(struct url))) == NULL) { 228d8acd8dcSDag-Erling Smørgrav errno = ENOMEM; 229d8acd8dcSDag-Erling Smørgrav _fetch_syserr(); 2304ca1ab94SDag-Erling Smørgrav return NULL; 231d8acd8dcSDag-Erling Smørgrav } 2324ca1ab94SDag-Erling Smørgrav 2334ca1ab94SDag-Erling Smørgrav /* scheme name */ 2344ca1ab94SDag-Erling Smørgrav for (i = 0; *URL && (*URL != ':'); URL++) 2354ca1ab94SDag-Erling Smørgrav if (i < URL_SCHEMELEN) 2364ca1ab94SDag-Erling Smørgrav u->scheme[i++] = *URL; 237d8acd8dcSDag-Erling Smørgrav if (!URL[0] || (URL[1] != '/')) { 238d8acd8dcSDag-Erling Smørgrav 
_url_seterr(URL_BAD_SCHEME); 2394ca1ab94SDag-Erling Smørgrav goto ouch; 240d8acd8dcSDag-Erling Smørgrav } 2414ca1ab94SDag-Erling Smørgrav else URL++; 2424ca1ab94SDag-Erling Smørgrav if (URL[1] != '/') { 2434ca1ab94SDag-Erling Smørgrav p = URL; 2444ca1ab94SDag-Erling Smørgrav goto nohost; 2454ca1ab94SDag-Erling Smørgrav } 2464ca1ab94SDag-Erling Smørgrav else URL += 2; 2474ca1ab94SDag-Erling Smørgrav 2484ca1ab94SDag-Erling Smørgrav p = strpbrk(URL, "/@"); 2490fba3a00SDag-Erling Smørgrav if (p && *p == '@') { 2504ca1ab94SDag-Erling Smørgrav /* username */ 2514ca1ab94SDag-Erling Smørgrav for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++) 2524ca1ab94SDag-Erling Smørgrav if (i < URL_USERLEN) 2534ca1ab94SDag-Erling Smørgrav u->user[i++] = *q; 2544ca1ab94SDag-Erling Smørgrav 2554ca1ab94SDag-Erling Smørgrav /* password */ 2564ca1ab94SDag-Erling Smørgrav if (*q == ':') 2574ca1ab94SDag-Erling Smørgrav for (q++, i = 0; (*q != ':') && (*q != '@'); q++) 2584ca1ab94SDag-Erling Smørgrav if (i < URL_PWDLEN) 2594ca1ab94SDag-Erling Smørgrav u->pwd[i++] = *q; 2604ca1ab94SDag-Erling Smørgrav 2614ca1ab94SDag-Erling Smørgrav p++; 2624ca1ab94SDag-Erling Smørgrav } else p = URL; 2634ca1ab94SDag-Erling Smørgrav 2644ca1ab94SDag-Erling Smørgrav /* hostname */ 2654ca1ab94SDag-Erling Smørgrav for (i = 0; *p && (*p != '/') && (*p != ':'); p++) 2664ca1ab94SDag-Erling Smørgrav if (i < MAXHOSTNAMELEN) 2674ca1ab94SDag-Erling Smørgrav u->host[i++] = *p; 2684ca1ab94SDag-Erling Smørgrav 2694ca1ab94SDag-Erling Smørgrav /* port */ 2704ca1ab94SDag-Erling Smørgrav if (*p == ':') { 2714ca1ab94SDag-Erling Smørgrav for (q = ++p; *q && (*q != '/'); q++) 2724ca1ab94SDag-Erling Smørgrav if (isdigit(*q)) 2734ca1ab94SDag-Erling Smørgrav u->port = u->port * 10 + (*q - '0'); 274d8acd8dcSDag-Erling Smørgrav else { 275d8acd8dcSDag-Erling Smørgrav /* invalid port */ 276d8acd8dcSDag-Erling Smørgrav _url_seterr(URL_BAD_PORT); 277d8acd8dcSDag-Erling Smørgrav goto ouch; 278d8acd8dcSDag-Erling Smørgrav } 
2794ca1ab94SDag-Erling Smørgrav while (*p && (*p != '/')) 2804ca1ab94SDag-Erling Smørgrav p++; 2814ca1ab94SDag-Erling Smørgrav } 2824ca1ab94SDag-Erling Smørgrav 2834ca1ab94SDag-Erling Smørgrav nohost: 2844ca1ab94SDag-Erling Smørgrav /* document */ 285842a95ccSDag-Erling Smørgrav if (*p) { 286d8acd8dcSDag-Erling Smørgrav struct url *t; 287842a95ccSDag-Erling Smørgrav t = realloc(u, sizeof(*u)+strlen(p)-1); 288d8acd8dcSDag-Erling Smørgrav if (t == NULL) { 289d8acd8dcSDag-Erling Smørgrav errno = ENOMEM; 290d8acd8dcSDag-Erling Smørgrav _fetch_syserr(); 2914ca1ab94SDag-Erling Smørgrav goto ouch; 292d8acd8dcSDag-Erling Smørgrav } 293842a95ccSDag-Erling Smørgrav u = t; 294842a95ccSDag-Erling Smørgrav strcpy(u->doc, p); 295842a95ccSDag-Erling Smørgrav } else { 296842a95ccSDag-Erling Smørgrav u->doc[0] = '/'; 297842a95ccSDag-Erling Smørgrav u->doc[1] = 0; 298842a95ccSDag-Erling Smørgrav } 2994ca1ab94SDag-Erling Smørgrav 3004ca1ab94SDag-Erling Smørgrav DEBUG(fprintf(stderr, 3014ca1ab94SDag-Erling Smørgrav "scheme: [\033[1m%s\033[m]\n" 3024ca1ab94SDag-Erling Smørgrav "user: [\033[1m%s\033[m]\n" 3034ca1ab94SDag-Erling Smørgrav "password: [\033[1m%s\033[m]\n" 3044ca1ab94SDag-Erling Smørgrav "host: [\033[1m%s\033[m]\n" 3054ca1ab94SDag-Erling Smørgrav "port: [\033[1m%d\033[m]\n" 3064ca1ab94SDag-Erling Smørgrav "document: [\033[1m%s\033[m]\n", 3074ca1ab94SDag-Erling Smørgrav u->scheme, u->user, u->pwd, 3084ca1ab94SDag-Erling Smørgrav u->host, u->port, u->doc)); 3094ca1ab94SDag-Erling Smørgrav 3104ca1ab94SDag-Erling Smørgrav return u; 3114ca1ab94SDag-Erling Smørgrav 3124ca1ab94SDag-Erling Smørgrav ouch: 3134ca1ab94SDag-Erling Smørgrav free(u); 3144ca1ab94SDag-Erling Smørgrav return NULL; 3154ca1ab94SDag-Erling Smørgrav } 316