14ca1ab94SDag-Erling Smørgrav /*- 24ca1ab94SDag-Erling Smørgrav * Copyright (c) 1998 Dag-Erling Co�dan Sm�rgrav 34ca1ab94SDag-Erling Smørgrav * All rights reserved. 44ca1ab94SDag-Erling Smørgrav * 54ca1ab94SDag-Erling Smørgrav * Redistribution and use in source and binary forms, with or without 64ca1ab94SDag-Erling Smørgrav * modification, are permitted provided that the following conditions 74ca1ab94SDag-Erling Smørgrav * are met: 84ca1ab94SDag-Erling Smørgrav * 1. Redistributions of source code must retain the above copyright 94ca1ab94SDag-Erling Smørgrav * notice, this list of conditions and the following disclaimer 104ca1ab94SDag-Erling Smørgrav * in this position and unchanged. 114ca1ab94SDag-Erling Smørgrav * 2. Redistributions in binary form must reproduce the above copyright 124ca1ab94SDag-Erling Smørgrav * notice, this list of conditions and the following disclaimer in the 134ca1ab94SDag-Erling Smørgrav * documentation and/or other materials provided with the distribution. 144ca1ab94SDag-Erling Smørgrav * 3. The name of the author may not be used to endorse or promote products 154ca1ab94SDag-Erling Smørgrav * derived from this software without specific prior written permission 164ca1ab94SDag-Erling Smørgrav * 174ca1ab94SDag-Erling Smørgrav * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 184ca1ab94SDag-Erling Smørgrav * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 194ca1ab94SDag-Erling Smørgrav * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 204ca1ab94SDag-Erling Smørgrav * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 214ca1ab94SDag-Erling Smørgrav * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 224ca1ab94SDag-Erling Smørgrav * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 234ca1ab94SDag-Erling Smørgrav * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 244ca1ab94SDag-Erling Smørgrav * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 254ca1ab94SDag-Erling Smørgrav * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 264ca1ab94SDag-Erling Smørgrav * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 274ca1ab94SDag-Erling Smørgrav * 280fba3a00SDag-Erling Smørgrav * $Id: fetch.c,v 1.6 1998/11/06 22:14:08 des Exp $ 294ca1ab94SDag-Erling Smørgrav */ 304ca1ab94SDag-Erling Smørgrav 314ca1ab94SDag-Erling Smørgrav #include <sys/param.h> 32d8acd8dcSDag-Erling Smørgrav #include <sys/errno.h> 334ca1ab94SDag-Erling Smørgrav 344ca1ab94SDag-Erling Smørgrav #include <ctype.h> 354ca1ab94SDag-Erling Smørgrav #include <stdio.h> 364ca1ab94SDag-Erling Smørgrav #include <stdlib.h> 374ca1ab94SDag-Erling Smørgrav #include <string.h> 384ca1ab94SDag-Erling Smørgrav 394ca1ab94SDag-Erling Smørgrav #include "fetch.h" 40d8acd8dcSDag-Erling Smørgrav #include "common.h" 414ca1ab94SDag-Erling Smørgrav 424ca1ab94SDag-Erling Smørgrav 430fba3a00SDag-Erling Smørgrav int fetchLastErrCode; 440fba3a00SDag-Erling Smørgrav 450fba3a00SDag-Erling Smørgrav 46d8acd8dcSDag-Erling Smørgrav /*** Local data **************************************************************/ 47d8acd8dcSDag-Erling Smørgrav 48d8acd8dcSDag-Erling Smørgrav /* 49d8acd8dcSDag-Erling Smørgrav * Error messages for parser errors 50d8acd8dcSDag-Erling Smørgrav */ 51d8acd8dcSDag-Erling Smørgrav #define URL_MALFORMED 1 52d8acd8dcSDag-Erling Smørgrav #define URL_BAD_SCHEME 2 53d8acd8dcSDag-Erling Smørgrav #define URL_BAD_PORT 3 54d8acd8dcSDag-Erling Smørgrav static struct fetcherr _url_errlist[] = { 55d8acd8dcSDag-Erling Smørgrav { URL_MALFORMED, FETCH_URL, "Malformed URL" }, 56d8acd8dcSDag-Erling Smørgrav { URL_BAD_SCHEME, FETCH_URL, "Invalid URL scheme" }, 57d8acd8dcSDag-Erling Smørgrav { URL_BAD_PORT, FETCH_URL, "Invalid server port" }, 58d8acd8dcSDag-Erling Smørgrav { -1, FETCH_UNKNOWN, "Unknown parser error" } 59d8acd8dcSDag-Erling Smørgrav }; 60d8acd8dcSDag-Erling Smørgrav 61d8acd8dcSDag-Erling Smørgrav 62d8acd8dcSDag-Erling Smørgrav /*** Public API **************************************************************/ 634ca1ab94SDag-Erling Smørgrav 64842a95ccSDag-Erling Smørgrav /* 65842a95ccSDag-Erling Smørgrav * Select the appropriate protocol for the URL scheme, and return a 66842a95ccSDag-Erling Smørgrav * read-only stream connected to the document referenced by the URL. 67842a95ccSDag-Erling Smørgrav */ 68ecc91352SDag-Erling Smørgrav FILE * 69d8acd8dcSDag-Erling Smørgrav fetchGet(struct url *URL, char *flags) 70ecc91352SDag-Erling Smørgrav { 71ecc91352SDag-Erling Smørgrav if (strcasecmp(URL->scheme, "file") == 0) 72ecc91352SDag-Erling Smørgrav return fetchGetFile(URL, flags); 73ecc91352SDag-Erling Smørgrav else if (strcasecmp(URL->scheme, "http") == 0) 74ecc91352SDag-Erling Smørgrav return fetchGetHTTP(URL, flags); 75ecc91352SDag-Erling Smørgrav else if (strcasecmp(URL->scheme, "ftp") == 0) 76ecc91352SDag-Erling Smørgrav return fetchGetFTP(URL, flags); 77d8acd8dcSDag-Erling Smørgrav else { 78d8acd8dcSDag-Erling Smørgrav _url_seterr(URL_BAD_SCHEME); 79d8acd8dcSDag-Erling Smørgrav return NULL; 80d8acd8dcSDag-Erling Smørgrav } 81ecc91352SDag-Erling Smørgrav } 82ecc91352SDag-Erling Smørgrav 83842a95ccSDag-Erling Smørgrav /* 84842a95ccSDag-Erling Smørgrav * Select the appropriate protocol for the URL scheme, and return a 85842a95ccSDag-Erling Smørgrav * write-only stream connected to the document referenced by the URL. 86842a95ccSDag-Erling Smørgrav */ 87ecc91352SDag-Erling Smørgrav FILE * 88d8acd8dcSDag-Erling Smørgrav fetchPut(struct url *URL, char *flags) 89ecc91352SDag-Erling Smørgrav { 90ecc91352SDag-Erling Smørgrav if (strcasecmp(URL->scheme, "file") == 0) 91ecc91352SDag-Erling Smørgrav return fetchPutFile(URL, flags); 92ecc91352SDag-Erling Smørgrav else if (strcasecmp(URL->scheme, "http") == 0) 93ecc91352SDag-Erling Smørgrav return fetchPutHTTP(URL, flags); 94ecc91352SDag-Erling Smørgrav else if (strcasecmp(URL->scheme, "ftp") == 0) 95ecc91352SDag-Erling Smørgrav return fetchPutFTP(URL, flags); 96d8acd8dcSDag-Erling Smørgrav else { 97d8acd8dcSDag-Erling Smørgrav _url_seterr(URL_BAD_SCHEME); 98d8acd8dcSDag-Erling Smørgrav return NULL; 99d8acd8dcSDag-Erling Smørgrav } 100d8acd8dcSDag-Erling Smørgrav } 101d8acd8dcSDag-Erling Smørgrav 102d8acd8dcSDag-Erling Smørgrav /* 103d8acd8dcSDag-Erling Smørgrav * Select the appropriate protocol for the URL scheme, and return the 104d8acd8dcSDag-Erling Smørgrav * size of the document referenced by the URL if it exists. 105d8acd8dcSDag-Erling Smørgrav */ 106d8acd8dcSDag-Erling Smørgrav int 107d8acd8dcSDag-Erling Smørgrav fetchStat(struct url *URL, struct url_stat *us, char *flags) 108d8acd8dcSDag-Erling Smørgrav { 109d8acd8dcSDag-Erling Smørgrav if (strcasecmp(URL->scheme, "file") == 0) 110d8acd8dcSDag-Erling Smørgrav return fetchStatFile(URL, us, flags); 111d8acd8dcSDag-Erling Smørgrav else if (strcasecmp(URL->scheme, "http") == 0) 112d8acd8dcSDag-Erling Smørgrav return fetchStatHTTP(URL, us, flags); 113d8acd8dcSDag-Erling Smørgrav else if (strcasecmp(URL->scheme, "ftp") == 0) 114d8acd8dcSDag-Erling Smørgrav return fetchStatFTP(URL, us, flags); 115d8acd8dcSDag-Erling Smørgrav else { 116d8acd8dcSDag-Erling Smørgrav _url_seterr(URL_BAD_SCHEME); 117d8acd8dcSDag-Erling Smørgrav return -1; 118d8acd8dcSDag-Erling Smørgrav } 119ecc91352SDag-Erling Smørgrav } 120ecc91352SDag-Erling Smørgrav 121842a95ccSDag-Erling Smørgrav /* 122842a95ccSDag-Erling Smørgrav * Attempt to parse the given URL; if successful, call fetchGet(). 123842a95ccSDag-Erling Smørgrav */ 1244ca1ab94SDag-Erling Smørgrav FILE * 1254ca1ab94SDag-Erling Smørgrav fetchGetURL(char *URL, char *flags) 1264ca1ab94SDag-Erling Smørgrav { 127d8acd8dcSDag-Erling Smørgrav struct url *u; 1284ca1ab94SDag-Erling Smørgrav FILE *f; 1294ca1ab94SDag-Erling Smørgrav 1304ca1ab94SDag-Erling Smørgrav if ((u = fetchParseURL(URL)) == NULL) 1314ca1ab94SDag-Erling Smørgrav return NULL; 1324ca1ab94SDag-Erling Smørgrav 133ecc91352SDag-Erling Smørgrav f = fetchGet(u, flags); 1344ca1ab94SDag-Erling Smørgrav 135842a95ccSDag-Erling Smørgrav free(u); 1364ca1ab94SDag-Erling Smørgrav return f; 1374ca1ab94SDag-Erling Smørgrav } 1384ca1ab94SDag-Erling Smørgrav 1394ca1ab94SDag-Erling Smørgrav 140842a95ccSDag-Erling Smørgrav /* 141842a95ccSDag-Erling Smørgrav * Attempt to parse the given URL; if successful, call fetchPut(). 142842a95ccSDag-Erling Smørgrav */ 1434ca1ab94SDag-Erling Smørgrav FILE * 1444ca1ab94SDag-Erling Smørgrav fetchPutURL(char *URL, char *flags) 1454ca1ab94SDag-Erling Smørgrav { 146d8acd8dcSDag-Erling Smørgrav struct url *u; 1474ca1ab94SDag-Erling Smørgrav FILE *f; 1484ca1ab94SDag-Erling Smørgrav 1494ca1ab94SDag-Erling Smørgrav if ((u = fetchParseURL(URL)) == NULL) 1504ca1ab94SDag-Erling Smørgrav return NULL; 1514ca1ab94SDag-Erling Smørgrav 152ecc91352SDag-Erling Smørgrav f = fetchPut(u, flags); 1534ca1ab94SDag-Erling Smørgrav 154842a95ccSDag-Erling Smørgrav free(u); 1554ca1ab94SDag-Erling Smørgrav return f; 1564ca1ab94SDag-Erling Smørgrav } 1574ca1ab94SDag-Erling Smørgrav 1584ca1ab94SDag-Erling Smørgrav /* 159d8acd8dcSDag-Erling Smørgrav * Attempt to parse the given URL; if successful, call fetchStat(). 160d8acd8dcSDag-Erling Smørgrav */ 161d8acd8dcSDag-Erling Smørgrav int 162d8acd8dcSDag-Erling Smørgrav fetchStatURL(char *URL, struct url_stat *us, char *flags) 163d8acd8dcSDag-Erling Smørgrav { 164d8acd8dcSDag-Erling Smørgrav struct url *u; 165d8acd8dcSDag-Erling Smørgrav int s; 166d8acd8dcSDag-Erling Smørgrav 167d8acd8dcSDag-Erling Smørgrav if ((u = fetchParseURL(URL)) == NULL) 168d8acd8dcSDag-Erling Smørgrav return -1; 169d8acd8dcSDag-Erling Smørgrav 170d8acd8dcSDag-Erling Smørgrav s = fetchStat(u, us, flags); 171d8acd8dcSDag-Erling Smørgrav 172d8acd8dcSDag-Erling Smørgrav free(u); 173d8acd8dcSDag-Erling Smørgrav return s; 174d8acd8dcSDag-Erling Smørgrav } 175d8acd8dcSDag-Erling Smørgrav 176d8acd8dcSDag-Erling Smørgrav /* 1774ca1ab94SDag-Erling Smørgrav * Split an URL into components. URL syntax is: 1784ca1ab94SDag-Erling Smørgrav * method:[//[user[:pwd]@]host[:port]]/[document] 1794ca1ab94SDag-Erling Smørgrav * This almost, but not quite, RFC1738 URL syntax. 1804ca1ab94SDag-Erling Smørgrav */ 181d8acd8dcSDag-Erling Smørgrav struct url * 1824ca1ab94SDag-Erling Smørgrav fetchParseURL(char *URL) 1834ca1ab94SDag-Erling Smørgrav { 1844ca1ab94SDag-Erling Smørgrav char *p, *q; 185d8acd8dcSDag-Erling Smørgrav struct url *u; 1864ca1ab94SDag-Erling Smørgrav int i; 1874ca1ab94SDag-Erling Smørgrav 188d8acd8dcSDag-Erling Smørgrav /* allocate struct url */ 189d8acd8dcSDag-Erling Smørgrav if ((u = calloc(1, sizeof(struct url))) == NULL) { 190d8acd8dcSDag-Erling Smørgrav errno = ENOMEM; 191d8acd8dcSDag-Erling Smørgrav _fetch_syserr(); 1924ca1ab94SDag-Erling Smørgrav return NULL; 193d8acd8dcSDag-Erling Smørgrav } 1944ca1ab94SDag-Erling Smørgrav 1954ca1ab94SDag-Erling Smørgrav /* scheme name */ 1964ca1ab94SDag-Erling Smørgrav for (i = 0; *URL && (*URL != ':'); URL++) 1974ca1ab94SDag-Erling Smørgrav if (i < URL_SCHEMELEN) 1984ca1ab94SDag-Erling Smørgrav u->scheme[i++] = *URL; 199d8acd8dcSDag-Erling Smørgrav if (!URL[0] || (URL[1] != '/')) { 200d8acd8dcSDag-Erling Smørgrav _url_seterr(URL_BAD_SCHEME); 2014ca1ab94SDag-Erling Smørgrav goto ouch; 202d8acd8dcSDag-Erling Smørgrav } 2034ca1ab94SDag-Erling Smørgrav else URL++; 2044ca1ab94SDag-Erling Smørgrav if (URL[1] != '/') { 2054ca1ab94SDag-Erling Smørgrav p = URL; 2064ca1ab94SDag-Erling Smørgrav goto nohost; 2074ca1ab94SDag-Erling Smørgrav } 2084ca1ab94SDag-Erling Smørgrav else URL += 2; 2094ca1ab94SDag-Erling Smørgrav 2104ca1ab94SDag-Erling Smørgrav p = strpbrk(URL, "/@"); 2110fba3a00SDag-Erling Smørgrav if (p && *p == '@') { 2124ca1ab94SDag-Erling Smørgrav /* username */ 2134ca1ab94SDag-Erling Smørgrav for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++) 2144ca1ab94SDag-Erling Smørgrav if (i < URL_USERLEN) 2154ca1ab94SDag-Erling Smørgrav u->user[i++] = *q; 2164ca1ab94SDag-Erling Smørgrav 2174ca1ab94SDag-Erling Smørgrav /* password */ 2184ca1ab94SDag-Erling Smørgrav if (*q == ':') 2194ca1ab94SDag-Erling Smørgrav for (q++, i = 0; (*q != ':') && (*q != '@'); q++) 2204ca1ab94SDag-Erling Smørgrav if (i < URL_PWDLEN) 2214ca1ab94SDag-Erling Smørgrav u->pwd[i++] = *q; 2224ca1ab94SDag-Erling Smørgrav 2234ca1ab94SDag-Erling Smørgrav p++; 2244ca1ab94SDag-Erling Smørgrav } else p = URL; 2254ca1ab94SDag-Erling Smørgrav 2264ca1ab94SDag-Erling Smørgrav /* hostname */ 2274ca1ab94SDag-Erling Smørgrav for (i = 0; *p && (*p != '/') && (*p != ':'); p++) 2284ca1ab94SDag-Erling Smørgrav if (i < MAXHOSTNAMELEN) 2294ca1ab94SDag-Erling Smørgrav u->host[i++] = *p; 2304ca1ab94SDag-Erling Smørgrav 2314ca1ab94SDag-Erling Smørgrav /* port */ 2324ca1ab94SDag-Erling Smørgrav if (*p == ':') { 2334ca1ab94SDag-Erling Smørgrav for (q = ++p; *q && (*q != '/'); q++) 2344ca1ab94SDag-Erling Smørgrav if (isdigit(*q)) 2354ca1ab94SDag-Erling Smørgrav u->port = u->port * 10 + (*q - '0'); 236d8acd8dcSDag-Erling Smørgrav else { 237d8acd8dcSDag-Erling Smørgrav /* invalid port */ 238d8acd8dcSDag-Erling Smørgrav _url_seterr(URL_BAD_PORT); 239d8acd8dcSDag-Erling Smørgrav goto ouch; 240d8acd8dcSDag-Erling Smørgrav } 2414ca1ab94SDag-Erling Smørgrav while (*p && (*p != '/')) 2424ca1ab94SDag-Erling Smørgrav p++; 2434ca1ab94SDag-Erling Smørgrav } 2444ca1ab94SDag-Erling Smørgrav 2454ca1ab94SDag-Erling Smørgrav nohost: 2464ca1ab94SDag-Erling Smørgrav /* document */ 247842a95ccSDag-Erling Smørgrav if (*p) { 248d8acd8dcSDag-Erling Smørgrav struct url *t; 249842a95ccSDag-Erling Smørgrav t = realloc(u, sizeof(*u)+strlen(p)-1); 250d8acd8dcSDag-Erling Smørgrav if (t == NULL) { 251d8acd8dcSDag-Erling Smørgrav errno = ENOMEM; 252d8acd8dcSDag-Erling Smørgrav _fetch_syserr(); 2534ca1ab94SDag-Erling Smørgrav goto ouch; 254d8acd8dcSDag-Erling Smørgrav } 255842a95ccSDag-Erling Smørgrav u = t; 256842a95ccSDag-Erling Smørgrav strcpy(u->doc, p); 257842a95ccSDag-Erling Smørgrav } else { 258842a95ccSDag-Erling Smørgrav u->doc[0] = '/'; 259842a95ccSDag-Erling Smørgrav u->doc[1] = 0; 260842a95ccSDag-Erling Smørgrav } 2614ca1ab94SDag-Erling Smørgrav 2624ca1ab94SDag-Erling Smørgrav DEBUG(fprintf(stderr, 2634ca1ab94SDag-Erling Smørgrav "scheme: [\033[1m%s\033[m]\n" 2644ca1ab94SDag-Erling Smørgrav "user: [\033[1m%s\033[m]\n" 2654ca1ab94SDag-Erling Smørgrav "password: [\033[1m%s\033[m]\n" 2664ca1ab94SDag-Erling Smørgrav "host: [\033[1m%s\033[m]\n" 2674ca1ab94SDag-Erling Smørgrav "port: [\033[1m%d\033[m]\n" 2684ca1ab94SDag-Erling Smørgrav "document: [\033[1m%s\033[m]\n", 2694ca1ab94SDag-Erling Smørgrav u->scheme, u->user, u->pwd, 2704ca1ab94SDag-Erling Smørgrav u->host, u->port, u->doc)); 2714ca1ab94SDag-Erling Smørgrav 2724ca1ab94SDag-Erling Smørgrav return u; 2734ca1ab94SDag-Erling Smørgrav 2744ca1ab94SDag-Erling Smørgrav ouch: 2754ca1ab94SDag-Erling Smørgrav free(u); 2764ca1ab94SDag-Erling Smørgrav return NULL; 2774ca1ab94SDag-Erling Smørgrav } 278