14ca1ab94SDag-Erling Smørgrav /*- 24ca1ab94SDag-Erling Smørgrav * Copyright (c) 1998 Dag-Erling Co�dan Sm�rgrav 34ca1ab94SDag-Erling Smørgrav * All rights reserved. 44ca1ab94SDag-Erling Smørgrav * 54ca1ab94SDag-Erling Smørgrav * Redistribution and use in source and binary forms, with or without 64ca1ab94SDag-Erling Smørgrav * modification, are permitted provided that the following conditions 74ca1ab94SDag-Erling Smørgrav * are met: 84ca1ab94SDag-Erling Smørgrav * 1. Redistributions of source code must retain the above copyright 94ca1ab94SDag-Erling Smørgrav * notice, this list of conditions and the following disclaimer 104ca1ab94SDag-Erling Smørgrav * in this position and unchanged. 114ca1ab94SDag-Erling Smørgrav * 2. Redistributions in binary form must reproduce the above copyright 124ca1ab94SDag-Erling Smørgrav * notice, this list of conditions and the following disclaimer in the 134ca1ab94SDag-Erling Smørgrav * documentation and/or other materials provided with the distribution. 144ca1ab94SDag-Erling Smørgrav * 3. The name of the author may not be used to endorse or promote products 154ca1ab94SDag-Erling Smørgrav * derived from this software without specific prior written permission 164ca1ab94SDag-Erling Smørgrav * 174ca1ab94SDag-Erling Smørgrav * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 184ca1ab94SDag-Erling Smørgrav * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 194ca1ab94SDag-Erling Smørgrav * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 204ca1ab94SDag-Erling Smørgrav * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 214ca1ab94SDag-Erling Smørgrav * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 224ca1ab94SDag-Erling Smørgrav * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 234ca1ab94SDag-Erling Smørgrav * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 244ca1ab94SDag-Erling Smørgrav * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 254ca1ab94SDag-Erling Smørgrav * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 264ca1ab94SDag-Erling Smørgrav * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 274ca1ab94SDag-Erling Smørgrav * 28d8acd8dcSDag-Erling Smørgrav * $Id: fetch.c,v 1.5 1998/11/05 19:48:17 des Exp $ 294ca1ab94SDag-Erling Smørgrav */ 304ca1ab94SDag-Erling Smørgrav 314ca1ab94SDag-Erling Smørgrav #include <sys/param.h> 32d8acd8dcSDag-Erling Smørgrav #include <sys/errno.h> 334ca1ab94SDag-Erling Smørgrav 344ca1ab94SDag-Erling Smørgrav #include <ctype.h> 354ca1ab94SDag-Erling Smørgrav #include <stdio.h> 364ca1ab94SDag-Erling Smørgrav #include <stdlib.h> 374ca1ab94SDag-Erling Smørgrav #include <string.h> 384ca1ab94SDag-Erling Smørgrav 394ca1ab94SDag-Erling Smørgrav #include "fetch.h" 40d8acd8dcSDag-Erling Smørgrav #include "common.h" 414ca1ab94SDag-Erling Smørgrav 424ca1ab94SDag-Erling Smørgrav 43d8acd8dcSDag-Erling Smørgrav /*** Local data **************************************************************/ 44d8acd8dcSDag-Erling Smørgrav 45d8acd8dcSDag-Erling Smørgrav /* 46d8acd8dcSDag-Erling Smørgrav * Error messages for parser errors 47d8acd8dcSDag-Erling Smørgrav */ 48d8acd8dcSDag-Erling Smørgrav #define URL_MALFORMED 1 49d8acd8dcSDag-Erling Smørgrav #define URL_BAD_SCHEME 2 50d8acd8dcSDag-Erling Smørgrav #define URL_BAD_PORT 3 51d8acd8dcSDag-Erling Smørgrav static struct fetcherr _url_errlist[] = { 52d8acd8dcSDag-Erling Smørgrav { URL_MALFORMED, FETCH_URL, "Malformed URL" }, 53d8acd8dcSDag-Erling Smørgrav { URL_BAD_SCHEME, FETCH_URL, "Invalid URL scheme" }, 54d8acd8dcSDag-Erling Smørgrav { URL_BAD_PORT, FETCH_URL, "Invalid server port" }, 55d8acd8dcSDag-Erling Smørgrav { -1, FETCH_UNKNOWN, "Unknown parser error" } 56d8acd8dcSDag-Erling Smørgrav }; 57d8acd8dcSDag-Erling Smørgrav 58d8acd8dcSDag-Erling Smørgrav 59d8acd8dcSDag-Erling Smørgrav /*** Public API **************************************************************/ 604ca1ab94SDag-Erling Smørgrav 61842a95ccSDag-Erling Smørgrav /* 62842a95ccSDag-Erling Smørgrav * Select the appropriate protocol for the URL scheme, and return a 63842a95ccSDag-Erling Smørgrav * read-only stream connected to the document referenced by the URL. 64842a95ccSDag-Erling Smørgrav */ 65ecc91352SDag-Erling Smørgrav FILE * 66d8acd8dcSDag-Erling Smørgrav fetchGet(struct url *URL, char *flags) 67ecc91352SDag-Erling Smørgrav { 68ecc91352SDag-Erling Smørgrav if (strcasecmp(URL->scheme, "file") == 0) 69ecc91352SDag-Erling Smørgrav return fetchGetFile(URL, flags); 70ecc91352SDag-Erling Smørgrav else if (strcasecmp(URL->scheme, "http") == 0) 71ecc91352SDag-Erling Smørgrav return fetchGetHTTP(URL, flags); 72ecc91352SDag-Erling Smørgrav else if (strcasecmp(URL->scheme, "ftp") == 0) 73ecc91352SDag-Erling Smørgrav return fetchGetFTP(URL, flags); 74d8acd8dcSDag-Erling Smørgrav else { 75d8acd8dcSDag-Erling Smørgrav _url_seterr(URL_BAD_SCHEME); 76d8acd8dcSDag-Erling Smørgrav return NULL; 77d8acd8dcSDag-Erling Smørgrav } 78ecc91352SDag-Erling Smørgrav } 79ecc91352SDag-Erling Smørgrav 80842a95ccSDag-Erling Smørgrav /* 81842a95ccSDag-Erling Smørgrav * Select the appropriate protocol for the URL scheme, and return a 82842a95ccSDag-Erling Smørgrav * write-only stream connected to the document referenced by the URL. 83842a95ccSDag-Erling Smørgrav */ 84ecc91352SDag-Erling Smørgrav FILE * 85d8acd8dcSDag-Erling Smørgrav fetchPut(struct url *URL, char *flags) 86ecc91352SDag-Erling Smørgrav { 87ecc91352SDag-Erling Smørgrav if (strcasecmp(URL->scheme, "file") == 0) 88ecc91352SDag-Erling Smørgrav return fetchPutFile(URL, flags); 89ecc91352SDag-Erling Smørgrav else if (strcasecmp(URL->scheme, "http") == 0) 90ecc91352SDag-Erling Smørgrav return fetchPutHTTP(URL, flags); 91ecc91352SDag-Erling Smørgrav else if (strcasecmp(URL->scheme, "ftp") == 0) 92ecc91352SDag-Erling Smørgrav return fetchPutFTP(URL, flags); 93d8acd8dcSDag-Erling Smørgrav else { 94d8acd8dcSDag-Erling Smørgrav _url_seterr(URL_BAD_SCHEME); 95d8acd8dcSDag-Erling Smørgrav return NULL; 96d8acd8dcSDag-Erling Smørgrav } 97d8acd8dcSDag-Erling Smørgrav } 98d8acd8dcSDag-Erling Smørgrav 99d8acd8dcSDag-Erling Smørgrav /* 100d8acd8dcSDag-Erling Smørgrav * Select the appropriate protocol for the URL scheme, and return the 101d8acd8dcSDag-Erling Smørgrav * size of the document referenced by the URL if it exists. 102d8acd8dcSDag-Erling Smørgrav */ 103d8acd8dcSDag-Erling Smørgrav int 104d8acd8dcSDag-Erling Smørgrav fetchStat(struct url *URL, struct url_stat *us, char *flags) 105d8acd8dcSDag-Erling Smørgrav { 106d8acd8dcSDag-Erling Smørgrav if (strcasecmp(URL->scheme, "file") == 0) 107d8acd8dcSDag-Erling Smørgrav return fetchStatFile(URL, us, flags); 108d8acd8dcSDag-Erling Smørgrav else if (strcasecmp(URL->scheme, "http") == 0) 109d8acd8dcSDag-Erling Smørgrav return fetchStatHTTP(URL, us, flags); 110d8acd8dcSDag-Erling Smørgrav else if (strcasecmp(URL->scheme, "ftp") == 0) 111d8acd8dcSDag-Erling Smørgrav return fetchStatFTP(URL, us, flags); 112d8acd8dcSDag-Erling Smørgrav else { 113d8acd8dcSDag-Erling Smørgrav _url_seterr(URL_BAD_SCHEME); 114d8acd8dcSDag-Erling Smørgrav return -1; 115d8acd8dcSDag-Erling Smørgrav } 116ecc91352SDag-Erling Smørgrav } 117ecc91352SDag-Erling Smørgrav 118842a95ccSDag-Erling Smørgrav /* 119842a95ccSDag-Erling Smørgrav * Attempt to parse the given URL; if successful, call fetchGet(). 120842a95ccSDag-Erling Smørgrav */ 1214ca1ab94SDag-Erling Smørgrav FILE * 1224ca1ab94SDag-Erling Smørgrav fetchGetURL(char *URL, char *flags) 1234ca1ab94SDag-Erling Smørgrav { 124d8acd8dcSDag-Erling Smørgrav struct url *u; 1254ca1ab94SDag-Erling Smørgrav FILE *f; 1264ca1ab94SDag-Erling Smørgrav 1274ca1ab94SDag-Erling Smørgrav if ((u = fetchParseURL(URL)) == NULL) 1284ca1ab94SDag-Erling Smørgrav return NULL; 1294ca1ab94SDag-Erling Smørgrav 130ecc91352SDag-Erling Smørgrav f = fetchGet(u, flags); 1314ca1ab94SDag-Erling Smørgrav 132842a95ccSDag-Erling Smørgrav free(u); 1334ca1ab94SDag-Erling Smørgrav return f; 1344ca1ab94SDag-Erling Smørgrav } 1354ca1ab94SDag-Erling Smørgrav 1364ca1ab94SDag-Erling Smørgrav 137842a95ccSDag-Erling Smørgrav /* 138842a95ccSDag-Erling Smørgrav * Attempt to parse the given URL; if successful, call fetchPut(). 139842a95ccSDag-Erling Smørgrav */ 1404ca1ab94SDag-Erling Smørgrav FILE * 1414ca1ab94SDag-Erling Smørgrav fetchPutURL(char *URL, char *flags) 1424ca1ab94SDag-Erling Smørgrav { 143d8acd8dcSDag-Erling Smørgrav struct url *u; 1444ca1ab94SDag-Erling Smørgrav FILE *f; 1454ca1ab94SDag-Erling Smørgrav 1464ca1ab94SDag-Erling Smørgrav if ((u = fetchParseURL(URL)) == NULL) 1474ca1ab94SDag-Erling Smørgrav return NULL; 1484ca1ab94SDag-Erling Smørgrav 149ecc91352SDag-Erling Smørgrav f = fetchPut(u, flags); 1504ca1ab94SDag-Erling Smørgrav 151842a95ccSDag-Erling Smørgrav free(u); 1524ca1ab94SDag-Erling Smørgrav return f; 1534ca1ab94SDag-Erling Smørgrav } 1544ca1ab94SDag-Erling Smørgrav 1554ca1ab94SDag-Erling Smørgrav /* 156d8acd8dcSDag-Erling Smørgrav * Attempt to parse the given URL; if successful, call fetchStat(). 157d8acd8dcSDag-Erling Smørgrav */ 158d8acd8dcSDag-Erling Smørgrav int 159d8acd8dcSDag-Erling Smørgrav fetchStatURL(char *URL, struct url_stat *us, char *flags) 160d8acd8dcSDag-Erling Smørgrav { 161d8acd8dcSDag-Erling Smørgrav struct url *u; 162d8acd8dcSDag-Erling Smørgrav int s; 163d8acd8dcSDag-Erling Smørgrav 164d8acd8dcSDag-Erling Smørgrav if ((u = fetchParseURL(URL)) == NULL) 165d8acd8dcSDag-Erling Smørgrav return -1; 166d8acd8dcSDag-Erling Smørgrav 167d8acd8dcSDag-Erling Smørgrav s = fetchStat(u, us, flags); 168d8acd8dcSDag-Erling Smørgrav 169d8acd8dcSDag-Erling Smørgrav free(u); 170d8acd8dcSDag-Erling Smørgrav return s; 171d8acd8dcSDag-Erling Smørgrav } 172d8acd8dcSDag-Erling Smørgrav 173d8acd8dcSDag-Erling Smørgrav /* 1744ca1ab94SDag-Erling Smørgrav * Split an URL into components. URL syntax is: 1754ca1ab94SDag-Erling Smørgrav * method:[//[user[:pwd]@]host[:port]]/[document] 1764ca1ab94SDag-Erling Smørgrav * This almost, but not quite, RFC1738 URL syntax. 1774ca1ab94SDag-Erling Smørgrav */ 178d8acd8dcSDag-Erling Smørgrav struct url * 1794ca1ab94SDag-Erling Smørgrav fetchParseURL(char *URL) 1804ca1ab94SDag-Erling Smørgrav { 1814ca1ab94SDag-Erling Smørgrav char *p, *q; 182d8acd8dcSDag-Erling Smørgrav struct url *u; 1834ca1ab94SDag-Erling Smørgrav int i; 1844ca1ab94SDag-Erling Smørgrav 185d8acd8dcSDag-Erling Smørgrav /* allocate struct url */ 186d8acd8dcSDag-Erling Smørgrav if ((u = calloc(1, sizeof(struct url))) == NULL) { 187d8acd8dcSDag-Erling Smørgrav errno = ENOMEM; 188d8acd8dcSDag-Erling Smørgrav _fetch_syserr(); 1894ca1ab94SDag-Erling Smørgrav return NULL; 190d8acd8dcSDag-Erling Smørgrav } 1914ca1ab94SDag-Erling Smørgrav 1924ca1ab94SDag-Erling Smørgrav /* scheme name */ 1934ca1ab94SDag-Erling Smørgrav for (i = 0; *URL && (*URL != ':'); URL++) 1944ca1ab94SDag-Erling Smørgrav if (i < URL_SCHEMELEN) 1954ca1ab94SDag-Erling Smørgrav u->scheme[i++] = *URL; 196d8acd8dcSDag-Erling Smørgrav if (!URL[0] || (URL[1] != '/')) { 197d8acd8dcSDag-Erling Smørgrav _url_seterr(URL_BAD_SCHEME); 1984ca1ab94SDag-Erling Smørgrav goto ouch; 199d8acd8dcSDag-Erling Smørgrav } 2004ca1ab94SDag-Erling Smørgrav else URL++; 2014ca1ab94SDag-Erling Smørgrav if (URL[1] != '/') { 2024ca1ab94SDag-Erling Smørgrav p = URL; 2034ca1ab94SDag-Erling Smørgrav goto nohost; 2044ca1ab94SDag-Erling Smørgrav } 2054ca1ab94SDag-Erling Smørgrav else URL += 2; 2064ca1ab94SDag-Erling Smørgrav 2074ca1ab94SDag-Erling Smørgrav p = strpbrk(URL, "/@"); 2084ca1ab94SDag-Erling Smørgrav if (*p == '@') { 2094ca1ab94SDag-Erling Smørgrav /* username */ 2104ca1ab94SDag-Erling Smørgrav for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++) 2114ca1ab94SDag-Erling Smørgrav if (i < URL_USERLEN) 2124ca1ab94SDag-Erling Smørgrav u->user[i++] = *q; 2134ca1ab94SDag-Erling Smørgrav 2144ca1ab94SDag-Erling Smørgrav /* password */ 2154ca1ab94SDag-Erling Smørgrav if (*q == ':') 2164ca1ab94SDag-Erling Smørgrav for (q++, i = 0; (*q != ':') && (*q != '@'); q++) 2174ca1ab94SDag-Erling Smørgrav if (i < URL_PWDLEN) 2184ca1ab94SDag-Erling Smørgrav u->pwd[i++] = *q; 2194ca1ab94SDag-Erling Smørgrav 2204ca1ab94SDag-Erling Smørgrav p++; 2214ca1ab94SDag-Erling Smørgrav } else p = URL; 2224ca1ab94SDag-Erling Smørgrav 2234ca1ab94SDag-Erling Smørgrav /* hostname */ 2244ca1ab94SDag-Erling Smørgrav for (i = 0; *p && (*p != '/') && (*p != ':'); p++) 2254ca1ab94SDag-Erling Smørgrav if (i < MAXHOSTNAMELEN) 2264ca1ab94SDag-Erling Smørgrav u->host[i++] = *p; 2274ca1ab94SDag-Erling Smørgrav 2284ca1ab94SDag-Erling Smørgrav /* port */ 2294ca1ab94SDag-Erling Smørgrav if (*p == ':') { 2304ca1ab94SDag-Erling Smørgrav for (q = ++p; *q && (*q != '/'); q++) 2314ca1ab94SDag-Erling Smørgrav if (isdigit(*q)) 2324ca1ab94SDag-Erling Smørgrav u->port = u->port * 10 + (*q - '0'); 233d8acd8dcSDag-Erling Smørgrav else { 234d8acd8dcSDag-Erling Smørgrav /* invalid port */ 235d8acd8dcSDag-Erling Smørgrav _url_seterr(URL_BAD_PORT); 236d8acd8dcSDag-Erling Smørgrav goto ouch; 237d8acd8dcSDag-Erling Smørgrav } 2384ca1ab94SDag-Erling Smørgrav while (*p && (*p != '/')) 2394ca1ab94SDag-Erling Smørgrav p++; 2404ca1ab94SDag-Erling Smørgrav } 2414ca1ab94SDag-Erling Smørgrav 2424ca1ab94SDag-Erling Smørgrav nohost: 2434ca1ab94SDag-Erling Smørgrav /* document */ 244842a95ccSDag-Erling Smørgrav if (*p) { 245d8acd8dcSDag-Erling Smørgrav struct url *t; 246842a95ccSDag-Erling Smørgrav t = realloc(u, sizeof(*u)+strlen(p)-1); 247d8acd8dcSDag-Erling Smørgrav if (t == NULL) { 248d8acd8dcSDag-Erling Smørgrav errno = ENOMEM; 249d8acd8dcSDag-Erling Smørgrav _fetch_syserr(); 2504ca1ab94SDag-Erling Smørgrav goto ouch; 251d8acd8dcSDag-Erling Smørgrav } 252842a95ccSDag-Erling Smørgrav u = t; 253842a95ccSDag-Erling Smørgrav strcpy(u->doc, p); 254842a95ccSDag-Erling Smørgrav } else { 255842a95ccSDag-Erling Smørgrav u->doc[0] = '/'; 256842a95ccSDag-Erling Smørgrav u->doc[1] = 0; 257842a95ccSDag-Erling Smørgrav } 2584ca1ab94SDag-Erling Smørgrav 2594ca1ab94SDag-Erling Smørgrav DEBUG(fprintf(stderr, 2604ca1ab94SDag-Erling Smørgrav "scheme: [\033[1m%s\033[m]\n" 2614ca1ab94SDag-Erling Smørgrav "user: [\033[1m%s\033[m]\n" 2624ca1ab94SDag-Erling Smørgrav "password: [\033[1m%s\033[m]\n" 2634ca1ab94SDag-Erling Smørgrav "host: [\033[1m%s\033[m]\n" 2644ca1ab94SDag-Erling Smørgrav "port: [\033[1m%d\033[m]\n" 2654ca1ab94SDag-Erling Smørgrav "document: [\033[1m%s\033[m]\n", 2664ca1ab94SDag-Erling Smørgrav u->scheme, u->user, u->pwd, 2674ca1ab94SDag-Erling Smørgrav u->host, u->port, u->doc)); 2684ca1ab94SDag-Erling Smørgrav 2694ca1ab94SDag-Erling Smørgrav return u; 2704ca1ab94SDag-Erling Smørgrav 2714ca1ab94SDag-Erling Smørgrav ouch: 2724ca1ab94SDag-Erling Smørgrav free(u); 2734ca1ab94SDag-Erling Smørgrav return NULL; 2744ca1ab94SDag-Erling Smørgrav } 275