1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1998-2004 Dag-Erling Smørgrav 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer 12 * in this position and unchanged. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. The name of the author may not be used to endorse or promote products 17 * derived from this software without specific prior written permission 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #include <sys/errno.h> 36 37 #include <ctype.h> 38 #include <stdio.h> 39 #include <stdlib.h> 40 #include <string.h> 41 42 #include "fetch.h" 43 #include "common.h" 44 45 auth_t fetchAuthMethod; 46 int fetchLastErrCode; 47 char fetchLastErrString[MAXERRSTRING]; 48 int fetchTimeout; 49 int fetchRestartCalls = 1; 50 int fetchDebug; 51 52 53 /*** Local data **************************************************************/ 54 55 /* 56 * Error messages for parser errors 57 */ 58 #define URL_MALFORMED 1 59 #define URL_BAD_SCHEME 2 60 #define URL_BAD_PORT 3 61 static struct fetcherr url_errlist[] = { 62 { URL_MALFORMED, FETCH_URL, "Malformed URL" }, 63 { URL_BAD_SCHEME, FETCH_URL, "Invalid URL scheme" }, 64 { URL_BAD_PORT, FETCH_URL, "Invalid server port" }, 65 { -1, FETCH_UNKNOWN, "Unknown parser error" } 66 }; 67 68 69 /*** Public API **************************************************************/ 70 71 /* 72 * Select the appropriate protocol for the URL scheme, and return a 73 * read-only stream connected to the document referenced by the URL. 74 * Also fill out the struct url_stat. 75 */ 76 FILE * 77 fetchXGet(struct url *URL, struct url_stat *us, const char *flags) 78 { 79 80 if (us != NULL) { 81 us->size = -1; 82 us->atime = us->mtime = 0; 83 } 84 if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 85 return (fetchXGetFile(URL, us, flags)); 86 else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 87 return (fetchXGetFTP(URL, us, flags)); 88 else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 89 return (fetchXGetHTTP(URL, us, flags)); 90 else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) 91 return (fetchXGetHTTP(URL, us, flags)); 92 url_seterr(URL_BAD_SCHEME); 93 return (NULL); 94 } 95 96 /* 97 * Select the appropriate protocol for the URL scheme, and return a 98 * read-only stream connected to the document referenced by the URL. 99 */ 100 FILE * 101 fetchGet(struct url *URL, const char *flags) 102 { 103 return (fetchXGet(URL, NULL, flags)); 104 } 105 106 /* 107 * Select the appropriate protocol for the URL scheme, and return a 108 * write-only stream connected to the document referenced by the URL. 109 */ 110 FILE * 111 fetchPut(struct url *URL, const char *flags) 112 { 113 114 if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 115 return (fetchPutFile(URL, flags)); 116 else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 117 return (fetchPutFTP(URL, flags)); 118 else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 119 return (fetchPutHTTP(URL, flags)); 120 else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) 121 return (fetchPutHTTP(URL, flags)); 122 url_seterr(URL_BAD_SCHEME); 123 return (NULL); 124 } 125 126 /* 127 * Select the appropriate protocol for the URL scheme, and return the 128 * size of the document referenced by the URL if it exists. 129 */ 130 int 131 fetchStat(struct url *URL, struct url_stat *us, const char *flags) 132 { 133 134 if (us != NULL) { 135 us->size = -1; 136 us->atime = us->mtime = 0; 137 } 138 if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 139 return (fetchStatFile(URL, us, flags)); 140 else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 141 return (fetchStatFTP(URL, us, flags)); 142 else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 143 return (fetchStatHTTP(URL, us, flags)); 144 else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) 145 return (fetchStatHTTP(URL, us, flags)); 146 url_seterr(URL_BAD_SCHEME); 147 return (-1); 148 } 149 150 /* 151 * Select the appropriate protocol for the URL scheme, and return a 152 * list of files in the directory pointed to by the URL. 153 */ 154 struct url_ent * 155 fetchList(struct url *URL, const char *flags) 156 { 157 158 if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 159 return (fetchListFile(URL, flags)); 160 else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 161 return (fetchListFTP(URL, flags)); 162 else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 163 return (fetchListHTTP(URL, flags)); 164 else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) 165 return (fetchListHTTP(URL, flags)); 166 url_seterr(URL_BAD_SCHEME); 167 return (NULL); 168 } 169 170 /* 171 * Attempt to parse the given URL; if successful, call fetchXGet(). 172 */ 173 FILE * 174 fetchXGetURL(const char *URL, struct url_stat *us, const char *flags) 175 { 176 struct url *u; 177 FILE *f; 178 179 if ((u = fetchParseURL(URL)) == NULL) 180 return (NULL); 181 182 f = fetchXGet(u, us, flags); 183 184 fetchFreeURL(u); 185 return (f); 186 } 187 188 /* 189 * Attempt to parse the given URL; if successful, call fetchGet(). 190 */ 191 FILE * 192 fetchGetURL(const char *URL, const char *flags) 193 { 194 return (fetchXGetURL(URL, NULL, flags)); 195 } 196 197 /* 198 * Attempt to parse the given URL; if successful, call fetchPut(). 199 */ 200 FILE * 201 fetchPutURL(const char *URL, const char *flags) 202 { 203 struct url *u; 204 FILE *f; 205 206 if ((u = fetchParseURL(URL)) == NULL) 207 return (NULL); 208 209 f = fetchPut(u, flags); 210 211 fetchFreeURL(u); 212 return (f); 213 } 214 215 /* 216 * Attempt to parse the given URL; if successful, call fetchStat(). 217 */ 218 int 219 fetchStatURL(const char *URL, struct url_stat *us, const char *flags) 220 { 221 struct url *u; 222 int s; 223 224 if ((u = fetchParseURL(URL)) == NULL) 225 return (-1); 226 227 s = fetchStat(u, us, flags); 228 229 fetchFreeURL(u); 230 return (s); 231 } 232 233 /* 234 * Attempt to parse the given URL; if successful, call fetchList(). 235 */ 236 struct url_ent * 237 fetchListURL(const char *URL, const char *flags) 238 { 239 struct url *u; 240 struct url_ent *ue; 241 242 if ((u = fetchParseURL(URL)) == NULL) 243 return (NULL); 244 245 ue = fetchList(u, flags); 246 247 fetchFreeURL(u); 248 return (ue); 249 } 250 251 /* 252 * Make a URL 253 */ 254 struct url * 255 fetchMakeURL(const char *scheme, const char *host, int port, const char *doc, 256 const char *user, const char *pwd) 257 { 258 struct url *u; 259 260 if (!scheme || (!host && !doc)) { 261 url_seterr(URL_MALFORMED); 262 return (NULL); 263 } 264 265 if (port < 0 || port > 65535) { 266 url_seterr(URL_BAD_PORT); 267 return (NULL); 268 } 269 270 /* allocate struct url */ 271 if ((u = calloc(1, sizeof(*u))) == NULL) { 272 fetch_syserr(); 273 return (NULL); 274 } 275 276 if ((u->doc = strdup(doc ? doc : "/")) == NULL) { 277 fetch_syserr(); 278 free(u); 279 return (NULL); 280 } 281 282 #define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x) 283 seturl(scheme); 284 seturl(host); 285 seturl(user); 286 seturl(pwd); 287 #undef seturl 288 u->port = port; 289 u->netrcfd = -2; 290 291 return (u); 292 } 293 294 /* 295 * Return value of the given hex digit. 296 */ 297 static int 298 fetch_hexval(char ch) 299 { 300 301 if (ch >= '0' && ch <= '9') 302 return (ch - '0'); 303 else if (ch >= 'a' && ch <= 'f') 304 return (ch - 'a' + 10); 305 else if (ch >= 'A' && ch <= 'F') 306 return (ch - 'A' + 10); 307 return (-1); 308 } 309 310 /* 311 * Decode percent-encoded URL component from src into dst, stopping at end 312 * of string, or at @ or : separators. Returns a pointer to the unhandled 313 * part of the input string (null terminator, @, or :). No terminator is 314 * written to dst (it is the caller's responsibility). 315 */ 316 static const char * 317 fetch_pctdecode(char *dst, const char *src, size_t dlen) 318 { 319 int d1, d2; 320 char c; 321 const char *s; 322 323 for (s = src; *s != '\0' && *s != '@' && *s != ':'; s++) { 324 if (s[0] == '%' && (d1 = fetch_hexval(s[1])) >= 0 && 325 (d2 = fetch_hexval(s[2])) >= 0 && (d1 > 0 || d2 > 0)) { 326 c = d1 << 4 | d2; 327 s += 2; 328 } else { 329 c = *s; 330 } 331 if (dlen-- > 0) 332 *dst++ = c; 333 } 334 return (s); 335 } 336 337 /* 338 * Split an URL into components. URL syntax is: 339 * [method:/][/[user[:pwd]@]host[:port]/][document] 340 * This almost, but not quite, RFC1738 URL syntax. 341 */ 342 struct url * 343 fetchParseURL(const char *URL) 344 { 345 char *doc; 346 const char *p, *q; 347 struct url *u; 348 int i; 349 350 /* allocate struct url */ 351 if ((u = calloc(1, sizeof(*u))) == NULL) { 352 fetch_syserr(); 353 return (NULL); 354 } 355 u->netrcfd = -2; 356 357 /* scheme name */ 358 if ((p = strstr(URL, ":/"))) { 359 snprintf(u->scheme, URL_SCHEMELEN+1, 360 "%.*s", (int)(p - URL), URL); 361 URL = ++p; 362 /* 363 * Only one slash: no host, leave slash as part of document 364 * Two slashes: host follows, strip slashes 365 */ 366 if (URL[1] == '/') 367 URL = (p += 2); 368 } else { 369 p = URL; 370 } 371 if (!*URL || *URL == '/' || *URL == '.' || 372 (u->scheme[0] == '\0' && 373 strchr(URL, '/') == NULL && strchr(URL, ':') == NULL)) 374 goto nohost; 375 376 p = strpbrk(URL, "/@"); 377 if (p && *p == '@') { 378 /* username */ 379 q = fetch_pctdecode(u->user, URL, URL_USERLEN); 380 381 /* password */ 382 if (*q == ':') 383 q = fetch_pctdecode(u->pwd, q + 1, URL_PWDLEN); 384 385 p++; 386 } else { 387 p = URL; 388 } 389 390 /* hostname */ 391 if (*p == '[' && (q = strchr(p + 1, ']')) != NULL && 392 (*++q == '\0' || *q == '/' || *q == ':')) { 393 if ((i = q - p) > MAXHOSTNAMELEN) 394 i = MAXHOSTNAMELEN; 395 strncpy(u->host, p, i); 396 p = q; 397 } else { 398 for (i = 0; *p && (*p != '/') && (*p != ':'); p++) 399 if (i < MAXHOSTNAMELEN) 400 u->host[i++] = *p; 401 } 402 403 /* port */ 404 if (*p == ':') { 405 for (q = ++p; *q && (*q != '/'); q++) 406 if (isdigit((unsigned char)*q)) 407 u->port = u->port * 10 + (*q - '0'); 408 else { 409 /* invalid port */ 410 url_seterr(URL_BAD_PORT); 411 goto ouch; 412 } 413 p = q; 414 } 415 416 nohost: 417 /* document */ 418 if (!*p) 419 p = "/"; 420 421 if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 || 422 strcasecmp(u->scheme, SCHEME_HTTPS) == 0) { 423 const char hexnums[] = "0123456789abcdef"; 424 425 /* percent-escape whitespace. */ 426 if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) { 427 fetch_syserr(); 428 goto ouch; 429 } 430 u->doc = doc; 431 while (*p != '\0') { 432 if (!isspace((unsigned char)*p)) { 433 *doc++ = *p++; 434 } else { 435 *doc++ = '%'; 436 *doc++ = hexnums[((unsigned int)*p) >> 4]; 437 *doc++ = hexnums[((unsigned int)*p) & 0xf]; 438 p++; 439 } 440 } 441 *doc = '\0'; 442 } else if ((u->doc = strdup(p)) == NULL) { 443 fetch_syserr(); 444 goto ouch; 445 } 446 447 DEBUG(fprintf(stderr, 448 "scheme: \"%s\"\n" 449 "user: \"%s\"\n" 450 "password: \"%s\"\n" 451 "host: \"%s\"\n" 452 "port: \"%d\"\n" 453 "document: \"%s\"\n", 454 u->scheme, u->user, u->pwd, 455 u->host, u->port, u->doc)); 456 457 return (u); 458 459 ouch: 460 free(u); 461 return (NULL); 462 } 463 464 /* 465 * Free a URL 466 */ 467 void 468 fetchFreeURL(struct url *u) 469 { 470 free(u->doc); 471 free(u); 472 } 473