1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1998-2004 Dag-Erling Smørgrav 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer 12 * in this position and unchanged. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. The name of the author may not be used to endorse or promote products 17 * derived from this software without specific prior written permission 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 #include <sys/param.h> 33 34 #include <netinet/in.h> 35 36 #include <errno.h> 37 #include <ctype.h> 38 #include <stdio.h> 39 #include <stdlib.h> 40 #include <string.h> 41 42 #include "fetch.h" 43 #include "common.h" 44 45 auth_t fetchAuthMethod; 46 int fetchLastErrCode; 47 char fetchLastErrString[MAXERRSTRING]; 48 int fetchTimeout; 49 int fetchRestartCalls = 1; 50 int fetchDebug; 51 52 53 /*** Local data **************************************************************/ 54 55 /* 56 * Error messages for parser errors 57 */ 58 #define URL_MALFORMED 1 59 #define URL_BAD_SCHEME 2 60 #define URL_BAD_PORT 3 61 static struct fetcherr url_errlist[] = { 62 { URL_MALFORMED, FETCH_URL, "Malformed URL" }, 63 { URL_BAD_SCHEME, FETCH_URL, "Invalid URL scheme" }, 64 { URL_BAD_PORT, FETCH_URL, "Invalid server port" }, 65 { -1, FETCH_UNKNOWN, "Unknown parser error" } 66 }; 67 68 69 /*** Public API **************************************************************/ 70 71 /* 72 * Select the appropriate protocol for the URL scheme, and return a 73 * read-only stream connected to the document referenced by the URL. 74 * Also fill out the struct url_stat. 75 */ 76 FILE * 77 fetchXGet(struct url *URL, struct url_stat *us, const char *flags) 78 { 79 80 if (us != NULL) { 81 us->size = -1; 82 us->atime = us->mtime = 0; 83 } 84 if (strcmp(URL->scheme, SCHEME_FILE) == 0) 85 return (fetchXGetFile(URL, us, flags)); 86 else if (strcmp(URL->scheme, SCHEME_FTP) == 0) 87 return (fetchXGetFTP(URL, us, flags)); 88 else if (strcmp(URL->scheme, SCHEME_HTTP) == 0) 89 return (fetchXGetHTTP(URL, us, flags)); 90 else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0) 91 return (fetchXGetHTTP(URL, us, flags)); 92 url_seterr(URL_BAD_SCHEME); 93 return (NULL); 94 } 95 96 /* 97 * Select the appropriate protocol for the URL scheme, and return a 98 * read-only stream connected to the document referenced by the URL. 99 */ 100 FILE * 101 fetchGet(struct url *URL, const char *flags) 102 { 103 return (fetchXGet(URL, NULL, flags)); 104 } 105 106 /* 107 * Select the appropriate protocol for the URL scheme, and return a 108 * write-only stream connected to the document referenced by the URL. 109 */ 110 FILE * 111 fetchPut(struct url *URL, const char *flags) 112 { 113 114 if (strcmp(URL->scheme, SCHEME_FILE) == 0) 115 return (fetchPutFile(URL, flags)); 116 else if (strcmp(URL->scheme, SCHEME_FTP) == 0) 117 return (fetchPutFTP(URL, flags)); 118 else if (strcmp(URL->scheme, SCHEME_HTTP) == 0) 119 return (fetchPutHTTP(URL, flags)); 120 else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0) 121 return (fetchPutHTTP(URL, flags)); 122 url_seterr(URL_BAD_SCHEME); 123 return (NULL); 124 } 125 126 /* 127 * Select the appropriate protocol for the URL scheme, and return the 128 * size of the document referenced by the URL if it exists. 129 */ 130 int 131 fetchStat(struct url *URL, struct url_stat *us, const char *flags) 132 { 133 134 if (us != NULL) { 135 us->size = -1; 136 us->atime = us->mtime = 0; 137 } 138 if (strcmp(URL->scheme, SCHEME_FILE) == 0) 139 return (fetchStatFile(URL, us, flags)); 140 else if (strcmp(URL->scheme, SCHEME_FTP) == 0) 141 return (fetchStatFTP(URL, us, flags)); 142 else if (strcmp(URL->scheme, SCHEME_HTTP) == 0) 143 return (fetchStatHTTP(URL, us, flags)); 144 else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0) 145 return (fetchStatHTTP(URL, us, flags)); 146 url_seterr(URL_BAD_SCHEME); 147 return (-1); 148 } 149 150 /* 151 * Select the appropriate protocol for the URL scheme, and return a 152 * list of files in the directory pointed to by the URL. 153 */ 154 struct url_ent * 155 fetchList(struct url *URL, const char *flags) 156 { 157 158 if (strcmp(URL->scheme, SCHEME_FILE) == 0) 159 return (fetchListFile(URL, flags)); 160 else if (strcmp(URL->scheme, SCHEME_FTP) == 0) 161 return (fetchListFTP(URL, flags)); 162 else if (strcmp(URL->scheme, SCHEME_HTTP) == 0) 163 return (fetchListHTTP(URL, flags)); 164 else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0) 165 return (fetchListHTTP(URL, flags)); 166 url_seterr(URL_BAD_SCHEME); 167 return (NULL); 168 } 169 170 /* 171 * Attempt to parse the given URL; if successful, call fetchXGet(). 172 */ 173 FILE * 174 fetchXGetURL(const char *URL, struct url_stat *us, const char *flags) 175 { 176 struct url *u; 177 FILE *f; 178 179 if ((u = fetchParseURL(URL)) == NULL) 180 return (NULL); 181 182 f = fetchXGet(u, us, flags); 183 184 fetchFreeURL(u); 185 return (f); 186 } 187 188 /* 189 * Attempt to parse the given URL; if successful, call fetchGet(). 190 */ 191 FILE * 192 fetchGetURL(const char *URL, const char *flags) 193 { 194 return (fetchXGetURL(URL, NULL, flags)); 195 } 196 197 /* 198 * Attempt to parse the given URL; if successful, call fetchPut(). 199 */ 200 FILE * 201 fetchPutURL(const char *URL, const char *flags) 202 { 203 struct url *u; 204 FILE *f; 205 206 if ((u = fetchParseURL(URL)) == NULL) 207 return (NULL); 208 209 f = fetchPut(u, flags); 210 211 fetchFreeURL(u); 212 return (f); 213 } 214 215 /* 216 * Attempt to parse the given URL; if successful, call fetchStat(). 217 */ 218 int 219 fetchStatURL(const char *URL, struct url_stat *us, const char *flags) 220 { 221 struct url *u; 222 int s; 223 224 if ((u = fetchParseURL(URL)) == NULL) 225 return (-1); 226 227 s = fetchStat(u, us, flags); 228 229 fetchFreeURL(u); 230 return (s); 231 } 232 233 /* 234 * Attempt to parse the given URL; if successful, call fetchList(). 235 */ 236 struct url_ent * 237 fetchListURL(const char *URL, const char *flags) 238 { 239 struct url *u; 240 struct url_ent *ue; 241 242 if ((u = fetchParseURL(URL)) == NULL) 243 return (NULL); 244 245 ue = fetchList(u, flags); 246 247 fetchFreeURL(u); 248 return (ue); 249 } 250 251 /* 252 * Make a URL 253 */ 254 struct url * 255 fetchMakeURL(const char *scheme, const char *host, int port, const char *doc, 256 const char *user, const char *pwd) 257 { 258 struct url *u; 259 260 if (!scheme || (!host && !doc)) { 261 url_seterr(URL_MALFORMED); 262 return (NULL); 263 } 264 265 if (port < 0 || port > 65535) { 266 url_seterr(URL_BAD_PORT); 267 return (NULL); 268 } 269 270 /* allocate struct url */ 271 if ((u = calloc(1, sizeof(*u))) == NULL) { 272 fetch_syserr(); 273 return (NULL); 274 } 275 u->netrcfd = -1; 276 277 if ((u->doc = strdup(doc ? doc : "/")) == NULL) { 278 fetch_syserr(); 279 free(u); 280 return (NULL); 281 } 282 283 #define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x) 284 seturl(scheme); 285 seturl(host); 286 seturl(user); 287 seturl(pwd); 288 #undef seturl 289 u->port = port; 290 291 return (u); 292 } 293 294 /* 295 * Return value of the given hex digit. 296 */ 297 static int 298 fetch_hexval(char ch) 299 { 300 301 if (ch >= '0' && ch <= '9') 302 return (ch - '0'); 303 else if (ch >= 'a' && ch <= 'f') 304 return (ch - 'a' + 10); 305 else if (ch >= 'A' && ch <= 'F') 306 return (ch - 'A' + 10); 307 return (-1); 308 } 309 310 /* 311 * Decode percent-encoded URL component from src into dst, stopping at end 312 * of string, or at @ or : separators. Returns a pointer to the unhandled 313 * part of the input string (null terminator, @, or :). No terminator is 314 * written to dst (it is the caller's responsibility). 315 */ 316 static const char * 317 fetch_pctdecode(char *dst, const char *src, size_t dlen) 318 { 319 int d1, d2; 320 char c; 321 const char *s; 322 323 for (s = src; *s != '\0' && *s != '@' && *s != ':'; s++) { 324 if (s[0] == '%' && (d1 = fetch_hexval(s[1])) >= 0 && 325 (d2 = fetch_hexval(s[2])) >= 0 && (d1 > 0 || d2 > 0)) { 326 c = d1 << 4 | d2; 327 s += 2; 328 } else if (s[0] == '%') { 329 /* Invalid escape sequence. */ 330 return (NULL); 331 } else { 332 c = *s; 333 } 334 if (dlen-- > 0) 335 *dst++ = c; 336 else 337 return (NULL); 338 } 339 return (s); 340 } 341 342 /* 343 * Split an URL into components. URL syntax is: 344 * [method:/][/[user[:pwd]@]host[:port]/][document] 345 * This almost, but not quite, RFC1738 URL syntax. 346 */ 347 struct url * 348 fetchParseURL(const char *URL) 349 { 350 char *doc; 351 const char *p, *q; 352 struct url *u; 353 int i, n; 354 355 /* allocate struct url */ 356 if ((u = calloc(1, sizeof(*u))) == NULL) { 357 fetch_syserr(); 358 return (NULL); 359 } 360 u->netrcfd = -1; 361 362 /* scheme name */ 363 if ((p = strstr(URL, ":/"))) { 364 if (p - URL > URL_SCHEMELEN) 365 goto ouch; 366 for (i = 0; URL + i < p; i++) 367 u->scheme[i] = tolower((unsigned char)URL[i]); 368 URL = ++p; 369 /* 370 * Only one slash: no host, leave slash as part of document 371 * Two slashes: host follows, strip slashes 372 */ 373 if (URL[1] == '/') 374 URL = (p += 2); 375 } else { 376 p = URL; 377 } 378 if (!*URL || *URL == '/' || *URL == '.' || 379 (u->scheme[0] == '\0' && 380 strchr(URL, '/') == NULL && strchr(URL, ':') == NULL)) 381 goto nohost; 382 383 p = strpbrk(URL, "/@"); 384 if (p && *p == '@') { 385 /* username */ 386 q = fetch_pctdecode(u->user, URL, URL_USERLEN); 387 if (q == NULL) 388 goto ouch; 389 390 /* password */ 391 if (*q == ':') { 392 q = fetch_pctdecode(u->pwd, q + 1, URL_PWDLEN); 393 if (q == NULL) 394 goto ouch; 395 } 396 p++; 397 } else { 398 p = URL; 399 } 400 401 /* hostname */ 402 if (*p == '[') { 403 q = p + 1 + strspn(p + 1, ":0123456789ABCDEFabcdef"); 404 if (*q++ != ']') 405 goto ouch; 406 } else { 407 /* valid characters in a DNS name */ 408 q = p + strspn(p, "-." "0123456789" 409 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "_" 410 "abcdefghijklmnopqrstuvwxyz"); 411 } 412 if ((*q != '\0' && *q != '/' && *q != ':') || q - p > MAXHOSTNAMELEN) 413 goto ouch; 414 for (i = 0; p + i < q; i++) 415 u->host[i] = tolower((unsigned char)p[i]); 416 u->host[i] = '\0'; 417 p = q; 418 419 /* port */ 420 if (*p == ':') { 421 for (n = 0, q = ++p; *q && (*q != '/'); q++) { 422 if (*q >= '0' && *q <= '9' && n < INT_MAX / 10) { 423 n = n * 10 + (*q - '0'); 424 } else { 425 /* invalid port */ 426 url_seterr(URL_BAD_PORT); 427 goto ouch; 428 } 429 } 430 if (n < 1 || n > IPPORT_MAX) 431 goto ouch; 432 u->port = n; 433 p = q; 434 } 435 436 nohost: 437 /* document */ 438 if (!*p) 439 p = "/"; 440 441 if (strcmp(u->scheme, SCHEME_HTTP) == 0 || 442 strcmp(u->scheme, SCHEME_HTTPS) == 0) { 443 const char hexnums[] = "0123456789abcdef"; 444 445 /* percent-escape whitespace. */ 446 if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) { 447 fetch_syserr(); 448 goto ouch; 449 } 450 u->doc = doc; 451 while (*p != '\0') { 452 if (!isspace((unsigned char)*p)) { 453 *doc++ = *p++; 454 } else { 455 *doc++ = '%'; 456 *doc++ = hexnums[((unsigned int)*p) >> 4]; 457 *doc++ = hexnums[((unsigned int)*p) & 0xf]; 458 p++; 459 } 460 } 461 *doc = '\0'; 462 } else if ((u->doc = strdup(p)) == NULL) { 463 fetch_syserr(); 464 goto ouch; 465 } 466 467 DEBUGF("scheme: \"%s\"\n" 468 "user: \"%s\"\n" 469 "password: \"%s\"\n" 470 "host: \"%s\"\n" 471 "port: \"%d\"\n" 472 "document: \"%s\"\n", 473 u->scheme, u->user, u->pwd, 474 u->host, u->port, u->doc); 475 476 return (u); 477 478 ouch: 479 free(u); 480 return (NULL); 481 } 482 483 /* 484 * Free a URL 485 */ 486 void 487 fetchFreeURL(struct url *u) 488 { 489 free(u->doc); 490 free(u); 491 } 492