1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1998-2004 Dag-Erling Smørgrav 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer 12 * in this position and unchanged. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. The name of the author may not be used to endorse or promote products 17 * derived from this software without specific prior written permission 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include <sys/param.h> 32 33 #include <netinet/in.h> 34 35 #include <errno.h> 36 #include <ctype.h> 37 #include <stdio.h> 38 #include <stdlib.h> 39 #include <string.h> 40 41 #include "fetch.h" 42 #include "common.h" 43 44 auth_t fetchAuthMethod; 45 int fetchLastErrCode; 46 char fetchLastErrString[MAXERRSTRING]; 47 int fetchTimeout; 48 int fetchRestartCalls = 1; 49 int fetchDebug; 50 51 52 /*** Local data **************************************************************/ 53 54 /* 55 * Error messages for parser errors 56 */ 57 #define URL_MALFORMED 1 58 #define URL_BAD_SCHEME 2 59 #define URL_BAD_PORT 3 60 static struct fetcherr url_errlist[] = { 61 { URL_MALFORMED, FETCH_URL, "Malformed URL" }, 62 { URL_BAD_SCHEME, FETCH_URL, "Invalid URL scheme" }, 63 { URL_BAD_PORT, FETCH_URL, "Invalid server port" }, 64 { -1, FETCH_UNKNOWN, "Unknown parser error" } 65 }; 66 67 68 /*** Public API **************************************************************/ 69 70 /* 71 * Select the appropriate protocol for the URL scheme, and return a 72 * read-only stream connected to the document referenced by the URL. 73 * Also fill out the struct url_stat. 74 */ 75 FILE * 76 fetchXGet(struct url *URL, struct url_stat *us, const char *flags) 77 { 78 79 if (us != NULL) { 80 us->size = -1; 81 us->atime = us->mtime = 0; 82 } 83 if (strcmp(URL->scheme, SCHEME_FILE) == 0) 84 return (fetchXGetFile(URL, us, flags)); 85 else if (strcmp(URL->scheme, SCHEME_FTP) == 0) 86 return (fetchXGetFTP(URL, us, flags)); 87 else if (strcmp(URL->scheme, SCHEME_HTTP) == 0) 88 return (fetchXGetHTTP(URL, us, flags)); 89 else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0) 90 return (fetchXGetHTTP(URL, us, flags)); 91 url_seterr(URL_BAD_SCHEME); 92 return (NULL); 93 } 94 95 /* 96 * Select the appropriate protocol for the URL scheme, and return a 97 * read-only stream connected to the document referenced by the URL. 98 */ 99 FILE * 100 fetchGet(struct url *URL, const char *flags) 101 { 102 return (fetchXGet(URL, NULL, flags)); 103 } 104 105 /* 106 * Select the appropriate protocol for the URL scheme, and return a 107 * write-only stream connected to the document referenced by the URL. 108 */ 109 FILE * 110 fetchPut(struct url *URL, const char *flags) 111 { 112 113 if (strcmp(URL->scheme, SCHEME_FILE) == 0) 114 return (fetchPutFile(URL, flags)); 115 else if (strcmp(URL->scheme, SCHEME_FTP) == 0) 116 return (fetchPutFTP(URL, flags)); 117 else if (strcmp(URL->scheme, SCHEME_HTTP) == 0) 118 return (fetchPutHTTP(URL, flags)); 119 else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0) 120 return (fetchPutHTTP(URL, flags)); 121 url_seterr(URL_BAD_SCHEME); 122 return (NULL); 123 } 124 125 /* 126 * Select the appropriate protocol for the URL scheme, and return the 127 * size of the document referenced by the URL if it exists. 128 */ 129 int 130 fetchStat(struct url *URL, struct url_stat *us, const char *flags) 131 { 132 133 if (us != NULL) { 134 us->size = -1; 135 us->atime = us->mtime = 0; 136 } 137 if (strcmp(URL->scheme, SCHEME_FILE) == 0) 138 return (fetchStatFile(URL, us, flags)); 139 else if (strcmp(URL->scheme, SCHEME_FTP) == 0) 140 return (fetchStatFTP(URL, us, flags)); 141 else if (strcmp(URL->scheme, SCHEME_HTTP) == 0) 142 return (fetchStatHTTP(URL, us, flags)); 143 else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0) 144 return (fetchStatHTTP(URL, us, flags)); 145 url_seterr(URL_BAD_SCHEME); 146 return (-1); 147 } 148 149 /* 150 * Select the appropriate protocol for the URL scheme, and return a 151 * list of files in the directory pointed to by the URL. 152 */ 153 struct url_ent * 154 fetchList(struct url *URL, const char *flags) 155 { 156 157 if (strcmp(URL->scheme, SCHEME_FILE) == 0) 158 return (fetchListFile(URL, flags)); 159 else if (strcmp(URL->scheme, SCHEME_FTP) == 0) 160 return (fetchListFTP(URL, flags)); 161 else if (strcmp(URL->scheme, SCHEME_HTTP) == 0) 162 return (fetchListHTTP(URL, flags)); 163 else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0) 164 return (fetchListHTTP(URL, flags)); 165 url_seterr(URL_BAD_SCHEME); 166 return (NULL); 167 } 168 169 /* 170 * Attempt to parse the given URL; if successful, call fetchXGet(). 171 */ 172 FILE * 173 fetchXGetURL(const char *URL, struct url_stat *us, const char *flags) 174 { 175 struct url *u; 176 FILE *f; 177 178 if ((u = fetchParseURL(URL)) == NULL) 179 return (NULL); 180 181 f = fetchXGet(u, us, flags); 182 183 fetchFreeURL(u); 184 return (f); 185 } 186 187 /* 188 * Attempt to parse the given URL; if successful, call fetchGet(). 189 */ 190 FILE * 191 fetchGetURL(const char *URL, const char *flags) 192 { 193 return (fetchXGetURL(URL, NULL, flags)); 194 } 195 196 /* 197 * Attempt to parse the given URL; if successful, call fetchPut(). 198 */ 199 FILE * 200 fetchPutURL(const char *URL, const char *flags) 201 { 202 struct url *u; 203 FILE *f; 204 205 if ((u = fetchParseURL(URL)) == NULL) 206 return (NULL); 207 208 f = fetchPut(u, flags); 209 210 fetchFreeURL(u); 211 return (f); 212 } 213 214 /* 215 * Attempt to parse the given URL; if successful, call fetchStat(). 216 */ 217 int 218 fetchStatURL(const char *URL, struct url_stat *us, const char *flags) 219 { 220 struct url *u; 221 int s; 222 223 if ((u = fetchParseURL(URL)) == NULL) 224 return (-1); 225 226 s = fetchStat(u, us, flags); 227 228 fetchFreeURL(u); 229 return (s); 230 } 231 232 /* 233 * Attempt to parse the given URL; if successful, call fetchList(). 234 */ 235 struct url_ent * 236 fetchListURL(const char *URL, const char *flags) 237 { 238 struct url *u; 239 struct url_ent *ue; 240 241 if ((u = fetchParseURL(URL)) == NULL) 242 return (NULL); 243 244 ue = fetchList(u, flags); 245 246 fetchFreeURL(u); 247 return (ue); 248 } 249 250 /* 251 * Make a URL 252 */ 253 struct url * 254 fetchMakeURL(const char *scheme, const char *host, int port, const char *doc, 255 const char *user, const char *pwd) 256 { 257 struct url *u; 258 259 if (!scheme || (!host && !doc)) { 260 url_seterr(URL_MALFORMED); 261 return (NULL); 262 } 263 264 if (port < 0 || port > 65535) { 265 url_seterr(URL_BAD_PORT); 266 return (NULL); 267 } 268 269 /* allocate struct url */ 270 if ((u = calloc(1, sizeof(*u))) == NULL) { 271 fetch_syserr(); 272 return (NULL); 273 } 274 u->netrcfd = -1; 275 276 if ((u->doc = strdup(doc ? doc : "/")) == NULL) { 277 fetch_syserr(); 278 free(u); 279 return (NULL); 280 } 281 282 #define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x) 283 seturl(scheme); 284 seturl(host); 285 seturl(user); 286 seturl(pwd); 287 #undef seturl 288 u->port = port; 289 290 return (u); 291 } 292 293 /* 294 * Return value of the given hex digit. 295 */ 296 static int 297 fetch_hexval(char ch) 298 { 299 300 if (ch >= '0' && ch <= '9') 301 return (ch - '0'); 302 else if (ch >= 'a' && ch <= 'f') 303 return (ch - 'a' + 10); 304 else if (ch >= 'A' && ch <= 'F') 305 return (ch - 'A' + 10); 306 return (-1); 307 } 308 309 /* 310 * Decode percent-encoded URL component from src into dst, stopping at end 311 * of string, or at @ or : separators. Returns a pointer to the unhandled 312 * part of the input string (null terminator, @, or :). No terminator is 313 * written to dst (it is the caller's responsibility). 314 */ 315 static const char * 316 fetch_pctdecode(char *dst, const char *src, size_t dlen) 317 { 318 int d1, d2; 319 char c; 320 const char *s; 321 322 for (s = src; *s != '\0' && *s != '@' && *s != ':'; s++) { 323 if (s[0] == '%' && (d1 = fetch_hexval(s[1])) >= 0 && 324 (d2 = fetch_hexval(s[2])) >= 0 && (d1 > 0 || d2 > 0)) { 325 c = d1 << 4 | d2; 326 s += 2; 327 } else if (s[0] == '%') { 328 /* Invalid escape sequence. */ 329 return (NULL); 330 } else { 331 c = *s; 332 } 333 if (dlen-- > 0) 334 *dst++ = c; 335 else 336 return (NULL); 337 } 338 return (s); 339 } 340 341 /* 342 * Split an URL into components. URL syntax is: 343 * [method:/][/[user[:pwd]@]host[:port]/][document] 344 * This almost, but not quite, RFC1738 URL syntax. 345 */ 346 struct url * 347 fetchParseURL(const char *URL) 348 { 349 char *doc; 350 const char *p, *q; 351 struct url *u; 352 int i, n; 353 354 /* allocate struct url */ 355 if ((u = calloc(1, sizeof(*u))) == NULL) { 356 fetch_syserr(); 357 return (NULL); 358 } 359 u->netrcfd = -1; 360 361 /* scheme name */ 362 if ((p = strstr(URL, ":/"))) { 363 if (p - URL > URL_SCHEMELEN) 364 goto ouch; 365 for (i = 0; URL + i < p; i++) 366 u->scheme[i] = tolower((unsigned char)URL[i]); 367 URL = ++p; 368 /* 369 * Only one slash: no host, leave slash as part of document 370 * Two slashes: host follows, strip slashes 371 */ 372 if (URL[1] == '/') 373 URL = (p += 2); 374 } else { 375 p = URL; 376 } 377 if (!*URL || *URL == '/' || *URL == '.' || 378 (u->scheme[0] == '\0' && 379 strchr(URL, '/') == NULL && strchr(URL, ':') == NULL)) 380 goto nohost; 381 382 p = strpbrk(URL, "/@"); 383 if (p && *p == '@') { 384 /* username */ 385 q = fetch_pctdecode(u->user, URL, URL_USERLEN); 386 if (q == NULL) 387 goto ouch; 388 389 /* password */ 390 if (*q == ':') { 391 q = fetch_pctdecode(u->pwd, q + 1, URL_PWDLEN); 392 if (q == NULL) 393 goto ouch; 394 } 395 p++; 396 } else { 397 p = URL; 398 } 399 400 /* hostname */ 401 if (*p == '[') { 402 q = p + 1 + strspn(p + 1, ":0123456789ABCDEFabcdef."); 403 if (*q++ != ']') 404 goto ouch; 405 } else { 406 /* valid characters in a DNS name */ 407 q = p + strspn(p, "-." "0123456789" 408 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "_" 409 "abcdefghijklmnopqrstuvwxyz"); 410 } 411 if ((*q != '\0' && *q != '/' && *q != ':') || q - p > MAXHOSTNAMELEN) 412 goto ouch; 413 for (i = 0; p + i < q; i++) 414 u->host[i] = tolower((unsigned char)p[i]); 415 u->host[i] = '\0'; 416 p = q; 417 418 /* port */ 419 if (*p == ':') { 420 for (n = 0, q = ++p; *q && (*q != '/'); q++) { 421 if (*q >= '0' && *q <= '9' && n < INT_MAX / 10) { 422 n = n * 10 + (*q - '0'); 423 } else { 424 /* invalid port */ 425 url_seterr(URL_BAD_PORT); 426 goto ouch; 427 } 428 } 429 if (p != q && (n < 1 || n > IPPORT_MAX)) 430 goto ouch; 431 u->port = n; 432 p = q; 433 } 434 435 nohost: 436 /* document */ 437 if (!*p) 438 p = "/"; 439 440 if (strcmp(u->scheme, SCHEME_HTTP) == 0 || 441 strcmp(u->scheme, SCHEME_HTTPS) == 0) { 442 const char hexnums[] = "0123456789abcdef"; 443 444 /* percent-escape whitespace. */ 445 if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) { 446 fetch_syserr(); 447 goto ouch; 448 } 449 u->doc = doc; 450 /* fragments are reserved for client-side processing, see 451 * https://www.rfc-editor.org/rfc/rfc9110.html#section-7.1 452 */ 453 while (*p != '\0' && *p != '#') { 454 if (!isspace((unsigned char)*p)) { 455 *doc++ = *p++; 456 } else { 457 *doc++ = '%'; 458 *doc++ = hexnums[((unsigned int)*p) >> 4]; 459 *doc++ = hexnums[((unsigned int)*p) & 0xf]; 460 p++; 461 } 462 } 463 *doc = '\0'; 464 } else if ((u->doc = strdup(p)) == NULL) { 465 fetch_syserr(); 466 goto ouch; 467 } 468 469 DEBUGF("scheme: \"%s\"\n" 470 "user: \"%s\"\n" 471 "password: \"%s\"\n" 472 "host: \"%s\"\n" 473 "port: \"%d\"\n" 474 "document: \"%s\"\n", 475 u->scheme, u->user, u->pwd, 476 u->host, u->port, u->doc); 477 478 return (u); 479 480 ouch: 481 free(u); 482 return (NULL); 483 } 484 485 /* 486 * Free a URL 487 */ 488 void 489 fetchFreeURL(struct url *u) 490 { 491 free(u->doc); 492 free(u); 493 } 494