/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1998-2004 Dag-Erling Smørgrav
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 36 #include <netinet/in.h> 37 38 #include <errno.h> 39 #include <ctype.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 #include <string.h> 43 44 #include "fetch.h" 45 #include "common.h" 46 47 auth_t fetchAuthMethod; 48 int fetchLastErrCode; 49 char fetchLastErrString[MAXERRSTRING]; 50 int fetchTimeout; 51 int fetchRestartCalls = 1; 52 int fetchDebug; 53 54 55 /*** Local data **************************************************************/ 56 57 /* 58 * Error messages for parser errors 59 */ 60 #define URL_MALFORMED 1 61 #define URL_BAD_SCHEME 2 62 #define URL_BAD_PORT 3 63 static struct fetcherr url_errlist[] = { 64 { URL_MALFORMED, FETCH_URL, "Malformed URL" }, 65 { URL_BAD_SCHEME, FETCH_URL, "Invalid URL scheme" }, 66 { URL_BAD_PORT, FETCH_URL, "Invalid server port" }, 67 { -1, FETCH_UNKNOWN, "Unknown parser error" } 68 }; 69 70 71 /*** Public API **************************************************************/ 72 73 /* 74 * Select the appropriate protocol for the URL scheme, and return a 75 * read-only stream connected to the document referenced by the URL. 76 * Also fill out the struct url_stat. 77 */ 78 FILE * 79 fetchXGet(struct url *URL, struct url_stat *us, const char *flags) 80 { 81 82 if (us != NULL) { 83 us->size = -1; 84 us->atime = us->mtime = 0; 85 } 86 if (strcmp(URL->scheme, SCHEME_FILE) == 0) 87 return (fetchXGetFile(URL, us, flags)); 88 else if (strcmp(URL->scheme, SCHEME_FTP) == 0) 89 return (fetchXGetFTP(URL, us, flags)); 90 else if (strcmp(URL->scheme, SCHEME_HTTP) == 0) 91 return (fetchXGetHTTP(URL, us, flags)); 92 else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0) 93 return (fetchXGetHTTP(URL, us, flags)); 94 url_seterr(URL_BAD_SCHEME); 95 return (NULL); 96 } 97 98 /* 99 * Select the appropriate protocol for the URL scheme, and return a 100 * read-only stream connected to the document referenced by the URL. 
101 */ 102 FILE * 103 fetchGet(struct url *URL, const char *flags) 104 { 105 return (fetchXGet(URL, NULL, flags)); 106 } 107 108 /* 109 * Select the appropriate protocol for the URL scheme, and return a 110 * write-only stream connected to the document referenced by the URL. 111 */ 112 FILE * 113 fetchPut(struct url *URL, const char *flags) 114 { 115 116 if (strcmp(URL->scheme, SCHEME_FILE) == 0) 117 return (fetchPutFile(URL, flags)); 118 else if (strcmp(URL->scheme, SCHEME_FTP) == 0) 119 return (fetchPutFTP(URL, flags)); 120 else if (strcmp(URL->scheme, SCHEME_HTTP) == 0) 121 return (fetchPutHTTP(URL, flags)); 122 else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0) 123 return (fetchPutHTTP(URL, flags)); 124 url_seterr(URL_BAD_SCHEME); 125 return (NULL); 126 } 127 128 /* 129 * Select the appropriate protocol for the URL scheme, and return the 130 * size of the document referenced by the URL if it exists. 131 */ 132 int 133 fetchStat(struct url *URL, struct url_stat *us, const char *flags) 134 { 135 136 if (us != NULL) { 137 us->size = -1; 138 us->atime = us->mtime = 0; 139 } 140 if (strcmp(URL->scheme, SCHEME_FILE) == 0) 141 return (fetchStatFile(URL, us, flags)); 142 else if (strcmp(URL->scheme, SCHEME_FTP) == 0) 143 return (fetchStatFTP(URL, us, flags)); 144 else if (strcmp(URL->scheme, SCHEME_HTTP) == 0) 145 return (fetchStatHTTP(URL, us, flags)); 146 else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0) 147 return (fetchStatHTTP(URL, us, flags)); 148 url_seterr(URL_BAD_SCHEME); 149 return (-1); 150 } 151 152 /* 153 * Select the appropriate protocol for the URL scheme, and return a 154 * list of files in the directory pointed to by the URL. 
155 */ 156 struct url_ent * 157 fetchList(struct url *URL, const char *flags) 158 { 159 160 if (strcmp(URL->scheme, SCHEME_FILE) == 0) 161 return (fetchListFile(URL, flags)); 162 else if (strcmp(URL->scheme, SCHEME_FTP) == 0) 163 return (fetchListFTP(URL, flags)); 164 else if (strcmp(URL->scheme, SCHEME_HTTP) == 0) 165 return (fetchListHTTP(URL, flags)); 166 else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0) 167 return (fetchListHTTP(URL, flags)); 168 url_seterr(URL_BAD_SCHEME); 169 return (NULL); 170 } 171 172 /* 173 * Attempt to parse the given URL; if successful, call fetchXGet(). 174 */ 175 FILE * 176 fetchXGetURL(const char *URL, struct url_stat *us, const char *flags) 177 { 178 struct url *u; 179 FILE *f; 180 181 if ((u = fetchParseURL(URL)) == NULL) 182 return (NULL); 183 184 f = fetchXGet(u, us, flags); 185 186 fetchFreeURL(u); 187 return (f); 188 } 189 190 /* 191 * Attempt to parse the given URL; if successful, call fetchGet(). 192 */ 193 FILE * 194 fetchGetURL(const char *URL, const char *flags) 195 { 196 return (fetchXGetURL(URL, NULL, flags)); 197 } 198 199 /* 200 * Attempt to parse the given URL; if successful, call fetchPut(). 201 */ 202 FILE * 203 fetchPutURL(const char *URL, const char *flags) 204 { 205 struct url *u; 206 FILE *f; 207 208 if ((u = fetchParseURL(URL)) == NULL) 209 return (NULL); 210 211 f = fetchPut(u, flags); 212 213 fetchFreeURL(u); 214 return (f); 215 } 216 217 /* 218 * Attempt to parse the given URL; if successful, call fetchStat(). 219 */ 220 int 221 fetchStatURL(const char *URL, struct url_stat *us, const char *flags) 222 { 223 struct url *u; 224 int s; 225 226 if ((u = fetchParseURL(URL)) == NULL) 227 return (-1); 228 229 s = fetchStat(u, us, flags); 230 231 fetchFreeURL(u); 232 return (s); 233 } 234 235 /* 236 * Attempt to parse the given URL; if successful, call fetchList(). 
237 */ 238 struct url_ent * 239 fetchListURL(const char *URL, const char *flags) 240 { 241 struct url *u; 242 struct url_ent *ue; 243 244 if ((u = fetchParseURL(URL)) == NULL) 245 return (NULL); 246 247 ue = fetchList(u, flags); 248 249 fetchFreeURL(u); 250 return (ue); 251 } 252 253 /* 254 * Make a URL 255 */ 256 struct url * 257 fetchMakeURL(const char *scheme, const char *host, int port, const char *doc, 258 const char *user, const char *pwd) 259 { 260 struct url *u; 261 262 if (!scheme || (!host && !doc)) { 263 url_seterr(URL_MALFORMED); 264 return (NULL); 265 } 266 267 if (port < 0 || port > 65535) { 268 url_seterr(URL_BAD_PORT); 269 return (NULL); 270 } 271 272 /* allocate struct url */ 273 if ((u = calloc(1, sizeof(*u))) == NULL) { 274 fetch_syserr(); 275 return (NULL); 276 } 277 u->netrcfd = -1; 278 279 if ((u->doc = strdup(doc ? doc : "/")) == NULL) { 280 fetch_syserr(); 281 free(u); 282 return (NULL); 283 } 284 285 #define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x) 286 seturl(scheme); 287 seturl(host); 288 seturl(user); 289 seturl(pwd); 290 #undef seturl 291 u->port = port; 292 293 return (u); 294 } 295 296 /* 297 * Return value of the given hex digit. 298 */ 299 static int 300 fetch_hexval(char ch) 301 { 302 303 if (ch >= '0' && ch <= '9') 304 return (ch - '0'); 305 else if (ch >= 'a' && ch <= 'f') 306 return (ch - 'a' + 10); 307 else if (ch >= 'A' && ch <= 'F') 308 return (ch - 'A' + 10); 309 return (-1); 310 } 311 312 /* 313 * Decode percent-encoded URL component from src into dst, stopping at end 314 * of string, or at @ or : separators. Returns a pointer to the unhandled 315 * part of the input string (null terminator, @, or :). No terminator is 316 * written to dst (it is the caller's responsibility). 
317 */ 318 static const char * 319 fetch_pctdecode(char *dst, const char *src, size_t dlen) 320 { 321 int d1, d2; 322 char c; 323 const char *s; 324 325 for (s = src; *s != '\0' && *s != '@' && *s != ':'; s++) { 326 if (s[0] == '%' && (d1 = fetch_hexval(s[1])) >= 0 && 327 (d2 = fetch_hexval(s[2])) >= 0 && (d1 > 0 || d2 > 0)) { 328 c = d1 << 4 | d2; 329 s += 2; 330 } else { 331 c = *s; 332 } 333 if (dlen-- > 0) 334 *dst++ = c; 335 } 336 return (s); 337 } 338 339 /* 340 * Split an URL into components. URL syntax is: 341 * [method:/][/[user[:pwd]@]host[:port]/][document] 342 * This almost, but not quite, RFC1738 URL syntax. 343 */ 344 struct url * 345 fetchParseURL(const char *URL) 346 { 347 char *doc; 348 const char *p, *q; 349 struct url *u; 350 int i, n; 351 352 /* allocate struct url */ 353 if ((u = calloc(1, sizeof(*u))) == NULL) { 354 fetch_syserr(); 355 return (NULL); 356 } 357 u->netrcfd = -1; 358 359 /* scheme name */ 360 if ((p = strstr(URL, ":/"))) { 361 if (p - URL > URL_SCHEMELEN) 362 goto ouch; 363 for (i = 0; URL + i < p; i++) 364 u->scheme[i] = tolower((unsigned char)URL[i]); 365 URL = ++p; 366 /* 367 * Only one slash: no host, leave slash as part of document 368 * Two slashes: host follows, strip slashes 369 */ 370 if (URL[1] == '/') 371 URL = (p += 2); 372 } else { 373 p = URL; 374 } 375 if (!*URL || *URL == '/' || *URL == '.' || 376 (u->scheme[0] == '\0' && 377 strchr(URL, '/') == NULL && strchr(URL, ':') == NULL)) 378 goto nohost; 379 380 p = strpbrk(URL, "/@"); 381 if (p && *p == '@') { 382 /* username */ 383 q = fetch_pctdecode(u->user, URL, URL_USERLEN); 384 385 /* password */ 386 if (*q == ':') 387 q = fetch_pctdecode(u->pwd, q + 1, URL_PWDLEN); 388 389 p++; 390 } else { 391 p = URL; 392 } 393 394 /* hostname */ 395 if (*p == '[') { 396 q = p + 1 + strspn(p + 1, ":0123456789ABCDEFabcdef"); 397 if (*q++ != ']') 398 goto ouch; 399 } else { 400 /* valid characters in a DNS name */ 401 q = p + strspn(p, "-." 
"0123456789" 402 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "_" 403 "abcdefghijklmnopqrstuvwxyz"); 404 } 405 if ((*q != '\0' && *q != '/' && *q != ':') || q - p > MAXHOSTNAMELEN) 406 goto ouch; 407 for (i = 0; p + i < q; i++) 408 u->host[i] = tolower((unsigned char)p[i]); 409 u->host[i] = '\0'; 410 p = q; 411 412 /* port */ 413 if (*p == ':') { 414 for (n = 0, q = ++p; *q && (*q != '/'); q++) { 415 if (*q >= '0' && *q <= '9' && n < INT_MAX / 10) { 416 n = n * 10 + (*q - '0'); 417 } else { 418 /* invalid port */ 419 url_seterr(URL_BAD_PORT); 420 goto ouch; 421 } 422 } 423 if (n < 1 || n > IPPORT_MAX) 424 goto ouch; 425 u->port = n; 426 p = q; 427 } 428 429 nohost: 430 /* document */ 431 if (!*p) 432 p = "/"; 433 434 if (strcmp(u->scheme, SCHEME_HTTP) == 0 || 435 strcmp(u->scheme, SCHEME_HTTPS) == 0) { 436 const char hexnums[] = "0123456789abcdef"; 437 438 /* percent-escape whitespace. */ 439 if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) { 440 fetch_syserr(); 441 goto ouch; 442 } 443 u->doc = doc; 444 while (*p != '\0') { 445 if (!isspace((unsigned char)*p)) { 446 *doc++ = *p++; 447 } else { 448 *doc++ = '%'; 449 *doc++ = hexnums[((unsigned int)*p) >> 4]; 450 *doc++ = hexnums[((unsigned int)*p) & 0xf]; 451 p++; 452 } 453 } 454 *doc = '\0'; 455 } else if ((u->doc = strdup(p)) == NULL) { 456 fetch_syserr(); 457 goto ouch; 458 } 459 460 DEBUGF("scheme: \"%s\"\n" 461 "user: \"%s\"\n" 462 "password: \"%s\"\n" 463 "host: \"%s\"\n" 464 "port: \"%d\"\n" 465 "document: \"%s\"\n", 466 u->scheme, u->user, u->pwd, 467 u->host, u->port, u->doc); 468 469 return (u); 470 471 ouch: 472 free(u); 473 return (NULL); 474 } 475 476 /* 477 * Free a URL 478 */ 479 void 480 fetchFreeURL(struct url *u) 481 { 482 free(u->doc); 483 free(u); 484 } 485