xref: /freebsd/usr.bin/fetch/fetch.c (revision 77a0943ded95b9e6438f7db70c4a28e4d93946d4)
1 /*-
2  * Copyright (c) 2000 Dag-Erling Coïdan Smørgrav
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  *	$FreeBSD$
29  */
30 
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>

#include <fetch.h>
46 
/* Smallest transfer buffer main() will allocate, whatever -B says. */
#define MINBUFSIZE	4096

/*
 * Command-line state.  Everything below is filled in by the option
 * parser in main(); objects without an initializer start out as zero.
 */

/* Simple on/off switches */
int	 once_flag;		/* -1: stop after the first successful file */
int	 A_flag;		/* -A: do not follow 302 redirects */
int	 a_flag;		/* -a: auto retry (also turned on by -w) */
int	 d_flag;		/* -d: direct connection */
int	 F_flag;		/* -F: restart without checking mtime */
int	 H_flag;		/* -H: use high port */
int	 l_flag;		/* -l: link rather than copy file: URLs */
int	 m_flag;		/* -M, -m: mirror mode */
int	 n_flag;		/* -n: do not preserve modification time */
int	 p_flag;		/* -P, -p: use passive FTP */
int	 R_flag;		/* -R: keep partially transferred files */
int	 r_flag;		/* -r: restart an interrupted transfer */
int	 s_flag;		/* -s: show size, don't fetch */

/* Deprecated switches: still accepted, each triggers a warning */
int	 b_flag;		/* -b: workaround TCP bug */
int	 t_flag;		/* -t: workaround TCP bug */

/* Options that carry a value */
size_t	 B_size;		/* -B: buffer size */
off_t	 S_size;		/* -S: require size to match */
unsigned int T_secs;		/* -T: transfer timeout in seconds */
unsigned int w_secs;		/* -w: retry delay */
int	 family = PF_UNSPEC;	/* -4, -6: address family to use */

/* Pieces of an FTP URL given as separate options */
char	*h_hostname;		/* -h: host to fetch from */
char	*c_dirname;		/* -c: remote directory */
char	*f_filename;		/* -f: file to fetch */

/* Output destination */
int	 o_flag;		/* -o: an output file was specified */
char	*o_filename;		/*     its name */
int	 o_directory;		/*     it is a directory */
int	 o_stdout;		/*     it is stdout */

/* Verbosity */
int	 v_level = 1;		/* -v raises the level, -q sets it to 0 */
int	 v_tty;			/* stderr is a tty (progress display) */
79 
/*
 * Flags raised by sig_handler() and polled by the rest of the program.
 * They are written from a signal handler, so they must be
 * volatile sig_atomic_t for the accesses to be well defined.
 */
volatile sig_atomic_t	 sigalrm;	/* SIGALRM received */
volatile sig_atomic_t	 sigint;	/* SIGINT received */

unsigned int	 ftp_timeout;	/* default timeout for FTP transfers */
unsigned int	 http_timeout;	/* default timeout for HTTP transfers */
unsigned char	*buf;		/* transfer buffer */


/*
 * Signal handler: record that SIGALRM or SIGINT was delivered by
 * setting the matching flag.  Any other signal number is ignored.
 */
void
sig_handler(int sig)
{
    switch (sig) {
    case SIGALRM:
	sigalrm = 1;
	break;
    case SIGINT:
	sigint = 1;
	break;
    default:
	break;
    }
}
100 
/*
 * Book-keeping for the progress display of one transfer.
 */
struct xferstat {
    /* name shown in the progress line (truncated to fit) */
    char		 name[40];
    /*
     * start: when stat_start() was called
     * end:   when stat_end() was called
     * last:  when the progress line was last redrawn
     */
    struct timeval	 start, end, last;
    /*
     * size:   expected total size, -1 if unknown
     * offset: byte position the transfer started from
     * rcvd:   byte position reached so far (starts out equal to offset)
     */
    off_t		 size, offset, rcvd;
};
110 
111 void
112 stat_display(struct xferstat *xs, int force)
113 {
114     struct timeval now;
115 
116     if (!v_tty || !v_level)
117 	return;
118 
119     gettimeofday(&now, NULL);
120     if (!force && now.tv_sec <= xs->last.tv_sec)
121 	return;
122     xs->last = now;
123 
124     fprintf(stderr, "\rReceiving %s", xs->name);
125     if (xs->size == -1)
126 	fprintf(stderr, ": %lld bytes", xs->rcvd);
127     else
128 	fprintf(stderr, " (%lld bytes): %d%%", xs->size,
129 		(int)((100.0 * xs->rcvd) / xs->size));
130 }
131 
132 void
133 stat_start(struct xferstat *xs, char *name, off_t size, off_t offset)
134 {
135     snprintf(xs->name, sizeof xs->name, "%s", name);
136     gettimeofday(&xs->start, NULL);
137     xs->last.tv_sec = xs->last.tv_usec = 0;
138     xs->end = xs->last;
139     xs->size = size;
140     xs->offset = offset;
141     xs->rcvd = offset;
142     stat_display(xs, 1);
143 }
144 
145 void
146 stat_update(struct xferstat *xs, off_t rcvd, int force)
147 {
148     xs->rcvd = rcvd;
149     stat_display(xs, 0);
150 }
151 
152 void
153 stat_end(struct xferstat *xs)
154 {
155     double delta;
156     double bps;
157 
158     if (!v_level)
159 	return;
160 
161     gettimeofday(&xs->end, NULL);
162 
163     stat_display(xs, 1);
164     fputc('\n', stderr);
165     delta = (xs->end.tv_sec + (xs->end.tv_usec / 1.e6))
166 	- (xs->start.tv_sec + (xs->start.tv_usec / 1.e6));
167     fprintf(stderr, "%lld bytes transferred in %.1f seconds ",
168 	    xs->rcvd - xs->offset, delta);
169     bps = (xs->rcvd - xs->offset) / delta;
170     if (bps > 1024*1024)
171 	fprintf(stderr, "(%.2f MBps)\n", bps / (1024*1024));
172     else if (bps > 1024)
173 	fprintf(stderr, "(%.2f kBps)\n", bps / 1024);
174     else
175 	fprintf(stderr, "(%.2f Bps)\n", bps);
176 }
177 
178 int
179 fetch(char *URL, char *path)
180 {
181     struct url *url;
182     struct url_stat us;
183     struct stat sb;
184     struct xferstat xs;
185     FILE *f, *of;
186     size_t size;
187     off_t count;
188     char flags[8];
189     int n, r;
190     u_int timeout;
191 
192     f = of = NULL;
193 
194     /* parse URL */
195     if ((url = fetchParseURL(URL)) == NULL) {
196 	warnx("%s: parse error", URL);
197 	goto failure;
198     }
199 
200     timeout = 0;
201     *flags = 0;
202     count = 0;
203 
204     /* common flags */
205     if (v_level > 1)
206 	strcat(flags, "v");
207     switch (family) {
208     case PF_INET:
209 	strcat(flags, "4");
210 	break;
211     case PF_INET6:
212 	strcat(flags, "6");
213 	break;
214     }
215 
216     /* FTP specific flags */
217     if (strcmp(url->scheme, "ftp") == 0) {
218 	if (p_flag)
219 	    strcat(flags, "p");
220 	if (d_flag)
221 	    strcat(flags, "d");
222 	if (H_flag)
223 	    strcat(flags, "h");
224 	timeout = T_secs ? T_secs : ftp_timeout;
225     }
226 
227     /* HTTP specific flags */
228     if (strcmp(url->scheme, "http") == 0) {
229 	if (d_flag)
230 	    strcat(flags, "d");
231 	if (A_flag)
232 	    strcat(flags, "A");
233 	timeout = T_secs ? T_secs : http_timeout;
234     }
235 
236     /* set the protocol timeout. */
237     fetchTimeout = timeout;
238 
239     /* just print size */
240     if (s_flag) {
241 	if (fetchStat(url, &us, flags) == -1)
242 	    goto failure;
243 	if (us.size == -1)
244 	    printf("Unknown\n");
245 	else
246 	    printf("%lld\n", us.size);
247 	goto success;
248     }
249 
250     /*
251      * If the -r flag was specified, we have to compare the local and
252      * remote files, so we should really do a fetchStat() first, but I
253      * know of at least one HTTP server that only sends the content
254      * size in response to GET requests, and leaves it out of replies
255      * to HEAD requests. Also, in the (frequent) case that the local
256      * and remote files match but the local file is truncated, we have
257      * sufficient information *before* the compare to issue a correct
258      * request. Therefore, we always issue a GET request as if we were
259      * sure the local file was a truncated copy of the remote file; we
260      * can drop the connection later if we change our minds.
261      */
262     if ((r_flag  || m_flag) && !o_stdout && stat(path, &sb) != -1) {
263 	if (r_flag)
264 	    url->offset = sb.st_size;
265     } else {
266 	sb.st_size = -1;
267     }
268 
269     /* start the transfer */
270     if ((f = fetchXGet(url, &us, flags)) == NULL) {
271 	warnx("%s: %s", path, fetchLastErrString);
272 	goto failure;
273     }
274     if (sigint)
275 	goto signal;
276 
277     /* check that size is as expected */
278     if (S_size) {
279 	if (us.size == -1) {
280 	    warnx("%s: size unknown", path);
281 	    goto failure;
282 	} else if (us.size != S_size) {
283 	    warnx("%s: size mismatch: expected %lld, actual %lld",
284 		  path, S_size, us.size);
285 	    goto failure;
286 	}
287     }
288 
289     /* symlink instead of copy */
290     if (l_flag && strcmp(url->scheme, "file") == 0 && !o_stdout) {
291 	if (symlink(url->doc, path) == -1) {
292 	    warn("%s: symlink()", path);
293 	    goto failure;
294 	}
295 	goto success;
296     }
297 
298     if (v_level > 1) {
299 	if (sb.st_size != -1)
300 	    fprintf(stderr, "local size / mtime: %lld / %ld\n",
301 		    sb.st_size, sb.st_mtime);
302 	fprintf(stderr, "remote size / mtime: %lld / %ld\n",
303 		us.size, us.mtime);
304     }
305 
306     /* open output file */
307     if (o_stdout) {
308 	/* output to stdout */
309 	of = stdout;
310     } else if (sb.st_size != -1) {
311 	/* resume mode, local file exists */
312 	if (!F_flag && us.mtime && sb.st_mtime != us.mtime) {
313 	    /* no match! have to refetch */
314 	    fclose(f);
315 	    url->offset = 0;
316 	    if ((f = fetchXGet(url, &us, flags)) == NULL) {
317 		warnx("%s: %s", path, fetchLastErrString);
318 		goto failure;
319 	    }
320 	    if (sigint)
321 		goto signal;
322 	} else {
323 	    if (us.size == sb.st_size)
324 		/* nothing to do */
325 		goto success;
326 	    if (sb.st_size > us.size) {
327 		/* local file too long! */
328 		warnx("%s: local file (%lld bytes) is longer "
329 		      "than remote file (%lld bytes)",
330 		      path, sb.st_size, us.size);
331 		goto failure;
332 	    }
333 	    /* we got through, open local file and seek to offset */
334 	    /*
335 	     * XXX there's a race condition here - the file we open is not
336 	     * necessarily the same as the one we stat()'ed earlier...
337 	     */
338 	    if ((of = fopen(path, "a")) == NULL) {
339 		warn("%s: fopen()", path);
340 		goto failure;
341 	    }
342 	    if (fseek(of, url->offset, SEEK_SET) == -1) {
343 		warn("%s: fseek()", path);
344 		goto failure;
345 	    }
346 	}
347     }
348     if (m_flag && sb.st_size != -1) {
349 	/* mirror mode, local file exists */
350 	if (sb.st_size == us.size && sb.st_mtime == us.mtime)
351 	    goto success;
352     }
353     if (!of) {
354 	/*
355 	 * We don't yet have an output file; either this is a vanilla
356 	 * run with no special flags, or the local and remote files
357 	 * didn't match.
358 	 */
359 	if ((of = fopen(path, "w")) == NULL) {
360 	    warn("%s: open()", path);
361 	    goto failure;
362 	}
363     }
364     count = url->offset;
365 
366     /* start the counter */
367     stat_start(&xs, path, us.size, count);
368 
369     sigint = sigalrm = 0;
370 
371     /* suck in the data */
372     for (n = 0; !sigint && !sigalrm; ++n) {
373 	if (us.size != -1 && us.size - count < B_size)
374 	    size = us.size - count;
375 	else
376 	    size = B_size;
377 	if (timeout)
378 	    alarm(timeout);
379 	if ((size = fread(buf, 1, size, f)) <= 0)
380 	    break;
381 	stat_update(&xs, count += size, 0);
382 	if (fwrite(buf, size, 1, of) != 1)
383 	    break;
384     }
385 
386     if (timeout)
387 	alarm(0);
388 
389     stat_end(&xs);
390 
391     /* Set mtime of local file */
392     if (!n_flag && us.mtime && !o_stdout
393 	&& (stat(path, &sb) != -1) && sb.st_mode & S_IFREG) {
394 	struct timeval tv[2];
395 
396 	fflush(of);
397 	tv[0].tv_sec = (long)(us.atime ? us.atime : us.mtime);
398 	tv[1].tv_sec = (long)us.mtime;
399 	tv[0].tv_usec = tv[1].tv_usec = 0;
400 	if (utimes(path, tv))
401 	    warn("%s: utimes()", path);
402     }
403 
404     /* timed out or interrupted? */
405  signal:
406     if (sigalrm)
407 	warnx("transfer timed out");
408     if (sigint) {
409 	warnx("transfer interrupted");
410 	goto failure;
411     }
412 
413     if (!sigalrm) {
414 	/* check the status of our files */
415 	if (ferror(f))
416 	    warn("%s", URL);
417 	if (ferror(of))
418 	    warn("%s", path);
419 	if (ferror(f) || ferror(of))
420 	    goto failure;
421     }
422 
423     /* did the transfer complete normally? */
424     if (us.size != -1 && count < us.size) {
425 	warnx("%s appears to be truncated: %lld/%lld bytes",
426 	      path, count, us.size);
427 	goto failure_keep;
428     }
429 
430  success:
431     r = 0;
432     goto done;
433  failure:
434     if (of && of != stdout && !R_flag && !r_flag)
435 	if (stat(path, &sb) != -1 && (sb.st_mode & S_IFREG))
436 	    unlink(path);
437  failure_keep:
438     r = -1;
439     goto done;
440  done:
441     if (f)
442 	fclose(f);
443     if (of && of != stdout)
444 	fclose(of);
445     if (url)
446 	fetchFreeURL(url);
447     return r;
448 }
449 
/*
 * Print a synopsis of the command line to stderr.  The flag list
 * mirrors the option string passed to getopt() in main().
 */
void
usage(void)
{
    fprintf(stderr,
	    "Usage: fetch [-146AFHMPRabdlmnpqrstv] [-o outputfile] [-S bytes]\n"
	    "             [-B bytes] [-T seconds] [-w seconds]\n"
	    "             [-f file -h host [-c dir] | URL ...]\n"
	);
}
460 
461 
/*
 * Largest value of the integer type TYPE, as an unsigned long long.
 * Works for signed and unsigned types at least as wide as int; the
 * signed limit is built without ever overflowing TYPE.
 */
#define PARSENUM_MAX(TYPE)						\
    ((TYPE)-1 < (TYPE)1 ?						\
	(unsigned long long)						\
	    (((((TYPE)1 << (sizeof(TYPE) * CHAR_BIT - 2)) - 1) * 2) + 1) : \
	(unsigned long long)(TYPE)-1)

/*
 * PARSENUM(NAME, TYPE) defines
 *
 *	int NAME(char *s, TYPE *v);
 *
 * which parses the string s as a non-negative decimal number and
 * stores it in *v.  It returns 0 on success and -1 if s contains a
 * character that is not a digit or if the value does not fit in TYPE;
 * on failure *v is left at 0.  An empty string parses as 0.
 *
 * The digit test casts to unsigned char because passing a negative
 * char value to isdigit() is undefined.
 */
#define PARSENUM(NAME, TYPE)						\
int									\
NAME(char *s, TYPE *v)							\
{									\
    const unsigned long long max = PARSENUM_MAX(TYPE);			\
    unsigned long long acc = 0;						\
    unsigned int digit;							\
									\
    *v = 0;								\
    for (; *s != '\0'; s++) {						\
	if (!isdigit((unsigned char)*s))				\
	    return -1;							\
	digit = (unsigned int)(*s - '0');				\
	/* would acc * 10 + digit exceed max? */			\
	if (acc > (max - digit) / 10)					\
	    return -1;							\
	acc = acc * 10 + digit;						\
    }									\
    *v = (TYPE)acc;							\
    return 0;								\
}

PARSENUM(parseint, unsigned int)
PARSENUM(parsesize, size_t)
PARSENUM(parseoff, off_t)
478 
479 int
480 main(int argc, char *argv[])
481 {
482     struct stat sb;
483     struct sigaction sa;
484     char *p, *q, *s;
485     int c, e, r;
486 
487     while ((c = getopt(argc, argv,
488 		       "146AaB:bc:dFf:h:lHMmnPpo:qRrS:sT:tvw:")) != EOF)
489 	switch (c) {
490 	case '1':
491 	    once_flag = 1;
492 	    break;
493 	case '4':
494 	    family = PF_INET;
495 	    break;
496 	case '6':
497 	    family = PF_INET6;
498 	    break;
499 	case 'A':
500 	    A_flag = 1;
501 	    break;
502 	case 'a':
503 	    a_flag = 1;
504 	    break;
505 	case 'B':
506 	    if (parsesize(optarg, &B_size) == -1)
507 		errx(1, "invalid buffer size");
508 	    break;
509 	case 'b':
510 	    warnx("warning: the -b option is deprecated");
511 	    b_flag = 1;
512 	    break;
513 	case 'c':
514 	    c_dirname = optarg;
515 	    break;
516 	case 'd':
517 	    d_flag = 1;
518 	    break;
519 	case 'F':
520 	    F_flag = 1;
521 	    break;
522 	case 'f':
523 	    f_filename = optarg;
524 	    break;
525 	case 'H':
526 	    H_flag = 1;
527 	    break;
528 	case 'h':
529 	    h_hostname = optarg;
530 	    break;
531 	case 'l':
532 	    l_flag = 1;
533 	    break;
534 	case 'o':
535 	    o_flag = 1;
536 	    o_filename = optarg;
537 	    break;
538 	case 'M':
539 	case 'm':
540 	    if (r_flag)
541 		errx(1, "the -m and -r flags are mutually exclusive");
542 	    m_flag = 1;
543 	    break;
544 	case 'n':
545 	    n_flag = 1;
546 	    break;
547 	case 'P':
548 	case 'p':
549 	    p_flag = 1;
550 	    break;
551 	case 'q':
552 	    v_level = 0;
553 	    break;
554 	case 'R':
555 	    R_flag = 1;
556 	    break;
557 	case 'r':
558 	    if (m_flag)
559 		errx(1, "the -m and -r flags are mutually exclusive");
560 	    r_flag = 1;
561 	    break;
562 	case 'S':
563 	    if (parseoff(optarg, &S_size) == -1)
564 		errx(1, "invalid size");
565 	    break;
566 	case 's':
567 	    s_flag = 1;
568 	    break;
569 	case 'T':
570 	    if (parseint(optarg, &T_secs) == -1)
571 		errx(1, "invalid timeout");
572 	    break;
573 	case 't':
574 	    t_flag = 1;
575 	    warnx("warning: the -t option is deprecated");
576 	    break;
577 	case 'v':
578 	    v_level++;
579 	    break;
580 	case 'w':
581 	    a_flag = 1;
582 	    if (parseint(optarg, &w_secs) == -1)
583 		errx(1, "invalid delay");
584 	    break;
585 	default:
586 	    usage();
587 	    exit(EX_USAGE);
588 	}
589 
590     argc -= optind;
591     argv += optind;
592 
593     if (h_hostname || f_filename || c_dirname) {
594 	if (!h_hostname || !f_filename || argc) {
595 	    usage();
596 	    exit(EX_USAGE);
597 	}
598 	/* XXX this is a hack. */
599 	if (strcspn(h_hostname, "@:/") != strlen(h_hostname))
600 	    errx(1, "invalid hostname");
601 	if (asprintf(argv, "ftp://%s/%s/%s", h_hostname,
602 		     c_dirname ? c_dirname : "", f_filename) == -1)
603 	    errx(1, "%s", strerror(ENOMEM));
604 	argc++;
605     }
606 
607     if (!argc) {
608 	usage();
609 	exit(EX_USAGE);
610     }
611 
612     /* allocate buffer */
613     if (B_size < MINBUFSIZE)
614 	B_size = MINBUFSIZE;
615     if ((buf = malloc(B_size)) == NULL)
616 	errx(1, "%s", strerror(ENOMEM));
617 
618     /* timeouts */
619     if ((s = getenv("FTP_TIMEOUT")) != NULL) {
620 	if (parseint(s, &ftp_timeout) == -1) {
621 	    warnx("FTP_TIMEOUT is not a positive integer");
622 	    ftp_timeout = 0;
623 	}
624     }
625     if ((s = getenv("HTTP_TIMEOUT")) != NULL) {
626 	if (parseint(s, &http_timeout) == -1) {
627 	    warnx("HTTP_TIMEOUT is not a positive integer");
628 	    http_timeout = 0;
629 	}
630     }
631 
632     /* signal handling */
633     sa.sa_flags = 0;
634     sa.sa_handler = sig_handler;
635     sigemptyset(&sa.sa_mask);
636     sigaction(SIGALRM, &sa, NULL);
637     sa.sa_flags = SA_RESETHAND;
638     sigaction(SIGINT, &sa, NULL);
639     fetchRestartCalls = 0;
640 
641     /* output file */
642     if (o_flag) {
643 	if (strcmp(o_filename, "-") == 0) {
644 	    o_stdout = 1;
645 	} else if (stat(o_filename, &sb) == -1) {
646 	    if (errno == ENOENT) {
647 		if (argc > 1)
648 		    errx(EX_USAGE, "%s is not a directory", o_filename);
649 	    } else {
650 		err(EX_IOERR, "%s", o_filename);
651 	    }
652 	} else {
653 	    if (sb.st_mode & S_IFDIR)
654 		o_directory = 1;
655 	}
656     }
657 
658     /* check if output is to a tty (for progress report) */
659     v_tty = isatty(STDERR_FILENO);
660     r = 0;
661 
662     while (argc) {
663 	if ((p = strrchr(*argv, '/')) == NULL)
664 	    p = *argv;
665 	else
666 	    p++;
667 
668 	if (!*p)
669 	    p = "fetch.out";
670 
671 	fetchLastErrCode = 0;
672 
673 	if (o_flag) {
674 	    if (o_stdout) {
675 		e = fetch(*argv, "-");
676 	    } else if (o_directory) {
677 		asprintf(&q, "%s/%s", o_filename, p);
678 		e = fetch(*argv, q);
679 		free(q);
680 	    } else {
681 		e = fetch(*argv, o_filename);
682 	    }
683 	} else {
684 	    e = fetch(*argv, p);
685 	}
686 
687 	if (sigint)
688 	    kill(getpid(), SIGINT);
689 
690 	if (e == 0 && once_flag)
691 	    exit(0);
692 
693 	if (e) {
694 	    r = 1;
695 	    if ((fetchLastErrCode
696 		 && fetchLastErrCode != FETCH_UNAVAIL
697 		 && fetchLastErrCode != FETCH_MOVED
698 		 && fetchLastErrCode != FETCH_URL
699 		 && fetchLastErrCode != FETCH_RESOLV
700 		 && fetchLastErrCode != FETCH_UNKNOWN)) {
701 		if (w_secs) {
702 		    if (v_level)
703 			fprintf(stderr, "Waiting %d seconds before retrying\n",
704 				w_secs);
705 		    sleep(w_secs);
706 		}
707 		if (a_flag)
708 		    continue;
709 	    }
710 	}
711 
712 	argc--, argv++;
713     }
714 
715     exit(r);
716 }
717