/*
 * Copyright (C) 2012 by Darren Reed.
 *
 * See the IPFILTER.LICENCE file for details on licencing.
 *
 * $Id: load_http.c,v 1.5.2.5 2012/07/22 08:04:24 darren_r Exp $
 */
#include "ipf.h"
#include <ctype.h>
#include <stdlib.h>
/*
 * Because the URL can be included twice into the buffer, once as the
 * full path for the "GET" and once as the "Host:", the buffer it is
 * put in needs to be larger than 512*2 to make room for the supporting
 * text. Why not just use snprintf and truncate? The warning about the
 * URL being too long tells you something is wrong and does not fetch
 * any data - just truncating the URL (with snprintf, etc) and sending
 * that to the server would allow an unknown and unintended action
 * to happen.
 */
#define	MAX_URL_LEN	512
#define	LOAD_BUFSIZE	(MAX_URL_LEN * 2 + 128)
2641edb306SCy Schubert /*
27*0939cf32SElyes Haouas * Format expected is one address per line, at the start of each line.
2841edb306SCy Schubert */
2941edb306SCy Schubert alist_t *
load_http(char * url)3041edb306SCy Schubert load_http(char *url)
3141edb306SCy Schubert {
3241edb306SCy Schubert int fd, len, left, port, endhdr, removed, linenum = 0;
3341edb306SCy Schubert char *s, *t, *u, buffer[LOAD_BUFSIZE], *myurl;
3441edb306SCy Schubert alist_t *a, *rtop, *rbot;
3541edb306SCy Schubert size_t avail;
3641edb306SCy Schubert int error;
3741edb306SCy Schubert
3841edb306SCy Schubert /*
3941edb306SCy Schubert * More than this would just be absurd.
4041edb306SCy Schubert */
4141edb306SCy Schubert if (strlen(url) > MAX_URL_LEN) {
4241edb306SCy Schubert fprintf(stderr, "load_http has a URL > %d bytes?!\n",
4341edb306SCy Schubert MAX_URL_LEN);
442582ae57SCy Schubert return (NULL);
4541edb306SCy Schubert }
4641edb306SCy Schubert
4741edb306SCy Schubert fd = -1;
4841edb306SCy Schubert rtop = NULL;
4941edb306SCy Schubert rbot = NULL;
5041edb306SCy Schubert
5141edb306SCy Schubert avail = sizeof(buffer);
5241edb306SCy Schubert error = snprintf(buffer, avail, "GET %s HTTP/1.0\r\n", url);
5341edb306SCy Schubert
5441edb306SCy Schubert /*
5541edb306SCy Schubert * error is always less then avail due to the constraint on
5641edb306SCy Schubert * the url length above.
5741edb306SCy Schubert */
5841edb306SCy Schubert avail -= error;
5941edb306SCy Schubert
6041edb306SCy Schubert myurl = strdup(url);
6141edb306SCy Schubert if (myurl == NULL)
6241edb306SCy Schubert goto done;
6341edb306SCy Schubert
6441edb306SCy Schubert s = myurl + 7; /* http:// */
6541edb306SCy Schubert t = strchr(s, '/');
6641edb306SCy Schubert if (t == NULL) {
6741edb306SCy Schubert fprintf(stderr, "load_http has a malformed URL '%s'\n", url);
6841edb306SCy Schubert free(myurl);
692582ae57SCy Schubert return (NULL);
7041edb306SCy Schubert }
7141edb306SCy Schubert *t++ = '\0';
7241edb306SCy Schubert
7341edb306SCy Schubert /*
7441edb306SCy Schubert * 10 is the length of 'Host: \r\n\r\n' below.
7541edb306SCy Schubert */
7641edb306SCy Schubert if (strlen(s) + strlen(buffer) + 10 > sizeof(buffer)) {
7741edb306SCy Schubert fprintf(stderr, "load_http has a malformed URL '%s'\n", url);
7841edb306SCy Schubert free(myurl);
792582ae57SCy Schubert return (NULL);
8041edb306SCy Schubert }
8141edb306SCy Schubert
8241edb306SCy Schubert u = strchr(s, '@');
8341edb306SCy Schubert if (u != NULL)
8441edb306SCy Schubert s = u + 1; /* AUTH */
8541edb306SCy Schubert
8641edb306SCy Schubert error = snprintf(buffer + strlen(buffer), avail, "Host: %s\r\n\r\n", s);
8741edb306SCy Schubert if (error >= avail) {
8841edb306SCy Schubert fprintf(stderr, "URL is too large: %s\n", url);
8941edb306SCy Schubert goto done;
9041edb306SCy Schubert }
9141edb306SCy Schubert
9241edb306SCy Schubert u = strchr(s, ':');
9341edb306SCy Schubert if (u != NULL) {
9441edb306SCy Schubert *u++ = '\0';
9541edb306SCy Schubert port = atoi(u);
9641edb306SCy Schubert if (port < 0 || port > 65535)
9741edb306SCy Schubert goto done;
9841edb306SCy Schubert } else {
9941edb306SCy Schubert port = 80;
10041edb306SCy Schubert }
10141edb306SCy Schubert
10241edb306SCy Schubert
10341edb306SCy Schubert fd = connecttcp(s, port);
10441edb306SCy Schubert if (fd == -1)
10541edb306SCy Schubert goto done;
10641edb306SCy Schubert
10741edb306SCy Schubert
10841edb306SCy Schubert len = strlen(buffer);
10941edb306SCy Schubert if (write(fd, buffer, len) != len)
11041edb306SCy Schubert goto done;
11141edb306SCy Schubert
11241edb306SCy Schubert s = buffer;
11341edb306SCy Schubert endhdr = 0;
11441edb306SCy Schubert left = sizeof(buffer) - 1;
11541edb306SCy Schubert
11641edb306SCy Schubert while ((len = read(fd, s, left)) > 0) {
11741edb306SCy Schubert s[len] = '\0';
11841edb306SCy Schubert left -= len;
11941edb306SCy Schubert s += len;
12041edb306SCy Schubert
12141edb306SCy Schubert if (endhdr >= 0) {
12241edb306SCy Schubert if (endhdr == 0) {
12341edb306SCy Schubert t = strchr(buffer, ' ');
12441edb306SCy Schubert if (t == NULL)
12541edb306SCy Schubert continue;
12641edb306SCy Schubert t++;
12741edb306SCy Schubert if (*t != '2')
12841edb306SCy Schubert break;
12941edb306SCy Schubert }
13041edb306SCy Schubert
13141edb306SCy Schubert u = buffer;
13241edb306SCy Schubert while ((t = strchr(u, '\r')) != NULL) {
13341edb306SCy Schubert if (t == u) {
13441edb306SCy Schubert if (*(t + 1) == '\n') {
13541edb306SCy Schubert u = t + 2;
13641edb306SCy Schubert endhdr = -1;
13741edb306SCy Schubert break;
13841edb306SCy Schubert } else
13941edb306SCy Schubert t++;
14041edb306SCy Schubert } else if (*(t + 1) == '\n') {
14141edb306SCy Schubert endhdr++;
14241edb306SCy Schubert u = t + 2;
14341edb306SCy Schubert } else
14441edb306SCy Schubert u = t + 1;
14541edb306SCy Schubert }
14641edb306SCy Schubert if (endhdr >= 0)
14741edb306SCy Schubert continue;
14841edb306SCy Schubert removed = (u - buffer) + 1;
14941edb306SCy Schubert memmove(buffer, u, (sizeof(buffer) - left) - removed);
15041edb306SCy Schubert s -= removed;
15141edb306SCy Schubert left += removed;
15241edb306SCy Schubert }
15341edb306SCy Schubert
15441edb306SCy Schubert do {
15541edb306SCy Schubert t = strchr(buffer, '\n');
15641edb306SCy Schubert if (t == NULL)
15741edb306SCy Schubert break;
15841edb306SCy Schubert
15941edb306SCy Schubert linenum++;
16041edb306SCy Schubert *t = '\0';
16141edb306SCy Schubert
16241edb306SCy Schubert /*
16341edb306SCy Schubert * Remove comment and continue to the next line if
16441edb306SCy Schubert * the comment is at the start of the line.
16541edb306SCy Schubert */
16641edb306SCy Schubert u = strchr(buffer, '#');
16741edb306SCy Schubert if (u != NULL) {
16841edb306SCy Schubert *u = '\0';
16941edb306SCy Schubert if (u == buffer)
17041edb306SCy Schubert continue;
17141edb306SCy Schubert }
17241edb306SCy Schubert
17341edb306SCy Schubert /*
17441edb306SCy Schubert * Trim off tailing white spaces, will include \r
17541edb306SCy Schubert */
17641edb306SCy Schubert for (u = t - 1; (u >= buffer) && ISSPACE(*u); u--)
17741edb306SCy Schubert *u = '\0';
17841edb306SCy Schubert
17941edb306SCy Schubert a = alist_new(AF_UNSPEC, buffer);
18041edb306SCy Schubert if (a != NULL) {
18141edb306SCy Schubert if (rbot != NULL)
18241edb306SCy Schubert rbot->al_next = a;
18341edb306SCy Schubert else
18441edb306SCy Schubert rtop = a;
18541edb306SCy Schubert rbot = a;
18641edb306SCy Schubert } else {
18741edb306SCy Schubert fprintf(stderr,
18841edb306SCy Schubert "%s:%d unrecognised content:%s\n",
18941edb306SCy Schubert url, linenum, buffer);
19041edb306SCy Schubert }
19141edb306SCy Schubert
19241edb306SCy Schubert t++;
19341edb306SCy Schubert removed = t - buffer;
19441edb306SCy Schubert memmove(buffer, t, sizeof(buffer) - left - removed);
19541edb306SCy Schubert s -= removed;
19641edb306SCy Schubert left += removed;
19741edb306SCy Schubert
19841edb306SCy Schubert } while (1);
19941edb306SCy Schubert }
20041edb306SCy Schubert
20141edb306SCy Schubert done:
20241edb306SCy Schubert if (myurl != NULL)
20341edb306SCy Schubert free(myurl);
20441edb306SCy Schubert if (fd != -1)
20541edb306SCy Schubert close(fd);
2062582ae57SCy Schubert return (rtop);
20741edb306SCy Schubert }
208