From 6c54370aa13b65a2c79f08d35fa9264e00dd7d4d Mon Sep 17 00:00:00 2001
From: Sergey Lyubka <valenok@gmail.com>
Date: Sat, 28 Jul 2012 11:04:03 +0100
Subject: [PATCH] Added mg_connect(), mg_fetch() API functions. More tests
 added

---
 mongoose.c       | 145 ++++++++++++++++++++++++++++++++++++++---------
 mongoose.h       |  20 +++++++
 test/test.pl     |  14 +++--
 test/unit_test.c |  75 +++++++++++++++++++-----
 4 files changed, 208 insertions(+), 46 deletions(-)

diff --git a/mongoose.c b/mongoose.c
index fadc1eb2c..1ae7d3df0 100644
--- a/mongoose.c
+++ b/mongoose.c
@@ -2622,26 +2622,43 @@ static int is_valid_http_method(const char *method) {
 // Parse HTTP request, fill in mg_request_info structure.
 // This function modifies the buffer with HTTP request by nul-terminating
 // HTTP request components, header names and header values.
-static int parse_http_request(char *buf, struct mg_request_info *ri) {
-  int status = 0;
-
-  // RFC says that all initial whitespaces should be ingored
-  while (*buf != '\0' && isspace(* (unsigned char *) buf)) {
-    buf++;
+static int parse_http_message(char *buf, int len, struct mg_request_info *ri) {
+  int request_length = get_request_len(buf, len);
+  if (request_length > 0) {
+    // Reset attributes. DO NOT TOUCH is_ssl, remote_ip, remote_port
+    ri->remote_user = ri->request_method = ri->uri = ri->http_version;
+    ri->num_headers = 0;
+    ri->status_code = -1;
+
+    buf[request_length - 1] = '\0';
+
+    // RFC says that all initial whitespaces should be ingored
+    while (*buf != '\0' && isspace(* (unsigned char *) buf)) {
+      buf++;
+    }
+    ri->request_method = skip(&buf, " ");
+    ri->uri = skip(&buf, " ");
+    ri->http_version = skip(&buf, "\r\n");
+    parse_http_headers(&buf, ri);
   }
+  return request_length;
+}
 
-  ri->request_method = skip(&buf, " ");
-  ri->uri = skip(&buf, " ");
-  ri->http_version = skip(&buf, "\r\n");
-
-  if (is_valid_http_method(ri->request_method) &&
-      strncmp(ri->http_version, "HTTP/", 5) == 0) {
+static int parse_http_request(char *buf, int len, struct mg_request_info *ri) {
+  int result = parse_http_message(buf, len, ri);
+  if (result > 0 &&
+      is_valid_http_method(ri->request_method) &&
+      !strncmp(ri->http_version, "HTTP/", 5)) {
     ri->http_version += 5;   // Skip "HTTP/"
-    parse_http_headers(&buf, ri);
-    status = 1;
+  } else {
+    result = -1;
   }
+  return result;
+}
 
-  return status;
+static int parse_http_response(char *buf, int len, struct mg_request_info *ri) {
+  int result = parse_http_message(buf, len, ri);
+  return result > 0 && !strncmp(ri->request_method, "HTTP/", 5) ? result : -1;
 }
 
 // Keep reading the input (either opened file descriptor fd, or socket sock,
@@ -2659,7 +2676,7 @@ static int read_request(FILE *fp, SOCKET sock, SSL *ssl, char *buf, int bufsiz,
         (n = pull(fp, sock, ssl, buf + *nread, bufsiz - *nread)) > 0) {
       *nread += n;
     }
-  } while (*nread < bufsiz && request_len == 0 && n > 0);
+  } while (*nread <= bufsiz && request_len == 0 && n > 0);
 
   return request_len;
 }
@@ -3802,14 +3819,7 @@ static int set_acl_option(struct mg_context *ctx) {
 }
 
 static void reset_per_request_attributes(struct mg_connection *conn) {
-  struct mg_request_info *ri = &conn->request_info;
-
-  // Reset request info attributes. DO NOT TOUCH is_ssl, remote_ip, remote_port
-  ri->remote_user = ri->request_method = ri->uri = ri->http_version =
-    conn->path_info = NULL;
-  ri->num_headers = 0;
-  ri->status_code = -1;
-
+  conn->path_info = NULL;
   conn->num_bytes_sent = conn->consumed_content = 0;
   conn->content_len = -1;
   conn->request_len = conn->data_len = 0;
@@ -3855,6 +3865,88 @@ static void close_connection(struct mg_connection *conn) {
   }
 }
 
+struct mg_connection *mg_connect(struct mg_context *ctx,
+                                 const char *host, int port, int use_ssl) {
+  struct mg_connection *newconn = NULL;
+  struct sockaddr_in sin;
+  struct hostent *he;
+  int sock;
+
+  if (ctx->ssl_ctx == NULL && use_ssl) {
+    cry(fc(ctx), "%s: SSL is not initialized", __func__);
+  } else if ((he = gethostbyname(host)) == NULL) {
+    cry(fc(ctx), "%s: gethostbyname(%s): %s", __func__, host, strerror(ERRNO));
+  } else if ((sock = socket(PF_INET, SOCK_STREAM, 0)) == INVALID_SOCKET) {
+    cry(fc(ctx), "%s: socket: %s", __func__, strerror(ERRNO));
+  } else {
+    sin.sin_family = AF_INET;
+    sin.sin_port = htons((uint16_t) port);
+    sin.sin_addr = * (struct in_addr *) he->h_addr_list[0];
+    if (connect(sock, (struct sockaddr *) &sin, sizeof(sin)) != 0) {
+      cry(fc(ctx), "%s: connect(%s:%d): %s", __func__, host, port,
+          strerror(ERRNO));
+      closesocket(sock);
+    } else if ((newconn = (struct mg_connection *)
+                calloc(1, sizeof(*newconn))) == NULL) {
+      cry(fc(ctx), "%s: calloc: %s", __func__, strerror(ERRNO));
+      closesocket(sock);
+    } else {
+      newconn->client.sock = sock;
+      newconn->client.rsa.sin = sin;
+      newconn->client.is_ssl = use_ssl;
+      if (use_ssl) {
+        sslize(newconn, SSL_connect);
+      }
+    }
+  }
+
+  return newconn;
+}
+
+FILE *mg_fetch(struct mg_context *ctx, const char *url, const char *path,
+               struct mg_request_info *ri) {
+  struct mg_connection *newconn;
+  int n, req_length, data_length = 0, port = 80;
+  char host[1025], proto[10], buf[16384];
+  FILE *fp = NULL;
+
+  if (sscanf(url, "%9[htps]://%1024[^:]:%d/%n", proto, host, &port, &n) != 3 &&
+      sscanf(url, "%9[htps]://%1024[^/]/%n", proto, host, &n) != 2) {
+    cry(fc(ctx), "%s: invalid URL: [%s]", __func__, url);
+  } else if ((newconn = mg_connect(ctx, host, port,
+                                   !strcmp(proto, "https"))) == NULL) {
+    cry(fc(ctx), "%s: mg_connect(%s): %s", __func__, url, strerror(ERRNO));
+  } else {
+    mg_printf(newconn, "GET /%s HTTP/1.0\r\n\r\n", url + n);
+    req_length = read_request(NULL, newconn->client.sock,
+                              newconn->ssl, buf, sizeof(buf), &data_length);
+    if (req_length <= 0) {
+      cry(fc(ctx), "%s(%s): invalid HTTP reply", __func__, url);
+    } else if (parse_http_response(buf, req_length, ri) <= 0) {
+      cry(fc(ctx), "%s(%s): cannot parse HTTP headers", __func__, url);
+    } else if ((fp = fopen(path, "w+b")) == NULL) {
+      cry(fc(ctx), "%s: fopen(%s): %s", __func__, path, strerror(ERRNO));
+    } else {
+      data_length -= req_length;
+      memmove(buf, buf + req_length, data_length);
+      do {
+        if (fwrite(buf, 1, data_length, fp) != (size_t) data_length) {
+          fclose(fp);
+          fp = NULL;
+          break;
+        }
+        data_length = mg_read(newconn, buf, sizeof(buf));
+      } while (data_length > 0);
+    }
+    close_connection(newconn);
+    free(newconn);
+  }
+
+  return fp;
+}
+
+
+
 static void discard_current_request_from_buffer(struct mg_connection *conn) {
   char *buffered;
   int buffered_len, body_len;
@@ -3903,9 +3995,8 @@ static void process_new_connection(struct mg_connection *conn) {
       return;  // Remote end closed the connection
     }
 
-    // Nul-terminate the request cause parse_http_request() uses sscanf
-    conn->buf[conn->request_len - 1] = '\0';
-    if (!parse_http_request(conn->buf, ri) || !is_valid_uri(ri->uri)) {
+    if (parse_http_request(conn->buf, conn->buf_size, ri) <= 0 ||
+        !is_valid_uri(ri->uri)) {
       // Do not put garbage in the access log, just send it back to the client
       send_http_error(conn, 400, "Bad Request",
           "Cannot parse HTTP request: [%.*s]", conn->data_len, conn->buf);
diff --git a/mongoose.h b/mongoose.h
index f97f417f8..a672756cc 100644
--- a/mongoose.h
+++ b/mongoose.h
@@ -218,6 +218,26 @@ int mg_get_cookie(const struct mg_connection *,
                   const char *cookie_name, char *buf, size_t buf_len);
 
 
+// Connect to the remote web server.
+// Return:
+//   On success, valid pointer to the new connection
+//   On error, NULL
+struct mg_connection *mg_connect(struct mg_context *ctx,
+                                 const char *host, int port, int use_ssl);
+
+
+// Download given URL to a given file.
+//   url: URL to download
+//   path: file name where to save the data
+//   request_info: pointer to a structure that will hold parsed reply headers
+// Return:
+//   On success, opened file stream to the downloaded contents. The stream
+//   is positioned to the end of the file.
+//   On error, NULL
+FILE *mg_fetch(struct mg_context *ctx, const char *url, const char *path,
+               struct mg_request_info *request_info);
+
+
 // Return Mongoose version.
 const char *mg_version(void);
 
diff --git a/test/test.pl b/test/test.pl
index 16dd8b70c..4889ecea6 100644
--- a/test/test.pl
+++ b/test/test.pl
@@ -85,7 +85,8 @@ sub o {
   if ($reply =~ /$expected_reply/s) {
     print "OK\n";
   } else {
-    fail("Requested: [$request]\nExpected: [$expected_reply], got: [$reply]");
+#fail("Requested: [$request]\nExpected: [$expected_reply], got: [$reply]");
+    fail("Expected: [$expected_reply], got: [$reply]");
   }
 }
 
@@ -177,6 +178,11 @@ my $cmd = "$exe ".
 $cmd .= ' -cgi_interpreter perl' if on_windows();
 spawn($cmd);
 
+  my $x = 'x=' . 'A' x (200 * 1024);
+  my $len = length($x);
+  o("POST /env.cgi HTTP/1.0\r\nContent-Length: $len\r\n\r\n$x",
+    '^HTTP/1.1 200 OK', 'Long POST');
+
 # Try to overflow: Send very long request
 req('POST ' . '/..' x 100 . 'ABCD' x 3000 . "\n\n", 0); # don't log this one
 
@@ -339,10 +345,6 @@ unless (scalar(@ARGV) > 0 and $ARGV[0] eq "basic_tests") {
   o("GET /hello.txt HTTP/1.0\nAuthorization: $auth_header\n\n", 'HTTP/1.1 200 OK', 'GET regular file with auth');
   unlink "$root/.htpasswd";
 
-  my $x = 'x=' . 'A' x (200 * 1024);
-  my $len = length($x);
-  o("POST /env.cgi HTTP/1.0\r\nContent-Length: $len\r\n\r\n$x",
-    '^HTTP/1.1 200 OK', 'Long POST');
 
   o("GET /env.cgi HTTP/1.0\n\r\n", 'HTTP/1.1 200 OK', 'GET CGI file');
   o("GET /bad2.cgi HTTP/1.0\n\n", "HTTP/1.1 123 Please pass me to the client\r",
@@ -443,7 +445,7 @@ sub do_PUT_test {
 }
 
 sub do_unit_test {
-  my $cmd = "cc -W -Wall -o $unit_test_exe $root/unit_test.c -I. ".
+  my $cmd = "cc -g -W -Wall -o $unit_test_exe $root/unit_test.c -I. ".
     "-pthread -DNO_SSL ";
   if (on_windows()) {
     $cmd = "cl $root/embed.c mongoose.c /I. /nologo /DNO_SSL ".
diff --git a/test/unit_test.c b/test/unit_test.c
index f6b80c529..d6200f2a3 100644
--- a/test/unit_test.c
+++ b/test/unit_test.c
@@ -13,25 +13,21 @@ static void test_parse_http_request() {
   char req3[] = "GET / HTTP/1.1\r\nBah\r\n";
   char req4[] = "GET / HTTP/1.1\r\nA: foo bar\r\nB: bar\r\nbaz\r\n\r\n";
 
-  ASSERT(parse_http_request(req1, &ri) == 1);
+  ASSERT(parse_http_request(req1, sizeof(req1), &ri) == sizeof(req1) - 1);
   ASSERT(strcmp(ri.http_version, "1.1") == 0);
   ASSERT(ri.num_headers == 0);
 
-  ASSERT(parse_http_request(req2, &ri) == 0);
-
-  // TODO(lsm): Fix this. Bah is not a valid header.
-  ASSERT(parse_http_request(req3, &ri) == 1);
-  ASSERT(ri.num_headers == 1);
-  ASSERT(strcmp(ri.http_headers[0].name, "Bah\r\n") == 0);
+  ASSERT(parse_http_request(req2, sizeof(req2), &ri) == -1);
+  ASSERT(parse_http_request(req3, sizeof(req3), &ri) == -1);
 
   // TODO(lsm): Fix this. Header value may span multiple lines.
-  ASSERT(parse_http_request(req4, &ri) == 1);
+  ASSERT(parse_http_request(req4, sizeof(req4), &ri) == sizeof(req4) - 1);
   ASSERT(ri.num_headers == 3);
   ASSERT(strcmp(ri.http_headers[0].name, "A") == 0);
   ASSERT(strcmp(ri.http_headers[0].value, "foo bar") == 0);
   ASSERT(strcmp(ri.http_headers[1].name, "B") == 0);
   ASSERT(strcmp(ri.http_headers[1].value, "bar") == 0);
-  ASSERT(strcmp(ri.http_headers[2].name, "baz\r\n\r\n") == 0);
+  ASSERT(strcmp(ri.http_headers[2].name, "baz\r\n\r") == 0);
   ASSERT(strcmp(ri.http_headers[2].value, "") == 0);
 
   // TODO(lsm): add more tests. 
@@ -47,7 +43,7 @@ static void test_should_keep_alive(void) {
 
   memset(&conn, 0, sizeof(conn));
   conn.ctx = &ctx;
-  parse_http_request(req1, &conn.request_info);
+  parse_http_request(req1, sizeof(req1), &conn.request_info);
 
   ctx.config[ENABLE_KEEP_ALIVE] = "no";
   ASSERT(should_keep_alive(&conn) == 0);
@@ -59,13 +55,13 @@ static void test_should_keep_alive(void) {
   ASSERT(should_keep_alive(&conn) == 0);
 
   conn.must_close = 0;
-  parse_http_request(req2, &conn.request_info);
+  parse_http_request(req2, sizeof(req2), &conn.request_info);
   ASSERT(should_keep_alive(&conn) == 0);
 
-  parse_http_request(req3, &conn.request_info);
+  parse_http_request(req3, sizeof(req3), &conn.request_info);
   ASSERT(should_keep_alive(&conn) == 0);
 
-  parse_http_request(req4, &conn.request_info);
+  parse_http_request(req4, sizeof(req4), &conn.request_info);
   ASSERT(should_keep_alive(&conn) == 1);
 
   conn.request_info.status_code = 401;
@@ -131,10 +127,63 @@ static void test_remove_double_dots() {
   }
 }
 
+static const char *fetch_data = "hello world!\n";
+static void *event_handler(enum mg_event event,
+                           struct mg_connection *conn,
+                           const struct mg_request_info *request_info) {
+  if (event == MG_NEW_REQUEST && !strcmp(request_info->uri, "/data")) {
+    mg_printf(conn, "HTTP/1.1 200 OK\r\n"
+              "Content-Length: %d\r\n\r\n"
+              "%s", (int) strlen(fetch_data), fetch_data);
+    return "";
+  } else if (event == MG_EVENT_LOG) {
+    printf("%s\n", request_info->log_message);
+  }
+  
+  return NULL;
+}
+
+static void test_mg_fetch(void) {
+  static const char *options[] = {
+    "document_root", ".",
+    "listening_ports", "33796",
+    NULL,
+  };
+  char buf[1000];
+  int length;
+  struct mg_context *ctx;
+  struct mg_request_info ri;
+  const char *tmp_file = "temporary_file_name_for_unit_test.txt";
+  FILE *fp;
+
+  ASSERT((ctx = mg_start(event_handler, NULL, options)) != NULL);
+
+  // Failed fetch, pass invalid URL
+  ASSERT(mg_fetch(ctx, "localhost", tmp_file, &ri) == NULL);
+  ASSERT(mg_fetch(ctx, "localhost:33796", tmp_file, &ri) == NULL);
+  ASSERT(mg_fetch(ctx, "http://$$$.$$$", tmp_file, &ri) == NULL);
+
+  // Failed fetch, pass invalid file name
+  ASSERT(mg_fetch(ctx, "http://localhost:33796/data",
+                  "/this/file/must/not/exist/ever", &ri) == NULL);
+
+  // Successful fetch
+  ASSERT((fp = mg_fetch(ctx, "http://localhost:33796/data",
+                        tmp_file, &ri)) != NULL);
+  ASSERT((length = ftell(fp)) == (int) strlen(fetch_data));
+  fseek(fp, 0, SEEK_SET);
+  ASSERT(fread(buf, 1, length, fp) == length);
+  ASSERT(memcmp(buf, fetch_data, length) == 0);
+
+  remove(tmp_file);
+  mg_stop(ctx);
+}
+
 int main(void) {
   test_match_prefix();
   test_remove_double_dots();
   test_should_keep_alive();
   test_parse_http_request();
+  test_mg_fetch();
   return 0;
 }
-- 
GitLab