From 8c394f44b8c860e1919736372798f904007a22c0 Mon Sep 17 00:00:00 2001
From: Marko Mikulicic <mkm@cesanta.com>
Date: Wed, 13 Jan 2016 16:25:05 +0200
Subject: [PATCH] Implement mg_parse_uri

    PUBLISHED_FROM=43d7e3f56cd5d45c8070ab55acdc091e61839681
---
 mongoose.c | 142 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 mongoose.h |  49 ++++++++++++++++++
 2 files changed, 191 insertions(+)

diff --git a/mongoose.c b/mongoose.c
index f25928aa0..e1fee1802 100644
--- a/mongoose.c
+++ b/mongoose.c
@@ -3770,6 +3770,148 @@ void mg_enable_multithreading(struct mg_connection *nc) {
 }
 #endif
 #ifdef NS_MODULE_LINES
+#line 1 "./src/uri.c"
+/**/
+#endif
+/*
+ * Copyright (c) 2014 Cesanta Software Limited
+ * All rights reserved
+ */
+
+/* Amalgamated: #include "mongoose/src/internal.h" */
+/* Amalgamated: #include "mongoose/src/uri.h" */
+
+/*
+ * scan string until `sep`, keeping track of component boundaries in `res`.
+ *
+ * `p` will point to the char after the separator or it will be `end`.
+ */
+static void parse_uri_component(const char **p, const char *end, char sep,
+                                struct mg_str *res) {
+  res->p = *p;
+  for (; *p < end; (*p)++) {
+    if (**p == sep) {
+      break;
+    }
+  }
+  res->len = (*p) - res->p;
+  (*p)++;
+}
+
+int mg_parse_uri(struct mg_str uri, struct mg_str *scheme,
+                 struct mg_str *user_info, struct mg_str *host,
+                 unsigned int *port, struct mg_str *path, struct mg_str *query,
+                 struct mg_str *fragment) {
+  struct mg_str rscheme = {0, 0}, ruser_info = {0, 0}, rhost = {0, 0},
+                rpath = {0, 0}, rquery = {0, 0}, rfragment = {0, 0};
+  unsigned int rport = 0;
+  enum {
+    P_START,
+    P_SCHEME_OR_PORT,
+    P_USER_INFO,
+    P_HOST,
+    P_PORT,
+    P_REST
+  } state = P_START;
+
+  const char *p = uri.p, *end = p + uri.len;
+  while (p < end) {
+    printf("STATE: %d (%s)\n", state, p);
+    switch (state) {
+      case P_START:
+        /*
+         * expecting on of:
+         * - `scheme://xxxx`
+         * - `xxxx:port`
+         * - `xxxx/path`
+         */
+        for (; p < end; p++) {
+          if (*p == ':') {
+            state = P_SCHEME_OR_PORT;
+            break;
+          } else if (*p == '/') {
+            state = P_REST;
+            break;
+          }
+        }
+        if (state == P_START || state == P_REST) {
+          rhost.p = uri.p;
+          rhost.len = p - uri.p;
+        }
+        break;
+      case P_SCHEME_OR_PORT:
+        if (end - p >= 3 && memcmp(p, "://", 3) == 0) {
+          rscheme.p = uri.p;
+          rscheme.len = p - uri.p;
+          state = P_USER_INFO;
+          p += 2; /* point to last separator char */
+        } else {
+          rhost.p = uri.p;
+          rhost.len = p - uri.p;
+          state = P_PORT;
+        }
+        break;
+      case P_USER_INFO:
+        p++;
+        ruser_info.p = p;
+        for (; p < end; p++) {
+          if (*p == '@') {
+            state = P_HOST;
+            break;
+          } else if (*p == '/') {
+            /* backtrack and parse as host */
+            state = P_HOST;
+            p = ruser_info.p;
+            break;
+          }
+        }
+        ruser_info.len = p - ruser_info.p;
+        break;
+      case P_HOST:
+        if (*p == '@') p++;
+        rhost.p = p;
+        for (; p < end; p++) {
+          if (*p == ':') {
+            state = P_PORT;
+            break;
+          } else if (*p == '/') {
+            state = P_REST;
+            break;
+          }
+        }
+        rhost.len = p - rhost.p;
+        break;
+      case P_PORT:
+        p++;
+        for (; p < end; p++) {
+          if (*p == '/') {
+            state = P_REST;
+            break;
+          }
+          rport *= 10;
+          rport += *p - '0';
+        }
+        break;
+      case P_REST:
+        /* `p` points to separator. `path` includes the separator */
+        parse_uri_component(&p, end, '?', &rpath);
+        parse_uri_component(&p, end, '#', &rquery);
+        parse_uri_component(&p, end, '\0', &rfragment);
+        break;
+    }
+  }
+
+  if (scheme != 0) *scheme = rscheme;
+  if (user_info != 0) *user_info = ruser_info;
+  if (host != 0) *host = rhost;
+  if (port != 0) *port = rport;
+  if (path != 0) *path = rpath;
+  if (query != 0) *query = rquery;
+  if (fragment != 0) *fragment = rfragment;
+
+  return 0;
+}
+#ifdef NS_MODULE_LINES
 #line 1 "./src/http.c"
 /**/
 #endif
diff --git a/mongoose.h b/mongoose.h
index 7f163de94..e7ad6fb0a 100644
--- a/mongoose.h
+++ b/mongoose.h
@@ -1218,6 +1218,55 @@ void mg_if_get_conn_addr(struct mg_connection *nc, int remote,
  * All rights reserved
  */
 
+/*
+ * === URI
+ */
+
+#ifndef MG_URI_HEADER_DEFINED
+#define MG_URI_HEADER_DEFINED
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*
+ * Parses an URI and fills string chunks with locations of the respective
+ * uri components within the input uri string. NULL pointers will be
+ * ignored.
+ *
+ * General syntax:
+ *
+ *     [scheme://[user_info@]]host[:port][/path][?query][#fragment]
+ *
+ * Example:
+ *
+ *     foo.com:80
+ *     tcp://foo.com:1234
+ *     http://foo.com:80/bar?baz=1
+ *     https://user:pw@foo.com:443/blah
+ *
+ * `path` will include the leading slash. `query` won't include the leading `?`.
+ * `host` can contain embedded colons if surrounded by square brackets in order
+ * to support IPv6 literal addresses.
+ *
+ *
+ * Returns 0 on success, -1 on error.
+ */
+int mg_parse_uri(struct mg_str uri, struct mg_str *scheme,
+                 struct mg_str *user_info, struct mg_str *host,
+                 unsigned int *port, struct mg_str *path, struct mg_str *query,
+                 struct mg_str *fragment);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+#endif /* MG_URI_HEADER_DEFINED */
+/*
+ * Copyright (c) 2014 Cesanta Software Limited
+ * All rights reserved
+ */
+
 /*
  * === Utilities
  */
-- 
GitLab