buffet: added url_utils

Added a bunch of utility functions to manipulate URLs. Stuff like
combining URLs, adding and extracting query parameters and so on.

BUG=None
TEST=New and old unit tests pass.

Change-Id: Ie8c76b611f9d985dc24aae22caf60cd22aac96a8
Reviewed-on: https://chromium-review.googlesource.com/195629
Tested-by: Alex Vakulenko <avakulenko@chromium.org>
Reviewed-by: Chris Sosa <sosa@chromium.org>
Commit-Queue: Alex Vakulenko <avakulenko@chromium.org>
diff --git a/buffet/buffet.gyp b/buffet/buffet.gyp
index 12933d4..afdabb6 100644
--- a/buffet/buffet.gyp
+++ b/buffet/buffet.gyp
@@ -18,7 +18,6 @@
         '-lbase-dbus_test_support-<(libbase_ver)',
       ],
     },
-    # TODO(sosa): Remove no-strict-aliasing: crbug.com/356745.
     'cflags_cc': [
       '-std=gnu++11',
     ],
@@ -41,6 +40,7 @@
         'manager.cc',
         'mime_utils.cc',
         'string_utils.cc',
+        'url_utils.cc'
       ],
     },
     {
@@ -78,6 +78,7 @@
         'async_event_sequencer_unittest.cc',
         'mime_utils_unittest.cc',
         'string_utils_unittest.cc',
+        'url_utils_unittest.cc'
       ],
     },
   ],
diff --git a/buffet/device_registration_info.cc b/buffet/device_registration_info.cc
index c2b7780..20fbdf2 100644
--- a/buffet/device_registration_info.cc
+++ b/buffet/device_registration_info.cc
@@ -14,6 +14,7 @@
 #include "buffet/mime_utils.h"
 #include "buffet/string_utils.h"
 #include "buffet/data_encoding.h"
+#include "buffet/url_utils.h"
 
 using namespace chromeos::http;
 using namespace chromeos::data_encoding;
@@ -76,20 +77,11 @@
   return resp;
 }
 
-std::string BuildURL(std::string url,
-                     const std::string& subpath,
+std::string BuildURL(const std::string& url,
+                     const std::vector<std::string>& subpaths,
                      const WebParamList& params) {
-  if (!subpath.empty()) {
-    if (!url.empty() && url.back() != '/')
-      url += '/';
-    url += subpath;
-  }
-
-  if (!params.empty()) {
-    url += '?';
-    url += WebParamsEncode(params);
-  }
-  return url;
+  std::string result = chromeos::url::CombineMultiple(url, subpaths);
+  return chromeos::url::AppendQueryParams(result, params);
 }
 
 
@@ -104,22 +96,18 @@
 
 std::string DeviceRegistrationInfo::GetServiceURL(
     const std::string& subpath, const WebParamList& params) const {
-  return BuildURL(service_url_, subpath, params);
+  return BuildURL(service_url_, {subpath}, params);
 }
 
 std::string DeviceRegistrationInfo::GetDeviceURL(
     const std::string& subpath, const WebParamList& params) const {
   CHECK(!device_id_.empty()) << "Must have a valid device ID";
-  std::string path = "devices/" + device_id_;
-  if (!subpath.empty()) {
-    path += '/' + subpath;
-  }
-  return GetServiceURL(path, params);
+  return BuildURL(service_url_, {"devices", device_id_, subpath}, params);
 }
 
 std::string DeviceRegistrationInfo::GetOAuthURL(const std::string& subpath,
                                     const WebParamList& params) const {
-  return BuildURL(oauth_url_, subpath, params);
+  return BuildURL(oauth_url_, {subpath}, params);
 }
 
 std::string DeviceRegistrationInfo::GetDeviceId() {
diff --git a/buffet/mime_utils.h b/buffet/mime_utils.h
index 31a7687..44f65c8 100644
--- a/buffet/mime_utils.h
+++ b/buffet/mime_utils.h
@@ -5,6 +5,7 @@
 #ifndef BUFFET_MIME_UTILS_H_
 #define BUFFET_MIME_UTILS_H_
 
+#include <base/basictypes.h>
 #include <string>
 #include <vector>
 
@@ -57,7 +58,7 @@
 // e.g. Combine("text", "plain", {{"charset", "utf-8"}}) will give:
 //      "text/plain; charset=utf-8"
 std::string Combine(const std::string& type, const std::string& subtype,
-                    const Parameters& parameters = {});
+                    const Parameters& parameters = {}) WARN_UNUSED_RESULT;
 
 // Splits a MIME string into type and subtype.
 // "text/plain;charset=utf-8" => ("text", "plain")
@@ -83,13 +84,13 @@
 
 // Removes parameters from a MIME string
 // "text/plain;charset=utf-8" => "text/plain"
-std::string RemoveParameters(const std::string& mime_string);
+std::string RemoveParameters(const std::string& mime_string) WARN_UNUSED_RESULT;
 
 // Appends a parameter to a MIME string.
 // "text/plain" => "text/plain; charset=utf-8"
 std::string AppendParameter(const std::string& mime_string,
                             const std::string& paramName,
-                            const std::string& paramValue);
+                            const std::string& paramValue) WARN_UNUSED_RESULT;
 
 // Returns the value of a parameter on a MIME string (empty string if missing).
 // ("text/plain;charset=utf-8","charset") => "utf-8"
diff --git a/buffet/url_utils.cc b/buffet/url_utils.cc
new file mode 100644
index 0000000..08d78f7
--- /dev/null
+++ b/buffet/url_utils.cc
@@ -0,0 +1,162 @@
+// Copyright 2014 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "buffet/url_utils.h"
+
+#include <algorithm>
+
+namespace {
+// Given a URL string, determine where the query string starts and ends.
+// URLs have scheme, domain and path (along with possible user name, password
+// and port number which are of no interest for us here) which could optionally
+// have a query string that is separated from the path by '?'. Finally, the URL
+// could also have a '#'-separated URL fragment which is usually used by the
+// browser as a bookmark element. So, for example:
+//    http://server.com/path/to/object?k=v&foo=bar#fragment
+// Here:
+//    http://server.com/path/to/object - is the URL of the object,
+//    ?k=v&foo=bar                     - URL query string
+//    #fragment                        - URL fragment string
+// If |exclude_fragment| is true, the function returns the start character and
+// the length of the query string alone. If it is false, the query string length
+// will include both the query string and the fragment.
+bool GetQueryStringPos(const std::string& url, bool exclude_fragment,
+                       size_t* query_pos, size_t* query_len) {
+  size_t query_start = url.find_first_of("?#");
+  if (query_start == std::string::npos) {
+    *query_pos = url.size();
+    if (query_len)
+      *query_len = 0;
+    return false;
+  }
+
+  *query_pos = query_start;
+  if (query_len) {
+    size_t query_end = url.size();
+
+    if (exclude_fragment) {
+      if (url[query_start] == '?') {
+        size_t pos_fragment = url.find('#', query_start);
+        if (pos_fragment != std::string::npos)
+          query_end = pos_fragment;
+      } else {
+        query_end = query_start;
+      }
+    }
+    *query_len = query_end - query_start;
+  }
+  return true;
+}
+}  // anonymous namespace
+
+std::string chromeos::url::TrimOffQueryString(std::string* url) {
+  size_t query_pos;
+  if (!GetQueryStringPos(*url, false, &query_pos, nullptr))
+    return std::string();
+  std::string query_string = url->substr(query_pos);
+  url->resize(query_pos);
+  return query_string;
+}
+
+std::string chromeos::url::Combine(
+    const std::string& url, const std::string& subpath) {
+  return CombineMultiple(url, {subpath});
+}
+
+std::string chromeos::url::CombineMultiple(
+    const std::string& url, const std::vector<std::string>& parts) {
+  std::string result = url;
+  if (!parts.empty()) {
+    std::string query_string = TrimOffQueryString(&result);
+    for (auto&& part : parts) {
+      if (!part.empty()) {
+        if (!result.empty() && result.back() != '/')
+          result += '/';
+        size_t non_slash_pos = part.find_first_not_of('/');
+        if (non_slash_pos != std::string::npos)
+          result += part.substr(non_slash_pos);
+      }
+    }
+    result += query_string;
+  }
+  return result;
+}
+
+std::string chromeos::url::GetQueryString(
+    const std::string& url, bool remove_fragment) {
+  std::string query_string;
+  size_t query_pos, query_len;
+  if (GetQueryStringPos(url, remove_fragment, &query_pos, &query_len)) {
+    query_string = url.substr(query_pos, query_len);
+  }
+  return query_string;
+}
+
+chromeos::data_encoding::WebParamList chromeos::url::GetQueryStringParameters(
+    const std::string& url) {
+  std::string query_string = GetQueryString(url, true);
+  query_string.erase(0, 1);  // Drop leading '?'; no-op if there is no query.
+  return chromeos::data_encoding::WebParamsDecode(query_string);
+}
+
+std::string chromeos::url::GetQueryStringValue(
+    const std::string& url, const std::string& name) {
+  return GetQueryStringValue(GetQueryStringParameters(url), name);
+}
+
+std::string chromeos::url::GetQueryStringValue(
+    const chromeos::data_encoding::WebParamList& params,
+    const std::string& name) {
+  for (auto&& pair : params) {
+    if (name.compare(pair.first) == 0)
+      return pair.second;
+  }
+  return std::string();
+}
+
+std::string chromeos::url::RemoveQueryString(
+    const std::string& url, bool remove_fragment_too) {
+  size_t query_pos, query_len;
+  if (!GetQueryStringPos(url, !remove_fragment_too, &query_pos, &query_len))
+    return url;
+  std::string result = url.substr(0, query_pos);
+  size_t fragment_pos = query_pos + query_len;
+  if (fragment_pos < url.size()) {
+    result += url.substr(fragment_pos);
+  }
+  return result;
+}
+
+std::string chromeos::url::AppendQueryParam(
+    const std::string& url, const std::string& name, const std::string& value) {
+  return AppendQueryParams(url, {{name, value}});
+}
+
+std::string chromeos::url::AppendQueryParams(
+    const std::string& url,
+    const chromeos::data_encoding::WebParamList& params) {
+  if (params.empty())
+    return url;
+  size_t query_pos, query_len;
+  GetQueryStringPos(url, true, &query_pos, &query_len);
+  size_t fragment_pos = query_pos + query_len;
+  std::string result = url.substr(0, fragment_pos);
+  if (query_len == 0) {
+    result += '?';
+  } else if (query_len > 1) {
+    result += '&';
+  }
+  result += chromeos::data_encoding::WebParamsEncode(params);
+  if (fragment_pos < url.size()) {
+    result += url.substr(fragment_pos);
+  }
+  return result;
+}
+
+bool chromeos::url::HasQueryString(const std::string& url) {
+  size_t query_pos, query_len;
+  GetQueryStringPos(url, true, &query_pos, &query_len);
+  return (query_len > 0);
+}
+
diff --git a/buffet/url_utils.h b/buffet/url_utils.h
new file mode 100644
index 0000000..f08cf45
--- /dev/null
+++ b/buffet/url_utils.h
@@ -0,0 +1,75 @@
+// Copyright 2014 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BUFFET_URL_UTILS_H_
+#define BUFFET_URL_UTILS_H_
+
+#include <base/basictypes.h>
+#include <string>
+#include <vector>
+#include "buffet/data_encoding.h"
+
+namespace chromeos {
+
+namespace url {
+
+// Appends a subpath to url, delimiting them with '/' if the path doesn't
+// end with it already. Also handles URLs with query parameters/fragment.
+std::string Combine(const std::string& url,
+                    const std::string& subpath) WARN_UNUSED_RESULT;
+std::string CombineMultiple(
+    const std::string& url,
+    const std::vector<std::string>& parts) WARN_UNUSED_RESULT;
+
+// Removes the query string/fragment from |url| and returns the query string.
+// This method actually modifies |url|. So, if you call it on this:
+//    http://www.test.org/?foo=bar
+// it will modify |url| to "http://www.test.org/" and return "?foo=bar"
+std::string TrimOffQueryString(std::string* url);
+
+// Returns the query string, if available.
+// For example, for the following URL:
+//    http://server.com/path/to/object?k=v&foo=bar#fragment
+// Here:
+//    http://server.com/path/to/object - is the URL of the object,
+//    ?k=v&foo=bar                     - URL query string
+//    #fragment                        - URL fragment string
+// If |remove_fragment| is true, the function returns the query string without
+// the fragment. Otherwise the fragment is included as part of the result.
+std::string GetQueryString(const std::string& url, bool remove_fragment);
+
+// Parses the query string into a set of key-value pairs.
+data_encoding::WebParamList GetQueryStringParameters(const std::string& url);
+
+// Returns a value of the specified query parameter, or empty string if missing.
+std::string GetQueryStringValue(const std::string& url,
+                                const std::string& name);
+std::string GetQueryStringValue(const data_encoding::WebParamList& params,
+                                const std::string& name);
+
+// Removes the query string and/or a fragment part from URL.
+// If |remove_fragment| is true, the fragment is also removed.
+// For example:
+//    http://server.com/path/to/object?k=v&foo=bar#fragment
+// true  -> http://server.com/path/to/object
+// false -> http://server.com/path/to/object#fragment
+std::string RemoveQueryString(const std::string& url,
+                              bool remove_fragment) WARN_UNUSED_RESULT;
+
+// Appends a single query parameter to the URL.
+std::string AppendQueryParam(const std::string& url,
+                             const std::string& name,
+                             const std::string& value) WARN_UNUSED_RESULT;
+// Appends a list of query parameters to the URL.
+std::string AppendQueryParams(
+    const std::string& url,
+    const data_encoding::WebParamList& params) WARN_UNUSED_RESULT;
+
+// Checks if the URL has query parameters.
+bool HasQueryString(const std::string& url);
+
+} // namespace url
+} // namespace chromeos
+
+#endif // BUFFET_URL_UTILS_H_
diff --git a/buffet/url_utils_unittest.cc b/buffet/url_utils_unittest.cc
new file mode 100644
index 0000000..3407d60
--- /dev/null
+++ b/buffet/url_utils_unittest.cc
@@ -0,0 +1,152 @@
+// Copyright (c) 2014 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "buffet/url_utils.h"
+
+#include <gtest/gtest.h>
+
+using namespace chromeos;
+
+TEST(UrlUtils, Combine) {
+  EXPECT_EQ("http://sample.org/path",
+            url::Combine("http://sample.org", "path"));
+  EXPECT_EQ("http://sample.org/path",
+            url::Combine("http://sample.org/", "path"));
+  EXPECT_EQ("path1/path2", url::Combine("", "path1/path2"));
+  EXPECT_EQ("path1/path2", url::Combine("path1", "path2"));
+  EXPECT_EQ("http://sample.org",
+            url::Combine("http://sample.org", ""));
+  EXPECT_EQ("http://sample.org/path",
+            url::Combine("http://sample.org/", "/path"));
+  EXPECT_EQ("http://sample.org/path",
+            url::Combine("http://sample.org", "//////path"));
+  EXPECT_EQ("http://sample.org/",
+            url::Combine("http://sample.org", "///"));
+  EXPECT_EQ("http://sample.org/obj/path1/path2",
+            url::Combine("http://sample.org/obj", "path1/path2"));
+  EXPECT_EQ("http://sample.org/obj/path1/path2#tag",
+            url::Combine("http://sample.org/obj#tag", "path1/path2"));
+  EXPECT_EQ("http://sample.org/obj/path1/path2?k1=v1&k2=v2",
+            url::Combine("http://sample.org/obj?k1=v1&k2=v2", "path1/path2"));
+  EXPECT_EQ("http://sample.org/obj/path1/path2?k1=v1#k2=v2",
+            url::Combine("http://sample.org/obj/?k1=v1#k2=v2", "path1/path2"));
+  EXPECT_EQ("http://sample.org/obj/path1/path2#tag?",
+            url::Combine("http://sample.org/obj#tag?", "path1/path2"));
+  EXPECT_EQ("path1/path2", url::CombineMultiple("", {"path1", "path2"}));
+  EXPECT_EQ("http://sample.org/obj/part1/part2",
+            url::CombineMultiple("http://sample.org",
+                                 {"obj", "", "/part1/", "part2"}));
+}
+
+TEST(UrlUtils, GetQueryString) {
+  EXPECT_EQ("", url::GetQueryString("http://sample.org", false));
+  EXPECT_EQ("", url::GetQueryString("http://sample.org", true));
+  EXPECT_EQ("", url::GetQueryString("", false));
+  EXPECT_EQ("", url::GetQueryString("", true));
+
+  EXPECT_EQ("?q=v&b=2#tag?2",
+            url::GetQueryString("http://s.com/?q=v&b=2#tag?2", false));
+  EXPECT_EQ("?q=v&b=2",
+            url::GetQueryString("http://s.com/?q=v&b=2#tag?2", true));
+
+  EXPECT_EQ("#tag?a=2",
+            url::GetQueryString("http://s.com/#tag?a=2", false));
+  EXPECT_EQ("",
+            url::GetQueryString("http://s.com/#tag?a=2", true));
+
+  EXPECT_EQ("?a=2&b=2",
+            url::GetQueryString("?a=2&b=2", false));
+  EXPECT_EQ("?a=2&b=2",
+            url::GetQueryString("?a=2&b=2", true));
+
+  EXPECT_EQ("#s#?d#?f?#s?#d",
+            url::GetQueryString("#s#?d#?f?#s?#d", false));
+  EXPECT_EQ("",
+            url::GetQueryString("#s#?d#?f?#s?#d", true));
+}
+
+TEST(UrlUtils, GetQueryStringParameters) {
+  auto params = url::GetQueryStringParameters(
+    "http://sample.org/path?k=v&&%3Dkey%3D=val%26&r#blah");
+
+  EXPECT_EQ(3, params.size());
+  EXPECT_EQ("k", params[0].first);
+  EXPECT_EQ("v", params[0].second);
+  EXPECT_EQ("=key=", params[1].first);
+  EXPECT_EQ("val&", params[1].second);
+  EXPECT_EQ("r", params[2].first);
+  EXPECT_EQ("", params[2].second);
+}
+
+TEST(UrlUtils, GetQueryStringValue) {
+  std::string url = "http://url?key1=val1&&key2=val2";
+  EXPECT_EQ("val1", url::GetQueryStringValue(url, "key1"));
+  EXPECT_EQ("val2", url::GetQueryStringValue(url, "key2"));
+  EXPECT_EQ("", url::GetQueryStringValue(url, "key3"));
+
+  auto params = url::GetQueryStringParameters(url);
+  EXPECT_EQ("val1", url::GetQueryStringValue(params, "key1"));
+  EXPECT_EQ("val2", url::GetQueryStringValue(params, "key2"));
+  EXPECT_EQ("", url::GetQueryStringValue(params, "key3"));
+}
+
+TEST(UrlUtils, TrimOffQueryString) {
+  std::string url = "http://url?key1=val1&key2=val2#fragment";
+  std::string query = url::TrimOffQueryString(&url);
+  EXPECT_EQ("http://url", url);
+  EXPECT_EQ("?key1=val1&key2=val2#fragment", query);
+
+  url = "http://url#fragment";
+  query = url::TrimOffQueryString(&url);
+  EXPECT_EQ("http://url", url);
+  EXPECT_EQ("#fragment", query);
+
+  url = "http://url";
+  query = url::TrimOffQueryString(&url);
+  EXPECT_EQ("http://url", url);
+  EXPECT_EQ("", query);
+}
+
+TEST(UrlUtils, RemoveQueryString) {
+  std::string url = "http://url?key1=val1&key2=val2#fragment";
+  EXPECT_EQ("http://url", url::RemoveQueryString(url, true));
+  EXPECT_EQ("http://url#fragment", url::RemoveQueryString(url, false));
+}
+
+TEST(UrlUtils, AppendQueryParam) {
+  std::string url = "http://server.com/path";
+  url = url::AppendQueryParam(url, "param", "value");
+  EXPECT_EQ("http://server.com/path?param=value", url);
+  url = url::AppendQueryParam(url, "param2", "v");
+  EXPECT_EQ("http://server.com/path?param=value&param2=v", url);
+
+  url = "http://server.com/path#fragment";
+  url = url::AppendQueryParam(url, "param", "value");
+  EXPECT_EQ("http://server.com/path?param=value#fragment", url);
+  url = url::AppendQueryParam(url, "param2", "v");
+  EXPECT_EQ("http://server.com/path?param=value&param2=v#fragment", url);
+
+  url = url::AppendQueryParam("http://server.com/path?", "param", "value");
+  EXPECT_EQ("http://server.com/path?param=value", url);
+}
+
+TEST(UrlUtils, AppendQueryParams) {
+  std::string url = "http://server.com/path";
+  url = url::AppendQueryParams(url, {});
+  EXPECT_EQ("http://server.com/path", url);
+  url = url::AppendQueryParams(url, {{"param", "value"}, {"q", "="}});
+  EXPECT_EQ("http://server.com/path?param=value&q=%3D", url);
+  url += "#fr?";
+  url = url::AppendQueryParams(url, {{"p", "1"}, {"s&", "\n"}});
+  EXPECT_EQ("http://server.com/path?param=value&q=%3D&p=1&s%26=%0A#fr?", url);
+}
+
+TEST(UrlUtils, HasQueryString) {
+  EXPECT_FALSE(url::HasQueryString("http://server.com/path"));
+  EXPECT_FALSE(url::HasQueryString("http://server.com/path#blah?v=1"));
+  EXPECT_TRUE(url::HasQueryString("http://server.com/path?v=1#blah"));
+  EXPECT_TRUE(url::HasQueryString("http://server.com/path?v=1"));
+  EXPECT_FALSE(url::HasQueryString(""));
+  EXPECT_TRUE(url::HasQueryString("?ss"));
+}