buffet: added url_utils
Added a bunch of utility functions to manipulate URLs. Stuff like
combining URLs, adding and extracting query parameters and so on.
BUG=None
TEST=New and old unit tests pass.
Change-Id: Ie8c76b611f9d985dc24aae22caf60cd22aac96a8
Reviewed-on: https://chromium-review.googlesource.com/195629
Tested-by: Alex Vakulenko <avakulenko@chromium.org>
Reviewed-by: Chris Sosa <sosa@chromium.org>
Commit-Queue: Alex Vakulenko <avakulenko@chromium.org>
diff --git a/buffet/url_utils.cc b/buffet/url_utils.cc
new file mode 100644
index 0000000..08d78f7
--- /dev/null
+++ b/buffet/url_utils.cc
@@ -0,0 +1,162 @@
+// Copyright 2014 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "buffet/url_utils.h"
+
+#include <algorithm>
+
+namespace {
+// Given a URL string, determine where the query string starts and ends.
+// URLs have schema, domain and path (along with possible user name, password
+// and port number which are of no interest for us here) which could optionally
+// have a query string that is separated from the path by '?'. Finally, the URL
+// could also have a '#'-separated URL fragment which is usually used by the
+// browser as a bookmark element. So, for example:
+// http://server.com/path/to/object?k=v&foo=bar#fragment
+// Here:
+// http://server.com/path/to/object - is the URL of the object,
+// ?k=v&foo=bar - URL query string
+// #fragment - URL framgment string
+// If |exclude_fragment| is true, the function returns the start character and
+// the length of the query string alone. If it is false, the query string length
+// will include both the query string and the fragment.
+bool GetQueryStringPos(const std::string& url, bool exclude_fragment,
+ size_t* query_pos, size_t* query_len) {
+ size_t query_start = url.find_first_of("?#");
+ if (query_start == std::string::npos) {
+ *query_pos = url.size();
+ if (query_len)
+ *query_len = 0;
+ return false;
+ }
+
+ *query_pos = query_start;
+ if (query_len) {
+ size_t query_end = url.size();
+
+ if (exclude_fragment) {
+ if (url[query_start] == '?') {
+ size_t pos_fragment = url.find('#', query_start);
+ if (pos_fragment != std::string::npos)
+ query_end = pos_fragment;
+ } else {
+ query_end = query_start;
+ }
+ }
+ *query_len = query_end - query_start;
+ }
+ return true;
+}
+} // anonymous namespace
+
+std::string chromeos::url::TrimOffQueryString(std::string* url) {
+ size_t query_pos;
+ if (!GetQueryStringPos(*url, false, &query_pos, nullptr))
+ return std::string();
+ std::string query_string = url->substr(query_pos);
+ url->resize(query_pos);
+ return query_string;
+}
+
+std::string chromeos::url::Combine(
+ const std::string& url, const std::string& subpath) {
+ return CombineMultiple(url, {subpath});
+}
+
+std::string chromeos::url::CombineMultiple(
+ const std::string& url, const std::vector<std::string>& parts) {
+ std::string result = url;
+ if (!parts.empty()) {
+ std::string query_string = TrimOffQueryString(&result);
+ for (auto&& part : parts) {
+ if (!part.empty()) {
+ if (!result.empty() && result.back() != '/')
+ result += '/';
+ size_t non_slash_pos = part.find_first_not_of('/');
+ if (non_slash_pos != std::string::npos)
+ result += part.substr(non_slash_pos);
+ }
+ }
+ result += query_string;
+ }
+ return result;
+}
+
+std::string chromeos::url::GetQueryString(
+ const std::string& url, bool remove_fragment) {
+ std::string query_string;
+ size_t query_pos, query_len;
+ if (GetQueryStringPos(url, remove_fragment, &query_pos, &query_len)) {
+ query_string = url.substr(query_pos, query_len);
+ }
+ return query_string;
+}
+
+chromeos::data_encoding::WebParamList chromeos::url::GetQueryStringParameters(
+ const std::string& url) {
+ // Extract the query string and remove the leading '?'.
+ std::string query_string = GetQueryString(url, true).substr(1);
+ return chromeos::data_encoding::WebParamsDecode(query_string);
+}
+
+std::string chromeos::url::GetQueryStringValue(
+ const std::string& url, const std::string& name) {
+ return GetQueryStringValue(GetQueryStringParameters(url), name);
+}
+
+std::string chromeos::url::GetQueryStringValue(
+ const chromeos::data_encoding::WebParamList& params,
+ const std::string& name) {
+ for (auto&& pair : params) {
+ if (name.compare(pair.first) == 0)
+ return pair.second;
+ }
+ return std::string();
+}
+
+std::string chromeos::url::RemoveQueryString(
+ const std::string& url, bool remove_fragment_too) {
+ size_t query_pos, query_len;
+ if (!GetQueryStringPos(url, !remove_fragment_too, &query_pos, &query_len))
+ return url;
+ std::string result = url.substr(0, query_pos);
+ size_t fragment_pos = query_pos + query_len;
+ if (fragment_pos < url.size()) {
+ result += url.substr(fragment_pos);
+ }
+ return result;
+}
+
+std::string chromeos::url::AppendQueryParam(
+ const std::string& url, const std::string& name, const std::string& value) {
+ return AppendQueryParams(url, {{name, value}});
+}
+
+std::string chromeos::url::AppendQueryParams(
+ const std::string& url,
+ const chromeos::data_encoding::WebParamList& params) {
+ if (params.empty())
+ return url;
+ size_t query_pos, query_len;
+ GetQueryStringPos(url, true, &query_pos, &query_len);
+ size_t fragment_pos = query_pos + query_len;
+ std::string result = url.substr(0, fragment_pos);
+ if (query_len == 0) {
+ result += '?';
+ } else if (query_len > 1) {
+ result += '&';
+ }
+ result += chromeos::data_encoding::WebParamsEncode(params);
+ if (fragment_pos < url.size()) {
+ result += url.substr(fragment_pos);
+ }
+ return result;
+}
+
+bool chromeos::url::HasQueryString(const std::string& url) {
+ size_t query_pos, query_len;
+ GetQueryStringPos(url, true, &query_pos, &query_len);
+ return (query_len > 0);
+}
+