libweave: Add timestamp for device resource update requests

To guard against out-of-order PUT requests to the server, include
the last known device resource timestamp on the request URL.

If a request gets delayed due to network outage and eventually gets
retransmitted some time later, the server can detect the outdated
request and ignore it.

BUG=brillo:1261
TEST=`FEATURES=test emerge-link buffet`
CQ-DEPEND=CL:288010

Change-Id: I51481edc6cf485d111ad7d63540afd70888a1278
Reviewed-on: https://chromium-review.googlesource.com/287630
Reviewed-by: Vitaly Buka <vitalybuka@chromium.org>
Commit-Queue: Alex Vakulenko <avakulenko@chromium.org>
Tested-by: Alex Vakulenko <avakulenko@chromium.org>
diff --git a/libweave/src/device_registration_info.cc b/libweave/src/device_registration_info.cc
index 5979ac6..9f97900 100644
--- a/libweave/src/device_registration_info.cc
+++ b/libweave/src/device_registration_info.cc
@@ -12,6 +12,7 @@
 
 #include <base/bind.h>
 #include <base/json/json_writer.h>
+#include <base/strings/string_number_conversions.h>
 #include <base/values.h>
 #include <chromeos/bind_lambda.h>
 #include <chromeos/data_encoding.h>
@@ -38,10 +39,6 @@
 
 namespace {
 
-const int kMaxStartDeviceRetryDelayMinutes{1};
-const int64_t kMinStartDeviceRetryDelaySeconds{5};
-const int64_t kAbortCommandRetryDelaySeconds{5};
-
 std::pair<std::string, std::string> BuildAuthHeader(
     const std::string& access_token_type,
     const std::string& access_token) {
@@ -178,29 +175,19 @@
     //             timeout as a configurable knob or allow local
     //             daemons to signal that their state is up to date so that
     //             we need not wait for them.
-    ScheduleStartDevice(base::TimeDelta::FromSeconds(5));
+    ScheduleCloudConnection(base::TimeDelta::FromSeconds(5));
   }
 }
 
-void DeviceRegistrationInfo::ScheduleStartDevice(const base::TimeDelta& later) {
+void DeviceRegistrationInfo::ScheduleCloudConnection(
+    const base::TimeDelta& delay) {
   SetRegistrationStatus(RegistrationStatus::kConnecting);
   if (!task_runner_)
     return;  // Assume we're in unittests
-  base::TimeDelta max_delay =
-      base::TimeDelta::FromMinutes(kMaxStartDeviceRetryDelayMinutes);
-  base::TimeDelta min_delay =
-      base::TimeDelta::FromSeconds(kMinStartDeviceRetryDelaySeconds);
-  base::TimeDelta retry_delay = later * 2;
-  if (retry_delay > max_delay) {
-    retry_delay = max_delay;
-  }
-  if (retry_delay < min_delay) {
-    retry_delay = min_delay;
-  }
   task_runner_->PostDelayedTask(
-      FROM_HERE, base::Bind(&DeviceRegistrationInfo::StartDevice,
-                            weak_factory_.GetWeakPtr(), nullptr, retry_delay),
-      later);
+      FROM_HERE,
+      base::Bind(&DeviceRegistrationInfo::ConnectToCloud, AsWeakPtr()),
+      delay);
 }
 
 bool DeviceRegistrationInfo::HaveRegistrationCredentials() const {
@@ -449,8 +436,8 @@
     return std::string();
   }
 
-  url = GetServiceURL("registrationTickets/" + ticket_id + "/finalize?key=" +
-                      config_->api_key());
+  url = GetServiceURL("registrationTickets/" + ticket_id + "/finalize",
+                      {{"key", config_->api_key()}});
   response = chromeos::http::SendRequestWithNoDataAndBlock(
       chromeos::http::request_type::kPost, url, {}, transport_, error);
   if (!response)
@@ -466,15 +453,19 @@
   std::string auth_code;
   std::string device_id;
   std::string robot_account;
+  const base::DictionaryValue* device_draft_response = nullptr;
   if (!json_resp->GetString("robotAccountEmail", &robot_account) ||
       !json_resp->GetString("robotAccountAuthorizationCode", &auth_code) ||
-      !json_resp->GetString("deviceDraft.id", &device_id)) {
+      !json_resp->GetDictionary("deviceDraft", &device_draft_response) ||
+      !device_draft_response->GetString("id", &device_id)) {
     chromeos::Error::AddTo(error, FROM_HERE, kErrorDomainGCD,
                            "unexpected_response",
                            "Device account missing in response");
     return std::string();
   }
 
+  UpdateDeviceInfoTimestamp(*device_draft_response);
+
   // Now get access_token and refresh_token
   response = chromeos::http::PostFormDataAndBlock(
       GetOAuthURL("token"),
@@ -512,9 +503,9 @@
 
   StartNotificationChannel();
 
-  // We're going to respond with our success immediately and we'll StartDevice
-  // shortly after.
-  ScheduleStartDevice(base::TimeDelta::FromSeconds(0));
+  // We're going to respond with our success immediately and we'll connect to
+  // cloud shortly after.
+  ScheduleCloudConnection(base::TimeDelta::FromSeconds(0));
   return device_id;
 }
 
@@ -586,6 +577,7 @@
   VLOG(1) << "Response for cloud request with ID " << request_id
           << " received with status code " << status_code;
   if (status_code == chromeos::http::status_code::Denied) {
+    cloud_backoff_entry_->InformOfRequest(true);
     RefreshAccessToken(
         base::Bind(&DeviceRegistrationInfo::OnAccessTokenRefreshed, AsWeakPtr(),
                    data),
@@ -608,6 +600,7 @@
       chromeos::http::ParseJsonResponse(response.get(), nullptr, &error);
   if (!json_resp) {
     data->error_callback.Run(error.get());
+    cloud_backoff_entry_->InformOfRequest(true);
     return;
   }
 
@@ -619,6 +612,7 @@
       RetryCloudRequest(data);
       return;
     }
+    cloud_backoff_entry_->InformOfRequest(true);
     data->error_callback.Run(error.get());
     return;
   }
@@ -652,31 +646,48 @@
 void DeviceRegistrationInfo::OnAccessTokenError(
     const std::shared_ptr<const CloudRequestData>& data,
     const chromeos::Error* error) {
-  if (error->HasError(kErrorDomainOAuth2, "invalid_grant"))
-    MarkDeviceUnregistered();
+  CheckAccessTokenError(error);
   data->error_callback.Run(error);
 }
 
-void DeviceRegistrationInfo::StartDevice(chromeos::ErrorPtr* error,
-                                         const base::TimeDelta& retry_delay) {
-  if (!VerifyRegistrationCredentials(error))
+void DeviceRegistrationInfo::CheckAccessTokenError(
+    const chromeos::Error* error) {
+  if (error->HasError(kErrorDomainOAuth2, "invalid_grant"))
+    MarkDeviceUnregistered();
+}
+
+void DeviceRegistrationInfo::ConnectToCloud() {
+  connected_to_cloud_ = false;
+  if (!VerifyRegistrationCredentials(nullptr))
     return;
-  auto handle_start_device_failure_cb =
-      base::Bind(&IgnoreCloudErrorWithCallback,
-                 base::Bind(&DeviceRegistrationInfo::ScheduleStartDevice,
-                            weak_factory_.GetWeakPtr(), retry_delay));
-  // "Starting" a device just means that we:
+
+  if (access_token_.empty()) {
+    RefreshAccessToken(
+        base::Bind(&DeviceRegistrationInfo::ConnectToCloud, AsWeakPtr()),
+        base::Bind(&DeviceRegistrationInfo::CheckAccessTokenError,
+                   AsWeakPtr()));
+    return;
+  }
+
+  // Connecting a device to cloud just means that we:
   //   1) push an updated device resource
   //   2) fetch an initial set of outstanding commands
   //   3) abort any commands that we've previously marked as "in progress"
   //      or as being in an error state; publish queued commands
-  auto abort_commands_cb =
-      base::Bind(&DeviceRegistrationInfo::ProcessInitialCommandList,
-                 weak_factory_.GetWeakPtr());
-  auto fetch_commands_cb = base::Bind(
-      &DeviceRegistrationInfo::FetchCommands, weak_factory_.GetWeakPtr(),
-      abort_commands_cb, handle_start_device_failure_cb);
-  UpdateDeviceResource(fetch_commands_cb, handle_start_device_failure_cb);
+  UpdateDeviceResource(
+      base::Bind(&DeviceRegistrationInfo::OnConnectedToCloud, AsWeakPtr()),
+      base::Bind(&IgnoreCloudError));
+}
+
+void DeviceRegistrationInfo::OnConnectedToCloud() {
+  LOG(INFO) << "Device connected to cloud server";
+  connected_to_cloud_ = true;
+  FetchCommands(base::Bind(&DeviceRegistrationInfo::ProcessInitialCommandList,
+                           AsWeakPtr()),
+                base::Bind(&IgnoreCloudError));
+  // In case there are any pending state updates since we sent off the initial
+  // UpdateDeviceResource() request, update the server with any state changes.
+  PublishStateUpdates();
 }
 
 bool DeviceRegistrationInfo::UpdateDeviceInfo(const std::string& name,
@@ -767,19 +778,7 @@
                             chromeos::string_utils::Join(";", messages));
   }
   UpdateCommand(command_id, command_patch, base::Bind(&base::DoNothing),
-                base::Bind(&DeviceRegistrationInfo::RetryNotifyCommandAborted,
-                           weak_factory_.GetWeakPtr(), command_id,
-                           base::Passed(std::move(error))));
-}
-
-void DeviceRegistrationInfo::RetryNotifyCommandAborted(
-    const std::string& command_id,
-    chromeos::ErrorPtr error) {
-  task_runner_->PostDelayedTask(
-      FROM_HERE, base::Bind(&DeviceRegistrationInfo::NotifyCommandAborted,
-                            weak_factory_.GetWeakPtr(), command_id,
-                            base::Passed(std::move(error))),
-      base::TimeDelta::FromSeconds(kAbortCommandRetryDelaySeconds));
+                base::Bind(&base::DoNothing));
 }
 
 void DeviceRegistrationInfo::UpdateDeviceResource(
@@ -795,12 +794,28 @@
 }
 
 void DeviceRegistrationInfo::StartQueuedUpdateDeviceResource() {
-  CHECK(in_progress_resource_update_callbacks_.empty());
-  if (queued_resource_update_callbacks_.empty())
+  if (in_progress_resource_update_callbacks_.empty() &&
+      queued_resource_update_callbacks_.empty())
     return;
 
-  std::swap(queued_resource_update_callbacks_,
-            in_progress_resource_update_callbacks_);
+  if (last_device_resource_updated_timestamp_.empty()) {
+    // We don't know the current time stamp of the device resource from the
+    // server side. We need to provide the time stamp to the server as part of
+    // the request to guard against out-of-order requests overwriting settings
+    // specified by later requests.
+    VLOG(1) << "Getting the last device resource timestamp from server...";
+    GetDeviceInfo(
+        base::Bind(&DeviceRegistrationInfo::OnDeviceInfoRetrieved, AsWeakPtr()),
+        base::Bind(&DeviceRegistrationInfo::OnUpdateDeviceResourceError,
+                   AsWeakPtr()));
+    return;
+  }
+
+  in_progress_resource_update_callbacks_.insert(
+      in_progress_resource_update_callbacks_.end(),
+      queued_resource_update_callbacks_.begin(),
+      queued_resource_update_callbacks_.end());
+  queued_resource_update_callbacks_.clear();
 
   VLOG(1) << "Updating GCD server with CDD...";
   chromeos::ErrorPtr error;
@@ -811,16 +826,40 @@
     return;
   }
 
+  std::string url = GetDeviceURL(
+      {}, {{"lastUpdateTimeMs", last_device_resource_updated_timestamp_}});
+
   DoCloudRequest(
-      chromeos::http::request_type::kPut, GetDeviceURL(), device_resource.get(),
+      chromeos::http::request_type::kPut, url, device_resource.get(),
       base::Bind(&DeviceRegistrationInfo::OnUpdateDeviceResourceSuccess,
                  AsWeakPtr()),
       base::Bind(&DeviceRegistrationInfo::OnUpdateDeviceResourceError,
                  AsWeakPtr()));
 }
 
+void DeviceRegistrationInfo::OnDeviceInfoRetrieved(
+    const base::DictionaryValue& device_info) {
+  if (UpdateDeviceInfoTimestamp(device_info))
+    StartQueuedUpdateDeviceResource();
+}
+
+bool DeviceRegistrationInfo::UpdateDeviceInfoTimestamp(
+    const base::DictionaryValue& device_info) {
+  // For newly created devices, "lastUpdateTimeMs" may not be present, but
+  // "creationTimeMs" should be there at least.
+  if (!device_info.GetString("lastUpdateTimeMs",
+                             &last_device_resource_updated_timestamp_) &&
+      !device_info.GetString("creationTimeMs",
+                             &last_device_resource_updated_timestamp_)) {
+    LOG(WARNING) << "Device resource timestamp is missing";
+    return false;
+  }
+  return true;
+}
+
 void DeviceRegistrationInfo::OnUpdateDeviceResourceSuccess(
-    const base::DictionaryValue& reply) {
+    const base::DictionaryValue& device_info) {
+  UpdateDeviceInfoTimestamp(device_info);
   // Make a copy of the callback list so that if the callback triggers another
   // call to UpdateDeviceResource(), we do not modify the list we are iterating
   // over.
@@ -832,12 +871,24 @@
 
 void DeviceRegistrationInfo::OnUpdateDeviceResourceError(
     const chromeos::Error* error) {
+  if (error->HasError(kErrorDomainGCDServer, "invalid_last_update_time_ms")) {
+    // If the server rejected our previous request, retrieve the latest
+    // timestamp from the server and retry.
+    VLOG(1) << "Getting the last device resource timestamp from server...";
+    GetDeviceInfo(
+        base::Bind(&DeviceRegistrationInfo::OnDeviceInfoRetrieved, AsWeakPtr()),
+        base::Bind(&DeviceRegistrationInfo::OnUpdateDeviceResourceError,
+                   AsWeakPtr()));
+    return;
+  }
+
   // Make a copy of the callback list so that if the callback triggers another
   // call to UpdateDeviceResource(), we do not modify the list we are iterating
   // over.
   auto callback_list = std::move(in_progress_resource_update_callbacks_);
   for (const auto& callback_pair : callback_list)
     callback_pair.second.Run(error);
+
   StartQueuedUpdateDeviceResource();
 }
 
@@ -1020,7 +1071,7 @@
 
 void DeviceRegistrationInfo::OnCommandDefsChanged() {
   VLOG(1) << "CommandDefinitionChanged notification received";
-  if (!HaveRegistrationCredentials())
+  if (!HaveRegistrationCredentials() || !connected_to_cloud_)
     return;
 
   UpdateDeviceResource(base::Bind(&base::DoNothing),
@@ -1029,7 +1080,7 @@
 
 void DeviceRegistrationInfo::OnStateChanged() {
   VLOG(1) << "StateChanged notification received";
-  if (!HaveRegistrationCredentials())
+  if (!HaveRegistrationCredentials() || !connected_to_cloud_)
     return;
 
   // TODO(vitalybuka): Integrate BackoffEntry.
@@ -1043,6 +1094,10 @@
   notification_channel_starting_ = false;
   pull_channel_->UpdatePullInterval(config_->backup_polling_period());
   current_notification_channel_ = primary_notification_channel_.get();
+
+  if (!connected_to_cloud_)
+    return;
+
   // Once we update the device resource with the new notification channel,
   // do the last poll for commands from the server, to make sure we have the
   // latest command baseline and no other commands have been queued between
@@ -1055,7 +1110,7 @@
 
 void DeviceRegistrationInfo::OnDisconnected() {
   LOG(INFO) << "Notification channel disconnected";
-  if (!HaveRegistrationCredentials())
+  if (!HaveRegistrationCredentials() || !connected_to_cloud_)
     return;
 
   pull_channel_->UpdatePullInterval(config_->polling_period());
@@ -1067,17 +1122,16 @@
 void DeviceRegistrationInfo::OnPermanentFailure() {
   LOG(ERROR) << "Failed to establish notification channel.";
   notification_channel_starting_ = false;
-  auto mark_unregistered =
-      base::Bind(&DeviceRegistrationInfo::MarkDeviceUnregistered, AsWeakPtr());
-  auto error_callback = [mark_unregistered](const chromeos::Error* error) {
-    if (error->HasError(kErrorDomainOAuth2, "invalid_grant"))
-      mark_unregistered.Run();
-  };
-  RefreshAccessToken(base::Bind(&base::DoNothing), base::Bind(error_callback));
+  RefreshAccessToken(base::Bind(&base::DoNothing),
+                     base::Bind(&DeviceRegistrationInfo::CheckAccessTokenError,
+                                AsWeakPtr()));
 }
 
 void DeviceRegistrationInfo::OnCommandCreated(
     const base::DictionaryValue& command) {
+  if (!connected_to_cloud_)
+    return;
+
   if (!command.empty()) {
     // GCD spec indicates that the command parameter in notification object
     // "may be empty if command size is too big".
@@ -1103,6 +1157,8 @@
   if (!HaveRegistrationCredentials())
     return;
 
+  connected_to_cloud_ = false;
+
   LOG(INFO) << "Device is unregistered from the cloud. Deleting credentials";
   BuffetConfig::Transaction change{config_.get()};
   change.set_device_id("");
diff --git a/libweave/src/device_registration_info.h b/libweave/src/device_registration_info.h
index 88039dc..4ea7e00 100644
--- a/libweave/src/device_registration_info.h
+++ b/libweave/src/device_registration_info.h
@@ -159,15 +159,16 @@
  private:
   friend class DeviceRegistrationInfoTest;
 
-  // Cause DeviceRegistrationInfo to attempt to StartDevice on its own later.
-  void ScheduleStartDevice(const base::TimeDelta& later);
+  // Cause DeviceRegistrationInfo to attempt to connect to cloud server on
+  // its own later.
+  void ScheduleCloudConnection(const base::TimeDelta& delay);
 
-  // Starts device execution.
+  // Initiates the connection to the cloud server.
   // Device will do required start up chores and then start to listen
   // to new commands.
-  // TODO(antonm): Consider moving into some other class.
-  void StartDevice(chromeos::ErrorPtr* error,
-                   const base::TimeDelta& retry_delay);
+  void ConnectToCloud();
+  // Notification called when ConnectToCloud() succeeds.
+  void OnConnectedToCloud();
 
   // Forcibly refreshes the access token.
   void RefreshAccessToken(const base::Closure& success_callback,
@@ -220,16 +221,28 @@
   void RetryCloudRequest(const std::shared_ptr<const CloudRequestData>& data);
   void OnAccessTokenRefreshed(
       const std::shared_ptr<const CloudRequestData>& data);
-  void OnAccessTokenError(const std::shared_ptr<const CloudRequestData>& data,
-                          const chromeos::Error* error);
+  void OnAccessTokenError(
+      const std::shared_ptr<const CloudRequestData>& data,
+      const chromeos::Error* error);
+  void CheckAccessTokenError(const chromeos::Error* error);
 
   void UpdateDeviceResource(const base::Closure& on_success,
                             const CloudRequestErrorCallback& on_failure);
   void StartQueuedUpdateDeviceResource();
   // Success/failure callbacks for UpdateDeviceResource().
-  void OnUpdateDeviceResourceSuccess(const base::DictionaryValue& reply);
+  void OnUpdateDeviceResourceSuccess(const base::DictionaryValue& device_info);
   void OnUpdateDeviceResourceError(const chromeos::Error* error);
 
+  // Callback from GetDeviceInfo() to retrieve the device resource timestamp
+  // and retry UpdateDeviceResource() call.
+  void OnDeviceInfoRetrieved(const base::DictionaryValue& device_info);
+
+  // Extracts the timestamp from the device resource and sets it to
+  // |last_device_resource_updated_timestamp_|.
+  // Returns false if the "lastUpdateTimeMs" field is not found in the device
+  // resource or it is invalid.
+  bool UpdateDeviceInfoTimestamp(const base::DictionaryValue& device_info);
+
   void FetchCommands(
       const base::Callback<void(const base::ListValue&)>& on_success,
       const CloudRequestErrorCallback& on_failure);
@@ -256,11 +269,6 @@
   void NotifyCommandAborted(const std::string& command_id,
                             chromeos::ErrorPtr error);
 
-  // When NotifyCommandAborted() fails, RetryNotifyCommandAborted() schedules
-  // a retry attempt.
-  void RetryNotifyCommandAborted(const std::string& command_id,
-                                 chromeos::ErrorPtr error);
-
   // Builds Cloud API devices collection REST resource which matches
   // current state of the device including command definitions
   // for all supported commands and current device state.
@@ -287,6 +295,12 @@
   // Transient data
   std::string access_token_;
   base::Time access_token_expiration_;
+  // The time stamp of last device resource update on the server.
+  std::string last_device_resource_updated_timestamp_;
+  // Set to true if the device has connected to the cloud server correctly.
+  // At this point, normal state and command updates can be dispatched to the
+  // server.
+  bool connected_to_cloud_{false};
 
   // HTTP transport used for communications.
   std::shared_ptr<chromeos::http::Transport> transport_;
diff --git a/libweave/src/storage_impls.cc b/libweave/src/storage_impls.cc
index 3a6a32d..025af39 100644
--- a/libweave/src/storage_impls.cc
+++ b/libweave/src/storage_impls.cc
@@ -18,11 +18,7 @@
 }
 
 std::unique_ptr<base::DictionaryValue> FileStorage::Load() {
-  std::string json;
-  if (!base::ReadFileToString(file_path_, &json))
-    return std::unique_ptr<base::DictionaryValue>();
-
-  return LoadJsonDict(json, nullptr);
+  return LoadJsonDict(file_path_, nullptr);
 }
 
 bool FileStorage::Save(const base::DictionaryValue& config) {
diff --git a/libweave/src/utils.cc b/libweave/src/utils.cc
index 7e0864c..20d6b84 100644
--- a/libweave/src/utils.cc
+++ b/libweave/src/utils.cc
@@ -63,8 +63,9 @@
     chromeos::Error::AddToPrintf(
         error, FROM_HERE, chromeos::errors::json::kDomain,
         chromeos::errors::json::kParseError,
-        "Error parsing JSON string '%s': %s",
-        LimitString(json_string, kMaxStrLen).c_str(), error_message.c_str());
+        "Error parsing JSON string '%s' (%zu): %s",
+        LimitString(json_string, kMaxStrLen).c_str(), json_string.size(),
+        error_message.c_str());
     return result;
   }
   base::DictionaryValue* dict_value = nullptr;