aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBertrand SIMONNET <bsimonnet@google.com>2015-12-15 12:33:01 -0800
committerBertrand SIMONNET <bsimonnet@google.com>2016-01-12 15:54:11 -0800
commit56e96df6e56986d00dbf2745c33c5efad40bdada (patch)
treef3eefae536c3c456e630805904ba68f981b3ce2d
parent9e854564acea5cd41d40d718e8286ca8556dcc0e (diff)
downloadmetricsd-56e96df6e56986d00dbf2745c33c5efad40bdada.tar.gz
metricsd: Persist the metrics to disk periodically.
Every now and then (5 minutes by default), the uploader will persist the current metrics to disk to avoid losing them in case we exit unexpectedly (reboot or crash). When starting up, metricsd will load the previously saved log and resume the metrics collection from there. Bug: 25670584 Test: Unit tests. Test: manual: restart metricsd. The saved log is detected and parsed correctly. Test: manual: Send a sample to metricsd, send SIGTERM to metricsd, the log is saved to disk, metricsd restarts and picks up the log where it left. Change-Id: I4cefc62c7ea1fa51333d84d8a7ba0a2e9c7fd58f
-rw-r--r--constants.h1
-rw-r--r--metricsd_main.cc10
-rw-r--r--uploader/metrics_log.cc36
-rw-r--r--uploader/metrics_log.h8
-rw-r--r--uploader/upload_service.cc70
-rw-r--r--uploader/upload_service.h19
-rw-r--r--uploader/upload_service_test.cc66
7 files changed, 170 insertions, 40 deletions
diff --git a/constants.h b/constants.h
index 4815888..b702737 100644
--- a/constants.h
+++ b/constants.h
@@ -26,6 +26,7 @@ static const char kMetricsGUIDFileName[] = "Sysinfo.GUID";
static const char kMetricsServer[] = "https://clients4.google.com/uma/v2";
static const char kConsentFileName[] = "enabled";
static const char kStagedLogName[] = "staged_log";
+static const char kSavedLogName[] = "saved_log";
static const char kFailedUploadCountName[] = "failed_upload_count";
static const char kDefaultVersion[] = "0.0.0.0";
diff --git a/metricsd_main.cc b/metricsd_main.cc
index ae441d0..0178342 100644
--- a/metricsd_main.cc
+++ b/metricsd_main.cc
@@ -33,10 +33,13 @@ int main(int argc, char** argv) {
// Upload Service flags.
DEFINE_int32(upload_interval_secs, 1800,
- "Interval at which metrics_daemon sends the metrics. (needs "
- "-uploader)");
+ "Interval at which metricsd uploads the metrics.");
+ DEFINE_int32(disk_persistence_interval_secs, 300,
+ "Interval at which metricsd saves the aggregated metrics to "
+ "disk to avoid losing them if metricsd stops in between "
+ "two uploads.");
DEFINE_string(server, metrics::kMetricsServer,
- "Server to upload the metrics to. (needs -uploader)");
+ "Server to upload the metrics to.");
DEFINE_string(private_directory, metrics::kMetricsdDirectory,
"Path to the private directory used by metricsd "
"(testing only)");
@@ -72,6 +75,7 @@ int main(int argc, char** argv) {
UploadService upload_service(
FLAGS_server, base::TimeDelta::FromSeconds(FLAGS_upload_interval_secs),
+ base::TimeDelta::FromSeconds(FLAGS_disk_persistence_interval_secs),
base::FilePath(FLAGS_private_directory),
base::FilePath(FLAGS_shared_directory));
diff --git a/uploader/metrics_log.cc b/uploader/metrics_log.cc
index a01b5da..39655e6 100644
--- a/uploader/metrics_log.cc
+++ b/uploader/metrics_log.cc
@@ -18,6 +18,8 @@
#include <string>
+#include <base/files/file_util.h>
+
#include "uploader/proto/system_profile.pb.h"
#include "uploader/system_profile_setter.h"
@@ -27,6 +29,40 @@ MetricsLog::MetricsLog()
: MetricsLogBase("", 0, metrics::MetricsLogBase::ONGOING_LOG, "") {
}
+bool MetricsLog::LoadFromFile(const base::FilePath& saved_log) {
+ std::string encoded_log;
+ if (!base::ReadFileToString(saved_log, &encoded_log)) {
+ LOG(ERROR) << "Failed to read the metrics log backup from "
+ << saved_log.value();
+ return false;
+ }
+
+ if (!uma_proto()->ParseFromString(encoded_log)) {
+ LOG(ERROR) << "Failed to parse log from " << saved_log.value()
+ << ", deleting the log";
+ base::DeleteFile(saved_log, false);
+ uma_proto()->Clear();
+ return false;
+ }
+
+ VLOG(1) << uma_proto()->histogram_event_size() << " histograms loaded from "
+ << saved_log.value();
+
+ return true;
+}
+
+bool MetricsLog::SaveToFile(const base::FilePath& path) {
+ std::string encoded_log;
+ GetEncodedLog(&encoded_log);
+
+ if (static_cast<int>(encoded_log.size()) !=
+ base::WriteFile(path, encoded_log.data(), encoded_log.size())) {
+ LOG(ERROR) << "Failed to persist the current log to " << path.value();
+ return false;
+ }
+ return true;
+}
+
void MetricsLog::IncrementUserCrashCount(unsigned int count) {
metrics::SystemProfileProto::Stability* stability(
uma_proto()->mutable_system_profile()->mutable_stability());
diff --git a/uploader/metrics_log.h b/uploader/metrics_log.h
index b76cd72..9e60b97 100644
--- a/uploader/metrics_log.h
+++ b/uploader/metrics_log.h
@@ -19,6 +19,7 @@
#include <string>
+#include <base/files/file_path.h>
#include <base/macros.h>
#include "uploader/metrics_log_base.h"
@@ -44,8 +45,15 @@ class MetricsLog : public metrics::MetricsLogBase {
// Populate the system profile with system information using setter.
bool PopulateSystemProfile(SystemProfileSetter* setter);
+ // Load the log from |path|.
+ bool LoadFromFile(const base::FilePath& path);
+
+ // Save this log to |path|.
+ bool SaveToFile(const base::FilePath& path);
+
private:
friend class UploadServiceTest;
+ FRIEND_TEST(UploadServiceTest, CurrentLogSavedAndResumed);
FRIEND_TEST(UploadServiceTest, LogContainsAggregatedValues);
FRIEND_TEST(UploadServiceTest, LogContainsCrashCounts);
FRIEND_TEST(UploadServiceTest, LogKernelCrash);
diff --git a/uploader/upload_service.cc b/uploader/upload_service.cc
index 0d7aacc..ab44b28 100644
--- a/uploader/upload_service.cc
+++ b/uploader/upload_service.cc
@@ -42,6 +42,7 @@ const int UploadService::kMaxFailedUpload = 10;
UploadService::UploadService(const std::string& server,
const base::TimeDelta& upload_interval,
+ const base::TimeDelta& disk_persistence_interval,
const base::FilePath& private_metrics_directory,
const base::FilePath& shared_metrics_directory)
: brillo::Daemon(),
@@ -51,11 +52,19 @@ UploadService::UploadService(const std::string& server,
private_metrics_directory),
counters_(new CrashCounters),
upload_interval_(upload_interval),
+ disk_persistence_interval_(disk_persistence_interval),
metricsd_service_runner_(counters_) {
staged_log_path_ = private_metrics_directory.Append(metrics::kStagedLogName);
+ saved_log_path_ = private_metrics_directory.Append(metrics::kSavedLogName);
consent_file_ = shared_metrics_directory.Append(metrics::kConsentFileName);
}
+void UploadService::LoadSavedLog() {
+ if (base::PathExists(saved_log_path_)) {
+ GetOrCreateCurrentLog()->LoadFromFile(saved_log_path_);
+ }
+}
+
int UploadService::OnInit() {
brillo::Daemon::OnInit();
@@ -64,12 +73,18 @@ int UploadService::OnInit() {
system_profile_setter_.reset(new SystemProfileCache());
- base::MessageLoop::current()->PostDelayedTask(FROM_HERE,
- base::Bind(&UploadService::UploadEventCallback,
- base::Unretained(this),
- upload_interval_),
+ base::MessageLoop::current()->PostDelayedTask(
+ FROM_HERE,
+ base::Bind(&UploadService::UploadEventCallback, base::Unretained(this)),
upload_interval_);
+ base::MessageLoop::current()->PostDelayedTask(
+ FROM_HERE,
+ base::Bind(&UploadService::PersistEventCallback, base::Unretained(this)),
+ disk_persistence_interval_);
+
+ LoadSavedLog();
+
return EX_OK;
}
@@ -78,24 +93,37 @@ void UploadService::OnShutdown(int* exit_code) {
}
void UploadService::InitForTest(SystemProfileSetter* setter) {
+ LoadSavedLog();
system_profile_setter_.reset(setter);
}
void UploadService::StartNewLog() {
- CHECK(!HasStagedLog()) << "the staged log should be discarded before "
- << "starting a new metrics log";
- MetricsLog* log = new MetricsLog();
- current_log_.reset(log);
+ current_log_.reset(new MetricsLog());
}
-void UploadService::UploadEventCallback(const base::TimeDelta& interval) {
+void UploadService::UploadEventCallback() {
UploadEvent();
- base::MessageLoop::current()->PostDelayedTask(FROM_HERE,
- base::Bind(&UploadService::UploadEventCallback,
- base::Unretained(this),
- interval),
- interval);
+ base::MessageLoop::current()->PostDelayedTask(
+ FROM_HERE,
+ base::Bind(&UploadService::UploadEventCallback, base::Unretained(this)),
+ upload_interval_);
+}
+
+void UploadService::PersistEventCallback() {
+ PersistToDisk();
+
+ base::MessageLoop::current()->PostDelayedTask(
+ FROM_HERE,
+ base::Bind(&UploadService::PersistEventCallback, base::Unretained(this)),
+ disk_persistence_interval_);
+}
+
+void UploadService::PersistToDisk() {
+ GatherHistograms();
+ if (current_log_) {
+ current_log_->SaveToFile(saved_log_path_);
+ }
}
void UploadService::UploadEvent() {
@@ -189,14 +217,16 @@ void UploadService::StageCurrentLog() {
<< "log.";
return;
}
- std::string encoded_log;
- staged_log->GetEncodedLog(&encoded_log);
- failed_upload_count_.Set(0);
- if (static_cast<int>(encoded_log.size()) != base::WriteFile(
- staged_log_path_, encoded_log.data(), encoded_log.size())) {
- LOG(ERROR) << "failed to persist to " << staged_log_path_.value();
+ if (!base::DeleteFile(saved_log_path_, false)) {
+ // There is a chance that we will upload the same metrics twice but, if we
+ // are lucky, the backup should be overridden before that. In doubt, try not
+ // to lose any metrics.
+ LOG(ERROR) << "failed to delete the last backup of the current log.";
}
+
+ failed_upload_count_.Set(0);
+ staged_log->SaveToFile(staged_log_path_);
}
MetricsLog* UploadService::GetOrCreateCurrentLog() {
diff --git a/uploader/upload_service.h b/uploader/upload_service.h
index b84179f..a1d9d3b 100644
--- a/uploader/upload_service.h
+++ b/uploader/upload_service.h
@@ -66,6 +66,7 @@ class UploadService : public base::HistogramFlattener, public brillo::Daemon {
public:
UploadService(const std::string& server,
const base::TimeDelta& upload_interval,
+ const base::TimeDelta& disk_persistence_interval,
const base::FilePath& private_metrics_directory,
const base::FilePath& shared_metrics_directory);
@@ -79,8 +80,8 @@ class UploadService : public base::HistogramFlattener, public brillo::Daemon {
// launch as it is destroyed when staging the log.
void StartNewLog();
- // Event callback for handling MessageLoop events.
- void UploadEventCallback(const base::TimeDelta& interval);
+ // Saves the current metrics to a file.
+ void PersistToDisk();
// Triggers an upload event.
void UploadEvent();
@@ -100,6 +101,8 @@ class UploadService : public base::HistogramFlattener, public brillo::Daemon {
friend class UploadServiceTest;
FRIEND_TEST(UploadServiceTest, CanSendMultipleTimes);
+ FRIEND_TEST(UploadServiceTest, CorruptedSavedLog);
+ FRIEND_TEST(UploadServiceTest, CurrentLogSavedAndResumed);
FRIEND_TEST(UploadServiceTest, DiscardLogsAfterTooManyFailedUpload);
FRIEND_TEST(UploadServiceTest, EmptyLogsAreNotSent);
FRIEND_TEST(UploadServiceTest, FailedSendAreRetried);
@@ -111,6 +114,7 @@ class UploadService : public base::HistogramFlattener, public brillo::Daemon {
FRIEND_TEST(UploadServiceTest, LogKernelCrash);
FRIEND_TEST(UploadServiceTest, LogUncleanShutdown);
FRIEND_TEST(UploadServiceTest, LogUserCrash);
+ FRIEND_TEST(UploadServiceTest, PersistEmptyLog);
FRIEND_TEST(UploadServiceTest, UnknownCrashIgnored);
FRIEND_TEST(UploadServiceTest, ValuesInConfigFileAreSent);
@@ -121,12 +125,21 @@ class UploadService : public base::HistogramFlattener, public brillo::Daemon {
// will be discarded.
static const int kMaxFailedUpload;
+ // Loads the log saved to disk if it exists.
+ void LoadSavedLog();
+
// Resets the internal state.
void Reset();
// Returns true iff metrics reporting is enabled.
bool AreMetricsEnabled();
+ // Event callback for handling Upload events.
+ void UploadEventCallback();
+
+ // Event callback for handling Persist events.
+ void PersistEventCallback();
+
// Aggregates all histogram available in memory and store them in the current
// log.
void GatherHistograms();
@@ -156,11 +169,13 @@ class UploadService : public base::HistogramFlattener, public brillo::Daemon {
std::shared_ptr<CrashCounters> counters_;
base::TimeDelta upload_interval_;
+ base::TimeDelta disk_persistence_interval_;
MetricsdServiceRunner metricsd_service_runner_;
base::FilePath consent_file_;
base::FilePath staged_log_path_;
+ base::FilePath saved_log_path_;
bool testing_;
};
diff --git a/uploader/upload_service_test.cc b/uploader/upload_service_test.cc
index bd5b39a..70112f4 100644
--- a/uploader/upload_service_test.cc
+++ b/uploader/upload_service_test.cc
@@ -45,17 +45,17 @@ class UploadServiceTest : public testing::Test {
ASSERT_FALSE(base::StatisticsRecorder::IsActive());
base::StatisticsRecorder::Initialize();
- base::FilePath private_dir = dir_.path().Append("private");
- base::FilePath shared_dir = dir_.path().Append("shared");
+ private_dir_ = dir_.path().Append("private");
+ shared_dir_ = dir_.path().Append("shared");
- EXPECT_TRUE(base::CreateDirectory(private_dir));
- EXPECT_TRUE(base::CreateDirectory(shared_dir));
+ EXPECT_TRUE(base::CreateDirectory(private_dir_));
+ EXPECT_TRUE(base::CreateDirectory(shared_dir_));
- ASSERT_EQ(0, base::WriteFile(shared_dir.Append(metrics::kConsentFileName),
+ ASSERT_EQ(0, base::WriteFile(shared_dir_.Append(metrics::kConsentFileName),
"", 0));
- upload_service_.reset(
- new UploadService("", base::TimeDelta(), private_dir, shared_dir));
+ upload_service_.reset(new UploadService(
+ "", base::TimeDelta(), base::TimeDelta(), private_dir_, shared_dir_));
counters_ = upload_service_->counters_;
upload_service_->sender_.reset(new SenderMock);
@@ -81,15 +81,16 @@ class UploadServiceTest : public testing::Test {
base::FilePath filepath =
dir_.path().Append("etc/os-release.d").Append(name);
ASSERT_TRUE(base::CreateDirectory(filepath.DirName()));
- ASSERT_EQ(
- value.size(),
- base::WriteFile(filepath, value.data(), value.size()));
+ ASSERT_EQ(value.size(),
+ base::WriteFile(filepath, value.data(), value.size()));
}
const metrics::SystemProfileProto_Stability GetCurrentStability() {
EXPECT_TRUE(upload_service_->current_log_.get());
- return upload_service_->current_log_->uma_proto()->system_profile().stability();
+ return upload_service_->current_log_->uma_proto()
+ ->system_profile()
+ .stability();
}
base::ScopedTempDir dir_;
@@ -97,6 +98,8 @@ class UploadServiceTest : public testing::Test {
std::unique_ptr<base::AtExitManager> exit_manager_;
std::shared_ptr<CrashCounters> counters_;
+ base::FilePath private_dir_;
+ base::FilePath shared_dir_;
};
TEST_F(UploadServiceTest, FailedSendAreRetried) {
@@ -219,10 +222,8 @@ TEST_F(UploadServiceTest, LogContainsCrashCounts) {
}
TEST_F(UploadServiceTest, ExtractChannelFromString) {
- EXPECT_EQ(
- SystemProfileCache::ProtoChannelFromString(
- "developer-build"),
- metrics::SystemProfileProto::CHANNEL_UNKNOWN);
+ EXPECT_EQ(SystemProfileCache::ProtoChannelFromString("developer-build"),
+ metrics::SystemProfileProto::CHANNEL_UNKNOWN);
EXPECT_EQ(metrics::SystemProfileProto::CHANNEL_DEV,
SystemProfileCache::ProtoChannelFromString("dev-channel"));
@@ -297,3 +298,38 @@ TEST_F(UploadServiceTest, ProductIdMandatory) {
SetTestingProperty(metrics::kProductId, "hello");
ASSERT_TRUE(cache.Initialize());
}
+
+TEST_F(UploadServiceTest, CurrentLogSavedAndResumed) {
+ SendHistogram("hello", 10, 0, 100, 10);
+ upload_service_->PersistToDisk();
+ EXPECT_EQ(
+ 1, upload_service_->current_log_->uma_proto()->histogram_event().size());
+ upload_service_.reset(new UploadService(
+ "", base::TimeDelta(), base::TimeDelta(), private_dir_, shared_dir_));
+ upload_service_->InitForTest(nullptr);
+
+ SendHistogram("hello", 10, 0, 100, 10);
+ upload_service_->GatherHistograms();
+ EXPECT_EQ(2, upload_service_->GetOrCreateCurrentLog()
+ ->uma_proto()
+ ->histogram_event()
+ .size());
+}
+
+TEST_F(UploadServiceTest, PersistEmptyLog) {
+ upload_service_->PersistToDisk();
+ EXPECT_FALSE(base::PathExists(upload_service_->saved_log_path_));
+}
+
+TEST_F(UploadServiceTest, CorruptedSavedLog) {
+ // Write a bogus saved log.
+ EXPECT_EQ(5, base::WriteFile(upload_service_->saved_log_path_, "hello", 5));
+
+ upload_service_.reset(new UploadService(
+ "", base::TimeDelta(), base::TimeDelta(), private_dir_, shared_dir_));
+
+ upload_service_->InitForTest(nullptr);
+ // If the log is unreadable, we drop it and continue execution.
+ ASSERT_NE(nullptr, upload_service_->GetOrCreateCurrentLog());
+ ASSERT_FALSE(base::PathExists(upload_service_->saved_log_path_));
+}