aboutsummaryrefslogtreecommitdiff
path: root/deduplication.cc
diff options
context:
space:
mode:
Diffstat (limited to 'deduplication.cc')
-rw-r--r--deduplication.cc36
1 files changed, 19 insertions, 17 deletions
diff --git a/deduplication.cc b/deduplication.cc
index a3b1f9d..b71c599 100644
--- a/deduplication.cc
+++ b/deduplication.cc
@@ -21,31 +21,33 @@
#include <cstddef>
#include <unordered_map>
+#include <utility>
#include <vector>
#include "equality.h"
#include "equality_cache.h"
#include "graph.h"
-#include "metrics.h"
+#include "hashing.h"
+#include "runtime.h"
#include "substitution.h"
namespace stg {
-Id Deduplicate(Graph& graph, Id root, const Hashes& hashes, Metrics& metrics) {
+Id Deduplicate(Runtime& runtime, Graph& graph, Id root, const Hashes& hashes) {
// Partition the nodes by hash.
std::unordered_map<HashValue, std::vector<Id>> partitions;
{
- Time x(metrics, "partition nodes");
+ const Time x(runtime, "partition nodes");
for (const auto& [id, fp] : hashes) {
partitions[fp].push_back(id);
}
}
- Counter(metrics, "deduplicate.nodes") = hashes.size();
- Counter(metrics, "deduplicate.hashes") = partitions.size();
+ Counter(runtime, "deduplicate.nodes") = hashes.size();
+ Counter(runtime, "deduplicate.hashes") = partitions.size();
- Histogram hash_partition_size(metrics, "deduplicate.hash_partition_size");
- Counter min_comparisons(metrics, "deduplicate.min_comparisons");
- Counter max_comparisons(metrics, "deduplicate.max_comparisons");
+ Histogram hash_partition_size(runtime, "deduplicate.hash_partition_size");
+ Counter min_comparisons(runtime, "deduplicate.min_comparisons");
+ Counter max_comparisons(runtime, "deduplicate.max_comparisons");
for (const auto& [fp, ids] : partitions) {
const auto n = ids.size();
hash_partition_size.Add(n);
@@ -54,16 +56,16 @@ Id Deduplicate(Graph& graph, Id root, const Hashes& hashes, Metrics& metrics) {
}
// Refine partitions of nodes with the same fingerprints.
- EqualityCache cache(hashes, metrics);
+ EqualityCache cache(runtime, hashes);
Equals<EqualityCache> equals(graph, cache);
- Counter equalities(metrics, "deduplicate.equalities");
- Counter inequalities(metrics, "deduplicate.inequalities");
+ Counter equalities(runtime, "deduplicate.equalities");
+ Counter inequalities(runtime, "deduplicate.inequalities");
{
- Time x(metrics, "find duplicates");
+ const Time x(runtime, "find duplicates");
for (auto& [fp, ids] : partitions) {
while (ids.size() > 1) {
std::vector<Id> todo;
- Id candidate = ids[0];
+ const Id candidate = ids[0];
for (size_t i = 1; i < ids.size(); ++i) {
if (equals(ids[i], candidate)) {
++equalities;
@@ -78,8 +80,8 @@ Id Deduplicate(Graph& graph, Id root, const Hashes& hashes, Metrics& metrics) {
}
// Keep one representative of each set of duplicates.
- Counter unique(metrics, "deduplicate.unique");
- Counter duplicate(metrics, "deduplicate.duplicate");
+ Counter unique(runtime, "deduplicate.unique");
+ Counter duplicate(runtime, "deduplicate.duplicate");
auto remap = [&cache](Id& id) {
// update id to representative id, avoiding silent stores
const Id fid = cache.Find(id);
@@ -89,9 +91,9 @@ Id Deduplicate(Graph& graph, Id root, const Hashes& hashes, Metrics& metrics) {
};
Substitute substitute(graph, remap);
{
- Time x(metrics, "rewrite");
+ const Time x(runtime, "rewrite");
for (const auto& [id, fp] : hashes) {
- Id fid = cache.Find(id);
+ const Id fid = cache.Find(id);
if (fid != id) {
graph.Remove(id);
++duplicate;