diff options
Diffstat (limited to 'deduplication.cc')
-rw-r--r-- | deduplication.cc | 36 |
1 files changed, 19 insertions, 17 deletions
diff --git a/deduplication.cc b/deduplication.cc index a3b1f9d..b71c599 100644 --- a/deduplication.cc +++ b/deduplication.cc @@ -21,31 +21,33 @@ #include <cstddef> #include <unordered_map> +#include <utility> #include <vector> #include "equality.h" #include "equality_cache.h" #include "graph.h" -#include "metrics.h" +#include "hashing.h" +#include "runtime.h" #include "substitution.h" namespace stg { -Id Deduplicate(Graph& graph, Id root, const Hashes& hashes, Metrics& metrics) { +Id Deduplicate(Runtime& runtime, Graph& graph, Id root, const Hashes& hashes) { // Partition the nodes by hash. std::unordered_map<HashValue, std::vector<Id>> partitions; { - Time x(metrics, "partition nodes"); + const Time x(runtime, "partition nodes"); for (const auto& [id, fp] : hashes) { partitions[fp].push_back(id); } } - Counter(metrics, "deduplicate.nodes") = hashes.size(); - Counter(metrics, "deduplicate.hashes") = partitions.size(); + Counter(runtime, "deduplicate.nodes") = hashes.size(); + Counter(runtime, "deduplicate.hashes") = partitions.size(); - Histogram hash_partition_size(metrics, "deduplicate.hash_partition_size"); - Counter min_comparisons(metrics, "deduplicate.min_comparisons"); - Counter max_comparisons(metrics, "deduplicate.max_comparisons"); + Histogram hash_partition_size(runtime, "deduplicate.hash_partition_size"); + Counter min_comparisons(runtime, "deduplicate.min_comparisons"); + Counter max_comparisons(runtime, "deduplicate.max_comparisons"); for (const auto& [fp, ids] : partitions) { const auto n = ids.size(); hash_partition_size.Add(n); @@ -54,16 +56,16 @@ Id Deduplicate(Graph& graph, Id root, const Hashes& hashes, Metrics& metrics) { } // Refine partitions of nodes with the same fingerprints. - EqualityCache cache(hashes, metrics); + EqualityCache cache(runtime, hashes); Equals<EqualityCache> equals(graph, cache); - Counter equalities(metrics, "deduplicate.equalities"); - Counter inequalities(metrics, "deduplicate.inequalities"); + Counter equalities(runtime, "deduplicate.equalities"); + Counter inequalities(runtime, "deduplicate.inequalities"); { - Time x(metrics, "find duplicates"); + const Time x(runtime, "find duplicates"); for (auto& [fp, ids] : partitions) { while (ids.size() > 1) { std::vector<Id> todo; - Id candidate = ids[0]; + const Id candidate = ids[0]; for (size_t i = 1; i < ids.size(); ++i) { if (equals(ids[i], candidate)) { ++equalities; @@ -78,8 +80,8 @@ Id Deduplicate(Graph& graph, Id root, const Hashes& hashes, Metrics& metrics) { } // Keep one representative of each set of duplicates. - Counter unique(metrics, "deduplicate.unique"); - Counter duplicate(metrics, "deduplicate.duplicate"); + Counter unique(runtime, "deduplicate.unique"); + Counter duplicate(runtime, "deduplicate.duplicate"); auto remap = [&cache](Id& id) { // update id to representative id, avoiding silent stores const Id fid = cache.Find(id); @@ -89,9 +91,9 @@ Id Deduplicate(Graph& graph, Id root, const Hashes& hashes, Metrics& metrics) { }; Substitute substitute(graph, remap); { - Time x(metrics, "rewrite"); + const Time x(runtime, "rewrite"); for (const auto& [id, fp] : hashes) { - Id fid = cache.Find(id); + const Id fid = cache.Find(id); if (fid != id) { graph.Remove(id); ++duplicate; |