aboutsummaryrefslogtreecommitdiff
path: root/src/include/fst/interval-set.h
diff options
context:
space:
mode:
authorIan Hodson <idh@google.com>2012-05-30 21:27:06 +0100
committerIan Hodson <idh@google.com>2012-05-30 22:47:36 +0100
commitf4c12fce1ee58e670f9c3fce46c40296ba9ee8a2 (patch)
treeb131ed907f9b2d5af09c0983b651e9e69bc6aab9 /src/include/fst/interval-set.h
parenta92766f0a6ba4fac46cd6fd3856ef20c3b204f0d (diff)
downloadopenfst-jb-release.tar.gz
Moved from GoogleTTS Change-Id: I6bc6bdadaa53bd0f810b88443339f6d899502cc8
Diffstat (limited to 'src/include/fst/interval-set.h')
-rw-r--r--src/include/fst/interval-set.h381
1 files changed, 381 insertions, 0 deletions
diff --git a/src/include/fst/interval-set.h b/src/include/fst/interval-set.h
new file mode 100644
index 0000000..cf6ac54
--- /dev/null
+++ b/src/include/fst/interval-set.h
@@ -0,0 +1,381 @@
+// interval-set.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Class to represent and operate on sets of intervals.
+
+#ifndef FST_LIB_INTERVAL_SET_H__
+#define FST_LIB_INTERVAL_SET_H__
+
+#include <iostream>
+#include <vector>
+using std::vector;
+
+
+#include <fst/util.h>
+
+
+namespace fst {
+
+// Stores and operates on a set of half-open integral intervals [a,b)
+// of signed integers of type T.
+template <typename T>
+class IntervalSet {
+ public:
+ struct Interval {
+ T begin;
+ T end;
+
+ Interval() : begin(-1), end(-1) {}
+
+ Interval(T b, T e) : begin(b), end(e) {}
+
+ bool operator<(const Interval &i) const {
+ return begin < i.begin || (begin == i.begin && end > i.end);
+ }
+
+ bool operator==(const Interval &i) const {
+ return begin == i.begin && end == i.end;
+ }
+
+ bool operator!=(const Interval &i) const {
+ return begin != i.begin || end != i.end;
+ }
+
+ istream &Read(istream &strm) {
+ T n;
+ ReadType(strm, &n);
+ begin = n;
+ ReadType(strm, &n);
+ end = n;
+ return strm;
+ }
+
+ ostream &Write(ostream &strm) const {
+ T n = begin;
+ WriteType(strm, n);
+ n = end;
+ WriteType(strm, n);
+ return strm;
+ }
+ };
+
+ IntervalSet() : count_(-1) {}
+
+ // Returns the interval set as a vector.
+ vector<Interval> *Intervals() { return &intervals_; }
+
+ const vector<Interval> *Intervals() const { return &intervals_; }
+
+ const bool Empty() const { return intervals_.empty(); }
+
+ const T Size() const { return intervals_.size(); }
+
+ // Number of points in the intervals (undefined if not normalized).
+ const T Count() const { return count_; }
+
+ void Clear() {
+ intervals_.clear();
+ count_ = 0;
+ }
+
+ // Adds an interval set to the set. The result may not be normalized.
+ void Union(const IntervalSet<T> &iset) {
+ const vector<Interval> *intervals = iset.Intervals();
+ for (typename vector<Interval>::const_iterator it = intervals->begin();
+ it != intervals->end(); ++it)
+ intervals_.push_back(*it);
+ }
+
+ // Requires intervals be normalized.
+ bool Member(T value) const {
+ Interval interval(value, value);
+ typename vector<Interval>::const_iterator lb =
+ lower_bound(intervals_.begin(), intervals_.end(), interval);
+ if (lb == intervals_.begin())
+ return false;
+ return (--lb)->end > value;
+ }
+
+ // Requires intervals be normalized.
+ bool operator==(const IntervalSet<T>& iset) const {
+ return *(iset.Intervals()) == intervals_;
+ }
+
+ // Requires intervals be normalized.
+ bool operator!=(const IntervalSet<T>& iset) const {
+ return *(iset.Intervals()) != intervals_;
+ }
+
+ bool Singleton() const {
+ return intervals_.size() == 1 &&
+ intervals_[0].begin + 1 == intervals_[0].end;
+ }
+
+
+ // Sorts; collapses overlapping and adjacent interals; sets count.
+ void Normalize();
+
+ // Intersects an interval set with the set. Requires intervals be
+ // normalized. The result is normalized.
+ void Intersect(const IntervalSet<T> &iset, IntervalSet<T> *oset) const;
+
+ // Complements the set w.r.t [0, maxval). Requires intervals be
+ // normalized. The result is normalized.
+ void Complement(T maxval, IntervalSet<T> *oset) const;
+
+ // Subtract an interval set from the set. Requires intervals be
+ // normalized. The result is normalized.
+ void Difference(const IntervalSet<T> &iset, IntervalSet<T> *oset) const;
+
+ // Determines if an interval set overlaps with the set. Requires
+ // intervals be normalized.
+ bool Overlaps(const IntervalSet<T> &iset) const;
+
+ // Determines if an interval set overlaps with the set but neither
+ // is contained in the other. Requires intervals be normalized.
+ bool StrictlyOverlaps(const IntervalSet<T> &iset) const;
+
+ // Determines if an interval set is contained within the set. Requires
+ // intervals be normalized.
+ bool Contains(const IntervalSet<T> &iset) const;
+
+ istream &Read(istream &strm) {
+ ReadType(strm, &intervals_);
+ return ReadType(strm, &count_);
+ }
+
+ ostream &Write(ostream &strm) const {
+ WriteType(strm, intervals_);
+ return WriteType(strm, count_);
+ }
+
+ private:
+ vector<Interval> intervals_;
+ T count_;
+};
+
+// Sorts; collapses overlapping and adjacent interavls; sets count.
+template <typename T>
+void IntervalSet<T>::Normalize() {
+ sort(intervals_.begin(), intervals_.end());
+
+ count_ = 0;
+ T size = 0;
+ for (T i = 0; i < intervals_.size(); ++i) {
+ Interval &inti = intervals_[i];
+ if (inti.begin == inti.end)
+ continue;
+ for (T j = i + 1; j < intervals_.size(); ++j) {
+ Interval &intj = intervals_[j];
+ if (intj.begin > inti.end)
+ break;
+ if (intj.end > inti.end)
+ inti.end = intj.end;
+ ++i;
+ }
+ count_ += inti.end - inti.begin;
+ intervals_[size++] = inti;
+ }
+ intervals_.resize(size);
+}
+
+// Intersects an interval set with the set. Requires intervals be normalized.
+// The result is normalized.
+template <typename T>
+void IntervalSet<T>::Intersect(const IntervalSet<T> &iset,
+ IntervalSet<T> *oset) const {
+ const vector<Interval> *iintervals = iset.Intervals();
+ vector<Interval> *ointervals = oset->Intervals();
+ typename vector<Interval>::const_iterator it1 = intervals_.begin();
+ typename vector<Interval>::const_iterator it2 = iintervals->begin();
+
+ ointervals->clear();
+ oset->count_ = 0;
+
+ while (it1 != intervals_.end() && it2 != iintervals->end()) {
+ if (it1->end <= it2->begin) {
+ ++it1;
+ } else if (it2->end <= it1->begin) {
+ ++it2;
+ } else {
+ Interval interval;
+ interval.begin = max(it1->begin, it2->begin);
+ interval.end = min(it1->end, it2->end);
+ ointervals->push_back(interval);
+ oset->count_ += interval.end - interval.begin;
+ if (it1->end < it2->end)
+ ++it1;
+ else
+ ++it2;
+ }
+ }
+}
+
+// Complements the set w.r.t [0, maxval). Requires intervals be normalized.
+// The result is normalized.
+template <typename T>
+void IntervalSet<T>::Complement(T maxval, IntervalSet<T> *oset) const {
+ vector<Interval> *ointervals = oset->Intervals();
+ ointervals->clear();
+ oset->count_ = 0;
+
+ Interval interval;
+ interval.begin = 0;
+ for (typename vector<Interval>::const_iterator it = intervals_.begin();
+ it != intervals_.end();
+ ++it) {
+ interval.end = min(it->begin, maxval);
+ if (interval.begin < interval.end) {
+ ointervals->push_back(interval);
+ oset->count_ += interval.end - interval.begin;
+ }
+ interval.begin = it->end;
+ }
+ interval.end = maxval;
+ if (interval.begin < interval.end) {
+ ointervals->push_back(interval);
+ oset->count_ += interval.end - interval.begin;
+ }
+}
+
+// Subtract an interval set from the set. Requires intervals be normalized.
+// The result is normalized.
+template <typename T>
+void IntervalSet<T>::Difference(const IntervalSet<T> &iset,
+ IntervalSet<T> *oset) const {
+ if (intervals_.empty()) {
+ oset->Intervals()->clear();
+ oset->count_ = 0;
+ } else {
+ IntervalSet<T> cset;
+ iset.Complement(intervals_.back().end, &cset);
+ Intersect(cset, oset);
+ }
+}
+
+// Determines if an interval set overlaps with the set. Requires
+// intervals be normalized.
+template <typename T>
+bool IntervalSet<T>::Overlaps(const IntervalSet<T> &iset) const {
+ const vector<Interval> *intervals = iset.Intervals();
+ typename vector<Interval>::const_iterator it1 = intervals_.begin();
+ typename vector<Interval>::const_iterator it2 = intervals->begin();
+
+ while (it1 != intervals_.end() && it2 != intervals->end()) {
+ if (it1->end <= it2->begin) {
+ ++it1;
+ } else if (it2->end <= it1->begin) {
+ ++it2;
+ } else {
+ return true;
+ }
+ }
+ return false;
+}
+
+// Determines if an interval set overlaps with the set but neither
+// is contained in the other. Requires intervals be normalized.
+template <typename T>
+bool IntervalSet<T>::StrictlyOverlaps(const IntervalSet<T> &iset) const {
+ const vector<Interval> *intervals = iset.Intervals();
+ typename vector<Interval>::const_iterator it1 = intervals_.begin();
+ typename vector<Interval>::const_iterator it2 = intervals->begin();
+ bool only1 = false; // point in intervals_ but not intervals
+ bool only2 = false; // point in intervals but not intervals_
+ bool overlap = false; // point in both intervals_ and intervals
+
+ while (it1 != intervals_.end() && it2 != intervals->end()) {
+ if (it1->end <= it2->begin) { // no overlap - it1 first
+ only1 = true;
+ ++it1;
+ } else if (it2->end <= it1->begin) { // no overlap - it2 first
+ only2 = true;
+ ++it2;
+ } else if (it2->begin == it1->begin && it2->end == it1->end) { // equals
+ overlap = true;
+ ++it1;
+ ++it2;
+ } else if (it2->begin <= it1->begin && it2->end >= it1->end) { // 1 c 2
+ only2 = true;
+ overlap = true;
+ ++it1;
+ } else if (it1->begin <= it2->begin && it1->end >= it2->end) { // 2 c 1
+ only1 = true;
+ overlap = true;
+ ++it2;
+ } else { // strict overlap
+ only1 = true;
+ only2 = true;
+ overlap = true;
+ }
+ if (only1 == true && only2 == true && overlap == true)
+ return true;
+ }
+ if (it1 != intervals_.end())
+ only1 = true;
+ if (it2 != intervals->end())
+ only2 = true;
+
+ return only1 == true && only2 == true && overlap == true;
+}
+
+// Determines if an interval set is contained within the set. Requires
+// intervals be normalized.
+template <typename T>
+bool IntervalSet<T>::Contains(const IntervalSet<T> &iset) const {
+ if (iset.Count() > Count())
+ return false;
+
+ const vector<Interval> *intervals = iset.Intervals();
+ typename vector<Interval>::const_iterator it1 = intervals_.begin();
+ typename vector<Interval>::const_iterator it2 = intervals->begin();
+
+ while (it1 != intervals_.end() && it2 != intervals->end()) {
+ if (it1->end <= it2->begin) { // no overlap - it1 first
+ ++it1;
+ } else if (it2->begin < it1->begin || it2->end > it1->end) { // no C
+ return false;
+ } else if (it2->end == it1->end) {
+ ++it1;
+ ++it2;
+ } else {
+ ++it2;
+ }
+ }
+ return it2 == intervals->end();
+}
+
+template <typename T>
+ostream &operator<<(ostream &strm, const IntervalSet<T> &s) {
+ typedef typename IntervalSet<T>::Interval Interval;
+ const vector<Interval> *intervals = s.Intervals();
+ strm << "{";
+ for (typename vector<Interval>::const_iterator it = intervals->begin();
+ it != intervals->end();
+ ++it) {
+ if (it != intervals->begin())
+ strm << ",";
+ strm << "[" << it->begin << "," << it->end << ")";
+ }
+ strm << "}";
+ return strm;
+}
+
+} // namespace fst
+
+#endif // FST_LIB_INTERVAL_SET_H__