aboutsummaryrefslogtreecommitdiff
path: root/src/include/fst/string-weight.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/include/fst/string-weight.h')
-rw-r--r--src/include/fst/string-weight.h560
1 files changed, 560 insertions, 0 deletions
diff --git a/src/include/fst/string-weight.h b/src/include/fst/string-weight.h
new file mode 100644
index 0000000..1beeb33
--- /dev/null
+++ b/src/include/fst/string-weight.h
@@ -0,0 +1,560 @@
+// string-weight.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// String weight set and associated semiring operation definitions.
+
+#ifndef FST_LIB_STRING_WEIGHT_H__
+#define FST_LIB_STRING_WEIGHT_H__
+
+#include <list>
+#include <string>
+
+#include <fst/product-weight.h>
+#include <fst/weight.h>
+
+namespace fst {
+
+const int kStringInfinity = -1; // Label for the infinite string
+const int kStringBad = -2; // Label for a non-string
+const char kStringSeparator = '_'; // Label separator in strings
+
+// Determines whether to use left or right string semiring. Includes
+// restricted versions that signal an error if proper prefixes
+// (suffixes) would otherwise be returned by Plus, useful with various
+// algorithms that require functional transducer input with the
+// string semirings.
+enum StringType { STRING_LEFT = 0, STRING_RIGHT = 1 ,
+ STRING_LEFT_RESTRICT = 2, STRING_RIGHT_RESTRICT };
+
+#define REVERSE_STRING_TYPE(S) \
+ ((S) == STRING_LEFT ? STRING_RIGHT : \
+ ((S) == STRING_RIGHT ? STRING_LEFT : \
+ ((S) == STRING_LEFT_RESTRICT ? STRING_RIGHT_RESTRICT : \
+ STRING_LEFT_RESTRICT)))
+
+template <typename L, StringType S = STRING_LEFT>
+class StringWeight;
+
+template <typename L, StringType S = STRING_LEFT>
+class StringWeightIterator;
+
+template <typename L, StringType S = STRING_LEFT>
+class StringWeightReverseIterator;
+
+template <typename L, StringType S>
+bool operator==(const StringWeight<L, S> &, const StringWeight<L, S> &);
+
+
+// String semiring: (longest_common_prefix/suffix, ., Infinity, Epsilon)
+template <typename L, StringType S>
+class StringWeight {
+ public:
+ typedef L Label;
+ typedef StringWeight<L, REVERSE_STRING_TYPE(S)> ReverseWeight;
+
+ friend class StringWeightIterator<L, S>;
+ friend class StringWeightReverseIterator<L, S>;
+ friend bool operator==<>(const StringWeight<L, S> &,
+ const StringWeight<L, S> &);
+
+ StringWeight() { Init(); }
+
+ template <typename Iter>
+ StringWeight(const Iter &begin, const Iter &end) {
+ Init();
+ for (Iter iter = begin; iter != end; ++iter)
+ PushBack(*iter);
+ }
+
+ explicit StringWeight(L l) { Init(); PushBack(l); }
+
+ static const StringWeight<L, S> &Zero() {
+ static const StringWeight<L, S> zero(kStringInfinity);
+ return zero;
+ }
+
+ static const StringWeight<L, S> &One() {
+ static const StringWeight<L, S> one;
+ return one;
+ }
+
+ static const StringWeight<L, S> &NoWeight() {
+ static const StringWeight<L, S> no_weight(kStringBad);
+ return no_weight;
+ }
+
+ static const string &Type() {
+ static const string type =
+ S == STRING_LEFT ? "string" :
+ (S == STRING_RIGHT ? "right_string" :
+ (S == STRING_LEFT_RESTRICT ? "restricted_string" :
+ "right_restricted_string"));
+ return type;
+ }
+
+ bool Member() const;
+
+ istream &Read(istream &strm);
+
+ ostream &Write(ostream &strm) const;
+
+ size_t Hash() const;
+
+ StringWeight<L, S> Quantize(float delta = kDelta) const {
+ return *this;
+ }
+
+ ReverseWeight Reverse() const;
+
+ static uint64 Properties() {
+ return (S == STRING_LEFT || S == STRING_LEFT_RESTRICT ?
+ kLeftSemiring : kRightSemiring) | kIdempotent;
+ }
+
+ // NB: This needs to be uncommented only if default fails for this impl.
+ // StringWeight<L, S> &operator=(const StringWeight<L, S> &w);
+
+ // These operations combined with the StringWeightIterator and
+ // StringWeightReverseIterator provide the access and mutation of
+ // the string internal elements.
+
+ // Common initializer among constructors.
+ void Init() { first_ = 0; }
+
+ // Clear existing StringWeight.
+ void Clear() { first_ = 0; rest_.clear(); }
+
+ size_t Size() const { return first_ ? rest_.size() + 1 : 0; }
+
+ void PushFront(L l) {
+ if (first_)
+ rest_.push_front(first_);
+ first_ = l;
+ }
+
+ void PushBack(L l) {
+ if (!first_)
+ first_ = l;
+ else
+ rest_.push_back(l);
+ }
+
+ private:
+ L first_; // first label in string (0 if empty)
+ list<L> rest_; // remaining labels in string
+};
+
+
+// Traverses string in forward direction.
+template <typename L, StringType S>
+class StringWeightIterator {
+ public:
+ explicit StringWeightIterator(const StringWeight<L, S>& w)
+ : first_(w.first_), rest_(w.rest_), init_(true),
+ iter_(rest_.begin()) {}
+
+ bool Done() const {
+ if (init_) return first_ == 0;
+ else return iter_ == rest_.end();
+ }
+
+ const L& Value() const { return init_ ? first_ : *iter_; }
+
+ void Next() {
+ if (init_) init_ = false;
+ else ++iter_;
+ }
+
+ void Reset() {
+ init_ = true;
+ iter_ = rest_.begin();
+ }
+
+ private:
+ const L &first_;
+ const list<L> &rest_;
+ bool init_; // in the initialized state?
+ typename list<L>::const_iterator iter_;
+
+ DISALLOW_COPY_AND_ASSIGN(StringWeightIterator);
+};
+
+
+// Traverses string in backward direction.
+template <typename L, StringType S>
+class StringWeightReverseIterator {
+ public:
+ explicit StringWeightReverseIterator(const StringWeight<L, S>& w)
+ : first_(w.first_), rest_(w.rest_), fin_(first_ == 0),
+ iter_(rest_.rbegin()) {}
+
+ bool Done() const { return fin_; }
+
+ const L& Value() const { return iter_ == rest_.rend() ? first_ : *iter_; }
+
+ void Next() {
+ if (iter_ == rest_.rend()) fin_ = true;
+ else ++iter_;
+ }
+
+ void Reset() {
+ fin_ = false;
+ iter_ = rest_.rbegin();
+ }
+
+ private:
+ const L &first_;
+ const list<L> &rest_;
+ bool fin_; // in the final state?
+ typename list<L>::const_reverse_iterator iter_;
+
+ DISALLOW_COPY_AND_ASSIGN(StringWeightReverseIterator);
+};
+
+
+// StringWeight member functions follow that require
+// StringWeightIterator or StringWeightReverseIterator.
+
+template <typename L, StringType S>
+inline istream &StringWeight<L, S>::Read(istream &strm) {
+ Clear();
+ int32 size;
+ ReadType(strm, &size);
+ for (int i = 0; i < size; ++i) {
+ L label;
+ ReadType(strm, &label);
+ PushBack(label);
+ }
+ return strm;
+}
+
+template <typename L, StringType S>
+inline ostream &StringWeight<L, S>::Write(ostream &strm) const {
+ int32 size = Size();
+ WriteType(strm, size);
+ for (StringWeightIterator<L, S> iter(*this); !iter.Done(); iter.Next()) {
+ L label = iter.Value();
+ WriteType(strm, label);
+ }
+ return strm;
+}
+
+template <typename L, StringType S>
+inline bool StringWeight<L, S>::Member() const {
+ if (Size() != 1)
+ return true;
+ StringWeightIterator<L, S> iter(*this);
+ return iter.Value() != kStringBad;
+}
+
+template <typename L, StringType S>
+inline typename StringWeight<L, S>::ReverseWeight
+StringWeight<L, S>::Reverse() const {
+ ReverseWeight rw;
+ for (StringWeightIterator<L, S> iter(*this); !iter.Done(); iter.Next())
+ rw.PushFront(iter.Value());
+ return rw;
+}
+
+template <typename L, StringType S>
+inline size_t StringWeight<L, S>::Hash() const {
+ size_t h = 0;
+ for (StringWeightIterator<L, S> iter(*this); !iter.Done(); iter.Next())
+ h ^= h<<1 ^ iter.Value();
+ return h;
+}
+
+// NB: This needs to be uncommented only if default fails for this the impl.
+//
+// template <typename L, StringType S>
+// inline StringWeight<L, S>
+// &StringWeight<L, S>::operator=(const StringWeight<L, S> &w) {
+// if (this != &w) {
+// Clear();
+// for (StringWeightIterator<L, S> iter(w); !iter.Done(); iter.Next())
+// PushBack(iter.Value());
+// }
+// return *this;
+// }
+
+template <typename L, StringType S>
+inline bool operator==(const StringWeight<L, S> &w1,
+ const StringWeight<L, S> &w2) {
+ if (w1.Size() != w2.Size())
+ return false;
+
+ StringWeightIterator<L, S> iter1(w1);
+ StringWeightIterator<L, S> iter2(w2);
+
+ for (; !iter1.Done() ; iter1.Next(), iter2.Next())
+ if (iter1.Value() != iter2.Value())
+ return false;
+
+ return true;
+}
+
+template <typename L, StringType S>
+inline bool operator!=(const StringWeight<L, S> &w1,
+ const StringWeight<L, S> &w2) {
+ return !(w1 == w2);
+}
+
+template <typename L, StringType S>
+inline bool ApproxEqual(const StringWeight<L, S> &w1,
+ const StringWeight<L, S> &w2,
+ float delta = kDelta) {
+ return w1 == w2;
+}
+
+template <typename L, StringType S>
+inline ostream &operator<<(ostream &strm, const StringWeight<L, S> &w) {
+ StringWeightIterator<L, S> iter(w);
+ if (iter.Done())
+ return strm << "Epsilon";
+ else if (iter.Value() == kStringInfinity)
+ return strm << "Infinity";
+ else if (iter.Value() == kStringBad)
+ return strm << "BadString";
+ else
+ for (size_t i = 0; !iter.Done(); ++i, iter.Next()) {
+ if (i > 0)
+ strm << kStringSeparator;
+ strm << iter.Value();
+ }
+ return strm;
+}
+
+template <typename L, StringType S>
+inline istream &operator>>(istream &strm, StringWeight<L, S> &w) {
+ string s;
+ strm >> s;
+ if (s == "Infinity") {
+ w = StringWeight<L, S>::Zero();
+ } else if (s == "Epsilon") {
+ w = StringWeight<L, S>::One();
+ } else {
+ w.Clear();
+ char *p = 0;
+ for (const char *cs = s.c_str(); !p || *p != '\0'; cs = p + 1) {
+ int l = strtoll(cs, &p, 10);
+ if (p == cs || (*p != 0 && *p != kStringSeparator)) {
+ strm.clear(std::ios::badbit);
+ break;
+ }
+ w.PushBack(l);
+ }
+ }
+ return strm;
+}
+
+
+// Default is for the restricted left and right semirings. String
+// equality is required (for non-Zero() input. This restriction
+// is used in e.g. Determinize to ensure functional input.
+template <typename L, StringType S> inline StringWeight<L, S>
+Plus(const StringWeight<L, S> &w1,
+ const StringWeight<L, S> &w2) {
+ if (!w1.Member() || !w2.Member())
+ return StringWeight<L, S>::NoWeight();
+ if (w1 == StringWeight<L, S>::Zero())
+ return w2;
+ if (w2 == StringWeight<L, S>::Zero())
+ return w1;
+
+ if (w1 != w2) {
+ FSTERROR() << "StringWeight::Plus: unequal arguments "
+ << "(non-functional FST?)"
+ << " w1 = " << w1
+ << " w2 = " << w2;
+ return StringWeight<L, S>::NoWeight();
+ }
+
+ return w1;
+}
+
+
+// Longest common prefix for left string semiring.
+template <typename L> inline StringWeight<L, STRING_LEFT>
+Plus(const StringWeight<L, STRING_LEFT> &w1,
+ const StringWeight<L, STRING_LEFT> &w2) {
+ if (!w1.Member() || !w2.Member())
+ return StringWeight<L, STRING_LEFT>::NoWeight();
+ if (w1 == StringWeight<L, STRING_LEFT>::Zero())
+ return w2;
+ if (w2 == StringWeight<L, STRING_LEFT>::Zero())
+ return w1;
+
+ StringWeight<L, STRING_LEFT> sum;
+ StringWeightIterator<L, STRING_LEFT> iter1(w1);
+ StringWeightIterator<L, STRING_LEFT> iter2(w2);
+ for (; !iter1.Done() && !iter2.Done() && iter1.Value() == iter2.Value();
+ iter1.Next(), iter2.Next())
+ sum.PushBack(iter1.Value());
+ return sum;
+}
+
+
+// Longest common suffix for right string semiring.
+template <typename L> inline StringWeight<L, STRING_RIGHT>
+Plus(const StringWeight<L, STRING_RIGHT> &w1,
+ const StringWeight<L, STRING_RIGHT> &w2) {
+ if (!w1.Member() || !w2.Member())
+ return StringWeight<L, STRING_RIGHT>::NoWeight();
+ if (w1 == StringWeight<L, STRING_RIGHT>::Zero())
+ return w2;
+ if (w2 == StringWeight<L, STRING_RIGHT>::Zero())
+ return w1;
+
+ StringWeight<L, STRING_RIGHT> sum;
+ StringWeightReverseIterator<L, STRING_RIGHT> iter1(w1);
+ StringWeightReverseIterator<L, STRING_RIGHT> iter2(w2);
+ for (; !iter1.Done() && !iter2.Done() && iter1.Value() == iter2.Value();
+ iter1.Next(), iter2.Next())
+ sum.PushFront(iter1.Value());
+ return sum;
+}
+
+
+template <typename L, StringType S>
+inline StringWeight<L, S> Times(const StringWeight<L, S> &w1,
+ const StringWeight<L, S> &w2) {
+ if (!w1.Member() || !w2.Member())
+ return StringWeight<L, S>::NoWeight();
+ if (w1 == StringWeight<L, S>::Zero() || w2 == StringWeight<L, S>::Zero())
+ return StringWeight<L, S>::Zero();
+
+ StringWeight<L, S> prod(w1);
+ for (StringWeightIterator<L, S> iter(w2); !iter.Done(); iter.Next())
+ prod.PushBack(iter.Value());
+
+ return prod;
+}
+
+
+// Default is for left division in the left string and the
+// left restricted string semirings.
+template <typename L, StringType S> inline StringWeight<L, S>
+Divide(const StringWeight<L, S> &w1,
+ const StringWeight<L, S> &w2,
+ DivideType typ) {
+
+ if (typ != DIVIDE_LEFT) {
+ FSTERROR() << "StringWeight::Divide: only left division is defined "
+ << "for the " << StringWeight<L, S>::Type() << " semiring";
+ return StringWeight<L, S>::NoWeight();
+ }
+
+ if (!w1.Member() || !w2.Member())
+ return StringWeight<L, S>::NoWeight();
+
+ if (w2 == StringWeight<L, S>::Zero())
+ return StringWeight<L, S>(kStringBad);
+ else if (w1 == StringWeight<L, S>::Zero())
+ return StringWeight<L, S>::Zero();
+
+ StringWeight<L, S> div;
+ StringWeightIterator<L, S> iter(w1);
+ for (int i = 0; !iter.Done(); iter.Next(), ++i) {
+ if (i >= w2.Size())
+ div.PushBack(iter.Value());
+ }
+ return div;
+}
+
+
+// Right division in the right string semiring.
+template <typename L> inline StringWeight<L, STRING_RIGHT>
+Divide(const StringWeight<L, STRING_RIGHT> &w1,
+ const StringWeight<L, STRING_RIGHT> &w2,
+ DivideType typ) {
+
+ if (typ != DIVIDE_RIGHT) {
+ FSTERROR() << "StringWeight::Divide: only right division is defined "
+ << "for the right string semiring";
+ return StringWeight<L, STRING_RIGHT>::NoWeight();
+ }
+
+ if (!w1.Member() || !w2.Member())
+ return StringWeight<L, STRING_RIGHT>::NoWeight();
+
+ if (w2 == StringWeight<L, STRING_RIGHT>::Zero())
+ return StringWeight<L, STRING_RIGHT>(kStringBad);
+ else if (w1 == StringWeight<L, STRING_RIGHT>::Zero())
+ return StringWeight<L, STRING_RIGHT>::Zero();
+
+ StringWeight<L, STRING_RIGHT> div;
+ StringWeightReverseIterator<L, STRING_RIGHT> iter(w1);
+ for (int i = 0; !iter.Done(); iter.Next(), ++i) {
+ if (i >= w2.Size())
+ div.PushFront(iter.Value());
+ }
+ return div;
+}
+
+
+// Right division in the right restricted string semiring.
+template <typename L> inline StringWeight<L, STRING_RIGHT_RESTRICT>
+Divide(const StringWeight<L, STRING_RIGHT_RESTRICT> &w1,
+ const StringWeight<L, STRING_RIGHT_RESTRICT> &w2,
+ DivideType typ) {
+
+ if (typ != DIVIDE_RIGHT) {
+ FSTERROR() << "StringWeight::Divide: only right division is defined "
+ << "for the right restricted string semiring";
+ return StringWeight<L, STRING_RIGHT_RESTRICT>::NoWeight();
+ }
+
+ if (!w1.Member() || !w2.Member())
+ return StringWeight<L, STRING_RIGHT_RESTRICT>::NoWeight();
+
+ if (w2 == StringWeight<L, STRING_RIGHT_RESTRICT>::Zero())
+ return StringWeight<L, STRING_RIGHT_RESTRICT>(kStringBad);
+ else if (w1 == StringWeight<L, STRING_RIGHT_RESTRICT>::Zero())
+ return StringWeight<L, STRING_RIGHT_RESTRICT>::Zero();
+
+ StringWeight<L, STRING_RIGHT_RESTRICT> div;
+ StringWeightReverseIterator<L, STRING_RIGHT_RESTRICT> iter(w1);
+ for (int i = 0; !iter.Done(); iter.Next(), ++i) {
+ if (i >= w2.Size())
+ div.PushFront(iter.Value());
+ }
+ return div;
+}
+
+
+// Product of string weight and an arbitray weight.
+template <class L, class W, StringType S = STRING_LEFT>
+struct GallicWeight : public ProductWeight<StringWeight<L, S>, W> {
+ typedef GallicWeight<L, typename W::ReverseWeight, REVERSE_STRING_TYPE(S)>
+ ReverseWeight;
+
+ GallicWeight() {}
+
+ GallicWeight(StringWeight<L, S> w1, W w2)
+ : ProductWeight<StringWeight<L, S>, W>(w1, w2) {}
+
+ explicit GallicWeight(const string &s, int *nread = 0)
+ : ProductWeight<StringWeight<L, S>, W>(s, nread) {}
+
+ GallicWeight(const ProductWeight<StringWeight<L, S>, W> &w)
+ : ProductWeight<StringWeight<L, S>, W>(w) {}
+};
+
+} // namespace fst
+
+#endif // FST_LIB_STRING_WEIGHT_H__