aboutsummaryrefslogtreecommitdiff
path: root/src/include/fst/matcher.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/include/fst/matcher.h')
-rw-r--r--src/include/fst/matcher.h1116
1 files changed, 1116 insertions, 0 deletions
diff --git a/src/include/fst/matcher.h b/src/include/fst/matcher.h
new file mode 100644
index 0000000..a89325b
--- /dev/null
+++ b/src/include/fst/matcher.h
@@ -0,0 +1,1116 @@
+// matcher.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Classes to allow matching labels leaving FST states.
+
+#ifndef FST_LIB_MATCHER_H__
+#define FST_LIB_MATCHER_H__
+
+#include <algorithm>
+#include <set>
+
+#include <fst/mutable-fst.h> // for all internal FST accessors
+
+
+namespace fst {
+
+// MATCHERS - these can find and iterate through requested labels at
+// FST states. In the simplest form, these are just some associative
+// map or search keyed on labels. More generally, they may
+// implement matching special labels that represent sets of labels
+// such as 'sigma' (all), 'rho' (rest), or 'phi' (fail).
+// The Matcher interface is:
+//
+// template <class F>
+// class Matcher {
+// public:
+// typedef F FST;
+// typedef F::Arc Arc;
+// typedef typename Arc::StateId StateId;
+// typedef typename Arc::Label Label;
+// typedef typename Arc::Weight Weight;
+//
+// // Required constructors.
+// Matcher(const F &fst, MatchType type);
+// // If safe=true, the copy is thread-safe. See Fst<>::Copy()
+// // for further doc.
+// Matcher(const Matcher &matcher, bool safe = false);
+//
+// // If safe=true, the copy is thread-safe. See Fst<>::Copy()
+// // for further doc.
+// Matcher<F> *Copy(bool safe = false) const;
+//
+// // Returns the match type that can be provided (depending on
+// // compatibility of the input FST). It is either
+// // the requested match type, MATCH_NONE, or MATCH_UNKNOWN.
+// // If 'test' is false, a constant time test is performed, but
+// // MATCH_UNKNOWN may be returned. If 'test' is true,
+// // a definite answer is returned, but may involve more costly
+// // computation (e.g., visiting the Fst).
+// MatchType Type(bool test) const;
+// // Specifies the current state.
+// void SetState(StateId s);
+//
+// // This finds matches to a label at the current state.
+// // Returns true if a match found. kNoLabel matches any
+// // 'non-consuming' transitions, e.g., epsilon transitions,
+// // which do not require a matching symbol.
+// bool Find(Label label);
+// // These iterate through any matches found:
+// bool Done() const; // No more matches.
+// const A& Value() const; // Current arc (when !Done)
+// void Next(); // Advance to next arc (when !Done)
+//
+// // Return matcher FST.
+// const F& GetFst() const;
+// // This specifies the known Fst properties as viewed from this
+// // matcher. It takes as argument the input Fst's known properties.
+// uint64 Properties(uint64 props) const;
+// };
+
+// Flags used for basic matchers (see also lookahead.h).
+const uint32 kMatcherFlags = 0x00000000;
+
+// Matcher interface, templated on the Arc definition; used
+// for matcher specializations that are returned by the
+// InitMatcher Fst method.
+template <class A>
+class MatcherBase {
+ public:
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+ typedef typename A::Label Label;
+ typedef typename A::Weight Weight;
+
+ virtual ~MatcherBase() {}
+
+ virtual MatcherBase<A> *Copy(bool safe = false) const = 0;
+ virtual MatchType Type(bool test) const = 0;
+ void SetState(StateId s) { SetState_(s); }
+ bool Find(Label label) { return Find_(label); }
+ bool Done() const { return Done_(); }
+ const A& Value() const { return Value_(); }
+ void Next() { Next_(); }
+ virtual const Fst<A> &GetFst() const = 0;
+ virtual uint64 Properties(uint64 props) const = 0;
+ virtual uint32 Flags() const { return 0; }
+ private:
+ virtual void SetState_(StateId s) = 0;
+ virtual bool Find_(Label label) = 0;
+ virtual bool Done_() const = 0;
+ virtual const A& Value_() const = 0;
+ virtual void Next_() = 0;
+};
+
+
+// A matcher that expects sorted labels on the side to be matched.
+// If match_type == MATCH_INPUT, epsilons match the implicit self loop
+// Arc(kNoLabel, 0, Weight::One(), current_state) as well as any
+// actual epsilon transitions. If match_type == MATCH_OUTPUT, then
+// Arc(0, kNoLabel, Weight::One(), current_state) is instead matched.
+template <class F>
+class SortedMatcher : public MatcherBase<typename F::Arc> {
+ public:
+ typedef F FST;
+ typedef typename F::Arc Arc;
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Label Label;
+ typedef typename Arc::Weight Weight;
+
+ // Labels >= binary_label will be searched for by binary search,
+ // o.w. linear search is used.
+ SortedMatcher(const F &fst, MatchType match_type,
+ Label binary_label = 1)
+ : fst_(fst.Copy()),
+ s_(kNoStateId),
+ aiter_(0),
+ match_type_(match_type),
+ binary_label_(binary_label),
+ match_label_(kNoLabel),
+ narcs_(0),
+ loop_(kNoLabel, 0, Weight::One(), kNoStateId),
+ error_(false) {
+ switch(match_type_) {
+ case MATCH_INPUT:
+ case MATCH_NONE:
+ break;
+ case MATCH_OUTPUT:
+ swap(loop_.ilabel, loop_.olabel);
+ break;
+ default:
+ FSTERROR() << "SortedMatcher: bad match type";
+ match_type_ = MATCH_NONE;
+ error_ = true;
+ }
+ }
+
+ SortedMatcher(const SortedMatcher<F> &matcher, bool safe = false)
+ : fst_(matcher.fst_->Copy(safe)),
+ s_(kNoStateId),
+ aiter_(0),
+ match_type_(matcher.match_type_),
+ binary_label_(matcher.binary_label_),
+ match_label_(kNoLabel),
+ narcs_(0),
+ loop_(matcher.loop_),
+ error_(matcher.error_) {}
+
+ virtual ~SortedMatcher() {
+ if (aiter_)
+ delete aiter_;
+ delete fst_;
+ }
+
+ virtual SortedMatcher<F> *Copy(bool safe = false) const {
+ return new SortedMatcher<F>(*this, safe);
+ }
+
+ virtual MatchType Type(bool test) const {
+ if (match_type_ == MATCH_NONE)
+ return match_type_;
+
+ uint64 true_prop = match_type_ == MATCH_INPUT ?
+ kILabelSorted : kOLabelSorted;
+ uint64 false_prop = match_type_ == MATCH_INPUT ?
+ kNotILabelSorted : kNotOLabelSorted;
+ uint64 props = fst_->Properties(true_prop | false_prop, test);
+
+ if (props & true_prop)
+ return match_type_;
+ else if (props & false_prop)
+ return MATCH_NONE;
+ else
+ return MATCH_UNKNOWN;
+ }
+
+ void SetState(StateId s) {
+ if (s_ == s)
+ return;
+ s_ = s;
+ if (match_type_ == MATCH_NONE) {
+ FSTERROR() << "SortedMatcher: bad match type";
+ error_ = true;
+ }
+ if (aiter_)
+ delete aiter_;
+ aiter_ = new ArcIterator<F>(*fst_, s);
+ aiter_->SetFlags(kArcNoCache, kArcNoCache);
+ narcs_ = internal::NumArcs(*fst_, s);
+ loop_.nextstate = s;
+ }
+
+ bool Find(Label match_label);
+
+ bool Done() const {
+ if (current_loop_)
+ return false;
+ if (aiter_->Done())
+ return true;
+ aiter_->SetFlags(
+ match_type_ == MATCH_INPUT ? kArcILabelValue : kArcOLabelValue,
+ kArcValueFlags);
+ Label label = match_type_ == MATCH_INPUT ?
+ aiter_->Value().ilabel : aiter_->Value().olabel;
+ return label != match_label_;
+ }
+
+ const Arc& Value() const {
+ if (current_loop_) {
+ return loop_;
+ }
+ aiter_->SetFlags(kArcValueFlags, kArcValueFlags);
+ return aiter_->Value();
+ }
+
+ void Next() {
+ if (current_loop_)
+ current_loop_ = false;
+ else
+ aiter_->Next();
+ }
+
+ virtual const F &GetFst() const { return *fst_; }
+
+ virtual uint64 Properties(uint64 inprops) const {
+ uint64 outprops = inprops;
+ if (error_) outprops |= kError;
+ return outprops;
+ }
+
+ private:
+ virtual void SetState_(StateId s) { SetState(s); }
+ virtual bool Find_(Label label) { return Find(label); }
+ virtual bool Done_() const { return Done(); }
+ virtual const Arc& Value_() const { return Value(); }
+ virtual void Next_() { Next(); }
+
+ const F *fst_;
+ StateId s_; // Current state
+ ArcIterator<F> *aiter_; // Iterator for current state
+ MatchType match_type_; // Type of match to perform
+ Label binary_label_; // Least label for binary search
+ Label match_label_; // Current label to be matched
+ size_t narcs_; // Current state arc count
+ Arc loop_; // For non-consuming symbols
+ bool current_loop_; // Current arc is the implicit loop
+ bool error_; // Error encountered
+
+ void operator=(const SortedMatcher<F> &); // Disallow
+};
+
+template <class F> inline
+bool SortedMatcher<F>::Find(Label match_label) {
+ if (error_) {
+ current_loop_ = false;
+ match_label_ = kNoLabel;
+ return false;
+ }
+ current_loop_ = match_label == 0;
+ match_label_ = match_label == kNoLabel ? 0 : match_label;
+ aiter_->SetFlags(
+ match_type_ == MATCH_INPUT ? kArcILabelValue : kArcOLabelValue,
+ kArcValueFlags);
+ if (match_label_ >= binary_label_) {
+ // Binary search for match.
+ size_t low = 0;
+ size_t high = narcs_;
+ while (low < high) {
+ size_t mid = (low + high) / 2;
+ aiter_->Seek(mid);
+ Label label = match_type_ == MATCH_INPUT ?
+ aiter_->Value().ilabel : aiter_->Value().olabel;
+ if (label > match_label_) {
+ high = mid;
+ } else if (label < match_label_) {
+ low = mid + 1;
+ } else {
+ // find first matching label (when non-determinism)
+ for (size_t i = mid; i > low; --i) {
+ aiter_->Seek(i - 1);
+ label = match_type_ == MATCH_INPUT ? aiter_->Value().ilabel :
+ aiter_->Value().olabel;
+ if (label != match_label_) {
+ aiter_->Seek(i);
+ return true;
+ }
+ }
+ return true;
+ }
+ }
+ return current_loop_;
+ } else {
+ // Linear search for match.
+ for (aiter_->Reset(); !aiter_->Done(); aiter_->Next()) {
+ Label label = match_type_ == MATCH_INPUT ?
+ aiter_->Value().ilabel : aiter_->Value().olabel;
+ if (label == match_label_) {
+ return true;
+ }
+ if (label > match_label_)
+ break;
+ }
+ return current_loop_;
+ }
+}
+
+
+// Specifies whether during matching we rewrite both the input and output sides.
+enum MatcherRewriteMode {
+ MATCHER_REWRITE_AUTO = 0, // Rewrites both sides iff acceptor.
+ MATCHER_REWRITE_ALWAYS,
+ MATCHER_REWRITE_NEVER
+};
+
+
+// For any requested label that doesn't match at a state, this matcher
+// considers all transitions that match the label 'rho_label' (rho =
+// 'rest'). Each such rho transition found is returned with the
+// rho_label rewritten as the requested label (both sides if an
+// acceptor, or if 'rewrite_both' is true and both input and output
+// labels of the found transition are 'rho_label'). If 'rho_label' is
+// kNoLabel, this special matching is not done. RhoMatcher is
+// templated itself on a matcher, which is used to perform the
+// underlying matching. By default, the underlying matcher is
+// constructed by RhoMatcher. The user can instead pass in this
+// object; in that case, RhoMatcher takes its ownership.
+template <class M>
+class RhoMatcher : public MatcherBase<typename M::Arc> {
+ public:
+ typedef typename M::FST FST;
+ typedef typename M::Arc Arc;
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Label Label;
+ typedef typename Arc::Weight Weight;
+
+ RhoMatcher(const FST &fst,
+ MatchType match_type,
+ Label rho_label = kNoLabel,
+ MatcherRewriteMode rewrite_mode = MATCHER_REWRITE_AUTO,
+ M *matcher = 0)
+ : matcher_(matcher ? matcher : new M(fst, match_type)),
+ match_type_(match_type),
+ rho_label_(rho_label),
+ error_(false) {
+ if (match_type == MATCH_BOTH) {
+ FSTERROR() << "RhoMatcher: bad match type";
+ match_type_ = MATCH_NONE;
+ error_ = true;
+ }
+ if (rho_label == 0) {
+ FSTERROR() << "RhoMatcher: 0 cannot be used as rho_label";
+ rho_label_ = kNoLabel;
+ error_ = true;
+ }
+
+ if (rewrite_mode == MATCHER_REWRITE_AUTO)
+ rewrite_both_ = fst.Properties(kAcceptor, true);
+ else if (rewrite_mode == MATCHER_REWRITE_ALWAYS)
+ rewrite_both_ = true;
+ else
+ rewrite_both_ = false;
+ }
+
+ RhoMatcher(const RhoMatcher<M> &matcher, bool safe = false)
+ : matcher_(new M(*matcher.matcher_, safe)),
+ match_type_(matcher.match_type_),
+ rho_label_(matcher.rho_label_),
+ rewrite_both_(matcher.rewrite_both_),
+ error_(matcher.error_) {}
+
+ virtual ~RhoMatcher() {
+ delete matcher_;
+ }
+
+ virtual RhoMatcher<M> *Copy(bool safe = false) const {
+ return new RhoMatcher<M>(*this, safe);
+ }
+
+ virtual MatchType Type(bool test) const { return matcher_->Type(test); }
+
+ void SetState(StateId s) {
+ matcher_->SetState(s);
+ has_rho_ = rho_label_ != kNoLabel;
+ }
+
+ bool Find(Label match_label) {
+ if (match_label == rho_label_ && rho_label_ != kNoLabel) {
+ FSTERROR() << "RhoMatcher::Find: bad label (rho)";
+ error_ = true;
+ return false;
+ }
+ if (matcher_->Find(match_label)) {
+ rho_match_ = kNoLabel;
+ return true;
+ } else if (has_rho_ && match_label != 0 && match_label != kNoLabel &&
+ (has_rho_ = matcher_->Find(rho_label_))) {
+ rho_match_ = match_label;
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ bool Done() const { return matcher_->Done(); }
+
+ const Arc& Value() const {
+ if (rho_match_ == kNoLabel) {
+ return matcher_->Value();
+ } else {
+ rho_arc_ = matcher_->Value();
+ if (rewrite_both_) {
+ if (rho_arc_.ilabel == rho_label_)
+ rho_arc_.ilabel = rho_match_;
+ if (rho_arc_.olabel == rho_label_)
+ rho_arc_.olabel = rho_match_;
+ } else if (match_type_ == MATCH_INPUT) {
+ rho_arc_.ilabel = rho_match_;
+ } else {
+ rho_arc_.olabel = rho_match_;
+ }
+ return rho_arc_;
+ }
+ }
+
+ void Next() { matcher_->Next(); }
+
+ virtual const FST &GetFst() const { return matcher_->GetFst(); }
+
+ virtual uint64 Properties(uint64 props) const;
+
+ private:
+ virtual void SetState_(StateId s) { SetState(s); }
+ virtual bool Find_(Label label) { return Find(label); }
+ virtual bool Done_() const { return Done(); }
+ virtual const Arc& Value_() const { return Value(); }
+ virtual void Next_() { Next(); }
+
+ M *matcher_;
+ MatchType match_type_; // Type of match requested
+ Label rho_label_; // Label that represents the rho transition
+ bool rewrite_both_; // Rewrite both sides when both are 'rho_label_'
+ bool has_rho_; // Are there possibly rhos at the current state?
+ Label rho_match_; // Current label that matches rho transition
+ mutable Arc rho_arc_; // Arc to return when rho match
+ bool error_; // Error encountered
+
+ void operator=(const RhoMatcher<M> &); // Disallow
+};
+
+template <class M> inline
+uint64 RhoMatcher<M>::Properties(uint64 inprops) const {
+ uint64 outprops = matcher_->Properties(inprops);
+ if (error_) outprops |= kError;
+
+ if (match_type_ == MATCH_NONE) {
+ return outprops;
+ } else if (match_type_ == MATCH_INPUT) {
+ if (rewrite_both_) {
+ return outprops & ~(kODeterministic | kNonODeterministic | kString |
+ kILabelSorted | kNotILabelSorted |
+ kOLabelSorted | kNotOLabelSorted);
+ } else {
+ return outprops & ~(kODeterministic | kAcceptor | kString |
+ kILabelSorted | kNotILabelSorted);
+ }
+ } else if (match_type_ == MATCH_OUTPUT) {
+ if (rewrite_both_) {
+ return outprops & ~(kIDeterministic | kNonIDeterministic | kString |
+ kILabelSorted | kNotILabelSorted |
+ kOLabelSorted | kNotOLabelSorted);
+ } else {
+ return outprops & ~(kIDeterministic | kAcceptor | kString |
+ kOLabelSorted | kNotOLabelSorted);
+ }
+ } else {
+ // Shouldn't ever get here.
+ FSTERROR() << "RhoMatcher:: bad match type: " << match_type_;
+ return 0;
+ }
+}
+
+
+// For any requested label, this matcher considers all transitions
+// that match the label 'sigma_label' (sigma = "any"), and this in
+// additions to transitions with the requested label. Each such sigma
+// transition found is returned with the sigma_label rewritten as the
+// requested label (both sides if an acceptor, or if 'rewrite_both' is
+// true and both input and output labels of the found transition are
+// 'sigma_label'). If 'sigma_label' is kNoLabel, this special
+// matching is not done. SigmaMatcher is templated itself on a
+// matcher, which is used to perform the underlying matching. By
+// default, the underlying matcher is constructed by SigmaMatcher.
+// The user can instead pass in this object; in that case,
+// SigmaMatcher takes its ownership.
+template <class M>
+class SigmaMatcher : public MatcherBase<typename M::Arc> {
+ public:
+ typedef typename M::FST FST;
+ typedef typename M::Arc Arc;
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Label Label;
+ typedef typename Arc::Weight Weight;
+
+ SigmaMatcher(const FST &fst,
+ MatchType match_type,
+ Label sigma_label = kNoLabel,
+ MatcherRewriteMode rewrite_mode = MATCHER_REWRITE_AUTO,
+ M *matcher = 0)
+ : matcher_(matcher ? matcher : new M(fst, match_type)),
+ match_type_(match_type),
+ sigma_label_(sigma_label),
+ error_(false) {
+ if (match_type == MATCH_BOTH) {
+ FSTERROR() << "SigmaMatcher: bad match type";
+ match_type_ = MATCH_NONE;
+ error_ = true;
+ }
+ if (sigma_label == 0) {
+ FSTERROR() << "SigmaMatcher: 0 cannot be used as sigma_label";
+ sigma_label_ = kNoLabel;
+ error_ = true;
+ }
+
+ if (rewrite_mode == MATCHER_REWRITE_AUTO)
+ rewrite_both_ = fst.Properties(kAcceptor, true);
+ else if (rewrite_mode == MATCHER_REWRITE_ALWAYS)
+ rewrite_both_ = true;
+ else
+ rewrite_both_ = false;
+ }
+
+ SigmaMatcher(const SigmaMatcher<M> &matcher, bool safe = false)
+ : matcher_(new M(*matcher.matcher_, safe)),
+ match_type_(matcher.match_type_),
+ sigma_label_(matcher.sigma_label_),
+ rewrite_both_(matcher.rewrite_both_),
+ error_(matcher.error_) {}
+
+ virtual ~SigmaMatcher() {
+ delete matcher_;
+ }
+
+ virtual SigmaMatcher<M> *Copy(bool safe = false) const {
+ return new SigmaMatcher<M>(*this, safe);
+ }
+
+ virtual MatchType Type(bool test) const { return matcher_->Type(test); }
+
+ void SetState(StateId s) {
+ matcher_->SetState(s);
+ has_sigma_ =
+ sigma_label_ != kNoLabel ? matcher_->Find(sigma_label_) : false;
+ }
+
+ bool Find(Label match_label) {
+ match_label_ = match_label;
+ if (match_label == sigma_label_ && sigma_label_ != kNoLabel) {
+ FSTERROR() << "SigmaMatcher::Find: bad label (sigma)";
+ error_ = true;
+ return false;
+ }
+ if (matcher_->Find(match_label)) {
+ sigma_match_ = kNoLabel;
+ return true;
+ } else if (has_sigma_ && match_label != 0 && match_label != kNoLabel &&
+ matcher_->Find(sigma_label_)) {
+ sigma_match_ = match_label;
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ bool Done() const {
+ return matcher_->Done();
+ }
+
+ const Arc& Value() const {
+ if (sigma_match_ == kNoLabel) {
+ return matcher_->Value();
+ } else {
+ sigma_arc_ = matcher_->Value();
+ if (rewrite_both_) {
+ if (sigma_arc_.ilabel == sigma_label_)
+ sigma_arc_.ilabel = sigma_match_;
+ if (sigma_arc_.olabel == sigma_label_)
+ sigma_arc_.olabel = sigma_match_;
+ } else if (match_type_ == MATCH_INPUT) {
+ sigma_arc_.ilabel = sigma_match_;
+ } else {
+ sigma_arc_.olabel = sigma_match_;
+ }
+ return sigma_arc_;
+ }
+ }
+
+ void Next() {
+ matcher_->Next();
+ if (matcher_->Done() && has_sigma_ && (sigma_match_ == kNoLabel) &&
+ (match_label_ > 0)) {
+ matcher_->Find(sigma_label_);
+ sigma_match_ = match_label_;
+ }
+ }
+
+ virtual const FST &GetFst() const { return matcher_->GetFst(); }
+
+ virtual uint64 Properties(uint64 props) const;
+
+private:
+ virtual void SetState_(StateId s) { SetState(s); }
+ virtual bool Find_(Label label) { return Find(label); }
+ virtual bool Done_() const { return Done(); }
+ virtual const Arc& Value_() const { return Value(); }
+ virtual void Next_() { Next(); }
+
+ M *matcher_;
+ MatchType match_type_; // Type of match requested
+ Label sigma_label_; // Label that represents the sigma transition
+ bool rewrite_both_; // Rewrite both sides when both are 'sigma_label_'
+ bool has_sigma_; // Are there sigmas at the current state?
+ Label sigma_match_; // Current label that matches sigma transition
+ mutable Arc sigma_arc_; // Arc to return when sigma match
+ Label match_label_; // Label being matched
+ bool error_; // Error encountered
+
+ void operator=(const SigmaMatcher<M> &); // disallow
+};
+
+template <class M> inline
+uint64 SigmaMatcher<M>::Properties(uint64 inprops) const {
+ uint64 outprops = matcher_->Properties(inprops);
+ if (error_) outprops |= kError;
+
+ if (match_type_ == MATCH_NONE) {
+ return outprops;
+ } else if (rewrite_both_) {
+ return outprops & ~(kIDeterministic | kNonIDeterministic |
+ kODeterministic | kNonODeterministic |
+ kILabelSorted | kNotILabelSorted |
+ kOLabelSorted | kNotOLabelSorted |
+ kString);
+ } else if (match_type_ == MATCH_INPUT) {
+ return outprops & ~(kIDeterministic | kNonIDeterministic |
+ kODeterministic | kNonODeterministic |
+ kILabelSorted | kNotILabelSorted |
+ kString | kAcceptor);
+ } else if (match_type_ == MATCH_OUTPUT) {
+ return outprops & ~(kIDeterministic | kNonIDeterministic |
+ kODeterministic | kNonODeterministic |
+ kOLabelSorted | kNotOLabelSorted |
+ kString | kAcceptor);
+ } else {
+ // Shouldn't ever get here.
+ FSTERROR() << "SigmaMatcher:: bad match type: " << match_type_;
+ return 0;
+ }
+}
+
+
+// For any requested label that doesn't match at a state, this matcher
+// considers the *unique* transition that matches the label 'phi_label'
+// (phi = 'fail'), and recursively looks for a match at its
+// destination. When 'phi_loop' is true, if no match is found but a
+// phi self-loop is found, then the phi transition found is returned
+// with the phi_label rewritten as the requested label (both sides if
+// an acceptor, or if 'rewrite_both' is true and both input and output
+// labels of the found transition are 'phi_label'). If 'phi_label' is
+// kNoLabel, this special matching is not done. PhiMatcher is
+// templated itself on a matcher, which is used to perform the
+// underlying matching. By default, the underlying matcher is
+// constructed by PhiMatcher. The user can instead pass in this
+// object; in that case, PhiMatcher takes its ownership.
+// Warning: phi non-determinism not supported (for simplicity).
+template <class M>
+class PhiMatcher : public MatcherBase<typename M::Arc> {
+ public:
+ typedef typename M::FST FST;
+ typedef typename M::Arc Arc;
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Label Label;
+ typedef typename Arc::Weight Weight;
+
+ PhiMatcher(const FST &fst,
+ MatchType match_type,
+ Label phi_label = kNoLabel,
+ bool phi_loop = true,
+ MatcherRewriteMode rewrite_mode = MATCHER_REWRITE_AUTO,
+ M *matcher = 0)
+ : matcher_(matcher ? matcher : new M(fst, match_type)),
+ match_type_(match_type),
+ phi_label_(phi_label),
+ state_(kNoStateId),
+ phi_loop_(phi_loop),
+ error_(false) {
+ if (match_type == MATCH_BOTH) {
+ FSTERROR() << "PhiMatcher: bad match type";
+ match_type_ = MATCH_NONE;
+ error_ = true;
+ }
+ if (phi_label == 0) {
+ FSTERROR() << "PhiMatcher: 0 cannot be used as phi_label";
+ phi_label_ = kNoLabel;
+ error_ = true;
+ }
+
+ if (rewrite_mode == MATCHER_REWRITE_AUTO)
+ rewrite_both_ = fst.Properties(kAcceptor, true);
+ else if (rewrite_mode == MATCHER_REWRITE_ALWAYS)
+ rewrite_both_ = true;
+ else
+ rewrite_both_ = false;
+ }
+
+ PhiMatcher(const PhiMatcher<M> &matcher, bool safe = false)
+ : matcher_(new M(*matcher.matcher_, safe)),
+ match_type_(matcher.match_type_),
+ phi_label_(matcher.phi_label_),
+ rewrite_both_(matcher.rewrite_both_),
+ state_(kNoStateId),
+ phi_loop_(matcher.phi_loop_),
+ error_(matcher.error_) {}
+
+ virtual ~PhiMatcher() {
+ delete matcher_;
+ }
+
+ virtual PhiMatcher<M> *Copy(bool safe = false) const {
+ return new PhiMatcher<M>(*this, safe);
+ }
+
+ virtual MatchType Type(bool test) const { return matcher_->Type(test); }
+
+ void SetState(StateId s) {
+ matcher_->SetState(s);
+ state_ = s;
+ has_phi_ = phi_label_ != kNoLabel;
+ }
+
+ bool Find(Label match_label);
+
+ bool Done() const { return matcher_->Done(); }
+
+ const Arc& Value() const {
+ if ((phi_match_ == kNoLabel) && (phi_weight_ == Weight::One())) {
+ return matcher_->Value();
+ } else {
+ phi_arc_ = matcher_->Value();
+ phi_arc_.weight = Times(phi_weight_, phi_arc_.weight);
+ if (phi_match_ != kNoLabel) {
+ if (rewrite_both_) {
+ if (phi_arc_.ilabel == phi_label_)
+ phi_arc_.ilabel = phi_match_;
+ if (phi_arc_.olabel == phi_label_)
+ phi_arc_.olabel = phi_match_;
+ } else if (match_type_ == MATCH_INPUT) {
+ phi_arc_.ilabel = phi_match_;
+ } else {
+ phi_arc_.olabel = phi_match_;
+ }
+ }
+ return phi_arc_;
+ }
+ }
+
+ void Next() { matcher_->Next(); }
+
+ virtual const FST &GetFst() const { return matcher_->GetFst(); }
+
+ virtual uint64 Properties(uint64 props) const;
+
+private:
+ virtual void SetState_(StateId s) { SetState(s); }
+ virtual bool Find_(Label label) { return Find(label); }
+ virtual bool Done_() const { return Done(); }
+ virtual const Arc& Value_() const { return Value(); }
+ virtual void Next_() { Next(); }
+
+ M *matcher_;
+ MatchType match_type_; // Type of match requested
+ Label phi_label_; // Label that represents the phi transition
+ bool rewrite_both_; // Rewrite both sides when both are 'phi_label_'
+ bool has_phi_; // Are there possibly phis at the current state?
+ Label phi_match_; // Current label that matches phi loop
+ mutable Arc phi_arc_; // Arc to return
+ StateId state_; // State where looking for matches
+ Weight phi_weight_; // Product of the weights of phi transitions taken
+ bool phi_loop_; // When true, phi self-loop are allowed and treated
+ // as rho (required for Aho-Corasick)
+ bool error_; // Error encountered
+
+ void operator=(const PhiMatcher<M> &); // disallow
+};
+
+template <class M> inline
+bool PhiMatcher<M>::Find(Label match_label) {
+ if (match_label == phi_label_ && phi_label_ != kNoLabel) {
+ FSTERROR() << "PhiMatcher::Find: bad label (phi)";
+ error_ = true;
+ return false;
+ }
+ matcher_->SetState(state_);
+ phi_match_ = kNoLabel;
+ phi_weight_ = Weight::One();
+ if (!has_phi_ || match_label == 0 || match_label == kNoLabel)
+ return matcher_->Find(match_label);
+ StateId state = state_;
+ while (!matcher_->Find(match_label)) {
+ if (!matcher_->Find(phi_label_))
+ return false;
+ if (phi_loop_ && matcher_->Value().nextstate == state) {
+ phi_match_ = match_label;
+ return true;
+ }
+ phi_weight_ = Times(phi_weight_, matcher_->Value().weight);
+ state = matcher_->Value().nextstate;
+ matcher_->Next();
+ if (!matcher_->Done()) {
+ FSTERROR() << "PhiMatcher: phi non-determinism not supported";
+ error_ = true;
+ }
+ matcher_->SetState(state);
+ }
+ return true;
+}
+
+template <class M> inline
+uint64 PhiMatcher<M>::Properties(uint64 inprops) const {
+ uint64 outprops = matcher_->Properties(inprops);
+ if (error_) outprops |= kError;
+
+ if (match_type_ == MATCH_NONE) {
+ return outprops;
+ } else if (match_type_ == MATCH_INPUT) {
+ if (rewrite_both_) {
+ return outprops & ~(kODeterministic | kNonODeterministic | kString |
+ kILabelSorted | kNotILabelSorted |
+ kOLabelSorted | kNotOLabelSorted);
+ } else {
+ return outprops & ~(kODeterministic | kAcceptor | kString |
+ kILabelSorted | kNotILabelSorted |
+ kOLabelSorted | kNotOLabelSorted);
+ }
+ } else if (match_type_ == MATCH_OUTPUT) {
+ if (rewrite_both_) {
+ return outprops & ~(kIDeterministic | kNonIDeterministic | kString |
+ kILabelSorted | kNotILabelSorted |
+ kOLabelSorted | kNotOLabelSorted);
+ } else {
+ return outprops & ~(kIDeterministic | kAcceptor | kString |
+ kILabelSorted | kNotILabelSorted |
+ kOLabelSorted | kNotOLabelSorted);
+ }
+ } else {
+ // Shouldn't ever get here.
+ FSTERROR() << "PhiMatcher:: bad match type: " << match_type_;
+ return 0;
+ }
+}
+
+
+//
+// MULTI-EPS MATCHER FLAGS
+//
+
+// Return multi-epsilon arcs for Find(kNoLabel).
+const uint32 kMultiEpsList = 0x00000001;
+
+// Return a kNolabel loop for Find(multi_eps).
+const uint32 kMultiEpsLoop = 0x00000002;
+
+// MultiEpsMatcher: allows treating multiple non-0 labels as
+// non-consuming labels in addition to 0 that is always
+// non-consuming. Precise behavior controlled by 'flags' argument. By
+// default, the underlying matcher is constructed by
+// MultiEpsMatcher. The user can instead pass in this object; in that
+// case, MultiEpsMatcher takes its ownership iff 'own_matcher' is
+// true.
+template <class M>
+class MultiEpsMatcher {
+ public:
+ typedef typename M::FST FST;
+ typedef typename M::Arc Arc;
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Label Label;
+ typedef typename Arc::Weight Weight;
+
+ MultiEpsMatcher(const FST &fst, MatchType match_type,
+ uint32 flags = (kMultiEpsLoop | kMultiEpsList),
+ M *matcher = 0, bool own_matcher = true)
+ : matcher_(matcher ? matcher : new M(fst, match_type)),
+ flags_(flags),
+ own_matcher_(matcher ? own_matcher : true) {
+ if (match_type == MATCH_INPUT) {
+ loop_.ilabel = kNoLabel;
+ loop_.olabel = 0;
+ } else {
+ loop_.ilabel = 0;
+ loop_.olabel = kNoLabel;
+ }
+ loop_.weight = Weight::One();
+ loop_.nextstate = kNoStateId;
+ }
+
+ MultiEpsMatcher(const MultiEpsMatcher<M> &matcher, bool safe = false)
+ : matcher_(new M(*matcher.matcher_, safe)),
+ flags_(matcher.flags_),
+ own_matcher_(true),
+ multi_eps_labels_(matcher.multi_eps_labels_),
+ loop_(matcher.loop_) {
+ loop_.nextstate = kNoStateId;
+ }
+
+ ~MultiEpsMatcher() {
+ if (own_matcher_)
+ delete matcher_;
+ }
+
+ MultiEpsMatcher<M> *Copy(bool safe = false) const {
+ return new MultiEpsMatcher<M>(*this, safe);
+ }
+
+ MatchType Type(bool test) const { return matcher_->Type(test); }
+
+ void SetState(StateId s) {
+ matcher_->SetState(s);
+ loop_.nextstate = s;
+ }
+
+ bool Find(Label match_label);
+
+ bool Done() const {
+ return done_;
+ }
+
+ const Arc& Value() const {
+ return current_loop_ ? loop_ : matcher_->Value();
+ }
+
+ void Next() {
+ if (!current_loop_) {
+ matcher_->Next();
+ done_ = matcher_->Done();
+ if (done_ && multi_eps_iter_ != multi_eps_labels_.End()) {
+ ++multi_eps_iter_;
+ while ((multi_eps_iter_ != multi_eps_labels_.End()) &&
+ !matcher_->Find(*multi_eps_iter_))
+ ++multi_eps_iter_;
+ if (multi_eps_iter_ != multi_eps_labels_.End())
+ done_ = false;
+ else
+ done_ = !matcher_->Find(kNoLabel);
+
+ }
+ } else {
+ done_ = true;
+ }
+ }
+
+ const FST &GetFst() const { return matcher_->GetFst(); }
+
+ uint64 Properties(uint64 props) const { return matcher_->Properties(props); }
+
+ uint32 Flags() const { return matcher_->Flags(); }
+
+ void AddMultiEpsLabel(Label label) {
+ if (label == 0) {
+ FSTERROR() << "MultiEpsMatcher: Bad multi-eps label: 0";
+ } else {
+ multi_eps_labels_.Insert(label);
+ }
+ }
+
+ void ClearMultiEpsLabels() {
+ multi_eps_labels_.Clear();
+ }
+
+private:
+ // Specialized for 'set' - log lookup
+ bool IsMultiEps(const set<Label> &multi_eps_labels, Label label) const {
+ return multi_eps_labels.Find(label) != multi_eps_labels.end();
+ }
+
+ M *matcher_;
+ uint32 flags_;
+ bool own_matcher_; // Does this class delete the matcher?
+
+ // Multi-eps label set
+ CompactSet<Label, kNoLabel> multi_eps_labels_;
+ typename CompactSet<Label, kNoLabel>::const_iterator multi_eps_iter_;
+
+ bool current_loop_; // Current arc is the implicit loop
+ mutable Arc loop_; // For non-consuming symbols
+ bool done_; // Matching done
+
+ void operator=(const MultiEpsMatcher<M> &); // Disallow
+};
+
+template <class M> inline
+bool MultiEpsMatcher<M>::Find(Label match_label) {
+ multi_eps_iter_ = multi_eps_labels_.End();
+ current_loop_ = false;
+ bool ret;
+ if (match_label == 0) {
+ ret = matcher_->Find(0);
+ } else if (match_label == kNoLabel) {
+ if (flags_ & kMultiEpsList) {
+ // return all non-consuming arcs (incl. epsilon)
+ multi_eps_iter_ = multi_eps_labels_.Begin();
+ while ((multi_eps_iter_ != multi_eps_labels_.End()) &&
+ !matcher_->Find(*multi_eps_iter_))
+ ++multi_eps_iter_;
+ if (multi_eps_iter_ != multi_eps_labels_.End())
+ ret = true;
+ else
+ ret = matcher_->Find(kNoLabel);
+ } else {
+ // return all epsilon arcs
+ ret = matcher_->Find(kNoLabel);
+ }
+ } else if ((flags_ & kMultiEpsLoop) &&
+ multi_eps_labels_.Find(match_label) != multi_eps_labels_.End()) {
+ // return 'implicit' loop
+ current_loop_ = true;
+ ret = true;
+ } else {
+ ret = matcher_->Find(match_label);
+ }
+ done_ = !ret;
+ return ret;
+}
+
+
+// Generic matcher, templated on the FST definition
+// - a wrapper around pointer to specific one.
+// Here is a typical use: \code
+// Matcher<StdFst> matcher(fst, MATCH_INPUT);
+// matcher.SetState(state);
+// if (matcher.Find(label))
+// for (; !matcher.Done(); matcher.Next()) {
+// StdArc &arc = matcher.Value();
+// ...
+// } \endcode
+template <class F>
+class Matcher {
+ public:
+ typedef F FST;
+ typedef typename F::Arc Arc;
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Label Label;
+ typedef typename Arc::Weight Weight;
+
+ Matcher(const F &fst, MatchType match_type) {
+ base_ = fst.InitMatcher(match_type);
+ if (!base_)
+ base_ = new SortedMatcher<F>(fst, match_type);
+ }
+
+ Matcher(const Matcher<F> &matcher, bool safe = false) {
+ base_ = matcher.base_->Copy(safe);
+ }
+
+ // Takes ownership of the provided matcher
+ Matcher(MatcherBase<Arc>* base_matcher) { base_ = base_matcher; }
+
+ ~Matcher() { delete base_; }
+
+ Matcher<F> *Copy(bool safe = false) const {
+ return new Matcher<F>(*this, safe);
+ }
+
+ MatchType Type(bool test) const { return base_->Type(test); }
+ void SetState(StateId s) { base_->SetState(s); }
+ bool Find(Label label) { return base_->Find(label); }
+ bool Done() const { return base_->Done(); }
+ const Arc& Value() const { return base_->Value(); }
+ void Next() { base_->Next(); }
+ const F &GetFst() const { return static_cast<const F &>(base_->GetFst()); }
+ uint64 Properties(uint64 props) const { return base_->Properties(props); }
+ uint32 Flags() const { return base_->Flags() & kMatcherFlags; }
+
+ private:
+ MatcherBase<Arc> *base_;
+
+ void operator=(const Matcher<Arc> &); // disallow
+};
+
+} // namespace fst
+
+
+
+#endif // FST_LIB_MATCHER_H__