aboutsummaryrefslogtreecommitdiff
path: root/src/include/fst
diff options
context:
space:
mode:
author Ian Hodson <idh@google.com> 2012-05-30 21:27:06 +0100
committer Ian Hodson <idh@google.com> 2012-05-30 22:47:36 +0100
commit f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2 (patch)
tree b131ed907f9b2d5af09c0983b651e9e69bc6aab9 /src/include/fst
parent a92766f0a6ba4fac46cd6fd3856ef20c3b204f0d (diff)
download openfst-f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2.tar.gz
Moved from GoogleTTS Change-Id: I6bc6bdadaa53bd0f810b88443339f6d899502cc8
Diffstat (limited to 'src/include/fst')
-rw-r--r--src/include/fst/accumulator.h745
-rw-r--r--src/include/fst/add-on.h306
-rw-r--r--src/include/fst/arc-map.h1146
-rw-r--r--src/include/fst/arc.h306
-rw-r--r--src/include/fst/arcfilter.h99
-rw-r--r--src/include/fst/arcsort.h203
-rw-r--r--src/include/fst/bi-table.h396
-rw-r--r--src/include/fst/cache.h738
-rw-r--r--src/include/fst/closure.h155
-rw-r--r--src/include/fst/compact-fst.h1307
-rw-r--r--src/include/fst/compat.h152
-rw-r--r--src/include/fst/complement.h338
-rw-r--r--src/include/fst/compose-filter.h542
-rw-r--r--src/include/fst/compose.h673
-rw-r--r--src/include/fst/concat.h246
-rw-r--r--src/include/fst/config.h12
-rw-r--r--src/include/fst/config.h.in11
-rw-r--r--src/include/fst/connect.h319
-rw-r--r--src/include/fst/const-fst.h483
-rw-r--r--src/include/fst/determinize.h887
-rw-r--r--src/include/fst/dfs-visit.h204
-rw-r--r--src/include/fst/difference.h189
-rw-r--r--src/include/fst/edit-fst.h774
-rw-r--r--src/include/fst/encode.h599
-rw-r--r--src/include/fst/epsnormalize.h74
-rw-r--r--src/include/fst/equal.h124
-rw-r--r--src/include/fst/equivalent.h274
-rw-r--r--src/include/fst/expanded-fst.h189
-rw-r--r--src/include/fst/expectation-weight.h142
-rw-r--r--src/include/fst/extensions/far/compile-strings.h271
-rw-r--r--src/include/fst/extensions/far/create.h87
-rw-r--r--src/include/fst/extensions/far/extract.h85
-rw-r--r--src/include/fst/extensions/far/far.h360
-rw-r--r--src/include/fst/extensions/far/farlib.h31
-rw-r--r--src/include/fst/extensions/far/farscript.h234
-rw-r--r--src/include/fst/extensions/far/info.h128
-rw-r--r--src/include/fst/extensions/far/main.h43
-rw-r--r--src/include/fst/extensions/far/print-strings.h126
-rw-r--r--src/include/fst/extensions/far/stlist.h304
-rw-r--r--src/include/fst/extensions/far/sttable.h370
-rw-r--r--src/include/fst/extensions/pdt/collection.h122
-rw-r--r--src/include/fst/extensions/pdt/compose.h146
-rw-r--r--src/include/fst/extensions/pdt/expand.h975
-rw-r--r--src/include/fst/extensions/pdt/info.h175
-rw-r--r--src/include/fst/extensions/pdt/paren.h496
-rw-r--r--src/include/fst/extensions/pdt/pdt.h212
-rw-r--r--src/include/fst/extensions/pdt/pdtlib.h30
-rw-r--r--src/include/fst/extensions/pdt/pdtscript.h284
-rw-r--r--src/include/fst/extensions/pdt/replace.h192
-rw-r--r--src/include/fst/extensions/pdt/reverse.h58
-rw-r--r--src/include/fst/extensions/pdt/shortest-path.h790
-rw-r--r--src/include/fst/factor-weight.h476
-rw-r--r--src/include/fst/flags.h224
-rw-r--r--src/include/fst/float-weight.h598
-rw-r--r--src/include/fst/fst-decl.h125
-rw-r--r--src/include/fst/fst.h942
-rw-r--r--src/include/fst/fstlib.h151
-rw-r--r--src/include/fst/generic-register.h159
-rw-r--r--src/include/fst/heap.h206
-rw-r--r--src/include/fst/icu.h103
-rw-r--r--src/include/fst/intersect.h172
-rw-r--r--src/include/fst/interval-set.h381
-rw-r--r--src/include/fst/invert.h125
-rw-r--r--src/include/fst/label-reachable.h565
-rw-r--r--src/include/fst/lexicographic-weight.h151
-rw-r--r--src/include/fst/lock.h81
-rw-r--r--src/include/fst/log.h66
-rw-r--r--src/include/fst/lookahead-filter.h698
-rw-r--r--src/include/fst/lookahead-matcher.h813
-rw-r--r--src/include/fst/map.h121
-rw-r--r--src/include/fst/matcher-fst.h359
-rw-r--r--src/include/fst/matcher.h1116
-rw-r--r--src/include/fst/minimize.h584
-rw-r--r--src/include/fst/mutable-fst.h378
-rw-r--r--src/include/fst/pair-weight.h280
-rw-r--r--src/include/fst/partition.h290
-rw-r--r--src/include/fst/power-weight.h159
-rw-r--r--src/include/fst/product-weight.h115
-rw-r--r--src/include/fst/project.h148
-rw-r--r--src/include/fst/properties.h460
-rw-r--r--src/include/fst/prune.h339
-rw-r--r--src/include/fst/push.h175
-rw-r--r--src/include/fst/queue.h889
-rw-r--r--src/include/fst/randequivalent.h135
-rw-r--r--src/include/fst/randgen.h712
-rw-r--r--src/include/fst/random-weight.h348
-rw-r--r--src/include/fst/rational.h330
-rw-r--r--src/include/fst/register.h132
-rw-r--r--src/include/fst/relabel.h524
-rw-r--r--src/include/fst/replace-util.h550
-rw-r--r--src/include/fst/replace.h1453
-rw-r--r--src/include/fst/reverse.h91
-rw-r--r--src/include/fst/reweight.h146
-rw-r--r--src/include/fst/rmepsilon.h601
-rw-r--r--src/include/fst/rmfinalepsilon.h107
-rw-r--r--src/include/fst/script/arcsort.h49
-rw-r--r--src/include/fst/script/arg-packs.h240
-rw-r--r--src/include/fst/script/closure.h41
-rw-r--r--src/include/fst/script/compile-impl.h215
-rw-r--r--src/include/fst/script/compile.h92
-rw-r--r--src/include/fst/script/compose.h63
-rw-r--r--src/include/fst/script/concat.h54
-rw-r--r--src/include/fst/script/connect.h45
-rw-r--r--src/include/fst/script/convert.h49
-rw-r--r--src/include/fst/script/decode.h46
-rw-r--r--src/include/fst/script/determinize.h68
-rw-r--r--src/include/fst/script/difference.h67
-rw-r--r--src/include/fst/script/draw-impl.h234
-rw-r--r--src/include/fst/script/draw.h113
-rw-r--r--src/include/fst/script/encode.h58
-rw-r--r--src/include/fst/script/epsnormalize.h44
-rw-r--r--src/include/fst/script/equal.h45
-rw-r--r--src/include/fst/script/equivalent.h47
-rw-r--r--src/include/fst/script/fst-class.h343
-rw-r--r--src/include/fst/script/fstscript-decl.h35
-rw-r--r--src/include/fst/script/fstscript.h154
-rw-r--r--src/include/fst/script/info-impl.h325
-rw-r--r--src/include/fst/script/info.h48
-rw-r--r--src/include/fst/script/intersect.h65
-rw-r--r--src/include/fst/script/invert.h43
-rw-r--r--src/include/fst/script/map.h115
-rw-r--r--src/include/fst/script/minimize.h45
-rw-r--r--src/include/fst/script/print-impl.h149
-rw-r--r--src/include/fst/script/print.h86
-rw-r--r--src/include/fst/script/project.h43
-rw-r--r--src/include/fst/script/prune.h153
-rw-r--r--src/include/fst/script/push.h70
-rw-r--r--src/include/fst/script/randequivalent.h105
-rw-r--r--src/include/fst/script/randgen.h76
-rw-r--r--src/include/fst/script/register.h120
-rw-r--r--src/include/fst/script/relabel.h102
-rw-r--r--src/include/fst/script/replace.h62
-rw-r--r--src/include/fst/script/reverse.h42
-rw-r--r--src/include/fst/script/reweight.h53
-rw-r--r--src/include/fst/script/rmepsilon.h211
-rw-r--r--src/include/fst/script/script-impl.h206
-rw-r--r--src/include/fst/script/shortest-distance.h250
-rw-r--r--src/include/fst/script/shortest-path.h190
-rw-r--r--src/include/fst/script/symbols.h20
-rw-r--r--src/include/fst/script/synchronize.h42
-rw-r--r--src/include/fst/script/text-io.h50
-rw-r--r--src/include/fst/script/topsort.h40
-rw-r--r--src/include/fst/script/union.h42
-rw-r--r--src/include/fst/script/verify.h40
-rw-r--r--src/include/fst/script/weight-class.h216
-rw-r--r--src/include/fst/shortest-distance.h347
-rw-r--r--src/include/fst/shortest-path.h501
-rw-r--r--src/include/fst/signed-log-weight.h367
-rw-r--r--src/include/fst/slist.h61
-rw-r--r--src/include/fst/sparse-power-weight.h225
-rw-r--r--src/include/fst/sparse-tuple-weight.h640
-rw-r--r--src/include/fst/state-map.h601
-rw-r--r--src/include/fst/state-reachable.h198
-rw-r--r--src/include/fst/state-table.h469
-rw-r--r--src/include/fst/statesort.h97
-rw-r--r--src/include/fst/string-weight.h560
-rw-r--r--src/include/fst/string.h247
-rw-r--r--src/include/fst/symbol-table-ops.h91
-rw-r--r--src/include/fst/symbol-table.h507
-rw-r--r--src/include/fst/synchronize.h457
-rw-r--r--src/include/fst/test-properties.h246
-rw-r--r--src/include/fst/topsort.h112
-rw-r--r--src/include/fst/tuple-weight.h332
-rw-r--r--src/include/fst/types.h38
-rw-r--r--src/include/fst/union-find.h110
-rw-r--r--src/include/fst/union.h185
-rw-r--r--src/include/fst/util.h409
-rw-r--r--src/include/fst/vector-fst.h727
-rw-r--r--src/include/fst/verify.h126
-rw-r--r--src/include/fst/visit.h270
-rw-r--r--src/include/fst/weight.h179
171 files changed, 46943 insertions, 0 deletions
diff --git a/src/include/fst/accumulator.h b/src/include/fst/accumulator.h
new file mode 100644
index 0000000..fcb960c
--- /dev/null
+++ b/src/include/fst/accumulator.h
@@ -0,0 +1,745 @@
+// accumulator.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Classes to accumulate arc weights. Useful for weight lookahead.
+
+#ifndef FST_LIB_ACCUMULATOR_H__
+#define FST_LIB_ACCUMULATOR_H__
+
+#include <algorithm>
+#include <functional>
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <vector>
+using std::vector;
+
+#include <fst/arcfilter.h>
+#include <fst/arcsort.h>
+#include <fst/dfs-visit.h>
+#include <fst/expanded-fst.h>
+#include <fst/replace.h>
+
+namespace fst {
+
+// Accumulator that combines arc weights with the arc semiring's own
+// Plus(). It keeps no data of its own, so Init() and SetState() do
+// nothing and Error() always reports false.
+template <class A>
+class DefaultAccumulator {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+
+  DefaultAccumulator() {}
+
+  // Nothing to transfer: the accumulator has no members.
+  DefaultAccumulator(const DefaultAccumulator<A> &acc) {}
+
+  // No-op; both arguments are ignored.
+  void Init(const Fst<A>& fst, bool copy = false) {}
+
+  // No-op; the state id is ignored.
+  void SetState(StateId) {}
+
+  // Returns Plus(w, v).
+  Weight Sum(Weight w, Weight v) {
+    return Plus(w, v);
+  }
+
+  // Returns w combined, via Plus(), with the weights of the arcs at
+  // positions [begin, end) of 'aiter'. The iterator is first moved to
+  // 'begin' and then advanced once per summed arc.
+  template <class ArcIterator>
+  Weight Sum(Weight w, ArcIterator *aiter, ssize_t begin,
+             ssize_t end) {
+    Weight total = w;
+    aiter->Seek(begin);
+    ssize_t pos = begin;
+    while (pos < end) {
+      total = Plus(total, aiter->Value().weight);
+      aiter->Next();
+      ++pos;
+    }
+    return total;
+  }
+
+  bool Error() const { return false; }
+
+ private:
+  void operator=(const DefaultAccumulator<A> &);  // Disallow
+};
+
+
+// This class accumulates arc weights using the log semiring Plus(),
+// assuming the arc weight has WeightConvert specializations to and
+// from log64 weights. It keeps no per-state data: Init() and
+// SetState() are no-ops and Error() always reports false.
+template <class A>
+class LogAccumulator {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+
+  LogAccumulator() {}
+
+  // Nothing is taken from 'acc'; the converters are default-built.
+  LogAccumulator(const LogAccumulator<A> &acc) {}
+
+  // No-op; both arguments are ignored.
+  void Init(const Fst<A>& fst, bool copy = false) {}
+
+  // No-op; the state id is ignored.
+  void SetState(StateId) {}
+
+  // Returns the log-semiring sum of w and v.
+  Weight Sum(Weight w, Weight v) {
+    return LogPlus(w, v);
+  }
+
+  // Returns the log-semiring sum of w and the weights of the arcs at
+  // positions [begin, end) of 'aiter'.
+  template <class ArcIterator>
+  Weight Sum(Weight w, ArcIterator *aiter, ssize_t begin,
+             ssize_t end) {
+    Weight sum = w;
+    aiter->Seek(begin);
+    for (ssize_t pos = begin; pos < end; aiter->Next(), ++pos)
+      sum = LogPlus(sum, aiter->Value().weight);
+    return sum;
+  }
+
+  bool Error() const { return false; }
+
+ private:
+  // Returns log(1 + exp(-x)).
+  double LogPosExp(double x) { return log(1.0 + exp(-x)); }
+
+  // Log-semiring addition performed on the log64 (double) values of
+  // the two operands; the result is converted back to Weight.
+  Weight LogPlus(Weight w, Weight v) {
+    const double f1 = to_log_weight_(w).Value();
+    const double f2 = to_log_weight_(v).Value();
+    // If both values are +infinity the difference taken below is
+    // inf - inf = NaN and the sum would come out as NaN. Handing back
+    // f2 whenever f1 is +infinity gives +infinity in that case and is
+    // what the general formula already yields when f2 is finite.
+    if (f1 == FloatLimits<double>::kPosInfinity)
+      return to_weight_(f2);
+    if (f1 > f2)
+      return to_weight_(f2 - LogPosExp(f1 - f2));
+    else
+      return to_weight_(f1 - LogPosExp(f2 - f1));
+  }
+
+  WeightConvert<Weight, Log64Weight> to_log_weight_;
+  WeightConvert<Log64Weight, Weight> to_weight_;
+
+  void operator=(const LogAccumulator<A> &);  // Disallow
+};
+
+
+// Stores shareable data for fast log accumulator copies. The object
+// carries its own reference count, which the accumulators sharing it
+// increment and decrement.
+class FastLogAccumulatorData {
+ public:
+  FastLogAccumulatorData() {}
+
+  // Mutable access to the cumulative weights (see weights_).
+  vector<double> *Weights() { return &weights_; }
+  // Mutable access to the per-state start positions (see
+  // weight_positions_).
+  vector<ssize_t> *WeightPositions() { return &weight_positions_; }
+  // Returns a pointer to the last stored weight, or 0 when no weight
+  // has been stored yet (indexing an empty vector is undefined).
+  double *WeightEnd() {
+    return weights_.empty() ? 0 : &weights_.back();
+  }
+  int RefCount() const { return ref_count_.count(); }
+  int IncrRefCount() { return ref_count_.Incr(); }
+  int DecrRefCount() { return ref_count_.Decr(); }
+
+ private:
+  // Cumulative weight per state for all states s.t. # of arcs >=
+  // arc_limit_ with arcs in order. Special first element per state
+  // being Log64Weight::Zero();
+  vector<double> weights_;
+  // Maps from state to corresponding beginning weight position in
+  // weights_. Position -1 means no pre-computed weights for that
+  // state.
+  vector<ssize_t> weight_positions_;
+  RefCounter ref_count_;  // Reference count.
+
+  DISALLOW_COPY_AND_ASSIGN(FastLogAccumulatorData);
+};
+
+
+// This class accumulates arc weights using the log semiring Plus()
+// assuming an arc weight has a WeightConvert specialization to and
+// from log64 weights. The member function Init(fst) has to be called
+// to set up the pre-computed weight information, and SetState(s)
+// selects the state whose pre-computed weights Sum() may use. Copies
+// share the pre-computed data through its reference count.
+template <class A>
+class FastLogAccumulator {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+
+  // 'arc_limit' is the minimum number of arcs a state must have for
+  // its cumulative weights to be pre-computed by Init(); one
+  // cumulative weight is stored every 'arc_period' arcs.
+  explicit FastLogAccumulator(ssize_t arc_limit = 20, ssize_t arc_period = 10)
+      : arc_limit_(arc_limit),
+        arc_period_(arc_period),
+        init_(false),
+        data_(new FastLogAccumulatorData()),
+        state_weights_(0),  // No state selected: Sum() walks the arcs.
+        error_(false) {}
+
+  // Shares the pre-computed data of 'acc'. The copy starts with no
+  // state selected, so Sum() walks the arcs until SetState() is
+  // called on it.
+  FastLogAccumulator(const FastLogAccumulator<A> &acc)
+      : arc_limit_(acc.arc_limit_),
+        arc_period_(acc.arc_period_),
+        init_(acc.init_),
+        data_(acc.data_),
+        state_weights_(0),
+        error_(acc.error_) {
+    data_->IncrRefCount();
+  }
+
+  // Releases the shared data when the last reference goes away.
+  ~FastLogAccumulator() {
+    if (!data_->DecrRefCount())
+      delete data_;
+  }
+
+  // Selects state s. Points state_weights_ at the state's first
+  // pre-computed weight, or clears it when the state has none. A
+  // state id beyond the table built by Init() is an error.
+  void SetState(StateId s) {
+    vector<double> &weights = *data_->Weights();
+    vector<ssize_t> &weight_positions = *data_->WeightPositions();
+
+    if (weight_positions.size() <= s) {
+      FSTERROR() << "FastLogAccumulator::SetState: invalid state id.";
+      error_ = true;
+      return;
+    }
+
+    ssize_t pos = weight_positions[s];
+    if (pos >= 0)
+      state_weights_ = &(weights[pos]);
+    else
+      state_weights_ = 0;
+  }
+
+  // Returns the log-semiring sum of w and v.
+  Weight Sum(Weight w, Weight v) {
+    return LogPlus(w, v);
+  }
+
+  // Returns the log-semiring sum of w and the weights of the arcs at
+  // positions [begin, end) of 'aiter'. Whole periods inside the range
+  // are taken from the pre-computed cumulative weights; the arcs
+  // before and after them are summed one by one. Returns
+  // Weight::NoWeight() once an error has been recorded.
+  template <class ArcIterator>
+  Weight Sum(Weight w, ArcIterator *aiter, ssize_t begin,
+             ssize_t end) {
+    if (error_) return Weight::NoWeight();
+    Weight sum = w;
+    // Finds begin and end of pre-stored weights
+    ssize_t index_begin = -1, index_end = -1;
+    ssize_t stored_begin = end, stored_end = end;
+    if (state_weights_ != 0) {
+      // First stored index at or after 'begin', last at or before 'end'.
+      index_begin = begin > 0 ? (begin - 1) / arc_period_ + 1 : 0;
+      index_end = end / arc_period_;
+      stored_begin = index_begin * arc_period_;
+      stored_end = index_end * arc_period_;
+    }
+    // Computes sum before pre-stored weights
+    if (begin < stored_begin) {
+      ssize_t pos_end = std::min(stored_begin, end);
+      aiter->Seek(begin);
+      for (ssize_t pos = begin; pos < pos_end; aiter->Next(), ++pos)
+        sum = LogPlus(sum, aiter->Value().weight);
+    }
+    // Computes sum between pre-stored weights
+    if (stored_begin < stored_end) {
+      sum = LogPlus(sum, LogMinus(state_weights_[index_end],
+                                  state_weights_[index_begin]));
+    }
+    // Computes sum after pre-stored weights
+    if (stored_end < end) {
+      ssize_t pos_start = std::max(stored_begin, stored_end);
+      aiter->Seek(pos_start);
+      for (ssize_t pos = pos_start; pos < end; aiter->Next(), ++pos)
+        sum = LogPlus(sum, aiter->Value().weight);
+    }
+    return sum;
+  }
+
+  // Pre-computes the cumulative weights of every state of 'fst' that
+  // has at least arc_limit_ arcs. Does nothing when 'copy' is true.
+  // Calling it when weights are already stored, or with arc_limit_ <
+  // arc_period_, records an error instead.
+  template <class F>
+  void Init(const F &fst, bool copy = false) {
+    if (copy)
+      return;
+    vector<double> &weights = *data_->Weights();
+    vector<ssize_t> &weight_positions = *data_->WeightPositions();
+    if (!weights.empty() || arc_limit_ < arc_period_) {
+      FSTERROR() << "FastLogAccumulator: initialization error.";
+      error_ = true;
+      return;
+    }
+    weight_positions.reserve(CountStates(fst));
+
+    ssize_t weight_position = 0;
+    for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
+      StateId s = siter.Value();
+      if (fst.NumArcs(s) >= arc_limit_) {
+        // Each stored run starts with +infinity (the empty sum).
+        double sum = FloatLimits<double>::kPosInfinity;
+        weight_positions.push_back(weight_position);
+        weights.push_back(sum);
+        ++weight_position;
+        ssize_t narcs = 0;
+        for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) {
+          const A &arc = aiter.Value();
+          sum = LogPlus(sum, arc.weight);
+          // Stores cumulative weight distribution per arc_period_.
+          if (++narcs % arc_period_ == 0) {
+            weights.push_back(sum);
+            ++weight_position;
+          }
+        }
+      } else {
+        weight_positions.push_back(-1);
+      }
+    }
+  }
+
+  bool Error() const { return error_; }
+
+ private:
+  // Returns log(1 + exp(-x)), with 0 for x == +infinity.
+  double LogPosExp(double x) {
+    return x == FloatLimits<double>::kPosInfinity ?
+        0.0 : log(1.0 + exp(-x));
+  }
+
+  // Returns log(1 - exp(-x)), with 0 for x == +infinity.
+  double LogMinusExp(double x) {
+    return x == FloatLimits<double>::kPosInfinity ?
+        0.0 : log(1.0 - exp(-x));
+  }
+
+  // Log-semiring addition of two weights.
+  Weight LogPlus(Weight w, Weight v) {
+    const double f1 = to_log_weight_(w).Value();
+    const double f2 = to_log_weight_(v).Value();
+    // If both values are +infinity the difference taken below is
+    // inf - inf = NaN and the sum would come out as NaN. Handing back
+    // f2 whenever f1 is +infinity gives +infinity in that case and is
+    // what the general formula already yields when f2 is finite.
+    if (f1 == FloatLimits<double>::kPosInfinity)
+      return to_weight_(f2);
+    if (f1 > f2)
+      return to_weight_(f2 - LogPosExp(f1 - f2));
+    else
+      return to_weight_(f1 - LogPosExp(f2 - f1));
+  }
+
+  // Log-semiring addition of a raw log64 value and a weight; returns
+  // the raw log64 value of the sum.
+  double LogPlus(double f1, Weight v) {
+    double f2 = to_log_weight_(v).Value();
+    if (f1 == FloatLimits<double>::kPosInfinity)
+      return f2;
+    else if (f1 > f2)
+      return f2 - LogPosExp(f1 - f2);
+    else
+      return f1 - LogPosExp(f2 - f1);
+  }
+
+  // Log-semiring subtraction of the raw value f2 from f1. Requires
+  // f1 < f2; otherwise records an error and returns NoWeight().
+  Weight LogMinus(double f1, double f2) {
+    if (f1 >= f2) {
+      FSTERROR() << "FastLogAcumulator::LogMinus: f1 >= f2 with f1 = " << f1
+                 << " and f2 = " << f2;
+      error_ = true;
+      return Weight::NoWeight();
+    }
+    if (f2 == FloatLimits<double>::kPosInfinity)
+      return to_weight_(f1);
+    else
+      return to_weight_(f1 - LogMinusExp(f2 - f1));
+  }
+
+  WeightConvert<Weight, Log64Weight> to_log_weight_;
+  WeightConvert<Log64Weight, Weight> to_weight_;
+
+  ssize_t arc_limit_;     // Minimum # of arcs to pre-compute state
+  ssize_t arc_period_;    // Save cumulative weights per 'arc_period_'.
+  bool init_;             // Not read by any method of this class.
+  FastLogAccumulatorData *data_;
+  double *state_weights_;  // Weights of the selected state, or 0.
+  bool error_;
+
+  void operator=(const FastLogAccumulator<A> &);  // Disallow
+};
+
+
+// Shareable store behind CacheLogAccumulator copies: one vector of
+// accumulated weights per cached state. Every copy of an accumulator
+// works on the same store, which carries its own reference count.
+template <class A>
+class CacheLogAccumulatorData {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+
+  // 'gc' switches garbage collection on; 'gc_limit' is the number of
+  // cached bytes at which a collection is triggered.
+  CacheLogAccumulatorData(bool gc, size_t gc_limit)
+      : cache_gc_(gc), cache_limit_(gc_limit), cache_size_(0) {}
+
+  // Frees every weight vector still held by the cache.
+  ~CacheLogAccumulatorData() {
+    typename CacheMap::iterator it = cache_.begin();
+    for (; it != cache_.end(); ++it)
+      delete it->second.weights;
+  }
+
+  // Garbage collection with a zero byte limit means nothing is cached.
+  bool CacheDisabled() const { return cache_gc_ && cache_limit_ == 0; }
+
+  // Returns the weights cached for state s and marks the entry as
+  // recently used; returns 0 when s is not cached.
+  vector<double> *GetWeights(StateId s) {
+    typename CacheMap::iterator it = cache_.find(s);
+    if (it == cache_.end())
+      return 0;
+    it->second.recent = true;
+    return it->second.weights;
+  }
+
+  // Registers 'weights' for state s, which the cache deletes later.
+  // With garbage collection enabled, a collection runs first if the
+  // byte limit has been reached, and the vector's capacity is added
+  // to the cached byte count.
+  void AddWeights(StateId s, vector<double> *weights) {
+    if (cache_gc_) {
+      if (cache_size_ >= cache_limit_)
+        GC(false);
+    }
+    cache_.insert(typename CacheMap::value_type(s, CacheState(weights, true)));
+    if (cache_gc_)
+      cache_size_ += weights->capacity() * sizeof(double);
+  }
+
+  int RefCount() const { return ref_count_.count(); }
+  int IncrRefCount() { return ref_count_.Incr(); }
+  int DecrRefCount() { return ref_count_.Decr(); }
+
+ private:
+  // Cached information for a given state.
+  struct CacheState {
+    vector<double>* weights;  // Accumulated weights for this state.
+    bool recent;              // Accessed since the last GC?
+
+    CacheState(vector<double> *w, bool r) : weights(w), recent(r) {}
+  };
+
+  typedef unordered_map<StateId, CacheState> CacheMap;
+
+  // Garbage collect: walks the cache until 'cache_size_' is no larger
+  // than 2/3 of 'cache_limit_' (plus one). With 'free_recent' false,
+  // entries accessed since the last GC are kept but lose their mark;
+  // if that pass does not free enough memory, a second pass deletes
+  // recently accessed entries as well.
+  void GC(bool free_recent) {
+    const size_t cache_target = (2 * cache_limit_) / 3 + 1;
+    typename CacheMap::iterator it = cache_.begin();
+    while (cache_size_ > cache_target && it != cache_.end()) {
+      CacheState &entry = it->second;
+      if (!free_recent && entry.recent) {
+        entry.recent = false;  // Spared this time, eligible next time.
+        ++it;
+        continue;
+      }
+      cache_size_ -= entry.weights->capacity() * sizeof(double);
+      delete entry.weights;
+      cache_.erase(it++);
+    }
+    if (free_recent || cache_size_ <= cache_target)
+      return;
+    GC(true);
+  }
+
+  CacheMap cache_;      // Cache
+  bool cache_gc_;       // Enable garbage collection
+  size_t cache_limit_;  // # of bytes allowed before GC
+  size_t cache_size_;   // # of bytes cached
+  RefCounter ref_count_;
+
+  DISALLOW_COPY_AND_ASSIGN(CacheLogAccumulatorData);
+};
+
+// This class accumulates arc weights using the log semiring Plus(),
+// assuming the arc weight has a WeightConvert specialization to and
+// from log64 weights. It is similar to the FastLogAccumulator.
+// However here, the accumulated weights are computed and stored only
+// for the states that are visited. The member function Init(fst) has
+// to be called to set up this accumulator. Copies share one cache.
+template <class A>
+class CacheLogAccumulator {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+
+  // 'arc_limit' is the minimum number of arcs a state must have to be
+  // cached; 'gc' and 'gc_limit' are handed to the shared cache data.
+  explicit CacheLogAccumulator(ssize_t arc_limit = 10, bool gc = false,
+                               size_t gc_limit = 10 * 1024 * 1024)
+      : arc_limit_(arc_limit),
+        weights_(0),  // No state selected yet.
+        fst_(0),
+        data_(new CacheLogAccumulatorData<A>(gc, gc_limit)),
+        s_(kNoStateId),
+        error_(false) {}
+
+  // Shares the cache of 'acc' and holds its own copy of the FST. The
+  // copy starts with no state selected.
+  CacheLogAccumulator(const CacheLogAccumulator<A> &acc)
+      : arc_limit_(acc.arc_limit_),
+        weights_(0),
+        fst_(acc.fst_ ? acc.fst_->Copy() : 0),
+        data_(acc.data_),
+        s_(kNoStateId),
+        error_(acc.error_) {
+    data_->IncrRefCount();
+  }
+
+  // Deletes the FST copy, and the shared cache with its last user.
+  ~CacheLogAccumulator() {
+    delete fst_;
+    if (!data_->DecrRefCount())
+      delete data_;
+  }
+
+  // Stores a copy of 'fst' as the FST whose arcs are accumulated.
+  // With 'copy' true any previously stored FST is replaced; otherwise
+  // calling Init() a second time records an error.
+  void Init(const Fst<A> &fst, bool copy = false) {
+    if (copy) {
+      delete fst_;
+    } else if (fst_) {
+      FSTERROR() << "CacheLogAccumulator: initialization error.";
+      error_ = true;
+      return;
+    }
+    fst_ = fst.Copy();
+  }
+
+  // Selects state s. Looks up its cached weights; if there are none
+  // and the state has at least arc_limit_ arcs, a new weight vector
+  // holding only the initial +infinity entry is added to the cache.
+  // 'depth' is not used.
+  void SetState(StateId s, int depth = 0) {
+    if (s == s_)
+      return;
+    s_ = s;
+
+    if (data_->CacheDisabled() || error_) {
+      weights_ = 0;
+      return;
+    }
+
+    if (!fst_) {
+      FSTERROR() << "CacheLogAccumulator::SetState: incorrectly initialized.";
+      error_ = true;
+      weights_ = 0;
+      return;
+    }
+
+    weights_ = data_->GetWeights(s);
+    if ((weights_ == 0) && (fst_->NumArcs(s) >= arc_limit_)) {
+      weights_ = new vector<double>;
+      // Reserved in full so the capacity counted by the cache is final.
+      weights_->reserve(fst_->NumArcs(s) + 1);
+      weights_->push_back(FloatLimits<double>::kPosInfinity);
+      data_->AddWeights(s, weights_);
+    }
+  }
+
+  // Returns the log-semiring sum of w and v.
+  Weight Sum(Weight w, Weight v) {
+    return LogPlus(w, v);
+  }
+
+  // Returns the log-semiring sum of w and the weights of the arcs at
+  // positions [begin, end) of 'aiter'. When the selected state is
+  // cached, its cumulative weights are extended up to 'end' on demand
+  // and the range sum is the difference of two cumulative entries.
+  template <class Iterator>
+  Weight Sum(Weight w, Iterator *aiter, ssize_t begin,
+             ssize_t end) {
+    if (weights_ == 0) {
+      Weight sum = w;
+      aiter->Seek(begin);
+      for (ssize_t pos = begin; pos < end; aiter->Next(), ++pos)
+        sum = LogPlus(sum, aiter->Value().weight);
+      return sum;
+    } else {
+      // An empty range adds nothing, exactly as in the uncached branch
+      // above. LogMinus() must not see it: it rejects equal operands
+      // and would record an error.
+      if (begin >= end)
+        return w;
+      if (weights_->size() <= end)
+        for (aiter->Seek(weights_->size() - 1);
+             weights_->size() <= end;
+             aiter->Next())
+          weights_->push_back(LogPlus(weights_->back(),
+                                      aiter->Value().weight));
+      return LogPlus(w, LogMinus((*weights_)[end], (*weights_)[begin]));
+    }
+  }
+
+  // Returns the index of the first arc at which the cumulative weight
+  // drops below w (cached states: is not greater than w... see below).
+  // Cached states are searched with lower_bound over the stored
+  // cumulative weights, which only covers the entries computed so
+  // far; uncached states are scanned arc by arc from the start.
+  template <class Iterator>
+  size_t LowerBound(double w, Iterator *aiter) {
+    if (weights_ != 0) {
+      // Skips the leading +infinity entry; std::greater because the
+      // search treats the sequence as ordered from large to small.
+      return std::lower_bound(weights_->begin() + 1,
+                              weights_->end(),
+                              w,
+                              std::greater<double>())
+          - weights_->begin() - 1;
+    } else {
+      size_t n = 0;
+      double x = FloatLimits<double>::kPosInfinity;
+      for (aiter->Reset(); !aiter->Done(); aiter->Next(), ++n) {
+        x = LogPlus(x, aiter->Value().weight);
+        if (x < w) break;
+      }
+      return n;
+    }
+  }
+
+  bool Error() const { return error_; }
+
+ private:
+  // Returns log(1 + exp(-x)), with 0 for x == +infinity.
+  double LogPosExp(double x) {
+    return x == FloatLimits<double>::kPosInfinity ?
+        0.0 : log(1.0 + exp(-x));
+  }
+
+  // Returns log(1 - exp(-x)), with 0 for x == +infinity.
+  double LogMinusExp(double x) {
+    return x == FloatLimits<double>::kPosInfinity ?
+        0.0 : log(1.0 - exp(-x));
+  }
+
+  // Log-semiring addition of two weights.
+  Weight LogPlus(Weight w, Weight v) {
+    const double f1 = to_log_weight_(w).Value();
+    const double f2 = to_log_weight_(v).Value();
+    // If both values are +infinity the difference taken below is
+    // inf - inf = NaN and the sum would come out as NaN. Handing back
+    // f2 whenever f1 is +infinity gives +infinity in that case and is
+    // what the general formula already yields when f2 is finite.
+    if (f1 == FloatLimits<double>::kPosInfinity)
+      return to_weight_(f2);
+    if (f1 > f2)
+      return to_weight_(f2 - LogPosExp(f1 - f2));
+    else
+      return to_weight_(f1 - LogPosExp(f2 - f1));
+  }
+
+  // Log-semiring addition of a raw log64 value and a weight; returns
+  // the raw log64 value of the sum.
+  double LogPlus(double f1, Weight v) {
+    double f2 = to_log_weight_(v).Value();
+    if (f1 == FloatLimits<double>::kPosInfinity)
+      return f2;
+    else if (f1 > f2)
+      return f2 - LogPosExp(f1 - f2);
+    else
+      return f1 - LogPosExp(f2 - f1);
+  }
+
+  // Log-semiring subtraction of the raw value f2 from f1. Requires
+  // f1 < f2; otherwise records an error and returns NoWeight().
+  Weight LogMinus(double f1, double f2) {
+    if (f1 >= f2) {
+      FSTERROR() << "CacheLogAcumulator::LogMinus: f1 >= f2 with f1 = " << f1
+                 << " and f2 = " << f2;
+      error_ = true;
+      return Weight::NoWeight();
+    }
+    if (f2 == FloatLimits<double>::kPosInfinity)
+      return to_weight_(f1);
+    else
+      return to_weight_(f1 - LogMinusExp(f2 - f1));
+  }
+
+  WeightConvert<Weight, Log64Weight> to_log_weight_;
+  WeightConvert<Log64Weight, Weight> to_weight_;
+
+  ssize_t arc_limit_;                 // Minimum # of arcs to cache a state
+  vector<double> *weights_;           // Accumulated weights for cur. state
+  const Fst<A>* fst_;                 // Input fst
+  CacheLogAccumulatorData<A> *data_;  // Cache data
+  StateId s_;                         // Current state
+  bool error_;
+
+  void operator=(const CacheLogAccumulator<A> &);  // Disallow
+};
+
+
+// Shareable store behind ReplaceAccumulator copies: the state table
+// pointer, one accumulator per component FST and a copy of each of
+// those FSTs. The object carries its own reference count.
+template <class Accumulator, class T>
+class ReplaceAccumulatorData {
+ public:
+  typedef typename Accumulator::Arc Arc;
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+  typedef T StateTable;
+  typedef typename T::StateTuple StateTuple;
+
+  ReplaceAccumulatorData() : state_table_(0) {}
+
+  // Starts from caller-supplied accumulators; they are deleted by the
+  // destructor of this object.
+  ReplaceAccumulatorData(const vector<Accumulator*> &accumulators)
+      : state_table_(0), accumulators_(accumulators) {}
+
+  // Deletes the FST copies made by Init() and all accumulators.
+  ~ReplaceAccumulatorData() {
+    typedef typename vector<const Fst<Arc>*>::iterator FstIter;
+    for (FstIter fit = fst_array_.begin(); fit != fst_array_.end(); ++fit)
+      delete *fit;
+    typedef typename vector<Accumulator*>::iterator AccIter;
+    for (AccIter ait = accumulators_.begin(); ait != accumulators_.end();
+         ++ait)
+      delete *ait;
+  }
+
+  // Records 'state_table' (not copied) and, for each entry of
+  // 'fst_tuples', makes sure an accumulator exists, initializes it
+  // with the entry's FST and stores a copy of that FST.
+  void Init(const vector<pair<Label, const Fst<Arc>*> > &fst_tuples,
+            const StateTable *state_table) {
+    state_table_ = state_table;
+    const size_t num_fsts = fst_tuples.size();
+    accumulators_.resize(num_fsts);
+    for (size_t i = 0; i < num_fsts; ++i) {
+      const Fst<Arc> *fst = fst_tuples[i].second;
+      Accumulator *&acc = accumulators_[i];
+      if (acc == 0)
+        acc = new Accumulator;
+      acc->Init(*fst);
+      fst_array_.push_back(fst->Copy());
+    }
+  }
+
+  // Looks up the tuple of state s in the state table.
+  const StateTuple &GetTuple(StateId s) const {
+    return state_table_->Tuple(s);
+  }
+
+  // Accumulator of the i-th component FST (no bounds check).
+  Accumulator *GetAccumulator(size_t i) { return accumulators_[i]; }
+
+  // Stored copy of the i-th component FST (no bounds check).
+  const Fst<Arc> *GetFst(size_t i) const { return fst_array_[i]; }
+
+  int RefCount() const { return ref_count_.count(); }
+  int IncrRefCount() { return ref_count_.Incr(); }
+  int DecrRefCount() { return ref_count_.Decr(); }
+
+ private:
+  const T * state_table_;
+  vector<Accumulator*> accumulators_;
+  vector<const Fst<Arc>*> fst_array_;
+  RefCounter ref_count_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReplaceAccumulatorData);
+};
+
+// This class accumulates weights in a ReplaceFst. The 'Init' method
+// takes as input the argument used to build the ReplaceFst and the
+// ReplaceFst state table. It uses accumulators of type 'Accumulator'
+// in the underlying FSTs. Copies share those accumulators and the
+// FST copies through reference-counted data.
+template <class Accumulator,
+          class T = DefaultReplaceStateTable<typename Accumulator::Arc> >
+class ReplaceAccumulator {
+ public:
+  typedef typename Accumulator::Arc Arc;
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+  typedef T StateTable;
+  typedef typename T::StateTuple StateTuple;
+
+  // The per-state members start at defined values (component 0, no
+  // offset) instead of being left indeterminate until SetState().
+  ReplaceAccumulator()
+      : init_(false), data_(new ReplaceAccumulatorData<Accumulator, T>()),
+        fst_id_(0), offset_(0), offset_weight_(Weight::Zero()),
+        error_(false) {}
+
+  // Uses the given accumulators for the component FSTs; they are
+  // handed to the shared data object, which deletes them.
+  ReplaceAccumulator(const vector<Accumulator*> &accumulators)
+      : init_(false),
+        data_(new ReplaceAccumulatorData<Accumulator, T>(accumulators)),
+        fst_id_(0), offset_(0), offset_weight_(Weight::Zero()),
+        error_(false) {}
+
+  // Shares the data of 'acc' and takes over its current state
+  // selection. Copying an accumulator that was never initialized is
+  // reported through FSTERROR.
+  ReplaceAccumulator(const ReplaceAccumulator<Accumulator, T> &acc)
+      : init_(acc.init_), data_(acc.data_),
+        fst_id_(acc.fst_id_), offset_(acc.offset_),
+        offset_weight_(acc.offset_weight_), error_(acc.error_) {
+    if (!init_)
+      FSTERROR() << "ReplaceAccumulator: can't copy unintialized accumulator";
+    data_->IncrRefCount();
+  }
+
+  // Releases the shared data when the last reference goes away.
+  ~ReplaceAccumulator() {
+    if (!data_->DecrRefCount())
+      delete data_;
+  }
+
+  // Does not take ownership of the state table, the state table
+  // is owned by the ReplaceFst.
+  void Init(const vector<pair<Label, const Fst<Arc>*> > &fst_tuples,
+            const StateTable *state_table) {
+    init_ = true;
+    data_->Init(fst_tuples, state_table);
+  }
+
+  // Selects ReplaceFst state s: resolves it to a component FST and a
+  // state of that FST, and forwards the state to the component's
+  // accumulator. When the tuple has a non-zero prefix id and the
+  // component state has a non-Zero final weight, that weight is
+  // remembered and arc positions are shifted by one (see Sum()).
+  void SetState(StateId s) {
+    if (!init_) {
+      FSTERROR() << "ReplaceAccumulator::SetState: incorrectly initialized.";
+      error_ = true;
+      return;
+    }
+    StateTuple tuple = data_->GetTuple(s);
+    fst_id_ = tuple.fst_id - 1;  // Replace FST ID is 1-based
+    data_->GetAccumulator(fst_id_)->SetState(tuple.fst_state);
+    offset_ = 0;
+    offset_weight_ = Weight::Zero();
+    if (tuple.prefix_id != 0) {
+      // Queried once and reused for both the test and the stored value.
+      const Weight final_weight =
+          data_->GetFst(fst_id_)->Final(tuple.fst_state);
+      if (final_weight != Weight::Zero()) {
+        offset_ = 1;
+        offset_weight_ = final_weight;
+      }
+    }
+  }
+
+  // Returns the sum of w and v as computed by the accumulator of the
+  // selected component; NoWeight() once an error has been recorded.
+  Weight Sum(Weight w, Weight v) {
+    if (error_) return Weight::NoWeight();
+    return data_->GetAccumulator(fst_id_)->Sum(w, v);
+  }
+
+  // Sums the arcs at positions [begin, end) of 'aiter' through the
+  // accumulator of the selected component, with positions shifted
+  // down by offset_. If the range starts at 0 and an offset is in
+  // effect, the remembered final weight is added as well. An empty
+  // range returns Weight::Zero(); 'w' is not included in that case.
+  template <class ArcIterator>
+  Weight Sum(Weight w, ArcIterator *aiter, ssize_t begin,
+             ssize_t end) {
+    if (error_) return Weight::NoWeight();
+    Weight sum = begin == end ? Weight::Zero()
+        : data_->GetAccumulator(fst_id_)->Sum(
+            w, aiter, begin ? begin - offset_ : 0, end - offset_);
+    if (begin == 0 && end != 0 && offset_ > 0)
+      sum = Sum(offset_weight_, sum);
+    return sum;
+  }
+
+  bool Error() const { return error_; }
+
+ private:
+  bool init_;             // Has Init() been called?
+  ReplaceAccumulatorData<Accumulator, T> *data_;
+  Label fst_id_;          // 0-based index of the selected component
+  size_t offset_;         // 1 if position 0 stands for the final weight
+  Weight offset_weight_;  // That final weight, or Zero()
+  bool error_;
+
+  void operator=(const ReplaceAccumulator<Accumulator, T> &);  // Disallow
+};
+
+} // namespace fst
+
+#endif // FST_LIB_ACCUMULATOR_H__
diff --git a/src/include/fst/add-on.h b/src/include/fst/add-on.h
new file mode 100644
index 0000000..ee21a93
--- /dev/null
+++ b/src/include/fst/add-on.h
@@ -0,0 +1,306 @@
+// add-on.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Fst implementation class to attach an arbitrary object with a
+// read/write method to an FST and its file rep. The FST is given a
+// new type name.
+
+#ifndef FST_LIB_ADD_ON_FST_H__
+#define FST_LIB_ADD_ON_FST_H__
+
+#include <stddef.h>
+#include <string>
+
+#include <fst/fst.h>
+
+
+namespace fst {
+
+// Identifies stream data as an add-on fst.
+static const int32 kAddOnMagicNumber = 446681434;
+
+
+//
+// Some useful add-on objects.
+//
+
+// Add-on with nothing to save: reading creates a fresh object and writing
+// emits nothing.
+class NullAddOn {
+ public:
+  NullAddOn() {}
+
+  // Returns a newly allocated add-on; the stream is not touched.
+  static NullAddOn *Read(istream &istrm) {
+    NullAddOn *addon = new NullAddOn();
+    return addon;
+  }
+
+  // Writes nothing and always reports success.
+  bool Write(ostream &ostrm) const {
+    return true;
+  }
+
+  // Reference counting, delegated to the RefCounter member.
+  int RefCount() const {
+    return ref_count_.count();
+  }
+
+  int IncrRefCount() {
+    return ref_count_.Incr();
+  }
+
+  int DecrRefCount() {
+    return ref_count_.Decr();
+  }
+
+ private:
+  RefCounter ref_count_;
+
+  DISALLOW_COPY_AND_ASSIGN(NullAddOn);
+};
+
+
+// Combines two add-ons (either of which may be null) into a single add-on.
+template <class A1, class A2>
+class AddOnPair {
+ public:
+  // The reference count of each non-null argument is incremented.
+  AddOnPair(A1 *a1, A2 *a2)
+      : a1_(a1), a2_(a2) {
+    if (a1_) {
+      a1_->IncrRefCount();
+    }
+    if (a2_) {
+      a2_->IncrRefCount();
+    }
+  }
+
+  // Releases the reference held on each member, deleting a member when
+  // its DecrRefCount() reports no remaining references.
+  ~AddOnPair() {
+    if (a1_) {
+      if (!a1_->DecrRefCount())
+        delete a1_;
+    }
+    if (a2_) {
+      if (!a2_->DecrRefCount())
+        delete a2_;
+    }
+  }
+
+  A1 *First() const {
+    return a1_;
+  }
+
+  A2 *Second() const {
+    return a2_;
+  }
+
+  // Reads a pair from the stream. Each member is preceded by a presence
+  // flag and is read via its own Read() only when the flag is set.
+  static AddOnPair<A1, A2> *Read(istream &istrm) {
+    bool have_addon1 = false;
+    ReadType(istrm, &have_addon1);
+    A1 *first = 0;
+    if (have_addon1) {
+      first = A1::Read(istrm);
+    }
+
+    bool have_addon2 = false;
+    ReadType(istrm, &have_addon2);
+    A2 *second = 0;
+    if (have_addon2) {
+      second = A2::Read(istrm);
+    }
+
+    AddOnPair<A1, A2> *result = new AddOnPair<A1, A2>(first, second);
+    // The pair took its own references above; drop the ones held locally.
+    if (first) {
+      first->DecrRefCount();
+    }
+    if (second) {
+      second->DecrRefCount();
+    }
+    return result;
+  }
+
+  // Writes a presence flag for each member, followed by the member itself
+  // when it is non-null. Always returns true.
+  bool Write(ostream &ostrm) const {
+    bool have_addon1 = a1_;
+    WriteType(ostrm, have_addon1);
+    if (have_addon1) {
+      a1_->Write(ostrm);
+    }
+
+    bool have_addon2 = a2_;
+    WriteType(ostrm, have_addon2);
+    if (have_addon2) {
+      a2_->Write(ostrm);
+    }
+    return true;
+  }
+
+  // Reference counting, delegated to the RefCounter member.
+  int RefCount() const { return ref_count_.count(); }
+  int IncrRefCount() { return ref_count_.Incr(); }
+  int DecrRefCount() { return ref_count_.Decr(); }
+
+ private:
+  A1 *a1_;
+  A2 *a2_;
+  RefCounter ref_count_;
+
+  DISALLOW_COPY_AND_ASSIGN(AddOnPair);
+};
+
+
+// Add to an Fst F a type T object. T must have a 'T* Read(istream &)',
+// a 'bool Write(ostream &)' method, and 'int RefCount()', 'int IncrRefCount()'
+// and 'int DecrRefCount()' methods (e.g. 'MatcherData' in matcher-fst.h).
+// The result is a new Fst implementation with type name 'type'.
+template<class F, class T>
+class AddOnImpl : public FstImpl<typename F::Arc> {
+ public:
+  typedef typename F::Arc Arc;
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+  typedef typename Arc::StateId StateId;
+
+  using FstImpl<Arc>::SetType;
+  using FstImpl<Arc>::SetProperties;
+  using FstImpl<Arc>::WriteHeader;
+
+  // If 't' is non-zero, its reference count is incremented.
+  AddOnImpl(const F &fst, const string &type, T *t = 0)
+      : fst_(fst), t_(t) {
+    SetType(type);
+    SetProperties(fst_.Properties(kFstProperties, false));
+    if (t_)
+      t_->IncrRefCount();
+  }
+
+  // If 't' is non-zero, its reference count is incremented.
+  AddOnImpl(const Fst<Arc> &fst, const string &type, T *t = 0)
+      : fst_(fst), t_(t) {
+    SetType(type);
+    SetProperties(fst_.Properties(kFstProperties, false));
+    if (t_)
+      t_->IncrRefCount();
+  }
+
+  // Copies the contained Fst and shares the add-on object, whose reference
+  // count is incremented when it is non-zero.
+  AddOnImpl(const AddOnImpl<F, T> &impl)
+      : fst_(impl.fst_), t_(impl.t_) {
+    SetType(impl.Type());
+    SetProperties(fst_.Properties(kCopyProperties, false));
+    if (t_)
+      t_->IncrRefCount();
+  }
+
+  // Releases the reference on the add-on object, deleting it when
+  // DecrRefCount() reports no remaining references.
+  ~AddOnImpl() {
+    if (t_ && !t_->DecrRefCount())
+      delete t_;
+  }
+
+  // The accessors below forward to the contained Fst.
+  StateId Start() const { return fst_.Start(); }
+  Weight Final(StateId s) const { return fst_.Final(s); }
+  size_t NumArcs(StateId s) const { return fst_.NumArcs(s); }
+
+  size_t NumInputEpsilons(StateId s) const {
+    return fst_.NumInputEpsilons(s);
+  }
+
+  size_t NumOutputEpsilons(StateId s) const {
+    return fst_.NumOutputEpsilons(s);
+  }
+
+  size_t NumStates() const { return fst_.NumStates(); }
+
+  // Reads an add-on Fst: the outer header, the add-on magic number, the
+  // contained Fst (with its own header) and, when the presence flag is set,
+  // the add-on object. Returns 0 on any failure; everything allocated
+  // before the failure is released.
+  static AddOnImpl<F, T> *Read(istream &strm, const FstReadOptions &opts) {
+    FstReadOptions nopts(opts);
+    FstHeader hdr;
+    if (!nopts.header) {
+      hdr.Read(strm, nopts.source);
+      nopts.header = &hdr;
+    }
+    // Temporary impl, used here only for checking types. It is deleted on
+    // both the success and the failure path so that it cannot leak.
+    AddOnImpl<F, T> *impl = new AddOnImpl<F, T>(nopts.header->FstType());
+    const bool header_ok =
+        impl->ReadHeader(strm, nopts, kMinFileVersion, &hdr);
+    delete impl;
+    if (!header_ok)
+      return 0;
+
+    int32 magic_number = 0;
+    ReadType(strm, &magic_number);  // Ensures this is an add-on Fst.
+    if (magic_number != kAddOnMagicNumber) {
+      LOG(ERROR) << "AddOnImpl::Read: Bad add-on header: " << nopts.source;
+      return 0;
+    }
+
+    FstReadOptions fopts(opts);
+    fopts.header = 0;  // Contained header was written out.
+    F *fst = F::Read(strm, fopts);
+    if (!fst)
+      return 0;
+
+    T *t = 0;
+    bool have_addon = false;
+    ReadType(strm, &have_addon);
+    if (have_addon) {  // Read add-on object if present.
+      t = T::Read(strm);
+      if (!t) {
+        delete fst;  // Do not leak the contained Fst on failure.
+        return 0;
+      }
+    }
+    impl = new AddOnImpl<F, T>(*fst, nopts.header->FstType(), t);
+    delete fst;
+    if (t)
+      t->DecrRefCount();  // 'impl' now holds its own reference.
+    return impl;
+  }
+
+  // Writes the outer header, the add-on magic number, the contained Fst
+  // (with its header forced on) and a presence flag followed by the add-on
+  // object when there is one. Returns false if the contained Fst fails to
+  // write.
+  bool Write(ostream &strm, const FstWriteOptions &opts) const {
+    FstHeader hdr;
+    FstWriteOptions nopts(opts);
+    nopts.write_isymbols = false;  // Let contained FST hold any symbols.
+    nopts.write_osymbols = false;
+    WriteHeader(strm, nopts, kFileVersion, &hdr);
+    WriteType(strm, kAddOnMagicNumber);  // Ensures this is an add-on Fst.
+    FstWriteOptions fopts(opts);
+    fopts.write_header = true;  // Force writing contained header.
+    if (!fst_.Write(strm, fopts))
+      return false;
+    bool have_addon = t_;
+    WriteType(strm, have_addon);
+    if (have_addon)  // Write add-on object if present.
+      t_->Write(strm);
+    return true;
+  }
+
+  void InitStateIterator(StateIteratorData<Arc> *data) const {
+    fst_.InitStateIterator(data);
+  }
+
+  void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const {
+    fst_.InitArcIterator(s, data);
+  }
+
+  F &GetFst() { return fst_; }
+
+  const F &GetFst() const { return fst_; }
+
+  T *GetAddOn() const { return t_; }
+
+  // Replaces the add-on object. The reference on the old object is released
+  // (deleting it when no references remain) and, if 't' is non-zero, its
+  // reference count is incremented. Setting the same object is a no-op.
+  void SetAddOn(T *t) {
+    if (t == t_)
+      return;
+    if (t_ && !t_->DecrRefCount())
+      delete t_;
+    t_ = t;
+    if (t_)
+      t_->IncrRefCount();
+  }
+
+ private:
+  // Builds an impl with no add-on object; used by Read() for header
+  // checking only.
+  explicit AddOnImpl(const string &type) : t_(0) {
+    SetType(type);
+    SetProperties(kExpanded);
+  }
+
+  // Current file format version
+  static const int kFileVersion = 1;
+  // Minimum file format version supported
+  static const int kMinFileVersion = 1;
+
+  F fst_;
+  T *t_;
+
+  void operator=(const AddOnImpl<F, T> &fst);  // Disallow
+};
+
+template <class F, class T> const int AddOnImpl<F, T>::kFileVersion;
+template <class F, class T> const int AddOnImpl<F, T>::kMinFileVersion;
+
+
+} // namespace fst
+
+#endif // FST_LIB_ADD_ON_FST_H__
diff --git a/src/include/fst/arc-map.h b/src/include/fst/arc-map.h
new file mode 100644
index 0000000..3055f71
--- /dev/null
+++ b/src/include/fst/arc-map.h
@@ -0,0 +1,1146 @@
+// arc-map.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Class to map over/transform arcs e.g., change semirings or
+// implement project/invert. Consider using when operation does
+// not change the number of arcs (except possibly superfinal arcs).
+
+#ifndef FST_LIB_ARC_MAP_H__
+#define FST_LIB_ARC_MAP_H__
+
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <string>
+#include <utility>
+using std::pair; using std::make_pair;
+
+#include <fst/cache.h>
+#include <fst/mutable-fst.h>
+
+
+namespace fst {
+
+// This determines how final weights are mapped.
+enum MapFinalAction {
+ // A final weight is mapped into a final weight. An error
+ // is raised if this is not possible.
+ MAP_NO_SUPERFINAL,
+
+ // A final weight is mapped to an arc to the superfinal state
+ // when the result cannot be represented as a final weight.
+ // The superfinal state will be added only if it is needed.
+ MAP_ALLOW_SUPERFINAL,
+
+ // A final weight is mapped to an arc to the superfinal state
+ // unless the mapped result has epsilon labels and weight Zero(),
+ // in which case no arc is added. The superfinal state is always
+ // added (if the input is not the empty Fst).
+ MAP_REQUIRE_SUPERFINAL
+};
+
+// This determines how symbol tables are mapped.
+enum MapSymbolsAction {
+ // Symbols should be cleared in the result by the map.
+ MAP_CLEAR_SYMBOLS,
+
+ // Symbols should be copied from the input FST by the map.
+ MAP_COPY_SYMBOLS,
+
+ // Symbols should not be modified in the result by the map itself.
+ // (They may be set by the mapper.)
+ MAP_NOOP_SYMBOLS
+};
+
+// ArcMapper Interface - class determines how arcs and final weights
+// are mapped. Useful for implementing operations that do not change
+// the number of arcs (except possibly superfinal arcs).
+//
+// class ArcMapper {
+// public:
+// typedef A FromArc;
+// typedef B ToArc;
+//
+// // Maps an arc type A to arc type B.
+// B operator()(const A &arc);
+// // Specifies final action the mapper requires (see above).
+// // The mapper will be passed final weights as arcs of the
+// // form A(0, 0, weight, kNoStateId).
+// MapFinalAction FinalAction() const;
+// // Specifies input symbol table action the mapper requires (see above).
+// MapSymbolsAction InputSymbolsAction() const;
+// // Specifies output symbol table action the mapper requires (see above).
+// MapSymbolsAction OutputSymbolsAction() const;
+// // This specifies the known properties of an Fst mapped by this
+// // mapper. It takes as argument the input Fst's known properties.
+// uint64 Properties(uint64 props) const;
+// };
+//
+// The ArcMap functions and classes below will use the FinalAction()
+// method of the mapper to determine how to treat final weights,
+// e.g. whether to add a superfinal state. They will use the Properties()
+// method to set the result Fst properties.
+//
+// We include various map versions below. One dimension of
+// variation is whether the mapping mutates its input, writes to a
+// new result Fst, or is an on-the-fly Fst. Another dimension is how
+// we pass the mapper. We allow passing the mapper by pointer
+// for cases that we need to change the state of the user's mapper.
+// This is the case with the encode mapper, which is reused during
+// decoding. We also include map versions that pass the mapper
+// by value or const reference when this suffices.
+
+
+// Applies mapper function object C, passed by pointer, to every arc and
+// final weight of an Fst with arc type A. This version modifies its Fst
+// input in place.
+template<class A, class C>
+void ArcMap(MutableFst<A> *fst, C* mapper) {
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+
+  if (mapper->InputSymbolsAction() == MAP_CLEAR_SYMBOLS)
+    fst->SetInputSymbols(0);
+
+  if (mapper->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS)
+    fst->SetOutputSymbols(0);
+
+  // Nothing to map in an Fst without a start state.
+  if (fst->Start() == kNoStateId)
+    return;
+
+  uint64 props = fst->Properties(kFstProperties, false);
+
+  MapFinalAction final_action = mapper->FinalAction();
+  StateId superfinal = kNoStateId;
+  if (final_action == MAP_REQUIRE_SUPERFINAL) {
+    superfinal = fst->AddState();
+    fst->SetFinal(superfinal, Weight::One());
+  }
+
+  // NumStates() is re-evaluated on every iteration because a superfinal
+  // state may be appended inside the loop.
+  for (StateId s = 0; s < fst->NumStates(); ++s) {
+    // Map the arcs leaving 's' in place.
+    for (MutableArcIterator< MutableFst<A> > aiter(fst, s);
+         !aiter.Done(); aiter.Next()) {
+      const A &arc = aiter.Value();
+      aiter.SetValue((*mapper)(arc));
+    }
+
+    // Map the final weight of 's'; the superfinal state itself is left
+    // untouched by both superfinal actions.
+    if (final_action == MAP_ALLOW_SUPERFINAL) {
+      if (s != superfinal) {
+        A final_arc = (*mapper)(A(0, 0, fst->Final(s), kNoStateId));
+        if (final_arc.ilabel == 0 && final_arc.olabel == 0) {
+          fst->SetFinal(s, final_arc.weight);
+        } else {
+          // Add a superfinal state if not already done.
+          if (superfinal == kNoStateId) {
+            superfinal = fst->AddState();
+            fst->SetFinal(superfinal, Weight::One());
+          }
+          final_arc.nextstate = superfinal;
+          fst->AddArc(s, final_arc);
+          fst->SetFinal(s, Weight::Zero());
+        }
+      }
+    } else if (final_action == MAP_REQUIRE_SUPERFINAL) {
+      if (s != superfinal) {
+        A final_arc = (*mapper)(A(0, 0, fst->Final(s), kNoStateId));
+        const bool needs_arc = final_arc.ilabel != 0 ||
+                               final_arc.olabel != 0 ||
+                               final_arc.weight != Weight::Zero();
+        if (needs_arc) {
+          fst->AddArc(s, A(final_arc.ilabel, final_arc.olabel,
+                           final_arc.weight, superfinal));
+        }
+        fst->SetFinal(s, Weight::Zero());
+      }
+    } else {
+      // MAP_NO_SUPERFINAL, and any unrecognized action.
+      A final_arc = (*mapper)(A(0, 0, fst->Final(s), kNoStateId));
+      if (final_arc.ilabel != 0 || final_arc.olabel != 0) {
+        FSTERROR() << "ArcMap: non-zero arc labels for superfinal arc";
+        fst->SetProperties(kError, kError);
+      }
+
+      fst->SetFinal(s, final_arc.weight);
+    }
+  }
+  fst->SetProperties(mapper->Properties(props), kFstProperties);
+}
+
+
+// In-place arc mapping with the mapper function object C passed by value;
+// delegates to the by-pointer version using the local copy. This version
+// modifies its Fst input.
+template<class A, class C>
+void ArcMap(MutableFst<A> *fst, C mapper) {
+  C *mapper_ptr = &mapper;
+  ArcMap(fst, mapper_ptr);
+}
+
+
+// Maps an arc type A to an arc type B using mapper function
+// object C, passed by pointer. This version writes the mapped
+// input Fst to an output MutableFst. Existing states of 'ofst' are
+// deleted first. Symbol tables are copied or cleared as the mapper
+// requests, and final weights are handled according to the mapper's
+// FinalAction().
+template<class A, class B, class C>
+void ArcMap(const Fst<A> &ifst, MutableFst<B> *ofst, C* mapper) {
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+
+  ofst->DeleteStates();
+
+  if (mapper->InputSymbolsAction() == MAP_COPY_SYMBOLS)
+    ofst->SetInputSymbols(ifst.InputSymbols());
+  else if (mapper->InputSymbolsAction() == MAP_CLEAR_SYMBOLS)
+    ofst->SetInputSymbols(0);
+
+  if (mapper->OutputSymbolsAction() == MAP_COPY_SYMBOLS)
+    ofst->SetOutputSymbols(ifst.OutputSymbols());
+  else if (mapper->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS)
+    ofst->SetOutputSymbols(0);
+
+  uint64 iprops = ifst.Properties(kCopyProperties, false);
+
+  // Empty input: only propagate an error flag, if any.
+  if (ifst.Start() == kNoStateId) {
+    if (iprops & kError) ofst->SetProperties(kError, kError);
+    return;
+  }
+
+  MapFinalAction final_action = mapper->FinalAction();
+  if (ifst.Properties(kExpanded, false)) {
+    // The conditional must be parenthesized: '+' binds tighter than '=='
+    // and '?:', so without the parentheses the whole sum was compared
+    // against MAP_NO_SUPERFINAL and only 0 or 1 states were reserved.
+    ofst->ReserveStates(CountStates(ifst) +
+                        (final_action == MAP_NO_SUPERFINAL ? 0 : 1));
+  }
+
+  // Add all states.
+  for (StateIterator< Fst<A> > siter(ifst); !siter.Done(); siter.Next())
+    ofst->AddState();
+
+  StateId superfinal = kNoStateId;
+  if (final_action == MAP_REQUIRE_SUPERFINAL) {
+    superfinal = ofst->AddState();
+    ofst->SetFinal(superfinal, B::Weight::One());
+  }
+  for (StateIterator< Fst<A> > siter(ifst); !siter.Done(); siter.Next()) {
+    StateId s = siter.Value();
+    if (s == ifst.Start())
+      ofst->SetStart(s);
+
+    // Map the arcs leaving 's'.
+    ofst->ReserveArcs(s, ifst.NumArcs(s));
+    for (ArcIterator< Fst<A> > aiter(ifst, s); !aiter.Done(); aiter.Next())
+      ofst->AddArc(s, (*mapper)(aiter.Value()));
+
+    // Map the final weight of 's', passed to the mapper as an arc of the
+    // form A(0, 0, weight, kNoStateId).
+    switch (final_action) {
+      case MAP_NO_SUPERFINAL:
+      default: {
+        B final_arc = (*mapper)(A(0, 0, ifst.Final(s), kNoStateId));
+        if (final_arc.ilabel != 0 || final_arc.olabel != 0) {
+          FSTERROR() << "ArcMap: non-zero arc labels for superfinal arc";
+          ofst->SetProperties(kError, kError);
+        }
+        ofst->SetFinal(s, final_arc.weight);
+        break;
+      }
+      case MAP_ALLOW_SUPERFINAL: {
+        B final_arc = (*mapper)(A(0, 0, ifst.Final(s), kNoStateId));
+        if (final_arc.ilabel != 0 || final_arc.olabel != 0) {
+          // Add a superfinal state if not already done.
+          if (superfinal == kNoStateId) {
+            superfinal = ofst->AddState();
+            ofst->SetFinal(superfinal, B::Weight::One());
+          }
+          final_arc.nextstate = superfinal;
+          ofst->AddArc(s, final_arc);
+          ofst->SetFinal(s, B::Weight::Zero());
+        } else {
+          ofst->SetFinal(s, final_arc.weight);
+        }
+        break;
+      }
+      case MAP_REQUIRE_SUPERFINAL: {
+        B final_arc = (*mapper)(A(0, 0, ifst.Final(s), kNoStateId));
+        if (final_arc.ilabel != 0 || final_arc.olabel != 0 ||
+            final_arc.weight != B::Weight::Zero())
+          ofst->AddArc(s, B(final_arc.ilabel, final_arc.olabel,
+                            final_arc.weight, superfinal));
+        ofst->SetFinal(s, B::Weight::Zero());
+        break;
+      }
+    }
+  }
+  uint64 oprops = ofst->Properties(kFstProperties, false);
+  ofst->SetProperties(mapper->Properties(iprops) | oprops, kFstProperties);
+}
+
+// Arc mapping from arc type A to arc type B with the mapper function
+// object C passed by value; delegates to the by-pointer version using the
+// local copy. This version writes the mapped input Fst to an output
+// MutableFst.
+template<class A, class B, class C>
+void ArcMap(const Fst<A> &ifst, MutableFst<B> *ofst, C mapper) {
+  C *mapper_ptr = &mapper;
+  ArcMap(ifst, ofst, mapper_ptr);
+}
+
+
+// Cache options for ArcMapFst.
+struct ArcMapFstOptions : public CacheOptions {
+  // By default an ArcMapFst does no caching: most mappers are cheap, so
+  // memory is saved by not caching their results.
+  ArcMapFstOptions()
+      : CacheOptions(true, 0) {}
+
+  // Adopts the given cache options unchanged.
+  ArcMapFstOptions(const CacheOptions& opts)
+      : CacheOptions(opts) {}
+};
+
+
+template <class A, class B, class C> class ArcMapFst;
+
+// Implementation of delayed ArcMapFst: mapped arcs and final weights are computed on demand and stored through CacheImpl<B>.
+template <class A, class B, class C>
+class ArcMapFstImpl : public CacheImpl<B> {
+ public:
+ using FstImpl<B>::SetType;
+ using FstImpl<B>::SetProperties;
+ using FstImpl<B>::SetInputSymbols;
+ using FstImpl<B>::SetOutputSymbols;
+
+ using VectorFstBaseImpl<typename CacheImpl<B>::State>::NumStates;
+
+ using CacheImpl<B>::PushArc;
+ using CacheImpl<B>::HasArcs;
+ using CacheImpl<B>::HasFinal;
+ using CacheImpl<B>::HasStart;
+ using CacheImpl<B>::SetArcs;
+ using CacheImpl<B>::SetFinal;
+ using CacheImpl<B>::SetStart;
+
+ friend class StateIterator< ArcMapFst<A, B, C> >;
+
+ typedef B Arc;
+ typedef typename B::Weight Weight;
+ typedef typename B::StateId StateId;
+
+ ArcMapFstImpl(const Fst<A> &fst, const C &mapper, // Stores a copy of 'mapper', which this impl owns.
+ const ArcMapFstOptions& opts)
+ : CacheImpl<B>(opts),
+ fst_(fst.Copy()),
+ mapper_(new C(mapper)),
+ own_mapper_(true),
+ superfinal_(kNoStateId),
+ nstates_(0) {
+ Init();
+ }
+
+ ArcMapFstImpl(const Fst<A> &fst, C *mapper, // Uses the caller's 'mapper' directly; it is not deleted by this impl.
+ const ArcMapFstOptions& opts)
+ : CacheImpl<B>(opts),
+ fst_(fst.Copy()),
+ mapper_(mapper),
+ own_mapper_(false),
+ superfinal_(kNoStateId),
+ nstates_(0) {
+ Init();
+ }
+
+ ArcMapFstImpl(const ArcMapFstImpl<A, B, C> &impl) // Copies the input Fst with Copy(true) and clones the mapper.
+ : CacheImpl<B>(impl),
+ fst_(impl.fst_->Copy(true)),
+ mapper_(new C(*impl.mapper_)),
+ own_mapper_(true),
+ superfinal_(kNoStateId),
+ nstates_(0) {
+ Init();
+ }
+
+ ~ArcMapFstImpl() {
+ delete fst_;
+ if (own_mapper_) delete mapper_; // A caller-supplied mapper is left alone.
+ }
+
+ StateId Start() { // Computes the start state on first use and caches it.
+ if (!HasStart())
+ SetStart(FindOState(fst_->Start()));
+ return CacheImpl<B>::Start();
+ }
+
+ Weight Final(StateId s) { // Computes the mapped final weight of 's' on first use and caches it.
+ if (!HasFinal(s)) {
+ switch (final_action_) {
+ case MAP_NO_SUPERFINAL:
+ default: {
+ B final_arc = (*mapper_)(A(0, 0, fst_->Final(FindIState(s)),
+ kNoStateId));
+ if (final_arc.ilabel != 0 || final_arc.olabel != 0) {
+ FSTERROR() << "ArcMapFst: non-zero arc labels for superfinal arc";
+ SetProperties(kError, kError);
+ }
+ SetFinal(s, final_arc.weight);
+ break;
+ }
+ case MAP_ALLOW_SUPERFINAL: {
+ if (s == superfinal_) {
+ SetFinal(s, Weight::One());
+ } else {
+ B final_arc = (*mapper_)(A(0, 0, fst_->Final(FindIState(s)),
+ kNoStateId));
+ if (final_arc.ilabel == 0 && final_arc.olabel == 0)
+ SetFinal(s, final_arc.weight);
+ else
+ SetFinal(s, Weight::Zero()); // The weight is carried by the superfinal arc added in Expand().
+ }
+ break;
+ }
+ case MAP_REQUIRE_SUPERFINAL: {
+ SetFinal(s, s == superfinal_ ? Weight::One() : Weight::Zero()); // Only the superfinal state is final.
+ break;
+ }
+ }
+ }
+ return CacheImpl<B>::Final(s);
+ }
+
+ size_t NumArcs(StateId s) {
+ if (!HasArcs(s))
+ Expand(s);
+ return CacheImpl<B>::NumArcs(s);
+ }
+
+ size_t NumInputEpsilons(StateId s) {
+ if (!HasArcs(s))
+ Expand(s);
+ return CacheImpl<B>::NumInputEpsilons(s);
+ }
+
+ size_t NumOutputEpsilons(StateId s) {
+ if (!HasArcs(s))
+ Expand(s);
+ return CacheImpl<B>::NumOutputEpsilons(s);
+ }
+
+ uint64 Properties() const { return Properties(kFstProperties); }
+
+ // Sets the error property if the input Fst or the mapper reports one; returns the FST impl properties.
+ uint64 Properties(uint64 mask) const {
+ if ((mask & kError) && (fst_->Properties(kError, false) ||
+ (mapper_->Properties(0) & kError)))
+ SetProperties(kError, kError);
+ return FstImpl<Arc>::Properties(mask);
+ }
+
+ void InitArcIterator(StateId s, ArcIteratorData<B> *data) {
+ if (!HasArcs(s))
+ Expand(s);
+ CacheImpl<B>::InitArcIterator(s, data);
+ }
+
+ void Expand(StateId s) { // Computes the mapped arcs leaving 's' and stores them in the cache.
+ // The superfinal state has no arcs; for any other state, add its mapped outgoing arcs.
+ if (s == superfinal_) { SetArcs(s); return; }
+
+ for (ArcIterator< Fst<A> > aiter(*fst_, FindIState(s));
+ !aiter.Done(); aiter.Next()) {
+ A aarc(aiter.Value());
+ aarc.nextstate = FindOState(aarc.nextstate); // Translate to the output state numbering before mapping.
+ const B& barc = (*mapper_)(aarc);
+ PushArc(s, barc);
+ }
+
+ // Add an arc to the superfinal state when the final action calls for one.
+ if (!HasFinal(s) || Final(s) == Weight::Zero())
+ switch (final_action_) {
+ case MAP_NO_SUPERFINAL:
+ default:
+ break;
+ case MAP_ALLOW_SUPERFINAL: {
+ B final_arc = (*mapper_)(A(0, 0, fst_->Final(FindIState(s)),
+ kNoStateId));
+ if (final_arc.ilabel != 0 || final_arc.olabel != 0) {
+ if (superfinal_ == kNoStateId)
+ superfinal_ = nstates_++; // Allocate the superfinal state the first time it is needed.
+ final_arc.nextstate = superfinal_;
+ PushArc(s, final_arc);
+ }
+ break;
+ }
+ case MAP_REQUIRE_SUPERFINAL: {
+ B final_arc = (*mapper_)(A(0, 0, fst_->Final(FindIState(s)),
+ kNoStateId));
+ if (final_arc.ilabel != 0 || final_arc.olabel != 0 ||
+ final_arc.weight != B::Weight::Zero())
+ PushArc(s, B(final_arc.ilabel, final_arc.olabel,
+ final_arc.weight, superfinal_));
+ break;
+ }
+ }
+ SetArcs(s);
+ }
+
+ private:
+ void Init() { // Sets the type name, symbol tables, final action and properties.
+ SetType("map");
+
+ if (mapper_->InputSymbolsAction() == MAP_COPY_SYMBOLS)
+ SetInputSymbols(fst_->InputSymbols());
+ else if (mapper_->InputSymbolsAction() == MAP_CLEAR_SYMBOLS)
+ SetInputSymbols(0);
+
+ if (mapper_->OutputSymbolsAction() == MAP_COPY_SYMBOLS)
+ SetOutputSymbols(fst_->OutputSymbols());
+ else if (mapper_->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS)
+ SetOutputSymbols(0);
+
+ if (fst_->Start() == kNoStateId) {
+ final_action_ = MAP_NO_SUPERFINAL; // Empty input: the mapper's final action is not consulted.
+ SetProperties(kNullProperties);
+ } else {
+ final_action_ = mapper_->FinalAction();
+ uint64 props = fst_->Properties(kCopyProperties, false);
+ SetProperties(mapper_->Properties(props));
+ if (final_action_ == MAP_REQUIRE_SUPERFINAL)
+ superfinal_ = 0; // A required superfinal state is given output state ID 0.
+ }
+ }
+
+ // Maps from output state to input state: IDs below the superfinal ID (or all IDs when there is none) are unchanged, others are shifted down by one.
+ StateId FindIState(StateId s) {
+ if (superfinal_ == kNoStateId || s < superfinal_)
+ return s;
+ else
+ return s - 1;
+ }
+
+ // Maps from input state to output state: the inverse shift of FindIState(); also grows nstates_ to cover the returned ID.
+ StateId FindOState(StateId is) {
+ StateId os;
+ if (superfinal_ == kNoStateId || is < superfinal_)
+ os = is;
+ else
+ os = is + 1;
+
+ if (os >= nstates_)
+ nstates_ = os + 1;
+
+ return os;
+ }
+
+
+ const Fst<A> *fst_; // Copy of the input Fst; deleted by the destructor.
+ C* mapper_;
+ bool own_mapper_; // True when the destructor deletes mapper_.
+ MapFinalAction final_action_;
+
+ StateId superfinal_; // Output ID of the superfinal state, or kNoStateId if there is none (yet).
+ StateId nstates_; // One past the largest output state ID handed out so far.
+
+ void operator=(const ArcMapFstImpl<A, B, C> &); // disallow
+};
+
+
+// Delayed Fst that maps an arc type A to an arc type B using Mapper
+// function object C.
+template <class A, class B, class C>
+class ArcMapFst : public ImplToFst< ArcMapFstImpl<A, B, C> > {
+ public:
+  friend class ArcIterator< ArcMapFst<A, B, C> >;
+  friend class StateIterator< ArcMapFst<A, B, C> >;
+
+  typedef B Arc;
+  typedef typename B::Weight Weight;
+  typedef typename B::StateId StateId;
+  typedef CacheState<B> State;
+  typedef ArcMapFstImpl<A, B, C> Impl;
+
+  // Mapper passed by const reference, with explicit options.
+  ArcMapFst(const Fst<A> &fst, const C &mapper, const ArcMapFstOptions& opts)
+      : ImplToFst<Impl>(new Impl(fst, mapper, opts)) {
+  }
+
+  // Mapper passed by pointer, with explicit options.
+  ArcMapFst(const Fst<A> &fst, C* mapper, const ArcMapFstOptions& opts)
+      : ImplToFst<Impl>(new Impl(fst, mapper, opts)) {
+  }
+
+  // Mapper passed by const reference, with default options.
+  ArcMapFst(const Fst<A> &fst, const C &mapper)
+      : ImplToFst<Impl>(new Impl(fst, mapper, ArcMapFstOptions())) {
+  }
+
+  // Mapper passed by pointer, with default options.
+  ArcMapFst(const Fst<A> &fst, C* mapper)
+      : ImplToFst<Impl>(new Impl(fst, mapper, ArcMapFstOptions())) {
+  }
+
+  // See Fst<>::Copy() for doc.
+  ArcMapFst(const ArcMapFst<A, B, C> &fst, bool safe = false)
+      : ImplToFst<Impl>(fst, safe) {
+  }
+
+  // Returns a newly allocated copy of this ArcMapFst. See Fst<>::Copy()
+  // for further doc.
+  virtual ArcMapFst<A, B, C> *Copy(bool safe = false) const {
+    ArcMapFst<A, B, C> *copy = new ArcMapFst<A, B, C>(*this, safe);
+    return copy;
+  }
+
+  virtual inline void InitStateIterator(StateIteratorData<B> *data) const;
+
+  // Forwards arc-iterator setup to the implementation.
+  virtual void InitArcIterator(StateId s, ArcIteratorData<B> *data) const {
+    Impl *impl = GetImpl();
+    impl->InitArcIterator(s, data);
+  }
+
+ private:
+  // Makes visible to friends.
+  Impl *GetImpl() const {
+    return ImplToFst<Impl>::GetImpl();
+  }
+
+  void operator=(const ArcMapFst<A, B, C> &fst);  // disallow
+};
+
+
+// StateIterator specialization for ArcMapFst. It walks the states of the
+// input Fst and yields one extra state when a superfinal state is present.
+template<class A, class B, class C>
+class StateIterator< ArcMapFst<A, B, C> > : public StateIteratorBase<B> {
+ public:
+  typedef typename B::StateId StateId;
+
+  explicit StateIterator(const ArcMapFst<A, B, C> &fst)
+      : impl_(fst.GetImpl()),
+        siter_(*impl_->fst_),
+        s_(0),
+        superfinal_(impl_->final_action_ == MAP_REQUIRE_SUPERFINAL) {
+    CheckSuperfinal();
+  }
+
+  // Done once the input states are exhausted and no superfinal state is
+  // still pending.
+  bool Done() const {
+    return siter_.Done() && !superfinal_;
+  }
+
+  StateId Value() const {
+    return s_;
+  }
+
+  void Next() {
+    ++s_;
+    if (siter_.Done()) {
+      // Past the input states: the pending superfinal state, if any, has
+      // now been consumed.
+      superfinal_ = false;
+      return;
+    }
+    siter_.Next();
+    CheckSuperfinal();
+  }
+
+  void Reset() {
+    s_ = 0;
+    siter_.Reset();
+    superfinal_ = impl_->final_action_ == MAP_REQUIRE_SUPERFINAL;
+    CheckSuperfinal();
+  }
+
+ private:
+  // This allows base-class virtual access to non-virtual derived-
+  // class members of the same name. It makes the derived class more
+  // efficient to use but unsafe to further derive.
+  bool Done_() const { return Done(); }
+  StateId Value_() const { return Value(); }
+  void Next_() { Next(); }
+  void Reset_() { Reset(); }
+
+  // For MAP_ALLOW_SUPERFINAL, records that a superfinal state exists as
+  // soon as the mapped final weight of the current state carries a
+  // non-zero label.
+  void CheckSuperfinal() {
+    if (superfinal_ || impl_->final_action_ != MAP_ALLOW_SUPERFINAL)
+      return;
+    if (siter_.Done())
+      return;
+    B final_arc = (*impl_->mapper_)(A(0, 0, impl_->fst_->Final(s_),
+                                      kNoStateId));
+    // superfinal_ is known to be false here, so this assignment only ever
+    // raises the flag.
+    superfinal_ = final_arc.ilabel != 0 || final_arc.olabel != 0;
+  }
+
+  const ArcMapFstImpl<A, B, C> *impl_;
+  StateIterator< Fst<A> > siter_;
+  StateId s_;
+  bool superfinal_;  // true if there is a superfinal state and not done
+
+  DISALLOW_COPY_AND_ASSIGN(StateIterator);
+};
+
+
+// ArcIterator specialization for ArcMapFst, built on the cache's arc
+// iterator.
+template <class A, class B, class C>
+class ArcIterator< ArcMapFst<A, B, C> >
+    : public CacheArcIterator< ArcMapFst<A, B, C> > {
+ public:
+  typedef typename A::StateId StateId;
+
+  // Expands state 's' first if its arcs are not yet available.
+  ArcIterator(const ArcMapFst<A, B, C> &fst, StateId s)
+      : CacheArcIterator< ArcMapFst<A, B, C> >(fst.GetImpl(), s) {
+    const bool expanded = fst.GetImpl()->HasArcs(s);
+    if (!expanded) {
+      fst.GetImpl()->Expand(s);
+    }
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+// Installs a newly allocated ArcMapFst state iterator as the base iterator
+// of 'data'.
+template <class A, class B, class C>
+inline void ArcMapFst<A, B, C>::InitStateIterator(
+    StateIteratorData<B> *data) const {
+  StateIterator< ArcMapFst<A, B, C> > *siter =
+      new StateIterator< ArcMapFst<A, B, C> >(*this);
+  data->base = siter;
+}
+
+
+//
+// Utility Mappers
+//
+
+// Mapper that hands every arc back unchanged.
+template <class A>
+struct IdentityArcMapper {
+  typedef A FromArc;
+  typedef A ToArc;
+
+  A operator()(const A &arc) const {
+    return arc;
+  }
+
+  // Final weights stay final weights; no superfinal state is used.
+  MapFinalAction FinalAction() const {
+    return MAP_NO_SUPERFINAL;
+  }
+
+  // Both symbol tables are copied from the input.
+  MapSymbolsAction InputSymbolsAction() const {
+    return MAP_COPY_SYMBOLS;
+  }
+
+  MapSymbolsAction OutputSymbolsAction() const {
+    return MAP_COPY_SYMBOLS;
+  }
+
+  // The input properties are returned unchanged.
+  uint64 Properties(uint64 props) const {
+    return props;
+  }
+};
+
+
+// Mapper that leaves arcs unchanged but requests that final states be
+// redirected to a single super-final state.
+template <class A>
+struct SuperFinalMapper {
+  typedef A FromArc;
+  typedef A ToArc;
+
+  A operator()(const A &arc) const {
+    return arc;
+  }
+
+  // The superfinal state is always requested.
+  MapFinalAction FinalAction() const {
+    return MAP_REQUIRE_SUPERFINAL;
+  }
+
+  // Both symbol tables are copied from the input.
+  MapSymbolsAction InputSymbolsAction() const {
+    return MAP_COPY_SYMBOLS;
+  }
+
+  MapSymbolsAction OutputSymbolsAction() const {
+    return MAP_COPY_SYMBOLS;
+  }
+
+  // Keeps only the input properties selected by kAddSuperFinalProperties.
+  uint64 Properties(uint64 props) const {
+    const uint64 kept = props & kAddSuperFinalProperties;
+    return kept;
+  }
+};
+
+
+// Mapper that keeps labels and nextstate as they are and builds the new
+// arc's weight by converting the original arc weight. A WeightConvert
+// class specialization for the two weight types must exist.
+template <class A, class B>
+class WeightConvertMapper {
+ public:
+  typedef A FromArc;
+  typedef B ToArc;
+  typedef typename FromArc::Weight FromWeight;
+  typedef typename ToArc::Weight ToWeight;
+
+  ToArc operator()(const FromArc &arc) const {
+    return ToArc(arc.ilabel,
+                 arc.olabel,
+                 convert_weight_(arc.weight),
+                 arc.nextstate);
+  }
+
+  // Final weights stay final weights; no superfinal state is used.
+  MapFinalAction FinalAction() const {
+    return MAP_NO_SUPERFINAL;
+  }
+
+  // Both symbol tables are copied from the input.
+  MapSymbolsAction InputSymbolsAction() const {
+    return MAP_COPY_SYMBOLS;
+  }
+
+  MapSymbolsAction OutputSymbolsAction() const {
+    return MAP_COPY_SYMBOLS;
+  }
+
+  // The input properties are returned unchanged.
+  uint64 Properties(uint64 props) const {
+    return props;
+  }
+
+ private:
+  WeightConvert<FromWeight, ToWeight> convert_weight_;
+};
+
+// Non-precision-changing weight conversions.
+// Consider using more efficient Cast (fst.h) instead.
+typedef WeightConvertMapper<StdArc, LogArc> StdToLogMapper;
+typedef WeightConvertMapper<LogArc, StdArc> LogToStdMapper;
+
+// Precision-changing weight conversions.
+typedef WeightConvertMapper<StdArc, Log64Arc> StdToLog64Mapper;
+typedef WeightConvertMapper<LogArc, Log64Arc> LogToLog64Mapper;
+typedef WeightConvertMapper<Log64Arc, StdArc> Log64ToStdMapper;
+typedef WeightConvertMapper<Log64Arc, LogArc> Log64ToLogMapper;
+
+// Mapper from A to GallicArc<A>: the output label of each arc is moved
+// into the string component of the Gallic weight, and both labels of the
+// resulting arc are set to the input label.
+template <class A, StringType S = STRING_LEFT>
+struct ToGallicMapper {
+  typedef A FromArc;
+  typedef GallicArc<A, S> ToArc;
+
+  typedef StringWeight<typename A::Label, S> SW;
+  typedef typename A::Weight AW;
+  typedef typename GallicArc<A, S>::Weight GW;
+
+  ToArc operator()(const A &arc) const {
+    if (arc.nextstate == kNoStateId) {
+      // 'Super-final' arc.
+      if (arc.weight != AW::Zero())
+        return ToArc(0, 0, GW(SW::One(), arc.weight), kNoStateId);
+      // 'Super-non-final' arc.
+      return ToArc(0, 0, GW(SW::Zero(), arc.weight), kNoStateId);
+    }
+    // Epsilon label.
+    if (arc.olabel == 0) {
+      return ToArc(arc.ilabel, arc.ilabel,
+                   GW(SW::One(), arc.weight), arc.nextstate);
+    }
+    // Regular label.
+    return ToArc(arc.ilabel, arc.ilabel,
+                 GW(SW(arc.olabel), arc.weight), arc.nextstate);
+  }
+
+  // Final weights stay final weights; no superfinal state is used.
+  MapFinalAction FinalAction() const {
+    return MAP_NO_SUPERFINAL;
+  }
+
+  // Input symbols are copied; output symbols are cleared.
+  MapSymbolsAction InputSymbolsAction() const {
+    return MAP_COPY_SYMBOLS;
+  }
+
+  MapSymbolsAction OutputSymbolsAction() const {
+    return MAP_CLEAR_SYMBOLS;
+  }
+
+  uint64 Properties(uint64 props) const {
+    const uint64 projected = ProjectProperties(props, true);
+    return projected & kWeightInvariantProperties;
+  }
+};
+
+
+// Mapper from GallicArc<A> to A. The string component of the Gallic weight
+// must hold at most one label, which becomes the output label of the
+// resulting arc. Weights that cannot be represented this way are reported
+// through FSTERROR() and make Properties() include kError.
+template <class A, StringType S = STRING_LEFT>
+struct FromGallicMapper {
+  typedef GallicArc<A, S> FromArc;
+  typedef A ToArc;
+
+  typedef typename A::Label Label;
+  typedef StringWeight<Label, S> SW;
+  typedef typename A::Weight AW;
+  typedef typename GallicArc<A, S>::Weight GW;
+
+  // 'superfinal_label' is used as the input label of a final-weight arc
+  // (ilabel 0, nextstate kNoStateId) whose string component is a non-zero
+  // label.
+  FromGallicMapper(Label superfinal_label = 0)
+      : superfinal_label_(superfinal_label), error_(false) {}
+
+  A operator()(const FromArc &arc) const {
+    // 'Super-non-final' arc.
+    if (arc.nextstate == kNoStateId && arc.weight == GW::Zero())
+      return A(arc.ilabel, 0, AW::Zero(), kNoStateId);
+
+    SW w1 = arc.weight.Value1();
+    AW w2 = arc.weight.Value2();
+    StringWeightIterator<Label, S> iter1(w1);
+
+    // A single-label string gives the output label; anything else gives 0.
+    Label l = w1.Size() == 1 ? iter1.Value() : 0;
+
+    if (l == kStringInfinity || l == kStringBad ||
+        arc.ilabel != arc.olabel || w1.Size() > 1) {
+      FSTERROR() << "FromGallicMapper: unrepresentable weight";
+      error_ = true;
+    }
+
+    if (arc.ilabel == 0 && l != 0 && arc.nextstate == kNoStateId)
+      return A(superfinal_label_, l, w2, arc.nextstate);
+    else
+      return A(arc.ilabel, l, w2, arc.nextstate);
+  }
+
+  // A superfinal state is used only when a final weight needs one.
+  MapFinalAction FinalAction() const { return MAP_ALLOW_SUPERFINAL; }
+
+  // Input symbols are copied; output symbols are cleared.
+  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+
+  MapSymbolsAction OutputSymbolsAction() const { return MAP_CLEAR_SYMBOLS;}
+
+  // Restricts the input properties to those selected by all three masks
+  // and adds kError if an unrepresentable weight has been seen.
+  uint64 Properties(uint64 inprops) const {
+    uint64 outprops = inprops & kOLabelInvariantProperties &
+        kWeightInvariantProperties & kAddSuperFinalProperties;
+    if (error_)
+      outprops |= kError;
+    return outprops;
+  }
+
+ private:
+  Label superfinal_label_;
+  mutable bool error_;  // Set from the const operator(), hence mutable.
+};
+
+
+// Mapper from GallicArc<A, S> to A that assigns a fresh output label to every
+// distinct non-empty string weight it encounters. While mapping, it also
+// fills the MutableFst passed to the constructor with one path per new label:
+// the path leaves and re-enters a single start/final state, carries the new
+// label as input label on its first arc (0 on the others) and the labels of
+// the original string as output labels.
+template <class A, StringType S = STRING_LEFT>
+struct GallicToNewSymbolsMapper {
+  typedef GallicArc<A, S> FromArc;
+  typedef A ToArc;
+
+  typedef typename A::StateId StateId;
+  typedef typename A::Label Label;
+  typedef StringWeight<Label, S> SW;
+  typedef typename A::Weight AW;
+  typedef typename GallicArc<A, S>::Weight GW;
+
+  // Resets 'fst' to a single state that is both start and final (weight
+  // AW::One()). If 'fst' had an output symbol table, a new input symbol
+  // table named "<output table name>_from_gallic" is installed and seeded
+  // with the symbol of label 0; otherwise the input symbols are cleared.
+  GallicToNewSymbolsMapper(MutableFst<ToArc> *fst)
+      : fst_(fst), lmax_(0), osymbols_(fst->OutputSymbols()),
+        isymbols_(0), error_(false) {
+    fst_->DeleteStates();
+    state_ = fst_->AddState();
+    fst_->SetStart(state_);
+    fst_->SetFinal(state_, AW::One());
+    if (osymbols_) {
+      string name = osymbols_->Name() + "_from_gallic";
+      fst_->SetInputSymbols(new SymbolTable(name));
+      isymbols_ = fst_->MutableInputSymbols();
+      isymbols_->AddSymbol(osymbols_->Find((int64) 0), 0);
+    } else {
+      fst_->SetInputSymbols(0);
+    }
+  }
+
+  A operator()(const FromArc &arc) {
+    // 'Super-non-final' arc.
+    if (arc.nextstate == kNoStateId && arc.weight == GW::Zero())
+      return A(arc.ilabel, 0, AW::Zero(), kNoStateId);
+
+    SW w1 = arc.weight.Value1();
+    AW w2 = arc.weight.Value2();
+    Label l;
+
+    if (w1.Size() == 0) {
+      l = 0;
+    } else {
+      typename Map::iterator miter = map_.find(w1);
+      if (miter != map_.end()) {
+        // String seen before: reuse its label.
+        l = (*miter).second;
+      } else {
+        // New string: allocate the next label and add its path to fst_.
+        l = ++lmax_;
+        map_.insert(pair<const SW, Label>(w1, l));
+        StringWeightIterator<Label, S> iter1(w1);
+        StateId n;
+        string s;
+        for (size_t i = 0, p = state_;
+             i < w1.Size();
+             ++i, iter1.Next(), p = n) {
+          // The last arc of the path returns to the start/final state.
+          n = i == w1.Size() - 1 ? state_ : fst_->AddState();
+          fst_->AddArc(p, ToArc(i ? 0 : l, iter1.Value(), AW::One(), n));
+          if (isymbols_) {
+            // Symbol name: the string's symbols joined by '_'.
+            if (i) s = s + "_";
+            s = s + osymbols_->Find(iter1.Value());
+          }
+        }
+        if (isymbols_)
+          isymbols_->AddSymbol(s, l);
+      }
+    }
+
+    if (l == kStringInfinity || l == kStringBad || arc.ilabel != arc.olabel) {
+      FSTERROR() << "GallicToNewSymbolsMapper: unrepresentable weight";
+      error_ = true;
+    }
+
+    return A(arc.ilabel, l, w2, arc.nextstate);
+  }
+
+  MapFinalAction FinalAction() const { return MAP_ALLOW_SUPERFINAL; }
+
+  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+
+  MapSymbolsAction OutputSymbolsAction() const { return MAP_CLEAR_SYMBOLS; }
+
+  // Keeps only properties that survive output-label changes, weight changes
+  // and the addition of a super-final state; adds kError after a failure.
+  uint64 Properties(uint64 inprops) const {
+    uint64 outprops = inprops & kOLabelInvariantProperties &
+        kWeightInvariantProperties & kAddSuperFinalProperties;
+    if (error_)
+      outprops |= kError;
+    return outprops;
+  }
+
+ private:
+  // Hash functor for string weights; delegates to SW::Hash().
+  class StringKey {
+   public:
+    size_t operator()(const SW &x) const {
+      return x.Hash();
+    }
+  };
+
+  typedef unordered_map<SW, Label, StringKey> Map;
+
+  MutableFst<ToArc> *fst_;        // receives the label-expansion paths
+  Map map_;                       // string weight -> assigned label
+  Label lmax_;                    // last label handed out
+  StateId state_;                 // the single start/final state of fst_
+  const SymbolTable *osymbols_;   // output symbols fst_ had on construction
+  SymbolTable *isymbols_;         // new input symbols of fst_, or 0
+  mutable bool error_;
+
+  DISALLOW_COPY_AND_ASSIGN(GallicToNewSymbolsMapper);
+};
+
+
+// Mapper that Plus()-es a constant onto every weight other than Zero().
+template <class A>
+struct PlusMapper {
+  typedef A FromArc;
+  typedef A ToArc;
+  typedef typename A::Weight Weight;
+
+  explicit PlusMapper(Weight w) : weight_(w) {}
+
+  // Arcs weighted Zero() are returned unchanged.
+  A operator()(const A &arc) const {
+    if (!(arc.weight == Weight::Zero())) {
+      Weight sum = Plus(arc.weight, weight_);
+      return A(arc.ilabel, arc.olabel, sum, arc.nextstate);
+    }
+    return arc;
+  }
+
+  MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; }
+
+  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+
+  MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+
+  uint64 Properties(uint64 props) const {
+    return kWeightInvariantProperties & props;
+  }
+
+ private:
+  Weight weight_;  // the constant added to each weight
+};
+
+
+// Mapper that multiplies every weight other than Zero() by a constant; the
+// constant is the right-hand operand of Times().
+template <class A>
+struct TimesMapper {
+  typedef A FromArc;
+  typedef A ToArc;
+  typedef typename A::Weight Weight;
+
+  explicit TimesMapper(Weight w) : weight_(w) {}
+
+  // Arcs weighted Zero() are returned unchanged.
+  A operator()(const A &arc) const {
+    const bool is_zero = arc.weight == Weight::Zero();
+    if (is_zero)
+      return arc;
+    Weight product = Times(arc.weight, weight_);
+    return A(arc.ilabel, arc.olabel, product, arc.nextstate);
+  }
+
+  MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; }
+
+  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+
+  MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+
+  uint64 Properties(uint64 props) const {
+    return kWeightInvariantProperties & props;
+  }
+
+ private:
+  Weight weight_;  // the right-hand factor
+};
+
+
+// Mapper that replaces each non-Zero() weight w by Divide(One(), w).
+template <class A>
+struct InvertWeightMapper {
+  typedef A FromArc;
+  typedef A ToArc;
+  typedef typename A::Weight Weight;
+
+  // Arcs weighted Zero() are returned unchanged.
+  A operator()(const A &arc) const {
+    if (arc.weight == Weight::Zero()) {
+      return arc;
+    }
+    Weight reciprocal = Divide(Weight::One(), arc.weight);
+    return A(arc.ilabel, arc.olabel, reciprocal, arc.nextstate);
+  }
+
+  MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; }
+
+  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+
+  MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+
+  uint64 Properties(uint64 props) const {
+    return kWeightInvariantProperties & props;
+  }
+};
+
+
+// Mapper that strips weights: Zero() of the source weight type maps to
+// Zero() of the target type, every other weight maps to One().
+template <class A, class B = A>
+struct RmWeightMapper {
+  typedef A FromArc;
+  typedef B ToArc;
+  typedef typename FromArc::Weight FromWeight;
+  typedef typename ToArc::Weight ToWeight;
+
+  B operator()(const A &arc) const {
+    ToWeight w = ToWeight::Zero();
+    if (arc.weight != FromWeight::Zero())
+      w = ToWeight::One();
+    return B(arc.ilabel, arc.olabel, w, arc.nextstate);
+  }
+
+  MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; }
+
+  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+
+  MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+
+  // The result is always reported as unweighted.
+  uint64 Properties(uint64 props) const {
+    const uint64 kept = props & kWeightInvariantProperties;
+    return kept | kUnweighted;
+  }
+};
+
+
+// Mapper that passes every weight through Weight::Quantize(delta).
+template <class A, class B = A>
+struct QuantizeMapper {
+  typedef A FromArc;
+  typedef B ToArc;
+  typedef typename FromArc::Weight FromWeight;
+  typedef typename ToArc::Weight ToWeight;
+
+  // Uses kDelta as the quantization step.
+  QuantizeMapper() : delta_(kDelta) {}
+
+  // Uses the caller-supplied quantization step 'd'.
+  explicit QuantizeMapper(float d) : delta_(d) {}
+
+  B operator()(const A &arc) const {
+    ToWeight quantized = arc.weight.Quantize(delta_);
+    return B(arc.ilabel, arc.olabel, quantized, arc.nextstate);
+  }
+
+  MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; }
+
+  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+
+  MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+
+  uint64 Properties(uint64 props) const {
+    return kWeightInvariantProperties & props;
+  }
+
+ private:
+  float delta_;  // step handed to Quantize()
+};
+
+
+// Mapper from A to B that reverses the weight and copies everything else.
+// It assumes:
+//   B::Weight == A::Weight::ReverseWeight
+//   B::Label == A::Label
+//   B::StateId == A::StateId
+// Labels and nextstate are carried over untouched; all properties are
+// reported as preserved.
+template <class A, class B>
+struct ReverseWeightMapper {
+  typedef A FromArc;
+  typedef B ToArc;
+
+  B operator()(const A &arc) const {
+    return B(arc.ilabel,
+             arc.olabel,
+             arc.weight.Reverse(),
+             arc.nextstate);
+  }
+
+  MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; }
+
+  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+
+  MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+
+  uint64 Properties(uint64 props) const {
+    return props;
+  }
+};
+
+} // namespace fst
+
+#endif // FST_LIB_ARC_MAP_H__
diff --git a/src/include/fst/arc.h b/src/include/fst/arc.h
new file mode 100644
index 0000000..56086c9
--- /dev/null
+++ b/src/include/fst/arc.h
@@ -0,0 +1,306 @@
+// arc.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+//
+// Commonly used Fst arc types.
+
+#ifndef FST_LIB_ARC_H__
+#define FST_LIB_ARC_H__
+
+#include <string>
+
+
+#include <fst/expectation-weight.h>
+#include <fst/float-weight.h>
+#include <fst/lexicographic-weight.h>
+#include <fst/power-weight.h>
+#include <fst/product-weight.h>
+#include <fst/signed-log-weight.h>
+#include <fst/sparse-power-weight.h>
+#include <iostream>
+#include <fstream>
+#include <fst/string-weight.h>
+
+
+namespace fst {
+
+// Generic arc with int labels and state IDs, parameterized on the weight
+// type W.
+template <class W>
+class ArcTpl {
+ public:
+  typedef W Weight;
+  typedef int Label;
+  typedef int StateId;
+
+  // Leaves all four fields uninitialized.
+  ArcTpl() {}
+
+  ArcTpl(Label i, Label o, const Weight& w, StateId s)
+      : ilabel(i), olabel(o), weight(w), nextstate(s) {}
+
+  // Arc type name: "standard" when the weight type is named "tropical",
+  // otherwise the weight type's own name.
+  static const string &Type() {
+    static const string type =
+        Weight::Type() != "tropical" ? Weight::Type() : string("standard");
+    return type;
+  }
+
+  Label ilabel;       // Transition input label
+  Label olabel;       // Transition output label
+  Weight weight;      // Transition weight
+  StateId nextstate;  // Transition destination state
+};
+
+typedef ArcTpl<TropicalWeight> StdArc;  // ArcTpl over TropicalWeight
+typedef ArcTpl<LogWeight> LogArc;  // ArcTpl over LogWeight
+typedef ArcTpl<Log64Weight> Log64Arc;  // ArcTpl over Log64Weight
+typedef ArcTpl<SignedLogWeight> SignedLogArc;  // ArcTpl over SignedLogWeight
+typedef ArcTpl<SignedLog64Weight> SignedLog64Arc;  // ArcTpl over SignedLog64Weight
+typedef ArcTpl<MinMaxWeight> MinMaxArc;  // ArcTpl over MinMaxWeight
+
+
+// Arc with int labels and state IDs whose weight is a StringWeight<int, S>.
+template <StringType S = STRING_LEFT>
+class StringArc {
+ public:
+  typedef int Label;
+  typedef StringWeight<int, S> Weight;
+  typedef int StateId;
+
+  // Leaves all four fields uninitialized.
+  StringArc() {}
+
+  StringArc(Label i, Label o, Weight w, StateId s)
+      : ilabel(i), olabel(o), weight(w), nextstate(s) {}
+
+  // Arc type name, selected by the string type S.
+  static const string &Type() {
+    static const string type(
+        S == STRING_LEFT ? "standard_string" :
+        S == STRING_RIGHT ? "right_standard_string" :
+        S == STRING_LEFT_RESTRICT ? "restricted_string" :
+        "right_restricted_string");
+    return type;
+  }
+
+  Label ilabel;       // Transition input label
+  Label olabel;       // Transition output label
+  Weight weight;      // Transition weight
+  StateId nextstate;  // Transition destination state
+};
+
+
+// Arc that takes its label and state ID types from A and whose weight is a
+// GallicWeight built from A's label type and A's weight type.
+template <class A, StringType S = STRING_LEFT>
+struct GallicArc {
+  typedef A Arc;
+  typedef typename A::Label Label;
+  typedef typename A::StateId StateId;
+  typedef GallicWeight<Label, typename A::Weight, S> Weight;
+
+  // Leaves all four fields uninitialized.
+  GallicArc() {}
+
+  GallicArc(Label i, Label o, Weight w, StateId s)
+      : ilabel(i), olabel(o), weight(w), nextstate(s) {}
+
+  // Converts an A arc: both labels become arc.ilabel, and the weight is
+  // constructed from (arc.olabel, arc.weight).
+  GallicArc(const A &arc)
+      : ilabel(arc.ilabel),
+        olabel(arc.ilabel),
+        weight(arc.olabel, arc.weight),
+        nextstate(arc.nextstate) {}
+
+  // Arc type name: a prefix selected by S followed by A's type name.
+  static const string &Type() {
+    static const string type =
+        string(S == STRING_LEFT ? "gallic_" :
+               S == STRING_RIGHT ? "right_gallic_" :
+               S == STRING_LEFT_RESTRICT ? "restricted_gallic_" :
+               "right_restricted_gallic_") + A::Type();
+    return type;
+  }
+
+  Label ilabel;       // Transition input label
+  Label olabel;       // Transition output label
+  Weight weight;      // Transition weight
+  StateId nextstate;  // Transition destination state
+};
+
+
+// Arc whose weight type is the ReverseWeight of A's weight type; label and
+// state ID types are taken from A.
+template <class A> struct ReverseArc {
+  typedef A Arc;
+  typedef typename A::Label Label;
+  typedef typename A::Weight AWeight;
+  typedef typename AWeight::ReverseWeight Weight;
+  typedef typename A::StateId StateId;
+
+  // Leaves all four fields uninitialized.
+  ReverseArc() {}
+
+  ReverseArc(Label i, Label o, Weight w, StateId s)
+      : ilabel(i), olabel(o), weight(w), nextstate(s) {}
+
+  // Arc type name: "reverse_" followed by A's type name.
+  static const string &Type() {
+    static const string type = string("reverse_") + Arc::Type();
+    return type;
+  }
+
+  Label ilabel;       // Transition input label
+  Label olabel;       // Transition output label
+  Weight weight;      // Transition weight
+  StateId nextstate;  // Transition destination state
+};
+
+
+// Arc with int labels and state IDs whose weight is a
+// LexicographicWeight<W1, W2>.
+template<class W1, class W2>
+struct LexicographicArc {
+  typedef int Label;
+  typedef LexicographicWeight<W1, W2> Weight;
+  typedef int StateId;
+
+  // Leaves all four fields uninitialized.
+  LexicographicArc() {}
+
+  LexicographicArc(Label i, Label o, Weight w, StateId s)
+      : ilabel(i), olabel(o), weight(w), nextstate(s) {}
+
+  // Arc type name: identical to the weight type name.
+  static const string &Type() {
+    static const string type(Weight::Type());
+    return type;
+  }
+
+  Label ilabel;       // Transition input label
+  Label olabel;       // Transition output label
+  Weight weight;      // Transition weight
+  StateId nextstate;  // Transition destination state
+};
+
+
+// Arc with int labels and state IDs whose weight is a ProductWeight<W1, W2>.
+template<class W1, class W2>
+struct ProductArc {
+  typedef int Label;
+  typedef ProductWeight<W1, W2> Weight;
+  typedef int StateId;
+
+  // Leaves all four fields uninitialized.
+  ProductArc() {}
+
+  ProductArc(Label i, Label o, Weight w, StateId s)
+      : ilabel(i), olabel(o), weight(w), nextstate(s) {}
+
+  // Arc type name: identical to the weight type name.
+  static const string &Type() {
+    static const string type(Weight::Type());
+    return type;
+  }
+
+  Label ilabel;       // Transition input label
+  Label olabel;       // Transition output label
+  Weight weight;      // Transition weight
+  StateId nextstate;  // Transition destination state
+};
+
+
+// Arc that takes its label and state ID types from A and whose weight is
+// PowerWeight<A::Weight, n>, i.e. the n-th cartesian power of A's weight
+// type.
+template <class A, unsigned int n>
+struct PowerArc {
+  typedef A Arc;
+  typedef typename A::Label Label;
+  typedef typename A::StateId StateId;
+  typedef PowerWeight<typename A::Weight, n> Weight;
+
+  // Leaves all four fields uninitialized.
+  PowerArc() {}
+
+  PowerArc(Label i, Label o, Weight w, StateId s)
+      : ilabel(i), olabel(o), weight(w), nextstate(s) {}
+
+  // Arc type name: A's type name, "_^", then n in decimal. The string is
+  // built on the first call and reused afterwards.
+  static const string &Type() {
+    static string type;
+    if (!type.empty())
+      return type;
+    string power;
+    Int64ToStr(n, &power);
+    type = A::Type() + "_^" + power;
+    return type;
+  }
+
+  Label ilabel;       // Transition input label
+  Label olabel;       // Transition output label
+  Weight weight;      // Transition weight
+  StateId nextstate;  // Transition destination state
+};
+
+
+// Arc with label and state Id type the same as first template arg and with
+// weights over the arbitrary cartesian power of the weight type
+// (SparsePowerWeight<A::Weight, K>, with K the key type).
+template <class A, class K = int>
+struct SparsePowerArc {
+  typedef A Arc;
+  typedef typename A::Label Label;
+  typedef typename A::StateId StateId;
+  typedef SparsePowerWeight<typename A::Weight, K> Weight;
+
+  // Leaves all four fields uninitialized.
+  SparsePowerArc() {}
+
+  SparsePowerArc(Label i, Label o, Weight w, StateId s)
+      : ilabel(i), olabel(o), weight(w), nextstate(s) {}
+
+  // Arc type name: A's type name followed by "_^n", plus "_<bits in K>" when
+  // K does not have the size of uint32. The whole name, suffix included, is
+  // built only once; building the suffix outside the empty() guard would
+  // append it again on every call.
+  static const string &Type() {  // Arc type name
+    static string type;
+    if (type.empty()) {
+      type = A::Type() + "_^n";
+      if (sizeof(K) != sizeof(uint32)) {
+        string size;
+        Int64ToStr(8 * sizeof(K), &size);
+        type += "_" + size;
+      }
+    }
+    return type;
+  }
+
+  Label ilabel;       // Transition input label
+  Label olabel;       // Transition output label
+  Weight weight;      // Transition weight
+  StateId nextstate;  // Transition destination state
+};
+
+
+// Arc that takes its label and state ID types from A and whose weight is an
+// ExpectationWeight over A's weight type (X1) and the second template
+// argument (X2).
+template <class A, class X2>
+struct ExpectationArc {
+  typedef A Arc;
+  typedef typename A::Label Label;
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight X1;
+  typedef ExpectationWeight<X1, X2> Weight;
+
+  // Leaves all four fields uninitialized.
+  ExpectationArc() {}
+
+  ExpectationArc(Label i, Label o, Weight w, StateId s)
+      : ilabel(i), olabel(o), weight(w), nextstate(s) {}
+
+  // Arc type name: "expectation_<A type>_<X2 type>". The string is built on
+  // the first call and reused afterwards.
+  static const string &Type() {
+    static string type;
+    if (!type.empty())
+      return type;
+    type = "expectation_" + A::Type() + "_" + X2::Type();
+    return type;
+  }
+
+  Label ilabel;       // Transition input label
+  Label olabel;       // Transition output label
+  Weight weight;      // Transition weight
+  StateId nextstate;  // Transition destination state
+};
+
+} // namespace fst
+
+#endif // FST_LIB_ARC_H__
diff --git a/src/include/fst/arcfilter.h b/src/include/fst/arcfilter.h
new file mode 100644
index 0000000..179dc2c
--- /dev/null
+++ b/src/include/fst/arcfilter.h
@@ -0,0 +1,99 @@
+// arcfilter.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Function objects to restrict which arcs are traversed in an FST.
+
+#ifndef FST_LIB_ARCFILTER_H__
+#define FST_LIB_ARCFILTER_H__
+
+
+#include <fst/fst.h>
+#include <fst/util.h>
+
+
+namespace fst {
+
+// Accepts every arc.
+template <class A>
+class AnyArcFilter {
+public:
+  // The arc is never inspected.
+  bool operator()(const A &) const {
+    return true;
+  }
+};
+
+
+// Accepts arcs whose input and output labels are both epsilon (0).
+template <class A>
+class EpsilonArcFilter {
+public:
+  bool operator()(const A &arc) const {
+    if (arc.ilabel != 0)
+      return false;
+    return arc.olabel == 0;
+  }
+};
+
+
+// Accepts arcs whose input label is epsilon (0).
+template <class A>
+class InputEpsilonArcFilter {
+public:
+  bool operator()(const A &arc) const {
+    const bool input_is_epsilon = (arc.ilabel == 0);
+    return input_is_epsilon;
+  }
+};
+
+
+// Accepts arcs whose output label is epsilon (0).
+template <class A>
+class OutputEpsilonArcFilter {
+public:
+  bool operator()(const A &arc) const {
+    const bool output_is_epsilon = (arc.olabel == 0);
+    return output_is_epsilon;
+  }
+};
+
+
+// Filters arcs against a set of labels registered through AddLabel(). With
+// keep_match == true an arc passes when its label is in the set; with
+// keep_match == false it passes when the label is not in the set.
+// match_input selects whether the input or the output label is tested.
+template <class A>
+class MultiLabelArcFilter {
+public:
+  typedef typename A::Label Label;
+
+  MultiLabelArcFilter(bool match_input = true, bool keep_match = true)
+      : match_input_(match_input), keep_match_(keep_match) {}
+
+  bool operator()(const A &arc) const {
+    Label probe = arc.olabel;
+    if (match_input_)
+      probe = arc.ilabel;
+    const bool found = labels_.Find(probe) != labels_.End();
+    return found == keep_match_;
+  }
+
+  // Adds 'label' to the set of labels tested by operator().
+  void AddLabel(Label label) {
+    labels_.Insert(label);
+  }
+
+private:
+  CompactSet<Label, kNoLabel> labels_;
+  bool match_input_;
+  bool keep_match_;
+};
+
+} // namespace fst
+
+#endif // FST_LIB_ARCFILTER_H__
diff --git a/src/include/fst/arcsort.h b/src/include/fst/arcsort.h
new file mode 100644
index 0000000..38f4f95
--- /dev/null
+++ b/src/include/fst/arcsort.h
@@ -0,0 +1,203 @@
+// arcsort.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Functions and classes to sort arcs in an FST.
+
+#ifndef FST_LIB_ARCSORT_H__
+#define FST_LIB_ARCSORT_H__
+
+#include <algorithm>
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/cache.h>
+#include <fst/state-map.h>
+#include <fst/test-properties.h>
+
+
+namespace fst {
+
+// State mapper that presents the arcs of each state of an Fst in the order
+// given by a comparison object. It keeps references to both the Fst and the
+// comparison object, so the caller must keep them alive while the mapper is
+// in use.
+template <class Arc, class Compare>
+class ArcSortMapper {
+ public:
+  typedef Arc FromArc;
+  typedef Arc ToArc;
+
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Weight Weight;
+
+  ArcSortMapper(const Fst<Arc> &fst, const Compare &comp)
+      : fst_(fst), comp_(comp), i_(0) {}
+
+  // Allows updating Fst argument; pass only if changed.
+  ArcSortMapper(const ArcSortMapper<Arc, Compare> &mapper,
+                const Fst<Arc> *fst = 0)
+      : fst_(fst ? *fst : mapper.fst_), comp_(mapper.comp_), i_(0) {}
+
+  StateId Start() { return fst_.Start(); }
+
+  Weight Final(StateId s) const { return fst_.Final(s); }
+
+  // Copies the arcs leaving state 's', sorts the copy with the comparison
+  // object and rewinds the cursor to the first arc.
+  void SetState(StateId s) {
+    arcs_.clear();
+    arcs_.reserve(fst_.NumArcs(s));
+    {
+      ArcIterator< Fst<Arc> > aiter(fst_, s);
+      while (!aiter.Done()) {
+        arcs_.push_back(aiter.Value());
+        aiter.Next();
+      }
+    }
+    sort(arcs_.begin(), arcs_.end(), comp_);
+    i_ = 0;
+  }
+
+  // Cursor over the sorted arcs of the state chosen by SetState().
+  bool Done() const { return i_ >= arcs_.size(); }
+
+  const Arc &Value() const { return arcs_[i_]; }
+
+  void Next() { ++i_; }
+
+  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+
+  MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+
+  // The comparison object decides which properties hold after sorting.
+  uint64 Properties(uint64 props) const {
+    return comp_.Properties(props);
+  }
+
+ private:
+  const Fst<Arc> &fst_;
+  const Compare &comp_;
+  vector<Arc> arcs_;  // sorted arcs of the current state
+  ssize_t i_;         // current arc position
+
+  void operator=(const ArcSortMapper<Arc, Compare> &);  // disallow
+};
+
+
+// Sorts the arcs of every state of an FST with the function object 'comp'
+// of type Compare. This version modifies its input. The library provides
+// the comparison function objects ILabelCompare and OLabelCompare. In
+// general, Compare must meet the requirements for an STL sort comparison
+// function object. It must also have a member Properties(uint64) that
+// returns the known properties of the sorted FST, given the known
+// properties of the input FST before the sort.
+//
+// Complexity:
+// - Time: O(V D log D)
+// - Space: O(D)
+// where V = # of states and D = maximum out-degree.
+template<class Arc, class Compare>
+void ArcSort(MutableFst<Arc> *fst, Compare comp) {
+  typedef ArcSortMapper<Arc, Compare> Mapper;
+  Mapper sorter(*fst, comp);
+  StateMap(fst, sorter);
+}
+
+typedef CacheOptions ArcSortFstOptions;  // options type accepted by the ArcSortFst constructor; plain cache options
+
+// Sorts the arcs of every state of an FST with the function object 'comp'
+// of type Compare. This version is a delayed Fst. The library provides the
+// comparison function objects ILabelCompare and OLabelCompare. In general,
+// Compare must meet the requirements for an STL comparison function object
+// (e.g. as used for STL sort). It must also have a member
+// Properties(uint64) that returns the known properties of the sorted FST,
+// given the known properties of the input FST.
+//
+// Complexity:
+// - Time: O(v d log d)
+// - Space: O(d)
+// where v = # of states visited, d = maximum out-degree of states
+// visited. Constant time and space to visit an input state is assumed
+// and exclusive of caching.
+template <class A, class C>
+class ArcSortFst : public StateMapFst<A, A, ArcSortMapper<A, C> > {
+ public:
+  typedef A Arc;
+  typedef ArcSortMapper<A, C> M;
+
+  ArcSortFst(const Fst<A> &fst, const C &comp)
+      : StateMapFst<A, A, M>(fst, M(fst, comp)) {}
+
+  ArcSortFst(const Fst<A> &fst, const C &comp, const ArcSortFstOptions &opts)
+      : StateMapFst<A, A, M>(fst, M(fst, comp), opts) {}
+
+  // See Fst<>::Copy() for doc.
+  ArcSortFst(const ArcSortFst<A, C> &fst, bool safe = false)
+      : StateMapFst<A, A, M>(fst, safe) {}
+
+  // Get a copy of this ArcSortFst. See Fst<>::Copy() for further doc.
+  virtual ArcSortFst<A, C> *Copy(bool safe = false) const {
+    ArcSortFst<A, C> *copy = new ArcSortFst<A, C>(*this, safe);
+    return copy;
+  }
+};
+
+
+// Specialization for ArcSortFst; it derives from the StateIterator of the underlying StateMapFst and adds nothing.
+template <class A, class C>
+class StateIterator< ArcSortFst<A, C> >
+ : public StateIterator< StateMapFst<A, A, ArcSortMapper<A, C> > > {
+ public:
+ explicit StateIterator(const ArcSortFst<A, C> &fst)
+ : StateIterator< StateMapFst<A, A, ArcSortMapper<A, C> > >(fst) {}  // forwards fst to the base iterator
+};
+
+
+// Specialization for ArcSortFst; it derives from the ArcIterator of the underlying StateMapFst and adds nothing.
+template <class A, class C>
+class ArcIterator< ArcSortFst<A, C> >
+ : public ArcIterator< StateMapFst<A, A, ArcSortMapper<A, C> > > {
+ public:
+ ArcIterator(const ArcSortFst<A, C> &fst, typename A::StateId s)
+ : ArcIterator< StateMapFst<A, A, ArcSortMapper<A, C> > >(fst, s) {}  // forwards fst and state s to the base iterator
+};
+
+
+// Compare class for comparing input labels of arcs.
+template<class A> class ILabelCompare {
+ public:
+  // Returns true when arc1's input label is smaller than arc2's. The arcs
+  // are taken by const reference, as in OLabelCompare, so that a sort does
+  // not copy two arcs for every comparison.
+  bool operator() (const A &arc1, const A &arc2) const {
+    return arc1.ilabel < arc2.ilabel;
+  }
+
+  // Properties known after sorting: the arc-sort-invariant input properties
+  // plus kILabelSorted, and kOLabelSorted too when the input is an acceptor.
+  uint64 Properties(uint64 props) const {
+    return (props & kArcSortProperties) | kILabelSorted |
+        (props & kAcceptor ? kOLabelSorted : 0);
+  }
+};
+
+
+// Comparison object that orders arcs by output label.
+template<class A> class OLabelCompare {
+ public:
+  // Returns true when arc1's output label is smaller than arc2's.
+  bool operator() (const A &arc1, const A &arc2) const {
+    return arc1.olabel < arc2.olabel;
+  }
+
+  // Properties known after sorting: the arc-sort-invariant input properties
+  // plus kOLabelSorted, and kILabelSorted too when the input is an acceptor.
+  uint64 Properties(uint64 props) const {
+    uint64 sorted = (props & kArcSortProperties) | kOLabelSorted;
+    if (props & kAcceptor)
+      sorted |= kILabelSorted;
+    return sorted;
+  }
+};
+
+
+// Useful aliases when using StdArc.
+//
+// StdArcSortFst<C> is ArcSortFst<StdArc, C>. ArcSortFst declares no default
+// constructor, so the constructors are forwarded here; without them the
+// class could not be constructed from an Fst.
+template<class C> class StdArcSortFst : public ArcSortFst<StdArc, C> {
+ public:
+  typedef StdArc Arc;
+  typedef C Compare;
+
+  StdArcSortFst(const Fst<StdArc> &fst, const C &comp)
+      : ArcSortFst<StdArc, C>(fst, comp) {}
+
+  StdArcSortFst(const Fst<StdArc> &fst, const C &comp,
+                const ArcSortFstOptions &opts)
+      : ArcSortFst<StdArc, C>(fst, comp, opts) {}
+
+  // See Fst<>::Copy() for doc.
+  StdArcSortFst(const StdArcSortFst<C> &fst, bool safe = false)
+      : ArcSortFst<StdArc, C>(fst, safe) {}
+
+  // Get a copy of this StdArcSortFst. See Fst<>::Copy() for further doc.
+  virtual StdArcSortFst<C> *Copy(bool safe = false) const {
+    return new StdArcSortFst<C>(*this, safe);
+  }
+};
+
+typedef ILabelCompare<StdArc> StdILabelCompare;  // input-label comparison for StdArc
+
+typedef OLabelCompare<StdArc> StdOLabelCompare;  // output-label comparison for StdArc
+
+} // namespace fst
+
+#endif // FST_LIB_ARCSORT_H__
diff --git a/src/include/fst/bi-table.h b/src/include/fst/bi-table.h
new file mode 100644
index 0000000..dbb436c
--- /dev/null
+++ b/src/include/fst/bi-table.h
@@ -0,0 +1,396 @@
+// bi-table.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Classes for representing a bijective mapping between an arbitrary entry
+// of type T and a signed integral ID.
+
+#ifndef FST_LIB_BI_TABLE_H__
+#define FST_LIB_BI_TABLE_H__
+
+#include <deque>
+#include <vector>
+using std::vector;
+
+namespace fst {
+
+// BI TABLES - these determine a bijective mapping between an
+// arbitrary entry of type T and a signed integral ID of type I. The IDs are
+// allocated starting from 0 in order.
+//
+// template <class I, class T>
+// class BiTable {
+// public:
+//
+// // Required constructors.
+// BiTable();
+//
+// // Lookup integer ID from entry. If it doesn't exist, then add it.
+// I FindId(const T &entry);
+// // Lookup entry from integer ID.
+// const T &FindEntry(I) const;
+// // # of stored entries.
+// I Size() const;
+// };
+
+// An implementation using a hash map for the entry to ID mapping.
+// The entry T must have == defined. H is the hash function. The map stores
+// ID + 1, so a stored value of 0 means "no ID assigned yet".
+template <class I, class T, class H>
+class HashBiTable {
+ public:
+  // Nothing to set up; the unused default-constructed T that used to be
+  // created here has been removed.
+  HashBiTable() {}
+
+  // Returns the ID of 'entry', assigning the next free ID (the current
+  // number of entries) when the entry has not been seen before.
+  I FindId(const T &entry) {
+    I &id_ref = entry2id_[entry];
+    if (id_ref == 0) {  // T not found; store and assign it a new ID.
+      id2entry_.push_back(entry);
+      id_ref = id2entry_.size();
+    }
+    return id_ref - 1;  // NB: id_ref = ID + 1
+  }
+
+  // Returns the entry stored under ID 's'; no bounds check is performed.
+  const T &FindEntry(I s) const {
+    return id2entry_[s];
+  }
+
+  // Number of stored entries.
+  I Size() const { return id2entry_.size(); }
+
+ private:
+  unordered_map<T, I, H> entry2id_;  // entry -> ID + 1
+  vector<T> id2entry_;               // ID -> entry
+
+  DISALLOW_COPY_AND_ASSIGN(HashBiTable);
+};
+
+
+// An implementation using a hash set for the entry to ID
+// mapping. The hash set holds 'keys' which are either the ID
+// or kCurrentKey. These keys can be mapped to entries either by
+// looking up in the entry vector or, if kCurrentKey, in current_entry_
+// member. The hash and key equality functions map to entries first.
+// The entry T must have == defined and the default constructor
+// must produce an entry that will never be seen. H is the hash
+// function.
+template <class I, class T, class H>
+class CompactHashBiTable {
+ public:
+ friend class HashFunc;
+ friend class HashEqual;
+
+ CompactHashBiTable()
+ : hash_func_(*this),  // both functors keep a pointer back to this table
+ hash_equal_(*this),
+ keys_(0, hash_func_, hash_equal_) {
+ }
+
+ // Reserves space for table_size elements.
+ explicit CompactHashBiTable(size_t table_size)
+ : hash_func_(*this),
+ hash_equal_(*this),
+ keys_(table_size, hash_func_, hash_equal_) {
+ id2entry_.reserve(table_size);
+ }
+
+ I FindId(const T &entry) {  // returns the ID of 'entry', adding it if absent
+ current_entry_ = &entry;  // must precede find(): Key2T() resolves kCurrentKey through this pointer
+ typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey);  // hashes and compares 'entry' itself
+ if (it == keys_.end()) {
+ I key = id2entry_.size();  // new ID = next index in id2entry_
+ id2entry_.push_back(entry);
+ keys_.insert(key);  // the set stores the ID, not the entry
+ return key;
+ } else {
+ return *it;
+ }
+ }
+
+ const T &FindEntry(I s) const { return id2entry_[s]; }  // no bounds check
+ I Size() const { return id2entry_.size(); }  // number of stored entries
+
+ private:
+ static const I kEmptyKey; // -1
+ static const I kCurrentKey; // -2
+
+ class HashFunc {
+ public:
+ HashFunc(const CompactHashBiTable &ht) : ht_(&ht) {}
+
+ size_t operator()(I k) const { return hf(ht_->Key2T(k)); }  // hashes the entry that key k stands for
+ private:
+ const CompactHashBiTable *ht_;
+ H hf;
+ };
+
+ class HashEqual {
+ public:
+ HashEqual(const CompactHashBiTable &ht) : ht_(&ht) {}
+
+ bool operator()(I k1, I k2) const {
+ return ht_->Key2T(k1) == ht_->Key2T(k2);  // keys are equal when their entries are equal
+ }
+ private:
+ const CompactHashBiTable *ht_;
+ };
+
+ typedef unordered_set<I, HashFunc, HashEqual> KeyHashSet;
+
+ const T &Key2T(I k) const {  // maps a key back to the entry it stands for
+ if (k == kEmptyKey)
+ return empty_entry_;
+ else if (k == kCurrentKey)
+ return *current_entry_;
+ else
+ return id2entry_[k];
+ }
+
+ HashFunc hash_func_;
+ HashEqual hash_equal_;
+ KeyHashSet keys_;
+ vector<T> id2entry_;
+ const T empty_entry_;  // default-constructed; returned by Key2T() for kEmptyKey
+ const T *current_entry_;  // set by FindId(); not initialized by the constructors
+
+ DISALLOW_COPY_AND_ASSIGN(CompactHashBiTable);
+};
+
+template <class I, class T, class H>
+const I CompactHashBiTable<I, T, H>::kEmptyKey = -1;  // key that Key2T() maps to empty_entry_
+
+template <class I, class T, class H>
+const I CompactHashBiTable<I, T, H>::kCurrentKey = -2;  // key that Key2T() maps to *current_entry_
+
+
+// An implementation using a vector for the entry to ID mapping.
+// The function object FP must fingerprint each entry uniquely to an
+// integer that can be used as a vector index. By default VectorBiTable
+// constructs the FP object itself; a caller may pass one in instead, in
+// which case VectorBiTable takes ownership of it and deletes it in its
+// destructor.
+template <class I, class T, class FP>
+class VectorBiTable {
+ public:
+  explicit VectorBiTable(FP *fp = 0) : fp_(fp ? fp : new FP()) {}
+
+  ~VectorBiTable() { delete fp_; }
+
+  // Returns the ID of 'entry', assigning the next free ID when the slot
+  // for its fingerprint is still unused. Slots hold ID + 1, so 0 marks an
+  // unused slot.
+  I FindId(const T &entry) {
+    ssize_t fp = (*fp_)(entry);
+    if (!(fp < fp2id_.size()))
+      fp2id_.resize(fp + 1);
+    I &slot = fp2id_[fp];
+    if (slot != 0)
+      return slot - 1;
+    // T not found; store and assign it a new ID.
+    id2entry_.push_back(entry);
+    slot = id2entry_.size();
+    return slot - 1;
+  }
+
+  // Returns the entry stored under ID 's'; no bounds check is performed.
+  const T &FindEntry(I s) const { return id2entry_[s]; }
+
+  // Number of stored entries.
+  I Size() const { return id2entry_.size(); }
+
+  const FP &Fingerprint() const { return *fp_; }
+
+ private:
+  FP *fp_;              // owned fingerprint functor
+  vector<I> fp2id_;     // fingerprint -> ID + 1 (0 = unused)
+  vector<T> id2entry_;  // ID -> entry
+
+  DISALLOW_COPY_AND_ASSIGN(VectorBiTable);
+};
+
+
+// An implementation using a vector and a compact hash table. The
+// selecting functor S returns true for entries to be hashed in the
+// vector. The fingerprinting functor FP returns a unique fingerprint
+// for each entry to be hashed in the vector (these need to be
+// suitable for indexing in a vector). The hash functor H is used when
+// hashing entry into the compact hash table.
+template <class I, class T, class S, class FP, class H>
+class VectorHashBiTable {
+ public:
+ friend class HashFunc;
+ friend class HashEqual;
+
+ VectorHashBiTable(S *s, FP *fp, H *h,
+ size_t vector_size = 0,
+ size_t entry_size = 0)
+ : selector_(s),
+ fp_(fp),
+ h_(h),
+ hash_func_(*this),
+ hash_equal_(*this),
+ keys_(0, hash_func_, hash_equal_) {
+ if (vector_size)
+ fp2id_.reserve(vector_size);
+ if (entry_size)
+ id2entry_.reserve(entry_size);
+ }
+
+ ~VectorHashBiTable() {
+ delete selector_;
+ delete fp_;
+ delete h_;
+ }
+
+ I FindId(const T &entry) { // Returns the ID of 'entry', adding it if unseen.
+ if ((*selector_)(entry)) { // Use the vector if 'selector_(entry) == true'
+ uint64 fp = (*fp_)(entry);
+ if (fp2id_.size() <= fp) // Grow so that 'fp' is a valid index.
+ fp2id_.resize(fp + 1, 0);
+ if (fp2id_[fp] == 0) { // 0 marks a fingerprint that has no ID yet.
+ id2entry_.push_back(entry);
+ fp2id_[fp] = id2entry_.size();
+ }
+ return fp2id_[fp] - 1; // NB: stored value = ID + 1
+ } else { // Use the hash table otherwise.
+ current_entry_ = &entry; // Key2Entry(kCurrentKey) now yields 'entry'.
+ typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey);
+ if (it == keys_.end()) { // Not hashed yet; new ID is the next index.
+ I key = id2entry_.size();
+ id2entry_.push_back(entry);
+ keys_.insert(key);
+ return key;
+ } else {
+ return *it;
+ }
+ }
+ }
+
+ const T &FindEntry(I s) const {
+ return id2entry_[s];
+ }
+
+ I Size() const { return id2entry_.size(); }
+
+ const S &Selector() const { return *selector_; }
+
+ const FP &Fingerprint() const { return *fp_; }
+
+ const H &Hash() const { return *h_; }
+
+ private:
+ static const I kEmptyKey;
+ static const I kCurrentKey;
+
+ class HashFunc {
+ public:
+ HashFunc(const VectorHashBiTable &ht) : ht_(&ht) {}
+
+ size_t operator()(I k) const { return (*(ht_->h_))(ht_->Key2Entry(k)); }
+ private:
+ const VectorHashBiTable *ht_;
+ };
+
+ class HashEqual {
+ public:
+ HashEqual(const VectorHashBiTable &ht) : ht_(&ht) {}
+
+ bool operator()(I k1, I k2) const {
+ return ht_->Key2Entry(k1) == ht_->Key2Entry(k2);
+ }
+ private:
+ const VectorHashBiTable *ht_;
+ };
+
+ typedef unordered_set<I, HashFunc, HashEqual> KeyHashSet;
+
+ const T &Key2Entry(I k) const {
+ if (k == kEmptyKey)
+ return empty_entry_;
+ else if (k == kCurrentKey)
+ return *current_entry_;
+ else
+ return id2entry_[k];
+ }
+
+
+ S *selector_; // Returns true if entry hashed into vector
+ FP *fp_; // Fingerprint used when hashing entry into vector
+ H *h_; // Hash function used when hashing entry into hash_set
+
+ vector<T> id2entry_; // Maps state IDs to entry
+ vector<I> fp2id_; // Maps entry fingerprints to IDs
+
+ // Compact implementation of the hash table mapping entrys to
+ // state IDs using the hash function 'h_'
+ HashFunc hash_func_;
+ HashEqual hash_equal_;
+ KeyHashSet keys_;
+ const T empty_entry_;
+ const T *current_entry_;
+
+ DISALLOW_COPY_AND_ASSIGN(VectorHashBiTable);
+};
+
+template <class I, class T, class S, class FP, class H>
+const I VectorHashBiTable<I, T, S, FP, H>::kEmptyKey = -1;
+
+template <class I, class T, class S, class FP, class H>
+const I VectorHashBiTable<I, T, S, FP, H>::kCurrentKey = -2;
+
+
+// An implementation using a hash map for the entry to ID
+// mapping. This version permits erasing of entries by ID. The entry T
+// must have == defined and its default constructor must produce an
+// entry that will never be seen. F is the hash function.
+template <class I, class T, class F>
+class ErasableBiTable {
+ public:
+ ErasableBiTable() : first_(0) {}
+
+ // Returns the ID of 'entry', storing it under a new ID if it is not
+ // already in the table.
+ I FindId(const T &entry) {
+ I &id_ref = entry2id_[entry];
+ if (id_ref == 0) { // T not found; store and assign it a new ID.
+ id2entry_.push_back(entry);
+ id_ref = id2entry_.size() + first_;
+ }
+ return id_ref - 1; // NB: id_ref = ID + 1
+ }
+
+ // Returns the entry stored under ID 's'; 's' must not be below the ID
+ // of the first slot still held in the deque.
+ const T &FindEntry(I s) const { return id2entry_[s - first_]; }
+
+ // Number of slots currently held in the deque.
+ I Size() const { return id2entry_.size(); }
+
+ // Removes the entry with ID 's' and then drops any run of erased slots
+ // from the front of the deque, advancing 'first_' accordingly.
+ void Erase(I s) {
+ T &entry = id2entry_[s - first_];
+ // Erase by key rather than through the iterator returned by find():
+ // erasing a key that is absent is a no-op, whereas passing end() to
+ // erase() is undefined (e.g. when this slot was already erased).
+ entry2id_.erase(entry);
+ entry = empty_entry_;
+ while (!id2entry_.empty() && id2entry_.front() == empty_entry_) {
+ id2entry_.pop_front();
+ ++first_;
+ }
+ }
+
+ private:
+ unordered_map<T, I, F> entry2id_; // Maps entries to ID + 1
+ deque<T> id2entry_; // Maps (ID - first_) to entries
+ const T empty_entry_; // Marks an erased slot
+ I first_; // I of first element in the deque;
+
+ DISALLOW_COPY_AND_ASSIGN(ErasableBiTable);
+};
+
+} // namespace fst
+
+#endif // FST_LIB_BI_TABLE_H__
diff --git a/src/include/fst/cache.h b/src/include/fst/cache.h
new file mode 100644
index 0000000..a6a92d4
--- /dev/null
+++ b/src/include/fst/cache.h
@@ -0,0 +1,738 @@
+// cache.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// An Fst implementation that caches FST elements of a delayed
+// computation.
+
+#ifndef FST_LIB_CACHE_H__
+#define FST_LIB_CACHE_H__
+
+#include <vector>
+using std::vector;
+#include <list>
+
+#include <fst/vector-fst.h>
+
+
+DECLARE_bool(fst_default_cache_gc);
+DECLARE_int64(fst_default_cache_gc_limit);
+
+namespace fst {
+
+struct CacheOptions {
+ bool gc; // enable GC
+ size_t gc_limit; // # of bytes allowed before GC
+
+ // Takes both settings from the FLAGS_fst_default_cache_gc* values.
+ CacheOptions()
+ : gc(FLAGS_fst_default_cache_gc),
+ gc_limit(FLAGS_fst_default_cache_gc_limit) {}
+ // Uses the given settings: 'g' for gc and 'l' for gc_limit.
+ CacheOptions(bool g, size_t l) : gc(g), gc_limit(l) {}
+};
+
+// A CacheStateAllocator allocates and frees CacheStates
+// template <class S>
+// struct CacheStateAllocator {
+// S *Allocate(StateId s);
+// void Free(S *state, StateId s);
+// };
+//
+
+// A simple allocator class, can be overridden as needed,
+// maintains a single entry cache.
+template <class S>
+struct DefaultCacheStateAllocator {
+ typedef typename S::Arc::StateId StateId;
+
+ DefaultCacheStateAllocator() : mru_(NULL) { }
+
+ ~DefaultCacheStateAllocator() {
+ delete mru_;
+ }
+
+ // Hands back the held state (after calling Reset() on it) when there is
+ // one; otherwise allocates a new S. The ID 's' is not used here.
+ S *Allocate(StateId s) {
+ if (!mru_)
+ return new S();
+ S *recycled = mru_;
+ mru_ = NULL;
+ recycled->Reset();
+ return recycled;
+ }
+
+ // Holds on to 'state' for the next Allocate() call and deletes the state
+ // that was held before, if any. The ID 's' is not used here.
+ void Free(S *state, StateId s) {
+ delete mru_; // Deleting a null pointer does nothing.
+ mru_ = state;
+ }
+
+ private:
+ S *mru_; // State kept for reuse, or NULL
+};
+
+// VectorState but additionally has a flags data member (see
+// CacheState below). This class is used to cache FST elements with
+// the flags used to indicate what has been cached. Use HasStart()
+// HasFinal(), and HasArcs() to determine if cached and SetStart(),
+// SetFinal(), AddArc(), (or PushArc() and SetArcs()) to cache. Note you
+// must set the final weight even if the state is non-final to mark it as
+// cached. If the 'gc' option is 'false', cached items have the extent
+// of the FST - minimizing computation. If the 'gc' option is 'true',
+// garbage collection of states (not in use in an arc iterator) is
+// performed, in a rough approximation of LRU order, when 'gc_limit'
+// bytes is reached - controlling memory use. When 'gc_limit' is 0,
+// special optimizations apply - minimizing memory use.
+
+template <class S, class C = DefaultCacheStateAllocator<S> >
+class CacheBaseImpl : public VectorFstBaseImpl<S> {
+ public:
+ typedef S State;
+ typedef C Allocator;
+ typedef typename State::Arc Arc;
+ typedef typename Arc::Weight Weight;
+ typedef typename Arc::StateId StateId;
+
+ using FstImpl<Arc>::Type;
+ using FstImpl<Arc>::Properties;
+ using FstImpl<Arc>::SetProperties;
+ using VectorFstBaseImpl<State>::NumStates;
+ using VectorFstBaseImpl<State>::AddState;
+ using VectorFstBaseImpl<State>::SetState;
+
+ explicit CacheBaseImpl(C *allocator = 0)
+ : cache_start_(false), nknown_states_(0), min_unexpanded_state_id_(0),
+ cache_first_state_id_(kNoStateId), cache_first_state_(0),
+ cache_gc_(FLAGS_fst_default_cache_gc), cache_size_(0),
+ cache_limit_(FLAGS_fst_default_cache_gc_limit > kMinCacheLimit ||
+ FLAGS_fst_default_cache_gc_limit == 0 ?
+ FLAGS_fst_default_cache_gc_limit : kMinCacheLimit) {
+ allocator_ = allocator ? allocator : new C();
+ }
+
+ explicit CacheBaseImpl(const CacheOptions &opts, C *allocator = 0)
+ : cache_start_(false), nknown_states_(0),
+ min_unexpanded_state_id_(0), cache_first_state_id_(kNoStateId),
+ cache_first_state_(0), cache_gc_(opts.gc), cache_size_(0),
+ cache_limit_(opts.gc_limit > kMinCacheLimit || opts.gc_limit == 0 ?
+ opts.gc_limit : kMinCacheLimit) {
+ allocator_ = allocator ? allocator : new C();
+ }
+
+ // Preserve gc parameters, but initially cache nothing.
+ CacheBaseImpl(const CacheBaseImpl &impl)
+ : cache_start_(false), nknown_states_(0),
+ min_unexpanded_state_id_(0), cache_first_state_id_(kNoStateId),
+ cache_first_state_(0), cache_gc_(impl.cache_gc_), cache_size_(0),
+ cache_limit_(impl.cache_limit_) {
+ allocator_ = new C();
+ }
+
+ ~CacheBaseImpl() {
+ allocator_->Free(cache_first_state_, cache_first_state_id_);
+ delete allocator_;
+ }
+
+ // Gets a state from its ID; state must exist.
+ const S *GetState(StateId s) const {
+ if (s == cache_first_state_id_)
+ return cache_first_state_;
+ else
+ return VectorFstBaseImpl<S>::GetState(s);
+ }
+
+ // Gets a state from its ID; state must exist.
+ S *GetState(StateId s) {
+ if (s == cache_first_state_id_)
+ return cache_first_state_;
+ else
+ return VectorFstBaseImpl<S>::GetState(s);
+ }
+
+ // Gets a state from its ID; return 0 if it doesn't exist.
+ const S *CheckState(StateId s) const {
+ if (s == cache_first_state_id_)
+ return cache_first_state_;
+ else if (s < NumStates())
+ return VectorFstBaseImpl<S>::GetState(s);
+ else
+ return 0;
+ }
+
+ // Gets a state from its ID, allocating it via allocator_ if necessary.
+ S *ExtendState(StateId s) {
+ if (s == cache_first_state_id_) {
+ return cache_first_state_; // Return 1st cached state
+ } else if (cache_limit_ == 0 && cache_first_state_id_ == kNoStateId) {
+ cache_first_state_id_ = s; // Remember 1st cached state
+ cache_first_state_ = allocator_->Allocate(s);
+ return cache_first_state_;
+ } else if (cache_first_state_id_ != kNoStateId &&
+ cache_first_state_->ref_count == 0) {
+ // With Default allocator, the Free and Allocate will reuse the same S*.
+ allocator_->Free(cache_first_state_, cache_first_state_id_);
+ cache_first_state_id_ = s; // 1st cached state now stands for 's'
+ cache_first_state_ = allocator_->Allocate(s);
+ return cache_first_state_; // Return 1st cached state
+ } else {
+ while (NumStates() <= s) // Add state to main cache
+ AddState(0);
+ if (!VectorFstBaseImpl<S>::GetState(s)) { // Slot for 's' is still empty
+ SetState(s, allocator_->Allocate(s));
+ if (cache_first_state_id_ != kNoStateId) { // Forget 1st cached state
+ while (NumStates() <= cache_first_state_id_)
+ AddState(0);
+ SetState(cache_first_state_id_, cache_first_state_);
+ if (cache_gc_) { // Account for the state just moved to the main cache
+ cache_states_.push_back(cache_first_state_id_);
+ cache_size_ += sizeof(S) +
+ cache_first_state_->arcs.capacity() * sizeof(Arc);
+ }
+ cache_limit_ = kMinCacheLimit; // No separate 1st state from here on
+ cache_first_state_id_ = kNoStateId;
+ cache_first_state_ = 0;
+ }
+ if (cache_gc_) { // Account for the newly allocated state 's'
+ cache_states_.push_back(s);
+ cache_size_ += sizeof(S);
+ if (cache_size_ > cache_limit_)
+ GC(s, false);
+ }
+ }
+ S *state = VectorFstBaseImpl<S>::GetState(s);
+ return state;
+ }
+ }
+
+ void SetStart(StateId s) {
+ VectorFstBaseImpl<S>::SetStart(s);
+ cache_start_ = true;
+ if (s >= nknown_states_)
+ nknown_states_ = s + 1;
+ }
+
+ void SetFinal(StateId s, Weight w) {
+ S *state = ExtendState(s);
+ state->final = w;
+ state->flags |= kCacheFinal | kCacheRecent | kCacheModified;
+ }
+
+ // AddArc adds a single arc to state s and does incremental cache
+ // book-keeping: it updates the epsilon counts and the FST properties,
+ // marks the state as modified and, when GC is enabled, charges the arc
+ // to the cache size. For efficiency, prefer PushArc and SetArcs below
+ // when possible.
+ void AddArc(StateId s, const Arc &arc) {
+ S *state = ExtendState(s);
+ // The property update is given the arc that precedes 'arc' at this
+ // state (0 if 'arc' is the first one), so it must be looked up before
+ // 'arc' is appended; afterwards back() would be 'arc' itself.
+ const Arc *parc = state->arcs.empty() ? 0 : &(state->arcs.back());
+ SetProperties(AddArcProperties(Properties(), s, arc, parc));
+ state->arcs.push_back(arc);
+ if (arc.ilabel == 0) {
+ ++state->niepsilons;
+ }
+ if (arc.olabel == 0) {
+ ++state->noepsilons;
+ }
+ state->flags |= kCacheModified;
+ // The first cached state is not counted against the cache size.
+ if (cache_gc_ && s != cache_first_state_id_) {
+ cache_size_ += sizeof(Arc);
+ if (cache_size_ > cache_limit_)
+ GC(s, false);
+ }
+ }
+
+ // Adds a single arc to state s but delays cache book-keeping.
+ // SetArcs must be called when all PushArc calls at a state are
+ // complete. Do not mix with calls to AddArc.
+ void PushArc(StateId s, const Arc &arc) {
+ S *state = ExtendState(s);
+ state->arcs.push_back(arc);
+ }
+
+ // Marks arcs of state s as cached and does cache book-keeping after all
+ // calls to PushArc have been completed. Do not mix with calls to AddArc.
+ void SetArcs(StateId s) {
+ S *state = ExtendState(s);
+ vector<Arc> &arcs = state->arcs;
+ state->niepsilons = state->noepsilons = 0;
+ for (size_t a = 0; a < arcs.size(); ++a) {
+ const Arc &arc = arcs[a];
+ if (arc.nextstate >= nknown_states_)
+ nknown_states_ = arc.nextstate + 1;
+ if (arc.ilabel == 0)
+ ++state->niepsilons;
+ if (arc.olabel == 0)
+ ++state->noepsilons;
+ }
+ ExpandedState(s);
+ state->flags |= kCacheArcs | kCacheRecent | kCacheModified;
+ if (cache_gc_ && s != cache_first_state_id_) {
+ cache_size_ += arcs.capacity() * sizeof(Arc);
+ if (cache_size_ > cache_limit_)
+ GC(s, false);
+ }
+ };
+
+ void ReserveArcs(StateId s, size_t n) {
+ S *state = ExtendState(s);
+ state->arcs.reserve(n);
+ }
+
+ void DeleteArcs(StateId s, size_t n) {
+ S *state = ExtendState(s);
+ const vector<Arc> &arcs = GetState(s)->arcs;
+ for (size_t i = 0; i < n; ++i) {
+ size_t j = arcs.size() - i - 1;
+ if (arcs[j].ilabel == 0)
+ --GetState(s)->niepsilons;
+ if (arcs[j].olabel == 0)
+ --GetState(s)->noepsilons;
+ }
+ state->arcs.resize(arcs.size() - n);
+ SetProperties(DeleteArcsProperties(Properties()));
+ state->flags |= kCacheModified;
+ }
+
+ void DeleteArcs(StateId s) {
+ S *state = ExtendState(s);
+ state->niepsilons = 0;
+ state->noepsilons = 0;
+ state->arcs.clear();
+ SetProperties(DeleteArcsProperties(Properties()));
+ state->flags |= kCacheModified;
+ }
+
+ // Is the start state cached?
+ bool HasStart() const {
+ if (!cache_start_ && Properties(kError))
+ cache_start_ = true;
+ return cache_start_;
+ }
+
+ // Is the final weight of state s cached?
+ bool HasFinal(StateId s) const {
+ const S *state = CheckState(s);
+ if (state && state->flags & kCacheFinal) {
+ state->flags |= kCacheRecent;
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ // Are arcs of state s cached?
+ bool HasArcs(StateId s) const {
+ const S *state = CheckState(s);
+ if (state && state->flags & kCacheArcs) {
+ state->flags |= kCacheRecent;
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ Weight Final(StateId s) const {
+ const S *state = GetState(s);
+ return state->final;
+ }
+
+ size_t NumArcs(StateId s) const {
+ const S *state = GetState(s);
+ return state->arcs.size();
+ }
+
+ size_t NumInputEpsilons(StateId s) const {
+ const S *state = GetState(s);
+ return state->niepsilons;
+ }
+
+ size_t NumOutputEpsilons(StateId s) const {
+ const S *state = GetState(s);
+ return state->noepsilons;
+ }
+
+ // Provides information needed for generic arc iterator.
+ void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const {
+ const S *state = GetState(s);
+ data->base = 0;
+ data->narcs = state->arcs.size();
+ data->arcs = data->narcs > 0 ? &(state->arcs[0]) : 0;
+ data->ref_count = &(state->ref_count);
+ ++(*data->ref_count);
+ }
+
+ // Number of known states.
+ StateId NumKnownStates() const { return nknown_states_; }
+
+ // Update number of known states taking in account the existence of state s.
+ void UpdateNumKnownStates(StateId s) {
+ if (s >= nknown_states_)
+ nknown_states_ = s + 1;
+ }
+
+ // Find the minimum never-expanded state Id
+ StateId MinUnexpandedState() const {
+ while (min_unexpanded_state_id_ < expanded_states_.size() &&
+ expanded_states_[min_unexpanded_state_id_])
+ ++min_unexpanded_state_id_;
+ return min_unexpanded_state_id_;
+ }
+
+ // Removes from cache_states_ and uncaches (not referenced-counted)
+ // states that have not been accessed since the last GC until
+ // cache_limit_/3 bytes are uncached. If that fails to free enough,
+ // recurs uncaching recently visited states as well. If still
+ // unable to free enough memory, then widens cache_limit_.
+ void GC(StateId current, bool free_recent) {
+ if (!cache_gc_)
+ return;
+ VLOG(2) << "CacheImpl: Enter GC: object = " << Type() << "(" << this
+ << "), free recently cached = " << free_recent
+ << ", cache size = " << cache_size_
+ << ", cache limit = " << cache_limit_ << "\n";
+ typename list<StateId>::iterator siter = cache_states_.begin();
+
+ size_t cache_target = (2 * cache_limit_)/3 + 1;
+ while (siter != cache_states_.end()) {
+ StateId s = *siter;
+ S* state = VectorFstBaseImpl<S>::GetState(s);
+ if (cache_size_ > cache_target && state->ref_count == 0 &&
+ (free_recent || !(state->flags & kCacheRecent)) && s != current) {
+ cache_size_ -= sizeof(S) + state->arcs.capacity() * sizeof(Arc);
+ allocator_->Free(state, s);
+ SetState(s, 0);
+ cache_states_.erase(siter++);
+ } else {
+ state->flags &= ~kCacheRecent;
+ ++siter;
+ }
+ }
+ if (!free_recent && cache_size_ > cache_target) {
+ GC(current, true);
+ } else {
+ while (cache_size_ > cache_target) {
+ cache_limit_ *= 2;
+ cache_target *= 2;
+ }
+ }
+ VLOG(2) << "CacheImpl: Exit GC: object = " << Type() << "(" << this
+ << "), free recently cached = " << free_recent
+ << ", cache size = " << cache_size_
+ << ", cache limit = " << cache_limit_ << "\n";
+ }
+
+ void ExpandedState(StateId s) {
+ if (s < min_unexpanded_state_id_)
+ return;
+ while (expanded_states_.size() <= s)
+ expanded_states_.push_back(false);
+ expanded_states_[s] = true;
+ }
+
+ // Caching on/off switch, limit and size accessors.
+ bool GetCacheGc() const { return cache_gc_; }
+ size_t GetCacheLimit() const { return cache_limit_; }
+ size_t GetCacheSize() const { return cache_size_; }
+
+ private:
+ static const size_t kMinCacheLimit = 8096; // Minimum (non-zero) cache limit
+ static const uint32 kCacheFinal = 0x0001; // Final weight has been cached
+ static const uint32 kCacheArcs = 0x0002; // Arcs have been cached
+ static const uint32 kCacheRecent = 0x0004; // Mark as visited since GC
+
+ public:
+ static const uint32 kCacheModified = 0x0008; // Mark state as modified
+ static const uint32 kCacheFlags = kCacheFinal | kCacheArcs | kCacheRecent
+ | kCacheModified;
+
+ protected:
+ C *allocator_; // used to allocate new states
+
+ private:
+ mutable bool cache_start_; // Is the start state cached?
+ StateId nknown_states_; // # of known states
+ vector<bool> expanded_states_; // states that have been expanded
+ mutable StateId min_unexpanded_state_id_; // minimum never-expanded state Id
+ StateId cache_first_state_id_; // First cached state id
+ S *cache_first_state_; // First cached state
+ list<StateId> cache_states_; // list of currently cached states
+ bool cache_gc_; // enable GC
+ size_t cache_size_; // # of bytes cached
+ size_t cache_limit_; // # of bytes allowed before GC
+
+ void operator=(const CacheBaseImpl<S> &impl); // disallow
+};
+
+template <class S, class C> const uint32 CacheBaseImpl<S, C>::kCacheFinal;
+template <class S, class C> const uint32 CacheBaseImpl<S, C>::kCacheArcs;
+template <class S, class C> const uint32 CacheBaseImpl<S, C>::kCacheRecent;
+template <class S, class C> const uint32 CacheBaseImpl<S, C>::kCacheModified;
+template <class S, class C> const size_t CacheBaseImpl<S, C>::kMinCacheLimit;
+
+// Arcs implemented by an STL vector per state. Similar to VectorState
+// but adds flags and ref count to keep track of what has been cached.
+template <class A>
+struct CacheState {
+ typedef A Arc;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+
+ // The epsilon counters start at zero so that incremental updates such
+ // as '++state->niepsilons' never read an indeterminate value.
+ CacheState()
+ : final(Weight::Zero()), niepsilons(0), noepsilons(0),
+ flags(0), ref_count(0) {}
+
+ // Puts the state back into its freshly constructed condition so that it
+ // can be handed out again for a different state ID without carrying
+ // over a stale final weight or stale epsilon counts.
+ void Reset() {
+ final = Weight::Zero();
+ niepsilons = 0;
+ noepsilons = 0;
+ flags = 0;
+ ref_count = 0;
+ arcs.resize(0);
+ }
+
+ Weight final; // Final weight
+ vector<A> arcs; // Arcs representation
+ size_t niepsilons; // # of input epsilons
+ size_t noepsilons; // # of output epsilons
+ mutable uint32 flags; // Cache flags for this state
+ mutable int ref_count; // # of outstanding references to this state
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(CacheState);
+};
+
+// A CacheBaseImpl with a commonly used CacheState.
+template <class A>
+class CacheImpl : public CacheBaseImpl< CacheState<A> > {
+ public:
+ typedef CacheState<A> State;
+
+ CacheImpl() {}
+
+ explicit CacheImpl(const CacheOptions &opts)
+ : CacheBaseImpl< CacheState<A> >(opts) {}
+
+ // Copy constructor; forwards to the CacheBaseImpl copy constructor.
+ // The parameter type is CacheImpl<A>, i.e. this class: CacheImpl<State>
+ // would name the unrelated instantiation CacheImpl< CacheState<A> >.
+ CacheImpl(const CacheImpl<A> &impl) : CacheBaseImpl<State>(impl) {}
+
+ private:
+ void operator=(const CacheImpl<A> &impl); // disallow
+};
+
+
+// Use this to make a state iterator for a CacheBaseImpl-derived Fst,
+// which must have type 'State' defined. Note this iterator only
+// returns those states reachable from the initial state, so consider
+// implementing a class-specific one.
+template <class F>
+class CacheStateIterator : public StateIteratorBase<typename F::Arc> {
+ public:
+ typedef typename F::Arc Arc;
+ typedef typename Arc::StateId StateId;
+ typedef typename F::State State;
+ typedef CacheBaseImpl<State> Impl;
+
+ CacheStateIterator(const F &fst, Impl *impl)
+ : fst_(fst), impl_(impl), s_(0) {}
+
+ bool Done() const {
+ if (s_ < impl_->NumKnownStates())
+ return false;
+ fst_.Start(); // force start state
+ if (s_ < impl_->NumKnownStates())
+ return false;
+ for (StateId u = impl_->MinUnexpandedState();
+ u < impl_->NumKnownStates();
+ u = impl_->MinUnexpandedState()) {
+ // force state expansion
+ ArcIterator<F> aiter(fst_, u);
+ aiter.SetFlags(kArcValueFlags, kArcValueFlags | kArcNoCache);
+ for (; !aiter.Done(); aiter.Next())
+ impl_->UpdateNumKnownStates(aiter.Value().nextstate);
+ impl_->ExpandedState(u);
+ if (s_ < impl_->NumKnownStates())
+ return false;
+ }
+ return true;
+ }
+
+ StateId Value() const { return s_; }
+
+ void Next() { ++s_; }
+
+ void Reset() { s_ = 0; }
+
+ private:
+ // This allows base class virtual access to non-virtual derived-
+ // class members of the same name. It makes the derived class more
+ // efficient to use but unsafe to further derive.
+ virtual bool Done_() const { return Done(); }
+ virtual StateId Value_() const { return Value(); }
+ virtual void Next_() { Next(); }
+ virtual void Reset_() { Reset(); }
+
+ const F &fst_;
+ Impl *impl_;
+ StateId s_;
+};
+
+
+// Use this to make an arc iterator for a CacheBaseImpl-derived Fst,
+// which must have types 'Arc' and 'State' defined.
+template <class F,
+ class C = DefaultCacheStateAllocator<CacheState<typename F::Arc> > >
+class CacheArcIterator {
+ public:
+ typedef typename F::Arc Arc;
+ typedef typename F::State State;
+ typedef typename Arc::StateId StateId;
+ typedef CacheBaseImpl<State, C> Impl;
+
+ CacheArcIterator(Impl *impl, StateId s) : i_(0) {
+ state_ = impl->ExtendState(s);
+ ++state_->ref_count;
+ }
+
+ ~CacheArcIterator() { --state_->ref_count; }
+
+ bool Done() const { return i_ >= state_->arcs.size(); }
+
+ const Arc& Value() const { return state_->arcs[i_]; }
+
+ void Next() { ++i_; }
+
+ size_t Position() const { return i_; }
+
+ void Reset() { i_ = 0; }
+
+ void Seek(size_t a) { i_ = a; }
+
+ uint32 Flags() const {
+ return kArcValueFlags;
+ }
+
+ void SetFlags(uint32 flags, uint32 mask) {}
+
+ private:
+ const State *state_;
+ size_t i_;
+
+ DISALLOW_COPY_AND_ASSIGN(CacheArcIterator);
+};
+
+// Use this to make a mutable arc iterator for a CacheBaseImpl-derived Fst,
+// which must have types 'Arc' and 'State' defined.
+template <class F,
+ class C = DefaultCacheStateAllocator<CacheState<typename F::Arc> > >
+class CacheMutableArcIterator
+ : public MutableArcIteratorBase<typename F::Arc> {
+ public:
+ typedef typename F::State State;
+ typedef typename F::Arc Arc;
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Weight Weight;
+ typedef CacheBaseImpl<State, C> Impl;
+
+ // You will need to call MutateCheck() in the constructor.
+ CacheMutableArcIterator(Impl *impl, StateId s) : i_(0), s_(s), impl_(impl) {
+ state_ = impl_->ExtendState(s_);
+ ++state_->ref_count;
+ };
+
+ ~CacheMutableArcIterator() {
+ --state_->ref_count;
+ }
+
+ bool Done() const { return i_ >= state_->arcs.size(); }
+
+ const Arc& Value() const { return state_->arcs[i_]; }
+
+ void Next() { ++i_; }
+
+ size_t Position() const { return i_; }
+
+ void Reset() { i_ = 0; }
+
+ void Seek(size_t a) { i_ = a; }
+
+ void SetValue(const Arc& arc) {
+ state_->flags |= CacheBaseImpl<State, C>::kCacheModified;
+ uint64 properties = impl_->Properties();
+ Arc& oarc = state_->arcs[i_];
+ if (oarc.ilabel != oarc.olabel)
+ properties &= ~kNotAcceptor;
+ if (oarc.ilabel == 0) {
+ --state_->niepsilons;
+ properties &= ~kIEpsilons;
+ if (oarc.olabel == 0)
+ properties &= ~kEpsilons;
+ }
+ if (oarc.olabel == 0) {
+ --state_->noepsilons;
+ properties &= ~kOEpsilons;
+ }
+ if (oarc.weight != Weight::Zero() && oarc.weight != Weight::One())
+ properties &= ~kWeighted;
+ oarc = arc;
+ if (arc.ilabel != arc.olabel) {
+ properties |= kNotAcceptor;
+ properties &= ~kAcceptor;
+ }
+ if (arc.ilabel == 0) {
+ ++state_->niepsilons;
+ properties |= kIEpsilons;
+ properties &= ~kNoIEpsilons;
+ if (arc.olabel == 0) {
+ properties |= kEpsilons;
+ properties &= ~kNoEpsilons;
+ }
+ }
+ if (arc.olabel == 0) {
+ ++state_->noepsilons;
+ properties |= kOEpsilons;
+ properties &= ~kNoOEpsilons;
+ }
+ if (arc.weight != Weight::Zero() && arc.weight != Weight::One()) {
+ properties |= kWeighted;
+ properties &= ~kUnweighted;
+ }
+ properties &= kSetArcProperties | kAcceptor | kNotAcceptor |
+ kEpsilons | kNoEpsilons | kIEpsilons | kNoIEpsilons |
+ kOEpsilons | kNoOEpsilons | kWeighted | kUnweighted;
+ impl_->SetProperties(properties);
+ }
+
+ uint32 Flags() const {
+ return kArcValueFlags;
+ }
+
+ void SetFlags(uint32 f, uint32 m) {}
+
+ private:
+ virtual bool Done_() const { return Done(); }
+ virtual const Arc& Value_() const { return Value(); }
+ virtual void Next_() { Next(); }
+ virtual size_t Position_() const { return Position(); }
+ virtual void Reset_() { Reset(); }
+ virtual void Seek_(size_t a) { Seek(a); }
+ virtual void SetValue_(const Arc &a) { SetValue(a); }
+ uint32 Flags_() const { return Flags(); }
+ void SetFlags_(uint32 f, uint32 m) { SetFlags(f, m); }
+
+ size_t i_;
+ StateId s_;
+ Impl *impl_;
+ State *state_;
+
+ DISALLOW_COPY_AND_ASSIGN(CacheMutableArcIterator);
+};
+
+} // namespace fst
+
+#endif // FST_LIB_CACHE_H__
diff --git a/src/include/fst/closure.h b/src/include/fst/closure.h
new file mode 100644
index 0000000..541562b
--- /dev/null
+++ b/src/include/fst/closure.h
@@ -0,0 +1,155 @@
+// closure.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Functions and classes to compute the concatenative closure of an Fst.
+
+#ifndef FST_LIB_CLOSURE_H__
+#define FST_LIB_CLOSURE_H__
+
+#include <vector>
+using std::vector;
+#include <algorithm>
+
+#include <fst/mutable-fst.h>
+#include <fst/rational.h>
+
+
+namespace fst {
+
+// Computes the concatenative closure. This version modifies its
+// MutableFst input. If FST transduces string x to y with weight a,
+// then the closure transduces x to y with weight a, xx to yy with
+// weight Times(a, a), xxx to yyy with weight Times(Times(a, a), a),
+// etc. If closure_type == CLOSURE_STAR, then the empty string is
+// transduced to itself with weight Weight::One() as well.
+//
+// Complexity:
+// - Time: O(V)
+// - Space: O(V)
+// where V = # of states.
+template<class Arc>
+void Closure(MutableFst<Arc> *fst, ClosureType closure_type) {
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Weight Weight;
+
+ uint64 props = fst->Properties(kFstProperties, false);
+ StateId start = fst->Start();
+ // Every final state gets an epsilon arc, carrying its final weight,
+ // back to the original start state.
+ for (StateIterator< MutableFst<Arc> > siter(*fst);
+ !siter.Done();
+ siter.Next()) {
+ StateId s = siter.Value();
+ Weight final = fst->Final(s);
+ if (final != Weight::Zero())
+ fst->AddArc(s, Arc(0, 0, final, start));
+ }
+ if (closure_type == CLOSURE_STAR) {
+ // A new final start state accepts the empty string with weight One().
+ fst->ReserveStates(fst->NumStates() + 1);
+ StateId nstart = fst->AddState();
+ fst->SetStart(nstart);
+ fst->SetFinal(nstart, Weight::One());
+ // 'start' is a state ID, so it is tested against kNoStateId (not
+ // kNoLabel); the input may have had no start state at all.
+ if (start != kNoStateId)
+ fst->AddArc(nstart, Arc(0, 0, Weight::One(), start));
+ }
+ fst->SetProperties(ClosureProperties(props, closure_type == CLOSURE_STAR),
+ kFstProperties);
+}
+
+// Computes the concatenative closure. This version modifies its
+// RationalFst input.
+template<class Arc>
+void Closure(RationalFst<Arc> *fst, ClosureType closure_type) {
+ fst->GetImpl()->AddClosure(closure_type);
+}
+
+
+struct ClosureFstOptions : RationalFstOptions {
+ ClosureType type; // Closure variant to compute
+
+ // Default RationalFstOptions; closure type CLOSURE_STAR.
+ ClosureFstOptions() : type(CLOSURE_STAR) {}
+ // Default RationalFstOptions; closure type 't'.
+ explicit ClosureFstOptions(ClosureType t) : type(t) {}
+ // Copies 'opts' into the base options; closure type 't'.
+ ClosureFstOptions(const RationalFstOptions &opts, ClosureType t)
+ : RationalFstOptions(opts), type(t) {}
+};
+
+
+// Computes the concatenative closure. This version is a delayed
+// Fst. If FST transduces string x to y with weight a, then the
+// closure transduces x to y with weight a, xx to yy with weight
+// Times(a, a), xxx to yyy with weight Times(Times(a, a), a), etc. If
+// closure_type == CLOSURE_STAR, then the empty string is transduced
+// to itself with weight Weight::One() as well.
+//
+// Complexity:
+// - Time: O(v)
+// - Space: O(v)
+// where v = # of states visited. Constant time and space to visit an
+// input state or arc is assumed and exclusive of caching.
+template <class A>
+class ClosureFst : public RationalFst<A> {
+ public:
+ using ImplToFst< RationalFstImpl<A> >::GetImpl;
+
+ typedef A Arc;
+
+ ClosureFst(const Fst<A> &fst, ClosureType closure_type) {
+ GetImpl()->InitClosure(fst, closure_type);
+ }
+
+ ClosureFst(const Fst<A> &fst, const ClosureFstOptions &opts)
+ : RationalFst<A>(opts) {
+ GetImpl()->InitClosure(fst, opts.type);
+ }
+
+ // See Fst<>::Copy() for doc.
+ ClosureFst(const ClosureFst<A> &fst, bool safe = false)
+ : RationalFst<A>(fst, safe) {}
+
+ // Get a copy of this ClosureFst. See Fst<>::Copy() for further doc.
+ virtual ClosureFst<A> *Copy(bool safe = false) const {
+ return new ClosureFst<A>(*this, safe);
+ }
+};
+
+
+// Specialization for ClosureFst.
+template <class A>
+class StateIterator< ClosureFst<A> > : public StateIterator< RationalFst<A> > {
+ public:
+ explicit StateIterator(const ClosureFst<A> &fst)
+ : StateIterator< RationalFst<A> >(fst) {}
+};
+
+
+// Specialization for ClosureFst.
+template <class A>
+class ArcIterator< ClosureFst<A> > : public ArcIterator< RationalFst<A> > {
+ public:
+ typedef typename A::StateId StateId;
+
+ ArcIterator(const ClosureFst<A> &fst, StateId s)
+ : ArcIterator< RationalFst<A> >(fst, s) {}
+};
+
+
+// Useful alias when using StdArc.
+typedef ClosureFst<StdArc> StdClosureFst;
+
+} // namespace fst
+
+#endif // FST_LIB_CLOSURE_H__
diff --git a/src/include/fst/compact-fst.h b/src/include/fst/compact-fst.h
new file mode 100644
index 0000000..efa567a
--- /dev/null
+++ b/src/include/fst/compact-fst.h
@@ -0,0 +1,1307 @@
+// compact-fst.h
+
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// FST Class for memory-efficient representation of common types of
+// FSTs: linear automata, acceptors, unweighted FSTs, ...
+
+#ifndef FST_LIB_COMPACT_FST_H__
+#define FST_LIB_COMPACT_FST_H__
+
+#include <iterator>
+#include <utility>
+using std::pair; using std::make_pair;
+#include <vector>
+using std::vector;
+
+#include <fst/cache.h>
+#include <fst/expanded-fst.h>
+#include <fst/fst-decl.h> // For optional argument declarations
+#include <fst/matcher.h>
+#include <fst/test-properties.h>
+#include <fst/util.h>
+
+
+namespace fst {
+
+struct CompactFstOptions : public CacheOptions {
+ // CompactFst default caching behaviour is to do no caching. Most
+ // compactors are cheap and therefore we save memory by not doing
+ // caching.
+ CompactFstOptions() : CacheOptions(true, 0) {}  // NOTE(review): presumably (gc = true, gc_limit = 0) — confirm against CacheOptions.
+ CompactFstOptions(const CacheOptions &opts) : CacheOptions(opts) {}  // Non-explicit, so a CacheOptions converts implicitly.
+};
+
+// Compactor Interface - class determines how arcs and final weights
+// are compacted and expanded.
+//
+// Final weights are treated as transitions to the superfinal state,
+// i.e. ilabel = olabel = kNoLabel and nextstate = kNoStateId.
+//
+// There are two types of compactors:
+//
+// * Fixed out-degree compactors: 'compactor.Size()' returns a
+// positive integer 's'. An FST can be compacted by this compactor
+// only if each state has exactly 's' outgoing transitions (counting a
+// non-Zero() final weight as a transition). A typical example is a
+// compactor for string FSTs, i.e. 's == 1'.
+//
+// * Variable out-degree compactors: 'compactor.Size() == -1'. There
+// are no out-degree restrictions for these compactors.
+//
+//
+// class Compactor {
+// public:
+// // Element is the type of the compacted transitions.
+// typedef ... Element;
+// // Return the compacted representation of a transition 'arc'
+// // at a state 's'.
+// Element Compact(StateId s, const Arc &arc);
+// // Return the transition at state 's' represented by the compacted
+// // transition 'e'.
+// Arc Expand(StateId s, const Element &e);
+// // Return -1 for variable out-degree compactors, and the mandatory
+// // out-degree otherwise.
+// ssize_t Size();
+// // Test whether 'fst' can be compacted by this compactor.
+// bool Compatible(const Fst<A> &fst);
+// // Return the properties that are always true for an fst
+// // compacted using this compactor
+// uint64 Properties();
+// // Return a string identifying the type of compactor.
+// static const string &Type();
+// // Write a compactor to a file.
+// bool Write(ostream &strm);
+// // Read a compactor from a file.
+// static Compactor *Read(istream &strm);
+// // Default constructor (optional, see comment below).
+// Compactor();
+// };
+//
+// The default constructor is only required for FST_REGISTER to work
+// (i.e. enabling Convert() and the command-line utilities to work
+// with this new compactor). However, a default constructor always
+// needs to be specified for this code to compile; one can have it
+// simply raise an error when called:
+//
+// Compactor::Compactor() {
+// FSTERROR() << "Compactor: no default constructor";
+// }
+
+
+// Implementation data for Compact Fst, which can be shared between otherwise
+// independent copies.
+//
+// The implementation contains two arrays: 'states_' and 'compacts_'.
+//
+// For fixed out-degree compactors, the 'states_' array is unallocated.
+// The 'compacts_' contains the compacted transitions. Its size is
+// 'ncompacts_'. The outgoing transitions at a given state are stored
+// consecutively. For a given state 's', its 'compactor.Size()' outgoing
+// transitions (including the superfinal transition when 's' is final) are
+// stored in positions ['s*compactor.Size()', '(s+1)*compactor.Size()').
+//
+// For variable out-degree compactors, the states_ array has size
+// 'nstates_ + 1' and contains pointers to positions into 'compacts_'.
+// For a given state 's', the compacted transitions of 's' are
+// stored in positions [ 'states_[s]', 'states_[s + 1]' ) in 'compacts_'.
+// By convention, 'states_[nstates_] == ncompacts_'.
+//
+// In both cases, the superfinal transition (when 's' is final, i.e.
+// 'Final(s) != Weight::Zero()') is stored first.
+//
+// The unsigned type U is used to represent indices into the compacts_
+// array.
+template <class E, class U>
+class CompactFstData {  // NOTE(review): owns raw arrays but declares no copy ctor/assignment — confirm instances are only ever shared via the ref count.
+ public:
+ typedef E CompactElement;
+ typedef U Unsigned;
+
+ CompactFstData()  // Empty data: no arrays allocated, no start state, no error.
+ : states_(0),
+ compacts_(0),
+ nstates_(0),
+ ncompacts_(0),
+ narcs_(0),
+ start_(kNoStateId),
+ error_(false) {}
+
+ template <class A, class Compactor>
+ CompactFstData(const Fst<A> &fst, const Compactor &compactor);  // Compacts an existing Fst.
+
+ template <class Iterator, class Compactor>
+ CompactFstData(const Iterator &begin, const Iterator &end,  // Builds from already-compacted elements in [begin, end).
+ const Compactor &compactor);
+
+ ~CompactFstData() {
+ delete[] states_;  // May be 0 (no index allocated); delete[] on 0 is a no-op.
+ delete[] compacts_;
+ }
+
+ template <class Compactor>
+ static CompactFstData<E, U> *Read(istream &strm,  // Returns 0 on failure.
+ const FstReadOptions &opts,
+ const FstHeader &hdr,
+ const Compactor &compactor);
+
+ bool Write(ostream &strm, const FstWriteOptions &opts) const;  // Returns false on failure.
+
+ Unsigned States(ssize_t i) const { return states_[i]; }  // No bounds or null check; only valid when the index was allocated.
+ const CompactElement &Compacts(size_t i) const { return compacts_[i]; }  // No bounds check.
+ size_t NumStates() const { return nstates_; }
+ size_t NumCompacts() const { return ncompacts_; }
+ size_t NumArcs() const { return narcs_; }
+ ssize_t Start() const { return start_; }  // kNoStateId when no start state was set.
+
+ int RefCount() const { return ref_count_.count(); }
+ int IncrRefCount() { return ref_count_.Incr(); }
+ int DecrRefCount() { return ref_count_.Decr(); }  // Callers in this file delete the object when this returns 0.
+
+ bool Error() const { return error_; }  // True when a constructor rejected its input.
+
+ private:
+ // Byte alignment for states and arcs in file format (version 1 only)
+ static const int kFileAlign = 16;
+
+ Unsigned *states_;  // Per-state offsets into compacts_, or 0 when not allocated.
+ CompactElement *compacts_;  // Compacted transitions (including superfinal ones).
+ size_t nstates_;
+ size_t ncompacts_;  // Number of entries in compacts_.
+ size_t narcs_;  // Number of real arcs (superfinal transitions excluded).
+ ssize_t start_;
+ RefCounter ref_count_;
+ bool error_;
+};
+
+template <class E, class U>
+const int CompactFstData<E, U>::kFileAlign;
+
+
+template <class E, class U>
+template <class A, class C>
+CompactFstData<E, U>::CompactFstData(const Fst<A> &fst, const C &compactor)  // Compacts every final weight and arc of 'fst'; sets error_ on a size mismatch.
+ : states_(0),
+ compacts_(0),
+ nstates_(0),
+ ncompacts_(0),
+ narcs_(0),
+ start_(kNoStateId),
+ error_(false) {
+ typedef typename A::StateId StateId;
+ typedef typename A::Weight Weight;
+ start_ = fst.Start();
+ // First pass: count # of states, arcs and final states.
+ StateId nfinals = 0;
+ for (StateIterator< Fst<A> > siter(fst);
+ !siter.Done();
+ siter.Next()) {
+ ++nstates_;
+ StateId s = siter.Value();
+ for (ArcIterator< Fst<A> > aiter(fst, s);
+ !aiter.Done();
+ aiter.Next())
+ ++narcs_;
+ if (fst.Final(s) != Weight::Zero()) ++nfinals;  // A non-Zero final weight takes one compact element.
+ }
+ if (compactor.Size() == -1) {  // Variable out-degree: allocate the states_ offset index.
+ states_ = new Unsigned[nstates_ + 1];
+ ncompacts_ = narcs_ + nfinals;
+ compacts_ = new CompactElement[ncompacts_];
+ states_[nstates_] = ncompacts_;  // Sentinel entry: end offset of the last state.
+ } else {  // Fixed out-degree: no index, positions are derived from the state id.
+ states_ = 0;
+ ncompacts_ = nstates_ * compactor.Size();
+ if ((narcs_ + nfinals) != ncompacts_) {  // Total element count must equal nstates_ * Size().
+ FSTERROR() << "CompactFstData: compactor incompatible with fst";
+ error_ = true;
+ return;
+ }
+ compacts_ = new CompactElement[ncompacts_];
+ }
+ size_t pos = 0, fpos = 0;  // pos: next free slot in compacts_; fpos: first slot of the current state.
+ for (StateId s = 0; s < nstates_; ++s) {  // Second pass: walks state ids 0 .. nstates_ - 1 directly.
+ fpos = pos;
+ if (compactor.Size() == -1)
+ states_[s] = pos;
+ if (fst.Final(s) != Weight::Zero())  // The superfinal transition is stored first.
+ compacts_[pos++] = compactor.Compact(s, A(kNoLabel, kNoLabel,
+ fst.Final(s), kNoStateId));
+ for (ArcIterator< Fst<A> > aiter(fst, s);
+ !aiter.Done();
+ aiter.Next()) {
+ compacts_[pos++] = compactor.Compact(s, aiter.Value());
+ }
+ if ((compactor.Size() != -1) && ((pos - fpos) != compactor.Size())) {  // Per-state check for fixed out-degree.
+ FSTERROR() << "CompactFstData: compactor incompatible with fst";
+ error_ = true;
+ return;
+ }
+ }
+ if (pos != ncompacts_) {  // Both passes must agree on the element count.
+ FSTERROR() << "CompactFstData: compactor incompatible with fst";
+ error_ = true;
+ return;
+ }
+}
+
+// Builds the compact representation directly from the already-compacted
+// elements in [begin, end), without an intermediate Fst.
+//
+// Fixed out-degree compactors (compactor.Size() != -1): the elements are
+// copied as-is. For Size() == 1 (strings), one final-weight element with
+// weight Weight::One() is appended when the input is empty or when its
+// last element does not expand to ilabel == kNoLabel. The resulting
+// element count must be a multiple of Size(); otherwise error_ is set.
+// The string case evaluates 'begin + k', so it needs a random access
+// iterator.
+//
+// Variable out-degree compactors (Size() == -1): every element that
+// expands to ilabel == kNoLabel starts a new state and is stored only
+// when its weight is not Weight::Zero(); all other elements are stored
+// as arcs of the current state.
+//
+// start_ is set to 0 whenever at least one state is created.
+//
+// std::distance is called qualified so that iterator types with no
+// associated namespace std (e.g. raw pointers) are also accepted.
+template <class E, class U>
+template <class Iterator, class C>
+CompactFstData<E, U>::CompactFstData(const Iterator &begin,
+                                     const Iterator &end,
+                                     const C &compactor)
+    : states_(0),
+      compacts_(0),
+      nstates_(0),
+      ncompacts_(0),
+      narcs_(0),
+      start_(kNoStateId),
+      error_(false) {
+  typedef typename C::Arc Arc;
+  typedef typename Arc::Weight Weight;
+  if (compactor.Size() != -1) {
+    ncompacts_ = std::distance(begin, end);
+    if (compactor.Size() == 1) {
+      // For strings, allow implicit final weight.
+      // Empty input is the empty string.
+      if (ncompacts_ == 0) {
+        ++ncompacts_;
+      } else {
+        Arc arc = compactor.Expand(ncompacts_ - 1,
+                                   *(begin + (ncompacts_ - 1)));
+        if (arc.ilabel != kNoLabel)
+          ++ncompacts_;
+      }
+    }
+    if (ncompacts_ % compactor.Size()) {
+      FSTERROR() << "CompactFstData: size of input container incompatible"
+                 << " with compactor";
+      error_ = true;
+      return;
+    }
+    if (ncompacts_ == 0)
+      return;
+    start_ = 0;
+    nstates_ = ncompacts_ / compactor.Size();
+    compacts_ = new CompactElement[ncompacts_];
+    size_t i = 0;
+    Iterator it = begin;
+    for(; it != end; ++it, ++i){
+      compacts_[i] = *it;
+      // Only real arcs count towards narcs_; superfinal elements do not.
+      if (compactor.Expand(i, *it).ilabel != kNoLabel)
+        ++narcs_;
+    }
+    // Fill the slot reserved above for the implicit final weight.
+    if (i < ncompacts_)
+      compacts_[i] = compactor.Compact(i, Arc(kNoLabel, kNoLabel,
+                                              Weight::One(), kNoStateId));
+  } else {
+    if (std::distance(begin, end) == 0)
+      return;
+    // Count # of states, arcs and compacts.
+    Iterator it = begin;
+    for(size_t i = 0; it != end; ++it, ++i) {
+      Arc arc = compactor.Expand(i, *it);
+      if (arc.ilabel != kNoLabel) {
+        ++narcs_;
+        ++ncompacts_;
+      } else {
+        ++nstates_;
+        if (arc.weight != Weight::Zero())
+          ++ncompacts_;
+      }
+    }
+    start_ = 0;
+    compacts_ = new CompactElement[ncompacts_];
+    states_ = new Unsigned[nstates_ + 1];
+    // Sentinel entry: end offset of the last state.
+    states_[nstates_] = ncompacts_;
+    size_t i = 0, s = 0;
+    for(it = begin; it != end; ++it) {
+      Arc arc = compactor.Expand(i, *it);
+      if (arc.ilabel != kNoLabel) {
+        compacts_[i++] = *it;
+      } else {
+        // A final-weight element opens the next state; it is kept only
+        // when the state is actually final.
+        states_[s++] = i;
+        if (arc.weight != Weight::Zero())
+          compacts_[i++] = *it;
+      }
+    }
+    if ((s != nstates_) || (i != ncompacts_)) {
+      FSTERROR() << "CompactFstData: ill-formed input container";
+      error_ = true;
+      return;
+    }
+  }
+}
+
+template <class E, class U>
+template <class C>
+CompactFstData<E, U> *CompactFstData<E, U>::Read(  // Returns a new object read from strm, or 0 (after logging) on failure.
+ istream &strm,
+ const FstReadOptions &opts,
+ const FstHeader &hdr,
+ const C &compactor) {
+ CompactFstData<E, U> *data = new CompactFstData<E, U>();
+ data->start_ = hdr.Start();  // Start state and counts come from the header, not the stream body.
+ data->nstates_ = hdr.NumStates();
+ data->narcs_ = hdr.NumArcs();
+
+ if (compactor.Size() == -1) {  // Variable out-degree: the states_ index is read from the stream.
+ data->states_ = new Unsigned[data->nstates_ + 1];
+ if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) &&
+ !AlignInput(strm, kFileAlign)) {
+ LOG(ERROR) << "CompactFst::Read: Alignment failed: " << opts.source;
+ delete data;
+ return 0;
+ }
+ // TODO: memory map this
+ size_t b = (data->nstates_ + 1) * sizeof(Unsigned);
+ strm.read(reinterpret_cast<char *>(data->states_), b);  // Raw binary read of nstates_ + 1 offsets.
+ if (!strm) {
+ LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source;
+ delete data;
+ return 0;
+ }
+ } else {  // Fixed out-degree: no index is stored.
+ data->states_ = 0;
+ }
+ data->ncompacts_ = compactor.Size() == -1
+ ? data->states_[data->nstates_]  // Last index entry holds the total element count.
+ : data->nstates_ * compactor.Size();
+ data->compacts_ = new CompactElement[data->ncompacts_];  // NOTE(review): size is taken from the stream unchecked — confirm inputs are trusted.
+ // TODO: memory map this
+ size_t b = data->ncompacts_ * sizeof(CompactElement);
+ if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) &&
+ !AlignInput(strm, kFileAlign)) {
+ LOG(ERROR) << "CompactFst::Read: Alignment failed: " << opts.source;
+ delete data;
+ return 0;
+ }
+ strm.read(reinterpret_cast<char *>(data->compacts_), b);  // Raw binary read of the compact elements.
+ if (!strm) {
+ LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source;
+ delete data;
+ return 0;
+ }
+ return data;  // Caller takes ownership.
+}
+
+// Writes the raw arrays to 'strm': first the states_ index (only when it
+// is allocated), then the compact elements. When opts.align is set, the
+// stream is aligned to kFileAlign before each array. Returns false, after
+// logging, if alignment fails or the stream is bad after the final flush.
+template <class E, class U>
+bool CompactFstData<E, U>::Write(ostream &strm,
+                                 const FstWriteOptions &opts) const {
+  const bool has_index = (states_ != 0);
+  if (has_index) {
+    if (opts.align && !AlignOutput(strm, kFileAlign)) {
+      LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source;
+      return false;
+    }
+    const size_t index_bytes = (nstates_ + 1) * sizeof(Unsigned);
+    strm.write(reinterpret_cast<const char *>(states_), index_bytes);
+  }
+
+  if (opts.align && !AlignOutput(strm, kFileAlign)) {
+    LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source;
+    return false;
+  }
+  const size_t element_bytes = ncompacts_ * sizeof(CompactElement);
+  strm.write(reinterpret_cast<const char *>(compacts_), element_bytes);
+
+  // Stream state is checked once, after everything has been flushed.
+  strm.flush();
+  const bool failed = !strm;
+  if (failed) {
+    LOG(ERROR) << "CompactFst::Write: Write failed: " << opts.source;
+    return false;
+  }
+  return true;
+}
+
+template <class A, class C, class U> class CompactFst;
+template <class F, class G> void Cast(const F &, G *);
+
+// Implementation class for CompactFst, which contains CompactFstData
+// and Fst cache.
+//
+// Arcs and final weights are expanded on demand from the (ref-counted,
+// possibly shared) CompactFstData through the compactor. Results for a
+// state may be stored in the CacheImpl<A> base; the methods below first
+// consult that cache via HasStart()/HasFinal()/HasArcs().
+//
+// A default-constructed instance has neither a compactor nor data. It
+// reports zero states and no start state, and cannot be written.
+template <class A, class C, class U>
+class CompactFstImpl : public CacheImpl<A> {
+ public:
+  using FstImpl<A>::SetType;
+  using FstImpl<A>::SetProperties;
+  using FstImpl<A>::Properties;
+  using FstImpl<A>::SetInputSymbols;
+  using FstImpl<A>::SetOutputSymbols;
+  using FstImpl<A>::WriteHeader;
+
+  using CacheImpl<A>::PushArc;
+  using CacheImpl<A>::HasArcs;
+  using CacheImpl<A>::HasFinal;
+  using CacheImpl<A>::HasStart;
+  using CacheImpl<A>::SetArcs;
+  using CacheImpl<A>::SetFinal;
+  using CacheImpl<A>::SetStart;
+
+  typedef A Arc;
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+  typedef C Compactor;
+  typedef typename C::Element CompactElement;
+  typedef U Unsigned;
+
+  // Creates an empty implementation with no compactor and no data; used
+  // by Read(), which fills both in afterwards.
+  CompactFstImpl()
+      : CacheImpl<A>(CompactFstOptions()),
+        compactor_(0),
+        own_compactor_(false),
+        data_(0) {
+    SetCompactType(C::Type());
+    SetProperties(kNullProperties | kStaticProperties);
+  }
+
+  // Compacts 'fst' using a private copy of 'compactor'.
+  CompactFstImpl(const Fst<Arc> &fst, const C &compactor,
+                 const CompactFstOptions &opts)
+      : CacheImpl<A>(opts),
+        compactor_(new C(compactor)),
+        own_compactor_(true),
+        data_(0) {
+    Init(fst);
+  }
+
+  // Compacts 'fst' using 'compactor' directly; the pointer is not owned
+  // and is not deleted by this object.
+  CompactFstImpl(const Fst<Arc> &fst, C *compactor,
+                 const CompactFstOptions &opts)
+      : CacheImpl<A>(opts),
+        compactor_(compactor),
+        own_compactor_(false),
+        data_(0) {
+    Init(fst);
+  }
+
+  // Builds from already-compacted elements in [b, e) using a private
+  // copy of 'compactor'.
+  template <class Iterator>
+  CompactFstImpl(const Iterator &b, const Iterator &e, const C &compactor,
+                 const CompactFstOptions &opts)
+      : CacheImpl<A>(opts),
+        compactor_(new C(compactor)),
+        own_compactor_(true),
+        data_(0) {
+    Init(b, e);
+  }
+
+  // Builds from already-compacted elements in [b, e) using 'compactor'
+  // directly; the pointer is not owned.
+  template <class Iterator>
+  CompactFstImpl(const Iterator &b, const Iterator &e, C *compactor,
+                 const CompactFstOptions &opts)
+      : CacheImpl<A>(opts),
+        compactor_(compactor),
+        own_compactor_(false),
+        data_(0) {
+    Init(b, e);
+  }
+
+  // Copy constructor: the compactor is cloned, the data is shared through
+  // its reference count. A source without a compactor (default-constructed
+  // impl) yields a copy without one instead of dereferencing null.
+  CompactFstImpl(const CompactFstImpl<A, C, U> &impl)
+      : CacheImpl<A>(impl),
+        compactor_(impl.compactor_ ? new C(*impl.compactor_) : 0),
+        own_compactor_(true),
+        data_(impl.data_) {
+    if (data_)
+      data_->IncrRefCount();
+    SetType(impl.Type());
+    SetProperties(impl.Properties());
+    SetInputSymbols(impl.InputSymbols());
+    SetOutputSymbols(impl.OutputSymbols());
+  }
+
+  // Deletes the compactor when owned, and the data when this was its
+  // last reference.
+  ~CompactFstImpl(){
+    if (own_compactor_)
+      delete compactor_;
+    if (data_ && !data_->DecrRefCount())
+      delete data_;
+  }
+
+  // Returns the start state; kNoStateId when there is no data.
+  StateId Start() {
+    if (!HasStart()) {
+      SetStart(data_ ? data_->Start() : kNoStateId);
+    }
+    return CacheImpl<A>::Start();
+  }
+
+  // Returns the final weight of 's'. The first compact element of a state
+  // is expanded; it is the final weight only if it expands to
+  // ilabel == kNoLabel, otherwise the state is non-final (Weight::Zero()).
+  Weight Final(StateId s) {
+    if (!HasFinal(s)) {
+      Arc arc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId);
+      // A variable out-degree state may have no elements at all.
+      if ((compactor_->Size() != -1) ||
+          (data_->States(s) != data_->States(s + 1)))
+        arc = ComputeArc(s,
+                         compactor_->Size() == -1
+                         ? data_->States(s)
+                         : s * compactor_->Size());
+      SetFinal(s, arc.ilabel == kNoLabel ? arc.weight : Weight::Zero());
+    }
+    return CacheImpl<A>::Final(s);
+  }
+
+  // Returns the number of states; 0 when the error property is set or
+  // when there is no data.
+  StateId NumStates() const {
+    if (Properties(kError) || !data_) return 0;
+    return data_->NumStates();
+  }
+
+  // Returns the number of arcs leaving 's', i.e. the number of compact
+  // elements of 's' minus the leading superfinal element, if any.
+  size_t NumArcs(StateId s) {
+    if (HasArcs(s))
+      return CacheImpl<A>::NumArcs(s);
+    Unsigned i, num_arcs;
+    if (compactor_->Size() == -1) {
+      i = data_->States(s);
+      num_arcs = data_->States(s + 1) - i;
+    } else {
+      i = s * compactor_->Size();
+      num_arcs = compactor_->Size();
+    }
+    if (num_arcs > 0) {
+      const A &arc = ComputeArc(s, i, kArcILabelValue);
+      // Superfinal transitions are marked by their label (kNoLabel), as
+      // in Final() and Expand().
+      if (arc.ilabel == kNoLabel) {
+        --num_arcs;
+      }
+    }
+    return num_arcs;
+  }
+
+  // Returns the number of input-epsilon arcs at 's'. Unless the FST is
+  // input-label sorted, the state is expanded into the cache first.
+  size_t NumInputEpsilons(StateId s) {
+    if (!HasArcs(s) && !Properties(kILabelSorted))
+      Expand(s);
+    if (HasArcs(s))
+      return CacheImpl<A>::NumInputEpsilons(s);
+    return CountEpsilons(s, false);
+  }
+
+  // Returns the number of output-epsilon arcs at 's'. Unless the FST is
+  // output-label sorted, the state is expanded into the cache first.
+  size_t NumOutputEpsilons(StateId s) {
+    if (!HasArcs(s) && !Properties(kOLabelSorted))
+      Expand(s);
+    if (HasArcs(s))
+      return CacheImpl<A>::NumOutputEpsilons(s);
+    return CountEpsilons(s, true);
+  }
+
+  // Counts the epsilon labels at 's' directly on the compact elements.
+  // Elements with label kNoLabel are skipped and the scan stops at the
+  // first positive label, so the result is only meaningful when the
+  // state's arcs are sorted on the chosen label.
+  size_t CountEpsilons(StateId s, bool output_epsilons) {
+    size_t begin = compactor_->Size() == -1 ?
+        data_->States(s) : s * compactor_->Size();
+    size_t end = compactor_->Size() == -1 ?
+        data_->States(s + 1) : (s + 1) * compactor_->Size();
+    size_t num_eps = 0;
+    for (size_t i = begin; i < end; ++i) {
+      const A &arc = ComputeArc(
+          s, i, output_epsilons ? kArcOLabelValue : kArcILabelValue);
+      const typename A::Label &label =
+          (output_epsilons ? arc.olabel : arc.ilabel);
+      if (label == kNoLabel)
+        continue;
+      else if (label > 0)
+        break;
+      ++num_eps;
+    }
+    return num_eps;
+  }
+
+  // Reads header, compactor and data from 'strm', in that order. Returns
+  // 0 if any of the three steps fails.
+  static CompactFstImpl<A, C, U> *Read(istream &strm,
+                                       const FstReadOptions &opts) {
+    CompactFstImpl<A, C, U> *impl = new CompactFstImpl<A, C, U>();
+    FstHeader hdr;
+    if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) {
+      delete impl;
+      return 0;
+    }
+
+    // Ensures compatibility
+    if (hdr.Version() == kAlignedFileVersion)
+      hdr.SetFlags(hdr.GetFlags() | FstHeader::IS_ALIGNED);
+
+    impl->compactor_ = C::Read(strm);
+    if (!impl->compactor_) {
+      delete impl;
+      return 0;
+    }
+    impl->own_compactor_ = true;
+    impl->data_ = CompactFstData<CompactElement, U>::Read(strm, opts, hdr,
+                                                          *impl->compactor_);
+    if (!impl->data_) {
+      delete impl;
+      return 0;
+    }
+    return impl;
+  }
+
+  // Writes header, compactor and data to 'strm', in that order. Returns
+  // false when there is nothing to write (no compactor or no data), when
+  // the compactor reports a write failure, or when writing the data
+  // fails.
+  bool Write(ostream &strm, const FstWriteOptions &opts) const {
+    if (!compactor_ || !data_) {
+      LOG(ERROR) << "CompactFst::Write: Uninitialized fst: " << opts.source;
+      return false;
+    }
+    FstHeader hdr;
+    hdr.SetStart(data_->Start());
+    hdr.SetNumStates(data_->NumStates());
+    hdr.SetNumArcs(data_->NumArcs());
+
+    // Ensures compatibility
+    int file_version = opts.align ? kAlignedFileVersion : kFileVersion;
+    WriteHeader(strm, opts, file_version, &hdr);
+
+    if (!compactor_->Write(strm)) {
+      LOG(ERROR) << "CompactFst::Write: Compactor write failed: "
+                 << opts.source;
+      return false;
+    }
+    return data_->Write(strm, opts);
+  }
+
+  // Provide information needed for generic state iterator
+  void InitStateIterator(StateIteratorData<A> *data) const {
+    data->base = 0;
+    data->nstates = data_ ? data_->NumStates() : 0;
+  }
+
+  // Expands 's' into the cache if needed, then hands out the cached arcs.
+  void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
+    if (!HasArcs(s))
+      Expand(s);
+    CacheImpl<A>::InitArcIterator(s, data);
+  }
+
+  // Expands the compact element at position 'i' as a transition of state
+  // 's'; 'f' is passed through to the compactor.
+  Arc ComputeArc(StateId s, Unsigned i, uint32 f = kArcValueFlags) const {
+    return compactor_->Expand(s, data_->Compacts(i), f);
+  }
+
+  // Expands all arcs of 's' into the cache, skipping superfinal elements.
+  void Expand(StateId s) {
+    size_t begin = compactor_->Size() == -1 ?
+        data_->States(s) : s * compactor_->Size();
+    size_t end = compactor_->Size() == -1 ?
+        data_->States(s + 1) : (s + 1) * compactor_->Size();
+    for (size_t i = begin; i < end; ++i) {
+      const Arc &arc = ComputeArc(s, i);
+      if (arc.ilabel == kNoLabel) continue;
+      PushArc(s, arc);
+    }
+    SetArcs(s);
+  }
+
+  // Replaces the data with one built from the compact elements in
+  // [b, e). As in Init(), an ill-formed input sets the error property.
+  // NOTE(review): states already expanded into the cache are not
+  // invalidated here — confirm callers only use this on a fresh FST.
+  template <class Iterator>
+  void SetCompactElements(const Iterator &b, const Iterator &e) {
+    if (data_ && !data_->DecrRefCount())
+      delete data_;
+    data_ = new CompactFstData<CompactElement, U>(b, e, *compactor_);
+    if (data_->Error())
+      SetProperties(kError, kError);
+  }
+
+  C *GetCompactor() const { return compactor_; }
+  CompactFstData<CompactElement, U> *Data() const { return data_; }
+
+ protected:
+  // Converting constructor from an implementation over another arc and
+  // compactor type: the compactor is converted, the data is shared. A
+  // source without a compactor yields a copy without one.
+  template <class B, class D>
+  explicit CompactFstImpl(const CompactFstImpl<B, D, U> &impl)
+      : CacheImpl<A>(CacheOptions(impl.GetCacheGc(), impl.GetCacheLimit())),
+        compactor_(impl.GetCompactor() ? new C(*impl.GetCompactor()) : 0),
+        own_compactor_(true),
+        data_(impl.Data()) {
+    if (data_)
+      data_->IncrRefCount();
+    SetType(impl.Type());
+    SetProperties(impl.Properties());
+    SetInputSymbols(impl.InputSymbols());
+    SetOutputSymbols(impl.OutputSymbols());
+  }
+
+ private:
+  // Sets the FST type string: "compact", followed by the bit width of U
+  // when U is not the size of uint32, then "_" and 'compactor_type'.
+  void SetCompactType(const string &compactor_type) {
+    string type = "compact";
+    if (sizeof(U) != sizeof(uint32)) {
+      string size;
+      Int64ToStr(8 * sizeof(U), &size);
+      type += size;
+    }
+    type += "_";
+    type += compactor_type;
+    SetType(type);
+  }
+
+  // Shared body of the constructors taking an Fst: sets type and symbol
+  // tables, builds the data, and copies the properties of 'fst'. Sets the
+  // error property if the data could not be built, if 'fst' is in error,
+  // or if the compactor rejects 'fst'.
+  void Init(const Fst<Arc> &fst) {
+    SetCompactType(compactor_->Type());
+    SetInputSymbols(fst.InputSymbols());
+    SetOutputSymbols(fst.OutputSymbols());
+    data_ = new CompactFstData<CompactElement, U>(fst, *compactor_);
+    if (data_->Error())
+      SetProperties(kError, kError);
+    uint64 copy_properties = fst.Properties(kCopyProperties, true);
+    if ((copy_properties & kError) || !compactor_->Compatible(fst)) {
+      FSTERROR() << "CompactFstImpl: input fst incompatible with compactor";
+      SetProperties(kError, kError);
+      return;
+    }
+    SetProperties(copy_properties | kStaticProperties);
+  }
+
+  // Shared body of the constructors taking compact elements: sets type
+  // and the compactor's guaranteed properties, then builds the data.
+  template <class Iterator>
+  void Init(const Iterator &b, const Iterator &e) {
+    SetCompactType(compactor_->Type());
+    SetProperties(kStaticProperties | compactor_->Properties());
+    data_ = new CompactFstData<CompactElement, U>(b, e, *compactor_);
+    if (data_->Error())
+      SetProperties(kError, kError);
+  }
+
+  // Properties always true of this Fst class
+  static const uint64 kStaticProperties = kExpanded;
+  // Current unaligned file format version
+  static const int kFileVersion = 2;
+  // Current aligned file format version
+  static const int kAlignedFileVersion = 1;
+  // Minimum file format version supported
+  static const int kMinFileVersion = 1;
+
+  C *compactor_;         // May be 0 only for a default-constructed impl.
+  bool own_compactor_;   // Whether the destructor deletes compactor_.
+  CompactFstData<CompactElement, U> *data_;  // Ref-counted; may be 0.
+};
+
+template <class A, class C, class U>
+const uint64 CompactFstImpl<A, C, U>::kStaticProperties;
+template <class A, class C, class U>
+const int CompactFstImpl<A, C, U>::kFileVersion;
+template <class A, class C, class U>
+const int CompactFstImpl<A, C, U>::kAlignedFileVersion;
+template <class A, class C, class U>
+const int CompactFstImpl<A, C, U>::kMinFileVersion;
+
+
+// CompactFst. This class attaches interface to implementation and
+// handles reference counting, delegating most methods to
+// ImplToExpandedFst. The unsigned type U is used to represent indices
+// into the compact arc array (uint32 by default, declared in
+// fst-decl.h).
+template <class A, class C, class U>
+class CompactFst : public ImplToExpandedFst< CompactFstImpl<A, C, U> > {
+ public:
+ friend class StateIterator< CompactFst<A, C, U> >;  // The iterator specializations call the private GetImpl().
+ friend class ArcIterator< CompactFst<A, C, U> >;
+ template <class F, class G> void friend Cast(const F &, G *);
+
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+ typedef CompactFstImpl<A, C, U> Impl;
+ typedef CacheState<A> State;
+ typedef U Unsigned;
+
+ CompactFst() : ImplToExpandedFst<Impl>(new Impl()) {}  // Empty FST backed by a default-constructed implementation.
+
+ explicit CompactFst(const Fst<A> &fst, const C &compactor = C(),  // Compacts 'fst'; the implementation stores a copy of 'compactor'.
+ const CompactFstOptions &opts = CompactFstOptions())
+ : ImplToExpandedFst<Impl>(new Impl(fst, compactor, opts)) {}
+
+ CompactFst(const Fst<A> &fst, C *compactor,  // Compacts 'fst'; the 'compactor' pointer is used as-is and not owned.
+ const CompactFstOptions &opts = CompactFstOptions())
+ : ImplToExpandedFst<Impl>(new Impl(fst, compactor, opts)) {}
+
+ // The following 2 constructors take as input two iterators delimiting
+ // a set of (already) compacted transitions, starting with the
+ // transitions out of the initial state. The format of the input
+ // differs for fixed out-degree and variable out-degree compactors.
+ //
+ // - For fixed out-degree compactors, the final weight (encoded as a
+ // compacted transition) needs to be given only for final
+ // states. All strings (compactor of size 1) will be assumed to be
+ // terminated by a final state even when the final state is not
+ // explicitly given.
+ //
+ // - For variable out-degree compactors, the final weight (encoded
+ // as a compacted transition) needs to be given for all states and
+ // must appear first in the list (for state s, final weight of s,
+ // followed by outgoing transitions in s).
+ //
+ // These 2 constructors allow the direct construction of a CompactFst
+ // without first creating a more memory hungry 'regular' FST. This
+ // is useful when memory usage is severely constrained.
+ template <class Iterator>
+ explicit CompactFst(const Iterator &begin, const Iterator &end,
+ const C &compactor = C(),
+ const CompactFstOptions &opts = CompactFstOptions())
+ : ImplToExpandedFst<Impl>(new Impl(begin, end, compactor, opts)) {}
+
+ template <class Iterator>
+ CompactFst(const Iterator &begin, const Iterator &end,
+ C *compactor, const CompactFstOptions &opts = CompactFstOptions())
+ : ImplToExpandedFst<Impl>(new Impl(begin, end, compactor, opts)) {}
+
+ // See Fst<>::Copy() for doc.
+ CompactFst(const CompactFst<A, C, U> &fst, bool safe = false)
+ : ImplToExpandedFst<Impl>(fst, safe) {}
+
+ // Get a copy of this CompactFst. See Fst<>::Copy() for further doc.
+ virtual CompactFst<A, C, U> *Copy(bool safe = false) const {
+ return new CompactFst<A, C, U>(*this, safe);
+ }
+
+ // Read a CompactFst from an input stream; return NULL on error
+ static CompactFst<A, C, U> *Read(istream &strm, const FstReadOptions &opts) {
+ Impl* impl = Impl::Read(strm, opts);
+ return impl ? new CompactFst<A, C, U>(impl) : 0;  // The new object takes over 'impl'.
+ }
+
+ // Read a CompactFst from a file; return NULL on error
+ // Empty filename reads from standard input
+ static CompactFst<A, C, U> *Read(const string &filename) {
+ Impl* impl = ImplToExpandedFst<Impl>::Read(filename);
+ return impl ? new CompactFst<A, C, U>(impl) : 0;
+ }
+
+ virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
+ return GetImpl()->Write(strm, opts);
+ }
+
+ virtual bool Write(const string &filename) const {
+ return Fst<A>::WriteFile(filename);
+ }
+
+ virtual void InitStateIterator(StateIteratorData<A> *data) const {
+ GetImpl()->InitStateIterator(data);
+ }
+
+ virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
+ GetImpl()->InitArcIterator(s, data);  // May expand 's' into the implementation's cache.
+ }
+
+ virtual MatcherBase<A> *InitMatcher(MatchType match_type) const {
+ return new SortedMatcher<CompactFst<A, C, U> >(*this, match_type);  // Caller receives a heap-allocated matcher.
+ }
+
+ template <class Iterator>
+ void SetCompactElements(const Iterator &b, const Iterator &e) {  // Replaces the compact data with one built from [b, e).
+ GetImpl()->SetCompactElements(b, e);
+ }
+
+ private:
+ CompactFst(Impl *impl) : ImplToExpandedFst<Impl>(impl) {}  // Used by the Read() functions above.
+
+ // Makes visible to friends.
+ Impl *GetImpl() const { return ImplToFst<Impl, ExpandedFst<A> >::GetImpl(); }
+
+ void SetImpl(Impl *impl, bool own_impl = false) {
+ ImplToFst< Impl, ExpandedFst<A> >::SetImpl(impl, own_impl);
+ }
+
+ void operator=(const CompactFst<A, C, U> &fst); // disallow
+};
+
+
+// State iterator specialization for CompactFst (the generic version and
+// its sample usage are in fst.h; use the CompactFst type with this one).
+// The state count is captured once at construction and states are
+// enumerated as 0, 1, ..., count - 1, so every member is trivially
+// inlinable.
+template <class A, class C, class U>
+class StateIterator< CompactFst<A, C, U> > {
+ public:
+  typedef typename A::StateId StateId;
+
+  explicit StateIterator(const CompactFst<A, C, U> &fst)
+      : num_states_(fst.GetImpl()->NumStates()), current_(0) {}
+
+  // True once every state has been visited.
+  bool Done() const { return !(current_ < num_states_); }
+
+  // Id of the state the iterator currently points at.
+  StateId Value() const { return current_; }
+
+  // Moves on to the next state id.
+  void Next() { ++current_; }
+
+  // Rewinds to the first state.
+  void Reset() { current_ = 0; }
+
+ private:
+  StateId num_states_;
+  StateId current_;
+
+  DISALLOW_COPY_AND_ASSIGN(StateIterator);
+};
+
+// Specialization for CompactFst.
+// Never caches, always iterates over the underlying compact elements.
+//
+// At construction the range of compact elements of state 's' is located
+// (through the states_ index for variable out-degree compactors, by
+// multiplication for fixed out-degree ones). If the first element is the
+// superfinal transition it is skipped, so that only real arcs are
+// visited. Value() expands the current element on every call into an
+// internal arc and returns a reference to it; that reference is
+// overwritten by the next call to Value().
+template <class A, class C, class U>
+class ArcIterator< CompactFst<A, C, U> > {
+ public:
+  typedef typename A::StateId StateId;
+  typedef typename C::Element CompactElement;
+
+  ArcIterator(const CompactFst<A, C, U> &fst, StateId s)
+      : compactor_(fst.GetImpl()->GetCompactor()), state_(s), compacts_(0),
+        pos_(0), num_arcs_(0), flags_(kArcValueFlags) {
+
+    const CompactFstData<CompactElement, U> *data = fst.GetImpl()->Data();
+    size_t offset;
+    if (compactor_->Size() == -1) {  // Variable out-degree compactor
+      offset = data->States(s);
+      num_arcs_ = data->States(s + 1) - offset;
+    } else {  // Fixed out-degree compactor
+      offset =  s * compactor_->Size();
+      num_arcs_ = compactor_->Size();
+    }
+    if (num_arcs_ > 0) {
+      compacts_ = &(data->Compacts(offset));
+      arc_ = compactor_->Expand(s, *compacts_, kArcILabelValue);
+      // The superfinal transition, when present, comes first and is
+      // identified by its label (kNoLabel); it is not an arc.
+      if (arc_.ilabel == kNoLabel) {
+        ++compacts_;
+        --num_arcs_;
+      }
+    }
+  }
+
+  ~ArcIterator() {}
+
+  // True once every arc of the state has been visited.
+  bool Done() const { return pos_ >= num_arcs_; }
+
+  // Expands and returns the current arc, passing the current flags to
+  // the compactor. Must not be called when Done() is true.
+  const A& Value() const {
+    arc_ = compactor_->Expand(state_, compacts_[pos_], flags_);
+    return arc_;
+  }
+
+  void Next() { ++pos_; }
+
+  size_t Position() const { return pos_; }
+
+  void Reset() { pos_ = 0; }
+
+  // Moves to arc number 'pos'; no range check is performed.
+  void Seek(size_t pos) { pos_ = pos; }
+
+  uint32 Flags() const { return flags_; }
+
+  // Clears the bits selected by 'm', then sets the bits of 'f' that are
+  // part of kArcValueFlags.
+  void SetFlags(uint32 f, uint32 m) {
+    flags_ &= ~m;
+    flags_ |= (f & kArcValueFlags);
+  }
+
+ private:
+  C *compactor_;                    // Not owned.
+  StateId state_;
+  const CompactElement *compacts_;  // First real arc of state_, or 0.
+  size_t pos_;
+  size_t num_arcs_;
+  mutable A arc_;                   // Scratch arc returned by Value().
+  uint32 flags_;
+
+  DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+// // Specialization for CompactFst.
+// // This is an optionally caching arc iterator.
+// // TODO(allauzen): implements the kArcValueFlags, the current
+// // implementation only implements the kArcNoCache flag.
+// template <class A, class C, class U>
+// class ArcIterator< CompactFst<A, C, U> > {
+// public:
+// typedef typename A::StateId StateId;
+
+// ArcIterator(const CompactFst<A, C, U> &fst, StateId s)
+// : fst_(fst), state_(s), pos_(0), num_arcs_(0), offset_(0),
+// flags_(kArcValueFlags) {
+// cache_data_.ref_count = 0;
+
+// if (fst_.GetImpl()->HasArcs(state_)) {
+// fst_.GetImpl()->InitArcIterator(s, &cache_data_);
+// num_arcs_ = cache_data_.narcs;
+// return;
+// }
+
+// const C *compactor = fst_.GetImpl()->GetCompactor();
+// const CompactFstData<A, C, U> *data = fst_.GetImpl()->Data();
+// if (compactor->Size() == -1) { // Variable out-degree compactor
+// offset_ = data->States(s);
+// num_arcs_ = data->States(s + 1) - offset_;
+// } else { // Fixed out-degree compactor
+// offset_ = s * compactor->Size();
+// num_arcs_ = compactor->Size();
+// }
+// if (num_arcs_ > 0) {
+// const A &arc = fst_.GetImpl()->ComputeArc(s, offset_);
+// if (arc.ilabel == kNoStateId) {
+// ++offset_;
+// --num_arcs_;
+// }
+// }
+// }
+
+
+// ~ArcIterator() {
+// if (cache_data_.ref_count)
+// --(*cache_data_.ref_count);
+// }
+
+// bool Done() const { return pos_ >= num_arcs_; }
+
+// const A& Value() const {
+// if (cache_data_.ref_count == 0) {
+// if (flags_ & kArcNoCache) {
+// arc_ = fst_.GetImpl()->ComputeArc(state_, pos_ + offset_);
+// return arc_;
+// } else {
+// fst_.GetImpl()->InitArcIterator(state_, &cache_data_);
+// }
+// }
+// return cache_data_.arcs[pos_];
+// }
+
+// void Next() { ++pos_; }
+
+// size_t Position() const { return pos_; }
+
+// void Reset() { pos_ = 0; }
+
+// void Seek(size_t pos) { pos_ = pos; }
+
+// uint32 Flags() const { return flags_; }
+
+// void SetFlags(uint32 f, uint32 m) {
+// flags_ &= ~m;
+// flags_ |= f;
+
+// if (!(flags_ & kArcNoCache) && cache_data_.ref_count == 0)
+// fst_.GetImpl()->InitArcIterator(state_, &cache_data_);
+// }
+
+// private:
+// mutable const CompactFst<A, C, U> &fst_;
+// StateId state_;
+// size_t pos_;
+// size_t num_arcs_;
+// size_t offset_;
+// uint32 flags_;
+// mutable A arc_;
+// mutable ArcIteratorData<A> cache_data_;
+
+// DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+// };
+
+
+//
+// Utility Compactors
+//
+
+// Compactor for unweighted string FSTs.
+//
+// Each transition is stored as its input label only. On expansion the
+// label is used for both input and output, the weight is Weight::One(),
+// and the destination is s + 1; the label kNoLabel stands for the
+// superfinal transition and expands to destination kNoStateId.
+template <class A>
+class StringCompactor {
+ public:
+  typedef A Arc;
+  typedef typename A::Label Element;
+  typedef typename A::Label Label;
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+
+  // Keeps only the input label of 'arc'.
+  Element Compact(StateId s, const A &arc) const { return arc.ilabel; }
+
+  // Rebuilds the transition of state 's' stored as 'p'; 'f' is unused.
+  Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
+    if (p == kNoLabel)
+      return Arc(p, p, Weight::One(), kNoStateId);
+    return Arc(p, p, Weight::One(), s + 1);
+  }
+
+  // Fixed out-degree of one element per state.
+  ssize_t Size() const { return 1; }
+
+  // Properties guaranteed for any FST compacted by this compactor.
+  uint64 Properties() const {
+    return kString | kAcceptor | kUnweighted;
+  }
+
+  // 'fst' is compatible when it has all of the properties above.
+  bool Compatible(const Fst<A> &fst) const {
+    const uint64 required = Properties();
+    const uint64 present = fst.Properties(required, true);
+    return present == required;
+  }
+
+  static const string &Type() {
+    static const string type = "string";
+    return type;
+  }
+
+  // This compactor has no state to serialize.
+  bool Write(ostream &strm) const { return true; }
+
+  // Nothing is read from 'strm'; a fresh compactor is returned.
+  static StringCompactor *Read(istream &strm) {
+    return new StringCompactor;
+  }
+};
+
+
+// Compactor for weighted string FSTs.
+//
+// An arc is reduced to the pair (input label, weight). Expansion rebuilds
+// an acceptor arc leading to state s + 1; a stored label equal to kNoLabel
+// expands to an arc whose destination is kNoStateId.
+template <class A>
+class WeightedStringCompactor {
+ public:
+  typedef A Arc;
+  typedef typename A::Label Label;
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+  typedef pair<Label, Weight> Element;
+
+  // Stores the input label and weight of 'arc'.
+  Element Compact(StateId s, const A &arc) const {
+    return Element(arc.ilabel, arc.weight);
+  }
+
+  // Rebuilds the arc leaving state 's' from its stored (label, weight).
+  Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
+    StateId nextstate = kNoStateId;
+    if (p.first != kNoLabel)
+      nextstate = s + 1;
+    return Arc(p.first, p.first, p.second, nextstate);
+  }
+
+  // Exactly one element is stored per state.
+  ssize_t Size() const { return 1; }
+
+  uint64 Properties() const {
+    const uint64 props = kString | kAcceptor;
+    return props;
+  }
+
+  // True iff 'fst' is known (after testing) to have all required properties.
+  bool Compatible(const Fst<A> &fst) const {
+    const uint64 required = Properties();
+    const uint64 found = fst.Properties(required, true);
+    return found == required;
+  }
+
+  static const string &Type() {
+    static const string type("weighted_string");
+    return type;
+  }
+
+  // The compactor has no state, so there is nothing to serialize.
+  bool Write(ostream &strm) const { return true; }
+
+  static WeightedStringCompactor *Read(istream &strm) {
+    return new WeightedStringCompactor<A>();
+  }
+};
+
+
+// Compactor for unweighted acceptor FSTs.
+//
+// An arc is reduced to the pair (input label, next state). Expansion
+// rebuilds an acceptor arc (ilabel == olabel) of weight One().
+template <class A>
+class UnweightedAcceptorCompactor {
+ public:
+  typedef A Arc;
+  typedef typename A::Label Label;
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+  typedef pair<Label, StateId> Element;
+
+  // Stores the input label and destination state of 'arc'.
+  Element Compact(StateId s, const A &arc) const {
+    return Element(arc.ilabel, arc.nextstate);
+  }
+
+  // Rebuilds the arc from its stored (label, next state).
+  Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
+    const Label &label = p.first;
+    const StateId &nextstate = p.second;
+    return Arc(label, label, Weight::One(), nextstate);
+  }
+
+  // -1: the number of elements per state is not fixed.
+  ssize_t Size() const { return -1; }
+
+  uint64 Properties() const {
+    const uint64 props = kAcceptor | kUnweighted;
+    return props;
+  }
+
+  // True iff 'fst' is known (after testing) to have all required properties.
+  bool Compatible(const Fst<A> &fst) const {
+    const uint64 required = Properties();
+    const uint64 found = fst.Properties(required, true);
+    return found == required;
+  }
+
+  static const string &Type() {
+    static const string type("unweighted_acceptor");
+    return type;
+  }
+
+  // The compactor has no state, so there is nothing to serialize.
+  bool Write(ostream &strm) const { return true; }
+
+  static UnweightedAcceptorCompactor *Read(istream &istrm) {
+    return new UnweightedAcceptorCompactor<A>();
+  }
+};
+
+
+// Compactor for weighted acceptor FSTs.
+//
+// An arc is reduced to ((input label, weight), next state). Expansion
+// rebuilds an acceptor arc (ilabel == olabel) carrying the stored weight.
+template <class A>
+class AcceptorCompactor {
+ public:
+  typedef A Arc;
+  typedef typename A::Label Label;
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+  typedef pair< pair<Label, Weight>, StateId > Element;
+
+  // Stores the input label, weight and destination state of 'arc'.
+  Element Compact(StateId s, const A &arc) const {
+    const pair<Label, Weight> labeled_weight(arc.ilabel, arc.weight);
+    return Element(labeled_weight, arc.nextstate);
+  }
+
+  // Rebuilds the arc from its stored ((label, weight), next state).
+  Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
+    const Label &label = p.first.first;
+    const Weight &weight = p.first.second;
+    return Arc(label, label, weight, p.second);
+  }
+
+  // -1: the number of elements per state is not fixed.
+  ssize_t Size() const { return -1; }
+
+  uint64 Properties() const {
+    const uint64 props = kAcceptor;
+    return props;
+  }
+
+  // True iff 'fst' is known (after testing) to have all required properties.
+  bool Compatible(const Fst<A> &fst) const {
+    const uint64 required = Properties();
+    const uint64 found = fst.Properties(required, true);
+    return found == required;
+  }
+
+  static const string &Type() {
+    static const string type("acceptor");
+    return type;
+  }
+
+  // The compactor has no state, so there is nothing to serialize.
+  bool Write(ostream &strm) const { return true; }
+
+  static AcceptorCompactor *Read(istream &strm) {
+    return new AcceptorCompactor<A>();
+  }
+};
+
+
+// Compactor for unweighted FSTs.
+//
+// An arc is reduced to ((input label, output label), next state).
+// Expansion rebuilds the arc with weight One().
+template <class A>
+class UnweightedCompactor {
+ public:
+  typedef A Arc;
+  typedef typename A::Label Label;
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+  typedef pair< pair<Label, Label>, StateId > Element;
+
+  // Stores both labels and the destination state of 'arc'.
+  Element Compact(StateId s, const A &arc) const {
+    const pair<Label, Label> labels(arc.ilabel, arc.olabel);
+    return Element(labels, arc.nextstate);
+  }
+
+  // Rebuilds the arc from its stored ((ilabel, olabel), next state).
+  Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
+    const Label &ilabel = p.first.first;
+    const Label &olabel = p.first.second;
+    return Arc(ilabel, olabel, Weight::One(), p.second);
+  }
+
+  // -1: the number of elements per state is not fixed.
+  ssize_t Size() const { return -1; }
+
+  uint64 Properties() const {
+    const uint64 props = kUnweighted;
+    return props;
+  }
+
+  // True iff 'fst' is known (after testing) to have all required properties.
+  bool Compatible(const Fst<A> &fst) const {
+    const uint64 required = Properties();
+    const uint64 found = fst.Properties(required, true);
+    return found == required;
+  }
+
+  static const string &Type() {
+    static const string type("unweighted");
+    return type;
+  }
+
+  // The compactor has no state, so there is nothing to serialize.
+  bool Write(ostream &strm) const { return true; }
+
+  static UnweightedCompactor *Read(istream &strm) {
+    return new UnweightedCompactor<A>();
+  }
+};
+
+
+// Useful aliases when using StdArc
+typedef CompactFst< StdArc, StringCompactor<StdArc> >
+StdCompactStringFst;
+typedef CompactFst< StdArc, WeightedStringCompactor<StdArc> >
+StdCompactWeightedStringFst;
+typedef CompactFst<StdArc, AcceptorCompactor<StdArc> >
+StdCompactAcceptorFst;
+typedef CompactFst<StdArc, UnweightedCompactor<StdArc> >
+StdCompactUnweightedFst;
+typedef CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc> >
+StdCompactUnweightedAcceptorFst;
+
+} // namespace fst
+
+#endif // FST_LIB_COMPACT_FST_H__
diff --git a/src/include/fst/compat.h b/src/include/fst/compat.h
new file mode 100644
index 0000000..034b57e
--- /dev/null
+++ b/src/include/fst/compat.h
@@ -0,0 +1,152 @@
+// compat.h
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Google compatibility declarations and inline definitions.
+
+#ifndef FST_LIB_COMPAT_H__
+#define FST_LIB_COMPAT_H__
+
+#include <dlfcn.h>
+
+#include <climits>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <string>
+#include <vector>
+
+// Declares (but does not define) the copy constructor and copy-assignment
+// operator of 'type'. The macro itself does not change access: use it
+// inside the private: section of the class so that clients cannot copy
+// the object. The expansion has no trailing semicolon; the user supplies
+// it, e.g. "DISALLOW_COPY_AND_ASSIGN(Foo);".
+#define DISALLOW_COPY_AND_ASSIGN(type) \
+ type(const type&); \
+ void operator=(const type&)
+
+#include <fst/config.h>
+#include <fst/types.h>
+#include <fst/lock.h>
+#include <fst/flags.h>
+#include <fst/log.h>
+
+#ifdef HAVE_ICU
+#include <fst/icu.h>
+#endif
+
+using std::cin;
+using std::cout;
+using std::cerr;
+using std::endl;
+using std::string;
+
+void FailedNewHandler();
+
+namespace fst {
+
+using namespace std;
+
+void SplitToVector(char *line, const char *delim,
+ std::vector<char *> *vec, bool omit_empty_strings);
+
+// Downcasting: converts the pointer 'f' to the pointer type 'To' with a
+// static_cast. No run-time check is performed.
+template <typename To, typename From>
+inline To down_cast(From *f) {
+  To converted = static_cast<To>(f);
+  return converted;
+}
+
+// Bitcasting: reinterprets the object representation of 'source' as a
+// value of type Dest by copying its bytes.
+template <class Dest, class Source>
+inline Dest bit_cast(const Source &source) {
+  // Compile-time check that both types have the same size: the array
+  // bound becomes -1 (ill-formed) when sizeof(Dest) != sizeof(Source).
+  typedef char VerifySizesAreEqual[(sizeof(Dest) == sizeof(Source)) ? 1 : -1];
+  Dest result;
+  memcpy(&result, &source, sizeof(result));
+  return result;
+}
+
+// Check sums
+//
+// Computes a simple XOR checksum: each input byte is XOR-ed into one of
+// kCheckSumLength accumulator bytes, cycling through them in order.
+class CheckSummer {
+ public:
+  CheckSummer() : count_(0) {
+    check_sum_.resize(kCheckSumLength, '\0');
+  }
+
+  // Returns the summer to its freshly constructed state.
+  void Reset() {
+    count_ = 0;
+    // Clear every accumulator byte. The previous loop wrote index 0 on
+    // each iteration, leaving bytes 1..kCheckSumLength-1 stale.
+    check_sum_.assign(kCheckSumLength, '\0');
+  }
+
+  // Folds 'size' bytes starting at 'data' into the checksum.
+  void Update(void const *data, int size) {
+    const char *p = reinterpret_cast<const char *>(data);
+    for (int i = 0; i < size; ++i)
+      Accumulate(p[i]);
+  }
+
+  // Folds every byte of 'data' into the checksum.
+  void Update(string const &data) {
+    for (size_t i = 0; i < data.size(); ++i)
+      Accumulate(data[i]);
+  }
+
+  // Returns the current kCheckSumLength-byte checksum.
+  string Digest() {
+    return check_sum_;
+  }
+
+ private:
+  static const int kCheckSumLength = 32;
+
+  // XORs 'c' into the current accumulator byte and advances to the next
+  // one. count_ is kept in [0, kCheckSumLength) so that it cannot
+  // overflow on very large inputs.
+  void Accumulate(char c) {
+    check_sum_[count_] ^= c;
+    count_ = (count_ + 1) % kCheckSumLength;
+  }
+
+  int count_;         // Index of the accumulator byte to update next.
+  string check_sum_;  // The kCheckSumLength accumulator bytes.
+
+  DISALLOW_COPY_AND_ASSIGN(CheckSummer);
+};
+
+// Define the UTF8 string conversion functions to report an error
+// when the ICU Library is missing or disabled (HAVE_ICU undefined).
+// Both stubs ignore their arguments, emit a LOG(ERROR) message and
+// return false.
+#ifndef HAVE_ICU
+
+// Stub: always fails; the output vector is not touched.
+template <class Label>
+bool UTF8StringToLabels(const string&, std::vector<Label>*) {
+ LOG(ERROR) << "UTF8StringToLabels: ICU Library required for UTF8 handling";
+ return false;
+}
+
+// Stub: always fails; the output string is not touched.
+template <class Label>
+bool LabelsToUTF8String(const std::vector<Label>&, string*) {
+ LOG(ERROR) << "LabelsToUTF8String: ICU Library required for UTF8 handling";
+ return false;
+}
+
+#endif // HAVE_ICU
+
+} // namespace fst
+
+
+// Define missing hash functions if needed
+#ifndef HAVE_STD__TR1__HASH_LONG_LONG_UNSIGNED_
+namespace std {
+namespace tr1 {
+
+template <class T> class hash;
+
+// Hash for uint64 keys: the key itself, converted to size_t.
+template <>
+struct hash<uint64> {
+  size_t operator()(uint64 x) const {
+    return static_cast<size_t>(x);
+  }
+};
+
+}
+}
+#endif // HAVE_STD__TR1__HASH_LONG_LONG_UNSIGNED_
+
+#endif // FST_LIB_COMPAT_H__
diff --git a/src/include/fst/complement.h b/src/include/fst/complement.h
new file mode 100644
index 0000000..dacf396
--- /dev/null
+++ b/src/include/fst/complement.h
@@ -0,0 +1,338 @@
+// complement.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Class to complement an Fst.
+
+#ifndef FST_LIB_COMPLEMENT_H__
+#define FST_LIB_COMPLEMENT_H__
+
+#include <algorithm>
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/fst.h>
+#include <fst/test-properties.h>
+
+
+namespace fst {
+
+template <class A> class ComplementFst;
+
+// Implementation of delayed ComplementFst. The algorithm completes the
+// (deterministic) FSA and then exchanges final and non-final states.
+// Completion, i.e. ensuring that all labels can be read from every state,
+// is accomplished with RHO labels, which match all labels that are
+// otherwise not found leaving a state. Output state 0 is a new state that
+// is the destination of all RHO arcs. Every other output state s
+// corresponds to input state s - 1; its first arc is the rho arc and the
+// remaining arcs correspond to the input arcs.
+template <class A>
+class ComplementFstImpl : public FstImpl<A> {
+ public:
+  using FstImpl<A>::SetType;
+  using FstImpl<A>::SetProperties;
+  using FstImpl<A>::SetInputSymbols;
+  using FstImpl<A>::SetOutputSymbols;
+
+  friend class StateIterator< ComplementFst<A> >;
+  friend class ArcIterator< ComplementFst<A> >;
+
+  typedef A Arc;
+  typedef typename A::Label Label;
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+
+  // Wraps a private copy of 'fst'.
+  explicit ComplementFstImpl(const Fst<A> &fst) : fst_(fst.Copy()) {
+    SetType("complement");
+    const uint64 sorted_props = fst.Properties(kILabelSorted, false);
+    SetProperties(ComplementProperties(sorted_props), kCopyProperties);
+    SetInputSymbols(fst.InputSymbols());
+    SetOutputSymbols(fst.OutputSymbols());
+  }
+
+  // Copies 'impl', including a fresh copy of the wrapped FST.
+  ComplementFstImpl(const ComplementFstImpl<A> &impl)
+      : fst_(impl.fst_->Copy()) {
+    SetType("complement");
+    SetProperties(impl.Properties(), kCopyProperties);
+    SetInputSymbols(impl.InputSymbols());
+    SetOutputSymbols(impl.OutputSymbols());
+  }
+
+  ~ComplementFstImpl() { delete fst_; }
+
+  // Input start state shifted by one; state 0 if the input has no start.
+  StateId Start() const {
+    if (Properties(kError))
+      return kNoStateId;
+
+    const StateId input_start = fst_->Start();
+    if (input_start == kNoStateId)
+      return 0;
+    return input_start + 1;
+  }
+
+  // Exchange final and non-final states; make rho destination state final.
+  Weight Final(StateId s) const {
+    const bool is_final = s == 0 || fst_->Final(s - 1) == Weight::Zero();
+    return is_final ? Weight::One() : Weight::Zero();
+  }
+
+  // One rho arc, plus the input arcs for states other than 0.
+  size_t NumArcs(StateId s) const {
+    return s == 0 ? 1 : fst_->NumArcs(s - 1) + 1;
+  }
+
+  size_t NumInputEpsilons(StateId s) const {
+    if (s == 0)
+      return 0;
+    return fst_->NumInputEpsilons(s - 1);
+  }
+
+  size_t NumOutputEpsilons(StateId s) const {
+    if (s == 0)
+      return 0;
+    return fst_->NumOutputEpsilons(s - 1);
+  }
+
+  uint64 Properties() const { return Properties(kFstProperties); }
+
+  // Set error if found; return FST impl properties.
+  uint64 Properties(uint64 mask) const {
+    const bool wants_error = (mask & kError) != 0;
+    if (wants_error && fst_->Properties(kError, false))
+      SetProperties(kError, kError);
+    return FstImpl<Arc>::Properties(mask);
+  }
+
+ private:
+  const Fst<A> *fst_;  // Owned copy of the input FST.
+
+  void operator=(const ComplementFstImpl<A> &fst);  // Disallow
+};
+
+
+// Complements an automaton. This is a library-internal operation that
+// introduces a (negative) 'rho' label; use Difference/DifferenceFst in
+// user code, which will not see this label. This version is a delayed Fst.
+//
+// This class attaches interface to implementation and handles
+// reference counting, delegating most methods to ImplToFst.
+template <class A>
+class ComplementFst : public ImplToFst< ComplementFstImpl<A> > {
+ public:
+  friend class StateIterator< ComplementFst<A> >;
+  friend class ArcIterator< ComplementFst<A> >;
+
+  using ImplToFst< ComplementFstImpl<A> >::GetImpl;
+
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef typename A::Label Label;
+  typedef ComplementFstImpl<A> Impl;
+
+  // The argument must be an unweighted, epsilon-free, deterministic
+  // acceptor; otherwise an error is reported and kError is set.
+  explicit ComplementFst(const Fst<A> &fst)
+      : ImplToFst<Impl>(new Impl(fst)) {
+    const uint64 required =
+        kUnweighted | kNoEpsilons | kIDeterministic | kAcceptor;
+    const bool acceptable = fst.Properties(required, true) == required;
+    if (!acceptable) {
+      FSTERROR() << "ComplementFst: argument not an unweighted "
+                 << "epsilon-free deterministic acceptor";
+      GetImpl()->SetProperties(kError, kError);
+    }
+  }
+
+  // See Fst<>::Copy() for doc.
+  ComplementFst(const ComplementFst<A> &fst, bool safe = false)
+      : ImplToFst<Impl>(fst, safe) {}
+
+  // Get a copy of this ComplementFst. See Fst<>::Copy() for further doc.
+  virtual ComplementFst<A> *Copy(bool safe = false) const {
+    ComplementFst<A> *copy = new ComplementFst<A>(*this, safe);
+    return copy;
+  }
+
+  virtual inline void InitStateIterator(StateIteratorData<A> *data) const;
+
+  virtual inline void InitArcIterator(StateId s,
+                                      ArcIteratorData<A> *data) const;
+
+  // Label that represents the rho transition.
+  // We use a negative value, which is thus private to the library and
+  // which will preserve FST label sort order.
+  static const Label kRhoLabel = -2;
+
+ private:
+  // Makes visible to friends.
+  Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
+
+  void operator=(const ComplementFst<A> &fst);  // disallow
+};
+
+template <class A> const typename A::Label ComplementFst<A>::kRhoLabel;
+
+
+// Specialization for ComplementFst.
+//
+// Yields state 0 (the rho destination state) first and then one state
+// for each state of the wrapped FST, numbered from 1.
+template <class A>
+class StateIterator< ComplementFst<A> > : public StateIteratorBase<A> {
+ public:
+  typedef typename A::StateId StateId;
+  typedef typename A::Label Label;
+
+  explicit StateIterator(const ComplementFst<A> &fst)
+      : siter_(*fst.GetImpl()->fst_), s_(0) {}
+
+  // State 0 always exists; after it, defer to the wrapped iterator.
+  bool Done() const {
+    if (s_ <= 0)
+      return false;
+    return siter_.Done();
+  }
+
+  StateId Value() const { return s_; }
+
+  // The wrapped iterator only starts advancing once state 0 is passed.
+  void Next() {
+    const bool past_rho_state = s_ != 0;
+    ++s_;
+    if (past_rho_state)
+      siter_.Next();
+  }
+
+  void Reset() {
+    s_ = 0;
+    siter_.Reset();
+  }
+
+ private:
+  // This allows base class virtual access to non-virtual derived-
+  // class members of the same name. It makes the derived class more
+  // efficient to use but unsafe to further derive.
+  virtual bool Done_() const { return Done(); }
+  virtual StateId Value_() const { return Value(); }
+  virtual void Next_() { Next(); }
+  virtual void Reset_() { Reset(); }
+
+  StateIterator< Fst<A> > siter_;  // Iterator over the wrapped FST.
+  StateId s_;                      // Current ComplementFst state.
+
+  DISALLOW_COPY_AND_ASSIGN(StateIterator);
+};
+
+
+// Specialization for ComplementFst.
+//
+// Position 0 at every state is a synthesized rho arc to state 0. For a
+// state s != 0, position p > 0 corresponds to arc p - 1 of state s - 1
+// in the wrapped FST, with its destination state incremented by one.
+// State 0 has only the rho arc and no wrapped iterator.
+template <class A>
+class ArcIterator< ComplementFst<A> > : public ArcIteratorBase<A> {
+ public:
+ typedef typename A::StateId StateId;
+ typedef typename A::Label Label;
+ typedef typename A::Weight Weight;
+
+ // Creates the wrapped iterator only for s != 0; aiter_ stays null for
+ // the rho destination state.
+ ArcIterator(const ComplementFst<A> &fst, StateId s)
+ : aiter_(0), s_(s), pos_(0) {
+ if (s_ != 0)
+ aiter_ = new ArcIterator< Fst<A> >(*fst.GetImpl()->fst_, s - 1);
+ }
+
+ virtual ~ArcIterator() { delete aiter_; }
+
+ // The rho arc at position 0 is always available; afterwards the
+ // iterator is done when the wrapped iterator is (or immediately, for
+ // state 0).
+ bool Done() const {
+ if (s_ != 0)
+ return pos_ > 0 && aiter_->Done();
+ else
+ return pos_ > 0;
+ }
+
+ // Adds the rho label to the rho destination state.
+ // The returned reference is to the mutable member arc_, which is
+ // overwritten by each call. For pos_ > 0 this dereferences aiter_, so
+ // it must not be called at state 0 once Done() is true.
+ const A& Value() const {
+ if (pos_ == 0) {
+ arc_.ilabel = arc_.olabel = ComplementFst<A>::kRhoLabel;
+ arc_.weight = Weight::One();
+ arc_.nextstate = 0;
+ } else {
+ arc_ = aiter_->Value();
+ ++arc_.nextstate;
+ }
+ return arc_;
+ }
+
+ // Moving off position 0 (the rho arc) does not advance the wrapped
+ // iterator, which is already positioned at its first arc.
+ void Next() {
+ if (s_ != 0 && pos_ > 0)
+ aiter_->Next();
+ ++pos_;
+ }
+
+ size_t Position() const {
+ return pos_;
+ }
+
+ void Reset() {
+ if (s_ != 0)
+ aiter_->Reset();
+ pos_ = 0;
+ }
+
+ // Position a maps to wrapped position a - 1; position 0 is the rho arc,
+ // for which the wrapped iterator is rewound.
+ void Seek(size_t a) {
+ if (s_ != 0) {
+ if (a == 0) {
+ aiter_->Reset();
+ } else {
+ aiter_->Seek(a - 1);
+ }
+ }
+ pos_ = a;
+ }
+
+ // All arc fields are always computed; the flags cannot be changed.
+ uint32 Flags() const {
+ return kArcValueFlags;
+ }
+
+ void SetFlags(uint32 f, uint32 m) {}
+
+ private:
+ // This allows base class virtual access to non-virtual derived-
+ // class members of the same name. It makes the derived class more
+ // efficient to use but unsafe to further derive.
+ virtual bool Done_() const { return Done(); }
+ virtual const A& Value_() const { return Value(); }
+ virtual void Next_() { Next(); }
+ virtual size_t Position_() const { return Position(); }
+ virtual void Reset_() { Reset(); }
+ virtual void Seek_(size_t a) { Seek(a); }
+ uint32 Flags_() const { return Flags(); }
+ void SetFlags_(uint32 f, uint32 m) { SetFlags(f, m); }
+
+ // Iterator over the wrapped FST's state s_ - 1; null when s_ == 0. Owned.
+ ArcIterator< Fst<A> > *aiter_;
+ // ComplementFst state being iterated.
+ StateId s_;
+ // Current position; 0 is the rho arc.
+ size_t pos_;
+ // Storage for the arc returned by Value().
+ mutable A arc_;
+ DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+
+// Installs a newly allocated ComplementFst state iterator in 'data'.
+template <class A> inline void
+ComplementFst<A>::InitStateIterator(StateIteratorData<A> *data) const {
+  typedef StateIterator< ComplementFst<A> > Iterator;
+  data->base = new Iterator(*this);
+}
+
+// Installs a newly allocated ComplementFst arc iterator for state 's'
+// in 'data'.
+template <class A> inline void
+ComplementFst<A>::InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
+  typedef ArcIterator< ComplementFst<A> > Iterator;
+  data->base = new Iterator(*this, s);
+}
+
+
+// Useful alias when using StdArc.
+typedef ComplementFst<StdArc> StdComplementFst;
+
+} // namespace fst
+
+#endif // FST_LIB_COMPLEMENT_H__
diff --git a/src/include/fst/compose-filter.h b/src/include/fst/compose-filter.h
new file mode 100644
index 0000000..6bf7736
--- /dev/null
+++ b/src/include/fst/compose-filter.h
@@ -0,0 +1,542 @@
+// compose-filter.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Classes for filtering the composition matches, e.g. for correct epsilon
+// handling.
+
+#ifndef FST_LIB_COMPOSE_FILTER_H__
+#define FST_LIB_COMPOSE_FILTER_H__
+
+#include <fst/fst.h>
+#include <fst/fst-decl.h> // For optional argument declarations
+#include <fst/matcher.h>
+
+
+namespace fst {
+
+
+// COMPOSITION FILTER STATE - this represents the state of
+// the composition filter. It has the form:
+//
+// class FilterState {
+// public:
+// // Required constructors
+// FilterState();
+// FilterState(const FilterState &f);
+// // An invalid filter state.
+// static const FilterState NoState();
+// // Maps state to integer for hashing.
+// size_t Hash() const;
+// // Equality of filter states.
+// bool operator==(const FilterState &f) const;
+// // Inequality of filter states.
+// bool operator!=(const FilterState &f) const;
+// // Assignment to filter states.
+// FilterState& operator=(const FilterState& f);
+// };
+
+
+// Filter state that is a signed integral type. The invalid state is
+// represented by kNoStateId.
+template <typename T>
+class IntegerFilterState {
+ public:
+  IntegerFilterState() : state_(kNoStateId) {}
+
+  explicit IntegerFilterState(T s) : state_(s) {}
+
+  // An invalid filter state (same as a default-constructed one).
+  static const IntegerFilterState NoState() {
+    IntegerFilterState invalid;
+    return invalid;
+  }
+
+  // The integer value itself is used as the hash.
+  size_t Hash() const {
+    const size_t h = static_cast<size_t>(state_);
+    return h;
+  }
+
+  bool operator==(const IntegerFilterState &f) const {
+    return f.state_ == state_;
+  }
+
+  bool operator!=(const IntegerFilterState &f) const {
+    return f.state_ != state_;
+  }
+
+  T GetState() const { return state_; }
+
+  void SetState(T state) { state_ = state; }
+
+ private:
+  T state_;
+};
+
+typedef IntegerFilterState<signed char> CharFilterState;
+typedef IntegerFilterState<short> ShortFilterState;
+typedef IntegerFilterState<int> IntFilterState;
+
+
+// Filter state that is a weight (class). The invalid state is
+// represented by W::Zero().
+template <class W>
+class WeightFilterState {
+ public:
+  WeightFilterState() : weight_(W::Zero()) {}
+
+  explicit WeightFilterState(W w) : weight_(w) {}
+
+  // An invalid filter state (same as a default-constructed one).
+  static const WeightFilterState NoState() {
+    WeightFilterState invalid;
+    return invalid;
+  }
+
+  // Delegates hashing to the weight.
+  size_t Hash() const {
+    const size_t h = weight_.Hash();
+    return h;
+  }
+
+  bool operator==(const WeightFilterState &f) const {
+    const bool same = weight_ == f.weight_;
+    return same;
+  }
+
+  bool operator!=(const WeightFilterState &f) const {
+    const bool differs = weight_ != f.weight_;
+    return differs;
+  }
+
+  W GetWeight() const { return weight_; }
+
+  void SetWeight(W w) { weight_ = w; }
+
+ private:
+  W weight_;
+};
+
+
+// Filter state that is the combination of two filter states. The
+// invalid state pairs the invalid states of both components.
+template <class F1, class F2>
+class PairFilterState {
+ public:
+  PairFilterState() : f1_(F1::NoState()), f2_(F2::NoState()) {}
+
+  PairFilterState(const F1 &f1, const F2 &f2) : f1_(f1), f2_(f2) {}
+
+  // An invalid filter state (same as a default-constructed one).
+  static const PairFilterState NoState() {
+    PairFilterState invalid;
+    return invalid;
+  }
+
+  // Rotates the first component's hash left by 5 bits and XORs in the
+  // second component's hash.
+  size_t Hash() const {
+    const size_t first_hash = f1_.Hash();
+    const size_t second_hash = f2_.Hash();
+    const int left_shift = 5;
+    const int right_shift = CHAR_BIT * sizeof(size_t) - 5;
+    const size_t rotated =
+        (first_hash << left_shift) ^ (first_hash >> right_shift);
+    return rotated ^ second_hash;
+  }
+
+  bool operator==(const PairFilterState &f) const {
+    if (!(f1_ == f.f1_))
+      return false;
+    return f2_ == f.f2_;
+  }
+
+  bool operator!=(const PairFilterState &f) const {
+    if (f1_ != f.f1_)
+      return true;
+    return f2_ != f.f2_;
+  }
+
+  const F1 &GetState1() const { return f1_; }
+  const F2 &GetState2() const { return f2_; }
+
+  void SetState(const F1 &f1, const F2 &f2) {
+    f1_ = f1;
+    f2_ = f2;
+  }
+
+ private:
+  F1 f1_;
+  F2 f2_;
+};
+
+
+// COMPOSITION FILTERS - these determine which matches are allowed to
+// proceed. The filter's state is represented by the type
+// ComposeFilter::FilterState. The basic filters handle correct
+// epsilon matching. Their interface is:
+//
+// template <class M1, class M2>
+// class ComposeFilter {
+// public:
+// typedef typename M1::FST FST1;
+// typedef typename M2::FST FST2;
+// typedef typename FST1::Arc Arc;
+// typedef ... FilterState;
+// typedef ... Matcher1;
+// typedef ... Matcher2;
+//
+// // Required constructors.
+// ComposeFilter(const FST1 &fst1, const FST2 &fst2,
+// M1 *matcher1 = 0, M2 *matcher2 = 0);
+// // If safe=true, the copy is thread-safe. See Fst<>::Copy()
+// // for further doc.
+// ComposeFilter(const ComposeFilter<M1, M2> &filter,
+// bool safe = false);
+// // Return start state of filter.
+// FilterState Start() const;
+// // Specifies current composition state.
+// void SetState(StateId s1, StateId s2, const FilterState &f);
+//
+// // Apply filter at current composition state to these transitions.
+// // If an arc label to be matched is kNoLabel, then that side
+// // does not consume a symbol. Returns the new filter state or,
+// // if disallowed, FilterState::NoState(). The filter is permitted to
+// // modify its inputs, e.g. for optimizations.
+// FilterState FilterArc(Arc *arc1, Arc *arc2) const;
+
+// // Apply filter at current composition state to these final weights
+// // (cf. superfinal transitions). The filter may modify its inputs,
+// // e.g. for optimizations.
+// void FilterFinal(Weight *final1, Weight *final2) const;
+//
+// // Return resp matchers. Ownership stays with filter. These
+// // methods allow the filter to access and possibly modify
+// // the composition matchers (useful e.g. with lookahead).
+// Matcher1 *GetMatcher1();
+// Matcher2 *GetMatcher2();
+//
+// // This specifies how the filter affects the composition result
+// // properties. It takes as argument the properties that would
+// // apply with a trivial composition filter.
+// uint64 Properties(uint64 props) const;
+// };
+
+// This filter requires epsilons on FST1 to be read before epsilons on FST2.
+//
+// The filter state is 0 or 1 (or NoState() when a match is disallowed).
+// NOTE(review): alleps1_ and noeps1_ are not initialized by the
+// constructors; FilterArc() relies on SetState() having been called first.
+template <class M1, class M2>
+class SequenceComposeFilter {
+ public:
+ typedef typename M1::FST FST1;
+ typedef typename M2::FST FST2;
+ typedef typename FST1::Arc Arc;
+ typedef CharFilterState FilterState;
+ typedef M1 Matcher1;
+ typedef M2 Matcher2;
+
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Label Label;
+ typedef typename Arc::Weight Weight;
+
+ // Uses the given matchers if non-null, otherwise creates them. Either
+ // way the filter deletes them in its destructor.
+ SequenceComposeFilter(const FST1 &fst1, const FST2 &fst2,
+ M1 *matcher1 = 0, M2 *matcher2 = 0)
+ : matcher1_(matcher1 ? matcher1 : new M1(fst1, MATCH_OUTPUT)),
+ matcher2_(matcher2 ? matcher2 : new M2(fst2, MATCH_INPUT)),
+ fst1_(matcher1_->GetFst()),
+ s1_(kNoStateId),
+ s2_(kNoStateId),
+ f_(kNoStateId) {}
+
+ // Copies the matchers of 'filter'; the current composition state is
+ // not copied.
+ SequenceComposeFilter(const SequenceComposeFilter<M1, M2> &filter,
+ bool safe = false)
+ : matcher1_(filter.matcher1_->Copy(safe)),
+ matcher2_(filter.matcher2_->Copy(safe)),
+ fst1_(matcher1_->GetFst()),
+ s1_(kNoStateId),
+ s2_(kNoStateId),
+ f_(kNoStateId) {}
+
+ ~SequenceComposeFilter() {
+ delete matcher1_;
+ delete matcher2_;
+ }
+
+ FilterState Start() const { return FilterState(0); }
+
+ // Records the current composition state and caches whether s1 has
+ // only output-epsilon arcs (and is non-final) or no output-epsilon arcs.
+ // Does nothing if the state is unchanged.
+ void SetState(StateId s1, StateId s2, const FilterState &f) {
+ if (s1_ == s1 && s2_ == s2 && f == f_)
+ return;
+ s1_ = s1;
+ s2_ = s2;
+ f_ = f;
+ size_t na1 = internal::NumArcs(fst1_, s1);
+ size_t ne1 = internal::NumOutputEpsilons(fst1_, s1);
+ bool fin1 = internal::Final(fst1_, s1) != Weight::Zero();
+ alleps1_ = na1 == ne1 && !fin1;
+ noeps1_ = ne1 == 0;
+ }
+
+ // Three cases, tested in order:
+ // - arc1->olabel == kNoLabel: disallowed if s1_ has only epsilons;
+ // otherwise the next state is 0 if s1_ has no epsilons, else 1.
+ // - arc2->ilabel == kNoLabel: allowed (next state 0) only in state 0.
+ // - otherwise: disallowed if arc1->olabel is epsilon (0), else state 0.
+ FilterState FilterArc(Arc *arc1, Arc *arc2) const {
+ if (arc1->olabel == kNoLabel)
+ return alleps1_ ? FilterState::NoState() :
+ noeps1_ ? FilterState(0) : FilterState(1);
+ else if (arc2->ilabel == kNoLabel)
+ return f_ != FilterState(0) ? FilterState::NoState() : FilterState(0);
+ else
+ return arc1->olabel == 0 ? FilterState::NoState() : FilterState(0);
+ }
+
+ // Final weights are left unmodified.
+ void FilterFinal(Weight *, Weight *) const {}
+
+ // Return resp matchers. Ownership stays with filter.
+ Matcher1 *GetMatcher1() { return matcher1_; }
+ Matcher2 *GetMatcher2() { return matcher2_; }
+
+ // The filter does not alter the composition result properties.
+ uint64 Properties(uint64 props) const { return props; }
+
+ private:
+ Matcher1 *matcher1_;
+ Matcher2 *matcher2_;
+ const FST1 &fst1_;
+ StateId s1_; // Current fst1_ state;
+ StateId s2_; // Current fst2_ state;
+ FilterState f_; // Current filter state
+ bool alleps1_; // Only epsilons (and non-final) leaving s1_?
+ bool noeps1_; // No epsilons leaving s1_?
+
+ void operator=(const SequenceComposeFilter<M1, M2> &); // disallow
+};
+
+
+// This filter requires epsilons on FST2 to be read before epsilons on FST1.
+//
+// The filter state is 0 or 1 (or NoState() when a match is disallowed).
+// NOTE(review): alleps2_ and noeps2_ are not initialized by the
+// constructors; FilterArc() relies on SetState() having been called first.
+template <class M1, class M2>
+class AltSequenceComposeFilter {
+ public:
+ typedef typename M1::FST FST1;
+ typedef typename M2::FST FST2;
+ typedef typename FST1::Arc Arc;
+ typedef CharFilterState FilterState;
+ typedef M1 Matcher1;
+ typedef M2 Matcher2;
+
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Label Label;
+ typedef typename Arc::Weight Weight;
+
+ // Uses the given matchers if non-null, otherwise creates them. Either
+ // way the filter deletes them in its destructor.
+ AltSequenceComposeFilter(const FST1 &fst1, const FST2 &fst2,
+ M1 *matcher1 = 0, M2 *matcher2 = 0)
+ : matcher1_(matcher1 ? matcher1 : new M1(fst1, MATCH_OUTPUT)),
+ matcher2_(matcher2 ? matcher2 : new M2(fst2, MATCH_INPUT)),
+ fst2_(matcher2_->GetFst()),
+ s1_(kNoStateId),
+ s2_(kNoStateId),
+ f_(kNoStateId) {}
+
+ // Copies the matchers of 'filter'; the current composition state is
+ // not copied.
+ AltSequenceComposeFilter(const AltSequenceComposeFilter<M1, M2> &filter,
+ bool safe = false)
+ : matcher1_(filter.matcher1_->Copy(safe)),
+ matcher2_(filter.matcher2_->Copy(safe)),
+ fst2_(matcher2_->GetFst()),
+ s1_(kNoStateId),
+ s2_(kNoStateId),
+ f_(kNoStateId) {}
+
+ ~AltSequenceComposeFilter() {
+ delete matcher1_;
+ delete matcher2_;
+ }
+
+ FilterState Start() const { return FilterState(0); }
+
+ // Records the current composition state and caches whether s2 has
+ // only input-epsilon arcs (and is non-final) or no input-epsilon arcs.
+ // Does nothing if the state is unchanged.
+ void SetState(StateId s1, StateId s2, const FilterState &f) {
+ if (s1_ == s1 && s2_ == s2 && f == f_)
+ return;
+ s1_ = s1;
+ s2_ = s2;
+ f_ = f;
+ size_t na2 = internal::NumArcs(fst2_, s2);
+ size_t ne2 = internal::NumInputEpsilons(fst2_, s2);
+ bool fin2 = internal::Final(fst2_, s2) != Weight::Zero();
+ alleps2_ = na2 == ne2 && !fin2;
+ noeps2_ = ne2 == 0;
+ }
+
+ // Three cases, tested in order:
+ // - arc2->ilabel == kNoLabel: disallowed if s2_ has only epsilons;
+ // otherwise the next state is 0 if s2_ has no epsilons, else 1.
+ // - arc1->olabel == kNoLabel: disallowed in state 1, else state 0.
+ // - otherwise: disallowed if arc1->olabel is epsilon (0), else state 0.
+ FilterState FilterArc(Arc *arc1, Arc *arc2) const {
+ if (arc2->ilabel == kNoLabel)
+ return alleps2_ ? FilterState::NoState() :
+ noeps2_ ? FilterState(0) : FilterState(1);
+ else if (arc1->olabel == kNoLabel)
+ return f_ == FilterState(1) ? FilterState::NoState() : FilterState(0);
+ else
+ return arc1->olabel == 0 ? FilterState::NoState() : FilterState(0);
+ }
+
+ // Final weights are left unmodified.
+ void FilterFinal(Weight *, Weight *) const {}
+
+ // Return resp matchers. Ownership stays with filter.
+ Matcher1 *GetMatcher1() { return matcher1_; }
+ Matcher2 *GetMatcher2() { return matcher2_; }
+
+ // The filter does not alter the composition result properties.
+ uint64 Properties(uint64 props) const { return props; }
+
+ private:
+ Matcher1 *matcher1_;
+ Matcher2 *matcher2_;
+ const FST2 &fst2_;
+ StateId s1_; // Current fst1_ state;
+ StateId s2_; // Current fst2_ state;
+ FilterState f_; // Current filter state
+ bool alleps2_; // Only epsilons (and non-final) leaving s2_?
+ bool noeps2_; // No epsilons leaving s2_?
+
+void operator=(const AltSequenceComposeFilter<M1, M2> &); // disallow
+};
+
+
+// This filter requires epsilons on FST1 to be matched with epsilons on FST2
+// whenever possible.
+//
+// The filter state is 0, 1 or 2 (or NoState() when a match is disallowed).
+// NOTE(review): the alleps*/noeps* flags are not initialized by the
+// constructors; FilterArc() relies on SetState() having been called first.
+template <class M1, class M2>
+class MatchComposeFilter {
+ public:
+ typedef typename M1::FST FST1;
+ typedef typename M2::FST FST2;
+ typedef typename FST1::Arc Arc;
+ typedef CharFilterState FilterState;
+ typedef M1 Matcher1;
+ typedef M2 Matcher2;
+
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Label Label;
+ typedef typename Arc::Weight Weight;
+
+ // Uses the given matchers if non-null, otherwise creates them. Either
+ // way the filter deletes them in its destructor.
+ MatchComposeFilter(const FST1 &fst1, const FST2 &fst2,
+ M1 *matcher1 = 0, M2 *matcher2 = 0)
+ : matcher1_(matcher1 ? matcher1 : new M1(fst1, MATCH_OUTPUT)),
+ matcher2_(matcher2 ? matcher2 : new M2(fst2, MATCH_INPUT)),
+ fst1_(matcher1_->GetFst()),
+ fst2_(matcher2_->GetFst()),
+ s1_(kNoStateId),
+ s2_(kNoStateId),
+ f_(kNoStateId) {}
+
+ // Copies the matchers of 'filter'; the current composition state is
+ // not copied.
+ MatchComposeFilter(const MatchComposeFilter<M1, M2> &filter,
+ bool safe = false)
+ : matcher1_(filter.matcher1_->Copy(safe)),
+ matcher2_(filter.matcher2_->Copy(safe)),
+ fst1_(matcher1_->GetFst()),
+ fst2_(matcher2_->GetFst()),
+ s1_(kNoStateId),
+ s2_(kNoStateId),
+ f_(kNoStateId) {}
+
+ ~MatchComposeFilter() {
+ delete matcher1_;
+ delete matcher2_;
+ }
+
+ FilterState Start() const { return FilterState(0); }
+
+ // Records the current composition state and caches, for both s1
+ // (output epsilons) and s2 (input epsilons), whether the state has only
+ // epsilon arcs and is non-final, or has no epsilon arcs at all.
+ // Does nothing if the state is unchanged.
+ void SetState(StateId s1, StateId s2, const FilterState &f) {
+ if (s1_ == s1 && s2_ == s2 && f == f_)
+ return;
+ s1_ = s1;
+ s2_ = s2;
+ f_ = f;
+ size_t na1 = internal::NumArcs(fst1_, s1);
+ size_t ne1 = internal::NumOutputEpsilons(fst1_, s1);
+ bool f1 = internal::Final(fst1_, s1) != Weight::Zero();
+ alleps1_ = na1 == ne1 && !f1;
+ noeps1_ = ne1 == 0;
+ size_t na2 = internal::NumArcs(fst2_, s2);
+ size_t ne2 = internal::NumInputEpsilons(fst2_, s2);
+ bool f2 = internal::Final(fst2_, s2) != Weight::Zero();
+ alleps2_ = na2 == ne2 && !f2;
+ noeps2_ = ne2 == 0;
+ }
+
+ // Four cases, tested in order:
+ // - arc2->ilabel == kNoLabel: from state 0 the next state is 0 if s2_
+ // has no epsilons, disallowed if s2_ has only epsilons, else 1;
+ // state 1 stays in 1; any other state is disallowed.
+ // - arc1->olabel == kNoLabel: the mirror image, using the s1_ flags
+ // and filter state 2 instead of 1.
+ // - arc1->olabel == 0: allowed (next state 0) only in state 0.
+ // - otherwise: next state 0.
+ FilterState FilterArc(Arc *arc1, Arc *arc2) const {
+ if (arc2->ilabel == kNoLabel) // Epsilon on Fst1
+ return f_ == FilterState(0) ?
+ (noeps2_ ? FilterState(0) :
+ (alleps2_ ? FilterState::NoState(): FilterState(1))) :
+ (f_ == FilterState(1) ? FilterState(1) : FilterState::NoState());
+ else if (arc1->olabel == kNoLabel) // Epsilon on Fst2
+ return f_ == FilterState(0) ?
+ (noeps1_ ? FilterState(0) :
+ (alleps1_ ? FilterState::NoState() : FilterState(2))) :
+ (f_ == FilterState(2) ? FilterState(2) : FilterState::NoState());
+ else if (arc1->olabel == 0) // Epsilon on both
+ return f_ == FilterState(0) ? FilterState(0) : FilterState::NoState();
+ else // Both are non-epsilons
+ return FilterState(0);
+ }
+
+ // Final weights are left unmodified.
+ void FilterFinal(Weight *, Weight *) const {}
+
+ // Return resp matchers. Ownership stays with filter.
+ Matcher1 *GetMatcher1() { return matcher1_; }
+ Matcher2 *GetMatcher2() { return matcher2_; }
+
+ // The filter does not alter the composition result properties.
+ uint64 Properties(uint64 props) const { return props; }
+
+ private:
+ Matcher1 *matcher1_;
+ Matcher2 *matcher2_;
+ const FST1 &fst1_;
+ const FST2 &fst2_;
+ StateId s1_; // Current fst1_ state;
+ StateId s2_; // Current fst2_ state;
+ FilterState f_; // Current filter state ID
+ bool alleps1_, alleps2_; // Only epsilons (and non-final) leaving s1, s2?
+ bool noeps1_, noeps2_; // No epsilons leaving s1, s2?
+
+ void operator=(const MatchComposeFilter<M1, M2> &); // disallow
+};
+
+
+// This filter works with the MultiEpsMatcher to determine if
+// 'multi-epsilons' are preserved in the composition output
+// (rather than rewritten as 0) and ensures correct properties.
+// All filtering decisions are delegated to the wrapped filter F.
+template <class F>
+class MultiEpsFilter {
+ public:
+  typedef typename F::FST1 FST1;
+  typedef typename F::FST2 FST2;
+  typedef typename F::Arc Arc;
+  typedef typename F::Matcher1 Matcher1;
+  typedef typename F::Matcher2 Matcher2;
+  typedef typename F::FilterState FilterState;
+  typedef MultiEpsFilter<F> Filter;
+
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+
+  MultiEpsFilter(const FST1 &fst1, const FST2 &fst2,
+                 Matcher1 *matcher1 = 0, Matcher2 *matcher2 = 0,
+                 bool keep_multi_eps = false)
+      : filter_(fst1, fst2, matcher1, matcher2),
+        keep_multi_eps_(keep_multi_eps) {}
+
+  MultiEpsFilter(const Filter &filter, bool safe = false)
+      : filter_(filter.filter_, safe),
+        keep_multi_eps_(filter.keep_multi_eps_) {}
+
+  FilterState Start() const { return filter_.Start(); }
+
+  void SetState(StateId s1, StateId s2, const FilterState &f) {
+    filter_.SetState(s1, s2, f);
+  }
+
+  // Applies the wrapped filter. If multi-epsilons are kept, a side whose
+  // matched label is kNoLabel then has its other label overwritten with
+  // the matched label of the opposite arc.
+  FilterState FilterArc(Arc *arc1, Arc *arc2) const {
+    FilterState next = filter_.FilterArc(arc1, arc2);
+    if (!keep_multi_eps_)
+      return next;
+    if (arc1->olabel == kNoLabel)
+      arc1->ilabel = arc2->ilabel;
+    if (arc2->ilabel == kNoLabel)
+      arc2->olabel = arc1->olabel;
+    return next;
+  }
+
+  void FilterFinal(Weight *w1, Weight *w2) const {
+    filter_.FilterFinal(w1, w2);
+  }
+
+  // Return resp matchers. Ownership stays with filter.
+  Matcher1 *GetMatcher1() { return filter_.GetMatcher1(); }
+  Matcher2 *GetMatcher2() { return filter_.GetMatcher2(); }
+
+  // Keeps only those properties of the wrapped filter's result that are
+  // invariant to both input- and output-label changes.
+  uint64 Properties(uint64 iprops) const {
+    const uint64 label_invariant =
+        kILabelInvariantProperties & kOLabelInvariantProperties;
+    return filter_.Properties(iprops) & label_invariant;
+  }
+
+ private:
+  F filter_;
+  bool keep_multi_eps_;
+};
+
+} // namespace fst
+
+
+#endif // FST_LIB_COMPOSE_FILTER_H__
diff --git a/src/include/fst/compose.h b/src/include/fst/compose.h
new file mode 100644
index 0000000..c0bf4b1
--- /dev/null
+++ b/src/include/fst/compose.h
@@ -0,0 +1,673 @@
+// compose.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Class to compute the composition of two FSTs
+
+#ifndef FST_LIB_COMPOSE_H__
+#define FST_LIB_COMPOSE_H__
+
+#include <algorithm>
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/cache.h>
+#include <fst/compose-filter.h>
+#include <fst/lookahead-filter.h>
+#include <fst/matcher.h>
+#include <fst/state-table.h>
+#include <fst/test-properties.h>
+
+
+namespace fst {
+
+// Options for delayed composition, parameterized by the arc type, a
+// single matcher type shared by both arguments, the composition
+// filter, and the composition state table. Any of the matchers,
+// filter, or state table left null is constructed by composition
+// itself; any that is supplied here becomes owned by ComposeFst.
+// This variant drives composition between generic Fst<Arc> inputs
+// using one matcher type M for Fst<Arc>, which should be adequate for
+// most applications and gives a reasonable tradeoff between
+// efficiency and code sharing (but see ComposeFstImplOptions).
+template <class A,
+          class M = Matcher<Fst<A> >,
+          class F = SequenceComposeFilter<M>,
+          class T = GenericComposeStateTable<A, typename F::FilterState> >
+struct ComposeFstOptions : public CacheOptions {
+  M *matcher1;     // FST1 matcher (see matcher.h)
+  M *matcher2;     // FST2 matcher
+  F *filter;       // Composition filter (see compose-filter.h)
+  T *state_table;  // Composition state table (see compose-state-table.h)
+
+  // Default cache options; everything else is built by composition.
+  ComposeFstOptions()
+      : matcher1(0),
+        matcher2(0),
+        filter(0),
+        state_table(0) {}
+
+  // Explicit cache options plus optional user-supplied components.
+  explicit ComposeFstOptions(const CacheOptions &opts,
+                             M *mat1 = 0, M *mat2 = 0,
+                             F *filt = 0, T *sttable= 0)
+      : CacheOptions(opts),
+        matcher1(mat1),
+        matcher2(mat2),
+        filter(filt),
+        state_table(sttable) {}
+};
+
+
+// Options for delayed composition, parameterized by two (possibly
+// different) matcher types, the composition filter, and the
+// composition state table. Any of the matchers, filter, or state
+// table left null is constructed by composition itself; any that is
+// supplied here becomes owned by ComposeFst. This variant drives
+// composition with arbitrary matchers (same Arc type, otherwise
+// arbitrary Fst type); the user must ensure the matchers are
+// compatible. These options permit the most efficient use but share
+// the least code, so they are meant for advanced use in the most
+// demanding or specialized applications (o.w. prefer
+// ComposeFstOptions).
+template <class M1, class M2,
+          class F = SequenceComposeFilter<M1, M2>,
+          class T = GenericComposeStateTable<typename M1::Arc,
+                                             typename F::FilterState> >
+struct ComposeFstImplOptions : public CacheOptions {
+  M1 *matcher1;    // FST1 matcher (see matcher.h)
+  M2 *matcher2;    // FST2 matcher
+  F *filter;       // Composition filter (see compose-filter.h)
+  T *state_table;  // Composition state table (see compose-state-table.h)
+
+  // Default cache options; everything else is built by composition.
+  ComposeFstImplOptions()
+      : matcher1(0),
+        matcher2(0),
+        filter(0),
+        state_table(0) {}
+
+  // Explicit cache options plus optional user-supplied components.
+  explicit ComposeFstImplOptions(const CacheOptions &opts,
+                                 M1 *mat1 = 0, M2 *mat2 = 0,
+                                 F *filt = 0, T *sttable= 0)
+      : CacheOptions(opts),
+        matcher1(mat1),
+        matcher2(mat2),
+        filter(filt),
+        state_table(sttable) {}
+};
+
+
+// Base class for the delayed composition implementation. It holds
+// everything that does not depend on the matcher, composition filter
+// or state table types: symbol table checks, and the cache-backed
+// accessors that compute start state, final weights and arcs on
+// demand through the pure virtual hooks below.
+template <class A>
+class ComposeFstImplBase : public CacheImpl<A> {
+ public:
+  using FstImpl<A>::SetType;
+  using FstImpl<A>::SetProperties;
+  using FstImpl<A>::Properties;
+  using FstImpl<A>::SetInputSymbols;
+  using FstImpl<A>::SetOutputSymbols;
+
+  using CacheBaseImpl< CacheState<A> >::HasStart;
+  using CacheBaseImpl< CacheState<A> >::HasFinal;
+  using CacheBaseImpl< CacheState<A> >::HasArcs;
+  using CacheBaseImpl< CacheState<A> >::SetFinal;
+  using CacheBaseImpl< CacheState<A> >::SetStart;
+
+  typedef typename A::Label Label;
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+  typedef CacheState<A> State;
+
+  // Sets the FST type, flags an error when fst1's output symbols are not
+  // compatible with fst2's input symbols, and adopts fst1's input and
+  // fst2's output symbol tables for the result.
+  ComposeFstImplBase(const Fst<A> &fst1, const Fst<A> &fst2,
+                     const CacheOptions &opts)
+      : CacheImpl<A>(opts) {
+    VLOG(2) << "ComposeFst(" << this << "): Begin";
+    SetType("compose");
+
+    const bool compatible =
+        CompatSymbols(fst2.InputSymbols(), fst1.OutputSymbols());
+    if (!compatible) {
+      FSTERROR() << "ComposeFst: output symbol table of 1st argument "
+                 << "does not match input symbol table of 2nd argument";
+      SetProperties(kError, kError);
+    }
+
+    SetInputSymbols(fst1.InputSymbols());
+    SetOutputSymbols(fst2.OutputSymbols());
+  }
+
+  // Copies the cache, the copyable properties and both symbol tables.
+  ComposeFstImplBase(const ComposeFstImplBase<A> &impl)
+      : CacheImpl<A>(impl) {
+    SetProperties(impl.Properties(), kCopyProperties);
+    SetInputSymbols(impl.InputSymbols());
+    SetOutputSymbols(impl.OutputSymbols());
+  }
+
+  virtual ComposeFstImplBase<A> *Copy() = 0;
+
+  virtual ~ComposeFstImplBase() {}
+
+  // Returns the start state, computing and caching it on first use. A
+  // computed kNoStateId is not stored in the cache.
+  StateId Start() {
+    if (HasStart()) return CacheImpl<A>::Start();
+
+    const StateId computed = ComputeStart();
+    if (computed != kNoStateId) SetStart(computed);
+    return CacheImpl<A>::Start();
+  }
+
+  // Returns the final weight of 's', computing and caching it on first use.
+  Weight Final(StateId s) {
+    if (!HasFinal(s)) SetFinal(s, ComputeFinal(s));
+    return CacheImpl<A>::Final(s);
+  }
+
+  // Computes and caches the arcs leaving 's'.
+  virtual void Expand(StateId s) = 0;
+
+  // The accessors below expand 's' first if its arcs are not yet cached.
+  size_t NumArcs(StateId s) {
+    if (!HasArcs(s)) Expand(s);
+    return CacheImpl<A>::NumArcs(s);
+  }
+
+  size_t NumInputEpsilons(StateId s) {
+    if (!HasArcs(s)) Expand(s);
+    return CacheImpl<A>::NumInputEpsilons(s);
+  }
+
+  size_t NumOutputEpsilons(StateId s) {
+    if (!HasArcs(s)) Expand(s);
+    return CacheImpl<A>::NumOutputEpsilons(s);
+  }
+
+  void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
+    if (!HasArcs(s)) Expand(s);
+    CacheImpl<A>::InitArcIterator(s, data);
+  }
+
+ protected:
+  // Hooks supplied by the matcher/filter/state-table specific subclass.
+  virtual StateId ComputeStart() = 0;
+  virtual Weight ComputeFinal(StateId s) = 0;
+};
+
+
+// Implementation of delayed composition templated on the matchers (see
+// matcher.h), composition filter (see compose-filter.h) and
+// the composition state table (see compose-state-table.h).
+//
+// This class deletes the filter and the state table in its destructor.
+// The matchers are obtained from the filter and are not deleted here.
+template <class M1, class M2, class F, class T>
+class ComposeFstImpl : public ComposeFstImplBase<typename M1::Arc> {
+  typedef typename M1::FST FST1;
+  typedef typename M2::FST FST2;
+  typedef typename M1::Arc Arc;
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+  typedef typename F::FilterState FilterState;
+  typedef typename F::Matcher1 Matcher1;
+  typedef typename F::Matcher2 Matcher2;
+
+  using CacheBaseImpl<CacheState<Arc> >::SetArcs;
+  using FstImpl<Arc>::SetType;
+  using FstImpl<Arc>::SetProperties;
+
+  typedef ComposeStateTuple<StateId, FilterState> StateTuple;
+
+ public:
+  ComposeFstImpl(const FST1 &fst1, const FST2 &fst2,
+                 const ComposeFstImplOptions<M1, M2, F, T> &opts);
+
+  // Copy constructor. The filter is copied with its 'safe' argument set,
+  // the matchers and FST references are re-derived from the new filter,
+  // and the state table is copy-constructed.
+  ComposeFstImpl(const ComposeFstImpl<M1, M2, F, T> &impl)
+      : ComposeFstImplBase<Arc>(impl),
+        filter_(new F(*impl.filter_, true)),
+        matcher1_(filter_->GetMatcher1()),
+        matcher2_(filter_->GetMatcher2()),
+        fst1_(matcher1_->GetFst()),
+        fst2_(matcher2_->GetFst()),
+        state_table_(new T(*impl.state_table_)),
+        match_type_(impl.match_type_) {}
+
+  ~ComposeFstImpl() {
+    VLOG(2) << "ComposeFst(" << this
+            << "): End: # of visited states: " << state_table_->Size();
+
+    delete filter_;
+    delete state_table_;
+  }
+
+  virtual ComposeFstImpl<M1, M2, F, T> *Copy() {
+    return new ComposeFstImpl<M1, M2, F, T>(*this);
+  }
+
+  uint64 Properties() const { return Properties(kFstProperties); }
+
+  // Set error if found; return FST impl properties.
+  // When the error bit is requested, the inputs, both matchers, the filter
+  // and the state table are each consulted; the first one reporting an
+  // error short-circuits the remaining checks.
+  uint64 Properties(uint64 mask) const {
+    if ((mask & kError) &&
+        (fst1_.Properties(kError, false) ||
+         fst2_.Properties(kError, false) ||
+         (matcher1_->Properties(0) & kError) ||
+         (matcher2_->Properties(0) & kError) ||
+         (filter_->Properties(0) & kError) ||
+         state_table_->Error())) {
+      SetProperties(kError, kError);
+    }
+    return FstImpl<Arc>::Properties(mask);
+  }
+
+  // Arranges it so that the first arg to OrderedExpand is the Fst
+  // that will be matched on. With MATCH_BOTH, matching is done on the
+  // side with more arcs, so the arc loop runs over the smaller side.
+  void Expand(StateId s) {
+    const StateTuple &tuple = state_table_->Tuple(s);
+    StateId s1 = tuple.state_id1;
+    StateId s2 = tuple.state_id2;
+    filter_->SetState(s1, s2, tuple.filter_state);
+    if (match_type_ == MATCH_OUTPUT ||
+        (match_type_ == MATCH_BOTH &&
+         internal::NumArcs(fst1_, s1) > internal::NumArcs(fst2_, s2)))
+      OrderedExpand(s, fst1_, s1, fst2_, s2, matcher1_, false);
+    else
+      OrderedExpand(s, fst2_, s2, fst1_, s1, matcher2_, true);
+  }
+
+ private:
+  // This does the actual matching of labels in the composition. The
+  // arguments are ordered so matching is called on state 'sa' of
+  // 'fsta' for each arc leaving state 'sb' of 'fstb'. When 'match_input'
+  // is true the output label of each arc at 'sb' is looked up in
+  // 'matchera'; otherwise its input label is.
+  template <class FST, class Matcher>
+  void OrderedExpand(StateId s, const Fst<Arc> &, StateId sa,
+                     const FST &fstb, StateId sb,
+                     Matcher *matchera, bool match_input) {
+    matchera->SetState(sa);
+
+    // First process non-consuming symbols (e.g., epsilons) on FSTA.
+    // The synthetic self-loop at 'sb' carries kNoLabel on the side that
+    // is looked up in the matcher and 0 on the other side.
+    Arc loop(match_input ? 0 : kNoLabel, match_input ? kNoLabel : 0,
+             Weight::One(), sb);
+    MatchArc(s, matchera, loop, match_input);
+
+    // Then process matches on FSTB.
+    for (ArcIterator<FST> iterb(fstb, sb); !iterb.Done(); iterb.Next())
+      MatchArc(s, matchera, iterb.Value(), match_input);
+
+    SetArcs(s);
+  }
+
+  // Matches a single transition from 'fstb' against 'fsta' at 's'.
+  // Each matching pair is passed through the filter (always as
+  // (arc of FST1, arc of FST2)) and added unless the filter rejects it
+  // by returning FilterState::NoState().
+  template <class Matcher>
+  void MatchArc(StateId s, Matcher *matchera,
+                const Arc &arc, bool match_input) {
+    if (matchera->Find(match_input ? arc.olabel : arc.ilabel)) {
+      for (; !matchera->Done(); matchera->Next()) {
+        // Work on copies: the filter may modify the arcs it is given.
+        Arc arca = matchera->Value();
+        Arc arcb = arc;
+        if (match_input) {
+          const FilterState &f = filter_->FilterArc(&arcb, &arca);
+          if (f != FilterState::NoState())
+            AddArc(s, arcb, arca, f);
+        } else {
+          const FilterState &f = filter_->FilterArc(&arca, &arcb);
+          if (f != FilterState::NoState())
+            AddArc(s, arca, arcb, f);
+        }
+      }
+    }
+  }
+
+  // Add a matching transition at 's'. The result arc takes its input
+  // label from 'arc1', its output label from 'arc2', the Times() of both
+  // weights, and the state-table id of the destination tuple.
+  void AddArc(StateId s, const Arc &arc1, const Arc &arc2,
+              const FilterState &f) {
+    StateTuple tuple(arc1.nextstate, arc2.nextstate, f);
+    Arc oarc(arc1.ilabel, arc2.olabel, Times(arc1.weight, arc2.weight),
+             state_table_->FindState(tuple));
+    CacheImpl<Arc>::PushArc(s, oarc);
+  }
+
+  // Returns the id of the tuple (start1, start2, filter start), or
+  // kNoStateId if either input has no start state.
+  StateId ComputeStart() {
+    StateId s1 = fst1_.Start();
+    if (s1 == kNoStateId)
+      return kNoStateId;
+
+    StateId s2 = fst2_.Start();
+    if (s2 == kNoStateId)
+      return kNoStateId;
+
+    const FilterState &f = filter_->Start();
+    StateTuple tuple(s1, s2, f);
+    return state_table_->FindState(tuple);
+  }
+
+  // Returns Weight::Zero() as soon as either component state is
+  // non-final; otherwise lets the filter adjust both final weights and
+  // returns their Times().
+  Weight ComputeFinal(StateId s) {
+    const StateTuple &tuple = state_table_->Tuple(s);
+    StateId s1 = tuple.state_id1;
+    Weight final1 = internal::Final(fst1_, s1);
+    if (final1 == Weight::Zero())
+      return final1;
+
+    StateId s2 = tuple.state_id2;
+    Weight final2 = internal::Final(fst2_, s2);
+    if (final2 == Weight::Zero())
+      return final2;
+
+    filter_->SetState(s1, s2, tuple.filter_state);
+    filter_->FilterFinal(&final1, &final2);
+    return Times(final1, final2);
+  }
+
+  F *filter_;            // Deleted in the destructor.
+  Matcher1 *matcher1_;   // Obtained from filter_; not deleted here.
+  Matcher2 *matcher2_;   // Obtained from filter_; not deleted here.
+  const FST1 &fst1_;     // The FST matcher1_ operates on.
+  const FST2 &fst2_;     // The FST matcher2_ operates on.
+  T *state_table_;       // Deleted in the destructor.
+
+  MatchType match_type_;  // Which side(s) Expand() may match on.
+
+  void operator=(const ComposeFstImpl<M1, M2, F, T> &);  // disallow
+};
+
+// Builds the composition implementation. A filter or state table
+// supplied in 'opts' is used as is; otherwise one is constructed here
+// (the filter receives the matchers from 'opts'). The match type is
+// then selected from what the two matchers report and the initial
+// properties are derived from the inputs via the matchers and filter.
+template <class M1, class M2, class F, class T> inline
+ComposeFstImpl<M1, M2, F, T>::ComposeFstImpl(
+    const FST1 &fst1, const FST2 &fst2,
+    const ComposeFstImplOptions<M1, M2, F, T> &opts)
+    : ComposeFstImplBase<Arc>(fst1, fst2, opts),
+      filter_(opts.filter ? opts.filter :
+              new F(fst1, fst2, opts.matcher1, opts.matcher2)),
+      matcher1_(filter_->GetMatcher1()),
+      matcher2_(filter_->GetMatcher2()),
+      fst1_(matcher1_->GetFst()),
+      fst2_(matcher2_->GetFst()),
+      state_table_(opts.state_table ? opts.state_table :
+                   new T(fst1_, fst2_)) {
+  // Prefer the answers from Type(false); Type(true) is consulted only as
+  // a fallback. NOTE(review): the bool presumably enables an explicit
+  // (more expensive) test of the FST - confirm against matcher.h.
+  MatchType type1 = matcher1_->Type(false);
+  MatchType type2 = matcher2_->Type(false);
+  if (type1 == MATCH_OUTPUT && type2 == MATCH_INPUT) {
+    match_type_ = MATCH_BOTH;
+  } else if (type1 == MATCH_OUTPUT) {
+    match_type_ = MATCH_OUTPUT;
+  } else if (type2 == MATCH_INPUT) {
+    match_type_ = MATCH_INPUT;
+  } else if (matcher1_->Type(true) == MATCH_OUTPUT) {
+    match_type_ = MATCH_OUTPUT;
+  } else if (matcher2_->Type(true) == MATCH_INPUT) {
+    match_type_ = MATCH_INPUT;
+  } else {
+    FSTERROR() << "ComposeFst: 1st argument cannot match on output labels "
+               << "and 2nd argument cannot match on input labels (sort?).";
+    // Give match_type_ a defined value so that Expand() and the copy
+    // constructor never read an indeterminate enum on an error FST.
+    match_type_ = MATCH_NONE;
+    SetProperties(kError, kError);
+  }
+  uint64 fprops1 = fst1.Properties(kFstProperties, false);
+  uint64 fprops2 = fst2.Properties(kFstProperties, false);
+  uint64 mprops1 = matcher1_->Properties(fprops1);
+  uint64 mprops2 = matcher2_->Properties(fprops2);
+  uint64 cprops = ComposeProperties(mprops1, mprops2);
+  SetProperties(filter_->Properties(cprops), kCopyProperties);
+  if (state_table_->Error()) SetProperties(kError, kError);
+  VLOG(2) << "ComposeFst(" << this << "): Initialized";
+}
+
+
+// Computes the composition of two transducers. This version is a
+// delayed Fst. If FST1 transduces string x to y with weight a and FST2
+// transduces y to z with weight b, then their composition transduces
+// string x to z with weight Times(a, b).
+//
+// The output labels of the first transducer or the input labels of
+// the second transducer must be sorted (with the default matcher).
+// The weights need to form a commutative semiring (valid for
+// TropicalWeight and LogWeight).
+//
+// Complexity:
+// Assuming the first FST is unsorted and the second is sorted:
+// - Time: O(v1 v2 d1 (log d2 + m2)),
+// - Space: O(v1 v2)
+// where vi = # of states visited, di = maximum out-degree, and mi the
+// maximum multiplicity of the states visited for the ith
+// FST. Constant time and space to visit an input state or arc is
+// assumed and exclusive of caching.
+//
+// Caveats:
+// - ComposeFst does not trim its output (since it is a delayed operation).
+// - The efficiency of composition can be strongly affected by several factors:
+//   - the choice of which transducer is sorted - prefer sorting the FST
+//     that has the greater average out-degree.
+//   - the amount of non-determinism
+//   - the presence and location of epsilon transitions - avoid epsilon
+//     transitions on the output side of the first transducer or
+//     the input side of the second transducer or prefer placing
+//     them later in a path since they delay matching and can
+//     introduce non-coaccessible states and transitions.
+//
+// This class attaches interface to implementation and handles
+// reference counting, delegating most methods to ImplToFst.
+template <class A>
+class ComposeFst : public ImplToFst< ComposeFstImplBase<A> > {
+ public:
+  friend class ArcIterator< ComposeFst<A> >;
+  friend class StateIterator< ComposeFst<A> >;
+
+  typedef A Arc;
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+  typedef CacheState<A> State;
+  typedef ComposeFstImplBase<A> Impl;
+
+  using ImplToFst<Impl>::SetImpl;
+
+  // Compose specifying only caching options.
+  ComposeFst(const Fst<A> &fst1, const Fst<A> &fst2,
+             const CacheOptions &opts = CacheOptions())
+      : ImplToFst<Impl>(CreateBase(fst1, fst2, opts)) {}
+
+  // Compose specifying one shared matcher type M. Requires input
+  // Fsts and matcher FST type (M::FST) be Fst<A>. Recommended for
+  // best code-sharing and matcher compatibility.
+  template <class M, class F, class T>
+  ComposeFst(const Fst<A> &fst1, const Fst<A> &fst2,
+             const ComposeFstOptions<A, M, F, T> &opts)
+      : ImplToFst<Impl>(CreateBase1(fst1, fst2, opts)) {}
+
+  // Compose specifying two matcher types M1 and M2. Requires input
+  // Fsts (of the same Arc type but o.w. arbitrary) match the
+  // corresponding matcher FST types (M1::FST, M2::FST). Recommended
+  // only for advanced use in demanding or specialized applications
+  // due to potential code bloat and matcher incompatibilities.
+  template <class M1, class M2, class F, class T>
+  ComposeFst(const typename M1::FST &fst1, const typename M2::FST &fst2,
+             const ComposeFstImplOptions<M1, M2, F, T> &opts)
+      : ImplToFst<Impl>(CreateBase2(fst1, fst2, opts)) {}
+
+  // See Fst<>::Copy() for doc. A safe copy gets its own implementation
+  // via Impl::Copy(); otherwise the existing implementation is reused.
+  ComposeFst(const ComposeFst<A> &fst, bool safe = false) {
+    if (safe)
+      SetImpl(fst.GetImpl()->Copy());
+    else
+      SetImpl(fst.GetImpl(), false);
+  }
+
+  // Get a copy of this ComposeFst. See Fst<>::Copy() for further doc.
+  virtual ComposeFst<A> *Copy(bool safe = false) const {
+    return new ComposeFst<A>(*this, safe);
+  }
+
+  virtual inline void InitStateIterator(StateIteratorData<A> *data) const;
+
+  virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
+    GetImpl()->InitArcIterator(s, data);
+  }
+
+ protected:
+  ComposeFst() {}
+
+  // Create compose implementation specifying two matcher types.
+  // For a non-commutative weight type the result is flagged as an error
+  // unless at least one input is unweighted.
+  template <class M1, class M2, class F, class T>
+  static Impl *CreateBase2(
+      const typename M1::FST &fst1, const typename M2::FST &fst2,
+      const ComposeFstImplOptions<M1, M2, F, T> &opts) {
+    Impl *impl = new ComposeFstImpl<M1, M2, F, T>(fst1, fst2, opts);
+    if (!(Weight::Properties() & kCommutative)) {
+      // Property bits are unsigned bitmasks; keep them in uint64.
+      uint64 props1 = fst1.Properties(kUnweighted, true);
+      uint64 props2 = fst2.Properties(kUnweighted, true);
+      if (!(props1 & kUnweighted) && !(props2 & kUnweighted)) {
+        FSTERROR() << "ComposeFst: Weights must be a commutative semiring: "
+                   << Weight::Type();
+        impl->SetProperties(kError, kError);
+      }
+    }
+    return impl;
+  }
+
+  // Create compose implementation specifying one matcher type.
+  // Requires input Fsts and matcher FST type (M::FST) be Fst<A>
+  template <class M, class F, class T>
+  static Impl *CreateBase1(const Fst<A> &fst1, const Fst<A> &fst2,
+                           const ComposeFstOptions<A, M, F, T> &opts) {
+    ComposeFstImplOptions<M, M, F, T> nopts(opts, opts.matcher1, opts.matcher2,
+                                            opts.filter, opts.state_table);
+    return CreateBase2(fst1, fst2, nopts);
+  }
+
+  // Create compose implementation specifying no matcher type.
+  // The matcher and filter are picked from the look-ahead match type
+  // reported for the two inputs; anything else uses the defaults.
+  static Impl *CreateBase(const Fst<A> &fst1, const Fst<A> &fst2,
+                          const CacheOptions &opts) {
+    switch (LookAheadMatchType(fst1, fst2)) {  // Check for lookahead matchers
+      default:
+      case MATCH_NONE: {     // Default composition (no look-ahead)
+        ComposeFstOptions<Arc> nopts(opts);
+        return CreateBase1(fst1, fst2, nopts);
+      }
+      case MATCH_OUTPUT: {   // Lookahead on fst1
+        typedef typename DefaultLookAhead<Arc, MATCH_OUTPUT>::FstMatcher M;
+        typedef typename DefaultLookAhead<Arc, MATCH_OUTPUT>::ComposeFilter F;
+        ComposeFstOptions<Arc, M, F> nopts(opts);
+        return CreateBase1(fst1, fst2, nopts);
+      }
+      case MATCH_INPUT: {    // Lookahead on fst2
+        typedef typename DefaultLookAhead<Arc, MATCH_INPUT>::FstMatcher M;
+        typedef typename DefaultLookAhead<Arc, MATCH_INPUT>::ComposeFilter F;
+        ComposeFstOptions<Arc, M, F> nopts(opts);
+        return CreateBase1(fst1, fst2, nopts);
+      }
+    }
+  }
+
+ private:
+  // Makes visible to friends.
+  Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
+
+  void operator=(const ComposeFst<A> &fst);  // disallow
+};
+
+
+// Specialization for ComposeFst.
+// Adds nothing of its own: the FST and its implementation are simply
+// handed to CacheStateIterator.
+template<class A>
+class StateIterator< ComposeFst<A> >
+ : public CacheStateIterator< ComposeFst<A> > {
+ public:
+ explicit StateIterator(const ComposeFst<A> &fst)
+ : CacheStateIterator< ComposeFst<A> >(fst, fst.GetImpl()) {}
+};
+
+
+// Specialization for ComposeFst.
+// Iterates over the cached arcs of state 's', expanding the state on
+// construction if its arcs have not been computed yet.
+template <class A>
+class ArcIterator< ComposeFst<A> >
+ : public CacheArcIterator< ComposeFst<A> > {
+ public:
+ typedef typename A::StateId StateId;
+
+ ArcIterator(const ComposeFst<A> &fst, StateId s)
+ : CacheArcIterator< ComposeFst<A> >(fst.GetImpl(), s) {
+ // Make sure the arcs of 's' are in the cache.
+ if (!fst.GetImpl()->HasArcs(s))
+ fst.GetImpl()->Expand(s);
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+// Allocates the ComposeFst state iterator specialization for this FST
+// and stores it in data->base.
+// NOTE(review): presumably the holder of 'data' deletes it - confirm.
+template <class A> inline
+void ComposeFst<A>::InitStateIterator(StateIteratorData<A> *data) const {
+ data->base = new StateIterator< ComposeFst<A> >(*this);
+}
+
+// Useful alias when using StdArc.
+typedef ComposeFst<StdArc> StdComposeFst;
+
+// Pre-defined composition filters selectable through ComposeOptions.
+enum ComposeFilter {
+  AUTO_FILTER,          // Let ComposeFst choose (cache options only).
+  SEQUENCE_FILTER,      // Default ComposeFstOptions filter.
+  ALT_SEQUENCE_FILTER,  // AltSequenceComposeFilter.
+  MATCH_FILTER          // MatchComposeFilter.
+};
+
+// Options for the non-delayed Compose() function.
+struct ComposeOptions {
+  bool connect;               // Connect output
+  ComposeFilter filter_type;  // Which pre-defined filter to use
+
+  // Defaults: connect the output and pick the filter automatically.
+  ComposeOptions()
+      : connect(true),
+        filter_type(AUTO_FILTER) {}
+
+  ComposeOptions(bool c, ComposeFilter ft = AUTO_FILTER)
+      : connect(c),
+        filter_type(ft) {}
+};
+
+// Computes the composition of two transducers. This version writes
+// the composed FST into a MutableFst. If FST1 transduces string x to
+// y with weight a and FST2 transduces y to z with weight b, then
+// their composition transduces string x to z with weight
+// Times(a, b).
+//
+// The output labels of the first transducer or the input labels of
+// the second transducer must be sorted. The weights need to form a
+// commutative semiring (valid for TropicalWeight and LogWeight).
+//
+// Complexity:
+// Assuming the first FST is unsorted and the second is sorted:
+// - Time: O(V1 V2 D1 (log D2 + M2)),
+// - Space: O(V1 V2 D1 M2)
+// where Vi = # of states, Di = maximum out-degree, and Mi is
+// the maximum multiplicity for the ith FST.
+//
+// Caveats:
+// - Compose trims its output.
+// - The efficiency of composition can be strongly affected by several factors:
+//   - the choice of which transducer is sorted - prefer sorting the FST
+//     that has the greater average out-degree.
+//   - the amount of non-determinism
+//   - the presence and location of epsilon transitions - avoid epsilon
+//     transitions on the output side of the first transducer or
+//     the input side of the second transducer or prefer placing
+//     them later in a path since they delay matching and can
+//     introduce non-coaccessible states and transitions.
+template<class Arc>
+void Compose(const Fst<Arc> &ifst1, const Fst<Arc> &ifst2,
+             MutableFst<Arc> *ofst,
+             const ComposeOptions &opts = ComposeOptions()) {
+  typedef Matcher< Fst<Arc> > M;
+
+  // In every case gc_limit is set to 0 so that only the last state is
+  // cached, which gives the fastest copy into 'ofst'.
+  switch (opts.filter_type) {
+    case AUTO_FILTER: {
+      CacheOptions cache_opts;
+      cache_opts.gc_limit = 0;
+      *ofst = ComposeFst<Arc>(ifst1, ifst2, cache_opts);
+      break;
+    }
+    case SEQUENCE_FILTER: {
+      ComposeFstOptions<Arc> seq_opts;
+      seq_opts.gc_limit = 0;
+      *ofst = ComposeFst<Arc>(ifst1, ifst2, seq_opts);
+      break;
+    }
+    case ALT_SEQUENCE_FILTER: {
+      ComposeFstOptions<Arc, M, AltSequenceComposeFilter<M> > alt_opts;
+      alt_opts.gc_limit = 0;
+      *ofst = ComposeFst<Arc>(ifst1, ifst2, alt_opts);
+      break;
+    }
+    case MATCH_FILTER: {
+      ComposeFstOptions<Arc, M, MatchComposeFilter<M> > match_opts;
+      match_opts.gc_limit = 0;
+      *ofst = ComposeFst<Arc>(ifst1, ifst2, match_opts);
+      break;
+    }
+  }
+
+  // Optionally trim the result.
+  if (opts.connect) {
+    Connect(ofst);
+  }
+}
+
+} // namespace fst
+
+#endif // FST_LIB_COMPOSE_H__
diff --git a/src/include/fst/concat.h b/src/include/fst/concat.h
new file mode 100644
index 0000000..8500d50
--- /dev/null
+++ b/src/include/fst/concat.h
@@ -0,0 +1,246 @@
+// concat.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Functions and classes to compute the concat of two FSTs.
+
+#ifndef FST_LIB_CONCAT_H__
+#define FST_LIB_CONCAT_H__
+
+#include <vector>
+using std::vector;
+#include <algorithm>
+
+#include <fst/mutable-fst.h>
+#include <fst/rational.h>
+
+
+namespace fst {
+
+// Computes the concatenation (product) of two FSTs. If FST1
+// transduces string x to y with weight a and FST2 transduces string w
+// to v with weight b, then their concatenation transduces string xw
+// to yv with Times(a, b).
+//
+// This version modifies its MutableFst argument (in first position):
+// the states of fst2 are appended to fst1, and every final state of the
+// original fst1 becomes non-final and instead gets an epsilon arc,
+// weighted by its former final weight, to the copy of fst2's start state.
+//
+// Complexity:
+// - Time: O(V1 + V2 + E2)
+// - Space: O(V1 + V2 + E2)
+// where Vi = # of states and Ei = # of arcs of the ith FST.
+//
+template<class Arc>
+void Concat(MutableFst<Arc> *fst1, const Fst<Arc> &fst2) {
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+
+  // TODO(riley): restore when voice actions issues fixed
+  // Check that the symbol table are compatible
+  const bool symbols_match =
+      CompatSymbols(fst1->InputSymbols(), fst2.InputSymbols()) &&
+      CompatSymbols(fst1->OutputSymbols(), fst2.OutputSymbols());
+  if (!symbols_match) {
+    LOG(ERROR) << "Concat: input/output symbol tables of 1st argument "
+               << "do not match input/output symbol tables of 2nd argument";
+    // fst1->SetProperties(kError, kError);
+    // return;
+  }
+
+  // Snapshot the properties before anything is modified.
+  uint64 props1 = fst1->Properties(kFstProperties, false);
+  uint64 props2 = fst2.Properties(kFstProperties, false);
+
+  // An fst1 without a start state stays as it is, apart from inheriting
+  // fst2's error bit.
+  StateId start1 = fst1->Start();
+  if (start1 == kNoStateId) {
+    if (props2 & kError) {
+      fst1->SetProperties(kError, kError);
+    }
+    return;
+  }
+
+  // State ids copied from fst2 are shifted by the original size of fst1.
+  StateId numstates1 = fst1->NumStates();
+  if (fst2.Properties(kExpanded, false)) {
+    fst1->ReserveStates(numstates1 + CountStates(fst2));
+  }
+
+  // Copy the states, final weights and arcs of fst2 into fst1.
+  for (StateIterator< Fst<Arc> > states2(fst2); !states2.Done();
+       states2.Next()) {
+    StateId dst = fst1->AddState();
+    StateId src = states2.Value();
+    fst1->SetFinal(dst, fst2.Final(src));
+    fst1->ReserveArcs(dst, fst2.NumArcs(src));
+    for (ArcIterator< Fst<Arc> > arcs2(fst2, src); !arcs2.Done();
+         arcs2.Next()) {
+      Arc shifted = arcs2.Value();
+      shifted.nextstate += numstates1;
+      fst1->AddArc(dst, shifted);
+    }
+  }
+
+  // Rewire the original final states of fst1 to the start of fst2.
+  StateId start2 = fst2.Start();
+  for (StateId s1 = 0; s1 < numstates1; ++s1) {
+    Weight weight = fst1->Final(s1);
+    if (weight != Weight::Zero()) {
+      fst1->SetFinal(s1, Weight::Zero());
+      if (start2 != kNoStateId) {
+        fst1->AddArc(s1, Arc(0, 0, weight, start2 + numstates1));
+      }
+    }
+  }
+  if (start2 != kNoStateId) {
+    fst1->SetProperties(ConcatProperties(props1, props2), kFstProperties);
+  }
+}
+
+// Computes the concatenation of two FSTs. This version modifies its
+// MutableFst argument (in second position): the states of fst1 are
+// appended to fst2, each copied final state gets an epsilon arc,
+// weighted by its final weight, to the original start state of fst2,
+// and the start state is moved to the copy of fst1's start state.
+//
+// Complexity:
+// - Time: O(V1 + E1)
+// - Space: O(V1 + E1)
+// where Vi = # of states and Ei = # of arcs of the ith FST.
+//
+template<class Arc>
+void Concat(const Fst<Arc> &fst1, MutableFst<Arc> *fst2) {
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+
+  // Check that the symbol table are compatible
+  const bool symbols_match =
+      CompatSymbols(fst1.InputSymbols(), fst2->InputSymbols()) &&
+      CompatSymbols(fst1.OutputSymbols(), fst2->OutputSymbols());
+  if (!symbols_match) {
+    LOG(ERROR) << "Concat: input/output symbol tables of 1st argument "
+               << "do not match input/output symbol tables of 2nd argument";
+    // fst2->SetProperties(kError, kError);
+    // return;
+  }
+
+  // Snapshot the properties before anything is modified.
+  uint64 props1 = fst1.Properties(kFstProperties, false);
+  uint64 props2 = fst2->Properties(kFstProperties, false);
+
+  // An fst2 without a start state stays as it is, apart from inheriting
+  // fst1's error bit.
+  StateId start2 = fst2->Start();
+  if (start2 == kNoStateId) {
+    if (props1 & kError) {
+      fst2->SetProperties(kError, kError);
+    }
+    return;
+  }
+
+  // State ids copied from fst1 are shifted by the original size of fst2.
+  StateId numstates2 = fst2->NumStates();
+  if (fst1.Properties(kExpanded, false)) {
+    fst2->ReserveStates(numstates2 + CountStates(fst1));
+  }
+
+  for (StateIterator< Fst<Arc> > states1(fst1); !states1.Done();
+       states1.Next()) {
+    StateId src = states1.Value();
+    StateId dst = fst2->AddState();
+    Weight weight = fst1.Final(src);
+    // A final state of fst1 needs one extra arc into the start of fst2.
+    const bool is_final = weight != Weight::Zero();
+    fst2->ReserveArcs(dst, fst1.NumArcs(src) + (is_final ? 1 : 0));
+    if (is_final) {
+      fst2->AddArc(dst, Arc(0, 0, weight, start2));
+    }
+    for (ArcIterator< Fst<Arc> > arcs1(fst1, src); !arcs1.Done();
+         arcs1.Next()) {
+      Arc shifted = arcs1.Value();
+      shifted.nextstate += numstates2;
+      fst2->AddArc(dst, shifted);
+    }
+  }
+
+  // If fst1 has no start state, a newly added state becomes the start
+  // and the properties are left as they are.
+  StateId start1 = fst1.Start();
+  if (start1 == kNoStateId) {
+    fst2->SetStart(fst2->AddState());
+  } else {
+    fst2->SetStart(start1 + numstates2);
+    fst2->SetProperties(ConcatProperties(props1, props2), kFstProperties);
+  }
+}
+
+
+// Computes the concatenation of two FSTs. This version modifies its
+// RationalFst input (in first position).
+// Delegates to the implementation's AddConcat(); 'true' is passed here,
+// whereas the overload taking the RationalFst in second position
+// passes 'false'.
+template<class Arc>
+void Concat(RationalFst<Arc> *fst1, const Fst<Arc> &fst2) {
+ fst1->GetImpl()->AddConcat(fst2, true);
+}
+
+// Computes the concatenation of two FSTs. This version modifies its
+// RationalFst input (in second position).
+// Delegates to the implementation's AddConcat(); 'false' is passed here,
+// whereas the overload taking the RationalFst in first position
+// passes 'true'.
+template<class Arc>
+void Concat(const Fst<Arc> &fst1, RationalFst<Arc> *fst2) {
+ fst2->GetImpl()->AddConcat(fst1, false);
+}
+
+typedef RationalFstOptions ConcatFstOptions;
+
+
+// Computes the concatenation (product) of two FSTs; this version is a
+// delayed Fst. If FST1 transduces string x to y with weight a and FST2
+// transduces string w to v with weight b, then their concatenation
+// transduces string xw to yv with Times(a, b).
+//
+// Complexity:
+// - Time: O(v1 + e1 + v2 + e2),
+// - Space: O(v1 + v2)
+// where vi = # of states visited and ei = # of arcs visited of the
+// ith FST. Constant time and space to visit an input state or arc is
+// assumed and exclusive of caching.
+//
+// The class is a thin layer over RationalFst: both constructors
+// delegate to the implementation's InitConcat().
+template <class A>
+class ConcatFst : public RationalFst<A> {
+ public:
+ using ImplToFst< RationalFstImpl<A> >::GetImpl;
+
+ typedef A Arc;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+
+ // Concatenates fst1 and fst2 using a default-constructed RationalFst.
+ ConcatFst(const Fst<A> &fst1, const Fst<A> &fst2) {
+ GetImpl()->InitConcat(fst1, fst2);
+ }
+
+ // As above, but forwards 'opts' to the RationalFst base.
+ ConcatFst(const Fst<A> &fst1, const Fst<A> &fst2,
+ const ConcatFstOptions &opts) : RationalFst<A>(opts) {
+ GetImpl()->InitConcat(fst1, fst2);
+ }
+
+ // See Fst<>::Copy() for doc.
+ ConcatFst(const ConcatFst<A> &fst, bool safe = false)
+ : RationalFst<A>(fst, safe) {}
+
+ // Get a copy of this ConcatFst. See Fst<>::Copy() for further doc.
+ virtual ConcatFst<A> *Copy(bool safe = false) const {
+ return new ConcatFst<A>(*this, safe);
+ }
+};
+
+
+// Specialization for ConcatFst.
+// Simply reuses the RationalFst state iterator.
+template <class A>
+class StateIterator< ConcatFst<A> > : public StateIterator< RationalFst<A> > {
+ public:
+ explicit StateIterator(const ConcatFst<A> &fst)
+ : StateIterator< RationalFst<A> >(fst) {}
+};
+
+
+// Specialization for ConcatFst.
+// Simply reuses the RationalFst arc iterator for state 's'.
+template <class A>
+class ArcIterator< ConcatFst<A> > : public ArcIterator< RationalFst<A> > {
+ public:
+ typedef typename A::StateId StateId;
+
+ ArcIterator(const ConcatFst<A> &fst, StateId s)
+ : ArcIterator< RationalFst<A> >(fst, s) {}
+};
+
+
+// Useful alias when using StdArc.
+typedef ConcatFst<StdArc> StdConcatFst;
+
+} // namespace fst
+
+#endif // FST_LIB_CONCAT_H__
diff --git a/src/include/fst/config.h b/src/include/fst/config.h
new file mode 100644
index 0000000..046b49c
--- /dev/null
+++ b/src/include/fst/config.h
@@ -0,0 +1,12 @@
+/* src/include/fst/config.h. Generated from config.h.in by configure. */
+// OpenFst config file
+
+/* Define to 1 if you have the ICU library. */
+/* #undef HAVE_ICU */
+
+/* Define to 1 if the system has the type `std::tr1::hash<long long
+ unsigned>'. */
+#define HAVE_STD__TR1__HASH_LONG_LONG_UNSIGNED_ 1
+
+/* Define to 1 if the system has the type `__gnu_cxx::slist<int>'. */
+#define HAVE___GNU_CXX__SLIST_INT_ 1
diff --git a/src/include/fst/config.h.in b/src/include/fst/config.h.in
new file mode 100644
index 0000000..7815dfc
--- /dev/null
+++ b/src/include/fst/config.h.in
@@ -0,0 +1,11 @@
+// OpenFst config file
+
+/* Define to 1 if you have the ICU library. */
+#undef HAVE_ICU
+
+/* Define to 1 if the system has the type `std::tr1::hash<long long
+ unsigned>'. */
+#define HAVE_STD__TR1__HASH_LONG_LONG_UNSIGNED_ 1
+
+/* Define to 1 if the system has the type `__gnu_cxx::slist<int>'. */
+#define HAVE___GNU_CXX__SLIST_INT_ 1
diff --git a/src/include/fst/connect.h b/src/include/fst/connect.h
new file mode 100644
index 0000000..427808c
--- /dev/null
+++ b/src/include/fst/connect.h
@@ -0,0 +1,319 @@
+// connect.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Classes and functions to remove unsuccessful paths from an Fst.
+
+#ifndef FST_LIB_CONNECT_H__
+#define FST_LIB_CONNECT_H__
+
+#include <vector>
+using std::vector;
+
+#include <fst/dfs-visit.h>
+#include <fst/union-find.h>
+#include <fst/mutable-fst.h>
+
+
+namespace fst {
+
+// Visitor that computes connected components. The two end states of
+// every arc it is shown are merged into one union-find set, whatever
+// the color of the arc. Use with Visit().
+template <class A>
+class CcVisitor {
+ public:
+  typedef A Arc;
+  typedef typename Arc::Weight Weight;
+  typedef typename A::StateId StateId;
+
+  // The result is written to *cc when the visit finishes; cc[i] is the
+  // connected component number for state i. The union-find structure
+  // is allocated here and freed by the destructor.
+  CcVisitor(vector<StateId> *cc)
+      : comps_(new UnionFind<StateId>(0, kNoStateId)),
+        cc_(cc),
+        nstates_(0) {}
+
+  // The caller supplies 'comps', the union-find structure that receives
+  // the component equivalence classes; it is not freed by this class.
+  CcVisitor(UnionFind<StateId> *comps)
+      : comps_(comps), cc_(0), nstates_(0) {}
+
+  ~CcVisitor() {
+    // comps_ was allocated by this object exactly when cc_ is set.
+    const bool owns_comps = (cc_ != 0);
+    if (owns_comps) delete comps_;
+  }
+
+  void InitVisit(const Fst<A> &fst) {}
+
+  // Counts the state and gives it a set of its own unless it already
+  // belongs to one.
+  bool InitState(StateId s, StateId root) {
+    ++nstates_;
+    const bool unseen = (comps_->FindSet(s) == kNoStateId);
+    if (unseen) comps_->MakeSet(s);
+    return true;
+  }
+
+  // The destination gets a fresh set before being merged with 's'.
+  bool WhiteArc(StateId s, const A &arc) {
+    const StateId dest = arc.nextstate;
+    comps_->MakeSet(dest);
+    comps_->Union(s, dest);
+    return true;
+  }
+
+  // Both end states already have sets: merge them.
+  bool GreyArc(StateId s, const A &arc) {
+    comps_->Union(s, arc.nextstate);
+    return true;
+  }
+
+  // Handled exactly like a grey arc.
+  bool BlackArc(StateId s, const A &arc) { return GreyArc(s, arc); }
+
+  void FinishState(StateId s) {}
+
+  // Fills in the caller's vector, if one was given at construction.
+  void FinishVisit() {
+    if (cc_) GetCcVector(cc_);
+  }
+
+  // cc[i]: connected component number for state i.
+  // Returns number of components.
+  int GetCcVector(vector<StateId> *cc) {
+    cc->clear();
+    cc->resize(nstates_, kNoStateId);
+    StateId ncomp = 0;
+    for (StateId s = 0; s < nstates_; ++s) {
+      const StateId rep = comps_->FindSet(s);
+      // A component gets its number the first time its representative
+      // is met.
+      if ((*cc)[rep] == kNoStateId)
+        (*cc)[rep] = ncomp++;
+      (*cc)[s] = (*cc)[rep];
+    }
+    return ncomp;
+  }
+
+ private:
+  UnionFind<StateId> *comps_;  // Components
+  vector<StateId> *cc_;        // State's cc number
+  StateId nstates_;            // State count
+};
+
+
+// Finds and returns strongly-connected components, accessible and
+// coaccessible states and related properties. Uses Tarjan's single
+// DFS SCC algorithm (see Aho, et al, "Design and Analysis of Computer
+// Algorithms", 189pp). Use with DfsVisit().
+//
+// The scratch vectors are allocated by InitVisit() and released by
+// FinishVisit(). FinishVisit() also resets every pointer it frees, so
+// that a later InitVisit() on the same object never sees a dangling
+// pointer.
+template <class A>
+class SccVisitor {
+ public:
+  typedef A Arc;
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+
+  // scc[i]: strongly-connected component number for state i.
+  //   SCC numbers will be in topological order for acyclic input.
+  // access[i]: accessibility of state i.
+  // coaccess[i]: coaccessibility of state i.
+  // Any of above can be NULL.
+  // props: related property bits (cyclicity, initial cyclicity,
+  //   accessibility, coaccessibility) set/cleared (o.w. unchanged).
+  SccVisitor(vector<StateId> *scc, vector<bool> *access,
+             vector<bool> *coaccess, uint64 *props)
+      : scc_(scc), access_(access), coaccess_(coaccess), props_(props),
+        fst_(0), start_(kNoStateId), nstates_(0), nscc_(0),
+        coaccess_internal_(false), dfnumber_(0), lowlink_(0),
+        onstack_(0), scc_stack_(0) {}
+
+  // Computes the property bits only; no per-state output is returned.
+  SccVisitor(uint64 *props)
+      : scc_(0), access_(0), coaccess_(0), props_(props),
+        fst_(0), start_(kNoStateId), nstates_(0), nscc_(0),
+        coaccess_internal_(false), dfnumber_(0), lowlink_(0),
+        onstack_(0), scc_stack_(0) {}
+
+  void InitVisit(const Fst<A> &fst);
+
+  bool InitState(StateId s, StateId root);
+
+  bool TreeArc(StateId s, const A &arc) { return true; }
+
+  // Lowers the lowlink of 's' to the discovery time of the destination,
+  // propagates coaccessibility back to 's' and records cyclicity in the
+  // property bits (initial cyclicity when the arc enters the start
+  // state).
+  bool BackArc(StateId s, const A &arc) {
+    StateId t = arc.nextstate;
+    if ((*dfnumber_)[t] < (*lowlink_)[s])
+      (*lowlink_)[s] = (*dfnumber_)[t];
+    if ((*coaccess_)[t])
+      (*coaccess_)[s] = true;
+    *props_ |= kCyclic;
+    *props_ &= ~kAcyclic;
+    if (arc.nextstate == start_) {
+      *props_ |= kInitialCyclic;
+      *props_ &= ~kInitialAcyclic;
+    }
+    return true;
+  }
+
+  // Only a cross arc whose destination is still on the SCC stack can
+  // lower the lowlink of 's'. Coaccessibility is propagated back to 's'
+  // in every case.
+  bool ForwardOrCrossArc(StateId s, const A &arc) {
+    StateId t = arc.nextstate;
+    if ((*dfnumber_)[t] < (*dfnumber_)[s] /* cross edge */ &&
+        (*onstack_)[t] && (*dfnumber_)[t] < (*lowlink_)[s])
+      (*lowlink_)[s] = (*dfnumber_)[t];
+    if ((*coaccess_)[t])
+      (*coaccess_)[s] = true;
+    return true;
+  }
+
+  void FinishState(StateId s, StateId p, const A *);
+
+  // Renumbers the SCCs and releases the scratch vectors.
+  void FinishVisit() {
+    // Numbers SCC's in topological order when acyclic.
+    if (scc_) {
+      for (size_t i = 0; i < scc_->size(); ++i)
+        (*scc_)[i] = nscc_ - 1 - (*scc_)[i];
+    }
+    if (coaccess_internal_) {
+      delete coaccess_;
+      // Forget the freed vector. Left dangling, it would be taken for a
+      // caller-supplied vector by the next InitVisit() and cleared
+      // after having been deleted.
+      coaccess_ = 0;
+      coaccess_internal_ = false;
+    }
+    delete dfnumber_;
+    dfnumber_ = 0;
+    delete lowlink_;
+    lowlink_ = 0;
+    delete onstack_;
+    onstack_ = 0;
+    delete scc_stack_;
+    scc_stack_ = 0;
+  }
+
+ private:
+  vector<StateId> *scc_;        // State's scc number
+  vector<bool> *access_;        // State's accessibility
+  vector<bool> *coaccess_;      // State's coaccessibility
+  uint64 *props_;
+  const Fst<A> *fst_;
+  StateId start_;
+  StateId nstates_;             // State count
+  StateId nscc_;                // SCC count
+  bool coaccess_internal_;      // True when coaccess_ was allocated here
+  vector<StateId> *dfnumber_;   // state discovery times
+  vector<StateId> *lowlink_;    // lowlink[s] == dfnumber[s] => SCC root
+  vector<bool> *onstack_;       // is a state on the SCC stack
+  vector<StateId> *scc_stack_;  // SCC stack (w/ random access)
+};
+
+// Prepares the visitor for a traversal of 'fst': empties the result
+// vectors supplied by the caller, allocates the scratch vectors, resets
+// the counters and sets the property bits to acyclic, initial acyclic,
+// accessible and coaccessible before any state has been seen.
+template <class A> inline
+void SccVisitor<A>::InitVisit(const Fst<A> &fst) {
+  if (scc_) scc_->clear();
+  if (access_) access_->clear();
+
+  // Coaccessibility is always tracked; a private vector is used when
+  // the caller did not provide one.
+  coaccess_internal_ = (coaccess_ == 0);
+  if (coaccess_internal_)
+    coaccess_ = new vector<bool>;
+  else
+    coaccess_->clear();
+
+  *props_ |= kAcyclic | kInitialAcyclic | kAccessible | kCoAccessible;
+  *props_ &= ~(kCyclic | kInitialCyclic | kNotAccessible | kNotCoAccessible);
+
+  fst_ = &fst;
+  start_ = fst.Start();
+  nscc_ = 0;
+  nstates_ = 0;
+
+  dfnumber_ = new vector<StateId>;
+  lowlink_ = new vector<StateId>;
+  onstack_ = new vector<bool>;
+  scc_stack_ = new vector<StateId>;
+}
+
+// Called when state 's' is first reached in the DFS tree rooted at
+// 'root'. Pushes 's' on the SCC stack, grows the per-state vectors so
+// that index 's' exists, stamps 's' with its discovery number and
+// records whether it was reached from the start state.
+template <class A> inline
+bool SccVisitor<A>::InitState(StateId s, StateId root) {
+  scc_stack_->push_back(s);
+
+  // All per-state vectors grow in lock step.
+  for (size_t n = dfnumber_->size(); n <= s; ++n) {
+    if (scc_) scc_->push_back(-1);
+    if (access_) access_->push_back(false);
+    coaccess_->push_back(false);
+    dfnumber_->push_back(-1);
+    lowlink_->push_back(-1);
+    onstack_->push_back(false);
+  }
+
+  (*dfnumber_)[s] = nstates_;
+  (*lowlink_)[s] = nstates_;
+  (*onstack_)[s] = true;
+
+  // A state is accessible exactly when its DFS tree is rooted at the
+  // start state.
+  const bool from_start = (root == start_);
+  if (access_) (*access_)[s] = from_start;
+  if (!from_start) {
+    *props_ |= kNotAccessible;
+    *props_ &= ~kAccessible;
+  }
+
+  ++nstates_;
+  return true;
+}
+
+// Called when all arcs of 's' have been processed; 'p' is the DFS
+// parent of 's', or kNoStateId. A state with non-zero final weight is
+// coaccessible. When 's' is the root of an SCC (its lowlink equals its
+// discovery number), the SCC is popped off the stack, numbered, and
+// every member is marked coaccessible if any member is. Finally the
+// coaccessibility and lowlink of 's' are passed up to 'p'.
+template <class A> inline
+void SccVisitor<A>::FinishState(StateId s, StateId p, const A *) {
+  if (fst_->Final(s) != Weight::Zero())
+    (*coaccess_)[s] = true;
+
+  const bool scc_root = ((*dfnumber_)[s] == (*lowlink_)[s]);
+  if (scc_root) {
+    // First pass, from the top of the stack down to 's': is any member
+    // of the SCC coaccessible?
+    bool any_coaccess = false;
+    size_t top = scc_stack_->size();
+    for (;;) {
+      const StateId member = (*scc_stack_)[--top];
+      if ((*coaccess_)[member])
+        any_coaccess = true;
+      if (member == s) break;
+    }
+
+    // Second pass: pop the members, number them and share the verdict.
+    for (;;) {
+      const StateId member = scc_stack_->back();
+      scc_stack_->pop_back();
+      if (scc_)
+        (*scc_)[member] = nscc_;
+      if (any_coaccess)
+        (*coaccess_)[member] = true;
+      (*onstack_)[member] = false;
+      if (member == s) break;
+    }
+
+    if (!any_coaccess) {
+      *props_ |= kNotCoAccessible;
+      *props_ &= ~kCoAccessible;
+    }
+    ++nscc_;
+  }
+
+  if (p == kNoStateId) return;
+  if ((*coaccess_)[s])
+    (*coaccess_)[p] = true;
+  if ((*lowlink_)[s] < (*lowlink_)[p])
+    (*lowlink_)[p] = (*lowlink_)[s];
+}
+
+
+// Trims an FST in place: every state that is not accessible or not
+// coaccessible, as reported by an SccVisitor run over the FST, is
+// deleted, and the result is marked accessible and coaccessible.
+//
+// Complexity:
+// - Time:  O(V + E)
+// - Space: O(V + E)
+// where V = # of states and E = # of arcs.
+template<class Arc>
+void Connect(MutableFst<Arc> *fst) {
+  typedef typename Arc::StateId StateId;
+
+  uint64 props = 0;
+  vector<bool> access;
+  vector<bool> coaccess;
+  SccVisitor<Arc> scc_visitor(0, &access, &coaccess, &props);
+  DfsVisit(*fst, &scc_visitor);
+
+  // Collect the states that lie on no successful path.
+  vector<StateId> dstates;
+  for (StateId s = 0; s < access.size(); ++s) {
+    const bool on_successful_path = access[s] && coaccess[s];
+    if (!on_successful_path)
+      dstates.push_back(s);
+  }
+  fst->DeleteStates(dstates);
+
+  const uint64 trim_props = kAccessible | kCoAccessible;
+  fst->SetProperties(trim_props, trim_props);
+}
+
+} // namespace fst
+
+#endif // FST_LIB_CONNECT_H__
diff --git a/src/include/fst/const-fst.h b/src/include/fst/const-fst.h
new file mode 100644
index 0000000..f68e8ed
--- /dev/null
+++ b/src/include/fst/const-fst.h
@@ -0,0 +1,483 @@
+// const-fst.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Simple concrete immutable FST whose states and arcs are each stored
+// in single arrays.
+
+#ifndef FST_LIB_CONST_FST_H__
+#define FST_LIB_CONST_FST_H__
+
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/expanded-fst.h>
+#include <fst/fst-decl.h> // For optional argument declarations
+#include <fst/test-properties.h>
+#include <fst/util.h>
+
+
+namespace fst {
+
+template <class A, class U> class ConstFst;
+template <class F, class G> void Cast(const F &, G *);
+
+// Implementation class for ConstFst, templated on the arc type A. All states
+// are stored in one array (states_) and all arcs in another (arcs_); the
+// unsigned type U holds each state's offset into the arc array and its counts.
+template <class A, class U>
+class ConstFstImpl : public FstImpl<A> {
+ public:
+ using FstImpl<A>::SetInputSymbols;
+ using FstImpl<A>::SetOutputSymbols;
+ using FstImpl<A>::SetType;
+ using FstImpl<A>::SetProperties;
+ using FstImpl<A>::Properties;
+
+ typedef A Arc;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+ typedef U Unsigned;
+
+ ConstFstImpl()  // Empty FST: null arrays, zero counts, no start state.
+ : states_(0), arcs_(0), nstates_(0), narcs_(0), start_(kNoStateId) {
+ string type = "const";
+ if (sizeof(U) != sizeof(uint32)) {  // Append U's bit width to the type name.
+ string size;
+ Int64ToStr(8 * sizeof(U), &size);
+ type += size;
+ }
+ SetType(type);
+ SetProperties(kNullProperties | kStaticProperties);
+ }
+
+ explicit ConstFstImpl(const Fst<A> &fst);  // Copies states and arcs from fst.
+
+ ~ConstFstImpl() {  // Frees both arrays.
+ delete[] states_;
+ delete[] arcs_;
+ }
+
+ StateId Start() const { return start_; }
+
+ Weight Final(StateId s) const { return states_[s].final; }
+
+ StateId NumStates() const { return nstates_; }
+
+ size_t NumArcs(StateId s) const { return states_[s].narcs; }
+
+ size_t NumInputEpsilons(StateId s) const { return states_[s].niepsilons; }
+
+ size_t NumOutputEpsilons(StateId s) const { return states_[s].noepsilons; }
+
+ static ConstFstImpl<A, U> *Read(istream &strm, const FstReadOptions &opts);
+
+ bool Write(ostream &strm, const FstWriteOptions &opts) const;
+
+ A *Arcs(StateId s) { return arcs_ + states_[s].pos; }  // First arc of state s.
+
+ // Provide information needed for generic state iterator
+ void InitStateIterator(StateIteratorData<A> *data) const {
+ data->base = 0;
+ data->nstates = nstates_;
+ }
+
+ // Provide information needed for the generic arc iterator
+ void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
+ data->base = 0;
+ data->arcs = arcs_ + states_[s].pos;  // Arcs of s are contiguous in arcs_.
+ data->narcs = states_[s].narcs;
+ data->ref_count = 0;
+ }
+
+ private:
+ friend class ConstFst<A, U>; // Allow finding narcs_, nstates_ during Write
+
+ // States implemented by array *states_ below, arcs by (single) *arcs_.
+ struct State {  // Read() and ConstFst::WriteFst() transfer these as raw bytes.
+ Weight final; // Final weight
+ Unsigned pos; // Start of state's arcs in *arcs_
+ Unsigned narcs; // Number of arcs (per state)
+ Unsigned niepsilons; // # of input epsilons
+ Unsigned noepsilons; // # of output epsilons
+ State() : final(Weight::Zero()), niepsilons(0), noepsilons(0) {}  // pos and narcs are left unset.
+ };
+
+ // Properties always true of this Fst class
+ static const uint64 kStaticProperties = kExpanded;
+ // Current unaligned file format version. The unaligned version was added and
+ // made the default since the aligned version does not work on pipes.
+ static const int kFileVersion = 2;
+ // Current aligned file format version
+ static const int kAlignedFileVersion = 1;
+ // Minimum file format version supported
+ static const int kMinFileVersion = 1;
+ // Byte alignment for states and arcs in file format (version 1 only)
+ static const int kFileAlign = 16;
+
+ State *states_; // States representation
+ A *arcs_; // Arcs representation
+ StateId nstates_; // Number of states
+ size_t narcs_; // Number of arcs (per FST)
+ StateId start_; // Initial state
+
+ DISALLOW_COPY_AND_ASSIGN(ConstFstImpl);
+};
+
+template <class A, class U>  // Out-of-class definitions of ConstFstImpl's static constants.
+const uint64 ConstFstImpl<A, U>::kStaticProperties;
+template <class A, class U>
+const int ConstFstImpl<A, U>::kFileVersion;
+template <class A, class U>
+const int ConstFstImpl<A, U>::kAlignedFileVersion;
+template <class A, class U>
+const int ConstFstImpl<A, U>::kMinFileVersion;
+template <class A, class U>
+const int ConstFstImpl<A, U>::kFileAlign;
+
+
+// Builds a ConstFstImpl holding a copy of 'fst'. Symbol tables, start
+// state, final weights and arcs are taken from 'fst'; the per-state arc
+// and epsilon counts are computed while the arcs are copied.
+template<class A, class U>
+ConstFstImpl<A, U>::ConstFstImpl(const Fst<A> &fst) : nstates_(0), narcs_(0) {
+  // The type name is "const", followed by the bit width of U when U
+  // does not have the size of uint32.
+  string type = "const";
+  if (sizeof(U) != sizeof(uint32)) {
+    string size;
+    Int64ToStr(sizeof(U) * 8, &size);
+    type += size;
+  }
+  SetType(type);
+  SetInputSymbols(fst.InputSymbols());
+  SetOutputSymbols(fst.OutputSymbols());
+  start_ = fst.Start();
+
+  // First pass: count states and arcs to size the two arrays.
+  for (StateIterator< Fst<A> > siter(fst); !siter.Done(); siter.Next()) {
+    ++nstates_;
+    for (ArcIterator< Fst<A> > aiter(fst, siter.Value());
+         !aiter.Done();
+         aiter.Next()) {
+      ++narcs_;
+    }
+  }
+  states_ = new State[nstates_];
+  arcs_ = new A[narcs_];
+
+  // Second pass: fill the arrays, state by state.
+  size_t next_arc = 0;
+  for (StateId s = 0; s < nstates_; ++s) {
+    State &state = states_[s];
+    state.final = fst.Final(s);
+    state.pos = next_arc;
+    state.narcs = 0;
+    state.niepsilons = 0;
+    state.noepsilons = 0;
+    for (ArcIterator< Fst<A> > aiter(fst, s); !aiter.Done(); aiter.Next()) {
+      const A &arc = aiter.Value();
+      arcs_[next_arc] = arc;
+      ++next_arc;
+      ++state.narcs;
+      if (arc.ilabel == 0) ++state.niepsilons;
+      if (arc.olabel == 0) ++state.noepsilons;
+    }
+  }
+  SetProperties(fst.Properties(kCopyProperties, true) | kStaticProperties);
+}
+
+
+// Reads a ConstFstImpl from 'strm'. The header supplies the start
+// state and the state and arc counts; the state array and then the arc
+// array are read as raw bytes, each preceded by an alignment step when
+// the file is aligned. Returns the new implementation, or 0 (after
+// deleting it) when the header cannot be read, an alignment step fails
+// or the stream is in a failed state after a read.
+template<class A, class U>
+ConstFstImpl<A, U> *ConstFstImpl<A, U>::Read(istream &strm,
+                                             const FstReadOptions &opts) {
+  ConstFstImpl<A, U> *impl = new ConstFstImpl<A, U>;
+  FstHeader hdr;
+  if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) {
+    delete impl;
+    return 0;
+  }
+  impl->start_ = hdr.Start();
+  impl->nstates_ = hdr.NumStates();
+  impl->narcs_ = hdr.NumArcs();
+  impl->states_ = new State[impl->nstates_];
+  impl->arcs_ = new A[impl->narcs_];
+
+  // Ensures compatibility: files of the aligned version are treated as
+  // aligned whether or not the flag is set.
+  if (hdr.Version() == kAlignedFileVersion)
+    hdr.SetFlags(hdr.GetFlags() | FstHeader::IS_ALIGNED);
+  const bool aligned = (hdr.GetFlags() & FstHeader::IS_ALIGNED) != 0;
+
+  // State array.
+  if (aligned && !AlignInput(strm, kFileAlign)) {
+    LOG(ERROR) << "ConstFst::Read: Alignment failed: " << opts.source;
+    delete impl;
+    return 0;
+  }
+  const size_t state_bytes = impl->nstates_ * sizeof(State);
+  strm.read(reinterpret_cast<char *>(impl->states_), state_bytes);
+  if (!strm) {
+    LOG(ERROR) << "ConstFst::Read: Read failed: " << opts.source;
+    delete impl;
+    return 0;
+  }
+
+  // Arc array.
+  if (aligned && !AlignInput(strm, kFileAlign)) {
+    LOG(ERROR) << "ConstFst::Read: Alignment failed: " << opts.source;
+    delete impl;
+    return 0;
+  }
+  const size_t arc_bytes = impl->narcs_ * sizeof(A);
+  strm.read(reinterpret_cast<char *>(impl->arcs_), arc_bytes);
+  if (!strm) {
+    LOG(ERROR) << "ConstFst::Read: Read failed: " << opts.source;
+    delete impl;
+    return 0;
+  }
+  return impl;
+}
+
+// Simple concrete immutable FST. ConstFst is the interface half of the
+// pair: it holds a ConstFstImpl, handles reference counting and leaves
+// most methods to ImplToExpandedFst. U is the unsigned type used for
+// indices into the arc array (uint32 by default, declared in
+// fst-decl.h).
+template <class A, class U>
+class ConstFst : public ImplToExpandedFst< ConstFstImpl<A, U> > {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef ConstFstImpl<A, U> Impl;
+  typedef U Unsigned;
+
+  friend class StateIterator< ConstFst<A, U> >;
+  friend class ArcIterator< ConstFst<A, U> >;
+  template <class F, class G> friend void Cast(const F &, G *);
+
+  // Creates an FST around a default-constructed implementation.
+  ConstFst() : ImplToExpandedFst<Impl>(new Impl()) {}
+
+  // Creates a ConstFst holding a copy of 'fst'.
+  explicit ConstFst(const Fst<A> &fst)
+      : ImplToExpandedFst<Impl>(new Impl(fst)) {}
+
+  ConstFst(const ConstFst<A, U> &fst) : ImplToExpandedFst<Impl>(fst) {}
+
+  // Get a copy of this ConstFst. See Fst<>::Copy() for further doc.
+  // The 'safe' argument is not consulted.
+  virtual ConstFst<A, U> *Copy(bool safe = false) const {
+    return new ConstFst<A, U>(*this);
+  }
+
+  // Read a ConstFst from an input stream; return NULL on error
+  static ConstFst<A, U> *Read(istream &strm, const FstReadOptions &opts) {
+    Impl *impl = Impl::Read(strm, opts);
+    if (!impl) return 0;
+    return new ConstFst<A, U>(impl);
+  }
+
+  // Read a ConstFst from a file; return NULL on error
+  // Empty filename reads from standard input
+  static ConstFst<A, U> *Read(const string &filename) {
+    Impl *impl = ImplToExpandedFst<Impl>::Read(filename);
+    if (!impl) return 0;
+    return new ConstFst<A, U>(impl);
+  }
+
+  // Writes this FST to a stream, in ConstFst format.
+  virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
+    return WriteFst(*this, strm, opts);
+  }
+
+  // Writes this FST to the named file.
+  virtual bool Write(const string &filename) const {
+    return Fst<A>::WriteFile(filename);
+  }
+
+  // Writes any FST of type F to a stream, in ConstFst format.
+  template <class F>
+  static bool WriteFst(const F &fst, ostream &strm,
+                       const FstWriteOptions &opts);
+
+  virtual void InitStateIterator(StateIteratorData<Arc> *data) const {
+    GetImpl()->InitStateIterator(data);
+  }
+
+  virtual void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const {
+    GetImpl()->InitArcIterator(s, data);
+  }
+
+ private:
+  // Wraps an existing implementation; used by the Read() functions.
+  explicit ConstFst(Impl *impl) : ImplToExpandedFst<Impl>(impl) {}
+
+  // Makes visible to friends.
+  Impl *GetImpl() const {
+    return ImplToFst<Impl, ExpandedFst<A> >::GetImpl();
+  }
+
+  void SetImpl(Impl *impl, bool own_impl = true) {
+    ImplToFst< Impl, ExpandedFst<A> >::SetImpl(impl, own_impl);
+  }
+
+  void operator=(const ConstFst<A, U> &fst);  // disallow
+};
+
+// Writes 'fst' to 'strm' in Const format and returns true on success.
+//
+// The header needs the number of states and arcs:
+// - if 'fst' is itself a ConstFst, both are taken from its
+//   implementation;
+// - otherwise, if the stream position cannot be obtained (tellp()
+//   yields -1, e.g. on a pipe), they are computed by an extra pass over
+//   the machine before anything is written;
+// - otherwise a provisional header is written first and rewritten at
+//   'start_offset' once the states and arcs have been counted.
+//
+// When opts.align is set the aligned file version is written and the
+// state and arc arrays are each aligned to kFileAlign bytes.
+template <class A, class U>
+template <class F>
+bool ConstFst<A, U>::WriteFst(const F &fst, ostream &strm,
+                              const FstWriteOptions &opts) {
+  static const int kFileVersion = 2;
+  static const int kAlignedFileVersion = 1;
+  static const int kFileAlign = 16;
+  int file_version = opts.align ? kAlignedFileVersion : kFileVersion;
+  size_t num_arcs = -1, num_states = -1;
+  size_t start_offset = 0;
+  bool update_header = true;
+  if (fst.Type() == ConstFst<A, U>().Type()) {
+    const ConstFst<A, U> *const_fst = static_cast<const ConstFst<A, U> *>(&fst);
+    num_arcs = const_fst->GetImpl()->narcs_;
+    num_states = const_fst->GetImpl()->nstates_;
+    update_header = false;
+  } else if ((start_offset = strm.tellp()) == -1) {
+    // precompute values needed for header when we cannot seek to rewrite it.
+    // The counts must start from zero here: accumulating onto the -1
+    // placeholders would leave both totals one short and make the
+    // consistency checks at the end fail on every such write.
+    num_arcs = 0;
+    num_states = 0;
+    for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
+      num_arcs += fst.NumArcs(siter.Value());
+      ++num_states;
+    }
+    update_header = false;
+  }
+  FstHeader hdr;
+  hdr.SetStart(fst.Start());
+  hdr.SetNumStates(num_states);
+  hdr.SetNumArcs(num_arcs);
+  string type = "const";
+  if (sizeof(U) != sizeof(uint32)) {
+    string size;
+    Int64ToStr(8 * sizeof(U), &size);
+    type += size;
+  }
+  FstImpl<A>::WriteFstHeader(fst, strm, opts, file_version, type, &hdr);
+  if (opts.align && !AlignOutput(strm, kFileAlign)) {
+    LOG(ERROR) << "Could not align file during write after header";
+    return false;
+  }
+
+  // State array. 'pos' is the running index of each state's first arc;
+  // after the loop it is the total number of arcs.
+  size_t pos = 0, states = 0;
+  typename ConstFstImpl<A, U>::State state;
+  for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
+    state.final = fst.Final(siter.Value());
+    state.pos = pos;
+    state.narcs = fst.NumArcs(siter.Value());
+    state.niepsilons = fst.NumInputEpsilons(siter.Value());
+    state.noepsilons = fst.NumOutputEpsilons(siter.Value());
+    strm.write(reinterpret_cast<const char *>(&state), sizeof(state));
+    pos += state.narcs;
+    states++;
+  }
+  hdr.SetNumStates(states);
+  hdr.SetNumArcs(pos);
+  if (opts.align && !AlignOutput(strm, kFileAlign)) {
+    LOG(ERROR) << "Could not align file during write after writing states";
+    // Give up, as after the header: the arcs would otherwise be written
+    // at an unaligned offset and success reported to the caller.
+    return false;
+  }
+
+  // Arc array, in state order.
+  for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
+    StateId s = siter.Value();
+    for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) {
+      const A &arc = aiter.Value();
+      strm.write(reinterpret_cast<const char *>(&arc), sizeof(arc));
+    }
+  }
+  strm.flush();
+  if (!strm) {
+    LOG(ERROR) << "ConstFst Write write failed: " << opts.source;
+    return false;
+  }
+  if (update_header) {
+    return FstImpl<A>::UpdateFstHeader(fst, strm, opts, file_version, type,
+                                       &hdr, start_offset);
+  } else {
+    // The header already went out with the counts known in advance;
+    // they must agree with what was actually written.
+    if (hdr.NumStates() != num_states) {
+      LOG(ERROR) << "Inconsistent number of states observed during write";
+      return false;
+    }
+    if (hdr.NumArcs() != num_arcs) {
+      LOG(ERROR) << "Inconsistent number of arcs observed during write";
+      return false;
+    }
+  }
+  return true;
+}
+
+// State iterator specialized for ConstFst; see the generic version in
+// fst.h for sample usage (but use the ConstFst type!). It only counts
+// from 0 up to the number of states, so it should inline.
+template <class A, class U>
+class StateIterator< ConstFst<A, U> > {
+ public:
+  typedef typename A::StateId StateId;
+
+  explicit StateIterator(const ConstFst<A, U> &fst)
+      : nstates_(fst.GetImpl()->NumStates()), s_(0) {}
+
+  // True once the current position is past the last state.
+  bool Done() const { return !(s_ < nstates_); }
+
+  // The current state id.
+  StateId Value() const { return s_; }
+
+  // Advances to the next state.
+  void Next() { ++s_; }
+
+  // Returns to the first state.
+  void Reset() { s_ = 0; }
+
+ private:
+  StateId nstates_;  // Number of states, read once at construction
+  StateId s_;        // Current state
+
+  DISALLOW_COPY_AND_ASSIGN(StateIterator);
+};
+
+
+// Arc iterator specialized for ConstFst; see the generic version in
+// fst.h for sample usage (but use the ConstFst type!). It indexes
+// directly into the arc array of the implementation, so it should
+// inline.
+template <class A, class U>
+class ArcIterator< ConstFst<A, U> > {
+ public:
+  typedef typename A::StateId StateId;
+
+  // Positions the iterator on the first arc of state 's'.
+  ArcIterator(const ConstFst<A, U> &fst, StateId s)
+      : arcs_(fst.GetImpl()->Arcs(s)),
+        narcs_(fst.GetImpl()->NumArcs(s)),
+        i_(0) {}
+
+  // True once the current position is past the last arc.
+  bool Done() const { return !(i_ < narcs_); }
+
+  // The current arc.
+  const A& Value() const { return *(arcs_ + i_); }
+
+  void Next() { ++i_; }
+
+  // Index of the current arc among the arcs of the state.
+  size_t Position() const { return i_; }
+
+  void Reset() { i_ = 0; }
+
+  // Moves to the arc at index 'a'.
+  void Seek(size_t a) { i_ = a; }
+
+  // Always reports kArcValueFlags.
+  uint32 Flags() const { return kArcValueFlags; }
+
+  // Ignores its arguments; the flags cannot be changed.
+  void SetFlags(uint32 f, uint32 m) {}
+
+ private:
+  const A *arcs_;  // First arc of the state
+  size_t narcs_;   // Number of arcs of the state
+  size_t i_;       // Current position
+
+  DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+// Convenience alias: ConstFst instantiated with StdArc.
+typedef ConstFst<StdArc> StdConstFst;
+
+} // namespace fst
+
+#endif // FST_LIB_CONST_FST_H__
diff --git a/src/include/fst/determinize.h b/src/include/fst/determinize.h
new file mode 100644
index 0000000..417142f
--- /dev/null
+++ b/src/include/fst/determinize.h
@@ -0,0 +1,887 @@
+// determinize.h
+
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Functions and classes to determinize an FST.
+
+#ifndef FST_LIB_DETERMINIZE_H__
+#define FST_LIB_DETERMINIZE_H__
+
+#include <algorithm>
+#include <climits>
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <map>
+#include <fst/slist.h>
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/cache.h>
+#include <fst/factor-weight.h>
+#include <fst/arc-map.h>
+#include <fst/prune.h>
+#include <fst/test-properties.h>
+
+
+namespace fst {
+
+//
+// COMMON DIVISORS - these are used in determinization to compute
+// the transition weights. In the simplest case, it is just the same
+// as the semiring Plus(). However, other choices permit more efficient
+// determinization when the output contains strings.
+//
+
+// Default common divisor: the semiring Plus of the two weights.
+template <class W>
+class DefaultCommonDivisor {
+ public:
+  typedef W Weight;
+
+  // Returns Plus(w1, w2).
+  W operator()(const W &w1, const W &w2) const {
+    return Plus(w1, w2);
+  }
+};
+
+
+// The label common divisor for a (left) string semiring selects a
+// single letter common prefix or the empty string. This is used in
+// the determinization of output strings so that at most a single
+// letter will appear in the output of a transition.
+template <typename L, StringType S>
+class LabelCommonDivisor {
+ public:
+  typedef StringWeight<L, S> Weight;
+
+  // Returns:
+  // - NoWeight() (after reporting an error) when the string weight is
+  //   not a left semiring;
+  // - One() when either string is empty;
+  // - the first label of the other string when one of them is Zero();
+  // - the common first label when both strings start with the same
+  //   label, and One() otherwise.
+  Weight operator()(const Weight &w1, const Weight &w2) const {
+    StringWeightIterator<L, S> iter1(w1);
+    StringWeightIterator<L, S> iter2(w2);
+
+    if (!(StringWeight<L, S>::Properties() & kLeftSemiring)) {
+      FSTERROR() << "LabelCommonDivisor: Weight needs to be left semiring";
+      return Weight::NoWeight();
+    }
+    if (w1.Size() == 0 || w2.Size() == 0)
+      return Weight::One();
+    if (w1 == Weight::Zero())
+      return Weight(iter2.Value());
+    if (w2 == Weight::Zero())
+      return Weight(iter1.Value());
+
+    const bool same_first_label = (iter1.Value() == iter2.Value());
+    return same_first_label ? Weight(iter1.Value()) : Weight::One();
+  }
+};
+
+
+// Common divisor for gallic weights. The string component is handled
+// by the label common divisor and the weight component by the common
+// divisor D, which defaults to the default common divisor.
+template <class L, class W, StringType S, class D = DefaultCommonDivisor<W> >
+class GallicCommonDivisor {
+ public:
+  typedef GallicWeight<L, W, S> Weight;
+
+  // Combines the two components (Value1 and Value2) independently.
+  Weight operator()(const Weight &w1, const Weight &w2) const {
+    return Weight(
+        label_common_divisor_(w1.Value1(), w2.Value1()),
+        weight_common_divisor_(w1.Value2(), w2.Value2()));
+  }
+
+ private:
+  LabelCommonDivisor<L, S> label_common_divisor_;  // For Value1()
+  D weight_common_divisor_;                        // For Value2()
+};
+
+// Options for finite-state transducer determinization: the cache
+// options it inherits, plus a quantization delta and a subsequential
+// label.
+template <class Arc>
+struct DeterminizeFstOptions : CacheOptions {
+  typedef typename Arc::Label Label;
+
+  // Quantization delta for subset weights.
+  float delta;
+  // Label used for residual final output when producing subsequential
+  // transducers.
+  Label subsequential_label;
+
+  // Starts from the given cache options.
+  explicit DeterminizeFstOptions(const CacheOptions &opts,
+                                 float del = kDelta,
+                                 Label lab = 0)
+      : CacheOptions(opts),
+        delta(del),
+        subsequential_label(lab) {}
+
+  // Starts from default-constructed cache options.
+  explicit DeterminizeFstOptions(float del = kDelta, Label lab = 0)
+      : delta(del),
+        subsequential_label(lab) {}
+};
+
+
+// Implementation of delayed DeterminizeFst. This base class is common
+// to the variants that implement acceptor and transducer
+// determinization. It keeps a copy of the input FST and fills the cache
+// on demand: the start state, final weights and arcs are computed, by
+// the pure virtual ComputeStart(), ComputeFinal() and Expand(), the
+// first time they are asked for.
+template <class A>
+class DeterminizeFstImplBase : public CacheImpl<A> {
+ public:
+  using FstImpl<A>::SetType;
+  using FstImpl<A>::SetProperties;
+  using FstImpl<A>::Properties;
+  using FstImpl<A>::SetInputSymbols;
+  using FstImpl<A>::SetOutputSymbols;
+
+  using CacheBaseImpl< CacheState<A> >::HasStart;
+  using CacheBaseImpl< CacheState<A> >::HasFinal;
+  using CacheBaseImpl< CacheState<A> >::HasArcs;
+  using CacheBaseImpl< CacheState<A> >::SetFinal;
+  using CacheBaseImpl< CacheState<A> >::SetStart;
+
+  typedef typename A::Label Label;
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+  typedef CacheState<A> State;
+
+  // Copies 'fst' and derives the properties and symbol tables of the
+  // result from it.
+  DeterminizeFstImplBase(const Fst<A> &fst,
+                         const DeterminizeFstOptions<A> &opts)
+      : CacheImpl<A>(opts), fst_(fst.Copy()) {
+    SetType("determinize");
+    const uint64 input_props = fst.Properties(kFstProperties, false);
+    const bool has_subsequential_label = (opts.subsequential_label != 0);
+    SetProperties(DeterminizeProperties(input_props, has_subsequential_label),
+                  kCopyProperties);
+
+    SetInputSymbols(fst.InputSymbols());
+    SetOutputSymbols(fst.OutputSymbols());
+  }
+
+  // Copy constructor; the input FST is copied with Copy(true).
+  DeterminizeFstImplBase(const DeterminizeFstImplBase<A> &impl)
+      : CacheImpl<A>(impl),
+        fst_(impl.fst_->Copy(true)) {
+    SetType("determinize");
+    SetProperties(impl.Properties(), kCopyProperties);
+    SetInputSymbols(impl.InputSymbols());
+    SetOutputSymbols(impl.OutputSymbols());
+  }
+
+  virtual ~DeterminizeFstImplBase() { delete fst_; }
+
+  virtual DeterminizeFstImplBase<A> *Copy() = 0;
+
+  // Computes the start state on first use. A result of kNoStateId is
+  // not stored in the cache.
+  StateId Start() {
+    if (!HasStart()) {
+      const StateId start = ComputeStart();
+      if (start != kNoStateId)
+        SetStart(start);
+    }
+    return CacheImpl<A>::Start();
+  }
+
+  // Computes the final weight of 's' on first use.
+  Weight Final(StateId s) {
+    if (!HasFinal(s))
+      SetFinal(s, ComputeFinal(s));
+    return CacheImpl<A>::Final(s);
+  }
+
+  virtual void Expand(StateId s) = 0;
+
+  size_t NumArcs(StateId s) {
+    ExpandIfNeeded(s);
+    return CacheImpl<A>::NumArcs(s);
+  }
+
+  size_t NumInputEpsilons(StateId s) {
+    ExpandIfNeeded(s);
+    return CacheImpl<A>::NumInputEpsilons(s);
+  }
+
+  size_t NumOutputEpsilons(StateId s) {
+    ExpandIfNeeded(s);
+    return CacheImpl<A>::NumOutputEpsilons(s);
+  }
+
+  void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
+    ExpandIfNeeded(s);
+    CacheImpl<A>::InitArcIterator(s, data);
+  }
+
+  virtual StateId ComputeStart() = 0;
+
+  virtual Weight ComputeFinal(StateId s) = 0;
+
+  // The input FST (the copy held by this object).
+  const Fst<A> &GetFst() const { return *fst_; }
+
+ private:
+  // Expands 's' unless its arcs are already in the cache.
+  void ExpandIfNeeded(StateId s) {
+    if (!HasArcs(s))
+      Expand(s);
+  }
+
+  const Fst<A> *fst_;  // Input Fst
+
+  void operator=(const DeterminizeFstImplBase<A> &);  // disallow
+};
+
+
+ // Implementation of delayed determinization for weighted acceptors.
+ // It is templated on the arc type A and the common divisor D.
+ //
+ // Every output state is identified with a Subset: a list of Elements, each
+ // pairing an input state with a residual weight. Subsets are created on
+ // demand and are owned by this object (see subsets_ and the destructor).
+ template <class A, class D>
+ class DeterminizeFsaImpl : public DeterminizeFstImplBase<A> {
+  public:
+   using FstImpl<A>::SetProperties;
+   using DeterminizeFstImplBase<A>::GetFst;
+   using DeterminizeFstImplBase<A>::SetArcs;
+
+   typedef typename A::Label Label;
+   typedef typename A::Weight Weight;
+   typedef typename A::StateId StateId;
+
+   // One member of a subset: an input state and its residual weight.
+   struct Element {
+     Element() {}
+
+     Element(StateId s, Weight w) : state_id(s), weight(w) {}
+
+     StateId state_id;  // Input state Id
+     Weight weight;     // Residual weight
+   };
+   typedef slist<Element> Subset;
+   typedef map<Label, Subset*> LabelMap;
+
+   // Builds the implementation for input FST 'fst'.
+   // - common_divisor: functor used to compute the weight of each output arc.
+   // - in_dist: optional distances to the final states of the input.
+   // - out_dist: optional vector that receives one distance per created
+   //   output state; it is cleared here when non-null.
+   // Sets kError if the input is not an acceptor or the weight type lacks
+   // the kLeftSemiring property.
+   DeterminizeFsaImpl(const Fst<A> &fst, D common_divisor,
+                      const vector<Weight> *in_dist, vector<Weight> *out_dist,
+                      const DeterminizeFstOptions<A> &opts)
+       : DeterminizeFstImplBase<A>(fst, opts),
+         delta_(opts.delta),
+         in_dist_(in_dist),
+         out_dist_(out_dist),
+         common_divisor_(common_divisor),
+         subset_hash_(0, SubsetKey(), SubsetEqual(&elements_)) {
+     if (!fst.Properties(kAcceptor, true)) {
+       FSTERROR() << "DeterminizeFst: argument not an acceptor";
+       SetProperties(kError, kError);
+     }
+     if (!(Weight::Properties() & kLeftSemiring)) {
+       FSTERROR() << "DeterminizeFst: Weight needs to be left distributive: "
+                  << Weight::Type();
+       SetProperties(kError, kError);
+     }
+     if (out_dist_)
+       out_dist_->clear();
+   }
+
+   // Copy constructor. The distance vectors are not carried over to the
+   // copy; copying an implementation that has an out_dist vector is
+   // reported as an error.
+   DeterminizeFsaImpl(const DeterminizeFsaImpl<A, D> &impl)
+       : DeterminizeFstImplBase<A>(impl),
+         delta_(impl.delta_),
+         in_dist_(0),
+         out_dist_(0),
+         common_divisor_(impl.common_divisor_),
+         subset_hash_(0, SubsetKey(), SubsetEqual(&elements_)) {
+     if (impl.out_dist_) {
+       FSTERROR() << "DeterminizeFsaImpl: cannot copy with out_dist vector";
+       SetProperties(kError, kError);
+     }
+   }
+
+   // Frees every subset registered in subsets_.
+   virtual ~DeterminizeFsaImpl() {
+     // Unsigned index: avoids a signed/unsigned comparison with size().
+     for (size_t i = 0; i < subsets_.size(); ++i)
+       delete subsets_[i];
+   }
+
+   virtual DeterminizeFsaImpl<A, D> *Copy() {
+     return new DeterminizeFsaImpl<A, D>(*this);
+   }
+
+   uint64 Properties() const { return Properties(kFstProperties); }
+
+   // Set error if found; return FST impl properties.
+   uint64 Properties(uint64 mask) const {
+     if ((mask & kError) && (GetFst().Properties(kError, false)))
+       SetProperties(kError, kError);
+     return FstImpl<A>::Properties(mask);
+   }
+
+   // Returns the output start state: the state of the singleton subset
+   // holding the input start state with weight One(), or kNoStateId when
+   // the input has no start state.
+   virtual StateId ComputeStart() {
+     StateId s = GetFst().Start();
+     if (s == kNoStateId)
+       return kNoStateId;
+     Element element(s, Weight::One());
+     Subset *subset = new Subset;
+     subset->push_front(element);
+     return FindState(subset);
+   }
+
+   // Returns the final weight of output state 's': the Plus-sum over its
+   // subset of Times(residual weight, input final weight). Sets kError if
+   // an intermediate sum is not a Member().
+   virtual Weight ComputeFinal(StateId s) {
+     Subset *subset = subsets_[s];
+     Weight final = Weight::Zero();
+     for (typename Subset::iterator siter = subset->begin();
+          siter != subset->end();
+          ++siter) {
+       Element &element = *siter;
+       final = Plus(final, Times(element.weight,
+                                 GetFst().Final(element.state_id)));
+       if (!final.Member())
+         SetProperties(kError, kError);
+     }
+     return final;
+   }
+
+   // Finds the state corresponding to a subset. Only creates a new state
+   // if the subset is not found in the subset hash. FindState takes
+   // ownership of the subset argument (so that it doesn't have to copy it
+   // if it creates a new state).
+   //
+   // The method exploits the following device: all pairs stored in the
+   // associative container subset_hash_ are of the form (subset,
+   // id(subset) + 1), i.e. subset_hash_[subset] > 0 if subset has been
+   // stored previously. For unassigned subsets, the call to
+   // subset_hash_[subset] creates a new pair (subset, 0). As a result,
+   // subset_hash_[subset] == 0 iff subset is new.
+   StateId FindState(Subset *subset) {
+     StateId &assoc_value = subset_hash_[subset];
+     if (assoc_value == 0) {  // subset wasn't present; create new state
+       StateId s = CreateState(subset);
+       assoc_value = s + 1;
+       return s;
+     } else {
+       delete subset;
+       return assoc_value - 1;  // NB: assoc_value = ID + 1
+     }
+   }
+
+   // Registers 'subset' as a new output state and returns its id (its index
+   // in subsets_). When both distance vectors were supplied, also appends
+   // the distance of the new state to out_dist_.
+   StateId CreateState(Subset *subset) {
+     StateId s = subsets_.size();
+     subsets_.push_back(subset);
+     // out_dist_ may be null even when in_dist_ is given (the constructor
+     // already treats it as optional), so check both before writing.
+     if (in_dist_ && out_dist_)
+       out_dist_->push_back(ComputeDistance(subset));
+     return s;
+   }
+
+   // Compute distance from a state to the final states in the DFA
+   // given the distances in the NFA. Input states with no entry in
+   // in_dist_ contribute Weight::Zero(). Requires in_dist_ to be non-null.
+   Weight ComputeDistance(const Subset *subset) {
+     Weight outd = Weight::Zero();
+     for (typename Subset::const_iterator siter = subset->begin();
+          siter != subset->end(); ++siter) {
+       const Element &element = *siter;
+       Weight ind = element.state_id < in_dist_->size() ?
+           (*in_dist_)[element.state_id] : Weight::Zero();
+       outd = Plus(outd, Times(element.weight, ind));
+     }
+     return outd;
+   }
+
+   // Computes the outgoing transitions from a state, creating new destination
+   // states as needed.
+   virtual void Expand(StateId s) {
+
+     LabelMap label_map;
+     LabelSubsets(s, &label_map);
+
+     for (typename LabelMap::iterator liter = label_map.begin();
+          liter != label_map.end();
+          ++liter)
+       AddArc(s, liter->first, liter->second);
+     SetArcs(s);
+   }
+
+  private:
+   // Constructs destination subsets per label. At return, subset
+   // element weights include the input automaton label weights and the
+   // subsets may contain duplicate states.
+   void LabelSubsets(StateId s, LabelMap *label_map) {
+     Subset *src_subset = subsets_[s];
+
+     for (typename Subset::iterator siter = src_subset->begin();
+          siter != src_subset->end();
+          ++siter) {
+       Element &src_element = *siter;
+       for (ArcIterator< Fst<A> > aiter(GetFst(), src_element.state_id);
+            !aiter.Done();
+            aiter.Next()) {
+         const A &arc = aiter.Value();
+         Element dest_element(arc.nextstate,
+                              Times(src_element.weight, arc.weight));
+         // operator[] yields a null pointer for a label seen the first time.
+         Subset* &dest_subset = (*label_map)[arc.ilabel];
+         if (dest_subset == 0)
+           dest_subset = new Subset;
+         dest_subset->push_front(dest_element);
+       }
+     }
+   }
+
+   // Adds an arc from state S to the destination state associated
+   // with subset DEST_SUBSET (as created by LabelSubsets).
+   // Ownership of DEST_SUBSET passes to FindState().
+   void AddArc(StateId s, Label label, Subset *dest_subset) {
+     A arc;
+     arc.ilabel = label;
+     arc.olabel = label;
+     arc.weight = Weight::Zero();
+
+     // First pass: accumulates the arc weight with the common divisor and
+     // merges duplicate states. 'oiter' trails 'diter' and designates the
+     // last element kept, so erase_after(oiter) removes the duplicate.
+     // This relies on elements_ holding only null entries on entry: the
+     // first element is then never a duplicate and 'oiter' is always set
+     // before it is used.
+     typename Subset::iterator oiter;
+     for (typename Subset::iterator diter = dest_subset->begin();
+          diter != dest_subset->end();) {
+       Element &dest_element = *diter;
+       // Computes label weight.
+       arc.weight = common_divisor_(arc.weight, dest_element.weight);
+
+       while (elements_.size() <= dest_element.state_id)
+         elements_.push_back(0);
+       Element *matching_element = elements_[dest_element.state_id];
+       if (matching_element) {
+         // Found duplicate state: sums state weight and deletes dup.
+         matching_element->weight = Plus(matching_element->weight,
+                                         dest_element.weight);
+         if (!matching_element->weight.Member())
+           SetProperties(kError, kError);
+         ++diter;
+         dest_subset->erase_after(oiter);
+       } else {
+         // Saves element so we can check for duplicate for this state.
+         elements_[dest_element.state_id] = &dest_element;
+         oiter = diter;
+         ++diter;
+       }
+     }
+
+     // Divides out label weight from destination subset elements.
+     // Quantizes to ensure comparisons are effective.
+     // Clears element vector.
+     for (typename Subset::iterator diter = dest_subset->begin();
+          diter != dest_subset->end();
+          ++diter) {
+       Element &dest_element = *diter;
+       dest_element.weight = Divide(dest_element.weight, arc.weight,
+                                    DIVIDE_LEFT);
+       dest_element.weight = dest_element.weight.Quantize(delta_);
+       elements_[dest_element.state_id] = 0;
+     }
+
+     arc.nextstate = FindState(dest_subset);
+     CacheImpl<A>::PushArc(s, arc);
+   }
+
+   // Comparison object for hashing Subset(s). Subsets are not sorted in this
+   // implementation, so ordering must not be assumed in the equivalence
+   // test.
+   class SubsetEqual {
+    public:
+     // Constructor takes vector needed to check equality. See immediately
+     // below for constraints on it.
+     explicit SubsetEqual(vector<Element *> *elements)
+         : elements_(elements) {}
+
+     // At each call to operator(), the elements_ vector should contain
+     // only NULLs. When this operator returns, elements_ will still
+     // have this property.
+     bool operator()(Subset* subset1, Subset* subset2) const {
+       if (subset1->size() != subset2->size())
+         return false;
+
+       // Loads first subset elements in element vector.
+       for (typename Subset::iterator iter1 = subset1->begin();
+            iter1 != subset1->end();
+            ++iter1) {
+         Element &element1 = *iter1;
+         while (elements_->size() <= element1.state_id)
+           elements_->push_back(0);
+         (*elements_)[element1.state_id] = &element1;
+       }
+
+       // Checks second subset matches first via element vector.
+       for (typename Subset::iterator iter2 = subset2->begin();
+            iter2 != subset2->end();
+            ++iter2) {
+         Element &element2 = *iter2;
+         while (elements_->size() <= element2.state_id)
+           elements_->push_back(0);
+         Element *element1 = (*elements_)[element2.state_id];
+         if (!element1 || element1->weight != element2.weight) {
+           // Mismatch found. Resets element vector before returning false.
+           for (typename Subset::iterator iter1 = subset1->begin();
+                iter1 != subset1->end();
+                ++iter1)
+             (*elements_)[iter1->state_id] = 0;
+           return false;
+         } else {
+           (*elements_)[element2.state_id] = 0;  // Clears entry
+         }
+       }
+       return true;
+     }
+    private:
+     vector<Element *> *elements_;
+   };
+
+   // Hash function for Subset to Fst states. Subset elements are not
+   // sorted in this implementation, so the hash must be invariant
+   // under subset reordering.
+   class SubsetKey {
+    public:
+     size_t operator()(const Subset* subset) const {
+       size_t hash = 0;
+       for (typename Subset::const_iterator iter = subset->begin();
+            iter != subset->end();
+            ++iter) {
+         const Element &element = *iter;
+         // Per-element value: the state id rotated by an amount derived
+         // from the id itself, XOR-ed with the weight hash. The per-element
+         // values are combined with XOR, which is order-independent.
+         int lshift = element.state_id % (CHAR_BIT * sizeof(size_t) - 1) + 1;
+         int rshift = CHAR_BIT * sizeof(size_t) - lshift;
+         size_t n = element.state_id;
+         hash ^= n << lshift ^ n >> rshift ^ element.weight.Hash();
+       }
+       return hash;
+     }
+   };
+
+   float delta_;                    // Quantization delta for subset weights
+   const vector<Weight> *in_dist_;  // Distance to final NFA states
+   vector<Weight> *out_dist_;       // Distance to final DFA states
+
+   D common_divisor_;
+
+   // Used to test equivalence of subsets.
+   vector<Element *> elements_;
+
+   // Maps from StateId to Subset.
+   vector<Subset *> subsets_;
+
+   // Hashes from Subset to its StateId in the output automaton.
+   typedef unordered_map<Subset *, StateId, SubsetKey, SubsetEqual>
+   SubsetHash;
+
+   // Maps each known subset to (output StateId + 1); see FindState().
+   SubsetHash subset_hash_;
+
+   void operator=(const DeterminizeFsaImpl<A, D> &);  // disallow
+ };
+
+
+ // Delayed determinization of transducers.
+ // The input is mapped to an acceptor over the Gallic semiring, whose
+ // weights carry the output strings. That acceptor is determinized with the
+ // acceptor implementation above, and the result is exposed through
+ // from_fst_, from which states and arcs are pulled on demand.
+ template <class A, StringType S>
+ class DeterminizeFstImpl : public DeterminizeFstImplBase<A> {
+  public:
+   using FstImpl<A>::SetProperties;
+   using DeterminizeFstImplBase<A>::GetFst;
+   using CacheBaseImpl< CacheState<A> >::GetCacheGc;
+   using CacheBaseImpl< CacheState<A> >::GetCacheLimit;
+
+   typedef typename A::Label Label;
+   typedef typename A::Weight Weight;
+   typedef typename A::StateId StateId;
+
+   // Mappers to and from the Gallic arc type.
+   typedef ToGallicMapper<A, S> ToMapper;
+   typedef FromGallicMapper<A, S> FromMapper;
+
+   // Gallic arc type and the two mapping FSTs built on it.
+   typedef typename ToMapper::ToArc ToArc;
+   typedef ArcMapFst<A, ToArc, ToMapper> ToFst;
+   typedef ArcMapFst<ToArc, A, FromMapper> FromFst;
+
+   typedef GallicCommonDivisor<Label, Weight, S> CommonDivisor;
+   typedef GallicFactor<Label, Weight, S> FactorIterator;
+
+   // Builds the delayed pipeline for 'fst' using the delta and the
+   // subsequential label taken from 'opts'.
+   DeterminizeFstImpl(const Fst<A> &fst, const DeterminizeFstOptions<A> &opts)
+       : DeterminizeFstImplBase<A>(fst, opts),
+         delta_(opts.delta),
+         subsequential_label_(opts.subsequential_label) {
+     Init(GetFst());
+   }
+
+   // Copy constructor: copies the settings and rebuilds the pipeline
+   // from this object's own input FST.
+   DeterminizeFstImpl(const DeterminizeFstImpl<A, S> &impl)
+       : DeterminizeFstImplBase<A>(impl),
+         delta_(impl.delta_),
+         subsequential_label_(impl.subsequential_label_) {
+     Init(GetFst());
+   }
+
+   // Releases the FST allocated by Init().
+   ~DeterminizeFstImpl() { delete from_fst_; }
+
+   virtual DeterminizeFstImpl<A, S> *Copy() {
+     return new DeterminizeFstImpl<A, S>(*this);
+   }
+
+   uint64 Properties() const { return Properties(kFstProperties); }
+
+   // Returns the properties selected by 'mask'. When kError is requested,
+   // an error on the input or on from_fst_ is recorded on this object first.
+   uint64 Properties(uint64 mask) const {
+     if ((mask & kError) != 0) {
+       if (GetFst().Properties(kError, false) ||
+           from_fst_->Properties(kError, false)) {
+         SetProperties(kError, kError);
+       }
+     }
+     return FstImpl<A>::Properties(mask);
+   }
+
+   // Start state and final weights are read straight from from_fst_.
+   virtual StateId ComputeStart() { return from_fst_->Start(); }
+
+   virtual Weight ComputeFinal(StateId s) { return from_fst_->Final(s); }
+
+   // Copies the arcs leaving state 's' of from_fst_ into the cache, then
+   // marks the state's arcs as set.
+   virtual void Expand(StateId s) {
+     {
+       ArcIterator<FromFst> arcs(*from_fst_, s);
+       while (!arcs.Done()) {
+         CacheImpl<A>::PushArc(s, arcs.Value());
+         arcs.Next();
+       }
+     }
+     CacheImpl<A>::SetArcs(s);
+   }
+
+  private:
+   // Initialization of transducer determinization implementation, which
+   // is defined after DeterminizeFst since it calls it.
+   void Init(const Fst<A> &fst);
+
+   float delta_;                // Taken from DeterminizeFstOptions::delta
+   Label subsequential_label_;  // Taken from the options' subsequential_label
+   FromFst *from_fst_;          // Allocated by Init(), freed by the destructor
+
+   void operator=(const DeterminizeFstImpl<A, S> &);  // disallow
+ };
+
+
+// Determinizes a weighted transducer. This version is a delayed
+// Fst. The result will be an equivalent FST that has the property
+// that no state has two transitions with the same input label.
+// For this algorithm, epsilon transitions are treated as regular
+// symbols (cf. RmEpsilon).
+//
+// The transducer must be functional. The weights must be (weakly)
+// left divisible (valid for TropicalWeight and LogWeight for instance)
+ // and be zero-sum-free, i.e. for all a,b: Plus(a, b) = 0 => a = b = 0.
+//
+// Complexity:
+// - Determinizable: exponential (polynomial in the size of the output)
+ // - Non-determinizable: does not terminate
+//
+// The determinizable automata include all unweighted and all acyclic input.
+//
+// References:
+// - Mehryar Mohri, "Finite-State Transducers in Language and Speech
+// Processing". Computational Linguistics, 23:2, 1997.
+//
+// This class attaches interface to implementation and handles
+// reference counting, delegating most methods to ImplToFst.
+ template <class A>
+ class DeterminizeFst : public ImplToFst< DeterminizeFstImplBase<A> > {
+  public:
+   friend class ArcIterator< DeterminizeFst<A> >;
+   friend class StateIterator< DeterminizeFst<A> >;
+   template <class B, StringType S> friend class DeterminizeFstImpl;
+
+   typedef A Arc;
+   typedef typename A::Weight Weight;
+   typedef typename A::StateId StateId;
+   typedef typename A::Label Label;
+   typedef CacheState<A> State;
+   typedef DeterminizeFstImplBase<A> Impl;
+
+   using ImplToFst<Impl>::SetImpl;
+
+   // Delayed determinization of 'fst'. The acceptor implementation is used
+   // when 'fst' is an acceptor, the transducer implementation otherwise.
+   explicit DeterminizeFst(
+       const Fst<A> &fst,
+       const DeterminizeFstOptions<A> &opts = DeterminizeFstOptions<A>()) {
+     if (fst.Properties(kAcceptor, true)) {
+       // Calls implementation for acceptors.
+       typedef DefaultCommonDivisor<Weight> D;
+       SetImpl(new DeterminizeFsaImpl<A, D>(fst, D(), 0, 0, opts));
+     } else {
+       // Calls implementation for transducers.
+       SetImpl(new DeterminizeFstImpl<A, STRING_LEFT_RESTRICT>(fst, opts));
+     }
+   }
+
+   // This acceptor-only version additionally computes the distance to
+   // final states in the output if provided with those distances for the
+   // input. Useful for e.g. unique N-shortest paths.
+   // If 'fst' is not an acceptor, an error is reported and kError is set
+   // on the implementation.
+   DeterminizeFst(
+       const Fst<A> &fst,
+       const vector<Weight> &in_dist, vector<Weight> *out_dist,
+       const DeterminizeFstOptions<A> &opts = DeterminizeFstOptions<A>()) {
+     const bool acceptor = fst.Properties(kAcceptor, true);
+     if (!acceptor) {
+       FSTERROR() << "DeterminizeFst:"
+                  << " distance to final states computed for acceptors only";
+     }
+     typedef DefaultCommonDivisor<Weight> D;
+     SetImpl(new DeterminizeFsaImpl<A, D>(fst, D(), &in_dist, out_dist, opts));
+     // The error property is recorded only after SetImpl(): this constructor
+     // has not supplied any implementation before that call, so going
+     // through GetImpl() earlier would not reach a valid object.
+     if (!acceptor)
+       GetImpl()->SetProperties(kError, kError);
+   }
+
+   // See Fst<>::Copy() for doc.
+   DeterminizeFst(const DeterminizeFst<A> &fst, bool safe = false) {
+     if (safe)
+       SetImpl(fst.GetImpl()->Copy());
+     else
+       SetImpl(fst.GetImpl(), false);
+   }
+
+   // Get a copy of this DeterminizeFst. See Fst<>::Copy() for further doc.
+   virtual DeterminizeFst<A> *Copy(bool safe = false) const {
+     return new DeterminizeFst<A>(*this, safe);
+   }
+
+   virtual inline void InitStateIterator(StateIteratorData<A> *data) const;
+
+   // Forwards to the implementation's InitArcIterator.
+   virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
+     GetImpl()->InitArcIterator(s, data);
+   }
+
+  private:
+   // This private version is for passing the common divisor to
+   // FSA determinization.
+   template <class D>
+   DeterminizeFst(const Fst<A> &fst, const D &common_div,
+                  const DeterminizeFstOptions<A> &opts)
+       : ImplToFst<Impl>(
+           new DeterminizeFsaImpl<A, D>(fst, common_div, 0, 0, opts)) {}
+
+   // Makes visible to friends.
+   Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
+
+   void operator=(const DeterminizeFst<A> &fst);  // Disallow
+ };
+
+
+ // Initialization of the transducer determinization implementation, which
+ // is defined after DeterminizeFst since it calls it.
+ //
+ // Builds the delayed chain: input -> Gallic acceptor -> determinized
+ // acceptor -> weight-factored acceptor -> transducer (stored in from_fst_).
+ template <class A, StringType S>
+ void DeterminizeFstImpl<A, S>::Init(const Fst<A> &fst) {
+   // Step 1: map the input to an acceptor.
+   ToFst gallic_fsa(fst, ToMapper());
+
+   // Step 2: determinize that acceptor, reusing this object's cache
+   // settings. The recursion ends here because the private DeterminizeFst
+   // constructor taking a common divisor is the one invoked.
+   CacheOptions cache_opts(GetCacheGc(), GetCacheLimit());
+   DeterminizeFstOptions<ToArc> det_opts(cache_opts, delta_);
+   DeterminizeFst<ToArc> determinized(gallic_fsa, CommonDivisor(), det_opts);
+
+   // Step 3: factor the weights and map back to a transducer.
+   FactorWeightOptions<ToArc> factor_opts(CacheOptions(true, 0), delta_,
+                                          kFactorFinalWeights,
+                                          subsequential_label_,
+                                          subsequential_label_);
+   FactorWeightFst<ToArc, FactorIterator> factored(determinized, factor_opts);
+   from_fst_ = new FromFst(factored, FromMapper(subsequential_label_));
+ }
+
+
+ // StateIterator specialization for DeterminizeFst; derives from CacheStateIterator.
+ template <class A>
+ class StateIterator< DeterminizeFst<A> >
+ : public CacheStateIterator< DeterminizeFst<A> > {
+ public:
+ explicit StateIterator(const DeterminizeFst<A> &fst)
+ : CacheStateIterator< DeterminizeFst<A> >(fst, fst.GetImpl()) {}  // Passes the FST and its implementation to the base.
+ };
+
+
+ // ArcIterator specialization for DeterminizeFst.
+ // Construction makes sure the arcs of state 's' have been computed by the
+ // implementation; iteration itself is inherited from CacheArcIterator.
+ template <class A>
+ class ArcIterator< DeterminizeFst<A> >
+     : public CacheArcIterator< DeterminizeFst<A> > {
+  public:
+   typedef typename A::StateId StateId;
+
+   ArcIterator(const DeterminizeFst<A> &fst, StateId s)
+       : CacheArcIterator< DeterminizeFst<A> >(fst.GetImpl(), s) {
+     DeterminizeFstImplBase<A> *impl = fst.GetImpl();
+     // Expand the state only if its arcs are not available yet.
+     if (!impl->HasArcs(s)) {
+       impl->Expand(s);
+     }
+   }
+
+  private:
+   DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+ };
+
+
+ template <class A> inline
+ void DeterminizeFst<A>::InitStateIterator(StateIteratorData<A> *data) const
+ {
+ data->base = new StateIterator< DeterminizeFst<A> >(*this);  // Heap-allocated iterator over this FST, stored in data->base.
+ }
+
+
+ // Useful alias when using StdArc.
+typedef DeterminizeFst<StdArc> StdDeterminizeFst;
+
+
+ template <class Arc>
+ struct DeterminizeOptions {  // Options read by Determinize().
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Weight Weight;
+ typedef typename Arc::Label Label;
+
+ float delta; // Quantization delta for subset weights.
+ Weight weight_threshold; // Pruning weight threshold (Weight::Zero() by default).
+ StateId state_threshold; // Pruning state threshold (kNoStateId by default).
+ Label subsequential_label; // Label used for residual final output
+ // when producing subsequential transducers (0 by default).
+
+ explicit DeterminizeOptions(float d = kDelta, Weight w = Weight::Zero(),
+ StateId n = kNoStateId, Label l = 0)
+ : delta(d), weight_threshold(w), state_threshold(n),
+ subsequential_label(l) {}  // Each argument initializes the member of matching meaning.
+ };
+
+
+// Determinizes a weighted transducer. This version writes the
+// determinized Fst to an output MutableFst. The result will be an
+ // equivalent FST that has the property that no state has two
+// transitions with the same input label. For this algorithm, epsilon
+// transitions are treated as regular symbols (cf. RmEpsilon).
+//
+// The transducer must be functional. The weights must be (weakly)
+// left divisible (valid for TropicalWeight and LogWeight).
+//
+// Complexity:
+// - Determinizable: exponential (polynomial in the size of the output)
+// - Non-determinizable: does not terminate
+//
+// The determinizable automata include all unweighted and all acyclic input.
+//
+// References:
+// - Mehryar Mohri, "Finite-State Transducers in Language and Speech
+// Processing". Computational Linguistics, 23:2, 1997.
+ template <class Arc>
+ void Determinize(const Fst<Arc> &ifst, MutableFst<Arc> *ofst,
+                  const DeterminizeOptions<Arc> &opts
+                  = DeterminizeOptions<Arc>()) {
+   typedef typename Arc::StateId StateId;
+   typedef typename Arc::Weight Weight;
+
+   // Options for the delayed FST, derived from the caller's options.
+   DeterminizeFstOptions<Arc> nopts;
+   nopts.delta = opts.delta;
+   nopts.subsequential_label = opts.subsequential_label;
+   nopts.gc_limit = 0;  // Cache only the last state for fastest copy.
+
+   // Pruning is requested when either threshold differs from its default.
+   const bool pruning = opts.weight_threshold != Weight::Zero() ||
+                        opts.state_threshold != kNoStateId;
+
+   // No pruning: copy the delayed result straight into the output.
+   if (!pruning) {
+     *ofst = DeterminizeFst<Arc>(ifst, nopts);
+     return;
+   }
+
+   // Pruning, input not known to be an acceptor: determinize fully, then
+   // prune the output in place.
+   if (!ifst.Properties(kAcceptor, false)) {
+     *ofst = DeterminizeFst<Arc>(ifst, nopts);
+     Prune(ofst, opts.weight_threshold, opts.state_threshold);
+     return;
+   }
+
+   // Pruning, acceptor input: the delayed FST fills in the output
+   // distances, which are handed to Prune() while it writes the result.
+   vector<Weight> idistance;
+   vector<Weight> odistance;
+   ShortestDistance(ifst, &idistance, true);
+   DeterminizeFst<Arc> dfst(ifst, idistance, &odistance, nopts);
+   PruneOptions< Arc, AnyArcFilter<Arc> > popts(opts.weight_threshold,
+                                                opts.state_threshold,
+                                                AnyArcFilter<Arc>(),
+                                                &odistance);
+   Prune(dfst, ofst, popts);
+ }
+
+
+} // namespace fst
+
+#endif // FST_LIB_DETERMINIZE_H__
diff --git a/src/include/fst/dfs-visit.h b/src/include/fst/dfs-visit.h
new file mode 100644
index 0000000..b47c78d
--- /dev/null
+++ b/src/include/fst/dfs-visit.h
@@ -0,0 +1,204 @@
+// dfs-visit.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Depth-first search visitation. See visit.h for more general
+// search queue disciplines.
+
+#ifndef FST_LIB_DFS_VISIT_H__
+#define FST_LIB_DFS_VISIT_H__
+
+#include <stack>
+#include <vector>
+using std::vector;
+
+#include <fst/arcfilter.h>
+#include <fst/fst.h>
+
+
+namespace fst {
+
+// Visitor Interface - class determines actions taken during a Dfs.
+// If any of the boolean member functions return false, the DFS is
+// aborted by first calling FinishState() on all currently grey states
+// and then calling FinishVisit().
+//
+// Note this is similar to the more general visitor interface in visit.h
+// except that FinishState returns additional information appropriate only for
+// a DFS and some methods names here are better suited to a DFS.
+//
+// template <class Arc>
+// class Visitor {
+// public:
+// typedef typename Arc::StateId StateId;
+//
+// Visitor(T *return_data);
+// // Invoked before DFS visit
+// void InitVisit(const Fst<Arc> &fst);
+// // Invoked when state discovered (2nd arg is DFS tree root)
+// bool InitState(StateId s, StateId root);
+// // Invoked when tree arc examined (to white/undiscovered state)
+// bool TreeArc(StateId s, const Arc &a);
+// // Invoked when back arc examined (to grey/unfinished state)
+// bool BackArc(StateId s, const Arc &a);
+// // Invoked when forward or cross arc examined (to black/finished state)
+// bool ForwardOrCrossArc(StateId s, const Arc &a);
+ // // Invoked when state finished (PARENT is kNoStateId and ARC == NULL
+// // when S is tree root)
+// void FinishState(StateId s, StateId parent, const Arc *parent_arc);
+// // Invoked after DFS visit
+// void FinishVisit();
+// };
+
+ // An Fst state's DFS status (stored per state in DfsVisit's state_color vector)
+ const int kDfsWhite = 0; // Undiscovered
+ const int kDfsGrey = 1; // Discovered & unfinished (still on the DFS stack)
+ const int kDfsBlack = 2; // Finished
+
+ // An Fst state's DFS stack state: one entry of DfsVisit's execution stack
+ template <class Arc>
+ struct DfsState {
+ typedef typename Arc::StateId StateId;
+
+ DfsState(const Fst<Arc> &fst, StateId s): state_id(s), arc_iter(fst, s) {}  // Opens an arc iterator on state s of fst.
+
+ StateId state_id; // Fst state ...
+ ArcIterator< Fst<Arc> > arc_iter; // and its corresponding arcs
+ };
+
+
+ // Performs depth-first visitation. Visitor class argument determines
+ // actions and contains any return data. ArcFilter determines arcs
+ // that are considered.
+ //
+ // Note this is similar to Visit() in visit.h called with a LIFO
+ // queue except this version has a Visitor class specialized and
+ // augmented for a DFS.
+ template <class Arc, class V, class ArcFilter>
+ void DfsVisit(const Fst<Arc> &fst, V *visitor, ArcFilter filter) {
+ typedef typename Arc::StateId StateId;
+
+ visitor->InitVisit(fst);
+
+ StateId start = fst.Start();
+ if (start == kNoStateId) {  // No start state: nothing to visit.
+ visitor->FinishVisit();
+ return;
+ }
+
+ vector<char> state_color; // Fst state DFS status
+ stack<DfsState<Arc> *> state_stack; // DFS execution stack
+
+ StateId nstates = start + 1; // # of known states in general case
+ bool expanded = false;
+ if (fst.Properties(kExpanded, false)) { // tests if expanded case, then
+ nstates = CountStates(fst); // uses ExpandedFst::NumStates().
+ expanded = true;
+ }
+
+ state_color.resize(nstates, kDfsWhite);
+ StateIterator< Fst<Arc> > siter(fst);  // Only advanced in the non-expanded case, to discover further states.
+
+ // Continue DFS while true; set to false as soon as a visitor callback returns false
+ bool dfs = true;
+
+ // Iterate over trees in DFS forest.
+ for (StateId root = start; dfs && root < nstates;) {  // root is advanced at the bottom of the loop body.
+ state_color[root] = kDfsGrey;
+ state_stack.push(new DfsState<Arc>(fst, root));
+ dfs = visitor->InitState(root, root);
+ while (!state_stack.empty()) {  // Runs until the stack is empty, also when aborting.
+ DfsState<Arc> *dfs_state = state_stack.top();
+ StateId s = dfs_state->state_id;
+ if (s >= state_color.size()) {  // Grows the color table to cover state s.
+ nstates = s + 1;
+ state_color.resize(nstates, kDfsWhite);
+ }
+ ArcIterator< Fst<Arc> > &aiter = dfs_state->arc_iter;
+ if (!dfs || aiter.Done()) {  // State finished, or the DFS is being aborted.
+ state_color[s] = kDfsBlack;
+ delete dfs_state;
+ state_stack.pop();
+ if (!state_stack.empty()) {
+ DfsState<Arc> *parent_state = state_stack.top();
+ StateId p = parent_state->state_id;
+ ArcIterator< Fst<Arc> > &piter = parent_state->arc_iter;
+ visitor->FinishState(s, p, &piter.Value());  // The parent's current arc is passed as the parent arc.
+ piter.Next();  // The parent then moves on to its next arc.
+ } else {
+ visitor->FinishState(s, kNoStateId, 0);  // s was the tree root: no parent, no arc.
+ }
+ continue;
+ }
+ const Arc &arc = aiter.Value();
+ if (arc.nextstate >= state_color.size()) {  // Grows the color table to cover the destination state.
+ nstates = arc.nextstate + 1;
+ state_color.resize(nstates, kDfsWhite);
+ }
+ if (!filter(arc)) {  // Arc rejected by the filter: skip it.
+ aiter.Next();
+ continue;
+ }
+ int next_color = state_color[arc.nextstate];
+ switch (next_color) {
+ default:  // Any other value is handled like kDfsWhite.
+ case kDfsWhite:
+ dfs = visitor->TreeArc(s, arc);
+ if (!dfs) break;
+ state_color[arc.nextstate] = kDfsGrey;
+ state_stack.push(new DfsState<Arc>(fst, arc.nextstate));  // aiter is not advanced here; that happens when the child finishes.
+ dfs = visitor->InitState(arc.nextstate, root);
+ break;
+ case kDfsGrey:
+ dfs = visitor->BackArc(s, arc);
+ aiter.Next();
+ break;
+ case kDfsBlack:
+ dfs = visitor->ForwardOrCrossArc(s, arc);
+ aiter.Next();
+ break;
+ }
+ }
+
+ // Find next tree root: scan from 0 after the start state's tree, otherwise from root + 1
+ for (root = root == start ? 0 : root + 1;
+ root < nstates && state_color[root] != kDfsWhite;
+ ++root);  // Empty body: the loop only skips states that are not white.
+
+ // Check for a state beyond the largest known state
+ if (!expanded && root == nstates) {
+ for (; !siter.Done(); siter.Next()) {
+ if (siter.Value() == nstates) {  // Found state number nstates: register it as white.
+ ++nstates;
+ state_color.push_back(kDfsWhite);
+ break;
+ }
+ }
+ }
+ }
+ visitor->FinishVisit();
+ }
+
+
+ template <class Arc, class V>
+ void DfsVisit(const Fst<Arc> &fst, V *visitor) {
+ DfsVisit(fst, visitor, AnyArcFilter<Arc>());  // Delegates to the three-argument version with an AnyArcFilter<Arc>.
+ }
+
+} // namespace fst
+
+#endif // FST_LIB_DFS_VISIT_H__
diff --git a/src/include/fst/difference.h b/src/include/fst/difference.h
new file mode 100644
index 0000000..8a3306f
--- /dev/null
+++ b/src/include/fst/difference.h
@@ -0,0 +1,189 @@
+// difference.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Class to compute the difference between two FSAs
+
+#ifndef FST_LIB_DIFFERENCE_H__
+#define FST_LIB_DIFFERENCE_H__
+
+#include <vector>
+using std::vector;
+#include <algorithm>
+
+#include <fst/cache.h>
+#include <fst/compose.h>
+#include <fst/complement.h>
+
+
+namespace fst {
+
+ // Options for DifferenceFst: cache options plus optional matchers, compose
+ // filter and state table, all handed to the ComposeFstOptions base.
+ template <class A,
+           class M = Matcher<Fst<A> >,
+           class F = SequenceComposeFilter<M>,
+           class T = GenericComposeStateTable<A, typename F::FilterState> >
+ struct DifferenceFstOptions : public ComposeFstOptions<A, M, F, T> {
+   // 'opts' is forwarded to the base together with the other arguments;
+   // the original dropped it and passed 'mat1' in its place.
+   explicit DifferenceFstOptions(const CacheOptions &opts,
+                                 M *mat1 = 0, M *mat2 = 0,
+                                 F *filt = 0, T *sttable= 0)
+       : ComposeFstOptions<A, M, F, T>(opts, mat1, mat2, filt, sttable) { }
+
+   DifferenceFstOptions() {}
+ };
+
+// Computes the difference between two FSAs. This version is a delayed
+// Fst. Only strings that are in the first automaton but not in second
+// are retained in the result.
+//
+// The first argument must be an acceptor; the second argument must be
+// an unweighted, epsilon-free, deterministic acceptor. One of the
+// arguments must be label-sorted.
+//
+// Complexity: same as ComposeFst.
+//
+// Caveats: same as ComposeFst.
+ template <class A>
+ class DifferenceFst : public ComposeFst<A> {
+  public:
+   using ImplToFst< ComposeFstImplBase<A> >::SetImpl;
+   using ImplToFst< ComposeFstImplBase<A> >::GetImpl;
+
+   using ComposeFst<A>::CreateBase1;
+
+   typedef A Arc;
+   typedef typename A::Weight Weight;
+   typedef typename A::StateId StateId;
+
+   // A - B = A ^ B'.
+   // Composes 'fst1' with the complement of 'fst2' through rho matchers.
+   // 'opts' supplies the cache options of the composition. Sets kError if
+   // 'fst1' is not an acceptor.
+   DifferenceFst(const Fst<A> &fst1, const Fst<A> &fst2,
+                 const CacheOptions &opts = CacheOptions()) {
+     typedef RhoMatcher< Matcher<Fst<A> > > R;
+
+     ComplementFst<A> cfst(fst2);
+     // Honors the caller's cache options; the original passed a
+     // default-constructed CacheOptions here and ignored 'opts'.
+     ComposeFstOptions<A, R> copts(opts,
+                                   new R(fst1, MATCH_NONE),
+                                   new R(cfst, MATCH_INPUT,
+                                         ComplementFst<A>::kRhoLabel));
+     SetImpl(CreateBase1(fst1, cfst, copts));
+
+     if (!fst1.Properties(kAcceptor, true)) {
+       FSTERROR() << "DifferenceFst: 1st argument not an acceptor";
+       GetImpl()->SetProperties(kError, kError);
+     }
+   }
+
+   // Same as above, with the options given as DifferenceFstOptions. The
+   // matchers found in 'opts' are handed to the rho matchers built here.
+   // Sets kError if 'fst1' is not an acceptor.
+   template <class M, class F, class T>
+   DifferenceFst(const Fst<A> &fst1, const Fst<A> &fst2,
+                 const DifferenceFstOptions<A, M, F, T> &opts) {
+     typedef RhoMatcher<M> R;
+
+     ComplementFst<A> cfst(fst2);
+     ComposeFstOptions<A, R> copts(opts);
+     copts.matcher1 = new R(fst1, MATCH_NONE, kNoLabel, MATCHER_REWRITE_ALWAYS,
+                            opts.matcher1);
+     copts.matcher2 = new R(cfst, MATCH_INPUT, ComplementFst<A>::kRhoLabel,
+                            MATCHER_REWRITE_ALWAYS, opts.matcher2);
+
+     SetImpl(CreateBase1(fst1, cfst, copts));
+
+     if (!fst1.Properties(kAcceptor, true)) {
+       FSTERROR() << "DifferenceFst: 1st argument not an acceptor";
+       GetImpl()->SetProperties(kError, kError);
+     }
+   }
+
+   // See Fst<>::Copy() for doc.
+   DifferenceFst(const DifferenceFst<A> &fst, bool safe = false)
+       : ComposeFst<A>(fst, safe) {}
+
+   // Get a copy of this DifferenceFst. See Fst<>::Copy() for further doc.
+   virtual DifferenceFst<A> *Copy(bool safe = false) const {
+     return new DifferenceFst<A>(*this, safe);
+   }
+ };
+
+
+ // StateIterator specialization for DifferenceFst; derives from StateIterator< ComposeFst<A> >.
+ template <class A>
+ class StateIterator< DifferenceFst<A> >
+ : public StateIterator< ComposeFst<A> > {
+ public:
+ explicit StateIterator(const DifferenceFst<A> &fst)
+ : StateIterator< ComposeFst<A> >(fst) {}  // Passes the FST to the ComposeFst iterator.
+ };
+
+
+ // ArcIterator specialization for DifferenceFst; derives from ArcIterator< ComposeFst<A> >.
+ template <class A>
+ class ArcIterator< DifferenceFst<A> >
+ : public ArcIterator< ComposeFst<A> > {
+ public:
+ typedef typename A::StateId StateId;
+
+ ArcIterator(const DifferenceFst<A> &fst, StateId s)
+ : ArcIterator< ComposeFst<A> >(fst, s) {}  // Passes the FST and state to the ComposeFst iterator.
+ };
+
+ // Useful alias when using StdArc.
+ typedef DifferenceFst<StdArc> StdDifferenceFst;
+
+
+ typedef ComposeOptions DifferenceOptions;  // Difference() takes the same options type as composition.
+
+
+ // Computes the difference between two FSAs. This version writes
+// the difference to an output MutableFst. Only strings that are in
+// the first automaton but not in second are retained in the result.
+//
+// The first argument must be an acceptor; the second argument must be
+// an unweighted, epsilon-free, deterministic acceptor. One of the
+// arguments must be label-sorted.
+//
+// Complexity: same as Compose.
+//
+// Caveats: same as Compose.
+ template<class Arc>
+ void Difference(const Fst<Arc> &ifst1, const Fst<Arc> &ifst2,
+                 MutableFst<Arc> *ofst,
+                 const DifferenceOptions &opts = DifferenceOptions()) {
+   typedef Matcher< Fst<Arc> > M;
+
+   // One case per supported filter type; any other value leaves *ofst
+   // untouched. Every case sets gc_limit to 0 so that only the last state
+   // is cached, for fastest copy.
+   switch (opts.filter_type) {
+     case AUTO_FILTER: {
+       CacheOptions cache_opts;
+       cache_opts.gc_limit = 0;
+       *ofst = DifferenceFst<Arc>(ifst1, ifst2, cache_opts);
+       break;
+     }
+     case SEQUENCE_FILTER: {
+       DifferenceFstOptions<Arc> seq_opts;
+       seq_opts.gc_limit = 0;
+       *ofst = DifferenceFst<Arc>(ifst1, ifst2, seq_opts);
+       break;
+     }
+     case ALT_SEQUENCE_FILTER: {
+       DifferenceFstOptions<Arc, M, AltSequenceComposeFilter<M> > alt_opts;
+       alt_opts.gc_limit = 0;
+       *ofst = DifferenceFst<Arc>(ifst1, ifst2, alt_opts);
+       break;
+     }
+     case MATCH_FILTER: {
+       DifferenceFstOptions<Arc, M, MatchComposeFilter<M> > match_opts;
+       match_opts.gc_limit = 0;
+       *ofst = DifferenceFst<Arc>(ifst1, ifst2, match_opts);
+       break;
+     }
+     default:
+       break;
+   }
+
+   // Optional Connect() pass over the result.
+   if (opts.connect) {
+     Connect(ofst);
+   }
+ }
+
+} // namespace fst
+
+#endif // FST_LIB_DIFFERENCE_H__
diff --git a/src/include/fst/edit-fst.h b/src/include/fst/edit-fst.h
new file mode 100644
index 0000000..303cb24
--- /dev/null
+++ b/src/include/fst/edit-fst.h
@@ -0,0 +1,774 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: dbikel@google.com (Dan Bikel)
+//
+// An \ref Fst implementation that allows non-destructive edit operations on an
+// existing fst.
+
+#ifndef FST_LIB_EDIT_FST_H_
+#define FST_LIB_EDIT_FST_H_
+
+#include <vector>
+using std::vector;
+
+#include <fst/cache.h>
+
+namespace fst {
+
+// The EditFst class enables non-destructive edit operations on a wrapped
+// ExpandedFst. The implementation uses copy-on-write semantics at the node
+// level: if a user has an underlying fst on which he or she wants to perform a
+// relatively small number of edits (read: mutations), then this implementation
+// will copy the edited node to an internal MutableFst and perform any edits in
+// situ on that copied node. This class supports all the methods of MutableFst
+// except for DeleteStates(const vector<StateId> &); thus, new nodes may also be
+// added, and one may add transitions from existing nodes of the wrapped fst to
+// new nodes.
+//
+// N.B.: The documentation for Fst::Copy(true) says that its behavior is
+// undefined if invoked on an fst that has already been accessed. This class
+// requires that the Fst implementation it wraps provides consistent, reliable
+// behavior when its Copy(true) method is invoked, where consistent means
+// the graph structure, graph properties and state numbering do not change.
+// VectorFst and CompactFst, for example, are both well-behaved in this regard.
+
+// The EditFstData class is a container for all mutable data for EditFstImpl;
+// also, this class provides most of the actual implementation of what EditFst
+// does (that is, most of EditFstImpl's methods delegate to methods in this, the
+// EditFstData class). Instances of this class are reference-counted and can be
+// shared between otherwise independent EditFstImpl instances. This scheme
+// allows EditFstImpl to implement the thread-safe, copy-on-write semantics
+// required by Fst::Copy(true).
+//
+// template parameters:
+// A the type of arc to use
+// WrappedFstT the type of fst wrapped by the EditFst instance that
+// this EditFstData instance is backing
+// MutableFstT the type of mutable fst to use internally for edited states;
+// crucially, MutableFstT::Copy(false) *must* yield an fst that is
+// thread-safe for reading (VectorFst, for example, has this property)
+template <typename A,
+ typename WrappedFstT = ExpandedFst<A>,
+ typename MutableFstT = VectorFst<A> >
+class EditFstData {
+ public:
+ typedef A Arc;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+ typedef typename unordered_map<StateId, StateId>::const_iterator
+ IdMapIterator;
+ typedef typename unordered_map<StateId, Weight>::const_iterator
+ FinalWeightIterator;
+
+
+ EditFstData() : num_new_states_(0) {
+ SetEmptyAndDeleteKeysForInternalMaps();
+ }
+
+ EditFstData(const EditFstData &other) :
+ edits_(other.edits_),
+ external_to_internal_ids_(other.external_to_internal_ids_),
+ edited_final_weights_(other.edited_final_weights_),
+ num_new_states_(other.num_new_states_) {
+ }
+
+ ~EditFstData() {
+ }
+
+ static EditFstData<A, WrappedFstT, MutableFstT> *Read(istream &strm,
+ const FstReadOptions &opts);
+
+ bool Write(ostream &strm, const FstWriteOptions &opts) const {
+ // Serialize all private data members of this class.
+ FstWriteOptions edits_opts(opts);
+ edits_opts.write_header = true; // Force writing contained header.
+ edits_.Write(strm, edits_opts);
+ WriteType(strm, external_to_internal_ids_);
+ WriteType(strm, edited_final_weights_);
+ WriteType(strm, num_new_states_);
+ if (!strm) {
+ LOG(ERROR) << "EditFstData::Write: write failed: " << opts.source;
+ return false;
+ }
+ return true;
+ }
+
+ int RefCount() const { return ref_count_.count(); }
+ int IncrRefCount() { return ref_count_.Incr(); }
+ int DecrRefCount() { return ref_count_.Decr(); }
+
+ StateId NumNewStates() const {
+ return num_new_states_;
+ }
+
+ // accessor methods for the fst holding edited states
+ StateId EditedStart() const {
+ return edits_.Start();
+ }
+
+ // Returns the final weight of external state s. Lookup order:
+ // 1. an entry for s in edited_final_weights_, if present;
+ // 2. otherwise, if s has an entry in the external-to-internal id map,
+ // the final weight of the corresponding state of edits_;
+ // 3. otherwise, the final weight of s in the wrapped fst.
+ Weight Final(StateId s, const WrappedFstT *wrapped) const {
+ FinalWeightIterator final_weight_it = GetFinalWeightIterator(s);
+ if (final_weight_it == NotInFinalWeightMap()) {
+ IdMapIterator it = GetEditedIdMapIterator(s);
+ return it == NotInEditedMap() ?
+ wrapped->Final(s) : edits_.Final(it->second);
+ }
+ else {
+ return final_weight_it->second;
+ }
+ }
+
+ size_t NumArcs(StateId s, const WrappedFstT *wrapped) const {
+ IdMapIterator it = GetEditedIdMapIterator(s);
+ return it == NotInEditedMap() ?
+ wrapped->NumArcs(s) : edits_.NumArcs(it->second);
+ }
+
+ size_t NumInputEpsilons(StateId s, const WrappedFstT *wrapped) const {
+ IdMapIterator it = GetEditedIdMapIterator(s);
+ return it == NotInEditedMap() ?
+ wrapped->NumInputEpsilons(s) :
+ edits_.NumInputEpsilons(it->second);
+ }
+
+ size_t NumOutputEpsilons(StateId s, const WrappedFstT *wrapped) const {
+ IdMapIterator it = GetEditedIdMapIterator(s);
+ return it == NotInEditedMap() ?
+ wrapped->NumOutputEpsilons(s) :
+ edits_.NumOutputEpsilons(it->second);
+ }
+
+ void SetEditedProperties(uint64 props, uint64 mask) {
+ edits_.SetProperties(props, mask);
+ }
+
+ // non-const MutableFst operations
+
+ // Sets the start state for this fst.
+ void SetStart(StateId s) {
+ edits_.SetStart(s);
+ }
+
+ // Sets the final weight of external state s to w and returns the weight
+ // that Final(s, wrapped) reported before the change.
+ Weight SetFinal(StateId s, Weight w, const WrappedFstT *wrapped) {
+   Weight old_weight = Final(s, wrapped);
+   IdMapIterator it = GetEditedIdMapIterator(s);
+   if (it == NotInEditedMap()) {
+     // s has not been copied into edits_ yet. Copying it just to change its
+     // final weight can be expensive if s has many transitions, so only
+     // record the new weight in the edited_final_weights_ map.
+     edited_final_weights_[s] = w;
+   } else {
+     // s already has a state in edits_ and `it` holds its internal id, so
+     // there is no need to look it up again via GetEditableInternalId().
+     edits_.SetFinal(it->second, w);
+   }
+   return old_weight;
+ }
+
+ // Adds a new state to this fst, initially with no arcs.
+ StateId AddState(StateId curr_num_states) {
+ StateId internal_state_id = edits_.AddState();
+ StateId external_state_id = curr_num_states;
+ external_to_internal_ids_[external_state_id] = internal_state_id;
+ num_new_states_++;
+ return external_state_id;
+ }
+
+ // Adds the specified arc to external state s of this fst, first making s
+ // editable (copying it into edits_) if it is not already.
+ //
+ // Returns a pointer to the arc that was last in s's arc list before the
+ // addition, or NULL if s had no arcs. The pointer refers to whatever
+ // ArcIterator<MutableFstT>::Value() references, so callers should use it
+ // immediately and not keep it across further mutations.
+ const A *AddArc(StateId s, const Arc &arc, const WrappedFstT *wrapped) {
+   StateId internal_id = GetEditableInternalId(s, wrapped);
+
+   size_t num_arcs = edits_.NumArcs(internal_id);
+   edits_.AddArc(internal_id, arc);
+   if (num_arcs == 0)
+     return NULL;
+   // Fetch the previously-last arc only *after* the insertion. AddArc may
+   // relocate the state's arc storage (presumably the case for the default
+   // VectorFst, which grows a vector), and a pointer obtained beforehand
+   // would then dangle by the time the caller reads it.
+   ArcIterator<MutableFstT> arc_it(edits_, internal_id);
+   arc_it.Seek(num_arcs - 1);
+   return &(arc_it.Value());
+ }
+
+ void DeleteStates() {
+ edits_.DeleteStates();
+ num_new_states_ = 0;
+ external_to_internal_ids_.clear();
+ edited_final_weights_.clear();
+ }
+
+ // Removes all but the first n outgoing arcs of the specified state.
+ void DeleteArcs(StateId s, size_t n, const WrappedFstT *wrapped) {
+ edits_.DeleteArcs(GetEditableInternalId(s, wrapped), n);
+ }
+
+ // Removes all outgoing arcs from the specified state.
+ void DeleteArcs(StateId s, const WrappedFstT *wrapped) {
+ edits_.DeleteArcs(GetEditableInternalId(s, wrapped));
+ }
+
+ // end methods for non-const MutableFst operations
+
+ // Provides information for the generic arc iterator.
+ void InitArcIterator(StateId s, ArcIteratorData<Arc> *data,
+ const WrappedFstT *wrapped) const {
+ IdMapIterator id_map_it = GetEditedIdMapIterator(s);
+ if (id_map_it == NotInEditedMap()) {
+ VLOG(3) << "EditFstData::InitArcIterator: iterating on state "
+ << s << " of original fst";
+ wrapped->InitArcIterator(s, data);
+ } else {
+ VLOG(2) << "EditFstData::InitArcIterator: iterating on edited state "
+ << s << " (internal state id: " << id_map_it->second << ")";
+ edits_.InitArcIterator(id_map_it->second, data);
+ }
+ }
+
+ // Provides information for the generic mutable arc iterator.
+ void InitMutableArcIterator(StateId s, MutableArcIteratorData<A> *data,
+ const WrappedFstT *wrapped) {
+ data->base =
+ new MutableArcIterator<MutableFstT>(&edits_,
+ GetEditableInternalId(s, wrapped));
+ }
+
+ // Prints out the map from external to internal state id's (for debugging
+ // purposes).
+ void PrintMap() {
+ for (IdMapIterator map_it = external_to_internal_ids_.begin();
+ map_it != NotInEditedMap(); ++map_it) {
+ LOG(INFO) << "(external,internal)=("
+ << map_it->first << "," << map_it->second << ")";
+ }
+ }
+
+
+ private:
+ void SetEmptyAndDeleteKeysForInternalMaps() {
+ }
+
+ // Returns the iterator of the map from external to internal state id's
+ // of edits_ for the specified external state id.
+ IdMapIterator GetEditedIdMapIterator(StateId s) const {
+ return external_to_internal_ids_.find(s);
+ }
+ IdMapIterator NotInEditedMap() const {
+ return external_to_internal_ids_.end();
+ }
+
+ FinalWeightIterator GetFinalWeightIterator(StateId s) const {
+ return edited_final_weights_.find(s);
+ }
+ FinalWeightIterator NotInFinalWeightMap() const {
+ return edited_final_weights_.end();
+ }
+
+ // Returns the internal state id of the specified external id if the state has
+ // already been made editable, or else copies the state from wrapped_
+ // to edits_ and returns the state id of the newly editable state in edits_.
+ //
+ // \return makes the specified state editable if it isn't already and returns
+ // its state id in edits_
+ StateId GetEditableInternalId(StateId s, const WrappedFstT *wrapped) {
+   IdMapIterator found = GetEditedIdMapIterator(s);
+   // Already editable: its state in edits_ is known, nothing to copy.
+   if (found != NotInEditedMap())
+     return found->second;
+
+   // First edit of s: create its state in edits_ and record the mapping.
+   StateId internal_id = edits_.AddState();
+   VLOG(2) << "EditFstData::GetEditableInternalId: editing state " << s
+           << " of original fst; new internal state id:" << internal_id;
+   external_to_internal_ids_[s] = internal_id;
+
+   // Copy every arc leaving s in the wrapped fst.
+   for (ArcIterator< Fst<A> > aiter(*wrapped, s); !aiter.Done(); aiter.Next())
+     edits_.AddArc(internal_id, aiter.Value());
+
+   // Copy the final weight. A weight recorded in edited_final_weights_ takes
+   // precedence over the wrapped fst's and moves into edits_, so its map
+   // entry is dropped afterwards.
+   FinalWeightIterator override_it = GetFinalWeightIterator(s);
+   if (override_it != NotInFinalWeightMap()) {
+     edits_.SetFinal(internal_id, override_it->second);
+     edited_final_weights_.erase(s);
+   } else {
+     edits_.SetFinal(internal_id, wrapped->Final(s));
+   }
+   return internal_id;
+ }
+
+ // A mutable fst (by default, a VectorFst) to contain new states, and/or
+ // copies of states from a wrapped ExpandedFst that have been modified in
+ // some way.
+ MutableFstT edits_;
+ // A mapping from external state id's to the internal id's of states that
+ // appear in edits_.
+ unordered_map<StateId, StateId> external_to_internal_ids_;
+ // A mapping from external state id's to final state weights assigned to
+ // those states. The states in this map are *only* those whose final weight
+ // has been modified; if any other part of the state has been modified,
+ // the entire state is copied to edits_, and all modifications reside there.
+ unordered_map<StateId, Weight> edited_final_weights_;
+ // The number of new states added to this mutable fst impl, which is <= the
+ // number of states in edits_ (since edits_ contains both edited *and* new
+ // states).
+ StateId num_new_states_;
+ RefCounter ref_count_;
+};
+
+// EditFstData method implementations: just the Read method.
+//
+// Reads an EditFstData from strm: first the MutableFstT holding the edits
+// (including its own header), then the external-to-internal id map, the
+// edited final weights and the new-state count. Returns a heap-allocated
+// instance that the caller owns, or 0 if reading failed; nothing is leaked
+// on the failure paths.
+template <typename A, typename WrappedFstT, typename MutableFstT>
+EditFstData<A, WrappedFstT, MutableFstT> *
+EditFstData<A, WrappedFstT, MutableFstT>::Read(istream &strm,
+                                               const FstReadOptions &opts) {
+  // first read in the MutableFstT machine that stores edits
+  FstReadOptions edits_opts(opts);
+  edits_opts.header = 0;  // Contained header was written out, so read it in.
+
+  // Because our internal representation of edited states is a solid object
+  // of type MutableFstT (defaults to VectorFst<A>) and not a pointer,
+  // and because the static Read method allocates a new object on the heap,
+  // we need to call Read, check if there was a failure, use
+  // MutableFstT::operator= to assign the object (not the pointer) to the
+  // edits_ data member (which will increase the ref count by 1 on the impl)
+  // and, finally, delete the heap-allocated object.
+  MutableFstT *edits = MutableFstT::Read(strm, edits_opts);
+  if (!edits) {
+    return 0;
+  }
+  // Allocate the result only now, so that the early return above has nothing
+  // to clean up.
+  EditFstData<A, WrappedFstT, MutableFstT> *data =
+      new EditFstData<A, WrappedFstT, MutableFstT>();
+  data->edits_ = *edits;
+  delete edits;
+  // finally, read in rest of private data members
+  ReadType(strm, &data->external_to_internal_ids_);
+  ReadType(strm, &data->edited_final_weights_);
+  ReadType(strm, &data->num_new_states_);
+  if (!strm) {
+    LOG(ERROR) << "EditFst::Read: read failed: " << opts.source;
+    delete data;  // Do not leak the partially-read object.
+    return 0;
+  }
+  return data;
+}
+
+// This class enables non-destructive edit operations on a wrapped ExpandedFst.
+// The implementation uses copy-on-write semantics at the node level: if a user
+// has an underlying fst on which he or she wants to perform a relatively small
+// number of edits (read: mutations), then this implementation will copy the
+// edited node to an internal MutableFst and perform any edits in situ on that
+// copied node. This class supports all the methods of MutableFst except for
+// DeleteStates(const vector<StateId> &); thus, new nodes may also be added, and
+// one may add transitions from existing nodes of the wrapped fst to new nodes.
+//
+// template parameters:
+// A the type of arc to use
+// WrappedFstT the type of fst wrapped by the EditFst instance that
+// this EditFstImpl instance is backing
+// MutableFstT the type of mutable fst to use internally for edited states;
+// crucially, MutableFstT::Copy(false) *must* yield an fst that is
+// thread-safe for reading (VectorFst, for example, has this property)
+template <typename A,
+ typename WrappedFstT = ExpandedFst<A>,
+ typename MutableFstT = VectorFst<A> >
+class EditFstImpl : public FstImpl<A> {
+ public:
+ using FstImpl<A>::SetProperties;
+ using FstImpl<A>::SetInputSymbols;
+ using FstImpl<A>::SetOutputSymbols;
+ using FstImpl<A>::WriteHeader;
+
+ typedef A Arc;
+ typedef typename Arc::Weight Weight;
+ typedef typename Arc::StateId StateId;
+
+ // Constructs an editable fst implementation with no states. Effectively,
+ // this initially-empty fst will in every way mimic the behavior of
+ // a VectorFst--more precisely, a VectorFstImpl instance--but with slightly
+ // slower performance (by a constant factor), due to the fact that
+ // this class maintains a mapping between external state id's and
+ // their internal equivalents.
+ EditFstImpl() {
+ FstImpl<A>::SetType("edit");
+ wrapped_ = new MutableFstT();
+ InheritPropertiesFromWrapped();
+ data_ = new EditFstData<A, WrappedFstT, MutableFstT>();
+ }
+
+ // Wraps the specified ExpandedFst. This constructor requires that the
+ // specified Fst is an ExpandedFst instance. This requirement is only enforced
+ // at runtime. (See below for the reason.)
+ //
+ // This library uses the pointer-to-implementation or "PIMPL" design pattern.
+ // In particular, to make it convenient to bind an implementation class to its
+ // interface, there are a pair of template "binder" classes, one for immutable
+ // and one for mutable fst's (ImplToFst and ImplToMutableFst, respectively).
+ // As it happens, the API for the ImplToMutableFst<I,F> class requires that
+ // the implementation class--the template parameter "I"--have a constructor
+ // taking a const Fst<A> reference. Accordingly, the constructor here must
+ // perform a static_cast to the WrappedFstT type required by EditFst and
+ // therefore EditFstImpl.
+ explicit EditFstImpl(const Fst<A> &wrapped)
+ : wrapped_(static_cast<WrappedFstT *>(wrapped.Copy())) {
+ FstImpl<A>::SetType("edit");
+
+ data_ = new EditFstData<A, WrappedFstT, MutableFstT>();
+ // have edits_ inherit all properties from wrapped_
+ data_->SetEditedProperties(wrapped_->Properties(kFstProperties, false),
+ kFstProperties);
+ InheritPropertiesFromWrapped();
+ }
+
+ // A copy constructor for this implementation class, used to implement
+ // the Copy() method of the Fst interface.
+ EditFstImpl(const EditFstImpl &impl)
+ : wrapped_(static_cast<WrappedFstT *>(impl.wrapped_->Copy(true))),
+ data_(impl.data_) {
+ data_->IncrRefCount();
+ SetProperties(impl.Properties());
+ }
+
+ ~EditFstImpl() {
+ delete wrapped_;
+ if (!data_->DecrRefCount()) {
+ delete data_;
+ }
+ }
+
+ // const Fst/ExpandedFst operations, declared in the Fst and ExpandedFst
+ // interfaces
+ StateId Start() const {
+ StateId edited_start = data_->EditedStart();
+ return edited_start == kNoStateId ? wrapped_->Start() : edited_start;
+ }
+
+ Weight Final(StateId s) const {
+ return data_->Final(s, wrapped_);
+ }
+
+ size_t NumArcs(StateId s) const {
+ return data_->NumArcs(s, wrapped_);
+ }
+
+ size_t NumInputEpsilons(StateId s) const {
+ return data_->NumInputEpsilons(s, wrapped_);
+ }
+
+ size_t NumOutputEpsilons(StateId s) const {
+ return data_->NumOutputEpsilons(s, wrapped_);
+ }
+
+ StateId NumStates() const {
+ return wrapped_->NumStates() + data_->NumNewStates();
+ }
+
+ static EditFstImpl<A, WrappedFstT, MutableFstT> *
+ Read(istream &strm,
+ const FstReadOptions &opts);
+
+ // Serializes this fst to strm in three consecutive parts:
+ // 1. a header carrying the start state and state count, written with
+ // symbol-table output switched off;
+ // 2. the wrapped fst, with writing of its own header forced on;
+ // 3. the EditFstData (edits and bookkeeping maps).
+ // The results of the individual writes are not inspected; failure is
+ // detected once, from the stream state after the final flush. Returns true
+ // on success, false (after logging an error) otherwise.
+ bool Write(ostream &strm, const FstWriteOptions &opts) const {
+ FstHeader hdr;
+ hdr.SetStart(Start());
+ hdr.SetNumStates(NumStates());
+ FstWriteOptions header_opts(opts);
+ header_opts.write_isymbols = false; // Let contained FST hold any symbols.
+ header_opts.write_osymbols = false;
+ WriteHeader(strm, header_opts, kFileVersion, &hdr);
+
+ // First, serialize wrapped fst to stream.
+ FstWriteOptions wrapped_opts(opts);
+ wrapped_opts.write_header = true; // Force writing contained header.
+ wrapped_->Write(strm, wrapped_opts);
+
+ // Then the mutable edit data.
+ data_->Write(strm, opts);
+
+ strm.flush();
+ if (!strm) {
+ LOG(ERROR) << "EditFst::Write: write failed: " << opts.source;
+ return false;
+ }
+ return true;
+ }
+ // end const Fst operations
+
+ // non-const MutableFst operations
+
+ // Sets the start state for this fst.
+ void SetStart(StateId s) {
+ MutateCheck();
+ data_->SetStart(s);
+ SetProperties(SetStartProperties(FstImpl<A>::Properties()));
+ }
+
+ // Sets the final state for this fst.
+ void SetFinal(StateId s, Weight w) {
+ MutateCheck();
+ Weight old_weight = data_->SetFinal(s, w, wrapped_);
+ SetProperties(SetFinalProperties(FstImpl<A>::Properties(), old_weight, w));
+ }
+
+ // Adds a new state to this fst, initially with no arcs.
+ StateId AddState() {
+ MutateCheck();
+ SetProperties(AddStateProperties(FstImpl<A>::Properties()));
+ return data_->AddState(NumStates());
+ }
+
+ // Adds the specified arc to the specified state of this fst.
+ void AddArc(StateId s, const Arc &arc) {
+ MutateCheck();
+ const A *prev_arc = data_->AddArc(s, arc, wrapped_);
+ SetProperties(AddArcProperties(FstImpl<A>::Properties(), s, arc, prev_arc));
+ }
+
+ void DeleteStates(const vector<StateId>& dstates) {
+ FSTERROR() << ": EditFstImpl::DeleteStates(const std::vector<StateId>&): "
+ << " not implemented";
+ SetProperties(kError, kError);
+ }
+
+ // Deletes all states in this fst.
+ void DeleteStates();
+
+ // Removes all but the first n outgoing arcs of the specified state.
+ void DeleteArcs(StateId s, size_t n) {
+ MutateCheck();
+ data_->DeleteArcs(s, n, wrapped_);
+ SetProperties(DeleteArcsProperties(FstImpl<A>::Properties()));
+ }
+
+ // Removes all outgoing arcs from the specified state.
+ void DeleteArcs(StateId s) {
+ MutateCheck();
+ data_->DeleteArcs(s, wrapped_);
+ SetProperties(DeleteArcsProperties(FstImpl<A>::Properties()));
+ }
+
+ void ReserveStates(StateId s) {
+ }
+
+ void ReserveArcs(StateId s, size_t n) {
+ }
+
+ // end non-const MutableFst operations
+
+ // Provides information for the generic state iterator.
+ void InitStateIterator(StateIteratorData<Arc> *data) const {
+ data->base = 0;
+ data->nstates = NumStates();
+ }
+
+ // Provides information for the generic arc iterator.
+ void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const {
+ data_->InitArcIterator(s, data, wrapped_);
+ }
+
+ // Provides information for the generic mutable arc iterator.
+ void InitMutableArcIterator(StateId s, MutableArcIteratorData<A> *data) {
+ MutateCheck();
+ data_->InitMutableArcIterator(s, data, wrapped_);
+ }
+
+ private:
+ typedef typename unordered_map<StateId, StateId>::const_iterator
+ IdMapIterator;
+ typedef typename unordered_map<StateId, Weight>::const_iterator
+ FinalWeightIterator;
+ // Properties always true of this Fst class
+ static const uint64 kStaticProperties = kExpanded | kMutable;
+ // Current file format version
+ static const int kFileVersion = 2;
+ // Minimum file format version supported
+ static const int kMinFileVersion = 2;
+
+ // Causes this fst to inherit all the properties from its wrapped fst, except
+ // for the two properties that always apply to EditFst instances: kExpanded
+ // and kMutable.
+ void InheritPropertiesFromWrapped() {
+ SetProperties(wrapped_->Properties(kCopyProperties, false) |
+ kStaticProperties);
+ SetInputSymbols(wrapped_->InputSymbols());
+ SetOutputSymbols(wrapped_->OutputSymbols());
+ }
+
+ // This method ensures that any operations that alter the mutable data
+ // portion of this EditFstImpl cause the data_ member to be copied when its
+ // reference count is greater than 1. Note that this method is distinct from
+ // MutableFst::Mutate, which gets invoked whenever one of the basic mutation
+ // methods defined in MutableFst is invoked, such as SetInputSymbols.
+ // The MutateCheck here in EditFstImpl is invoked whenever one of the
+ // mutating methods specifically related to the types of edits provided
+ // by EditFst is performed, such as changing an arc of an existing state
+ // of the wrapped fst via a MutableArcIterator, or adding a new state via
+ // AddState().
+ void MutateCheck() {
+   // Not shared with any other holder: data_ may be mutated in place.
+   if (data_->RefCount() <= 1)
+     return;
+
+   typedef EditFstData<A, WrappedFstT, MutableFstT> Data;
+   Data *private_copy = new Data(*data_);
+   // Give up our reference to the shared instance; free it if that turned
+   // out to be the last reference.
+   if (!data_->DecrRefCount())
+     delete data_;
+   data_ = private_copy;
+ }
+
+ // The fst that this fst wraps. The purpose of this class is to enable
+ // non-destructive edits on this wrapped fst.
+ const WrappedFstT *wrapped_;
+ // The mutable data for this EditFst instance, with delegates for all the
+ // methods that can mutate data.
+ EditFstData<A, WrappedFstT, MutableFstT> *data_;
+};
+
+template <typename A, typename WrappedFstT, typename MutableFstT>
+const uint64 EditFstImpl<A, WrappedFstT, MutableFstT>::kStaticProperties;
+
+// EditFstImpl IMPLEMENTATION STARTS HERE
+
+// Deletes all states of this fst: clears the edit data and replaces the
+// wrapped fst with an empty MutableFstT.
+template<typename A, typename WrappedFstT, typename MutableFstT>
+inline void EditFstImpl<A, WrappedFstT, MutableFstT>::DeleteStates() {
+  // Like the other mutating methods of this class, un-share data_ before
+  // touching it. The copy constructor lets several EditFstImpl instances
+  // point at one EditFstData, and clearing it in place would also erase the
+  // edits seen through those other instances.
+  MutateCheck();
+  data_->DeleteStates();
+  delete wrapped_;
+  // we are deleting all states, so just forget about pointer to wrapped_
+  // and do what default constructor does: set wrapped_ to a new MutableFstT
+  wrapped_ = new MutableFstT();
+  uint64 newProps = DeleteAllStatesProperties(FstImpl<A>::Properties(),
+                                              kStaticProperties);
+  FstImpl<A>::SetProperties(newProps);
+}
+
+// Reads an EditFstImpl from strm: the header, then the wrapped fst (with its
+// own header), then the EditFstData. Returns a heap-allocated impl owned by
+// the caller, or 0 on failure; every failure path releases what was
+// allocated so far.
+template <typename A, typename WrappedFstT, typename MutableFstT>
+EditFstImpl<A, WrappedFstT, MutableFstT> *
+EditFstImpl<A, WrappedFstT, MutableFstT>::Read(istream &strm,
+                                               const FstReadOptions &opts) {
+  // The default constructor gives impl a valid placeholder wrapped_ and
+  // data_, so `delete impl` is safe at every point below.
+  EditFstImpl<A, WrappedFstT, MutableFstT> *impl = new EditFstImpl();
+  FstHeader hdr;
+  if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) {
+    delete impl;
+    return 0;
+  }
+  impl->SetStart(hdr.Start());
+
+  // first, read in wrapped fst
+  FstReadOptions wrapped_opts(opts);
+  wrapped_opts.header = 0;  // Contained header was written out, so read it in.
+  Fst<A> *wrapped_fst = Fst<A>::Read(strm, wrapped_opts);
+  if (!wrapped_fst) {
+    delete impl;
+    return 0;
+  }
+  // Free the placeholder fst before installing the one just read.
+  delete impl->wrapped_;
+  impl->wrapped_ = static_cast<WrappedFstT *>(wrapped_fst);
+
+  EditFstData<A, WrappedFstT, MutableFstT> *data =
+      EditFstData<A, WrappedFstT, MutableFstT>::Read(strm, opts);
+  if (!data) {
+    // impl owns wrapped_fst by now and still holds its placeholder data_,
+    // so destroying impl releases everything.
+    delete impl;
+    return 0;
+  }
+  // Release the placeholder data, then install the data read from the stream.
+  if (!impl->data_->DecrRefCount()) {
+    delete impl->data_;
+  }
+  impl->data_ = data;
+
+  return impl;
+}
+
+// END EditFstImpl IMPLEMENTATION
+
+// Concrete, editable FST. This class attaches interface to implementation.
+template <typename A,
+ typename WrappedFstT = ExpandedFst<A>,
+ typename MutableFstT = VectorFst<A> >
+class EditFst :
+ public ImplToMutableFst< EditFstImpl<A, WrappedFstT, MutableFstT> > {
+ public:
+ friend class MutableArcIterator< EditFst<A, WrappedFstT, MutableFstT> >;
+
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+ typedef EditFstImpl<A, WrappedFstT, MutableFstT> Impl;
+
+ EditFst() : ImplToMutableFst<Impl>(new Impl()) {}
+
+ explicit EditFst(const Fst<A> &fst) :
+ ImplToMutableFst<Impl>(new Impl(fst)) {}
+
+ explicit EditFst(const WrappedFstT &fst) :
+ ImplToMutableFst<Impl>(new Impl(fst)) {}
+
+ // See Fst<>::Copy() for doc.
+ EditFst(const EditFst<A, WrappedFstT, MutableFstT> &fst, bool safe = false) :
+ ImplToMutableFst<Impl>(fst, safe) {}
+
+ virtual ~EditFst() {}
+
+ // Get a copy of this EditFst. See Fst<>::Copy() for further doc.
+ virtual EditFst<A, WrappedFstT, MutableFstT> *Copy(bool safe = false) const {
+ return new EditFst<A, WrappedFstT, MutableFstT>(*this, safe);
+ }
+
+ EditFst<A, WrappedFstT, MutableFstT> &
+ operator=(const EditFst<A, WrappedFstT, MutableFstT> &fst) {
+ SetImpl(fst.GetImpl(), false);
+ return *this;
+ }
+
+ virtual EditFst<A, WrappedFstT, MutableFstT> &operator=(const Fst<A> &fst) {
+ if (this != &fst) {
+ SetImpl(new Impl(fst));
+ }
+ return *this;
+ }
+
+ // Read an EditFst from an input stream; return NULL on error.
+ static EditFst<A, WrappedFstT, MutableFstT> *
+ Read(istream &strm,
+      const FstReadOptions &opts) {
+   Impl* impl = Impl::Read(strm, opts);
+   // Spell out all three template arguments: EditFst<A> alone names the
+   // instantiation with the default WrappedFstT/MutableFstT, whose Impl type
+   // and return type differ from ours whenever this class is instantiated
+   // with non-default arguments. This matches Read(const string &).
+   return impl ? new EditFst<A, WrappedFstT, MutableFstT>(impl) : 0;
+ }
+
+ // Read an EditFst from a file; return NULL on error.
+ // Empty filename reads from standard input.
+ static EditFst<A, WrappedFstT, MutableFstT> *Read(const string &filename) {
+ Impl* impl = ImplToExpandedFst<Impl, MutableFst<A> >::Read(filename);
+ return impl ? new EditFst<A, WrappedFstT, MutableFstT>(impl) : 0;
+ }
+
+ virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
+ return GetImpl()->Write(strm, opts);
+ }
+
+ virtual bool Write(const string &filename) const {
+ return Fst<A>::WriteFile(filename);
+ }
+
+ virtual void InitStateIterator(StateIteratorData<Arc> *data) const {
+ GetImpl()->InitStateIterator(data);
+ }
+
+ virtual void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const {
+ GetImpl()->InitArcIterator(s, data);
+ }
+
+ virtual
+ void InitMutableArcIterator(StateId s, MutableArcIteratorData<A> *data) {
+ GetImpl()->InitMutableArcIterator(s, data);
+ }
+ private:
+ explicit EditFst(Impl *impl) : ImplToMutableFst<Impl>(impl) {}
+
+ // Makes visible to friends.
+ Impl *GetImpl() const { return ImplToFst< Impl, MutableFst<A> >::GetImpl(); }
+
+ void SetImpl(Impl *impl, bool own_impl = true) {
+ ImplToFst< Impl, MutableFst<A> >::SetImpl(impl, own_impl);
+ }
+};
+
+} // namespace fst
+
+#endif // FST_LIB_EDIT_FST_H_
diff --git a/src/include/fst/encode.h b/src/include/fst/encode.h
new file mode 100644
index 0000000..7245b45
--- /dev/null
+++ b/src/include/fst/encode.h
@@ -0,0 +1,599 @@
+// encode.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: johans@google.com (Johan Schalkwyk)
+//
+// \file
+// Classes to encode and decode an FST.
+
+#ifndef FST_LIB_ENCODE_H__
+#define FST_LIB_ENCODE_H__
+
+#include <climits>
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/arc-map.h>
+#include <fst/rmfinalepsilon.h>
+
+
+namespace fst {
+
+// Flags selecting which parts of an arc are folded into the encoded label.
+static const uint32 kEncodeLabels = 0x0001;
+static const uint32 kEncodeWeights = 0x0002;
+static const uint32 kEncodeFlags = 0x0003; // All non-internal flags
+
+// Flags recording whether an encode table carries input/output symbol tables.
+static const uint32 kEncodeHasISymbols = 0x0004; // For internal use
+static const uint32 kEncodeHasOSymbols = 0x0008; // For internal use
+
+// Direction in which an EncodeMapper operates.
+enum EncodeType { ENCODE = 1, DECODE = 2 };
+
+// Identifies stream data as an encode table (and its endianness)
+static const int32 kEncodeMagicNumber = 2129983209;
+
+
+// The following class encapsulates implementation details for the
+// encoding and decoding of label/weight tuples used for encoding
+// and decoding of Fsts. The EncodeTable is bidirectional, i.e. it
+// stores both the mapping from a Tuple of labels and weight to a
+// unique label, and the reverse.
+//
+// Encoded labels are 1-based: the label of a tuple is its position in
+// the tuple vector plus one.
+template <class A> class EncodeTable {
+ public:
+  typedef typename A::Label Label;
+  typedef typename A::Weight Weight;
+
+  // Encoded data consists of arc input/output labels and arc weight
+  struct Tuple {
+    Tuple() {}
+    Tuple(Label ilabel_, Label olabel_, Weight weight_)
+        : ilabel(ilabel_), olabel(olabel_), weight(weight_) {}
+    Tuple(const Tuple& tuple)
+        : ilabel(tuple.ilabel), olabel(tuple.olabel), weight(tuple.weight) {}
+
+    Label ilabel;
+    Label olabel;
+    Weight weight;
+  };
+
+  // Comparison object for hashing EncodeTable Tuple(s).
+  class TupleEqual {
+   public:
+    bool operator()(const Tuple* x, const Tuple* y) const {
+      return (x->ilabel == y->ilabel &&
+              x->olabel == y->olabel &&
+              x->weight == y->weight);
+    }
+  };
+
+  // Hash function for EncodeTable Tuples. Based on the encode flags
+  // we either hash the labels, weights or combination of them.
+  class TupleKey {
+   public:
+    TupleKey()
+        : encode_flags_(kEncodeLabels | kEncodeWeights) {}
+
+    TupleKey(const TupleKey& key)
+        : encode_flags_(key.encode_flags_) {}
+
+    explicit TupleKey(uint32 encode_flags)
+        : encode_flags_(encode_flags) {}
+
+    // Starts from the input label and rotates in the output label and/or
+    // the weight hash, depending on the encode flags.
+    size_t operator()(const Tuple* x) const {
+      size_t hash = x->ilabel;
+      const int lshift = 5;
+      const int rshift = CHAR_BIT * sizeof(size_t) - 5;
+      if (encode_flags_ & kEncodeLabels)
+        hash = (hash << lshift) ^ (hash >> rshift) ^ x->olabel;
+      if (encode_flags_ & kEncodeWeights)
+        hash = (hash << lshift) ^ (hash >> rshift) ^ x->weight.Hash();
+      return hash;
+    }
+
+   private:
+    uint32 encode_flags_;  // Same type as the flags it is initialized from.
+  };
+
+  typedef unordered_map<const Tuple*,
+                        Label,
+                        TupleKey,
+                        TupleEqual> EncodeHash;
+
+  explicit EncodeTable(uint32 encode_flags)
+      : flags_(encode_flags),
+        encode_hash_(1024, TupleKey(encode_flags)),
+        isymbols_(0), osymbols_(0) {}
+
+  ~EncodeTable() {
+    for (size_t i = 0; i < encode_tuples_.size(); ++i) {
+      delete encode_tuples_[i];
+    }
+    delete isymbols_;
+    delete osymbols_;
+  }
+
+  // Given an arc, encode either input/output labels or input/costs or both.
+  // Returns the existing label for a known tuple, otherwise stores the tuple
+  // and returns its newly assigned label.
+  Label Encode(const A &arc) {
+    const Tuple tuple(arc.ilabel,
+                      flags_ & kEncodeLabels ? arc.olabel : 0,
+                      flags_ & kEncodeWeights ? arc.weight : Weight::One());
+    typename EncodeHash::const_iterator it = encode_hash_.find(&tuple);
+    if (it == encode_hash_.end()) {
+      encode_tuples_.push_back(new Tuple(tuple));
+      encode_hash_[encode_tuples_.back()] = encode_tuples_.size();
+      return encode_tuples_.size();
+    } else {
+      return it->second;
+    }
+  }
+
+  // Given an arc, look up its encoded label. Returns kNoLabel if not found.
+  Label GetLabel(const A &arc) const {
+    const Tuple tuple(arc.ilabel,
+                      flags_ & kEncodeLabels ? arc.olabel : 0,
+                      flags_ & kEncodeWeights ? arc.weight : Weight::One());
+    typename EncodeHash::const_iterator it = encode_hash_.find(&tuple);
+    if (it == encode_hash_.end()) {
+      return kNoLabel;
+    } else {
+      return it->second;
+    }
+  }
+
+  // Given an encoded arc Label, decode back to input/output labels and costs.
+  // Logs an error and returns NULL for a key outside [1, Size()].
+  const Tuple* Decode(Label key) const {
+    if (key < 1 || static_cast<size_t>(key) > encode_tuples_.size()) {
+      LOG(ERROR) << "EncodeTable::Decode: unknown decode key: " << key;
+      return 0;
+    }
+    return encode_tuples_[key - 1];
+  }
+
+  size_t Size() const { return encode_tuples_.size(); }
+
+  bool Write(ostream &strm, const string &source) const;
+
+  static EncodeTable<A> *Read(istream &strm, const string &source);
+
+  // Returns only the non-internal flags.
+  const uint32 flags() const { return flags_ & kEncodeFlags; }
+
+  int RefCount() const { return ref_count_.count(); }
+  int IncrRefCount() { return ref_count_.Incr(); }
+  int DecrRefCount() { return ref_count_.Decr(); }
+
+
+  SymbolTable *InputSymbols() const { return isymbols_; }
+
+  SymbolTable *OutputSymbols() const { return osymbols_; }
+
+  // Stores a copy of `syms` (or clears the stored table when `syms` is NULL)
+  // and keeps the kEncodeHasISymbols flag in sync.
+  void SetInputSymbols(const SymbolTable* syms) {
+    // Copy first: `syms` may be the table currently stored here, in which
+    // case deleting before copying would read freed memory.
+    SymbolTable *copy = syms ? syms->Copy() : 0;
+    delete isymbols_;
+    isymbols_ = copy;
+    if (copy) {
+      flags_ |= kEncodeHasISymbols;
+    } else {
+      flags_ &= ~kEncodeHasISymbols;
+    }
+  }
+
+  // Stores a copy of `syms` (or clears the stored table when `syms` is NULL)
+  // and keeps the kEncodeHasOSymbols flag in sync.
+  void SetOutputSymbols(const SymbolTable* syms) {
+    // Copy first: `syms` may be the table currently stored here, in which
+    // case deleting before copying would read freed memory.
+    SymbolTable *copy = syms ? syms->Copy() : 0;
+    delete osymbols_;
+    osymbols_ = copy;
+    if (copy) {
+      flags_ |= kEncodeHasOSymbols;
+    } else {
+      flags_ &= ~kEncodeHasOSymbols;
+    }
+  }
+
+ private:
+  uint32 flags_;
+  vector<Tuple*> encode_tuples_;  // Owned; index i holds the tuple for label i+1
+  EncodeHash encode_hash_;        // Keys point into encode_tuples_
+  RefCounter ref_count_;
+  SymbolTable *isymbols_;         // Pre-encoded ilabel symbol table
+  SymbolTable *osymbols_;         // Pre-encoded olabel symbol table
+
+  DISALLOW_COPY_AND_ASSIGN(EncodeTable);
+};
+
+// Serializes the table to `strm`: magic number, flags, tuple count, every
+// tuple (ilabel, olabel, weight), then whichever symbol tables the flags
+// say are present. `source` is only used in the error message. Returns
+// false if the stream is in a failed state after flushing.
+template <class A> inline
+bool EncodeTable<A>::Write(ostream &strm, const string &source) const {
+  WriteType(strm, kEncodeMagicNumber);
+  WriteType(strm, flags_);
+  const int64 num_tuples = encode_tuples_.size();
+  WriteType(strm, num_tuples);
+
+  typedef typename vector<Tuple*>::const_iterator TupleIter;
+  for (TupleIter it = encode_tuples_.begin();
+       it != encode_tuples_.end(); ++it) {
+    const Tuple &entry = **it;
+    WriteType(strm, entry.ilabel);
+    WriteType(strm, entry.olabel);
+    entry.weight.Write(strm);
+  }
+
+  if (flags_ & kEncodeHasISymbols) {
+    isymbols_->Write(strm);
+  }
+  if (flags_ & kEncodeHasOSymbols) {
+    osymbols_->Write(strm);
+  }
+
+  strm.flush();
+  if (strm) {
+    return true;
+  }
+  LOG(ERROR) << "EncodeTable::Write: write failed: " << source;
+  return false;
+}
+
+// Reads a table previously written by Write from `strm`. `source` is only
+// used in error messages. Returns a newly allocated table owned by the
+// caller, or NULL on a bad header or any read failure; nothing allocated
+// here is leaked on the failure paths.
+template <class A> inline
+EncodeTable<A> *EncodeTable<A>::Read(istream &strm, const string &source) {
+  int32 magic_number = 0;
+  ReadType(strm, &magic_number);
+  if (magic_number != kEncodeMagicNumber) {
+    LOG(ERROR) << "EncodeTable::Read: Bad encode table header: " << source;
+    return 0;
+  }
+  uint32 flags = 0;
+  ReadType(strm, &flags);
+  int64 size = 0;
+  ReadType(strm, &size);
+  // Validate before allocating: a failed read leaves flags/size meaningless
+  // and a negative count would otherwise become a huge unsigned loop bound.
+  if (!strm || size < 0) {
+    LOG(ERROR) << "EncodeTable::Read: read failed: " << source;
+    return 0;
+  }
+
+  EncodeTable<A> *table = new EncodeTable<A>(flags);
+  for (int64 i = 0; i < size; ++i) {
+    Tuple* tuple = new Tuple();
+    ReadType(strm, &tuple->ilabel);
+    ReadType(strm, &tuple->olabel);
+    tuple->weight.Read(strm);
+    if (!strm) {
+      LOG(ERROR) << "EncodeTable::Read: read failed: " << source;
+      delete tuple;  // Not yet owned by the table.
+      delete table;
+      return 0;
+    }
+    table->encode_tuples_.push_back(tuple);
+    table->encode_hash_[table->encode_tuples_.back()] =
+        table->encode_tuples_.size();
+  }
+
+  // A set "has symbols" flag with a NULL table would make a later Write
+  // dereference NULL, so a failed symbol-table read fails the whole read.
+  // NOTE(review): assumes SymbolTable::Read returns NULL on failure - confirm.
+  if (flags & kEncodeHasISymbols) {
+    table->isymbols_ = SymbolTable::Read(strm, source);
+    if (!table->isymbols_) {
+      LOG(ERROR) << "EncodeTable::Read: read failed: " << source;
+      delete table;
+      return 0;
+    }
+  }
+
+  if (flags & kEncodeHasOSymbols) {
+    table->osymbols_ = SymbolTable::Read(strm, source);
+    if (!table->osymbols_) {
+      LOG(ERROR) << "EncodeTable::Read: read failed: " << source;
+      delete table;
+      return 0;
+    }
+  }
+
+  return table;
+}
+
+
+// A mapper to encode/decode weighted transducers. Encoding of an
+// Fst is useful for performing classical determinization or minimization
+// on a weighted transducer by treating it as an unweighted acceptor over
+// encoded labels.
+//
+// The Encode mapper stores the encoding in a local hash table (EncodeTable)
+// This table is shared (and reference counted) between the encoder and
+// decoder. A decoder has read only access to the EncodeTable.
+//
+// The EncodeMapper allows on the fly encoding of the machine. As the
+// EncodeTable is generated the same table may be used to decode the machine
+// on the fly. For example in the following sequence of operations
+//
+// Encode -> Determinize -> Decode
+//
+// we will use the encoding table generated during the encode step in the
+// decode, even though the encoding is not complete.
+//
+template <class A> class EncodeMapper {
+  typedef typename A::Weight Weight;
+  typedef typename A::Label Label;
+ public:
+  // Creates a mapper with a fresh, empty encode table.
+  EncodeMapper(uint32 flags, EncodeType type)
+      : flags_(flags),
+        type_(type),
+        table_(new EncodeTable<A>(flags)),
+        error_(false) {}
+
+  // Shares the table of `mapper` (bumping its reference count); the error
+  // state of the copy starts out cleared.
+  EncodeMapper(const EncodeMapper& mapper)
+      : flags_(mapper.flags_),
+        type_(mapper.type_),
+        table_(mapper.table_),
+        error_(false) {
+    table_->IncrRefCount();
+  }
+
+  // Copy constructor but setting the type, typically to DECODE
+  EncodeMapper(const EncodeMapper& mapper, EncodeType type)
+      : flags_(mapper.flags_),
+        type_(type),
+        table_(mapper.table_),
+        error_(mapper.error_) {
+    table_->IncrRefCount();
+  }
+
+  // Drops this mapper's reference and deletes the table when the count
+  // reaches zero.
+  ~EncodeMapper() {
+    if (!table_->DecrRefCount()) delete table_;
+  }
+
+  A operator()(const A &arc);
+
+  MapFinalAction FinalAction() const {
+    return (type_ == ENCODE && (flags_ & kEncodeWeights)) ?
+        MAP_REQUIRE_SUPERFINAL : MAP_NO_SUPERFINAL;
+  }
+
+  MapSymbolsAction InputSymbolsAction() const { return MAP_CLEAR_SYMBOLS; }
+
+  MapSymbolsAction OutputSymbolsAction() const { return MAP_CLEAR_SYMBOLS;}
+
+  // Masks `inprops` down to the properties preserved by the configured
+  // encoding, adding kError first if a mapping error has been recorded.
+  uint64 Properties(uint64 inprops) {
+    uint64 outprops = inprops;
+    if (error_) outprops |= kError;
+
+    uint64 mask = kFstProperties;
+    if (flags_ & kEncodeLabels)
+      mask &= kILabelInvariantProperties & kOLabelInvariantProperties;
+    if (flags_ & kEncodeWeights)
+      mask &= kILabelInvariantProperties & kWeightInvariantProperties &
+          (type_ == ENCODE ? kAddSuperFinalProperties :
+           kRmSuperFinalProperties);
+
+    return outprops & mask;
+  }
+
+  const uint32 flags() const { return flags_; }
+  const EncodeType type() const { return type_; }
+  const EncodeTable<A> &table() const { return *table_; }
+
+  // Writes the encode table to `strm`; `source` names it in error messages.
+  bool Write(ostream &strm, const string& source) {
+    return table_->Write(strm, source);
+  }
+
+  // Writes the encode table to the named file; returns false if the file
+  // cannot be opened or the write fails.
+  bool Write(const string& filename) {
+    ofstream strm(filename.c_str(), ofstream::out | ofstream::binary);
+    if (!strm) {
+      LOG(ERROR) << "EncodeMap: Can't open file: " << filename;
+      return false;
+    }
+    return Write(strm, filename);
+  }
+
+  // Reads an encode table from `strm` and wraps it in a new mapper of the
+  // given type; returns NULL if the table cannot be read.
+  static EncodeMapper<A> *Read(istream &strm,
+                               const string& source,
+                               EncodeType type = ENCODE) {
+    EncodeTable<A> *table = EncodeTable<A>::Read(strm, source);
+    return table ? new EncodeMapper(table->flags(), type, table) : 0;
+  }
+
+  // As above, reading from the named file; returns NULL if it cannot be
+  // opened.
+  static EncodeMapper<A> *Read(const string& filename,
+                               EncodeType type = ENCODE) {
+    ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
+    if (!strm) {
+      LOG(ERROR) << "EncodeMap: Can't open file: " << filename;
+      return NULL;
+    }
+    return Read(strm, filename, type);
+  }
+
+  SymbolTable *InputSymbols() const { return table_->InputSymbols(); }
+
+  SymbolTable *OutputSymbols() const { return table_->OutputSymbols(); }
+
+  void SetInputSymbols(const SymbolTable* syms) {
+    table_->SetInputSymbols(syms);
+  }
+
+  void SetOutputSymbols(const SymbolTable* syms) {
+    table_->SetOutputSymbols(syms);
+  }
+
+ private:
+  uint32 flags_;
+  EncodeType type_;
+  EncodeTable<A>* table_;
+  bool error_;
+
+  // Adopts an already-built table (used by Read). error_ must be initialized
+  // here too: Properties() reads it, and an indeterminate value could
+  // spuriously report kError.
+  explicit EncodeMapper(uint32 flags, EncodeType type, EncodeTable<A> *table)
+      : flags_(flags), type_(type), table_(table), error_(false) {}
+  void operator=(const EncodeMapper &);  // Disallow.
+};
+
+// Maps one arc. When encoding, the arc's labels and/or weight are replaced
+// by a single label from the table; when decoding, the label is looked up
+// in the table and the original labels/weight are restored. Inconsistent
+// input while decoding is reported through FSTERROR and recorded in error_.
+template <class A> inline
+A EncodeMapper<A>::operator()(const A &arc) {
+  const bool no_next_state = arc.nextstate == kNoStateId;
+
+  if (type_ == ENCODE) {  // labels and/or weights to single label
+    // Arcs without a next state pass through untouched unless weights are
+    // being encoded and the weight is not Zero.
+    if (no_next_state &&
+        (!(flags_ & kEncodeWeights) || arc.weight == Weight::Zero())) {
+      return arc;
+    }
+    const Label encoded = table_->Encode(arc);
+    return A(encoded,
+             flags_ & kEncodeLabels ? encoded : arc.olabel,
+             flags_ & kEncodeWeights ? Weight::One() : arc.weight,
+             arc.nextstate);
+  }
+
+  // type_ == DECODE
+  if (no_next_state || arc.ilabel == 0) {
+    return arc;
+  }
+  if (flags_ & kEncodeLabels && arc.ilabel != arc.olabel) {
+    FSTERROR() << "EncodeMapper: Label-encoded arc has different "
+        "input and output labels";
+    error_ = true;
+  }
+  if (flags_ & kEncodeWeights && arc.weight != Weight::One()) {
+    FSTERROR() <<
+        "EncodeMapper: Weight-encoded arc has non-trivial weight";
+    error_ = true;
+  }
+  const typename EncodeTable<A>::Tuple* decoded = table_->Decode(arc.ilabel);
+  if (decoded) {
+    return A(decoded->ilabel,
+             flags_ & kEncodeLabels ? decoded->olabel : arc.olabel,
+             flags_ & kEncodeWeights ? decoded->weight : arc.weight,
+             arc.nextstate);
+  }
+  FSTERROR() << "EncodeMapper: decode failed";
+  error_ = true;
+  return A(kNoLabel, kNoLabel, Weight::NoWeight(), arc.nextstate);
+}
+
+
+// Encodes `fst` in place with `mapper`, after storing copies of the FST's
+// input and output symbol tables in the mapper.
+// NOTE(review): the symbols are presumably saved before ArcMap because the
+// mapper's symbol actions are MAP_CLEAR_SYMBOLS - confirm against ArcMap.
+// Complexity: O(nstates + narcs)
+template<class A> inline
+void Encode(MutableFst<A> *fst, EncodeMapper<A>* mapper) {
+ mapper->SetInputSymbols(fst->InputSymbols());
+ mapper->SetOutputSymbols(fst->OutputSymbols());
+ ArcMap(fst, mapper);
+}
+
+// Decodes `fst` in place using the table held by `mapper`: arcs are mapped
+// with a DECODE-typed copy of the mapper, RmFinalEpsilon is applied, and the
+// symbol tables stored in the mapper are then installed on `fst`.
+template<class A> inline
+void Decode(MutableFst<A>* fst, const EncodeMapper<A>& mapper) {
+ ArcMap(fst, EncodeMapper<A>(mapper, DECODE));
+ RmFinalEpsilon(fst);
+ fst->SetInputSymbols(mapper.InputSymbols());
+ fst->SetOutputSymbols(mapper.OutputSymbols());
+}
+
+
+// On the fly label and/or weight encoding of input Fst
+//
+// Complexity:
+// - Constructor: O(1)
+// - Traversal: O(nstates_visited + narcs_visited), assuming constant
+// time to visit an input state or arc.
+template <class A>
+class EncodeFst : public ArcMapFst<A, A, EncodeMapper<A> > {
+ public:
+ typedef A Arc;
+ typedef EncodeMapper<A> C;
+ typedef ArcMapFstImpl< A, A, EncodeMapper<A> > Impl;
+ using ImplToFst<Impl>::GetImpl;
+
+ // Encodes through the caller's mapper (passed by pointer) and stores the
+ // symbol tables of `fst` in that mapper.
+ EncodeFst(const Fst<A> &fst, EncodeMapper<A>* encoder)
+ : ArcMapFst<A, A, C>(fst, encoder, ArcMapFstOptions()) {
+ encoder->SetInputSymbols(fst.InputSymbols());
+ encoder->SetOutputSymbols(fst.OutputSymbols());
+ }
+
+ // Encodes with a mapper passed by const reference; no symbol tables are
+ // stored in the mapper by this constructor.
+ EncodeFst(const Fst<A> &fst, const EncodeMapper<A>& encoder)
+ : ArcMapFst<A, A, C>(fst, encoder, ArcMapFstOptions()) {}
+
+ // See Fst<>::Copy() for doc.
+ EncodeFst(const EncodeFst<A> &fst, bool copy = false)
+ : ArcMapFst<A, A, C>(fst, copy) {}
+
+ // Get a copy of this EncodeFst. See Fst<>::Copy() for further doc.
+ // A safe copy is not supported: requesting one logs an error and sets
+ // kError on the implementation, and a copy is still returned.
+ virtual EncodeFst<A> *Copy(bool safe = false) const {
+ if (safe) {
+ FSTERROR() << "EncodeFst::Copy(true): not allowed.";
+ GetImpl()->SetProperties(kError, kError);
+ }
+ return new EncodeFst(*this);
+ }
+};
+
+
+// On the fly label and/or weight decoding of input Fst
+//
+// Complexity:
+// - Constructor: O(1)
+// - Traversal: O(nstates_visited + narcs_visited), assuming constant
+// time to visit an input state or arc.
+template <class A>
+class DecodeFst : public ArcMapFst<A, A, EncodeMapper<A> > {
+ public:
+ typedef A Arc;
+ typedef EncodeMapper<A> C;
+ typedef ArcMapFstImpl< A, A, EncodeMapper<A> > Impl;
+ using ImplToFst<Impl>::GetImpl;
+
+ // Decodes with a DECODE-typed copy of `encoder` and installs the symbol
+ // tables stored in `encoder` on the implementation.
+ DecodeFst(const Fst<A> &fst, const EncodeMapper<A>& encoder)
+ : ArcMapFst<A, A, C>(fst,
+ EncodeMapper<A>(encoder, DECODE),
+ ArcMapFstOptions()) {
+ GetImpl()->SetInputSymbols(encoder.InputSymbols());
+ GetImpl()->SetOutputSymbols(encoder.OutputSymbols());
+ }
+
+ // See Fst<>::Copy() for doc.
+ DecodeFst(const DecodeFst<A> &fst, bool safe = false)
+ : ArcMapFst<A, A, C>(fst, safe) {}
+
+ // Get a copy of this DecodeFst. See Fst<>::Copy() for further doc.
+ virtual DecodeFst<A> *Copy(bool safe = false) const {
+ return new DecodeFst(*this, safe);
+ }
+};
+
+
+// Specialization for EncodeFst: state iteration is inherited unchanged from
+// the iterator of the underlying ArcMapFst.
+template <class A>
+class StateIterator< EncodeFst<A> >
+ : public StateIterator< ArcMapFst<A, A, EncodeMapper<A> > > {
+ public:
+ explicit StateIterator(const EncodeFst<A> &fst)
+ : StateIterator< ArcMapFst<A, A, EncodeMapper<A> > >(fst) {}
+};
+
+
+// Specialization for EncodeFst: arc iteration over state `s` is inherited
+// unchanged from the iterator of the underlying ArcMapFst.
+template <class A>
+class ArcIterator< EncodeFst<A> >
+ : public ArcIterator< ArcMapFst<A, A, EncodeMapper<A> > > {
+ public:
+ ArcIterator(const EncodeFst<A> &fst, typename A::StateId s)
+ : ArcIterator< ArcMapFst<A, A, EncodeMapper<A> > >(fst, s) {}
+};
+
+
+// Specialization for DecodeFst: state iteration is inherited unchanged from
+// the iterator of the underlying ArcMapFst.
+template <class A>
+class StateIterator< DecodeFst<A> >
+ : public StateIterator< ArcMapFst<A, A, EncodeMapper<A> > > {
+ public:
+ explicit StateIterator(const DecodeFst<A> &fst)
+ : StateIterator< ArcMapFst<A, A, EncodeMapper<A> > >(fst) {}
+};
+
+
+// Specialization for DecodeFst: arc iteration over state `s` is inherited
+// unchanged from the iterator of the underlying ArcMapFst.
+template <class A>
+class ArcIterator< DecodeFst<A> >
+ : public ArcIterator< ArcMapFst<A, A, EncodeMapper<A> > > {
+ public:
+ ArcIterator(const DecodeFst<A> &fst, typename A::StateId s)
+ : ArcIterator< ArcMapFst<A, A, EncodeMapper<A> > >(fst, s) {}
+};
+
+
+// Useful aliases when using StdArc: the EncodeFst and DecodeFst
+// instantiations over StdArc.
+typedef EncodeFst<StdArc> StdEncodeFst;
+
+typedef DecodeFst<StdArc> StdDecodeFst;
+
+} // namespace fst
+
+#endif // FST_LIB_ENCODE_H__
diff --git a/src/include/fst/epsnormalize.h b/src/include/fst/epsnormalize.h
new file mode 100644
index 0000000..696242b
--- /dev/null
+++ b/src/include/fst/epsnormalize.h
@@ -0,0 +1,74 @@
+// epsnormalize.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// Function that implements epsilon normalization.
+
+#ifndef FST_LIB_EPSNORMALIZE_H__
+#define FST_LIB_EPSNORMALIZE_H__
+
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <fst/slist.h>
+
+
+#include <fst/factor-weight.h>
+#include <fst/invert.h>
+#include <fst/arc-map.h>
+#include <fst/rmepsilon.h>
+
+
+namespace fst {
+
+enum EpsNormalizeType {EPS_NORM_INPUT, EPS_NORM_OUTPUT};
+
+// Returns an equivalent FST that is epsilon-normalized. An acceptor is
+// epsilon-normalized if it is epsilon-removed. A transducer is input
+// epsilon-normalized if, additionally, on each path any epsilon input
+// label follows all non-epsilon input labels. Output epsilon-normalized
+// is defined similarly.
+//
+// The input FST needs to be functional.
+//
+// Output normalization is implemented by inverting the input, normalizing
+// on the input side, and inverting the result back.
+//
+// References:
+// - Mehryar Mohri. "Generic epsilon-removal and input epsilon-normalization
+// algorithms for weighted transducers", International Journal of Computer
+// Science, 13(1): 129-143, 2002.
+template <class Arc>
+void EpsNormalize(const Fst<Arc> &ifst, MutableFst<Arc> *ofst,
+                  EpsNormalizeType type = EPS_NORM_INPUT) {
+  VectorFst< GallicArc<Arc, STRING_RIGHT_RESTRICT> > gfst;
+  if (type == EPS_NORM_INPUT)
+    ArcMap(ifst, &gfst, ToGallicMapper<Arc, STRING_RIGHT_RESTRICT>());
+  else  // type == EPS_NORM_OUTPUT
+    ArcMap(InvertFst<Arc>(ifst), &gfst,
+           ToGallicMapper<Arc, STRING_RIGHT_RESTRICT>());
+  RmEpsilon(&gfst);
+  FactorWeightFst< GallicArc<Arc, STRING_RIGHT_RESTRICT>,
+                   GallicFactor<typename Arc::Label,
+                                typename Arc::Weight, STRING_RIGHT_RESTRICT> >
+      fwfst(gfst);
+  ArcMap(fwfst, ofst, FromGallicMapper<Arc, STRING_RIGHT_RESTRICT>());
+  // In the output case the result is still inverted at this point, so its
+  // output side carries the labels of ifst's *input* side and must get
+  // ifst's input symbols.
+  // NOTE(review): relies on Invert(ofst) below swapping the two symbol
+  // tables back into place - confirm against Invert.
+  ofst->SetOutputSymbols(type == EPS_NORM_INPUT ? ifst.OutputSymbols()
+                                                : ifst.InputSymbols());
+  if (type == EPS_NORM_OUTPUT)
+    Invert(ofst);
+}
+
+} // namespace fst
+
+#endif // FST_LIB_EPSNORMALIZE_H__
diff --git a/src/include/fst/equal.h b/src/include/fst/equal.h
new file mode 100644
index 0000000..33be198
--- /dev/null
+++ b/src/include/fst/equal.h
@@ -0,0 +1,124 @@
+// equal.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Function to test equality of two Fsts.
+
+#ifndef FST_LIB_EQUAL_H__
+#define FST_LIB_EQUAL_H__
+
+#include <fst/fst.h>
+
+
+namespace fst {
+
+// Tests if two Fsts have the same states and arcs in the same order.
+// Start states, state ids, labels and next states must match exactly;
+// final and arc weights are compared with ApproxEqual using `delta`.
+// The first difference found is reported through VLOG(1) and makes the
+// function return false.
+template<class Arc>
+bool Equal(const Fst<Arc> &fst1, const Fst<Arc> &fst2, float delta = kDelta) {
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Weight Weight;
+
+  if (fst1.Start() != fst2.Start()) {
+    VLOG(1) << "Equal: mismatched start states";
+    return false;
+  }
+
+  StateIterator< Fst<Arc> > siter1(fst1);
+  StateIterator< Fst<Arc> > siter2(fst2);
+
+  // Walk both state sequences in lock step; if only one of them is
+  // exhausted the state counts differ.
+  while (!siter1.Done() || !siter2.Done()) {
+    if (siter1.Done() || siter2.Done()) {
+      VLOG(1) << "Equal: mismatched # of states";
+      return false;
+    }
+    StateId s1 = siter1.Value();
+    StateId s2 = siter2.Value();
+    if (s1 != s2) {
+      VLOG(1) << "Equal: mismatched states:"
+              << ", state1 = " << s1
+              << ", state2 = " << s2;
+      return false;
+    }
+    Weight final1 = fst1.Final(s1);
+    Weight final2 = fst2.Final(s2);
+    if (!ApproxEqual(final1, final2, delta)) {
+      VLOG(1) << "Equal: mismatched final weights:"
+              << " state = " << s1
+              << ", final1 = " << final1
+              << ", final2 = " << final2;
+      return false;
+    }
+    ArcIterator< Fst<Arc> > aiter1(fst1, s1);
+    ArcIterator< Fst<Arc> > aiter2(fst2, s2);
+    // Same lock-step walk over the arcs of the current state; `a` is only
+    // used to report the arc position.
+    for (size_t a = 0; !aiter1.Done() || !aiter2.Done(); ++a) {
+      if (aiter1.Done() || aiter2.Done()) {
+        VLOG(1) << "Equal: mismatched # of arcs"
+                << " state = " << s1;
+        return false;
+      }
+      // The arcs are only inspected before the iterators advance, so
+      // references avoid copying them.
+      const Arc &arc1 = aiter1.Value();
+      const Arc &arc2 = aiter2.Value();
+      if (arc1.ilabel != arc2.ilabel) {
+        VLOG(1) << "Equal: mismatched arc input labels:"
+                << " state = " << s1
+                << ", arc = " << a
+                << ", ilabel1 = " << arc1.ilabel
+                << ", ilabel2 = " << arc2.ilabel;
+        return false;
+      } else if (arc1.olabel != arc2.olabel) {
+        VLOG(1) << "Equal: mismatched arc output labels:"
+                << " state = " << s1
+                << ", arc = " << a
+                << ", olabel1 = " << arc1.olabel
+                << ", olabel2 = " << arc2.olabel;
+        return false;
+      } else if (!ApproxEqual(arc1.weight, arc2.weight, delta)) {
+        VLOG(1) << "Equal: mismatched arc weights:"
+                << " state = " << s1
+                << ", arc = " << a
+                << ", weight1 = " << arc1.weight
+                << ", weight2 = " << arc2.weight;
+        return false;
+      } else if (arc1.nextstate != arc2.nextstate) {
+        VLOG(1) << "Equal: mismatched arc next states:"
+                << " state = " << s1
+                << ", arc = " << a
+                << ", nextstate1 = " << arc1.nextstate
+                << ", nextstate2 = " << arc2.nextstate;
+        return false;
+      }
+      aiter1.Next();
+      aiter2.Next();
+    }
+    // Sanity checks: should never fail
+    if (fst1.NumArcs(s1) != fst2.NumArcs(s2) ||
+        fst1.NumInputEpsilons(s1) != fst2.NumInputEpsilons(s2) ||
+        fst1.NumOutputEpsilons(s1) != fst2.NumOutputEpsilons(s2)) {
+      FSTERROR() << "Equal: inconsistent arc/epsilon counts";
+    }
+
+    siter1.Next();
+    siter2.Next();
+  }
+  return true;
+}
+
+} // namespace fst
+
+
+#endif // FST_LIB_EQUAL_H__
diff --git a/src/include/fst/equivalent.h b/src/include/fst/equivalent.h
new file mode 100644
index 0000000..f05ff87
--- /dev/null
+++ b/src/include/fst/equivalent.h
@@ -0,0 +1,274 @@
+// equivalent.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: wojciech@google.com (Wojciech Skut)
+//
+// \file Functions and classes to determine the equivalence of two
+// FSTs.
+
+#ifndef FST_LIB_EQUIVALENT_H__
+#define FST_LIB_EQUIVALENT_H__
+
+#include <algorithm>
+#include <deque>
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <utility>
+using std::pair; using std::make_pair;
+#include <vector>
+using std::vector;
+
+#include <fst/encode.h>
+#include <fst/push.h>
+#include <fst/union-find.h>
+#include <fst/vector-fst.h>
+
+
+namespace fst {
+
+// Traits-like helper bundling the typedefs, constants and small functions
+// used by the equivalence algorithm.
+//
+// Encoding device: to keep the state sets of the two acceptors disjoint,
+// Arc::StateId values are mapped onto MappedId. States of the first
+// acceptor go to odd numbers (s -> 2s + 1) and states of the second to
+// even numbers (s -> 2s + 2). The number 0 is reserved for an implicit
+// (non-final) 'dead state', needed to treat non-coaccessible states
+// correctly; kNoStateId maps to kDeadState for both acceptors. The
+// union-find algorithm operates on the mapped IDs.
+template <class Arc>
+struct EquivalenceUtil {
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Weight Weight;
+  typedef StateId MappedId;  // ID for an equivalence class.
+
+  // MappedId for an implicit dead state.
+  static const MappedId kDeadState = 0;
+
+  // MappedId for lookup failure.
+  static const MappedId kInvalidId = -1;
+
+  // Maps a state ID of input FST number 'which_fst' (1 or 2) to its
+  // MappedId; kNoStateId becomes the dead state.
+  static MappedId MapState(StateId s, int32 which_fst) {
+    if (kNoStateId == s) {
+      return kDeadState;
+    }
+    return (static_cast<MappedId>(s) << 1) + which_fst;
+  }
+
+  // Inverse of MapState: recovers the state ID from a MappedId.
+  static StateId UnMapState(MappedId id) {
+    --id;
+    return static_cast<StateId>(id >> 1);
+  }
+
+  // Tells whether the state with MappedId 's' is final in acceptor 'fa';
+  // the dead state is never final.
+  static bool IsFinal(const Fst<Arc> &fa, MappedId s) {
+    if (kDeadState == s) {
+      return false;
+    }
+    return fa.Final(UnMapState(s)) != Weight::Zero();
+  }
+
+  // Returns the representative of 'id' in 'sets'; if 'id' is not in any
+  // set yet, a new set is made for it and 'id' itself is returned.
+  static MappedId FindSet(UnionFind<MappedId> *sets, MappedId id) {
+    const MappedId found = sets->FindSet(id);
+    if (found == kInvalidId) {
+      sets->MakeSet(id);
+      return id;
+    }
+    return found;
+  }
+};
+
+// Out-of-class definitions of the static constants declared (with their
+// initializers) inside EquivalenceUtil.
+template <class Arc> const
+typename EquivalenceUtil<Arc>::MappedId EquivalenceUtil<Arc>::kDeadState;
+
+template <class Arc> const
+typename EquivalenceUtil<Arc>::MappedId EquivalenceUtil<Arc>::kInvalidId;
+
+
+// Equivalence checking algorithm: determines if the two FSTs
+// <code>fst1</code> and <code>fst2</code> are equivalent. The input
+// FSTs must be deterministic input-side epsilon-free acceptors,
+// unweighted or with weights over a left semiring. Two acceptors are
+// considered equivalent if they accept exactly the same set of
+// strings (with the same weights).
+//
+// The algorithm (cf. Aho, Hopcroft and Ullman, "The Design and
+// Analysis of Computer Algorithms") successively constructs sets of
+// states that can be reached by the same prefixes, starting with a
+// set containing the start states of both acceptors. A disjoint tree
+// forest (the union-find algorithm) is used to represent the sets of
+// states. The algorithm returns 'false' if one of the constructed
+// sets contains both final and non-final states. Returns optional error
+// value (when FLAGS_error_fatal = false).
+//
+// Weighted inputs are first pushed, quantized with 'delta' and encoded
+// (labels and weights) into unweighted acceptors, which are then compared
+// by a nested call.
+//
+// Complexity: quasi-linear, i.e. O(n G(n)), where
+// n = |S1| + |S2| is the number of states in both acceptors
+// G(n) is a very slowly growing function that can be approximated
+// by 4 for all practical purposes.
+//
+template <class Arc>
+bool Equivalent(const Fst<Arc> &fst1,
+                const Fst<Arc> &fst2,
+                double delta = kDelta, bool *error = 0) {
+  typedef typename Arc::Weight Weight;
+  if (error) *error = false;
+
+  // Check that the symbol table are compatible
+  if (!CompatSymbols(fst1.InputSymbols(), fst2.InputSymbols()) ||
+      !CompatSymbols(fst1.OutputSymbols(), fst2.OutputSymbols())) {
+    FSTERROR() << "Equivalent: input/output symbol tables of 1st argument "
+               << "do not match input/output symbol tables of 2nd argument";
+    if (error) *error = true;
+    return false;
+  }
+  // Check properties first:
+  uint64 props = kNoEpsilons | kIDeterministic | kAcceptor;
+  if (fst1.Properties(props, true) != props) {
+    FSTERROR() << "Equivalent: first argument not an"
+               << " epsilon-free deterministic acceptor";
+    if (error) *error = true;
+    return false;
+  }
+  if (fst2.Properties(props, true) != props) {
+    FSTERROR() << "Equivalent: second argument not an"
+               << " epsilon-free deterministic acceptor";
+    if (error) *error = true;
+    return false;
+  }
+
+  if ((fst1.Properties(kUnweighted , true) != kUnweighted)
+      || (fst2.Properties(kUnweighted , true) != kUnweighted)) {
+    VectorFst<Arc> efst1(fst1);
+    VectorFst<Arc> efst2(fst2);
+    Push(&efst1, REWEIGHT_TO_INITIAL, delta);
+    Push(&efst2, REWEIGHT_TO_INITIAL, delta);
+    ArcMap(&efst1, QuantizeMapper<Arc>(delta));
+    ArcMap(&efst2, QuantizeMapper<Arc>(delta));
+    // Both machines are encoded with the same mapper so that equal
+    // (label, weight) tuples receive the same encoded label.
+    EncodeMapper<Arc> mapper(kEncodeWeights|kEncodeLabels, ENCODE);
+    ArcMap(&efst1, &mapper);
+    ArcMap(&efst2, &mapper);
+    // Forward delta and error so that an error detected by the nested
+    // check is reported to the caller instead of being dropped.
+    return Equivalent(efst1, efst2, delta, error);
+  }
+
+  // Convenience typedefs:
+  typedef typename Arc::StateId StateId;
+  typedef EquivalenceUtil<Arc> Util;
+  typedef typename Util::MappedId MappedId;
+  enum { FST1 = 1, FST2 = 2 };  // Required by Util::MapState(...)
+
+  MappedId s1 = Util::MapState(fst1.Start(), FST1);
+  MappedId s2 = Util::MapState(fst2.Start(), FST2);
+
+  // The union-find structure.
+  UnionFind<MappedId> eq_classes(1000, Util::kInvalidId);
+
+  // Initialize the union-find structure.
+  eq_classes.MakeSet(s1);
+  eq_classes.MakeSet(s2);
+
+  // Data structure for the (partial) acceptor transition function of
+  // fst1 and fst2: input labels mapped to pairs of MappedId's
+  // representing destination states of the corresponding arcs in fst1
+  // and fst2, respectively. A label missing on one side keeps that
+  // side's value-initialized entry, i.e. kDeadState.
+  typedef
+      unordered_map<typename Arc::Label, pair<MappedId, MappedId> >
+      Label2StatePairMap;
+
+  Label2StatePairMap arc_pairs;
+
+  // Pairs of MappedId's to be processed, organized in a queue.
+  deque<pair<MappedId, MappedId> > q;
+
+  bool ret = true;
+  // Skip the main loop if the start states differ w.r.t. being final.
+  if (Util::IsFinal(fst1, s1) != Util::IsFinal(fst2, s2)) {
+    ret = false;
+  }
+
+  // Main loop: explores the two acceptors in a breadth-first manner,
+  // updating the equivalence relation on the statesets. Loop
+  // invariant: each block of states contains either final states only
+  // or non-final states only.
+  for (q.push_back(make_pair(s1, s2)); ret && !q.empty(); q.pop_front()) {
+    s1 = q.front().first;
+    s2 = q.front().second;
+
+    // Representatives of the equivalence classes of s1/s2.
+    MappedId rep1 = Util::FindSet(&eq_classes, s1);
+    MappedId rep2 = Util::FindSet(&eq_classes, s2);
+
+    if (rep1 != rep2) {
+      eq_classes.Union(rep1, rep2);
+      arc_pairs.clear();
+
+      // Copy outgoing arcs starting at s1 into the hashtable.
+      if (Util::kDeadState != s1) {
+        ArcIterator<Fst<Arc> > arc_iter(fst1, Util::UnMapState(s1));
+        for (; !arc_iter.Done(); arc_iter.Next()) {
+          const Arc &arc = arc_iter.Value();
+          // Zero-weight arcs are treated as non-existent.
+          if (arc.weight != Weight::Zero()) {
+            arc_pairs[arc.ilabel].first = Util::MapState(arc.nextstate, FST1);
+          }
+        }
+      }
+      // Copy outgoing arcs starting at s2 into the hashtable.
+      if (Util::kDeadState != s2) {
+        ArcIterator<Fst<Arc> > arc_iter(fst2, Util::UnMapState(s2));
+        for (; !arc_iter.Done(); arc_iter.Next()) {
+          const Arc &arc = arc_iter.Value();
+          // Zero-weight arcs are treated as non-existent.
+          if (arc.weight != Weight::Zero()) {
+            arc_pairs[arc.ilabel].second = Util::MapState(arc.nextstate, FST2);
+          }
+        }
+      }
+      // Iterate through the hashtable and process pairs of target
+      // states.
+      for (typename Label2StatePairMap::const_iterator
+               arc_iter = arc_pairs.begin();
+           arc_iter != arc_pairs.end();
+           ++arc_iter) {
+        const pair<MappedId, MappedId> &p = arc_iter->second;
+        if (Util::IsFinal(fst1, p.first) != Util::IsFinal(fst2, p.second)) {
+          // Detected inconsistency: return false.
+          ret = false;
+          break;
+        }
+        q.push_back(p);
+      }
+    }
+  }
+
+  if (fst1.Properties(kError, false) || fst2.Properties(kError, false)) {
+    if (error) *error = true;
+    return false;
+  }
+
+  return ret;
+}
+
+} // namespace fst
+
+#endif // FST_LIB_EQUIVALENT_H__
diff --git a/src/include/fst/expanded-fst.h b/src/include/fst/expanded-fst.h
new file mode 100644
index 0000000..b44b81c
--- /dev/null
+++ b/src/include/fst/expanded-fst.h
@@ -0,0 +1,189 @@
+// expanded-fst.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Generic FST augmented with state count - interface class definition.
+//
+
+#ifndef FST_LIB_EXPANDED_FST_H__
+#define FST_LIB_EXPANDED_FST_H__
+
+#include <sys/types.h>
+#include <string>
+
+#include <fst/fst.h>
+
+
+namespace fst {
+
+ // Interface for FSTs that can report their total number of states through
+ // NumStates(), on top of everything the generic Fst<A> interface offers.
+ template <class A>
+ class ExpandedFst : public Fst<A> {
+  public:
+   typedef A Arc;
+   typedef typename A::StateId StateId;
+
+   // Number of states in this FST.
+   virtual StateId NumStates() const = 0;
+
+   // Get a copy of this ExpandedFst. See Fst<>::Copy() for further doc.
+   virtual ExpandedFst<A> *Copy(bool safe = false) const = 0;
+
+   // Reads an ExpandedFst from an input stream. Returns NULL if the header
+   // cannot be read, if the header lacks the kExpanded property, if no reader
+   // is registered for the header's FST type, or if that reader fails.
+   static ExpandedFst<A> *Read(istream &strm, const FstReadOptions &opts) {
+     FstReadOptions ropts(opts);
+     FstHeader hdr;
+     if (!ropts.header) {
+       // No header was supplied by the caller: read one from the stream and
+       // pass it on to the concrete reader through the copied options.
+       if (!hdr.Read(strm, opts.source)) return 0;
+       ropts.header = &hdr;
+     } else {
+       hdr = *opts.header;
+     }
+
+     if (!(hdr.Properties() & kExpanded)) {
+       LOG(ERROR) << "ExpandedFst::Read: Not an ExpandedFst: " << ropts.source;
+       return 0;
+     }
+
+     const typename FstRegister<A>::Reader reader =
+         FstRegister<A>::GetRegister()->GetReader(hdr.FstType());
+     if (!reader) {
+       LOG(ERROR) << "ExpandedFst::Read: Unknown FST type \"" << hdr.FstType()
+                  << "\" (arc type = \"" << A::Type()
+                  << "\"): " << ropts.source;
+       return 0;
+     }
+
+     Fst<A> *fst = reader(strm, ropts);
+     return fst ? static_cast<ExpandedFst<A> *>(fst) : 0;
+   }
+
+   // Reads an ExpandedFst from a file; returns NULL on error.
+   // An empty filename means "read from standard input".
+   static ExpandedFst<A> *Read(const string &filename) {
+     if (filename.empty())
+       return Read(std::cin, FstReadOptions("standard input"));
+
+     ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
+     if (!strm) {
+       LOG(ERROR) << "ExpandedFst::Read: Can't open file: " << filename;
+       return 0;
+     }
+     return Read(strm, FstReadOptions(filename));
+   }
+ };
+
+
+ namespace internal {
+
+ // Overloads taking an ExpandedFst<A>: each one forwards to the FST's member
+ // function of the same name.
+
+ // Final weight of state s.
+ template <class A>
+ inline typename A::Weight Final(const ExpandedFst<A> &fst,
+                                 typename A::StateId s) {
+   return fst.Final(s);
+ }
+
+ // Number of arcs leaving state s.
+ template <class A>
+ inline ssize_t NumArcs(const ExpandedFst<A> &fst,
+                        typename A::StateId s) {
+   return fst.NumArcs(s);
+ }
+
+ // Number of input-epsilon arcs leaving state s.
+ template <class A>
+ inline ssize_t NumInputEpsilons(const ExpandedFst<A> &fst,
+                                 typename A::StateId s) {
+   return fst.NumInputEpsilons(s);
+ }
+
+ // Number of output-epsilon arcs leaving state s.
+ template <class A>
+ inline ssize_t NumOutputEpsilons(const ExpandedFst<A> &fst,
+                                  typename A::StateId s) {
+   return fst.NumOutputEpsilons(s);
+ }
+
+ }  // namespace internal
+
+
+ // Convenience alias for an ExpandedFst over StdArc.
+ typedef ExpandedFst<StdArc> StdExpandedFst;
+
+
+ // Helper class template that attaches the ExpandedFst interface to an
+ // implementation class I, handling reference counting. Everything that
+ // belongs to the plain Fst interface is delegated to ImplToFst; this layer
+ // only adds NumStates() and a file-reading helper.
+ template < class I, class F = ExpandedFst<typename I::Arc> >
+ class ImplToExpandedFst : public ImplToFst<I, F> {
+  public:
+   typedef typename I::Arc Arc;
+   typedef typename Arc::Weight Weight;
+   typedef typename Arc::StateId StateId;
+
+   using ImplToFst<I, F>::GetImpl;
+
+   // State count, as reported by the implementation object.
+   virtual StateId NumStates() const { return GetImpl()->NumStates(); }
+
+  protected:
+   ImplToExpandedFst() : ImplToFst<I, F>() {}
+
+   ImplToExpandedFst(I *impl) : ImplToFst<I, F>(impl) {}
+
+   ImplToExpandedFst(const ImplToExpandedFst<I, F> &fst)
+       : ImplToFst<I, F>(fst) {}
+
+   ImplToExpandedFst(const ImplToExpandedFst<I, F> &fst, bool safe)
+       : ImplToFst<I, F>(fst, safe) {}
+
+   // Reads an FST implementation from a file; returns NULL on error.
+   // An empty filename means "read from standard input".
+   static I *Read(const string &filename) {
+     if (filename.empty())
+       return I::Read(std::cin, FstReadOptions("standard input"));
+
+     ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
+     if (!strm) {
+       LOG(ERROR) << "ExpandedFst::Read: Can't open file: " << filename;
+       return 0;
+     }
+     return I::Read(strm, FstReadOptions(filename));
+   }
+
+  private:
+   // Assignment is disallowed: this overload is declared but not defined.
+   ImplToExpandedFst<I, F> &operator=(const ImplToExpandedFst<I, F> &fst);
+
+   // Assignment from a generic Fst is also disallowed; if it is reached
+   // anyway, it reports the misuse and sets the kError property.
+   ImplToExpandedFst<I, F> &operator=(const Fst<Arc> &fst) {
+     FSTERROR() << "ImplToExpandedFst: Assignment operator disallowed";
+     GetImpl()->SetProperties(kError, kError);
+     return *this;
+   }
+ };
+
+ // Returns the number of states in an FST. When the kExpanded property is
+ // set the FST is treated as an ExpandedFst and asked directly; otherwise the
+ // states are counted by iterating over them.
+ template <class Arc>
+ typename Arc::StateId CountStates(const Fst<Arc> &fst) {
+   typedef typename Arc::StateId StateId;
+
+   if (!fst.Properties(kExpanded, false)) {
+     StateId count = 0;
+     StateIterator< Fst<Arc> > siter(fst);
+     while (!siter.Done()) {
+       ++count;
+       siter.Next();
+     }
+     return count;
+   }
+
+   const ExpandedFst<Arc> &expanded =
+       static_cast<const ExpandedFst<Arc> &>(fst);
+   return expanded.NumStates();
+ }
+
+} // namespace fst
+
+#endif // FST_LIB_EXPANDED_FST_H__
diff --git a/src/include/fst/expectation-weight.h b/src/include/fst/expectation-weight.h
new file mode 100644
index 0000000..5226cad
--- /dev/null
+++ b/src/include/fst/expectation-weight.h
@@ -0,0 +1,142 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: krr@google.com (Kasturi Rangan Raghavan)
+// Inspiration: shumash@google.com (Masha Maria Shugrina)
+// \file
+// Expectation semiring as described by Jason Eisner:
+// See: doi=10.1.1.22.9398
+// Multiplex semiring operations and identities:
+// One: <One, Zero>
+// Zero: <Zero, Zero>
+// Plus: <a1, b1> + <a2, b2> = < (a1 + a2) , (b1 + b2) >
+ // Times: <a1, b1> * <a2, b2> = < (a1 * a2) , [(a1 * b2) + (b1 * a2)] >
+// Division: Undefined (currently)
+//
+// Usually used to store the pair <probability, random_variable> so that
+// ShortestDistance[Fst<ArcTpl<ExpectationWeight<P, V> > >]
+// == < PosteriorProbability, Expected_Value[V] >
+
+#ifndef FST_LIB_EXPECTATION_WEIGHT_H_
+#define FST_LIB_EXPECTATION_WEIGHT_H_
+
+#include<string>
+
+#include <fst/pair-weight.h>
+
+
+namespace fst {
+
+ // Pair weight <X1, X2> implementing the expectation semiring.
+ //
+ // X1 is usually a probability weight like LogWeight.
+ // X2 is usually a random variable or vector;
+ // see SignedLogWeight or SparsePowerWeight.
+ //
+ // If X1 is distinct from X2, it is required that there is an external
+ // product between X1 and X2 and if both semirings are commutative, or
+ // left or right semirings, then result must have those properties.
+ template <class X1, class X2>
+ class ExpectationWeight : public PairWeight<X1, X2> {
+  public:
+   using PairWeight<X1, X2>::Value1;
+   using PairWeight<X1, X2>::Value2;
+
+   using PairWeight<X1, X2>::Reverse;
+   using PairWeight<X1, X2>::Quantize;
+   using PairWeight<X1, X2>::Member;
+
+   typedef X1 W1;
+   typedef X2 W2;
+
+   typedef ExpectationWeight<typename X1::ReverseWeight,
+                             typename X2::ReverseWeight> ReverseWeight;
+
+   // Default-constructs to the semiring Zero, <Zero, Zero>.
+   ExpectationWeight() : PairWeight<X1, X2>(Zero()) { }
+
+   ExpectationWeight(const ExpectationWeight<X1, X2>& w)
+       : PairWeight<X1, X2> (w) { }
+
+   ExpectationWeight(const PairWeight<X1, X2>& w)
+       : PairWeight<X1, X2> (w) { }
+
+   ExpectationWeight(const X1& x1, const X2& x2)
+       : PairWeight<X1, X2>(x1, x2) { }
+
+   // Additive identity: <X1::Zero, X2::Zero>.
+   static const ExpectationWeight<X1, X2> &Zero() {
+     static const ExpectationWeight<X1, X2> zero(X1::Zero(), X2::Zero());
+     return zero;
+   }
+
+   // Multiplicative identity: <X1::One, X2::Zero>.
+   static const ExpectationWeight<X1, X2> &One() {
+     static const ExpectationWeight<X1, X2> one(X1::One(), X2::Zero());
+     return one;
+   }
+
+   // The "no weight" value: <X1::NoWeight, X2::NoWeight>.
+   static const ExpectationWeight<X1, X2> &NoWeight() {
+     static const ExpectationWeight<X1, X2> no_weight(X1::NoWeight(),
+                                                      X2::NoWeight());
+     return no_weight;
+   }
+
+   // Type name: "expectation_" + X1::Type() + "_" + X2::Type().
+   static const string &Type() {
+     static const string type = "expectation_" + X1::Type() + "_" + X2::Type();
+     return type;
+   }
+
+   // Quantizes both components. The caller's delta is forwarded to the base
+   // class; previously it was dropped and the base default was always used.
+   PairWeight<X1, X2> Quantize(float delta = kDelta) const {
+     return PairWeight<X1, X2>::Quantize(delta);
+   }
+
+   ReverseWeight Reverse() const {
+     return PairWeight<X1, X2>::Reverse();
+   }
+
+   bool Member() const {
+     return PairWeight<X1, X2>::Member();
+   }
+
+   // Only the listed semiring properties are reported, and only when both
+   // component weights have them.
+   static uint64 Properties() {
+     uint64 props1 = W1::Properties();
+     uint64 props2 = W2::Properties();
+     return props1 & props2 & (kLeftSemiring | kRightSemiring |
+                               kCommutative | kIdempotent);
+   }
+ };
+
+ // Semiring addition: component-wise Plus of the two pairs.
+ template <class X1, class X2>
+ inline ExpectationWeight<X1, X2> Plus(const ExpectationWeight<X1, X2> &w,
+                                       const ExpectationWeight<X1, X2> &v) {
+   typedef ExpectationWeight<X1, X2> EW;
+   return EW(Plus(w.Value1(), v.Value1()),
+             Plus(w.Value2(), v.Value2()));
+ }
+
+
+ // Semiring multiplication:
+ //   <a1, b1> * <a2, b2> = < a1 * a2, (a1 * b2) + (b1 * a2) >
+ template <class X1, class X2>
+ inline ExpectationWeight<X1, X2> Times(const ExpectationWeight<X1, X2> &w,
+                                        const ExpectationWeight<X1, X2> &v) {
+   typedef ExpectationWeight<X1, X2> EW;
+   return EW(
+       Times(w.Value1(), v.Value1()),
+       Plus(Times(w.Value1(), v.Value2()), Times(w.Value2(), v.Value1())));
+ }
+
+template <class X1, class X2>
+inline ExpectationWeight<X1, X2> Divide(const ExpectationWeight<X1, X2> &w,
+ const ExpectationWeight<X1, X2> &v,
+ DivideType typ = DIVIDE_ANY) {
+ FSTERROR() << "ExpectationWeight::Divide: not implemented";
+ return ExpectationWeight<X1, X2>::NoWeight();
+}
+
+} // namespace fst
+
+#endif // FST_LIB_EXPECTATION_WEIGHT_H_
diff --git a/src/include/fst/extensions/far/compile-strings.h b/src/include/fst/extensions/far/compile-strings.h
new file mode 100644
index 0000000..d7f4d6b
--- /dev/null
+++ b/src/include/fst/extensions/far/compile-strings.h
@@ -0,0 +1,271 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Authors: allauzen@google.com (Cyril Allauzen)
+// ttai@google.com (Terry Tai)
+// jpr@google.com (Jake Ratkiewicz)
+
+
+#ifndef FST_EXTENSIONS_FAR_COMPILE_STRINGS_H_
+#define FST_EXTENSIONS_FAR_COMPILE_STRINGS_H_
+
+#include <libgen.h>
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/extensions/far/far.h>
+#include <fst/string.h>
+
+namespace fst {
+
+ // Construct a reader that provides FSTs from a file (stream) either on a
+ // line-by-line basis or on a per-stream basis. Note that the freshly
+ // constructed reader is already set to the first input.
+ //
+ // Sample Usage:
+ //   for (StringReader<Arc> reader(...); !reader.Done(); reader.Next()) {
+ //     VectorFst<Arc> *fst = reader.GetVectorFst();  // caller owns; may be NULL
+ //   }
+ template <class A>
+ class StringReader {
+  public:
+   typedef A Arc;
+   typedef typename A::Label Label;
+   typedef typename A::Weight Weight;
+   typedef typename StringCompiler<A>::TokenType TokenType;
+
+   // LINE: every input line is one entry. FILE: the whole stream is one entry.
+   enum EntryType { LINE = 1, FILE = 2 };
+
+   // The stream is held by reference and must outlive the reader. token_type,
+   // syms, unknown_label and allow_negative_labels are handed to the
+   // StringCompiler that turns each entry into an FST.
+   // unknown_label is a label, so its default is the label sentinel kNoLabel
+   // (the state-id sentinel kNoStateId was used here before).
+   StringReader(istream &istrm,
+                const string &source,
+                EntryType entry_type,
+                TokenType token_type,
+                bool allow_negative_labels,
+                const SymbolTable *syms = 0,
+                Label unknown_label = kNoLabel)
+       : nline_(0), strm_(istrm), source_(source), entry_type_(entry_type),
+         token_type_(token_type), done_(false),
+         compiler_(token_type, syms, unknown_label, allow_negative_labels) {
+     Next();  // Initialize the reader to the first input.
+   }
+
+   // True once there is no further entry to compile.
+   bool Done() const {
+     return done_;
+   }
+
+   // Loads the next entry (one line, or the rest of the stream) into
+   // content_, or marks the reader as done.
+   void Next() {
+     VLOG(1) << "Processing source " << source_ << " at line " << nline_;
+     if (!strm_) {  // We're done if we have no more input.
+       done_ = true;
+       return;
+     }
+     if (entry_type_ == LINE) {
+       getline(strm_, content_);
+       ++nline_;
+     } else {
+       content_.clear();
+       string line;
+       while (getline(strm_, line)) {
+         ++nline_;
+         content_.append(line);
+         content_.append("\n");
+       }
+     }
+     if (!strm_ && content_.empty())  // We're also done if we read off all the
+       done_ = true;                  // whitespace at the end of a file.
+   }
+
+   // Compiles the current entry into a newly allocated VectorFst owned by the
+   // caller; returns NULL if compilation fails.
+   VectorFst<A> *GetVectorFst() {
+     VectorFst<A> *fst = new VectorFst<A>;
+     if (compiler_(content_, fst)) {
+       return fst;
+     } else {
+       delete fst;
+       return NULL;
+     }
+   }
+
+   // Same as GetVectorFst(), but produces a string-compacted CompactFst.
+   CompactFst<A, StringCompactor<A> > *GetCompactFst() {
+     CompactFst<A, StringCompactor<A> > *fst =
+         new CompactFst<A, StringCompactor<A> >;
+     if (compiler_(content_, fst)) {
+       return fst;
+     } else {
+       delete fst;
+       return NULL;
+     }
+   }
+
+  private:
+   size_t nline_;           // Number of lines read so far.
+   istream &strm_;
+   string source_;          // Name of the input, used in log output.
+   EntryType entry_type_;
+   TokenType token_type_;
+   bool done_;
+   StringCompiler<A> compiler_;
+   string content_;  // The actual content of the input stream's next FST.
+
+   DISALLOW_COPY_AND_ASSIGN(StringReader);
+ };
+
+ // Compute the minimal length required to encode each line number as a decimal
+ // number (declaration only; the definition is not part of this header).
+ int KeySize(const char *filename);  // Used below as the zero-padded key width.
+
+ // Compiles strings read from the input files into FSTs and stores them in
+ // a new FST archive written to out_fname.
+ //
+ //   in_fnames       input files, or files listing the inputs one per line
+ //                   when file_list_input is set
+ //   fst_type        "vector" (or empty) or "compact"
+ //   generate_keys   if > 0, keys are the zero-padded running entry number of
+ //                   that width; otherwise keys derive from the input's
+ //                   basename (plus "-<number>" for line entries)
+ //   fet / tt        entry granularity (line or file) and token type
+ //   symbols_fname   optional symbol table, read as text
+ //   unknown_symbol  optional symbol looked up in that table; its label is
+ //                   handed to the string compiler as the unknown label
+ //   key_prefix / key_suffix   wrapped around every generated key
+ //
+ // Errors are reported through FSTERROR() and end the function early. The
+ // symbol table and the archive writer are released on every exit path.
+ template <class Arc>
+ void FarCompileStrings(const vector<string> &in_fnames,
+                        const string &out_fname,
+                        const string &fst_type,
+                        const FarType &far_type,
+                        int32 generate_keys,
+                        FarEntryType fet,
+                        FarTokenType tt,
+                        const string &symbols_fname,
+                        const string &unknown_symbol,
+                        bool allow_negative_labels,
+                        bool file_list_input,
+                        const string &key_prefix,
+                        const string &key_suffix) {
+   typename StringReader<Arc>::EntryType entry_type;
+   if (fet == FET_LINE) {
+     entry_type = StringReader<Arc>::LINE;
+   } else if (fet == FET_FILE) {
+     entry_type = StringReader<Arc>::FILE;
+   } else {
+     FSTERROR() << "FarCompileStrings: unknown entry type";
+     return;
+   }
+
+   typename StringCompiler<Arc>::TokenType token_type;
+   if (tt == FTT_SYMBOL) {
+     token_type = StringCompiler<Arc>::SYMBOL;
+   } else if (tt == FTT_BYTE) {
+     token_type = StringCompiler<Arc>::BYTE;
+   } else if (tt == FTT_UTF8) {
+     token_type = StringCompiler<Arc>::UTF8;
+   } else {
+     FSTERROR() << "FarCompileStrings: unknown token type";
+     return;
+   }
+
+   bool compact;
+   if (fst_type.empty() || (fst_type == "vector")) {
+     compact = false;
+   } else if (fst_type == "compact") {
+     compact = true;
+   } else {
+     FSTERROR() << "FarCompileStrings: unknown fst type: "
+                << fst_type;
+     return;
+   }
+
+   const SymbolTable *syms = 0;
+   typename Arc::Label unknown_label = kNoLabel;
+   if (!symbols_fname.empty()) {
+     syms = SymbolTable::ReadText(symbols_fname,
+                                  allow_negative_labels);
+     if (!syms) {
+       FSTERROR() << "FarCompileStrings: error reading symbol table: "
+                  << symbols_fname;
+       return;
+     }
+     if (!unknown_symbol.empty()) {
+       unknown_label = syms->Find(unknown_symbol);
+       if (unknown_label == kNoLabel) {
+         // Report the symbol that was looked up; the label is always
+         // kNoLabel at this point and carries no information.
+         FSTERROR() << "FarCompileStrings: unknown label \"" << unknown_symbol
+                    << "\" missing from symbol table: " << symbols_fname;
+         delete syms;
+         return;
+       }
+     }
+   }
+
+   FarWriter<Arc> *far_writer =
+       FarWriter<Arc>::Create(out_fname, far_type);
+   if (!far_writer) {
+     delete syms;
+     return;
+   }
+
+   vector<string> inputs;
+   if (file_list_input) {
+     // Every entry of in_fnames is a list file, the first one included.
+     for (size_t i = 0; i < in_fnames.size(); ++i) {
+       ifstream istrm(in_fnames[i].c_str());
+       string str;
+       while (getline(istrm, str))
+         inputs.push_back(str);
+     }
+   } else {
+     inputs = in_fnames;
+   }
+
+   int n = 0;  // Running entry number used for generated keys.
+   for (size_t i = 0; i < inputs.size(); ++i) {
+     int key_size = generate_keys ? generate_keys :
+         (entry_type == StringReader<Arc>::FILE ? 1 :
+          KeySize(inputs[i].c_str()));
+     ifstream istrm(inputs[i].c_str());
+
+     for (StringReader<Arc> reader(
+              istrm, inputs[i], entry_type, token_type,
+              allow_negative_labels, syms, unknown_label);
+          !reader.Done();
+          reader.Next()) {
+       ++n;
+       const Fst<Arc> *fst;
+       if (compact)
+         fst = reader.GetCompactFst();
+       else
+         fst = reader.GetVectorFst();
+       if (!fst) {
+         FSTERROR() << "FarCompileStrings: compiling string number " << n
+                    << " in file " << inputs[i] << " failed with token_type = "
+                    << (tt == FTT_BYTE ? "byte" :
+                        (tt == FTT_UTF8 ? "utf8" :
+                         (tt == FTT_SYMBOL ? "symbol" : "unknown")))
+                    << " and entry_type = "
+                    << (fet == FET_LINE ? "line" :
+                        (fet == FET_FILE ? "file" : "unknown"));
+         delete far_writer;
+         delete syms;
+         return;
+       }
+       ostringstream keybuf;
+       keybuf.width(key_size);
+       keybuf.fill('0');
+       keybuf << n;
+       string key;
+       if (generate_keys > 0) {
+         key = keybuf.str();
+       } else {
+         // basename() may modify its argument, so give it a private,
+         // NUL-terminated copy of the file name.
+         vector<char> filename(inputs[i].begin(), inputs[i].end());
+         filename.push_back('\0');
+         key = basename(&filename[0]);
+         if (entry_type != StringReader<Arc>::FILE) {
+           key += "-";
+           key += keybuf.str();
+         }
+       }
+       far_writer->Add(key_prefix + key + key_suffix, *fst);
+       delete fst;
+     }
+     // Without generated keys the numbering restarts for every input file.
+     if (generate_keys == 0)
+       n = 0;
+   }
+
+   delete far_writer;
+   delete syms;
+ }
+
+} // namespace fst
+
+
+#endif // FST_EXTENSIONS_FAR_COMPILE_STRINGS_H_
diff --git a/src/include/fst/extensions/far/create.h b/src/include/fst/extensions/far/create.h
new file mode 100644
index 0000000..edb31e7
--- /dev/null
+++ b/src/include/fst/extensions/far/create.h
@@ -0,0 +1,87 @@
+ // create.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use new dispatch
+//
+// \file
+// Creates a finite-state archive from component FSTs. Includes
+// helper function for farcreate.cc that templates the main on the arc
+// type to support multiple and extensible arc types.
+//
+
+#ifndef FST_EXTENSIONS_FAR_CREATE_H__
+#define FST_EXTENSIONS_FAR_CREATE_H__
+
+#include <libgen.h>
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/extensions/far/far.h>
+
+namespace fst {
+
+ // Creates the FST archive out_fname from the FSTs stored in the input files.
+ //
+ //   in_fnames       FST files, or files listing FST files one per line when
+ //                   file_list_input is set
+ //   generate_keys   if > 0, keys are the zero-padded 1-based input index of
+ //                   that width; otherwise the key is the input's basename
+ //   key_prefix / key_suffix   wrapped around every key
+ //
+ // Stops at the first input that cannot be read. The archive writer is
+ // released on every exit path.
+ template <class Arc>
+ void FarCreate(const vector<string> &in_fnames,
+                const string &out_fname,
+                const int32 generate_keys,
+                const bool file_list_input,
+                const FarType &far_type,
+                const string &key_prefix,
+                const string &key_suffix) {
+   FarWriter<Arc> *far_writer =
+       FarWriter<Arc>::Create(out_fname, far_type);
+   if (!far_writer) return;
+
+   vector<string> inputs;
+   if (file_list_input) {
+     // Every entry of in_fnames is a list file, the first one included.
+     for (size_t i = 0; i < in_fnames.size(); ++i) {
+       ifstream istrm(in_fnames[i].c_str());
+       string str;
+       while (getline(istrm, str))
+         inputs.push_back(str);
+     }
+   } else {
+     inputs = in_fnames;
+   }
+
+   for (size_t i = 0; i < inputs.size(); ++i) {
+     Fst<Arc> *ifst = Fst<Arc>::Read(inputs[i]);
+     if (!ifst) {
+       delete far_writer;  // Do not leak the writer on a failed read.
+       return;
+     }
+     string key;
+     if (generate_keys > 0) {
+       ostringstream keybuf;
+       keybuf.width(generate_keys);
+       keybuf.fill('0');
+       keybuf << i + 1;
+       key = keybuf.str();
+     } else {
+       // basename() may modify its argument, so give it a private,
+       // NUL-terminated copy of the file name.
+       vector<char> filename(inputs[i].begin(), inputs[i].end());
+       filename.push_back('\0');
+       key = basename(&filename[0]);
+     }
+
+     far_writer->Add(key_prefix + key + key_suffix, *ifst);
+     delete ifst;
+   }
+
+   delete far_writer;
+ }
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_FAR_CREATE_H__
diff --git a/src/include/fst/extensions/far/extract.h b/src/include/fst/extensions/far/extract.h
new file mode 100644
index 0000000..022ca60
--- /dev/null
+++ b/src/include/fst/extensions/far/extract.h
@@ -0,0 +1,85 @@
+ // extract.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use the new arc-dispatch
+
+// \file
+ // Extracts component FSTs from a finite-state archive.
+//
+
+#ifndef FST_EXTENSIONS_FAR_EXTRACT_H__
+#define FST_EXTENSIONS_FAR_EXTRACT_H__
+
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/extensions/far/far.h>
+
+namespace fst {
+
+ // Writes the FSTs stored in the archive(s) ifilenames to individual files.
+ //
+ //   generate_filenames  if non-zero, output names are the zero-padded 1-based
+ //                       entry number of that width; otherwise the entry's key
+ //                       is used, with ".<k>" appended to the k-th repetition
+ //                       of a key that occurs several times in a row
+ //   begin_key           if non-empty, extraction starts at the position found
+ //                       by Find(begin_key)
+ //   end_key             if non-empty, extraction stops at the first key that
+ //                       compares greater than end_key
+ //   filename_prefix / filename_suffix   wrapped around every output name
+ //
+ // The archive reader is released before returning.
+ template<class Arc>
+ void FarExtract(const vector<string> &ifilenames,
+                 const int32 &generate_filenames,
+                 const string &begin_key,
+                 const string &end_key,
+                 const string &filename_prefix,
+                 const string &filename_suffix) {
+   FarReader<Arc> *far_reader = FarReader<Arc>::Open(ifilenames);
+   if (!far_reader) return;
+
+   if (!begin_key.empty())
+     far_reader->Find(begin_key);
+
+   string okey;   // Key of the previous entry.
+   int nrep = 0;  // How many times in a row the current key has repeated.
+   for (int i = 1; !far_reader->Done(); far_reader->Next(), ++i) {
+     string key = far_reader->GetKey();
+     if (!end_key.empty() && end_key < key)
+       break;
+     const Fst<Arc> &fst = far_reader->GetFst();
+
+     if (key == okey)
+       ++nrep;
+     else
+       nrep = 0;
+
+     okey = key;
+
+     string ofilename;
+     if (generate_filenames) {
+       ostringstream tmp;
+       tmp.width(generate_filenames);
+       tmp.fill('0');
+       tmp << i;
+       ofilename = tmp.str();
+     } else {
+       if (nrep > 0) {
+         ostringstream tmp;
+         tmp << '.' << nrep;
+         key += tmp.str();
+       }
+       ofilename = key;
+     }
+     fst.Write(filename_prefix + ofilename + filename_suffix);
+   }
+
+   delete far_reader;  // Previously leaked on every path.
+ }
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_FAR_EXTRACT_H__
diff --git a/src/include/fst/extensions/far/far.h b/src/include/fst/extensions/far/far.h
new file mode 100644
index 0000000..82b9e5c
--- /dev/null
+++ b/src/include/fst/extensions/far/far.h
@@ -0,0 +1,360 @@
+// far.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Finite-State Transducer (FST) archive classes.
+//
+
+#ifndef FST_EXTENSIONS_FAR_FAR_H__
+#define FST_EXTENSIONS_FAR_FAR_H__
+
+#include <fst/extensions/far/stlist.h>
+#include <fst/extensions/far/sttable.h>
+#include <fst/fst.h>
+#include <fst/vector-fst.h>
+
+namespace fst {
+
+ enum FarEntryType { FET_LINE, FET_FILE };  // One entry per input line / per input file.
+ enum FarTokenType { FTT_SYMBOL, FTT_BYTE, FTT_UTF8 };  // How input strings are tokenized.
+
+ // FST archive header class. Read() determines the archive format of a file
+ // and the arc type recorded in it.
+ class FarHeader {
+  public:
+   // Archive format found by the last successful Read(): "sttable" or
+   // "stlist".
+   const string &FarType() const { return fartype_; }
+   // Arc type found by the last successful Read(), or "unknown" if the
+   // archive's header carries none.
+   const string &ArcType() const { return arctype_; }
+
+   // Inspects the archive stored in filename. Returns false if filename is
+   // empty or the file is neither an STTable nor an STList.
+   bool Read(const string &filename) {
+     FstHeader fsthdr;
+     if (filename.empty()) {  // Header reading unsupported on stdin.
+       return false;
+     } else if (IsSTTable(filename)) {  // Check if STTable
+       ReadSTTableHeader(filename, &fsthdr);
+       fartype_ = "sttable";
+       arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
+       return true;
+     } else if (IsSTList(filename)) {  // Check if STList
+       ReadSTListHeader(filename, &fsthdr);
+       fartype_ = "stlist";  // Was misreported as "sttable".
+       arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
+       return true;
+     }
+     return false;
+   }
+
+  private:
+   string fartype_;
+   string arctype_;
+ };
+
+ enum FarType { FAR_DEFAULT = 0, FAR_STTABLE = 1, FAR_STLIST = 2,
+ FAR_SSTABLE = 3 };  // FAR_DEFAULT lets FarWriter::Create() choose the format.
+
+ // Abstract interface for creating an archive of FSTs; obtain one via Create().
+ template <class A>
+ class FarWriter {
+ public:
+ typedef A Arc;
+
+ // Creates a new (empty) FST archive; returns NULL on error.
+ static FarWriter *Create(const string &filename, FarType type = FAR_DEFAULT);
+
+ // Adds an FST to the end of an archive. Keys must be non-empty and
+ // in lexicographic order. FSTs must have a suitable write method.
+ virtual void Add(const string &key, const Fst<A> &fst) = 0;
+
+ virtual FarType Type() const = 0;  // Archive format written by this writer.
+
+ virtual bool Error() const = 0;  // Error status of the writer.
+
+ virtual ~FarWriter() {}
+
+ protected:
+ FarWriter() {}  // Only subclasses construct writers; clients use Create().
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(FarWriter);
+ };
+
+
+ // Abstract interface for iterating through an existing archive of FSTs.
+ template <class A>
+ class FarReader {
+ public:
+ typedef A Arc;
+
+ // Opens an existing FST archive in a single file; returns NULL on error.
+ // Sets current position to the beginning of the archive.
+ static FarReader *Open(const string &filename);
+
+ // Opens an existing FST archive in multiple files; returns NULL on error.
+ // Sets current position to the beginning of the archive.
+ static FarReader *Open(const vector<string> &filenames);
+
+ // Resets current position to beginning of archive.
+ virtual void Reset() = 0;
+
+ // Sets current position to first entry >= key. Returns true if a match.
+ virtual bool Find(const string &key) = 0;
+
+ // Current position at end of archive?
+ virtual bool Done() const = 0;
+
+ // Move current position to next FST.
+ virtual void Next() = 0;
+
+ // Returns key at the current position. This reference is invalidated if
+ // the current position in the archive is changed.
+ virtual const string &GetKey() const = 0;
+
+ // Returns FST at the current position. This reference is invalidated if
+ // the current position in the archive is changed.
+ virtual const Fst<A> &GetFst() const = 0;
+
+ virtual FarType Type() const = 0;  // Archive format handled by this reader.
+
+ virtual bool Error() const = 0;  // Error status of the reader.
+
+ virtual ~FarReader() {}
+
+ protected:
+ FarReader() {}  // Only subclasses construct readers; clients use Open().
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(FarReader);
+ };
+
+
+ // Functor that writes an FST to a stream using default write options.
+ template <class A>
+ class FstWriter {
+  public:
+   void operator()(ostream &strm, const Fst<A> &fst) const {
+     FstWriteOptions opts = FstWriteOptions();
+     fst.Write(strm, opts);
+   }
+ };
+
+
+ // FarWriter that stores the archive through an STTableWriter.
+ template <class A>
+ class STTableFarWriter : public FarWriter<A> {
+  public:
+   typedef A Arc;
+
+   // Creates a writer for the archive file filename. The name is taken by
+   // const reference, like FarWriter::Create(), to avoid a needless copy.
+   static STTableFarWriter *Create(const string &filename) {
+     STTableWriter<Fst<A>, FstWriter<A> > *writer =
+         STTableWriter<Fst<A>, FstWriter<A> >::Create(filename);
+     return new STTableFarWriter(writer);
+   }
+
+   // Appends fst under key by forwarding to the underlying writer.
+   void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); }
+
+   FarType Type() const { return FAR_STTABLE; }
+
+   // Error status of the underlying writer.
+   bool Error() const { return writer_->Error(); }
+
+   ~STTableFarWriter() { delete writer_; }
+
+  private:
+   // Takes ownership of writer.
+   explicit STTableFarWriter(STTableWriter<Fst<A>, FstWriter<A> > *writer)
+       : writer_(writer) {}
+
+   STTableWriter<Fst<A>, FstWriter<A> > *writer_;  // Owned.
+
+   DISALLOW_COPY_AND_ASSIGN(STTableFarWriter);
+ };
+
+
+ // FarWriter that stores the archive through an STListWriter.
+ template <class A>
+ class STListFarWriter : public FarWriter<A> {
+  public:
+   typedef A Arc;
+
+   // Creates a writer for the archive file filename. The name is taken by
+   // const reference, like FarWriter::Create(), to avoid a needless copy.
+   static STListFarWriter *Create(const string &filename) {
+     STListWriter<Fst<A>, FstWriter<A> > *writer =
+         STListWriter<Fst<A>, FstWriter<A> >::Create(filename);
+     return new STListFarWriter(writer);
+   }
+
+   // Appends fst under key by forwarding to the underlying writer.
+   void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); }
+
+   FarType Type() const { return FAR_STLIST; }
+
+   // Error status of the underlying writer.
+   bool Error() const { return writer_->Error(); }
+
+   ~STListFarWriter() { delete writer_; }
+
+  private:
+   // Takes ownership of writer.
+   explicit STListFarWriter(STListWriter<Fst<A>, FstWriter<A> > *writer)
+       : writer_(writer) {}
+
+   STListWriter<Fst<A>, FstWriter<A> > *writer_;  // Owned.
+
+   DISALLOW_COPY_AND_ASSIGN(STListFarWriter);
+ };
+
+
+ // Factory for archive writers. FAR_DEFAULT resolves to an STList writer when
+ // filename is empty and to an STTable writer otherwise. Any type other than
+ // FAR_DEFAULT, FAR_STTABLE or FAR_STLIST is reported as an error and yields
+ // NULL.
+ template <class A>
+ FarWriter<A> *FarWriter<A>::Create(const string &filename, FarType type) {
+   if (type == FAR_DEFAULT)
+     type = filename.empty() ? FAR_STLIST : FAR_STTABLE;
+
+   if (type == FAR_STTABLE)
+     return STTableFarWriter<A>::Create(filename);
+   if (type == FAR_STLIST)
+     return STListFarWriter<A>::Create(filename);
+
+   LOG(ERROR) << "FarWriter::Create: unknown far type";
+   return 0;
+ }
+
+
+ // Functor that reads an FST from a stream using default read options.
+ template <class A>
+ class FstReader {
+  public:
+   Fst<A> *operator()(istream &strm) const {
+     Fst<A> *fst = Fst<A>::Read(strm, FstReadOptions());
+     return fst;
+   }
+ };
+
+
+ // FarReader backed by an STTableReader; every operation is forwarded to it.
+ template <class A>
+ class STTableFarReader : public FarReader<A> {
+  public:
+   typedef A Arc;
+
+   // Opens the archive stored in a single file.
+   static STTableFarReader *Open(const string &filename) {
+     // TODO: error check
+     return new STTableFarReader(
+         STTableReader<Fst<A>, FstReader<A> >::Open(filename));
+   }
+
+   // Opens an archive spread over several files.
+   static STTableFarReader *Open(const vector<string> &filenames) {
+     // TODO: error check
+     return new STTableFarReader(
+         STTableReader<Fst<A>, FstReader<A> >::Open(filenames));
+   }
+
+   void Reset() { reader_->Reset(); }
+
+   bool Find(const string &key) { return reader_->Find(key); }
+
+   bool Done() const { return reader_->Done(); }
+
+   void Next() { reader_->Next(); }
+
+   const string &GetKey() const { return reader_->GetKey(); }
+
+   const Fst<A> &GetFst() const { return reader_->GetEntry(); }
+
+   FarType Type() const { return FAR_STTABLE; }
+
+   bool Error() const { return reader_->Error(); }
+
+   ~STTableFarReader() { delete reader_; }
+
+  private:
+   // Takes ownership of reader.
+   explicit STTableFarReader(STTableReader<Fst<A>, FstReader<A> > *reader)
+       : reader_(reader) {}
+
+   STTableReader<Fst<A>, FstReader<A> > *reader_;  // Owned.
+
+   DISALLOW_COPY_AND_ASSIGN(STTableFarReader);
+ };
+
+
+ // FarReader backed by an STListReader; every operation is forwarded to it.
+ template <class A>
+ class STListFarReader : public FarReader<A> {
+  public:
+   typedef A Arc;
+
+   // Opens the archive stored in a single file.
+   static STListFarReader *Open(const string &filename) {
+     // TODO: error check
+     return new STListFarReader(
+         STListReader<Fst<A>, FstReader<A> >::Open(filename));
+   }
+
+   // Opens an archive spread over several files.
+   static STListFarReader *Open(const vector<string> &filenames) {
+     // TODO: error check
+     return new STListFarReader(
+         STListReader<Fst<A>, FstReader<A> >::Open(filenames));
+   }
+
+   void Reset() { reader_->Reset(); }
+
+   bool Find(const string &key) { return reader_->Find(key); }
+
+   bool Done() const { return reader_->Done(); }
+
+   void Next() { reader_->Next(); }
+
+   const string &GetKey() const { return reader_->GetKey(); }
+
+   const Fst<A> &GetFst() const { return reader_->GetEntry(); }
+
+   FarType Type() const { return FAR_STLIST; }
+
+   bool Error() const { return reader_->Error(); }
+
+   ~STListFarReader() { delete reader_; }
+
+  private:
+   // Takes ownership of reader.
+   explicit STListFarReader(STListReader<Fst<A>, FstReader<A> > *reader)
+       : reader_(reader) {}
+
+   STListReader<Fst<A>, FstReader<A> > *reader_;  // Owned.
+
+   DISALLOW_COPY_AND_ASSIGN(STListFarReader);
+ };
+
+
+ // Opens a single-file archive, choosing the reader from the file's format.
+ // An empty filename is handed to the STList reader. Returns NULL when the
+ // file is neither an STTable nor an STList.
+ template <class A>
+ FarReader<A> *FarReader<A>::Open(const string &filename) {
+   if (filename.empty()) return STListFarReader<A>::Open(filename);
+   if (IsSTTable(filename)) return STTableFarReader<A>::Open(filename);
+   if (IsSTList(filename)) return STListFarReader<A>::Open(filename);
+   return 0;
+ }
+
+
+ // Opens a multi-file archive. The reader is chosen from the first file name
+ // alone: an empty name selects the STList reader, otherwise the file's
+ // format decides. Returns NULL for an empty list or an unrecognized format.
+ template <class A>
+ FarReader<A> *FarReader<A>::Open(const vector<string> &filenames) {
+   if (filenames.empty()) return 0;
+
+   const string &first = filenames[0];
+   if (first.empty()) return STListFarReader<A>::Open(filenames);
+   if (IsSTTable(first)) return STTableFarReader<A>::Open(filenames);
+   if (IsSTList(first)) return STListFarReader<A>::Open(filenames);
+   return 0;
+ }
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_FAR_FAR_H__
diff --git a/src/include/fst/extensions/far/farlib.h b/src/include/fst/extensions/far/farlib.h
new file mode 100644
index 0000000..91ba224
--- /dev/null
+++ b/src/include/fst/extensions/far/farlib.h
@@ -0,0 +1,31 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+// A finite-state archive (FAR) is used to store an indexable collection of
+// FSTs in a single file. Utilities are provided to create FARs from FSTs,
+// to iterate over FARs, and to extract specific FSTs from FARs.
+
+#ifndef FST_EXTENSIONS_FAR_FARLIB_H_
+#define FST_EXTENSIONS_FAR_FARLIB_H_
+
+#include <fst/extensions/far/far.h>
+#include <fst/extensions/far/compile-strings.h>
+#include <fst/extensions/far/create.h>
+#include <fst/extensions/far/extract.h>
+#include <fst/extensions/far/info.h>
+#include <fst/extensions/far/print-strings.h>
+
+#endif // FST_EXTENSIONS_FAR_FARLIB_H_
diff --git a/src/include/fst/extensions/far/farscript.h b/src/include/fst/extensions/far/farscript.h
new file mode 100644
index 0000000..9c3b1ca
--- /dev/null
+++ b/src/include/fst/extensions/far/farscript.h
@@ -0,0 +1,234 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+// Convenience file for including all of the FAR operations,
+// or registering them for new arc types.
+
+#ifndef FST_EXTENSIONS_FAR_FARSCRIPT_H_
+#define FST_EXTENSIONS_FAR_FARSCRIPT_H_
+
+#include <vector>
+using std::vector;
+#include <string>
+
+#include <fst/script/arg-packs.h>
+#include <fst/extensions/far/compile-strings.h>
+#include <fst/extensions/far/create.h>
+#include <fst/extensions/far/extract.h>
+#include <fst/extensions/far/info.h>
+#include <fst/extensions/far/print-strings.h>
+#include <fst/extensions/far/far.h>
+
+#include <fst/types.h>
+
+namespace fst {
+namespace script {
+
+ // Argument bundle for the FarCompileStrings operation.
+ //
+ // Note: the string, vector and FarType members are stored as references,
+ // not copies. That is safe only because this struct is used solely to hand
+ // the arguments deeper into the call graph while the caller's objects are
+ // still alive. Be sure you understand why before using this struct for
+ // anything else!
+ struct FarCompileStringsArgs {
+   const vector<string> &in_fnames;
+   const string &out_fname;
+   const string &fst_type;
+   const FarType &far_type;
+   const int32 generate_keys;
+   const FarEntryType fet;
+   const FarTokenType tt;
+   const string &symbols_fname;
+   const string &unknown_symbol;
+   const bool allow_negative_labels;
+   const bool file_list_input;
+   const string &key_prefix;
+   const string &key_suffix;
+
+   // Binds every member to the argument of the same name.
+   FarCompileStringsArgs(const vector<string> &in_fnames,
+                         const string &out_fname,
+                         const string &fst_type,
+                         const FarType &far_type,
+                         int32 generate_keys,
+                         FarEntryType fet,
+                         FarTokenType tt,
+                         const string &symbols_fname,
+                         const string &unknown_symbol,
+                         bool allow_negative_labels,
+                         bool file_list_input,
+                         const string &key_prefix,
+                         const string &key_suffix)
+       : in_fnames(in_fnames),
+         out_fname(out_fname),
+         fst_type(fst_type),
+         far_type(far_type),
+         generate_keys(generate_keys),
+         fet(fet),
+         tt(tt),
+         symbols_fname(symbols_fname),
+         unknown_symbol(unknown_symbol),
+         allow_negative_labels(allow_negative_labels),
+         file_list_input(file_list_input),
+         key_prefix(key_prefix),
+         key_suffix(key_suffix) {}
+ };
+
+ // Arc-templated entry point: unpacks 'args' and calls
+ // fst::FarCompileStrings<Arc> with the fields in declaration order.
+ template <class Arc>
+ void FarCompileStrings(FarCompileStringsArgs *args) {
+   FarCompileStringsArgs &a = *args;
+   fst::FarCompileStrings<Arc>(
+       a.in_fnames, a.out_fname, a.fst_type, a.far_type,
+       a.generate_keys, a.fet, a.tt, a.symbols_fname,
+       a.unknown_symbol, a.allow_negative_labels, a.file_list_input,
+       a.key_prefix, a.key_suffix);
+ }
+
+void FarCompileStrings(
+ const vector<string> &in_fnames,
+ const string &out_fname,
+ const string &arc_type,
+ const string &fst_type,
+ const FarType &far_type,
+ int32 generate_keys,
+ FarEntryType fet,
+ FarTokenType tt,
+ const string &symbols_fname,
+ const string &unknown_symbol,
+ bool allow_negative_labels,
+ bool file_list_input,
+ const string &key_prefix,
+ const string &key_suffix);
+
+
+ // Argument bundle for the FarCreate operation.
+ //
+ // Note: the string, vector and FarType members are stored as references,
+ // not copies. That is safe only because this struct is used solely to hand
+ // the arguments deeper into the call graph while the caller's objects are
+ // still alive. Be sure you understand why before using this struct for
+ // anything else!
+ struct FarCreateArgs {
+   const vector<string> &in_fnames;
+   const string &out_fname;
+   const int32 generate_keys;
+   const bool file_list_input;
+   const FarType &far_type;
+   const string &key_prefix;
+   const string &key_suffix;
+
+   // Binds every member to the argument of the same name.
+   FarCreateArgs(const vector<string> &in_fnames,
+                 const string &out_fname,
+                 const int32 generate_keys,
+                 const bool file_list_input,
+                 const FarType &far_type,
+                 const string &key_prefix,
+                 const string &key_suffix)
+       : in_fnames(in_fnames),
+         out_fname(out_fname),
+         generate_keys(generate_keys),
+         file_list_input(file_list_input),
+         far_type(far_type),
+         key_prefix(key_prefix),
+         key_suffix(key_suffix) {}
+ };
+
+ // Arc-templated entry point: unpacks 'args' and calls fst::FarCreate<Arc>
+ // with the fields in declaration order.
+ template<class Arc>
+ void FarCreate(FarCreateArgs *args) {
+   FarCreateArgs &a = *args;
+   fst::FarCreate<Arc>(a.in_fnames, a.out_fname, a.generate_keys,
+                       a.file_list_input, a.far_type,
+                       a.key_prefix, a.key_suffix);
+ }
+
+void FarCreate(const vector<string> &in_fnames,
+ const string &out_fname,
+ const string &arc_type,
+ const int32 generate_keys,
+ const bool file_list_input,
+ const FarType &far_type,
+ const string &key_prefix,
+ const string &key_suffix);
+
+
+typedef args::Package<const vector<string> &, int32,
+ const string&, const string&, const string&,
+ const string&> FarExtractArgs;
+
+ // Arc-templated entry point: forwards the six packaged arguments, in order,
+ // to fst::FarExtract<Arc>.
+ template<class Arc>
+ void FarExtract(FarExtractArgs *args) {
+   FarExtractArgs &a = *args;
+   fst::FarExtract<Arc>(a.arg1, a.arg2, a.arg3, a.arg4, a.arg5, a.arg6);
+ }
+
+void FarExtract(const vector<string> &ifilenames,
+ const string &arc_type,
+ int32 generate_filenames, const string &begin_key,
+ const string &end_key, const string &filename_prefix,
+ const string &filename_suffix);
+
+typedef args::Package<const vector<string> &, const string &,
+ const string &, const bool> FarInfoArgs;
+
+ // Arc-templated entry point: forwards the four packaged arguments, in
+ // order, to fst::FarInfo<Arc>.
+ template <class Arc>
+ void FarInfo(FarInfoArgs *args) {
+   FarInfoArgs &a = *args;
+   fst::FarInfo<Arc>(a.arg1, a.arg2, a.arg3, a.arg4);
+ }
+
+void FarInfo(const vector<string> &filenames,
+ const string &arc_type,
+ const string &begin_key,
+ const string &end_key,
+ const bool list_fsts);
+
+ // Argument bundle for the FarPrintStrings operation. The string and vector
+ // members are references to the caller's objects, so an instance must not
+ // outlive the arguments it was constructed from.
+ struct FarPrintStringsArgs {
+   const vector<string> &ifilenames;
+   const FarEntryType entry_type;
+   const FarTokenType token_type;
+   const string &begin_key;
+   const string &end_key;
+   const bool print_key;
+   const string &symbols_fname;
+   const int32 generate_filenames;
+   const string &filename_prefix;
+   const string &filename_suffix;
+
+   // Binds every member to the argument of the same name.
+   FarPrintStringsArgs(const vector<string> &ifilenames,
+                       const FarEntryType entry_type,
+                       const FarTokenType token_type,
+                       const string &begin_key,
+                       const string &end_key,
+                       const bool print_key,
+                       const string &symbols_fname,
+                       const int32 generate_filenames,
+                       const string &filename_prefix,
+                       const string &filename_suffix)
+       : ifilenames(ifilenames),
+         entry_type(entry_type),
+         token_type(token_type),
+         begin_key(begin_key),
+         end_key(end_key),
+         print_key(print_key),
+         symbols_fname(symbols_fname),
+         generate_filenames(generate_filenames),
+         filename_prefix(filename_prefix),
+         filename_suffix(filename_suffix) {}
+ };
+
+ // Arc-templated entry point: unpacks 'args' and calls
+ // fst::FarPrintStrings<Arc> with the fields in declaration order.
+ template <class Arc>
+ void FarPrintStrings(FarPrintStringsArgs *args) {
+   FarPrintStringsArgs &a = *args;
+   fst::FarPrintStrings<Arc>(
+       a.ifilenames, a.entry_type, a.token_type,
+       a.begin_key, a.end_key, a.print_key,
+       a.symbols_fname, a.generate_filenames, a.filename_prefix,
+       a.filename_suffix);
+ }
+
+
+void FarPrintStrings(const vector<string> &ifilenames,
+ const string &arc_type,
+ const FarEntryType entry_type,
+ const FarTokenType token_type,
+ const string &begin_key,
+ const string &end_key,
+ const bool print_key,
+ const string &symbols_fname,
+ const int32 generate_filenames,
+ const string &filename_prefix,
+ const string &filename_suffix);
+
+} // namespace script
+} // namespace fst
+
+ // Registers all five FAR script operations for the arc type ArcType.
+#define REGISTER_FST_FAR_OPERATIONS(ArcType) \
+ REGISTER_FST_OPERATION(FarCompileStrings, ArcType, FarCompileStringsArgs); \
+ REGISTER_FST_OPERATION(FarCreate, ArcType, FarCreateArgs); \
+ REGISTER_FST_OPERATION(FarExtract, ArcType, FarExtractArgs); \
+ REGISTER_FST_OPERATION(FarInfo, ArcType, FarInfoArgs); \
+ REGISTER_FST_OPERATION(FarPrintStrings, ArcType, FarPrintStringsArgs)
+
+#endif // FST_EXTENSIONS_FAR_FARSCRIPT_H_
diff --git a/src/include/fst/extensions/far/info.h b/src/include/fst/extensions/far/info.h
new file mode 100644
index 0000000..f010546
--- /dev/null
+++ b/src/include/fst/extensions/far/info.h
@@ -0,0 +1,128 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+// Modified: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_EXTENSIONS_FAR_INFO_H_
+#define FST_EXTENSIONS_FAR_INFO_H_
+
+#include <iomanip>
+#include <set>
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/extensions/far/far.h>
+#include <fst/extensions/far/main.h> // For FarTypeToString
+
+namespace fst {
+
+ // Adds the number of states and arcs of 'fst' to '*nstate' and '*narc'.
+ // The counters are incremented, not reset, so a caller can accumulate
+ // totals over several FSTs.
+ template <class Arc>
+ void CountStatesAndArcs(const Fst<Arc> &fst, size_t *nstate, size_t *narc) {
+   StateIterator<Fst<Arc> > states(fst);
+   while (!states.Done()) {
+     ArcIterator<Fst<Arc> > arcs(fst, states.Value());
+     while (!arcs.Done()) {
+       arcs.Next();
+       ++(*narc);
+     }
+     states.Next();
+     ++(*nstate);
+   }
+ }
+
+ // Per-FST record collected by FarInfo when listing FSTs: the archive key,
+ // the FST type name, and the FST's state and arc counts.
+ struct KeyInfo {
+   string key;
+   string type;
+   size_t nstate;
+   size_t narc;
+
+   // Both counts default to 0 so they can be filled in after construction.
+   KeyInfo(string k, string t, int64 ns = 0, int64 na = 0)
+       : key(k),
+         type(t),
+         nstate(ns),
+         narc(na) {
+   }
+ };
+
+ // Prints information about the FSTs stored in the FARs 'filenames' to cout.
+ //
+ // If 'begin_key' is non-empty the reader is first positioned with Find().
+ // Iteration stops at the first key greater than 'end_key' (when non-empty),
+ // so an entry whose key equals 'end_key' is still included.
+ //
+ // With 'list_fsts' false a summary is printed: far type, arc type, the set
+ // of FST types, and the total numbers of FSTs, states and arcs. With
+ // 'list_fsts' true one row per FST is printed: key, FST type, # of states
+ // and # of arcs. Returns without output if the archives cannot be opened.
+ template <class Arc>
+ void FarInfo(const vector<string> &filenames, const string &begin_key,
+              const string &end_key, const bool list_fsts) {
+   FarReader<Arc> *far_reader = FarReader<Arc>::Open(filenames);
+   if (!far_reader) return;
+
+   if (!begin_key.empty())
+     far_reader->Find(begin_key);
+
+   // Per-FST records are only collected in listing mode.
+   vector<KeyInfo> *infos = list_fsts ? new vector<KeyInfo>() : 0;
+   size_t nfst = 0, nstate = 0, narc = 0;
+   set<string> fst_types;
+   for (; !far_reader->Done(); far_reader->Next()) {
+     string key = far_reader->GetKey();
+     if (!end_key.empty() && end_key < key)
+       break;
+     ++nfst;
+     const Fst<Arc> &fst = far_reader->GetFst();
+     fst_types.insert(fst.Type());
+     if (infos) {
+       KeyInfo info(key, fst.Type());
+       CountStatesAndArcs(fst, &info.nstate, &info.narc);
+       nstate += info.nstate;
+       // The arc count belongs in the arc total (it used to be added to
+       // 'nstate').
+       narc += info.narc;
+       infos->push_back(info);
+     } else {
+       CountStatesAndArcs(fst, &nstate, &narc);
+     }
+   }
+
+   if (!infos) {
+     cout << std::left << setw(50) << "far type"
+          << FarTypeToString(far_reader->Type()) << endl;
+     cout << std::left << setw(50) << "arc type" << Arc::Type() << endl;
+     cout << std::left << setw(50) << "fst type";
+     for (set<string>::const_iterator iter = fst_types.begin();
+          iter != fst_types.end();
+          ++iter) {
+       if (iter != fst_types.begin())
+         cout << ",";
+       cout << *iter;
+     }
+     cout << endl;
+     cout << std::left << setw(50) << "# of FSTs" << nfst << endl;
+     cout << std::left << setw(50) << "total # of states" << nstate << endl;
+     cout << std::left << setw(50) << "total # of arcs" << narc << endl;
+
+   } else {
+     // Column widths start at fixed minimums and grow to fit the widest
+     // value plus two characters of padding.
+     int wkey = 10, wtype = 10, wnstate = 16, wnarc = 16;
+     for (size_t i = 0; i < infos->size(); ++i) {
+       const KeyInfo &info = (*infos)[i];
+       if (info.key.size() + 2 > wkey)
+         wkey = info.key.size() + 2;
+       if (info.type.size() + 2 > wtype)
+         wtype = info.type.size() + 2;
+       // For a count of 0, log10 yields -infinity, so the comparison is
+       // false and the minimum width is kept.
+       if (ceil(log10(info.nstate)) + 2 > wnstate)
+         wnstate = ceil(log10(info.nstate)) + 2;
+       if (ceil(log10(info.narc)) + 2 > wnarc)
+         wnarc = ceil(log10(info.narc)) + 2;
+     }
+
+     cout << std::left << setw(wkey) << "key" << setw(wtype) << "type"
+          << std::right << setw(wnstate) << "# of states"
+          << setw(wnarc) << "# of arcs" << endl;
+
+     for (size_t i = 0; i < infos->size(); ++i) {
+       const KeyInfo &info = (*infos)[i];
+       cout << std::left << setw(wkey) << info.key << setw(wtype) << info.type
+            << std::right << setw(wnstate) << info.nstate
+            << setw(wnarc) << info.narc << endl;
+     }
+   }
+
+   // Both objects were allocated in this function and are owned by it.
+   delete infos;
+   delete far_reader;
+ }
+
+} // namespace fst
+
+
+#endif // FST_EXTENSIONS_FAR_INFO_H_
diff --git a/src/include/fst/extensions/far/main.h b/src/include/fst/extensions/far/main.h
new file mode 100644
index 0000000..00ccfef
--- /dev/null
+++ b/src/include/fst/extensions/far/main.h
@@ -0,0 +1,43 @@
+// main.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Classes and functions for registering and invoking Far main
+// functions that support multiple and extensible arc types.
+
+#ifndef FST_EXTENSIONS_FAR_MAIN_H__
+#define FST_EXTENSIONS_FAR_MAIN_H__
+
+#include <fst/extensions/far/far.h>
+
+namespace fst {
+
+FarEntryType StringToFarEntryType(const string &s);
+FarTokenType StringToFarTokenType(const string &s);
+
+// Return the 'FarType' value corresponding to a far type name.
+FarType FarTypeFromString(const string &str);
+
+ // Return the textual name corresponding to a 'FarType'.
+string FarTypeToString(FarType type);
+
+string LoadArcTypeFromFar(const string& far_fname);
+string LoadArcTypeFromFst(const string& far_fname);
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_FAR_MAIN_H__
diff --git a/src/include/fst/extensions/far/print-strings.h b/src/include/fst/extensions/far/print-strings.h
new file mode 100644
index 0000000..aff1e51
--- /dev/null
+++ b/src/include/fst/extensions/far/print-strings.h
@@ -0,0 +1,126 @@
+ // print-strings.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+// Modified by: jpr@google.com (Jake Ratkiewicz)
+//
+// \file
+// Output as strings the string FSTs in a finite-state archive.
+
+#ifndef FST_EXTENSIONS_FAR_PRINT_STRINGS_H__
+#define FST_EXTENSIONS_FAR_PRINT_STRINGS_H__
+
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/extensions/far/far.h>
+#include <fst/string.h>
+
+namespace fst {
+
+ // Outputs as strings the string FSTs stored in the FARs 'ifilenames'.
+ //
+ // 'far_token_type' selects how labels are rendered (symbol, byte or UTF8);
+ // for any other value an error is reported and nothing is printed.
+ // 'symbols_fname', if non-empty, names a text symbol table to load.
+ // If 'begin_key' is non-empty the reader is first positioned with Find();
+ // iteration stops at the first key greater than a non-empty 'end_key'.
+ //
+ // With entry type FET_LINE each string is written to cout on its own line,
+ // preceded by the key and a tab when 'print_key' is set. With FET_FILE each
+ // string is written to its own file, named filename_prefix + <name> +
+ // filename_suffix, where <name> is the 1-based entry index zero-padded to
+ // 'generate_filenames' digits when that is non-zero, and otherwise the key
+ // (followed by ".<n>" for the n-th repetition of the same key).
+ template <class Arc>
+ void FarPrintStrings(
+     const vector<string> &ifilenames, const FarEntryType entry_type,
+     const FarTokenType far_token_type, const string &begin_key,
+     const string &end_key, const bool print_key, const string &symbols_fname,
+     const int32 generate_filenames, const string &filename_prefix,
+     const string &filename_suffix) {
+
+   typename StringPrinter<Arc>::TokenType token_type;
+   if (far_token_type == FTT_SYMBOL) {
+     token_type = StringPrinter<Arc>::SYMBOL;
+   } else if (far_token_type == FTT_BYTE) {
+     token_type = StringPrinter<Arc>::BYTE;
+   } else if (far_token_type == FTT_UTF8) {
+     token_type = StringPrinter<Arc>::UTF8;
+   } else {
+     FSTERROR() << "FarPrintStrings: unknown token type";
+     return;
+   }
+
+   const SymbolTable *syms = 0;
+   if (!symbols_fname.empty()) {
+     // allow negative flag?
+     syms = SymbolTable::ReadText(symbols_fname, true);
+     if (!syms) {
+       FSTERROR() << "FarPrintStrings: error reading symbol table: "
+                  << symbols_fname;
+       return;
+     }
+   }
+
+   StringPrinter<Arc> string_printer(token_type, syms);
+
+   FarReader<Arc> *far_reader = FarReader<Arc>::Open(ifilenames);
+   if (!far_reader) {
+     // The symbol table was loaded above and must not leak on this path.
+     delete syms;
+     return;
+   }
+
+   if (!begin_key.empty())
+     far_reader->Find(begin_key);
+
+   string okey;   // Key of the previous entry.
+   int nrep = 0;  // How many times the current key has been repeated so far.
+   for (int i = 1; !far_reader->Done(); far_reader->Next(), ++i) {
+     string key = far_reader->GetKey();
+     if (!end_key.empty() && end_key < key)
+       break;
+     if (okey == key)
+       ++nrep;
+     else
+       nrep = 0;
+     okey = key;
+
+     const Fst<Arc> &fst = far_reader->GetFst();
+     string str;
+     VLOG(2) << "Handling key: " << key;
+     string_printer(fst, &str);
+
+     if (entry_type == FET_LINE) {
+       if (print_key)
+         cout << key << "\t";
+       cout << str << endl;
+     } else if (entry_type == FET_FILE) {
+       stringstream sstrm;
+       if (generate_filenames) {
+         sstrm.fill('0');
+         sstrm << std::right << setw(generate_filenames) << i;
+       } else {
+         sstrm << key;
+         if (nrep > 0)
+           sstrm << "." << nrep;
+       }
+
+       string filename;
+       filename = filename_prefix + sstrm.str() + filename_suffix;
+
+       ofstream ostrm(filename.c_str());
+       if (!ostrm) {
+         FSTERROR() << "FarPrintStrings: Can't open file:" << filename;
+         delete syms;
+         delete far_reader;
+         return;
+       }
+       ostrm << str;
+       if (token_type == StringPrinter<Arc>::SYMBOL)
+         ostrm << "\n";
+     }
+   }
+
+   // Release what this function allocated, as the error path above does.
+   delete syms;
+   delete far_reader;
+ }
+
+
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_FAR_PRINT_STRINGS_H__
diff --git a/src/include/fst/extensions/far/stlist.h b/src/include/fst/extensions/far/stlist.h
new file mode 100644
index 0000000..4738181
--- /dev/null
+++ b/src/include/fst/extensions/far/stlist.h
@@ -0,0 +1,304 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// A generic (string,type) list file format.
+//
+// This is a stripped-down version of STTable that does
+// not support the Find() operation but that does support
+ // reading/writing from/to standard in/out.
+
+#ifndef FST_EXTENSIONS_FAR_STLIST_H_
+#define FST_EXTENSIONS_FAR_STLIST_H_
+
+#include <iostream>
+#include <fstream>
+#include <fst/util.h>
+
+#include <algorithm>
+#include <functional>
+#include <queue>
+#include <string>
+#include <utility>
+using std::pair; using std::make_pair;
+#include <vector>
+using std::vector;
+
+namespace fst {
+
+static const int32 kSTListMagicNumber = 5656924;
+static const int32 kSTListFileVersion = 1;
+
+ // String-type list writing class for objects of type 'T' using functor 'W'
+ // to write an object of type 'T' to a stream. 'W' must conform to the
+ // following interface:
+ //
+ // struct Writer {
+ //   void operator()(ostream &, const T &) const;
+ // };
+ //
+ // An empty filename selects standard output. Keys must be non-empty and
+ // must be added in non-decreasing order. The destructor writes an empty
+ // key after the last entry.
+ template <class T, class W>
+ class STListWriter {
+  public:
+   typedef T EntryType;
+   typedef W EntryWriter;
+
+   // Opens 'filename' for binary output (or uses std::cout when it is
+   // empty) and writes the magic number and file version. Sets the error
+   // flag if the stream is not in a good state afterwards.
+   explicit STListWriter(const string &filename)
+       : stream_(
+             filename.empty() ? &std::cout :
+             new ofstream(filename.c_str(), ofstream::out | ofstream::binary)),
+         error_(false) {
+     WriteType(*stream_, kSTListMagicNumber);
+     WriteType(*stream_, kSTListFileVersion);
+     // Test the stream state, not the pointer: 'stream_' itself is never
+     // null, so testing the pointer could not detect a failed open or write.
+     if (!*stream_) {
+       FSTERROR() << "STListWriter::STListWriter: error writing to file: "
+                  << filename;
+       error_ = true;
+     }
+   }
+
+   // Returns a new heap-allocated writer for 'filename'.
+   static STListWriter<T, W> *Create(const string &filename) {
+     return new STListWriter<T, W>(filename);
+   }
+
+   // Appends the pair (key, t). An empty key, or a key smaller than the
+   // previously added one, sets the error flag. Once the error flag is set
+   // nothing more is written.
+   void Add(const string &key, const T &t) {
+     if (key == "") {
+       FSTERROR() << "STListWriter::Add: key empty: " << key;
+       error_ = true;
+     } else if (key < last_key_) {
+       FSTERROR() << "STListWriter::Add: key disorder: " << key;
+       error_ = true;
+     }
+     if (error_) return;
+     last_key_ = key;
+     WriteType(*stream_, key);
+     entry_writer_(*stream_, t);
+   }
+
+   bool Error() const { return error_; }
+
+   // Writes an empty key after the last entry and releases the stream
+   // unless it is std::cout.
+   ~STListWriter() {
+     WriteType(*stream_, string());
+     if (stream_ != &std::cout)
+       delete stream_;
+   }
+
+  private:
+   EntryWriter entry_writer_;  // Write functor for 'EntryType'
+   ostream *stream_;           // Output stream
+   string last_key_;           // Last key
+   bool error_;
+
+   DISALLOW_COPY_AND_ASSIGN(STListWriter);
+ };
+
+
+ // String-type list reading class for objects of type 'T' using functor 'R'
+ // to read an object of type 'T' from a stream. 'R' must conform to the
+ // following interface:
+ //
+ // struct Reader {
+ //   T *operator()(istream &) const;
+ // };
+ //
+ // Several input files can be read together: the pending key of every file
+ // is kept in a min-heap, so entries are visited in increasing key order
+ // across all files. An empty file name selects standard input, which may
+ // appear at most once. Reset() and Find() are not supported.
+ template <class T, class R>
+ class STListReader {
+  public:
+   typedef T EntryType;
+   typedef R EntryReader;
+
+   // Opens every file, validates its magic number and version, reads its
+   // first key, and then reads the entry belonging to the smallest key.
+   // Any failure sets the error flag and stops construction.
+   explicit STListReader(const vector<string> &filenames)
+       : sources_(filenames), entry_(0), error_(false) {
+     streams_.resize(filenames.size(), 0);
+     bool has_stdin = false;
+     for (size_t i = 0; i < filenames.size(); ++i) {
+       if (filenames[i].empty()) {
+         if (has_stdin) {
+           FSTERROR() << "STListReader::STListReader: stdin should only "
+                      << "appear once in the input file list.";
+           error_ = true;
+           return;
+         }
+         streams_[i] = &std::cin;
+         sources_[i] = "stdin";
+         has_stdin = true;
+       } else {
+         streams_[i] = new ifstream(
+             filenames[i].c_str(), ifstream::in | ifstream::binary);
+       }
+       int32 magic_number = 0, file_version = 0;
+       ReadType(*streams_[i], &magic_number);
+       ReadType(*streams_[i], &file_version);
+       if (magic_number != kSTListMagicNumber) {
+         FSTERROR() << "STListReader::STListReader: wrong file type: "
+                    << sources_[i];
+         error_ = true;
+         return;
+       }
+       if (file_version != kSTListFileVersion) {
+         FSTERROR() << "STListReader::STListReader: wrong file version: "
+                    << sources_[i];
+         error_ = true;
+         return;
+       }
+       string key;
+       ReadType(*streams_[i], &key);
+       // Check the stream before trusting the key that was just read.
+       if (!*streams_[i]) {
+         FSTERROR() << "STListReader::STListReader: error reading file: "
+                    << sources_[i];
+         error_ = true;
+         return;
+       }
+       // An empty key means this file has no (more) entries.
+       if (!key.empty())
+         heap_.push(make_pair(key, i));
+     }
+     if (heap_.empty()) return;
+     size_t current = heap_.top().second;
+     entry_ = entry_reader_(*streams_[current]);
+     if (!entry_ || !*streams_[current]) {
+       FSTERROR() << "STListReader::STListReader: error reading entry for key: "
+                  << heap_.top().first << ", file: " << sources_[current];
+       error_ = true;
+     }
+   }
+
+   // Releases every stream except std::cin, and the current entry.
+   ~STListReader() {
+     for (size_t i = 0; i < streams_.size(); ++i) {
+       if (streams_[i] != &std::cin)
+         delete streams_[i];
+     }
+     delete entry_;
+   }
+
+   // Returns a new heap-allocated reader over the single file 'filename'.
+   static STListReader<T, R> *Open(const string &filename) {
+     vector<string> filenames;
+     filenames.push_back(filename);
+     return new STListReader<T, R>(filenames);
+   }
+
+   // Returns a new heap-allocated reader over all of 'filenames'.
+   static STListReader<T, R> *Open(const vector<string> &filenames) {
+     return new STListReader<T, R>(filenames);
+   }
+
+   // Not supported by this format: reports an error and sets the error flag.
+   void Reset() {
+     FSTERROR()
+         << "STListReader::Reset: stlist does not support reset operation";
+     error_ = true;
+   }
+
+   // Not supported by this format: reports an error, sets the error flag
+   // and returns false.
+   bool Find(const string &key) {
+     FSTERROR()
+         << "STListReader::Find: stlist does not support find operation";
+     error_ = true;
+     return false;
+   }
+
+   // True once an error occurred or no keys are pending.
+   bool Done() const {
+     return error_ || heap_.empty();
+   }
+
+   // Moves to the next entry in key order: reads the next key from the
+   // stream that supplied the current entry, then reads the entry for the
+   // new smallest pending key. Does nothing when Done() is true.
+   void Next() {
+     // Guard against calls past the end: top() on an empty heap is undefined.
+     if (error_ || heap_.empty()) return;
+     size_t current = heap_.top().second;
+     string key;
+     heap_.pop();
+     ReadType(*(streams_[current]), &key);
+     if (!*streams_[current]) {
+       FSTERROR() << "STListReader::Next: error reading file: "
+                  << sources_[current];
+       error_ = true;
+       return;
+     }
+     if (!key.empty())
+       heap_.push(make_pair(key, current));
+
+     if (!heap_.empty()) {
+       current = heap_.top().second;
+       delete entry_;
+       entry_ = entry_reader_(*streams_[current]);
+       if (!entry_ || !*streams_[current]) {
+         FSTERROR() << "STListReader::Next: error reading entry for key: "
+                    << heap_.top().first << ", file: " << sources_[current];
+         error_ = true;
+       }
+     }
+   }
+
+   // Key of the current entry. Must not be called when no keys are pending.
+   const string &GetKey() const {
+     return heap_.top().first;
+   }
+
+   // The current entry. Must not be called when no entry has been read.
+   const EntryType &GetEntry() const {
+     return *entry_;
+   }
+
+   bool Error() const { return error_; }
+
+  private:
+   EntryReader entry_reader_;   // Read functor for 'EntryType'
+   vector<istream*> streams_;   // Input streams
+   vector<string> sources_;     // and corresponding file names
+   priority_queue<
+     pair<string, size_t>, vector<pair<string, size_t> >,
+     greater<pair<string, size_t> > > heap_;  // (Key, stream id) heap
+   mutable EntryType *entry_;   // Pointer to the currently read entry
+   bool error_;
+
+   DISALLOW_COPY_AND_ASSIGN(STListReader);
+ };
+
+
+ // String-type list header reading function template on the entry header
+ // type 'H' having a member function:
+ //   Read(istream &strm, const string &filename);
+ // Checks that 'filename' is an STList, reads the first key, and calls
+ // H::Read() on the data that follows it (passing "filename:key" as the
+ // source name). Returns false, after logging an error, if the file cannot
+ // be opened, is not an STList of the expected version, or cannot be read.
+ // Does not support reading from stdin.
+ template <class H>
+ bool ReadSTListHeader(const string &filename, H *header) {
+   if (filename.empty()) {
+     LOG(ERROR) << "ReadSTListHeader: reading header not supported on stdin";
+     return false;
+   }
+   ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
+   // Report a failed open as such rather than as a wrong file type.
+   if (!strm) {
+     LOG(ERROR) << "ReadSTListHeader: could not open file: " << filename;
+     return false;
+   }
+   int32 magic_number = 0, file_version = 0;
+   ReadType(strm, &magic_number);
+   ReadType(strm, &file_version);
+   if (magic_number != kSTListMagicNumber) {
+     LOG(ERROR) << "ReadSTListHeader: wrong file type: " << filename;
+     return false;
+   }
+   if (file_version != kSTListFileVersion) {
+     LOG(ERROR) << "ReadSTListHeader: wrong file version: " << filename;
+     return false;
+   }
+   string key;
+   ReadType(strm, &key);
+   header->Read(strm, filename + ":" + key);
+   if (!strm) {
+     LOG(ERROR) << "ReadSTListHeader: error reading file: " << filename;
+     return false;
+   }
+   return true;
+ }
+
+bool IsSTList(const string &filename);
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_FAR_STLIST_H_
diff --git a/src/include/fst/extensions/far/sttable.h b/src/include/fst/extensions/far/sttable.h
new file mode 100644
index 0000000..3a03133
--- /dev/null
+++ b/src/include/fst/extensions/far/sttable.h
@@ -0,0 +1,370 @@
+// sttable.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// A generic string-to-type table file format
+//
+// This is not meant as a generalization of SSTable. This is more of
+// a simple replacement for SSTable in order to provide an open-source
+// implementation of the FAR format for the external version of the
+// FST Library.
+
+#ifndef FST_EXTENSIONS_FAR_STTABLE_H_
+#define FST_EXTENSIONS_FAR_STTABLE_H_
+
+#include <algorithm>
+#include <iostream>
+#include <fstream>
+#include <fst/util.h>
+
+namespace fst {
+
+static const int32 kSTTableMagicNumber = 2125656924;  // First value written to, and checked at the start of, every STTable file
+static const int32 kSTTableFileVersion = 1;  // Written right after the magic number; readers reject any other value
+
+// String-to-type table writing class for objects of type 'T' using functor
+// 'W' to write an object of type 'T' to a stream. 'W' must conform to the
+// following interface:
+//
+//   struct Writer {
+//     void operator()(ostream &, const T &) const;
+//   };
+//
+// File layout, in the order written: magic number, file version, one
+// (key, entry) pair per successful Add() call and, from the destructor,
+// the vector of entry positions followed by the number of entries as an
+// int64.
+template <class T, class W>
+class STTableWriter {
+ public:
+  typedef T EntryType;
+  typedef W EntryWriter;
+
+  // Opens 'filename' for binary output and writes the file header. The
+  // error flag is set if the stream is not in a good state afterwards.
+  explicit STTableWriter(const string &filename)
+      : stream_(filename.c_str(), ofstream::out | ofstream::binary),
+        error_(false) {
+    WriteType(stream_, kSTTableMagicNumber);
+    WriteType(stream_, kSTTableFileVersion);
+    if (stream_)
+      return;
+    FSTERROR() << "STTableWriter::STTableWriter: error writing to file: "
+               << filename;
+    error_ = true;
+  }
+
+  // Factory. Returns 0 for an empty 'filename' (standard output is not
+  // supported), otherwise a newly allocated writer owned by the caller.
+  static STTableWriter<T, W> *Create(const string &filename) {
+    if (!filename.empty())
+      return new STTableWriter<T, W>(filename);
+    LOG(ERROR) << "STTableWriter: writing to standard out unsupported.";
+    return 0;
+  }
+
+  // Appends the pair ('key', 't'). Keys must be non-empty and must not be
+  // smaller than the previously added key; a violation sets the error
+  // flag. Nothing is written once the error flag is set.
+  void Add(const string &key, const T &t) {
+    if (key.empty()) {
+      FSTERROR() << "STTableWriter::Add: key empty: " << key;
+      error_ = true;
+      return;
+    }
+    if (key < last_key_) {
+      FSTERROR() << "STTableWriter::Add: key disorder: " << key;
+      error_ = true;
+      return;
+    }
+    if (error_)
+      return;
+    last_key_ = key;
+    // Remember where this pair starts; the positions become the index.
+    positions_.push_back(stream_.tellp());
+    WriteType(stream_, key);
+    entry_writer_(stream_, t);
+  }
+
+  bool Error() const { return error_; }
+
+  // Writes the index of entry positions and then the entry count.
+  ~STTableWriter() {
+    WriteType(stream_, positions_);
+    const int64 num_entries = static_cast<int64>(positions_.size());
+    WriteType(stream_, num_entries);
+  }
+
+ private:
+  EntryWriter entry_writer_;  // Write functor for 'EntryType'
+  ofstream stream_;           // Output stream
+  vector<int64> positions_;   // Position in file of each key-entry pair
+  string last_key_;           // Last key
+  bool error_;                // Set on write failure or invalid key
+
+  DISALLOW_COPY_AND_ASSIGN(STTableWriter);
+};
+
+
+// String-to-type table reading class for object of type 'T' using functor 'R'
+// to read an object of type 'T' from a stream. 'R' must conform to the
+// following interface:
+//
+//   struct Reader {
+//     T *operator()(istream &) const;
+//   };
+//
+// Several table files can be opened at once; a heap keyed on the lowest
+// unread key of each file is used to visit the entries of all files in
+// increasing key order. Files that hold no entries are accepted and
+// contribute nothing to the iteration.
+template <class T, class R>
+class STTableReader {
+ public:
+  typedef T EntryType;
+  typedef R EntryReader;
+
+  // Opens every file in 'filenames', checks its header, loads its index of
+  // entry positions and positions the reader on the entry with the lowest
+  // key. On any failure the error flag is set and construction stops at
+  // the offending file.
+  explicit STTableReader(const vector<string> &filenames)
+      : sources_(filenames), current_(0), entry_(0), error_(false) {
+    compare_ = new Compare(&keys_);
+    keys_.resize(filenames.size());
+    streams_.resize(filenames.size(), 0);
+    positions_.resize(filenames.size());
+    for (size_t i = 0; i < filenames.size(); ++i) {
+      streams_[i] = new ifstream(
+          filenames[i].c_str(), ifstream::in | ifstream::binary);
+      int32 magic_number = 0, file_version = 0;
+      ReadType(*streams_[i], &magic_number);
+      ReadType(*streams_[i], &file_version);
+      if (magic_number != kSTTableMagicNumber) {
+        FSTERROR() << "STTableReader::STTableReader: wrong file type: "
+                   << filenames[i];
+        error_ = true;
+        return;
+      }
+      if (file_version != kSTTableFileVersion) {
+        FSTERROR() << "STTableReader::STTableReader: wrong file version: "
+                   << filenames[i];
+        error_ = true;
+        return;
+      }
+      // The last int64 of the file is the number of entries; it is
+      // preceded by one int64 file position per entry.
+      int64 num_entries = 0;
+      streams_[i]->seekg(-static_cast<int>(sizeof(int64)), ios_base::end);
+      ReadType(*streams_[i], &num_entries);
+      if (*streams_[i] && num_entries > 0) {
+        streams_[i]->seekg(-static_cast<int>(sizeof(int64)) *
+                           (num_entries + 1), ios_base::end);
+        positions_[i].resize(num_entries);
+        for (int64 j = 0; (j < num_entries) && (*streams_[i]); ++j)
+          ReadType(*streams_[i], &(positions_[i][j]));
+        // Only a non-empty table has a first entry to seek to.
+        streams_[i]->seekg(positions_[i][0]);
+      }
+      if (!*streams_[i] || num_entries < 0) {
+        FSTERROR() << "STTableReader::STTableReader: error reading file: "
+                   << filenames[i];
+        error_ = true;
+        return;
+      }
+    }
+    MakeHeap();
+  }
+
+  ~STTableReader() {
+    for (size_t i = 0; i < streams_.size(); ++i)
+      delete streams_[i];
+    delete compare_;
+    delete entry_;
+  }
+
+  // Opens a single table file. Returns 0 for an empty 'filename' (standard
+  // input is not supported), otherwise a newly allocated reader owned by
+  // the caller; check Error() on the result.
+  static STTableReader<T, R> *Open(const string &filename) {
+    if (filename.empty()) {
+      LOG(ERROR) << "STTableReader: reading from standard in not supported";
+      return 0;
+    }
+    vector<string> filenames;
+    filenames.push_back(filename);
+    return new STTableReader<T, R>(filenames);
+  }
+
+  // Opens several table files to be read as one merged table.
+  static STTableReader<T, R> *Open(const vector<string> &filenames) {
+    return new STTableReader<T, R>(filenames);
+  }
+
+  // Repositions every non-empty stream on its first entry and restarts the
+  // iteration. Does nothing once the error flag is set.
+  void Reset() {
+    if (error_) return;
+    for (size_t i = 0; i < streams_.size(); ++i) {
+      if (!positions_[i].empty())
+        streams_[i]->seekg(positions_[i].front());
+    }
+    MakeHeap();
+  }
+
+  // Positions every stream with LowerBound() for 'key' and restarts the
+  // iteration from there. Returns true iff the current key then equals
+  // 'key'; returns false on error or when there is no entry at all.
+  bool Find(const string &key) {
+    if (error_) return false;
+    for (size_t i = 0; i < streams_.size(); ++i)
+      LowerBound(i, key);
+    MakeHeap();
+    if (error_ || heap_.empty()) return false;
+    return keys_[current_] == key;
+  }
+
+  bool Done() const { return error_ || heap_.empty(); }
+
+  // Advances to the entry with the next lowest key. Does nothing when
+  // Done() is true.
+  void Next() {
+    if (error_ || heap_.empty()) return;
+    if (streams_[current_]->tellg() <= positions_[current_].back()) {
+      // The current stream has more entries: read its next key and put the
+      // stream (still the last element of 'heap_') back into the heap.
+      ReadType(*(streams_[current_]), &(keys_[current_]));
+      if (!*streams_[current_]) {
+        FSTERROR() << "STTableReader: error reading file: "
+                   << sources_[current_];
+        error_ = true;
+        return;
+      }
+      push_heap(heap_.begin(), heap_.end(), *compare_);
+    } else {
+      // The current stream is exhausted: drop it.
+      heap_.pop_back();
+    }
+    if (!heap_.empty())
+      PopHeap();
+  }
+
+  // Key of the current entry. Valid only while Done() is false.
+  const string &GetKey() const {
+    return keys_[current_];
+  }
+
+  // Current entry. Valid only while Done() is false.
+  const EntryType &GetEntry() const {
+    return *entry_;
+  }
+
+  bool Error() const { return error_; }
+
+ private:
+  // Comparison functor used to compare stream IDs in the heap. Orders by
+  // greater key so that the standard heap functions keep the stream with
+  // the lowest key at the front.
+  struct Compare {
+    explicit Compare(const vector<string> *keys) : keys_(keys) {}
+
+    bool operator()(size_t i, size_t j) const {
+      return (*keys_)[i] > (*keys_)[j];
+    }
+
+   private:
+    const vector<string> *keys_;
+  };
+
+  // Position the stream with ID 'id' at the position corresponding
+  // to the lower bound for key 'find_key'. When every key in the stream is
+  // smaller than 'find_key' the stream is left on its last entry. A stream
+  // without entries is left untouched.
+  void LowerBound(size_t id, const string &find_key) {
+    ifstream *strm = streams_[id];
+    const vector<int64> &positions = positions_[id];
+    if (positions.empty()) return;
+    size_t low = 0, high = positions.size() - 1;
+
+    while (low < high) {
+      size_t mid = (low + high)/2;
+      strm->seekg(positions[mid]);
+      string key;
+      ReadType(*strm, &key);
+      if (key > find_key) {
+        high = mid;
+      } else if (key < find_key) {
+        low = mid + 1;
+      } else {
+        // Exact match: walk back to the first entry carrying this key.
+        for (size_t i = mid; i > low; --i) {
+          strm->seekg(positions[i - 1]);
+          ReadType(*strm, &key);
+          if (key != find_key) {
+            strm->seekg(positions[i]);
+            return;
+          }
+        }
+        strm->seekg(positions[low]);
+        return;
+      }
+    }
+    strm->seekg(positions[low]);
+  }
+
+  // Add all streams that have entries to the heap and make the one with
+  // the lowest key current. Leaves the heap empty when no stream has any
+  // entry.
+  void MakeHeap() {
+    heap_.clear();
+    for (size_t i = 0; i < streams_.size(); ++i) {
+      if (positions_[i].empty()) continue;  // Nothing to read from this file
+      ReadType(*streams_[i], &(keys_[i]));
+      if (!*streams_[i]) {
+        FSTERROR() << "STTableReader: error reading file: " << sources_[i];
+        error_ = true;
+        return;
+      }
+      heap_.push_back(i);
+    }
+    if (heap_.empty()) return;
+    make_heap(heap_.begin(), heap_.end(), *compare_);
+    PopHeap();
+  }
+
+  // Position the stream with the lowest key at the top
+  // of the heap, set 'current_' to the ID of that stream
+  // and read the current entry from that stream.
+  // Requires a non-empty heap.
+  void PopHeap() {
+    pop_heap(heap_.begin(), heap_.end(), *compare_);
+    current_ = heap_.back();
+    delete entry_;
+    entry_ = entry_reader_(*streams_[current_]);
+    if (!entry_)
+      error_ = true;
+    if (!*streams_[current_]) {
+      FSTERROR() << "STTableReader: error reading entry for key: "
+                 << keys_[current_] << ", file: " << sources_[current_];
+      error_ = true;
+    }
+  }
+
+
+  EntryReader entry_reader_;   // Read functor for 'EntryType'
+  vector<ifstream*> streams_;  // Input streams
+  vector<string> sources_;     // and corresponding file names
+  vector<vector<int64> > positions_;  // Index of positions for each stream
+  vector<string> keys_;  // Lowest unread key for each stream
+  vector<int64> heap_;   // Heap containing ID of streams with unread keys
+  int64 current_;        // Id of current stream to be read
+  Compare *compare_;     // Functor comparing stream IDs for the heap
+  mutable EntryType *entry_;  // Pointer to the currently read entry
+  bool error_;
+
+  DISALLOW_COPY_AND_ASSIGN(STTableReader);
+};
+
+
+// String-to-type table header reading function template on the entry header
+// type 'H' having a member function:
+//   Read(istream &strm, const string &filename);
+// Checks that 'filename' is an STTable and calls H::Read() on the last
+// entry in the STTable. The name handed to H::Read() is 'filename' + ":" +
+// the key of that entry. A table without entries yields true without
+// calling H::Read(). Returns false after logging the reason on a wrong
+// magic number, a wrong file version or a read failure.
+template <class H>
+bool ReadSTTableHeader(const string &filename, H *header) {
+  ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
+
+  int32 magic_number = 0;
+  int32 file_version = 0;
+  ReadType(strm, &magic_number);
+  ReadType(strm, &file_version);
+  if (magic_number != kSTTableMagicNumber) {
+    LOG(ERROR) << "ReadSTTableHeader: wrong file type: " << filename;
+    return false;
+  }
+  if (file_version != kSTTableFileVersion) {
+    LOG(ERROR) << "ReadSTTableHeader: wrong file version: " << filename;
+    return false;
+  }
+
+  // The file ends with the entry positions followed by the entry count,
+  // each stored as an int64.
+  const int kWordSize = static_cast<int>(sizeof(int64));
+
+  int64 num_entries = -1;
+  strm.seekg(-kWordSize, ios_base::end);
+  ReadType(strm, &num_entries);
+  if (!strm) {
+    LOG(ERROR) << "ReadSTTableHeader: error reading file: " << filename;
+    return false;
+  }
+  if (num_entries == 0)
+    return true;  // No entry header to read
+
+  // The word just before the entry count is the position of the last entry.
+  int64 last_position = -1;
+  strm.seekg(-2 * kWordSize, ios_base::end);
+  ReadType(strm, &last_position);
+  strm.seekg(last_position);
+
+  string key;
+  ReadType(strm, &key);
+  header->Read(strm, filename + ":" + key);
+  if (strm)
+    return true;
+  LOG(ERROR) << "ReadSTTableHeader: error reading file: " << filename;
+  return false;
+}
+
+bool IsSTTable(const string &filename);
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_FAR_STTABLE_H_
diff --git a/src/include/fst/extensions/pdt/collection.h b/src/include/fst/extensions/pdt/collection.h
new file mode 100644
index 0000000..26be504
--- /dev/null
+++ b/src/include/fst/extensions/pdt/collection.h
@@ -0,0 +1,122 @@
+// collection.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Class to store a collection of sets with elements of type T.
+
+#ifndef FST_EXTENSIONS_PDT_COLLECTION_H__
+#define FST_EXTENSIONS_PDT_COLLECTION_H__
+
+#include <algorithm>
+#include <vector>
+using std::vector;
+
+#include <fst/bi-table.h>
+
+namespace fst {
+
+// Stores a collection of non-empty sets with elements of type T. A
+// default constructor, equality ==, a total order <, and an STL-style
+// hash class must be defined on the elements. Provides signed
+// integer ID (of type I) of each unique set. The IDs are allocated
+// starting from 0 in order.
+//
+// A set is stored as a chain of trie nodes: each node holds one element
+// and the ID of the node holding the rest of the set, and the ID of a set
+// is the ID of the node holding its first element.
+template <class I, class T>
+class Collection {
+ public:
+  struct Node {  // Trie node
+    I node_id;   // ID of the node holding the rest of the set; root is
+                 // kNoNodeId
+    T element;
+
+    Node() : node_id(kNoNodeId), element(T()) {}
+    Node(I i, const T &t) : node_id(i), element(t) {}
+
+    bool operator==(const Node& n) const {
+      return n.node_id == node_id && n.element == element;
+    }
+  };
+
+  // Hash functor on trie nodes combining the link and the element hash.
+  struct NodeHash {
+    size_t operator()(const Node &n) const {
+      return n.node_id + hash_(n.element) * kPrime;
+    }
+  };
+
+  typedef CompactHashBiTable<I, Node, NodeHash> NodeTable;
+
+  // Iterates over the elements of one set by following the node links.
+  class SetIterator {
+   public:
+    SetIterator(I id, Node node, NodeTable *node_table)
+        :id_(id), node_(node), node_table_(node_table) {}
+
+    bool Done() const { return id_ == kNoNodeId; }
+
+    const T &Element() const { return node_.element; }
+
+    void Next() {
+      id_ = node_.node_id;
+      if (id_ != kNoNodeId)
+        node_ = node_table_->FindEntry(id_);
+    }
+
+   private:
+    I id_;       // Iterator set node id
+    Node node_;  // Iterator set node
+    NodeTable *node_table_;
+  };
+
+  Collection() {}
+
+  // Lookups integer ID from set. If it doesn't exist, then adds it.
+  // Set elements should be in strict order (and therefore unique).
+  // Returns kNoNodeId (-1) for an empty 'set'.
+  I FindId(const vector<T> &set) {
+    I node_id = kNoNodeId;
+    // Built back to front so that each node links to the rest of the set.
+    for (ssize_t i = set.size() - 1; i >= 0; --i) {
+      Node node(node_id, set[i]);
+      node_id = node_table_.FindId(node);
+    }
+    return node_id;
+  }
+
+  // Finds set given integer ID. Returns an iterator over the elements of
+  // that set. If 'id' is negative or not smaller than the size of the node
+  // table, the returned iterator is already Done().
+  SetIterator FindSet(I id) {
+    if (id < 0 || id >= node_table_.Size()) {
+      return SetIterator(kNoNodeId, Node(kNoNodeId, T()), &node_table_);
+    } else {
+      return SetIterator(id, node_table_.FindEntry(id), &node_table_);
+    }
+  }
+
+ private:
+  static const I kNoNodeId;       // ID marking the end of a node chain
+  static const size_t kPrime;     // Multiplier used by NodeHash
+  static std::tr1::hash<T> hash_;  // Element hash used by NodeHash
+
+  NodeTable node_table_;
+
+  DISALLOW_COPY_AND_ASSIGN(Collection);
+};
+
+template<class I, class T> const I Collection<I, T>::kNoNodeId = -1;
+
+template <class I, class T> const size_t Collection<I, T>::kPrime = 7853;
+
+template <class I, class T> std::tr1::hash<T> Collection<I, T>::hash_;
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_PDT_COLLECTION_H__
diff --git a/src/include/fst/extensions/pdt/compose.h b/src/include/fst/extensions/pdt/compose.h
new file mode 100644
index 0000000..364d76f
--- /dev/null
+++ b/src/include/fst/extensions/pdt/compose.h
@@ -0,0 +1,146 @@
+// compose.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Compose a PDT and an FST.
+
+#ifndef FST_EXTENSIONS_PDT_COMPOSE_H__
+#define FST_EXTENSIONS_PDT_COMPOSE_H__
+
+#include <fst/compose.h>
+
+namespace fst {
+
+// Composition options for composing a PDT with an FST. This primary
+// template covers the case where the PDT is the first composition
+// argument: it builds the two multi-epsilon matchers and the
+// AltSequenceComposeFilter-based filter and stores them in the inherited
+// 'matcher1', 'matcher2' and 'filter' fields.
+template <class Arc, bool left_pdt = true>
+class PdtComposeOptions : public
+ComposeFstOptions<Arc,
+                  MultiEpsMatcher< Matcher<Fst<Arc> > >,
+                  MultiEpsFilter<AltSequenceComposeFilter<
+                                   MultiEpsMatcher<
+                                     Matcher<Fst<Arc> > > > > > {
+ public:
+  typedef typename Arc::Label Label;
+  typedef MultiEpsMatcher< Matcher<Fst<Arc> > > PdtMatcher;
+  typedef MultiEpsFilter<AltSequenceComposeFilter<PdtMatcher> > PdtFilter;
+  typedef ComposeFstOptions<Arc, PdtMatcher, PdtFilter> COptions;
+  using COptions::matcher1;
+  using COptions::matcher2;
+  using COptions::filter;
+
+  // 'ifst1' is the PDT, 'parens' its open-close parenthesis label pairs
+  // and 'ifst2' the FST.
+  PdtComposeOptions(const Fst<Arc> &ifst1,
+                    const vector<pair<Label, Label> > &parens,
+                    const Fst<Arc> &ifst2) {
+    matcher1 = new PdtMatcher(ifst1, MATCH_OUTPUT, kMultiEpsList);
+    matcher2 = new PdtMatcher(ifst2, MATCH_INPUT, kMultiEpsLoop);
+
+    // Both matchers treat every parenthesis label as a multi-epsilon.
+    typedef typename vector<pair<Label, Label> >::const_iterator ParenIter;
+    for (ParenIter it = parens.begin(); it != parens.end(); ++it) {
+      const Label open_paren = it->first;
+      const Label close_paren = it->second;
+      matcher1->AddMultiEpsLabel(open_paren);
+      matcher1->AddMultiEpsLabel(close_paren);
+      matcher2->AddMultiEpsLabel(open_paren);
+      matcher2->AddMultiEpsLabel(close_paren);
+    }
+
+    filter = new PdtFilter(ifst1, ifst2, matcher1, matcher2, true);
+  }
+};
+
+// Composition options for composing an FST with a PDT. This
+// specialization covers the case where the FST is the first composition
+// argument: it builds the two multi-epsilon matchers and the
+// SequenceComposeFilter-based filter and stores them in the inherited
+// 'matcher1', 'matcher2' and 'filter' fields.
+template <class Arc>
+class PdtComposeOptions<Arc, false> : public
+ComposeFstOptions<Arc,
+                  MultiEpsMatcher< Matcher<Fst<Arc> > >,
+                  MultiEpsFilter<SequenceComposeFilter<
+                                   MultiEpsMatcher<
+                                     Matcher<Fst<Arc> > > > > > {
+ public:
+  typedef typename Arc::Label Label;
+  typedef MultiEpsMatcher< Matcher<Fst<Arc> > > PdtMatcher;
+  typedef MultiEpsFilter<SequenceComposeFilter<PdtMatcher> > PdtFilter;
+  typedef ComposeFstOptions<Arc, PdtMatcher, PdtFilter> COptions;
+  using COptions::matcher1;
+  using COptions::matcher2;
+  using COptions::filter;
+
+  // 'ifst1' is the FST, 'ifst2' the PDT and 'parens' the open-close
+  // parenthesis label pairs of the PDT.
+  PdtComposeOptions(const Fst<Arc> &ifst1,
+                    const Fst<Arc> &ifst2,
+                    const vector<pair<Label, Label> > &parens) {
+    matcher1 = new PdtMatcher(ifst1, MATCH_OUTPUT, kMultiEpsLoop);
+    matcher2 = new PdtMatcher(ifst2, MATCH_INPUT, kMultiEpsList);
+
+    // Both matchers treat every parenthesis label as a multi-epsilon.
+    typedef typename vector<pair<Label, Label> >::const_iterator ParenIter;
+    for (ParenIter it = parens.begin(); it != parens.end(); ++it) {
+      const Label open_paren = it->first;
+      const Label close_paren = it->second;
+      matcher1->AddMultiEpsLabel(open_paren);
+      matcher1->AddMultiEpsLabel(close_paren);
+      matcher2->AddMultiEpsLabel(open_paren);
+      matcher2->AddMultiEpsLabel(close_paren);
+    }
+
+    filter = new PdtFilter(ifst1, ifst2, matcher1, matcher2, true);
+  }
+};
+
+
+// Composes a pushdown transducer (PDT) encoded as an FST ('ifst1') with
+// an FST ('ifst2'); the result, written to 'ofst', is again a PDT encoded
+// as an FST. In a PDT some transitions are labeled with open or close
+// parentheses, and for a path to be valid in the PDT its parentheses must
+// balance (see ExpandFst). The open-close parenthesis label pairs are
+// passed in 'parens'. The result is trimmed with Connect() when
+// 'opts.connect' is set.
+template <class Arc>
+void Compose(const Fst<Arc> &ifst1,
+             const vector<pair<typename Arc::Label,
+                               typename Arc::Label> > &parens,
+             const Fst<Arc> &ifst2,
+             MutableFst<Arc> *ofst,
+             const ComposeOptions &opts = ComposeOptions()) {
+  typedef PdtComposeOptions<Arc, true> LeftPdtOptions;
+
+  LeftPdtOptions pdt_opts(ifst1, parens, ifst2);
+  // NOTE(review): gc_limit is zeroed before building the delayed
+  // composition; confirm the intended cache behavior in cache.h.
+  pdt_opts.gc_limit = 0;
+  *ofst = ComposeFst<Arc>(ifst1, ifst2, pdt_opts);
+
+  if (!opts.connect)
+    return;
+  Connect(ofst);
+}
+
+
+// Composes an FST ('ifst1') with a pushdown transducer (PDT) encoded as
+// an FST ('ifst2'); the result, written to 'ofst', is again a PDT encoded
+// as an FST. In a PDT some transitions are labeled with open or close
+// parentheses, and for a path to be valid in the PDT its parentheses must
+// balance (see ExpandFst). The open-close parenthesis label pairs are
+// passed in 'parens'. The result is trimmed with Connect() when
+// 'opts.connect' is set.
+template <class Arc>
+void Compose(const Fst<Arc> &ifst1,
+             const Fst<Arc> &ifst2,
+             const vector<pair<typename Arc::Label,
+                               typename Arc::Label> > &parens,
+             MutableFst<Arc> *ofst,
+             const ComposeOptions &opts = ComposeOptions()) {
+  typedef PdtComposeOptions<Arc, false> RightPdtOptions;
+
+  RightPdtOptions pdt_opts(ifst1, ifst2, parens);
+  // NOTE(review): gc_limit is zeroed before building the delayed
+  // composition; confirm the intended cache behavior in cache.h.
+  pdt_opts.gc_limit = 0;
+  *ofst = ComposeFst<Arc>(ifst1, ifst2, pdt_opts);
+
+  if (!opts.connect)
+    return;
+  Connect(ofst);
+}
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_PDT_COMPOSE_H__
diff --git a/src/include/fst/extensions/pdt/expand.h b/src/include/fst/extensions/pdt/expand.h
new file mode 100644
index 0000000..f464403
--- /dev/null
+++ b/src/include/fst/extensions/pdt/expand.h
@@ -0,0 +1,975 @@
+// expand.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Expand a PDT to an FST.
+
+#ifndef FST_EXTENSIONS_PDT_EXPAND_H__
+#define FST_EXTENSIONS_PDT_EXPAND_H__
+
+#include <vector>
+using std::vector;
+
+#include <fst/extensions/pdt/pdt.h>
+#include <fst/extensions/pdt/paren.h>
+#include <fst/extensions/pdt/shortest-path.h>
+#include <fst/extensions/pdt/reverse.h>
+#include <fst/cache.h>
+#include <fst/mutable-fst.h>
+#include <fst/queue.h>
+#include <fst/state-table.h>
+#include <fst/test-properties.h>
+
+namespace fst {
+
+// Options for ExpandFst: the cache options plus the expansion settings.
+template <class Arc>
+struct ExpandFstOptions : public CacheOptions {
+  // When false, arcs that change the stack get epsilon (0) input and
+  // output labels in the expansion; when true their labels are kept.
+  bool keep_parentheses;
+  // Optional stack to use. When non-null it is used as is and not deleted
+  // by the ExpandFst implementation; when null the implementation
+  // allocates its own.
+  PdtStack<typename Arc::StateId, typename Arc::Label> *stack;
+  // Optional state table, with the same ownership rule as 'stack'.
+  PdtStateTable<typename Arc::StateId, typename Arc::StateId> *state_table;
+
+  ExpandFstOptions(
+      const CacheOptions &opts = CacheOptions(),
+      bool kp = false,
+      PdtStack<typename Arc::StateId, typename Arc::Label> *s = 0,
+      PdtStateTable<typename Arc::StateId, typename Arc::StateId> *st = 0)
+      : CacheOptions(opts),
+        keep_parentheses(kp),
+        stack(s),
+        state_table(st) {}
+};
+
+// Properties for an expanded PDT: of the input properties 'inprops', only
+// the acceptor, acyclic, initial-acyclic and unweighted bits are kept.
+inline uint64 ExpandProperties(uint64 inprops) {
+  const uint64 kept_props =
+      kAcceptor | kAcyclic | kInitialAcyclic | kUnweighted;
+  return inprops & kept_props;
+}
+
+
+// Implementation class for ExpandFst.
+//
+// A state of the expansion stands for a (PDT state, stack id) pair;
+// 'state_table_' maps such pairs to state ids of the expansion and back,
+// and 'stack_' maps a stack id and an arc label to the stack id reached by
+// taking that arc. Start state, final weights and arcs are computed on
+// first request and then served by the CacheImpl base class.
+template <class A>
+class ExpandFstImpl
+    : public CacheImpl<A> {
+ public:
+  using FstImpl<A>::SetType;
+  using FstImpl<A>::SetProperties;
+  using FstImpl<A>::Properties;
+  using FstImpl<A>::SetInputSymbols;
+  using FstImpl<A>::SetOutputSymbols;
+
+  using CacheBaseImpl< CacheState<A> >::PushArc;
+  using CacheBaseImpl< CacheState<A> >::HasArcs;
+  using CacheBaseImpl< CacheState<A> >::HasFinal;
+  using CacheBaseImpl< CacheState<A> >::HasStart;
+  using CacheBaseImpl< CacheState<A> >::SetArcs;
+  using CacheBaseImpl< CacheState<A> >::SetFinal;
+  using CacheBaseImpl< CacheState<A> >::SetStart;
+
+  typedef A Arc;
+  typedef typename A::Label Label;
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+  typedef StateId StackId;
+  typedef PdtStateTuple<StateId, StackId> StateTuple;
+
+  // Builds the expansion of the PDT 'fst' with parenthesis pairs 'parens'.
+  // A stack or state table supplied in 'opts' is used without taking
+  // ownership; otherwise one is allocated and owned by this object.
+  ExpandFstImpl(const Fst<A> &fst,
+                const vector<pair<typename Arc::Label,
+                                  typename Arc::Label> > &parens,
+                const ExpandFstOptions<A> &opts)
+      : CacheImpl<A>(opts), fst_(fst.Copy()),
+        stack_(opts.stack ? opts.stack: new PdtStack<StateId, Label>(parens)),
+        state_table_(opts.state_table ? opts.state_table :
+                     new PdtStateTable<StateId, StackId>()),
+        own_stack_(opts.stack == 0), own_state_table_(opts.state_table == 0),
+        keep_parentheses_(opts.keep_parentheses) {
+    SetType("expand");
+
+    const uint64 input_props = fst.Properties(kFstProperties, false);
+    SetProperties(ExpandProperties(input_props), kCopyProperties);
+
+    SetInputSymbols(fst.InputSymbols());
+    SetOutputSymbols(fst.OutputSymbols());
+  }
+
+  // The copy owns a copy of the stack and a newly constructed state table.
+  ExpandFstImpl(const ExpandFstImpl &impl)
+      : CacheImpl<A>(impl),
+        fst_(impl.fst_->Copy(true)),
+        stack_(new PdtStack<StateId, Label>(*impl.stack_)),
+        state_table_(new PdtStateTable<StateId, StackId>()),
+        own_stack_(true), own_state_table_(true),
+        keep_parentheses_(impl.keep_parentheses_) {
+    SetType("expand");
+    SetProperties(impl.Properties(), kCopyProperties);
+    SetInputSymbols(impl.InputSymbols());
+    SetOutputSymbols(impl.OutputSymbols());
+  }
+
+  ~ExpandFstImpl() {
+    delete fst_;
+    if (own_stack_) {
+      delete stack_;
+    }
+    if (own_state_table_) {
+      delete state_table_;
+    }
+  }
+
+  // The start state pairs the start state of the PDT with stack id 0.
+  // Returns kNoStateId, without caching, when the PDT has no start state.
+  StateId Start() {
+    if (HasStart())
+      return CacheImpl<A>::Start();
+    const StateId pdt_start = fst_->Start();
+    if (pdt_start == kNoStateId)
+      return kNoStateId;
+    StateTuple start_tuple(pdt_start, 0);
+    SetStart(state_table_->FindState(start_tuple));
+    return CacheImpl<A>::Start();
+  }
+
+  // A state is final, with the weight of its PDT state, only when that
+  // PDT state is final and its stack id is 0.
+  Weight Final(StateId s) {
+    if (HasFinal(s))
+      return CacheImpl<A>::Final(s);
+    const StateTuple &tuple = state_table_->Tuple(s);
+    Weight pdt_final = fst_->Final(tuple.state_id);
+    const bool is_final =
+        (pdt_final != Weight::Zero()) && (tuple.stack_id == 0);
+    if (is_final) {
+      SetFinal(s, pdt_final);
+    } else {
+      SetFinal(s, Weight::Zero());
+    }
+    return CacheImpl<A>::Final(s);
+  }
+
+  size_t NumArcs(StateId s) {
+    ExpandIfNeeded(s);
+    return CacheImpl<A>::NumArcs(s);
+  }
+
+  size_t NumInputEpsilons(StateId s) {
+    ExpandIfNeeded(s);
+    return CacheImpl<A>::NumInputEpsilons(s);
+  }
+
+  size_t NumOutputEpsilons(StateId s) {
+    ExpandIfNeeded(s);
+    return CacheImpl<A>::NumOutputEpsilons(s);
+  }
+
+  void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
+    ExpandIfNeeded(s);
+    CacheImpl<A>::InitArcIterator(s, data);
+  }
+
+  // Computes the outgoing transitions from a state, creating new destination
+  // states as needed.
+  void ExpandState(StateId s) {
+    // Copied by value: FindState() below may add entries to the table.
+    StateTuple src_tuple = state_table_->Tuple(s);
+    ArcIterator< Fst<A> > aiter(*fst_, src_tuple.state_id);
+    for (; !aiter.Done(); aiter.Next()) {
+      Arc arc = aiter.Value();
+      const StackId next_stack = stack_->Find(src_tuple.stack_id, arc.ilabel);
+      // Non-matching close parenthesis: the arc is dropped.
+      if (next_stack == -1)
+        continue;
+      // Stack push/pop: the parenthesis becomes an epsilon unless kept.
+      if (!keep_parentheses_ && (next_stack != src_tuple.stack_id))
+        arc.ilabel = arc.olabel = 0;
+
+      StateTuple dest_tuple(arc.nextstate, next_stack);
+      arc.nextstate = state_table_->FindState(dest_tuple);
+      PushArc(s, arc);
+    }
+    SetArcs(s);
+  }
+
+  const PdtStack<StackId, Label> &GetStack() const { return *stack_; }
+
+  const PdtStateTable<StateId, StackId> &GetStateTable() const {
+    return *state_table_;
+  }
+
+ private:
+  // Expands state 's' unless its arcs are already available.
+  void ExpandIfNeeded(StateId s) {
+    if (!HasArcs(s))
+      ExpandState(s);
+  }
+
+  const Fst<A> *fst_;  // Owned copy of the input PDT
+
+  PdtStack<StackId, Label> *stack_;
+  PdtStateTable<StateId, StackId> *state_table_;
+  bool own_stack_;         // Whether 'stack_' is deleted by the destructor
+  bool own_state_table_;   // Same for 'state_table_'
+  bool keep_parentheses_;  // Keep labels on arcs that change the stack
+
+  void operator=(const ExpandFstImpl<A> &);  // disallow
+};
+
+// Delayed expansion of a pushdown transducer (PDT), encoded as an FST,
+// into an FST. Some transitions of the PDT are labeled with open or close
+// parentheses whose label pairs are given in 'parens'; a path is valid in
+// the PDT only if its parentheses balance, and the expansion enforces
+// that constraint. The PDT must be expandable as an FST.
+//
+// This class attaches interface to implementation and handles
+// reference counting, delegating most methods to ImplToFst.
+template <class A>
+class ExpandFst : public ImplToFst< ExpandFstImpl<A> > {
+ public:
+  friend class ArcIterator< ExpandFst<A> >;
+  friend class StateIterator< ExpandFst<A> >;
+
+  typedef A Arc;
+  typedef typename A::Label Label;
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+  typedef StateId StackId;
+  typedef CacheState<A> State;
+  typedef ExpandFstImpl<A> Impl;
+
+  // Expansion with default options.
+  ExpandFst(const Fst<A> &fst,
+            const vector<pair<Label, Label> > &parens)
+      : ImplToFst<Impl>(new Impl(fst, parens, ExpandFstOptions<A>())) {}
+
+  // Expansion with caller-supplied options.
+  ExpandFst(const Fst<A> &fst,
+            const vector<pair<Label, Label> > &parens,
+            const ExpandFstOptions<A> &opts)
+      : ImplToFst<Impl>(new Impl(fst, parens, opts)) {}
+
+  // See Fst<>::Copy() for doc.
+  ExpandFst(const ExpandFst<A> &fst, bool safe = false)
+      : ImplToFst<Impl>(fst, safe) {}
+
+  // Get a copy of this ExpandFst. See Fst<>::Copy() for further doc.
+  virtual ExpandFst<A> *Copy(bool safe = false) const {
+    return new ExpandFst<A>(*this, safe);
+  }
+
+  virtual inline void InitStateIterator(StateIteratorData<A> *data) const;
+
+  virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
+    GetImpl()->InitArcIterator(s, data);
+  }
+
+  // Stack used by the implementation.
+  const PdtStack<StackId, Label> &GetStack() const {
+    return GetImpl()->GetStack();
+  }
+
+  // State table used by the implementation.
+  const PdtStateTable<StateId, StackId> &GetStateTable() const {
+    return GetImpl()->GetStateTable();
+  }
+
+ private:
+  // Makes visible to friends.
+  Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
+
+  void operator=(const ExpandFst<A> &fst);  // Disallow
+};
+
+
+// StateIterator specialization for ExpandFst; all of the work is done by
+// the CacheStateIterator base class.
+template<class A>
+class StateIterator< ExpandFst<A> >
+    : public CacheStateIterator< ExpandFst<A> > {
+ public:
+  explicit StateIterator(const ExpandFst<A> &fst)
+      : Base(fst, fst.GetImpl()) {}
+
+ private:
+  typedef CacheStateIterator< ExpandFst<A> > Base;
+};
+
+
+// ArcIterator specialization for ExpandFst. The constructor expands state
+// 's' if its arcs are not available yet.
+template <class A>
+class ArcIterator< ExpandFst<A> >
+    : public CacheArcIterator< ExpandFst<A> > {
+ public:
+  typedef typename A::StateId StateId;
+
+  ArcIterator(const ExpandFst<A> &fst, StateId s)
+      : CacheArcIterator< ExpandFst<A> >(fst.GetImpl(), s) {
+    ExpandFstImpl<A> *impl = fst.GetImpl();
+    if (impl->HasArcs(s))
+      return;
+    impl->ExpandState(s);
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+
+// Installs a newly allocated StateIterator for this FST in 'data->base'.
+template <class A> inline
+void ExpandFst<A>::InitStateIterator(StateIteratorData<A> *data) const {
+  typedef StateIterator< ExpandFst<A> > Iterator;
+  data->base = new Iterator(*this);
+}
+
+//
+// PrunedExpand Class
+//
+
+// Prunes the delayed expansion of a pushdown transducer (PDT) encoded
+// as an FST into an FST. In the PDT, some transitions are labeled
+// with open or close parentheses. To be interpreted as a PDT, the
+// parens must balance on a path. The open-close parenthesis label
+// pairs are passed in 'parens'. The expansion enforces the
+// parenthesis constraints.
+//
+// The algorithm works by visiting the delayed ExpandFst using a
+// shortest-stack first queue discipline and relies on the
+// shortest-distance information computed using a reverse
+// shortest-path call to perform the pruning.
+//
+// The algorithm maintains the same state ordering between the ExpandFst
+// being visited 'efst_' and the result of pruning written into the
+// MutableFst 'ofst_' to improve readability of the code.
+//
+template <class A>
+class PrunedExpand {
+ public:
+ typedef A Arc;
+ typedef typename A::Label Label;
+ typedef typename A::StateId StateId;
+ typedef typename A::Weight Weight;
+ typedef StateId StackId;
+ typedef PdtStack<StackId, Label> Stack;
+ typedef PdtStateTable<StateId, StackId> StateTable;
+ typedef typename PdtBalanceData<Arc>::SetIterator SetIterator;
+
+ // Constructor taking as input a PDT specified by 'ifst' and 'parens'.
+ // 'keep_parentheses' specifies whether parentheses are replaced by
+ // epsilons or not during the expansion. 'opts' is the cache options
+ // used to instantiate the underlying ExpandFst.
+ PrunedExpand(const Fst<A> &ifst,
+ const vector<pair<Label, Label> > &parens,
+ bool keep_parentheses = false,
+ const CacheOptions &opts = CacheOptions())
+ : ifst_(ifst.Copy()),
+ keep_parentheses_(keep_parentheses),
+ stack_(parens),
+ efst_(ifst, parens,
+ ExpandFstOptions<Arc>(opts, true, &stack_, &state_table_)),
+ queue_(state_table_, stack_, stack_length_, distance_, fdistance_) {
+ Reverse(*ifst_, parens, &rfst_);
+ VectorFst<Arc> path;
+ reverse_shortest_path_ = new SP(
+ rfst_, parens,
+ PdtShortestPathOptions<A, FifoQueue<StateId> >(true, false));
+ reverse_shortest_path_->ShortestPath(&path);
+ balance_data_ = reverse_shortest_path_->GetBalanceData()->Reverse(
+ rfst_.NumStates(), 10, -1);
+
+ InitCloseParenMultimap(parens);
+ }
+
+ ~PrunedExpand() {
+ delete ifst_;
+ delete reverse_shortest_path_;
+ delete balance_data_;
+ }
+
+ // Expands and prunes with weight threshold 'threshold' the input PDT.
+ // Writes the result in 'ofst'.
+ void Expand(MutableFst<A> *ofst, const Weight &threshold);
+
+ private:
+ static const uint8 kEnqueued;
+ static const uint8 kExpanded;
+ static const uint8 kSourceState;
+
+  // Ordering on the states of 'efst_' used by the queue:
+  //  1. a state whose stack is shorter comes first;
+  //  2. for two distinct stacks of the same length, the stacks are walked
+  //     from the top down and the first differing entry decides;
+  //  3. for two states with the same stack, the one with the smaller
+  //     estimate 'distance (x) final distance' comes first (A*).
+  class StackCompare {
+   public:
+    StackCompare(const StateTable &st,
+                 const Stack &s, const vector<StackId> &sl,
+                 const vector<Weight> &d, const vector<Weight> &fd)
+        : state_table_(st), stack_(s), stack_length_(sl),
+          distance_(d), fdistance_(fd) {}
+
+    bool operator()(StateId s1, StateId s2) const {
+      StackId stack1 = state_table_.Tuple(s1).stack_id;
+      StackId stack2 = state_table_.Tuple(s2).stack_id;
+
+      // Rule 1: shorter stack first.
+      if (stack_length_[stack1] != stack_length_[stack2])
+        return stack_length_[stack1] < stack_length_[stack2];
+
+      // Rule 3: same stack, compare the A* estimates.
+      if (stack1 == stack2)
+        return less_(Estimate(s1), Estimate(s2));
+
+      // Rule 2: equal lengths but different stacks; pop both in lock-step
+      // until the tops differ or the remaining stacks coincide.
+      while (stack1 != stack2) {
+        if (stack_.Top(stack1) != stack_.Top(stack2))
+          return stack_.Top(stack1) < stack_.Top(stack2);
+        stack1 = stack_.Pop(stack1);
+        stack2 = stack_.Pop(stack2);
+      }
+      return false;
+    }
+
+   private:
+    // Product of the two distance estimates stored for 's', or
+    // Weight::Zero() when 's' is beyond the end of either vector.
+    Weight Estimate(StateId s) const {
+      if ((s < distance_.size()) && (s < fdistance_.size()))
+        return Times(distance_[s], fdistance_[s]);
+      return Weight::Zero();
+    }
+
+    const StateTable &state_table_;
+    const Stack &stack_;
+    const vector<StackId> &stack_length_;
+    const vector<Weight> &distance_;
+    const vector<Weight> &fdistance_;
+    NaturalLess<Weight> less_;
+  };
+
+  // Shortest-first queue over state ids ordered by StackCompare. The
+  // constructor arguments are passed straight through to the comparison
+  // functor.
+  class ShortestStackFirstQueue
+      : public ShortestFirstQueue<StateId, StackCompare> {
+   public:
+    ShortestStackFirstQueue(
+        const StateTable &state_table,
+        const Stack &stack,
+        const vector<StackId> &stack_length,
+        const vector<Weight> &distance,
+        const vector<Weight> &fdistance)
+        : ShortestFirstQueue<StateId, StackCompare>(
+              StackCompare(state_table, stack, stack_length,
+                           distance, fdistance)) {}
+  };
+
+
+ void InitCloseParenMultimap(const vector<pair<Label, Label> > &parens);
+ Weight DistanceToDest(StateId state, StateId source) const;
+ uint8 Flags(StateId s) const;
+ void SetFlags(StateId s, uint8 flags, uint8 mask);
+ Weight Distance(StateId s) const;
+ void SetDistance(StateId s, Weight w);
+ Weight FinalDistance(StateId s) const;
+ void SetFinalDistance(StateId s, Weight w);
+ StateId SourceState(StateId s) const;
+ void SetSourceState(StateId s, StateId p);
+ void AddStateAndEnqueue(StateId s);
+ void Relax(StateId s, const A &arc, Weight w);
+ bool PruneArc(StateId s, const A &arc);
+ void ProcStart();
+ void ProcFinal(StateId s);
+ bool ProcNonParen(StateId s, const A &arc, bool add_arc);
+ bool ProcOpenParen(StateId s, const A &arc, StackId si, StackId nsi);
+ bool ProcCloseParen(StateId s, const A &arc);
+ void ProcDestStates(StateId s, StackId si);
+
+ Fst<A> *ifst_; // Input PDT
+ VectorFst<Arc> rfst_; // Reversed PDT
+ bool keep_parentheses_; // Keep parentheses in ofst?
+ StateTable state_table_; // State table for efst_
+ Stack stack_; // Stack trie
+ ExpandFst<Arc> efst_; // Expanded PDT
+ vector<StackId> stack_length_; // Length of stack for given stack id
+ vector<Weight> distance_; // Distance from initial state in efst_/ofst
+ vector<Weight> fdistance_; // Distance to final states in efst_/ofst
+ ShortestStackFirstQueue queue_; // Queue used to visit efst_
+ vector<uint8> flags_; // Status flags for states in efst_/ofst
+ vector<StateId> sources_; // PDT source state for each expanded state
+
+ typedef PdtShortestPath<Arc, FifoQueue<StateId> > SP;
+ typedef typename SP::CloseParenMultimap ParenMultimap;
+ SP *reverse_shortest_path_; // Shortest path for rfst_
+ PdtBalanceData<Arc> *balance_data_; // Not owned by shortest_path_
+ ParenMultimap close_paren_multimap_; // Maps open paren arcs to
+ // balancing close paren arcs.
+
+ MutableFst<Arc> *ofst_; // Output fst
+ Weight limit_; // Weight limit
+
+ typedef unordered_map<StateId, Weight> DestMap;
+ DestMap dest_map_;
+ StackId current_stack_id_;
+ // 'current_stack_id_' is the stack id of the states currently at the top
+ // of the queue, i.e., the states currently being popped and processed.
+ // 'dest_map_' maps a state 's' in 'ifst_' that is the source
+ // of a close parenthesis matching the top of 'current_stack_id_' to
+ // the shortest-distance from '(s, current_stack_id_)' to the final
+ // states in 'efst_'.
+ ssize_t current_paren_id_; // Paren id at top of current stack
+ ssize_t cached_stack_id_;
+ StateId cached_source_;
+ slist<pair<StateId, Weight> > cached_dest_list_;
+ // 'cached_dest_list_' contains the set of pairs of destination
+ // state and weight to the final states for source state
+ // 'cached_source_' and stack id 'cached_stack_id_': the destination
+ // states are the source states of a close parenthesis balancing
+ // the incoming open parenthesis (the one at the top of that stack)
+ // in state 'cached_source_'.
+
+ NaturalLess<Weight> less_;
+};
+
+// Status bits stored per state in 'flags_':
+//  kEnqueued    - set by AddStateAndEnqueue() when the state is queued;
+//  kExpanded    - set (and kEnqueued cleared) by Expand() on dequeue;
+//  kSourceState - set by ProcOpenParen() on the destination of an open
+//                 paren arc that was added to 'ofst_'.
+template <class A> const uint8 PrunedExpand<A>::kEnqueued = 0x01;
+template <class A> const uint8 PrunedExpand<A>::kExpanded = 0x02;
+template <class A> const uint8 PrunedExpand<A>::kSourceState = 0x04;
+
+
+// Builds 'close_paren_multimap_': every close parenthesis arc of
+// 'ifst_' is stored under the key (paren_id, s), where 's' is the state
+// the arc leaves and 'paren_id' is the index in 'parens' of the pair
+// that the arc's input label belongs to.
+template <class A>
+void PrunedExpand<A>::InitCloseParenMultimap(
+    const vector<pair<Label, Label> > &parens) {
+  typedef unordered_map<Label, Label> ParenIdMap;
+
+  // Paren label (open or close) -> index of its pair in 'parens'.
+  ParenIdMap paren_ids;
+  for (Label id = 0; id < parens.size(); ++id) {
+    paren_ids[parens[id].first] = id;
+    paren_ids[parens[id].second] = id;
+  }
+
+  for (StateIterator<Fst<Arc> > siter(*ifst_); !siter.Done(); siter.Next()) {
+    const StateId state = siter.Value();
+    for (ArcIterator<Fst<Arc> > aiter(*ifst_, state);
+         !aiter.Done(); aiter.Next()) {
+      const Arc &arc = aiter.Value();
+      typename ParenIdMap::const_iterator found = paren_ids.find(arc.ilabel);
+      // Only close parenthesis arcs are recorded; skip everything else.
+      if (found == paren_ids.end() ||
+          arc.ilabel != parens[found->second].second)
+        continue;
+      close_paren_multimap_.insert(
+          make_pair(ParenState<Arc>(found->second, state), arc));
+    }
+  }
+}
+
+
+// Returns the weight of the shortest balanced path from 'source' to
+// 'dest' in 'ifst_'. The value is read from the shortest-path data
+// computed on the reversed PDT, under the search state
+// ('source' + 1, 'dest' + 1). 'dest' is expected to be the source state
+// of a close paren arc (ProcStart() and Expand() also pass
+// 'rfst_.Start() - 1').
+template <class A>
+typename A::Weight PrunedExpand<A>::DistanceToDest(StateId source,
+                                                   StateId dest) const {
+  typename SP::SearchState search_state(source + 1, dest + 1);
+  VLOG(2) << "D(" << source << ", " << dest << ") ="
+          << reverse_shortest_path_->GetShortestPathData().Distance(
+                 search_state);
+  return reverse_shortest_path_->GetShortestPathData().Distance(search_state);
+}
+
+// Returns the status flags stored for state 's' in 'ofst_'; a state for
+// which no flags have been stored yet reports 0.
+template <class A>
+uint8 PrunedExpand<A>::Flags(StateId s) const {
+  if (s < flags_.size())
+    return flags_[s];
+  return 0;
+}
+
+// Replaces the bits selected by 'mask' in the flags of state 's' in
+// 'ofst_' with the corresponding bits of 'flags'; all other bits are
+// left alone. 'flags_' is extended with zeros when 's' is past its end.
+template <class A>
+void PrunedExpand<A>::SetFlags(StateId s, uint8 flags, uint8 mask) {
+  while (flags_.size() <= s) {
+    flags_.push_back(0);
+  }
+  flags_[s] = (flags_[s] & ~mask) | (flags & mask);
+}
+
+
+// Returns the stored shortest distance from the initial state to 's' in
+// 'ofst_', or Weight::Zero() when nothing has been stored for 's'.
+template <class A>
+typename A::Weight PrunedExpand<A>::Distance(StateId s) const {
+  if (s < distance_.size())
+    return distance_[s];
+  return Weight::Zero();
+}
+
+// Records 'w' as the shortest distance from the initial state to 's' in
+// 'ofst_'. 'distance_' is extended with Weight::Zero() entries when 's'
+// is past its end.
+template <class A>
+void PrunedExpand<A>::SetDistance(StateId s, Weight w) {
+  while (distance_.size() <= s) {
+    distance_.push_back(Weight::Zero());
+  }
+  distance_[s] = w;
+}
+
+
+// Returns the stored shortest distance from 's' to the final states in
+// 'ofst_', or Weight::Zero() when nothing has been stored for 's'.
+template <class A>
+typename A::Weight PrunedExpand<A>::FinalDistance(StateId s) const {
+  if (s < fdistance_.size())
+    return fdistance_[s];
+  return Weight::Zero();
+}
+
+// Records 'w' as the shortest distance from 's' to the final states in
+// 'ofst_'. 'fdistance_' is extended with Weight::Zero() entries when
+// 's' is past its end.
+template <class A>
+void PrunedExpand<A>::SetFinalDistance(StateId s, Weight w) {
+  while (fdistance_.size() <= s) {
+    fdistance_.push_back(Weight::Zero());
+  }
+  fdistance_[s] = w;
+}
+
+// Returns the PDT "source" state recorded for state 's' in 'ofst_', or
+// kNoStateId when none has been recorded.
+template <class A>
+typename A::StateId PrunedExpand<A>::SourceState(StateId s) const {
+  if (s < sources_.size())
+    return sources_[s];
+  return kNoStateId;
+}
+
+// Records state 'p' of 'ifst_' as the PDT "source" state of state 's'
+// in 'ofst_'. 'sources_' is extended with kNoStateId entries when 's'
+// is past its end.
+template <class A>
+void PrunedExpand<A>::SetSourceState(StateId s, StateId p) {
+  while (sources_.size() <= s) {
+    sources_.push_back(kNoStateId);
+  }
+  sources_[s] = p;
+}
+
+// Makes sure state 's' of 'efst_' is scheduled for processing:
+//  * a state that is neither enqueued nor expanded is created in
+//    'ofst_' (together with any missing lower-numbered states), put in
+//    the queue and flagged kEnqueued;
+//  * a state already in the queue has its queue position updated;
+//  * a state that was already expanded is left untouched.
+template <class A>
+void PrunedExpand<A>::AddStateAndEnqueue(StateId s) {
+  const uint8 status = Flags(s);
+  if (status & kEnqueued) {
+    queue_.Update(s);
+    return;
+  }
+  // TODO(allauzen): Check everything is fine when kExpanded?
+  if (status & kExpanded)
+    return;
+
+  while (ofst_->NumStates() <= s) ofst_->AddState();
+  queue_.Enqueue(s);
+  SetFlags(s, kEnqueued, kEnqueued);
+}
+
+// Relaxes arc 'arc' leaving state 's' in 'ofst_':
+//  * when Distance(s) times the arc weight is better than the distance
+//    currently stored for 'arc.nextstate', that distance is replaced and
+//    'arc.nextstate' inherits the PDT source state of 's';
+//  * when 'fd' is better than the final distance currently stored for
+//    'arc.nextstate', that final distance is replaced by 'fd'.
+template <class A>
+void PrunedExpand<A>::Relax(StateId s, const A &arc, Weight fd) {
+  const StateId dest = arc.nextstate;
+  const Weight nd = Times(Distance(s), arc.weight);
+
+  const bool shorter = less_(nd, Distance(dest));
+  if (shorter) {
+    SetDistance(dest, nd);
+    SetSourceState(dest, SourceState(s));
+  }
+
+  const bool closer_to_final = less_(fd, FinalDistance(dest));
+  if (closer_to_final) {
+    SetFinalDistance(dest, fd);
+  }
+
+  VLOG(2) << "Relax: " << s << ", d[s] = " << Distance(s) << ", to "
+          << arc.nextstate << ", d[ns] = " << Distance(arc.nextstate)
+          << ", nd = " << nd;
+}
+
+// Returns 'true' if the arc 'arc' out of state 's' in 'efst_' needs to
+// be pruned, i.e. when Distance(s) (x) arc.weight (x) fd is worse than
+// 'limit_', where 'fd' is the estimated distance from 'arc.nextstate'
+// to the final states. 'fd' is obtained by summing, over the cached
+// destination states, the balanced-path distance to the destination
+// times the destination's stored weight.
+//
+// Side effects: refreshes the destination cache when the source state
+// of 's' or the current stack id changed, and relaxes 'arc' (see
+// Relax()) whether or not the arc ends up being pruned.
+template <class A>
+bool PrunedExpand<A>::PruneArc(StateId s, const A &arc) {
+  VLOG(2) << "Prune ?";
+  Weight fd = Weight::Zero();
+
+  if ((cached_source_ != SourceState(s)) ||
+      (cached_stack_id_ != current_stack_id_)) {
+    cached_source_ = SourceState(s);
+    cached_stack_id_ = current_stack_id_;
+    cached_dest_list_.clear();
+    if (cached_source_ != ifst_->Start()) {
+      for (SetIterator set_iter =
+               balance_data_->Find(current_paren_id_, cached_source_);
+           !set_iter.Done(); set_iter.Next()) {
+        StateId dest = set_iter.Element();
+        typename DestMap::const_iterator iter = dest_map_.find(dest);
+        // Never dereference end(): a destination with no entry in
+        // 'dest_map_' is skipped, which is the same as giving it weight
+        // Weight::Zero() in the sum computed below.
+        if (iter == dest_map_.end()) continue;
+        cached_dest_list_.push_front(*iter);
+      }
+    } else {
+      // TODO(allauzen): queue discipline should prevent this from ever
+      // happening; replace by a check.
+      cached_dest_list_.push_front(
+          make_pair(rfst_.Start() -1, Weight::One()));
+    }
+  }
+
+  for (typename slist<pair<StateId, Weight> >::const_iterator iter =
+           cached_dest_list_.begin();
+       iter != cached_dest_list_.end();
+       ++iter) {
+    fd = Plus(fd,
+              Times(DistanceToDest(state_table_.Tuple(arc.nextstate).state_id,
+                                   iter->first),
+                    iter->second));
+  }
+  Relax(s, arc, fd);
+  Weight w = Times(Distance(s), Times(arc.weight, fd));
+  return less_(limit_, w);
+}
+
+// Adds start state of 'efst_' to 'ofst_', enqueues it and initializes
+// the distance data structures.
+//
+// NOTE(review): 'rfst_.Start() - 1' is used throughout as the
+// destination for the empty stack; presumably it identifies the final
+// side of 'ifst_' in the numbering used by DistanceToDest() -- confirm
+// against the PDT Reverse() implementation.
+template <class A>
+void PrunedExpand<A>::ProcStart() {
+ StateId s = efst_.Start();
+ AddStateAndEnqueue(s);
+ ofst_->SetStart(s);
+ SetSourceState(s, ifst_->Start());
+
+ // Processing starts with stack id 0, which has length 0 and no
+ // parenthesis at its top.
+ current_stack_id_ = 0;
+ current_paren_id_ = -1;
+ stack_length_.push_back(0);
+ dest_map_[rfst_.Start() - 1] = Weight::One(); // not needed
+
+ // Prime the destination cache read by PruneArc() for the start state.
+ cached_source_ = ifst_->Start();
+ cached_stack_id_ = 0;
+ cached_dest_list_.push_front(
+ make_pair(rfst_.Start() -1, Weight::One()));
+
+ // The state for the tuple (rfst_.Start() - 1, stack 0) gets final
+ // distance One; the start state gets distance One and a final distance
+ // equal to the balanced-path distance from the PDT start state.
+ PdtStateTuple<StateId, StackId> tuple(rfst_.Start() - 1, 0);
+ SetFinalDistance(state_table_.FindState(tuple), Weight::One());
+ SetDistance(s, Weight::One());
+ SetFinalDistance(s, DistanceToDest(ifst_->Start(), rfst_.Start() - 1));
+ VLOG(2) << DistanceToDest(ifst_->Start(), rfst_.Start() - 1);
+}
+
+// Copies the final weight of 's' from 'efst_' to 'ofst_' unless 's' is
+// not final in 'efst_' or the shortest accepting path ending in 's'
+// (Distance(s) times the final weight) is worse than 'limit_'.
+template <class A>
+void PrunedExpand<A>::ProcFinal(StateId s) {
+  Weight final_weight = efst_.Final(s);
+  if (final_weight == Weight::Zero())
+    return;
+  if (less_(limit_, Times(Distance(s), final_weight)))
+    return;
+  ofst_->SetFinal(s, final_weight);
+}
+
+// Processes arc (or meta-arc) 'arc' out of 's' in 'efst_'. Returns
+// false when PruneArc() rejects it. Otherwise the destination state is
+// added/enqueued, the arc itself is added to 'ofst_' when 'add_arc' is
+// true, and true is returned.
+template <class A>
+bool PrunedExpand<A>::ProcNonParen(StateId s, const A &arc, bool add_arc) {
+  VLOG(2) << "ProcNonParen: " << s << " to " << arc.nextstate
+          << ", " << arc.ilabel << ":" << arc.olabel << " / " << arc.weight
+          << ", add_arc = " << (add_arc ? "true" : "false");
+  const bool pruned = PruneArc(s, arc);
+  if (pruned) {
+    return false;
+  }
+  if (add_arc) {
+    ofst_->AddArc(s, arc);
+  }
+  AddStateAndEnqueue(arc.nextstate);
+  return true;
+}
+
+// Processes an open paren arc 'arc' out of state 's' in 'ofst_'.
+// When 'arc' is labeled with an open paren,
+// 1. considers each (shortest) balanced path starting in 's' by
+// taking 'arc' and ending by a close paren balancing the open
+// paren of 'arc' as a meta-arc, processes and prunes each meta-arc
+// as a non-paren arc, inserting its destination to the queue;
+// 2. if at least one of these meta-arcs has not been pruned,
+// adds the destination of 'arc' to 'ofst_' as a new source state
+// for the stack id 'nsi' and inserts it in the queue.
+// 'si' is the stack id of 's' and 'nsi' the stack id of 'arc.nextstate'.
+// Returns true when at least one meta-arc survived pruning.
+template <class A>
+bool PrunedExpand<A>::ProcOpenParen(StateId s, const A &arc, StackId si,
+ StackId nsi) {
+ // Update the stack length when needed: |nsi| = |si| + 1.
+ // Entries equal to -1 mark stack ids whose length is not yet known.
+ while (stack_length_.size() <= nsi) stack_length_.push_back(-1);
+ if (stack_length_[nsi] == -1)
+ stack_length_[nsi] = stack_length_[si] + 1;
+
+ StateId ns = arc.nextstate;
+ VLOG(2) << "Open paren: " << s << "(" << state_table_.Tuple(s).state_id
+ << ") to " << ns << "(" << state_table_.Tuple(ns).state_id << ")";
+ bool proc_arc = false;
+ Weight fd = Weight::Zero();
+ ssize_t paren_id = stack_.ParenId(arc.ilabel);
+ // The elements found by 'balance_data_' are copied into a local list
+ // before the meta-arcs are processed.
+ slist<StateId> sources;
+ for (SetIterator set_iter =
+ balance_data_->Find(paren_id, state_table_.Tuple(ns).state_id);
+ !set_iter.Done(); set_iter.Next()) {
+ sources.push_front(set_iter.Element());
+ }
+ for (typename slist<StateId>::const_iterator sources_iter = sources.begin();
+ sources_iter != sources.end();
+ ++ sources_iter) {
+ StateId source = *sources_iter;
+ VLOG(2) << "Close paren source: " << source;
+ ParenState<Arc> paren_state(paren_id, source);
+ // Visit every close paren arc for 'paren_id' leaving 'source'.
+ for (typename ParenMultimap::const_iterator iter =
+ close_paren_multimap_.find(paren_state);
+ iter != close_paren_multimap_.end() && paren_state == iter->first;
+ ++iter) {
+ // The meta-arc goes from 's' to the destination of the close paren
+ // arc, paired with the stack id 'si' of 's'.
+ Arc meta_arc = iter->second;
+ PdtStateTuple<StateId, StackId> tuple(meta_arc.nextstate, si);
+ meta_arc.nextstate = state_table_.FindState(tuple);
+ VLOG(2) << state_table_.Tuple(ns).state_id << ", " << source;
+ VLOG(2) << "Meta arc weight = " << arc.weight << " Times "
+ << DistanceToDest(state_table_.Tuple(ns).state_id, source)
+ << " Times " << meta_arc.weight;
+ // Meta-arc weight: open paren arc, then the balanced path from the
+ // open paren destination to 'source', then the close paren arc.
+ meta_arc.weight = Times(
+ arc.weight,
+ Times(DistanceToDest(state_table_.Tuple(ns).state_id, source),
+ meta_arc.weight));
+ proc_arc |= ProcNonParen(s, meta_arc, false);
+ // Accumulate the final-distance estimate for 'arc.nextstate'.
+ fd = Plus(fd, Times(
+ Times(
+ DistanceToDest(state_table_.Tuple(ns).state_id, source),
+ iter->second.weight),
+ FinalDistance(meta_arc.nextstate)));
+ }
+ }
+ if (proc_arc) {
+ VLOG(2) << "Proc open paren " << s << " to " << arc.nextstate;
+ // Without 'keep_parentheses_' the arc is added with both labels 0.
+ ofst_->AddArc(
+ s, keep_parentheses_ ? arc : Arc(0, 0, arc.weight, arc.nextstate));
+ AddStateAndEnqueue(arc.nextstate);
+ Weight nd = Times(Distance(s), arc.weight);
+ if(less_(nd, Distance(arc.nextstate)))
+ SetDistance(arc.nextstate, nd);
+ // FinalDistance not necessary for source state since pruning
+ // decided using the meta-arcs above. But this is a problem with
+ // A*, hence:
+ if (less_(fd, FinalDistance(arc.nextstate)))
+ SetFinalDistance(arc.nextstate, fd);
+ SetFlags(arc.nextstate, kSourceState, kSourceState);
+ }
+ return proc_arc;
+}
+
+// Adds the close paren arc 'arc' out of 's' in 'efst_' to 'ofst_' when
+// the shortest path through it, Distance(s) (x) arc.weight (x)
+// FinalDistance(arc.nextstate), is not worse than 'limit_'. Unless
+// 'keep_parentheses_' is set, the arc is added with both labels 0.
+// Returns true when the arc was added.
+template <class A>
+bool PrunedExpand<A>::ProcCloseParen(StateId s, const A &arc) {
+  Weight path_weight =
+      Times(Distance(s), Times(arc.weight, FinalDistance(arc.nextstate)));
+  const bool keep = !less_(limit_, path_weight);
+  if (keep) {
+    ofst_->AddArc(
+        s, keep_parentheses_ ? arc : Arc(0, 0, arc.weight, arc.nextstate));
+  }
+  return keep;
+}
+
+// When 's' in 'ofst_' is a source state for stack id 'si', identifies
+// all the corresponding possible destination states, that is, all the
+// states in 'ifst_' that have an outgoing close paren arc balancing
+// the incoming open paren taken to get to 's', and for each such
+// state 't', computes the shortest distance from (t, si) to the final
+// states in 'ofst_'. Stores this information in 'dest_map_'.
+// Does nothing when 's' is not flagged kSourceState.
+template <class A>
+void PrunedExpand<A>::ProcDestStates(StateId s, StackId si) {
+ if (!(Flags(s) & kSourceState)) return;
+ // A source state with a different stack id starts a new batch:
+ // 'dest_map_' is emptied and the current stack/paren ids are updated.
+ if (si != current_stack_id_) {
+ dest_map_.clear();
+ current_stack_id_ = si;
+ current_paren_id_ = stack_.Top(current_stack_id_);
+ VLOG(2) << "StackID " << si << " dequeued for first time";
+ }
+ // TODO(allauzen): clean up source state business; rename current function to
+ // ProcSourceState.
+ // A source state is recorded as its own PDT source state.
+ SetSourceState(s, state_table_.Tuple(s).state_id);
+
+ ssize_t paren_id = stack_.Top(si);
+ for (SetIterator set_iter =
+ balance_data_->Find(paren_id, state_table_.Tuple(s).state_id);
+ !set_iter.Done(); set_iter.Next()) {
+ StateId dest_state = set_iter.Element();
+ // Destinations already in 'dest_map_' are not recomputed.
+ if (dest_map_.find(dest_state) != dest_map_.end())
+ continue;
+ Weight dest_weight = Weight::Zero();
+ ParenState<Arc> paren_state(paren_id, dest_state);
+ // Sum, over the close paren arcs for 'paren_id' leaving 'dest_state',
+ // the arc weight times the final distance of the arc destination
+ // paired with the popped stack.
+ for (typename ParenMultimap::const_iterator iter =
+ close_paren_multimap_.find(paren_state);
+ iter != close_paren_multimap_.end() && paren_state == iter->first;
+ ++iter) {
+ const Arc &arc = iter->second;
+ PdtStateTuple<StateId, StackId> tuple(arc.nextstate, stack_.Pop(si));
+ dest_weight = Plus(dest_weight,
+ Times(arc.weight,
+ FinalDistance(state_table_.FindState(tuple))));
+ }
+ dest_map_[dest_state] = dest_weight;
+ VLOG(2) << "State " << dest_state << " is a dest state for stack id "
+ << si << " with weight " << dest_weight;
+ }
+}
+
+// Expands and prunes with weight threshold 'threshold' the input PDT.
+// Writes the result in 'ofst'.
+//
+// 'ofst' is emptied first and receives the symbol tables of the input.
+// The pruning limit is the shortest balanced-path distance from the PDT
+// start state times 'threshold'.
+//
+// NOTE(review): only 'flags_' is reset here; 'distance_', 'fdistance_',
+// 'sources_', 'stack_length_' and the queue keep whatever an earlier
+// call left behind -- confirm whether calling Expand() more than once on
+// the same object is supported.
+template <class A>
+void PrunedExpand<A>::Expand(
+ MutableFst<A> *ofst, const typename A::Weight &threshold) {
+ ofst_ = ofst;
+ ofst_->DeleteStates();
+ ofst_->SetInputSymbols(ifst_->InputSymbols());
+ ofst_->SetOutputSymbols(ifst_->OutputSymbols());
+
+ limit_ = Times(DistanceToDest(ifst_->Start(), rfst_.Start() - 1), threshold);
+ flags_.clear();
+
+ ProcStart();
+
+ while (!queue_.Empty()) {
+ StateId s = queue_.Head();
+ queue_.Dequeue();
+ // Mark 's' as expanded and no longer enqueued.
+ SetFlags(s, kExpanded, kExpanded | kEnqueued);
+ VLOG(2) << s << " dequeued!";
+
+ ProcFinal(s);
+ StackId stack_id = state_table_.Tuple(s).stack_id;
+ ProcDestStates(s, stack_id);
+
+ // Classify each outgoing arc by comparing the stack ids of its two
+ // ends: same stack -> ordinary arc; destination stack pops to the
+ // current stack -> open paren; anything else -> close paren.
+ for (ArcIterator<ExpandFst<Arc> > aiter(efst_, s);
+ !aiter.Done();
+ aiter.Next()) {
+ Arc arc = aiter.Value();
+ StackId nextstack_id = state_table_.Tuple(arc.nextstate).stack_id;
+ if (stack_id == nextstack_id)
+ ProcNonParen(s, arc, true);
+ else if (stack_id == stack_.Pop(nextstack_id))
+ ProcOpenParen(s, arc, stack_id, nextstack_id);
+ else
+ ProcCloseParen(s, arc);
+ }
+ VLOG(2) << "d[" << s << "] = " << Distance(s)
+ << ", fd[" << s << "] = " << FinalDistance(s);
+ }
+}
+
+//
+// Expand() Functions
+//
+
+// Options accepted by the Expand() function below.
+template <class Arc>
+struct ExpandOptions {
+  // When true, Connect() is applied to the result.
+  bool connect;
+  // Passed on to the expansion (ExpandFst options or PrunedExpand).
+  bool keep_parentheses;
+  // Weight::Zero() selects the unpruned expansion; any other value is
+  // used as the pruning threshold.
+  typename Arc::Weight weight_threshold;
+
+  ExpandOptions(bool do_connect = true, bool keep_parens = false,
+                typename Arc::Weight threshold = Arc::Weight::Zero())
+      : connect(do_connect),
+        keep_parentheses(keep_parens),
+        weight_threshold(threshold) {}
+};
+
+// Expands a pushdown transducer (PDT) encoded as an FST into an FST.
+// This version writes the expanded PDT result to a MutableFst.
+// In the PDT, some transitions are labeled with open or close
+// parentheses. To be interpreted as a PDT, the parens must balance on
+// a path. The open-close parenthesis label pairs are passed in
+// 'parens'. The expansion enforces the parenthesis constraints. The
+// PDT must be expandable as an FST.
+//
+// 'opts' selects the algorithm: when 'opts.weight_threshold' is
+// Weight::Zero() the full expansion (ExpandFst) is copied into 'ofst';
+// otherwise PrunedExpand is run with that threshold. In both cases
+// 'opts.keep_parentheses' is forwarded, and Connect() is applied to the
+// result when 'opts.connect' is set.
+template <class Arc>
+void Expand(
+    const Fst<Arc> &ifst,
+    const vector<pair<typename Arc::Label, typename Arc::Label> > &parens,
+    MutableFst<Arc> *ofst,
+    const ExpandOptions<Arc> &opts) {
+  typedef typename Arc::Weight Weight;
+
+  ExpandFstOptions<Arc> eopts;
+  eopts.gc_limit = 0;
+  if (opts.weight_threshold == Weight::Zero()) {
+    eopts.keep_parentheses = opts.keep_parentheses;
+    *ofst = ExpandFst<Arc>(ifst, parens, eopts);
+  } else {
+    PrunedExpand<Arc> pruned_expand(ifst, parens, opts.keep_parentheses);
+    pruned_expand.Expand(ofst, opts.weight_threshold);
+  }
+
+  if (opts.connect)
+    Connect(ofst);
+}
+
+// Convenience overload of Expand(): expands the PDT 'ifst', whose
+// open-close parenthesis label pairs are given in 'parens', into the
+// MutableFst 'ofst'. 'connect' and 'keep_parentheses' are wrapped in an
+// ExpandOptions whose weight threshold keeps its default value, and the
+// work is delegated to the options-based overload.
+template<class Arc>
+void Expand(
+    const Fst<Arc> &ifst,
+    const vector<pair<typename Arc::Label, typename Arc::Label> > &parens,
+    MutableFst<Arc> *ofst,
+    bool connect = true, bool keep_parentheses = false) {
+  const ExpandOptions<Arc> options(connect, keep_parentheses);
+  Expand(ifst, parens, ofst, options);
+}
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_PDT_EXPAND_H__
diff --git a/src/include/fst/extensions/pdt/info.h b/src/include/fst/extensions/pdt/info.h
new file mode 100644
index 0000000..ef9a860
--- /dev/null
+++ b/src/include/fst/extensions/pdt/info.h
@@ -0,0 +1,175 @@
+// info.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Prints information about a PDT.
+
+#ifndef FST_EXTENSIONS_PDT_INFO_H__
+#define FST_EXTENSIONS_PDT_INFO_H__
+
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <tr1/unordered_set>
+using std::tr1::unordered_set;
+using std::tr1::unordered_multiset;
+#include <vector>
+using std::vector;
+
+#include <fst/fst.h>
+#include <fst/extensions/pdt/pdt.h>
+
+namespace fst {
+
+// Compute various information about PDTs, helper class for pdtinfo.cc.
+// All counts are computed once, in the constructor, by a single pass
+// over the states and arcs of the FST; the accessors only return them.
+template <class A> class PdtInfo {
+public:
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+ typedef typename A::Label Label;
+ typedef typename A::Weight Weight;
+
+ // 'parens' lists the (open, close) parenthesis label pairs of the PDT.
+ PdtInfo(const Fst<A> &fst,
+ const vector<pair<typename A::Label,
+ typename A::Label> > &parens);
+
+ // Type string of the FST passed to the constructor (fst.Type()).
+ const string& FstType() const { return fst_type_; }
+ // Type string of the arc type (A::Type()).
+ const string& ArcType() const { return A::Type(); }
+
+ // Number of states and arcs visited.
+ int64 NumStates() const { return nstates_; }
+ int64 NumArcs() const { return narcs_; }
+ // Number of arcs whose input label is an open / close parenthesis.
+ int64 NumOpenParens() const { return nopen_parens_; }
+ int64 NumCloseParens() const { return nclose_parens_; }
+ // Number of distinct open / close parenthesis labels seen on arcs.
+ int64 NumUniqueOpenParens() const { return nuniq_open_parens_; }
+ int64 NumUniqueCloseParens() const { return nuniq_close_parens_; }
+ // Number of distinct destination states of open parenthesis arcs.
+ int64 NumOpenParenStates() const { return nopen_paren_states_; }
+ // Number of distinct source states of close parenthesis arcs.
+ int64 NumCloseParenStates() const { return nclose_paren_states_; }
+
+ private:
+ string fst_type_;
+ int64 nstates_;
+ int64 narcs_;
+ int64 nopen_parens_;
+ int64 nclose_parens_;
+ int64 nuniq_open_parens_;
+ int64 nuniq_close_parens_;
+ int64 nopen_paren_states_;
+ int64 nclose_paren_states_;
+
+ DISALLOW_COPY_AND_ASSIGN(PdtInfo);
+};
+
+// Walks every state and arc of 'fst' once and fills in all counters.
+// An arc counts as a parenthesis arc when its input label appears in
+// 'parens'; it is an open parenthesis when the label equals the first
+// element of its pair and a close parenthesis otherwise.
+template <class A>
+PdtInfo<A>::PdtInfo(const Fst<A> &fst,
+                    const vector<pair<typename A::Label,
+                                      typename A::Label> > &parens)
+    : fst_type_(fst.Type()),
+      nstates_(0),
+      narcs_(0),
+      nopen_parens_(0),
+      nclose_parens_(0),
+      nuniq_open_parens_(0),
+      nuniq_close_parens_(0),
+      nopen_paren_states_(0),
+      nclose_paren_states_(0) {
+  typedef unordered_map<Label, size_t> ParenMap;
+
+  // Paren label (open or close) -> index of its pair in 'parens'.
+  ParenMap paren_index;
+  for (size_t i = 0; i < parens.size(); ++i) {
+    paren_index[parens[i].first] = i;
+    paren_index[parens[i].second] = i;
+  }
+
+  // insert().second is true only the first time a key is added, which
+  // is what drives the "unique" counters below.
+  unordered_set<Label> seen_labels;
+  unordered_set<StateId> open_dest_states;
+  unordered_set<StateId> close_source_states;
+
+  for (StateIterator< Fst<A> > siter(fst); !siter.Done(); siter.Next()) {
+    const StateId s = siter.Value();
+    ++nstates_;
+    for (ArcIterator< Fst<A> > aiter(fst, s); !aiter.Done(); aiter.Next()) {
+      const A &arc = aiter.Value();
+      ++narcs_;
+
+      typename ParenMap::const_iterator pit = paren_index.find(arc.ilabel);
+      if (pit == paren_index.end())
+        continue;  // Not a parenthesis arc.
+
+      const pair<Label, Label> &paren = parens[pit->second];
+      if (arc.ilabel == paren.first) {
+        ++nopen_parens_;
+        if (seen_labels.insert(paren.first).second)
+          ++nuniq_open_parens_;
+        if (open_dest_states.insert(arc.nextstate).second)
+          ++nopen_paren_states_;
+      } else {
+        ++nclose_parens_;
+        if (seen_labels.insert(paren.second).second)
+          ++nuniq_close_parens_;
+        if (close_source_states.insert(s).second)
+          ++nclose_paren_states_;
+      }
+    }
+  }
+}
+
+
+// Prints the statistics held by 'pdtinfo' to cout, one per line, with
+// each description left-justified in a 50-character column followed by
+// its value. The format flags of cout are restored before returning.
+template <class A>
+void PrintPdtInfo(const PdtInfo<A> &pdtinfo) {
+  ios_base::fmtflags old = cout.setf(ios::left);
+  cout.width(50);
+  cout << "fst type" << pdtinfo.FstType().c_str() << endl;
+  cout.width(50);
+  cout << "arc type" << pdtinfo.ArcType().c_str() << endl;
+  cout.width(50);
+  cout << "# of states" << pdtinfo.NumStates() << endl;
+  cout.width(50);
+  cout << "# of arcs" << pdtinfo.NumArcs() << endl;
+  cout.width(50);
+  cout << "# of open parentheses" << pdtinfo.NumOpenParens() << endl;
+  cout.width(50);
+  cout << "# of close parentheses" << pdtinfo.NumCloseParens() << endl;
+  cout.width(50);
+  cout << "# of unique open parentheses"
+       << pdtinfo.NumUniqueOpenParens() << endl;
+  cout.width(50);
+  cout << "# of unique close parentheses"
+       << pdtinfo.NumUniqueCloseParens() << endl;
+  cout.width(50);
+  cout << "# of open parenthesis dest. states"
+       << pdtinfo.NumOpenParenStates() << endl;
+  cout.width(50);
+  cout << "# of close parenthesis source states"
+       << pdtinfo.NumCloseParenStates() << endl;
+  // flags() replaces the whole flag set. setf(old) would only OR the old
+  // flags in and leave ios::left set.
+  cout.flags(old);
+}
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_PDT_INFO_H__
diff --git a/src/include/fst/extensions/pdt/paren.h b/src/include/fst/extensions/pdt/paren.h
new file mode 100644
index 0000000..7b9887f
--- /dev/null
+++ b/src/include/fst/extensions/pdt/paren.h
@@ -0,0 +1,496 @@
+// paren.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// Common classes for PDT parentheses
+
+// \file
+
+#ifndef FST_EXTENSIONS_PDT_PAREN_H_
+#define FST_EXTENSIONS_PDT_PAREN_H_
+
+#include <algorithm>
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <tr1/unordered_set>
+using std::tr1::unordered_set;
+using std::tr1::unordered_multiset;
+#include <set>
+
+#include <fst/extensions/pdt/pdt.h>
+#include <fst/extensions/pdt/collection.h>
+#include <fst/fst.h>
+#include <fst/dfs-visit.h>
+
+
+namespace fst {
+
+//
+// ParenState: Pair of an open (close) parenthesis and
+// its destination (source) state.
+//
+
+ // Value type pairing a parenthesis ID with a state ID.  It can be used as a
+ // key in hashed containers (through Hash and operator==) and in ordered
+ // containers (through operator<).
+ template <class A>
+ class ParenState {
+  public:
+   typedef typename A::Label Label;
+   typedef typename A::StateId StateId;
+
+   // Hash functor combining both fields.
+   struct Hash {
+     size_t operator()(const ParenState<A> &p) const {
+       return p.paren_id + p.state_id * kPrime;
+     }
+   };
+
+   Label paren_id;     // ID of open (close) paren
+   StateId state_id;   // destination (source) state of open (close) paren
+
+   // Default value: no paren and no state.
+   ParenState() : paren_id(kNoLabel), state_id(kNoStateId) {}
+
+   ParenState(Label p, StateId s) : paren_id(p), state_id(s) {}
+
+   // Two values are equal when both fields match.
+   bool operator==(const ParenState<A> &p) const {
+     if (&p == this)
+       return true;
+     return p.paren_id == this->paren_id && p.state_id == this->state_id;
+   }
+
+   bool operator!=(const ParenState<A> &p) const { return !(p == *this); }
+
+   // Lexicographic order on (paren_id, state_id): compares this object's
+   // fields against those of 'p', paren ID first, state ID as tie-breaker.
+   bool operator<(const ParenState<A> &p) const {
+     return this->paren_id < p.paren_id ||
+         (this->paren_id == p.paren_id && this->state_id < p.state_id);
+   }
+
+  private:
+   static const size_t kPrime;  // multiplier used by Hash
+ };
+
+ template <class A>
+ const size_t ParenState<A>::kPrime = 7853;
+
+
+ // Adapts a position in an STL (multi)map to the FST iterator protocol
+ // (Done/Value/Next/Reset).  Iteration starts at the supplied position and
+ // covers the consecutive entries whose key equals the key at that position.
+ template <class M>
+ class MapIterator {
+  public:
+   typedef typename M::const_iterator StlIterator;
+   typedef typename M::value_type PairType;
+   typedef typename PairType::second_type ValueType;
+
+   // 'm' is held by reference; 'iter' is the first entry to visit (or end()).
+   MapIterator(const M &m, StlIterator iter)
+       : container_(m), first_(iter), cursor_(iter) {}
+
+   // True once the cursor hits the end of the map or an entry with another key.
+   bool Done() const {
+     if (cursor_ == container_.end())
+       return true;
+     return cursor_->first != first_->first;
+   }
+
+   // Mapped value at the current position (returned by value).
+   ValueType Value() const {
+     return cursor_->second;
+   }
+
+   // Advances to the following entry.
+   void Next() {
+     ++cursor_;
+   }
+
+   // Rewinds to the position given at construction.
+   void Reset() {
+     cursor_ = first_;
+   }
+
+  private:
+   const M &container_;
+   StlIterator first_;
+   StlIterator cursor_;
+ };
+
+//
+// PdtParenReachable: Provides various parenthesis reachability information
+// on a PDT.
+//
+
+ // Computes, at construction time, which close parentheses can be reached
+ // from each state of a PDT and offers lookup methods over the result.
+ // Holds references to the FST and the paren vector passed to the
+ // constructor; neither is copied.
+ template <class A>
+ class PdtParenReachable {
+ public:
+ typedef typename A::StateId StateId;
+ typedef typename A::Label Label;
+ public:
+ // Maps from state ID to reachable paren IDs from (to) that state.
+ typedef unordered_multimap<StateId, Label> ParenMultiMap;
+
+ // Maps from paren ID and state ID to reachable state set ID
+ typedef unordered_map<ParenState<A>, ssize_t,
+ typename ParenState<A>::Hash> StateSetMap;
+
+ // Maps from paren ID and state ID to arcs exiting that state with that
+ // Label.
+ typedef unordered_multimap<ParenState<A>, A,
+ typename ParenState<A>::Hash> ParenArcMultiMap;
+
+ typedef MapIterator<ParenMultiMap> ParenIterator;
+
+ typedef MapIterator<ParenArcMultiMap> ParenArcIterator;
+
+ typedef typename Collection<ssize_t, StateId>::SetIterator SetIterator;
+
+ // Computes close (open) parenthesis reachability information for
+ // a PDT with bounded stack.
+ // First records, for every open and close label in 'parens', the index of
+ // its pair.  With 'close' true, the DFS is then run from the start state
+ // (nothing is computed when the FST has no start state).  With 'close'
+ // false, only an error is logged: open paren info is not implemented.
+ PdtParenReachable(const Fst<A> &fst,
+ const vector<pair<Label, Label> > &parens, bool close)
+ : fst_(fst),
+ parens_(parens),
+ close_(close) {
+ for (Label i = 0; i < parens.size(); ++i) {
+ const pair<Label, Label> &p = parens[i];
+ paren_id_map_[p.first] = i;
+ paren_id_map_[p.second] = i;
+ }
+
+ if (close_) {
+ StateId start = fst.Start();
+ if (start == kNoStateId)
+ return;
+ DFSearch(start, start);
+ } else {
+ FSTERROR() << "PdtParenReachable: open paren info not implemented";
+ }
+ }
+
+ // Given a state ID, returns an iterator over paren IDs
+ // for close (open) parens reachable from that state along balanced
+ // paths.
+ ParenIterator FindParens(StateId s) const {
+ return ParenIterator(paren_multimap_, paren_multimap_.find(s));
+ }
+
+ // Given a paren ID and a state ID s, returns an iterator over
+ // states that can be reached along balanced paths from (to) s that
+ // have close (open) parentheses matching the paren ID exiting
+ // (entering) those states.
+ // When nothing is recorded for (paren_id, s), the iterator for set ID -1
+ // is returned.
+ SetIterator FindStates(Label paren_id, StateId s) const {
+ ParenState<A> paren_state(paren_id, s);
+ typename StateSetMap::const_iterator id_it = set_map_.find(paren_state);
+ if (id_it == set_map_.end()) {
+ return state_sets_.FindSet(-1);
+ } else {
+ return state_sets_.FindSet(id_it->second);
+ }
+ }
+
+ // Given a paren Id and a state ID s, return an iterator over
+ // arcs that exit (enter) s and are labeled with a close (open)
+ // parenthesis matching the paren ID.
+ ParenArcIterator FindParenArcs(Label paren_id, StateId s) const {
+ ParenState<A> paren_state(paren_id, s);
+ return ParenArcIterator(paren_arc_multimap_,
+ paren_arc_multimap_.find(paren_state));
+ }
+
+ private:
+ // DFS that gathers paren and state set information.
+ // Bool returns false when cycle detected.
+ bool DFSearch(StateId s, StateId start);
+
+ // Unions state sets together gathered by the DFS.
+ void ComputeStateSet(StateId s);
+
+ // Gather state set(s) from state 'nexts'.
+ void UpdateStateSet(StateId nexts, set<Label> *paren_set,
+ vector< set<StateId> > *state_sets) const;
+
+ const Fst<A> &fst_;
+ const vector<pair<Label, Label> > &parens_; // Paren ID -> Labels
+ bool close_; // Close/open paren info?
+ unordered_map<Label, Label> paren_id_map_; // Paren labels -> ID
+ ParenMultiMap paren_multimap_; // Paren reachability
+ ParenArcMultiMap paren_arc_multimap_; // Paren Arcs
+ vector<char> state_color_; // DFS state
+ mutable Collection<ssize_t, StateId> state_sets_; // Set ID <-> reachable states
+ StateSetMap set_map_; // (Paren ID, state ID) -> set ID
+ DISALLOW_COPY_AND_ASSIGN(PdtParenReachable);
+ };
+
+ // DFS that gathers paren and state set information.
+ // Visits 's' and everything reachable from it, then records the reachability
+ // data for 's' via ComputeStateSet().  Returns false only when 's' is
+ // currently on the DFS stack (grey), i.e. a cycle was met; returns true
+ // otherwise.  'start' is only handed on to the recursive calls.
+ template <class A>
+ bool PdtParenReachable<A>::DFSearch(StateId s, StateId start) {
+ // Grow the colour table on demand; unseen states start out white.
+ if (s >= state_color_.size())
+ state_color_.resize(s + 1, kDfsWhite);
+
+ // Already finished: nothing more to do.
+ if (state_color_[s] == kDfsBlack)
+ return true;
+
+ // Still being processed further up the stack: report the cycle.
+ if (state_color_[s] == kDfsGrey)
+ return false;
+
+ state_color_[s] = kDfsGrey;
+
+ for (ArcIterator<Fst<A> > aiter(fst_, s);
+ !aiter.Done();
+ aiter.Next()) {
+ const A &arc = aiter.Value();
+
+ typename unordered_map<Label, Label>::const_iterator pit
+ = paren_id_map_.find(arc.ilabel);
+ if (pit != paren_id_map_.end()) { // paren?
+ Label paren_id = pit->second;
+ if (arc.ilabel == parens_[paren_id].first) { // open paren
+ // Search below the open paren with its destination as the new start,
+ // then continue from the destination of every matching close paren
+ // arc found there.  The results of these recursive calls are ignored.
+ DFSearch(arc.nextstate, arc.nextstate);
+ for (SetIterator set_iter = FindStates(paren_id, arc.nextstate);
+ !set_iter.Done(); set_iter.Next()) {
+ for (ParenArcIterator paren_arc_iter =
+ FindParenArcs(paren_id, set_iter.Element());
+ !paren_arc_iter.Done();
+ paren_arc_iter.Next()) {
+ const A &cparc = paren_arc_iter.Value();
+ DFSearch(cparc.nextstate, start);
+ }
+ }
+ }
+ // Close paren arcs are not followed here.
+ } else { // non-paren
+ if(!DFSearch(arc.nextstate, start)) {
+ // Cycle through non-paren arcs: log the error and stop.  Note that
+ // 's' is left grey and ComputeStateSet(s) is not run on this path.
+ FSTERROR() << "PdtReachable: Underlying cyclicity not supported";
+ return true;
+ }
+ }
+ }
+ ComputeStateSet(s);
+ state_color_[s] = kDfsBlack;
+ return true;
+ }
+
+ // Unions state sets together gathered by the DFS.
+ // For state 's', collects per paren ID the set of states recorded for its
+ // successors, adds 's' itself for each close paren arc leaving it, and
+ // stores the result in paren_multimap_, set_map_ and state_sets_.  Close
+ // paren arcs leaving 's' are also recorded in paren_arc_multimap_.
+ template <class A>
+ void PdtParenReachable<A>::ComputeStateSet(StateId s) {
+ set<Label> paren_set; // paren IDs seen for 's'
+ vector< set<StateId> > state_sets(parens_.size()); // indexed by paren ID
+ for (ArcIterator< Fst<A> > aiter(fst_, s);
+ !aiter.Done();
+ aiter.Next()) {
+ const A &arc = aiter.Value();
+
+ typename unordered_map<Label, Label>::const_iterator pit
+ = paren_id_map_.find(arc.ilabel);
+ if (pit != paren_id_map_.end()) { // paren?
+ Label paren_id = pit->second;
+ if (arc.ilabel == parens_[paren_id].first) { // open paren
+ // Skip over the balanced sub-path: take what is recorded for the
+ // destination of each matching close paren arc.
+ for (SetIterator set_iter =
+ FindStates(paren_id, arc.nextstate);
+ !set_iter.Done(); set_iter.Next()) {
+ for (ParenArcIterator paren_arc_iter =
+ FindParenArcs(paren_id, set_iter.Element());
+ !paren_arc_iter.Done();
+ paren_arc_iter.Next()) {
+ const A &cparc = paren_arc_iter.Value();
+ UpdateStateSet(cparc.nextstate, &paren_set, &state_sets);
+ }
+ }
+ } else { // close paren
+ // 's' itself is a source state of this close paren.
+ paren_set.insert(paren_id);
+ state_sets[paren_id].insert(s);
+ ParenState<A> paren_state(paren_id, s);
+ paren_arc_multimap_.insert(make_pair(paren_state, arc));
+ }
+ } else { // non-paren
+ UpdateStateSet(arc.nextstate, &paren_set, &state_sets);
+ }
+ }
+
+ // Publish the gathered sets: one (s, paren_id) entry per paren ID, with
+ // the states copied in the ascending order produced by the std::set.
+ vector<StateId> state_set;
+ for (typename set<Label>::iterator paren_iter = paren_set.begin();
+ paren_iter != paren_set.end(); ++paren_iter) {
+ state_set.clear();
+ Label paren_id = *paren_iter;
+ paren_multimap_.insert(make_pair(s, paren_id));
+ for (typename set<StateId>::iterator state_iter
+ = state_sets[paren_id].begin();
+ state_iter != state_sets[paren_id].end();
+ ++state_iter) {
+ state_set.push_back(*state_iter);
+ }
+ ParenState<A> paren_state(paren_id, s);
+ set_map_[paren_state] = state_sets_.FindId(state_set);
+ }
+ }
+
+ // Merges what is already recorded for state 'nexts' into the caller's
+ // accumulators: every paren ID found for 'nexts' is added to 'paren_set',
+ // and the states stored for that (paren ID, nexts) pair are added to the
+ // entry of 'state_sets' indexed by the paren ID.
+ template <class A>
+ void PdtParenReachable<A>::UpdateStateSet(
+     StateId nexts, set<Label> *paren_set,
+     vector< set<StateId> > *state_sets) const {
+   ParenIterator parens_at_next = FindParens(nexts);
+   while (!parens_at_next.Done()) {
+     const Label id = parens_at_next.Value();
+     paren_set->insert(id);
+     SetIterator states = FindStates(id, nexts);
+     while (!states.Done()) {
+       (*state_sets)[id].insert(states.Element());
+       states.Next();
+     }
+     parens_at_next.Next();
+   }
+ }
+
+
+ // Store balancing parenthesis data for a PDT. Allows on-the-fly
+ // construction (e.g. in PdtShortestPath) unlike PdtParenReachable above.
+ // Usage order implied by the methods below: OpenInsert(), then
+ // CloseInsert() for the matching close parens, then FinishInsert() for the
+ // open destination state, and only then Find().
+ template <class A>
+ class PdtBalanceData {
+ public:
+ typedef typename A::StateId StateId;
+ typedef typename A::Label Label;
+
+ // Hash set for open parens
+ typedef unordered_set<ParenState<A>, typename ParenState<A>::Hash> OpenParenSet;
+
+ // Maps from open paren destination state to parenthesis ID.
+ typedef unordered_multimap<StateId, Label> OpenParenMap;
+
+ // Maps from open paren state to source states of matching close parens
+ typedef unordered_multimap<ParenState<A>, StateId,
+ typename ParenState<A>::Hash> CloseParenMap;
+
+ // Maps from open paren state to close source set ID
+ typedef unordered_map<ParenState<A>, ssize_t,
+ typename ParenState<A>::Hash> CloseSourceMap;
+
+ typedef typename Collection<ssize_t, StateId>::SetIterator SetIterator;
+
+ PdtBalanceData() {}
+
+ // Empties the two pending-insertion maps.  open_paren_set_,
+ // close_source_map_ and close_source_sets_ are not touched here.
+ void Clear() {
+ open_paren_map_.clear();
+ close_paren_map_.clear();
+ }
+
+ // Adds an open parenthesis with destination state 'open_dest'.
+ // A (paren_id, open_dest) pair that is already pending is ignored.
+ void OpenInsert(Label paren_id, StateId open_dest) {
+ ParenState<A> key(paren_id, open_dest);
+ if (!open_paren_set_.count(key)) {
+ open_paren_set_.insert(key);
+ open_paren_map_.insert(make_pair(open_dest, paren_id));
+ }
+ }
+
+ // Adds a matching closing parenthesis with source state
+ // 'close_source' that balances an open_parenthesis with destination
+ // state 'open_dest' if OpenInsert() previously called
+ // (o.w. CloseInsert() does nothing).
+ void CloseInsert(Label paren_id, StateId open_dest, StateId close_source) {
+ ParenState<A> key(paren_id, open_dest);
+ if (open_paren_set_.count(key))
+ close_paren_map_.insert(make_pair(key, close_source));
+ }
+
+ // Find close paren source states matching an open parenthesis.
+ // Methods that follow, iterate through those matching states.
+ // Should be called only after FinishInsert(open_dest).
+ // When nothing is stored for the pair, the iterator for set ID -1 is
+ // returned.
+ SetIterator Find(Label paren_id, StateId open_dest) {
+ ParenState<A> close_key(paren_id, open_dest);
+ typename CloseSourceMap::const_iterator id_it =
+ close_source_map_.find(close_key);
+ if (id_it == close_source_map_.end()) {
+ return close_source_sets_.FindSet(-1);
+ } else {
+ return close_source_sets_.FindSet(id_it->second);
+ }
+ }
+
+ // Call when all open and close parenthesis insertions wrt open
+ // parentheses entering 'open_dest' are finished. Must be called
+ // before Find(open_dest). Stores close paren source state sets
+ // efficiently.
+ void FinishInsert(StateId open_dest) {
+ vector<StateId> close_sources;
+ // Walk the run of entries keyed by 'open_dest'.  Each entry is erased at
+ // the bottom of the loop body, which also advances the iterator.
+ for (typename OpenParenMap::iterator oit = open_paren_map_.find(open_dest);
+ oit != open_paren_map_.end() && oit->first == open_dest;) {
+ Label paren_id = oit->second;
+ close_sources.clear();
+ ParenState<A> okey(paren_id, open_dest);
+ // NOTE(review): assumes 'okey' is still in open_paren_set_, which holds
+ // when the entry was added through OpenInsert() - confirm no other path.
+ open_paren_set_.erase(open_paren_set_.find(okey));
+ // Drain the pending close sources for this open paren.
+ for (typename CloseParenMap::iterator cit = close_paren_map_.find(okey);
+ cit != close_paren_map_.end() && cit->first == okey;) {
+ close_sources.push_back(cit->second);
+ close_paren_map_.erase(cit++);
+ }
+ // Sort and drop duplicates before the set is stored.
+ sort(close_sources.begin(), close_sources.end());
+ typename vector<StateId>::iterator unique_end =
+ unique(close_sources.begin(), close_sources.end());
+ close_sources.resize(unique_end - close_sources.begin());
+
+ // An open paren without any close source gets no entry at all.
+ if (!close_sources.empty())
+ close_source_map_[okey] = close_source_sets_.FindId(close_sources);
+ open_paren_map_.erase(oit++);
+ }
+ }
+
+ // Return a new balance data object representing the reversed balance
+ // information.
+ PdtBalanceData<A> *Reverse(StateId num_states,
+ StateId num_split,
+ StateId state_id_shift) const;
+
+ private:
+ OpenParenSet open_paren_set_; // open par. at dest?
+
+ OpenParenMap open_paren_map_; // open parens per state
+ // NOTE(review): open_dest_ and open_iter_ are not referenced by any member
+ // function visible in this class.
+ ParenState<A> open_dest_; // cur open dest. state
+ typename OpenParenMap::const_iterator open_iter_; // cur open parens/state
+
+ CloseParenMap close_paren_map_; // close states/open
+ // paren and state
+
+ CloseSourceMap close_source_map_; // paren, state to set ID
+ mutable Collection<ssize_t, StateId> close_source_sets_;
+ };
+
+ // Return a new balance data object representing the reversed balance
+ // information: every stored (open destination, close source) pair is
+ // inserted into the result with the two roles swapped and with
+ // 'state_id_shift' added to both state IDs.
+ //
+ // Close source states are processed in chunks of num_states / num_split
+ // consecutive IDs over the range [0, num_states); each chunk is finished
+ // (FinishInsert) before the next one starts.  The chunk size is clamped to
+ // at least one state, so a 'num_split' of zero or one larger than
+ // 'num_states' can neither divide by zero nor stall the loop.
+ //
+ // The result is allocated with new and is not deleted here.
+ template <class A>
+ PdtBalanceData<A> *PdtBalanceData<A>::Reverse(
+     StateId num_states,
+     StateId num_split,
+     StateId state_id_shift) const {
+   PdtBalanceData<A> *bd = new PdtBalanceData<A>;
+   unordered_set<StateId> close_sources;
+
+   // A zero chunk size would never advance 'i' below.
+   StateId split_size = num_split > 0 ? num_states / num_split : num_states;
+   if (split_size < 1)
+     split_size = 1;
+
+   for (StateId i = 0; i < num_states; i += split_size) {
+     close_sources.clear();
+
+     for (typename CloseSourceMap::const_iterator
+              sit = close_source_map_.begin();
+          sit != close_source_map_.end();
+          ++sit) {
+       ParenState<A> okey = sit->first;
+       StateId open_dest = okey.state_id;
+       Label paren_id = okey.paren_id;
+       for (SetIterator set_iter = close_source_sets_.FindSet(sit->second);
+            !set_iter.Done(); set_iter.Next()) {
+         StateId close_source = set_iter.Element();
+         // Only close sources inside the current chunk are handled now.
+         if ((close_source < i) || (close_source >= i + split_size))
+           continue;
+         close_sources.insert(close_source + state_id_shift);
+         // Swap roles: the close source becomes the open destination.
+         bd->OpenInsert(paren_id, close_source + state_id_shift);
+         bd->CloseInsert(paren_id, close_source + state_id_shift,
+                         open_dest + state_id_shift);
+       }
+     }
+
+     // Seal every new open destination created for this chunk.
+     for (typename unordered_set<StateId>::const_iterator it
+              = close_sources.begin();
+          it != close_sources.end();
+          ++it) {
+       bd->FinishInsert(*it);
+     }
+
+   }
+   return bd;
+ }
+
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_PDT_PAREN_H_
diff --git a/src/include/fst/extensions/pdt/pdt.h b/src/include/fst/extensions/pdt/pdt.h
new file mode 100644
index 0000000..171541f
--- /dev/null
+++ b/src/include/fst/extensions/pdt/pdt.h
@@ -0,0 +1,212 @@
+// pdt.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Common classes for PDT expansion/traversal.
+
+#ifndef FST_EXTENSIONS_PDT_PDT_H__
+#define FST_EXTENSIONS_PDT_PDT_H__
+
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <map>
+#include <set>
+
+#include <fst/state-table.h>
+#include <fst/fst.h>
+
+namespace fst {
+
+ // Provides bijection between parenthesis stacks and signed integral
+ // stack IDs. Each stack ID is unique to each distinct stack. The
+ // open-close parenthesis label pairs are passed in 'parens'.
+ template <typename K, typename L>
+ class PdtStack {
+ public:
+ typedef K StackId;
+ typedef L Label;
+
+ // The stacks are stored in a tree. The nodes are stored in vector
+ // 'nodes_'. Each node represents the top of some stack and is
+ // ID'ed by its position in the vector. Its parent node represents
+ // the stack with the top 'popped' and its children are stored in
+ // 'child_map_' accessed by stack_id and label. The paren_id is
+ // the position in 'parens' of the parenthesis for that node.
+ struct StackNode {
+ StackId parent_id;
+ size_t paren_id;
+
+ StackNode(StackId p, size_t i) : parent_id(p), paren_id(i) {}
+ };
+
+ // Copies 'parens', indexes every open and close label by the position of
+ // its pair, records the smallest and largest paren label, and creates the
+ // root node (stack ID 0, the empty stack).
+ PdtStack(const vector<pair<Label, Label> > &parens)
+ : parens_(parens), min_paren_(kNoLabel), max_paren_(kNoLabel) {
+ for (size_t i = 0; i < parens.size(); ++i) {
+ const pair<Label, Label> &p = parens[i];
+ paren_map_[p.first] = i;
+ paren_map_[p.second] = i;
+
+ if (min_paren_ == kNoLabel || p.first < min_paren_)
+ min_paren_ = p.first;
+ if (p.second < min_paren_)
+ min_paren_ = p.second;
+
+ if (max_paren_ == kNoLabel || p.first > max_paren_)
+ max_paren_ = p.first;
+ if (p.second > max_paren_)
+ max_paren_ = p.second;
+ }
+ // The root's paren_id is a size_t, so the -1 passed here is stored as the
+ // largest size_t value.
+ nodes_.push_back(StackNode(-1, -1)); // Tree root.
+ }
+
+ // Returns stack ID given the current stack ID (0 if empty) and
+ // label read. 'Pushes' onto a stack if the label is an open
+ // parenthesis, returning the new stack ID. 'Pops' the stack if the
+ // label is a close parenthesis that matches the top of the stack,
+ // returning the parent stack ID. Returns -1 if label is an
+ // unmatched close parenthesis. Otherwise, returns the current stack
+ // ID.
+ StackId Find(StackId stack_id, Label label) {
+ // Cheap range test first; labels outside [min_paren_, max_paren_] cannot
+ // be parentheses.
+ if (min_paren_ == kNoLabel || label < min_paren_ || label > max_paren_)
+ return stack_id; // Non-paren.
+
+ typename unordered_map<Label, size_t>::const_iterator pit
+ = paren_map_.find(label);
+ if (pit == paren_map_.end()) // Non-paren.
+ return stack_id;
+ ssize_t paren_id = pit->second;
+
+ if (label == parens_[paren_id].first) { // Open paren.
+ // operator[] creates a missing entry with value 0.  ID 0 is the root,
+ // which is never anybody's child, so 0 doubles as "not found".
+ StackId &child_id = child_map_[make_pair(stack_id, label)];
+ if (child_id == 0) { // Child not found, push label.
+ child_id = nodes_.size();
+ nodes_.push_back(StackNode(stack_id, paren_id));
+ }
+ return child_id;
+ }
+
+ // NOTE(review): 'stack_id' is used as an index without a range check;
+ // presumably callers never pass -1 here - confirm.
+ const StackNode &node = nodes_[stack_id];
+ if (paren_id == node.paren_id) // Matching close paren.
+ return node.parent_id;
+
+ return -1; // Non-matching close paren.
+ }
+
+ // Returns the stack ID obtained by "popping" the label at the top
+ // of the current stack ID.
+ StackId Pop(StackId stack_id) const {
+ return nodes_[stack_id].parent_id;
+ }
+
+ // Returns the paren ID at the top of the stack for 'stack_id'
+ ssize_t Top(StackId stack_id) const {
+ return nodes_[stack_id].paren_id;
+ }
+
+ // Returns the position in 'parens' of the pair containing 'label', or -1
+ // when 'label' is not a parenthesis.
+ ssize_t ParenId(Label label) const {
+ typename unordered_map<Label, size_t>::const_iterator pit
+ = paren_map_.find(label);
+ if (pit == paren_map_.end()) // Non-paren.
+ return -1;
+ return pit->second;
+ }
+
+ private:
+ // Hash functor for (stack ID, label) keys of 'child_map_'.
+ struct ChildHash {
+ size_t operator()(const pair<StackId, Label> &p) const {
+ return p.first + p.second * kPrime;
+ }
+ };
+
+ static const size_t kPrime;
+
+ vector<pair<Label, Label> > parens_;
+ vector<StackNode> nodes_;
+ unordered_map<Label, size_t> paren_map_;
+ unordered_map<pair<StackId, Label>,
+ StackId, ChildHash> child_map_; // Child of stack node wrt label
+ Label min_paren_; // For faster paren. check
+ Label max_paren_; // For faster paren. check
+ };
+
+ template <typename T, typename L>
+ const size_t PdtStack<T, L>::kPrime = 7853;
+
+
+ // State tuple for PDT expansion: a state ID paired with a stack ID.
+ template <typename S, typename K>
+ struct PdtStateTuple {
+   typedef S StateId;
+   typedef K StackId;
+
+   // Default tuple: no state (kNoStateId) and stack ID -1.
+   PdtStateTuple() : state_id(kNoStateId), stack_id(-1) {}
+
+   // Tuple for state 'fs' with stack 'ss'.
+   PdtStateTuple(StateId fs, StackId ss) : state_id(fs), stack_id(ss) {}
+
+   StateId state_id;
+   StackId stack_id;
+ };
+
+ // Equality of PDT state tuples: the same object, or both the state ID and
+ // the stack ID match.
+ template <typename S, typename K>
+ inline bool operator==(const PdtStateTuple<S, K>& x,
+                        const PdtStateTuple<S, K>& y) {
+   return &x == &y ||
+       (x.state_id == y.state_id && x.stack_id == y.stack_id);
+ }
+
+
+ // Hash function object for PDT state tuples.  'T' must expose the members
+ // 'state_id' and 'stack_id'; the hash is state_id + stack_id * kPrime.
+ template <class T>
+ class PdtStateHash {
+  public:
+   size_t operator()(const T &tuple) const {
+     const size_t stack_part = tuple.stack_id * kPrime;
+     return tuple.state_id + stack_part;
+   }
+
+  private:
+   static const size_t kPrime;  // multiplier applied to the stack ID
+ };
+
+ template <typename T>
+ const size_t PdtStateHash<T>::kPrime = 7853;
+
+
+ // Tuple to PDT state bijection.
+ // Thin wrapper over CompactHashStateTable keyed by PdtStateTuple<S, K> and
+ // hashed with PdtStateHash.
+ template <class S, class K>
+ class PdtStateTable
+ : public CompactHashStateTable<PdtStateTuple<S, K>,
+ PdtStateHash<PdtStateTuple<S, K> > > {
+ public:
+ typedef S StateId;
+ typedef K StackId;
+
+ PdtStateTable() {}
+
+ // Copy constructor with an empty body and no base initializer: the base
+ // class is default-constructed and 'table' is not read.
+ PdtStateTable(const PdtStateTable<S, K> &table) {}
+
+ private:
+ // Declared private and not defined in this class, so clients cannot assign.
+ void operator=(const PdtStateTable<S, K> &table); // disallow
+ };
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_PDT_PDT_H__
diff --git a/src/include/fst/extensions/pdt/pdtlib.h b/src/include/fst/extensions/pdt/pdtlib.h
new file mode 100644
index 0000000..71c8123
--- /dev/null
+++ b/src/include/fst/extensions/pdt/pdtlib.h
@@ -0,0 +1,30 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+ // This is an experimental push-down transducer (PDT) library. A PDT is
+// encoded as an FST, where some transitions are labeled with open or close
+// parentheses. To be interpreted as a PDT, the parentheses must balance on a
+// path.
+
+#ifndef FST_EXTENSIONS_PDT_PDTLIB_H_
+#define FST_EXTENSIONS_PDT_PDTLIB_H_
+
+#include <fst/extensions/pdt/pdt.h>
+#include <fst/extensions/pdt/compose.h>
+#include <fst/extensions/pdt/expand.h>
+#include <fst/extensions/pdt/replace.h>
+
+#endif // FST_EXTENSIONS_PDT_PDTLIB_H_
diff --git a/src/include/fst/extensions/pdt/pdtscript.h b/src/include/fst/extensions/pdt/pdtscript.h
new file mode 100644
index 0000000..c2a1cf4
--- /dev/null
+++ b/src/include/fst/extensions/pdt/pdtscript.h
@@ -0,0 +1,284 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+// Convenience file for including all PDT operations at once, and/or
+// registering them for new arc types.
+
+#ifndef FST_EXTENSIONS_PDT_PDTSCRIPT_H_
+#define FST_EXTENSIONS_PDT_PDTSCRIPT_H_
+
+#include <utility>
+using std::pair; using std::make_pair;
+#include <vector>
+using std::vector;
+
+#include <fst/compose.h> // for ComposeOptions
+#include <fst/util.h>
+
+#include <fst/script/fst-class.h>
+#include <fst/script/arg-packs.h>
+#include <fst/script/shortest-path.h>
+
+#include <fst/extensions/pdt/compose.h>
+#include <fst/extensions/pdt/expand.h>
+#include <fst/extensions/pdt/info.h>
+#include <fst/extensions/pdt/replace.h>
+#include <fst/extensions/pdt/reverse.h>
+#include <fst/extensions/pdt/shortest-path.h>
+
+
+namespace fst {
+namespace script {
+
+// PDT COMPOSE
+
+ // Argument pack for the scripted PDT composition: first FST, second FST,
+ // parenthesis pairs, output FST, compose options, and a flag telling
+ // whether the first FST is the PDT.
+ typedef args::Package<const FstClass &,
+                       const FstClass &,
+                       const vector<pair<int64, int64> >&,
+                       MutableFstClass *,
+                       const ComposeOptions &,
+                       bool> PdtComposeArgs;
+
+ // Arc-typed implementation: unwraps the FstClass arguments, converts the
+ // int64 parenthesis pairs to Arc::Label pairs and calls Compose() with the
+ // parentheses placed next to whichever input is the PDT.
+ template<class Arc>
+ void PdtCompose(PdtComposeArgs *args) {
+   typedef typename Arc::Label Label;
+
+   const Fst<Arc> &ifst1 = *(args->arg1.GetFst<Arc>());
+   const Fst<Arc> &ifst2 = *(args->arg2.GetFst<Arc>());
+   MutableFst<Arc> *ofst = args->arg4->GetMutableFst<Arc>();
+
+   const size_t nparens = args->arg3.size();
+   vector<pair<Label, Label> > parens(nparens);
+   for (size_t k = 0; k != nparens; ++k) {
+     parens[k].first = args->arg3[k].first;
+     parens[k].second = args->arg3[k].second;
+   }
+
+   // arg6 false: the second FST is the PDT.
+   if (!args->arg6) {
+     Compose(ifst1, ifst2, parens, ofst, args->arg5);
+     return;
+   }
+   Compose(ifst1, parens, ifst2, ofst, args->arg5);
+ }
+
+ void PdtCompose(const FstClass & ifst1,
+                 const FstClass & ifst2,
+                 const vector<pair<int64, int64> > &parens,
+                 MutableFstClass *ofst,
+                 const ComposeOptions &copts,
+                 bool left_pdt);
+
+// PDT EXPAND
+
+ // Options for the scripted PDT expansion.  The fields are forwarded, in
+ // this order, to the ExpandOptions constructor by the templated PdtExpand().
+ struct PdtExpandOptions {
+ bool connect;
+ bool keep_parentheses;
+ WeightClass weight_threshold;
+
+ // Defaults: connect = true, keep_parentheses = false,
+ // weight_threshold = WeightClass::Zero().
+ PdtExpandOptions(bool c = true, bool k = false,
+ WeightClass w = WeightClass::Zero())
+ : connect(c), keep_parentheses(k), weight_threshold(w) {}
+ };
+
+ // Argument pack for the scripted PDT expansion: input FST, parenthesis
+ // pairs, output FST and expansion options.
+ typedef args::Package<const FstClass &,
+                       const vector<pair<int64, int64> >&,
+                       MutableFstClass *, PdtExpandOptions> PdtExpandArgs;
+
+ // Arc-typed implementation: unwraps the FstClass arguments, converts the
+ // int64 parenthesis pairs to Arc::Label pairs and calls Expand() with
+ // options built from the script-level PdtExpandOptions.
+ template<class Arc>
+ void PdtExpand(PdtExpandArgs *args) {
+   typedef typename Arc::Label Label;
+   typedef typename Arc::Weight Weight;
+
+   const Fst<Arc> &fst = *(args->arg1.GetFst<Arc>());
+   MutableFst<Arc> *ofst = args->arg3->GetMutableFst<Arc>();
+
+   const size_t nparens = args->arg2.size();
+   vector<pair<Label, Label> > parens(nparens);
+   for (size_t k = 0; k != nparens; ++k) {
+     parens[k].first = args->arg2[k].first;
+     parens[k].second = args->arg2[k].second;
+   }
+
+   Expand(fst, parens, ofst,
+          ExpandOptions<Arc>(
+              args->arg4.connect,
+              args->arg4.keep_parentheses,
+              *(args->arg4.weight_threshold.GetWeight<Weight>())));
+ }
+
+ void PdtExpand(const FstClass &ifst,
+                const vector<pair<int64, int64> > &parens,
+                MutableFstClass *ofst, const PdtExpandOptions &opts);
+
+ void PdtExpand(const FstClass &ifst,
+                const vector<pair<int64, int64> > &parens,
+                MutableFstClass *ofst, bool connect);
+
+// PDT REPLACE
+
+ // Argument pack for the scripted PDT replacement: (label, FST) tuples,
+ // output FST, parenthesis pairs (read and written) and the root label.
+ typedef args::Package<const vector<pair<int64, const FstClass*> > &,
+                       MutableFstClass *,
+                       vector<pair<int64, int64> > *,
+                       const int64 &> PdtReplaceArgs;
+
+ // Arc-typed implementation: converts the tuples and parenthesis pairs to
+ // their Arc-typed form, calls Replace(), then writes the parenthesis pairs
+ // produced by Replace() back through args->arg3.
+ template<class Arc>
+ void PdtReplace(PdtReplaceArgs *args) {
+   typedef typename Arc::Label Label;
+
+   const size_t ntuples = args->arg1.size();
+   vector<pair<Label, const Fst<Arc> *> > tuples(ntuples);
+   for (size_t k = 0; k != ntuples; ++k) {
+     tuples[k].first = args->arg1[k].first;
+     tuples[k].second = (args->arg1[k].second)->GetFst<Arc>();
+   }
+
+   MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>();
+
+   vector<pair<int64, int64> > *io_parens = args->arg3;
+   vector<pair<Label, Label> > parens(io_parens->size());
+   for (size_t k = 0; k != parens.size(); ++k) {
+     parens[k].first = io_parens->at(k).first;
+     parens[k].second = io_parens->at(k).second;
+   }
+
+   Replace(tuples, ofst, &parens, args->arg4);
+
+   // Hand the (possibly changed) parenthesis pairs back to the caller.
+   io_parens->resize(parens.size());
+   for (size_t k = 0; k != parens.size(); ++k) {
+     (*io_parens)[k].first = parens[k].first;
+     (*io_parens)[k].second = parens[k].second;
+   }
+ }
+
+ void PdtReplace(const vector<pair<int64, const FstClass*> > &fst_tuples,
+                 MutableFstClass *ofst,
+                 vector<pair<int64, int64> > *parens,
+                 const int64 &root);
+
+// PDT REVERSE
+
+ // Argument pack for the scripted PDT reversal: input FST, parenthesis
+ // pairs and output FST.
+ typedef args::Package<const FstClass &,
+                       const vector<pair<int64, int64> >&,
+                       MutableFstClass *> PdtReverseArgs;
+
+ // Arc-typed implementation: unwraps the FstClass arguments, converts the
+ // int64 parenthesis pairs to Arc::Label pairs and calls Reverse().
+ template<class Arc>
+ void PdtReverse(PdtReverseArgs *args) {
+   typedef typename Arc::Label Label;
+
+   const Fst<Arc> &fst = *(args->arg1.GetFst<Arc>());
+   MutableFst<Arc> *ofst = args->arg3->GetMutableFst<Arc>();
+
+   const size_t nparens = args->arg2.size();
+   vector<pair<Label, Label> > parens(nparens);
+   for (size_t k = 0; k != nparens; ++k) {
+     parens[k].first = args->arg2[k].first;
+     parens[k].second = args->arg2[k].second;
+   }
+
+   Reverse(fst, parens, ofst);
+ }
+
+ void PdtReverse(const FstClass &ifst,
+                 const vector<pair<int64, int64> > &parens,
+                 MutableFstClass *ofst);
+
+
+// PDT SHORTESTPATH
+
+ // Options for the scripted PDT shortest path.  'queue_type' selects the
+ // queue class used by the templated PdtShortestPath(); the two flags are
+ // passed on to fst::PdtShortestPathOptions.
+ struct PdtShortestPathOptions {
+ QueueType queue_type;
+ bool keep_parentheses;
+ bool path_gc;
+
+ // Defaults: FIFO queue, keep_parentheses = false, path_gc = true.
+ PdtShortestPathOptions(QueueType qt = FIFO_QUEUE,
+ bool kp = false, bool gc = true)
+ : queue_type(qt), keep_parentheses(kp), path_gc(gc) {}
+ };
+
+ // Argument pack for the scripted PDT shortest path: input FST, parenthesis
+ // pairs, output FST and options.
+ typedef args::Package<const FstClass &,
+ const vector<pair<int64, int64> >&,
+ MutableFstClass *,
+ const PdtShortestPathOptions &> PdtShortestPathArgs;
+
+ // Arc-typed implementation: unwraps the FstClass arguments, converts the
+ // int64 parenthesis pairs to Arc::Label pairs and calls ShortestPath() with
+ // the queue class selected by opts.queue_type.
+ template<class Arc>
+ void PdtShortestPath(PdtShortestPathArgs *args) {
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Label Label;
+ typedef typename Arc::Weight Weight;
+
+ const Fst<Arc> &fst = *(args->arg1.GetFst<Arc>());
+ MutableFst<Arc> *ofst = args->arg3->GetMutableFst<Arc>();
+ const PdtShortestPathOptions &opts = args->arg4;
+
+
+ vector<pair<Label, Label> > parens(args->arg2.size());
+ for (size_t i = 0; i < parens.size(); ++i) {
+ parens[i].first = args->arg2[i].first;
+ parens[i].second = args->arg2[i].second;
+ }
+
+ switch (opts.queue_type) {
+ // 'default' comes first and has no break or return: after the error is
+ // logged, control continues into the FIFO_QUEUE case, so an unknown
+ // queue type is handled with a FIFO queue.
+ default:
+ FSTERROR() << "Unknown queue type: " << opts.queue_type;
+ case FIFO_QUEUE: {
+ typedef FifoQueue<StateId> Queue;
+ fst::PdtShortestPathOptions<Arc, Queue> spopts(opts.keep_parentheses,
+ opts.path_gc);
+ ShortestPath(fst, parens, ofst, spopts);
+ return;
+ }
+ case LIFO_QUEUE: {
+ typedef LifoQueue<StateId> Queue;
+ fst::PdtShortestPathOptions<Arc, Queue> spopts(opts.keep_parentheses,
+ opts.path_gc);
+ ShortestPath(fst, parens, ofst, spopts);
+ return;
+ }
+ case STATE_ORDER_QUEUE: {
+ typedef StateOrderQueue<StateId> Queue;
+ fst::PdtShortestPathOptions<Arc, Queue> spopts(opts.keep_parentheses,
+ opts.path_gc);
+ ShortestPath(fst, parens, ofst, spopts);
+ return;
+ }
+ }
+ }
+
+ // Script-level entry point; 'opts' defaults to a default-constructed
+ // PdtShortestPathOptions.
+ void PdtShortestPath(const FstClass &ifst,
+ const vector<pair<int64, int64> > &parens,
+ MutableFstClass *ofst,
+ const PdtShortestPathOptions &opts =
+ PdtShortestPathOptions());
+
+// PRINT INFO
+
+ // Argument pack for the scripted PDT info printer: input FST and
+ // parenthesis pairs.
+ typedef args::Package<const FstClass &,
+                       const vector<pair<int64, int64> > &> PrintPdtInfoArgs;
+
+ // Arc-typed implementation: unwraps the FstClass argument, converts the
+ // int64 parenthesis pairs to Arc::Label pairs, builds a PdtInfo and prints
+ // it.
+ template<class Arc>
+ void PrintPdtInfo(PrintPdtInfoArgs *args) {
+   typedef typename Arc::Label Label;
+
+   const Fst<Arc> &fst = *(args->arg1.GetFst<Arc>());
+
+   const size_t nparens = args->arg2.size();
+   vector<pair<Label, Label> > parens(nparens);
+   for (size_t k = 0; k != nparens; ++k) {
+     parens[k].first = args->arg2[k].first;
+     parens[k].second = args->arg2[k].second;
+   }
+
+   PdtInfo<Arc> pdtinfo(fst, parens);
+   PrintPdtInfo(pdtinfo);
+ }
+
+ void PrintPdtInfo(const FstClass &ifst,
+                   const vector<pair<int64, int64> > &parens);
+
+} // namespace script
+} // namespace fst
+
+
+ // Expands to one REGISTER_FST_OPERATION invocation per PDT script operation
+ // declared in this header (compose, expand, replace, reverse, shortest path
+ // and info printing) for the given arc type.  The last invocation carries no
+ // trailing semicolon, so the macro call site supplies it.
+ #define REGISTER_FST_PDT_OPERATIONS(ArcType) \
+ REGISTER_FST_OPERATION(PdtCompose, ArcType, PdtComposeArgs); \
+ REGISTER_FST_OPERATION(PdtExpand, ArcType, PdtExpandArgs); \
+ REGISTER_FST_OPERATION(PdtReplace, ArcType, PdtReplaceArgs); \
+ REGISTER_FST_OPERATION(PdtReverse, ArcType, PdtReverseArgs); \
+ REGISTER_FST_OPERATION(PdtShortestPath, ArcType, PdtShortestPathArgs); \
+ REGISTER_FST_OPERATION(PrintPdtInfo, ArcType, PrintPdtInfoArgs)
+#endif // FST_EXTENSIONS_PDT_PDTSCRIPT_H_
diff --git a/src/include/fst/extensions/pdt/replace.h b/src/include/fst/extensions/pdt/replace.h
new file mode 100644
index 0000000..a85d0fe
--- /dev/null
+++ b/src/include/fst/extensions/pdt/replace.h
@@ -0,0 +1,192 @@
+// replace.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Recursively replace Fst arcs with other Fst(s) returning a PDT.
+
+#ifndef FST_EXTENSIONS_PDT_REPLACE_H__
+#define FST_EXTENSIONS_PDT_REPLACE_H__
+
+#include <fst/replace.h>
+
+namespace fst {
+
+// Hash functor for (FST id, state) keys, used by the map that assigns
+// parenthesis IDs: the state component is scaled by a fixed prime and added
+// to the FST id.
+template <typename S>
+struct ReplaceParenHash {
+  size_t operator()(const pair<size_t, S> &p) const {
+    const size_t fst_part = p.first;
+    const size_t state_part = p.second * kPrime;
+    return fst_part + state_part;
+  }
+
+ private:
+  static const size_t kPrime = 7853;
+};
+
+// Out-of-class definition of the in-class initialized constant.
+template <typename S> const size_t ReplaceParenHash<S>::kPrime;
+
+// Builds a pushdown transducer (PDT) from an RTN specification
+// identical to that in fst/replace.h. The result is a PDT
+// encoded as the FST 'ofst' where some transitions are labeled with
+// open or close parentheses. To be interpreted as a PDT, the parens
+// must balance on a path (see PdtExpand()). The open/close
+// parenthesis label pairs are returned in 'parens'.
+// Parameters:
+//   ifst_array: (non-terminal label, FST) pairs defining the RTN.
+//   ofst:       output; its states are deleted before the PDT is built.
+//   parens:     output; cleared, then filled with (open, close) label pairs.
+//   root:       non-terminal label of the top-level FST.
+template <class Arc>
+void Replace(const vector<pair<typename Arc::Label,
+ const Fst<Arc>* > >& ifst_array,
+ MutableFst<Arc> *ofst,
+ vector<pair<typename Arc::Label,
+ typename Arc::Label> > *parens,
+ typename Arc::Label root) {
+ typedef typename Arc::Label Label;
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Weight Weight;
+
+ ofst->DeleteStates();
+ parens->clear();
+
+ // Maps a non-terminal label to its index in ifst_array. If a label occurs
+ // more than once, the last occurrence wins.
+ unordered_map<Label, size_t> label2id;
+ for (size_t i = 0; i < ifst_array.size(); ++i)
+ label2id[ifst_array[i].first] = i;
+
+ // Largest output label seen on any copied arc; parenthesis labels are
+ // allocated above it. Input labels are not consulted.
+ Label max_label = kNoLabel;
+
+ // NOTE(review): the queue element type is size_t although it holds Label
+ // values (they are read back into a Label below).
+ deque<size_t> non_term_queue; // Queue of non-terminals to replace
+ unordered_set<Label> non_term_set; // Set of non-terminals to replace
+ non_term_queue.push_back(root);
+ non_term_set.insert(root);
+
+ // PDT state corr. to ith replace FST start state.
+ vector<StateId> fst_start(ifst_array.size(), kNoLabel);
+ // PDT state, weight pairs corr. to ith replace FST final state & weights.
+ vector< vector<pair<StateId, Weight> > > fst_final(ifst_array.size());
+
+ // Builds single Fst combining all referenced input Fsts. Leaves in the
+ // non-terminals for now. Tabulate the PDT states that correspond to
+ // the start and final states of the input Fsts.
+ // 'soff' is the number of output states that existed before the current
+ // input FST was copied; it is used to shift that FST's arc destinations.
+ for (StateId soff = 0; !non_term_queue.empty(); soff = ofst->NumStates()) {
+ Label label = non_term_queue.front();
+ non_term_queue.pop_front();
+ // NOTE(review): operator[] inserts a mapping to index 0 when 'label' is
+ // absent, which can only happen for a 'root' not listed in ifst_array.
+ size_t fst_id = label2id[label];
+
+ const Fst<Arc> *ifst = ifst_array[fst_id].second;
+ for (StateIterator< Fst<Arc> > siter(*ifst);
+ !siter.Done(); siter.Next()) {
+ StateId is = siter.Value();
+ StateId os = ofst->AddState();
+ if (is == ifst->Start()) {
+ fst_start[fst_id] = os;
+ if (label == root)
+ ofst->SetStart(os);
+ }
+ if (ifst->Final(is) != Weight::Zero()) {
+ // Only the root FST's final states are final in the PDT; final states
+ // of every FST are recorded for the close-parenthesis arcs added later.
+ if (label == root)
+ ofst->SetFinal(os, ifst->Final(is));
+ fst_final[fst_id].push_back(make_pair(os, ifst->Final(is)));
+ }
+ for (ArcIterator< Fst<Arc> > aiter(*ifst, is);
+ !aiter.Done(); aiter.Next()) {
+ Arc arc = aiter.Value();
+ if (max_label == kNoLabel || arc.olabel > max_label)
+ max_label = arc.olabel;
+ typename unordered_map<Label, size_t>::const_iterator it =
+ label2id.find(arc.olabel);
+ if (it != label2id.end()) {
+ size_t nfst_id = it->second;
+ // An arc whose non-terminal names an FST without a start state is
+ // dropped: the 'continue' skips the AddArc below.
+ if (ifst_array[nfst_id].second->Start() == -1)
+ continue;
+ if (non_term_set.count(arc.olabel) == 0) {
+ non_term_queue.push_back(arc.olabel);
+ non_term_set.insert(arc.olabel);
+ }
+ }
+ // Assumes the state iterator yields ids 0..n-1 in order, so input state
+ // k corresponds to output state soff + k -- TODO confirm.
+ arc.nextstate += soff;
+ ofst->AddArc(os, arc);
+ }
+ }
+ }
+
+ // Changes each non-terminal transition to an open parenthesis
+ // transition redirected to the PDT state that corresponds to the
+ // start state of the input FST for the non-terminal. Adds close parenthesis
+ // transitions from the PDT states corr. to the final states of the
+ // input FST for the non-terminal to the former destination state of the
+ // non-terminal transition.
+
+ typedef MutableArcIterator< MutableFst<Arc> > MIter;
+ typedef unordered_map<pair<size_t, StateId >, size_t,
+ ReplaceParenHash<StateId> > ParenMap;
+
+ // Parenthesis pair ID per fst, state pair.
+ ParenMap paren_map;
+ // # of parenthesis pairs per fst.
+ vector<size_t> nparens(ifst_array.size(), 0);
+ // Initial open parenthesis label
+ Label first_paren = max_label + 1;
+
+ for (StateIterator< Fst<Arc> > siter(*ofst);
+ !siter.Done(); siter.Next()) {
+ StateId os = siter.Value();
+ // Heap-allocated so it can be re-created when AddArc() on this same state
+ // invalidates it; 'n' tracks the arc position for the re-seek.
+ MIter *aiter = new MIter(ofst, os);
+ for (size_t n = 0; !aiter->Done(); aiter->Next(), ++n) {
+ Arc arc = aiter->Value();
+ typename unordered_map<Label, size_t>::const_iterator lit =
+ label2id.find(arc.olabel);
+ if (lit != label2id.end()) {
+ size_t nfst_id = lit->second;
+
+ // Get parentheses. Ensures distinct parenthesis pair per
+ // non-terminal and destination state but otherwise reuses them.
+ Label open_paren = kNoLabel, close_paren = kNoLabel;
+ pair<size_t, StateId> paren_key(nfst_id, arc.nextstate);
+ typename ParenMap::const_iterator pit = paren_map.find(paren_key);
+ if (pit != paren_map.end()) {
+ size_t paren_id = pit->second;
+ open_paren = (*parens)[paren_id].first;
+ close_paren = (*parens)[paren_id].second;
+ } else {
+ // The k-th distinct destination of a given FST gets pair ID k, so
+ // different FSTs share pair IDs; 'parens' grows only for a new ID.
+ size_t paren_id = nparens[nfst_id]++;
+ open_paren = first_paren + 2 * paren_id;
+ close_paren = open_paren + 1;
+ paren_map[paren_key] = paren_id;
+ if (paren_id >= parens->size())
+ parens->push_back(make_pair(open_paren, close_paren));
+ }
+
+ // Sets open parenthesis.
+ Arc sarc(open_paren, open_paren, arc.weight, fst_start[nfst_id]);
+ aiter->SetValue(sarc);
+
+ // Adds close parentheses.
+ for (size_t i = 0; i < fst_final[nfst_id].size(); ++i) {
+ pair<StateId, Weight> &p = fst_final[nfst_id][i];
+ Arc farc(close_paren, close_paren, p.second, arc.nextstate);
+
+ ofst->AddArc(p.first, farc);
+ if (os == p.first) { // Invalidated iterator
+ delete aiter;
+ aiter = new MIter(ofst, os);
+ aiter->Seek(n);
+ }
+ }
+ }
+ }
+ delete aiter;
+ }
+}
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_PDT_REPLACE_H__
diff --git a/src/include/fst/extensions/pdt/reverse.h b/src/include/fst/extensions/pdt/reverse.h
new file mode 100644
index 0000000..b20e1c5
--- /dev/null
+++ b/src/include/fst/extensions/pdt/reverse.h
@@ -0,0 +1,58 @@
+// reverse.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Reverse a PDT.
+
+#ifndef FST_EXTENSIONS_PDT_REVERSE_H__
+#define FST_EXTENSIONS_PDT_REVERSE_H__
+
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <vector>
+using std::vector;
+
+#include <fst/mutable-fst.h>
+#include <fst/relabel.h>
+#include <fst/reverse.h>
+
+namespace fst {
+
+// Reverses a pushdown transducer (PDT) encoded as an FST. The FST itself is
+// reversed first; afterwards every open parenthesis label is exchanged with
+// its matching close parenthesis label, on both the input and output side.
+template<class Arc, class RevArc>
+void Reverse(const Fst<Arc> &ifst,
+             const vector<pair<typename Arc::Label,
+                               typename Arc::Label> > &parens,
+             MutableFst<RevArc> *ofst) {
+  typedef typename Arc::Label Label;
+  typedef vector<pair<Label, Label> > LabelPairs;
+
+  // Plain FST reversal; parenthesis labels are still unswapped here.
+  Reverse(ifst, ofst);
+
+  // One relabeling table holding open -> close and close -> open for each
+  // pair, applied to input and output labels alike.
+  LabelPairs swaps;
+  for (typename LabelPairs::const_iterator it = parens.begin();
+       it != parens.end(); ++it) {
+    swaps.push_back(make_pair(it->first, it->second));
+    swaps.push_back(make_pair(it->second, it->first));
+  }
+  Relabel(ofst, swaps, swaps);
+}
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_PDT_REVERSE_H__
diff --git a/src/include/fst/extensions/pdt/shortest-path.h b/src/include/fst/extensions/pdt/shortest-path.h
new file mode 100644
index 0000000..e90471b
--- /dev/null
+++ b/src/include/fst/extensions/pdt/shortest-path.h
@@ -0,0 +1,790 @@
+// shortest-path.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Functions to find shortest paths in a PDT.
+
+#ifndef FST_EXTENSIONS_PDT_SHORTEST_PATH_H__
+#define FST_EXTENSIONS_PDT_SHORTEST_PATH_H__
+
+#include <fst/shortest-path.h>
+#include <fst/extensions/pdt/paren.h>
+#include <fst/extensions/pdt/pdt.h>
+
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <tr1/unordered_set>
+using std::tr1::unordered_set;
+using std::tr1::unordered_multiset;
+#include <stack>
+#include <vector>
+using std::vector;
+
+namespace fst {
+
+// Options for PDT shortest path. The Queue template argument selects the
+// queue discipline used by the search; Arc is the arc type of the PDT.
+template <class Arc, class Queue>
+struct PdtShortestPathOptions {
+  bool keep_parentheses;  // Keep paren labels on the output path?
+  bool path_gc;           // Garbage-collect unreachable search states?
+
+  // By default parentheses are not kept and search-state GC is enabled.
+  PdtShortestPathOptions(bool kp = false, bool gc = true)
+      : keep_parentheses(kp),
+        path_gc(gc) {}
+};
+
+
+// Class to store PDT shortest path results. Stores shortest path
+// tree info (Distance(), Parent() and, for search states, ParenId())
+// keyed on two types:
+// (1) By SearchState: This is a usual node in a shortest path tree but:
+// (a) is w.r.t a PDT search state - a pair of a PDT state and
+// a 'start' state, which is either the PDT start state or
+// the destination state of an open parenthesis.
+// (b) the Distance() is from this 'start' state to the search state.
+// (c) Parent().state is kNoStateId for the 'start' state.
+//
+// (2) By ParenSpec: This connects shortest path trees depending on
+// the parenthesis taken. Given the parenthesis spec:
+// (a) the Distance() is from the Parent() 'start' state to the
+// parenthesis destination state.
+// (b) the parenthesis arc itself is not stored; it is looked up again
+// from the paren ID when the path is rebuilt.
+template <class Arc>
+class PdtShortestPathData {
+ public:
+  // Flag bit that users of this class set (via SetFlags) on 'final' search
+  // states; GC() starts its mark phase from the states carrying it.
+  static const uint8 kFinal;
+
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Weight Weight;
+  typedef typename Arc::Label Label;
+
+  // Key of the per-state table: a PDT state together with the 'start'
+  // state of the sub-graph it is being searched in.
+  struct SearchState {
+    SearchState() : state(kNoStateId), start(kNoStateId) {}
+
+    SearchState(StateId s, StateId t) : state(s), start(t) {}
+
+    bool operator==(const SearchState &s) const {
+      if (&s == this)
+        return true;
+      return s.state == this->state && s.start == this->start;
+    }
+
+    StateId state;  // PDT state
+    StateId start;  // PDT paren 'source' state
+  };
+
+
+  // Specifies paren id, source and dest 'start' states of a paren.
+  // These are the 'start' states of the respective sub-graphs.
+  struct ParenSpec {
+    ParenSpec()
+        : paren_id(kNoLabel), src_start(kNoStateId), dest_start(kNoStateId) {}
+
+    ParenSpec(Label id, StateId s, StateId d)
+        : paren_id(id), src_start(s), dest_start(d) {}
+
+    Label paren_id;      // Id of parenthesis
+    StateId src_start;   // sub-graph 'start' state for paren source.
+    StateId dest_start;  // sub-graph 'start' state for paren dest.
+
+    bool operator==(const ParenSpec &x) const {
+      if (&x == this)
+        return true;
+      return x.paren_id == this->paren_id &&
+             x.src_start == this->src_start &&
+             x.dest_start == this->dest_start;
+    }
+  };
+
+  // Record stored per search state and per paren spec. A default-constructed
+  // record (Zero() distance, no parent, no paren, no flags) doubles as the
+  // "not found" answer.
+  struct SearchData {
+    SearchData() : distance(Weight::Zero()),
+                   parent(kNoStateId, kNoStateId),
+                   paren_id(kNoLabel),
+                   flags(0) {}
+
+    Weight distance;     // Distance to this state from PDT 'start' state
+    SearchState parent;  // Parent state in shortest path tree
+    int16 paren_id;      // If parent arc has paren, paren ID, o.w. kNoLabel
+    uint8 flags;         // First byte reserved for PdtShortestPathData use
+  };
+
+  // 'gc' enables GC(). Both one-entry lookup caches start out holding their
+  // sentinel key mapped to the null record, so a lookup of the sentinel key
+  // never returns an indeterminate pointer.
+  PdtShortestPathData(bool gc)
+      : state_(kNoStateId, kNoStateId),
+        state_data_(&null_search_data_),
+        paren_(kNoLabel, kNoStateId, kNoStateId),
+        paren_data_(&null_search_data_),
+        gc_(gc),
+        nstates_(0),
+        ngc_(0),
+        finished_(false) {}
+
+  ~PdtShortestPathData() {
+    VLOG(1) << "opm size: " << paren_map_.size();
+    VLOG(1) << "# of search states: " << nstates_;
+    if (gc_)
+      VLOG(1) << "# of GC'd search states: " << ngc_;
+  }
+
+  // Drops all stored search data and counters. The lookup caches are reset
+  // together with the maps: their cached pointers refer to map nodes that
+  // clear() has just destroyed.
+  void Clear() {
+    search_map_.clear();
+    search_multimap_.clear();
+    paren_map_.clear();
+    state_ = SearchState(kNoStateId, kNoStateId);
+    state_data_ = &null_search_data_;
+    paren_ = ParenSpec(kNoLabel, kNoStateId, kNoStateId);
+    paren_data_ = &null_search_data_;
+    nstates_ = 0;
+    ngc_ = 0;
+  }
+
+  // Accessors. Until Finish() is called, looking up a key that is not yet
+  // present creates a default record for it (see GetSearchData below).
+
+  Weight Distance(SearchState s) const {
+    SearchData *data = GetSearchData(s);
+    return data->distance;
+  }
+
+  Weight Distance(const ParenSpec &paren) const {
+    SearchData *data = GetSearchData(paren);
+    return data->distance;
+  }
+
+  SearchState Parent(SearchState s) const {
+    SearchData *data = GetSearchData(s);
+    return data->parent;
+  }
+
+  SearchState Parent(const ParenSpec &paren) const {
+    SearchData *data = GetSearchData(paren);
+    return data->parent;
+  }
+
+  Label ParenId(SearchState s) const {
+    SearchData *data = GetSearchData(s);
+    return data->paren_id;
+  }
+
+  uint8 Flags(SearchState s) const {
+    SearchData *data = GetSearchData(s);
+    return data->flags;
+  }
+
+  void SetDistance(SearchState s, Weight w) {
+    SearchData *data = GetSearchData(s);
+    data->distance = w;
+  }
+
+  void SetDistance(const ParenSpec &paren, Weight w) {
+    SearchData *data = GetSearchData(paren);
+    data->distance = w;
+  }
+
+  void SetParent(SearchState s, SearchState p) {
+    SearchData *data = GetSearchData(s);
+    data->parent = p;
+  }
+
+  void SetParent(const ParenSpec &paren, SearchState p) {
+    SearchData *data = GetSearchData(paren);
+    data->parent = p;
+  }
+
+  // Stores the paren ID in an int16 field. An ID of 32768 or more is
+  // reported through FSTERROR() and then still stored (truncated).
+  void SetParenId(SearchState s, Label p) {
+    if (p >= 32768)
+      FSTERROR() << "PdtShortestPathData: Paren ID does not fits in an int16";
+    SearchData *data = GetSearchData(s);
+    data->paren_id = p;
+  }
+
+  // Replaces the bits selected by 'mask' with the corresponding bits of 'f'.
+  void SetFlags(SearchState s, uint8 f, uint8 mask) {
+    SearchData *data = GetSearchData(s);
+    data->flags &= ~mask;
+    data->flags |= f & mask;
+  }
+
+  void GC(StateId s);
+
+  // Switches lookups to read-only mode: from now on unknown keys resolve to
+  // the null record instead of being inserted.
+  void Finish() { finished_ = true; }
+
+ private:
+  static const Arc kNoArc;
+  static const size_t kPrime0;
+  static const size_t kPrime1;
+  static const uint8 kInited;
+  static const uint8 kMarked;
+
+  // Hash for search state
+  struct SearchStateHash {
+    size_t operator()(const SearchState &s) const {
+      return s.state + s.start * kPrime0;
+    }
+  };
+
+  // Hash for paren map
+  struct ParenHash {
+    size_t operator()(const ParenSpec &paren) const {
+      return paren.paren_id + paren.src_start * kPrime0 +
+             paren.dest_start * kPrime1;
+    }
+  };
+
+  typedef unordered_map<SearchState, SearchData, SearchStateHash> SearchMap;
+
+  typedef unordered_multimap<StateId, StateId> SearchMultimap;
+
+  // Hash map from paren spec to open paren data
+  typedef unordered_map<ParenSpec, SearchData, ParenHash> ParenMap;
+
+  // Returns the record for search state 's', consulting the one-entry cache
+  // (state_, state_data_) first. Before Finish(), a missing record is
+  // created, counted in nstates_ and, when GC is enabled, indexed by its
+  // 'start' state. After Finish(), a missing record resolves to the null
+  // record and the cache is left untouched.
+  SearchData *GetSearchData(SearchState s) const {
+    if (s == state_)
+      return state_data_;
+    if (finished_) {
+      typename SearchMap::iterator it = search_map_.find(s);
+      if (it == search_map_.end())
+        return &null_search_data_;
+      state_ = s;
+      return state_data_ = &(it->second);
+    } else {
+      state_ = s;
+      state_data_ = &search_map_[s];
+      if (!(state_data_->flags & kInited)) {
+        ++nstates_;
+        if (gc_)
+          search_multimap_.insert(make_pair(s.start, s.state));
+        state_data_->flags = kInited;
+      }
+      return state_data_;
+    }
+  }
+
+  // Same as above for paren specs, with its own cache (paren_, paren_data_).
+  SearchData *GetSearchData(ParenSpec paren) const {
+    if (paren == paren_)
+      return paren_data_;
+    if (finished_) {
+      typename ParenMap::iterator it = paren_map_.find(paren);
+      if (it == paren_map_.end())
+        return &null_search_data_;
+      paren_ = paren;
+      // The pointer must go into the paren cache. Storing it in state_data_
+      // would leave paren_data_ stale for the key just cached and would make
+      // the state cache hand out a paren record.
+      return paren_data_ = &(it->second);
+    } else {
+      paren_ = paren;
+      return paren_data_ = &paren_map_[paren];
+    }
+  }
+
+  mutable SearchMap search_map_;            // Maps from search state to data
+  mutable SearchMultimap search_multimap_;  // Maps from 'start' to subgraph
+  mutable ParenMap paren_map_;              // Maps paren spec to search data
+  mutable SearchState state_;               // Last state accessed
+  mutable SearchData *state_data_;          // Last state data accessed
+  mutable ParenSpec paren_;                 // Last paren spec accessed
+  mutable SearchData *paren_data_;          // Last paren data accessed
+  bool gc_;                                 // Allow GC?
+  mutable size_t nstates_;                  // Total number of search states
+  size_t ngc_;                              // Number of GC'd search states
+  mutable SearchData null_search_data_;     // Null search data
+  bool finished_;                           // Read-only access when true
+
+  DISALLOW_COPY_AND_ASSIGN(PdtShortestPathData);
+};
+
+// Deletes inaccessible search data from a given 'start' (open paren dest)
+// state. Assumes 'final' (close paren source or PDT final) states have
+// been flagged 'kFinal'.
+// Mark-and-sweep over the search states recorded for sub-graph 'start'.
+// Does nothing when GC was disabled at construction. States reachable by
+// parent pointers from a kFinal-flagged state of this sub-graph are kept;
+// every other state of the sub-graph is erased and counted in ngc_. The
+// 'start' index entries for the sub-graph are removed either way.
+template<class Arc>
+void PdtShortestPathData<Arc>::GC(StateId start) {
+  if (!gc_)
+    return;
+
+  // Collects the kFinal-flagged states of this sub-graph as mark roots.
+  vector<StateId> final;
+  for (typename SearchMultimap::iterator mmit = search_multimap_.find(start);
+       mmit != search_multimap_.end() && mmit->first == start;
+       ++mmit) {
+    SearchState s(mmit->second, start);
+    const SearchData &data = search_map_[s];
+    if (data.flags & kFinal)
+      final.push_back(s.state);
+  }
+
+  // Mark phase
+  for (size_t i = 0; i < final.size(); ++i) {
+    SearchState s(final[i], start);
+    while (s.state != kNoLabel) {
+      SearchData *sdata = &search_map_[s];
+      if (sdata->flags & kMarked)
+        break;
+      sdata->flags |= kMarked;
+      SearchState p = sdata->parent;
+      if (p.start != start && p.start != kNoLabel) {  // entering sub-subgraph
+        // The parent lives in another sub-graph: continue from the state
+        // recorded as parent of the corresponding paren spec.
+        ParenSpec paren(sdata->paren_id, s.start, p.start);
+        SearchData *pdata = &paren_map_[paren];
+        s = pdata->parent;
+      } else {
+        s = p;
+      }
+    }
+  }
+
+  // Sweep phase
+  typename SearchMultimap::iterator mmit = search_multimap_.find(start);
+  while (mmit != search_multimap_.end() && mmit->first == start) {
+    SearchState s(mmit->second, start);
+    typename SearchMap::iterator mit = search_map_.find(s);
+    const SearchData &data = mit->second;
+    if (!(data.flags & kMarked)) {
+      search_map_.erase(mit);
+      ++ngc_;
+    }
+    search_multimap_.erase(mmit++);
+  }
+
+  // The sweep may have erased the record the one-entry state cache points
+  // at. Reset the cache so the next lookup goes through the map instead of
+  // dereferencing a pointer to a destroyed node.
+  state_ = SearchState(kNoStateId, kNoStateId);
+  state_data_ = &null_search_data_;
+}
+
+// Definitions of the static constants declared in PdtShortestPathData.
+
+// Arc with no labels, Zero() weight and no destination state.
+// NOTE(review): no use of this constant is visible in the class body.
+template<class Arc> const Arc PdtShortestPathData<Arc>::kNoArc
+ = Arc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId);
+
+// Multipliers used by SearchStateHash and ParenHash.
+template<class Arc> const size_t PdtShortestPathData<Arc>::kPrime0 = 7853;
+
+template<class Arc> const size_t PdtShortestPathData<Arc>::kPrime1 = 7867;
+
+// Flag bits stored in SearchData::flags. kInited marks a record that has been
+// counted and indexed; kFinal marks GC roots; kMarked is set by GC's mark
+// phase. PdtShortestPath adds its own bits 0x10 and 0x20 to the same byte.
+template<class Arc> const uint8 PdtShortestPathData<Arc>::kInited = 0x01;
+
+template<class Arc> const uint8 PdtShortestPathData<Arc>::kFinal = 0x02;
+
+template<class Arc> const uint8 PdtShortestPathData<Arc>::kMarked = 0x04;
+
+
+// This computes the single source shortest (balanced) path (SSSP)
+// through a weighted PDT that has a bounded stack (i.e. is expandable
+// as an FST). It is a generalization of the classic SSSP graph
+// algorithm that removes a state s from a queue (defined by a
+// user-provided queue type) and relaxes the destination states of
+// transitions leaving s. In this PDT version, states that have
+// entering open parentheses are treated as source states for a
+// sub-graph SSSP problem with the shortest path up to the open
+// parenthesis being first saved. When a close parenthesis is then
+// encountered any balancing open parenthesis is examined for this
+// saved information and multiplied back. In this way, each sub-graph
+// is entered only once rather than repeatedly. If every state in the
+// input PDT has the property that there is a unique 'start' state for
+// it with entering open parentheses, then this algorithm is quite
+// straight-forward. In general, this will not be the case, so the
+// algorithm (implicitly) creates a new graph where each state is a
+// pair of an original state and a possible parenthesis 'start' state
+// for that state.
+template<class Arc, class Queue>
+class PdtShortestPath {
+ public:
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Weight Weight;
+  typedef typename Arc::Label Label;
+
+  typedef PdtShortestPathData<Arc> SpData;
+  typedef typename SpData::SearchState SearchState;
+  typedef typename SpData::ParenSpec ParenSpec;
+
+  typedef typename PdtParenReachable<Arc>::SetIterator StateSetIterator;
+  typedef typename PdtBalanceData<Arc>::SetIterator CloseSourceIterator;
+
+  // Copies 'ifst' and keeps a reference to 'parens', which must therefore
+  // outlive this object. Sets the error flag if the weight type lacks the
+  // path property or is not right distributive. Every member that Init()
+  // would otherwise be the first to assign is given a defined value here,
+  // because Init() returns early for an FST without a start state and the
+  // destructor logs nenqueued_ regardless.
+  PdtShortestPath(const Fst<Arc> &ifst,
+                  const vector<pair<Label, Label> > &parens,
+                  const PdtShortestPathOptions<Arc, Queue> &opts)
+      : kFinal(SpData::kFinal),
+        ifst_(ifst.Copy()),
+        ofst_(0),
+        parens_(parens),
+        keep_parens_(opts.keep_parentheses),
+        state_queue_(0),
+        start_(ifst.Start()),
+        f_distance_(Weight::Zero()),
+        sp_data_(opts.path_gc),
+        nenqueued_(0),
+        error_(false) {
+
+    if ((Weight::Properties() & (kPath | kRightSemiring))
+        != (kPath | kRightSemiring)) {
+      FSTERROR() << "SingleShortestPath: Weight needs to have the path"
+                 << " property and be right distributive: " << Weight::Type();
+      error_ = true;
+    }
+
+    // Both labels of the i-th pair map to paren ID i.
+    for (size_t i = 0; i < parens.size(); ++i) {
+      const pair<Label, Label> &p = parens[i];
+      const Label paren_id = static_cast<Label>(i);
+      paren_id_map_[p.first] = paren_id;
+      paren_id_map_[p.second] = paren_id;
+    }
+  }
+
+  ~PdtShortestPath() {
+    VLOG(1) << "# of input states: " << CountStates(*ifst_);
+    VLOG(1) << "# of enqueued: " << nenqueued_;
+    VLOG(1) << "cpmm size: " << close_paren_multimap_.size();
+    delete ifst_;
+  }
+
+  // Computes the shortest balanced path and writes it to 'ofst'. Marks
+  // 'ofst' with kError if an error was recorded.
+  void ShortestPath(MutableFst<Arc> *ofst) {
+    Init(ofst);
+    GetDistance(start_);
+    GetPath();
+    sp_data_.Finish();
+    if (error_) ofst->SetProperties(kError, kError);
+  }
+
+  const PdtShortestPathData<Arc> &GetShortestPathData() const {
+    return sp_data_;
+  }
+
+  PdtBalanceData<Arc> *GetBalanceData() { return &balance_data_; }
+
+ private:
+  static const Arc kNoArc;
+  static const uint8 kEnqueued;  // Search state is currently in the queue
+  static const uint8 kExpanded;  // Search state has had its arcs processed
+  const uint8 kFinal;            // Copy of PdtShortestPathData::kFinal
+
+ public:
+  // Hash multimap from close paren label to an paren arc.
+  typedef unordered_multimap<ParenState<Arc>, Arc,
+                             typename ParenState<Arc>::Hash> CloseParenMultimap;
+
+  const CloseParenMultimap &GetCloseParenMultimap() const {
+    return close_paren_multimap_;
+  }
+
+ private:
+  void Init(MutableFst<Arc> *ofst);
+  void GetDistance(StateId start);
+  void ProcFinal(SearchState s);
+  void ProcArcs(SearchState s);
+  void ProcOpenParen(Label paren_id, SearchState s, Arc arc, Weight w);
+  void ProcCloseParen(Label paren_id, SearchState s, const Arc &arc, Weight w);
+  void ProcNonParen(SearchState s, const Arc &arc, Weight w);
+  void Relax(SearchState s, SearchState t, Arc arc, Weight w, Label paren_id);
+  void Enqueue(SearchState d);
+  void GetPath();
+  Arc GetPathArc(SearchState s, SearchState p, Label paren_id, bool open);
+
+  Fst<Arc> *ifst_;                             // Owned copy of the input
+  MutableFst<Arc> *ofst_;                      // Output, set by Init()
+  const vector<pair<Label, Label> > &parens_;  // Caller-owned paren pairs
+  bool keep_parens_;
+  Queue *state_queue_;                         // current state queue
+  StateId start_;
+  Weight f_distance_;                          // Best complete path weight
+  SearchState f_parent_;                       // Final state of that path
+  SpData sp_data_;
+  unordered_map<Label, Label> paren_id_map_;   // Paren label -> paren ID
+  CloseParenMultimap close_paren_multimap_;
+  PdtBalanceData<Arc> balance_data_;
+  ssize_t nenqueued_;                          // # of first-time enqueues
+  bool error_;
+
+  DISALLOW_COPY_AND_ASSIGN(PdtShortestPath);
+};
+
+// Prepares a run: empties 'ofst' and gives it the input's symbol tables,
+// then, unless the input has no start state, resets all search bookkeeping
+// and indexes every parenthesis arc of the input (open parens by destination
+// state, close parens by source state).
+template<class Arc, class Queue>
+void PdtShortestPath<Arc, Queue>::Init(MutableFst<Arc> *ofst) {
+  typedef typename unordered_map<Label, Label>::const_iterator ParenIdIter;
+
+  ofst_ = ofst;
+  ofst->DeleteStates();
+  ofst->SetInputSymbols(ifst_->InputSymbols());
+  ofst->SetOutputSymbols(ifst_->OutputSymbols());
+
+  if (ifst_->Start() == kNoStateId)
+    return;
+
+  f_distance_ = Weight::Zero();
+  f_parent_ = SearchState(kNoStateId, kNoStateId);
+
+  sp_data_.Clear();
+  close_paren_multimap_.clear();
+  balance_data_.Clear();
+  nenqueued_ = 0;
+
+  for (StateIterator<Fst<Arc> > siter(*ifst_); !siter.Done(); siter.Next()) {
+    const StateId source = siter.Value();
+    for (ArcIterator<Fst<Arc> > aiter(*ifst_, source);
+         !aiter.Done(); aiter.Next()) {
+      const Arc &arc = aiter.Value();
+      const ParenIdIter found = paren_id_map_.find(arc.ilabel);
+      if (found == paren_id_map_.end())
+        continue;  // Not a parenthesis arc.
+
+      const Label paren_id = found->second;
+      const bool is_open = arc.ilabel == parens_[paren_id].first;
+      if (is_open) {
+        balance_data_.OpenInsert(paren_id, arc.nextstate);
+      } else {
+        close_paren_multimap_.insert(
+            make_pair(ParenState<Arc>(paren_id, source), arc));
+      }
+    }
+  }
+}
+
+// Runs the queue-driven search for the sub-graph rooted at 'start', whose
+// root search state gets distance One(). Nested sub-graphs are searched
+// recursively from ProcOpenParen(), which is why the active queue is reached
+// through the state_queue_ member rather than the local object.
+template<class Arc, class Queue>
+void PdtShortestPath<Arc, Queue>::GetDistance(StateId start) {
+  if (start == kNoStateId)
+    return;
+
+  Queue local_queue;
+  state_queue_ = &local_queue;
+
+  const SearchState root(start, start);
+  Enqueue(root);
+  sp_data_.SetDistance(root, Weight::One());
+
+  for (; !state_queue_->Empty(); ) {
+    const StateId head = state_queue_->Head();
+    state_queue_->Dequeue();
+
+    const SearchState current(head, start);
+    sp_data_.SetFlags(current, 0, kEnqueued);
+    ProcFinal(current);
+    ProcArcs(current);
+    sp_data_.SetFlags(current, kExpanded, kExpanded);
+  }
+
+  // The sub-graph is complete: close its balance data and collect garbage.
+  balance_data_.FinishInsert(start);
+  sp_data_.GC(start);
+}
+
+// If 's' is a final PDT state reached in the top-level sub-graph, offers its
+// total weight (distance times final weight) as the best complete path. On
+// improvement the kFinal flag moves from the previous best state to 's'.
+template<class Arc, class Queue>
+void PdtShortestPath<Arc, Queue>::ProcFinal(SearchState s) {
+  const Weight final_weight = ifst_->Final(s.state);
+  const bool is_final = final_weight != Weight::Zero();
+  if (!is_final || s.start != start_)
+    return;
+
+  const Weight total = Times(sp_data_.Distance(s), final_weight);
+  const Weight improved = Plus(f_distance_, total);
+  if (f_distance_ != improved) {
+    if (f_parent_.state != kNoStateId)
+      sp_data_.SetFlags(f_parent_, 0, kFinal);
+    sp_data_.SetFlags(s, kFinal, kFinal);
+
+    f_distance_ = improved;
+    f_parent_ = s;
+  }
+}
+
+// Dispatches every arc leaving 's' to the open-paren, close-paren or
+// non-paren handler, passing the weight of reaching the arc's end through
+// 's'. The distance of 's' is re-read for each arc.
+template<class Arc, class Queue>
+void PdtShortestPath<Arc, Queue>::ProcArcs(SearchState s) {
+  typedef typename unordered_map<Label, Label>::const_iterator ParenIdIter;
+
+  for (ArcIterator< Fst<Arc> > aiter(*ifst_, s.state);
+       !aiter.Done();
+       aiter.Next()) {
+    Arc arc = aiter.Value();
+    Weight w = Times(sp_data_.Distance(s), arc.weight);
+
+    const ParenIdIter found = paren_id_map_.find(arc.ilabel);
+    if (found == paren_id_map_.end()) {
+      ProcNonParen(s, arc, w);
+      continue;
+    }
+
+    const Label paren_id = found->second;
+    const bool is_open = arc.ilabel == parens_[paren_id].first;
+    if (is_open) {
+      ProcOpenParen(paren_id, s, arc, w);
+    } else {
+      ProcCloseParen(paren_id, s, arc, w);
+    }
+  }
+}
+
+// Saves the shortest path info for reaching this parenthesis
+// and starts a new SSSP in the sub-graph pointed to by the parenthesis
+// if previously unvisited. Otherwise it finds any previously encountered
+// closing parentheses and relaxes them using the recursively stored
+// shortest distance to them.
+// paren_id: ID of the open parenthesis on 'arc'.
+// s:        search state the arc leaves.
+// w:        weight of reaching the arc's destination through 's'.
+template<class Arc, class Queue> inline
+void PdtShortestPath<Arc, Queue>::ProcOpenParen(
+ Label paren_id, SearchState s, Arc arc, Weight w) {
+
+ // 'd' is the root of the sub-graph entered by this parenthesis; 'paren'
+ // identifies the parenthesis between the two sub-graphs.
+ SearchState d(arc.nextstate, arc.nextstate);
+ ParenSpec paren(paren_id, s.start, d.start);
+ Weight pdist = sp_data_.Distance(paren);
+ // Proceeds only if 'w' improves on the distance recorded for this paren.
+ if (pdist != Plus(pdist, w)) {
+ sp_data_.SetDistance(paren, w);
+ sp_data_.SetParent(paren, s);
+ Weight dist = sp_data_.Distance(d);
+ // A Zero() distance at the sub-graph root means the sub-graph has no
+ // stored result, so it is searched now. GetDistance() points
+ // state_queue_ at its own local queue, hence the save and restore.
+ if (dist == Weight::Zero()) {
+ Queue *state_queue = state_queue_;
+ GetDistance(d.start);
+ state_queue_ = state_queue;
+ }
+ // For every close-paren source state recorded for this paren and
+ // sub-graph, relaxes the destination of each matching close-paren arc in
+ // the caller's sub-graph with weight w * distance(source) * arc weight.
+ for (CloseSourceIterator set_iter =
+ balance_data_.Find(paren_id, arc.nextstate);
+ !set_iter.Done(); set_iter.Next()) {
+ SearchState cpstate(set_iter.Element(), d.start);
+ ParenState<Arc> paren_state(paren_id, cpstate.state);
+ for (typename CloseParenMultimap::const_iterator cpit =
+ close_paren_multimap_.find(paren_state);
+ cpit != close_paren_multimap_.end() && paren_state == cpit->first;
+ ++cpit) {
+ const Arc &cparc = cpit->second;
+ Weight cpw = Times(w, Times(sp_data_.Distance(cpstate),
+ cparc.weight));
+ Relax(cpstate, s, cparc, cpw, paren_id);
+ }
+ }
+ }
+}
+
+// Records, the first time 's' is expanded, that 's' is the source of a close
+// parenthesis with the given ID inside its sub-graph, and flags 's' as
+// 'final' for that sub-graph. Nothing is relaxed here; close-paren
+// destinations are relaxed from ProcOpenParen(). 'arc', 'w' and the local
+// 'paren_state' are not otherwise used.
+template<class Arc, class Queue> inline
+void PdtShortestPath<Arc, Queue>::ProcCloseParen(
+    Label paren_id, SearchState s, const Arc &arc, Weight w) {
+  ParenState<Arc> paren_state(paren_id, s.start);
+  const bool already_expanded = (sp_data_.Flags(s) & kExpanded) != 0;
+  if (already_expanded)
+    return;
+  balance_data_.CloseInsert(paren_id, s.start, s.state);
+  sp_data_.SetFlags(s, kFinal, kFinal);
+}
+
+// An ordinary arc stays inside the sub-graph of 's': relax its destination
+// there, with no parenthesis recorded on the parent link.
+template<class Arc, class Queue> inline
+void PdtShortestPath<Arc, Queue>::ProcNonParen(
+    SearchState s, const Arc &arc, Weight w) {
+  const Label no_paren = kNoLabel;
+  Relax(s, s, arc, w, no_paren);
+}
+
+// Relaxes the destination of 'arc', taken from search state 's' with path
+// weight 'w'. The destination is placed in the sub-graph of 't' (i.e. it
+// gets t's paren 'start'). When 'w' improves the destination's distance, its
+// parent, paren ID and distance are updated and it is (re)enqueued.
+template<class Arc, class Queue> inline
+void PdtShortestPath<Arc, Queue>::Relax(
+    SearchState s, SearchState t, Arc arc, Weight w, Label paren_id) {
+  const SearchState dest(arc.nextstate, t.start);
+  const Weight current = sp_data_.Distance(dest);
+  const Weight combined = Plus(current, w);
+  if (current != combined) {
+    sp_data_.SetParent(dest, s);
+    sp_data_.SetParenId(dest, paren_id);
+    sp_data_.SetDistance(dest, combined);
+    Enqueue(dest);
+  }
+}
+
+// Puts the PDT state of 's' into the active queue. A state already flagged
+// as enqueued only has its queue position updated; otherwise it is inserted,
+// flagged, and counted.
+template<class Arc, class Queue> inline
+void PdtShortestPath<Arc, Queue>::Enqueue(SearchState s) {
+  const bool already_queued = (sp_data_.Flags(s) & kEnqueued) != 0;
+  if (already_queued) {
+    state_queue_->Update(s.state);
+    return;
+  }
+  state_queue_->Enqueue(s.state);
+  sp_data_.SetFlags(s, kEnqueued, kEnqueued);
+  ++nenqueued_;
+}
+
+// Follows parent pointers to find the shortest path. Uses a stack
+// since the shortest distance is stored recursively.
+// The path is built back to front, starting at f_parent_ (the final state
+// of the best complete path): each iteration adds one output state and, for
+// all but the first, an arc from it to the previously added state. The last
+// state added becomes the start state. If no complete path was found
+// (f_parent_.state is kNoStateId) no states are added.
+template<class Arc, class Queue>
+void PdtShortestPath<Arc, Queue>::GetPath() {
+ // s/d: current search state and the one handled in the previous iteration;
+ // s_p/d_p: the output states created for them.
+ SearchState s = f_parent_, d = SearchState(kNoStateId, kNoStateId);
+ StateId s_p = kNoStateId, d_p = kNoStateId;
+ Arc arc(kNoArc);
+ Label paren_id = kNoLabel;
+ // Close parentheses crossed so far whose open parenthesis is still ahead.
+ stack<ParenSpec> paren_stack;
+ while (s.state != kNoStateId) {
+ d_p = s_p;
+ s_p = ofst_->AddState();
+ if (d.state == kNoStateId) {
+ // First iteration: this is the final state of the path.
+ ofst_->SetFinal(s_p, ifst_->Final(f_parent_.state));
+ } else {
+ if (paren_id != kNoLabel) { // paren?
+ if (arc.ilabel == parens_[paren_id].first) { // open paren
+ paren_stack.pop();
+ } else { // close paren
+ ParenSpec paren(paren_id, d.start, s.start);
+ paren_stack.push(paren);
+ }
+ // Parentheses are replaced by epsilon unless asked to keep them.
+ if (!keep_parens_)
+ arc.ilabel = arc.olabel = 0;
+ }
+ arc.nextstate = d_p;
+ ofst_->AddArc(s_p, arc);
+ }
+ d = s;
+ s = sp_data_.Parent(d);
+ paren_id = sp_data_.ParenId(d);
+ if (s.state != kNoStateId) {
+ arc = GetPathArc(s, d, paren_id, false);
+ } else if (!paren_stack.empty()) {
+ // 'd' is the root of its sub-graph: continue in the enclosing sub-graph
+ // through the open parenthesis matching the close paren on the stack.
+ ParenSpec paren = paren_stack.top();
+ s = sp_data_.Parent(paren);
+ paren_id = paren.paren_id;
+ arc = GetPathArc(s, d, paren_id, true);
+ }
+ }
+ ofst_->SetStart(s_p);
+ ofst_->SetProperties(
+ ShortestPathProperties(ofst_->Properties(kFstProperties, false)),
+ kFstProperties);
+}
+
+
+// Among the input arcs from s.state to d.state, returns the least-weight one
+// whose label kind matches the request: a non-parenthesis arc when
+// 'paren_id' is kNoLabel, otherwise a parenthesis with that ID and the
+// requested open/close type. If none matches, reports an error, sets the
+// error flag and returns kNoArc.
+template<class Arc, class Queue>
+Arc PdtShortestPath<Arc, Queue>::GetPathArc(
+    SearchState s, SearchState d, Label paren_id, bool open_paren) {
+  typedef typename unordered_map<Label, Label>::const_iterator ParenIdIter;
+
+  Arc best = kNoArc;
+  for (ArcIterator< Fst<Arc> > aiter(*ifst_, s.state);
+       !aiter.Done();
+       aiter.Next()) {
+    const Arc &candidate = aiter.Value();
+    if (candidate.nextstate != d.state)
+      continue;
+
+    // Classifies the candidate: paren ID (kNoLabel for ordinary arcs) and,
+    // for parentheses, whether its open/close type is the wrong one.
+    Label candidate_paren_id = kNoLabel;
+    bool wrong_type = false;
+    const ParenIdIter found = paren_id_map_.find(candidate.ilabel);
+    if (found != paren_id_map_.end()) {
+      candidate_paren_id = found->second;
+      const bool candidate_open =
+          candidate.ilabel == parens_[candidate_paren_id].first;
+      wrong_type = candidate_open != open_paren;
+    }
+    if (wrong_type || candidate_paren_id != paren_id)
+      continue;
+
+    if (candidate.weight == Plus(candidate.weight, best.weight))
+      best = candidate;
+  }
+
+  if (best.nextstate == kNoStateId) {
+    FSTERROR() << "PdtShortestPath::GetPathArc failed to find arc";
+    error_ = true;
+  }
+  return best;
+}
+
+// Definitions of the static constants declared in PdtShortestPath.
+
+// Arc with no labels, Zero() weight and no destination state; GetPathArc()
+// returns it when no matching arc exists.
+template<class Arc, class Queue>
+const Arc PdtShortestPath<Arc, Queue>::kNoArc
+ = Arc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId);
+
+// Search-state flag bits owned by PdtShortestPath. They share the flags byte
+// with PdtShortestPathData's 0x01, 0x02 and 0x04 bits.
+template<class Arc, class Queue>
+const uint8 PdtShortestPath<Arc, Queue>::kEnqueued = 0x10;
+
+template<class Arc, class Queue>
+const uint8 PdtShortestPath<Arc, Queue>::kExpanded = 0x20;
+
+// Writes to 'ofst' the shortest balanced path of the PDT 'ifst' whose
+// parenthesis pairs are 'parens', using the queue type and settings carried
+// by 'opts'.
+template<class Arc, class Queue>
+void ShortestPath(const Fst<Arc> &ifst,
+                  const vector<pair<typename Arc::Label,
+                                    typename Arc::Label> > &parens,
+                  MutableFst<Arc> *ofst,
+                  const PdtShortestPathOptions<Arc, Queue> &opts) {
+  typedef PdtShortestPath<Arc, Queue> Solver;
+  Solver solver(ifst, parens, opts);
+  solver.ShortestPath(ofst);
+}
+
+// Convenience overload: same as the four-argument form with a FIFO queue
+// over state ids and default-constructed options.
+template<class Arc>
+void ShortestPath(const Fst<Arc> &ifst,
+                  const vector<pair<typename Arc::Label,
+                                    typename Arc::Label> > &parens,
+                  MutableFst<Arc> *ofst) {
+  typedef typename Arc::StateId StateId;
+  typedef FifoQueue<StateId> Queue;
+  typedef PdtShortestPath<Arc, Queue> Solver;
+
+  PdtShortestPathOptions<Arc, Queue> default_opts;
+  Solver solver(ifst, parens, default_opts);
+  solver.ShortestPath(ofst);
+}
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_PDT_SHORTEST_PATH_H__
diff --git a/src/include/fst/factor-weight.h b/src/include/fst/factor-weight.h
new file mode 100644
index 0000000..ce0d58d
--- /dev/null
+++ b/src/include/fst/factor-weight.h
@@ -0,0 +1,476 @@
+// factor-weight.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// Classes to factor weights in an FST.
+
+#ifndef FST_LIB_FACTOR_WEIGHT_H__
+#define FST_LIB_FACTOR_WEIGHT_H__
+
+#include <algorithm>
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <fst/slist.h>
+#include <string>
+#include <utility>
+using std::pair; using std::make_pair;
+#include <vector>
+using std::vector;
+
+#include <fst/cache.h>
+#include <fst/test-properties.h>
+
+
+namespace fst {
+
+// Bit values for FactorWeightOptions::mode; they may be OR-ed together.
+// kFactorFinalWeights enables factoring of final weights, kFactorArcWeights
+// enables factoring of arc weights.
+const uint32 kFactorFinalWeights = 0x00000001;
+const uint32 kFactorArcWeights = 0x00000002;
+
+// Options for FactorWeightFst: cache settings (inherited from CacheOptions)
+// plus the quantization delta, the factoring mode and the labels used on arcs
+// that are created when final weights are factored.
+template <class Arc>
+struct FactorWeightOptions : CacheOptions {
+ typedef typename Arc::Label Label;
+ // Passed to Weight::Quantize() on residual weights before state lookup.
+ float delta;
+ uint32 mode; // factor arc weights and/or final weights
+ Label final_ilabel; // input label of arc created when factoring final w's
+ Label final_olabel; // output label of arc created when factoring final w's
+
+ // Explicit cache options and delta; mode defaults to factoring both arc and
+ // final weights, labels default to 0.
+ FactorWeightOptions(const CacheOptions &opts, float d,
+ uint32 m = kFactorArcWeights | kFactorFinalWeights,
+ Label il = 0, Label ol = 0)
+ : CacheOptions(opts), delta(d), mode(m), final_ilabel(il),
+ final_olabel(ol) {}
+
+ // Default-constructed cache options with an explicit delta.
+ explicit FactorWeightOptions(
+ float d, uint32 m = kFactorArcWeights | kFactorFinalWeights,
+ Label il = 0, Label ol = 0)
+ : delta(d), mode(m), final_ilabel(il), final_olabel(ol) {}
+
+ // Default-constructed cache options and delta = kDelta. Since every
+ // parameter has a default, this also serves as the default constructor.
+ FactorWeightOptions(uint32 m = kFactorArcWeights | kFactorFinalWeights,
+ Label il = 0, Label ol = 0)
+ : delta(kDelta), mode(m), final_ilabel(il), final_olabel(ol) {}
+};
+
+
+// A factor iterator takes as argument a weight w and returns a
+// sequence of pairs of weights (xi,yi) such that the sum of the
+// products xi times yi is equal to w. If w is fully factored,
+// the iterator should return nothing.
+//
+// template <class W>
+// class FactorIterator {
+// public:
+// FactorIterator(W w);
+// bool Done() const;
+// void Next();
+// pair<W, W> Value() const;
+// void Reset();
+// }
+
+
+// Trivial factor iterator: it reports Done() from the start, so every weight
+// is treated as already fully factored.
+template <class W>
+class IdentityFactor {
+ public:
+  IdentityFactor(const W &w) {}
+
+  // Always true: there is never a factorization to visit.
+  bool Done() const { return true; }
+
+  void Next() {}
+
+  void Reset() {}
+
+  // Unused in practice because Done() is always true.
+  pair<W, W> Value() const {
+    return make_pair(W::One(), W::One());
+  }
+};
+
+
+// Factor iterator that splits a StringWeight w into 'ab', where 'a' holds the
+// single label the weight's iterator yields first and 'b' holds the remaining
+// labels. Weights of size <= 1 are considered fully factored.
+template <typename L, StringType S = STRING_LEFT>
+class StringFactor {
+ public:
+  StringFactor(const StringWeight<L, S> &w)
+      : weight_(w), done_(w.Size() <= 1) {}
+
+  bool Done() const { return done_; }
+
+  // There is only one factorization, so a single step exhausts the iterator.
+  void Next() { done_ = true; }
+
+  void Reset() { done_ = weight_.Size() <= 1; }
+
+  // Returns the pair (first label, rest of the string).
+  pair< StringWeight<L, S>, StringWeight<L, S> > Value() const {
+    StringWeightIterator<L, S> labels(weight_);
+    StringWeight<L, S> head(labels.Value());
+    StringWeight<L, S> tail;
+    labels.Next();
+    while (!labels.Done()) {
+      tail.PushBack(labels.Value());
+      labels.Next();
+    }
+    return make_pair(head, tail);
+  }
+
+ private:
+  StringWeight<L, S> weight_;
+  bool done_;
+};
+
+
+// Factor a GallicWeight using StringFactor: the string component is split
+// into (first label, rest); the first factor keeps the original second
+// component (Value2()) and the second factor carries W::One().
+template <class L, class W, StringType S = STRING_LEFT>
+class GallicFactor {
+ public:
+  GallicFactor(const GallicWeight<L, W, S> &w)
+      : weight_(w), done_(w.Value1().Size() <= 1) {}
+
+  bool Done() const { return done_; }
+
+  // There is only one factorization, so a single step exhausts the iterator.
+  void Next() { done_ = true; }
+
+  pair< GallicWeight<L, W, S>, GallicWeight<L, W, S> > Value() const {
+    StringFactor<L, S> iter(weight_.Value1());
+    // Factor the string once and reuse both halves; StringFactor::Value()
+    // walks and copies the whole string on every call.
+    const pair< StringWeight<L, S>, StringWeight<L, S> > parts = iter.Value();
+    GallicWeight<L, W, S> w1(parts.first, weight_.Value2());
+    GallicWeight<L, W, S> w2(parts.second, W::One());
+    return make_pair(w1, w2);
+  }
+
+  void Reset() { done_ = weight_.Value1().Size() <= 1; }
+
+ private:
+  GallicWeight<L, W, S> weight_;
+  bool done_;
+};
+
+
+// Implementation class for FactorWeight.
+//
+// Every state s of the result corresponds to an Element stored in
+// elements_[s]: an input state together with a residual weight that still has
+// to be emitted. Elements whose input state is kNoStateId are created only
+// while factoring final weights and carry nothing but a residual weight.
+// States, arcs and final weights are computed on demand and cached by the
+// CacheImpl base class.
+template <class A, class F>
+class FactorWeightFstImpl
+    : public CacheImpl<A> {
+ public:
+  using FstImpl<A>::SetType;
+  using FstImpl<A>::SetProperties;
+  using FstImpl<A>::SetInputSymbols;
+  using FstImpl<A>::SetOutputSymbols;
+
+  using CacheBaseImpl< CacheState<A> >::PushArc;
+  using CacheBaseImpl< CacheState<A> >::HasStart;
+  using CacheBaseImpl< CacheState<A> >::HasFinal;
+  using CacheBaseImpl< CacheState<A> >::HasArcs;
+  using CacheBaseImpl< CacheState<A> >::SetArcs;
+  using CacheBaseImpl< CacheState<A> >::SetFinal;
+  using CacheBaseImpl< CacheState<A> >::SetStart;
+
+  typedef A Arc;
+  typedef typename A::Label Label;
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+  typedef F FactorIterator;
+
+  struct Element {
+    Element() {}
+
+    Element(StateId s, Weight w) : state(s), weight(w) {}
+
+    StateId state;     // Input state Id
+    Weight weight;     // Residual weight
+  };
+
+  // Builds the implementation over a copy of 'fst' using the factoring mode,
+  // quantization delta and final-arc labels from 'opts'.
+  FactorWeightFstImpl(const Fst<A> &fst, const FactorWeightOptions<A> &opts)
+      : CacheImpl<A>(opts),
+        fst_(fst.Copy()),
+        delta_(opts.delta),
+        mode_(opts.mode),
+        final_ilabel_(opts.final_ilabel),
+        final_olabel_(opts.final_olabel) {
+    SetType("factor_weight");
+    uint64 props = fst.Properties(kFstProperties, false);
+    SetProperties(FactorWeightProperties(props), kCopyProperties);
+
+    SetInputSymbols(fst.InputSymbols());
+    SetOutputSymbols(fst.OutputSymbols());
+
+    if (mode_ == 0)
+      LOG(WARNING) << "FactorWeightFst: factor mode is set to 0: "
+                   << "factoring neither arc weights nor final weights.";
+  }
+
+  // Copy constructor; the underlying FST is duplicated with Copy(true). The
+  // element tables are not copied, so the new object starts with empty ones.
+  FactorWeightFstImpl(const FactorWeightFstImpl<A, F> &impl)
+      : CacheImpl<A>(impl),
+        fst_(impl.fst_->Copy(true)),
+        delta_(impl.delta_),
+        mode_(impl.mode_),
+        final_ilabel_(impl.final_ilabel_),
+        final_olabel_(impl.final_olabel_) {
+    SetType("factor_weight");
+    SetProperties(impl.Properties(), kCopyProperties);
+    SetInputSymbols(impl.InputSymbols());
+    SetOutputSymbols(impl.OutputSymbols());
+  }
+
+  ~FactorWeightFstImpl() {
+    delete fst_;
+  }
+
+  // Returns the start state, creating it on first use as the element
+  // (input start state, One). Returns kNoStateId if the input has no start.
+  StateId Start() {
+    if (!HasStart()) {
+      StateId s = fst_->Start();
+      if (s == kNoStateId)
+        return kNoStateId;
+      // Reuse 's' rather than querying the input FST a second time.
+      StateId start = FindState(Element(s, Weight::One()));
+      SetStart(start);
+    }
+    return CacheImpl<A>::Start();
+  }
+
+  // Returns the final weight of state 's'. The candidate weight is the
+  // residual weight, times the input final weight when the element has an
+  // input state. If final weights are being factored and the candidate can be
+  // factored further, the state is made non-final (Zero); Expand() emits the
+  // factors as arcs instead.
+  Weight Final(StateId s) {
+    if (!HasFinal(s)) {
+      const Element &e = elements_[s];
+      // TODO: fix so cast is unnecessary
+      Weight w = e.state == kNoStateId
+                 ? e.weight
+                 : (Weight) Times(e.weight, fst_->Final(e.state));
+      FactorIterator f(w);
+      if (!(mode_ & kFactorFinalWeights) || f.Done())
+        SetFinal(s, w);
+      else
+        SetFinal(s, Weight::Zero());
+    }
+    return CacheImpl<A>::Final(s);
+  }
+
+  size_t NumArcs(StateId s) {
+    if (!HasArcs(s))
+      Expand(s);
+    return CacheImpl<A>::NumArcs(s);
+  }
+
+  size_t NumInputEpsilons(StateId s) {
+    if (!HasArcs(s))
+      Expand(s);
+    return CacheImpl<A>::NumInputEpsilons(s);
+  }
+
+  size_t NumOutputEpsilons(StateId s) {
+    if (!HasArcs(s))
+      Expand(s);
+    return CacheImpl<A>::NumOutputEpsilons(s);
+  }
+
+  uint64 Properties() const { return Properties(kFstProperties); }
+
+  // Set error if found; return FST impl properties.
+  uint64 Properties(uint64 mask) const {
+    if ((mask & kError) && fst_->Properties(kError, false))
+      SetProperties(kError, kError);
+    return FstImpl<Arc>::Properties(mask);
+  }
+
+  void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
+    if (!HasArcs(s))
+      Expand(s);
+    CacheImpl<A>::InitArcIterator(s, data);
+  }
+
+
+  // Find state corresponding to an element. Create new state
+  // if element not found.
+  //
+  // When arc weights are not factored, elements with residual weight One map
+  // one-to-one onto input states, so they are looked up in the dense
+  // 'unfactored_' table rather than in the hash map.
+  StateId FindState(const Element &e) {
+    // Elements without an input state (kNoStateId, produced while factoring
+    // final weights) must not take the dense path: kNoStateId is negative, so
+    // the size comparison below would treat it as a huge unsigned value and
+    // the table would grow until memory is exhausted.
+    if (!(mode_ & kFactorArcWeights) && e.weight == Weight::One() &&
+        e.state != kNoStateId) {
+      while (unfactored_.size() <= e.state)
+        unfactored_.push_back(kNoStateId);
+      if (unfactored_[e.state] == kNoStateId) {
+        unfactored_[e.state] = elements_.size();
+        elements_.push_back(e);
+      }
+      return unfactored_[e.state];
+    } else {
+      typename ElementMap::iterator eit = element_map_.find(e);
+      if (eit != element_map_.end()) {
+        return (*eit).second;
+      } else {
+        StateId s = elements_.size();
+        elements_.push_back(e);
+        element_map_.insert(pair<const Element, StateId>(e, s));
+        return s;
+      }
+    }
+  }
+
+  // Computes the outgoing transitions from a state, creating new destination
+  // states as needed.
+  void Expand(StateId s) {
+    // Copied by value on purpose: FindState() below may append to elements_
+    // and invalidate references into it.
+    Element e = elements_[s];
+    if (e.state != kNoStateId) {
+      for (ArcIterator< Fst<A> > ait(*fst_, e.state);
+           !ait.Done();
+           ait.Next()) {
+        const A &arc = ait.Value();
+        Weight w = Times(e.weight, arc.weight);
+        FactorIterator fit(w);
+        if (!(mode_ & kFactorArcWeights) || fit.Done()) {
+          // Nothing to factor: emit the full weight, no residual remains.
+          StateId d = FindState(Element(arc.nextstate, Weight::One()));
+          PushArc(s, Arc(arc.ilabel, arc.olabel, w, d));
+        } else {
+          // One arc per factorization (x, y): x goes on the arc, the
+          // quantized y becomes the residual of the destination element.
+          for (; !fit.Done(); fit.Next()) {
+            const pair<Weight, Weight> &p = fit.Value();
+            StateId d = FindState(Element(arc.nextstate,
+                                          p.second.Quantize(delta_)));
+            PushArc(s, Arc(arc.ilabel, arc.olabel, p.first, d));
+          }
+        }
+      }
+    }
+
+    // Factor the final weight into arcs labelled final_ilabel_/final_olabel_
+    // that lead to elements without an input state.
+    if ((mode_ & kFactorFinalWeights) &&
+        ((e.state == kNoStateId) ||
+         (fst_->Final(e.state) != Weight::Zero()))) {
+      Weight w = e.state == kNoStateId
+                 ? e.weight
+                 : Times(e.weight, fst_->Final(e.state));
+      for (FactorIterator fit(w);
+           !fit.Done();
+           fit.Next()) {
+        const pair<Weight, Weight> &p = fit.Value();
+        StateId d = FindState(Element(kNoStateId,
+                                      p.second.Quantize(delta_)));
+        PushArc(s, Arc(final_ilabel_, final_olabel_, p.first, d));
+      }
+    }
+    SetArcs(s);
+  }
+
+ private:
+  // Multiplier applied to the state id when hashing Elements.
+  static const size_t kPrime = 7853;
+
+  // Equality function for Elements, assume weights have been quantized.
+  class ElementEqual {
+   public:
+    bool operator()(const Element &x, const Element &y) const {
+      return x.state == y.state && x.weight == y.weight;
+    }
+  };
+
+  // Hash function for Elements to Fst states.
+  class ElementKey {
+   public:
+    size_t operator()(const Element &x) const {
+      return static_cast<size_t>(x.state * kPrime + x.weight.Hash());
+    }
+   private:
+  };
+
+  typedef unordered_map<Element, StateId, ElementKey, ElementEqual> ElementMap;
+
+  const Fst<A> *fst_;          // copy of the input FST, deleted in destructor
+  float delta_;                // quantization delta for residual weights
+  uint32 mode_;                // factoring arc and/or final weights
+  Label final_ilabel_;         // ilabel of arc created when factoring final w's
+  Label final_olabel_;         // olabel of arc created when factoring final w's
+  vector<Element> elements_;   // mapping Fst state to Elements
+  ElementMap element_map_;     // mapping Elements to Fst state
+  // mapping between old/new 'StateId' for states that do not need to
+  // be factored when 'mode_' is '0' or 'kFactorFinalWeights'
+  vector<StateId> unfactored_;
+
+  void operator=(const FactorWeightFstImpl<A, F> &);  // disallow
+};
+
+template <class A, class F> const size_t FactorWeightFstImpl<A, F>::kPrime;
+
+
+// FactorWeightFst takes as template parameter a FactorIterator as
+// defined above. The result of weight factoring is a transducer
+// equivalent to the input whose path weights have been factored
+// according to the FactorIterator. States and transitions will be
+// added as necessary. The algorithm is a generalization to arbitrary
+// weights of the second step of the input epsilon-normalization
+// algorithm due to Mohri, "Generic epsilon-removal and input
+// epsilon-normalization algorithms for weighted transducers",
+// International Journal of Computer Science 13(1): 129-143 (2002).
+//
+// This class attaches interface to implementation and handles
+// reference counting, delegating most methods to ImplToFst.
+template <class A, class F>
+class FactorWeightFst : public ImplToFst< FactorWeightFstImpl<A, F> > {
+ public:
+ friend class ArcIterator< FactorWeightFst<A, F> >;
+ friend class StateIterator< FactorWeightFst<A, F> >;
+
+ typedef A Arc;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+ typedef CacheState<A> State;
+ typedef FactorWeightFstImpl<A, F> Impl;
+
+ // Factors 'fst' using default-constructed FactorWeightOptions.
+ FactorWeightFst(const Fst<A> &fst)
+ : ImplToFst<Impl>(new Impl(fst, FactorWeightOptions<A>())) {}
+
+ // Factors 'fst' using the supplied options.
+ FactorWeightFst(const Fst<A> &fst, const FactorWeightOptions<A> &opts)
+ : ImplToFst<Impl>(new Impl(fst, opts)) {}
+
+ // See Fst<>::Copy() for doc.
+ FactorWeightFst(const FactorWeightFst<A, F> &fst, bool copy)
+ : ImplToFst<Impl>(fst, copy) {}
+
+ // Get a copy of this FactorWeightFst. See Fst<>::Copy() for further doc.
+ virtual FactorWeightFst<A, F> *Copy(bool copy = false) const {
+ return new FactorWeightFst<A, F>(*this, copy);
+ }
+
+ // Defined out of line below; stores a newly allocated StateIterator in
+ // data->base.
+ virtual inline void InitStateIterator(StateIteratorData<A> *data) const;
+
+ // Forwards to the implementation, which expands state 's' if its arcs are
+ // not cached yet.
+ virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
+ GetImpl()->InitArcIterator(s, data);
+ }
+
+ private:
+ // Makes visible to friends.
+ Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
+
+ void operator=(const FactorWeightFst<A, F> &fst); // Disallow
+};
+
+
+// Specialization for FactorWeightFst.
+// State iteration is delegated entirely to CacheStateIterator, which is given
+// both the FST and its implementation object.
+template<class A, class F>
+class StateIterator< FactorWeightFst<A, F> >
+ : public CacheStateIterator< FactorWeightFst<A, F> > {
+ public:
+ explicit StateIterator(const FactorWeightFst<A, F> &fst)
+ : CacheStateIterator< FactorWeightFst<A, F> >(fst, fst.GetImpl()) {}
+};
+
+
+// Specialization for FactorWeightFst.
+// Arc iteration is delegated to CacheArcIterator over the implementation's
+// cache.
+template <class A, class F>
+class ArcIterator< FactorWeightFst<A, F> >
+ : public CacheArcIterator< FactorWeightFst<A, F> > {
+ public:
+ typedef typename A::StateId StateId;
+
+ // Expands state 's' in the implementation if its arcs are not cached yet.
+ ArcIterator(const FactorWeightFst<A, F> &fst, StateId s)
+ : CacheArcIterator< FactorWeightFst<A, F> >(fst.GetImpl(), s) {
+ if (!fst.GetImpl()->HasArcs(s))
+ fst.GetImpl()->Expand(s);
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+// Allocates a StateIterator for this FST and stores it in data->base.
+// NOTE(review): ownership of the allocated iterator presumably passes to the
+// holder of 'data'; confirm against StateIteratorData.
+template <class A, class F> inline
+void FactorWeightFst<A, F>::InitStateIterator(StateIteratorData<A> *data) const
+{
+ data->base = new StateIterator< FactorWeightFst<A, F> >(*this);
+}
+
+
+} // namespace fst
+
+#endif // FST_LIB_FACTOR_WEIGHT_H__
diff --git a/src/include/fst/flags.h b/src/include/fst/flags.h
new file mode 100644
index 0000000..ec3d301
--- /dev/null
+++ b/src/include/fst/flags.h
@@ -0,0 +1,224 @@
+// flags.h
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Google-style flag handling declarations and inline definitions.
+
+#ifndef FST_LIB_FLAGS_H__
+#define FST_LIB_FLAGS_H__
+
+#include <iostream>
+#include <map>
+#include <string>
+
+#include <fst/types.h>
+#include <fst/lock.h>
+
+using std::string;
+
+//
+// FLAGS USAGE:
+//
+// Definition example:
+//
+// DEFINE_int32(length, 0, "length");
+//
+// This defines variable FLAGS_length, initialized to 0.
+//
+// Declaration example:
+//
+// DECLARE_int32(length);
+//
+// SetFlags() can be used to set flags from the command line
+// using, for example, '--length=2'.
+//
+// ShowUsage() can be used to print out command and flag usage.
+//
+
+// Each DECLARE_<type>(name) expands to an 'extern' declaration of the variable
+// FLAGS_<name>, for use in files other than the one that DEFINE_s the flag.
+#define DECLARE_bool(name) extern bool FLAGS_ ## name
+#define DECLARE_string(name) extern string FLAGS_ ## name
+#define DECLARE_int32(name) extern int32 FLAGS_ ## name
+#define DECLARE_int64(name) extern int64 FLAGS_ ## name
+#define DECLARE_double(name) extern double FLAGS_ ## name
+
+// Record describing one registered flag of type T. DEFINE_VAR fills it with
+// the address of FLAGS_<name>, the doc string, the type spelled as text and
+// the default value.
+template <typename T>
+struct FlagDescription {
+ FlagDescription(T *addr, const char *doc, const char *type, const T val)
+ : address(addr), doc_string(doc), type_name(type), default_value(val) {}
+
+ // Location that FlagRegister::SetFlag() writes parsed values to.
+ T *address;
+ // Help text printed by FlagRegister::ShowUsage().
+ const char *doc_string;
+ // Type name printed by FlagRegister::ShowUsage().
+ const char *type_name;
+ // Default printed by FlagRegister::ShowUsage(); being const, it makes the
+ // struct non-assignable.
+ const T default_value;
+};
+
+// Registry mapping flag names to FlagDescriptions for flags of type T. There
+// is one registry per T; it is created on the first call to GetRegister().
+template <typename T>
+class FlagRegister {
+ public:
+  // Returns the registry for type T, running Init() through FstOnceInit
+  // first.
+  static FlagRegister<T> *GetRegister() {
+    fst::FstOnceInit(&register_init_, &FlagRegister<T>::Init);
+    return register_;
+  }
+
+  // Returns the description registered under 'name'. If no such flag exists,
+  // returns a sentinel description whose 'address', 'doc_string' and
+  // 'type_name' are all null; callers should test 'address' before use.
+  // (Previously the not-found branch yielded the literal 0, which does not
+  // convert to FlagDescription<T> and therefore could not be instantiated.)
+  const FlagDescription<T> &GetFlagDescription(const string &name) const {
+    fst::MutexLock l(register_lock_);
+    // Function-local static so the returned reference stays valid after the
+    // call; it is first initialized while the registry lock is held.
+    static const FlagDescription<T> no_description(0, 0, 0, T());
+    typename std::map< string, FlagDescription<T> >::const_iterator it =
+        flag_table_.find(name);
+    return it != flag_table_.end() ? it->second : no_description;
+  }
+
+  // Registers 'desc' under 'name'. An existing entry with the same name is
+  // kept, because std::map::insert does not overwrite.
+  void SetDescription(const string &name,
+                      const FlagDescription<T> &desc) {
+    fst::MutexLock l(register_lock_);
+    flag_table_.insert(make_pair(name, desc));
+  }
+
+  // Parses a boolean: "true", "1" and the empty string mean true; "false"
+  // and "0" mean false. Any other text is rejected and *address is left
+  // untouched.
+  bool SetFlag(const string &val, bool *address) const {
+    if (val == "true" || val == "1" || val.empty()) {
+      *address = true;
+      return true;
+    } else if (val == "false" || val == "0") {
+      *address = false;
+      return true;
+    }
+    else {
+      return false;
+    }
+  }
+
+  // String flags accept any value.
+  bool SetFlag(const string &val, string *address) const {
+    *address = val;
+    return true;
+  }
+
+  // Numeric flags: succeed only if 'val' is non-empty and consumed in full.
+  // Note that *address is overwritten even when the parse is rejected.
+  bool SetFlag(const string &val, int32 *address) const {
+    char *p = 0;
+    *address = strtol(val.c_str(), &p, 0);
+    return !val.empty() && *p == '\0';
+  }
+  bool SetFlag(const string &val, int64 *address) const {
+    char *p = 0;
+    *address = strtoll(val.c_str(), &p, 0);
+    return !val.empty() && *p == '\0';
+  }
+  bool SetFlag(const string &val, double *address) const {
+    char *p = 0;
+    *address = strtod(val.c_str(), &p);
+    return !val.empty() && *p == '\0';
+  }
+
+  // Sets the flag named 'arg' from the text 'val'. Returns false if no flag
+  // of this type has that name or if 'val' does not parse.
+  bool SetFlag(const string &arg, const string &val) const {
+    // Map keys are unique, so a direct lookup finds the same entry the
+    // former linear scan did.
+    typename std::map< string, FlagDescription<T> >::const_iterator it =
+        flag_table_.find(arg);
+    if (it == flag_table_.end())
+      return false;
+    return SetFlag(val, it->second.address);
+  }
+
+  // Prints ", default = <value>"; booleans print as true/false and strings
+  // are quoted.
+  void ShowDefault(bool default_value) const {
+    std::cout << ", default = ";
+    std::cout << (default_value ? "true" : "false");
+  }
+  void ShowDefault(const string &default_value) const {
+    std::cout << ", default = ";
+    std::cout << "\"" << default_value << "\"";
+  }
+  template<typename V> void ShowDefault(const V& default_value) const {
+    std::cout << ", default = ";
+    std::cout << default_value;
+  }
+
+  // Prints name, type, default and doc string of every registered flag of
+  // this type to std::cout, in name order.
+  void ShowUsage() const {
+    for (typename std::map< string,
+             FlagDescription<T> >::const_iterator it =
+           flag_table_.begin();
+         it != flag_table_.end();
+         ++it) {
+      const string &name = it->first;
+      const FlagDescription<T> &desc = it->second;
+      std::cout << "    --" << name
+           << ": type = " << desc.type_name;
+      ShowDefault(desc.default_value);
+      std::cout << "\n      " << desc.doc_string  << "\n";
+    }
+  }
+
+ private:
+  // Creates the lock and the registry; both live for the rest of the program.
+  static void Init() {
+    register_lock_ = new fst::Mutex;
+    register_ = new FlagRegister<T>;
+  }
+  static fst::FstOnceType register_init_;   // ensures only called once
+  static fst::Mutex* register_lock_;        // multithreading lock
+  static FlagRegister<T> *register_;
+
+  std::map< string, FlagDescription<T> > flag_table_;
+};
+
+// Static member definitions for FlagRegister<T>. The lock and the registry
+// start out null and are allocated by FlagRegister<T>::Init().
+template <class T>
+fst::FstOnceType FlagRegister<T>::register_init_ = fst::FST_ONCE_INIT;
+
+template <class T>
+fst::Mutex *FlagRegister<T>::register_lock_ = 0;
+
+template <class T>
+FlagRegister<T> *FlagRegister<T>::register_ = 0;
+
+
+// Helper whose constructor enters a flag into the FlagRegister for type T.
+// DEFINE_VAR creates one static instance per flag.
+template <typename T>
+class FlagRegisterer {
+ public:
+  FlagRegisterer(const string &name, const FlagDescription<T> &desc) {
+    FlagRegister<T>::GetRegister()->SetDescription(name, desc);
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(FlagRegisterer);
+};
+
+
+// DEFINE_VAR(type, name, value, doc) defines the variable FLAGS_<name> with
+// initial value 'value' and a static FlagRegisterer<type> that registers the
+// flag's address, doc string, type name and default value under "name".
+// Note that 'value' is expanded twice.
+#define DEFINE_VAR(type, name, value, doc) \
+ type FLAGS_ ## name = value; \
+ static FlagRegisterer<type> \
+ name ## _flags_registerer(#name, FlagDescription<type>(&FLAGS_ ## name, \
+ doc, \
+ #type, \
+ value))
+
+// Type-specific front ends for DEFINE_VAR.
+#define DEFINE_bool(name, value, doc) DEFINE_VAR(bool, name, value, doc)
+#define DEFINE_string(name, value, doc) \
+ DEFINE_VAR(string, name, value, doc)
+#define DEFINE_int32(name, value, doc) DEFINE_VAR(int32, name, value, doc)
+#define DEFINE_int64(name, value, doc) DEFINE_VAR(int64, name, value, doc)
+#define DEFINE_double(name, value, doc) DEFINE_VAR(double, name, value, doc)
+
+
+// Temporary directory
+DECLARE_string(tmpdir);
+
+void SetFlags(const char *usage, int *argc, char ***argv, bool remove_flags);
+
+// Deprecated alias kept for backward compatibility; forwards all arguments
+// unchanged to SetFlags().
+inline void InitFst(const char *usage, int *argc, char ***argv, bool rmflags) {
+  SetFlags(usage, argc, argv, rmflags);
+}
+
+void ShowUsage();
+
+#endif // FST_LIB_FLAGS_H__
diff --git a/src/include/fst/float-weight.h b/src/include/fst/float-weight.h
new file mode 100644
index 0000000..530cbdd
--- /dev/null
+++ b/src/include/fst/float-weight.h
@@ -0,0 +1,598 @@
+// float-weight.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Float weight set and associated semiring operation definitions.
+//
+
+#ifndef FST_LIB_FLOAT_WEIGHT_H__
+#define FST_LIB_FLOAT_WEIGHT_H__
+
+#include <limits>
+#include <climits>
+#include <sstream>
+#include <string>
+
+#include <fst/util.h>
+#include <fst/weight.h>
+
+
+namespace fst {
+
+// numeric limits class
+// Provides +infinity, -infinity and a quiet NaN for the floating-point type T
+// as static constants, taken from numeric_limits<T>.
+template <class T>
+class FloatLimits {
+ public:
+ static const T kPosInfinity;
+ static const T kNegInfinity;
+ static const T kNumberBad;
+};
+
+template <class T>
+const T FloatLimits<T>::kPosInfinity = numeric_limits<T>::infinity();
+
+// Defined in terms of kPosInfinity.
+template <class T>
+const T FloatLimits<T>::kNegInfinity = -FloatLimits<T>::kPosInfinity;
+
+// The "bad number" marker is a quiet NaN.
+template <class T>
+const T FloatLimits<T>::kNumberBad = numeric_limits<T>::quiet_NaN();
+
+// Base weight class holding a single value of floating-point type T. It
+// provides storage, binary I/O and hashing.
+template <class T = float>
+class FloatWeightTpl {
+ public:
+  // Leaves the stored value uninitialized.
+  FloatWeightTpl() {}
+
+  FloatWeightTpl(T f) : value_(f) {}
+
+  FloatWeightTpl(const FloatWeightTpl<T> &w) : value_(w.value_) {}
+
+  FloatWeightTpl<T> &operator=(const FloatWeightTpl<T> &w) {
+    value_ = w.value_;
+    return *this;
+  }
+
+  // Reads the value from 'strm' via ReadType().
+  istream &Read(istream &strm) {
+    return ReadType(strm, &value_);
+  }
+
+  // Writes the value to 'strm' via WriteType().
+  ostream &Write(ostream &strm) const {
+    return WriteType(strm, value_);
+  }
+
+  // Hashes by reinterpreting the stored bytes as a size_t. The integer member
+  // is zeroed first so that, when T is narrower than size_t, the bytes not
+  // covered by T do not hold garbage.
+  size_t Hash() const {
+    union {
+      T f;
+      size_t s;
+    } bits;
+    bits.s = 0;
+    bits.f = value_;
+    return bits.s;
+  }
+
+  const T &Value() const { return value_; }
+
+ protected:
+  void SetValue(const T &f) { value_ = f; }
+
+  // Returns "" when T has the size of float, otherwise the width of T in
+  // bits as a decimal string.
+  inline static string GetPrecisionString() {
+    int64 width = sizeof(T);
+    if (width == sizeof(float)) return "";
+
+    string text;
+    Int64ToStr(width * CHAR_BIT, &text);
+    return text;
+  }
+
+ private:
+  T value_;
+};
+
+// Single-precision float weight
+typedef FloatWeightTpl<float> FloatWeight;
+
+// Exact equality of the stored values. NaN values never compare equal, so a
+// weight holding NaN is not equal to itself.
+template <class T>
+inline bool operator==(const FloatWeightTpl<T> &w1,
+ const FloatWeightTpl<T> &w2) {
+ // Volatile qualifier thwarts over-aggressive compiler optimizations
+ // that lead to problems esp. with NaturalLess().
+ volatile T v1 = w1.Value();
+ volatile T v2 = w2.Value();
+ return v1 == v2;
+}
+
+// Non-template overloads for double and float that forward to the template.
+// NOTE(review): presumably provided so that arguments needing an implicit
+// conversion to FloatWeightTpl can still be compared; confirm.
+inline bool operator==(const FloatWeightTpl<double> &w1,
+ const FloatWeightTpl<double> &w2) {
+ return operator==<double>(w1, w2);
+}
+
+inline bool operator==(const FloatWeightTpl<float> &w1,
+ const FloatWeightTpl<float> &w2) {
+ return operator==<float>(w1, w2);
+}
+
+// Inequality, defined as the negation of operator==.
+template <class T>
+inline bool operator!=(const FloatWeightTpl<T> &w1,
+ const FloatWeightTpl<T> &w2) {
+ return !(w1 == w2);
+}
+
+// Non-template overloads for double and float that forward to the template.
+inline bool operator!=(const FloatWeightTpl<double> &w1,
+ const FloatWeightTpl<double> &w2) {
+ return operator!=<double>(w1, w2);
+}
+
+inline bool operator!=(const FloatWeightTpl<float> &w1,
+ const FloatWeightTpl<float> &w2) {
+ return operator!=<float>(w1, w2);
+}
+
+// Returns true if the two values differ by at most 'delta' in either
+// direction.
+template <class T>
+inline bool ApproxEqual(const FloatWeightTpl<T> &w1,
+                        const FloatWeightTpl<T> &w2,
+                        float delta = kDelta) {
+  if (!(w1.Value() <= w2.Value() + delta))
+    return false;
+  return w2.Value() <= w1.Value() + delta;
+}
+
+// Text output: the infinities print as "Infinity" / "-Infinity", NaN prints
+// as "BadNumber", and everything else prints as the plain value.
+template <class T>
+inline ostream &operator<<(ostream &strm, const FloatWeightTpl<T> &w) {
+  if (w.Value() == FloatLimits<T>::kPosInfinity)
+    return strm << "Infinity";
+  if (w.Value() == FloatLimits<T>::kNegInfinity)
+    return strm << "-Infinity";
+  // Only NaN is unequal to itself.
+  const bool is_nan = w.Value() != w.Value();
+  if (is_nan)
+    return strm << "BadNumber";
+  return strm << w.Value();
+}
+
+// Text input: reads one whitespace-delimited token. "Infinity" and
+// "-Infinity" map to the corresponding limits; anything else is parsed with
+// strtod. If the token is not consumed in full, the stream state is set to
+// badbit and 'w' is left unchanged.
+template <class T>
+inline istream &operator>>(istream &strm, FloatWeightTpl<T> &w) {
+  string token;
+  strm >> token;
+  if (token == "Infinity") {
+    w = FloatWeightTpl<T>(FloatLimits<T>::kPosInfinity);
+    return strm;
+  }
+  if (token == "-Infinity") {
+    w = FloatWeightTpl<T>(FloatLimits<T>::kNegInfinity);
+    return strm;
+  }
+  char *end;
+  T f = strtod(token.c_str(), &end);
+  const bool fully_parsed = !(end < token.c_str() + token.size());
+  if (fully_parsed)
+    w = FloatWeightTpl<T>(f);
+  else
+    strm.clear(std::ios::badbit);
+  return strm;
+}
+
+
+// Tropical semiring: (min, +, inf, 0)
+// Plus is the minimum, Times is ordinary addition, Zero is +infinity and One
+// is 0.
+template <class T>
+class TropicalWeightTpl : public FloatWeightTpl<T> {
+ public:
+  using FloatWeightTpl<T>::Value;
+
+  typedef TropicalWeightTpl<T> ReverseWeight;
+
+  TropicalWeightTpl() : FloatWeightTpl<T>() {}
+
+  TropicalWeightTpl(T f) : FloatWeightTpl<T>(f) {}
+
+  TropicalWeightTpl(const TropicalWeightTpl<T> &w) : FloatWeightTpl<T>(w) {}
+
+  // Additive identity: +infinity.
+  static const TropicalWeightTpl<T> Zero() {
+    return TropicalWeightTpl<T>(FloatLimits<T>::kPosInfinity);
+  }
+
+  // Multiplicative identity: 0.
+  static const TropicalWeightTpl<T> One() {
+    return TropicalWeightTpl<T>(0.0F);
+  }
+
+  // Non-member marker: NaN.
+  static const TropicalWeightTpl<T> NoWeight() {
+    return TropicalWeightTpl<T>(FloatLimits<T>::kNumberBad);
+  }
+
+  // "tropical" followed by the precision suffix of T.
+  static const string &Type() {
+    static const string type = "tropical" +
+        FloatWeightTpl<T>::GetPrecisionString();
+    return type;
+  }
+
+  // Every value except NaN and -infinity belongs to the semiring.
+  bool Member() const {
+    // A value unequal to itself is an IEEE NaN.
+    if (!(Value() == Value()))
+      return false;
+    return Value() != FloatLimits<T>::kNegInfinity;
+  }
+
+  // Rounds a finite value to the nearest multiple of 'delta'; infinities and
+  // NaN are returned unchanged.
+  TropicalWeightTpl<T> Quantize(float delta = kDelta) const {
+    const bool special = Value() == FloatLimits<T>::kNegInfinity ||
+                         Value() == FloatLimits<T>::kPosInfinity ||
+                         Value() != Value();
+    if (special)
+      return *this;
+    return TropicalWeightTpl<T>(floor(Value()/delta + 0.5F) * delta);
+  }
+
+  TropicalWeightTpl<T> Reverse() const { return *this; }
+
+  static uint64 Properties() {
+    return kLeftSemiring | kRightSemiring | kCommutative |
+        kPath | kIdempotent;
+  }
+};
+
+// Single precision tropical weight
+typedef TropicalWeightTpl<float> TropicalWeight;
+
+// Tropical Plus: the smaller of the two values (w2 on ties). Yields NoWeight()
+// if either operand is not a member.
+template <class T>
+inline TropicalWeightTpl<T> Plus(const TropicalWeightTpl<T> &w1,
+                                 const TropicalWeightTpl<T> &w2) {
+  if (!(w1.Member() && w2.Member()))
+    return TropicalWeightTpl<T>::NoWeight();
+  if (w1.Value() < w2.Value())
+    return w1;
+  return w2;
+}
+
+// Single-precision overload; forwards to the template.
+inline TropicalWeightTpl<float> Plus(const TropicalWeightTpl<float> &w1,
+                                     const TropicalWeightTpl<float> &w2) {
+  return Plus<float>(w1, w2);
+}
+
+// Double-precision overload; forwards to the template.
+inline TropicalWeightTpl<double> Plus(const TropicalWeightTpl<double> &w1,
+                                      const TropicalWeightTpl<double> &w2) {
+  return Plus<double>(w1, w2);
+}
+
+// Tropical Times: the sum of the two values. +infinity (Zero) absorbs: if
+// either operand is +infinity that operand is returned. Yields NoWeight() if
+// either operand is not a member.
+template <class T>
+inline TropicalWeightTpl<T> Times(const TropicalWeightTpl<T> &w1,
+                                  const TropicalWeightTpl<T> &w2) {
+  if (!(w1.Member() && w2.Member()))
+    return TropicalWeightTpl<T>::NoWeight();
+  T f1 = w1.Value(), f2 = w2.Value();
+  if (f1 == FloatLimits<T>::kPosInfinity)
+    return w1;
+  if (f2 == FloatLimits<T>::kPosInfinity)
+    return w2;
+  return TropicalWeightTpl<T>(f1 + f2);
+}
+
+// Single-precision overload; forwards to the template.
+inline TropicalWeightTpl<float> Times(const TropicalWeightTpl<float> &w1,
+                                      const TropicalWeightTpl<float> &w2) {
+  return Times<float>(w1, w2);
+}
+
+// Double-precision overload; forwards to the template.
+inline TropicalWeightTpl<double> Times(const TropicalWeightTpl<double> &w1,
+                                       const TropicalWeightTpl<double> &w2) {
+  return Times<double>(w1, w2);
+}
+
+// Tropical Divide: the difference f1 - f2. Dividing by Zero (+infinity)
+// yields NaN; Zero divided by anything else stays Zero. Yields NoWeight() if
+// either operand is not a member. 'typ' is not used by this implementation.
+template <class T>
+inline TropicalWeightTpl<T> Divide(const TropicalWeightTpl<T> &w1,
+                                   const TropicalWeightTpl<T> &w2,
+                                   DivideType typ = DIVIDE_ANY) {
+  if (!(w1.Member() && w2.Member()))
+    return TropicalWeightTpl<T>::NoWeight();
+  T f1 = w1.Value(), f2 = w2.Value();
+  if (f2 == FloatLimits<T>::kPosInfinity)
+    return FloatLimits<T>::kNumberBad;
+  if (f1 == FloatLimits<T>::kPosInfinity)
+    return FloatLimits<T>::kPosInfinity;
+  return TropicalWeightTpl<T>(f1 - f2);
+}
+
+// Single-precision overload; forwards to the template.
+inline TropicalWeightTpl<float> Divide(const TropicalWeightTpl<float> &w1,
+                                       const TropicalWeightTpl<float> &w2,
+                                       DivideType typ = DIVIDE_ANY) {
+  return Divide<float>(w1, w2, typ);
+}
+
+// Double-precision overload; forwards to the template.
+inline TropicalWeightTpl<double> Divide(const TropicalWeightTpl<double> &w1,
+                                        const TropicalWeightTpl<double> &w2,
+                                        DivideType typ = DIVIDE_ANY) {
+  return Divide<double>(w1, w2, typ);
+}
+
+
+// Log semiring: (-log(e^-x + e^-y), +, inf, 0)
+// Plus is the negated log of the summed exponentials, Times is ordinary
+// addition, Zero is +infinity and One is 0.
+template <class T>
+class LogWeightTpl : public FloatWeightTpl<T> {
+ public:
+  using FloatWeightTpl<T>::Value;
+
+  typedef LogWeightTpl ReverseWeight;
+
+  LogWeightTpl() : FloatWeightTpl<T>() {}
+
+  LogWeightTpl(T f) : FloatWeightTpl<T>(f) {}
+
+  LogWeightTpl(const LogWeightTpl<T> &w) : FloatWeightTpl<T>(w) {}
+
+  // Additive identity: +infinity.
+  static const LogWeightTpl<T> Zero() {
+    return LogWeightTpl<T>(FloatLimits<T>::kPosInfinity);
+  }
+
+  // Multiplicative identity: 0.
+  static const LogWeightTpl<T> One() {
+    return LogWeightTpl<T>(0.0F);
+  }
+
+  // Non-member marker: NaN.
+  static const LogWeightTpl<T> NoWeight() {
+    return LogWeightTpl<T>(FloatLimits<T>::kNumberBad);
+  }
+
+  // "log" followed by the precision suffix of T.
+  static const string &Type() {
+    static const string type = "log" + FloatWeightTpl<T>::GetPrecisionString();
+    return type;
+  }
+
+  // Every value except NaN and -infinity belongs to the semiring.
+  bool Member() const {
+    // A value unequal to itself is an IEEE NaN.
+    if (!(Value() == Value()))
+      return false;
+    return Value() != FloatLimits<T>::kNegInfinity;
+  }
+
+  // Rounds a finite value to the nearest multiple of 'delta'; infinities and
+  // NaN are returned unchanged.
+  LogWeightTpl<T> Quantize(float delta = kDelta) const {
+    const bool special = Value() == FloatLimits<T>::kNegInfinity ||
+                         Value() == FloatLimits<T>::kPosInfinity ||
+                         Value() != Value();
+    if (special)
+      return *this;
+    return LogWeightTpl<T>(floor(Value()/delta + 0.5F) * delta);
+  }
+
+  LogWeightTpl<T> Reverse() const { return *this; }
+
+  static uint64 Properties() {
+    return kLeftSemiring | kRightSemiring | kCommutative;
+  }
+};
+
+// Single-precision log weight
+typedef LogWeightTpl<float> LogWeight;
+// Double-precision log weight
+typedef LogWeightTpl<double> Log64Weight;
+
+// Returns log(1 + e^-x). Used by the log-semiring Plus, which always calls it
+// with a non-negative argument.
+template <class T>
+inline T LogExp(T x) { return log(1.0F + exp(-x)); }
+
+// Log Plus: -log(e^-f1 + e^-f2), computed as the smaller operand minus
+// LogExp of the non-negative difference. If either operand is +infinity
+// (Zero) the other operand is returned. Unlike Times and Divide there is no
+// membership check here.
+template <class T>
+inline LogWeightTpl<T> Plus(const LogWeightTpl<T> &w1,
+                            const LogWeightTpl<T> &w2) {
+  T f1 = w1.Value(), f2 = w2.Value();
+  if (f1 == FloatLimits<T>::kPosInfinity)
+    return w2;
+  if (f2 == FloatLimits<T>::kPosInfinity)
+    return w1;
+  if (f1 > f2)
+    return LogWeightTpl<T>(f2 - LogExp(f1 - f2));
+  return LogWeightTpl<T>(f1 - LogExp(f2 - f1));
+}
+
+// Single-precision overload; forwards to the template.
+inline LogWeightTpl<float> Plus(const LogWeightTpl<float> &w1,
+                                const LogWeightTpl<float> &w2) {
+  return Plus<float>(w1, w2);
+}
+
+// Double-precision overload; forwards to the template.
+inline LogWeightTpl<double> Plus(const LogWeightTpl<double> &w1,
+                                 const LogWeightTpl<double> &w2) {
+  return Plus<double>(w1, w2);
+}
+
+// Log Times: the sum of the two values. +infinity (Zero) absorbs: if either
+// operand is +infinity that operand is returned. Yields NoWeight() if either
+// operand is not a member.
+template <class T>
+inline LogWeightTpl<T> Times(const LogWeightTpl<T> &w1,
+                             const LogWeightTpl<T> &w2) {
+  if (!(w1.Member() && w2.Member()))
+    return LogWeightTpl<T>::NoWeight();
+  T f1 = w1.Value(), f2 = w2.Value();
+  if (f1 == FloatLimits<T>::kPosInfinity)
+    return w1;
+  if (f2 == FloatLimits<T>::kPosInfinity)
+    return w2;
+  return LogWeightTpl<T>(f1 + f2);
+}
+
+// Single-precision overload; forwards to the template.
+inline LogWeightTpl<float> Times(const LogWeightTpl<float> &w1,
+                                 const LogWeightTpl<float> &w2) {
+  return Times<float>(w1, w2);
+}
+
+// Double-precision overload; forwards to the template.
+inline LogWeightTpl<double> Times(const LogWeightTpl<double> &w1,
+                                  const LogWeightTpl<double> &w2) {
+  return Times<double>(w1, w2);
+}
+
+// Log Divide: the difference f1 - f2. Dividing by Zero (+infinity) yields
+// NaN; Zero divided by anything else stays Zero. Yields NoWeight() if either
+// operand is not a member. 'typ' is not used by this implementation.
+template <class T>
+inline LogWeightTpl<T> Divide(const LogWeightTpl<T> &w1,
+                              const LogWeightTpl<T> &w2,
+                              DivideType typ = DIVIDE_ANY) {
+  if (!(w1.Member() && w2.Member()))
+    return LogWeightTpl<T>::NoWeight();
+  T f1 = w1.Value(), f2 = w2.Value();
+  if (f2 == FloatLimits<T>::kPosInfinity)
+    return FloatLimits<T>::kNumberBad;
+  if (f1 == FloatLimits<T>::kPosInfinity)
+    return FloatLimits<T>::kPosInfinity;
+  return LogWeightTpl<T>(f1 - f2);
+}
+
+// Single-precision overload; forwards to the template.
+inline LogWeightTpl<float> Divide(const LogWeightTpl<float> &w1,
+                                  const LogWeightTpl<float> &w2,
+                                  DivideType typ = DIVIDE_ANY) {
+  return Divide<float>(w1, w2, typ);
+}
+
+// Double-precision overload; forwards to the template.
+inline LogWeightTpl<double> Divide(const LogWeightTpl<double> &w1,
+                                   const LogWeightTpl<double> &w2,
+                                   DivideType typ = DIVIDE_ANY) {
+  return Divide<double>(w1, w2, typ);
+}
+
+// MinMax semiring: (min, max, inf, -inf)
+// Zero() is +infinity, One() is -infinity and NoWeight() wraps kNumberBad.
+template <class T>
+class MinMaxWeightTpl : public FloatWeightTpl<T> {
+ public:
+  using FloatWeightTpl<T>::Value;
+
+  typedef MinMaxWeightTpl<T> ReverseWeight;
+
+  MinMaxWeightTpl() : FloatWeightTpl<T>() {}
+
+  // Deliberately not explicit: Divide() for this weight type returns a raw
+  // T and relies on this conversion.
+  MinMaxWeightTpl(T f) : FloatWeightTpl<T>(f) {}
+
+  MinMaxWeightTpl(const MinMaxWeightTpl<T> &w) : FloatWeightTpl<T>(w) {}
+
+  // Additive identity.
+  static const MinMaxWeightTpl<T> Zero() {
+    return MinMaxWeightTpl<T>(FloatLimits<T>::kPosInfinity);
+  }
+
+  // Multiplicative identity.
+  static const MinMaxWeightTpl<T> One() {
+    return MinMaxWeightTpl<T>(FloatLimits<T>::kNegInfinity);
+  }
+
+  // Value used to signal an invalid weight.
+  static const MinMaxWeightTpl<T> NoWeight() {
+    return MinMaxWeightTpl<T>(FloatLimits<T>::kNumberBad);
+  }
+
+  // Type name: "minmax" followed by the precision string of the base class.
+  static const string &Type() {
+    static const string type = "minmax" +
+        FloatWeightTpl<T>::GetPrecisionString();
+    return type;
+  }
+
+  // A value is a member unless it compares unequal to itself (IEEE NaN).
+  bool Member() const {
+    return Value() == Value();
+  }
+
+  // Rounds the value to the nearest multiple of delta. Infinities and NaN
+  // are returned unchanged.
+  MinMaxWeightTpl<T> Quantize(float delta = kDelta) const {
+    const bool is_infinite = Value() == FloatLimits<T>::kNegInfinity ||
+                             Value() == FloatLimits<T>::kPosInfinity;
+    if (is_infinite || !Member()) return *this;
+    return MinMaxWeightTpl<T>(floor(Value()/delta + 0.5F) * delta);
+  }
+
+  // The weight is its own reverse.
+  MinMaxWeightTpl<T> Reverse() const { return *this; }
+
+  static uint64 Properties() {
+    return kLeftSemiring | kRightSemiring | kCommutative | kIdempotent | kPath;
+  }
+};
+
+// Single-precision min-max weight
+typedef MinMaxWeightTpl<float> MinMaxWeight;
+
+// Min
+// Returns w1 when its value is strictly smaller, otherwise w2. Yields
+// NoWeight() if either operand is not a member.
+template <class T>
+inline MinMaxWeightTpl<T> Plus(
+    const MinMaxWeightTpl<T> &w1, const MinMaxWeightTpl<T> &w2) {
+  if (!(w1.Member() && w2.Member())) {
+    return MinMaxWeightTpl<T>::NoWeight();
+  }
+  if (w1.Value() < w2.Value()) return w1;
+  return w2;
+}
+
+// Non-template overload for single precision; forwards to the template.
+inline MinMaxWeightTpl<float> Plus(
+    const MinMaxWeightTpl<float> &w1, const MinMaxWeightTpl<float> &w2) {
+  return Plus<float>(w1, w2);
+}
+
+// Non-template overload for double precision; forwards to the template.
+inline MinMaxWeightTpl<double> Plus(
+    const MinMaxWeightTpl<double> &w1, const MinMaxWeightTpl<double> &w2) {
+  return Plus<double>(w1, w2);
+}
+
+// Max
+// Returns w1 when its value is greater than or equal to w2's, otherwise
+// w2. Yields NoWeight() if either operand is not a member.
+template <class T>
+inline MinMaxWeightTpl<T> Times(
+    const MinMaxWeightTpl<T> &w1, const MinMaxWeightTpl<T> &w2) {
+  if (!(w1.Member() && w2.Member())) {
+    return MinMaxWeightTpl<T>::NoWeight();
+  }
+  if (w1.Value() >= w2.Value()) return w1;
+  return w2;
+}
+
+// Non-template overload for single precision; forwards to the template.
+inline MinMaxWeightTpl<float> Times(
+    const MinMaxWeightTpl<float> &w1, const MinMaxWeightTpl<float> &w2) {
+  return Times<float>(w1, w2);
+}
+
+// Non-template overload for double precision; forwards to the template.
+inline MinMaxWeightTpl<double> Times(
+    const MinMaxWeightTpl<double> &w1, const MinMaxWeightTpl<double> &w2) {
+  return Times<double>(w1, w2);
+}
+
+// Defined only for special cases
+// Times() for this weight type is max, so x = w1 satisfies
+// Times(x, w2) == w1 whenever w1 >= w2; in that case w1 is returned.
+// Otherwise, or if either operand is not a member, the result is an
+// invalid weight. The 'typ' argument is accepted but not consulted.
+template <class T>
+inline MinMaxWeightTpl<T> Divide(const MinMaxWeightTpl<T> &w1,
+                                 const MinMaxWeightTpl<T> &w2,
+                                 DivideType typ = DIVIDE_ANY) {
+  if (!(w1.Member() && w2.Member())) {
+    return MinMaxWeightTpl<T>::NoWeight();
+  }
+  if (w1.Value() >= w2.Value()) return w1;
+  return MinMaxWeightTpl<T>(FloatLimits<T>::kNumberBad);
+}
+
+// Non-template overload for single precision; forwards to the template.
+inline MinMaxWeightTpl<float> Divide(const MinMaxWeightTpl<float> &w1,
+                                     const MinMaxWeightTpl<float> &w2,
+                                     DivideType typ = DIVIDE_ANY) {
+  return Divide<float>(w1, w2, typ);
+}
+
+// Non-template overload for double precision; forwards to the template.
+inline MinMaxWeightTpl<double> Divide(const MinMaxWeightTpl<double> &w1,
+                                      const MinMaxWeightTpl<double> &w2,
+                                      DivideType typ = DIVIDE_ANY) {
+  return Divide<double>(w1, w2, typ);
+}
+
+//
+// WEIGHT CONVERTER SPECIALIZATIONS.
+//
+
+// Convert to tropical
+// Each converter builds the target weight from the source weight's Value().
+template <>
+struct WeightConvert<LogWeight, TropicalWeight> {
+  TropicalWeight operator()(LogWeight w) const {
+    return TropicalWeight(w.Value());
+  }
+};
+
+template <>
+struct WeightConvert<Log64Weight, TropicalWeight> {
+  TropicalWeight operator()(Log64Weight w) const {
+    return TropicalWeight(w.Value());
+  }
+};
+
+// Convert to log
+template <>
+struct WeightConvert<TropicalWeight, LogWeight> {
+  LogWeight operator()(TropicalWeight w) const {
+    return LogWeight(w.Value());
+  }
+};
+
+template <>
+struct WeightConvert<Log64Weight, LogWeight> {
+  LogWeight operator()(Log64Weight w) const {
+    return LogWeight(w.Value());
+  }
+};
+
+// Convert to log64
+template <>
+struct WeightConvert<TropicalWeight, Log64Weight> {
+  Log64Weight operator()(TropicalWeight w) const {
+    return Log64Weight(w.Value());
+  }
+};
+
+template <>
+struct WeightConvert<LogWeight, Log64Weight> {
+  Log64Weight operator()(LogWeight w) const {
+    return Log64Weight(w.Value());
+  }
+};
+
+} // namespace fst
+
+#endif // FST_LIB_FLOAT_WEIGHT_H__
diff --git a/src/include/fst/fst-decl.h b/src/include/fst/fst-decl.h
new file mode 100644
index 0000000..0e2cdf1
--- /dev/null
+++ b/src/include/fst/fst-decl.h
@@ -0,0 +1,125 @@
+// fst-decl.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// This file contains declarations of classes in the Fst template library.
+//
+
+#ifndef FST_LIB_FST_DECL_H__
+#define FST_LIB_FST_DECL_H__
+
+#include <fst/types.h>
+
+namespace fst {
+
+class SymbolTable;
+class SymbolTableIterator;
+
+template <class W> class FloatWeightTpl;
+template <class W> class TropicalWeightTpl;
+template <class W> class LogWeightTpl;
+template <class W> class MinMaxWeightTpl;
+
+typedef FloatWeightTpl<float> FloatWeight;
+typedef TropicalWeightTpl<float> TropicalWeight;
+typedef LogWeightTpl<float> LogWeight;
+typedef MinMaxWeightTpl<float> MinMaxWeight;
+
+template <class W> class ArcTpl;
+typedef ArcTpl<TropicalWeight> StdArc;
+typedef ArcTpl<LogWeight> LogArc;
+
+template <class A, class C, class U = uint32> class CompactFst;
+template <class A, class U = uint32> class ConstFst;
+template <class A, class W, class M> class EditFst;
+template <class A> class ExpandedFst;
+template <class A> class Fst;
+template <class A> class MutableFst;
+template <class A> class VectorFst;
+
+// Forward declarations of the operation Fst class templates.
+// (DeterminizeFst was previously declared twice; the redundant line is gone.)
+template <class A, class C> class ArcSortFst;
+template <class A> class ClosureFst;
+template <class A> class ComposeFst;
+template <class A> class ConcatFst;
+template <class A> class DeterminizeFst;
+template <class A> class DifferenceFst;
+template <class A> class IntersectFst;
+template <class A> class InvertFst;
+template <class A, class B, class C> class ArcMapFst;
+template <class A> class ProjectFst;
+template <class A, class B, class S> class RandGenFst;
+template <class A> class RelabelFst;
+template <class A, class T> class ReplaceFst;
+template <class A> class RmEpsilonFst;
+template <class A> class UnionFst;
+
+template <class T, class Compare, bool max> class Heap;
+
+template <class A> class AcceptorCompactor;
+template <class A> class StringCompactor;
+template <class A> class UnweightedAcceptorCompactor;
+template <class A> class UnweightedCompactor;
+template <class A> class WeightedStringCompactor;
+
+template <class A, class P> class DefaultReplaceStateTable;
+
+typedef CompactFst<StdArc, AcceptorCompactor<StdArc> >
+StdCompactAcceptorFst;
+typedef CompactFst< StdArc, StringCompactor<StdArc> >
+StdCompactStringFst;
+typedef CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc> >
+StdCompactUnweightedAcceptorFst;
+typedef CompactFst<StdArc, UnweightedCompactor<StdArc> >
+StdCompactUnweightedFst;
+typedef CompactFst< StdArc, WeightedStringCompactor<StdArc> >
+StdCompactWeightedStringFst;
+typedef ConstFst<StdArc> StdConstFst;
+typedef ExpandedFst<StdArc> StdExpandedFst;
+typedef Fst<StdArc> StdFst;
+typedef MutableFst<StdArc> StdMutableFst;
+typedef VectorFst<StdArc> StdVectorFst;
+
+
+template <class C> class StdArcSortFst;
+typedef ClosureFst<StdArc> StdClosureFst;
+typedef ComposeFst<StdArc> StdComposeFst;
+typedef ConcatFst<StdArc> StdConcatFst;
+typedef DeterminizeFst<StdArc> StdDeterminizeFst;
+typedef DifferenceFst<StdArc> StdDifferenceFst;
+typedef IntersectFst<StdArc> StdIntersectFst;
+typedef InvertFst<StdArc> StdInvertFst;
+typedef ProjectFst<StdArc> StdProjectFst;
+typedef RelabelFst<StdArc> StdRelabelFst;
+typedef ReplaceFst<StdArc, DefaultReplaceStateTable<StdArc, ssize_t> >
+StdReplaceFst;
+typedef RmEpsilonFst<StdArc> StdRmEpsilonFst;
+typedef UnionFst<StdArc> StdUnionFst;
+
+template <typename T> class IntegerFilterState;
+typedef IntegerFilterState<signed char> CharFilterState;
+typedef IntegerFilterState<short> ShortFilterState;
+typedef IntegerFilterState<int> IntFilterState;
+
+template <class F> class Matcher;
+template <class M1, class M2 = M1> class SequenceComposeFilter;
+template <class M1, class M2 = M1> class AltSequenceComposeFilter;
+template <class M1, class M2 = M1> class MatchComposeFilter;
+
+} // namespace fst
+
+#endif // FST_LIB_FST_DECL_H__
diff --git a/src/include/fst/fst.h b/src/include/fst/fst.h
new file mode 100644
index 0000000..9c4d0db
--- /dev/null
+++ b/src/include/fst/fst.h
@@ -0,0 +1,942 @@
+// fst.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Finite-State Transducer (FST) - abstract base class definition,
+// state and arc iterator interface, and suggested base implementation.
+//
+
+#ifndef FST_LIB_FST_H__
+#define FST_LIB_FST_H__
+
+#include <stddef.h>
+#include <sys/types.h>
+#include <cmath>
+#include <string>
+
+#include <fst/compat.h>
+#include <fst/types.h>
+
+#include <fst/arc.h>
+#include <fst/properties.h>
+#include <fst/register.h>
+#include <iostream>
+#include <fstream>
+#include <fst/symbol-table.h>
+#include <fst/util.h>
+
+
+DECLARE_bool(fst_align);
+
+namespace fst {
+
+bool IsFstHeader(istream &, const string &);
+
+class FstHeader;
+template <class A> class StateIteratorData;
+template <class A> class ArcIteratorData;
+template <class A> class MatcherBase;
+
+// Options passed to the Fst read methods. All pointers are borrowed; the
+// constructors only store them.
+struct FstReadOptions {
+  string source;                // Where you're reading from
+  const FstHeader *header;      // Pointer to Fst header. If non-zero, use
+                                // this info (don't read a stream header)
+  const SymbolTable* isymbols;  // Pointer to input symbols. If non-zero, use
+                                // this info (read and skip stream isymbols)
+  const SymbolTable* osymbols;  // Pointer to output symbols. If non-zero, use
+                                // this info (read and skip stream osymbols)
+
+  // The default source label is only a placeholder for messages; its
+  // spelling was corrected from "<unspecfied>".
+  explicit FstReadOptions(const string& src = "<unspecified>",
+                          const FstHeader *hdr = 0,
+                          const SymbolTable* isym = 0,
+                          const SymbolTable* osym = 0)
+      : source(src), header(hdr), isymbols(isym), osymbols(osym) {}
+
+  // Variant without a header: 'header' is set to zero.
+  explicit FstReadOptions(const string& src,
+                          const SymbolTable* isym,
+                          const SymbolTable* osym = 0)
+      : source(src), header(0), isymbols(isym), osymbols(osym) {}
+};
+
+
+// Options passed to the Fst write methods.
+struct FstWriteOptions {
+  string source;         // Where you're writing to
+  bool write_header;     // Write the header?
+  bool write_isymbols;   // Write input symbols?
+  bool write_osymbols;   // Write output symbols?
+  bool align;            // Write data aligned where appropriate;
+                         // this may fail on pipes
+
+  // 'alig' defaults to the fst_align flag. The default source label is only
+  // a placeholder for messages; its spelling was corrected from
+  // "<unspecifed>".
+  explicit FstWriteOptions(const string& src = "<unspecified>",
+                           bool hdr = true, bool isym = true,
+                           bool osym = true, bool alig = FLAGS_fst_align)
+      : source(src), write_header(hdr),
+        write_isymbols(isym), write_osymbols(osym), align(alig) {}
+};
+
+//
+// Fst HEADER CLASS
+//
+// This is the recommended Fst file header representation.
+//
+// Plain holder for the fields of an Fst file header, with a getter and a
+// setter per field plus stream Read/Write methods declared here.
+class FstHeader {
+ public:
+ // Bit values stored in flags_ (see GetFlags/SetFlags).
+ // NOTE(review): the trailing 'Flags' declares a data member of this unnamed
+ // enum type; the constructor below does not initialize it. Looks
+ // unintended - confirm before relying on it.
+ enum {
+ HAS_ISYMBOLS = 0x1, // Has input symbol table
+ HAS_OSYMBOLS = 0x2, // Has output symbol table
+ IS_ALIGNED = 0x4, // Memory-aligned (where appropriate)
+ } Flags;
+
+ // Numeric fields start at zero except start_, which starts at -1; the two
+ // type strings start empty.
+ FstHeader() : version_(0), flags_(0), properties_(0), start_(-1),
+ numstates_(0), numarcs_(0) {}
+ // Field getters.
+ const string &FstType() const { return fsttype_; }
+ const string &ArcType() const { return arctype_; }
+ int32 Version() const { return version_; }
+ int32 GetFlags() const { return flags_; }
+ uint64 Properties() const { return properties_; }
+ int64 Start() const { return start_; }
+ int64 NumStates() const { return numstates_; }
+ int64 NumArcs() const { return numarcs_; }
+
+ // Field setters.
+ void SetFstType(const string& type) { fsttype_ = type; }
+ void SetArcType(const string& type) { arctype_ = type; }
+ void SetVersion(int32 version) { version_ = version; }
+ void SetFlags(int32 flags) { flags_ = flags; }
+ void SetProperties(uint64 properties) { properties_ = properties; }
+ void SetStart(int64 start) { start_ = start; }
+ void SetNumStates(int64 numstates) { numstates_ = numstates; }
+ void SetNumArcs(int64 numarcs) { numarcs_ = numarcs; }
+
+ // Stream (de)serialization; defined outside this header. 'source' is a
+ // label for the stream, presumably used in diagnostics - TODO confirm.
+ bool Read(istream &strm, const string &source, bool rewind = false);
+ bool Write(ostream &strm, const string &source) const;
+
+ private:
+
+ string fsttype_; // E.g. "vector"
+ string arctype_; // E.g. "standard"
+ int32 version_; // Type version #
+ int32 flags_; // File format bits
+ uint64 properties_; // FST property bits
+ int64 start_; // Start state
+ int64 numstates_; // # of states
+ int64 numarcs_; // # of arcs
+};
+
+
+// Specifies matcher action.
+enum MatchType { MATCH_INPUT, // Match input label.
+ MATCH_OUTPUT, // Match output label.
+ MATCH_BOTH, // Match input or output label.
+ MATCH_NONE, // Match nothing.
+ MATCH_UNKNOWN }; // Match type unknown.
+
+//
+// Fst INTERFACE CLASS DEFINITION
+//
+
+// A generic FST, templated on the arc definition, with
+// common-denominator methods (use StateIterator and ArcIterator to
+// iterate over its states and arcs).
+template <class A>
+class Fst {
+ public:
+  typedef A Arc;
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+
+  virtual ~Fst() {}
+
+  // Initial state
+  virtual StateId Start() const = 0;
+
+  // State's final weight
+  virtual Weight Final(StateId) const = 0;
+
+  // State's arc count
+  virtual size_t NumArcs(StateId) const = 0;
+
+  // State's input epsilon count
+  virtual size_t NumInputEpsilons(StateId) const = 0;
+
+  // State's output epsilon count
+  virtual size_t NumOutputEpsilons(StateId) const = 0;
+
+  // Property bits.
+  // With test=false, returns the stored property bits selected by mask
+  // (some may be unknown). With test=true, returns the property bits
+  // selected by mask, computing those that would otherwise be unknown.
+  virtual uint64 Properties(uint64 mask, bool test) const = 0;
+
+  // Fst type name
+  virtual const string& Type() const = 0;
+
+  // Returns a copy of this Fst, with these guarantees:
+  //
+  // (1) Copying takes constant time if safe = false, or if safe = true
+  // and the Fst has not otherwise been accessed.
+  //
+  // (2) With safe = true the copy is thread-safe: the original and the
+  // copy can be safely accessed (but not necessarily mutated) by
+  // separate threads. For some Fst types 'Copy(true)' should only be
+  // called on an Fst that has not otherwise been accessed; the behavior
+  // is undefined otherwise.
+  //
+  // (3) If a MutableFst is copied and then mutated, the original is left
+  // unmodified and vice versa (often via copy-on-write on the first
+  // mutation, which may not be constant time).
+  virtual Fst<A> *Copy(bool safe = false) const = 0;
+
+  // Reads an Fst from an input stream; returns NULL on error. The header
+  // comes from opts.header when set, otherwise it is read from the stream.
+  // The reader registered for the header's Fst type does the actual work.
+  static Fst<A> *Read(istream &strm, const FstReadOptions &opts) {
+    FstReadOptions ropts(opts);
+    FstHeader hdr;
+    if (!ropts.header) {
+      if (!hdr.Read(strm, opts.source))
+        return 0;
+      // Hand the freshly read header on to the type-specific reader.
+      ropts.header = &hdr;
+    } else {
+      hdr = *opts.header;
+    }
+    const typename FstRegister<A>::Reader reader =
+        FstRegister<A>::GetRegister()->GetReader(hdr.FstType());
+    if (reader)
+      return reader(strm, ropts);
+    LOG(ERROR) << "Fst::Read: Unknown FST type \"" << hdr.FstType()
+               << "\" (arc type = \"" << A::Type()
+               << "\"): " << ropts.source;
+    return 0;
+  }
+
+  // Reads an Fst from a file; returns NULL on error.
+  // An empty filename reads from standard input.
+  static Fst<A> *Read(const string &filename) {
+    if (filename.empty())
+      return Read(std::cin, FstReadOptions("standard input"));
+    ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
+    if (!strm) {
+      LOG(ERROR) << "Fst::Read: Can't open file: " << filename;
+      return 0;
+    }
+    return Read(strm, FstReadOptions(filename));
+  }
+
+  // Writes an Fst to an output stream; returns false on error.
+  // This base version only logs that the type has no stream writer.
+  virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
+    LOG(ERROR) << "Fst::Write: No write stream method for " << Type()
+               << " Fst type";
+    return false;
+  }
+
+  // Writes an Fst to a file; returns false on error.
+  // An empty filename writes to standard output.
+  // This base version only logs that the type has no file writer.
+  virtual bool Write(const string &filename) const {
+    LOG(ERROR) << "Fst::Write: No write filename method for " << Type()
+               << " Fst type";
+    return false;
+  }
+
+  // Returns the input label symbol table, or NULL if not specified.
+  virtual const SymbolTable* InputSymbols() const = 0;
+
+  // Returns the output label symbol table, or NULL if not specified.
+  virtual const SymbolTable* OutputSymbols() const = 0;
+
+  // For generic state iterator construction; not normally called
+  // directly by users.
+  virtual void InitStateIterator(StateIteratorData<A> *) const = 0;
+
+  // For generic arc iterator construction; not normally called
+  // directly by users.
+  virtual void InitArcIterator(StateId s, ArcIteratorData<A> *) const = 0;
+
+  // For generic matcher construction; not normally called
+  // directly by users.
+  virtual MatcherBase<A> *InitMatcher(MatchType match_type) const;
+
+ protected:
+
+  // Helper for file-based writers: opens 'filename' in binary mode (or uses
+  // standard output when it is empty) and calls the stream Write().
+  bool WriteFile(const string &filename) const {
+    if (filename.empty())
+      return Write(std::cout, FstWriteOptions("standard output"));
+    ofstream strm(filename.c_str(), ofstream::out | ofstream::binary);
+    if (!strm) {
+      LOG(ERROR) << "Fst::Write: Can't open file: " << filename;
+      return false;
+    }
+    return Write(strm, FstWriteOptions(filename));
+  }
+};
+
+
+//
+// STATE and ARC ITERATOR DEFINITIONS
+//
+
+// State iterator interface templated on the Arc definition; used
+// for StateIterator specializations returned by the InitStateIterator
+// Fst method.
+// Abstract interface: each public method forwards to the private pure
+// virtual of the same name with a trailing underscore.
+template <class A>
+class StateIteratorBase {
+ public:
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+
+ virtual ~StateIteratorBase() {}
+
+ bool Done() const { return Done_(); } // End of iterator?
+ StateId Value() const { return Value_(); } // Current state (when !Done)
+ void Next() { Next_(); } // Advance to next state (when !Done)
+ void Reset() { Reset_(); } // Return to initial condition
+
+ private:
+ // This allows base class virtual access to non-virtual derived-
+ // class members of the same name. It makes the derived class more
+ // efficient to use but unsafe to further derive.
+ virtual bool Done_() const = 0;
+ virtual StateId Value_() const = 0;
+ virtual void Next_() = 0;
+ virtual void Reset_() = 0;
+};
+
+
+// StateIterator initialization data
+
+// Passed by pointer to Fst::InitStateIterator() and read back by
+// StateIterator, which deletes 'base' in its destructor when it is non-zero.
+// The struct has no constructor, so both fields start uninitialized.
+template <class A> struct StateIteratorData {
+ StateIteratorBase<A> *base; // Specialized iterator if non-zero
+ typename A::StateId nstates; // O.w. total # of states
+};
+
+
+// Generic state iterator, templated on the FST definition
+// - a wrapper around pointer to specific one.
+// Here is a typical use: \code
+// for (StateIterator<StdFst> siter(fst);
+// !siter.Done();
+// siter.Next()) {
+// StateId s = siter.Value();
+// ...
+// } \endcode
+// Wraps the data filled in by F::InitStateIterator(). When that data
+// carries a specialized iterator every call is forwarded to it; otherwise
+// the states are enumerated as 0 .. nstates - 1 with a local counter.
+template <class F>
+class StateIterator {
+ public:
+  typedef F FST;
+  typedef typename F::Arc Arc;
+  typedef typename Arc::StateId StateId;
+
+  explicit StateIterator(const F &fst) : s_(0) {
+    fst.InitStateIterator(&data_);
+  }
+
+  // Owns the specialized iterator, if any (deleting a null pointer is a
+  // no-op).
+  ~StateIterator() { delete data_.base; }
+
+  // End of iterator?
+  bool Done() const {
+    if (data_.base)
+      return data_.base->Done();
+    return s_ >= data_.nstates;
+  }
+
+  // Current state (when !Done)
+  StateId Value() const {
+    if (data_.base)
+      return data_.base->Value();
+    return s_;
+  }
+
+  // Advance to next state (when !Done)
+  void Next() {
+    if (!data_.base) {
+      ++s_;
+      return;
+    }
+    data_.base->Next();
+  }
+
+  // Return to initial condition
+  void Reset() {
+    if (!data_.base) {
+      s_ = 0;
+      return;
+    }
+    data_.base->Reset();
+  }
+
+ private:
+  StateIteratorData<Arc> data_;
+  StateId s_;
+
+  DISALLOW_COPY_AND_ASSIGN(StateIterator);
+};
+
+
+// Flags to control the behavior on an arc iterator:
+static const uint32 kArcILabelValue = 0x0001; // Value() gives valid ilabel
+static const uint32 kArcOLabelValue = 0x0002; // " " " olabel
+static const uint32 kArcWeightValue = 0x0004; // " " " weight
+static const uint32 kArcNextStateValue = 0x0008; // " " " nextstate
+static const uint32 kArcNoCache = 0x0010; // No need to cache arcs
+
+static const uint32 kArcValueFlags =
+ kArcILabelValue | kArcOLabelValue |
+ kArcWeightValue | kArcNextStateValue;
+
+static const uint32 kArcFlags = kArcValueFlags | kArcNoCache;
+
+
+// Arc iterator interface, templated on the Arc definition; used
+// for Arc iterator specializations that are returned by the InitArcIterator
+// Fst method.
+// Abstract interface: each public method forwards to the private pure
+// virtual of the same name with a trailing underscore.
+template <class A>
+class ArcIteratorBase {
+ public:
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+
+ virtual ~ArcIteratorBase() {}
+
+ bool Done() const { return Done_(); } // End of iterator?
+ const A& Value() const { return Value_(); } // Current arc (when !Done)
+ void Next() { Next_(); } // Advance to next arc (when !Done)
+ size_t Position() const { return Position_(); } // Return current position
+ void Reset() { Reset_(); } // Return to initial condition
+ void Seek(size_t a) { Seek_(a); } // Random arc access by position
+ uint32 Flags() const { return Flags_(); } // Return current behavioral flags
+ void SetFlags(uint32 flags, uint32 mask) { // Set behavioral flags
+ SetFlags_(flags, mask);
+ }
+
+ private:
+ // This allows base class virtual access to non-virtual derived-
+ // class members of the same name. It makes the derived class more
+ // efficient to use but unsafe to further derive.
+ virtual bool Done_() const = 0;
+ virtual const A& Value_() const = 0;
+ virtual void Next_() = 0;
+ virtual size_t Position_() const = 0;
+ virtual void Reset_() = 0;
+ virtual void Seek_(size_t a) = 0;
+ virtual uint32 Flags_() const = 0;
+ virtual void SetFlags_(uint32 flags, uint32 mask) = 0;
+};
+
+
+// ArcIterator initialization data
+// Passed by pointer to Fst::InitArcIterator() and consumed by ArcIterator.
+// ArcIterator's destructor deletes 'base' when it is non-zero; otherwise it
+// decrements *ref_count when that pointer is non-zero.
+// The struct has no constructor, so all fields start uninitialized.
+template <class A> struct ArcIteratorData {
+ ArcIteratorBase<A> *base; // Specialized iterator if non-zero
+ const A *arcs; // O.w. arcs pointer
+ size_t narcs; // ... and arc count
+ int *ref_count; // ... and reference count if non-zero
+};
+
+
+// Generic arc iterator, templated on the FST definition
+// - a wrapper around pointer to specific one.
+// Here is a typical use: \code
+// for (ArcIterator<StdFst> aiter(fst, s);
+// !aiter.Done();
+// aiter.Next()) {
+// const StdArc &arc = aiter.Value();
+// ...
+// } \endcode
+// Wraps the data filled in by F::InitArcIterator(). When that data carries
+// a specialized iterator every call is forwarded to it; otherwise the arcs
+// are read from the arcs array with a local index.
+template <class F>
+class ArcIterator {
+ public:
+  typedef F FST;
+  typedef typename F::Arc Arc;
+  typedef typename Arc::StateId StateId;
+
+  ArcIterator(const F &fst, StateId s) : i_(0) {
+    fst.InitArcIterator(s, &data_);
+  }
+
+  // Builds an iterator from already prepared data, taking one reference on
+  // the shared count when one is supplied.
+  explicit ArcIterator(const ArcIteratorData<Arc> &data) : data_(data), i_(0) {
+    if (data_.ref_count) {
+      ++(*data_.ref_count);
+    }
+  }
+
+  // Deletes the specialized iterator if there is one; only otherwise is the
+  // reference count (if any) released.
+  ~ArcIterator() {
+    if (data_.base) {
+      delete data_.base;
+      return;
+    }
+    if (data_.ref_count) {
+      --(*data_.ref_count);
+    }
+  }
+
+  // End of iterator?
+  bool Done() const {
+    if (data_.base)
+      return data_.base->Done();
+    return i_ >= data_.narcs;
+  }
+
+  // Current arc (when !Done)
+  const Arc& Value() const {
+    if (data_.base)
+      return data_.base->Value();
+    return data_.arcs[i_];
+  }
+
+  // Advance to next arc (when !Done)
+  void Next() {
+    if (!data_.base) {
+      ++i_;
+      return;
+    }
+    data_.base->Next();
+  }
+
+  // Return to initial condition
+  void Reset() {
+    if (!data_.base) {
+      i_ = 0;
+      return;
+    }
+    data_.base->Reset();
+  }
+
+  // Random arc access by position
+  void Seek(size_t a) {
+    if (!data_.base) {
+      i_ = a;
+      return;
+    }
+    data_.base->Seek(a);
+  }
+
+  // Current position
+  size_t Position() const {
+    if (data_.base)
+      return data_.base->Position();
+    return i_;
+  }
+
+  // Without a specialized iterator the flags are always kArcValueFlags.
+  uint32 Flags() const {
+    return data_.base ? data_.base->Flags() : kArcValueFlags;
+  }
+
+  // Ignored when there is no specialized iterator.
+  void SetFlags(uint32 flags, uint32 mask) {
+    if (!data_.base)
+      return;
+    data_.base->SetFlags(flags, mask);
+  }
+
+ private:
+  ArcIteratorData<Arc> data_;
+  size_t i_;
+  DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+//
+// MATCHER DEFINITIONS
+//
+
+// Base implementation: returns a null pointer, which requests the default
+// matcher. 'match_type' is not consulted.
+template <class A>
+MatcherBase<A> *Fst<A>::InitMatcher(MatchType match_type) const {
+  MatcherBase<A> *const no_specialized_matcher = 0;
+  return no_specialized_matcher;  // Use the default matcher
+}
+
+
+//
+// FST ACCESSORS - Useful functions in high-performance cases.
+//
+
+// Two overload sets of the same four accessors. The generic versions call
+// the method with an explicit F:: qualification, which bypasses virtual
+// dispatch; the Fst<A> versions make an ordinary (virtual) call.
+// Note: the Num* helpers return ssize_t although the Fst interface methods
+// they call are declared to return size_t.
+namespace internal {
+
+// General case - requires non-abstract, 'final' methods. Use for inlining.
+template <class F> inline
+typename F::Arc::Weight Final(const F &fst, typename F::Arc::StateId s) {
+ return fst.F::Final(s);
+}
+
+template <class F> inline
+ssize_t NumArcs(const F &fst, typename F::Arc::StateId s) {
+ return fst.F::NumArcs(s);
+}
+
+template <class F> inline
+ssize_t NumInputEpsilons(const F &fst, typename F::Arc::StateId s) {
+ return fst.F::NumInputEpsilons(s);
+}
+
+template <class F> inline
+ssize_t NumOutputEpsilons(const F &fst, typename F::Arc::StateId s) {
+ return fst.F::NumOutputEpsilons(s);
+}
+
+
+// Fst<A> case - abstract methods.
+template <class A> inline
+typename A::Weight Final(const Fst<A> &fst, typename A::StateId s) {
+ return fst.Final(s);
+}
+
+template <class A> inline
+ssize_t NumArcs(const Fst<A> &fst, typename A::StateId s) {
+ return fst.NumArcs(s);
+}
+
+template <class A> inline
+ssize_t NumInputEpsilons(const Fst<A> &fst, typename A::StateId s) {
+ return fst.NumInputEpsilons(s);
+}
+
+template <class A> inline
+ssize_t NumOutputEpsilons(const Fst<A> &fst, typename A::StateId s) {
+ return fst.NumOutputEpsilons(s);
+}
+
+} // namespace internal
+
+// A useful alias when using StdArc.
+typedef Fst<StdArc> StdFst;
+
+
+//
+// CONSTANT DEFINITIONS
+//
+
+const int kNoStateId = -1; // Not a valid state ID
+const int kNoLabel = -1; // Not a valid label
+
+//
+// Fst IMPLEMENTATION BASE
+//
+// This is the recommended Fst implementation base class. It will
+// handle reference counts, property bits, type information and symbols.
+//
+
+// Holds the state shared by Fst implementations: property bits, the type
+// name, owned copies of the input/output symbol tables and a reference
+// counter, plus helpers for reading and writing the file header.
+template <class A> class FstImpl {
+ public:
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+
+  FstImpl()
+      : properties_(0), type_("null"), isymbols_(0), osymbols_(0) {}
+
+  // Copies properties and type and takes private copies of the symbol
+  // tables; the reference counter is default-constructed, not copied.
+  FstImpl(const FstImpl<A> &impl)
+      : properties_(impl.properties_), type_(impl.type_),
+        isymbols_(impl.isymbols_ ? impl.isymbols_->Copy() : 0),
+        osymbols_(impl.osymbols_ ? impl.osymbols_->Copy() : 0) {}
+
+  virtual ~FstImpl() {
+    delete isymbols_;
+    delete osymbols_;
+  }
+
+  const string& Type() const { return type_; }
+
+  void SetType(const string &type) { type_ = type; }
+
+  virtual uint64 Properties() const { return properties_; }
+
+  virtual uint64 Properties(uint64 mask) const { return properties_ & mask; }
+
+  // Replaces all property bits with props, except that an already set
+  // kError bit is kept.
+  void SetProperties(uint64 props) {
+    properties_ &= kError;  // kError can't be cleared
+    properties_ |= props;
+  }
+
+  // Replaces the property bits selected by mask with those of props, except
+  // that an already set kError bit is kept.
+  void SetProperties(uint64 props, uint64 mask) {
+    properties_ &= ~mask | kError;  // kError can't be cleared
+    properties_ |= props & mask;
+  }
+
+  // Allows (only) setting error bit on const FST impls. Any other mask is
+  // reported through FSTERROR(); kError is set regardless of props.
+  void SetProperties(uint64 props, uint64 mask) const {
+    if (mask != kError)
+      FSTERROR() << "FstImpl::SetProperties() const: can only set kError";
+    properties_ |= kError;
+  }
+
+  const SymbolTable* InputSymbols() const { return isymbols_; }
+
+  const SymbolTable* OutputSymbols() const { return osymbols_; }
+
+  SymbolTable* InputSymbols() { return isymbols_; }
+
+  SymbolTable* OutputSymbols() { return osymbols_; }
+
+  // Stores a private copy of isyms (or no table when isyms is zero).
+  void SetInputSymbols(const SymbolTable* isyms) {
+    // Copy before deleting: isyms may be the table this impl already owns,
+    // e.g. SetInputSymbols(InputSymbols()).
+    SymbolTable *replacement = isyms ? isyms->Copy() : 0;
+    delete isymbols_;
+    isymbols_ = replacement;
+  }
+
+  // Stores a private copy of osyms (or no table when osyms is zero).
+  void SetOutputSymbols(const SymbolTable* osyms) {
+    // Copy before deleting: osyms may be the table this impl already owns.
+    SymbolTable *replacement = osyms ? osyms->Copy() : 0;
+    delete osymbols_;
+    osymbols_ = replacement;
+  }
+
+  int RefCount() const {
+    return ref_count_.count();
+  }
+
+  int IncrRefCount() {
+    return ref_count_.Incr();
+  }
+
+  int DecrRefCount() {
+    return ref_count_.Decr();
+  }
+
+  // Read-in header and symbols from input stream, initialize Fst, and
+  // return the header. If opts.header is non-null, skip read-in and
+  // use the option value. If opts.[io]symbols is non-null, read-in
+  // (if present), but use the option value.
+  bool ReadHeader(istream &strm, const FstReadOptions& opts,
+                  int min_version, FstHeader *hdr);
+
+  // Write-out header and symbols to output stream.
+  // If opts.write_header is false, skip writing header.
+  // If opts.write_[io]symbols is false, skip writing those symbols.
+  // This method is needed for Impl's that implement Write methods.
+  void WriteHeader(ostream &strm, const FstWriteOptions& opts,
+                   int version, FstHeader *hdr) const {
+    if (opts.write_header) {
+      hdr->SetFstType(type_);
+      hdr->SetArcType(A::Type());
+      hdr->SetVersion(version);
+      hdr->SetProperties(properties_);
+      int32 file_flags = 0;
+      if (isymbols_ && opts.write_isymbols)
+        file_flags |= FstHeader::HAS_ISYMBOLS;
+      if (osymbols_ && opts.write_osymbols)
+        file_flags |= FstHeader::HAS_OSYMBOLS;
+      if (opts.align)
+        file_flags |= FstHeader::IS_ALIGNED;
+      hdr->SetFlags(file_flags);
+      hdr->Write(strm, opts.source);
+    }
+    if (isymbols_ && opts.write_isymbols) isymbols_->Write(strm);
+    if (osymbols_ && opts.write_osymbols) osymbols_->Write(strm);
+  }
+
+  // Write-out header and symbols to output stream.
+  // If opts.write_header is false, skip writing header.
+  // If opts.write_[io]symbols is false, skip writing those symbols.
+  // type is the Fst type being written.
+  // This method is used in the cross-type serialization methods Fst::WriteFst.
+  static void WriteFstHeader(const Fst<A> &fst, ostream &strm,
+                             const FstWriteOptions& opts, int version,
+                             const string &type, FstHeader *hdr) {
+    if (opts.write_header) {
+      hdr->SetFstType(type);
+      hdr->SetArcType(A::Type());
+      hdr->SetVersion(version);
+      hdr->SetProperties(fst.Properties(kFstProperties, false));
+      int32 file_flags = 0;
+      if (fst.InputSymbols() && opts.write_isymbols)
+        file_flags |= FstHeader::HAS_ISYMBOLS;
+      if (fst.OutputSymbols() && opts.write_osymbols)
+        file_flags |= FstHeader::HAS_OSYMBOLS;
+      if (opts.align)
+        file_flags |= FstHeader::IS_ALIGNED;
+      hdr->SetFlags(file_flags);
+      hdr->Write(strm, opts.source);
+    }
+    if (fst.InputSymbols() && opts.write_isymbols) {
+      fst.InputSymbols()->Write(strm);
+    }
+    if (fst.OutputSymbols() && opts.write_osymbols) {
+      fst.OutputSymbols()->Write(strm);
+    }
+  }
+
+  // In serialization routines where the header cannot be written until after
+  // the machine has been serialized, this routine can be called to seek to
+  // header_offset and rewrite the header with updated fields.
+  // It repositions the file pointer back at the end of the file.
+  // Returns true on success, false on failure.
+  static bool UpdateFstHeader(const Fst<A> &fst, ostream &strm,
+                              const FstWriteOptions& opts, int version,
+                              const string &type, FstHeader *hdr,
+                              size_t header_offset) {
+    strm.seekp(header_offset);
+    if (!strm) {
+      LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source;
+      return false;
+    }
+    WriteFstHeader(fst, strm, opts, version, type, hdr);
+    if (!strm) {
+      LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source;
+      return false;
+    }
+    strm.seekp(0, ios_base::end);
+    if (!strm) {
+      LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source;
+      return false;
+    }
+    return true;
+  }
+
+ protected:
+  mutable uint64 properties_;  // Property bits
+
+ private:
+  string type_;            // Unique name of Fst class
+  SymbolTable *isymbols_;  // Ilabel symbol table
+  SymbolTable *osymbols_;  // Olabel symbol table
+  RefCounter ref_count_;   // Reference count
+
+  void operator=(const FstImpl<A> &impl);  // disallow
+};
+
+template <class A> inline
+bool FstImpl<A>::ReadHeader(istream &strm, const FstReadOptions& opts,
+                            int min_version, FstHeader *hdr) {
+  // A header supplied through the options takes precedence; otherwise the
+  // header is parsed from the stream.
+  if (!opts.header) {
+    if (!hdr->Read(strm, opts.source)) return false;
+  } else {
+    *hdr = *opts.header;
+  }
+
+  if (FLAGS_v >= 2) {
+    LOG(INFO) << "FstImpl::ReadHeader: source: " << opts.source
+              << ", fst_type: " << hdr->FstType()
+              << ", arc_type: " << A::Type()
+              << ", version: " << hdr->Version()
+              << ", flags: " << hdr->GetFlags();
+  }
+
+  // Reject input whose FST type, arc type or version does not fit this
+  // implementation.
+  if (hdr->FstType() != type_) {
+    LOG(ERROR) << "FstImpl::ReadHeader: Fst not of type \"" << type_
+               << "\": " << opts.source;
+    return false;
+  }
+  if (hdr->ArcType() != A::Type()) {
+    LOG(ERROR) << "FstImpl::ReadHeader: Arc not of type \"" << A::Type()
+               << "\": " << opts.source;
+    return false;
+  }
+  if (hdr->Version() < min_version) {
+    LOG(ERROR) << "FstImpl::ReadHeader: Obsolete " << type_
+               << " Fst version: " << opts.source;
+    return false;
+  }
+
+  properties_ = hdr->Properties();
+
+  // Symbol tables flagged in the header are read from the stream first
+  // (input table, then output table) ...
+  if (hdr->GetFlags() & FstHeader::HAS_ISYMBOLS) {
+    isymbols_ = SymbolTable::Read(strm, opts.source);
+  }
+  if (hdr->GetFlags() & FstHeader::HAS_OSYMBOLS) {
+    osymbols_ = SymbolTable::Read(strm, opts.source);
+  }
+
+  // ... and are then replaced by copies of any tables given in the options.
+  if (opts.isymbols) {
+    delete isymbols_;
+    isymbols_ = opts.isymbols->Copy();
+  }
+  if (opts.osymbols) {
+    delete osymbols_;
+    osymbols_ = opts.osymbols->Copy();
+  }
+  return true;
+}
+
+
+template<class Arc>
+uint64 TestProperties(const Fst<Arc> &fst, uint64 mask, uint64 *known);  // Declaration only; ImplToFst::Properties() calls it when 'test' is true.
+
+
+// This is a helper class template useful for attaching an Fst interface to
+// its implementation, handling reference counting. I is the implementation
+// type; F is the Fst interface that this class derives from. The interface
+// methods below simply forward to the implementation object.
+template < class I, class F = Fst<typename I::Arc> >
+class ImplToFst : public F {
+ public:
+  typedef typename I::Arc Arc;
+  typedef typename Arc::Weight Weight;
+  typedef typename Arc::StateId StateId;
+
+  // Drops this object's reference to the implementation and deletes the
+  // implementation when DecrRefCount() reports zero. The default
+  // constructor leaves impl_ null and SetImpl() already tolerates that, so
+  // a null impl_ is skipped here instead of being dereferenced.
+  virtual ~ImplToFst() {
+    if (impl_ && !impl_->DecrRefCount()) delete impl_;
+  }
+
+  virtual StateId Start() const { return impl_->Start(); }
+
+  virtual Weight Final(StateId s) const { return impl_->Final(s); }
+
+  virtual size_t NumArcs(StateId s) const { return impl_->NumArcs(s); }
+
+  virtual size_t NumInputEpsilons(StateId s) const {
+    return impl_->NumInputEpsilons(s);
+  }
+
+  virtual size_t NumOutputEpsilons(StateId s) const {
+    return impl_->NumOutputEpsilons(s);
+  }
+
+  // With test == false, returns the property bits stored in the
+  // implementation, restricted to 'mask'. With test == true, calls
+  // TestProperties() on this FST, records the outcome in the
+  // implementation, and returns the tested bits restricted to 'mask'.
+  virtual uint64 Properties(uint64 mask, bool test) const {
+    if (test) {
+      uint64 knownprops, testprops = TestProperties(*this, mask, &knownprops);
+      impl_->SetProperties(testprops, knownprops);
+      return testprops & mask;
+    } else {
+      return impl_->Properties(mask);
+    }
+  }
+
+  virtual const string& Type() const { return impl_->Type(); }
+
+  virtual const SymbolTable* InputSymbols() const {
+    return impl_->InputSymbols();
+  }
+
+  virtual const SymbolTable* OutputSymbols() const {
+    return impl_->OutputSymbols();
+  }
+
+ protected:
+  // Leaves the object without an implementation; SetImpl() must supply one
+  // before any of the forwarding methods above is called.
+  ImplToFst() : impl_(0) {}
+
+  // Stores 'impl' without touching its reference count.
+  ImplToFst(I *impl) : impl_(impl) {}
+
+  // Shares the implementation of 'fst' and bumps its reference count.
+  ImplToFst(const ImplToFst<I, F> &fst) {
+    impl_ = fst.impl_;
+    impl_->IncrRefCount();
+  }
+
+  // This constructor presumes there is a copy constructor for the
+  // implementation. With safe == true the implementation is copied;
+  // otherwise it is shared as in the plain copy constructor.
+  ImplToFst(const ImplToFst<I, F> &fst, bool safe) {
+    if (safe) {
+      impl_ = new I(*(fst.impl_));
+    } else {
+      impl_ = fst.impl_;
+      impl_->IncrRefCount();
+    }
+  }
+
+  I *GetImpl() const { return impl_; }
+
+  // Change Fst implementation pointer. If 'own_impl' is true,
+  // ownership of the input implementation is given to this
+  // object; otherwise, the input implementation's reference count
+  // should be incremented.
+  void SetImpl(I *impl, bool own_impl = true) {
+    // Take the new reference before releasing the old one.
+    if (!own_impl)
+      impl->IncrRefCount();
+    if (impl_ && !impl_->DecrRefCount()) delete impl_;
+    impl_ = impl;
+  }
+
+ private:
+  // Disallow
+  ImplToFst<I, F> &operator=(const ImplToFst<I, F> &fst);
+
+  // Assignment from a generic Fst is not supported either: it reports an
+  // error and sets the kError property on the implementation.
+  ImplToFst<I, F> &operator=(const Fst<Arc> &fst) {
+    FSTERROR() << "ImplToFst: Assignment operator disallowed";
+    GetImpl()->SetProperties(kError, kError);
+    return *this;
+  }
+
+  I *impl_;
+};
+
+
+// Converts FSTs by casting their implementations, where this makes
+// sense (which excludes implementations with weight-dependent virtual
+// methods). Must be a friend of the Fst classes involved (currently
+// the concrete Fsts: VectorFst, ConstFst, CompactFst).
+template<class F, class G> void Cast(const F &ifst, G *ofst) {
+  // View the source implementation as the destination's Impl type and hand
+  // it over with own_impl == false, i.e. without transferring ownership.
+  typedef typename G::Impl DestImpl;
+  DestImpl *shared_impl = reinterpret_cast<DestImpl *>(ifst.GetImpl());
+  ofst->SetImpl(shared_impl, false);
+}
+
+// Fst Serialization
+template <class A>
+void FstToString(const Fst<A> &fst, string *result) {
+  // Serialize into an in-memory stream, then copy the bytes to *result.
+  const FstWriteOptions write_opts("FstToString");
+  ostringstream buffer;
+  fst.Write(buffer, write_opts);
+  result->assign(buffer.str());
+}
+
+template <class A>
+Fst<A> *StringToFst(const string &s) {
+  // Wrap the serialized bytes in a stream and read them back through
+  // Fst<A>::Read; its result is returned unchanged.
+  const FstReadOptions read_opts("StringToFst");
+  istringstream source(s);
+  return Fst<A>::Read(source, read_opts);
+}
+
+} // namespace fst
+
+#endif // FST_LIB_FST_H__
diff --git a/src/include/fst/fstlib.h b/src/include/fst/fstlib.h
new file mode 100644
index 0000000..c05c775
--- /dev/null
+++ b/src/include/fst/fstlib.h
@@ -0,0 +1,151 @@
+// fstlib.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \page FstLib FST - Weighted Finite State Transducers
+// This is a library for constructing, combining, optimizing, and
+// searching "weighted finite-state transducers" (FSTs). Weighted
+// finite-state transducers are automata where each transition has an
+// input label, an output label, and a weight. The more familiar
+// finite-state acceptor is represented as a transducer with each
+// transition's input and output the same. Finite-state acceptors
+// are used to represent sets of strings (specifically, "regular" or
+// "rational sets"); finite-state transducers are used to represent
+// binary relations between pairs of strings (specifically, "rational
+// transductions"). The weights can be used to represent the cost of
+// taking a particular transition.
+//
+// In this library, the transducers are templated on the Arc
+// (transition) definition, which allows changing the label, weight,
+// and state ID sets. Labels and state IDs are restricted to signed
+// integral types but the weight can be an arbitrary type whose
+// members satisfy certain algebraic ("semiring") properties.
+//
+// For more information, see the FST Library Wiki page:
+// http://wiki.corp.google.com/twiki/bin/view/Main/FstLibrary
+
+// \file
+// This convenience file includes all other FST header files.
+//
+
+#ifndef FST_LIB_FSTLIB_H__
+#define FST_LIB_FSTLIB_H__
+
+
+// Abstract FST classes
+#include <fst/fst.h>
+#include <fst/expanded-fst.h>
+#include <fst/mutable-fst.h>
+
+// Concrete FST classes
+#include <fst/compact-fst.h>
+#include <fst/const-fst.h>
+#include <fst/edit-fst.h>
+#include <fst/vector-fst.h>
+
+// FST algorithms and delayed FST classes
+#include <fst/arcsort.h>
+#include <fst/arc-map.h>
+#include <fst/closure.h>
+#include <fst/compose.h>
+#include <fst/concat.h>
+#include <fst/connect.h>
+#include <fst/determinize.h>
+#include <fst/difference.h>
+#include <fst/encode.h>
+#include <fst/epsnormalize.h>
+#include <fst/equal.h>
+#include <fst/equivalent.h>
+#include <fst/factor-weight.h>
+#include <fst/intersect.h>
+#include <fst/invert.h>
+#include <fst/map.h>
+#include <fst/minimize.h>
+#include <fst/project.h>
+#include <fst/prune.h>
+#include <fst/push.h>
+#include <fst/randequivalent.h>
+#include <fst/randgen.h>
+#include <fst/rational.h>
+#include <fst/relabel.h>
+#include <fst/replace.h>
+#include <fst/replace-util.h>
+#include <fst/reverse.h>
+#include <fst/reweight.h>
+#include <fst/rmepsilon.h>
+#include <fst/rmfinalepsilon.h>
+#include <fst/shortest-distance.h>
+#include <fst/shortest-path.h>
+#include <fst/statesort.h>
+#include <fst/state-map.h>
+#include <fst/synchronize.h>
+#include <fst/topsort.h>
+#include <fst/union.h>
+#include <fst/verify.h>
+#include <fst/visit.h>
+
+// Weights
+#include <fst/weight.h>
+#include <fst/expectation-weight.h>
+#include <fst/float-weight.h>
+#include <fst/lexicographic-weight.h>
+#include <fst/pair-weight.h>
+#include <fst/power-weight.h>
+#include <fst/product-weight.h>
+#include <fst/random-weight.h>
+#include <fst/signed-log-weight.h>
+#include <fst/sparse-power-weight.h>
+#include <fst/sparse-tuple-weight.h>
+#include <fst/string-weight.h>
+#include <fst/tuple-weight.h>
+
+// Auxiliary classes for composition
+#include <fst/compose-filter.h>
+#include <fst/lookahead-filter.h>
+#include <fst/lookahead-matcher.h>
+#include <fst/matcher-fst.h>
+#include <fst/matcher.h>
+#include <fst/state-table.h>
+
+// Data structures
+#include <fst/heap.h>
+#include <fst/interval-set.h>
+#include <fst/queue.h>
+#include <fst/union-find.h>
+
+// Miscellaneous
+#include <fst/accumulator.h>
+#include <fst/add-on.h>
+#include <fst/arc.h>
+#include <fst/arcfilter.h>
+#include <fst/cache.h>
+#include <fst/complement.h>
+#include <fst/dfs-visit.h>
+#include <fst/generic-register.h>
+#include <fst/label-reachable.h>
+#include <fst/partition.h>
+#include <fst/properties.h>
+#include <fst/register.h>
+#include <fst/state-reachable.h>
+#include <iostream>
+#include <fstream>
+#include <fst/symbol-table.h>
+#include <fst/symbol-table-ops.h>
+#include <fst/test-properties.h>
+#include <fst/util.h>
+
+
+#endif // FST_LIB_FSTLIB_H__
diff --git a/src/include/fst/generic-register.h b/src/include/fst/generic-register.h
new file mode 100644
index 0000000..4f8b512
--- /dev/null
+++ b/src/include/fst/generic-register.h
@@ -0,0 +1,159 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_LIB_GENERIC_REGISTER_H_
+#define FST_LIB_GENERIC_REGISTER_H_
+
+#include <map>
+#include <string>
+
+#include <fst/compat.h>
+#include <fst/types.h>
+
+// Generic class representing a globally-stored correspondence between
+// objects of KeyType and EntryType.
+// KeyType must:
+// a) be such as can be stored as a key in a map<>
+// b) be concatenable with a const char* with the + operator
+// (or you must subclass and redefine LoadEntryFromSharedObject)
+// EntryType must be default constructible.
+//
+// The third template parameter should be the type of a subclass of this class
+// (think CRTP). This is to allow GetRegister() to instantiate and return
+// an object of the appropriate type.
+
+namespace fst {
+
+template<class KeyType, class EntryType, class RegisterType>
+class GenericRegister {
+ public:
+  typedef KeyType Key;
+  typedef EntryType Entry;
+
+  // Returns the shared register instance. Init() is passed to FstOnceInit,
+  // which is expected to run it a single time (creating the lock and the
+  // register) before the pointer is returned.
+  static RegisterType *GetRegister() {
+    FstOnceInit(&register_init_, &RegisterType::Init);
+    return register_;
+  }
+
+  // Adds (key, entry) to the table while holding the register lock.
+  // map::insert leaves an entry already stored under 'key' untouched.
+  void SetEntry(const KeyType &key, const EntryType &entry) {
+    MutexLock l(register_lock_);
+    register_table_.insert(make_pair(key, entry));
+  }
+
+  // Returns the entry registered for 'key'; when none is registered yet,
+  // falls back to loading it from a shared object.
+  EntryType GetEntry(const KeyType &key) const {
+    const EntryType *found = LookupEntry(key);
+    return found ? *found : LoadEntryFromSharedObject(key);
+  }
+
+  virtual ~GenericRegister() { }
+
+ protected:
+  // Override this if you want to be able to load missing definitions from
+  // shared object files. Returns a default-constructed entry on failure.
+  virtual EntryType LoadEntryFromSharedObject(const KeyType &key) const {
+    const string so_filename = ConvertKeyToSoFilename(key);
+
+    void *handle = dlopen(so_filename.c_str(), RTLD_LAZY);
+    if (!handle) {
+      LOG(ERROR) << "GenericRegister::GetEntry : " << dlerror();
+      return EntryType();
+    }
+
+    // We assume that the DSO constructs a static object in its global
+    // scope that does the registration. Thus we need only load it, not
+    // call any methods; a second lookup then finds the new entry.
+    const EntryType *entry = this->LookupEntry(key);
+    if (entry) return *entry;
+
+    LOG(ERROR) << "GenericRegister::GetEntry : "
+               << "lookup failed in shared object: " << so_filename;
+    return EntryType();
+  }
+
+  // Override this to define how to turn a key into an SO filename.
+  virtual string ConvertKeyToSoFilename(const KeyType& key) const = 0;
+
+  // Returns a pointer to the entry stored under 'key', or 0 when the key is
+  // not in the table. The search runs under the register lock.
+  virtual const EntryType *LookupEntry(const KeyType &key) const {
+    MutexLock l(register_lock_);
+    typename RegisterMapType::const_iterator pos = register_table_.find(key);
+    return pos == register_table_.end() ? 0 : &pos->second;
+  }
+
+ private:
+  typedef map<KeyType, EntryType> RegisterMapType;
+
+  // One-time setup invoked through FstOnceInit from GetRegister().
+  static void Init() {
+    register_lock_ = new Mutex;
+    register_ = new RegisterType;
+  }
+
+  static FstOnceType register_init_;
+  static Mutex *register_lock_;
+  static RegisterType *register_;
+
+  RegisterMapType register_table_;
+};
+
+template<class KeyType, class EntryType, class RegisterType>
+FstOnceType GenericRegister<KeyType, EntryType,
+ RegisterType>::register_init_ = FST_ONCE_INIT;  // Once-flag passed to FstOnceInit in GetRegister().
+
+template<class KeyType, class EntryType, class RegisterType>
+Mutex *GenericRegister<KeyType, EntryType, RegisterType>::register_lock_ = 0;  // Allocated in Init().
+
+template<class KeyType, class EntryType, class RegisterType>
+RegisterType *GenericRegister<KeyType, EntryType, RegisterType>::register_ = 0;  // Allocated in Init().
+
+//
+// GENERIC REGISTRATION
+//
+
+// Generic register-er class capable of creating new register entries in the
+// given RegisterType template parameter. This type must define types Key
+// and Entry, and have appropriate static GetRegister() and instance
+// SetEntry() functions. An easy way to accomplish this is to have RegisterType
+// be the type of a subclass of GenericRegister.
+template<class RegisterType>
+class GenericRegisterer {
+ public:
+  typedef typename RegisterType::Key Key;
+  typedef typename RegisterType::Entry Entry;
+
+  // Constructing a registerer stores (key, entry) in the register returned
+  // by RegisterType::GetRegister().
+  GenericRegisterer(Key key, Entry entry) {
+    RegisterType::GetRegister()->SetEntry(key, entry);
+  }
+};
+
+} // namespace fst
+
+#endif // FST_LIB_GENERIC_REGISTER_H_
diff --git a/src/include/fst/heap.h b/src/include/fst/heap.h
new file mode 100644
index 0000000..a7affbd
--- /dev/null
+++ b/src/include/fst/heap.h
@@ -0,0 +1,206 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// All Rights Reserved.
+// Author: Johan Schalkwyk (johans@google.com)
+//
+// \file
+// Implementation of a heap as in STL, but allows tracking positions
+// in heap using a key. The key can be used to do an in-place update of
+// values in the heap.
+
+#ifndef FST_LIB_HEAP_H__
+#define FST_LIB_HEAP_H__
+
+#include <vector>
+using std::vector;
+#include <functional>
+
+#include <fst/compat.h>
+namespace fst {
+
+//
+// \class Heap
+// \brief A templated heap implementation that support in-place update
+// of values.
+//
+// The templated heap implementation is a little different from the
+// STL priority_queue and the *_heap operations in STL. This heap
+// supports indexing of values in the heap via an associated key.
+//
+// Each value is internally associated with a key which is returned
+// to the calling functions on heap insert. This key can be used
+// to later update the specific value in the heap.
+//
+// \param T the element type of the heap, can be POD, Data or Ptr to Data
+// \param Compare Comparison class for determining min-heapness.
+// \param max whether heap top should be max or min element w.r.t. Compare
+//
+
+static const int kNoKey = -1;
+template <class T, class Compare, bool max>
+class Heap {
+ public:
+
+  // Initialize with a specific comparator
+  Heap(Compare comp) : comp_(comp), size_(0) { }
+
+  // Create a heap with initial size of internal arrays of 0
+  Heap() : size_(0) { }
+
+  ~Heap() { }
+
+  // Insert a value into the heap and return the key under which it can
+  // later be passed to Update(). Storage (and the key bound to it) left
+  // behind by earlier Pop()/Clear() calls is reused before the arrays grow.
+  int Insert(const T& val) {
+    if (size_ < A_.size()) {
+      A_[size_] = val;
+      pos_[key_[size_]] = size_;
+    } else {
+      A_.push_back(val);
+      pos_.push_back(size_);
+      key_.push_back(size_);
+    }
+
+    ++size_;
+    return Insert(val, size_ - 1);
+  }
+
+  // Update a value at position given by the key. The pos array is first
+  // indexed by the key. The position gives the position in the heap array.
+  // Once we have the position we can then use the standard heap operations
+  // to calculate the parent and child positions.
+  void Update(int key, const T& val) {
+    int i = pos_[key];
+    // Compare against the parent first (at the root the "parent" is the
+    // slot itself), then store the new value on both paths: Insert(val, i)
+    // only sifts the element already stored at index i and never writes
+    // val, so without this store the stale value would be moved up.
+    const bool better = Better(val, A_[Parent(i)]);
+    A_[i] = val;
+    if (better) {
+      Insert(val, i);
+    } else {
+      Heapify(i);
+    }
+  }
+
+  // Remove and return the greatest (max=true) / least (max=false) value
+  // w.r.t. the comp object from the heap. No emptiness check is made, so
+  // the caller must ensure the heap is not empty.
+  T Pop() {
+    T top = A_[0];
+
+    Swap(0, size_-1);
+    size_--;
+    Heapify(0);
+    return top;
+  }
+
+  // Return the greatest (max=true) / least (max=false) value w.r.t.
+  // comp object from the heap. No emptiness check is made, so the caller
+  // must ensure the heap is not empty.
+  T Top() const {
+    return A_[0];
+  }
+
+  // Check if the heap is empty
+  bool Empty() const {
+    return size_ == 0;
+  }
+
+  // Logically empties the heap; the internal arrays keep their contents
+  // and are reused by later Insert() calls.
+  void Clear() {
+    size_ = 0;
+  }
+
+
+  //
+  // The following private routines are used in a supportive role
+  // for managing the heap and keeping the heap properties.
+  //
+ private:
+  // Compute left child of parent
+  int Left(int i) {
+    return 2*(i+1)-1;   // 0 -> 1, 1 -> 3
+  }
+
+  // Compute right child of parent
+  int Right(int i) {
+    return 2*(i+1);     // 0 -> 2, 1 -> 4
+  }
+
+  // Given a child compute parent
+  int Parent(int i) {
+    return (i-1)/2;     // 1 -> 0, 2 -> 0, 3 -> 1, 4-> 1
+  }
+
+  // Swap a child, parent. Use to move element up/down tree.
+  // Note a little tricky here. When we swap we need to swap:
+  //   the value
+  //   the associated keys
+  //   the position of the value in the heap
+  void Swap(int j, int k) {
+    int tkey = key_[j];
+    pos_[key_[j] = key_[k]] = j;
+    pos_[key_[k] = tkey] = k;
+
+    T val = A_[j];
+    A_[j] = A_[k];
+    A_[k] = val;
+  }
+
+  // Returns true if x should be closer to the top than y: the comparator
+  // is applied as comp_(x, y) for max=false and with its arguments
+  // swapped for max=true.
+  bool Better(const T& x, const T& y) {
+    return max ? comp_(y, x) : comp_(x, y);
+  }
+
+  // Heapify subtree rooted at index i: move the element at i down until
+  // neither child is better than it.
+  void Heapify(int i) {
+    int l = Left(i);
+    int r = Right(i);
+    int best;
+
+    if (l < size_ && Better(A_[l], A_[i]) )
+      best = l;
+    else
+      best = i;
+
+    if (r < size_ && Better(A_[r], A_[best]) )
+      best = r;
+
+    if (best != i) {
+      Swap(i, best);
+      Heapify(best);
+    }
+  }
+
+
+  // Sift the element stored at index i up towards the root while its
+  // parent is not better than val, and return the key of that element.
+  // The caller must already have stored val at index i.
+  int Insert(const T& val, int i) {
+    int p;
+    while (i > 0 && !Better(A_[p = Parent(i)], val)) {
+      Swap(i, p);
+      i = p;
+    }
+
+    return key_[i];
+  }
+
+ private:
+  Compare comp_;
+
+  vector<int> pos_;   // key -> index in A_
+  vector<int> key_;   // index in A_ -> key
+  vector<T> A_;       // heap-ordered values; only the first size_ are live
+  int size_;
+
+  // DISALLOW_COPY_AND_ASSIGN(Heap);
+};
+
+} // namespace fst
+
+#endif // FST_LIB_HEAP_H__
diff --git a/src/include/fst/icu.h b/src/include/fst/icu.h
new file mode 100644
index 0000000..6b74c2e
--- /dev/null
+++ b/src/include/fst/icu.h
@@ -0,0 +1,103 @@
+// icu.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: roubert@google.com (Fredrik Roubert)
+
+// Wrapper class for UErrorCode, with conversion operators for direct use in
+// ICU C and C++ APIs.
+//
+// Features:
+// - The constructor initializes the internal UErrorCode to U_ZERO_ERROR,
+// removing one common source of errors.
+// - Same use in C APIs taking a UErrorCode* (pointer) and C++ taking
+// UErrorCode& (reference), via conversion operators.
+// - Automatic checking for success when it goes out of scope. On failure,
+// the destructor will FSTERROR() an error message.
+//
+// Most of ICU will handle errors gracefully and provide sensible fallbacks.
+// Using IcuErrorCode, it is therefore possible to write very compact code
+// that does sensible things on failure and provides logging for debugging.
+//
+// Example:
+//
+// IcuErrorCode icuerrorcode;
+// return collator.compareUTF8(a, b, icuerrorcode) == UCOL_EQUAL;
+
+#ifndef FST_LIB_ICU_H_
+#define FST_LIB_ICU_H_
+
+#include <unicode/errorcode.h>
+#include <unicode/unistr.h>
+#include <unicode/ustring.h>
+#include <unicode/utf8.h>
+
+class IcuErrorCode : public icu::ErrorCode {
+ public:
+  IcuErrorCode() {}
+
+  // Reports a failure that is still recorded when the object is destroyed.
+  virtual ~IcuErrorCode() {
+    if (isFailure()) {
+      handleFailure();
+    }
+  }
+
+  // Redefine 'errorName()' in order to be compatible with ICU version 4.2
+  const char* errorName() const { return u_errorName(errorCode); }
+
+ protected:
+  // Sends the ICU error name to FSTERROR().
+  virtual void handleFailure() const { FSTERROR() << errorName(); }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(IcuErrorCode);
+};
+
+namespace fst {
+
+template <class Label>
+bool UTF8StringToLabels(const string &str, vector<Label> *labels) {
+  // Decode one code point per iteration and append it to *labels. Decoding
+  // stops with false at the first invalid sequence; labels appended before
+  // that point are left in place.
+  const char *bytes = str.c_str();
+  const int32_t num_bytes = str.size();
+  int32_t offset = 0;
+  while (offset < num_bytes) {
+    UChar32 code_point;
+    U8_NEXT(bytes, offset, num_bytes, code_point);  // advances offset
+    if (code_point < 0) {
+      LOG(ERROR) << "UTF8StringToLabels: Invalid character found: "
+                 << code_point;
+      return false;
+    }
+    labels->push_back(code_point);
+  }
+  return true;
+}
+
+template <class Label>
+bool LabelsToUTF8String(const vector<Label> &labels, string *str) {
+  // Each label is converted on its own and the resulting bytes are appended
+  // to *str (existing content of *str is kept).
+  const int32_t kBufferSize = 5;
+  char utf8[kBufferSize];
+  icu::UnicodeString one_label;
+  typedef typename vector<Label>::const_iterator LabelIter;
+  for (LabelIter it = labels.begin(); it != labels.end(); ++it) {
+    one_label.setTo(*it);
+    IcuErrorCode status;
+    u_strToUTF8(utf8, kBufferSize, NULL, one_label.getTerminatedBuffer(), -1,
+                status);
+    if (status.isFailure()) {
+      LOG(ERROR) << "LabelsToUTF8String: Bad encoding: "
+                 << status.errorName();
+      return false;
+    }
+    str->append(utf8);
+  }
+  return true;
+}
+
+} // namespace fst
+
+#endif // FST_LIB_ICU_H_
diff --git a/src/include/fst/intersect.h b/src/include/fst/intersect.h
new file mode 100644
index 0000000..f46116f
--- /dev/null
+++ b/src/include/fst/intersect.h
@@ -0,0 +1,172 @@
+// intersect.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Class to compute the intersection of two FSAs
+
+#ifndef FST_LIB_INTERSECT_H__
+#define FST_LIB_INTERSECT_H__
+
+#include <algorithm>
+#include <vector>
+using std::vector;
+
+#include <fst/cache.h>
+#include <fst/compose.h>
+
+
+namespace fst {
+
+// Options for IntersectFst; they are the ComposeFstOptions for the same
+// arc, matcher, filter and state-table types.
+template <class A,
+          class M = Matcher<Fst<A> >,
+          class F = SequenceComposeFilter<M>,
+          class T = GenericComposeStateTable<A, typename F::FilterState> >
+struct IntersectFstOptions : public ComposeFstOptions<A, M, F, T> {
+  IntersectFstOptions() {}
+
+  // Forwards every argument unchanged to the ComposeFstOptions constructor.
+  explicit IntersectFstOptions(const CacheOptions &opts,
+                               M *mat1 = 0, M *mat2 = 0,
+                               F *filt = 0, T *sttable= 0)
+      : ComposeFstOptions<A, M, F, T>(opts, mat1, mat2, filt, sttable) { }
+};
+
+// Computes the intersection (Hadamard product) of two FSAs. This
+// version is a delayed Fst. Only strings that are in both automata
+// are retained in the result.
+//
+// The two arguments must be acceptors. One of the arguments must be
+// label-sorted.
+//
+// Complexity: same as ComposeFst.
+//
+// Caveats: same as ComposeFst.
+template <class A>
+class IntersectFst : public ComposeFst<A> {
+ public:
+  using ComposeFst<A>::CreateBase;
+  using ComposeFst<A>::CreateBase1;
+  using ComposeFst<A>::Properties;
+  using ImplToFst< ComposeFstImplBase<A> >::GetImpl;
+  using ImplToFst< ComposeFstImplBase<A> >::SetImpl;
+
+  typedef A Arc;
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+
+  // Builds the delayed intersection using default composition settings.
+  // The acceptor test runs before the implementation is created; a failed
+  // test is reported afterwards on the new implementation.
+  IntersectFst(const Fst<A> &fst1, const Fst<A> &fst2,
+               const CacheOptions opts = CacheOptions()) {
+    const bool both_acceptors = AreAcceptors(fst1, fst2);
+    SetImpl(CreateBase(fst1, fst2, opts));
+    if (!both_acceptors) FlagNonAcceptors();
+  }
+
+  // Same as above, with matcher, filter and state table chosen by 'opts'.
+  template <class M, class F, class T>
+  IntersectFst(const Fst<A> &fst1, const Fst<A> &fst2,
+               const IntersectFstOptions<A, M, F, T> &opts) {
+    const bool both_acceptors = AreAcceptors(fst1, fst2);
+    SetImpl(CreateBase1(fst1, fst2, opts));
+    if (!both_acceptors) FlagNonAcceptors();
+  }
+
+  // See Fst<>::Copy() for doc.
+  IntersectFst(const IntersectFst<A> &fst, bool safe = false)
+      : ComposeFst<A>(fst, safe) {}
+
+  // Get a copy of this IntersectFst. See Fst<>::Copy() for further doc.
+  virtual IntersectFst<A> *Copy(bool safe = false) const {
+    return new IntersectFst<A>(*this, safe);
+  }
+
+ private:
+  // True when both inputs test as acceptors; the second FST is only tested
+  // if the first one passes.
+  static bool AreAcceptors(const Fst<A> &fst1, const Fst<A> &fst2) {
+    return fst1.Properties(kAcceptor, true) &&
+           fst2.Properties(kAcceptor, true);
+  }
+
+  // Logs the error and marks the current implementation with kError.
+  void FlagNonAcceptors() {
+    FSTERROR() << "IntersectFst: input FSTs are not acceptors";
+    GetImpl()->SetProperties(kError);
+  }
+};
+
+
+// Specialization for IntersectFst; it only forwards construction to the ComposeFst state iterator.
+template <class A>
+class StateIterator< IntersectFst<A> >
+ : public StateIterator< ComposeFst<A> > {
+ public:
+ explicit StateIterator(const IntersectFst<A> &fst)
+ : StateIterator< ComposeFst<A> >(fst) {}
+};
+
+
+// Specialization for IntersectFst; it only forwards construction to the ComposeFst arc iterator.
+template <class A>
+class ArcIterator< IntersectFst<A> >
+ : public ArcIterator< ComposeFst<A> > {
+ public:
+ typedef typename A::StateId StateId;
+
+ ArcIterator(const IntersectFst<A> &fst, StateId s)
+ : ArcIterator< ComposeFst<A> >(fst, s) {}
+};
+
+// Useful alias when using StdArc.
+typedef IntersectFst<StdArc> StdIntersectFst;
+
+
+typedef ComposeOptions IntersectOptions;  // Intersect() is configured with plain ComposeOptions.
+
+
+// Computes the intersection (Hadamard product) of two FSAs. This
+// version writes the intersection to an output MutableFst. Only
+// strings that are in both automata are retained in the result.
+//
+// The two arguments must be acceptors. One of the arguments must be
+// label-sorted.
+//
+// Complexity: same as Compose.
+//
+// Caveats: same as Compose.
+template<class Arc>
+void Intersect(const Fst<Arc> &ifst1, const Fst<Arc> &ifst2,
+               MutableFst<Arc> *ofst,
+               const IntersectOptions &opts = IntersectOptions()) {
+  typedef Matcher< Fst<Arc> > M;
+
+  // Build the delayed intersection for the requested filter and assign it
+  // to *ofst. In every case gc_limit is set to 0 so that only the last
+  // state is cached, for fastest copy. An unlisted filter type leaves
+  // *ofst as it was.
+  switch (opts.filter_type) {
+    case AUTO_FILTER: {
+      CacheOptions nopts;
+      nopts.gc_limit = 0;
+      *ofst = IntersectFst<Arc>(ifst1, ifst2, nopts);
+      break;
+    }
+    case SEQUENCE_FILTER: {
+      IntersectFstOptions<Arc> iopts;
+      iopts.gc_limit = 0;
+      *ofst = IntersectFst<Arc>(ifst1, ifst2, iopts);
+      break;
+    }
+    case ALT_SEQUENCE_FILTER: {
+      IntersectFstOptions<Arc, M, AltSequenceComposeFilter<M> > iopts;
+      iopts.gc_limit = 0;
+      *ofst = IntersectFst<Arc>(ifst1, ifst2, iopts);
+      break;
+    }
+    case MATCH_FILTER: {
+      IntersectFstOptions<Arc, M, MatchComposeFilter<M> > iopts;
+      iopts.gc_limit = 0;
+      *ofst = IntersectFst<Arc>(ifst1, ifst2, iopts);
+      break;
+    }
+    default:
+      break;
+  }
+
+  // Optionally trim the result with Connect().
+  if (opts.connect) {
+    Connect(ofst);
+  }
+}
+
+} // namespace fst
+
+#endif // FST_LIB_INTERSECT_H__
diff --git a/src/include/fst/interval-set.h b/src/include/fst/interval-set.h
new file mode 100644
index 0000000..cf6ac54
--- /dev/null
+++ b/src/include/fst/interval-set.h
@@ -0,0 +1,381 @@
+// interval-set.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Class to represent and operate on sets of intervals.
+
+#ifndef FST_LIB_INTERVAL_SET_H__
+#define FST_LIB_INTERVAL_SET_H__
+
+#include <iostream>
+#include <vector>
+using std::vector;
+
+
+#include <fst/util.h>
+
+
+namespace fst {
+
+// Stores and operates on a set of half-open integral intervals [a,b)
+// of signed integers of type T.
+//
+// Most operations require the set to be normalized (see Normalize()):
+// intervals sorted, non-empty, and neither overlapping nor adjacent.
+template <typename T>
+class IntervalSet {
+ public:
+  // A single half-open interval [begin, end).
+  struct Interval {
+    T begin;
+    T end;
+
+    Interval() : begin(-1), end(-1) {}
+
+    Interval(T b, T e) : begin(b), end(e) {}
+
+    // Orders by increasing begin; for equal begins the LONGER interval
+    // (larger end) comes first.
+    bool operator<(const Interval &i) const {
+      return begin < i.begin || (begin == i.begin && end > i.end);
+    }
+
+    bool operator==(const Interval &i) const {
+      return begin == i.begin && end == i.end;
+    }
+
+    bool operator!=(const Interval &i) const {
+      return begin != i.begin || end != i.end;
+    }
+
+    // Reads 'begin' then 'end' from the stream.
+    istream &Read(istream &strm) {
+      T n;
+      ReadType(strm, &n);
+      begin = n;
+      ReadType(strm, &n);
+      end = n;
+      return strm;
+    }
+
+    // Writes 'begin' then 'end' to the stream.
+    ostream &Write(ostream &strm) const {
+      T n = begin;
+      WriteType(strm, n);
+      n = end;
+      WriteType(strm, n);
+      return strm;
+    }
+  };
+
+  // Creates an empty set; the count is -1 until Clear(), Normalize()
+  // or an operation writing into this set assigns it.
+  IntervalSet() : count_(-1) {}
+
+  // Returns the interval set as a vector.
+  vector<Interval> *Intervals() { return &intervals_; }
+
+  const vector<Interval> *Intervals() const { return &intervals_; }
+
+  // True iff no intervals are stored.
+  bool Empty() const { return intervals_.empty(); }
+
+  // Number of stored intervals.
+  T Size() const { return intervals_.size(); }
+
+  // Number of points in the intervals (undefined if not normalized).
+  T Count() const { return count_; }
+
+  void Clear() {
+    intervals_.clear();
+    count_ = 0;
+  }
+
+  // Adds an interval set to the set. The result may not be normalized
+  // and the count is not updated.
+  void Union(const IntervalSet<T> &iset) {
+    typedef typename vector<Interval>::size_type Index;
+    const vector<Interval> *intervals = iset.Intervals();
+    // The element count is captured and capacity reserved up front, and
+    // the copy goes by index, so that a self-union (iset is *this) does
+    // not walk iterators invalidated by the growth of intervals_.
+    const Index n = intervals->size();
+    intervals_.reserve(intervals_.size() + n);
+    for (Index i = 0; i < n; ++i)
+      intervals_.push_back((*intervals)[i]);
+  }
+
+  // Returns true iff 'value' lies in one of the intervals.
+  // Requires intervals be normalized.
+  bool Member(T value) const {
+    // The empty probe [value, value) sorts after every interval that
+    // begins at or before 'value', so the candidate interval is the one
+    // just before the lower bound.
+    Interval interval(value, value);
+    typename vector<Interval>::const_iterator lb =
+        lower_bound(intervals_.begin(), intervals_.end(), interval);
+    if (lb == intervals_.begin())
+      return false;
+    return (--lb)->end > value;
+  }
+
+  // Requires intervals be normalized.
+  bool operator==(const IntervalSet<T>& iset) const {
+    return *(iset.Intervals()) == intervals_;
+  }
+
+  // Requires intervals be normalized.
+  bool operator!=(const IntervalSet<T>& iset) const {
+    return *(iset.Intervals()) != intervals_;
+  }
+
+  // True iff the set is exactly one interval containing one point.
+  bool Singleton() const {
+    return intervals_.size() == 1 &&
+        intervals_[0].begin + 1 == intervals_[0].end;
+  }
+
+
+  // Sorts; collapses overlapping and adjacent intervals; sets count.
+  void Normalize();
+
+  // Intersects an interval set with the set. Requires intervals be
+  // normalized. The result is normalized.
+  void Intersect(const IntervalSet<T> &iset, IntervalSet<T> *oset) const;
+
+  // Complements the set w.r.t [0, maxval). Requires intervals be
+  // normalized. The result is normalized.
+  void Complement(T maxval, IntervalSet<T> *oset) const;
+
+  // Subtract an interval set from the set. Requires intervals be
+  // normalized. The result is normalized.
+  void Difference(const IntervalSet<T> &iset, IntervalSet<T> *oset) const;
+
+  // Determines if an interval set overlaps with the set. Requires
+  // intervals be normalized.
+  bool Overlaps(const IntervalSet<T> &iset) const;
+
+  // Determines if an interval set overlaps with the set but neither
+  // is contained in the other. Requires intervals be normalized.
+  bool StrictlyOverlaps(const IntervalSet<T> &iset) const;
+
+  // Determines if an interval set is contained within the set. Requires
+  // intervals be normalized.
+  bool Contains(const IntervalSet<T> &iset) const;
+
+  // Reads the interval vector followed by the count.
+  istream &Read(istream &strm) {
+    ReadType(strm, &intervals_);
+    return ReadType(strm, &count_);
+  }
+
+  // Writes the interval vector followed by the count.
+  ostream &Write(ostream &strm) const {
+    WriteType(strm, intervals_);
+    return WriteType(strm, count_);
+  }
+
+ private:
+  vector<Interval> intervals_;  // The stored intervals.
+  T count_;                     // Number of points covered (see Count()).
+};
+
+// Sorts the intervals, drops empty ones, merges overlapping and
+// adjacent intervals in place, and sets the point count.
+template <typename T>
+void IntervalSet<T>::Normalize() {
+  sort(intervals_.begin(), intervals_.end());
+
+  count_ = 0;
+  T nkept = 0;  // Number of merged intervals written back so far.
+  T pos = 0;    // Next unprocessed input interval.
+  while (pos < intervals_.size()) {
+    Interval &merged = intervals_[pos];
+    if (merged.begin == merged.end) {  // Empty interval: discard.
+      ++pos;
+      continue;
+    }
+    // Absorbs every following interval that starts at or before the
+    // current end; the end may grow while doing so.
+    T next = pos + 1;
+    while (next < intervals_.size() &&
+           !(intervals_[next].begin > merged.end)) {
+      if (intervals_[next].end > merged.end)
+        merged.end = intervals_[next].end;
+      ++next;
+    }
+    count_ += merged.end - merged.begin;
+    intervals_[nkept++] = merged;
+    pos = next;
+  }
+  intervals_.resize(nkept);
+}
+
+// Writes to 'oset' the intersection of this set with 'iset', together
+// with its point count. Requires intervals be normalized. The result
+// is normalized.
+template <typename T>
+void IntervalSet<T>::Intersect(const IntervalSet<T> &iset,
+                               IntervalSet<T> *oset) const {
+  typedef typename vector<Interval>::const_iterator Iter;
+  const vector<Interval> *theirs = iset.Intervals();
+  vector<Interval> *result = oset->Intervals();
+  Iter mine = intervals_.begin();
+  Iter other = theirs->begin();
+
+  result->clear();
+  oset->count_ = 0;
+
+  // Linear merge over the two sorted interval lists.
+  while (mine != intervals_.end() && other != theirs->end()) {
+    if (mine->end <= other->begin) {  // 'mine' lies wholly before 'other'.
+      ++mine;
+      continue;
+    }
+    if (other->end <= mine->begin) {  // 'other' lies wholly before 'mine'.
+      ++other;
+      continue;
+    }
+    // The two intervals share at least one point.
+    Interval common(max(mine->begin, other->begin),
+                    min(mine->end, other->end));
+    result->push_back(common);
+    oset->count_ += common.end - common.begin;
+    // Advances whichever interval finishes first.
+    if (mine->end < other->end)
+      ++mine;
+    else
+      ++other;
+  }
+}
+
+// Writes to 'oset' the complement of this set w.r.t [0, maxval),
+// together with its point count. Requires intervals be normalized.
+// The result is normalized.
+template <typename T>
+void IntervalSet<T>::Complement(T maxval, IntervalSet<T> *oset) const {
+  typedef typename vector<Interval>::const_iterator Iter;
+  vector<Interval> *result = oset->Intervals();
+  result->clear();
+  oset->count_ = 0;
+
+  // Each gap runs from the end of the previous interval (or 0) to the
+  // begin of the next one, clipped to maxval.
+  T gap_begin = 0;
+  for (Iter it = intervals_.begin(); it != intervals_.end(); ++it) {
+    const T gap_end = min(it->begin, maxval);
+    if (gap_begin < gap_end) {
+      result->push_back(Interval(gap_begin, gap_end));
+      oset->count_ += gap_end - gap_begin;
+    }
+    gap_begin = it->end;
+  }
+  // Trailing gap between the last interval and maxval.
+  if (gap_begin < maxval) {
+    result->push_back(Interval(gap_begin, maxval));
+    oset->count_ += maxval - gap_begin;
+  }
+}
+
+// Writes to 'oset' this set minus 'iset'. Requires intervals be
+// normalized. The result is normalized.
+template <typename T>
+void IntervalSet<T>::Difference(const IntervalSet<T> &iset,
+                                IntervalSet<T> *oset) const {
+  if (!intervals_.empty()) {
+    // Complements 'iset' up to the end of this set's last interval,
+    // then intersects this set with that complement.
+    IntervalSet<T> complement;
+    iset.Complement(intervals_.back().end, &complement);
+    Intersect(complement, oset);
+    return;
+  }
+  // Nothing to subtract from: the result is empty.
+  oset->Intervals()->clear();
+  oset->count_ = 0;
+}
+
+// Returns true iff this set and 'iset' have at least one point in
+// common. Requires intervals be normalized.
+template <typename T>
+bool IntervalSet<T>::Overlaps(const IntervalSet<T> &iset) const {
+  typedef typename vector<Interval>::const_iterator Iter;
+  const vector<Interval> *theirs = iset.Intervals();
+  Iter mine = intervals_.begin();
+  Iter other = theirs->begin();
+
+  // Linear merge: skips whichever interval lies wholly before the other.
+  while (mine != intervals_.end() && other != theirs->end()) {
+    if (mine->end <= other->begin)
+      ++mine;
+    else if (other->end > mine->begin)
+      return true;
+    else
+      ++other;
+  }
+  return false;
+}
+
+// Returns true iff this set and 'iset' share a point while each also
+// has a point the other lacks (i.e. they overlap but neither is
+// contained in the other). Requires intervals be normalized.
+template <typename T>
+bool IntervalSet<T>::StrictlyOverlaps(const IntervalSet<T> &iset) const {
+  typedef typename vector<Interval>::const_iterator Iter;
+  const vector<Interval> *theirs = iset.Intervals();
+  Iter mine = intervals_.begin();
+  Iter other = theirs->begin();
+  bool in_this_only = false;   // Some point is in this set but not 'iset'.
+  bool in_other_only = false;  // Some point is in 'iset' but not this set.
+  bool in_both = false;        // Some point is in both sets.
+
+  while (mine != intervals_.end() && other != theirs->end()) {
+    const Interval &a = *mine;
+    const Interval &b = *other;
+    if (a.end <= b.begin) {         // Disjoint, 'a' first.
+      in_this_only = true;
+      ++mine;
+    } else if (b.end <= a.begin) {  // Disjoint, 'b' first.
+      in_other_only = true;
+      ++other;
+    } else if (b == a) {            // Identical intervals.
+      in_both = true;
+      ++mine;
+      ++other;
+    } else if (b.begin <= a.begin && b.end >= a.end) {  // 'a' inside 'b'.
+      in_other_only = true;
+      in_both = true;
+      ++mine;
+    } else if (a.begin <= b.begin && a.end >= b.end) {  // 'b' inside 'a'.
+      in_this_only = true;
+      in_both = true;
+      ++other;
+    } else {                        // Partial (strict) overlap.
+      in_this_only = true;
+      in_other_only = true;
+      in_both = true;
+    }
+    if (in_this_only && in_other_only && in_both)
+      return true;
+  }
+  // Any interval left over on one side has points the other set lacks.
+  if (mine != intervals_.end())
+    in_this_only = true;
+  if (other != theirs->end())
+    in_other_only = true;
+
+  return in_this_only && in_other_only && in_both;
+}
+
+// Returns true iff every interval of 'iset' lies within an interval of
+// this set. Requires intervals be normalized.
+template <typename T>
+bool IntervalSet<T>::Contains(const IntervalSet<T> &iset) const {
+  // A set with more points cannot be contained in this one.
+  if (iset.Count() > Count())
+    return false;
+
+  typedef typename vector<Interval>::const_iterator Iter;
+  const vector<Interval> *theirs = iset.Intervals();
+  Iter mine = intervals_.begin();
+  Iter other = theirs->begin();
+
+  while (mine != intervals_.end() && other != theirs->end()) {
+    if (mine->end <= other->begin) {  // 'mine' ends before 'other' starts.
+      ++mine;
+      continue;
+    }
+    const bool fits = mine->begin <= other->begin && other->end <= mine->end;
+    if (!fits)
+      return false;
+    // 'other' is covered; 'mine' is also finished when the ends coincide.
+    if (other->end == mine->end)
+      ++mine;
+    ++other;
+  }
+  // Contained only if every interval of 'iset' was matched.
+  return other == theirs->end();
+}
+
+// Prints the set as "{[b1,e1),[b2,e2),...}".
+template <typename T>
+ostream &operator<<(ostream &strm, const IntervalSet<T> &s) {
+  typedef typename IntervalSet<T>::Interval Interval;
+  typedef typename vector<Interval>::size_type Index;
+  const vector<Interval> &intervals = *s.Intervals();
+  strm << "{";
+  for (Index i = 0; i < intervals.size(); ++i) {
+    if (i > 0)
+      strm << ",";
+    strm << "[" << intervals[i].begin << "," << intervals[i].end << ")";
+  }
+  strm << "}";
+  return strm;
+}
+
+} // namespace fst
+
+#endif // FST_LIB_INTERVAL_SET_H__
diff --git a/src/include/fst/invert.h b/src/include/fst/invert.h
new file mode 100644
index 0000000..bc83a5d
--- /dev/null
+++ b/src/include/fst/invert.h
@@ -0,0 +1,125 @@
+// invert.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Functions and classes to invert an Fst.
+
+#ifndef FST_LIB_INVERT_H__
+#define FST_LIB_INVERT_H__
+
+#include <fst/arc-map.h>
+#include <fst/mutable-fst.h>
+
+
+namespace fst {
+
+// Arc mapper that implements inversion: each mapped arc has its input
+// and output labels exchanged; weight and next state are kept.
+template <class A> struct InvertMapper {
+  InvertMapper() {}
+
+  // Returns 'arc' with ilabel and olabel swapped.
+  A operator()(const A &arc) {
+    A inverted(arc.olabel, arc.ilabel, arc.weight, arc.nextstate);
+    return inverted;
+  }
+
+  MapFinalAction FinalAction() const {
+    return MAP_NO_SUPERFINAL;
+  }
+
+  MapSymbolsAction InputSymbolsAction() const {
+    return MAP_CLEAR_SYMBOLS;
+  }
+
+  MapSymbolsAction OutputSymbolsAction() const {
+    return MAP_CLEAR_SYMBOLS;
+  }
+
+  // Property bits of the inverted FST, as given by InvertProperties().
+  uint64 Properties(uint64 props) {
+    return InvertProperties(props);
+  }
+};
+
+
+// Inverts the transduction corresponding to an FST by exchanging the
+// FST's input and output labels. This version modifies its input.
+// The input and output symbol tables, when present, are exchanged too.
+//
+// Complexity:
+// - Time: O(V + E)
+// - Space: O(1)
+// where V = # of states and E = # of arcs.
+template<class Arc> inline
+void Invert(MutableFst<Arc> *fst) {
+  // Copies of the symbol tables are taken before mapping, since the
+  // mapper's symbol actions are MAP_CLEAR_SYMBOLS.
+  SymbolTable *old_isyms = 0;
+  if (fst->InputSymbols())
+    old_isyms = fst->InputSymbols()->Copy();
+  SymbolTable *old_osyms = 0;
+  if (fst->OutputSymbols())
+    old_osyms = fst->OutputSymbols()->Copy();
+
+  ArcMap(fst, InvertMapper<Arc>());
+
+  // Installs the saved tables on the opposite sides, then frees the
+  // local copies.
+  fst->SetInputSymbols(old_osyms);
+  fst->SetOutputSymbols(old_isyms);
+  delete old_isyms;
+  delete old_osyms;
+}
+
+
+// Inverts the transduction corresponding to an FST by exchanging the
+// FST's input and output labels. This version is a delayed Fst.
+//
+// Complexity:
+// - Time: O(v + e)
+// - Space: O(1)
+// where v = # of states visited, e = # of arcs visited. Constant
+// time and space to visit an input state or arc is assumed and
+// exclusive of caching.
+template <class A>
+class InvertFst : public ArcMapFst<A, A, InvertMapper<A> > {
+ public:
+  typedef A Arc;
+  typedef InvertMapper<A> C;
+  typedef ArcMapFstImpl< A, A, InvertMapper<A> > Impl;
+  using ImplToFst<Impl>::GetImpl;
+
+  // Builds the delayed inversion of 'fst'; the symbol tables of 'fst'
+  // are installed on the opposite sides of the result.
+  explicit InvertFst(const Fst<A> &fst)
+      : ArcMapFst<A, A, C>(fst, C()) {
+    Impl *impl = GetImpl();
+    impl->SetOutputSymbols(fst.InputSymbols());
+    impl->SetInputSymbols(fst.OutputSymbols());
+  }
+
+  // See Fst<>::Copy() for doc.
+  InvertFst(const InvertFst<A> &fst, bool safe = false)
+      : ArcMapFst<A, A, C>(fst, safe) {}
+
+  // Get a copy of this InvertFst. See Fst<>::Copy() for further doc.
+  virtual InvertFst<A> *Copy(bool safe = false) const {
+    InvertFst<A> *copy = new InvertFst(*this, safe);
+    return copy;
+  }
+};
+
+
+// StateIterator specialization for InvertFst; forwards to the
+// iterator of the underlying ArcMapFst.
+template <class A>
+class StateIterator< InvertFst<A> >
+    : public StateIterator< ArcMapFst<A, A, InvertMapper<A> > > {
+ private:
+  typedef StateIterator< ArcMapFst<A, A, InvertMapper<A> > >
+      MapFstStateIterator;
+
+ public:
+  explicit StateIterator(const InvertFst<A> &fst)
+      : MapFstStateIterator(fst) {}
+};
+
+
+// ArcIterator specialization for InvertFst; forwards to the iterator
+// of the underlying ArcMapFst.
+template <class A>
+class ArcIterator< InvertFst<A> >
+    : public ArcIterator< ArcMapFst<A, A, InvertMapper<A> > > {
+ private:
+  typedef ArcIterator< ArcMapFst<A, A, InvertMapper<A> > >
+      MapFstArcIterator;
+
+ public:
+  ArcIterator(const InvertFst<A> &fst, typename A::StateId s)
+      : MapFstArcIterator(fst, s) {}
+};
+
+
+// Useful alias when using StdArc.
+typedef InvertFst<StdArc> StdInvertFst;
+
+} // namespace fst
+
+#endif // FST_LIB_INVERT_H__
diff --git a/src/include/fst/label-reachable.h b/src/include/fst/label-reachable.h
new file mode 100644
index 0000000..8f9aca8
--- /dev/null
+++ b/src/include/fst/label-reachable.h
@@ -0,0 +1,565 @@
+// label_reachable.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Class to determine if a non-epsilon label can be read as the
+// first non-epsilon symbol along some path from a given state.
+
+
+#ifndef FST_LIB_LABEL_REACHABLE_H__
+#define FST_LIB_LABEL_REACHABLE_H__
+
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <vector>
+using std::vector;
+
+#include <fst/accumulator.h>
+#include <fst/arcsort.h>
+#include <fst/interval-set.h>
+#include <fst/state-reachable.h>
+#include <fst/vector-fst.h>
+
+
+namespace fst {
+
+// Stores shareable data for label reachable class copies: the
+// label-to-index relabeling map, the per-state interval sets and a
+// reference count.
+template <typename L>
+class LabelReachableData {
+ public:
+  typedef L Label;
+  typedef typename IntervalSet<L>::Interval Interval;
+
+  // reach_input: whether input (true) or output (false) labels are
+  // considered. keep_relabel_data: whether Write() saves the
+  // relabeling map.
+  explicit LabelReachableData(bool reach_input, bool keep_relabel_data = true)
+      : reach_input_(reach_input),
+        keep_relabel_data_(keep_relabel_data),
+        have_relabel_data_(true),
+        final_label_(kNoLabel) {}
+
+  ~LabelReachableData() {}
+
+  bool ReachInput() const { return reach_input_; }
+
+  vector< IntervalSet<L> > *IntervalSets() { return &isets_; }
+
+  // Returns the relabeling map. Reports an error through FSTERROR() if
+  // no relabeling data is held, but still returns the (empty) map.
+  unordered_map<L, L> *Label2Index() {
+    if (!have_relabel_data_)
+      FSTERROR() << "LabelReachableData: no relabeling data";
+    return &label2index_;
+  }
+
+  // Returns the index assigned to kNoLabel, looking it up in the
+  // relabeling map on first use and caching it afterwards.
+  Label FinalLabel() {
+    if (final_label_ == kNoLabel)
+      final_label_ = label2index_[kNoLabel];
+    return final_label_;
+  }
+
+  // Reads a new instance from the stream, in the order written by
+  // Write(). The caller receives the newly allocated object.
+  static LabelReachableData<L> *Read(istream &istrm) {
+    LabelReachableData<L> *data = new LabelReachableData<L>();
+
+    ReadType(istrm, &data->reach_input_);
+    ReadType(istrm, &data->keep_relabel_data_);
+    data->have_relabel_data_ = data->keep_relabel_data_;
+    if (data->keep_relabel_data_)
+      ReadType(istrm, &data->label2index_);
+    ReadType(istrm, &data->final_label_);
+    ReadType(istrm, &data->isets_);
+    return data;
+  }
+
+  // Writes the flags, the relabeling map (only if keep_relabel_data_),
+  // the final label and the interval sets. Always returns true.
+  bool Write(ostream &ostrm) {
+    WriteType(ostrm, reach_input_);
+    WriteType(ostrm, keep_relabel_data_);
+    if (keep_relabel_data_)
+      WriteType(ostrm, label2index_);
+    WriteType(ostrm, FinalLabel());
+    WriteType(ostrm, isets_);
+    return true;
+  }
+
+  int RefCount() const { return ref_count_.count(); }
+  int IncrRefCount() { return ref_count_.Incr(); }
+  int DecrRefCount() { return ref_count_.Decr(); }
+
+ private:
+  // Used only by Read(). Every scalar member gets a defined value so
+  // that a failed stream read cannot leave indeterminate state behind.
+  LabelReachableData()
+      : reach_input_(false),
+        keep_relabel_data_(false),
+        have_relabel_data_(false),
+        final_label_(kNoLabel) {}
+
+  bool reach_input_;                  // Input or output labels considered?
+  bool keep_relabel_data_;            // Save label2index_ to file?
+  bool have_relabel_data_;            // Using label2index_?
+  Label final_label_;                 // Final label
+  RefCounter ref_count_;              // Reference count.
+  unordered_map<L, L> label2index_;   // Finds index for a label.
+  vector<IntervalSet <L> > isets_;    // Interval sets per state.
+
+  DISALLOW_COPY_AND_ASSIGN(LabelReachableData);
+};
+
+
+// Tests reachability of labels from a given state. If reach_input =
+// true, then input labels are considered, o.w. output labels are
+// considered. To test for reachability from a state s, first do
+// SetState(s). Then a label l can be reached from state s of FST f
+// iff Reach(r) is true where r = Relabel(l). The relabeling is
+// required to ensure a compact representation of the reachable
+// labels.
+//
+// The whole FST can be relabeled instead with Relabel(&f,
+// reach_input) so that the test Reach(r) applies directly to the
+// labels of the transformed FST f. The relabeled FST will also be
+// sorted appropriately for composition.
+//
+// Reachability of a final state from state s (via an epsilon path)
+// can be tested with ReachFinal();
+//
+// Reachability can also be tested on the set of labels specified by
+// an arc iterator, useful for FST composition. In particular,
+// Reach(aiter, ...) is true if labels on the input (output) side of
+// the transitions of the arc iterator, when iter_input is true
+// (false), can be reached from the state s. The iterator labels must
+// have already been relabeled.
+//
+// With the arc iterator test of reachability, the begin position, end
+// position and accumulated arc weight of the matches can be
+// returned. The optional template argument controls how reachable arc
+// weights are accumulated. The default uses the semiring
+// Plus(). Alternative ones can be used to distribute the weights in
+// composition in various ways.
+template <class A, class S = DefaultAccumulator<A> >
+class LabelReachable {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef typename A::Label Label;
+  typedef typename A::Weight Weight;
+  typedef typename IntervalSet<Label>::Interval Interval;
+
+  // Builds the reachability data from 'fst'. A private copy of 'fst'
+  // is transformed, analyzed and then discarded. If 's' is non-null it
+  // is used as the accumulator and deleted by the destructor;
+  // otherwise a default-constructed S is created.
+  LabelReachable(const Fst<A> &fst, bool reach_input, S *s = 0,
+                 bool keep_relabel_data = true)
+      : fst_(new VectorFst<Arc>(fst)),
+        s_(kNoStateId),
+        reach_begin_(-1),
+        reach_end_(-1),
+        reach_weight_(Weight::Zero()),
+        data_(new LabelReachableData<Label>(reach_input, keep_relabel_data)),
+        accumulator_(s ? s : new S()),
+        ncalls_(0),
+        nintervals_(0),
+        error_(false) {
+    StateId ins = fst_->NumStates();
+    TransformFst();
+    FindIntervals(ins);
+    delete fst_;
+    fst_ = 0;  // Do not leave a dangling pointer behind.
+  }
+
+  // Shares already-built reachability data; takes an extra reference
+  // on 'data'. Accumulator handling is as in the constructor above.
+  explicit LabelReachable(LabelReachableData<Label> *data, S *s = 0)
+      : fst_(0),
+        s_(kNoStateId),
+        reach_begin_(-1),
+        reach_end_(-1),
+        reach_weight_(Weight::Zero()),
+        data_(data),
+        accumulator_(s ? s : new S()),
+        ncalls_(0),
+        nintervals_(0),
+        error_(false) {
+    data_->IncrRefCount();
+  }
+
+  // Copy shares the reachability data (taking an extra reference) and
+  // copies the accumulator and error flag; the current state and the
+  // call statistics are reset.
+  LabelReachable(const LabelReachable<A, S> &reachable) :
+      fst_(0),
+      s_(kNoStateId),
+      reach_begin_(-1),
+      reach_end_(-1),
+      reach_weight_(Weight::Zero()),
+      data_(reachable.data_),
+      accumulator_(new S(*reachable.accumulator_)),
+      ncalls_(0),
+      nintervals_(0),
+      error_(reachable.error_) {
+    data_->IncrRefCount();
+  }
+
+  // Releases this object's reference on the shared data (deleting it
+  // when the count drops to zero), deletes the accumulator and logs
+  // call statistics.
+  ~LabelReachable() {
+    if (!data_->DecrRefCount())
+      delete data_;
+    delete accumulator_;
+    if (ncalls_ > 0) {
+      VLOG(2) << "# of calls: " << ncalls_;
+      VLOG(2) << "# of intervals/call: " << (nintervals_ / ncalls_);
+    }
+  }
+
+  // Relabels w.r.t labels that give compact label sets. Epsilon (0)
+  // maps to itself. A label not seen before is assigned a new index.
+  // In the error state the label is returned unchanged.
+  Label Relabel(Label label) {
+    if (label == 0 || error_)
+      return label;
+    unordered_map<Label, Label> &label2index = *data_->Label2Index();
+    Label &relabel = label2index[label];
+    if (!relabel)  // Add new label
+      relabel = label2index.size() + 1;
+    return relabel;
+  }
+
+  // Relabels Fst w.r.t to labels that give compact label sets. The
+  // relabeled side is then arc-sorted and its symbol table cleared.
+  void Relabel(MutableFst<Arc> *fst, bool relabel_input) {
+    for (StateIterator< MutableFst<Arc> > siter(*fst);
+         !siter.Done(); siter.Next()) {
+      StateId s = siter.Value();
+      for (MutableArcIterator< MutableFst<Arc> > aiter(fst, s);
+           !aiter.Done();
+           aiter.Next()) {
+        Arc arc = aiter.Value();
+        if (relabel_input)
+          arc.ilabel = Relabel(arc.ilabel);
+        else
+          arc.olabel = Relabel(arc.olabel);
+        aiter.SetValue(arc);
+      }
+    }
+    if (relabel_input) {
+      ArcSort(fst, ILabelCompare<Arc>());
+      fst->SetInputSymbols(0);
+    } else {
+      ArcSort(fst, OLabelCompare<Arc>());
+      fst->SetOutputSymbols(0);
+    }
+  }
+
+  // Returns relabeling pairs (cf. relabel.h::Relabel()).
+  // If 'avoid_collisions' is true, extra pairs are added to
+  // ensure no collisions when relabeling automata that have
+  // labels unseen here.
+  void RelabelPairs(vector<pair<Label, Label> > *pairs,
+                    bool avoid_collisions = false) {
+    pairs->clear();
+    unordered_map<Label, Label> &label2index = *data_->Label2Index();
+    // Maps labels to their new values in [1, label2index().size()]
+    for (typename unordered_map<Label, Label>::const_iterator
+             it = label2index.begin(); it != label2index.end(); ++it)
+      if (it->second != data_->FinalLabel())
+        pairs->push_back(pair<Label, Label>(it->first, it->second));
+    if (avoid_collisions) {
+      // Ensures any label in [1, label2index().size()] is mapped either
+      // by the above step or to label2index() + 1 (to avoid collisions).
+      for (int i = 1; i <= label2index.size(); ++i) {
+        typename unordered_map<Label, Label>::const_iterator
+            it = label2index.find(i);
+        if (it == label2index.end() || it->second == data_->FinalLabel())
+          pairs->push_back(pair<Label, Label>(i, label2index.size() + 1));
+      }
+    }
+  }
+
+  // Set current state. Optionally set state associated
+  // with arc iterator to be passed to Reach.
+  void SetState(StateId s, StateId aiter_s = kNoStateId) {
+    s_ = s;
+    if (aiter_s != kNoStateId) {
+      accumulator_->SetState(aiter_s);
+      if (accumulator_->Error()) error_ = true;
+    }
+  }
+
+  // Can reach this label from current state?
+  // Original labels must be transformed by the Relabel methods above.
+  // Epsilon (0) is never reported reachable; returns false in the
+  // error state. The current state must have been set by SetState().
+  bool Reach(Label label) {
+    if (label == 0 || error_)
+      return false;
+    vector< IntervalSet<Label> > &isets = *data_->IntervalSets();
+    return isets[s_].Member(label);
+  }
+
+  // Can reach final state (via epsilon transitions) from this state?
+  bool ReachFinal() {
+    if (error_) return false;
+    vector< IntervalSet<Label> > &isets = *data_->IntervalSets();
+    return isets[s_].Member(data_->FinalLabel());
+  }
+
+  // Initialize with secondary FST to be used with Reach(Iterator,...).
+  // If copy is true, then 'fst' is a copy of the FST used in the
+  // previous call to this method (useful to avoid unnecessary updates).
+  template <class F>
+  void ReachInit(const F &fst, bool copy = false) {
+    accumulator_->Init(fst, copy);
+    if (accumulator_->Error()) error_ = true;
+  }
+
+  // Can reach any arc iterator label between iterator positions
+  // aiter_begin and aiter_end?  If aiter_input = true, then iterator
+  // input labels are considered, o.w. output labels are considered.
+  // Arc iterator labels must be transformed by the Relabel methods
+  // above. If compute_weight is true, user may call ReachWeight().
+  // On return ReachBegin()/ReachEnd() give the position of the first
+  // matching arc and one past the last matching arc (both -1 if no
+  // arc matched). The iterator's flags are restored before returning.
+  template <class Iterator>
+  bool Reach(Iterator *aiter, ssize_t aiter_begin,
+             ssize_t aiter_end, bool aiter_input, bool compute_weight) {
+    if (error_) return false;
+    vector< IntervalSet<Label> > &isets = *data_->IntervalSets();
+    const vector<Interval> *intervals = isets[s_].Intervals();
+    ++ncalls_;
+    nintervals_ += intervals->size();
+
+    reach_begin_ = -1;
+    reach_end_ = -1;
+    reach_weight_ = Weight::Zero();
+
+    uint32 flags = aiter->Flags();  // save flags to restore them on exit
+    aiter->SetFlags(kArcNoCache, kArcNoCache);  // make caching optional
+    aiter->Seek(aiter_begin);
+
+    // Picks the cheaper strategy: scan the arcs when they are few
+    // relative to the intervals, otherwise binary-search per interval.
+    if (2 * (aiter_end - aiter_begin) < intervals->size()) {
+      // Check each arc against intervals.
+      // Set arc iterator flags to only compute the ilabel or olabel values,
+      // since they are the only values required for most of the arcs processed.
+      aiter->SetFlags(aiter_input ? kArcILabelValue : kArcOLabelValue,
+                      kArcValueFlags);
+      Label reach_label = kNoLabel;  // Last label found reachable.
+      for (ssize_t aiter_pos = aiter_begin;
+           aiter_pos < aiter_end; aiter->Next(), ++aiter_pos) {
+        const A &arc = aiter->Value();
+        Label label = aiter_input ? arc.ilabel : arc.olabel;
+        if (label == reach_label || Reach(label)) {
+          reach_label = label;
+          if (reach_begin_ < 0)
+            reach_begin_ = aiter_pos;
+          reach_end_ = aiter_pos + 1;
+          if (compute_weight) {
+            if (!(aiter->Flags() & kArcWeightValue)) {
+              // If the 'arc.weight' wasn't computed by the call
+              // to 'aiter->Value()' above, we need to call
+              // 'aiter->Value()' again after having set the arc iterator
+              // flags to compute the arc weight value.
+              aiter->SetFlags(kArcWeightValue, kArcValueFlags);
+              const A &arcb = aiter->Value();
+              // Call the accumulator.
+              reach_weight_ = accumulator_->Sum(reach_weight_, arcb.weight);
+              // Only ilabel or olabel required to process the following
+              // arcs.
+              aiter->SetFlags(aiter_input ? kArcILabelValue : kArcOLabelValue,
+                              kArcValueFlags);
+            } else {
+              // Call the accumulator.
+              reach_weight_ = accumulator_->Sum(reach_weight_, arc.weight);
+            }
+          }
+        }
+      }
+    } else {
+      // Check each interval against arcs
+      ssize_t begin_low, end_low = aiter_begin;
+      for (typename vector<Interval>::const_iterator
+               iiter = intervals->begin();
+           iiter != intervals->end(); ++iiter) {
+        begin_low = LowerBound(aiter, end_low, aiter_end,
+                               aiter_input, iiter->begin);
+        end_low = LowerBound(aiter, begin_low, aiter_end,
+                             aiter_input, iiter->end);
+        if (end_low - begin_low > 0) {
+          if (reach_begin_ < 0)
+            reach_begin_ = begin_low;
+          reach_end_ = end_low;
+          if (compute_weight) {
+            aiter->SetFlags(kArcWeightValue, kArcValueFlags);
+            reach_weight_ = accumulator_->Sum(reach_weight_, aiter,
+                                              begin_low, end_low);
+          }
+        }
+      }
+    }
+
+    aiter->SetFlags(flags, kArcFlags);  // restore original flag values
+    return reach_begin_ >= 0;
+  }
+
+  // Returns iterator position of first matching arc (-1 if none, or if
+  // Reach(aiter, ...) has not been called yet).
+  ssize_t ReachBegin() const { return reach_begin_; }
+
+  // Returns iterator position one past last matching arc (-1 if none,
+  // or if Reach(aiter, ...) has not been called yet).
+  ssize_t ReachEnd() const { return reach_end_; }
+
+  // Return the sum of the weights for matching arcs.
+  // Valid only if compute_weight was true in Reach() call.
+  Weight ReachWeight() const { return reach_weight_; }
+
+  // Access to the relabeling map. Excludes epsilon (0) label but
+  // includes kNoLabel that is used internally for super-final
+  // transitions.
+  const unordered_map<Label, Label>& Label2Index() const {
+    return *data_->Label2Index();
+  }
+
+  LabelReachableData<Label> *GetData() const { return data_; }
+
+  bool Error() const { return error_ || accumulator_->Error(); }
+
+ private:
+  // Redirects labeled arcs (input or output labels determined by
+  // ReachInput()) to new label-specific final states.  Each original
+  // final state is redirected via a transition labeled with kNoLabel
+  // to a new kNoLabel-specific final state.  Creates super-initial
+  // state for all states with zero in-degree.
+  void TransformFst() {
+    StateId ins = fst_->NumStates();  // Number of original states.
+    StateId ons = ins;                // Number of states incl. new finals.
+
+    vector<ssize_t> indeg(ins, 0);
+
+    // Redirects labeled arcs to new final states.
+    for (StateId s = 0; s < ins; ++s) {
+      for (MutableArcIterator< VectorFst<Arc> > aiter(fst_, s);
+           !aiter.Done();
+           aiter.Next()) {
+        Arc arc = aiter.Value();
+        Label label = data_->ReachInput() ? arc.ilabel : arc.olabel;
+        if (label) {
+          if (label2state_.find(label) == label2state_.end()) {
+            label2state_[label] = ons;
+            indeg.push_back(0);
+            ++ons;
+          }
+          arc.nextstate = label2state_[label];
+          aiter.SetValue(arc);
+        }
+        ++indeg[arc.nextstate];      // Finds in-degrees for next step.
+      }
+
+      // Redirects final weights to new final state.
+      Weight final = fst_->Final(s);
+      if (final != Weight::Zero()) {
+        if (label2state_.find(kNoLabel) == label2state_.end()) {
+          label2state_[kNoLabel] = ons;
+          indeg.push_back(0);
+          ++ons;
+        }
+        Arc arc(kNoLabel, kNoLabel, final, label2state_[kNoLabel]);
+        fst_->AddArc(s, arc);
+        ++indeg[arc.nextstate];      // Finds in-degrees for next step.
+
+        fst_->SetFinal(s, Weight::Zero());
+      }
+    }
+
+    // Add new final states to Fst.
+    while (fst_->NumStates() < ons) {
+      StateId s = fst_->AddState();
+      fst_->SetFinal(s, Weight::One());
+    }
+
+    // Creates a super-initial state for all states with zero in-degree.
+    StateId start = fst_->AddState();
+    fst_->SetStart(start);
+    for (StateId s = 0; s < start; ++s) {
+      if (indeg[s] == 0) {
+        Arc arc(0, 0, Weight::One(), s);
+        fst_->AddArc(start, arc);
+      }
+    }
+  }
+
+  // Computes the per-state interval sets on the transformed FST, keeps
+  // those of the first 'ins' (original) states, and records the index
+  // of each label-specific final state as that label's relabeling.
+  // Sets error_ and returns early if the state reachability fails.
+  void FindIntervals(StateId ins) {
+    StateReachable<A, Label> state_reachable(*fst_);
+    if (state_reachable.Error()) {
+      error_ = true;
+      return;
+    }
+
+    vector<Label> &state2index = state_reachable.State2Index();
+    vector< IntervalSet<Label> > &isets = *data_->IntervalSets();
+    isets = state_reachable.IntervalSets();
+    isets.resize(ins);
+
+    unordered_map<Label, Label> &label2index = *data_->Label2Index();
+    for (typename unordered_map<Label, StateId>::const_iterator
+             it = label2state_.begin();
+         it != label2state_.end();
+         ++it) {
+      Label l = it->first;
+      StateId s = it->second;
+      Label i = state2index[s];
+      label2index[l] = i;
+    }
+    label2state_.clear();
+
+    // Statistics for logging only.
+    double nintervals = 0;
+    ssize_t non_intervals = 0;
+    for (ssize_t s = 0; s < ins; ++s) {
+      nintervals += isets[s].Size();
+      if (isets[s].Size() > 1) {
+        ++non_intervals;
+        VLOG(3) << "state: " << s << " # of intervals: " << isets[s].Size();
+      }
+    }
+    VLOG(2) << "# of states: " << ins;
+    VLOG(2) << "# of intervals: " << nintervals;
+    VLOG(2) << "# of intervals/state: " << nintervals/ins;
+    VLOG(2) << "# of non-interval states: " << non_intervals;
+  }
+
+  // Binary search over iterator positions [aiter_begin, aiter_end) for
+  // the first arc whose (input or output) label is not less than
+  // 'match_label'; returns that position and leaves the iterator
+  // positioned at it. Assumes the arcs are sorted on that label.
+  template <class Iterator>
+  ssize_t LowerBound(Iterator *aiter, ssize_t aiter_begin,
+                     ssize_t aiter_end, bool aiter_input,
+                     Label match_label) const {
+    // Only need to compute the ilabel or olabel of arcs when
+    // performing the binary search.
+    aiter->SetFlags(aiter_input ?  kArcILabelValue : kArcOLabelValue,
+                    kArcValueFlags);
+    ssize_t low = aiter_begin;
+    ssize_t high = aiter_end;
+    while (low < high) {
+      // Midpoint written so that it cannot overflow for large positions.
+      ssize_t mid = low + (high - low) / 2;
+      aiter->Seek(mid);
+      Label label = aiter_input ?
+          aiter->Value().ilabel : aiter->Value().olabel;
+      if (label > match_label) {
+        high = mid;
+      } else if (label < match_label) {
+        low = mid + 1;
+      } else {
+        // Find first matching label (when non-deterministic)
+        for (ssize_t i = mid; i > low; --i) {
+          aiter->Seek(i - 1);
+          label = aiter_input ? aiter->Value().ilabel : aiter->Value().olabel;
+          if (label != match_label) {
+            aiter->Seek(i);
+            aiter->SetFlags(kArcValueFlags, kArcValueFlags);
+            return i;
+          }
+        }
+        aiter->SetFlags(kArcValueFlags, kArcValueFlags);
+        return low;
+      }
+    }
+    aiter->Seek(low);
+    aiter->SetFlags(kArcValueFlags, kArcValueFlags);
+    return low;
+  }
+
+  VectorFst<Arc> *fst_;                 // Working copy; 0 once built
+  StateId s_;                           // Current state
+  unordered_map<Label, StateId> label2state_;  // Finds final state for a label
+
+  ssize_t reach_begin_;                 // Iterator pos of first match
+  ssize_t reach_end_;                   // Iterator pos after last match
+  Weight reach_weight_;                 // Gives weight sum of arc iterator
+                                        // arcs with reachable labels.
+  LabelReachableData<Label> *data_;     // Shareable data between copies
+  S *accumulator_;                      // Sums arc weights
+
+  double ncalls_;                       // # of Reach(aiter, ...) calls
+  double nintervals_;                   // # of intervals over those calls
+  bool error_;
+
+  void operator=(const LabelReachable<A, S> &);   // Disallow
+};
+
+} // namespace fst
+
+#endif // FST_LIB_LABEL_REACHABLE_H__
diff --git a/src/include/fst/lexicographic-weight.h b/src/include/fst/lexicographic-weight.h
new file mode 100644
index 0000000..4b55c50
--- /dev/null
+++ b/src/include/fst/lexicographic-weight.h
@@ -0,0 +1,151 @@
+// lexicographic-weight.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: rws@google.com (Richard Sproat)
+//
+// \file
+// Lexicographic weight set and associated semiring operation definitions.
+//
+// A lexicographic weight is a sequence of weights, each of which must have the
+// path property and Times() must be (strongly) cancellative
+// (for all a,b,c != Zero(): Times(c, a) = Times(c, b) => a = b,
+// Times(a, c) = Times(b, c) => a = b).
+// The + operation on two weights a and b is the lexicographically
+// prior of a and b.
+
+#ifndef FST_LIB_LEXICOGRAPHIC_WEIGHT_H__
+#define FST_LIB_LEXICOGRAPHIC_WEIGHT_H__
+
+#include <string>
+
+#include <fst/pair-weight.h>
+#include <fst/weight.h>
+
+
+namespace fst {
+
+ // Pair weight (W1, W2) for the lexicographic semiring. It derives from
+ // PairWeight<W1, W2> and re-exposes its accessors; the free functions
+ // Plus(), Times() and Divide() declared after this class supply the
+ // semiring operations. Both component weight types must have the path
+ // property (kPath); this is checked in the two-argument constructor.
+ template<class W1, class W2>
+ class LexicographicWeight : public PairWeight<W1, W2> {
+  public:
+   using PairWeight<W1, W2>::Value1;
+   using PairWeight<W1, W2>::Value2;
+   using PairWeight<W1, W2>::SetValue1;
+   using PairWeight<W1, W2>::SetValue2;
+   using PairWeight<W1, W2>::Zero;
+   using PairWeight<W1, W2>::One;
+   using PairWeight<W1, W2>::NoWeight;
+   using PairWeight<W1, W2>::Quantize;
+   using PairWeight<W1, W2>::Reverse;
+
+   typedef LexicographicWeight<typename W1::ReverseWeight,
+                               typename W2::ReverseWeight>
+   ReverseWeight;
+
+   LexicographicWeight() {}
+
+   // Wraps an existing pair weight; no path-property check is performed.
+   LexicographicWeight(const PairWeight<W1, W2>& w)
+       : PairWeight<W1, W2>(w) {}
+
+   // Builds the weight from its two components. If a component type lacks
+   // the path property an error is logged and that component is replaced by
+   // its NoWeight() value.
+   LexicographicWeight(W1 w1, W2 w2) : PairWeight<W1, W2>(w1, w2) {
+     uint64 props = kPath;
+     if ((W1::Properties() & props) != props) {
+       FSTERROR() << "LexicographicWeight must "
+                  << "have the path property: " << W1::Type();
+       SetValue1(W1::NoWeight());
+     }
+     if ((W2::Properties() & props) != props) {
+       FSTERROR() << "LexicographicWeight must "
+                  << "have the path property: " << W2::Type();
+       SetValue2(W2::NoWeight());
+     }
+   }
+
+   static const LexicographicWeight<W1, W2> &Zero() {
+     static const LexicographicWeight<W1, W2> zero(PairWeight<W1, W2>::Zero());
+     return zero;
+   }
+
+   static const LexicographicWeight<W1, W2> &One() {
+     static const LexicographicWeight<W1, W2> one(PairWeight<W1, W2>::One());
+     return one;
+   }
+
+   static const LexicographicWeight<W1, W2> &NoWeight() {
+     static const LexicographicWeight<W1, W2> no_weight(
+         PairWeight<W1, W2>::NoWeight());
+     return no_weight;
+   }
+
+   // Type name: the two component type names joined by "_LT_".
+   static const string &Type() {
+     static const string type = W1::Type() + "_LT_" + W2::Type();
+     return type;
+   }
+
+   // True iff both components are members and they are either both Zero()
+   // or both non-Zero().
+   bool Member() const {
+     if (!Value1().Member() || !Value2().Member()) return false;
+     // Lexicographic weights cannot mix zeroes and non-zeroes.
+     if (Value1() == W1::Zero() && Value2() == W2::Zero()) return true;
+     if (Value1() != W1::Zero() && Value2() != W2::Zero()) return true;
+     return false;
+   }
+
+   // Quantizes both components with tolerance 'delta'. The tolerance is
+   // forwarded to the base class; previously it was dropped, so the base
+   // default was always used regardless of the caller's argument.
+   LexicographicWeight<W1, W2> Quantize(float delta = kDelta) const {
+     return PairWeight<W1, W2>::Quantize(delta);
+   }
+
+   ReverseWeight Reverse() const {
+     return PairWeight<W1, W2>::Reverse();
+   }
+
+   // Properties shared by both component types, restricted to the set
+   // listed below.
+   static uint64 Properties() {
+     uint64 props1 = W1::Properties();
+     uint64 props2 = W2::Properties();
+     return props1 & props2 & (kLeftSemiring | kRightSemiring | kPath |
+                               kIdempotent | kCommutative);
+   }
+ };
+
+ // Semiring "plus": yields whichever operand comes first lexicographically.
+ // The first components are compared with NaturalLess<W1>; only when neither
+ // is less than the other are the second components compared with
+ // NaturalLess<W2>. A full tie returns 'w'. If either operand is not a
+ // member, the result is NoWeight().
+ template <class W1, class W2>
+ inline LexicographicWeight<W1, W2> Plus(const LexicographicWeight<W1, W2> &w,
+                                         const LexicographicWeight<W1, W2> &v) {
+   if (!w.Member() || !v.Member()) {
+     return LexicographicWeight<W1, W2>::NoWeight();
+   }
+   NaturalLess<W1> first_less;
+   NaturalLess<W2> second_less;
+   // Decide on the first component when it differs.
+   if (first_less(w.Value1(), v.Value1())) {
+     return w;
+   }
+   if (first_less(v.Value1(), w.Value1())) {
+     return v;
+   }
+   // First components tie: fall back to the second component.
+   if (second_less(w.Value2(), v.Value2())) {
+     return w;
+   }
+   return second_less(v.Value2(), w.Value2()) ? v : w;
+ }
+
+ // Semiring "times": component-wise Times() of the two operands.
+ template <class W1, class W2>
+ inline LexicographicWeight<W1, W2> Times(const LexicographicWeight<W1, W2> &w,
+                                          const LexicographicWeight<W1, W2> &v) {
+   typedef LexicographicWeight<W1, W2> Result;
+   return Result(Times(w.Value1(), v.Value1()),
+                 Times(w.Value2(), v.Value2()));
+ }
+
+ // Component-wise Divide() of 'w' by 'v'; 'typ' is handed unchanged to the
+ // Divide() of each component weight type.
+ template <class W1, class W2>
+ inline LexicographicWeight<W1, W2> Divide(const LexicographicWeight<W1, W2> &w,
+                                           const LexicographicWeight<W1, W2> &v,
+                                           DivideType typ = DIVIDE_ANY) {
+   typedef LexicographicWeight<W1, W2> Result;
+   return Result(Divide(w.Value1(), v.Value1(), typ),
+                 Divide(w.Value2(), v.Value2(), typ));
+ }
+
+} // namespace fst
+
+#endif // FST_LIB_LEXICOGRAPHIC_WEIGHT_H__
diff --git a/src/include/fst/lock.h b/src/include/fst/lock.h
new file mode 100644
index 0000000..3adf7df
--- /dev/null
+++ b/src/include/fst/lock.h
@@ -0,0 +1,81 @@
+// lock.h
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Google-compatibility locking declarations and inline definitions
+
+#ifndef FST_LIB_LOCK_H__
+#define FST_LIB_LOCK_H__
+
+#include <fst/compat.h> // for DISALLOW_COPY_AND_ASSIGN
+
+namespace fst {
+
+using namespace std;
+
+//
+// Single initialization - single-thread implementation
+//
+
+ // Flag type for one-time initialization; non-zero means "not yet run".
+ typedef int FstOnceType;
+
+ // Initial value for an FstOnceType flag.
+ static const int FST_ONCE_INIT = 1;
+
+ // Calls 'init' if '*once' is still non-zero, then clears '*once' so later
+ // calls do nothing. Always returns 0. There is no locking here; this is
+ // the single-thread implementation.
+ inline int FstOnceInit(FstOnceType *once, void (*init)(void)) {
+   const bool pending = (*once != 0);
+   if (pending) {
+     init();
+   }
+   *once = 0;
+   return 0;
+ }
+
+//
+// Thread locking - single-thread (non-)implementation
+//
+
+ // Placeholder mutex for single-threaded builds: it holds no state and
+ // offers no lock/unlock operations. Copying and assignment are disabled
+ // via DISALLOW_COPY_AND_ASSIGN.
+ class Mutex {
+ public:
+ Mutex() {}
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(Mutex);
+ };
+
+ // Placeholder scoped lock for single-threaded builds: the constructor
+ // accepts a Mutex pointer and ignores it, so no locking takes place.
+ class MutexLock {
+ public:
+ MutexLock(Mutex *) {}
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(MutexLock);
+ };
+
+ // Reference counting - single-thread implementation
+ // The count starts at 1 and is a plain int, so updates are not atomic.
+ class RefCounter {
+ public:
+ RefCounter() : count_(1) {}
+
+ int count() const { return count_; }
+ int Incr() const { return ++count_; } // Returns the count after incrementing.
+ int Decr() const { return --count_; } // Returns the count after decrementing.
+
+ private:
+ mutable int count_; // mutable so Incr()/Decr() can be const methods.
+
+ DISALLOW_COPY_AND_ASSIGN(RefCounter);
+ };
+
+} // namespace fst
+
+#endif // FST_LIB_LOCK_H__
diff --git a/src/include/fst/log.h b/src/include/fst/log.h
new file mode 100644
index 0000000..d1492cd
--- /dev/null
+++ b/src/include/fst/log.h
@@ -0,0 +1,66 @@
+// log.h
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Google-style logging declarations and inline definitions.
+
+#ifndef FST_LIB_LOG_H__
+#define FST_LIB_LOG_H__
+
+#include <cassert>
+#include <iostream>
+#include <string>
+
+#include <fst/types.h>
+#include <fst/flags.h>
+
+using std::string;
+
+DECLARE_int32(v);
+
+ // One log line written to std::cerr. The constructor prints the severity
+ // label followed by ": ", callers append text through stream(), and the
+ // destructor ends the line (flushing via std::endl). When the label is
+ // exactly "FATAL" the destructor then terminates the process with exit(1).
+ class LogMessage {
+  public:
+   LogMessage(const string &type) : fatal_(type == "FATAL") {
+     stream() << type << ": ";
+   }
+
+   ~LogMessage() {
+     stream() << std::endl;
+     if (!fatal_) return;
+     exit(1);
+   }
+
+   // Destination stream for the message text.
+   std::ostream &stream() { return std::cerr; }
+
+  private:
+   bool fatal_;  // True when the severity label was "FATAL".
+ };
+
+#define LOG(type) LogMessage(#type).stream()
+#define VLOG(level) if ((level) <= FLAGS_v) LOG(INFO)
+
+ // Checks
+ // CHECK forwards to assert(), so the test is only enforced when assert is
+ // active (NDEBUG not defined). Because CHECK is a function rather than a
+ // macro, its argument expression is always evaluated.
+ inline void CHECK(bool x) { assert(x); }
+
+ // Comparison helpers: each expands to CHECK on the corresponding relation.
+ #define CHECK_EQ(x, y) CHECK((x) == (y))
+ #define CHECK_LT(x, y) CHECK((x) < (y))
+ #define CHECK_GT(x, y) CHECK((x) > (y))
+ #define CHECK_LE(x, y) CHECK((x) <= (y))
+ #define CHECK_GE(x, y) CHECK((x) >= (y))
+ #define CHECK_NE(x, y) CHECK((x) != (y))
+
+ // Ports
+ // Expands to the GCC-style __attribute__((deprecated)) annotation.
+ #define ATTRIBUTE_DEPRECATED __attribute__((deprecated))
+
+#endif // FST_LIB_LOG_H__
diff --git a/src/include/fst/lookahead-filter.h b/src/include/fst/lookahead-filter.h
new file mode 100644
index 0000000..e11c1bb
--- /dev/null
+++ b/src/include/fst/lookahead-filter.h
@@ -0,0 +1,698 @@
+// lookahead-filter.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Composition filters to support lookahead matchers, useful for improving
+// composition efficiency with certain inputs.
+
+#ifndef FST_LIB_LOOKAHEAD_FILTER_H__
+#define FST_LIB_LOOKAHEAD_FILTER_H__
+
+#include <vector>
+using std::vector;
+
+#include <fst/fst.h>
+#include <fst/lookahead-matcher.h>
+
+
+namespace fst {
+
+ // Decides which side of a composition can do lookahead, given the two
+ // matchers. Returns MATCH_OUTPUT when the first matcher can look ahead on
+ // output labels, MATCH_INPUT when the second can look ahead on input
+ // labels, and MATCH_NONE when neither qualifies. The first matcher takes
+ // precedence within each round of checks.
+ template <class M1, class M2>
+ MatchType LookAheadMatchType(const M1 &m1, const M2 &m2) {
+   const MatchType quick1 = m1.Type(false);
+   const MatchType quick2 = m2.Type(false);
+
+   // First round: rely on the Type(false) answers only.
+   if (quick1 == MATCH_OUTPUT && (m1.Flags() & kOutputLookAheadMatcher)) {
+     return MATCH_OUTPUT;
+   }
+   if (quick2 == MATCH_INPUT && (m2.Flags() & kInputLookAheadMatcher)) {
+     return MATCH_INPUT;
+   }
+
+   // Second round: ask again with Type(true), but only for a matcher whose
+   // flags advertise the needed lookahead capability.
+   // NOTE(review): Type(true) presumably forces a full test of the match
+   // type -- confirm against matcher.h.
+   if ((m1.Flags() & kOutputLookAheadMatcher) &&
+       m1.Type(true) == MATCH_OUTPUT) {
+     return MATCH_OUTPUT;
+   }
+   if ((m2.Flags() & kInputLookAheadMatcher) &&
+       m2.Type(true) == MATCH_INPUT) {
+     return MATCH_INPUT;
+   }
+   return MATCH_NONE;
+ }
+
+ // Same decision as the matcher-based overload, but starting from two FSTs:
+ // a LookAheadMatcher is built for each (output side of fst1, input side of
+ // fst2) and the pair is handed to that overload.
+ template <class Arc>
+ MatchType LookAheadMatchType(const Fst<Arc> &fst1, const Fst<Arc> &fst2) {
+   typedef LookAheadMatcher< Fst<Arc> > DefaultMatcher;
+   DefaultMatcher output_side(fst1, MATCH_OUTPUT);
+   DefaultMatcher input_side(fst2, MATCH_INPUT);
+   return LookAheadMatchType(output_side, input_side);
+ }
+
+//
+// LookAheadSelector - a helper class for selecting among possibly
+// distinct FST and matcher types w/o using a common base class. This
+// lets us avoid virtual function calls.
+//
+
+ // Stores and returns the appropriate FST and matcher for lookahead.
+ // It is templated on the matcher types. The general case has no methods
+ // since it is not currently supported; only the specializations that
+ // follow provide GetFst() and GetMatcher().
+ template <class M1, class M2, MatchType MT>
+ class LookAheadSelector {
+ };
+
+ // Selector for the case where both matchers have the same type M. The
+ // run-time 'type' argument picks the lookahead side: for MATCH_OUTPUT the
+ // first matcher looks ahead into the second matcher's FST; for any other
+ // value the second matcher looks ahead into the first matcher's FST.
+ // The selector owns private copies of both matchers.
+ template <class M, MatchType MT>
+ class LookAheadSelector<M, M, MT> {
+  public:
+   typedef typename M::Arc Arc;
+   typedef typename M::FST F;
+
+   // Copies both matchers; the arguments themselves are not retained.
+   LookAheadSelector(M *lmatcher1, M *lmatcher2, MatchType type)
+       : lmatcher1_(lmatcher1->Copy()),
+         lmatcher2_(lmatcher2->Copy()),
+         type_(type) {}
+
+   // Copy constructor: makes fresh copies of the other selector's matchers.
+   LookAheadSelector(const LookAheadSelector<M, M, MT> &selector)
+       : lmatcher1_(selector.lmatcher1_->Copy()),
+         lmatcher2_(selector.lmatcher2_->Copy()),
+         type_(selector.type_) {}
+
+   ~LookAheadSelector() {
+     delete lmatcher1_;
+     delete lmatcher2_;
+   }
+
+   // FST on the side opposite to the lookahead matcher.
+   const F &GetFst() const {
+     if (type_ == MATCH_OUTPUT) {
+       return lmatcher2_->GetFst();
+     }
+     return lmatcher1_->GetFst();
+   }
+
+   // Matcher that performs the lookahead. Ownership stays with the selector.
+   M *GetMatcher() const {
+     if (type_ == MATCH_OUTPUT) {
+       return lmatcher1_;
+     }
+     return lmatcher2_;
+   }
+
+  private:
+   M *lmatcher1_;    // Owned copy of the first matcher
+   M *lmatcher2_;    // Owned copy of the second matcher
+   MatchType type_;  // Which side does the lookahead
+
+   void operator=(const LookAheadSelector<M, M, MT> &);  // disallow
+ };
+
+ // Selector fixed at compile time to lookahead on input labels: the second
+ // matcher does the lookahead, into the first matcher's FST. The selector
+ // owns a copy of that FST and a copy of the second matcher. The run-time
+ // MatchType constructor argument is ignored.
+ template <class M1, class M2>
+ class LookAheadSelector<M1, M2, MATCH_INPUT> {
+  public:
+   typedef typename M1::FST F1;
+
+   LookAheadSelector(M1 *lmatcher1, M2 *lmatcher2, MatchType)
+       : fst_(lmatcher1->GetFst().Copy()), lmatcher_(lmatcher2->Copy()) {}
+
+   // Copy constructor: duplicates both the FST and the matcher.
+   LookAheadSelector(const LookAheadSelector<M1, M2, MATCH_INPUT> &selector)
+       : fst_(selector.fst_->Copy()), lmatcher_(selector.lmatcher_->Copy()) {}
+
+   ~LookAheadSelector() {
+     delete lmatcher_;
+     delete fst_;
+   }
+
+   // FST that the matcher looks ahead into.
+   const F1 &GetFst() const { return *fst_; }
+
+   // Lookahead matcher. Ownership stays with the selector.
+   M2 *GetMatcher() const { return lmatcher_; }
+
+  private:
+   const F1 *fst_;  // Owned copy of the first matcher's FST
+   M2 *lmatcher_;   // Owned copy of the second matcher
+
+   void operator=(const LookAheadSelector<M1, M2, MATCH_INPUT> &);  // disallow
+ };
+
+
+ // Selector fixed at compile time to lookahead on output labels: the first
+ // matcher does the lookahead, into the second matcher's FST. The selector
+ // owns a copy of that FST and a copy of the first matcher. The run-time
+ // MatchType constructor argument is ignored.
+ template <class M1, class M2>
+ class LookAheadSelector<M1, M2, MATCH_OUTPUT> {
+  public:
+   typedef typename M2::FST F2;
+
+   LookAheadSelector(M1 *lmatcher1, M2 *lmatcher2, MatchType)
+       : fst_(lmatcher2->GetFst().Copy()), lmatcher_(lmatcher1->Copy()) {}
+
+   // Copy constructor: duplicates both the FST and the matcher.
+   LookAheadSelector(const LookAheadSelector<M1, M2, MATCH_OUTPUT> &selector)
+       : fst_(selector.fst_->Copy()), lmatcher_(selector.lmatcher_->Copy()) {}
+
+   ~LookAheadSelector() {
+     delete lmatcher_;
+     delete fst_;
+   }
+
+   // FST that the matcher looks ahead into.
+   const F2 &GetFst() const { return *fst_; }
+
+   // Lookahead matcher. Ownership stays with the selector.
+   M1 *GetMatcher() const { return lmatcher_; }
+
+  private:
+   const F2 *fst_;  // Owned copy of the second matcher's FST
+   M1 *lmatcher_;   // Owned copy of the first matcher
+
+   void operator=(const LookAheadSelector<M1, M2, MATCH_OUTPUT> &);  // disallow
+ };
+
+ // This filter uses a lookahead matcher in FilterArc(arc1, arc2) to
+ // examine the future of the composition state (arc1.nextstate,
+ // arc2.nextstate), blocking moving forward when it is determined to be
+ // non-coaccessible. It is templated on an underlying filter,
+ // typically the epsilon filter. Which matcher is the lookahead
+ // matcher is determined by the template argument MT unless it is
+ // MATCH_BOTH. In that case, both matcher arguments must be lookahead
+ // matchers of the same type and one will be selected by
+ // LookAheadMatchType() based on their capability.
+ template <class F,
+           class M1 = LookAheadMatcher<typename F::FST1>,
+           class M2 = M1,
+           MatchType MT = MATCH_BOTH>
+ class LookAheadComposeFilter {
+  public:
+   typedef typename F::FST1 FST1;
+   typedef typename F::FST2 FST2;
+   typedef typename F::Arc Arc;
+   typedef typename F::Matcher1 Matcher1;
+   typedef typename F::Matcher2 Matcher2;
+   typedef typename F::FilterState FilterState;
+   typedef LookAheadComposeFilter<F, M1, M2, MT> Filter;
+
+   typedef typename Arc::StateId StateId;
+   typedef typename Arc::Label Label;
+   typedef typename Arc::Weight Weight;
+
+   // Builds the underlying filter, works out the lookahead side (from MT,
+   // or from the matchers' capabilities when MT is MATCH_BOTH) and
+   // initializes the lookahead matcher with the opposite FST. If no side
+   // can look ahead an error is logged; Properties() then reports kError.
+   LookAheadComposeFilter(const FST1 &fst1, const FST2 &fst2,
+                          M1 *matcher1, M2 *matcher2)
+       : filter_(fst1, fst2, matcher1, matcher2),
+         lookahead_type_(MT == MATCH_BOTH ?
+                         LookAheadMatchType(*filter_.GetMatcher1(),
+                                            *filter_.GetMatcher2()) : MT),
+         selector_(filter_.GetMatcher1(), filter_.GetMatcher2(),
+                   lookahead_type_),
+         flags_(lookahead_type_ == MATCH_OUTPUT ?
+                filter_.GetMatcher1()->Flags() :
+                filter_.GetMatcher2()->Flags()),
+         lookahead_arc_(false) {  // No FilterArc() call has happened yet.
+     if (lookahead_type_ == MATCH_NONE) {
+       FSTERROR() << "LookAheadComposeFilter: 1st argument cannot "
+                  << "match/look-ahead on output labels and 2nd argument "
+                  << "cannot match/look-ahead on input labels.";
+     }
+     selector_.GetMatcher()->InitLookAheadFst(selector_.GetFst());
+   }
+
+   // Copy constructor; 'safe' is forwarded to the underlying filter's copy.
+   // The copy starts with no lookahead recorded.
+   LookAheadComposeFilter(const LookAheadComposeFilter<F, M1, M2, MT> &filter,
+                          bool safe = false)
+       : filter_(filter.filter_, safe),
+         lookahead_type_(filter.lookahead_type_),
+         selector_(filter_.GetMatcher1(), filter_.GetMatcher2(),
+                   lookahead_type_),
+         flags_(filter.flags_),
+         lookahead_arc_(false) {
+     selector_.GetMatcher()->InitLookAheadFst(selector_.GetFst(), true);
+   }
+
+   FilterState Start() const {
+     return filter_.Start();
+   }
+
+   void SetState(StateId s1, StateId s2, const FilterState &f) {
+     filter_.SetState(s1, s2, f);
+   }
+
+   // Applies the underlying filter first; if it accepts the arc pair, the
+   // lookahead test is run with the arcs ordered so that the lookahead
+   // side comes first. Returns FilterState::NoState() to block the pair.
+   FilterState FilterArc(Arc *arc1, Arc *arc2) const {
+     lookahead_arc_ = false;
+
+     const FilterState &f = filter_.FilterArc(arc1, arc2);
+     if (f == FilterState::NoState())
+       return FilterState::NoState();
+
+     return LookAheadOutput() ? LookAheadFilterArc(arc1, arc2, f) :
+         LookAheadFilterArc(arc2, arc1, f);
+   }
+
+   void FilterFinal(Weight *weight1, Weight *weight2) const {
+     filter_.FilterFinal(weight1, weight2);
+   }
+
+   // Return resp matchers. Ownership stays with filter.
+   Matcher1 *GetMatcher1() { return filter_.GetMatcher1(); }
+   Matcher2 *GetMatcher2() { return filter_.GetMatcher2(); }
+
+   const LookAheadSelector<Matcher1, Matcher2, MT> &Selector() const {
+     return selector_;
+   }
+
+   // Properties of the underlying filter, plus kError when no lookahead
+   // side could be determined.
+   uint64 Properties(uint64 inprops) const {
+     uint64 outprops = filter_.Properties(inprops);
+     if (lookahead_type_ == MATCH_NONE)
+       outprops |= kError;
+     return outprops;
+   }
+
+   // Flags of the matcher chosen for lookahead.
+   uint32 LookAheadFlags() const { return flags_; }
+
+   // True iff the most recent FilterArc() call performed a lookahead.
+   bool LookAheadArc() const { return lookahead_arc_; }
+
+   // True iff lookahead is done on the output side (first matcher). The
+   // template argument decides when it is specific; otherwise the type
+   // detected at construction is used.
+   bool LookAheadOutput() const {
+     if (MT == MATCH_OUTPUT)
+       return true;
+     if (MT == MATCH_INPUT)
+       return false;
+     return lookahead_type_ == MATCH_OUTPUT;
+   }
+
+  private:
+   // 'arca' is the arc on the lookahead side and 'arcb' the arc on the
+   // other side. Lookahead is skipped (state 'f' returned unchanged) when
+   // the matcher's flags do not ask for it for this kind of label.
+   FilterState LookAheadFilterArc(Arc *arca, Arc *arcb,
+                                  const FilterState &f) const {
+     Label &labela = LookAheadOutput() ? arca->olabel : arca->ilabel;
+
+     if (labela != 0 && !(flags_ & kLookAheadNonEpsilons))
+       return f;
+     if (labela == 0 && !(flags_ & kLookAheadEpsilons))
+       return f;
+
+     lookahead_arc_ = true;
+     selector_.GetMatcher()->SetState(arca->nextstate);
+
+     return selector_.GetMatcher()->LookAheadFst(selector_.GetFst(),
+                                                 arcb->nextstate) ? f :
+         FilterState::NoState();
+   }
+
+   F filter_;                    // Underlying filter
+   MatchType lookahead_type_;    // Lookahead match type
+   LookAheadSelector<Matcher1, Matcher2, MT> selector_;
+   uint32 flags_;                // Lookahead flags
+   mutable bool lookahead_arc_;  // Look-ahead performed at last FilterArc()?
+
+   // Names the full class type (including MT) so assignment is disallowed
+   // for every instantiation, not just the MATCH_BOTH default.
+   void operator=(const LookAheadComposeFilter<F, M1, M2, MT> &);  // disallow
+ };
+
+
+ // This filter adds weight-pushing to a lookahead composition filter
+ // using the LookAheadWeight() method of matcher argument. It is
+ // templated on an underlying lookahead filter, typically the basic
+ // lookahead filter. Weight-pushing in composition brings weights
+ // forward as much as possible based on the lookahead information.
+ //
+ // The filter state pairs the underlying filter's state with a
+ // WeightFilterState; FilterArc() stores there the lookahead weight it
+ // multiplied into the arc, and the next FilterArc()/FilterFinal() call
+ // divides that stored weight back out.
+ template <class F,
+ class M1 = LookAheadMatcher<typename F::FST1>,
+ class M2 = M1,
+ MatchType MT = MATCH_BOTH>
+ class PushWeightsComposeFilter {
+ public:
+ typedef typename F::FST1 FST1;
+ typedef typename F::FST2 FST2;
+ typedef typename F::Arc Arc;
+ typedef typename F::Matcher1 Matcher1;
+ typedef typename F::Matcher2 Matcher2;
+ typedef typename F::FilterState FilterState1;
+ typedef WeightFilterState<typename Arc::Weight> FilterState2;
+ typedef PairFilterState<FilterState1, FilterState2> FilterState;
+
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Label Label;
+ typedef typename Arc::Weight Weight;
+
+ PushWeightsComposeFilter(const FST1 &fst1, const FST2 &fst2,
+ M1 *matcher1, M2 *matcher2)
+ : filter_(fst1, fst2, matcher1, matcher2),
+ f_(FilterState::NoState()) {}
+
+ // Copy constructor; 'safe' is forwarded to the underlying filter's copy.
+ // The current filter state is reset rather than copied.
+ PushWeightsComposeFilter(const PushWeightsComposeFilter<F, M1, M2, MT>
+ &filter,
+ bool safe = false)
+ : filter_(filter.filter_, safe),
+ f_(FilterState::NoState()) {}
+
+ // Start state: the underlying start state paired with weight One().
+ FilterState Start() const {
+ return FilterState(filter_.Start(), FilterState2(Weight::One()));
+ }
+
+ // Remembers the full pair state and passes its first component down.
+ void SetState(StateId s1, StateId s2, const FilterState &f) {
+ f_ = f;
+ filter_.SetState(s1, s2, f.GetState1());
+ }
+
+ // Runs the underlying filter; if it blocks the arc pair, so does this.
+ // Without the kLookAheadWeight flag the arcs are left untouched.
+ // Otherwise arc2's weight is multiplied by the lookahead weight (One()
+ // when no lookahead was performed on this pair) and divided by the weight
+ // held in the current filter state; the lookahead weight becomes the
+ // weight component of the returned state.
+ FilterState FilterArc(Arc *arc1, Arc *arc2) const {
+ const FilterState1 &f1 = filter_.FilterArc(arc1, arc2);
+ if (f1 == FilterState1::NoState())
+ return FilterState::NoState();
+
+ if (!(LookAheadFlags() & kLookAheadWeight))
+ return FilterState(f1, FilterState2(Weight::One()));
+
+ const Weight &lweight = filter_.LookAheadArc() ?
+ Selector().GetMatcher()->LookAheadWeight() : Weight::One();
+ const FilterState2 &f2 = f_.GetState2();
+ const Weight &fweight = f2.GetWeight();
+
+ arc2->weight = Divide(Times(arc2->weight, lweight), fweight);
+ return FilterState(f1, FilterState2(lweight));
+ }
+
+ // After the underlying filter has adjusted the final weights, divides
+ // *weight1 by the weight held in the current filter state. Skipped when
+ // weight pushing is off or *weight1 is Zero().
+ void FilterFinal(Weight *weight1, Weight *weight2) const {
+ filter_.FilterFinal(weight1, weight2);
+ if (!(LookAheadFlags() & kLookAheadWeight) || *weight1 == Weight::Zero())
+ return;
+
+ const FilterState2 &f2 = f_.GetState2();
+ const Weight &fweight = f2.GetWeight();
+ *weight1 = Divide(*weight1, fweight);
+ }
+ // Return resp matchers. Ownership stays with filter.
+ Matcher1 *GetMatcher1() { return filter_.GetMatcher1(); }
+ Matcher2 *GetMatcher2() { return filter_.GetMatcher2(); }
+
+ const LookAheadSelector<Matcher1, Matcher2, MT> &Selector() const {
+ return filter_.Selector();
+ }
+
+ // The lookahead queries below are forwarded to the underlying filter.
+ uint32 LookAheadFlags() const { return filter_.LookAheadFlags(); }
+ bool LookAheadArc() const { return filter_.LookAheadArc(); }
+ bool LookAheadOutput() const { return filter_.LookAheadOutput(); }
+
+ // Underlying properties masked with kWeightInvariantProperties.
+ uint64 Properties(uint64 props) const {
+ return filter_.Properties(props) & kWeightInvariantProperties;
+ }
+
+ private:
+ F filter_; // Underlying filter
+ FilterState f_; // Current filter state
+
+ void operator=(const PushWeightsComposeFilter<F, M1, M2, MT> &); // disallow
+ };
+
+ // This filter adds label-pushing to a lookahead composition filter
+ // using the LookAheadPrefix() method of the matcher argument. It is
+ // templated on an underlying filter, typically the basic lookahead
+ // or weight-pushing lookahead filter. Label-pushing in composition
+ // matches labels as early as possible based on the lookahead
+ // information.
+ //
+ // The filter state pairs the underlying filter's state with an
+ // IntegerFilterState holding the label that has been pushed ahead and is
+ // still waiting to be matched (kNoLabel when there is none).
+ template <class F,
+ class M1 = LookAheadMatcher<typename F::FST1>,
+ class M2 = M1,
+ MatchType MT = MATCH_BOTH>
+ class PushLabelsComposeFilter {
+ public:
+ typedef typename F::FST1 FST1;
+ typedef typename F::FST2 FST2;
+ typedef typename F::Arc Arc;
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Label Label;
+ typedef typename Arc::Weight Weight;
+
+ typedef MultiEpsMatcher<typename F::Matcher1> Matcher1;
+ typedef MultiEpsMatcher<typename F::Matcher2> Matcher2;
+ typedef typename F::FilterState FilterState1;
+ typedef IntegerFilterState<typename Arc::Label> FilterState2;
+ typedef PairFilterState<FilterState1, FilterState2> FilterState;
+
+ // Builds the underlying filter and wraps each of its matchers in a
+ // MultiEpsMatcher. The lookahead side is given kMultiEpsList and the
+ // other side kMultiEpsLoop.
+ // NOTE(review): the trailing 'false' presumably means the wrapper does
+ // not take ownership of the wrapped matcher -- confirm in matcher.h.
+ PushLabelsComposeFilter(const FST1 &fst1, const FST2 &fst2,
+ M1 *matcher1, M2 *matcher2)
+ : filter_(fst1, fst2, matcher1, matcher2),
+ f_(FilterState::NoState()),
+ fst1_(filter_.GetMatcher1()->GetFst()),
+ fst2_(filter_.GetMatcher2()->GetFst()),
+ matcher1_(fst1_, MATCH_OUTPUT,
+ filter_.LookAheadOutput() ? kMultiEpsList : kMultiEpsLoop,
+ filter_.GetMatcher1(),
+ false),
+ matcher2_(fst2_, MATCH_INPUT,
+ filter_.LookAheadOutput() ? kMultiEpsLoop : kMultiEpsList,
+ filter_.GetMatcher2(),
+ false) {}
+
+ // Copy constructor; 'safe' is forwarded to the underlying filter's copy.
+ // The multi-epsilon matchers are rebuilt around the copied filter's
+ // matchers and the current filter state is reset.
+ PushLabelsComposeFilter(const PushLabelsComposeFilter<F, M1, M2, MT> &filter,
+ bool safe = false)
+ : filter_(filter.filter_, safe),
+ f_(FilterState::NoState()),
+ fst1_(filter_.GetMatcher1()->GetFst()),
+ fst2_(filter_.GetMatcher2()->GetFst()),
+ matcher1_(fst1_, MATCH_OUTPUT,
+ filter_.LookAheadOutput() ? kMultiEpsList : kMultiEpsLoop,
+ filter_.GetMatcher1(),
+ false),
+ matcher2_(fst2_, MATCH_INPUT,
+ filter_.LookAheadOutput() ? kMultiEpsLoop : kMultiEpsList,
+ filter_.GetMatcher2(),
+ false) {
+ }
+
+ // Start state: the underlying start state with no pushed label.
+ FilterState Start() const {
+ return FilterState(filter_.Start(), FilterState2(kNoLabel));
+ }
+
+ // Remembers the pair state and passes its first component down. When
+ // prefix lookahead is enabled it also records the arc count of the
+ // lookahead-side state and registers the pending label (if any) as a
+ // multi-epsilon label on both matchers.
+ void SetState(StateId s1, StateId s2, const FilterState &f) {
+ f_ = f;
+ filter_.SetState(s1, s2, f.GetState1());
+ if (!(LookAheadFlags() & kLookAheadPrefix))
+ return;
+
+ narcsa_ = LookAheadOutput() ? internal::NumArcs(fst1_, s1)
+ : internal::NumArcs(fst2_, s2);
+
+ const FilterState2 &f2 = f_.GetState2();
+ const Label &flabel = f2.GetState();
+
+ GetMatcher1()->ClearMultiEpsLabels();
+ GetMatcher2()->ClearMultiEpsLabels();
+ if (flabel != kNoLabel) { // Have a lookahead label?
+ GetMatcher1()->AddMultiEpsLabel(flabel); // Yes, make it a multi-epsilon
+ GetMatcher2()->AddMultiEpsLabel(flabel); // label so that it matches the
+ } // implicit epsilon arc to be
+ } // modified below when pushing.
+
+ // Without prefix lookahead this just wraps the underlying filter's
+ // result. With a pending pushed label the arc pair goes to
+ // PushedLabelFilterArc(). Otherwise the underlying filter runs and, if it
+ // performed a lookahead on this pair, PushLabelFilterArc() tries to push
+ // a new label. In both helper calls the lookahead-side arc is passed
+ // first.
+ FilterState FilterArc(Arc *arc1, Arc *arc2) const {
+ if (!(LookAheadFlags() & kLookAheadPrefix))
+ return FilterState(filter_.FilterArc(arc1, arc2),
+ FilterState2(kNoLabel));
+
+ const FilterState2 &f2 = f_.GetState2();
+ const Label &flabel = f2.GetState();
+ if (flabel != kNoLabel) // Have a lookahead label?
+ return LookAheadOutput() ? PushedLabelFilterArc(arc1, arc2, flabel) :
+ PushedLabelFilterArc(arc2, arc1, flabel);
+
+ const FilterState1 &f1 = filter_.FilterArc(arc1, arc2);
+ if (f1 == FilterState1::NoState())
+ return FilterState::NoState();
+
+ if (!filter_.LookAheadArc())
+ return FilterState(f1, FilterState2(kNoLabel));
+
+ return LookAheadOutput() ? PushLabelFilterArc(arc1, arc2, f1) :
+ PushLabelFilterArc(arc2, arc1, f1);
+ }
+
+ // After the underlying filter has adjusted the final weights, forces
+ // *weight1 to Zero() while a pushed label is still pending, so such a
+ // state is not final.
+ void FilterFinal(Weight *weight1, Weight *weight2) const {
+ filter_.FilterFinal(weight1, weight2);
+ if (!(LookAheadFlags() & kLookAheadPrefix) ||
+ *weight1 == Weight::Zero())
+ return;
+
+ const FilterState2 &f2 = f_.GetState2();
+ const Label &flabel = f2.GetState();
+ if (flabel != kNoLabel)
+ *weight1 = Weight::Zero();
+ }
+
+ // Return resp matchers. Ownership stays with filter.
+ Matcher1 *GetMatcher1() { return &matcher1_; }
+ Matcher2 *GetMatcher2() { return &matcher2_; }
+
+ // Underlying properties masked with the label-invariant set of the side
+ // whose labels this filter rewrites.
+ uint64 Properties(uint64 iprops) const {
+ uint64 oprops = filter_.Properties(iprops);
+ if (LookAheadOutput())
+ return oprops & kOLabelInvariantProperties;
+ else
+ return oprops & kILabelInvariantProperties;
+ }
+
+ private:
+ const LookAheadSelector<typename F::Matcher1, typename F::Matcher2, MT>
+ &Selector() const {
+ return filter_.Selector();
+ }
+
+ // Consumes an already pushed label.
+ // 'arca' is the lookahead-side arc, 'arcb' the other arc and 'flabel'
+ // the pending label. May rewrite arca's label to epsilon.
+ FilterState PushedLabelFilterArc(Arc *arca, Arc *arcb,
+ Label flabel) const {
+ Label &labela = LookAheadOutput() ? arca->olabel : arca->ilabel;
+ const Label &labelb = LookAheadOutput() ? arcb->ilabel : arcb->olabel;
+
+ if (labelb != kNoLabel) {
+ return FilterState::NoState(); // Block non- (multi-) epsilon label
+ } else if (labela == flabel) {
+ labela = 0; // Convert match to multi-eps to eps
+ return Start();
+ } else if (labela == 0) {
+ if (narcsa_ == 1)
+ return f_; // Take eps; keep state w/ label
+ Selector().GetMatcher()->SetState(arca->nextstate);
+ if (Selector().GetMatcher()->LookAheadLabel(flabel))
+ return f_; // Take eps; keep state w/ label
+ else
+ return FilterState::NoState(); // Block non-coaccessible path
+ } else {
+ return FilterState::NoState(); // Block mismatch to multi-eps label
+ }
+ }
+
+ // Pushes a label forward when possible.
+ // 'arca' is the lookahead-side arc, 'arcb' the other arc and 'f1' the
+ // underlying filter's state for this pair. On success arcb is replaced
+ // by the prefix arc (its weight multiplied in) and the pushed label is
+ // stored in the returned state.
+ FilterState PushLabelFilterArc(Arc *arca, Arc *arcb,
+ const FilterState1 &f1) const {
+ Label &labela = LookAheadOutput() ? arca->olabel : arca->ilabel;
+ const Label &labelb = LookAheadOutput() ? arcb->olabel : arcb->ilabel;
+
+ if (labelb != 0) // No place to push.
+ return FilterState(f1, FilterState2(kNoLabel));
+ if (labela != 0 && // Wrong lookahead prefix type?
+ LookAheadFlags() & kLookAheadNonEpsilonPrefix)
+ return FilterState(f1, FilterState2(kNoLabel));
+
+ Arc larc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId);
+
+ if (Selector().GetMatcher()->LookAheadPrefix(&larc)) { // Have prefix arc?
+ labela = LookAheadOutput() ? larc.ilabel : larc.olabel;
+ arcb->ilabel = larc.ilabel; // Yes, go forward on that arc,
+ arcb->olabel = larc.olabel; // thus pushing the label.
+ arcb->weight = Times(arcb->weight, larc.weight);
+ arcb->nextstate = larc.nextstate;
+ return FilterState(f1, FilterState2(labela));
+ } else {
+ return FilterState(f1, FilterState2(kNoLabel));
+ }
+ }
+
+ // The lookahead queries below are forwarded to the underlying filter.
+ uint32 LookAheadFlags() const { return filter_.LookAheadFlags(); }
+ bool LookAheadArc() const { return filter_.LookAheadArc(); }
+ bool LookAheadOutput() const { return filter_.LookAheadOutput(); }
+
+ F filter_; // Underlying filter
+ FilterState f_ ; // Current filter state
+ const FST1 &fst1_;
+ const FST2 &fst2_;
+ Matcher1 matcher1_; // Multi-epsilon matcher for fst1
+ Matcher2 matcher2_; // Multi-epsilon matcher for fst2
+ ssize_t narcsa_; // Number of arcs leaving look-ahead match FST
+ // (only assigned by SetState() when prefix lookahead is on)
+
+ void operator=(const PushLabelsComposeFilter<F, M1, M2, MT> &); // disallow
+ };
+
+//
+// CONVENIENCE CLASS useful for setting up composition with a default
+// look-ahead matcher and filter.
+//
+
+ // General case of the trait: a plain Matcher over Fst<A> with a
+ // SequenceComposeFilter, i.e. no lookahead filter is involved.
+ template <class A, MatchType type> // MATCH_NONE
+ class DefaultLookAhead {
+ public:
+ typedef Matcher< Fst<A> > M;
+ typedef SequenceComposeFilter<M> ComposeFilter;
+ typedef M FstMatcher;
+ };
+
+ // Specializes for MATCH_INPUT to allow lookahead: a LookAheadMatcher with
+ // a LookAheadComposeFilter built on SequenceComposeFilter.
+ template <class A>
+ class DefaultLookAhead<A, MATCH_INPUT> {
+ public:
+ typedef LookAheadMatcher< Fst<A> > M;
+ typedef SequenceComposeFilter<M> SF;
+ typedef LookAheadComposeFilter<SF, M> ComposeFilter;
+ typedef M FstMatcher;
+ };
+
+ // Specializes for MATCH_OUTPUT to allow lookahead: a LookAheadMatcher with
+ // a LookAheadComposeFilter built on AltSequenceComposeFilter.
+ template <class A>
+ class DefaultLookAhead<A, MATCH_OUTPUT> {
+ public:
+ typedef LookAheadMatcher< Fst<A> > M;
+ typedef AltSequenceComposeFilter<M> SF;
+ typedef LookAheadComposeFilter<SF, M> ComposeFilter;
+ typedef M FstMatcher;
+ };
+
+ // Specializes for StdArc (MATCH_INPUT) to allow weight and label pushing.
+ // The filters are stacked: sequence -> lookahead -> push weights ->
+ // push labels.
+ template <>
+ class DefaultLookAhead<StdArc, MATCH_INPUT> {
+ public:
+ typedef StdArc A;
+ typedef LookAheadMatcher< Fst<A> > M;
+ typedef SequenceComposeFilter<M> SF;
+ typedef LookAheadComposeFilter<SF, M> LF;
+ typedef PushWeightsComposeFilter<LF, M> WF;
+ typedef PushLabelsComposeFilter<WF, M> ComposeFilter;
+ typedef M FstMatcher;
+ };
+
+ // Specializes for StdArc (MATCH_OUTPUT) to allow weight and label pushing.
+ // The filters are stacked: alt-sequence -> lookahead -> push weights ->
+ // push labels.
+ template <>
+ class DefaultLookAhead<StdArc, MATCH_OUTPUT> {
+ public:
+ typedef StdArc A;
+ typedef LookAheadMatcher< Fst<A> > M;
+ typedef AltSequenceComposeFilter<M> SF;
+ typedef LookAheadComposeFilter<SF, M> LF;
+ typedef PushWeightsComposeFilter<LF, M> WF;
+ typedef PushLabelsComposeFilter<WF, M> ComposeFilter;
+ typedef M FstMatcher;
+ };
+
+ // Specializes for LogArc (MATCH_INPUT) to allow weight and label pushing.
+ // The filters are stacked: sequence -> lookahead -> push weights ->
+ // push labels.
+ template <>
+ class DefaultLookAhead<LogArc, MATCH_INPUT> {
+ public:
+ typedef LogArc A;
+ typedef LookAheadMatcher< Fst<A> > M;
+ typedef SequenceComposeFilter<M> SF;
+ typedef LookAheadComposeFilter<SF, M> LF;
+ typedef PushWeightsComposeFilter<LF, M> WF;
+ typedef PushLabelsComposeFilter<WF, M> ComposeFilter;
+ typedef M FstMatcher;
+ };
+
+ // Specializes for LogArc (MATCH_OUTPUT) to allow weight and label pushing.
+ // The filters are stacked: alt-sequence -> lookahead -> push weights ->
+ // push labels.
+ template <>
+ class DefaultLookAhead<LogArc, MATCH_OUTPUT> {
+ public:
+ typedef LogArc A;
+ typedef LookAheadMatcher< Fst<A> > M;
+ typedef AltSequenceComposeFilter<M> SF;
+ typedef LookAheadComposeFilter<SF, M> LF;
+ typedef PushWeightsComposeFilter<LF, M> WF;
+ typedef PushLabelsComposeFilter<WF, M> ComposeFilter;
+ typedef M FstMatcher;
+ };
+
+} // namespace fst
+
+#endif // FST_LIB_LOOKAHEAD_FILTER_H__
diff --git a/src/include/fst/lookahead-matcher.h b/src/include/fst/lookahead-matcher.h
new file mode 100644
index 0000000..10d9c01
--- /dev/null
+++ b/src/include/fst/lookahead-matcher.h
@@ -0,0 +1,813 @@
+// lookahead-matcher.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Classes to add lookahead to FST matchers, useful e.g. for improving
+// composition efficiency with certain inputs.
+
+#ifndef FST_LIB_LOOKAHEAD_MATCHER_H__
+#define FST_LIB_LOOKAHEAD_MATCHER_H__
+
+#include <fst/add-on.h>
+#include <fst/const-fst.h>
+#include <fst/fst.h>
+#include <fst/label-reachable.h>
+#include <fst/matcher.h>
+
+
+DECLARE_string(save_relabel_ipairs);
+DECLARE_string(save_relabel_opairs);
+
+namespace fst {
+
+// LOOKAHEAD MATCHERS - these have the interface of Matchers (see
+// matcher.h) and these additional methods:
+//
+// template <class F>
+// class LookAheadMatcher {
+// public:
+// typedef F FST;
+// typedef F::Arc Arc;
+// typedef typename Arc::StateId StateId;
+// typedef typename Arc::Label Label;
+// typedef typename Arc::Weight Weight;
+//
+// // Required constructors.
+// LookAheadMatcher(const F &fst, MatchType match_type);
+// // If safe=true, the copy is thread-safe (except the lookahead Fst is
+// // preserved). See Fst<>::Copy() for further doc.
+// LookAheadMatcher(const LookAheadMatcher &matcher, bool safe = false);
+//
+// Below are methods for looking ahead for a match to a label and
+// more generally, to a rational set. Each returns false if there is
+// definitely not a match and returns true if there possibly is a
+// match.
+
+// // LABEL LOOKAHEAD: Can 'label' be read from the current matcher state
+// // after possibly following epsilon transitions?
+// bool LookAheadLabel(Label label) const;
+//
+// // RATIONAL LOOKAHEAD: The next methods allow looking ahead for an
+// // arbitrary rational set of strings, specified by an FST and a state
+// // from which to begin the matching. If the lookahead FST is a
+// // transducer, this looks on the side different from the matcher
+// // 'match_type' (cf. composition).
+//
+// // Are there paths P from 's' in the lookahead FST that can be read from
+// // the current matcher state?
+// bool LookAheadFst(const Fst<Arc>& fst, StateId s);
+//
+// // Gives an estimate of the combined weight of the paths P in the
+// // lookahead and matcher FSTs for the last call to LookAheadFst.
+// // A trivial implementation returns Weight::One(). Non-trivial
+// // implementations are useful for weight-pushing in composition.
+// Weight LookAheadWeight() const;
+//
+// // Is there a single non-epsilon arc found in the lookahead FST
+// // that begins P (after possibly following any epsilons) in the last
+// // call to LookAheadFst? If so, return true and copy it to '*arc', o.w.
+// // return false. A trivial implementation returns false. Non-trivial
+// // implementations are useful for label-pushing in composition.
+// bool LookAheadPrefix(Arc *arc);
+//
+// // Optionally pre-specifies the lookahead FST that will be passed
+// // to LookAheadFst() for possible precomputation. If copy is true,
+// // then 'fst' is a copy of the FST used in the previous call to
+// // this method (useful to avoid unnecessary updates).
+// void InitLookAheadFst(const Fst<Arc>& fst, bool copy = false);
+//
+// };
+
+//
+// LOOK-AHEAD FLAGS (see also kMatcherFlags in matcher.h):
+//
+// Matcher is a lookahead matcher when 'match_type' is MATCH_INPUT.
+const uint32 kInputLookAheadMatcher = 0x00000001;
+
+// Matcher is a lookahead matcher when 'match_type' is MATCH_OUTPUT.
+const uint32 kOutputLookAheadMatcher = 0x00000002;
+
+// Is a non-trivial implementation of the LookAheadWeight() method
+// defined, and should it be used?
+const uint32 kLookAheadWeight = 0x00000004;
+
+// Is a non-trivial implementation of the LookAheadPrefix() method
+// defined, and should it be used?
+const uint32 kLookAheadPrefix = 0x00000008;
+
+// Look ahead on the matcher FST's non-epsilon arcs?
+const uint32 kLookAheadNonEpsilons = 0x00000010;
+
+// Look ahead on the matcher FST's epsilon arcs?
+const uint32 kLookAheadEpsilons = 0x00000020;
+
+// Ignore epsilon paths for the lookahead prefix? Note this gives
+// correct results in composition only with an appropriate composition
+// filter since it depends on the filter blocking the ignored paths.
+const uint32 kLookAheadNonEpsilonPrefix = 0x00000040;
+
+// For LabelLookAheadMatcher: save relabeling data to file.
+const uint32 kLookAheadKeepRelabelData = 0x00000080;
+
+// Mask covering all of the lookahead matcher flag bits defined above.
+const uint32 kLookAheadFlags = 0x000000ff;
+
+// Look-ahead matcher interface, templated on the Arc definition; base
+// class of the lookahead matcher specializations that are returned by
+// the InitMatcher() Fst method.
+template <class A>
+class LookAheadMatcherBase : public MatcherBase<A> {
+ public:
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+ typedef typename A::Label Label;
+ typedef typename A::Weight Weight;
+
+ LookAheadMatcherBase() // Starts with weight One() and no prefix arc.
+ : weight_(Weight::One()),
+ prefix_arc_(kNoLabel, kNoLabel, Weight::One(), kNoStateId) {}
+
+ virtual ~LookAheadMatcherBase() {}
+
+ bool LookAheadLabel(Label label) const { return LookAheadLabel_(label); }
+
+ bool LookAheadFst(const Fst<Arc> &fst, StateId s) {
+ return LookAheadFst_(fst, s);
+ }
+
+ Weight LookAheadWeight() const { return weight_; } // Last stored weight.
+
+ bool LookAheadPrefix(Arc *arc) const { // Copies out the stored prefix, if any.
+ if (prefix_arc_.nextstate != kNoStateId) { // kNoStateId marks "no prefix".
+ *arc = prefix_arc_;
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ virtual void InitLookAheadFst(const Fst<Arc>& fst, bool copy = false) = 0;
+
+ protected: // Setters for derived classes to record look-ahead results.
+ void SetLookAheadWeight(const Weight &w) { weight_ = w; }
+
+ void SetLookAheadPrefix(const Arc &arc) { prefix_arc_ = arc; }
+
+ void ClearLookAheadPrefix() { prefix_arc_.nextstate = kNoStateId; }
+
+ private: // Virtual hooks behind the public non-virtual look-ahead methods.
+ virtual bool LookAheadLabel_(Label label) const = 0;
+ virtual bool LookAheadFst_(const Fst<Arc> &fst,
+ StateId s) = 0; // This must set the look-ahead
+ // weight and prefix if non-trivial.
+ Weight weight_; // Look-ahead weight
+ Arc prefix_arc_; // Look-ahead prefix arc; nextstate == kNoStateId if unset
+};
+
+
+// Doesn't really look ahead; just declares the future looks good regardless.
+template <class M>
+class TrivialLookAheadMatcher
+ : public LookAheadMatcherBase<typename M::FST::Arc> {
+ public:
+ typedef typename M::FST FST;
+ typedef typename M::Arc Arc;
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Label Label;
+ typedef typename Arc::Weight Weight;
+
+ TrivialLookAheadMatcher(const FST &fst, MatchType match_type)
+ : matcher_(fst, match_type) {}
+
+ TrivialLookAheadMatcher(const TrivialLookAheadMatcher<M> &lmatcher,
+ bool safe = false)
+ : matcher_(lmatcher.matcher_, safe) {}
+
+ // General matcher methods; these forward to the wrapped matcher_.
+ TrivialLookAheadMatcher<M> *Copy(bool safe = false) const {
+ return new TrivialLookAheadMatcher<M>(*this, safe);
+ }
+
+ MatchType Type(bool test) const { return matcher_.Type(test); }
+ void SetState(StateId s) { return matcher_.SetState(s); }
+ bool Find(Label label) { return matcher_.Find(label); }
+ bool Done() const { return matcher_.Done(); }
+ const Arc& Value() const { return matcher_.Value(); }
+ void Next() { matcher_.Next(); }
+ virtual const FST &GetFst() const { return matcher_.GetFst(); }
+ uint64 Properties(uint64 props) const { return matcher_.Properties(props); }
+ uint32 Flags() const { // Advertises look-ahead for both match types.
+ return matcher_.Flags() | kInputLookAheadMatcher | kOutputLookAheadMatcher;
+ }
+
+ // Look-ahead methods: always a possible match, weight One(), no prefix.
+ bool LookAheadLabel(Label label) const { return true; }
+ bool LookAheadFst(const Fst<Arc> &fst, StateId s) {return true; }
+ Weight LookAheadWeight() const { return Weight::One(); }
+ bool LookAheadPrefix(Arc *arc) const { return false; }
+ void InitLookAheadFst(const Fst<Arc>& fst, bool copy = false) {}
+
+ private:
+ // This allows base class virtual access to non-virtual derived-
+ // class members of the same name. It makes the derived class more
+ // efficient to use but unsafe to further derive.
+ virtual void SetState_(StateId s) { SetState(s); }
+ virtual bool Find_(Label label) { return Find(label); }
+ virtual bool Done_() const { return Done(); }
+ virtual const Arc& Value_() const { return Value(); }
+ virtual void Next_() { Next(); }
+
+ bool LookAheadLabel_(Label l) const { return LookAheadLabel(l); }
+
+ bool LookAheadFst_(const Fst<Arc> &fst, StateId s) {
+ return LookAheadFst(fst, s);
+ }
+
+ Weight LookAheadWeight_() const { return LookAheadWeight(); }
+ bool LookAheadPrefix_(Arc *arc) const { return LookAheadPrefix(arc); }
+
+ M matcher_; // Wrapped matcher that does the actual matching.
+};
+
+// Look-ahead of one transition. Template argument F accepts the
+// look-ahead flags (kLookAhead*) that control behavior.
+template <class M, uint32 F = kLookAheadNonEpsilons | kLookAheadEpsilons |
+ kLookAheadWeight | kLookAheadPrefix>
+class ArcLookAheadMatcher
+ : public LookAheadMatcherBase<typename M::FST::Arc> {
+ public:
+ typedef typename M::FST FST;
+ typedef typename M::Arc Arc;
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Label Label;
+ typedef typename Arc::Weight Weight;
+ typedef NullAddOn MatcherData;
+
+ using LookAheadMatcherBase<Arc>::LookAheadWeight;
+ using LookAheadMatcherBase<Arc>::SetLookAheadPrefix;
+ using LookAheadMatcherBase<Arc>::SetLookAheadWeight;
+ using LookAheadMatcherBase<Arc>::ClearLookAheadPrefix;
+
+ ArcLookAheadMatcher(const FST &fst, MatchType match_type,
+ MatcherData *data = 0) // 'data' is not used by this matcher.
+ : matcher_(fst, match_type),
+ fst_(matcher_.GetFst()),
+ lfst_(0),
+ s_(kNoStateId) {}
+
+ ArcLookAheadMatcher(const ArcLookAheadMatcher<M, F> &lmatcher,
+ bool safe = false)
+ : matcher_(lmatcher.matcher_, safe),
+ fst_(matcher_.GetFst()),
+ lfst_(lmatcher.lfst_), // The look-ahead FST pointer is shared.
+ s_(kNoStateId) {}
+
+ // General matcher methods; these forward to the wrapped matcher_.
+ ArcLookAheadMatcher<M, F> *Copy(bool safe = false) const {
+ return new ArcLookAheadMatcher<M, F>(*this, safe);
+ }
+
+ MatchType Type(bool test) const { return matcher_.Type(test); }
+
+ void SetState(StateId s) {
+ s_ = s; // Remembered for the final-weight test in LookAheadFst().
+ matcher_.SetState(s);
+ }
+
+ bool Find(Label label) { return matcher_.Find(label); }
+ bool Done() const { return matcher_.Done(); }
+ const Arc& Value() const { return matcher_.Value(); }
+ void Next() { matcher_.Next(); }
+ const FST &GetFst() const { return fst_; }
+ uint64 Properties(uint64 props) const { return matcher_.Properties(props); }
+ uint32 Flags() const { // Look-ahead for both match types, plus F.
+ return matcher_.Flags() | kInputLookAheadMatcher |
+ kOutputLookAheadMatcher | F;
+ }
+
+ // Writable matcher methods; this matcher has no initialization data.
+ MatcherData *GetData() const { return 0; }
+
+ // Look-ahead methods. Label look-ahead is a Find() on the current state.
+ bool LookAheadLabel(Label label) const { return matcher_.Find(label); }
+
+ // Checks if there is a matching (possibly super-final) transition
+ // at (s_, s).
+ bool LookAheadFst(const Fst<Arc> &fst, StateId s);
+
+ void InitLookAheadFst(const Fst<Arc>& fst, bool copy = false) {
+ lfst_ = &fst; // Only the pointer is recorded; 'copy' is unused.
+ }
+
+ private:
+ // This allows base class virtual access to non-virtual derived-
+ // class members of the same name. It makes the derived class more
+ // efficient to use but unsafe to further derive.
+ virtual void SetState_(StateId s) { SetState(s); }
+ virtual bool Find_(Label label) { return Find(label); }
+ virtual bool Done_() const { return Done(); }
+ virtual const Arc& Value_() const { return Value(); }
+ virtual void Next_() { Next(); }
+
+ bool LookAheadLabel_(Label l) const { return LookAheadLabel(l); }
+ bool LookAheadFst_(const Fst<Arc> &fst, StateId s) {
+ return LookAheadFst(fst, s);
+ }
+
+ mutable M matcher_; // Mutable so const LookAheadLabel() can call Find().
+ const FST &fst_; // Matcher FST
+ const Fst<Arc> *lfst_; // Look-ahead FST
+ StateId s_; // Matcher state
+};
+
+template <class M, uint32 F> // Returns true if (s_, s) may match; see class.
+bool ArcLookAheadMatcher<M, F>::LookAheadFst(const Fst<Arc> &fst, StateId s) {
+ if (&fst != lfst_) // A different look-ahead FST than last time.
+ InitLookAheadFst(fst);
+
+ bool ret = false;
+ ssize_t nprefix = 0; // Number of matches counted as prefix candidates.
+ if (F & kLookAheadWeight)
+ SetLookAheadWeight(Weight::Zero()); // Accumulated below with Plus().
+ if (F & kLookAheadPrefix)
+ ClearLookAheadPrefix();
+ if (fst_.Final(s_) != Weight::Zero() && // Both states are final.
+ lfst_->Final(s) != Weight::Zero()) {
+ if (!(F & (kLookAheadWeight | kLookAheadPrefix)))
+ return true; // Nothing to accumulate, so the first match suffices.
+ ++nprefix;
+ if (F & kLookAheadWeight)
+ SetLookAheadWeight(Plus(LookAheadWeight(),
+ Times(fst_.Final(s_), lfst_->Final(s))));
+ ret = true;
+ }
+ if (matcher_.Find(kNoLabel)) { // Presumably matcher-side epsilons; confirm.
+ if (!(F & (kLookAheadWeight | kLookAheadPrefix)))
+ return true;
+ ++nprefix;
+ if (F & kLookAheadWeight)
+ for (; !matcher_.Done(); matcher_.Next())
+ SetLookAheadWeight(Plus(LookAheadWeight(), matcher_.Value().weight));
+ ret = true;
+ }
+ for (ArcIterator< Fst<Arc> > aiter(*lfst_, s); // Arcs leaving s in lfst_.
+ !aiter.Done();
+ aiter.Next()) {
+ const Arc &arc = aiter.Value();
+ Label label = kNoLabel;
+ switch (matcher_.Type(false)) { // Use the side opposite the match type.
+ case MATCH_INPUT:
+ label = arc.olabel;
+ break;
+ case MATCH_OUTPUT:
+ label = arc.ilabel;
+ break;
+ default:
+ FSTERROR() << "ArcLookAheadMatcher::LookAheadFst: bad match type";
+ return true;
+ }
+ if (label == 0) { // Epsilon on the look-ahead FST side.
+ if (!(F & (kLookAheadWeight | kLookAheadPrefix)))
+ return true;
+ if (!(F & kLookAheadNonEpsilonPrefix)) // Else epsilons are not counted.
+ ++nprefix;
+ if (F & kLookAheadWeight)
+ SetLookAheadWeight(Plus(LookAheadWeight(), arc.weight));
+ ret = true;
+ } else if (matcher_.Find(label)) { // Non-epsilon label with a match.
+ if (!(F & (kLookAheadWeight | kLookAheadPrefix)))
+ return true;
+ for (; !matcher_.Done(); matcher_.Next()) {
+ ++nprefix;
+ if (F & kLookAheadWeight)
+ SetLookAheadWeight(Plus(LookAheadWeight(),
+ Times(arc.weight,
+ matcher_.Value().weight)));
+ if ((F & kLookAheadPrefix) && nprefix == 1) // First candidate so far.
+ SetLookAheadPrefix(arc);
+ }
+ ret = true;
+ }
+ }
+ if (F & kLookAheadPrefix) { // A prefix is kept only if exactly one was counted.
+ if (nprefix == 1)
+ SetLookAheadWeight(Weight::One()); // Avoids double counting.
+ else
+ ClearLookAheadPrefix();
+ }
+ return ret;
+}
+
+
+// Template argument F accepts flags to control behavior.
+// It must include precisely one of kInputLookAheadMatcher or
+// kOutputLookAheadMatcher.
+template <class M, uint32 F = kLookAheadEpsilons | kLookAheadWeight |
+ kLookAheadPrefix | kLookAheadNonEpsilonPrefix |
+ kLookAheadKeepRelabelData,
+ class S = DefaultAccumulator<typename M::Arc> >
+class LabelLookAheadMatcher // Look-ahead via label reachability data.
+ : public LookAheadMatcherBase<typename M::FST::Arc> {
+ public:
+ typedef typename M::FST FST;
+ typedef typename M::Arc Arc;
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Label Label;
+ typedef typename Arc::Weight Weight;
+ typedef LabelReachableData<Label> MatcherData;
+
+ using LookAheadMatcherBase<Arc>::LookAheadWeight;
+ using LookAheadMatcherBase<Arc>::SetLookAheadPrefix;
+ using LookAheadMatcherBase<Arc>::SetLookAheadWeight;
+ using LookAheadMatcherBase<Arc>::ClearLookAheadPrefix;
+
+ LabelLookAheadMatcher(const FST &fst, MatchType match_type,
+ MatcherData *data = 0, S *s = 0)
+ : matcher_(fst, match_type),
+ lfst_(0),
+ label_reachable_(0),
+ s_(kNoStateId), match_set_state_(false), reach_set_state_(false),
+ error_(false) {
+ if (!(F & (kInputLookAheadMatcher | kOutputLookAheadMatcher))) {
+ FSTERROR() << "LabelLookaheadMatcher: bad matcher flags: " << F;
+ error_ = true;
+ }
+ bool reach_input = match_type == MATCH_INPUT;
+ if (data) { // Reuse supplied data only if it is for the matching side.
+ if (reach_input == data->ReachInput())
+ label_reachable_ = new LabelReachable<Arc, S>(data, s);
+ } else if ((reach_input && (F & kInputLookAheadMatcher)) ||
+ (!reach_input && (F & kOutputLookAheadMatcher))) {
+ label_reachable_ = new LabelReachable<Arc, S>( // Built from 'fst'.
+ fst, reach_input, s, F & kLookAheadKeepRelabelData);
+ }
+ }
+
+ LabelLookAheadMatcher(const LabelLookAheadMatcher<M, F, S> &lmatcher,
+ bool safe = false)
+ : matcher_(lmatcher.matcher_, safe),
+ lfst_(lmatcher.lfst_),
+ label_reachable_(
+ lmatcher.label_reachable_ ?
+ new LabelReachable<Arc, S>(*lmatcher.label_reachable_) : 0),
+ s_(kNoStateId), match_set_state_(false), reach_set_state_(false),
+ error_(lmatcher.error_) {}
+
+ ~LabelLookAheadMatcher() {
+ delete label_reachable_;
+ }
+
+ // General matcher methods
+ LabelLookAheadMatcher<M, F, S> *Copy(bool safe = false) const {
+ return new LabelLookAheadMatcher<M, F, S>(*this, safe);
+ }
+
+ MatchType Type(bool test) const { return matcher_.Type(test); }
+
+ void SetState(StateId s) { // Lazy: only records s; see Find() and below.
+ if (s_ == s)
+ return;
+ s_ = s;
+ match_set_state_ = false;
+ reach_set_state_ = false;
+ }
+
+ bool Find(Label label) {
+ if (!match_set_state_) { // Deferred matcher_.SetState() from SetState().
+ matcher_.SetState(s_);
+ match_set_state_ = true;
+ }
+ return matcher_.Find(label);
+ }
+
+ bool Done() const { return matcher_.Done(); }
+ const Arc& Value() const { return matcher_.Value(); }
+ void Next() { matcher_.Next(); }
+ const FST &GetFst() const { return matcher_.GetFst(); }
+
+ uint64 Properties(uint64 inprops) const { // Adds kError on any error.
+ uint64 outprops = matcher_.Properties(inprops);
+ if (error_ || (label_reachable_ && label_reachable_->Error()))
+ outprops |= kError;
+ return outprops;
+ }
+
+ uint32 Flags() const { // Look-ahead flags only if reachability data exists.
+ if (label_reachable_ && label_reachable_->GetData()->ReachInput())
+ return matcher_.Flags() | F | kInputLookAheadMatcher;
+ else if (label_reachable_ && !label_reachable_->GetData()->ReachInput())
+ return matcher_.Flags() | F | kOutputLookAheadMatcher;
+ else
+ return matcher_.Flags();
+ }
+
+ // Writable matcher methods; returns 0 when there is no reachability data.
+ MatcherData *GetData() const {
+ return label_reachable_ ? label_reachable_->GetData() : 0;
+ }
+
+ // Look-ahead methods. Returns true when no reachability data is available.
+ bool LookAheadLabel(Label label) const {
+ if (label == 0) // Epsilon: always reported as a possible match.
+ return true;
+
+ if (label_reachable_) {
+ if (!reach_set_state_) { // Deferred SetState() on label_reachable_.
+ label_reachable_->SetState(s_);
+ reach_set_state_ = true;
+ }
+ return label_reachable_->Reach(label);
+ } else {
+ return true;
+ }
+ }
+
+ // Checks if there is a matching (possibly super-final) transition
+ // at (s_, s).
+ template <class L>
+ bool LookAheadFst(const L &fst, StateId s);
+
+ void InitLookAheadFst(const Fst<Arc>& fst, bool copy = false) {
+ lfst_ = &fst;
+ if (label_reachable_)
+ label_reachable_->ReachInit(fst, copy);
+ }
+
+ template <class L> // Same as above for a concrete look-ahead FST type L.
+ void InitLookAheadFst(const L& fst, bool copy = false) {
+ lfst_ = static_cast<const Fst<Arc> *>(&fst);
+ if (label_reachable_)
+ label_reachable_->ReachInit(fst, copy);
+ }
+
+ private:
+ // This allows base class virtual access to non-virtual derived-
+ // class members of the same name. It makes the derived class more
+ // efficient to use but unsafe to further derive.
+ virtual void SetState_(StateId s) { SetState(s); }
+ virtual bool Find_(Label label) { return Find(label); }
+ virtual bool Done_() const { return Done(); }
+ virtual const Arc& Value_() const { return Value(); }
+ virtual void Next_() { Next(); }
+
+ bool LookAheadLabel_(Label l) const { return LookAheadLabel(l); }
+ bool LookAheadFst_(const Fst<Arc> &fst, StateId s) {
+ return LookAheadFst(fst, s);
+ }
+
+ mutable M matcher_;
+ const Fst<Arc> *lfst_; // Look-ahead FST
+ LabelReachable<Arc, S> *label_reachable_; // Label reachability info
+ StateId s_; // Matcher state
+ bool match_set_state_; // matcher_.SetState called? Initialized to false.
+ mutable bool reach_set_state_; // reachable_.SetState called? Ditto.
+ bool error_; // Set when F lacks both look-ahead matcher flags.
+};
+
+template <class M, uint32 F, class S> // Returns true if (s_, s) may match.
+template <class L> inline
+bool LabelLookAheadMatcher<M, F, S>::LookAheadFst(const L &fst, StateId s) {
+ if (static_cast<const Fst<Arc> *>(&fst) != lfst_) // New look-ahead FST.
+ InitLookAheadFst(fst);
+
+ SetLookAheadWeight(Weight::One()); // Trivial results unless refined below.
+ ClearLookAheadPrefix();
+
+ if (!label_reachable_) // No reachability data: report a possible match.
+ return true;
+
+ label_reachable_->SetState(s_, s);
+ reach_set_state_ = true;
+
+ bool compute_weight = F & kLookAheadWeight;
+ bool compute_prefix = F & kLookAheadPrefix;
+
+ bool reach_input = Type(false) == MATCH_OUTPUT; // Side opposite the match.
+ ArcIterator<L> aiter(fst, s);
+ bool reach_arc = label_reachable_->Reach(&aiter, 0,
+ internal::NumArcs(*lfst_, s),
+ reach_input, compute_weight);
+ if (reach_arc) {
+ ssize_t begin = label_reachable_->ReachBegin();
+ ssize_t end = label_reachable_->ReachEnd();
+ if (compute_prefix && end - begin == 1) { // Exactly one arc in range.
+ aiter.Seek(begin);
+ SetLookAheadPrefix(aiter.Value());
+ compute_weight = false; // Weight stays One() when a prefix is set.
+ } else if (compute_weight) {
+ SetLookAheadWeight(label_reachable_->ReachWeight());
+ }
+ }
+ Weight lfinal = internal::Final(*lfst_, s);
+ bool reach_final = lfinal != Weight::Zero() &&
+ label_reachable_->ReachFinal();
+ if (reach_final && compute_weight) // Fold in the final weight of s.
+ SetLookAheadWeight(reach_arc ?
+ Plus(LookAheadWeight(), lfinal) : lfinal);
+
+ return reach_arc || reach_final;
+}
+
+
+// Label-lookahead relabeling class; relabels using LabelReachable data.
+template <class A>
+class LabelLookAheadRelabeler {
+ public:
+ typedef typename A::Label Label;
+ typedef LabelReachableData<Label> MatcherData;
+ typedef AddOnPair<MatcherData, MatcherData> D;
+
+ // Relabels matcher Fst - initialization function object.
+ template <typename I>
+ LabelLookAheadRelabeler(I **impl);
+
+ // Relabels arbitrary Fst. Class L should be a label-lookahead Fst.
+ template <class L>
+ static void Relabel(MutableFst<A> *fst, const L &mfst,
+ bool relabel_input) {
+ typename L::Impl *impl = mfst.GetImpl();
+ D *data = impl->GetAddOn();
+ LabelReachable<A> reachable(data->First() ? // Else use the second element.
+ data->First() : data->Second());
+ reachable.Relabel(fst, relabel_input);
+ }
+
+ // Returns relabeling pairs (cf. relabel.h::Relabel()).
+ // Class L should be a label-lookahead Fst.
+ // If 'avoid_collisions' is true, extra pairs are added to
+ // ensure no collisions when relabeling automata that have
+ // labels unseen here.
+ template <class L>
+ static void RelabelPairs(const L &mfst, vector<pair<Label, Label> > *pairs,
+ bool avoid_collisions = false) {
+ typename L::Impl *impl = mfst.GetImpl();
+ D *data = impl->GetAddOn();
+ LabelReachable<A> reachable(data->First() ? // Else use the second element.
+ data->First() : data->Second());
+ reachable.RelabelPairs(pairs, avoid_collisions);
+ }
+};
+
+template <class A>
+template <typename I> inline
+LabelLookAheadRelabeler<A>::LabelLookAheadRelabeler(I **impl) {
+ // Grab everything needed from the implementation up front, since it
+ // is deleted below when the matcher FST is not mutable.
+ Fst<A> &matcher_fst = (*impl)->GetFst();
+ D *addon = (*impl)->GetAddOn();
+ const string type_name = (*impl)->Type();
+ const bool relabel_in_place = matcher_fst.Properties(kMutable, false);
+ // Relabel in place when possible; otherwise work on a mutable copy and
+ // hold a reference on the add-on so it outlives the old implementation.
+ MutableFst<A> *target = 0;
+ if (relabel_in_place) {
+ target = static_cast<MutableFst<A> *>(&matcher_fst);
+ } else {
+ target = new VectorFst<A>(matcher_fst);
+ addon->IncrRefCount();
+ delete *impl;
+ }
+ // The first add-on element, when present, holds the input-side
+ // reachability data; otherwise the second (output-side) one is used.
+ const bool reach_input = addon->First() != 0;
+ LabelReachable<A> reachable(reach_input ? addon->First() : addon->Second());
+ reachable.Relabel(target, reach_input);
+ const string &pairs_file =
+ reach_input ? FLAGS_save_relabel_ipairs : FLAGS_save_relabel_opairs;
+ if (!pairs_file.empty()) { // Optionally dump the relabeling pairs.
+ vector<pair<Label, Label> > pairs;
+ reachable.RelabelPairs(&pairs, true);
+ WriteLabelPairs(pairs_file, pairs);
+ }
+ // Rebuild the implementation around the relabeled copy.
+ if (!relabel_in_place) {
+ *impl = new I(*target, type_name);
+ (*impl)->SetAddOn(addon);
+ delete target;
+ addon->DecrRefCount();
+ }
+}
+
+
+// Generic lookahead matcher, templated on the FST definition
+// - a wrapper around an owned pointer to a specific matcher.
+template <class F>
+class LookAheadMatcher {
+ public:
+ typedef F FST;
+ typedef typename F::Arc Arc;
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Label Label;
+ typedef typename Arc::Weight Weight;
+ typedef LookAheadMatcherBase<Arc> LBase;
+
+ LookAheadMatcher(const F &fst, MatchType match_type) {
+ base_ = fst.InitMatcher(match_type);
+ if (!base_) // The FST supplies no matcher: fall back to SortedMatcher.
+ base_ = new SortedMatcher<F>(fst, match_type);
+ lookahead_ = false; // Determined lazily by LookAheadCheck().
+ }
+
+ LookAheadMatcher(const LookAheadMatcher<F> &matcher, bool safe = false) {
+ base_ = matcher.base_->Copy(safe); // Each wrapper owns its own matcher.
+ lookahead_ = matcher.lookahead_;
+ }
+
+ ~LookAheadMatcher() { delete base_; }
+
+ // General matcher methods; these forward to the wrapped matcher.
+ LookAheadMatcher<F> *Copy(bool safe = false) const {
+ return new LookAheadMatcher<F>(*this, safe);
+ }
+
+ MatchType Type(bool test) const { return base_->Type(test); }
+ void SetState(StateId s) { base_->SetState(s); }
+ bool Find(Label label) { return base_->Find(label); }
+ bool Done() const { return base_->Done(); }
+ const Arc& Value() const { return base_->Value(); }
+ void Next() { base_->Next(); }
+ const F &GetFst() const { return static_cast<const F &>(base_->GetFst()); }
+
+ uint64 Properties(uint64 props) const { return base_->Properties(props); }
+
+ uint32 Flags() const { return base_->Flags(); }
+
+ // Look-ahead methods; trivial answers if base_ is not a lookahead matcher.
+ bool LookAheadLabel(Label label) const {
+ if (LookAheadCheck()) {
+ LBase *lbase = static_cast<LBase *>(base_);
+ return lbase->LookAheadLabel(label);
+ } else {
+ return true;
+ }
+ }
+
+ bool LookAheadFst(const Fst<Arc> &fst, StateId s) {
+ if (LookAheadCheck()) {
+ LBase *lbase = static_cast<LBase *>(base_);
+ return lbase->LookAheadFst(fst, s);
+ } else {
+ return true;
+ }
+ }
+
+ Weight LookAheadWeight() const {
+ if (LookAheadCheck()) {
+ LBase *lbase = static_cast<LBase *>(base_);
+ return lbase->LookAheadWeight();
+ } else {
+ return Weight::One();
+ }
+ }
+
+ bool LookAheadPrefix(Arc *arc) const {
+ if (LookAheadCheck()) {
+ LBase *lbase = static_cast<LBase *>(base_);
+ return lbase->LookAheadPrefix(arc);
+ } else {
+ return false;
+ }
+ }
+
+ void InitLookAheadFst(const Fst<Arc>& fst, bool copy = false) {
+ if (LookAheadCheck()) {
+ LBase *lbase = static_cast<LBase *>(base_);
+ lbase->InitLookAheadFst(fst, copy);
+ }
+ }
+
+ private:
+ bool LookAheadCheck() const { // True if base_ can be used as an LBase.
+ if (!lookahead_) { // Not yet known to be a lookahead matcher: test flags.
+ lookahead_ = base_->Flags() &
+ (kInputLookAheadMatcher | kOutputLookAheadMatcher);
+ if (!lookahead_) {
+ FSTERROR() << "LookAheadMatcher: No look-ahead matcher defined";
+ }
+ }
+ return lookahead_;
+ }
+
+ MatcherBase<Arc> *base_; // Owned; deleted in the destructor.
+ mutable bool lookahead_; // Cached positive result of LookAheadCheck().
+
+ void operator=(const LookAheadMatcher<F> &); // disallow (base_ is owned)
+};
+
+} // namespace fst
+
+#endif // FST_LIB_LOOKAHEAD_MATCHER_H__
diff --git a/src/include/fst/map.h b/src/include/fst/map.h
new file mode 100644
index 0000000..419cac4
--- /dev/null
+++ b/src/include/fst/map.h
@@ -0,0 +1,121 @@
+// map.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Compatibility file for old-style Map() functions and MapFst class
+// that have been renamed to ArcMap (cf. StateMap).
+
+#ifndef FST_LIB_MAP_H__
+#define FST_LIB_MAP_H__
+
+
+#include <fst/arc-map.h>
+
+
+namespace fst {
+
+template<class A, class C> // In-place map, mapper by pointer.
+void Map(MutableFst<A> *fst, C* mapper) { // Old-style name for ArcMap().
+ ArcMap(fst, mapper);
+}
+
+template<class A, class C> // In-place map, mapper by value.
+void Map(MutableFst<A> *fst, C mapper) { // Old-style name for ArcMap().
+ ArcMap(fst, mapper);
+}
+
+template<class A, class B, class C> // Maps ifst into ofst, mapper by pointer.
+void Map(const Fst<A> &ifst, MutableFst<B> *ofst, C* mapper) {
+ ArcMap(ifst, ofst, mapper); // Old-style name for ArcMap().
+}
+
+template<class A, class B, class C> // Maps ifst into ofst, mapper by value.
+void Map(const Fst<A> &ifst, MutableFst<B> *ofst, C mapper) {
+ ArcMap(ifst, ofst, mapper); // Old-style name for ArcMap().
+}
+
+typedef ArcMapFstOptions MapFstOptions; // Old-style name for the options.
+
+template <class A, class B, class C> // Old-style name for ArcMapFst.
+class MapFst : public ArcMapFst<A, B, C> {
+ public:
+ typedef B Arc;
+ typedef typename B::Weight Weight;
+ typedef typename B::StateId StateId;
+ typedef CacheState<B> State;
+
+ MapFst(const Fst<A> &fst, const C &mapper, const MapFstOptions& opts)
+ : ArcMapFst<A, B, C>(fst, mapper, opts) {}
+
+ MapFst(const Fst<A> &fst, C* mapper, const MapFstOptions& opts)
+ : ArcMapFst<A, B, C>(fst, mapper, opts) {}
+
+ MapFst(const Fst<A> &fst, const C &mapper)
+ : ArcMapFst<A, B, C>(fst, mapper) {}
+
+ MapFst(const Fst<A> &fst, C* mapper) : ArcMapFst<A, B, C>(fst, mapper) {}
+
+ // Copy constructor from the base type. See Fst<>::Copy() for doc.
+ MapFst(const ArcMapFst<A, B, C> &fst, bool safe = false)
+ : ArcMapFst<A, B, C>(fst, safe) {}
+
+ // Get a copy of this MapFst. See Fst<>::Copy() for further doc.
+virtual MapFst<A, B, C> *Copy(bool safe = false) const {
+ return new MapFst(*this, safe);
+ }
+};
+
+
+// Specialization for MapFst; reuses the ArcMapFst state iterator.
+template <class A, class B, class C>
+class StateIterator< MapFst<A, B, C> >
+ : public StateIterator< ArcMapFst<A, B, C> > {
+ public:
+ explicit StateIterator(const ArcMapFst<A, B, C> &fst)
+ : StateIterator< ArcMapFst<A, B, C> >(fst) {}
+};
+
+
+// Specialization for MapFst; reuses the ArcMapFst arc iterator.
+template <class A, class B, class C>
+class ArcIterator< MapFst<A, B, C> >
+ : public ArcIterator< ArcMapFst<A, B, C> > {
+ public:
+ ArcIterator(const ArcMapFst<A, B, C> &fst, typename A::StateId s)
+ : ArcIterator< ArcMapFst<A, B, C> >(fst, s) {}
+};
+
+
+template <class A> // Mapper that returns each arc unchanged.
+struct IdentityMapper {
+ typedef A FromArc;
+ typedef A ToArc;
+
+ A operator()(const A &arc) const { return arc; }
+
+ MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; }
+
+ MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+
+ MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;}
+
+ uint64 Properties(uint64 props) const { return props; } // Passed through.
+};
+
+} // namespace fst
+
+#endif // FST_LIB_MAP_H__
diff --git a/src/include/fst/matcher-fst.h b/src/include/fst/matcher-fst.h
new file mode 100644
index 0000000..73e64ad
--- /dev/null
+++ b/src/include/fst/matcher-fst.h
@@ -0,0 +1,359 @@
+// matcher-fst.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Class to add a matcher to an FST.
+
+#ifndef FST_LIB_MATCHER_FST_FST_H__
+#define FST_LIB_MATCHER_FST_FST_H__
+
+#include <fst/add-on.h>
+#include <fst/const-fst.h>
+#include <fst/lookahead-matcher.h>
+
+
+namespace fst {
+
+// WRITABLE MATCHERS - these have the interface of Matchers (see
+// matcher.h) and these additional methods:
+//
+// template <class F>
+// class Matcher {
+// public:
+// typedef ... MatcherData; // Initialization data
+// ...
+// // Constructor with additional argument for external initialization
+// // data; matcher increments its reference count on construction and
+// // decrements the reference count, and if 0 deletes, on destruction.
+// Matcher(const F &fst, MatchType type, MatcherData *data);
+//
+// // Returns pointer to initialization data that can be
+// // passed to a Matcher constructor.
+// MatcherData *GetData() const;
+// };
+
+// The matcher initialization data class must have the form:
+// class MatcherData {
+// public:
+// // Required copy constructor.
+// MatcherData(const MatcherData &);
+// //
+// // Required I/O methods.
+// static MatcherData *Read(istream &istrm);
+// bool Write(ostream &ostrm);
+//
+// // Required reference counting.
+// int RefCount() const;
+// int IncrRefCount();
+// int DecrRefCount();
+// };
+
+// Default initializer for MatcherFst: construction has no effect.
+template <class M>
+class NullMatcherFstInit {
+ public:
+  // Add-on pair holding two MatcherData pointers, and the implementation
+  // type that carries it.
+  typedef AddOnPair<typename M::MatcherData, typename M::MatcherData> D;
+  typedef AddOnImpl<typename M::FST, D> Impl;
+
+  // Receives the address of the implementation pointer and ignores it.
+  NullMatcherFstInit(Impl ** /* impl */) {}
+};
+
+// Class to add a matcher M to an Fst F. Creates a new Fst of type name N.
+// Optional function object I can be used to initialize the Fst.
+//
+// The implementation (Impl) is an AddOnImpl holding the FST of type F
+// together with an add-on of type D, an AddOnPair of two M::MatcherData
+// objects: the first is used for MATCH_INPUT, the second otherwise (see
+// GetData() below).
+template <class F, class M, const char* N,
+ class I = NullMatcherFstInit<M> >
+class MatcherFst
+ : public ImplToExpandedFst<
+ AddOnImpl<F,
+ AddOnPair<typename M::MatcherData,
+ typename M::MatcherData> > > {
+ public:
+ friend class StateIterator< MatcherFst<F, M, N, I> >;
+ friend class ArcIterator< MatcherFst<F, M, N, I> >;
+
+ typedef F FST;
+ typedef M FstMatcher;
+ typedef typename F::Arc Arc;
+ typedef typename Arc::StateId StateId;
+ typedef AddOnPair<typename M::MatcherData, typename M::MatcherData> D;
+ typedef AddOnImpl<F, D> Impl;
+
+ // Wraps a default-constructed F; no add-on data is attached here.
+ MatcherFst() : ImplToExpandedFst<Impl>(new Impl(F(), N)) {}
+
+ // Builds the implementation (including matcher data) from an F.
+ explicit MatcherFst(const F &fst)
+ : ImplToExpandedFst<Impl>(CreateImpl(fst, N)) {}
+
+ // Builds the implementation from a generic Fst; the input is first
+ // converted to an F (see the CreateImpl overload below).
+ explicit MatcherFst(const Fst<Arc> &fst)
+ : ImplToExpandedFst<Impl>(CreateImpl(fst, N)) {}
+
+ // See Fst<>::Copy() for doc.
+ MatcherFst(const MatcherFst<F, M, N, I> &fst, bool safe = false)
+ : ImplToExpandedFst<Impl>(fst, safe) {}
+
+ // Get a copy of this MatcherFst. See Fst<>::Copy() for further doc.
+ virtual MatcherFst<F, M, N, I> *Copy(bool safe = false) const {
+ return new MatcherFst<F, M, N, I>(*this, safe);
+ }
+
+ // Read a MatcherFst from an input stream; return NULL on error
+ static MatcherFst<F, M, N, I> *Read(istream &strm,
+ const FstReadOptions &opts) {
+ Impl *impl = Impl::Read(strm, opts);
+ return impl ? new MatcherFst<F, M, N, I>(impl) : 0;
+ }
+
+ // Read a MatcherFst from a file; return NULL on error
+ // Empty filename reads from standard input
+ static MatcherFst<F, M, N, I> *Read(const string &filename) {
+ Impl *impl = ImplToExpandedFst<Impl>::Read(filename);
+ return impl ? new MatcherFst<F, M, N, I>(impl) : 0;
+ }
+
+ // Writes via the implementation's Write(); returns its result.
+ virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
+ return GetImpl()->Write(strm, opts);
+ }
+
+ // Writes to the named file via Fst<Arc>::WriteFile().
+ virtual bool Write(const string &filename) const {
+ return Fst<Arc>::WriteFile(filename);
+ }
+
+ // Iterator initialization is delegated to the implementation.
+ virtual void InitStateIterator(StateIteratorData<Arc> *data) const {
+ return GetImpl()->InitStateIterator(data);
+ }
+
+ virtual void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const {
+ return GetImpl()->InitArcIterator(s, data);
+ }
+
+ // Allocates a new matcher of type M over the wrapped FST, handing it the
+ // stored matcher data for the requested side. The Matcher and
+ // LookAheadMatcher specializations for MatcherFst delete the returned
+ // pointer in their destructors.
+ virtual M *InitMatcher(MatchType match_type) const {
+ return new M(GetFst(), match_type, GetData(match_type));
+ }
+
+ // Allows access to MatcherFst components.
+ Impl *GetImpl() const {
+ return ImplToFst<Impl, ExpandedFst<Arc> >::GetImpl();
+ }
+
+ // Returns the wrapped FST held by the implementation.
+ F& GetFst() const { return GetImpl()->GetFst(); }
+
+ // Returns the first element of the add-on pair for MATCH_INPUT and the
+ // second element for every other match type.
+ typename M::MatcherData *GetData(MatchType match_type) const {
+ D *data = GetImpl()->GetAddOn();
+ return match_type == MATCH_INPUT ? data->First() : data->Second();
+ }
+
+ private:
+ // Builds an implementation for 'fst': constructs an input and an output
+ // matcher, pairs their data in a new add-on, attaches it to a new Impl,
+ // and then runs the initializer I on the address of the Impl pointer.
+ // NOTE(review): the final DecrRefCount() presumably releases the local
+ // reference after SetAddOn() took its own - confirm in add-on.h.
+ static Impl *CreateImpl(const F &fst, const string &name) {
+ M imatcher(fst, MATCH_INPUT);
+ M omatcher(fst, MATCH_OUTPUT);
+ D *data = new D(imatcher.GetData(), omatcher.GetData());
+ Impl *impl = new Impl(fst, name);
+ impl->SetAddOn(data);
+ I init(&impl);
+ data->DecrRefCount();
+ return impl;
+ }
+
+ // Converts a generic Fst to an F, then defers to the overload above.
+ static Impl *CreateImpl(const Fst<Arc> &fst, const string &name) {
+ F ffst(fst);
+ return CreateImpl(ffst, name);
+ }
+
+ // Wraps an existing implementation; used by the Read() methods.
+ explicit MatcherFst(Impl *impl) : ImplToExpandedFst<Impl>(impl) {}
+
+ // Makes visible to friends.
+ void SetImpl(Impl *impl, bool own_impl = true) {
+ ImplToFst< Impl, ExpandedFst<Arc> >::SetImpl(impl, own_impl);
+ }
+
+ void operator=(const MatcherFst<F, M, N, I> &fst); // disallow
+};
+
+
+// StateIterator specialization for MatcherFst. It iterates over the states
+// of the FST of type F held by the MatcherFst.
+template <class F, class M, const char* N, class I>
+class StateIterator< MatcherFst<F, M, N, I> > : public StateIterator<F> {
+ public:
+  explicit StateIterator(const MatcherFst<F, M, N, I> &fst)
+      : StateIterator<F>(fst.GetFst()) {}
+};
+
+
+// ArcIterator specialization for MatcherFst. It iterates over the arcs
+// leaving state 's' of the FST of type F held by the MatcherFst.
+template <class F, class M, const char* N, class I>
+class ArcIterator< MatcherFst<F, M, N, I> > : public ArcIterator<F> {
+ public:
+  ArcIterator(const MatcherFst<F, M, N, I> &fst, typename F::Arc::StateId s)
+      : ArcIterator<F>(fst.GetFst(), s) {}
+};
+
+
+// Matcher specialization for MatcherFst. Owns a matcher of type M obtained
+// from MatcherFst::InitMatcher() and forwards every call to it.
+template <class F, class M, const char* N, class I>
+class Matcher< MatcherFst<F, M, N, I> > {
+ public:
+  typedef MatcherFst<F, M, N, I> FST;
+  typedef typename F::Arc Arc;
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+
+  // Creates the underlying matcher for the requested match type.
+  Matcher(const FST &fst, MatchType match_type)
+      : matcher_(fst.InitMatcher(match_type)) {}
+
+  // Copies by asking the other underlying matcher for its own copy.
+  Matcher(const Matcher<FST> &matcher)
+      : matcher_(matcher.matcher_->Copy()) {}
+
+  ~Matcher() { delete matcher_; }
+
+  // Returns a heap-allocated copy of this matcher.
+  Matcher<FST> *Copy() const {
+    return new Matcher<FST>(*this);
+  }
+
+  // All matcher methods below forward to the owned matcher.
+  MatchType Type(bool test) const { return matcher_->Type(test); }
+  void SetState(StateId s) { matcher_->SetState(s); }
+  bool Find(Label label) { return matcher_->Find(label); }
+  bool Done() const { return matcher_->Done(); }
+  const Arc& Value() const { return matcher_->Value(); }
+  void Next() { matcher_->Next(); }
+  uint64 Properties(uint64 props) const { return matcher_->Properties(props); }
+  uint32 Flags() const { return matcher_->Flags(); }
+
+ private:
+  M *matcher_;  // Owned; deleted in the destructor.
+
+  // Disallowed. The parameter must be Matcher<FST>: with any other type
+  // this is not a copy-assignment operator, so the compiler would still
+  // generate one that shallow-copies matcher_ (leak plus double delete).
+  void operator=(const Matcher<FST> &);  // disallow
+};
+
+
+// LookAheadMatcher specialization for MatcherFst. Owns a matcher of type M
+// obtained from MatcherFst::InitMatcher() and forwards every call to it.
+template <class F, class M, const char* N, class I>
+class LookAheadMatcher< MatcherFst<F, M, N, I> > {
+ public:
+  typedef MatcherFst<F, M, N, I> FST;
+  typedef typename F::Arc Arc;
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+
+  // Creates the underlying matcher for the requested match type.
+  LookAheadMatcher(const FST &fst, MatchType match_type)
+      : matcher_(fst.InitMatcher(match_type)) {}
+
+  // Copies by asking the other underlying matcher for its own copy;
+  // 'safe' is passed through to M::Copy().
+  LookAheadMatcher(const LookAheadMatcher<FST> &matcher, bool safe = false)
+      : matcher_(matcher.matcher_->Copy(safe)) {}
+
+  ~LookAheadMatcher() { delete matcher_; }
+
+  // General matcher methods
+  LookAheadMatcher<FST> *Copy(bool safe = false) const {
+    return new LookAheadMatcher<FST>(*this, safe);
+  }
+
+  MatchType Type(bool test) const { return matcher_->Type(test); }
+  void SetState(StateId s) { matcher_->SetState(s); }
+  bool Find(Label label) { return matcher_->Find(label); }
+  bool Done() const { return matcher_->Done(); }
+  const Arc& Value() const { return matcher_->Value(); }
+  void Next() { matcher_->Next(); }
+
+  // Returns the FST the underlying matcher operates on. The return type
+  // is the underlying matcher's FST type, not MatcherFst: the wrapped
+  // matcher is built over the inner FST, and MatcherFst is only
+  // explicitly constructible from it, so returning 'const FST &' could
+  // not compile once this member was instantiated.
+  const typename M::FST &GetFst() const { return matcher_->GetFst(); }
+
+  uint64 Properties(uint64 props) const { return matcher_->Properties(props); }
+  uint32 Flags() const { return matcher_->Flags(); }
+
+  // Look-ahead methods
+  bool LookAheadLabel(Label label) const {
+    return matcher_->LookAheadLabel(label);
+  }
+
+  bool LookAheadFst(const Fst<Arc> &fst, StateId s) {
+    return matcher_->LookAheadFst(fst, s);
+  }
+
+  Weight LookAheadWeight() const { return matcher_->LookAheadWeight(); }
+
+  bool LookAheadPrefix(Arc *arc) const {
+    return matcher_->LookAheadPrefix(arc);
+  }
+
+  void InitLookAheadFst(const Fst<Arc>& fst, bool copy = false) {
+    matcher_->InitLookAheadFst(fst, copy);
+  }
+
+ private:
+  M *matcher_;  // Owned; deleted in the destructor.
+
+  void operator=(const LookAheadMatcher<FST> &);  // disallow
+};
+
+//
+// Useful aliases when using StdArc and LogArc.
+//
+
+// Arc look-ahead matchers
+// The type-name string is only declared here; it is passed as the N
+// (FST type name) template argument of MatcherFst below.
+extern const char arc_lookahead_fst_type[];
+
+// ConstFst over StdArc with an arc look-ahead matcher on a SortedMatcher.
+typedef MatcherFst<ConstFst<StdArc>,
+ ArcLookAheadMatcher<SortedMatcher<ConstFst<StdArc> > >,
+ arc_lookahead_fst_type> StdArcLookAheadFst;
+
+// Same as above, over LogArc.
+typedef MatcherFst<ConstFst<LogArc>,
+ ArcLookAheadMatcher<SortedMatcher<ConstFst<LogArc> > >,
+ arc_lookahead_fst_type> LogArcLookAheadFst;
+
+
+// Label look-ahead matchers
+// Type-name strings for the input-side and output-side variants; only
+// declared here.
+extern const char ilabel_lookahead_fst_type[];
+extern const char olabel_lookahead_fst_type[];
+
+// Flag sets passed as template arguments to LabelLookAheadMatcher. The two
+// differ only in the first bit (input vs. output look-ahead matcher).
+static const uint32 ilabel_lookahead_flags = kInputLookAheadMatcher |
+ kLookAheadWeight | kLookAheadPrefix |
+ kLookAheadEpsilons | kLookAheadNonEpsilonPrefix;
+static const uint32 olabel_lookahead_flags = kOutputLookAheadMatcher |
+ kLookAheadWeight | kLookAheadPrefix |
+ kLookAheadEpsilons | kLookAheadNonEpsilonPrefix;
+
+// Input-label look-ahead FST over StdArc; uses LabelLookAheadRelabeler as
+// the MatcherFst initializer.
+typedef MatcherFst<ConstFst<StdArc>,
+ LabelLookAheadMatcher<SortedMatcher<ConstFst<StdArc> >,
+ ilabel_lookahead_flags,
+ FastLogAccumulator<StdArc> >,
+ ilabel_lookahead_fst_type,
+ LabelLookAheadRelabeler<StdArc> > StdILabelLookAheadFst;
+
+// Input-label look-ahead FST over LogArc.
+typedef MatcherFst<ConstFst<LogArc>,
+ LabelLookAheadMatcher<SortedMatcher<ConstFst<LogArc> >,
+ ilabel_lookahead_flags,
+ FastLogAccumulator<LogArc> >,
+ ilabel_lookahead_fst_type,
+ LabelLookAheadRelabeler<LogArc> > LogILabelLookAheadFst;
+
+// Output-label look-ahead FST over StdArc.
+typedef MatcherFst<ConstFst<StdArc>,
+ LabelLookAheadMatcher<SortedMatcher<ConstFst<StdArc> >,
+ olabel_lookahead_flags,
+ FastLogAccumulator<StdArc> >,
+ olabel_lookahead_fst_type,
+ LabelLookAheadRelabeler<StdArc> > StdOLabelLookAheadFst;
+
+// Output-label look-ahead FST over LogArc.
+typedef MatcherFst<ConstFst<LogArc>,
+ LabelLookAheadMatcher<SortedMatcher<ConstFst<LogArc> >,
+ olabel_lookahead_flags,
+ FastLogAccumulator<LogArc> >,
+ olabel_lookahead_fst_type,
+ LabelLookAheadRelabeler<LogArc> > LogOLabelLookAheadFst;
+
+} // namespace fst
+
+#endif // FST_LIB_MATCHER_FST_FST_H__
diff --git a/src/include/fst/matcher.h b/src/include/fst/matcher.h
new file mode 100644
index 0000000..a89325b
--- /dev/null
+++ b/src/include/fst/matcher.h
@@ -0,0 +1,1116 @@
+// matcher.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Classes to allow matching labels leaving FST states.
+
+#ifndef FST_LIB_MATCHER_H__
+#define FST_LIB_MATCHER_H__
+
+#include <algorithm>
+#include <set>
+
+#include <fst/mutable-fst.h> // for all internal FST accessors
+
+
+namespace fst {
+
+// MATCHERS - these can find and iterate through requested labels at
+// FST states. In the simplest form, these are just some associative
+// map or search keyed on labels. More generally, they may
+// implement matching special labels that represent sets of labels
+// such as 'sigma' (all), 'rho' (rest), or 'phi' (fail).
+// The Matcher interface is:
+//
+// template <class F>
+// class Matcher {
+// public:
+// typedef F FST;
+// typedef F::Arc Arc;
+// typedef typename Arc::StateId StateId;
+// typedef typename Arc::Label Label;
+// typedef typename Arc::Weight Weight;
+//
+// // Required constructors.
+// Matcher(const F &fst, MatchType type);
+// // If safe=true, the copy is thread-safe. See Fst<>::Copy()
+// // for further doc.
+// Matcher(const Matcher &matcher, bool safe = false);
+//
+// // If safe=true, the copy is thread-safe. See Fst<>::Copy()
+// // for further doc.
+// Matcher<F> *Copy(bool safe = false) const;
+//
+// // Returns the match type that can be provided (depending on
+// // compatibility of the input FST). It is either
+// // the requested match type, MATCH_NONE, or MATCH_UNKNOWN.
+// // If 'test' is false, a constant time test is performed, but
+// // MATCH_UNKNOWN may be returned. If 'test' is true,
+// // a definite answer is returned, but may involve more costly
+// // computation (e.g., visiting the Fst).
+// MatchType Type(bool test) const;
+// // Specifies the current state.
+// void SetState(StateId s);
+//
+// // This finds matches to a label at the current state.
+// // Returns true if a match found. kNoLabel matches any
+// // 'non-consuming' transitions, e.g., epsilon transitions,
+// // which do not require a matching symbol.
+// bool Find(Label label);
+// // These iterate through any matches found:
+// bool Done() const; // No more matches.
+// const A& Value() const; // Current arc (when !Done)
+// void Next(); // Advance to next arc (when !Done)
+//
+// // Return matcher FST.
+// const F& GetFst() const;
+// // This specifies the known Fst properties as viewed from this
+// // matcher. It takes as argument the input Fst's known properties.
+// uint64 Properties(uint64 props) const;
+// };
+
+// Flags used for basic matchers (see also lookahead.h).
+// The value is zero, i.e. no flag bits are set.
+const uint32 kMatcherFlags = 0x00000000;
+
+// Matcher interface, templated on the Arc definition; used
+// for matcher specializations that are returned by the
+// InitMatcher Fst method.
+//
+// The state/iteration methods (SetState, Find, Done, Value, Next) are
+// non-virtual and forward to private pure virtual counterparts with a
+// trailing underscore, which derived classes implement.
+template <class A>
+class MatcherBase {
+ public:
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+ typedef typename A::Label Label;
+ typedef typename A::Weight Weight;
+
+ virtual ~MatcherBase() {}
+
+ // Returns a copy of the matcher; 'safe' is interpreted by the derived
+ // class (see Fst<>::Copy() for the meaning used elsewhere in this file).
+ virtual MatcherBase<A> *Copy(bool safe = false) const = 0;
+ // Returns the match type the matcher can provide.
+ virtual MatchType Type(bool test) const = 0;
+ // Non-virtual entry points; each forwards to the matching private virtual.
+ void SetState(StateId s) { SetState_(s); }
+ bool Find(Label label) { return Find_(label); }
+ bool Done() const { return Done_(); }
+ const A& Value() const { return Value_(); }
+ void Next() { Next_(); }
+ // Returns the FST being matched.
+ virtual const Fst<A> &GetFst() const = 0;
+ // Maps the input FST's known properties to those seen via this matcher.
+ virtual uint64 Properties(uint64 props) const = 0;
+ // Matcher flags; the default implementation reports none (0).
+ virtual uint32 Flags() const { return 0; }
+ private:
+ // Implementation hooks called by the public non-virtual methods above.
+ virtual void SetState_(StateId s) = 0;
+ virtual bool Find_(Label label) = 0;
+ virtual bool Done_() const = 0;
+ virtual const A& Value_() const = 0;
+ virtual void Next_() = 0;
+};
+
+
+// A matcher that expects sorted labels on the side to be matched.
+// If match_type == MATCH_INPUT, epsilons match the implicit self loop
+// Arc(kNoLabel, 0, Weight::One(), current_state) as well as any
+// actual epsilon transitions. If match_type == MATCH_OUTPUT, then
+// Arc(0, kNoLabel, Weight::One(), current_state) is instead matched.
+//
+// The matcher owns a copy of the FST it is given and the arc iterator for
+// the current state; both are released in the destructor.
+template <class F>
+class SortedMatcher : public MatcherBase<typename F::Arc> {
+ public:
+  typedef F FST;
+  typedef typename F::Arc Arc;
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+
+  // Labels >= binary_label will be searched for by binary search,
+  // o.w. linear search is used.
+  // A match type other than MATCH_INPUT, MATCH_OUTPUT or MATCH_NONE is
+  // reported as an error and the matcher falls back to MATCH_NONE.
+  SortedMatcher(const F &fst, MatchType match_type,
+                Label binary_label = 1)
+      : fst_(fst.Copy()),
+        s_(kNoStateId),
+        aiter_(0),
+        match_type_(match_type),
+        binary_label_(binary_label),
+        match_label_(kNoLabel),
+        narcs_(0),
+        loop_(kNoLabel, 0, Weight::One(), kNoStateId),
+        current_loop_(false),  // Was left uninitialized; read by Done().
+        error_(false) {
+    switch (match_type_) {
+      case MATCH_INPUT:
+      case MATCH_NONE:
+        break;
+      case MATCH_OUTPUT:
+        // The implicit loop carries kNoLabel on the matched side.
+        swap(loop_.ilabel, loop_.olabel);
+        break;
+      default:
+        FSTERROR() << "SortedMatcher: bad match type";
+        match_type_ = MATCH_NONE;
+        error_ = true;
+    }
+  }
+
+  // Copies the configuration of 'matcher' but not its position: the copy
+  // has no current state or arc iterator until SetState() is called.
+  SortedMatcher(const SortedMatcher<F> &matcher, bool safe = false)
+      : fst_(matcher.fst_->Copy(safe)),
+        s_(kNoStateId),
+        aiter_(0),
+        match_type_(matcher.match_type_),
+        binary_label_(matcher.binary_label_),
+        match_label_(kNoLabel),
+        narcs_(0),
+        loop_(matcher.loop_),
+        current_loop_(false),  // Was left uninitialized; read by Done().
+        error_(matcher.error_) {}
+
+  virtual ~SortedMatcher() {
+    delete aiter_;  // Deleting a null pointer is a no-op.
+    delete fst_;
+  }
+
+  virtual SortedMatcher<F> *Copy(bool safe = false) const {
+    return new SortedMatcher<F>(*this, safe);
+  }
+
+  // Returns the requested match type if the FST is known to be sorted on
+  // the matched side, MATCH_NONE if it is known not to be (or if the
+  // matcher itself is MATCH_NONE), and MATCH_UNKNOWN otherwise.
+  virtual MatchType Type(bool test) const {
+    if (match_type_ == MATCH_NONE)
+      return match_type_;
+
+    uint64 true_prop = match_type_ == MATCH_INPUT ?
+        kILabelSorted : kOLabelSorted;
+    uint64 false_prop = match_type_ == MATCH_INPUT ?
+        kNotILabelSorted : kNotOLabelSorted;
+    uint64 props = fst_->Properties(true_prop | false_prop, test);
+
+    if (props & true_prop)
+      return match_type_;
+    else if (props & false_prop)
+      return MATCH_NONE;
+    else
+      return MATCH_UNKNOWN;
+  }
+
+  // Positions the matcher at state 's'. A repeated call with the current
+  // state is a no-op; otherwise the arc iterator is rebuilt for 's'.
+  void SetState(StateId s) {
+    if (s_ == s)
+      return;
+    s_ = s;
+    if (match_type_ == MATCH_NONE) {
+      FSTERROR() << "SortedMatcher: bad match type";
+      error_ = true;
+    }
+    delete aiter_;  // Deleting a null pointer is a no-op.
+    aiter_ = new ArcIterator<F>(*fst_, s);
+    aiter_->SetFlags(kArcNoCache, kArcNoCache);
+    narcs_ = internal::NumArcs(*fst_, s);
+    loop_.nextstate = s;
+  }
+
+  bool Find(Label match_label);
+
+  // True when there are no more matches: the implicit loop (if pending)
+  // has been consumed and the iterator is exhausted or sits on an arc
+  // whose matched-side label differs from the label being matched.
+  bool Done() const {
+    if (current_loop_)
+      return false;
+    if (aiter_->Done())
+      return true;
+    aiter_->SetFlags(
+        match_type_ == MATCH_INPUT ? kArcILabelValue : kArcOLabelValue,
+        kArcValueFlags);
+    Label label = match_type_ == MATCH_INPUT ?
+        aiter_->Value().ilabel : aiter_->Value().olabel;
+    return label != match_label_;
+  }
+
+  // Returns the implicit loop arc while it is pending, otherwise the arc
+  // under the iterator.
+  const Arc& Value() const {
+    if (current_loop_) {
+      return loop_;
+    }
+    aiter_->SetFlags(kArcValueFlags, kArcValueFlags);
+    return aiter_->Value();
+  }
+
+  // Consumes the implicit loop first, then advances the arc iterator.
+  void Next() {
+    if (current_loop_)
+      current_loop_ = false;
+    else
+      aiter_->Next();
+  }
+
+  virtual const F &GetFst() const { return *fst_; }
+
+  // Passes the input properties through, adding kError after an error.
+  virtual uint64 Properties(uint64 inprops) const {
+    uint64 outprops = inprops;
+    if (error_) outprops |= kError;
+    return outprops;
+  }
+
+ private:
+  // MatcherBase hooks; each forwards to the non-virtual method above.
+  virtual void SetState_(StateId s) { SetState(s); }
+  virtual bool Find_(Label label) { return Find(label); }
+  virtual bool Done_() const { return Done(); }
+  virtual const Arc& Value_() const { return Value(); }
+  virtual void Next_() { Next(); }
+
+  const F *fst_;           // Owned copy of the FST being matched
+  StateId s_;              // Current state
+  ArcIterator<F> *aiter_;  // Iterator for current state
+  MatchType match_type_;   // Type of match to perform
+  Label binary_label_;     // Least label for binary search
+  Label match_label_;      // Current label to be matched
+  size_t narcs_;           // Current state arc count
+  Arc loop_;               // For non-consuming symbols
+  bool current_loop_;      // Current arc is the implicit loop
+  bool error_;             // Error encountered
+
+  void operator=(const SortedMatcher<F> &);  // Disallow
+};
+
+// Searches the arcs of the current state for 'match_label' on the matched
+// side and positions the arc iterator on the first matching arc.
+// Returns true if an arc matched, or if the implicit self loop matches
+// (requested label 0); returns false otherwise or after an error.
+template <class F> inline
+bool SortedMatcher<F>::Find(Label match_label) {
+ if (error_) {
+ current_loop_ = false;
+ match_label_ = kNoLabel;
+ return false;
+ }
+ // Label 0 also matches the implicit loop; kNoLabel is searched for as
+ // label 0 but without the implicit loop.
+ current_loop_ = match_label == 0;
+ match_label_ = match_label == kNoLabel ? 0 : match_label;
+ // NOTE(review): presumably restricts the iterator to computing only the
+ // label on the matched side - confirm against the ArcIterator flags doc.
+ aiter_->SetFlags(
+ match_type_ == MATCH_INPUT ? kArcILabelValue : kArcOLabelValue,
+ kArcValueFlags);
+ if (match_label_ >= binary_label_) {
+ // Binary search for match.
+ size_t low = 0;
+ size_t high = narcs_;
+ while (low < high) {
+ size_t mid = (low + high) / 2;
+ aiter_->Seek(mid);
+ Label label = match_type_ == MATCH_INPUT ?
+ aiter_->Value().ilabel : aiter_->Value().olabel;
+ if (label > match_label_) {
+ high = mid;
+ } else if (label < match_label_) {
+ low = mid + 1;
+ } else {
+ // find first matching label (when non-determinism)
+ // Walks backwards from 'mid' (never below 'low') until the previous
+ // arc carries a different label, then re-seeks to the first match.
+ for (size_t i = mid; i > low; --i) {
+ aiter_->Seek(i - 1);
+ label = match_type_ == MATCH_INPUT ? aiter_->Value().ilabel :
+ aiter_->Value().olabel;
+ if (label != match_label_) {
+ aiter_->Seek(i);
+ return true;
+ }
+ }
+ // Every arc from 'low' to 'mid' matched; the iterator is left on the
+ // last position it was seeked to.
+ return true;
+ }
+ }
+ // No arc matched; only the implicit loop (if any) can match.
+ return current_loop_;
+ } else {
+ // Linear search for match.
+ for (aiter_->Reset(); !aiter_->Done(); aiter_->Next()) {
+ Label label = match_type_ == MATCH_INPUT ?
+ aiter_->Value().ilabel : aiter_->Value().olabel;
+ if (label == match_label_) {
+ return true;
+ }
+ // Labels are expected to be sorted, so a larger label ends the search.
+ if (label > match_label_)
+ break;
+ }
+ return current_loop_;
+ }
+}
+
+
+// Specifies whether during matching we rewrite both the input and output sides.
+// Consumed by the RhoMatcher and SigmaMatcher constructors in this file to
+// set their 'rewrite_both_' member.
+enum MatcherRewriteMode {
+ MATCHER_REWRITE_AUTO = 0, // Rewrites both sides iff acceptor.
+ MATCHER_REWRITE_ALWAYS, // Always rewrites both sides.
+ MATCHER_REWRITE_NEVER // Rewrites only the matched side.
+};
+
+
+// For any requested label that doesn't match at a state, this matcher
+// considers all transitions that match the label 'rho_label' (rho =
+// 'rest'). Each such rho transition found is returned with the
+// rho_label rewritten as the requested label (both sides if an
+// acceptor, or if 'rewrite_both' is true and both input and output
+// labels of the found transition are 'rho_label'). If 'rho_label' is
+// kNoLabel, this special matching is not done. RhoMatcher is
+// templated itself on a matcher, which is used to perform the
+// underlying matching. By default, the underlying matcher is
+// constructed by RhoMatcher. The user can instead pass in this
+// object; in that case, RhoMatcher takes its ownership.
+template <class M>
+class RhoMatcher : public MatcherBase<typename M::Arc> {
+ public:
+  typedef typename M::FST FST;
+  typedef typename M::Arc Arc;
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+
+  // MATCH_BOTH and a rho_label of 0 are rejected: an error is logged, the
+  // error flag is set and the offending setting is neutralized.
+  RhoMatcher(const FST &fst,
+             MatchType match_type,
+             Label rho_label = kNoLabel,
+             MatcherRewriteMode rewrite_mode = MATCHER_REWRITE_AUTO,
+             M *matcher = 0)
+      : matcher_(matcher ? matcher : new M(fst, match_type)),
+        match_type_(match_type),
+        rho_label_(rho_label),
+        has_rho_(false),        // Was uninitialized; read by Find().
+        rho_match_(kNoLabel),   // Was uninitialized; read by Value().
+        error_(false) {
+    if (match_type == MATCH_BOTH) {
+      FSTERROR() << "RhoMatcher: bad match type";
+      match_type_ = MATCH_NONE;
+      error_ = true;
+    }
+    if (rho_label == 0) {
+      FSTERROR() << "RhoMatcher: 0 cannot be used as rho_label";
+      rho_label_ = kNoLabel;
+      error_ = true;
+    }
+
+    if (rewrite_mode == MATCHER_REWRITE_AUTO)
+      rewrite_both_ = fst.Properties(kAcceptor, true);
+    else if (rewrite_mode == MATCHER_REWRITE_ALWAYS)
+      rewrite_both_ = true;
+    else
+      rewrite_both_ = false;
+  }
+
+  // Copies the configuration and the underlying matcher; per-state
+  // search results are not carried over, so they start out cleared.
+  RhoMatcher(const RhoMatcher<M> &matcher, bool safe = false)
+      : matcher_(new M(*matcher.matcher_, safe)),
+        match_type_(matcher.match_type_),
+        rho_label_(matcher.rho_label_),
+        rewrite_both_(matcher.rewrite_both_),
+        has_rho_(false),
+        rho_match_(kNoLabel),
+        error_(matcher.error_) {}
+
+  virtual ~RhoMatcher() {
+    delete matcher_;
+  }
+
+  virtual RhoMatcher<M> *Copy(bool safe = false) const {
+    return new RhoMatcher<M>(*this, safe);
+  }
+
+  virtual MatchType Type(bool test) const { return matcher_->Type(test); }
+
+  // Moves to state 's'; rho arcs are assumed possible there whenever a
+  // rho label is configured, until a rho lookup in Find() fails.
+  void SetState(StateId s) {
+    matcher_->SetState(s);
+    has_rho_ = rho_label_ != kNoLabel;
+  }
+
+  // Looks for 'match_label' first; if that fails and the label is neither
+  // 0 nor kNoLabel, falls back to the rho label. Requesting the rho label
+  // itself is an error.
+  bool Find(Label match_label) {
+    if (match_label == rho_label_ && rho_label_ != kNoLabel) {
+      FSTERROR() << "RhoMatcher::Find: bad label (rho)";
+      error_ = true;
+      return false;
+    }
+    if (matcher_->Find(match_label)) {
+      rho_match_ = kNoLabel;
+      return true;
+    } else if (has_rho_ && match_label != 0 && match_label != kNoLabel &&
+               (has_rho_ = matcher_->Find(rho_label_))) {
+      // Remember the requested label so Value() can rewrite the rho arc.
+      rho_match_ = match_label;
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  bool Done() const { return matcher_->Done(); }
+
+  // Returns the current arc. For a rho match the arc is copied and its
+  // rho label(s) replaced by the requested label.
+  const Arc& Value() const {
+    if (rho_match_ == kNoLabel) {
+      return matcher_->Value();
+    } else {
+      rho_arc_ = matcher_->Value();
+      if (rewrite_both_) {
+        if (rho_arc_.ilabel == rho_label_)
+          rho_arc_.ilabel = rho_match_;
+        if (rho_arc_.olabel == rho_label_)
+          rho_arc_.olabel = rho_match_;
+      } else if (match_type_ == MATCH_INPUT) {
+        rho_arc_.ilabel = rho_match_;
+      } else {
+        rho_arc_.olabel = rho_match_;
+      }
+      return rho_arc_;
+    }
+  }
+
+  void Next() { matcher_->Next(); }
+
+  virtual const FST &GetFst() const { return matcher_->GetFst(); }
+
+  virtual uint64 Properties(uint64 props) const;
+
+ private:
+  // MatcherBase hooks; each forwards to the non-virtual method above.
+  virtual void SetState_(StateId s) { SetState(s); }
+  virtual bool Find_(Label label) { return Find(label); }
+  virtual bool Done_() const { return Done(); }
+  virtual const Arc& Value_() const { return Value(); }
+  virtual void Next_() { Next(); }
+
+  M *matcher_;            // Owned underlying matcher
+  MatchType match_type_;  // Type of match requested
+  Label rho_label_;       // Label that represents the rho transition
+  bool rewrite_both_;     // Rewrite both sides when both are 'rho_label_'
+  bool has_rho_;          // Are there possibly rhos at the current state?
+  Label rho_match_;       // Current label that matches rho transition
+  mutable Arc rho_arc_;   // Arc to return when rho match
+  bool error_;            // Error encountered
+
+  void operator=(const RhoMatcher<M> &);  // Disallow
+};
+
+// Maps the input FST's known properties to those visible through this
+// matcher: starts from the underlying matcher's answer, adds kError after
+// an error, and clears the bits that rho rewriting may invalidate for the
+// configured match side. An unexpected match type logs an error and
+// yields 0.
+template <class M> inline
+uint64 RhoMatcher<M>::Properties(uint64 inprops) const {
+  uint64 result = matcher_->Properties(inprops);
+  if (error_) result |= kError;
+
+  uint64 cleared = 0;
+  switch (match_type_) {
+    case MATCH_NONE:
+      // Nothing is rewritten, so nothing needs clearing.
+      return result;
+    case MATCH_INPUT:
+      cleared = rewrite_both_ ?
+          (kODeterministic | kNonODeterministic | kString |
+           kILabelSorted | kNotILabelSorted |
+           kOLabelSorted | kNotOLabelSorted) :
+          (kODeterministic | kAcceptor | kString |
+           kILabelSorted | kNotILabelSorted);
+      break;
+    case MATCH_OUTPUT:
+      cleared = rewrite_both_ ?
+          (kIDeterministic | kNonIDeterministic | kString |
+           kILabelSorted | kNotILabelSorted |
+           kOLabelSorted | kNotOLabelSorted) :
+          (kIDeterministic | kAcceptor | kString |
+           kOLabelSorted | kNotOLabelSorted);
+      break;
+    default:
+      // Shouldn't ever get here.
+      FSTERROR() << "RhoMatcher:: bad match type: " << match_type_;
+      return 0;
+  }
+  return result & ~cleared;
+}
+
+
+// For any requested label, this matcher considers all transitions
+// that match the label 'sigma_label' (sigma = "any"), in addition to
+// the transitions with the requested label. Each such sigma
+// transition found is returned with the sigma_label rewritten as the
+// requested label (both sides if an acceptor, or if 'rewrite_both' is
+// true and both input and output labels of the found transition are
+// 'sigma_label'). If 'sigma_label' is kNoLabel, this special
+// matching is not done. SigmaMatcher is templated itself on a
+// matcher, which is used to perform the underlying matching. By
+// default, the underlying matcher is constructed by SigmaMatcher.
+// The user can instead pass in this object; in that case,
+// SigmaMatcher takes its ownership.
+template <class M>
+class SigmaMatcher : public MatcherBase<typename M::Arc> {
+ public:
+  typedef typename M::FST FST;
+  typedef typename M::Arc Arc;
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+
+  // MATCH_BOTH and a sigma_label of 0 are rejected: an error is logged,
+  // the error flag is set and the offending setting is neutralized.
+  SigmaMatcher(const FST &fst,
+               MatchType match_type,
+               Label sigma_label = kNoLabel,
+               MatcherRewriteMode rewrite_mode = MATCHER_REWRITE_AUTO,
+               M *matcher = 0)
+      : matcher_(matcher ? matcher : new M(fst, match_type)),
+        match_type_(match_type),
+        sigma_label_(sigma_label),
+        has_sigma_(false),        // Was uninitialized; read by Find()/Next().
+        sigma_match_(kNoLabel),   // Was uninitialized; read by Value()/Next().
+        match_label_(kNoLabel),   // Was uninitialized; read by Next().
+        error_(false) {
+    if (match_type == MATCH_BOTH) {
+      FSTERROR() << "SigmaMatcher: bad match type";
+      match_type_ = MATCH_NONE;
+      error_ = true;
+    }
+    if (sigma_label == 0) {
+      FSTERROR() << "SigmaMatcher: 0 cannot be used as sigma_label";
+      sigma_label_ = kNoLabel;
+      error_ = true;
+    }
+
+    if (rewrite_mode == MATCHER_REWRITE_AUTO)
+      rewrite_both_ = fst.Properties(kAcceptor, true);
+    else if (rewrite_mode == MATCHER_REWRITE_ALWAYS)
+      rewrite_both_ = true;
+    else
+      rewrite_both_ = false;
+  }
+
+  // Copies the configuration and the underlying matcher; per-state
+  // search results are not carried over, so they start out cleared.
+  SigmaMatcher(const SigmaMatcher<M> &matcher, bool safe = false)
+      : matcher_(new M(*matcher.matcher_, safe)),
+        match_type_(matcher.match_type_),
+        sigma_label_(matcher.sigma_label_),
+        rewrite_both_(matcher.rewrite_both_),
+        has_sigma_(false),
+        sigma_match_(kNoLabel),
+        match_label_(kNoLabel),
+        error_(matcher.error_) {}
+
+  virtual ~SigmaMatcher() {
+    delete matcher_;
+  }
+
+  virtual SigmaMatcher<M> *Copy(bool safe = false) const {
+    return new SigmaMatcher<M>(*this, safe);
+  }
+
+  virtual MatchType Type(bool test) const { return matcher_->Type(test); }
+
+  // Moves to state 's' and records whether it has any sigma arcs.
+  void SetState(StateId s) {
+    matcher_->SetState(s);
+    has_sigma_ =
+        sigma_label_ != kNoLabel ? matcher_->Find(sigma_label_) : false;
+  }
+
+  // Looks for 'match_label' first; if that fails and the label is neither
+  // 0 nor kNoLabel, falls back to the sigma arcs. Requesting the sigma
+  // label itself is an error.
+  bool Find(Label match_label) {
+    match_label_ = match_label;
+    if (match_label == sigma_label_ && sigma_label_ != kNoLabel) {
+      FSTERROR() << "SigmaMatcher::Find: bad label (sigma)";
+      error_ = true;
+      return false;
+    }
+    if (matcher_->Find(match_label)) {
+      sigma_match_ = kNoLabel;
+      return true;
+    } else if (has_sigma_ && match_label != 0 && match_label != kNoLabel &&
+               matcher_->Find(sigma_label_)) {
+      // Remember the requested label so Value() can rewrite the sigma arc.
+      sigma_match_ = match_label;
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  bool Done() const {
+    return matcher_->Done();
+  }
+
+  // Returns the current arc. For a sigma match the arc is copied and its
+  // sigma label(s) replaced by the requested label.
+  const Arc& Value() const {
+    if (sigma_match_ == kNoLabel) {
+      return matcher_->Value();
+    } else {
+      sigma_arc_ = matcher_->Value();
+      if (rewrite_both_) {
+        if (sigma_arc_.ilabel == sigma_label_)
+          sigma_arc_.ilabel = sigma_match_;
+        if (sigma_arc_.olabel == sigma_label_)
+          sigma_arc_.olabel = sigma_match_;
+      } else if (match_type_ == MATCH_INPUT) {
+        sigma_arc_.ilabel = sigma_match_;
+      } else {
+        sigma_arc_.olabel = sigma_match_;
+      }
+      return sigma_arc_;
+    }
+  }
+
+  // Advances the underlying matcher. Once the arcs with the requested
+  // (positive) label are exhausted, continues with the sigma arcs.
+  void Next() {
+    matcher_->Next();
+    if (matcher_->Done() && has_sigma_ && (sigma_match_ == kNoLabel) &&
+        (match_label_ > 0)) {
+      matcher_->Find(sigma_label_);
+      sigma_match_ = match_label_;
+    }
+  }
+
+  virtual const FST &GetFst() const { return matcher_->GetFst(); }
+
+  virtual uint64 Properties(uint64 props) const;
+
+ private:
+  // MatcherBase hooks; each forwards to the non-virtual method above.
+  virtual void SetState_(StateId s) { SetState(s); }
+  virtual bool Find_(Label label) { return Find(label); }
+  virtual bool Done_() const { return Done(); }
+  virtual const Arc& Value_() const { return Value(); }
+  virtual void Next_() { Next(); }
+
+  M *matcher_;             // Owned underlying matcher
+  MatchType match_type_;   // Type of match requested
+  Label sigma_label_;      // Label that represents the sigma transition
+  bool rewrite_both_;      // Rewrite both sides when both are 'sigma_label_'
+  bool has_sigma_;         // Are there sigmas at the current state?
+  Label sigma_match_;      // Current label that matches sigma transition
+  mutable Arc sigma_arc_;  // Arc to return when sigma match
+  Label match_label_;      // Label being matched
+  bool error_;             // Error encountered
+
+  void operator=(const SigmaMatcher<M> &);  // disallow
+};
+
+// Maps the input FST's known properties to those visible through this
+// matcher: starts from the underlying matcher's answer, adds kError after
+// an error, and clears the bits that sigma rewriting may invalidate. An
+// unexpected match type (when not rewriting both sides) logs an error and
+// yields 0.
+template <class M> inline
+uint64 SigmaMatcher<M>::Properties(uint64 inprops) const {
+  uint64 result = matcher_->Properties(inprops);
+  if (error_) result |= kError;
+
+  if (match_type_ == MATCH_NONE)
+    return result;
+
+  // Bits cleared in every rewriting case.
+  const uint64 common = kIDeterministic | kNonIDeterministic |
+                        kODeterministic | kNonODeterministic |
+                        kString;
+
+  // The rewrite-both check deliberately precedes the match-side checks.
+  if (rewrite_both_) {
+    return result & ~(common |
+                      kILabelSorted | kNotILabelSorted |
+                      kOLabelSorted | kNotOLabelSorted);
+  }
+  if (match_type_ == MATCH_INPUT) {
+    return result & ~(common |
+                      kILabelSorted | kNotILabelSorted |
+                      kAcceptor);
+  }
+  if (match_type_ == MATCH_OUTPUT) {
+    return result & ~(common |
+                      kOLabelSorted | kNotOLabelSorted |
+                      kAcceptor);
+  }
+
+  // Shouldn't ever get here.
+  FSTERROR() << "SigmaMatcher:: bad match type: " << match_type_;
+  return 0;
+}
+
+
+// For any requested label that doesn't match at a state, this matcher
+// considers the *unique* transition that matches the label 'phi_label'
+// (phi = 'fail'), and recursively looks for a match at its
+// destination. When 'phi_loop' is true, if no match is found but a
+// phi self-loop is found, then the phi transition found is returned
+// with the phi_label rewritten as the requested label (both sides if
+// an acceptor, or if 'rewrite_both' is true and both input and output
+// labels of the found transition are 'phi_label'). If 'phi_label' is
+// kNoLabel, this special matching is not done. PhiMatcher is itself
+// templated on a matcher, which is used to perform the underlying
+// matching. By default, the underlying matcher is constructed by
+// PhiMatcher. The user can instead pass in a matcher object; in that
+// case, PhiMatcher takes ownership of it and deletes it on destruction.
+// Warning: phi non-determinism not supported (for simplicity).
+template <class M>
+class PhiMatcher : public MatcherBase<typename M::Arc> {
+ public:
+ typedef typename M::FST FST;
+ typedef typename M::Arc Arc;
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Label Label;
+ typedef typename Arc::Weight Weight;
+
+ PhiMatcher(const FST &fst,
+ MatchType match_type,
+ Label phi_label = kNoLabel,
+ bool phi_loop = true,
+ MatcherRewriteMode rewrite_mode = MATCHER_REWRITE_AUTO,
+ M *matcher = 0)
+ : matcher_(matcher ? matcher : new M(fst, match_type)), // Owned either way
+ match_type_(match_type),
+ phi_label_(phi_label),
+ state_(kNoStateId),
+ phi_loop_(phi_loop),
+ error_(false) {
+ if (match_type == MATCH_BOTH) { // Rejected: fall back to MATCH_NONE, flag error
+ FSTERROR() << "PhiMatcher: bad match type";
+ match_type_ = MATCH_NONE;
+ error_ = true;
+ }
+ if (phi_label == 0) { // Label 0 is rejected: disable phi matching, flag error
+ FSTERROR() << "PhiMatcher: 0 cannot be used as phi_label";
+ phi_label_ = kNoLabel;
+ error_ = true;
+ }
+
+ if (rewrite_mode == MATCHER_REWRITE_AUTO) // Auto: rewrite both sides iff 'fst' is an acceptor
+ rewrite_both_ = fst.Properties(kAcceptor, true);
+ else if (rewrite_mode == MATCHER_REWRITE_ALWAYS)
+ rewrite_both_ = true;
+ else
+ rewrite_both_ = false;
+ }
+
+ PhiMatcher(const PhiMatcher<M> &matcher, bool safe = false)
+ : matcher_(new M(*matcher.matcher_, safe)), // Own copy of the wrapped matcher
+ match_type_(matcher.match_type_),
+ phi_label_(matcher.phi_label_),
+ rewrite_both_(matcher.rewrite_both_),
+ state_(kNoStateId), // The current state is not copied
+ phi_loop_(matcher.phi_loop_),
+ error_(matcher.error_) {}
+
+ virtual ~PhiMatcher() {
+ delete matcher_;
+ }
+
+ virtual PhiMatcher<M> *Copy(bool safe = false) const {
+ return new PhiMatcher<M>(*this, safe);
+ }
+
+ virtual MatchType Type(bool test) const { return matcher_->Type(test); }
+
+ void SetState(StateId s) {
+ matcher_->SetState(s);
+ state_ = s; // Remembered so that Find() can restart from it
+ has_phi_ = phi_label_ != kNoLabel; // Phi handling is on iff a phi label is configured
+ }
+
+ bool Find(Label match_label);
+
+ bool Done() const { return matcher_->Done(); }
+
+ const Arc& Value() const {
+ if ((phi_match_ == kNoLabel) && (phi_weight_ == Weight::One())) { // No relabeling or reweighting needed
+ return matcher_->Value();
+ } else {
+ phi_arc_ = matcher_->Value(); // Work on a copy held in the mutable scratch arc
+ phi_arc_.weight = Times(phi_weight_, phi_arc_.weight); // Prepend the accumulated phi weight
+ if (phi_match_ != kNoLabel) { // Phi self-loop match: rewrite phi_label_ as the requested label
+ if (rewrite_both_) {
+ if (phi_arc_.ilabel == phi_label_)
+ phi_arc_.ilabel = phi_match_;
+ if (phi_arc_.olabel == phi_label_)
+ phi_arc_.olabel = phi_match_;
+ } else if (match_type_ == MATCH_INPUT) {
+ phi_arc_.ilabel = phi_match_;
+ } else {
+ phi_arc_.olabel = phi_match_;
+ }
+ }
+ return phi_arc_; // Reference stays valid only until the next Value() call
+ }
+ }
+
+ void Next() { matcher_->Next(); }
+
+ virtual const FST &GetFst() const { return matcher_->GetFst(); }
+
+ virtual uint64 Properties(uint64 props) const;
+
+private:
+ virtual void SetState_(StateId s) { SetState(s); } // Virtual hooks forward to the non-virtual methods above
+ virtual bool Find_(Label label) { return Find(label); }
+ virtual bool Done_() const { return Done(); }
+ virtual const Arc& Value_() const { return Value(); }
+ virtual void Next_() { Next(); }
+
+ M *matcher_; // Underlying matcher (owned)
+ MatchType match_type_; // Type of match requested
+ Label phi_label_; // Label that represents the phi transition
+ bool rewrite_both_; // Rewrite both sides when both are 'phi_label_'
+ bool has_phi_; // Are there possibly phis at the current state? (set by SetState)
+ Label phi_match_; // Current label that matches phi loop (set by Find)
+ mutable Arc phi_arc_; // Arc to return
+ StateId state_; // State where looking for matches
+ Weight phi_weight_; // Product of the weights of phi transitions taken (set by Find)
+ bool phi_loop_; // When true, phi self-loop are allowed and treated
+ // as rho (required for Aho-Corasick)
+ bool error_; // Error encountered
+
+ void operator=(const PhiMatcher<M> &); // disallow
+};
+
+template <class M> inline // Looks for 'match_label' at the current state, following phi arcs on failure.
+bool PhiMatcher<M>::Find(Label match_label) { // Returns true iff a match (possibly a phi self-loop) was found.
+ if (match_label == phi_label_ && phi_label_ != kNoLabel) { // Asking for the phi label itself is an error
+ FSTERROR() << "PhiMatcher::Find: bad label (phi)";
+ error_ = true;
+ return false;
+ }
+ matcher_->SetState(state_); // Restart from the state given to SetState()
+ phi_match_ = kNoLabel; // Reset the per-search phi bookkeeping read by Value()
+ phi_weight_ = Weight::One();
+ if (!has_phi_ || match_label == 0 || match_label == kNoLabel) // No phi handling for these requests
+ return matcher_->Find(match_label);
+ StateId state = state_; // State currently being searched along the phi path
+ while (!matcher_->Find(match_label)) { // Exits with the wrapped matcher positioned on a real match
+ if (!matcher_->Find(phi_label_)) // No phi arc to fall back on: no match at all
+ return false;
+ if (phi_loop_ && matcher_->Value().nextstate == state) { // Phi self-loop: Value() relabels it
+ phi_match_ = match_label;
+ return true;
+ }
+ phi_weight_ = Times(phi_weight_, matcher_->Value().weight); // Accumulate the weight of the phi arc taken
+ state = matcher_->Value().nextstate;
+ matcher_->Next();
+ if (!matcher_->Done()) { // A second phi arc at this state: flagged, but the search goes on
+ FSTERROR() << "PhiMatcher: phi non-determinism not supported";
+ error_ = true;
+ }
+ matcher_->SetState(state); // Continue the search at the phi destination
+ }
+ return true;
+}
+
+// Reports FST properties as seen through this matcher: the wrapped
+// matcher's answer for 'inprops', with kError added when this matcher
+// has recorded an error and, unless the match type is MATCH_NONE,
+// with the property bits listed below cleared.
+template <class M> inline
+uint64 PhiMatcher<M>::Properties(uint64 inprops) const {
+  uint64 outprops = matcher_->Properties(inprops);
+  if (error_) outprops |= kError;
+
+  // MATCH_NONE: the wrapped matcher's properties pass through unchanged.
+  if (match_type_ == MATCH_NONE) return outprops;
+
+  // Bits cleared for both MATCH_INPUT and MATCH_OUTPUT.
+  const uint64 common = kString |
+                        kILabelSorted | kNotILabelSorted |
+                        kOLabelSorted | kNotOLabelSorted;
+
+  if (match_type_ == MATCH_INPUT) {
+    const uint64 extra = rewrite_both_
+        ? (kODeterministic | kNonODeterministic)
+        : (kODeterministic | kAcceptor);
+    return outprops & ~(common | extra);
+  }
+
+  if (match_type_ == MATCH_OUTPUT) {
+    const uint64 extra = rewrite_both_
+        ? (kIDeterministic | kNonIDeterministic)
+        : (kIDeterministic | kAcceptor);
+    return outprops & ~(common | extra);
+  }
+
+  // Shouldn't ever get here.
+  FSTERROR() << "PhiMatcher:: bad match type: " << match_type_;
+  return 0;
+}
+
+
+//
+// MULTI-EPS MATCHER FLAGS (bit flags, combined with '|')
+//
+
+// Find(kNoLabel) returns the arcs of every multi-epsilon label as well as what the underlying matcher returns for kNoLabel.
+const uint32 kMultiEpsList = 0x00000001;
+
+// Find(multi_eps_label) returns an implicit self-loop carrying kNoLabel instead of querying the underlying matcher.
+const uint32 kMultiEpsLoop = 0x00000002;
+
+// MultiEpsMatcher: allows treating multiple non-0 labels as
+// non-consuming labels in addition to 0 that is always
+// non-consuming. Precise behavior controlled by 'flags' argument. By
+// default, the underlying matcher is constructed by
+// MultiEpsMatcher. The user can instead pass in a matcher object; in
+// that case, MultiEpsMatcher takes its ownership iff 'own_matcher' is
+// true.
+//
+// Find() must be called before Value() or Next(). Until the first
+// Find(), Done() reports true.
+template <class M>
+class MultiEpsMatcher {
+ public:
+  typedef typename M::FST FST;
+  typedef typename M::Arc Arc;
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+
+  // Builds a matcher over 'fst'. If 'matcher' is null a new M is
+  // created (and always owned); otherwise 'matcher' is wrapped and
+  // deleted on destruction only when 'own_matcher' is true.
+  MultiEpsMatcher(const FST &fst, MatchType match_type,
+                  uint32 flags = (kMultiEpsLoop | kMultiEpsList),
+                  M *matcher = 0, bool own_matcher = true)
+      : matcher_(matcher ? matcher : new M(fst, match_type)),
+        flags_(flags),
+        own_matcher_(matcher ? own_matcher : true),
+        multi_eps_iter_(multi_eps_labels_.End()),
+        current_loop_(false),
+        done_(true) {
+    // The implicit loop carries kNoLabel on the matched side and
+    // epsilon on the other side.
+    if (match_type == MATCH_INPUT) {
+      loop_.ilabel = kNoLabel;
+      loop_.olabel = 0;
+    } else {
+      loop_.ilabel = 0;
+      loop_.olabel = kNoLabel;
+    }
+    loop_.weight = Weight::One();
+    loop_.nextstate = kNoStateId;
+  }
+
+  // Copies flags and multi-eps labels, and makes an owned copy of the
+  // underlying matcher. The current state and search position are not
+  // copied.
+  MultiEpsMatcher(const MultiEpsMatcher<M> &matcher, bool safe = false)
+      : matcher_(new M(*matcher.matcher_, safe)),
+        flags_(matcher.flags_),
+        own_matcher_(true),
+        multi_eps_labels_(matcher.multi_eps_labels_),
+        multi_eps_iter_(multi_eps_labels_.End()),
+        current_loop_(false),
+        loop_(matcher.loop_),
+        done_(true) {
+    loop_.nextstate = kNoStateId;
+  }
+
+  ~MultiEpsMatcher() {
+    if (own_matcher_)
+      delete matcher_;
+  }
+
+  MultiEpsMatcher<M> *Copy(bool safe = false) const {
+    return new MultiEpsMatcher<M>(*this, safe);
+  }
+
+  MatchType Type(bool test) const { return matcher_->Type(test); }
+
+  void SetState(StateId s) {
+    matcher_->SetState(s);
+    loop_.nextstate = s;  // The implicit loop is a self-loop at 's'
+  }
+
+  bool Find(Label match_label);
+
+  bool Done() const {
+    return done_;
+  }
+
+  const Arc& Value() const {
+    return current_loop_ ? loop_ : matcher_->Value();
+  }
+
+  void Next() {
+    if (!current_loop_) {
+      matcher_->Next();
+      done_ = matcher_->Done();
+      // When listing non-consuming arcs, move on to the next multi-eps
+      // label that has arcs here, and finally to Find(kNoLabel) on the
+      // underlying matcher.
+      if (done_ && multi_eps_iter_ != multi_eps_labels_.End()) {
+        ++multi_eps_iter_;
+        while ((multi_eps_iter_ != multi_eps_labels_.End()) &&
+               !matcher_->Find(*multi_eps_iter_))
+          ++multi_eps_iter_;
+        if (multi_eps_iter_ != multi_eps_labels_.End())
+          done_ = false;
+        else
+          done_ = !matcher_->Find(kNoLabel);
+      }
+    } else {
+      // The implicit loop is the only result.
+      done_ = true;
+    }
+  }
+
+  const FST &GetFst() const { return matcher_->GetFst(); }
+
+  uint64 Properties(uint64 props) const { return matcher_->Properties(props); }
+
+  uint32 Flags() const { return matcher_->Flags(); }
+
+  // Registers 'label' as a non-consuming label. Label 0 is rejected
+  // with an error message and is not inserted.
+  void AddMultiEpsLabel(Label label) {
+    if (label == 0) {
+      FSTERROR() << "MultiEpsMatcher: Bad multi-eps label: 0";
+    } else {
+      multi_eps_labels_.Insert(label);
+    }
+  }
+
+  void ClearMultiEpsLabels() {
+    multi_eps_labels_.Clear();
+  }
+
+ private:
+  // Specialized for 'set' - log lookup
+  bool IsMultiEps(const set<Label> &multi_eps_labels, Label label) const {
+    return multi_eps_labels.find(label) != multi_eps_labels.end();
+  }
+
+  M *matcher_;
+  uint32 flags_;
+  bool own_matcher_;             // Does this class delete the matcher?
+
+  // Multi-eps label set
+  CompactSet<Label, kNoLabel> multi_eps_labels_;
+  // Position in the label set while listing non-consuming arcs
+  typename CompactSet<Label, kNoLabel>::const_iterator multi_eps_iter_;
+
+  bool current_loop_;            // Current arc is the implicit loop
+  mutable Arc loop_;             // For non-consuming symbols
+  bool done_;                    // Matching done
+
+  void operator=(const MultiEpsMatcher<M> &);  // Disallow
+};
+
+// Starts a search for 'match_label' at the current state and returns
+// true iff there is at least one result. Label 0 is passed straight
+// to the underlying matcher. kNoLabel lists non-consuming arcs (the
+// multi-eps labels first when kMultiEpsList is set). A registered
+// multi-eps label yields the implicit loop when kMultiEpsLoop is set.
+// Any other label is passed to the underlying matcher.
+template <class M> inline
+bool MultiEpsMatcher<M>::Find(Label match_label) {
+  // Forget any position left over from a previous search.
+  multi_eps_iter_ = multi_eps_labels_.End();
+  current_loop_ = false;
+
+  bool found = false;
+  if (match_label == 0) {
+    found = matcher_->Find(0);
+  } else if (match_label == kNoLabel) {
+    if (flags_ & kMultiEpsList) {
+      // Stop at the first multi-eps label that has arcs at this state.
+      for (multi_eps_iter_ = multi_eps_labels_.Begin();
+           multi_eps_iter_ != multi_eps_labels_.End();
+           ++multi_eps_iter_) {
+        if (matcher_->Find(*multi_eps_iter_)) break;
+      }
+      found = (multi_eps_iter_ != multi_eps_labels_.End());
+    }
+    // No multi-eps arcs (or not listing them): ask for kNoLabel itself.
+    if (!found) found = matcher_->Find(kNoLabel);
+  } else if (!(flags_ & kMultiEpsLoop) ||
+             multi_eps_labels_.Find(match_label) == multi_eps_labels_.End()) {
+    // Ordinary label, or implicit loops are disabled.
+    found = matcher_->Find(match_label);
+  } else {
+    // Registered multi-eps label: answer with the 'implicit' loop.
+    current_loop_ = true;
+    found = true;
+  }
+
+  done_ = !found;
+  return found;
+}
+
+
+// Generic matcher, templated on the FST definition
+// - a wrapper around pointer to specific one.
+// Here is a typical use: \code
+//   Matcher<StdFst> matcher(fst, MATCH_INPUT);
+//   matcher.SetState(state);
+//   if (matcher.Find(label))
+//     for (; !matcher.Done(); matcher.Next()) {
+//       const StdArc &arc = matcher.Value();
+//       ...
+//     } \endcode
+template <class F>
+class Matcher {
+ public:
+  typedef F FST;
+  typedef typename F::Arc Arc;
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+
+  // Uses the matcher supplied by the FST itself when InitMatcher()
+  // returns one, and a SortedMatcher otherwise.
+  Matcher(const F &fst, MatchType match_type) {
+    base_ = fst.InitMatcher(match_type);
+    if (!base_)
+      base_ = new SortedMatcher<F>(fst, match_type);
+  }
+
+  // Copies the wrapped matcher through its own Copy(safe).
+  Matcher(const Matcher<F> &matcher, bool safe = false) {
+    base_ = matcher.base_->Copy(safe);
+  }
+
+  // Takes ownership of the provided matcher
+  Matcher(MatcherBase<Arc>* base_matcher) { base_ = base_matcher; }
+
+  ~Matcher() { delete base_; }
+
+  Matcher<F> *Copy(bool safe = false) const {
+    return new Matcher<F>(*this, safe);
+  }
+
+  // Everything below forwards to the wrapped matcher.
+  MatchType Type(bool test) const { return base_->Type(test); }
+  void SetState(StateId s) { base_->SetState(s); }
+  bool Find(Label label) { return base_->Find(label); }
+  bool Done() const { return base_->Done(); }
+  const Arc& Value() const { return base_->Value(); }
+  void Next() { base_->Next(); }
+  const F &GetFst() const { return static_cast<const F &>(base_->GetFst()); }
+  uint64 Properties(uint64 props) const { return base_->Properties(props); }
+  uint32 Flags() const { return base_->Flags() & kMatcherFlags; }
+
+ private:
+  MatcherBase<Arc> *base_;  // Owned; deleted in the destructor
+
+  // Declared but not defined. The parameter must be Matcher<F> for this
+  // to be the copy-assignment operator; with any other type the
+  // compiler still generates one, and assigning would then delete
+  // 'base_' twice.
+  void operator=(const Matcher<F> &);  // disallow
+};
+
+} // namespace fst
+
+
+
+#endif // FST_LIB_MATCHER_H__
diff --git a/src/include/fst/minimize.h b/src/include/fst/minimize.h
new file mode 100644
index 0000000..3fbe3ba
--- /dev/null
+++ b/src/include/fst/minimize.h
@@ -0,0 +1,584 @@
+// minimize.h
+// minimize.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: johans@google.com (Johan Schalkwyk)
+//
+// \file Functions and classes to minimize a finite state acceptor
+//
+
+#ifndef FST_LIB_MINIMIZE_H__
+#define FST_LIB_MINIMIZE_H__
+
+#include <cmath>
+
+#include <algorithm>
+#include <map>
+#include <queue>
+#include <vector>
+using std::vector;
+
+#include <fst/arcsort.h>
+#include <fst/connect.h>
+#include <fst/dfs-visit.h>
+#include <fst/encode.h>
+#include <fst/factor-weight.h>
+#include <fst/fst.h>
+#include <fst/mutable-fst.h>
+#include <fst/partition.h>
+#include <fst/push.h>
+#include <fst/queue.h>
+#include <fst/reverse.h>
+#include <fst/state-map.h>
+
+
+namespace fst {
+
+// Ordering on states, used as the key comparison of the maps that
+// group equivalent states. Depending on 'flags', two states are
+// ordered by
+//   - the hash of their final weight (kCompareFinal),
+//   - their number of arcs (kCompareOutDegree),
+//   - pairwise, the input label and the destination class of their
+//     arcs (kCompareArcs; only consulted when kCompareOutDegree is
+//     also set).
+// States that no enabled criterion separates compare as equivalent.
+template <class A>
+class StateComparator {
+ public:
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+
+  static const uint32 kCompareFinal     = 0x00000001;
+  static const uint32 kCompareOutDegree = 0x00000002;
+  static const uint32 kCompareArcs      = 0x00000004;
+  static const uint32 kCompareAll       = 0x00000007;
+
+  // Keeps references to 'fst' and 'partition'; both must outlive the
+  // comparator.
+  StateComparator(const Fst<A>& fst,
+                  const Partition<typename A::StateId>& partition,
+                  uint32 flags = kCompareAll)
+      : fst_(fst), partition_(partition), flags_(flags) {}
+
+  // Returns true iff state x sorts strictly before state y.
+  bool operator()(const StateId x, const StateId y) const {
+    // Final weights, compared through their hash values.
+    if (flags_ & kCompareFinal) {
+      const size_t hash_x = fst_.Final(x).Hash();
+      const size_t hash_y = fst_.Final(y).Hash();
+      if (hash_x != hash_y) return hash_x < hash_y;
+    }
+
+    // Everything below requires the out-degree criterion.
+    if (!(flags_ & kCompareOutDegree)) return false;
+
+    if (fst_.NumArcs(x) < fst_.NumArcs(y)) return true;
+    if (fst_.NumArcs(x) > fst_.NumArcs(y)) return false;
+
+    if (!(flags_ & kCompareArcs)) return false;
+
+    // Same number of arcs: walk both arc lists in lock-step.
+    ArcIterator<Fst<A> > iter_x(fst_, x);
+    ArcIterator<Fst<A> > iter_y(fst_, y);
+    while (!iter_x.Done() && !iter_y.Done()) {
+      const A& arc_x = iter_x.Value();
+      const A& arc_y = iter_y.Value();
+
+      if (arc_x.ilabel < arc_y.ilabel) return true;
+      if (arc_x.ilabel > arc_y.ilabel) return false;
+
+      if (partition_.class_id(arc_x.nextstate) <
+          partition_.class_id(arc_y.nextstate)) return true;
+      if (partition_.class_id(arc_x.nextstate) >
+          partition_.class_id(arc_y.nextstate)) return false;
+
+      iter_x.Next();
+      iter_y.Next();
+    }
+
+    // Nothing distinguishes x from y.
+    return false;
+  }
+
+ private:
+  const Fst<A>& fst_;
+  const Partition<typename A::StateId>& partition_;
+  const uint32 flags_;
+};
+
+// Out-of-class definitions of the static flag constants.
+template <class A> const uint32 StateComparator<A>::kCompareFinal;
+template <class A> const uint32 StateComparator<A>::kCompareOutDegree;
+template <class A> const uint32 StateComparator<A>::kCompareArcs;
+template <class A> const uint32 StateComparator<A>::kCompareAll;
+
+
+// Computes equivalence classes for cyclic Fsts. For cyclic minimization
+// we use the classic Hopcroft minimization algorithm, which is of
+//
+// O(E log N),
+//
+// where E is the number of edges in the machine and N is number of states.
+//
+// The following paper describes the original algorithm
+// An N Log N algorithm for minimizing states in a finite automaton
+// by John Hopcroft, January 1971
+//
+template <class A, class Queue>
+class CyclicMinimizer {
+ public:
+ typedef typename A::Label Label;
+ typedef typename A::StateId StateId;
+ typedef typename A::StateId ClassId;
+ typedef typename A::Weight Weight;
+ typedef ReverseArc<A> RevA;
+
+ CyclicMinimizer(const ExpandedFst<A>& fst) { // All the work happens here; read the result with partition()
+ Initialize(fst);
+ Compute(fst);
+ }
+
+ ~CyclicMinimizer() {
+ delete aiter_queue_;
+ }
+
+ const Partition<StateId>& partition() const {
+ return P_;
+ }
+
+ // helper classes
+ private:
+ typedef ArcIterator<Fst<RevA> > ArcIter;
+ class ArcIterCompare {
+ public:
+ ArcIterCompare(const Partition<StateId>& partition)
+ : partition_(partition) {}
+
+ ArcIterCompare(const ArcIterCompare& comp)
+ : partition_(comp.partition_) {}
+
+ // compare two iterators based on the input label of their current arc
+ // only (greater-than; partition_ is not consulted)
+ bool operator()(const ArcIter* x, const ArcIter* y) const {
+ const RevA& xarc = x->Value();
+ const RevA& yarc = y->Value();
+ return (xarc.ilabel > yarc.ilabel);
+ }
+
+ private:
+ const Partition<StateId>& partition_;
+ };
+
+ typedef priority_queue<ArcIter*, vector<ArcIter*>, ArcIterCompare>
+ ArcIterQueue;
+
+ // helper methods
+ private:
+ // Pre-partitions the states into equivalence classes. The comparator
+ // is built with kCompareFinal only, so states are grouped solely by
+ // the hash of their final weight (out-degree and arcs are not
+ // compared here). Every class created is enqueued on L_.
+ void PrePartition(const Fst<A>& fst) {
+ VLOG(5) << "PrePartition";
+
+ typedef map<StateId, StateId, StateComparator<A> > EquivalenceMap;
+ StateComparator<A> comp(fst, P_, StateComparator<A>::kCompareFinal);
+ EquivalenceMap equiv_map(comp);
+
+ StateIterator<Fst<A> > siter(fst); // Read without a Done() check: 'fst' must have at least one state
+ StateId class_id = P_.AddClass();
+ P_.Add(siter.Value(), class_id);
+ equiv_map[siter.Value()] = class_id;
+ L_.Enqueue(class_id);
+ for (siter.Next(); !siter.Done(); siter.Next()) {
+ StateId s = siter.Value();
+ typename EquivalenceMap::const_iterator it = equiv_map.find(s); // Finds a state equivalent to s under 'comp'
+ if (it == equiv_map.end()) { // No equivalent state seen yet: open a new class
+ class_id = P_.AddClass();
+ P_.Add(s, class_id);
+ equiv_map[s] = class_id;
+ L_.Enqueue(class_id);
+ } else { // Join the class of the equivalent state
+ P_.Add(s, it->second);
+ equiv_map[s] = it->second;
+ }
+ }
+
+ VLOG(5) << "Initial Partition: " << P_.num_classes();
+ }
+
+ // - Create inverse transition Tr_ = rev(fst), sorted by input label
+ // - size the partition, pre-partition the states of fst (see
+ // PrePartition) and allocate the arc iterator queue
+ void Initialize(const Fst<A>& fst) {
+ // construct Tr
+ Reverse(fst, &Tr_);
+ ILabelCompare<RevA> ilabel_comp;
+ ArcSort(&Tr_, ilabel_comp);
+
+ // size the partition for one state fewer than Tr_ has (Split maps state s of fst to s + 1 in Tr_)
+ P_.Initialize(Tr_.NumStates() - 1);
+
+ // prep partition
+ PrePartition(fst);
+
+ // allocate arc iterator queue
+ ArcIterCompare comp(P_);
+ aiter_queue_ = new ArcIterQueue(comp);
+ }
+
+ // partition all classes with destination C
+ void Split(ClassId C) {
+ // Prep priority queue. Open arc iterator for each state in C, and
+ // insert into priority queue.
+ for (PartitionIterator<StateId> siter(P_, C);
+ !siter.Done(); siter.Next()) {
+ StateId s = siter.Value();
+ if (Tr_.NumArcs(s + 1)) // State s corresponds to state s + 1 in Tr_
+ aiter_queue_->push(new ArcIterator<Fst<RevA> >(Tr_, s + 1));
+ }
+
+ // Now pop arc iterator from queue, split entering equivalence class
+ // re-insert updated iterator into queue.
+ Label prev_label = -1;
+ while (!aiter_queue_->empty()) {
+ ArcIterator<Fst<RevA> >* aiter = aiter_queue_->top();
+ aiter_queue_->pop();
+ if (aiter->Done()) {
+ delete aiter;
+ continue;
+ }
+
+ const RevA& arc = aiter->Value();
+ StateId from_state = aiter->Value().nextstate - 1; // Back from a Tr_ state id to the id used in P_
+ Label from_label = arc.ilabel;
+ if (prev_label != from_label) // Label changed: close the splits collected for the previous label
+ P_.FinalizeSplit(&L_);
+
+ StateId from_class = P_.class_id(from_state);
+ if (P_.class_size(from_class) > 1) // Singleton classes are never split
+ P_.SplitOn(from_state);
+
+ prev_label = from_label;
+ aiter->Next();
+ if (aiter->Done()) // Exhausted iterators are freed, the others go back on the queue
+ delete aiter;
+ else
+ aiter_queue_->push(aiter);
+ }
+ P_.FinalizeSplit(&L_);
+ }
+
+ // Main loop for Hopcroft minimization ('fst' itself is not used here).
+ void Compute(const Fst<A>& fst) {
+ // process active classes (FIFO, or FILO)
+ while (!L_.Empty()) {
+ ClassId C = L_.Head();
+ L_.Dequeue();
+
+ // split on C, all labels in C
+ Split(C);
+ }
+ }
+
+ // helper data
+ private:
+ // Partitioning of states into equivalence classes
+ Partition<StateId> P_;
+
+ // L = set of active classes to be processed in partition P
+ Queue L_;
+
+ // reverse transition function
+ VectorFst<RevA> Tr_;
+
+ // Priority queue of open arc iterators for all states in the 'splitter'
+ // equivalence class (allocated in Initialize, deleted in the destructor)
+ ArcIterQueue* aiter_queue_;
+};
+
+
+// Computes equivalence classes for acyclic Fsts. The implementation details
+// for this algorithm are documented by the following paper.
+//
+// Minimization of acyclic deterministic automata in linear time
+// Dominique Revuz
+//
+// Complexity O(|E|)
+//
+template <class A>
+class AcyclicMinimizer {
+ public:
+ typedef typename A::Label Label;
+ typedef typename A::StateId StateId;
+ typedef typename A::StateId ClassId;
+ typedef typename A::Weight Weight;
+
+ AcyclicMinimizer(const ExpandedFst<A>& fst) { // All the work happens here; read the result with partition()
+ Initialize(fst);
+ Refine(fst);
+ }
+
+ const Partition<StateId>& partition() {
+ return partition_;
+ }
+
+ // helper classes
+ private:
+ // DFS visitor to compute the height (distance) to final state.
+ class HeightVisitor {
+ public:
+ HeightVisitor() : max_height_(0), num_states_(0) { }
+
+ // invoked before dfs visit
+ void InitVisit(const Fst<A>& fst) {}
+
+ // invoked when state is discovered (2nd arg is DFS tree root)
+ bool InitState(StateId s, StateId root) {
+ // extend height array up to s, marking new entries with -1 (height not set yet)
+ for (size_t i = height_.size(); i <= s; ++i)
+ height_.push_back(-1);
+
+ if (s >= num_states_) num_states_ = s + 1; // Track the largest state id seen, plus one
+ return true;
+ }
+
+ // invoked when tree arc examined (to undiscovered state)
+ bool TreeArc(StateId s, const A& arc) {
+ return true;
+ }
+
+ // invoked when back arc examined (to unfinished state)
+ bool BackArc(StateId s, const A& arc) {
+ return true;
+ }
+
+ // invoked when forward or cross arc examined (to finished state)
+ bool ForwardOrCrossArc(StateId s, const A& arc) {
+ if (height_[arc.nextstate] + 1 > height_[s]) // Keep the largest height over the arcs seen
+ height_[s] = height_[arc.nextstate] + 1;
+ return true;
+ }
+
+ // invoked when state finished (parent is kNoStateId for tree root)
+ void FinishState(StateId s, StateId parent, const A* parent_arc) {
+ if (height_[s] == -1) height_[s] = 0; // Height was never raised: s gets height 0
+ StateId h = height_[s] + 1; // Candidate height for the parent
+ if (parent >= 0) {
+ if (h > height_[parent]) height_[parent] = h;
+ if (h > max_height_) max_height_ = h;
+ }
+ }
+
+ // invoked after DFS visit
+ void FinishVisit() {}
+
+ size_t max_height() const { return max_height_; }
+
+ const vector<StateId>& height() const { return height_; }
+
+ const size_t num_states() const { return num_states_; }
+
+ private:
+ vector<StateId> height_; // height_[s]: height of state s, -1 until set
+ size_t max_height_; // Largest height recorded for any parent state
+ size_t num_states_; // Largest state id seen, plus one
+ };
+
+ // helper methods
+ private:
+ // cluster states according to height (distance to final state)
+ void Initialize(const Fst<A>& fst) {
+ // compute height (distance to final state)
+ HeightVisitor hvisitor;
+ DfsVisit(fst, &hvisitor);
+
+ // create initial partition based on height
+ partition_.Initialize(hvisitor.num_states());
+ partition_.AllocateClasses(hvisitor.max_height() + 1); // One class per height value 0..max_height
+ const vector<StateId>& hstates = hvisitor.height();
+ for (size_t s = 0; s < hstates.size(); ++s)
+ partition_.Add(s, hstates[s]); // The class id of a state is its height
+ }
+
+ // refine states based on arc sort (out degree, arc equivalence)
+ void Refine(const Fst<A>& fst) {
+ typedef map<StateId, StateId, StateComparator<A> > EquivalenceMap;
+ StateComparator<A> comp(fst, partition_); // Default flags: compare final weight, out-degree and arcs
+
+ // start with tail (height = 0)
+ size_t height = partition_.num_classes(); // Only the initial height classes are visited, not those added below
+ for (size_t h = 0; h < height; ++h) {
+ EquivalenceMap equiv_classes(comp);
+
+ // sort states within equivalence class
+ PartitionIterator<StateId> siter(partition_, h);
+ equiv_classes[siter.Value()] = h; // The first state keeps class h
+ for (siter.Next(); !siter.Done(); siter.Next()) {
+ const StateId s = siter.Value();
+ typename EquivalenceMap::const_iterator it = equiv_classes.find(s);
+ if (it == equiv_classes.end()) // Not equivalent to any state seen so far: new class
+ equiv_classes[s] = partition_.AddClass();
+ else
+ equiv_classes[s] = it->second;
+ }
+
+ // create refined partition
+ for (siter.Reset(); !siter.Done();) {
+ const StateId s = siter.Value();
+ const StateId old_class = partition_.class_id(s);
+ const StateId new_class = equiv_classes[s];
+
+ // a move operation can invalidate the iterator, so
+ // we first update the iterator to the next element
+ // before we move the current element out of the list
+ siter.Next();
+ if (old_class != new_class)
+ partition_.Move(s, new_class);
+ }
+ }
+ }
+
+ private:
+ Partition<StateId> partition_; // Resulting partition of the states into equivalence classes
+};
+
+
+// Merges the states of 'fst' in place (destructively) according to
+// 'partition'. The first state listed in each class acts as the
+// representative of that class: every arc is redirected to the
+// representative of its destination's class, arcs leaving a
+// non-representative state are added to the representative of its
+// class, and the start state is replaced by its representative.
+// Connect() is then run on the result.
+template <class A>
+void MergeStates(
+    const Partition<typename A::StateId>& partition, MutableFst<A>* fst) {
+  typedef typename A::StateId StateId;
+
+  // representative[c] is the first state of class c.
+  vector<StateId> representative(partition.num_classes());
+  for (size_t c = 0; c < partition.num_classes(); ++c) {
+    PartitionIterator<StateId> first(partition, c);
+    representative[c] = first.Value();
+  }
+
+  // Redirect the arcs of every state, class by class.
+  for (size_t c = 0; c < partition.num_classes(); ++c) {
+    PartitionIterator<StateId> members(partition, c);
+    while (!members.Done()) {
+      const StateId s = members.Value();
+      MutableArcIterator<MutableFst<A> > aiter(fst, s);
+      while (!aiter.Done()) {
+        A arc = aiter.Value();
+        arc.nextstate = representative[partition.class_id(arc.nextstate)];
+
+        if (s != representative[c]) {
+          // Other members hand their arcs over to the representative.
+          fst->AddArc(representative[c], arc);
+        } else {
+          // The representative only needs its destination updated.
+          aiter.SetValue(arc);
+        }
+        aiter.Next();
+      }
+      members.Next();
+    }
+  }
+
+  const StateId start_class = partition.class_id(fst->Start());
+  fst->SetStart(representative[start_class]);
+
+  Connect(fst);
+}
+
+// Minimizes 'fst' in place. 'fst' must be an unweighted acceptor;
+// otherwise an error is reported, kError is set on 'fst' and nothing
+// else is done. Acyclic inputs go through AcyclicMinimizer, all others
+// through CyclicMinimizer; the resulting classes are merged with
+// MergeStates() and duplicate arcs are then combined.
+template <class A>
+void AcceptorMinimize(MutableFst<A>* fst) {
+  typedef typename A::StateId StateId;
+
+  // Both properties are required. Properties() returns the subset of
+  // the requested bits that hold, so the result has to equal the whole
+  // mask; testing it for non-zero would accept an FST that has only
+  // one of the two properties.
+  const uint64 required = kAcceptor | kUnweighted;
+  if (fst->Properties(required, true) != required) {
+    FSTERROR() << "FST is not an unweighted acceptor";
+    fst->SetProperties(kError, kError);
+    return;
+  }
+
+  // connect fst before minimization, handles disconnected states
+  Connect(fst);
+  if (fst->NumStates() == 0) return;
+
+  if (fst->Properties(kAcyclic, true)) {
+    // Acyclic minimization (revuz)
+    VLOG(2) << "Acyclic Minimization";
+    ArcSort(fst, ILabelCompare<A>());
+    AcyclicMinimizer<A> minimizer(*fst);
+    MergeStates(minimizer.partition(), fst);
+
+  } else {
+    // Cyclic minimization (hopcroft)
+    VLOG(2) << "Cyclic Minimization";
+    CyclicMinimizer<A, LifoQueue<StateId> > minimizer(*fst);
+    MergeStates(minimizer.partition(), fst);
+  }
+
+  // Merge in appropriate semiring
+  ArcUniqueMapper<A> mapper(*fst);
+  StateMap(fst, mapper);
+}
+
+
+// In place minimization of deterministic weighted automata and transducers.
+// For transducers, when the 'sfst' argument is not null, the algorithm
+// produces a compact factorization of the minimal transducer.
+//
+// In the acyclic case, we use an algorithm from Dominique Revuz that
+// is linear in the number of arcs (edges) in the machine.
+// Complexity = O(E)
+//
+// In the cyclic case, we use the classical Hopcroft minimization.
+// Complexity = O(|E|log(|N|))
+//
+template <class A>
+void Minimize(MutableFst<A>* fst,
+ MutableFst<A>* sfst = 0,
+ float delta = kDelta) {
+ uint64 props = fst->Properties(kAcceptor | kIDeterministic|
+ kWeighted | kUnweighted, true);
+ if (!(props & kIDeterministic)) { // Input determinism is required: report, set kError and return
+ FSTERROR() << "FST is not deterministic";
+ fst->SetProperties(kError, kError);
+ return;
+ }
+
+ if (!(props & kAcceptor)) { // weighted transducer
+ VectorFst< GallicArc<A, STRING_LEFT> > gfst;
+ ArcMap(*fst, &gfst, ToGallicMapper<A, STRING_LEFT>()); // Map into a Gallic-arc FST
+ fst->DeleteStates(); // 'fst' is rebuilt from 'gfst' below
+ gfst.SetProperties(kAcceptor, kAcceptor);
+ Push(&gfst, REWEIGHT_TO_INITIAL, delta);
+ ArcMap(&gfst, QuantizeMapper< GallicArc<A, STRING_LEFT> >(delta));
+ EncodeMapper< GallicArc<A, STRING_LEFT> >
+ encoder(kEncodeLabels | kEncodeWeights, ENCODE);
+ Encode(&gfst, &encoder); // Labels and weights are encoded before acceptor minimization
+ AcceptorMinimize(&gfst);
+ Decode(&gfst, encoder);
+
+ if (sfst == 0) { // No 'sfst' given: factor the Gallic weights and map back into 'fst'
+ FactorWeightFst< GallicArc<A, STRING_LEFT>,
+ GallicFactor<typename A::Label,
+ typename A::Weight, STRING_LEFT> > fwfst(gfst);
+ SymbolTable *osyms = fst->OutputSymbols() ?
+ fst->OutputSymbols()->Copy() : 0; // Copied before the ArcMap below and re-applied after it
+ ArcMap(fwfst, fst, FromGallicMapper<A, STRING_LEFT>());
+ fst->SetOutputSymbols(osyms);
+ delete osyms;
+ } else { // 'sfst' given: the mapper is constructed with 'sfst'
+ sfst->SetOutputSymbols(fst->OutputSymbols());
+ GallicToNewSymbolsMapper<A, STRING_LEFT> mapper(sfst);
+ ArcMap(gfst, fst, &mapper);
+ fst->SetOutputSymbols(sfst->InputSymbols());
+ }
+ } else if (props & kWeighted) { // weighted acceptor
+ Push(fst, REWEIGHT_TO_INITIAL, delta);
+ ArcMap(fst, QuantizeMapper<A>(delta));
+ EncodeMapper<A> encoder(kEncodeLabels | kEncodeWeights, ENCODE);
+ Encode(fst, &encoder); // Labels and weights are encoded before acceptor minimization
+ AcceptorMinimize(fst);
+ Decode(fst, encoder);
+ } else { // unweighted acceptor
+ AcceptorMinimize(fst);
+ }
+}
+
+} // namespace fst
+
+#endif // FST_LIB_MINIMIZE_H__
diff --git a/src/include/fst/mutable-fst.h b/src/include/fst/mutable-fst.h
new file mode 100644
index 0000000..9afcab3
--- /dev/null
+++ b/src/include/fst/mutable-fst.h
@@ -0,0 +1,378 @@
+// mutable-fst.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Expanded FST augmented with mutators - interface class definition
+// and mutable arc iterator interface.
+//
+
+#ifndef FST_LIB_MUTABLE_FST_H__
+#define FST_LIB_MUTABLE_FST_H__
+
+#include <stddef.h>
+#include <sys/types.h>
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/expanded-fst.h>
+
+
+namespace fst {
+
+template <class A> class MutableArcIteratorData;
+
+// An expanded FST plus mutators (use MutableArcIterator to modify arcs).
+// This is an abstract interface: every mutator is pure virtual except the
+// two Reserve* hints, which default to doing nothing.
+template <class A>
+class MutableFst : public ExpandedFst<A> {
+ public:
+  typedef A Arc;
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+
+  virtual MutableFst<A> &operator=(const Fst<A> &fst) = 0;
+
+  // Assignment from another MutableFst forwards to the virtual
+  // operator=(const Fst<A> &) above.
+  MutableFst<A> &operator=(const MutableFst<A> &fst) {
+    return operator=(static_cast<const Fst<A> &>(fst));
+  }
+
+  virtual void SetStart(StateId) = 0;          // Set the initial state
+  virtual void SetFinal(StateId, Weight) = 0;  // Set a state's final weight
+  virtual void SetProperties(uint64 props,
+                             uint64 mask) = 0;  // Set property bits wrt mask
+
+  virtual StateId AddState() = 0;                  // Add a state, return its ID
+  virtual void AddArc(StateId, const A &arc) = 0;  // Add an arc to state
+
+  virtual void DeleteStates(const vector<StateId>&) = 0;  // Delete some states
+  virtual void DeleteStates() = 0;                 // Delete all states
+  virtual void DeleteArcs(StateId, size_t n) = 0;  // Delete some arcs at state
+  virtual void DeleteArcs(StateId) = 0;            // Delete all arcs at state
+
+  virtual void ReserveStates(StateId n) { }  // Optional, best effort only.
+  virtual void ReserveArcs(StateId s, size_t n) { }  // Optional, best effort.
+
+  // Return input label symbol table; return NULL if not specified
+  virtual const SymbolTable* InputSymbols() const = 0;
+  // Return output label symbol table; return NULL if not specified
+  virtual const SymbolTable* OutputSymbols() const = 0;
+
+  // Return input label symbol table; return NULL if not specified
+  virtual SymbolTable* MutableInputSymbols() = 0;
+  // Return output label symbol table; return NULL if not specified
+  virtual SymbolTable* MutableOutputSymbols() = 0;
+
+  // Set input label symbol table; NULL signifies unspecified
+  virtual void SetInputSymbols(const SymbolTable* isyms) = 0;
+  // Set output label symbol table; NULL signifies unspecified
+  virtual void SetOutputSymbols(const SymbolTable* osyms) = 0;
+
+  // Get a copy of this MutableFst. See Fst<>::Copy() for further doc.
+  virtual MutableFst<A> *Copy(bool safe = false) const = 0;
+
+  // Read a MutableFst from an input stream; return NULL on error.
+  // If opts.header is set it is used as the header; otherwise the header is
+  // read from the stream first. The FST type named in the header must carry
+  // the kMutable property and have a registered reader.
+  static MutableFst<A> *Read(istream &strm, const FstReadOptions &opts) {
+    FstReadOptions ropts(opts);
+    FstHeader hdr;
+    if (ropts.header)
+      hdr = *opts.header;
+    else {
+      if (!hdr.Read(strm, opts.source))
+        return 0;
+      ropts.header = &hdr;
+    }
+    if (!(hdr.Properties() & kMutable)) {
+      LOG(ERROR) << "MutableFst::Read: Not an MutableFst: " << ropts.source;
+      return 0;
+    }
+    FstRegister<A> *registr = FstRegister<A>::GetRegister();
+    const typename FstRegister<A>::Reader reader =
+        registr->GetReader(hdr.FstType());
+    if (!reader) {
+      LOG(ERROR) << "MutableFst::Read: Unknown FST type \"" << hdr.FstType()
+                 << "\" (arc type = \"" << A::Type()
+                 << "\"): " << ropts.source;
+      return 0;
+    }
+    Fst<A> *fst = reader(strm, ropts);
+    if (!fst) return 0;
+    // The header advertised kMutable, so the object is treated as a MutableFst.
+    return static_cast<MutableFst<A> *>(fst);
+  }
+
+  // Read a MutableFst from a file; return NULL on error.
+  // Empty filename reads from standard input. If 'convert' is true,
+  // convert to a mutable FST of type 'convert_type' if file is
+  // a non-mutable FST. A 'convert_type' that does not yield a mutable
+  // FST is an error and also returns NULL.
+  static MutableFst<A> *Read(const string &filename, bool convert = false,
+                             const string &convert_type = "vector") {
+    if (convert == false) {
+      if (!filename.empty()) {
+        ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
+        if (!strm) {
+          LOG(ERROR) << "MutableFst::Read: Can't open file: " << filename;
+          return 0;
+        }
+        return Read(strm, FstReadOptions(filename));
+      } else {
+        return Read(std::cin, FstReadOptions("standard input"));
+      }
+    } else {  // Converts to 'convert_type' if not mutable.
+      Fst<A> *ifst = Fst<A>::Read(filename);
+      if (!ifst) return 0;
+      if (ifst->Properties(kMutable, false)) {
+        return static_cast<MutableFst *>(ifst);
+      } else {
+        Fst<A> *ofst = Convert(*ifst, convert_type);
+        delete ifst;
+        if (!ofst) return 0;
+        if (!ofst->Properties(kMutable, false)) {
+          // The converted FST is not mutable, so it must not be handed out
+          // as a MutableFst: release it and report failure instead.
+          LOG(ERROR) << "MutableFst: bad convert type: " << convert_type;
+          delete ofst;
+          return 0;
+        }
+        return static_cast<MutableFst *>(ofst);
+      }
+    }
+  }
+
+  // For generic mutable arc iterator construction; not normally called
+  // directly by users.
+  virtual void InitMutableArcIterator(StateId s,
+                                      MutableArcIteratorData<A> *) = 0;
+};
+
+// Mutable arc iterator interface, templated on the Arc definition; used
+// for mutable Arc iterator specializations that are returned by
+// the InitMutableArcIterator MutableFst method. Extends ArcIteratorBase<A> with SetValue().
+template <class A>
+class MutableArcIteratorBase : public ArcIteratorBase<A> {
+ public:
+ typedef A Arc;
+
+ void SetValue(const A &arc) { SetValue_(arc); } // Set current arc's content (forwards to the virtual SetValue_)
+
+ private:
+ virtual void SetValue_(const A &arc) = 0; // Pure virtual hook; concrete iterators must provide it
+};
+
+template <class A>
+struct MutableArcIteratorData {
+ MutableArcIteratorBase<A> *base; // Specific iterator, filled in by InitMutableArcIterator (presumably heap-allocated — confirm in implementations)
+};
+
+// Generic mutable arc iterator, templated on the FST definition
+// - a wrapper around pointer to specific one.
+// Here is a typical use: \code
+// for (MutableArcIterator<StdMutableFst> aiter(&fst, s);
+// !aiter.Done();
+// aiter.Next()) {
+// StdArc arc = aiter.Value();
+// arc.ilabel = 7;
+// aiter.SetValue(arc);
+// ...
+// } \endcode
+// This version requires function calls.
+template <class F>
+class MutableArcIterator {
+ public:
+ typedef F FST;
+ typedef typename F::Arc Arc;
+ typedef typename Arc::StateId StateId;
+
+ MutableArcIterator(F *fst, StateId s) {
+ fst->InitMutableArcIterator(s, &data_); // the FST fills in data_.base with its own iterator
+ }
+ ~MutableArcIterator() { delete data_.base; } // this wrapper deletes the type-specific iterator
+
+ bool Done() const { return data_.base->Done(); } // everything below forwards to data_.base
+ const Arc& Value() const { return data_.base->Value(); }
+ void Next() { data_.base->Next(); }
+ size_t Position() const { return data_.base->Position(); }
+ void Reset() { data_.base->Reset(); }
+ void Seek(size_t a) { data_.base->Seek(a); }
+ void SetValue(const Arc &a) { data_.base->SetValue(a); }
+ uint32 Flags() const { return data_.base->Flags(); }
+ void SetFlags(uint32 f, uint32 m) {
+ return data_.base->SetFlags(f, m);
+ }
+
+ private:
+ MutableArcIteratorData<Arc> data_;
+ DISALLOW_COPY_AND_ASSIGN(MutableArcIterator); // copying would lead to data_.base being deleted twice
+};
+
+
+namespace internal {
+
+// MutableFst<A> case - abstract methods: each overload forwards to the member function of the same name.
+template <class A> inline
+typename A::Weight Final(const MutableFst<A> &fst, typename A::StateId s) {
+ return fst.Final(s);
+}
+
+template <class A> inline
+ssize_t NumArcs(const MutableFst<A> &fst, typename A::StateId s) {
+ return fst.NumArcs(s); // result is returned as ssize_t
+}
+
+template <class A> inline
+ssize_t NumInputEpsilons(const MutableFst<A> &fst, typename A::StateId s) {
+ return fst.NumInputEpsilons(s);
+}
+
+template <class A> inline
+ssize_t NumOutputEpsilons(const MutableFst<A> &fst, typename A::StateId s) {
+ return fst.NumOutputEpsilons(s);
+}
+
+} // namespace internal
+
+
+// A useful alias when using StdArc.
+typedef MutableFst<StdArc> StdMutableFst;
+
+
+// This is a helper class template useful for attaching a MutableFst
+// interface to its implementation, handling reference counting and
+// copy-on-write. Each mutator forwards to the implementation I after MutateCheck().
+template <class I, class F = MutableFst<typename I::Arc> >
+class ImplToMutableFst : public ImplToExpandedFst<I, F> {
+ public:
+ typedef typename I::Arc Arc;
+ typedef typename Arc::Weight Weight;
+ typedef typename Arc::StateId StateId;
+
+ using ImplToFst<I, F>::GetImpl;
+ using ImplToFst<I, F>::SetImpl;
+
+ virtual void SetStart(StateId s) {
+ MutateCheck(); // un-share the implementation before writing to it
+ GetImpl()->SetStart(s);
+ }
+
+ virtual void SetFinal(StateId s, Weight w) {
+ MutateCheck();
+ GetImpl()->SetFinal(s, w);
+ }
+
+ virtual void SetProperties(uint64 props, uint64 mask) {
+ // Can skip mutate check if extrinsic properties don't change,
+ // since it is then safe to update all (shallow) copies
+ uint64 exprops = kExtrinsicProperties & mask; // extrinsic bits selected by the mask
+ if (GetImpl()->Properties(exprops) != (props & exprops))
+ MutateCheck();
+ GetImpl()->SetProperties(props, mask);
+ }
+
+ virtual StateId AddState() {
+ MutateCheck();
+ return GetImpl()->AddState();
+ }
+
+ virtual void AddArc(StateId s, const Arc &arc) {
+ MutateCheck();
+ GetImpl()->AddArc(s, arc);
+ }
+
+ virtual void DeleteStates(const vector<StateId> &dstates) {
+ MutateCheck();
+ GetImpl()->DeleteStates(dstates);
+ }
+
+ virtual void DeleteStates() {
+ MutateCheck();
+ GetImpl()->DeleteStates();
+ }
+
+ virtual void DeleteArcs(StateId s, size_t n) {
+ MutateCheck();
+ GetImpl()->DeleteArcs(s, n);
+ }
+
+ virtual void DeleteArcs(StateId s) {
+ MutateCheck();
+ GetImpl()->DeleteArcs(s);
+ }
+
+ virtual void ReserveStates(StateId s) {
+ MutateCheck();
+ GetImpl()->ReserveStates(s);
+ }
+
+ virtual void ReserveArcs(StateId s, size_t n) {
+ MutateCheck();
+ GetImpl()->ReserveArcs(s, n);
+ }
+
+ virtual const SymbolTable* InputSymbols() const {
+ return GetImpl()->InputSymbols(); // read-only access: no MutateCheck
+ }
+
+ virtual const SymbolTable* OutputSymbols() const {
+ return GetImpl()->OutputSymbols(); // read-only access: no MutateCheck
+ }
+
+ virtual SymbolTable* MutableInputSymbols() {
+ MutateCheck(); // a writable table is handed out, so un-share first
+ return GetImpl()->InputSymbols();
+ }
+
+ virtual SymbolTable* MutableOutputSymbols() {
+ MutateCheck(); // a writable table is handed out, so un-share first
+ return GetImpl()->OutputSymbols();
+ }
+
+ virtual void SetInputSymbols(const SymbolTable* isyms) {
+ MutateCheck();
+ GetImpl()->SetInputSymbols(isyms);
+ }
+
+ virtual void SetOutputSymbols(const SymbolTable* osyms) {
+ MutateCheck();
+ GetImpl()->SetOutputSymbols(osyms);
+ }
+
+ protected:
+ ImplToMutableFst() : ImplToExpandedFst<I, F>() {}
+
+ ImplToMutableFst(I *impl) : ImplToExpandedFst<I, F>(impl) {}
+
+
+ ImplToMutableFst(const ImplToMutableFst<I, F> &fst)
+ : ImplToExpandedFst<I, F>(fst) {}
+
+ ImplToMutableFst(const ImplToMutableFst<I, F> &fst, bool safe)
+ : ImplToExpandedFst<I, F>(fst, safe) {}
+
+ void MutateCheck() {
+ // Copy on write: when RefCount() > 1, replace the implementation with a new I built from *this
+ if (GetImpl()->RefCount() > 1)
+ SetImpl(new I(*this));
+ }
+
+ private:
+ // Disallow (declared here without a body)
+ ImplToMutableFst<I, F> &operator=(const ImplToMutableFst<I, F> &fst);
+
+ ImplToMutableFst<I, F> &operator=(const Fst<Arc> &fst) {
+ FSTERROR() << "ImplToMutableFst: Assignment operator disallowed"; // report instead of assigning
+ GetImpl()->SetProperties(kError, kError); // and mark the implementation as being in error
+ return *this;
+ }
+};
+
+
+} // namespace fst
+
+#endif // FST_LIB_MUTABLE_FST_H__
diff --git a/src/include/fst/pair-weight.h b/src/include/fst/pair-weight.h
new file mode 100644
index 0000000..7d8aa11
--- /dev/null
+++ b/src/include/fst/pair-weight.h
@@ -0,0 +1,280 @@
+// pair-weight.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: shumash@google.com (Masha Maria Shugrina)
+//
+// \file
+// Pair weight templated base class for weight classes that
+// contain two weights (e.g. Product, Lexicographic)
+
+#ifndef FST_LIB_PAIR_WEIGHT_H_
+#define FST_LIB_PAIR_WEIGHT_H_
+
+#include <climits>
+#include <stack>
+#include <string>
+
+#include <fst/weight.h>
+
+
+DECLARE_string(fst_weight_parentheses);
+DECLARE_string(fst_weight_separator);
+
+namespace fst {
+
+template<class W1, class W2> class PairWeight;
+template <class W1, class W2>
+istream &operator>>(istream &strm, PairWeight<W1, W2> &w);
+
+template<class W1, class W2>
+class PairWeight {
+ public:
+ friend istream &operator>><W1, W2>(istream&, PairWeight<W1, W2>&); // operator>> calls the protected Read*Paren helpers
+
+ typedef PairWeight<typename W1::ReverseWeight,
+ typename W2::ReverseWeight>
+ ReverseWeight;
+
+ PairWeight() {}
+
+ PairWeight(const PairWeight& w) : value1_(w.value1_), value2_(w.value2_) {}
+
+ PairWeight(W1 w1, W2 w2) : value1_(w1), value2_(w2) {}
+
+ static const PairWeight<W1, W2> &Zero() {
+ static const PairWeight<W1, W2> zero(W1::Zero(), W2::Zero()); // component-wise Zero, built once
+ return zero;
+ }
+
+ static const PairWeight<W1, W2> &One() {
+ static const PairWeight<W1, W2> one(W1::One(), W2::One()); // component-wise One, built once
+ return one;
+ }
+
+ static const PairWeight<W1, W2> &NoWeight() {
+ static const PairWeight<W1, W2> no_weight(W1::NoWeight(), W2::NoWeight());
+ return no_weight;
+ }
+
+ istream &Read(istream &strm) {
+ value1_.Read(strm); // first component, then second, in that order
+ return value2_.Read(strm);
+ }
+
+ ostream &Write(ostream &strm) const {
+ value1_.Write(strm); // same component order as Read()
+ return value2_.Write(strm);
+ }
+
+ PairWeight<W1, W2> &operator=(const PairWeight<W1, W2> &w) {
+ value1_ = w.Value1();
+ value2_ = w.Value2();
+ return *this;
+ }
+
+ bool Member() const { return value1_.Member() && value2_.Member(); } // both components must be members
+
+ size_t Hash() const {
+ size_t h1 = value1_.Hash();
+ size_t h2 = value2_.Hash();
+ const int lshift = 5;
+ const int rshift = CHAR_BIT * sizeof(size_t) - 5;
+ return h1 << lshift ^ h1 >> rshift ^ h2; // h1 rotated left by 5 bits, XORed with h2
+ }
+
+ PairWeight<W1, W2> Quantize(float delta = kDelta) const {
+ return PairWeight<W1, W2>(value1_.Quantize(delta),
+ value2_.Quantize(delta));
+ }
+
+ ReverseWeight Reverse() const {
+ return ReverseWeight(value1_.Reverse(), value2_.Reverse());
+ }
+
+ const W1& Value1() const { return value1_; }
+
+ const W2& Value2() const { return value2_; }
+
+ protected:
+ void SetValue1(const W1 &w) { value1_ = w; }
+ void SetValue2(const W2 &w) { value2_ = w; }
+
+ // Reads PairWeight when there are not parentheses around pair terms
+ inline static istream &ReadNoParen(
+ istream &strm, PairWeight<W1, W2>& w, char separator) {
+ int c;
+ do {
+ c = strm.get();
+ } while (isspace(c)); // skip leading whitespace
+
+ string s1;
+ while (c != separator) { // collect the first element's text up to the separator
+ if (c == EOF) {
+ strm.clear(std::ios::badbit); // input ended before a separator: leave the stream in the bad state
+ return strm;
+ }
+ s1 += c;
+ c = strm.get();
+ }
+ istringstream strm1(s1);
+ W1 w1 = W1::Zero();
+ strm1 >> w1; // NOTE(review): the state of strm1 is not checked after this extraction
+
+ // read second element
+ W2 w2 = W2::Zero();
+ strm >> w2; // parsed directly from what remains of strm
+
+ w = PairWeight<W1, W2>(w1, w2);
+ return strm;
+ }
+
+ // Reads PairWeight when there are parentheses around pair terms
+ inline static istream &ReadWithParen(
+ istream &strm, PairWeight<W1, W2>& w,
+ char separator, char open_paren, char close_paren) {
+ int c;
+ do {
+ c = strm.get();
+ } while (isspace(c)); // skip leading whitespace
+ if (c != open_paren) {
+ FSTERROR() << " is fst_weight_parentheses flag set correcty? ";
+ strm.clear(std::ios::failbit);
+ return strm;
+ }
+ c = strm.get();
+
+ // read first element
+ stack<int> parens; // one entry per currently open nested parenthesis
+ string s1;
+ while (c != separator || !parens.empty()) { // a separator only ends the element at nesting depth 0
+ if (c == EOF) {
+ strm.clear(std::ios::badbit);
+ return strm;
+ }
+ s1 += c;
+ // if parens encountered before separator, they must be matched
+ if (c == open_paren) {
+ parens.push(1);
+ } else if (c == close_paren) {
+ // Fail for mismatched parens
+ if (parens.empty()) {
+ strm.clear(std::ios::failbit);
+ return strm;
+ }
+ parens.pop();
+ }
+ c = strm.get();
+ }
+ istringstream strm1(s1);
+ W1 w1 = W1::Zero();
+ strm1 >> w1; // NOTE(review): the state of strm1 is not checked after this extraction
+
+ // read second element
+ string s2;
+ c = strm.get();
+ while (c != EOF) { // consumes everything left in strm
+ s2 += c;
+ c = strm.get();
+ }
+ if (s2.empty() || (s2[s2.size() - 1] != close_paren)) { // the remaining text must end with close_paren
+ FSTERROR() << " is fst_weight_parentheses flag set correcty? ";
+ strm.clear(std::ios::failbit);
+ return strm;
+ }
+
+ s2.erase(s2.size() - 1, 1); // drop the trailing close_paren before parsing
+ istringstream strm2(s2);
+ W2 w2 = W2::Zero();
+ strm2 >> w2;
+
+ w = PairWeight<W1, W2>(w1, w2);
+ return strm;
+ }
+
+ private:
+ W1 value1_;
+ W2 value2_;
+
+};
+
+// Two pair weights are equal when both components compare equal.
+template <class W1, class W2>
+inline bool operator==(const PairWeight<W1, W2> &w,
+                       const PairWeight<W1, W2> &v) {
+  if (!(w.Value1() == v.Value1())) return false;
+  return w.Value2() == v.Value2();
+}
+
+// Two pair weights differ when either component differs.
+template <class W1, class W2>
+inline bool operator!=(const PairWeight<W1, W2> &w1,
+                       const PairWeight<W1, W2> &w2) {
+  if (w1.Value1() != w2.Value1()) return true;
+  return w1.Value2() != w2.Value2();
+}
+
+
+// Approximate equality: both components must be ApproxEqual within delta.
+template <class W1, class W2>
+inline bool ApproxEqual(const PairWeight<W1, W2> &w1,
+                        const PairWeight<W1, W2> &w2,
+                        float delta = kDelta) {
+  if (!ApproxEqual(w1.Value1(), w2.Value1(), delta)) return false;
+  return ApproxEqual(w1.Value2(), w2.Value2(), delta);
+}
+
+// Writes a pair weight as text: "<v1><sep><v2>", wrapped in the configured
+// parentheses when FLAGS_fst_weight_parentheses is non-empty. A malformed
+// flag value is reported and leaves the stream in the bad state.
+template <class W1, class W2>
+inline ostream &operator<<(ostream &strm, const PairWeight<W1, W2> &w) {
+  const string &sep_flag = FLAGS_fst_weight_separator;
+  if (sep_flag.size() != 1) {
+    FSTERROR() << "FLAGS_fst_weight_separator.size() is not equal to 1";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+  const char sep = sep_flag[0];
+
+  const string &paren_flag = FLAGS_fst_weight_parentheses;
+  if (paren_flag.empty()) {
+    return strm << w.Value1() << sep << w.Value2();
+  }
+  if (paren_flag.size() != 2) {
+    FSTERROR() << "FLAGS_fst_weight_parentheses.size() is not equal to 2";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+
+  const char lparen = paren_flag[0];
+  const char rparen = paren_flag[1];
+  return strm << lparen << w.Value1() << sep << w.Value2() << rparen;
+}
+
+// Reads a pair weight from text, choosing the parenthesised or plain reader
+// according to FLAGS_fst_weight_parentheses. A malformed flag value is
+// reported and leaves the stream in the bad state.
+template <class W1, class W2>
+inline istream &operator>>(istream &strm, PairWeight<W1, W2> &w) {
+  const string &sep_flag = FLAGS_fst_weight_separator;
+  if (sep_flag.size() != 1) {
+    FSTERROR() << "FLAGS_fst_weight_separator.size() is not equal to 1";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+  const char sep = sep_flag[0];
+
+  const string &paren_flag = FLAGS_fst_weight_parentheses;
+  if (paren_flag.empty()) {
+    return PairWeight<W1, W2>::ReadNoParen(strm, w, sep);
+  }
+  if (paren_flag.size() != 2) {
+    FSTERROR() << "FLAGS_fst_weight_parentheses.size() is not equal to 2";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+  return PairWeight<W1, W2>::ReadWithParen(
+      strm, w, sep, paren_flag[0], paren_flag[1]);
+}
+
+} // namespace fst
+
+#endif // FST_LIB_PAIR_WEIGHT_H_
diff --git a/src/include/fst/partition.h b/src/include/fst/partition.h
new file mode 100644
index 0000000..dcee67b
--- /dev/null
+++ b/src/include/fst/partition.h
@@ -0,0 +1,290 @@
+// partition.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: johans@google.com (Johan Schalkwyk)
+//
+// \file Functions and classes to create a partition of states
+//
+
+#ifndef FST_LIB_PARTITION_H__
+#define FST_LIB_PARTITION_H__
+
+#include <vector>
+using std::vector;
+#include <algorithm>
+
+
+#include <fst/queue.h>
+
+
+
+namespace fst {
+
+template <typename T> class PartitionIterator;
+
+// \class Partition
+// \brief Defines a partitioning of states. Typically used to represent
+// equivalence classes for Fst operations like minimization.
+//
+// Each class is a doubly linked list of Elements. The Partition owns the
+// Elements through raw pointers and defines no copy operations, so copying
+// a non-empty Partition is unsafe (both copies would delete the same
+// Elements).
+template <typename T>
+class Partition {
+  friend class PartitionIterator<T>;
+
+  struct Element {
+    Element() : value(0), next(0), prev(0) {}
+    Element(T v) : value(v), next(0), prev(0) {}
+
+    T value;
+    Element* next;
+    Element* prev;
+  };
+
+ public:
+  Partition() : num_states_(0) {}
+
+  Partition(T num_states) : num_states_(0) {
+    Initialize(num_states);
+  }
+
+  ~Partition() {
+    DeleteElements();
+  }
+
+  // Create an empty partition for num_states. At initialization time
+  // all elements are not assigned to a class (i.e class_index = -1).
+  // Initialize just creates num_states of elements. All element
+  // operations are then done by simply disconnecting the element from
+  // its current class and placing it at the head of the next class.
+  // Any state left over from an earlier Initialize() (elements, classes,
+  // class sizes, pending splits) is discarded first.
+  void Initialize(size_t num_states) {
+    DeleteElements();
+    classes_.clear();
+    class_split_.clear();
+    class_index_.clear();
+    class_size_.clear();
+    split_size_.clear();
+    visited_classes_.clear();
+
+    elements_.resize(num_states);
+    class_index_.resize(num_states, -1);
+    class_size_.reserve(num_states);
+    for (size_t i = 0; i < num_states; ++i)
+      elements_[i] = new Element(i);
+    num_states_ = num_states;
+  }
+
+  // Add a class, resize classes_ and class_size_ resource by 1.
+  // Returns the id of the new (empty) class.
+  size_t AddClass() {
+    size_t num_classes = classes_.size();
+    classes_.resize(num_classes + 1, 0);
+    class_size_.resize(num_classes + 1, 0);
+    class_split_.resize(num_classes + 1, 0);
+    split_size_.resize(num_classes + 1, 0);
+    return num_classes;
+  }
+
+  // Append num_classes empty classes in one step.
+  void AllocateClasses(T num_classes) {
+    size_t n = classes_.size() + num_classes;
+    classes_.resize(n, 0);
+    class_size_.resize(n, 0);
+    class_split_.resize(n, 0);
+    split_size_.resize(n, 0);
+  }
+
+  // Add element_id to class_id. The Add method is used to initialize
+  // partition. Once elements have been added to a class, you need to
+  // use the Move() method to move an element from one class to another.
+  void Add(T element_id, T class_id) {
+    Element* element = elements_[element_id];
+
+    // Link the element in at the head of the class list.
+    if (classes_[class_id])
+      classes_[class_id]->prev = element;
+    element->next = classes_[class_id];
+    element->prev = 0;
+    classes_[class_id] = element;
+
+    class_index_[element_id] = class_id;
+    class_size_[class_id]++;
+  }
+
+  // Move element_id to class_id. Disconnects (removes) the element
+  // from its current class and re-adds it at the head of class_id.
+  void Move(T element_id, T class_id) {
+    T old_class_id = class_index_[element_id];
+
+    Element* element = elements_[element_id];
+    if (element->next) element->next->prev = element->prev;
+    if (element->prev) element->prev->next = element->next;
+    else classes_[old_class_id] = element->next;
+
+    Add(element_id, class_id);
+    class_size_[old_class_id]--;
+  }
+
+  // split class on the element_id
+  void SplitOn(T element_id) {
+    T class_id = class_index_[element_id];
+    if (class_size_[class_id] == 1) return;
+
+    // first time class is split
+    if (split_size_[class_id] == 0)
+      visited_classes_.push_back(class_id);
+
+    // increment size of split (set of element at head of chain)
+    split_size_[class_id]++;
+
+    // update split point
+    if (class_split_[class_id] == 0)
+      class_split_[class_id] = classes_[class_id];
+    if (class_split_[class_id] == elements_[element_id])
+      class_split_[class_id] = elements_[element_id]->next;
+
+    // move to head of chain in same class
+    Move(element_id, class_id);
+  }
+
+  // Finalize class_id, split if required, and update class_splits,
+  // class indices of the newly created class. Returns the new_class id
+  // or -1 if no new class was created.
+  T SplitRefine(T class_id) {
+    // only split if necessary
+    if (class_size_[class_id] == split_size_[class_id]) {
+      class_split_[class_id] = 0;
+      split_size_[class_id] = 0;
+      return -1;
+    } else {
+      T new_class = AddClass();
+      size_t remainder = class_size_[class_id] - split_size_[class_id];
+      if (remainder < split_size_[class_id]) {  // add smaller
+        // The unmarked tail is the smaller part: it becomes the new class.
+        Element* split_el = class_split_[class_id];
+        classes_[new_class] = split_el;
+        class_size_[class_id] = split_size_[class_id];
+        class_size_[new_class] = remainder;
+        split_el->prev->next = 0;
+        split_el->prev = 0;
+      } else {
+        // The marked head is the smaller (or equal) part: it becomes the
+        // new class and the tail stays under class_id.
+        Element* split_el = class_split_[class_id];
+        classes_[new_class] = classes_[class_id];
+        class_size_[class_id] = remainder;
+        class_size_[new_class] = split_size_[class_id];
+        split_el->prev->next = 0;
+        split_el->prev = 0;
+        classes_[class_id] = split_el;
+      }
+
+      // update class index for element in new class
+      for (Element* el = classes_[new_class]; el; el = el->next)
+        class_index_[el->value] = new_class;
+
+      class_split_[class_id] = 0;
+      split_size_[class_id] = 0;
+
+      return new_class;
+    }
+  }
+
+  // Once all states have been processed for a particular class C, we
+  // can finalize the split. FinalizeSplit() will update each block in the
+  // partition, create new ones and update the queue of active classes
+  // that require further refinement. L may be null, in which case new
+  // classes are created but not enqueued.
+  template <class Queue>
+  void FinalizeSplit(Queue* L) {
+    for (size_t i = 0; i < visited_classes_.size(); ++i) {
+      T new_class = SplitRefine(visited_classes_[i]);
+      if (new_class != -1 && L)
+        L->Enqueue(new_class);
+    }
+    visited_classes_.clear();
+  }
+
+  const T class_id(T element_id) const {
+    return class_index_[element_id];
+  }
+
+  const vector<T>& class_sizes() const {
+    return class_size_;
+  }
+
+  const size_t class_size(T class_id) const {
+    return class_size_[class_id];
+  }
+
+  const T num_classes() const {
+    return classes_.size();
+  }
+
+ private:
+  // Frees every owned Element and empties elements_.
+  void DeleteElements() {
+    for (size_t i = 0; i < elements_.size(); ++i)
+      delete elements_[i];
+    elements_.clear();
+  }
+
+  int num_states_;
+
+  // container of all elements (owner of ptrs)
+  vector<Element*> elements_;
+
+  // linked list of elements belonging to class
+  vector<Element*> classes_;
+
+  // pointer to split point for each class
+  vector<Element*> class_split_;
+
+  // class index of element
+  vector<T> class_index_;
+
+  // class sizes
+  vector<T> class_size_;
+
+  // size of split for each class
+  vector<T> split_size_;
+
+  // set of visited classes to be used in split refine
+  vector<T> visited_classes_;
+};
+
+
+// Iterates over the members of one class in a partition. The iterator keeps
+// a reference to the Partition, which must therefore outlive it.
+template <typename T>
+class PartitionIterator {
+  typedef typename Partition<T>::Element Element;
+ public:
+  // Starts at the head of class_id's element list.
+  PartitionIterator(const Partition<T>& partition, T class_id)
+      : p_(partition),
+        element_(p_.classes_[class_id]),
+        class_id_(class_id) {}
+
+  // True once the end of the class list has been passed.
+  bool Done() const {
+    return (element_ == 0);
+  }
+
+  // Id of the current element; must not be called when Done().
+  const T Value() const {
+    return (element_->value);
+  }
+
+  // Advances to the next element of the class.
+  void Next() {
+    element_ = element_->next;
+  }
+
+  // Restarts from the current head of the class list.
+  void Reset() {
+    element_ = p_.classes_[class_id_];
+  }
+
+ private:
+  const Partition<T>& p_;
+
+  const Element* element_;
+
+  T class_id_;
+};
+} // namespace fst
+
+#endif // FST_LIB_PARTITION_H__
diff --git a/src/include/fst/power-weight.h b/src/include/fst/power-weight.h
new file mode 100644
index 0000000..256928d
--- /dev/null
+++ b/src/include/fst/power-weight.h
@@ -0,0 +1,159 @@
+// power-weight.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// Cartesian power weight semiring operation definitions.
+
+#ifndef FST_LIB_POWER_WEIGHT_H__
+#define FST_LIB_POWER_WEIGHT_H__
+
+#include <fst/tuple-weight.h>
+#include <fst/weight.h>
+
+
+namespace fst {
+
+// Cartesian power semiring: W ^ n
+// Forms:
+// - a left semimodule when W is a left semiring,
+// - a right semimodule when W is a right semiring,
+// - a bisemimodule when W is a semiring,
+// the free semimodule of rank n over W
+// The Times operation is overloaded to provide the
+// left and right scalar products.
+template <class W, unsigned int n>
+class PowerWeight : public TupleWeight<W, n> {
+ public:
+ using TupleWeight<W, n>::Zero;
+ using TupleWeight<W, n>::One;
+ using TupleWeight<W, n>::NoWeight;
+ using TupleWeight<W, n>::Quantize;
+ using TupleWeight<W, n>::Reverse;
+
+ typedef PowerWeight<typename W::ReverseWeight, n> ReverseWeight;
+
+ PowerWeight() {}
+
+ PowerWeight(const TupleWeight<W, n> &w) : TupleWeight<W, n>(w) {} // non-explicit: allows conversion from the base tuple type
+
+ template <class Iterator>
+ PowerWeight(Iterator begin, Iterator end) : TupleWeight<W, n>(begin, end) {}
+
+ static const PowerWeight<W, n> &Zero() {
+ static const PowerWeight<W, n> zero(TupleWeight<W, n>::Zero()); // built once from the base-class value
+ return zero;
+ }
+
+ static const PowerWeight<W, n> &One() {
+ static const PowerWeight<W, n> one(TupleWeight<W, n>::One()); // built once from the base-class value
+ return one;
+ }
+
+ static const PowerWeight<W, n> &NoWeight() {
+ static const PowerWeight<W, n> no_weight(TupleWeight<W, n>::NoWeight());
+ return no_weight;
+ }
+
+ static const string &Type() {
+ static string type; // filled in on the first call, then reused
+ if (type.empty()) {
+ string power;
+ Int64ToStr(n, &power);
+ type = W::Type() + "_^" + power; // W's type name, then "_^", then n as text
+ }
+ return type;
+ }
+
+ static uint64 Properties() {
+ uint64 props = W::Properties();
+ return props & (kLeftSemiring | kRightSemiring |
+ kCommutative | kIdempotent); // only these four bits of W's properties are kept
+ }
+
+ PowerWeight<W, n> Quantize(float delta = kDelta) const {
+ return TupleWeight<W, n>::Quantize(delta); // delegates to the base class
+ }
+
+ ReverseWeight Reverse() const {
+ return TupleWeight<W, n>::Reverse(); // delegates to the base class
+ }
+};
+
+
+// Semiring plus operation: component-wise Plus of the two tuples.
+template <class W, unsigned int n>
+inline PowerWeight<W, n> Plus(const PowerWeight<W, n> &w1,
+                              const PowerWeight<W, n> &w2) {
+  PowerWeight<W, n> sum;
+  for (size_t k = 0; k != n; ++k) {
+    sum.SetValue(k, Plus(w1.Value(k), w2.Value(k)));
+  }
+  return sum;
+}
+
+// Semiring times operation: component-wise Times of the two tuples.
+template <class W, unsigned int n>
+inline PowerWeight<W, n> Times(const PowerWeight<W, n> &w1,
+                               const PowerWeight<W, n> &w2) {
+  PowerWeight<W, n> product;
+  for (size_t k = 0; k != n; ++k) {
+    product.SetValue(k, Times(w1.Value(k), w2.Value(k)));
+  }
+  return product;
+}
+
+// Semiring divide operation: component-wise Divide, passing the same
+// DivideType to every component.
+template <class W, unsigned int n>
+inline PowerWeight<W, n> Divide(const PowerWeight<W, n> &w1,
+                                const PowerWeight<W, n> &w2,
+                                DivideType type = DIVIDE_ANY) {
+  PowerWeight<W, n> quotient;
+  for (size_t k = 0; k != n; ++k) {
+    quotient.SetValue(k, Divide(w1.Value(k), w2.Value(k), type));
+  }
+  return quotient;
+}
+
+// Semimodule left scalar product: multiplies every component of w on the
+// left by the scalar s and returns the resulting tuple.
+template <class W, unsigned int n>
+inline PowerWeight<W, n> Times(const W &s, const PowerWeight<W, n> &w) {
+  PowerWeight<W, n> sw;
+  for (size_t i = 0; i < n; ++i)
+    sw.SetValue(i, Times(s, w.Value(i)));
+  return sw;  // the scaled tuple, not the unmodified input w
+}
+
+// Semimodule right scalar product: multiplies every component of w on the
+// right by the scalar s and returns the resulting tuple.
+template <class W, unsigned int n>
+inline PowerWeight<W, n> Times(const PowerWeight<W, n> &w, const W &s) {
+  PowerWeight<W, n> ws;
+  for (size_t i = 0; i < n; ++i)
+    ws.SetValue(i, Times(w.Value(i), s));
+  return ws;  // the scaled tuple, not the unmodified input w
+}
+
+// Semimodule dot product: the Plus-sum, starting from W::Zero(), of the
+// component-wise Times products.
+template <class W, unsigned int n>
+inline W DotProduct(const PowerWeight<W, n> &w1,
+                    const PowerWeight<W, n> &w2) {
+  W acc = W::Zero();
+  for (size_t k = 0; k != n; ++k) {
+    acc = Plus(acc, Times(w1.Value(k), w2.Value(k)));
+  }
+  return acc;
+}
+
+
+} // namespace fst
+
+#endif // FST_LIB_POWER_WEIGHT_H__
diff --git a/src/include/fst/product-weight.h b/src/include/fst/product-weight.h
new file mode 100644
index 0000000..16dede8
--- /dev/null
+++ b/src/include/fst/product-weight.h
@@ -0,0 +1,115 @@
+// product-weight.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Product weight set and associated semiring operation definitions.
+
+#ifndef FST_LIB_PRODUCT_WEIGHT_H__
+#define FST_LIB_PRODUCT_WEIGHT_H__
+
+#include <stack>
+#include <string>
+
+#include <fst/pair-weight.h>
+#include <fst/weight.h>
+
+
+namespace fst {
+
+// Product semiring: W1 * W2. Semiring operations act component-wise.
+template<class W1, class W2>
+class ProductWeight : public PairWeight<W1, W2> {
+ public:
+ using PairWeight<W1, W2>::Zero;
+ using PairWeight<W1, W2>::One;
+ using PairWeight<W1, W2>::NoWeight;
+ using PairWeight<W1, W2>::Quantize;
+ using PairWeight<W1, W2>::Reverse;
+
+ typedef ProductWeight<typename W1::ReverseWeight, typename W2::ReverseWeight>
+ ReverseWeight;
+
+ ProductWeight() {}
+
+ ProductWeight(const PairWeight<W1, W2>& w) : PairWeight<W1, W2>(w) {}
+
+ ProductWeight(W1 w1, W2 w2) : PairWeight<W1, W2>(w1, w2) {}
+
+ static const ProductWeight<W1, W2> &Zero() {
+ static const ProductWeight<W1, W2> zero(PairWeight<W1, W2>::Zero());
+ return zero;  // function-local static, built on first call
+ }
+
+ static const ProductWeight<W1, W2> &One() {
+ static const ProductWeight<W1, W2> one(PairWeight<W1, W2>::One());
+ return one;  // function-local static, built on first call
+ }
+
+ static const ProductWeight<W1, W2> &NoWeight() {
+ static const ProductWeight<W1, W2> no_weight(
+ PairWeight<W1, W2>::NoWeight());
+ return no_weight;  // function-local static, built on first call
+ }
+
+ static const string &Type() {
+ static const string type = W1::Type() + "_X_" + W2::Type();
+ return type;
+ }
+
+ static uint64 Properties() {
+ uint64 props1 = W1::Properties();
+ uint64 props2 = W2::Properties();
+ return props1 & props2 & (kLeftSemiring | kRightSemiring |
+ kCommutative | kIdempotent);  // kept only if both components have it
+ }
+
+ ProductWeight<W1, W2> Quantize(float delta = kDelta) const {
+ return PairWeight<W1, W2>::Quantize(delta);
+ }
+
+ ReverseWeight Reverse() const {
+ return PairWeight<W1, W2>::Reverse();
+ }
+
+
+};
+
+template <class W1, class W2>  // Semiring plus, applied per component.
+inline ProductWeight<W1, W2> Plus(const ProductWeight<W1, W2> &w,
+ const ProductWeight<W1, W2> &v) {
+ return ProductWeight<W1, W2>(Plus(w.Value1(), v.Value1()),
+ Plus(w.Value2(), v.Value2()));
+}
+
+template <class W1, class W2>  // Semiring times, applied per component.
+inline ProductWeight<W1, W2> Times(const ProductWeight<W1, W2> &w,
+ const ProductWeight<W1, W2> &v) {
+ return ProductWeight<W1, W2>(Times(w.Value1(), v.Value1()),
+ Times(w.Value2(), v.Value2()));
+}
+
+template <class W1, class W2>  // Semiring divide, applied per component.
+inline ProductWeight<W1, W2> Divide(const ProductWeight<W1, W2> &w,
+ const ProductWeight<W1, W2> &v,
+ DivideType typ = DIVIDE_ANY) {
+ return ProductWeight<W1, W2>(Divide(w.Value1(), v.Value1(), typ),
+ Divide(w.Value2(), v.Value2(), typ));  // same 'typ' for both components
+}
+
+} // namespace fst
+
+#endif // FST_LIB_PRODUCT_WEIGHT_H__
diff --git a/src/include/fst/project.h b/src/include/fst/project.h
new file mode 100644
index 0000000..07946c3
--- /dev/null
+++ b/src/include/fst/project.h
@@ -0,0 +1,148 @@
+// project.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Functions and classes to project an Fst on to its domain or range.
+
+#ifndef FST_LIB_PROJECT_H__
+#define FST_LIB_PROJECT_H__
+
+#include <fst/arc-map.h>
+#include <fst/mutable-fst.h>
+
+
+namespace fst {
+
+// Selects the side to keep: input labels (domain) or output labels (range).
+enum ProjectType { PROJECT_INPUT = 1, PROJECT_OUTPUT = 2 };
+
+
+// Per-arc mapper for projection: copies the label of the selected side
+// (input or output) onto both sides of the arc; weight and target are kept.
+template <class A> class ProjectMapper {
+ public:
+  explicit ProjectMapper(ProjectType project_type)
+      : project_type_(project_type) {}
+
+  A operator()(const A &arc) {
+    const bool on_input = project_type_ == PROJECT_INPUT;
+    const typename A::Label kept = on_input ? arc.ilabel : arc.olabel;
+    return A(kept, kept, arc.weight, arc.nextstate);
+  }
+
+  MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; }
+
+  MapSymbolsAction InputSymbolsAction() const {
+    if (project_type_ != PROJECT_INPUT) return MAP_CLEAR_SYMBOLS;
+    return MAP_COPY_SYMBOLS;
+  }
+
+  MapSymbolsAction OutputSymbolsAction() const {
+    if (project_type_ != PROJECT_OUTPUT) return MAP_CLEAR_SYMBOLS;
+    return MAP_COPY_SYMBOLS;
+  }
+
+  uint64 Properties(uint64 props) {
+    return ProjectProperties(props, project_type_ == PROJECT_INPUT);
+  }
+
+ private:
+  ProjectType project_type_;
+};
+
+
+// Projects an FST onto its domain or range by copying each arc's input
+// label to its output label, or vice versa. This version modifies its
+// input in place.
+//
+// Complexity:
+// - Time: O(V + E)
+// - Space: O(1)
+// where V = # of states and E = # of arcs.
+template<class Arc> inline
+void Project(MutableFst<Arc> *fst, ProjectType project_type) {
+  ArcMap(fst, ProjectMapper<Arc>(project_type));
+  if (project_type == PROJECT_INPUT)
+    fst->SetOutputSymbols(fst->InputSymbols());
+  else if (project_type == PROJECT_OUTPUT)
+    fst->SetInputSymbols(fst->OutputSymbols());
+}
+
+
+// Projects an FST onto its domain or range by either copying each arc's
+// input label to the output label or vice versa. This version is a delayed
+// Fst.
+//
+// Complexity:
+// - Time: O(v + e)
+// - Space: O(1)
+// where v = # of states visited, e = # of arcs visited. Constant
+// time and space to visit an input state or arc is assumed and exclusive
+// of caching.
+template <class A>
+class ProjectFst : public ArcMapFst<A, A, ProjectMapper<A> > {
+ public:
+ typedef A Arc;
+ typedef ProjectMapper<A> C;
+ typedef ArcMapFstImpl< A, A, ProjectMapper<A> > Impl;
+ using ImplToFst<Impl>::GetImpl;
+
+ ProjectFst(const Fst<A> &fst, ProjectType project_type)
+ : ArcMapFst<A, A, C>(fst, C(project_type)) {
+ if (project_type == PROJECT_INPUT)  // output side takes the input symbols
+ GetImpl()->SetOutputSymbols(fst.InputSymbols());
+ if (project_type == PROJECT_OUTPUT)  // input side takes the output symbols
+ GetImpl()->SetInputSymbols(fst.OutputSymbols());
+ }
+
+ // See Fst<>::Copy() for doc.
+ ProjectFst(const ProjectFst<A> &fst, bool safe = false)
+ : ArcMapFst<A, A, C>(fst, safe) {}
+
+ // Get a copy of this ProjectFst. See Fst<>::Copy() for further doc.
+ virtual ProjectFst<A> *Copy(bool safe = false) const {
+ return new ProjectFst(*this, safe);
+ }
+};
+
+
+// StateIterator specialization for ProjectFst; reuses the ArcMapFst version.
+template <class A>
+class StateIterator< ProjectFst<A> >
+ : public StateIterator< ArcMapFst<A, A, ProjectMapper<A> > > {
+ public:
+ explicit StateIterator(const ProjectFst<A> &fst)
+ : StateIterator< ArcMapFst<A, A, ProjectMapper<A> > >(fst) {}
+};
+
+
+// ArcIterator specialization for ProjectFst; reuses the ArcMapFst version.
+template <class A>
+class ArcIterator< ProjectFst<A> >
+ : public ArcIterator< ArcMapFst<A, A, ProjectMapper<A> > > {
+ public:
+ ArcIterator(const ProjectFst<A> &fst, typename A::StateId s)
+ : ArcIterator< ArcMapFst<A, A, ProjectMapper<A> > >(fst, s) {}
+};
+
+
+// Useful alias when using StdArc.
+typedef ProjectFst<StdArc> StdProjectFst;
+
+} // namespace fst
+
+#endif // FST_LIB_PROJECT_H__
diff --git a/src/include/fst/properties.h b/src/include/fst/properties.h
new file mode 100644
index 0000000..8fab16f
--- /dev/null
+++ b/src/include/fst/properties.h
@@ -0,0 +1,460 @@
+// properties.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: Michael Riley <riley@google.com>
+// \file
+// FST property bits.
+
+#ifndef FST_LIB_PROPERTIES_H__
+#define FST_LIB_PROPERTIES_H__
+
+#include <sys/types.h>
+#include <vector>
+using std::vector;
+
+#include <fst/compat.h>
+
+namespace fst {
+
+// The property bits here assert facts about an FST. If individual
+// bits are added, then the composite properties below, the property
+// functions and property names in properties.cc, and
+// TestProperties() in test-properties.h should be updated.
+
+//
+// BINARY PROPERTIES
+//
+// For each property below, there is a single bit. If it is set,
+// the property is true. If it is not set, the property is false.
+//
+
+// The Fst is an ExpandedFst
+const uint64 kExpanded = 0x0000000000000001ULL;
+
+// The Fst is a MutableFst
+const uint64 kMutable = 0x0000000000000002ULL;
+
+// An error was detected while constructing/using the FST
+const uint64 kError = 0x0000000000000004ULL;
+
+//
+// TRINARY PROPERTIES
+//
+// For each of these properties below there is a pair of property bits
+// - one positive and one negative. If the positive bit is set, the
+// property is true. If the negative bit is set, the property is
+// false. If neither is set, the property has unknown value. Both
+// should never be simultaneously set. The individual positive and
+// negative bit pairs should be adjacent with the positive bit
+// at an odd and lower position.
+
+// ilabel == olabel for each arc
+const uint64 kAcceptor = 0x0000000000010000ULL;
+// ilabel != olabel for some arc
+const uint64 kNotAcceptor = 0x0000000000020000ULL;
+
+// ilabels unique leaving each state
+const uint64 kIDeterministic = 0x0000000000040000ULL;
+// ilabels not unique leaving some state
+const uint64 kNonIDeterministic = 0x0000000000080000ULL;
+
+// olabels unique leaving each state
+const uint64 kODeterministic = 0x0000000000100000ULL;
+// olabels not unique leaving some state
+const uint64 kNonODeterministic = 0x0000000000200000ULL;
+
+// FST has input/output epsilons
+const uint64 kEpsilons = 0x0000000000400000ULL;
+// FST has no input/output epsilons
+const uint64 kNoEpsilons = 0x0000000000800000ULL;
+
+// FST has input epsilons
+const uint64 kIEpsilons = 0x0000000001000000ULL;
+// FST has no input epsilons
+const uint64 kNoIEpsilons = 0x0000000002000000ULL;
+
+// FST has output epsilons
+const uint64 kOEpsilons = 0x0000000004000000ULL;
+// FST has no output epsilons
+const uint64 kNoOEpsilons = 0x0000000008000000ULL;
+
+// ilabels sorted wrt < for each state
+const uint64 kILabelSorted = 0x0000000010000000ULL;
+// ilabels not sorted wrt < for some state
+const uint64 kNotILabelSorted = 0x0000000020000000ULL;
+
+// olabels sorted wrt < for each state
+const uint64 kOLabelSorted = 0x0000000040000000ULL;
+// olabels not sorted wrt < for some state
+const uint64 kNotOLabelSorted = 0x0000000080000000ULL;
+
+// Non-trivial arc or final weights
+const uint64 kWeighted = 0x0000000100000000ULL;
+// Only trivial arc and final weights
+const uint64 kUnweighted = 0x0000000200000000ULL;
+
+// FST has cycles
+const uint64 kCyclic = 0x0000000400000000ULL;
+// FST has no cycles
+const uint64 kAcyclic = 0x0000000800000000ULL;
+
+// FST has cycles containing the initial state
+const uint64 kInitialCyclic = 0x0000001000000000ULL;
+// FST has no cycles containing the initial state
+const uint64 kInitialAcyclic = 0x0000002000000000ULL;
+
+// FST is topologically sorted
+const uint64 kTopSorted = 0x0000004000000000ULL;
+// FST is not topologically sorted
+const uint64 kNotTopSorted = 0x0000008000000000ULL;
+
+// All states reachable from the initial state
+const uint64 kAccessible = 0x0000010000000000ULL;
+// Not all states reachable from the initial state
+const uint64 kNotAccessible = 0x0000020000000000ULL;
+
+// All states can reach a final state
+const uint64 kCoAccessible = 0x0000040000000000ULL;
+// Not all states can reach a final state
+const uint64 kNotCoAccessible = 0x0000080000000000ULL;
+
+// If NumStates() > 0, then state 0 is initial, state NumStates()-1 is
+// final, there is a transition from each non-final state i to
+// state i+1, and there are no other transitions.
+const uint64 kString = 0x0000100000000000ULL;
+
+// Not a string FST
+const uint64 kNotString = 0x0000200000000000ULL;
+
+//
+// COMPOSITE PROPERTIES
+//
+
+// Properties of an empty machine
+const uint64 kNullProperties
+ = kAcceptor | kIDeterministic | kODeterministic | kNoEpsilons |
+ kNoIEpsilons | kNoOEpsilons | kILabelSorted | kOLabelSorted |
+ kUnweighted | kAcyclic | kInitialAcyclic | kTopSorted |
+ kAccessible | kCoAccessible | kString;
+
+// Properties that are preserved when an FST is copied
+const uint64 kCopyProperties
+ = kError | kAcceptor | kNotAcceptor | kIDeterministic | kNonIDeterministic |
+ kODeterministic | kNonODeterministic | kEpsilons | kNoEpsilons |
+ kIEpsilons | kNoIEpsilons | kOEpsilons | kNoOEpsilons |
+ kILabelSorted | kNotILabelSorted | kOLabelSorted |
+ kNotOLabelSorted | kWeighted | kUnweighted | kCyclic | kAcyclic |
+ kInitialCyclic | kInitialAcyclic | kTopSorted | kNotTopSorted |
+ kAccessible | kNotAccessible | kCoAccessible | kNotCoAccessible |
+ kString | kNotString;
+
+// Properties that are intrinsic to the FST
+const uint64 kIntrinsicProperties
+ = kExpanded | kMutable | kAcceptor | kNotAcceptor | kIDeterministic |
+ kNonIDeterministic | kODeterministic | kNonODeterministic |
+ kEpsilons | kNoEpsilons | kIEpsilons | kNoIEpsilons | kOEpsilons |
+ kNoOEpsilons | kILabelSorted | kNotILabelSorted | kOLabelSorted |
+ kNotOLabelSorted | kWeighted | kUnweighted | kCyclic | kAcyclic |
+ kInitialCyclic | kInitialAcyclic | kTopSorted | kNotTopSorted |
+ kAccessible | kNotAccessible | kCoAccessible | kNotCoAccessible |
+ kString | kNotString;
+
+// Properties that are (potentially) extrinsic to the FST
+const uint64 kExtrinsicProperties = kError;
+
+// Properties that are preserved when an FST start state is set
+const uint64 kSetStartProperties
+ = kExpanded | kMutable | kError | kAcceptor | kNotAcceptor |
+ kIDeterministic | kNonIDeterministic | kODeterministic |
+ kNonODeterministic | kEpsilons | kNoEpsilons | kIEpsilons |
+ kNoIEpsilons | kOEpsilons | kNoOEpsilons | kILabelSorted |
+ kNotILabelSorted | kOLabelSorted | kNotOLabelSorted | kWeighted |
+ kUnweighted | kCyclic | kAcyclic | kTopSorted | kNotTopSorted |
+ kCoAccessible | kNotCoAccessible;
+
+// Properties that are preserved when an FST final weight is set
+const uint64 kSetFinalProperties
+ = kExpanded | kMutable | kError | kAcceptor | kNotAcceptor |
+ kIDeterministic | kNonIDeterministic | kODeterministic |
+ kNonODeterministic | kEpsilons | kNoEpsilons | kIEpsilons |
+ kNoIEpsilons | kOEpsilons | kNoOEpsilons | kILabelSorted |
+ kNotILabelSorted | kOLabelSorted | kNotOLabelSorted | kCyclic |
+ kAcyclic | kInitialCyclic | kInitialAcyclic | kTopSorted |
+ kNotTopSorted | kAccessible | kNotAccessible;
+
+// Properties that are preserved when an FST state is added
+const uint64 kAddStateProperties
+ = kExpanded | kMutable | kError | kAcceptor | kNotAcceptor |
+ kIDeterministic | kNonIDeterministic | kODeterministic |
+ kNonODeterministic | kEpsilons | kNoEpsilons | kIEpsilons |
+ kNoIEpsilons | kOEpsilons | kNoOEpsilons | kILabelSorted |
+ kNotILabelSorted | kOLabelSorted | kNotOLabelSorted | kWeighted |
+ kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
+ kInitialAcyclic | kTopSorted | kNotTopSorted | kNotAccessible |
+ kNotCoAccessible | kNotString;
+
+// Properties that are preserved when an FST arc is added
+const uint64 kAddArcProperties = kExpanded | kMutable | kError | kNotAcceptor |
+ kNonIDeterministic | kNonODeterministic | kEpsilons | kIEpsilons |
+ kOEpsilons | kNotILabelSorted | kNotOLabelSorted | kWeighted |
+ kCyclic | kInitialCyclic | kNotTopSorted | kAccessible | kCoAccessible;
+
+// Properties that are preserved when an FST arc is set
+const uint64 kSetArcProperties = kExpanded | kMutable | kError;
+
+// Properties that are preserved when FST states are deleted
+const uint64 kDeleteStatesProperties
+ = kExpanded | kMutable | kError | kAcceptor | kIDeterministic |
+ kODeterministic | kNoEpsilons | kNoIEpsilons | kNoOEpsilons |
+ kILabelSorted | kOLabelSorted | kUnweighted | kAcyclic |
+ kInitialAcyclic | kTopSorted;
+
+// Properties that are preserved when FST arcs are deleted
+const uint64 kDeleteArcsProperties
+ = kExpanded | kMutable | kError | kAcceptor | kIDeterministic |
+ kODeterministic | kNoEpsilons | kNoIEpsilons | kNoOEpsilons |
+ kILabelSorted | kOLabelSorted | kUnweighted | kAcyclic |
+ kInitialAcyclic | kTopSorted | kNotAccessible | kNotCoAccessible;
+
+// Properties that are preserved when an FST's states are reordered
+const uint64 kStateSortProperties = kExpanded | kMutable | kError | kAcceptor |
+ kNotAcceptor | kIDeterministic | kNonIDeterministic |
+ kODeterministic | kNonODeterministic | kEpsilons | kNoEpsilons |
+ kIEpsilons | kNoIEpsilons | kOEpsilons | kNoOEpsilons |
+ kILabelSorted | kNotILabelSorted | kOLabelSorted | kNotOLabelSorted
+ | kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
+ kInitialAcyclic | kAccessible | kNotAccessible | kCoAccessible |
+ kNotCoAccessible;
+
+// Properties that are preserved when an FST's arcs are reordered
+const uint64 kArcSortProperties =
+ kExpanded | kMutable | kError | kAcceptor | kNotAcceptor | kIDeterministic |
+ kNonIDeterministic | kODeterministic | kNonODeterministic |
+ kEpsilons | kNoEpsilons | kIEpsilons | kNoIEpsilons | kOEpsilons |
+ kNoOEpsilons | kWeighted | kUnweighted | kCyclic | kAcyclic |
+ kInitialCyclic | kInitialAcyclic | kTopSorted | kNotTopSorted |
+ kAccessible | kNotAccessible | kCoAccessible | kNotCoAccessible |
+ kString | kNotString;
+
+// Properties that are preserved when an FST's input labels are changed.
+const uint64 kILabelInvariantProperties =
+ kExpanded | kMutable | kError | kODeterministic | kNonODeterministic |
+ kOEpsilons | kNoOEpsilons | kOLabelSorted | kNotOLabelSorted |
+ kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
+ kInitialAcyclic | kTopSorted | kNotTopSorted | kAccessible |
+ kNotAccessible | kCoAccessible | kNotCoAccessible | kString | kNotString;
+
+// Properties that are preserved when an FST's output labels are changed.
+const uint64 kOLabelInvariantProperties =
+ kExpanded | kMutable | kError | kIDeterministic | kNonIDeterministic |
+ kIEpsilons | kNoIEpsilons | kILabelSorted | kNotILabelSorted |
+ kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
+ kInitialAcyclic | kTopSorted | kNotTopSorted | kAccessible |
+ kNotAccessible | kCoAccessible | kNotCoAccessible | kString | kNotString;
+
+// Properties that are preserved when an FST's weights are changed.
+// This assumes that the set of states that are non-final is not changed.
+const uint64 kWeightInvariantProperties =
+ kExpanded | kMutable | kError | kAcceptor | kNotAcceptor | kIDeterministic |
+ kNonIDeterministic | kODeterministic | kNonODeterministic |
+ kEpsilons | kNoEpsilons | kIEpsilons | kNoIEpsilons | kOEpsilons |
+ kNoOEpsilons | kILabelSorted | kNotILabelSorted | kOLabelSorted |
+ kNotOLabelSorted | kCyclic | kAcyclic | kInitialCyclic | kInitialAcyclic |
+ kTopSorted | kNotTopSorted | kAccessible | kNotAccessible | kCoAccessible |
+ kNotCoAccessible | kString | kNotString;
+
+// Properties that are preserved when a superfinal state is added
+// and an FST's final weights are directed to it via new transitions.
+const uint64 kAddSuperFinalProperties = kExpanded | kMutable | kError |
+ kAcceptor | kNotAcceptor | kNonIDeterministic | kNonODeterministic |
+ kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted | kNotOLabelSorted |
+ kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
+ kInitialAcyclic | kNotTopSorted | kNotAccessible | kCoAccessible |
+ kNotCoAccessible | kNotString;
+
+// Properties that are preserved when a superfinal state is removed
+// and the epsilon transitions directed to it are made final weights.
+const uint64 kRmSuperFinalProperties = kExpanded | kMutable | kError |
+ kAcceptor | kNotAcceptor | kIDeterministic | kODeterministic |
+ kNoEpsilons | kNoIEpsilons | kNoOEpsilons | kILabelSorted | kOLabelSorted |
+ kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
+ kInitialAcyclic | kTopSorted | kAccessible | kCoAccessible |
+ kNotCoAccessible | kString;
+
+// All binary properties
+const uint64 kBinaryProperties = 0x0000000000000007ULL;
+
+// All trinary properties
+const uint64 kTrinaryProperties = 0x00003fffffff0000ULL;
+
+//
+// COMPUTED PROPERTIES
+//
+
+// 1st bit of trinary properties
+const uint64 kPosTrinaryProperties =
+ kTrinaryProperties & 0x5555555555555555ULL;
+
+// 2nd bit of trinary properties
+const uint64 kNegTrinaryProperties =
+ kTrinaryProperties & 0xaaaaaaaaaaaaaaaaULL;
+
+// All properties
+const uint64 kFstProperties = kBinaryProperties | kTrinaryProperties;
+
+//
+// PROPERTY FUNCTIONS and STRING NAMES (defined in properties.cc)
+//
+
+// Below are functions for getting property bit vectors when executing
+// mutating fst operations.
+inline uint64 SetStartProperties(uint64 inprops);
+template <typename Weight>
+uint64 SetFinalProperties(uint64 inprops, Weight old_weight,
+ Weight new_weight);
+inline uint64 AddStateProperties(uint64 inprops);
+template <typename A>
+uint64 AddArcProperties(uint64 inprops, typename A::StateId s, const A &arc,
+ const A *prev_arc);
+inline uint64 DeleteStatesProperties(uint64 inprops);
+inline uint64 DeleteAllStatesProperties(uint64 inprops, uint64 staticProps);
+inline uint64 DeleteArcsProperties(uint64 inprops);
+
+uint64 ClosureProperties(uint64 inprops, bool star, bool delayed = false);
+uint64 ComplementProperties(uint64 inprops);
+uint64 ComposeProperties(uint64 inprops1, uint64 inprops2);
+uint64 ConcatProperties(uint64 inprops1, uint64 inprops2,
+ bool delayed = false);
+uint64 DeterminizeProperties(uint64 inprops, bool has_subsequential_label);
+uint64 FactorWeightProperties(uint64 inprops);
+uint64 InvertProperties(uint64 inprops);
+uint64 ProjectProperties(uint64 inprops, bool project_input);
+uint64 RandGenProperties(uint64 inprops, bool weighted);
+uint64 RelabelProperties(uint64 inprops);
+uint64 ReplaceProperties(const vector<uint64>& inprops,
+ ssize_t root,
+ bool epsilon_on_replace,
+ bool no_empty_fst);
+uint64 ReverseProperties(uint64 inprops);
+uint64 ReweightProperties(uint64 inprops);
+uint64 RmEpsilonProperties(uint64 inprops, bool delayed = false);
+uint64 ShortestPathProperties(uint64 props);
+uint64 SynchronizeProperties(uint64 inprops);
+uint64 UnionProperties(uint64 inprops1, uint64 inprops2, bool delayed = false);
+
+// Definitions of inlined functions.
+
+uint64 SetStartProperties(uint64 inprops) {
+  // Keeps the bits unaffected by a change of start state. An FST known to
+  // be acyclic has no cycle through any state, so the new start state is
+  // also known to be initial-acyclic.
+  const uint64 initial_bit = (inprops & kAcyclic) ? kInitialAcyclic : 0;
+  return (inprops & kSetStartProperties) | initial_bit;
+}
+
+uint64 AddStateProperties(uint64 inprops) {
+ return inprops & kAddStateProperties;  // bits that survive adding a state
+}
+
+uint64 DeleteStatesProperties(uint64 inprops) {
+ return inprops & kDeleteStatesProperties;  // bits that survive deletion
+}
+
+uint64 DeleteAllStatesProperties(uint64 inprops, uint64 staticprops) {
+  // Keeps only kError, then adds the empty-machine and static bits.
+  return (inprops & kError) | kNullProperties | staticprops;
+}
+
+uint64 DeleteArcsProperties(uint64 inprops) {
+ return inprops & kDeleteArcsProperties;  // bits that survive arc deletion
+}
+
+// Definitions of template functions.
+
+// Property bits after a final weight changes from old_weight to new_weight.
+template <typename Weight>
+uint64 SetFinalProperties(uint64 inprops, Weight old_weight,
+                          Weight new_weight) {
+  const bool old_nontrivial =
+      old_weight != Weight::Zero() && old_weight != Weight::One();
+  const bool new_nontrivial =
+      new_weight != Weight::Zero() && new_weight != Weight::One();
+  uint64 outprops = inprops;
+  // Replacing a non-trivial weight: the FST may no longer be weighted.
+  if (old_nontrivial) outprops &= ~kWeighted;
+  // Installing a non-trivial weight: the FST is now known to be weighted.
+  if (new_nontrivial) outprops = (outprops | kWeighted) & ~kUnweighted;
+  return outprops & (kSetFinalProperties | kWeighted | kUnweighted);
+}
+
+/// Computes the property bits that hold after MutableFst::AddArc.
+///
+/// \param inprops the property bits of the fst before the arc is added
+/// \param s the id of the state that receives the new arc
+/// \param arc the arc being appended to state s
+/// \param prev_arc the arc that is currently last at state s, or NULL if
+/// s has no arcs yet
+template <typename A>
+uint64 AddArcProperties(uint64 inprops, typename A::StateId s,
+                        const A &arc, const A *prev_arc) {
+  uint64 outprops = inprops;
+  // Each test below moves one trinary property to the side the arc implies.
+  if (arc.ilabel != arc.olabel) {
+    outprops = (outprops | kNotAcceptor) & ~kAcceptor;
+  }
+  if (arc.ilabel == 0) {
+    outprops = (outprops | kIEpsilons) & ~kNoIEpsilons;
+    // kEpsilons is only asserted when both labels of the arc are epsilon.
+    if (arc.olabel == 0) {
+      outprops = (outprops | kEpsilons) & ~kNoEpsilons;
+    }
+  }
+  if (arc.olabel == 0) {
+    outprops = (outprops | kOEpsilons) & ~kNoOEpsilons;
+  }
+  if (prev_arc != 0) {
+    // Sortedness is checked against the previous last arc only.
+    if (prev_arc->ilabel > arc.ilabel) {
+      outprops = (outprops | kNotILabelSorted) & ~kILabelSorted;
+    }
+    if (prev_arc->olabel > arc.olabel) {
+      outprops = (outprops | kNotOLabelSorted) & ~kOLabelSorted;
+    }
+  }
+  // Zero() and One() are the only weights treated as trivial.
+  if (arc.weight != A::Weight::Zero() && arc.weight != A::Weight::One()) {
+    outprops = (outprops | kWeighted) & ~kUnweighted;
+  }
+  // An arc to the same or an earlier state breaks the topological order.
+  if (arc.nextstate <= s) {
+    outprops = (outprops | kNotTopSorted) & ~kTopSorted;
+  }
+  // Drop every bit AddArc cannot preserve, but keep the positive bits that
+  // the tests above would already have cleared had the arc violated them.
+  outprops &= kAddArcProperties | kAcceptor |
+              kNoEpsilons | kNoIEpsilons | kNoOEpsilons |
+              kILabelSorted | kOLabelSorted | kUnweighted | kTopSorted;
+  // A topologically sorted FST cannot contain a cycle.
+  if (outprops & kTopSorted) {
+    outprops |= kAcyclic | kInitialAcyclic;
+  }
+  return outprops;
+}
+
+extern const char *PropertyNames[];
+
+} // namespace fst
+
+#endif // FST_LIB_PROPERTIES_H__
diff --git a/src/include/fst/prune.h b/src/include/fst/prune.h
new file mode 100644
index 0000000..5ea5b4d
--- /dev/null
+++ b/src/include/fst/prune.h
@@ -0,0 +1,339 @@
+// prune.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// Functions implementing pruning.
+
+#ifndef FST_LIB_PRUNE_H__
+#define FST_LIB_PRUNE_H__
+
+#include <vector>
+using std::vector;
+
+#include <fst/arcfilter.h>
+#include <fst/heap.h>
+#include <fst/shortest-distance.h>
+
+
+namespace fst {
+
+template <class A, class ArcFilter>
+class PruneOptions {
+ public:
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+
+ // Weight threshold, Times()'d with the shortest-path weight to get the limit.
+ Weight weight_threshold;
+ // Maximum number of states to keep; kNoStateId disables the limit.
+ StateId state_threshold;
+ // Arc filter; arcs it rejects are skipped (not followed) during pruning.
+ ArcFilter filter;
+ // If non-zero, passes in pre-computed shortest distance to final states.
+ const vector<Weight> *distance;
+ // Determines the degree of convergence required when computing shortest
+ // distances.
+ float delta;
+
+ explicit PruneOptions(const Weight& w, StateId s, ArcFilter f,
+ vector<Weight> *d = 0, float e = kDelta)
+ : weight_threshold(w),
+ state_threshold(s),
+ filter(f),
+ distance(d),
+ delta(e) {}
+ private:
+ PruneOptions(); // disallow
+};
+
+// Compares states by Times(idistance[s], fdistance[s]) under NaturalLess.
+template <class S, class W>
+class PruneCompare {
+ public:
+  typedef S StateId;
+  typedef W Weight;
+
+  PruneCompare(const vector<Weight> &idistance,
+               const vector<Weight> &fdistance)
+      : idistance_(idistance), fdistance_(fdistance) {}
+
+  bool operator()(const StateId x, const StateId y) const {
+    return less_(PathWeight(x), PathWeight(y));
+  }
+
+ private:
+  // A state beyond the end of either vector contributes Weight::Zero().
+  Weight PathWeight(const StateId s) const {
+    return Times(s < idistance_.size() ? idistance_[s] : Weight::Zero(),
+                 s < fdistance_.size() ? fdistance_[s] : Weight::Zero());
+  }
+
+  const vector<Weight> &idistance_;
+  const vector<Weight> &fdistance_;
+  NaturalLess<Weight> less_;
+};
+
+// Pruning algorithm: this version modifies its input and it takes an
+// options class as an argument. Delete states and arcs in 'fst' that
+// do not belong to a successful path whose weight is no more than
+// the weight of the shortest path Times() 'opts.weight_threshold'.
+// When 'opts.state_threshold != kNoStateId', the resulting transducer
+// will be restricted further to have at most 'opts.state_threshold'
+// states. Weights need to be commutative and have the path
+// property. The weight 'w' of any cycle needs to be bounded, i.e.,
+// 'Plus(w, W::One()) = One()'.
+template <class Arc, class ArcFilter>
+void Prune(MutableFst<Arc> *fst,
+ const PruneOptions<Arc, ArcFilter> &opts) {
+ typedef typename Arc::Weight Weight;
+ typedef typename Arc::StateId StateId;
+
+ if ((Weight::Properties() & (kPath | kCommutative))
+ != (kPath | kCommutative)) {
+ FSTERROR() << "Prune: Weight needs to have the path property and"
+ << " be commutative: "
+ << Weight::Type();
+ fst->SetProperties(kError, kError);
+ return;
+ }
+ StateId ns = fst->NumStates();
+ if (ns == 0) return;
+ vector<Weight> idistance(ns, Weight::Zero());
+ vector<Weight> tmp;
+ if (!opts.distance) {  // no pre-computed distances supplied: compute them
+ tmp.reserve(ns);
+ ShortestDistance(*fst, &tmp, true, opts.delta);
+ }
+ const vector<Weight> *fdistance = opts.distance ? opts.distance : &tmp;
+
+ if ((opts.state_threshold == 0) ||
+ (fdistance->size() <= fst->Start()) ||
+ ((*fdistance)[fst->Start()] == Weight::Zero())) {
+ fst->DeleteStates();  // nothing can survive: empty the FST
+ return;
+ }
+ PruneCompare<StateId, Weight> compare(idistance, *fdistance);
+ Heap< StateId, PruneCompare<StateId, Weight>, false> heap(compare);
+ vector<bool> visited(ns, false);
+ vector<size_t> enqueued(ns, kNoKey);
+ vector<StateId> dead;
+ dead.push_back(fst->AddState());  // new sink state; deleted at the end
+ NaturalLess<Weight> less;
+ Weight limit = Times((*fdistance)[fst->Start()], opts.weight_threshold);
+
+ StateId num_visited = 0;
+ StateId s = fst->Start();
+ if (!less(limit, (*fdistance)[s])) {
+ idistance[s] = Weight::One();
+ enqueued[s] = heap.Insert(s);
+ ++num_visited;
+ }
+
+ while (!heap.Empty()) {
+ s = heap.Top();
+ heap.Pop();
+ enqueued[s] = kNoKey;
+ visited[s] = true;
+ if (less(limit, Times(idistance[s], fst->Final(s))))
+ fst->SetFinal(s, Weight::Zero());  // final weight exceeds the limit
+ for (MutableArcIterator< MutableFst<Arc> > ait(fst, s);
+ !ait.Done();
+ ait.Next()) {
+ Arc arc = ait.Value();
+ if (!opts.filter(arc)) continue;
+ Weight weight = Times(Times(idistance[s], arc.weight),
+ arc.nextstate < fdistance->size()
+ ? (*fdistance)[arc.nextstate]
+ : Weight::Zero());
+ if (less(limit, weight)) {
+ arc.nextstate = dead[0];  // redirect the pruned arc to the sink
+ ait.SetValue(arc);
+ continue;
+ }
+ if (less(Times(idistance[s], arc.weight), idistance[arc.nextstate]))
+ idistance[arc.nextstate] = Times(idistance[s], arc.weight);
+ if (visited[arc.nextstate]) continue;
+ if ((opts.state_threshold != kNoStateId) &&
+ (num_visited >= opts.state_threshold))
+ continue;
+ if (enqueued[arc.nextstate] == kNoKey) {
+ enqueued[arc.nextstate] = heap.Insert(arc.nextstate);
+ ++num_visited;
+ } else {
+ heap.Update(enqueued[arc.nextstate], arc.nextstate);
+ }
+ }
+ }
+ for (size_t i = 0; i < visited.size(); ++i)
+ if (!visited[i]) dead.push_back(i);  // never dequeued
+ fst->DeleteStates(dead);
+}
+
+
+// Pruning algorithm: this version modifies its input and simply takes
+// the pruning threshold as an argument. Delete states and arcs in
+// 'fst' that do not belong to a successful path whose weight is no
+// more than the weight of the shortest path Times()
+// 'weight_threshold'. When 'state_threshold != kNoStateId', the
+// resulting transducer will be restricted further to have at most
+// 'state_threshold' states. Weights need to be commutative and
+// have the path property. The weight 'w' of any cycle needs to be
+// bounded, i.e., 'Plus(w, W::One()) = One()'.
+template <class Arc>
+void Prune(MutableFst<Arc> *fst,
+ typename Arc::Weight weight_threshold,
+ typename Arc::StateId state_threshold = kNoStateId,
+ double delta = kDelta) {
+ PruneOptions<Arc, AnyArcFilter<Arc> > opts(weight_threshold, state_threshold,
+ AnyArcFilter<Arc>(), 0, delta);  // 0: no pre-computed distances
+ Prune(fst, opts);
+}
+
+
+// Pruning algorithm: this version writes the pruned input Fst to an
+// output MutableFst and it takes an options class as an argument.
+// 'ofst' contains states and arcs that belong to a successful path in
+// 'ifst' whose weight is no more than the weight of the shortest path
+// Times() 'opts.weight_threshold'. When 'opts.state_threshold !=
+// kNoStateId', 'ofst' will be restricted further to have at most
+// 'opts.state_threshold' states. Weights need to be commutative and
+// have the path property (otherwise 'ofst' is flagged with kError). The
+// weight 'w' of any cycle must satisfy 'Plus(w, W::One()) = W::One()'.
+template <class Arc, class ArcFilter>
+void Prune(const Fst<Arc> &ifst,
+ MutableFst<Arc> *ofst,
+ const PruneOptions<Arc, ArcFilter> &opts) {
+ typedef typename Arc::Weight Weight;
+ typedef typename Arc::StateId StateId;
+
+ if ((Weight::Properties() & (kPath | kCommutative))
+ != (kPath | kCommutative)) {  // both properties are required
+ FSTERROR() << "Prune: Weight needs to have the path property and"
+ << " be commutative: "
+ << Weight::Type();
+ ofst->SetProperties(kError, kError);  // mark the output as erroneous
+ return;
+ }
+ ofst->DeleteStates();  // start from an empty output
+ ofst->SetInputSymbols(ifst.InputSymbols());
+ ofst->SetOutputSymbols(ifst.OutputSymbols());
+ if (ifst.Start() == kNoStateId)  // no start state: 'ofst' stays empty
+ return;
+ NaturalLess<Weight> less;
+ if (less(opts.weight_threshold, Weight::One()) ||  // threshold < One()
+ (opts.state_threshold == 0))  // or no state may be kept
+ return;  // 'ofst' stays empty
+ vector<Weight> idistance;  // distance from the start state, per ifst state
+ vector<Weight> tmp;  // holds the distances computed here, if any
+ if (!opts.distance)  // caller supplied no distance vector
+ ShortestDistance(ifst, &tmp, true, opts.delta);  // NOTE(review): 'true' presumably selects reverse distances -- confirm
+ const vector<Weight> *fdistance = opts.distance ? opts.distance : &tmp;
+
+ if ((fdistance->size() <= ifst.Start()) ||  // no distance for the start state
+ ((*fdistance)[ifst.Start()] == Weight::Zero())) {  // or it is Zero()
+ return;  // 'ofst' stays empty
+ }
+ PruneCompare<StateId, Weight> compare(idistance, *fdistance);
+ Heap< StateId, PruneCompare<StateId, Weight>, false> heap(compare);
+ vector<StateId> copy;  // ifst state -> ofst state, kNoStateId if not copied
+ vector<size_t> enqueued;  // heap key per ifst state, kNoKey if not in heap
+ vector<bool> visited;  // true once a state has been popped from the heap
+
+ StateId s = ifst.Start();
+ Weight limit = Times(s < fdistance->size() ? (*fdistance)[s] : Weight::Zero(),
+ opts.weight_threshold);  // pruning bound: start distance Times() threshold
+ while (copy.size() <= s)  // grow the map so that index 's' exists
+ copy.push_back(kNoStateId);
+ copy[s] = ofst->AddState();
+ ofst->SetStart(copy[s]);
+ while (idistance.size() <= s)  // grow so that index 's' exists
+ idistance.push_back(Weight::Zero());
+ idistance[s] = Weight::One();  // the start state is at distance One()
+ while (enqueued.size() <= s) {  // 'enqueued' and 'visited' grow together
+ enqueued.push_back(kNoKey);
+ visited.push_back(false);
+ }
+ enqueued[s] = heap.Insert(s);
+
+ while (!heap.Empty()) {
+ s = heap.Top();
+ heap.Pop();
+ enqueued[s] = kNoKey;
+ visited[s] = true;
+ if (!less(limit, Times(idistance[s], ifst.Final(s))))  // within the bound
+ ofst->SetFinal(copy[s], ifst.Final(s));  // keep the final weight
+ for (ArcIterator< Fst<Arc> > ait(ifst, s);
+ !ait.Done();
+ ait.Next()) {
+ const Arc &arc = ait.Value();
+ if (!opts.filter(arc)) continue;  // arc rejected by the filter
+ Weight weight = Times(Times(idistance[s], arc.weight),
+ arc.nextstate < fdistance->size()
+ ? (*fdistance)[arc.nextstate]
+ : Weight::Zero());  // idistance[s] * arc.weight * fdistance[nextstate]
+ if (less(limit, weight)) continue;  // exceeds the bound: drop the arc
+ if ((opts.state_threshold != kNoStateId) &&
+ (ofst->NumStates() >= opts.state_threshold))  // state budget used up
+ continue;  // dropped even if the destination was already copied
+ while (idistance.size() <= arc.nextstate)
+ idistance.push_back(Weight::Zero());
+ if (less(Times(idistance[s], arc.weight),
+ idistance[arc.nextstate]))  // shorter way to reach nextstate
+ idistance[arc.nextstate] = Times(idistance[s], arc.weight);
+ while (copy.size() <= arc.nextstate)
+ copy.push_back(kNoStateId);
+ if (copy[arc.nextstate] == kNoStateId)  // first time this state is kept
+ copy[arc.nextstate] = ofst->AddState();
+ ofst->AddArc(copy[s], Arc(arc.ilabel, arc.olabel, arc.weight,
+ copy[arc.nextstate]));
+ while (enqueued.size() <= arc.nextstate) {
+ enqueued.push_back(kNoKey);
+ visited.push_back(false);
+ }
+ if (visited[arc.nextstate]) continue;  // already popped: do not requeue
+ if (enqueued[arc.nextstate] == kNoKey)  // not in the heap yet
+ enqueued[arc.nextstate] = heap.Insert(arc.nextstate);
+ else  // already in the heap: refresh its position
+ heap.Update(enqueued[arc.nextstate], arc.nextstate);
+ }
+ }
+}
+
+
+// Pruning algorithm, copying convenience overload: it writes the pruned
+// input Fst to an output MutableFst and takes the pruning thresholds
+// directly instead of an options object. 'ofst' contains states and
+// arcs that belong to a successful path in 'ifst' whose weight is no
+// more than the weight of the shortest path Times() 'weight_threshold'.
+// When 'state_threshold != kNoStateId', 'ofst' will be restricted
+// further to have at most 'state_threshold' states. Weights need to be
+// commutative and have the path property. The weight 'w' of any cycle
+// needs to be bounded, i.e., 'Plus(w, W::One()) = W::One()'.
+template <class Arc>
+void Prune(const Fst<Arc> &ifst,
+           MutableFst<Arc> *ofst,
+           typename Arc::Weight weight_threshold,
+           typename Arc::StateId state_threshold = kNoStateId,
+           float delta = kDelta) {
+  typedef AnyArcFilter<Arc> Filter;
+
+  // The null fourth argument leaves 'distance' unset, so the
+  // options-based Prune computes the shortest distances itself.
+  PruneOptions<Arc, Filter> prune_opts(weight_threshold, state_threshold,
+                                       Filter(), 0, delta);
+  Prune(ifst, ofst, prune_opts);
+}
+
+} // namespace fst
+
+#endif // FST_LIB_PRUNE_H_
diff --git a/src/include/fst/push.h b/src/include/fst/push.h
new file mode 100644
index 0000000..1f7a8fa
--- /dev/null
+++ b/src/include/fst/push.h
@@ -0,0 +1,175 @@
+// push.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// Class to reweight/push an FST.
+
+#ifndef FST_LIB_PUSH_H__
+#define FST_LIB_PUSH_H__
+
+#include <vector>
+using std::vector;
+
+#include <fst/factor-weight.h>
+#include <fst/fst.h>
+#include <fst/arc-map.h>
+#include <fst/reweight.h>
+#include <fst/shortest-distance.h>
+
+
+namespace fst {
+
+// Private helper functions for Push
+namespace internal {
+
+// Returns the total weight (sum of the weights of all accepting paths)
+// derived from the output of ShortestDistance. 'distance' holds the
+// shortest distance from the initial state when 'reverse == false' and
+// to the final states when 'reverse == true'. In the reverse case the
+// total is simply the entry for the start state (Zero() if 'distance'
+// has no such entry); otherwise it is the Plus() over all states of the
+// distance Times() the final weight.
+template <class Arc>
+typename Arc::Weight ComputeTotalWeight(
+    const Fst<Arc> &fst,
+    const vector<typename Arc::Weight> &distance,
+    bool reverse) {
+  typedef typename Arc::Weight Weight;
+  typedef typename Arc::StateId StateId;
+
+  if (!reverse) {
+    Weight total = Weight::Zero();
+    for (StateId state = 0; state < distance.size(); ++state)
+      total = Plus(total, Times(distance[state], fst.Final(state)));
+    return total;
+  }
+
+  if (fst.Start() < distance.size())
+    return distance[fst.Start()];
+  return Weight::Zero();
+}
+
+// Divides the weight of every accepting path by 'w'. When
+// 'at_final == true' the final weight of every state is right-divided
+// by 'w'; otherwise the arcs leaving the initial state and its final
+// weight are left-divided by 'w'. Nothing is done when 'w' is One() or
+// Zero().
+template <class Arc>
+void RemoveWeight(MutableFst<Arc> *fst, typename Arc::Weight w, bool at_final) {
+  typedef typename Arc::Weight Weight;
+  typedef typename Arc::StateId StateId;
+
+  const bool nothing_to_remove = (w == Weight::One()) || (w == Weight::Zero());
+  if (nothing_to_remove)
+    return;
+
+  if (!at_final) {
+    // Remove 'w' at the initial state: outgoing arcs first, then its
+    // final weight.
+    const StateId initial = fst->Start();
+    for (MutableArcIterator<MutableFst<Arc> > arc_it(fst, initial);
+         !arc_it.Done();
+         arc_it.Next()) {
+      Arc updated = arc_it.Value();
+      updated.weight = Divide(updated.weight, w, DIVIDE_LEFT);
+      arc_it.SetValue(updated);
+    }
+    fst->SetFinal(initial, Divide(fst->Final(initial), w, DIVIDE_LEFT));
+    return;
+  }
+
+  // Remove 'w' from the final weight of every state.
+  for (StateIterator< MutableFst<Arc> > state_it(*fst);
+       !state_it.Done();
+       state_it.Next()) {
+    fst->SetFinal(state_it.Value(),
+                  Divide(fst->Final(state_it.Value()), w, DIVIDE_RIGHT));
+  }
+}
+} // namespace internal
+
+// Pushes the weights in FST in the direction defined by TYPE. If
+// pushing towards the initial state, the sum of the weight of the
+// outgoing transitions and final weight at a non-initial state is
+// equal to One() in the resulting machine. If pushing towards the
+// final state, the same property holds on the reverse machine.
+// When 'remove_total_weight' is true, the total weight of the machine
+// is additionally divided out after reweighting.
+//
+// Weight needs to be left distributive when pushing towards the
+// initial state and right distributive when pushing towards the final
+// states.
+template <class Arc>
+void Push(MutableFst<Arc> *fst,
+          ReweightType type,
+          float delta = kDelta,
+          bool remove_total_weight = false) {
+  typedef typename Arc::Weight Weight;
+
+  const bool to_initial = (type == REWEIGHT_TO_INITIAL);
+  vector<Weight> distance;
+  ShortestDistance(*fst, &distance, to_initial, delta);
+
+  if (!remove_total_weight) {
+    Reweight(fst, distance, type);
+    return;
+  }
+
+  // The total weight has to be taken before the machine is reweighted.
+  const Weight total_weight =
+      internal::ComputeTotalWeight(*fst, distance, to_initial);
+  Reweight(fst, distance, type);
+  internal::RemoveWeight(fst, total_weight, type == REWEIGHT_TO_FINAL);
+}
+
+// Bit flags that may be OR-ed together to form the 'ptype' argument of
+// the two-FST version of Push().
+const uint32 kPushWeights = 1U << 0;            // Push weights.
+const uint32 kPushLabels = 1U << 1;             // Push labels.
+const uint32 kPushRemoveTotalWeight = 1U << 2;  // Divide out the total weight.
+const uint32 kPushRemoveCommonAffix = 1U << 3;  // Divide out the common label
+                                                // string of the total weight.
+
+// OFST obtained from IFST by pushing weights and/or labels according
+// to PTYPE in the direction defined by RTYPE. Weight needs to be
+// left distributive when pushing weights towards the initial state
+// and right distributive when pushing weights towards the final
+// states. If PTYPE requests neither weights nor labels, a warning is
+// logged and OFST is a plain copy of IFST.
+template <class Arc, ReweightType rtype>
+void Push(const Fst<Arc> &ifst,
+          MutableFst<Arc> *ofst,
+          uint32 ptype,
+          float delta = kDelta) {
+  const bool push_weights = (ptype & kPushWeights) != 0;
+  const bool push_labels = (ptype & kPushLabels) != 0;
+
+  if (!push_weights && !push_labels) {
+    LOG(WARNING) << "Push: pushing type is set to 0: "
+                 << "pushing neither labels nor weights.";
+    *ofst = ifst;
+    return;
+  }
+
+  if (!push_labels) {
+    // Weights only: copy the input and push in place.
+    *ofst = ifst;
+    Push(ofst, rtype, delta, ptype & kPushRemoveTotalWeight);
+    return;
+  }
+
+  // Labels (and possibly weights) are pushed on a Gallic version of the
+  // input, whose weights pair a label string with the original weight.
+  const StringType stype = rtype == REWEIGHT_TO_INITIAL
+                           ? STRING_LEFT
+                           : STRING_RIGHT;
+  typedef GallicArc<Arc, stype> GArc;
+  typedef typename GArc::Weight GWeight;
+
+  vector<GWeight> gdistance;
+  VectorFst<GArc> gfst;
+  ArcMap(ifst, &gfst, ToGallicMapper<Arc, stype>());
+  if (push_weights) {
+    ShortestDistance(gfst, &gdistance, rtype == REWEIGHT_TO_INITIAL, delta);
+  } else {
+    // Labels only: distances are computed on a weight-free view of the
+    // input so that only the label strings get pushed.
+    ArcMapFst<Arc, Arc, RmWeightMapper<Arc> >
+        uwfst(ifst, RmWeightMapper<Arc>());
+    ArcMapFst<Arc, GArc, ToGallicMapper<Arc, stype> >
+        guwfst(uwfst, ToGallicMapper<Arc, stype>());
+    ShortestDistance(guwfst, &gdistance, rtype == REWEIGHT_TO_INITIAL, delta);
+  }
+
+  const bool remove_total =
+      (ptype & (kPushRemoveTotalWeight | kPushRemoveCommonAffix)) != 0;
+  GWeight total_weight = GWeight::One();
+  if (remove_total) {
+    total_weight = internal::ComputeTotalWeight(
+        gfst, gdistance, rtype == REWEIGHT_TO_INITIAL);
+    // Keep only the requested component(s) of the total weight; the
+    // other one is replaced by its One() so that dividing is a no-op.
+    total_weight = GWeight(
+        ptype & kPushRemoveCommonAffix ? total_weight.Value1()
+        : StringWeight<typename Arc::Label, stype>::One(),
+        ptype & kPushRemoveTotalWeight ? total_weight.Value2()
+        : Arc::Weight::One());
+  }
+
+  Reweight(&gfst, gdistance, rtype);
+  if (remove_total)
+    internal::RemoveWeight(&gfst, total_weight, rtype == REWEIGHT_TO_FINAL);
+
+  // Map the Gallic machine back to the original arc type.
+  FactorWeightFst< GArc, GallicFactor<typename Arc::Label,
+      typename Arc::Weight, stype> > fwfst(gfst);
+  ArcMap(fwfst, ofst, FromGallicMapper<Arc, stype>());
+  ofst->SetOutputSymbols(ifst.OutputSymbols());
+}
+
+} // namespace fst
+
+#endif  // FST_LIB_PUSH_H__
diff --git a/src/include/fst/queue.h b/src/include/fst/queue.h
new file mode 100644
index 0000000..707dffc
--- /dev/null
+++ b/src/include/fst/queue.h
@@ -0,0 +1,889 @@
+// queue.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// Functions and classes for various Fst state queues with
+// a unified interface.
+
+#ifndef FST_LIB_QUEUE_H__
+#define FST_LIB_QUEUE_H__
+
+#include <deque>
+#include <vector>
+using std::vector;
+
+#include <fst/arcfilter.h>
+#include <fst/connect.h>
+#include <fst/heap.h>
+#include <fst/topsort.h>
+
+
+namespace fst {
+
+// template <class S>
+// class Queue {
+// public:
+// typedef S StateId;
+//
+// // Ctr: may need args (e.g., Fst, comparator) for some queues
+// Queue(...);
+// // Returns the head of the queue
+// StateId Head() const;
+// // Inserts a state
+// void Enqueue(StateId s);
+// // Removes the head of the queue
+// void Dequeue();
+// // Updates ordering of state s when weight changes, if necessary
+// void Update(StateId s);
+// // Does the queue contain no elements?
+// bool Empty() const;
+// // Remove all states from queue
+// void Clear();
+// };
+
+// State queue types. The numeric values are spelled out explicitly.
+enum QueueType {
+  // Single state queue
+  TRIVIAL_QUEUE = 0,
+  // First-in, first-out queue
+  FIFO_QUEUE = 1,
+  // Last-in, first-out queue
+  LIFO_QUEUE = 2,
+  // Shortest-first queue
+  SHORTEST_FIRST_QUEUE = 3,
+  // Topologically-ordered queue
+  TOP_ORDER_QUEUE = 4,
+  // State-ID ordered queue
+  STATE_ORDER_QUEUE = 5,
+  // Component graph top-ordered meta-queue
+  SCC_QUEUE = 6,
+  // Auto-selected queue
+  AUTO_QUEUE = 7,
+  // NOTE(review): presumably any discipline not listed above -- confirm.
+  OTHER_QUEUE = 8
+};
+
+
+// QueueBase, templated on the StateId, is the base class shared by the
+// queues considered by AutoQueue. Every public operation forwards to a
+// private pure-virtual hook of the same name with a trailing
+// underscore, which the derived queues implement. The base also stores
+// the queue type given at construction and an error flag.
+template <class S>
+class QueueBase {
+ public:
+  typedef S StateId;
+
+  // Records the queue type; the error flag starts out cleared.
+  QueueBase(QueueType type) : queue_type_(type), error_(false) {}
+  virtual ~QueueBase() {}
+  StateId Head() const { return Head_(); }
+  void Enqueue(StateId s) { Enqueue_(s); }
+  void Dequeue() { Dequeue_(); }
+  void Update(StateId s) { Update_(s); }
+  bool Empty() const { return Empty_(); }
+  void Clear() { Clear_(); }
+  // Returns the queue type given at construction. Declared const so the
+  // type can also be queried through a const reference or pointer.
+  QueueType Type() const { return queue_type_; }
+  bool Error() const { return error_; }
+  void SetError(bool error) { error_ = error; }
+
+ private:
+  // This allows base-class virtual access to non-virtual derived-
+  // class members of the same name. It makes the derived class more
+  // efficient to use but unsafe to further derive.
+  virtual StateId Head_() const = 0;
+  virtual void Enqueue_(StateId s) = 0;
+  virtual void Dequeue_() = 0;
+  virtual void Update_(StateId s) = 0;
+  virtual bool Empty_() const = 0;
+  virtual void Clear_() = 0;
+
+  QueueType queue_type_;
+  bool error_;
+};
+
+
+// Trivial queue discipline, templated on the StateId. It holds at most
+// one state at a time: enqueuing overwrites whatever was stored. It is
+// used for strongly connected components with only one state and no
+// self loops.
+template <class S>
+class TrivialQueue : public QueueBase<S> {
+ public:
+  typedef S StateId;
+
+  // Starts out empty.
+  TrivialQueue() : QueueBase<S>(TRIVIAL_QUEUE), front_(kNoStateId) {}
+
+  // Returns the stored state, or kNoStateId when the queue is empty.
+  StateId Head() const {
+    return front_;
+  }
+
+  // Stores 's', replacing any previously stored state.
+  void Enqueue(StateId s) {
+    front_ = s;
+  }
+
+  // Forgets the stored state.
+  void Dequeue() {
+    front_ = kNoStateId;
+  }
+
+  // Nothing to reorder with a single slot.
+  void Update(StateId) {}
+
+  bool Empty() const {
+    return kNoStateId == front_;
+  }
+
+  // Same effect as Dequeue(): the single slot is reset.
+  void Clear() {
+    front_ = kNoStateId;
+  }
+
+ private:
+  // This allows base-class virtual access to non-virtual derived-
+  // class members of the same name. It makes the derived class more
+  // efficient to use but unsafe to further derive.
+  virtual StateId Head_() const { return Head(); }
+  virtual void Enqueue_(StateId s) { Enqueue(s); }
+  virtual void Dequeue_() { Dequeue(); }
+  virtual void Update_(StateId s) { Update(s); }
+  virtual bool Empty_() const { return Empty(); }
+  virtual void Clear_() { Clear(); }
+
+  StateId front_;  // The stored state, kNoStateId when empty.
+};
+
+
+// First-in, first-out queue discipline, templated on the StateId.
+// The queue is a deque: states are pushed at the front and taken from
+// the back.
+template <class S>
+class FifoQueue : public QueueBase<S>, public deque<S> {
+ public:
+  typedef S StateId;
+
+  FifoQueue() : QueueBase<S>(FIFO_QUEUE) {}
+
+  // Oldest enqueued state.
+  StateId Head() const {
+    return this->back();
+  }
+
+  void Enqueue(StateId s) {
+    this->push_front(s);
+  }
+
+  // Removes the oldest enqueued state.
+  void Dequeue() {
+    this->pop_back();
+  }
+
+  // Order does not depend on weights, so there is nothing to update.
+  void Update(StateId) {}
+
+  bool Empty() const {
+    return this->empty();
+  }
+
+  void Clear() {
+    this->clear();
+  }
+
+ private:
+  // This allows base-class virtual access to non-virtual derived-
+  // class members of the same name. It makes the derived class more
+  // efficient to use but unsafe to further derive.
+  virtual StateId Head_() const { return Head(); }
+  virtual void Enqueue_(StateId s) { Enqueue(s); }
+  virtual void Dequeue_() { Dequeue(); }
+  virtual void Update_(StateId s) { Update(s); }
+  virtual bool Empty_() const { return Empty(); }
+  virtual void Clear_() { Clear(); }
+};
+
+
+// Last-in, first-out queue discipline, templated on the StateId.
+// The queue is a deque used as a stack: states are pushed at and taken
+// from the front.
+template <class S>
+class LifoQueue : public QueueBase<S>, public deque<S> {
+ public:
+  typedef S StateId;
+
+  LifoQueue() : QueueBase<S>(LIFO_QUEUE) {}
+
+  // Most recently enqueued state.
+  StateId Head() const {
+    return this->front();
+  }
+
+  void Enqueue(StateId s) {
+    this->push_front(s);
+  }
+
+  // Removes the most recently enqueued state.
+  void Dequeue() {
+    this->pop_front();
+  }
+
+  // Order does not depend on weights, so there is nothing to update.
+  void Update(StateId) {}
+
+  bool Empty() const {
+    return this->empty();
+  }
+
+  void Clear() {
+    this->clear();
+  }
+
+ private:
+  // This allows base-class virtual access to non-virtual derived-
+  // class members of the same name. It makes the derived class more
+  // efficient to use but unsafe to further derive.
+  virtual StateId Head_() const { return Head(); }
+  virtual void Enqueue_(StateId s) { Enqueue(s); }
+  virtual void Dequeue_() { Dequeue(); }
+  virtual void Update_(StateId s) { Update(s); }
+  virtual bool Empty_() const { return Empty(); }
+  virtual void Clear_() { Clear(); }
+};
+
+
+// Shortest-first queue discipline, templated on the StateId and
+// comparison function object. Comparison function object COMP is
+// used to compare two StateIds. If a (single) state's order changes,
+// it can be reordered in the queue with a call to Update().
+// If 'update == false', call to Update() does not reorder the queue
+// and no per-state heap keys are recorded.
+template <typename S, typename C, bool update = true>
+class ShortestFirstQueue : public QueueBase<S> {
+ public:
+  typedef S StateId;
+  typedef C Compare;
+
+  ShortestFirstQueue(C comp)
+      : QueueBase<S>(SHORTEST_FIRST_QUEUE), heap_(comp) {}
+
+  StateId Head() const { return heap_.Top(); }
+
+  // Inserts 's' in the heap; when updates are enabled, the heap key of
+  // 's' is remembered so that Update() can find it later.
+  void Enqueue(StateId s) {
+    if (!update) {
+      heap_.Insert(s);
+      return;
+    }
+    // Grow the key table so that index 's' exists.
+    while (static_cast<StateId>(key_.size()) <= s)
+      key_.push_back(kNoKey);
+    key_[s] = heap_.Insert(s);
+  }
+
+  // Removes the head; when updates are enabled, its key is cleared.
+  void Dequeue() {
+    if (!update) {
+      heap_.Pop();
+      return;
+    }
+    key_[heap_.Pop()] = kNoKey;
+  }
+
+  // Repositions 's' in the heap, or enqueues it if it is not currently
+  // in the queue. Does nothing when updates are disabled.
+  void Update(StateId s) {
+    if (update) {
+      const bool in_heap = (s < key_.size()) && (key_[s] != kNoKey);
+      if (in_heap) {
+        heap_.Update(key_[s], s);
+      } else {
+        Enqueue(s);
+      }
+    }
+  }
+
+  bool Empty() const { return heap_.Empty(); }
+
+  void Clear() {
+    heap_.Clear();
+    if (update)
+      key_.clear();
+  }
+
+ private:
+  Heap<S, C, false> heap_;
+  vector<ssize_t> key_;  // Heap key per state, kNoKey if not in the heap.
+
+  // This allows base-class virtual access to non-virtual derived-
+  // class members of the same name. It makes the derived class more
+  // efficient to use but unsafe to further derive.
+  virtual StateId Head_() const { return Head(); }
+  virtual void Enqueue_(StateId s) { Enqueue(s); }
+  virtual void Dequeue_() { Dequeue(); }
+  virtual void Update_(StateId s) { Update(s); }
+  virtual bool Empty_() const { return Empty(); }
+  virtual void Clear_() { Clear(); }
+};
+
+
+// Comparison function object between states: two states are compared
+// by applying a Less function object on weights to the entries of a
+// state-indexed weight vector. The vector is held by reference, so it
+// must outlive this object; the Less object is copied.
+template <typename S, typename L>
+class StateWeightCompare {
+ public:
+  typedef L Less;
+  typedef typename L::Weight Weight;
+  typedef S StateId;
+
+  StateWeightCompare(const vector<Weight>& weights, const L &less)
+      : weights_(weights), less_(less) {}
+
+  // True when the weight of state 'x' is less than the weight of 'y'.
+  bool operator()(const S x, const S y) const {
+    const Weight &wx = weights_[x];
+    const Weight &wy = weights_[y];
+    return less_(wx, wy);
+  }
+
+ private:
+  const vector<Weight>& weights_;
+  L less_;
+};
+
+
+// Shortest-first queue discipline, templated on the StateId and Weight, is
+// specialized to use the weight's natural order for the comparison function.
+// 'distance' is held by reference inside the comparator, so it must
+// outlive the queue.
+template <typename S, typename W>
+class NaturalShortestFirstQueue :
+      public ShortestFirstQueue<S, StateWeightCompare<S, NaturalLess<W> > > {
+ public:
+  typedef StateWeightCompare<S, NaturalLess<W> > C;
+
+  // The comparator is built from a temporary NaturalLess rather than
+  // from a data member: base classes are constructed before members, so
+  // a member would be copied before it had been constructed.
+  NaturalShortestFirstQueue(const vector<W> &distance) :
+      ShortestFirstQueue<S, C>(C(distance, NaturalLess<W>())) {}
+};
+
+// Topological-order queue discipline, templated on the StateId.
+// States leave the queue in topological order. The FST must be acyclic.
+// Enqueued states are kept in a table indexed by topological position;
+// 'front_' and 'back_' bound the occupied part of that table.
+template <class S>
+class TopOrderQueue : public QueueBase<S> {
+ public:
+  typedef S StateId;
+
+  // This constructor computes the topological order with a depth-first
+  // visit of 'fst'. It accepts an arc filter to limit the transitions
+  // considered in that computation (e.g., only the epsilon graph). If
+  // the FST turns out not to be acyclic, an error is logged and the
+  // queue's error flag is set.
+  template <class Arc, class ArcFilter>
+  TopOrderQueue(const Fst<Arc> &fst, ArcFilter filter)
+      : QueueBase<S>(TOP_ORDER_QUEUE), front_(0), back_(kNoStateId),
+        order_(), state_() {
+    bool is_acyclic;
+    TopOrderVisitor<Arc> visitor(&order_, &is_acyclic);
+    DfsVisit(fst, &visitor, filter);
+    if (!is_acyclic) {
+      FSTERROR() << "TopOrderQueue: fst is not acyclic.";
+      this->SetError(true);
+    }
+    state_.resize(order_.size(), kNoStateId);
+  }
+
+  // This constructor is passed the topological order, useful when it is
+  // known beforehand.
+  TopOrderQueue(const vector<StateId> &order)
+      : QueueBase<S>(TOP_ORDER_QUEUE), front_(0), back_(kNoStateId),
+        order_(order), state_(order.size(), kNoStateId) {}
+
+  StateId Head() const {
+    return state_[front_];
+  }
+
+  // Files 's' under its topological position and widens the occupied
+  // range [front_, back_] if needed.
+  void Enqueue(StateId s) {
+    const StateId pos = order_[s];
+    if (front_ > back_) {
+      // The queue was empty.
+      front_ = back_ = pos;
+    } else if (pos > back_) {
+      back_ = pos;
+    } else if (pos < front_) {
+      front_ = pos;
+    }
+    state_[pos] = s;
+  }
+
+  // Releases the head slot and advances to the next occupied position.
+  void Dequeue() {
+    state_[front_] = kNoStateId;
+    for (; (front_ <= back_) && (state_[front_] == kNoStateId); ++front_) {}
+  }
+
+  // Order is fixed by the topological sort; nothing to update.
+  void Update(StateId) {}
+
+  bool Empty() const {
+    return front_ > back_;
+  }
+
+  void Clear() {
+    for (StateId pos = front_; pos <= back_; ++pos)
+      state_[pos] = kNoStateId;
+    back_ = kNoStateId;
+    front_ = 0;
+  }
+
+ private:
+  StateId front_;          // First possibly occupied position.
+  StateId back_;           // Last occupied position.
+  vector<StateId> order_;  // State -> topological position.
+  vector<StateId> state_;  // Position -> enqueued state or kNoStateId.
+
+  // This allows base-class virtual access to non-virtual derived-
+  // class members of the same name. It makes the derived class more
+  // efficient to use but unsafe to further derive.
+  virtual StateId Head_() const { return Head(); }
+  virtual void Enqueue_(StateId s) { Enqueue(s); }
+  virtual void Dequeue_() { Dequeue(); }
+  virtual void Update_(StateId s) { Update(s); }
+  virtual bool Empty_() const { return Empty(); }
+  virtual void Clear_() { Clear(); }
+};
+
+
+// State order queue discipline, templated on the StateId.
+// States leave the queue in increasing state Id order. Membership is
+// kept in a bit vector indexed by state Id; 'front_' and 'back_' bound
+// the range of Ids that may currently be enqueued.
+template <class S>
+class StateOrderQueue : public QueueBase<S> {
+ public:
+  typedef S StateId;
+
+  StateOrderQueue()
+      : QueueBase<S>(STATE_ORDER_QUEUE), front_(0), back_(kNoStateId) {}
+
+  StateId Head() const {
+    return front_;
+  }
+
+  // Marks 's' as enqueued and widens [front_, back_] if needed.
+  void Enqueue(StateId s) {
+    if (front_ > back_) {
+      // The queue was empty.
+      front_ = back_ = s;
+    } else if (s > back_) {
+      back_ = s;
+    } else if (s < front_) {
+      front_ = s;
+    }
+    // Grow the membership table so that index 's' exists.
+    while (enqueued_.size() <= s)
+      enqueued_.push_back(false);
+    enqueued_[s] = true;
+  }
+
+  // Unmarks the head and advances to the next enqueued state Id.
+  void Dequeue() {
+    enqueued_[front_] = false;
+    for (; (front_ <= back_) && (enqueued_[front_] == false); ++front_) {}
+  }
+
+  // Order depends only on state Ids; nothing to update.
+  void Update(StateId) {}
+
+  bool Empty() const {
+    return front_ > back_;
+  }
+
+  void Clear() {
+    for (StateId id = front_; id <= back_; ++id)
+      enqueued_[id] = false;
+    front_ = 0;
+    back_ = kNoStateId;
+  }
+
+ private:
+  StateId front_;          // Smallest state Id that may be enqueued.
+  StateId back_;           // Largest enqueued state Id.
+  vector<bool> enqueued_;  // Membership flag per state Id.
+
+  // This allows base-class virtual access to non-virtual derived-
+  // class members of the same name. It makes the derived class more
+  // efficient to use but unsafe to further derive.
+  virtual StateId Head_() const { return Head(); }
+  virtual void Enqueue_(StateId s) { Enqueue(s); }
+  virtual void Dequeue_() { Dequeue(); }
+  virtual void Update_(StateId s) { Update(s); }
+  virtual bool Empty_() const { return Empty(); }
+  virtual void Clear_() { Clear(); }
+};
+
+
+// SCC topological-order meta-queue discipline, templated on the StateId S
+// and a queue Q, which is used inside each SCC. It visits the SCC's
+// of an FST in topological order. Its constructor is passed the queues
+// to use within an SCC. An SCC whose queue pointer is null is treated
+// as trivial: its single pending state, if any, is kept in an internal
+// table that grows on demand.
+template <class S, class Q>
+class SccQueue : public QueueBase<S> {
+ public:
+  typedef S StateId;
+  typedef Q Queue;
+
+  // Constructor takes a vector specifying the SCC number per state
+  // and a vector giving the queue to use per SCC number. Both are
+  // referenced, not copied, so they must outlive this object.
+  SccQueue(const vector<StateId> &scc, vector<Queue*> *queue)
+      : QueueBase<S>(SCC_QUEUE), queue_(queue), scc_(scc), front_(0),
+        back_(kNoStateId) {}
+
+  // Returns the head of the first non-empty SCC. 'front_' is advanced
+  // past empty SCCs as a side effect, which is why it is mutable.
+  StateId Head() const {
+    while ((front_ <= back_) && SccEmpty(front_))
+      ++front_;
+    if ((*queue_)[front_])
+      return (*queue_)[front_]->Head();
+    else
+      return trivial_queue_[front_];
+  }
+
+  // Enqueues 's' in the queue of its SCC and widens [front_, back_] to
+  // include that SCC.
+  void Enqueue(StateId s) {
+    if (front_ > back_) front_ = back_ = scc_[s];
+    else if (scc_[s] > back_) back_ = scc_[s];
+    else if (scc_[s] < front_) front_ = scc_[s];
+    if ((*queue_)[scc_[s]]) {
+      (*queue_)[scc_[s]]->Enqueue(s);
+    } else {
+      while (trivial_queue_.size() <= scc_[s])
+        trivial_queue_.push_back(kNoStateId);
+      trivial_queue_[scc_[s]] = s;
+    }
+  }
+
+  // Removes the head of SCC number 'front_'.
+  void Dequeue() {
+    if ((*queue_)[front_])
+      (*queue_)[front_]->Dequeue();
+    else if (front_ < trivial_queue_.size())
+      trivial_queue_[front_] = kNoStateId;
+  }
+
+  // Forwards to the queue of the SCC of 's'; trivial SCCs need no update.
+  void Update(StateId s) {
+    if ((*queue_)[scc_[s]])
+      (*queue_)[scc_[s]]->Update(s);
+  }
+
+  bool Empty() const {
+    if (front_ < back_)  // Queue scc # back_ not empty unless back_==front_
+      return false;
+    else if (front_ > back_)
+      return true;
+    else
+      return SccEmpty(front_);
+  }
+
+  void Clear() {
+    for (StateId i = front_; i <= back_; ++i)
+      if ((*queue_)[i])
+        (*queue_)[i]->Clear();
+      else if (i < trivial_queue_.size())
+        trivial_queue_[i] = kNoStateId;
+    front_ = 0;
+    back_ = kNoStateId;
+  }
+
+ private:
+  // Returns true when SCC number 'i' currently holds no state. For a
+  // trivial SCC the bound check must be '>=': index 'size()' is already
+  // past the end of the table, so testing with '>' would read one
+  // element beyond it.
+  bool SccEmpty(StateId i) const {
+    if ((*queue_)[i])
+      return (*queue_)[i]->Empty();
+    return (i >= trivial_queue_.size())
+        || (trivial_queue_[i] == kNoStateId);
+  }
+
+  vector<Queue*> *queue_;
+  const vector<StateId> &scc_;
+  mutable StateId front_;
+  StateId back_;
+  vector<StateId> trivial_queue_;
+
+  // This allows base-class virtual access to non-virtual derived-
+  // class members of the same name. It makes the derived class more
+  // efficient to use but unsafe to further derive.
+  virtual StateId Head_() const { return Head(); }
+  virtual void Enqueue_(StateId s) { Enqueue(s); }
+  virtual void Dequeue_() { Dequeue(); }
+  virtual void Update_(StateId s) { Update(s); }
+  virtual bool Empty_() const { return Empty(); }
+  virtual void Clear_() { return Clear(); }
+
+  DISALLOW_COPY_AND_ASSIGN(SccQueue);
+};
+
+
+// Automatic queue discipline, templated on the StateId. It selects a
+// queue discipline for a given FST based on its properties and then
+// forwards every queue operation to the selected queue.
+template <class S>
+class AutoQueue : public QueueBase<S> {
+ public:
+  typedef S StateId;
+
+  // This constructor takes a state distance vector that, if non-null and if
+  // the Weight type has the path property, allows the shortest-first
+  // queue (using the natural order w.r.t. the distance) to be chosen
+  // inside strongly connected components. 'filter' limits the arcs that
+  // are considered while analysing the FST.
+  template <class Arc, class ArcFilter>
+  AutoQueue(const Fst<Arc> &fst, const vector<typename Arc::Weight> *distance,
+            ArcFilter filter) : QueueBase<S>(AUTO_QUEUE) {
+    typedef typename Arc::Weight Weight;
+    typedef StateWeightCompare< StateId, NaturalLess<Weight> > Compare;
+
+    // First check if the FST is known to have these properties.
+    uint64 props = fst.Properties(kAcyclic | kCyclic |
+                                  kTopSorted | kUnweighted, false);
+    if ((props & kTopSorted) || fst.Start() == kNoStateId) {
+      queue_ = new StateOrderQueue<StateId>();
+      VLOG(2) << "AutoQueue: using state-order discipline";
+    } else if (props & kAcyclic) {
+      queue_ = new TopOrderQueue<StateId>(fst, filter);
+      VLOG(2) << "AutoQueue: using top-order discipline";
+    } else if ((props & kUnweighted) && (Weight::Properties() & kIdempotent)) {
+      queue_ = new LifoQueue<StateId>();
+      VLOG(2) << "AutoQueue: using LIFO discipline";
+    } else {
+      uint64 properties;
+      // Decompose into strongly-connected components.
+      SccVisitor<Arc> scc_visitor(&scc_, 0, 0, &properties);
+      DfsVisit(fst, &scc_visitor, filter);
+      StateId nscc = *max_element(scc_.begin(), scc_.end()) + 1;
+      vector<QueueType> queue_types(nscc);
+      // The comparator is only available when distances were supplied
+      // and the semiring has the path property.
+      NaturalLess<Weight> *less = 0;
+      Compare *comp = 0;
+      if (distance && (Weight::Properties() & kPath)) {
+        less = new NaturalLess<Weight>;
+        comp = new Compare(*distance, *less);
+      }
+      // Find the queue type to use per SCC.
+      bool unweighted;
+      bool all_trivial;
+      SccQueueType(fst, scc_, &queue_types, filter, less, &all_trivial,
+                   &unweighted);
+      if (unweighted) {
+        // If unweighted and semiring is idempotent, use lifo queue.
+        queue_ = new LifoQueue<StateId>();
+        VLOG(2) << "AutoQueue: using LIFO discipline";
+      } else if (all_trivial) {
+        // If all the scc are trivial, FST is acyclic and the scc# gives
+        // the topological order.
+        queue_ = new TopOrderQueue<StateId>(scc_);
+        VLOG(2) << "AutoQueue: using top-order discipline";
+      } else {
+        VLOG(2) << "AutoQueue: using SCC meta-discipline";
+        queues_.resize(nscc);
+        for (StateId i = 0; i < nscc; ++i) {
+          switch(queue_types[i]) {
+            case TRIVIAL_QUEUE:
+              // A null entry tells SccQueue to treat this SCC as trivial.
+              queues_[i] = 0;
+              VLOG(3) << "AutoQueue: SCC #" << i
+                      << ": using trivial discipline";
+              break;
+            case SHORTEST_FIRST_QUEUE:
+              // NOTE(review): relies on SccQueueType selecting this type
+              // only when 'less' (and hence 'comp') is non-null -- confirm.
+              queues_[i] = new ShortestFirstQueue<StateId, Compare, false>(*comp);
+              VLOG(3) << "AutoQueue: SCC #" << i <<
+                  ": using shortest-first discipline";
+              break;
+            case LIFO_QUEUE:
+              queues_[i] = new LifoQueue<StateId>();
+              VLOG(3) << "AutoQueue: SCC #" << i
+                      << ": using LIFO discipline";
+              break;
+            case FIFO_QUEUE:
+            default:
+              queues_[i] = new FifoQueue<StateId>();
+              VLOG(3) << "AutoQueue: SCC #" << i
+                      << ": using FIFO discipline";
+              break;
+          }
+        }
+        queue_ = new SccQueue< StateId, QueueBase<StateId> >(scc_, &queues_);
+      }
+      // Single cleanup point for all three outcomes above. The
+      // shortest-first queues were built from '*comp' by value.
+      delete comp;
+      delete less;
+    }
+  }
+
+  // Deletes the per-SCC queues (if any) and the selected queue.
+  ~AutoQueue() {
+    for (StateId i = 0; i < queues_.size(); ++i)
+      delete queues_[i];
+    delete queue_;
+  }
+
+  StateId Head() const { return queue_->Head(); }
+
+  void Enqueue(StateId s) { queue_->Enqueue(s); }
+
+  void Dequeue() { queue_->Dequeue(); }
+
+  void Update(StateId s) { queue_->Update(s); }
+
+  bool Empty() const { return queue_->Empty(); }
+
+  void Clear() { queue_->Clear(); }
+
+
+ private:
+  QueueBase<StateId> *queue_;            // The selected queue (owned).
+  vector< QueueBase<StateId>* > queues_; // Per-SCC queues (owned), if used.
+  vector<StateId> scc_;                  // SCC number per state, if computed.
+
+  template <class Arc, class ArcFilter, class Less>
+  static void SccQueueType(const Fst<Arc> &fst,
+                           const vector<StateId> &scc,
+                           vector<QueueType> *queue_types,
+                           ArcFilter filter, Less *less,
+                           bool *all_trivial, bool *unweighted);
+
+  // This allows base-class virtual access to non-virtual derived-
+  // class members of the same name. It makes the derived class more
+  // efficient to use but unsafe to further derive.
+  virtual StateId Head_() const { return Head(); }
+
+  virtual void Enqueue_(StateId s) { Enqueue(s); }
+
+  virtual void Dequeue_() { Dequeue(); }
+
+  virtual void Update_(StateId s) { Update(s); }
+
+  virtual bool Empty_() const { return Empty(); }
+
+  virtual void Clear_() { return Clear(); }
+
+  DISALLOW_COPY_AND_ASSIGN(AutoQueue);
+};
+
+
+// Examines the states in an Fst's strongly connected components and
+// determines which type of queue to use per SCC. Stores result in
+// vector QUEUE_TYPES, which is assumed to have length equal to the
+// number of SCCs. An arc filter is used to limit the transitions
+// considered (e.g., only the epsilon graph). ALL_TRIVIAL is set
+// to true if every queue is the trivial queue. UNWEIGHTED is set to
+// true if the semiring is idempotent and all the arc weights are equal to
+// Zero() or One().
+template <class StateId>
+template <class A, class ArcFilter, class Less>
+void AutoQueue<StateId>::SccQueueType(const Fst<A> &fst,
+                                      const vector<StateId> &scc,
+                                      vector<QueueType> *queue_type,
+                                      ArcFilter filter, Less *less,
+                                      bool *all_trivial, bool *unweighted) {
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+
+  // Begin with the most optimistic answer and weaken it as arcs are seen.
+  *all_trivial = true;
+  *unweighted = true;
+  queue_type->assign(queue_type->size(), TRIVIAL_QUEUE);
+
+  const bool idempotent = (Weight::Properties() & kIdempotent) != 0;
+
+  for (StateIterator< Fst<Arc> > siter(fst); !siter.Done(); siter.Next()) {
+    const StateId src = siter.Value();
+    for (ArcIterator< Fst<Arc> > aiter(fst, src);
+         !aiter.Done();
+         aiter.Next()) {
+      const Arc &arc = aiter.Value();
+      if (!filter(arc)) continue;
+
+      // An arc counts as weighted unless the semiring is idempotent and
+      // its weight is Zero() or One().
+      const bool weighted_arc =
+          !idempotent ||
+          (arc.weight != Weight::Zero() && arc.weight != Weight::One());
+
+      // Only arcs that stay inside one SCC influence that SCC's queue type.
+      if (scc[src] == scc[arc.nextstate]) {
+        QueueType &type = (*queue_type)[scc[src]];
+        if (!less || (*less)(arc.weight, Weight::One())) {
+          type = FIFO_QUEUE;
+        } else if (type == TRIVIAL_QUEUE || type == LIFO_QUEUE) {
+          type = weighted_arc ? SHORTEST_FIRST_QUEUE : LIFO_QUEUE;
+        }
+        if (type != TRIVIAL_QUEUE) *all_trivial = false;
+      }
+
+      if (weighted_arc) *unweighted = false;
+    }
+  }
+}
+
+
+// An A* estimate is a function object that maps from a state ID to
+// an estimate of the shortest distance to the final states.
+// The trivial A* estimate is always One().
+template <typename S, typename W>
+struct TrivialAStarEstimate {
+ // Ignores the state and always returns W::One().
+ W operator()(S s) const { return W::One(); }
+};
+
+
+// Given a vector that maps from states to weights representing the
+// shortest distance from the initial state, a Less comparison
+// function object between weights, and an estimate E of the
+// shortest distance to the final states, this class defines a
+// comparison function object between states.
+template <typename S, typename L, typename E>
+class AStarWeightCompare {
+ public:
+  typedef L Less;
+  typedef typename L::Weight Weight;
+  typedef S StateId;
+
+  // 'weights' and 'estimate' are kept by reference, so both must outlive
+  // this comparator; 'less' is copied.
+  AStarWeightCompare(const vector<Weight>& weights, const L &less,
+                     const E &estimate)
+      : weights_(weights), less_(less), estimate_(estimate) {}
+
+  // Orders two states by their distance so far Times() their estimate.
+  // Returns true iff the combined weight of 'x' is less than that of 'y'.
+  bool operator()(const S x, const S y) const {
+    Weight cost_x = Times(weights_[x], estimate_(x));
+    Weight cost_y = Times(weights_[y], estimate_(y));
+    return less_(cost_x, cost_y);
+  }
+
+ private:
+  const vector<Weight>& weights_;  // distances from the initial state
+  L less_;                         // weight ordering
+  const E &estimate_;              // state -> estimated remaining distance
+};
+
+
+// A* queue discipline, templated on the StateId, Weight and an
+// estimate E of the shortest distance to the final states, is specialized
+// to use the weight's natural order for the comparison function.
+template <typename S, typename W, typename E>
+class NaturalAStarQueue :
+      public ShortestFirstQueue<S, AStarWeightCompare<S, NaturalLess<W>, E> > {
+ public:
+  typedef AStarWeightCompare<S, NaturalLess<W>, E> C;
+
+  // Builds a shortest-first queue ordered by distance Times() estimate under
+  // the natural order of W. 'distance' and 'estimate' are held by reference
+  // inside the comparator and must outlive the queue.
+  //
+  // The comparator copies the NaturalLess it is given, so a temporary is
+  // passed here. Base classes are constructed before data members, so
+  // handing the base a data member of this class (as was done previously)
+  // would copy an object that has not been constructed yet.
+  NaturalAStarQueue(const vector<W> &distance, const E &estimate) :
+      ShortestFirstQueue<S, C>(C(distance, NaturalLess<W>(), estimate)) {}
+};
+
+
+// A state equivalence class is a function object that
+// maps from a state ID to an equivalence class (state) ID.
+// The trivial equivalence class maps a state to itself.
+template <typename S>
+struct TrivialStateEquivClass {
+ // Identity mapping: every state is its own equivalence class.
+ S operator()(S s) const { return s; }
+};
+
+
+// Pruning queue discipline: Enqueues a state 's' only when its
+// shortest distance (so far), as specified by 'distance', is less
+// than (as specified by 'comp') the shortest distance Times() the
+// 'threshold' to any state in the same equivalence class, as
+// specified by the function object 'class_func'. The underlying
+// queue discipline is specified by 'queue'. The ownership of 'queue'
+// is given to this class.
+template <typename Q, typename L, typename C>
+class PruneQueue : public QueueBase<typename Q::StateId> {
+ public:
+  typedef typename Q::StateId StateId;
+  typedef typename L::Weight Weight;
+
+  // 'distance' and 'class_func' are held by reference and must outlive this
+  // queue; 'comp' and 'threshold' are copied. Takes ownership of 'queue'.
+  PruneQueue(const vector<Weight> &distance, Q *queue, L comp,
+             const C &class_func, Weight threshold)
+      : QueueBase<StateId>(OTHER_QUEUE),
+        distance_(distance),
+        queue_(queue),
+        less_(comp),
+        class_func_(class_func),
+        threshold_(threshold) {}
+
+  ~PruneQueue() { delete queue_; }
+
+  StateId Head() const { return queue_->Head(); }
+
+  // Records the distance of 's' for its class, then enqueues 's' only if
+  // that distance is less than the class's best distance Times() the
+  // threshold.
+  void Enqueue(StateId s) {
+    const StateId c = RelaxClassDistance(s);
+
+    // Enqueue only if below threshold limit
+    Weight limit = Times(class_distance_[c], threshold_);
+    if (less_(distance_[s], limit))
+      queue_->Enqueue(s);
+  }
+
+  void Dequeue() { queue_->Dequeue(); }
+
+  // Refreshes the best distance of the class of 's' and forwards the update
+  // to the underlying queue. The per-class table is grown on demand, so an
+  // update for a class that was never enqueued no longer indexes past the
+  // end of class_distance_.
+  void Update(StateId s) {
+    RelaxClassDistance(s);
+    queue_->Update(s);
+  }
+
+  bool Empty() const { return queue_->Empty(); }
+  void Clear() { queue_->Clear(); }
+
+ private:
+  // Ensures class_distance_ has an entry for the class of 's' (new entries
+  // start at Weight::Zero()), lowers that entry to distance_[s] when the
+  // latter is smaller, and returns the class ID.
+  StateId RelaxClassDistance(StateId s) {
+    const StateId c = class_func_(s);
+    if (static_cast<size_t>(c) >= class_distance_.size())
+      class_distance_.resize(c + 1, Weight::Zero());
+    if (less_(distance_[s], class_distance_[c]))
+      class_distance_[c] = distance_[s];
+    return c;
+  }
+
+  // This allows base-class virtual access to non-virtual derived-
+  // class members of the same name. It makes the derived class more
+  // efficient to use but unsafe to further derive.
+  virtual StateId Head_() const { return Head(); }
+  virtual void Enqueue_(StateId s) { Enqueue(s); }
+  virtual void Dequeue_() { Dequeue(); }
+  virtual void Update_(StateId s) { Update(s); }
+  virtual bool Empty_() const { return Empty(); }
+  virtual void Clear_() { return Clear(); }
+
+  const vector<Weight> &distance_;    // shortest distance to state
+  Q *queue_;                          // underlying queue discipline (owned)
+  L less_;                            // weight comparison
+  const C &class_func_;               // eqv. class function object
+  Weight threshold_;                  // pruning weight threshold
+  vector<Weight> class_distance_;     // shortest distance to class
+
+  DISALLOW_COPY_AND_ASSIGN(PruneQueue);
+};
+
+
+// Pruning queue discipline (see above) using the weight's natural
+// order for the comparison function. The ownership of 'queue' is
+// given to this class.
+template <typename Q, typename W, typename C>
+class NaturalPruneQueue :
+      public PruneQueue<Q, NaturalLess<W>, C> {
+ public:
+  typedef typename Q::StateId StateId;
+  typedef W Weight;
+
+  // Forwards to PruneQueue using the natural order of W for comparisons.
+  // Takes ownership of 'queue'.
+  //
+  // PruneQueue receives its comparison object by value, so a temporary
+  // NaturalLess is passed. Base classes are constructed before data
+  // members, so passing a data member of this class (as was done
+  // previously) would copy an object that has not been constructed yet.
+  NaturalPruneQueue(const vector<W> &distance, Q *queue,
+                    const C &class_func, Weight threshold) :
+      PruneQueue<Q, NaturalLess<W>, C>(distance, queue, NaturalLess<W>(),
+                                       class_func, threshold) {}
+};
+
+
+} // namespace fst
+
+#endif
diff --git a/src/include/fst/randequivalent.h b/src/include/fst/randequivalent.h
new file mode 100644
index 0000000..1aaccf7
--- /dev/null
+++ b/src/include/fst/randequivalent.h
@@ -0,0 +1,135 @@
+// randequivalent.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// Tests if two FSTs are equivalent by checking if random
+// strings from one FST are transduced the same by both FSTs.
+
+#ifndef FST_RANDEQUIVALENT_H__
+#define FST_RANDEQUIVALENT_H__
+
+#include <fst/arcsort.h>
+#include <fst/compose.h>
+#include <fst/project.h>
+#include <fst/randgen.h>
+#include <fst/shortest-distance.h>
+#include <fst/vector-fst.h>
+
+
+namespace fst {
+
+// Test if two FSTs are equivalent by randomly generating 'num_paths'
+// paths (as specified by the RandGenOptions 'opts') in these FSTs.
+//
+// For each randomly generated path, the algorithm computes for each
+// of the two FSTs the sum of the weights of all the successful paths
+// sharing the same input and output labels as the considered randomly
+// generated path and checks that these two values are within
+// 'delta'. Returns optional error value (when FLAGS_error_fatal = false).
+template<class Arc, class ArcSelector>
+bool RandEquivalent(const Fst<Arc> &fst1, const Fst<Arc> &fst2,
+                    ssize_t num_paths, float delta,
+                    const RandGenOptions<ArcSelector> &opts,
+                    bool *error = 0) {
+  typedef typename Arc::Weight Weight;
+  if (error) *error = false;
+
+  // Refuse to compare machines whose symbol tables are incompatible.
+  if (!(CompatSymbols(fst1.InputSymbols(), fst2.InputSymbols()) &&
+        CompatSymbols(fst1.OutputSymbols(), fst2.OutputSymbols()))) {
+    FSTERROR() << "RandEquivalent: input/output symbol tables of 1st "
+               << "argument do not match input/output symbol tables of 2nd "
+               << "argument";
+    if (error) *error = true;
+    return false;
+  }
+
+  // Work on connected, input-label-sorted copies of both arguments.
+  ILabelCompare<Arc> by_ilabel;
+  OLabelCompare<Arc> by_olabel;
+  VectorFst<Arc> trimmed1(fst1);
+  VectorFst<Arc> trimmed2(fst2);
+  Connect(&trimmed1);
+  Connect(&trimmed2);
+  ArcSort(&trimmed1, by_ilabel);
+  ArcSort(&trimmed2, by_ilabel);
+
+  // In a non-idempotent semiring a cyclic restriction cannot be summed, so
+  // such samples are skipped.
+  const bool non_idempotent = !(Weight::Properties() & kIdempotent);
+
+  bool equivalent = true;
+  for (ssize_t i = 0; i < num_paths; ++i) {
+    // Draw a random path from one of the two machines, picked at random.
+    VectorFst<Arc> sample;
+    const Fst<Arc> &source = rand() % 2 ? trimmed1 : trimmed2;
+    RandGen(source, &sample, opts);
+
+    // Split the sample into its input-side and output-side acceptors.
+    VectorFst<Arc> in_side(sample);
+    VectorFst<Arc> out_side(sample);
+    Project(&in_side, PROJECT_INPUT);
+    Project(&out_side, PROJECT_OUTPUT);
+
+    // Restrict the first machine to the sampled label pair and sum it.
+    VectorFst<Arc> left1, both1;
+    Compose(in_side, trimmed1, &left1);
+    ArcSort(&left1, by_olabel);
+    Compose(left1, out_side, &both1);
+    if (non_idempotent && both1.Properties(kCyclic, true))
+      continue;
+    Weight total1 = ShortestDistance(both1);
+
+    // Same restriction and sum for the second machine.
+    VectorFst<Arc> left2, both2;
+    Compose(in_side, trimmed2, &left2);
+    ArcSort(&left2, by_olabel);
+    Compose(left2, out_side, &both2);
+    if (non_idempotent && both2.Properties(kCyclic, true))
+      continue;
+    Weight total2 = ShortestDistance(both2);
+
+    if (!ApproxEqual(total1, total2, delta)) {
+      VLOG(1) << "Sum1 = " << total1;
+      VLOG(1) << "Sum2 = " << total2;
+      equivalent = false;
+      break;
+    }
+  }
+
+  // An error flagged on either input overrides the comparison result.
+  if (fst1.Properties(kError, false) || fst2.Properties(kError, false)) {
+    if (error) *error = true;
+    return false;
+  }
+
+  return equivalent;
+}
+
+
+// Test if two FSTs are equivalent by randomly generating 'num_paths' paths
+// of length no more than 'path_length' using the seed 'seed' in these FSTs.
+// Returns optional error value (when FLAGS_error_fatal = false).
+template <class Arc>
+bool RandEquivalent(const Fst<Arc> &fst1, const Fst<Arc> &fst2,
+                    ssize_t num_paths, float delta = kDelta,
+                    int seed = time(0), int path_length = INT_MAX,
+                    bool *error = 0) {
+  // Sample uniformly over transitions, seeded by 'seed', and delegate to
+  // the options-based overload.
+  typedef UniformArcSelector<Arc> Selector;
+  Selector selector(seed);
+  RandGenOptions<Selector> opts(selector, path_length);
+  return RandEquivalent(fst1, fst2, num_paths, delta, opts, error);
+}
+
+
+} // namespace fst
+
+#endif // FST_RANDEQUIVALENT_H__
diff --git a/src/include/fst/randgen.h b/src/include/fst/randgen.h
new file mode 100644
index 0000000..82ddffa
--- /dev/null
+++ b/src/include/fst/randgen.h
@@ -0,0 +1,712 @@
+// randgen.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Classes and functions to generate random paths through an FST.
+
+#ifndef FST_LIB_RANDGEN_H__
+#define FST_LIB_RANDGEN_H__
+
+#include <cmath>
+#include <cstdlib>
+#include <ctime>
+#include <map>
+
+#include <fst/accumulator.h>
+#include <fst/cache.h>
+#include <fst/dfs-visit.h>
+#include <fst/mutable-fst.h>
+
+namespace fst {
+
+//
+// ARC SELECTORS - these function objects are used to select a random
+// transition to take from an FST's state. They should return a number
+// N s.t. 0 <= N <= NumArcs(). If N < NumArcs(), then the N-th
+// transition is selected. If N == NumArcs(), then the final weight at
+// that state is selected (i.e., the 'super-final' transition is selected).
+// It can be assumed these will not be called unless either there
+// are transitions leaving the state and/or the state is final.
+//
+
+// Randomly selects a transition using the uniform distribution.
+template <class A>
+struct UniformArcSelector {
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+
+  // Seeds the C library random number generator.
+  UniformArcSelector(int seed = time(0)) { srand(seed); }
+
+  // Picks uniformly among the arcs leaving 's', plus one extra
+  // 'super-final' choice when 's' has a non-Zero() final weight.
+  size_t operator()(const Fst<A> &fst, StateId s) const {
+    const double unit = rand()/(RAND_MAX + 1.0);  // in [0, 1)
+    size_t choices = fst.NumArcs(s);
+    if (fst.Final(s) != Weight::Zero()) choices += 1;
+    return static_cast<size_t>(unit * choices);
+  }
+};
+
+
+// Randomly selects a transition w.r.t. the weights treated as negative
+// log probabilities after normalizing for the total weight leaving
+// the state. Weight::Zero() transitions are disregarded.
+// Assumes Weight::Value() accesses the floating point
+// representation of the weight.
+template <class A>
+class LogProbArcSelector {
+ public:
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+
+  // Seeds the C library random number generator.
+  LogProbArcSelector(int seed = time(0)) { srand(seed); }
+
+  // Returns the index of the selected arc, or NumArcs(s) when the draw
+  // falls past every arc (the 'super-final' transition).
+  size_t operator()(const Fst<A> &fst, StateId s) const {
+    // Total probability mass leaving the state, final weight included.
+    double total = 0.0;
+    for (ArcIterator< Fst<A> > aiter(fst, s); !aiter.Done();
+         aiter.Next()) {
+      total += Prob(aiter.Value().weight);
+    }
+    total += Prob(fst.Final(s));
+
+    // Walk the arcs until the running mass exceeds the scaled draw.
+    const double target = (rand()/(RAND_MAX + 1.0)) * total;
+    double running = 0.0;
+    size_t pos = 0;
+    for (ArcIterator< Fst<A> > aiter(fst, s); !aiter.Done();
+         aiter.Next(), ++pos) {
+      running += Prob(aiter.Value().weight);
+      if (running > target) return pos;
+    }
+    return pos;
+  }
+
+ private:
+  // Converts a weight, read as a negative log probability, to a probability.
+  double Prob(const Weight &w) const {
+    return exp(-to_log_weight_(w).Value());
+  }
+
+  WeightConvert<Weight, Log64Weight> to_log_weight_;
+};
+
+// Convenience definitions
+typedef LogProbArcSelector<StdArc> StdArcSelector;
+typedef LogProbArcSelector<LogArc> LogArcSelector;
+
+
+// Same as LogProbArcSelector but uses CacheLogAccumulator to cache
+// the cumulative weight computations.
+template <class A>
+class FastLogProbArcSelector : public LogProbArcSelector<A> {
+ public:
+ typedef typename A::StateId StateId;
+ typedef typename A::Weight Weight;
+ // Keeps the base class's two-argument operator() visible next to the
+ // accumulator-based overload declared below.
+ using LogProbArcSelector<A>::operator();
+
+ // Seeds the C library generator through the base class constructor and
+ // remembers the seed so Seed() can report it.
+ FastLogProbArcSelector(int seed = time(0))
+ : LogProbArcSelector<A>(seed),
+ seed_(seed) {}
+
+ // Selects a transition at state 's' using 'accumulator' for the sum over
+ // the final weight and all arcs and for the lower-bound search.
+ // NOTE(review): rand() may return 0, in which case log(0) makes 'r'
+ // infinite; confirm LowerBound() handles that as intended.
+ size_t operator()(const Fst<A> &fst, StateId s,
+ CacheLogAccumulator<A> *accumulator) const {
+ accumulator->SetState(s);
+ ArcIterator< Fst<A> > aiter(fst, s);
+ // Find total weight leaving state
+ double sum = to_log_weight_(accumulator->Sum(fst.Final(s), &aiter, 0,
+ fst.NumArcs(s))).Value();
+ double r = -log(rand()/(RAND_MAX + 1.0));
+ return accumulator->LowerBound(r + sum, &aiter);
+ }
+
+ // Returns the seed passed to the constructor.
+ int Seed() const { return seed_; }
+ private:
+ int seed_;
+ // Separate converter instance; the base class's converter is private.
+ WeightConvert<Weight, Log64Weight> to_log_weight_;
+};
+
+// Random path state info maintained by RandGenFst and passed to samplers.
+template <typename A>
+struct RandState {
+ typedef typename A::StateId StateId;
+
+ StateId state_id; // current input FST state
+ size_t nsamples; // # of samples to be sampled at this state
+ size_t length; // length of path to this random state
+ size_t select; // previous sample arc selection
+ const RandState<A> *parent; // previous random state on this path
+
+ // Field-by-field constructor: s = state_id, n = nsamples, l = length,
+ // k = select, p = parent (not owned by this struct).
+ RandState(StateId s, size_t n, size_t l, size_t k, const RandState<A> *p)
+ : state_id(s), nsamples(n), length(l), select(k), parent(p) {}
+
+ // Default state: no input state, zero counters, no parent.
+ RandState()
+ : state_id(kNoStateId), nsamples(0), length(0), select(0), parent(0) {}
+};
+
+// This class, given an arc selector, samples, with replacement,
+// multiple random transitions from an FST's state. This is a generic
+// version with a straight-forward use of the arc selector.
+// Specializations may be defined for arc selectors for greater
+// efficiency or special behavior.
+template <class A, class S>
+class ArcSampler {
+ public:
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+
+  // The 'max_length' may be interpreted (including ignored) by a
+  // sampler as it chooses. This generic version interprets this literally.
+  // 'fst' and 'arc_selector' are held by reference and must outlive the
+  // sampler.
+  ArcSampler(const Fst<A> &fst, const S &arc_selector,
+             int max_length = INT_MAX)
+      : fst_(fst),
+        arc_selector_(arc_selector),
+        max_length_(max_length) {
+    // Point the iterator at the (empty) sample map so that Done() is well
+    // defined, and true, before the first call to Sample().
+    Reset();
+  }
+
+  // Allow updating Fst argument; pass only if changed.
+  // Collected samples are not copied; the new sampler starts out empty.
+  ArcSampler(const ArcSampler<A, S> &sampler, const Fst<A> *fst = 0)
+      : fst_(fst ? *fst : sampler.fst_),
+        arc_selector_(sampler.arc_selector_),
+        max_length_(sampler.max_length_) {
+    Reset();
+  }
+
+  // Samples 'rstate.nsamples' from state 'state_id'. The 'rstate.length' is
+  // the length of the path to 'rstate'. Returns true if samples were
+  // collected. No samples may be collected if either there are no (including
+  // 'super-final') transitions leaving that state or if the
+  // 'max_length' has been deemed reached. Use the iterator members to
+  // read the samples. The samples will be in their original order.
+  bool Sample(const RandState<A> &rstate) {
+    sample_map_.clear();
+    const bool dead_end =
+        fst_.NumArcs(rstate.state_id) == 0 &&
+        fst_.Final(rstate.state_id) == Weight::Zero();
+    // Same conversion the implicit comparison performed, made explicit.
+    const bool stop =
+        dead_end || rstate.length == static_cast<size_t>(max_length_);
+    if (!stop) {
+      for (size_t i = 0; i < rstate.nsamples; ++i)
+        ++sample_map_[arc_selector_(fst_, rstate.state_id)];
+    }
+    Reset();
+    return !stop;
+  }
+
+  // More samples?
+  bool Done() const { return sample_iter_ == sample_map_.end(); }
+
+  // Gets the next sample.
+  void Next() { ++sample_iter_; }
+
+  // Returns a pair (N, K) where 0 <= N <= NumArcs(s) and 0 < K <= nsamples.
+  // If N < NumArcs(s), then the N-th transition is specified.
+  // If N == NumArcs(s), then the final weight at that state is
+  // specified (i.e., the 'super-final' transition is specified).
+  // For the specified transition, K repetitions have been sampled.
+  pair<size_t, size_t> Value() const { return *sample_iter_; }
+
+  // Rewinds iteration to the first collected sample.
+  void Reset() { sample_iter_ = sample_map_.begin(); }
+
+  // The generic sampler has no error state.
+  bool Error() const { return false; }
+
+ private:
+  const Fst<A> &fst_;
+  const S &arc_selector_;
+  int max_length_;
+
+  // Stores (N, K) as described for Value().
+  map<size_t, size_t> sample_map_;
+  map<size_t, size_t>::const_iterator sample_iter_;
+
+  // disallow
+  ArcSampler<A, S> & operator=(const ArcSampler<A, S> &s);
+};
+
+
+// Specialization for FastLogProbArcSelector.
+template <class A>
+class ArcSampler<A, FastLogProbArcSelector<A> > {
+ public:
+  typedef FastLogProbArcSelector<A> S;
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+  typedef CacheLogAccumulator<A> C;
+
+  // 'fst' and 'arc_selector' are held by reference and must outlive the
+  // sampler. A fresh accumulator is created and initialized on 'fst'.
+  ArcSampler(const Fst<A> &fst, const S &arc_selector, int max_length = INT_MAX)
+      : fst_(fst),
+        arc_selector_(arc_selector),
+        max_length_(max_length),
+        accumulator_(new C()) {
+    accumulator_->Init(fst);
+    // Make Done() well defined (and true) before the first Sample().
+    Reset();
+  }
+
+  // Copies 'sampler'. When 'fst' is given, the copy samples from it with a
+  // newly initialized accumulator; otherwise the accumulator is copied.
+  // Collected samples are not copied.
+  ArcSampler(const ArcSampler<A, S> &sampler, const Fst<A> *fst = 0)
+      : fst_(fst ? *fst : sampler.fst_),
+        arc_selector_(sampler.arc_selector_),
+        max_length_(sampler.max_length_) {
+    if (fst) {
+      accumulator_ = new C();
+      accumulator_->Init(*fst);
+    } else {  // shallow copy
+      accumulator_ = new C(*sampler.accumulator_);
+    }
+    // Make Done() well defined (and true) before the first Sample().
+    Reset();
+  }
+
+  ~ArcSampler() {
+    delete accumulator_;
+  }
+
+  // Draws 'rstate.nsamples' selections at 'rstate.state_id' through the
+  // accumulator-based selector. Returns false, leaving no samples, when the
+  // state has neither arcs nor a non-Zero() final weight or when the path
+  // length equals 'max_length'.
+  bool Sample(const RandState<A> &rstate) {
+    sample_map_.clear();
+    const bool dead_end =
+        fst_.NumArcs(rstate.state_id) == 0 &&
+        fst_.Final(rstate.state_id) == Weight::Zero();
+    // Same conversion the implicit comparison performed, made explicit.
+    const bool stop =
+        dead_end || rstate.length == static_cast<size_t>(max_length_);
+    if (!stop) {
+      for (size_t i = 0; i < rstate.nsamples; ++i)
+        ++sample_map_[arc_selector_(fst_, rstate.state_id, accumulator_)];
+    }
+    Reset();
+    return !stop;
+  }
+
+  bool Done() const { return sample_iter_ == sample_map_.end(); }
+  void Next() { ++sample_iter_; }
+  pair<size_t, size_t> Value() const { return *sample_iter_; }
+  void Reset() { sample_iter_ = sample_map_.begin(); }
+
+  // Reports the accumulator's error state.
+  bool Error() const { return accumulator_->Error(); }
+
+ private:
+  const Fst<A> &fst_;
+  const S &arc_selector_;
+  int max_length_;
+
+  // Stores (N, K) as described for Value().
+  map<size_t, size_t> sample_map_;
+  map<size_t, size_t>::const_iterator sample_iter_;
+  C *accumulator_;  // owned
+
+  // disallow
+  ArcSampler<A, S> & operator=(const ArcSampler<A, S> &s);
+};
+
+
+// Options for random path generation with RandGenFst. The template argument
+// is an arc sampler, typically class 'ArcSampler' above. Ownership of
+// the sampler is taken by RandGenFst.
+template <class S>
+struct RandGenFstOptions : public CacheOptions {
+ S *arc_sampler; // How to sample transitions at a state
+ size_t npath; // # of paths to generate
+ bool weighted; // Output tree weighted by path count; o.w.
+ // output unweighted DAG
+ bool remove_total_weight; // Remove total weight when output is weighted.
+
+ // copts: cache options; samp: arc sampler; n: npath (default 1);
+ // w: weighted (default true); rw: remove_total_weight (default false).
+ RandGenFstOptions(const CacheOptions &copts, S *samp,
+ size_t n = 1, bool w = true, bool rw = false)
+ : CacheOptions(copts),
+ arc_sampler(samp),
+ npath(n),
+ weighted(w),
+ remove_total_weight(rw) {}
+};
+
+
+// Implementation of RandGenFst.
+template <class A, class B, class S>
+class RandGenFstImpl : public CacheImpl<B> {
+ public:
+  using FstImpl<B>::SetType;
+  using FstImpl<B>::SetProperties;
+  using FstImpl<B>::SetInputSymbols;
+  using FstImpl<B>::SetOutputSymbols;
+
+  using CacheBaseImpl< CacheState<B> >::AddArc;
+  using CacheBaseImpl< CacheState<B> >::HasArcs;
+  using CacheBaseImpl< CacheState<B> >::HasFinal;
+  using CacheBaseImpl< CacheState<B> >::HasStart;
+  using CacheBaseImpl< CacheState<B> >::SetArcs;
+  using CacheBaseImpl< CacheState<B> >::SetFinal;
+  using CacheBaseImpl< CacheState<B> >::SetStart;
+
+  typedef B Arc;
+  typedef typename A::Label Label;
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+
+  // Keeps a copy of 'fst' and takes ownership of 'opts.arc_sampler'.
+  RandGenFstImpl(const Fst<A> &fst, const RandGenFstOptions<S> &opts)
+      : CacheImpl<B>(opts),
+        fst_(fst.Copy()),
+        arc_sampler_(opts.arc_sampler),
+        npath_(opts.npath),
+        weighted_(opts.weighted),
+        remove_total_weight_(opts.remove_total_weight),
+        superfinal_(kNoLabel) {
+    SetType("randgen");
+
+    uint64 props = fst.Properties(kFstProperties, false);
+    SetProperties(RandGenProperties(props, weighted_), kCopyProperties);
+
+    SetInputSymbols(fst.InputSymbols());
+    SetOutputSymbols(fst.OutputSymbols());
+  }
+
+  // Copies 'impl' with its own copy of the input FST and its own sampler
+  // bound to that copy. The state table starts out empty.
+  // remove_total_weight_ is copied as well; it was previously left
+  // uninitialized here although Expand() reads it.
+  RandGenFstImpl(const RandGenFstImpl &impl)
+      : CacheImpl<B>(impl),
+        fst_(impl.fst_->Copy(true)),
+        arc_sampler_(new S(*impl.arc_sampler_, fst_)),
+        npath_(impl.npath_),
+        weighted_(impl.weighted_),
+        remove_total_weight_(impl.remove_total_weight_),
+        superfinal_(kNoLabel) {
+    SetType("randgen");
+    SetProperties(impl.Properties(), kCopyProperties);
+    SetInputSymbols(impl.InputSymbols());
+    SetOutputSymbols(impl.OutputSymbols());
+  }
+
+  ~RandGenFstImpl() {
+    for (size_t i = 0; i < state_table_.size(); ++i)
+      delete state_table_[i];
+    delete fst_;
+    delete arc_sampler_;
+  }
+
+  // On first use, creates the start state: a RandState at the input FST's
+  // start state from which all 'npath_' samples are drawn. Returns
+  // kNoStateId when the input FST has no start state.
+  StateId Start() {
+    if (!HasStart()) {
+      StateId s = fst_->Start();
+      if (s == kNoStateId)
+        return kNoStateId;
+      StateId start = state_table_.size();
+      SetStart(start);
+      RandState<A> *rstate = new RandState<A>(s, npath_, 0, 0, 0);
+      state_table_.push_back(rstate);
+    }
+    return CacheImpl<B>::Start();
+  }
+
+  // The accessors below expand state 's' on demand and then answer from
+  // the cache.
+  Weight Final(StateId s) {
+    if (!HasFinal(s)) {
+      Expand(s);
+    }
+    return CacheImpl<B>::Final(s);
+  }
+
+  size_t NumArcs(StateId s) {
+    if (!HasArcs(s)) {
+      Expand(s);
+    }
+    return CacheImpl<B>::NumArcs(s);
+  }
+
+  size_t NumInputEpsilons(StateId s) {
+    if (!HasArcs(s))
+      Expand(s);
+    return CacheImpl<B>::NumInputEpsilons(s);
+  }
+
+  size_t NumOutputEpsilons(StateId s) {
+    if (!HasArcs(s))
+      Expand(s);
+    return CacheImpl<B>::NumOutputEpsilons(s);
+  }
+
+  uint64 Properties() const { return Properties(kFstProperties); }
+
+  // Set error if found; return FST impl properties.
+  uint64 Properties(uint64 mask) const {
+    if ((mask & kError) &&
+        (fst_->Properties(kError, false) || arc_sampler_->Error())) {
+      SetProperties(kError, kError);
+    }
+    return FstImpl<Arc>::Properties(mask);
+  }
+
+  void InitArcIterator(StateId s, ArcIteratorData<B> *data) {
+    if (!HasArcs(s))
+      Expand(s);
+    CacheImpl<B>::InitArcIterator(s, data);
+  }
+
+  // Computes the outgoing transitions from a state, creating new destination
+  // states as needed.
+  void Expand(StateId s) {
+    // The shared super-final state is final with weight One() and has no
+    // outgoing arcs.
+    if (s == superfinal_) {
+      SetFinal(s, Weight::One());
+      SetArcs(s);
+      return;
+    }
+
+    SetFinal(s, Weight::Zero());
+    // Reference to a heap-allocated entry: it stays valid while
+    // state_table_ grows below.
+    const RandState<A> &rstate = *state_table_[s];
+    arc_sampler_->Sample(rstate);
+    ArcIterator< Fst<A> > aiter(*fst_, rstate.state_id);
+    size_t narcs = fst_->NumArcs(rstate.state_id);
+    for (; !arc_sampler_->Done(); arc_sampler_->Next()) {
+      const pair<size_t, size_t> &sample_pair = arc_sampler_->Value();
+      size_t pos = sample_pair.first;     // selected transition
+      size_t count = sample_pair.second;  // times it was selected
+      double prob = static_cast<double>(count)/rstate.nsamples;
+      if (pos < narcs) {  // regular transition
+        aiter.Seek(pos);
+        const A &aarc = aiter.Value();
+        Weight weight = weighted_ ? to_weight_(-log(prob)) : Weight::One();
+        // The destination is the state about to be appended to the table.
+        B barc(aarc.ilabel, aarc.olabel, weight, state_table_.size());
+        AddArc(s, barc);
+        RandState<A> *nrstate =
+            new RandState<A>(aarc.nextstate, count, rstate.length + 1,
+                             pos, &rstate);
+        state_table_.push_back(nrstate);
+      } else {  // super-final transition
+        if (weighted_) {
+          Weight weight = remove_total_weight_ ?
+              to_weight_(-log(prob)) : to_weight_(-log(prob * npath_));
+          SetFinal(s, weight);
+        } else {
+          // Unweighted output: one epsilon arc to the shared super-final
+          // state per sampled path ending here.
+          if (superfinal_ == kNoLabel) {
+            superfinal_ = state_table_.size();
+            RandState<A> *nrstate = new RandState<A>(kNoStateId, 0, 0, 0, 0);
+            state_table_.push_back(nrstate);
+          }
+          for (size_t n = 0; n < count; ++n) {
+            B barc(0, 0, Weight::One(), superfinal_);
+            AddArc(s, barc);
+          }
+        }
+      }
+    }
+    SetArcs(s);
+  }
+
+ private:
+  Fst<A> *fst_;                          // copy of the input FST (owned)
+  S *arc_sampler_;                       // arc sampler (owned)
+  size_t npath_;                         // # of paths to generate
+  vector<RandState<A> *> state_table_;   // output state -> RandState (owned)
+  bool weighted_;                        // weight output by path count
+  bool remove_total_weight_;             // see RandGenFstOptions
+  StateId superfinal_;                   // super-final state, if created
+  WeightConvert<Log64Weight, Weight> to_weight_;
+
+  void operator=(const RandGenFstImpl<A, B, S> &);  // disallow
+};
+
+
+// Fst class to randomly generate paths through an FST; details controlled
+// by RandGenFstOptions. Output format is a tree weighted by the
+// path count.
+template <class A, class B, class S>
+class RandGenFst : public ImplToFst< RandGenFstImpl<A, B, S> > {
+ public:
+ friend class ArcIterator< RandGenFst<A, B, S> >;
+ friend class StateIterator< RandGenFst<A, B, S> >;
+ typedef B Arc;
+ typedef S Sampler;
+ typedef typename A::Label Label;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+ typedef CacheState<B> State;
+ typedef RandGenFstImpl<A, B, S> Impl;
+
+ // Creates the implementation from 'fst' and 'opts'.
+ RandGenFst(const Fst<A> &fst, const RandGenFstOptions<S> &opts)
+ : ImplToFst<Impl>(new Impl(fst, opts)) {}
+
+ // See Fst<>::Copy() for doc.
+ RandGenFst(const RandGenFst<A, B, S> &fst, bool safe = false)
+ : ImplToFst<Impl>(fst, safe) {}
+
+ // Get a copy of this RandGenFst. See Fst<>::Copy() for further doc.
+ virtual RandGenFst<A, B, S> *Copy(bool safe = false) const {
+ return new RandGenFst<A, B, S>(*this, safe);
+ }
+
+ // Defined after the StateIterator specialization it instantiates.
+ virtual inline void InitStateIterator(StateIteratorData<B> *data) const;
+
+ // Forwards to the implementation.
+ virtual void InitArcIterator(StateId s, ArcIteratorData<B> *data) const {
+ GetImpl()->InitArcIterator(s, data);
+ }
+
+ private:
+ // Makes visible to friends.
+ Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
+
+ void operator=(const RandGenFst<A, B, S> &fst); // Disallow
+};
+
+
+
+// Specialization for RandGenFst.
+template <class A, class B, class S>
+class StateIterator< RandGenFst<A, B, S> >
+ : public CacheStateIterator< RandGenFst<A, B, S> > {
+ public:
+ // Builds the cache-based state iterator from 'fst' and its implementation.
+ explicit StateIterator(const RandGenFst<A, B, S> &fst)
+ : CacheStateIterator< RandGenFst<A, B, S> >(fst, fst.GetImpl()) {}
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(StateIterator);
+};
+
+
+// Specialization for RandGenFst.
+template <class A, class B, class S>
+class ArcIterator< RandGenFst<A, B, S> >
+ : public CacheArcIterator< RandGenFst<A, B, S> > {
+ public:
+ typedef typename A::StateId StateId;
+
+ // Builds the cache-based arc iterator for state 's', expanding 's' first
+ // if its arcs have not been computed yet.
+ ArcIterator(const RandGenFst<A, B, S> &fst, StateId s)
+ : CacheArcIterator< RandGenFst<A, B, S> >(fst.GetImpl(), s) {
+ if (!fst.GetImpl()->HasArcs(s))
+ fst.GetImpl()->Expand(s);
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+
+// Installs a newly allocated StateIterator specialization as the base
+// iterator in 'data'.
+template <class A, class B, class S> inline
+void RandGenFst<A, B, S>::InitStateIterator(StateIteratorData<B> *data) const
+{
+ data->base = new StateIterator< RandGenFst<A, B, S> >(*this);
+}
+
+// Options for random path generation.
+template <class S>
+struct RandGenOptions {
+ // Held by reference: the selector must outlive these options.
+ const S &arc_selector; // How an arc is selected at a state
+ int max_length; // Maximum path length
+ size_t npath; // # of paths to generate
+ bool weighted; // Output is tree weighted by path count; o.w.
+ // output unweighted union of paths.
+ bool remove_total_weight; // Remove total weight when output is weighted.
+
+ // sel: arc selector; len: max_length (default INT_MAX); n: npath
+ // (default 1); w: weighted (default false); rw: remove_total_weight
+ // (default false).
+ RandGenOptions(const S &sel, int len = INT_MAX, size_t n = 1,
+ bool w = false, bool rw = false)
+ : arc_selector(sel),
+ max_length(len),
+ npath(n),
+ weighted(w),
+ remove_total_weight(rw) {}
+};
+
+
+// DFS visitor that writes each sampled path of the visited FST to 'ofst'
+// as its own chain of One()-weighted arcs leaving a shared start state.
+template <class IArc, class OArc>
+class RandGenVisitor {
+ public:
+  typedef typename IArc::Weight Weight;
+  typedef typename IArc::StateId StateId;
+
+  // 'ofst' receives the output and is not owned.
+  RandGenVisitor(MutableFst<OArc> *ofst) : ofst_(ofst) {}
+
+  // Empties the output FST, copies symbol tables and any error flag from
+  // the input, and clears the current path.
+  void InitVisit(const Fst<IArc> &ifst) {
+    ifst_ = &ifst;
+
+    ofst_->DeleteStates();
+    ofst_->SetInputSymbols(ifst.InputSymbols());
+    ofst_->SetOutputSymbols(ifst.OutputSymbols());
+    if (ifst.Properties(kError, false))
+      ofst_->SetProperties(kError, kError);
+    path_.clear();
+  }
+
+  bool InitState(StateId s, StateId root) { return true; }
+
+  // An arc into a final state closes the current path; any other tree arc
+  // extends it.
+  bool TreeArc(StateId s, const IArc &arc) {
+    const bool extends_path = ifst_->Final(arc.nextstate) == Weight::Zero();
+    if (extends_path) {
+      path_.push_back(arc);
+    } else {
+      OutputPath();
+    }
+    return true;
+  }
+
+  // A back arc means the input is cyclic: flag the error and stop the visit.
+  bool BackArc(StateId s, const IArc &arc) {
+    FSTERROR() << "RandGenVisitor: cyclic input";
+    ofst_->SetProperties(kError, kError);
+    return false;
+  }
+
+  // A forward or cross arc also closes the current path.
+  bool ForwardOrCrossArc(StateId s, const IArc &arc) {
+    OutputPath();
+    return true;
+  }
+
+  // Leaving a non-root, non-final state drops the arc that led into it.
+  void FinishState(StateId s, StateId p, const IArc *) {
+    if (p == kNoStateId) return;
+    if (ifst_->Final(s) == Weight::Zero())
+      path_.pop_back();
+  }
+
+  void FinishVisit() {}
+
+ private:
+  // Appends the current path to the output as a fresh chain from the start
+  // state (created on first use) and marks the chain's last state final.
+  void OutputPath() {
+    if (ofst_->Start() == kNoStateId) {
+      StateId start = ofst_->AddState();
+      ofst_->SetStart(start);
+    }
+
+    StateId tail = ofst_->Start();
+    for (typename vector<OArc>::const_iterator it = path_.begin();
+         it != path_.end(); ++it) {
+      StateId next = ofst_->AddState();
+      OArc arc(it->ilabel, it->olabel, Weight::One(), next);
+      ofst_->AddArc(tail, arc);
+      tail = next;
+    }
+    ofst_->SetFinal(tail, Weight::One());
+  }
+
+  const Fst<IArc> *ifst_;    // visited FST, set by InitVisit()
+  MutableFst<OArc> *ofst_;   // output FST (not owned)
+  vector<OArc> path_;        // arcs on the current DFS path
+
+  DISALLOW_COPY_AND_ASSIGN(RandGenVisitor);
+};
+
+
+// Randomly generate paths through an FST; details controlled by
+// RandGenOptions.
+template<class IArc, class OArc, class Selector>
+void RandGen(const Fst<IArc> &ifst, MutableFst<OArc> *ofst,
+             const RandGenOptions<Selector> &opts) {
+  typedef ArcSampler<IArc, Selector> Sampler;
+  typedef RandGenFst<IArc, OArc, Sampler> RandFst;
+
+  // The sampler is handed to the RandGenFst through the options below.
+  Sampler *sampler = new Sampler(ifst, opts.arc_selector, opts.max_length);
+  RandGenFstOptions<Sampler> fst_opts(CacheOptions(true, 0), sampler,
+                                      opts.npath, opts.weighted,
+                                      opts.remove_total_weight);
+  RandFst sampled(ifst, fst_opts);
+
+  // Unweighted output is produced by a DFS visitor over the sampled FST.
+  if (!opts.weighted) {
+    RandGenVisitor<IArc, OArc> visitor(ofst);
+    DfsVisit(sampled, &visitor);
+    return;
+  }
+
+  // Weighted output: assign the sampled FST to the output.
+  *ofst = sampled;
+}
+
+// Randomly generate a path through an FST with the uniform distribution
+// over the transitions.
+template<class IArc, class OArc>
+void RandGen(const Fst<IArc> &ifst, MutableFst<OArc> *ofst) {
+  // Default-constructed uniform selector and default options.
+  typedef UniformArcSelector<IArc> Selector;
+  Selector selector;
+  RandGenOptions<Selector> opts(selector);
+  RandGen(ifst, ofst, opts);
+}
+
+} // namespace fst
+
+#endif // FST_LIB_RANDGEN_H__
diff --git a/src/include/fst/random-weight.h b/src/include/fst/random-weight.h
new file mode 100644
index 0000000..0ccd95d
--- /dev/null
+++ b/src/include/fst/random-weight.h
@@ -0,0 +1,348 @@
+// random-weight.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Function objects to generate random weights in various semirings
+// for testing purposes.
+
+#ifndef FST_LIB_RANDOM_WEIGHT_H__
+#define FST_LIB_RANDOM_WEIGHT_H__
+
+#include <cstdlib>
+#include <ctime>
+#include <vector>
+using std::vector;
+
+
+#include <fst/float-weight.h>
+#include <fst/product-weight.h>
+#include <fst/string-weight.h>
+#include <fst/lexicographic-weight.h>
+#include <fst/power-weight.h>
+#include <fst/signed-log-weight.h>
+#include <fst/sparse-power-weight.h>
+
+
+namespace fst {
+
+// The boolean 'allow_zero' below determines whether Zero() and zero
+// divisors should be returned in the random weight generation.
+
+// Function object producing random TropicalWeightTpl<T> values. Each call
+// draws an integer from [0, kNumRandomWeights); when 'allow_zero' is set,
+// one additional outcome yields Weight::Zero().
+template <class T>
+class TropicalWeightGenerator_ {
+ public:
+  typedef TropicalWeightTpl<T> Weight;
+
+  // Seeds the C library generator (srand) with 'seed'.
+  TropicalWeightGenerator_(int seed = time(0), bool allow_zero = true)
+      : allow_zero_(allow_zero) {
+    srand(seed);
+  }
+
+  Weight operator() () const {
+    // One extra slot is added to the modulus when Zero() is permitted.
+    const int pick = rand() % (kNumRandomWeights + allow_zero_);
+    if (!allow_zero_ || pick != kNumRandomWeights)
+      return Weight(static_cast<T>(pick));
+
+    return Weight::Zero();
+  }
+
+ private:
+  // How many distinct non-Zero() weights can be produced.
+  static const int kNumRandomWeights = 5;
+
+  bool allow_zero_;  // whether Zero() may be returned
+};
+
+template <class T> const int TropicalWeightGenerator_<T>::kNumRandomWeights;
+
+typedef TropicalWeightGenerator_<float> TropicalWeightGenerator;
+
+
+// Function object producing random LogWeightTpl<T> values. Each call
+// draws an integer from [0, kNumRandomWeights); when 'allow_zero' is set,
+// one additional outcome yields Weight::Zero().
+template <class T>
+class LogWeightGenerator_ {
+ public:
+  typedef LogWeightTpl<T> Weight;
+
+  // Seeds the C library generator (srand) with 'seed'.
+  LogWeightGenerator_(int seed = time(0), bool allow_zero = true)
+      : allow_zero_(allow_zero) {
+    srand(seed);
+  }
+
+  Weight operator() () const {
+    // One extra slot is added to the modulus when Zero() is permitted.
+    const int pick = rand() % (kNumRandomWeights + allow_zero_);
+    if (!allow_zero_ || pick != kNumRandomWeights)
+      return Weight(static_cast<T>(pick));
+
+    return Weight::Zero();
+  }
+
+ private:
+  // How many distinct non-Zero() weights can be produced.
+  static const int kNumRandomWeights = 5;
+
+  bool allow_zero_;  // whether Zero() may be returned
+};
+
+template <class T> const int LogWeightGenerator_<T>::kNumRandomWeights;
+
+typedef LogWeightGenerator_<float> LogWeightGenerator;
+
+
+// Function object producing random MinMaxWeightTpl<T> values: an integer
+// strictly between -kNumRandomWeights and kNumRandomWeights, or One(), or,
+// when 'allow_zero' is set, Zero().
+template <class T>
+class MinMaxWeightGenerator_ {
+ public:
+  typedef MinMaxWeightTpl<T> Weight;
+
+  // Seeds the C library generator (srand) with 'seed'.
+  MinMaxWeightGenerator_(int seed = time(0), bool allow_zero = true)
+      : allow_zero_(allow_zero) {
+    srand(seed);
+  }
+
+  Weight operator() () const {
+    // The draw is shifted so that it starts at -kNumRandomWeights; the top
+    // value kNumRandomWeights is reachable only when Zero() is permitted.
+    const int span = 2*kNumRandomWeights + allow_zero_;
+    const int pick = (rand() % span) - kNumRandomWeights;
+
+    if (allow_zero_ && pick == kNumRandomWeights)
+      return Weight::Zero();
+    if (pick == -kNumRandomWeights)
+      return Weight::One();
+    return Weight(static_cast<T>(pick));
+  }
+
+ private:
+  // Bounds the magnitude of the integer weights that can be produced.
+  static const int kNumRandomWeights = 5;
+
+  bool allow_zero_;  // whether Zero() may be returned
+};
+
+template <class T> const int MinMaxWeightGenerator_<T>::kNumRandomWeights;
+
+typedef MinMaxWeightGenerator_<float> MinMaxWeightGenerator;
+
+
+// Function object producing random StringWeights. A call first draws a
+// length in [0, kMaxStringLength) and then that many labels, each in
+// {1,...,kAlphabetSize}. When 'allow_zero' is set, one additional outcome
+// of the length draw yields Weight::Zero() instead of a string.
+template <typename L, StringType S = STRING_LEFT>
+class StringWeightGenerator {
+ public:
+  typedef StringWeight<L, S> Weight;
+
+  // Seeds the C library generator (srand) with 'seed'.
+  StringWeightGenerator(int seed = time(0), bool allow_zero = true)
+      : allow_zero_(allow_zero) {
+    srand(seed);
+  }
+
+  Weight operator() () const {
+    const int length = rand() % (kMaxStringLength + allow_zero_);
+    if (allow_zero_ && length == kMaxStringLength)
+      return Weight::Zero();
+
+    vector<L> labels;
+    for (int remaining = length; remaining > 0; --remaining)
+      labels.push_back(rand() % kAlphabetSize + 1);
+    return Weight(labels.begin(), labels.end());
+  }
+
+ private:
+  // Labels are drawn from {1,...,kAlphabetSize}.
+  static const int kAlphabetSize = 5;
+  // Exclusive upper bound on the length of a generated string.
+  static const int kMaxStringLength = 5;
+
+  bool allow_zero_;  // whether Zero() may be returned
+};
+
+template <typename L, StringType S>
+const int StringWeightGenerator<L, S>::kAlphabetSize;
+template <typename L, StringType S>
+const int StringWeightGenerator<L, S>::kMaxStringLength;
+
+
+// Function object that pairs one sample from generator G1 with one sample
+// from generator G2. The result type W defaults to the ProductWeight of
+// the two component weight types.
+template <class G1, class G2,
+          class W = ProductWeight<typename G1::Weight, typename G2::Weight> >
+class ProductWeightGenerator {
+ public:
+  typedef typename G1::Weight W1;
+  typedef typename G2::Weight W2;
+  typedef W Weight;
+
+  // Both component generators receive the same 'seed' and 'allow_zero'.
+  ProductWeightGenerator(int seed = time(0), bool allow_zero = true)
+      : generator1_(seed, allow_zero), generator2_(seed, allow_zero) {}
+
+  Weight operator() () const {
+    // Sampled in two separate statements so the first component is always
+    // drawn before the second.
+    const W1 first = generator1_();
+    const W2 second = generator2_();
+    return Weight(first, second);
+  }
+
+ private:
+  G1 generator1_;
+  G2 generator2_;
+};
+
+
+// Function object producing LexicographicWeights from the generators G1
+// and G2. Zeroes cannot be drawn independently for the two components,
+// because a lexicographic weight is a member only if both components are
+// zero or both are non-zero. The component generators are therefore
+// constructed with zero disabled, and the all-zero weight is chosen here.
+template <class G1, class G2>
+class LexicographicWeightGenerator {
+ public:
+  typedef typename G1::Weight W1;
+  typedef typename G2::Weight W2;
+  typedef LexicographicWeight<W1, W2> Weight;
+
+  LexicographicWeightGenerator(int seed = time(0), bool allow_zero = true)
+      : generator1_(seed, false), generator2_(seed, false),
+        allow_zero_(allow_zero) {}
+
+  Weight operator() () const {
+    // rand() is consulted only when zero is permitted.
+    if (allow_zero_ &&
+        rand() % (kNumRandomWeights + allow_zero_) == kNumRandomWeights)
+      return Weight(W1::Zero(), W2::Zero());
+
+    // Sampled in two separate statements so the first component is always
+    // drawn before the second.
+    const W1 first = generator1_();
+    const W2 second = generator2_();
+    return Weight(first, second);
+  }
+
+ private:
+  G1 generator1_;
+  G2 generator2_;
+  static const int kNumRandomWeights = 5;
+  bool allow_zero_;  // whether the all-zero weight may be returned
+};
+
+template <class G1, class G2>
+const int LexicographicWeightGenerator<G1, G2>::kNumRandomWeights;
+
+
+// A ProductWeightGenerator whose first component is a StringWeightGenerator
+// over labels L (string type S) and whose second component is the
+// arbitrary weight generator G. Sampling is inherited from the base class.
+template <class L, class G, StringType S = STRING_LEFT>
+class GallicWeightGenerator
+    : public ProductWeightGenerator<StringWeightGenerator<L, S>, G> {
+ public:
+  typedef typename G::Weight W;
+  typedef GallicWeight<L, W, S> Weight;
+  typedef ProductWeightGenerator<StringWeightGenerator<L, S>, G> PG;
+
+  // Copies an existing product generator of the matching type.
+  GallicWeightGenerator(const PG &pg) : PG(pg) {}
+
+  // Forwards 'seed' and 'allow_zero' to the product generator.
+  GallicWeightGenerator(int seed = time(0), bool allow_zero = true)
+      : PG(seed, allow_zero) {}
+};
+
+// Function object producing PowerWeight<W, n> values (the Cartesian power
+// of rank n of G's weight type): each of the n components is an
+// independent sample from the generator G.
+template <class G, unsigned int n>
+class PowerWeightGenerator {
+ public:
+  typedef typename G::Weight W;
+  typedef PowerWeight<W, n> Weight;
+
+  PowerWeightGenerator(int seed = time(0), bool allow_zero = true)
+      : generator_(seed, allow_zero) {}
+
+  Weight operator()() const {
+    Weight result;
+    for (size_t index = 0; index != n; ++index) {
+      const W sample = generator_();
+      result.SetValue(index, sample);
+    }
+    return result;
+  }
+
+ private:
+  G generator_;
+};
+
+// Function object producing random SignedLogWeightTpl<T> values. The sign
+// component is TropicalWeight(-1.0) or TropicalWeight(1.0), chosen at
+// random. The magnitude is an integer drawn from [0, kNumRandomWeights),
+// or LogWeightTpl<T>::Zero() when 'allow_zero' is set and the extra
+// outcome is drawn.
+template <class T>
+class SignedLogWeightGenerator_ {
+ public:
+  typedef SignedLogWeightTpl<T> Weight;
+
+  // Seeds the C library generator (srand) with 'seed'.
+  SignedLogWeightGenerator_(int seed = time(0), bool allow_zero = true)
+      : allow_zero_(allow_zero) {
+    srand(seed);
+  }
+
+  Weight operator() () const {
+    // The sign is drawn first, then the magnitude.
+    const int sign_draw = rand() % 2;
+    const int pick = rand() % (kNumRandomWeights + allow_zero_);
+
+    const TropicalWeight sign =
+        (sign_draw == 0) ? TropicalWeight(-1.0) : TropicalWeight(1.0);
+    const LogWeightTpl<T> magnitude =
+        (allow_zero_ && pick == kNumRandomWeights) ?
+        LogWeightTpl<T>::Zero() :
+        LogWeightTpl<T>(static_cast<T>(pick));
+
+    return SignedLogWeightTpl<T>(sign, magnitude);
+  }
+
+ private:
+  // How many distinct non-Zero() magnitudes can be produced.
+  static const int kNumRandomWeights = 5;
+  bool allow_zero_;  // whether a Zero() magnitude may be returned
+};
+
+template <class T> const int SignedLogWeightGenerator_<T>::kNumRandomWeights;
+
+typedef SignedLogWeightGenerator_<float> SignedLogWeightGenerator;
+
+// Function object producing SparsePowerWeight<W, K> values: for each key
+// 1, 2, ..., n one sample is drawn from the generator G and pushed onto
+// the result under that key.
+template <class G, class K, unsigned int n>
+class SparsePowerWeightGenerator {
+ public:
+  typedef typename G::Weight W;
+  typedef SparsePowerWeight<W, K> Weight;
+
+  SparsePowerWeightGenerator(int seed = time(0), bool allow_zero = true)
+      : generator_(seed, allow_zero) {}
+
+  Weight operator()() const {
+    Weight result;
+    for (size_t index = 1; index <= n; ++index) {
+      const W sample = generator_();
+      const K key = index;
+      result.Push(key, sample, true);
+    }
+    return result;
+  }
+
+ private:
+  G generator_;
+};
+
+} // namespace fst
+
+#endif // FST_LIB_RANDOM_WEIGHT_H__
diff --git a/src/include/fst/rational.h b/src/include/fst/rational.h
new file mode 100644
index 0000000..96aa00d
--- /dev/null
+++ b/src/include/fst/rational.h
@@ -0,0 +1,330 @@
+// rational.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// An Fst implementation and base interface for delayed unions,
+// concatenations and closures.
+
+#ifndef FST_LIB_RATIONAL_H__
+#define FST_LIB_RATIONAL_H__
+
+#include <algorithm>
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/mutable-fst.h>
+#include <fst/replace.h>
+#include <fst/test-properties.h>
+
+
+namespace fst {
+
+// Options for the rational FSTs are exactly the cache options.
+typedef CacheOptions RationalFstOptions;
+
+// This specifies whether to add the empty string.
+enum ClosureType { CLOSURE_STAR = 0, // T* -> add the empty string
+ CLOSURE_PLUS = 1 }; // T+ -> don't add the empty string
+
+// Forward declarations of RationalFst and of the function templates that
+// RationalFst names as friends further down in this header.
+template <class A> class RationalFst;
+template <class A> void Union(RationalFst<A> *fst1, const Fst<A> &fst2);
+template <class A> void Concat(RationalFst<A> *fst1, const Fst<A> &fst2);
+template <class A> void Concat(const Fst<A> &fst1, RationalFst<A> *fst2);
+template <class A> void Closure(RationalFst<A> *fst, ClosureType closure_type);
+
+
+// Implementation class for delayed unions, concatenations and closures.
+//
+// The shape of the rational expression is stored in 'rfst_', a small
+// VectorFst whose arcs carry negative output labels (-1, -2, ...) used as
+// nonterminals. 'fst_tuples_' pairs each nonterminal with a copy of the
+// operand FST it stands for; slot 0 (label 0) is reserved for a copy of
+// 'rfst_' itself. Replace() lazily builds a ReplaceFst from these tuples,
+// and every query (Start, Final, NumArcs, ...) is delegated to it.
+//
+// Every method that changes the expression discards the cached ReplaceFst
+// through ResetReplace(), which also clears the pointer so that the next
+// Replace() call rebuilds it instead of handing out a deleted object.
+template<class A>
+class RationalFstImpl : public FstImpl<A> {
+ public:
+  using FstImpl<A>::SetType;
+  using FstImpl<A>::SetProperties;
+  using FstImpl<A>::WriteHeader;
+  using FstImpl<A>::SetInputSymbols;
+  using FstImpl<A>::SetOutputSymbols;
+
+  typedef A Arc;
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+  typedef typename A::Label Label;
+
+  // Creates an empty rational FST; 'opts' is forwarded to the options of
+  // the underlying ReplaceFst.
+  explicit RationalFstImpl(const RationalFstOptions &opts)
+      : nonterminals_(0),
+        replace_(0),
+        replace_options_(opts, 0) {
+    SetType("rational");
+    // Slot 0 is filled in by Replace() with a copy of 'rfst_'.
+    fst_tuples_.push_back(pair<Label, const Fst<A>*>(0, 0));
+  }
+
+  // Copies 'impl', including a Copy(true) of every operand FST and of the
+  // cached ReplaceFst if one exists.
+  RationalFstImpl(const RationalFstImpl<A> &impl)
+      : rfst_(impl.rfst_),
+        nonterminals_(impl.nonterminals_),
+        replace_(impl.replace_ ? impl.replace_->Copy(true) : 0),
+        replace_options_(impl.replace_options_) {
+    SetType("rational");
+    fst_tuples_.reserve(impl.fst_tuples_.size());
+    for (size_t i = 0; i < impl.fst_tuples_.size(); ++i)
+      fst_tuples_.push_back(make_pair(impl.fst_tuples_[i].first,
+                                      impl.fst_tuples_[i].second
+                                      ? impl.fst_tuples_[i].second->Copy(true)
+                                      : 0));
+  }
+
+  // Releases the operand copies held in 'fst_tuples_' and the cached
+  // ReplaceFst.
+  virtual ~RationalFstImpl() {
+    for (size_t i = 0; i < fst_tuples_.size(); ++i)
+      delete fst_tuples_[i].second;
+    ResetReplace();
+  }
+
+  StateId Start() { return Replace()->Start(); }
+
+  Weight Final(StateId s) { return Replace()->Final(s); }
+
+  size_t NumArcs(StateId s) { return Replace()->NumArcs(s); }
+
+  size_t NumInputEpsilons(StateId s) {
+    return Replace()->NumInputEpsilons(s);
+  }
+
+  size_t NumOutputEpsilons(StateId s) {
+    return Replace()->NumOutputEpsilons(s);
+  }
+
+  uint64 Properties() const { return Properties(kFstProperties); }
+
+  // Set error if found; return FST impl properties.
+  uint64 Properties(uint64 mask) const {
+    if ((mask & kError) && Replace()->Properties(kError, false))
+      SetProperties(kError, kError);
+    return FstImpl<Arc>::Properties(mask);
+  }
+
+  // Implementation of UnionFst(fst1,fst2)
+  void InitUnion(const Fst<A> &fst1, const Fst<A> &fst2) {
+    ResetReplace();
+    uint64 props1 = fst1.Properties(kFstProperties, false);
+    uint64 props2 = fst2.Properties(kFstProperties, false);
+    SetInputSymbols(fst1.InputSymbols());
+    SetOutputSymbols(fst1.OutputSymbols());
+    // Topology: two parallel arcs 0 -> 1, one per operand.
+    rfst_.AddState();
+    rfst_.AddState();
+    rfst_.SetStart(0);
+    rfst_.SetFinal(1, Weight::One());
+    rfst_.SetInputSymbols(fst1.InputSymbols());
+    rfst_.SetOutputSymbols(fst1.OutputSymbols());
+    nonterminals_ = 2;
+    rfst_.AddArc(0, A(0, -1, Weight::One(), 1));
+    rfst_.AddArc(0, A(0, -2, Weight::One(), 1));
+    fst_tuples_.push_back(make_pair(-1, fst1.Copy()));
+    fst_tuples_.push_back(make_pair(-2, fst2.Copy()));
+    SetProperties(UnionProperties(props1, props2, true), kCopyProperties);
+  }
+
+  // Implementation of ConcatFst(fst1,fst2)
+  void InitConcat(const Fst<A> &fst1, const Fst<A> &fst2) {
+    ResetReplace();
+    uint64 props1 = fst1.Properties(kFstProperties, false);
+    uint64 props2 = fst2.Properties(kFstProperties, false);
+    SetInputSymbols(fst1.InputSymbols());
+    SetOutputSymbols(fst1.OutputSymbols());
+    // Topology: 0 -(fst1)-> 1 -(fst2)-> 2.
+    rfst_.AddState();
+    rfst_.AddState();
+    rfst_.AddState();
+    rfst_.SetStart(0);
+    rfst_.SetFinal(2, Weight::One());
+    rfst_.SetInputSymbols(fst1.InputSymbols());
+    rfst_.SetOutputSymbols(fst1.OutputSymbols());
+    nonterminals_ = 2;
+    rfst_.AddArc(0, A(0, -1, Weight::One(), 1));
+    rfst_.AddArc(1, A(0, -2, Weight::One(), 2));
+    fst_tuples_.push_back(make_pair(-1, fst1.Copy()));
+    fst_tuples_.push_back(make_pair(-2, fst2.Copy()));
+    SetProperties(ConcatProperties(props1, props2, true), kCopyProperties);
+  }
+
+  // Implementation of ClosureFst(fst, closure_type)
+  void InitClosure(const Fst<A> &fst, ClosureType closure_type) {
+    ResetReplace();
+    uint64 props = fst.Properties(kFstProperties, false);
+    SetInputSymbols(fst.InputSymbols());
+    SetOutputSymbols(fst.OutputSymbols());
+    if (closure_type == CLOSURE_STAR) {
+      // Single final state with a self-loop over the operand.
+      rfst_.AddState();
+      rfst_.SetStart(0);
+      rfst_.SetFinal(0, Weight::One());
+      rfst_.AddArc(0, A(0, -1, Weight::One(), 0));
+    } else {
+      // 0 -(fst)-> 1 (final), with an epsilon arc back from 1 to 0.
+      rfst_.AddState();
+      rfst_.AddState();
+      rfst_.SetStart(0);
+      rfst_.SetFinal(1, Weight::One());
+      rfst_.AddArc(0, A(0, -1, Weight::One(), 1));
+      rfst_.AddArc(1, A(0, 0, Weight::One(), 0));
+    }
+    rfst_.SetInputSymbols(fst.InputSymbols());
+    rfst_.SetOutputSymbols(fst.OutputSymbols());
+    fst_tuples_.push_back(make_pair(-1, fst.Copy()));
+    nonterminals_ = 1;
+    SetProperties(ClosureProperties(props, closure_type == CLOSURE_STAR, true),
+                  kCopyProperties);
+  }
+
+  // Implementation of Union(Fst &, RationalFst *)
+  void AddUnion(const Fst<A> &fst) {
+    ResetReplace();
+    uint64 props1 = FstImpl<A>::Properties();
+    uint64 props2 = fst.Properties(kFstProperties, false);
+    // One-arc acceptor of a fresh nonterminal, unioned into the topology.
+    VectorFst<A> afst;
+    afst.AddState();
+    afst.AddState();
+    afst.SetStart(0);
+    afst.SetFinal(1, Weight::One());
+    ++nonterminals_;
+    afst.AddArc(0, A(0, -nonterminals_, Weight::One(), 1));
+    Union(&rfst_, afst);
+    fst_tuples_.push_back(make_pair(-nonterminals_, fst.Copy()));
+    SetProperties(UnionProperties(props1, props2, true), kCopyProperties);
+  }
+
+  // Implementation of Concat(Fst &, RationalFst *)
+  // 'append' selects Concat(&rfst_, afst); otherwise Concat(afst, &rfst_).
+  void AddConcat(const Fst<A> &fst, bool append) {
+    ResetReplace();
+    uint64 props1 = FstImpl<A>::Properties();
+    uint64 props2 = fst.Properties(kFstProperties, false);
+    // One-arc acceptor of a fresh nonterminal, concatenated onto the
+    // topology.
+    VectorFst<A> afst;
+    afst.AddState();
+    afst.AddState();
+    afst.SetStart(0);
+    afst.SetFinal(1, Weight::One());
+    ++nonterminals_;
+    afst.AddArc(0, A(0, -nonterminals_, Weight::One(), 1));
+    if (append)
+      Concat(&rfst_, afst);
+    else
+      Concat(afst, &rfst_);
+    fst_tuples_.push_back(make_pair(-nonterminals_, fst.Copy()));
+    SetProperties(ConcatProperties(props1, props2, true), kCopyProperties);
+  }
+
+  // Implementation of Closure(RationalFst *, closure_type)
+  void AddClosure(ClosureType closure_type) {
+    ResetReplace();
+    uint64 props = FstImpl<A>::Properties();
+    Closure(&rfst_, closure_type);
+    SetProperties(ClosureProperties(props, closure_type == CLOSURE_STAR, true),
+                  kCopyProperties);
+  }
+
+  // Returns the underlying ReplaceFst, building it on first use or after
+  // the expression has been modified.
+  ReplaceFst<A> *Replace() const {
+    if (!replace_) {
+      // Slot 0 may still hold the topology copy made for an earlier
+      // ReplaceFst that has since been discarded; free it before taking a
+      // fresh copy so it does not leak. This class owns every tuple entry
+      // (the destructor deletes them), and no ReplaceFst is alive here.
+      delete fst_tuples_[0].second;
+      fst_tuples_[0].second = rfst_.Copy();
+      replace_ = new ReplaceFst<A>(fst_tuples_, replace_options_);
+    }
+    return replace_;
+  }
+
+ private:
+  // Discards the cached ReplaceFst. The pointer is cleared as well, so that
+  // Replace() rebuilds on the next call and the destructor cannot delete
+  // the same object twice.
+  void ResetReplace() {
+    delete replace_;
+    replace_ = 0;
+  }
+
+  VectorFst<A> rfst_;   // rational topology machine; uses neg. nonterminals
+  Label nonterminals_;  // # of nonterminals used
+  // Contains the nonterminals and their corresponding FSTs.
+  mutable vector<pair<Label, const Fst<A>*> > fst_tuples_;
+  mutable ReplaceFst<A> *replace_;        // Underlying ReplaceFst
+  ReplaceFstOptions<A> replace_options_;  // Options for creating 'replace_'
+
+  void operator=(const RationalFstImpl<A> &impl);  // disallow
+};
+
+// Parent class for the delayed rational operations - delayed union,
+// concatenation, and closure.
+//
+// This class attaches interface to implementation and handles
+// reference counting, delegating most methods to ImplToFst.
+template <class A>
+class RationalFst : public ImplToFst< RationalFstImpl<A> > {
+ public:
+ // The iterator specializations and the rational operations below reach
+ // the implementation through the private GetImpl().
+ friend class StateIterator< RationalFst<A> >;
+ friend class ArcIterator< RationalFst<A> >;
+ friend void Union<>(RationalFst<A> *fst1, const Fst<A> &fst2);
+ friend void Concat<>(RationalFst<A> *fst1, const Fst<A> &fst2);
+ friend void Concat<>(const Fst<A> &fst1, RationalFst<A> *fst2);
+ friend void Closure<>(RationalFst<A> *fst, ClosureType closure_type);
+
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+ typedef RationalFstImpl<A> Impl;
+
+ // Forwards state-iterator setup to the implementation's ReplaceFst.
+ virtual void InitStateIterator(StateIteratorData<A> *data) const {
+ GetImpl()->Replace()->InitStateIterator(data);
+ }
+
+ // Forwards arc-iterator setup for state 's' to the implementation's
+ // ReplaceFst.
+ virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
+ GetImpl()->Replace()->InitArcIterator(s, data);
+ }
+
+ protected:
+ // Constructors are protected: this class is meant to be derived from.
+ RationalFst()
+ : ImplToFst<Impl>(new Impl(RationalFstOptions())) {}
+
+ explicit RationalFst(const RationalFstOptions &opts)
+ : ImplToFst<Impl>(new Impl(opts)) {}
+
+ // See Fst<>::Copy() for doc.
+ RationalFst(const RationalFst<A> &fst , bool safe = false)
+ : ImplToFst<Impl>(fst, safe) {}
+
+ private:
+ // Makes visible to friends.
+ Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
+
+ void operator=(const RationalFst<A> &fst); // disallow
+};
+
+
+// Specialization for RationalFst.
+// Iterates over the states of the ReplaceFst that backs 'fst', obtained
+// through fst.GetImpl()->Replace().
+template <class A>
+class StateIterator< RationalFst<A> >
+ : public StateIterator< ReplaceFst<A> > {
+ public:
+ explicit StateIterator(const RationalFst<A> &fst)
+ : StateIterator< ReplaceFst<A> >(*(fst.GetImpl()->Replace())) {}
+};
+
+
+// Specialization for RationalFst.
+// Iterates over the arcs leaving state 's' in the ReplaceFst that backs
+// 'fst', obtained through fst.GetImpl()->Replace().
+//
+// NOTE(review): the declared base class is CacheArcIterator< ReplaceFst<A> >
+// but the constructor's initializer names ArcIterator< ReplaceFst<A> >.
+// Unless those are the same class, this initializer does not name a base
+// and the constructor cannot be instantiated - confirm against the
+// ArcIterator specialization for ReplaceFst.
+template <class A>
+class ArcIterator< RationalFst<A> >
+ : public CacheArcIterator< ReplaceFst<A> > {
+ public:
+ typedef typename A::StateId StateId;
+
+ ArcIterator(const RationalFst<A> &fst, StateId s)
+ : ArcIterator< ReplaceFst<A> >(*(fst.GetImpl()->Replace()), s) {}
+};
+
+} // namespace fst
+
+#endif // FST_LIB_RATIONAL_H__
diff --git a/src/include/fst/register.h b/src/include/fst/register.h
new file mode 100644
index 0000000..55651cd
--- /dev/null
+++ b/src/include/fst/register.h
@@ -0,0 +1,132 @@
+// register.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley), jpr@google.com (Jake Ratkiewicz)
+//
+// \file
+// Classes for registering derived Fsts for generic reading
+//
+
+#ifndef FST_LIB_REGISTER_H__
+#define FST_LIB_REGISTER_H__
+
+#include <string>
+
+
+#include <fst/compat.h>
+#include <iostream>
+#include <fstream>
+#include <fst/util.h>
+#include <fst/generic-register.h>
+
+
+#include <fst/types.h>
+
+namespace fst {
+
+template <class A> class Fst;
+struct FstReadOptions;
+
+// This class represents a single entry in a FstRegister
+// It bundles the two function pointers stored per FST type: one that
+// reads an Fst<A> from a stream and one that converts from an existing
+// Fst<A>.
+template<class A>
+struct FstRegisterEntry {
+ // Reads an FST from 'strm' using 'opts'.
+ typedef Fst<A> *(*Reader)(istream &strm, const FstReadOptions &opts);
+ // Produces a new FST from 'fst'.
+ typedef Fst<A> *(*Converter)(const Fst<A> &fst);
+
+ Reader reader;
+ Converter converter;
+ // The default entry has both pointers null.
+ FstRegisterEntry() : reader(0), converter(0) {}
+ FstRegisterEntry(Reader r, Converter c) : reader(r), converter(c) { }
+};
+
+// Registry keyed by the string naming an FST type; each entry holds the
+// reader and the converter registered for that type.
+template<class A>
+class FstRegister : public GenericRegister<string, FstRegisterEntry<A>,
+                                           FstRegister<A> > {
+ public:
+  typedef typename FstRegisterEntry<A>::Reader Reader;
+  typedef typename FstRegisterEntry<A>::Converter Converter;
+
+  // Returns the reader stored in the entry looked up for 'type'.
+  const Reader GetReader(const string &type) const {
+    const FstRegisterEntry<A> &entry = this->GetEntry(type);
+    return entry.reader;
+  }
+
+  // Returns the converter stored in the entry looked up for 'type'.
+  const Converter GetConverter(const string &type) const {
+    const FstRegisterEntry<A> &entry = this->GetEntry(type);
+    return entry.converter;
+  }
+
+ protected:
+  // Maps a type name to a shared-object file name: the key is rewritten by
+  // ConvertToLegalCSymbol and then suffixed with "-fst.so".
+  virtual string ConvertKeyToSoFilename(const string& key) const {
+    string so_name(key);
+    ConvertToLegalCSymbol(&so_name);
+    so_name += "-fst.so";
+    return so_name;
+  }
+};
+
+
+// Registers the Fst type F for generic reading and creating. F must have
+// a default constructor and a constructor from 'Fst<Arc>': the former is
+// used to obtain the type name, the latter by the registered converter.
+template <class F>
+class FstRegisterer
+    : public GenericRegisterer<FstRegister<typename F::Arc> > {
+ public:
+  typedef typename F::Arc Arc;
+  typedef typename FstRegister<Arc>::Entry Entry;
+  typedef typename FstRegister<Arc>::Reader Reader;
+
+  // Registers under the name reported by a default-constructed F.
+  FstRegisterer()
+      : GenericRegisterer<FstRegister<typename F::Arc> >(
+            F().Type(), BuildEntry()) { }
+
+ private:
+  // Converter stored in the entry: constructs a new F from 'fst'.
+  static Fst<Arc> *Convert(const Fst<Arc> &fst) { return new F(fst); }
+
+  // Builds the entry from F::Read and Convert. F::Read returns F*, so its
+  // address is first taken with that exact signature and then cast to the
+  // registry's Reader pointer type.
+  Entry BuildEntry() {
+    typedef F *(*TypedReader)(istream &strm, const FstReadOptions &opts);
+    TypedReader typed_reader = &F::Read;
+
+    return Entry(reinterpret_cast<Reader>(typed_reader),
+                 &FstRegisterer<F>::Convert);
+  }
+};
+
+
+// Convenience macro to generate static FstRegisterer instance.
+// Declares a static fst::FstRegisterer< F<A> > whose name is formed by
+// pasting F, '_', A and '_registerer' together, so both arguments must be
+// plain identifiers. The macro does not supply the terminating semicolon.
+#define REGISTER_FST(F, A) \
+static fst::FstRegisterer< F<A> > F ## _ ## A ## _registerer
+
+
+// Converts 'fst' to the FST type named 'ftype' using the converter
+// registered for that name. Logs an error and returns 0 when no converter
+// is found.
+template <class A>
+Fst<A> *Convert(const Fst<A> &fst, const string &ftype) {
+  typedef typename FstRegister<A>::Converter Converter;
+
+  FstRegister<A> *fst_register = FstRegister<A>::GetRegister();
+  const Converter convert = fst_register->GetConverter(ftype);
+  if (convert)
+    return convert(fst);
+
+  const string atype = A::Type();
+  LOG(ERROR) << "Fst::Convert: Unknown FST type \"" << ftype
+             << "\" (arc type = \"" << atype << "\")";
+  return 0;
+}
+
+} // namespace fst
+
+#endif // FST_LIB_REGISTER_H__
diff --git a/src/include/fst/relabel.h b/src/include/fst/relabel.h
new file mode 100644
index 0000000..fbb8942
--- /dev/null
+++ b/src/include/fst/relabel.h
@@ -0,0 +1,524 @@
+// relabel.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: johans@google.com (Johan Schalkwyk)
+//
+// \file
+// Functions and classes to relabel an Fst (either on input or output)
+//
+#ifndef FST_LIB_RELABEL_H__
+#define FST_LIB_RELABEL_H__
+
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <string>
+#include <utility>
+using std::pair; using std::make_pair;
+#include <vector>
+using std::vector;
+
+#include <fst/cache.h>
+#include <fst/test-properties.h>
+
+
+namespace fst {
+
+//
+// Relabels the input and/or output labels of a mutable FST in place.
+// Each mapping is given as a vector of (old label, new label) pairs; a
+// label that has no pair is left unchanged. If a label that occurs on an
+// arc is mapped to kNoLabel, an error is reported, kError is set on the
+// FST and the function returns at once; arcs visited before that point
+// stay relabeled.
+//
+// \param fst input fst, must be mutable
+// \param ipairs vector of input label pairs indicating old to new mapping
+// \param opairs vector of output label pairs indicating old to new mapping
+//
+template <class A>
+void Relabel(
+    MutableFst<A> *fst,
+    const vector<pair<typename A::Label, typename A::Label> >& ipairs,
+    const vector<pair<typename A::Label, typename A::Label> >& opairs) {
+  typedef typename A::StateId StateId;
+  typedef typename A::Label Label;
+  typedef unordered_map<Label, Label> LabelMap;
+
+  // Properties are sampled before any arc is modified.
+  const uint64 props = fst->Properties(kFstProperties, false);
+
+  // Hash the pair vectors for lookup by old label; for a repeated old
+  // label the last pair wins.
+  LabelMap input_map;
+  for (size_t k = 0; k != ipairs.size(); ++k)
+    input_map[ipairs[k].first] = ipairs[k].second;
+
+  LabelMap output_map;
+  for (size_t k = 0; k != opairs.size(); ++k)
+    output_map[opairs[k].first] = opairs[k].second;
+
+  for (StateIterator<MutableFst<A> > siter(*fst);
+       !siter.Done(); siter.Next()) {
+    const StateId state = siter.Value();
+    for (MutableArcIterator<MutableFst<A> > aiter(fst, state);
+         !aiter.Done(); aiter.Next()) {
+      A arc = aiter.Value();
+
+      // Input side: rewritten only if a pair exists for this label.
+      typename LabelMap::iterator found = input_map.find(arc.ilabel);
+      if (found != input_map.end()) {
+        if (found->second == kNoLabel) {
+          FSTERROR() << "Input symbol id " << arc.ilabel
+                     << " missing from target vocabulary";
+          fst->SetProperties(kError, kError);
+          return;
+        }
+        arc.ilabel = found->second;
+      }
+
+      // Output side, handled the same way.
+      found = output_map.find(arc.olabel);
+      if (found != output_map.end()) {
+        if (found->second == kNoLabel) {
+          FSTERROR() << "Output symbol id " << arc.olabel
+                     << " missing from target vocabulary";
+          fst->SetProperties(kError, kError);
+          return;
+        }
+        arc.olabel = found->second;
+      }
+
+      aiter.SetValue(arc);
+    }
+  }
+
+  fst->SetProperties(RelabelProperties(props), kFstProperties);
+}
+
+//
+// Relabels a mutable FST from the symbol tables currently attached to it
+// to the given new ones, and attaches the new tables. This forwards to the
+// overload that takes explicit old and new tables, passing the FST's own
+// InputSymbols() / OutputSymbols() as the old tables and 'true' for both
+// attach flags.
+//
+// \param fst input fst, must be mutable
+// \param new_isymbols symbol set indicating new mapping of input symbols
+// \param new_osymbols symbol set indicating new mapping of output symbols
+//
+template<class A>
+void Relabel(MutableFst<A> *fst,
+             const SymbolTable* new_isymbols,
+             const SymbolTable* new_osymbols) {
+  const SymbolTable *current_isymbols = fst->InputSymbols();
+  const SymbolTable *current_osymbols = fst->OutputSymbols();
+  Relabel(fst,
+          current_isymbols, new_isymbols, true,
+          current_osymbols, new_osymbols, true);
+}
+
+// Relabels a mutable FST according to old and new symbol tables. For each
+// side (input, output) on which both the old and the new table are
+// non-null, every symbol of the old table is looked up by name in the new
+// table and the resulting (old id, new id) pair is recorded. If the
+// matching attach flag is set, the new table is also attached to the FST.
+// The recorded pairs are then applied by the pair-vector overload of
+// Relabel. A side with a null table is left untouched.
+template<class A>
+void Relabel(MutableFst<A> *fst,
+             const SymbolTable* old_isymbols,
+             const SymbolTable* new_isymbols,
+             bool attach_new_isymbols,
+             const SymbolTable* old_osymbols,
+             const SymbolTable* new_osymbols,
+             bool attach_new_osymbols) {
+  typedef typename A::StateId StateId;
+  typedef typename A::Label Label;
+
+  vector<pair<Label, Label> > ipairs;
+  if (old_isymbols && new_isymbols) {
+    for (SymbolTableIterator it(*old_isymbols); !it.Done(); it.Next()) {
+      string symbol = it.Symbol();
+      int old_id = it.Value();
+      int new_id = new_isymbols->Find(symbol);
+      ipairs.push_back(make_pair(old_id, new_id));
+    }
+    if (attach_new_isymbols)
+      fst->SetInputSymbols(new_isymbols);
+  }
+
+  vector<pair<Label, Label> > opairs;
+  if (old_osymbols && new_osymbols) {
+    for (SymbolTableIterator it(*old_osymbols); !it.Done(); it.Next()) {
+      string symbol = it.Symbol();
+      int old_id = it.Value();
+      int new_id = new_osymbols->Find(symbol);
+      opairs.push_back(make_pair(old_id, new_id));
+    }
+    if (attach_new_osymbols)
+      fst->SetOutputSymbols(new_osymbols);
+  }
+
+  // Apply both mappings through the pair-vector overload.
+  Relabel(fst, ipairs, opairs);
+}
+
+
+typedef CacheOptions RelabelFstOptions;
+
+template <class A> class RelabelFst;
+
+//
+// \class RelabelFstImpl
+// \brief Implementation for delayed relabeling
+//
+// Relabels an FST from one symbol set to another. Relabeling
+// can either be on input or output space. RelabelFst implements
+// a delayed version of the relabel. Arcs are relabeled on the fly
+// and not cached. I.e each request is recomputed.
+//
+// Implementation class behind RelabelFst. It keeps a private copy of the
+// wrapped Fst together with two label maps (input side and output side).
+// Expand() rewrites the labels of every arc leaving a state and stores the
+// result through the CacheImpl interface (PushArc/SetArcs).
+template<class A>
+class RelabelFstImpl : public CacheImpl<A> {
+  friend class StateIterator< RelabelFst<A> >;
+
+ public:
+  using FstImpl<A>::SetType;
+  using FstImpl<A>::SetProperties;
+  using FstImpl<A>::WriteHeader;
+  using FstImpl<A>::SetInputSymbols;
+  using FstImpl<A>::SetOutputSymbols;
+
+  using CacheImpl<A>::PushArc;
+  using CacheImpl<A>::HasArcs;
+  using CacheImpl<A>::HasFinal;
+  using CacheImpl<A>::HasStart;
+  using CacheImpl<A>::SetArcs;
+  using CacheImpl<A>::SetFinal;
+  using CacheImpl<A>::SetStart;
+
+  typedef A Arc;
+  typedef typename A::Label Label;
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+  typedef CacheState<A> State;
+
+  // Builds the relabeling from explicit (old label, new label) pairs.
+  // A side is only relabeled when its pair list is non-empty.
+  RelabelFstImpl(const Fst<A>& fst,
+                 const vector<pair<Label, Label> >& ipairs,
+                 const vector<pair<Label, Label> >& opairs,
+                 const RelabelFstOptions &opts)
+      : CacheImpl<A>(opts), fst_(fst.Copy()),
+        relabel_input_(false), relabel_output_(false) {
+    const uint64 props = fst.Properties(kCopyProperties, false);
+    SetProperties(RelabelProperties(props));
+    SetType("relabel");
+
+    typedef typename vector<pair<Label, Label> >::const_iterator PairIter;
+
+    // Input label map.
+    for (PairIter p = ipairs.begin(); p != ipairs.end(); ++p)
+      input_map_[p->first] = p->second;
+    relabel_input_ = !ipairs.empty();
+
+    // Output label map.
+    for (PairIter p = opairs.begin(); p != opairs.end(); ++p)
+      output_map_[p->first] = p->second;
+    relabel_output_ = !opairs.empty();
+  }
+
+  // Builds the relabeling from symbol tables. For each side, when both the
+  // old and the new table are given and their labeled checksums differ,
+  // every label of the old table is mapped to whatever the new table
+  // returns for the same symbol, and the new table becomes the symbol
+  // table of this Fst. Otherwise the old table is kept and that side is
+  // left untouched.
+  RelabelFstImpl(const Fst<A>& fst,
+                 const SymbolTable* old_isymbols,
+                 const SymbolTable* new_isymbols,
+                 const SymbolTable* old_osymbols,
+                 const SymbolTable* new_osymbols,
+                 const RelabelFstOptions &opts)
+      : CacheImpl<A>(opts), fst_(fst.Copy()),
+        relabel_input_(false), relabel_output_(false) {
+    SetType("relabel");
+
+    const uint64 props = fst.Properties(kCopyProperties, false);
+    SetProperties(RelabelProperties(props));
+    SetInputSymbols(old_isymbols);
+    SetOutputSymbols(old_osymbols);
+
+    if (old_isymbols && new_isymbols &&
+        old_isymbols->LabeledCheckSum() != new_isymbols->LabeledCheckSum()) {
+      MapSymbols(*old_isymbols, *new_isymbols, &input_map_);
+      SetInputSymbols(new_isymbols);
+      relabel_input_ = true;
+    }
+
+    if (old_osymbols && new_osymbols &&
+        old_osymbols->LabeledCheckSum() != new_osymbols->LabeledCheckSum()) {
+      MapSymbols(*old_osymbols, *new_osymbols, &output_map_);
+      SetOutputSymbols(new_osymbols);
+      relabel_output_ = true;
+    }
+  }
+
+  // Copy constructor: takes a Copy(true) of the wrapped Fst and duplicates
+  // the label maps, flags, properties and symbol tables.
+  RelabelFstImpl(const RelabelFstImpl<A>& impl)
+      : CacheImpl<A>(impl),
+        fst_(impl.fst_->Copy(true)),
+        input_map_(impl.input_map_),
+        output_map_(impl.output_map_),
+        relabel_input_(impl.relabel_input_),
+        relabel_output_(impl.relabel_output_) {
+    SetType("relabel");
+    SetProperties(impl.Properties(), kCopyProperties);
+    SetInputSymbols(impl.InputSymbols());
+    SetOutputSymbols(impl.OutputSymbols());
+  }
+
+  // Releases the private copy of the wrapped Fst.
+  ~RelabelFstImpl() { delete fst_; }
+
+  // Start state; fetched from the wrapped Fst on first use.
+  StateId Start() {
+    if (!HasStart())
+      SetStart(fst_->Start());
+    return CacheImpl<A>::Start();
+  }
+
+  // Final weight of 's'; fetched from the wrapped Fst on first use.
+  Weight Final(StateId s) {
+    if (!HasFinal(s))
+      SetFinal(s, fst_->Final(s));
+    return CacheImpl<A>::Final(s);
+  }
+
+  // The three counters below expand 's' first if its arcs are not present.
+  size_t NumArcs(StateId s) {
+    if (!HasArcs(s)) Expand(s);
+    return CacheImpl<A>::NumArcs(s);
+  }
+
+  size_t NumInputEpsilons(StateId s) {
+    if (!HasArcs(s)) Expand(s);
+    return CacheImpl<A>::NumInputEpsilons(s);
+  }
+
+  size_t NumOutputEpsilons(StateId s) {
+    if (!HasArcs(s)) Expand(s);
+    return CacheImpl<A>::NumOutputEpsilons(s);
+  }
+
+  uint64 Properties() const { return Properties(kFstProperties); }
+
+  // Returns the impl properties selected by 'mask'. When the error bit is
+  // requested and the wrapped Fst reports kError, the bit is set here first.
+  uint64 Properties(uint64 mask) const {
+    const bool wants_error = (mask & kError) != 0;
+    if (wants_error && fst_->Properties(kError, false))
+      SetProperties(kError, kError);
+    return FstImpl<Arc>::Properties(mask);
+  }
+
+  // Expands 's' if needed, then hands off to the cache's arc iterator setup.
+  void InitArcIterator(StateId s, ArcIteratorData<A>* data) {
+    if (!HasArcs(s)) Expand(s);
+    CacheImpl<A>::InitArcIterator(s, data);
+  }
+
+  // Copies every arc leaving 's' in the wrapped Fst, substituting the input
+  // and/or output label when the corresponding map has an entry for it, and
+  // marks the state's arcs as complete.
+  void Expand(StateId s) {
+    for (ArcIterator<Fst<A> > aiter(*fst_, s); !aiter.Done(); aiter.Next()) {
+      A arc = aiter.Value();
+
+      if (relabel_input_) {
+        typename LabelMap::iterator hit = input_map_.find(arc.ilabel);
+        if (hit != input_map_.end())
+          arc.ilabel = hit->second;
+      }
+
+      if (relabel_output_) {
+        typename LabelMap::iterator hit = output_map_.find(arc.olabel);
+        if (hit != output_map_.end())
+          arc.olabel = hit->second;
+      }
+
+      PushArc(s, arc);
+    }
+    SetArcs(s);
+  }
+
+ private:
+  typedef unordered_map<Label, Label> LabelMap;
+
+  // Maps each label listed in 'old_syms' to the label 'new_syms' returns
+  // for the same symbol string.
+  static void MapSymbols(const SymbolTable &old_syms,
+                         const SymbolTable &new_syms,
+                         LabelMap *label_map) {
+    for (SymbolTableIterator siter(old_syms); !siter.Done(); siter.Next())
+      (*label_map)[siter.Value()] = new_syms.Find(siter.Symbol());
+  }
+
+  const Fst<A> *fst_;      // owned copy of the wrapped Fst
+
+  LabelMap input_map_;     // old input label -> new input label
+  LabelMap output_map_;    // old output label -> new output label
+  bool relabel_input_;     // true when input labels are rewritten
+  bool relabel_output_;    // true when output labels are rewritten
+
+  void operator=(const RelabelFstImpl<A> &);  // disallow
+};
+
+
+//
+// \class RelabelFst
+// \brief Delayed implementation of arc relabeling
+//
+// Thin interface class: every constructor just creates a RelabelFstImpl
+// and passes it to ImplToFst, which handles reference counting and
+// most of the Fst methods.
+template <class A>
+class RelabelFst : public ImplToFst< RelabelFstImpl<A> > {
+ public:
+  friend class ArcIterator< RelabelFst<A> >;
+  friend class StateIterator< RelabelFst<A> >;
+
+  typedef A Arc;
+  typedef typename A::Label Label;
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+  typedef CacheState<A> State;
+  typedef RelabelFstImpl<A> Impl;
+
+  // Relabels using explicit (old, new) label pairs; default options.
+  RelabelFst(const Fst<A>& fst,
+             const vector<pair<Label, Label> >& ipairs,
+             const vector<pair<Label, Label> >& opairs)
+      : ImplToFst<Impl>(
+            new Impl(fst, ipairs, opairs, RelabelFstOptions())) {}
+
+  // Relabels using explicit (old, new) label pairs; caller-supplied options.
+  RelabelFst(const Fst<A>& fst,
+             const vector<pair<Label, Label> >& ipairs,
+             const vector<pair<Label, Label> >& opairs,
+             const RelabelFstOptions &opts)
+      : ImplToFst<Impl>(new Impl(fst, ipairs, opairs, opts)) {}
+
+  // Relabels from the symbol tables attached to 'fst' to the given new
+  // tables; default options.
+  RelabelFst(const Fst<A>& fst,
+             const SymbolTable* new_isymbols,
+             const SymbolTable* new_osymbols)
+      : ImplToFst<Impl>(
+            new Impl(fst,
+                     fst.InputSymbols(), new_isymbols,
+                     fst.OutputSymbols(), new_osymbols,
+                     RelabelFstOptions())) {}
+
+  // Relabels from the symbol tables attached to 'fst' to the given new
+  // tables; caller-supplied options.
+  RelabelFst(const Fst<A>& fst,
+             const SymbolTable* new_isymbols,
+             const SymbolTable* new_osymbols,
+             const RelabelFstOptions &opts)
+      : ImplToFst<Impl>(
+            new Impl(fst,
+                     fst.InputSymbols(), new_isymbols,
+                     fst.OutputSymbols(), new_osymbols,
+                     opts)) {}
+
+  // Relabels between explicitly given old and new tables; default options.
+  RelabelFst(const Fst<A>& fst,
+             const SymbolTable* old_isymbols,
+             const SymbolTable* new_isymbols,
+             const SymbolTable* old_osymbols,
+             const SymbolTable* new_osymbols)
+      : ImplToFst<Impl>(
+            new Impl(fst,
+                     old_isymbols, new_isymbols,
+                     old_osymbols, new_osymbols,
+                     RelabelFstOptions())) {}
+
+  // Relabels between explicitly given old and new tables; caller-supplied
+  // options.
+  RelabelFst(const Fst<A>& fst,
+             const SymbolTable* old_isymbols,
+             const SymbolTable* new_isymbols,
+             const SymbolTable* old_osymbols,
+             const SymbolTable* new_osymbols,
+             const RelabelFstOptions &opts)
+      : ImplToFst<Impl>(
+            new Impl(fst,
+                     old_isymbols, new_isymbols,
+                     old_osymbols, new_osymbols,
+                     opts)) {}
+
+  // See Fst<>::Copy() for doc.
+  RelabelFst(const RelabelFst<A> &fst, bool safe = false)
+      : ImplToFst<Impl>(fst, safe) {}
+
+  // Returns a newly allocated copy of this RelabelFst.
+  // See Fst<>::Copy() for further doc.
+  virtual RelabelFst<A> *Copy(bool safe = false) const {
+    return new RelabelFst<A>(*this, safe);
+  }
+
+  virtual void InitStateIterator(StateIteratorData<A> *data) const;
+
+  // Forwards to the implementation.
+  virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
+    GetImpl()->InitArcIterator(s, data);
+  }
+
+ private:
+  // Makes visible to friends.
+  Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
+
+  void operator=(const RelabelFst<A> &fst);  // disallow
+};
+
+// StateIterator specialization for RelabelFst. It walks the states of the
+// wrapped Fst with a nested StateIterator and reports its own counter,
+// which starts at 0 and is incremented on every successful Next().
+template<class A>
+class StateIterator< RelabelFst<A> > : public StateIteratorBase<A> {
+ public:
+  typedef typename A::StateId StateId;
+
+  explicit StateIterator(const RelabelFst<A> &fst)
+      : impl_(fst.GetImpl()), siter_(*impl_->fst_), s_(0) {}
+
+  bool Done() const { return siter_.Done(); }
+
+  StateId Value() const { return s_; }
+
+  // Advances one state; has no effect once the iteration is finished.
+  void Next() {
+    if (siter_.Done())
+      return;
+    ++s_;
+    siter_.Next();
+  }
+
+  // Rewinds both the counter and the nested iterator.
+  void Reset() {
+    s_ = 0;
+    siter_.Reset();
+  }
+
+ private:
+  // Trailing-underscore forwarders to the public methods above.
+  bool Done_() const { return Done(); }
+  StateId Value_() const { return Value(); }
+  void Next_() { Next(); }
+  void Reset_() { Reset(); }
+
+  const RelabelFstImpl<A> *impl_;   // implementation of the iterated Fst
+  StateIterator< Fst<A> > siter_;   // iterator over the wrapped Fst
+  StateId s_;                       // state ID reported by Value()
+
+  DISALLOW_COPY_AND_ASSIGN(StateIterator);
+};
+
+
+// ArcIterator specialization for RelabelFst: a CacheArcIterator that makes
+// sure the arcs of the requested state have been expanded.
+template <class A>
+class ArcIterator< RelabelFst<A> >
+    : public CacheArcIterator< RelabelFst<A> > {
+ public:
+  typedef typename A::StateId StateId;
+
+  ArcIterator(const RelabelFst<A> &fst, StateId s)
+      : CacheArcIterator< RelabelFst<A> >(fst.GetImpl(), s) {
+    RelabelFstImpl<A> *impl = fst.GetImpl();
+    if (!impl->HasArcs(s)) {
+      impl->Expand(s);
+    }
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+// Installs a newly allocated RelabelFst state iterator as data->base.
+template <class A> inline
+void RelabelFst<A>::InitStateIterator(StateIteratorData<A> *data) const {
+  StateIterator< RelabelFst<A> > *siter =
+      new StateIterator< RelabelFst<A> >(*this);
+  data->base = siter;
+}
+
+// Convenience alias: RelabelFst instantiated with StdArc.
+typedef RelabelFst<StdArc> StdRelabelFst;
+
+} // namespace fst
+
+#endif // FST_LIB_RELABEL_H__
diff --git a/src/include/fst/replace-util.h b/src/include/fst/replace-util.h
new file mode 100644
index 0000000..f4a9c05
--- /dev/null
+++ b/src/include/fst/replace-util.h
@@ -0,0 +1,550 @@
+// replace-util.h
+
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+
+// \file
+// Utility classes for the recursive replacement of Fsts (RTNs).
+
+#ifndef FST_LIB_REPLACE_UTIL_H__
+#define FST_LIB_REPLACE_UTIL_H__
+
+#include <vector>
+using std::vector;
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <unordered_set>
+using std::tr1::unordered_set;
+using std::tr1::unordered_multiset;
+#include <map>
+
+#include <fst/connect.h>
+#include <fst/mutable-fst.h>
+#include <fst/topsort.h>
+
+
+namespace fst {
+
+// Forward declaration of Replace(), which ReplaceUtil::ReplaceLabels()
+// calls with: the (label, Fst) pairs to substitute, the mutable Fst that
+// receives the result, the label of the root Fst within the pairs, and
+// the epsilon_on_replace flag. The definition is not in this header.
+template <class Arc>
+void Replace(
+    const vector<pair<typename Arc::Label, const Fst<Arc>* > > &fst_pairs,
+    MutableFst<Arc> *ofst,
+    typename Arc::Label root,
+    bool epsilon_on_replace);
+
+
+// Utility class for the recursive replacement of Fsts (RTNs). The
+// user provides a set of Label, Fst pairs at construction. These are
+// used by methods for testing cyclic dependencies and connectedness
+// and doing RTN connection and specific Fst replacement by label or
+// for various optimization properties. The modified results can be
+// obtained with the GetFstPairs() or GetMutableFstPairs() methods.
+//
+// Internally every Fst gets a small integer ID used as index into
+// fst_array_, mutable_fst_array_ and nonterminal_array_. The constructors
+// leave slot 0 of fst_array_ null, so valid Fst IDs start at 1.
+template <class Arc>
+class ReplaceUtil {
+ public:
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+  typedef typename Arc::StateId StateId;
+
+  typedef pair<Label, const Fst<Arc>*> FstPair;
+  typedef pair<Label, MutableFst<Arc>*> MutableFstPair;
+  typedef unordered_map<Label, Label> NonTerminalHash;
+
+  // Constructs from mutable Fsts; Fst ownership given to ReplaceUtil.
+  ReplaceUtil(const vector<MutableFstPair> &fst_pairs,
+              Label root_label, bool epsilon_on_replace = false);
+
+  // Constructs from Fsts; Fst ownership retained by caller.
+  ReplaceUtil(const vector<FstPair> &fst_pairs,
+              Label root_label, bool epsilon_on_replace = false);
+
+  // Constructs from ReplaceFst internals; ownership retained by caller.
+  ReplaceUtil(const vector<const Fst<Arc> *> &fst_array,
+              const NonTerminalHash &nonterminal_hash, Label root_fst,
+              bool epsilon_on_replace = false);
+
+  // Deletes every Fst held in fst_array_ (null slots are harmless).
+  ~ReplaceUtil() {
+    for (size_t i = 0; i < fst_array_.size(); ++i)
+      delete fst_array_[i];
+  }
+
+  // True if the non-terminal dependencies are cyclic. Cyclic
+  // dependencies will result in an unexpandable replace fst.
+  bool CyclicDependencies() const {
+    GetDependencies(false);
+    return (depprops_ & kCyclic) != 0;
+  }
+
+  // Returns true if no useless Fsts, states or transitions: every
+  // non-null Fst must be accessible and coaccessible, and must itself be
+  // accessible in the dependency graph.
+  bool Connected() const {
+    GetDependencies(false);
+    const uint64 props = kAccessible | kCoAccessible;
+    for (size_t i = 0; i < fst_array_.size(); ++i) {
+      if (!fst_array_[i])
+        continue;
+      if (fst_array_[i]->Properties(props, true) != props || !depaccess_[i])
+        return false;
+    }
+    return true;
+  }
+
+  // Removes useless Fsts, states and transitions.
+  void Connect();
+
+  // Replaces Fsts specified by labels.
+  // Does nothing if there are cyclic dependencies.
+  void ReplaceLabels(const vector<Label> &labels);
+
+  // Replaces Fsts that have at most 'nstates' states, 'narcs' arcs and
+  // 'nnonterm' non-terminals (updating in reverse dependency order).
+  // Does nothing if there are cyclic dependencies.
+  void ReplaceBySize(size_t nstates, size_t narcs, size_t nnonterms);
+
+  // Replaces singleton Fsts.
+  // Does nothing if there are cyclic dependencies.
+  void ReplaceTrivial() { ReplaceBySize(2, 1, 1); }
+
+  // Replaces non-terminals that have at most 'ninstances' instances
+  // (updating in dependency order).
+  // Does nothing if there are cyclic dependencies.
+  void ReplaceByInstances(size_t ninstances);
+
+  // Replaces non-terminals that have only one instance.
+  // Does nothing if there are cyclic dependencies.
+  void ReplaceUnique() { ReplaceByInstances(1); }
+
+  // Returns Label, Fst pairs; Fst ownership retained by ReplaceUtil.
+  void GetFstPairs(vector<FstPair> *fst_pairs);
+
+  // Returns Label, MutableFst pairs; Fst ownership given to caller.
+  void GetMutableFstPairs(vector<MutableFstPair> *mutable_fst_pairs);
+
+ private:
+  // Per Fst statistics
+  struct ReplaceStats {
+    StateId nstates;    // # of states
+    StateId nfinal;     // # of final states
+    size_t narcs;       // # of arcs
+    Label nnonterms;    // # of non-terminals in Fst
+    size_t nref;        // # of non-terminal instances referring to this Fst
+
+    // # of times that ith Fst references this Fst
+    map<Label, size_t> inref;
+    // # of times that this Fst references the ith Fst
+    map<Label, size_t> outref;
+
+    ReplaceStats()
+        : nstates(0),
+          nfinal(0),
+          narcs(0),
+          nnonterms(0),
+          nref(0) {}
+  };
+
+  // Check Mutable Fsts exist o.w. create them.
+  void CheckMutableFsts();
+
+  // Computes the dependency graph of the replace Fsts.
+  // If 'stats' is true, dependency statistics computed as well.
+  void GetDependencies(bool stats) const;
+
+  // Drops the cached dependency graph, statistics and properties so that
+  // the next GetDependencies() call recomputes them. Const because all
+  // cached members are mutable.
+  void ClearDependencies() const {
+    depfst_.DeleteStates();
+    stats_.clear();
+    depprops_ = 0;
+    have_stats_ = false;
+  }
+
+  // Get topological order of dependencies. Returns false with cyclic input.
+  bool GetTopOrder(const Fst<Arc> &fst, vector<Label> *toporder) const;
+
+  // Update statistics assuming that jth Fst will be replaced.
+  void UpdateStats(Label j);
+
+  Label root_label_;                              // root non-terminal
+  Label root_fst_;                                // root Fst ID
+  bool epsilon_on_replace_;                       // see Replace()
+  vector<const Fst<Arc> *> fst_array_;            // Fst per ID
+  vector<MutableFst<Arc> *> mutable_fst_array_;   // MutableFst per ID
+  vector<Label> nonterminal_array_;               // Fst ID to non-terminal
+  NonTerminalHash nonterminal_hash_;              // non-terminal to Fst ID
+  mutable VectorFst<Arc> depfst_;                 // Fst ID dependencies
+  mutable vector<bool> depaccess_;                // Fst ID accessibility
+  mutable uint64 depprops_;                       // dependency Fst props
+  mutable bool have_stats_;                       // have dependency statistics
+  mutable vector<ReplaceStats> stats_;            // Per Fst statistics
+  DISALLOW_COPY_AND_ASSIGN(ReplaceUtil);
+};
+
+// Constructs from (label, MutableFst) pairs. The Fst pointers are stored
+// as-is in both fst_array_ and mutable_fst_array_, so ReplaceUtil takes
+// ownership of them. Slot 0 of every per-ID array is a placeholder, so
+// Fst IDs start at 1 and root_fst_ == 0 means "root label not found".
+template <class Arc>
+ReplaceUtil<Arc>::ReplaceUtil(
+    const vector<MutableFstPair> &fst_pairs,
+    Label root_label, bool epsilon_on_replace)
+    : root_label_(root_label),
+      root_fst_(0),
+      epsilon_on_replace_(epsilon_on_replace),
+      depprops_(0),
+      have_stats_(false) {
+  fst_array_.push_back(0);
+  mutable_fst_array_.push_back(0);
+  nonterminal_array_.push_back(kNoLabel);
+  for (size_t i = 0; i < fst_pairs.size(); ++i) {
+    Label label = fst_pairs[i].first;
+    MutableFst<Arc> *fst = fst_pairs[i].second;
+    nonterminal_hash_[label] = fst_array_.size();
+    nonterminal_array_.push_back(label);
+    fst_array_.push_back(fst);
+    mutable_fst_array_.push_back(fst);
+  }
+  // Look the root up with find(): operator[] would insert a bogus
+  // "root_label_ -> 0" entry into the hash when the root is missing.
+  typename NonTerminalHash::const_iterator root_it =
+      nonterminal_hash_.find(root_label_);
+  if (root_it != nonterminal_hash_.end())
+    root_fst_ = root_it->second;
+  if (!root_fst_)
+    FSTERROR() << "ReplaceUtil: no root FST for label: " << root_label_;
+}
+
+// Constructs from (label, Fst) pairs. Each Fst is duplicated with Copy(),
+// so the caller keeps ownership of the originals. mutable_fst_array_ is
+// left empty here. Slot 0 of the per-ID arrays is a placeholder, so Fst
+// IDs start at 1 and root_fst_ == 0 means "root label not found".
+template <class Arc>
+ReplaceUtil<Arc>::ReplaceUtil(
+    const vector<FstPair> &fst_pairs,
+    Label root_label, bool epsilon_on_replace)
+    : root_label_(root_label),
+      root_fst_(0),
+      epsilon_on_replace_(epsilon_on_replace),
+      depprops_(0),
+      have_stats_(false) {
+  fst_array_.push_back(0);
+  nonterminal_array_.push_back(kNoLabel);
+  for (size_t i = 0; i < fst_pairs.size(); ++i) {
+    Label label = fst_pairs[i].first;
+    const Fst<Arc> *fst = fst_pairs[i].second;
+    nonterminal_hash_[label] = fst_array_.size();
+    nonterminal_array_.push_back(label);
+    fst_array_.push_back(fst->Copy());
+  }
+  // Look the root up with find(): operator[] would insert a bogus
+  // "root_label_ -> 0" entry into the hash when the root is missing.
+  typename NonTerminalHash::const_iterator root_it =
+      nonterminal_hash_.find(root_label_);
+  if (root_it != nonterminal_hash_.end())
+    root_fst_ = root_it->second;
+  if (!root_fst_)
+    FSTERROR() << "ReplaceUtil: no root FST for label: " << root_label_;
+}
+
+// Constructs from an Fst-per-ID array plus a non-terminal -> Fst ID hash.
+// Entry 0 of 'fst_array' is ignored; all others are duplicated with
+// Copy(), so the caller keeps ownership. The ID -> non-terminal array is
+// rebuilt by inverting the hash, and the root label is read from it.
+template <class Arc>
+ReplaceUtil<Arc>::ReplaceUtil(
+    const vector<const Fst<Arc> *> &fst_array,
+    const NonTerminalHash &nonterminal_hash, Label root_fst,
+    bool epsilon_on_replace)
+    : root_fst_(root_fst),
+      epsilon_on_replace_(epsilon_on_replace),
+      nonterminal_array_(fst_array.size()),
+      nonterminal_hash_(nonterminal_hash),
+      depprops_(0),
+      have_stats_(false) {
+  fst_array_.push_back(0);
+  for (Label id = 1; id < fst_array.size(); ++id) {
+    const Fst<Arc> *duplicate = fst_array[id]->Copy();
+    fst_array_.push_back(duplicate);
+  }
+
+  typedef typename NonTerminalHash::const_iterator HashIter;
+  for (HashIter entry = nonterminal_hash.begin();
+       entry != nonterminal_hash.end(); ++entry) {
+    nonterminal_array_[entry->second] = entry->first;
+  }
+
+  root_label_ = nonterminal_array_[root_fst_];
+}
+
+// Builds depfst_, the dependency graph of the component Fsts: one state
+// per Fst ID (all final, start state is the root ID) and one arc i -> j
+// for every arc of Fst i whose output label is a known non-terminal
+// mapped to Fst j. When 'stats' is true the per-Fst statistics are
+// collected during the same pass. An already computed graph is reused
+// unless statistics are requested now and were not collected before.
+template <class Arc>
+void ReplaceUtil<Arc>::GetDependencies(bool stats) const {
+  if (depfst_.NumStates() > 0) {
+    if (!stats || have_stats_)
+      return;
+    ClearDependencies();
+  }
+
+  have_stats_ = stats;
+  const bool collect = have_stats_;
+  if (collect)
+    stats_.reserve(fst_array_.size());
+
+  // One dependency state (and optionally one stats record) per Fst ID.
+  for (Label id = 0; id < fst_array_.size(); ++id) {
+    depfst_.AddState();
+    depfst_.SetFinal(id, Weight::One());
+    if (collect)
+      stats_.push_back(ReplaceStats());
+  }
+  depfst_.SetStart(root_fst_);
+
+  // An arc from each state (representing the fst) to the
+  // state representing the fst being replaced
+  for (Label src = 0; src < fst_array_.size(); ++src) {
+    const Fst<Arc> *component = fst_array_[src];
+    if (component == 0)
+      continue;
+    for (StateIterator<Fst<Arc> > siter(*component);
+         !siter.Done(); siter.Next()) {
+      const StateId state = siter.Value();
+      if (collect) {
+        ++stats_[src].nstates;
+        if (component->Final(state) != Weight::Zero())
+          ++stats_[src].nfinal;
+      }
+      for (ArcIterator<Fst<Arc> > aiter(*component, state);
+           !aiter.Done(); aiter.Next()) {
+        if (collect)
+          ++stats_[src].narcs;
+        const Arc &arc = aiter.Value();
+
+        typename NonTerminalHash::const_iterator hit =
+            nonterminal_hash_.find(arc.olabel);
+        if (hit == nonterminal_hash_.end())
+          continue;  // ordinary arc, not a non-terminal
+
+        const Label dst = hit->second;
+        depfst_.AddArc(src, Arc(arc.olabel, arc.olabel, Weight::One(), dst));
+        if (collect) {
+          ++stats_[src].nnonterms;
+          ++stats_[dst].nref;
+          ++stats_[dst].inref[src];
+          ++stats_[src].outref[dst];
+        }
+      }
+    }
+  }
+
+  // Gets accessibility info
+  SccVisitor<Arc> scc_visitor(0, &depaccess_, 0, &depprops_);
+  DfsVisit(depfst_, &scc_visitor);
+}
+
+// Updates the per-Fst statistics as if Fst 'j' had been substituted into
+// every Fst that references it. Requires statistics to have been
+// collected (GetDependencies(true)); reports an error otherwise. The root
+// Fst is never replaced, so the call is a no-op for it.
+template <class Arc>
+void ReplaceUtil<Arc>::UpdateStats(Label j) {
+  if (!have_stats_) {
+    FSTERROR() << "ReplaceUtil::UpdateStats: stats not available";
+    return;
+  }
+
+  if (j == root_fst_)  // can't replace root
+    return;
+
+  typedef typename map<Label, size_t>::iterator Iter;
+
+  // Each Fst i that references j absorbs j's size once per reference and
+  // inherits j's outgoing references in place of its references to j.
+  for (Iter in = stats_[j].inref.begin();
+       in != stats_[j].inref.end();
+       ++in) {
+    Label i = in->first;
+    size_t ni = in->second;
+    stats_[i].nstates += stats_[j].nstates * ni;
+    stats_[i].narcs += (stats_[j].narcs + 1) * ni;  // narcs - 1 + 2 (eps)
+    stats_[i].nnonterms += (stats_[j].nnonterms - 1) * ni;
+    // Erase by key: same effect as erase(find(j)) when the entry exists,
+    // and well-defined (a no-op) when it does not.
+    stats_[i].outref.erase(j);
+    for (Iter out = stats_[j].outref.begin();
+         out != stats_[j].outref.end();
+         ++out) {
+      Label k = out->first;
+      size_t nk = out->second;
+      stats_[i].outref[k] += ni * nk;
+    }
+  }
+
+  // Each Fst k referenced by j loses j as a referrer and gains j's
+  // referrers instead, with multiplied reference counts.
+  for (Iter out = stats_[j].outref.begin();
+       out != stats_[j].outref.end();
+       ++out) {
+    Label k = out->first;
+    size_t nk = out->second;
+    stats_[k].nref -= nk;
+    stats_[k].inref.erase(j);  // by key, see above
+    for (Iter in = stats_[j].inref.begin();
+         in != stats_[j].inref.end();
+         ++in) {
+      Label i = in->first;
+      size_t ni = in->second;
+      stats_[k].inref[i] += ni * nk;
+      stats_[k].nref += ni * nk;
+    }
+  }
+}
+
+// Makes sure mutable_fst_array_ is populated. If it is still empty, every
+// non-null Fst in fst_array_ is converted to a new VectorFst that replaces
+// (and frees) the original; null slots stay null in both arrays.
+template <class Arc>
+void ReplaceUtil<Arc>::CheckMutableFsts() {
+  if (mutable_fst_array_.size() != 0)
+    return;
+
+  for (Label id = 0; id < fst_array_.size(); ++id) {
+    const Fst<Arc> *original = fst_array_[id];
+    if (original) {
+      mutable_fst_array_.push_back(new VectorFst<Arc>(*original));
+      delete original;
+      fst_array_[id] = mutable_fst_array_[id];
+    } else {
+      mutable_fst_array_.push_back(0);
+    }
+  }
+}
+
+// Trims the RTN in two passes: first each component Fst that is not
+// already known to be accessible and coaccessible is connected, then
+// every component that is not accessible in the dependency graph is
+// deleted. The cached dependency information is dropped afterwards.
+template <class Arc>
+void ReplaceUtil<Arc>::Connect() {
+  CheckMutableFsts();
+
+  const uint64 trim_props = kAccessible | kCoAccessible;
+  for (Label id = 0; id < mutable_fst_array_.size(); ++id) {
+    MutableFst<Arc> *mfst = mutable_fst_array_[id];
+    if (mfst && mfst->Properties(trim_props, false) != trim_props)
+      fst::Connect(mfst);
+  }
+
+  GetDependencies(false);
+  for (Label id = 0; id < mutable_fst_array_.size(); ++id) {
+    MutableFst<Arc> *mfst = mutable_fst_array_[id];
+    if (!mfst || depaccess_[id])
+      continue;
+    delete mfst;
+    fst_array_[id] = 0;
+    mutable_fst_array_[id] = 0;
+  }
+  ClearDependencies();
+}
+
+// Runs a TopOrderVisitor over 'fst' and, if it reports the Fst acyclic,
+// fills 'toporder' with the inverse of the visitor's result, i.e.
+// toporder[position[s]] = s for every state s (the visitor presumably
+// returns each state's position in the order). Returns false and logs a
+// warning when the Fst is cyclic; 'toporder' is then left untouched.
+template <class Arc>
+bool ReplaceUtil<Arc>::GetTopOrder(const Fst<Arc> &fst,
+                                   vector<Label> *toporder) const {
+  vector<StateId> position;
+  bool is_acyclic = false;
+
+  TopOrderVisitor<Arc> visitor(&position, &is_acyclic);
+  DfsVisit(fst, &visitor);
+
+  if (is_acyclic) {
+    toporder->resize(position.size());
+    for (Label id = 0; id < position.size(); ++id)
+      (*toporder)[position[id]] = id;
+    return true;
+  }
+
+  LOG(WARNING) << "ReplaceUtil::GetTopOrder: Cyclical label dependencies";
+  return false;
+}
+
+// Substitutes the Fsts of the given non-terminal labels into the Fsts
+// that reference them. The root label is silently skipped. Nothing is
+// replaced if the requested labels depend on each other cyclically.
+template <class Arc>
+void ReplaceUtil<Arc>::ReplaceLabels(const vector<Label> &labels) {
+  CheckMutableFsts();
+
+  unordered_set<Label> wanted;
+  for (Label n = 0; n < labels.size(); ++n) {
+    if (labels[n] == root_label_)  // can't replace root
+      continue;
+    wanted.insert(labels[n]);
+  }
+
+  // Copy of the dependency graph that keeps only the arcs leading to an
+  // Fst whose label was requested.
+  GetDependencies(false);
+  VectorFst<Arc> pfst(depfst_);
+  for (StateId state = 0; state < pfst.NumStates(); ++state) {
+    vector<Arc> kept;
+    for (ArcIterator< VectorFst<Arc> > aiter(pfst, state);
+         !aiter.Done(); aiter.Next()) {
+      const Arc &arc = aiter.Value();
+      if (wanted.count(nonterminal_array_[arc.nextstate]) > 0)
+        kept.push_back(arc);
+    }
+    pfst.DeleteArcs(state);
+    for (size_t k = 0; k < kept.size(); ++k)
+      pfst.AddArc(state, kept[k]);
+  }
+
+  vector<Label> toporder;
+  if (GetTopOrder(pfst, &toporder)) {
+    // Visits Fsts in reverse topological order of dependencies and
+    // performs replacements.
+    for (Label o = toporder.size() - 1; o >= 0; --o) {
+      const StateId parent = toporder[o];
+
+      // Collect the Fsts this one must absorb.
+      vector<FstPair> fst_pairs;
+      for (ArcIterator< VectorFst<Arc> > aiter(pfst, parent);
+           !aiter.Done(); aiter.Next()) {
+        const StateId child = aiter.Value().nextstate;
+        fst_pairs.push_back(
+            FstPair(nonterminal_array_[child], fst_array_[child]));
+      }
+      if (fst_pairs.empty())
+        continue;
+
+      // The Fst being rewritten goes last and acts as root of the Replace.
+      const Label parent_label = nonterminal_array_[parent];
+      fst_pairs.push_back(FstPair(parent_label, fst_array_[parent]));
+
+      Replace(fst_pairs, mutable_fst_array_[parent], parent_label,
+              epsilon_on_replace_);
+    }
+  }
+  ClearDependencies();
+}
+
+// Selects, walking the dependency order backwards, every Fst whose
+// current statistics do not exceed the given numbers of states, arcs and
+// non-terminals, updating the statistics after each selection, and then
+// replaces the selected labels. Does nothing on cyclic dependencies.
+template <class Arc>
+void ReplaceUtil<Arc>::ReplaceBySize(size_t nstates, size_t narcs,
+                                     size_t nnonterms) {
+  GetDependencies(true);
+
+  vector<Label> toporder;
+  if (!GetTopOrder(depfst_, &toporder)) {
+    ClearDependencies();
+    return;
+  }
+
+  vector<Label> labels;
+  for (Label o = toporder.size() - 1; o >= 0; --o) {
+    const Label j = toporder[o];
+    const bool small_enough = stats_[j].nstates <= nstates &&
+                              stats_[j].narcs <= narcs &&
+                              stats_[j].nnonterms <= nnonterms;
+    if (!small_enough)
+      continue;
+    labels.push_back(nonterminal_array_[j]);
+    UpdateStats(j);
+  }
+  ReplaceLabels(labels);
+}
+
+// Selects, walking the dependency order forwards, every Fst that is
+// currently referenced at most 'ninstances' times, updating the
+// statistics after each selection, and then replaces the selected
+// labels. Does nothing on cyclic dependencies.
+template <class Arc>
+void ReplaceUtil<Arc>::ReplaceByInstances(size_t ninstances) {
+  GetDependencies(true);
+
+  vector<Label> toporder;
+  if (!GetTopOrder(depfst_, &toporder)) {
+    ClearDependencies();
+    return;
+  }
+
+  vector<Label> labels;
+  for (Label o = 0; o < toporder.size(); ++o) {
+    const Label j = toporder[o];
+    if (stats_[j].nref > ninstances)
+      continue;
+    labels.push_back(nonterminal_array_[j]);
+    UpdateStats(j);
+  }
+  ReplaceLabels(labels);
+}
+
+// Overwrites 'fst_pairs' with one (label, Fst) pair per non-null Fst.
+// The pointers refer to the Fsts held by this object, which keeps
+// ownership of them.
+template <class Arc>
+void ReplaceUtil<Arc>::GetFstPairs(vector<FstPair> *fst_pairs) {
+  CheckMutableFsts();
+  fst_pairs->clear();
+  for (Label id = 0; id < fst_array_.size(); ++id) {
+    const Fst<Arc> *held = fst_array_[id];
+    if (held)
+      fst_pairs->push_back(FstPair(nonterminal_array_[id], held));
+  }
+}
+
+// Overwrites 'mutable_fst_pairs' with one (label, MutableFst) pair per
+// non-null Fst. Each Fst is a fresh Copy() that the caller must delete.
+template <class Arc>
+void ReplaceUtil<Arc>::GetMutableFstPairs(
+    vector<MutableFstPair> *mutable_fst_pairs) {
+  CheckMutableFsts();
+  mutable_fst_pairs->clear();
+  for (Label id = 0; id < mutable_fst_array_.size(); ++id) {
+    MutableFst<Arc> *held = mutable_fst_array_[id];
+    if (held) {
+      mutable_fst_pairs->push_back(
+          MutableFstPair(nonterminal_array_[id], held->Copy()));
+    }
+  }
+}
+
+} // namespace fst
+
+#endif // FST_LIB_REPLACE_UTIL_H__
diff --git a/src/include/fst/replace.h b/src/include/fst/replace.h
new file mode 100644
index 0000000..d08c0ea
--- /dev/null
+++ b/src/include/fst/replace.h
@@ -0,0 +1,1453 @@
+// replace.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: johans@google.com (Johan Schalkwyk)
+//
+// \file
+// Functions and classes for the recursive replacement of Fsts.
+//
+
+#ifndef FST_LIB_REPLACE_H__
+#define FST_LIB_REPLACE_H__
+
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <set>
+#include <string>
+#include <utility>
+using std::pair; using std::make_pair;
+#include <vector>
+using std::vector;
+
+#include <fst/cache.h>
+#include <fst/expanded-fst.h>
+#include <fst/fst.h>
+#include <fst/matcher.h>
+#include <fst/replace-util.h>
+#include <fst/state-table.h>
+#include <fst/test-properties.h>
+
+namespace fst {
+
+//
+// REPLACE STATE TUPLES AND TABLES
+//
+// The replace state table has the form
+//
+// template <class A, class P>
+// class ReplaceStateTable {
+// public:
+// typedef A Arc;
+// typedef P PrefixId;
+// typedef typename A::StateId StateId;
+// typedef ReplaceStateTuple<StateId, PrefixId> StateTuple;
+// typedef typename A::Label Label;
+//
+// // Required constructor
+// ReplaceStateTable(const vector<pair<Label, const Fst<A>*> > &fst_tuples,
+// Label root);
+//
+// // Required copy constructor that does not copy state
+// ReplaceStateTable(const ReplaceStateTable<A,P> &table);
+//
+// // Lookup state ID by tuple. If it doesn't exist, then add it.
+// StateId FindState(const StateTuple &tuple);
+//
+// // Lookup state tuple by ID.
+// const StateTuple &Tuple(StateId id) const;
+// };
+
+
+// \struct ReplaceStateTuple
+// \brief Tuple of information that uniquely defines a state in replace
+//
+// Template parameters: S is the state ID type (used for both fst_id and
+// fst_state), P is the prefix ID type.
+template <class S, class P>
+struct ReplaceStateTuple {
+ typedef S StateId;
+ typedef P PrefixId;
+
+ // Default tuple: prefix_id is -1 and both state fields are kNoStateId.
+ ReplaceStateTuple()
+ : prefix_id(-1), fst_id(kNoStateId), fst_state(kNoStateId) {}
+
+ // Tuple with prefix ID 'p', fst ID 'f' and fst state 's'.
+ ReplaceStateTuple(PrefixId p, StateId f, StateId s)
+ : prefix_id(p), fst_id(f), fst_state(s) {}
+
+ PrefixId prefix_id; // index in prefix table
+ StateId fst_id; // current fst being walked
+ StateId fst_state; // current state in fst being walked, not to be
+ // confused with the state_id of the combined fst
+};
+
+
+// Two replace state tuples are equal when all three fields match. The
+// fields are compared in the order prefix_id, fst_id, fst_state and the
+// comparison stops at the first mismatch.
+template <class S, class P>
+inline bool operator==(const ReplaceStateTuple<S, P>& x,
+                       const ReplaceStateTuple<S, P>& y) {
+  if (!(x.prefix_id == y.prefix_id))
+    return false;
+  if (!(x.fst_id == y.fst_id))
+    return false;
+  return x.fst_state == y.fst_state;
+}
+
+
+// \class ReplaceRootSelector
+// Predicate on replace state tuples: true exactly when the tuple's
+// prefix ID is 0, which identifies states of the root FST.
+template <class S, class P>
+class ReplaceRootSelector {
+ public:
+  bool operator()(const ReplaceStateTuple<S, P> &tuple) const {
+    const bool in_root = tuple.prefix_id == 0;
+    return in_root;
+  }
+};
+
+
+// \class ReplaceFingerprint
+// Fingerprint for general replace state tuples. The value is
+//   prefix_id * size_array->back() + size_array->at(fst_id - 1) + fst_state
+// where 'size_array' is the vector handed to the constructor. The vector
+// is not copied, so it must outlive this object.
+template <class S, class P>
+class ReplaceFingerprint {
+ public:
+  ReplaceFingerprint(const vector<uint64> *size_array)
+      : cumulative_size_array_(size_array) {}
+
+  uint64 operator()(const ReplaceStateTuple<S, P> &tuple) const {
+    // Offset of the tuple's fst within one prefix block (bounds-checked).
+    const uint64 fst_offset = cumulative_size_array_->at(tuple.fst_id - 1);
+    return tuple.prefix_id * (cumulative_size_array_->back()) +
+           fst_offset +
+           tuple.fst_state;
+  }
+
+ private:
+  const vector<uint64> *cumulative_size_array_;  // not owned
+};
+
+
+// \class ReplaceFstStateFingerprint
+// Fingerprint that is simply the tuple's fst_state. Useful when the
+// fst_state alone identifies the tuple.
+template <class S, class P>
+class ReplaceFstStateFingerprint {
+ public:
+  uint64 operator()(const ReplaceStateTuple<S, P>& tuple) const {
+    const uint64 fingerprint = tuple.fst_state;
+    return fingerprint;
+  }
+};
+
+
+// \class ReplaceHash
+// Generic hash functor for replace state tuples: the prefix ID plus the
+// fst ID and the fst state, each of the latter two multiplied by its own
+// constant.
+template <typename S, typename P>
+class ReplaceHash {
+ public:
+  size_t operator()(const ReplaceStateTuple<S, P>& t) const {
+    const size_t fst_term = t.fst_id * kPrime0;
+    const size_t state_term = t.fst_state * kPrime1;
+    return t.prefix_id + fst_term + state_term;
+  }
+
+ private:
+  static const size_t kPrime0;  // multiplier applied to fst_id
+  static const size_t kPrime1;  // multiplier applied to fst_state
+};
+
+// Out-of-class definitions of the hash multipliers.
+template <typename S, typename P>
+const size_t ReplaceHash<S, P>::kPrime0 = 7853;
+
+template <typename S, typename P>
+const size_t ReplaceHash<S, P>::kPrime1 = 7867;
+
+// Forward declaration; ReplaceFstImpl names this class as a friend.
+template <class A, class T> class ReplaceFstMatcher;
+
+
+// \class VectorHashReplaceStateTable
+// A two-level state table for replace.
+// Warning: calls CountStates to compute the number of states of each
+// component Fst.
+//
+// The object owns the underlying VectorHashStateTable through a raw
+// pointer. Copy construction creates a fresh, empty table with the same
+// size information; assignment is disallowed because the implicit
+// version would leak one table and delete the other twice.
+template <class A, class P = ssize_t>
+class VectorHashReplaceStateTable {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef typename A::Label Label;
+  typedef P PrefixId;
+  typedef ReplaceStateTuple<StateId, P> StateTuple;
+  typedef VectorHashStateTable<ReplaceStateTuple<StateId, P>,
+                               ReplaceRootSelector<StateId, P>,
+                               ReplaceFstStateFingerprint<StateId, P>,
+                               ReplaceFingerprint<StateId, P> > StateTable;
+
+  // Counts the states of every component Fst. The root's count is kept in
+  // root_size_ and contributes nothing to the cumulative array; all other
+  // counts are accumulated, so entry i + 1 holds the total over the
+  // non-root Fsts among the first i + 1 tuples.
+  VectorHashReplaceStateTable(
+      const vector<pair<Label, const Fst<A>*> > &fst_tuples,
+      Label root) : root_size_(0) {
+    cumulative_size_array_.push_back(0);
+    for (size_t i = 0; i < fst_tuples.size(); ++i) {
+      if (fst_tuples[i].first == root) {
+        root_size_ = CountStates(*(fst_tuples[i].second));
+        cumulative_size_array_.push_back(cumulative_size_array_.back());
+      } else {
+        cumulative_size_array_.push_back(cumulative_size_array_.back() +
+                                         CountStates(*(fst_tuples[i].second)));
+      }
+    }
+    state_table_ = new StateTable(
+        new ReplaceRootSelector<StateId, P>,
+        new ReplaceFstStateFingerprint<StateId, P>,
+        new ReplaceFingerprint<StateId, P>(&cumulative_size_array_),
+        root_size_,
+        root_size_ + cumulative_size_array_.back());
+  }
+
+  // Copies the size information only; the new table starts out with no
+  // stored states. The fingerprint is bound to this object's own array.
+  VectorHashReplaceStateTable(const VectorHashReplaceStateTable<A, P> &table)
+      : root_size_(table.root_size_),
+        cumulative_size_array_(table.cumulative_size_array_) {
+    state_table_ = new StateTable(
+        new ReplaceRootSelector<StateId, P>,
+        new ReplaceFstStateFingerprint<StateId, P>,
+        new ReplaceFingerprint<StateId, P>(&cumulative_size_array_),
+        root_size_,
+        root_size_ + cumulative_size_array_.back());
+  }
+
+  ~VectorHashReplaceStateTable() {
+    delete state_table_;
+  }
+
+  // Forwards to the underlying table's FindState().
+  StateId FindState(const StateTuple &tuple) {
+    return state_table_->FindState(tuple);
+  }
+
+  // Forwards to the underlying table's Tuple().
+  const StateTuple &Tuple(StateId id) const {
+    return state_table_->Tuple(id);
+  }
+
+ private:
+  StateId root_size_;                     // # of states of the root Fst
+  vector<uint64> cumulative_size_array_;  // running totals, see constructor
+  StateTable *state_table_;               // owned
+
+  void operator=(const VectorHashReplaceStateTable<A, P> &);  // disallow
+};
+
+
+// \class DefaultReplaceStateTable
+// Default replace state table: a CompactHashStateTable over replace state
+// tuples, hashed with ReplaceHash. FindState() and Tuple() come from the
+// base class.
+template <class A, class P = ssize_t>
+class DefaultReplaceStateTable : public CompactHashStateTable<
+ ReplaceStateTuple<typename A::StateId, P>,
+ ReplaceHash<typename A::StateId, P> > {
+ public:
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+ typedef typename A::Label Label;
+ typedef P PrefixId;
+ typedef ReplaceStateTuple<StateId, P> StateTuple;
+ typedef CompactHashStateTable<StateTuple,
+ ReplaceHash<StateId, PrefixId> > StateTable;
+
+ using StateTable::FindState;
+ using StateTable::Tuple;
+
+ // Constructor with the signature required of replace state tables;
+ // neither argument is used.
+ DefaultReplaceStateTable(
+ const vector<pair<Label, const Fst<A>*> > &fst_tuples,
+ Label root) {}
+
+ // Copy constructor that does not copy state: the base table is
+ // constructed from scratch and 'table' is ignored.
+ DefaultReplaceStateTable(const DefaultReplaceStateTable<A, P> &table)
+ : StateTable() {}
+};
+
+//
+// REPLACE FST CLASS
+//
+
+// By default ReplaceFst will copy the input label of the 'replace arc'.
+// For acceptors we do not want this behaviour. Instead we need to
+// create an epsilon arc when recursing into the appropriate Fst.
+// The 'epsilon_on_replace' option can be used to toggle this behaviour.
+//
+// Every constructor below sets take_ownership to false and state_table
+// to null; they differ only in the cache options, the root and
+// epsilon_on_replace.
+template <class A, class T = DefaultReplaceStateTable<A> >
+struct ReplaceFstOptions : CacheOptions {
+ int64 root; // root rule for expansion
+ bool epsilon_on_replace;
+ bool take_ownership; // take ownership of input Fst(s)
+ T* state_table;
+
+ // Explicit cache options and root; epsilon_on_replace is false.
+ ReplaceFstOptions(const CacheOptions &opts, int64 r)
+ : CacheOptions(opts),
+ root(r),
+ epsilon_on_replace(false),
+ take_ownership(false),
+ state_table(0) {}
+ // Default cache options, given root; epsilon_on_replace is false.
+ explicit ReplaceFstOptions(int64 r)
+ : root(r),
+ epsilon_on_replace(false),
+ take_ownership(false),
+ state_table(0) {}
+ // Default cache options, given root and epsilon_on_replace setting.
+ ReplaceFstOptions(int64 r, bool epsilon_replace_arc)
+ : root(r),
+ epsilon_on_replace(epsilon_replace_arc),
+ take_ownership(false),
+ state_table(0) {}
+ // Default cache options; root is kNoLabel, epsilon_on_replace is false.
+ ReplaceFstOptions()
+ : root(kNoLabel),
+ epsilon_on_replace(false),
+ take_ownership(false),
+ state_table(0) {}
+};
+
+
+// \class ReplaceFstImpl
+// \brief Implementation class for replace class Fst
+//
+// The replace implementation class supports the dynamic
+// expansion of a recursive transition network represented as an Fst
+// with dynamically replaceable arcs.
+//
+template <class A, class T>
+class ReplaceFstImpl : public CacheImpl<A> {
+ friend class ReplaceFstMatcher<A, T>;
+
+ public:
+ using FstImpl<A>::SetType;
+ using FstImpl<A>::SetProperties;
+ using FstImpl<A>::WriteHeader;
+ using FstImpl<A>::SetInputSymbols;
+ using FstImpl<A>::SetOutputSymbols;
+ using FstImpl<A>::InputSymbols;
+ using FstImpl<A>::OutputSymbols;
+
+ using CacheImpl<A>::PushArc;
+ using CacheImpl<A>::HasArcs;
+ using CacheImpl<A>::HasFinal;
+ using CacheImpl<A>::HasStart;
+ using CacheImpl<A>::SetArcs;
+ using CacheImpl<A>::SetFinal;
+ using CacheImpl<A>::SetStart;
+
+ typedef typename A::Label Label;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+ typedef CacheState<A> State;
+ typedef A Arc;
+ typedef unordered_map<Label, Label> NonTerminalHash;
+
+ typedef T StateTable;
+ typedef typename T::PrefixId PrefixId;
+ typedef ReplaceStateTuple<StateId, PrefixId> StateTuple;
+
+ // Constructor for the replace class implementation.
+ // \param fst_tuples array of label/fst tuples, one for each non-terminal
+ // \param opts cache options, root label, 'epsilon_on_replace',
+ //        'take_ownership' and an optional state table (a StateTable is
+ //        allocated here when 'opts.state_table' is null).
+ // The Fsts are copied unless 'opts.take_ownership' is set; in both cases
+ // the stored pointers are deleted by the destructor.
+ ReplaceFstImpl(const vector< pair<Label, const Fst<A>* > >& fst_tuples,
+                const ReplaceFstOptions<A, T> &opts)
+     : CacheImpl<A>(opts),
+       epsilon_on_replace_(opts.epsilon_on_replace),
+       state_table_(opts.state_table ? opts.state_table :
+                    new StateTable(fst_tuples, opts.root)) {
+
+   SetType("replace");
+
+   // The symbol tables of the first Fst are used for the whole ReplaceFst.
+   if (fst_tuples.size() > 0) {
+     SetInputSymbols(fst_tuples[0].second->InputSymbols());
+     SetOutputSymbols(fst_tuples[0].second->OutputSymbols());
+   }
+
+   bool all_negative = true;  // all nonterminals are negative?
+   bool dense_range = true;   // all nonterminals are positive
+                              // and form a dense range containing 1?
+   for (size_t i = 0; i < fst_tuples.size(); ++i) {
+     Label nonterminal = fst_tuples[i].first;
+     if (nonterminal >= 0)
+       all_negative = false;
+     if (nonterminal > fst_tuples.size() || nonterminal <= 0)
+       dense_range = false;
+   }
+
+   vector<uint64> inprops;
+   bool all_ilabel_sorted = true;
+   bool all_olabel_sorted = true;
+   bool all_non_empty = true;
+   // Slot 0 is a null placeholder so that fst id 0 means 'no Fst'.
+   fst_array_.push_back(0);
+   for (size_t i = 0; i < fst_tuples.size(); ++i) {
+     Label label = fst_tuples[i].first;
+     const Fst<A> *fst = fst_tuples[i].second;
+     nonterminal_hash_[label] = fst_array_.size();
+     nonterminal_set_.insert(label);
+     fst_array_.push_back(opts.take_ownership ? fst : fst->Copy());
+     if (fst->Start() == kNoStateId)
+       all_non_empty = false;
+     if (!fst->Properties(kILabelSorted, false))
+       all_ilabel_sorted = false;
+     if (!fst->Properties(kOLabelSorted, false))
+       all_olabel_sorted = false;
+     inprops.push_back(fst->Properties(kCopyProperties, false));
+     if (i) {
+       if (!CompatSymbols(InputSymbols(), fst->InputSymbols())) {
+         FSTERROR() << "ReplaceFstImpl: input symbols of Fst " << i
+                    << " does not match input symbols of base Fst (0'th fst)";
+         SetProperties(kError, kError);
+       }
+       if (!CompatSymbols(OutputSymbols(), fst->OutputSymbols())) {
+         FSTERROR() << "ReplaceFstImpl: output symbols of Fst " << i
+                    << " does not match output symbols of base Fst "
+                    << "(0'th fst)";
+         SetProperties(kError, kError);
+       }
+     }
+   }
+   // Look the root up with find() rather than operator[]: operator[] would
+   // insert a bogus 'root -> 0' entry when no Fst is registered for the
+   // root label, turning the root label into a non-terminal that maps to
+   // the null placeholder in fst_array_[0].
+   typename NonTerminalHash::const_iterator root_it =
+       nonterminal_hash_.find(opts.root);
+   Label nonterminal =
+       (root_it != nonterminal_hash_.end()) ? root_it->second : 0;
+   if ((nonterminal == 0) && (fst_array_.size() > 1)) {
+     FSTERROR() << "ReplaceFstImpl: no Fst corresponding to root label '"
+                << opts.root << "' in the input tuple vector";
+     SetProperties(kError, kError);
+   }
+   // Fall back to the first Fst when the root label was not found.
+   root_ = (nonterminal > 0) ? nonterminal : 1;
+
+   SetProperties(ReplaceProperties(inprops, root_ - 1, epsilon_on_replace_,
+                                   all_non_empty));
+   // We assume that all terminals are positive. The resulting
+   // ReplaceFst is known to be kILabelSorted when all sub-FSTs are
+   // kILabelSorted and one of the 3 following conditions is satisfied:
+   //  1. 'epsilon_on_replace' is false, or
+   //  2. all non-terminals are negative, or
+   //  3. all non-terminals are positive and form a dense range containing 1.
+   if (all_ilabel_sorted &&
+       (!epsilon_on_replace_ || all_negative || dense_range))
+     SetProperties(kILabelSorted, kILabelSorted);
+   // Similarly, the resulting ReplaceFst is known to be
+   // kOLabelSorted when all sub-FSTs are kOLabelSorted and one of
+   // the 2 following conditions is satisfied:
+   //  1. all non-terminals are negative, or
+   //  2. all non-terminals are positive and form a dense range containing 1.
+   if (all_olabel_sorted && (all_negative || dense_range))
+     SetProperties(kOLabelSorted, kOLabelSorted);
+
+   // Enable optional caching as long as sorted and all non empty.
+   if (Properties(kILabelSorted | kOLabelSorted) && all_non_empty)
+     always_cache_ = false;
+   else
+     always_cache_ = true;
+   VLOG(2) << "ReplaceFstImpl::ReplaceFstImpl: always_cache = "
+           << (always_cache_ ? "true" : "false");
+ }
+
+ // Copy constructor. The state table is copy-constructed, the stack prefix
+ // tables start out empty, and every component Fst is duplicated with
+ // Copy(true).
+ ReplaceFstImpl(const ReplaceFstImpl& impl)
+     : CacheImpl<A>(impl),
+       epsilon_on_replace_(impl.epsilon_on_replace_),
+       always_cache_(impl.always_cache_),
+       state_table_(new StateTable(*(impl.state_table_))),
+       nonterminal_set_(impl.nonterminal_set_),
+       nonterminal_hash_(impl.nonterminal_hash_),
+       root_(impl.root_) {
+   SetType("replace");
+   SetProperties(impl.Properties(), kCopyProperties);
+   SetInputSymbols(impl.InputSymbols());
+   SetOutputSymbols(impl.OutputSymbols());
+
+   const vector<const Fst<A>*> &source_fsts = impl.fst_array_;
+   const size_t num_slots = source_fsts.size();
+   fst_array_.reserve(num_slots);
+   fst_array_.push_back(0);  // slot 0 is the null placeholder
+   for (size_t slot = 1; slot < num_slots; ++slot)
+     fst_array_.push_back(source_fsts[slot]->Copy(true));
+ }
+
+ // Logs the cache settings, then frees the state table and every
+ // component Fst (slot 0 of fst_array_ is a null placeholder).
+ ~ReplaceFstImpl() {
+   VLOG(2) << "~ReplaceFstImpl: gc = "
+           << (CacheImpl<A>::GetCacheGc() ? "true" : "false")
+           << ", gc_size = " << CacheImpl<A>::GetCacheSize()
+           << ", gc_limit = " << CacheImpl<A>::GetCacheLimit();
+
+   delete state_table_;
+   const size_t num_slots = fst_array_.size();
+   size_t slot = 1;
+   while (slot < num_slots) {
+     delete fst_array_[slot];
+     ++slot;
+   }
+ }
+
+ // Builds the dependency graph of the replace class through ReplaceUtil
+ // and reports whether it is cyclic. Cyclic dependencies result in an
+ // un-expandable replace fst.
+ bool CyclicDependencies() const {
+   return ReplaceUtil<A>(fst_array_, nonterminal_hash_, root_)
+       .CyclicDependencies();
+ }
+
+ // Returns the start state of the replace fst, computing and caching it
+ // on first use. Returns kNoStateId when no Fst is defined or when the
+ // root Fst is empty (the latter result is not cached).
+ StateId Start() {
+   if (HasStart())
+     return CacheImpl<A>::Start();
+
+   if (fst_array_.size() == 1) {  // no fsts defined for replace
+     SetStart(kNoStateId);
+     return kNoStateId;
+   }
+
+   const StateId root_start = fst_array_[root_]->Start();
+   if (root_start == kNoStateId)  // root Fst is empty
+     return kNoStateId;
+
+   // The start state pairs the root start state with the empty stack.
+   const PrefixId empty_prefix = GetPrefixId(StackPrefix());
+   const StateId start = state_table_->FindState(
+       StateTuple(empty_prefix, root_, root_start));
+   SetStart(start);
+   return start;
+ }
+
+ // Returns the final weight of state 's' (Weight::Zero() means the state
+ // is not final). A state is final only when its underlying state is
+ // final and the stack is empty.
+ Weight Final(StateId s) {
+   if (HasFinal(s))
+     return CacheImpl<A>::Final(s);
+
+   const StateTuple& tuple = state_table_->Tuple(s);
+   const StackPrefix& stack = stackprefix_array_[tuple.prefix_id];
+   const Fst<A>* fst = fst_array_[tuple.fst_id];
+   const StateId fst_state = tuple.fst_state;
+
+   const bool is_final =
+       fst->Final(fst_state) != Weight::Zero() && stack.Depth() == 0;
+   if (is_final)
+     SetFinal(s, fst->Final(fst_state));
+   else
+     SetFinal(s, Weight::Zero());
+   return CacheImpl<A>::Final(s);
+ }
+
+ // Returns the number of arcs leaving state 's'. The cached value is used
+ // when available; otherwise the state is expanded if caching is
+ // mandatory, or the count is derived from the underlying Fst.
+ size_t NumArcs(StateId s) {
+   if (HasArcs(s))  // State cached: use the cached value.
+     return CacheImpl<A>::NumArcs(s);
+
+   if (always_cache_) {  // Always caching: expand and cache state.
+     Expand(s);
+     return CacheImpl<A>::NumArcs(s);
+   }
+
+   // Otherwise compute the number of arcs without expanding.
+   StateTuple tuple = state_table_->Tuple(s);
+   if (tuple.fst_state == kNoStateId)
+     return 0;
+
+   size_t total = fst_array_[tuple.fst_id]->NumArcs(tuple.fst_state);
+   if (ComputeFinalArc(tuple, 0))  // one extra arc pops the stack
+     ++total;
+   return total;
+ }
+
+ // Returns true iff label 'l' is registered as a non-terminal.
+ bool IsNonTerminal(Label l) const {
+   // TODO(allauzen): be smarter and take advantage of
+   // all_dense or all_negative.
+   // Use also in ComputeArc, this would require changes to replace
+   // so that recursing into an empty fst lead to a non co-accessible
+   // state instead of deleting the arc as done currently.
+   // Current use correct, since i/olabel sorted iff all_non_empty.
+   return nonterminal_hash_.count(l) != 0;
+ }
+
+ // Returns the number of arcs with an epsilon input label leaving 's'.
+ // The cached value is used when available. The state is expanded when
+ // caching is mandatory or the Fst is not known to be ilabel sorted.
+ // Otherwise the count is derived from the underlying Fst without caching.
+ size_t NumInputEpsilons(StateId s) {
+   if (HasArcs(s)) {
+     // If state cached, use the cached value.
+     return CacheImpl<A>::NumInputEpsilons(s);
+   } else if (always_cache_ || !Properties(kILabelSorted)) {
+     // If always caching or if the number of input epsilons is too expensive
+     // to compute without caching (i.e. not ilabel sorted),
+     // then expand and cache state.
+     Expand(s);
+     return CacheImpl<A>::NumInputEpsilons(s);
+   } else {
+     // Otherwise, compute the number of input epsilons without caching.
+     StateTuple tuple = state_table_->Tuple(s);
+     if (tuple.fst_state == kNoStateId)
+       return 0;
+     const Fst<A>* fst = fst_array_[tuple.fst_id];
+     size_t num = 0;
+     if (!epsilon_on_replace_) {
+       // If epsilon_on_replace is false, all input epsilon arcs
+       // are also input epsilons arcs in the underlying machine.
+       // The count has to be stored: it was previously discarded, so only
+       // the final arc was ever counted on this path.
+       num = fst->NumInputEpsilons(tuple.fst_state);
+     } else {
+       // Otherwise, one need to consider that all non-terminal arcs
+       // in the underlying machine also become input epsilon arc.
+       ArcIterator<Fst<A> > aiter(*fst, tuple.fst_state);
+       for (; !aiter.Done() &&
+                ((aiter.Value().ilabel == 0) ||
+                 IsNonTerminal(aiter.Value().olabel));
+            aiter.Next())
+         ++num;
+     }
+     // The final arc (stack pop) has an epsilon input label.
+     if (ComputeFinalArc(tuple, 0))
+       num++;
+     return num;
+   }
+ }
+
+ // Returns the number of arcs with an epsilon output label leaving 's'.
+ size_t NumOutputEpsilons(StateId s) {
+   // If state cached, use the cached value.
+   if (HasArcs(s))
+     return CacheImpl<A>::NumOutputEpsilons(s);
+
+   // If always caching or if the number of output epsilons is too expensive
+   // to compute without caching (i.e. not olabel sorted),
+   // then expand and cache state.
+   if (always_cache_ || !Properties(kOLabelSorted)) {
+     Expand(s);
+     return CacheImpl<A>::NumOutputEpsilons(s);
+   }
+
+   // Otherwise, compute the number of output epsilons without caching.
+   StateTuple tuple = state_table_->Tuple(s);
+   if (tuple.fst_state == kNoStateId)
+     return 0;
+
+   const Fst<A>* fst = fst_array_[tuple.fst_id];
+   size_t count = 0;
+   ArcIterator<Fst<A> > aiter(*fst, tuple.fst_state);
+   // Count the leading run of arcs whose output label is epsilon or a
+   // non-terminal; stop at the first arc that is neither.
+   while (!aiter.Done()) {
+     if (!((aiter.Value().olabel == 0) ||
+           IsNonTerminal(aiter.Value().olabel)))
+       break;
+     ++count;
+     aiter.Next();
+   }
+   if (ComputeFinalArc(tuple, 0))
+     ++count;
+   return count;
+ }
+
+ // Returns all FST impl properties.
+ uint64 Properties() const {
+   return Properties(kFstProperties);
+ }
+
+ // Returns the FST impl properties selected by 'mask'. When the error bit
+ // is requested, kError is first set if any component Fst reports it.
+ uint64 Properties(uint64 mask) const {
+   if (mask & kError) {
+     const size_t num_slots = fst_array_.size();
+     for (size_t slot = 1; slot < num_slots; ++slot) {
+       const bool has_error = fst_array_[slot]->Properties(kError, false);
+       if (has_error)
+         SetProperties(kError, kError);
+     }
+   }
+   return FstImpl<Arc>::Properties(mask);
+ }
+
+ // Initializes 'data' with the cached arcs of state 's', expanding the
+ // state first when its arcs have not been computed yet.
+ void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
+   const bool already_expanded = HasArcs(s);
+   if (!already_expanded)
+     Expand(s);
+   CacheImpl<A>::InitArcIterator(s, data);
+   // TODO(allauzen): Set behaviour of generic iterator
+   // Warning: ArcIterator<ReplaceFst<A> >::InitCache()
+   // relies on current behaviour.
+ }
+
+
+ // Expands state 's' one level deep: pushes the final (stack pop) arc when
+ // one is needed, then one arc per arc of the underlying state that
+ // ComputeArc keeps, and finally marks the arcs of 's' as set.
+ void Expand(StateId s) {
+   StateTuple tuple = state_table_->Tuple(s);
+
+   // Nothing to expand when the local fst is empty.
+   if (tuple.fst_state == kNoStateId) {
+     SetArcs(s);
+     return;
+   }
+
+   const Fst<A> &local_fst = *(fst_array_[tuple.fst_id]);
+   ArcIterator< Fst<A> > aiter(local_fst, tuple.fst_state);
+   Arc computed;
+
+   // The final arc, when required, comes first.
+   if (ComputeFinalArc(tuple, &computed))
+     PushArc(s, computed);
+
+   // Then all arcs leaving the underlying state.
+   while (!aiter.Done()) {
+     if (ComputeArc(tuple, aiter.Value(), &computed))
+       PushArc(s, computed);
+     aiter.Next();
+   }
+
+   SetArcs(s);
+ }
+
+ // Expands state 's' from an already available tuple and from arc iterator
+ // data over the underlying state, instead of looking both up again.
+ // Arcs are added with PushArc followed by one SetArcs call, exactly as in
+ // Expand(StateId). The former AddArc calls had no using-declaration to
+ // resolve them in the dependent base class and did not follow that
+ // protocol.
+ void Expand(StateId s, const StateTuple &tuple,
+             const ArcIteratorData<A> &data) {
+   // If local fst is empty
+   if (tuple.fst_state == kNoStateId) {
+     SetArcs(s);
+     return;
+   }
+
+   ArcIterator< Fst<A> > aiter(data);
+   Arc arc;
+
+   // Create a final arc when needed
+   if (ComputeFinalArc(tuple, &arc))
+     PushArc(s, arc);
+
+   // Expand all arcs leaving the state
+   for (; !aiter.Done(); aiter.Next()) {
+     if (ComputeArc(tuple, aiter.Value(), &arc))
+       PushArc(s, arc);
+   }
+
+   SetArcs(s);
+ }
+
+ // Determines whether the state described by 'tuple' needs a 'final' arc,
+ // i.e. an epsilon arc that pops the stack because the underlying state is
+ // final while the stack is not empty.
+ // \param tuple state tuple to examine
+ // \param arcp if non-null, receives the final arc; if 0, the function
+ //        only reports whether a final arc is required
+ // \param flags which parts of the arc to compute (kArcNextStateValue,
+ //        kArcWeightValue); labels are always set to epsilon
+ // \return true iff a final arc is required
+ bool ComputeFinalArc(const StateTuple &tuple, A* arcp,
+                      uint32 flags = kArcValueFlags) {
+   const Fst<A>* fst = fst_array_[tuple.fst_id];
+   StateId fst_state = tuple.fst_state;
+   if (fst_state == kNoStateId)
+     return false;
+
+   // if state is final, pop up stack
+   const StackPrefix& stack = stackprefix_array_[tuple.prefix_id];
+   if (fst->Final(fst_state) != Weight::Zero() && stack.Depth()) {
+     if (arcp) {
+       arcp->ilabel = 0;
+       arcp->olabel = 0;
+       if (flags & kArcNextStateValue) {
+         // Copy the top entry before calling PopPrefix(): PopPrefix() may
+         // append to stackprefix_array_, which would invalidate 'stack'.
+         const PrefixTuple top = stack.Top();
+         PrefixId prefix_id = PopPrefix(stack);
+         arcp->nextstate = state_table_->FindState(
+             StateTuple(prefix_id, top.fst_id, top.nextstate));
+       }
+       if (flags & kArcWeightValue)
+         arcp->weight = fst->Final(fst_state);
+     }
+     return true;
+   } else {
+     return false;
+   }
+ }
+
+ // Computes the arc in the replace fst corresponding to a given arc 'arc'
+ // in the underlying machine and stores it in '*arcp'. Only the parts
+ // selected by 'flags' are guaranteed to be computed. Returns false if the
+ // underlying arc corresponds to no arc in the replace fst.
+ bool ComputeArc(const StateTuple &tuple, const A &arc, A* arcp,
+ uint32 flags = kArcValueFlags) {
+ if (!epsilon_on_replace_ &&
+ (flags == (flags & (kArcILabelValue | kArcWeightValue)))) {  // only ilabel and/or weight requested
+ *arcp = arc;  // those parts are the same as in the underlying arc
+ return true;
+ }
+
+ if (arc.olabel == 0) { // epsilon output: the arc stays within the local fst
+ StateId nextstate = flags & kArcNextStateValue
+ ? state_table_->FindState(
+ StateTuple(tuple.prefix_id, tuple.fst_id, arc.nextstate))
+ : kNoStateId;
+ *arcp = A(arc.ilabel, arc.olabel, arc.weight, nextstate);
+ } else {
+ // check whether the output label is a non-terminal
+ typename NonTerminalHash::const_iterator it =
+ nonterminal_hash_.find(arc.olabel);
+ if (it != nonterminal_hash_.end()) { // recurse into the non-terminal's fst
+ Label nonterminal = it->second;
+ const Fst<A>* nt_fst = fst_array_[nonterminal];
+ PrefixId nt_prefix = PushPrefix(stackprefix_array_[tuple.prefix_id],
+ tuple.fst_id, arc.nextstate);  // remember where to return to
+
+ // if the start state is valid, replace the arc; otherwise the arc
+ // is implicitly deleted
+ StateId nt_start = nt_fst->Start();
+ if (nt_start != kNoStateId) {
+ StateId nt_nextstate = flags & kArcNextStateValue
+ ? state_table_->FindState(
+ StateTuple(nt_prefix, nonterminal, nt_start))
+ : kNoStateId;
+ Label ilabel = (epsilon_on_replace_) ? 0 : arc.ilabel;
+ *arcp = A(ilabel, 0, arc.weight, nt_nextstate);  // output label becomes epsilon
+ } else {
+ return false;
+ }
+ } else {  // ordinary terminal arc: stay within the local fst
+ StateId nextstate = flags & kArcNextStateValue
+ ? state_table_->FindState(
+ StateTuple(tuple.prefix_id, tuple.fst_id, arc.nextstate))
+ : kNoStateId;
+ *arcp = A(arc.ilabel, arc.olabel, arc.weight, nextstate);
+ }
+ }
+ return true;
+ }
+
+ // Returns the arc iterator flags supported by this Fst: all arc value
+ // flags, plus kArcNoCache unless caching is mandatory.
+ uint32 ArcIteratorFlags() const {
+   uint32 supported = kArcValueFlags;
+   if (always_cache_)
+     return supported;
+   supported |= kArcNoCache;
+   return supported;
+ }
+
+ // Returns the state table; it is deleted by this impl's destructor.
+ T* GetStateTable() const { return state_table_; }
+
+ // Returns the component Fst stored at index 'fst_id' of fst_array_.
+ const Fst<A>* GetFst(Label fst_id) const { return fst_array_[fst_id]; }
+
+ // Returns the 'epsilon_on_replace' setting this impl was built with.
+ bool EpsilonOnReplace() const {
+   return epsilon_on_replace_;
+ }
+
+ // private helper classes
+ private:
+ static const size_t kPrime0;
+
+ // \class PrefixTuple
+ // \brief One stack prefix entry: the id of an Fst together with the
+ // destination state ('nextstate') recorded for it.
+ struct PrefixTuple {
+   Label fst_id;
+   StateId nextstate;
+
+   PrefixTuple(Label f, StateId s)
+       : fst_id(f),
+         nextstate(s) {}
+ };
+
+ // \class StackPrefix
+ // \brief Container for stack prefix: a stack of PrefixTuple entries.
+ class StackPrefix {
+  public:
+   StackPrefix() {}
+
+   // copy constructor
+   StackPrefix(const StackPrefix& x) : prefix_(x.prefix_) {}
+
+   // Pushes a new entry on top of the stack. 'fst_id' is a Label, like
+   // PrefixTuple::fst_id and the argument passed by PushPrefix(); it used
+   // to be declared as StateId, which would narrow the id whenever the two
+   // types differ.
+   void Push(Label fst_id, StateId nextstate) {
+     prefix_.push_back(PrefixTuple(fst_id, nextstate));
+   }
+
+   // Removes the top entry. The stack must not be empty.
+   void Pop() {
+     prefix_.pop_back();
+   }
+
+   // Returns the top entry. The stack must not be empty.
+   const PrefixTuple& Top() const {
+     return prefix_.back();
+   }
+
+   // Returns the number of entries on the stack.
+   size_t Depth() const {
+     return prefix_.size();
+   }
+
+  public:
+   // Entries from bottom to top; read directly by StackPrefixEqual and
+   // StackPrefixKey.
+   vector<PrefixTuple> prefix_;
+ };
+
+
+ // \class StackPrefixEqual
+ // \brief Equality predicate on stack prefixes: same depth and pairwise
+ // equal (fst_id, nextstate) entries.
+ class StackPrefixEqual {
+  public:
+   bool operator()(const StackPrefix& x, const StackPrefix& y) const {
+     const size_t depth = x.prefix_.size();
+     if (depth != y.prefix_.size())
+       return false;
+     for (size_t level = 0; level < depth; ++level) {
+       const PrefixTuple &lhs = x.prefix_[level];
+       const PrefixTuple &rhs = y.prefix_[level];
+       if (lhs.fst_id != rhs.fst_id || lhs.nextstate != rhs.nextstate)
+         return false;
+     }
+     return true;
+   }
+ };
+
+ //
+ // \class StackPrefixKey
+ // \brief Hash function on stack prefixes, used to map a prefix to its
+ // prefix id.
+ class StackPrefixKey {
+  public:
+   size_t operator()(const StackPrefix& x) const {
+     size_t hash = 0;
+     const size_t depth = x.prefix_.size();
+     for (size_t level = 0; level < depth; ++level) {
+       const PrefixTuple &entry = x.prefix_[level];
+       hash += entry.fst_id + entry.nextstate*kPrime0;
+     }
+     return hash;
+   }
+ };
+
+ typedef unordered_map<StackPrefix, PrefixId, StackPrefixKey, StackPrefixEqual>
+ StackPrefixHash;
+
+ // private methods
+ private:
+ // Returns the unique id of 'prefix' (its index in stackprefix_array_),
+ // registering the prefix first when it has not been seen before.
+ PrefixId GetPrefixId(const StackPrefix& prefix) {
+   typename StackPrefixHash::iterator found = prefix_hash_.find(prefix);
+   if (found != prefix_hash_.end())
+     return found->second;
+
+   PrefixId new_id = stackprefix_array_.size();
+   stackprefix_array_.push_back(prefix);
+   prefix_hash_[prefix] = new_id;
+   return new_id;
+ }
+
+ // Returns the id of the prefix obtained by popping the top entry of
+ // 'prefix'. The argument is taken by value, so the caller's prefix is
+ // left untouched.
+ PrefixId PopPrefix(StackPrefix prefix) {
+   prefix.Pop();
+   const PrefixId popped_id = GetPrefixId(prefix);
+   return popped_id;
+ }
+
+ // Returns the id of the prefix obtained by pushing (fst_id, nextstate) on
+ // top of 'prefix'. The argument is taken by value, so the caller's prefix
+ // is left untouched.
+ PrefixId PushPrefix(StackPrefix prefix, Label fst_id, StateId nextstate) {
+   prefix.Push(fst_id, nextstate);
+   const PrefixId pushed_id = GetPrefixId(prefix);
+   return pushed_id;
+ }
+
+
+ // private data
+ private:
+ // runtime options
+ bool epsilon_on_replace_;
+ bool always_cache_; // Optionally caching arc iterator disabled when true
+
+ // state table
+ StateTable *state_table_;
+
+ // cross index of unique stack prefix
+ // could potentially have one copy of prefix array
+ StackPrefixHash prefix_hash_;
+ vector<StackPrefix> stackprefix_array_;
+
+ set<Label> nonterminal_set_;
+ NonTerminalHash nonterminal_hash_;
+ vector<const Fst<A>*> fst_array_;
+ Label root_;
+
+ void operator=(const ReplaceFstImpl<A, T> &); // disallow
+};
+
+
+template <class A, class T>
+const size_t ReplaceFstImpl<A, T>::kPrime0 = 7853;  // multiplier applied to 'nextstate' in StackPrefixKey
+
+//
+// \class ReplaceFst
+// \brief Recursively replaces arcs in the root Fst with other Fsts.
+// This version is a delayed Fst.
+//
+// ReplaceFst supports dynamic replacement of arcs in one Fst with
+// another Fst. This replacement is recursive. ReplaceFst can be used
+// to support a variety of delayed constructions such as recursive
+// transition networks, union, or closure. It is constructed with an
+// array of Fst(s). One Fst represents the root (or topology)
+// machine. The root Fst refers to other Fsts by recursively replacing
+// arcs labeled as non-terminals with the matching non-terminal
+// Fst. Currently the ReplaceFst uses the output symbols of the arcs
+// to determine whether the arc is a non-terminal arc or not. A
+// non-terminal can be any label that is not a non-zero terminal label
+// in the output alphabet.
+//
+// Note that the constructor uses a vector of pair<>. These correspond
+// to the tuple of non-terminal Label and corresponding Fst. For example
+// to implement the closure operation we need 2 Fsts. The first root
+// Fst is a single Arc on the start State that self loops, it references
+// the particular machine for which we are performing the closure operation.
+//
+// The ReplaceFst class supports an optionally caching arc iterator:
+// ArcIterator< ReplaceFst<A> >
+// The ReplaceFst needs to be built such that it is known to be ilabel
+// or olabel sorted (see usage below).
+//
+// Observe that Matcher<Fst<A> > will use the optionally caching arc
+// iterator when available (Fst is ilabel sorted and matching on the
+// input, or Fst is olabel sorted and matching on the output).
+// In order to obtain the most efficient behaviour, it is recommended
+// to set 'epsilon_on_replace' to false (this means constructing acceptors
+// as transducers with epsilons on the input side of nonterminal arcs)
+// and matching on the input side.
+//
+// This class attaches interface to implementation and handles
+// reference counting, delegating most methods to ImplToFst.
+template <class A, class T = DefaultReplaceStateTable<A> >
+class ReplaceFst : public ImplToFst< ReplaceFstImpl<A, T> > {
+ public:
+  friend class ArcIterator< ReplaceFst<A, T> >;
+  friend class StateIterator< ReplaceFst<A, T> >;
+  friend class ReplaceFstMatcher<A, T>;
+
+  typedef A Arc;
+  typedef typename A::Label Label;
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+  typedef CacheState<A> State;
+  typedef ReplaceFstImpl<A, T> Impl;
+
+  using ImplToFst<Impl>::Properties;
+
+  // Builds a ReplaceFst from (non-terminal label, Fst) pairs with default
+  // options; 'root' is the label of the root Fst.
+  ReplaceFst(const vector<pair<Label, const Fst<A>* > >& fst_array,
+             Label root)
+      : ImplToFst<Impl>(new Impl(fst_array, ReplaceFstOptions<A, T>(root))) {}
+
+  // Builds a ReplaceFst from (non-terminal label, Fst) pairs with explicit
+  // options.
+  ReplaceFst(const vector<pair<Label, const Fst<A>* > >& fst_array,
+             const ReplaceFstOptions<A, T> &opts)
+      : ImplToFst<Impl>(new Impl(fst_array, opts)) {}
+
+  // See Fst<>::Copy() for doc.
+  ReplaceFst(const ReplaceFst<A, T>& fst, bool safe = false)
+      : ImplToFst<Impl>(fst, safe) {}
+
+  // Get a copy of this ReplaceFst. See Fst<>::Copy() for further doc.
+  virtual ReplaceFst<A, T> *Copy(bool safe = false) const {
+    return new ReplaceFst<A, T>(*this, safe);
+  }
+
+  virtual inline void InitStateIterator(StateIteratorData<A> *data) const;
+
+  // Forwards to the implementation, which expands state 's' if needed.
+  virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
+    GetImpl()->InitArcIterator(s, data);
+  }
+
+  // Returns a ReplaceFstMatcher when optional caching is supported and the
+  // Fst is known to be sorted on the side being matched; otherwise returns
+  // 0 (no specialized matcher).
+  virtual MatcherBase<A> *InitMatcher(MatchType match_type) const {
+    if ((GetImpl()->ArcIteratorFlags() & kArcNoCache) &&
+        ((match_type == MATCH_INPUT && Properties(kILabelSorted, false)) ||
+         (match_type == MATCH_OUTPUT && Properties(kOLabelSorted, false)))) {
+      return new ReplaceFstMatcher<A, T>(*this, match_type);
+    }
+    else {
+      VLOG(2) << "Not using replace matcher";
+      return 0;
+    }
+  }
+
+  // Returns true if the dependencies between the component Fsts are cyclic.
+  bool CyclicDependencies() const {
+    return GetImpl()->CyclicDependencies();
+  }
+
+ private:
+  // Makes visible to friends.
+  Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
+
+  // Declared but not defined, to disallow assignment. The parameter has to
+  // be ReplaceFst<A, T>: with ReplaceFst<A> this only declared assignment
+  // from the default-table instantiation and left the implicit copy
+  // assignment available whenever T is not the default state table.
+  void operator=(const ReplaceFst<A, T> &fst);  // disallow
+};
+
+
+// StateIterator specialization for ReplaceFst: a CacheStateIterator
+// constructed from the Fst and its implementation.
+template <class A, class T>
+class StateIterator< ReplaceFst<A, T> >
+    : public CacheStateIterator< ReplaceFst<A, T> > {
+ public:
+  explicit StateIterator(const ReplaceFst<A, T> &fst)
+      : CacheStateIterator< ReplaceFst<A, T> >(fst, fst.GetImpl()) {
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(StateIterator);
+};
+
+
+// Specialization for ReplaceFst.
+// Implements optional caching. It can be used as follows:
+//
+// ReplaceFst<A> replace;
+// ArcIterator< ReplaceFst<A> > aiter(replace, s);
+// // Note: ArcIterator< Fst<A> > is always a caching arc iterator.
+// aiter.SetFlags(kArcNoCache, kArcNoCache);
+// // Use the arc iterator, no arc will be cached, no state will be expanded.
+// // The varied 'kArcValueFlags' can be used to decide which part
+// // of arc values needs to be computed.
+// aiter.SetFlags(kArcILabelValue, kArcValueFlags);
+// // Only want the ilabel for this arc
+// aiter.Value(); // Does not compute the destination state.
+// aiter.Next();
+// aiter.SetFlags(kArcNextStateValue, kArcNextStateValue);
+// // Want both ilabel and nextstate for that arc
+// aiter.Value(); // Does compute the destination state and inserts it
+// // in the replace state table.
+// // No Arc has been cached at that point.
+//
+template <class A, class T>
+class ArcIterator< ReplaceFst<A, T> > {
+ public:
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+
+ ArcIterator(const ReplaceFst<A, T> &fst, StateId s)
+ : fst_(fst), state_(s), pos_(0), offset_(0), flags_(0), arcs_(0),
+ data_flags_(0), final_flags_(0) {
+ cache_data_.ref_count = 0;  // null until the matching InitArcIterator call
+ local_data_.ref_count = 0;  // (the destructor checks both before use)
+
+ // If the FST does not support optional caching, force caching now.
+ if(!(fst_.GetImpl()->ArcIteratorFlags() & kArcNoCache) &&
+ !(fst_.GetImpl()->HasArcs(state_)))
+ fst_.GetImpl()->Expand(state_);
+
+ // If the state is already cached, iterate over the cached arcs array.
+ if (fst_.GetImpl()->HasArcs(state_)) {
+ (fst_.GetImpl())->template CacheImpl<A>::InitArcIterator(state_,
+ &cache_data_);
+ num_arcs_ = cache_data_.narcs;
+ arcs_ = cache_data_.arcs; // 'arcs_' is a ptr to the cached arcs.
+ data_flags_ = kArcValueFlags; // All the arc member values are valid.
+ } else { // Otherwise delay the decision until Value() is called.
+ tuple_ = fst_.GetImpl()->GetStateTable()->Tuple(state_);
+ if (tuple_.fst_state == kNoStateId) {
+ num_arcs_ = 0;  // empty local fst: nothing to iterate over
+ } else {
+ // The decision to cache or not to cache has been deferred
+ // until Value() or SetFlags() is called. However, the arc
+ // iterator is set up now to be ready for non-caching in order
+ // to keep the Value() method simple and efficient.
+ const Fst<A>* fst = fst_.GetImpl()->GetFst(tuple_.fst_id);
+ fst->InitArcIterator(tuple_.fst_state, &local_data_);
+ // 'arcs_' is a pointer to the arcs in the underlying machine.
+ arcs_ = local_data_.arcs;
+ // Compute the final arc (but not its destination state)
+ // if a final arc is required.
+ bool has_final_arc = fst_.GetImpl()->ComputeFinalArc(
+ tuple_,
+ &final_arc_,
+ kArcValueFlags & ~kArcNextStateValue);
+ // Set the arc value flags that hold for 'final_arc_'.
+ final_flags_ = kArcValueFlags & ~kArcNextStateValue;
+ // Compute the number of arcs: the underlying arcs plus the
+ // final arc, if any.
+ num_arcs_ = local_data_.narcs;
+ if (has_final_arc)
+ ++num_arcs_;
+ // Set the offset between the underlying arc positions and
+ // the positions in the arc iterator (1 with a final arc, else 0).
+ offset_ = num_arcs_ - local_data_.narcs;
+ // Defers the decision to cache or not until Value() or
+ // SetFlags() is called.
+ data_flags_ = 0;
+ }
+ }
+ }
+
+ // Drops the references taken on the cached and on the underlying arc
+ // data, when their reference counters are set.
+ ~ArcIterator() {
+   int *cache_refs = cache_data_.ref_count;
+   if (cache_refs)
+     --(*cache_refs);
+   int *local_refs = local_data_.ref_count;
+   if (local_refs)
+     --(*local_refs);
+ }
+
+ // Expands and caches 'state_' through the Fst, then switches the
+ // iterator over to the cached arcs.
+ void ExpandAndCache() const {
+   // TODO(allauzen): revisit this
+   // fst_.GetImpl()->Expand(state_, tuple_, local_data_);
+   // (fst_.GetImpl())->CacheImpl<A>*>::InitArcIterator(state_,
+   //                                               &cache_data_);
+   //
+   // Expand and cache state.
+   fst_.InitArcIterator(state_, &cache_data_);
+   // From now on 'arcs_' points to the cached arcs, for which every arc
+   // member value is valid and no position offset applies.
+   arcs_ = cache_data_.arcs;
+   data_flags_ = kArcValueFlags;
+   offset_ = 0;
+ }
+
+ // Sets the iterator up according to the current kArcNoCache flag: either
+ // expand and cache the state, or iterate directly over the arcs of the
+ // underlying machine.
+ void Init() {
+   if (!(flags_ & kArcNoCache)) {  // Caching requested.
+     ExpandAndCache();
+     return;
+   }
+
+   // Caching is disabled: 'arcs_' points to the arcs in the underlying
+   // machine.
+   arcs_ = local_data_.arcs;
+   // Arc values that can be taken as is from 'arcs_': always the weight,
+   // and the ilabel as well unless replace arcs get an epsilon ilabel.
+   data_flags_ = kArcWeightValue;
+   if (!fst_.GetImpl()->EpsilonOnReplace())
+     data_flags_ |= kArcILabelValue;
+   // Offset between the underlying arc positions and the positions in
+   // the arc iterator.
+   offset_ = num_arcs_ - local_data_.narcs;
+ }
+
+ bool Done() const { return pos_ >= num_arcs_; }  // True once pos_ is past the last arc.
+
+ const A& Value() const {
+ // If 'data_flags_' is still 0, non-caching was not requested:
+ // fall back to expanding and caching the state.
+ if (!data_flags_) {
+ // TODO(allauzen): revisit this.
+ if (flags_ & kArcNoCache) {
+ // Should never happen.
+ FSTERROR() << "ReplaceFst: inconsistent arc iterator flags";
+ }
+ ExpandAndCache(); // Expand and cache.
+ }
+
+ if (pos_ - offset_ >= 0) { // The requested arc is not the 'final' arc.
+ const A& arc = arcs_[pos_ - offset_];
+ if ((data_flags_ & flags_) == (flags_ & kArcValueFlags)) {
+ // If the value flags for 'arc' match the required value flags
+ // then return 'arc'.
+ return arc;
+ } else {
+ // Otherwise, compute the corresponding arc on-the-fly into 'arc_'.
+ fst_.GetImpl()->ComputeArc(tuple_, arc, &arc_, flags_ & kArcValueFlags);
+ return arc_;
+ }
+ } else { // The requested arc is the 'final' arc.
+ if ((final_flags_ & flags_) != (flags_ & kArcValueFlags)) {
+ // If the arc value flags that hold for the final arc
+ // do not match the requested value flags, then
+ // 'final_arc_' needs to be updated.
+ fst_.GetImpl()->ComputeFinalArc(tuple_, &final_arc_,
+ flags_ & kArcValueFlags);
+ final_flags_ = flags_ & kArcValueFlags;
+ }
+ return final_arc_;
+ }
+ }
+
+ void Next() { ++pos_; }  // Advances to the next arc position.
+
+ size_t Position() const { return pos_; }  // Current arc position.
+
+ void Reset() { pos_ = 0; }  // Goes back to the first arc position.
+
+ void Seek(size_t pos) { pos_ = pos; }  // Jumps to position 'pos' (not range checked).
+
+ uint32 Flags() const { return flags_; }  // Current behavioural flags.
+
+ void SetFlags(uint32 f, uint32 mask) {
+ // Update the flags taking into account what flags are supported
+ // by the Fst: the bits in 'mask' are cleared, then the supported
+ // bits of 'f' are set.
+ flags_ &= ~mask;
+ flags_ |= (f & fst_.GetImpl()->ArcIteratorFlags());
+ // If non-caching is not requested (and caching has not already
+ // been performed), then flush 'data_flags_' to request caching
+ // during the next call to Value().
+ if (!(flags_ & kArcNoCache) && data_flags_ != kArcValueFlags) {
+ if (!fst_.GetImpl()->HasArcs(state_))
+ data_flags_ = 0;
+ }
+ // If 'data_flags_' has been flushed but non-caching is requested
+ // before calling Value(), then set up the iterator for non-caching.
+ if ((f & kArcNoCache) && (!data_flags_))
+ Init();
+ }
+
+ private:
+ const ReplaceFst<A, T> &fst_; // Reference to the FST
+ StateId state_; // State in the FST
+ mutable typename T::StateTuple tuple_; // Tuple corresponding to state_
+
+ ssize_t pos_; // Current position
+ mutable ssize_t offset_; // Offset between position in iterator and in arcs_
+ ssize_t num_arcs_; // Number of arcs at state_
+ uint32 flags_; // Behavorial flags for the arc iterator
+ mutable Arc arc_; // Memory to temporarily store computed arcs
+
+ mutable ArcIteratorData<Arc> cache_data_; // Arc iterator data in cache
+ mutable ArcIteratorData<Arc> local_data_; // Arc iterator data in local fst
+
+ mutable const A* arcs_; // Array of arcs
+ mutable uint32 data_flags_; // Arc value flags valid for data in arcs_
+ mutable Arc final_arc_; // Final arc (when required)
+ mutable uint32 final_flags_; // Arc value flags valid for final_arc_
+
+ DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+
+template <class A, class T>
+class ReplaceFstMatcher : public MatcherBase<A> {
+ public:
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+ typedef typename A::Label Label;
+ typedef MultiEpsMatcher<Matcher<Fst<A> > > LocalMatcher;
+
+ // Builds a matcher over 'fst' for the given match type. The loop arc
+ // carries kNoLabel on the side being matched and epsilon on the other
+ // side, and one local matcher is created per component Fst.
+ ReplaceFstMatcher(const ReplaceFst<A, T> &fst, fst::MatchType match_type)
+     : fst_(fst),
+       impl_(fst_.GetImpl()),
+       s_(fst::kNoStateId),
+       match_type_(match_type),
+       current_loop_(false),
+       final_arc_(false),
+       loop_(fst::kNoLabel, 0, A::Weight::One(), fst::kNoStateId) {
+   const bool match_on_output = (match_type_ == fst::MATCH_OUTPUT);
+   if (match_on_output)
+     swap(loop_.ilabel, loop_.olabel);
+   InitMatchers();
+ }
+
+ // Copy constructor. The copy matches on the same Fst with the same match
+ // type, but starts from a fresh matching state with its own local
+ // matchers. 'final_arc_' is now initialized as in the primary
+ // constructor; it was previously left indeterminate.
+ // NOTE(review): 'safe' is accepted but not used here - confirm whether
+ // the Fst should be copied with it.
+ ReplaceFstMatcher(const ReplaceFstMatcher<A, T> &matcher, bool safe = false)
+     : fst_(matcher.fst_),
+       impl_(fst_.GetImpl()),
+       s_(fst::kNoStateId),
+       match_type_(matcher.match_type_),
+       current_loop_(false),
+       final_arc_(false),
+       loop_(fst::kNoLabel, 0, A::Weight::One(), fst::kNoStateId) {
+   if (match_type_ == fst::MATCH_OUTPUT)
+     swap(loop_.ilabel, loop_.olabel);
+   InitMatchers();
+ }
+
+ // Create a local matcher for each component Fst of replace.
+ // LocalMatcher is a multi epsilon wrapper matcher. MultiEpsilonMatcher
+ // is used to match each non-terminal arc, since these non-terminal
+ // turn into epsilons on recursion.
+ void InitMatchers() {
+ const vector<const Fst<A>*>& fst_array = impl_->fst_array_;
+ matcher_.resize(fst_array.size(), 0);
+ for (size_t i = 0; i < fst_array.size(); ++i) {
+ if (fst_array[i]) {
+ matcher_[i] =
+ new LocalMatcher(*fst_array[i], match_type_, kMultiEpsList);
+
+ typename set<Label>::iterator it = impl_->nonterminal_set_.begin();
+ for (; it != impl_->nonterminal_set_.end(); ++it) {
+ matcher_[i]->AddMultiEpsLabel(*it);
+ }
+ }
+ }
+ }
+
+ virtual ReplaceFstMatcher<A, T> *Copy(bool safe = false) const {
+ return new ReplaceFstMatcher<A, T>(*this, safe);
+ }
+
+ virtual ~ReplaceFstMatcher() {
+ for (size_t i = 0; i < matcher_.size(); ++i)
+ delete matcher_[i];
+ }
+
+ virtual MatchType Type(bool test) const {
+ if (match_type_ == MATCH_NONE)
+ return match_type_;
+
+ uint64 true_prop = match_type_ == MATCH_INPUT ?
+ kILabelSorted : kOLabelSorted;
+ uint64 false_prop = match_type_ == MATCH_INPUT ?
+ kNotILabelSorted : kNotOLabelSorted;
+ uint64 props = fst_.Properties(true_prop | false_prop, test);
+
+ if (props & true_prop)
+ return match_type_;
+ else if (props & false_prop)
+ return MATCH_NONE;
+ else
+ return MATCH_UNKNOWN;
+ }
+
+ virtual const Fst<A> &GetFst() const {
+ return fst_;
+ }
+
+ virtual uint64 Properties(uint64 props) const {
+ return props;
+ }
+
+ private:
+ // Set the sate from which our matching happens.
+ virtual void SetState_(StateId s) {
+ if (s_ == s) return;
+
+ s_ = s;
+ tuple_ = impl_->GetStateTable()->Tuple(s_);
+ if (tuple_.fst_state == kNoStateId) {
+ done_ = true;
+ return;
+ }
+ // Get current matcher. Used for non epsilon matching
+ current_matcher_ = matcher_[tuple_.fst_id];
+ current_matcher_->SetState(tuple_.fst_state);
+ loop_.nextstate = s_;
+
+ final_arc_ = false;
+ }
+
+ // Search for label, from previous set state. If label == 0, first
+ // hallucinate and epsilon loop, else use the underlying matcher to
+ // search for the label or epsilons.
+ // - Note since the ReplaceFST recursion on non-terminal arcs causes
+ // epsilon transitions to be created we use the MultiEpsilonMatcher
+ // to search for possible matches of non terminals.
+ // - If the component Fst reaches a final state we also need to add
+ // the exiting final arc.
+ virtual bool Find_(Label label) {
+ bool found = false;
+ label_ = label;
+ if (label_ == 0 || label_ == kNoLabel) {
+ // Compute loop directly, saving Replace::ComputeArc
+ if (label_ == 0) {
+ current_loop_ = true;
+ found = true;
+ }
+ // Search for matching multi epsilons
+ final_arc_ = impl_->ComputeFinalArc(tuple_, 0);
+ found = current_matcher_->Find(kNoLabel) || final_arc_ || found;
+ } else {
+ // Search on sub machine directly using sub machine matcher.
+ found = current_matcher_->Find(label_);
+ }
+ return found;
+ }
+
+ virtual bool Done_() const {
+ return !current_loop_ && !final_arc_ && current_matcher_->Done();
+ }
+
+ virtual const Arc& Value_() const {
+ if (current_loop_) {
+ return loop_;
+ }
+ if (final_arc_) {
+ impl_->ComputeFinalArc(tuple_, &arc_);
+ return arc_;
+ }
+ const Arc& component_arc = current_matcher_->Value();
+ impl_->ComputeArc(tuple_, component_arc, &arc_);
+ return arc_;
+ }
+
+ virtual void Next_() {
+ if (current_loop_) {
+ current_loop_ = false;
+ return;
+ }
+ if (final_arc_) {
+ final_arc_ = false;
+ return;
+ }
+ current_matcher_->Next();
+ }
+
+ const ReplaceFst<A, T>& fst_;
+ ReplaceFstImpl<A, T> *impl_;
+ LocalMatcher* current_matcher_;
+ vector<LocalMatcher*> matcher_;
+
+ StateId s_; // Current state
+ Label label_; // Current label
+
+ MatchType match_type_; // Supplied by caller
+ mutable bool done_;
+ mutable bool current_loop_; // Current arc is the implicit loop
+ mutable bool final_arc_; // Current arc for exiting recursion
+ mutable typename T::StateTuple tuple_; // Tuple corresponding to state_
+ mutable Arc arc_;
+ Arc loop_;
+};
+
+// Installs a newly allocated state iterator over this ReplaceFst as the
+// base iterator of 'data'.
+template <class A, class T>
+inline void ReplaceFst<A, T>::InitStateIterator(
+    StateIteratorData<A> *data) const {
+  typedef StateIterator< ReplaceFst<A, T> > ReplaceStateIterator;
+  data->base = new ReplaceStateIterator(*this);
+}
+
+// Useful alias when using StdArc.
+typedef ReplaceFst<StdArc> StdReplaceFst;
+
+
+// Recursively replaces arcs in the root Fst with other Fsts.
+// This version writes the result of replacement to an output MutableFst.
+//
+// Replace supports replacement of arcs in one Fst with another
+// Fst. This replacement is recursive. Replace takes an array of
+// Fst(s). One Fst represents the root (or topology) machine. The root
+// Fst refers to other Fsts by recursively replacing arcs labeled as
+// non-terminals with the matching non-terminal Fst. Currently Replace
+// uses the output symbols of the arcs to determine whether the arc is
+// a non-terminal arc or not. A non-terminal can be any label that is
+// not a non-zero terminal label in the output alphabet. Note that
+// input argument is a vector of pair<>. These correspond to the tuple
+// of non-terminal Label and corresponding Fst.
+template<class Arc>
+void Replace(const vector<pair<typename Arc::Label,
+                               const Fst<Arc>* > >& ifst_array,
+             MutableFst<Arc> *ofst, typename Arc::Label root,
+             bool epsilon_on_replace) {
+  // Cache only the last state for fastest copy.
+  ReplaceFstOptions<Arc> replace_opts(root, epsilon_on_replace);
+  replace_opts.gc_limit = 0;
+
+  // Build the delayed replacement and copy it into the output Fst.
+  const ReplaceFst<Arc> delayed(ifst_array, replace_opts);
+  *ofst = delayed;
+}
+
+// Convenience overload of Replace() that passes false for
+// 'epsilon_on_replace'.
+template<class Arc>
+void Replace(const vector<pair<typename Arc::Label,
+                               const Fst<Arc>* > >& ifst_array,
+             MutableFst<Arc> *ofst, typename Arc::Label root) {
+  const bool epsilon_on_replace = false;
+  Replace(ifst_array, ofst, root, epsilon_on_replace);
+}
+
+} // namespace fst
+
+#endif // FST_LIB_REPLACE_H__
diff --git a/src/include/fst/reverse.h b/src/include/fst/reverse.h
new file mode 100644
index 0000000..4d4c75c
--- /dev/null
+++ b/src/include/fst/reverse.h
@@ -0,0 +1,91 @@
+// reverse.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Functions and classes to reverse an FST.
+
+#ifndef FST_LIB_REVERSE_H__
+#define FST_LIB_REVERSE_H__
+
+#include <algorithm>
+#include <vector>
+using std::vector;
+
+#include <fst/cache.h>
+
+
+namespace fst {
+
+// Reverses an FST, writing the result to an output MutableFst. If A
+// transduces string x to y with weight a, then the reverse of A
+// transduces the reverse of x to the reverse of y with weight
+// a.Reverse().
+//
+// Construction: the first state added to the output is its start
+// state, and input state s is mapped to output state s + 1. The image
+// of the input start state is made final with weight One. Each final
+// input state receives an epsilon arc from output state 0 carrying its
+// reversed final weight, and every input arc is added with source and
+// destination exchanged and its weight reversed.
+//
+// Typically, a = a.Reverse() and Arc = RevArc (e.g. for
+// TropicalWeight or LogWeight). In general, e.g. when the weights
+// only form a left or right semiring, the output arc type must match
+// the input arc type except having the reversed Weight type.
+template<class Arc, class RevArc>
+void Reverse(const Fst<Arc> &ifst, MutableFst<RevArc> *ofst) {
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Weight Weight;
+  typedef typename RevArc::Weight RevWeight;
+
+  ofst->DeleteStates();
+  ofst->SetInputSymbols(ifst.InputSymbols());
+  ofst->SetOutputSymbols(ifst.OutputSymbols());
+  if (ifst.Properties(kExpanded, false)) {
+    // One extra state for the new start state.
+    ofst->ReserveStates(CountStates(ifst) + 1);
+  }
+  const StateId input_start = ifst.Start();
+  const StateId output_start = ofst->AddState();
+  ofst->SetStart(output_start);
+
+  for (StateIterator< Fst<Arc> > state_it(ifst);
+       !state_it.Done();
+       state_it.Next()) {
+    const StateId in_state = state_it.Value();
+    const StateId out_state = in_state + 1;
+    while (ofst->NumStates() <= out_state) {
+      ofst->AddState();
+    }
+    if (in_state == input_start) {
+      ofst->SetFinal(out_state, RevWeight::One());
+    }
+
+    // A final input state is entered from output state 0 by an epsilon arc.
+    Weight final_weight = ifst.Final(in_state);
+    if (final_weight != Weight::Zero()) {
+      ofst->AddArc(0, RevArc(0, 0, final_weight.Reverse(), out_state));
+    }
+
+    // Every arc is emitted from the image of its destination back to
+    // the image of its source.
+    for (ArcIterator< Fst<Arc> > arc_it(ifst, in_state);
+         !arc_it.Done();
+         arc_it.Next()) {
+      const Arc &in_arc = arc_it.Value();
+      const RevArc reversed(in_arc.ilabel, in_arc.olabel,
+                            in_arc.weight.Reverse(), out_state);
+      const StateId out_source = in_arc.nextstate + 1;
+      while (ofst->NumStates() <= out_source) {
+        ofst->AddState();
+      }
+      ofst->AddArc(out_source, reversed);
+    }
+  }
+
+  const uint64 iprops = ifst.Properties(kCopyProperties, false);
+  const uint64 oprops = ofst->Properties(kFstProperties, false);
+  ofst->SetProperties(ReverseProperties(iprops) | oprops, kFstProperties);
+}
+
+} // namespace fst
+
+#endif // FST_LIB_REVERSE_H__
diff --git a/src/include/fst/reweight.h b/src/include/fst/reweight.h
new file mode 100644
index 0000000..c051c2a
--- /dev/null
+++ b/src/include/fst/reweight.h
@@ -0,0 +1,146 @@
+// reweight.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// Function to reweight an FST.
+
+#ifndef FST_LIB_REWEIGHT_H__
+#define FST_LIB_REWEIGHT_H__
+
+#include <vector>
+using std::vector;
+
+#include <fst/mutable-fst.h>
+
+
+namespace fst {
+
+enum ReweightType { REWEIGHT_TO_INITIAL, REWEIGHT_TO_FINAL };
+
+// Reweight FST according to the potentials defined by the POTENTIAL
+// vector in the direction defined by TYPE. Weight needs to be left
+// distributive when reweighting towards the initial state and right
+// distributive when reweighting towards the final states.
+//
+// An arc of weight w, with an origin state of potential p and
+// destination state of potential q, is reweighted by p\wq when
+// reweighting towards the initial state and by pw/q when reweighting
+// towards the final states.
+//
+// States whose id is not below potential.size() have no entry in
+// POTENTIAL; see the second loop and the start-state handling below for
+// how they are treated. On an unsupported semiring the FST is flagged
+// with kError and returned without further changes.
+template <class Arc>
+void Reweight(MutableFst<Arc> *fst,
+ const vector<typename Arc::Weight> &potential,
+ ReweightType type) {
+ typedef typename Arc::Weight Weight;
+
+ // Nothing to do for an FST without states.
+ if (fst->NumStates() == 0)
+ return;
+
+ // Check that the weight type has the distributivity needed for the
+ // requested direction.
+ if (type == REWEIGHT_TO_FINAL && !(Weight::Properties() & kRightSemiring)) {
+ FSTERROR() << "Reweight: Reweighting to the final states requires "
+ << "Weight to be right distributive: "
+ << Weight::Type();
+ fst->SetProperties(kError, kError);
+ return;
+ }
+
+ if (type == REWEIGHT_TO_INITIAL && !(Weight::Properties() & kLeftSemiring)) {
+ FSTERROR() << "Reweight: Reweighting to the initial state requires "
+ << "Weight to be left distributive: "
+ << Weight::Type();
+ fst->SetProperties(kError, kError);
+ return;
+ }
+
+ // First pass: states that have an entry in 'potential'. The iterator
+ // is declared outside the loop so that the second pass below resumes
+ // where this one stops.
+ StateIterator< MutableFst<Arc> > sit(*fst);
+ for (; !sit.Done(); sit.Next()) {
+ typename Arc::StateId state = sit.Value();
+ // Stop at the first state id equal to the size of 'potential'.
+ if (state == potential.size())
+ break;
+ typename Arc::Weight weight = potential[state];
+ // Arcs leaving a state whose potential is Zero are not modified.
+ if (weight != Weight::Zero()) {
+ for (MutableArcIterator< MutableFst<Arc> > ait(fst, state);
+ !ait.Done();
+ ait.Next()) {
+ Arc arc = ait.Value();
+ // Arcs whose destination has no potential entry, or a Zero one,
+ // are left as they are.
+ if (arc.nextstate >= potential.size())
+ continue;
+ typename Arc::Weight nextweight = potential[arc.nextstate];
+ if (nextweight == Weight::Zero())
+ continue;
+ // weight = potential of the origin, nextweight = potential of
+ // the destination.
+ if (type == REWEIGHT_TO_INITIAL)
+ arc.weight = Divide(Times(arc.weight, nextweight), weight,
+ DIVIDE_LEFT);
+ if (type == REWEIGHT_TO_FINAL)
+ arc.weight = Divide(Times(weight, arc.weight), nextweight,
+ DIVIDE_RIGHT);
+ ait.SetValue(arc);
+ }
+ // Towards the initial state, the final weight is left-divided by
+ // the state's potential.
+ if (type == REWEIGHT_TO_INITIAL)
+ fst->SetFinal(state, Divide(fst->Final(state), weight, DIVIDE_LEFT));
+ }
+ // Towards the final states, the final weight is multiplied by the
+ // state's potential; this is applied even when that potential is Zero.
+ if (type == REWEIGHT_TO_FINAL)
+ fst->SetFinal(state, Times(weight, fst->Final(state)));
+ }
+
+ // This handles elements past the end of the potentials array.
+ // Their final weights are multiplied by Zero when reweighting towards
+ // the final states.
+ for (; !sit.Done(); sit.Next()) {
+ typename Arc::StateId state = sit.Value();
+ if (type == REWEIGHT_TO_FINAL)
+ fst->SetFinal(state, Times(Weight::Zero(), fst->Final(state)));
+ }
+
+ // Potential of the start state; Zero when it has no entry.
+ typename Arc::Weight startweight = fst->Start() < potential.size() ?
+ potential[fst->Start()] : Weight::Zero();
+ // When the start potential is neither One nor Zero, it (or One
+ // divided by it, when reweighting towards the final states) is
+ // multiplied into the FST at the start state.
+ if ((startweight != Weight::One()) && (startweight != Weight::Zero())) {
+ if (fst->Properties(kInitialAcyclic, true) & kInitialAcyclic) {
+ // kInitialAcyclic is set: multiply the factor into the arcs
+ // leaving the start state and into its final weight.
+ typename Arc::StateId state = fst->Start();
+ for (MutableArcIterator< MutableFst<Arc> > ait(fst, state);
+ !ait.Done();
+ ait.Next()) {
+ Arc arc = ait.Value();
+ if (type == REWEIGHT_TO_INITIAL)
+ arc.weight = Times(startweight, arc.weight);
+ else
+ arc.weight = Times(
+ Divide(Weight::One(), startweight, DIVIDE_RIGHT),
+ arc.weight);
+ ait.SetValue(arc);
+ }
+ if (type == REWEIGHT_TO_INITIAL)
+ fst->SetFinal(state, Times(startweight, fst->Final(state)));
+ else
+ fst->SetFinal(state, Times(Divide(Weight::One(), startweight,
+ DIVIDE_RIGHT),
+ fst->Final(state)));
+ } else {
+ // Otherwise add a new start state with a single epsilon arc,
+ // carrying the factor, to the old start state.
+ typename Arc::StateId state = fst->AddState();
+ Weight w = type == REWEIGHT_TO_INITIAL ? startweight :
+ Divide(Weight::One(), startweight, DIVIDE_RIGHT);
+ Arc arc(0, 0, w, fst->Start());
+ fst->AddArc(state, arc);
+ fst->SetStart(state);
+ }
+ }
+
+ // Update the stored property bits for the reweighted FST.
+ fst->SetProperties(ReweightProperties(
+ fst->Properties(kFstProperties, false)),
+ kFstProperties);
+}
+
+} // namespace fst
+
+#endif // FST_LIB_REWEIGHT_H__
diff --git a/src/include/fst/rmepsilon.h b/src/include/fst/rmepsilon.h
new file mode 100644
index 0000000..ee9753e
--- /dev/null
+++ b/src/include/fst/rmepsilon.h
@@ -0,0 +1,601 @@
+// rmepsilon.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// Functions and classes that implement epsilon-removal.
+
+#ifndef FST_LIB_RMEPSILON_H__
+#define FST_LIB_RMEPSILON_H__
+
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <fst/slist.h>
+#include <stack>
+#include <string>
+#include <utility>
+using std::pair; using std::make_pair;
+#include <vector>
+using std::vector;
+
+#include <fst/arcfilter.h>
+#include <fst/cache.h>
+#include <fst/connect.h>
+#include <fst/factor-weight.h>
+#include <fst/invert.h>
+#include <fst/prune.h>
+#include <fst/queue.h>
+#include <fst/shortest-distance.h>
+#include <fst/topsort.h>
+
+
+namespace fst {
+
+// Options for the epsilon-removal algorithm: the shortest-distance
+// options used for the epsilon-closure (restricted to epsilon arcs)
+// plus connection and pruning settings.
+template <class Arc, class Queue>
+class RmEpsilonOptions
+    : public ShortestDistanceOptions<Arc, Queue, EpsilonArcFilter<Arc> > {
+ private:
+  typedef EpsilonArcFilter<Arc> RmEpsFilter;
+  typedef ShortestDistanceOptions<Arc, Queue, RmEpsFilter> RmEpsSdOptions;
+
+ public:
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Weight Weight;
+
+  bool connect;             // Connect output
+  Weight weight_threshold;  // Pruning weight threshold.
+  StateId state_threshold;  // Pruning state threshold.
+
+  // q: state queue, d: convergence delta, c: connect output,
+  // w: pruning weight threshold, n: pruning state threshold.
+  explicit RmEpsilonOptions(Queue *q, float d = kDelta, bool c = true,
+                            Weight w = Weight::Zero(),
+                            StateId n = kNoStateId)
+      : RmEpsSdOptions(q, RmEpsFilter(), kNoStateId, d),
+        connect(c),
+        weight_threshold(w),
+        state_threshold(n) {}
+
+ private:
+  RmEpsilonOptions();  // disallow
+};
+
+// Computation state of the epsilon-removal algorithm.
+//
+// Expand(s) computes the arcs and the final weight that state 's'
+// receives after epsilon removal; the results are then available from
+// Arcs() and Final() until the next call to Expand().
+template <class Arc, class Queue>
+class RmEpsilonState {
+ public:
+  typedef typename Arc::Label Label;
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Weight Weight;
+
+  // 'distance' is the caller-provided vector used to hold the shortest
+  // distances of the epsilon-closure computation.
+  RmEpsilonState(const Fst<Arc> &fst,
+                 vector<Weight> *distance,
+                 const RmEpsilonOptions<Arc, Queue> &opts)
+      : fst_(fst), distance_(distance), sd_state_(fst_, distance, opts, true),
+        expand_id_(0) {}
+
+  // Compute arcs and final weight for state 's'
+  void Expand(StateId s);
+
+  // Returns arcs of expanded state.
+  vector<Arc> &Arcs() { return arcs_; }
+
+  // Returns final weight of expanded state.
+  const Weight &Final() const { return final_; }
+
+  // Return true if an error has occurred.
+  bool Error() const { return sd_state_.Error(); }
+
+ private:
+  // Multipliers used by ElementKey to mix the labels into the hash.
+  static const size_t kPrime0 = 7853;
+  static const size_t kPrime1 = 7867;
+
+  // Identifies an output arc by its labels and destination state.
+  struct Element {
+    Label ilabel;
+    Label olabel;
+    StateId nextstate;
+
+    Element() {}
+
+    Element(Label i, Label o, StateId s)
+        : ilabel(i), olabel(o), nextstate(s) {}
+  };
+
+  // Hash functor for Element. All three fields compared by ElementEqual
+  // take part in the hash, so arcs that differ only in their labels do
+  // not all land in the same bucket.
+  class ElementKey {
+   public:
+    size_t operator()(const Element& e) const {
+      return static_cast<size_t>(e.nextstate) +
+             static_cast<size_t>(e.ilabel) * kPrime0 +
+             static_cast<size_t>(e.olabel) * kPrime1;
+    }
+  };
+
+  // Equality functor for Element: all three fields must agree.
+  class ElementEqual {
+   public:
+    bool operator()(const Element &e1, const Element &e2) const {
+      return (e1.ilabel == e2.ilabel) && (e1.olabel == e2.olabel)
+          && (e1.nextstate == e2.nextstate);
+    }
+  };
+
+  typedef unordered_map<Element, pair<StateId, size_t>,
+                        ElementKey, ElementEqual> ElementMap;
+
+  const Fst<Arc> &fst_;
+  // Distance from state being expanded in epsilon-closure.
+  vector<Weight> *distance_;
+  // Shortest distance algorithm computation state.
+  ShortestDistanceState<Arc, Queue, EpsilonArcFilter<Arc> > sd_state_;
+  // Maps an element 'e' to a pair 'p' corresponding to a position
+  // in the arcs vector of the state being expanded. 'e' corresponds
+  // to the position 'p.second' in the 'arcs_' vector if 'p.first' is
+  // equal to the state being expanded.
+  ElementMap element_map_;
+  EpsilonArcFilter<Arc> eps_filter_;
+  stack<StateId> eps_queue_;      // Queue used to visit the epsilon-closure
+  vector<bool> visited_;          // '[i] = true' if state 'i' has been visited
+  slist<StateId> visited_states_; // List of visited states
+  vector<Arc> arcs_;              // Arcs of state being expanded
+  Weight final_;                  // Final weight of state being expanded
+  StateId expand_id_;             // Unique ID for each call to Expand
+
+  DISALLOW_COPY_AND_ASSIGN(RmEpsilonState);
+};
+
+// Out-of-class definitions of the static constants declared in
+// RmEpsilonState.
+template <class Arc, class Queue>
+const size_t RmEpsilonState<Arc, Queue>::kPrime0;
+template <class Arc, class Queue>
+const size_t RmEpsilonState<Arc, Queue>::kPrime1;
+
+
+// Computes the arcs and final weight of 'source' after epsilon removal
+// and leaves them in arcs_ and final_. Returns early, with both reset,
+// if the shortest-distance computation reports an error.
+template <class Arc, class Queue>
+void RmEpsilonState<Arc,Queue>::Expand(typename Arc::StateId source) {
+  final_ = Weight::Zero();
+  arcs_.clear();
+  sd_state_.ShortestDistance(source);
+  if (sd_state_.Error()) return;
+
+  // Visit every state reachable from 'source' through epsilon arcs.
+  for (eps_queue_.push(source); !eps_queue_.empty(); ) {
+    const StateId current = eps_queue_.top();
+    eps_queue_.pop();
+
+    while (visited_.size() <= current) visited_.push_back(false);
+    if (visited_[current]) continue;
+    visited_[current] = true;
+    visited_states_.push_front(current);
+
+    for (ArcIterator< Fst<Arc> > arc_it(fst_, current);
+         !arc_it.Done();
+         arc_it.Next()) {
+      // Prefix the arc weight with the distance from 'source'.
+      Arc arc = arc_it.Value();
+      arc.weight = Times((*distance_)[current], arc.weight);
+
+      if (eps_filter_(arc)) {
+        // Epsilon arc: schedule its destination unless already visited.
+        while (visited_.size() <= arc.nextstate)
+          visited_.push_back(false);
+        if (!visited_[arc.nextstate])
+          eps_queue_.push(arc.nextstate);
+        continue;
+      }
+
+      // Non-epsilon arc: merge it with an equal arc already produced by
+      // this call, or append it as a new arc.
+      const Element key(arc.ilabel, arc.olabel, arc.nextstate);
+      const typename ElementMap::iterator slot = element_map_.find(key);
+      if (slot == element_map_.end()) {
+        element_map_.insert(typename ElementMap::value_type(
+            key, pair<StateId, size_t>(expand_id_, arcs_.size())));
+        arcs_.push_back(arc);
+      } else if (slot->second.first == expand_id_) {
+        Weight &merged = arcs_[slot->second.second].weight;
+        merged = Plus(merged, arc.weight);
+      } else {
+        // Entry left over from an earlier call: reuse it for this one.
+        slot->second.first = expand_id_;
+        slot->second.second = arcs_.size();
+        arcs_.push_back(arc);
+      }
+    }
+    final_ = Plus(final_, Times((*distance_)[current], fst_.Final(current)));
+  }
+
+  // Clear the visited marks of exactly the states touched above.
+  for (; !visited_states_.empty(); visited_states_.pop_front())
+    visited_[visited_states_.front()] = false;
+  ++expand_id_;
+}
+
+// Removes epsilon-transitions (when both the input and output label
+// are an epsilon) from a transducer. The result will be an equivalent
+// FST that has no such epsilon transitions. This version modifies
+// its input. It allows fine control via the options argument; see
+// below for a simpler interface.
+//
+// The vector 'distance' will be used to hold the shortest distances
+// during the epsilon-closure computation. The state queue discipline
+// and convergence delta are taken in the options argument.
+template <class Arc, class Queue>
+void RmEpsilon(MutableFst<Arc> *fst,
+ vector<typename Arc::Weight> *distance,
+ const RmEpsilonOptions<Arc, Queue> &opts) {
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Weight Weight;
+ typedef typename Arc::Label Label;
+
+ // An FST without a start state is left unchanged.
+ if (fst->Start() == kNoStateId) {
+ return;
+ }
+
+ // 'noneps_in[s]' will be set to true iff 's' admits a non-epsilon
+ // incoming transition or is the start state.
+ vector<bool> noneps_in(fst->NumStates(), false);
+ noneps_in[fst->Start()] = true;
+ for (StateId i = 0; i < fst->NumStates(); ++i) {
+ for (ArcIterator<Fst<Arc> > aiter(*fst, i);
+ !aiter.Done();
+ aiter.Next()) {
+ if (aiter.Value().ilabel != 0 || aiter.Value().olabel != 0)
+ noneps_in[aiter.Value().nextstate] = true;
+ }
+ }
+
+ // States sorted in topological order when (acyclic) or generic
+ // topological order (cyclic).
+ vector<StateId> states;
+ states.reserve(fst->NumStates());
+
+ if (fst->Properties(kTopSorted, false) & kTopSorted) {
+ // Already top-sorted: use the state ids in increasing order.
+ for (StateId i = 0; i < fst->NumStates(); i++)
+ states.push_back(i);
+ } else if (fst->Properties(kAcyclic, false) & kAcyclic) {
+ // Acyclic: compute an order with a DFS restricted to epsilon arcs.
+ vector<StateId> order;
+ bool acyclic;
+ TopOrderVisitor<Arc> top_order_visitor(&order, &acyclic);
+ DfsVisit(*fst, &top_order_visitor, EpsilonArcFilter<Arc>());
+ // Sanity check: should be acyclic if property bit is set.
+ if(!acyclic) {
+ FSTERROR() << "RmEpsilon: inconsistent acyclic property bit";
+ fst->SetProperties(kError, kError);
+ return;
+ }
+ // Place state 'i' at index 'order[i]' of 'states'.
+ states.resize(order.size());
+ for (StateId i = 0; i < order.size(); i++)
+ states[order[i]] = i;
+ } else {
+ // Otherwise: group the states by the 'scc' value that the SCC
+ // visitor, restricted to epsilon arcs, assigns to each state.
+ uint64 props;
+ vector<StateId> scc;
+ SccVisitor<Arc> scc_visitor(&scc, 0, 0, &props);
+ DfsVisit(*fst, &scc_visitor, EpsilonArcFilter<Arc>());
+ // 'first' and 'next' form one singly linked list of states per
+ // 'scc' value: first[c] is the head of the list for value 'c' and
+ // next[s] is the state following 's' in its list.
+ vector<StateId> first(scc.size(), kNoStateId);
+ vector<StateId> next(scc.size(), kNoStateId);
+ for (StateId i = 0; i < scc.size(); i++) {
+ if (first[scc[i]] != kNoStateId)
+ next[i] = first[scc[i]];
+ first[scc[i]] = i;
+ }
+ // Append the lists one after another, in increasing 'scc' value.
+ for (StateId i = 0; i < first.size(); i++)
+ for (StateId j = first[i]; j != kNoStateId; j = next[j])
+ states.push_back(j);
+ }
+
+ RmEpsilonState<Arc, Queue>
+ rmeps_state(*fst, distance, opts);
+
+ // Process the states from the back of 'states' to the front. Each
+ // processed state has its final weight and arcs replaced by the
+ // result of Expand(); states with no non-epsilon incoming arc are
+ // skipped here.
+ while (!states.empty()) {
+ StateId state = states.back();
+ states.pop_back();
+ if (!noneps_in[state])
+ continue;
+ rmeps_state.Expand(state);
+ fst->SetFinal(state, rmeps_state.Final());
+ fst->DeleteArcs(state);
+ vector<Arc> &arcs = rmeps_state.Arcs();
+ fst->ReserveArcs(state, arcs.size());
+ // Moves the arcs over from the back, emptying the Arcs() vector.
+ while (!arcs.empty()) {
+ fst->AddArc(state, arcs.back());
+ arcs.pop_back();
+ }
+ }
+
+ // The skipped states lose all their outgoing arcs.
+ for (StateId s = 0; s < fst->NumStates(); ++s) {
+ if (!noneps_in[s])
+ fst->DeleteArcs(s);
+ }
+
+ // Record any error from the computation, then update the stored
+ // property bits.
+ if(rmeps_state.Error())
+ fst->SetProperties(kError, kError);
+ fst->SetProperties(
+ RmEpsilonProperties(fst->Properties(kFstProperties, false)),
+ kFstProperties);
+
+ // Prune when either threshold was supplied.
+ if (opts.weight_threshold != Weight::Zero() ||
+ opts.state_threshold != kNoStateId)
+ Prune(fst, opts.weight_threshold, opts.state_threshold);
+ // Connect when requested, under the threshold condition below.
+ if (opts.connect && (opts.weight_threshold == Weight::Zero() ||
+ opts.state_threshold != kNoStateId))
+ Connect(fst);
+}
+
+// Removes epsilon-transitions (when both the input and output label
+// are an epsilon) from a transducer. The result will be an equivalent
+// FST that has no such epsilon transitions. This version modifies its
+// input. It has a simplified interface; see above for a version that
+// allows finer control.
+//
+// Complexity:
+// - Time:
+// - Unweighted: O(V^2 + V E)
+// - Acyclic: O(V^2 + V E)
+// - Tropical semiring: O(V^2 log V + V E)
+// - General: exponential
+// - Space: O(V E)
+// where V = # of states visited, E = # of arcs.
+//
+// References:
+// - Mehryar Mohri. Generic Epsilon-Removal and Input
+// Epsilon-Normalization Algorithms for Weighted Transducers,
+// "International Journal of Computer Science", 13(1):129-143 (2002).
+template <class Arc>
+void RmEpsilon(MutableFst<Arc> *fst,
+               bool connect = true,
+               typename Arc::Weight weight_threshold = Arc::Weight::Zero(),
+               typename Arc::StateId state_threshold = kNoStateId,
+               float delta = kDelta) {
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Weight Weight;
+  typedef AutoQueue<StateId> StateQueue;
+
+  // The queue is constructed over the epsilon arcs of 'fst' and is
+  // given the same distance vector as the algorithm below.
+  vector<Weight> distance;
+  StateQueue state_queue(*fst, &distance, EpsilonArcFilter<Arc>());
+
+  const RmEpsilonOptions<Arc, StateQueue> opts(
+      &state_queue, delta, connect, weight_threshold, state_threshold);
+  RmEpsilon(fst, &distance, opts);
+}
+
+
+// Options for the delayed RmEpsilonFst: the cache options plus the
+// delta value handed to the epsilon-closure computation.
+struct RmEpsilonFstOptions : CacheOptions {
+  float delta;
+
+  // Uses the given cache options.
+  RmEpsilonFstOptions(const CacheOptions &opts, float d = kDelta)
+      : CacheOptions(opts),
+        delta(d) {}
+
+  // Uses default-constructed cache options.
+  explicit RmEpsilonFstOptions(float d = kDelta)
+      : delta(d) {}
+};
+
+
+// Implementation of delayed RmEpsilonFst.
+//
+// States are expanded on demand: the first query about a state runs
+// RmEpsilonState::Expand() on it and stores the resulting final weight
+// and arcs in the cache provided by CacheImpl.
+template <class A>
+class RmEpsilonFstImpl : public CacheImpl<A> {
+ public:
+ using FstImpl<A>::SetType;
+ using FstImpl<A>::SetProperties;
+ using FstImpl<A>::SetInputSymbols;
+ using FstImpl<A>::SetOutputSymbols;
+
+ using CacheBaseImpl< CacheState<A> >::PushArc;
+ using CacheBaseImpl< CacheState<A> >::HasArcs;
+ using CacheBaseImpl< CacheState<A> >::HasFinal;
+ using CacheBaseImpl< CacheState<A> >::HasStart;
+ using CacheBaseImpl< CacheState<A> >::SetArcs;
+ using CacheBaseImpl< CacheState<A> >::SetFinal;
+ using CacheBaseImpl< CacheState<A> >::SetStart;
+
+ typedef typename A::Label Label;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+ typedef CacheState<A> State;
+
+ // Takes a copy of 'fst' and sets up the epsilon-removal state over
+ // that copy with a FIFO queue. The 'false' passed to RmEpsilonOptions
+ // is its 'connect' argument.
+ RmEpsilonFstImpl(const Fst<A>& fst, const RmEpsilonFstOptions &opts)
+ : CacheImpl<A>(opts),
+ fst_(fst.Copy()),
+ delta_(opts.delta),
+ rmeps_state_(
+ *fst_,
+ &distance_,
+ RmEpsilonOptions<A, FifoQueue<StateId> >(&queue_, delta_, false)) {
+ SetType("rmepsilon");
+ uint64 props = fst.Properties(kFstProperties, false);
+ SetProperties(RmEpsilonProperties(props, true), kCopyProperties);
+ SetInputSymbols(fst.InputSymbols());
+ SetOutputSymbols(fst.OutputSymbols());
+ }
+
+ // Copy constructor: copies the input FST with Copy(true) and builds a
+ // fresh epsilon-removal state over that copy.
+ RmEpsilonFstImpl(const RmEpsilonFstImpl &impl)
+ : CacheImpl<A>(impl),
+ fst_(impl.fst_->Copy(true)),
+ delta_(impl.delta_),
+ rmeps_state_(
+ *fst_,
+ &distance_,
+ RmEpsilonOptions<A, FifoQueue<StateId> >(&queue_, delta_, false)) {
+ SetType("rmepsilon");
+ SetProperties(impl.Properties(), kCopyProperties);
+ SetInputSymbols(impl.InputSymbols());
+ SetOutputSymbols(impl.OutputSymbols());
+ }
+
+ // Deletes the copy of the input FST owned by this object.
+ ~RmEpsilonFstImpl() {
+ delete fst_;
+ }
+
+ // The start state is the start state of the input FST.
+ StateId Start() {
+ if (!HasStart()) {
+ SetStart(fst_->Start());
+ }
+ return CacheImpl<A>::Start();
+ }
+
+ // Returns the final weight of 's', expanding the state first if its
+ // final weight is not cached yet.
+ Weight Final(StateId s) {
+ if (!HasFinal(s)) {
+ Expand(s);
+ }
+ return CacheImpl<A>::Final(s);
+ }
+
+ // The accessors below expand 's' first if its arcs are not cached yet.
+ size_t NumArcs(StateId s) {
+ if (!HasArcs(s))
+ Expand(s);
+ return CacheImpl<A>::NumArcs(s);
+ }
+
+ size_t NumInputEpsilons(StateId s) {
+ if (!HasArcs(s))
+ Expand(s);
+ return CacheImpl<A>::NumInputEpsilons(s);
+ }
+
+ size_t NumOutputEpsilons(StateId s) {
+ if (!HasArcs(s))
+ Expand(s);
+ return CacheImpl<A>::NumOutputEpsilons(s);
+ }
+
+ uint64 Properties() const { return Properties(kFstProperties); }
+
+ // Set error if found; return FST impl properties.
+ uint64 Properties(uint64 mask) const {
+ if ((mask & kError) &&
+ (fst_->Properties(kError, false) || rmeps_state_.Error()))
+ SetProperties(kError, kError);
+ return FstImpl<A>::Properties(mask);
+ }
+
+ // Initializes 'data' for iterating over the arcs of 's', expanding
+ // the state first if needed.
+ void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
+ if (!HasArcs(s))
+ Expand(s);
+ CacheImpl<A>::InitArcIterator(s, data);
+ }
+
+ // Computes the final weight and arcs of 's' and stores them in the
+ // cache.
+ void Expand(StateId s) {
+ rmeps_state_.Expand(s);
+ SetFinal(s, rmeps_state_.Final());
+ vector<A> &arcs = rmeps_state_.Arcs();
+ // Moves the arcs over from the back, emptying the Arcs() vector.
+ while (!arcs.empty()) {
+ PushArc(s, arcs.back());
+ arcs.pop_back();
+ }
+ SetArcs(s);
+ }
+
+ private:
+ // NOTE: rmeps_state_ is constructed from the members declared before
+ // it, so the declaration order below must be preserved.
+ const Fst<A> *fst_;  // Owned copy of the input FST
+ float delta_;  // Delta passed to RmEpsilonOptions
+ vector<Weight> distance_;  // Distance vector handed to rmeps_state_
+ FifoQueue<StateId> queue_;  // Queue handed to rmeps_state_
+ RmEpsilonState<A, FifoQueue<StateId> > rmeps_state_;  // Computes expansions
+
+ void operator=(const RmEpsilonFstImpl<A> &); // disallow
+};
+
+
+// Removes epsilon-transitions (when both the input and output label
+// are an epsilon) from a transducer. The result will be an equivalent
+// FST that has no such epsilon transitions. This version is a
+// delayed Fst.
+//
+// Complexity:
+// - Time:
+// - Unweighted: O(v^2 + v e)
+// - General: exponential
+// - Space: O(v e)
+// where v = # of states visited, e = # of arcs visited. Constant time
+// to visit an input state or arc is assumed and exclusive of caching.
+//
+// References:
+// - Mehryar Mohri. Generic Epsilon-Removal and Input
+// Epsilon-Normalization Algorithms for Weighted Transducers,
+// "International Journal of Computer Science", 13(1):129-143 (2002).
+//
+// This class attaches interface to implementation and handles
+// reference counting, delegating most methods to ImplToFst.
+template <class A>
+class RmEpsilonFst : public ImplToFst< RmEpsilonFstImpl<A> > {
+ public:
+  friend class ArcIterator< RmEpsilonFst<A> >;
+  friend class StateIterator< RmEpsilonFst<A> >;
+
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef CacheState<A> State;
+  typedef RmEpsilonFstImpl<A> Impl;
+
+  // Delayed epsilon removal of 'fst' with default options.
+  RmEpsilonFst(const Fst<A> &fst)
+      : ImplToFst< RmEpsilonFstImpl<A> >(
+            new RmEpsilonFstImpl<A>(fst, RmEpsilonFstOptions())) {}
+
+  // Delayed epsilon removal of 'fst' with the given options.
+  RmEpsilonFst(const Fst<A> &fst, const RmEpsilonFstOptions &opts)
+      : ImplToFst< RmEpsilonFstImpl<A> >(
+            new RmEpsilonFstImpl<A>(fst, opts)) {}
+
+  // See Fst<>::Copy() for doc.
+  RmEpsilonFst(const RmEpsilonFst<A> &fst, bool safe = false)
+      : ImplToFst< RmEpsilonFstImpl<A> >(fst, safe) {}
+
+  // Get a copy of this RmEpsilonFst. See Fst<>::Copy() for further doc.
+  virtual RmEpsilonFst<A> *Copy(bool safe = false) const {
+    RmEpsilonFst<A> *copy = new RmEpsilonFst<A>(*this, safe);
+    return copy;
+  }
+
+  virtual inline void InitStateIterator(StateIteratorData<A> *data) const;
+
+  // Forwards to the implementation.
+  virtual void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const {
+    Impl *impl = GetImpl();
+    impl->InitArcIterator(s, data);
+  }
+
+ private:
+  // Makes visible to friends.
+  Impl *GetImpl() const {
+    return ImplToFst< RmEpsilonFstImpl<A> >::GetImpl();
+  }
+
+  void operator=(const RmEpsilonFst<A> &fst);  // disallow
+};
+
+// StateIterator specialization for RmEpsilonFst, built on
+// CacheStateIterator.
+template<class A>
+class StateIterator< RmEpsilonFst<A> >
+    : public CacheStateIterator< RmEpsilonFst<A> > {
+ private:
+  typedef CacheStateIterator< RmEpsilonFst<A> > RmEpsCacheStateIterator;
+
+ public:
+  explicit StateIterator(const RmEpsilonFst<A> &fst)
+      : RmEpsCacheStateIterator(fst, fst.GetImpl()) {}
+};
+
+
+// ArcIterator specialization for RmEpsilonFst, built on
+// CacheArcIterator. Expands the state on construction if its arcs are
+// not cached yet.
+template <class A>
+class ArcIterator< RmEpsilonFst<A> >
+    : public CacheArcIterator< RmEpsilonFst<A> > {
+ public:
+  typedef typename A::StateId StateId;
+
+  ArcIterator(const RmEpsilonFst<A> &fst, StateId s)
+      : CacheArcIterator< RmEpsilonFst<A> >(fst.GetImpl(), s) {
+    RmEpsilonFstImpl<A> *impl = fst.GetImpl();
+    if (impl->HasArcs(s)) return;
+    impl->Expand(s);
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+
+// Installs a newly allocated state iterator over this RmEpsilonFst as
+// the base iterator of 'data'.
+template <class A>
+inline void RmEpsilonFst<A>::InitStateIterator(
+    StateIteratorData<A> *data) const {
+  typedef StateIterator< RmEpsilonFst<A> > RmEpsStateIterator;
+  data->base = new RmEpsStateIterator(*this);
+}
+
+
+// Useful alias when using StdArc: the delayed epsilon-removal Fst
+// instantiated on StdArc.
+typedef RmEpsilonFst<StdArc> StdRmEpsilonFst;
+
+} // namespace fst
+
+#endif // FST_LIB_RMEPSILON_H__
diff --git a/src/include/fst/rmfinalepsilon.h b/src/include/fst/rmfinalepsilon.h
new file mode 100644
index 0000000..236d1a7
--- /dev/null
+++ b/src/include/fst/rmfinalepsilon.h
@@ -0,0 +1,107 @@
+// rmfinalepsilon.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: johans@google.com (Johan Schalkwyk)
+//
+// \file
+// Function to remove final states that have only epsilon input arcs.
+
+#ifndef FST_LIB_RMFINALEPSILON_H__
+#define FST_LIB_RMFINALEPSILON_H__
+
+#include <unordered_set>
+using std::tr1::unordered_set;
+using std::tr1::unordered_multiset;
+#include <vector>
+using std::vector;
+
+#include <fst/connect.h>
+#include <fst/mutable-fst.h>
+
+
+namespace fst {
+
+// Folds epsilon transitions into final states back into their source states.
+//
+// The function works in three passes over *fst:
+//   1. Coaccessibility of every state is computed with an SccVisitor.
+//   2. Final states from which no arc leads to a coaccessible state are
+//      collected as removal candidates.
+//   3. For every state, each arc that is epsilon on both input and output
+//      and enters a candidate is dropped; its weight, multiplied by the
+//      candidate's final weight, is added to the source state's final
+//      weight. All other arcs are kept unchanged.
+// Finally Connect() is called on the result.
+//
+// fst: the FST to modify in place.
+template<class A>
+void RmFinalEpsilon(MutableFst<A>* fst) {
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+
+  // Determine the coaccessibility of states.
+  vector<bool> access;
+  vector<bool> coaccess;
+  uint64 props = 0;
+  SccVisitor<A> scc_visitor(0, &access, &coaccess, &props);
+  DfsVisit(*fst, &scc_visitor);
+
+  // Find potential list of removable final states. These are final states
+  // that have no outgoing transitions or final states that have a
+  // non-coaccessible future. Complexity O(S)
+  unordered_set<StateId> finals;
+  for (StateIterator<Fst<A> > siter(*fst); !siter.Done(); siter.Next()) {
+    StateId s = siter.Value();
+    if (fst->Final(s) != Weight::Zero()) {
+      bool future_coaccess = false;
+      for (ArcIterator<Fst<A> > aiter(*fst, s); !aiter.Done(); aiter.Next()) {
+        const A& arc = aiter.Value();
+        if (coaccess[arc.nextstate]) {
+          future_coaccess = true;
+          break;
+        }
+      }
+      if (!future_coaccess) {
+        finals.insert(s);
+      }
+    }
+  }
+
+  // Move the final weight. Complexity O(E)
+  vector<A> arcs;  // arcs of the current state that survive
+  for (StateIterator<Fst<A> > siter(*fst); !siter.Done(); siter.Next()) {
+    StateId s = siter.Value();
+    Weight w(fst->Final(s));
+
+    arcs.clear();
+    for (ArcIterator<Fst<A> > aiter(*fst, s); !aiter.Done(); aiter.Next()) {
+      const A& arc = aiter.Value();
+      // Only epsilon:epsilon arcs into a removable final state are folded.
+      const bool removable = arc.ilabel == 0 && arc.olabel == 0 &&
+          finals.find(arc.nextstate) != finals.end();
+      if (removable) {
+        // The weight of the path through this arc is the arc weight
+        // followed by the final weight, so the arc weight must be the left
+        // operand of Times; the order matters for non-commutative weights.
+        w = Plus(Times(arc.weight, fst->Final(arc.nextstate)), w);
+      } else {
+        arcs.push_back(arc);
+      }
+    }
+
+    // If some arcs (epsilon arcs) were deleted, delete all
+    // arcs and add back only the non epsilon arcs
+    if (arcs.size() < fst->NumArcs(s)) {
+      fst->DeleteArcs(s);
+      fst->SetFinal(s, w);
+      for (size_t i = 0; i < arcs.size(); ++i) {
+        fst->AddArc(s, arcs[i]);
+      }
+    }
+  }
+
+  Connect(fst);
+}
+
+} // namespace fst
+
+#endif // FST_LIB_RMFINALEPSILON_H__
diff --git a/src/include/fst/script/arcsort.h b/src/include/fst/script/arcsort.h
new file mode 100644
index 0000000..4277332
--- /dev/null
+++ b/src/include/fst/script/arcsort.h
@@ -0,0 +1,49 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_ARCSORT_H_
+#define FST_SCRIPT_ARCSORT_H_
+
+#include <fst/arcsort.h>
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+
+namespace fst {
+namespace script {
+
+enum ArcSortType { ILABEL_COMPARE, OLABEL_COMPARE };
+
+typedef args::Package<MutableFstClass*, const ArcSortType> ArcSortArgs;
+
+// Arc-typed implementation behind the scripting-level ArcSort: unpacks the
+// argument package and sorts the FST in args->arg1 using the comparator
+// selected by args->arg2.
+template<class Arc>
+void ArcSort(ArcSortArgs *args) {
+  MutableFst<Arc> *typed_fst = args->arg1->GetMutableFst<Arc>();
+
+  // Any value other than ILABEL_COMPARE is handled as OLABEL_COMPARE.
+  if (args->arg2 != ILABEL_COMPARE) {
+    OLabelCompare<Arc> by_olabel;
+    ArcSort(typed_fst, by_olabel);
+    return;
+  }
+
+  ILabelCompare<Arc> by_ilabel;
+  ArcSort(typed_fst, by_ilabel);
+}
+
+void ArcSort(MutableFstClass *ofst, ArcSortType sort_type);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_ARCSORT_H_
diff --git a/src/include/fst/script/arg-packs.h b/src/include/fst/script/arg-packs.h
new file mode 100644
index 0000000..8ebf8d8
--- /dev/null
+++ b/src/include/fst/script/arg-packs.h
@@ -0,0 +1,240 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+// Convenience templates for defining arg packs for the FstClass operations.
+
+// See operation-templates.h for a discussion about why these are needed; the
+// short story is that all FstClass operations must be implemented by a version
+// that takes one argument, most likely a struct bundling all the
+// logical arguments together. These template structs provide convenient ways
+// to specify these bundles (e.g. by means of appropriate typedefs).
+
+// The ArgPack template is sufficient for bundling together all the args for
+// a particular function. The function is assumed to be void-returning. If
+// you want a space for a return value, use the WithReturnValue template
+// as follows:
+
+// WithReturnValue<bool, ArgPack<...> >
+
+#ifndef FST_SCRIPT_ARG_PACKS_H_
+#define FST_SCRIPT_ARG_PACKS_H_
+
+namespace fst {
+namespace script {
+namespace args {
+
+// Sentinel value that means "no arg here."
+class none_type { };
+
+// Base arg pack template class. Specializations follow that allow
+// fewer numbers of arguments (down to 2). If the maximum number of arguments
+// increases, you will need to change three things:
+// 1) Add more template parameters to this template
+// 2) Add more specializations to allow fewer numbers of parameters than
+// the new max.
+// 3) Add extra none_types to all existing specializations to fill
+// the new slots.
+
+
+// 9 args (max)
+// Primary template: holds one public member per argument slot. Slots that a
+// caller does not name default to none_type; the partial specializations
+// that follow cover those shorter forms.
+template<class T1,
+         class T2 = none_type,
+         class T3 = none_type,
+         class T4 = none_type,
+         class T5 = none_type,
+         class T6 = none_type,
+         class T7 = none_type,
+         class T8 = none_type,
+         class T9 = none_type>
+struct Package {
+  T1 arg1;
+  T2 arg2;
+  T3 arg3;
+  T4 arg4;
+  T5 arg5;
+  T6 arg6;
+  T7 arg7;
+  T8 arg8;
+  T9 arg9;
+
+  // Initializes each member from the argument in the same position.
+  Package(T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9)
+      : arg1(a1), arg2(a2), arg3(a3), arg4(a4), arg5(a5),
+        arg6(a6), arg7(a7), arg8(a8), arg9(a9) { }
+};
+
+// 8 args
+// Partial specialization selected when only the ninth slot is none_type.
+template<class T1, class T2, class T3, class T4,
+         class T5, class T6, class T7, class T8>
+struct Package<T1, T2, T3, T4, T5, T6, T7, T8, none_type> {
+  T1 arg1;
+  T2 arg2;
+  T3 arg3;
+  T4 arg4;
+  T5 arg5;
+  T6 arg6;
+  T7 arg7;
+  T8 arg8;
+
+  // Initializes each member from the argument in the same position.
+  Package(T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8)
+      : arg1(a1), arg2(a2), arg3(a3), arg4(a4),
+        arg5(a5), arg6(a6), arg7(a7), arg8(a8) { }
+};
+
+// 7 args
+// Partial specialization selected when slots eight and nine are none_type.
+template<class T1, class T2, class T3, class T4,
+         class T5, class T6, class T7>
+struct Package<T1, T2, T3, T4, T5, T6, T7,
+               none_type, none_type> {
+  T1 arg1;
+  T2 arg2;
+  T3 arg3;
+  T4 arg4;
+  T5 arg5;
+  T6 arg6;
+  T7 arg7;
+
+  // Initializes each member from the argument in the same position.
+  Package(T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7)
+      : arg1(a1), arg2(a2), arg3(a3), arg4(a4),
+        arg5(a5), arg6(a6), arg7(a7) { }
+};
+
+// 6 args
+// Partial specialization selected when slots seven through nine are
+// none_type.
+template<class T1, class T2, class T3,
+         class T4, class T5, class T6>
+struct Package<T1, T2, T3, T4, T5, T6, none_type,
+               none_type, none_type> {
+  T1 arg1;
+  T2 arg2;
+  T3 arg3;
+  T4 arg4;
+  T5 arg5;
+  T6 arg6;
+
+  // Initializes each member from the argument in the same position.
+  Package(T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6)
+      : arg1(a1), arg2(a2), arg3(a3),
+        arg4(a4), arg5(a5), arg6(a6) { }
+};
+
+// 5 args
+// Partial specialization selected when slots six through nine are none_type.
+template<class T1, class T2, class T3, class T4, class T5>
+struct Package<T1, T2, T3, T4, T5, none_type, none_type,
+               none_type, none_type> {
+  T1 arg1;
+  T2 arg2;
+  T3 arg3;
+  T4 arg4;
+  T5 arg5;
+
+  // Initializes each member from the argument in the same position.
+  Package(T1 a1, T2 a2, T3 a3, T4 a4, T5 a5)
+      : arg1(a1), arg2(a2), arg3(a3), arg4(a4), arg5(a5) { }
+};
+
+// 4 args
+// Partial specialization selected when slots five through nine are
+// none_type.
+template<class T1, class T2, class T3, class T4>
+struct Package<T1, T2, T3, T4, none_type, none_type,
+               none_type, none_type, none_type> {
+  T1 arg1;
+  T2 arg2;
+  T3 arg3;
+  T4 arg4;
+
+  // Initializes each member from the argument in the same position.
+  Package(T1 a1, T2 a2, T3 a3, T4 a4)
+      : arg1(a1), arg2(a2), arg3(a3), arg4(a4) { }
+};
+
+// 3 args
+// Partial specialization selected when slots four through nine are
+// none_type.
+template<class T1, class T2, class T3>
+struct Package<T1, T2, T3, none_type, none_type,
+               none_type, none_type, none_type,
+               none_type> {
+  T1 arg1;
+  T2 arg2;
+  T3 arg3;
+
+  // Initializes each member from the argument in the same position.
+  Package(T1 a1, T2 a2, T3 a3)
+      : arg1(a1), arg2(a2), arg3(a3) { }
+};
+
+// 2 args (minimum)
+// Partial specialization selected when slots three through nine are
+// none_type.
+template<class T1, class T2>
+struct Package<T1, T2, none_type, none_type,
+               none_type, none_type, none_type,
+               none_type, none_type> {
+  T1 arg1;
+  T2 arg2;
+
+  // Initializes each member from the argument in the same position.
+  Package(T1 a1, T2 a2)
+      : arg1(a1), arg2(a2) { }
+};
+
+// Tack this on to an existing arg pack to add a return value.
+// The syntax for accessing the args is then slightly more stilted,
+// as you must do an extra member access (since the args are stored
+// as a member of this class).
+// The alternative is to declare another slew of templates for functions
+// that return a value, analogous to the above.
+
+// Pairs an argument package with a slot for a return value.
+//
+// retval: written by the operation that receives this struct.
+// args:   reference to the caller's argument package; the package is not
+//         copied, so it must stay alive for as long as this object is used.
+template<class Retval, class ArgPackage>
+struct WithReturnValue {
+  Retval retval;
+  const ArgPackage &args;
+
+  // retval is value-initialized so that pointer, bool and arithmetic return
+  // slots start out as zero instead of holding an indeterminate value when
+  // no operation ever assigns them.
+  explicit WithReturnValue(const ArgPackage &args) : retval(), args(args) { }
+};
+
+// We don't want to store a reference to a reference, if ArgPackage is
+// already some reference type. This specialization strips the reference
+// from the second template argument and otherwise has the same members.
+template<class Retval, class ArgPackage>
+struct WithReturnValue<Retval, ArgPackage&> {
+  Retval retval;
+  const ArgPackage &args;
+
+  // retval is value-initialized so that pointer, bool and arithmetic return
+  // slots start out as zero instead of holding an indeterminate value when
+  // no operation ever assigns them.
+  explicit WithReturnValue(const ArgPackage &args) : retval(), args(args) { }
+};
+
+} // namespace args
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_ARG_PACKS_H_
diff --git a/src/include/fst/script/closure.h b/src/include/fst/script/closure.h
new file mode 100644
index 0000000..93b5ec3
--- /dev/null
+++ b/src/include/fst/script/closure.h
@@ -0,0 +1,41 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_CLOSURE_H_
+#define FST_SCRIPT_CLOSURE_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/closure.h>
+
+namespace fst {
+namespace script {
+
+typedef args::Package<MutableFstClass*, const ClosureType> ClosureArgs;
+
+// Arc-typed implementation behind the scripting-level Closure: extracts the
+// typed mutable FST from args->arg1 and applies Closure with the closure
+// type held in args->arg2.
+template<class Arc>
+void Closure(ClosureArgs *args) {
+  Closure(args->arg1->GetMutableFst<Arc>(), args->arg2);
+}
+
+void Closure(MutableFstClass *ofst, ClosureType closure_type);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_CLOSURE_H_
diff --git a/src/include/fst/script/compile-impl.h b/src/include/fst/script/compile-impl.h
new file mode 100644
index 0000000..4aab15b
--- /dev/null
+++ b/src/include/fst/script/compile-impl.h
@@ -0,0 +1,215 @@
+// compile-impl.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Class to compile a binary Fst from textual input.
+
+#ifndef FST_SCRIPT_COMPILE_IMPL_H_
+#define FST_SCRIPT_COMPILE_IMPL_H_
+
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <sstream>
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <iostream>
+#include <fstream>
+#include <fst/fst.h>
+#include <fst/util.h>
+#include <fst/vector-fst.h>
+
+DECLARE_string(fst_field_separator);
+
+namespace fst {
+
+// Compile a binary Fst from textual input, helper class for fstcompile.cc
+// WARNING: Stand-alone use of this class not recommended, most code should
+// read/write using the binary format which is much more efficient.
+//
+// All parsing is done by the constructor; the result is retrieved with
+// Fst(). Malformed input is reported through FSTERROR() and marks the result
+// with the kError property.
+template <class A> class FstCompiler {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef typename A::Label Label;
+  typedef typename A::Weight Weight;
+
+  // Reads the textual FST from istrm line by line. Fields are split on the
+  // characters in FLAGS_fst_field_separator (plus newline). Accepted line
+  // layouts, by number of columns:
+  //   1: state                         -> state is final with weight One
+  //   2: state weight                  -> state is final with that weight
+  //   3: src dst label                 -> acceptor arc, weight One
+  //                                       (rejected unless accep is set)
+  //   4: src dst label weight          -> when accep is set
+  //      src dst ilabel olabel         -> otherwise, weight One
+  //   5: src dst ilabel olabel weight  -> rejected when accep is set
+  // Empty lines are skipped. The source state of input line 1 becomes the
+  // start state. Parsing stops at the first line with a bad column count.
+  //
+  // istrm:        text input.
+  // source:       name of the input, used in error messages.
+  // isyms/osyms:  optional symbol tables for arc input/output labels.
+  // ssyms:        optional symbol table for state names.
+  // accep:        interpret lines as acceptor arcs (one label per arc).
+  // ikeep/okeep:  attach isyms/osyms to the compiled FST.
+  // nkeep:        use state IDs as given instead of renumbering them.
+  //
+  // WARNING: use of 'allow_negative_labels = true' not recommended; may
+  // cause conflicts
+  FstCompiler(istream &istrm, const string &source,
+              const SymbolTable *isyms, const SymbolTable *osyms,
+              const SymbolTable *ssyms, bool accep, bool ikeep,
+              bool okeep, bool nkeep, bool allow_negative_labels = false)
+      : nline_(0), source_(source),
+        isyms_(isyms), osyms_(osyms), ssyms_(ssyms),
+        nstates_(0), keep_state_numbering_(nkeep),
+        allow_negative_labels_(allow_negative_labels) {
+    // The separator set does not depend on the line, so build it once.
+    const string separator = FLAGS_fst_field_separator + "\n";
+    char line[kLineLen];
+    while (istrm.getline(line, kLineLen)) {
+      ++nline_;
+      vector<char *> col;
+      SplitToVector(line, separator.c_str(), &col, true);
+      if (col.size() == 0 || col[0][0] == '\0')  // empty line
+        continue;
+      if (col.size() > 5 ||
+          (col.size() > 4 && accep) ||
+          (col.size() == 3 && !accep)) {
+        FSTERROR() << "FstCompiler: Bad number of columns, source = "
+                   << source_
+                   << ", line = " << nline_;
+        fst_.SetProperties(kError, kError);
+        return;
+      }
+      StateId s = StrToStateId(col[0]);
+      while (s >= fst_.NumStates())
+        fst_.AddState();
+      if (nline_ == 1)
+        fst_.SetStart(s);
+
+      Arc arc;
+      StateId d = s;  // destination; stays equal to s for final-state lines
+      switch (col.size()) {
+        case 1:
+          fst_.SetFinal(s, Weight::One());
+          break;
+        case 2:
+          fst_.SetFinal(s, StrToWeight(col[1], true));
+          break;
+        case 3:
+          arc.nextstate = d = StrToStateId(col[1]);
+          arc.ilabel = StrToILabel(col[2]);
+          arc.olabel = arc.ilabel;
+          arc.weight = Weight::One();
+          fst_.AddArc(s, arc);
+          break;
+        case 4:
+          arc.nextstate = d = StrToStateId(col[1]);
+          arc.ilabel = StrToILabel(col[2]);
+          if (accep) {
+            arc.olabel = arc.ilabel;
+            arc.weight = StrToWeight(col[3], false);
+          } else {
+            arc.olabel = StrToOLabel(col[3]);
+            arc.weight = Weight::One();
+          }
+          fst_.AddArc(s, arc);
+          break;
+        case 5:
+          arc.nextstate = d = StrToStateId(col[1]);
+          arc.ilabel = StrToILabel(col[2]);
+          arc.olabel = StrToOLabel(col[3]);
+          arc.weight = StrToWeight(col[4], false);
+          fst_.AddArc(s, arc);
+          break;
+      }
+      // Make sure the destination state exists as well.
+      while (d >= fst_.NumStates())
+        fst_.AddState();
+    }
+    if (ikeep)
+      fst_.SetInputSymbols(isyms);
+    if (okeep)
+      fst_.SetOutputSymbols(osyms);
+  }
+
+  // Returns the compiled FST.
+  const VectorFst<A> &Fst() const {
+    return fst_;
+  }
+
+ private:
+  // Maximum line length in text file.
+  static const int kLineLen = 8096;
+
+  // Converts token s to an integer ID. With a symbol table the token is
+  // looked up via syms->Find(); without one it is parsed as a base-10
+  // integer. A failed lookup, trailing garbage, or a negative value while
+  // allow_negative is false is reported and sets kError; the value obtained
+  // is returned in either case. `name` only labels the error message.
+  int64 StrToId(const char *s, const SymbolTable *syms,
+                const char *name, bool allow_negative = false) const {
+    int64 n = 0;
+
+    if (syms) {
+      n = syms->Find(s);
+      if (n == -1 || (!allow_negative && n < 0)) {
+        FSTERROR() << "FstCompiler: Symbol \"" << s
+                   << "\" is not mapped to any integer " << name
+                   << ", symbol table = " << syms->Name()
+                   << ", source = " << source_ << ", line = " << nline_;
+        fst_.SetProperties(kError, kError);
+      }
+    } else {
+      char *p;
+      n = strtoll(s, &p, 10);
+      if (p < s + strlen(s) || (!allow_negative && n < 0)) {
+        FSTERROR() << "FstCompiler: Bad " << name << " integer = \"" << s
+                   << "\", source = " << source_ << ", line = " << nline_;
+        fst_.SetProperties(kError, kError);
+      }
+    }
+    return n;
+  }
+
+  // Converts a state token to a state ID. Unless the original numbering is
+  // kept, IDs are remapped to 0, 1, 2, ... in order of first appearance.
+  StateId StrToStateId(const char *s) {
+    StateId n = StrToId(s, ssyms_, "state ID");
+
+    if (keep_state_numbering_)
+      return n;
+
+    // remap state IDs to make dense set
+    typename unordered_map<StateId, StateId>::const_iterator it =
+        states_.find(n);
+    if (it == states_.end()) {
+      states_[n] = nstates_;
+      return nstates_++;
+    } else {
+      return it->second;
+    }
+  }
+
+  // Converts an input-label token; the result is a Label, not a StateId.
+  Label StrToILabel(const char *s) const {
+    return StrToId(s, isyms_, "arc ilabel", allow_negative_labels_);
+  }
+
+  // Converts an output-label token; the result is a Label, not a StateId.
+  Label StrToOLabel(const char *s) const {
+    return StrToId(s, osyms_, "arc olabel", allow_negative_labels_);
+  }
+
+  // Parses a weight with operator>>. A parse failure, or a Zero weight when
+  // allow_zero is false, is reported, sets kError and yields NoWeight().
+  Weight StrToWeight(const char *s, bool allow_zero) const {
+    Weight w;
+    istringstream strm(s);
+    strm >> w;
+    if (!strm || (!allow_zero && w == Weight::Zero())) {
+      FSTERROR() << "FstCompiler: Bad weight = \"" << s
+                 << "\", source = " << source_ << ", line = " << nline_;
+      fst_.SetProperties(kError, kError);
+      w = Weight::NoWeight();
+    }
+    return w;
+  }
+
+  // mutable so that the const conversion helpers can set kError.
+  mutable VectorFst<A> fst_;
+  size_t nline_;                            // lines read so far
+  string source_;                           // text FST source name
+  const SymbolTable *isyms_;                // ilabel symbol table
+  const SymbolTable *osyms_;                // olabel symbol table
+  const SymbolTable *ssyms_;                // slabel symbol table
+  unordered_map<StateId, StateId> states_;  // state ID map
+  StateId nstates_;                         // number of seen states
+  bool keep_state_numbering_;
+  bool allow_negative_labels_;  // not recommended; may cause conflicts
+
+  DISALLOW_COPY_AND_ASSIGN(FstCompiler);
+};
+
+} // namespace fst
+
+#endif // FST_SCRIPT_COMPILE_IMPL_H_
diff --git a/src/include/fst/script/compile.h b/src/include/fst/script/compile.h
new file mode 100644
index 0000000..bb6ea56
--- /dev/null
+++ b/src/include/fst/script/compile.h
@@ -0,0 +1,92 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_COMPILE_H_
+#define FST_SCRIPT_COMPILE_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/script/compile-impl.h>
+
+namespace fst {
+namespace script {
+
+// Argument bundle for CompileFst.
+// Note: the strings and the stream are held by reference. That is safe only
+// because this struct is used purely to pass them deeper in the call graph,
+// while the caller's objects are still alive. Be sure you understand why
+// this is so before using this struct for anything else!
+struct FstCompileArgs {
+  fst::istream &istrm;
+  const string &source;
+  const string &dest;
+  const string &fst_type;
+  const fst::SymbolTable *isyms;
+  const fst::SymbolTable *osyms;
+  const fst::SymbolTable *ssyms;
+  const bool accep;
+  const bool ikeep;
+  const bool okeep;
+  const bool nkeep;
+  const bool allow_negative_labels;
+
+  // Stores every argument in the member of the same name.
+  FstCompileArgs(istream &istrm, const string &source, const string &dest,
+                 const string &fst_type, const fst::SymbolTable *isyms,
+                 const fst::SymbolTable *osyms,
+                 const fst::SymbolTable *ssyms,
+                 bool accep, bool ikeep, bool okeep, bool nkeep,
+                 bool allow_negative_labels = false)
+      : istrm(istrm),
+        source(source),
+        dest(dest),
+        fst_type(fst_type),
+        isyms(isyms),
+        osyms(osyms),
+        ssyms(ssyms),
+        accep(accep),
+        ikeep(ikeep),
+        okeep(okeep),
+        nkeep(nkeep),
+        allow_negative_labels(allow_negative_labels) { }
+};
+
+// Compiles the textual FST described by `args` for arc type Arc and writes
+// the result to args->dest.
+//
+// The text is compiled by FstCompiler. When args->fst_type is not "vector",
+// the compiled FST is first converted to that type; a failed conversion is
+// reported through FSTERROR() and nothing is written.
+template<class Arc>
+void CompileFst(FstCompileArgs *args) {
+  using fst::FstCompiler;
+  using fst::Convert;
+  using fst::Fst;
+
+  FstCompiler<Arc> fstcompiler(args->istrm, args->source, args->isyms,
+                               args->osyms, args->ssyms,
+                               args->accep, args->ikeep,
+                               args->okeep, args->nkeep,
+                               args->allow_negative_labels);
+
+  const Fst<Arc> *fst = &fstcompiler.Fst();
+  // Non-null only when a conversion took place. The converted FST belongs
+  // to this function and must be released, unlike the compiler's own FST.
+  const Fst<Arc> *converted = 0;
+  if (args->fst_type != "vector") {
+    converted = Convert<Arc>(*fst, args->fst_type);
+    if (!converted) {
+      FSTERROR() << "Failed to convert FST to desired type: "
+                 << args->fst_type;
+      return;
+    }
+    fst = converted;
+  }
+
+  fst->Write(args->dest);
+  delete converted;
+}
+
+void CompileFst(istream &istrm, const string &source, const string &dest,
+ const string &fst_type, const string &arc_type,
+ const SymbolTable *isyms,
+ const SymbolTable *osyms, const SymbolTable *ssyms,
+ bool accep, bool ikeep, bool okeep, bool nkeep,
+ bool allow_negative_labels);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_COMPILE_H_
diff --git a/src/include/fst/script/compose.h b/src/include/fst/script/compose.h
new file mode 100644
index 0000000..96375f7
--- /dev/null
+++ b/src/include/fst/script/compose.h
@@ -0,0 +1,63 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_COMPOSE_H_
+#define FST_SCRIPT_COMPOSE_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/compose.h>
+
+namespace fst {
+namespace script {
+
+typedef args::Package<const FstClass&, const FstClass&,
+ MutableFstClass*, ComposeFilter> ComposeArgs1;
+
+// Arc-typed implementation behind the scripting-level Compose taking a
+// ComposeFilter: unpacks the two input FSTs and the output FST from the
+// argument package and forwards them, with the filter in args->arg4.
+template<class Arc>
+void Compose(ComposeArgs1 *args) {
+  const Fst<Arc> *lhs = args->arg1.GetFst<Arc>();
+  const Fst<Arc> *rhs = args->arg2.GetFst<Arc>();
+  MutableFst<Arc> *result = args->arg3->GetMutableFst<Arc>();
+
+  Compose(*lhs, *rhs, result, args->arg4);
+}
+
+typedef fst::ComposeOptions ComposeOptions;
+
+typedef args::Package<const FstClass&, const FstClass&,
+ MutableFstClass*, const ComposeOptions &> ComposeArgs2;
+
+// Arc-typed implementation behind the scripting-level Compose taking
+// ComposeOptions: unpacks the two input FSTs and the output FST from the
+// argument package and forwards them, with the options in args->arg4.
+template<class Arc>
+void Compose(ComposeArgs2 *args) {
+  const Fst<Arc> *lhs = args->arg1.GetFst<Arc>();
+  const Fst<Arc> *rhs = args->arg2.GetFst<Arc>();
+  MutableFst<Arc> *result = args->arg3->GetMutableFst<Arc>();
+
+  Compose(*lhs, *rhs, result, args->arg4);
+}
+
+void Compose(const FstClass &ifst1, const FstClass &ifst2,
+ MutableFstClass *ofst,
+ const ComposeOptions &opts = fst::script::ComposeOptions());
+
+void Compose(const FstClass &ifst1, const FstClass &ifst2,
+ MutableFstClass *ofst, ComposeFilter compose_filter);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_COMPOSE_H_
diff --git a/src/include/fst/script/concat.h b/src/include/fst/script/concat.h
new file mode 100644
index 0000000..46c4407
--- /dev/null
+++ b/src/include/fst/script/concat.h
@@ -0,0 +1,54 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_CONCAT_H_
+#define FST_SCRIPT_CONCAT_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/concat.h>
+
+namespace fst {
+namespace script {
+
+typedef args::Package<MutableFstClass*, const FstClass&> ConcatArgs1;
+typedef args::Package<const FstClass&, MutableFstClass*> ConcatArgs2;
+
+// Arc-typed implementation behind Concat(MutableFstClass*, const FstClass&):
+// the mutable FST comes from args->arg1 and the read-only FST from
+// args->arg2; both are forwarded in that order.
+template<class Arc>
+void Concat(ConcatArgs1 *args) {
+  MutableFst<Arc> *target = args->arg1->GetMutableFst<Arc>();
+  const Fst<Arc> *other = args->arg2.GetFst<Arc>();
+
+  Concat(target, *other);
+}
+
+// Arc-typed implementation behind Concat(const FstClass&, MutableFstClass*):
+// the read-only FST comes from args->arg1 and the mutable FST from
+// args->arg2; both are forwarded in that order.
+template<class Arc>
+void Concat(ConcatArgs2 *args) {
+  const Fst<Arc> *other = args->arg1.GetFst<Arc>();
+  MutableFst<Arc> *target = args->arg2->GetMutableFst<Arc>();
+
+  Concat(*other, target);
+}
+
+void Concat(MutableFstClass *ofst, const FstClass &ifst);
+void Concat(const FstClass &ifst, MutableFstClass *ofst);
+
+} // namespace script
+} // namespace fst
+
+
+
+#endif // FST_SCRIPT_CONCAT_H_
diff --git a/src/include/fst/script/connect.h b/src/include/fst/script/connect.h
new file mode 100644
index 0000000..19c4390
--- /dev/null
+++ b/src/include/fst/script/connect.h
@@ -0,0 +1,45 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_CONNECT_H_
+#define FST_SCRIPT_CONNECT_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/dfs-visit.h>
+#include <fst/connect.h>
+
+namespace fst {
+namespace script {
+
+// This function confuses SWIG, because both versions have the same args
+#ifndef SWIG
+// Arc-typed implementation behind the scripting-level Connect: obtains the
+// typed mutable FST from `fst` and applies Connect to it.
+template<class Arc>
+void Connect(MutableFstClass *fst) {
+  Connect(fst->GetMutableFst<Arc>());
+}
+#endif
+
+void Connect(MutableFstClass *fst);
+
+} // namespace script
+} // namespace fst
+
+
+
+#endif // FST_SCRIPT_CONNECT_H_
diff --git a/src/include/fst/script/convert.h b/src/include/fst/script/convert.h
new file mode 100644
index 0000000..2c70a70
--- /dev/null
+++ b/src/include/fst/script/convert.h
@@ -0,0 +1,49 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_CONVERT_H_
+#define FST_SCRIPT_CONVERT_H_
+
+#include <string>
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+
+namespace fst {
+namespace script {
+
+typedef args::Package<const FstClass&, const string&> ConvertInnerArgs;
+typedef args::WithReturnValue<FstClass*, ConvertInnerArgs> ConvertArgs;
+
+// Arc-typed implementation behind the scripting-level Convert.
+//
+// Converts the FST in args->args.arg1 to the FST type named by
+// args->args.arg2 and stores a newly allocated FstClass wrapping the result
+// in args->retval. When the conversion yields no FST, args->retval is set
+// to 0 instead.
+template<class Arc>
+void Convert(ConvertArgs *args) {
+  const Fst<Arc> &fst = *(args->args.arg1.GetFst<Arc>());
+  const string &new_type = args->args.arg2;
+
+  Fst<Arc> *result = Convert(fst, new_type);
+  // Convert can come back empty-handed; do not hand a null FST to FstClass.
+  args->retval = result ? new FstClass(result) : 0;
+  // The original code already deleted `result` after wrapping it, so the
+  // wrapper is presumed not to take ownership of this pointer.
+  delete result;
+}
+
+#ifdef SWIG
+%newobject Convert;
+#endif
+FstClass *Convert(const FstClass& f, const string &new_type);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_CONVERT_H_
diff --git a/src/include/fst/script/decode.h b/src/include/fst/script/decode.h
new file mode 100644
index 0000000..1064ad5
--- /dev/null
+++ b/src/include/fst/script/decode.h
@@ -0,0 +1,46 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_DECODE_H_
+#define FST_SCRIPT_DECODE_H_
+
+#include <string>
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/encode.h>
+
+namespace fst {
+namespace script {
+
+typedef args::Package<MutableFstClass*, const string&> DecodeArgs;
+
+// Arc-typed implementation behind the scripting-level Decode.
+//
+// Reads an EncodeMapper in DECODE mode from the file named by args->arg2
+// and uses it to decode the FST in args->arg1 in place. If the mapper
+// cannot be read, the error is reported through FSTERROR(), the FST is
+// marked with kError and no decoding takes place.
+template<class Arc>
+void Decode(DecodeArgs *args) {
+  MutableFst<Arc> *ofst = args->arg1->GetMutableFst<Arc>();
+
+  EncodeMapper<Arc> *decoder = EncodeMapper<Arc>::Read(args->arg2, DECODE);
+  if (!decoder) {
+    // Read() returns a pointer; never dereference it unchecked.
+    FSTERROR() << "Decode: Can't read encoder: " << args->arg2;
+    ofst->SetProperties(kError, kError);
+    return;
+  }
+  Decode(ofst, *decoder);
+
+  delete decoder;
+}
+
+void Decode(MutableFstClass *fst, const string &coder_fname);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_DECODE_H_
diff --git a/src/include/fst/script/determinize.h b/src/include/fst/script/determinize.h
new file mode 100644
index 0000000..38fd7ad
--- /dev/null
+++ b/src/include/fst/script/determinize.h
@@ -0,0 +1,68 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_DETERMINIZE_H_
+#define FST_SCRIPT_DETERMINIZE_H_
+
+#include <fst/determinize.h>
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/script/weight-class.h>
+
+namespace fst {
+namespace script {
+
+// Arc-type-independent options for the scripting-level Determinize. The
+// weight threshold is carried as a WeightClass so that no weight type has
+// to be named here.
+struct DeterminizeOptions {
+  float delta;
+  WeightClass weight_threshold;
+  int64 state_threshold;
+  int64 subsequential_label;
+
+  // Defaults: d = fst::kDelta, w = WeightClass::Zero(),
+  // n = fst::kNoStateId, l = 0.
+  explicit DeterminizeOptions(float d = fst::kDelta,
+                              WeightClass w =
+                                fst::script::WeightClass::Zero(),
+                              int64 n = fst::kNoStateId, int64 l = 0)
+      : delta(d),
+        weight_threshold(w),
+        state_threshold(n),
+        subsequential_label(l) {}
+};
+
+typedef args::Package<const FstClass&, MutableFstClass*,
+ const DeterminizeOptions &> DeterminizeArgs;
+
+// Arc-typed implementation behind the scripting-level Determinize: unpacks
+// the input FST, output FST and script-level options, copies the options
+// field by field into fst::DeterminizeOptions<Arc> (extracting the typed
+// weight from the WeightClass), and runs Determinize.
+template<class Arc>
+void Determinize(DeterminizeArgs *args) {
+  typedef typename Arc::Weight Weight;
+
+  const Fst<Arc> &input = *(args->arg1.GetFst<Arc>());
+  MutableFst<Arc> *output = args->arg2->GetMutableFst<Arc>();
+  const DeterminizeOptions &script_opts = args->arg3;
+
+  fst::DeterminizeOptions<Arc> typed_opts;
+  typed_opts.delta = script_opts.delta;
+  typed_opts.weight_threshold =
+      *(script_opts.weight_threshold.GetWeight<Weight>());
+  typed_opts.state_threshold = script_opts.state_threshold;
+  typed_opts.subsequential_label = script_opts.subsequential_label;
+
+  Determinize(input, output, typed_opts);
+}
+
+void Determinize(const FstClass &ifst, MutableFstClass *ofst,
+ const DeterminizeOptions &opts =
+ fst::script::DeterminizeOptions());
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_DETERMINIZE_H_
diff --git a/src/include/fst/script/difference.h b/src/include/fst/script/difference.h
new file mode 100644
index 0000000..76490d4
--- /dev/null
+++ b/src/include/fst/script/difference.h
@@ -0,0 +1,67 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_DIFFERENCE_H_
+#define FST_SCRIPT_DIFFERENCE_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/script/compose.h> // for ComposeFilter
+#include <fst/difference.h>
+
+namespace fst {
+namespace script {
+
+// Argument pack for the filter-based Difference:
+// arg1 / arg2 = input FSTs, arg3 = output FST, arg4 = compose filter.
+typedef args::Package<const FstClass&, const FstClass&,
+                      MutableFstClass*, ComposeFilter> DifferenceArgs1;
+
+// Unboxes the three FSTs for a concrete Arc and forwards them, together with
+// the compose filter, to the library Difference.
+template<class Arc>
+void Difference(DifferenceArgs1 *args) {
+  const Fst<Arc> &lhs = *(args->arg1.GetFst<Arc>());
+  const Fst<Arc> &rhs = *(args->arg2.GetFst<Arc>());
+  MutableFst<Arc> *result = args->arg3->GetMutableFst<Arc>();
+
+  Difference(lhs, rhs, result, args->arg4);
+}
+
+// Argument pack for the options-based Difference:
+// arg1 / arg2 = input FSTs, arg3 = output FST, arg4 = compose options.
+typedef args::Package<const FstClass&, const FstClass&,
+                      MutableFstClass*, const ComposeOptions &> DifferenceArgs2;
+
+// Unboxes the three FSTs for a concrete Arc and forwards them, together with
+// the compose options, to the library Difference.
+template<class Arc>
+void Difference(DifferenceArgs2 *args) {
+  const Fst<Arc> &lhs = *(args->arg1.GetFst<Arc>());
+  const Fst<Arc> &rhs = *(args->arg2.GetFst<Arc>());
+  MutableFst<Arc> *result = args->arg3->GetMutableFst<Arc>();
+
+  Difference(lhs, rhs, result, args->arg4);
+}
+
+
+void Difference(const FstClass &ifst1, const FstClass &ifst2,  // overload taking only a compose filter (declaration only)
+ MutableFstClass *ofst,
+ ComposeFilter compose_filter);
+
+void Difference(const FstClass &ifst1, const FstClass &ifst2,  // overload taking compose options; used when the 4th argument is omitted
+ MutableFstClass *ofst,
+ const ComposeOptions &opts = fst::script::ComposeOptions());
+
+
+} // namespace script
+} // namespace fst
+
+
+
+#endif // FST_SCRIPT_DIFFERENCE_H_
diff --git a/src/include/fst/script/draw-impl.h b/src/include/fst/script/draw-impl.h
new file mode 100644
index 0000000..e346649
--- /dev/null
+++ b/src/include/fst/script/draw-impl.h
@@ -0,0 +1,234 @@
+// draw-impl.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// Class to draw a binary FST by producing a text file in dot format,
+// helper class to fstdraw.cc
+
+#ifndef FST_SCRIPT_DRAW_IMPL_H_
+#define FST_SCRIPT_DRAW_IMPL_H_
+
+#include <sstream>
+#include <string>
+
+#include <fst/script/fst-class.h>
+#include <fst/fst.h>
+#include <fst/util.h>
+
+namespace fst {
+
+// Print a binary Fst in the dot textual format, helper class for fstdraw.cc
+// WARNING: Stand-alone use not recommended.
+template <class A> class FstDrawer {
+ public:
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+ typedef typename A::Label Label;
+ typedef typename A::Weight Weight;
+
+ FstDrawer(const Fst<A> &fst,  // fst is held by reference, so it must outlive this drawer
+ const SymbolTable *isyms,
+ const SymbolTable *osyms,
+ const SymbolTable *ssyms,
+ bool accep,
+ string title,
+ float width,
+ float height,
+ bool portrait,
+ bool vertical,
+ float ranksep,
+ float nodesep,
+ int fontsize,
+ int precision,
+ bool show_weight_one)
+ : fst_(fst), isyms_(isyms), osyms_(osyms), ssyms_(ssyms),
+ accep_(accep && fst.Properties(kAcceptor, true)), ostrm_(0),  // acceptor form only if requested AND the FST tests as an acceptor
+ title_(title), width_(width), height_(height), portrait_(portrait),
+ vertical_(vertical), ranksep_(ranksep), nodesep_(nodesep),
+ fontsize_(fontsize), precision_(precision),
+ show_weight_one_(show_weight_one) {}
+
+ // Draws the FST in dot format to *strm (dereferenced unconditionally, so it must be non-null); dest names the destination for error messages.
+ void Draw(ostream *strm, const string &dest) {
+ ostrm_ = strm;
+ dest_ = dest;
+ StateId start = fst_.Start();
+ if (start == kNoStateId)  // no start state: nothing at all is written
+ return;
+
+ PrintString("digraph FST {\n");
+ if (vertical_)
+ PrintString("rankdir = BT;\n");
+ else
+ PrintString("rankdir = LR;\n");
+ PrintString("size = \"");
+ Print(width_);
+ PrintString(",");
+ Print(height_);
+ PrintString("\";\n");
+ if (!dest_.empty())  // NOTE(review): the label is gated on dest_, not on title_ -- confirm this is intended
+ PrintString("label = \"" + title_ + "\";\n");
+ PrintString("center = 1;\n");
+ if (portrait_)
+ PrintString("orientation = Portrait;\n");
+ else
+ PrintString("orientation = Landscape;\n");
+ PrintString("ranksep = \"");
+ Print(ranksep_);
+ PrintString("\";\n");
+ PrintString("nodesep = \"");
+ Print(nodesep_);
+ PrintString("\";\n");
+ // initial state first
+ DrawState(start);
+ for (StateIterator< Fst<A> > siter(fst_);
+ !siter.Done();
+ siter.Next()) {
+ StateId s = siter.Value();
+ if (s != start)  // the start state was already drawn above
+ DrawState(s);
+ }
+ PrintString("}\n");
+ }
+
+ private:
+ // Maximum line length in text file.
+ static const int kLineLen = 8096;  // NOTE(review): not referenced anywhere in this class
+
+ void PrintString(const string &s) const {
+ *ostrm_ << s;
+ }
+
+ // Escapes backslash and double quote if these occur in the string. Dot will
+ // not deal gracefully with these if they are not escaped.
+ inline void EscapeChars(const string &s, string* ns) const {
+ const char* c = s.c_str();
+ while (*c) {
+ if (*c == '\\' || *c == '"') ns->push_back('\\');
+ ns->push_back(*c);  // result is appended to *ns; *ns is not cleared first
+ ++c;
+ }
+ }
+
+ void PrintId(int64 id, const SymbolTable *syms,  // prints the symbol for id when syms is given, else the number itself
+ const char *name) const {  // name is not used in the body
+ if (syms) {
+ string symbol = syms->Find(id);
+ if (symbol == "") {
+ FSTERROR() << "FstDrawer: Integer " << id
+ << " is not mapped to any textual symbol"
+ << ", symbol table = " << syms->Name()
+ << ", destination = " << dest_;
+ symbol = "?";  // placeholder label used after the error is reported
+ }
+ string nsymbol;
+ EscapeChars(symbol, &nsymbol);
+ PrintString(nsymbol);
+ } else {
+ ostringstream sid;
+ sid << id;
+ PrintString(sid.str());
+ }
+ }
+
+ void PrintStateId(StateId s) const {
+ PrintId(s, ssyms_, "state ID");
+ }
+
+ void PrintILabel(Label l) const {
+ PrintId(l, isyms_, "arc input label");
+ }
+
+ void PrintOLabel(Label l) const {
+ PrintId(l, osyms_, "arc output label");
+ }
+
+ template <class T>
+ void Print(T t) const {
+ *ostrm_ << t;
+ }
+
+ void DrawState(StateId s) const {  // writes the node line for s followed by one edge line per outgoing arc
+ Print(s);  // the dot node name is the raw numeric state id
+ PrintString(" [label = \"");
+ PrintStateId(s);
+ Weight final = fst_.Final(s);
+ if (final != Weight::Zero()) {  // final states are drawn as double circles
+ if (show_weight_one_ || (final != Weight::One())) {
+ PrintString("/");
+ Print(final);
+ }
+ PrintString("\", shape = doublecircle,");
+ } else {
+ PrintString("\", shape = circle,");
+ }
+ if (s == fst_.Start())
+ PrintString(" style = bold,");
+ else
+ PrintString(" style = solid,");
+ PrintString(" fontsize = ");
+ Print(fontsize_);
+ PrintString("]\n");
+ for (ArcIterator< Fst<A> > aiter(fst_, s);
+ !aiter.Done();
+ aiter.Next()) {
+ Arc arc = aiter.Value();
+ PrintString("\t");
+ Print(s);
+ PrintString(" -> ");
+ Print(arc.nextstate);
+ PrintString(" [label = \"");
+ PrintILabel(arc.ilabel);
+ if (!accep_) {  // transducer form: "ilabel:olabel"
+ PrintString(":");
+ PrintOLabel(arc.olabel);
+ }
+ if (show_weight_one_ || (arc.weight != Weight::One())) {
+ PrintString("/");
+ Print(arc.weight);
+ }
+ PrintString("\", fontsize = ");
+ Print(fontsize_);
+ PrintString("];\n");
+ }
+ }
+
+ const Fst<A> &fst_;
+ const SymbolTable *isyms_; // ilabel symbol table
+ const SymbolTable *osyms_; // olabel symbol table
+ const SymbolTable *ssyms_; // slabel symbol table
+ bool accep_; // print as acceptor when possible
+ ostream *ostrm_; // drawn FST destination
+ string dest_; // drawn FST destination name
+
+ string title_;
+ float width_;
+ float height_;
+ bool portrait_;
+ bool vertical_;
+ float ranksep_;
+ float nodesep_;
+ int fontsize_;
+ int precision_;  // NOTE(review): stored by the constructor but not read within this class
+ bool show_weight_one_;
+
+ DISALLOW_COPY_AND_ASSIGN(FstDrawer);
+};
+
+} // namespace fst
+
+#endif // FST_SCRIPT_DRAW_IMPL_H_
diff --git a/src/include/fst/script/draw.h b/src/include/fst/script/draw.h
new file mode 100644
index 0000000..1611ad1
--- /dev/null
+++ b/src/include/fst/script/draw.h
@@ -0,0 +1,113 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_DRAW_H_
+#define FST_SCRIPT_DRAW_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/script/draw-impl.h>
+#include <iostream>
+#include <fstream>
+
+namespace fst {
+namespace script {
+
+// Note: it is safe to pass these strings as references because
+// this struct is only used to pass them deeper in the call graph.
+// Be sure you understand why this is so before using this struct
+// for anything else!
+struct FstDrawerArgs {  // bundles every DrawFst argument so it can be passed as a single pointer
+ const FstClass &fst;  // reference member: the FstClass must outlive this struct
+ const SymbolTable *isyms;
+ const SymbolTable *osyms;
+ const SymbolTable *ssyms;
+ const bool accep;
+ const string& title;  // reference member: the caller's string must outlive this struct
+ const float width;
+ const float height;
+ const bool portrait;
+ const bool vertical;
+ const float ranksep;
+ const float nodesep;
+ const int fontsize;
+ const int precision;
+ const bool show_weight_one;
+ ostream *ostrm;  // passed to FstDrawer::Draw by the templated DrawFst
+ const string &dest;  // reference member: the caller's string must outlive this struct
+
+ FstDrawerArgs(const FstClass &fst,  // parameters deliberately share the member names
+ const SymbolTable *isyms,
+ const SymbolTable *osyms,
+ const SymbolTable *ssyms,
+ bool accep,
+ const string &title,
+ float width,
+ float height,
+ bool portrait,
+ bool vertical,
+ float ranksep,
+ float nodesep,
+ int fontsize,
+ int precision,
+ bool show_weight_one,
+ ostream *ostrm,
+ const string &dest) :
+ fst(fst), isyms(isyms), osyms(osyms), ssyms(ssyms), accep(accep),
+ title(title), width(width), height(height), portrait(portrait),
+ vertical(vertical), ranksep(ranksep), nodesep(nodesep),
+ fontsize(fontsize), precision(precision),
+ show_weight_one(show_weight_one), ostrm(ostrm), dest(dest) { }
+};
+
+
+// Unboxes the FST for a concrete Arc, builds an FstDrawer from the bundled
+// settings, and draws to args->ostrm (args->dest names the destination).
+template<class Arc>
+void DrawFst(FstDrawerArgs *args) {
+  const Fst<Arc> &typed_fst = *(args->fst.GetFst<Arc>());
+
+  FstDrawer<Arc> drawer(typed_fst,
+                        args->isyms, args->osyms, args->ssyms,
+                        args->accep, args->title,
+                        args->width, args->height,
+                        args->portrait, args->vertical,
+                        args->ranksep, args->nodesep,
+                        args->fontsize, args->precision,
+                        args->show_weight_one);
+  drawer.Draw(args->ostrm, args->dest);
+}
+
+void DrawFst(const FstClass &fst,  // arc-type-independent entry point (declaration only); same parameter list as the FstDrawerArgs constructor
+ const SymbolTable *isyms,
+ const SymbolTable *osyms,
+ const SymbolTable *ssyms,
+ bool accep,
+ const string &title,
+ float width,
+ float height,
+ bool portrait,
+ bool vertical,
+ float ranksep,
+ float nodesep,
+ int fontsize,
+ int precision,
+ bool show_weight_one,
+ ostream *ostrm,
+ const string &dest);
+
+} // namespace script
+} // namespace fst
+
+
+
+#endif // FST_SCRIPT_DRAW_H_
diff --git a/src/include/fst/script/encode.h b/src/include/fst/script/encode.h
new file mode 100644
index 0000000..dc1a290
--- /dev/null
+++ b/src/include/fst/script/encode.h
@@ -0,0 +1,58 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_ENCODE_H_
+#define FST_SCRIPT_ENCODE_H_
+
+#include <string>
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/encode.h>
+
+namespace fst {
+namespace script {
+
+// Argument pack for the arc-templated Encode:
+// arg1 = FST to encode in place, arg2 = encode flags,
+// arg3 = reuse_encoder, arg4 = encoder file name.
+typedef args::Package<MutableFstClass*, uint32, bool,
+                      const string &> EncodeArgs;
+
+// Encodes the FST in place for a concrete Arc.
+//
+// If reuse_encoder is true the encoder is read from coder_fname; otherwise a
+// new encoder is built from flags and, after encoding, written to
+// coder_fname. If an existing encoder cannot be read, an error is logged and
+// the FST is left untouched.
+template<class Arc>
+void Encode(EncodeArgs *args) {
+  MutableFst<Arc> *ofst = args->arg1->GetMutableFst<Arc>();
+  uint32 flags = args->arg2;
+  bool reuse_encoder = args->arg3;
+  const string &coder_fname = args->arg4;
+
+  EncodeMapper<Arc> *encoder = reuse_encoder
+      ? EncodeMapper<Arc>::Read(coder_fname, ENCODE)
+      : new EncodeMapper<Arc>(flags, ENCODE);
+
+  // Read() hands back a null pointer when the file cannot be loaded; passing
+  // that on to Encode() would dereference it.
+  if (!encoder) {
+    LOG(ERROR) << "Encode: unable to read encoder from file: " << coder_fname;
+    return;
+  }
+
+  Encode(ofst, encoder);
+  if (!reuse_encoder)
+    encoder->Write(coder_fname);
+
+  delete encoder;
+}
+
+// Arc-type-independent entry point (declaration only).
+void Encode(MutableFstClass *fst, uint32 flags, bool reuse_encoder,
+            const string &coder_fname);
+
+} // namespace script
+} // namespace fst
+
+
+
+#endif // FST_SCRIPT_ENCODE_H_
diff --git a/src/include/fst/script/epsnormalize.h b/src/include/fst/script/epsnormalize.h
new file mode 100644
index 0000000..50b12da
--- /dev/null
+++ b/src/include/fst/script/epsnormalize.h
@@ -0,0 +1,44 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_EPSNORMALIZE_H_
+#define FST_SCRIPT_EPSNORMALIZE_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/epsnormalize.h>
+
+namespace fst {
+namespace script {
+
+// Argument pack for the arc-templated EpsNormalize:
+// arg1 = input FST, arg2 = output FST, arg3 = normalization type.
+typedef args::Package<const FstClass&, MutableFstClass*,
+                      EpsNormalizeType> EpsNormalizeArgs;
+
+// Unboxes both FSTs for a concrete Arc and runs the library EpsNormalize.
+template<class Arc>
+void EpsNormalize(EpsNormalizeArgs *args) {
+  const Fst<Arc> &input = *(args->arg1.GetFst<Arc>());
+  MutableFst<Arc> *output = args->arg2->GetMutableFst<Arc>();
+  const EpsNormalizeType norm_type = args->arg3;
+
+  EpsNormalize(input, output, norm_type);
+}
+
+// Arc-type-independent entry point (declaration only).
+void EpsNormalize(const FstClass &ifst, MutableFstClass *ofst,
+                  EpsNormalizeType norm_type = EPS_NORM_INPUT);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_EPSNORMALIZE_H_
diff --git a/src/include/fst/script/equal.h b/src/include/fst/script/equal.h
new file mode 100644
index 0000000..9fb2d3c
--- /dev/null
+++ b/src/include/fst/script/equal.h
@@ -0,0 +1,45 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_EQUAL_H_
+#define FST_SCRIPT_EQUAL_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/equal.h>
+
+namespace fst {
+namespace script {
+
+// Inner pack: arg1 / arg2 = FSTs to compare, arg3 = delta.
+// EqualArgs adds a bool slot (retval) for the answer.
+typedef args::Package<const FstClass&, const FstClass&, float> EqualInnerArgs;
+typedef args::WithReturnValue<bool, EqualInnerArgs> EqualArgs;
+
+// Unboxes both FSTs for a concrete Arc, runs the library Equal, and stores
+// the result in args->retval.
+template<class Arc>
+void Equal(EqualArgs *args) {
+  const EqualInnerArgs &inner = args->args;
+  const Fst<Arc> &lhs = *(inner.arg1.GetFst<Arc>());
+  const Fst<Arc> &rhs = *(inner.arg2.GetFst<Arc>());
+
+  args->retval = Equal(lhs, rhs, inner.arg3);
+}
+
+// Arc-type-independent entry point (declaration only).
+bool Equal(const FstClass &fst1, const FstClass &fst2,
+           float delta = kDelta);
+
+} // namespace script
+} // namespace fst
+
+
+#endif // FST_SCRIPT_EQUAL_H_
diff --git a/src/include/fst/script/equivalent.h b/src/include/fst/script/equivalent.h
new file mode 100644
index 0000000..43460c6
--- /dev/null
+++ b/src/include/fst/script/equivalent.h
@@ -0,0 +1,47 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_EQUIVALENT_H_
+#define FST_SCRIPT_EQUIVALENT_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/equivalent.h>
+
+namespace fst {
+namespace script {
+
+// Inner pack: arg1 / arg2 = FSTs to compare, arg3 = delta.
+// EquivalentArgs adds a bool slot (retval) for the answer.
+typedef args::Package<const FstClass &, const FstClass &,
+                      float> EquivalentInnerArgs;
+typedef args::WithReturnValue<bool, EquivalentInnerArgs> EquivalentArgs;
+
+// Unboxes both FSTs for a concrete Arc, runs the library Equivalent, and
+// stores the result in args->retval.
+template<class Arc>
+void Equivalent(EquivalentArgs *args) {
+  const EquivalentInnerArgs &inner = args->args;
+  const Fst<Arc> &lhs = *(inner.arg1.GetFst<Arc>());
+  const Fst<Arc> &rhs = *(inner.arg2.GetFst<Arc>());
+
+  args->retval = Equivalent(lhs, rhs, inner.arg3);
+}
+
+// Arc-type-independent entry point (declaration only).
+bool Equivalent(const FstClass &fst1, const FstClass &fst2,
+                float delta = kDelta);
+
+} // namespace script
+} // namespace fst
+
+
+
+#endif // FST_SCRIPT_EQUIVALENT_H_
diff --git a/src/include/fst/script/fst-class.h b/src/include/fst/script/fst-class.h
new file mode 100644
index 0000000..3eacab4
--- /dev/null
+++ b/src/include/fst/script/fst-class.h
@@ -0,0 +1,343 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_FST_CLASS_H_
+#define FST_SCRIPT_FST_CLASS_H_
+
+#include <string>
+
+#include <fst/fst.h>
+#include <fst/mutable-fst.h>
+#include <fst/vector-fst.h>
+#include <iostream>
+#include <fstream>
+
+// Classes to support "boxing" all existing types of FST arcs in a single
+// FstClass which hides the arc types. This allows clients to load
+// and work with FSTs without knowing the arc type.
+
+// These classes are only recommended for use in high-level scripting
+// applications. Most users should use the lower-level templated versions
+// corresponding to these classes.
+
+namespace fst {
+namespace script {
+
+//
+// Abstract base class defining the set of functionalities implemented
+// in all impls, and passed through by all bases. Below FstClassBase,
+// the class hierarchy bifurcates; FstClassImplBase serves as the base
+// class for all implementations (of which FstClassImpl is currently
+// the only one) and FstClass serves as the base class for all
+// interfaces.
+//
+class FstClassBase {  // pure interface: read-only queries shared by the impl and interface hierarchies
+ public:
+ virtual const string &ArcType() const = 0;  // FstClassImpl returns Arc::Type()
+ virtual const string &FstType() const = 0;  // FstClassImpl returns the wrapped FST's Type()
+ virtual const string &WeightType() const = 0;  // FstClassImpl returns Arc::Weight::Type()
+ virtual const SymbolTable *InputSymbols() const = 0;
+ virtual const SymbolTable *OutputSymbols() const = 0;
+ virtual void Write(const string& fname) const = 0;
+ virtual uint64 Properties(uint64 mask, bool test) const = 0;  // FstClassImpl forwards to the wrapped FST's Properties(mask, test)
+ virtual ~FstClassBase() { }
+};
+
+class FstClassImplBase : public FstClassBase {  // adds copying and symbol-table mutation for implementation classes
+ public:
+ virtual FstClassImplBase *Copy() = 0;  // FstClassImpl returns a newly allocated object; FstClass deletes the impl it holds
+ virtual void SetInputSymbols(SymbolTable *is) = 0;
+ virtual void SetOutputSymbols(SymbolTable *is) = 0;  // parameter is named "is" but carries the output symbol table
+ virtual ~FstClassImplBase() { }
+};
+
+
+//
+// CONTAINER CLASS
+// Wraps an Fst<Arc>, hiding its arc type. Whether this Fst<Arc>
+// pointer refers to a special kind of FST (e.g. a MutableFst) is
+// known by the type of interface class that owns the pointer to this
+// container.
+//
+
+// Holds one Fst<Arc> behind the arc-type-independent FstClassImplBase
+// interface. The held FST is always owned by this object and is deleted in
+// the destructor.
+template<class Arc>
+class FstClassImpl : public FstClassImplBase {
+ public:
+  // With should_own == true, takes ownership of impl itself. Otherwise
+  // stores impl->Copy() and leaves impl with the caller. impl must not be
+  // null.
+  explicit FstClassImpl(Fst<Arc> *impl,
+                        bool should_own = false) :
+      impl_(should_own ? impl : impl->Copy()) { }
+
+  virtual const string &ArcType() const {
+    return Arc::Type();
+  }
+
+  virtual const string &FstType() const {
+    return impl_->Type();
+  }
+
+  virtual const string &WeightType() const {
+    return Arc::Weight::Type();
+  }
+
+  virtual const SymbolTable *InputSymbols() const {
+    return impl_->InputSymbols();
+  }
+
+  virtual const SymbolTable *OutputSymbols() const {
+    return impl_->OutputSymbols();
+  }
+
+  // Warning: calling this method casts the FST to a mutable FST.
+  virtual void SetInputSymbols(SymbolTable *is) {
+    static_cast<MutableFst<Arc> *>(impl_)->SetInputSymbols(is);
+  }
+
+  // Warning: calling this method casts the FST to a mutable FST.
+  virtual void SetOutputSymbols(SymbolTable *os) {
+    static_cast<MutableFst<Arc> *>(impl_)->SetOutputSymbols(os);
+  }
+
+  virtual void Write(const string &fname) const {
+    impl_->Write(fname);
+  }
+
+  virtual uint64 Properties(uint64 mask, bool test) const {
+    return impl_->Properties(mask, test);
+  }
+
+  virtual ~FstClassImpl() { delete impl_; }
+
+  // Returns the held FST; ownership stays with this object.
+  Fst<Arc> *GetImpl() { return impl_; }
+
+  // Returns a newly allocated FstClassImpl holding impl_->Copy().
+  virtual FstClassImpl *Copy() {
+    return new FstClassImpl<Arc>(impl_);
+  }
+
+ private:
+  // impl_ is deleted in the destructor, so a member-wise copy would end in a
+  // double delete. Copying is therefore disabled (declared, never defined);
+  // use Copy() to duplicate.
+  FstClassImpl(const FstClassImpl &);
+  void operator=(const FstClassImpl &);
+
+  Fst<Arc> *impl_;
+};
+
+//
+// BASE CLASS DEFINITIONS
+//
+
+class MutableFstClass;
+
+// Arc-type-independent handle to an FST. Owns an FstClassImplBase and
+// forwards every query to it.
+class FstClass : public FstClassBase {
+ public:
+  // Reads an FST of the given arc type from stream and boxes it.
+  // opts.header must be set: if its properties include kMutable the result
+  // is a MutableFstClass, otherwise a plain FstClass. Returns 0 on failure.
+  template<class Arc>
+  static FstClass *Read(istream &stream,
+                        const FstReadOptions &opts) {
+    if (!opts.header) {
+      FSTERROR() << "FstClass::Read: options header not specified";
+      return 0;
+    }
+    const FstHeader &hdr = *opts.header;
+
+    if (hdr.Properties() & kMutable) {
+      return ReadTypedFst<MutableFstClass, MutableFst<Arc> >(stream, opts);
+    } else {
+      return ReadTypedFst<FstClass, Fst<Arc> >(stream, opts);
+    }
+  }
+
+  // Boxes fst->Copy(); the caller keeps ownership of fst.
+  template<class Arc>
+  explicit FstClass(Fst<Arc> *fst) : impl_(new FstClassImpl<Arc>(fst)) { }
+
+  // Deep copy: duplicates other's implementation (or stays empty if other
+  // holds none).
+  explicit FstClass(const FstClass &other)
+      : impl_(other.impl_ ? other.impl_->Copy() : 0) { }
+
+  // Copy assignment with the same deep-copy semantics as the copy
+  // constructor. Without it the compiler-generated version would copy the
+  // raw pointer, leaking the old implementation and deleting the shared one
+  // twice.
+  FstClass &operator=(const FstClass &other) {
+    if (this != &other) {
+      // Copy first so *this is untouched until the new impl exists.
+      FstClassImplBase *copy = other.impl_ ? other.impl_->Copy() : 0;
+      delete impl_;
+      impl_ = copy;
+    }
+    return *this;
+  }
+
+  static FstClass *Read(const string &fname);
+
+  virtual const string &ArcType() const {
+    return impl_->ArcType();
+  }
+
+  virtual const string& FstType() const {
+    return impl_->FstType();
+  }
+
+  virtual const SymbolTable *InputSymbols() const {
+    return impl_->InputSymbols();
+  }
+
+  virtual const SymbolTable *OutputSymbols() const {
+    return impl_->OutputSymbols();
+  }
+
+  virtual const string& WeightType() const {
+    return impl_->WeightType();
+  }
+
+  virtual void Write(const string &fname) const {
+    impl_->Write(fname);
+  }
+
+  virtual uint64 Properties(uint64 mask, bool test) const {
+    return impl_->Properties(mask, test);
+  }
+
+  // Returns the held FST typed as Fst<Arc>, or NULL when Arc::Type() differs
+  // from this object's arc type. Ownership stays with this object.
+  template<class Arc>
+  const Fst<Arc> *GetFst() const {
+    if (Arc::Type() != ArcType()) {
+      return NULL;
+    } else {
+      // Safe only because the arc type string matched above.
+      FstClassImpl<Arc> *typed_impl = static_cast<FstClassImpl<Arc> *>(impl_);
+      return typed_impl->GetImpl();
+    }
+  }
+
+  virtual ~FstClass() { delete impl_; }
+
+  // These methods are required by IO registration
+  template<class Arc>
+  static FstClassImplBase *Convert(const FstClass &other) {
+    LOG(ERROR) << "Doesn't make sense to convert any class to type FstClass.";
+    return 0;
+  }
+
+  template<class Arc>
+  static FstClassImplBase *Create() {
+    LOG(ERROR) << "Doesn't make sense to create an FstClass with a "
+               << "particular arc type.";
+    return 0;
+  }
+ protected:
+  // Takes ownership of impl.
+  explicit FstClass(FstClassImplBase *impl) : impl_(impl) { }
+
+  // Generic template method for reading an arc-templated FST of type
+  // UnderlyingT, and returning it wrapped as FstClassT, with appropriate
+  // error checking. Called from arc-templated Read() static methods.
+  template<class FstClassT, class UnderlyingT>
+  static FstClassT* ReadTypedFst(istream &stream,
+                                 const FstReadOptions &opts) {
+    UnderlyingT *u = UnderlyingT::Read(stream, opts);
+    if (!u) {
+      return 0;
+    } else {
+      // The wrapper stores its own copy, so the freshly read FST is freed.
+      FstClassT *r = new FstClassT(u);
+      delete u;
+      return r;
+    }
+  }
+
+  FstClassImplBase *GetImpl() { return impl_; }
+ private:
+  FstClassImplBase *impl_;  // owned; deleted in the destructor
+};
+
+//
+// Specific types of FstClass with special properties
+//
+
+// FstClass whose held FST is known to be a MutableFst.
+class MutableFstClass : public FstClass {
+ public:
+  // Boxes the given mutable FST through the FstClass constructor.
+  template<class Arc>
+  explicit MutableFstClass(MutableFst<Arc> *fst) :
+      FstClass(fst) { }
+
+  // Returns the held FST typed as MutableFst<Arc>; the result is null when
+  // GetFst<Arc>() is null.
+  template<class Arc>
+  MutableFst<Arc> *GetMutableFst() {
+    const Fst<Arc> *held = this->GetFst<Arc>();
+    return static_cast<MutableFst<Arc> *>(const_cast<Fst<Arc> *>(held));
+  }
+
+  // Reads a MutableFst<Arc> from stream and boxes it; returns 0 if the read
+  // fails.
+  template<class Arc>
+  static MutableFstClass *Read(istream &stream,
+                               const FstReadOptions &opts) {
+    MutableFst<Arc> *loaded = MutableFst<Arc>::Read(stream, opts);
+    if (!loaded) return 0;
+
+    MutableFstClass *boxed = new MutableFstClass(loaded);
+    delete loaded;
+    return boxed;
+  }
+
+  static MutableFstClass *Read(const string &fname, bool convert = false);
+
+  virtual void SetInputSymbols(SymbolTable *is) {
+    GetImpl()->SetInputSymbols(is);
+  }
+
+  virtual void SetOutputSymbols(SymbolTable *os) {
+    GetImpl()->SetOutputSymbols(os);
+  }
+
+  // Hooks required by IO registration; neither is meaningful for this class,
+  // so both log an error and return 0.
+  template<class Arc>
+  static FstClassImplBase *Convert(const FstClass &other) {
+    LOG(ERROR) << "Doesn't make sense to convert any class to type "
+               << "MutableFstClass.";
+    return 0;
+  }
+
+  template<class Arc>
+  static FstClassImplBase *Create() {
+    LOG(ERROR) << "Doesn't make sense to create a MutableFstClass with a "
+               << "particular arc type.";
+    return 0;
+  }
+
+ protected:
+  explicit MutableFstClass(FstClassImplBase *impl) : FstClass(impl) { }
+};
+
+
+// MutableFstClass whose held FST is a VectorFst.
+class VectorFstClass : public MutableFstClass {
+ public:
+  explicit VectorFstClass(const FstClass &other);
+  explicit VectorFstClass(const string &arc_type);
+
+  // Boxes the given VectorFst through the MutableFstClass constructor.
+  template<class Arc>
+  explicit VectorFstClass(VectorFst<Arc> *fst) :
+      MutableFstClass(fst) { }
+
+  // Reads a VectorFst<Arc> from stream and boxes it; returns 0 if the read
+  // fails.
+  template<class Arc>
+  static VectorFstClass *Read(istream &stream,
+                              const FstReadOptions &opts) {
+    VectorFst<Arc> *loaded = VectorFst<Arc>::Read(stream, opts);
+    if (!loaded) return 0;
+
+    VectorFstClass *boxed = new VectorFstClass(loaded);
+    delete loaded;
+    return boxed;
+  }
+
+  static VectorFstClass *Read(const string &fname);
+
+  // Converter / creator for known arc types. Both hand a newly allocated
+  // VectorFst<Arc> to an FstClassImpl that takes ownership of it.
+  template<class Arc>
+  static FstClassImplBase *Convert(const FstClass &other) {
+    VectorFst<Arc> *converted = new VectorFst<Arc>(*other.GetFst<Arc>());
+    return new FstClassImpl<Arc>(converted, true);
+  }
+
+  template<class Arc>
+  static FstClassImplBase *Create() {
+    VectorFst<Arc> *empty = new VectorFst<Arc>();
+    return new FstClassImpl<Arc>(empty, true);
+  }
+};
+
+} // namespace script
+} // namespace fst
+
+
+#endif // FST_SCRIPT_FST_CLASS_H_
diff --git a/src/include/fst/script/fstscript-decl.h b/src/include/fst/script/fstscript-decl.h
new file mode 100644
index 0000000..fee813e
--- /dev/null
+++ b/src/include/fst/script/fstscript-decl.h
@@ -0,0 +1,35 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+// Forward declarations for the FST and FST-script classes.
+
+#ifndef FST_SCRIPT_FSTSCRIPT_DECL_H_
+#define FST_SCRIPT_FSTSCRIPT_DECL_H_
+
+#include <fst/fst-decl.h>
+
+namespace fst {
+namespace script {
+
+class FstClass;
+class MutableFstClass;
+class VectorFstClass;
+class WeightClass;
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_FSTSCRIPT_DECL_H_
diff --git a/src/include/fst/script/fstscript.h b/src/include/fst/script/fstscript.h
new file mode 100644
index 0000000..90e1e75
--- /dev/null
+++ b/src/include/fst/script/fstscript.h
@@ -0,0 +1,154 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+// Convenience file that includes all FstScript functionality
+
+#ifndef FST_SCRIPT_FSTSCRIPT_H_
+#define FST_SCRIPT_FSTSCRIPT_H_
+
+// Major classes
+#include <fst/script/fst-class.h>
+#include <fst/script/weight-class.h>
+#include <fst/script/text-io.h>
+
+// Templates like Operation< >, Apply< >
+#include <fst/script/script-impl.h>
+
+// Operations
+#include <fst/script/arcsort.h>
+#include <fst/script/closure.h>
+#include <fst/script/compile.h>
+#include <fst/script/compose.h>
+#include <fst/script/concat.h>
+#include <fst/script/connect.h>
+#include <fst/script/convert.h>
+#include <fst/script/decode.h>
+#include <fst/script/determinize.h>
+#include <fst/script/difference.h>
+#include <fst/script/draw.h>
+#include <fst/script/encode.h>
+#include <fst/script/epsnormalize.h>
+#include <fst/script/equal.h>
+#include <fst/script/equivalent.h>
+#include <fst/script/info.h>
+#include <fst/script/intersect.h>
+#include <fst/script/invert.h>
+#include <fst/script/map.h>
+#include <fst/script/minimize.h>
+#include <fst/script/print.h>
+#include <fst/script/project.h>
+#include <fst/script/prune.h>
+#include <fst/script/push.h>
+#include <fst/script/randequivalent.h>
+#include <fst/script/randgen.h>
+#include <fst/script/relabel.h>
+#include <fst/script/replace.h>
+#include <fst/script/reverse.h>
+#include <fst/script/reweight.h>
+#include <fst/script/rmepsilon.h>
+#include <fst/script/shortest-distance.h>
+#include <fst/script/shortest-path.h>
+#include <fst/script/symbols.h>
+#include <fst/script/synchronize.h>
+#include <fst/script/topsort.h>
+#include <fst/script/union.h>
+#include <fst/script/verify.h>
+
+//
+// REGISTER OPERATIONS
+//
+
+
+// This class is necessary because registering each of the operations
+// separately overfills the stack, as there are so many of them.
+namespace fst {
+namespace script {
+template<class Arc>
+class AllFstOperationsRegisterer {  // constructing one instance registers every script operation for Arc
+ public:
+ AllFstOperationsRegisterer() {
+ RegisterBatch1();  // registrations are split across two member functions
+ RegisterBatch2();
+ }
+
+ private:
+ void RegisterBatch1() {  // ArcSort through Minimize
+ REGISTER_FST_OPERATION(ArcSort, Arc, ArcSortArgs);
+ REGISTER_FST_OPERATION(Closure, Arc, ClosureArgs);
+ REGISTER_FST_OPERATION(CompileFst, Arc, FstCompileArgs);
+ REGISTER_FST_OPERATION(Compose, Arc, ComposeArgs1);  // operations with several argument packs are registered once per pack
+ REGISTER_FST_OPERATION(Compose, Arc, ComposeArgs2);
+ REGISTER_FST_OPERATION(Concat, Arc, ConcatArgs1);
+ REGISTER_FST_OPERATION(Concat, Arc, ConcatArgs2);
+ REGISTER_FST_OPERATION(Connect, Arc, MutableFstClass);  // argument type is the FST class itself, not a pack
+ REGISTER_FST_OPERATION(Convert, Arc, ConvertArgs);
+ REGISTER_FST_OPERATION(Decode, Arc, DecodeArgs);
+ REGISTER_FST_OPERATION(Determinize, Arc, DeterminizeArgs);
+ REGISTER_FST_OPERATION(Difference, Arc, DifferenceArgs1);
+ REGISTER_FST_OPERATION(Difference, Arc, DifferenceArgs2);
+ REGISTER_FST_OPERATION(DrawFst, Arc, FstDrawerArgs);
+ REGISTER_FST_OPERATION(Encode, Arc, EncodeArgs);
+ REGISTER_FST_OPERATION(EpsNormalize, Arc, EpsNormalizeArgs);
+ REGISTER_FST_OPERATION(Equal, Arc, EqualArgs);
+ REGISTER_FST_OPERATION(Equivalent, Arc, EquivalentArgs);
+ REGISTER_FST_OPERATION(PrintFstInfo, Arc, InfoArgs);
+ REGISTER_FST_OPERATION(Intersect, Arc, IntersectArgs1);
+ REGISTER_FST_OPERATION(Intersect, Arc, IntersectArgs2);
+ REGISTER_FST_OPERATION(Invert, Arc, MutableFstClass);
+ REGISTER_FST_OPERATION(Map, Arc, MapArgs);
+ REGISTER_FST_OPERATION(Minimize, Arc, MinimizeArgs);
+ }
+
+ void RegisterBatch2() {  // PrintFst through Verify
+ REGISTER_FST_OPERATION(PrintFst, Arc, FstPrinterArgs);
+ REGISTER_FST_OPERATION(Project, Arc, ProjectArgs);
+ REGISTER_FST_OPERATION(Prune, Arc, PruneArgs1);
+ REGISTER_FST_OPERATION(Prune, Arc, PruneArgs2);
+ REGISTER_FST_OPERATION(Prune, Arc, PruneArgs3);
+ REGISTER_FST_OPERATION(Prune, Arc, PruneArgs4);
+ REGISTER_FST_OPERATION(Push, Arc, PushArgs1);
+ REGISTER_FST_OPERATION(Push, Arc, PushArgs2);
+ REGISTER_FST_OPERATION(RandEquivalent, Arc, RandEquivalentArgs1);
+ REGISTER_FST_OPERATION(RandEquivalent, Arc, RandEquivalentArgs2);
+ REGISTER_FST_OPERATION(RandGen, Arc, RandGenArgs);
+ REGISTER_FST_OPERATION(Relabel, Arc, RelabelArgs1);
+ REGISTER_FST_OPERATION(Relabel, Arc, RelabelArgs2);
+ REGISTER_FST_OPERATION(Relabel, Arc, RelabelArgs3);
+ REGISTER_FST_OPERATION(Replace, Arc, ReplaceArgs);
+ REGISTER_FST_OPERATION(Reverse, Arc, ReverseArgs);
+ REGISTER_FST_OPERATION(Reweight, Arc, ReweightArgs);
+ REGISTER_FST_OPERATION(RmEpsilon, Arc, RmEpsilonArgs1);
+ REGISTER_FST_OPERATION(RmEpsilon, Arc, RmEpsilonArgs2);
+ REGISTER_FST_OPERATION(RmEpsilon, Arc, RmEpsilonArgs3);
+ REGISTER_FST_OPERATION(ShortestDistance, Arc, ShortestDistanceArgs1);
+ REGISTER_FST_OPERATION(ShortestDistance, Arc, ShortestDistanceArgs2);
+ REGISTER_FST_OPERATION(ShortestDistance, Arc, ShortestDistanceArgs3);
+ REGISTER_FST_OPERATION(ShortestPath, Arc, ShortestPathArgs1);
+ REGISTER_FST_OPERATION(ShortestPath, Arc, ShortestPathArgs2);
+ REGISTER_FST_OPERATION(Synchronize, Arc, SynchronizeArgs);
+ REGISTER_FST_OPERATION(TopSort, Arc, TopSortArgs);
+ REGISTER_FST_OPERATION(Union, Arc, UnionArgs);
+ REGISTER_FST_OPERATION(Verify, Arc, VerifyArgs);
+ }
+};
+} // namespace script
+} // namespace fst
+
+
+#define REGISTER_FST_OPERATIONS(Arc) \
+ AllFstOperationsRegisterer<Arc> register_all_fst_operations ## Arc;  // Arc is pasted into the variable name, so it must be a single identifier; the class name is unqualified here
+
+#endif // FST_SCRIPT_FSTSCRIPT_H_
diff --git a/src/include/fst/script/info-impl.h b/src/include/fst/script/info-impl.h
new file mode 100644
index 0000000..408fbcd
--- /dev/null
+++ b/src/include/fst/script/info-impl.h
@@ -0,0 +1,325 @@
+// info.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Class to compute various information about FSTs, helper class for fstinfo.cc
+
+#ifndef FST_SCRIPT_INFO_IMPL_H_
+#define FST_SCRIPT_INFO_IMPL_H_
+
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/connect.h>
+#include <fst/dfs-visit.h>
+#include <fst/fst.h>
+#include <fst/lookahead-matcher.h>
+#include <fst/matcher.h>
+#include <fst/queue.h>
+#include <fst/test-properties.h>
+#include <fst/verify.h>
+#include <fst/visit.h>
+
+namespace fst {
+
+// Computes various information about an FST; helper class for fstinfo.cc.
+// WARNING: Stand-alone use of this class is not recommended, most code
+// should call directly the relevant library functions: Fst<A>::NumStates,
+// Fst<A>::NumArcs, TestProperties, ...
+template <class A> class FstInfo {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef typename A::Label Label;
+  typedef typename A::Weight Weight;
+
+  // Gathers the information about |fst|.
+  //   test_properties: passed through to fst.Properties() and to the
+  //                    matchers' Type() queries.
+  //   arc_filter_type: "any", "epsilon", "iepsilon" or "oepsilon"; selects
+  //                    the arc filter used for the component and
+  //                    accessibility counts.
+  //   info_type:       "long", "short" or "auto".
+  //   verify:          if true, Verify(fst) must succeed before any long
+  //                    info is gathered.
+  // When info_type is "short" (or "auto" and not an ExpandedFst)
+  // then only minimal info is computed and can be requested.
+  // Every failure is reported through FSTERROR() and ends construction
+  // early, leaving the not-yet-computed members at their initial values.
+  FstInfo(const Fst<A> &fst, bool test_properties,
+          const string &arc_filter_type = "any",
+          const string &info_type = "auto", bool verify = true)
+      : fst_type_(fst.Type()),
+        input_symbols_(fst.InputSymbols() ?
+                       fst.InputSymbols()->Name() : "none"),
+        output_symbols_(fst.OutputSymbols() ?
+                        fst.OutputSymbols()->Name() : "none"),
+        nstates_(0), narcs_(0), start_(kNoStateId), nfinal_(0),
+        nepsilons_(0), niepsilons_(0), noepsilons_(0),
+        naccess_(0), ncoaccess_(0), nconnect_(0), ncc_(0), nscc_(0),
+        input_match_type_(MATCH_NONE), output_match_type_(MATCH_NONE),
+        input_lookahead_(false), output_lookahead_(false),
+        properties_(0), arc_filter_type_(arc_filter_type), long_info_(true) {
+    if (info_type == "long") {
+      long_info_ = true;
+    } else if (info_type == "short") {
+      long_info_ = false;
+    } else if (info_type == "auto") {
+      long_info_ = fst.Properties(kExpanded, false);
+    } else {
+      // long_info_ keeps its initial value (true) here, so the long-info
+      // accessors will hand back the zero-initialized members.
+      FSTERROR() << "Bad info type: " << info_type;
+      return;
+    }
+
+    if (!long_info_)
+      return;
+
+    // If the FST is not sane, we return.
+    if (verify && !Verify(fst)) {
+      FSTERROR() << "FstInfo: Verify: FST not well-formed.";
+      return;
+    }
+
+    start_ = fst.Start();
+    properties_ = fst.Properties(kFstProperties, test_properties);
+
+    // One pass over all states and arcs for the plain counts.
+    for (StateIterator< Fst<A> > siter(fst);
+         !siter.Done();
+         siter.Next()) {
+      ++nstates_;
+      const StateId s = siter.Value();
+      if (fst.Final(s) != Weight::Zero())
+        ++nfinal_;
+      for (ArcIterator< Fst<A> > aiter(fst, s);
+           !aiter.Done();
+           aiter.Next()) {
+        const A &arc = aiter.Value();
+        ++narcs_;
+        if (arc.ilabel == 0 && arc.olabel == 0)
+          ++nepsilons_;
+        if (arc.ilabel == 0)
+          ++niepsilons_;
+        if (arc.olabel == 0)
+          ++noepsilons_;
+      }
+    }
+
+    // Connected components: CcVisitor writes one component id per state
+    // into cc; the count is the largest id plus one.
+    {
+      vector<StateId> cc;
+      CcVisitor<Arc> cc_visitor(&cc);
+      FifoQueue<StateId> fifo_queue;
+      if (arc_filter_type == "any") {
+        Visit(fst, &cc_visitor, &fifo_queue);
+      } else if (arc_filter_type == "epsilon") {
+        Visit(fst, &cc_visitor, &fifo_queue, EpsilonArcFilter<Arc>());
+      } else if (arc_filter_type == "iepsilon") {
+        Visit(fst, &cc_visitor, &fifo_queue, InputEpsilonArcFilter<Arc>());
+      } else if (arc_filter_type == "oepsilon") {
+        Visit(fst, &cc_visitor, &fifo_queue, OutputEpsilonArcFilter<Arc>());
+      } else {
+        FSTERROR() << "Bad arc filter type: " << arc_filter_type;
+        return;
+      }
+
+      // Index with size_t: the bound is an unsigned container size.
+      for (size_t i = 0; i < cc.size(); ++i) {
+        if (cc[i] >= ncc_)
+          ncc_ = cc[i] + 1;
+      }
+    }
+
+    // Strongly connected components plus the per-state access/coaccess
+    // flags filled in by SccVisitor.
+    {
+      vector<StateId> scc;
+      vector<bool> access, coaccess;
+      uint64 props = 0;
+      SccVisitor<Arc> scc_visitor(&scc, &access, &coaccess, &props);
+      if (arc_filter_type == "any") {
+        DfsVisit(fst, &scc_visitor);
+      } else if (arc_filter_type == "epsilon") {
+        DfsVisit(fst, &scc_visitor, EpsilonArcFilter<Arc>());
+      } else if (arc_filter_type == "iepsilon") {
+        DfsVisit(fst, &scc_visitor, InputEpsilonArcFilter<Arc>());
+      } else if (arc_filter_type == "oepsilon") {
+        DfsVisit(fst, &scc_visitor, OutputEpsilonArcFilter<Arc>());
+      } else {
+        FSTERROR() << "Bad arc filter type: " << arc_filter_type;
+        return;
+      }
+
+      for (size_t i = 0; i < scc.size(); ++i) {
+        if (access[i])
+          ++naccess_;
+        if (coaccess[i])
+          ++ncoaccess_;
+        if (access[i] && coaccess[i])
+          ++nconnect_;
+        if (scc[i] >= nscc_)
+          nscc_ = scc[i] + 1;
+      }
+    }
+
+    LookAheadMatcher< Fst<A> > imatcher(fst, MATCH_INPUT);
+    input_match_type_ = imatcher.Type(test_properties);
+    input_lookahead_ = (imatcher.Flags() & kInputLookAheadMatcher) != 0;
+
+    LookAheadMatcher< Fst<A> > omatcher(fst, MATCH_OUTPUT);
+    output_match_type_ = omatcher.Type(test_properties);
+    output_lookahead_ = (omatcher.Flags() & kOutputLookAheadMatcher) != 0;
+  }
+
+  // Short info
+  const string& FstType() const { return fst_type_; }
+  const string& ArcType() const { return A::Type(); }
+  const string& InputSymbols() const { return input_symbols_; }
+  const string& OutputSymbols() const { return output_symbols_; }
+  bool LongInfo() const { return long_info_; }
+  const string& ArcFilterType() const { return arc_filter_type_; }
+
+  // Long info; each accessor reports an error through CheckLong() when only
+  // short info was gathered, and still returns the stored value.
+  MatchType InputMatchType() const { CheckLong(); return input_match_type_; }
+  MatchType OutputMatchType() const { CheckLong(); return output_match_type_; }
+  bool InputLookAhead() const { CheckLong(); return input_lookahead_; }
+  bool OutputLookAhead() const { CheckLong(); return output_lookahead_; }
+  int64 NumStates() const { CheckLong(); return nstates_; }
+  int64 NumArcs() const { CheckLong(); return narcs_; }
+  int64 Start() const { CheckLong(); return start_; }
+  int64 NumFinal() const { CheckLong(); return nfinal_; }
+  int64 NumEpsilons() const { CheckLong(); return nepsilons_; }
+  int64 NumInputEpsilons() const { CheckLong(); return niepsilons_; }
+  int64 NumOutputEpsilons() const { CheckLong(); return noepsilons_; }
+  int64 NumAccessible() const { CheckLong(); return naccess_; }
+  int64 NumCoAccessible() const { CheckLong(); return ncoaccess_; }
+  int64 NumConnected() const { CheckLong(); return nconnect_; }
+  int64 NumCc() const { CheckLong(); return ncc_; }
+  int64 NumScc() const { CheckLong(); return nscc_; }
+  uint64 Properties() const { CheckLong(); return properties_; }
+
+ private:
+  // Logs an error if a long-info accessor is used on short info.
+  void CheckLong() const {
+    if (!long_info_)
+      FSTERROR() << "FstInfo: method only available with long info version";
+  }
+
+  string fst_type_;              // fst.Type()
+  string input_symbols_;         // input symbol table name, or "none"
+  string output_symbols_;        // output symbol table name, or "none"
+  int64 nstates_;                // states visited by the state iterator
+  int64 narcs_;                  // arcs visited over all states
+  int64 start_;                  // fst.Start(), kNoStateId until computed
+  int64 nfinal_;                 // states with Final(s) != Weight::Zero()
+  int64 nepsilons_;              // arcs with ilabel == 0 and olabel == 0
+  int64 niepsilons_;             // arcs with ilabel == 0
+  int64 noepsilons_;             // arcs with olabel == 0
+  int64 naccess_;                // states flagged in the access vector
+  int64 ncoaccess_;              // states flagged in the coaccess vector
+  int64 nconnect_;               // states flagged in both vectors
+  int64 ncc_;                    // largest CcVisitor component id + 1
+  int64 nscc_;                   // largest SccVisitor component id + 1
+  MatchType input_match_type_;   // Type() of the MATCH_INPUT matcher
+  MatchType output_match_type_;  // Type() of the MATCH_OUTPUT matcher
+  bool input_lookahead_;         // kInputLookAheadMatcher flag present
+  bool output_lookahead_;        // kOutputLookAheadMatcher flag present
+  uint64 properties_;            // fst.Properties(kFstProperties, ...)
+  string arc_filter_type_;       // filter name given to the constructor
+  bool long_info_;               // whether long info may be requested
+  DISALLOW_COPY_AND_ASSIGN(FstInfo);
+};
+
+// Prints the contents of |fstinfo| as a two-column table: a label padded to
+// 50 characters followed by the value. Output goes to cerr when |pipe| is
+// true and to cout otherwise. Only the four short-info rows are printed when
+// fstinfo.LongInfo() is false. The stream's format flags are put back to
+// their previous value before returning.
+template <class A>
+void PrintFstInfo(const FstInfo<A> &fstinfo, bool pipe = false) {
+  ostream &os = pipe ? cerr : cout;
+
+  // Left-justify the label column; remember the old flags for the restore.
+  ios_base::fmtflags old = os.setf(ios::left);
+  os.width(50);
+  os << "fst type" << fstinfo.FstType() << endl;
+  os.width(50);
+  os << "arc type" << fstinfo.ArcType() << endl;
+  os.width(50);
+  os << "input symbol table" << fstinfo.InputSymbols() << endl;
+  os.width(50);
+  os << "output symbol table" << fstinfo.OutputSymbols() << endl;
+
+  if (!fstinfo.LongInfo()) {
+    // flags() replaces the whole flag set; setf(old) would only OR the old
+    // bits in and leave ios::left switched on.
+    os.flags(old);
+    return;
+  }
+
+  os.width(50);
+  os << "# of states" << fstinfo.NumStates() << endl;
+  os.width(50);
+  os << "# of arcs" << fstinfo.NumArcs() << endl;
+  os.width(50);
+  os << "initial state" << fstinfo.Start() << endl;
+  os.width(50);
+  os << "# of final states" << fstinfo.NumFinal() << endl;
+  os.width(50);
+  os << "# of input/output epsilons" << fstinfo.NumEpsilons() << endl;
+  os.width(50);
+  os << "# of input epsilons" << fstinfo.NumInputEpsilons() << endl;
+  os.width(50);
+  os << "# of output epsilons" << fstinfo.NumOutputEpsilons() << endl;
+
+  // Qualifier inserted into the labels below when an arc filter was used.
+  string arc_type = "";
+  if (fstinfo.ArcFilterType() == "epsilon")
+    arc_type = "epsilon ";
+  else if (fstinfo.ArcFilterType() == "iepsilon")
+    arc_type = "input-epsilon ";
+  else if (fstinfo.ArcFilterType() == "oepsilon")
+    arc_type = "output-epsilon ";
+
+  string accessible_label = "# of " + arc_type + "accessible states";
+  os.width(50);
+  os << accessible_label << fstinfo.NumAccessible() << endl;
+  string coaccessible_label = "# of " + arc_type + "coaccessible states";
+  os.width(50);
+  os << coaccessible_label << fstinfo.NumCoAccessible() << endl;
+  string connected_label = "# of " + arc_type + "connected states";
+  os.width(50);
+  os << connected_label << fstinfo.NumConnected() << endl;
+  string numcc_label = "# of " + arc_type + "connected components";
+  os.width(50);
+  os << numcc_label << fstinfo.NumCc() << endl;
+  string numscc_label = "# of " + arc_type + "strongly conn components";
+  os.width(50);
+  os << numscc_label << fstinfo.NumScc() << endl;
+
+  os.width(50);
+  os << "input matcher"
+     << (fstinfo.InputMatchType() == MATCH_INPUT ? 'y' :
+         fstinfo.InputMatchType() == MATCH_NONE ? 'n' : '?') << endl;
+  os.width(50);
+  os << "output matcher"
+     << (fstinfo.OutputMatchType() == MATCH_OUTPUT ? 'y' :
+         fstinfo.OutputMatchType() == MATCH_NONE ? 'n' : '?') << endl;
+  os.width(50);
+  os << "input lookahead"
+     << (fstinfo.InputLookAhead() ? 'y' : 'n') << endl;
+  os.width(50);
+  os << "output lookahead"
+     << (fstinfo.OutputLookAhead() ? 'y' : 'n') << endl;
+
+  // Walk all 64 property bits. Binary properties print y/n; for a trinary
+  // property the bit itself means 'y', the next higher bit means 'n', and
+  // neither means '?'.
+  uint64 prop = 1;
+  for (int i = 0; i < 64; ++i, prop <<= 1) {
+    if (prop & kBinaryProperties) {
+      char value = 'n';
+      if (fstinfo.Properties() & prop) value = 'y';
+      os.width(50);
+      os << PropertyNames[i] << value << endl;
+    } else if (prop & kPosTrinaryProperties) {
+      char value = '?';
+      if (fstinfo.Properties() & prop) value = 'y';
+      else if (fstinfo.Properties() & (prop << 1)) value = 'n';
+      os.width(50);
+      os << PropertyNames[i] << value << endl;
+    }
+  }
+  os.flags(old);
+}
+
+} // namespace fst
+
+#endif // FST_SCRIPT_INFO_IMPL_H_
diff --git a/src/include/fst/script/info.h b/src/include/fst/script/info.h
new file mode 100644
index 0000000..f434bd5
--- /dev/null
+++ b/src/include/fst/script/info.h
@@ -0,0 +1,48 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_INFO_H_
+#define FST_SCRIPT_INFO_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/script/info-impl.h>
+
+namespace fst {
+namespace script {
+
+// Argument pack for the PrintFstInfo script operation. The fields follow the
+// parameter order of the non-template PrintFstInfo declared below:
+//   arg1 FST, arg2 test_properties, arg3 arc_filter, arg4 info_type,
+//   arg5 pipe, arg6 verify.
+// NOTE(review): assumes the pack is filled in that declaration order;
+// confirm against the definition in the corresponding .cc file.
+typedef args::Package<const FstClass&, bool, const string&,
+                      const string&, bool, bool> InfoArgs;
+
+// Arc-typed implementation: builds an FstInfo for the wrapped FST and prints
+// it. When piping, the FST itself is additionally written via Write("").
+template<class Arc>
+void PrintFstInfo(InfoArgs *args) {
+  const Fst<Arc> &fst = *(args->arg1.GetFst<Arc>());
+  // Name the two trailing flags so they cannot be mixed up: FstInfo's last
+  // constructor parameter is |verify|, the printer's second is |pipe|.
+  const bool pipe = args->arg5;
+  const bool verify = args->arg6;
+
+  FstInfo<Arc> fstinfo(fst, args->arg2, args->arg3, args->arg4, verify);
+  PrintFstInfo(fstinfo, pipe);
+
+  if (pipe)
+    fst.Write("");
+}
+
+void PrintFstInfo(const FstClass &f, bool test_properties,
+                  const string &arc_filter, const string &info_type,
+                  bool pipe, bool verify);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_INFO_H_
diff --git a/src/include/fst/script/intersect.h b/src/include/fst/script/intersect.h
new file mode 100644
index 0000000..8011024
--- /dev/null
+++ b/src/include/fst/script/intersect.h
@@ -0,0 +1,65 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_INTERSECT_H_
+#define FST_SCRIPT_INTERSECT_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/intersect.h>
+#include <fst/script/compose.h> // for ComposeOptions, ComposeFilter
+
+namespace fst {
+namespace script {
+
+// Argument pack for the Intersect overload taking a compose filter:
+// arg1/arg2 are the input FSTs, arg3 the output FST, arg4 the filter.
+typedef args::Package<const FstClass&, const FstClass&,
+                      MutableFstClass*, ComposeFilter> IntersectArgs1;
+
+// Arc-typed implementation: unwraps the three FSTs and forwards them, with
+// the compose filter, to the four-argument Intersect.
+template<class Arc>
+void Intersect(IntersectArgs1 *args) {
+  const Fst<Arc> &lhs = *(args->arg1.GetFst<Arc>());
+  const Fst<Arc> &rhs = *(args->arg2.GetFst<Arc>());
+  MutableFst<Arc> *result = args->arg3->GetMutableFst<Arc>();
+
+  Intersect(lhs, rhs, result, args->arg4);
+}
+
+// Argument pack for the Intersect overload taking full options:
+// arg1/arg2 are the input FSTs, arg3 the output FST, arg4 the options.
+typedef args::Package<const FstClass&, const FstClass&,
+                      MutableFstClass*, const ComposeOptions &> IntersectArgs2;
+
+// Arc-typed implementation: unwraps the three FSTs and forwards them, with
+// the options, to the four-argument Intersect.
+template<class Arc>
+void Intersect(IntersectArgs2 *args) {
+  const Fst<Arc> &lhs = *(args->arg1.GetFst<Arc>());
+  const Fst<Arc> &rhs = *(args->arg2.GetFst<Arc>());
+  MutableFst<Arc> *result = args->arg3->GetMutableFst<Arc>();
+
+  Intersect(lhs, rhs, result, args->arg4);
+}
+
+// Untyped entry point selecting the behaviour through a compose filter.
+void Intersect(const FstClass &ifst1, const FstClass &ifst2,
+               MutableFstClass *ofst,
+               ComposeFilter compose_filter);
+
+// Untyped entry point taking options; defaults to a default-constructed
+// script::ComposeOptions.
+void Intersect(const FstClass &ifst, const FstClass &ifst2,
+               MutableFstClass *ofst,
+               const ComposeOptions &opts = fst::script::ComposeOptions());
+
+} // namespace script
+} // namespace fst
+
+
+
+#endif // FST_SCRIPT_INTERSECT_H_
diff --git a/src/include/fst/script/invert.h b/src/include/fst/script/invert.h
new file mode 100644
index 0000000..1befd9f
--- /dev/null
+++ b/src/include/fst/script/invert.h
@@ -0,0 +1,43 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_INVERT_H_
+#define FST_SCRIPT_INVERT_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/invert.h>
+
+namespace fst {
+namespace script {
+
+// The templated overload is hidden from swig: its argument list is the same
+// as that of the non-templated version, which confuses swig.
+#ifndef SWIG
+// Arc-typed implementation: unwraps the mutable FST and passes it to the
+// single-argument Invert for MutableFst<Arc>.
+template<class Arc>
+void Invert(MutableFstClass *fst) {
+  Invert(fst->GetMutableFst<Arc>());
+}
+#endif
+
+// Untyped entry point.
+void Invert(MutableFstClass *fst);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_INVERT_H_
diff --git a/src/include/fst/script/map.h b/src/include/fst/script/map.h
new file mode 100644
index 0000000..2332074
--- /dev/null
+++ b/src/include/fst/script/map.h
@@ -0,0 +1,115 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_MAP_H_
+#define FST_SCRIPT_MAP_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/script/weight-class.h>
+#include <fst/arc-map.h>
+#include <fst/state-map.h>
+
+namespace fst {
+namespace script {
+
+// Applies |mapper| to |fst| through the three-argument ArcMap and returns
+// the result as a VectorFst allocated with new.
+template <class M>
+Fst<typename M::ToArc> *ArcMap(const Fst<typename M::FromArc> &fst,
+                               const M &mapper) {
+  VectorFst<typename M::ToArc> *mapped = new VectorFst<typename M::ToArc>;
+  ArcMap(fst, mapped, mapper);
+  return mapped;
+}
+
+// Applies |mapper| to |fst| through the three-argument StateMap and returns
+// the result as a VectorFst allocated with new.
+template <class M>
+Fst<typename M::ToArc> *StateMap(const Fst<typename M::FromArc> &fst,
+                                 const M &mapper) {
+  VectorFst<typename M::ToArc> *mapped = new VectorFst<typename M::ToArc>;
+  StateMap(fst, mapped, mapper);
+  return mapped;
+}
+
+// Selects the mapper applied by Map(). Enumerator order is significant
+// because it fixes the numeric values.
+enum MapType {
+  ARC_SUM_MAPPER,     // ArcSumMapper (a state mapper)
+  IDENTITY_MAPPER,    // IdentityArcMapper
+  INVERT_MAPPER,      // InvertWeightMapper
+  PLUS_MAPPER,        // PlusMapper, uses the weight argument
+  QUANTIZE_MAPPER,    // QuantizeMapper, uses the delta argument
+  RMWEIGHT_MAPPER,    // RmWeightMapper
+  SUPERFINAL_MAPPER,  // SuperFinalMapper
+  TIMES_MAPPER,       // TimesMapper, uses the weight argument
+  TO_LOG_MAPPER,      // WeightConvertMapper to LogArc
+  TO_LOG64_MAPPER,    // WeightConvertMapper to Log64Arc
+  TO_STD_MAPPER       // WeightConvertMapper to StdArc
+};
+
+// Inputs of Map(): arg1 FST, arg2 mapper selector, arg3 delta, arg4 weight.
+typedef args::Package<const FstClass&, MapType, float,
+                      const WeightClass &> MapInnerArgs;
+// The inputs above together with the FstClass* result slot.
+typedef args::WithReturnValue<FstClass*, MapInnerArgs> MapArgs;
+
+// Arc-typed implementation of Map: runs the mapper chosen by the MapType in
+// the argument pack over the input FST and stores a newly allocated FstClass
+// holding the result in args->retval. For an unrecognized mapper type an
+// error is logged and the result is an empty VectorFst with kError set.
+template <class Arc>
+void Map(MapArgs *args) {
+  typedef typename Arc::Weight Weight;
+
+  const Fst<Arc> &ifst = *(args->args.arg1.GetFst<Arc>());
+  const MapType map_type = args->args.arg2;
+  const float delta = args->args.arg3;
+  const Weight w = *(args->args.arg4.GetWeight<Weight>());
+
+  switch (map_type) {
+    case ARC_SUM_MAPPER:
+      args->retval = new FstClass(
+          script::StateMap(ifst, ArcSumMapper<Arc>(ifst)));
+      break;
+    case IDENTITY_MAPPER:
+      args->retval = new FstClass(
+          script::ArcMap(ifst, IdentityArcMapper<Arc>()));
+      break;
+    case INVERT_MAPPER:
+      args->retval = new FstClass(
+          script::ArcMap(ifst, InvertWeightMapper<Arc>()));
+      break;
+    case PLUS_MAPPER:
+      args->retval = new FstClass(
+          script::ArcMap(ifst, PlusMapper<Arc>(w)));
+      break;
+    case QUANTIZE_MAPPER:
+      args->retval = new FstClass(
+          script::ArcMap(ifst, QuantizeMapper<Arc>(delta)));
+      break;
+    case RMWEIGHT_MAPPER:
+      args->retval = new FstClass(
+          script::ArcMap(ifst, RmWeightMapper<Arc>()));
+      break;
+    case SUPERFINAL_MAPPER:
+      args->retval = new FstClass(
+          script::ArcMap(ifst, SuperFinalMapper<Arc>()));
+      break;
+    case TIMES_MAPPER:
+      args->retval = new FstClass(
+          script::ArcMap(ifst, TimesMapper<Arc>(w)));
+      break;
+    case TO_LOG_MAPPER:
+      args->retval = new FstClass(
+          script::ArcMap(ifst, WeightConvertMapper<Arc, LogArc>()));
+      break;
+    case TO_LOG64_MAPPER:
+      args->retval = new FstClass(
+          script::ArcMap(ifst, WeightConvertMapper<Arc, Log64Arc>()));
+      break;
+    case TO_STD_MAPPER:
+      args->retval = new FstClass(
+          script::ArcMap(ifst, WeightConvertMapper<Arc, StdArc>()));
+      break;
+    default: {
+      FSTERROR() << "Error: unknown/unsupported mapper type: "
+                 << map_type;
+      // Hand back an FST flagged as erroneous rather than a null result.
+      VectorFst<Arc> *ofst = new VectorFst<Arc>;
+      ofst->SetProperties(kError, kError);
+      args->retval = new FstClass(ofst);
+      break;
+    }
+  }
+}
+
+
+#ifdef SWIG
+%newobject Map;
+#endif
+// Untyped entry point; delta defaults to fst::kDelta and the weight to
+// WeightClass::Zero().
+FstClass *Map(const FstClass& f, MapType map_type,
+              float delta = fst::kDelta,
+              const WeightClass &w = fst::script::WeightClass::Zero());
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_MAP_H_
diff --git a/src/include/fst/script/minimize.h b/src/include/fst/script/minimize.h
new file mode 100644
index 0000000..f250d03
--- /dev/null
+++ b/src/include/fst/script/minimize.h
@@ -0,0 +1,45 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_MINIMIZE_H_
+#define FST_SCRIPT_MINIMIZE_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/minimize.h>
+
+namespace fst {
+namespace script {
+
+// Argument pack for Minimize: arg1 is the FST to minimize, arg2 an optional
+// second mutable FST (may be null), arg3 the delta.
+typedef args::Package<MutableFstClass*, MutableFstClass*, float> MinimizeArgs;
+
+// Arc-typed implementation: unwraps the FST(s) and forwards to the
+// three-argument Minimize. A null arg2 is passed on as a null pointer.
+template<class Arc>
+void Minimize(MinimizeArgs *args) {
+  MutableFst<Arc> *primary = args->arg1->GetMutableFst<Arc>();
+
+  MutableFst<Arc> *secondary = 0;
+  if (args->arg2)
+    secondary = args->arg2->GetMutableFst<Arc>();
+
+  Minimize(primary, secondary, args->arg3);
+}
+
+// Untyped entry point; the second FST defaults to null, delta to kDelta.
+void Minimize(MutableFstClass *ofst1, MutableFstClass *ofst2 = 0,
+              float delta = kDelta);
+
+} // namespace script
+} // namespace fst
+
+
+
+#endif // FST_SCRIPT_MINIMIZE_H_
diff --git a/src/include/fst/script/print-impl.h b/src/include/fst/script/print-impl.h
new file mode 100644
index 0000000..1433a29
--- /dev/null
+++ b/src/include/fst/script/print-impl.h
@@ -0,0 +1,149 @@
+// print.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Stand-alone class to print out binary FSTs in the AT&T format,
+// helper class for fstprint.cc
+
+#ifndef FST_SCRIPT_PRINT_IMPL_H_
+#define FST_SCRIPT_PRINT_IMPL_H_
+
+#include <sstream>
+#include <string>
+
+#include <fst/fst.h>
+#include <fst/util.h>
+
+DECLARE_string(fst_field_separator);
+
+namespace fst {
+
+// Prints a binary Fst in textual format, helper class for fstprint.cc
+// WARNING: Stand-alone use of this class not recommended, most code should
+// read/write using the binary format which is much more efficient.
+template <class A> class FstPrinter {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef typename A::Label Label;
+  typedef typename A::Weight Weight;
+
+  // |isyms|, |osyms| and |ssyms| are optional symbol tables for input
+  // labels, output labels and state ids; a null table means the numeric id
+  // is printed. Acceptor format is used only if |accep| is set and the FST
+  // has the kAcceptor property. |show_weight_one| forces weights equal to
+  // Weight::One() to be printed as well.
+  FstPrinter(const Fst<A> &fst,
+             const SymbolTable *isyms,
+             const SymbolTable *osyms,
+             const SymbolTable *ssyms,
+             bool accep,
+             bool show_weight_one)
+      : fst_(fst), isyms_(isyms), osyms_(osyms), ssyms_(ssyms),
+        accep_(accep && fst.Properties(kAcceptor, true)), ostrm_(0),
+        show_weight_one_(show_weight_one) {}
+
+  // Prints the Fst to |ostrm|; |dest| names the destination and is used in
+  // error messages only. Nothing is printed when the Fst has no start state.
+  void Print(ostream *ostrm, const string &dest) {
+    ostrm_ = ostrm;
+    dest_ = dest;
+    const StateId start = fst_.Start();
+    if (start == kNoStateId)
+      return;
+    // initial state first
+    PrintState(start);
+    for (StateIterator< Fst<A> > siter(fst_);
+         !siter.Done();
+         siter.Next()) {
+      const StateId s = siter.Value();
+      if (s != start)
+        PrintState(s);
+    }
+  }
+
+ private:
+  // Maximum line length in text file. Not referenced within this class.
+  static const int kLineLen = 8096;
+
+  // Prints |id| through |syms| when a table is given, else as a number.
+  // An id with no symbol is reported via FSTERROR() and printed as "?".
+  // |name| is currently unused.
+  void PrintId(int64 id, const SymbolTable *syms,
+               const char *name) const {
+    if (!syms) {
+      *ostrm_ << id;
+      return;
+    }
+    string symbol = syms->Find(id);
+    if (symbol == "") {
+      FSTERROR() << "FstPrinter: Integer " << id
+                 << " is not mapped to any textual symbol"
+                 << ", symbol table = " << syms->Name()
+                 << ", destination = " << dest_;
+      symbol = "?";
+    }
+    *ostrm_ << symbol;
+  }
+
+  void PrintStateId(StateId s) const {
+    PrintId(s, ssyms_, "state ID");
+  }
+
+  void PrintILabel(Label l) const {
+    PrintId(l, isyms_, "arc input label");
+  }
+
+  void PrintOLabel(Label l) const {
+    PrintId(l, osyms_, "arc output label");
+  }
+
+  // Prints one line per outgoing arc of |s| (source, destination, input
+  // label, output label unless in acceptor format, then the weight), and
+  // afterwards a line for the state itself if it is final or had no arcs.
+  // Fields are separated by the first character of
+  // FLAGS_fst_field_separator.
+  void PrintState(StateId s) const {
+    bool output = false;
+    for (ArcIterator< Fst<A> > aiter(fst_, s);
+         !aiter.Done();
+         aiter.Next()) {
+      // Bind by reference: no need to copy the arc just to print it.
+      const Arc &arc = aiter.Value();
+      PrintStateId(s);
+      *ostrm_ << FLAGS_fst_field_separator[0];
+      PrintStateId(arc.nextstate);
+      *ostrm_ << FLAGS_fst_field_separator[0];
+      PrintILabel(arc.ilabel);
+      if (!accep_) {
+        *ostrm_ << FLAGS_fst_field_separator[0];
+        PrintOLabel(arc.olabel);
+      }
+      if (show_weight_one_ || arc.weight != Weight::One())
+        *ostrm_ << FLAGS_fst_field_separator[0] << arc.weight;
+      *ostrm_ << "\n";
+      output = true;
+    }
+    const Weight final = fst_.Final(s);
+    if (final != Weight::Zero() || !output) {
+      PrintStateId(s);
+      if (show_weight_one_ || final != Weight::One()) {
+        *ostrm_ << FLAGS_fst_field_separator[0] << final;
+      }
+      *ostrm_ << "\n";
+    }
+  }
+
+  const Fst<A> &fst_;
+  const SymbolTable *isyms_;     // ilabel symbol table
+  const SymbolTable *osyms_;     // olabel symbol table
+  const SymbolTable *ssyms_;     // slabel symbol table
+  bool accep_;                   // print as acceptor when possible
+  ostream *ostrm_;               // text FST destination
+  string dest_;                  // text FST destination name
+  bool show_weight_one_;         // print weights equal to Weight::One()
+  DISALLOW_COPY_AND_ASSIGN(FstPrinter);
+};
+
+} // namespace fst
+
+#endif // FST_SCRIPT_PRINT_IMPL_H_
diff --git a/src/include/fst/script/print.h b/src/include/fst/script/print.h
new file mode 100644
index 0000000..f82b19b
--- /dev/null
+++ b/src/include/fst/script/print.h
@@ -0,0 +1,86 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_PRINT_H_
+#define FST_SCRIPT_PRINT_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/script/print-impl.h>
+
+namespace fst {
+namespace script {
+
+// Bundles the arguments of the PrintFst script operation.
+// Note: |fst| and |dest| are held as references, not copies. This is safe
+// only because the struct is used to hand them deeper into the call graph.
+// Be sure you understand why this is so before using this struct
+// for anything else!
+struct FstPrinterArgs {
+  const FstClass &fst;         // FST to print
+  const SymbolTable *isyms;    // input label symbols
+  const SymbolTable *osyms;    // output label symbols
+  const SymbolTable *ssyms;    // state symbols
+  const bool accept;           // forwarded as FstPrinter's accep argument
+  const bool show_weight_one;  // forwarded to FstPrinter unchanged
+  ostream *ostrm;              // destination stream
+  const string &dest;          // destination name
+
+  FstPrinterArgs(const FstClass &fst,
+                 const SymbolTable *isyms,
+                 const SymbolTable *osyms,
+                 const SymbolTable *ssyms,
+                 bool accept,
+                 bool show_weight_one,
+                 ostream *ostrm,
+                 const string &dest)
+      : fst(fst),
+        isyms(isyms),
+        osyms(osyms),
+        ssyms(ssyms),
+        accept(accept),
+        show_weight_one(show_weight_one),
+        ostrm(ostrm),
+        dest(dest) { }
+};
+
+// Arc-typed implementation: constructs an FstPrinter from the bundled
+// arguments and prints to the bundled stream and destination name.
+template<class Arc>
+void PrintFst(FstPrinterArgs *args) {
+  const Fst<Arc> &typed_fst = *(args->fst.GetFst<Arc>());
+
+  fst::FstPrinter<Arc> printer(typed_fst,
+                               args->isyms, args->osyms, args->ssyms,
+                               args->accept, args->show_weight_one);
+  printer.Print(args->ostrm, args->dest);
+}
+
+// Untyped entry point.
+void PrintFst(const FstClass &fst, ostream &ostrm, const string &dest,
+              const SymbolTable *isyms,
+              const SymbolTable *osyms,
+              const SymbolTable *ssyms,
+              bool accept, bool show_weight_one);
+
+
+// Printing method with useful defaults for a few of the fst printer
+// arguments: the symbol tables default to NULL and the destination name to
+// the empty string. The printer is always created with accep and
+// show_weight_one both set to true.
+template <class Arc>
+void PrintFst(const Fst<Arc> &fst, ostream &os, const string &dest = "",
+              const SymbolTable *isyms = NULL,
+              const SymbolTable *osyms = NULL,
+              const SymbolTable *ssyms = NULL) {
+  fst::FstPrinter<Arc> fstprinter(fst, isyms, osyms, ssyms, true, true);
+  fstprinter.Print(&os, dest);
+}
+
+} // namespace script
+} // namespace fst
+
+
+
+#endif // FST_SCRIPT_PRINT_H_
diff --git a/src/include/fst/script/project.h b/src/include/fst/script/project.h
new file mode 100644
index 0000000..12ee890
--- /dev/null
+++ b/src/include/fst/script/project.h
@@ -0,0 +1,43 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_PROJECT_H_
+#define FST_SCRIPT_PROJECT_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/project.h> // for ProjectType
+
+namespace fst {
+namespace script {
+
+// Argument pack for Project: arg1 is the FST modified in place, arg2 the
+// projection type.
+typedef args::Package<MutableFstClass*, ProjectType> ProjectArgs;
+
+// Arc-typed implementation: unwraps the mutable FST and forwards it with the
+// projection type to the two-argument Project.
+template<class Arc>
+void Project(ProjectArgs *args) {
+  MutableFst<Arc> *typed_fst = args->arg1->GetMutableFst<Arc>();
+  Project(typed_fst, args->arg2);
+}
+
+// Untyped entry point.
+void Project(MutableFstClass *ofst, ProjectType project_type);
+
+} // namespace script
+} // namespace fst
+
+
+
+#endif // FST_SCRIPT_PROJECT_H_
diff --git a/src/include/fst/script/prune.h b/src/include/fst/script/prune.h
new file mode 100644
index 0000000..7118ff1
--- /dev/null
+++ b/src/include/fst/script/prune.h
@@ -0,0 +1,153 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_PRUNE_H_
+#define FST_SCRIPT_PRUNE_H_
+
+#include <vector>
+using std::vector;
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/script/weight-class.h>
+#include <fst/prune.h>
+#include <fst/arcfilter.h>
+
+namespace fst {
+namespace script {
+
+struct PruneOptions {
+ WeightClass weight_threshold;
+ int64 state_threshold;
+ const vector<WeightClass> *distance;
+ float delta;
+
+ explicit PruneOptions(const WeightClass& w, int64 s,
+ vector<WeightClass> *d = 0, float e = kDelta)
+ : weight_threshold(w),
+ state_threshold(s),
+ distance(d),
+ delta(e) {}
+ private:
+ PruneOptions(); // disallow
+};
+
+// Converts a script::PruneOptions into a typed fst::PruneOptions.
+// Note: if opts.distance is not NULL, a typed copy of it is allocated
+// with new and passed as the result's distance; the caller owns that
+// vector and must delete it once the returned options are done with.
+
+template<class A>
+fst::PruneOptions<A, AnyArcFilter<A> > ConvertPruneOptions(
+ const PruneOptions &opts) {
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+
+ Weight weight_threshold = *(opts.weight_threshold.GetWeight<Weight>());
+ StateId state_threshold = opts.state_threshold;
+ vector<Weight> *distance = 0;
+
+ if (opts.distance) {
+ distance = new vector<Weight>(opts.distance->size());
+ for (unsigned i = 0; i < opts.distance->size(); ++i) {
+ (*distance)[i] = *((*opts.distance)[i].GetWeight<Weight>());
+ }
+ }
+
+ return fst::PruneOptions<A, AnyArcFilter<A> >(
+ weight_threshold, state_threshold, AnyArcFilter<A>(), distance,
+ opts.delta);
+}
+
+// 1
+typedef args::Package<MutableFstClass *, const PruneOptions &> PruneArgs1;
+
+template<class Arc>
+void Prune(PruneArgs1 *args) {
+ MutableFst<Arc> *ofst = args->arg1->GetMutableFst<Arc>();
+
+ typedef typename Arc::Weight Weight;
+ typedef typename Arc::StateId StateId;
+
+ fst::PruneOptions<Arc, AnyArcFilter<Arc> > opts =
+ ConvertPruneOptions<Arc>(args->arg2);
+ Prune(ofst, opts);
+ delete opts.distance;
+}
+
+// 2
+typedef args::Package<const FstClass &, MutableFstClass *,
+ const PruneOptions &> PruneArgs2;
+
+template<class Arc>
+void Prune(PruneArgs2 *args) {
+ const Fst<Arc>& ifst = *(args->arg1.GetFst<Arc>());
+ MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>();
+
+ fst::PruneOptions<Arc, AnyArcFilter<Arc> > opts =
+ ConvertPruneOptions<Arc>(args->arg3);
+ Prune(ifst, ofst, opts);
+ delete opts.distance;
+}
+
+// 3
+typedef args::Package<const FstClass &,
+ MutableFstClass *,
+ const WeightClass &, int64, float> PruneArgs3;
+
+template<class Arc>
+void Prune(PruneArgs3 *args) {
+ const Fst<Arc>& ifst = *(args->arg1.GetFst<Arc>());
+ MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>();
+ typename Arc::Weight w = *(args->arg3.GetWeight<typename Arc::Weight>());
+
+ Prune(ifst, ofst, w, args->arg4, args->arg5);
+}
+
+// 4
+typedef args::Package<MutableFstClass *, const WeightClass&,
+ int64, float> PruneArgs4;
+template<class Arc>
+void Prune(PruneArgs4 *args) {
+ MutableFst<Arc> *fst = args->arg1->GetMutableFst<Arc>();
+ typename Arc::Weight w = *(args->arg2.GetWeight<typename Arc::Weight>());
+ Prune(fst, w, args->arg3, args->arg4);
+}
+
+
+// 1
+void Prune(MutableFstClass *fst, const PruneOptions &opts);
+
+// 2
+void Prune(const FstClass &ifst, MutableFstClass *fst,
+ const PruneOptions &opts);
+
+// 3
+void Prune(const FstClass &ifst, MutableFstClass *ofst,
+ const WeightClass &weight_threshold,
+ int64 state_threshold = kNoStateId,
+ float delta = kDelta);
+
+// 4
+void Prune(MutableFstClass *fst, const WeightClass& weight_threshold,
+ int64 state_threshold, float delta);
+
+} // namespace script
+} // namespace fst
+
+
+
+#endif // FST_SCRIPT_PRUNE_H_
diff --git a/src/include/fst/script/push.h b/src/include/fst/script/push.h
new file mode 100644
index 0000000..cebd655
--- /dev/null
+++ b/src/include/fst/script/push.h
@@ -0,0 +1,70 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_PUSH_H_
+#define FST_SCRIPT_PUSH_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/push.h>
+
+namespace fst {
+namespace script {
+
+// 1
+typedef args::Package<MutableFstClass*, ReweightType, float, bool> PushArgs1;
+
+template<class Arc>
+void Push(PushArgs1 *args) {
+ MutableFst<Arc> *ofst = args->arg1->GetMutableFst<Arc>();
+
+ if (args->arg2 == REWEIGHT_TO_FINAL) {
+ fst::Push(ofst, REWEIGHT_TO_FINAL, args->arg3, args->arg4);
+ } else {
+ fst::Push(ofst, REWEIGHT_TO_INITIAL, args->arg3, args->arg4);
+ }
+}
+
+// 2
+typedef args::Package<const FstClass &, MutableFstClass *, uint32,
+ ReweightType, float> PushArgs2;
+
+template<class Arc>
+void Push(PushArgs2 *args) {
+ const Fst<Arc> &ifst = *(args->arg1.GetFst<Arc>());
+ MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>();
+
+ if (args->arg4 == REWEIGHT_TO_FINAL) {
+ fst::Push<Arc, REWEIGHT_TO_FINAL>(ifst, ofst, args->arg3, args->arg5);
+ } else {
+ fst::Push<Arc, REWEIGHT_TO_INITIAL>(ifst, ofst, args->arg3, args->arg5);
+ }
+}
+
+// 1
+void Push(MutableFstClass *ofst, ReweightType type, float delta = kDelta,
+ bool remove_total_weight = false);
+
+// 2
+void Push(const FstClass &ifst, MutableFstClass *ofst, uint32 flags,
+ ReweightType dir, float delta);
+
+} // namespace script
+} // namespace fst
+
+
+
+#endif // FST_SCRIPT_PUSH_H_
diff --git a/src/include/fst/script/randequivalent.h b/src/include/fst/script/randequivalent.h
new file mode 100644
index 0000000..b929683
--- /dev/null
+++ b/src/include/fst/script/randequivalent.h
@@ -0,0 +1,105 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_RANDEQUIVALENT_H_
+#define FST_SCRIPT_RANDEQUIVALENT_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/script/randgen.h> // for RandArcSelection
+#include <fst/randequivalent.h>
+
+namespace fst {
+namespace script {
+
+// 1: (fst1, fst2, seed, num_paths, delta, path_length), returning bool.
+typedef args::Package<const FstClass&, const FstClass&,
+ int32, ssize_t, float, int> RandEquivalentInnerArgs1;
+typedef args::WithReturnValue<bool,
+ RandEquivalentInnerArgs1> RandEquivalentArgs1;
+
+template<class Arc>
+void RandEquivalent(RandEquivalentArgs1 *args) {
+ const Fst<Arc> &fst1 = *(args->args.arg1.GetFst<Arc>());
+ const Fst<Arc> &fst2 = *(args->args.arg2.GetFst<Arc>());
+
+ args->retval = RandEquivalent(fst1, fst2, args->args.arg3, args->args.arg4,
+ args->args.arg5, args->args.arg6);
+}
+
+// 2
+typedef args::Package<const FstClass &, const FstClass &, int32,
+ ssize_t, float,
+ const RandGenOptions<RandArcSelection> &>
+ RandEquivalentInnerArgs2;
+
+typedef args::WithReturnValue<bool,
+ RandEquivalentInnerArgs2> RandEquivalentArgs2;
+
+template<class Arc>
+void RandEquivalent(RandEquivalentArgs2 *args) {
+ const Fst<Arc> &fst1 = *(args->args.arg1.GetFst<Arc>());
+ const Fst<Arc> &fst2 = *(args->args.arg2.GetFst<Arc>());
+ const RandGenOptions<RandArcSelection> &opts = args->args.arg6;
+ int32 seed = args->args.arg3;
+
+ if (opts.arc_selector == UNIFORM_ARC_SELECTOR) {
+ UniformArcSelector<Arc> arc_selector(seed);
+ RandGenOptions< UniformArcSelector<Arc> >
+ ropts(arc_selector, opts.max_length, opts.npath);
+
+ args->retval = RandEquivalent(fst1, fst2, args->args.arg4,
+ args->args.arg5, ropts);
+ } else if (opts.arc_selector == FAST_LOG_PROB_ARC_SELECTOR) {
+ FastLogProbArcSelector<Arc> arc_selector(seed);
+ RandGenOptions< FastLogProbArcSelector<Arc> >
+ ropts(arc_selector, opts.max_length, opts.npath);
+
+ args->retval = RandEquivalent(fst1, fst2, args->args.arg4,
+ args->args.arg5, ropts);
+ } else {
+ LogProbArcSelector<Arc> arc_selector(seed);
+ RandGenOptions< LogProbArcSelector<Arc> >
+ ropts(arc_selector, opts.max_length, opts.npath);
+ args->retval = RandEquivalent(fst1, fst2, args->args.arg4,
+ args->args.arg5, ropts);
+ }
+}
+
+
+// 1
+bool RandEquivalent(const FstClass &fst1,
+ const FstClass &fst2,
+ int32 seed = time(0),
+ ssize_t num_paths = 1,
+ float delta = fst::kDelta,
+ int path_length = INT_MAX);
+
+// 2
+bool RandEquivalent(const FstClass &fst1,
+ const FstClass &fst2,
+ int32 seed,
+ ssize_t num_paths,
+ float delta,
+ const fst::RandGenOptions<
+ fst::script::RandArcSelection> &opts);
+
+} // namespace script
+} // namespace fst
+
+
+
+#endif // FST_SCRIPT_RANDEQUIVALENT_H_
diff --git a/src/include/fst/script/randgen.h b/src/include/fst/script/randgen.h
new file mode 100644
index 0000000..817f9c1
--- /dev/null
+++ b/src/include/fst/script/randgen.h
@@ -0,0 +1,76 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_RANDGEN_H_
+#define FST_SCRIPT_RANDGEN_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/randgen.h>
+
+namespace fst {
+namespace script {
+
+enum RandArcSelection {
+ UNIFORM_ARC_SELECTOR,
+ LOG_PROB_ARC_SELECTOR,
+ FAST_LOG_PROB_ARC_SELECTOR
+};
+
+typedef args::Package<const FstClass &, MutableFstClass*, int32,
+ const RandGenOptions<RandArcSelection> &> RandGenArgs;
+
+template<class Arc>
+void RandGen(RandGenArgs *args) {
+ const Fst<Arc> &ifst = *(args->arg1.GetFst<Arc>());
+ MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>();
+ int32 seed = args->arg3;
+ const RandGenOptions<RandArcSelection> &opts = args->arg4;
+
+ if (opts.arc_selector == UNIFORM_ARC_SELECTOR) {
+ UniformArcSelector<Arc> arc_selector(seed);
+ RandGenOptions< UniformArcSelector<Arc> >
+ ropts(arc_selector, opts.max_length,
+ opts.npath, opts.weighted);
+ RandGen(ifst, ofst, ropts);
+ } else if (opts.arc_selector == FAST_LOG_PROB_ARC_SELECTOR) {
+ FastLogProbArcSelector<Arc> arc_selector(seed);
+ RandGenOptions< FastLogProbArcSelector<Arc> >
+ ropts(arc_selector, opts.max_length,
+ opts.npath, opts.weighted);
+ RandGen(ifst, ofst, ropts);
+ } else {
+ LogProbArcSelector<Arc> arc_selector(seed);
+ RandGenOptions< LogProbArcSelector<Arc> >
+ ropts(arc_selector, opts.max_length,
+ opts.npath, opts.weighted);
+ RandGen(ifst, ofst, ropts);
+ }
+}
+
+
+// Client-facing prototype
+void RandGen(const FstClass &ifst, MutableFstClass *ofst, int32 seed = time(0),
+ const RandGenOptions<RandArcSelection> &opts =
+ fst::RandGenOptions<fst::script::RandArcSelection>(
+ fst::script::UNIFORM_ARC_SELECTOR));
+
+} // namespace script
+} // namespace fst
+
+
+
+#endif // FST_SCRIPT_RANDGEN_H_
diff --git a/src/include/fst/script/register.h b/src/include/fst/script/register.h
new file mode 100644
index 0000000..03e0e36
--- /dev/null
+++ b/src/include/fst/script/register.h
@@ -0,0 +1,120 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_REGISTER_H_
+#define FST_SCRIPT_REGISTER_H_
+
+#include <string>
+
+#include <fst/generic-register.h>
+#include <fst/script/fst-class.h>
+#include <fst/script/weight-class.h>
+
+// Holds methods and classes responsible for maintaining
+// the register for FstClass arc types.
+
+namespace fst {
+namespace script {
+
+//
+// Registers for reading and converting various kinds of FST classes.
+//
+
+// This class definition is to avoid a nested class definition inside
+// the IORegistration struct.
+template<class Reader, class Creator, class Converter>
+struct FstClassRegEntry {
+ Reader reader;
+ Creator creator;
+ Converter converter;
+
+ FstClassRegEntry(Reader r, Creator cr, Converter co) :
+ reader(r), creator(cr), converter(co) { }
+ FstClassRegEntry() : reader(0), creator(0), converter(0) { }
+};
+
+template<class Reader, class Creator, class Converter>
+class FstClassIORegister
+ : public GenericRegister<string,
+ FstClassRegEntry<Reader, Creator, Converter>,
+ FstClassIORegister<Reader, Creator,
+ Converter> > {
+ public:
+ Reader GetReader(const string &arc_type) const {
+ return this->GetEntry(arc_type).reader;
+ }
+
+ Creator GetCreator(const string &arc_type) const {
+ return this->GetEntry(arc_type).creator;
+ }
+
+ Converter GetConverter(const string &arc_type) const {
+ return this->GetEntry(arc_type).converter;
+ }
+
+ protected:
+ virtual string ConvertKeyToSoFilename(
+ const string& key) const {
+ string legal_type(key);
+ ConvertToLegalCSymbol(&legal_type);
+
+ return legal_type + "-arc.so";
+ }
+};
+
+//
+// Struct containing everything needed to register a particular type
+// of FST class (e.g. a plain FstClass, or a MutableFstClass, etc)
+//
+template<class FstClassType>
+struct IORegistration {
+ typedef FstClassType *(*Reader)(istream &stream,
+ const FstReadOptions &opts);
+
+ typedef FstClassImplBase *(*Creator)();
+ typedef FstClassImplBase *(*Converter)(const FstClass &other);
+
+ typedef FstClassRegEntry<Reader, Creator, Converter> Entry;
+
+ // FST class Register
+ typedef FstClassIORegister<Reader, Creator, Converter> Register;
+
+ // FST class Register-er
+ typedef GenericRegisterer<FstClassIORegister<Reader, Creator, Converter> >
+ Registerer;
+};
+
+
+//
+// REGISTRATION MACROS
+//
+
+#define REGISTER_FST_CLASS(Class, Arc) \
+ static IORegistration<Class>::Registerer Class ## _ ## Arc ## _registerer( \
+ Arc::Type(), \
+ IORegistration<Class>::Entry(Class::Read<Arc>, \
+ Class::Create<Arc>, \
+ Class::Convert<Arc>))
+
+#define REGISTER_FST_CLASSES(Arc) \
+ REGISTER_FST_CLASS(FstClass, Arc); \
+ REGISTER_FST_CLASS(MutableFstClass, Arc); \
+ REGISTER_FST_CLASS(VectorFstClass, Arc);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_REGISTER_H_
diff --git a/src/include/fst/script/relabel.h b/src/include/fst/script/relabel.h
new file mode 100644
index 0000000..6bbb4c5
--- /dev/null
+++ b/src/include/fst/script/relabel.h
@@ -0,0 +1,102 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_RELABEL_H_
+#define FST_SCRIPT_RELABEL_H_
+
+#include <utility>
+using std::pair; using std::make_pair;
+#include <algorithm>
+#include <vector>
+using std::vector;
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/relabel.h>
+
+namespace fst {
+namespace script {
+
+// 1
+typedef args::Package<MutableFstClass *,
+ const SymbolTable *, const SymbolTable *, bool,
+ const SymbolTable *, const SymbolTable *,
+ bool> RelabelArgs1;
+
+template<class Arc>
+void Relabel(RelabelArgs1 *args) {
+ MutableFst<Arc> *ofst = args->arg1->GetMutableFst<Arc>();
+
+ Relabel(ofst, args->arg2, args->arg3, args->arg4,
+ args->arg5, args->arg6, args->arg7);
+}
+
+// 2: (ofst, ipairs, opairs); both pair lists are held by const reference.
+typedef args::Package<MutableFstClass*,
+ const vector<pair<int64, int64> > &,
+ const vector<pair<int64, int64> > &> RelabelArgs2;
+
+template<class Arc>
+void Relabel(RelabelArgs2 *args) {
+ MutableFst<Arc> *ofst = args->arg1->GetMutableFst<Arc>();
+
+ // In case int64 is not the same as Arc::Label,
+ // copy the reassignments
+ typedef typename Arc::Label Label;
+
+ vector<pair<Label, Label> > converted_ipairs(args->arg2.size());
+ copy(args->arg2.begin(), args->arg2.end(), converted_ipairs.begin());
+
+ vector<pair<Label, Label> > converted_opairs(args->arg3.size());
+ copy(args->arg3.begin(), args->arg3.end(), converted_opairs.begin());
+
+ Relabel(ofst, converted_ipairs, converted_opairs);
+}
+
+// 3
+typedef args::Package<MutableFstClass*, const SymbolTable*,
+ const SymbolTable*> RelabelArgs3;
+template<class Arc>
+void Relabel(args::Package<MutableFstClass*, const SymbolTable*,
+ const SymbolTable*> *args) {
+ MutableFst<Arc> *fst = args->arg1->GetMutableFst<Arc>();
+ Relabel(fst, args->arg2, args->arg3);
+}
+
+
+// 1: relabel using old/relabel symbol tables for the input and output sides.
+void Relabel(MutableFstClass *ofst,
+ const SymbolTable *old_isyms, const SymbolTable *relabel_isyms,
+ bool attach_new_isyms,
+ const SymbolTable *old_osyms, const SymbolTable *relabel_osyms,
+ bool attach_new_osyms);
+
+// 2
+void Relabel(MutableFstClass *ofst,
+ const vector<pair<int64, int64> > &ipairs,
+ const vector<pair<int64, int64> > &opairs);
+
+
+// 3
+void Relabel(MutableFstClass *fst,
+ const SymbolTable *new_isymbols,
+ const SymbolTable *new_osymbols);
+
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_RELABEL_H_
diff --git a/src/include/fst/script/replace.h b/src/include/fst/script/replace.h
new file mode 100644
index 0000000..5eaf5bf
--- /dev/null
+++ b/src/include/fst/script/replace.h
@@ -0,0 +1,62 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_REPLACE_H_
+#define FST_SCRIPT_REPLACE_H_
+
+#include <utility>
+using std::pair; using std::make_pair;
+#include <vector>
+using std::vector;
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/replace.h>
+
+namespace fst {
+namespace script {
+
+typedef args::Package<const vector<pair<int64, const FstClass *> > &,
+ MutableFstClass *, const int64, bool> ReplaceArgs;
+
+template<class Arc>
+void Replace(ReplaceArgs *args) {
+ // Now that we know the arc type, we construct a vector of
+ // pair<real label, real fst> that the real Replace will use
+ const vector<pair<int64, const FstClass *> >& untyped_tuples =
+ args->arg1;
+
+ vector<pair<typename Arc::Label, const Fst<Arc> *> > fst_tuples(
+ untyped_tuples.size());
+
+ for (unsigned i = 0; i < untyped_tuples.size(); ++i) {
+ fst_tuples[i].first = untyped_tuples[i].first; // convert label
+ fst_tuples[i].second = untyped_tuples[i].second->GetFst<Arc>();
+ }
+
+ MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>();
+
+ Replace(fst_tuples, ofst, args->arg3, args->arg4);
+}
+
+void Replace(const vector<pair<int64, const FstClass *> > &tuples,
+ MutableFstClass *ofst, const int64 &root,
+ bool epsilon_on_replace = false);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_REPLACE_H_
diff --git a/src/include/fst/script/reverse.h b/src/include/fst/script/reverse.h
new file mode 100644
index 0000000..3930875
--- /dev/null
+++ b/src/include/fst/script/reverse.h
@@ -0,0 +1,42 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_REVERSE_H_
+#define FST_SCRIPT_REVERSE_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/reverse.h>
+
+namespace fst {
+namespace script {
+
+typedef args::Package<const FstClass &, MutableFstClass *> ReverseArgs;
+
+template<class Arc>
+void Reverse(ReverseArgs *args) {
+ const Fst<Arc> &fst1 = *(args->arg1.GetFst<Arc>());
+ MutableFst<Arc> *fst2 = args->arg2->GetMutableFst<Arc>();
+
+ Reverse(fst1, fst2);
+}
+
+void Reverse(const FstClass &fst1, MutableFstClass *fst2);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_REVERSE_H_
diff --git a/src/include/fst/script/reweight.h b/src/include/fst/script/reweight.h
new file mode 100644
index 0000000..7bce839
--- /dev/null
+++ b/src/include/fst/script/reweight.h
@@ -0,0 +1,53 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_REWEIGHT_H_
+#define FST_SCRIPT_REWEIGHT_H_
+
+#include <vector>
+using std::vector;
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/script/weight-class.h>
+#include <fst/reweight.h>
+
+namespace fst {
+namespace script {
+
+typedef args::Package<MutableFstClass *, const vector<WeightClass> &,
+ ReweightType> ReweightArgs;
+
+template<class Arc>
+void Reweight(ReweightArgs *args) {
+ MutableFst<Arc> *fst = args->arg1->GetMutableFst<Arc>();
+ typedef typename Arc::Weight Weight;
+ vector<Weight> potentials(args->arg2.size());
+
+ for (unsigned i = 0; i < args->arg2.size(); ++i) {
+ potentials[i] = *(args->arg2[i].GetWeight<Weight>());
+ }
+
+ Reweight(fst, potentials, args->arg3);
+}
+
+void Reweight(MutableFstClass *fst, const vector<WeightClass> &potential,
+ ReweightType reweight_type);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_REWEIGHT_H_
diff --git a/src/include/fst/script/rmepsilon.h b/src/include/fst/script/rmepsilon.h
new file mode 100644
index 0000000..62fed03
--- /dev/null
+++ b/src/include/fst/script/rmepsilon.h
@@ -0,0 +1,211 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_RMEPSILON_H_
+#define FST_SCRIPT_RMEPSILON_H_
+
+#include <vector>
+using std::vector;
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/script/weight-class.h>
+#include <fst/script/shortest-distance.h> // for ShortestDistanceOptions
+#include <fst/rmepsilon.h>
+#include <fst/queue.h>
+
+// the following is necessary, or SWIG complains mightily about
+// shortestdistanceoptions not being defined before being used as a base.
+#ifdef SWIG
+%include "nlp/fst/script/shortest-distance.h"
+#endif
+
+
+namespace fst {
+namespace script {
+
+//
+// OPTIONS
+//
+
+struct RmEpsilonOptions : public fst::script::ShortestDistanceOptions {
+ bool connect;
+ WeightClass weight_threshold;
+ int64 state_threshold;
+
+ RmEpsilonOptions(QueueType qt = AUTO_QUEUE, float d = kDelta, bool c = true,
+ WeightClass w = fst::script::WeightClass::Zero(),
+ int64 n = kNoStateId)
+ : ShortestDistanceOptions(qt, EPSILON_ARC_FILTER,
+ kNoStateId, d),
+ connect(c), weight_threshold(w), state_threshold(n) { }
+};
+
+
+//
+// TEMPLATES
+//
+
+// this function takes care of transforming a script-land RmEpsilonOptions
+// into a lib-land RmEpsilonOptions
+template<class Arc>
+void RmEpsilonHelper(MutableFst<Arc> *fst,
+ vector<typename Arc::Weight> *distance,
+ const RmEpsilonOptions &opts) {
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Weight Weight;
+
+ typename Arc::Weight weight_thresh =
+ *(opts.weight_threshold.GetWeight<Weight>());
+
+ switch (opts.queue_type) {
+ case AUTO_QUEUE: {
+ AutoQueue<StateId> queue(*fst, distance, EpsilonArcFilter<Arc>());
+ fst::RmEpsilonOptions<Arc, AutoQueue<StateId> > ropts(
+ &queue, opts.delta, opts.connect, weight_thresh,
+ opts.state_threshold);
+ RmEpsilon(fst, distance, ropts);
+ break;
+ }
+ case FIFO_QUEUE: {
+ FifoQueue<StateId> queue;
+ fst::RmEpsilonOptions<Arc, FifoQueue<StateId> > ropts(
+ &queue, opts.delta, opts.connect, weight_thresh,
+ opts.state_threshold);
+ RmEpsilon(fst, distance, ropts);
+ break;
+ }
+ case LIFO_QUEUE: {
+ LifoQueue<StateId> queue;
+ fst::RmEpsilonOptions<Arc, LifoQueue<StateId> > ropts(
+ &queue, opts.delta, opts.connect, weight_thresh,
+ opts.state_threshold);
+ RmEpsilon(fst, distance, ropts);
+ break;
+ }
+ case SHORTEST_FIRST_QUEUE: {
+ NaturalShortestFirstQueue<StateId, Weight> queue(*distance);
+ fst::RmEpsilonOptions<Arc, NaturalShortestFirstQueue<StateId,
+ Weight> > ropts(
+ &queue, opts.delta, opts.connect, weight_thresh,
+ opts.state_threshold);
+ RmEpsilon(fst, distance, ropts);
+ break;
+ }
+ case STATE_ORDER_QUEUE: {
+ StateOrderQueue<StateId> queue;
+ fst::RmEpsilonOptions<Arc, StateOrderQueue<StateId> > ropts(
+ &queue, opts.delta, opts.connect, weight_thresh,
+ opts.state_threshold);
+ RmEpsilon(fst, distance, ropts);
+ break;
+ }
+ case TOP_ORDER_QUEUE: {
+ TopOrderQueue<StateId> queue(*fst, EpsilonArcFilter<Arc>());
+ fst::RmEpsilonOptions<Arc, TopOrderQueue<StateId> > ropts(
+ &queue, opts.delta, opts.connect, weight_thresh,
+ opts.state_threshold);
+ RmEpsilon(fst, distance, ropts);
+ break;
+ }
+ default:
+ FSTERROR() << "Unknown or unsupported queue type: " << opts.queue_type;
+ fst->SetProperties(kError, kError);
+ }
+}
+
+// 1
+typedef args::Package<const FstClass &, MutableFstClass *,
+ bool, const RmEpsilonOptions &> RmEpsilonArgs1;
+
+template<class Arc>
+void RmEpsilon(RmEpsilonArgs1 *args) {
+ const Fst<Arc> &ifst = *(args->arg1.GetFst<Arc>());
+ MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>();
+ vector<typename Arc::Weight> distance;
+ bool reverse = args->arg3;
+
+ if (reverse) {
+ VectorFst<Arc> rfst;
+ Reverse(ifst, &rfst);
+ RmEpsilonHelper(&rfst, &distance, args->arg4);
+ Reverse(rfst, ofst);
+ } else {
+ *ofst = ifst;
+ }
+ RmEpsilonHelper(ofst, &distance, args->arg4);
+}
+
+// 2
+typedef args::Package<MutableFstClass *, bool,
+ const WeightClass, int64,
+ float> RmEpsilonArgs2;
+
+template<class Arc>
+void RmEpsilon(RmEpsilonArgs2 *args) {
+ MutableFst<Arc> *fst = args->arg1->GetMutableFst<Arc>();
+ typename Arc::Weight w = *(args->arg3.GetWeight<typename Arc::Weight>());
+
+ RmEpsilon(fst, args->arg2, w, args->arg4, args->arg5);
+}
+
+// 3
+typedef args::Package<MutableFstClass *, vector<WeightClass> *,
+ const RmEpsilonOptions &> RmEpsilonArgs3;
+
+template<class Arc>
+void RmEpsilon(RmEpsilonArgs3 *args) {
+ MutableFst<Arc> *fst = args->arg1->GetMutableFst<Arc>();
+ const RmEpsilonOptions &opts = args->arg3;
+
+ vector<typename Arc::Weight> weights;
+
+ RmEpsilonHelper(fst, &weights, opts);
+
+ // Copy the weights back
+ args->arg2->resize(weights.size());
+ for (unsigned i = 0; i < weights.size(); ++i) {
+ (*args->arg2)[i] = WeightClass(weights[i]);
+ }
+}
+
+//
+// PROTOTYPES
+//
+
+// 1
+void RmEpsilon(const FstClass &ifst, MutableFstClass *ofst,
+ bool reverse = false,
+ const RmEpsilonOptions& opts =
+ fst::script::RmEpsilonOptions());
+
+// 2
+void RmEpsilon(MutableFstClass *arc, bool connect = true,
+ const WeightClass &weight_threshold =
+ fst::script::WeightClass::Zero(),
+ int64 state_threshold = fst::kNoStateId,
+ float delta = fst::kDelta);
+
+// 3
+void RmEpsilon(MutableFstClass *fst, vector<WeightClass> *distance,
+ const RmEpsilonOptions &opts);
+
+
+} // namespace script
+} // namespace fst
+
+
+#endif // FST_SCRIPT_RMEPSILON_H_
diff --git a/src/include/fst/script/script-impl.h b/src/include/fst/script/script-impl.h
new file mode 100644
index 0000000..452c7c5
--- /dev/null
+++ b/src/include/fst/script/script-impl.h
@@ -0,0 +1,206 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+// This file defines the registration mechanism for new operations.
+// These operations are designed to enable scripts to work with FST classes
+// at a high level.
+
+// If you have a new arc type and want these operations to work with FSTs
+// with that arc type, see below for the registration steps
+// you must take.
+
+// These methods are only recommended for use in high-level scripting
+// applications. Most users should use the lower-level templated versions
+// corresponding to these.
+
+// If you have a new arc type you'd like these operations to work with,
+// use the REGISTER_FST_OPERATIONS macro defined in fstscript.h
+
+// If you have a custom operation you'd like to define, you need four
+// components. In the following, assume you want to create a new operation
+// with the signature
+//
+// void Foo(const FstClass &ifst, MutableFstClass *ofst);
+//
+// You need:
+//
+// 1) A way to bundle the args that your new Foo operation will take, as
+// a single struct. The template structs in arg-packs.h provide a handy
+// way to do this. In Foo's case, that might look like this:
+//
+// typedef args::Package<const FstClass &,
+// MutableFstClass *> FooArgs;
+//
+// Note: this package of args is going to be passed by non-const pointer.
+//
+// 2) A function template that is able to perform Foo, given the args and
+// arc type. Yours might look like this:
+//
+// template<class Arc>
+// void Foo(FooArgs *args) {
+// // Pull out the actual, arc-templated FSTs
+// const Fst<Arc> &ifst = *(args->arg1.GetFst<Arc>());
+// MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>();
+//
+// // actually perform foo on ifst and ofst...
+// }
+//
+// 3) a client-facing function for your operation. This would look like
+// the following:
+//
+// void Foo(const FstClass &ifst, MutableFstClass *ofst) {
+// // Check that the arc types of the FSTs match
+// if (!ArcTypesMatch(ifst, *ofst, "Foo")) return;
+// // package the args
+// FooArgs args(ifst, ofst);
+// // Finally, call the operation
+// Apply<Operation<FooArgs> >("Foo", ifst.ArcType(), &args);
+// }
+//
+// The Apply<> function template takes care of the link between 2 and 3,
+// provided you also have:
+//
+// 4) A registration for your new operation, on the arc types you care about.
+// This can be provided easily by the REGISTER_FST_OPERATION macro
+// defined below in this file:
+//
+// REGISTER_FST_OPERATION(Foo, StdArc, FooArgs);
+// REGISTER_FST_OPERATION(Foo, MyArc, FooArgs);
+// // .. etc
+//
+//
+// That's it! Now when you call Foo(const FstClass &, MutableFstClass *),
+// it dispatches (in #3) via the Apply<> function to the correct
+// instantiation of the template function in #2.
+//
+
+
+#ifndef FST_SCRIPT_SCRIPT_IMPL_H_
+#define FST_SCRIPT_SCRIPT_IMPL_H_
+
+//
+// This file contains general-purpose templates which are used in the
+// implementation of the operations.
+//
+
+#include <utility>
+using std::pair; using std::make_pair;
+#include <string>
+
+#include <fst/script/fst-class.h>
+#include <fst/generic-register.h>
+#include <fst/script/arg-packs.h>
+
+#include <fst/types.h>
+
+namespace fst {
+namespace script {
+
+//
+// A generic register for operations with various kinds of signatures.
+// Needed since every function signature requires a new registration class.
+// The pair<string, string> is understood to be the operation name and arc
+// type; subclasses (or typedefs) need only provide the operation signature.
+//
+
+template<class OperationSignature>
+class GenericOperationRegister
+    : public GenericRegister<pair<string, string>,
+                             OperationSignature,
+                             GenericOperationRegister<OperationSignature> > {
+ public:
+  // Stores op in the register under the key (operation_name, arc_type).
+  void RegisterOperation(const string &operation_name,
+                         const string &arc_type,
+                         OperationSignature op) {
+    const pair<string, string> key(operation_name, arc_type);
+    this->SetEntry(key, op);
+  }
+
+  // Returns whatever the register holds for (operation_name, arc_type).
+  OperationSignature GetOperation(
+      const string &operation_name, const string &arc_type) {
+    const pair<string, string> key(operation_name, arc_type);
+    return this->GetEntry(key);
+  }
+
+ protected:
+  // Maps a key to a shared-object filename. Only the arc type (key.second)
+  // is used; the operation name is ignored, so this yields the old-style
+  // "<arc type>-arc.so" name for now.
+  virtual string ConvertKeyToSoFilename(
+      const pair<string, string>& key) const {
+    string so_name(key.second);
+    ConvertToLegalCSymbol(&so_name);
+    so_name += "-arc.so";
+    return so_name;
+  }
+};
+
+
+// Operation package - everything you need to register a new type of operation
+
+// The ArgPack should be the type that's passed into each wrapped function -
+// for instance, it might be a struct containing all the args.
+// It's always passed by pointer, so const members should be used to enforce
+// constness where it's needed. Return values should be implemented as a
+// member of ArgPack as well.
+
+// Bundles the types associated with one operation signature, keyed by the
+// argument pack that the operation's functions receive.
+template<class ArgPack>
+struct Operation {
+  // The argument bundle handed to every implementation.
+  typedef ArgPack Args;
+
+  // Pointer to an arc-typed implementation; it takes the bundle by
+  // non-const pointer and returns nothing.
+  typedef void (*OpType)(ArgPack *args);
+
+  // Register mapping (operation name, arc type) to an OpType.
+  typedef GenericOperationRegister<OpType> Register;
+
+  // Helper type used to add entries to Register.
+  typedef GenericRegisterer<Register> Registerer;
+};
+
+
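+// Macro for registering new types of operations.
+// It defines a file-static Operation<ArgPack>::Registerer whose constructor
+// receives the key (#Op, Arc::Type()) and the function Op<Arc>. Because the
+// variable name is built by token pasting, Op, Arc and ArgPack must each be
+// a single plain identifier (no "::" and no template argument lists); use a
+// typedef if necessary.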
+
+#define REGISTER_FST_OPERATION(Op, Arc, ArgPack) \
+ static fst::script::Operation<ArgPack>::Registerer \
+ arc_dispatched_operation_ ## ArgPack ## Op ## Arc ## _registerer( \
+ make_pair(#Op, Arc::Type()), Op<Arc>)
+
+
+//
+// Template function to apply an operation by name
+//
+
+// Looks up the implementation registered for (op_name, arc_type) in
+// OpReg's register and invokes it on args. If no implementation is found,
+// an error is logged and args is left untouched.
+template<class OpReg>
+void Apply(const string &op_name, const string &arc_type,
+           typename OpReg::Args *args) {
+  typename OpReg::OpType op =
+      OpReg::Register::GetRegister()->GetOperation(op_name, arc_type);
+
+  if (op != 0) {
+    op(args);
+    return;
+  }
+
+  FSTERROR() << "No operation found for \"" << op_name << "\" on "
+             << "arc type " << arc_type;
+}
+
+
+// Helper that logs to ERROR if the arc types of a and b don't match.
+// The op_name is also printed.
+bool ArcTypesMatch(const FstClass &a, const FstClass &b,
+ const string &op_name);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_SCRIPT_IMPL_H_
diff --git a/src/include/fst/script/shortest-distance.h b/src/include/fst/script/shortest-distance.h
new file mode 100644
index 0000000..5fc2976
--- /dev/null
+++ b/src/include/fst/script/shortest-distance.h
@@ -0,0 +1,250 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_SHORTEST_DISTANCE_H_
+#define FST_SCRIPT_SHORTEST_DISTANCE_H_
+
+#include <vector>
+using std::vector;
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/script/weight-class.h>
+#include <fst/script/prune.h> // for ArcFilterType
+#include <fst/queue.h> // for QueueType
+#include <fst/shortest-distance.h>
+
+namespace fst {
+namespace script {
+
+enum ArcFilterType { ANY_ARC_FILTER, EPSILON_ARC_FILTER,
+ INPUT_EPSILON_ARC_FILTER, OUTPUT_EPSILON_ARC_FILTER };
+
+// See nlp/fst/lib/shortest-distance.h for the template options class
+// that this one shadows
+struct ShortestDistanceOptions {
+  const QueueType queue_type;            // selects the queue discipline
+  const ArcFilterType arc_filter_type;   // selects which arcs are considered
+  const int64 source;
+  const float delta;
+  const bool first_path;                 // always false for this class
+
+  // Every field is fixed at construction; first_path cannot be set through
+  // this constructor and is initialized to false.
+  ShortestDistanceOptions(QueueType qt, ArcFilterType aft, int64 s, float d)
+      : queue_type(qt),
+        arc_filter_type(aft),
+        source(s),
+        delta(d),
+        first_path(false) { }
+};
+
+
+
+// 1
+typedef args::Package<const FstClass &, vector<WeightClass> *,
+ const ShortestDistanceOptions &> ShortestDistanceArgs1;
+
+// Factory that hides the differing constructor signatures of the queue
+// types. This primary template covers queues that are default
+// constructible: both arguments are ignored. The caller owns the result.
+template<class Queue, class Arc, class ArcFilter>
+struct QueueConstructor {
+  static Queue *Construct(const Fst<Arc> & /* unused */,
+                          const vector<typename Arc::Weight> * /* unused */) {
+    Queue *fresh = new Queue();
+    return fresh;
+  }
+};
+
+// Specializations to deal with AutoQueue, NaturalShortestFirstQueue,
+// and TopOrderQueue's different constructors
+// AutoQueue is built from the FST, the distance vector and an arc filter.
+// The caller owns the returned queue.
+template<class Arc, class ArcFilter>
+struct QueueConstructor<AutoQueue<typename Arc::StateId>, Arc, ArcFilter> {
+  static AutoQueue<typename Arc::StateId> *Construct(
+      const Fst<Arc> &fst,
+      const vector<typename Arc::Weight> *distance) {
+    typedef AutoQueue<typename Arc::StateId> Q;
+    return new Q(fst, distance, ArcFilter());
+  }
+};
+
+// NaturalShortestFirstQueue is built from the distance vector alone; the
+// FST argument is not used. The caller owns the returned queue.
+template<class Arc, class ArcFilter>
+struct QueueConstructor<NaturalShortestFirstQueue<typename Arc::StateId,
+                                                  typename Arc::Weight>,
+                        Arc, ArcFilter> {
+  static NaturalShortestFirstQueue<typename Arc::StateId, typename Arc::Weight>
+  *Construct(const Fst<Arc> &fst,
+             const vector<typename Arc::Weight> *distance) {
+    typedef NaturalShortestFirstQueue<typename Arc::StateId,
+                                      typename Arc::Weight> Q;
+    return new Q(*distance);
+  }
+};
+
+// TopOrderQueue is built from the FST and an arc filter; the weights
+// argument is not used. The caller owns the returned queue.
+template<class Arc, class ArcFilter>
+struct QueueConstructor<TopOrderQueue<typename Arc::StateId>, Arc, ArcFilter> {
+  static TopOrderQueue<typename Arc::StateId> *Construct(
+      const Fst<Arc> &fst, const vector<typename Arc::Weight> *weights) {
+    typedef TopOrderQueue<typename Arc::StateId> Q;
+    return new Q(fst, ArcFilter());
+  }
+};
+
+
+// Runs ShortestDistance on the FST in args->arg1 for a fixed Queue type,
+// choosing the arc filter at run time from opts.arc_filter_type, and writes
+// the resulting distances to *args->arg2 as WeightClass values.
+//
+// In every case the queue is constructed with the SAME filter type that is
+// handed to the shortest-distance options; each case names the filter once
+// through a local typedef so the two cannot drift apart. (The epsilon case
+// previously built its queue with AnyArcFilter, which matters for queues
+// whose constructor takes the filter, i.e. AutoQueue and TopOrderQueue.)
+//
+// If arc_filter_type matches none of the cases, nothing is computed and
+// *args->arg2 ends up empty.
+template<class Arc, class Queue>
+void ShortestDistanceHelper(ShortestDistanceArgs1 *args) {
+  const Fst<Arc> &fst = *(args->arg1.GetFst<Arc>());
+  const ShortestDistanceOptions &opts = args->arg3;
+
+  vector<typename Arc::Weight> weights;
+
+  switch (opts.arc_filter_type) {
+    case ANY_ARC_FILTER: {
+      typedef AnyArcFilter<Arc> Filter;
+      Queue *queue =
+          QueueConstructor<Queue, Arc, Filter>::Construct(fst, &weights);
+      fst::ShortestDistanceOptions<Arc, Queue, Filter> sdopts(
+          queue, Filter(), opts.source, opts.delta);
+      ShortestDistance(fst, &weights, sdopts);
+      delete queue;
+      break;
+    }
+    case EPSILON_ARC_FILTER: {
+      typedef EpsilonArcFilter<Arc> Filter;
+      Queue *queue =
+          QueueConstructor<Queue, Arc, Filter>::Construct(fst, &weights);
+      fst::ShortestDistanceOptions<Arc, Queue, Filter> sdopts(
+          queue, Filter(), opts.source, opts.delta);
+      ShortestDistance(fst, &weights, sdopts);
+      delete queue;
+      break;
+    }
+    case INPUT_EPSILON_ARC_FILTER: {
+      typedef InputEpsilonArcFilter<Arc> Filter;
+      Queue *queue =
+          QueueConstructor<Queue, Arc, Filter>::Construct(fst, &weights);
+      fst::ShortestDistanceOptions<Arc, Queue, Filter> sdopts(
+          queue, Filter(), opts.source, opts.delta);
+      ShortestDistance(fst, &weights, sdopts);
+      delete queue;
+      break;
+    }
+    case OUTPUT_EPSILON_ARC_FILTER: {
+      typedef OutputEpsilonArcFilter<Arc> Filter;
+      Queue *queue =
+          QueueConstructor<Queue, Arc, Filter>::Construct(fst, &weights);
+      fst::ShortestDistanceOptions<Arc, Queue, Filter> sdopts(
+          queue, Filter(), opts.source, opts.delta);
+      ShortestDistance(fst, &weights, sdopts);
+      delete queue;
+      break;
+    }
+  }
+
+  // Copy the weights back
+  args->arg2->resize(weights.size());
+  for (unsigned i = 0; i < weights.size(); ++i) {
+    (*args->arg2)[i] = WeightClass(weights[i]);
+  }
+}
+
+// Arc-typed implementation of script-level ShortestDistance overload 1.
+// Resolves opts.queue_type to a concrete queue class and delegates to
+// ShortestDistanceHelper, which resolves the arc filter and fills
+// *args->arg2.
+//
+// An unrecognized queue type is reported through FSTERROR and the function
+// returns without computing anything (*args->arg2 is left as the caller
+// passed it). Previously the error case fell through into the AUTO_QUEUE
+// branch and silently ran with a queue the caller had not asked for.
+template<class Arc>
+void ShortestDistance(ShortestDistanceArgs1 *args) {
+  const ShortestDistanceOptions &opts = args->arg3;
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Weight Weight;
+
+  // Must consider (opts.queue_type x opts.arc_filter_type) options; the
+  // queue type is handled here, the arc filter in ShortestDistanceHelper.
+  switch (opts.queue_type) {
+    case AUTO_QUEUE:
+      ShortestDistanceHelper<Arc, AutoQueue<StateId> >(args);
+      return;
+
+    case FIFO_QUEUE:
+      ShortestDistanceHelper<Arc, FifoQueue<StateId> >(args);
+      return;
+
+    case LIFO_QUEUE:
+      ShortestDistanceHelper<Arc, LifoQueue<StateId> >(args);
+      return;
+
+    case SHORTEST_FIRST_QUEUE:
+      ShortestDistanceHelper<Arc,
+          NaturalShortestFirstQueue<StateId, Weight> >(args);
+      return;
+
+    case STATE_ORDER_QUEUE:
+      ShortestDistanceHelper<Arc, StateOrderQueue<StateId> >(args);
+      return;
+
+    case TOP_ORDER_QUEUE:
+      ShortestDistanceHelper<Arc, TopOrderQueue<StateId> >(args);
+      return;
+
+    default:
+      FSTERROR() << "Unknown queue type: " << opts.queue_type;
+      return;
+  }
+}
+
+// 2
+typedef args::Package<const FstClass&, vector<WeightClass>*,
+                      bool, double> ShortestDistanceArgs2;
+
+// Arc-typed implementation of script-level ShortestDistance overload 2.
+// Forwards arg3 and arg4 (the reverse flag and delta of prototype 2) to the
+// templated ShortestDistance and stores the result in *arg2.
+template<class Arc>
+void ShortestDistance(ShortestDistanceArgs2 *args) {
+  typedef typename Arc::Weight Weight;
+
+  const Fst<Arc> &typed_fst = *(args->arg1.GetFst<Arc>());
+  vector<Weight> typed_distance;
+
+  ShortestDistance(typed_fst, &typed_distance, args->arg3, args->arg4);
+
+  // Wrap each arc-typed weight in a WeightClass for the caller.
+  vector<WeightClass> &out = *args->arg2;
+  out.resize(typed_distance.size());
+  for (unsigned k = 0; k < typed_distance.size(); ++k) {
+    out[k] = WeightClass(typed_distance[k]);
+  }
+}
+
+// 3
+typedef args::WithReturnValue<WeightClass,
+                              const FstClass &> ShortestDistanceArgs3;
+
+// Arc-typed implementation of script-level ShortestDistance overload 3.
+// Calls the single-argument templated ShortestDistance on the FST and
+// stores its result, wrapped as a WeightClass, in args->retval.
+template<class Arc>
+void ShortestDistance(ShortestDistanceArgs3 *args) {
+  const Fst<Arc> *typed_fst = args->args.GetFst<Arc>();
+
+  args->retval = WeightClass(ShortestDistance(*typed_fst));
+}
+
+
+// 1
+void ShortestDistance(const FstClass &fst, vector<WeightClass> *distance,
+ const ShortestDistanceOptions &opts);
+
+// 2
+void ShortestDistance(const FstClass &ifst, vector<WeightClass> *distance,
+ bool reverse = false, double delta = fst::kDelta);
+
+#ifndef SWIG
+// 3
+WeightClass ShortestDistance(const FstClass &ifst);
+#endif
+
+} // namespace script
+} // namespace fst
+
+
+
+#endif // FST_SCRIPT_SHORTEST_DISTANCE_H_
diff --git a/src/include/fst/script/shortest-path.h b/src/include/fst/script/shortest-path.h
new file mode 100644
index 0000000..b3a3eb9
--- /dev/null
+++ b/src/include/fst/script/shortest-path.h
@@ -0,0 +1,190 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_SHORTEST_PATH_H_
+#define FST_SCRIPT_SHORTEST_PATH_H_
+
+#include <vector>
+using std::vector;
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/script/weight-class.h>
+#include <fst/shortest-path.h>
+#include <fst/script/shortest-distance.h> // for ShortestDistanceOptions
+
+namespace fst {
+namespace script {
+
+// Script-level options for ShortestPath. The base class is always
+// initialized with ANY_ARC_FILTER and kNoStateId as the source; only the
+// queue type and delta are passed through to it.
+struct ShortestPathOptions
+    : public fst::script::ShortestDistanceOptions {
+  const size_t nshortest;
+  const bool unique;
+  const bool has_distance;
+  // Hides the base-class member of the same name, which the base
+  // constructor always sets to false.
+  const bool first_path;
+  const WeightClass weight_threshold;
+  const int64 state_threshold;
+
+  ShortestPathOptions(QueueType qt, size_t n = 1,
+                      bool u = false, bool hasdist = false,
+                      float d = fst::kDelta, bool fp = false,
+                      WeightClass w = fst::script::WeightClass::Zero(),
+                      int64 s = fst::kNoStateId)
+      : ShortestDistanceOptions(qt, ANY_ARC_FILTER, kNoStateId, d),
+        nshortest(n),
+        unique(u),
+        has_distance(hasdist),
+        first_path(fp),
+        weight_threshold(w),
+        state_threshold(s) { }
+};
+
+typedef args::Package<const FstClass &, MutableFstClass *,
+ vector<WeightClass> *, const ShortestPathOptions &>
+ ShortestPathArgs1;
+
+
+// Arc-typed implementation of script-level ShortestPath overload 1.
+// Resolves opts.queue_type to a concrete queue class, runs the templated
+// ShortestPath from args->arg1 into args->arg2 using AnyArcFilter, and then
+// writes the distance vector it used to *args->arg3 as WeightClass values.
+//
+// Each case ends in `break` rather than `return` so that control reaches
+// the copy-back loop at the bottom; with `return`, the loop only ran on the
+// error path and the caller's distance vector was never filled in.
+//
+// On an unknown queue type an error is logged, kError is set on the output
+// FST, and *args->arg3 is resized to empty.
+//
+// NOTE(review): the incoming contents of *args->arg3 are never read here;
+// the computation always starts from an empty local vector, even when
+// opts.has_distance is true. Confirm whether that is intended.
+template<class Arc>
+void ShortestPath(ShortestPathArgs1 *args) {
+  const Fst<Arc> &ifst = *(args->arg1.GetFst<Arc>());
+  MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>();
+  const ShortestPathOptions &opts = args->arg4;
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Weight Weight;
+  typedef AnyArcFilter<Arc> ArcFilter;
+
+  vector<typename Arc::Weight> weights;
+  typename Arc::Weight weight_threshold =
+      *(opts.weight_threshold.GetWeight<Weight>());
+
+  switch (opts.queue_type) {
+    case AUTO_QUEUE: {
+      typedef AutoQueue<StateId> Queue;
+      Queue *queue = QueueConstructor<Queue, Arc,
+          ArcFilter>::Construct(ifst, &weights);
+      fst::ShortestPathOptions<Arc, Queue, ArcFilter> spopts(
+          queue, ArcFilter(), opts.nshortest, opts.unique,
+          opts.has_distance, opts.delta, opts.first_path,
+          weight_threshold, opts.state_threshold);
+      ShortestPath(ifst, ofst, &weights, spopts);
+      delete queue;
+      break;
+    }
+    case FIFO_QUEUE: {
+      typedef FifoQueue<StateId> Queue;
+      Queue *queue = QueueConstructor<Queue, Arc,
+          ArcFilter>::Construct(ifst, &weights);
+      fst::ShortestPathOptions<Arc, Queue, ArcFilter> spopts(
+          queue, ArcFilter(), opts.nshortest, opts.unique,
+          opts.has_distance, opts.delta, opts.first_path,
+          weight_threshold, opts.state_threshold);
+      ShortestPath(ifst, ofst, &weights, spopts);
+      delete queue;
+      break;
+    }
+    case LIFO_QUEUE: {
+      typedef LifoQueue<StateId> Queue;
+      Queue *queue = QueueConstructor<Queue, Arc,
+          ArcFilter >::Construct(ifst, &weights);
+      fst::ShortestPathOptions<Arc, Queue, ArcFilter> spopts(
+          queue, ArcFilter(), opts.nshortest, opts.unique,
+          opts.has_distance, opts.delta, opts.first_path,
+          weight_threshold, opts.state_threshold);
+      ShortestPath(ifst, ofst, &weights, spopts);
+      delete queue;
+      break;
+    }
+    case SHORTEST_FIRST_QUEUE: {
+      typedef NaturalShortestFirstQueue<StateId, Weight> Queue;
+      Queue *queue = QueueConstructor<Queue, Arc,
+          ArcFilter>::Construct(ifst, &weights);
+      fst::ShortestPathOptions<Arc, Queue, ArcFilter> spopts(
+          queue, ArcFilter(), opts.nshortest, opts.unique,
+          opts.has_distance, opts.delta, opts.first_path,
+          weight_threshold, opts.state_threshold);
+      ShortestPath(ifst, ofst, &weights, spopts);
+      delete queue;
+      break;
+    }
+    case STATE_ORDER_QUEUE: {
+      typedef StateOrderQueue<StateId> Queue;
+      Queue *queue = QueueConstructor<Queue, Arc,
+          ArcFilter>::Construct(ifst, &weights);
+      fst::ShortestPathOptions<Arc, Queue, ArcFilter> spopts(
+          queue, ArcFilter(), opts.nshortest, opts.unique,
+          opts.has_distance, opts.delta, opts.first_path,
+          weight_threshold, opts.state_threshold);
+      ShortestPath(ifst, ofst, &weights, spopts);
+      delete queue;
+      break;
+    }
+    case TOP_ORDER_QUEUE: {
+      typedef TopOrderQueue<StateId> Queue;
+      Queue *queue = QueueConstructor<Queue, Arc,
+          ArcFilter>::Construct(ifst, &weights);
+      fst::ShortestPathOptions<Arc, Queue, ArcFilter> spopts(
+          queue, ArcFilter(), opts.nshortest, opts.unique,
+          opts.has_distance, opts.delta, opts.first_path,
+          weight_threshold, opts.state_threshold);
+      ShortestPath(ifst, ofst, &weights, spopts);
+      delete queue;
+      break;
+    }
+    default:
+      FSTERROR() << "Unknown queue type: " << opts.queue_type;
+      ofst->SetProperties(kError, kError);
+      break;
+  }
+
+  // Copy the weights back
+  args->arg3->resize(weights.size());
+  for (unsigned i = 0; i < weights.size(); ++i) {
+    (*args->arg3)[i] = WeightClass(weights[i]);
+  }
+}
+
+// 2
+typedef args::Package<const FstClass &, MutableFstClass *,
+                      size_t, bool, bool, WeightClass,
+                      int64> ShortestPathArgs2;
+
+// Arc-typed implementation of script-level ShortestPath overload 2.
+// Unpacks the FSTs and the weight threshold (arg6) into their arc-typed
+// forms and forwards everything, in package order, to the templated
+// ShortestPath.
+template<class Arc>
+void ShortestPath(ShortestPathArgs2 *args) {
+  typedef typename Arc::Weight Weight;
+
+  const Fst<Arc> &typed_ifst = *(args->arg1.GetFst<Arc>());
+  MutableFst<Arc> *typed_ofst = args->arg2->GetMutableFst<Arc>();
+  Weight threshold = *(args->arg6.GetWeight<Weight>());
+
+  ShortestPath(typed_ifst, typed_ofst, args->arg3, args->arg4, args->arg5,
+               threshold, args->arg7);
+}
+
+
+// 1
+void ShortestPath(const FstClass &ifst, MutableFstClass *ofst,
+ vector<WeightClass> *distance,
+ const ShortestPathOptions &opts);
+
+
+// 2
+void ShortestPath(const FstClass &ifst, MutableFstClass *ofst,
+ size_t n = 1, bool unique = false,
+ bool first_path = false,
+ WeightClass weight_threshold =
+ fst::script::WeightClass::Zero(),
+ int64 state_threshold = fst::kNoStateId);
+
+} // namespace script
+} // namespace fst
+
+
+
+#endif // FST_SCRIPT_SHORTEST_PATH_H_
diff --git a/src/include/fst/script/symbols.h b/src/include/fst/script/symbols.h
new file mode 100644
index 0000000..927600a
--- /dev/null
+++ b/src/include/fst/script/symbols.h
@@ -0,0 +1,20 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_SYMBOLS_H_
+#define FST_SCRIPT_SYMBOLS_H_
+
+#endif // FST_SCRIPT_SYMBOLS_H_
diff --git a/src/include/fst/script/synchronize.h b/src/include/fst/script/synchronize.h
new file mode 100644
index 0000000..3c0c905
--- /dev/null
+++ b/src/include/fst/script/synchronize.h
@@ -0,0 +1,42 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_SYNCHRONIZE_H_
+#define FST_SCRIPT_SYNCHRONIZE_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/synchronize.h>
+
+namespace fst {
+namespace script {
+
+typedef args::Package<const FstClass &, MutableFstClass *> SynchronizeArgs;
+
+// Arc-typed implementation of script-level Synchronize: unwraps the input
+// (arg1) and output (arg2) FSTs and calls the templated Synchronize.
+template<class Arc>
+void Synchronize(SynchronizeArgs *args) {
+  const Fst<Arc> &typed_in = *(args->arg1.GetFst<Arc>());
+  MutableFst<Arc> *typed_out = args->arg2->GetMutableFst<Arc>();
+
+  Synchronize(typed_in, typed_out);
+}
+
+void Synchronize(const FstClass &ifst, MutableFstClass *ofst);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_SYNCHRONIZE_H_
diff --git a/src/include/fst/script/text-io.h b/src/include/fst/script/text-io.h
new file mode 100644
index 0000000..95cc182
--- /dev/null
+++ b/src/include/fst/script/text-io.h
@@ -0,0 +1,50 @@
+// text-io.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to work with generic WeightClass
+//
+// \file
+// Utilities for reading and writing textual strings representing
+// states, labels, and weights and files specifying label-label pairs
+// and potentials (state-weight pairs).
+//
+
+#ifndef FST_SCRIPT_TEXT_IO_H__
+#define FST_SCRIPT_TEXT_IO_H__
+
+#include <string>
+#include <vector>
+using std::vector;
+
+
+#include <iostream>
+#include <fstream>
+#include <fst/script/weight-class.h>
+
+namespace fst {
+namespace script {
+
+bool ReadPotentials(const string &weight_type,
+ const string& filename,
+ vector<WeightClass>* potential);
+
+bool WritePotentials(const string& filename,
+ const vector<WeightClass>& potential);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_TEXT_IO_H__
diff --git a/src/include/fst/script/topsort.h b/src/include/fst/script/topsort.h
new file mode 100644
index 0000000..4e27e48
--- /dev/null
+++ b/src/include/fst/script/topsort.h
@@ -0,0 +1,40 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_TOPSORT_H_
+#define FST_SCRIPT_TOPSORT_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/topsort.h>
+
+namespace fst {
+namespace script {
+
+typedef args::WithReturnValue<bool, MutableFstClass*> TopSortArgs;
+
+// Arc-typed implementation of script-level TopSort: runs the templated
+// TopSort on the wrapped FST and stores its bool result in args->retval.
+template<class Arc>
+void TopSort(TopSortArgs *args) {
+  MutableFst<Arc> *typed_fst = args->args->GetMutableFst<Arc>();
+  const bool sorted = TopSort(typed_fst);
+  args->retval = sorted;
+}
+
+bool TopSort(MutableFstClass *fst);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_TOPSORT_H_
diff --git a/src/include/fst/script/union.h b/src/include/fst/script/union.h
new file mode 100644
index 0000000..780e484
--- /dev/null
+++ b/src/include/fst/script/union.h
@@ -0,0 +1,42 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_UNION_H_
+#define FST_SCRIPT_UNION_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/union.h>
+
+namespace fst {
+namespace script {
+
+typedef args::Package<MutableFstClass *, const FstClass &> UnionArgs;
+
+// Arc-typed implementation of script-level Union: unwraps the mutable FST
+// (arg1) and the read-only FST (arg2) and calls the templated Union, which
+// receives the first one by pointer.
+template<class Arc>
+void Union(UnionArgs *args) {
+  MutableFst<Arc> *target = args->arg1->GetMutableFst<Arc>();
+  const Fst<Arc> &addend = *(args->arg2.GetFst<Arc>());
+
+  Union(target, addend);
+}
+
+void Union(MutableFstClass *fst1, const FstClass &fst2);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_UNION_H_
diff --git a/src/include/fst/script/verify.h b/src/include/fst/script/verify.h
new file mode 100644
index 0000000..6904003
--- /dev/null
+++ b/src/include/fst/script/verify.h
@@ -0,0 +1,40 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: sorenj@google.com (Jeffrey Sorensen)
+
+#ifndef FST_SCRIPT_VERIFY_H_
+#define FST_SCRIPT_VERIFY_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/verify.h>
+
+namespace fst {
+namespace script {
+
+typedef args::WithReturnValue<bool, const FstClass *> VerifyArgs;
+
+// Arc-typed implementation of script-level Verify: runs the templated
+// Verify on the wrapped FST and stores its bool result in args->retval.
+template<class Arc>
+void Verify(VerifyArgs *args) {
+  const Fst<Arc> &typed_fst = *(args->args->GetFst<Arc>());
+  args->retval = Verify(typed_fst);
+}
+
+bool Verify(const FstClass &fst1);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_VERIFY_H_
diff --git a/src/include/fst/script/weight-class.h b/src/include/fst/script/weight-class.h
new file mode 100644
index 0000000..5a4890f
--- /dev/null
+++ b/src/include/fst/script/weight-class.h
@@ -0,0 +1,216 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+// Represents a generic weight in an FST -- that is, represents a specific
+// type of weight underneath while hiding that type from a client.
+
+
+#ifndef FST_SCRIPT_WEIGHT_CLASS_H_
+#define FST_SCRIPT_WEIGHT_CLASS_H_
+
+#include <string>
+
+#include <fst/generic-register.h>
+#include <fst/util.h>
+
+namespace fst {
+namespace script {
+
+class WeightImplBase {  // Abstract interface through which WeightClass uses a weight of hidden type.
+ public:
+ virtual WeightImplBase *Copy() const = 0;  // Returns a copy of this object.
+ virtual void Print(ostream *o) const = 0;  // Writes the weight to '*o'.
+ virtual const string &Type() const = 0;  // Name of the underlying weight type.
+ virtual string to_string() const = 0;  // The weight rendered as a string.
+ virtual bool operator == (const WeightImplBase &other) const = 0;  // Equality with 'other'.
+ virtual ~WeightImplBase() { }  // Virtual: WeightClass deletes implementations through this base.
+};
+
+// Concrete WeightImplBase that stores a weight of type W by value.
+template<class W>
+struct WeightClassImpl : public WeightImplBase {
+  W weight;
+
+  explicit WeightClassImpl(const W& weight) : weight(weight) { }
+
+  // Returns a newly allocated implementation holding the same weight.
+  virtual WeightClassImpl<W> *Copy() const {
+    WeightClassImpl<W> *duplicate = new WeightClassImpl<W>(weight);
+    return duplicate;
+  }
+
+  // The type name reported by W itself.
+  virtual const string &Type() const { return W::Type(); }
+
+  // Streams the weight to '*o'.
+  virtual void Print(ostream *o) const {
+    (*o) << weight;
+  }
+
+  // Formats the weight through a string stream.
+  virtual string to_string() const {
+    ostringstream buffer;
+    buffer << weight;
+    return buffer.str();
+  }
+
+  // Two implementations are equal when they report the same type name and
+  // their weights compare equal.
+  virtual bool operator == (const WeightImplBase &other) const {
+    if (Type() != other.Type()) return false;
+
+    // Same type name, so 'other' is treated as a WeightClassImpl<W>.
+    const WeightClassImpl<W> &same_type_other =
+        static_cast<const WeightClassImpl<W> &>(other);
+    return same_type_other.weight == weight;
+  }
+};
+
+
+// Holds a weight of any type behind a WeightImplBase pointer, or one of the
+// type-independent constants Zero() / One(), which carry no implementation
+// object. The object owns 'impl_' and deletes it on destruction.
+class WeightClass {
+ public:
+  // Default-constructs the type-independent zero.
+  WeightClass() : element_type_(ZERO), impl_(0) { }
+
+  // Wraps a copy of a concrete weight.
+  template<class W>
+  explicit WeightClass(const W& weight)
+      : element_type_(OTHER), impl_(new WeightClassImpl<W>(weight)) { }
+
+  // Builds a weight from a weight type name and a string (defined elsewhere).
+  WeightClass(const string &weight_type, const string &weight_str);
+
+  // Deep copy: the implementation object, if any, is duplicated.
+  WeightClass(const WeightClass &other) :
+      element_type_(other.element_type_),
+      impl_(other.impl_ ? other.impl_->Copy() : 0) { }
+
+  // Deep-copy assignment. The copy of other's implementation is made before
+  // the old one is released, and self-assignment is a no-op; deleting first
+  // would make 'w = w' call Copy() on an already-deleted object.
+  WeightClass &operator = (const WeightClass &other) {
+    if (this != &other) {
+      WeightImplBase *replacement = other.impl_ ? other.impl_->Copy() : 0;
+      delete impl_;
+      impl_ = replacement;
+      element_type_ = other.element_type_;
+    }
+    return *this;
+  }
+
+  // Returns a pointer to the underlying weight as type W; see the
+  // out-of-class definition for the handling of Zero()/One() and mismatches.
+  template<class W>
+  const W* GetWeight() const;
+
+  // "ZERO" / "ONE" for the type-independent constants, otherwise the string
+  // form produced by the implementation object.
+  string to_string() const {
+    switch (element_type_) {
+      case ZERO:
+        return "ZERO";
+      case ONE:
+        return "ONE";
+      default:
+      case OTHER:
+        return impl_->to_string();
+    }
+  }
+
+  // Equal when the element kinds match and either both have no
+  // implementation object or the two implementation objects compare equal.
+  bool operator == (const WeightClass &other) const {
+    return element_type_ == other.element_type_ &&
+        ((impl_ && other.impl_ && (*impl_ == *other.impl_)) ||
+         (impl_ == 0 && other.impl_ == 0));
+  }
+
+  // Shared type-independent zero.
+  static const WeightClass &Zero() {
+    static WeightClass w(ZERO);
+
+    return w;
+  }
+
+  // Shared type-independent one.
+  static const WeightClass &One() {
+    static WeightClass w(ONE);
+
+    return w;
+  }
+
+  // Deleting a null pointer is a no-op, so no check is needed.
+  ~WeightClass() { delete impl_; }
+ private:
+  enum ElementType { ZERO, ONE, OTHER };
+  ElementType element_type_;
+
+  WeightImplBase *impl_;  // Owned; null for ZERO and ONE.
+
+  // Used by Zero() and One() to build the constants without an
+  // implementation object.
+  explicit WeightClass(ElementType et) : element_type_(et), impl_(0) { }
+
+  friend ostream &operator << (ostream &o, const WeightClass &c);
+};
+
+// Returns the held weight viewed as type W. The type-independent zero and
+// one map to W::Zero() and W::One(); a held weight whose type name differs
+// from W::Type() yields NULL.
+template<class W>
+const W* WeightClass::GetWeight() const {
+  // W::Zero() and W::One() may be returned as temporaries, so keep static
+  // copies here: the caller receives a pointer and it must stay valid.
+  static const W zero = W::Zero();
+  static const W one = W::One();
+
+  switch (element_type_) {
+    case ZERO:
+      return &zero;
+    case ONE:
+      return &one;
+    default:
+      break;
+  }
+
+  if (W::Type() != impl_->Type()) return NULL;
+
+  // Type names agree, so the implementation is treated as WeightClassImpl<W>.
+  return &static_cast<WeightClassImpl<W> *>(impl_)->weight;
+}
+
+//
+// Registration for generic weight types.
+//
+
+// Signature of the per-weight-type factory kept in the weight register:
+// turns a string into a newly allocated weight implementation. 'src' and
+// 'nline' are passed through to StrToWeight.
+typedef WeightImplBase* (*StrToWeightImplBaseT)(const string &str,
+                                                const string &src,
+                                                size_t nline);
+
+// Factory for weight type W: converts 'str' with StrToWeight<W> and wraps
+// the result in a new WeightClassImpl<W>.
+template<class W>
+WeightImplBase* StrToWeightImplBase(const string &str,
+                                    const string &src, size_t nline) {
+  const W &parsed = StrToWeight<W>(str, src, nline);
+  return new WeightClassImpl<W>(parsed);
+}
+
+// Hidden from swig: these declarations confuse it and need no wrapping.
+#ifndef SWIG
+ostream& operator << (ostream &o, const WeightClass &c);
+
+// Register mapping a weight type name to its string-to-weight factory.
+class WeightClassRegister
+    : public GenericRegister<string, StrToWeightImplBaseT,
+                             WeightClassRegister> {
+ protected:
+  // The shared-object file name for a key is the key followed by ".so".
+  virtual string ConvertKeyToSoFilename(const string &key) const {
+    string so_filename(key);
+    so_filename += ".so";
+    return so_filename;
+  }
+};
+
+typedef GenericRegisterer<WeightClassRegister> WeightClassRegisterer;
+#endif
+
+// Internal version: defines a static WeightClassRegisterer for 'Weight'. It
+// must be invoked through a wrapper macro so that 'line' is expanded first.
+#define REGISTER_FST_WEIGHT__(Weight, line) \
+ static WeightClassRegisterer weight_registerer ## _ ## line( \
+ Weight::Type(), \
+ StrToWeightImplBase<Weight>)
+
+// This layer is where the 'line' argument (__LINE__) gets expanded.
+#define REGISTER_FST_WEIGHT_EXPANDER(Weight, line) \
+ REGISTER_FST_WEIGHT__(Weight, line)
+
+// Macro for registering new weight types. Clients call this. The static
+// registerer it defines is named after the current __LINE__, so two uses on
+// the same source line would clash.
+#define REGISTER_FST_WEIGHT(Weight) \
+ REGISTER_FST_WEIGHT_EXPANDER(Weight, __LINE__)
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_WEIGHT_CLASS_H_
diff --git a/src/include/fst/shortest-distance.h b/src/include/fst/shortest-distance.h
new file mode 100644
index 0000000..5d38409
--- /dev/null
+++ b/src/include/fst/shortest-distance.h
@@ -0,0 +1,347 @@
+// shortest-distance.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// Functions and classes to find shortest distance in an FST.
+
+#ifndef FST_LIB_SHORTEST_DISTANCE_H__
+#define FST_LIB_SHORTEST_DISTANCE_H__
+
+#include <deque>
+#include <vector>
+using std::vector;
+
+#include <fst/arcfilter.h>
+#include <fst/cache.h>
+#include <fst/queue.h>
+#include <fst/reverse.h>
+#include <fst/test-properties.h>
+
+
+namespace fst {
+
+// Options controlling the shortest-distance computation.
+template <class Arc, class Queue, class ArcFilter>
+struct ShortestDistanceOptions {
+  typedef typename Arc::StateId StateId;
+
+  // Queue discipline used; owned by caller.
+  Queue *state_queue;
+
+  // Arc filter (e.g., limit to only epsilon graph).
+  ArcFilter arc_filter;
+
+  // If kNoStateId, use the Fst's initial state.
+  StateId source;
+
+  // Determines the degree of convergence required.
+  float delta;
+
+  // For a semiring with the path property (o.w. undefined), compute the
+  // shortest-distances along the first path to a final state found by the
+  // algorithm. That path is the shortest-path only if the FST has a unique
+  // final state (or all the final states have the same final weight), the
+  // queue discipline is shortest-first and all the weights in the FST are
+  // between One() and Zero() according to NaturalLess.
+  bool first_path;
+
+  // 'first_path' always starts out false; set it after construction.
+  ShortestDistanceOptions(Queue *q, ArcFilter filt, StateId src = kNoStateId,
+                          float d = kDelta)
+      : state_queue(q),
+        arc_filter(filt),
+        source(src),
+        delta(d),
+        first_path(false) {}
+};
+
+
+// Working state of the shortest-distance algorithm. When 'retain' is true,
+// reusable information is kept across calls to ShortestDistance(source),
+// which makes repeated calls from different source states cheaper (e.g., in
+// epsilon removal). Contrary to usual conventions, 'fst' may not be freed
+// before this object. The caller should not modify the 'distance' vector
+// between such calls.
+// Error() returns true if an error was encountered.
+template<class Arc, class Queue, class ArcFilter>
+class ShortestDistanceState {
+ public:
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Weight Weight;
+
+  // Copies the settings out of 'opts' and empties '*distance'. Neither
+  // 'distance' nor the queue in 'opts' is owned by this object.
+  ShortestDistanceState(
+      const Fst<Arc> &fst,
+      vector<Weight> *distance,
+      const ShortestDistanceOptions<Arc, Queue, ArcFilter> &opts,
+      bool retain)
+      : fst_(fst),
+        distance_(distance),
+        state_queue_(opts.state_queue),
+        arc_filter_(opts.arc_filter),
+        delta_(opts.delta),
+        first_path_(opts.first_path),
+        retain_(retain),
+        source_id_(0),
+        error_(false) {
+    distance_->clear();
+  }
+
+  ~ShortestDistanceState() {}
+
+  // Runs the algorithm from 'source'; results go to the distance vector.
+  void ShortestDistance(StateId source);
+
+  // True once any call has hit an error.
+  bool Error() const { return error_; }
+
+ private:
+  const Fst<Arc> &fst_;
+  vector<Weight> *distance_;
+  Queue *state_queue_;
+  ArcFilter arc_filter_;
+  float delta_;
+  bool first_path_;
+  bool retain_;                // Retain and reuse information across calls
+
+  vector<Weight> rdistance_;   // Relaxation distance.
+  vector<bool> enqueued_;      // Is state enqueued?
+  vector<StateId> sources_;    // Source ID for ith state in 'distance_',
+                               // 'rdistance_', and 'enqueued_' if retained.
+  StateId source_id_;          // Unique ID characterizing each call to SD
+
+  bool error_;
+};
+
+// Compute the shortest distance from 'source' into '*distance_'. If 'source'
+// is kNoStateId, use the initial state of the Fst. Sets error_ on failure.
+template <class Arc, class Queue, class ArcFilter>
+void ShortestDistanceState<Arc, Queue, ArcFilter>::ShortestDistance(
+ StateId source) {
+ if (fst_.Start() == kNoStateId) {  // No start state: nothing to compute.
+ if (fst_.Properties(kError, false)) error_ = true;
+ return;
+ }
+
+ if (!(Weight::Properties() & kRightSemiring)) {
+ FSTERROR() << "ShortestDistance: Weight needs to be right distributive: "
+ << Weight::Type();
+ error_ = true;
+ return;
+ }
+
+ if (first_path_ && !(Weight::Properties() & kPath)) {
+ FSTERROR() << "ShortestDistance: first_path option disallowed when "
+ << "Weight does not have the path property: "
+ << Weight::Type();
+ error_ = true;
+ return;
+ }
+
+ state_queue_->Clear();
+
+ if (!retain_) {  // Not retaining: start every call from empty vectors.
+ distance_->clear();
+ rdistance_.clear();
+ enqueued_.clear();
+ }
+
+ if (source == kNoStateId)
+ source = fst_.Start();
+
+ while (distance_->size() <= source) {  // Grow the three vectors together to cover 'source'.
+ distance_->push_back(Weight::Zero());
+ rdistance_.push_back(Weight::Zero());
+ enqueued_.push_back(false);
+ }
+ if (retain_) {  // Mark the entry for 'source' as belonging to this call.
+ while (sources_.size() <= source)
+ sources_.push_back(kNoStateId);
+ sources_[source] = source_id_;
+ }
+ (*distance_)[source] = Weight::One();
+ rdistance_[source] = Weight::One();
+ enqueued_[source] = true;
+
+ state_queue_->Enqueue(source);
+
+ while (!state_queue_->Empty()) {
+ StateId s = state_queue_->Head();
+ state_queue_->Dequeue();
+ while (distance_->size() <= s) {
+ distance_->push_back(Weight::Zero());
+ rdistance_.push_back(Weight::Zero());
+ enqueued_.push_back(false);
+ }
+ if (first_path_ && (fst_.Final(s) != Weight::Zero()))  // Stop at the first final state dequeued.
+ break;
+ enqueued_[s] = false;
+ Weight r = rdistance_[s];  // Weight added to s since it was last dequeued.
+ rdistance_[s] = Weight::Zero();
+ for (ArcIterator< Fst<Arc> > aiter(fst_, s);
+ !aiter.Done();
+ aiter.Next()) {
+ const Arc &arc = aiter.Value();
+ if (!arc_filter_(arc) || arc.weight == Weight::Zero())  // Skip filtered-out and Zero()-weight arcs.
+ continue;
+ while (distance_->size() <= arc.nextstate) {
+ distance_->push_back(Weight::Zero());
+ rdistance_.push_back(Weight::Zero());
+ enqueued_.push_back(false);
+ }
+ if (retain_) {
+ while (sources_.size() <= arc.nextstate)
+ sources_.push_back(kNoStateId);
+ if (sources_[arc.nextstate] != source_id_) {  // Entry left over from an earlier call: reset it.
+ (*distance_)[arc.nextstate] = Weight::Zero();
+ rdistance_[arc.nextstate] = Weight::Zero();
+ enqueued_[arc.nextstate] = false;
+ sources_[arc.nextstate] = source_id_;
+ }
+ }
+ Weight &nd = (*distance_)[arc.nextstate];
+ Weight &nr = rdistance_[arc.nextstate];
+ Weight w = Times(r, arc.weight);
+ if (!ApproxEqual(nd, Plus(nd, w), delta_)) {  // Relax only if adding w changes nd beyond delta_.
+ nd = Plus(nd, w);
+ nr = Plus(nr, w);
+ if (!nd.Member() || !nr.Member()) {  // Non-member weight: give up with an error.
+ error_ = true;
+ return;
+ }
+ if (!enqueued_[arc.nextstate]) {
+ state_queue_->Enqueue(arc.nextstate);
+ enqueued_[arc.nextstate] = true;
+ } else {
+ state_queue_->Update(arc.nextstate);
+ }
+ }
+ }
+ }
+ ++source_id_;  // The next call gets a new ID, so retained entries from this call read as stale.
+ if (fst_.Properties(kError, false)) error_ = true;
+}
+
+
+// Shortest-distance algorithm: this version allows fine control
+// via the options argument. See below for a simpler interface.
+//
+// Computes the shortest distance from the 'opts.source' state to each
+// visited state S and stores the value in the 'distance' vector. An
+// unvisited state S has distance Zero(), which will be stored in the
+// 'distance' vector if S is less than the maximum visited state. The
+// state queue discipline, arc filter, and convergence delta are taken
+// from the options argument.
+// If an error was encountered, the 'distance' vector will contain a
+// single element, for which Member() is false.
+//
+// The weights must be right distributive and k-closed (i.e., 1 +
+// x + x^2 + ... + x^(k +1) = 1 + x + x^2 + ... + x^k).
+//
+// The algorithm is from Mohri, "Semiring Framework and Algorithms for
+// Shortest-Distance Problems", Journal of Automata, Languages and
+// Combinatorics 7(3):321-350, 2002. The complexity of the algorithm
+// depends on the properties of the semiring and the queue discipline
+// used. Refer to the paper for more details.
+template<class Arc, class Queue, class ArcFilter>
+void ShortestDistance(
+    const Fst<Arc> &fst,
+    vector<typename Arc::Weight> *distance,
+    const ShortestDistanceOptions<Arc, Queue, ArcFilter> &opts) {
+  typedef ShortestDistanceState<Arc, Queue, ArcFilter> State;
+
+  // One-shot computation, so nothing is retained across calls.
+  State solver(fst, distance, opts, false);
+  solver.ShortestDistance(opts.source);
+  if (!solver.Error()) return;
+
+  // Report the failure as a single non-member weight.
+  distance->clear();
+  distance->resize(1, Arc::Weight::NoWeight());
+}
+
+// Shortest-distance algorithm: simplified interface. See above for a
+// version that allows finer control.
+//
+// If 'reverse' is false, this computes the shortest distance from the
+// initial state to each state S and stores the value in the
+// 'distance' vector. If 'reverse' is true, this computes the shortest
+// distance from each state to the final states. An unvisited state S
+// has distance Zero(), which will be stored in the 'distance' vector
+// if S is less than the maximum visited state. The state queue
+// discipline is automatically selected.
+// If an error was encountered, the 'distance' vector will contain a
+// single element, for which Member() is false.
+//
+// The weights must be right (left) distributive if reverse is
+// false (true) and k-closed (i.e., 1 + x + x^2 + ... + x^(k +1) = 1 +
+// x + x^2 + ... + x^k).
+//
+// The algorithm is from Mohri, "Semiring Framework and Algorithms for
+// Shortest-Distance Problems", Journal of Automata, Languages and
+// Combinatorics 7(3):321-350, 2002. The complexity of the algorithm
+// depends on the properties of the semiring and the queue discipline
+// used. Refer to the paper for more details.
+template<class Arc>
+void ShortestDistance(const Fst<Arc> &fst,
+                      vector<typename Arc::Weight> *distance,
+                      bool reverse = false,
+                      float delta = kDelta) {
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Weight Weight;
+
+  if (!reverse) {
+    AnyArcFilter<Arc> arc_filter;
+    AutoQueue<StateId> state_queue(fst, distance, arc_filter);
+    ShortestDistanceOptions< Arc, AutoQueue<StateId>, AnyArcFilter<Arc> >
+        opts(&state_queue, arc_filter);
+    opts.delta = delta;
+    ShortestDistance(fst, distance, opts);
+  } else {
+    // Solve the forward problem on the reversed FST, then map the result
+    // back onto the states of 'fst'.
+    typedef ReverseArc<Arc> ReverseArc;
+    typedef typename ReverseArc::Weight ReverseWeight;
+    AnyArcFilter<ReverseArc> rarc_filter;
+    VectorFst<ReverseArc> rfst;
+    Reverse(fst, &rfst);
+    vector<ReverseWeight> rdistance;
+    AutoQueue<StateId> state_queue(rfst, &rdistance, rarc_filter);
+    ShortestDistanceOptions< ReverseArc, AutoQueue<StateId>,
+                             AnyArcFilter<ReverseArc> >
+        ropts(&state_queue, rarc_filter);
+    ropts.delta = delta;
+    ShortestDistance(rfst, &rdistance, ropts);
+    distance->clear();
+    if (rdistance.size() == 1 && !rdistance[0].Member()) {
+      distance->resize(1, Arc::Weight::NoWeight());
+      return;
+    }
+    // rdistance[s + 1] holds the value for state s of 'fst'. The bound is
+    // written as size() + 1 < rdistance.size() rather than
+    // size() < rdistance.size() - 1 so that an empty 'rdistance' cannot
+    // wrap around to a huge unsigned value and read out of bounds.
+    while (distance->size() + 1 < rdistance.size())
+      distance->push_back(rdistance[distance->size() + 1].Reverse());
+  }
+}
+
+
+// Returns the sum of the weights of all successful paths in an FST, i.e.,
+// the shortest-distance from the initial state to the final states.
+// If an error was encountered, the returned weight has Member() false.
+template <class Arc>
+typename Arc::Weight ShortestDistance(const Fst<Arc> &fst, float delta = kDelta) {
+  typedef typename Arc::Weight Weight;
+  typedef typename Arc::StateId StateId;
+  vector<Weight> distance;
+
+  if (!(Weight::Properties() & kRightSemiring)) {
+    // Not right distributive: compute distances to the final states instead
+    // and read off the value at the start state.
+    ShortestDistance(fst, &distance, true, delta);
+    StateId start = fst.Start();
+    if (distance.size() == 1 && !distance[0].Member())
+      return Arc::Weight::NoWeight();
+    if (start == kNoStateId || start >= distance.size())
+      return Weight::Zero();
+    return distance[start];
+  }
+
+  // Right distributive: combine each state's distance from the start with
+  // its final weight.
+  ShortestDistance(fst, &distance, false, delta);
+  if (distance.size() == 1 && !distance[0].Member())
+    return Arc::Weight::NoWeight();
+  Weight total = Weight::Zero();
+  for (StateId state = 0; state < distance.size(); ++state)
+    total = Plus(total, Times(distance[state], fst.Final(state)));
+  return total;
+}
+
+
+} // namespace fst
+
+#endif // FST_LIB_SHORTEST_DISTANCE_H__
diff --git a/src/include/fst/shortest-path.h b/src/include/fst/shortest-path.h
new file mode 100644
index 0000000..f12970c
--- /dev/null
+++ b/src/include/fst/shortest-path.h
@@ -0,0 +1,501 @@
+// shortest-path.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// Functions to find shortest paths in an FST.
+
+#ifndef FST_LIB_SHORTEST_PATH_H__
+#define FST_LIB_SHORTEST_PATH_H__
+
+#include <functional>
+#include <utility>
+using std::pair; using std::make_pair;
+#include <vector>
+using std::vector;
+
+#include <fst/cache.h>
+#include <fst/determinize.h>
+#include <fst/queue.h>
+#include <fst/shortest-distance.h>
+#include <fst/test-properties.h>
+
+
+namespace fst {
+
+// Options for the shortest-path algorithms; extends the shortest-distance
+// options with path-specific settings.
+template <class Arc, class Queue, class ArcFilter>
+struct ShortestPathOptions
+    : public ShortestDistanceOptions<Arc, Queue, ArcFilter> {
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Weight Weight;
+
+  // Return n-shortest paths.
+  size_t nshortest;
+
+  // Only return paths with distinct input strings.
+  bool unique;
+
+  // Distance vector already contains the shortest distance from the
+  // initial state.
+  bool has_distance;
+
+  // Single shortest path stops after finding the first path to a final
+  // state. That path is the shortest path only when using the
+  // ShortestFirstQueue and only when all the weights in the FST are between
+  // One() and Zero() according to NaturalLess.
+  // Note: this is a separate member from
+  // ShortestDistanceOptions::first_path, which the base constructor leaves
+  // false.
+  bool first_path;
+
+  // Pruning weight threshold.
+  Weight weight_threshold;
+
+  // Pruning state threshold.
+  StateId state_threshold;
+
+  // The source state of the base options is always kNoStateId here.
+  ShortestPathOptions(Queue *q, ArcFilter filt, size_t n = 1, bool u = false,
+                      bool hasdist = false, float d = kDelta,
+                      bool fp = false, Weight w = Weight::Zero(),
+                      StateId s = kNoStateId)
+      : ShortestDistanceOptions<Arc, Queue, ArcFilter>(q, filt, kNoStateId, d),
+        nshortest(n),
+        unique(u),
+        has_distance(hasdist),
+        first_path(fp),
+        weight_threshold(w),
+        state_threshold(s) {}
+};
+
+
+// Shortest-path algorithm: normally not called directly; prefer
+// 'ShortestPath' below with n=1. 'ofst' contains the shortest path in
+// 'ifst'. 'distance' returns the shortest distances from the source
+// state to each state in 'ifst'. 'opts' is used to specify options
+// such as the queue discipline, the arc filter and delta.
+//
+// The shortest path is the lowest weight path w.r.t. the natural
+// semiring order.
+//
+// The weights need to be right distributive and have the path (kPath)
+// property.
+template<class Arc, class Queue, class ArcFilter>
+void SingleShortestPath(const Fst<Arc> &ifst,
+ MutableFst<Arc> *ofst,
+ vector<typename Arc::Weight> *distance,
+ ShortestPathOptions<Arc, Queue, ArcFilter> &opts) {
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Weight Weight;
+
+ ofst->DeleteStates();
+ ofst->SetInputSymbols(ifst.InputSymbols());
+ ofst->SetOutputSymbols(ifst.OutputSymbols());
+
+ if (ifst.Start() == kNoStateId) {  // No start state: ofst stays empty; only the error flag is passed on.
+ if (ifst.Properties(kError, false)) ofst->SetProperties(kError, kError);
+ return;
+ }
+
+ vector<bool> enqueued;
+ vector<StateId> parent;  // parent[s]: predecessor of s on the best path found so far.
+ vector<Arc> arc_parent;  // arc_parent[s]: the arc taken from parent[s] into s.
+
+ Queue *state_queue = opts.state_queue;
+ StateId source = opts.source == kNoStateId ? ifst.Start() : opts.source;
+ Weight f_distance = Weight::Zero();  // Best weight of a complete path found so far.
+ StateId f_parent = kNoStateId;  // Final state at which f_distance was obtained.
+
+ distance->clear();
+ state_queue->Clear();
+ if (opts.nshortest != 1) {
+ FSTERROR() << "SingleShortestPath: for nshortest > 1, use ShortestPath"
+ << " instead";
+ ofst->SetProperties(kError, kError);
+ return;
+ }
+ if (opts.weight_threshold != Weight::Zero() ||
+ opts.state_threshold != kNoStateId) {
+ FSTERROR() <<
+ "SingleShortestPath: weight and state thresholds not applicable";
+ ofst->SetProperties(kError, kError);
+ return;
+ }
+ if ((Weight::Properties() & (kPath | kRightSemiring))
+ != (kPath | kRightSemiring)) {
+ FSTERROR() << "SingleShortestPath: Weight needs to have the path"
+ << " property and be right distributive: " << Weight::Type();
+ ofst->SetProperties(kError, kError);
+ return;
+ }
+ while (distance->size() < source) {  // Pad the entries for the states below 'source'.
+ distance->push_back(Weight::Zero());
+ enqueued.push_back(false);
+ parent.push_back(kNoStateId);
+ arc_parent.push_back(Arc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId));
+ }
+ distance->push_back(Weight::One());  // Entry for 'source' itself.
+ parent.push_back(kNoStateId);
+ arc_parent.push_back(Arc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId));
+ state_queue->Enqueue(source);
+ enqueued.push_back(true);
+
+ while (!state_queue->Empty()) {
+ StateId s = state_queue->Head();
+ state_queue->Dequeue();
+ enqueued[s] = false;
+ Weight sd = (*distance)[s];
+ if (ifst.Final(s) != Weight::Zero()) {
+ Weight w = Times(sd, ifst.Final(s));
+ if (f_distance != Plus(f_distance, w)) {  // Plus changes f_distance: s is the new best final state.
+ f_distance = Plus(f_distance, w);
+ f_parent = s;
+ }
+ if (!f_distance.Member()) {
+ ofst->SetProperties(kError, kError);
+ return;
+ }
+ if (opts.first_path)  // Caller asked to stop at the first final state reached.
+ break;
+ }
+ for (ArcIterator< Fst<Arc> > aiter(ifst, s);
+ !aiter.Done();
+ aiter.Next()) {
+ const Arc &arc = aiter.Value();
+ while (distance->size() <= arc.nextstate) {  // Grow all four vectors together.
+ distance->push_back(Weight::Zero());
+ enqueued.push_back(false);
+ parent.push_back(kNoStateId);
+ arc_parent.push_back(Arc(kNoLabel, kNoLabel, Weight::Zero(),
+ kNoStateId));
+ }
+ Weight &nd = (*distance)[arc.nextstate];
+ Weight w = Times(sd, arc.weight);
+ if (nd != Plus(nd, w)) {  // Plus changes nd: record the path through s as the best so far.
+ nd = Plus(nd, w);
+ if (!nd.Member()) {
+ ofst->SetProperties(kError, kError);
+ return;
+ }
+ parent[arc.nextstate] = s;
+ arc_parent[arc.nextstate] = arc;
+ if (!enqueued[arc.nextstate]) {
+ state_queue->Enqueue(arc.nextstate);
+ enqueued[arc.nextstate] = true;
+ } else {
+ state_queue->Update(arc.nextstate);
+ }
+ }
+ }
+ }
+
+ StateId s_p = kNoStateId, d_p = kNoStateId;  // ofst states created in the current and previous iteration.
+ for (StateId s = f_parent, d = kNoStateId;  // Follow parent links back from the best final state.
+ s != kNoStateId;
+ d = s, s = parent[s]) {
+ d_p = s_p;
+ s_p = ofst->AddState();
+ if (d == kNoStateId) {  // First iteration: s is the final state of the path.
+ ofst->SetFinal(s_p, ifst.Final(f_parent));
+ } else {
+ arc_parent[d].nextstate = d_p;
+ ofst->AddArc(s_p, arc_parent[d]);
+ }
+ }
+ ofst->SetStart(s_p);  // s_p is the last state added (kNoStateId if no final state was reached).
+ if (ifst.Properties(kError, false)) ofst->SetProperties(kError, kError);
+ ofst->SetProperties(
+ ShortestPathProperties(ofst->Properties(kFstProperties, false)),
+ kFstProperties);
+}
+
+
+// Comparison functor over indices into a vector of (state, weight) pairs.
+// Each pair is ranked by its weight multiplied by the distance recorded for
+// its state.
+template <class S, class W>
+class ShortestPathCompare {
+ public:
+  typedef S StateId;
+  typedef W Weight;
+  typedef pair<StateId, Weight> Pair;
+
+  // Keeps references to 'pairs' and 'distance'; both must outlive this
+  // object.
+  ShortestPathCompare(const vector<Pair>& pairs,
+                      const vector<Weight>& distance,
+                      StateId sfinal, float d)
+      : pairs_(pairs), distance_(distance), superfinal_(sfinal), delta_(d) {}
+
+  bool operator()(const StateId x, const StateId y) const {
+    const Pair &px = pairs_[x];
+    const Pair &py = pairs_[y];
+    const Weight wx = Times(Distance(px.first), px.second);
+    const Weight wy = Times(Distance(py.first), py.second);
+    const bool x_complete = px.first == superfinal_;
+    const bool y_complete = py.first == superfinal_;
+    // Penalize complete paths to ensure correct results with inexact weights.
+    // This forms a strict weak order so long as ApproxEqual(a, b) =>
+    // ApproxEqual(a, c) for all c s.t. less_(a, c) && less_(c, b).
+    if (x_complete && !y_complete)
+      return less_(wy, wx) || ApproxEqual(wx, wy, delta_);
+    if (y_complete && !x_complete)
+      return less_(wy, wx) && !ApproxEqual(wx, wy, delta_);
+    return less_(wy, wx);
+  }
+
+ private:
+  // Distance recorded for state 's': One() for the superfinal state, Zero()
+  // for a state beyond the end of the distance vector.
+  Weight Distance(StateId s) const {
+    if (s == superfinal_) return Weight::One();
+    if (s < distance_.size()) return distance_[s];
+    return Weight::Zero();
+  }
+
+  const vector<Pair> &pairs_;
+  const vector<Weight> &distance_;
+  StateId superfinal_;
+  float delta_;
+  NaturalLess<Weight> less_;
+};
+
+
+// N-Shortest-path algorithm: implements the core n-shortest path
+// algorithm. The output is built REVERSED. See below for versions with
+// more options and not reversed.
+//
+// 'ofst' contains the REVERSE of 'n'-shortest paths in 'ifst'.
+// 'distance' must contain the shortest distance from each state to a final
+// state in 'ifst'. 'delta' is the convergence delta.
+//
+// The n-shortest paths are the n-lowest weight paths w.r.t. the
+// natural semiring order. The single path that can be read from the
+// ith of at most n transitions leaving the initial state of 'ofst' is
+// the ith shortest path. Disregarding the initial state and initial
+// transitions, the n-shortest paths, in fact, form a tree rooted at
+// the single final state.
+//
+// The weights need to be left and right distributive (kSemiring) and
+// have the path (kPath) property.
+//
+// The algorithm is from Mohri and Riley, "An Efficient Algorithm for
+// the n-best-strings problem", ICSLP 2002. The algorithm relies on
+// the shortest-distance algorithm. There are some issues with the
+// pseudo-code as written in the paper (viz., line 11).
+//
+// IMPLEMENTATION NOTE: The input fst 'ifst' can be a delayed fst and
+// at any state in its expansion the values of distance vector need only
+// be defined at that time for the states that are known to exist.
+template<class Arc, class RevArc>
+void NShortestPath(const Fst<RevArc> &ifst,
+ MutableFst<Arc> *ofst,
+ const vector<typename Arc::Weight> &distance,
+ size_t n,
+ float delta = kDelta,
+ typename Arc::Weight weight_threshold = Arc::Weight::Zero(),
+ typename Arc::StateId state_threshold = kNoStateId) {
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Weight Weight;
+ typedef pair<StateId, Weight> Pair;
+ typedef typename RevArc::Weight RevWeight;
+
+ if (n <= 0) return;  // n is unsigned, so this only catches n == 0.
+ if ((Weight::Properties() & (kPath | kSemiring)) != (kPath | kSemiring)) {
+ FSTERROR() << "NShortestPath: Weight needs to have the "
+ << "path property and be distributive: "
+ << Weight::Type();
+ ofst->SetProperties(kError, kError);
+ return;
+ }
+ ofst->DeleteStates();
+ ofst->SetInputSymbols(ifst.InputSymbols());
+ ofst->SetOutputSymbols(ifst.OutputSymbols());
+ // Each state in 'ofst' corresponds to a path with weight w from the
+ // initial state of 'ifst' to a state s in 'ifst', that can be
+ // characterized by a pair (s,w). The vector 'pairs' maps each
+ // state in 'ofst' to the corresponding pair (s,w); it is indexed by the
+ // 'ofst' state id.
+ vector<Pair> pairs;
+ // The superfinal state is denoted by -1, 'compare' knows that the
+ // distance from 'superfinal' to the final state is 'Weight::One()',
+ // hence 'distance[superfinal]' is not needed.
+ StateId superfinal = -1;
+ ShortestPathCompare<StateId, Weight>
+ compare(pairs, distance, superfinal, delta);
+ vector<StateId> heap;
+ // 'r[s + 1]', for a state 's' of 'ifst', counts the 'ofst' states popped
+ // from the heap whose corresponding pair contains 's', i.e., the number of
+ // paths computed so far to 's'. Valid for 's == -1' (superfinal).
+ vector<int> r;
+ NaturalLess<Weight> less;
+ if (ifst.Start() == kNoStateId ||  // Nothing to do: no start state, ...
+ distance.size() <= ifst.Start() ||  // ... no distance known for it, ...
+ distance[ifst.Start()] == Weight::Zero() ||  // ... distance Zero() at the start state, ...
+ less(weight_threshold, Weight::One()) ||  // ... weight threshold below One(), ...
+ state_threshold == 0) {  // ... or no states allowed at all.
+ if (ifst.Properties(kError, false)) ofst->SetProperties(kError, kError);
+ return;
+ }
+ ofst->SetStart(ofst->AddState());
+ StateId final = ofst->AddState();
+ ofst->SetFinal(final, Weight::One());
+ while (pairs.size() <= final)
+ pairs.push_back(Pair(kNoStateId, Weight::Zero()));
+ pairs[final] = Pair(ifst.Start(), Weight::One());
+ heap.push_back(final);
+ Weight limit = Times(distance[ifst.Start()], weight_threshold);  // Pruning bound on the weight of a path.
+
+ while (!heap.empty()) {
+ pop_heap(heap.begin(), heap.end(), compare);
+ StateId state = heap.back();
+ Pair p = pairs[state];
+ heap.pop_back();
+ Weight d = p.first == superfinal ? Weight::One() :
+ p.first < distance.size() ? distance[p.first] : Weight::Zero();
+
+ if (less(limit, Times(d, p.second)) ||  // Prune: over the weight limit ...
+ (state_threshold != kNoStateId &&
+ ofst->NumStates() >= state_threshold))  // ... or the state budget is used up.
+ continue;
+
+ while (r.size() <= p.first + 1) r.push_back(0);
+ ++r[p.first + 1];
+ if (p.first == superfinal)  // A complete path: attach it to the start state of 'ofst'.
+ ofst->AddArc(ofst->Start(), Arc(0, 0, Weight::One(), state));
+ if ((p.first == superfinal) && (r[p.first + 1] == n)) break;  // n-th complete path found: done.
+ if (r[p.first + 1] > n) continue;  // Already popped n paths to this state.
+ if (p.first == superfinal) continue;  // The superfinal state has nothing to expand.
+
+ for (ArcIterator< Fst<RevArc> > aiter(ifst, p.first);
+ !aiter.Done();
+ aiter.Next()) {
+ const RevArc &rarc = aiter.Value();
+ Arc arc(rarc.ilabel, rarc.olabel, rarc.weight.Reverse(), rarc.nextstate);
+ Weight w = Times(p.second, arc.weight);
+ StateId next = ofst->AddState();
+ pairs.push_back(Pair(arc.nextstate, w));
+ arc.nextstate = state;  // The arc is added reversed: from the new state back to 'state'.
+ ofst->AddArc(next, arc);
+ heap.push_back(next);
+ push_heap(heap.begin(), heap.end(), compare);
+ }
+
+ Weight finalw = ifst.Final(p.first).Reverse();
+ if (finalw != Weight::Zero()) {  // Final state of 'ifst': add a candidate ending at the superfinal state.
+ Weight w = Times(p.second, finalw);
+ StateId next = ofst->AddState();
+ pairs.push_back(Pair(superfinal, w));
+ ofst->AddArc(next, Arc(0, 0, finalw, state));
+ heap.push_back(next);
+ push_heap(heap.begin(), heap.end(), compare);
+ }
+ }
+ Connect(ofst);
+ if (ifst.Properties(kError, false)) ofst->SetProperties(kError, kError);
+ ofst->SetProperties(
+ ShortestPathProperties(ofst->Properties(kFstProperties, false)),
+ kFstProperties);
+}
+
+
+// N-Shortest-path algorithm: this version allow fine control
+// via the options argument. See below for a simpler interface.
+//
+// 'ofst' contains the n-shortest paths in 'ifst'. 'distance' returns
+// the shortest distances from the source state to each state in
+// 'ifst'. 'opts' is used to specify options such as the number of
+// paths to return, whether they need to have distinct input
+// strings, the queue discipline, the arc filter and the convergence
+// delta.
+//
+// The n-shortest paths are the n-lowest weight paths w.r.t. the
+// natural semiring order. The single path that can be read from the
+// ith of at most n transitions leaving the initial state of 'ofst' is
+// the ith shortest path. Disregarding the initial state and initial
+// transitions, the n-shortest paths, in fact, form a tree rooted at
+// the single final state.
+
+// The weights need to be right distributive and have the path (kPath)
+// property. They need to be left distributive as well for nshortest
+// > 1.
+//
+// The algorithm is from Mohri and Riley, "An Efficient Algorithm for
+// the n-best-strings problem", ICSLP 2002. The algorithm relies on
+// the shortest-distance algorithm. There are some issues with the
+// pseudo-code as written in the paper (viz., line 11).
+template<class Arc, class Queue, class ArcFilter>
+void ShortestPath(const Fst<Arc> &ifst, MutableFst<Arc> *ofst,
+ vector<typename Arc::Weight> *distance,
+ ShortestPathOptions<Arc, Queue, ArcFilter> &opts) {
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Weight Weight;
+ typedef ReverseArc<Arc> ReverseArc;
+
+ size_t n = opts.nshortest;
+ if (n == 1) {
+ SingleShortestPath(ifst, ofst, distance, opts);
+ return;
+ }
+ if (n <= 0) return;
+ if ((Weight::Properties() & (kPath | kSemiring)) != (kPath | kSemiring)) {
+ FSTERROR() << "ShortestPath: n-shortest: Weight needs to have the "
+ << "path property and be distributive: "
+ << Weight::Type();
+ ofst->SetProperties(kError, kError);
+ return;
+ }
+ if (!opts.has_distance) {
+ ShortestDistance(ifst, distance, opts);
+ if (distance->size() == 1 && !(*distance)[0].Member()) {
+ ofst->SetProperties(kError, kError);
+ return;
+ }
+ }
+ // Algorithm works on the reverse of 'fst' : 'rfst', 'distance' is
+ // the distance to the final state in 'rfst', 'ofst' is built as the
+ // reverse of the tree of n-shortest path in 'rfst'.
+ VectorFst<ReverseArc> rfst;
+ Reverse(ifst, &rfst);
+ Weight d = Weight::Zero();
+ for (ArcIterator< VectorFst<ReverseArc> > aiter(rfst, 0);
+ !aiter.Done(); aiter.Next()) {
+ const ReverseArc &arc = aiter.Value();
+ StateId s = arc.nextstate - 1;
+ if (s < distance->size())
+ d = Plus(d, Times(arc.weight.Reverse(), (*distance)[s]));
+ }
+ distance->insert(distance->begin(), d);
+
+ if (!opts.unique) {
+ NShortestPath(rfst, ofst, *distance, n, opts.delta,
+ opts.weight_threshold, opts.state_threshold);
+ } else {
+ vector<Weight> ddistance;
+ DeterminizeFstOptions<ReverseArc> dopts(opts.delta);
+ DeterminizeFst<ReverseArc> dfst(rfst, *distance, &ddistance, dopts);
+ NShortestPath(dfst, ofst, ddistance, n, opts.delta,
+ opts.weight_threshold, opts.state_threshold);
+ }
+ distance->erase(distance->begin());
+}
+
+
+// Shortest-path algorithm: simplified interface. See above for a
+// version that allows finer control.
+//
+// 'ofst' contains the 'n'-shortest paths in 'ifst'. The queue
+// discipline is automatically selected. When 'unique' == true, only
+// paths with distinct input labels are returned.
+//
+// The n-shortest paths are the n-lowest weight paths w.r.t. the
+// natural semiring order. The single path that can be read from the
+// ith of at most n transitions leaving the initial state of 'ofst' is
+// the ith best path.
+//
+// The weights need to be right distributive and have the path
+// (kPath) property.
+template<class Arc>
+void ShortestPath(const Fst<Arc> &ifst, MutableFst<Arc> *ofst,
+ size_t n = 1, bool unique = false,
+ bool first_path = false,
+ typename Arc::Weight weight_threshold = Arc::Weight::Zero(),
+ typename Arc::StateId state_threshold = kNoStateId) {
+ vector<typename Arc::Weight> distance;
+ AnyArcFilter<Arc> arc_filter;
+ AutoQueue<typename Arc::StateId> state_queue(ifst, &distance, arc_filter);
+ ShortestPathOptions< Arc, AutoQueue<typename Arc::StateId>,
+ AnyArcFilter<Arc> > opts(&state_queue, arc_filter, n, unique, false,
+ kDelta, first_path, weight_threshold,
+ state_threshold);
+ ShortestPath(ifst, ofst, &distance, opts);
+}
+
+} // namespace fst
+
+#endif // FST_LIB_SHORTEST_PATH_H__
diff --git a/src/include/fst/signed-log-weight.h b/src/include/fst/signed-log-weight.h
new file mode 100644
index 0000000..da96479
--- /dev/null
+++ b/src/include/fst/signed-log-weight.h
@@ -0,0 +1,367 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: krr@google.com (Kasturi Rangan Raghavan)
+// \file
+// LogWeight along with sign information that represents the value X in the
+// linear domain as <sign(X), -ln(|X|)>
+// The sign is a TropicalWeight:
+// positive, TropicalWeight.Value() > 0.0, recommended value 1.0
+// negative, TropicalWeight.Value() <= 0.0, recommended value -1.0
+
+#ifndef FST_LIB_SIGNED_LOG_WEIGHT_H_
+#define FST_LIB_SIGNED_LOG_WEIGHT_H_
+
+#include <fst/float-weight.h>
+#include <fst/pair-weight.h>
+
+
+namespace fst {
+template <class T>
+class SignedLogWeightTpl
+ : public PairWeight<TropicalWeight, LogWeightTpl<T> > {
+ public:
+ typedef TropicalWeight X1;
+ typedef LogWeightTpl<T> X2;
+ using PairWeight<X1, X2>::Value1;
+ using PairWeight<X1, X2>::Value2;
+
+ using PairWeight<X1, X2>::Reverse;
+ using PairWeight<X1, X2>::Quantize;
+ using PairWeight<X1, X2>::Member;
+
+ typedef SignedLogWeightTpl<T> ReverseWeight;
+
+ SignedLogWeightTpl() : PairWeight<X1, X2>() {}
+
+ SignedLogWeightTpl(const SignedLogWeightTpl<T>& w)
+ : PairWeight<X1, X2> (w) { }
+
+ SignedLogWeightTpl(const PairWeight<X1, X2>& w)
+ : PairWeight<X1, X2> (w) { }
+
+ SignedLogWeightTpl(const X1& x1, const X2& x2)
+ : PairWeight<X1, X2>(x1, x2) { }
+
+ static const SignedLogWeightTpl<T> &Zero() {
+ static const SignedLogWeightTpl<T> zero(X1(1.0), X2::Zero());
+ return zero;
+ }
+
+ static const SignedLogWeightTpl<T> &One() {
+ static const SignedLogWeightTpl<T> one(X1(1.0), X2::One());
+ return one;
+ }
+
+ static const SignedLogWeightTpl<T> &NoWeight() {
+ static const SignedLogWeightTpl<T> no_weight(X1(1.0), X2::NoWeight());
+ return no_weight;
+ }
+
+ static const string &Type() {
+ static const string type = "signed_log_" + X1::Type() + "_" + X2::Type();
+ return type;
+ }
+
+ ProductWeight<X1, X2> Quantize(float delta = kDelta) const {
+ return PairWeight<X1, X2>::Quantize();
+ }
+
+ ReverseWeight Reverse() const {
+ return PairWeight<X1, X2>::Reverse();
+ }
+
+ bool Member() const {
+ return PairWeight<X1, X2>::Member();
+ }
+
+ static uint64 Properties() {
+ // not idempotent nor path
+ return kLeftSemiring | kRightSemiring | kCommutative;
+ }
+
+ size_t Hash() const {
+ size_t h1;
+ if (Value2() == X2::Zero() || Value1().Value() > 0.0)
+ h1 = TropicalWeight(1.0).Hash();
+ else
+ h1 = TropicalWeight(-1.0).Hash();
+ size_t h2 = Value2().Hash();
+ const int lshift = 5;
+ const int rshift = CHAR_BIT * sizeof(size_t) - 5;
+ return h1 << lshift ^ h1 >> rshift ^ h2;
+ }
+};
+
+template <class T>
+inline SignedLogWeightTpl<T> Plus(const SignedLogWeightTpl<T> &w1,
+ const SignedLogWeightTpl<T> &w2) {
+ if (!w1.Member() || !w2.Member())
+ return SignedLogWeightTpl<T>::NoWeight();
+ bool s1 = w1.Value1().Value() > 0.0;
+ bool s2 = w2.Value1().Value() > 0.0;
+ T f1 = w1.Value2().Value();
+ T f2 = w2.Value2().Value();
+ if (f1 == FloatLimits<T>::kPosInfinity)
+ return w2;
+ else if (f2 == FloatLimits<T>::kPosInfinity)
+ return w1;
+ else if (f1 == f2) {
+ if (s1 == s2)
+ return SignedLogWeightTpl<T>(w1.Value1(), (f2 - log(2.0F)));
+ else
+ return SignedLogWeightTpl<T>::Zero();
+ } else if (f1 > f2) {
+ if (s1 == s2) {
+ return SignedLogWeightTpl<T>(
+ w1.Value1(), (f2 - log(1.0F + exp(f2 - f1))));
+ } else {
+ return SignedLogWeightTpl<T>(
+ w2.Value1(), (f2 - log(1.0F - exp(f2 - f1))));
+ }
+ } else {
+ if (s2 == s1) {
+ return SignedLogWeightTpl<T>(
+ w2.Value1(), (f1 - log(1.0F + exp(f1 - f2))));
+ } else {
+ return SignedLogWeightTpl<T>(
+ w1.Value1(), (f1 - log(1.0F - exp(f1 - f2))));
+ }
+ }
+}
+
+template <class T>
+inline SignedLogWeightTpl<T> Minus(const SignedLogWeightTpl<T> &w1,
+ const SignedLogWeightTpl<T> &w2) {
+ SignedLogWeightTpl<T> minus_w2(-w2.Value1().Value(), w2.Value2());
+ return Plus(w1, minus_w2);
+}
+
+template <class T>
+inline SignedLogWeightTpl<T> Times(const SignedLogWeightTpl<T> &w1,
+ const SignedLogWeightTpl<T> &w2) {
+ if (!w1.Member() || !w2.Member())
+ return SignedLogWeightTpl<T>::NoWeight();
+ bool s1 = w1.Value1().Value() > 0.0;
+ bool s2 = w2.Value1().Value() > 0.0;
+ T f1 = w1.Value2().Value();
+ T f2 = w2.Value2().Value();
+ if (s1 == s2)
+ return SignedLogWeightTpl<T>(TropicalWeight(1.0), (f1 + f2));
+ else
+ return SignedLogWeightTpl<T>(TropicalWeight(-1.0), (f1 + f2));
+}
+
+template <class T>
+inline SignedLogWeightTpl<T> Divide(const SignedLogWeightTpl<T> &w1,
+ const SignedLogWeightTpl<T> &w2,
+ DivideType typ = DIVIDE_ANY) {
+ if (!w1.Member() || !w2.Member())
+ return SignedLogWeightTpl<T>::NoWeight();
+ bool s1 = w1.Value1().Value() > 0.0;
+ bool s2 = w2.Value1().Value() > 0.0;
+ T f1 = w1.Value2().Value();
+ T f2 = w2.Value2().Value();
+ if (f2 == FloatLimits<T>::kPosInfinity)
+ return SignedLogWeightTpl<T>(TropicalWeight(1.0),
+ FloatLimits<T>::kNumberBad);
+ else if (f1 == FloatLimits<T>::kPosInfinity)
+ return SignedLogWeightTpl<T>(TropicalWeight(1.0),
+ FloatLimits<T>::kPosInfinity);
+ else if (s1 == s2)
+ return SignedLogWeightTpl<T>(TropicalWeight(1.0), (f1 - f2));
+ else
+ return SignedLogWeightTpl<T>(TropicalWeight(-1.0), (f1 - f2));
+}
+
+template <class T>
+inline bool ApproxEqual(const SignedLogWeightTpl<T> &w1,
+ const SignedLogWeightTpl<T> &w2,
+ float delta = kDelta) {
+ bool s1 = w1.Value1().Value() > 0.0;
+ bool s2 = w2.Value1().Value() > 0.0;
+ if (s1 == s2) {
+ return ApproxEqual(w1.Value2(), w2.Value2(), delta);
+ } else {
+ return w1.Value2() == LogWeightTpl<T>::Zero()
+ && w2.Value2() == LogWeightTpl<T>::Zero();
+ }
+}
+
+template <class T>
+inline bool operator==(const SignedLogWeightTpl<T> &w1,
+ const SignedLogWeightTpl<T> &w2) {
+ bool s1 = w1.Value1().Value() > 0.0;
+ bool s2 = w2.Value1().Value() > 0.0;
+ if (s1 == s2)
+ return w1.Value2() == w2.Value2();
+ else
+ return (w1.Value2() == LogWeightTpl<T>::Zero()) &&
+ (w2.Value2() == LogWeightTpl<T>::Zero());
+}
+
+
+// Single-precision signed-log weight
+typedef SignedLogWeightTpl<float> SignedLogWeight;
+// Double-precision signed-log weight
+typedef SignedLogWeightTpl<double> SignedLog64Weight;
+
+//
+// WEIGHT CONVERTER SPECIALIZATIONS.
+//
+
+template <class W1, class W2>
+bool SignedLogConvertCheck(W1 w) {
+ if (w.Value1().Value() < 0.0) {
+ FSTERROR() << "WeightConvert: can't convert weight from \""
+ << W1::Type() << "\" to \"" << W2::Type();
+ return false;
+ }
+ return true;
+}
+
+// Convert to tropical
+template <>
+struct WeightConvert<SignedLogWeight, TropicalWeight> {
+ TropicalWeight operator()(SignedLogWeight w) const {
+ if (!SignedLogConvertCheck<SignedLogWeight, TropicalWeight>(w))
+ return TropicalWeight::NoWeight();
+ return w.Value2().Value();
+ }
+};
+
+template <>
+struct WeightConvert<SignedLog64Weight, TropicalWeight> {
+ TropicalWeight operator()(SignedLog64Weight w) const {
+ if (!SignedLogConvertCheck<SignedLog64Weight, TropicalWeight>(w))
+ return TropicalWeight::NoWeight();
+ return w.Value2().Value();
+ }
+};
+
+// Convert to log
+template <>
+struct WeightConvert<SignedLogWeight, LogWeight> {
+ LogWeight operator()(SignedLogWeight w) const {
+ if (!SignedLogConvertCheck<SignedLogWeight, LogWeight>(w))
+ return LogWeight::NoWeight();
+ return w.Value2().Value();
+ }
+};
+
+template <>
+struct WeightConvert<SignedLog64Weight, LogWeight> {
+ LogWeight operator()(SignedLog64Weight w) const {
+ if (!SignedLogConvertCheck<SignedLog64Weight, LogWeight>(w))
+ return LogWeight::NoWeight();
+ return w.Value2().Value();
+ }
+};
+
+// Convert to log64
+template <>
+struct WeightConvert<SignedLogWeight, Log64Weight> {
+ Log64Weight operator()(SignedLogWeight w) const {
+ if (!SignedLogConvertCheck<SignedLogWeight, Log64Weight>(w))
+ return Log64Weight::NoWeight();
+ return w.Value2().Value();
+ }
+};
+
+template <>
+struct WeightConvert<SignedLog64Weight, Log64Weight> {
+ Log64Weight operator()(SignedLog64Weight w) const {
+ if (!SignedLogConvertCheck<SignedLog64Weight, Log64Weight>(w))
+ return Log64Weight::NoWeight();
+ return w.Value2().Value();
+ }
+};
+
+// Convert to signed log
+template <>
+struct WeightConvert<TropicalWeight, SignedLogWeight> {
+ SignedLogWeight operator()(TropicalWeight w) const {
+ TropicalWeight x1 = 1.0;
+ LogWeight x2 = w.Value();
+ return SignedLogWeight(x1, x2);
+ }
+};
+
+template <>
+struct WeightConvert<LogWeight, SignedLogWeight> {
+ SignedLogWeight operator()(LogWeight w) const {
+ TropicalWeight x1 = 1.0;
+ LogWeight x2 = w.Value();
+ return SignedLogWeight(x1, x2);
+ }
+};
+
+template <>
+struct WeightConvert<Log64Weight, SignedLogWeight> {
+ SignedLogWeight operator()(Log64Weight w) const {
+ TropicalWeight x1 = 1.0;
+ LogWeight x2 = w.Value();
+ return SignedLogWeight(x1, x2);
+ }
+};
+
+template <>
+struct WeightConvert<SignedLog64Weight, SignedLogWeight> {
+ SignedLogWeight operator()(SignedLog64Weight w) const {
+ TropicalWeight x1 = w.Value1();
+ LogWeight x2 = w.Value2().Value();
+ return SignedLogWeight(x1, x2);
+ }
+};
+
+// Convert to signed log64
+template <>
+struct WeightConvert<TropicalWeight, SignedLog64Weight> {
+ SignedLog64Weight operator()(TropicalWeight w) const {
+ TropicalWeight x1 = 1.0;
+ Log64Weight x2 = w.Value();
+ return SignedLog64Weight(x1, x2);
+ }
+};
+
+template <>
+struct WeightConvert<LogWeight, SignedLog64Weight> {
+ SignedLog64Weight operator()(LogWeight w) const {
+ TropicalWeight x1 = 1.0;
+ Log64Weight x2 = w.Value();
+ return SignedLog64Weight(x1, x2);
+ }
+};
+
+template <>
+struct WeightConvert<Log64Weight, SignedLog64Weight> {
+ SignedLog64Weight operator()(Log64Weight w) const {
+ TropicalWeight x1 = 1.0;
+ Log64Weight x2 = w.Value();
+ return SignedLog64Weight(x1, x2);
+ }
+};
+
+template <>
+struct WeightConvert<SignedLogWeight, SignedLog64Weight> {
+ SignedLog64Weight operator()(SignedLogWeight w) const {
+ TropicalWeight x1 = w.Value1();
+ Log64Weight x2 = w.Value2().Value();
+ return SignedLog64Weight(x1, x2);
+ }
+};
+
+} // namespace fst
+
+#endif // FST_LIB_SIGNED_LOG_WEIGHT_H_
diff --git a/src/include/fst/slist.h b/src/include/fst/slist.h
new file mode 100644
index 0000000..9f94027
--- /dev/null
+++ b/src/include/fst/slist.h
@@ -0,0 +1,61 @@
+// slist.h
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Includes slist definition or defines in terms of STL list as a fallback.
+
+#ifndef FST_LIB_SLIST_H__
+#define FST_LIB_SLIST_H__
+
+#include <fst/config.h>
+
+#if !defined(__ANDROID__) && defined(HAVE___GNU_CXX__SLIST_INT_)
+
+#include <slist>
+
+namespace fst {
+
+using __gnu_cxx::slist;
+
+}
+
+#else
+
+#include <list>
+
namespace fst {

using std::list;

// Fallback used when __gnu_cxx::slist is unavailable: a std::list that
// additionally offers the one slist-specific operation, erase_after().
template <typename T> class slist : public list<T> {
 public:
  typedef typename list<T>::iterator iterator;
  typedef typename list<T>::const_iterator const_iterator;

  using list<T>::erase;

  // Erases the element that follows 'pos' and returns an iterator to the
  // element after the erased one (end() if there is none). This matches
  // the return value of __gnu_cxx::slist::erase_after, so callers behave
  // the same under either configuration; the previous fallback returned
  // 'pos' instead. If 'pos' is the last element nothing is erased.
  iterator erase_after(iterator pos) {
    iterator victim = pos;
    ++victim;
    if (victim == this->end()) return victim;
    return erase(victim);
  }
};

}  // namespace fst
+
+#endif // HAVE___GNU_CXX__SLIST_INT_
+
+#endif // FST_LIB_SLIST_H__
diff --git a/src/include/fst/sparse-power-weight.h b/src/include/fst/sparse-power-weight.h
new file mode 100644
index 0000000..a1ff56a
--- /dev/null
+++ b/src/include/fst/sparse-power-weight.h
@@ -0,0 +1,225 @@
+// sparse-power-weight.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: krr@google.com (Kasturi Rangan Raghavan)
+// Inspiration: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// Cartesian power weight semiring operation definitions.
+// Uses SparseTupleWeight as underlying representation.
+
+#ifndef FST_LIB_SPARSE_POWER_WEIGHT_H__
+#define FST_LIB_SPARSE_POWER_WEIGHT_H__
+
+#include<string>
+
+#include <fst/sparse-tuple-weight.h>
+#include <fst/weight.h>
+
+
+namespace fst {
+
+// Below SparseTupleWeight*Mapper are used in conjunction with
+// SparseTupleWeightMap to compute the respective semiring operations
// Element-wise Plus functor for SparseTupleWeightMap; the key is ignored.
template<class W, class K>
struct SparseTupleWeightPlusMapper {
  W Map(const K& k, const W& v1, const W& v2) const {
    W sum = Plus(v1, v2);
    return sum;
  }
};
+
// Element-wise Times functor for SparseTupleWeightMap; the key is ignored.
template<class W, class K>
struct SparseTupleWeightTimesMapper {
  W Map(const K& k, const W& v1, const W& v2) const {
    W product = Times(v1, v2);
    return product;
  }
};
+
+template<class W, class K>
+struct SparseTupleWeightDivideMapper {
+ SparseTupleWeightDivideMapper(DivideType divide_type) {
+ divide_type_ = divide_type;
+ }
+ W Map(const K& k, const W& v1, const W& v2) const {
+ return Divide(v1, v2, divide_type_);
+ }
+ DivideType divide_type_;
+};
+
// Element-wise comparison functor for SparseTupleWeightMap: maps a pair of
// values to W::One() if they are approximately equal within the stored
// tolerance and to W::Zero() otherwise. The key is ignored.
template<class W, class K>
struct SparseTupleWeightApproxMapper {
  SparseTupleWeightApproxMapper(float delta) : delta_(delta) {}

  W Map(const K& k, const W& v1, const W& v2) const {
    if (ApproxEqual(v1, v2, delta_)) return W::One();
    return W::Zero();
  }

  float delta_;
};
+
+// Sparse cartesian power semiring: W ^ n
+// Forms:
+// - a left semimodule when W is a left semiring,
+// - a right semimodule when W is a right semiring,
+// - a bisemimodule when W is a semiring,
+// the free semimodule of rank n over W
+// The Times operation is overloaded to provide the
+// left and right scalar products.
+// K is the key value type. kNoKey(-1) is reserved for internal use
+template <class W, class K = int>
+class SparsePowerWeight : public SparseTupleWeight<W, K> {
+ public:
+ using SparseTupleWeight<W, K>::Zero;
+ using SparseTupleWeight<W, K>::One;
+ using SparseTupleWeight<W, K>::NoWeight;
+ using SparseTupleWeight<W, K>::Quantize;
+ using SparseTupleWeight<W, K>::Reverse;
+
+ typedef SparsePowerWeight<typename W::ReverseWeight, K> ReverseWeight;
+
+ SparsePowerWeight() {}
+
+ SparsePowerWeight(const SparseTupleWeight<W, K> &w) :
+ SparseTupleWeight<W, K>(w) { }
+
+ template <class Iterator>
+ SparsePowerWeight(Iterator begin, Iterator end) :
+ SparseTupleWeight<W, K>(begin, end) { }
+
+ SparsePowerWeight(const K &key, const W &w) :
+ SparseTupleWeight<W, K>(key, w) { }
+
+ static const SparsePowerWeight<W, K> &Zero() {
+ static const SparsePowerWeight<W, K> zero(SparseTupleWeight<W, K>::Zero());
+ return zero;
+ }
+
+ static const SparsePowerWeight<W, K> &One() {
+ static const SparsePowerWeight<W, K> one(SparseTupleWeight<W, K>::One());
+ return one;
+ }
+
+ static const SparsePowerWeight<W, K> &NoWeight() {
+ static const SparsePowerWeight<W, K> no_weight(
+ SparseTupleWeight<W, K>::NoWeight());
+ return no_weight;
+ }
+
+ // Overide this: Overwrite the Type method to reflect the key type
+ // if using non-default key type.
+ static const string &Type() {
+ static string type;
+ if(type.empty()) {
+ type = W::Type() + "_^n";
+ if(sizeof(K) != sizeof(uint32)) {
+ string size;
+ Int64ToStr(8 * sizeof(K), &size);
+ type += "_" + size;
+ }
+ }
+ return type;
+ }
+
+ static uint64 Properties() {
+ uint64 props = W::Properties();
+ return props & (kLeftSemiring | kRightSemiring |
+ kCommutative | kIdempotent);
+ }
+
+ SparsePowerWeight<W, K> Quantize(float delta = kDelta) const {
+ return SparseTupleWeight<W, K>::Quantize(delta);
+ }
+
+ ReverseWeight Reverse() const {
+ return SparseTupleWeight<W, K>::Reverse();
+ }
+};
+
+// Semimodule plus operation
+template <class W, class K>
+inline SparsePowerWeight<W, K> Plus(const SparsePowerWeight<W, K> &w1,
+ const SparsePowerWeight<W, K> &w2) {
+ SparsePowerWeight<W, K> ret;
+ SparseTupleWeightPlusMapper<W, K> operator_mapper;
+ SparseTupleWeightMap(&ret, w1, w2, operator_mapper);
+ return ret;
+}
+
+// Semimodule times operation
+template <class W, class K>
+inline SparsePowerWeight<W, K> Times(const SparsePowerWeight<W, K> &w1,
+ const SparsePowerWeight<W, K> &w2) {
+ SparsePowerWeight<W, K> ret;
+ SparseTupleWeightTimesMapper<W, K> operator_mapper;
+ SparseTupleWeightMap(&ret, w1, w2, operator_mapper);
+ return ret;
+}
+
+// Semimodule divide operation
+template <class W, class K>
+inline SparsePowerWeight<W, K> Divide(const SparsePowerWeight<W, K> &w1,
+ const SparsePowerWeight<W, K> &w2,
+ DivideType type = DIVIDE_ANY) {
+ SparsePowerWeight<W, K> ret;
+ SparseTupleWeightDivideMapper<W, K> operator_mapper(type);
+ SparseTupleWeightMap(&ret, w1, w2, operator_mapper);
+ return ret;
+}
+
+// Semimodule dot product
+template <class W, class K>
+inline const W& DotProduct(const SparsePowerWeight<W, K> &w1,
+ const SparsePowerWeight<W, K> &w2) {
+ const SparsePowerWeight<W, K>& product = Times(w1, w2);
+ W ret(W::Zero());
+ for (SparseTupleWeightIterator<W, K> it(product); !it.Done(); it.Next()) {
+ ret = Plus(ret, it.Value().second);
+ }
+ return ret;
+}
+
+template <class W, class K>
+inline bool ApproxEqual(const SparsePowerWeight<W, K> &w1,
+ const SparsePowerWeight<W, K> &w2,
+ float delta = kDelta) {
+ SparseTupleWeight<W, K> ret;
+ SparseTupleWeightApproxMapper<W, K> operator_mapper(kDelta);
+ SparseTupleWeightMap(&ret, w1, w2, operator_mapper);
+ return ret == SparsePowerWeight<W, K>::One();
+}
+
+template <class W, class K>
+inline SparsePowerWeight<W, K> Times(const W &k,
+ const SparsePowerWeight<W, K> &w2) {
+ SparsePowerWeight<W, K> w1(k);
+ return Times(w1, w2);
+}
+
+template <class W, class K>
+inline SparsePowerWeight<W, K> Times(const SparsePowerWeight<W, K> &w1,
+ const W &k) {
+ SparsePowerWeight<W, K> w2(k);
+ return Times(w1, w2);
+}
+
+template <class W, class K>
+inline SparsePowerWeight<W, K> Divide(const SparsePowerWeight<W, K> &w1,
+ const W &k,
+ DivideType divide_type = DIVIDE_ANY) {
+ SparsePowerWeight<W, K> w2(k);
+ return Divide(w1, w2, divide_type);
+}
+
+} // namespace fst
+
+#endif // FST_LIB_SPARSE_POWER_WEIGHT_H__
diff --git a/src/include/fst/sparse-tuple-weight.h b/src/include/fst/sparse-tuple-weight.h
new file mode 100644
index 0000000..d316b17
--- /dev/null
+++ b/src/include/fst/sparse-tuple-weight.h
@@ -0,0 +1,640 @@
+// sparse-tuple-weight.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: krr@google.com (Kasturi Rangan Raghavan)
+// Inspiration: allauzen@google.com (Cyril Allauzen)
+// \file
+// Sparse version of tuple-weight, based on tuple-weight.h
+// Internally stores sparse key, value pairs in linked list
+// Default value element is the assumed value of unset keys
+// Internal singleton implementation that stores first key,
+// value pair as an initialized member variable to avoid
+// unnecessary allocation on heap.
+// Use SparseTupleWeightIterator to iterate through the key,value pairs
+// Note: this does NOT iterate through the default value.
+//
+// Sparse tuple weight set operation definitions.
+
+#ifndef FST_LIB_SPARSE_TUPLE_WEIGHT_H__
+#define FST_LIB_SPARSE_TUPLE_WEIGHT_H__
+
+#include<string>
+#include<list>
+#include<stack>
+#include<unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+
+#include <fst/weight.h>
+
+
+DECLARE_string(fst_weight_parentheses);
+DECLARE_string(fst_weight_separator);
+
+namespace fst {
+
+template <class W, class K> class SparseTupleWeight;
+
+template<class W, class K>
+class SparseTupleWeightIterator;
+
+template <class W, class K>
+istream &operator>>(istream &strm, SparseTupleWeight<W, K> &w);
+
+// Arbitrary dimension tuple weight, stored as a sorted linked-list
+// W is any weight class,
+// K is the key value type. kNoKey(-1) is reserved for internal use
+template <class W, class K = int>
+class SparseTupleWeight {
+ public:
+ typedef pair<K, W> Pair;
+ typedef SparseTupleWeight<typename W::ReverseWeight, K> ReverseWeight;
+
+ const static K kNoKey = -1;
+ SparseTupleWeight() {
+ Init();
+ }
+
+ template <class Iterator>
+ SparseTupleWeight(Iterator begin, Iterator end) {
+ Init();
+ // Assumes input iterator is sorted
+ for (Iterator it = begin; it != end; ++it)
+ Push(*it);
+ }
+
+
+ SparseTupleWeight(const K& key, const W &w) {
+ Init();
+ Push(key, w);
+ }
+
+ SparseTupleWeight(const W &w) {
+ Init(w);
+ }
+
+ SparseTupleWeight(const SparseTupleWeight<W, K> &w) {
+ Init(w.DefaultValue());
+ SetDefaultValue(w.DefaultValue());
+ for (SparseTupleWeightIterator<W, K> it(w); !it.Done(); it.Next()) {
+ Push(it.Value());
+ }
+ }
+
+ static const SparseTupleWeight<W, K> &Zero() {
+ static SparseTupleWeight<W, K> zero;
+ return zero;
+ }
+
+ static const SparseTupleWeight<W, K> &One() {
+ static SparseTupleWeight<W, K> one(W::One());
+ return one;
+ }
+
+ static const SparseTupleWeight<W, K> &NoWeight() {
+ static SparseTupleWeight<W, K> no_weight(W::NoWeight());
+ return no_weight;
+ }
+
+ istream &Read(istream &strm) {
+ ReadType(strm, &default_);
+ ReadType(strm, &first_);
+ return ReadType(strm, &rest_);
+ }
+
+ ostream &Write(ostream &strm) const {
+ WriteType(strm, default_);
+ WriteType(strm, first_);
+ return WriteType(strm, rest_);
+ }
+
+ SparseTupleWeight<W, K> &operator=(const SparseTupleWeight<W, K> &w) {
+ if (this == &w) return *this; // check for w = w
+ Init(w.DefaultValue());
+ for (SparseTupleWeightIterator<W, K> it(w); !it.Done(); it.Next()) {
+ Push(it.Value());
+ }
+ return *this;
+ }
+
+ bool Member() const {
+ if (!DefaultValue().Member()) return false;
+ for (SparseTupleWeightIterator<W, K> it(*this); !it.Done(); it.Next()) {
+ if (!it.Value().second.Member()) return false;
+ }
+ return true;
+ }
+
+ // Assumes H() function exists for the hash of the key value
+ size_t Hash() const {
+ uint64 h = 0;
+ std::hash<K> H;
+ for (SparseTupleWeightIterator<W, K> it(*this); !it.Done(); it.Next()) {
+ h = 5 * h + H(it.Value().first);
+ h = 13 * h + it.Value().second.Hash();
+ }
+ return size_t(h);
+ }
+
+ SparseTupleWeight<W, K> Quantize(float delta = kDelta) const {
+ SparseTupleWeight<W, K> w;
+ for (SparseTupleWeightIterator<W, K> it(*this); !it.Done(); it.Next()) {
+ w.Push(it.Value().first, it.Value().second.Quantize(delta));
+ }
+ return w;
+ }
+
+ ReverseWeight Reverse() const {
+ SparseTupleWeight<W, K> w;
+ for (SparseTupleWeightIterator<W, K> it(*this); !it.Done(); it.Next()) {
+ w.Push(it.Value().first, it.Value().second.Reverse());
+ }
+ return w;
+ }
+
+ // Common initializer among constructors.
+ void Init() {
+ Init(W::Zero());
+ }
+
+ void Init(const W& default_value) {
+ first_.first = kNoKey;
+ /* initialized to the reserved key value */
+ default_ = default_value;
+ rest_.clear();
+ }
+
+ size_t Size() const {
+ if (first_.first == kNoKey)
+ return 0;
+ else
+ return rest_.size() + 1;
+ }
+
+ inline void Push(const K &k, const W &w, bool default_value_check = true) {
+ Push(make_pair(k, w), default_value_check);
+ }
+
+ inline void Push(const Pair &p, bool default_value_check = true) {
+ if (default_value_check && p.second == default_) return;
+ if (first_.first == kNoKey) {
+ first_ = p;
+ } else {
+ rest_.push_back(p);
+ }
+ }
+
+ void SetDefaultValue(const W& val) { default_ = val; }
+
+ const W& DefaultValue() const { return default_; }
+
+ protected:
+ static istream& ReadNoParen(
+ istream&, SparseTupleWeight<W, K>&, char separator);
+
+ static istream& ReadWithParen(
+ istream&, SparseTupleWeight<W, K>&,
+ char separator, char open_paren, char close_paren);
+
+ private:
+ // Assumed default value of uninitialized keys, by default W::Zero()
+ W default_;
+
+ // Key values pairs are first stored in first_, then fill rest_
+ // this way we can avoid dynamic allocation in the common case
+ // where the weight is a single key,val pair.
+ Pair first_;
+ list<Pair> rest_;
+
+ friend istream &operator>><W, K>(istream&, SparseTupleWeight<W, K>&);
+ friend class SparseTupleWeightIterator<W, K>;
+};
+
+template<class W, class K>
+class SparseTupleWeightIterator {
+ public:
+ typedef typename SparseTupleWeight<W, K>::Pair Pair;
+ typedef typename list<Pair>::const_iterator const_iterator;
+ typedef typename list<Pair>::iterator iterator;
+
+ explicit SparseTupleWeightIterator(const SparseTupleWeight<W, K>& w)
+ : first_(w.first_), rest_(w.rest_), init_(true),
+ iter_(rest_.begin()) {}
+
+ bool Done() const {
+ if (init_)
+ return first_.first == SparseTupleWeight<W, K>::kNoKey;
+ else
+ return iter_ == rest_.end();
+ }
+
+ const Pair& Value() const { return init_ ? first_ : *iter_; }
+
+ void Next() {
+ if (init_)
+ init_ = false;
+ else
+ ++iter_;
+ }
+
+ void Reset() {
+ init_ = true;
+ iter_ = rest_.begin();
+ }
+
+ private:
+ const Pair &first_;
+ const list<Pair> & rest_;
+ bool init_; // in the initialized state?
+ typename list<Pair>::const_iterator iter_;
+
+ DISALLOW_COPY_AND_ASSIGN(SparseTupleWeightIterator);
+};
+
+template<class W, class K, class M>
+inline void SparseTupleWeightMap(
+ SparseTupleWeight<W, K>* ret,
+ const SparseTupleWeight<W, K>& w1,
+ const SparseTupleWeight<W, K>& w2,
+ const M& operator_mapper) {
+ SparseTupleWeightIterator<W, K> w1_it(w1);
+ SparseTupleWeightIterator<W, K> w2_it(w2);
+ const W& v1_def = w1.DefaultValue();
+ const W& v2_def = w2.DefaultValue();
+ ret->SetDefaultValue(operator_mapper.Map(0, v1_def, v2_def));
+ while (!w1_it.Done() || !w2_it.Done()) {
+ const K& k1 = (w1_it.Done()) ? w2_it.Value().first : w1_it.Value().first;
+ const K& k2 = (w2_it.Done()) ? w1_it.Value().first : w2_it.Value().first;
+ const W& v1 = (w1_it.Done()) ? v1_def : w1_it.Value().second;
+ const W& v2 = (w2_it.Done()) ? v2_def : w2_it.Value().second;
+ if (k1 == k2) {
+ ret->Push(k1, operator_mapper.Map(k1, v1, v2));
+ if (!w1_it.Done()) w1_it.Next();
+ if (!w2_it.Done()) w2_it.Next();
+ } else if (k1 < k2) {
+ ret->Push(k1, operator_mapper.Map(k1, v1, v2_def));
+ w1_it.Next();
+ } else {
+ ret->Push(k2, operator_mapper.Map(k2, v1_def, v2));
+ w2_it.Next();
+ }
+ }
+}
+
+// Equality of two sparse tuple weights.  The default values must match, and
+// for every key stored in either weight the two effective values must match,
+// where a weight that does not store the key contributes its default value.
+template <class W, class K>
+inline bool operator==(const SparseTupleWeight<W, K> &w1,
+                       const SparseTupleWeight<W, K> &w2) {
+  const W& default1 = w1.DefaultValue();
+  const W& default2 = w2.DefaultValue();
+  if (default1 != default2) return false;
+
+  SparseTupleWeightIterator<W, K> it1(w1);
+  SparseTupleWeightIterator<W, K> it2(w2);
+  while (!(it1.Done() && it2.Done())) {
+    if (it1.Done()) {
+      // w1 is exhausted: remaining w2 entries must equal w1's default.
+      if (default1 != it2.Value().second) return false;
+      it2.Next();
+    } else if (it2.Done()) {
+      // w2 is exhausted: remaining w1 entries must equal w2's default.
+      if (it1.Value().second != default2) return false;
+      it1.Next();
+    } else {
+      const K& key1 = it1.Value().first;
+      const K& key2 = it2.Value().first;
+      if (key1 == key2) {
+        if (it1.Value().second != it2.Value().second) return false;
+        it1.Next();
+        it2.Next();
+      } else if (key1 < key2) {
+        // key1 is missing from w2.
+        if (it1.Value().second != default2) return false;
+        it1.Next();
+      } else {
+        // key2 is missing from w1.
+        if (default1 != it2.Value().second) return false;
+        it2.Next();
+      }
+    }
+  }
+  return true;
+}
+
+// Inequality of two sparse tuple weights; the exact negation of operator==.
+template <class W, class K>
+inline bool operator!=(const SparseTupleWeight<W, K> &w1,
+                       const SparseTupleWeight<W, K> &w2) {
+  const bool same = (w1 == w2);
+  return !same;
+}
+
+// Writes a sparse tuple weight in text form:
+//   [open] default SEP size SEP (key SEP value SEP)* [close]
+// SEP is the single character in FLAGS_fst_weight_separator.  The open/close
+// characters are written only when FLAGS_fst_weight_parentheses is non-empty,
+// in which case it must hold exactly two characters.  If either flag has the
+// wrong length an error is logged, the stream state is set to badbit, and
+// nothing is written.
+template <class W, class K>
+inline ostream &operator<<(ostream &strm, const SparseTupleWeight<W, K> &w) {
+  if (FLAGS_fst_weight_separator.size() != 1) {
+    FSTERROR() << "FLAGS_fst_weight_separator.size() is not equal to 1";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+  const bool write_parens = !FLAGS_fst_weight_parentheses.empty();
+  if (write_parens && FLAGS_fst_weight_parentheses.size() != 2) {
+    FSTERROR() << "FLAGS_fst_weight_parentheses.size() is not equal to 2";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+  const char separator = FLAGS_fst_weight_separator[0];
+
+  if (write_parens) strm << FLAGS_fst_weight_parentheses[0];
+
+  // Header: default value followed by the number of explicit entries.
+  strm << w.DefaultValue() << separator;
+  const size_t n = w.Size();
+  strm << n << separator;
+
+  // Body: each explicit entry as "key SEP value SEP".
+  SparseTupleWeightIterator<W, K> it(w);
+  while (!it.Done()) {
+    strm << it.Value().first << separator;
+    strm << it.Value().second << separator;
+    it.Next();
+  }
+
+  if (write_parens) strm << FLAGS_fst_weight_parentheses[1];
+  return strm;
+}
+
+// Reads a sparse tuple weight in text form.  The separator is the single
+// character in FLAGS_fst_weight_separator.  When FLAGS_fst_weight_parentheses
+// is empty the parenthesis-free reader is used; otherwise the flag must hold
+// exactly two characters (open, close) and the parenthesized reader is used.
+// A flag of the wrong length logs an error and sets the stream to badbit.
+template <class W, class K>
+inline istream &operator>>(istream &strm, SparseTupleWeight<W, K> &w) {
+  if (FLAGS_fst_weight_separator.size() != 1) {
+    FSTERROR() << "FLAGS_fst_weight_separator.size() is not equal to 1";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+  const char separator = FLAGS_fst_weight_separator[0];
+
+  // No parentheses configured: plain separator-delimited format.
+  if (FLAGS_fst_weight_parentheses.empty())
+    return SparseTupleWeight<W, K>::ReadNoParen(strm, w, separator);
+
+  if (FLAGS_fst_weight_parentheses.size() != 2) {
+    FSTERROR() << "FLAGS_fst_weight_parentheses.size() is not equal to 2";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+  const char open_paren = FLAGS_fst_weight_parentheses[0];
+  const char close_paren = FLAGS_fst_weight_parentheses[1];
+  return SparseTupleWeight<W, K>::ReadWithParen(strm, w, separator,
+                                                open_paren, close_paren);
+}
+
+// Helper for ReadNoParen.  Appends characters to *s, beginning with the
+// already-read character *c, until *c equals the separator (which is consumed
+// from the stream but not appended).  Returns false if end of stream is hit
+// before a separator is seen.
+inline bool SparseTupleReadFieldNoParen(istream &strm, char separator,
+                                        int *c, string *s) {
+  while (*c != separator) {
+    if (*c == EOF) return false;
+    *s += static_cast<char>(*c);
+    *c = strm.get();
+  }
+  return true;
+}
+
+// Reads SparseTupleWeight when there are no parentheses around tuple terms.
+//
+// Expected layout: default SEP count SEP (key SEP value SEP){count}, with
+// leading whitespace skipped.  Each field is the raw text up to the next
+// separator and is parsed with operator>> of its type.  On premature end of
+// stream, or if the count field is not a valid number, the stream state is
+// set to badbit and the function returns immediately (w may be partially
+// filled).
+template <class W, class K>
+inline istream& SparseTupleWeight<W, K>::ReadNoParen(
+    istream &strm,
+    SparseTupleWeight<W, K> &w,
+    char separator) {
+  int c;
+  do {
+    c = strm.get();
+  } while (isspace(c));
+
+  {  // Read default weight
+    string s;
+    if (!SparseTupleReadFieldNoParen(strm, separator, &c, &s)) {
+      strm.clear(std::ios::badbit);
+      return strm;
+    }
+    W default_value;
+    istringstream sstrm(s);
+    sstrm >> default_value;
+    w.SetDefaultValue(default_value);
+  }
+
+  c = strm.get();
+
+  // Initialized so that a failed parse can never leave the loop bound
+  // indeterminate.
+  size_t n = 0;
+  {  // Read n
+    string s;
+    if (!SparseTupleReadFieldNoParen(strm, separator, &c, &s)) {
+      strm.clear(std::ios::badbit);
+      return strm;
+    }
+    istringstream sstrm(s);
+    if (!(sstrm >> n)) {
+      strm.clear(std::ios::badbit);
+      return strm;
+    }
+  }
+
+  // Read n elements
+  for (size_t i = 0; i < n; ++i) {
+    // The previous field's separator has already been consumed; this reads
+    // the first character of the key.
+    c = strm.get();
+    K p;
+    W r;
+
+    {  // Read key
+      string s;
+      if (!SparseTupleReadFieldNoParen(strm, separator, &c, &s)) {
+        strm.clear(std::ios::badbit);
+        return strm;
+      }
+      istringstream sstrm(s);
+      sstrm >> p;
+    }
+
+    c = strm.get();
+
+    {  // Read weight
+      string s;
+      if (!SparseTupleReadFieldNoParen(strm, separator, &c, &s)) {
+        strm.clear(std::ios::badbit);
+        return strm;
+      }
+      istringstream sstrm(s);
+      sstrm >> r;
+    }
+
+    w.Push(p, r);
+  }
+
+  // NOTE(review): one more separator is required here, after the separator
+  // that terminated the last field.  The operator<< in this file does not
+  // appear to write such an extra separator, so parenthesis-free output may
+  // not round-trip -- confirm before relying on it.  Behavior kept as-is.
+  c = strm.get();
+  if (c != separator) {
+    strm.clear(std::ios::badbit);
+  }
+
+  return strm;
+}
+
+// Helper for ReadWithParen.  Appends characters to *s, beginning with the
+// already-read character *c, until *c equals the separator at parenthesis
+// nesting depth zero.  Parentheses seen inside the field are appended and must
+// be balanced.  On end of stream the stream state is set to badbit; on a
+// close parenthesis with no matching open one it is set to failbit.  Returns
+// false in both cases, true otherwise.
+inline bool SparseTupleReadFieldWithParen(istream &strm, char separator,
+                                          char open_paren, char close_paren,
+                                          int *c, string *s) {
+  size_t depth = 0;  // number of currently unmatched open parentheses
+  while (*c != separator || depth > 0) {
+    if (*c == EOF) {
+      strm.clear(std::ios::badbit);
+      return false;
+    }
+    *s += static_cast<char>(*c);
+    if (*c == open_paren) {
+      ++depth;
+    } else if (*c == close_paren) {
+      // Fail for mismatched parens
+      if (depth == 0) {
+        strm.clear(std::ios::failbit);
+        return false;
+      }
+      --depth;
+    }
+    *c = strm.get();
+  }
+  return true;
+}
+
+// Reads SparseTupleWeight when there are parentheses around tuple terms.
+//
+// Expected layout, after optional leading whitespace:
+//   OPEN default SEP count SEP (key SEP value SEP){count} CLOSE
+// The default, key and value fields may themselves contain balanced
+// parentheses, inside which separators are not treated as field ends.  On a
+// missing parenthesis, premature end of stream or an unparsable count the
+// stream state is set to badbit (failbit for an unmatched close parenthesis
+// inside a field) and the function returns immediately; w may be partially
+// filled.
+template <class W, class K>
+inline istream& SparseTupleWeight<W, K>::ReadWithParen(
+    istream &strm,
+    SparseTupleWeight<W, K> &w,
+    char separator,
+    char open_paren,
+    char close_paren) {
+  int c;
+  do {
+    c = strm.get();
+  } while (isspace(c));
+
+  if (c != open_paren) {
+    FSTERROR() << "is fst_weight_parentheses flag set correcty? ";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+
+  c = strm.get();
+
+  {  // Read default weight
+    string s;
+    if (!SparseTupleReadFieldWithParen(strm, separator, open_paren,
+                                       close_paren, &c, &s))
+      return strm;
+    W default_value;
+    istringstream sstrm(s);
+    sstrm >> default_value;
+    w.SetDefaultValue(default_value);
+  }
+
+  c = strm.get();
+
+  // Initialized so that a failed parse can never leave the loop bound
+  // indeterminate.
+  size_t n = 0;
+  {  // Read n (a plain number: no parenthesis tracking)
+    string s;
+    while (c != separator) {
+      if (c == EOF) {
+        strm.clear(std::ios::badbit);
+        return strm;
+      }
+      s += static_cast<char>(c);
+      c = strm.get();
+    }
+    istringstream sstrm(s);
+    if (!(sstrm >> n)) {
+      strm.clear(std::ios::badbit);
+      return strm;
+    }
+  }
+
+  // Read n elements
+  for (size_t i = 0; i < n; ++i) {
+    // The previous field's separator has already been consumed; this reads
+    // the first character of the key.
+    c = strm.get();
+    K p;
+    W r;
+
+    {  // Read key
+      string s;
+      if (!SparseTupleReadFieldWithParen(strm, separator, open_paren,
+                                         close_paren, &c, &s))
+        return strm;
+      istringstream sstrm(s);
+      sstrm >> p;
+    }
+
+    c = strm.get();
+
+    {  // Read weight
+      string s;
+      if (!SparseTupleReadFieldWithParen(strm, separator, open_paren,
+                                         close_paren, &c, &s))
+        return strm;
+      istringstream sstrm(s);
+      sstrm >> r;
+    }
+
+    w.Push(p, r);
+  }
+
+  // Every field above ends on a separator, so c holds one at this point.
+  if (c != separator) {
+    FSTERROR() << " separator expected, not found! ";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+
+  c = strm.get();
+  if (c != close_paren) {
+    FSTERROR() << " is fst_weight_parentheses flag set correcty? ";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+
+  return strm;
+}
+
+
+
+} // namespace fst
+
+#endif // FST_LIB_SPARSE_TUPLE_WEIGHT_H__
diff --git a/src/include/fst/state-map.h b/src/include/fst/state-map.h
new file mode 100644
index 0000000..ace4a3c
--- /dev/null
+++ b/src/include/fst/state-map.h
@@ -0,0 +1,601 @@
+// state-map.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Class to map over/transform states e.g., sort transitions
+// Consider using when operation does not change the number of states.
+
+#ifndef FST_LIB_STATE_MAP_H__
+#define FST_LIB_STATE_MAP_H__
+
+#include <algorithm>
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <string>
+#include <utility>
+using std::pair; using std::make_pair;
+
+#include <fst/cache.h>
+#include <fst/arc-map.h>
+#include <fst/mutable-fst.h>
+
+
+namespace fst {
+
+// StateMapper Interface - class determines how states are mapped.
+// Useful for implementing operations that do not change the number of states.
+//
+// class StateMapper {
+// public:
+// typedef A FromArc;
+// typedef B ToArc;
+//
+// // Typical constructor
+// StateMapper(const Fst<A> &fst);
+// // Required copy constructor that allows updating Fst argument;
+// // pass only if relevant and changed.
+// StateMapper(const StateMapper &mapper, const Fst<A> *fst = 0);
+//
+// // Specifies initial state of result
+// B::StateId Start() const;
+// // Specifies state's final weight in result
+// B::Weight Final(B::StateId s) const;
+//
+// // These methods iterate through a state's arcs in result
+// // Specifies state to iterate over
+// void SetState(B::StateId s);
+// // End of arcs?
+// bool Done() const;
+// // Current arc
+
+// const B &Value() const;
+// // Advance to next arc (when !Done)
+// void Next();
+//
+// // Specifies input symbol table action the mapper requires (see above).
+// MapSymbolsAction InputSymbolsAction() const;
+// // Specifies output symbol table action the mapper requires (see above).
+// MapSymbolsAction OutputSymbolsAction() const;
+// // This specifies the known properties of an Fst mapped by this
+// // mapper. It takes as argument the input Fst's known properties.
+// uint64 Properties(uint64 props) const;
+// };
+//
+// We include various state map versions below. One dimension of
+// variation is whether the mapping mutates its input, writes to a
+// new result Fst, or is an on-the-fly Fst. Another dimension is how
+// we pass the mapper. We allow passing the mapper by pointer
+// for cases that we need to change the state of the user's mapper.
+// We also include map versions that pass the mapper
+// by value or const reference when this suffices.
+
+// In-place state map: rewrites every state of *fst using the mapper object,
+// which is passed by pointer so that its state changes are visible to the
+// caller.
+//
+// Symbol tables are dropped when the mapper requests MAP_CLEAR_SYMBOLS.  An
+// FST without a start state is otherwise left untouched.  For each state the
+// mapper is positioned with SetState() before the state's arcs are deleted;
+// the mapper's arcs and final weight are then written back.  Finally the
+// FST's properties are replaced by mapper->Properties() of the properties it
+// had before mapping.
+template<class A, class C>
+void StateMap(MutableFst<A> *fst, C* mapper) {
+  typedef typename A::StateId StateId;
+
+  if (mapper->InputSymbolsAction() == MAP_CLEAR_SYMBOLS) {
+    fst->SetInputSymbols(0);
+  }
+  if (mapper->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS) {
+    fst->SetOutputSymbols(0);
+  }
+
+  // Nothing to map in an FST that has no start state.
+  if (fst->Start() == kNoStateId) return;
+
+  // Properties as known before any mutation below.
+  const uint64 input_props = fst->Properties(kFstProperties, false);
+
+  fst->SetStart(mapper->Start());
+
+  StateId state = 0;
+  while (state < fst->NumStates()) {
+    mapper->SetState(state);
+    fst->DeleteArcs(state);
+    while (!mapper->Done()) {
+      fst->AddArc(state, mapper->Value());
+      mapper->Next();
+    }
+    fst->SetFinal(state, mapper->Final(state));
+    ++state;
+  }
+
+  fst->SetProperties(mapper->Properties(input_props), kFstProperties);
+}
+
+// In-place state map taking the mapper by value.  The mapping runs on the
+// local copy, so the caller's mapper object is not modified.
+template<class A, class C>
+void StateMap(MutableFst<A> *fst, C mapper) {
+  C *local_mapper = &mapper;
+  StateMap(fst, local_mapper);
+}
+
+
+// Copying state map: builds *ofst (arc type B) from ifst (arc type A) using
+// the mapper object, which is passed by pointer.
+//
+// *ofst is emptied first.  Its symbol tables are copied from ifst or cleared
+// as the mapper requests.  If ifst has no start state, only a kError
+// property of ifst is propagated and *ofst stays empty.  Otherwise one output
+// state is added per input state, and the start state, arcs and final weights
+// are taken from the mapper.  The resulting properties are the union of
+// mapper->Properties() of ifst's copy properties and whatever *ofst already
+// knows about itself.
+template<class A, class B, class C>
+void StateMap(const Fst<A> &ifst, MutableFst<B> *ofst, C* mapper) {
+  typedef typename A::StateId StateId;
+  typedef StateIterator< Fst<A> > InputStateIterator;
+
+  ofst->DeleteStates();
+
+  if (mapper->InputSymbolsAction() == MAP_COPY_SYMBOLS) {
+    ofst->SetInputSymbols(ifst.InputSymbols());
+  } else if (mapper->InputSymbolsAction() == MAP_CLEAR_SYMBOLS) {
+    ofst->SetInputSymbols(0);
+  }
+
+  if (mapper->OutputSymbolsAction() == MAP_COPY_SYMBOLS) {
+    ofst->SetOutputSymbols(ifst.OutputSymbols());
+  } else if (mapper->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS) {
+    ofst->SetOutputSymbols(0);
+  }
+
+  const uint64 input_props = ifst.Properties(kCopyProperties, false);
+
+  if (ifst.Start() == kNoStateId) {
+    // Empty input: only carry over an error flag, if any.
+    if (input_props & kError) ofst->SetProperties(kError, kError);
+    return;
+  }
+
+  // Create one output state per input state.
+  if (ifst.Properties(kExpanded, false)) {
+    ofst->ReserveStates(CountStates(ifst));
+  }
+  {
+    InputStateIterator siter(ifst);
+    while (!siter.Done()) {
+      ofst->AddState();
+      siter.Next();
+    }
+  }
+
+  ofst->SetStart(mapper->Start());
+
+  // Fill in arcs and final weights state by state.
+  InputStateIterator siter(ifst);
+  while (!siter.Done()) {
+    const StateId state = siter.Value();
+    mapper->SetState(state);
+    while (!mapper->Done()) {
+      ofst->AddArc(state, mapper->Value());
+      mapper->Next();
+    }
+    ofst->SetFinal(state, mapper->Final(state));
+    siter.Next();
+  }
+
+  const uint64 output_props = ofst->Properties(kFstProperties, false);
+  ofst->SetProperties(mapper->Properties(input_props) | output_props,
+                      kFstProperties);
+}
+
+// Copying state map taking the mapper by value.  The mapping runs on the
+// local copy, so the caller's mapper object is not modified.
+template<class A, class B, class C>
+void StateMap(const Fst<A> &ifst, MutableFst<B> *ofst, C mapper) {
+  C *local_mapper = &mapper;
+  StateMap(ifst, ofst, local_mapper);
+}
+
+typedef CacheOptions StateMapFstOptions;
+
+template <class A, class B, class C> class StateMapFst;
+
+// Implementation of delayed StateMapFst.
+//
+// Holds a private copy of the input Fst<A> plus a mapper C, and fills the
+// CacheImpl<B> base on demand: the start state, final weights and arcs are
+// requested from the mapper the first time they are needed and are stored
+// through SetStart/SetFinal/PushArc+SetArcs afterwards.
+template <class A, class B, class C>
+class StateMapFstImpl : public CacheImpl<B> {
+ public:
+ using FstImpl<B>::SetType;
+ using FstImpl<B>::SetProperties;
+ using FstImpl<B>::SetInputSymbols;
+ using FstImpl<B>::SetOutputSymbols;
+
+ using VectorFstBaseImpl<typename CacheImpl<B>::State>::NumStates;
+
+ using CacheImpl<B>::PushArc;
+ using CacheImpl<B>::HasArcs;
+ using CacheImpl<B>::HasFinal;
+ using CacheImpl<B>::HasStart;
+ using CacheImpl<B>::SetArcs;
+ using CacheImpl<B>::SetFinal;
+ using CacheImpl<B>::SetStart;
+
+ friend class StateIterator< StateMapFst<A, B, C> >;
+
+ typedef B Arc;
+ typedef typename B::Weight Weight;
+ typedef typename B::StateId StateId;
+
+ // Copies the input FST and builds an owned mapper from 'mapper', rebound
+ // to that copy via the mapper's (mapper, fst) constructor.
+ StateMapFstImpl(const Fst<A> &fst, const C &mapper,
+ const StateMapFstOptions& opts)
+ : CacheImpl<B>(opts),
+ fst_(fst.Copy()),
+ mapper_(new C(mapper, fst_)),
+ own_mapper_(true) {
+ Init();
+ }
+
+ // Copies the input FST but uses the caller's mapper object directly; the
+ // mapper is not owned and is not deleted by the destructor.
+ StateMapFstImpl(const Fst<A> &fst, C *mapper,
+ const StateMapFstOptions& opts)
+ : CacheImpl<B>(opts),
+ fst_(fst.Copy()),
+ mapper_(mapper),
+ own_mapper_(false) {
+ Init();
+ }
+
+ // Copy constructor: takes a Copy(true) of the other impl's input FST and
+ // always creates its own mapper bound to that copy.
+ StateMapFstImpl(const StateMapFstImpl<A, B, C> &impl)
+ : CacheImpl<B>(impl),
+ fst_(impl.fst_->Copy(true)),
+ mapper_(new C(*impl.mapper_, fst_)),
+ own_mapper_(true) {
+ Init();
+ }
+
+ ~StateMapFstImpl() {
+ delete fst_;
+ if (own_mapper_) delete mapper_;
+ }
+
+ // Start state; asked from the mapper on first use, then served by the cache.
+ StateId Start() {
+ if (!HasStart())
+ SetStart(mapper_->Start());
+ return CacheImpl<B>::Start();
+ }
+
+ // Final weight of s; asked from the mapper on first use.
+ Weight Final(StateId s) {
+ if (!HasFinal(s))
+ SetFinal(s, mapper_->Final(s));
+ return CacheImpl<B>::Final(s);
+ }
+
+ // The three counters below expand state s first if its arcs are not cached.
+ size_t NumArcs(StateId s) {
+ if (!HasArcs(s))
+ Expand(s);
+ return CacheImpl<B>::NumArcs(s);
+ }
+
+ size_t NumInputEpsilons(StateId s) {
+ if (!HasArcs(s))
+ Expand(s);
+ return CacheImpl<B>::NumInputEpsilons(s);
+ }
+
+ size_t NumOutputEpsilons(StateId s) {
+ if (!HasArcs(s))
+ Expand(s);
+ return CacheImpl<B>::NumOutputEpsilons(s);
+ }
+
+ // State iteration is delegated to the input FST.
+ // NOTE(review): the parameter is typed on the input arc A although this is
+ // an FST over B; this looks like it only lines up when A and B are the
+ // same type -- confirm.
+ void InitStateIterator(StateIteratorData<A> *data) const {
+ fst_->InitStateIterator(data);
+ }
+
+ // Arc iteration over the cached arcs of s, expanding s first if needed.
+ void InitArcIterator(StateId s, ArcIteratorData<B> *data) {
+ if (!HasArcs(s))
+ Expand(s);
+ CacheImpl<B>::InitArcIterator(s, data);
+ }
+
+ uint64 Properties() const { return Properties(kFstProperties); }
+
+ // Set error if found; return FST impl properties.
+ // The error check only runs when kError is part of the mask, and looks at
+ // both the input FST and the mapper (queried with empty input properties).
+ uint64 Properties(uint64 mask) const {
+ if ((mask & kError) && (fst_->Properties(kError, false) ||
+ (mapper_->Properties(0) & kError)))
+ SetProperties(kError, kError);
+ return FstImpl<Arc>::Properties(mask);
+ }
+
+ // Computes the arcs of state s with the mapper and stores them in the cache.
+ void Expand(StateId s) {
+ // Add exiting arcs.
+ for (mapper_->SetState(s); !mapper_->Done(); mapper_->Next())
+ PushArc(s, mapper_->Value());
+ SetArcs(s);
+ }
+
+ private:
+ // Shared constructor tail: sets the FST type name, applies the mapper's
+ // symbol table actions, and derives the initial properties from the input
+ // FST's copy properties.
+ void Init() {
+ SetType("statemap");
+
+ if (mapper_->InputSymbolsAction() == MAP_COPY_SYMBOLS)
+ SetInputSymbols(fst_->InputSymbols());
+ else if (mapper_->InputSymbolsAction() == MAP_CLEAR_SYMBOLS)
+ SetInputSymbols(0);
+
+ if (mapper_->OutputSymbolsAction() == MAP_COPY_SYMBOLS)
+ SetOutputSymbols(fst_->OutputSymbols());
+ else if (mapper_->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS)
+ SetOutputSymbols(0);
+
+ uint64 props = fst_->Properties(kCopyProperties, false);
+ SetProperties(mapper_->Properties(props));
+ }
+
+ const Fst<A> *fst_; // owned copy of the input FST
+ C* mapper_; // mapper in use; owned only if own_mapper_
+ bool own_mapper_; // whether the destructor deletes mapper_
+
+ void operator=(const StateMapFstImpl<A, B, C> &); // disallow
+};
+
+
+// Maps an arc type A to an arc type B using Mapper function object
+// C. This version is a delayed Fst.
+//
+// All work is done by the StateMapFstImpl<A, B, C> held through ImplToFst;
+// this class only forwards to it.
+template <class A, class B, class C>
+class StateMapFst : public ImplToFst< StateMapFstImpl<A, B, C> > {
+ public:
+ friend class ArcIterator< StateMapFst<A, B, C> >;
+
+ typedef B Arc;
+ typedef typename B::Weight Weight;
+ typedef typename B::StateId StateId;
+ typedef CacheState<B> State;
+ typedef StateMapFstImpl<A, B, C> Impl;
+
+ // Mapper passed by const reference: the impl creates its own mapper from it.
+ StateMapFst(const Fst<A> &fst, const C &mapper,
+ const StateMapFstOptions& opts)
+ : ImplToFst<Impl>(new Impl(fst, mapper, opts)) {}
+
+ // Mapper passed by pointer: the impl uses the caller's mapper object and
+ // does not delete it.
+ StateMapFst(const Fst<A> &fst, C* mapper, const StateMapFstOptions& opts)
+ : ImplToFst<Impl>(new Impl(fst, mapper, opts)) {}
+
+ // Same as the two constructors above, with default-constructed options.
+ StateMapFst(const Fst<A> &fst, const C &mapper)
+ : ImplToFst<Impl>(new Impl(fst, mapper, StateMapFstOptions())) {}
+
+ StateMapFst(const Fst<A> &fst, C* mapper)
+ : ImplToFst<Impl>(new Impl(fst, mapper, StateMapFstOptions())) {}
+
+ // See Fst<>::Copy() for doc.
+ StateMapFst(const StateMapFst<A, B, C> &fst, bool safe = false)
+ : ImplToFst<Impl>(fst, safe) {}
+
+ // Get a copy of this StateMapFst. See Fst<>::Copy() for further doc.
+ virtual StateMapFst<A, B, C> *Copy(bool safe = false) const {
+ return new StateMapFst<A, B, C>(*this, safe);
+ }
+
+ // NOTE(review): typed on the input arc A while this class is an FST over B;
+ // this looks like it only matches the base-class virtual when A and B are
+ // the same type -- confirm.
+ virtual void InitStateIterator(StateIteratorData<A> *data) const {
+ GetImpl()->InitStateIterator(data);
+ }
+
+ virtual void InitArcIterator(StateId s, ArcIteratorData<B> *data) const {
+ GetImpl()->InitArcIterator(s, data);
+ }
+
+ private:
+ // Makes visible to friends.
+ Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
+
+ void operator=(const StateMapFst<A, B, C> &fst); // disallow
+};
+
+
+// ArcIterator specialization for StateMapFst.  Iterates over the cached arcs
+// of a state; the constructor expands the state if its arcs have not been
+// computed yet.
+template <class A, class B, class C>
+class ArcIterator< StateMapFst<A, B, C> >
+    : public CacheArcIterator< StateMapFst<A, B, C> > {
+ public:
+  typedef typename A::StateId StateId;
+
+  ArcIterator(const StateMapFst<A, B, C> &fst, StateId s)
+      : CacheArcIterator< StateMapFst<A, B, C> >(fst.GetImpl(), s) {
+    typename StateMapFst<A, B, C>::Impl *impl = fst.GetImpl();
+    if (!impl->HasArcs(s)) {
+      impl->Expand(s);
+    }
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+//
+// Utility Mappers
+//
+
+// State mapper that reproduces its input FST unchanged: start state, final
+// weights and arcs are all read straight from the referenced FST.  The FST is
+// held by reference and must outlive the mapper.  Done(), Value() and Next()
+// may only be used after SetState() has been called.
+template <class A>
+class IdentityStateMapper {
+ public:
+  typedef A FromArc;
+  typedef A ToArc;
+
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+
+  explicit IdentityStateMapper(const Fst<A> &fst) : fst_(fst), aiter_(0) {}
+
+  // Copy that may be rebound to another FST: when 'fst' is null the new
+  // mapper refers to the same FST as 'mapper'.  The iteration position is not
+  // copied.
+  IdentityStateMapper(const IdentityStateMapper<A> &mapper,
+                      const Fst<A> *fst = 0)
+      : fst_(fst != 0 ? *fst : mapper.fst_), aiter_(0) {}
+
+  ~IdentityStateMapper() { delete aiter_; }
+
+  StateId Start() const { return fst_.Start(); }
+
+  Weight Final(StateId s) const { return fst_.Final(s); }
+
+  // Positions the mapper at the first arc of state s.
+  void SetState(StateId s) {
+    delete aiter_;  // deleting a null pointer is a no-op
+    aiter_ = new ArcIterator< Fst<A> >(fst_, s);
+  }
+
+  bool Done() const { return aiter_->Done(); }
+
+  const A &Value() const { return aiter_->Value(); }
+
+  void Next() { aiter_->Next(); }
+
+  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+
+  MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+
+  // The mapping changes nothing, so all input properties carry over.
+  uint64 Properties(uint64 props) const { return props; }
+
+ private:
+  const Fst<A> &fst_;             // input FST (not owned)
+  ArcIterator< Fst<A> > *aiter_;  // iterator for the current state (owned)
+};
+
+// State mapper that merges parallel arcs.  For each state, all arcs sharing
+// the same input label, output label and destination state are replaced by a
+// single arc whose weight is the Plus-sum of their weights.  The resulting
+// arcs are ordered by (input label, output label, destination state).  The
+// input FST is held by reference and must outlive the mapper.
+template <class A>
+class ArcSumMapper {
+ public:
+  typedef A FromArc;
+  typedef A ToArc;
+
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+
+  explicit ArcSumMapper(const Fst<A> &fst) : fst_(fst), i_(0) {}
+
+  // Copy that may be rebound to another FST: when 'fst' is null the new
+  // mapper refers to the same FST as 'mapper'.  The arc buffer and position
+  // are not copied.
+  ArcSumMapper(const ArcSumMapper<A> &mapper,
+               const Fst<A> *fst = 0)
+      : fst_(fst != 0 ? *fst : mapper.fst_), i_(0) {}
+
+  StateId Start() const { return fst_.Start(); }
+
+  Weight Final(StateId s) const { return fst_.Final(s); }
+
+  // Loads, sorts and merges the arcs of state s, and rewinds to the first one.
+  void SetState(StateId s) {
+    i_ = 0;
+    arcs_.clear();
+    arcs_.reserve(fst_.NumArcs(s));
+    ArcIterator<Fst<A> > aiter(fst_, s);
+    while (!aiter.Done()) {
+      arcs_.push_back(aiter.Value());
+      aiter.Next();
+    }
+
+    // Sorting brings arcs with equal labels and destination next to each
+    // other, so one left-to-right pass can fold each run into its first arc.
+    sort(arcs_.begin(), arcs_.end(), comp_);
+    size_t kept = 0;  // number of merged arcs written so far
+    for (size_t next = 0; next < arcs_.size(); ++next) {
+      const A &candidate = arcs_[next];
+      if (kept == 0 || !equal_(candidate, arcs_[kept - 1])) {
+        // Start of a new run.
+        arcs_[kept] = candidate;
+        ++kept;
+        continue;
+      }
+      // Same run as the last kept arc: accumulate the weight.
+      A &last = arcs_[kept - 1];
+      last.weight = Plus(last.weight, candidate.weight);
+    }
+    arcs_.resize(kept);
+  }
+
+  bool Done() const { return !(i_ < arcs_.size()); }
+
+  const A &Value() const { return arcs_[i_]; }
+
+  void Next() { ++i_; }
+
+  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+
+  MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+
+  // Keeps only input properties that lie in all three property masks below.
+  uint64 Properties(uint64 props) const {
+    const uint64 preserved = kArcSortProperties & kDeleteArcsProperties &
+                             kWeightInvariantProperties;
+    return props & preserved;
+  }
+
+ private:
+  // Strict ordering by input label, then output label, then destination.
+  struct Compare {
+    bool operator()(const A& x, const A& y) {
+      if (x.ilabel != y.ilabel) return x.ilabel < y.ilabel;
+      if (x.olabel != y.olabel) return x.olabel < y.olabel;
+      return x.nextstate < y.nextstate;
+    }
+  };
+
+  // True when two arcs differ at most in their weight.
+  struct Equal {
+    bool operator()(const A& x, const A& y) {
+      if (x.ilabel != y.ilabel) return false;
+      if (x.olabel != y.olabel) return false;
+      return x.nextstate == y.nextstate;
+    }
+  };
+
+  const Fst<A> &fst_;  // input FST (not owned)
+  Compare comp_;
+  Equal equal_;
+  vector<A> arcs_;     // merged arcs of the current state
+  ssize_t i_;          // current arc position
+
+  void operator=(const ArcSumMapper<A> &);  // disallow
+};
+
+// State mapper that removes duplicate arcs.  For each state the arcs are
+// sorted by (input label, output label, destination state), and runs of
+// adjacent arcs that are identical in labels, destination and weight are
+// collapsed to one arc.  The sort does not order by weight, so identical arcs
+// that end up separated by an arc with the same labels and destination but a
+// different weight are not merged.  The input FST is held by reference and
+// must outlive the mapper.
+template <class A>
+class ArcUniqueMapper {
+ public:
+  typedef A FromArc;
+  typedef A ToArc;
+
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+
+  explicit ArcUniqueMapper(const Fst<A> &fst) : fst_(fst), i_(0) {}
+
+  // Allows updating Fst argument; pass only if changed.
+  // Takes an ArcUniqueMapper: the (mapper, fst) constructor is the one the
+  // delayed StateMapFst implementation calls with a mapper of this same type.
+  // The arc buffer and position are not copied.
+  ArcUniqueMapper(const ArcUniqueMapper<A> &mapper,
+                  const Fst<A> *fst = 0)
+      : fst_(fst ? *fst : mapper.fst_), i_(0) {}
+
+  StateId Start() const { return fst_.Start(); }
+  Weight Final(StateId s) const { return fst_.Final(s); }
+
+  // Loads, sorts and de-duplicates the arcs of state s, and rewinds to the
+  // first one.
+  void SetState(StateId s) {
+    i_ = 0;
+    arcs_.clear();
+    arcs_.reserve(fst_.NumArcs(s));
+    for (ArcIterator<Fst<A> > aiter(fst_, s); !aiter.Done(); aiter.Next())
+      arcs_.push_back(aiter.Value());
+
+    // First sorts the exiting arcs by input label, output label
+    // and destination state and then uniques identical arcs
+    sort(arcs_.begin(), arcs_.end(), comp_);
+    typename vector<A>::iterator unique_end =
+        unique(arcs_.begin(), arcs_.end(), equal_);
+    arcs_.resize(unique_end - arcs_.begin());
+  }
+
+  bool Done() const { return i_ >= arcs_.size(); }
+  const A &Value() const { return arcs_[i_]; }
+  void Next() { ++i_; }
+
+  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+  MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+
+  // Keeps only input properties that lie in both property masks below.
+  uint64 Properties(uint64 props) const {
+    return props & kArcSortProperties & kDeleteArcsProperties;
+  }
+
+ private:
+  // Strict ordering by input label, then output label, then destination.
+  struct Compare {
+    bool operator()(const A& x, const A& y) const {
+      if (x.ilabel < y.ilabel) return true;
+      if (x.ilabel > y.ilabel) return false;
+      if (x.olabel < y.olabel) return true;
+      if (x.olabel > y.olabel) return false;
+      if (x.nextstate < y.nextstate) return true;
+      if (x.nextstate > y.nextstate) return false;
+      return false;
+    }
+  };
+
+  // True when two arcs agree in labels, destination and weight.
+  struct Equal {
+    bool operator()(const A& x, const A& y) const {
+      return (x.ilabel == y.ilabel &&
+              x.olabel == y.olabel &&
+              x.nextstate == y.nextstate &&
+              x.weight == y.weight);
+    }
+  };
+
+  const Fst<A> &fst_;  // input FST (not owned)
+  Compare comp_;
+  Equal equal_;
+  vector<A> arcs_;     // de-duplicated arcs of the current state
+  size_t i_;           // current arc position (index into arcs_)
+
+  void operator=(const ArcUniqueMapper<A> &);  // disallow
+};
+
+
+} // namespace fst
+
+#endif // FST_LIB_STATE_MAP_H__
diff --git a/src/include/fst/state-reachable.h b/src/include/fst/state-reachable.h
new file mode 100644
index 0000000..6d0c971
--- /dev/null
+++ b/src/include/fst/state-reachable.h
@@ -0,0 +1,198 @@
+// state-reachable.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Class to determine whether a given (final) state can be reached from some
+// other given state.
+
+#ifndef FST_LIB_STATE_REACHABLE_H__
+#define FST_LIB_STATE_REACHABLE_H__
+
+#include <vector>
+using std::vector;
+
+#include <fst/dfs-visit.h>
+#include <fst/fst.h>
+#include <fst/interval-set.h>
+
+
+namespace fst {
+
+// Computes the (final) states reachable from a given state in an FST.
+// After this visitor has been called, a final state f can be reached
+// from a state s iff (*isets)[s].Member(state2index[f]) is true, where
+// (*isets)[s] is a set of half-open intervals of final state indices
+// and state2index[f] maps from a final state to its index.
+//
+// If state2index is empty, it is filled-in with suitable indices.
+// If it is non-empty, those indices are used; in this case, the
+// final states must have out-degree 0.
+//
+// The visitor reports failure (cyclic input, or a supplied state2index that
+// is unusable) by logging, making the callback return false, and setting the
+// flag returned by Error().
+template <class A, typename I = typename A::StateId>
+class IntervalReachVisitor {
+ public:
+  typedef typename A::StateId StateId;
+  typedef typename A::Label Label;
+  typedef typename A::Weight Weight;
+  typedef typename IntervalSet<I>::Interval Interval;
+
+  // 'isets' is cleared here.  Whether 'state2index' is empty at this point
+  // selects the mode: empty means indices are assigned by this visitor
+  // (index_ starts at 1), non-empty means the supplied indices are used
+  // (index_ is -1).
+  IntervalReachVisitor(const Fst<A> &fst,
+                       vector< IntervalSet<I> > *isets,
+                       vector<I> *state2index)
+      : fst_(fst),
+        isets_(isets),
+        state2index_(state2index),
+        index_(state2index->empty() ? 1 : -1),
+        error_(false) {
+    isets_->clear();
+  }
+
+  void InitVisit(const Fst<A> &fst) { error_ = false; }
+
+  // Called when state s is first discovered.  Grows both tables to cover s
+  // and, if s is final, gives it its own one-element interval.
+  bool InitState(StateId s, StateId r) {
+    // The element type must be IntervalSet<I> to match *isets_ even when the
+    // index type I differs from the arc's Label type.
+    while (isets_->size() <= s)
+      isets_->push_back(IntervalSet<I>());
+    while (state2index_->size() <= s)
+      state2index_->push_back(-1);
+
+    if (fst_.Final(s) != Weight::Zero()) {
+      // Create tree interval
+      vector<Interval> *intervals = (*isets_)[s].Intervals();
+      if (index_ < 0) {  // Use state2index_ map to set index
+        if (fst_.NumArcs(s) > 0) {
+          FSTERROR() << "IntervalReachVisitor: state2index map must be empty "
+                     << "for this FST";
+          error_ = true;
+          return false;
+        }
+        I index = (*state2index_)[s];
+        if (index < 0) {
+          FSTERROR() << "IntervalReachVisitor: state2index map incomplete";
+          error_ = true;
+          return false;
+        }
+        intervals->push_back(Interval(index, index + 1));
+      } else {  // Use pre-order index
+        intervals->push_back(Interval(index_, index_ + 1));
+        (*state2index_)[s] = index_++;
+      }
+    }
+    return true;
+  }
+
+  bool TreeArc(StateId s, const A &arc) {
+    return true;
+  }
+
+  // A back arc means the input has a cycle, which this visitor rejects.
+  bool BackArc(StateId s, const A &arc) {
+    FSTERROR() << "IntervalReachVisitor: cyclic input";
+    error_ = true;
+    return false;
+  }
+
+  bool ForwardOrCrossArc(StateId s, const A &arc) {
+    // Non-tree interval
+    (*isets_)[s].Union((*isets_)[arc.nextstate]);
+    return true;
+  }
+
+  // Called when all arcs of s have been processed; p is the DFS parent of s
+  // (kNoStateId if there is none).
+  void FinishState(StateId s, StateId p, const A *arc) {
+    if (index_ >= 0 && fst_.Final(s) != Weight::Zero()) {
+      vector<Interval> *intervals = (*isets_)[s].Intervals();
+      (*intervals)[0].end = index_;  // Update tree interval end
+    }
+    (*isets_)[s].Normalize();
+    if (p != kNoStateId)
+      (*isets_)[p].Union((*isets_)[s]);  // Propagate intervals to parent
+  }
+
+  void FinishVisit() {}
+
+  bool Error() const { return error_; }
+
+ private:
+  const Fst<A> &fst_;               // FST being visited (not owned)
+  vector< IntervalSet<I> > *isets_; // per-state interval sets (not owned)
+  vector<I> *state2index_;          // final state -> index, -1 if unassigned
+  I index_;                         // next index to assign, or -1 (see ctor)
+  bool error_;
+};
+
+
+// Tests reachability of final states from a given state. To test for
+// reachability from a state s, first do SetState(s). Then a final
+// state f can be reached from state s of FST iff Reach(f) is true.
+//
+// The reachability tables are computed once, in the constructor, by a
+// depth-first visit of the FST with an IntervalReachVisitor.
+template <class A, typename I = typename A::StateId>
+class StateReachable {
+ public:
+  typedef A Arc;
+  typedef I Index;
+  typedef typename A::StateId StateId;
+  typedef typename A::Label Label;
+  typedef typename A::Weight Weight;
+  typedef typename IntervalSet<I>::Interval Interval;
+
+  StateReachable(const Fst<A> &fst)
+      : s_(kNoStateId), error_(false) {
+    // The visitor must use this class's index type I so that it matches the
+    // element types of isets_ and state2index_.
+    IntervalReachVisitor<Arc, I> reach_visitor(fst, &isets_, &state2index_);
+    DfsVisit(fst, &reach_visitor);
+    if (reach_visitor.Error()) error_ = true;
+  }
+
+  // Copying is not supported: the result is an empty object with the error
+  // flag set.
+  // NOTE(review): the parameter type omits the index type I, so this is only
+  // the copy constructor when I is the default (StateId) -- confirm whether
+  // that is intended.  Signature left unchanged.
+  StateReachable(const StateReachable<A> &reachable)
+      : s_(kNoStateId) {
+    FSTERROR() << "Copy constructor for state reachable class "
+               << "not yet implemented.";
+    error_ = true;
+  }
+
+  // Set current state.
+  void SetState(StateId s) { s_ = s; }
+
+  // Can reach this (final) state from current state?
+  // Returns false for a state beyond the index table.  Logs an error, sets
+  // the error flag and returns false if s has no index (i.e. is not final) or
+  // if the current state is unset or outside the interval-set table.
+  bool Reach(StateId s) {
+    if (s >= state2index_.size())
+      return false;
+
+    I i = state2index_[s];
+    if (i < 0) {
+      FSTERROR() << "StateReachable: state non-final: " << s;
+      error_ = true;
+      return false;
+    }
+    // Guard the lookup below: s_ stays kNoStateId until SetState() is called.
+    if (s_ < 0 || static_cast<size_t>(s_) >= isets_.size()) {
+      FSTERROR() << "StateReachable: current state not set or out of range: "
+                 << s_;
+      error_ = true;
+      return false;
+    }
+    return isets_[s_].Member(i);
+  }
+
+  // Access to the state-to-index mapping. Unassigned states have index -1.
+  vector<I> &State2Index() { return state2index_; }
+
+  // Access to the interval sets. These specify the reachability
+  // to the final states as intervals of the final state indices.
+  const vector< IntervalSet<I> > &IntervalSets() { return isets_; }
+
+  bool Error() const { return error_; }
+
+ private:
+  StateId s_;                       // Current state
+  vector< IntervalSet<I> > isets_;  // Interval sets per state
+  vector<I> state2index_;           // Finds index for a final state
+  bool error_;
+
+  void operator=(const StateReachable<A> &);  // Disallow
+};
+
+} // namespace fst
+
+#endif // FST_LIB_STATE_REACHABLE_H__
diff --git a/src/include/fst/state-table.h b/src/include/fst/state-table.h
new file mode 100644
index 0000000..7d863a0
--- /dev/null
+++ b/src/include/fst/state-table.h
@@ -0,0 +1,469 @@
+// state-table.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Classes for representing the mapping between state tuples and state Ids.
+
+#ifndef FST_LIB_STATE_TABLE_H__
+#define FST_LIB_STATE_TABLE_H__
+
+#include <deque>
+#include <vector>
+using std::vector;
+
+#include <fst/bi-table.h>
+#include <fst/expanded-fst.h>
+
+
+namespace fst {
+
+// STATE TABLES - these determine the bijective mapping between state
+// tuples (e.g. in composition triples of two FST states and a
+// composition filter state) and their corresponding state IDs.
+// They are classes, templated on state tuples, of the form:
+//
+// template <class T>
+// class StateTable {
+// public:
+// typedef typename T StateTuple;
+//
+// // Required constructors.
+// StateTable();
+//
+// // Lookup state ID by tuple. If it doesn't exist, then add it.
+// StateId FindState(const StateTuple &);
+// // Lookup state tuple by state ID.
+// const StateTuple<StateId> &Tuple(StateId) const;
+// // # of stored tuples.
+// StateId Size() const;
+// };
+//
+// A state tuple has the form:
+//
+// template <class S>
+// struct StateTuple {
+// typedef typename S StateId;
+//
+// // Required constructor.
+// StateTuple();
+// };
+
+
+// An implementation using a hash map for the tuple to state ID mapping.
+// The state tuple T must have == defined and the default constructor
+// must produce a tuple that will never be seen. H is the hash function.
+template <class T, class H>
+class HashStateTable : public HashBiTable<typename T::StateId, T, H> {
+ public:
+  typedef T StateTuple;
+  typedef typename T::StateId StateId;
+
+  // Re-export the bi-table operations this wrapper forwards to.
+  using HashBiTable<StateId, T, H>::FindId;
+  using HashBiTable<StateId, T, H>::FindEntry;
+  using HashBiTable<StateId, T, H>::Size;
+
+  HashStateTable() : HashBiTable<StateId, T, H>() {}
+
+  // State ID for 'tuple', as returned by the bi-table's FindId().
+  StateId FindState(const StateTuple &tuple) {
+    return this->FindId(tuple);
+  }
+
+  // Tuple stored under state ID 's', as returned by FindEntry().
+  const StateTuple &Tuple(StateId s) const {
+    return this->FindEntry(s);
+  }
+};
+
+
+// An implementation using a hash set for the tuple to state ID
+// mapping. The state tuple T must have == defined and the default
+// constructor must produce a tuple that will never be seen. H is the
+// hash function.
+template <class T, class H>
+class CompactHashStateTable
+    : public CompactHashBiTable<typename T::StateId, T, H> {
+ public:
+  typedef T StateTuple;
+  typedef typename T::StateId StateId;
+
+  // Re-export the bi-table operations this wrapper forwards to.
+  using CompactHashBiTable<StateId, T, H>::FindId;
+  using CompactHashBiTable<StateId, T, H>::FindEntry;
+  using CompactHashBiTable<StateId, T, H>::Size;
+
+  CompactHashStateTable() : CompactHashBiTable<StateId, T, H>() {}
+
+  // Passes 'table_size' to the bi-table, which reserves space for that
+  // many elements.
+  explicit CompactHashStateTable(size_t table_size)
+      : CompactHashBiTable<StateId, T, H>(table_size) {}
+
+  // State ID for 'tuple', as returned by the bi-table's FindId().
+  StateId FindState(const StateTuple &tuple) {
+    return this->FindId(tuple);
+  }
+
+  // Tuple stored under state ID 's', as returned by FindEntry().
+  const StateTuple &Tuple(StateId s) const {
+    return this->FindEntry(s);
+  }
+};
+
+// An implementation using a vector for the tuple to state mapping.
+// It is passed a function object FP that should fingerprint tuples
+// uniquely to an integer that can be used as a vector index. Normally,
+// VectorStateTable constructs the FP object. The user can instead
+// pass in this object; in that case, VectorStateTable takes its
+// ownership.
+template <class T, class FP>
+class VectorStateTable
+    : public VectorBiTable<typename T::StateId, T, FP> {
+ public:
+  typedef T StateTuple;
+  typedef typename T::StateId StateId;
+
+  // Re-export the bi-table operations this wrapper forwards to.
+  using VectorBiTable<StateId, T, FP>::FindId;
+  using VectorBiTable<StateId, T, FP>::FindEntry;
+  using VectorBiTable<StateId, T, FP>::Size;
+  using VectorBiTable<StateId, T, FP>::Fingerprint;
+
+  // 'fp' is handed straight to the bi-table; a null pointer (the
+  // default) leaves construction of the fingerprint object to it.
+  explicit VectorStateTable(FP *fp = 0)
+      : VectorBiTable<StateId, T, FP>(fp) {}
+
+  // State ID for 'tuple', as returned by the bi-table's FindId().
+  StateId FindState(const StateTuple &tuple) {
+    return this->FindId(tuple);
+  }
+
+  // Tuple stored under state ID 's', as returned by FindEntry().
+  const StateTuple &Tuple(StateId s) const {
+    return this->FindEntry(s);
+  }
+};
+
+
+// An implementation using a vector and a compact hash table. The
+// selecting functor S returns true for tuples to be hashed in the
+// vector. The fingerprinting functor FP returns a unique fingerprint
+// for each tuple to be hashed in the vector (these need to be
+// suitable for indexing in a vector). The hash functor H is used when
+// hashing tuple into the compact hash table.
+template <class T, class S, class FP, class H>
+class VectorHashStateTable
+    : public VectorHashBiTable<typename T::StateId, T, S, FP, H> {
+ public:
+  typedef T StateTuple;
+  typedef typename T::StateId StateId;
+
+  // Re-export the bi-table operations and functor accessors.
+  using VectorHashBiTable<StateId, T, S, FP, H>::FindId;
+  using VectorHashBiTable<StateId, T, S, FP, H>::FindEntry;
+  using VectorHashBiTable<StateId, T, S, FP, H>::Size;
+  using VectorHashBiTable<StateId, T, S, FP, H>::Selector;
+  using VectorHashBiTable<StateId, T, S, FP, H>::Fingerprint;
+  using VectorHashBiTable<StateId, T, S, FP, H>::Hash;
+
+  // All arguments are forwarded unchanged to the bi-table:
+  // selector 's', fingerprint 'fp', hash 'h' and the two size hints.
+  VectorHashStateTable(S *s, FP *fp, H *h,
+                       size_t vector_size = 0,
+                       size_t tuple_size = 0)
+      : VectorHashBiTable<StateId, T, S, FP, H>(s, fp, h,
+                                                vector_size,
+                                                tuple_size) {}
+
+  // State ID for 'tuple', as returned by the bi-table's FindId().
+  StateId FindState(const StateTuple &tuple) {
+    return this->FindId(tuple);
+  }
+
+  // Tuple stored under state ID 's', as returned by FindEntry().
+  const StateTuple &Tuple(StateId s) const {
+    return this->FindEntry(s);
+  }
+};
+
+
+// An implementation using a hash map for the tuple to state ID
+// mapping. This version permits erasing of states. The state tuple T
+// must have == defined and its default constructor must produce a
+// tuple that will never be seen. F is the hash function.
+template <class T, class F>
+class ErasableStateTable
+    : public ErasableBiTable<typename T::StateId, T, F> {
+ public:
+  typedef T StateTuple;
+  typedef typename T::StateId StateId;
+
+  // Re-export the bi-table operations, including Erase().
+  using ErasableBiTable<StateId, T, F>::FindId;
+  using ErasableBiTable<StateId, T, F>::FindEntry;
+  using ErasableBiTable<StateId, T, F>::Size;
+  using ErasableBiTable<StateId, T, F>::Erase;
+
+  ErasableStateTable() : ErasableBiTable<StateId, T, F>() {}
+
+  // State ID for 'tuple', as returned by the bi-table's FindId().
+  StateId FindState(const StateTuple &tuple) {
+    return this->FindId(tuple);
+  }
+
+  // Tuple stored under state ID 's', as returned by FindEntry().
+  const StateTuple &Tuple(StateId s) const {
+    return this->FindEntry(s);
+  }
+};
+
+//
+// COMPOSITION STATE TUPLES AND TABLES
+//
+// The composition state table has the form:
+//
+// template <class A, class F>
+// class ComposeStateTable {
+// public:
+// typedef A Arc;
+// typedef F FilterState;
+// typedef typename A::StateId StateId;
+// typedef ComposeStateTuple<StateId> StateTuple;
+//
+// // Required constructors. Copy constructor does not copy state.
+// ComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2);
+// ComposeStateTable(const ComposeStateTable<A, F> &table);
+// // Lookup state ID by tuple. If it doesn't exist, then add it.
+// StateId FindState(const StateTuple &);
+// // Lookup state tuple by state ID.
+// const StateTuple<StateId> &Tuple(StateId) const;
+// // # of stored tuples.
+// StateId Size() const;
+// // Return true if error encountered
+// bool Error() const;
+// };
+
+// Represents the composition state.
+template <typename S, typename F>
+struct ComposeStateTuple {
+  typedef S StateId;
+  typedef F FilterState;
+
+  StateId state_id1;         // State Id on fst1
+  StateId state_id2;         // State Id on fst2
+  FilterState filter_state;  // State of composition filter
+
+  // Default tuple: both state IDs are kNoStateId and the filter state
+  // is FilterState::NoState().
+  ComposeStateTuple()
+      : state_id1(kNoStateId),
+        state_id2(kNoStateId),
+        filter_state(FilterState::NoState()) {}
+
+  // Tuple made of the given component states.
+  ComposeStateTuple(StateId s1, StateId s2, const FilterState &f)
+      : state_id1(s1),
+        state_id2(s2),
+        filter_state(f) {}
+};
+
+// Equality of composition state tuples.
+template <typename S, typename F>
+inline bool operator==(const ComposeStateTuple<S, F>& x,
+                       const ComposeStateTuple<S, F>& y) {
+  // Same object: equal without comparing members.
+  if (&x == &y) return true;
+
+  // Otherwise compare member by member, stopping at the first mismatch.
+  if (!(x.state_id1 == y.state_id1)) return false;
+  if (!(x.state_id2 == y.state_id2)) return false;
+  return x.filter_state == y.filter_state;
+}
+
+
+// Hashing of composition state tuples.
+template <typename S, typename F>
+class ComposeHash {
+ public:
+  // Combines the three tuple components: the second state ID and the
+  // filter-state hash are each scaled by their own prime.
+  size_t operator()(const ComposeStateTuple<S, F>& t) const {
+    const size_t scaled2 = t.state_id2 * kPrime0;
+    const size_t scaled_filter = t.filter_state.Hash() * kPrime1;
+    return t.state_id1 + scaled2 + scaled_filter;
+  }
+
+ private:
+  static const size_t kPrime0;
+  static const size_t kPrime1;
+};
+
+template <typename S, typename F>
+const size_t ComposeHash<S, F>::kPrime0 = 7853;
+
+template <typename S, typename F>
+const size_t ComposeHash<S, F>::kPrime1 = 7867;
+
+
+// A state table over composition tuples built on the table type H
+// (by default a CompactHashStateTable hashed with ComposeHash).
+template <typename A,
+          typename F,
+          typename H =
+          CompactHashStateTable<ComposeStateTuple<typename A::StateId, F>,
+                                ComposeHash<typename A::StateId, F> > >
+class GenericComposeStateTable : public H {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef F FilterState;
+  typedef ComposeStateTuple<StateId, F> StateTuple;
+
+  // The FSTs are not inspected; the table starts out empty.
+  GenericComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {}
+
+  // Copy constructor: does not copy the stored tuples; the new table
+  // starts out empty. The parameter names H explicitly so that this is
+  // the copy constructor for every instantiation, not only for the
+  // default table type.
+  GenericComposeStateTable(const GenericComposeStateTable<A, F, H> &table)
+      : H() {}
+
+  // This table never reports an error.
+  bool Error() const { return false; }
+
+ private:
+  void operator=(const GenericComposeStateTable<A, F, H> &table);  // disallow
+};
+
+
+// Fingerprint for general composition tuples.
+template <typename S, typename F>
+class ComposeFingerprint {
+ public:
+  typedef S StateId;
+  typedef F FilterState;
+  typedef ComposeStateTuple<S, F> StateTuple;
+
+  // Required but suboptimal constructor: without the FST sizes both
+  // multipliers fall back to a fixed 8192, and a warning is logged.
+  ComposeFingerprint() : mult1_(8192), mult2_(8192) {
+    LOG(WARNING) << "ComposeFingerprint: # of FST states should be provided.";
+  }
+
+  // Constructor is provided the sizes of the input FSTs. The product is
+  // computed in ssize_t so that it is not truncated to StateId width
+  // when both FSTs are large.
+  ComposeFingerprint(StateId nstates1, StateId nstates2)
+      : mult1_(nstates1),
+        mult2_(static_cast<ssize_t>(nstates1) * nstates2) { }
+
+  // Fingerprint: state_id1 + state_id2 * mult1_ + filter hash * mult2_.
+  size_t operator()(const StateTuple &tuple) {
+    return tuple.state_id1 + tuple.state_id2 * mult1_ +
+        tuple.filter_state.Hash() * mult2_;
+  }
+
+ private:
+  ssize_t mult1_;  // Multiplier applied to state_id2
+  ssize_t mult2_;  // Multiplier applied to the filter-state hash
+};
+
+
+// Useful when the first composition state determines the tuple.
+template <typename S, typename F>
+class ComposeState1Fingerprint {
+ public:
+  typedef S StateId;
+  typedef F FilterState;
+  typedef ComposeStateTuple<S, F> StateTuple;
+
+  // The fingerprint is simply the tuple's state ID on fst1.
+  size_t operator()(const StateTuple &tuple) {
+    const size_t fingerprint = tuple.state_id1;
+    return fingerprint;
+  }
+};
+
+
+// Useful when the second composition state determines the tuple.
+template <typename S, typename F>
+class ComposeState2Fingerprint {
+ public:
+  typedef S StateId;
+  typedef F FilterState;
+  typedef ComposeStateTuple<S, F> StateTuple;
+
+  // The fingerprint is simply the tuple's state ID on fst2.
+  size_t operator()(const StateTuple &tuple) {
+    const size_t fingerprint = tuple.state_id2;
+    return fingerprint;
+  }
+};
+
+
+// A VectorStateTable over composition tuples. This can be used when
+// the product of number of states in FST1 and FST2 (and the
+// composition filter state hash) is manageable. If the FSTs are not
+// expanded Fsts, they will first have their states counted.
+template <typename A, typename F>
+class ProductComposeStateTable
+    : public VectorStateTable<ComposeStateTuple<typename A::StateId, F>,
+                              ComposeFingerprint<typename A::StateId, F> > {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef F FilterState;
+  typedef ComposeStateTuple<StateId, F> StateTuple;
+
+  // Builds the fingerprint from the state counts of both FSTs and
+  // hands the newly allocated object to the vector table.
+  ProductComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2)
+      : VectorStateTable<StateTuple, ComposeFingerprint<StateId, F> >(
+            new ComposeFingerprint<StateId, F>(CountStates(fst1),
+                                               CountStates(fst2))) { }
+
+  // Copy: a fresh table that gets its own copy of the source table's
+  // fingerprint object.
+  ProductComposeStateTable(const ProductComposeStateTable<A, F> &table)
+      : VectorStateTable<StateTuple, ComposeFingerprint<StateId, F> >(
+            new ComposeFingerprint<StateId, F>(table.Fingerprint())) {}
+
+  // This table never reports an error.
+  bool Error() const { return false; }
+
+ private:
+  void operator=(const ProductComposeStateTable<A, F> &table);  // disallow
+};
+
+// A VectorStateTable over composition tuples. This can be used when
+// FST1 is a string (satisfies kStringProperties) and FST2 is
+// epsilon-free and deterministic. It should be used with a
+// composition filter that creates at most one filter state per tuple
+// under these conditions (e.g. SequenceComposeFilter or
+// MatchComposeFilter).
+template <typename A, typename F>
+class StringDetComposeStateTable
+    : public VectorStateTable<
+          ComposeStateTuple<typename A::StateId, F>,
+          ComposeState1Fingerprint<typename A::StateId, F> > {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef F FilterState;
+  typedef ComposeStateTuple<StateId, F> StateTuple;
+
+  // Verifies the preconditions on the inputs: fst1 must have kString,
+  // fst2 must have kIDeterministic and kNoIEpsilons. fst2 is only
+  // examined when fst1 passes. On failure an error is logged and
+  // Error() becomes true.
+  StringDetComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2)
+      : error_(false) {
+    const uint64 props1 = kString;
+    const uint64 props2 = kIDeterministic | kNoIEpsilons;
+    if (fst1.Properties(props1, true) == props1 &&
+        fst2.Properties(props2, true) == props2) {
+      return;
+    }
+    FSTERROR() << "StringDetComposeStateTable: fst1 not a string or"
+               << " fst2 not input deterministic and epsilon-free";
+    error_ = true;
+  }
+
+  // Copy: carries over only the error flag.
+  StringDetComposeStateTable(const StringDetComposeStateTable<A, F> &table)
+      : error_(table.error_) {}
+
+  bool Error() const { return error_; }
+
+ private:
+  bool error_;
+
+  void operator=(const StringDetComposeStateTable<A, F> &table);  // disallow
+};
+
+
+// A VectorStateTable over composition tuples. This can be used when
+// FST2 is a string (satisfies kStringProperties) and FST1 is
+// epsilon-free and deterministic. It should be used with a
+// composition filter that creates at most one filter state per tuple
+// under these conditions (e.g. SequenceComposeFilter or
+// MatchComposeFilter).
+// Fingerprints on the fst2 state: here fst2 is the string, so it is the
+// second component that identifies a tuple. (Fingerprinting on the fst1
+// state would map distinct tuples that share an fst1 state to the same
+// vector slot.)
+template <typename A, typename F>
+class DetStringComposeStateTable : public
+VectorStateTable<ComposeStateTuple<typename A::StateId, F>,
+                 ComposeState2Fingerprint<typename A::StateId, F> > {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef F FilterState;
+  typedef ComposeStateTuple<StateId, F> StateTuple;
+
+  // Verifies the preconditions on the inputs: fst1 must have
+  // kODeterministic and kNoOEpsilons, fst2 must have kString. On failure
+  // an error is logged and Error() becomes true.
+  DetStringComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2)
+      : error_(false) {
+    uint64 props1 = kODeterministic | kNoOEpsilons;
+    uint64 props2 = kString;
+    if (fst1.Properties(props1, true) != props1 ||
+        fst2.Properties(props2, true) != props2) {
+      FSTERROR() << "DetStringComposeStateTable: fst2 not a string or"
+                 << " fst1 not output deterministic and epsilon-free";
+      error_ = true;
+    }
+  }
+
+  // Copy: carries over only the error flag.
+  DetStringComposeStateTable(const DetStringComposeStateTable<A, F> &table)
+      : error_(table.error_) {}
+
+  bool Error() const { return error_; }
+
+ private:
+  bool error_;
+
+  void operator=(const DetStringComposeStateTable<A, F> &table);  // disallow
+};
+
+
+// An ErasableStateTable over composition tuples. The Erase(StateId) method
+// can be called if the user either is sure that composition will never return
+// to that tuple or doesn't care that if it does, it is assigned a new
+// state ID.
+template <typename A, typename F>
+class ErasableComposeStateTable
+    : public ErasableStateTable<ComposeStateTuple<typename A::StateId, F>,
+                                ComposeHash<typename A::StateId, F> > {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef F FilterState;
+  typedef ComposeStateTuple<StateId, F> StateTuple;
+
+  // The FSTs are not inspected; the table starts out empty.
+  ErasableComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {}
+
+  // Copy: nothing is taken from 'table'; the base is default-constructed.
+  ErasableComposeStateTable(const ErasableComposeStateTable<A, F> &table) {}
+
+  // This table never reports an error.
+  bool Error() const { return false; }
+
+ private:
+  void operator=(const ErasableComposeStateTable<A, F> &table);  // disallow
+};
+
+} // namespace fst
+
+#endif // FST_LIB_STATE_TABLE_H__
diff --git a/src/include/fst/statesort.h b/src/include/fst/statesort.h
new file mode 100644
index 0000000..6f827f4
--- /dev/null
+++ b/src/include/fst/statesort.h
@@ -0,0 +1,97 @@
+// statesort.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Function to sort states of an Fst.
+
+#ifndef FST_LIB_STATESORT_H__
+#define FST_LIB_STATESORT_H__
+
+#include <vector>
+using std::vector;
+#include <algorithm>
+
+#include <fst/mutable-fst.h>
+
+
+namespace fst {
+
+// Sorts the input states of an FST, modifying it. ORDER[i] gives the
+// state Id after sorting that corresponds to state Id i before
+// sorting. ORDER must be a permutation of FST's states ID sequence:
+// (0, 1, 2, ..., fst->NumStates() - 1).
+// Renumbers the states of *fst in place: the final weight and arcs of
+// state s end up on state order[s], and every arc destination is mapped
+// through 'order' as well. The work is done by following each cycle of
+// the mapping, using two arc buffers that alternate roles.
+template <class Arc>
+void StateSort(MutableFst<Arc> *fst,
+ const vector<typename Arc::StateId> &order) {
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Weight Weight;
+
+ // A size mismatch is an error: log it, set kError on the FST and stop.
+ if (order.size() != fst->NumStates()) {
+ FSTERROR() << "StateSort: bad order vector size: " << order.size();
+ fst->SetProperties(kError, kError);
+ return;
+ }
+
+ // Nothing is done for an FST without a start state.
+ if (fst->Start() == kNoStateId)
+ return;
+
+ // Snapshot the kStateSortProperties bits now; they are written back
+ // once all states have been moved.
+ uint64 props = fst->Properties(kStateSortProperties, false);
+
+ // done[s] is set once the contents of state s have been written to
+ // order[s]. arcs1 holds the arcs being moved, arcs2 the arcs saved
+ // from the state about to be overwritten.
+ vector<bool> done(order.size(), false);
+ vector<Arc> arcsa, arcsb;
+ vector<Arc> *arcs1 = &arcsa, *arcs2 = &arcsb;
+
+ fst->SetStart(order[fst->Start()]);
+
+ for (StateIterator< MutableFst<Arc> > siter(*fst);
+ !siter.Done();
+ siter.Next()) {
+ StateId s1 = siter.Value(), s2;
+ if (done[s1])
+ continue;
+ // Load the final weight and arcs of the first state of this chain.
+ Weight final1 = fst->Final(s1), final2 = Weight::Zero();
+ arcs1->clear();
+ for (ArcIterator< MutableFst<Arc> > aiter(*fst, s1);
+ !aiter.Done();
+ aiter.Next())
+ arcs1->push_back(aiter.Value());
+ // Follow s1 -> order[s1] until a state already handled is reached.
+ // After each step the saved contents of s2 become the contents to
+ // move next (the two buffers are swapped).
+ for (; !done[s1]; s1 = s2, final1 = final2, swap(arcs1, arcs2)) {
+ s2 = order[s1];
+ // Save the destination's own contents before overwriting them,
+ // unless that state has already been moved.
+ if (!done[s2]) {
+ final2 = fst->Final(s2);
+ arcs2->clear();
+ for (ArcIterator< MutableFst<Arc> > aiter(*fst, s2);
+ !aiter.Done();
+ aiter.Next())
+ arcs2->push_back(aiter.Value());
+ }
+ // Write the contents of s1 onto s2, remapping arc destinations.
+ fst->SetFinal(s2, final1);
+ fst->DeleteArcs(s2);
+ for (size_t i = 0; i < arcs1->size(); ++i) {
+ Arc arc = (*arcs1)[i];
+ arc.nextstate = order[arc.nextstate];
+ fst->AddArc(s2, arc);
+ }
+ done[s1] = true;
+ }
+ }
+ // Write back the property bits captured before the states were moved.
+ fst->SetProperties(props, kFstProperties);
+}
+
+} // namespace fst
+
+#endif // FST_LIB_STATESORT_H__
diff --git a/src/include/fst/string-weight.h b/src/include/fst/string-weight.h
new file mode 100644
index 0000000..1beeb33
--- /dev/null
+++ b/src/include/fst/string-weight.h
@@ -0,0 +1,560 @@
+// string-weight.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// String weight set and associated semiring operation definitions.
+
+#ifndef FST_LIB_STRING_WEIGHT_H__
+#define FST_LIB_STRING_WEIGHT_H__
+
+#include <list>
+#include <string>
+
+#include <fst/product-weight.h>
+#include <fst/weight.h>
+
+namespace fst {
+
+// Reserved label values and the separator used by the string weights.
+const int kStringInfinity = -1; // Label for the infinite string
+const int kStringBad = -2; // Label for a non-string
+const char kStringSeparator = '_'; // Label separator in strings
+
+// Determines whether to use left or right string semiring. Includes
+// restricted versions that signal an error if proper prefixes
+// (suffixes) would otherwise be returned by Plus, useful with various
+// algorithms that require functional transducer input with the
+// string semirings.
+// STRING_RIGHT_RESTRICT has no explicit initializer and so takes the
+// next value after STRING_LEFT_RESTRICT, i.e. 3.
+enum StringType { STRING_LEFT = 0, STRING_RIGHT = 1 ,
+ STRING_LEFT_RESTRICT = 2, STRING_RIGHT_RESTRICT };
+
+// Maps a string type to its mirror image: LEFT <-> RIGHT and
+// LEFT_RESTRICT <-> RIGHT_RESTRICT. Any value other than the first
+// three is mapped to STRING_LEFT_RESTRICT.
+#define REVERSE_STRING_TYPE(S) \
+ ((S) == STRING_LEFT ? STRING_RIGHT : \
+ ((S) == STRING_RIGHT ? STRING_LEFT : \
+ ((S) == STRING_LEFT_RESTRICT ? STRING_RIGHT_RESTRICT : \
+ STRING_LEFT_RESTRICT)))
+
+template <typename L, StringType S = STRING_LEFT>
+class StringWeight;
+
+template <typename L, StringType S = STRING_LEFT>
+class StringWeightIterator;
+
+template <typename L, StringType S = STRING_LEFT>
+class StringWeightReverseIterator;
+
+template <typename L, StringType S>
+bool operator==(const StringWeight<L, S> &, const StringWeight<L, S> &);
+
+
+// String semiring: (longest_common_prefix/suffix, ., Infinity, Epsilon)
+template <typename L, StringType S>
+class StringWeight {
+ public:
+  typedef L Label;
+  typedef StringWeight<L, REVERSE_STRING_TYPE(S)> ReverseWeight;
+
+  friend class StringWeightIterator<L, S>;
+  friend class StringWeightReverseIterator<L, S>;
+  friend bool operator==<>(const StringWeight<L, S> &,
+                           const StringWeight<L, S> &);
+
+  // The empty string.
+  StringWeight() { Init(); }
+
+  // The string made of the labels in [begin, end), in order.
+  template <typename Iter>
+  StringWeight(const Iter &begin, const Iter &end) {
+    Init();
+    Iter pos = begin;
+    while (pos != end) {
+      PushBack(*pos);
+      ++pos;
+    }
+  }
+
+  // The string holding the single label 'l'.
+  explicit StringWeight(L l) {
+    Init();
+    PushBack(l);
+  }
+
+  // Semiring zero: the single label kStringInfinity.
+  static const StringWeight<L, S> &Zero() {
+    static const StringWeight<L, S> zero(kStringInfinity);
+    return zero;
+  }
+
+  // Semiring one: the empty string.
+  static const StringWeight<L, S> &One() {
+    static const StringWeight<L, S> one;
+    return one;
+  }
+
+  // The non-member value: the single label kStringBad.
+  static const StringWeight<L, S> &NoWeight() {
+    static const StringWeight<L, S> no_weight(kStringBad);
+    return no_weight;
+  }
+
+  // Name of this weight type; depends only on the string type S.
+  static const string &Type() {
+    static const string type =
+        S == STRING_LEFT_RESTRICT ? "restricted_string" :
+        (S == STRING_RIGHT ? "right_string" :
+         (S == STRING_LEFT ? "string" :
+          "right_restricted_string"));
+    return type;
+  }
+
+  bool Member() const;
+
+  istream &Read(istream &strm);
+
+  ostream &Write(ostream &strm) const;
+
+  size_t Hash() const;
+
+  // Strings are discrete, so quantization leaves the weight unchanged.
+  StringWeight<L, S> Quantize(float delta = kDelta) const {
+    return *this;
+  }
+
+  ReverseWeight Reverse() const;
+
+  // Left variants are left semirings, right variants right semirings;
+  // all of them are idempotent.
+  static uint64 Properties() {
+    const bool is_left = (S == STRING_LEFT || S == STRING_LEFT_RESTRICT);
+    return (is_left ? kLeftSemiring : kRightSemiring) | kIdempotent;
+  }
+
+  // NB: This needs to be uncommented only if default fails for this impl.
+  // StringWeight<L, S> &operator=(const StringWeight<L, S> &w);
+
+  // These operations combined with the StringWeightIterator and
+  // StringWeightReverseIterator provide the access and mutation of
+  // the string internal elements.
+
+  // Common initializer among constructors.
+  void Init() { first_ = 0; }
+
+  // Clear existing StringWeight.
+  void Clear() {
+    first_ = 0;
+    rest_.clear();
+  }
+
+  // Number of labels; a zero first label denotes the empty string.
+  size_t Size() const {
+    if (!first_) return 0;
+    return rest_.size() + 1;
+  }
+
+  // Prepends 'l'; a previous non-zero first label moves to the front
+  // of the remainder list.
+  void PushFront(L l) {
+    if (first_) {
+      rest_.push_front(first_);
+    }
+    first_ = l;
+  }
+
+  // Appends 'l'; on an empty string it becomes the first label.
+  void PushBack(L l) {
+    if (first_) {
+      rest_.push_back(l);
+    } else {
+      first_ = l;
+    }
+  }
+
+ private:
+  L first_;       // first label in string (0 if empty)
+  list<L> rest_;  // remaining labels in string
+};
+
+
+// Traverses string in forward direction.
+template <typename L, StringType S>
+class StringWeightIterator {
+ public:
+  // Starts positioned on the first label of 'w'; only references into
+  // 'w' are kept.
+  explicit StringWeightIterator(const StringWeight<L, S>& w)
+      : first_(w.first_),
+        rest_(w.rest_),
+        init_(true),
+        iter_(rest_.begin()) {}
+
+  // While on the first label, the iteration is over only if the string
+  // is empty (first label 0); afterwards, when the remainder list has
+  // been exhausted.
+  bool Done() const {
+    return init_ ? first_ == 0 : iter_ == rest_.end();
+  }
+
+  // Label at the current position.
+  const L& Value() const {
+    if (init_) return first_;
+    return *iter_;
+  }
+
+  // The first step only leaves the initial position; later steps walk
+  // the remainder list.
+  void Next() {
+    if (init_) {
+      init_ = false;
+    } else {
+      ++iter_;
+    }
+  }
+
+  // Back to the first label.
+  void Reset() {
+    iter_ = rest_.begin();
+    init_ = true;
+  }
+
+ private:
+  const L &first_;
+  const list<L> &rest_;
+  bool init_;  // in the initialized state?
+  typename list<L>::const_iterator iter_;
+
+  DISALLOW_COPY_AND_ASSIGN(StringWeightIterator);
+};
+
+
+// Traverses string in backward direction.
+template <typename L, StringType S>
+class StringWeightReverseIterator {
+ public:
+  // Starts at the last label of 'w'. An empty string (first label 0)
+  // is finished from the outset. Only references into 'w' are kept.
+  explicit StringWeightReverseIterator(const StringWeight<L, S>& w)
+      : first_(w.first_), rest_(w.rest_), fin_(first_ == 0),
+        iter_(rest_.rbegin()) {}
+
+  bool Done() const { return fin_; }
+
+  // Once the remainder list is exhausted, the current label is the
+  // first label of the string.
+  const L& Value() const { return iter_ == rest_.rend() ? first_ : *iter_; }
+
+  // Walks the remainder list backwards; the step taken while on the
+  // first label ends the iteration.
+  void Next() {
+    if (iter_ == rest_.rend()) fin_ = true;
+    else ++iter_;
+  }
+
+  // Returns to the state established by the constructor. The finished
+  // flag is recomputed rather than cleared, so that an empty string is
+  // still Done() after a reset.
+  void Reset() {
+    fin_ = first_ == 0;
+    iter_ = rest_.rbegin();
+  }
+
+ private:
+  const L &first_;
+  const list<L> &rest_;
+  bool fin_;  // in the final state?
+  typename list<L>::const_reverse_iterator iter_;
+
+  DISALLOW_COPY_AND_ASSIGN(StringWeightReverseIterator);
+};
+
+
+// StringWeight member functions follow that require
+// StringWeightIterator or StringWeightReverseIterator.
+
+// Reads a weight in the format produced by Write(): a 32-bit label
+// count followed by that many labels. Reading stops as soon as the
+// stream fails, so a truncated or corrupt stream never yields labels
+// taken from uninitialized storage; the caller sees the failure through
+// the returned stream's state.
+template <typename L, StringType S>
+inline istream &StringWeight<L, S>::Read(istream &strm) {
+  Clear();
+  int32 size = 0;
+  ReadType(strm, &size);
+  if (strm.fail())
+    return strm;  // No usable count: leave the weight empty.
+  for (int32 i = 0; i < size; ++i) {
+    L label;
+    ReadType(strm, &label);
+    if (strm.fail())
+      break;  // 'label' was not (fully) read; do not store it.
+    PushBack(label);
+  }
+  return strm;
+}
+
+// Writes the label count as a 32-bit integer, then each label in
+// forward order.
+template <typename L, StringType S>
+inline ostream &StringWeight<L, S>::Write(ostream &strm) const {
+  int32 size = Size();
+  WriteType(strm, size);
+  StringWeightIterator<L, S> iter(*this);
+  while (!iter.Done()) {
+    L label = iter.Value();
+    WriteType(strm, label);
+    iter.Next();
+  }
+  return strm;
+}
+
+// Every string is a member except the one consisting of the single
+// label kStringBad.
+template <typename L, StringType S>
+inline bool StringWeight<L, S>::Member() const {
+  if (Size() == 1) {
+    StringWeightIterator<L, S> iter(*this);
+    return iter.Value() != kStringBad;
+  }
+  return true;
+}
+
+// Builds the reversed string by walking forward and prepending each
+// label to the result.
+template <typename L, StringType S>
+inline typename StringWeight<L, S>::ReverseWeight
+StringWeight<L, S>::Reverse() const {
+  ReverseWeight reversed;
+  StringWeightIterator<L, S> iter(*this);
+  while (!iter.Done()) {
+    reversed.PushFront(iter.Value());
+    iter.Next();
+  }
+  return reversed;
+}
+
+// Folds the labels, in forward order, into a running hash value.
+template <typename L, StringType S>
+inline size_t StringWeight<L, S>::Hash() const {
+  size_t h = 0;
+  StringWeightIterator<L, S> iter(*this);
+  while (!iter.Done()) {
+    h ^= (h << 1) ^ iter.Value();
+    iter.Next();
+  }
+  return h;
+}
+
+// NB: This needs to be uncommented only if default fails for this impl.
+//
+// template <typename L, StringType S>
+// inline StringWeight<L, S>
+// &StringWeight<L, S>::operator=(const StringWeight<L, S> &w) {
+// if (this != &w) {
+// Clear();
+// for (StringWeightIterator<L, S> iter(w); !iter.Done(); iter.Next())
+// PushBack(iter.Value());
+// }
+// return *this;
+// }
+
+// Two strings are equal when they have the same length and the same
+// label at every position.
+template <typename L, StringType S>
+inline bool operator==(const StringWeight<L, S> &w1,
+                       const StringWeight<L, S> &w2) {
+  if (w1.Size() != w2.Size())
+    return false;
+
+  // Equal lengths: walking w1 to its end also covers all of w2.
+  StringWeightIterator<L, S> iter1(w1);
+  StringWeightIterator<L, S> iter2(w2);
+  while (!iter1.Done()) {
+    if (iter1.Value() != iter2.Value())
+      return false;
+    iter1.Next();
+    iter2.Next();
+  }
+  return true;
+}
+
+// Negation of operator==.
+template <typename L, StringType S>
+inline bool operator!=(const StringWeight<L, S> &w1,
+                       const StringWeight<L, S> &w2) {
+  const bool equal = (w1 == w2);
+  return !equal;
+}
+
+// For strings approximate equality is exact equality; 'delta' is
+// not used.
+template <typename L, StringType S>
+inline bool ApproxEqual(const StringWeight<L, S> &w1,
+                        const StringWeight<L, S> &w2,
+                        float delta = kDelta) {
+  const bool equal = (w1 == w2);
+  return equal;
+}
+
+// Prints "Epsilon" for the empty string, "Infinity" or "BadString" when
+// the first label is the corresponding reserved value, and otherwise
+// the labels joined by kStringSeparator.
+template <typename L, StringType S>
+inline ostream &operator<<(ostream &strm, const StringWeight<L, S> &w) {
+  StringWeightIterator<L, S> iter(w);
+  if (iter.Done())
+    return strm << "Epsilon";
+  if (iter.Value() == kStringInfinity)
+    return strm << "Infinity";
+  if (iter.Value() == kStringBad)
+    return strm << "BadString";
+
+  // At least one label: print it, then each further label preceded by
+  // the separator.
+  strm << iter.Value();
+  for (iter.Next(); !iter.Done(); iter.Next()) {
+    strm << kStringSeparator;
+    strm << iter.Value();
+  }
+  return strm;
+}
+
+// Parses one whitespace-delimited token: "Infinity" gives Zero(),
+// "Epsilon" gives One(), anything else must be a list of decimal labels
+// joined by kStringSeparator. On a malformed token the stream state is
+// set to badbit and parsing stops (labels parsed so far remain in 'w').
+template <typename L, StringType S>
+inline istream &operator>>(istream &strm, StringWeight<L, S> &w) {
+  string s;
+  strm >> s;
+  if (s == "Infinity") {
+    w = StringWeight<L, S>::Zero();
+  } else if (s == "Epsilon") {
+    w = StringWeight<L, S>::One();
+  } else {
+    w.Clear();
+    char *p = 0;
+    // 'p' is left by strtoll on the first unparsed character: either a
+    // separator (continue just after it) or the terminating NUL (stop).
+    for (const char *cs = s.c_str(); !p || *p != '\0'; cs = p + 1) {
+      // Convert straight to the label type so that labels wider than
+      // int are not truncated on the way.
+      const L l = strtoll(cs, &p, 10);
+      if (p == cs || (*p != 0 && *p != kStringSeparator)) {
+        strm.clear(std::ios::badbit);
+        break;
+      }
+      w.PushBack(l);
+    }
+  }
+  return strm;
+}
+
+
+// Default is for the restricted left and right semirings. String
+// equality is required (for non-Zero() input). This restriction
+// is used in e.g. Determinize to ensure functional input.
+template <typename L, StringType S> inline StringWeight<L, S>
+Plus(const StringWeight<L, S> &w1,
+     const StringWeight<L, S> &w2) {
+  typedef StringWeight<L, S> Weight;
+
+  // A non-member operand makes the result a non-member.
+  if (!w1.Member() || !w2.Member())
+    return Weight::NoWeight();
+  // Zero() is the identity of Plus.
+  if (w1 == Weight::Zero())
+    return w2;
+  if (w2 == Weight::Zero())
+    return w1;
+
+  // Equal operands are the only other accepted case.
+  if (w1 == w2)
+    return w1;
+
+  FSTERROR() << "StringWeight::Plus: unequal arguments "
+             << "(non-functional FST?)"
+             << " w1 = " << w1
+             << " w2 = " << w2;
+  return Weight::NoWeight();
+}
+
+
+// Longest common prefix for left string semiring.
+template <typename L> inline StringWeight<L, STRING_LEFT>
+Plus(const StringWeight<L, STRING_LEFT> &w1,
+     const StringWeight<L, STRING_LEFT> &w2) {
+  typedef StringWeight<L, STRING_LEFT> Weight;
+
+  // A non-member operand makes the result a non-member.
+  if (!w1.Member() || !w2.Member())
+    return Weight::NoWeight();
+  // Zero() is the identity of Plus.
+  if (w1 == Weight::Zero())
+    return w2;
+  if (w2 == Weight::Zero())
+    return w1;
+
+  // Walk both strings from the front, collecting labels while they agree.
+  Weight prefix;
+  StringWeightIterator<L, STRING_LEFT> it1(w1);
+  StringWeightIterator<L, STRING_LEFT> it2(w2);
+  while (!it1.Done() && !it2.Done() && it1.Value() == it2.Value()) {
+    prefix.PushBack(it1.Value());
+    it1.Next();
+    it2.Next();
+  }
+  return prefix;
+}
+
+
+// Longest common suffix for right string semiring.
+template <typename L> inline StringWeight<L, STRING_RIGHT>
+Plus(const StringWeight<L, STRING_RIGHT> &w1,
+     const StringWeight<L, STRING_RIGHT> &w2) {
+  typedef StringWeight<L, STRING_RIGHT> Weight;
+
+  // A non-member operand makes the result a non-member.
+  if (!w1.Member() || !w2.Member())
+    return Weight::NoWeight();
+  // Zero() is the identity of Plus.
+  if (w1 == Weight::Zero())
+    return w2;
+  if (w2 == Weight::Zero())
+    return w1;
+
+  // Walk both strings from the back, prepending labels while they agree.
+  Weight suffix;
+  StringWeightReverseIterator<L, STRING_RIGHT> it1(w1);
+  StringWeightReverseIterator<L, STRING_RIGHT> it2(w2);
+  while (!it1.Done() && !it2.Done() && it1.Value() == it2.Value()) {
+    suffix.PushFront(it1.Value());
+    it1.Next();
+    it2.Next();
+  }
+  return suffix;
+}
+
+
+// Concatenation: w1 followed by w2. Non-member operands give
+// NoWeight(); Zero() annihilates.
+template <typename L, StringType S>
+inline StringWeight<L, S> Times(const StringWeight<L, S> &w1,
+                                const StringWeight<L, S> &w2) {
+  typedef StringWeight<L, S> Weight;
+
+  if (!w1.Member() || !w2.Member())
+    return Weight::NoWeight();
+  if (w1 == Weight::Zero() || w2 == Weight::Zero())
+    return Weight::Zero();
+
+  // Start from a copy of w1 and append every label of w2.
+  Weight product(w1);
+  StringWeightIterator<L, S> iter(w2);
+  while (!iter.Done()) {
+    product.PushBack(iter.Value());
+    iter.Next();
+  }
+  return product;
+}
+
+
+// Default is for left division in the left string and the
+// left restricted string semirings.
+template <typename L, StringType S> inline StringWeight<L, S>
+Divide(const StringWeight<L, S> &w1,
+       const StringWeight<L, S> &w2,
+       DivideType typ) {
+  typedef StringWeight<L, S> Weight;
+
+  if (typ != DIVIDE_LEFT) {
+    FSTERROR() << "StringWeight::Divide: only left division is defined "
+               << "for the " << StringWeight<L, S>::Type() << " semiring";
+    return Weight::NoWeight();
+  }
+
+  if (!w1.Member() || !w2.Member())
+    return Weight::NoWeight();
+
+  // Division by Zero() gives the bad string; Zero() divided by anything
+  // else stays Zero().
+  if (w2 == Weight::Zero())
+    return Weight(kStringBad);
+  if (w1 == Weight::Zero())
+    return Weight::Zero();
+
+  // Keep the labels of w1 that come after its first |w2| positions.
+  Weight quotient;
+  const size_t skipped = w2.Size();
+  size_t pos = 0;
+  for (StringWeightIterator<L, S> iter(w1); !iter.Done(); iter.Next()) {
+    if (pos >= skipped)
+      quotient.PushBack(iter.Value());
+    ++pos;
+  }
+  return quotient;
+}
+
+
+// Right division in the right string semiring.
+template <typename L> inline StringWeight<L, STRING_RIGHT>
+Divide(const StringWeight<L, STRING_RIGHT> &w1,
+       const StringWeight<L, STRING_RIGHT> &w2,
+       DivideType typ) {
+  typedef StringWeight<L, STRING_RIGHT> Weight;
+
+  if (typ != DIVIDE_RIGHT) {
+    FSTERROR() << "StringWeight::Divide: only right division is defined "
+               << "for the right string semiring";
+    return Weight::NoWeight();
+  }
+
+  if (!w1.Member() || !w2.Member())
+    return Weight::NoWeight();
+
+  // Division by Zero() gives the bad string; Zero() divided by anything
+  // else stays Zero().
+  if (w2 == Weight::Zero())
+    return Weight(kStringBad);
+  if (w1 == Weight::Zero())
+    return Weight::Zero();
+
+  // Walking w1 from the back, skip its last |w2| labels and prepend the
+  // others so they keep their original order.
+  Weight quotient;
+  const size_t skipped = w2.Size();
+  size_t pos = 0;
+  for (StringWeightReverseIterator<L, STRING_RIGHT> iter(w1);
+       !iter.Done();
+       iter.Next()) {
+    if (pos >= skipped)
+      quotient.PushFront(iter.Value());
+    ++pos;
+  }
+  return quotient;
+}
+
+
+// Right division in the right restricted string semiring.
+template <typename L> inline StringWeight<L, STRING_RIGHT_RESTRICT>
+Divide(const StringWeight<L, STRING_RIGHT_RESTRICT> &w1,
+       const StringWeight<L, STRING_RIGHT_RESTRICT> &w2,
+       DivideType typ) {
+  typedef StringWeight<L, STRING_RIGHT_RESTRICT> Weight;
+
+  if (typ != DIVIDE_RIGHT) {
+    FSTERROR() << "StringWeight::Divide: only right division is defined "
+               << "for the right restricted string semiring";
+    return Weight::NoWeight();
+  }
+
+  if (!w1.Member() || !w2.Member())
+    return Weight::NoWeight();
+
+  // Division by Zero() gives the bad string; Zero() divided by anything
+  // else stays Zero().
+  if (w2 == Weight::Zero())
+    return Weight(kStringBad);
+  if (w1 == Weight::Zero())
+    return Weight::Zero();
+
+  // Walking w1 from the back, skip its last |w2| labels and prepend the
+  // others so they keep their original order.
+  Weight quotient;
+  const size_t skipped = w2.Size();
+  size_t pos = 0;
+  for (StringWeightReverseIterator<L, STRING_RIGHT_RESTRICT> iter(w1);
+       !iter.Done();
+       iter.Next()) {
+    if (pos >= skipped)
+      quotient.PushFront(iter.Value());
+    ++pos;
+  }
+  return quotient;
+}
+
+
+// Product of string weight and an arbitrary weight.
+template <class L, class W, StringType S = STRING_LEFT>
+struct GallicWeight : public ProductWeight<StringWeight<L, S>, W> {
+  // Reversing flips the string direction and reverses the other weight.
+  typedef GallicWeight<L, typename W::ReverseWeight, REVERSE_STRING_TYPE(S)>
+      ReverseWeight;
+
+  // Leaves the product weight default-constructed.
+  GallicWeight() {}
+
+  // Pairs the string weight 'w1' with the weight 'w2'.
+  GallicWeight(StringWeight<L, S> w1, W w2)
+      : ProductWeight<StringWeight<L, S>, W>(w1, w2) {}
+
+  // Forwards 's' and 'nread' to the product weight's string constructor.
+  explicit GallicWeight(const string &s, int *nread = 0)
+      : ProductWeight<StringWeight<L, S>, W>(s, nread) {}
+
+  // Implicit conversion from the underlying product weight.
+  GallicWeight(const ProductWeight<StringWeight<L, S>, W> &w)
+      : ProductWeight<StringWeight<L, S>, W>(w) {}
+};
+
+} // namespace fst
+
+#endif // FST_LIB_STRING_WEIGHT_H__
diff --git a/src/include/fst/string.h b/src/include/fst/string.h
new file mode 100644
index 0000000..3099b87
--- /dev/null
+++ b/src/include/fst/string.h
@@ -0,0 +1,247 @@
+
+// string.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// Utilities to convert strings into FSTs.
+//
+
+#ifndef FST_LIB_STRING_H_
+#define FST_LIB_STRING_H_
+
+#include <fst/compact-fst.h>
+#include <fst/mutable-fst.h>
+
+DECLARE_string(fst_field_separator);
+
+namespace fst {
+
+// Functor compiling a string into an FST.
+//
+// The string is first converted to a sequence of labels according to the
+// token type, then compiled into a linear FST with one arc per label.
+template <class A>
+class StringCompiler {
+ public:
+  typedef A Arc;
+  typedef typename A::Label Label;
+  typedef typename A::Weight Weight;
+
+  // SYMBOL: tokens separated by newline or FLAGS_fst_field_separator
+  //         characters, each mapped through the symbol table (or parsed as
+  //         a decimal integer when no table is given).
+  // BYTE:   every byte of the string is one label.
+  // UTF8:   labels are produced by UTF8StringToLabels.
+  enum TokenType { SYMBOL = 1, BYTE = 2, UTF8 = 3 };
+
+  // 'syms' is only consulted for SYMBOL tokens. 'unknown_label', when not
+  // kNoLabel, replaces symbols missing from the table. Negative labels are
+  // rejected unless 'allow_negative' is set.
+  StringCompiler(TokenType type, const SymbolTable *syms = 0,
+                 Label unknown_label = kNoLabel,
+                 bool allow_negative = false)
+      : token_type_(type), syms_(syms), unknown_label_(unknown_label),
+        allow_negative_(allow_negative) {}
+
+  // Compile string 's' into FST 'fst'. Returns false (leaving 'fst'
+  // untouched) if the string cannot be converted to labels.
+  template <class F>
+  bool operator()(const string &s, F *fst) {
+    vector<Label> labels;
+    if (!ConvertStringToLabels(s, &labels))
+      return false;
+    Compile(labels, fst);
+    return true;
+  }
+
+ private:
+  // Converts 'str' to labels according to token_type_. Returns false if a
+  // token cannot be mapped to a label.
+  bool ConvertStringToLabels(const string &str, vector<Label> *labels) const {
+    labels->clear();
+    if (token_type_ == BYTE) {
+      for (size_t i = 0; i < str.size(); ++i)
+        labels->push_back(static_cast<unsigned char>(str[i]));
+    } else if (token_type_ == UTF8) {
+      return UTF8StringToLabels(str, labels);
+    } else {
+      // SplitToVector takes a mutable char buffer, so work on a
+      // NUL-terminated copy of 'str'. The vector owns the buffer and
+      // releases it on every exit path; the previous new[]/delete[] pair
+      // leaked the buffer whenever a symbol failed to convert.
+      vector<char> buffer(str.begin(), str.end());
+      buffer.push_back(0);
+      vector<char *> vec;
+      string separator = "\n" + FLAGS_fst_field_separator;
+      SplitToVector(&buffer[0], separator.c_str(), &vec, true);
+      for (size_t i = 0; i < vec.size(); ++i) {
+        Label label;
+        if (!ConvertSymbolToLabel(vec[i], &label))
+          return false;
+        labels->push_back(label);
+      }
+    }
+    return true;
+  }
+
+  // Builds a linear FST: states 0..labels.size(), one arc per label with
+  // identical input and output label, start state 0 and a single final
+  // state labels.size(). Any previous content of 'fst' is deleted.
+  void Compile(const vector<Label> &labels, MutableFst<A> *fst) const {
+    fst->DeleteStates();
+    while (fst->NumStates() <= labels.size())
+      fst->AddState();
+    for (size_t i = 0; i < labels.size(); ++i)
+      fst->AddArc(i, Arc(labels[i], labels[i], Weight::One(), i + 1));
+    fst->SetStart(0);
+    fst->SetFinal(labels.size(), Weight::One());
+  }
+
+  // Overload for string-compacted FSTs: the labels are handed over directly
+  // as the compact elements.
+  template <class Unsigned>
+  void Compile(const vector<Label> &labels, CompactFst<A, StringCompactor<A>,
+               Unsigned> *fst) const {
+    fst->SetCompactElements(labels.begin(), labels.end());
+  }
+
+  // Maps the token 's' to a label, written to '*output' on success.
+  // With a symbol table the token is looked up (falling back to
+  // unknown_label_ when set); without one it is parsed as a base-10
+  // integer and must be consumed entirely.
+  bool ConvertSymbolToLabel(const char *s, Label* output) const {
+    int64 n;
+    if (syms_) {
+      n = syms_->Find(s);
+      if ((n == -1) && (unknown_label_ != kNoLabel))
+        n = unknown_label_;
+      if (n == -1 || (!allow_negative_ && n < 0)) {
+        VLOG(1) << "StringCompiler::ConvertSymbolToLabel: Symbol \"" << s
+                << "\" is not mapped to any integer label, symbol table = "
+                << syms_->Name();
+        return false;
+      }
+    } else {
+      char *p;
+      n = strtoll(s, &p, 10);
+      // 'p' stops at the first unparsed character; anything short of the
+      // end of the token means trailing garbage.
+      if (p < s + strlen(s) || (!allow_negative_ && n < 0)) {
+        VLOG(1) << "StringCompiler::ConvertSymbolToLabel: Bad label integer "
+                << "= \"" << s << "\"";
+        return false;
+      }
+    }
+    *output = n;
+    return true;
+  }
+
+  TokenType token_type_;     // Token type: symbol, byte or utf8 encoded
+  const SymbolTable *syms_;  // Symbol table used when token type is symbol
+  Label unknown_label_;      // Label for token missing from symbol table
+  bool allow_negative_;      // Negative labels allowed?
+
+  DISALLOW_COPY_AND_ASSIGN(StringCompiler);
+};
+
+// Functor to print a string FST as a string.
+//
+// The FST must be a single path: starting from the start state, every
+// visited non-final state has exactly one outgoing arc. The output labels
+// along that path are collected and rendered according to the token type.
+template <class A>
+class StringPrinter {
+ public:
+  typedef A Arc;
+  typedef typename A::Label Label;
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+
+  // SYMBOL: labels printed through the symbol table (or as integers when
+  //         none is given), separated by the last character of
+  //         FLAGS_fst_field_separator.
+  // BYTE:   each label appended to the output as one character.
+  // UTF8:   labels rendered by LabelsToUTF8String.
+  enum TokenType { SYMBOL = 1, BYTE = 2, UTF8 = 3 };
+
+  StringPrinter(TokenType token_type,
+                const SymbolTable *syms = 0)
+      : token_type_(token_type), syms_(syms) {}
+
+  // Convert the FST 'fst' into the string 'output'. Returns false if the
+  // FST is not a string or a label cannot be printed.
+  bool operator()(const Fst<A> &fst, string *output) {
+    bool is_a_string = FstToLabels(fst);
+    if (!is_a_string) {
+      VLOG(1) << "StringPrinter::operator(): Fst is not a string.";
+      return false;
+    }
+
+    output->clear();
+
+    if (token_type_ == SYMBOL) {
+      // Dereferencing rbegin() of an empty string is undefined, so the
+      // separator is only emitted when the flag is non-empty.
+      const bool has_separator = !FLAGS_fst_field_separator.empty();
+      stringstream sstrm;
+      for (size_t i = 0; i < labels_.size(); ++i) {
+        if (i && has_separator)
+          sstrm << *(FLAGS_fst_field_separator.rbegin());
+        if (!PrintLabel(labels_[i], sstrm))
+          return false;
+      }
+      *output = sstrm.str();
+    } else if (token_type_ == BYTE) {
+      for (size_t i = 0; i < labels_.size(); ++i) {
+        output->push_back(labels_[i]);
+      }
+    } else if (token_type_ == UTF8) {
+      return LabelsToUTF8String(labels_, output);
+    } else {
+      VLOG(1) << "StringPrinter::operator(): Unknown token type: "
+              << token_type_;
+      return false;
+    }
+    return true;
+  }
+
+ private:
+  // Walks the single path of 'fst' from its start state until a state with
+  // non-Zero final weight, storing the output labels in labels_. Returns
+  // false if there is no start state, a dead end, a transition to
+  // kNoStateId, or a state with more than one outgoing arc.
+  // NOTE(review): a cycle of single-arc, non-final states would never
+  // terminate here -- confirm callers only pass acyclic string FSTs.
+  bool FstToLabels(const Fst<A> &fst) {
+    labels_.clear();
+
+    StateId s = fst.Start();
+    if (s == kNoStateId) {
+      VLOG(2) << "StringPrinter::FstToLabels: Invalid starting state for "
+              << "string fst.";
+      return false;
+    }
+
+    while (fst.Final(s) == Weight::Zero()) {
+      ArcIterator<Fst<A> > aiter(fst, s);
+      if (aiter.Done()) {
+        VLOG(2) << "StringPrinter::FstToLabels: String fst traversal does "
+                << "not reach final state.";
+        return false;
+      }
+
+      // Everything needed from the arc is read before the iterator is
+      // advanced below.
+      const A& arc = aiter.Value();
+      labels_.push_back(arc.olabel);
+
+      s = arc.nextstate;
+      if (s == kNoStateId) {
+        VLOG(2) << "StringPrinter::FstToLabels: Transition to invalid "
+                << "state.";
+        return false;
+      }
+
+      aiter.Next();
+      if (!aiter.Done()) {
+        VLOG(2) << "StringPrinter::FstToLabels: State with multiple "
+                << "outgoing arcs found.";
+        return false;
+      }
+    }
+
+    return true;
+  }
+
+  // Writes 'lab' to 'ostrm': its symbol when a symbol table is set (failing
+  // if the table returns an empty string), otherwise the integer itself.
+  bool PrintLabel(Label lab, ostream& ostrm) {
+    if (syms_) {
+      string symbol = syms_->Find(lab);
+      if (symbol == "") {
+        VLOG(2) << "StringPrinter::PrintLabel: Integer " << lab << " is not "
+                << "mapped to any textual symbol, symbol table = "
+                << syms_->Name();
+        return false;
+      }
+      ostrm << symbol;
+    } else {
+      ostrm << lab;
+    }
+    return true;
+  }
+
+  TokenType token_type_;     // Token type: symbol, byte or utf8 encoded
+  const SymbolTable *syms_;  // Symbol table used when token type is symbol
+  vector<Label> labels_;     // Input FST labels.
+
+  DISALLOW_COPY_AND_ASSIGN(StringPrinter);
+};
+
+} // namespace fst
+
+#endif // FST_LIB_STRING_H_
diff --git a/src/include/fst/symbol-table-ops.h b/src/include/fst/symbol-table-ops.h
new file mode 100644
index 0000000..e46c4c2
--- /dev/null
+++ b/src/include/fst/symbol-table-ops.h
@@ -0,0 +1,91 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: sorenj@google.com (Jeffrey Sorensen)
+
+#ifndef FST_LIB_SYMBOL_TABLE_OPS_H_
+#define FST_LIB_SYMBOL_TABLE_OPS_H_
+
+#include <vector>
+using std::vector;
+#include <string>
+#include <unordered_set>
+using std::tr1::unordered_set;
+using std::tr1::unordered_multiset;
+
+
+#include <fst/fst.h>
+#include <fst/symbol-table.h>
+
+
+namespace fst {
+
+// Returns a newly allocated symbol table holding only the entries of
+// 'syms' whose keys label some arc of 'fst' (input labels when 'input' is
+// true, output labels otherwise), plus key 0. Keys are copied unchanged,
+// so 'fst' needs no relabeling. The new table is named after 'syms' with
+// a "_pruned" suffix.
+template<class Arc>
+SymbolTable *PruneSymbolTable(const Fst<Arc> &fst, const SymbolTable &syms,
+                              bool input) {
+  typedef typename Arc::Label Label;
+
+  // Collect every label in use on the selected side of the arcs.
+  unordered_set<Label> used;
+  used.insert(0);  // Always keep epsilon
+  for (StateIterator<Fst<Arc> > siter(fst); !siter.Done(); siter.Next()) {
+    for (ArcIterator<Fst<Arc> > aiter(fst, siter.Value());
+         !aiter.Done(); aiter.Next()) {
+      const Arc &arc = aiter.Value();
+      used.insert(input ? arc.ilabel : arc.olabel);
+    }
+  }
+
+  // Copy over the table entries whose key was seen.
+  SymbolTable *pruned = new SymbolTable(syms.Name() + "_pruned");
+  for (SymbolTableIterator stiter(syms); !stiter.Done(); stiter.Next()) {
+    const Label label = stiter.Value();
+    if (used.count(label) > 0)
+      pruned->AddSymbol(stiter.Symbol(), stiter.Value());
+  }
+  return pruned;
+}
+
+// Relabels a symbol table to make it a contiguous mapping.
+SymbolTable *CompactSymbolTable(const SymbolTable &syms);
+
+// Merges two SymbolTables, all symbols from left will be merged into right
+// with the same ids. Symbols in right that have conflicting ids with those
+// in left will be assigned to value assigned from the left SymbolTable.
+// The returned symbol table will never modify symbol assignments from the left
+// side, but may do so on the right. If right_relabel_output is non-NULL, it
+// will be assigned true if the symbols from the right table needed to be
+// reassigned.
+// A potential use case is to Compose two Fst's that have different symbol
+// tables. You can reconcile them in the following way:
+// Fst<Arc> a, b;
+// bool relabel;
+// SymbolTable *bnew = MergeSymbolTable(a.OutputSymbols(),
+// b.InputSymbols(), &relabel);
+// if (relabel) {
+// Relabel(b, bnew, NULL);
+// }
+// b.SetInputSymbols(bnew);
+// delete bnew;
+SymbolTable *MergeSymbolTable(const SymbolTable &left, const SymbolTable &right,
+ bool *right_relabel_output = 0);
+
+// Read the symbol table from any Fst::Read()able file, without loading the
+// corresponding Fst. Returns NULL if the Fst does not contain a symbol table
+// or the symbol table cannot be read.
+SymbolTable *FstReadSymbols(const string &filename, bool input);
+
+} // namespace fst
+#endif // FST_LIB_SYMBOL_TABLE_OPS_H_
diff --git a/src/include/fst/symbol-table.h b/src/include/fst/symbol-table.h
new file mode 100644
index 0000000..93ebe76
--- /dev/null
+++ b/src/include/fst/symbol-table.h
@@ -0,0 +1,507 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// All Rights Reserved.
+//
+// Author : Johan Schalkwyk
+//
+// \file
+// Classes to provide symbol-to-integer and integer-to-symbol mappings.
+
+#ifndef FST_LIB_SYMBOL_TABLE_H__
+#define FST_LIB_SYMBOL_TABLE_H__
+
+#include <cstring>
+#include <string>
+#include <utility>
+using std::pair; using std::make_pair;
+#include <vector>
+using std::vector;
+
+
+#include <fst/compat.h>
+#include <iostream>
+#include <fstream>
+
+
+#include <map>
+
+DECLARE_bool(fst_compat_symbols);
+
+namespace fst {
+
+// WARNING: Reading via symbol table read options should
+// not be used. This is a temporary work around for
+// reading symbol ranges of previously stored symbol sets.
+struct SymbolTableReadOptions {
+  vector<pair<int64, int64> > string_hash_ranges;
+  string source;
+
+  // Leaves the ranges and the source name empty.
+  SymbolTableReadOptions() { }
+
+  // Stores copies of the given hash ranges and source name.
+  SymbolTableReadOptions(vector<pair<int64, int64> > string_hash_ranges_,
+                         const string& source_)
+      : string_hash_ranges(string_hash_ranges_),
+        source(source_) { }
+};
+
+// Implementation object behind SymbolTable. It stores the symbol strings,
+// the key-to-symbol and symbol-to-key maps, a reference count and lazily
+// computed checksums.
+class SymbolTableImpl {
+ public:
+ // Creates an empty table called 'name'.
+ SymbolTableImpl(const string &name)
+ : name_(name),
+ available_key_(0),
+ dense_key_limit_(0),
+ check_sum_finalized_(false) {}
+
+ // Copies 'impl' by re-adding each of its symbols with its original key.
+ // The counters start from zero here; AddSymbol() is defined elsewhere and
+ // presumably brings them up to date -- TODO confirm.
+ explicit SymbolTableImpl(const SymbolTableImpl& impl)
+ : name_(impl.name_),
+ available_key_(0),
+ dense_key_limit_(0),
+ check_sum_finalized_(false) {
+ for (size_t i = 0; i < impl.symbols_.size(); ++i) {
+ AddSymbol(impl.symbols_[i], impl.Find(impl.symbols_[i]));
+ }
+ }
+
+ // Releases every symbol string held in symbols_ with delete[].
+ ~SymbolTableImpl() {
+ for (size_t i = 0; i < symbols_.size(); ++i)
+ delete[] symbols_[i];
+ }
+
+ // TODO(johans): Add flag to specify whether the symbol
+ // should be indexed as string or int or both.
+ int64 AddSymbol(const string& symbol, int64 key);
+
+ // Returns the key of 'symbol' if it is already present; otherwise adds it
+ // under available_key_ (which is then incremented) and returns the result
+ // of that insertion.
+ int64 AddSymbol(const string& symbol) {
+ int64 key = Find(symbol);
+ return (key == -1) ? AddSymbol(symbol, available_key_++) : key;
+ }
+
+ static SymbolTableImpl* ReadText(istream &strm,
+ const string &name,
+ bool allow_negative = false);
+
+ static SymbolTableImpl* Read(istream &strm,
+ const SymbolTableReadOptions& opts);
+
+ bool Write(ostream &strm) const;
+
+ //
+ // Return the string associated with the key. If the key is not in the
+ // table, return an empty string.
+ string Find(int64 key) const {
+ // Keys below dense_key_limit_ index symbols_ directly.
+ if (key >=0 && key < dense_key_limit_)
+ return string(symbols_[key]);
+
+ // All other keys are resolved through key_map_.
+ map<int64, const char*>::const_iterator it =
+ key_map_.find(key);
+ if (it == key_map_.end()) {
+ return "";
+ }
+ return string(it->second);
+ }
+
+ //
+ // Return the key associated with the symbol. If the symbol
+ // does not exist, return -1 (the value of SymbolTable::kNoSymbol).
+ int64 Find(const string& symbol) const {
+ return Find(symbol.c_str());
+ }
+
+ //
+ // Return the key associated with the symbol. If the symbol
+ // does not exist, return -1 (the value of SymbolTable::kNoSymbol).
+ int64 Find(const char* symbol) const {
+ map<const char *, int64, StrCmp>::const_iterator it =
+ symbol_map_.find(symbol);
+ if (it == symbol_map_.end()) {
+ return -1;
+ }
+ return it->second;
+ }
+
+ // Returns the key of the symbol stored at position 'pos' of symbols_,
+ // or -1 if 'pos' is out of range.
+ int64 GetNthKey(ssize_t pos) const {
+ if ((pos < 0) || (pos >= symbols_.size())) return -1;
+ else return Find(symbols_[pos]);
+ }
+
+ const string& Name() const { return name_; }
+
+ // Reference count management; these forward to the RefCounter member.
+ int IncrRefCount() const {
+ return ref_count_.Incr();
+ }
+ int DecrRefCount() const {
+ return ref_count_.Decr();
+ }
+ int RefCount() const {
+ return ref_count_.count();
+ }
+
+ // Returns the checksum string, recomputing it first if needed. The
+ // computation is guarded by check_sum_mutex_.
+ string CheckSum() const {
+ MutexLock check_sum_lock(&check_sum_mutex_);
+ MaybeRecomputeCheckSum();
+ return check_sum_string_;
+ }
+
+ // Same as CheckSum(), but returns the labeled checksum string.
+ string LabeledCheckSum() const {
+ MutexLock check_sum_lock(&check_sum_mutex_);
+ MaybeRecomputeCheckSum();
+ return labeled_check_sum_string_;
+ }
+
+ // Key that AddSymbol(symbol) assigns to the next unseen symbol.
+ int64 AvailableKey() const {
+ return available_key_;
+ }
+
+ // Number of symbol strings stored in the table.
+ size_t NumSymbols() const {
+ return symbols_.size();
+ }
+
+ private:
+ // Recomputes the checksums (both of them) if we've had changes since the last
+ // computation (i.e., if check_sum_finalized_ is false).
+ void MaybeRecomputeCheckSum() const;
+
+ // Orders C strings by content (strcmp) rather than by pointer value.
+ struct StrCmp {
+ bool operator()(const char *s1, const char *s2) const {
+ return strcmp(s1, s2) < 0;
+ }
+ };
+
+ string name_;
+ int64 available_key_;
+ // Keys in [0, dense_key_limit_) are looked up by indexing symbols_.
+ int64 dense_key_limit_;
+ // Symbol strings; each entry is freed with delete[] in the destructor.
+ vector<const char *> symbols_;
+ map<int64, const char*> key_map_;
+ map<const char *, int64, StrCmp> symbol_map_;
+
+ mutable RefCounter ref_count_;
+ mutable bool check_sum_finalized_;
+ mutable CheckSummer check_sum_;
+ mutable CheckSummer labeled_check_sum_;
+ mutable string check_sum_string_;
+ mutable string labeled_check_sum_string_;
+ mutable Mutex check_sum_mutex_;
+};
+
+//
+// \class SymbolTable
+// \brief Symbol (string) to int and reverse mapping
+//
+// The SymbolTable implements the mappings of labels to strings and reverse.
+// SymbolTables are used to describe the alphabet of the input and output
+// labels for arcs in a Finite State Transducer.
+//
+// SymbolTables are reference counted and can therefore be shared across
+// multiple machines. For example a language model grammar G, with a
+// SymbolTable for the words in the language model can share this symbol
+// table with the lexical representation L o G.
+//
+class SymbolTable {
+ public:
+ static const int64 kNoSymbol = -1;
+
+ // Construct symbol table with a unique name.
+ SymbolTable(const string& name) : impl_(new SymbolTableImpl(name)) {}
+
+ // Create a reference counted copy: the implementation object is shared
+ // and its reference count incremented.
+ SymbolTable(const SymbolTable& table) : impl_(table.impl_) {
+ impl_->IncrRefCount();
+ }
+
+ // Dereference implementation object. When reference count hits 0, delete
+ // implementation.
+ virtual ~SymbolTable() {
+ if (!impl_->DecrRefCount()) delete impl_;
+ }
+
+ // Read an ascii representation of the symbol table from an istream. Pass a
+ // name to give the resulting SymbolTable. Returns 0 if the implementation
+ // could not be read.
+ static SymbolTable* ReadText(istream &strm,
+ const string& name,
+ bool allow_negative = false) {
+ SymbolTableImpl* impl = SymbolTableImpl::ReadText(strm,
+ name,
+ allow_negative);
+ if (!impl)
+ return 0;
+ else
+ return new SymbolTable(impl);
+ }
+
+ // Read an ascii representation of the symbol table from a file. The file
+ // name is used as the table name. Returns 0 if the file cannot be opened.
+ static SymbolTable* ReadText(const string& filename,
+ bool allow_negative = false) {
+ ifstream strm(filename.c_str(), ifstream::in);
+ if (!strm) {
+ LOG(ERROR) << "SymbolTable::ReadText: Can't open file " << filename;
+ return 0;
+ }
+ return ReadText(strm, filename, allow_negative);
+ }
+
+
+ // WARNING: Reading via symbol table read options should
+ // not be used. This is a temporary work around.
+ static SymbolTable* Read(istream &strm,
+ const SymbolTableReadOptions& opts) {
+ SymbolTableImpl* impl = SymbolTableImpl::Read(strm, opts);
+ if (!impl)
+ return 0;
+ else
+ return new SymbolTable(impl);
+ }
+
+ // Read a binary dump of the symbol table from a stream. 'source' is passed
+ // on through the read options.
+ static SymbolTable* Read(istream &strm, const string& source) {
+ SymbolTableReadOptions opts;
+ opts.source = source;
+ return Read(strm, opts);
+ }
+
+ // Read a binary dump of the symbol table from a file. Returns 0 if the
+ // file cannot be opened.
+ static SymbolTable* Read(const string& filename) {
+ ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
+ if (!strm) {
+ LOG(ERROR) << "SymbolTable::Read: Can't open file " << filename;
+ return 0;
+ }
+ return Read(strm, filename);
+ }
+
+ //--------------------------------------------------------
+ // Derivable Interface (final)
+ //--------------------------------------------------------
+ // Create a reference counted copy. The caller receives a newly allocated
+ // SymbolTable object.
+ virtual SymbolTable* Copy() const {
+ return new SymbolTable(*this);
+ }
+
+ // Add a symbol with given key to table. A symbol table also
+ // keeps track of the last available key (highest key value in
+ // the symbol table). A shared implementation is copied first
+ // (see MutateCheck()).
+ virtual int64 AddSymbol(const string& symbol, int64 key) {
+ MutateCheck();
+ return impl_->AddSymbol(symbol, key);
+ }
+
+ // Add a symbol to the table. The associated value key is automatically
+ // assigned by the symbol table. A shared implementation is copied first
+ // (see MutateCheck()).
+ virtual int64 AddSymbol(const string& symbol) {
+ MutateCheck();
+ return impl_->AddSymbol(symbol);
+ }
+
+ // Add another symbol table to this table. All key values will be offset
+ // by the current available key (highest key value in the symbol table).
+ // Note string symbols with the same key value will still have the same
+ // key value after the symbol table has been merged, but a different
+ // value. Adding symbol tables do not result in changes in the base table.
+ virtual void AddTable(const SymbolTable& table);
+
+ // Return the name of the symbol table.
+ virtual const string& Name() const {
+ return impl_->Name();
+ }
+
+ // Return the label-agnostic MD5 check-sum for this table. All new symbols
+ // added to the table will result in an updated checksum.
+ // DEPRECATED.
+ virtual string CheckSum() const {
+ return impl_->CheckSum();
+ }
+
+ // Same as CheckSum(), but this returns a label-dependent version.
+ virtual string LabeledCheckSum() const {
+ return impl_->LabeledCheckSum();
+ }
+
+ // Write the symbol table to a stream; forwards to the implementation.
+ virtual bool Write(ostream &strm) const {
+ return impl_->Write(strm);
+ }
+
+ // Write the symbol table to a file opened in binary mode. Returns false
+ // if the file cannot be opened.
+ bool Write(const string& filename) const {
+ ofstream strm(filename.c_str(), ofstream::out | ofstream::binary);
+ if (!strm) {
+ LOG(ERROR) << "SymbolTable::Write: Can't open file " << filename;
+ return false;
+ }
+ return Write(strm);
+ }
+
+ // Dump an ascii text representation of the symbol table via a stream
+ virtual bool WriteText(ostream &strm) const;
+
+ // Dump an ascii text representation of the symbol table to a file.
+ // Returns false if the file cannot be opened.
+ bool WriteText(const string& filename) const {
+ ofstream strm(filename.c_str());
+ if (!strm) {
+ LOG(ERROR) << "SymbolTable::WriteText: Can't open file " << filename;
+ return false;
+ }
+ return WriteText(strm);
+ }
+
+ // Return the string associated with the key. If the key is not in the
+ // table, return an empty string. (This default implementation does not
+ // log anything.)
+ virtual string Find(int64 key) const {
+ return impl_->Find(key);
+ }
+
+ // Return the key associated with the symbol. If the symbol
+ // does not exist, return SymbolTable::kNoSymbol. (This default
+ // implementation does not log anything.)
+ virtual int64 Find(const string& symbol) const {
+ return impl_->Find(symbol);
+ }
+
+ // Return the key associated with the symbol. If the symbol
+ // does not exist, return SymbolTable::kNoSymbol. (This default
+ // implementation does not log anything.)
+ virtual int64 Find(const char* symbol) const {
+ return impl_->Find(symbol);
+ }
+
+ // Return the current available key (i.e highest key number+1) in
+ // the symbol table
+ virtual int64 AvailableKey(void) const {
+ return impl_->AvailableKey();
+ }
+
+ // Return the current number of symbols in table (not necessarily
+ // equal to AvailableKey())
+ virtual size_t NumSymbols(void) const {
+ return impl_->NumSymbols();
+ }
+
+ // Return the key of the symbol at position 'pos', or -1 if 'pos' is out
+ // of range.
+ virtual int64 GetNthKey(ssize_t pos) const {
+ return impl_->GetNthKey(pos);
+ }
+
+ private:
+ // Takes ownership of 'impl' without touching its reference count.
+ explicit SymbolTable(SymbolTableImpl* impl) : impl_(impl) {}
+
+ // Called before every mutation: if the implementation is shared with
+ // other tables, drop our reference and continue on a private copy.
+ void MutateCheck() {
+ // Copy on write
+ if (impl_->RefCount() > 1) {
+ impl_->DecrRefCount();
+ impl_ = new SymbolTableImpl(*impl_);
+ }
+ }
+
+ const SymbolTableImpl* Impl() const {
+ return impl_;
+ }
+
+ private:
+ SymbolTableImpl* impl_;
+
+ void operator=(const SymbolTable &table); // disallow
+};
+
+
+//
+// \class SymbolTableIterator
+// \brief Iterator class for symbols in a symbol table
+//
+// Visits the symbols by position, using SymbolTable::GetNthKey(). The
+// number of symbols is sampled once at construction time, and the table is
+// held by reference.
+class SymbolTableIterator {
+ public:
+  SymbolTableIterator(const SymbolTable& table)
+      : table_(table),
+        pos_(0),
+        nsymbols_(table.NumSymbols()),
+        key_(table.GetNthKey(0)) { }
+
+  ~SymbolTableIterator() { }
+
+  // True once the position has reached the symbol count.
+  bool Done() { return pos_ == nsymbols_; }
+
+  // Key (int64) of the current symbol.
+  int64 Value() { return key_; }
+
+  // String of the current symbol, looked up by key.
+  string Symbol() { return table_.Find(key_); }
+
+  // Moves to the next position; the key is refreshed only while the new
+  // position is still inside the table.
+  void Next() {
+    if (++pos_ < nsymbols_)
+      key_ = table_.GetNthKey(pos_);
+  }
+
+  // Rewinds to the first symbol.
+  void Reset() {
+    pos_ = 0;
+    key_ = table_.GetNthKey(0);
+  }
+
+ private:
+  const SymbolTable& table_;
+  ssize_t pos_;
+  size_t nsymbols_;
+  int64 key_;
+};
+
+
+// Tests compatibility between two symbol tables. Always succeeds when the
+// fst_compat_symbols flag is off or when both tables are absent. Otherwise
+// both tables must be present and have equal labeled checksums. When
+// 'warning' is true, each failure is reported with LOG(WARNING).
+inline bool CompatSymbols(const SymbolTable *syms1, const SymbolTable *syms2,
+                          bool warning = true) {
+  if (!FLAGS_fst_compat_symbols)
+    return true;
+
+  if (!syms1 && !syms2)
+    return true;
+
+  if (syms1 && !syms2) {
+    if (warning) {
+      LOG(WARNING) <<
+          "CompatSymbols: first symbol table present but second missing";
+    }
+    return false;
+  }
+
+  if (!syms1 && syms2) {
+    if (warning) {
+      LOG(WARNING) <<
+          "CompatSymbols: second symbol table present but first missing";
+    }
+    return false;
+  }
+
+  // Both tables are present from here on.
+  if (syms1->LabeledCheckSum() != syms2->LabeledCheckSum()) {
+    if (warning) {
+      LOG(WARNING) << "CompatSymbols: Symbol table check sums do not match";
+    }
+    return false;
+  }
+
+  return true;
+}
+
+
+// Relabels a symbol table as specified by the input vector of pairs
+// (old label, new label). The new symbol table only retains symbols
+// for which a relabeling is *explicitly* specified. The result is newly
+// allocated; it is named "relabeled_" + the old name, or left unnamed when
+// the old name is empty.
+// TODO(allauzen): consider adding options to allow for some form
+// of implicit identity relabeling.
+template <class Label>
+SymbolTable *RelabelSymbolTable(const SymbolTable *table,
+                                const vector<pair<Label, Label> > &pairs) {
+  string new_name;
+  if (!table->Name().empty())
+    new_name = string("relabeled_") + table->Name();
+  SymbolTable *relabeled = new SymbolTable(new_name);
+
+  // Re-register the symbol of each old label under its new label.
+  typedef typename vector<pair<Label, Label> >::const_iterator PairIter;
+  for (PairIter it = pairs.begin(); it != pairs.end(); ++it)
+    relabeled->AddSymbol(table->Find(it->first), it->second);
+
+  return relabeled;
+}
+
+} // namespace fst
+
+#endif // FST_LIB_SYMBOL_TABLE_H__
diff --git a/src/include/fst/synchronize.h b/src/include/fst/synchronize.h
new file mode 100644
index 0000000..28d1262
--- /dev/null
+++ b/src/include/fst/synchronize.h
@@ -0,0 +1,457 @@
+// synchronize.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// Synchronize an FST with bounded delay.
+
+#ifndef FST_LIB_SYNCHRONIZE_H__
+#define FST_LIB_SYNCHRONIZE_H__
+
+#include <algorithm>
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <unordered_set>
+using std::tr1::unordered_set;
+using std::tr1::unordered_multiset;
+#include <string>
+#include <utility>
+using std::pair; using std::make_pair;
+#include <vector>
+using std::vector;
+
+#include <fst/cache.h>
+#include <fst/test-properties.h>
+
+
+namespace fst {
+
+typedef CacheOptions SynchronizeFstOptions;
+
+
+// Implementation class for SynchronizeFst
+template <class A>
+class SynchronizeFstImpl
+ : public CacheImpl<A> {
+ public:
+ using FstImpl<A>::SetType;
+ using FstImpl<A>::SetProperties;
+ using FstImpl<A>::SetInputSymbols;
+ using FstImpl<A>::SetOutputSymbols;
+
+ using CacheBaseImpl< CacheState<A> >::PushArc;
+ using CacheBaseImpl< CacheState<A> >::HasArcs;
+ using CacheBaseImpl< CacheState<A> >::HasFinal;
+ using CacheBaseImpl< CacheState<A> >::HasStart;
+ using CacheBaseImpl< CacheState<A> >::SetArcs;
+ using CacheBaseImpl< CacheState<A> >::SetFinal;
+ using CacheBaseImpl< CacheState<A> >::SetStart;
+
+ typedef A Arc;
+ typedef typename A::Label Label;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+
+ typedef basic_string<Label> String;
+
+ // One state of the synchronized FST: a state of the input FST together
+ // with the residual input and output label strings.
+ struct Element {
+ // Leaves all three fields uninitialized.
+ Element() {}
+
+ Element(StateId s, const String *i, const String *o)
+ : state(s), istring(i), ostring(o) {}
+
+ StateId state; // Input state Id
+ const String *istring; // Residual input labels
+ const String *ostring; // Residual output labels
+ // Residual strings are represented by const pointers to
+ // basic_string<Label> and are stored in a hash_set. The pointed
+ // memory is owned by the hash_set string_set_.
+ };
+
+ // Builds the implementation over a copy of 'fst' (owned by this object
+ // and deleted in the destructor). Properties are derived from the input
+ // FST's properties via SynchronizeProperties(), and the symbol tables are
+ // taken from 'fst'.
+ SynchronizeFstImpl(const Fst<A> &fst, const SynchronizeFstOptions &opts)
+ : CacheImpl<A>(opts), fst_(fst.Copy()) {
+ SetType("synchronize");
+ uint64 props = fst.Properties(kFstProperties, false);
+ SetProperties(SynchronizeProperties(props), kCopyProperties);
+
+ SetInputSymbols(fst.InputSymbols());
+ SetOutputSymbols(fst.OutputSymbols());
+ }
+
+ // Copy constructor. The underlying FST is copied with Copy(true), and the
+ // type, properties and symbol tables are taken from 'impl'. The element
+ // and string tables are not copied here; they start out empty.
+ SynchronizeFstImpl(const SynchronizeFstImpl &impl)
+ : CacheImpl<A>(impl),
+ fst_(impl.fst_->Copy(true)) {
+ SetType("synchronize");
+ SetProperties(impl.Properties(), kCopyProperties);
+ SetInputSymbols(impl.InputSymbols());
+ SetOutputSymbols(impl.OutputSymbols());
+ }
+
+  // Deletes the copied FST and every residual string owned by string_set_.
+  ~SynchronizeFstImpl() {
+    delete fst_;
+    // Snapshot the pointers first, so that no string is freed while the
+    // hash set is still being traversed.
+    vector<const String*> owned(string_set_.begin(), string_set_.end());
+    for (size_t i = 0; i < owned.size(); ++i)
+      delete owned[i];
+  }
+
+  // Returns the start state, creating it on first use. The start state
+  // pairs the input FST's start state with empty residual strings. Returns
+  // kNoStateId (without caching anything) if the input FST has no start
+  // state.
+  StateId Start() {
+    if (!HasStart()) {
+      const StateId s = fst_->Start();
+      if (s == kNoStateId)
+        return kNoStateId;
+      // FindString takes ownership of the new string (or frees it if an
+      // equal one is already stored).
+      const String *empty = FindString(new String());
+      // Reuse 's' rather than querying the input FST a second time.
+      SetStart(FindState(Element(s, empty, empty)));
+    }
+    return CacheImpl<A>::Start();
+  }
+
+  // Returns the final weight of state 's', computing and caching it on
+  // first use. A state is final only when its underlying weight is not
+  // Zero and both residual strings are empty; an element without an input
+  // state (kNoStateId) counts as having weight One.
+  Weight Final(StateId s) {
+    if (HasFinal(s))
+      return CacheImpl<A>::Final(s);
+
+    const Element &e = elements_[s];
+    Weight w = e.state == kNoStateId ? Weight::One() : fst_->Final(e.state);
+    const bool is_final = (w != Weight::Zero()) &&
+                          e.istring->empty() && e.ostring->empty();
+    if (is_final)
+      SetFinal(s, w);
+    else
+      SetFinal(s, Weight::Zero());
+    return CacheImpl<A>::Final(s);
+  }
+
+ // Number of arcs leaving 's'; the state is expanded first if its arcs
+ // are not yet in the cache.
+ size_t NumArcs(StateId s) {
+ if (!HasArcs(s))
+ Expand(s);
+ return CacheImpl<A>::NumArcs(s);
+ }
+
+ // Number of input-epsilon arcs leaving 's'; the state is expanded first
+ // if its arcs are not yet in the cache.
+ size_t NumInputEpsilons(StateId s) {
+ if (!HasArcs(s))
+ Expand(s);
+ return CacheImpl<A>::NumInputEpsilons(s);
+ }
+
+ // Number of output-epsilon arcs leaving 's'; the state is expanded first
+ // if its arcs are not yet in the cache.
+ size_t NumOutputEpsilons(StateId s) {
+ if (!HasArcs(s))
+ Expand(s);
+ return CacheImpl<A>::NumOutputEpsilons(s);
+ }
+
+ // Returns all FST properties (mask kFstProperties).
+ uint64 Properties() const { return Properties(kFstProperties); }
+
+ // Set error if found; return FST impl properties.
+ // The error bit of the underlying FST is only consulted when the caller
+ // asks for kError in 'mask'.
+ uint64 Properties(uint64 mask) const {
+ if ((mask & kError) && fst_->Properties(kError, false))
+ SetProperties(kError, kError);
+ return FstImpl<Arc>::Properties(mask);
+ }
+
+ // Initializes 'data' for iterating over the arcs of 's'; the state is
+ // expanded first if its arcs are not yet in the cache.
+ void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
+ if (!HasArcs(s))
+ Expand(s);
+ CacheImpl<A>::InitArcIterator(s, data);
+ }
+
+  // Returns the first label of the string obtained by concatenating s
+  // and l: the head of s when s is non-empty, otherwise l itself.
+  Label Car(const String *s, Label l = 0) const {
+    return s->empty() ? l : (*s)[0];
+  }
+
+  // Computes the residual string obtained by removing the first label of
+  // the concatenation of s and l. When s is empty the result is empty (the
+  // removed label is l itself); otherwise it is s without its head,
+  // followed by l if l is non-zero. The result is interned by FindString.
+  const String *Cdr(const String *s, Label l = 0) {
+    String *rest = new String();
+    if (!s->empty()) {
+      rest->assign(s->begin() + 1, s->end());
+      if (l)
+        rest->push_back(l);
+    }
+    return FindString(rest);
+  }
+
+  // Computes the concatenation of s and l: a copy of s followed by l when
+  // l is non-zero. The result is interned by FindString.
+  const String *Concat(const String *s, Label l = 0) {
+    String *joined = new String(*s);
+    if (l)
+      joined->push_back(l);
+    return FindString(joined);
+  }
+
+  // Tests if the concatenation of s and l is empty, i.e. s has no labels
+  // and l is zero.
+  bool Empty(const String *s, Label l = 0) const {
+    return s->empty() && l == 0;
+  }
+
+  // Interns the string pointed to by s and takes ownership of it. If an
+  // equal string is already stored, s is deleted and the stored pointer is
+  // returned; otherwise s itself is inserted and returned.
+  const String *FindString(const String *s) {
+    typename StringSet::iterator it = string_set_.find(s);
+    if (it == string_set_.end()) {
+      string_set_.insert(s);
+      return s;
+    }
+    delete s;
+    return *it;
+  }
+
+  // Finds the state corresponding to an element. If the element is new it
+  // is appended to elements_ and registered in element_map_; the new state
+  // id is its index in elements_.
+  StateId FindState(const Element &e) {
+    typename ElementMap::iterator found = element_map_.find(e);
+    if (found != element_map_.end())
+      return found->second;
+
+    StateId id = elements_.size();
+    elements_.push_back(e);
+    element_map_.insert(pair<const Element, StateId>(e, id));
+    return id;
+  }
+
+
+ // Computes the outgoing transitions from a state, creating new destination
+ // states as needed.
+ void Expand(StateId s) {
+ // Taken by value: FindState() below may append to elements_, which could
+ // invalidate a reference into that vector.
+ Element e = elements_[s];
+
+ if (e.state != kNoStateId)
+ for (ArcIterator< Fst<A> > ait(*fst_, e.state);
+ !ait.Done();
+ ait.Next()) {
+ const A &arc = ait.Value();
+ if (!Empty(e.istring, arc.ilabel) && !Empty(e.ostring, arc.olabel)) {
+ // A label is available on both sides: emit the first label of each
+ // side and keep the remainders as the destination's residuals.
+ const String *istring = Cdr(e.istring, arc.ilabel);
+ const String *ostring = Cdr(e.ostring, arc.olabel);
+ StateId d = FindState(Element(arc.nextstate, istring, ostring));
+ PushArc(s, Arc(Car(e.istring, arc.ilabel),
+ Car(e.ostring, arc.olabel), arc.weight, d));
+ } else {
+ // At least one side has nothing to emit: output an arc labeled 0:0
+ // and append the arc's labels to the residual strings.
+ const String *istring = Concat(e.istring, arc.ilabel);
+ const String *ostring = Concat(e.ostring, arc.olabel);
+ StateId d = FindState(Element(arc.nextstate, istring, ostring));
+ PushArc(s, Arc(0 , 0, arc.weight, d));
+ }
+ }
+
+ // If the underlying weight is not Zero (One when there is no input
+ // state) and residual labels remain, add an arc that emits the first
+ // label of each residual and leads to an element with no input state.
+ Weight w = e.state == kNoStateId ? Weight::One() : fst_->Final(e.state);
+ if ((w != Weight::Zero()) &&
+ ((e.istring)->size() + (e.ostring)->size() > 0)) {
+ const String *istring = Cdr(e.istring);
+ const String *ostring = Cdr(e.ostring);
+ StateId d = FindState(Element(kNoStateId, istring, ostring));
+ PushArc(s, Arc(Car(e.istring), Car(e.ostring), w, d));
+ }
+ SetArcs(s);
+ }
+
+ private:
+ // Equality function for Elements, assume strings have been hashed.
+ class ElementEqual {
+  public:
+   // The string members are compared as pointers, not by contents; this is
+   // the "strings have been hashed" assumption noted above.
+   bool operator()(const Element &x, const Element &y) const {
+     if (x.state != y.state) return false;
+     if (x.istring != y.istring) return false;
+     return x.ostring == y.ostring;
+   }
+ };
+
+ // Hash function for Elements to Fst states.
+ class ElementKey {
+  public:
+   // Seeds the hash with the state id, then folds in the input string
+   // followed by the output string.
+   size_t operator()(const Element &x) const {
+     size_t h = x.state;
+     h = Fold(h, *x.istring);
+     h = Fold(h, *x.ostring);
+     return h;
+   }
+
+  private:
+   // Mixes the length of str and then each of its labels into h, one
+   // shift-left-and-xor step per value.
+   static size_t Fold(size_t h, const String &str) {
+     h = (h << 1) ^ str.size();
+     for (size_t k = 0; k < str.size(); ++k)
+       h = (h << 1) ^ str[k];
+     return h;
+   }
+ };
+
+ // Equality function for strings
+ class StringEqual {
+  public:
+   // Two strings are equal when they have the same length and the same
+   // label at every position.
+   bool operator()(const String * const &x, const String * const &y) const {
+     const size_t len = x->size();
+     if (len != y->size()) return false;
+     size_t k = 0;
+     while (k < len && !((*x)[k] != (*y)[k]))
+       ++k;
+     return k == len;
+   }
+ };
+
+ // Hash function for set of strings
+ class StringKey {
+  public:
+   // Starts from the string length and mixes in each label with a
+   // shift-left-and-xor step.
+   size_t operator()(const String * const & x) const {
+     const String &str = *x;
+     size_t h = str.size();
+     for (size_t k = 0; k != str.size(); ++k) {
+       h <<= 1;
+       h ^= str[k];
+     }
+     return h;
+   }
+ };
+
+
+ // Hash map from an Element to the state id assigned to it by FindState().
+ typedef unordered_map<Element, StateId, ElementKey, ElementEqual> ElementMap;
+ // Hash set of String pointers that hashes and compares by string contents.
+ typedef unordered_set<const String*, StringKey, StringEqual> StringSet;
+
+ // Input FST whose arcs and final weights Expand() reads.
+ const Fst<A> *fst_;
+ vector<Element> elements_; // mapping Fst state to Elements
+ ElementMap element_map_; // mapping Elements to Fst state
+ // Strings registered through FindString(), which hands their ownership here.
+ StringSet string_set_;
+
+ void operator=(const SynchronizeFstImpl<A> &); // disallow
+};
+
+
+// Synchronizes a transducer. This version is a delayed Fst. The
+// result will be an equivalent FST that has the property that during
+// the traversal of a path, the delay is either zero or strictly
+// increasing, where the delay is the difference between the number of
+// non-epsilon output labels and input labels along the path.
+//
+// For the algorithm to terminate, the input transducer must have
+// bounded delay, i.e., the delay of every cycle must be zero.
+//
+// Complexity:
+// - A has bounded delay: exponential
+// - A does not have bounded delay: does not terminate
+//
+// References:
+// - Mehryar Mohri. Edit-Distance of Weighted Automata: General
+// Definitions and Algorithms, International Journal of Computer
+// Science, 14(6): 957-982 (2003).
+//
+// This class attaches interface to implementation and handles
+// reference counting, delegating most methods to ImplToFst.
+template <class A>
+class SynchronizeFst : public ImplToFst< SynchronizeFstImpl<A> > {
+ public:
+ // The iterator specializations need the private GetImpl() accessor below.
+ friend class ArcIterator< SynchronizeFst<A> >;
+ friend class StateIterator< SynchronizeFst<A> >;
+
+ typedef A Arc;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+ typedef CacheState<A> State;
+ typedef SynchronizeFstImpl<A> Impl;
+
+ // Builds a new implementation over 'fst' using default-constructed options.
+ SynchronizeFst(const Fst<A> &fst)
+ : ImplToFst<Impl>(new Impl(fst, SynchronizeFstOptions())) {}
+
+ // Builds a new implementation over 'fst' using the supplied options.
+ SynchronizeFst(const Fst<A> &fst, const SynchronizeFstOptions &opts)
+ : ImplToFst<Impl>(new Impl(fst, opts)) {}
+
+ // See Fst<>::Copy() for doc.
+ SynchronizeFst(const SynchronizeFst<A> &fst, bool safe = false)
+ : ImplToFst<Impl>(fst, safe) {}
+
+ // Get a copy of this SynchronizeFst. See Fst<>::Copy() for further doc.
+ virtual SynchronizeFst<A> *Copy(bool safe = false) const {
+ return new SynchronizeFst<A>(*this, safe);
+ }
+
+ // Defined out of line after the StateIterator specialization it allocates.
+ virtual inline void InitStateIterator(StateIteratorData<A> *data) const;
+
+ // Forwards arc-iterator initialization for state 's' to the implementation.
+ virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
+ GetImpl()->InitArcIterator(s, data);
+ }
+
+ private:
+ // Makes visible to friends.
+ Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
+
+ void operator=(const SynchronizeFst<A> &fst); // Disallow
+};
+
+
+// Specialization for SynchronizeFst.
+template<class A>
+class StateIterator< SynchronizeFst<A> >
+ : public CacheStateIterator< SynchronizeFst<A> > {
+ public:
+ // Hands both the FST and its implementation object to CacheStateIterator.
+ // GetImpl() is private in SynchronizeFst and reachable here only through
+ // the friend declaration in that class.
+ explicit StateIterator(const SynchronizeFst<A> &fst)
+ : CacheStateIterator< SynchronizeFst<A> >(fst, fst.GetImpl()) {}
+};
+
+
+// Specialization for SynchronizeFst.
+template <class A>
+class ArcIterator< SynchronizeFst<A> >
+ : public CacheArcIterator< SynchronizeFst<A> > {
+ public:
+ typedef typename A::StateId StateId;
+
+ // Constructs the base iterator for state 's', then asks the implementation
+ // to expand 's' if it reports that the arcs of 's' are not available yet.
+ ArcIterator(const SynchronizeFst<A> &fst, StateId s)
+ : CacheArcIterator< SynchronizeFst<A> >(fst.GetImpl(), s) {
+ if (!fst.GetImpl()->HasArcs(s))
+ fst.GetImpl()->Expand(s);
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+
+// Allocates a StateIterator specialized for SynchronizeFst over this FST and
+// stores it in data->base.
+template <class A> inline
+void SynchronizeFst<A>::InitStateIterator(StateIteratorData<A> *data) const
+{
+ data->base = new StateIterator< SynchronizeFst<A> >(*this);
+}
+
+
+
+// Synchronizes a transducer. This version writes the synchronized
+// result to a MutableFst. The result will be an equivalent FST that
+// has the property that during the traversal of a path, the delay is
+// either zero or strictly increasing, where the delay is the
+// difference between the number of non-epsilon output labels and
+// input labels along the path.
+//
+// For the algorithm to terminate, the input transducer must have
+// bounded delay, i.e., the delay of every cycle must be zero.
+//
+// Complexity:
+// - A has bounded delay: exponential
+// - A does not have bounded delay: does not terminate
+//
+// References:
+// - Mehryar Mohri. Edit-Distance of Weighted Automata: General
+// Definitions and Algorithms, International Journal of Computer
+// Science, 14(6): 957-982 (2003).
+template<class Arc>
+void Synchronize(const Fst<Arc> &ifst, MutableFst<Arc> *ofst) {
+ SynchronizeFstOptions opts;
+ opts.gc_limit = 0; // Cache only the last state for fastest copy.
+ // Wrap ifst in a delayed SynchronizeFst and assign it to *ofst.
+ *ofst = SynchronizeFst<Arc>(ifst, opts);
+}
+
+} // namespace fst
+
+#endif // FST_LIB_SYNCHRONIZE_H__
diff --git a/src/include/fst/test-properties.h b/src/include/fst/test-properties.h
new file mode 100644
index 0000000..db1ddcc
--- /dev/null
+++ b/src/include/fst/test-properties.h
@@ -0,0 +1,246 @@
+// test-properties.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Functions to manipulate and test property bits
+
+#ifndef FST_LIB_TEST_PROPERTIES_H__
+#define FST_LIB_TEST_PROPERTIES_H__
+
+#include <unordered_set>
+using std::tr1::unordered_set;
+using std::tr1::unordered_multiset;
+
+#include <fst/dfs-visit.h>
+#include <fst/connect.h>
+
+
+DECLARE_bool(fst_verify_properties);
+
+namespace fst {
+
+// For a binary property, the bit is always returned set.
+// For a trinary (i.e. two-bit) property, both bits are
+// returned set iff either corresponding input bit is set.
+inline uint64 KnownProperties(uint64 props) {
+  // Trinary bits that are set in the input are kept as they are.
+  const uint64 given = props & kTrinaryProperties;
+  // Copy each set bit onto its partner: a positive bit is shifted up by one
+  // position and a negative bit is shifted down by one.
+  const uint64 from_positive = (props & kPosTrinaryProperties) << 1;
+  const uint64 from_negative = (props & kNegTrinaryProperties) >> 1;
+  return kBinaryProperties | given | from_positive | from_negative;
+}
+
+// Tests compatibility between two sets of properties
+inline bool CompatProperties(uint64 props1, uint64 props2) {
+  // Only bits that are known on both sides can disagree.
+  const uint64 both_known = KnownProperties(props1) & KnownProperties(props2);
+  const uint64 mismatched = (props1 ^ props2) & both_known;
+  if (!mismatched)
+    return true;
+
+  // Log every conflicting bit by name before reporting failure.
+  for (int i = 0; i < 64; ++i) {
+    const uint64 bit = uint64(1) << i;
+    if (bit & mismatched) {
+      LOG(ERROR) << "CompatProperties: mismatch: " << PropertyNames[i]
+                 << ": props1 = " << (props1 & bit ? "true" : "false")
+                 << ", props2 = " << (props2 & bit ? "true" : "false");
+    }
+  }
+  return false;
+}
+
+// Computes FST property values defined in properties.h. The value of
+// each property indicated in the mask will be determined and returned
+// (these will never be unknown here). In the course of determining
+// the properties specifically requested in the mask, certain other
+// properties may be determined (those with little additional expense)
+// and their values will be returned as well. The complete set of
+// known properties (whether true or false) determined by this
+// operation will be assigned to the value pointed to by KNOWN.
+// If 'use_stored' is true, pre-computed FST properties may be used
+// when possible. This routine is seldom called directly; instead it
+// is used to implement fst.Properties(mask, true).
+template<class Arc>
+uint64 ComputeProperties(const Fst<Arc> &fst, uint64 mask, uint64 *known,
+ bool use_stored) {
+ typedef typename Arc::Label Label;
+ typedef typename Arc::Weight Weight;
+ typedef typename Arc::StateId StateId;
+
+ uint64 fst_props = fst.Properties(kFstProperties, false); // Fst-stored
+
+ // Check stored FST properties first if allowed.
+ if (use_stored) {
+ uint64 known_props = KnownProperties(fst_props);
+ // If FST contains required info, return it.
+ if ((known_props & mask) == mask) {
+ *known = known_props;
+ return fst_props;
+ }
+ }
+
+ // Compute (trinary) properties explicitly.
+
+ // Initialize with binary properties (already known).
+ uint64 comp_props = fst_props & kBinaryProperties;
+
+ // Compute these trinary properties with a DFS. We compute only those
+ // that need a DFS here, since we otherwise would like to avoid a DFS
+ // since its stack could grow large.
+ uint64 dfs_props = kCyclic | kAcyclic | kInitialCyclic | kInitialAcyclic |
+ kAccessible | kNotAccessible |
+ kCoAccessible | kNotCoAccessible;
+ if (mask & dfs_props) {
+ // The visitor is handed a pointer to comp_props.
+ SccVisitor<Arc> scc_visitor(&comp_props);
+ DfsVisit(fst, &scc_visitor);
+ }
+
+ // Compute any remaining trinary properties via a state and arcs iterations
+ if (mask & ~(kBinaryProperties | dfs_props)) {
+ // Start from the positive value of each property; the loops below switch a
+ // property to its negative value as soon as a counterexample is seen.
+ comp_props |= kAcceptor | kNoEpsilons | kNoIEpsilons | kNoOEpsilons |
+ kILabelSorted | kOLabelSorted | kUnweighted | kTopSorted | kString;
+ // Determinism is only tracked (and so only reported) when it was requested.
+ if (mask & (kIDeterministic | kNonIDeterministic))
+ comp_props |= kIDeterministic;
+ if (mask & (kODeterministic | kNonODeterministic))
+ comp_props |= kODeterministic;
+
+ // Labels seen so far on the arcs of the current state; null when the
+ // corresponding determinism property was not requested.
+ unordered_set<Label> *ilabels = 0;
+ unordered_set<Label> *olabels = 0;
+
+ // Number of final states among the states visited before the current one.
+ StateId nfinal = 0;
+ for (StateIterator< Fst<Arc> > siter(fst);
+ !siter.Done();
+ siter.Next()) {
+ StateId s = siter.Value();
+
+ // kNoLabel in prev_arc makes the sortedness checks skip the first arc.
+ Arc prev_arc(kNoLabel, kNoLabel, Weight::One(), 0);
+ // Create these only if we need to
+ if (mask & (kIDeterministic | kNonIDeterministic))
+ ilabels = new unordered_set<Label>;
+ if (mask & (kODeterministic | kNonODeterministic))
+ olabels = new unordered_set<Label>;
+
+ for (ArcIterator< Fst<Arc> > aiter(fst, s);
+ !aiter.Done();
+ aiter.Next()) {
+ const Arc &arc =aiter.Value();
+
+ // A label already seen on an earlier arc of this state breaks determinism.
+ if (ilabels && ilabels->find(arc.ilabel) != ilabels->end()) {
+ comp_props |= kNonIDeterministic;
+ comp_props &= ~kIDeterministic;
+ }
+ if (olabels && olabels->find(arc.olabel) != olabels->end()) {
+ comp_props |= kNonODeterministic;
+ comp_props &= ~kODeterministic;
+ }
+ if (arc.ilabel != arc.olabel) {
+ comp_props |= kNotAcceptor;
+ comp_props &= ~kAcceptor;
+ }
+ if (arc.ilabel == 0 && arc.olabel == 0) {
+ comp_props |= kEpsilons;
+ comp_props &= ~kNoEpsilons;
+ }
+ if (arc.ilabel == 0) {
+ comp_props |= kIEpsilons;
+ comp_props &= ~kNoIEpsilons;
+ }
+ if (arc.olabel == 0) {
+ comp_props |= kOEpsilons;
+ comp_props &= ~kNoOEpsilons;
+ }
+ if (prev_arc.ilabel != kNoLabel && arc.ilabel < prev_arc.ilabel) {
+ comp_props |= kNotILabelSorted;
+ comp_props &= ~kILabelSorted;
+ }
+ if (prev_arc.olabel != kNoLabel && arc.olabel < prev_arc.olabel) {
+ comp_props |= kNotOLabelSorted;
+ comp_props &= ~kOLabelSorted;
+ }
+ // Arc weights of One() or Zero() do not count as weighted.
+ if (arc.weight != Weight::One() && arc.weight != Weight::Zero()) {
+ comp_props |= kWeighted;
+ comp_props &= ~kUnweighted;
+ }
+ // Top-sorted requires every arc to lead to a strictly larger state id.
+ if (arc.nextstate <= s) {
+ comp_props |= kNotTopSorted;
+ comp_props &= ~kTopSorted;
+ }
+ // A string FST requires every arc to lead to the next state id.
+ if (arc.nextstate != s + 1) {
+ comp_props |= kNotString;
+ comp_props &= ~kString;
+ }
+ prev_arc = arc;
+ if (ilabels)
+ ilabels->insert(arc.ilabel);
+ if (olabels)
+ olabels->insert(arc.olabel);
+ }
+
+ // nfinal has not yet been updated for state s, so this fires for any state
+ // visited after a final state.
+ if (nfinal > 0) { // final state not last
+ comp_props |= kNotString;
+ comp_props &= ~kString;
+ }
+
+ Weight final = fst.Final(s);
+
+ if (final != Weight::Zero()) { // final state
+ if (final != Weight::One()) {
+ comp_props |= kWeighted;
+ comp_props &= ~kUnweighted;
+ }
+ ++nfinal;
+ } else { // non-final state
+ if (fst.NumArcs(s) != 1) {
+ comp_props |= kNotString;
+ comp_props &= ~kString;
+ }
+ }
+
+ // Release the per-state label sets (deleting a null pointer is a no-op).
+ delete ilabels;
+ delete olabels;
+ }
+
+ // A string FST must start at state 0 (or have no start state at all).
+ if (fst.Start() != kNoStateId && fst.Start() != 0) {
+ comp_props |= kNotString;
+ comp_props &= ~kString;
+ }
+ }
+
+ *known = KnownProperties(comp_props);
+ return comp_props;
+}
+
+// This is a wrapper around ComputeProperties that will cause a fatal
+// error if the stored properties and the computed properties are
+// incompatible when 'FLAGS_fst_verify_properties' is true. This
+// routine is seldom called directly; instead it is used to implement
+// fst.Properties(mask, true).
+template<class Arc>
+uint64 TestProperties(const Fst<Arc> &fst, uint64 mask, uint64 *known) {
+  // Normal path: stored properties may be used.
+  if (!FLAGS_fst_verify_properties)
+    return ComputeProperties(fst, mask, known, true);
+
+  // Verification path: recompute without consulting the stored bits, then
+  // check the result against them.
+  const uint64 stored = fst.Properties(kFstProperties, false);
+  const uint64 computed = ComputeProperties(fst, mask, known, false);
+  const bool consistent = CompatProperties(stored, computed);
+  if (!consistent) {
+    LOG(FATAL) << "TestProperties: stored Fst properties incorrect"
+               << " (stored: props1, computed: props2)";
+  }
+  return computed;
+}
+
+} // namespace fst
+
+#endif // FST_LIB_TEST_PROPERTIES_H__
diff --git a/src/include/fst/topsort.h b/src/include/fst/topsort.h
new file mode 100644
index 0000000..53735e5
--- /dev/null
+++ b/src/include/fst/topsort.h
@@ -0,0 +1,112 @@
+// topsort.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Topological sort of FSTs
+
+#ifndef FST_LIB_TOPSORT_H__
+#define FST_LIB_TOPSORT_H__
+
+#include <algorithm>
+#include <vector>
+using std::vector;
+
+
+#include <fst/dfs-visit.h>
+#include <fst/fst.h>
+#include <fst/statesort.h>
+
+
+namespace fst {
+
+// DFS visitor class to return topological ordering.
+template <class A>
+class TopOrderVisitor {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+
+  // If acyclic, ORDER[i] gives the topological position of state Id i;
+  // otherwise unchanged. ACYCLIC will be true iff the FST has
+  // no cycles.
+  TopOrderVisitor(vector<StateId> *order, bool *acyclic)
+      : order_(order), acyclic_(acyclic) {}
+
+  // Starts a visit: forgets any finishing order left over from a previous
+  // visit and assumes the FST is acyclic until a back arc says otherwise.
+  void InitVisit(const Fst<A> &fst) {
+    finish_.clear();
+    *acyclic_ = true;
+  }
+
+  bool InitState(StateId s, StateId r) { return true; }
+
+  bool TreeArc(StateId s, const A &arc) { return true; }
+
+  // A back arc closes a cycle: records that and returns false.
+  bool BackArc(StateId s, const A &arc) { return (*acyclic_ = false); }
+
+  bool ForwardOrCrossArc(StateId s, const A &arc) { return true; }
+
+  // Appends s to the finishing-time order.
+  void FinishState(StateId s, StateId p, const A *) { finish_.push_back(s); }
+
+  // Ends a visit. If no cycle was found, *order_ is rebuilt so that the
+  // state finished last gets position 0, the one before it position 1,
+  // and so on. The finishing-time storage is released in either case.
+  void FinishVisit() {
+    if (*acyclic_) {
+      const StateId nstates = finish_.size();
+      order_->assign(finish_.size(), kNoStateId);
+      for (StateId s = 0; s < nstates; ++s)
+        (*order_)[finish_[nstates - s - 1]] = s;
+    }
+    // Swap with an empty vector so the memory is freed, not just cleared.
+    vector<StateId>().swap(finish_);
+  }
+
+ private:
+  vector<StateId> *order_;
+  bool *acyclic_;
+  // Held by value so it is always valid, cannot leak when InitVisit() and
+  // FinishVisit() are not paired, and is copied safely with the visitor.
+  vector<StateId> finish_;  // states in finishing-time order
+};
+
+
+// Topologically sorts its input if acyclic, modifying it. Otherwise,
+// the input is unchanged. When sorted, all transitions are from
+// lower to higher state IDs.
+//
+// Complexity:
+// - Time: O(V + E)
+// - Space: O(V + E)
+// where V = # of states and E = # of arcs.
+template <class Arc>
+bool TopSort(MutableFst<Arc> *fst) {
+  typedef typename Arc::StateId StateId;
+
+  // Both locals are filled in through the pointers given to the visitor.
+  vector<StateId> order;
+  bool acyclic;
+
+  TopOrderVisitor<Arc> visitor(&order, &acyclic);
+  DfsVisit(*fst, &visitor);
+
+  if (!acyclic) {
+    // Cyclic input: leave the states alone and only record the properties.
+    fst->SetProperties(kCyclic | kNotTopSorted, kCyclic | kNotTopSorted);
+    return acyclic;
+  }
+
+  // Acyclic input: renumber the states according to the computed order.
+  StateSort(fst, order);
+  fst->SetProperties(kAcyclic | kInitialAcyclic | kTopSorted,
+                     kAcyclic | kInitialAcyclic | kTopSorted);
+  return acyclic;
+}
+
+} // namespace fst
+
+#endif // FST_LIB_TOPSORT_H__
diff --git a/src/include/fst/tuple-weight.h b/src/include/fst/tuple-weight.h
new file mode 100644
index 0000000..184026c
--- /dev/null
+++ b/src/include/fst/tuple-weight.h
@@ -0,0 +1,332 @@
+// tuple-weight.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google (Cyril Allauzen)
+//
+// \file
+// Tuple weight set operation definitions.
+
+#ifndef FST_LIB_TUPLE_WEIGHT_H__
+#define FST_LIB_TUPLE_WEIGHT_H__
+
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/weight.h>
+
+
+DECLARE_string(fst_weight_parentheses);
+DECLARE_string(fst_weight_separator);
+
+namespace fst {
+
+template<class W, unsigned int n> class TupleWeight;
+template <class W, unsigned int n>
+istream &operator>>(istream &strm, TupleWeight<W, n> &w);
+
+// n-tuple weight, element of the n-th Cartesian power of W
+template <class W, unsigned int n>
+class TupleWeight {
+ public:
+  typedef TupleWeight<typename W::ReverseWeight, n> ReverseWeight;
+
+  // Leaves every component default-constructed.
+  TupleWeight() {}
+
+  TupleWeight(const TupleWeight &w) {
+    for (size_t i = 0; i < n; ++i)
+      values_[i] = w.values_[i];
+  }
+
+  // Copies components from [begin, end) in order. At most n components are
+  // read, so an over-long range cannot write past the end of the tuple;
+  // components not covered by a shorter range stay default-constructed.
+  template <class Iterator>
+  TupleWeight(Iterator begin, Iterator end) {
+    size_t i = 0;
+    for (Iterator iter = begin; iter != end && i < n; ++iter, ++i)
+      values_[i] = *iter;
+  }
+
+  // Sets every component to w.
+  TupleWeight(const W &w) {
+    for (size_t i = 0; i < n; ++i)
+      values_[i] = w;
+  }
+
+  // Tuple whose components are all W::Zero().
+  static const TupleWeight<W, n> &Zero() {
+    static const TupleWeight<W, n> zero(W::Zero());
+    return zero;
+  }
+
+  // Tuple whose components are all W::One().
+  static const TupleWeight<W, n> &One() {
+    static const TupleWeight<W, n> one(W::One());
+    return one;
+  }
+
+  // Tuple whose components are all W::NoWeight().
+  static const TupleWeight<W, n> &NoWeight() {
+    static const TupleWeight<W, n> no_weight(W::NoWeight());
+    return no_weight;
+  }
+
+  // Number of components in the tuple.
+  static unsigned int Length() {
+    return n;
+  }
+
+  // Reads the n components from strm, one after another, via W::Read().
+  istream &Read(istream &strm) {
+    for (size_t i = 0; i < n; ++i)
+      values_[i].Read(strm);
+    return strm;
+  }
+
+  // Writes the n components to strm, one after another, via W::Write().
+  ostream &Write(ostream &strm) const {
+    for (size_t i = 0; i < n; ++i)
+      values_[i].Write(strm);
+    return strm;
+  }
+
+  TupleWeight<W, n> &operator=(const TupleWeight<W, n> &w) {
+    for (size_t i = 0; i < n; ++i)
+      values_[i] = w.values_[i];
+    return *this;
+  }
+
+  // True iff every component reports Member().
+  bool Member() const {
+    bool member = true;
+    for (size_t i = 0; i < n; ++i)
+      member = member && values_[i].Member();
+    return member;
+  }
+
+  // Combines the component hashes as hash = 5 * hash + component_hash.
+  size_t Hash() const {
+    uint64 hash = 0;
+    for (size_t i = 0; i < n; ++i)
+      hash = 5 * hash + values_[i].Hash();
+    return size_t(hash);
+  }
+
+  // Quantizes each component with the same delta.
+  TupleWeight<W, n> Quantize(float delta = kDelta) const {
+    TupleWeight<W, n> w;
+    for (size_t i = 0; i < n; ++i)
+      w.values_[i] = values_[i].Quantize(delta);
+    return w;
+  }
+
+  // Reverses each component. The result is built directly as a
+  // ReverseWeight through its public SetValue(), so this also compiles
+  // when W::ReverseWeight is a different type from W.
+  ReverseWeight Reverse() const {
+    ReverseWeight w;
+    for (size_t i = 0; i < n; ++i)
+      w.SetValue(i, values_[i].Reverse());
+    return w;
+  }
+
+  // Component access; i is not range-checked.
+  const W& Value(size_t i) const { return values_[i]; }
+
+  void SetValue(size_t i, const W &w) { values_[i] = w; }
+
+ protected:
+  // Reads TupleWeight when there are no parentheses around tuple terms
+  inline static istream &ReadNoParen(istream &strm,
+                                     TupleWeight<W, n> &w,
+                                     char separator) {
+    // Skip leading whitespace; c ends up holding the first other character.
+    int c;
+    do {
+      c = strm.get();
+    } while (isspace(c));
+
+    // With a single component the loop below never runs, so the character
+    // consumed while skipping whitespace has to go back into the stream or
+    // the final extraction would miss it.
+    if (n == 1 && c != EOF)
+      strm.putback(static_cast<char>(c));
+
+    for (size_t i = 0; i < n - 1; ++i) {
+      string s;
+      // For the first element c already holds its first character.
+      if (i)
+        c = strm.get();
+      while (c != separator) {
+        if (c == EOF) {
+          strm.clear(std::ios::badbit);
+          return strm;
+        }
+        s += c;
+        c = strm.get();
+      }
+      // read (i+1)-th element
+      istringstream sstrm(s);
+      W r = W::Zero();
+      sstrm >> r;
+      w.SetValue(i, r);
+    }
+
+    // read n-th element
+    W r = W::Zero();
+    strm >> r;
+    w.SetValue(n - 1, r);
+
+    return strm;
+  }
+
+  // Reads TupleWeight when there are parentheses around tuple terms
+  inline static istream &ReadWithParen(istream &strm,
+                                       TupleWeight<W, n> &w,
+                                       char separator,
+                                       char open_paren,
+                                       char close_paren) {
+    int c;
+    do {
+      c = strm.get();
+    } while (isspace(c));
+
+    if (c != open_paren) {
+      FSTERROR() << " is fst_weight_parentheses flag set correcty? ";
+      strm.clear(std::ios::badbit);
+      return strm;
+    }
+
+    for (size_t i = 0; i < n - 1; ++i) {
+      // read (i+1)-th element
+      stack<int> parens;
+      string s;
+      c = strm.get();
+      // A separator only ends the element when no parenthesis is open.
+      while (c != separator || !parens.empty()) {
+        if (c == EOF) {
+          strm.clear(std::ios::badbit);
+          return strm;
+        }
+        s += c;
+        // if parens encountered before separator, they must be matched
+        if (c == open_paren) {
+          parens.push(1);
+        } else if (c == close_paren) {
+          // Fail for mismatched parens
+          if (parens.empty()) {
+            strm.clear(std::ios::failbit);
+            return strm;
+          }
+          parens.pop();
+        }
+        c = strm.get();
+      }
+      istringstream sstrm(s);
+      W r = W::Zero();
+      sstrm >> r;
+      w.SetValue(i, r);
+    }
+
+    // read n-th element
+    // The rest of the stream is taken; it must end with the closing
+    // parenthesis, which is stripped before parsing.
+    string s;
+    c = strm.get();
+    while (c != EOF) {
+      s += c;
+      c = strm.get();
+    }
+    if (s.empty() || *s.rbegin() != close_paren) {
+      FSTERROR() << " is fst_weight_parentheses flag set correcty? ";
+      strm.clear(std::ios::failbit);
+      return strm;
+    }
+    s.erase(s.size() - 1, 1);
+    istringstream sstrm(s);
+    W r = W::Zero();
+    sstrm >> r;
+    w.SetValue(n - 1, r);
+
+    return strm;
+  }
+
+
+ private:
+  W values_[n];
+
+  friend istream &operator>><W, n>(istream&, TupleWeight<W, n>&);
+};
+
+template <class W, unsigned int n>
+inline bool operator==(const TupleWeight<W, n> &w1,
+                       const TupleWeight<W, n> &w2) {
+  // Equal iff every pair of components compares equal; later pairs are
+  // not compared once one pair differs.
+  for (size_t i = 0; i < n; ++i) {
+    if (!(w1.Value(i) == w2.Value(i)))
+      return false;
+  }
+  return true;
+}
+
+template <class W, unsigned int n>
+inline bool operator!=(const TupleWeight<W, n> &w1,
+                       const TupleWeight<W, n> &w2) {
+  // Different as soon as one pair of components differs.
+  for (size_t i = 0; i < n; ++i) {
+    if (w1.Value(i) != w2.Value(i))
+      return true;
+  }
+  return false;
+}
+
+template <class W, unsigned int n>
+inline bool ApproxEqual(const TupleWeight<W, n> &w1,
+                        const TupleWeight<W, n> &w2,
+                        float delta = kDelta) {
+  // Approximately equal iff every pair of components is, using the same
+  // delta for each pair.
+  for (size_t i = 0; i < n; ++i) {
+    if (!ApproxEqual(w1.Value(i), w2.Value(i), delta))
+      return false;
+  }
+  return true;
+}
+
+template <class W, unsigned int n>
+inline ostream &operator<<(ostream &strm, const TupleWeight<W, n> &w) {
+  // The separator flag must hold exactly one character.
+  if (FLAGS_fst_weight_separator.size() != 1) {
+    FSTERROR() << "FLAGS_fst_weight_separator.size() is not equal to 1";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+  const char separator = FLAGS_fst_weight_separator[0];
+
+  // Parentheses are written only when the flag is non-empty, in which case
+  // it must hold exactly the opening and the closing character.
+  const bool write_parens = !FLAGS_fst_weight_parentheses.empty();
+  if (write_parens && FLAGS_fst_weight_parentheses.size() != 2) {
+    FSTERROR() << "FLAGS_fst_weight_parentheses.size() is not equal to 2";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+
+  if (write_parens)
+    strm << FLAGS_fst_weight_parentheses[0];
+  for (size_t i = 0; i < n; ++i) {
+    if (i != 0)
+      strm << separator;
+    strm << w.Value(i);
+  }
+  if (write_parens)
+    strm << FLAGS_fst_weight_parentheses[1];
+
+  return strm;
+}
+
+template <class W, unsigned int n>
+inline istream &operator>>(istream &strm, TupleWeight<W, n> &w) {
+  // The separator flag must hold exactly one character.
+  if (FLAGS_fst_weight_separator.size() != 1) {
+    FSTERROR() << "FLAGS_fst_weight_separator.size() is not equal to 1";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+  const char separator = FLAGS_fst_weight_separator[0];
+
+  // An empty parentheses flag selects the reader without parentheses.
+  if (FLAGS_fst_weight_parentheses.empty())
+    return TupleWeight<W, n>::ReadNoParen(strm, w, separator);
+
+  // Otherwise the flag must hold the opening and the closing character.
+  if (FLAGS_fst_weight_parentheses.size() != 2) {
+    FSTERROR() << "FLAGS_fst_weight_parentheses.size() is not equal to 2";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+  const char open_paren = FLAGS_fst_weight_parentheses[0];
+  const char close_paren = FLAGS_fst_weight_parentheses[1];
+  return TupleWeight<W, n>::ReadWithParen(strm, w, separator, open_paren,
+                                          close_paren);
+}
+
+
+
+} // namespace fst
+
+#endif // FST_LIB_TUPLE_WEIGHT_H__
diff --git a/src/include/fst/types.h b/src/include/fst/types.h
new file mode 100644
index 0000000..8c4367a
--- /dev/null
+++ b/src/include/fst/types.h
@@ -0,0 +1,38 @@
+// types.h
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Various type definitions (mostly for Google compatibility).
+
+#include <cstdlib> // for ssize_t
+#include <stdint.h> // *int*_t
+
+#include <fst/compat.h> // for DISALLOW_COPY_AND_ASSIGN
+
+#ifndef FST_LIB_TYPES_H__
+#define FST_LIB_TYPES_H__
+
+// Short names for the fixed-width signed integer types from <stdint.h>.
+typedef int8_t int8;
+typedef int16_t int16;
+typedef int32_t int32;
+typedef int64_t int64;
+
+// Short names for the fixed-width unsigned integer types from <stdint.h>.
+typedef uint8_t uint8;
+typedef uint16_t uint16;
+typedef uint32_t uint32;
+typedef uint64_t uint64;
+
+#endif // FST_LIB_TYPES_H__
diff --git a/src/include/fst/union-find.h b/src/include/fst/union-find.h
new file mode 100644
index 0000000..c8633e0
--- /dev/null
+++ b/src/include/fst/union-find.h
@@ -0,0 +1,110 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: wojciech@google.com (Wojciech Skut)
+//
+// \file Union-Find algorithm for dense sets of non-negative
+// integers. Implemented using disjoint tree forests with rank
+// heuristics and path compression.
+
+#ifndef __fst_union_find_inl_h__
+#define __fst_union_find_inl_h__
+
+#include <stack>
+#include <vector>
+using std::vector;
+#include <fst/types.h>
+
+namespace fst {
+
+// Union-Find algorithm for dense sets of non-negative integers
+// (exact type: T).
+template <class T>
+class UnionFind {
+ public:
+  // Ctor: creates a disjoint set forest for the range [0;max).
+  // 'fail' is a value indicating that an element hasn't been
+  // initialized using MakeSet(...). The upper bound of the range
+  // can be reset (increased) using MakeSet(...).
+  UnionFind(T max, T fail)
+      : parent_(max, fail), rank_(max), fail_(fail) { }
+
+  // Finds the representative of the set 'item' belongs to.
+  // Performs path compression if needed.
+  // Returns 'fail' if item is out of range, equals 'fail', or has not
+  // been initialized.
+  T FindSet(T item) {
+    if (item >= parent_.size()
+        || item == fail_
+        || parent_[item] == fail_) return fail_;
+
+    // Walk up to the root, remembering every parent slot on the way.
+    T *p = &parent_[item];
+    for (; *p != item; item = *p, p = &parent_[item]) {
+      exec_stack_.push(p);
+    }
+    // Point every remembered slot directly at the root.
+    for (; ! exec_stack_.empty(); exec_stack_.pop()) {
+      *exec_stack_.top() = *p;
+    }
+    return *p;
+  }
+
+  // Creates the (destructive) union of the sets x and y belong to.
+  // Does nothing if FindSet() fails for either element.
+  void Union(T x, T y) {
+    const T root_x = FindSet(x);
+    const T root_y = FindSet(y);
+    // Link() uses its arguments as indices into parent_ and rank_, so the
+    // failure marker must never reach it.
+    if (root_x == fail_ || root_y == fail_) return;
+    Link(root_x, root_y);
+  }
+
+  // Initialization of an element: creates a singleton set containing
+  // 'item'. The range [0;max) is reset if item >= max.
+  T MakeSet(T item) {
+    if (item >= parent_.size()) {
+      // New value in parent_ should be initialized to fail_
+      size_t nitem = item > 0 ? 2 * item : 2;
+      parent_.resize(nitem, fail_);
+      rank_.resize(nitem);
+    }
+    parent_[item] = item;
+    return item;
+  }
+
+  // Initialization of all elements starting from 0 to max - 1 to distinct sets
+  void MakeAllSet(T max) {
+    parent_.resize(max);
+    // rank_ has to cover the same range as parent_, otherwise a later
+    // Link() on one of the new elements would index past its end.
+    rank_.resize(max);
+    for (T item = 0; item < max; ++item) {
+      parent_[item] = item;
+    }
+  }
+
+ private:
+  vector<T> parent_;      // Parent nodes.
+  vector<int> rank_;      // Rank of an element = min. depth in tree.
+  T fail_;                // Value indicating lookup failure.
+  stack<T*> exec_stack_;  // Used for path compression.
+
+  // Links trees rooted in 'x' and 'y'.
+  // The root with the smaller rank is attached below the other; when the
+  // ranks are equal, x goes below y and the rank of y grows by one.
+  void Link(T x, T y) {
+    if (x == y) return;
+
+    if (rank_[x] > rank_[y]) {
+      parent_[y] = x;
+    } else {
+      parent_[x] = y;
+      if (rank_[x] == rank_[y]) {
+        ++rank_[y];
+      }
+    }
+  }
+  DISALLOW_COPY_AND_ASSIGN(UnionFind);
+};
+
+} // namespace fst
+
+#endif // __fst_union_find_inl_h__
diff --git a/src/include/fst/union.h b/src/include/fst/union.h
new file mode 100644
index 0000000..a2f97fb
--- /dev/null
+++ b/src/include/fst/union.h
@@ -0,0 +1,185 @@
+// union.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Functions and classes to compute the union of two FSTs.
+
+#ifndef FST_LIB_UNION_H__
+#define FST_LIB_UNION_H__
+
+#include <vector>
+using std::vector;
+#include <algorithm>
+
+#include <fst/mutable-fst.h>
+#include <fst/rational.h>
+
+
+namespace fst {
+
+// Computes the union (sum) of two FSTs. This version writes the
+// union to an output MutableFst. If A transduces string x to y with
+// weight a and B transduces string w to v with weight b, then their
+// union transduces x to y with weight a and w to v with weight b.
+//
+// Complexity:
+// - Time: O(V2 + E2)
+// - Space: O(V2 + E2)
+// where Vi = # of states and Ei = # of arcs of the ith FST.
+template <class Arc>
+void Union(MutableFst<Arc> *fst1, const Fst<Arc> &fst2) {
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+
+  // TODO(riley): restore when voice actions issues fixed
+  // Check that the symbol tables are compatible. A mismatch is only
+  // logged; the error return stays disabled per the TODO above.
+  if (!CompatSymbols(fst1->InputSymbols(), fst2.InputSymbols()) ||
+      !CompatSymbols(fst1->OutputSymbols(), fst2.OutputSymbols())) {
+    LOG(ERROR) << "Union: input/output symbol tables of 1st argument "
+               << "do not match input/output symbol tables of 2nd argument";
+    // fst1->SetProperties(kError, kError);
+    // return;
+  }
+
+  // Snapshot fst1 before it is mutated: its state count is the offset
+  // applied to every state id copied from fst2.
+  StateId numstates1 = fst1->NumStates();
+  bool initial_acyclic1 = fst1->Properties(kInitialAcyclic, true);
+  uint64 props1 = fst1->Properties(kFstProperties, false);
+  uint64 props2 = fst2.Properties(kFstProperties, false);
+
+  // fst2 without a start state contributes nothing; only propagate its
+  // error bit.
+  StateId start2 = fst2.Start();
+  if (start2 == kNoStateId) {
+    if (props2 & kError) fst1->SetProperties(kError, kError);
+    return;
+  }
+
+  // Reserve room for fst2's states plus the extra start state that is
+  // added below when fst1's start state is not known to be acyclic.
+  if (fst2.Properties(kExpanded, false)) {
+    fst1->ReserveStates(
+        numstates1 + CountStates(fst2) + (initial_acyclic1 ? 0 : 1));
+  }
+
+  // Append a copy of every fst2 state, shifting arc targets by numstates1.
+  for (StateIterator< Fst<Arc> > siter(fst2);
+       !siter.Done();
+       siter.Next()) {
+    StateId s1 = fst1->AddState();
+    StateId s2 = siter.Value();
+    fst1->SetFinal(s1, fst2.Final(s2));
+    fst1->ReserveArcs(s1, fst2.NumArcs(s2));
+    for (ArcIterator< Fst<Arc> > aiter(fst2, s2);
+         !aiter.Done();
+         aiter.Next()) {
+      Arc arc = aiter.Value();
+      arc.nextstate += numstates1;
+      fst1->AddArc(s1, arc);
+    }
+  }
+  StateId start1 = fst1->Start();
+  if (start1 == kNoStateId) {
+    // fst1 has no start state, so the result is just the copy of fst2.
+    // That copy lives at offset numstates1, so its start state must be
+    // shifted like every arc target above (a no-op when fst1 was empty).
+    fst1->SetStart(start2 + numstates1);
+    // NOTE(review): props2 describes fst2 alone; if fst1 already held
+    // states (numstates1 > 0) some copied properties may not hold for the
+    // combined machine -- confirm.
+    fst1->SetProperties(props2, kCopyProperties);
+    return;
+  }
+
+  if (initial_acyclic1) {
+    // No arc re-enters fst1's start state, so an epsilon arc from it to
+    // fst2's (shifted) start state is enough.
+    fst1->AddArc(start1, Arc(0, 0, Weight::One(), start2 + numstates1));
+  } else {
+    // Otherwise add a fresh start state with epsilon arcs to both
+    // original start states.
+    StateId nstart1 = fst1->AddState();
+    fst1->SetStart(nstart1);
+    fst1->AddArc(nstart1, Arc(0, 0, Weight::One(), start1));
+    fst1->AddArc(nstart1, Arc(0, 0, Weight::One(), start2 + numstates1));
+  }
+  fst1->SetProperties(UnionProperties(props1, props2), kFstProperties);
+}
+
+
+// Computes the union of two FSTs; this version modifies its
+// RationalFst argument.
+template<class Arc>
+void Union(RationalFst<Arc> *fst1, const Fst<Arc> &fst2) {
+ fst1->GetImpl()->AddUnion(fst2);  // The whole operation is forwarded to the implementation object's AddUnion().
+}
+
+
+typedef RationalFstOptions UnionFstOptions;
+
+
+// Computes the union (sum) of two FSTs. This version is a delayed
+// Fst. If A transduces string x to y with weight a and B transduces
+// string w to v with weight b, then their union transduces x to y
+// with weight a and w to v with weight b.
+//
+// Complexity:
+// - Time: O(v1 + e1 + v2 + e2)
+// - Space: O(v1 + v2)
+// where vi = # of states visited and ei = # of arcs visited of the
+// ith FST. Constant time and space to visit an input state or arc
+// is assumed and exclusive of caching.
+template <class A>
+class UnionFst : public RationalFst<A> {
+ public:
+ using ImplToFst< RationalFstImpl<A> >::GetImpl;  // Makes the implementation accessor usable in the constructors below.
+
+ typedef A Arc;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+ // Union of fst1 and fst2 on a default-constructed RationalFst base.
+ UnionFst(const Fst<A> &fst1, const Fst<A> &fst2) {
+ GetImpl()->InitUnion(fst1, fst2);
+ }
+ // Same as above, but the base is constructed from 'opts'.
+ UnionFst(const Fst<A> &fst1, const Fst<A> &fst2, const UnionFstOptions &opts)
+ : RationalFst<A>(opts) {
+ GetImpl()->InitUnion(fst1, fst2);
+ }
+
+ // Copy constructor; 'safe' is passed straight to the RationalFst copy. See Fst<>::Copy() for doc.
+ UnionFst(const UnionFst<A> &fst, bool safe = false)
+ : RationalFst<A>(fst, safe) {}
+
+ // Get a heap-allocated copy of this UnionFst. See Fst<>::Copy() for further doc.
+ virtual UnionFst<A> *Copy(bool safe = false) const {
+ return new UnionFst<A>(*this, safe);
+ }
+};
+
+
+// StateIterator specialization for UnionFst; it simply reuses the RationalFst state iterator.
+template <class A>
+class StateIterator< UnionFst<A> > : public StateIterator< RationalFst<A> > {
+ public:
+ explicit StateIterator(const UnionFst<A> &fst)
+ : StateIterator< RationalFst<A> >(fst) {}
+};
+
+
+// ArcIterator specialization for UnionFst; it simply reuses the RationalFst arc iterator for state 's'.
+template <class A>
+class ArcIterator< UnionFst<A> > : public ArcIterator< RationalFst<A> > {
+ public:
+ typedef typename A::StateId StateId;
+
+ ArcIterator(const UnionFst<A> &fst, StateId s)
+ : ArcIterator< RationalFst<A> >(fst, s) {}
+};
+
+
+// Useful alias when using StdArc.
+typedef UnionFst<StdArc> StdUnionFst;
+
+} // namespace fst
+
+#endif // FST_LIB_UNION_H__
diff --git a/src/include/fst/util.h b/src/include/fst/util.h
new file mode 100644
index 0000000..87231e1
--- /dev/null
+++ b/src/include/fst/util.h
@@ -0,0 +1,409 @@
+// util.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// FST utility inline definitions.
+
+#ifndef FST_LIB_UTIL_H__
+#define FST_LIB_UTIL_H__
+
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <unordered_set>
+using std::tr1::unordered_set;
+using std::tr1::unordered_multiset;
+#include <list>
+#include <map>
+#include <set>
+#include <sstream>
+#include <string>
+#include <vector>
+using std::vector;
+
+
+#include <fst/compat.h>
+#include <fst/types.h>
+
+#include <iostream>
+#include <fstream>
+
+//
+// UTILITY FOR ERROR HANDLING
+//
+
+DECLARE_bool(fst_error_fatal);
+
+#define FSTERROR() (FLAGS_fst_error_fatal ? LOG(FATAL) : LOG(ERROR))  // Logs at FATAL when the fst_error_fatal flag is set, at ERROR otherwise.
+
+namespace fst {
+
+//
+// UTILITIES FOR TYPE I/O
+//
+
+// Read some types from an input stream.
+
+// Generic case: a type without a dedicated overload is read through its own Read(istream &) member.
+template <typename T>
+inline istream &ReadType(istream &strm, T *t) {
+ return t->Read(strm);
+}
+
+// Fixed size, contiguous memory read: defines a ReadType overload that fills *t with sizeof(T) raw bytes from the stream.
+#define READ_POD_TYPE(T) \
+inline istream &ReadType(istream &strm, T *t) { \
+ return strm.read(reinterpret_cast<char *>(t), sizeof(T)); \
+}
+// One overload is generated for each built-in type listed below.
+READ_POD_TYPE(bool);
+READ_POD_TYPE(char);
+READ_POD_TYPE(signed char);
+READ_POD_TYPE(unsigned char);
+READ_POD_TYPE(short);
+READ_POD_TYPE(unsigned short);
+READ_POD_TYPE(int);
+READ_POD_TYPE(unsigned int);
+READ_POD_TYPE(long);
+READ_POD_TYPE(unsigned long);
+READ_POD_TYPE(long long);
+READ_POD_TYPE(unsigned long long);
+READ_POD_TYPE(float);
+READ_POD_TYPE(double);
+
+// String case: reads an int32 length followed by that many raw bytes into
+// '*s' (cleared first). Reading stops at the first stream failure, so '*s'
+// then holds only the bytes that were actually read; the failure remains
+// visible to the caller through the returned stream.
+inline istream &ReadType(istream &strm, string *s) {
+  s->clear();
+  int32 ns = 0;
+  strm.read(reinterpret_cast<char *>(&ns), sizeof(ns));
+  // Checking the stream in the loop condition keeps a truncated or corrupt
+  // length from appending up to 'ns' bytes that were never read.
+  for (int i = 0; i < ns && strm; ++i) {
+    char c = 0;
+    if (strm.read(&c, 1)) *s += c;
+  }
+  return strm;
+}
+
+// Pair case: reads 'first' and then 'second' with the matching ReadType overloads.
+template <typename S, typename T>
+inline istream &ReadType(istream &strm, pair<S, T> *p) {
+ ReadType(strm, &p->first);
+ ReadType(strm, &p->second);
+ return strm;
+}
+
+template <typename S, typename T>
+inline istream &ReadType(istream &strm, pair<const S, T> *p) {  // Pair whose first member is const.
+ ReadType(strm, const_cast<S *>(&p->first));  // Constness is cast away so 'first' can be read in place.
+ ReadType(strm, &p->second);
+ return strm;
+}
+
+// General case - no-op: containers without a dedicated overload ignore the reservation request.
+template <typename C>
+void StlReserve(C *c, int64 n) {}
+
+// Overload for vectors: forwards 'n' to vector::reserve().
+template <typename S, typename T>
+void StlReserve(vector<S, T> *c, int64 n) {
+ c->reserve(n);
+}
+
+// STL sequence container: defines a ReadType overload for C<S, T> that
+// clears the container, reads an int64 element count, and then reads and
+// appends that many elements. Reading stops at the first stream failure,
+// so no unread element is appended; the failure remains visible through
+// the returned stream.
+#define READ_STL_SEQ_TYPE(C) \
+template <typename S, typename T> \
+inline istream &ReadType(istream &strm, C<S, T> *c) { \
+  c->clear(); \
+  int64 n = 0; \
+  strm.read(reinterpret_cast<char *>(&n), sizeof(n)); \
+  /* A negative (corrupt) count must never be passed to reserve(). */ \
+  if (n > 0) StlReserve(c, n); \
+  for (ssize_t i = 0; i < n && strm; ++i) { \
+    typename C<S, T>::value_type value; \
+    /* Only keep elements that were actually read. */ \
+    if (ReadType(strm, &value)) c->insert(c->end(), value); \
+  } \
+  return strm; \
+}
+
+READ_STL_SEQ_TYPE(vector);
+READ_STL_SEQ_TYPE(list);
+
+// STL associative container: defines a ReadType overload for C<S, T, U>
+// that clears the container, reads an int64 element count, and then reads
+// and inserts that many elements. Reading stops at the first stream
+// failure, so no unread element is inserted; the failure remains visible
+// through the returned stream.
+#define READ_STL_ASSOC_TYPE(C) \
+template <typename S, typename T, typename U> \
+inline istream &ReadType(istream &strm, C<S, T, U> *c) { \
+  c->clear(); \
+  int64 n = 0; \
+  strm.read(reinterpret_cast<char *>(&n), sizeof(n)); \
+  for (ssize_t i = 0; i < n && strm; ++i) { \
+    typename C<S, T, U>::value_type value; \
+    /* Only keep elements that were actually read. */ \
+    if (ReadType(strm, &value)) c->insert(value); \
+  } \
+  return strm; \
+}
+
+READ_STL_ASSOC_TYPE(set);
+READ_STL_ASSOC_TYPE(unordered_set);
+READ_STL_ASSOC_TYPE(map);
+READ_STL_ASSOC_TYPE(unordered_map);
+
+// Write some types to an output stream.
+
+// Generic case: a type without a dedicated overload is written through its own Write(ostream &) member.
+template <typename T>
+inline ostream &WriteType(ostream &strm, const T t) {
+ t.Write(strm);  // The result of Write() is not inspected; callers check the stream state.
+ return strm;
+}
+
+// Fixed size, contiguous memory write: defines a WriteType overload that emits the sizeof(T) raw bytes of 't'.
+#define WRITE_POD_TYPE(T) \
+inline ostream &WriteType(ostream &strm, const T t) { \
+ return strm.write(reinterpret_cast<const char *>(&t), sizeof(T)); \
+}
+// One overload is generated for each built-in type listed below.
+WRITE_POD_TYPE(bool);
+WRITE_POD_TYPE(char);
+WRITE_POD_TYPE(signed char);
+WRITE_POD_TYPE(unsigned char);
+WRITE_POD_TYPE(short);
+WRITE_POD_TYPE(unsigned short);
+WRITE_POD_TYPE(int);
+WRITE_POD_TYPE(unsigned int);
+WRITE_POD_TYPE(long);
+WRITE_POD_TYPE(unsigned long);
+WRITE_POD_TYPE(long long);
+WRITE_POD_TYPE(unsigned long long);
+WRITE_POD_TYPE(float);
+WRITE_POD_TYPE(double);
+
+// String case: emits the length as a raw int32, then the string's bytes.
+inline ostream &WriteType(ostream &strm, const string &s) {
+  int32 length = s.size();
+  const char *length_bytes = reinterpret_cast<const char *>(&length);
+  strm.write(length_bytes, sizeof(length));
+  strm.write(s.data(), length);
+  return strm;
+}
+
+// Pair case: writes 'first' and then 'second' with the matching WriteType overloads.
+template <typename S, typename T>
+inline ostream &WriteType(ostream &strm, const pair<S, T> &p) {
+ WriteType(strm, p.first);
+ WriteType(strm, p.second);
+ return strm;
+}
+
+// STL sequence container: defines a WriteType overload for C<S, T> that
+// emits the element count as a raw int64 followed by every element in
+// iteration order.
+#define WRITE_STL_SEQ_TYPE(C) \
+template <typename S, typename T> \
+inline ostream &WriteType(ostream &strm, const C<S, T> &c) { \
+  int64 count = c.size(); \
+  strm.write(reinterpret_cast<char *>(&count), sizeof(count)); \
+  typename C<S, T>::const_iterator iter = c.begin(); \
+  while (iter != c.end()) { \
+    WriteType(strm, *iter); \
+    ++iter; \
+  } \
+  return strm; \
+}
+
+WRITE_STL_SEQ_TYPE(vector);
+WRITE_STL_SEQ_TYPE(list);
+
+// STL associative container: defines a WriteType overload for C<S, T, U>
+// that emits the element count as a raw int64 followed by every element
+// in iteration order.
+#define WRITE_STL_ASSOC_TYPE(C) \
+template <typename S, typename T, typename U> \
+inline ostream &WriteType(ostream &strm, const C<S, T, U> &c) { \
+  int64 count = c.size(); \
+  strm.write(reinterpret_cast<char *>(&count), sizeof(count)); \
+  typename C<S, T, U>::const_iterator iter = c.begin(); \
+  while (iter != c.end()) { \
+    WriteType(strm, *iter); \
+    ++iter; \
+  } \
+  return strm; \
+}
+
+WRITE_STL_ASSOC_TYPE(set);
+WRITE_STL_ASSOC_TYPE(unordered_set);
+WRITE_STL_ASSOC_TYPE(map);
+WRITE_STL_ASSOC_TYPE(unordered_map);
+
+// Utilities for converting between int64 or Weight and string.
+
+int64 StrToInt64(const string &s, const string &src, size_t nline,
+ bool allow_negative, bool *error = 0);
+
+template <typename Weight>
+Weight StrToWeight(const string &s, const string &src, size_t nline) {
+ Weight w;
+ istringstream strm(s);
+ strm >> w;
+ if (!strm) {
+ FSTERROR() << "StrToWeight: Bad weight = \"" << s
+ << "\", source = " << src << ", line = " << nline;
+ return Weight::NoWeight();
+ }
+ return w;
+}
+
+void Int64ToStr(int64 n, string *s);
+
+// Formats 'w' with operator<< at precision 9 and appends the text to '*s'.
+template <typename Weight>
+void WeightToStr(Weight w, string *s) {
+  ostringstream formatter;
+  formatter.precision(9);
+  formatter << w;
+  s->append(formatter.str());
+}
+
+// Utilities for reading/writing label pairs
+
+// Reads label pairs, one whitespace-separated pair per line, from
+// 'filename' into '*pairs' (cleared first). Empty lines are skipped.
+// Returns true on success. Logs and returns false when the file cannot be
+// opened, a line does not have exactly two columns, StrToInt64() reports
+// an error, or reading stops before end of file (I/O error or a line that
+// does not fit the line buffer).
+template <typename Label>
+bool ReadLabelPairs(const string& filename,
+                    vector<pair<Label, Label> >* pairs,
+                    bool allow_negative = false) {
+  ifstream strm(filename.c_str());
+
+  if (!strm) {
+    LOG(ERROR) << "ReadLabelPairs: Can't open file: " << filename;
+    return false;
+  }
+
+  const int kLineLen = 8096;
+  char line[kLineLen];
+  size_t nline = 0;
+
+  pairs->clear();
+  while (strm.getline(line, kLineLen)) {
+    ++nline;
+    vector<char *> col;
+    SplitToVector(line, "\n\t ", &col, true);
+    if (col.size() == 0 || col[0][0] == '\0')  // empty line
+      continue;
+    if (col.size() != 2) {
+      LOG(ERROR) << "ReadLabelPairs: Bad number of columns, "
+                 << "file = " << filename << ", line = " << nline;
+      return false;
+    }
+
+    // Start from a known value rather than relying on StrToInt64() to
+    // write the flag on every path.
+    bool err = false;
+    Label frmlabel = StrToInt64(col[0], filename, nline, allow_negative, &err);
+    if (err) return false;
+    Label tolabel = StrToInt64(col[1], filename, nline, allow_negative, &err);
+    if (err) return false;
+    pairs->push_back(make_pair(frmlabel, tolabel));
+  }
+  // getline() fails not only at end of file but also on an I/O error or
+  // when a line does not fit in 'line'. Those cases must not be reported
+  // as success with silently truncated data.
+  if (!strm.eof()) {
+    LOG(ERROR) << "ReadLabelPairs: Read failed or line too long, "
+               << "file = " << filename << ", line = " << nline + 1;
+    return false;
+  }
+  return true;
+}
+
+// Writes 'pairs' as "first<TAB>second" lines to 'filename', or to standard
+// output when 'filename' is empty. Returns true on success; logs and
+// returns false when the file cannot be opened or the write fails.
+template <typename Label>
+bool WriteLabelPairs(const string& filename,
+                     const vector<pair<Label, Label> >& pairs) {
+  // The file stream lives on the stack so that every return path closes
+  // it; it stays unopened when writing to standard output.
+  ofstream fstrm;
+  if (!filename.empty()) {
+    fstrm.open(filename.c_str());
+    if (!fstrm) {
+      LOG(ERROR) << "WriteLabelPairs: Can't open file: " << filename;
+      return false;
+    }
+  }
+  ostream &strm =
+      filename.empty() ? std::cout : static_cast<ostream &>(fstrm);
+
+  for (size_t n = 0; n < pairs.size(); ++n)
+    strm << pairs[n].first << "\t" << pairs[n].second << "\n";
+
+  // Flush first so that buffered write errors show up in the check below.
+  strm.flush();
+  if (!strm) {
+    LOG(ERROR) << "WriteLabelPairs: Write failed: "
+               << (filename.empty() ? "standard output" : filename);
+    return false;
+  }
+  return true;
+}
+
+// Utilities for converting a type name to a legal C symbol.
+
+void ConvertToLegalCSymbol(string *s);
+
+
+//
+// UTILITIES FOR STREAM I/O
+//
+
+bool AlignInput(istream &strm, int align);
+bool AlignOutput(ostream &strm, int align);
+
+//
+// UTILITIES FOR PROTOCOL BUFFER I/O
+//
+
+
+// An associative container for which testing membership is
+// faster than an STL set if members are restricted to an interval
+// that excludes most non-members. A 'Key' must have ==, !=, and < defined.
+// Element 'NoKey' should be a key that marks an uninitialized key and
+// is otherwise unused. 'Find()' returns an STL const_iterator to the match
+// found, otherwise it equals 'End()'.
+template <class Key, Key NoKey>
+class CompactSet {
+public:
+  typedef typename set<Key>::const_iterator const_iterator;
+
+  // An empty set has no key range; both bounds hold NoKey.
+  CompactSet()
+    : min_key_(NoKey),
+      max_key_(NoKey) { }
+
+  // Copies the members and the cached key range of 'compact_set'.
+  CompactSet(const CompactSet<Key, NoKey> &compact_set)
+    : set_(compact_set.set_),
+      min_key_(compact_set.min_key_),
+      max_key_(compact_set.max_key_) { }
+
+  // Adds 'key' and widens the cached [min_key_, max_key_] range if needed.
+  void Insert(Key key) {
+    set_.insert(key);
+    if (min_key_ == NoKey) {
+      min_key_ = key;
+    } else if (key < min_key_) {
+      min_key_ = key;
+    }
+    if (max_key_ == NoKey) {
+      max_key_ = key;
+    } else if (max_key_ < key) {
+      max_key_ = key;
+    }
+  }
+
+  // Removes every member and forgets the key range.
+  void Clear() {
+    set_.clear();
+    min_key_ = NoKey;
+    max_key_ = NoKey;
+  }
+
+  // Looks 'key' up. Keys outside the cached range (or any key while the
+  // range is unset) are answered with End() without searching the set.
+  const_iterator Find(Key key) const {
+    const bool outside_range =
+        min_key_ == NoKey || key < min_key_ || max_key_ < key;
+    if (outside_range) return set_.end();
+    return set_.find(key);
+  }
+
+  const_iterator Begin() const { return set_.begin(); }
+
+  const_iterator End() const { return set_.end(); }
+
+private:
+  set<Key> set_;   // The members themselves.
+  Key min_key_;    // Smallest key inserted so far, or NoKey.
+  Key max_key_;    // Largest key inserted so far, or NoKey.
+
+  void operator=(const CompactSet<Key, NoKey> &);  //disallow
+};
+
+} // namespace fst
+
+#endif // FST_LIB_UTIL_H__
diff --git a/src/include/fst/vector-fst.h b/src/include/fst/vector-fst.h
new file mode 100644
index 0000000..f6d8a6d
--- /dev/null
+++ b/src/include/fst/vector-fst.h
@@ -0,0 +1,727 @@
+// vector-fst.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Simple concrete, mutable FST whose states and arcs are stored in STL
+// vectors.
+
+#ifndef FST_LIB_VECTOR_FST_H__
+#define FST_LIB_VECTOR_FST_H__
+
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/mutable-fst.h>
+#include <fst/test-properties.h>
+
+
+namespace fst {
+
+template <class A> class VectorFst;
+template <class F, class G> void Cast(const F &, G *);
+
+
+// States and arcs implemented by STL vectors, templated on the
+// State definition. This does not manage the Fst properties.
+template <class State>
+class VectorFstBaseImpl : public FstImpl<typename State::Arc> {
+ public:
+  typedef typename State::Arc Arc;
+  typedef typename Arc::Weight Weight;
+  typedef typename Arc::StateId StateId;
+
+  // Starts out with no states and no start state.
+  VectorFstBaseImpl() : start_(kNoStateId) {}
+
+  // Frees every state object still held.
+  ~VectorFstBaseImpl() {
+    StateId s = 0;
+    while (s < states_.size()) {
+      delete states_[s];
+      ++s;
+    }
+  }
+
+  StateId Start() const { return start_; }
+
+  Weight Final(StateId s) const { return states_[s]->final; }
+
+  StateId NumStates() const { return states_.size(); }
+
+  size_t NumArcs(StateId s) const { return states_[s]->arcs.size(); }
+
+  void SetStart(StateId s) { start_ = s; }
+
+  void SetFinal(StateId s, Weight w) { states_[s]->final = w; }
+
+  // Appends a freshly allocated state and returns its id.
+  StateId AddState() {
+    State *fresh = new State;
+    states_.push_back(fresh);
+    return states_.size() - 1;
+  }
+
+  // Appends the given state object (stored as-is and later deleted by this
+  // class) and returns its id.
+  StateId AddState(State *state) {
+    states_.push_back(state);
+    return states_.size() - 1;
+  }
+
+  void AddArc(StateId s, const Arc &arc) {
+    states_[s]->arcs.push_back(arc);
+  }
+
+  // Deletes the states listed in 'dstates', renumbers the survivors
+  // densely in their original order, and drops every arc that pointed to
+  // a deleted state.
+  void DeleteStates(const vector<StateId>& dstates) {
+    // Step 1: mark the doomed states in the renumbering table.
+    vector<StateId> remap(states_.size(), 0);
+    for (size_t i = 0; i < dstates.size(); ++i)
+      remap[dstates[i]] = kNoStateId;
+
+    // Step 2: free doomed states and slide survivors down.
+    StateId kept = 0;
+    for (StateId s = 0; s < states_.size(); ++s) {
+      if (remap[s] == kNoStateId) {
+        delete states_[s];
+        continue;
+      }
+      remap[s] = kept;
+      if (s != kept)
+        states_[kept] = states_[s];
+      ++kept;
+    }
+    states_.resize(kept);
+
+    // Step 3: retarget surviving arcs and compact each arc vector,
+    // keeping the per-state epsilon counters in step with removed arcs.
+    for (StateId s = 0; s < states_.size(); ++s) {
+      vector<Arc> &arcs = states_[s]->arcs;
+      size_t live = 0;
+      for (size_t i = 0; i < arcs.size(); ++i) {
+        StateId dest = remap[arcs[i].nextstate];
+        if (dest == kNoStateId) {
+          if (arcs[i].ilabel == 0)
+            --states_[s]->niepsilons;
+          if (arcs[i].olabel == 0)
+            --states_[s]->noepsilons;
+          continue;
+        }
+        arcs[i].nextstate = dest;
+        if (i != live)
+          arcs[live] = arcs[i];
+        ++live;
+      }
+      arcs.resize(live);
+    }
+
+    // Step 4: renumber the start state (kNoStateId if it was deleted).
+    if (Start() != kNoStateId)
+      SetStart(remap[Start()]);
+  }
+
+  // Deletes all states and clears the start state.
+  void DeleteStates() {
+    StateId s = 0;
+    while (s < states_.size()) {
+      delete states_[s];
+      ++s;
+    }
+    states_.clear();
+    SetStart(kNoStateId);
+  }
+
+  // Removes the last 'n' arcs of state 's'.
+  void DeleteArcs(StateId s, size_t n) {
+    vector<Arc> &arcs = states_[s]->arcs;
+    arcs.resize(arcs.size() - n);
+  }
+
+  // Removes all arcs of state 's'.
+  void DeleteArcs(StateId s) { states_[s]->arcs.clear(); }
+
+  State *GetState(StateId s) { return states_[s]; }
+
+  const State *GetState(StateId s) const { return states_[s]; }
+
+  void SetState(StateId s, State *state) { states_[s] = state; }
+
+  void ReserveStates(StateId n) { states_.reserve(n); }
+
+  void ReserveArcs(StateId s, size_t n) { states_[s]->arcs.reserve(n); }
+
+  // Provide information needed for generic state iterator
+  void InitStateIterator(StateIteratorData<Arc> *data) const {
+    data->base = 0;
+    data->nstates = states_.size();
+  }
+
+  // Provide information needed for generic arc iterator
+  void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const {
+    data->base = 0;
+    data->narcs = states_[s]->arcs.size();
+    // A state without arcs exposes a null arc pointer.
+    if (data->narcs > 0) {
+      data->arcs = &states_[s]->arcs[0];
+    } else {
+      data->arcs = 0;
+    }
+    data->ref_count = 0;
+  }
+
+ private:
+  vector<State *> states_;  // States representation.
+  StateId start_;           // initial state
+
+  DISALLOW_COPY_AND_ASSIGN(VectorFstBaseImpl);
+};
+
+// Arcs implemented by an STL vector per state; also carries the final weight and the epsilon counters.
+template <class A>
+struct VectorState {
+ typedef A Arc;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+ // A new state is non-final (Weight::Zero()) with zero epsilon counts.
+ VectorState() : final(Weight::Zero()), niepsilons(0), noepsilons(0) {}
+
+ Weight final; // Final weight
+ vector<A> arcs; // Arcs representation
+ size_t niepsilons; // # of input epsilons
+ size_t noepsilons; // # of output epsilons
+};
+
+// This is a VectorFstBaseImpl container that holds VectorState's. It
+// manages Fst properties and the # of input and output epsilons.
+template <class A>
+class VectorFstImpl : public VectorFstBaseImpl< VectorState<A> > {
+ public:
+ using FstImpl<A>::SetInputSymbols;
+ using FstImpl<A>::SetOutputSymbols;
+ using FstImpl<A>::SetType;
+ using FstImpl<A>::SetProperties;
+ using FstImpl<A>::Properties;
+
+ using VectorFstBaseImpl<VectorState<A> >::Start;
+ using VectorFstBaseImpl<VectorState<A> >::NumStates;
+ using VectorFstBaseImpl<VectorState<A> >::GetState;
+ using VectorFstBaseImpl<VectorState<A> >::ReserveArcs;
+
+ friend class MutableArcIterator< VectorFst<A> >;
+
+ typedef VectorFstBaseImpl< VectorState<A> > BaseImpl;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+ // Creates an empty FST of type "vector" with the null and static properties set.
+ VectorFstImpl() {
+ SetType("vector");
+ SetProperties(kNullProperties | kStaticProperties);
+ }
+ explicit VectorFstImpl(const Fst<A> &fst);  // Copies states, arcs and symbol tables from 'fst' (defined below).
+
+ static VectorFstImpl<A> *Read(istream &strm, const FstReadOptions &opts);  // Returns 0 on error (defined below).
+
+ size_t NumInputEpsilons(StateId s) const { return GetState(s)->niepsilons; }
+
+ size_t NumOutputEpsilons(StateId s) const { return GetState(s)->noepsilons; }
+ // Each mutator below calls the base operation and then updates the stored properties.
+ void SetStart(StateId s) {
+ BaseImpl::SetStart(s);
+ SetProperties(SetStartProperties(Properties()));
+ }
+
+ void SetFinal(StateId s, Weight w) {
+ Weight ow = BaseImpl::Final(s);  // The old weight is needed by SetFinalProperties().
+ BaseImpl::SetFinal(s, w);
+ SetProperties(SetFinalProperties(Properties(), ow, w));
+ }
+
+ StateId AddState() {
+ StateId s = BaseImpl::AddState();
+ SetProperties(AddStateProperties(Properties()));
+ return s;
+ }
+
+ void AddArc(StateId s, const A &arc) {
+ VectorState<A> *state = GetState(s);
+ if (arc.ilabel == 0) {
+ ++state->niepsilons;
+ }
+ if (arc.olabel == 0) {
+ ++state->noepsilons;
+ }
+ // The properties are updated before the arc is appended, so 'parc' is the arc that currently comes last.
+ const A *parc = state->arcs.empty() ? 0 : &(state->arcs.back());
+ SetProperties(AddArcProperties(Properties(), s, arc, parc));
+
+ BaseImpl::AddArc(s, arc);
+ }
+
+ void DeleteStates(const vector<StateId> &dstates) {
+ BaseImpl::DeleteStates(dstates);
+ SetProperties(DeleteStatesProperties(Properties()));
+ }
+
+ void DeleteStates() {
+ BaseImpl::DeleteStates();
+ SetProperties(DeleteAllStatesProperties(Properties(),
+ kStaticProperties));
+ }
+
+ void DeleteArcs(StateId s, size_t n) {
+ const vector<A> &arcs = GetState(s)->arcs;
+ for (size_t i = 0; i < n; ++i) {  // Walks the last n arcs to adjust the epsilon counters before they are removed.
+ size_t j = arcs.size() - i - 1;  // NOTE(review): assumes n <= arcs.size(); confirm callers guarantee this.
+ if (arcs[j].ilabel == 0)
+ --GetState(s)->niepsilons;
+ if (arcs[j].olabel == 0)
+ --GetState(s)->noepsilons;
+ }
+ BaseImpl::DeleteArcs(s, n);
+ SetProperties(DeleteArcsProperties(Properties()));
+ }
+
+ void DeleteArcs(StateId s) {
+ GetState(s)->niepsilons = 0;
+ GetState(s)->noepsilons = 0;
+ BaseImpl::DeleteArcs(s);
+ SetProperties(DeleteArcsProperties(Properties()));
+ }
+
+ private:
+ // Properties always true of this Fst class
+ static const uint64 kStaticProperties = kExpanded | kMutable;
+ // Current file format version
+ static const int kFileVersion = 2;
+ // Minimum file format version supported
+ static const int kMinFileVersion = 1;
+
+ DISALLOW_COPY_AND_ASSIGN(VectorFstImpl);
+};
+
+template <class A> const uint64 VectorFstImpl<A>::kStaticProperties;  // Out-of-class definitions of the constants initialized in the class body.
+template <class A> const int VectorFstImpl<A>::kFileVersion;
+template <class A> const int VectorFstImpl<A>::kMinFileVersion;
+
+
+// Builds a vector-based copy of 'fst': symbol tables, start state, final
+// weights and arcs are copied, and the per-state epsilon counters are
+// recomputed while the arcs are added.
+template <class A>
+VectorFstImpl<A>::VectorFstImpl(const Fst<A> &fst) {
+  SetType("vector");
+  SetInputSymbols(fst.InputSymbols());
+  SetOutputSymbols(fst.OutputSymbols());
+  BaseImpl::SetStart(fst.Start());
+
+  // The state count is only taken up front for an expanded FST.
+  if (fst.Properties(kExpanded, false))
+    BaseImpl::ReserveStates(CountStates(fst));
+
+  StateIterator< Fst<A> > siter(fst);
+  while (!siter.Done()) {
+    StateId s = siter.Value();
+    BaseImpl::AddState();
+    BaseImpl::SetFinal(s, fst.Final(s));
+    ReserveArcs(s, fst.NumArcs(s));
+
+    ArcIterator< Fst<A> > aiter(fst, s);
+    while (!aiter.Done()) {
+      const A &arc = aiter.Value();
+      // The base-class AddArc() does not touch the epsilon counters, so
+      // they are maintained here.
+      BaseImpl::AddArc(s, arc);
+      if (arc.ilabel == 0)
+        ++GetState(s)->niepsilons;
+      if (arc.olabel == 0)
+        ++GetState(s)->noepsilons;
+      aiter.Next();
+    }
+    siter.Next();
+  }
+  SetProperties(fst.Properties(kCopyProperties, false) | kStaticProperties);
+}
+
+// Reads a VectorFstImpl from 'strm'. Returns a newly allocated
+// implementation, or 0 (after logging) when the header cannot be read, a
+// state or arc record is truncated or carries a negative arc count, or
+// fewer states than announced in the header are present.
+template <class A>
+VectorFstImpl<A> *VectorFstImpl<A>::Read(istream &strm,
+                                         const FstReadOptions &opts) {
+  VectorFstImpl<A> *impl = new VectorFstImpl;
+  FstHeader hdr;
+  if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) {
+    delete impl;
+    return 0;
+  }
+  impl->BaseImpl::SetStart(hdr.Start());
+  if (hdr.NumStates() != kNoStateId) {
+    impl->ReserveStates(hdr.NumStates());
+  }
+
+  // With an unknown state count (kNoStateId) states are read until the
+  // final-weight read fails; otherwise exactly NumStates() are expected.
+  StateId s = 0;
+  for (;hdr.NumStates() == kNoStateId || s < hdr.NumStates(); ++s) {
+    typename A::Weight final;
+    if (!final.Read(strm)) break;
+    impl->BaseImpl::AddState();
+    VectorState<A> *state = impl->GetState(s);
+    state->final = final;
+    int64 narcs = 0;
+    ReadType(strm, &narcs);
+    // A negative count can only come from a corrupt file. It must not
+    // reach ReserveArcs() or the arc loop, so it is treated like a failed
+    // read.
+    if (!strm || narcs < 0) {
+      LOG(ERROR) << "VectorFst::Read: read failed: " << opts.source;
+      delete impl;
+      return 0;
+    }
+    impl->ReserveArcs(s, narcs);
+    for (int64 j = 0; j < narcs; ++j) {
+      A arc;
+      ReadType(strm, &arc.ilabel);
+      ReadType(strm, &arc.olabel);
+      arc.weight.Read(strm);
+      ReadType(strm, &arc.nextstate);
+      if (!strm) {
+        LOG(ERROR) << "VectorFst::Read: read failed: " << opts.source;
+        delete impl;
+        return 0;
+      }
+      // The base-class AddArc() is used, so the epsilon counters are
+      // maintained here.
+      impl->BaseImpl::AddArc(s, arc);
+      if (arc.ilabel == 0)
+        ++state->niepsilons;
+      if (arc.olabel == 0)
+        ++state->noepsilons;
+    }
+  }
+  if (hdr.NumStates() != kNoStateId && s != hdr.NumStates()) {
+    LOG(ERROR) << "VectorFst::Read: unexpected end of file: " << opts.source;
+    delete impl;
+    return 0;
+  }
+  return impl;
+}
+
+// Converts a string into a weight; operator() is defined per weight type below.
+template <class W> class WeightFromString {
+ public:
+ W operator()(const string &s);
+};
+
+// Generic case fails: weight types without a specialization report an error and yield W::NoWeight().
+template <class W> inline
+W WeightFromString<W>::operator()(const string &s) {
+ FSTERROR() << "VectorFst::Read: Obsolete file format";
+ return W::NoWeight();
+}
+
+// TropicalWeight version: interprets the first sizeof(float) bytes of 's'
+// as a raw float. A string too short to hold a float is reported through
+// FSTERROR() and yields NoWeight() instead of reading past its end.
+template <> inline
+TropicalWeight WeightFromString<TropicalWeight>::operator()(const string &s) {
+  float f;
+  if (s.size() < sizeof(f)) {
+    FSTERROR() << "VectorFst::Read: Bad weight string size";
+    return TropicalWeight::NoWeight();
+  }
+  memcpy(&f, s.data(), sizeof(f));
+  return TropicalWeight(f);
+}
+
+// LogWeight version: interprets the first sizeof(float) bytes of 's' as a
+// raw float. A string too short to hold a float is reported through
+// FSTERROR() and yields NoWeight() instead of reading past its end.
+template <> inline
+LogWeight WeightFromString<LogWeight>::operator()(const string &s) {
+  float f;
+  if (s.size() < sizeof(f)) {
+    FSTERROR() << "VectorFst::Read: Bad weight string size";
+    return LogWeight::NoWeight();
+  }
+  memcpy(&f, s.data(), sizeof(f));
+  return LogWeight(f);
+}
+
+// Simple concrete, mutable FST. This class attaches interface to
+// implementation and handles reference counting, delegating most
+// methods to ImplToMutableFst. Supports additional operations:
+// ReserveStates and ReserveArcs (cf. STL vectors).
+template <class A>
+class VectorFst : public ImplToMutableFst< VectorFstImpl<A> > {
+ public:
+ friend class StateIterator< VectorFst<A> >;
+ friend class ArcIterator< VectorFst<A> >;
+ friend class MutableArcIterator< VectorFst<A> >;
+ template <class F, class G> friend void Cast(const F &, G *);
+
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+ typedef VectorFstImpl<A> Impl;
+ // Creates an empty FST backed by a new implementation object.
+ VectorFst() : ImplToMutableFst<Impl>(new Impl) {}
+ // Builds a new implementation by copying the contents of an arbitrary Fst.
+ explicit VectorFst(const Fst<A> &fst)
+ : ImplToMutableFst<Impl>(new Impl(fst)) {}
+ // Copy constructor; the copy is delegated to ImplToMutableFst.
+ VectorFst(const VectorFst<A> &fst) : ImplToMutableFst<Impl>(fst) {}
+
+ // Get a copy of this VectorFst. See Fst<>::Copy() for further doc.
+ virtual VectorFst<A> *Copy(bool safe = false) const {  // 'safe' is not used by this implementation.
+ return new VectorFst<A>(*this);
+ }
+
+ VectorFst<A> &operator=(const VectorFst<A> &fst) {
+ SetImpl(fst.GetImpl(), false);  // Adopts the other FST's implementation with own_impl == false.
+ return *this;
+ }
+
+ virtual VectorFst<A> &operator=(const Fst<A> &fst) {
+ if (this != &fst) SetImpl(new Impl(fst));  // Self-assignment is skipped; otherwise a fresh copy is built.
+ return *this;
+ }
+
+ // Read a VectorFst from an input stream; return NULL on error
+ static VectorFst<A> *Read(istream &strm, const FstReadOptions &opts) {
+ Impl* impl = Impl::Read(strm, opts);
+ return impl ? new VectorFst<A>(impl) : 0;
+ }
+
+ // Read a VectorFst from a file; return NULL on error
+ // Empty filename reads from standard input
+ static VectorFst<A> *Read(const string &filename) {
+ Impl* impl = ImplToExpandedFst<Impl, MutableFst<A> >::Read(filename);
+ return impl ? new VectorFst<A>(impl) : 0;
+ }
+ // Writes this FST to a stream using WriteFst() below.
+ virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
+ return WriteFst(*this, strm, opts);
+ }
+ // Writes this FST to a file via Fst<A>::WriteFile().
+ virtual bool Write(const string &filename) const {
+ return Fst<A>::WriteFile(filename);
+ }
+ // Writes any FST type F in the "vector" file format (defined below).
+ template <class F>
+ static bool WriteFst(const F &fst, ostream &strm,
+ const FstWriteOptions &opts);
+ // The reserve calls run MutateCheck() first because they modify the implementation.
+ void ReserveStates(StateId n) {
+ MutateCheck();
+ GetImpl()->ReserveStates(n);
+ }
+
+ void ReserveArcs(StateId s, size_t n) {
+ MutateCheck();
+ GetImpl()->ReserveArcs(s, n);
+ }
+ // Iterator initialization is forwarded to the implementation.
+ virtual void InitStateIterator(StateIteratorData<Arc> *data) const {
+ GetImpl()->InitStateIterator(data);
+ }
+
+ virtual void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const {
+ GetImpl()->InitArcIterator(s, data);
+ }
+
+ virtual inline
+ void InitMutableArcIterator(StateId s, MutableArcIteratorData<A> *);
+
+ private:
+ explicit VectorFst(Impl *impl) : ImplToMutableFst<Impl>(impl) {}  // Wraps an existing implementation; used by Read().
+
+ // Makes visible to friends.
+ Impl *GetImpl() const { return ImplToFst< Impl, MutableFst<A> >::GetImpl(); }
+
+ void SetImpl(Impl *impl, bool own_impl = true) {
+ ImplToFst< Impl, MutableFst<A> >::SetImpl(impl, own_impl);
+ }
+
+ void MutateCheck() { return ImplToMutableFst<Impl>::MutateCheck(); }
+};
+
+// Specialization for VectorFst; see generic version in fst.h
+// for sample usage (but use the VectorFst type!). This version
+// should inline.
+template <class A>
+class StateIterator< VectorFst<A> > {
+ public:
+ typedef typename A::StateId StateId;
+ // The state count is captured once at construction.
+ explicit StateIterator(const VectorFst<A> &fst)
+ : nstates_(fst.GetImpl()->NumStates()), s_(0) {}
+
+ bool Done() const { return s_ >= nstates_; }
+
+ StateId Value() const { return s_; }  // State ids are the consecutive integers 0 .. nstates_ - 1.
+
+ void Next() { ++s_; }
+
+ void Reset() { s_ = 0; }
+
+ private:
+ StateId nstates_;  // Number of states at construction time.
+ StateId s_;  // Current state id.
+
+ DISALLOW_COPY_AND_ASSIGN(StateIterator);
+};
+
+// Writes Fst to file, will call CountStates so may involve two passes if
+// called from an Fst that is not derived from Expanded. Returns false (after logging) on a write failure or state-count mismatch.
+template <class A>
+template <class F>
+bool VectorFst<A>::WriteFst(const F &fst, ostream &strm,
+ const FstWriteOptions &opts) {
+ static const int kFileVersion = 2;
+ bool update_header = true;
+ FstHeader hdr;
+ hdr.SetStart(fst.Start());
+ hdr.SetNumStates(kNoStateId);  // Placeholder until the real count is known.
+ size_t start_offset = 0;
+ if (fst.Properties(kExpanded, false) || (start_offset = strm.tellp()) != -1) {  // NOTE(review): the count is taken up front when the FST is expanded OR tellp() succeeds; confirm the sense of the tellp() test.
+ hdr.SetNumStates(CountStates(fst));
+ update_header = false;
+ }
+ FstImpl<A>::WriteFstHeader(fst, strm, opts, kFileVersion, "vector", &hdr);
+ StateId num_states = 0;
+ for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {  // Per state: final weight, int64 arc count, then the arcs.
+ typename A::StateId s = siter.Value();
+ fst.Final(s).Write(strm);
+ int64 narcs = fst.NumArcs(s);
+ WriteType(strm, narcs);
+ for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) {  // Per arc: ilabel, olabel, weight, nextstate.
+ const A &arc = aiter.Value();
+ WriteType(strm, arc.ilabel);
+ WriteType(strm, arc.olabel);
+ arc.weight.Write(strm);
+ WriteType(strm, arc.nextstate);
+ }
+ num_states++;
+ }
+ strm.flush();  // Flush before the stream state is checked.
+ if (!strm) {
+ LOG(ERROR) << "VectorFst::Write: write failed: " << opts.source;
+ return false;
+ }
+ if (update_header) {  // The count was unknown up front: rewrite the header with the observed number of states.
+ hdr.SetNumStates(num_states);
+ return FstImpl<A>::UpdateFstHeader(fst, strm, opts, kFileVersion, "vector",
+ &hdr, start_offset);
+ } else {
+ if (num_states != hdr.NumStates()) {  // The count written in the header must match what was actually iterated.
+ LOG(ERROR) << "Inconsistent number of states observed during write";
+ return false;
+ }
+ }
+ return true;
+}
+
+// ArcIterator specialization for VectorFst; the generic version in fst.h
+// shows sample usage (substitute the VectorFst type). All members are short
+// non-virtual functions defined in-class, so this version should inline.
+template <class A>
+class ArcIterator< VectorFst<A> > {
+ public:
+  typedef typename A::StateId StateId;
+
+  // Binds a const reference to the arcs of state s and starts at position 0.
+  ArcIterator(const VectorFst<A> &fst, StateId s)
+      : arc_vec_(fst.GetImpl()->GetState(s)->arcs), pos_(0) {}
+
+  // True once the position is at or beyond the number of arcs.
+  bool Done() const { return !(pos_ < arc_vec_.size()); }
+
+  // Arc at the current position.
+  const A& Value() const { return arc_vec_[pos_]; }
+
+  // Advances by one arc.
+  void Next() { ++pos_; }
+
+  // Rewinds to the first arc.
+  void Reset() { pos_ = 0; }
+
+  // Jumps to position a; the position is not range-checked here.
+  void Seek(size_t a) { pos_ = a; }
+
+  // Current position within the state's arcs.
+  size_t Position() const { return pos_; }
+
+  // Always reports kArcValueFlags.
+  uint32 Flags() const { return kArcValueFlags; }
+
+  // Intentionally a no-op: the arguments are ignored.
+  void SetFlags(uint32 f, uint32 m) {}
+
+ private:
+  const vector<A>& arc_vec_;  // Arcs of the state being iterated.
+  size_t pos_;                // Index of the current arc.
+
+  DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+// Specialization for VectorFst; see generic version in fst.h
+// for sample usage (but use the VectorFst type!). This version
+// should inline.
+//
+// Iterates over the arcs of a single state and lets the caller overwrite the
+// current arc through SetValue(), which also maintains the state's epsilon
+// counters and the property bits reachable through properties_.
+template <class A>
+class MutableArcIterator< VectorFst<A> >
+ : public MutableArcIteratorBase<A> {
+ public:
+ typedef typename A::StateId StateId;
+ typedef typename A::Weight Weight;
+
+ // Calls fst->MutateCheck() first, then caches a pointer to the state's
+ // storage and a pointer to the implementation's property bits.
+ // NOTE(review): MutateCheck() presumably makes the implementation safe to
+ // modify before these pointers are taken -- confirm against its definition.
+ MutableArcIterator(VectorFst<A> *fst, StateId s) : i_(0) {
+ fst->MutateCheck();
+ state_ = fst->GetImpl()->GetState(s);
+ properties_ = &fst->GetImpl()->properties_;
+ }
+
+ // True once the position is at or beyond the number of arcs of the state.
+ bool Done() const { return i_ >= state_->arcs.size(); }
+
+ // Arc at the current position.
+ const A& Value() const { return state_->arcs[i_]; }
+
+ void Next() { ++i_; }
+
+ size_t Position() const { return i_; }
+
+ void Reset() { i_ = 0; }
+
+ // Jumps to position a; the position is not range-checked here.
+ void Seek(size_t a) { i_ = a; }
+
+ // Replaces the arc at the current position with arc and updates the
+ // bookkeeping in three steps: (1) withdraw the counters and property bits
+ // that the old arc may have been responsible for, (2) overwrite the arc,
+ // (3) record the counters and property bits implied by the new arc. The
+ // final mask then drops every property bit outside kSetArcProperties and
+ // the acceptor/epsilon/weighted bits handled here.
+ void SetValue(const A &arc) {
+ A& oarc = state_->arcs[i_];
+ // Step 1: the old arc is going away, so the bits it could have justified
+ // are cleared and its epsilon labels are removed from the state counters.
+ if (oarc.ilabel != oarc.olabel)
+ *properties_ &= ~kNotAcceptor;
+ if (oarc.ilabel == 0) {
+ --state_->niepsilons;
+ *properties_ &= ~kIEpsilons;
+ if (oarc.olabel == 0)
+ *properties_ &= ~kEpsilons;
+ }
+ if (oarc.olabel == 0) {
+ --state_->noepsilons;
+ *properties_ &= ~kOEpsilons;
+ }
+ if (oarc.weight != Weight::Zero() && oarc.weight != Weight::One())
+ *properties_ &= ~kWeighted;
+ // Step 2: overwrite the stored arc.
+ oarc = arc;
+ // Step 3: set the bits the new arc establishes and clear their opposites.
+ if (arc.ilabel != arc.olabel) {
+ *properties_ |= kNotAcceptor;
+ *properties_ &= ~kAcceptor;
+ }
+ if (arc.ilabel == 0) {
+ ++state_->niepsilons;
+ *properties_ |= kIEpsilons;
+ *properties_ &= ~kNoIEpsilons;
+ if (arc.olabel == 0) {
+ *properties_ |= kEpsilons;
+ *properties_ &= ~kNoEpsilons;
+ }
+ }
+ if (arc.olabel == 0) {
+ ++state_->noepsilons;
+ *properties_ |= kOEpsilons;
+ *properties_ &= ~kNoOEpsilons;
+ }
+ if (arc.weight != Weight::Zero() && arc.weight != Weight::One()) {
+ *properties_ |= kWeighted;
+ *properties_ &= ~kUnweighted;
+ }
+ // Keep only the bits listed below; every other property bit is cleared.
+ *properties_ &= kSetArcProperties | kAcceptor | kNotAcceptor |
+ kEpsilons | kNoEpsilons | kIEpsilons | kNoIEpsilons |
+ kOEpsilons | kNoOEpsilons | kWeighted | kUnweighted;
+ }
+
+ // Always reports kArcValueFlags.
+ uint32 Flags() const {
+ return kArcValueFlags;
+ }
+
+ // Intentionally a no-op: the arguments are ignored.
+ void SetFlags(uint32 f, uint32 m) {}
+
+
+ private:
+ // This allows base-class virtual access to non-virtual derived-
+ // class members of the same name. It makes the derived class more
+ // efficient to use but unsafe to further derive.
+ virtual bool Done_() const { return Done(); }
+ virtual const A& Value_() const { return Value(); }
+ virtual void Next_() { Next(); }
+ virtual size_t Position_() const { return Position(); }
+ virtual void Reset_() { Reset(); }
+ virtual void Seek_(size_t a) { Seek(a); }
+ virtual void SetValue_(const A &a) { SetValue(a); }
+ // NOTE(review): the next two forwarders are declared without `virtual`
+ // here; presumably they still override virtuals declared in
+ // MutableArcIteratorBase -- confirm against the base class.
+ uint32 Flags_() const { return Flags(); }
+ void SetFlags_(uint32 f, uint32 m) { SetFlags(f, m); }
+
+ // State whose arcs are being iterated (obtained via GetImpl()->GetState()).
+ struct VectorState<A> *state_;
+ // Points at the implementation's properties_ field.
+ uint64 *properties_;
+ // Index of the current arc within state_->arcs.
+ size_t i_;
+
+ DISALLOW_COPY_AND_ASSIGN(MutableArcIterator);
+};
+
+// Provides the information needed by the generic mutable arc iterator: a
+// newly allocated VectorFst-specific iterator for state s is stored in
+// data->base.
+// NOTE(review): the iterator is created with `new` and not released here;
+// presumably whoever holds `data` deletes it -- confirm.
+template <class A>
+inline void VectorFst<A>::InitMutableArcIterator(
+    StateId s, MutableArcIteratorData<A> *data) {
+  typedef MutableArcIterator< VectorFst<A> > Iterator;
+  Iterator *iter = new Iterator(this, s);
+  data->base = iter;
+}
+
+// A useful alias when using StdArc: VectorFst instantiated with StdArc as
+// its arc type.
+typedef VectorFst<StdArc> StdVectorFst;
+
+} // namespace fst
+
+#endif // FST_LIB_VECTOR_FST_H__
diff --git a/src/include/fst/verify.h b/src/include/fst/verify.h
new file mode 100644
index 0000000..576cfca
--- /dev/null
+++ b/src/include/fst/verify.h
@@ -0,0 +1,126 @@
+// verify.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Function to verify an Fst's contents
+
+#ifndef FST_LIB_VERIFY_H__
+#define FST_LIB_VERIFY_H__
+
+#include <fst/fst.h>
+#include <fst/test-properties.h>
+
+
+namespace fst {
+
+// Verifies that an Fst's contents are sane.
+//
+// Checks, in this order: the start state ID against the number of states;
+// for every arc, its labels (sign and, when a symbol table is attached,
+// membership in that table), its weight and its destination state; every
+// state's final weight; the kError property; and finally that the stored
+// properties are compatible with freshly computed ones. The first failed
+// check is logged and makes the function return false. Negative labels are
+// only tolerated when allow_negative_labels is true.
+template<class Arc>
+bool Verify(const Fst<Arc> &fst, bool allow_negative_labels = false) {
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+  typedef typename Arc::StateId StateId;
+
+  const StateId start = fst.Start();
+  const SymbolTable *isyms = fst.InputSymbols();
+  const SymbolTable *osyms = fst.OutputSymbols();
+
+  // One full pass with the generic state iterator to count the states.
+  StateId num_states = 0;
+  for (StateIterator< Fst<Arc> > counter(fst);
+       !counter.Done();
+       counter.Next()) {
+    ++num_states;
+  }
+
+  if (start == kNoStateId && num_states > 0) {
+    LOG(ERROR) << "Verify: Fst start state ID unset";
+    return false;
+  }
+  if (start >= num_states) {
+    LOG(ERROR) << "Verify: Fst start state ID exceeds number of states";
+    return false;
+  }
+
+  for (StateIterator< Fst<Arc> > siter(fst);
+       !siter.Done();
+       siter.Next()) {
+    const StateId s = siter.Value();
+    // arc_pos is the position of the arc within state s, used in messages.
+    size_t arc_pos = 0;
+    for (ArcIterator< Fst<Arc> > aiter(fst, s);
+         !aiter.Done();
+         aiter.Next(), ++arc_pos) {
+      const Arc &arc = aiter.Value();
+      // Each check returns on failure, so they are tried strictly in order.
+      if (!allow_negative_labels && arc.ilabel < 0) {
+        LOG(ERROR) << "Verify: Fst input label ID of arc at position "
+                   << arc_pos << " of state " << s << " is negative";
+        return false;
+      }
+      if (isyms && isyms->Find(arc.ilabel) == "") {
+        LOG(ERROR) << "Verify: Fst input label ID " << arc.ilabel
+                   << " of arc at position " << arc_pos << " of state " << s
+                   << " is missing from input symbol table \""
+                   << isyms->Name() << "\"";
+        return false;
+      }
+      if (!allow_negative_labels && arc.olabel < 0) {
+        LOG(ERROR) << "Verify: Fst output label ID of arc at position "
+                   << arc_pos << " of state " << s << " is negative";
+        return false;
+      }
+      if (osyms && osyms->Find(arc.olabel) == "") {
+        LOG(ERROR) << "Verify: Fst output label ID " << arc.olabel
+                   << " of arc at position " << arc_pos << " of state " << s
+                   << " is missing from output symbol table \""
+                   << osyms->Name() << "\"";
+        return false;
+      }
+      if (!arc.weight.Member() || arc.weight == Weight::Zero()) {
+        LOG(ERROR) << "Verify: Fst weight of arc at position "
+                   << arc_pos << " of state " << s << " is invalid";
+        return false;
+      }
+      if (arc.nextstate < 0) {
+        LOG(ERROR) << "Verify: Fst destination state ID of arc at position "
+                   << arc_pos << " of state " << s << " is negative";
+        return false;
+      }
+      if (arc.nextstate >= num_states) {
+        LOG(ERROR) << "Verify: Fst destination state ID of arc at position "
+                   << arc_pos << " of state " << s
+                   << " exceeds number of states";
+        return false;
+      }
+    }
+    if (!fst.Final(s).Member()) {
+      LOG(ERROR) << "Verify: Fst final weight of state " << s << " is invalid";
+      return false;
+    }
+  }
+
+  const uint64 stored_props = fst.Properties(kFstProperties, false);
+  if (stored_props & kError) {
+    LOG(ERROR) << "Verify: Fst error property is set";
+    return false;
+  }
+
+  // Recompute the properties from the contents and compare with the stored
+  // ones; known_props is only needed as an output argument of the call.
+  uint64 known_props;
+  const uint64 computed_props =
+      ComputeProperties(fst, kFstProperties, &known_props, false);
+  if (!CompatProperties(stored_props, computed_props)) {
+    LOG(ERROR) << "Verify: stored Fst properties incorrect "
+               << "(props1 = stored props, props2 = tested)";
+    return false;
+  }
+  return true;
+}
+
+} // namespace fst
+
+#endif // FST_LIB_VERIFY_H__
diff --git a/src/include/fst/visit.h b/src/include/fst/visit.h
new file mode 100644
index 0000000..31a00a8
--- /dev/null
+++ b/src/include/fst/visit.h
@@ -0,0 +1,270 @@
+// visit.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Queue-dependent visitation of finite-state transducers. See also
+// dfs-visit.h.
+
+#ifndef FST_LIB_VISIT_H__
+#define FST_LIB_VISIT_H__
+
+
+#include <fst/arcfilter.h>
+#include <fst/mutable-fst.h>
+
+
+namespace fst {
+
+// Visitor Interface - class determines actions taken during a visit.
+// If any of the boolean member functions return false, the visit is
+// aborted by first calling FinishState() on all unfinished (grey)
+// states and then calling FinishVisit().
+//
+// Note this is more general than the visitor interface in
+// dfs-visit.h but lacks some DFS-specific behavior.
+//
+// template <class Arc>
+// class Visitor {
+// public:
+// typedef typename Arc::StateId StateId;
+//
+// Visitor(T *return_data);
+// // Invoked before visit
+// void InitVisit(const Fst<Arc> &fst);
+// // Invoked when state discovered (2nd arg is visitation root)
+// bool InitState(StateId s, StateId root);
+// // Invoked when arc to white/undiscovered state examined
+// bool WhiteArc(StateId s, const Arc &a);
+// // Invoked when arc to grey/unfinished state examined
+// bool GreyArc(StateId s, const Arc &a);
+// // Invoked when arc to black/finished state examined
+// bool BlackArc(StateId s, const Arc &a);
+// // Invoked when state finished.
+// void FinishState(StateId s);
+// // Invoked after visit
+// void FinishVisit();
+// };
+
+// Performs queue-dependent visitation. Visitor class argument
+// determines actions and contains any return data. ArcFilter
+// determines arcs that are considered.
+//
+// Note this is more general than DfsVisit() in dfs-visit.h but lacks
+// some DFS-specific Visitor behavior.
+//
+// The queue only needs Enqueue(), Head(), Dequeue() and Empty(), which are
+// the operations used below. One arc iterator is kept per state that is
+// currently in the queue; it is created lazily when the state is at the head
+// and deleted as soon as it is exhausted or the visit is being aborted.
+// The state tables grow on demand, so the number of states does not have to
+// be known up front.
+template <class Arc, class V, class Q, class ArcFilter>
+void Visit(const Fst<Arc> &fst, V *visitor, Q *queue, ArcFilter filter) {
+
+ typedef typename Arc::StateId StateId;
+ typedef ArcIterator< Fst<Arc> > AIterator;
+
+ visitor->InitVisit(fst);
+
+ // No start state: nothing to visit.
+ StateId start = fst.Start();
+ if (start == kNoStateId) {
+ visitor->FinishVisit();
+ return;
+ }
+
+ // An Fst state's visit color
+ const unsigned kWhiteState = 0x01; // Undiscovered
+ const unsigned kGreyState = 0x02; // Discovered & unfinished
+ const unsigned kBlackState = 0x04; // Finished
+
+ // We destroy an iterator as soon as possible and mark it so
+ const unsigned kArcIterDone = 0x08; // Arc iterator done and destroyed
+
+ // Both tables are indexed by state ID and are always resized together.
+ vector<unsigned char> state_status;
+ vector<AIterator *> arc_iterator;
+
+ StateId nstates = start + 1; // # of known states in general case
+ bool expanded = false;
+ if (fst.Properties(kExpanded, false)) { // tests if expanded case, then
+ nstates = CountStates(fst); // uses ExpandedFst::NumStates().
+ expanded = true;
+ }
+
+ state_status.resize(nstates, kWhiteState);
+ arc_iterator.resize(nstates);
+ // Only consulted in the non-expanded case, to discover states beyond the
+ // largest one known so far; it keeps its position across outer iterations.
+ StateIterator< Fst<Arc> > siter(fst);
+
+ // Continues visit while true
+ bool visit = true;
+
+ // Iterates over trees in visit forest.
+ for (StateId root = start; visit && root < nstates;) {
+ visit = visitor->InitState(root, root);
+ state_status[root] = kGreyState;
+ queue->Enqueue(root);
+ while (!queue->Empty()) {
+ StateId s = queue->Head();
+ // Grows the tables if the queue head lies beyond the known states.
+ if (s >= state_status.size()) {
+ nstates = s + 1;
+ state_status.resize(nstates, kWhiteState);
+ arc_iterator.resize(nstates);
+ }
+ // Creates arc iterator if needed.
+ if (arc_iterator[s] == 0 && !(state_status[s] & kArcIterDone) && visit)
+ arc_iterator[s] = new AIterator(fst, s);
+ // Deletes arc iterator if done.
+ // When the visit has been aborted (!visit) the iterator is dropped
+ // regardless of its position, so the state gets finished right below.
+ AIterator *aiter = arc_iterator[s];
+ if ((aiter && aiter->Done()) || !visit) {
+ delete aiter;
+ arc_iterator[s] = 0;
+ state_status[s] |= kArcIterDone;
+ }
+ // Dequeues state and marks black if done
+ if (state_status[s] & kArcIterDone) {
+ queue->Dequeue();
+ visitor->FinishState(s);
+ state_status[s] = kBlackState;
+ continue;
+ }
+
+ const Arc &arc = aiter->Value();
+ // Grows the tables if the arc leads beyond the known states.
+ if (arc.nextstate >= state_status.size()) {
+ nstates = arc.nextstate + 1;
+ state_status.resize(nstates, kWhiteState);
+ arc_iterator.resize(nstates);
+ }
+ // Visits respective arc types
+ if (filter(arc)) {
+ // Enqueues destination state and marks grey if white
+ if (state_status[arc.nextstate] == kWhiteState) {
+ visit = visitor->WhiteArc(s, arc);
+ // On abort, go back to the loop head, which tears down the iterator
+ // of s and finishes it; the destination is neither initialized nor
+ // enqueued.
+ if (!visit) continue;
+ visit = visitor->InitState(arc.nextstate, root);
+ state_status[arc.nextstate] = kGreyState;
+ queue->Enqueue(arc.nextstate);
+ } else if (state_status[arc.nextstate] == kBlackState) {
+ visit = visitor->BlackArc(s, arc);
+ } else {
+ // Any remaining status (grey, possibly with kArcIterDone set) is
+ // reported as a grey arc.
+ visit = visitor->GreyArc(s, arc);
+ }
+ }
+ aiter->Next();
+ // Destroys an iterator ASAP for efficiency.
+ if (aiter->Done()) {
+ delete aiter;
+ arc_iterator[s] = 0;
+ state_status[s] |= kArcIterDone;
+ }
+ }
+ // Finds next tree root
+ // After the tree rooted at the start state, the scan restarts at state 0;
+ // afterwards it resumes just past the previous root.
+ for (root = root == start ? 0 : root + 1;
+ root < nstates && state_status[root] != kWhiteState;
+ ++root);
+
+ // Check for a state beyond the largest known state
+ // If the state iterator yields a state with ID nstates, that state is
+ // added as white so the outer loop can use it as the next root.
+ if (!expanded && root == nstates) {
+ for (; !siter.Done(); siter.Next()) {
+ if (siter.Value() == nstates) {
+ ++nstates;
+ state_status.push_back(kWhiteState);
+ arc_iterator.push_back(0);
+ break;
+ }
+ }
+ }
+ }
+ visitor->FinishVisit();
+}
+
+
+// Convenience overload of Visit() without an explicit arc filter: forwards to
+// the four-argument form with a default-constructed AnyArcFilter<Arc>.
+template <class Arc, class V, class Q>
+inline void Visit(const Fst<Arc> &fst, V *visitor, Q* queue) {
+  AnyArcFilter<Arc> every_arc;
+  Visit(fst, visitor, queue, every_arc);
+}
+
+// Copies input FST to mutable FST following queue order.
+//
+// Meant to be passed as the visitor of Visit(): output states are created as
+// input states are discovered, every examined arc is appended to its source
+// state, and a state's final weight is copied when the state finishes.
+template <class A>
+class CopyVisitor {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+
+  // ofst receives the copy; its previous states are deleted in InitVisit().
+  CopyVisitor(MutableFst<Arc> *ofst) : ifst_(0), ofst_(ofst) {}
+
+  // Remembers the input, empties the output and copies the start state ID.
+  void InitVisit(const Fst<A> &ifst) {
+    ifst_ = &ifst;
+    ofst_->DeleteStates();
+    ofst_->SetStart(ifst.Start());
+  }
+
+  // Adds output states until state ID s exists. Never aborts the visit.
+  bool InitState(StateId s, StateId) {
+    while (!(s < ofst_->NumStates())) {
+      ofst_->AddState();
+    }
+    return true;
+  }
+
+  // The destination's color is irrelevant for copying: every arc is added.
+  bool WhiteArc(StateId s, const Arc &arc) { return CopyArc(s, arc); }
+
+  bool GreyArc(StateId s, const Arc &arc) { return CopyArc(s, arc); }
+
+  bool BlackArc(StateId s, const Arc &arc) { return CopyArc(s, arc); }
+
+  // Copies the final weight of s from the input to the output.
+  void FinishState(StateId s) {
+    ofst_->SetFinal(s, ifst_->Final(s));
+  }
+
+  void FinishVisit() {}
+
+ private:
+  // Appends arc to output state s and lets the visit continue.
+  bool CopyArc(StateId s, const Arc &arc) {
+    ofst_->AddArc(s, arc);
+    return true;
+  }
+
+  const Fst<Arc> *ifst_;   // Input; set in InitVisit().
+  MutableFst<Arc> *ofst_;  // Output being built.
+};
+
+
+// Visits input FST up to a state limit following queue order.
+//
+// InitState() counts every discovered state and returns false, which aborts
+// the visit, once more than maxvisit states have been discovered. All other
+// callbacks accept unconditionally.
+template <class A>
+class PartialVisitor {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+
+  // nvisit_ is zeroed here as well as in InitVisit() so that the counter is
+  // never read or copied while indeterminate, e.g. if InitState() is called
+  // or the visitor is copied before a visit has started.
+  explicit PartialVisitor(StateId maxvisit)
+      : maxvisit_(maxvisit), nvisit_(0) {}
+
+  // Restarts the count, so one visitor can be reused for several visits.
+  void InitVisit(const Fst<A> &ifst) { nvisit_ = 0; }
+
+  // Counts the newly discovered state; false once the limit is exceeded.
+  bool InitState(StateId s, StateId) {
+    ++nvisit_;
+    return nvisit_ <= maxvisit_;
+  }
+
+  bool WhiteArc(StateId s, const Arc &arc) { return true; }
+  bool GreyArc(StateId s, const Arc &arc) { return true; }
+  bool BlackArc(StateId s, const Arc &arc) { return true; }
+  void FinishState(StateId s) {}
+  void FinishVisit() {}
+
+ private:
+  StateId maxvisit_;  // Maximum number of states to discover.
+  StateId nvisit_;    // Number of states discovered so far in this visit.
+};
+
+
+} // namespace fst
+
+#endif // FST_LIB_VISIT_H__
diff --git a/src/include/fst/weight.h b/src/include/fst/weight.h
new file mode 100644
index 0000000..72f5a22
--- /dev/null
+++ b/src/include/fst/weight.h
@@ -0,0 +1,179 @@
+// weight.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// General weight set and associated semiring operation definitions.
+//
+// A semiring is specified by two binary operations Plus and Times and
+// two designated elements Zero and One with the following properties:
+// Plus: associative, commutative, and has Zero as its identity.
+// Times: associative and has identity One, distributes w.r.t. Plus, and
+// has Zero as an annihilator:
+// Times(Zero(), a) == Times(a, Zero()) == Zero().
+//
+// A left semiring distributes on the left; a right semiring is
+// similarly defined.
+//
+// A Weight class is required to be (at least) a left or right semiring.
+//
+// In addition, the following should be defined for a Weight:
+// Member: predicate on set membership.
+// NoWeight: returns an element that is not a member, should only be
+// used to signal an error.
+// >>: reads weight.
+// <<: prints weight.
+// Read(istream &strm): reads from an input stream.
+// Write(ostream &strm): writes to an output stream.
+// Hash: maps weight to size_t.
+// ApproxEqual: approximate equality (for inexact weights)
+// Quantize: quantizes wrt delta (for inexact weights)
+// Divide: for all a,b,c s.t. Times(a, b) == c
+// --> b' = Divide(c, a, DIVIDE_LEFT) if a left semiring, b'.Member()
+// and Times(a, b') == c
+// --> a' = Divide(c, b, DIVIDE_RIGHT) if a right semiring, a'.Member()
+// and Times(a', b) == c
+// --> b' = Divide(c, a)
+// = Divide(c, a, DIVIDE_ANY)
+// = Divide(c, a, DIVIDE_LEFT)
+// = Divide(c, a, DIVIDE_RIGHT) if a commutative semiring,
+// b'.Member() and Times(a, b') == Times(b', a) == c
+// ReverseWeight: the type of the corresponding reverse weight.
+// Typically the same type as Weight for a (both left and right) semiring.
+// For the left string semiring, it is the right string semiring.
+// Reverse: a mapping from Weight to ReverseWeight s.t.
+// --> Reverse(Reverse(a)) = a
+// --> Reverse(Plus(a, b)) = Plus(Reverse(a), Reverse(b))
+// --> Reverse(Times(a, b)) = Times(Reverse(b), Reverse(a))
+// Typically the identity mapping in a (both left and right) semiring.
+// In the left string semiring, it maps to the reverse string
+// in the right string semiring.
+// Properties: specifies additional properties that hold:
+// LeftSemiring: indicates weights form a left semiring.
+// RightSemiring: indicates weights form a right semiring.
+// Commutative: for all a,b: Times(a,b) == Times(b,a)
+// Idempotent: for all a: Plus(a, a) == a.
+// Path Property: for all a, b: Plus(a, b) == a or Plus(a, b) == b.
+
+
+#ifndef FST_LIB_WEIGHT_H__
+#define FST_LIB_WEIGHT_H__
+
+#include <cmath>
+#include <cctype>
+#include <iostream>
+#include <sstream>
+
+#include <fst/compat.h>
+
+#include <fst/util.h>
+
+
+namespace fst {
+
+//
+// CONSTANT DEFINITIONS
+//
+
+// A representable float near .001 (1/1024 = 0.0009765625, a power of two).
+const float kDelta = 1.0F/1024.0F;
+
+// The constants below are single-bit flags that can be combined with bitwise
+// OR, as kSemiring does.
+
+// For all a,b,c: Times(c, Plus(a,b)) = Plus(Times(c,a), Times(c, b))
+const uint64 kLeftSemiring = 0x0000000000000001ULL;
+
+// For all a,b,c: Times(Plus(a,b), c) = Plus(Times(a,c), Times(b, c))
+const uint64 kRightSemiring = 0x0000000000000002ULL;
+
+// Both of the above: Times distributes over Plus on the left and the right.
+const uint64 kSemiring = kLeftSemiring | kRightSemiring;
+
+// For all a,b: Times(a,b) = Times(b,a)
+const uint64 kCommutative = 0x0000000000000004ULL;
+
+// For all a: Plus(a, a) = a
+const uint64 kIdempotent = 0x0000000000000008ULL;
+
+// For all a,b: Plus(a,b) = a or Plus(a,b) = b
+const uint64 kPath = 0x0000000000000010ULL;
+
+
+// Determines direction of division.
+enum DivideType { DIVIDE_LEFT, // left division
+ DIVIDE_RIGHT, // right division
+ DIVIDE_ANY }; // division in a commutative semiring
+
+// NATURAL ORDER
+//
+// By definition:
+// a <= b iff a + b = a
+// The natural order is a negative partial order iff the semiring is
+// idempotent. It is trivially monotonic for plus. It is left
+// (resp. right) monotonic for times iff the semiring is left
+// (resp. right) distributive. It is a total order iff the semiring
+// has the path property. See Mohri, "Semiring Framework and
+// Algorithms for Shortest-Distance Problems", Journal of Automata,
+// Languages and Combinatorics 7(3):321-350, 2002. We define the
+// strict version of this order below.
+
+// Strict version of the natural order: w1 is less than w2 when
+// Plus(w1, w2) == w1 and the two weights differ.
+template <class W>
+class NaturalLess {
+ public:
+  typedef W Weight;
+
+  // Reports an error through FSTERROR() when W::Properties() lacks the
+  // kIdempotent bit; construction itself still completes.
+  NaturalLess() {
+    const bool idempotent = (W::Properties() & kIdempotent) != 0;
+    if (!idempotent) {
+      FSTERROR() << "NaturalLess: Weight type is not idempotent: "
+                 << W::Type();
+    }
+  }
+
+  bool operator()(const W &w1, const W &w2) const {
+    // Not even "less or equal" unless w1 absorbs w2 under Plus.
+    if (!(Plus(w1, w2) == w1)) return false;
+    // Strictness: equal weights do not compare less.
+    return w1 != w2;
+  }
+};
+
+
+// Power is the iterated product for arbitrary semirings such that
+// Power(w, 0) is One() for the semiring, and
+// Power(w, n) = Times(Power(w, n-1), w)
+//
+// Implemented as n successive right-multiplications by w, starting from
+// W::One().
+template <class W>
+W Power(W w, size_t n) {
+  W product = W::One();
+  size_t remaining = n;
+  while (remaining > 0) {
+    product = Times(product, w);
+    --remaining;
+  }
+  return product;
+}
+
+// General weight converter - raises error.
+//
+// No conversion between two distinct weight types is defined here: the call
+// operator reports the failure through FSTERROR() and returns
+// W2::NoWeight(). Its argument is not inspected.
+template <class W1, class W2>
+struct WeightConvert {
+  W2 operator()(W1 w1) const {
+    // Both type names are quoted; the closing quote after the destination
+    // type keeps the quotes in the message balanced.
+    FSTERROR() << "WeightConvert: can't convert weight from \""
+               << W1::Type() << "\" to \"" << W2::Type() << "\"";
+    return W2::NoWeight();
+  }
+};
+
+// Specialized weight converter to self: returns its argument unchanged.
+template <class W>
+struct WeightConvert<W, W> {
+  W operator()(W w) const { return w; }
+};
+
+} // namespace fst
+
+#endif // FST_LIB_WEIGHT_H__