// const-fst.h // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // // \file // Simple concrete immutable FST whose states and arcs are each stored // in single arrays. #ifndef FST_LIB_CONST_FST_H__ #define FST_LIB_CONST_FST_H__ #include "fst/lib/expanded-fst.h" #include "fst/lib/test-properties.h" namespace fst { template class ConstFst; // States and arcs each implemented by single arrays, templated on the // Arc definition. template class ConstFstImpl : public FstImpl { public: using FstImpl::SetType; using FstImpl::SetProperties; using FstImpl::Properties; using FstImpl::WriteHeaderAndSymbols; typedef typename A::Weight Weight; typedef typename A::StateId StateId; ConstFstImpl() : states_(0), arcs_(0), nstates_(0), narcs_(0), start_(kNoStateId) { SetType("const"); SetProperties(kNullProperties | kStaticProperties); } explicit ConstFstImpl(const Fst &fst); ~ConstFstImpl() { delete[] states_; delete[] arcs_; } StateId Start() const { return start_; } Weight Final(StateId s) const { return states_[s].final; } StateId NumStates() const { return nstates_; } size_t NumArcs(StateId s) const { return states_[s].narcs; } size_t NumInputEpsilons(StateId s) const { return states_[s].niepsilons; } size_t NumOutputEpsilons(StateId s) const { return states_[s].noepsilons; } static ConstFstImpl *Read(istream &strm, const FstReadOptions &opts); bool Write(ostream &strm, const FstWriteOptions &opts) const; A *Arcs(StateId s) { return arcs_ + states_[s].pos; } // Provide information needed for generic state iterator void InitStateIterator(StateIteratorData *data) const { data->base = 0; data->nstates = nstates_; } // Provide information needed for the generic arc iterator void InitArcIterator(StateId s, ArcIteratorData *data) const { data->base = 0; data->arcs = arcs_ + states_[s].pos; data->narcs = states_[s].narcs; data->ref_count = 0; } private: // States implemented by array *states_ below, arcs by (single) *arcs_. struct State { Weight final; // Final weight uint32 pos; // Start of state's arcs in *arcs_ uint32 narcs; // Number of arcs (per state) uint32 niepsilons; // # of input epsilons uint32 noepsilons; // # of output epsilons State() : final(Weight::Zero()), niepsilons(0), noepsilons(0) {} }; // Properties always true of this Fst class static const uint64 kStaticProperties = kExpanded; // Current file format version static const int kFileVersion = 1; // Minimum file format version supported static const int kMinFileVersion = 1; // Byte alignment for states and arcs in file format static const int kFileAlign = 16; State *states_; // States represenation A *arcs_; // Arcs representation StateId nstates_; // Number of states size_t narcs_; // Number of arcs (per FST) StateId start_; // Initial state DISALLOW_EVIL_CONSTRUCTORS(ConstFstImpl); }; template ConstFstImpl::ConstFstImpl(const Fst &fst) : nstates_(0), narcs_(0) { SetType("const"); uint64 copy_properties = fst.Properties(kCopyProperties, true); SetProperties(copy_properties | kStaticProperties); this->SetInputSymbols(fst.InputSymbols()); this->SetOutputSymbols(fst.OutputSymbols()); start_ = fst.Start(); // count # of states and arcs for (StateIterator< Fst > siter(fst); !siter.Done(); siter.Next()) { ++nstates_; StateId s = siter.Value(); for (ArcIterator< Fst > aiter(fst, s); !aiter.Done(); aiter.Next()) ++narcs_; } states_ = new State[nstates_]; arcs_ = new A[narcs_]; size_t pos = 0; for (StateId s = 0; s < nstates_; ++s) { states_[s].final = fst.Final(s); states_[s].pos = pos; states_[s].narcs = 0; states_[s].niepsilons = 0; states_[s].noepsilons = 0; for (ArcIterator< Fst > aiter(fst, s); !aiter.Done(); aiter.Next()) { const A &arc = aiter.Value(); ++states_[s].narcs; if (arc.ilabel == 0) ++states_[s].niepsilons; if (arc.olabel == 0) ++states_[s].noepsilons; arcs_[pos++] = arc; } } } template ConstFstImpl *ConstFstImpl::Read(istream &strm, const FstReadOptions &opts) { ConstFstImpl *impl = new ConstFstImpl; FstHeader hdr; if (!impl->ReadHeaderAndSymbols(strm, opts, kMinFileVersion, &hdr)) return 0; impl->start_ = hdr.Start(); impl->nstates_ = hdr.NumStates(); impl->narcs_ = hdr.NumArcs(); impl->states_ = new State[impl->nstates_]; impl->arcs_ = new A[impl->narcs_]; char c; for (int i = 0; i < kFileAlign && strm.tellg() % kFileAlign; ++i) strm.read(&c, 1); // TODO: memory map this size_t b = impl->nstates_ * sizeof(typename ConstFstImpl::State); strm.read(reinterpret_cast(impl->states_), b); if (!strm) { LOG(ERROR) << "ConstFst::Read: Read failed: " << opts.source; return 0; } // TODO: memory map this b = impl->narcs_ * sizeof(A); for (int i = 0; i < kFileAlign && strm.tellg() % kFileAlign; ++i) strm.read(&c, 1); strm.read(reinterpret_cast(impl->arcs_), b); if (!strm) { LOG(ERROR) << "ConstFst::Read: Read failed: " << opts.source; return 0; } return impl; } template bool ConstFstImpl::Write(ostream &strm, const FstWriteOptions &opts) const { FstHeader hdr; hdr.SetStart(start_); hdr.SetNumStates(nstates_); hdr.SetNumArcs(narcs_); WriteHeaderAndSymbols(strm, opts, kFileVersion, &hdr); if (!strm) return false; for (int i = 0; i < kFileAlign && strm.tellp() % kFileAlign; ++i) strm.write("", 1); strm.write(reinterpret_cast(states_), nstates_ * sizeof(State)); for (int i = 0; i < kFileAlign && strm.tellp() % kFileAlign; ++i) strm.write("", 1); strm.write(reinterpret_cast(arcs_), narcs_ * sizeof(A)); strm.flush(); if (!strm) { LOG(ERROR) << "ConstFst::Write: Write failed: " << opts.source; return false; } return true; } // Simple concrete immutable FST. This class attaches interface to // implementation and handles reference counting. template class ConstFst : public ExpandedFst { public: friend class StateIterator< ConstFst >; friend class ArcIterator< ConstFst >; typedef A Arc; typedef typename A::Weight Weight; typedef typename A::StateId StateId; ConstFst() : impl_(new ConstFstImpl()) {} ConstFst(const ConstFst &fst) : impl_(fst.impl_) { impl_->IncrRefCount(); } explicit ConstFst(const Fst &fst) : impl_(new ConstFstImpl(fst)) {} virtual ~ConstFst() { if (!impl_->DecrRefCount()) delete impl_; } virtual StateId Start() const { return impl_->Start(); } virtual Weight Final(StateId s) const { return impl_->Final(s); } StateId NumStates() const { return impl_->NumStates(); } size_t NumArcs(StateId s) const { return impl_->NumArcs(s); } size_t NumInputEpsilons(StateId s) const { return impl_->NumInputEpsilons(s); } size_t NumOutputEpsilons(StateId s) const { return impl_->NumOutputEpsilons(s); } virtual uint64 Properties(uint64 mask, bool test) const { if (test) { uint64 known, test = TestProperties(*this, mask, &known); impl_->SetProperties(test, known); return test & mask; } else { return impl_->Properties(mask); } } virtual const string& Type() const { return impl_->Type(); } // Get a copy of this ConstFst virtual ConstFst *Copy() const { impl_->IncrRefCount(); return new ConstFst(impl_); } // Read a ConstFst from an input stream; return NULL on error static ConstFst *Read(istream &strm, const FstReadOptions &opts) { ConstFstImpl* impl = ConstFstImpl::Read(strm, opts); return impl ? new ConstFst(impl) : 0; } // Read a ConstFst from a file; returno NULL on error static ConstFst *Read(const string &filename) { ifstream strm(filename.c_str()); if (!strm) { LOG(ERROR) << "ConstFst::Write: Can't open file: " << filename; return 0; } return Read(strm, FstReadOptions(filename)); } // Write a ConstFst to an output stream; return false on error virtual bool Write(ostream &strm, const FstWriteOptions &opts) const { return impl_->Write(strm, opts); } // Write a ConstFst to a file; return false on error virtual bool Write(const string &filename) const { if (!filename.empty()) { ofstream strm(filename.c_str()); if (!strm) { LOG(ERROR) << "ConstrFst::Write: Can't open file: " << filename; return false; } return Write(strm, FstWriteOptions(filename)); } else { return Write(std::cout, FstWriteOptions("standard output")); } } virtual const SymbolTable* InputSymbols() const { return impl_->InputSymbols(); } virtual const SymbolTable* OutputSymbols() const { return impl_->OutputSymbols(); } virtual void InitStateIterator(StateIteratorData *data) const { impl_->InitStateIterator(data); } virtual void InitArcIterator(StateId s, ArcIteratorData *data) const { impl_->InitArcIterator(s, data); } private: ConstFst(ConstFstImpl *impl) : impl_(impl) {} ConstFstImpl *impl_; // FST's impl void operator=(const ConstFst &fst); // disallow }; // Specialization for ConstFst; see generic version in fst.h // for sample usage (but use the ConstFst type!). This version // should inline. template class StateIterator< ConstFst > { public: typedef typename A::StateId StateId; explicit StateIterator(const ConstFst &fst) : nstates_(fst.impl_->NumStates()), s_(0) {} bool Done() const { return s_ >= nstates_; } StateId Value() const { return s_; } void Next() { ++s_; } void Reset() { s_ = 0; } private: StateId nstates_; StateId s_; DISALLOW_EVIL_CONSTRUCTORS(StateIterator); }; // Specialization for ConstFst; see generic version in fst.h // for sample usage (but use the ConstFst type!). This version // should inline. template class ArcIterator< ConstFst > { public: typedef typename A::StateId StateId; ArcIterator(const ConstFst &fst, StateId s) : arcs_(fst.impl_->Arcs(s)), narcs_(fst.impl_->NumArcs(s)), i_(0) {} bool Done() const { return i_ >= narcs_; } const A& Value() const { return arcs_[i_]; } void Next() { ++i_; } void Reset() { i_ = 0; } void Seek(size_t a) { i_ = a; } private: const A *arcs_; size_t narcs_; size_t i_; DISALLOW_EVIL_CONSTRUCTORS(ArcIterator); }; // A useful alias when using StdArc. typedef ConstFst StdConstFst; } // namespace fst; #endif // FST_LIB_CONST_FST_H__