diff options
author | Alexander Gutkin <agutkin@google.com> | 2012-09-12 18:11:43 +0100 |
---|---|---|
committer | Alexander Gutkin <agutkin@google.com> | 2012-09-12 18:11:43 +0100 |
commit | dfd8b8327b93660601d016cdc6f29f433b45a8d8 (patch) | |
tree | 968ec84b8e32ad73ec18d74334930f36b7471906 /src/include/fst/extensions/far | |
parent | f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2 (diff) | |
download | openfst-dfd8b8327b93660601d016cdc6f29f433b45a8d8.tar.gz |
Updated OpenFST version to openfst-1.3.2-CL32004048 from Greco3.
Change-Id: I19b0db718256b35c0e3e5a7315f1ed6335e6dcac
Diffstat (limited to 'src/include/fst/extensions/far')
-rw-r--r-- | src/include/fst/extensions/far/compile-strings.h | 61 | ||||
-rw-r--r-- | src/include/fst/extensions/far/equal.h | 99 | ||||
-rw-r--r-- | src/include/fst/extensions/far/extract.h | 2 | ||||
-rw-r--r-- | src/include/fst/extensions/far/far.h | 184 | ||||
-rw-r--r-- | src/include/fst/extensions/far/farscript.h | 51 | ||||
-rw-r--r-- | src/include/fst/extensions/far/info.h | 2 | ||||
-rw-r--r-- | src/include/fst/extensions/far/print-strings.h | 28 | ||||
-rw-r--r-- | src/include/fst/extensions/far/stlist.h | 9 | ||||
-rw-r--r-- | src/include/fst/extensions/far/sttable.h | 1 |
9 files changed, 395 insertions, 42 deletions
diff --git a/src/include/fst/extensions/far/compile-strings.h b/src/include/fst/extensions/far/compile-strings.h index d7f4d6b..ca247db 100644 --- a/src/include/fst/extensions/far/compile-strings.h +++ b/src/include/fst/extensions/far/compile-strings.h @@ -56,7 +56,7 @@ class StringReader { const SymbolTable *syms = 0, Label unknown_label = kNoStateId) : nline_(0), strm_(istrm), source_(source), entry_type_(entry_type), - token_type_(token_type), done_(false), + token_type_(token_type), symbols_(syms), done_(false), compiler_(token_type, syms, unknown_label, allow_negative_labels) { Next(); // Initialize the reader to the first input. } @@ -87,8 +87,12 @@ class StringReader { done_ = true; // whitespace at the end of a file. } - VectorFst<A> *GetVectorFst() { + VectorFst<A> *GetVectorFst(bool keep_symbols = false) { VectorFst<A> *fst = new VectorFst<A>; + if (keep_symbols) { + fst->SetInputSymbols(symbols_); + fst->SetOutputSymbols(symbols_); + } if (compiler_(content_, fst)) { return fst; } else { @@ -97,9 +101,16 @@ class StringReader { } } - CompactFst<A, StringCompactor<A> > *GetCompactFst() { - CompactFst<A, StringCompactor<A> > *fst = - new CompactFst<A, StringCompactor<A> >; + CompactFst<A, StringCompactor<A> > *GetCompactFst(bool keep_symbols = false) { + CompactFst<A, StringCompactor<A> > *fst; + if (keep_symbols) { + VectorFst<A> tmp; + tmp.SetInputSymbols(symbols_); + tmp.SetOutputSymbols(symbols_); + fst = new CompactFst<A, StringCompactor<A> >(tmp); + } else { + fst = new CompactFst<A, StringCompactor<A> >; + } if (compiler_(content_, fst)) { return fst; } else { @@ -114,6 +125,7 @@ class StringReader { string source_; EntryType entry_type_; TokenType token_type_; + const SymbolTable *symbols_; bool done_; StringCompiler<A> compiler_; string content_; // The actual content of the input stream's next FST. @@ -135,6 +147,8 @@ void FarCompileStrings(const vector<string> &in_fnames, FarTokenType tt, const string &symbols_fname, const string &unknown_symbol, + bool keep_symbols, + bool initial_symbols, bool allow_negative_labels, bool file_list_input, const string &key_prefix, @@ -175,8 +189,9 @@ void FarCompileStrings(const vector<string> &in_fnames, const SymbolTable *syms = 0; typename Arc::Label unknown_label = kNoLabel; if (!symbols_fname.empty()) { - syms = SymbolTable::ReadText(symbols_fname, - allow_negative_labels); + SymbolTableTextOptions opts; + opts.allow_negative = allow_negative_labels; + syms = SymbolTable::ReadText(symbols_fname, opts); if (!syms) { FSTERROR() << "FarCompileStrings: error reading symbol table: " << symbols_fname; @@ -199,32 +214,47 @@ void FarCompileStrings(const vector<string> &in_fnames, vector<string> inputs; if (file_list_input) { for (int i = 1; i < in_fnames.size(); ++i) { - ifstream istrm(in_fnames[i].c_str()); + istream *istrm = in_fnames.empty() ? &cin : + new ifstream(in_fnames[i].c_str()); string str; - while (getline(istrm, str)) + while (getline(*istrm, str)) inputs.push_back(str); + if (!in_fnames.empty()) + delete istrm; } } else { inputs = in_fnames; } for (int i = 0, n = 0; i < inputs.size(); ++i) { + if (generate_keys == 0 && inputs[i].empty()) { + FSTERROR() << "FarCompileStrings: read from a file instead of stdin or" + << " set the --generate_keys flags."; + delete far_writer; + delete syms; + return; + } int key_size = generate_keys ? generate_keys : (entry_type == StringReader<Arc>::FILE ? 1 : KeySize(inputs[i].c_str())); - ifstream istrm(inputs[i].c_str()); + istream *istrm = inputs[i].empty() ? &cin : + new ifstream(inputs[i].c_str()); + bool keep_syms = keep_symbols; for (StringReader<Arc> reader( - istrm, inputs[i], entry_type, token_type, - allow_negative_labels, syms, unknown_label); + *istrm, inputs[i].empty() ? "stdin" : inputs[i], + entry_type, token_type, allow_negative_labels, + syms, unknown_label); !reader.Done(); reader.Next()) { ++n; const Fst<Arc> *fst; if (compact) - fst = reader.GetCompactFst(); + fst = reader.GetCompactFst(keep_syms); else - fst = reader.GetVectorFst(); + fst = reader.GetVectorFst(keep_syms); + if (initial_symbols) + keep_syms = false; if (!fst) { FSTERROR() << "FarCompileStrings: compiling string number " << n << " in file " << inputs[i] << " failed with token_type = " @@ -236,6 +266,7 @@ void FarCompileStrings(const vector<string> &in_fnames, (fet == FET_FILE ? "file" : "unknown")); delete far_writer; delete syms; + if (!inputs[i].empty()) delete istrm; return; } ostringstream keybuf; @@ -260,6 +291,8 @@ void FarCompileStrings(const vector<string> &in_fnames, } if (generate_keys == 0) n = 0; + if (!inputs[i].empty()) + delete istrm; } delete far_writer; diff --git a/src/include/fst/extensions/far/equal.h b/src/include/fst/extensions/far/equal.h new file mode 100644 index 0000000..be82e2d --- /dev/null +++ b/src/include/fst/extensions/far/equal.h @@ -0,0 +1,99 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: allauzen@google.com (Cyril Allauzen) + +#ifndef FST_EXTENSIONS_FAR_EQUAL_H_ +#define FST_EXTENSIONS_FAR_EQUAL_H_ + +#include <string> + +#include <fst/extensions/far/far.h> +#include <fst/equal.h> + +namespace fst { + +template <class Arc> +bool FarEqual(const string &filename1, + const string &filename2, + float delta = kDelta, + const string &begin_key = string(), + const string &end_key = string()) { + + FarReader<Arc> *reader1 = FarReader<Arc>::Open(filename1); + FarReader<Arc> *reader2 = FarReader<Arc>::Open(filename2); + if (!reader1 || !reader2) { + delete reader1; + delete reader2; + VLOG(1) << "FarEqual: cannot open input Far file(s)"; + return false; + } + + if (!begin_key.empty()) { + bool find_begin1 = reader1->Find(begin_key); + bool find_begin2 = reader2->Find(begin_key); + if (!find_begin1 || !find_begin2) { + bool ret = !find_begin1 && !find_begin2; + if (!ret) { + VLOG(1) << "FarEqual: key \"" << begin_key << "\" missing from " + << (find_begin1 ? "second" : "first") << " archive."; + } + delete reader1; + delete reader2; + return ret; + } + } + + for(; !reader1->Done() && !reader2->Done(); + reader1->Next(), reader2->Next()) { + const string key1 = reader1->GetKey(); + const string key2 = reader2->GetKey(); + if (!end_key.empty() && end_key < key1 && end_key < key2) { + delete reader1; + delete reader2; + return true; + } + if (key1 != key2) { + VLOG(1) << "FarEqual: mismatched keys \"" + << key1 << "\" <> \"" << key2 << "\"."; + delete reader1; + delete reader2; + return false; + } + if (!Equal(reader1->GetFst(), reader2->GetFst(), delta)) { + VLOG(1) << "FarEqual: Fsts for key \"" << key1 << "\" are not equal."; + delete reader1; + delete reader2; + return false; + } + } + + if (!reader1->Done() || !reader2->Done()) { + VLOG(1) << "FarEqual: key \"" + << (reader1->Done() ? reader2->GetKey() : reader1->GetKey()) + << "\" missing form " << (reader2->Done() ? "first" : "second") + << " archive."; + delete reader1; + delete reader2; + return false; + } + + delete reader1; + delete reader2; + return true; +} + +} // namespace fst + +#endif // FST_EXTENSIONS_FAR_EQUAL_H_ diff --git a/src/include/fst/extensions/far/extract.h b/src/include/fst/extensions/far/extract.h index 022ca60..d6f92ff 100644 --- a/src/include/fst/extensions/far/extract.h +++ b/src/include/fst/extensions/far/extract.h @@ -70,7 +70,7 @@ void FarExtract(const vector<string> &ifilenames, if (nrep > 0) { ostringstream tmp; tmp << '.' << nrep; - key += tmp.str(); + key.append(tmp.str().data(), tmp.str().size()); } ofilename = key; } diff --git a/src/include/fst/extensions/far/far.h b/src/include/fst/extensions/far/far.h index 82b9e5c..acce76e 100644 --- a/src/include/fst/extensions/far/far.h +++ b/src/include/fst/extensions/far/far.h @@ -32,6 +32,13 @@ namespace fst { enum FarEntryType { FET_LINE, FET_FILE }; enum FarTokenType { FTT_SYMBOL, FTT_BYTE, FTT_UTF8 }; +inline bool IsFst(const string &filename) { + ifstream strm(filename.c_str()); + if (!strm) + return false; + return IsFstHeader(strm, filename); +} + // FST archive header class class FarHeader { public: @@ -40,8 +47,11 @@ class FarHeader { bool Read(const string &filename) { FstHeader fsthdr; - if (filename.empty()) { // Header reading unsupported on stdin. - return false; + if (filename.empty()) { + // Header reading unsupported on stdin. Assumes STList and StdArc. + fartype_ = "stlist"; + arctype_ = "standard"; + return true; } else if (IsSTTable(filename)) { // Check if STTable ReadSTTableHeader(filename, &fsthdr); fartype_ = "sttable"; @@ -52,6 +62,12 @@ class FarHeader { fartype_ = "sttable"; arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType(); return true; + } else if (IsFst(filename)) { // Check if Fst + ifstream istrm(filename.c_str()); + fsthdr.Read(istrm, filename); + fartype_ = "fst"; + arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType(); + return true; } return false; } @@ -61,8 +77,12 @@ class FarHeader { string arctype_; }; -enum FarType { FAR_DEFAULT = 0, FAR_STTABLE = 1, FAR_STLIST = 2, - FAR_SSTABLE = 3 }; +enum FarType { + FAR_DEFAULT = 0, + FAR_STTABLE = 1, + FAR_STLIST = 2, + FAR_FST = 3, +}; // This class creates an archive of FSTs. template <class A> @@ -153,7 +173,7 @@ class STTableFarWriter : public FarWriter<A> { public: typedef A Arc; - static STTableFarWriter *Create(const string filename) { + static STTableFarWriter *Create(const string &filename) { STTableWriter<Fst<A>, FstWriter<A> > *writer = STTableWriter<Fst<A>, FstWriter<A> >::Create(filename); return new STTableFarWriter(writer); @@ -183,7 +203,7 @@ class STListFarWriter : public FarWriter<A> { public: typedef A Arc; - static STListFarWriter *Create(const string filename) { + static STListFarWriter *Create(const string &filename) { STListWriter<Fst<A>, FstWriter<A> > *writer = STListWriter<Fst<A>, FstWriter<A> >::Create(filename); return new STListFarWriter(writer); @@ -209,6 +229,43 @@ class STListFarWriter : public FarWriter<A> { template <class A> +class FstFarWriter : public FarWriter<A> { + public: + typedef A Arc; + + explicit FstFarWriter(const string &filename) + : filename_(filename), error_(false), written_(false) {} + + static FstFarWriter *Create(const string &filename) { + return new FstFarWriter(filename); + } + + void Add(const string &key, const Fst<A> &fst) { + if (written_) { + LOG(WARNING) << "FstFarWriter::Add: only one Fst supported," + << " subsequent entries discarded."; + } else { + error_ = !fst.Write(filename_); + written_ = true; + } + } + + FarType Type() const { return FAR_FST; } + + bool Error() const { return error_; } + + ~FstFarWriter() {} + + private: + string filename_; + bool error_; + bool written_; + + DISALLOW_COPY_AND_ASSIGN(FstFarWriter); +}; + + +template <class A> FarWriter<A> *FarWriter<A>::Create(const string &filename, FarType type) { switch(type) { case FAR_DEFAULT: @@ -220,6 +277,9 @@ FarWriter<A> *FarWriter<A>::Create(const string &filename, FarType type) { case FAR_STLIST: return STListFarWriter<A>::Create(filename); break; + case FAR_FST: + return FstFarWriter<A>::Create(filename); + break; default: LOG(ERROR) << "FarWriter::Create: unknown far type"; return 0; @@ -331,6 +391,114 @@ class STListFarReader : public FarReader<A> { DISALLOW_COPY_AND_ASSIGN(STListFarReader); }; +template <class A> +class FstFarReader : public FarReader<A> { + public: + typedef A Arc; + + static FstFarReader *Open(const string &filename) { + vector<string> filenames; + filenames.push_back(filename); + return new FstFarReader<A>(filenames); + } + + static FstFarReader *Open(const vector<string> &filenames) { + return new FstFarReader<A>(filenames); + } + + FstFarReader(const vector<string> &filenames) + : keys_(filenames), has_stdin_(false), pos_(0), fst_(0), error_(false) { + sort(keys_.begin(), keys_.end()); + streams_.resize(keys_.size(), 0); + for (size_t i = 0; i < keys_.size(); ++i) { + if (keys_[i].empty()) { + if (!has_stdin_) { + streams_[i] = &cin; + //sources_[i] = "stdin"; + has_stdin_ = true; + } else { + FSTERROR() << "FstFarReader::FstFarReader: stdin should only " + << "appear once in the input file list."; + error_ = true; + return; + } + } else { + streams_[i] = new ifstream( + keys_[i].c_str(), ifstream::in | ifstream::binary); + } + } + if (pos_ >= keys_.size()) return; + ReadFst(); + } + + void Reset() { + if (has_stdin_) { + FSTERROR() << "FstFarReader::Reset: operation not supported on stdin"; + error_ = true; + return; + } + pos_ = 0; + ReadFst(); + } + + bool Find(const string &key) { + if (has_stdin_) { + FSTERROR() << "FstFarReader::Find: operation not supported on stdin"; + error_ = true; + return false; + } + pos_ = 0;//TODO + ReadFst(); + return true; + } + + bool Done() const { return error_ || pos_ >= keys_.size(); } + + void Next() { + ++pos_; + ReadFst(); + } + + const string &GetKey() const { + return keys_[pos_]; + } + + const Fst<A> &GetFst() const { + return *fst_; + } + + FarType Type() const { return FAR_FST; } + + bool Error() const { return error_; } + + ~FstFarReader() { + if (fst_) delete fst_; + for (size_t i = 0; i < keys_.size(); ++i) + delete streams_[i]; + } + + private: + void ReadFst() { + if (fst_) delete fst_; + if (pos_ >= keys_.size()) return; + streams_[pos_]->seekg(0); + fst_ = Fst<A>::Read(*streams_[pos_], FstReadOptions()); + if (!fst_) { + FSTERROR() << "FstFarReader: error reading Fst from: " << keys_[pos_]; + error_ = true; + } + } + + private: + vector<string> keys_; + vector<istream*> streams_; + bool has_stdin_; + size_t pos_; + mutable Fst<A> *fst_; + mutable bool error_; + + DISALLOW_COPY_AND_ASSIGN(FstFarReader); +}; template <class A> FarReader<A> *FarReader<A>::Open(const string &filename) { @@ -340,6 +508,8 @@ FarReader<A> *FarReader<A>::Open(const string &filename) { return STTableFarReader<A>::Open(filename); else if (IsSTList(filename)) return STListFarReader<A>::Open(filename); + else if (IsFst(filename)) + return FstFarReader<A>::Open(filename); return 0; } @@ -352,6 +522,8 @@ FarReader<A> *FarReader<A>::Open(const vector<string> &filenames) { return STTableFarReader<A>::Open(filenames); else if (!filenames.empty() && IsSTList(filenames[0])) return STListFarReader<A>::Open(filenames); + else if (!filenames.empty() && IsFst(filenames[0])) + return FstFarReader<A>::Open(filenames); return 0; } diff --git a/src/include/fst/extensions/far/farscript.h b/src/include/fst/extensions/far/farscript.h index 9c3b1ca..3a9c145 100644 --- a/src/include/fst/extensions/far/farscript.h +++ b/src/include/fst/extensions/far/farscript.h @@ -27,6 +27,7 @@ using std::vector; #include <fst/script/arg-packs.h> #include <fst/extensions/far/compile-strings.h> #include <fst/extensions/far/create.h> +#include <fst/extensions/far/equal.h> #include <fst/extensions/far/extract.h> #include <fst/extensions/far/info.h> #include <fst/extensions/far/print-strings.h> @@ -51,6 +52,8 @@ struct FarCompileStringsArgs { const FarTokenType tt; const string &symbols_fname; const string &unknown_symbol; + const bool keep_symbols; + const bool initial_symbols; const bool allow_negative_labels; const bool file_list_input; const string &key_prefix; @@ -65,6 +68,8 @@ struct FarCompileStringsArgs { FarTokenType tt, const string &symbols_fname, const string &unknown_symbol, + bool keep_symbols, + bool initial_symbols, bool allow_negative_labels, bool file_list_input, const string &key_prefix, @@ -72,6 +77,7 @@ struct FarCompileStringsArgs { in_fnames(in_fnames), out_fname(out_fname), fst_type(fst_type), far_type(far_type), generate_keys(generate_keys), fet(fet), tt(tt), symbols_fname(symbols_fname), unknown_symbol(unknown_symbol), + keep_symbols(keep_symbols), initial_symbols(initial_symbols), allow_negative_labels(allow_negative_labels), file_list_input(file_list_input), key_prefix(key_prefix), key_suffix(key_suffix) { } @@ -82,7 +88,8 @@ void FarCompileStrings(FarCompileStringsArgs *args) { fst::FarCompileStrings<Arc>( args->in_fnames, args->out_fname, args->fst_type, args->far_type, args->generate_keys, args->fet, args->tt, args->symbols_fname, - args->unknown_symbol, args->allow_negative_labels, args->file_list_input, + args->unknown_symbol, args->keep_symbols, args->initial_symbols, + args->allow_negative_labels, args->file_list_input, args->key_prefix, args->key_suffix); } @@ -97,6 +104,8 @@ void FarCompileStrings( FarTokenType tt, const string &symbols_fname, const string &unknown_symbol, + bool keep_symbols, + bool initial_symbols, bool allow_negative_labels, bool file_list_input, const string &key_prefix, @@ -143,6 +152,25 @@ void FarCreate(const vector<string> &in_fnames, const string &key_suffix); +typedef args::Package<const string &, const string &, float, + const string &, const string &> FarEqualInnerArgs; +typedef args::WithReturnValue<bool, FarEqualInnerArgs> FarEqualArgs; + +template <class Arc> +void FarEqual(FarEqualArgs *args) { + args->retval = fst::FarEqual<Arc>( + args->args.arg1, args->args.arg2, args->args.arg3, + args->args.arg4, args->args.arg5); +} + +bool FarEqual(const string &filename1, + const string &filename2, + const string &arc_type, + float delta = kDelta, + const string &begin_key = string(), + const string &end_key = string()); + + typedef args::Package<const vector<string> &, int32, const string&, const string&, const string&, const string&> FarExtractArgs; @@ -180,7 +208,9 @@ struct FarPrintStringsArgs { const string &begin_key; const string &end_key; const bool print_key; + const bool print_weight; const string &symbols_fname; + const bool initial_symbols; const int32 generate_filenames; const string &filename_prefix; const string &filename_suffix; @@ -188,12 +218,14 @@ struct FarPrintStringsArgs { FarPrintStringsArgs( const vector<string> &ifilenames, const FarEntryType entry_type, const FarTokenType token_type, const string &begin_key, - const string &end_key, const bool print_key, - const string &symbols_fname, const int32 generate_filenames, + const string &end_key, const bool print_key, const bool print_weight, + const string &symbols_fname, const bool initial_symbols, + const int32 generate_filenames, const string &filename_prefix, const string &filename_suffix) : ifilenames(ifilenames), entry_type(entry_type), token_type(token_type), - begin_key(begin_key), end_key(end_key), print_key(print_key), - symbols_fname(symbols_fname), + begin_key(begin_key), end_key(end_key), + print_key(print_key), print_weight(print_weight), + symbols_fname(symbols_fname), initial_symbols(initial_symbols), generate_filenames(generate_filenames), filename_prefix(filename_prefix), filename_suffix(filename_suffix) { } }; @@ -202,9 +234,9 @@ template <class Arc> void FarPrintStrings(FarPrintStringsArgs *args) { fst::FarPrintStrings<Arc>( args->ifilenames, args->entry_type, args->token_type, - args->begin_key, args->end_key, args->print_key, - args->symbols_fname, args->generate_filenames, args->filename_prefix, - args->filename_suffix); + args->begin_key, args->end_key, args->print_key, args->print_weight, + args->symbols_fname, args->initial_symbols, args->generate_filenames, + args->filename_prefix, args->filename_suffix); } @@ -215,7 +247,9 @@ void FarPrintStrings(const vector<string> &ifilenames, const string &begin_key, const string &end_key, const bool print_key, + const bool print_weight, const string &symbols_fname, + const bool initial_symbols, const int32 generate_filenames, const string &filename_prefix, const string &filename_suffix); @@ -227,6 +261,7 @@ void FarPrintStrings(const vector<string> &ifilenames, #define REGISTER_FST_FAR_OPERATIONS(ArcType) \ REGISTER_FST_OPERATION(FarCompileStrings, ArcType, FarCompileStringsArgs); \ REGISTER_FST_OPERATION(FarCreate, ArcType, FarCreateArgs); \ + REGISTER_FST_OPERATION(FarEqual, ArcType, FarEqualArgs); \ REGISTER_FST_OPERATION(FarExtract, ArcType, FarExtractArgs); \ REGISTER_FST_OPERATION(FarInfo, ArcType, FarInfoArgs); \ REGISTER_FST_OPERATION(FarPrintStrings, ArcType, FarPrintStringsArgs) diff --git a/src/include/fst/extensions/far/info.h b/src/include/fst/extensions/far/info.h index f010546..100fe68 100644 --- a/src/include/fst/extensions/far/info.h +++ b/src/include/fst/extensions/far/info.h @@ -34,7 +34,7 @@ void CountStatesAndArcs(const Fst<Arc> &fst, size_t *nstate, size_t *narc) { StateIterator<Fst<Arc> > siter(fst); for (; !siter.Done(); siter.Next(), ++(*nstate)) { ArcIterator<Fst<Arc> > aiter(fst, siter.Value()); - for (; !aiter.Done(); aiter.Next(), ++(*narc)); + for (; !aiter.Done(); aiter.Next(), ++(*narc)) {} } } diff --git a/src/include/fst/extensions/far/print-strings.h b/src/include/fst/extensions/far/print-strings.h index aff1e51..dcc7351 100644 --- a/src/include/fst/extensions/far/print-strings.h +++ b/src/include/fst/extensions/far/print-strings.h @@ -27,17 +27,21 @@ using std::vector; #include <fst/extensions/far/far.h> +#include <fst/shortest-distance.h> #include <fst/string.h> +DECLARE_string(far_field_separator); + namespace fst { template <class Arc> void FarPrintStrings( const vector<string> &ifilenames, const FarEntryType entry_type, const FarTokenType far_token_type, const string &begin_key, - const string &end_key, const bool print_key, const string &symbols_fname, - const int32 generate_filenames, const string &filename_prefix, - const string &filename_suffix) { + const string &end_key, const bool print_key, const bool print_weight, + const string &symbols_fname, const bool initial_symbols, + const int32 generate_filenames, + const string &filename_prefix, const string &filename_suffix) { typename StringPrinter<Arc>::TokenType token_type; if (far_token_type == FTT_SYMBOL) { @@ -54,7 +58,9 @@ void FarPrintStrings( const SymbolTable *syms = 0; if (!symbols_fname.empty()) { // allow negative flag? - syms = SymbolTable::ReadText(symbols_fname, true); + SymbolTableTextOptions opts; + opts.allow_negative = true; + syms = SymbolTable::ReadText(symbols_fname, opts); if (!syms) { FSTERROR() << "FarPrintStrings: error reading symbol table: " << symbols_fname; @@ -62,8 +68,6 @@ void FarPrintStrings( } } - StringPrinter<Arc> string_printer(token_type, syms); - FarReader<Arc> *far_reader = FarReader<Arc>::Open(ifilenames); if (!far_reader) return; @@ -83,14 +87,21 @@ void FarPrintStrings( okey = key; const Fst<Arc> &fst = far_reader->GetFst(); + if (i == 1 && initial_symbols && syms == 0 && fst.InputSymbols() != 0) + syms = fst.InputSymbols()->Copy(); string str; VLOG(2) << "Handling key: " << key; + StringPrinter<Arc> string_printer( + token_type, syms ? syms : fst.InputSymbols()); string_printer(fst, &str); if (entry_type == FET_LINE) { if (print_key) - cout << key << "\t"; - cout << str << endl; + cout << key << FLAGS_far_field_separator[0]; + cout << str; + if (print_weight) + cout << FLAGS_far_field_separator[0] << ShortestDistance(fst); + cout << endl; } else if (entry_type == FET_FILE) { stringstream sstrm; if (generate_filenames) { @@ -117,6 +128,7 @@ void FarPrintStrings( ostrm << "\n"; } } + delete syms; } diff --git a/src/include/fst/extensions/far/stlist.h b/src/include/fst/extensions/far/stlist.h index 4738181..1cdc80c 100644 --- a/src/include/fst/extensions/far/stlist.h +++ b/src/include/fst/extensions/far/stlist.h @@ -26,6 +26,7 @@ #include <iostream> #include <fstream> +#include <sstream> #include <fst/util.h> #include <algorithm> @@ -58,7 +59,7 @@ class STListWriter { explicit STListWriter(const string filename) : stream_( - filename.empty() ? &std::cout : + filename.empty() ? &cout : new ofstream(filename.c_str(), ofstream::out | ofstream::binary)), error_(false) { WriteType(*stream_, kSTListMagicNumber); @@ -92,7 +93,7 @@ class STListWriter { ~STListWriter() { WriteType(*stream_, string()); - if (stream_ != &std::cout) + if (stream_ != &cout) delete stream_; } @@ -127,7 +128,7 @@ class STListReader { for (size_t i = 0; i < filenames.size(); ++i) { if (filenames[i].empty()) { if (!has_stdin) { - streams_[i] = &std::cin; + streams_[i] = &cin; sources_[i] = "stdin"; has_stdin = true; } else { @@ -177,7 +178,7 @@ class STListReader { ~STListReader() { for (size_t i = 0; i < streams_.size(); ++i) { - if (streams_[i] != &std::cin) + if (streams_[i] != &cin) delete streams_[i]; } if (entry_) diff --git a/src/include/fst/extensions/far/sttable.h b/src/include/fst/extensions/far/sttable.h index 3a03133..3ce0a4b 100644 --- a/src/include/fst/extensions/far/sttable.h +++ b/src/include/fst/extensions/far/sttable.h @@ -29,6 +29,7 @@ #include <algorithm> #include <iostream> #include <fstream> +#include <sstream> #include <fst/util.h> namespace fst { |