aboutsummaryrefslogtreecommitdiff
path: root/src/include/fst/extensions/far
diff options
context:
space:
mode:
authorAlexander Gutkin <agutkin@google.com>2012-09-12 18:11:43 +0100
committerAlexander Gutkin <agutkin@google.com>2012-09-12 18:11:43 +0100
commitdfd8b8327b93660601d016cdc6f29f433b45a8d8 (patch)
tree968ec84b8e32ad73ec18d74334930f36b7471906 /src/include/fst/extensions/far
parentf4c12fce1ee58e670f9c3fce46c40296ba9ee8a2 (diff)
downloadopenfst-dfd8b8327b93660601d016cdc6f29f433b45a8d8.tar.gz
Updated OpenFST version to openfst-1.3.2-CL32004048 from Greco3.
Change-Id: I19b0db718256b35c0e3e5a7315f1ed6335e6dcac
Diffstat (limited to 'src/include/fst/extensions/far')
-rw-r--r--src/include/fst/extensions/far/compile-strings.h61
-rw-r--r--src/include/fst/extensions/far/equal.h99
-rw-r--r--src/include/fst/extensions/far/extract.h2
-rw-r--r--src/include/fst/extensions/far/far.h184
-rw-r--r--src/include/fst/extensions/far/farscript.h51
-rw-r--r--src/include/fst/extensions/far/info.h2
-rw-r--r--src/include/fst/extensions/far/print-strings.h28
-rw-r--r--src/include/fst/extensions/far/stlist.h9
-rw-r--r--src/include/fst/extensions/far/sttable.h1
9 files changed, 395 insertions, 42 deletions
diff --git a/src/include/fst/extensions/far/compile-strings.h b/src/include/fst/extensions/far/compile-strings.h
index d7f4d6b..ca247db 100644
--- a/src/include/fst/extensions/far/compile-strings.h
+++ b/src/include/fst/extensions/far/compile-strings.h
@@ -56,7 +56,7 @@ class StringReader {
const SymbolTable *syms = 0,
Label unknown_label = kNoStateId)
: nline_(0), strm_(istrm), source_(source), entry_type_(entry_type),
- token_type_(token_type), done_(false),
+ token_type_(token_type), symbols_(syms), done_(false),
compiler_(token_type, syms, unknown_label, allow_negative_labels) {
Next(); // Initialize the reader to the first input.
}
@@ -87,8 +87,12 @@ class StringReader {
done_ = true; // whitespace at the end of a file.
}
- VectorFst<A> *GetVectorFst() {
+ VectorFst<A> *GetVectorFst(bool keep_symbols = false) {
VectorFst<A> *fst = new VectorFst<A>;
+ if (keep_symbols) {
+ fst->SetInputSymbols(symbols_);
+ fst->SetOutputSymbols(symbols_);
+ }
if (compiler_(content_, fst)) {
return fst;
} else {
@@ -97,9 +101,16 @@ class StringReader {
}
}
- CompactFst<A, StringCompactor<A> > *GetCompactFst() {
- CompactFst<A, StringCompactor<A> > *fst =
- new CompactFst<A, StringCompactor<A> >;
+ CompactFst<A, StringCompactor<A> > *GetCompactFst(bool keep_symbols = false) {
+ CompactFst<A, StringCompactor<A> > *fst;
+ if (keep_symbols) {
+ VectorFst<A> tmp;
+ tmp.SetInputSymbols(symbols_);
+ tmp.SetOutputSymbols(symbols_);
+ fst = new CompactFst<A, StringCompactor<A> >(tmp);
+ } else {
+ fst = new CompactFst<A, StringCompactor<A> >;
+ }
if (compiler_(content_, fst)) {
return fst;
} else {
@@ -114,6 +125,7 @@ class StringReader {
string source_;
EntryType entry_type_;
TokenType token_type_;
+ const SymbolTable *symbols_;
bool done_;
StringCompiler<A> compiler_;
string content_; // The actual content of the input stream's next FST.
@@ -135,6 +147,8 @@ void FarCompileStrings(const vector<string> &in_fnames,
FarTokenType tt,
const string &symbols_fname,
const string &unknown_symbol,
+ bool keep_symbols,
+ bool initial_symbols,
bool allow_negative_labels,
bool file_list_input,
const string &key_prefix,
@@ -175,8 +189,9 @@ void FarCompileStrings(const vector<string> &in_fnames,
const SymbolTable *syms = 0;
typename Arc::Label unknown_label = kNoLabel;
if (!symbols_fname.empty()) {
- syms = SymbolTable::ReadText(symbols_fname,
- allow_negative_labels);
+ SymbolTableTextOptions opts;
+ opts.allow_negative = allow_negative_labels;
+ syms = SymbolTable::ReadText(symbols_fname, opts);
if (!syms) {
FSTERROR() << "FarCompileStrings: error reading symbol table: "
<< symbols_fname;
@@ -199,32 +214,47 @@ void FarCompileStrings(const vector<string> &in_fnames,
vector<string> inputs;
if (file_list_input) {
for (int i = 1; i < in_fnames.size(); ++i) {
- ifstream istrm(in_fnames[i].c_str());
+ istream *istrm = in_fnames.empty() ? &cin :
+ new ifstream(in_fnames[i].c_str());
string str;
- while (getline(istrm, str))
+ while (getline(*istrm, str))
inputs.push_back(str);
+ if (!in_fnames.empty())
+ delete istrm;
}
} else {
inputs = in_fnames;
}
for (int i = 0, n = 0; i < inputs.size(); ++i) {
+ if (generate_keys == 0 && inputs[i].empty()) {
+ FSTERROR() << "FarCompileStrings: read from a file instead of stdin or"
+ << " set the --generate_keys flags.";
+ delete far_writer;
+ delete syms;
+ return;
+ }
int key_size = generate_keys ? generate_keys :
(entry_type == StringReader<Arc>::FILE ? 1 :
KeySize(inputs[i].c_str()));
- ifstream istrm(inputs[i].c_str());
+ istream *istrm = inputs[i].empty() ? &cin :
+ new ifstream(inputs[i].c_str());
+ bool keep_syms = keep_symbols;
for (StringReader<Arc> reader(
- istrm, inputs[i], entry_type, token_type,
- allow_negative_labels, syms, unknown_label);
+ *istrm, inputs[i].empty() ? "stdin" : inputs[i],
+ entry_type, token_type, allow_negative_labels,
+ syms, unknown_label);
!reader.Done();
reader.Next()) {
++n;
const Fst<Arc> *fst;
if (compact)
- fst = reader.GetCompactFst();
+ fst = reader.GetCompactFst(keep_syms);
else
- fst = reader.GetVectorFst();
+ fst = reader.GetVectorFst(keep_syms);
+ if (initial_symbols)
+ keep_syms = false;
if (!fst) {
FSTERROR() << "FarCompileStrings: compiling string number " << n
<< " in file " << inputs[i] << " failed with token_type = "
@@ -236,6 +266,7 @@ void FarCompileStrings(const vector<string> &in_fnames,
(fet == FET_FILE ? "file" : "unknown"));
delete far_writer;
delete syms;
+ if (!inputs[i].empty()) delete istrm;
return;
}
ostringstream keybuf;
@@ -260,6 +291,8 @@ void FarCompileStrings(const vector<string> &in_fnames,
}
if (generate_keys == 0)
n = 0;
+ if (!inputs[i].empty())
+ delete istrm;
}
delete far_writer;
diff --git a/src/include/fst/extensions/far/equal.h b/src/include/fst/extensions/far/equal.h
new file mode 100644
index 0000000..be82e2d
--- /dev/null
+++ b/src/include/fst/extensions/far/equal.h
@@ -0,0 +1,99 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+
+#ifndef FST_EXTENSIONS_FAR_EQUAL_H_
+#define FST_EXTENSIONS_FAR_EQUAL_H_
+
+#include <string>
+
+#include <fst/extensions/far/far.h>
+#include <fst/equal.h>
+
+namespace fst {
+
+// Compares two FST archives entry by entry.
+//
+// Returns true iff both archives yield the same keys in the same order and
+// the Fsts stored under each key are Equal() within `delta`.
+//
+// filename1, filename2: archives to compare. If either cannot be opened by
+//     FarReader<Arc>::Open(), false is returned.
+// delta: tolerance forwarded to Equal().
+// begin_key: if non-empty, both readers are positioned with Find(begin_key)
+//     first. If Find() fails on both archives the result is true; if it fails
+//     on exactly one, the result is false.
+// end_key: if non-empty, comparison stops successfully at the first pair of
+//     entries whose keys both sort after end_key.
+//
+// Details of any mismatch are reported through VLOG(1) only.
+template <class Arc>
+bool FarEqual(const string &filename1,
+              const string &filename2,
+              float delta = kDelta,
+              const string &begin_key = string(),
+              const string &end_key = string()) {
+
+  FarReader<Arc> *reader1 = FarReader<Arc>::Open(filename1);
+  FarReader<Arc> *reader2 = FarReader<Arc>::Open(filename2);
+  if (!reader1 || !reader2) {
+    // Deleting a null pointer is a no-op, so this frees whichever one opened.
+    delete reader1;
+    delete reader2;
+    VLOG(1) << "FarEqual: cannot open input Far file(s)";
+    return false;
+  }
+
+  if (!begin_key.empty()) {
+    bool find_begin1 = reader1->Find(begin_key);
+    bool find_begin2 = reader2->Find(begin_key);
+    if (!find_begin1 || !find_begin2) {
+      // Missing from both archives counts as equal; missing from one does not.
+      bool ret = !find_begin1 && !find_begin2;
+      if (!ret) {
+        VLOG(1) << "FarEqual: key \"" << begin_key << "\" missing from "
+                << (find_begin1 ? "second" : "first") << " archive.";
+      }
+      delete reader1;
+      delete reader2;
+      return ret;
+    }
+  }
+
+  for(; !reader1->Done() && !reader2->Done();
+      reader1->Next(), reader2->Next()) {
+    const string key1 = reader1->GetKey();
+    const string key2 = reader2->GetKey();
+    // Both readers have moved past the requested range: nothing left to check.
+    if (!end_key.empty() && end_key < key1 && end_key < key2) {
+      delete reader1;
+      delete reader2;
+      return true;
+    }
+    if (key1 != key2) {
+      VLOG(1) << "FarEqual: mismatched keys \""
+              << key1 << "\" <> \"" << key2 << "\".";
+      delete reader1;
+      delete reader2;
+      return false;
+    }
+    if (!Equal(reader1->GetFst(), reader2->GetFst(), delta)) {
+      VLOG(1) << "FarEqual: Fsts for key \"" << key1 << "\" are not equal.";
+      delete reader1;
+      delete reader2;
+      return false;
+    }
+  }
+
+  if (!reader1->Done() || !reader2->Done()) {
+    // Exactly one reader still has entries. The leftover key comes from the
+    // reader that is not done, so it is missing from the archive whose reader
+    // IS done: the first archive when reader1 is exhausted, else the second.
+    VLOG(1) << "FarEqual: key \""
+            << (reader1->Done() ? reader2->GetKey() : reader1->GetKey())
+            << "\" missing from " << (reader1->Done() ? "first" : "second")
+            << " archive.";
+    delete reader1;
+    delete reader2;
+    return false;
+  }
+
+  delete reader1;
+  delete reader2;
+  return true;
+}
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_FAR_EQUAL_H_
diff --git a/src/include/fst/extensions/far/extract.h b/src/include/fst/extensions/far/extract.h
index 022ca60..d6f92ff 100644
--- a/src/include/fst/extensions/far/extract.h
+++ b/src/include/fst/extensions/far/extract.h
@@ -70,7 +70,7 @@ void FarExtract(const vector<string> &ifilenames,
if (nrep > 0) {
ostringstream tmp;
tmp << '.' << nrep;
- key += tmp.str();
+ key.append(tmp.str().data(), tmp.str().size());
}
ofilename = key;
}
diff --git a/src/include/fst/extensions/far/far.h b/src/include/fst/extensions/far/far.h
index 82b9e5c..acce76e 100644
--- a/src/include/fst/extensions/far/far.h
+++ b/src/include/fst/extensions/far/far.h
@@ -32,6 +32,13 @@ namespace fst {
enum FarEntryType { FET_LINE, FET_FILE };
enum FarTokenType { FTT_SYMBOL, FTT_BYTE, FTT_UTF8 };
+// Returns true iff `filename` can be opened and IsFstHeader() accepts the
+// stream; a file that cannot be opened yields false without being inspected.
+inline bool IsFst(const string &filename) {
+  ifstream strm(filename.c_str());
+  const bool opened = !strm.fail();
+  return opened && IsFstHeader(strm, filename);
+}
+
// FST archive header class
class FarHeader {
public:
@@ -40,8 +47,11 @@ class FarHeader {
bool Read(const string &filename) {
FstHeader fsthdr;
- if (filename.empty()) { // Header reading unsupported on stdin.
- return false;
+ if (filename.empty()) {
+ // Header reading unsupported on stdin. Assumes STList and StdArc.
+ fartype_ = "stlist";
+ arctype_ = "standard";
+ return true;
} else if (IsSTTable(filename)) { // Check if STTable
ReadSTTableHeader(filename, &fsthdr);
fartype_ = "sttable";
@@ -52,6 +62,12 @@ class FarHeader {
fartype_ = "sttable";
arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
return true;
+ } else if (IsFst(filename)) { // Check if Fst
+ ifstream istrm(filename.c_str());
+ fsthdr.Read(istrm, filename);
+ fartype_ = "fst";
+ arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
+ return true;
}
return false;
}
@@ -61,8 +77,12 @@ class FarHeader {
string arctype_;
};
-enum FarType { FAR_DEFAULT = 0, FAR_STTABLE = 1, FAR_STLIST = 2,
- FAR_SSTABLE = 3 };
+enum FarType {  // Archive kind; FarWriter::Create() switches on it to choose the writer class.
+ FAR_DEFAULT = 0,
+ FAR_STTABLE = 1,
+ FAR_STLIST = 2,  // Created through STListFarWriter.
+ FAR_FST = 3,  // Plain Fst file(s); returned by FstFarWriter::Type() and FstFarReader::Type().
+};
// This class creates an archive of FSTs.
template <class A>
@@ -153,7 +173,7 @@ class STTableFarWriter : public FarWriter<A> {
public:
typedef A Arc;
- static STTableFarWriter *Create(const string filename) {
+ static STTableFarWriter *Create(const string &filename) {
STTableWriter<Fst<A>, FstWriter<A> > *writer =
STTableWriter<Fst<A>, FstWriter<A> >::Create(filename);
return new STTableFarWriter(writer);
@@ -183,7 +203,7 @@ class STListFarWriter : public FarWriter<A> {
public:
typedef A Arc;
- static STListFarWriter *Create(const string filename) {
+ static STListFarWriter *Create(const string &filename) {
STListWriter<Fst<A>, FstWriter<A> > *writer =
STListWriter<Fst<A>, FstWriter<A> >::Create(filename);
return new STListFarWriter(writer);
@@ -209,6 +229,43 @@ class STListFarWriter : public FarWriter<A> {
+// FarWriter that stores a single Fst as an ordinary Fst file. Only the first
+// Add() call writes anything; later calls are logged and dropped. The key
+// passed to Add() is not used.
template <class A>
+class FstFarWriter : public FarWriter<A> {
+ public:
+  typedef A Arc;
+
+  // Factory used by FarWriter<A>::Create() for FAR_FST.
+  static FstFarWriter *Create(const string &filename) {
+    return new FstFarWriter(filename);
+  }
+
+  // Only remembers the target filename; the file is written by Add().
+  explicit FstFarWriter(const string &filename)
+      : filename_(filename), error_(false), written_(false) {}
+
+  ~FstFarWriter() {}
+
+  // Writes `fst` to the target file on the first call and records whether
+  // the write failed. Subsequent calls only emit a warning.
+  void Add(const string &key, const Fst<A> &fst) {
+    if (written_) {
+      LOG(WARNING) << "FstFarWriter::Add: only one Fst supported,"
+                   << " subsequent entries discarded.";
+      return;
+    }
+    written_ = true;
+    error_ = !fst.Write(filename_);
+  }
+
+  // True iff the one write performed by Add() failed.
+  bool Error() const { return error_; }
+
+  FarType Type() const { return FAR_FST; }
+
+ private:
+  string filename_;  // Destination of the single Fst.
+  bool error_;       // Set when fst.Write() reported failure.
+  bool written_;     // Set once Add() has written an Fst.
+
+  DISALLOW_COPY_AND_ASSIGN(FstFarWriter);
+};
+
+
+template <class A>
FarWriter<A> *FarWriter<A>::Create(const string &filename, FarType type) {
switch(type) {
case FAR_DEFAULT:
@@ -220,6 +277,9 @@ FarWriter<A> *FarWriter<A>::Create(const string &filename, FarType type) {
case FAR_STLIST:
return STListFarWriter<A>::Create(filename);
break;
+ case FAR_FST:
+ return FstFarWriter<A>::Create(filename);
+ break;
default:
LOG(ERROR) << "FarWriter::Create: unknown far type";
return 0;
@@ -331,6 +391,114 @@ class STListFarReader : public FarReader<A> {
DISALLOW_COPY_AND_ASSIGN(STListFarReader);
};
+// FarReader over one or more standalone Fst files. Every file is one entry
+// and its filename is the entry's key; keys are visited in sorted order.
+// An empty filename stands for stdin, which may be listed at most once and
+// disables Reset() and Find().
+template <class A>
+class FstFarReader : public FarReader<A> {
+ public:
+  typedef A Arc;
+
+  static FstFarReader *Open(const string &filename) {
+    vector<string> filenames;
+    filenames.push_back(filename);
+    return new FstFarReader<A>(filenames);
+  }
+
+  static FstFarReader *Open(const vector<string> &filenames) {
+    return new FstFarReader<A>(filenames);
+  }
+
+  // Sorts the filenames, opens one input stream per entry and loads the first
+  // Fst. Sets the error flag (see Error()) if stdin is listed more than once
+  // or if the first Fst cannot be read.
+  FstFarReader(const vector<string> &filenames)
+      : keys_(filenames), has_stdin_(false), pos_(0), fst_(0), error_(false) {
+    sort(keys_.begin(), keys_.end());
+    streams_.resize(keys_.size(), 0);
+    for (size_t i = 0; i < keys_.size(); ++i) {
+      if (keys_[i].empty()) {
+        if (!has_stdin_) {
+          streams_[i] = &cin;
+          has_stdin_ = true;
+        } else {
+          FSTERROR() << "FstFarReader::FstFarReader: stdin should only "
+                     << "appear once in the input file list.";
+          error_ = true;
+          return;
+        }
+      } else {
+        streams_[i] = new ifstream(
+            keys_[i].c_str(), ifstream::in | ifstream::binary);
+      }
+    }
+    if (pos_ >= keys_.size()) return;
+    ReadFst();
+  }
+
+  // Rewinds to the first entry. Not supported when stdin is an input.
+  void Reset() {
+    if (has_stdin_) {
+      FSTERROR() << "FstFarReader::Reset: operation not supported on stdin";
+      error_ = true;
+      return;
+    }
+    pos_ = 0;
+    ReadFst();
+  }
+
+  // NOTE: `key` is currently ignored. The reader rewinds to the first entry
+  // and reports success. Not supported when stdin is an input.
+  bool Find(const string &key) {
+    if (has_stdin_) {
+      FSTERROR() << "FstFarReader::Find: operation not supported on stdin";
+      error_ = true;
+      return false;
+    }
+    pos_ = 0;  // TODO: position at `key` instead of the first entry.
+    ReadFst();
+    return true;
+  }
+
+  bool Done() const { return error_ || pos_ >= keys_.size(); }
+
+  void Next() {
+    ++pos_;
+    ReadFst();
+  }
+
+  // Only valid while !Done().
+  const string &GetKey() const {
+    return keys_[pos_];
+  }
+
+  // Only valid while !Done().
+  const Fst<A> &GetFst() const {
+    return *fst_;
+  }
+
+  FarType Type() const { return FAR_FST; }
+
+  bool Error() const { return error_; }
+
+  ~FstFarReader() {
+    delete fst_;
+    for (size_t i = 0; i < streams_.size(); ++i) {
+      // cin is not owned by this reader; only the ifstreams were allocated
+      // here, so only those may be deleted.
+      if (streams_[i] != &cin)
+        delete streams_[i];
+    }
+  }
+
+ private:
+  // Replaces the cached Fst with the one at pos_, or with nothing when pos_
+  // is past the last entry.
+  void ReadFst() {
+    // Null the pointer after freeing it: when pos_ is past the end nothing
+    // new is loaded, and a stale pointer would be freed again later.
+    delete fst_;
+    fst_ = 0;
+    if (pos_ >= keys_.size()) return;
+    // Files are rewound so that Reset()/Find() can re-read them. stdin is
+    // consumed once, front to back (Reset()/Find() reject it), and a failed
+    // seek on a non-seekable stdin would set failbit and break the read.
+    if (streams_[pos_] != &cin)
+      streams_[pos_]->seekg(0);
+    fst_ = Fst<A>::Read(*streams_[pos_], FstReadOptions());
+    if (!fst_) {
+      FSTERROR() << "FstFarReader: error reading Fst from: " << keys_[pos_];
+      error_ = true;
+    }
+  }
+
+  vector<string> keys_;       // Sorted filenames; "" denotes stdin.
+  vector<istream*> streams_;  // One per key; owned unless it is &cin.
+  bool has_stdin_;
+  size_t pos_;                // Index of the current entry.
+  mutable Fst<A> *fst_;       // Fst for the current entry, or 0.
+  mutable bool error_;
+
+  DISALLOW_COPY_AND_ASSIGN(FstFarReader);
+};
template <class A>
FarReader<A> *FarReader<A>::Open(const string &filename) {
@@ -340,6 +508,8 @@ FarReader<A> *FarReader<A>::Open(const string &filename) {
return STTableFarReader<A>::Open(filename);
else if (IsSTList(filename))
return STListFarReader<A>::Open(filename);
+ else if (IsFst(filename))
+ return FstFarReader<A>::Open(filename);
return 0;
}
@@ -352,6 +522,8 @@ FarReader<A> *FarReader<A>::Open(const vector<string> &filenames) {
return STTableFarReader<A>::Open(filenames);
else if (!filenames.empty() && IsSTList(filenames[0]))
return STListFarReader<A>::Open(filenames);
+ else if (!filenames.empty() && IsFst(filenames[0]))
+ return FstFarReader<A>::Open(filenames);
return 0;
}
diff --git a/src/include/fst/extensions/far/farscript.h b/src/include/fst/extensions/far/farscript.h
index 9c3b1ca..3a9c145 100644
--- a/src/include/fst/extensions/far/farscript.h
+++ b/src/include/fst/extensions/far/farscript.h
@@ -27,6 +27,7 @@ using std::vector;
#include <fst/script/arg-packs.h>
#include <fst/extensions/far/compile-strings.h>
#include <fst/extensions/far/create.h>
+#include <fst/extensions/far/equal.h>
#include <fst/extensions/far/extract.h>
#include <fst/extensions/far/info.h>
#include <fst/extensions/far/print-strings.h>
@@ -51,6 +52,8 @@ struct FarCompileStringsArgs {
const FarTokenType tt;
const string &symbols_fname;
const string &unknown_symbol;
+ const bool keep_symbols;
+ const bool initial_symbols;
const bool allow_negative_labels;
const bool file_list_input;
const string &key_prefix;
@@ -65,6 +68,8 @@ struct FarCompileStringsArgs {
FarTokenType tt,
const string &symbols_fname,
const string &unknown_symbol,
+ bool keep_symbols,
+ bool initial_symbols,
bool allow_negative_labels,
bool file_list_input,
const string &key_prefix,
@@ -72,6 +77,7 @@ struct FarCompileStringsArgs {
in_fnames(in_fnames), out_fname(out_fname), fst_type(fst_type),
far_type(far_type), generate_keys(generate_keys), fet(fet),
tt(tt), symbols_fname(symbols_fname), unknown_symbol(unknown_symbol),
+ keep_symbols(keep_symbols), initial_symbols(initial_symbols),
allow_negative_labels(allow_negative_labels),
file_list_input(file_list_input), key_prefix(key_prefix),
key_suffix(key_suffix) { }
@@ -82,7 +88,8 @@ void FarCompileStrings(FarCompileStringsArgs *args) {
fst::FarCompileStrings<Arc>(
args->in_fnames, args->out_fname, args->fst_type, args->far_type,
args->generate_keys, args->fet, args->tt, args->symbols_fname,
- args->unknown_symbol, args->allow_negative_labels, args->file_list_input,
+ args->unknown_symbol, args->keep_symbols, args->initial_symbols,
+ args->allow_negative_labels, args->file_list_input,
args->key_prefix, args->key_suffix);
}
@@ -97,6 +104,8 @@ void FarCompileStrings(
FarTokenType tt,
const string &symbols_fname,
const string &unknown_symbol,
+ bool keep_symbols,
+ bool initial_symbols,
bool allow_negative_labels,
bool file_list_input,
const string &key_prefix,
@@ -143,6 +152,25 @@ void FarCreate(const vector<string> &in_fnames,
const string &key_suffix);
+typedef args::Package<const string &, const string &, float,
+ const string &, const string &> FarEqualInnerArgs;
+typedef args::WithReturnValue<bool, FarEqualInnerArgs> FarEqualArgs;
+
+// Scripting-layer adapter: unpacks the packaged arguments, runs the templated
+// fst::FarEqual<Arc>, and stores the verdict in args->retval.
+template <class Arc>
+void FarEqual(FarEqualArgs *args) {
+  const bool equal = fst::FarEqual<Arc>(
+      args->args.arg1,   // filename1
+      args->args.arg2,   // filename2
+      args->args.arg3,   // delta
+      args->args.arg4,   // begin_key
+      args->args.arg5);  // end_key
+  args->retval = equal;
+}
+
+bool FarEqual(const string &filename1,
+ const string &filename2,
+ const string &arc_type,
+ float delta = kDelta,
+ const string &begin_key = string(),
+ const string &end_key = string());
+
+
typedef args::Package<const vector<string> &, int32,
const string&, const string&, const string&,
const string&> FarExtractArgs;
@@ -180,7 +208,9 @@ struct FarPrintStringsArgs {
const string &begin_key;
const string &end_key;
const bool print_key;
+ const bool print_weight;
const string &symbols_fname;
+ const bool initial_symbols;
const int32 generate_filenames;
const string &filename_prefix;
const string &filename_suffix;
@@ -188,12 +218,14 @@ struct FarPrintStringsArgs {
FarPrintStringsArgs(
const vector<string> &ifilenames, const FarEntryType entry_type,
const FarTokenType token_type, const string &begin_key,
- const string &end_key, const bool print_key,
- const string &symbols_fname, const int32 generate_filenames,
+ const string &end_key, const bool print_key, const bool print_weight,
+ const string &symbols_fname, const bool initial_symbols,
+ const int32 generate_filenames,
const string &filename_prefix, const string &filename_suffix) :
ifilenames(ifilenames), entry_type(entry_type), token_type(token_type),
- begin_key(begin_key), end_key(end_key), print_key(print_key),
- symbols_fname(symbols_fname),
+ begin_key(begin_key), end_key(end_key),
+ print_key(print_key), print_weight(print_weight),
+ symbols_fname(symbols_fname), initial_symbols(initial_symbols),
generate_filenames(generate_filenames), filename_prefix(filename_prefix),
filename_suffix(filename_suffix) { }
};
@@ -202,9 +234,9 @@ template <class Arc>
void FarPrintStrings(FarPrintStringsArgs *args) {
fst::FarPrintStrings<Arc>(
args->ifilenames, args->entry_type, args->token_type,
- args->begin_key, args->end_key, args->print_key,
- args->symbols_fname, args->generate_filenames, args->filename_prefix,
- args->filename_suffix);
+ args->begin_key, args->end_key, args->print_key, args->print_weight,
+ args->symbols_fname, args->initial_symbols, args->generate_filenames,
+ args->filename_prefix, args->filename_suffix);
}
@@ -215,7 +247,9 @@ void FarPrintStrings(const vector<string> &ifilenames,
const string &begin_key,
const string &end_key,
const bool print_key,
+ const bool print_weight,
const string &symbols_fname,
+ const bool initial_symbols,
const int32 generate_filenames,
const string &filename_prefix,
const string &filename_suffix);
@@ -227,6 +261,7 @@ void FarPrintStrings(const vector<string> &ifilenames,
#define REGISTER_FST_FAR_OPERATIONS(ArcType) \
REGISTER_FST_OPERATION(FarCompileStrings, ArcType, FarCompileStringsArgs); \
REGISTER_FST_OPERATION(FarCreate, ArcType, FarCreateArgs); \
+ REGISTER_FST_OPERATION(FarEqual, ArcType, FarEqualArgs); \
REGISTER_FST_OPERATION(FarExtract, ArcType, FarExtractArgs); \
REGISTER_FST_OPERATION(FarInfo, ArcType, FarInfoArgs); \
REGISTER_FST_OPERATION(FarPrintStrings, ArcType, FarPrintStringsArgs)
diff --git a/src/include/fst/extensions/far/info.h b/src/include/fst/extensions/far/info.h
index f010546..100fe68 100644
--- a/src/include/fst/extensions/far/info.h
+++ b/src/include/fst/extensions/far/info.h
@@ -34,7 +34,7 @@ void CountStatesAndArcs(const Fst<Arc> &fst, size_t *nstate, size_t *narc) {
StateIterator<Fst<Arc> > siter(fst);
for (; !siter.Done(); siter.Next(), ++(*nstate)) {
ArcIterator<Fst<Arc> > aiter(fst, siter.Value());
- for (; !aiter.Done(); aiter.Next(), ++(*narc));
+ for (; !aiter.Done(); aiter.Next(), ++(*narc)) {}
}
}
diff --git a/src/include/fst/extensions/far/print-strings.h b/src/include/fst/extensions/far/print-strings.h
index aff1e51..dcc7351 100644
--- a/src/include/fst/extensions/far/print-strings.h
+++ b/src/include/fst/extensions/far/print-strings.h
@@ -27,17 +27,21 @@
using std::vector;
#include <fst/extensions/far/far.h>
+#include <fst/shortest-distance.h>
#include <fst/string.h>
+DECLARE_string(far_field_separator);
+
namespace fst {
template <class Arc>
void FarPrintStrings(
const vector<string> &ifilenames, const FarEntryType entry_type,
const FarTokenType far_token_type, const string &begin_key,
- const string &end_key, const bool print_key, const string &symbols_fname,
- const int32 generate_filenames, const string &filename_prefix,
- const string &filename_suffix) {
+ const string &end_key, const bool print_key, const bool print_weight,
+ const string &symbols_fname, const bool initial_symbols,
+ const int32 generate_filenames,
+ const string &filename_prefix, const string &filename_suffix) {
typename StringPrinter<Arc>::TokenType token_type;
if (far_token_type == FTT_SYMBOL) {
@@ -54,7 +58,9 @@ void FarPrintStrings(
const SymbolTable *syms = 0;
if (!symbols_fname.empty()) {
// allow negative flag?
- syms = SymbolTable::ReadText(symbols_fname, true);
+ SymbolTableTextOptions opts;
+ opts.allow_negative = true;
+ syms = SymbolTable::ReadText(symbols_fname, opts);
if (!syms) {
FSTERROR() << "FarPrintStrings: error reading symbol table: "
<< symbols_fname;
@@ -62,8 +68,6 @@ void FarPrintStrings(
}
}
- StringPrinter<Arc> string_printer(token_type, syms);
-
FarReader<Arc> *far_reader = FarReader<Arc>::Open(ifilenames);
if (!far_reader) return;
@@ -83,14 +87,21 @@ void FarPrintStrings(
okey = key;
const Fst<Arc> &fst = far_reader->GetFst();
+ if (i == 1 && initial_symbols && syms == 0 && fst.InputSymbols() != 0)
+ syms = fst.InputSymbols()->Copy();
string str;
VLOG(2) << "Handling key: " << key;
+ StringPrinter<Arc> string_printer(
+ token_type, syms ? syms : fst.InputSymbols());
string_printer(fst, &str);
if (entry_type == FET_LINE) {
if (print_key)
- cout << key << "\t";
- cout << str << endl;
+ cout << key << FLAGS_far_field_separator[0];
+ cout << str;
+ if (print_weight)
+ cout << FLAGS_far_field_separator[0] << ShortestDistance(fst);
+ cout << endl;
} else if (entry_type == FET_FILE) {
stringstream sstrm;
if (generate_filenames) {
@@ -117,6 +128,7 @@ void FarPrintStrings(
ostrm << "\n";
}
}
+ delete syms;
}
diff --git a/src/include/fst/extensions/far/stlist.h b/src/include/fst/extensions/far/stlist.h
index 4738181..1cdc80c 100644
--- a/src/include/fst/extensions/far/stlist.h
+++ b/src/include/fst/extensions/far/stlist.h
@@ -26,6 +26,7 @@
#include <iostream>
#include <fstream>
+#include <sstream>
#include <fst/util.h>
#include <algorithm>
@@ -58,7 +59,7 @@ class STListWriter {
explicit STListWriter(const string filename)
: stream_(
- filename.empty() ? &std::cout :
+ filename.empty() ? &cout :
new ofstream(filename.c_str(), ofstream::out | ofstream::binary)),
error_(false) {
WriteType(*stream_, kSTListMagicNumber);
@@ -92,7 +93,7 @@ class STListWriter {
~STListWriter() {
WriteType(*stream_, string());
- if (stream_ != &std::cout)
+ if (stream_ != &cout)
delete stream_;
}
@@ -127,7 +128,7 @@ class STListReader {
for (size_t i = 0; i < filenames.size(); ++i) {
if (filenames[i].empty()) {
if (!has_stdin) {
- streams_[i] = &std::cin;
+ streams_[i] = &cin;
sources_[i] = "stdin";
has_stdin = true;
} else {
@@ -177,7 +178,7 @@ class STListReader {
~STListReader() {
for (size_t i = 0; i < streams_.size(); ++i) {
- if (streams_[i] != &std::cin)
+ if (streams_[i] != &cin)
delete streams_[i];
}
if (entry_)
diff --git a/src/include/fst/extensions/far/sttable.h b/src/include/fst/extensions/far/sttable.h
index 3a03133..3ce0a4b 100644
--- a/src/include/fst/extensions/far/sttable.h
+++ b/src/include/fst/extensions/far/sttable.h
@@ -29,6 +29,7 @@
#include <algorithm>
#include <iostream>
#include <fstream>
+#include <sstream>
#include <fst/util.h>
namespace fst {