From 5b6dc79427b8f7eeb6a7ff68034ab8548ce670ea Mon Sep 17 00:00:00 2001 From: Alexander Gutkin Date: Thu, 28 Feb 2013 00:24:20 +0000 Subject: Bumped OpenFST implementation to openfst-1.3.3-CL41851770. Updated OpenFST implementation to the most recent version used by Greco3 (corresponds to nlp::fst exported at Perforce CL 41851770). In particular this version has an improved PDT support. Change-Id: I5aadfc962297eef73922c67e7d57866f11ee7d81 --- src/include/fst/util.h | 77 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 60 insertions(+), 17 deletions(-) (limited to 'src/include/fst/util.h') diff --git a/src/include/fst/util.h b/src/include/fst/util.h index 1f6046b..4eb8fba 100644 --- a/src/include/fst/util.h +++ b/src/include/fst/util.h @@ -268,17 +268,17 @@ void WeightToStr(Weight w, string *s) { s->append(strm.str().data(), strm.str().size()); } -// Utilities for reading/writing label pairs +// Utilities for reading/writing integer pairs (typically labels) // Returns true on success -template -bool ReadLabelPairs(const string& filename, - vector >* pairs, +template +bool ReadIntPairs(const string& filename, + vector >* pairs, bool allow_negative = false) { ifstream strm(filename.c_str()); if (!strm) { - LOG(ERROR) << "ReadLabelPairs: Can't open file: " << filename; + LOG(ERROR) << "ReadIntPairs: Can't open file: " << filename; return false; } @@ -291,33 +291,34 @@ bool ReadLabelPairs(const string& filename, ++nline; vector col; SplitToVector(line, "\n\t ", &col, true); - if (col.size() == 0 || col[0][0] == '\0') // empty line + // empty line or comment? + if (col.size() == 0 || col[0][0] == '\0' || col[0][0] == '#') continue; if (col.size() != 2) { - LOG(ERROR) << "ReadLabelPairs: Bad number of columns, " + LOG(ERROR) << "ReadIntPairs: Bad number of columns, " << "file = " << filename << ", line = " << nline; return false; } bool err; - Label frmlabel = StrToInt64(col[0], filename, nline, allow_negative, &err); + I i1 = StrToInt64(col[0], filename, nline, allow_negative, &err); if (err) return false; - Label tolabel = StrToInt64(col[1], filename, nline, allow_negative, &err); + I i2 = StrToInt64(col[1], filename, nline, allow_negative, &err); if (err) return false; - pairs->push_back(make_pair(frmlabel, tolabel)); + pairs->push_back(make_pair(i1, i2)); } return true; } // Returns true on success -template -bool WriteLabelPairs(const string& filename, - const vector >& pairs) { +template +bool WriteIntPairs(const string& filename, + const vector >& pairs) { ostream *strm = &cout; if (!filename.empty()) { strm = new ofstream(filename.c_str()); if (!*strm) { - LOG(ERROR) << "WriteLabelPairs: Can't open file: " << filename; + LOG(ERROR) << "WriteIntPairs: Can't open file: " << filename; return false; } } @@ -326,7 +327,7 @@ bool WriteLabelPairs(const string& filename, *strm << pairs[n].first << "\t" << pairs[n].second << "\n"; if (!*strm) { - LOG(ERROR) << "WriteLabelPairs: Write failed: " + LOG(ERROR) << "WriteIntPairs: Write failed: " << (filename.empty() ? "standard output" : filename); return false; } @@ -335,6 +336,21 @@ bool WriteLabelPairs(const string& filename, return true; } +// Utilities for reading/writing label pairs + +template +bool ReadLabelPairs(const string& filename, + vector >* pairs, + bool allow_negative = false) { + return ReadIntPairs(filename, pairs, allow_negative); +} + +template +bool WriteLabelPairs(const string& filename, + vector >& pairs) { + return WriteIntPairs(filename, pairs); +} + // Utilities for converting a type name to a legal C symbol. void ConvertToLegalCSymbol(string *s); @@ -344,8 +360,8 @@ void ConvertToLegalCSymbol(string *s); // UTILITIES FOR STREAM I/O // -bool AlignInput(istream &strm, int align); -bool AlignOutput(ostream &strm, int align); +bool AlignInput(istream &strm); +bool AlignOutput(ostream &strm); // // UTILITIES FOR PROTOCOL BUFFER I/O @@ -380,6 +396,17 @@ public: max_key_ = key; } + void Erase(Key key) { + set_.erase(key); + if (set_.empty()) { + min_key_ = max_key_ = NoKey; + } else if (key == min_key_) { + ++min_key_; + } else if (key == max_key_) { + --max_key_; + } + } + void Clear() { set_.clear(); min_key_ = max_key_ = NoKey; @@ -393,10 +420,26 @@ public: return set_.find(key); } + bool Member(Key key) const { + if (min_key_ == NoKey || key < min_key_ || max_key_ < key) { + return false; // out of range + } else if (min_key_ != NoKey && max_key_ + 1 == min_key_ + set_.size()) { + return true; // dense range + } else { + return set_.find(key) != set_.end(); + } + } + const_iterator Begin() const { return set_.begin(); } const_iterator End() const { return set_.end(); } + // All stored keys are greater than or equal to this value. + Key LowerBound() const { return min_key_; } + + // All stored keys are less than or equal to this value. + Key UpperBound() const { return max_key_; } + private: set set_; Key min_key_; -- cgit v1.2.3