// NOTE(review): in this span the original line breaks are gone and the text
// that stood between angle brackets is missing (#include targets, template
// parameter lists, reinterpret_cast target types). As written, everything
// after the leading "//" is a single comment. Restore this span from a
// pristine copy of the header before building.
//
// Contents of this span, in order: license header; FST_LIB_UTIL_H__ include
// guard; #include lines and using-declarations; DECLARE_bool(fst_error_fatal)
// and the FSTERROR() macro, which picks LOG(FATAL) or LOG(ERROR) from that
// flag; the opening of namespace fst; the generic ReadType(), which forwards
// to t->Read(strm); and READ_POD_TYPE, which defines ReadType() overloads that
// strm.read() sizeof(T) raw bytes into *t, instantiated for the built-in
// arithmetic types listed after the macro.
// util.h // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // Copyright 2005-2010 Google, Inc. // Author: riley@google.com (Michael Riley) // // \file // FST utility inline definitions. #ifndef FST_LIB_UTIL_H__ #define FST_LIB_UTIL_H__ #include using std::tr1::unordered_map; using std::tr1::unordered_multimap; #include using std::tr1::unordered_set; using std::tr1::unordered_multiset; #include #include #include #include #include #include using std::vector; #include #include #include #include #include // // UTILITY FOR ERROR HANDLING // DECLARE_bool(fst_error_fatal); #define FSTERROR() (FLAGS_fst_error_fatal ? LOG(FATAL) : LOG(ERROR)) namespace fst { // // UTILITIES FOR TYPE I/O // // Read some types from an input stream. // Generic case. template inline istream &ReadType(istream &strm, T *t) { return t->Read(strm); } // Fixed size, contiguous memory read. #define READ_POD_TYPE(T) \ inline istream &ReadType(istream &strm, T *t) { \ return strm.read(reinterpret_cast(t), sizeof(T)); \ } READ_POD_TYPE(bool); READ_POD_TYPE(char); READ_POD_TYPE(signed char); READ_POD_TYPE(unsigned char); READ_POD_TYPE(short); READ_POD_TYPE(unsigned short); READ_POD_TYPE(int); READ_POD_TYPE(unsigned int); READ_POD_TYPE(long); READ_POD_TYPE(unsigned long); READ_POD_TYPE(long long); READ_POD_TYPE(unsigned long long); READ_POD_TYPE(float); READ_POD_TYPE(double); // String case. 
inline istream &ReadType(istream &strm, string *s) { s->clear(); int32 ns = 0; strm.read(reinterpret_cast(&ns), sizeof(ns)); for (int i = 0; i < ns; ++i) { char c; strm.read(&c, 1); *s += c; } return strm; } // Pair case. template inline istream &ReadType(istream &strm, pair *p) { ReadType(strm, &p->first); ReadType(strm, &p->second); return strm; } template inline istream &ReadType(istream &strm, pair *p) { ReadType(strm, const_cast(&p->first)); ReadType(strm, &p->second); return strm; } // General case - no-op. template void StlReserve(C *c, int64 n) {} // Specialization for vectors. template void StlReserve(vector *c, int64 n) { c->reserve(n); } // STL sequence container. #define READ_STL_SEQ_TYPE(C) \ template \ inline istream &ReadType(istream &strm, C *c) { \ c->clear(); \ int64 n = 0; \ strm.read(reinterpret_cast(&n), sizeof(n)); \ StlReserve(c, n); \ for (ssize_t i = 0; i < n; ++i) { \ typename C::value_type value; \ ReadType(strm, &value); \ c->insert(c->end(), value); \ } \ return strm; \ } READ_STL_SEQ_TYPE(vector); READ_STL_SEQ_TYPE(list); // STL associative container. #define READ_STL_ASSOC_TYPE(C) \ template \ inline istream &ReadType(istream &strm, C *c) { \ c->clear(); \ int64 n = 0; \ strm.read(reinterpret_cast(&n), sizeof(n)); \ for (ssize_t i = 0; i < n; ++i) { \ typename C::value_type value; \ ReadType(strm, &value); \ c->insert(value); \ } \ return strm; \ } READ_STL_ASSOC_TYPE(set); READ_STL_ASSOC_TYPE(unordered_set); READ_STL_ASSOC_TYPE(map); READ_STL_ASSOC_TYPE(unordered_map); // Write some types to an output stream. // Generic case. template inline ostream &WriteType(ostream &strm, const T t) { t.Write(strm); return strm; } // Fixed size, contiguous memory write. 
#define WRITE_POD_TYPE(T) \ inline ostream &WriteType(ostream &strm, const T t) { \ return strm.write(reinterpret_cast(&t), sizeof(T)); \ } WRITE_POD_TYPE(bool); WRITE_POD_TYPE(char); WRITE_POD_TYPE(signed char); WRITE_POD_TYPE(unsigned char); WRITE_POD_TYPE(short); WRITE_POD_TYPE(unsigned short); WRITE_POD_TYPE(int); WRITE_POD_TYPE(unsigned int); WRITE_POD_TYPE(long); WRITE_POD_TYPE(unsigned long); WRITE_POD_TYPE(long long); WRITE_POD_TYPE(unsigned long long); WRITE_POD_TYPE(float); WRITE_POD_TYPE(double); // String case. inline ostream &WriteType(ostream &strm, const string &s) { int32 ns = s.size(); strm.write(reinterpret_cast(&ns), sizeof(ns)); return strm.write(s.data(), ns); } // Pair case. template inline ostream &WriteType(ostream &strm, const pair &p) { WriteType(strm, p.first); WriteType(strm, p.second); return strm; } // STL sequence container. #define WRITE_STL_SEQ_TYPE(C) \ template \ inline ostream &WriteType(ostream &strm, const C &c) { \ int64 n = c.size(); \ strm.write(reinterpret_cast(&n), sizeof(n)); \ for (typename C::const_iterator it = c.begin(); \ it != c.end(); ++it) \ WriteType(strm, *it); \ return strm; \ } WRITE_STL_SEQ_TYPE(vector); WRITE_STL_SEQ_TYPE(list); // STL associative container. #define WRITE_STL_ASSOC_TYPE(C) \ template \ inline ostream &WriteType(ostream &strm, const C &c) { \ int64 n = c.size(); \ strm.write(reinterpret_cast(&n), sizeof(n)); \ for (typename C::const_iterator it = c.begin(); \ it != c.end(); ++it) \ WriteType(strm, *it); \ return strm; \ } WRITE_STL_ASSOC_TYPE(set); WRITE_STL_ASSOC_TYPE(unordered_set); WRITE_STL_ASSOC_TYPE(map); WRITE_STL_ASSOC_TYPE(unordered_map); // Utilities for converting between int64 or Weight and string. 
int64 StrToInt64(const string &s, const string &src, size_t nline, bool allow_negative, bool *error = 0); template Weight StrToWeight(const string &s, const string &src, size_t nline) { Weight w; istringstream strm(s); strm >> w; if (!strm) { FSTERROR() << "StrToWeight: Bad weight = \"" << s << "\", source = " << src << ", line = " << nline; return Weight::NoWeight(); } return w; } void Int64ToStr(int64 n, string *s); template void WeightToStr(Weight w, string *s) { ostringstream strm; strm.precision(9); strm << w; s->append(strm.str().data(), strm.str().size()); } // Utilities for reading/writing label pairs // Returns true on success template bool ReadLabelPairs(const string& filename, vector >* pairs, bool allow_negative = false) { ifstream strm(filename.c_str()); if (!strm) { LOG(ERROR) << "ReadLabelPairs: Can't open file: " << filename; return false; } const int kLineLen = 8096; char line[kLineLen]; size_t nline = 0; pairs->clear(); while (strm.getline(line, kLineLen)) { ++nline; vector col; SplitToVector(line, "\n\t ", &col, true); if (col.size() == 0 || col[0][0] == '\0') // empty line continue; if (col.size() != 2) { LOG(ERROR) << "ReadLabelPairs: Bad number of columns, " << "file = " << filename << ", line = " << nline; return false; } bool err; Label frmlabel = StrToInt64(col[0], filename, nline, allow_negative, &err); if (err) return false; Label tolabel = StrToInt64(col[1], filename, nline, allow_negative, &err); if (err) return false; pairs->push_back(make_pair(frmlabel, tolabel)); } return true; } // Returns true on success template bool WriteLabelPairs(const string& filename, const vector >& pairs) { ostream *strm = &cout; if (!filename.empty()) { strm = new ofstream(filename.c_str()); if (!*strm) { LOG(ERROR) << "WriteLabelPairs: Can't open file: " << filename; return false; } } for (ssize_t n = 0; n < pairs.size(); ++n) *strm << pairs[n].first << "\t" << pairs[n].second << "\n"; if (!*strm) { LOG(ERROR) << "WriteLabelPairs: Write failed: " << 
(filename.empty() ? "standard output" : filename); return false; } if (strm != &cout) delete strm; return true; } // Utilities for converting a type name to a legal C symbol. void ConvertToLegalCSymbol(string *s); // // UTILITIES FOR STREAM I/O // bool AlignInput(istream &strm, int align); bool AlignOutput(ostream &strm, int align); // // UTILITIES FOR PROTOCOL BUFFER I/O // // An associative container for which testing membership is // faster than an STL set if members are restricted to an interval // that excludes most non-members. A 'Key' must have ==, !=, and < defined. // Element 'NoKey' should be a key that marks an uninitialized key and // is otherwise unused. 'Find()' returns an STL const_iterator to the match // found, otherwise it equals 'End()'. template class CompactSet { public: typedef typename set::const_iterator const_iterator; CompactSet() : min_key_(NoKey), max_key_(NoKey) { } CompactSet(const CompactSet &compact_set) : set_(compact_set.set_), min_key_(compact_set.min_key_), max_key_(compact_set.max_key_) { } void Insert(Key key) { set_.insert(key); if (min_key_ == NoKey || key < min_key_) min_key_ = key; if (max_key_ == NoKey || max_key_ < key) max_key_ = key; } void Clear() { set_.clear(); min_key_ = max_key_ = NoKey; } const_iterator Find(Key key) const { if (min_key_ == NoKey || key < min_key_ || max_key_ < key) return set_.end(); else return set_.find(key); } const_iterator Begin() const { return set_.begin(); } const_iterator End() const { return set_.end(); } private: set set_; Key min_key_; Key max_key_; void operator=(const CompactSet &); //disallow }; } // namespace fst #endif // FST_LIB_UTIL_H__