From f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2 Mon Sep 17 00:00:00 2001 From: Ian Hodson Date: Wed, 30 May 2012 21:27:06 +0100 Subject: Add openfst to external, as used by GoogleTTS Moved from GoogleTTS Change-Id: I6bc6bdadaa53bd0f810b88443339f6d899502cc8 --- src/include/fst/util.h | 409 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 409 insertions(+) create mode 100644 src/include/fst/util.h (limited to 'src/include/fst/util.h') diff --git a/src/include/fst/util.h b/src/include/fst/util.h new file mode 100644 index 0000000..87231e1 --- /dev/null +++ b/src/include/fst/util.h @@ -0,0 +1,409 @@ +// util.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: riley@google.com (Michael Riley) +// +// \file +// FST utility inline definitions. + +#ifndef FST_LIB_UTIL_H__ +#define FST_LIB_UTIL_H__ + +#include +using std::tr1::unordered_map; +using std::tr1::unordered_multimap; +#include +using std::tr1::unordered_set; +using std::tr1::unordered_multiset; +#include +#include +#include +#include +#include +#include +using std::vector; + + +#include +#include + +#include +#include + +// +// UTILITY FOR ERROR HANDLING +// + +DECLARE_bool(fst_error_fatal); + +#define FSTERROR() (FLAGS_fst_error_fatal ? LOG(FATAL) : LOG(ERROR)) + +namespace fst { + +// +// UTILITIES FOR TYPE I/O +// + +// Read some types from an input stream. + +// Generic case. +template +inline istream &ReadType(istream &strm, T *t) { + return t->Read(strm); +} + +// Fixed size, contiguous memory read. +#define READ_POD_TYPE(T) \ +inline istream &ReadType(istream &strm, T *t) { \ + return strm.read(reinterpret_cast(t), sizeof(T)); \ +} + +READ_POD_TYPE(bool); +READ_POD_TYPE(char); +READ_POD_TYPE(signed char); +READ_POD_TYPE(unsigned char); +READ_POD_TYPE(short); +READ_POD_TYPE(unsigned short); +READ_POD_TYPE(int); +READ_POD_TYPE(unsigned int); +READ_POD_TYPE(long); +READ_POD_TYPE(unsigned long); +READ_POD_TYPE(long long); +READ_POD_TYPE(unsigned long long); +READ_POD_TYPE(float); +READ_POD_TYPE(double); + +// String case. +inline istream &ReadType(istream &strm, string *s) { + s->clear(); + int32 ns = 0; + strm.read(reinterpret_cast(&ns), sizeof(ns)); + for (int i = 0; i < ns; ++i) { + char c; + strm.read(&c, 1); + *s += c; + } + return strm; +} + +// Pair case. +template +inline istream &ReadType(istream &strm, pair *p) { + ReadType(strm, &p->first); + ReadType(strm, &p->second); + return strm; +} + +template +inline istream &ReadType(istream &strm, pair *p) { + ReadType(strm, const_cast(&p->first)); + ReadType(strm, &p->second); + return strm; +} + +// General case - no-op. +template +void StlReserve(C *c, int64 n) {} + +// Specialization for vectors. +template +void StlReserve(vector *c, int64 n) { + c->reserve(n); +} + +// STL sequence container. +#define READ_STL_SEQ_TYPE(C) \ +template \ +inline istream &ReadType(istream &strm, C *c) { \ + c->clear(); \ + int64 n = 0; \ + strm.read(reinterpret_cast(&n), sizeof(n)); \ + StlReserve(c, n); \ + for (ssize_t i = 0; i < n; ++i) { \ + typename C::value_type value; \ + ReadType(strm, &value); \ + c->insert(c->end(), value); \ + } \ + return strm; \ +} + +READ_STL_SEQ_TYPE(vector); +READ_STL_SEQ_TYPE(list); + +// STL associative container. +#define READ_STL_ASSOC_TYPE(C) \ +template \ +inline istream &ReadType(istream &strm, C *c) { \ + c->clear(); \ + int64 n = 0; \ + strm.read(reinterpret_cast(&n), sizeof(n)); \ + for (ssize_t i = 0; i < n; ++i) { \ + typename C::value_type value; \ + ReadType(strm, &value); \ + c->insert(value); \ + } \ + return strm; \ +} + +READ_STL_ASSOC_TYPE(set); +READ_STL_ASSOC_TYPE(unordered_set); +READ_STL_ASSOC_TYPE(map); +READ_STL_ASSOC_TYPE(unordered_map); + +// Write some types to an output stream. + +// Generic case. +template +inline ostream &WriteType(ostream &strm, const T t) { + t.Write(strm); + return strm; +} + +// Fixed size, contiguous memory write. +#define WRITE_POD_TYPE(T) \ +inline ostream &WriteType(ostream &strm, const T t) { \ + return strm.write(reinterpret_cast(&t), sizeof(T)); \ +} + +WRITE_POD_TYPE(bool); +WRITE_POD_TYPE(char); +WRITE_POD_TYPE(signed char); +WRITE_POD_TYPE(unsigned char); +WRITE_POD_TYPE(short); +WRITE_POD_TYPE(unsigned short); +WRITE_POD_TYPE(int); +WRITE_POD_TYPE(unsigned int); +WRITE_POD_TYPE(long); +WRITE_POD_TYPE(unsigned long); +WRITE_POD_TYPE(long long); +WRITE_POD_TYPE(unsigned long long); +WRITE_POD_TYPE(float); +WRITE_POD_TYPE(double); + +// String case. +inline ostream &WriteType(ostream &strm, const string &s) { + int32 ns = s.size(); + strm.write(reinterpret_cast(&ns), sizeof(ns)); + return strm.write(s.data(), ns); +} + +// Pair case. +template +inline ostream &WriteType(ostream &strm, const pair &p) { + WriteType(strm, p.first); + WriteType(strm, p.second); + return strm; +} + +// STL sequence container. +#define WRITE_STL_SEQ_TYPE(C) \ +template \ +inline ostream &WriteType(ostream &strm, const C &c) { \ + int64 n = c.size(); \ + strm.write(reinterpret_cast(&n), sizeof(n)); \ + for (typename C::const_iterator it = c.begin(); \ + it != c.end(); ++it) \ + WriteType(strm, *it); \ + return strm; \ +} + +WRITE_STL_SEQ_TYPE(vector); +WRITE_STL_SEQ_TYPE(list); + +// STL associative container. +#define WRITE_STL_ASSOC_TYPE(C) \ +template \ +inline ostream &WriteType(ostream &strm, const C &c) { \ + int64 n = c.size(); \ + strm.write(reinterpret_cast(&n), sizeof(n)); \ + for (typename C::const_iterator it = c.begin(); \ + it != c.end(); ++it) \ + WriteType(strm, *it); \ + return strm; \ +} + +WRITE_STL_ASSOC_TYPE(set); +WRITE_STL_ASSOC_TYPE(unordered_set); +WRITE_STL_ASSOC_TYPE(map); +WRITE_STL_ASSOC_TYPE(unordered_map); + +// Utilities for converting between int64 or Weight and string. + +int64 StrToInt64(const string &s, const string &src, size_t nline, + bool allow_negative, bool *error = 0); + +template +Weight StrToWeight(const string &s, const string &src, size_t nline) { + Weight w; + istringstream strm(s); + strm >> w; + if (!strm) { + FSTERROR() << "StrToWeight: Bad weight = \"" << s + << "\", source = " << src << ", line = " << nline; + return Weight::NoWeight(); + } + return w; +} + +void Int64ToStr(int64 n, string *s); + +template +void WeightToStr(Weight w, string *s) { + ostringstream strm; + strm.precision(9); + strm << w; + *s += strm.str(); +} + +// Utilities for reading/writing label pairs + +// Returns true on success +template +bool ReadLabelPairs(const string& filename, + vector >* pairs, + bool allow_negative = false) { + ifstream strm(filename.c_str()); + + if (!strm) { + LOG(ERROR) << "ReadLabelPairs: Can't open file: " << filename; + return false; + } + + const int kLineLen = 8096; + char line[kLineLen]; + size_t nline = 0; + + pairs->clear(); + while (strm.getline(line, kLineLen)) { + ++nline; + vector col; + SplitToVector(line, "\n\t ", &col, true); + if (col.size() == 0 || col[0][0] == '\0') // empty line + continue; + if (col.size() != 2) { + LOG(ERROR) << "ReadLabelPairs: Bad number of columns, " + << "file = " << filename << ", line = " << nline; + return false; + } + + bool err; + Label frmlabel = StrToInt64(col[0], filename, nline, allow_negative, &err); + if (err) return false; + Label tolabel = StrToInt64(col[1], filename, nline, allow_negative, &err); + if (err) return false; + pairs->push_back(make_pair(frmlabel, tolabel)); + } + return true; +} + +// Returns true on success +template +bool WriteLabelPairs(const string& filename, + const vector >& pairs) { + ostream *strm = &std::cout; + if (!filename.empty()) { + strm = new ofstream(filename.c_str()); + if (!*strm) { + LOG(ERROR) << "WriteLabelPairs: Can't open file: " << filename; + return false; + } + } + + for (ssize_t n = 0; n < pairs.size(); ++n) + *strm << pairs[n].first << "\t" << pairs[n].second << "\n"; + + if (!*strm) { + LOG(ERROR) << "WriteLabelPairs: Write failed: " + << (filename.empty() ? "standard output" : filename); + return false; + } + if (strm != &std::cout) + delete strm; + return true; +} + +// Utilities for converting a type name to a legal C symbol. + +void ConvertToLegalCSymbol(string *s); + + +// +// UTILITIES FOR STREAM I/O +// + +bool AlignInput(istream &strm, int align); +bool AlignOutput(ostream &strm, int align); + +// +// UTILITIES FOR PROTOCOL BUFFER I/O +// + + +// An associative container for which testing membership is +// faster than an STL set if members are restricted to an interval +// that excludes most non-members. A 'Key' must have ==, !=, and < defined. +// Element 'NoKey' should be a key that marks an uninitialized key and +// is otherwise unused. 'Find()' returns an STL const_iterator to the match +// found, otherwise it equals 'End()'. +template +class CompactSet { +public: + typedef typename set::const_iterator const_iterator; + + CompactSet() + : min_key_(NoKey), + max_key_(NoKey) { } + + CompactSet(const CompactSet &compact_set) + : set_(compact_set.set_), + min_key_(compact_set.min_key_), + max_key_(compact_set.max_key_) { } + + void Insert(Key key) { + set_.insert(key); + if (min_key_ == NoKey || key < min_key_) + min_key_ = key; + if (max_key_ == NoKey || max_key_ < key) + max_key_ = key; + } + + void Clear() { + set_.clear(); + min_key_ = max_key_ = NoKey; + } + + const_iterator Find(Key key) const { + if (min_key_ == NoKey || + key < min_key_ || max_key_ < key) + return set_.end(); + else + return set_.find(key); + } + + const_iterator Begin() const { return set_.begin(); } + + const_iterator End() const { return set_.end(); } + +private: + set set_; + Key min_key_; + Key max_key_; + + void operator=(const CompactSet &); //disallow +}; + +} // namespace fst + +#endif // FST_LIB_UTIL_H__ -- cgit v1.2.3