aboutsummaryrefslogtreecommitdiff
path: root/src/include/fst/util.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/include/fst/util.h')
-rw-r--r--src/include/fst/util.h409
1 files changed, 409 insertions, 0 deletions
diff --git a/src/include/fst/util.h b/src/include/fst/util.h
new file mode 100644
index 0000000..87231e1
--- /dev/null
+++ b/src/include/fst/util.h
@@ -0,0 +1,409 @@
+// util.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// FST utility inline definitions.
+
+#ifndef FST_LIB_UTIL_H__
+#define FST_LIB_UTIL_H__
+
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <unordered_set>
+using std::tr1::unordered_set;
+using std::tr1::unordered_multiset;
+#include <list>
+#include <map>
+#include <set>
+#include <sstream>
+#include <string>
+#include <vector>
+using std::vector;
+
+
+#include <fst/compat.h>
+#include <fst/types.h>
+
+#include <iostream>
+#include <fstream>
+
+//
+// UTILITY FOR ERROR HANDLING
+//
+
+DECLARE_bool(fst_error_fatal);
+
+#define FSTERROR() (FLAGS_fst_error_fatal ? LOG(FATAL) : LOG(ERROR))
+
+namespace fst {
+
+//
+// UTILITIES FOR TYPE I/O
+//
+
+// Read some types from an input stream.
+
+// Generic case.
+template <typename T>
+inline istream &ReadType(istream &strm, T *t) {
+ return t->Read(strm);
+}
+
+// Fixed size, contiguous memory read.
+#define READ_POD_TYPE(T) \
+inline istream &ReadType(istream &strm, T *t) { \
+ return strm.read(reinterpret_cast<char *>(t), sizeof(T)); \
+}
+
+READ_POD_TYPE(bool);
+READ_POD_TYPE(char);
+READ_POD_TYPE(signed char);
+READ_POD_TYPE(unsigned char);
+READ_POD_TYPE(short);
+READ_POD_TYPE(unsigned short);
+READ_POD_TYPE(int);
+READ_POD_TYPE(unsigned int);
+READ_POD_TYPE(long);
+READ_POD_TYPE(unsigned long);
+READ_POD_TYPE(long long);
+READ_POD_TYPE(unsigned long long);
+READ_POD_TYPE(float);
+READ_POD_TYPE(double);
+
+// String case.
+inline istream &ReadType(istream &strm, string *s) {
+ s->clear();
+ int32 ns = 0;
+ strm.read(reinterpret_cast<char *>(&ns), sizeof(ns));
+ for (int i = 0; i < ns; ++i) {
+ char c;
+ strm.read(&c, 1);
+ *s += c;
+ }
+ return strm;
+}
+
+// Pair case.
+template <typename S, typename T>
+inline istream &ReadType(istream &strm, pair<S, T> *p) {
+ ReadType(strm, &p->first);
+ ReadType(strm, &p->second);
+ return strm;
+}
+
+template <typename S, typename T>
+inline istream &ReadType(istream &strm, pair<const S, T> *p) {
+ ReadType(strm, const_cast<S *>(&p->first));
+ ReadType(strm, &p->second);
+ return strm;
+}
+
+// General case - no-op.
+template <typename C>
+void StlReserve(C *c, int64 n) {}
+
+// Specialization for vectors.
+template <typename S, typename T>
+void StlReserve(vector<S, T> *c, int64 n) {
+ c->reserve(n);
+}
+
+// STL sequence container.
+#define READ_STL_SEQ_TYPE(C) \
+template <typename S, typename T> \
+inline istream &ReadType(istream &strm, C<S, T> *c) { \
+ c->clear(); \
+ int64 n = 0; \
+ strm.read(reinterpret_cast<char *>(&n), sizeof(n)); \
+ StlReserve(c, n); \
+ for (ssize_t i = 0; i < n; ++i) { \
+ typename C<S, T>::value_type value; \
+ ReadType(strm, &value); \
+ c->insert(c->end(), value); \
+ } \
+ return strm; \
+}
+
+READ_STL_SEQ_TYPE(vector);
+READ_STL_SEQ_TYPE(list);
+
+// STL associative container.
+#define READ_STL_ASSOC_TYPE(C) \
+template <typename S, typename T, typename U> \
+inline istream &ReadType(istream &strm, C<S, T, U> *c) { \
+ c->clear(); \
+ int64 n = 0; \
+ strm.read(reinterpret_cast<char *>(&n), sizeof(n)); \
+ for (ssize_t i = 0; i < n; ++i) { \
+ typename C<S, T, U>::value_type value; \
+ ReadType(strm, &value); \
+ c->insert(value); \
+ } \
+ return strm; \
+}
+
+READ_STL_ASSOC_TYPE(set);
+READ_STL_ASSOC_TYPE(unordered_set);
+READ_STL_ASSOC_TYPE(map);
+READ_STL_ASSOC_TYPE(unordered_map);
+
+// Write some types to an output stream.
+
+// Generic case.
+template <typename T>
+inline ostream &WriteType(ostream &strm, const T t) {
+ t.Write(strm);
+ return strm;
+}
+
+// Fixed size, contiguous memory write.
+#define WRITE_POD_TYPE(T) \
+inline ostream &WriteType(ostream &strm, const T t) { \
+ return strm.write(reinterpret_cast<const char *>(&t), sizeof(T)); \
+}
+
+WRITE_POD_TYPE(bool);
+WRITE_POD_TYPE(char);
+WRITE_POD_TYPE(signed char);
+WRITE_POD_TYPE(unsigned char);
+WRITE_POD_TYPE(short);
+WRITE_POD_TYPE(unsigned short);
+WRITE_POD_TYPE(int);
+WRITE_POD_TYPE(unsigned int);
+WRITE_POD_TYPE(long);
+WRITE_POD_TYPE(unsigned long);
+WRITE_POD_TYPE(long long);
+WRITE_POD_TYPE(unsigned long long);
+WRITE_POD_TYPE(float);
+WRITE_POD_TYPE(double);
+
+// String case.
+inline ostream &WriteType(ostream &strm, const string &s) {
+ int32 ns = s.size();
+ strm.write(reinterpret_cast<const char *>(&ns), sizeof(ns));
+ return strm.write(s.data(), ns);
+}
+
+// Pair case.
+template <typename S, typename T>
+inline ostream &WriteType(ostream &strm, const pair<S, T> &p) {
+ WriteType(strm, p.first);
+ WriteType(strm, p.second);
+ return strm;
+}
+
+// STL sequence container.
+#define WRITE_STL_SEQ_TYPE(C) \
+template <typename S, typename T> \
+inline ostream &WriteType(ostream &strm, const C<S, T> &c) { \
+ int64 n = c.size(); \
+ strm.write(reinterpret_cast<char *>(&n), sizeof(n)); \
+ for (typename C<S, T>::const_iterator it = c.begin(); \
+ it != c.end(); ++it) \
+ WriteType(strm, *it); \
+ return strm; \
+}
+
+WRITE_STL_SEQ_TYPE(vector);
+WRITE_STL_SEQ_TYPE(list);
+
+// STL associative container.
+#define WRITE_STL_ASSOC_TYPE(C) \
+template <typename S, typename T, typename U> \
+inline ostream &WriteType(ostream &strm, const C<S, T, U> &c) { \
+ int64 n = c.size(); \
+ strm.write(reinterpret_cast<char *>(&n), sizeof(n)); \
+ for (typename C<S, T, U>::const_iterator it = c.begin(); \
+ it != c.end(); ++it) \
+ WriteType(strm, *it); \
+ return strm; \
+}
+
+WRITE_STL_ASSOC_TYPE(set);
+WRITE_STL_ASSOC_TYPE(unordered_set);
+WRITE_STL_ASSOC_TYPE(map);
+WRITE_STL_ASSOC_TYPE(unordered_map);
+
+// Utilities for converting between int64 or Weight and string.
+
+int64 StrToInt64(const string &s, const string &src, size_t nline,
+ bool allow_negative, bool *error = 0);
+
+template <typename Weight>
+Weight StrToWeight(const string &s, const string &src, size_t nline) {
+ Weight w;
+ istringstream strm(s);
+ strm >> w;
+ if (!strm) {
+ FSTERROR() << "StrToWeight: Bad weight = \"" << s
+ << "\", source = " << src << ", line = " << nline;
+ return Weight::NoWeight();
+ }
+ return w;
+}
+
+void Int64ToStr(int64 n, string *s);
+
+template <typename Weight>
+void WeightToStr(Weight w, string *s) {
+ ostringstream strm;
+ strm.precision(9);
+ strm << w;
+ *s += strm.str();
+}
+
+// Utilities for reading/writing label pairs
+
+// Returns true on success
+template <typename Label>
+bool ReadLabelPairs(const string& filename,
+ vector<pair<Label, Label> >* pairs,
+ bool allow_negative = false) {
+ ifstream strm(filename.c_str());
+
+ if (!strm) {
+ LOG(ERROR) << "ReadLabelPairs: Can't open file: " << filename;
+ return false;
+ }
+
+ const int kLineLen = 8096;
+ char line[kLineLen];
+ size_t nline = 0;
+
+ pairs->clear();
+ while (strm.getline(line, kLineLen)) {
+ ++nline;
+ vector<char *> col;
+ SplitToVector(line, "\n\t ", &col, true);
+ if (col.size() == 0 || col[0][0] == '\0') // empty line
+ continue;
+ if (col.size() != 2) {
+ LOG(ERROR) << "ReadLabelPairs: Bad number of columns, "
+ << "file = " << filename << ", line = " << nline;
+ return false;
+ }
+
+ bool err;
+ Label frmlabel = StrToInt64(col[0], filename, nline, allow_negative, &err);
+ if (err) return false;
+ Label tolabel = StrToInt64(col[1], filename, nline, allow_negative, &err);
+ if (err) return false;
+ pairs->push_back(make_pair(frmlabel, tolabel));
+ }
+ return true;
+}
+
+// Returns true on success
+template <typename Label>
+bool WriteLabelPairs(const string& filename,
+ const vector<pair<Label, Label> >& pairs) {
+ ostream *strm = &std::cout;
+ if (!filename.empty()) {
+ strm = new ofstream(filename.c_str());
+ if (!*strm) {
+ LOG(ERROR) << "WriteLabelPairs: Can't open file: " << filename;
+ return false;
+ }
+ }
+
+ for (ssize_t n = 0; n < pairs.size(); ++n)
+ *strm << pairs[n].first << "\t" << pairs[n].second << "\n";
+
+ if (!*strm) {
+ LOG(ERROR) << "WriteLabelPairs: Write failed: "
+ << (filename.empty() ? "standard output" : filename);
+ return false;
+ }
+ if (strm != &std::cout)
+ delete strm;
+ return true;
+}
+
+// Utilities for converting a type name to a legal C symbol.
+
+void ConvertToLegalCSymbol(string *s);
+
+
+//
+// UTILITIES FOR STREAM I/O
+//
+
+bool AlignInput(istream &strm, int align);
+bool AlignOutput(ostream &strm, int align);
+
+//
+// UTILITIES FOR PROTOCOL BUFFER I/O
+//
+
+
+// An associative container for which testing membership is
+// faster than an STL set if members are restricted to an interval
+// that excludes most non-members. A 'Key' must have ==, !=, and < defined.
+// Element 'NoKey' should be a key that marks an uninitialized key and
+// is otherwise unused. 'Find()' returns an STL const_iterator to the match
+// found, otherwise it equals 'End()'.
+template <class Key, Key NoKey>
+class CompactSet {
+public:
+ typedef typename set<Key>::const_iterator const_iterator;
+
+ CompactSet()
+ : min_key_(NoKey),
+ max_key_(NoKey) { }
+
+ CompactSet(const CompactSet<Key, NoKey> &compact_set)
+ : set_(compact_set.set_),
+ min_key_(compact_set.min_key_),
+ max_key_(compact_set.max_key_) { }
+
+ void Insert(Key key) {
+ set_.insert(key);
+ if (min_key_ == NoKey || key < min_key_)
+ min_key_ = key;
+ if (max_key_ == NoKey || max_key_ < key)
+ max_key_ = key;
+ }
+
+ void Clear() {
+ set_.clear();
+ min_key_ = max_key_ = NoKey;
+ }
+
+ const_iterator Find(Key key) const {
+ if (min_key_ == NoKey ||
+ key < min_key_ || max_key_ < key)
+ return set_.end();
+ else
+ return set_.find(key);
+ }
+
+ const_iterator Begin() const { return set_.begin(); }
+
+ const_iterator End() const { return set_.end(); }
+
+private:
+ set<Key> set_;
+ Key min_key_;
+ Key max_key_;
+
+ void operator=(const CompactSet<Key, NoKey> &); //disallow
+};
+
+} // namespace fst
+
+#endif // FST_LIB_UTIL_H__