diff options
author | Alexander Gutkin <agutkin@google.com> | 2013-02-28 00:24:20 +0000 |
---|---|---|
committer | Alexander Gutkin <agutkin@google.com> | 2013-02-28 00:24:20 +0000 |
commit | 5b6dc79427b8f7eeb6a7ff68034ab8548ce670ea (patch) | |
tree | 19e17fc79b8873e66f211276d4dd169c480cede1 /src/lib | |
parent | 3da1eb108d36da35333b2d655202791af854996b (diff) | |
download | openfst-5b6dc79427b8f7eeb6a7ff68034ab8548ce670ea.tar.gz |
Bumped OpenFST implementation to openfst-1.3.3-CL41851770.
Updated OpenFST implementation to the most recent version
used by Greco3 (corresponds to nlp::fst exported at Perforce
CL 41851770).
In particular, this version has improved PDT support.
Change-Id: I5aadfc962297eef73922c67e7d57866f11ee7d81
Diffstat (limited to 'src/lib')
-rw-r--r-- | src/lib/Makefile.am | 4 | ||||
-rw-r--r-- | src/lib/Makefile.in | 51 | ||||
-rw-r--r-- | src/lib/compat.cc | 2 | ||||
-rw-r--r-- | src/lib/flags.cc | 2 | ||||
-rw-r--r-- | src/lib/fst.cc | 26 | ||||
-rw-r--r-- | src/lib/mapped-file.cc | 105 | ||||
-rw-r--r-- | src/lib/symbol-table.cc | 10 | ||||
-rw-r--r-- | src/lib/util.cc | 15 |
8 files changed, 179 insertions, 36 deletions
diff --git a/src/lib/Makefile.am b/src/lib/Makefile.am index 9c3ef63..87c86ec 100644 --- a/src/lib/Makefile.am +++ b/src/lib/Makefile.am @@ -2,5 +2,5 @@ AM_CPPFLAGS = -I$(srcdir)/../include $(ICU_CPPFLAGS) lib_LTLIBRARIES = libfst.la libfst_la_SOURCES = compat.cc flags.cc fst.cc properties.cc \ -symbol-table.cc util.cc symbol-table-ops.cc -libfst_la_LDFLAGS = -version-info 0:0:0 +symbol-table.cc util.cc symbol-table-ops.cc mapped-file.cc +libfst_la_LDFLAGS = -version-info 1:0:0 diff --git a/src/lib/Makefile.in b/src/lib/Makefile.in index 69f3928..2e8c99f 100644 --- a/src/lib/Makefile.in +++ b/src/lib/Makefile.in @@ -1,9 +1,9 @@ -# Makefile.in generated by automake 1.11.1 from Makefile.am. +# Makefile.in generated by automake 1.11.3 from Makefile.am. # @configure_input@ # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, -# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, -# Inc. +# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software +# Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. 
@@ -37,10 +37,10 @@ host_triplet = @host@ subdir = src/lib DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 -am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_icu.m4 \ - $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \ - $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \ - $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac +am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) mkinstalldirs = $(install_sh) -d @@ -69,11 +69,17 @@ am__nobase_list = $(am__nobase_strip_setup); \ am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } am__installdirs = "$(DESTDIR)$(libdir)" LTLIBRARIES = $(lib_LTLIBRARIES) libfst_la_LIBADD = am_libfst_la_OBJECTS = compat.lo flags.lo fst.lo properties.lo \ - symbol-table.lo util.lo symbol-table-ops.lo + symbol-table.lo util.lo symbol-table-ops.lo mapped-file.lo libfst_la_OBJECTS = $(am_libfst_la_OBJECTS) libfst_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \ @@ -115,6 +121,7 @@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ @@ -124,11 +131,6 @@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ GREP = @GREP@ -ICU_CFLAGS = @ICU_CFLAGS@ -ICU_CONFIG = @ICU_CONFIG@ -ICU_CPPFLAGS = @ICU_CPPFLAGS@ -ICU_CXXFLAGS = @ICU_CXXFLAGS@ -ICU_LIBS = @ICU_LIBS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ @@ -143,6 +145,7 @@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ NM = @NM@ NMEDIT = @NMEDIT@ @@ -168,6 +171,7 @@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ @@ -202,7 +206,6 @@ libexecdir = @libexecdir@ libfstdir = @libfstdir@ localedir = @localedir@ localstatedir = @localstatedir@ -lt_ECHO = @lt_ECHO@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ @@ -221,9 +224,9 @@ top_srcdir = @top_srcdir@ AM_CPPFLAGS = -I$(srcdir)/../include $(ICU_CPPFLAGS) lib_LTLIBRARIES = libfst.la libfst_la_SOURCES = compat.cc flags.cc fst.cc properties.cc \ -symbol-table.cc util.cc symbol-table-ops.cc +symbol-table.cc util.cc symbol-table-ops.cc mapped-file.cc -libfst_la_LDFLAGS = -version-info 
0:0:0 +libfst_la_LDFLAGS = -version-info 1:0:0 all: all-am .SUFFIXES: @@ -289,7 +292,7 @@ clean-libLTLIBRARIES: echo "rm -f \"$${dir}/so_locations\""; \ rm -f "$${dir}/so_locations"; \ done -libfst.la: $(libfst_la_OBJECTS) $(libfst_la_DEPENDENCIES) +libfst.la: $(libfst_la_OBJECTS) $(libfst_la_DEPENDENCIES) $(EXTRA_libfst_la_DEPENDENCIES) $(libfst_la_LINK) -rpath $(libdir) $(libfst_la_OBJECTS) $(libfst_la_LIBADD) $(LIBS) mostlyclean-compile: @@ -301,6 +304,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compat.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/flags.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fst.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mapped-file.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/properties.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/symbol-table-ops.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/symbol-table.Plo@am__quote@ @@ -432,10 +436,15 @@ install-am: all-am installcheck: installcheck-am install-strip: - $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ - install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ - `test -z '$(STRIP)' || \ - echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi mostlyclean-generic: clean-generic: diff --git a/src/lib/compat.cc b/src/lib/compat.cc index 2faabc5..7db8be1 100644 --- a/src/lib/compat.cc +++ b/src/lib/compat.cc @@ -33,7 +33,7 @@ void SplitToVector(char* full, const char* delim, vector<char*>* vec, bool omit_empty_strings) { 
char *p = full; while (p) { - if (p = strpbrk(full, delim)) + if ((p = strpbrk(full, delim))) p[0] = '\0'; if (!omit_empty_strings || full[0] != '\0') vec->push_back(full); diff --git a/src/lib/flags.cc b/src/lib/flags.cc index 222d66b..71be378 100644 --- a/src/lib/flags.cc +++ b/src/lib/flags.cc @@ -49,7 +49,7 @@ void SetFlags(const char *usage, int *argc, char ***argv, string val = ""; // split argval (arg=val) into arg and val - int pos = argval.find("="); + size_t pos = argval.find("="); if (pos != string::npos) { arg = argval.substr(0, pos); val = argval.substr(pos + 1); diff --git a/src/lib/fst.cc b/src/lib/fst.cc index 69bee35..f4bd95f 100644 --- a/src/lib/fst.cc +++ b/src/lib/fst.cc @@ -52,6 +52,9 @@ DEFINE_bool(fst_align, false, "Write FST data aligned where appropriate"); DEFINE_string(save_relabel_ipairs, "", "Save input relabel pairs to file"); DEFINE_string(save_relabel_opairs, "", "Save output relabel pairs to file"); +DEFINE_string(fst_read_mode, "read", + "Default file reading mode for mappable files"); + namespace fst { // Register VectorFst, ConstFst and EditFst for common arcs types @@ -164,4 +167,27 @@ bool FstHeader::Write(ostream &strm, const string &source) const { return true; } +FstReadOptions::FstReadOptions(const string& src, const FstHeader *hdr, + const SymbolTable* isym, const SymbolTable* osym) + : source(src), header(hdr), isymbols(isym), osymbols(osym) { + mode = ReadMode(FLAGS_fst_read_mode); +} + +FstReadOptions::FstReadOptions(const string& src, const SymbolTable* isym, + const SymbolTable* osym) + : source(src), header(0), isymbols(isym), osymbols(osym) { + mode = ReadMode(FLAGS_fst_read_mode); +} + +FstReadOptions::FileReadMode FstReadOptions::ReadMode(const string &mode) { + if (mode == "read") { + return READ; + } + if (mode == "map") { + return MAP; + } + LOG(ERROR) << "Unknown file read mode " << mode; + return READ; +} + } // namespace fst diff --git a/src/lib/mapped-file.cc b/src/lib/mapped-file.cc new file mode 100644 
index 0000000..2868cdb --- /dev/null +++ b/src/lib/mapped-file.cc @@ -0,0 +1,105 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: sorenj@google.com (Jeffrey Sorensen) + +#include <fst/mapped-file.h> + +#include <errno.h> +#include <fcntl.h> + +namespace fst { + +// Alignment required for mapping structures (in bytes.) Regions of memory +// that are not aligned upon a 128 bit boundary will be read from the file +// instead. This is consistent with the alignment boundary set in the +// const and compact fst code. +const int MappedFile::kArchAlignment = 16; + +MappedFile::MappedFile(const MemoryRegion &region) : region_(region) { } + +MappedFile::~MappedFile() { + if (region_.size != 0) { + if (region_.mmap != NULL) { + VLOG(1) << "munmap'ed " << region_.size << " bytes at " << region_.mmap; + if (munmap(region_.mmap, region_.size) != 0) { + LOG(ERROR) << "failed to unmap region: "<< strerror(errno); + } + } else { + operator delete(region_.data); + } + } +} + +MappedFile* MappedFile::Allocate(size_t size) { + MemoryRegion region; + region.data = size == 0 ? 
NULL : operator new(size); + region.mmap = NULL; + region.size = size; + return new MappedFile(region); +} + +MappedFile* MappedFile::Borrow(void *data) { + MemoryRegion region; + region.data = data; + region.mmap = data; + region.size = 0; + return new MappedFile(region); +} + +MappedFile* MappedFile::Map(istream* s, const FstReadOptions &opts, + size_t size) { + std::streampos spos = s->tellg(); + if (opts.mode == FstReadOptions::MAP && spos >= 0 && + spos % kArchAlignment == 0) { + size_t pos = spos; + int fd = open(opts.source.c_str(), O_RDONLY); + if (fd != -1) { + int pagesize = getpagesize(); + off_t offset = pos % pagesize; + off_t upsize = size + offset; + void *map = mmap(0, upsize, PROT_READ, MAP_SHARED, fd, pos - offset); + char *data = reinterpret_cast<char*>(map); + if (close(fd) == 0 && map != MAP_FAILED) { + MemoryRegion region; + region.mmap = map; + region.size = upsize; + region.data = reinterpret_cast<void*>(data + offset); + MappedFile *mmf = new MappedFile(region); + s->seekg(pos + size, ios::beg); + if (s) { + VLOG(1) << "mmap'ed region of " << size << " at offset " << pos + << " from " << opts.source.c_str() << " to addr " << map; + return mmf; + } + delete mmf; + } else { + LOG(INFO) << "Mapping of file failed: " << strerror(errno); + } + } + } + // If all else fails resort to reading from file into allocated buffer. 
+ if (opts.mode != FstReadOptions::READ) { + LOG(WARNING) << "File mapping at offset " << spos << " of file " + << opts.source << " could not be honored, reading instead."; + } + MappedFile* mf = Allocate(size); + if (!s->read(reinterpret_cast<char*>(mf->mutable_data()), size)) { + delete mf; + return NULL; + } + return mf; +} + +} // namespace fst diff --git a/src/lib/symbol-table.cc b/src/lib/symbol-table.cc index a195a7c..2484ef5 100644 --- a/src/lib/symbol-table.cc +++ b/src/lib/symbol-table.cc @@ -55,10 +55,11 @@ SymbolTableImpl* SymbolTableImpl::ReadText(istream &strm, continue; if (col.size() != 2) { LOG(ERROR) << "SymbolTable::ReadText: Bad number of columns (" - << col.size() << " skipping), " + << col.size() << "), " << "file = " << filename << ", line = " << nline << ":<" << line << ">"; - continue; + delete impl; + return 0; } const char *symbol = col[0]; const char *value = col[1]; @@ -67,9 +68,10 @@ SymbolTableImpl* SymbolTableImpl::ReadText(istream &strm, if (p < value + strlen(value) || (!opts.allow_negative && key < 0) || key == -1) { LOG(ERROR) << "SymbolTable::ReadText: Bad non-negative integer \"" - << value << "\" (skipping), " + << value << "\", " << "file = " << filename << ", line = " << nline; - continue; + delete impl; + return 0; } impl->AddSymbol(symbol, key); } diff --git a/src/lib/util.cc b/src/lib/util.cc index f754da5..ab877fd 100644 --- a/src/lib/util.cc +++ b/src/lib/util.cc @@ -21,6 +21,7 @@ #include <cctype> #include <string> #include <fst/util.h> +#include <fst/mapped-file.h> // Utility flag definitions @@ -31,7 +32,7 @@ DEFINE_bool(fst_error_fatal, true, namespace fst { int64 StrToInt64(const string &s, const string &src, size_t nline, - bool allow_negative = false, bool *error) { + bool allow_negative, bool *error) { int64 n; const char *cs = s.c_str(); char *p; @@ -59,15 +60,15 @@ void ConvertToLegalCSymbol(string *s) { // Skips over input characters to align to 'align' bytes. Returns // false if can't align. 
-bool AlignInput(istream &strm, int align) { +bool AlignInput(istream &strm) { char c; - for (int i = 0; i < align; ++i) { + for (int i = 0; i < MappedFile::kArchAlignment; ++i) { int64 pos = strm.tellg(); if (pos < 0) { LOG(ERROR) << "AlignInput: can't determine stream position"; return false; } - if (pos % align == 0) break; + if (pos % MappedFile::kArchAlignment == 0) break; strm.read(&c, 1); } return true; @@ -75,14 +76,14 @@ bool AlignInput(istream &strm, int align) { // Write null output characters to align to 'align' bytes. Returns // false if can't align. -bool AlignOutput(ostream &strm, int align) { - for (int i = 0; i < align; ++i) { +bool AlignOutput(ostream &strm) { + for (int i = 0; i < MappedFile::kArchAlignment; ++i) { int64 pos = strm.tellp(); if (pos < 0) { LOG(ERROR) << "AlignOutput: can't determine stream position"; return false; } - if (pos % align == 0) break; + if (pos % MappedFile::kArchAlignment == 0) break; strm.write("", 1); } return true; |