aboutsummaryrefslogtreecommitdiff
path: root/src/lib
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib')
-rw-r--r-- src/lib/Makefile.am 4
-rw-r--r-- src/lib/Makefile.in 51
-rw-r--r-- src/lib/compat.cc 2
-rw-r--r-- src/lib/flags.cc 2
-rw-r--r-- src/lib/fst.cc 26
-rw-r--r-- src/lib/mapped-file.cc 105
-rw-r--r-- src/lib/symbol-table.cc 10
-rw-r--r-- src/lib/util.cc 15
8 files changed, 179 insertions, 36 deletions
diff --git a/src/lib/Makefile.am b/src/lib/Makefile.am
index 9c3ef63..87c86ec 100644
--- a/src/lib/Makefile.am
+++ b/src/lib/Makefile.am
@@ -2,5 +2,5 @@ AM_CPPFLAGS = -I$(srcdir)/../include $(ICU_CPPFLAGS)
lib_LTLIBRARIES = libfst.la
libfst_la_SOURCES = compat.cc flags.cc fst.cc properties.cc \
-symbol-table.cc util.cc symbol-table-ops.cc
-libfst_la_LDFLAGS = -version-info 0:0:0
+symbol-table.cc util.cc symbol-table-ops.cc mapped-file.cc
+libfst_la_LDFLAGS = -version-info 1:0:0
diff --git a/src/lib/Makefile.in b/src/lib/Makefile.in
index 69f3928..2e8c99f 100644
--- a/src/lib/Makefile.in
+++ b/src/lib/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -37,10 +37,10 @@ host_triplet = @host@
subdir = src/lib
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_icu.m4 \
- $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
- $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
- $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
+am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \
+ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
+ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
+ $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
@@ -69,11 +69,17 @@ am__nobase_list = $(am__nobase_strip_setup); \
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(libdir)"
LTLIBRARIES = $(lib_LTLIBRARIES)
libfst_la_LIBADD =
am_libfst_la_OBJECTS = compat.lo flags.lo fst.lo properties.lo \
- symbol-table.lo util.lo symbol-table-ops.lo
+ symbol-table.lo util.lo symbol-table-ops.lo mapped-file.lo
libfst_la_OBJECTS = $(am_libfst_la_OBJECTS)
libfst_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
@@ -115,6 +121,7 @@ CXXFLAGS = @CXXFLAGS@
CYGPATH_W = @CYGPATH_W@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
@@ -124,11 +131,6 @@ EGREP = @EGREP@
EXEEXT = @EXEEXT@
FGREP = @FGREP@
GREP = @GREP@
-ICU_CFLAGS = @ICU_CFLAGS@
-ICU_CONFIG = @ICU_CONFIG@
-ICU_CPPFLAGS = @ICU_CPPFLAGS@
-ICU_CXXFLAGS = @ICU_CXXFLAGS@
-ICU_LIBS = @ICU_LIBS@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
@@ -143,6 +145,7 @@ LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
NM = @NM@
NMEDIT = @NMEDIT@
@@ -168,6 +171,7 @@ abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_CXX = @ac_ct_CXX@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
@@ -202,7 +206,6 @@ libexecdir = @libexecdir@
libfstdir = @libfstdir@
localedir = @localedir@
localstatedir = @localstatedir@
-lt_ECHO = @lt_ECHO@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
@@ -221,9 +224,9 @@ top_srcdir = @top_srcdir@
AM_CPPFLAGS = -I$(srcdir)/../include $(ICU_CPPFLAGS)
lib_LTLIBRARIES = libfst.la
libfst_la_SOURCES = compat.cc flags.cc fst.cc properties.cc \
-symbol-table.cc util.cc symbol-table-ops.cc
+symbol-table.cc util.cc symbol-table-ops.cc mapped-file.cc
-libfst_la_LDFLAGS = -version-info 0:0:0
+libfst_la_LDFLAGS = -version-info 1:0:0
all: all-am
.SUFFIXES:
@@ -289,7 +292,7 @@ clean-libLTLIBRARIES:
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-libfst.la: $(libfst_la_OBJECTS) $(libfst_la_DEPENDENCIES)
+libfst.la: $(libfst_la_OBJECTS) $(libfst_la_DEPENDENCIES) $(EXTRA_libfst_la_DEPENDENCIES)
$(libfst_la_LINK) -rpath $(libdir) $(libfst_la_OBJECTS) $(libfst_la_LIBADD) $(LIBS)
mostlyclean-compile:
@@ -301,6 +304,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compat.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/flags.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fst.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mapped-file.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/properties.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/symbol-table-ops.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/symbol-table.Plo@am__quote@
@@ -432,10 +436,15 @@ install-am: all-am
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
diff --git a/src/lib/compat.cc b/src/lib/compat.cc
index 2faabc5..7db8be1 100644
--- a/src/lib/compat.cc
+++ b/src/lib/compat.cc
@@ -33,7 +33,7 @@ void SplitToVector(char* full, const char* delim, vector<char*>* vec,
bool omit_empty_strings) {
char *p = full;
while (p) {
- if (p = strpbrk(full, delim))
+ if ((p = strpbrk(full, delim)))
p[0] = '\0';
if (!omit_empty_strings || full[0] != '\0')
vec->push_back(full);
diff --git a/src/lib/flags.cc b/src/lib/flags.cc
index 222d66b..71be378 100644
--- a/src/lib/flags.cc
+++ b/src/lib/flags.cc
@@ -49,7 +49,7 @@ void SetFlags(const char *usage, int *argc, char ***argv,
string val = "";
// split argval (arg=val) into arg and val
- int pos = argval.find("=");
+ size_t pos = argval.find("=");
if (pos != string::npos) {
arg = argval.substr(0, pos);
val = argval.substr(pos + 1);
diff --git a/src/lib/fst.cc b/src/lib/fst.cc
index 69bee35..f4bd95f 100644
--- a/src/lib/fst.cc
+++ b/src/lib/fst.cc
@@ -52,6 +52,9 @@ DEFINE_bool(fst_align, false, "Write FST data aligned where appropriate");
DEFINE_string(save_relabel_ipairs, "", "Save input relabel pairs to file");
DEFINE_string(save_relabel_opairs, "", "Save output relabel pairs to file");
+DEFINE_string(fst_read_mode, "read",
+ "Default file reading mode for mappable files");
+
namespace fst {
// Register VectorFst, ConstFst and EditFst for common arcs types
@@ -164,4 +167,27 @@ bool FstHeader::Write(ostream &strm, const string &source) const {
return true;
}
+// Builds read options for 'src' with a caller-supplied header and symbol
+// tables. The file read mode is derived from the fst_read_mode flag via
+// ReadMode().
+FstReadOptions::FstReadOptions(const string& src, const FstHeader *hdr,
+                               const SymbolTable* isym,
+                               const SymbolTable* osym)
+    : source(src),
+      header(hdr),
+      isymbols(isym),
+      osymbols(osym),
+      mode(ReadMode(FLAGS_fst_read_mode)) {}
+
+// Builds read options for 'src' without a header (header is set to 0) and
+// with the given symbol tables. The file read mode is derived from the
+// fst_read_mode flag via ReadMode().
+FstReadOptions::FstReadOptions(const string& src, const SymbolTable* isym,
+                               const SymbolTable* osym)
+    : source(src),
+      header(0),
+      isymbols(isym),
+      osymbols(osym),
+      mode(ReadMode(FLAGS_fst_read_mode)) {}
+
+// Converts a textual read mode into a FileReadMode value: "map" yields MAP
+// and "read" yields READ. Any other string is reported with LOG(ERROR) and
+// treated as READ.
+FstReadOptions::FileReadMode FstReadOptions::ReadMode(const string &mode) {
+  if (mode == "map") {
+    return MAP;
+  }
+  if (mode != "read") {
+    LOG(ERROR) << "Unknown file read mode " << mode;
+  }
+  return READ;
+}
+
} // namespace fst
diff --git a/src/lib/mapped-file.cc b/src/lib/mapped-file.cc
new file mode 100644
index 0000000..2868cdb
--- /dev/null
+++ b/src/lib/mapped-file.cc
@@ -0,0 +1,105 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: sorenj@google.com (Jeffrey Sorensen)
+
+#include <fst/mapped-file.h>
+
+#include <errno.h>
+#include <fcntl.h>
+
+namespace fst {
+
+// Alignment required for mapping structures (in bytes). Regions of memory
+// that are not aligned on a 128-bit boundary will be read from the file
+// instead. This is consistent with the alignment boundary set in the
+// const and compact fst code.
+const int MappedFile::kArchAlignment = 16;
+
+// Wraps an already-populated MemoryRegion by copying it into region_.
+MappedFile::MappedFile(const MemoryRegion &region)
+    : region_(region) {
+}
+
+// Releases the wrapped region. A region whose size is 0 is left untouched;
+// otherwise a region with a non-NULL mmap pointer is munmap'ed and any other
+// region has its data pointer passed to operator delete.
+MappedFile::~MappedFile() {
+  if (region_.size == 0) {
+    return;  // Nothing to release.
+  }
+  if (region_.mmap == NULL) {
+    operator delete(region_.data);
+    return;
+  }
+  VLOG(1) << "munmap'ed " << region_.size << " bytes at " << region_.mmap;
+  const int rc = munmap(region_.mmap, region_.size);
+  if (rc != 0) {
+    LOG(ERROR) << "failed to unmap region: "<< strerror(errno);
+  }
+}
+
+// Returns a new MappedFile backed by 'size' bytes obtained from operator new.
+// A size of 0 performs no allocation and leaves the data pointer NULL. The
+// mmap pointer is always NULL for regions created here.
+MappedFile* MappedFile::Allocate(size_t size) {
+  MemoryRegion buffer;
+  buffer.size = size;
+  buffer.mmap = NULL;
+  if (size == 0) {
+    buffer.data = NULL;
+  } else {
+    buffer.data = operator new(size);
+  }
+  return new MappedFile(buffer);
+}
+
+// Returns a new MappedFile that refers to caller-provided memory. Both the
+// data and mmap pointers are set to 'data' and the size is recorded as 0;
+// the destructor releases nothing for a region whose size is 0.
+MappedFile* MappedFile::Borrow(void *data) {
+  MemoryRegion borrowed;
+  borrowed.size = 0;
+  borrowed.mmap = data;
+  borrowed.data = data;
+  return new MappedFile(borrowed);
+}
+
+// Returns a MappedFile covering the next 'size' bytes of stream 's'.
+//
+// When opts.mode is FstReadOptions::MAP and the current stream position is
+// non-negative and a multiple of kArchAlignment, the file named by
+// opts.source is opened and the region is mmap'ed read-only; on success the
+// stream is advanced past the region. In every other case, or if any step of
+// the mapping fails, the bytes are read from the stream into a buffer
+// obtained from Allocate(). Returns NULL if that read fails.
+MappedFile* MappedFile::Map(istream* s, const FstReadOptions &opts,
+                            size_t size) {
+  std::streampos spos = s->tellg();
+  if (opts.mode == FstReadOptions::MAP && spos >= 0 &&
+      spos % kArchAlignment == 0) {
+    size_t pos = spos;
+    int fd = open(opts.source.c_str(), O_RDONLY);
+    if (fd != -1) {
+      // The file offset passed to mmap is rounded down to a page boundary;
+      // 'offset' is how far into the mapping the requested data begins.
+      int pagesize = getpagesize();
+      off_t offset = pos % pagesize;
+      off_t upsize = size + offset;
+      void *map = mmap(0, upsize, PROT_READ, MAP_SHARED, fd, pos - offset);
+      // Save errno from mmap before close() has a chance to overwrite it.
+      const int mmap_errno = errno;
+      const bool closed = (close(fd) == 0);
+      const int close_errno = errno;
+      if (map != MAP_FAILED && closed) {
+        MemoryRegion region;
+        region.mmap = map;
+        region.size = upsize;
+        region.data = reinterpret_cast<void*>(
+            reinterpret_cast<char*>(map) + offset);
+        MappedFile *mmf = new MappedFile(region);
+        s->seekg(pos + size, ios::beg);
+        // Test the stream state, not the (always non-NULL) pointer.
+        if (!s->fail()) {
+          VLOG(1) << "mmap'ed region of " << size << " at offset " << pos
+                  << " from " << opts.source.c_str() << " to addr " << map;
+          return mmf;
+        }
+        delete mmf;  // Unmaps the region.
+        // Reset the stream so the fallback read below starts at the
+        // original offset instead of failing immediately on a set failbit.
+        s->clear();
+        s->seekg(spos);
+      } else {
+        LOG(INFO) << "Mapping of file failed: "
+                  << strerror(map == MAP_FAILED ? mmap_errno : close_errno);
+        // mmap succeeded but close() failed: do not leak the mapping.
+        if (map != MAP_FAILED && munmap(map, upsize) != 0) {
+          LOG(ERROR) << "failed to unmap region: " << strerror(errno);
+        }
+      }
+    }
+  }
+  // If all else fails resort to reading from file into allocated buffer.
+  if (opts.mode != FstReadOptions::READ) {
+    LOG(WARNING) << "File mapping at offset " << spos << " of file "
+                 << opts.source << " could not be honored, reading instead.";
+  }
+  MappedFile* mf = Allocate(size);
+  if (!s->read(reinterpret_cast<char*>(mf->mutable_data()), size)) {
+    delete mf;
+    return NULL;
+  }
+  return mf;
+}
+
+} // namespace fst
diff --git a/src/lib/symbol-table.cc b/src/lib/symbol-table.cc
index a195a7c..2484ef5 100644
--- a/src/lib/symbol-table.cc
+++ b/src/lib/symbol-table.cc
@@ -55,10 +55,11 @@ SymbolTableImpl* SymbolTableImpl::ReadText(istream &strm,
continue;
if (col.size() != 2) {
LOG(ERROR) << "SymbolTable::ReadText: Bad number of columns ("
- << col.size() << " skipping), "
+ << col.size() << "), "
<< "file = " << filename << ", line = " << nline
<< ":<" << line << ">";
- continue;
+ delete impl;
+ return 0;
}
const char *symbol = col[0];
const char *value = col[1];
@@ -67,9 +68,10 @@ SymbolTableImpl* SymbolTableImpl::ReadText(istream &strm,
if (p < value + strlen(value) ||
(!opts.allow_negative && key < 0) || key == -1) {
LOG(ERROR) << "SymbolTable::ReadText: Bad non-negative integer \""
- << value << "\" (skipping), "
+ << value << "\", "
<< "file = " << filename << ", line = " << nline;
- continue;
+ delete impl;
+ return 0;
}
impl->AddSymbol(symbol, key);
}
diff --git a/src/lib/util.cc b/src/lib/util.cc
index f754da5..ab877fd 100644
--- a/src/lib/util.cc
+++ b/src/lib/util.cc
@@ -21,6 +21,7 @@
#include <cctype>
#include <string>
#include <fst/util.h>
+#include <fst/mapped-file.h>
// Utility flag definitions
@@ -31,7 +32,7 @@ DEFINE_bool(fst_error_fatal, true,
namespace fst {
int64 StrToInt64(const string &s, const string &src, size_t nline,
- bool allow_negative = false, bool *error) {
+ bool allow_negative, bool *error) {
int64 n;
const char *cs = s.c_str();
char *p;
@@ -59,15 +60,15 @@ void ConvertToLegalCSymbol(string *s) {
// Skips over input characters until the stream position is a multiple of
// MappedFile::kArchAlignment. Returns false if the stream position can't be
// determined.
-bool AlignInput(istream &strm, int align) {
+bool AlignInput(istream &strm) {
char c;
- for (int i = 0; i < align; ++i) {
+ for (int i = 0; i < MappedFile::kArchAlignment; ++i) {
int64 pos = strm.tellg();
if (pos < 0) {
LOG(ERROR) << "AlignInput: can't determine stream position";
return false;
}
- if (pos % align == 0) break;
+ if (pos % MappedFile::kArchAlignment == 0) break;
strm.read(&c, 1);
}
return true;
@@ -75,14 +76,14 @@ bool AlignInput(istream &strm, int align) {
// Writes null output characters until the stream position is a multiple of
// MappedFile::kArchAlignment. Returns false if the stream position can't be
// determined.
-bool AlignOutput(ostream &strm, int align) {
- for (int i = 0; i < align; ++i) {
+bool AlignOutput(ostream &strm) {
+ for (int i = 0; i < MappedFile::kArchAlignment; ++i) {
int64 pos = strm.tellp();
if (pos < 0) {
LOG(ERROR) << "AlignOutput: can't determine stream position";
return false;
}
- if (pos % align == 0) break;
+ if (pos % MappedFile::kArchAlignment == 0) break;
strm.write("", 1);
}
return true;