diff options
Diffstat (limited to 'src/lib')
-rw-r--r-- | src/lib/Makefile.am | 6 | ||||
-rw-r--r-- | src/lib/Makefile.in | 540 | ||||
-rw-r--r-- | src/lib/compat.cc | 44 | ||||
-rw-r--r-- | src/lib/flags.cc | 103 | ||||
-rw-r--r-- | src/lib/fst.cc | 167 | ||||
-rw-r--r-- | src/lib/properties.cc | 427 | ||||
-rw-r--r-- | src/lib/symbol-table-ops.cc | 140 | ||||
-rw-r--r-- | src/lib/symbol-table.cc | 243 | ||||
-rw-r--r-- | src/lib/temp_Android.temp_mk | 20 | ||||
-rw-r--r-- | src/lib/util.cc | 92 |
10 files changed, 1782 insertions, 0 deletions
diff --git a/src/lib/Makefile.am b/src/lib/Makefile.am new file mode 100644 index 0000000..9c3ef63 --- /dev/null +++ b/src/lib/Makefile.am @@ -0,0 +1,6 @@ +AM_CPPFLAGS = -I$(srcdir)/../include $(ICU_CPPFLAGS) + +lib_LTLIBRARIES = libfst.la +libfst_la_SOURCES = compat.cc flags.cc fst.cc properties.cc \ +symbol-table.cc util.cc symbol-table-ops.cc +libfst_la_LDFLAGS = -version-info 0:0:0 diff --git a/src/lib/Makefile.in b/src/lib/Makefile.in new file mode 100644 index 0000000..69f3928 --- /dev/null +++ b/src/lib/Makefile.in @@ -0,0 +1,540 @@ +# Makefile.in generated by automake 1.11.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, +# Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +VPATH = @srcdir@ +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = src/lib +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_icu.m4 \ + $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \ + $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \ + $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h \ + $(top_builddir)/src/include/fst/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__installdirs = "$(DESTDIR)$(libdir)" +LTLIBRARIES = $(lib_LTLIBRARIES) +libfst_la_LIBADD = +am_libfst_la_OBJECTS = compat.lo flags.lo fst.lo properties.lo \ + symbol-table.lo util.lo symbol-table-ops.lo +libfst_la_OBJECTS = $(am_libfst_la_OBJECTS) +libfst_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \ + $(CXXFLAGS) $(libfst_la_LDFLAGS) $(LDFLAGS) -o $@ +DEFAULT_INCLUDES = +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) +LTCXXCOMPILE = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) +CXXLD = $(CXX) +CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +SOURCES = $(libfst_la_SOURCES) +DIST_SOURCES = $(libfst_la_SOURCES) +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GREP = @GREP@ +ICU_CFLAGS = @ICU_CFLAGS@ +ICU_CONFIG = @ICU_CONFIG@ +ICU_CPPFLAGS = @ICU_CPPFLAGS@ +ICU_CXXFLAGS = @ICU_CXXFLAGS@ +ICU_LIBS = @ICU_LIBS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAKEINFO = @MAKEINFO@ +MKDIR_P = @MKDIR_P@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +libfstdir = @libfstdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +lt_ECHO = @lt_ECHO@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CPPFLAGS = -I$(srcdir)/../include $(ICU_CPPFLAGS) +lib_LTLIBRARIES = libfst.la +libfst_la_SOURCES = compat.cc flags.cc fst.cc properties.cc \ +symbol-table.cc util.cc symbol-table-ops.cc + +libfst_la_LDFLAGS = -version-info 0:0:0 +all: all-am + +.SUFFIXES: +.SUFFIXES: .cc .lo .o .obj +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/lib/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/lib/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + test -z "$(libdir)" || $(MKDIR_P) "$(DESTDIR)$(libdir)" + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ + } + +uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ + done + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) + @list='$(lib_LTLIBRARIES)'; for p in $$list; do \ + dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \ + test "$$dir" != "$$p" || dir=.; \ + echo "rm -f \"$${dir}/so_locations\""; \ + rm -f "$${dir}/so_locations"; \ + done +libfst.la: $(libfst_la_OBJECTS) $(libfst_la_DEPENDENCIES) + $(libfst_la_LINK) -rpath $(libdir) $(libfst_la_OBJECTS) $(libfst_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compat.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/flags.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fst.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/properties.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/symbol-table-ops.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/symbol-table.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/util.Plo@am__quote@ + +.cc.o: +@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $< + +.cc.obj: +@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.cc.lo: +@am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(LTCXXCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + set x; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: CTAGS +CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: + for dir in "$(DESTDIR)$(libdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-libLTLIBRARIES + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-libLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ + clean-libLTLIBRARIES clean-libtool ctags distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am \ + install-libLTLIBRARIES install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags uninstall uninstall-am uninstall-libLTLIBRARIES + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/lib/compat.cc b/src/lib/compat.cc new file mode 100644 index 0000000..2faabc5 --- /dev/null +++ b/src/lib/compat.cc @@ -0,0 +1,44 @@ +// compat.cc +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Author: riley@google.com (Michael Riley) +// +// \file +// Google compatibility definitions. + +#include <cstring> +#include <fst/compat.h> + +using namespace std; + +void FailedNewHandler() { + cerr << "Memory allocation failed\n"; + exit(1); +} + +namespace fst { + +void SplitToVector(char* full, const char* delim, vector<char*>* vec, + bool omit_empty_strings) { + char *p = full; + while (p) { + if (p = strpbrk(full, delim)) + p[0] = '\0'; + if (!omit_empty_strings || full[0] != '\0') + vec->push_back(full); + if (p) + full = p + 1; + } +} +} // namespace fst diff --git a/src/lib/flags.cc b/src/lib/flags.cc new file mode 100644 index 0000000..d05fd73 --- /dev/null +++ b/src/lib/flags.cc @@ -0,0 +1,103 @@ +// flags.cc +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Author: riley@google.com (Michael Riley) +// +// \file +// Google-style flag handling definitions + +#include <cstring> + +#include <fst/compat.h> +#include <fst/flags.h> + +DEFINE_int32(v, 0, "verbose level"); +DEFINE_bool(help, false, "show usage information"); +DEFINE_string(tmpdir, "/tmp/", "temporary directory"); + +using namespace std; + +static string flag_usage; + +void SetFlags(const char *usage, int *argc, char ***argv, bool remove_flags) { + flag_usage = usage; + int index = 1; + for (; index < *argc; ++index) { + string argval = (*argv)[index]; + + if (argval[0] != '-' || argval == "-") + break; + while (argval[0] == '-') + argval = argval.substr(1); // remove initial '-'s + + string arg = argval; + string val = ""; + + // split argval (arg=val) into arg and val + int pos = argval.find("="); + if (pos != string::npos) { + arg = argval.substr(0, pos); + val = argval.substr(pos + 1); + } + + FlagRegister<bool> *bool_register = + FlagRegister<bool>::GetRegister(); + if (bool_register->SetFlag(arg, val)) + continue; + FlagRegister<string> *string_register = + FlagRegister<string>::GetRegister(); + if (string_register->SetFlag(arg, val)) + continue; + FlagRegister<int32> *int32_register = + FlagRegister<int32>::GetRegister(); + if (int32_register->SetFlag(arg, val)) + continue; + FlagRegister<int64> *int64_register = + FlagRegister<int64>::GetRegister(); + if (int64_register->SetFlag(arg, val)) + continue; + FlagRegister<double> *double_register = + FlagRegister<double>::GetRegister(); + if (double_register->SetFlag(arg, val)) + continue; + + LOG(FATAL) << "SetFlags: Bad option: " << (*argv)[index]; + } + + if (remove_flags) { + for (int i = 0; i < *argc - index; ++i) + (*argv)[i + 1] = (*argv)[i + index]; + *argc -= index - 1; + } + + if (FLAGS_help) { + ShowUsage(); + exit(1); + } +} + +void ShowUsage() { + cout << flag_usage << "\n"; + cout << " Flags Description:\n"; + FlagRegister<bool> *bool_register = FlagRegister<bool>::GetRegister(); + bool_register->ShowUsage(); + FlagRegister<string> *string_register = FlagRegister<string>::GetRegister(); + string_register->ShowUsage(); + FlagRegister<int32> *int32_register = FlagRegister<int32>::GetRegister(); + int32_register->ShowUsage(); + FlagRegister<int64> *int64_register = FlagRegister<int64>::GetRegister(); + int64_register->ShowUsage(); + FlagRegister<double> *double_register = FlagRegister<double>::GetRegister(); + double_register->ShowUsage(); +} diff --git a/src/lib/fst.cc b/src/lib/fst.cc new file mode 100644 index 0000000..69bee35 --- /dev/null +++ b/src/lib/fst.cc @@ -0,0 +1,167 @@ +// fst.cc + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: riley@google.com (Michael Riley) +// +// \file +// FST definitions. + +#include <fst/fst.h> + +// Include these so they are registered +#include <fst/compact-fst.h> +#include <fst/const-fst.h> +#include <fst/matcher-fst.h> +#include <fst/vector-fst.h> +#include <fst/edit-fst.h> + +// FST flag definitions + +DEFINE_bool(fst_verify_properties, false, + "Verify fst properties queried by TestProperties"); + +DEFINE_string(fst_weight_separator, ",", + "Character separator between printed composite weights; " + "must be a single character"); + +DEFINE_string(fst_weight_parentheses, "", + "Characters enclosing the first weight of a printed composite " + "weight (e.g. pair weight, tuple weight and derived classes) to " + "ensure proper I/O of nested composite weights; " + "must have size 0 (none) or 2 (open and close parenthesis)"); + +DEFINE_bool(fst_default_cache_gc, true, "Enable garbage collection of cache"); + +DEFINE_int64(fst_default_cache_gc_limit, 1<<20LL, + "Cache byte size that triggers garbage collection"); + +DEFINE_bool(fst_align, false, "Write FST data aligned where appropriate"); + +DEFINE_string(save_relabel_ipairs, "", "Save input relabel pairs to file"); +DEFINE_string(save_relabel_opairs, "", "Save output relabel pairs to file"); + +namespace fst { + +// Register VectorFst, ConstFst and EditFst for common arcs types +REGISTER_FST(VectorFst, StdArc); +REGISTER_FST(VectorFst, LogArc); +REGISTER_FST(VectorFst, Log64Arc); +REGISTER_FST(ConstFst, StdArc); +REGISTER_FST(ConstFst, LogArc); +REGISTER_FST(ConstFst, Log64Arc); +REGISTER_FST(EditFst, StdArc); +REGISTER_FST(EditFst, LogArc); +REGISTER_FST(EditFst, Log64Arc); + +// Register CompactFst for common arcs with the default (uint32) size type +static FstRegisterer< + CompactFst<StdArc, StringCompactor<StdArc> > > +CompactFst_StdArc_StringCompactor_registerer; +static FstRegisterer< + CompactFst<LogArc, StringCompactor<LogArc> > > +CompactFst_LogArc_StringCompactor_registerer; +static FstRegisterer< + CompactFst<StdArc, WeightedStringCompactor<StdArc> > > +CompactFst_StdArc_WeightedStringCompactor_registerer; +static FstRegisterer< + CompactFst<LogArc, WeightedStringCompactor<LogArc> > > +CompactFst_LogArc_WeightedStringCompactor_registerer; +static FstRegisterer< + CompactFst<StdArc, AcceptorCompactor<StdArc> > > +CompactFst_StdArc_AcceptorCompactor_registerer; +static FstRegisterer< + CompactFst<LogArc, AcceptorCompactor<LogArc> > > +CompactFst_LogArc_AcceptorCompactor_registerer; +static FstRegisterer< + CompactFst<StdArc, UnweightedCompactor<StdArc> > > +CompactFst_StdArc_UnweightedCompactor_registerer; +static FstRegisterer< + CompactFst<LogArc, UnweightedCompactor<LogArc> > > +CompactFst_LogArc_UnweightedCompactor_registerer; +static FstRegisterer< + CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc> > > +CompactFst_StdArc_UnweightedAcceptorCompactor_registerer; +static FstRegisterer< + CompactFst<LogArc, UnweightedAcceptorCompactor<LogArc> > > +CompactFst_LogArc_UnweightedAcceptorCompactor_registerer; + +// Fst type definitions for lookahead Fsts. +extern const char arc_lookahead_fst_type[] = "arc_lookahead"; +extern const char ilabel_lookahead_fst_type[] = "ilabel_lookahead"; +extern const char olabel_lookahead_fst_type[] = "olabel_lookahead"; + +// Identifies stream data as an FST (and its endianity) +static const int32 kFstMagicNumber = 2125659606; + +// Check for Fst magic number in stream, to indicate +// caller function that the stream content is an Fst header; +bool IsFstHeader(istream &strm, const string &source) { + int64 pos = strm.tellg(); + bool match = true; + int32 magic_number = 0; + ReadType(strm, &magic_number); + if (magic_number != kFstMagicNumber + ) { + match = false; + } + strm.seekg(pos); + return match; +} + +// Check Fst magic number and read in Fst header. +// If rewind = true, reposition stream to before call (if possible). +bool FstHeader::Read(istream &strm, const string &source, bool rewind) { + int64 pos = 0; + if (rewind) pos = strm.tellg(); + int32 magic_number = 0; + ReadType(strm, &magic_number); + if (magic_number != kFstMagicNumber + ) { + LOG(ERROR) << "FstHeader::Read: Bad FST header: " << source; + if (rewind) strm.seekg(pos); + return false; + } + + ReadType(strm, &fsttype_); + ReadType(strm, &arctype_); + ReadType(strm, &version_); + ReadType(strm, &flags_); + ReadType(strm, &properties_); + ReadType(strm, &start_); + ReadType(strm, &numstates_); + ReadType(strm, &numarcs_); + if (!strm) { + LOG(ERROR) << "FstHeader::Read: read failed: " << source; + return false; + } + if (rewind) strm.seekg(pos); + return true; +} + +// Write Fst magic number and Fst header. +bool FstHeader::Write(ostream &strm, const string &source) const { + WriteType(strm, kFstMagicNumber); + WriteType(strm, fsttype_); + WriteType(strm, arctype_); + WriteType(strm, version_); + WriteType(strm, flags_); + WriteType(strm, properties_); + WriteType(strm, start_); + WriteType(strm, numstates_); + WriteType(strm, numarcs_); + return true; +} + +} // namespace fst diff --git a/src/lib/properties.cc b/src/lib/properties.cc new file mode 100644 index 0000000..db0e2c8 --- /dev/null +++ b/src/lib/properties.cc @@ -0,0 +1,427 @@ +// properties.cc + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: riley@google.com (Michael Riley) +// +// \file +// Functions for updating property bits for various FST operations and +// string names of the properties. + +#include <fst/properties.h> + +#include <stddef.h> +#include <vector> +using std::vector; + +namespace fst { + +// These functions determine the properties associated with the FST +// result of various finite-state operations. The property arguments +// correspond to the operation's FST arguments. The properties +// returned assume the operation modifies its first argument. +// Bitwise-and this result with kCopyProperties for the case when a +// new (possibly delayed) FST is instead constructed. + +// Properties for a concatenatively-closed FST. +uint64 ClosureProperties(uint64 inprops, bool star, bool delayed) { + uint64 outprops = (kError | kAcceptor | kUnweighted | kAccessible) & inprops; + if (!delayed) + outprops |= (kExpanded | kMutable | kCoAccessible | + kNotTopSorted | kNotString) & inprops; + if (!delayed || inprops & kAccessible) + outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic | + kNotILabelSorted | kNotOLabelSorted | kWeighted | + kNotAccessible | kNotCoAccessible) & inprops; + return outprops; +} + +// Properties for a complemented FST. +uint64 ComplementProperties(uint64 inprops) { + uint64 outprops = kAcceptor | kUnweighted | kNoEpsilons | + kNoIEpsilons | kNoOEpsilons | + kIDeterministic | kODeterministic | kAccessible; + outprops |= (kError | kILabelSorted | kOLabelSorted | kInitialCyclic) & + inprops; + if (inprops & kAccessible) + outprops |= kNotILabelSorted | kNotOLabelSorted | kCyclic; + return outprops; +} + +// Properties for a composed FST. +uint64 ComposeProperties(uint64 inprops1, uint64 inprops2) { + uint64 outprops = kError & (inprops1 | inprops2); + if (inprops1 & kAcceptor && inprops2 & kAcceptor) { + outprops |= kAcceptor | kAccessible; + outprops |= (kNoEpsilons | kNoIEpsilons | kNoOEpsilons | kAcyclic | + kInitialAcyclic) & inprops1 & inprops2; + if (kNoIEpsilons & inprops1 & inprops2) + outprops |= (kIDeterministic | kODeterministic) & inprops1 & inprops2; + } else { + outprops |= kAccessible; + outprops |= (kAcceptor | kNoIEpsilons | kAcyclic | kInitialAcyclic) & + inprops1 & inprops2; + if (kNoIEpsilons & inprops1 & inprops2) + outprops |= kIDeterministic & inprops1 & inprops2; + } + return outprops; +} + +// Properties for a concatenated FST. +uint64 ConcatProperties(uint64 inprops1, uint64 inprops2, bool delayed) { + uint64 outprops = + (kAcceptor | kUnweighted | kAcyclic) & inprops1 & inprops2; + outprops |= kError & (inprops1 | inprops2); + + bool empty1 = delayed; // Can fst1 be the empty machine? + bool empty2 = delayed; // Can fst2 be the empty machine? + + if (!delayed) { + outprops |= (kExpanded | kMutable | kNotTopSorted | kNotString) & inprops1; + outprops |= (kNotTopSorted | kNotString) & inprops2; + } + if (!empty1) + outprops |= (kInitialAcyclic | kInitialCyclic) & inprops1; + if (!delayed || inprops1 & kAccessible) + outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic | + kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted | + kNotOLabelSorted | kWeighted | kCyclic | + kNotAccessible | kNotCoAccessible) & inprops1; + if ((inprops1 & (kAccessible | kCoAccessible)) == + (kAccessible | kCoAccessible) && !empty1) { + outprops |= kAccessible & inprops2; + if (!empty2) + outprops |= kCoAccessible & inprops2; + if (!delayed || inprops2 & kAccessible) + outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic | + kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted | + kNotOLabelSorted | kWeighted | kCyclic | + kNotAccessible | kNotCoAccessible) & inprops2; + } + return outprops; +} + +// Properties for a determinized FST. +uint64 DeterminizeProperties(uint64 inprops, bool has_subsequential_label) { + uint64 outprops = kAccessible; + if (((kAcceptor | kNoIEpsilons) & inprops) || has_subsequential_label) + outprops |= kIDeterministic; + outprops |= (kError | kAcceptor | kNoEpsilons | kAcyclic | + kInitialAcyclic | kCoAccessible | kString) & inprops; + if (inprops & kAccessible) + outprops |= (kNotAcceptor | kEpsilons | kIEpsilons | kOEpsilons | + kCyclic) & inprops; + if (inprops & kAcceptor) + outprops |= (kNoIEpsilons | kNoOEpsilons) & inprops; + if ((inprops & kNoIEpsilons) && has_subsequential_label) + outprops |= kNoIEpsilons; + return outprops; +} + +// Properties for factored weight FST. +uint64 FactorWeightProperties(uint64 inprops) { + uint64 outprops = (kExpanded | kMutable | kError | kAcceptor | + kAcyclic | kAccessible | kCoAccessible) & inprops; + if (inprops & kAccessible) + outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic | + kEpsilons | kIEpsilons | kOEpsilons | kCyclic | + kNotILabelSorted | kNotOLabelSorted) + & inprops; + return outprops; +} + +// Properties for an inverted FST. +uint64 InvertProperties(uint64 inprops) { + uint64 outprops = (kExpanded | kMutable | kError | kAcceptor | kNotAcceptor | + kEpsilons | kNoEpsilons | kWeighted | kUnweighted | + kCyclic | kAcyclic | kInitialCyclic | kInitialAcyclic | + kTopSorted | kNotTopSorted | + kAccessible | kNotAccessible | + kCoAccessible | kNotCoAccessible | + kString | kNotString) & inprops; + if (kIDeterministic & inprops) + outprops |= kODeterministic; + if (kNonIDeterministic & inprops) + outprops |= kNonODeterministic; + if (kODeterministic & inprops) + outprops |= kIDeterministic; + if (kNonODeterministic & inprops) + outprops |= kNonIDeterministic; + + if (kIEpsilons & inprops) + outprops |= kOEpsilons; + if (kNoIEpsilons & inprops) + outprops |= kNoOEpsilons; + if (kOEpsilons & inprops) + outprops |= kIEpsilons; + if (kNoOEpsilons & inprops) + outprops |= kNoIEpsilons; + + if (kILabelSorted & inprops) + outprops |= kOLabelSorted; + if (kNotILabelSorted & inprops) + outprops |= kNotOLabelSorted; + if (kOLabelSorted & inprops) + outprops |= kILabelSorted; + if (kNotOLabelSorted & inprops) + outprops |= kNotILabelSorted; + return outprops; +} + +// Properties for a projected FST. +uint64 ProjectProperties(uint64 inprops, bool project_input) { + uint64 outprops = kAcceptor; + outprops |= (kExpanded | kMutable | kError | kWeighted | kUnweighted | + kCyclic | kAcyclic | kInitialCyclic | kInitialAcyclic | + kTopSorted | kNotTopSorted | kAccessible | kNotAccessible | + kCoAccessible | kNotCoAccessible | + kString | kNotString) & inprops; + if (project_input) { + outprops |= (kIDeterministic | kNonIDeterministic | + kIEpsilons | kNoIEpsilons | + kILabelSorted | kNotILabelSorted) & inprops; + + if (kIDeterministic & inprops) + outprops |= kODeterministic; + if (kNonIDeterministic & inprops) + outprops |= kNonODeterministic; + + if (kIEpsilons & inprops) + outprops |= kOEpsilons | kEpsilons; + if (kNoIEpsilons & inprops) + outprops |= kNoOEpsilons | kNoEpsilons; + + if (kILabelSorted & inprops) + outprops |= kOLabelSorted; + if (kNotILabelSorted & inprops) + outprops |= kNotOLabelSorted; + } else { + outprops |= (kODeterministic | kNonODeterministic | + kOEpsilons | kNoOEpsilons | + kOLabelSorted | kNotOLabelSorted) & inprops; + + if (kODeterministic & inprops) + outprops |= kIDeterministic; + if (kNonODeterministic & inprops) + outprops |= kNonIDeterministic; + + if (kOEpsilons & inprops) + outprops |= kIEpsilons | kEpsilons; + if (kNoOEpsilons & inprops) + outprops |= kNoIEpsilons | kNoEpsilons; + + if (kOLabelSorted & inprops) + outprops |= kILabelSorted; + if (kNotOLabelSorted & inprops) + outprops |= kNotILabelSorted; + } + return outprops; +} + +// Properties for a randgen FST. +uint64 RandGenProperties(uint64 inprops, bool weighted) { + uint64 outprops = kAcyclic | kInitialAcyclic | kAccessible; + outprops |= inprops & kError; + if (weighted) { + outprops |= kTopSorted; + outprops |= (kAcceptor | kNoEpsilons | + kNoIEpsilons | kNoOEpsilons | + kIDeterministic | kODeterministic | + kILabelSorted | kOLabelSorted) & inprops; + } else { + outprops |= kUnweighted; + outprops |= (kAcceptor | kILabelSorted | kOLabelSorted) & inprops; + } + return outprops; +} + +// Properties for a replace FST. +uint64 ReplaceProperties(const vector<uint64>& inprops, + ssize_t root, + bool epsilon_on_replace, + bool no_empty_fsts) { + if (inprops.size() == 0) + return kNullProperties; + uint64 outprops = 0; + for (size_t i = 0; i < inprops.size(); ++i) + outprops |= kError & inprops[i]; + uint64 access_props = no_empty_fsts ? kAccessible | kCoAccessible : 0; + for (size_t i = 0; i < inprops.size(); ++i) + access_props &= (inprops[i] & (kAccessible | kCoAccessible)); + if (access_props == (kAccessible | kCoAccessible)) { + outprops |= access_props; + if (inprops[root] & kInitialCyclic) + outprops |= kInitialCyclic; + uint64 props = 0; + bool string = true; + for (size_t i = 0; i < inprops.size(); ++i) { + if (epsilon_on_replace == false) + props |= kNotAcceptor & inprops[i]; + props |= (kNonIDeterministic | kNonODeterministic | kEpsilons | + kIEpsilons | kOEpsilons | kWeighted | kCyclic | + kNotTopSorted | kNotString) & inprops[i]; + if (!(inprops[i] & kString)) + string = false; + } + outprops |= props; + if (string) + outprops |= kString; + } + bool acceptor = epsilon_on_replace; + bool ideterministic = !epsilon_on_replace; + bool no_iepsilons = !epsilon_on_replace; + bool acyclic = true; + bool unweighted = true; + for (size_t i = 0; i < inprops.size(); ++i) { + if (!(inprops[i] & kAcceptor)) + acceptor = false; + if (!(inprops[i] & kIDeterministic)) + ideterministic = false; + if (!(inprops[i] & kNoIEpsilons)) + no_iepsilons = false; + if (!(inprops[i] & kAcyclic)) + acyclic = false; + if (!(inprops[i] & kUnweighted)) + unweighted = false; + } + if (acceptor) + outprops |= kAcceptor; + if (ideterministic) + outprops |= kIDeterministic; + if (no_iepsilons) + outprops |= kNoIEpsilons; + if (acyclic) + outprops |= kAcyclic; + if (unweighted) + outprops |= kUnweighted; + if (inprops[root] & kInitialAcyclic) + outprops |= kInitialAcyclic; + return outprops; +} + +// Properties for a relabeled FST. +uint64 RelabelProperties(uint64 inprops) { + uint64 outprops = (kExpanded | kMutable | kError | + kWeighted | kUnweighted | + kCyclic | kAcyclic | + kInitialCyclic | kInitialAcyclic | + kTopSorted | kNotTopSorted | + kAccessible | kNotAccessible | + kCoAccessible | kNotCoAccessible | + kString | kNotString) & inprops; + return outprops; +} + +// Properties for a reversed FST. (the superinitial state limits this set) +uint64 ReverseProperties(uint64 inprops) { + uint64 outprops = + (kExpanded | kMutable | kError | kAcceptor | kNotAcceptor | kEpsilons | + kIEpsilons | kOEpsilons | kWeighted | kUnweighted | + kCyclic | kAcyclic) & inprops; + return outprops; +} + +// Properties for re-weighted FST. +uint64 ReweightProperties(uint64 inprops) { + uint64 outprops = inprops & kWeightInvariantProperties; + outprops = outprops & ~kCoAccessible; + return outprops; +} + +// Properties for an epsilon-removed FST. +uint64 RmEpsilonProperties(uint64 inprops, bool delayed) { + uint64 outprops = kNoEpsilons; + outprops |= (kError | kAcceptor | kAcyclic | kInitialAcyclic) & inprops; + if (inprops & kAcceptor) + outprops |= kNoIEpsilons | kNoOEpsilons; + if (!delayed) { + outprops |= kExpanded | kMutable; + outprops |= kTopSorted & inprops; + } + if (!delayed || inprops & kAccessible) + outprops |= kNotAcceptor & inprops; + return outprops; +} + +// Properties for shortest path. This function computes how the properties +// of the output of shortest path need to be updated, given that 'props' is +// already known. +uint64 ShortestPathProperties(uint64 props) { + return props | kAcyclic | kInitialAcyclic | kAccessible | kCoAccessible; +} + +// Properties for a synchronized FST. +uint64 SynchronizeProperties(uint64 inprops) { + uint64 outprops = (kError | kAcceptor | kAcyclic | kAccessible | + kCoAccessible | kUnweighted) & inprops; + if (inprops & kAccessible) + outprops |= (kCyclic | kNotCoAccessible | kWeighted) & inprops; + return outprops; +} + +// Properties for a unioned FST. +uint64 UnionProperties(uint64 inprops1, uint64 inprops2, bool delayed) { + uint64 outprops = (kAcceptor | kUnweighted | kAcyclic | kAccessible) + & inprops1 & inprops2; + outprops |= kError & (inprops1 | inprops2); + + bool empty1 = delayed; // Can fst1 be the empty machine? + bool empty2 = delayed; // Can fst2 be the empty machine? + if (!delayed) { + outprops |= (kExpanded | kMutable | kNotTopSorted | kNotString) & inprops1; + outprops |= (kNotTopSorted | kNotString) & inprops2; + } + if (!empty1 && !empty2) { + outprops |= kEpsilons | kIEpsilons | kOEpsilons; + outprops |= kCoAccessible & inprops1 & inprops2; + } + // Note kNotCoAccessible does not hold because of kInitialAcyclic opt. + if (!delayed || inprops1 & kAccessible) + outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic | + kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted | + kNotOLabelSorted | kWeighted | kCyclic | + kNotAccessible) & inprops1; + if (!delayed || inprops2 & kAccessible) + outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic | + kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted | + kNotOLabelSorted | kWeighted | kCyclic | + kNotAccessible | kNotCoAccessible) & inprops2; + return outprops; +} + + +// Property string names (indexed by bit position). +const char *PropertyNames[] = { + // binary + "expanded", "mutable", "error", "", "", "", "", "", + "", "", "", "", "", "", "", "", + // trinary + "acceptor", "not acceptor", + "input deterministic", "non input deterministic", + "output deterministic", "non output deterministic", + "input/output epsilons", "no input/output epsilons", + "input epsilons", "no input epsilons", + "output epsilons", "no output epsilons", + "input label sorted", "not input label sorted", + "output label sorted", "not output label sorted", + "weighted", "unweighted", + "cyclic", "acyclic", + "cyclic at initial state", "acyclic at initial state", + "top sorted", "not top sorted", + "accessible", "not accessible", + "coaccessible", "not coaccessible", + "string", "not string", +}; + +} // namespace fst diff --git a/src/lib/symbol-table-ops.cc b/src/lib/symbol-table-ops.cc new file mode 100644 index 0000000..875bd7a --- /dev/null +++ b/src/lib/symbol-table-ops.cc @@ -0,0 +1,140 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: sorenj@google.com (Jeffrey Sorensen) + +#include <fst/symbol-table-ops.h> + +namespace fst { + +SymbolTable *MergeSymbolTable(const SymbolTable &left, const SymbolTable &right, + bool *right_relabel_output) { + // MergeSymbolTable detects several special cases. It will return a reference + // copied version of SymbolTable of left or right if either symbol table is + // a superset of the other. + SymbolTable *merged = new SymbolTable("merge_" + left.Name() + "_" + + right.Name()); + // copy everything from the left symbol table + bool left_has_all = true, right_has_all = true, relabel = false; + SymbolTableIterator liter(left); + for (; !liter.Done(); liter.Next()) { + merged->AddSymbol(liter.Symbol(), liter.Value()); + if (right_has_all) { + int64 key = right.Find(liter.Symbol()); + if (key == -1) { + right_has_all = false; + } else if (!relabel && key != liter.Value()) { + relabel = true; + } + } + } + if (right_has_all) { + delete merged; + if (right_relabel_output != NULL) { + *right_relabel_output = relabel; + } + return right.Copy(); + } + // add all symbols we can from right symbol table + vector<string> conflicts; + SymbolTableIterator riter(right); + for (; !riter.Done(); riter.Next()) { + int64 key = merged->Find(riter.Symbol()); + if (key != -1) { + // Symbol already exists, maybe with different value + if (key != riter.Value()) { + relabel = true; + } + continue; + } + // Symbol doesn't exist from left + left_has_all = false; + if (!merged->Find(riter.Value()).empty()) { + // we can't add this where we want to, add it later, in order + conflicts.push_back(riter.Symbol()); + continue; + } + // there is a hole and we can add this symbol with its id + merged->AddSymbol(riter.Symbol(), riter.Value()); + } + if (right_relabel_output != NULL) { + *right_relabel_output = relabel; + } + if (left_has_all) { + delete merged; + return left.Copy(); + } + // Add all symbols that conflicted, in order + for (int i= 0; i < conflicts.size(); ++i) { + merged->AddSymbol(conflicts[i]); + } + return merged; +} + +SymbolTable *CompactSymbolTable(const SymbolTable &syms) { + map<int, string> sorted; + SymbolTableIterator stiter(syms); + for (; !stiter.Done(); stiter.Next()) { + sorted[stiter.Value()] = stiter.Symbol(); + } + SymbolTable *compact = new SymbolTable(syms.Name() + "_compact"); + uint64 newkey = 0; + for (map<int, string>::const_iterator si = sorted.begin(); + si != sorted.end(); ++si) { + compact->AddSymbol(si->second, newkey++); + } + return compact; +} + +SymbolTable *FstReadSymbols(const string &filename, bool input_symbols) { + ifstream in(filename.c_str(), ifstream::in | ifstream::binary); + if (!in) { + LOG(ERROR) << "FstReadSymbols: Can't open file " << filename; + return NULL; + } + FstHeader hdr; + if (!hdr.Read(in, filename)) { + LOG(ERROR) << "FstReadSymbols: Couldn't read header from " << filename; + return NULL; + } + if (hdr.GetFlags() & FstHeader::HAS_ISYMBOLS) { + SymbolTable *isymbols = SymbolTable::Read(in, filename); + if (isymbols == NULL) { + LOG(ERROR) << "FstReadSymbols: Could not read input symbols from " + << filename; + return NULL; + } + if (input_symbols) { + return isymbols; + } + delete isymbols; + } + if (hdr.GetFlags() & FstHeader::HAS_OSYMBOLS) { + SymbolTable *osymbols = SymbolTable::Read(in, filename); + if (osymbols == NULL) { + LOG(ERROR) << "FstReadSymbols: Could not read output symbols from " + << filename; + return NULL; + } + if (!input_symbols) { + return osymbols; + } + delete osymbols; + } + LOG(ERROR) << "FstReadSymbols: The file " << filename + << " doesn't contain the requested symbols"; + return NULL; +} + +} // namespace fst diff --git a/src/lib/symbol-table.cc b/src/lib/symbol-table.cc new file mode 100644 index 0000000..8b35cdf --- /dev/null +++ b/src/lib/symbol-table.cc @@ -0,0 +1,243 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// All Rights Reserved. +// +// Author : Johan Schalkwyk +// +// \file +// Classes to provide symbol-to-integer and integer-to-symbol mappings. + +#include <fst/symbol-table.h> +#include <fst/util.h> + +DEFINE_bool(fst_compat_symbols, true, + "Require symbol tables to match when appropriate"); +DEFINE_string(fst_field_separator, "\t ", + "Set of characters used as a separator between printed fields"); + +namespace fst { + +// Maximum line length in textual symbols file. +const int kLineLen = 8096; + +// Identifies stream data as a symbol table (and its endianity) +static const int32 kSymbolTableMagicNumber = 2125658996; + +SymbolTableImpl* SymbolTableImpl::ReadText(istream &strm, + const string &filename, + bool allow_negative) { + SymbolTableImpl* impl = new SymbolTableImpl(filename); + + int64 nline = 0; + char line[kLineLen]; + while (strm.getline(line, kLineLen)) { + ++nline; + vector<char *> col; + string separator = FLAGS_fst_field_separator + "\n"; + SplitToVector(line, separator.c_str(), &col, true); + if (col.size() == 0) // empty line + continue; + if (col.size() != 2) { + LOG(ERROR) << "SymbolTable::ReadText: Bad number of columns (" + << col.size() << " skipping), " + << "file = " << filename << ", line = " << nline + << ":<" << line << ">"; + continue; + } + const char *symbol = col[0]; + const char *value = col[1]; + char *p; + int64 key = strtoll(value, &p, 10); + if (p < value + strlen(value) || + (!allow_negative && key < 0) || key == -1) { + LOG(ERROR) << "SymbolTable::ReadText: Bad non-negative integer \"" + << value << "\" (skipping), " + << "file = " << filename << ", line = " << nline; + continue; + } + impl->AddSymbol(symbol, key); + } + + return impl; +} + +void SymbolTableImpl::MaybeRecomputeCheckSum() const { + if (check_sum_finalized_) + return; + + // Calculate the original label-agnostic check sum. + check_sum_.Reset(); + for (int64 i = 0; i < symbols_.size(); ++i) + check_sum_.Update(symbols_[i], strlen(symbols_[i]) + 1); + check_sum_string_ = check_sum_.Digest(); + + // Calculate the safer, label-dependent check sum. + labeled_check_sum_.Reset(); + for (int64 key = 0; key < dense_key_limit_; ++key) { + ostringstream line; + line << symbols_[key] << '\t' << key; + labeled_check_sum_.Update(line.str()); } + for (map<int64, const char*>::const_iterator it = + key_map_.begin(); + it != key_map_.end(); + ++it) { + if (it->first >= dense_key_limit_) { + ostringstream line; + line << it->second << '\t' << it->first; + labeled_check_sum_.Update(line.str()); + } + } + labeled_check_sum_string_ = labeled_check_sum_.Digest(); + + check_sum_finalized_ = true; +} + +int64 SymbolTableImpl::AddSymbol(const string& symbol, int64 key) { + map<const char *, int64, StrCmp>::const_iterator it = + symbol_map_.find(symbol.c_str()); + if (it == symbol_map_.end()) { // only add if not in table + check_sum_finalized_ = false; + + char *csymbol = new char[symbol.size() + 1]; + strcpy(csymbol, symbol.c_str()); + symbols_.push_back(csymbol); + key_map_[key] = csymbol; + symbol_map_[csymbol] = key; + + if (key >= available_key_) { + available_key_ = key + 1; + } + } else { + // Log if symbol already in table with different key + if (it->second != key) { + VLOG(1) << "SymbolTable::AddSymbol: symbol = " << symbol + << " already in symbol_map_ with key = " + << it->second + << " but supplied new key = " << key + << " (ignoring new key)"; + } + } + return key; +} + +static bool IsInRange(const vector<pair<int64, int64> >& ranges, + int64 key) { + if (ranges.size() == 0) return true; + for (size_t i = 0; i < ranges.size(); ++i) { + if (key >= ranges[i].first && key <= ranges[i].second) + return true; + } + return false; +} + +SymbolTableImpl* SymbolTableImpl::Read(istream &strm, + const SymbolTableReadOptions& opts) { + int32 magic_number = 0; + ReadType(strm, &magic_number); + if (!strm) { + LOG(ERROR) << "SymbolTable::Read: read failed"; + return 0; + } + string name; + ReadType(strm, &name); + SymbolTableImpl* impl = new SymbolTableImpl(name); + ReadType(strm, &impl->available_key_); + int64 size; + ReadType(strm, &size); + if (!strm) { + LOG(ERROR) << "SymbolTable::Read: read failed"; + delete impl; + return 0; + } + + string symbol; + int64 key; + impl->check_sum_finalized_ = false; + for (size_t i = 0; i < size; ++i) { + ReadType(strm, &symbol); + ReadType(strm, &key); + if (!strm) { + LOG(ERROR) << "SymbolTable::Read: read failed"; + delete impl; + return 0; + } + + char *csymbol = new char[symbol.size() + 1]; + strcpy(csymbol, symbol.c_str()); + impl->symbols_.push_back(csymbol); + if (key == impl->dense_key_limit_ && + key == impl->symbols_.size() - 1) + impl->dense_key_limit_ = impl->symbols_.size(); + else + impl->key_map_[key] = csymbol; + + if (IsInRange(opts.string_hash_ranges, key)) { + impl->symbol_map_[csymbol] = key; + } + } + return impl; +} + +bool SymbolTableImpl::Write(ostream &strm) const { + WriteType(strm, kSymbolTableMagicNumber); + WriteType(strm, name_); + WriteType(strm, available_key_); + int64 size = symbols_.size(); + WriteType(strm, size); + // first write out dense keys + int64 i = 0; + for (; i < dense_key_limit_; ++i) { + WriteType(strm, string(symbols_[i])); + WriteType(strm, i); + } + // next write out the remaining non densely packed keys + for (map<const char *, int64, StrCmp>::const_iterator it = + symbol_map_.begin(); it != symbol_map_.end(); ++it) { + if ((it->second >= 0) && (it->second < dense_key_limit_)) + continue; + WriteType(strm, string(it->first)); + WriteType(strm, it->second); + ++i; + } + if (i != size) { + LOG(ERROR) << "SymbolTable::Write: write failed"; + return false; + } + strm.flush(); + if (!strm) { + LOG(ERROR) << "SymbolTable::Write: write failed"; + return false; + } + return true; +} + +const int64 SymbolTable::kNoSymbol; + + +void SymbolTable::AddTable(const SymbolTable& table) { + for (SymbolTableIterator iter(table); !iter.Done(); iter.Next()) + impl_->AddSymbol(iter.Symbol()); +} + +bool SymbolTable::WriteText(ostream &strm) const { + for (SymbolTableIterator iter(*this); !iter.Done(); iter.Next()) { + ostringstream line; + line << iter.Symbol() << FLAGS_fst_field_separator[0] << iter.Value() + << '\n'; + strm.write(line.str().c_str(), line.str().length()); + } + return true; +} +} // namespace fst diff --git a/src/lib/temp_Android.temp_mk b/src/lib/temp_Android.temp_mk new file mode 100644 index 0000000..7a6936e --- /dev/null +++ b/src/lib/temp_Android.temp_mk @@ -0,0 +1,20 @@ +# +# Copyright 2012 Google Inc. All Rights Reserved. +# Author: npereira@google.com (Nicole Pereira) +# +# Android makefile for openfst library. +# + +OPENFST_DIR := $(call my-dir) + +LOCAL_PATH := $(call my-dir) +include $(CLEAR_VARS) + +LOCAL_MODULE := libopenfst_patts + +LOCAL_CPP_EXTENSION := .cc +LOCAL_C_INCLUDES += $(OPENFST_DIR)/src/include/ + +#LOCAL_SRC_FILES := $(call private-function-all-cpp-files-under, src) + +include $(BUILD_STATIC_LIBRARY) diff --git a/src/lib/util.cc b/src/lib/util.cc new file mode 100644 index 0000000..eeba92b --- /dev/null +++ b/src/lib/util.cc @@ -0,0 +1,92 @@ +// util.cc + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: riley@google.com (Michael Riley) +// +// \file +// FST utility definitions. + +#include <cctype> +#include <string> +#include <fst/util.h> + +// Utility flag definitions + +DEFINE_bool(fst_error_fatal, true, + "FST errors are fatal; o.w. return objects flagged as bad: " + " e.g., FSTs - kError prop. true, FST weights - not a member()"); + +namespace fst { + +int64 StrToInt64(const string &s, const string &src, size_t nline, + bool allow_negative = false, bool *error) { + int64 n; + const char *cs = s.c_str(); + char *p; + if (error) *error = false; + n = strtoll(cs, &p, 10); + if (p < cs + s.size() || (!allow_negative && n < 0)) { + FSTERROR() << "StrToInt64: Bad integer = " << s + << "\", source = " << src << ", line = " << nline; + if (error) *error = true; + return 0; + } + return n; +} + +void Int64ToStr(int64 n, string *s) { + ostringstream nstr; + nstr << n; + *s = nstr.str(); +} + +void ConvertToLegalCSymbol(string *s) { + for (string::iterator it = s->begin(); it != s->end(); ++it) + if (!isalnum(*it)) *it = '_'; +} + +// Skips over input characters to align to 'align' bytes. Returns +// false if can't align. +bool AlignInput(istream &strm, int align) { + char c; + for (int i = 0; i < align; ++i) { + int64 pos = strm.tellg(); + if (pos < 0) { + LOG(ERROR) << "AlignInput: can't determine stream position"; + return false; + } + if (pos % align == 0) break; + strm.read(&c, 1); + } + return true; +} + +// Write null output characters to align to 'align' bytes. Returns +// false if can't align. +bool AlignOutput(ostream &strm, int align) { + for (int i = 0; i < align; ++i) { + int64 pos = strm.tellp(); + if (pos < 0) { + LOG(ERROR) << "AlignOutput: can't determine stream position"; + return false; + } + if (pos % align == 0) break; + strm.write("", 1); + } + return true; +} + + +} // namespace fst |