aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Makefile.am1
-rw-r--r--src/Makefile.in565
-rw-r--r--src/bin/Makefile.am82
-rw-r--r--src/bin/Makefile.in923
-rw-r--r--src/bin/fstarcsort.cc67
-rw-r--r--src/bin/fstclosure.cc56
-rw-r--r--src/bin/fstcompile.cc92
-rw-r--r--src/bin/fstcompose.cc95
-rw-r--r--src/bin/fstconcat.cc62
-rw-r--r--src/bin/fstconnect.cc52
-rw-r--r--src/bin/fstconvert.cc58
-rw-r--r--src/bin/fstdeterminize.cc68
-rw-r--r--src/bin/fstdifference.cc87
-rw-r--r--src/bin/fstdraw.cc118
-rw-r--r--src/bin/fstencode.cc68
-rw-r--r--src/bin/fstepsnormalize.cc58
-rw-r--r--src/bin/fstequal.cc61
-rw-r--r--src/bin/fstequivalent.cc91
-rw-r--r--src/bin/fstinfo.cc59
-rw-r--r--src/bin/fstintersect.cc88
-rw-r--r--src/bin/fstinvert.cc51
-rw-r--r--src/bin/fstmap.cc97
-rw-r--r--src/bin/fstminimize.cc67
-rw-r--r--src/bin/fstprint.cc105
-rw-r--r--src/bin/fstproject.cc58
-rw-r--r--src/bin/fstprune.cc64
-rw-r--r--src/bin/fstpush.cc77
-rw-r--r--src/bin/fstrandgen.cc82
-rw-r--r--src/bin/fstrelabel.cc117
-rw-r--r--src/bin/fstreplace.cc65
-rw-r--r--src/bin/fstreverse.cc59
-rw-r--r--src/bin/fstreweight.cc63
-rw-r--r--src/bin/fstrmepsilon.cc89
-rw-r--r--src/bin/fstshortestdistance.cc93
-rw-r--r--src/bin/fstshortestpath.cc92
-rw-r--r--src/bin/fstsymbols.cc117
-rw-r--r--src/bin/fstsynchronize.cc54
-rw-r--r--src/bin/fsttopsort.cc53
-rw-r--r--src/bin/fstunion.cc67
-rw-r--r--src/extensions/Makefile.am21
-rw-r--r--src/extensions/Makefile.in570
-rw-r--r--src/extensions/compact/Makefile.am52
-rw-r--r--src/extensions/compact/Makefile.in771
-rw-r--r--src/extensions/compact/compact16_acceptor-fst.cc31
-rw-r--r--src/extensions/compact/compact16_string-fst.cc31
-rw-r--r--src/extensions/compact/compact16_unweighted-fst.cc31
-rw-r--r--src/extensions/compact/compact16_unweighted_acceptor-fst.cc31
-rw-r--r--src/extensions/compact/compact16_weighted_string-fst.cc31
-rw-r--r--src/extensions/compact/compact64_acceptor-fst.cc31
-rw-r--r--src/extensions/compact/compact64_string-fst.cc31
-rw-r--r--src/extensions/compact/compact64_unweighted-fst.cc31
-rw-r--r--src/extensions/compact/compact64_unweighted_acceptor-fst.cc31
-rw-r--r--src/extensions/compact/compact64_weighted_string-fst.cc31
-rw-r--r--src/extensions/compact/compact8_acceptor-fst.cc31
-rw-r--r--src/extensions/compact/compact8_string-fst.cc31
-rw-r--r--src/extensions/compact/compact8_unweighted-fst.cc31
-rw-r--r--src/extensions/compact/compact8_unweighted_acceptor-fst.cc31
-rw-r--r--src/extensions/compact/compact8_weighted_string-fst.cc31
-rw-r--r--src/extensions/const/Makefile.am16
-rw-r--r--src/extensions/const/Makefile.in566
-rw-r--r--src/extensions/const/const16-fst.cc32
-rw-r--r--src/extensions/const/const64-fst.cc32
-rw-r--r--src/extensions/const/const8-fst.cc32
-rw-r--r--src/extensions/far/Makefile.am34
-rw-r--r--src/extensions/far/Makefile.in673
-rw-r--r--src/extensions/far/compile-strings.cc36
-rw-r--r--src/extensions/far/farcompilestrings.cc84
-rw-r--r--src/extensions/far/farcreate.cc62
-rw-r--r--src/extensions/far/farextract.cc62
-rw-r--r--src/extensions/far/farinfo.cc56
-rw-r--r--src/extensions/far/farprintstrings.cc70
-rw-r--r--src/extensions/far/farscript.cc113
-rw-r--r--src/extensions/far/main.cc118
-rw-r--r--src/extensions/far/stlist.cc31
-rw-r--r--src/extensions/far/sttable.cc31
-rw-r--r--src/extensions/lookahead/Makefile.am18
-rw-r--r--src/extensions/lookahead/Makefile.in578
-rw-r--r--src/extensions/lookahead/arc_lookahead-fst.cc28
-rw-r--r--src/extensions/lookahead/ilabel_lookahead-fst.cc30
-rw-r--r--src/extensions/lookahead/olabel_lookahead-fst.cc30
-rw-r--r--src/extensions/pdt/Makefile.am29
-rw-r--r--src/extensions/pdt/Makefile.in663
-rw-r--r--src/extensions/pdt/pdtcompose.cc82
-rw-r--r--src/extensions/pdt/pdtexpand.cc71
-rw-r--r--src/extensions/pdt/pdtinfo.cc59
-rw-r--r--src/extensions/pdt/pdtreplace.cc73
-rw-r--r--src/extensions/pdt/pdtreverse.cc61
-rw-r--r--src/extensions/pdt/pdtscript.cc115
-rw-r--r--src/extensions/pdt/pdtshortestpath.cc81
-rw-r--r--src/include/Makefile.am67
-rw-r--r--src/include/Makefile.in590
-rw-r--r--src/include/fst/accumulator.h745
-rw-r--r--src/include/fst/add-on.h306
-rw-r--r--src/include/fst/arc-map.h1146
-rw-r--r--src/include/fst/arc.h306
-rw-r--r--src/include/fst/arcfilter.h99
-rw-r--r--src/include/fst/arcsort.h203
-rw-r--r--src/include/fst/bi-table.h396
-rw-r--r--src/include/fst/cache.h738
-rw-r--r--src/include/fst/closure.h155
-rw-r--r--src/include/fst/compact-fst.h1307
-rw-r--r--src/include/fst/compat.h152
-rw-r--r--src/include/fst/complement.h338
-rw-r--r--src/include/fst/compose-filter.h542
-rw-r--r--src/include/fst/compose.h673
-rw-r--r--src/include/fst/concat.h246
-rw-r--r--src/include/fst/config.h12
-rw-r--r--src/include/fst/config.h.in11
-rw-r--r--src/include/fst/connect.h319
-rw-r--r--src/include/fst/const-fst.h483
-rw-r--r--src/include/fst/determinize.h887
-rw-r--r--src/include/fst/dfs-visit.h204
-rw-r--r--src/include/fst/difference.h189
-rw-r--r--src/include/fst/edit-fst.h774
-rw-r--r--src/include/fst/encode.h599
-rw-r--r--src/include/fst/epsnormalize.h74
-rw-r--r--src/include/fst/equal.h124
-rw-r--r--src/include/fst/equivalent.h274
-rw-r--r--src/include/fst/expanded-fst.h189
-rw-r--r--src/include/fst/expectation-weight.h142
-rw-r--r--src/include/fst/extensions/far/compile-strings.h271
-rw-r--r--src/include/fst/extensions/far/create.h87
-rw-r--r--src/include/fst/extensions/far/extract.h85
-rw-r--r--src/include/fst/extensions/far/far.h360
-rw-r--r--src/include/fst/extensions/far/farlib.h31
-rw-r--r--src/include/fst/extensions/far/farscript.h234
-rw-r--r--src/include/fst/extensions/far/info.h128
-rw-r--r--src/include/fst/extensions/far/main.h43
-rw-r--r--src/include/fst/extensions/far/print-strings.h126
-rw-r--r--src/include/fst/extensions/far/stlist.h304
-rw-r--r--src/include/fst/extensions/far/sttable.h370
-rw-r--r--src/include/fst/extensions/pdt/collection.h122
-rw-r--r--src/include/fst/extensions/pdt/compose.h146
-rw-r--r--src/include/fst/extensions/pdt/expand.h975
-rw-r--r--src/include/fst/extensions/pdt/info.h175
-rw-r--r--src/include/fst/extensions/pdt/paren.h496
-rw-r--r--src/include/fst/extensions/pdt/pdt.h212
-rw-r--r--src/include/fst/extensions/pdt/pdtlib.h30
-rw-r--r--src/include/fst/extensions/pdt/pdtscript.h284
-rw-r--r--src/include/fst/extensions/pdt/replace.h192
-rw-r--r--src/include/fst/extensions/pdt/reverse.h58
-rw-r--r--src/include/fst/extensions/pdt/shortest-path.h790
-rw-r--r--src/include/fst/factor-weight.h476
-rw-r--r--src/include/fst/flags.h224
-rw-r--r--src/include/fst/float-weight.h598
-rw-r--r--src/include/fst/fst-decl.h125
-rw-r--r--src/include/fst/fst.h942
-rw-r--r--src/include/fst/fstlib.h151
-rw-r--r--src/include/fst/generic-register.h159
-rw-r--r--src/include/fst/heap.h206
-rw-r--r--src/include/fst/icu.h103
-rw-r--r--src/include/fst/intersect.h172
-rw-r--r--src/include/fst/interval-set.h381
-rw-r--r--src/include/fst/invert.h125
-rw-r--r--src/include/fst/label-reachable.h565
-rw-r--r--src/include/fst/lexicographic-weight.h151
-rw-r--r--src/include/fst/lock.h81
-rw-r--r--src/include/fst/log.h66
-rw-r--r--src/include/fst/lookahead-filter.h698
-rw-r--r--src/include/fst/lookahead-matcher.h813
-rw-r--r--src/include/fst/map.h121
-rw-r--r--src/include/fst/matcher-fst.h359
-rw-r--r--src/include/fst/matcher.h1116
-rw-r--r--src/include/fst/minimize.h584
-rw-r--r--src/include/fst/mutable-fst.h378
-rw-r--r--src/include/fst/pair-weight.h280
-rw-r--r--src/include/fst/partition.h290
-rw-r--r--src/include/fst/power-weight.h159
-rw-r--r--src/include/fst/product-weight.h115
-rw-r--r--src/include/fst/project.h148
-rw-r--r--src/include/fst/properties.h460
-rw-r--r--src/include/fst/prune.h339
-rw-r--r--src/include/fst/push.h175
-rw-r--r--src/include/fst/queue.h889
-rw-r--r--src/include/fst/randequivalent.h135
-rw-r--r--src/include/fst/randgen.h712
-rw-r--r--src/include/fst/random-weight.h348
-rw-r--r--src/include/fst/rational.h330
-rw-r--r--src/include/fst/register.h132
-rw-r--r--src/include/fst/relabel.h524
-rw-r--r--src/include/fst/replace-util.h550
-rw-r--r--src/include/fst/replace.h1453
-rw-r--r--src/include/fst/reverse.h91
-rw-r--r--src/include/fst/reweight.h146
-rw-r--r--src/include/fst/rmepsilon.h601
-rw-r--r--src/include/fst/rmfinalepsilon.h107
-rw-r--r--src/include/fst/script/arcsort.h49
-rw-r--r--src/include/fst/script/arg-packs.h240
-rw-r--r--src/include/fst/script/closure.h41
-rw-r--r--src/include/fst/script/compile-impl.h215
-rw-r--r--src/include/fst/script/compile.h92
-rw-r--r--src/include/fst/script/compose.h63
-rw-r--r--src/include/fst/script/concat.h54
-rw-r--r--src/include/fst/script/connect.h45
-rw-r--r--src/include/fst/script/convert.h49
-rw-r--r--src/include/fst/script/decode.h46
-rw-r--r--src/include/fst/script/determinize.h68
-rw-r--r--src/include/fst/script/difference.h67
-rw-r--r--src/include/fst/script/draw-impl.h234
-rw-r--r--src/include/fst/script/draw.h113
-rw-r--r--src/include/fst/script/encode.h58
-rw-r--r--src/include/fst/script/epsnormalize.h44
-rw-r--r--src/include/fst/script/equal.h45
-rw-r--r--src/include/fst/script/equivalent.h47
-rw-r--r--src/include/fst/script/fst-class.h343
-rw-r--r--src/include/fst/script/fstscript-decl.h35
-rw-r--r--src/include/fst/script/fstscript.h154
-rw-r--r--src/include/fst/script/info-impl.h325
-rw-r--r--src/include/fst/script/info.h48
-rw-r--r--src/include/fst/script/intersect.h65
-rw-r--r--src/include/fst/script/invert.h43
-rw-r--r--src/include/fst/script/map.h115
-rw-r--r--src/include/fst/script/minimize.h45
-rw-r--r--src/include/fst/script/print-impl.h149
-rw-r--r--src/include/fst/script/print.h86
-rw-r--r--src/include/fst/script/project.h43
-rw-r--r--src/include/fst/script/prune.h153
-rw-r--r--src/include/fst/script/push.h70
-rw-r--r--src/include/fst/script/randequivalent.h105
-rw-r--r--src/include/fst/script/randgen.h76
-rw-r--r--src/include/fst/script/register.h120
-rw-r--r--src/include/fst/script/relabel.h102
-rw-r--r--src/include/fst/script/replace.h62
-rw-r--r--src/include/fst/script/reverse.h42
-rw-r--r--src/include/fst/script/reweight.h53
-rw-r--r--src/include/fst/script/rmepsilon.h211
-rw-r--r--src/include/fst/script/script-impl.h206
-rw-r--r--src/include/fst/script/shortest-distance.h250
-rw-r--r--src/include/fst/script/shortest-path.h190
-rw-r--r--src/include/fst/script/symbols.h20
-rw-r--r--src/include/fst/script/synchronize.h42
-rw-r--r--src/include/fst/script/text-io.h50
-rw-r--r--src/include/fst/script/topsort.h40
-rw-r--r--src/include/fst/script/union.h42
-rw-r--r--src/include/fst/script/verify.h40
-rw-r--r--src/include/fst/script/weight-class.h216
-rw-r--r--src/include/fst/shortest-distance.h347
-rw-r--r--src/include/fst/shortest-path.h501
-rw-r--r--src/include/fst/signed-log-weight.h367
-rw-r--r--src/include/fst/slist.h61
-rw-r--r--src/include/fst/sparse-power-weight.h225
-rw-r--r--src/include/fst/sparse-tuple-weight.h640
-rw-r--r--src/include/fst/state-map.h601
-rw-r--r--src/include/fst/state-reachable.h198
-rw-r--r--src/include/fst/state-table.h469
-rw-r--r--src/include/fst/statesort.h97
-rw-r--r--src/include/fst/string-weight.h560
-rw-r--r--src/include/fst/string.h247
-rw-r--r--src/include/fst/symbol-table-ops.h91
-rw-r--r--src/include/fst/symbol-table.h507
-rw-r--r--src/include/fst/synchronize.h457
-rw-r--r--src/include/fst/test-properties.h246
-rw-r--r--src/include/fst/topsort.h112
-rw-r--r--src/include/fst/tuple-weight.h332
-rw-r--r--src/include/fst/types.h38
-rw-r--r--src/include/fst/union-find.h110
-rw-r--r--src/include/fst/union.h185
-rw-r--r--src/include/fst/util.h409
-rw-r--r--src/include/fst/vector-fst.h727
-rw-r--r--src/include/fst/verify.h126
-rw-r--r--src/include/fst/visit.h270
-rw-r--r--src/include/fst/weight.h179
-rw-r--r--src/lib/Makefile.am6
-rw-r--r--src/lib/Makefile.in540
-rw-r--r--src/lib/compat.cc44
-rw-r--r--src/lib/flags.cc103
-rw-r--r--src/lib/fst.cc167
-rw-r--r--src/lib/properties.cc427
-rw-r--r--src/lib/symbol-table-ops.cc140
-rw-r--r--src/lib/symbol-table.cc243
-rw-r--r--src/lib/temp_Android.temp_mk20
-rw-r--r--src/lib/util.cc92
-rw-r--r--src/script/Makefile.am15
-rw-r--r--src/script/Makefile.in601
-rw-r--r--src/script/arcsort.cc35
-rw-r--r--src/script/closure.cc35
-rw-r--r--src/script/compile.cc43
-rw-r--r--src/script/compose.cc51
-rw-r--r--src/script/concat.cc48
-rw-r--r--src/script/connect.cc33
-rw-r--r--src/script/convert.cc40
-rw-r--r--src/script/decode.cc36
-rw-r--r--src/script/determinize.cc38
-rw-r--r--src/script/difference.cc50
-rw-r--r--src/script/draw.cc55
-rw-r--r--src/script/encode.cc37
-rw-r--r--src/script/epsnormalize.cc37
-rw-r--r--src/script/equal.cc40
-rw-r--r--src/script/equivalent.cc42
-rw-r--r--src/script/fst-class.cc141
-rw-r--r--src/script/info.cc39
-rw-r--r--src/script/intersect.cc50
-rw-r--r--src/script/invert.cc33
-rw-r--r--src/script/map.cc39
-rw-r--r--src/script/minimize.cc36
-rw-r--r--src/script/print.cc41
-rw-r--r--src/script/project.cc35
-rw-r--r--src/script/prune.cc76
-rw-r--r--src/script/push.cc49
-rw-r--r--src/script/randequivalent.cc61
-rw-r--r--src/script/randgen.cc37
-rw-r--r--src/script/relabel.cc68
-rw-r--r--src/script/replace.cc45
-rw-r--r--src/script/reverse.cc37
-rw-r--r--src/script/reweight.cc36
-rw-r--r--src/script/rmepsilon.cc61
-rw-r--r--src/script/script-impl.cc39
-rw-r--r--src/script/shortest-distance.cc66
-rw-r--r--src/script/shortest-path.cc53
-rw-r--r--src/script/synchronize.cc36
-rw-r--r--src/script/text-io.cc95
-rw-r--r--src/script/topsort.cc37
-rw-r--r--src/script/union.cc37
-rw-r--r--src/script/verify.cc37
-rw-r--r--src/script/weight-class.cc45
-rw-r--r--src/test/Makefile.am12
-rw-r--r--src/test/Makefile.in608
-rw-r--r--src/test/algo_test.cc155
-rw-r--r--src/test/algo_test.h1315
-rw-r--r--src/test/fst_test.cc228
-rw-r--r--src/test/fst_test.h299
-rw-r--r--src/test/weight-tester.h225
-rw-r--r--src/test/weight_test.cc258
323 files changed, 65113 insertions, 0 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
new file mode 100644
index 0000000..b3ca481
--- /dev/null
+++ b/src/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = include lib script bin test extensions
diff --git a/src/Makefile.in b/src/Makefile.in
new file mode 100644
index 0000000..f073530
--- /dev/null
+++ b/src/Makefile.in
@@ -0,0 +1,565 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = src
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_icu.m4 \
+ $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+ $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+ $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h \
+ $(top_builddir)/src/include/fst/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+SOURCES =
+DIST_SOURCES =
+RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
+ html-recursive info-recursive install-data-recursive \
+ install-dvi-recursive install-exec-recursive \
+ install-html-recursive install-info-recursive \
+ install-pdf-recursive install-ps-recursive install-recursive \
+ installcheck-recursive installdirs-recursive pdf-recursive \
+ ps-recursive uninstall-recursive
+RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
+ distclean-recursive maintainer-clean-recursive
+AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \
+ $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \
+ distdir
+ETAGS = etags
+CTAGS = ctags
+DIST_SUBDIRS = $(SUBDIRS)
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+am__relativize = \
+ dir0=`pwd`; \
+ sed_first='s,^\([^/]*\)/.*$$,\1,'; \
+ sed_rest='s,^[^/]*/*,,'; \
+ sed_last='s,^.*/\([^/]*\)$$,\1,'; \
+ sed_butlast='s,/*[^/]*$$,,'; \
+ while test -n "$$dir1"; do \
+ first=`echo "$$dir1" | sed -e "$$sed_first"`; \
+ if test "$$first" != "."; then \
+ if test "$$first" = ".."; then \
+ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
+ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
+ else \
+ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
+ if test "$$first2" = "$$first"; then \
+ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
+ else \
+ dir2="../$$dir2"; \
+ fi; \
+ dir0="$$dir0"/"$$first"; \
+ fi; \
+ fi; \
+ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
+ done; \
+ reldir="$$dir2"
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+ICU_CFLAGS = @ICU_CFLAGS@
+ICU_CONFIG = @ICU_CONFIG@
+ICU_CPPFLAGS = @ICU_CPPFLAGS@
+ICU_CXXFLAGS = @ICU_CXXFLAGS@
+ICU_LIBS = @ICU_LIBS@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+libfstdir = @libfstdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+lt_ECHO = @lt_ECHO@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+SUBDIRS = include lib script bin test extensions
+all: all-recursive
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign src/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+# This directory's subdirectories are mostly independent; you can cd
+# into them and run `make' without going through this Makefile.
+# To change the values of `make' variables: instead of editing Makefiles,
+# (1) if the variable is set in `config.status', edit `config.status'
+# (which will cause the Makefiles to be regenerated when you run `make');
+# (2) otherwise, pass the desired values on the `make' command line.
+$(RECURSIVE_TARGETS):
+ @fail= failcom='exit 1'; \
+ for f in x $$MAKEFLAGS; do \
+ case $$f in \
+ *=* | --[!k]*);; \
+ *k*) failcom='fail=yes';; \
+ esac; \
+ done; \
+ dot_seen=no; \
+ target=`echo $@ | sed s/-recursive//`; \
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ echo "Making $$target in $$subdir"; \
+ if test "$$subdir" = "."; then \
+ dot_seen=yes; \
+ local_target="$$target-am"; \
+ else \
+ local_target="$$target"; \
+ fi; \
+ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+ || eval $$failcom; \
+ done; \
+ if test "$$dot_seen" = "no"; then \
+ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
+ fi; test -z "$$fail"
+
+$(RECURSIVE_CLEAN_TARGETS):
+ @fail= failcom='exit 1'; \
+ for f in x $$MAKEFLAGS; do \
+ case $$f in \
+ *=* | --[!k]*);; \
+ *k*) failcom='fail=yes';; \
+ esac; \
+ done; \
+ dot_seen=no; \
+ case "$@" in \
+ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
+ *) list='$(SUBDIRS)' ;; \
+ esac; \
+ rev=''; for subdir in $$list; do \
+ if test "$$subdir" = "."; then :; else \
+ rev="$$subdir $$rev"; \
+ fi; \
+ done; \
+ rev="$$rev ."; \
+ target=`echo $@ | sed s/-recursive//`; \
+ for subdir in $$rev; do \
+ echo "Making $$target in $$subdir"; \
+ if test "$$subdir" = "."; then \
+ local_target="$$target-am"; \
+ else \
+ local_target="$$target"; \
+ fi; \
+ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+ || eval $$failcom; \
+ done && test -z "$$fail"
+tags-recursive:
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
+ done
+ctags-recursive:
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \
+ done
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ mkid -fID $$unique
+tags: TAGS
+
+TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ set x; \
+ here=`pwd`; \
+ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
+ include_option=--etags-include; \
+ empty_fix=.; \
+ else \
+ include_option=--include; \
+ empty_fix=; \
+ fi; \
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ if test "$$subdir" = .; then :; else \
+ test ! -f $$subdir/TAGS || \
+ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
+ fi; \
+ done; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: CTAGS
+CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+ @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
+ if test "$$subdir" = .; then :; else \
+ test -d "$(distdir)/$$subdir" \
+ || $(MKDIR_P) "$(distdir)/$$subdir" \
+ || exit 1; \
+ fi; \
+ done
+ @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
+ if test "$$subdir" = .; then :; else \
+ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
+ $(am__relativize); \
+ new_distdir=$$reldir; \
+ dir1=$$subdir; dir2="$(top_distdir)"; \
+ $(am__relativize); \
+ new_top_distdir=$$reldir; \
+ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
+ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
+ ($(am__cd) $$subdir && \
+ $(MAKE) $(AM_MAKEFLAGS) \
+ top_distdir="$$new_top_distdir" \
+ distdir="$$new_distdir" \
+ am__remove_distdir=: \
+ am__skip_length_check=: \
+ am__skip_mode_fix=: \
+ distdir) \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-recursive
+all-am: Makefile
+installdirs: installdirs-recursive
+installdirs-am:
+install: install-recursive
+install-exec: install-exec-recursive
+install-data: install-data-recursive
+uninstall: uninstall-recursive
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-recursive
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-recursive
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-recursive
+ -rm -f Makefile
+distclean-am: clean-am distclean-generic distclean-tags
+
+dvi: dvi-recursive
+
+dvi-am:
+
+html: html-recursive
+
+html-am:
+
+info: info-recursive
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-recursive
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-recursive
+
+install-html-am:
+
+install-info: install-info-recursive
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-recursive
+
+install-pdf-am:
+
+install-ps: install-ps-recursive
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-recursive
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-recursive
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+pdf: pdf-recursive
+
+pdf-am:
+
+ps: ps-recursive
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) ctags-recursive \
+ install-am install-strip tags-recursive
+
+.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
+ all all-am check check-am clean clean-generic clean-libtool \
+ ctags ctags-recursive distclean distclean-generic \
+ distclean-libtool distclean-tags distdir dvi dvi-am html \
+ html-am info info-am install install-am install-data \
+ install-data-am install-dvi install-dvi-am install-exec \
+ install-exec-am install-html install-html-am install-info \
+ install-info-am install-man install-pdf install-pdf-am \
+ install-ps install-ps-am install-strip installcheck \
+ installcheck-am installdirs installdirs-am maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-generic \
+ mostlyclean-libtool pdf pdf-am ps ps-am tags tags-recursive \
+ uninstall uninstall-am
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/src/bin/Makefile.am b/src/bin/Makefile.am
new file mode 100644
index 0000000..13fc64c
--- /dev/null
+++ b/src/bin/Makefile.am
@@ -0,0 +1,90 @@
+# Build rules for the fst* command-line programs.  Every program is built from
+# one .cc file and linked against ../script/libfstscript.la and
+# ../lib/libfst.la.
+#
+# ICU_CPPFLAGS is the ICU preprocessor-flag variable substituted by
+# configure; a variable configure does not substitute would expand to nothing.
+AM_CPPFLAGS = -I$(srcdir)/../include -I$(srcdir)/../script $(ICU_CPPFLAGS)
+LDADD = ../script/libfstscript.la ../lib/libfst.la -lm -ldl
+
+# The programs below are only built when the HAVE_BIN conditional is true.
+if HAVE_BIN
+bin_PROGRAMS = fstarcsort fstclosure fstcompile fstcompose fstconcat \
+fstconnect fstconvert fstdeterminize fstdifference fstdraw fstencode \
+fstepsnormalize fstequal fstequivalent fstinfo fstintersect fstinvert \
+fstmap fstminimize fstprint fstproject fstprune fstpush fstrandgen \
+fstrelabel fstreplace fstreverse fstreweight fstrmepsilon \
+fstshortestdistance fstshortestpath fstsymbols fstsynchronize fsttopsort \
+fstunion
+
+# Automake defaults a program's sources to <name>.c, so each .cc is listed.
+fstarcsort_SOURCES = fstarcsort.cc
+
+fstclosure_SOURCES = fstclosure.cc
+
+fstcompile_SOURCES = fstcompile.cc
+
+fstcompose_SOURCES = fstcompose.cc
+
+fstconcat_SOURCES = fstconcat.cc
+
+fstconnect_SOURCES = fstconnect.cc
+
+fstconvert_SOURCES = fstconvert.cc
+
+fstdeterminize_SOURCES = fstdeterminize.cc
+
+fstdifference_SOURCES = fstdifference.cc
+
+fstdraw_SOURCES = fstdraw.cc
+
+fstencode_SOURCES = fstencode.cc
+
+fstepsnormalize_SOURCES = fstepsnormalize.cc
+
+fstequal_SOURCES = fstequal.cc
+
+fstequivalent_SOURCES = fstequivalent.cc
+
+fstinfo_SOURCES = fstinfo.cc
+
+fstintersect_SOURCES = fstintersect.cc
+
+fstinvert_SOURCES = fstinvert.cc
+
+fstmap_SOURCES = fstmap.cc
+
+fstminimize_SOURCES = fstminimize.cc
+
+fstprint_SOURCES = fstprint.cc
+
+fstproject_SOURCES = fstproject.cc
+
+fstprune_SOURCES = fstprune.cc
+
+fstpush_SOURCES = fstpush.cc
+
+fstrandgen_SOURCES = fstrandgen.cc
+
+fstrelabel_SOURCES = fstrelabel.cc
+
+fstreplace_SOURCES = fstreplace.cc
+
+fstreverse_SOURCES = fstreverse.cc
+
+fstreweight_SOURCES = fstreweight.cc
+
+fstrmepsilon_SOURCES = fstrmepsilon.cc
+
+fstshortestdistance_SOURCES = fstshortestdistance.cc
+
+fstshortestpath_SOURCES = fstshortestpath.cc
+
+fstsymbols_SOURCES = fstsymbols.cc
+
+fstsynchronize_SOURCES = fstsynchronize.cc
+
+fsttopsort_SOURCES = fsttopsort.cc
+
+fstunion_SOURCES = fstunion.cc
+endif
diff --git a/src/bin/Makefile.in b/src/bin/Makefile.in
new file mode 100644
index 0000000..5d27164
--- /dev/null
+++ b/src/bin/Makefile.in
@@ -0,0 +1,923 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+@HAVE_BIN_TRUE@bin_PROGRAMS = fstarcsort$(EXEEXT) fstclosure$(EXEEXT) \
+@HAVE_BIN_TRUE@ fstcompile$(EXEEXT) fstcompose$(EXEEXT) \
+@HAVE_BIN_TRUE@ fstconcat$(EXEEXT) fstconnect$(EXEEXT) \
+@HAVE_BIN_TRUE@ fstconvert$(EXEEXT) fstdeterminize$(EXEEXT) \
+@HAVE_BIN_TRUE@ fstdifference$(EXEEXT) fstdraw$(EXEEXT) \
+@HAVE_BIN_TRUE@ fstencode$(EXEEXT) fstepsnormalize$(EXEEXT) \
+@HAVE_BIN_TRUE@ fstequal$(EXEEXT) fstequivalent$(EXEEXT) \
+@HAVE_BIN_TRUE@ fstinfo$(EXEEXT) fstintersect$(EXEEXT) \
+@HAVE_BIN_TRUE@ fstinvert$(EXEEXT) fstmap$(EXEEXT) \
+@HAVE_BIN_TRUE@ fstminimize$(EXEEXT) fstprint$(EXEEXT) \
+@HAVE_BIN_TRUE@ fstproject$(EXEEXT) fstprune$(EXEEXT) \
+@HAVE_BIN_TRUE@ fstpush$(EXEEXT) fstrandgen$(EXEEXT) \
+@HAVE_BIN_TRUE@ fstrelabel$(EXEEXT) fstreplace$(EXEEXT) \
+@HAVE_BIN_TRUE@ fstreverse$(EXEEXT) fstreweight$(EXEEXT) \
+@HAVE_BIN_TRUE@ fstrmepsilon$(EXEEXT) \
+@HAVE_BIN_TRUE@ fstshortestdistance$(EXEEXT) \
+@HAVE_BIN_TRUE@ fstshortestpath$(EXEEXT) fstsymbols$(EXEEXT) \
+@HAVE_BIN_TRUE@ fstsynchronize$(EXEEXT) fsttopsort$(EXEEXT) \
+@HAVE_BIN_TRUE@ fstunion$(EXEEXT)
+subdir = src/bin
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_icu.m4 \
+ $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+ $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+ $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h \
+ $(top_builddir)/src/include/fst/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__installdirs = "$(DESTDIR)$(bindir)"
+PROGRAMS = $(bin_PROGRAMS)
+am__fstarcsort_SOURCES_DIST = fstarcsort.cc
+@HAVE_BIN_TRUE@am_fstarcsort_OBJECTS = fstarcsort.$(OBJEXT)
+fstarcsort_OBJECTS = $(am_fstarcsort_OBJECTS)
+fstarcsort_LDADD = $(LDADD)
+fstarcsort_DEPENDENCIES = ../script/libfstscript.la ../lib/libfst.la
+am__fstclosure_SOURCES_DIST = fstclosure.cc
+@HAVE_BIN_TRUE@am_fstclosure_OBJECTS = fstclosure.$(OBJEXT)
+fstclosure_OBJECTS = $(am_fstclosure_OBJECTS)
+fstclosure_LDADD = $(LDADD)
+fstclosure_DEPENDENCIES = ../script/libfstscript.la ../lib/libfst.la
+am__fstcompile_SOURCES_DIST = fstcompile.cc
+@HAVE_BIN_TRUE@am_fstcompile_OBJECTS = fstcompile.$(OBJEXT)
+fstcompile_OBJECTS = $(am_fstcompile_OBJECTS)
+fstcompile_LDADD = $(LDADD)
+fstcompile_DEPENDENCIES = ../script/libfstscript.la ../lib/libfst.la
+am__fstcompose_SOURCES_DIST = fstcompose.cc
+@HAVE_BIN_TRUE@am_fstcompose_OBJECTS = fstcompose.$(OBJEXT)
+fstcompose_OBJECTS = $(am_fstcompose_OBJECTS)
+fstcompose_LDADD = $(LDADD)
+fstcompose_DEPENDENCIES = ../script/libfstscript.la ../lib/libfst.la
+am__fstconcat_SOURCES_DIST = fstconcat.cc
+@HAVE_BIN_TRUE@am_fstconcat_OBJECTS = fstconcat.$(OBJEXT)
+fstconcat_OBJECTS = $(am_fstconcat_OBJECTS)
+fstconcat_LDADD = $(LDADD)
+fstconcat_DEPENDENCIES = ../script/libfstscript.la ../lib/libfst.la
+am__fstconnect_SOURCES_DIST = fstconnect.cc
+@HAVE_BIN_TRUE@am_fstconnect_OBJECTS = fstconnect.$(OBJEXT)
+fstconnect_OBJECTS = $(am_fstconnect_OBJECTS)
+fstconnect_LDADD = $(LDADD)
+fstconnect_DEPENDENCIES = ../script/libfstscript.la ../lib/libfst.la
+am__fstconvert_SOURCES_DIST = fstconvert.cc
+@HAVE_BIN_TRUE@am_fstconvert_OBJECTS = fstconvert.$(OBJEXT)
+fstconvert_OBJECTS = $(am_fstconvert_OBJECTS)
+fstconvert_LDADD = $(LDADD)
+fstconvert_DEPENDENCIES = ../script/libfstscript.la ../lib/libfst.la
+am__fstdeterminize_SOURCES_DIST = fstdeterminize.cc
+@HAVE_BIN_TRUE@am_fstdeterminize_OBJECTS = fstdeterminize.$(OBJEXT)
+fstdeterminize_OBJECTS = $(am_fstdeterminize_OBJECTS)
+fstdeterminize_LDADD = $(LDADD)
+fstdeterminize_DEPENDENCIES = ../script/libfstscript.la \
+ ../lib/libfst.la
+am__fstdifference_SOURCES_DIST = fstdifference.cc
+@HAVE_BIN_TRUE@am_fstdifference_OBJECTS = fstdifference.$(OBJEXT)
+fstdifference_OBJECTS = $(am_fstdifference_OBJECTS)
+fstdifference_LDADD = $(LDADD)
+fstdifference_DEPENDENCIES = ../script/libfstscript.la \
+ ../lib/libfst.la
+am__fstdraw_SOURCES_DIST = fstdraw.cc
+@HAVE_BIN_TRUE@am_fstdraw_OBJECTS = fstdraw.$(OBJEXT)
+fstdraw_OBJECTS = $(am_fstdraw_OBJECTS)
+fstdraw_LDADD = $(LDADD)
+fstdraw_DEPENDENCIES = ../script/libfstscript.la ../lib/libfst.la
+am__fstencode_SOURCES_DIST = fstencode.cc
+@HAVE_BIN_TRUE@am_fstencode_OBJECTS = fstencode.$(OBJEXT)
+fstencode_OBJECTS = $(am_fstencode_OBJECTS)
+fstencode_LDADD = $(LDADD)
+fstencode_DEPENDENCIES = ../script/libfstscript.la ../lib/libfst.la
+am__fstepsnormalize_SOURCES_DIST = fstepsnormalize.cc
+@HAVE_BIN_TRUE@am_fstepsnormalize_OBJECTS = fstepsnormalize.$(OBJEXT)
+fstepsnormalize_OBJECTS = $(am_fstepsnormalize_OBJECTS)
+fstepsnormalize_LDADD = $(LDADD)
+fstepsnormalize_DEPENDENCIES = ../script/libfstscript.la \
+ ../lib/libfst.la
+am__fstequal_SOURCES_DIST = fstequal.cc
+@HAVE_BIN_TRUE@am_fstequal_OBJECTS = fstequal.$(OBJEXT)
+fstequal_OBJECTS = $(am_fstequal_OBJECTS)
+fstequal_LDADD = $(LDADD)
+fstequal_DEPENDENCIES = ../script/libfstscript.la ../lib/libfst.la
+am__fstequivalent_SOURCES_DIST = fstequivalent.cc
+@HAVE_BIN_TRUE@am_fstequivalent_OBJECTS = fstequivalent.$(OBJEXT)
+fstequivalent_OBJECTS = $(am_fstequivalent_OBJECTS)
+fstequivalent_LDADD = $(LDADD)
+fstequivalent_DEPENDENCIES = ../script/libfstscript.la \
+ ../lib/libfst.la
+am__fstinfo_SOURCES_DIST = fstinfo.cc
+@HAVE_BIN_TRUE@am_fstinfo_OBJECTS = fstinfo.$(OBJEXT)
+fstinfo_OBJECTS = $(am_fstinfo_OBJECTS)
+fstinfo_LDADD = $(LDADD)
+fstinfo_DEPENDENCIES = ../script/libfstscript.la ../lib/libfst.la
+am__fstintersect_SOURCES_DIST = fstintersect.cc
+@HAVE_BIN_TRUE@am_fstintersect_OBJECTS = fstintersect.$(OBJEXT)
+fstintersect_OBJECTS = $(am_fstintersect_OBJECTS)
+fstintersect_LDADD = $(LDADD)
+fstintersect_DEPENDENCIES = ../script/libfstscript.la ../lib/libfst.la
+am__fstinvert_SOURCES_DIST = fstinvert.cc
+@HAVE_BIN_TRUE@am_fstinvert_OBJECTS = fstinvert.$(OBJEXT)
+fstinvert_OBJECTS = $(am_fstinvert_OBJECTS)
+fstinvert_LDADD = $(LDADD)
+fstinvert_DEPENDENCIES = ../script/libfstscript.la ../lib/libfst.la
+am__fstmap_SOURCES_DIST = fstmap.cc
+@HAVE_BIN_TRUE@am_fstmap_OBJECTS = fstmap.$(OBJEXT)
+fstmap_OBJECTS = $(am_fstmap_OBJECTS)
+fstmap_LDADD = $(LDADD)
+fstmap_DEPENDENCIES = ../script/libfstscript.la ../lib/libfst.la
+am__fstminimize_SOURCES_DIST = fstminimize.cc
+@HAVE_BIN_TRUE@am_fstminimize_OBJECTS = fstminimize.$(OBJEXT)
+fstminimize_OBJECTS = $(am_fstminimize_OBJECTS)
+fstminimize_LDADD = $(LDADD)
+fstminimize_DEPENDENCIES = ../script/libfstscript.la ../lib/libfst.la
+am__fstprint_SOURCES_DIST = fstprint.cc
+@HAVE_BIN_TRUE@am_fstprint_OBJECTS = fstprint.$(OBJEXT)
+fstprint_OBJECTS = $(am_fstprint_OBJECTS)
+fstprint_LDADD = $(LDADD)
+fstprint_DEPENDENCIES = ../script/libfstscript.la ../lib/libfst.la
+am__fstproject_SOURCES_DIST = fstproject.cc
+@HAVE_BIN_TRUE@am_fstproject_OBJECTS = fstproject.$(OBJEXT)
+fstproject_OBJECTS = $(am_fstproject_OBJECTS)
+fstproject_LDADD = $(LDADD)
+fstproject_DEPENDENCIES = ../script/libfstscript.la ../lib/libfst.la
+am__fstprune_SOURCES_DIST = fstprune.cc
+@HAVE_BIN_TRUE@am_fstprune_OBJECTS = fstprune.$(OBJEXT)
+fstprune_OBJECTS = $(am_fstprune_OBJECTS)
+fstprune_LDADD = $(LDADD)
+fstprune_DEPENDENCIES = ../script/libfstscript.la ../lib/libfst.la
+am__fstpush_SOURCES_DIST = fstpush.cc
+@HAVE_BIN_TRUE@am_fstpush_OBJECTS = fstpush.$(OBJEXT)
+fstpush_OBJECTS = $(am_fstpush_OBJECTS)
+fstpush_LDADD = $(LDADD)
+fstpush_DEPENDENCIES = ../script/libfstscript.la ../lib/libfst.la
+am__fstrandgen_SOURCES_DIST = fstrandgen.cc
+@HAVE_BIN_TRUE@am_fstrandgen_OBJECTS = fstrandgen.$(OBJEXT)
+fstrandgen_OBJECTS = $(am_fstrandgen_OBJECTS)
+fstrandgen_LDADD = $(LDADD)
+fstrandgen_DEPENDENCIES = ../script/libfstscript.la ../lib/libfst.la
+am__fstrelabel_SOURCES_DIST = fstrelabel.cc
+@HAVE_BIN_TRUE@am_fstrelabel_OBJECTS = fstrelabel.$(OBJEXT)
+fstrelabel_OBJECTS = $(am_fstrelabel_OBJECTS)
+fstrelabel_LDADD = $(LDADD)
+fstrelabel_DEPENDENCIES = ../script/libfstscript.la ../lib/libfst.la
+am__fstreplace_SOURCES_DIST = fstreplace.cc
+@HAVE_BIN_TRUE@am_fstreplace_OBJECTS = fstreplace.$(OBJEXT)
+fstreplace_OBJECTS = $(am_fstreplace_OBJECTS)
+fstreplace_LDADD = $(LDADD)
+fstreplace_DEPENDENCIES = ../script/libfstscript.la ../lib/libfst.la
+am__fstreverse_SOURCES_DIST = fstreverse.cc
+@HAVE_BIN_TRUE@am_fstreverse_OBJECTS = fstreverse.$(OBJEXT)
+fstreverse_OBJECTS = $(am_fstreverse_OBJECTS)
+fstreverse_LDADD = $(LDADD)
+fstreverse_DEPENDENCIES = ../script/libfstscript.la ../lib/libfst.la
+am__fstreweight_SOURCES_DIST = fstreweight.cc
+@HAVE_BIN_TRUE@am_fstreweight_OBJECTS = fstreweight.$(OBJEXT)
+fstreweight_OBJECTS = $(am_fstreweight_OBJECTS)
+fstreweight_LDADD = $(LDADD)
+fstreweight_DEPENDENCIES = ../script/libfstscript.la ../lib/libfst.la
+am__fstrmepsilon_SOURCES_DIST = fstrmepsilon.cc
+@HAVE_BIN_TRUE@am_fstrmepsilon_OBJECTS = fstrmepsilon.$(OBJEXT)
+fstrmepsilon_OBJECTS = $(am_fstrmepsilon_OBJECTS)
+fstrmepsilon_LDADD = $(LDADD)
+fstrmepsilon_DEPENDENCIES = ../script/libfstscript.la ../lib/libfst.la
+am__fstshortestdistance_SOURCES_DIST = fstshortestdistance.cc
+@HAVE_BIN_TRUE@am_fstshortestdistance_OBJECTS = \
+@HAVE_BIN_TRUE@ fstshortestdistance.$(OBJEXT)
+fstshortestdistance_OBJECTS = $(am_fstshortestdistance_OBJECTS)
+fstshortestdistance_LDADD = $(LDADD)
+fstshortestdistance_DEPENDENCIES = ../script/libfstscript.la \
+ ../lib/libfst.la
+am__fstshortestpath_SOURCES_DIST = fstshortestpath.cc
+@HAVE_BIN_TRUE@am_fstshortestpath_OBJECTS = fstshortestpath.$(OBJEXT)
+fstshortestpath_OBJECTS = $(am_fstshortestpath_OBJECTS)
+fstshortestpath_LDADD = $(LDADD)
+fstshortestpath_DEPENDENCIES = ../script/libfstscript.la \
+ ../lib/libfst.la
+am__fstsymbols_SOURCES_DIST = fstsymbols.cc
+@HAVE_BIN_TRUE@am_fstsymbols_OBJECTS = fstsymbols.$(OBJEXT)
+fstsymbols_OBJECTS = $(am_fstsymbols_OBJECTS)
+fstsymbols_LDADD = $(LDADD)
+fstsymbols_DEPENDENCIES = ../script/libfstscript.la ../lib/libfst.la
+am__fstsynchronize_SOURCES_DIST = fstsynchronize.cc
+@HAVE_BIN_TRUE@am_fstsynchronize_OBJECTS = fstsynchronize.$(OBJEXT)
+fstsynchronize_OBJECTS = $(am_fstsynchronize_OBJECTS)
+fstsynchronize_LDADD = $(LDADD)
+fstsynchronize_DEPENDENCIES = ../script/libfstscript.la \
+ ../lib/libfst.la
+am__fsttopsort_SOURCES_DIST = fsttopsort.cc
+@HAVE_BIN_TRUE@am_fsttopsort_OBJECTS = fsttopsort.$(OBJEXT)
+fsttopsort_OBJECTS = $(am_fsttopsort_OBJECTS)
+fsttopsort_LDADD = $(LDADD)
+fsttopsort_DEPENDENCIES = ../script/libfstscript.la ../lib/libfst.la
+am__fstunion_SOURCES_DIST = fstunion.cc
+@HAVE_BIN_TRUE@am_fstunion_OBJECTS = fstunion.$(OBJEXT)
+fstunion_OBJECTS = $(am_fstunion_OBJECTS)
+fstunion_LDADD = $(LDADD)
+fstunion_DEPENDENCIES = ../script/libfstscript.la ../lib/libfst.la
+DEFAULT_INCLUDES =
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+LTCXXCOMPILE = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+CXXLD = $(CXX)
+CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
+ $(LDFLAGS) -o $@
+SOURCES = $(fstarcsort_SOURCES) $(fstclosure_SOURCES) \
+ $(fstcompile_SOURCES) $(fstcompose_SOURCES) \
+ $(fstconcat_SOURCES) $(fstconnect_SOURCES) \
+ $(fstconvert_SOURCES) $(fstdeterminize_SOURCES) \
+ $(fstdifference_SOURCES) $(fstdraw_SOURCES) \
+ $(fstencode_SOURCES) $(fstepsnormalize_SOURCES) \
+ $(fstequal_SOURCES) $(fstequivalent_SOURCES) \
+ $(fstinfo_SOURCES) $(fstintersect_SOURCES) \
+ $(fstinvert_SOURCES) $(fstmap_SOURCES) $(fstminimize_SOURCES) \
+ $(fstprint_SOURCES) $(fstproject_SOURCES) $(fstprune_SOURCES) \
+ $(fstpush_SOURCES) $(fstrandgen_SOURCES) $(fstrelabel_SOURCES) \
+ $(fstreplace_SOURCES) $(fstreverse_SOURCES) \
+ $(fstreweight_SOURCES) $(fstrmepsilon_SOURCES) \
+ $(fstshortestdistance_SOURCES) $(fstshortestpath_SOURCES) \
+ $(fstsymbols_SOURCES) $(fstsynchronize_SOURCES) \
+ $(fsttopsort_SOURCES) $(fstunion_SOURCES)
+DIST_SOURCES = $(am__fstarcsort_SOURCES_DIST) \
+ $(am__fstclosure_SOURCES_DIST) $(am__fstcompile_SOURCES_DIST) \
+ $(am__fstcompose_SOURCES_DIST) $(am__fstconcat_SOURCES_DIST) \
+ $(am__fstconnect_SOURCES_DIST) $(am__fstconvert_SOURCES_DIST) \
+ $(am__fstdeterminize_SOURCES_DIST) \
+ $(am__fstdifference_SOURCES_DIST) $(am__fstdraw_SOURCES_DIST) \
+ $(am__fstencode_SOURCES_DIST) \
+ $(am__fstepsnormalize_SOURCES_DIST) \
+ $(am__fstequal_SOURCES_DIST) $(am__fstequivalent_SOURCES_DIST) \
+ $(am__fstinfo_SOURCES_DIST) $(am__fstintersect_SOURCES_DIST) \
+ $(am__fstinvert_SOURCES_DIST) $(am__fstmap_SOURCES_DIST) \
+ $(am__fstminimize_SOURCES_DIST) $(am__fstprint_SOURCES_DIST) \
+ $(am__fstproject_SOURCES_DIST) $(am__fstprune_SOURCES_DIST) \
+ $(am__fstpush_SOURCES_DIST) $(am__fstrandgen_SOURCES_DIST) \
+ $(am__fstrelabel_SOURCES_DIST) $(am__fstreplace_SOURCES_DIST) \
+ $(am__fstreverse_SOURCES_DIST) $(am__fstreweight_SOURCES_DIST) \
+ $(am__fstrmepsilon_SOURCES_DIST) \
+ $(am__fstshortestdistance_SOURCES_DIST) \
+ $(am__fstshortestpath_SOURCES_DIST) \
+ $(am__fstsymbols_SOURCES_DIST) \
+ $(am__fstsynchronize_SOURCES_DIST) \
+ $(am__fsttopsort_SOURCES_DIST) $(am__fstunion_SOURCES_DIST)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+ICU_CFLAGS = @ICU_CFLAGS@
+ICU_CONFIG = @ICU_CONFIG@
+ICU_CPPFLAGS = @ICU_CPPFLAGS@
+ICU_CXXFLAGS = @ICU_CXXFLAGS@
+ICU_LIBS = @ICU_LIBS@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+libfstdir = @libfstdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+lt_ECHO = @lt_ECHO@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+AM_CPPFLAGS = -I$(srcdir)/../include -I$(srcdir)/../script $(ICU_CPPFLAGS)
+LDADD = ../script/libfstscript.la ../lib/libfst.la -lm -ldl
+@HAVE_BIN_TRUE@fstarcsort_SOURCES = fstarcsort.cc
+@HAVE_BIN_TRUE@fstclosure_SOURCES = fstclosure.cc
+@HAVE_BIN_TRUE@fstcompile_SOURCES = fstcompile.cc
+@HAVE_BIN_TRUE@fstcompose_SOURCES = fstcompose.cc
+@HAVE_BIN_TRUE@fstconcat_SOURCES = fstconcat.cc
+@HAVE_BIN_TRUE@fstconnect_SOURCES = fstconnect.cc
+@HAVE_BIN_TRUE@fstconvert_SOURCES = fstconvert.cc
+@HAVE_BIN_TRUE@fstdeterminize_SOURCES = fstdeterminize.cc
+@HAVE_BIN_TRUE@fstdifference_SOURCES = fstdifference.cc
+@HAVE_BIN_TRUE@fstdraw_SOURCES = fstdraw.cc
+@HAVE_BIN_TRUE@fstencode_SOURCES = fstencode.cc
+@HAVE_BIN_TRUE@fstepsnormalize_SOURCES = fstepsnormalize.cc
+@HAVE_BIN_TRUE@fstequal_SOURCES = fstequal.cc
+@HAVE_BIN_TRUE@fstequivalent_SOURCES = fstequivalent.cc
+@HAVE_BIN_TRUE@fstinfo_SOURCES = fstinfo.cc
+@HAVE_BIN_TRUE@fstintersect_SOURCES = fstintersect.cc
+@HAVE_BIN_TRUE@fstinvert_SOURCES = fstinvert.cc
+@HAVE_BIN_TRUE@fstmap_SOURCES = fstmap.cc
+@HAVE_BIN_TRUE@fstminimize_SOURCES = fstminimize.cc
+@HAVE_BIN_TRUE@fstprint_SOURCES = fstprint.cc
+@HAVE_BIN_TRUE@fstproject_SOURCES = fstproject.cc
+@HAVE_BIN_TRUE@fstprune_SOURCES = fstprune.cc
+@HAVE_BIN_TRUE@fstpush_SOURCES = fstpush.cc
+@HAVE_BIN_TRUE@fstrandgen_SOURCES = fstrandgen.cc
+@HAVE_BIN_TRUE@fstrelabel_SOURCES = fstrelabel.cc
+@HAVE_BIN_TRUE@fstreplace_SOURCES = fstreplace.cc
+@HAVE_BIN_TRUE@fstreverse_SOURCES = fstreverse.cc
+@HAVE_BIN_TRUE@fstreweight_SOURCES = fstreweight.cc
+@HAVE_BIN_TRUE@fstrmepsilon_SOURCES = fstrmepsilon.cc
+@HAVE_BIN_TRUE@fstshortestdistance_SOURCES = fstshortestdistance.cc
+@HAVE_BIN_TRUE@fstshortestpath_SOURCES = fstshortestpath.cc
+@HAVE_BIN_TRUE@fstsymbols_SOURCES = fstsymbols.cc
+@HAVE_BIN_TRUE@fstsynchronize_SOURCES = fstsynchronize.cc
+@HAVE_BIN_TRUE@fsttopsort_SOURCES = fsttopsort.cc
+@HAVE_BIN_TRUE@fstunion_SOURCES = fstunion.cc
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .cc .lo .o .obj
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/bin/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign src/bin/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-binPROGRAMS: $(bin_PROGRAMS)
+ @$(NORMAL_INSTALL)
+ test -z "$(bindir)" || $(MKDIR_P) "$(DESTDIR)$(bindir)"
+ @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed 's/$(EXEEXT)$$//' | \
+ while read p p1; do if test -f $$p || test -f $$p1; \
+ then echo "$$p"; echo "$$p"; else :; fi; \
+ done | \
+ sed -e 'p;s,.*/,,;n;h' -e 's|.*|.|' \
+ -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \
+ sed 'N;N;N;s,\n, ,g' | \
+ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \
+ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
+ if ($$2 == $$4) files[d] = files[d] " " $$1; \
+ else { print "f", $$3 "/" $$4, $$1; } } \
+ END { for (d in files) print "f", d, files[d] }' | \
+ while read type dir files; do \
+ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
+ test -z "$$files" || { \
+ echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \
+ $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \
+ } \
+ ; done
+
+uninstall-binPROGRAMS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+ files=`for p in $$list; do echo "$$p"; done | \
+ sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \
+ -e 's/$$/$(EXEEXT)/' `; \
+ test -n "$$list" || exit 0; \
+ echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \
+ cd "$(DESTDIR)$(bindir)" && rm -f $$files
+
+clean-binPROGRAMS:
+ @list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \
+ echo " rm -f" $$list; \
+ rm -f $$list || exit $$?; \
+ test -n "$(EXEEXT)" || exit 0; \
+ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+ echo " rm -f" $$list; \
+ rm -f $$list
+fstarcsort$(EXEEXT): $(fstarcsort_OBJECTS) $(fstarcsort_DEPENDENCIES)
+ @rm -f fstarcsort$(EXEEXT)
+ $(CXXLINK) $(fstarcsort_OBJECTS) $(fstarcsort_LDADD) $(LIBS)
+fstclosure$(EXEEXT): $(fstclosure_OBJECTS) $(fstclosure_DEPENDENCIES)
+ @rm -f fstclosure$(EXEEXT)
+ $(CXXLINK) $(fstclosure_OBJECTS) $(fstclosure_LDADD) $(LIBS)
+fstcompile$(EXEEXT): $(fstcompile_OBJECTS) $(fstcompile_DEPENDENCIES)
+ @rm -f fstcompile$(EXEEXT)
+ $(CXXLINK) $(fstcompile_OBJECTS) $(fstcompile_LDADD) $(LIBS)
+fstcompose$(EXEEXT): $(fstcompose_OBJECTS) $(fstcompose_DEPENDENCIES)
+ @rm -f fstcompose$(EXEEXT)
+ $(CXXLINK) $(fstcompose_OBJECTS) $(fstcompose_LDADD) $(LIBS)
+fstconcat$(EXEEXT): $(fstconcat_OBJECTS) $(fstconcat_DEPENDENCIES)
+ @rm -f fstconcat$(EXEEXT)
+ $(CXXLINK) $(fstconcat_OBJECTS) $(fstconcat_LDADD) $(LIBS)
+fstconnect$(EXEEXT): $(fstconnect_OBJECTS) $(fstconnect_DEPENDENCIES)
+ @rm -f fstconnect$(EXEEXT)
+ $(CXXLINK) $(fstconnect_OBJECTS) $(fstconnect_LDADD) $(LIBS)
+fstconvert$(EXEEXT): $(fstconvert_OBJECTS) $(fstconvert_DEPENDENCIES)
+ @rm -f fstconvert$(EXEEXT)
+ $(CXXLINK) $(fstconvert_OBJECTS) $(fstconvert_LDADD) $(LIBS)
+fstdeterminize$(EXEEXT): $(fstdeterminize_OBJECTS) $(fstdeterminize_DEPENDENCIES)
+ @rm -f fstdeterminize$(EXEEXT)
+ $(CXXLINK) $(fstdeterminize_OBJECTS) $(fstdeterminize_LDADD) $(LIBS)
+fstdifference$(EXEEXT): $(fstdifference_OBJECTS) $(fstdifference_DEPENDENCIES)
+ @rm -f fstdifference$(EXEEXT)
+ $(CXXLINK) $(fstdifference_OBJECTS) $(fstdifference_LDADD) $(LIBS)
+fstdraw$(EXEEXT): $(fstdraw_OBJECTS) $(fstdraw_DEPENDENCIES)
+ @rm -f fstdraw$(EXEEXT)
+ $(CXXLINK) $(fstdraw_OBJECTS) $(fstdraw_LDADD) $(LIBS)
+fstencode$(EXEEXT): $(fstencode_OBJECTS) $(fstencode_DEPENDENCIES)
+ @rm -f fstencode$(EXEEXT)
+ $(CXXLINK) $(fstencode_OBJECTS) $(fstencode_LDADD) $(LIBS)
+fstepsnormalize$(EXEEXT): $(fstepsnormalize_OBJECTS) $(fstepsnormalize_DEPENDENCIES)
+ @rm -f fstepsnormalize$(EXEEXT)
+ $(CXXLINK) $(fstepsnormalize_OBJECTS) $(fstepsnormalize_LDADD) $(LIBS)
+fstequal$(EXEEXT): $(fstequal_OBJECTS) $(fstequal_DEPENDENCIES)
+ @rm -f fstequal$(EXEEXT)
+ $(CXXLINK) $(fstequal_OBJECTS) $(fstequal_LDADD) $(LIBS)
+fstequivalent$(EXEEXT): $(fstequivalent_OBJECTS) $(fstequivalent_DEPENDENCIES)
+ @rm -f fstequivalent$(EXEEXT)
+ $(CXXLINK) $(fstequivalent_OBJECTS) $(fstequivalent_LDADD) $(LIBS)
+fstinfo$(EXEEXT): $(fstinfo_OBJECTS) $(fstinfo_DEPENDENCIES)
+ @rm -f fstinfo$(EXEEXT)
+ $(CXXLINK) $(fstinfo_OBJECTS) $(fstinfo_LDADD) $(LIBS)
+fstintersect$(EXEEXT): $(fstintersect_OBJECTS) $(fstintersect_DEPENDENCIES)
+ @rm -f fstintersect$(EXEEXT)
+ $(CXXLINK) $(fstintersect_OBJECTS) $(fstintersect_LDADD) $(LIBS)
+fstinvert$(EXEEXT): $(fstinvert_OBJECTS) $(fstinvert_DEPENDENCIES)
+ @rm -f fstinvert$(EXEEXT)
+ $(CXXLINK) $(fstinvert_OBJECTS) $(fstinvert_LDADD) $(LIBS)
+fstmap$(EXEEXT): $(fstmap_OBJECTS) $(fstmap_DEPENDENCIES)
+ @rm -f fstmap$(EXEEXT)
+ $(CXXLINK) $(fstmap_OBJECTS) $(fstmap_LDADD) $(LIBS)
+fstminimize$(EXEEXT): $(fstminimize_OBJECTS) $(fstminimize_DEPENDENCIES)
+ @rm -f fstminimize$(EXEEXT)
+ $(CXXLINK) $(fstminimize_OBJECTS) $(fstminimize_LDADD) $(LIBS)
+fstprint$(EXEEXT): $(fstprint_OBJECTS) $(fstprint_DEPENDENCIES)
+ @rm -f fstprint$(EXEEXT)
+ $(CXXLINK) $(fstprint_OBJECTS) $(fstprint_LDADD) $(LIBS)
+fstproject$(EXEEXT): $(fstproject_OBJECTS) $(fstproject_DEPENDENCIES)
+ @rm -f fstproject$(EXEEXT)
+ $(CXXLINK) $(fstproject_OBJECTS) $(fstproject_LDADD) $(LIBS)
+fstprune$(EXEEXT): $(fstprune_OBJECTS) $(fstprune_DEPENDENCIES)
+ @rm -f fstprune$(EXEEXT)
+ $(CXXLINK) $(fstprune_OBJECTS) $(fstprune_LDADD) $(LIBS)
+fstpush$(EXEEXT): $(fstpush_OBJECTS) $(fstpush_DEPENDENCIES)
+ @rm -f fstpush$(EXEEXT)
+ $(CXXLINK) $(fstpush_OBJECTS) $(fstpush_LDADD) $(LIBS)
+fstrandgen$(EXEEXT): $(fstrandgen_OBJECTS) $(fstrandgen_DEPENDENCIES)
+ @rm -f fstrandgen$(EXEEXT)
+ $(CXXLINK) $(fstrandgen_OBJECTS) $(fstrandgen_LDADD) $(LIBS)
+fstrelabel$(EXEEXT): $(fstrelabel_OBJECTS) $(fstrelabel_DEPENDENCIES)
+ @rm -f fstrelabel$(EXEEXT)
+ $(CXXLINK) $(fstrelabel_OBJECTS) $(fstrelabel_LDADD) $(LIBS)
+fstreplace$(EXEEXT): $(fstreplace_OBJECTS) $(fstreplace_DEPENDENCIES)
+ @rm -f fstreplace$(EXEEXT)
+ $(CXXLINK) $(fstreplace_OBJECTS) $(fstreplace_LDADD) $(LIBS)
+fstreverse$(EXEEXT): $(fstreverse_OBJECTS) $(fstreverse_DEPENDENCIES)
+ @rm -f fstreverse$(EXEEXT)
+ $(CXXLINK) $(fstreverse_OBJECTS) $(fstreverse_LDADD) $(LIBS)
+fstreweight$(EXEEXT): $(fstreweight_OBJECTS) $(fstreweight_DEPENDENCIES)
+ @rm -f fstreweight$(EXEEXT)
+ $(CXXLINK) $(fstreweight_OBJECTS) $(fstreweight_LDADD) $(LIBS)
+fstrmepsilon$(EXEEXT): $(fstrmepsilon_OBJECTS) $(fstrmepsilon_DEPENDENCIES)
+ @rm -f fstrmepsilon$(EXEEXT)
+ $(CXXLINK) $(fstrmepsilon_OBJECTS) $(fstrmepsilon_LDADD) $(LIBS)
+fstshortestdistance$(EXEEXT): $(fstshortestdistance_OBJECTS) $(fstshortestdistance_DEPENDENCIES)
+ @rm -f fstshortestdistance$(EXEEXT)
+ $(CXXLINK) $(fstshortestdistance_OBJECTS) $(fstshortestdistance_LDADD) $(LIBS)
+fstshortestpath$(EXEEXT): $(fstshortestpath_OBJECTS) $(fstshortestpath_DEPENDENCIES)
+ @rm -f fstshortestpath$(EXEEXT)
+ $(CXXLINK) $(fstshortestpath_OBJECTS) $(fstshortestpath_LDADD) $(LIBS)
+fstsymbols$(EXEEXT): $(fstsymbols_OBJECTS) $(fstsymbols_DEPENDENCIES)
+ @rm -f fstsymbols$(EXEEXT)
+ $(CXXLINK) $(fstsymbols_OBJECTS) $(fstsymbols_LDADD) $(LIBS)
+fstsynchronize$(EXEEXT): $(fstsynchronize_OBJECTS) $(fstsynchronize_DEPENDENCIES)
+ @rm -f fstsynchronize$(EXEEXT)
+ $(CXXLINK) $(fstsynchronize_OBJECTS) $(fstsynchronize_LDADD) $(LIBS)
+fsttopsort$(EXEEXT): $(fsttopsort_OBJECTS) $(fsttopsort_DEPENDENCIES)
+ @rm -f fsttopsort$(EXEEXT)
+ $(CXXLINK) $(fsttopsort_OBJECTS) $(fsttopsort_LDADD) $(LIBS)
+fstunion$(EXEEXT): $(fstunion_OBJECTS) $(fstunion_DEPENDENCIES)
+ @rm -f fstunion$(EXEEXT)
+ $(CXXLINK) $(fstunion_OBJECTS) $(fstunion_LDADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstarcsort.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstclosure.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstcompile.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstcompose.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstconcat.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstconnect.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstconvert.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstdeterminize.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstdifference.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstdraw.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstencode.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstepsnormalize.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstequal.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstequivalent.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstinfo.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstintersect.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstinvert.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstmap.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstminimize.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstprint.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstproject.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstprune.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstpush.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstrandgen.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstrelabel.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstreplace.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstreverse.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstreweight.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstrmepsilon.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstshortestdistance.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstshortestpath.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstsymbols.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstsynchronize.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fsttopsort.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fstunion.Po@am__quote@
+
+.cc.o:
+@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $<
+
+.cc.obj:
+@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.cc.lo:
+@am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(LTCXXCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ mkid -fID $$unique
+tags: TAGS
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ set x; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: CTAGS
+CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(PROGRAMS)
+installdirs:
+ for dir in "$(DESTDIR)$(bindir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-binPROGRAMS
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-binPROGRAMS
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-binPROGRAMS \
+ clean-generic clean-libtool ctags distclean distclean-compile \
+ distclean-generic distclean-libtool distclean-tags distdir dvi \
+ dvi-am html html-am info info-am install install-am \
+ install-binPROGRAMS install-data install-data-am install-dvi \
+ install-dvi-am install-exec install-exec-am install-html \
+ install-html-am install-info install-info-am install-man \
+ install-pdf install-pdf-am install-ps install-ps-am \
+ install-strip installcheck installcheck-am installdirs \
+ maintainer-clean maintainer-clean-generic mostlyclean \
+ mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+ pdf pdf-am ps ps-am tags uninstall uninstall-am \
+ uninstall-binPROGRAMS
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/src/bin/fstarcsort.cc b/src/bin/fstarcsort.cc
new file mode 100644
index 0000000..33e534d
--- /dev/null
+++ b/src/bin/fstarcsort.cc
@@ -0,0 +1,67 @@
+// fstarcsort.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+//
+// \file
+// Sorts arcs of an FST.
+//
+
+#include <string>
+
+#include <fst/compat.h>
+#include <fst/script/arcsort.h>
+
+DEFINE_string(sort_type, "ilabel",
+ "Comparison method, one of: \"ilabel\", \"olabel\"");
+
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+  // Build the usage text shown by ShowUsage() around the program name.
+  string usage("Sorts arcs of an FST.\n\n Usage: ");
+  usage.append(argv[0]);
+  usage.append(" [in.fst [out.fst]]\n");
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  if (argc > 3) {  // more than two positional arguments
+    ShowUsage();
+    return 1;
+  }
+
+  // A missing or "-" input argument yields the empty name.
+  const bool named_input = argc > 1 && strcmp(argv[1], "-") != 0;
+  string source = named_input ? argv[1] : "";
+  string target = argc > 2 ? argv[2] : "";
+
+  s::MutableFstClass *sorted = s::MutableFstClass::Read(source, true);
+  if (!sorted) return 1;
+
+  // Reject anything other than the two supported --sort_type values.
+  const bool by_ilabel = FLAGS_sort_type == "ilabel";
+  if (!by_ilabel && FLAGS_sort_type != "olabel") {
+    LOG(ERROR) << argv[0] << ": Unknown sort type \""
+               << FLAGS_sort_type << "\"\n";
+    return 1;
+  }
+  if (by_ilabel) {
+    s::ArcSort(sorted, s::ILABEL_COMPARE);
+  } else {
+    s::ArcSort(sorted, s::OLABEL_COMPARE);
+  }
+  sorted->Write(target);
+  return 0;
+}
diff --git a/src/bin/fstclosure.cc b/src/bin/fstclosure.cc
new file mode 100644
index 0000000..569c708
--- /dev/null
+++ b/src/bin/fstclosure.cc
@@ -0,0 +1,56 @@
+// fstclosure.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+//
+// \file
+// Creates the Kleene closure of an FST.
+//
+
+#include <fst/script/closure.h>
+
+DEFINE_bool(closure_plus, false,
+ "Do not add the empty path (T+ instead of T*)");
+
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+  // Build the usage text shown by ShowUsage() around the program name.
+  string usage("Creates the Kleene closure of an FST.\n\n Usage: ");
+  usage.append(argv[0]);
+  usage.append(" [in.fst [out.fst]]\n");
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  if (argc > 3) {  // more than two positional arguments
+    ShowUsage();
+    return 1;
+  }
+
+  // A missing or "-" input argument yields the empty name.
+  const bool named_input = argc > 1 && strcmp(argv[1], "-") != 0;
+  string source = named_input ? argv[1] : "";
+  string target = argc > 2 ? argv[2] : "";
+
+  s::MutableFstClass *closed = s::MutableFstClass::Read(source, true);
+  if (!closed) return 1;
+
+  // --closure_plus selects CLOSURE_PLUS; the default is CLOSURE_STAR.
+  fst::ClosureType mode = fst::CLOSURE_STAR;
+  if (FLAGS_closure_plus) mode = fst::CLOSURE_PLUS;
+  s::Closure(closed, mode);
+  closed->Write(target);
+  return 0;
+}
diff --git a/src/bin/fstcompile.cc b/src/bin/fstcompile.cc
new file mode 100644
index 0000000..db62f95
--- /dev/null
+++ b/src/bin/fstcompile.cc
@@ -0,0 +1,92 @@
+// fstcompile.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+//
+// \file
+// Creates binary FSTs from simple text format used by AT&T
+// (see http://www.research.att.com/projects/mohri/fsm/doc4/fsm.5.html).
+
+#include <fst/script/compile.h>
+
+DEFINE_bool(acceptor, false, "Input in acceptor format");
+DEFINE_string(arc_type, "standard", "Output arc type");
+DEFINE_string(fst_type, "vector", "Output FST type");
+DEFINE_string(isymbols, "", "Input label symbol table");
+DEFINE_string(osymbols, "", "Output label symbol table");
+DEFINE_string(ssymbols, "", "State label symbol table");
+DEFINE_bool(keep_isymbols, false, "Store input label symbol table with FST");
+DEFINE_bool(keep_osymbols, false, "Store output label symbol table with FST");
+DEFINE_bool(keep_state_numbering, false, "Do not renumber input states");
+DEFINE_bool(allow_negative_labels, false,
+ "Allow negative labels (not recommended; may cause conflicts)");
+
+int main(int argc, char **argv) {
+ namespace s = fst::script;
+ using fst::istream;
+ using fst::ifstream;
+ using fst::SymbolTable;
+ // Usage text for ShowUsage(); argv[0] supplies the program name.
+ string usage = "Creates binary FSTs from simple text format.\n\n Usage: ";
+ usage += argv[0];
+ usage += " [text.fst [binary.fst]]\n";
+
+ std::set_new_handler(FailedNewHandler);
+ SetFlags(usage.c_str(), &argc, &argv, true);
+ if (argc > 3) {  // more than two positional arguments
+ ShowUsage();
+ return 1;
+ }
+ // Read text from std::cin unless a file name other than "-" is given.
+ const char *source = "standard input";  // name handed to CompileFst
+ istream *istrm = &std::cin;
+ if (argc > 1 && strcmp(argv[1], "-") != 0) {
+ source = argv[1];
+ istrm = new fst::ifstream(argv[1]);  // deleted before the final return
+ if (!*istrm) {
+ LOG(ERROR) << argv[0] << ": Open failed, file = " << argv[1];
+ return 1;
+ }
+ }
+ const SymbolTable *isyms = 0, *osyms = 0, *ssyms = 0;
+ // A symbol table is loaded only if its flag is set; failure exits with 1.
+ if (!FLAGS_isymbols.empty()) {
+ isyms = SymbolTable::ReadText(FLAGS_isymbols, FLAGS_allow_negative_labels);
+ if (!isyms) exit(1);
+ }
+
+ if (!FLAGS_osymbols.empty()) {
+ osyms = SymbolTable::ReadText(FLAGS_osymbols, FLAGS_allow_negative_labels);
+ if (!osyms) exit(1);
+ }
+ // The state symbol table is read without the negative-label option.
+ if (!FLAGS_ssymbols.empty()) {
+ ssyms = SymbolTable::ReadText(FLAGS_ssymbols);
+ if (!ssyms) exit(1);
+ }
+
+ string dest = argc > 2 ? argv[2] : "";  // empty when no output is named
+
+ s::CompileFst(*istrm, source, dest, FLAGS_fst_type, FLAGS_arc_type,
+ isyms, osyms, ssyms,
+ FLAGS_acceptor, FLAGS_keep_isymbols, FLAGS_keep_osymbols,
+ FLAGS_keep_state_numbering, FLAGS_allow_negative_labels);
+
+ if (istrm != &std::cin)
+ delete istrm;
+
+ return 0;
+}
diff --git a/src/bin/fstcompose.cc b/src/bin/fstcompose.cc
new file mode 100644
index 0000000..0ba8c47
--- /dev/null
+++ b/src/bin/fstcompose.cc
@@ -0,0 +1,95 @@
+// fstcompose.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+//
+// \file
+// Composes two FSTs.
+//
+
+#include <fst/script/compose.h>
+#include <fst/script/connect.h>
+
+
+DEFINE_string(compose_filter, "auto",
+ "Composition filter, one of: \"alt_sequence\", \"auto\", "
+ "\"match\", \"sequence\"");
+DEFINE_bool(connect, true, "Trim output");
+
+
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+  using fst::script::FstClass;
+  using fst::script::MutableFstClass;
+  using fst::script::VectorFstClass;
+  // Usage text for ShowUsage(); argv[0] supplies the program name.
+  string usage = "Composes two FSTs.\n\n Usage: ";
+  usage += argv[0];
+  usage += " in1.fst in2.fst [out.fst]\n";
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  if (argc < 3 || argc > 4) {  // two inputs required, output optional
+    ShowUsage();
+    return 1;
+  }
+  // "-" selects standard input and is represented by the empty name.
+  string in1_name = strcmp(argv[1], "-") != 0 ? argv[1] : "";
+  string in2_name = (argc > 2 && (strcmp(argv[2], "-") != 0)) ? argv[2] : "";
+  string out_name = argc > 3 ? argv[3] : "";
+
+  if (in1_name.empty() && in2_name.empty()) {
+    LOG(ERROR) << argv[0] << ": Can't take both inputs from standard input.";
+    return 1;
+  }
+
+  FstClass *ifst1 = FstClass::Read(in1_name);
+  if (!ifst1) return 1;
+
+  FstClass *ifst2 = FstClass::Read(in2_name);
+  if (!ifst2) return 1;
+
+  if (ifst1->ArcType() != ifst2->ArcType()) {
+    LOG(ERROR) << argv[0] << ": Input FSTs must have the same arc type.";
+    return 1;
+  }
+
+  VectorFstClass ofst(ifst1->ArcType());
+  // Map --compose_filter onto the library enum; unknown names are an error.
+  fst::ComposeFilter compose_filter;
+
+  if (FLAGS_compose_filter == "alt_sequence") {
+    compose_filter = fst::ALT_SEQUENCE_FILTER;
+  } else if (FLAGS_compose_filter == "auto") {
+    compose_filter = fst::AUTO_FILTER;
+  } else if (FLAGS_compose_filter == "match") {
+    compose_filter = fst::MATCH_FILTER;
+  } else if (FLAGS_compose_filter == "sequence") {
+    compose_filter = fst::SEQUENCE_FILTER;
+  } else {
+    LOG(ERROR) << argv[0] << ": Unknown compose filter type: "
+               << FLAGS_compose_filter;
+    return 1;
+  }
+
+  fst::ComposeOptions opts(FLAGS_connect, compose_filter);
+
+  s::Compose(*ifst1, *ifst2, &ofst, opts);
+
+  ofst.Write(out_name);
+
+  return 0;
+}
diff --git a/src/bin/fstconcat.cc b/src/bin/fstconcat.cc
new file mode 100644
index 0000000..fbe9b68
--- /dev/null
+++ b/src/bin/fstconcat.cc
@@ -0,0 +1,62 @@
+// fstconcat.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+//
+// \file
+// Concatenates two FSTs.
+//
+
+#include <string>
+
+#include <fst/script/concat.h>
+
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+  // Build the usage text shown by ShowUsage() around the program name.
+  string usage("Concatenates two FSTs.\n\n Usage: ");
+  usage.append(argv[0]);
+  usage.append(" in1.fst in2.fst [out.fst]\n");
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  const bool arity_ok = argc == 3 || argc == 4;
+  if (!arity_ok) {
+    ShowUsage();
+    return 1;
+  }
+
+  // "-" selects standard input and is represented by the empty name.
+  string left = strcmp(argv[1], "-") != 0 ? argv[1] : "";
+  string right = strcmp(argv[2], "-") != 0 ? argv[2] : "";
+  string target = argc > 3 ? argv[3] : "";
+  if (left.empty() && right.empty()) {
+    LOG(ERROR) << argv[0] << ": Can't take both inputs from standard input.";
+    return 1;
+  }
+
+  // The first operand is the one Concat() modifies and the one written out.
+  s::MutableFstClass *head = s::MutableFstClass::Read(left, true);
+  if (!head) return 1;
+
+  s::FstClass *tail = s::FstClass::Read(right);
+  if (!tail) return 1;
+
+  s::Concat(head, *tail);
+  head->Write(target);
+
+  return 0;
+}
diff --git a/src/bin/fstconnect.cc b/src/bin/fstconnect.cc
new file mode 100644
index 0000000..f774767
--- /dev/null
+++ b/src/bin/fstconnect.cc
@@ -0,0 +1,52 @@
+// fstconnect.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+//
+// \file
+// Removes useless (inaccessible or non-coaccessible) states and arcs
+// from an FST.
+//
+
+#include <fst/script/connect.h>
+
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+  // Build the usage text shown by ShowUsage() around the program name.
+  string usage("Removes useless states and arcs from an FST.\n\n Usage: ");
+  usage.append(argv[0]);
+  usage.append(" [in.fst [out.fst]]\n");
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  if (argc > 3) {  // more than two positional arguments
+    ShowUsage();
+    return 1;
+  }
+
+  // A missing or "-" input argument yields the empty name.
+  const bool named_input = argc > 1 && strcmp(argv[1], "-") != 0;
+  string source = named_input ? argv[1] : "";
+  string target = argc > 2 ? argv[2] : "";
+
+  s::MutableFstClass *trimmed = s::MutableFstClass::Read(source, true);
+  if (!trimmed) return 1;
+
+  s::Connect(trimmed);
+  trimmed->Write(target);
+
+  return 0;
+}
diff --git a/src/bin/fstconvert.cc b/src/bin/fstconvert.cc
new file mode 100644
index 0000000..f7c4ad9
--- /dev/null
+++ b/src/bin/fstconvert.cc
@@ -0,0 +1,58 @@
+// fstconvert.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+//
+// \file
+// Converts an FST to another type.
+//
+
+#include <fst/script/convert.h>
+
+DEFINE_string(fst_type, "vector", "Output FST type");
+
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+  using fst::script::FstClass;
+  // Usage text for ShowUsage(); argv[0] supplies the program name.
+  string usage = "Converts an FST to another type.\n\n Usage: ";
+  usage += argv[0];
+  usage += " [in.fst [out.fst]]\n";
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  if (argc > 3) {  // more than two positional arguments
+    ShowUsage();
+    return 1;
+  }
+  // A missing or "-" input argument yields the empty name.
+  string in_name = (argc > 1 && strcmp(argv[1], "-") != 0) ? argv[1] : "";
+  string out_name = argc > 2 ? argv[2] : "";
+
+  FstClass *ifst = FstClass::Read(in_name);
+  if (!ifst) return 1;
+
+  // Convert only when the input is not already of the requested type; a
+  // conversion that yields no FST must not reach the Write() call below.
+  FstClass *ofst = ifst;
+  if (ofst->FstType() != FLAGS_fst_type) {
+    ofst = s::Convert(*ifst, FLAGS_fst_type);
+    if (!ofst) return 1;
+  }
+
+  ofst->Write(out_name);
+  return 0;
+}
diff --git a/src/bin/fstdeterminize.cc b/src/bin/fstdeterminize.cc
new file mode 100644
index 0000000..aa5064f
--- /dev/null
+++ b/src/bin/fstdeterminize.cc
@@ -0,0 +1,68 @@
+// fstdeterminize.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+//
+// \file
+// Determinizes an FST.
+//
+
+#include <fst/script/determinize.h>
+
+DEFINE_double(delta, fst::kDelta, "Comparison/quantization delta");
+DEFINE_int64(nstate, fst::kNoStateId, "State number threshold");
+DEFINE_string(weight, "", "Weight threshold");
+DEFINE_int64(subsequential_label, 0,
+ "Input label of arc corresponding to residual final output when"
+ " producing a subsequential transducer");
+
+int main(int argc, char **argv) {
+ namespace s = fst::script;
+ using fst::script::FstClass;
+ using fst::script::MutableFstClass;
+ using fst::script::VectorFstClass;
+ using fst::script::WeightClass;
+ // Usage text for ShowUsage(); argv[0] supplies the program name.
+ string usage = "Determinizes an FST.\n\n Usage: ";
+ usage += argv[0];
+ usage += " [in.fst [out.fst]]\n";
+
+ std::set_new_handler(FailedNewHandler);
+ SetFlags(usage.c_str(), &argc, &argv, true);
+ if (argc > 3) {  // more than two positional arguments
+ ShowUsage();
+ return 1;
+ }
+ // A missing or "-" input argument yields the empty name.
+ string in_name = (argc > 1 && strcmp(argv[1], "-") != 0) ? argv[1] : "";
+ string out_name = argc > 2 ? argv[2] : "";
+
+ FstClass *ifst = FstClass::Read(in_name);
+ if (!ifst) return 1;
+
+ VectorFstClass ofst(ifst->ArcType());  // output uses the input's arc type
+ // Threshold is Zero() unless --weight is set, then built from the flag text.
+ s::DeterminizeOptions opts(
+ FLAGS_delta, FLAGS_weight.empty() ?
+ WeightClass::Zero() : WeightClass(ifst->WeightType(), FLAGS_weight),
+ FLAGS_nstate, FLAGS_subsequential_label);
+
+ s::Determinize(*ifst, &ofst, opts);
+
+ ofst.Write(out_name);
+
+ return 0;
+}
diff --git a/src/bin/fstdifference.cc b/src/bin/fstdifference.cc
new file mode 100644
index 0000000..5459b03
--- /dev/null
+++ b/src/bin/fstdifference.cc
@@ -0,0 +1,87 @@
+// fstdifference.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+//
+// \file
+// Subtracts an unweighted DFA from an FSA.
+//
+
+#include <fst/script/difference.h>
+#include <fst/script/connect.h>
+
+DEFINE_string(compose_filter, "auto",
+ "Composition filter, one of: \"alt_sequence\", \"auto\","
+ " \"match\", \"sequence\"");
+DEFINE_bool(connect, true, "Trim output");
+
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+  using fst::script::FstClass;
+  using fst::script::VectorFstClass;
+  // Usage text for ShowUsage(); argv[0] supplies the program name.
+  string usage = "Subtracts an unweighted DFA from an FSA.\n\n Usage: ";
+  usage += argv[0];
+  usage += " in1.fst in2.fst [out.fst]\n";
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  if (argc < 3 || argc > 4) {  // two inputs required, output optional
+    ShowUsage();
+    return 1;
+  }
+  // "-" selects standard input and is represented by the empty name.
+  string in1_name = strcmp(argv[1], "-") == 0 ? "" : argv[1];
+  string in2_name = strcmp(argv[2], "-") == 0 ? "" : argv[2];
+  string out_name = argc > 3 ? argv[3] : "";
+
+  if (in1_name.empty() && in2_name.empty()) {
+    LOG(ERROR) << argv[0] << ": Can't take both inputs from standard input.";
+    return 1;
+  }
+
+  FstClass *ifst1 = FstClass::Read(in1_name);
+  if (!ifst1) return 1;
+  FstClass *ifst2 = FstClass::Read(in2_name);
+  if (!ifst2) return 1;
+  // The output is built for ifst1's arc type, so both inputs must share it.
+  if (ifst1->ArcType() != ifst2->ArcType()) {
+    LOG(ERROR) << argv[0] << ": Input FSTs must have the same arc type.";
+    return 1;
+  }
+
+  VectorFstClass ofst(ifst1->ArcType());
+  // Map --compose_filter onto the library enum; unknown names are an error.
+  fst::ComposeFilter cf;
+  if (FLAGS_compose_filter == "auto") {
+    cf = fst::AUTO_FILTER;
+  } else if (FLAGS_compose_filter == "sequence") {
+    cf = fst::SEQUENCE_FILTER;
+  } else if (FLAGS_compose_filter == "alt_sequence") {
+    cf = fst::ALT_SEQUENCE_FILTER;
+  } else if (FLAGS_compose_filter == "match") {
+    cf = fst::MATCH_FILTER;
+  } else {
+    LOG(ERROR) << argv[0] << ": Bad filter type \""
+               << FLAGS_compose_filter << "\"";
+    return 1;
+  }
+
+  fst::DifferenceOptions opts(FLAGS_connect, cf);
+  s::Difference(*ifst1, *ifst2, &ofst, opts);
+  ofst.Write(out_name);
+  return 0;
+}
diff --git a/src/bin/fstdraw.cc b/src/bin/fstdraw.cc
new file mode 100644
index 0000000..51ebb2d
--- /dev/null
+++ b/src/bin/fstdraw.cc
@@ -0,0 +1,118 @@
+// fstdraw.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+//
+// \file
+// Draws a binary FST in the Graphviz dot text format.
+
+#include <fst/script/draw.h>
+
+DEFINE_bool(acceptor, false, "Input in acceptor format");
+DEFINE_string(isymbols, "", "Input label symbol table");
+DEFINE_string(osymbols, "", "Output label symbol table");
+DEFINE_string(ssymbols, "", "State label symbol table");
+DEFINE_bool(numeric, false, "Print numeric labels");
+DEFINE_string(save_isymbols, "", "Save input symbol table to file");
+DEFINE_string(save_osymbols, "", "Save output symbol table to file");
+DEFINE_int32(precision, 5, "Set precision (number of char/float)");
+DEFINE_bool(show_weight_one, false,
+ "Print/draw arc weights and final weights equal to Weight::One()");
+DEFINE_string(title, "", "Set figure title");
+DEFINE_bool(portrait, false, "Portrait mode (def: landscape)");
+DEFINE_bool(vertical, false, "Draw bottom-to-top instead of left-to-right");
+DEFINE_int32(fontsize, 14, "Set fontsize");
+DEFINE_double(height, 11, "Set height");
+DEFINE_double(width, 8.5, "Set width");
+DEFINE_double(nodesep, 0.25,
+ "Set minimum separation between nodes (see dot documentation)");
+DEFINE_double(ranksep, 0.40,
+ "Set minimum separation between ranks (see dot documentation)");
+DEFINE_bool(allow_negative_labels, false,
+ "Allow negative labels (not recommended; may cause conflicts)");
+
+int main(int argc, char **argv) {
+ namespace s = fst::script;
+ using fst::ostream;
+ using fst::SymbolTable;
+ // Usage text for ShowUsage(); argv[0] supplies the program name.
+ string usage = "Prints out binary FSTs in dot text format.\n\n Usage: ";
+ usage += argv[0];
+ usage += " [binary.fst [text.fst]]\n";
+
+ std::set_new_handler(FailedNewHandler);
+ SetFlags(usage.c_str(), &argc, &argv, true);
+ if (argc > 3) {  // more than two positional arguments
+ ShowUsage();
+ return 1;
+ }
+ // A missing or "-" input argument yields the empty name.
+ string in_name = (argc > 1 && strcmp(argv[1], "-") != 0) ? argv[1] : "";
+
+ s::FstClass *fst = s::FstClass::Read(in_name);
+ if (!fst) return 1;
+ // Write to std::cout unless a second positional argument names a file.
+ ostream *ostrm = &std::cout;
+ string dest = "stdout";
+ if (argc == 3) {
+ dest = argv[2];
+ ostrm = new fst::ofstream(argv[2]);  // deleted before the final return
+ if (!*ostrm) {
+ LOG(ERROR) << argv[0] << ": Open failed, file = " << argv[2];
+ return 1;
+ }
+ }
+ ostrm->precision(FLAGS_precision);
+
+ const SymbolTable *isyms = 0, *osyms = 0, *ssyms = 0;
+ // Symbol files are read only when --numeric is off; failure exits with 1.
+ if (!FLAGS_isymbols.empty() && !FLAGS_numeric) {
+ isyms = SymbolTable::ReadText(FLAGS_isymbols, FLAGS_allow_negative_labels);
+ if (!isyms) exit(1);
+ }
+
+ if (!FLAGS_osymbols.empty() && !FLAGS_numeric) {
+ osyms = SymbolTable::ReadText(FLAGS_osymbols, FLAGS_allow_negative_labels);
+ if (!osyms) exit(1);
+ }
+
+ if (!FLAGS_ssymbols.empty() && !FLAGS_numeric) {
+ ssyms = SymbolTable::ReadText(FLAGS_ssymbols);
+ if (!ssyms) exit(1);
+ }
+ // With no table from a file (and no --numeric), use the FST's own symbols.
+ if (!isyms && !FLAGS_numeric)
+ isyms = fst->InputSymbols();
+ if (!osyms && !FLAGS_numeric)
+ osyms = fst->OutputSymbols();
+
+ s::DrawFst(*fst, isyms, osyms, ssyms, FLAGS_acceptor,
+ FLAGS_title, FLAGS_width, FLAGS_height,
+ FLAGS_portrait, FLAGS_vertical,
+ FLAGS_ranksep, FLAGS_nodesep,
+ FLAGS_fontsize, FLAGS_precision,
+ FLAGS_show_weight_one, ostrm, dest);
+ // Optionally save whichever label tables were in use for the drawing.
+ if (isyms && !FLAGS_save_isymbols.empty())
+ isyms->WriteText(FLAGS_save_isymbols);
+
+ if (osyms && !FLAGS_save_osymbols.empty())
+ osyms->WriteText(FLAGS_save_osymbols);
+ // Release the file stream if one was opened above.
+ if (ostrm != &std::cout)
+ delete ostrm;
+ return 0;
+}
diff --git a/src/bin/fstencode.cc b/src/bin/fstencode.cc
new file mode 100644
index 0000000..95dd49c
--- /dev/null
+++ b/src/bin/fstencode.cc
@@ -0,0 +1,68 @@
+// fstencode.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+//
+// \file
+// Encode transducer labels and/or weights.
+//
+
+#include <fst/script/encode.h>
+#include <fst/script/decode.h>
+
+// Flags selecting what gets encoded and whether to encode or decode.
+DEFINE_bool(encode_labels, false, "Encode output labels");
+DEFINE_bool(encode_weights, false, "Encode weights");
+DEFINE_bool(encode_reuse, false, "Re-use existing codex");
+DEFINE_bool(decode, false, "Decode labels and/or weights");
+
+// Entry point: reads the input FST, runs s::Encode (default) or s::Decode
+// (with --decode) using the codex file named on the command line, and writes
+// the resulting FST.  Returns 1 on a usage error or when the input FST
+// cannot be read, 0 otherwise.
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+  using fst::script::FstClass;
+  using fst::script::MutableFstClass;
+
+  string usage = "Encodes transducer labels and/or weights.\n\n Usage: ";
+  usage += argv[0];
+  usage += " in.fst codex [out.fst]\n";
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+
+  // Exactly two or three positional arguments are accepted.
+  if (argc != 3 && argc != 4) {
+    ShowUsage();
+    return 1;
+  }
+
+  // An input name of "-" is replaced by the empty string.
+  string in_name;
+  if (strcmp(argv[1], "-") != 0) in_name = argv[1];
+  string codex_name = argv[2];
+  string out_name;
+  if (argc > 3) out_name = argv[3];
+
+  MutableFstClass *fst = MutableFstClass::Read(in_name, true);
+  if (!fst) return 1;
+
+  if (FLAGS_decode) {
+    s::Decode(fst, codex_name);
+  } else {
+    // Assemble the encode bit mask from the two --encode_* flags.
+    uint32 flags = 0;
+    if (FLAGS_encode_labels) flags |= fst::kEncodeLabels;
+    if (FLAGS_encode_weights) flags |= fst::kEncodeWeights;
+    s::Encode(fst, flags, FLAGS_encode_reuse, codex_name);
+  }
+
+  // Both modes write the modified FST to the same destination.
+  fst->Write(out_name);
+
+  delete fst;
+  return 0;
+}
diff --git a/src/bin/fstepsnormalize.cc b/src/bin/fstepsnormalize.cc
new file mode 100644
index 0000000..c8813c6
--- /dev/null
+++ b/src/bin/fstepsnormalize.cc
@@ -0,0 +1,58 @@
+// fstepsnormalize.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+//
+// \file
+// Epsilon normalizes an FST.
+//
+
+#include <fst/script/epsnormalize.h>
+
+DEFINE_bool(eps_norm_output, false, "Normalize output epsilons");
+
+// Entry point: reads an FST, epsilon-normalizes it with s::EpsNormalize
+// (EPS_NORM_INPUT by default, EPS_NORM_OUTPUT with --eps_norm_output) into a
+// fresh VectorFstClass and writes that result.  Returns 1 on a usage error or
+// when the input cannot be read.
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+  using fst::script::FstClass;
+  using fst::script::VectorFstClass;
+
+  string usage = "Epsilon normalizes an FST.\n\n Usage: ";
+  usage += argv[0];
+  usage += " [in.fst [out.fst]]\n";
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  if (argc > 3) {
+    ShowUsage();
+    return 1;
+  }
+
+  // Missing arguments and an input name of "-" both yield an empty name.
+  string in_name;
+  if (argc > 1 && strcmp(argv[1], "-") != 0) in_name = argv[1];
+  string out_name;
+  if (argc > 2) out_name = argv[2];
+
+  FstClass *ifst = FstClass::Read(in_name);
+  if (!ifst) return 1;
+
+  fst::EpsNormalizeType eps_norm_type = fst::EPS_NORM_INPUT;
+  if (FLAGS_eps_norm_output) eps_norm_type = fst::EPS_NORM_OUTPUT;
+
+  // The output FST is created with the same arc type as the input.
+  VectorFstClass ofst(ifst->ArcType());
+  s::EpsNormalize(*ifst, &ofst, eps_norm_type);
+  ofst.Write(out_name);
+
+  return 0;
+}
diff --git a/src/bin/fstequal.cc b/src/bin/fstequal.cc
new file mode 100644
index 0000000..885b330
--- /dev/null
+++ b/src/bin/fstequal.cc
@@ -0,0 +1,61 @@
+// fstequal.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+//
+// \file
+// Two FSTs are equal iff the exit status is zero.
+//
+
+#include <fst/script/equal.h>
+
+DEFINE_double(delta, fst::kDelta, "Comparison/quantization delta");
+
+// Entry point: compares two FSTs with s::Equal, passing --delta.
+// Exit status: 0 when s::Equal reports equality, 2 when it does not, and 1 on
+// a usage error, when both inputs are "-", or when an input cannot be read.
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+  using fst::script::FstClass;
+
+  string usage = "Two FSTs are equal iff the exit status is zero.\n\n Usage: ";
+  usage += argv[0];
+  usage += " in1.fst in2.fst\n";
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  if (argc != 3) {
+    ShowUsage();
+    return 1;
+  }
+
+  // An input name of "-" is replaced by the empty string.
+  string in1_name;
+  if (strcmp(argv[1], "-") != 0) in1_name = argv[1];
+  string in2_name;
+  if (strcmp(argv[2], "-") != 0) in2_name = argv[2];
+
+  if (in1_name.empty() && in2_name.empty()) {
+    LOG(ERROR) << argv[0] << ": Can't take both inputs from standard input.";
+    return 1;
+  }
+
+  FstClass *ifst1 = FstClass::Read(in1_name);
+  if (!ifst1) return 1;
+
+  FstClass *ifst2 = FstClass::Read(in2_name);
+  if (!ifst2) return 1;
+
+  if (s::Equal(*ifst1, *ifst2, FLAGS_delta)) return 0;
+
+  VLOG(1) << "FSTs are not equal.";
+  return 2;
+}
diff --git a/src/bin/fstequivalent.cc b/src/bin/fstequivalent.cc
new file mode 100644
index 0000000..e7e09f2
--- /dev/null
+++ b/src/bin/fstequivalent.cc
@@ -0,0 +1,91 @@
+// fstequivalent.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+//
+// \file
+// Two DFAs are equivalent iff the exit status is zero.
+//
+
+#include <fst/script/equivalent.h>
+#include <fst/script/randequivalent.h>
+
+DEFINE_double(delta, fst::kDelta, "Comparison/quantization delta");
+DEFINE_bool(random, false,
+            "Test equivalence by randomly selecting paths in the input FSTs");
+DEFINE_int32(max_length, INT_MAX, "Maximum path length");
+DEFINE_int32(npath, 1, "Number of paths to generate");
+DEFINE_int32(seed, time(0), "Random seed");
+// The help text lists the three names main() accepts for --select; each name
+// is enclosed in its own pair of escaped quotes.
+DEFINE_string(select, "uniform", "Selection type: one of: "
+              " \"uniform\", \"log_prob\" (when appropriate),"
+              " \"fast_log_prob\" (when appropriate)");
+
+// Entry point: tests two FSTs for equivalence.
+//
+// Without --random the check is s::Equivalent with --delta.  With --random it
+// is s::RandEquivalent, driven by --seed, --npath, --delta, --max_length and
+// the arc selector named by --select.
+//
+// Exit status: 0 when the check succeeds, 2 when it fails, and 1 on a usage
+// error, when both inputs are "-", when an input cannot be read, or when
+// --select names an unknown selector.
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+  using fst::script::FstClass;
+
+  string usage = "Two DFAs are equivalent iff the exit status is zero.\n\n"
+      " Usage: ";
+  usage += argv[0];
+  usage += " in1.fst in2.fst\n";
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  if (argc != 3) {
+    ShowUsage();
+    return 1;
+  }
+
+  // An input name of "-" is replaced by the empty string.
+  string in1_name = strcmp(argv[1], "-") == 0 ? "" : argv[1];
+  string in2_name = strcmp(argv[2], "-") == 0 ? "" : argv[2];
+
+  if (in1_name.empty() && in2_name.empty()) {
+    LOG(ERROR) << argv[0] << ": Can't take both inputs from standard input.";
+    return 1;
+  }
+
+  FstClass *ifst1 = FstClass::Read(in1_name);
+  if (!ifst1) return 1;
+
+  FstClass *ifst2 = FstClass::Read(in2_name);
+  if (!ifst2) return 1;
+
+  if (!FLAGS_random) {
+    return s::Equivalent(*ifst1, *ifst2, FLAGS_delta) ? 0 : 2;
+  } else {
+    s::RandArcSelection ras;
+
+    // Translate the --select name into the matching selector enum value.
+    if (FLAGS_select == "uniform") {
+      ras = s::UNIFORM_ARC_SELECTOR;
+    } else if (FLAGS_select == "log_prob") {
+      ras = s::LOG_PROB_ARC_SELECTOR;
+    } else if (FLAGS_select == "fast_log_prob") {
+      ras = s::FAST_LOG_PROB_ARC_SELECTOR;
+    } else {
+      LOG(ERROR) << argv[0] << ": Unknown selection type \""
+                 << FLAGS_select << "\"\n";
+      return 1;
+    }
+
+    return s::RandEquivalent(
+        *ifst1, *ifst2,
+        FLAGS_seed,
+        FLAGS_npath,
+        FLAGS_delta,
+        fst::RandGenOptions<s::RandArcSelection>(
+            ras, FLAGS_max_length)) ? 0 : 2;
+  }
+}
diff --git a/src/bin/fstinfo.cc b/src/bin/fstinfo.cc
new file mode 100644
index 0000000..23816be
--- /dev/null
+++ b/src/bin/fstinfo.cc
@@ -0,0 +1,59 @@
+// fstinfo.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+//
+// \file
+// Prints out various information about an FST such as number of states
+// and arcs and property values (see properties.h).
+//
+
+#include <fst/script/info.h>
+
+DEFINE_string(arc_filter, "any", "Arc filter: one of :"
+              " \"any\", \"epsilon\", \"iepsilon\", \"oepsilon\"");
+DEFINE_string(info_type, "auto",
+              "Info format: one of: \"auto\", \"long\", \"short\"");
+DEFINE_bool(pipe, false, "Send info to stderr, input to stdout");
+DEFINE_bool(test_properties, true,
+            "Compute property values (if unknown to FST)");
+DEFINE_bool(fst_verify, true, "Verify FST sanity");
+
+// Entry point: reads one FST and hands it to s::PrintFstInfo together with
+// the --test_properties, --arc_filter, --info_type, --fst_verify and --pipe
+// flag values.  Returns 1 on a usage error or when the input cannot be read.
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+  using fst::script::FstClass;
+
+  string usage = "Prints out information about an FST.\n\n Usage: ";
+  usage += argv[0];
+  usage += " [in.fst]\n";
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+
+  // At most one positional argument is accepted.
+  if (argc > 2) {
+    ShowUsage();
+    return 1;
+  }
+
+  // A missing argument and an input name of "-" both yield an empty name.
+  string in_name;
+  if (argc > 1 && strcmp(argv[1], "-") != 0) in_name = argv[1];
+
+  FstClass *ifst = FstClass::Read(in_name);
+  if (!ifst) return 1;
+
+  s::PrintFstInfo(*ifst,
+                  FLAGS_test_properties,
+                  FLAGS_arc_filter,
+                  FLAGS_info_type,
+                  FLAGS_fst_verify,
+                  FLAGS_pipe);
+
+  return 0;
+}
diff --git a/src/bin/fstintersect.cc b/src/bin/fstintersect.cc
new file mode 100644
index 0000000..b3558fc
--- /dev/null
+++ b/src/bin/fstintersect.cc
@@ -0,0 +1,88 @@
+// fstintersect.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+//
+// \file
+// Intersects two FSTs.
+//
+
+#include <fst/script/intersect.h>
+#include <fst/script/connect.h>
+
+DEFINE_string(compose_filter, "auto",
+              "Composition filter, one of: \"alt_sequence\", \"auto\", "
+              "\"match\", \"sequence\"");
+DEFINE_bool(connect, true, "Trim output");
+
+// Entry point: intersects two FSTs with s::Intersect and writes the result.
+//
+// --compose_filter selects the fst::ComposeFilter value and --connect is
+// forwarded through fst::IntersectOptions.  Returns 1 on a usage error, when
+// both inputs are "-", when an input cannot be read, or when --compose_filter
+// names an unknown filter.
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+  using fst::script::FstClass;
+  using fst::script::VectorFstClass;
+
+
+  string usage = "Intersects two FSAs.\n\n Usage: ";
+  usage += argv[0];
+  usage += " in1.fst in2.fst [out.fst]\n";
+  usage += " Flags: connect\n";
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  if (argc < 3 || argc > 4) {
+    ShowUsage();
+    return 1;
+  }
+
+  // An input name of "-" is replaced by the empty string.
+  string in1_name = strcmp(argv[1], "-") == 0 ? "" : argv[1];
+  string in2_name = strcmp(argv[2], "-") == 0 ? "" : argv[2];
+  string out_name = argc > 3 ? argv[3] : "";
+
+  if (in1_name.empty() && in2_name.empty()) {
+    LOG(ERROR) << argv[0] << ": Can't take both inputs from standard input.";
+    return 1;
+  }
+
+  FstClass *ifst1 = FstClass::Read(in1_name);
+  if (!ifst1) return 1;
+  FstClass *ifst2 = FstClass::Read(in2_name);
+  if (!ifst2) return 1;
+
+  // The output FST is created with the arc type of the first input.
+  VectorFstClass ofst(ifst1->ArcType());
+
+  fst::ComposeFilter compose_filter;
+
+  // Translate the --compose_filter name into the matching enum value.
+  if (FLAGS_compose_filter == "alt_sequence") {
+    compose_filter = fst::ALT_SEQUENCE_FILTER;
+  } else if (FLAGS_compose_filter == "auto") {
+    compose_filter = fst::AUTO_FILTER;
+  } else if (FLAGS_compose_filter == "match") {
+    compose_filter = fst::MATCH_FILTER;
+  } else if (FLAGS_compose_filter == "sequence") {
+    compose_filter = fst::SEQUENCE_FILTER;
+  } else {
+    // Separate the program name from the message with ": ", as the other
+    // error message in this function does.
+    LOG(ERROR) << argv[0] << ": Unknown compose filter type: "
+               << FLAGS_compose_filter;
+    return 1;
+  }
+
+  fst::IntersectOptions opts(FLAGS_connect, compose_filter);
+
+  s::Intersect(*ifst1, *ifst2, &ofst, opts);
+
+  ofst.Write(out_name);
+
+  return 0;
+}
diff --git a/src/bin/fstinvert.cc b/src/bin/fstinvert.cc
new file mode 100644
index 0000000..f9b348b
--- /dev/null
+++ b/src/bin/fstinvert.cc
@@ -0,0 +1,51 @@
+// fstinvert.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+//
+// \file
+// Inverts a transduction.
+//
+
+#include <fst/script/invert.h>
+
+// Entry point: reads a mutable FST, applies s::Invert to it in place and
+// writes it back out.  Returns 1 on a usage error or when the input cannot be
+// read.
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+  using fst::script::FstClass;
+  using fst::script::MutableFstClass;
+
+  string usage = "Inverts a transduction.\n\n Usage: ";
+  usage += argv[0];
+  usage += " [in.fst [out.fst]]\n";
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+
+  // At most two positional arguments are accepted.
+  if (argc > 3) {
+    ShowUsage();
+    return 1;
+  }
+
+  // Missing arguments and an input name of "-" both yield an empty name.
+  string in_name;
+  if (argc > 1 && strcmp(argv[1], "-") != 0) in_name = argv[1];
+  string out_name;
+  if (argc > 2) out_name = argv[2];
+
+  MutableFstClass *fst = MutableFstClass::Read(in_name, true);
+  if (!fst) return 1;
+
+  s::Invert(fst);
+  fst->Write(out_name);
+
+  return 0;
+}
diff --git a/src/bin/fstmap.cc b/src/bin/fstmap.cc
new file mode 100644
index 0000000..231c725
--- /dev/null
+++ b/src/bin/fstmap.cc
@@ -0,0 +1,97 @@
+// fstmap.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+//
+// \file
+// Applies an operation to each arc of an FST.
+//
+
+#include <string>
+
+#include <fst/script/map.h>
+
+DEFINE_double(delta, fst::kDelta, "Comparison/quantization delta");
+DEFINE_string(map_type, "identity",
+              "Map operation, one of: \"arc_sum\", \"identity\", \"invert\", "
+              "\"plus (--weight)\", \"quantize (--delta)\", \"rmweight\", "
+              "\"superfinal\", \"times (--weight)\", \"to_log\", \"to_log64\", "
+              "\"to_standard\"");
+DEFINE_string(weight, "", "Weight parameter");
+
+// Entry point: reads an FST, applies the mapper named by --map_type through
+// s::Map (passing --delta and the weight parameter) and writes the FST that
+// s::Map returns.  Returns 1 on a usage error, when the input cannot be read,
+// or when --map_type is not one of the known names.
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+  using fst::script::FstClass;
+  using fst::script::MutableFstClass;
+  using fst::script::VectorFstClass;
+
+  string usage = "Applies an operation to each arc of an FST.\n\n Usage: ";
+  usage += argv[0];
+  usage += " [in.fst [out.fst]]\n";
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  if (argc > 3) {
+    ShowUsage();
+    return 1;
+  }
+
+  // Missing arguments and an input name of "-" both yield an empty name.
+  string in_name;
+  if (argc > 1 && strcmp(argv[1], "-") != 0) in_name = argv[1];
+  string out_name;
+  if (argc > 2) out_name = argv[2];
+
+  FstClass *ifst = FstClass::Read(in_name);
+  if (!ifst) return 1;
+
+  // Without --weight the parameter defaults to One() for the "times" mapper
+  // and to Zero() for every other mapper.
+  s::WeightClass w = FLAGS_weight.empty() ?
+      (FLAGS_map_type == "times" ?
+       s::WeightClass::One() : s::WeightClass::Zero()) :
+      s::WeightClass(ifst->WeightType(), FLAGS_weight);
+
+  // Accepted --map_type names and the mapper each one selects.
+  static const struct {
+    const char *name;
+    s::MapType type;
+  } kMapTypes[] = {
+    { "arc_sum", s::ARC_SUM_MAPPER },
+    { "identity", s::IDENTITY_MAPPER },
+    { "invert", s::INVERT_MAPPER },
+    { "plus", s::PLUS_MAPPER },
+    { "quantize", s::QUANTIZE_MAPPER },
+    { "rmweight", s::RMWEIGHT_MAPPER },
+    { "superfinal", s::SUPERFINAL_MAPPER },
+    { "times", s::TIMES_MAPPER },
+    { "to_log", s::TO_LOG_MAPPER },
+    { "to_log64", s::TO_LOG64_MAPPER },
+    { "to_standard", s::TO_STD_MAPPER },
+  };
+  const size_t num_map_types = sizeof(kMapTypes) / sizeof(kMapTypes[0]);
+
+  // Linear search for the requested name; falling off the end means the
+  // name is unknown.
+  size_t idx = 0;
+  while (idx < num_map_types && FLAGS_map_type != kMapTypes[idx].name)
+    ++idx;
+  if (idx == num_map_types) {
+    LOG(ERROR) << argv[0] << ": Unknown map type \""
+               << FLAGS_map_type << "\"\n";
+    return 1;
+  }
+  s::MapType mt = kMapTypes[idx].type;
+
+  FstClass *ofst = s::Map(*ifst, mt, FLAGS_delta, w);
+
+  ofst->Write(out_name);
+
+  return 0;
+}
diff --git a/src/bin/fstminimize.cc b/src/bin/fstminimize.cc
new file mode 100644
index 0000000..8d9d635
--- /dev/null
+++ b/src/bin/fstminimize.cc
@@ -0,0 +1,67 @@
+// fstminimize.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+//
+// \file
+// Minimizes a deterministic FSA.
+//
+
+#include <fst/script/minimize.h>
+
+DEFINE_double(delta, fst::kDelta, "Comparison/quantization delta");
+
+
+// Entry point: reads a mutable FST and runs s::Minimize on it with --delta.
+// When a third positional argument is given, a second VectorFstClass is
+// created, passed to s::Minimize and written as well.  Returns 1 on a usage
+// error, when both output names are empty while a second output was
+// requested, or when the input cannot be read.
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+  using fst::script::FstClass;
+  using fst::script::MutableFstClass;
+  using fst::script::VectorFstClass;
+
+  string usage = "Minimizes a deterministic FST.\n\n Usage: ";
+  usage += argv[0];
+  usage += " [in.fst [out1.fst [out2.fst]]]\n";
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  if (argc > 4) {
+    ShowUsage();
+    return 1;
+  }
+
+  // Missing arguments and names equal to "-" all yield an empty name.
+  string in_name;
+  if (argc > 1 && strcmp(argv[1], "-") != 0) in_name = argv[1];
+  string out1_name;
+  if (argc > 2 && strcmp(argv[2], "-") != 0) out1_name = argv[2];
+  string out2_name;
+  if (argc > 3 && strcmp(argv[3], "-") != 0) out2_name = argv[3];
+
+  const bool want_second_output = argc > 3;
+
+  if (want_second_output && out1_name.empty() && out2_name.empty()) {
+    LOG(ERROR) << "Both outputs can't be standard out.";
+    return 1;
+  }
+
+  MutableFstClass *fst1 = MutableFstClass::Read(in_name, true);
+  if (!fst1) return 1;
+
+  // The second FST exists only when a second output name was supplied.
+  MutableFstClass *fst2 = 0;
+  if (want_second_output)
+    fst2 = new VectorFstClass(fst1->ArcType());
+
+  s::Minimize(fst1, fst2, FLAGS_delta);
+
+  fst1->Write(out1_name);
+  if (fst2)
+    fst2->Write(out2_name);
+
+  return 0;
+}
diff --git a/src/bin/fstprint.cc b/src/bin/fstprint.cc
new file mode 100644
index 0000000..b42bb7e
--- /dev/null
+++ b/src/bin/fstprint.cc
@@ -0,0 +1,105 @@
+// fstprint.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+//
+// \file
+// Prints out binary FSTs in simple text format used by AT&T
+// (see http://www.research.att.com/projects/mohri/fsm/doc4/fsm.5.html).
+
+#include <fst/script/print.h>
+
+DEFINE_bool(acceptor, false, "Input in acceptor format");
+DEFINE_string(isymbols, "", "Input label symbol table");
+DEFINE_string(osymbols, "", "Output label symbol table");
+DEFINE_string(ssymbols, "", "State label symbol table");
+DEFINE_bool(numeric, false, "Print numeric labels");
+DEFINE_string(save_isymbols, "", "Save input symbol table to file");
+DEFINE_string(save_osymbols, "", "Save output symbol table to file");
+DEFINE_bool(show_weight_one, false,
+            "Print/draw arc weights and final weights equal to Weight::One()");
+DEFINE_bool(allow_negative_labels, false,
+            "Allow negative labels (not recommended; may cause conflicts)");
+
+// Entry point: reads a binary FST and prints it with s::PrintFst, either to
+// std::cout or to the file named by the second positional argument.
+//
+// Symbol tables come from --isymbols/--osymbols/--ssymbols when given;
+// otherwise the input and output tables fall back to those attached to the
+// FST.  --numeric disables all of them.  The tables in use can be saved with
+// --save_isymbols/--save_osymbols.
+//
+// Returns 1 (or calls exit(1)) on a usage error, an unreadable input, an
+// output file that cannot be opened, or a symbol table that cannot be read.
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+  using fst::ostream;
+  using fst::SymbolTable;
+
+  string usage = "Prints out binary FSTs in simple text format.\n\n Usage: ";
+  usage += argv[0];
+  usage += " [binary.fst [text.fst]]\n";
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  if (argc > 3) {
+    ShowUsage();
+    return 1;
+  }
+
+  // Missing arguments and an input name of "-" both yield an empty name.
+  string in_name = (argc > 1 && strcmp(argv[1], "-") != 0) ? argv[1] : "";
+  string out_name = argc > 2 ? argv[2] : "";
+
+  s::FstClass *fst = s::FstClass::Read(in_name);
+  if (!fst) return 1;
+
+  // Print to std::cout unless an output file name was supplied.
+  ostream *ostrm = &std::cout;
+  string dest = "standard output";
+  if (argc == 3) {
+    dest = out_name;
+    ostrm = new fst::ofstream(out_name.c_str());
+    if (!*ostrm) {
+      LOG(ERROR) << argv[0] << ": Open failed, file = " << out_name;
+      delete ostrm;  // do not leak the stream on the failure path
+      return 1;
+    }
+  }
+  ostrm->precision(9);
+
+  const SymbolTable *isyms = 0, *osyms = 0, *ssyms = 0;
+
+  if (!FLAGS_isymbols.empty() && !FLAGS_numeric) {
+    isyms = SymbolTable::ReadText(FLAGS_isymbols, FLAGS_allow_negative_labels);
+    if (!isyms) exit(1);
+  }
+
+  if (!FLAGS_osymbols.empty() && !FLAGS_numeric) {
+    osyms = SymbolTable::ReadText(FLAGS_osymbols, FLAGS_allow_negative_labels);
+    if (!osyms) exit(1);
+  }
+
+  if (!FLAGS_ssymbols.empty() && !FLAGS_numeric) {
+    ssyms = SymbolTable::ReadText(FLAGS_ssymbols);
+    if (!ssyms) exit(1);
+  }
+
+  // Fall back to the symbol tables carried by the FST itself.
+  if (!isyms && !FLAGS_numeric)
+    isyms = fst->InputSymbols();
+  if (!osyms && !FLAGS_numeric)
+    osyms = fst->OutputSymbols();
+
+  s::PrintFst(*fst, *ostrm, dest, isyms, osyms, ssyms,
+              FLAGS_acceptor, FLAGS_show_weight_one);
+
+  if (isyms && !FLAGS_save_isymbols.empty())
+    isyms->WriteText(FLAGS_save_isymbols);
+
+  if (osyms && !FLAGS_save_osymbols.empty())
+    osyms->WriteText(FLAGS_save_osymbols);
+
+  // Only a stream allocated above is owned here; std::cout is not.
+  if (ostrm != &std::cout)
+    delete ostrm;
+  return 0;
+}
diff --git a/src/bin/fstproject.cc b/src/bin/fstproject.cc
new file mode 100644
index 0000000..a67f431
--- /dev/null
+++ b/src/bin/fstproject.cc
@@ -0,0 +1,58 @@
+// fstproject.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to work with FstClass
+//
+// \file
+// Projects a transduction onto its input or output language.
+//
+
+#include <fst/script/project.h>
+
+DEFINE_bool(project_output, false, "Project on output (vs. input)");
+
+// Entry point: reads a mutable FST, applies s::Project with PROJECT_INPUT
+// (default) or PROJECT_OUTPUT (--project_output) and writes the result.
+// Returns 1 on a usage error or when the input cannot be read.
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+  using fst::script::FstClass;
+  using fst::script::MutableFstClass;
+
+  string usage = "Projects a transduction onto its input"
+      " or output language.\n\n Usage: ";
+  usage += argv[0];
+  usage += " [in.fst [out.fst]]\n";
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  if (argc > 3) {
+    ShowUsage();
+    return 1;
+  }
+
+  // Missing arguments and an input name of "-" both yield an empty name.
+  string in_name;
+  if (argc > 1 && strcmp(argv[1], "-") != 0) in_name = argv[1];
+  string out_name;
+  if (argc > 2) out_name = argv[2];
+
+  MutableFstClass *fst = MutableFstClass::Read(in_name, true);
+  if (!fst) return 1;
+
+  fst::ProjectType project_type = fst::PROJECT_INPUT;
+  if (FLAGS_project_output) project_type = fst::PROJECT_OUTPUT;
+
+  s::Project(fst, project_type);
+
+  fst->Write(out_name);
+
+  return 0;
+}
diff --git a/src/bin/fstprune.cc b/src/bin/fstprune.cc
new file mode 100644
index 0000000..2041b7c
--- /dev/null
+++ b/src/bin/fstprune.cc
@@ -0,0 +1,64 @@
+// fstprune.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+//
+// \file
+// Prunes states and arcs of an FST w.r.t. the shortest path weight.
+//
+
+#include <fst/script/prune.h>
+
+DEFINE_double(delta, fst::kDelta, "Comparison/quantization delta");
+DEFINE_int64(nstate, fst::kNoStateId, "State number threshold");
+DEFINE_string(weight, "", "Weight threshold");
+
+
+// Entry point: reads a mutable FST, prunes it with s::Prune using the
+// thresholds from --weight and --nstate plus --delta, and writes the result.
+// Returns 1 on a usage error or when the input cannot be read.
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+  using fst::script::FstClass;
+  using fst::script::MutableFstClass;
+  using fst::script::WeightClass;
+
+  string usage = "Prunes states and arcs of an FST.\n\n Usage: ";
+  usage += argv[0];
+  usage += " [in.fst [out.fst]]\n";
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  if (argc > 3) {
+    ShowUsage();
+    return 1;
+  }
+
+  // Missing arguments and an input name of "-" both yield an empty name.
+  string in_name;
+  if (argc > 1 && strcmp(argv[1], "-") != 0) in_name = argv[1];
+  string out_name;
+  if (argc > 2) out_name = argv[2];
+
+  MutableFstClass *fst = MutableFstClass::Read(in_name, true);
+  if (!fst) return 1;
+
+  // An empty --weight selects WeightClass::Zero(); otherwise the flag text is
+  // handed to WeightClass along with the FST's weight type.
+  WeightClass weight_threshold = !FLAGS_weight.empty() ?
+      WeightClass(fst->WeightType(), FLAGS_weight) :
+      WeightClass::Zero();
+
+  s::PruneOptions opts(weight_threshold, FLAGS_nstate, 0, FLAGS_delta);
+
+  s::Prune(fst, opts);
+
+  fst->Write(out_name);
+
+  return 0;
+}
diff --git a/src/bin/fstpush.cc b/src/bin/fstpush.cc
new file mode 100644
index 0000000..3b849b5
--- /dev/null
+++ b/src/bin/fstpush.cc
@@ -0,0 +1,77 @@
+// fstpush.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+// \file
+// Pushes weights and/or output labels in an FST toward the initial or
+// final states.
+
+#include <fst/script/push.h>
+
+DEFINE_double(delta, fst::kDelta, "Comparison/quantization delta");
+DEFINE_bool(push_weights, false, "Push weights");
+DEFINE_bool(push_labels, false, "Push output labels");
+DEFINE_bool(remove_total_weight, false,
+            "Remove total weight when pushing weights");
+DEFINE_bool(remove_common_affix, false,
+            "Remove common prefix/suffix when pushing labels");
+DEFINE_bool(to_final, false, "Push/reweight to final (vs. to initial) states");
+
+
+// Entry point: reads an FST, calls s::Push with a bit mask built from the
+// --push_* and --remove_* flags, reweighting to the initial states (default)
+// or the final states (--to_final), and writes the resulting VectorFstClass.
+// Returns 1 on a usage error or when the input cannot be read.
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+  using fst::script::FstClass;
+  using fst::script::VectorFstClass;
+
+  string usage = "Pushes weights and/or olabels in an FST.\n\n Usage: ";
+  usage += argv[0];
+  usage += " [in.fst [out.fst]]\n";
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  if (argc > 3) {
+    ShowUsage();
+    return 1;
+  }
+
+  // Missing arguments and an input name of "-" both yield an empty name.
+  string in_name;
+  if (argc > 1 && strcmp(argv[1], "-") != 0) in_name = argv[1];
+  string out_name;
+  if (argc > 2) out_name = argv[2];
+
+  FstClass *ifst = FstClass::Read(in_name);
+  if (!ifst) return 1;
+
+  // Each enabled flag contributes its bit to the push mask.
+  uint32 flags = 0;
+  flags |= FLAGS_push_weights ? fst::kPushWeights : 0;
+  flags |= FLAGS_push_labels ? fst::kPushLabels : 0;
+  flags |= FLAGS_remove_total_weight ? fst::kPushRemoveTotalWeight : 0;
+  flags |= FLAGS_remove_common_affix ? fst::kPushRemoveCommonAffix : 0;
+
+  // The output FST is created with the same arc type as the input.
+  VectorFstClass ofst(ifst->ArcType());
+
+  if (!FLAGS_to_final) {
+    s::Push(*ifst, &ofst, flags, fst::REWEIGHT_TO_INITIAL, FLAGS_delta);
+  } else {
+    s::Push(*ifst, &ofst, flags, fst::REWEIGHT_TO_FINAL, FLAGS_delta);
+  }
+
+  ofst.Write(out_name);
+
+  return 0;
+}
diff --git a/src/bin/fstrandgen.cc b/src/bin/fstrandgen.cc
new file mode 100644
index 0000000..f48d0cf
--- /dev/null
+++ b/src/bin/fstrandgen.cc
@@ -0,0 +1,82 @@
+// fstrandgen.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+//
+// \file
+// Generates random paths through an FST.
+
+#include <fst/script/randgen.h>
+
+// Command-line flags. Note that the default seed is time(0), so the seed
+// differs between runs unless --seed is given; main() logs the seed in use
+// via VLOG(1). The select string is mapped to an arc selector in main().
+DEFINE_int32(max_length, INT_MAX, "Maximum path length");
+DEFINE_int64(npath, 1, "Number of paths to generate");
+DEFINE_int32(seed, time(0), "Random seed");
+DEFINE_string(select, "uniform", "Selection type: one of: "
+ " \"uniform\", \"log_prob\" (when appropriate),"
+ " \"fast_log_prob\" (when appropriate)");
+DEFINE_bool(weighted, false,
+ "Output tree weighted by path count vs. unweighted paths");
+DEFINE_bool(remove_total_weight, false,
+ "Remove total weight when output weighted");
+
+// Entry point of fstrandgen: reads an FST, maps --select onto an arc
+// selector, generates random paths with RandGen and writes the result.
+// Returns 1 on a usage error, unreadable input or unknown selection type.
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+  using fst::script::FstClass;
+  using fst::script::VectorFstClass;
+
+  string usage = "Generates random paths through an FST.\n\n Usage: ";
+  usage.append(argv[0]);
+  usage.append(" [in.fst [out.fst]]\n");
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  if (argc > 3) {
+    ShowUsage();
+    return 1;
+  }
+
+  VLOG(1) << argv[0] << ": Seed = " << FLAGS_seed;
+
+  // A missing argument, or "-" for the input, leaves the name empty.
+  string source_path;
+  if (argc > 1 && strcmp(argv[1], "-") != 0) source_path = argv[1];
+  string target_path;
+  if (argc > 2) target_path = argv[2];
+
+  FstClass *input = FstClass::Read(source_path);
+  if (!input) return 1;
+
+  VectorFstClass output(input->ArcType());
+
+  // Reject unknown selection names before picking the selector.
+  const bool want_uniform = FLAGS_select == "uniform";
+  const bool want_log_prob = FLAGS_select == "log_prob";
+  const bool want_fast_log_prob = FLAGS_select == "fast_log_prob";
+  if (!want_uniform && !want_log_prob && !want_fast_log_prob) {
+    LOG(ERROR) << argv[0] << ": Unknown selection type \""
+               << FLAGS_select << "\"\n";
+    return 1;
+  }
+  const s::RandArcSelection selector =
+      want_uniform ? s::UNIFORM_ARC_SELECTOR
+                   : (want_log_prob ? s::LOG_PROB_ARC_SELECTOR
+                                    : s::FAST_LOG_PROB_ARC_SELECTOR);
+
+  const fst::RandGenOptions<s::RandArcSelection> options(
+      selector, FLAGS_max_length, FLAGS_npath,
+      FLAGS_weighted, FLAGS_remove_total_weight);
+  s::RandGen(*input, &output, FLAGS_seed, options);
+
+  output.Write(target_path);
+  return 0;
+}
diff --git a/src/bin/fstrelabel.cc b/src/bin/fstrelabel.cc
new file mode 100644
index 0000000..cc86450
--- /dev/null
+++ b/src/bin/fstrelabel.cc
@@ -0,0 +1,117 @@
+// fstrelabel.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: johans@google.com (Johan Schalkwyk)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+//
+// \file
+// Relabel input or output space of Fst
+//
+
+#include <string>
+#include <vector>
+using std::vector;
+#include <utility>
+using std::pair; using std::make_pair;
+
+#include <fst/script/relabel.h>
+#include <fst/script/weight-class.h>
+#include <fst/util.h>
+
+// Command-line flags. main() uses the symbol-table flags when either
+// relabel_isymbols or relabel_osymbols is non-empty; otherwise it falls back
+// to the numeric pair files named by relabel_ipairs / relabel_opairs.
+DEFINE_string(isymbols, "", "Input label symbol table");
+DEFINE_string(osymbols, "", "Output label symbol table");
+DEFINE_string(relabel_isymbols, "", "Input symbol set to relabel to");
+DEFINE_string(relabel_osymbols, "", "Output symbol set to relabel to");
+DEFINE_string(relabel_ipairs, "", "Input relabel pairs (numeric)");
+DEFINE_string(relabel_opairs, "", "Output relabel pairs (numeric)");
+
+DEFINE_bool(allow_negative_labels, false,
+            "Allow negative labels (not recommended; may cause conflicts)");
+
+// Entry point of fstrelabel: reads a mutable FST and relabels its input
+// and/or output labels, either through symbol tables (when a -relabel_?symbols
+// flag is set) or through numeric label-pair files, then writes the result.
+// Returns 1 on a usage error or when any input file cannot be read.
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+  using fst::SymbolTable;
+  using fst::script::FstClass;
+  using fst::script::MutableFstClass;
+
+  string usage = "Relabels the input and/or the output labels of the FST.\n\n"
+      " Usage: ";
+  usage += argv[0];
+  usage += " [in.fst [out.fst]]\n";
+  usage += " Using SymbolTables flags:\n";
+  usage += " -relabel_isymbols isyms.txt\n";
+  usage += " -relabel_osymbols osyms.txt\n";
+  usage += " Using numeric labels flags:\n";
+  usage += " -relabel_ipairs ipairs.txt\n";
+  usage += " -relabel_opairs opairs.txt\n";
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  if (argc > 3) {
+    ShowUsage();
+    return 1;
+  }
+
+  string in_name = (argc > 1 && (strcmp(argv[1], "-") != 0)) ? argv[1] : "";
+  string out_name = argc > 2 ? argv[2] : "";
+
+  MutableFstClass *fst = MutableFstClass::Read(in_name, true);
+  if (!fst) return 1;
+
+  if (!FLAGS_relabel_isymbols.empty() || !FLAGS_relabel_osymbols.empty()) {
+    // Relabel with symbol tables.
+    // NOTE(review): SymbolTable::ReadText is expected to return a null
+    // pointer when the file cannot be read; a null table must not be passed
+    // on as if the flag had been left empty, so each read is checked.
+    bool attach_new_isymbols = (fst->InputSymbols() != 0);
+    const SymbolTable *old_isymbols = fst->InputSymbols();
+    if (!FLAGS_isymbols.empty()) {
+      old_isymbols =
+          SymbolTable::ReadText(FLAGS_isymbols, FLAGS_allow_negative_labels);
+      if (!old_isymbols) return 1;
+    }
+    const SymbolTable *relabel_isymbols = NULL;
+    if (!FLAGS_relabel_isymbols.empty()) {
+      relabel_isymbols = SymbolTable::ReadText(FLAGS_relabel_isymbols,
+                                               FLAGS_allow_negative_labels);
+      if (!relabel_isymbols) return 1;
+    }
+
+    bool attach_new_osymbols = (fst->OutputSymbols() != 0);
+    const SymbolTable *old_osymbols = fst->OutputSymbols();
+    if (!FLAGS_osymbols.empty()) {
+      old_osymbols =
+          SymbolTable::ReadText(FLAGS_osymbols, FLAGS_allow_negative_labels);
+      if (!old_osymbols) return 1;
+    }
+    const SymbolTable *relabel_osymbols = NULL;
+    if (!FLAGS_relabel_osymbols.empty()) {
+      relabel_osymbols = SymbolTable::ReadText(FLAGS_relabel_osymbols,
+                                               FLAGS_allow_negative_labels);
+      if (!relabel_osymbols) return 1;
+    }
+
+    s::Relabel(fst,
+               old_isymbols, relabel_isymbols, attach_new_isymbols,
+               old_osymbols, relabel_osymbols, attach_new_osymbols);
+  } else {
+    // Read in the numeric relabel pairs; a failed read aborts the tool.
+    typedef int64 Label;
+    vector<pair<Label, Label> > ipairs;
+    vector<pair<Label, Label> > opairs;
+    if (!FLAGS_relabel_ipairs.empty()) {
+      if (!fst::ReadLabelPairs(FLAGS_relabel_ipairs, &ipairs,
+                               FLAGS_allow_negative_labels))
+        return 1;
+    }
+    if (!FLAGS_relabel_opairs.empty()) {
+      if (!fst::ReadLabelPairs(FLAGS_relabel_opairs, &opairs,
+                               FLAGS_allow_negative_labels))
+        return 1;
+    }
+    s::Relabel(fst, ipairs, opairs);
+  }
+
+  fst->Write(out_name);
+
+  return 0;
+}
diff --git a/src/bin/fstreplace.cc b/src/bin/fstreplace.cc
new file mode 100644
index 0000000..5d1d770
--- /dev/null
+++ b/src/bin/fstreplace.cc
@@ -0,0 +1,65 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: johans@google.com (Johan Schalkwyk)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+//
+
+#include <fst/script/replace.h>
+
+// Forwarded unchanged as the last argument of Replace() in main().
+DEFINE_bool(epsilon_on_replace, false, "Create an epsilon arc when recursing");
+
+// Entry point of fstreplace: collects (label, FST) pairs from the command
+// line, starting with the root FST and its label, runs Replace over them and
+// writes the result. An even argc means the last argument names the output.
+// Returns 1 on a usage error or when any FST cannot be read.
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+  using fst::script::FstClass;
+  using fst::script::VectorFstClass;
+
+  string usage = "Recursively replaces FST arcs with other FST(s).\n\n"
+      " Usage: ";
+  usage.append(argv[0]);
+  usage.append(" root.fst rootlabel [rule1.fst label1 ...] [out.fst]\n");
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  if (argc < 4) {
+    ShowUsage();
+    return 1;
+  }
+
+  string root_path = argv[1];
+  string target_path;
+  if (argc % 2 == 0) target_path = argv[argc - 1];
+
+  FstClass *loaded = FstClass::Read(root_path);
+  if (!loaded) return 1;
+
+  typedef int64 Label;
+  typedef pair<Label, const s::FstClass* > FstTuple;
+  vector<FstTuple> fst_tuples;
+  Label root = atoll(argv[2]);
+  fst_tuples.push_back(make_pair(root, loaded));
+
+  // Remaining arguments come in (rule.fst, label) pairs.
+  for (int arg = 3; arg + 1 < argc; arg += 2) {
+    loaded = s::FstClass::Read(argv[arg]);
+    if (!loaded) return 1;
+    Label rule_label = atoll(argv[arg + 1]);
+    fst_tuples.push_back(make_pair(rule_label, loaded));
+  }
+
+  // The output takes the arc type of the FST that was read last.
+  VectorFstClass ofst(loaded->ArcType());
+  Replace(fst_tuples, &ofst, root, FLAGS_epsilon_on_replace);
+
+  ofst.Write(target_path);
+
+  return 0;
+}
diff --git a/src/bin/fstreverse.cc b/src/bin/fstreverse.cc
new file mode 100644
index 0000000..7507ffa
--- /dev/null
+++ b/src/bin/fstreverse.cc
@@ -0,0 +1,59 @@
+// fstreverse.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) Changed to use FstClass
+//
+// \file
+// Reverses the paths in an FST.
+//
+
+#include <string>
+
+#include <fst/script/reverse.h>
+#include <fst/script/fst-class.h>
+#include <iostream>
+#include <fstream>
+
+// Entry point of fstreverse: reads an FST, reverses its paths into a new
+// VectorFstClass of the same arc type and writes the result.
+// Returns 1 on a usage error or when the input cannot be read.
+int main(int argc, char **argv) {
+  using fst::script::FstClass;
+  using fst::script::VectorFstClass;
+  using fst::script::Reverse;
+
+  string usage = "Reverses the paths in an FST.\n\n Usage: ";
+  usage += argv[0];
+  usage += " [in.fst [out.fst]]\n";
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  if (argc > 3) {
+    ShowUsage();
+    return 1;
+  }
+
+  string in_name = (argc > 1 && (strcmp(argv[1], "-") != 0)) ? argv[1] : "";
+  string out_name = argc > 2 ? argv[2] : "";
+
+  FstClass *ifst = FstClass::Read(in_name);
+  if (!ifst) return 1;
+
+  // Automatic storage: the output is destroyed on return instead of being
+  // leaked from the heap.
+  VectorFstClass out(ifst->ArcType());
+
+  Reverse(*ifst, &out);
+
+  out.Write(out_name);
+
+  return 0;
+}
diff --git a/src/bin/fstreweight.cc b/src/bin/fstreweight.cc
new file mode 100644
index 0000000..3913a6d
--- /dev/null
+++ b/src/bin/fstreweight.cc
@@ -0,0 +1,63 @@
+// fstreweight.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+//
+// \file
+// Reweights an FST.
+//
+
+#include <fst/script/reweight.h>
+#include <fst/script/text-io.h>
+
+// When set, main() reweights with REWEIGHT_TO_FINAL instead of
+// REWEIGHT_TO_INITIAL.
+DEFINE_bool(to_final, false, "Push/reweight to final (vs. to initial) states");
+
+// Entry point of fstreweight: reads a mutable FST and a potentials file,
+// reweights the FST in place and writes it out.
+// Returns 1 on a usage error or when the FST or the potentials cannot be read.
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+  using fst::script::FstClass;
+  using fst::script::MutableFstClass;
+
+  string usage = "Reweights an FST.\n\n Usage: ";
+  usage.append(argv[0]);
+  usage.append(" in.fst potential.txt [out.fst]\n");
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  // Exactly two or three positional arguments are accepted.
+  if (!(argc == 3 || argc == 4)) {
+    ShowUsage();
+    return 1;
+  }
+
+  string fst_path = argv[1];
+  string potentials_path = argv[2];
+  string target_path;
+  if (argc > 3) target_path = argv[3];
+
+  MutableFstClass *fst = MutableFstClass::Read(fst_path, true);
+  if (!fst) return 1;
+
+  vector<s::WeightClass> potential;
+  const bool potentials_ok =
+      s::ReadPotentials(fst->WeightType(), potentials_path, &potential);
+  if (!potentials_ok) return 1;
+
+  fst::ReweightType direction = fst::REWEIGHT_TO_INITIAL;
+  if (FLAGS_to_final) direction = fst::REWEIGHT_TO_FINAL;
+
+  s::Reweight(fst, potential, direction);
+  fst->Write(target_path);
+
+  return 0;
+}
diff --git a/src/bin/fstrmepsilon.cc b/src/bin/fstrmepsilon.cc
new file mode 100644
index 0000000..63a80b8
--- /dev/null
+++ b/src/bin/fstrmepsilon.cc
@@ -0,0 +1,89 @@
+// fstrmepsilon.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+//
+// \file
+// Removes epsilons from an FST.
+//
+
+#include <fst/script/rmepsilon.h>
+
+// Command-line flags. connect, delta, nstate and the parsed weight threshold
+// are bundled into RmEpsilonOptions in main(); an empty weight string makes
+// main() use WeightClass::Zero() as the threshold. queue_type is mapped to an
+// fst::QueueType and reverse is passed straight to RmEpsilon.
+DEFINE_bool(connect, true, "Trim output");
+DEFINE_double(delta, fst::kDelta, "Comparison/quantization delta");
+DEFINE_int64(nstate, fst::kNoStateId, "State number threshold");
+DEFINE_bool(reverse, false, "Perform in the reverse direction");
+DEFINE_string(weight, "", "Weight threshold");
+DEFINE_string(queue_type, "auto", "Queue type: one of: \"auto\", "
+ "\"fifo\", \"lifo\", \"shortest\", \"state\", \"top\"");
+
+// Entry point of fstrmepsilon: reads an FST, removes epsilons according to
+// the command-line options and writes the result.
+// Returns 1 on a usage error, unreadable input or unknown queue type.
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+  using fst::script::FstClass;
+  using fst::script::MutableFstClass;
+  using fst::script::VectorFstClass;
+  using fst::script::WeightClass;
+
+  string usage = "Removes epsilons from an FST.\n\n Usage: ";
+  usage += argv[0];
+  usage += " [in.fst [out.fst]]\n";
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  if (argc > 3) {
+    ShowUsage();
+    return 1;
+  }
+
+  string in_fname = (argc > 1 && strcmp(argv[1], "-") != 0) ? argv[1] : "";
+  string out_fname = argc > 2 ? argv[2] : "";
+
+  FstClass *ifst = FstClass::Read(in_fname);
+  if (!ifst) return 1;
+
+  // An empty --weight falls back to WeightClass::Zero() as the threshold.
+  WeightClass weight_threshold = FLAGS_weight.empty() ?
+      WeightClass::Zero() :
+      WeightClass(ifst->WeightType(), FLAGS_weight);
+
+  // Translate --queue_type into the matching queue discipline.
+  fst::QueueType qt;
+
+  if (FLAGS_queue_type == "auto") {
+    qt = fst::AUTO_QUEUE;
+  } else if (FLAGS_queue_type == "fifo") {
+    qt = fst::FIFO_QUEUE;
+  } else if (FLAGS_queue_type == "lifo") {
+    qt = fst::LIFO_QUEUE;
+  } else if (FLAGS_queue_type == "shortest") {
+    qt = fst::SHORTEST_FIRST_QUEUE;
+  } else if (FLAGS_queue_type == "state") {
+    qt = fst::STATE_ORDER_QUEUE;
+  } else if (FLAGS_queue_type == "top") {
+    qt = fst::TOP_ORDER_QUEUE;
+  } else {
+    LOG(ERROR) << "Unknown or unsupported queue type: " << FLAGS_queue_type;
+    return 1;
+  }
+
+  s::RmEpsilonOptions opts(qt, FLAGS_delta, FLAGS_connect,
+                           weight_threshold, FLAGS_nstate);
+
+  // Automatic storage: the output is destroyed on return instead of being
+  // leaked from the heap; its address converts to MutableFstClass*.
+  VectorFstClass ofst(ifst->ArcType());
+  s::RmEpsilon(*ifst, &ofst, FLAGS_reverse, opts);
+
+  ofst.Write(out_fname);
+
+  return 0;
+}
diff --git a/src/bin/fstshortestdistance.cc b/src/bin/fstshortestdistance.cc
new file mode 100644
index 0000000..01f5617
--- /dev/null
+++ b/src/bin/fstshortestdistance.cc
@@ -0,0 +1,93 @@
+// fstshortestdistance.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+//
+// \file
+// Find shortest distances in an FST.
+
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/script/shortest-distance.h>
+#include <fst/script/text-io.h>
+
+// Command-line flags. main() rejects --reverse combined with a queue type
+// other than "auto"; nstate is only used on the non-reverse path.
+DEFINE_bool(reverse, false, "Perform in the reverse direction");
+DEFINE_double(delta, fst::kDelta, "Comparison/quantization delta");
+DEFINE_int64(nstate, fst::kNoStateId, "State number threshold");
+DEFINE_string(queue_type, "auto", "Queue type: one of: \"auto\", "
+              "\"fifo\", \"lifo\", \"shortest\", \"state\", \"top\"");
+
+// Entry point of fstshortestdistance: reads an FST, computes shortest
+// distances (optionally in the reverse direction) and writes them with
+// WritePotentials. Returns 1 on a usage error, unreadable input, unknown
+// queue type, or a non-default queue combined with --reverse.
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+  using fst::script::FstClass;
+
+  string usage = "Finds shortest distance(s) in an FST.\n\n Usage: ";
+  usage.append(argv[0]);
+  usage.append(" [in.fst [distance.txt]]\n");
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  if (argc > 3) {
+    ShowUsage();
+    return 1;
+  }
+
+  // A missing argument, or "-" for the input, leaves the name empty.
+  string source_path;
+  if (argc > 1 && strcmp(argv[1], "-") != 0) source_path = argv[1];
+  string target_path;
+  if (argc > 2) target_path = argv[2];
+
+  FstClass *input = FstClass::Read(source_path);
+  if (!input) return 1;
+
+  vector<s::WeightClass> distance;
+
+  // Look up --queue_type in a name -> queue discipline table.
+  static const struct {
+    const char *name;
+    fst::QueueType type;
+  } kQueueNames[] = {
+    {"auto", fst::AUTO_QUEUE},
+    {"fifo", fst::FIFO_QUEUE},
+    {"lifo", fst::LIFO_QUEUE},
+    {"shortest", fst::SHORTEST_FIRST_QUEUE},
+    {"state", fst::STATE_ORDER_QUEUE},
+    {"top", fst::TOP_ORDER_QUEUE},
+  };
+  const size_t kNumQueueNames = sizeof(kQueueNames) / sizeof(kQueueNames[0]);
+  size_t match = 0;
+  while (match < kNumQueueNames && FLAGS_queue_type != kQueueNames[match].name)
+    ++match;
+  if (match == kNumQueueNames) {
+    LOG(ERROR) << "Unknown or unsupported queue type: " << FLAGS_queue_type;
+    return 1;
+  }
+  const fst::QueueType qt = kQueueNames[match].type;
+
+  if (FLAGS_reverse) {
+    if (qt != fst::AUTO_QUEUE) {
+      LOG(ERROR) << "Specifying a non-default queue with reverse not supported.";
+      return 1;
+    }
+    s::ShortestDistance(*input, &distance, FLAGS_reverse, FLAGS_delta);
+  } else {
+    s::ShortestDistanceOptions opts(qt, s::ANY_ARC_FILTER,
+                                    FLAGS_nstate, FLAGS_delta);
+    s::ShortestDistance(*input, &distance, opts);
+  }
+
+  s::WritePotentials(target_path, distance);
+
+  return 0;
+}
diff --git a/src/bin/fstshortestpath.cc b/src/bin/fstshortestpath.cc
new file mode 100644
index 0000000..4a9e720
--- /dev/null
+++ b/src/bin/fstshortestpath.cc
@@ -0,0 +1,92 @@
+// fstshortestpath.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+//
+// \file
+// Find shortest path(s) in an FST.
+
+#include <fst/script/shortest-path.h>
+
+// Command-line flags. All of them are bundled into ShortestPathOptions in
+// main(); an empty weight string makes main() use WeightClass::Zero() as the
+// threshold.
+DEFINE_double(delta, fst::kDelta, "Comparison/quantization delta");
+DEFINE_int64(nshortest, 1, "Return N-shortest paths");
+DEFINE_bool(unique, false, "Return unique strings");
+DEFINE_string(weight, "", "Weight threshold");
+DEFINE_int64(nstate, fst::kNoStateId, "State number threshold");
+DEFINE_string(queue_type, "auto", "Queue type: one of \"auto\", "
+              "\"fifo\", \"lifo\", \"shortest\", \"state\", \"top\"");
+
+// Entry point of fstshortestpath: reads an FST, computes the N shortest
+// paths according to the command-line options and writes the result.
+// Returns 1 on a usage error, unreadable input or unknown queue type.
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+  using fst::script::FstClass;
+  using fst::script::MutableFstClass;
+  using fst::script::VectorFstClass;
+  using fst::script::WeightClass;
+
+  string usage = "Finds shortest path(s) in an FST.\n\n Usage: ";
+  usage.append(argv[0]);
+  usage.append(" [in.fst [out.fst]]\n");
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  if (argc > 3) {
+    ShowUsage();
+    return 1;
+  }
+
+  // A missing argument, or "-" for the input, leaves the name empty.
+  string source_path;
+  if (argc > 1 && strcmp(argv[1], "-") != 0) source_path = argv[1];
+  string target_path;
+  if (argc > 2) target_path = argv[2];
+
+  FstClass *input = FstClass::Read(source_path);
+  if (!input) return 1;
+
+  // An empty --weight falls back to WeightClass::Zero() as the threshold.
+  WeightClass weight_threshold = FLAGS_weight.empty() ?
+      WeightClass::Zero() :
+      WeightClass(input->WeightType(), FLAGS_weight);
+
+  VectorFstClass output(input->ArcType());
+  vector<WeightClass> distance;
+
+  // Look up --queue_type in a name -> queue discipline table.
+  static const struct {
+    const char *name;
+    fst::QueueType type;
+  } kQueueNames[] = {
+    {"auto", fst::AUTO_QUEUE},
+    {"fifo", fst::FIFO_QUEUE},
+    {"lifo", fst::LIFO_QUEUE},
+    {"shortest", fst::SHORTEST_FIRST_QUEUE},
+    {"state", fst::STATE_ORDER_QUEUE},
+    {"top", fst::TOP_ORDER_QUEUE},
+  };
+  const size_t kNumQueueNames = sizeof(kQueueNames) / sizeof(kQueueNames[0]);
+  size_t match = 0;
+  while (match < kNumQueueNames && FLAGS_queue_type != kQueueNames[match].name)
+    ++match;
+  if (match == kNumQueueNames) {
+    LOG(ERROR) << "Unknown or unsupported queue type: " << FLAGS_queue_type;
+    return 1;
+  }
+  const fst::QueueType qt = kQueueNames[match].type;
+
+  s::ShortestPathOptions opts(
+      qt, FLAGS_nshortest, FLAGS_unique, false, FLAGS_delta,
+      false, weight_threshold, FLAGS_nstate);
+
+  s::ShortestPath(*input, &output, &distance, opts);
+
+  output.Write(target_path);
+
+  return 0;
+}
diff --git a/src/bin/fstsymbols.cc b/src/bin/fstsymbols.cc
new file mode 100644
index 0000000..c07f1be
--- /dev/null
+++ b/src/bin/fstsymbols.cc
@@ -0,0 +1,117 @@
+// fstsymbols.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+//
+// \file
+// Performs operations (set, clear, relabel) on the symbol tables
+// attached to the input Fst.
+//
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/verify.h>
+#include <fst/util.h>
+
+// Command-line flags. In main() the save_* flags are handled first, then
+// clear_*/?symbols (clear wins over set), then the numeric relabel pairs;
+// verify gates the final write on s::Verify succeeding.
+DEFINE_string(isymbols, "", "Input label symbol table");
+DEFINE_string(osymbols, "", "Output label symbol table");
+DEFINE_bool(clear_isymbols, false, "Clear input symbol table");
+DEFINE_bool(clear_osymbols, false, "Clear output symbol table");
+DEFINE_string(relabel_ipairs, "", "Input relabel pairs (numeric)");
+DEFINE_string(relabel_opairs, "", "Output relabel pairs (numeric)");
+DEFINE_string(save_isymbols, "", "Save fst file's input symbol table to file");
+DEFINE_string(save_osymbols, "", "Save fst file's output symbol table to file");
+DEFINE_bool(allow_negative_labels, false,
+            "Allow negative labels (not recommended; may cause conflicts)");
+DEFINE_bool(verify, false, "Verify fst properties before saving");
+
+// Entry point of fstsymbols: reads a mutable FST and, depending on the
+// flags, saves, clears, replaces or relabels its attached symbol tables
+// before writing the FST back out.
+// Returns 1 on a usage error, unreadable input, an unreadable relabel-pairs
+// file, or when --verify is set and verification fails.
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+  using fst::SymbolTable;
+
+  string usage = "Performs operations (set, clear, relabel) on the symbol"
+      " tables attached to an FST.\n\n Usage: ";
+  usage += argv[0];
+  usage += " [in.fst [out.fst]]\n";
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  if (argc > 3) {
+    ShowUsage();
+    return 1;
+  }
+
+  string in_fname = argc > 1 && strcmp(argv[1], "-") != 0 ? argv[1] : "";
+  string out_fname = argc > 2 ? argv[2] : "";
+
+  s::MutableFstClass *fst = s::MutableFstClass::Read(in_fname, true);
+  if (!fst) return 1;
+
+  // Saving a missing table is reported but does not abort the tool.
+  if (!FLAGS_save_isymbols.empty()) {
+    const SymbolTable *isyms = fst->InputSymbols();
+    if (isyms) {
+      isyms->WriteText(FLAGS_save_isymbols);
+    } else {
+      LOG(ERROR) << "save isymbols requested but there are no input symbols.";
+    }
+  }
+
+  if (!FLAGS_save_osymbols.empty()) {
+    const SymbolTable *osyms = fst->OutputSymbols();
+    if (osyms) {
+      osyms->WriteText(FLAGS_save_osymbols);
+    } else {
+      LOG(ERROR) << "save osymbols requested but there are no output symbols.";
+    }
+  }
+
+  // Clearing takes precedence over attaching a table read from file.
+  if (FLAGS_clear_isymbols)
+    fst->SetInputSymbols(0);
+  else if (!FLAGS_isymbols.empty())
+    fst->SetInputSymbols(
+        SymbolTable::ReadText(FLAGS_isymbols, FLAGS_allow_negative_labels));
+
+  if (FLAGS_clear_osymbols)
+    fst->SetOutputSymbols(0);
+  else if (!FLAGS_osymbols.empty())
+    fst->SetOutputSymbols(
+        SymbolTable::ReadText(FLAGS_osymbols, FLAGS_allow_negative_labels));
+
+  if (!FLAGS_relabel_ipairs.empty()) {
+    typedef int64 Label;
+    vector<pair<Label, Label> > ipairs;
+    // Stop instead of relabeling with an empty or partial pair list.
+    if (!fst::ReadLabelPairs(FLAGS_relabel_ipairs, &ipairs,
+                             FLAGS_allow_negative_labels))
+      return 1;
+    SymbolTable *isyms = RelabelSymbolTable(fst->InputSymbols(), ipairs);
+    fst->SetInputSymbols(isyms);
+    delete isyms;
+  }
+
+  if (!FLAGS_relabel_opairs.empty()) {
+    typedef int64 Label;
+    vector<pair<Label, Label> > opairs;
+    // Stop instead of relabeling with an empty or partial pair list.
+    if (!fst::ReadLabelPairs(FLAGS_relabel_opairs, &opairs,
+                             FLAGS_allow_negative_labels))
+      return 1;
+    SymbolTable *osyms = RelabelSymbolTable(fst->OutputSymbols(), opairs);
+    fst->SetOutputSymbols(osyms);
+    delete osyms;
+  }
+
+  if (FLAGS_verify && !s::Verify(*fst))
+    return 1;
+  fst->Write(out_fname);
+  return 0;
+}
diff --git a/src/bin/fstsynchronize.cc b/src/bin/fstsynchronize.cc
new file mode 100644
index 0000000..7137504
--- /dev/null
+++ b/src/bin/fstsynchronize.cc
@@ -0,0 +1,54 @@
+// fstsynchronize.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+//
+// \file
+// Synchronizes an FST.
+//
+
+#include <fst/script/synchronize.h>
+
+// Entry point of fstsynchronize: reads an FST, synchronizes it into a new
+// VectorFstClass of the same arc type and writes the result.
+// Returns 1 on a usage error or when the input cannot be read.
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+  using fst::script::FstClass;
+  using fst::script::VectorFstClass;
+
+  string usage = "Synchronizes an FST.\n\n Usage: ";
+  usage.append(argv[0]);
+  usage.append(" [in.fst [out.fst]]\n");
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  if (argc > 3) {
+    ShowUsage();
+    return 1;
+  }
+
+  // A missing argument, or "-" for the input, leaves the name empty.
+  string source_path;
+  if (argc > 1 && strcmp(argv[1], "-") != 0) source_path = argv[1];
+  string target_path;
+  if (argc > 2) target_path = argv[2];
+
+  FstClass *input = FstClass::Read(source_path);
+  if (!input) return 1;
+
+  VectorFstClass output(input->ArcType());
+  s::Synchronize(*input, &output);
+  output.Write(target_path);
+
+  return 0;
+}
diff --git a/src/bin/fsttopsort.cc b/src/bin/fsttopsort.cc
new file mode 100644
index 0000000..6f04215
--- /dev/null
+++ b/src/bin/fsttopsort.cc
@@ -0,0 +1,53 @@
+// fsttopsort.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+//
+// \file
+// Topologically sorts an FST.
+//
+
+#include <fst/script/topsort.h>
+
+// Entry point of fsttopsort: reads a mutable FST, topologically sorts it in
+// place and writes it out. A cyclic input only produces a warning; the FST
+// is still written. Returns 1 on a usage error or unreadable input.
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+  using fst::script::FstClass;
+  using fst::script::MutableFstClass;
+
+  string usage = "Topologically sorts an FST.\n\n Usage: ";
+  usage.append(argv[0]);
+  usage.append(" [in.fst [out.fst]]\n");
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  if (argc > 3) {
+    ShowUsage();
+    return 1;
+  }
+
+  // A missing argument, or "-" for the input, leaves the name empty.
+  string source_path;
+  if (argc > 1 && strcmp(argv[1], "-") != 0) source_path = argv[1];
+  string target_path;
+  if (argc > 2) target_path = argv[2];
+
+  MutableFstClass *fst = MutableFstClass::Read(source_path, true);
+  if (!fst) return 1;
+
+  if (!TopSort(fst)) {
+    LOG(WARNING) << argv[0] << ": Input FST is cyclic";
+  }
+  fst->Write(target_path);
+
+  return 0;
+}
diff --git a/src/bin/fstunion.cc b/src/bin/fstunion.cc
new file mode 100644
index 0000000..41c4f83
--- /dev/null
+++ b/src/bin/fstunion.cc
@@ -0,0 +1,67 @@
+// fstunion.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) - to use FstClass
+//
+// \file
+// Creates the union of two FSTs.
+//
+
+#include <string>
+
+#include <fst/script/union.h>
+#include <iostream>
+#include <fstream>
+
+// Entry point of fstunion: reads two FSTs, unions the second into the first
+// (which is read as a mutable FST) and writes the first one out.
+// Returns 1 on a usage error, when both inputs are given as "-", or when
+// either input cannot be read.
+int main(int argc, char **argv) {
+  using fst::script::FstClass;
+  using fst::script::MutableFstClass;
+  using fst::script::Union;
+
+  string usage = "Creates the union of two FSTs.\n\n Usage: ";
+  usage.append(argv[0]);
+  usage.append(" in1.fst in2.fst [out.fst]\n");
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  // Exactly two or three positional arguments are accepted.
+  if (!(argc == 3 || argc == 4)) {
+    ShowUsage();
+    return 1;
+  }
+
+  // "-" maps to an empty name for either input.
+  string first_path;
+  if (strcmp(argv[1], "-") != 0) first_path = argv[1];
+  string second_path;
+  if (strcmp(argv[2], "-") != 0) second_path = argv[2];
+  string target_path;
+  if (argc > 3) target_path = argv[3];
+
+  if (first_path.empty() && second_path.empty()) {
+    LOG(ERROR) << argv[0]
+               << ": Can't use standard i/o for both inputs.";
+    return 1;
+  }
+
+  MutableFstClass *first = MutableFstClass::Read(first_path, true);
+  if (!first) return 1;
+
+  FstClass *second = FstClass::Read(second_path);
+  if (!second) return 1;
+
+  Union(first, *second);
+  first->Write(target_path);
+
+  return 0;
+}
diff --git a/src/extensions/Makefile.am b/src/extensions/Makefile.am
new file mode 100644
index 0000000..64d5483
--- /dev/null
+++ b/src/extensions/Makefile.am
@@ -0,0 +1,21 @@
+# Each optional extension gets a directory variable that is assigned only
+# inside its HAVE_* conditional; when the conditional is false the variable
+# is left unset and contributes nothing to SUBDIRS below.
+# NOTE(review): the HAVE_* conditionals are presumably declared by the
+# top-level configure script -- not visible from this file.
+if HAVE_COMPACT
+compactdir = compact
+endif
+
+if HAVE_CONST
+constdir = const
+endif
+
+if HAVE_FAR
+fardir = far
+endif
+
+if HAVE_LOOKAHEAD
+lookaheaddir = lookahead
+endif
+
+if HAVE_PDT
+pdtdir = pdt
+endif
+
+# Recurse only into the extension directories selected above.
+SUBDIRS = $(compactdir) $(constdir) $(fardir) $(lookaheaddir) $(pdtdir)
diff --git a/src/extensions/Makefile.in b/src/extensions/Makefile.in
new file mode 100644
index 0000000..43a74c8
--- /dev/null
+++ b/src/extensions/Makefile.in
@@ -0,0 +1,570 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = src/extensions
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_icu.m4 \
+ $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+ $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+ $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h \
+ $(top_builddir)/src/include/fst/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+SOURCES =
+DIST_SOURCES =
+RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
+ html-recursive info-recursive install-data-recursive \
+ install-dvi-recursive install-exec-recursive \
+ install-html-recursive install-info-recursive \
+ install-pdf-recursive install-ps-recursive install-recursive \
+ installcheck-recursive installdirs-recursive pdf-recursive \
+ ps-recursive uninstall-recursive
+RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
+ distclean-recursive maintainer-clean-recursive
+AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \
+ $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \
+ distdir
+ETAGS = etags
+CTAGS = ctags
+DIST_SUBDIRS = compact const far lookahead pdt
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+am__relativize = \
+ dir0=`pwd`; \
+ sed_first='s,^\([^/]*\)/.*$$,\1,'; \
+ sed_rest='s,^[^/]*/*,,'; \
+ sed_last='s,^.*/\([^/]*\)$$,\1,'; \
+ sed_butlast='s,/*[^/]*$$,,'; \
+ while test -n "$$dir1"; do \
+ first=`echo "$$dir1" | sed -e "$$sed_first"`; \
+ if test "$$first" != "."; then \
+ if test "$$first" = ".."; then \
+ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
+ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
+ else \
+ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
+ if test "$$first2" = "$$first"; then \
+ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
+ else \
+ dir2="../$$dir2"; \
+ fi; \
+ dir0="$$dir0"/"$$first"; \
+ fi; \
+ fi; \
+ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
+ done; \
+ reldir="$$dir2"
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+ICU_CFLAGS = @ICU_CFLAGS@
+ICU_CONFIG = @ICU_CONFIG@
+ICU_CPPFLAGS = @ICU_CPPFLAGS@
+ICU_CXXFLAGS = @ICU_CXXFLAGS@
+ICU_LIBS = @ICU_LIBS@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+libfstdir = @libfstdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+lt_ECHO = @lt_ECHO@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+@HAVE_COMPACT_TRUE@compactdir = compact
+@HAVE_CONST_TRUE@constdir = const
+@HAVE_FAR_TRUE@fardir = far
+@HAVE_LOOKAHEAD_TRUE@lookaheaddir = lookahead
+@HAVE_PDT_TRUE@pdtdir = pdt
+SUBDIRS = $(compactdir) $(constdir) $(fardir) $(lookaheaddir) $(pdtdir)
+all: all-recursive
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/extensions/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign src/extensions/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+# This directory's subdirectories are mostly independent; you can cd
+# into them and run `make' without going through this Makefile.
+# To change the values of `make' variables: instead of editing Makefiles,
+# (1) if the variable is set in `config.status', edit `config.status'
+# (which will cause the Makefiles to be regenerated when you run `make');
+# (2) otherwise, pass the desired values on the `make' command line.
+$(RECURSIVE_TARGETS):
+ @fail= failcom='exit 1'; \
+ for f in x $$MAKEFLAGS; do \
+ case $$f in \
+ *=* | --[!k]*);; \
+ *k*) failcom='fail=yes';; \
+ esac; \
+ done; \
+ dot_seen=no; \
+ target=`echo $@ | sed s/-recursive//`; \
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ echo "Making $$target in $$subdir"; \
+ if test "$$subdir" = "."; then \
+ dot_seen=yes; \
+ local_target="$$target-am"; \
+ else \
+ local_target="$$target"; \
+ fi; \
+ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+ || eval $$failcom; \
+ done; \
+ if test "$$dot_seen" = "no"; then \
+ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
+ fi; test -z "$$fail"
+
+$(RECURSIVE_CLEAN_TARGETS):
+ @fail= failcom='exit 1'; \
+ for f in x $$MAKEFLAGS; do \
+ case $$f in \
+ *=* | --[!k]*);; \
+ *k*) failcom='fail=yes';; \
+ esac; \
+ done; \
+ dot_seen=no; \
+ case "$@" in \
+ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
+ *) list='$(SUBDIRS)' ;; \
+ esac; \
+ rev=''; for subdir in $$list; do \
+ if test "$$subdir" = "."; then :; else \
+ rev="$$subdir $$rev"; \
+ fi; \
+ done; \
+ rev="$$rev ."; \
+ target=`echo $@ | sed s/-recursive//`; \
+ for subdir in $$rev; do \
+ echo "Making $$target in $$subdir"; \
+ if test "$$subdir" = "."; then \
+ local_target="$$target-am"; \
+ else \
+ local_target="$$target"; \
+ fi; \
+ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+ || eval $$failcom; \
+ done && test -z "$$fail"
+tags-recursive:
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
+ done
+ctags-recursive:
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \
+ done
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ mkid -fID $$unique
+tags: TAGS
+
+TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ set x; \
+ here=`pwd`; \
+ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
+ include_option=--etags-include; \
+ empty_fix=.; \
+ else \
+ include_option=--include; \
+ empty_fix=; \
+ fi; \
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ if test "$$subdir" = .; then :; else \
+ test ! -f $$subdir/TAGS || \
+ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
+ fi; \
+ done; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: CTAGS
+CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+ @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
+ if test "$$subdir" = .; then :; else \
+ test -d "$(distdir)/$$subdir" \
+ || $(MKDIR_P) "$(distdir)/$$subdir" \
+ || exit 1; \
+ fi; \
+ done
+ @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
+ if test "$$subdir" = .; then :; else \
+ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
+ $(am__relativize); \
+ new_distdir=$$reldir; \
+ dir1=$$subdir; dir2="$(top_distdir)"; \
+ $(am__relativize); \
+ new_top_distdir=$$reldir; \
+ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
+ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
+ ($(am__cd) $$subdir && \
+ $(MAKE) $(AM_MAKEFLAGS) \
+ top_distdir="$$new_top_distdir" \
+ distdir="$$new_distdir" \
+ am__remove_distdir=: \
+ am__skip_length_check=: \
+ am__skip_mode_fix=: \
+ distdir) \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-recursive
+all-am: Makefile
+installdirs: installdirs-recursive
+installdirs-am:
+install: install-recursive
+install-exec: install-exec-recursive
+install-data: install-data-recursive
+uninstall: uninstall-recursive
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-recursive
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-recursive
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-recursive
+ -rm -f Makefile
+distclean-am: clean-am distclean-generic distclean-tags
+
+dvi: dvi-recursive
+
+dvi-am:
+
+html: html-recursive
+
+html-am:
+
+info: info-recursive
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-recursive
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-recursive
+
+install-html-am:
+
+install-info: install-info-recursive
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-recursive
+
+install-pdf-am:
+
+install-ps: install-ps-recursive
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-recursive
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-recursive
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+pdf: pdf-recursive
+
+pdf-am:
+
+ps: ps-recursive
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) ctags-recursive \
+ install-am install-strip tags-recursive
+
+.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
+ all all-am check check-am clean clean-generic clean-libtool \
+ ctags ctags-recursive distclean distclean-generic \
+ distclean-libtool distclean-tags distdir dvi dvi-am html \
+ html-am info info-am install install-am install-data \
+ install-data-am install-dvi install-dvi-am install-exec \
+ install-exec-am install-html install-html-am install-info \
+ install-info-am install-man install-pdf install-pdf-am \
+ install-ps install-ps-am install-strip installcheck \
+ installcheck-am installdirs installdirs-am maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-generic \
+ mostlyclean-libtool pdf pdf-am ps ps-am tags tags-recursive \
+ uninstall uninstall-am
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/src/extensions/compact/Makefile.am b/src/extensions/compact/Makefile.am
new file mode 100644
index 0000000..b80cd3e
--- /dev/null
+++ b/src/extensions/compact/Makefile.am
@@ -0,0 +1,52 @@
+## Build rules for the compact FST extension.
+##
+## Headers are looked up in the include directory two levels above this
+## source directory, plus whatever ICU_CPPFLAGS supplies.
+AM_CPPFLAGS = -I$(srcdir)/../../include $(ICU_CPPFLAGS)
+
+## Install directory for the libraries below; the value is substituted at
+## configure time.
+libfstdir = @libfstdir@
+## One combined library (libfstcompact.la) plus one library per
+## compact{8,16,64}_* source file.
+libfst_LTLIBRARIES = libfstcompact.la compact8_acceptor-fst.la compact8_string-fst.la compact8_unweighted-fst.la compact8_unweighted_acceptor-fst.la compact8_weighted_string-fst.la compact16_acceptor-fst.la compact16_string-fst.la compact16_unweighted-fst.la compact16_unweighted_acceptor-fst.la compact16_weighted_string-fst.la compact64_acceptor-fst.la compact64_string-fst.la compact64_unweighted-fst.la compact64_unweighted_acceptor-fst.la compact64_weighted_string-fst.la
+
+## The combined library is built from every compact*-fst.cc source and
+## carries an explicit libtool version of 0:0:0.
+libfstcompact_la_SOURCES = compact8_acceptor-fst.cc compact8_string-fst.cc compact8_unweighted-fst.cc compact8_unweighted_acceptor-fst.cc compact8_weighted_string-fst.cc compact16_acceptor-fst.cc compact16_string-fst.cc compact16_unweighted-fst.cc compact16_unweighted_acceptor-fst.cc compact16_weighted_string-fst.cc compact64_acceptor-fst.cc compact64_string-fst.cc compact64_unweighted-fst.cc compact64_unweighted_acceptor-fst.cc compact64_weighted_string-fst.cc
+libfstcompact_la_LDFLAGS = -version-info 0:0:0
+
+## Each remaining target is built from exactly one source file and linked
+## with libtool's -module flag (presumably so it can be loaded at run time
+## -- confirm against the code that loads these libraries).
+##
+## 8-bit variants.
+compact8_acceptor_fst_la_SOURCES = compact8_acceptor-fst.cc
+compact8_acceptor_fst_la_LDFLAGS = -module
+
+compact8_string_fst_la_SOURCES = compact8_string-fst.cc
+compact8_string_fst_la_LDFLAGS = -module
+
+compact8_unweighted_fst_la_SOURCES = compact8_unweighted-fst.cc
+compact8_unweighted_fst_la_LDFLAGS = -module
+
+compact8_unweighted_acceptor_fst_la_SOURCES = compact8_unweighted_acceptor-fst.cc
+compact8_unweighted_acceptor_fst_la_LDFLAGS = -module
+
+compact8_weighted_string_fst_la_SOURCES = compact8_weighted_string-fst.cc
+compact8_weighted_string_fst_la_LDFLAGS = -module
+
+## 16-bit variants.
+compact16_acceptor_fst_la_SOURCES = compact16_acceptor-fst.cc
+compact16_acceptor_fst_la_LDFLAGS = -module
+
+compact16_string_fst_la_SOURCES = compact16_string-fst.cc
+compact16_string_fst_la_LDFLAGS = -module
+
+compact16_unweighted_fst_la_SOURCES = compact16_unweighted-fst.cc
+compact16_unweighted_fst_la_LDFLAGS = -module
+
+compact16_unweighted_acceptor_fst_la_SOURCES = compact16_unweighted_acceptor-fst.cc
+compact16_unweighted_acceptor_fst_la_LDFLAGS = -module
+
+compact16_weighted_string_fst_la_SOURCES = compact16_weighted_string-fst.cc
+compact16_weighted_string_fst_la_LDFLAGS = -module
+
+## 64-bit variants.
+compact64_acceptor_fst_la_SOURCES = compact64_acceptor-fst.cc
+compact64_acceptor_fst_la_LDFLAGS = -module
+
+compact64_string_fst_la_SOURCES = compact64_string-fst.cc
+compact64_string_fst_la_LDFLAGS = -module
+
+compact64_unweighted_fst_la_SOURCES = compact64_unweighted-fst.cc
+compact64_unweighted_fst_la_LDFLAGS = -module
+
+compact64_unweighted_acceptor_fst_la_SOURCES = compact64_unweighted_acceptor-fst.cc
+compact64_unweighted_acceptor_fst_la_LDFLAGS = -module
+
+compact64_weighted_string_fst_la_SOURCES = compact64_weighted_string-fst.cc
+compact64_weighted_string_fst_la_LDFLAGS = -module
diff --git a/src/extensions/compact/Makefile.in b/src/extensions/compact/Makefile.in
new file mode 100644
index 0000000..fe31e46
--- /dev/null
+++ b/src/extensions/compact/Makefile.in
@@ -0,0 +1,771 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = src/extensions/compact
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_icu.m4 \
+ $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+ $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+ $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h \
+ $(top_builddir)/src/include/fst/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+ *) f=$$p;; \
+ esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+ if (++n[$$2] == $(am__install_max)) \
+ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+ END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__installdirs = "$(DESTDIR)$(libfstdir)"
+LTLIBRARIES = $(libfst_LTLIBRARIES)
+compact16_acceptor_fst_la_LIBADD =
+am_compact16_acceptor_fst_la_OBJECTS = compact16_acceptor-fst.lo
+compact16_acceptor_fst_la_OBJECTS = \
+ $(am_compact16_acceptor_fst_la_OBJECTS)
+compact16_acceptor_fst_la_LINK = $(LIBTOOL) --tag=CXX \
+ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
+ $(AM_CXXFLAGS) $(CXXFLAGS) \
+ $(compact16_acceptor_fst_la_LDFLAGS) $(LDFLAGS) -o $@
+compact16_string_fst_la_LIBADD =
+am_compact16_string_fst_la_OBJECTS = compact16_string-fst.lo
+compact16_string_fst_la_OBJECTS = \
+ $(am_compact16_string_fst_la_OBJECTS)
+compact16_string_fst_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
+ $(CXXFLAGS) $(compact16_string_fst_la_LDFLAGS) $(LDFLAGS) -o \
+ $@
+compact16_unweighted_fst_la_LIBADD =
+am_compact16_unweighted_fst_la_OBJECTS = compact16_unweighted-fst.lo
+compact16_unweighted_fst_la_OBJECTS = \
+ $(am_compact16_unweighted_fst_la_OBJECTS)
+compact16_unweighted_fst_la_LINK = $(LIBTOOL) --tag=CXX \
+ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
+ $(AM_CXXFLAGS) $(CXXFLAGS) \
+ $(compact16_unweighted_fst_la_LDFLAGS) $(LDFLAGS) -o $@
+compact16_unweighted_acceptor_fst_la_LIBADD =
+am_compact16_unweighted_acceptor_fst_la_OBJECTS = \
+ compact16_unweighted_acceptor-fst.lo
+compact16_unweighted_acceptor_fst_la_OBJECTS = \
+ $(am_compact16_unweighted_acceptor_fst_la_OBJECTS)
+compact16_unweighted_acceptor_fst_la_LINK = $(LIBTOOL) --tag=CXX \
+ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
+ $(AM_CXXFLAGS) $(CXXFLAGS) \
+ $(compact16_unweighted_acceptor_fst_la_LDFLAGS) $(LDFLAGS) -o \
+ $@
+compact16_weighted_string_fst_la_LIBADD =
+am_compact16_weighted_string_fst_la_OBJECTS = \
+ compact16_weighted_string-fst.lo
+compact16_weighted_string_fst_la_OBJECTS = \
+ $(am_compact16_weighted_string_fst_la_OBJECTS)
+compact16_weighted_string_fst_la_LINK = $(LIBTOOL) --tag=CXX \
+ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
+ $(AM_CXXFLAGS) $(CXXFLAGS) \
+ $(compact16_weighted_string_fst_la_LDFLAGS) $(LDFLAGS) -o $@
+compact64_acceptor_fst_la_LIBADD =
+am_compact64_acceptor_fst_la_OBJECTS = compact64_acceptor-fst.lo
+compact64_acceptor_fst_la_OBJECTS = \
+ $(am_compact64_acceptor_fst_la_OBJECTS)
+compact64_acceptor_fst_la_LINK = $(LIBTOOL) --tag=CXX \
+ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
+ $(AM_CXXFLAGS) $(CXXFLAGS) \
+ $(compact64_acceptor_fst_la_LDFLAGS) $(LDFLAGS) -o $@
+compact64_string_fst_la_LIBADD =
+am_compact64_string_fst_la_OBJECTS = compact64_string-fst.lo
+compact64_string_fst_la_OBJECTS = \
+ $(am_compact64_string_fst_la_OBJECTS)
+compact64_string_fst_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
+ $(CXXFLAGS) $(compact64_string_fst_la_LDFLAGS) $(LDFLAGS) -o \
+ $@
+compact64_unweighted_fst_la_LIBADD =
+am_compact64_unweighted_fst_la_OBJECTS = compact64_unweighted-fst.lo
+compact64_unweighted_fst_la_OBJECTS = \
+ $(am_compact64_unweighted_fst_la_OBJECTS)
+compact64_unweighted_fst_la_LINK = $(LIBTOOL) --tag=CXX \
+ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
+ $(AM_CXXFLAGS) $(CXXFLAGS) \
+ $(compact64_unweighted_fst_la_LDFLAGS) $(LDFLAGS) -o $@
+compact64_unweighted_acceptor_fst_la_LIBADD =
+am_compact64_unweighted_acceptor_fst_la_OBJECTS = \
+ compact64_unweighted_acceptor-fst.lo
+compact64_unweighted_acceptor_fst_la_OBJECTS = \
+ $(am_compact64_unweighted_acceptor_fst_la_OBJECTS)
+compact64_unweighted_acceptor_fst_la_LINK = $(LIBTOOL) --tag=CXX \
+ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
+ $(AM_CXXFLAGS) $(CXXFLAGS) \
+ $(compact64_unweighted_acceptor_fst_la_LDFLAGS) $(LDFLAGS) -o \
+ $@
+compact64_weighted_string_fst_la_LIBADD =
+am_compact64_weighted_string_fst_la_OBJECTS = \
+ compact64_weighted_string-fst.lo
+compact64_weighted_string_fst_la_OBJECTS = \
+ $(am_compact64_weighted_string_fst_la_OBJECTS)
+compact64_weighted_string_fst_la_LINK = $(LIBTOOL) --tag=CXX \
+ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
+ $(AM_CXXFLAGS) $(CXXFLAGS) \
+ $(compact64_weighted_string_fst_la_LDFLAGS) $(LDFLAGS) -o $@
+compact8_acceptor_fst_la_LIBADD =
+am_compact8_acceptor_fst_la_OBJECTS = compact8_acceptor-fst.lo
+compact8_acceptor_fst_la_OBJECTS = \
+ $(am_compact8_acceptor_fst_la_OBJECTS)
+compact8_acceptor_fst_la_LINK = $(LIBTOOL) --tag=CXX \
+ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
+ $(AM_CXXFLAGS) $(CXXFLAGS) $(compact8_acceptor_fst_la_LDFLAGS) \
+ $(LDFLAGS) -o $@
+compact8_string_fst_la_LIBADD =
+am_compact8_string_fst_la_OBJECTS = compact8_string-fst.lo
+compact8_string_fst_la_OBJECTS = $(am_compact8_string_fst_la_OBJECTS)
+compact8_string_fst_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
+ $(CXXFLAGS) $(compact8_string_fst_la_LDFLAGS) $(LDFLAGS) -o $@
+compact8_unweighted_fst_la_LIBADD =
+am_compact8_unweighted_fst_la_OBJECTS = compact8_unweighted-fst.lo
+compact8_unweighted_fst_la_OBJECTS = \
+ $(am_compact8_unweighted_fst_la_OBJECTS)
+compact8_unweighted_fst_la_LINK = $(LIBTOOL) --tag=CXX \
+ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
+ $(AM_CXXFLAGS) $(CXXFLAGS) \
+ $(compact8_unweighted_fst_la_LDFLAGS) $(LDFLAGS) -o $@
+compact8_unweighted_acceptor_fst_la_LIBADD =
+am_compact8_unweighted_acceptor_fst_la_OBJECTS = \
+ compact8_unweighted_acceptor-fst.lo
+compact8_unweighted_acceptor_fst_la_OBJECTS = \
+ $(am_compact8_unweighted_acceptor_fst_la_OBJECTS)
+compact8_unweighted_acceptor_fst_la_LINK = $(LIBTOOL) --tag=CXX \
+ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
+ $(AM_CXXFLAGS) $(CXXFLAGS) \
+ $(compact8_unweighted_acceptor_fst_la_LDFLAGS) $(LDFLAGS) -o \
+ $@
+compact8_weighted_string_fst_la_LIBADD =
+am_compact8_weighted_string_fst_la_OBJECTS = \
+ compact8_weighted_string-fst.lo
+compact8_weighted_string_fst_la_OBJECTS = \
+ $(am_compact8_weighted_string_fst_la_OBJECTS)
+compact8_weighted_string_fst_la_LINK = $(LIBTOOL) --tag=CXX \
+ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
+ $(AM_CXXFLAGS) $(CXXFLAGS) \
+ $(compact8_weighted_string_fst_la_LDFLAGS) $(LDFLAGS) -o $@
+libfstcompact_la_LIBADD =
+am_libfstcompact_la_OBJECTS = compact8_acceptor-fst.lo \
+ compact8_string-fst.lo compact8_unweighted-fst.lo \
+ compact8_unweighted_acceptor-fst.lo \
+ compact8_weighted_string-fst.lo compact16_acceptor-fst.lo \
+ compact16_string-fst.lo compact16_unweighted-fst.lo \
+ compact16_unweighted_acceptor-fst.lo \
+ compact16_weighted_string-fst.lo compact64_acceptor-fst.lo \
+ compact64_string-fst.lo compact64_unweighted-fst.lo \
+ compact64_unweighted_acceptor-fst.lo \
+ compact64_weighted_string-fst.lo
+libfstcompact_la_OBJECTS = $(am_libfstcompact_la_OBJECTS)
+libfstcompact_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
+ $(CXXFLAGS) $(libfstcompact_la_LDFLAGS) $(LDFLAGS) -o $@
+DEFAULT_INCLUDES =
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+LTCXXCOMPILE = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+CXXLD = $(CXX)
+CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
+ $(LDFLAGS) -o $@
+SOURCES = $(compact16_acceptor_fst_la_SOURCES) \
+ $(compact16_string_fst_la_SOURCES) \
+ $(compact16_unweighted_fst_la_SOURCES) \
+ $(compact16_unweighted_acceptor_fst_la_SOURCES) \
+ $(compact16_weighted_string_fst_la_SOURCES) \
+ $(compact64_acceptor_fst_la_SOURCES) \
+ $(compact64_string_fst_la_SOURCES) \
+ $(compact64_unweighted_fst_la_SOURCES) \
+ $(compact64_unweighted_acceptor_fst_la_SOURCES) \
+ $(compact64_weighted_string_fst_la_SOURCES) \
+ $(compact8_acceptor_fst_la_SOURCES) \
+ $(compact8_string_fst_la_SOURCES) \
+ $(compact8_unweighted_fst_la_SOURCES) \
+ $(compact8_unweighted_acceptor_fst_la_SOURCES) \
+ $(compact8_weighted_string_fst_la_SOURCES) \
+ $(libfstcompact_la_SOURCES)
+DIST_SOURCES = $(compact16_acceptor_fst_la_SOURCES) \
+ $(compact16_string_fst_la_SOURCES) \
+ $(compact16_unweighted_fst_la_SOURCES) \
+ $(compact16_unweighted_acceptor_fst_la_SOURCES) \
+ $(compact16_weighted_string_fst_la_SOURCES) \
+ $(compact64_acceptor_fst_la_SOURCES) \
+ $(compact64_string_fst_la_SOURCES) \
+ $(compact64_unweighted_fst_la_SOURCES) \
+ $(compact64_unweighted_acceptor_fst_la_SOURCES) \
+ $(compact64_weighted_string_fst_la_SOURCES) \
+ $(compact8_acceptor_fst_la_SOURCES) \
+ $(compact8_string_fst_la_SOURCES) \
+ $(compact8_unweighted_fst_la_SOURCES) \
+ $(compact8_unweighted_acceptor_fst_la_SOURCES) \
+ $(compact8_weighted_string_fst_la_SOURCES) \
+ $(libfstcompact_la_SOURCES)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+ICU_CFLAGS = @ICU_CFLAGS@
+ICU_CONFIG = @ICU_CONFIG@
+ICU_CPPFLAGS = @ICU_CPPFLAGS@
+ICU_CXXFLAGS = @ICU_CXXFLAGS@
+ICU_LIBS = @ICU_LIBS@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+libfstdir = @libfstdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+lt_ECHO = @lt_ECHO@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+AM_CPPFLAGS = -I$(srcdir)/../../include $(ICU_CPPFLAGS)
+libfst_LTLIBRARIES = libfstcompact.la compact8_acceptor-fst.la compact8_string-fst.la compact8_unweighted-fst.la compact8_unweighted_acceptor-fst.la compact8_weighted_string-fst.la compact16_acceptor-fst.la compact16_string-fst.la compact16_unweighted-fst.la compact16_unweighted_acceptor-fst.la compact16_weighted_string-fst.la compact64_acceptor-fst.la compact64_string-fst.la compact64_unweighted-fst.la compact64_unweighted_acceptor-fst.la compact64_weighted_string-fst.la
+libfstcompact_la_SOURCES = compact8_acceptor-fst.cc compact8_string-fst.cc compact8_unweighted-fst.cc compact8_unweighted_acceptor-fst.cc compact8_weighted_string-fst.cc compact16_acceptor-fst.cc compact16_string-fst.cc compact16_unweighted-fst.cc compact16_unweighted_acceptor-fst.cc compact16_weighted_string-fst.cc compact64_acceptor-fst.cc compact64_string-fst.cc compact64_unweighted-fst.cc compact64_unweighted_acceptor-fst.cc compact64_weighted_string-fst.cc
+libfstcompact_la_LDFLAGS = -version-info 0:0:0
+compact8_acceptor_fst_la_SOURCES = compact8_acceptor-fst.cc
+compact8_acceptor_fst_la_LDFLAGS = -module
+compact8_string_fst_la_SOURCES = compact8_string-fst.cc
+compact8_string_fst_la_LDFLAGS = -module
+compact8_unweighted_fst_la_SOURCES = compact8_unweighted-fst.cc
+compact8_unweighted_fst_la_LDFLAGS = -module
+compact8_unweighted_acceptor_fst_la_SOURCES = compact8_unweighted_acceptor-fst.cc
+compact8_unweighted_acceptor_fst_la_LDFLAGS = -module
+compact8_weighted_string_fst_la_SOURCES = compact8_weighted_string-fst.cc
+compact8_weighted_string_fst_la_LDFLAGS = -module
+compact16_acceptor_fst_la_SOURCES = compact16_acceptor-fst.cc
+compact16_acceptor_fst_la_LDFLAGS = -module
+compact16_string_fst_la_SOURCES = compact16_string-fst.cc
+compact16_string_fst_la_LDFLAGS = -module
+compact16_unweighted_fst_la_SOURCES = compact16_unweighted-fst.cc
+compact16_unweighted_fst_la_LDFLAGS = -module
+compact16_unweighted_acceptor_fst_la_SOURCES = compact16_unweighted_acceptor-fst.cc
+compact16_unweighted_acceptor_fst_la_LDFLAGS = -module
+compact16_weighted_string_fst_la_SOURCES = compact16_weighted_string-fst.cc
+compact16_weighted_string_fst_la_LDFLAGS = -module
+compact64_acceptor_fst_la_SOURCES = compact64_acceptor-fst.cc
+compact64_acceptor_fst_la_LDFLAGS = -module
+compact64_string_fst_la_SOURCES = compact64_string-fst.cc
+compact64_string_fst_la_LDFLAGS = -module
+compact64_unweighted_fst_la_SOURCES = compact64_unweighted-fst.cc
+compact64_unweighted_fst_la_LDFLAGS = -module
+compact64_unweighted_acceptor_fst_la_SOURCES = compact64_unweighted_acceptor-fst.cc
+compact64_unweighted_acceptor_fst_la_LDFLAGS = -module
+compact64_weighted_string_fst_la_SOURCES = compact64_weighted_string-fst.cc
+compact64_weighted_string_fst_la_LDFLAGS = -module
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .cc .lo .o .obj
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/extensions/compact/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign src/extensions/compact/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-libfstLTLIBRARIES: $(libfst_LTLIBRARIES)
+ @$(NORMAL_INSTALL)
+ test -z "$(libfstdir)" || $(MKDIR_P) "$(DESTDIR)$(libfstdir)"
+ @list='$(libfst_LTLIBRARIES)'; test -n "$(libfstdir)" || list=; \
+ list2=; for p in $$list; do \
+ if test -f $$p; then \
+ list2="$$list2 $$p"; \
+ else :; fi; \
+ done; \
+ test -z "$$list2" || { \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libfstdir)'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libfstdir)"; \
+ }
+
+uninstall-libfstLTLIBRARIES:
+ @$(NORMAL_UNINSTALL)
+ @list='$(libfst_LTLIBRARIES)'; test -n "$(libfstdir)" || list=; \
+ for p in $$list; do \
+ $(am__strip_dir) \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libfstdir)/$$f'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libfstdir)/$$f"; \
+ done
+
+clean-libfstLTLIBRARIES:
+ -test -z "$(libfst_LTLIBRARIES)" || rm -f $(libfst_LTLIBRARIES)
+ @list='$(libfst_LTLIBRARIES)'; for p in $$list; do \
+ dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+ test "$$dir" != "$$p" || dir=.; \
+ echo "rm -f \"$${dir}/so_locations\""; \
+ rm -f "$${dir}/so_locations"; \
+ done
+compact16_acceptor-fst.la: $(compact16_acceptor_fst_la_OBJECTS) $(compact16_acceptor_fst_la_DEPENDENCIES)
+ $(compact16_acceptor_fst_la_LINK) -rpath $(libfstdir) $(compact16_acceptor_fst_la_OBJECTS) $(compact16_acceptor_fst_la_LIBADD) $(LIBS)
+compact16_string-fst.la: $(compact16_string_fst_la_OBJECTS) $(compact16_string_fst_la_DEPENDENCIES)
+ $(compact16_string_fst_la_LINK) -rpath $(libfstdir) $(compact16_string_fst_la_OBJECTS) $(compact16_string_fst_la_LIBADD) $(LIBS)
+compact16_unweighted-fst.la: $(compact16_unweighted_fst_la_OBJECTS) $(compact16_unweighted_fst_la_DEPENDENCIES)
+ $(compact16_unweighted_fst_la_LINK) -rpath $(libfstdir) $(compact16_unweighted_fst_la_OBJECTS) $(compact16_unweighted_fst_la_LIBADD) $(LIBS)
+compact16_unweighted_acceptor-fst.la: $(compact16_unweighted_acceptor_fst_la_OBJECTS) $(compact16_unweighted_acceptor_fst_la_DEPENDENCIES)
+ $(compact16_unweighted_acceptor_fst_la_LINK) -rpath $(libfstdir) $(compact16_unweighted_acceptor_fst_la_OBJECTS) $(compact16_unweighted_acceptor_fst_la_LIBADD) $(LIBS)
+compact16_weighted_string-fst.la: $(compact16_weighted_string_fst_la_OBJECTS) $(compact16_weighted_string_fst_la_DEPENDENCIES)
+ $(compact16_weighted_string_fst_la_LINK) -rpath $(libfstdir) $(compact16_weighted_string_fst_la_OBJECTS) $(compact16_weighted_string_fst_la_LIBADD) $(LIBS)
+compact64_acceptor-fst.la: $(compact64_acceptor_fst_la_OBJECTS) $(compact64_acceptor_fst_la_DEPENDENCIES)
+ $(compact64_acceptor_fst_la_LINK) -rpath $(libfstdir) $(compact64_acceptor_fst_la_OBJECTS) $(compact64_acceptor_fst_la_LIBADD) $(LIBS)
+compact64_string-fst.la: $(compact64_string_fst_la_OBJECTS) $(compact64_string_fst_la_DEPENDENCIES)
+ $(compact64_string_fst_la_LINK) -rpath $(libfstdir) $(compact64_string_fst_la_OBJECTS) $(compact64_string_fst_la_LIBADD) $(LIBS)
+compact64_unweighted-fst.la: $(compact64_unweighted_fst_la_OBJECTS) $(compact64_unweighted_fst_la_DEPENDENCIES)
+ $(compact64_unweighted_fst_la_LINK) -rpath $(libfstdir) $(compact64_unweighted_fst_la_OBJECTS) $(compact64_unweighted_fst_la_LIBADD) $(LIBS)
+compact64_unweighted_acceptor-fst.la: $(compact64_unweighted_acceptor_fst_la_OBJECTS) $(compact64_unweighted_acceptor_fst_la_DEPENDENCIES)
+ $(compact64_unweighted_acceptor_fst_la_LINK) -rpath $(libfstdir) $(compact64_unweighted_acceptor_fst_la_OBJECTS) $(compact64_unweighted_acceptor_fst_la_LIBADD) $(LIBS)
+compact64_weighted_string-fst.la: $(compact64_weighted_string_fst_la_OBJECTS) $(compact64_weighted_string_fst_la_DEPENDENCIES)
+ $(compact64_weighted_string_fst_la_LINK) -rpath $(libfstdir) $(compact64_weighted_string_fst_la_OBJECTS) $(compact64_weighted_string_fst_la_LIBADD) $(LIBS)
+compact8_acceptor-fst.la: $(compact8_acceptor_fst_la_OBJECTS) $(compact8_acceptor_fst_la_DEPENDENCIES)
+ $(compact8_acceptor_fst_la_LINK) -rpath $(libfstdir) $(compact8_acceptor_fst_la_OBJECTS) $(compact8_acceptor_fst_la_LIBADD) $(LIBS)
+compact8_string-fst.la: $(compact8_string_fst_la_OBJECTS) $(compact8_string_fst_la_DEPENDENCIES)
+ $(compact8_string_fst_la_LINK) -rpath $(libfstdir) $(compact8_string_fst_la_OBJECTS) $(compact8_string_fst_la_LIBADD) $(LIBS)
+compact8_unweighted-fst.la: $(compact8_unweighted_fst_la_OBJECTS) $(compact8_unweighted_fst_la_DEPENDENCIES)
+ $(compact8_unweighted_fst_la_LINK) -rpath $(libfstdir) $(compact8_unweighted_fst_la_OBJECTS) $(compact8_unweighted_fst_la_LIBADD) $(LIBS)
+compact8_unweighted_acceptor-fst.la: $(compact8_unweighted_acceptor_fst_la_OBJECTS) $(compact8_unweighted_acceptor_fst_la_DEPENDENCIES)
+ $(compact8_unweighted_acceptor_fst_la_LINK) -rpath $(libfstdir) $(compact8_unweighted_acceptor_fst_la_OBJECTS) $(compact8_unweighted_acceptor_fst_la_LIBADD) $(LIBS)
+compact8_weighted_string-fst.la: $(compact8_weighted_string_fst_la_OBJECTS) $(compact8_weighted_string_fst_la_DEPENDENCIES)
+ $(compact8_weighted_string_fst_la_LINK) -rpath $(libfstdir) $(compact8_weighted_string_fst_la_OBJECTS) $(compact8_weighted_string_fst_la_LIBADD) $(LIBS)
+libfstcompact.la: $(libfstcompact_la_OBJECTS) $(libfstcompact_la_DEPENDENCIES)
+ $(libfstcompact_la_LINK) -rpath $(libfstdir) $(libfstcompact_la_OBJECTS) $(libfstcompact_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compact16_acceptor-fst.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compact16_string-fst.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compact16_unweighted-fst.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compact16_unweighted_acceptor-fst.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compact16_weighted_string-fst.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compact64_acceptor-fst.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compact64_string-fst.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compact64_unweighted-fst.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compact64_unweighted_acceptor-fst.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compact64_weighted_string-fst.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compact8_acceptor-fst.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compact8_string-fst.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compact8_unweighted-fst.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compact8_unweighted_acceptor-fst.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compact8_weighted_string-fst.Plo@am__quote@
+
+.cc.o:
+@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $<
+
+.cc.obj:
+@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.cc.lo:
+@am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(LTCXXCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ mkid -fID $$unique
+tags: TAGS
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ set x; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: CTAGS
+CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES)
+installdirs:
+ for dir in "$(DESTDIR)$(libfstdir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libfstLTLIBRARIES clean-libtool \
+ mostlyclean-am
+
+distclean: distclean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-libfstLTLIBRARIES
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-libfstLTLIBRARIES
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
+ clean-libfstLTLIBRARIES clean-libtool ctags distclean \
+ distclean-compile distclean-generic distclean-libtool \
+ distclean-tags distdir dvi dvi-am html html-am info info-am \
+ install install-am install-data install-data-am install-dvi \
+ install-dvi-am install-exec install-exec-am install-html \
+ install-html-am install-info install-info-am \
+ install-libfstLTLIBRARIES install-man install-pdf \
+ install-pdf-am install-ps install-ps-am install-strip \
+ installcheck installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-compile \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags uninstall uninstall-am uninstall-libfstLTLIBRARIES
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/src/extensions/compact/compact16_acceptor-fst.cc b/src/extensions/compact/compact16_acceptor-fst.cc
new file mode 100644
index 0000000..a7f750c
--- /dev/null
+++ b/src/extensions/compact/compact16_acceptor-fst.cc
@@ -0,0 +1,31 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+
+#include <fst/fst.h>
+#include <fst/compact-fst.h>
+
+using fst::FstRegisterer;
+using fst::CompactFst;
+using fst::LogArc;
+using fst::StdArc;
+using fst::AcceptorCompactor;
+// Static registerers for the uint16 AcceptorCompactor CompactFst (StdArc, LogArc); assumed to act on construction -- TODO confirm.
+static FstRegisterer<
+ CompactFst<StdArc, AcceptorCompactor<StdArc>, uint16> >
+CompactFst_StdArc_AcceptorCompactor_uint16_registerer;
+static FstRegisterer<
+ CompactFst<LogArc, AcceptorCompactor<LogArc>, uint16> >
+CompactFst_LogArc_AcceptorCompactor_uint16_registerer;
diff --git a/src/extensions/compact/compact16_string-fst.cc b/src/extensions/compact/compact16_string-fst.cc
new file mode 100644
index 0000000..c54b163
--- /dev/null
+++ b/src/extensions/compact/compact16_string-fst.cc
@@ -0,0 +1,31 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+
+#include <fst/fst.h>
+#include <fst/compact-fst.h>
+
+using fst::FstRegisterer;
+using fst::CompactFst;
+using fst::LogArc;
+using fst::StdArc;
+using fst::StringCompactor;
+// Static registerers for the uint16 StringCompactor CompactFst (StdArc, LogArc); assumed to act on construction -- TODO confirm.
+static FstRegisterer<
+ CompactFst<StdArc, StringCompactor<StdArc>, uint16> >
+CompactFst_StdArc_StringCompactor_uint16_registerer;
+static FstRegisterer<
+ CompactFst<LogArc, StringCompactor<LogArc>, uint16> >
+CompactFst_LogArc_StringCompactor_uint16_registerer;
diff --git a/src/extensions/compact/compact16_unweighted-fst.cc b/src/extensions/compact/compact16_unweighted-fst.cc
new file mode 100644
index 0000000..beb73eb
--- /dev/null
+++ b/src/extensions/compact/compact16_unweighted-fst.cc
@@ -0,0 +1,31 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+
+#include <fst/fst.h>
+#include <fst/compact-fst.h>
+
+using fst::FstRegisterer;
+using fst::CompactFst;
+using fst::LogArc;
+using fst::StdArc;
+using fst::UnweightedCompactor;
+// Static registerers for the uint16 UnweightedCompactor CompactFst (StdArc, LogArc); assumed to act on construction -- TODO confirm.
+static FstRegisterer<
+ CompactFst<StdArc, UnweightedCompactor<StdArc>, uint16> >
+CompactFst_StdArc_UnweightedCompactor_uint16_registerer;
+static FstRegisterer<
+ CompactFst<LogArc, UnweightedCompactor<LogArc>, uint16> >
+CompactFst_LogArc_UnweightedCompactor_uint16_registerer;
diff --git a/src/extensions/compact/compact16_unweighted_acceptor-fst.cc b/src/extensions/compact/compact16_unweighted_acceptor-fst.cc
new file mode 100644
index 0000000..8b312ea
--- /dev/null
+++ b/src/extensions/compact/compact16_unweighted_acceptor-fst.cc
@@ -0,0 +1,31 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+
+#include <fst/fst.h>
+#include <fst/compact-fst.h>
+
+using fst::FstRegisterer;
+using fst::CompactFst;
+using fst::LogArc;
+using fst::StdArc;
+using fst::UnweightedAcceptorCompactor;
+// Static registerers for the uint16 UnweightedAcceptorCompactor CompactFst (StdArc, LogArc); assumed to act on construction -- TODO confirm.
+static FstRegisterer<
+ CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc>, uint16> >
+CompactFst_StdArc_UnweightedAcceptorCompactor_uint16_registerer;
+static FstRegisterer<
+ CompactFst<LogArc, UnweightedAcceptorCompactor<LogArc>, uint16> >
+CompactFst_LogArc_UnweightedAcceptorCompactor_uint16_registerer;
diff --git a/src/extensions/compact/compact16_weighted_string-fst.cc b/src/extensions/compact/compact16_weighted_string-fst.cc
new file mode 100644
index 0000000..8ef1f07
--- /dev/null
+++ b/src/extensions/compact/compact16_weighted_string-fst.cc
@@ -0,0 +1,31 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+
+#include <fst/fst.h>
+#include <fst/compact-fst.h>
+
+using fst::FstRegisterer;
+using fst::CompactFst;
+using fst::LogArc;
+using fst::StdArc;
+using fst::WeightedStringCompactor;
+// Static registerers for the uint16 WeightedStringCompactor CompactFst (StdArc, LogArc); assumed to act on construction -- TODO confirm.
+static FstRegisterer<
+ CompactFst<StdArc, WeightedStringCompactor<StdArc>, uint16> >
+CompactFst_StdArc_WeightedStringCompactor_uint16_registerer;
+static FstRegisterer<
+ CompactFst<LogArc, WeightedStringCompactor<LogArc>, uint16> >
+CompactFst_LogArc_WeightedStringCompactor_uint16_registerer;
diff --git a/src/extensions/compact/compact64_acceptor-fst.cc b/src/extensions/compact/compact64_acceptor-fst.cc
new file mode 100644
index 0000000..3524845
--- /dev/null
+++ b/src/extensions/compact/compact64_acceptor-fst.cc
@@ -0,0 +1,31 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+
+#include <fst/fst.h>
+#include <fst/compact-fst.h>
+
+using fst::FstRegisterer;
+using fst::CompactFst;
+using fst::LogArc;
+using fst::StdArc;
+using fst::AcceptorCompactor;
+// Static registerers for the uint64 AcceptorCompactor CompactFst (StdArc, LogArc); assumed to act on construction -- TODO confirm.
+static FstRegisterer<
+ CompactFst<StdArc, AcceptorCompactor<StdArc>, uint64> >
+CompactFst_StdArc_AcceptorCompactor_uint64_registerer;
+static FstRegisterer<
+ CompactFst<LogArc, AcceptorCompactor<LogArc>, uint64> >
+CompactFst_LogArc_AcceptorCompactor_uint64_registerer;
diff --git a/src/extensions/compact/compact64_string-fst.cc b/src/extensions/compact/compact64_string-fst.cc
new file mode 100644
index 0000000..eddb9af
--- /dev/null
+++ b/src/extensions/compact/compact64_string-fst.cc
@@ -0,0 +1,31 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+
+#include <fst/fst.h>
+#include <fst/compact-fst.h>
+
+using fst::FstRegisterer;
+using fst::CompactFst;
+using fst::LogArc;
+using fst::StdArc;
+using fst::StringCompactor;
+// Static registerers for the uint64 StringCompactor CompactFst (StdArc, LogArc); assumed to act on construction -- TODO confirm.
+static FstRegisterer<
+ CompactFst<StdArc, StringCompactor<StdArc>, uint64> >
+CompactFst_StdArc_StringCompactor_uint64_registerer;
+static FstRegisterer<
+ CompactFst<LogArc, StringCompactor<LogArc>, uint64> >
+CompactFst_LogArc_StringCompactor_uint64_registerer;
diff --git a/src/extensions/compact/compact64_unweighted-fst.cc b/src/extensions/compact/compact64_unweighted-fst.cc
new file mode 100644
index 0000000..3926739
--- /dev/null
+++ b/src/extensions/compact/compact64_unweighted-fst.cc
@@ -0,0 +1,31 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+
+#include <fst/fst.h>
+#include <fst/compact-fst.h>
+
+using fst::FstRegisterer;
+using fst::CompactFst;
+using fst::LogArc;
+using fst::StdArc;
+using fst::UnweightedCompactor;
+
+// Registerers for CompactFst<Arc, UnweightedCompactor<Arc>, uint64>,
+// instantiated once with Arc = StdArc and once with Arc = LogArc.
+static FstRegisterer<CompactFst<StdArc, UnweightedCompactor<StdArc>, uint64> >
+    CompactFst_StdArc_UnweightedCompactor_uint64_registerer;
+static FstRegisterer<CompactFst<LogArc, UnweightedCompactor<LogArc>, uint64> >
+    CompactFst_LogArc_UnweightedCompactor_uint64_registerer;
diff --git a/src/extensions/compact/compact64_unweighted_acceptor-fst.cc b/src/extensions/compact/compact64_unweighted_acceptor-fst.cc
new file mode 100644
index 0000000..d0705ca
--- /dev/null
+++ b/src/extensions/compact/compact64_unweighted_acceptor-fst.cc
@@ -0,0 +1,31 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+
+#include <fst/fst.h>
+#include <fst/compact-fst.h>
+
+using fst::FstRegisterer;
+using fst::CompactFst;
+using fst::LogArc;
+using fst::StdArc;
+using fst::UnweightedAcceptorCompactor;
+
+// Registerers for CompactFst<Arc, UnweightedAcceptorCompactor<Arc>, uint64>,
+// instantiated once with Arc = StdArc and once with Arc = LogArc.
+static FstRegisterer<CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc>,
+    uint64> > CompactFst_StdArc_UnweightedAcceptorCompactor_uint64_registerer;
+static FstRegisterer<CompactFst<LogArc, UnweightedAcceptorCompactor<LogArc>,
+    uint64> > CompactFst_LogArc_UnweightedAcceptorCompactor_uint64_registerer;
diff --git a/src/extensions/compact/compact64_weighted_string-fst.cc b/src/extensions/compact/compact64_weighted_string-fst.cc
new file mode 100644
index 0000000..9508972
--- /dev/null
+++ b/src/extensions/compact/compact64_weighted_string-fst.cc
@@ -0,0 +1,31 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+
+#include <fst/fst.h>
+#include <fst/compact-fst.h>
+
+using fst::FstRegisterer;
+using fst::CompactFst;
+using fst::LogArc;
+using fst::StdArc;
+using fst::WeightedStringCompactor;
+
+// Registerers for CompactFst<Arc, WeightedStringCompactor<Arc>, uint64>,
+// instantiated once with Arc = StdArc and once with Arc = LogArc.
+static FstRegisterer<CompactFst<StdArc, WeightedStringCompactor<StdArc>,
+    uint64> > CompactFst_StdArc_WeightedStringCompactor_uint64_registerer;
+static FstRegisterer<CompactFst<LogArc, WeightedStringCompactor<LogArc>,
+    uint64> > CompactFst_LogArc_WeightedStringCompactor_uint64_registerer;
diff --git a/src/extensions/compact/compact8_acceptor-fst.cc b/src/extensions/compact/compact8_acceptor-fst.cc
new file mode 100644
index 0000000..d43b171
--- /dev/null
+++ b/src/extensions/compact/compact8_acceptor-fst.cc
@@ -0,0 +1,31 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+
+#include <fst/fst.h>
+#include <fst/compact-fst.h>
+
+using fst::FstRegisterer;
+using fst::CompactFst;
+using fst::LogArc;
+using fst::StdArc;
+using fst::AcceptorCompactor;
+
+// Registerers for CompactFst<Arc, AcceptorCompactor<Arc>, uint8>,
+// instantiated once with Arc = StdArc and once with Arc = LogArc.
+static FstRegisterer<CompactFst<StdArc, AcceptorCompactor<StdArc>, uint8> >
+    CompactFst_StdArc_AcceptorCompactor_uint8_registerer;
+static FstRegisterer<CompactFst<LogArc, AcceptorCompactor<LogArc>, uint8> >
+    CompactFst_LogArc_AcceptorCompactor_uint8_registerer;
diff --git a/src/extensions/compact/compact8_string-fst.cc b/src/extensions/compact/compact8_string-fst.cc
new file mode 100644
index 0000000..8899bc7
--- /dev/null
+++ b/src/extensions/compact/compact8_string-fst.cc
@@ -0,0 +1,31 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+
+#include <fst/fst.h>
+#include <fst/compact-fst.h>
+
+using fst::FstRegisterer;
+using fst::CompactFst;
+using fst::LogArc;
+using fst::StdArc;
+using fst::StringCompactor;
+
+// Registerers for CompactFst<Arc, StringCompactor<Arc>, uint8>,
+// instantiated once with Arc = StdArc and once with Arc = LogArc.
+static FstRegisterer<CompactFst<StdArc, StringCompactor<StdArc>, uint8> >
+    CompactFst_StdArc_StringCompactor_uint8_registerer;
+static FstRegisterer<CompactFst<LogArc, StringCompactor<LogArc>, uint8> >
+    CompactFst_LogArc_StringCompactor_uint8_registerer;
diff --git a/src/extensions/compact/compact8_unweighted-fst.cc b/src/extensions/compact/compact8_unweighted-fst.cc
new file mode 100644
index 0000000..2b784bc
--- /dev/null
+++ b/src/extensions/compact/compact8_unweighted-fst.cc
@@ -0,0 +1,31 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+
+#include <fst/fst.h>
+#include <fst/compact-fst.h>
+
+using fst::FstRegisterer;
+using fst::CompactFst;
+using fst::LogArc;
+using fst::StdArc;
+using fst::UnweightedCompactor;
+
+// Registerers for CompactFst<Arc, UnweightedCompactor<Arc>, uint8>,
+// instantiated once with Arc = StdArc and once with Arc = LogArc.
+static FstRegisterer<CompactFst<StdArc, UnweightedCompactor<StdArc>, uint8> >
+    CompactFst_StdArc_UnweightedCompactor_uint8_registerer;
+static FstRegisterer<CompactFst<LogArc, UnweightedCompactor<LogArc>, uint8> >
+    CompactFst_LogArc_UnweightedCompactor_uint8_registerer;
diff --git a/src/extensions/compact/compact8_unweighted_acceptor-fst.cc b/src/extensions/compact/compact8_unweighted_acceptor-fst.cc
new file mode 100644
index 0000000..227d671
--- /dev/null
+++ b/src/extensions/compact/compact8_unweighted_acceptor-fst.cc
@@ -0,0 +1,31 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+
+#include <fst/fst.h>
+#include <fst/compact-fst.h>
+
+using fst::FstRegisterer;
+using fst::CompactFst;
+using fst::LogArc;
+using fst::StdArc;
+using fst::UnweightedAcceptorCompactor;
+
+// Registerers for CompactFst<Arc, UnweightedAcceptorCompactor<Arc>, uint8>,
+// instantiated once with Arc = StdArc and once with Arc = LogArc.
+static FstRegisterer<CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc>,
+    uint8> > CompactFst_StdArc_UnweightedAcceptorCompactor_uint8_registerer;
+static FstRegisterer<CompactFst<LogArc, UnweightedAcceptorCompactor<LogArc>,
+    uint8> > CompactFst_LogArc_UnweightedAcceptorCompactor_uint8_registerer;
diff --git a/src/extensions/compact/compact8_weighted_string-fst.cc b/src/extensions/compact/compact8_weighted_string-fst.cc
new file mode 100644
index 0000000..552017e
--- /dev/null
+++ b/src/extensions/compact/compact8_weighted_string-fst.cc
@@ -0,0 +1,31 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+
+#include <fst/fst.h>
+#include <fst/compact-fst.h>
+
+using fst::FstRegisterer;
+using fst::CompactFst;
+using fst::LogArc;
+using fst::StdArc;
+using fst::WeightedStringCompactor;
+
+// Registerers for CompactFst<Arc, WeightedStringCompactor<Arc>, uint8>,
+// instantiated once with Arc = StdArc and once with Arc = LogArc.
+static FstRegisterer<CompactFst<StdArc, WeightedStringCompactor<StdArc>,
+    uint8> > CompactFst_StdArc_WeightedStringCompactor_uint8_registerer;
+static FstRegisterer<CompactFst<LogArc, WeightedStringCompactor<LogArc>,
+    uint8> > CompactFst_LogArc_WeightedStringCompactor_uint8_registerer;
diff --git a/src/extensions/const/Makefile.am b/src/extensions/const/Makefile.am
new file mode 100644
index 0000000..08a97d5
--- /dev/null
+++ b/src/extensions/const/Makefile.am
@@ -0,0 +1,16 @@
+AM_CPPFLAGS = -I$(srcdir)/../../include $(ICU_CPPFLAGS)
+# The libtool libraries listed below are installed into $(libfstdir).
+libfstdir = @libfstdir@
+libfst_LTLIBRARIES = libfstconst.la const8-fst.la const16-fst.la const64-fst.la
+# libfstconst.la: all three const*-fst.cc sources, with libtool version 0:0:0.
+libfstconst_la_SOURCES = const8-fst.cc const16-fst.cc const64-fst.cc
+libfstconst_la_LDFLAGS = -version-info 0:0:0
+# const8-fst.la: const8-fst.cc only, linked with libtool's -module flag.
+const8_fst_la_SOURCES = const8-fst.cc
+const8_fst_la_LDFLAGS = -module
+# const16-fst.la: const16-fst.cc only, linked with libtool's -module flag.
+const16_fst_la_SOURCES = const16-fst.cc
+const16_fst_la_LDFLAGS = -module
+# const64-fst.la: const64-fst.cc only, linked with libtool's -module flag.
+const64_fst_la_SOURCES = const64-fst.cc
+const64_fst_la_LDFLAGS = -module
diff --git a/src/extensions/const/Makefile.in b/src/extensions/const/Makefile.in
new file mode 100644
index 0000000..906750a
--- /dev/null
+++ b/src/extensions/const/Makefile.in
@@ -0,0 +1,566 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = src/extensions/const
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_icu.m4 \
+ $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+ $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+ $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h \
+ $(top_builddir)/src/include/fst/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+ *) f=$$p;; \
+ esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+ if (++n[$$2] == $(am__install_max)) \
+ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+ END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__installdirs = "$(DESTDIR)$(libfstdir)"
+LTLIBRARIES = $(libfst_LTLIBRARIES)
+const16_fst_la_LIBADD =
+am_const16_fst_la_OBJECTS = const16-fst.lo
+const16_fst_la_OBJECTS = $(am_const16_fst_la_OBJECTS)
+const16_fst_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
+ $(CXXFLAGS) $(const16_fst_la_LDFLAGS) $(LDFLAGS) -o $@
+const64_fst_la_LIBADD =
+am_const64_fst_la_OBJECTS = const64-fst.lo
+const64_fst_la_OBJECTS = $(am_const64_fst_la_OBJECTS)
+const64_fst_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
+ $(CXXFLAGS) $(const64_fst_la_LDFLAGS) $(LDFLAGS) -o $@
+const8_fst_la_LIBADD =
+am_const8_fst_la_OBJECTS = const8-fst.lo
+const8_fst_la_OBJECTS = $(am_const8_fst_la_OBJECTS)
+const8_fst_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
+ $(CXXFLAGS) $(const8_fst_la_LDFLAGS) $(LDFLAGS) -o $@
+libfstconst_la_LIBADD =
+am_libfstconst_la_OBJECTS = const8-fst.lo const16-fst.lo \
+ const64-fst.lo
+libfstconst_la_OBJECTS = $(am_libfstconst_la_OBJECTS)
+libfstconst_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
+ $(CXXFLAGS) $(libfstconst_la_LDFLAGS) $(LDFLAGS) -o $@
+DEFAULT_INCLUDES =
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+LTCXXCOMPILE = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+CXXLD = $(CXX)
+CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
+ $(LDFLAGS) -o $@
+SOURCES = $(const16_fst_la_SOURCES) $(const64_fst_la_SOURCES) \
+ $(const8_fst_la_SOURCES) $(libfstconst_la_SOURCES)
+DIST_SOURCES = $(const16_fst_la_SOURCES) $(const64_fst_la_SOURCES) \
+ $(const8_fst_la_SOURCES) $(libfstconst_la_SOURCES)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+ICU_CFLAGS = @ICU_CFLAGS@
+ICU_CONFIG = @ICU_CONFIG@
+ICU_CPPFLAGS = @ICU_CPPFLAGS@
+ICU_CXXFLAGS = @ICU_CXXFLAGS@
+ICU_LIBS = @ICU_LIBS@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+libfstdir = @libfstdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+lt_ECHO = @lt_ECHO@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+AM_CPPFLAGS = -I$(srcdir)/../../include $(ICU_CPPFLAGS)
+libfst_LTLIBRARIES = libfstconst.la const8-fst.la const16-fst.la const64-fst.la
+libfstconst_la_SOURCES = const8-fst.cc const16-fst.cc const64-fst.cc
+libfstconst_la_LDFLAGS = -version-info 0:0:0
+const8_fst_la_SOURCES = const8-fst.cc
+const8_fst_la_LDFLAGS = -module
+const16_fst_la_SOURCES = const16-fst.cc
+const16_fst_la_LDFLAGS = -module
+const64_fst_la_SOURCES = const64-fst.cc
+const64_fst_la_LDFLAGS = -module
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .cc .lo .o .obj
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/extensions/const/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign src/extensions/const/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-libfstLTLIBRARIES: $(libfst_LTLIBRARIES)
+ @$(NORMAL_INSTALL)
+ test -z "$(libfstdir)" || $(MKDIR_P) "$(DESTDIR)$(libfstdir)"
+ @list='$(libfst_LTLIBRARIES)'; test -n "$(libfstdir)" || list=; \
+ list2=; for p in $$list; do \
+ if test -f $$p; then \
+ list2="$$list2 $$p"; \
+ else :; fi; \
+ done; \
+ test -z "$$list2" || { \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libfstdir)'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libfstdir)"; \
+ }
+
+uninstall-libfstLTLIBRARIES:
+ @$(NORMAL_UNINSTALL)
+ @list='$(libfst_LTLIBRARIES)'; test -n "$(libfstdir)" || list=; \
+ for p in $$list; do \
+ $(am__strip_dir) \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libfstdir)/$$f'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libfstdir)/$$f"; \
+ done
+
+clean-libfstLTLIBRARIES:
+ -test -z "$(libfst_LTLIBRARIES)" || rm -f $(libfst_LTLIBRARIES)
+ @list='$(libfst_LTLIBRARIES)'; for p in $$list; do \
+ dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+ test "$$dir" != "$$p" || dir=.; \
+ echo "rm -f \"$${dir}/so_locations\""; \
+ rm -f "$${dir}/so_locations"; \
+ done
+const16-fst.la: $(const16_fst_la_OBJECTS) $(const16_fst_la_DEPENDENCIES)
+ $(const16_fst_la_LINK) -rpath $(libfstdir) $(const16_fst_la_OBJECTS) $(const16_fst_la_LIBADD) $(LIBS)
+const64-fst.la: $(const64_fst_la_OBJECTS) $(const64_fst_la_DEPENDENCIES)
+ $(const64_fst_la_LINK) -rpath $(libfstdir) $(const64_fst_la_OBJECTS) $(const64_fst_la_LIBADD) $(LIBS)
+const8-fst.la: $(const8_fst_la_OBJECTS) $(const8_fst_la_DEPENDENCIES)
+ $(const8_fst_la_LINK) -rpath $(libfstdir) $(const8_fst_la_OBJECTS) $(const8_fst_la_LIBADD) $(LIBS)
+libfstconst.la: $(libfstconst_la_OBJECTS) $(libfstconst_la_DEPENDENCIES)
+ $(libfstconst_la_LINK) -rpath $(libfstdir) $(libfstconst_la_OBJECTS) $(libfstconst_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/const16-fst.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/const64-fst.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/const8-fst.Plo@am__quote@
+
+.cc.o:
+@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $<
+
+.cc.obj:
+@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.cc.lo:
+@am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(LTCXXCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ mkid -fID $$unique
+tags: TAGS
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ set x; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: CTAGS
+CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES)
+installdirs:
+ for dir in "$(DESTDIR)$(libfstdir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libfstLTLIBRARIES clean-libtool \
+ mostlyclean-am
+
+distclean: distclean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-libfstLTLIBRARIES
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-libfstLTLIBRARIES
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
+ clean-libfstLTLIBRARIES clean-libtool ctags distclean \
+ distclean-compile distclean-generic distclean-libtool \
+ distclean-tags distdir dvi dvi-am html html-am info info-am \
+ install install-am install-data install-data-am install-dvi \
+ install-dvi-am install-exec install-exec-am install-html \
+ install-html-am install-info install-info-am \
+ install-libfstLTLIBRARIES install-man install-pdf \
+ install-pdf-am install-ps install-ps-am install-strip \
+ installcheck installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-compile \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags uninstall uninstall-am uninstall-libfstLTLIBRARIES
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/src/extensions/const/const16-fst.cc b/src/extensions/const/const16-fst.cc
new file mode 100644
index 0000000..8eece5a
--- /dev/null
+++ b/src/extensions/const/const16-fst.cc
@@ -0,0 +1,32 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+
+#include <fst/fst.h>
+#include <fst/const-fst.h>
+
+using fst::FstRegisterer;
+using fst::ConstFst;
+using fst::LogArc;
+using fst::Log64Arc;
+using fst::StdArc;
+
+// Register ConstFst with the uint16 size type for the common arc types.
+static FstRegisterer<ConstFst<StdArc, uint16> >
+    ConstFst_StdArc_uint16_registerer;
+static FstRegisterer<ConstFst<LogArc, uint16> >
+    ConstFst_LogArc_uint16_registerer;
+static FstRegisterer<ConstFst<Log64Arc, uint16> >
+    ConstFst_Log64Arc_uint16_registerer;
diff --git a/src/extensions/const/const64-fst.cc b/src/extensions/const/const64-fst.cc
new file mode 100644
index 0000000..0f635e3
--- /dev/null
+++ b/src/extensions/const/const64-fst.cc
@@ -0,0 +1,32 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+
+#include <fst/fst.h>
+#include <fst/const-fst.h>
+
+using fst::FstRegisterer;
+using fst::ConstFst;
+using fst::LogArc;
+using fst::Log64Arc;
+using fst::StdArc;
+
+// Registers ConstFst<Arc, uint64> for the common arc types (Std, Log, Log64).
+static FstRegisterer< ConstFst<StdArc, uint64> >  // file-local object
+ ConstFst_StdArc_uint64_registerer;
+static FstRegisterer< ConstFst<LogArc, uint64> >  // one object per arc type
+ ConstFst_LogArc_uint64_registerer;
+static FstRegisterer< ConstFst<Log64Arc, uint64> >
+ ConstFst_Log64Arc_uint64_registerer;  // presumably registers on construction
diff --git a/src/extensions/const/const8-fst.cc b/src/extensions/const/const8-fst.cc
new file mode 100644
index 0000000..33f9ce7
--- /dev/null
+++ b/src/extensions/const/const8-fst.cc
@@ -0,0 +1,32 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+
+#include <fst/fst.h>
+#include <fst/const-fst.h>
+
+using fst::FstRegisterer;
+using fst::ConstFst;
+using fst::LogArc;
+using fst::Log64Arc;
+using fst::StdArc;
+
+// Registers ConstFst<Arc, uint8> for the common arc types (Std, Log, Log64).
+static FstRegisterer< ConstFst<StdArc, uint8> >  // file-local object
+ ConstFst_StdArc_uint8_registerer;
+static FstRegisterer< ConstFst<LogArc, uint8> >  // one object per arc type
+ ConstFst_LogArc_uint8_registerer;
+static FstRegisterer< ConstFst<Log64Arc, uint8> >
+ ConstFst_Log64Arc_uint8_registerer;  // presumably registers on construction
diff --git a/src/extensions/far/Makefile.am b/src/extensions/far/Makefile.am
new file mode 100644
index 0000000..61f7cc8
--- /dev/null
+++ b/src/extensions/far/Makefile.am
@@ -0,0 +1,34 @@
+AM_CPPFLAGS = -I$(srcdir)/../../include $(ICU_CPPFLAGS)
+## Install directory for the libraries below; the value comes from configure.
+libfstdir = @libfstdir@
+## libfstfarscript.la joins the library list only under HAVE_SCRIPT.
+if HAVE_SCRIPT
+libfst_LTLIBRARIES = libfstfarscript.la libfstfar.la
+else
+libfst_LTLIBRARIES = libfstfar.la
+endif
+## libfstfar.la is listed in both branches above.
+libfstfar_la_SOURCES = sttable.cc stlist.cc
+libfstfar_la_LDFLAGS = -version-info 0:0:0
+## Sources and version info of the optional script-level library.
+if HAVE_SCRIPT
+libfstfarscript_la_SOURCES = farscript.cc compile-strings.cc main.cc
+libfstfarscript_la_LDFLAGS = -version-info 0:0:0
+endif
+## Command-line tools, built only under HAVE_BIN.
+if HAVE_BIN
+bin_PROGRAMS = farcompilestrings farcreate farextract farinfo farprintstrings
+## NOTE(review): LDADD uses libfstfarscript.la; does HAVE_BIN imply HAVE_SCRIPT?
+LDADD = libfstfarscript.la libfstfar.la ../../script/libfstscript.la \
+ ../../lib/libfst.la -lm -ldl
+
+farcompilestrings_SOURCES = farcompilestrings.cc
+
+farcreate_SOURCES = farcreate.cc
+
+farextract_SOURCES = farextract.cc
+
+farinfo_SOURCES = farinfo.cc
+
+farprintstrings_SOURCES = farprintstrings.cc
+endif
diff --git a/src/extensions/far/Makefile.in b/src/extensions/far/Makefile.in
new file mode 100644
index 0000000..8e05cab
--- /dev/null
+++ b/src/extensions/far/Makefile.in
@@ -0,0 +1,673 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+@HAVE_BIN_TRUE@bin_PROGRAMS = farcompilestrings$(EXEEXT) \
+@HAVE_BIN_TRUE@ farcreate$(EXEEXT) farextract$(EXEEXT) \
+@HAVE_BIN_TRUE@ farinfo$(EXEEXT) farprintstrings$(EXEEXT)
+subdir = src/extensions/far
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_icu.m4 \
+ $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+ $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+ $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h \
+ $(top_builddir)/src/include/fst/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+ *) f=$$p;; \
+ esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+ if (++n[$$2] == $(am__install_max)) \
+ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+ END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__installdirs = "$(DESTDIR)$(libfstdir)" "$(DESTDIR)$(bindir)"
+LTLIBRARIES = $(libfst_LTLIBRARIES)
+libfstfar_la_LIBADD =
+am_libfstfar_la_OBJECTS = sttable.lo stlist.lo
+libfstfar_la_OBJECTS = $(am_libfstfar_la_OBJECTS)
+libfstfar_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
+ $(CXXFLAGS) $(libfstfar_la_LDFLAGS) $(LDFLAGS) -o $@
+@HAVE_SCRIPT_FALSE@am_libfstfar_la_rpath = -rpath $(libfstdir)
+@HAVE_SCRIPT_TRUE@am_libfstfar_la_rpath = -rpath $(libfstdir)
+libfstfarscript_la_LIBADD =
+am__libfstfarscript_la_SOURCES_DIST = farscript.cc compile-strings.cc \
+ main.cc
+@HAVE_SCRIPT_TRUE@am_libfstfarscript_la_OBJECTS = farscript.lo \
+@HAVE_SCRIPT_TRUE@ compile-strings.lo main.lo
+libfstfarscript_la_OBJECTS = $(am_libfstfarscript_la_OBJECTS)
+libfstfarscript_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
+ $(CXXFLAGS) $(libfstfarscript_la_LDFLAGS) $(LDFLAGS) -o $@
+@HAVE_SCRIPT_TRUE@am_libfstfarscript_la_rpath = -rpath $(libfstdir)
+PROGRAMS = $(bin_PROGRAMS)
+am__farcompilestrings_SOURCES_DIST = farcompilestrings.cc
+@HAVE_BIN_TRUE@am_farcompilestrings_OBJECTS = \
+@HAVE_BIN_TRUE@ farcompilestrings.$(OBJEXT)
+farcompilestrings_OBJECTS = $(am_farcompilestrings_OBJECTS)
+farcompilestrings_LDADD = $(LDADD)
+@HAVE_BIN_TRUE@farcompilestrings_DEPENDENCIES = libfstfarscript.la \
+@HAVE_BIN_TRUE@ libfstfar.la ../../script/libfstscript.la \
+@HAVE_BIN_TRUE@ ../../lib/libfst.la
+am__farcreate_SOURCES_DIST = farcreate.cc
+@HAVE_BIN_TRUE@am_farcreate_OBJECTS = farcreate.$(OBJEXT)
+farcreate_OBJECTS = $(am_farcreate_OBJECTS)
+farcreate_LDADD = $(LDADD)
+@HAVE_BIN_TRUE@farcreate_DEPENDENCIES = libfstfarscript.la \
+@HAVE_BIN_TRUE@ libfstfar.la ../../script/libfstscript.la \
+@HAVE_BIN_TRUE@ ../../lib/libfst.la
+am__farextract_SOURCES_DIST = farextract.cc
+@HAVE_BIN_TRUE@am_farextract_OBJECTS = farextract.$(OBJEXT)
+farextract_OBJECTS = $(am_farextract_OBJECTS)
+farextract_LDADD = $(LDADD)
+@HAVE_BIN_TRUE@farextract_DEPENDENCIES = libfstfarscript.la \
+@HAVE_BIN_TRUE@ libfstfar.la ../../script/libfstscript.la \
+@HAVE_BIN_TRUE@ ../../lib/libfst.la
+am__farinfo_SOURCES_DIST = farinfo.cc
+@HAVE_BIN_TRUE@am_farinfo_OBJECTS = farinfo.$(OBJEXT)
+farinfo_OBJECTS = $(am_farinfo_OBJECTS)
+farinfo_LDADD = $(LDADD)
+@HAVE_BIN_TRUE@farinfo_DEPENDENCIES = libfstfarscript.la libfstfar.la \
+@HAVE_BIN_TRUE@ ../../script/libfstscript.la \
+@HAVE_BIN_TRUE@ ../../lib/libfst.la
+am__farprintstrings_SOURCES_DIST = farprintstrings.cc
+@HAVE_BIN_TRUE@am_farprintstrings_OBJECTS = farprintstrings.$(OBJEXT)
+farprintstrings_OBJECTS = $(am_farprintstrings_OBJECTS)
+farprintstrings_LDADD = $(LDADD)
+@HAVE_BIN_TRUE@farprintstrings_DEPENDENCIES = libfstfarscript.la \
+@HAVE_BIN_TRUE@ libfstfar.la ../../script/libfstscript.la \
+@HAVE_BIN_TRUE@ ../../lib/libfst.la
+DEFAULT_INCLUDES =
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+LTCXXCOMPILE = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+CXXLD = $(CXX)
+CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
+ $(LDFLAGS) -o $@
+SOURCES = $(libfstfar_la_SOURCES) $(libfstfarscript_la_SOURCES) \
+ $(farcompilestrings_SOURCES) $(farcreate_SOURCES) \
+ $(farextract_SOURCES) $(farinfo_SOURCES) \
+ $(farprintstrings_SOURCES)
+DIST_SOURCES = $(libfstfar_la_SOURCES) \
+ $(am__libfstfarscript_la_SOURCES_DIST) \
+ $(am__farcompilestrings_SOURCES_DIST) \
+ $(am__farcreate_SOURCES_DIST) $(am__farextract_SOURCES_DIST) \
+ $(am__farinfo_SOURCES_DIST) \
+ $(am__farprintstrings_SOURCES_DIST)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+ICU_CFLAGS = @ICU_CFLAGS@
+ICU_CONFIG = @ICU_CONFIG@
+ICU_CPPFLAGS = @ICU_CPPFLAGS@
+ICU_CXXFLAGS = @ICU_CXXFLAGS@
+ICU_LIBS = @ICU_LIBS@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+libfstdir = @libfstdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+lt_ECHO = @lt_ECHO@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+AM_CPPFLAGS = -I$(srcdir)/../../include $(ICU_CPPFLAGS)
+@HAVE_SCRIPT_FALSE@libfst_LTLIBRARIES = libfstfar.la
+@HAVE_SCRIPT_TRUE@libfst_LTLIBRARIES = libfstfarscript.la libfstfar.la
+libfstfar_la_SOURCES = sttable.cc stlist.cc
+libfstfar_la_LDFLAGS = -version-info 0:0:0
+@HAVE_SCRIPT_TRUE@libfstfarscript_la_SOURCES = farscript.cc compile-strings.cc main.cc
+@HAVE_SCRIPT_TRUE@libfstfarscript_la_LDFLAGS = -version-info 0:0:0
+@HAVE_BIN_TRUE@LDADD = libfstfarscript.la libfstfar.la ../../script/libfstscript.la \
+@HAVE_BIN_TRUE@ ../../lib/libfst.la -lm -ldl
+
+@HAVE_BIN_TRUE@farcompilestrings_SOURCES = farcompilestrings.cc
+@HAVE_BIN_TRUE@farcreate_SOURCES = farcreate.cc
+@HAVE_BIN_TRUE@farextract_SOURCES = farextract.cc
+@HAVE_BIN_TRUE@farinfo_SOURCES = farinfo.cc
+@HAVE_BIN_TRUE@farprintstrings_SOURCES = farprintstrings.cc
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .cc .lo .o .obj
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/extensions/far/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign src/extensions/far/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-libfstLTLIBRARIES: $(libfst_LTLIBRARIES)
+ @$(NORMAL_INSTALL)
+ test -z "$(libfstdir)" || $(MKDIR_P) "$(DESTDIR)$(libfstdir)"
+ @list='$(libfst_LTLIBRARIES)'; test -n "$(libfstdir)" || list=; \
+ list2=; for p in $$list; do \
+ if test -f $$p; then \
+ list2="$$list2 $$p"; \
+ else :; fi; \
+ done; \
+ test -z "$$list2" || { \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libfstdir)'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libfstdir)"; \
+ }
+
+uninstall-libfstLTLIBRARIES:
+ @$(NORMAL_UNINSTALL)
+ @list='$(libfst_LTLIBRARIES)'; test -n "$(libfstdir)" || list=; \
+ for p in $$list; do \
+ $(am__strip_dir) \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libfstdir)/$$f'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libfstdir)/$$f"; \
+ done
+
+clean-libfstLTLIBRARIES:
+ -test -z "$(libfst_LTLIBRARIES)" || rm -f $(libfst_LTLIBRARIES)
+ @list='$(libfst_LTLIBRARIES)'; for p in $$list; do \
+ dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+ test "$$dir" != "$$p" || dir=.; \
+ echo "rm -f \"$${dir}/so_locations\""; \
+ rm -f "$${dir}/so_locations"; \
+ done
+libfstfar.la: $(libfstfar_la_OBJECTS) $(libfstfar_la_DEPENDENCIES)
+ $(libfstfar_la_LINK) $(am_libfstfar_la_rpath) $(libfstfar_la_OBJECTS) $(libfstfar_la_LIBADD) $(LIBS)
+libfstfarscript.la: $(libfstfarscript_la_OBJECTS) $(libfstfarscript_la_DEPENDENCIES)
+ $(libfstfarscript_la_LINK) $(am_libfstfarscript_la_rpath) $(libfstfarscript_la_OBJECTS) $(libfstfarscript_la_LIBADD) $(LIBS)
+install-binPROGRAMS: $(bin_PROGRAMS)
+ @$(NORMAL_INSTALL)
+ test -z "$(bindir)" || $(MKDIR_P) "$(DESTDIR)$(bindir)"
+ @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed 's/$(EXEEXT)$$//' | \
+ while read p p1; do if test -f $$p || test -f $$p1; \
+ then echo "$$p"; echo "$$p"; else :; fi; \
+ done | \
+ sed -e 'p;s,.*/,,;n;h' -e 's|.*|.|' \
+ -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \
+ sed 'N;N;N;s,\n, ,g' | \
+ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \
+ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
+ if ($$2 == $$4) files[d] = files[d] " " $$1; \
+ else { print "f", $$3 "/" $$4, $$1; } } \
+ END { for (d in files) print "f", d, files[d] }' | \
+ while read type dir files; do \
+ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
+ test -z "$$files" || { \
+ echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \
+ $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \
+ } \
+ ; done
+
+uninstall-binPROGRAMS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+ files=`for p in $$list; do echo "$$p"; done | \
+ sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \
+ -e 's/$$/$(EXEEXT)/' `; \
+ test -n "$$list" || exit 0; \
+ echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \
+ cd "$(DESTDIR)$(bindir)" && rm -f $$files
+
+clean-binPROGRAMS:
+ @list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \
+ echo " rm -f" $$list; \
+ rm -f $$list || exit $$?; \
+ test -n "$(EXEEXT)" || exit 0; \
+ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+ echo " rm -f" $$list; \
+ rm -f $$list
+farcompilestrings$(EXEEXT): $(farcompilestrings_OBJECTS) $(farcompilestrings_DEPENDENCIES)
+ @rm -f farcompilestrings$(EXEEXT)
+ $(CXXLINK) $(farcompilestrings_OBJECTS) $(farcompilestrings_LDADD) $(LIBS)
+farcreate$(EXEEXT): $(farcreate_OBJECTS) $(farcreate_DEPENDENCIES)
+ @rm -f farcreate$(EXEEXT)
+ $(CXXLINK) $(farcreate_OBJECTS) $(farcreate_LDADD) $(LIBS)
+farextract$(EXEEXT): $(farextract_OBJECTS) $(farextract_DEPENDENCIES)
+ @rm -f farextract$(EXEEXT)
+ $(CXXLINK) $(farextract_OBJECTS) $(farextract_LDADD) $(LIBS)
+farinfo$(EXEEXT): $(farinfo_OBJECTS) $(farinfo_DEPENDENCIES)
+ @rm -f farinfo$(EXEEXT)
+ $(CXXLINK) $(farinfo_OBJECTS) $(farinfo_LDADD) $(LIBS)
+farprintstrings$(EXEEXT): $(farprintstrings_OBJECTS) $(farprintstrings_DEPENDENCIES)
+ @rm -f farprintstrings$(EXEEXT)
+ $(CXXLINK) $(farprintstrings_OBJECTS) $(farprintstrings_LDADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compile-strings.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/farcompilestrings.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/farcreate.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/farextract.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/farinfo.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/farprintstrings.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/farscript.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/main.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stlist.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sttable.Plo@am__quote@
+
+.cc.o:
+@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $<
+
+.cc.obj:
+@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.cc.lo:
+@am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(LTCXXCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ mkid -fID $$unique
+tags: TAGS
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ set x; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: CTAGS
+CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES) $(PROGRAMS)
+installdirs:
+ for dir in "$(DESTDIR)$(libfstdir)" "$(DESTDIR)$(bindir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-binPROGRAMS clean-generic clean-libfstLTLIBRARIES \
+ clean-libtool mostlyclean-am
+
+distclean: distclean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-libfstLTLIBRARIES
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-binPROGRAMS
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-binPROGRAMS uninstall-libfstLTLIBRARIES
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-binPROGRAMS \
+ clean-generic clean-libfstLTLIBRARIES clean-libtool ctags \
+ distclean distclean-compile distclean-generic \
+ distclean-libtool distclean-tags distdir dvi dvi-am html \
+ html-am info info-am install install-am install-binPROGRAMS \
+ install-data install-data-am install-dvi install-dvi-am \
+ install-exec install-exec-am install-html install-html-am \
+ install-info install-info-am install-libfstLTLIBRARIES \
+ install-man install-pdf install-pdf-am install-ps \
+ install-ps-am install-strip installcheck installcheck-am \
+ installdirs maintainer-clean maintainer-clean-generic \
+ mostlyclean mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
+ uninstall-am uninstall-binPROGRAMS uninstall-libfstLTLIBRARIES
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/src/extensions/far/compile-strings.cc b/src/extensions/far/compile-strings.cc
new file mode 100644
index 0000000..e8a99cc
--- /dev/null
+++ b/src/extensions/far/compile-strings.cc
@@ -0,0 +1,36 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/extensions/far/compile-strings.h>
+#include <iostream>
+#include <fstream>
+
+namespace fst {
+
+// Returns the number of decimal digits needed to write the line count of
+// `filename` (never less than 1); an empty or unreadable file yields 1.
+int KeySize(const char *filename) {
+  ifstream istrm(filename);
+  string s;
+  int nline = 0;
+  while (getline(istrm, s))
+    ++nline;
+  int ndigit = 1;  // Integer digit count: no ceil(log10()) rounding or <cmath>.
+  for (int n = nline; n >= 10; n /= 10) ++ndigit;
+  return ndigit;
+}
+
+} // namespace fst
diff --git a/src/extensions/far/farcompilestrings.cc b/src/extensions/far/farcompilestrings.cc
new file mode 100644
index 0000000..0c9b352
--- /dev/null
+++ b/src/extensions/far/farcompilestrings.cc
@@ -0,0 +1,84 @@
+// farcompilestrings.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use new arc-type dispatching
+//
+// \file
+// Compiles a set of strings as FSTs and stores them in a finite-state
+// archive.
+//
+
+#include <fst/extensions/far/farscript.h>
+#include <fst/extensions/far/main.h>
+#include <iostream>
+#include <fstream>
+
+DEFINE_string(key_prefix, "", "Prefix to append to keys");
+DEFINE_string(key_suffix, "", "Suffix to append to keys");
+DEFINE_int32(generate_keys, 0,
+             "Generate N digit numeric keys (def: use file basenames)");
+DEFINE_string(far_type, "default", "FAR file format type: one of: "
+              "\"default\", \"stlist\", \"sstable\", \"sttable\"");
+DEFINE_bool(allow_negative_labels, false,
+            "Allow negative labels (not recommended; may cause conflicts)");
+DEFINE_string(arc_type, "standard", "Output arc type");
+DEFINE_string(entry_type, "line", "Entry type: one of : "
+              "\"file\" (one FST per file), \"line\" (one FST per line)");
+DEFINE_string(fst_type, "vector", "Output FST type");
+DEFINE_string(token_type, "symbol", "Token type: one of : "
+              "\"symbol\", \"byte\", \"utf8\"");
+DEFINE_string(symbols, "", "Label symbol table");
+DEFINE_string(unknown_symbol, "", "Label for unknown symbols");
+DEFINE_bool(file_list_input, false,
+            "Each input files contains a list of files to be processed");
+
+int main(int argc, char **argv) {
+  // Compiles the text inputs argv[1..argc-2] into the archive argv[argc-1].
+  namespace s = fst::script;
+  string usage = "Compiles a set of strings as FSTs and stores them in";
+  usage += " a finite-state archive.\n\n Usage:";
+  usage += argv[0];
+  usage += " in1.txt [in2.txt ...] out.far\n";
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+
+  if (argc < 3) {  // Need at least one input file and the output archive.
+    ShowUsage();
+    return 1;
+  }
+
+  // Signed index: an unsigned counter compared with argc - 1 mixes signs.
+  vector<string> in_fnames(argc - 2);
+  for (int i = 1; i < argc - 1; ++i) {
+    in_fnames[i - 1] = argv[i];
+  }
+
+  string out_fname = argv[argc - 1];
+  // Convert the string-valued flags into the enum values passed below.
+  fst::FarEntryType fet = fst::StringToFarEntryType(FLAGS_entry_type);
+  fst::FarTokenType ftt = fst::StringToFarTokenType(FLAGS_token_type);
+  fst::FarType far_type = fst::FarTypeFromString(FLAGS_far_type);
+
+  s::FarCompileStrings(in_fnames, out_fname, FLAGS_arc_type, FLAGS_fst_type,
+                       far_type, FLAGS_generate_keys, fet, ftt,
+                       FLAGS_symbols, FLAGS_unknown_symbol,
+                       FLAGS_allow_negative_labels,
+                       FLAGS_file_list_input, FLAGS_key_prefix,
+                       FLAGS_key_suffix);
+
+  return 0;
+}
diff --git a/src/extensions/far/farcreate.cc b/src/extensions/far/farcreate.cc
new file mode 100644
index 0000000..009fc34
--- /dev/null
+++ b/src/extensions/far/farcreate.cc
@@ -0,0 +1,62 @@
+// farcreate.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use new dispatch
+//
+// \file
+// Creates a finite-state archive from input FSTs.
+//
+
+#include <fst/extensions/far/farscript.h>
+#include <fst/extensions/far/main.h>
+#include <fst/extensions/far/far.h>
+
+// Command-line flags for farcreate; they are consumed in main() below.
+DEFINE_string(key_prefix, "", "Prefix to append to keys");
+DEFINE_string(key_suffix, "", "Suffix to append to keys");
+DEFINE_int32(generate_keys, 0,
+ "Generate N digit numeric keys (def: use file basenames)");
+// NOTE(review): main() converts this value with fst::FarTypeFromString;
+// confirm that every name listed in the help text (in particular "sstable")
+// is actually recognized there.
+DEFINE_string(far_type, "default",
+ "FAR file format type: one of: \"default\", \"stlist\", \"sstable\", \"sttable\"");
+DEFINE_bool(file_list_input, false,
+ "Each input files contains a list of files to be processed");
+
+// Entry point for farcreate.
+//
+// After flag parsing, every positional argument except the last is taken as
+// an input FST file name and the last one as the output archive name.  The
+// arc type is read from the header of the first input and used to dispatch
+// fst::script::FarCreate.
+//
+// Returns 1 when fewer than two positional arguments remain or when the arc
+// type of the first input cannot be determined, and 0 otherwise.
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+
+  string usage = "Creates a finite-state archive from input FSTs.\n\n Usage:";
+  usage += argv[0];
+  usage += " in1.fst [in2.fst ...] out.far\n";
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+
+  if (argc < 3) {
+    ShowUsage();
+    return 1;
+  }
+
+  vector<string> in_fnames;
+  for (int i = 1; i < argc - 1; ++i)
+    in_fnames.push_back(argv[i]);
+
+  string out_fname = argv[argc - 1];
+
+  // LoadArcTypeFromFst reports the problem itself and yields an empty string
+  // when the header cannot be read; stop here with a failure status instead
+  // of dispatching on an empty arc type.
+  // NOTE(review): with --file_list_input the first argument is presumably a
+  // list file rather than an FST, so this lookup would fail -- confirm.
+  string arc_type = fst::LoadArcTypeFromFst(in_fnames[0]);
+  if (arc_type.empty())
+    return 1;
+
+  fst::FarType far_type = fst::FarTypeFromString(FLAGS_far_type);
+
+  s::FarCreate(in_fnames, out_fname, arc_type, FLAGS_generate_keys,
+               FLAGS_file_list_input, far_type, FLAGS_key_prefix,
+               FLAGS_key_suffix);
+
+  return 0;
+}
diff --git a/src/extensions/far/farextract.cc b/src/extensions/far/farextract.cc
new file mode 100644
index 0000000..72b2d15
--- /dev/null
+++ b/src/extensions/far/farextract.cc
@@ -0,0 +1,62 @@
+// farextract.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use new arc dispatch
+//
+// \file
+// Extracts component FSTs from a finite-state archive.
+//
+
+#include <fst/extensions/far/main.h>
+#include <fst/extensions/far/farscript.h>
+
+// Command-line flags for farextract; main() below passes each value straight
+// through to fst::script::FarExtract.
+DEFINE_string(filename_prefix, "", "Prefix to append to filenames");
+DEFINE_string(filename_suffix, "", "Suffix to append to filenames");
+DEFINE_int32(generate_filenames, 0,
+ "Generate N digit numeric filenames (def: use keys)");
+DEFINE_string(begin_key, "",
+ "First key to extract (def: first key in archive)");
+DEFINE_string(end_key, "",
+ "Last key to extract (def: last key in archive)");
+
+
+// Entry point for farextract.
+//
+// After flag parsing, every positional argument is taken as an input archive
+// name.  The arc type is read from the first archive and used to dispatch
+// fst::script::FarExtract.
+//
+// Returns 1 when no positional argument remains or when the arc type of the
+// first archive cannot be determined, and 0 otherwise.
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+
+  string usage = "Extracts FSTs from a finite-state archive.\n\n Usage:";
+  usage += argv[0];
+  usage += " in1.far [in2.far...]\n";
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+
+  if (argc < 2) {
+    ShowUsage();
+    return 1;
+  }
+
+  vector<string> ifilenames;
+  for (int i = 1; i < argc; ++i)
+    ifilenames.push_back(argv[i]);
+
+  // LoadArcTypeFromFar reports the problem itself and yields an empty string
+  // on failure; stop here with a failure status instead of dispatching on an
+  // empty arc type.
+  const string arc_type = fst::LoadArcTypeFromFar(ifilenames[0]);
+  if (arc_type.empty())
+    return 1;
+
+  s::FarExtract(ifilenames, arc_type, FLAGS_generate_filenames,
+                FLAGS_begin_key, FLAGS_end_key, FLAGS_filename_prefix,
+                FLAGS_filename_suffix);
+
+  return 0;
+}
diff --git a/src/extensions/far/farinfo.cc b/src/extensions/far/farinfo.cc
new file mode 100644
index 0000000..e8036a9
--- /dev/null
+++ b/src/extensions/far/farinfo.cc
@@ -0,0 +1,56 @@
+// farinfo.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use new arc dispatching
+//
+// \file
+// Prints some basic information about the FSTs in an FST archive.
+//
+
+#include <fst/extensions/far/main.h>
+#include <fst/extensions/far/farscript.h>
+
+// Command-line flags for farinfo; main() below passes each value straight
+// through to fst::script::FarInfo.
+DEFINE_string(begin_key, "",
+ "First key to extract (def: first key in archive)");
+DEFINE_string(end_key, "",
+ "Last key to extract (def: last key in archive)");
+
+DEFINE_bool(list_fsts, false, "Display FST information for each key");
+
+// Entry point for farinfo.
+//
+// After flag parsing, every positional argument is taken as an input archive
+// name.  The arc type is read from the first archive and used to dispatch
+// fst::script::FarInfo.
+//
+// Returns 1 when no positional argument remains or when the arc type of the
+// first archive cannot be determined, and 0 otherwise.
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+
+  string usage = "Prints some basic information about the FSTs in an FST ";
+  usage += "archive.\n\n Usage:";
+  usage += argv[0];
+  usage += " in1.far [in2.far...]\n";
+  usage += " Flags: begin_key end_key list_fsts";
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+
+  if (argc < 2) {
+    ShowUsage();
+    return 1;
+  }
+
+  vector<string> filenames;
+  for (int i = 1; i < argc; ++i)
+    filenames.push_back(argv[i]);
+
+  // LoadArcTypeFromFar reports the problem itself and yields an empty string
+  // on failure; stop here with a failure status instead of dispatching on an
+  // empty arc type.
+  const string arc_type = fst::LoadArcTypeFromFar(filenames[0]);
+  if (arc_type.empty())
+    return 1;
+
+  s::FarInfo(filenames, arc_type, FLAGS_begin_key, FLAGS_end_key,
+             FLAGS_list_fsts);
+
+  return 0;
+}
diff --git a/src/extensions/far/farprintstrings.cc b/src/extensions/far/farprintstrings.cc
new file mode 100644
index 0000000..ab7d52b
--- /dev/null
+++ b/src/extensions/far/farprintstrings.cc
@@ -0,0 +1,70 @@
+// farprintstrings.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// Output as strings the string FSTs in a finite-state archive.
+//
+
+#include <fst/extensions/far/farscript.h>
+
+// Command-line flags for farprintstrings; they are consumed in main() below.
+DEFINE_string(filename_prefix, "", "Prefix to append to filenames");
+DEFINE_string(filename_suffix, "", "Suffix to append to filenames");
+DEFINE_int32(generate_filenames, 0,
+ "Generate N digit numeric filenames (def: use keys)");
+DEFINE_string(begin_key, "",
+ "First key to extract (def: first key in archive)");
+DEFINE_string(end_key, "",
+ "Last key to extract (def: last key in archive)");
+// PrintStringsMain specific flag definitions.
+DEFINE_bool(print_key, false, "Prefix each string by its key");
+// entry_type and token_type are converted to enums in main() with
+// fst::StringToFarEntryType and fst::StringToFarTokenType respectively.
+DEFINE_string(entry_type, "line", "Entry type: one of : "
+ "\"file\" (one FST per file), \"line\" (one FST per line)");
+DEFINE_string(token_type, "symbol", "Token type: one of : "
+ "\"symbol\", \"byte\", \"utf8\"");
+DEFINE_string(symbols, "", "Label symbol table");
+
+// Entry point for farprintstrings.
+//
+// After flag parsing, every positional argument is taken as an input archive
+// name.  The arc type is read from the first archive and used to dispatch
+// fst::script::FarPrintStrings.
+//
+// Returns 1 when no positional argument remains or when the arc type of the
+// first archive cannot be determined, and 0 otherwise.
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+
+  string usage = "Print as string the string FSTs in an archive.\n\n Usage:";
+  usage += argv[0];
+  usage += " in1.far [in2.far ...]\n";
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+
+  if (argc < 2) {
+    ShowUsage();
+    return 1;
+  }
+
+  vector<string> ifilenames;
+  for (int i = 1; i < argc; ++i)
+    ifilenames.push_back(argv[i]);
+
+  // LoadArcTypeFromFar reports the problem itself and yields an empty string
+  // on failure; stop here with a failure status instead of dispatching on an
+  // empty arc type.
+  string arc_type = fst::LoadArcTypeFromFar(ifilenames[0]);
+  if (arc_type.empty())
+    return 1;
+
+  s::FarPrintStrings(ifilenames, arc_type,
+                     fst::StringToFarEntryType(FLAGS_entry_type),
+                     fst::StringToFarTokenType(FLAGS_token_type),
+                     FLAGS_begin_key, FLAGS_end_key, FLAGS_print_key,
+                     FLAGS_symbols, FLAGS_generate_filenames,
+                     FLAGS_filename_prefix, FLAGS_filename_suffix);
+
+  return 0;
+}
diff --git a/src/extensions/far/farscript.cc b/src/extensions/far/farscript.cc
new file mode 100644
index 0000000..a04645d
--- /dev/null
+++ b/src/extensions/far/farscript.cc
@@ -0,0 +1,113 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+// Definitions of 'scriptable' versions of FAR operations, that is,
+// those that can be called with FstClass-type arguments.
+
+#include <fst/extensions/far/farscript.h>
+#include <fst/script/script-impl.h>
+#include <fst/extensions/far/far.h>
+
+namespace fst {
+namespace script {
+
+// Scriptable FarCompileStrings: packs every argument other than arc_type
+// into a FarCompileStringsArgs and dispatches the "FarCompileStrings"
+// operation registered for arc_type.
+void FarCompileStrings(const vector<string> &in_fnames,
+                       const string &out_fname,
+                       const string &arc_type,
+                       const string &fst_type,
+                       const FarType &far_type,
+                       int32 generate_keys,
+                       FarEntryType fet,
+                       FarTokenType tt,
+                       const string &symbols_fname,
+                       const string &unknown_symbol,
+                       bool allow_negative_labels,
+                       bool file_list_input,
+                       const string &key_prefix,
+                       const string &key_suffix) {
+  // arc_type is deliberately not part of the bundle; it only selects the
+  // operation below.
+  FarCompileStringsArgs bundle(in_fnames, out_fname, fst_type, far_type,
+                               generate_keys, fet, tt, symbols_fname,
+                               unknown_symbol, allow_negative_labels,
+                               file_list_input, key_prefix, key_suffix);
+
+  typedef Operation<FarCompileStringsArgs> CompileStringsOp;
+  Apply<CompileStringsOp>("FarCompileStrings", arc_type, &bundle);
+}
+
+// Scriptable FarCreate: packs every argument other than arc_type into a
+// FarCreateArgs and dispatches the "FarCreate" operation registered for
+// arc_type.
+void FarCreate(const vector<string> &in_fnames,
+               const string &out_fname,
+               const string &arc_type,
+               const int32 generate_keys,
+               const bool file_list_input,
+               const FarType &far_type,
+               const string &key_prefix,
+               const string &key_suffix) {
+  FarCreateArgs bundle(in_fnames, out_fname, generate_keys, file_list_input,
+                       far_type, key_prefix, key_suffix);
+
+  typedef Operation<FarCreateArgs> CreateOp;
+  Apply<CreateOp>("FarCreate", arc_type, &bundle);
+}
+
+// Scriptable FarExtract: packs every argument other than arc_type into a
+// FarExtractArgs and dispatches the "FarExtract" operation registered for
+// arc_type.
+void FarExtract(const vector<string> &ifilenames,
+                const string &arc_type,
+                int32 generate_filenames, const string &begin_key,
+                const string &end_key, const string &filename_prefix,
+                const string &filename_suffix) {
+  FarExtractArgs bundle(ifilenames, generate_filenames, begin_key, end_key,
+                        filename_prefix, filename_suffix);
+
+  typedef Operation<FarExtractArgs> ExtractOp;
+  Apply<ExtractOp>("FarExtract", arc_type, &bundle);
+}
+
+// Scriptable FarInfo: packs every argument other than arc_type into a
+// FarInfoArgs and dispatches the "FarInfo" operation registered for
+// arc_type.
+void FarInfo(const vector<string> &filenames,
+             const string &arc_type,
+             const string &begin_key,
+             const string &end_key,
+             const bool list_fsts) {
+  FarInfoArgs bundle(filenames, begin_key, end_key, list_fsts);
+
+  typedef Operation<FarInfoArgs> InfoOp;
+  Apply<InfoOp>("FarInfo", arc_type, &bundle);
+}
+
+// Scriptable FarPrintStrings: packs every argument other than arc_type into
+// a FarPrintStringsArgs and dispatches the "FarPrintStrings" operation
+// registered for arc_type.
+void FarPrintStrings(const vector<string> &ifilenames,
+                     const string &arc_type,
+                     const FarEntryType entry_type,
+                     const FarTokenType token_type,
+                     const string &begin_key,
+                     const string &end_key,
+                     const bool print_key,
+                     const string &symbols_fname,
+                     const int32 generate_filenames,
+                     const string &filename_prefix,
+                     const string &filename_suffix) {
+  FarPrintStringsArgs bundle(ifilenames, entry_type, token_type, begin_key,
+                             end_key, print_key, symbols_fname,
+                             generate_filenames, filename_prefix,
+                             filename_suffix);
+
+  typedef Operation<FarPrintStringsArgs> PrintStringsOp;
+  Apply<PrintStringsOp>("FarPrintStrings", arc_type, &bundle);
+}
+
+// Instantiate all templates for common arc types.
+// One REGISTER_FST_FAR_OPERATIONS line per arc type; the arc types covered
+// here are StdArc, LogArc and Log64Arc.
+
+REGISTER_FST_FAR_OPERATIONS(StdArc);
+REGISTER_FST_FAR_OPERATIONS(LogArc);
+REGISTER_FST_FAR_OPERATIONS(Log64Arc);
+
+} // namespace script
+} // namespace fst
diff --git a/src/extensions/far/main.cc b/src/extensions/far/main.cc
new file mode 100644
index 0000000..b01d639
--- /dev/null
+++ b/src/extensions/far/main.cc
@@ -0,0 +1,118 @@
+// main.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to not use new arc-dispatch
+//
+// \file
+// Definitions and functions for invoking and using Far main
+// functions that support multiple and extensible arc types.
+
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <iostream>
+#include <fstream>
+#include <fst/extensions/far/main.h>
+
+namespace fst {
+
+// Maps a far type name onto its 'FarType' value: "stlist" and "sttable"
+// select FAR_STLIST and FAR_STTABLE; "default" and any other name yield
+// FAR_DEFAULT.
+FarType FarTypeFromString(const string &str) {
+  if (str == "stlist")
+    return FAR_STLIST;
+  if (str == "sttable")
+    return FAR_STTABLE;
+  // "default" and unrecognized names are not told apart.
+  return FAR_DEFAULT;
+}
+
+
+// Return the textual name corresponding to a 'FarType'; values without a
+// known name are rendered as "<unknown>".
+string FarTypeToString(FarType type) {
+  if (type == FAR_STLIST)
+    return "stlist";
+  if (type == FAR_STTABLE)
+    return "sttable";
+  if (type == FAR_DEFAULT)
+    return "default";
+  return "<unknown>";
+}
+
+// Converts "line" / "file" into FET_LINE / FET_FILE.  Any other name is
+// reported through FSTERROR() and FET_LINE is returned.
+FarEntryType StringToFarEntryType(const string &s) {
+  if (s == "line")
+    return FET_LINE;
+  if (s == "file")
+    return FET_FILE;
+
+  FSTERROR() << "Unknown FAR entry type: " << s;
+  return FET_LINE;  // compiler requires return
+}
+
+// Converts "symbol" / "byte" / "utf8" into FTT_SYMBOL / FTT_BYTE / FTT_UTF8.
+// Any other name is reported through FSTERROR() and FTT_SYMBOL is returned.
+FarTokenType StringToFarTokenType(const string &s) {
+  if (s == "symbol") {
+    return FTT_SYMBOL;
+  } else if (s == "byte") {
+    return FTT_BYTE;
+  } else if (s == "utf8") {
+    return FTT_UTF8;
+  } else {
+    // The message names the token type; it previously said "entry type",
+    // which is what StringToFarEntryType reports.
+    FSTERROR() << "Unknown FAR token type: " << s;
+    return FTT_SYMBOL;  // compiler requires return
+  }
+}
+
+
+// Returns the arc type recorded in the header of the archive far_fname.
+// Logs an error and returns "" when far_fname is empty, when the header
+// cannot be read, or when the header's arc type is "unknown".
+string LoadArcTypeFromFar(const string &far_fname) {
+  FarHeader header;
+
+  // An empty name is rejected up front with a standard-input message.
+  if (far_fname.empty()) {
+    LOG(ERROR) << "Reading FAR from standard in not supported";
+    return "";
+  }
+
+  const bool header_ok = header.Read(far_fname);
+  if (!header_ok) {
+    LOG(ERROR) << "Error reading FAR: " << far_fname;
+    return "";
+  }
+
+  string arc_type = header.ArcType();
+  if (arc_type == "unknown") {
+    LOG(ERROR) << "Empty FST archive: " << far_fname;
+    return "";
+  }
+
+  return arc_type;
+}
+
+// Returns the arc type recorded in the header of the FST file fst_fname.
+// Logs an error and returns "" when the header cannot be read.
+string LoadArcTypeFromFst(const string &fst_fname) {
+  FstHeader header;
+  ifstream strm(fst_fname.c_str(), ifstream::in | ifstream::binary);
+
+  const bool header_ok = header.Read(strm, fst_fname);
+  if (!header_ok) {
+    LOG(ERROR) << "Error reading FST: " << fst_fname;
+    return "";
+  }
+
+  return header.ArcType();
+}
+
+} // namespace fst
diff --git a/src/extensions/far/stlist.cc b/src/extensions/far/stlist.cc
new file mode 100644
index 0000000..f75b592
--- /dev/null
+++ b/src/extensions/far/stlist.cc
@@ -0,0 +1,31 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+
+#include <fst/extensions/far/stlist.h>
+
+namespace fst {
+
+// Returns true iff filename can be opened and the int32 read from its start
+// equals kSTListMagicNumber.
+bool IsSTList(const string &filename) {
+  // The magic number is raw binary data, so the stream is opened in binary
+  // mode to keep the platform from translating any of its bytes.
+  ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
+  if (!strm)
+    return false;
+
+  // Starts at 0; presumably left untouched by ReadType when the read fails
+  // (e.g. on a file that is too short) -- confirm against ReadType.
+  int32 magic_number = 0;
+  ReadType(strm, &magic_number);
+  return magic_number == kSTListMagicNumber;
+}
+
+} // namespace fst
diff --git a/src/extensions/far/sttable.cc b/src/extensions/far/sttable.cc
new file mode 100644
index 0000000..76e91b8
--- /dev/null
+++ b/src/extensions/far/sttable.cc
@@ -0,0 +1,31 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+
+#include <fst/extensions/far/sttable.h>
+
+namespace fst {
+
+// Returns true iff filename can be opened and the int32 read from its start
+// equals kSTTableMagicNumber.
+bool IsSTTable(const string &filename) {
+  // The magic number is raw binary data, so the stream is opened in binary
+  // mode to keep the platform from translating any of its bytes.
+  ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
+  if (!strm)
+    return false;
+
+  // Starts at 0; presumably left untouched by ReadType when the read fails
+  // (e.g. on a file that is too short) -- confirm against ReadType.
+  int32 magic_number = 0;
+  ReadType(strm, &magic_number);
+  return magic_number == kSTTableMagicNumber;
+}
+
+} // namespace fst
diff --git a/src/extensions/lookahead/Makefile.am b/src/extensions/lookahead/Makefile.am
new file mode 100644
index 0000000..895ef29
--- /dev/null
+++ b/src/extensions/lookahead/Makefile.am
@@ -0,0 +1,18 @@
+# Automake input for the lookahead extension.
+
+# Preprocessor flags: the project include directory (two levels above this
+# source directory) plus the ICU preprocessor flags.
+AM_CPPFLAGS = -I$(srcdir)/../../include $(ICU_CPPFLAGS)
+
+# Libtool libraries placed under libfstdir: the combined libfstlookahead
+# library and one library per lookahead FST type.
+libfstdir = @libfstdir@
+libfst_LTLIBRARIES = libfstlookahead.la arc_lookahead-fst.la \
+ilabel_lookahead-fst.la olabel_lookahead-fst.la
+
+# libfstlookahead is built from all three lookahead sources.
+libfstlookahead_la_SOURCES = arc_lookahead-fst.cc ilabel_lookahead-fst.cc \
+olabel_lookahead-fst.cc
+libfstlookahead_la_LDFLAGS = -version-info 0:0:0
+
+# Each per-type library is built from a single source and linked with
+# libtool's -module flag.
+arc_lookahead_fst_la_SOURCES = arc_lookahead-fst.cc
+arc_lookahead_fst_la_LDFLAGS = -module
+
+ilabel_lookahead_fst_la_SOURCES = ilabel_lookahead-fst.cc
+ilabel_lookahead_fst_la_LDFLAGS = -module
+
+olabel_lookahead_fst_la_SOURCES = olabel_lookahead-fst.cc
+olabel_lookahead_fst_la_LDFLAGS = -module
diff --git a/src/extensions/lookahead/Makefile.in b/src/extensions/lookahead/Makefile.in
new file mode 100644
index 0000000..4cb078f
--- /dev/null
+++ b/src/extensions/lookahead/Makefile.in
@@ -0,0 +1,578 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = src/extensions/lookahead
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_icu.m4 \
+ $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+ $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+ $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h \
+ $(top_builddir)/src/include/fst/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+ *) f=$$p;; \
+ esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+ if (++n[$$2] == $(am__install_max)) \
+ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+ END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__installdirs = "$(DESTDIR)$(libfstdir)"
+LTLIBRARIES = $(libfst_LTLIBRARIES)
+arc_lookahead_fst_la_LIBADD =
+am_arc_lookahead_fst_la_OBJECTS = arc_lookahead-fst.lo
+arc_lookahead_fst_la_OBJECTS = $(am_arc_lookahead_fst_la_OBJECTS)
+arc_lookahead_fst_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
+ $(CXXFLAGS) $(arc_lookahead_fst_la_LDFLAGS) $(LDFLAGS) -o $@
+ilabel_lookahead_fst_la_LIBADD =
+am_ilabel_lookahead_fst_la_OBJECTS = ilabel_lookahead-fst.lo
+ilabel_lookahead_fst_la_OBJECTS = \
+ $(am_ilabel_lookahead_fst_la_OBJECTS)
+ilabel_lookahead_fst_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
+ $(CXXFLAGS) $(ilabel_lookahead_fst_la_LDFLAGS) $(LDFLAGS) -o \
+ $@
+libfstlookahead_la_LIBADD =
+am_libfstlookahead_la_OBJECTS = arc_lookahead-fst.lo \
+ ilabel_lookahead-fst.lo olabel_lookahead-fst.lo
+libfstlookahead_la_OBJECTS = $(am_libfstlookahead_la_OBJECTS)
+libfstlookahead_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
+ $(CXXFLAGS) $(libfstlookahead_la_LDFLAGS) $(LDFLAGS) -o $@
+olabel_lookahead_fst_la_LIBADD =
+am_olabel_lookahead_fst_la_OBJECTS = olabel_lookahead-fst.lo
+olabel_lookahead_fst_la_OBJECTS = \
+ $(am_olabel_lookahead_fst_la_OBJECTS)
+olabel_lookahead_fst_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
+ $(CXXFLAGS) $(olabel_lookahead_fst_la_LDFLAGS) $(LDFLAGS) -o \
+ $@
+DEFAULT_INCLUDES =
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+LTCXXCOMPILE = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+CXXLD = $(CXX)
+CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
+ $(LDFLAGS) -o $@
+SOURCES = $(arc_lookahead_fst_la_SOURCES) \
+ $(ilabel_lookahead_fst_la_SOURCES) \
+ $(libfstlookahead_la_SOURCES) \
+ $(olabel_lookahead_fst_la_SOURCES)
+DIST_SOURCES = $(arc_lookahead_fst_la_SOURCES) \
+ $(ilabel_lookahead_fst_la_SOURCES) \
+ $(libfstlookahead_la_SOURCES) \
+ $(olabel_lookahead_fst_la_SOURCES)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+ICU_CFLAGS = @ICU_CFLAGS@
+ICU_CONFIG = @ICU_CONFIG@
+ICU_CPPFLAGS = @ICU_CPPFLAGS@
+ICU_CXXFLAGS = @ICU_CXXFLAGS@
+ICU_LIBS = @ICU_LIBS@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+libfstdir = @libfstdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+lt_ECHO = @lt_ECHO@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+AM_CPPFLAGS = -I$(srcdir)/../../include $(ICU_CPPFLAGS)
+libfst_LTLIBRARIES = libfstlookahead.la arc_lookahead-fst.la \
+ilabel_lookahead-fst.la olabel_lookahead-fst.la
+
+libfstlookahead_la_SOURCES = arc_lookahead-fst.cc ilabel_lookahead-fst.cc \
+olabel_lookahead-fst.cc
+
+libfstlookahead_la_LDFLAGS = -version-info 0:0:0
+arc_lookahead_fst_la_SOURCES = arc_lookahead-fst.cc
+arc_lookahead_fst_la_LDFLAGS = -module
+ilabel_lookahead_fst_la_SOURCES = ilabel_lookahead-fst.cc
+ilabel_lookahead_fst_la_LDFLAGS = -module
+olabel_lookahead_fst_la_SOURCES = olabel_lookahead-fst.cc
+olabel_lookahead_fst_la_LDFLAGS = -module
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .cc .lo .o .obj
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/extensions/lookahead/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign src/extensions/lookahead/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-libfstLTLIBRARIES: $(libfst_LTLIBRARIES)
+ @$(NORMAL_INSTALL)
+ test -z "$(libfstdir)" || $(MKDIR_P) "$(DESTDIR)$(libfstdir)"
+ @list='$(libfst_LTLIBRARIES)'; test -n "$(libfstdir)" || list=; \
+ list2=; for p in $$list; do \
+ if test -f $$p; then \
+ list2="$$list2 $$p"; \
+ else :; fi; \
+ done; \
+ test -z "$$list2" || { \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libfstdir)'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libfstdir)"; \
+ }
+
+uninstall-libfstLTLIBRARIES:
+ @$(NORMAL_UNINSTALL)
+ @list='$(libfst_LTLIBRARIES)'; test -n "$(libfstdir)" || list=; \
+ for p in $$list; do \
+ $(am__strip_dir) \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libfstdir)/$$f'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libfstdir)/$$f"; \
+ done
+
+clean-libfstLTLIBRARIES:
+ -test -z "$(libfst_LTLIBRARIES)" || rm -f $(libfst_LTLIBRARIES)
+ @list='$(libfst_LTLIBRARIES)'; for p in $$list; do \
+ dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+ test "$$dir" != "$$p" || dir=.; \
+ echo "rm -f \"$${dir}/so_locations\""; \
+ rm -f "$${dir}/so_locations"; \
+ done
+arc_lookahead-fst.la: $(arc_lookahead_fst_la_OBJECTS) $(arc_lookahead_fst_la_DEPENDENCIES)
+ $(arc_lookahead_fst_la_LINK) -rpath $(libfstdir) $(arc_lookahead_fst_la_OBJECTS) $(arc_lookahead_fst_la_LIBADD) $(LIBS)
+ilabel_lookahead-fst.la: $(ilabel_lookahead_fst_la_OBJECTS) $(ilabel_lookahead_fst_la_DEPENDENCIES)
+ $(ilabel_lookahead_fst_la_LINK) -rpath $(libfstdir) $(ilabel_lookahead_fst_la_OBJECTS) $(ilabel_lookahead_fst_la_LIBADD) $(LIBS)
+libfstlookahead.la: $(libfstlookahead_la_OBJECTS) $(libfstlookahead_la_DEPENDENCIES)
+ $(libfstlookahead_la_LINK) -rpath $(libfstdir) $(libfstlookahead_la_OBJECTS) $(libfstlookahead_la_LIBADD) $(LIBS)
+olabel_lookahead-fst.la: $(olabel_lookahead_fst_la_OBJECTS) $(olabel_lookahead_fst_la_DEPENDENCIES)
+ $(olabel_lookahead_fst_la_LINK) -rpath $(libfstdir) $(olabel_lookahead_fst_la_OBJECTS) $(olabel_lookahead_fst_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/arc_lookahead-fst.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilabel_lookahead-fst.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/olabel_lookahead-fst.Plo@am__quote@
+
+.cc.o:
+@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $<
+
+.cc.obj:
+@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.cc.lo:
+@am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(LTCXXCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ mkid -fID $$unique
+tags: TAGS
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ set x; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: CTAGS
+CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES)
+installdirs:
+ for dir in "$(DESTDIR)$(libfstdir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libfstLTLIBRARIES clean-libtool \
+ mostlyclean-am
+
+distclean: distclean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-libfstLTLIBRARIES
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-libfstLTLIBRARIES
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
+ clean-libfstLTLIBRARIES clean-libtool ctags distclean \
+ distclean-compile distclean-generic distclean-libtool \
+ distclean-tags distdir dvi dvi-am html html-am info info-am \
+ install install-am install-data install-data-am install-dvi \
+ install-dvi-am install-exec install-exec-am install-html \
+ install-html-am install-info install-info-am \
+ install-libfstLTLIBRARIES install-man install-pdf \
+ install-pdf-am install-ps install-ps-am install-strip \
+ installcheck installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-compile \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags uninstall uninstall-am uninstall-libfstLTLIBRARIES
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/src/extensions/lookahead/arc_lookahead-fst.cc b/src/extensions/lookahead/arc_lookahead-fst.cc
new file mode 100644
index 0000000..f3c827e
--- /dev/null
+++ b/src/extensions/lookahead/arc_lookahead-fst.cc
@@ -0,0 +1,28 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+
+#include <fst/fst.h>
+#include <fst/matcher-fst.h>
+
+using fst::FstRegisterer;
+using fst::StdArcLookAheadFst;
+using fst::LogArcLookAheadFst;
+using fst::LogArc;
+using fst::StdArc;
+
+// Register the Std and Log ArcLookAhead Fst types, one static registerer each.
+static FstRegisterer<StdArcLookAheadFst> ArcLookAheadFst_StdArc_registerer;
+static FstRegisterer<LogArcLookAheadFst> ArcLookAheadFst_LogArc_registerer;
diff --git a/src/extensions/lookahead/ilabel_lookahead-fst.cc b/src/extensions/lookahead/ilabel_lookahead-fst.cc
new file mode 100644
index 0000000..8df0819
--- /dev/null
+++ b/src/extensions/lookahead/ilabel_lookahead-fst.cc
@@ -0,0 +1,30 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+
+#include <fst/fst.h>
+#include <fst/matcher-fst.h>
+
+using fst::FstRegisterer;
+using fst::StdILabelLookAheadFst;
+using fst::LogILabelLookAheadFst;
+using fst::LogArc;
+using fst::StdArc;
+
+// Register the Std and Log ILabelLookAhead (input-label look-ahead) Fst types.
+static FstRegisterer<StdILabelLookAheadFst>
+ILabelLookAheadFst_StdArc_registerer;
+static FstRegisterer<LogILabelLookAheadFst>
+ILabelLookAheadFst_LogArc_registerer;
diff --git a/src/extensions/lookahead/olabel_lookahead-fst.cc b/src/extensions/lookahead/olabel_lookahead-fst.cc
new file mode 100644
index 0000000..9542b56
--- /dev/null
+++ b/src/extensions/lookahead/olabel_lookahead-fst.cc
@@ -0,0 +1,30 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+
+#include <fst/fst.h>
+#include <fst/matcher-fst.h>
+
+using fst::FstRegisterer;
+using fst::StdOLabelLookAheadFst;
+using fst::LogOLabelLookAheadFst;
+using fst::LogArc;
+using fst::StdArc;
+
+// Register the Std and Log OLabelLookAhead Fst types, one static object each.
+static FstRegisterer<StdOLabelLookAheadFst>
+OLabelLookAheadFst_StdArc_registerer;
+static FstRegisterer<LogOLabelLookAheadFst>
+OLabelLookAheadFst_LogArc_registerer;
diff --git a/src/extensions/pdt/Makefile.am b/src/extensions/pdt/Makefile.am
new file mode 100644
index 0000000..9936b52
--- /dev/null
+++ b/src/extensions/pdt/Makefile.am
@@ -0,0 +1,29 @@
+AM_CPPFLAGS = -I$(srcdir)/../../include $(ICU_CPPFLAGS)
+
+if HAVE_BIN
+bin_PROGRAMS = pdtcompose pdtexpand pdtinfo pdtreplace pdtreverse \
+ pdtshortestpath
+
+LDADD = libfstpdtscript.la ../../script/libfstscript.la \
+ ../../lib/libfst.la -lm -ldl
+
+pdtcompose_SOURCES = pdtcompose.cc
+
+pdtexpand_SOURCES = pdtexpand.cc
+
+pdtinfo_SOURCES = pdtinfo.cc
+
+pdtreplace_SOURCES = pdtreplace.cc
+
+pdtreverse_SOURCES = pdtreverse.cc
+
+pdtshortestpath_SOURCES = pdtshortestpath.cc
+endif
+
+libfstdir = @libfstdir@
+
+if HAVE_SCRIPT
+libfst_LTLIBRARIES = libfstpdtscript.la
+libfstpdtscript_la_SOURCES = pdtscript.cc
+libfstpdtscript_la_LDFLAGS = -version-info 0:0:0
+endif
diff --git a/src/extensions/pdt/Makefile.in b/src/extensions/pdt/Makefile.in
new file mode 100644
index 0000000..018f344
--- /dev/null
+++ b/src/extensions/pdt/Makefile.in
@@ -0,0 +1,663 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+@HAVE_BIN_TRUE@bin_PROGRAMS = pdtcompose$(EXEEXT) pdtexpand$(EXEEXT) \
+@HAVE_BIN_TRUE@ pdtinfo$(EXEEXT) pdtreplace$(EXEEXT) \
+@HAVE_BIN_TRUE@ pdtreverse$(EXEEXT) pdtshortestpath$(EXEEXT)
+subdir = src/extensions/pdt
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_icu.m4 \
+ $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+ $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+ $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h \
+ $(top_builddir)/src/include/fst/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+ *) f=$$p;; \
+ esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+ if (++n[$$2] == $(am__install_max)) \
+ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+ END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__installdirs = "$(DESTDIR)$(libfstdir)" "$(DESTDIR)$(bindir)"
+LTLIBRARIES = $(libfst_LTLIBRARIES)
+libfstpdtscript_la_LIBADD =
+am__libfstpdtscript_la_SOURCES_DIST = pdtscript.cc
+@HAVE_SCRIPT_TRUE@am_libfstpdtscript_la_OBJECTS = pdtscript.lo
+libfstpdtscript_la_OBJECTS = $(am_libfstpdtscript_la_OBJECTS)
+libfstpdtscript_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
+ $(CXXFLAGS) $(libfstpdtscript_la_LDFLAGS) $(LDFLAGS) -o $@
+@HAVE_SCRIPT_TRUE@am_libfstpdtscript_la_rpath = -rpath $(libfstdir)
+PROGRAMS = $(bin_PROGRAMS)
+am__pdtcompose_SOURCES_DIST = pdtcompose.cc
+@HAVE_BIN_TRUE@am_pdtcompose_OBJECTS = pdtcompose.$(OBJEXT)
+pdtcompose_OBJECTS = $(am_pdtcompose_OBJECTS)
+pdtcompose_LDADD = $(LDADD)
+@HAVE_BIN_TRUE@pdtcompose_DEPENDENCIES = libfstpdtscript.la \
+@HAVE_BIN_TRUE@ ../../script/libfstscript.la \
+@HAVE_BIN_TRUE@ ../../lib/libfst.la
+am__pdtexpand_SOURCES_DIST = pdtexpand.cc
+@HAVE_BIN_TRUE@am_pdtexpand_OBJECTS = pdtexpand.$(OBJEXT)
+pdtexpand_OBJECTS = $(am_pdtexpand_OBJECTS)
+pdtexpand_LDADD = $(LDADD)
+@HAVE_BIN_TRUE@pdtexpand_DEPENDENCIES = libfstpdtscript.la \
+@HAVE_BIN_TRUE@ ../../script/libfstscript.la \
+@HAVE_BIN_TRUE@ ../../lib/libfst.la
+am__pdtinfo_SOURCES_DIST = pdtinfo.cc
+@HAVE_BIN_TRUE@am_pdtinfo_OBJECTS = pdtinfo.$(OBJEXT)
+pdtinfo_OBJECTS = $(am_pdtinfo_OBJECTS)
+pdtinfo_LDADD = $(LDADD)
+@HAVE_BIN_TRUE@pdtinfo_DEPENDENCIES = libfstpdtscript.la \
+@HAVE_BIN_TRUE@ ../../script/libfstscript.la \
+@HAVE_BIN_TRUE@ ../../lib/libfst.la
+am__pdtreplace_SOURCES_DIST = pdtreplace.cc
+@HAVE_BIN_TRUE@am_pdtreplace_OBJECTS = pdtreplace.$(OBJEXT)
+pdtreplace_OBJECTS = $(am_pdtreplace_OBJECTS)
+pdtreplace_LDADD = $(LDADD)
+@HAVE_BIN_TRUE@pdtreplace_DEPENDENCIES = libfstpdtscript.la \
+@HAVE_BIN_TRUE@ ../../script/libfstscript.la \
+@HAVE_BIN_TRUE@ ../../lib/libfst.la
+am__pdtreverse_SOURCES_DIST = pdtreverse.cc
+@HAVE_BIN_TRUE@am_pdtreverse_OBJECTS = pdtreverse.$(OBJEXT)
+pdtreverse_OBJECTS = $(am_pdtreverse_OBJECTS)
+pdtreverse_LDADD = $(LDADD)
+@HAVE_BIN_TRUE@pdtreverse_DEPENDENCIES = libfstpdtscript.la \
+@HAVE_BIN_TRUE@ ../../script/libfstscript.la \
+@HAVE_BIN_TRUE@ ../../lib/libfst.la
+am__pdtshortestpath_SOURCES_DIST = pdtshortestpath.cc
+@HAVE_BIN_TRUE@am_pdtshortestpath_OBJECTS = pdtshortestpath.$(OBJEXT)
+pdtshortestpath_OBJECTS = $(am_pdtshortestpath_OBJECTS)
+pdtshortestpath_LDADD = $(LDADD)
+@HAVE_BIN_TRUE@pdtshortestpath_DEPENDENCIES = libfstpdtscript.la \
+@HAVE_BIN_TRUE@ ../../script/libfstscript.la \
+@HAVE_BIN_TRUE@ ../../lib/libfst.la
+DEFAULT_INCLUDES =
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+LTCXXCOMPILE = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+CXXLD = $(CXX)
+CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
+ $(LDFLAGS) -o $@
+SOURCES = $(libfstpdtscript_la_SOURCES) $(pdtcompose_SOURCES) \
+ $(pdtexpand_SOURCES) $(pdtinfo_SOURCES) $(pdtreplace_SOURCES) \
+ $(pdtreverse_SOURCES) $(pdtshortestpath_SOURCES)
+DIST_SOURCES = $(am__libfstpdtscript_la_SOURCES_DIST) \
+ $(am__pdtcompose_SOURCES_DIST) $(am__pdtexpand_SOURCES_DIST) \
+ $(am__pdtinfo_SOURCES_DIST) $(am__pdtreplace_SOURCES_DIST) \
+ $(am__pdtreverse_SOURCES_DIST) \
+ $(am__pdtshortestpath_SOURCES_DIST)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+ICU_CFLAGS = @ICU_CFLAGS@
+ICU_CONFIG = @ICU_CONFIG@
+ICU_CPPFLAGS = @ICU_CPPFLAGS@
+ICU_CXXFLAGS = @ICU_CXXFLAGS@
+ICU_LIBS = @ICU_LIBS@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+libfstdir = @libfstdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+lt_ECHO = @lt_ECHO@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+AM_CPPFLAGS = -I$(srcdir)/../../include $(ICU_CPPFLAGS)
+@HAVE_BIN_TRUE@LDADD = libfstpdtscript.la ../../script/libfstscript.la \
+@HAVE_BIN_TRUE@ ../../lib/libfst.la -lm -ldl
+
+@HAVE_BIN_TRUE@pdtcompose_SOURCES = pdtcompose.cc
+@HAVE_BIN_TRUE@pdtexpand_SOURCES = pdtexpand.cc
+@HAVE_BIN_TRUE@pdtinfo_SOURCES = pdtinfo.cc
+@HAVE_BIN_TRUE@pdtreplace_SOURCES = pdtreplace.cc
+@HAVE_BIN_TRUE@pdtreverse_SOURCES = pdtreverse.cc
+@HAVE_BIN_TRUE@pdtshortestpath_SOURCES = pdtshortestpath.cc
+@HAVE_SCRIPT_TRUE@libfst_LTLIBRARIES = libfstpdtscript.la
+@HAVE_SCRIPT_TRUE@libfstpdtscript_la_SOURCES = pdtscript.cc
+@HAVE_SCRIPT_TRUE@libfstpdtscript_la_LDFLAGS = -version-info 0:0:0
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .cc .lo .o .obj
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/extensions/pdt/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign src/extensions/pdt/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-libfstLTLIBRARIES: $(libfst_LTLIBRARIES)
+ @$(NORMAL_INSTALL)
+ test -z "$(libfstdir)" || $(MKDIR_P) "$(DESTDIR)$(libfstdir)"
+ @list='$(libfst_LTLIBRARIES)'; test -n "$(libfstdir)" || list=; \
+ list2=; for p in $$list; do \
+ if test -f $$p; then \
+ list2="$$list2 $$p"; \
+ else :; fi; \
+ done; \
+ test -z "$$list2" || { \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libfstdir)'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libfstdir)"; \
+ }
+
+uninstall-libfstLTLIBRARIES:
+ @$(NORMAL_UNINSTALL)
+ @list='$(libfst_LTLIBRARIES)'; test -n "$(libfstdir)" || list=; \
+ for p in $$list; do \
+ $(am__strip_dir) \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libfstdir)/$$f'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libfstdir)/$$f"; \
+ done
+
+clean-libfstLTLIBRARIES:
+ -test -z "$(libfst_LTLIBRARIES)" || rm -f $(libfst_LTLIBRARIES)
+ @list='$(libfst_LTLIBRARIES)'; for p in $$list; do \
+ dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+ test "$$dir" != "$$p" || dir=.; \
+ echo "rm -f \"$${dir}/so_locations\""; \
+ rm -f "$${dir}/so_locations"; \
+ done
+libfstpdtscript.la: $(libfstpdtscript_la_OBJECTS) $(libfstpdtscript_la_DEPENDENCIES)
+ $(libfstpdtscript_la_LINK) $(am_libfstpdtscript_la_rpath) $(libfstpdtscript_la_OBJECTS) $(libfstpdtscript_la_LIBADD) $(LIBS)
+install-binPROGRAMS: $(bin_PROGRAMS)
+ @$(NORMAL_INSTALL)
+ test -z "$(bindir)" || $(MKDIR_P) "$(DESTDIR)$(bindir)"
+ @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed 's/$(EXEEXT)$$//' | \
+ while read p p1; do if test -f $$p || test -f $$p1; \
+ then echo "$$p"; echo "$$p"; else :; fi; \
+ done | \
+ sed -e 'p;s,.*/,,;n;h' -e 's|.*|.|' \
+ -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \
+ sed 'N;N;N;s,\n, ,g' | \
+ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \
+ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
+ if ($$2 == $$4) files[d] = files[d] " " $$1; \
+ else { print "f", $$3 "/" $$4, $$1; } } \
+ END { for (d in files) print "f", d, files[d] }' | \
+ while read type dir files; do \
+ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
+ test -z "$$files" || { \
+ echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \
+ $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \
+ } \
+ ; done
+
+uninstall-binPROGRAMS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+ files=`for p in $$list; do echo "$$p"; done | \
+ sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \
+ -e 's/$$/$(EXEEXT)/' `; \
+ test -n "$$list" || exit 0; \
+ echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \
+ cd "$(DESTDIR)$(bindir)" && rm -f $$files
+
+clean-binPROGRAMS:
+ @list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \
+ echo " rm -f" $$list; \
+ rm -f $$list || exit $$?; \
+ test -n "$(EXEEXT)" || exit 0; \
+ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+ echo " rm -f" $$list; \
+ rm -f $$list
+pdtcompose$(EXEEXT): $(pdtcompose_OBJECTS) $(pdtcompose_DEPENDENCIES)
+ @rm -f pdtcompose$(EXEEXT)
+ $(CXXLINK) $(pdtcompose_OBJECTS) $(pdtcompose_LDADD) $(LIBS)
+pdtexpand$(EXEEXT): $(pdtexpand_OBJECTS) $(pdtexpand_DEPENDENCIES)
+ @rm -f pdtexpand$(EXEEXT)
+ $(CXXLINK) $(pdtexpand_OBJECTS) $(pdtexpand_LDADD) $(LIBS)
+pdtinfo$(EXEEXT): $(pdtinfo_OBJECTS) $(pdtinfo_DEPENDENCIES)
+ @rm -f pdtinfo$(EXEEXT)
+ $(CXXLINK) $(pdtinfo_OBJECTS) $(pdtinfo_LDADD) $(LIBS)
+pdtreplace$(EXEEXT): $(pdtreplace_OBJECTS) $(pdtreplace_DEPENDENCIES)
+ @rm -f pdtreplace$(EXEEXT)
+ $(CXXLINK) $(pdtreplace_OBJECTS) $(pdtreplace_LDADD) $(LIBS)
+pdtreverse$(EXEEXT): $(pdtreverse_OBJECTS) $(pdtreverse_DEPENDENCIES)
+ @rm -f pdtreverse$(EXEEXT)
+ $(CXXLINK) $(pdtreverse_OBJECTS) $(pdtreverse_LDADD) $(LIBS)
+pdtshortestpath$(EXEEXT): $(pdtshortestpath_OBJECTS) $(pdtshortestpath_DEPENDENCIES)
+ @rm -f pdtshortestpath$(EXEEXT)
+ $(CXXLINK) $(pdtshortestpath_OBJECTS) $(pdtshortestpath_LDADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdtcompose.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdtexpand.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdtinfo.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdtreplace.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdtreverse.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdtscript.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdtshortestpath.Po@am__quote@
+
+.cc.o:
+@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $<
+
+.cc.obj:
+@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.cc.lo:
+@am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(LTCXXCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ mkid -fID $$unique
+tags: TAGS
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ set x; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: CTAGS
+CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+# distdir: copy every entry of $(DISTFILES) into $(distdir).
+#  - The sed expressions strip a leading $(srcdir)/ and rewrite a leading
+#    $(top_srcdir)/ to $(top_builddir)/ so the names are relative.
+#  - Any subdirectories needed under $(distdir) are created up front.
+#  - Each entry is taken from the current directory when it exists there,
+#    otherwise from $(srcdir).
+#  - Directories are copied recursively; when a directory exists both in the
+#    build tree and in $(srcdir), the $(srcdir) copy is made first and the
+#    build-tree copy is laid over it. Directory permissions in $(distdir) are
+#    opened up (u+rwx) so the copies can succeed.
+#  - Regular files already present in $(distdir) are left untouched.
+# Any failing copy aborts the rule with exit status 1.
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+# check only depends on all-am here; no test programs are listed.
+check-am: all-am
+check: check-am
+# all-am: the Makefile itself plus the libtool libraries and programs.
+all-am: Makefile $(LTLIBRARIES) $(PROGRAMS)
+# installdirs: create the libfst and bin install directories under $(DESTDIR).
+installdirs:
+ for dir in "$(DESTDIR)$(libfstdir)" "$(DESTDIR)$(bindir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+# The user-facing install/uninstall targets forward to their -am forms.
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+# install-am: build everything, then install exec and data parts via a
+# recursive make.
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+# install-strip: re-run `install` with the stripping install program
+# substituted; STRIPPROG is passed through only when $(STRIP) is non-empty.
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+# mostlyclean-generic and clean-generic have nothing to remove here.
+mostlyclean-generic:
+
+clean-generic:
+
+# distclean-generic: remove configure-generated files; the VPATH list is
+# removed only when building outside the source directory.
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-binPROGRAMS clean-generic clean-libfstLTLIBRARIES \
+ clean-libtool mostlyclean-am
+
+# distclean: everything clean-am does, plus the dependency directory and
+# the generated Makefile.
+distclean: distclean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+# The documentation-format targets below (dvi, html, info, pdf, ps) and
+# their install-* counterparts have empty -am rules in this directory.
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+# Data install step: the libfst libtool libraries.
+install-data-am: install-libfstLTLIBRARIES
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+# Exec install step: the programs from bin_PROGRAMS.
+install-exec-am: install-binPROGRAMS
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+# maintainer-clean: same removals as distclean, after maintainer-clean-am.
+maintainer-clean: maintainer-clean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-binPROGRAMS uninstall-libfstLTLIBRARIES
+
+# Targets whose recipes invoke $(MAKE) recursively.
+.MAKE: install-am install-strip
+
+# Targets that do not name files.
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-binPROGRAMS \
+ clean-generic clean-libfstLTLIBRARIES clean-libtool ctags \
+ distclean distclean-compile distclean-generic \
+ distclean-libtool distclean-tags distdir dvi dvi-am html \
+ html-am info info-am install install-am install-binPROGRAMS \
+ install-data install-data-am install-dvi install-dvi-am \
+ install-exec install-exec-am install-html install-html-am \
+ install-info install-info-am install-libfstLTLIBRARIES \
+ install-man install-pdf install-pdf-am install-ps \
+ install-ps-am install-strip installcheck installcheck-am \
+ installdirs maintainer-clean maintainer-clean-generic \
+ mostlyclean mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
+ uninstall-am uninstall-binPROGRAMS uninstall-libfstLTLIBRARIES
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/src/extensions/pdt/pdtcompose.cc b/src/extensions/pdt/pdtcompose.cc
new file mode 100644
index 0000000..0bab405
--- /dev/null
+++ b/src/extensions/pdt/pdtcompose.cc
@@ -0,0 +1,82 @@
+// pdtcompose.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Composes a PDT and an FST.
+//
+
+#include <vector>
+using std::vector;
+#include <utility>
+using std::pair; using std::make_pair;
+
+#include <fst/util.h>
+#include <fst/extensions/pdt/pdtscript.h>
+#include <fst/script/connect.h>
+
+// Command-line flags: the parenthesis-pair file, which argument is the PDT,
+// and whether the result is trimmed.
+DEFINE_string(pdt_parentheses, "", "PDT parenthesis label pairs.");
+DEFINE_bool(left_pdt, true, "1st arg is PDT (o.w. 2nd arg).");
+DEFINE_bool(connect, true, "Trim output");
+
+// Composes a PDT with an FST and writes the resulting PDT.
+//
+// Positional arguments: two input files and an optional output file. "-"
+// stands for standard input and may be used for at most one of the inputs.
+// When no output file is given an empty name is passed to Write().
+//
+// Returns 0 on success and 1 on a usage error, an unreadable input, a
+// missing --pdt_parentheses flag, or an unreadable parenthesis file.
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+
+  string usage = "Compose a PDT and an FST.\n\n Usage: ";
+  usage += argv[0];
+  usage += " in.pdt in.fst [out.pdt]\n";
+  usage += " in.fst in.pdt [out.pdt]\n";
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  if (argc < 3 || argc > 4) {
+    ShowUsage();
+    return 1;
+  }
+
+  // "-" is mapped to the empty file name, which denotes standard input.
+  string in1_name = strcmp(argv[1], "-") == 0 ? "" : argv[1];
+  string in2_name = strcmp(argv[2], "-") == 0 ? "" : argv[2];
+  string out_name = argc > 3 ? argv[3] : "";
+
+  if (in1_name.empty() && in2_name.empty()) {
+    LOG(ERROR) << argv[0] << ": Can't take both inputs from standard input.";
+    return 1;
+  }
+
+  s::FstClass *ifst1 = s::FstClass::Read(in1_name);
+  if (!ifst1) return 1;
+  s::FstClass *ifst2 = s::FstClass::Read(in2_name);
+  if (!ifst2) return 1;
+
+  if (FLAGS_pdt_parentheses.empty()) {
+    LOG(ERROR) << argv[0] << ": No PDT parenthesis label pairs provided";
+    return 1;
+  }
+
+  // Do not compose with a partially read or empty parenthesis set: stop if
+  // the label-pair file could not be loaded.
+  vector<pair<int64, int64> > parens;
+  if (!fst::ReadLabelPairs(FLAGS_pdt_parentheses, &parens, false)) return 1;
+
+  s::VectorFstClass ofst(ifst1->ArcType());
+  fst::ComposeOptions copts(false);
+
+  s::PdtCompose(*ifst1, *ifst2, parens, &ofst, copts, FLAGS_left_pdt);
+
+  if (FLAGS_connect)
+    s::Connect(&ofst);
+  ofst.Write(out_name);
+
+  return 0;
+}
diff --git a/src/extensions/pdt/pdtexpand.cc b/src/extensions/pdt/pdtexpand.cc
new file mode 100644
index 0000000..2134932
--- /dev/null
+++ b/src/extensions/pdt/pdtexpand.cc
@@ -0,0 +1,71 @@
+// pdtexpand.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+//
+// \file
+// Expands a PDT to an FST.
+//
+
+#include <fst/extensions/pdt/pdtscript.h>
+#include <fst/util.h>
+
+// Command-line flags: the parenthesis-pair file, output trimming, whether
+// parentheses stay in the result, and an optional weight threshold.
+DEFINE_string(pdt_parentheses, "", "PDT parenthesis label pairs.");
+DEFINE_bool(connect, true, "Trim output");
+DEFINE_bool(keep_parentheses, false, "Keep PDT parentheses in result.");
+DEFINE_string(weight, "", "Weight threshold");
+
+
+// Expands a PDT to an FST and writes the result.
+//
+// Positional arguments: an optional input file ("-" or absent selects the
+// empty name, i.e. standard input) and an optional output file.
+//
+// Returns 0 on success and 1 on a usage error, an unreadable input, a
+// missing --pdt_parentheses flag, or an unreadable parenthesis file.
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+
+  string usage = "Expand a PDT to an FST.\n\n Usage: ";
+  usage += argv[0];
+  usage += " in.pdt [out.fst]\n";
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  if (argc > 3) {
+    ShowUsage();
+    return 1;
+  }
+
+  string in_name = (argc > 1 && (strcmp(argv[1], "-") != 0)) ? argv[1] : "";
+  string out_name = argc > 2 ? argv[2] : "";
+
+  s::FstClass *ifst = s::FstClass::Read(in_name);
+  if (!ifst) return 1;
+
+  if (FLAGS_pdt_parentheses.empty()) {
+    LOG(ERROR) << argv[0] << ": No PDT parenthesis label pairs provided";
+    return 1;
+  }
+
+  // Stop if the label-pair file could not be loaded instead of expanding
+  // with an incomplete parenthesis set.
+  vector<pair<int64, int64> > parens;
+  if (!fst::ReadLabelPairs(FLAGS_pdt_parentheses, &parens, false)) return 1;
+
+  // An empty --weight selects WeightClass::Zero(); otherwise the flag text
+  // is interpreted using the input's weight type.
+  s::WeightClass weight_threshold = FLAGS_weight.empty() ?
+      s::WeightClass::Zero() :
+      s::WeightClass(ifst->WeightType(), FLAGS_weight);
+
+  s::VectorFstClass ofst(ifst->ArcType());
+  s::PdtExpand(*ifst, parens, &ofst, s::PdtExpandOptions(
+      FLAGS_connect, FLAGS_keep_parentheses, weight_threshold));
+
+  ofst.Write(out_name);
+
+  return 0;
+}
diff --git a/src/extensions/pdt/pdtinfo.cc b/src/extensions/pdt/pdtinfo.cc
new file mode 100644
index 0000000..89227d3
--- /dev/null
+++ b/src/extensions/pdt/pdtinfo.cc
@@ -0,0 +1,59 @@
+// pdtinfo.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Prints out various information about a PDT such as number of
+// states, arcs and parentheses.
+//
+
+#include <fst/extensions/pdt/pdtscript.h>
+#include <fst/util.h>
+
+// Command-line flag naming the parenthesis-pair file.
+DEFINE_string(pdt_parentheses, "", "PDT parenthesis label pairs.");
+
+// Prints information about a PDT.
+//
+// Positional argument: an optional input file ("-" or absent selects the
+// empty name, i.e. standard input).
+//
+// Returns 0 on success and 1 on a usage error, an unreadable input, a
+// missing --pdt_parentheses flag, or an unreadable parenthesis file.
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+
+  string usage = "Prints out information about a PDT.\n\n Usage: ";
+  usage += argv[0];
+  usage += " in.pdt\n";
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  if (argc > 2) {
+    ShowUsage();
+    return 1;
+  }
+
+  string in_name = (argc > 1 && (strcmp(argv[1], "-") != 0)) ? argv[1] : "";
+
+  s::FstClass *ifst = s::FstClass::Read(in_name);
+  if (!ifst) return 1;
+
+  if (FLAGS_pdt_parentheses.empty()) {
+    LOG(ERROR) << argv[0] << ": No PDT parenthesis label pairs provided";
+    return 1;
+  }
+
+  // Stop if the label-pair file could not be loaded; statistics computed
+  // against an incomplete parenthesis set would be misleading.
+  vector<pair<int64, int64> > parens;
+  if (!fst::ReadLabelPairs(FLAGS_pdt_parentheses, &parens, false)) return 1;
+
+  s::PrintPdtInfo(*ifst, parens);
+
+  return 0;
+}
diff --git a/src/extensions/pdt/pdtreplace.cc b/src/extensions/pdt/pdtreplace.cc
new file mode 100644
index 0000000..543b6ea
--- /dev/null
+++ b/src/extensions/pdt/pdtreplace.cc
@@ -0,0 +1,73 @@
+// pdtreplace.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+
+#include <utility>
+using std::pair; using std::make_pair;
+#include <vector>
+using std::vector;
+#include <fst/extensions/pdt/pdtscript.h>
+#include <fst/vector-fst.h>
+#include <fst/util.h>
+
+// Command-line flag naming the file that receives the generated
+// parenthesis label pairs (nothing is written when it is empty).
+DEFINE_string(pdt_parentheses, "", "PDT parenthesis label pairs.");
+
+// Builds a PDT by replacing arcs of a root FST with other FSTs.
+//
+// Positional arguments are (file, label) pairs, the first pair being the
+// root. When the total argument count is even, the last argument is the
+// output file; otherwise an empty output name is passed to Write().
+//
+// Returns 0 on success and 1 on a usage error, an unreadable input FST, or
+// a failure to write the parenthesis file.
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+
+  string usage = "Recursively replace Fst arcs with other Fst(s).\n";
+  usage += " Usage: ";
+  usage += argv[0];
+  usage += " root.fst rootlabel [rule1.fst label1 ...] [out.fst]\n";
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  if (argc < 4) {
+    ShowUsage();
+    return 1;
+  }
+
+  string in_fname = argv[1];
+  string out_fname = argc % 2 == 0 ? argv[argc - 1] : "";
+
+  s::FstClass *ifst = s::FstClass::Read(in_fname);
+  if (!ifst) return 1;
+
+  typedef int64 Label;
+  typedef pair<Label, const s::FstClass* > FstTuple;
+  vector<FstTuple> fst_tuples;
+  Label root = atoll(argv[2]);
+  fst_tuples.push_back(make_pair(root, ifst));
+
+  // The index is an int, like argc, so the bound is compared without a
+  // signed/unsigned conversion.
+  for (int i = 3; i < argc - 1; i += 2) {
+    ifst = s::FstClass::Read(argv[i]);
+    if (!ifst) return 1;
+    Label lab = atoll(argv[i + 1]);
+    fst_tuples.push_back(make_pair(lab, ifst));
+  }
+
+  s::VectorFstClass ofst(ifst->ArcType());
+  vector<pair<int64, int64> > parens;
+  s::PdtReplace(fst_tuples, &ofst, &parens, root);
+
+  // The output PDT is only usable together with its parenthesis pairs, so a
+  // failed write of that file is reported as an error.
+  if (!FLAGS_pdt_parentheses.empty()) {
+    if (!fst::WriteLabelPairs(FLAGS_pdt_parentheses, parens)) return 1;
+  }
+
+  ofst.Write(out_fname);
+
+  return 0;
+}
diff --git a/src/extensions/pdt/pdtreverse.cc b/src/extensions/pdt/pdtreverse.cc
new file mode 100644
index 0000000..e79cd7f
--- /dev/null
+++ b/src/extensions/pdt/pdtreverse.cc
@@ -0,0 +1,61 @@
+// pdtreverse.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Reverse a PDT.
+//
+
+#include <fst/extensions/pdt/pdtscript.h>
+#include <fst/util.h>
+
+// Command-line flag naming the parenthesis-pair file.
+DEFINE_string(pdt_parentheses, "", "PDT parenthesis label pairs.");
+
+// Reverses a PDT and writes the result.
+//
+// Positional arguments: an optional input file ("-" or absent selects the
+// empty name, i.e. standard input) and an optional output file.
+//
+// Returns 0 on success and 1 on a usage error, an unreadable input, a
+// missing --pdt_parentheses flag, or an unreadable parenthesis file.
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+
+  string usage = "Reverse a PDT.\n\n Usage: ";
+  usage += argv[0];
+  usage += " in.pdt [out.fst]\n";
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  if (argc > 3) {
+    ShowUsage();
+    return 1;
+  }
+
+  string in_name = (argc > 1 && (strcmp(argv[1], "-") != 0)) ? argv[1] : "";
+  string out_name = argc > 2 ? argv[2] : "";
+
+  s::FstClass *ifst = s::FstClass::Read(in_name);
+  if (!ifst) return 1;
+
+  if (FLAGS_pdt_parentheses.empty()) {
+    LOG(ERROR) << argv[0] << ": No PDT parenthesis label pairs provided";
+    return 1;
+  }
+
+  // Stop if the label-pair file could not be loaded instead of reversing
+  // with an incomplete parenthesis set.
+  vector<pair<int64, int64> > parens;
+  if (!fst::ReadLabelPairs(FLAGS_pdt_parentheses, &parens, false)) return 1;
+
+  s::VectorFstClass ofst(ifst->ArcType());
+  s::PdtReverse(*ifst, parens, &ofst);
+
+  ofst.Write(out_name);
+
+  return 0;
+}
diff --git a/src/extensions/pdt/pdtscript.cc b/src/extensions/pdt/pdtscript.cc
new file mode 100644
index 0000000..33e29ce
--- /dev/null
+++ b/src/extensions/pdt/pdtscript.cc
@@ -0,0 +1,115 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+// Definitions of 'scriptable' versions of pdt operations, that is,
+// those that can be called with FstClass-type arguments.
+
+// See comments in <fst/script/script-impl.h> for how the registration
+// mechanism allows these to work with various arc types.
+
+#include <vector>
+using std::vector;
+#include <utility>
+using std::pair; using std::make_pair;
+
+
+#include <fst/extensions/pdt/compose.h>
+#include <fst/extensions/pdt/expand.h>
+#include <fst/extensions/pdt/pdtscript.h>
+#include <fst/extensions/pdt/replace.h>
+#include <fst/extensions/pdt/reverse.h>
+#include <fst/extensions/pdt/shortest-path.h>
+#include <fst/script/script-impl.h>
+
+namespace fst {
+namespace script {
+
+// Scriptable PDT composition. Dispatches the "PdtCompose" operation on the
+// arc type of ifst1. Returns without dispatching when ifst1, ifst2 and ofst
+// do not all share one arc type.
+void PdtCompose(const FstClass &ifst1,
+                const FstClass &ifst2,
+                const vector<pair<int64, int64> > &parens,
+                MutableFstClass *ofst,
+                const ComposeOptions &copts,
+                bool left_pdt) {
+  // Checked in sequence: the output is compared only once the two inputs
+  // are known to agree.
+  if (!ArcTypesMatch(ifst1, ifst2, "PdtCompose")) return;
+  if (!ArcTypesMatch(ifst1, *ofst, "PdtCompose")) return;
+
+  PdtComposeArgs compose_args(ifst1, ifst2, parens, ofst, copts, left_pdt);
+  Apply<Operation<PdtComposeArgs> >("PdtCompose", ifst1.ArcType(),
+                                    &compose_args);
+}
+
+// Scriptable PDT expansion with explicit options. Packs the arguments and
+// dispatches the "PdtExpand" operation on the arc type of ifst.
+void PdtExpand(const FstClass &ifst,
+               const vector<pair<int64, int64> > &parens,
+               MutableFstClass *ofst, const PdtExpandOptions &opts) {
+  PdtExpandArgs expand_args(ifst, parens, ofst, opts);
+  Apply<Operation<PdtExpandArgs> >("PdtExpand", ifst.ArcType(),
+                                   &expand_args);
+}
+
+// Convenience overload: builds PdtExpandOptions from the connect flag alone
+// and forwards to the options-taking PdtExpand.
+void PdtExpand(const FstClass &ifst,
+               const vector<pair<int64, int64> > &parens,
+               MutableFstClass *ofst, bool connect) {
+  const PdtExpandOptions expand_opts(connect);
+  PdtExpand(ifst, parens, ofst, expand_opts);
+}
+
+// Scriptable PDT replacement. Dispatches the "PdtReplace" operation on the
+// arc type of ofst.
+//
+// fst_tuples holds (label, FST) pairs; parens and root are forwarded to the
+// operation unchanged. Returns without dispatching when fst_tuples is empty
+// or when the FSTs and ofst do not all share one arc type.
+void PdtReplace(const vector<pair<int64, const FstClass*> > &fst_tuples,
+                MutableFstClass *ofst,
+                vector<pair<int64, int64> > *parens,
+                const int64 &root) {
+  // Guard the element accesses below: with no tuples, size() - 1 would wrap
+  // around and fst_tuples[0] would not exist.
+  if (fst_tuples.empty()) {
+    LOG(ERROR) << "PdtReplace: No input FSTs provided";
+    return;
+  }
+
+  // Compare neighbouring FSTs pairwise; `i + 1 < size()` avoids unsigned
+  // underflow.
+  for (size_t i = 0; i + 1 < fst_tuples.size(); ++i) {
+    if (!ArcTypesMatch(*(fst_tuples[i].second),
+                       *(fst_tuples[i + 1].second), "PdtReplace")) return;
+  }
+
+  if (!ArcTypesMatch((*fst_tuples[0].second), *ofst, "PdtReplace")) return;
+
+  PdtReplaceArgs args(fst_tuples, ofst, parens, root);
+
+  Apply<Operation<PdtReplaceArgs> >("PdtReplace", ofst->ArcType(), &args);
+}
+
+// Scriptable PDT reversal. Packs the arguments and dispatches the
+// "PdtReverse" operation on the arc type of ifst.
+void PdtReverse(const FstClass &ifst,
+                const vector<pair<int64, int64> > &parens,
+                MutableFstClass *ofst) {
+  PdtReverseArgs reverse_args(ifst, parens, ofst);
+  Apply<Operation<PdtReverseArgs> >("PdtReverse", ifst.ArcType(),
+                                    &reverse_args);
+}
+
+// Scriptable PDT shortest path. Packs the arguments and dispatches the
+// "PdtShortestPath" operation on the arc type of ifst.
+void PdtShortestPath(const FstClass &ifst,
+                     const vector<pair<int64, int64> > &parens,
+                     MutableFstClass *ofst,
+                     const PdtShortestPathOptions &opts) {
+  PdtShortestPathArgs path_args(ifst, parens, ofst, opts);
+  Apply<Operation<PdtShortestPathArgs> >("PdtShortestPath", ifst.ArcType(),
+                                         &path_args);
+}
+
+// Scriptable PDT info printer. Packs the arguments and dispatches the
+// "PrintPdtInfo" operation on the arc type of ifst.
+void PrintPdtInfo(const FstClass &ifst,
+                  const vector<pair<int64, int64> > &parens) {
+  PrintPdtInfoArgs info_args(ifst, parens);
+  Apply<Operation<PrintPdtInfoArgs> >("PrintPdtInfo", ifst.ArcType(),
+                                      &info_args);
+}
+
+// Register operations for common arc types.
+// NOTE(review): REGISTER_FST_PDT_OPERATIONS is defined outside this file;
+// presumably each invocation registers the PDT operations dispatched via
+// Apply above for the named arc type — confirm against pdtscript.h.
+
+REGISTER_FST_PDT_OPERATIONS(StdArc);
+REGISTER_FST_PDT_OPERATIONS(LogArc);
+REGISTER_FST_PDT_OPERATIONS(Log64Arc);
+
+} // namespace script
+} // namespace fst
diff --git a/src/extensions/pdt/pdtshortestpath.cc b/src/extensions/pdt/pdtshortestpath.cc
new file mode 100644
index 0000000..ad67969
--- /dev/null
+++ b/src/extensions/pdt/pdtshortestpath.cc
@@ -0,0 +1,81 @@
+// pdtshortestpath.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
+//
+// \file
+// Return the shortest path in a PDT.
+//
+
+#include <fst/extensions/pdt/pdtscript.h>
+#include <fst/util.h>
+
+
+// Command-line flags: whether parentheses stay in the result, the queue
+// discipline, shortest-path garbage collection, and the parenthesis-pair
+// file.
+DEFINE_bool(keep_parentheses, false, "Keep PDT parentheses in result.");
+DEFINE_string(queue_type, "fifo", "Queue type: one of: "
+              "\"fifo\", \"lifo\", \"state\"");
+DEFINE_bool(path_gc, true, "Garbage collect shortest path data");
+DEFINE_string(pdt_parentheses, "", "PDT parenthesis label pairs.");
+
+// Computes the shortest path in a PDT and writes it.
+//
+// Positional arguments: an optional input file ("-" or absent selects the
+// empty name, i.e. standard input) and an optional output file.
+//
+// Returns 0 on success and 1 on a usage error, an unreadable input, a
+// missing --pdt_parentheses flag, an unreadable parenthesis file, or an
+// unrecognized --queue_type.
+int main(int argc, char **argv) {
+  namespace s = fst::script;
+
+  string usage = "Shortest path in a PDT.\n\n Usage: ";
+  usage += argv[0];
+  usage += " in.pdt [out.fst]\n";
+
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(usage.c_str(), &argc, &argv, true);
+  if (argc > 3) {
+    ShowUsage();
+    return 1;
+  }
+
+  string in_name = (argc > 1 && (strcmp(argv[1], "-") != 0)) ? argv[1] : "";
+  string out_name = argc > 2 ? argv[2] : "";
+
+  s::FstClass *ifst = s::FstClass::Read(in_name);
+  if (!ifst) return 1;
+
+  if (FLAGS_pdt_parentheses.empty()) {
+    LOG(ERROR) << argv[0] << ": No PDT parenthesis label pairs provided";
+    return 1;
+  }
+
+  // Stop if the label-pair file could not be loaded instead of searching
+  // with an incomplete parenthesis set.
+  vector<pair<int64, int64> > parens;
+  if (!fst::ReadLabelPairs(FLAGS_pdt_parentheses, &parens, false)) return 1;
+
+  s::VectorFstClass ofst(ifst->ArcType());
+
+  // Map the --queue_type text onto the queue enumeration; anything else is
+  // rejected.
+  fst::QueueType qt;
+
+  if (FLAGS_queue_type == "fifo") {
+    qt = fst::FIFO_QUEUE;
+  } else if (FLAGS_queue_type == "lifo") {
+    qt = fst::LIFO_QUEUE;
+  } else if (FLAGS_queue_type == "state") {
+    qt = fst::STATE_ORDER_QUEUE;
+  } else {
+    LOG(ERROR) << "Unknown or unsupported queue type: " << FLAGS_queue_type;
+    return 1;
+  }
+
+  s::PdtShortestPathOptions opts(qt, FLAGS_keep_parentheses, FLAGS_path_gc);
+  s::PdtShortestPath(*ifst, parens, &ofst, opts);
+  ofst.Write(out_name);
+
+  return 0;
+}
diff --git a/src/include/Makefile.am b/src/include/Makefile.am
new file mode 100644
index 0000000..b4e6213
--- /dev/null
+++ b/src/include/Makefile.am
@@ -0,0 +1,67 @@
+# Headers of the FAR extension; the list is defined only inside the
+# HAVE_FAR conditional.
+if HAVE_FAR
+far_include_headers = fst/extensions/far/compile-strings.h \
+fst/extensions/far/create.h fst/extensions/far/extract.h \
+fst/extensions/far/far.h fst/extensions/far/farlib.h \
+fst/extensions/far/farscript.h fst/extensions/far/info.h \
+fst/extensions/far/main.h fst/extensions/far/print-strings.h \
+fst/extensions/far/stlist.h fst/extensions/far/sttable.h
+endif
+
+# Headers of the PDT extension; the list is defined only inside the
+# HAVE_PDT conditional.
+if HAVE_PDT
+pdt_include_headers = fst/extensions/pdt/collection.h \
+fst/extensions/pdt/compose.h fst/extensions/pdt/expand.h \
+fst/extensions/pdt/info.h fst/extensions/pdt/paren.h \
+fst/extensions/pdt/pdt.h fst/extensions/pdt/pdtlib.h \
+fst/extensions/pdt/pdtscript.h fst/extensions/pdt/replace.h \
+fst/extensions/pdt/reverse.h fst/extensions/pdt/shortest-path.h
+endif
+
+# Headers of the scripting layer (fst/script/); listed unconditionally.
+script_include_headers = fst/script/arcsort.h fst/script/arg-packs.h \
+fst/script/closure.h fst/script/compile-impl.h fst/script/compile.h \
+fst/script/compose.h fst/script/concat.h fst/script/connect.h \
+fst/script/convert.h fst/script/decode.h fst/script/determinize.h \
+fst/script/difference.h fst/script/draw-impl.h fst/script/draw.h \
+fst/script/encode.h fst/script/epsnormalize.h fst/script/equal.h \
+fst/script/equivalent.h fst/script/fst-class.h fst/script/fstscript.h \
+fst/script/info-impl.h fst/script/info.h fst/script/intersect.h \
+fst/script/invert.h fst/script/map.h fst/script/minimize.h \
+fst/script/print-impl.h fst/script/print.h fst/script/project.h \
+fst/script/prune.h fst/script/push.h fst/script/randequivalent.h \
+fst/script/randgen.h fst/script/register.h fst/script/relabel.h \
+fst/script/replace.h fst/script/reverse.h fst/script/reweight.h \
+fst/script/rmepsilon.h fst/script/script-impl.h \
+fst/script/shortest-distance.h fst/script/shortest-path.h \
+fst/script/symbols.h fst/script/synchronize.h fst/script/text-io.h \
+fst/script/topsort.h fst/script/union.h fst/script/weight-class.h \
+fst/script/fstscript-decl.h fst/script/verify.h
+
+# Full list of headers to install: the core fst/ headers followed by the
+# three lists above. The nobase_ prefix is automake's way of keeping the
+# fst/... directory layout on install (see the automake manual).
+nobase_include_HEADERS = fst/arc.h fst/determinize.h fst/intersect.h \
+fst/queue.h fst/statesort.h fst/arcfilter.h fst/dfs-visit.h \
+fst/invert.h fst/randequivalent.h fst/string-weight.h \
+fst/difference.h fst/lexicographic-weight.h fst/randgen.h \
+fst/symbol-table.h fst/arcsort.h fst/encode.h fst/lock.h \
+fst/random-weight.h fst/synchronize.h \
+fst/epsnormalize.h fst/log.h fst/rational.h fst/test-properties.h \
+fst/cache.h fst/equal.h fst/arc-map.h fst/map.h fst/register.h fst/topsort.h \
+fst/closure.h fst/equivalent.h fst/matcher.h fst/matcher-fst.h fst/relabel.h \
+fst/union-find.h fst/compact-fst.h fst/expanded-fst.h fst/minimize.h \
+fst/replace.h fst/union.h fst/compat.h fst/factor-weight.h fst/state-map.h \
+fst/mutable-fst.h fst/reverse.h fst/util.h fst/complement.h \
+fst/flags.h fst/partition.h fst/reweight.h fst/vector-fst.h \
+fst/compose-filter.h fst/float-weight.h fst/product-weight.h \
+fst/rmepsilon.h fst/verify.h fst/compose.h fst/fst-decl.h \
+fst/project.h fst/rmfinalepsilon.h fst/visit.h fst/concat.h \
+fst/fst.h fst/properties.h fst/shortest-distance.h fst/weight.h \
+fst/connect.h fst/fstlib.h fst/prune.h fst/shortest-path.h \
+fst/const-fst.h fst/heap.h fst/push.h fst/state-table.h fst/slist.h \
+fst/pair-weight.h fst/config.h fst/tuple-weight.h fst/power-weight.h \
+fst/lookahead-matcher.h fst/types.h fst/add-on.h fst/label-reachable.h \
+fst/accumulator.h fst/interval-set.h fst/state-reachable.h \
+fst/lookahead-filter.h fst/generic-register.h fst/edit-fst.h \
+fst/replace-util.h fst/icu.h fst/string.h fst/signed-log-weight.h \
+fst/sparse-tuple-weight.h fst/sparse-power-weight.h fst/expectation-weight.h \
+fst/symbol-table-ops.h fst/bi-table.h \
+$(far_include_headers) \
+$(pdt_include_headers) \
+$(script_include_headers)
+
diff --git a/src/include/Makefile.in b/src/include/Makefile.in
new file mode 100644
index 0000000..ab6c28d
--- /dev/null
+++ b/src/include/Makefile.in
@@ -0,0 +1,590 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = src/include
+DIST_COMMON = $(am__nobase_include_HEADERS_DIST) $(srcdir)/Makefile.am \
+ $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_icu.m4 \
+ $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+ $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+ $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h \
+ $(top_builddir)/src/include/fst/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+SOURCES =
+DIST_SOURCES =
+am__nobase_include_HEADERS_DIST = fst/arc.h fst/determinize.h \
+ fst/intersect.h fst/queue.h fst/statesort.h fst/arcfilter.h \
+ fst/dfs-visit.h fst/invert.h fst/randequivalent.h \
+ fst/string-weight.h fst/difference.h \
+ fst/lexicographic-weight.h fst/randgen.h fst/symbol-table.h \
+ fst/arcsort.h fst/encode.h fst/lock.h fst/random-weight.h \
+ fst/synchronize.h fst/epsnormalize.h fst/log.h fst/rational.h \
+ fst/test-properties.h fst/cache.h fst/equal.h fst/arc-map.h \
+ fst/map.h fst/register.h fst/topsort.h fst/closure.h \
+ fst/equivalent.h fst/matcher.h fst/matcher-fst.h fst/relabel.h \
+ fst/union-find.h fst/compact-fst.h fst/expanded-fst.h \
+ fst/minimize.h fst/replace.h fst/union.h fst/compat.h \
+ fst/factor-weight.h fst/state-map.h fst/mutable-fst.h \
+ fst/reverse.h fst/util.h fst/complement.h fst/flags.h \
+ fst/partition.h fst/reweight.h fst/vector-fst.h \
+ fst/compose-filter.h fst/float-weight.h fst/product-weight.h \
+ fst/rmepsilon.h fst/verify.h fst/compose.h fst/fst-decl.h \
+ fst/project.h fst/rmfinalepsilon.h fst/visit.h fst/concat.h \
+ fst/fst.h fst/properties.h fst/shortest-distance.h \
+ fst/weight.h fst/connect.h fst/fstlib.h fst/prune.h \
+ fst/shortest-path.h fst/const-fst.h fst/heap.h fst/push.h \
+ fst/state-table.h fst/slist.h fst/pair-weight.h fst/config.h \
+ fst/tuple-weight.h fst/power-weight.h fst/lookahead-matcher.h \
+ fst/types.h fst/add-on.h fst/label-reachable.h \
+ fst/accumulator.h fst/interval-set.h fst/state-reachable.h \
+ fst/lookahead-filter.h fst/generic-register.h fst/edit-fst.h \
+ fst/replace-util.h fst/icu.h fst/string.h \
+ fst/signed-log-weight.h fst/sparse-tuple-weight.h \
+ fst/sparse-power-weight.h fst/expectation-weight.h \
+ fst/symbol-table-ops.h fst/bi-table.h \
+ fst/extensions/far/compile-strings.h \
+ fst/extensions/far/create.h fst/extensions/far/extract.h \
+ fst/extensions/far/far.h fst/extensions/far/farlib.h \
+ fst/extensions/far/farscript.h fst/extensions/far/info.h \
+ fst/extensions/far/main.h fst/extensions/far/print-strings.h \
+ fst/extensions/far/stlist.h fst/extensions/far/sttable.h \
+ fst/extensions/pdt/collection.h fst/extensions/pdt/compose.h \
+ fst/extensions/pdt/expand.h fst/extensions/pdt/info.h \
+ fst/extensions/pdt/paren.h fst/extensions/pdt/pdt.h \
+ fst/extensions/pdt/pdtlib.h fst/extensions/pdt/pdtscript.h \
+ fst/extensions/pdt/replace.h fst/extensions/pdt/reverse.h \
+ fst/extensions/pdt/shortest-path.h fst/script/arcsort.h \
+ fst/script/arg-packs.h fst/script/closure.h \
+ fst/script/compile-impl.h fst/script/compile.h \
+ fst/script/compose.h fst/script/concat.h fst/script/connect.h \
+ fst/script/convert.h fst/script/decode.h \
+ fst/script/determinize.h fst/script/difference.h \
+ fst/script/draw-impl.h fst/script/draw.h fst/script/encode.h \
+ fst/script/epsnormalize.h fst/script/equal.h \
+ fst/script/equivalent.h fst/script/fst-class.h \
+ fst/script/fstscript.h fst/script/info-impl.h \
+ fst/script/info.h fst/script/intersect.h fst/script/invert.h \
+ fst/script/map.h fst/script/minimize.h fst/script/print-impl.h \
+ fst/script/print.h fst/script/project.h fst/script/prune.h \
+ fst/script/push.h fst/script/randequivalent.h \
+ fst/script/randgen.h fst/script/register.h \
+ fst/script/relabel.h fst/script/replace.h fst/script/reverse.h \
+ fst/script/reweight.h fst/script/rmepsilon.h \
+ fst/script/script-impl.h fst/script/shortest-distance.h \
+ fst/script/shortest-path.h fst/script/symbols.h \
+ fst/script/synchronize.h fst/script/text-io.h \
+ fst/script/topsort.h fst/script/union.h \
+ fst/script/weight-class.h fst/script/fstscript-decl.h \
+ fst/script/verify.h
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+ *) f=$$p;; \
+ esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+ if (++n[$$2] == $(am__install_max)) \
+ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+ END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__installdirs = "$(DESTDIR)$(includedir)"
+HEADERS = $(nobase_include_HEADERS)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+ICU_CFLAGS = @ICU_CFLAGS@
+ICU_CONFIG = @ICU_CONFIG@
+ICU_CPPFLAGS = @ICU_CPPFLAGS@
+ICU_CXXFLAGS = @ICU_CXXFLAGS@
+ICU_LIBS = @ICU_LIBS@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+libfstdir = @libfstdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+lt_ECHO = @lt_ECHO@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+@HAVE_FAR_TRUE@far_include_headers = fst/extensions/far/compile-strings.h \
+@HAVE_FAR_TRUE@fst/extensions/far/create.h fst/extensions/far/extract.h \
+@HAVE_FAR_TRUE@fst/extensions/far/far.h fst/extensions/far/farlib.h \
+@HAVE_FAR_TRUE@fst/extensions/far/farscript.h fst/extensions/far/info.h \
+@HAVE_FAR_TRUE@fst/extensions/far/main.h fst/extensions/far/print-strings.h \
+@HAVE_FAR_TRUE@fst/extensions/far/stlist.h fst/extensions/far/sttable.h
+
+@HAVE_PDT_TRUE@pdt_include_headers = fst/extensions/pdt/collection.h \
+@HAVE_PDT_TRUE@fst/extensions/pdt/compose.h fst/extensions/pdt/expand.h \
+@HAVE_PDT_TRUE@fst/extensions/pdt/info.h fst/extensions/pdt/paren.h \
+@HAVE_PDT_TRUE@fst/extensions/pdt/pdt.h fst/extensions/pdt/pdtlib.h \
+@HAVE_PDT_TRUE@fst/extensions/pdt/pdtscript.h fst/extensions/pdt/replace.h \
+@HAVE_PDT_TRUE@fst/extensions/pdt/reverse.h fst/extensions/pdt/shortest-path.h
+
+script_include_headers = fst/script/arcsort.h fst/script/arg-packs.h \
+fst/script/closure.h fst/script/compile-impl.h fst/script/compile.h \
+fst/script/compose.h fst/script/concat.h fst/script/connect.h \
+fst/script/convert.h fst/script/decode.h fst/script/determinize.h \
+fst/script/difference.h fst/script/draw-impl.h fst/script/draw.h \
+fst/script/encode.h fst/script/epsnormalize.h fst/script/equal.h \
+fst/script/equivalent.h fst/script/fst-class.h fst/script/fstscript.h \
+fst/script/info-impl.h fst/script/info.h fst/script/intersect.h \
+fst/script/invert.h fst/script/map.h fst/script/minimize.h \
+fst/script/print-impl.h fst/script/print.h fst/script/project.h \
+fst/script/prune.h fst/script/push.h fst/script/randequivalent.h \
+fst/script/randgen.h fst/script/register.h fst/script/relabel.h \
+fst/script/replace.h fst/script/reverse.h fst/script/reweight.h \
+fst/script/rmepsilon.h fst/script/script-impl.h \
+fst/script/shortest-distance.h fst/script/shortest-path.h \
+fst/script/symbols.h fst/script/synchronize.h fst/script/text-io.h \
+fst/script/topsort.h fst/script/union.h fst/script/weight-class.h \
+fst/script/fstscript-decl.h fst/script/verify.h
+
+nobase_include_HEADERS = fst/arc.h fst/determinize.h fst/intersect.h \
+fst/queue.h fst/statesort.h fst/arcfilter.h fst/dfs-visit.h \
+fst/invert.h fst/randequivalent.h fst/string-weight.h \
+fst/difference.h fst/lexicographic-weight.h fst/randgen.h \
+fst/symbol-table.h fst/arcsort.h fst/encode.h fst/lock.h \
+fst/random-weight.h fst/synchronize.h \
+fst/epsnormalize.h fst/log.h fst/rational.h fst/test-properties.h \
+fst/cache.h fst/equal.h fst/arc-map.h fst/map.h fst/register.h fst/topsort.h \
+fst/closure.h fst/equivalent.h fst/matcher.h fst/matcher-fst.h fst/relabel.h \
+fst/union-find.h fst/compact-fst.h fst/expanded-fst.h fst/minimize.h \
+fst/replace.h fst/union.h fst/compat.h fst/factor-weight.h fst/state-map.h \
+fst/mutable-fst.h fst/reverse.h fst/util.h fst/complement.h \
+fst/flags.h fst/partition.h fst/reweight.h fst/vector-fst.h \
+fst/compose-filter.h fst/float-weight.h fst/product-weight.h \
+fst/rmepsilon.h fst/verify.h fst/compose.h fst/fst-decl.h \
+fst/project.h fst/rmfinalepsilon.h fst/visit.h fst/concat.h \
+fst/fst.h fst/properties.h fst/shortest-distance.h fst/weight.h \
+fst/connect.h fst/fstlib.h fst/prune.h fst/shortest-path.h \
+fst/const-fst.h fst/heap.h fst/push.h fst/state-table.h fst/slist.h \
+fst/pair-weight.h fst/config.h fst/tuple-weight.h fst/power-weight.h \
+fst/lookahead-matcher.h fst/types.h fst/add-on.h fst/label-reachable.h \
+fst/accumulator.h fst/interval-set.h fst/state-reachable.h \
+fst/lookahead-filter.h fst/generic-register.h fst/edit-fst.h \
+fst/replace-util.h fst/icu.h fst/string.h fst/signed-log-weight.h \
+fst/sparse-tuple-weight.h fst/sparse-power-weight.h fst/expectation-weight.h \
+fst/symbol-table-ops.h fst/bi-table.h \
+$(far_include_headers) \
+$(pdt_include_headers) \
+$(script_include_headers)
+
+all: all-am
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/include/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign src/include/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+install-nobase_includeHEADERS: $(nobase_include_HEADERS)
+ @$(NORMAL_INSTALL)
+ test -z "$(includedir)" || $(MKDIR_P) "$(DESTDIR)$(includedir)"
+ @list='$(nobase_include_HEADERS)'; test -n "$(includedir)" || list=; \
+ $(am__nobase_list) | while read dir files; do \
+ xfiles=; for file in $$files; do \
+ if test -f "$$file"; then xfiles="$$xfiles $$file"; \
+ else xfiles="$$xfiles $(srcdir)/$$file"; fi; done; \
+ test -z "$$xfiles" || { \
+ test "x$$dir" = x. || { \
+ echo "$(MKDIR_P) '$(DESTDIR)$(includedir)/$$dir'"; \
+ $(MKDIR_P) "$(DESTDIR)$(includedir)/$$dir"; }; \
+ echo " $(INSTALL_HEADER) $$xfiles '$(DESTDIR)$(includedir)/$$dir'"; \
+ $(INSTALL_HEADER) $$xfiles "$(DESTDIR)$(includedir)/$$dir" || exit $$?; }; \
+ done
+
+uninstall-nobase_includeHEADERS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(nobase_include_HEADERS)'; test -n "$(includedir)" || list=; \
+ $(am__nobase_strip_setup); files=`$(am__nobase_strip)`; \
+ test -n "$$files" || exit 0; \
+ echo " ( cd '$(DESTDIR)$(includedir)' && rm -f" $$files ")"; \
+ cd "$(DESTDIR)$(includedir)" && rm -f $$files
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ mkid -fID $$unique
+tags: TAGS
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ set x; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: CTAGS
+CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(HEADERS)
+installdirs:
+ for dir in "$(DESTDIR)$(includedir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+ -rm -f Makefile
+distclean-am: clean-am distclean-generic distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-nobase_includeHEADERS
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-nobase_includeHEADERS
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
+ clean-libtool ctags distclean distclean-generic \
+ distclean-libtool distclean-tags distdir dvi dvi-am html \
+ html-am info info-am install install-am install-data \
+ install-data-am install-dvi install-dvi-am install-exec \
+ install-exec-am install-html install-html-am install-info \
+ install-info-am install-man install-nobase_includeHEADERS \
+ install-pdf install-pdf-am install-ps install-ps-am \
+ install-strip installcheck installcheck-am installdirs \
+ maintainer-clean maintainer-clean-generic mostlyclean \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags uninstall uninstall-am uninstall-nobase_includeHEADERS
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/src/include/fst/accumulator.h b/src/include/fst/accumulator.h
new file mode 100644
index 0000000..fcb960c
--- /dev/null
+++ b/src/include/fst/accumulator.h
@@ -0,0 +1,745 @@
+// accumulator.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Classes to accumulate arc weights. Useful for weight lookahead.
+
+#ifndef FST_LIB_ACCUMULATOR_H__
+#define FST_LIB_ACCUMULATOR_H__
+
+#include <algorithm>
+#include <functional>
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <vector>
+using std::vector;
+
+#include <fst/arcfilter.h>
+#include <fst/arcsort.h>
+#include <fst/dfs-visit.h>
+#include <fst/expanded-fst.h>
+#include <fst/replace.h>
+
+namespace fst {
+
+// Accumulator that combines arc weights with the semiring Plus()
+// operation of the arc's weight type. It keeps no per-FST or
+// per-state data, so Init() and SetState() do nothing.
+template <class A>
+class DefaultAccumulator {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+
+  DefaultAccumulator() {}
+
+  // There is no state to carry over from 'acc'.
+  DefaultAccumulator(const DefaultAccumulator<A> &acc) {}
+
+  // No-op: nothing is pre-computed for this accumulator.
+  void Init(const Fst<A>& fst, bool copy = false) {}
+
+  // No-op: no per-state information is kept.
+  void SetState(StateId) {}
+
+  // Returns Plus(w, v).
+  Weight Sum(Weight w, Weight v) { return Plus(w, v); }
+
+  // Returns 'w' Plus-combined with the weights of the arcs at
+  // positions [begin, end) of 'aiter'. The iterator is repositioned.
+  template <class ArcIterator>
+  Weight Sum(Weight w, ArcIterator *aiter, ssize_t begin, ssize_t end) {
+    Weight total = w;
+    aiter->Seek(begin);
+    ssize_t index = begin;
+    while (index < end) {
+      total = Plus(total, aiter->Value().weight);
+      aiter->Next();
+      ++index;
+    }
+    return total;
+  }
+
+  // This accumulator never reports an error.
+  bool Error() const { return false; }
+
+ private:
+  void operator=(const DefaultAccumulator<A> &);  // Disallow
+};
+
+
+// This class accumulates arc weights using the log semiring Plus()
+// assuming an arc weight has a WeightConvert specialization to
+// and from log64 weights. It keeps no per-FST or per-state data, so
+// Init() and SetState() do nothing.
+template <class A>
+class LogAccumulator {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+
+  LogAccumulator() {}
+
+  // There is no state to carry over from 'acc'.
+  LogAccumulator(const LogAccumulator<A> &acc) {}
+
+  // No-op: nothing is pre-computed for this accumulator.
+  void Init(const Fst<A>& fst, bool copy = false) {}
+
+  // No-op: no per-state information is kept.
+  void SetState(StateId) {}
+
+  // Returns the log-semiring sum of 'w' and 'v'.
+  Weight Sum(Weight w, Weight v) {
+    return LogPlus(w, v);
+  }
+
+  // Returns 'w' log-summed with the weights of the arcs at positions
+  // [begin, end) of 'aiter'. The iterator is repositioned.
+  template <class ArcIterator>
+  Weight Sum(Weight w, ArcIterator *aiter, ssize_t begin,
+             ssize_t end) {
+    Weight sum = w;
+    aiter->Seek(begin);
+    for (ssize_t pos = begin; pos < end; aiter->Next(), ++pos)
+      sum = LogPlus(sum, aiter->Value().weight);
+    return sum;
+  }
+
+  // This accumulator never reports an error.
+  bool Error() const { return false; }
+
+ private:
+  // Returns log(1 + exp(-x)).
+  double LogPosExp(double x) { return log(1.0F + exp(-x)); }
+
+  // Log-semiring addition carried out on the log64 values of 'w' and 'v'.
+  Weight LogPlus(Weight w, Weight v) {
+    double f1 = to_log_weight_(w).Value();
+    double f2 = to_log_weight_(v).Value();
+    // +infinity is the log semiring Zero. Handle it up front: when both
+    // operands are Zero the difference below would be inf - inf (NaN).
+    if (f1 == FloatLimits<double>::kPosInfinity)
+      return to_weight_(f2);
+    if (f1 > f2)
+      return to_weight_(f2 - LogPosExp(f1 - f2));
+    else
+      return to_weight_(f1 - LogPosExp(f2 - f1));
+  }
+
+  WeightConvert<Weight, Log64Weight> to_log_weight_;
+  WeightConvert<Log64Weight, Weight> to_weight_;
+
+  void operator=(const LogAccumulator<A> &);  // Disallow
+};
+
+
+// Stores shareable data for fast log accumulator copies: the
+// pre-computed cumulative weights, the per-state index into them and
+// the reference count used to share this object between copies.
+class FastLogAccumulatorData {
+ public:
+  FastLogAccumulatorData() {}
+
+  // Mutable access to the cumulative weights.
+  vector<double> *Weights() { return &weights_; }
+  // Mutable access to the per-state start positions in Weights().
+  vector<ssize_t> *WeightPositions() { return &weight_positions_; }
+  // Returns a pointer to the last stored weight, or 0 if no weights
+  // have been stored (indexing an empty vector would be undefined).
+  double *WeightEnd() {
+    return weights_.empty() ? 0 : &weights_.back();
+  }
+  int RefCount() const { return ref_count_.count(); }
+  int IncrRefCount() { return ref_count_.Incr(); }
+  int DecrRefCount() { return ref_count_.Decr(); }
+
+ private:
+  // Cumulative weight per state for all states having at least
+  // FastLogAccumulator's 'arc_limit_' arcs, with arcs in order.
+  // Special first element per state being Log64Weight::Zero();
+  vector<double> weights_;
+  // Maps from state to corresponding beginning weight position in
+  // weights_. Position -1 means no pre-computed weights for that
+  // state.
+  vector<ssize_t> weight_positions_;
+  RefCounter ref_count_;                  // Reference count.
+
+  DISALLOW_COPY_AND_ASSIGN(FastLogAccumulatorData);
+};
+
+
+// This class accumulates arc weights using the log semiring Plus()
+// assuming an arc weight has a WeightConvert specialization to and
+// from log64 weights. The member function Init(fst) has to be called
+// to setup pre-computed weight information.
+//
+// Init() stores, for each state with at least 'arc_limit' arcs, the
+// running log-sum of its arc weights after every 'arc_period' arcs.
+// Sum() over an arc range uses those stored values where the range
+// covers whole periods and accumulates the remaining arcs directly.
+// Copies share the stored weights through a reference count.
+template <class A>
+class FastLogAccumulator {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+
+  // 'arc_limit' is the minimum number of arcs a state needs for its
+  // weights to be pre-computed; 'arc_period' is the spacing (in arcs)
+  // between stored cumulative weights.
+  explicit FastLogAccumulator(ssize_t arc_limit = 20, ssize_t arc_period = 10)
+      : arc_limit_(arc_limit),
+        arc_period_(arc_period),
+        init_(false),
+        data_(new FastLogAccumulatorData()),
+        state_weights_(0),  // No state selected until SetState().
+        error_(false) {}
+
+  // Shares the pre-computed data of 'acc'. The copy has no current
+  // state until SetState() is called on it.
+  FastLogAccumulator(const FastLogAccumulator<A> &acc)
+      : arc_limit_(acc.arc_limit_),
+        arc_period_(acc.arc_period_),
+        init_(acc.init_),
+        data_(acc.data_),
+        state_weights_(0),
+        error_(acc.error_) {
+    data_->IncrRefCount();
+  }
+
+  // Releases the shared data when the last reference goes away.
+  ~FastLogAccumulator() {
+    if (!data_->DecrRefCount())
+      delete data_;
+  }
+
+  // Selects the state whose arcs subsequent Sum() calls refer to.
+  // Sets the error flag if 's' has no entry in the position table.
+  void SetState(StateId s) {
+    vector<double> &weights = *data_->Weights();
+    vector<ssize_t> &weight_positions = *data_->WeightPositions();
+
+    if (weight_positions.size() <= s) {
+      FSTERROR() << "FastLogAccumulator::SetState: invalid state id.";
+      error_ = true;
+      return;
+    }
+
+    // A negative position means nothing was pre-computed for 's'.
+    ssize_t pos = weight_positions[s];
+    if (pos >= 0)
+      state_weights_ = &(weights[pos]);
+    else
+      state_weights_ = 0;
+  }
+
+  // Returns the log-semiring sum of 'w' and 'v'.
+  Weight Sum(Weight w, Weight v) {
+    return LogPlus(w, v);
+  }
+
+  // Returns 'w' log-summed with the weights of the arcs at positions
+  // [begin, end) of 'aiter' for the state chosen by SetState().
+  // Returns Weight::NoWeight() if the accumulator is in error.
+  template <class ArcIterator>
+  Weight Sum(Weight w, ArcIterator *aiter, ssize_t begin,
+             ssize_t end) {
+    if (error_) return Weight::NoWeight();
+    Weight sum = w;
+    // Finds begin and end of pre-stored weights
+    ssize_t index_begin = -1, index_end = -1;
+    ssize_t stored_begin = end, stored_end = end;
+    if (state_weights_ != 0) {
+      // First stored boundary at or after 'begin', last at or before 'end'.
+      index_begin = begin > 0 ? (begin - 1)/ arc_period_ + 1 : 0;
+      index_end = end / arc_period_;
+      stored_begin = index_begin * arc_period_;
+      stored_end = index_end * arc_period_;
+    }
+    // Computes sum before pre-stored weights
+    if (begin < stored_begin) {
+      ssize_t pos_end = std::min(stored_begin, end);
+      aiter->Seek(begin);
+      for (ssize_t pos = begin; pos < pos_end; aiter->Next(), ++pos)
+        sum = LogPlus(sum, aiter->Value().weight);
+    }
+    // Computes sum between pre-stored weights
+    if (stored_begin < stored_end) {
+      sum = LogPlus(sum, LogMinus(state_weights_[index_end],
+                                  state_weights_[index_begin]));
+    }
+    // Computes sum after pre-stored weights
+    if (stored_end < end) {
+      ssize_t pos_start = std::max(stored_begin, stored_end);
+      aiter->Seek(pos_start);
+      for (ssize_t pos = pos_start; pos < end; aiter->Next(), ++pos)
+        sum = LogPlus(sum, aiter->Value().weight);
+    }
+    return sum;
+  }
+
+  // Pre-computes the cumulative weights for 'fst'. Does nothing when
+  // 'copy' is true. Sets the error flag if weights were already
+  // stored or if arc_limit_ < arc_period_.
+  template <class F>
+  void Init(const F &fst, bool copy = false) {
+    if (copy)
+      return;
+    vector<double> &weights = *data_->Weights();
+    vector<ssize_t> &weight_positions = *data_->WeightPositions();
+    if (!weights.empty() || arc_limit_ < arc_period_) {
+      FSTERROR() << "FastLogAccumulator: initialization error.";
+      error_ = true;
+      return;
+    }
+    weight_positions.reserve(CountStates(fst));
+
+    ssize_t weight_position = 0;
+    for(StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
+      StateId s = siter.Value();
+      if (fst.NumArcs(s) >= arc_limit_) {
+        // The first stored element of a state is +infinity (log Zero).
+        double sum = FloatLimits<double>::kPosInfinity;
+        weight_positions.push_back(weight_position);
+        weights.push_back(sum);
+        ++weight_position;
+        ssize_t narcs = 0;
+        for(ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) {
+          const A &arc = aiter.Value();
+          sum = LogPlus(sum, arc.weight);
+          // Stores cumulative weight distribution per arc_period_.
+          if (++narcs % arc_period_ == 0) {
+            weights.push_back(sum);
+            ++weight_position;
+          }
+        }
+      } else {
+        weight_positions.push_back(-1);
+      }
+    }
+  }
+
+  // True once SetState(), Init() or a weight subtraction has failed.
+  bool Error() const { return error_; }
+
+ private:
+  // Returns log(1 + exp(-x)), with the x == +infinity case spelled out.
+  double LogPosExp(double x) {
+    return x == FloatLimits<double>::kPosInfinity ?
+        0.0 : log(1.0F + exp(-x));
+  }
+
+  // Returns log(1 - exp(-x)), with the x == +infinity case spelled out.
+  double LogMinusExp(double x) {
+    return x == FloatLimits<double>::kPosInfinity ?
+        0.0 : log(1.0F - exp(-x));
+  }
+
+  // Log-semiring addition carried out on the log64 values of 'w' and 'v'.
+  Weight LogPlus(Weight w, Weight v) {
+    double f1 = to_log_weight_(w).Value();
+    double f2 = to_log_weight_(v).Value();
+    // +infinity is the log semiring Zero. Handle it up front: when both
+    // operands are Zero the difference below would be inf - inf (NaN).
+    if (f1 == FloatLimits<double>::kPosInfinity)
+      return to_weight_(f2);
+    if (f1 > f2)
+      return to_weight_(f2 - LogPosExp(f1 - f2));
+    else
+      return to_weight_(f1 - LogPosExp(f2 - f1));
+  }
+
+  // Same as above with the left operand already given as a log64 value.
+  double LogPlus(double f1, Weight v) {
+    double f2 = to_log_weight_(v).Value();
+    if (f1 == FloatLimits<double>::kPosInfinity)
+      return f2;
+    else if (f1 > f2)
+      return f2 - LogPosExp(f1 - f2);
+    else
+      return f1 - LogPosExp(f2 - f1);
+  }
+
+  // Log-semiring subtraction of 'f2' from 'f1' (both log64 values).
+  // Requires f1 < f2; otherwise flags an error and returns NoWeight().
+  Weight LogMinus(double f1, double f2) {
+    if (f1 >= f2) {
+      FSTERROR() << "FastLogAcumulator::LogMinus: f1 >= f2 with f1 = " << f1
+                 << " and f2 = " << f2;
+      error_ = true;
+      return Weight::NoWeight();
+    }
+    if (f2 == FloatLimits<double>::kPosInfinity)
+      return to_weight_(f1);
+    else
+      return to_weight_(f1 - LogMinusExp(f2 - f1));
+  }
+
+  WeightConvert<Weight, Log64Weight> to_log_weight_;
+  WeightConvert<Log64Weight, Weight> to_weight_;
+
+  ssize_t arc_limit_;     // Minimum # of arcs to pre-compute state
+  ssize_t arc_period_;    // Save cumulative weights per 'arc_period_'.
+  bool init_;             // Cumulative weights initialized?
+  FastLogAccumulatorData *data_;
+  double *state_weights_;  // Stored weights of current state, 0 if none.
+  bool error_;
+
+  void operator=(const FastLogAccumulator<A> &);  // Disallow
+};
+
+
+// Data shared by all copies of a cache log accumulator: a per-state
+// cache of accumulated weight vectors with optional garbage
+// collection, plus the reference count used to share it.
+template <class A>
+class CacheLogAccumulatorData {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+
+  // 'gc' turns garbage collection on; 'gc_limit' is the cache size in
+  // bytes at which adding an entry triggers a collection.
+  CacheLogAccumulatorData(bool gc, size_t gc_limit)
+      : cache_gc_(gc), cache_limit_(gc_limit), cache_size_(0) {}
+
+  // Frees every weight vector still held by the cache.
+  ~CacheLogAccumulatorData() {
+    CacheIterator entry = cache_.begin();
+    while (entry != cache_.end()) {
+      delete entry->second.weights;
+      ++entry;
+    }
+  }
+
+  // True when garbage collection is on with a zero byte limit.
+  bool CacheDisabled() const { return cache_gc_ && cache_limit_ == 0; }
+
+  // Returns the weights cached for state 's' and marks the entry as
+  // recently used, or returns 0 when 's' is not cached.
+  vector<double> *GetWeights(StateId s) {
+    CacheIterator found = cache_.find(s);
+    if (found == cache_.end())
+      return 0;
+    found->second.recent = true;
+    return found->second.weights;
+  }
+
+  // Caches 'weights' for state 's'; the vector is deleted by this
+  // object when the entry is collected or the cache is destroyed.
+  // Collects garbage first if the cache has reached its limit.
+  void AddWeights(StateId s, vector<double> *weights) {
+    const bool over_limit = cache_gc_ && cache_size_ >= cache_limit_;
+    if (over_limit)
+      GC(false);
+    cache_.insert(typename Cache::value_type(s, CacheState(weights, true)));
+    if (cache_gc_)
+      cache_size_ += weights->capacity() * sizeof(double);
+  }
+
+  int RefCount() const { return ref_count_.count(); }
+  int IncrRefCount() { return ref_count_.Incr(); }
+  int DecrRefCount() { return ref_count_.Decr(); }
+
+ private:
+  // One cache entry.
+  struct CacheState {
+    vector<double>* weights;  // Accumulated weights for this state.
+    bool recent;  // Has this state been accessed since last GC?
+
+    CacheState(vector<double> *w, bool r) : weights(w), recent(r) {}
+  };
+
+  typedef unordered_map<StateId, CacheState> Cache;
+  typedef typename Cache::iterator CacheIterator;
+
+  // Garbage collect: drops entries until 'cache_size_' is no larger
+  // than 2/3 of 'cache_limit_' (plus one). With 'free_recent = false'
+  // only entries not accessed since the last GC are dropped and the
+  // visited survivors lose their 'recent' mark; if that does not free
+  // enough memory, a second pass drops recently accessed entries too.
+  void GC(bool free_recent) {
+    const size_t cache_target = (2 * cache_limit_)/3 + 1;
+    CacheIterator it = cache_.begin();
+    while (it != cache_.end() && cache_size_ > cache_target) {
+      CacheState &entry = it->second;
+      if (!free_recent && entry.recent) {
+        // Spare it this round, but make it eligible for the next one.
+        entry.recent = false;
+        ++it;
+        continue;
+      }
+      cache_size_ -= entry.weights->capacity() * sizeof(double);
+      delete entry.weights;
+      cache_.erase(it++);
+    }
+    if (!free_recent && cache_size_ > cache_target)
+      GC(true);
+  }
+
+  Cache cache_;         // Cache
+  bool cache_gc_;       // Enable garbage collection
+  size_t cache_limit_;  // # of bytes allowed before GC
+  size_t cache_size_;   // # of bytes cached
+  RefCounter ref_count_;
+
+  DISALLOW_COPY_AND_ASSIGN(CacheLogAccumulatorData);
+};
+
+// This class accumulates arc weights using the log semiring Plus(),
+// assuming the arc weight has a WeightConvert specialization to and
+// from log64 weights. It is similar to the FastLogAccumulator. However,
+// here the accumulated weights are pre-computed and stored only for
+// the states that are visited. The member function Init(fst) has to
+// be called to set up this accumulator.
+template <class A>
+class CacheLogAccumulator {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+
+  // Creates an accumulator with its own (shareable) weight cache.
+  //   arc_limit: minimum # of arcs a state needs for its weights to be cached.
+  //   gc, gc_limit: passed to CacheLogAccumulatorData.
+  // 'weights_' starts out null so that Sum()/LowerBound() take the uncached
+  // path if they are reached before any SetState() call.
+  explicit CacheLogAccumulator(ssize_t arc_limit = 10, bool gc = false,
+                               size_t gc_limit = 10 * 1024 * 1024)
+      : arc_limit_(arc_limit), weights_(0), fst_(0),
+        data_(new CacheLogAccumulatorData<A>(gc, gc_limit)), s_(kNoStateId),
+        error_(false) {}
+
+  // Copies share the cache data (reference counted) but get their own copy
+  // of the FST and start with no current state.
+  CacheLogAccumulator(const CacheLogAccumulator<A> &acc)
+      : arc_limit_(acc.arc_limit_), weights_(0),
+        fst_(acc.fst_ ? acc.fst_->Copy() : 0),
+        data_(acc.data_), s_(kNoStateId), error_(acc.error_) {
+    data_->IncrRefCount();
+  }
+
+  ~CacheLogAccumulator() {
+    delete fst_;
+    if (!data_->DecrRefCount())
+      delete data_;
+  }
+
+  // Attaches a copy of 'fst' to this accumulator. With 'copy' set, any
+  // previously attached FST is replaced; otherwise initializing twice is an
+  // error (logged, and recorded in Error()).
+  void Init(const Fst<A> &fst, bool copy = false) {
+    if (copy) {
+      delete fst_;
+    } else if (fst_) {
+      FSTERROR() << "CacheLogAccumulator: initialization error.";
+      error_ = true;
+      return;
+    }
+    fst_ = fst.Copy();
+  }
+
+  // Makes 's' the current state and selects its cached weight vector,
+  // creating and registering one when the state has at least 'arc_limit_'
+  // arcs. 'weights_' is left null when caching is disabled, an error
+  // occurred, or the state is below the limit. 'depth' is unused.
+  void SetState(StateId s, int depth = 0) {
+    if (s == s_)
+      return;
+    s_ = s;
+
+    if (data_->CacheDisabled() || error_) {
+      weights_ = 0;
+      return;
+    }
+
+    if (!fst_) {
+      FSTERROR() << "CacheLogAccumulator::SetState: incorrectly initialized.";
+      error_ = true;
+      weights_ = 0;
+      return;
+    }
+
+    weights_ = data_->GetWeights(s);
+    if ((weights_ == 0) && (fst_->NumArcs(s) >= arc_limit_)) {
+      weights_ = new vector<double>;
+      weights_->reserve(fst_->NumArcs(s) + 1);
+      // Entry 0 is the empty sum; entry i is the sum of the first i arcs.
+      weights_->push_back(FloatLimits<double>::kPosInfinity);
+      data_->AddWeights(s, weights_);
+    }
+  }
+
+  Weight Sum(Weight w, Weight v) {
+    return LogPlus(w, v);
+  }
+
+  // Returns 'w' plus the weights of the arcs at positions [begin, end) of
+  // 'aiter'. Without a cached vector the arcs are summed directly; with one,
+  // the cumulative sums are extended lazily up to 'end' and the range sum is
+  // obtained as a difference of two cumulative entries.
+  template <class Iterator>
+  Weight Sum(Weight w, Iterator *aiter, ssize_t begin,
+             ssize_t end) {
+    if (weights_ == 0) {
+      Weight sum = w;
+      aiter->Seek(begin);
+      for (ssize_t pos = begin; pos < end; aiter->Next(), ++pos)
+        sum = LogPlus(sum, aiter->Value().weight);
+      return sum;
+    } else {
+      if (weights_->size() <= end)
+        for (aiter->Seek(weights_->size() - 1);
+             weights_->size() <= end;
+             aiter->Next())
+          weights_->push_back(LogPlus(weights_->back(),
+                                      aiter->Value().weight));
+      return LogPlus(w, LogMinus((*weights_)[end], (*weights_)[begin]));
+    }
+  }
+
+  // Returns the index of the first arc position whose cumulative weight
+  // (in raw log form) falls below 'w'. Uses a binary search over the cached
+  // cumulative sums when available, otherwise a linear scan of 'aiter'.
+  template <class Iterator>
+  size_t LowerBound(double w, Iterator *aiter) {
+    if (weights_ != 0) {
+      return lower_bound(weights_->begin() + 1,
+                         weights_->end(),
+                         w,
+                         std::greater<double>())
+          - weights_->begin() - 1;
+    } else {
+      size_t n = 0;
+      double x = FloatLimits<double>::kPosInfinity;
+      for (aiter->Reset(); !aiter->Done(); aiter->Next(), ++n) {
+        x = LogPlus(x, aiter->Value().weight);
+        if (x < w) break;
+      }
+      return n;
+    }
+  }
+
+  bool Error() const { return error_; }
+
+ private:
+  // log(1 + e^-x), with the x = +infinity case answered directly.
+  double LogPosExp(double x) {
+    return x == FloatLimits<double>::kPosInfinity ?
+        0.0 : log(1.0F + exp(-x));
+  }
+
+  // log(1 - e^-x), with the x = +infinity case answered directly.
+  double LogMinusExp(double x) {
+    return x == FloatLimits<double>::kPosInfinity ?
+        0.0 : log(1.0F - exp(-x));
+  }
+
+  // Log-semiring sum of two weights, computed on their Log64Weight values.
+  Weight LogPlus(Weight w, Weight v) {
+    double f1 = to_log_weight_(w).Value();
+    double f2 = to_log_weight_(v).Value();
+    // An infinite left operand contributes nothing. Handling it here (like
+    // the double overload below) also avoids inf - inf = NaN when both
+    // operands are +infinity.
+    if (f1 == FloatLimits<double>::kPosInfinity)
+      return to_weight_(f2);
+    if (f1 > f2)
+      return to_weight_(f2 - LogPosExp(f1 - f2));
+    else
+      return to_weight_(f1 - LogPosExp(f2 - f1));
+  }
+
+  // Same sum with a raw log value on the left and a raw result.
+  double LogPlus(double f1, Weight v) {
+    double f2 = to_log_weight_(v).Value();
+    if (f1 == FloatLimits<double>::kPosInfinity)
+      return f2;
+    else if (f1 > f2)
+      return f2 - LogPosExp(f1 - f2);
+    else
+      return f1 - LogPosExp(f2 - f1);
+  }
+
+  // Log-semiring difference of two raw log values; requires f1 < f2.
+  // Otherwise logs an error, sets error_ and returns Weight::NoWeight().
+  Weight LogMinus(double f1, double f2) {
+    if (f1 >= f2) {
+      FSTERROR() << "CacheLogAccumulator::LogMinus: f1 >= f2 with f1 = " << f1
+                 << " and f2 = " << f2;
+      error_ = true;
+      return Weight::NoWeight();
+    }
+    if (f2 == FloatLimits<double>::kPosInfinity)
+      return to_weight_(f1);
+    else
+      return to_weight_(f1 - LogMinusExp(f2 - f1));
+  }
+
+  WeightConvert<Weight, Log64Weight> to_log_weight_;
+  WeightConvert<Log64Weight, Weight> to_weight_;
+
+  ssize_t arc_limit_;                    // Minimum # of arcs to cache a state
+  vector<double> *weights_;              // Accumulated weights for cur. state
+  const Fst<A>* fst_;                    // Input fst
+  CacheLogAccumulatorData<A> *data_;     // Cache data
+  StateId s_;                            // Current state
+  bool error_;
+
+  void operator=(const CacheLogAccumulator<A> &);  // Disallow
+};
+
+
+// Stores shareable data for replace accumulator copies.
+template <class Accumulator, class T>
+class ReplaceAccumulatorData {
+ public:
+  typedef typename Accumulator::Arc Arc;
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+  typedef T StateTable;
+  typedef typename T::StateTuple StateTuple;
+
+  ReplaceAccumulatorData() : state_table_(0) {}
+
+  // The given accumulators are deleted by this object's destructor.
+  ReplaceAccumulatorData(const vector<Accumulator*> &accumulators)
+      : state_table_(0), accumulators_(accumulators) {}
+
+  // Releases the FST copies made in Init() and then the accumulators.
+  ~ReplaceAccumulatorData() {
+    typedef typename vector<const Fst<Arc>*>::iterator FstIter;
+    for (FstIter fit = fst_array_.begin(); fit != fst_array_.end(); ++fit)
+      delete *fit;
+    typedef typename vector<Accumulator*>::iterator AccIter;
+    for (AccIter ait = accumulators_.begin(); ait != accumulators_.end();
+         ++ait)
+      delete *ait;
+  }
+
+  // Records the state table (not copied) and, for each (label, FST) tuple,
+  // makes sure an accumulator exists, initializes it with that FST and
+  // keeps a copy of the FST.
+  void Init(const vector<pair<Label, const Fst<Arc>*> > &fst_tuples,
+            const StateTable *state_table) {
+    state_table_ = state_table;
+    const size_t num_fsts = fst_tuples.size();
+    accumulators_.resize(num_fsts);
+    for (size_t idx = 0; idx < num_fsts; ++idx) {
+      const Fst<Arc> *component = fst_tuples[idx].second;
+      if (accumulators_[idx] == 0)
+        accumulators_[idx] = new Accumulator;
+      accumulators_[idx]->Init(*component);
+      fst_array_.push_back(component->Copy());
+    }
+  }
+
+  const StateTuple &GetTuple(StateId s) const {
+    return state_table_->Tuple(s);
+  }
+
+  Accumulator *GetAccumulator(size_t i) { return accumulators_[i]; }
+
+  const Fst<Arc> *GetFst(size_t i) const { return fst_array_[i]; }
+
+  int RefCount() const { return ref_count_.count(); }
+  int IncrRefCount() { return ref_count_.Incr(); }
+  int DecrRefCount() { return ref_count_.Decr(); }
+
+ private:
+  const T * state_table_;               // Not owned here.
+  vector<Accumulator*> accumulators_;   // One per component FST.
+  vector<const Fst<Arc>*> fst_array_;   // Copies of the component FSTs.
+  RefCounter ref_count_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReplaceAccumulatorData);
+};
+
+// This class accumulates weights in a ReplaceFst. The 'Init' method
+// takes as input the argument used to build the ReplaceFst and the
+// ReplaceFst state table. It uses accumulators of type 'Accumulator'
+// in the underlying FSTs.
+template <class Accumulator,
+          class T = DefaultReplaceStateTable<typename Accumulator::Arc> >
+class ReplaceAccumulator {
+ public:
+  typedef typename Accumulator::Arc Arc;
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+  typedef T StateTable;
+  typedef typename T::StateTuple StateTuple;
+
+  ReplaceAccumulator()
+      : init_(false),
+        data_(new ReplaceAccumulatorData<Accumulator, T>()),
+        error_(false) {}
+
+  ReplaceAccumulator(const vector<Accumulator*> &accumulators)
+      : init_(false),
+        data_(new ReplaceAccumulatorData<Accumulator, T>(accumulators)),
+        error_(false) {}
+
+  // Copies share the (reference-counted) data of the original. Copying an
+  // accumulator that has not been initialized is reported as an error.
+  ReplaceAccumulator(const ReplaceAccumulator<Accumulator, T> &acc)
+      : init_(acc.init_), data_(acc.data_), error_(acc.error_) {
+    if (!init_) {
+      FSTERROR() << "ReplaceAccumulator: can't copy unintialized accumulator";
+    }
+    data_->IncrRefCount();
+  }
+
+  ~ReplaceAccumulator() {
+    if (data_->DecrRefCount() == 0)
+      delete data_;
+  }
+
+  // Does not take ownership of the state table; the state table
+  // is owned by the ReplaceFst.
+  void Init(const vector<pair<Label, const Fst<Arc>*> > &fst_tuples,
+            const StateTable *state_table) {
+    init_ = true;
+    data_->Init(fst_tuples, state_table);
+  }
+
+  // Selects the component FST and state behind ReplaceFst state 's' and
+  // records whether that state's final weight acts as an extra leading
+  // arc (offset_ == 1) for the subsequent Sum() calls.
+  void SetState(StateId s) {
+    if (!init_) {
+      FSTERROR() << "ReplaceAccumulator::SetState: incorrectly initialized.";
+      error_ = true;
+      return;
+    }
+    StateTuple tuple = data_->GetTuple(s);
+    fst_id_ = tuple.fst_id - 1;  // Replace FST ID is 1-based
+    data_->GetAccumulator(fst_id_)->SetState(tuple.fst_state);
+
+    offset_ = 0;
+    offset_weight_ = Weight::Zero();
+    if (tuple.prefix_id != 0) {
+      const Weight final_weight =
+          data_->GetFst(fst_id_)->Final(tuple.fst_state);
+      if (final_weight != Weight::Zero()) {
+        offset_ = 1;
+        offset_weight_ = final_weight;
+      }
+    }
+  }
+
+  Weight Sum(Weight w, Weight v) {
+    if (error_) return Weight::NoWeight();
+    return data_->GetAccumulator(fst_id_)->Sum(w, v);
+  }
+
+  // Sums 'w' with the arcs in [begin, end), shifting positions by offset_
+  // for the underlying accumulator and adding offset_weight_ when the
+  // range starts at position 0.
+  template <class ArcIterator>
+  Weight Sum(Weight w, ArcIterator *aiter, ssize_t begin,
+             ssize_t end) {
+    if (error_) return Weight::NoWeight();
+    Weight sum = Weight::Zero();
+    if (begin != end) {
+      sum = data_->GetAccumulator(fst_id_)->Sum(
+          w, aiter, begin ? begin - offset_ : 0, end - offset_);
+    }
+    if (begin == 0 && end != 0 && offset_ > 0)
+      sum = Sum(offset_weight_, sum);
+    return sum;
+  }
+
+  bool Error() const { return error_; }
+
+ private:
+  bool init_;
+  ReplaceAccumulatorData<Accumulator, T> *data_;
+  Label fst_id_;
+  size_t offset_;
+  Weight offset_weight_;
+  bool error_;
+
+  void operator=(const ReplaceAccumulator<Accumulator, T> &);  // Disallow
+};
+
+} // namespace fst
+
+#endif // FST_LIB_ACCUMULATOR_H__
diff --git a/src/include/fst/add-on.h b/src/include/fst/add-on.h
new file mode 100644
index 0000000..ee21a93
--- /dev/null
+++ b/src/include/fst/add-on.h
@@ -0,0 +1,306 @@
+// add-on.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Fst implementation class to attach an arbitrary object with a
+// read/write method to an FST and its file rep. The FST is given a
+// new type name.
+
+#ifndef FST_LIB_ADD_ON_FST_H__
+#define FST_LIB_ADD_ON_FST_H__
+
+#include <stddef.h>
+#include <string>
+
+#include <fst/fst.h>
+
+
+namespace fst {
+
+// Identifies stream data as an add-on fst.
+static const int32 kAddOnMagicNumber = 446681434;
+
+
+//
+// Some useful add-on objects.
+//
+
+// Nothing to save.
+class NullAddOn {
+ public:
+  NullAddOn() {}
+
+  // Consumes nothing from the stream; always yields a fresh object.
+  static NullAddOn *Read(istream &istrm) {
+    NullAddOn *addon = new NullAddOn();
+    return addon;
+  }
+
+  // Writes nothing and reports success.
+  bool Write(ostream &ostrm) const {
+    return true;
+  }
+
+  int RefCount() const { return ref_count_.count(); }
+  int IncrRefCount() { return ref_count_.Incr(); }
+  int DecrRefCount() { return ref_count_.Decr(); }
+
+ private:
+  RefCounter ref_count_;
+
+  DISALLOW_COPY_AND_ASSIGN(NullAddOn);
+};
+
+
+// Create a new add-on from a pair of add-ons.
+template <class A1, class A2>
+class AddOnPair {
+ public:
+  // Argument reference count incremented.
+  AddOnPair(A1 *a1, A2 *a2)
+      : a1_(a1), a2_(a2) {
+    if (a1_ != 0) a1_->IncrRefCount();
+    if (a2_ != 0) a2_->IncrRefCount();
+  }
+
+  // Drops this pair's reference on each member, deleting a member whose
+  // count reaches zero.
+  ~AddOnPair() {
+    if (a1_ != 0 && a1_->DecrRefCount() == 0)
+      delete a1_;
+    if (a2_ != 0 && a2_->DecrRefCount() == 0)
+      delete a2_;
+  }
+
+  A1 *First() const { return a1_; }
+  A2 *Second() const { return a2_; }
+
+  // Reads, for each member in turn, a presence flag followed by the member
+  // itself when the flag is set, and returns a new pair holding them.
+  static AddOnPair<A1, A2> *Read(istream &istrm) {
+    bool have_addon1 = false;
+    ReadType(istrm, &have_addon1);
+    A1 *first = have_addon1 ? A1::Read(istrm) : 0;
+
+    bool have_addon2 = false;
+    ReadType(istrm, &have_addon2);
+    A2 *second = have_addon2 ? A2::Read(istrm) : 0;
+
+    AddOnPair<A1, A2> *result = new AddOnPair<A1, A2>(first, second);
+    // The pair took its own references; release the ones held here.
+    if (first != 0) first->DecrRefCount();
+    if (second != 0) second->DecrRefCount();
+    return result;
+  }
+
+  // Writes a presence flag for each member followed by the member itself
+  // when present.
+  bool Write(ostream &ostrm) const {
+    const bool have_addon1 = (a1_ != 0);
+    WriteType(ostrm, have_addon1);
+    if (have_addon1) {
+      a1_->Write(ostrm);
+    }
+    const bool have_addon2 = (a2_ != 0);
+    WriteType(ostrm, have_addon2);
+    if (have_addon2) {
+      a2_->Write(ostrm);
+    }
+    return true;
+  }
+
+  int RefCount() const { return ref_count_.count(); }
+  int IncrRefCount() { return ref_count_.Incr(); }
+  int DecrRefCount() { return ref_count_.Decr(); }
+
+ private:
+  A1 *a1_;
+  A2 *a2_;
+  RefCounter ref_count_;
+
+  DISALLOW_COPY_AND_ASSIGN(AddOnPair);
+};
+
+
+// Add to an Fst F a type T object. T must have a 'T* Read(istream &)',
+// a 'bool Write(ostream &)' method, and 'int RefCount()', 'int IncrRefCount()'
+// and 'int DecrRefCount()' methods (e.g. 'MatcherData' in matcher-fst.h).
+// The result is a new Fst implementation with type name 'type'.
+template<class F, class T>
+class AddOnImpl : public FstImpl<typename F::Arc> {
+ public:
+  typedef typename F::Arc Arc;
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+  typedef typename Arc::StateId StateId;
+
+  using FstImpl<Arc>::SetType;
+  using FstImpl<Arc>::SetProperties;
+  using FstImpl<Arc>::WriteHeader;
+
+  // If 't' is non-zero, its reference count is incremented.
+  AddOnImpl(const F &fst, const string &type, T *t = 0)
+      : fst_(fst), t_(t) {
+    SetType(type);
+    SetProperties(fst_.Properties(kFstProperties, false));
+    if (t_)
+      t_->IncrRefCount();
+  }
+
+  // If 't' is non-zero, its reference count is incremented.
+  AddOnImpl(const Fst<Arc> &fst, const string &type, T *t = 0)
+      : fst_(fst), t_(t) {
+    SetType(type);
+    SetProperties(fst_.Properties(kFstProperties, false));
+    if (t_)
+      t_->IncrRefCount();
+  }
+
+  // The copy shares the add-on object with 'impl' (reference counted).
+  AddOnImpl(const AddOnImpl<F, T> &impl)
+      : fst_(impl.fst_), t_(impl.t_) {
+    SetType(impl.Type());
+    SetProperties(fst_.Properties(kCopyProperties, false));
+    if (t_)
+      t_->IncrRefCount();
+  }
+
+  ~AddOnImpl() {
+    if (t_ && !t_->DecrRefCount())
+      delete t_;
+  }
+
+  // The FST interface below is forwarded to the contained FST.
+  StateId Start() const { return fst_.Start(); }
+  Weight Final(StateId s) const { return fst_.Final(s); }
+  size_t NumArcs(StateId s) const { return fst_.NumArcs(s); }
+
+  size_t NumInputEpsilons(StateId s) const {
+    return fst_.NumInputEpsilons(s);
+  }
+
+  size_t NumOutputEpsilons(StateId s) const {
+    return fst_.NumOutputEpsilons(s);
+  }
+
+  size_t NumStates() const { return fst_.NumStates(); }
+
+  // Reads an add-on FST: outer header, magic number, the contained FST
+  // (with its own header) and, if flagged as present, the add-on object.
+  // Returns 0 on failure; every object allocated along the way is released
+  // on each failure path.
+  static AddOnImpl<F, T> *Read(istream &strm, const FstReadOptions &opts) {
+    FstReadOptions nopts(opts);
+    FstHeader hdr;
+    if (!nopts.header) {
+      hdr.Read(strm, nopts.source);
+      nopts.header = &hdr;
+    }
+    // Temporary impl, used here only for checking types; it is deleted
+    // whether or not the header check succeeds.
+    AddOnImpl<F, T> *impl = new AddOnImpl<F, T>(nopts.header->FstType());
+    const bool header_ok =
+        impl->ReadHeader(strm, nopts, kMinFileVersion, &hdr);
+    delete impl;
+    if (!header_ok)
+      return 0;
+
+    int32 magic_number = 0;
+    ReadType(strm, &magic_number);   // Ensures this is an add-on Fst.
+    if (magic_number != kAddOnMagicNumber) {
+      LOG(ERROR) << "AddOnImpl::Read: Bad add-on header: " << nopts.source;
+      return 0;
+    }
+
+    FstReadOptions fopts(opts);
+    fopts.header = 0;  // Contained header was written out.
+    F *fst = F::Read(strm, fopts);
+    if (!fst)
+      return 0;
+
+    T *t = 0;
+    bool have_addon = false;
+    ReadType(strm, &have_addon);
+    if (have_addon) {   // Read add-on object if present.
+      t = T::Read(strm);
+      if (!t) {
+        delete fst;     // Do not leak the contained FST on failure.
+        return 0;
+      }
+    }
+    impl = new AddOnImpl<F, T>(*fst, nopts.header->FstType(), t);
+    delete fst;
+    // The impl took its own reference on 't'; release the one held here.
+    if (t)
+      t->DecrRefCount();
+    return impl;
+  }
+
+  // Writes the outer header (without symbol tables), the magic number, the
+  // contained FST with a forced header, a presence flag and, if present,
+  // the add-on object.
+  bool Write(ostream &strm, const FstWriteOptions &opts) const {
+    FstHeader hdr;
+    FstWriteOptions nopts(opts);
+    nopts.write_isymbols = false;  // Let contained FST hold any symbols.
+    nopts.write_osymbols = false;
+    WriteHeader(strm, nopts, kFileVersion, &hdr);
+    WriteType(strm, kAddOnMagicNumber);  // Ensures this is an add-on Fst.
+    FstWriteOptions fopts(opts);
+    fopts.write_header = true;     // Force writing contained header.
+    if (!fst_.Write(strm, fopts))
+      return false;
+    bool have_addon = t_;
+    WriteType(strm, have_addon);
+    if (have_addon)                // Write add-on object if present.
+      t_->Write(strm);
+    return true;
+  }
+
+  void InitStateIterator(StateIteratorData<Arc> *data) const {
+    fst_.InitStateIterator(data);
+  }
+
+  void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const {
+    fst_.InitArcIterator(s, data);
+  }
+
+  F &GetFst() { return fst_; }
+
+  const F &GetFst() const { return fst_; }
+
+  T *GetAddOn() const { return t_; }
+
+  // If 't' is non-zero, its reference count is incremented. The reference
+  // on the previous add-on (if any) is released.
+  void SetAddOn(T *t) {
+    if (t == t_)
+      return;
+    if (t_ && !t_->DecrRefCount())
+      delete t_;
+    t_ = t;
+    if (t_)
+      t_->IncrRefCount();
+  }
+
+ private:
+  // Used by Read() to build a throwaway impl for header checking.
+  explicit AddOnImpl(const string &type) : t_(0) {
+    SetType(type);
+    SetProperties(kExpanded);
+  }
+
+  // Current file format version
+  static const int kFileVersion = 1;
+  // Minimum file format version supported
+  static const int kMinFileVersion = 1;
+
+  F fst_;
+  T *t_;
+
+  void operator=(const AddOnImpl<F, T> &fst);  // Disallow
+};
+
+template <class F, class T> const int AddOnImpl<F, T>::kFileVersion;
+template <class F, class T> const int AddOnImpl<F, T>::kMinFileVersion;
+
+
+} // namespace fst
+
+#endif // FST_LIB_ADD_ON_FST_H__
diff --git a/src/include/fst/arc-map.h b/src/include/fst/arc-map.h
new file mode 100644
index 0000000..3055f71
--- /dev/null
+++ b/src/include/fst/arc-map.h
@@ -0,0 +1,1146 @@
+// arc-map.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Class to map over/transform arcs e.g., change semirings or
+// implement project/invert. Consider using when operation does
+// not change the number of arcs (except possibly superfinal arcs).
+
+#ifndef FST_LIB_ARC_MAP_H__
+#define FST_LIB_ARC_MAP_H__
+
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <string>
+#include <utility>
+using std::pair; using std::make_pair;
+
+#include <fst/cache.h>
+#include <fst/mutable-fst.h>
+
+
+namespace fst {
+
+// This determines how final weights are mapped.
+enum MapFinalAction {
+ // A final weight is mapped into a final weight. An error
+ // is raised if this is not possible.
+ // (In the ArcMap functions in this file, "not possible" means the
+ // mapped final arc has a non-zero input or output label.)
+ MAP_NO_SUPERFINAL,
+
+ // A final weight is mapped to an arc to the superfinal state
+ // when the result cannot be represented as a final weight.
+ // The superfinal state will be added only if it is needed.
+ MAP_ALLOW_SUPERFINAL,
+
+ // A final weight is mapped to an arc to the superfinal state
+ // unless the result can be represented as a final weight of weight
+ // Zero(). The superfinal state is always added (if the input is
+ // not the empty Fst).
+ MAP_REQUIRE_SUPERFINAL
+};
+
+// This determines how symbol tables are mapped.
+enum MapSymbolsAction {
+ // Symbols should be cleared in the result by the map.
+ MAP_CLEAR_SYMBOLS,
+
+ // Symbols should be copied from the input FST by the map.
+ MAP_COPY_SYMBOLS,
+
+ // Symbols should not be modified in the result by the map itself.
+ // (They may be set by the mapper.)
+ MAP_NOOP_SYMBOLS
+};
+
+// ArcMapper Interface - class determines how arcs and final weights
+// are mapped. Useful for implementing operations that do not change
+// the number of arcs (except possibly superfinal arcs).
+//
+// class ArcMapper {
+// public:
+// typedef A FromArc;
+// typedef B ToArc;
+//
+// // Maps an arc type A to arc type B.
+// B operator()(const A &arc);
+// // Specifies final action the mapper requires (see above).
+// // The mapper will be passed final weights as arcs of the
+// // form A(0, 0, weight, kNoStateId).
+// MapFinalAction FinalAction() const;
+// // Specifies input symbol table action the mapper requires (see above).
+// MapSymbolsAction InputSymbolsAction() const;
+// // Specifies output symbol table action the mapper requires (see above).
+// MapSymbolsAction OutputSymbolsAction() const;
+// // This specifies the known properties of an Fst mapped by this
+// // mapper. It takes as argument the input Fst's known properties.
+// uint64 Properties(uint64 props) const;
+// };
+//
+// The ArcMap functions and classes below will use the FinalAction()
+// method of the mapper to determine how to treat final weights,
+// e.g. whether to add a superfinal state. They will use the Properties()
+// method to set the result Fst properties.
+//
+// We include various map versions below. One dimension of
+// variation is whether the mapping mutates its input, writes to a
+// new result Fst, or is an on-the-fly Fst. Another dimension is how
+// we pass the mapper. We allow passing the mapper by pointer
+// for cases that we need to change the state of the user's mapper.
+// This is the case with the encode mapper, which is reused during
+// decoding. We also include map versions that pass the mapper
+// by value or const reference when this suffices.
+
+
+// Maps an arc type A using a mapper function object C, passed
+// by pointer. This version modifies its Fst input.
+template<class A, class C>
+void ArcMap(MutableFst<A> *fst, C* mapper) {
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+
+  if (mapper->InputSymbolsAction() == MAP_CLEAR_SYMBOLS)
+    fst->SetInputSymbols(0);
+
+  if (mapper->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS)
+    fst->SetOutputSymbols(0);
+
+  // Nothing to map in an FST without a start state.
+  if (fst->Start() == kNoStateId)
+    return;
+
+  // Properties before mapping; the mapper derives the result properties
+  // from these at the end.
+  uint64 props = fst->Properties(kFstProperties, false);
+
+  MapFinalAction final_action = mapper->FinalAction();
+  StateId superfinal = kNoStateId;
+  if (final_action == MAP_REQUIRE_SUPERFINAL) {
+    superfinal = fst->AddState();
+    fst->SetFinal(superfinal, Weight::One());
+  }
+
+  // NumStates() is re-read each iteration, so a superfinal state added
+  // during the loop is visited too; the cases below leave it untouched.
+  for (StateId s = 0; s < fst->NumStates(); ++s) {
+    for (MutableArcIterator< MutableFst<A> > aiter(fst, s);
+         !aiter.Done(); aiter.Next()) {
+      const A &arc = aiter.Value();
+      aiter.SetValue((*mapper)(arc));
+    }
+
+    // Final weights are passed to the mapper as arcs of the form
+    // A(0, 0, weight, kNoStateId).
+    switch (final_action) {
+      case MAP_NO_SUPERFINAL:
+      default: {
+        A final_arc = (*mapper)(A(0, 0, fst->Final(s), kNoStateId));
+        if (final_arc.ilabel != 0 || final_arc.olabel != 0) {
+          FSTERROR() << "ArcMap: non-zero arc labels for superfinal arc";
+          fst->SetProperties(kError, kError);
+        }
+
+        fst->SetFinal(s, final_arc.weight);
+        break;
+      }
+      case MAP_ALLOW_SUPERFINAL: {
+        if (s != superfinal) {
+          A final_arc = (*mapper)(A(0, 0, fst->Final(s), kNoStateId));
+          if (final_arc.ilabel != 0 || final_arc.olabel != 0) {
+            // Add a superfinal state if not already done.
+            if (superfinal == kNoStateId) {
+              superfinal = fst->AddState();
+              fst->SetFinal(superfinal, Weight::One());
+            }
+            final_arc.nextstate = superfinal;
+            fst->AddArc(s, final_arc);
+            fst->SetFinal(s, Weight::Zero());
+          } else {
+            fst->SetFinal(s, final_arc.weight);
+          }
+        }
+        // The break is outside the 'if' so that the superfinal state does
+        // not fall through into the next case.
+        break;
+      }
+      case MAP_REQUIRE_SUPERFINAL: {
+        if (s != superfinal) {
+          A final_arc = (*mapper)(A(0, 0, fst->Final(s), kNoStateId));
+          if (final_arc.ilabel != 0 || final_arc.olabel != 0 ||
+              final_arc.weight != Weight::Zero())
+            fst->AddArc(s, A(final_arc.ilabel, final_arc.olabel,
+                             final_arc.weight, superfinal));
+          fst->SetFinal(s, Weight::Zero());
+        }
+        break;
+      }
+    }
+  }
+  fst->SetProperties(mapper->Properties(props), kFstProperties);
+}
+
+
+// Maps an arc type A using a mapper function object C, passed
+// by value. This version modifies its Fst input.
+template<class A, class C>
+void ArcMap(MutableFst<A> *fst, C mapper) {
+ // Forwards to the pointer version using the local copy of the mapper.
+ ArcMap(fst, &mapper);
+}
+
+
+// Maps an arc type A to an arc type B using mapper function
+// object C, passed by pointer. This version writes the mapped
+// input Fst to an output MutableFst.
+template<class A, class B, class C>
+void ArcMap(const Fst<A> &ifst, MutableFst<B> *ofst, C* mapper) {
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+
+  ofst->DeleteStates();
+
+  if (mapper->InputSymbolsAction() == MAP_COPY_SYMBOLS)
+    ofst->SetInputSymbols(ifst.InputSymbols());
+  else if (mapper->InputSymbolsAction() == MAP_CLEAR_SYMBOLS)
+    ofst->SetInputSymbols(0);
+
+  if (mapper->OutputSymbolsAction() == MAP_COPY_SYMBOLS)
+    ofst->SetOutputSymbols(ifst.OutputSymbols());
+  else if (mapper->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS)
+    ofst->SetOutputSymbols(0);
+
+  uint64 iprops = ifst.Properties(kCopyProperties, false);
+
+  // Empty input: only a possible error bit is carried over.
+  if (ifst.Start() == kNoStateId) {
+    if (iprops & kError) ofst->SetProperties(kError, kError);
+    return;
+  }
+
+  MapFinalAction final_action = mapper->FinalAction();
+  if (ifst.Properties(kExpanded, false)) {
+    // Reserve room for every input state plus a possible superfinal state.
+    // The conditional must be parenthesized: '+' binds tighter than '=='
+    // and '?:', so without the parentheses only 0 or 1 would be reserved.
+    ofst->ReserveStates(CountStates(ifst) +
+                        (final_action == MAP_NO_SUPERFINAL ? 0 : 1));
+  }
+
+  // Add all states.
+  for (StateIterator< Fst<A> > siter(ifst); !siter.Done(); siter.Next())
+    ofst->AddState();
+
+  StateId superfinal = kNoStateId;
+  if (final_action == MAP_REQUIRE_SUPERFINAL) {
+    superfinal = ofst->AddState();
+    ofst->SetFinal(superfinal, B::Weight::One());
+  }
+  for (StateIterator< Fst<A> > siter(ifst); !siter.Done(); siter.Next()) {
+    StateId s = siter.Value();
+    if (s == ifst.Start())
+      ofst->SetStart(s);
+
+    ofst->ReserveArcs(s, ifst.NumArcs(s));
+    for (ArcIterator< Fst<A> > aiter(ifst, s); !aiter.Done(); aiter.Next())
+      ofst->AddArc(s, (*mapper)(aiter.Value()));
+
+    // Final weights are passed to the mapper as arcs of the form
+    // A(0, 0, weight, kNoStateId).
+    switch (final_action) {
+      case MAP_NO_SUPERFINAL:
+      default: {
+        B final_arc = (*mapper)(A(0, 0, ifst.Final(s), kNoStateId));
+        if (final_arc.ilabel != 0 || final_arc.olabel != 0) {
+          FSTERROR() << "ArcMap: non-zero arc labels for superfinal arc";
+          ofst->SetProperties(kError, kError);
+        }
+        ofst->SetFinal(s, final_arc.weight);
+        break;
+      }
+      case MAP_ALLOW_SUPERFINAL: {
+        B final_arc = (*mapper)(A(0, 0, ifst.Final(s), kNoStateId));
+        if (final_arc.ilabel != 0 || final_arc.olabel != 0) {
+          // Add a superfinal state if not already done.
+          if (superfinal == kNoStateId) {
+            superfinal = ofst->AddState();
+            ofst->SetFinal(superfinal, B::Weight::One());
+          }
+          final_arc.nextstate = superfinal;
+          ofst->AddArc(s, final_arc);
+          ofst->SetFinal(s, B::Weight::Zero());
+        } else {
+          ofst->SetFinal(s, final_arc.weight);
+        }
+        break;
+      }
+      case MAP_REQUIRE_SUPERFINAL: {
+        B final_arc = (*mapper)(A(0, 0, ifst.Final(s), kNoStateId));
+        if (final_arc.ilabel != 0 || final_arc.olabel != 0 ||
+            final_arc.weight != B::Weight::Zero())
+          ofst->AddArc(s, B(final_arc.ilabel, final_arc.olabel,
+                            final_arc.weight, superfinal));
+        ofst->SetFinal(s, B::Weight::Zero());
+        break;
+      }
+    }
+  }
+  // Combine what the mapper derives from the input properties with the
+  // properties the output accumulated while it was being built.
+  uint64 oprops = ofst->Properties(kFstProperties, false);
+  ofst->SetProperties(mapper->Properties(iprops) | oprops, kFstProperties);
+}
+
+// Maps an arc type A to an arc type B using mapper function
+// object C, passed by value. This version writes the mapped input
+// Fst to an output MutableFst.
+template<class A, class B, class C>
+void ArcMap(const Fst<A> &ifst, MutableFst<B> *ofst, C mapper) {
+ // Forwards to the pointer version using the local copy of the mapper.
+ ArcMap(ifst, ofst, &mapper);
+}
+
+
+struct ArcMapFstOptions : public CacheOptions {
+ // ArcMapFst default caching behaviour is to do no caching. Most
+ // mappers are cheap and therefore we save memory by not doing
+ // caching.
+ ArcMapFstOptions() : CacheOptions(true, 0) {}
+ // Uses the given cache options unchanged (implicit conversion allowed).
+ ArcMapFstOptions(const CacheOptions& opts) : CacheOptions(opts) {}
+};
+
+
+template <class A, class B, class C> class ArcMapFst;
+
+// Implementation of delayed ArcMapFst.
+template <class A, class B, class C>
+class ArcMapFstImpl : public CacheImpl<B> {
+ public:
+ using FstImpl<B>::SetType;
+ using FstImpl<B>::SetProperties;
+ using FstImpl<B>::SetInputSymbols;
+ using FstImpl<B>::SetOutputSymbols;
+
+ using VectorFstBaseImpl<typename CacheImpl<B>::State>::NumStates;
+
+ using CacheImpl<B>::PushArc;
+ using CacheImpl<B>::HasArcs;
+ using CacheImpl<B>::HasFinal;
+ using CacheImpl<B>::HasStart;
+ using CacheImpl<B>::SetArcs;
+ using CacheImpl<B>::SetFinal;
+ using CacheImpl<B>::SetStart;
+
+ friend class StateIterator< ArcMapFst<A, B, C> >;
+
+ typedef B Arc;
+ typedef typename B::Weight Weight;
+ typedef typename B::StateId StateId;
+
+  // Delayed map over a copy of 'fst'. The mapper is copied; the copy is
+  // owned by this impl and deleted in the destructor.
+  ArcMapFstImpl(const Fst<A> &fst, const C &mapper,
+                const ArcMapFstOptions& opts)
+      : CacheImpl<B>(opts), fst_(fst.Copy()), mapper_(new C(mapper)),
+        own_mapper_(true), superfinal_(kNoStateId), nstates_(0) {
+    Init();
+  }
+
+  // Delayed map over a copy of 'fst' using the caller's mapper object,
+  // which is not owned (and therefore not deleted) by this impl.
+  ArcMapFstImpl(const Fst<A> &fst, C *mapper,
+                const ArcMapFstOptions& opts)
+      : CacheImpl<B>(opts), fst_(fst.Copy()), mapper_(mapper),
+        own_mapper_(false), superfinal_(kNoStateId), nstates_(0) {
+    Init();
+  }
+
+  // Copy constructor: copies the input FST via Copy(true), takes a private
+  // (owned) copy of the mapper, and re-runs Init() on the new impl.
+  ArcMapFstImpl(const ArcMapFstImpl<A, B, C> &impl)
+      : CacheImpl<B>(impl), fst_(impl.fst_->Copy(true)),
+        mapper_(new C(*impl.mapper_)), own_mapper_(true),
+        superfinal_(kNoStateId), nstates_(0) {
+    Init();
+  }
+
+  // Releases the FST copy, and the mapper only when this impl owns it.
+  ~ArcMapFstImpl() {
+    delete fst_;
+    if (own_mapper_) {
+      delete mapper_;
+    }
+  }
+
+  // Returns the start state, computing and caching it on first use.
+  StateId Start() {
+    if (!HasStart()) {
+      const StateId istart = fst_->Start();
+      SetStart(FindOState(istart));
+    }
+    return CacheImpl<B>::Start();
+  }
+
+  // Returns the final weight of 's', computing and caching it on first use
+  // according to the mapper's final action.
+  Weight Final(StateId s) {
+    if (HasFinal(s))
+      return CacheImpl<B>::Final(s);
+
+    if (final_action_ == MAP_REQUIRE_SUPERFINAL) {
+      // Only the superfinal state is final.
+      SetFinal(s, s == superfinal_ ? Weight::One() : Weight::Zero());
+    } else if (final_action_ == MAP_ALLOW_SUPERFINAL) {
+      if (s == superfinal_) {
+        SetFinal(s, Weight::One());
+      } else {
+        B final_arc = (*mapper_)(A(0, 0, fst_->Final(FindIState(s)),
+                                   kNoStateId));
+        // A mapped final arc carrying labels is not a final weight here.
+        if (final_arc.ilabel != 0 || final_arc.olabel != 0)
+          SetFinal(s, Weight::Zero());
+        else
+          SetFinal(s, final_arc.weight);
+      }
+    } else {
+      // MAP_NO_SUPERFINAL, and any other value of final_action_.
+      B final_arc = (*mapper_)(A(0, 0, fst_->Final(FindIState(s)),
+                                 kNoStateId));
+      if (final_arc.ilabel != 0 || final_arc.olabel != 0) {
+        FSTERROR() << "ArcMapFst: non-zero arc labels for superfinal arc";
+        SetProperties(kError, kError);
+      }
+      SetFinal(s, final_arc.weight);
+    }
+    return CacheImpl<B>::Final(s);
+  }
+
+  // Number of arcs leaving 's'; expands the state first if necessary.
+  size_t NumArcs(StateId s) {
+    if (!HasArcs(s)) {
+      Expand(s);
+    }
+    return CacheImpl<B>::NumArcs(s);
+  }
+
+  // Number of input-epsilon arcs leaving 's'; expands the state first if
+  // necessary.
+  size_t NumInputEpsilons(StateId s) {
+    if (!HasArcs(s)) {
+      Expand(s);
+    }
+    return CacheImpl<B>::NumInputEpsilons(s);
+  }
+
+  // Number of output-epsilon arcs leaving 's'; expands the state first if
+  // necessary.
+  size_t NumOutputEpsilons(StateId s) {
+    if (!HasArcs(s)) {
+      Expand(s);
+    }
+    return CacheImpl<B>::NumOutputEpsilons(s);
+  }
+
+ // All FST properties; forwards to the masked overload with kFstProperties.
+ uint64 Properties() const { return Properties(kFstProperties); }
+
+ // Set error if found; return FST impl properties.
+ uint64 Properties(uint64 mask) const {
+ if ((mask & kError) && (fst_->Properties(kError, false) ||
+ (mapper_->Properties(0) & kError)))
+ SetProperties(kError, kError);
+ return FstImpl<Arc>::Properties(mask);
+ }
+
+  // Fills `data` for iterating over the arcs of `s`, expanding the state
+  // first if its arcs are not yet cached.
+  void InitArcIterator(StateId s, ArcIteratorData<B> *data) {
+    if (!HasArcs(s)) {
+      Expand(s);
+    }
+    CacheImpl<B>::InitArcIterator(s, data);
+  }
+
+  // Computes and caches the outgoing arcs of output state `s`.
+  void Expand(StateId s) {
+    // The superfinal state has no outgoing arcs.
+    if (s == superfinal_) { SetArcs(s); return; }
+
+    // Map every arc leaving the corresponding input state. The destination
+    // is translated to output-state numbering before the mapper is applied.
+    for (ArcIterator< Fst<A> > aiter(*fst_, FindIState(s));
+         !aiter.Done(); aiter.Next()) {
+      A aarc(aiter.Value());
+      aarc.nextstate = FindOState(aarc.nextstate);
+      const B& barc = (*mapper_)(aarc);
+      PushArc(s, barc);
+    }
+
+    // Check for superfinal arcs.
+    if (!HasFinal(s) || Final(s) == Weight::Zero())
+      switch (final_action_) {
+        case MAP_NO_SUPERFINAL:
+        default:
+          break;
+        case MAP_ALLOW_SUPERFINAL: {
+          // A mapped final weight that carries labels is emitted as an arc
+          // to the superfinal state, which is allocated on first need.
+          B final_arc = (*mapper_)(A(0, 0, fst_->Final(FindIState(s)),
+                                     kNoStateId));
+          if (final_arc.ilabel != 0 || final_arc.olabel != 0) {
+            if (superfinal_ == kNoStateId)
+              superfinal_ = nstates_++;
+            final_arc.nextstate = superfinal_;
+            PushArc(s, final_arc);
+          }
+          break;
+        }
+        case MAP_REQUIRE_SUPERFINAL: {
+          // Every mapped final arc that is labeled or has non-Zero() weight
+          // is redirected to the superfinal state.
+          B final_arc = (*mapper_)(A(0, 0, fst_->Final(FindIState(s)),
+                                     kNoStateId));
+          if (final_arc.ilabel != 0 || final_arc.olabel != 0 ||
+              final_arc.weight != B::Weight::Zero())
+            PushArc(s, B(final_arc.ilabel, final_arc.olabel,
+                         final_arc.weight, superfinal_));
+          break;
+        }
+      }
+    SetArcs(s);
+  }
+
+ private:
+  // Shared constructor tail: sets the FST type, symbol tables, properties
+  // and the final action from the wrapped FST and the mapper.
+  void Init() {
+    SetType("map");
+
+    // Input symbols: copied or cleared as the mapper requests. Any other
+    // action leaves them as they are.
+    if (mapper_->InputSymbolsAction() == MAP_COPY_SYMBOLS)
+      SetInputSymbols(fst_->InputSymbols());
+    else if (mapper_->InputSymbolsAction() == MAP_CLEAR_SYMBOLS)
+      SetInputSymbols(0);
+
+    // Output symbols: same policy as for input symbols.
+    if (mapper_->OutputSymbolsAction() == MAP_COPY_SYMBOLS)
+      SetOutputSymbols(fst_->OutputSymbols());
+    else if (mapper_->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS)
+      SetOutputSymbols(0);
+
+    if (fst_->Start() == kNoStateId) {
+      // No start state: no superfinal state is needed.
+      final_action_ = MAP_NO_SUPERFINAL;
+      SetProperties(kNullProperties);
+    } else {
+      final_action_ = mapper_->FinalAction();
+      uint64 props = fst_->Properties(kCopyProperties, false);
+      SetProperties(mapper_->Properties(props));
+      // A required superfinal state is given output state id 0.
+      if (final_action_ == MAP_REQUIRE_SUPERFINAL)
+        superfinal_ = 0;
+    }
+  }
+
+  // Translates an output state id into the corresponding input state id.
+  // Ids at or above the superfinal state are shifted down by one.
+  StateId FindIState(StateId s) {
+    const bool shifted = superfinal_ != kNoStateId && s >= superfinal_;
+    return shifted ? s - 1 : s;
+  }
+
+  // Translates an input state id into the corresponding output state id,
+  // skipping over the superfinal state's id, and grows nstates_ so that it
+  // always exceeds the largest output id handed out.
+  StateId FindOState(StateId is) {
+    StateId os = is;
+    if (superfinal_ != kNoStateId && is >= superfinal_)
+      os = is + 1;
+
+    if (nstates_ <= os)
+      nstates_ = os + 1;
+
+    return os;
+  }
+
+
+ const Fst<A> *fst_;
+ C* mapper_;
+ bool own_mapper_;
+ MapFinalAction final_action_;
+
+ StateId superfinal_;
+ StateId nstates_;
+
+ void operator=(const ArcMapFstImpl<A, B, C> &); // disallow
+};
+
+
+// Maps an arc type A to an arc type B using Mapper function object
+// C. This version is a delayed Fst: all work is forwarded to a shared
+// ArcMapFstImpl.
+template <class A, class B, class C>
+class ArcMapFst : public ImplToFst< ArcMapFstImpl<A, B, C> > {
+ public:
+  friend class ArcIterator< ArcMapFst<A, B, C> >;
+  friend class StateIterator< ArcMapFst<A, B, C> >;
+
+  typedef B Arc;
+  typedef typename B::Weight Weight;
+  typedef typename B::StateId StateId;
+  typedef CacheState<B> State;
+  typedef ArcMapFstImpl<A, B, C> Impl;
+
+  // Overloads taking the mapper by const reference or by pointer, each with
+  // or without explicit options (default-constructed ArcMapFstOptions
+  // otherwise). The mapper is forwarded as given to the Impl constructor.
+  ArcMapFst(const Fst<A> &fst, const C &mapper, const ArcMapFstOptions& opts)
+      : ImplToFst<Impl>(new Impl(fst, mapper, opts)) {}
+
+  ArcMapFst(const Fst<A> &fst, C* mapper, const ArcMapFstOptions& opts)
+      : ImplToFst<Impl>(new Impl(fst, mapper, opts)) {}
+
+  ArcMapFst(const Fst<A> &fst, const C &mapper)
+      : ImplToFst<Impl>(new Impl(fst, mapper, ArcMapFstOptions())) {}
+
+  ArcMapFst(const Fst<A> &fst, C* mapper)
+      : ImplToFst<Impl>(new Impl(fst, mapper, ArcMapFstOptions())) {}
+
+  // See Fst<>::Copy() for doc.
+  ArcMapFst(const ArcMapFst<A, B, C> &fst, bool safe = false)
+      : ImplToFst<Impl>(fst, safe) {}
+
+  // Get a copy of this ArcMapFst. See Fst<>::Copy() for further doc.
+  virtual ArcMapFst<A, B, C> *Copy(bool safe = false) const {
+    return new ArcMapFst<A, B, C>(*this, safe);
+  }
+
+  // Defined out of line, after the StateIterator specialization.
+  virtual inline void InitStateIterator(StateIteratorData<B> *data) const;
+
+  virtual void InitArcIterator(StateId s, ArcIteratorData<B> *data) const {
+    GetImpl()->InitArcIterator(s, data);
+  }
+
+ private:
+  // Makes visible to friends.
+  Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
+
+  void operator=(const ArcMapFst<A, B, C> &fst);  // disallow
+};
+
+
+// Specialization for ArcMapFst. Walks the states of the wrapped FST and
+// yields one extra state at the end while the superfinal flag is set.
+template<class A, class B, class C>
+class StateIterator< ArcMapFst<A, B, C> > : public StateIteratorBase<B> {
+ public:
+  typedef typename B::StateId StateId;
+
+  // The superfinal flag starts set when the mapper requires a superfinal
+  // state; under MAP_ALLOW_SUPERFINAL it is discovered by CheckSuperfinal().
+  explicit StateIterator(const ArcMapFst<A, B, C> &fst)
+      : impl_(fst.GetImpl()), siter_(*impl_->fst_), s_(0),
+        superfinal_(impl_->final_action_ == MAP_REQUIRE_SUPERFINAL)
+  { CheckSuperfinal(); }
+
+  // Done only when the wrapped iterator is exhausted and no superfinal
+  // state remains to be visited.
+  bool Done() const { return siter_.Done() && !superfinal_; }
+
+  StateId Value() const { return s_; }
+
+  // Advances the wrapped iterator while it has states. The step after that
+  // consumes the pending superfinal state, if any.
+  void Next() {
+    ++s_;
+    if (!siter_.Done()) {
+      siter_.Next();
+      CheckSuperfinal();
+    }
+    else if (superfinal_)
+      superfinal_ = false;
+  }
+
+  void Reset() {
+    s_ = 0;
+    siter_.Reset();
+    superfinal_ = impl_->final_action_ == MAP_REQUIRE_SUPERFINAL;
+    CheckSuperfinal();
+  }
+
+ private:
+  // This allows base-class virtual access to non-virtual derived-
+  // class members of the same name. It makes the derived class more
+  // efficient to use but unsafe to further derive.
+  bool Done_() const { return Done(); }
+  StateId Value_() const { return Value(); }
+  void Next_() { Next(); }
+  void Reset_() { Reset(); }
+
+  // Under MAP_ALLOW_SUPERFINAL, maps the final weight of the current state
+  // and sets the superfinal flag once a mapped final arc carries a label.
+  // Does nothing for other final actions or once the flag is already set.
+  void CheckSuperfinal() {
+    if (impl_->final_action_ != MAP_ALLOW_SUPERFINAL || superfinal_)
+      return;
+    if (!siter_.Done()) {
+      B final_arc = (*impl_->mapper_)(A(0, 0, impl_->fst_->Final(s_),
+                                        kNoStateId));
+      if (final_arc.ilabel != 0 || final_arc.olabel != 0)
+        superfinal_ = true;
+    }
+  }
+
+  const ArcMapFstImpl<A, B, C> *impl_;
+  StateIterator< Fst<A> > siter_;
+  StateId s_;
+  bool superfinal_;    // true if there is a superfinal state and not done
+
+  DISALLOW_COPY_AND_ASSIGN(StateIterator);
+};
+
+
+// ArcIterator specialization for ArcMapFst: a cache arc iterator that
+// expands the requested state first when its arcs are not yet cached.
+template <class A, class B, class C>
+class ArcIterator< ArcMapFst<A, B, C> >
+    : public CacheArcIterator< ArcMapFst<A, B, C> > {
+ public:
+  typedef typename A::StateId StateId;
+
+  ArcIterator(const ArcMapFst<A, B, C> &fst, StateId s)
+      : CacheArcIterator< ArcMapFst<A, B, C> >(fst.GetImpl(), s) {
+    typename ArcMapFst<A, B, C>::Impl *impl = fst.GetImpl();
+    if (!impl->HasArcs(s)) {
+      impl->Expand(s);
+    }
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+// Installs a newly allocated ArcMapFst state iterator as data->base.
+template <class A, class B, class C>
+inline void ArcMapFst<A, B, C>::InitStateIterator(
+    StateIteratorData<B> *data) const {
+  StateIterator< ArcMapFst<A, B, C> > *siter =
+      new StateIterator< ArcMapFst<A, B, C> >(*this);
+  data->base = siter;
+}
+
+
+//
+// Utility Mappers
+//
+
+// Identity mapper: arcs, symbol tables and properties pass through
+// unchanged, and no superfinal state is introduced.
+template <class A>
+struct IdentityArcMapper {
+  typedef A FromArc;
+  typedef A ToArc;
+
+  A operator()(const A &arc) const {
+    return arc;
+  }
+
+  MapFinalAction FinalAction() const {
+    return MAP_NO_SUPERFINAL;
+  }
+
+  MapSymbolsAction InputSymbolsAction() const {
+    return MAP_COPY_SYMBOLS;
+  }
+
+  MapSymbolsAction OutputSymbolsAction() const {
+    return MAP_COPY_SYMBOLS;
+  }
+
+  uint64 Properties(uint64 props) const {
+    return props;
+  }
+};
+
+
+// Mapper that leaves arcs unchanged but requests a superfinal state
+// (MAP_REQUIRE_SUPERFINAL), so final states are redirected to a single
+// super-final state.
+template <class A>
+struct SuperFinalMapper {
+  typedef A FromArc;
+  typedef A ToArc;
+
+  A operator()(const A &arc) const {
+    return arc;
+  }
+
+  MapFinalAction FinalAction() const {
+    return MAP_REQUIRE_SUPERFINAL;
+  }
+
+  MapSymbolsAction InputSymbolsAction() const {
+    return MAP_COPY_SYMBOLS;
+  }
+
+  MapSymbolsAction OutputSymbolsAction() const {
+    return MAP_COPY_SYMBOLS;
+  }
+
+  // Keeps only the properties selected by kAddSuperFinalProperties.
+  uint64 Properties(uint64 props) const {
+    const uint64 kept = props & kAddSuperFinalProperties;
+    return kept;
+  }
+};
+
+
+// Mapper that keeps labels and nextstate and converts only the weight,
+// using the WeightConvert<FromWeight, ToWeight> function object. A
+// WeightConvert specialization for the two weight types must exist.
+template <class A, class B>
+class WeightConvertMapper {
+ public:
+  typedef A FromArc;
+  typedef B ToArc;
+  typedef typename FromArc::Weight FromWeight;
+  typedef typename ToArc::Weight ToWeight;
+
+  ToArc operator()(const FromArc &arc) const {
+    const ToWeight converted = convert_weight_(arc.weight);
+    return ToArc(arc.ilabel, arc.olabel, converted, arc.nextstate);
+  }
+
+  MapFinalAction FinalAction() const {
+    return MAP_NO_SUPERFINAL;
+  }
+
+  MapSymbolsAction InputSymbolsAction() const {
+    return MAP_COPY_SYMBOLS;
+  }
+
+  MapSymbolsAction OutputSymbolsAction() const {
+    return MAP_COPY_SYMBOLS;
+  }
+
+  uint64 Properties(uint64 props) const {
+    return props;
+  }
+
+ private:
+  WeightConvert<FromWeight, ToWeight> convert_weight_;
+};
+
+// Non-precision-changing weight conversions.
+// Consider using more efficient Cast (fst.h) instead.
+typedef WeightConvertMapper<StdArc, LogArc> StdToLogMapper;
+typedef WeightConvertMapper<LogArc, StdArc> LogToStdMapper;
+
+// Precision-changing weight conversions.
+typedef WeightConvertMapper<StdArc, Log64Arc> StdToLog64Mapper;
+typedef WeightConvertMapper<LogArc, Log64Arc> LogToLog64Mapper;
+typedef WeightConvertMapper<Log64Arc, StdArc> Log64ToStdMapper;
+typedef WeightConvertMapper<Log64Arc, LogArc> Log64ToLogMapper;
+
+// Mapper from A to GallicArc<A>: the output label moves into the string
+// component of the Gallic weight and the arc becomes an acceptor arc on
+// the input label.
+template <class A, StringType S = STRING_LEFT>
+struct ToGallicMapper {
+  typedef A FromArc;
+  typedef GallicArc<A, S> ToArc;
+
+  typedef StringWeight<typename A::Label, S> SW;
+  typedef typename A::Weight AW;
+  typedef typename GallicArc<A, S>::Weight GW;
+
+  ToArc operator()(const A &arc) const {
+    if (arc.nextstate == kNoStateId) {
+      // Final-weight pseudo arc: 'super-final' when the weight is not
+      // Zero(), 'super-non-final' otherwise.
+      if (arc.weight != AW::Zero())
+        return ToArc(0, 0, GW(SW::One(), arc.weight), kNoStateId);
+      return ToArc(0, 0, GW(SW::Zero(), arc.weight), kNoStateId);
+    }
+
+    if (arc.olabel == 0) {
+      // Epsilon output label maps to the empty string weight.
+      return ToArc(arc.ilabel, arc.ilabel,
+                   GW(SW::One(), arc.weight), arc.nextstate);
+    }
+
+    // Regular output label.
+    return ToArc(arc.ilabel, arc.ilabel,
+                 GW(SW(arc.olabel), arc.weight), arc.nextstate);
+  }
+
+  MapFinalAction FinalAction() const {
+    return MAP_NO_SUPERFINAL;
+  }
+
+  MapSymbolsAction InputSymbolsAction() const {
+    return MAP_COPY_SYMBOLS;
+  }
+
+  MapSymbolsAction OutputSymbolsAction() const {
+    return MAP_CLEAR_SYMBOLS;
+  }
+
+  uint64 Properties(uint64 props) const {
+    const uint64 projected = ProjectProperties(props, true);
+    return projected & kWeightInvariantProperties;
+  }
+};
+
+
+// Mapper from GallicArc<A> to A. The string component of the Gallic weight
+// becomes the output label, so it must hold at most one label and the arc
+// must be an acceptor arc (ilabel == olabel). Anything else is reported
+// through FSTERROR() and makes Properties() return kError.
+template <class A, StringType S = STRING_LEFT>
+struct FromGallicMapper {
+  typedef GallicArc<A, S> FromArc;
+  typedef A ToArc;
+
+  typedef typename A::Label Label;
+  typedef StringWeight<Label, S> SW;
+  typedef typename A::Weight AW;
+  typedef typename GallicArc<A, S>::Weight GW;
+
+  // superfinal_label: input label put on a mapped final-weight arc whose
+  // string weight holds a label (such an arc leads to the superfinal
+  // state).
+  FromGallicMapper(Label superfinal_label = 0)
+      : superfinal_label_(superfinal_label), error_(false) {}
+
+  A operator()(const FromArc &arc) const {
+    // 'Super-non-final' arc.
+    if (arc.nextstate == kNoStateId && arc.weight == GW::Zero())
+      return A(arc.ilabel, 0, AW::Zero(), kNoStateId);
+
+    SW w1 = arc.weight.Value1();
+    AW w2 = arc.weight.Value2();
+    StringWeightIterator<Label, S> iter1(w1);
+
+    // Output label: the single label of the string weight, else epsilon.
+    Label l = w1.Size() == 1 ? iter1.Value() : 0;
+
+    if (l == kStringInfinity || l == kStringBad ||
+        arc.ilabel != arc.olabel || w1.Size() > 1) {
+      FSTERROR() << "FromGallicMapper: unrepresentable weight";
+      error_ = true;
+    }
+
+    if (arc.ilabel == 0 && l != 0 && arc.nextstate == kNoStateId)
+      return A(superfinal_label_, l, w2, arc.nextstate);
+    else
+      return A(arc.ilabel, l, w2, arc.nextstate);
+  }
+
+  MapFinalAction FinalAction() const { return MAP_ALLOW_SUPERFINAL; }
+
+  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+
+  MapSymbolsAction OutputSymbolsAction() const { return MAP_CLEAR_SYMBOLS;}
+
+  // Restricts the input properties and adds kError once any arc could not
+  // be represented.
+  uint64 Properties(uint64 inprops) const {
+    uint64 outprops = inprops & kOLabelInvariantProperties &
+        kWeightInvariantProperties & kAddSuperFinalProperties;
+    if (error_)
+      outprops |= kError;
+    return outprops;
+  }
+
+ private:
+  Label superfinal_label_;
+  mutable bool error_;  // set from the const operator(), hence mutable
+};
+
+
+// Mapper from GallicArc<A> to A that gives every distinct non-empty string
+// weight a fresh output label. As a side effect it rebuilds `fst` so that,
+// for each new label, a cycle through the single start/final state reads
+// that label on its first arc and writes the labels of the original string.
+template <class A, StringType S = STRING_LEFT>
+struct GallicToNewSymbolsMapper {
+  typedef GallicArc<A, S> FromArc;
+  typedef A ToArc;
+
+  typedef typename A::StateId StateId;
+  typedef typename A::Label Label;
+  typedef StringWeight<Label, S> SW;
+  typedef typename A::Weight AW;
+  typedef typename GallicArc<A, S>::Weight GW;
+
+  // Empties `fst`, leaving one state that is both start and final. When
+  // `fst` had output symbols, a new input symbol table named
+  // "<output table name>_from_gallic" is installed and seeded with the
+  // symbol for label 0; otherwise the input symbols are cleared.
+  GallicToNewSymbolsMapper(MutableFst<ToArc> *fst)
+      : fst_(fst), lmax_(0), osymbols_(fst->OutputSymbols()),
+        isymbols_(0), error_(false) {
+    fst_->DeleteStates();
+    state_ = fst_->AddState();
+    fst_->SetStart(state_);
+    fst_->SetFinal(state_, AW::One());
+    if (osymbols_) {
+      string name = osymbols_->Name() + "_from_gallic";
+      // NOTE(review): if SetInputSymbols() copies its argument, the table
+      // allocated here is never freed -- confirm against fst.h.
+      fst_->SetInputSymbols(new SymbolTable(name));
+      isymbols_ = fst_->MutableInputSymbols();
+      isymbols_->AddSymbol(osymbols_->Find((int64) 0), 0);
+    } else {
+      fst_->SetInputSymbols(0);
+    }
+  }
+
+  A operator()(const FromArc &arc) {
+    // 'Super-non-final' arc.
+    if (arc.nextstate == kNoStateId && arc.weight == GW::Zero())
+      return A(arc.ilabel, 0, AW::Zero(), kNoStateId);
+
+    SW w1 = arc.weight.Value1();
+    AW w2 = arc.weight.Value2();
+    Label l;
+
+    if (w1.Size() == 0) {
+      l = 0;
+    } else {
+      typename Map::iterator miter = map_.find(w1);
+      if (miter != map_.end()) {
+        l = (*miter).second;
+      } else {
+        // First time this string is seen: allocate a label and add the
+        // path state_ -> ... -> state_ that spells the string out.
+        l = ++lmax_;
+        map_.insert(pair<const SW, Label>(w1, l));
+        StringWeightIterator<Label, S> iter1(w1);
+        StateId p = state_;  // source state of the arc being added
+        string s;            // new symbol: string symbols joined by '_'
+        for (size_t i = 0; i < w1.Size(); ++i, iter1.Next()) {
+          // The last arc closes the cycle back to state_.
+          const StateId n = i == w1.Size() - 1 ? state_ : fst_->AddState();
+          fst_->AddArc(p, ToArc(i ? 0 : l, iter1.Value(), AW::One(), n));
+          if (isymbols_) {
+            if (i) s = s + "_";
+            s = s + osymbols_->Find(iter1.Value());
+          }
+          p = n;
+        }
+        if (isymbols_)
+          isymbols_->AddSymbol(s, l);
+      }
+    }
+
+    if (l == kStringInfinity || l == kStringBad || arc.ilabel != arc.olabel) {
+      FSTERROR() << "GallicToNewSymbolsMapper: unrepresentable weight";
+      error_ = true;
+    }
+
+    return A(arc.ilabel, l, w2, arc.nextstate);
+  }
+
+  MapFinalAction FinalAction() const { return MAP_ALLOW_SUPERFINAL; }
+
+  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+
+  MapSymbolsAction OutputSymbolsAction() const { return MAP_CLEAR_SYMBOLS; }
+
+  // Restricts the input properties and adds kError once any arc could not
+  // be represented.
+  uint64 Properties(uint64 inprops) const {
+    uint64 outprops = inprops & kOLabelInvariantProperties &
+        kWeightInvariantProperties & kAddSuperFinalProperties;
+    if (error_)
+      outprops |= kError;
+    return outprops;
+  }
+
+ private:
+  // Hash functor for string weights used as map keys.
+  class StringKey {
+   public:
+    size_t operator()(const SW &x) const {
+      return x.Hash();
+    }
+  };
+
+  typedef unordered_map<SW, Label, StringKey> Map;
+
+  MutableFst<ToArc> *fst_;       // FST rebuilt by this mapper
+  Map map_;                      // string weight -> assigned label
+  Label lmax_;                   // largest label assigned so far
+  StateId state_;                // the single start/final state of fst_
+  const SymbolTable *osymbols_;  // output symbols fst_ had at construction
+  SymbolTable *isymbols_;        // new input symbols of fst_, or 0
+  mutable bool error_;
+
+  DISALLOW_COPY_AND_ASSIGN(GallicToNewSymbolsMapper);
+};
+
+
+// Mapper that Plus()-es a fixed weight onto every non-Zero() arc weight.
+// Zero() weights are passed through unchanged.
+template <class A>
+struct PlusMapper {
+  typedef A FromArc;
+  typedef A ToArc;
+  typedef typename A::Weight Weight;
+
+  explicit PlusMapper(Weight w) : weight_(w) {}
+
+  A operator()(const A &arc) const {
+    const bool is_zero = arc.weight == Weight::Zero();
+    if (is_zero) {
+      return arc;
+    }
+    Weight sum = Plus(arc.weight, weight_);
+    return A(arc.ilabel, arc.olabel, sum, arc.nextstate);
+  }
+
+  MapFinalAction FinalAction() const {
+    return MAP_NO_SUPERFINAL;
+  }
+
+  MapSymbolsAction InputSymbolsAction() const {
+    return MAP_COPY_SYMBOLS;
+  }
+
+  MapSymbolsAction OutputSymbolsAction() const {
+    return MAP_COPY_SYMBOLS;
+  }
+
+  uint64 Properties(uint64 props) const {
+    return props & kWeightInvariantProperties;
+  }
+
+ private:
+  Weight weight_;  // constant added to each weight
+};
+
+
+// Mapper that right-multiplies every non-Zero() arc weight by a fixed
+// weight. Zero() weights are passed through unchanged.
+template <class A>
+struct TimesMapper {
+  typedef A FromArc;
+  typedef A ToArc;
+  typedef typename A::Weight Weight;
+
+  explicit TimesMapper(Weight w) : weight_(w) {}
+
+  A operator()(const A &arc) const {
+    const bool is_zero = arc.weight == Weight::Zero();
+    if (is_zero) {
+      return arc;
+    }
+    Weight product = Times(arc.weight, weight_);
+    return A(arc.ilabel, arc.olabel, product, arc.nextstate);
+  }
+
+  MapFinalAction FinalAction() const {
+    return MAP_NO_SUPERFINAL;
+  }
+
+  MapSymbolsAction InputSymbolsAction() const {
+    return MAP_COPY_SYMBOLS;
+  }
+
+  MapSymbolsAction OutputSymbolsAction() const {
+    return MAP_COPY_SYMBOLS;
+  }
+
+  uint64 Properties(uint64 props) const {
+    return props & kWeightInvariantProperties;
+  }
+
+ private:
+  Weight weight_;  // constant right factor
+};
+
+
+// Mapper that replaces every non-Zero() weight w by Divide(One(), w).
+// Zero() weights are passed through unchanged.
+template <class A>
+struct InvertWeightMapper {
+  typedef A FromArc;
+  typedef A ToArc;
+  typedef typename A::Weight Weight;
+
+  A operator()(const A &arc) const {
+    const bool is_zero = arc.weight == Weight::Zero();
+    if (is_zero) {
+      return arc;
+    }
+    Weight reciprocal = Divide(Weight::One(), arc.weight);
+    return A(arc.ilabel, arc.olabel, reciprocal, arc.nextstate);
+  }
+
+  MapFinalAction FinalAction() const {
+    return MAP_NO_SUPERFINAL;
+  }
+
+  MapSymbolsAction InputSymbolsAction() const {
+    return MAP_COPY_SYMBOLS;
+  }
+
+  MapSymbolsAction OutputSymbolsAction() const {
+    return MAP_COPY_SYMBOLS;
+  }
+
+  uint64 Properties(uint64 props) const {
+    return props & kWeightInvariantProperties;
+  }
+};
+
+
+// Mapper that removes weights: every non-Zero() weight becomes One() of
+// the target weight type and Zero() stays Zero().
+template <class A, class B = A>
+struct RmWeightMapper {
+  typedef A FromArc;
+  typedef B ToArc;
+  typedef typename FromArc::Weight FromWeight;
+  typedef typename ToArc::Weight ToWeight;
+
+  B operator()(const A &arc) const {
+    const bool nonzero = arc.weight != FromWeight::Zero();
+    ToWeight unit = nonzero ? ToWeight::One() : ToWeight::Zero();
+    return B(arc.ilabel, arc.olabel, unit, arc.nextstate);
+  }
+
+  MapFinalAction FinalAction() const {
+    return MAP_NO_SUPERFINAL;
+  }
+
+  MapSymbolsAction InputSymbolsAction() const {
+    return MAP_COPY_SYMBOLS;
+  }
+
+  MapSymbolsAction OutputSymbolsAction() const {
+    return MAP_COPY_SYMBOLS;
+  }
+
+  // The result is always marked unweighted.
+  uint64 Properties(uint64 props) const {
+    const uint64 kept = props & kWeightInvariantProperties;
+    return kept | kUnweighted;
+  }
+};
+
+
+// Mapper that quantizes every weight with Weight::Quantize(delta); delta
+// defaults to kDelta.
+template <class A, class B = A>
+struct QuantizeMapper {
+  typedef A FromArc;
+  typedef B ToArc;
+  typedef typename FromArc::Weight FromWeight;
+  typedef typename ToArc::Weight ToWeight;
+
+  QuantizeMapper() : delta_(kDelta) {}
+
+  explicit QuantizeMapper(float d) : delta_(d) {}
+
+  B operator()(const A &arc) const {
+    ToWeight quantized = arc.weight.Quantize(delta_);
+    return B(arc.ilabel, arc.olabel, quantized, arc.nextstate);
+  }
+
+  MapFinalAction FinalAction() const {
+    return MAP_NO_SUPERFINAL;
+  }
+
+  MapSymbolsAction InputSymbolsAction() const {
+    return MAP_COPY_SYMBOLS;
+  }
+
+  MapSymbolsAction OutputSymbolsAction() const {
+    return MAP_COPY_SYMBOLS;
+  }
+
+  uint64 Properties(uint64 props) const {
+    return props & kWeightInvariantProperties;
+  }
+
+ private:
+  float delta_;  // quantization step passed to Quantize()
+};
+
+
+// Mapper from A to B that reverses the weight and keeps labels and
+// nextstate. Assumes:
+//   B::Weight == A::Weight::ReverseWeight
+//   B::Label == A::Label
+//   B::StateId == A::StateId
+template <class A, class B>
+struct ReverseWeightMapper {
+  typedef A FromArc;
+  typedef B ToArc;
+
+  B operator()(const A &arc) const {
+    return B(arc.ilabel,
+             arc.olabel,
+             arc.weight.Reverse(),
+             arc.nextstate);
+  }
+
+  MapFinalAction FinalAction() const {
+    return MAP_NO_SUPERFINAL;
+  }
+
+  MapSymbolsAction InputSymbolsAction() const {
+    return MAP_COPY_SYMBOLS;
+  }
+
+  MapSymbolsAction OutputSymbolsAction() const {
+    return MAP_COPY_SYMBOLS;
+  }
+
+  uint64 Properties(uint64 props) const {
+    return props;
+  }
+};
+
+} // namespace fst
+
+#endif // FST_LIB_ARC_MAP_H__
diff --git a/src/include/fst/arc.h b/src/include/fst/arc.h
new file mode 100644
index 0000000..56086c9
--- /dev/null
+++ b/src/include/fst/arc.h
@@ -0,0 +1,306 @@
+// arc.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+//
+// Commonly used Fst arc types.
+
+#ifndef FST_LIB_ARC_H__
+#define FST_LIB_ARC_H__
+
+#include <string>
+
+
+#include <fst/expectation-weight.h>
+#include <fst/float-weight.h>
+#include <fst/lexicographic-weight.h>
+#include <fst/power-weight.h>
+#include <fst/product-weight.h>
+#include <fst/signed-log-weight.h>
+#include <fst/sparse-power-weight.h>
+#include <iostream>
+#include <fstream>
+#include <fst/string-weight.h>
+
+
+namespace fst {
+
+// Arc with integer labels and state Ids, parameterized on weight type W.
+template <class W>
+class ArcTpl {
+ public:
+  typedef W Weight;
+  typedef int Label;
+  typedef int StateId;
+
+  ArcTpl() {}
+
+  ArcTpl(Label i, Label o, const Weight& w, StateId s)
+      : ilabel(i), olabel(o), weight(w), nextstate(s) {}
+
+  // Arc type name: "standard" for tropical weights, otherwise the weight
+  // type's own name.
+  static const string &Type() {
+    static const string type =
+        (Weight::Type() == "tropical") ? "standard" : Weight::Type();
+    return type;
+  }
+
+  Label ilabel;       // Transition input label
+  Label olabel;       // Transition output label
+  Weight weight;      // Transition weight
+  StateId nextstate;  // Transition destination state
+};
+
+typedef ArcTpl<TropicalWeight> StdArc;
+typedef ArcTpl<LogWeight> LogArc;
+typedef ArcTpl<Log64Weight> Log64Arc;
+typedef ArcTpl<SignedLogWeight> SignedLogArc;
+typedef ArcTpl<SignedLog64Weight> SignedLog64Arc;
+typedef ArcTpl<MinMaxWeight> MinMaxArc;
+
+
+// Arc with integer labels and state Ids whose weight is a string weight of
+// the given StringType.
+template <StringType S = STRING_LEFT>
+class StringArc {
+ public:
+  typedef int Label;
+  typedef StringWeight<int, S> Weight;
+  typedef int StateId;
+
+  StringArc() {}
+
+  StringArc(Label i, Label o, Weight w, StateId s)
+      : ilabel(i), olabel(o), weight(w), nextstate(s) {}
+
+  // Arc type name, selected by the string type S.
+  static const string &Type() {
+    static const string type =
+        S == STRING_LEFT ? "standard_string" :
+        (S == STRING_RIGHT ? "right_standard_string" :
+         (S == STRING_LEFT_RESTRICT ? "restricted_string" :
+          "right_restricted_string"));
+    return type;
+  }
+
+  Label ilabel;       // Transition input label
+  Label olabel;       // Transition output label
+  Weight weight;      // Transition weight
+  StateId nextstate;  // Transition destination state
+};
+
+
+// Arc over the Gallic semiring built from A: label and state Id types are
+// those of A, and the weight pairs a string of A's output labels with A's
+// weight.
+template <class A, StringType S = STRING_LEFT>
+struct GallicArc {
+  typedef A Arc;
+  typedef typename A::Label Label;
+  typedef typename A::StateId StateId;
+  typedef GallicWeight<Label, typename A::Weight, S> Weight;
+
+  GallicArc() {}
+
+  GallicArc(Label i, Label o, Weight w, StateId s)
+      : ilabel(i), olabel(o), weight(w), nextstate(s) {}
+
+  // Converts from A: both labels take A's input label, while A's output
+  // label and weight go into the Gallic weight.
+  GallicArc(const A &arc)
+      : ilabel(arc.ilabel),
+        olabel(arc.ilabel),
+        weight(arc.olabel, arc.weight),
+        nextstate(arc.nextstate) {}
+
+  // Arc type name: a string-type prefix followed by A's type name.
+  static const string &Type() {
+    static const string type =
+        (S == STRING_LEFT ? "gallic_" :
+         (S == STRING_RIGHT ? "right_gallic_" :
+          (S == STRING_LEFT_RESTRICT ? "restricted_gallic_" :
+           "right_restricted_gallic_"))) + A::Type();
+    return type;
+  }
+
+  Label ilabel;       // Transition input label
+  Label olabel;       // Transition output label
+  Weight weight;      // Transition weight
+  StateId nextstate;  // Transition destination state
+};
+
+
+// Arc whose weight type is the ReverseWeight of A's weight; label and
+// state Id types are those of A.
+template <class A>
+struct ReverseArc {
+  typedef A Arc;
+  typedef typename A::Label Label;
+  typedef typename A::Weight AWeight;
+  typedef typename AWeight::ReverseWeight Weight;
+  typedef typename A::StateId StateId;
+
+  ReverseArc() {}
+
+  ReverseArc(Label i, Label o, Weight w, StateId s)
+      : ilabel(i), olabel(o), weight(w), nextstate(s) {}
+
+  // Arc type name: "reverse_" followed by A's type name.
+  static const string &Type() {
+    static const string type = "reverse_" + Arc::Type();
+    return type;
+  }
+
+  Label ilabel;       // Transition input label
+  Label olabel;       // Transition output label
+  Weight weight;      // Transition weight
+  StateId nextstate;  // Transition destination state
+};
+
+
+// Arc with integer labels and state Ids whose weight is
+// LexicographicWeight<W1, W2>.
+template <class W1, class W2>
+struct LexicographicArc {
+  typedef int Label;
+  typedef LexicographicWeight<W1, W2> Weight;
+  typedef int StateId;
+
+  LexicographicArc() {}
+
+  LexicographicArc(Label i, Label o, Weight w, StateId s)
+      : ilabel(i), olabel(o), weight(w), nextstate(s) {}
+
+  // Arc type name: the weight type's name.
+  static const string &Type() {
+    static const string type = Weight::Type();
+    return type;
+  }
+
+  Label ilabel;       // Transition input label
+  Label olabel;       // Transition output label
+  Weight weight;      // Transition weight
+  StateId nextstate;  // Transition destination state
+};
+
+
+// Arc with integer labels and state Ids whose weight is
+// ProductWeight<W1, W2>.
+template <class W1, class W2>
+struct ProductArc {
+  typedef int Label;
+  typedef ProductWeight<W1, W2> Weight;
+  typedef int StateId;
+
+  ProductArc() {}
+
+  ProductArc(Label i, Label o, Weight w, StateId s)
+      : ilabel(i), olabel(o), weight(w), nextstate(s) {}
+
+  // Arc type name: the weight type's name.
+  static const string &Type() {
+    static const string type = Weight::Type();
+    return type;
+  }
+
+  Label ilabel;       // Transition input label
+  Label olabel;       // Transition output label
+  Weight weight;      // Transition weight
+  StateId nextstate;  // Transition destination state
+};
+
+
+// Arc with the label and state Id types of A and a weight over the n-th
+// cartesian power of A's weight type.
+template <class A, unsigned int n>
+struct PowerArc {
+  typedef A Arc;
+  typedef typename A::Label Label;
+  typedef typename A::StateId StateId;
+  typedef PowerWeight<typename A::Weight, n> Weight;
+
+  PowerArc() {}
+
+  PowerArc(Label i, Label o, Weight w, StateId s)
+      : ilabel(i), olabel(o), weight(w), nextstate(s) {}
+
+  // Arc type name: "<A's type>_^<n>", built on the first call and cached.
+  static const string &Type() {
+    static string type;
+    if (!type.empty())
+      return type;
+    string exponent;
+    Int64ToStr(n, &exponent);
+    type = A::Type() + "_^" + exponent;
+    return type;
+  }
+
+  Label ilabel;       // Transition input label
+  Label olabel;       // Transition output label
+  Weight weight;      // Transition weight
+  StateId nextstate;  // Transition destination state
+};
+
+
+// Arc with label and state Id type the same as first template arg and with
+// weights over the arbitrary cartesian power of the weight type; K is the
+// key type of the sparse power weight.
+template <class A, class K = int>
+struct SparsePowerArc {
+  typedef A Arc;
+  typedef typename A::Label Label;
+  typedef typename A::StateId StateId;
+  typedef SparsePowerWeight<typename A::Weight, K> Weight;
+
+  SparsePowerArc() {}
+
+  SparsePowerArc(Label i, Label o, Weight w, StateId s)
+      : ilabel(i), olabel(o), weight(w), nextstate(s) {}
+
+  // Arc type name: "<A's type>_^n", followed by "_<bits in K>" when K is
+  // not the size of uint32. The whole name is built once, on the first
+  // call. The size suffix must stay inside the empty() guard: appending it
+  // outside made the cached name grow by one suffix on every call.
+  static const string &Type() {
+    static string type;
+    if (type.empty()) {
+      type = A::Type() + "_^n";
+      if (sizeof(K) != sizeof(uint32)) {
+        string size;
+        Int64ToStr(8 * sizeof(K), &size);
+        type += "_" + size;
+      }
+    }
+    return type;
+  }
+
+  Label ilabel;       // Transition input label
+  Label olabel;       // Transition output label
+  Weight weight;      // Transition weight
+  StateId nextstate;  // Transition destination state
+};
+
+
+// Arc with the label and state Id types of A and an expectation weight
+// over A's weight type (X1) and the second template argument (X2).
+template <class A, class X2>
+struct ExpectationArc {
+  typedef A Arc;
+  typedef typename A::Label Label;
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight X1;
+  typedef ExpectationWeight<X1, X2> Weight;
+
+  ExpectationArc() {}
+
+  ExpectationArc(Label i, Label o, Weight w, StateId s)
+      : ilabel(i), olabel(o), weight(w), nextstate(s) {}
+
+  // Arc type name: "expectation_<A's type>_<X2's type>", built on the
+  // first call and cached.
+  static const string &Type() {
+    static string type;
+    if (!type.empty())
+      return type;
+    type = "expectation_" + A::Type() + "_" + X2::Type();
+    return type;
+  }
+
+  Label ilabel;       // Transition input label
+  Label olabel;       // Transition output label
+  Weight weight;      // Transition weight
+  StateId nextstate;  // Transition destination state
+};
+
+} // namespace fst
+
+#endif // FST_LIB_ARC_H__
diff --git a/src/include/fst/arcfilter.h b/src/include/fst/arcfilter.h
new file mode 100644
index 0000000..179dc2c
--- /dev/null
+++ b/src/include/fst/arcfilter.h
@@ -0,0 +1,99 @@
+// arcfilter.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Function objects to restrict which arcs are traversed in an FST.
+
+#ifndef FST_LIB_ARCFILTER_H__
+#define FST_LIB_ARCFILTER_H__
+
+
+#include <fst/fst.h>
+#include <fst/util.h>
+
+
+namespace fst {
+
+// Arc filter that accepts every arc.
+template <class A>
+class AnyArcFilter {
+ public:
+  bool operator()(const A &) const {
+    return true;
+  }
+};
+
+
+// Arc filter that accepts arcs whose input and output labels are both
+// epsilon (0).
+template <class A>
+class EpsilonArcFilter {
+ public:
+  bool operator()(const A &arc) const {
+    const bool input_eps = arc.ilabel == 0;
+    return input_eps && arc.olabel == 0;
+  }
+};
+
+
+// Arc filter that accepts arcs whose input label is epsilon (0).
+template <class A>
+class InputEpsilonArcFilter {
+ public:
+  bool operator()(const A &arc) const {
+    const bool input_eps = arc.ilabel == 0;
+    return input_eps;
+  }
+};
+
+
+// Arc filter that accepts arcs whose output label is epsilon (0).
+template <class A>
+class OutputEpsilonArcFilter {
+ public:
+  bool operator()(const A &arc) const {
+    const bool output_eps = arc.olabel == 0;
+    return output_eps;
+  }
+};
+
+
+// Arc filter driven by a set of labels added with AddLabel(). The arc's
+// input label (match_input) or output label (otherwise) is looked up in
+// the set; arcs in the set are accepted when keep_match is true, and arcs
+// not in the set are accepted when it is false.
+template <class A>
+class MultiLabelArcFilter {
+ public:
+  typedef typename A::Label Label;
+
+  MultiLabelArcFilter(bool match_input = true, bool keep_match = true)
+      : match_input_(match_input),
+        keep_match_(keep_match) {}
+
+  bool operator()(const A &arc) const {
+    const Label probe = match_input_ ? arc.ilabel : arc.olabel;
+    const bool found = labels_.Find(probe) != labels_.End();
+    return found == keep_match_;
+  }
+
+  void AddLabel(Label label) {
+    labels_.Insert(label);
+  }
+
+ private:
+  CompactSet<Label, kNoLabel> labels_;  // labels to test against
+  bool match_input_;                    // test ilabel (true) or olabel
+  bool keep_match_;                     // accept matches (true) or misses
+};
+
+} // namespace fst
+
+#endif // FST_LIB_ARCFILTER_H__
diff --git a/src/include/fst/arcsort.h b/src/include/fst/arcsort.h
new file mode 100644
index 0000000..38f4f95
--- /dev/null
+++ b/src/include/fst/arcsort.h
@@ -0,0 +1,203 @@
+// arcsort.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Functions and classes to sort arcs in an FST.
+
+#ifndef FST_LIB_ARCSORT_H__
+#define FST_LIB_ARCSORT_H__
+
+#include <algorithm>
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/cache.h>
+#include <fst/state-map.h>
+#include <fst/test-properties.h>
+
+
+namespace fst {
+
+template <class Arc, class Compare>
+class ArcSortMapper {
+ public:
+ typedef Arc FromArc;
+ typedef Arc ToArc;
+
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Weight Weight;
+
+ ArcSortMapper(const Fst<Arc> &fst, const Compare &comp)
+ : fst_(fst), comp_(comp), i_(0) {}
+
+ // Allows updating Fst argument; pass only if changed.
+ ArcSortMapper(const ArcSortMapper<Arc, Compare> &mapper,
+ const Fst<Arc> *fst = 0)
+ : fst_(fst ? *fst : mapper.fst_), comp_(mapper.comp_), i_(0) {}
+
+ StateId Start() { return fst_.Start(); }
+ Weight Final(StateId s) const { return fst_.Final(s); }
+
+ void SetState(StateId s) { // loads the arcs of state s and sorts them
+ i_ = 0;
+ arcs_.clear();
+ arcs_.reserve(fst_.NumArcs(s));
+ for (ArcIterator< Fst<Arc> > aiter(fst_, s); !aiter.Done(); aiter.Next())
+ arcs_.push_back(aiter.Value());
+ std::sort(arcs_.begin(), arcs_.end(), comp_);
+ }
+
+ bool Done() const { return i_ >= arcs_.size(); }
+ const Arc &Value() const { return arcs_[i_]; }
+ void Next() { ++i_; }
+
+ MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+ MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+ uint64 Properties(uint64 props) const { return comp_.Properties(props); }
+
+ private:
+ const Fst<Arc> &fst_; // held by reference
+ const Compare &comp_; // held by reference
+ vector<Arc> arcs_; // arcs of the state last passed to SetState(), sorted
+ size_t i_; // current arc position (unsigned, like arcs_.size())
+
+ void operator=(const ArcSortMapper<Arc, Compare> &); // disallow
+};
+
+
+// Sorts the arcs in an FST according to function object 'comp' of
+// type Compare. This version modifies its input. Comparison function
+// objects ILabelCompare and OLabelCompare are provided by the
+// library. In general, Compare must meet the requirements for an STL
+// sort comparison function object. It must also have a member
+// Properties(uint64) that specifies the known properties of the
+// sorted FST; it takes as argument the input FST's known properties
+// before the sort.
+//
+// Complexity:
+// - Time: O(V D log D)
+// - Space: O(D)
+// where V = # of states and D = maximum out-degree.
+template<class Arc, class Compare>
+void ArcSort(MutableFst<Arc> *fst, Compare comp) {
+ ArcSortMapper<Arc, Compare> mapper(*fst, comp);
+ StateMap(fst, mapper);
+}
+
+typedef CacheOptions ArcSortFstOptions;
+
+// Sorts the arcs in an FST according to function object 'comp' of
+// type Compare. This version is a delayed Fst. Comparison function
+// objects ILabelCompare and OLabelCompare are provided by the
+// library. In general, Compare must meet the requirements for an STL
+// comparison function object (e.g. as used for STL sort). It must
+// also have a member Properties(uint64) that specifies the known
+// properties of the sorted FST; it takes as argument the input FST's
+// known properties.
+//
+// Complexity:
+// - Time: O(v d log d)
+// - Space: O(d)
+// where v = # of states visited, d = maximum out-degree of states
+// visited. Constant time and space to visit an input state is assumed
+// and exclusive of caching.
+template <class A, class C>
+class ArcSortFst : public StateMapFst<A, A, ArcSortMapper<A, C> > {
+ public:
+ typedef A Arc;
+ typedef ArcSortMapper<A, C> M;
+
+ ArcSortFst(const Fst<A> &fst, const C &comp)
+ : StateMapFst<A, A, M>(fst, ArcSortMapper<A, C>(fst, comp)) {}
+
+ ArcSortFst(const Fst<A> &fst, const C &comp, const ArcSortFstOptions &opts)
+ : StateMapFst<A, A, M>(fst, ArcSortMapper<A, C>(fst, comp), opts) {}
+
+ // See Fst<>::Copy() for doc.
+ ArcSortFst(const ArcSortFst<A, C> &fst, bool safe = false)
+ : StateMapFst<A, A, M>(fst, safe) {}
+
+ // Get a copy of this ArcSortFst. See Fst<>::Copy() for further doc.
+ virtual ArcSortFst<A, C> *Copy(bool safe = false) const {
+ return new ArcSortFst(*this, safe);
+ }
+};
+
+
+// Specialization for ArcSortFst.
+template <class A, class C>
+class StateIterator< ArcSortFst<A, C> >
+ : public StateIterator< StateMapFst<A, A, ArcSortMapper<A, C> > > {
+ public:
+ explicit StateIterator(const ArcSortFst<A, C> &fst)
+ : StateIterator< StateMapFst<A, A, ArcSortMapper<A, C> > >(fst) {}
+};
+
+
+// Specialization for ArcSortFst.
+template <class A, class C>
+class ArcIterator< ArcSortFst<A, C> >
+ : public ArcIterator< StateMapFst<A, A, ArcSortMapper<A, C> > > {
+ public:
+ ArcIterator(const ArcSortFst<A, C> &fst, typename A::StateId s)
+ : ArcIterator< StateMapFst<A, A, ArcSortMapper<A, C> > >(fst, s) {}
+};
+
+
+// Compare class for comparing input labels of arcs (arcs taken by reference).
+template<class A> class ILabelCompare {
+ public:
+ bool operator() (const A &arc1, const A &arc2) const {
+ return arc1.ilabel < arc2.ilabel;
+ }
+
+ uint64 Properties(uint64 props) const {
+ return (props & kArcSortProperties) | kILabelSorted |
+ (props & kAcceptor ? kOLabelSorted : 0); // kAcceptor set: add kOLabelSorted
+ }
+};
+
+
+// Compare class for comparing output labels of arcs.
+template<class A> class OLabelCompare {
+ public:
+ bool operator() (const A &arc1, const A &arc2) const {
+ return arc1.olabel < arc2.olabel;
+ }
+
+ uint64 Properties(uint64 props) const {
+ return (props & kArcSortProperties) | kOLabelSorted |
+ (props & kAcceptor ? kILabelSorted : 0);
+ }
+};
+
+
+// Useful aliases when using StdArc.
+template<class C> class StdArcSortFst : public ArcSortFst<StdArc, C> {
+ public:
+ typedef StdArc Arc;
+ typedef C Compare;
+};
+
+typedef ILabelCompare<StdArc> StdILabelCompare;
+
+typedef OLabelCompare<StdArc> StdOLabelCompare;
+
+} // namespace fst
+
+#endif // FST_LIB_ARCSORT_H__
diff --git a/src/include/fst/bi-table.h b/src/include/fst/bi-table.h
new file mode 100644
index 0000000..dbb436c
--- /dev/null
+++ b/src/include/fst/bi-table.h
@@ -0,0 +1,396 @@
+// bi-table.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Classes for representing a bijective mapping between an arbitrary entry
+// of type T and a signed integral ID.
+
+#ifndef FST_LIB_BI_TABLE_H__
+#define FST_LIB_BI_TABLE_H__
+
+#include <deque>
+#include <vector>
+using std::vector;
+
+namespace fst {
+
+// BI TABLES - these determine a bijective mapping between an
+// arbitrary entry of type T and a signed integral ID of type I. The IDs are
+// allocated starting from 0 in order.
+//
+// template <class I, class T>
+// class BiTable {
+// public:
+//
+// // Required constructors.
+// BiTable();
+//
+// // Lookup integer ID from entry. If it doesn't exist, then add it.
+// I FindId(const T &entry);
+// // Lookup entry from integer ID.
+// const T &FindEntry(I) const;
+// // # of stored entries.
+// I Size() const;
+// };
+
+// An implementation using a hash map for the entry to ID mapping.
+// The entry T must have == defined and the default constructor
+// must produce an entry that will never be seen. H is the hash function.
+template <class I, class T, class H>
+class HashBiTable {
+ public:
+
+ HashBiTable() {
+ T empty_entry;
+ }
+
+ I FindId(const T &entry) {
+ I &id_ref = entry2id_[entry];
+ if (id_ref == 0) { // T not found; store and assign it a new ID.
+ id2entry_.push_back(entry);
+ id_ref = id2entry_.size();
+ }
+ return id_ref - 1; // NB: id_ref = ID + 1
+ }
+
+ const T &FindEntry(I s) const {
+ return id2entry_[s];
+ }
+
+ I Size() const { return id2entry_.size(); }
+
+ private:
+ unordered_map<T, I, H> entry2id_;
+ vector<T> id2entry_;
+
+ DISALLOW_COPY_AND_ASSIGN(HashBiTable);
+};
+
+
+// An implementation using a hash set for the entry to ID
+// mapping. The hash set holds 'keys' which are either the ID
+// or kCurrentKey. These keys can be mapped to entries either by
+// looking up in the entry vector or, if kCurrentKey, in current_entry_
+// member. The hash and key equality functions map to entries first.
+// The entry T must have == defined and the default constructor
+// must produce an entry that will never be seen. H is the hash
+// function.
+template <class I, class T, class H>
+class CompactHashBiTable {
+ public:
+ friend class HashFunc;
+ friend class HashEqual;
+
+ CompactHashBiTable()
+ : hash_func_(*this),
+ hash_equal_(*this),
+ keys_(0, hash_func_, hash_equal_) {
+ }
+
+ // Reserves space for table_size elements.
+ explicit CompactHashBiTable(size_t table_size)
+ : hash_func_(*this),
+ hash_equal_(*this),
+ keys_(table_size, hash_func_, hash_equal_) {
+ id2entry_.reserve(table_size);
+ }
+
+ I FindId(const T &entry) {
+ current_entry_ = &entry;
+ typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey);
+ if (it == keys_.end()) {
+ I key = id2entry_.size();
+ id2entry_.push_back(entry);
+ keys_.insert(key);
+ return key;
+ } else {
+ return *it;
+ }
+ }
+
+ const T &FindEntry(I s) const { return id2entry_[s]; }
+ I Size() const { return id2entry_.size(); }
+
+ private:
+ static const I kEmptyKey; // -1
+ static const I kCurrentKey; // -2
+
+ class HashFunc {
+ public:
+ HashFunc(const CompactHashBiTable &ht) : ht_(&ht) {}
+
+ size_t operator()(I k) const { return hf(ht_->Key2T(k)); }
+ private:
+ const CompactHashBiTable *ht_;
+ H hf;
+ };
+
+ class HashEqual {
+ public:
+ HashEqual(const CompactHashBiTable &ht) : ht_(&ht) {}
+
+ bool operator()(I k1, I k2) const {
+ return ht_->Key2T(k1) == ht_->Key2T(k2);
+ }
+ private:
+ const CompactHashBiTable *ht_;
+ };
+
+ typedef unordered_set<I, HashFunc, HashEqual> KeyHashSet;
+
+ const T &Key2T(I k) const {
+ if (k == kEmptyKey)
+ return empty_entry_;
+ else if (k == kCurrentKey)
+ return *current_entry_;
+ else
+ return id2entry_[k];
+ }
+
+ HashFunc hash_func_;
+ HashEqual hash_equal_;
+ KeyHashSet keys_;
+ vector<T> id2entry_;
+ const T empty_entry_;
+ const T *current_entry_;
+
+ DISALLOW_COPY_AND_ASSIGN(CompactHashBiTable);
+};
+
+template <class I, class T, class H>
+const I CompactHashBiTable<I, T, H>::kEmptyKey = -1;
+
+template <class I, class T, class H>
+const I CompactHashBiTable<I, T, H>::kCurrentKey = -2;
+
+
+// An implementation using a vector for the entry to ID mapping.
+// It is passed a function object FP that should fingerprint entries
+// uniquely to an integer that can be used as a vector index. Normally,
+// VectorBiTable constructs the FP object. The user can instead
+// pass in this object; in that case, VectorBiTable takes its
+// ownership.
+template <class I, class T, class FP>
+class VectorBiTable {
+ public:
+ explicit VectorBiTable(FP *fp = 0) : fp_(fp ? fp : new FP()) {}
+
+ ~VectorBiTable() { delete fp_; }
+
+ I FindId(const T &entry) {
+ ssize_t fp = (*fp_)(entry);
+ if (fp >= fp2id_.size())
+ fp2id_.resize(fp + 1);
+ I &id_ref = fp2id_[fp];
+ if (id_ref == 0) { // T not found; store and assign it a new ID.
+ id2entry_.push_back(entry);
+ id_ref = id2entry_.size();
+ }
+ return id_ref - 1; // NB: id_ref = ID + 1
+ }
+
+ const T &FindEntry(I s) const { return id2entry_[s]; }
+
+ I Size() const { return id2entry_.size(); }
+
+ const FP &Fingerprint() const { return *fp_; }
+
+ private:
+ FP *fp_;
+ vector<I> fp2id_;
+ vector<T> id2entry_;
+
+ DISALLOW_COPY_AND_ASSIGN(VectorBiTable);
+};
+
+
+// An implementation using a vector and a compact hash table. The
+// selecting functor S returns true for entries to be hashed in the
+// vector. The fingerprinting functor FP returns a unique fingerprint
+// for each entry to be hashed in the vector (these need to be
+// suitable for indexing in a vector). The hash functor H is used when
+// hashing entry into the compact hash table.
+template <class I, class T, class S, class FP, class H>
+class VectorHashBiTable {
+ public:
+ friend class HashFunc;
+ friend class HashEqual;
+
+ VectorHashBiTable(S *s, FP *fp, H *h,
+ size_t vector_size = 0,
+ size_t entry_size = 0)
+ : selector_(s),
+ fp_(fp),
+ h_(h),
+ hash_func_(*this),
+ hash_equal_(*this),
+ keys_(0, hash_func_, hash_equal_) {
+ if (vector_size)
+ fp2id_.reserve(vector_size);
+ if (entry_size)
+ id2entry_.reserve(entry_size);
+ }
+
+ ~VectorHashBiTable() {
+ delete selector_;
+ delete fp_;
+ delete h_;
+ }
+
+ I FindId(const T &entry) {
+ if ((*selector_)(entry)) { // Use the vector if 'selector_(entry) == true'
+ uint64 fp = (*fp_)(entry);
+ if (fp2id_.size() <= fp)
+ fp2id_.resize(fp + 1, 0);
+ if (fp2id_[fp] == 0) {
+ id2entry_.push_back(entry);
+ fp2id_[fp] = id2entry_.size();
+ }
+ return fp2id_[fp] - 1; // NB: assoc_value = ID + 1
+ } else { // Use the hash table otherwise.
+ current_entry_ = &entry;
+ typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey);
+ if (it == keys_.end()) {
+ I key = id2entry_.size();
+ id2entry_.push_back(entry);
+ keys_.insert(key);
+ return key;
+ } else {
+ return *it;
+ }
+ }
+ }
+
+ const T &FindEntry(I s) const {
+ return id2entry_[s];
+ }
+
+ I Size() const { return id2entry_.size(); }
+
+ const S &Selector() const { return *selector_; }
+
+ const FP &Fingerprint() const { return *fp_; }
+
+ const H &Hash() const { return *h_; }
+
+ private:
+ static const I kEmptyKey;
+ static const I kCurrentKey;
+
+ class HashFunc {
+ public:
+ HashFunc(const VectorHashBiTable &ht) : ht_(&ht) {}
+
+ size_t operator()(I k) const { return (*(ht_->h_))(ht_->Key2Entry(k)); }
+ private:
+ const VectorHashBiTable *ht_;
+ };
+
+ class HashEqual {
+ public:
+ HashEqual(const VectorHashBiTable &ht) : ht_(&ht) {}
+
+ bool operator()(I k1, I k2) const {
+ return ht_->Key2Entry(k1) == ht_->Key2Entry(k2);
+ }
+ private:
+ const VectorHashBiTable *ht_;
+ };
+
+ typedef unordered_set<I, HashFunc, HashEqual> KeyHashSet;
+
+ const T &Key2Entry(I k) const {
+ if (k == kEmptyKey)
+ return empty_entry_;
+ else if (k == kCurrentKey)
+ return *current_entry_;
+ else
+ return id2entry_[k];
+ }
+
+
+ S *selector_; // Returns true if entry hashed into vector
+ FP *fp_; // Fingerprint used when hashing entry into vector
+ H *h_; // Hash function used when hashing entry into hash_set
+
+ vector<T> id2entry_; // Maps state IDs to entry
+ vector<I> fp2id_; // Maps entry fingerprints to IDs
+
+ // Compact implementation of the hash table mapping entries to
+ // state IDs using the hash function 'h_'
+ HashFunc hash_func_;
+ HashEqual hash_equal_;
+ KeyHashSet keys_;
+ const T empty_entry_;
+ const T *current_entry_;
+
+ DISALLOW_COPY_AND_ASSIGN(VectorHashBiTable);
+};
+
+template <class I, class T, class S, class FP, class H>
+const I VectorHashBiTable<I, T, S, FP, H>::kEmptyKey = -1;
+
+template <class I, class T, class S, class FP, class H>
+const I VectorHashBiTable<I, T, S, FP, H>::kCurrentKey = -2;
+
+
+// An implementation using a hash map for the entry to ID
+// mapping. This version permits erasing of entries. The entry T
+// must have == defined and its default constructor must produce an
+// entry that will never be seen. F is the hash function.
+template <class I, class T, class F>
+class ErasableBiTable {
+ public:
+ ErasableBiTable() : first_(0) {}
+
+ I FindId(const T &entry) {
+ I &id_ref = entry2id_[entry];
+ if (id_ref == 0) { // T not found; store and assign it a new ID.
+ id2entry_.push_back(entry);
+ id_ref = id2entry_.size() + first_;
+ }
+ return id_ref - 1; // NB: id_ref = ID + 1
+ }
+
+ const T &FindEntry(I s) const { return id2entry_[s - first_]; }
+
+ I Size() const { return id2entry_.size(); }
+
+ void Erase(I s) {
+ T &entry = id2entry_[s - first_];
+ typename unordered_map<T, I, F>::iterator it =
+ entry2id_.find(entry);
+ entry2id_.erase(it);
+ id2entry_[s - first_] = empty_entry_;
+ while (!id2entry_.empty() && id2entry_.front() == empty_entry_) {
+ id2entry_.pop_front();
+ ++first_;
+ }
+ }
+
+ private:
+ unordered_map<T, I, F> entry2id_;
+ deque<T> id2entry_;
+ const T empty_entry_;
+ I first_; // I of first element in the deque;
+
+ DISALLOW_COPY_AND_ASSIGN(ErasableBiTable);
+};
+
+} // namespace fst
+
+#endif // FST_LIB_BI_TABLE_H__
diff --git a/src/include/fst/cache.h b/src/include/fst/cache.h
new file mode 100644
index 0000000..a6a92d4
--- /dev/null
+++ b/src/include/fst/cache.h
@@ -0,0 +1,738 @@
+// cache.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// An Fst implementation that caches FST elements of a delayed
+// computation.
+
+#ifndef FST_LIB_CACHE_H__
+#define FST_LIB_CACHE_H__
+
+#include <vector>
+using std::vector;
+#include <list>
+
+#include <fst/vector-fst.h>
+
+
+DECLARE_bool(fst_default_cache_gc);
+DECLARE_int64(fst_default_cache_gc_limit);
+
+namespace fst {
+
+struct CacheOptions {
+ bool gc; // enable GC
+ size_t gc_limit; // # of bytes allowed before GC
+
+ CacheOptions(bool g, size_t l) : gc(g), gc_limit(l) {}
+ CacheOptions()
+ : gc(FLAGS_fst_default_cache_gc),
+ gc_limit(FLAGS_fst_default_cache_gc_limit) {}
+};
+
+// A CacheStateAllocator allocates and frees CacheStates
+// template <class S>
+// struct CacheStateAllocator {
+// S *Allocate(StateId s);
+// void Free(S *state, StateId s);
+// };
+//
+
+// A simple allocator class, can be overridden as needed,
+// maintains a single entry cache.
+template <class S>
+struct DefaultCacheStateAllocator {
+ typedef typename S::Arc::StateId StateId;
+
+ DefaultCacheStateAllocator() : mru_(NULL) { }
+
+ ~DefaultCacheStateAllocator() {
+ delete mru_;
+ }
+
+ S *Allocate(StateId s) {
+ if (mru_) {
+ S *state = mru_;
+ mru_ = NULL;
+ state->Reset();
+ return state;
+ }
+ return new S();
+ }
+
+ void Free(S *state, StateId s) {
+ if (mru_) {
+ delete mru_;
+ }
+ mru_ = state;
+ }
+
+ private:
+ S *mru_;
+};
+
+// VectorState but additionally has a flags data member (see
+// CacheState below). This class is used to cache FST elements with
+// the flags used to indicate what has been cached. Use HasStart(),
+// HasFinal(), and HasArcs() to determine if cached and SetStart(),
+// SetFinal(), AddArc(), (or PushArc() and SetArcs()) to cache. Note you
+// must set the final weight even if the state is non-final to mark it as
+// cached. If the 'gc' option is 'false', cached items have the extent
+// of the FST - minimizing computation. If the 'gc' option is 'true',
+// garbage collection of states (not in use in an arc iterator) is
+// performed, in a rough approximation of LRU order, when 'gc_limit'
+// bytes is reached - controlling memory use. When 'gc_limit' is 0,
+// special optimizations apply - minimizing memory use.
+
+template <class S, class C = DefaultCacheStateAllocator<S> >
+class CacheBaseImpl : public VectorFstBaseImpl<S> {
+ public:
+ typedef S State;
+ typedef C Allocator;
+ typedef typename State::Arc Arc;
+ typedef typename Arc::Weight Weight;
+ typedef typename Arc::StateId StateId;
+
+ using FstImpl<Arc>::Type;
+ using FstImpl<Arc>::Properties;
+ using FstImpl<Arc>::SetProperties;
+ using VectorFstBaseImpl<State>::NumStates;
+ using VectorFstBaseImpl<State>::AddState;
+ using VectorFstBaseImpl<State>::SetState;
+
+ explicit CacheBaseImpl(C *allocator = 0)
+ : cache_start_(false), nknown_states_(0), min_unexpanded_state_id_(0),
+ cache_first_state_id_(kNoStateId), cache_first_state_(0),
+ cache_gc_(FLAGS_fst_default_cache_gc), cache_size_(0),
+ cache_limit_(FLAGS_fst_default_cache_gc_limit > kMinCacheLimit ||
+ FLAGS_fst_default_cache_gc_limit == 0 ?
+ FLAGS_fst_default_cache_gc_limit : kMinCacheLimit) {
+ allocator_ = allocator ? allocator : new C();
+ }
+
+ explicit CacheBaseImpl(const CacheOptions &opts, C *allocator = 0)
+ : cache_start_(false), nknown_states_(0),
+ min_unexpanded_state_id_(0), cache_first_state_id_(kNoStateId),
+ cache_first_state_(0), cache_gc_(opts.gc), cache_size_(0),
+ cache_limit_(opts.gc_limit > kMinCacheLimit || opts.gc_limit == 0 ?
+ opts.gc_limit : kMinCacheLimit) {
+ allocator_ = allocator ? allocator : new C();
+ }
+
+ // Preserve gc parameters, but initially cache nothing.
+ CacheBaseImpl(const CacheBaseImpl &impl)
+ : cache_start_(false), nknown_states_(0),
+ min_unexpanded_state_id_(0), cache_first_state_id_(kNoStateId),
+ cache_first_state_(0), cache_gc_(impl.cache_gc_), cache_size_(0),
+ cache_limit_(impl.cache_limit_) {
+ allocator_ = new C();
+ }
+
+ ~CacheBaseImpl() {
+ allocator_->Free(cache_first_state_, cache_first_state_id_);
+ delete allocator_;
+ }
+
+ // Gets a state from its ID; state must exist.
+ const S *GetState(StateId s) const {
+ if (s == cache_first_state_id_)
+ return cache_first_state_;
+ else
+ return VectorFstBaseImpl<S>::GetState(s);
+ }
+
+ // Gets a state from its ID; state must exist.
+ S *GetState(StateId s) {
+ if (s == cache_first_state_id_)
+ return cache_first_state_;
+ else
+ return VectorFstBaseImpl<S>::GetState(s);
+ }
+
+ // Gets a state from its ID; return 0 if it doesn't exist.
+ const S *CheckState(StateId s) const {
+ if (s == cache_first_state_id_)
+ return cache_first_state_;
+ else if (s < NumStates())
+ return VectorFstBaseImpl<S>::GetState(s);
+ else
+ return 0;
+ }
+
+ // Gets a state from its ID; add it if necessary.
+ S *ExtendState(StateId s) {
+ if (s == cache_first_state_id_) {
+ return cache_first_state_; // Return 1st cached state
+ } else if (cache_limit_ == 0 && cache_first_state_id_ == kNoStateId) {
+ cache_first_state_id_ = s; // Remember 1st cached state
+ cache_first_state_ = allocator_->Allocate(s);
+ return cache_first_state_;
+ } else if (cache_first_state_id_ != kNoStateId &&
+ cache_first_state_->ref_count == 0) {
+ // With Default allocator, the Free and Allocate will reuse the same S*.
+ allocator_->Free(cache_first_state_, cache_first_state_id_);
+ cache_first_state_id_ = s;
+ cache_first_state_ = allocator_->Allocate(s);
+ return cache_first_state_; // Return 1st cached state
+ } else {
+ while (NumStates() <= s) // Add state to main cache
+ AddState(0);
+ if (!VectorFstBaseImpl<S>::GetState(s)) {
+ SetState(s, allocator_->Allocate(s));
+ if (cache_first_state_id_ != kNoStateId) { // Forget 1st cached state
+ while (NumStates() <= cache_first_state_id_)
+ AddState(0);
+ SetState(cache_first_state_id_, cache_first_state_);
+ if (cache_gc_) {
+ cache_states_.push_back(cache_first_state_id_);
+ cache_size_ += sizeof(S) +
+ cache_first_state_->arcs.capacity() * sizeof(Arc);
+ }
+ cache_limit_ = kMinCacheLimit;
+ cache_first_state_id_ = kNoStateId;
+ cache_first_state_ = 0;
+ }
+ if (cache_gc_) {
+ cache_states_.push_back(s);
+ cache_size_ += sizeof(S);
+ if (cache_size_ > cache_limit_)
+ GC(s, false);
+ }
+ }
+ S *state = VectorFstBaseImpl<S>::GetState(s);
+ return state;
+ }
+ }
+
+ void SetStart(StateId s) {
+ VectorFstBaseImpl<S>::SetStart(s);
+ cache_start_ = true;
+ if (s >= nknown_states_)
+ nknown_states_ = s + 1;
+ }
+
+ void SetFinal(StateId s, Weight w) {
+ S *state = ExtendState(s);
+ state->final = w;
+ state->flags |= kCacheFinal | kCacheRecent | kCacheModified;
+ }
+
+ // AddArc adds a single arc to state s and does incremental cache
+ // book-keeping. For efficiency, prefer PushArc and SetArcs below
+ // when possible.
+ void AddArc(StateId s, const Arc &arc) {
+ S *state = ExtendState(s);
+ // Take the predecessor before appending: after push_back, back() would be
+ // 'arc' itself, so the previous-arc argument below would be meaningless.
+ const Arc *parc = state->arcs.empty() ? 0 : &(state->arcs.back());
+ SetProperties(AddArcProperties(Properties(), s, arc, parc));
+ state->arcs.push_back(arc);
+ if (arc.ilabel == 0)
+ ++state->niepsilons;
+ if (arc.olabel == 0)
+ ++state->noepsilons;
+ state->flags |= kCacheModified;
+ if (cache_gc_ && s != cache_first_state_id_) {
+ cache_size_ += sizeof(Arc);
+ if (cache_size_ > cache_limit_)
+ GC(s, false);
+ }
+ }
+
+ // Adds a single arc to state s but delays cache book-keeping.
+ // SetArcs must be called when all PushArc calls at a state are
+ // complete. Do not mix with calls to AddArc.
+ void PushArc(StateId s, const Arc &arc) {
+ S *state = ExtendState(s);
+ state->arcs.push_back(arc);
+ }
+
+ // Marks arcs of state s as cached and does cache book-keeping after all
+ // calls to PushArc have been completed. Do not mix with calls to AddArc.
+ void SetArcs(StateId s) {
+ S *state = ExtendState(s);
+ vector<Arc> &arcs = state->arcs;
+ state->niepsilons = state->noepsilons = 0;
+ for (size_t a = 0; a < arcs.size(); ++a) {
+ const Arc &arc = arcs[a];
+ if (arc.nextstate >= nknown_states_)
+ nknown_states_ = arc.nextstate + 1;
+ if (arc.ilabel == 0)
+ ++state->niepsilons;
+ if (arc.olabel == 0)
+ ++state->noepsilons;
+ }
+ ExpandedState(s);
+ state->flags |= kCacheArcs | kCacheRecent | kCacheModified;
+ if (cache_gc_ && s != cache_first_state_id_) {
+ cache_size_ += arcs.capacity() * sizeof(Arc);
+ if (cache_size_ > cache_limit_)
+ GC(s, false);
+ }
+ };
+
+ void ReserveArcs(StateId s, size_t n) {
+ S *state = ExtendState(s);
+ state->arcs.reserve(n);
+ }
+
+ void DeleteArcs(StateId s, size_t n) {
+ S *state = ExtendState(s);
+ const vector<Arc> &arcs = GetState(s)->arcs;
+ for (size_t i = 0; i < n; ++i) {
+ size_t j = arcs.size() - i - 1;
+ if (arcs[j].ilabel == 0)
+ --GetState(s)->niepsilons;
+ if (arcs[j].olabel == 0)
+ --GetState(s)->noepsilons;
+ }
+ state->arcs.resize(arcs.size() - n);
+ SetProperties(DeleteArcsProperties(Properties()));
+ state->flags |= kCacheModified;
+ }
+
+ void DeleteArcs(StateId s) {
+ S *state = ExtendState(s);
+ state->niepsilons = 0;
+ state->noepsilons = 0;
+ state->arcs.clear();
+ SetProperties(DeleteArcsProperties(Properties()));
+ state->flags |= kCacheModified;
+ }
+
+ // Is the start state cached?
+ bool HasStart() const {
+ if (!cache_start_ && Properties(kError))
+ cache_start_ = true;
+ return cache_start_;
+ }
+
+ // Is the final weight of state s cached?
+ bool HasFinal(StateId s) const {
+ const S *state = CheckState(s);
+ if (state && state->flags & kCacheFinal) {
+ state->flags |= kCacheRecent;
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ // Are arcs of state s cached?
+ bool HasArcs(StateId s) const {
+ const S *state = CheckState(s);
+ if (state && state->flags & kCacheArcs) {
+ state->flags |= kCacheRecent;
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ Weight Final(StateId s) const {
+ const S *state = GetState(s);
+ return state->final;
+ }
+
+ size_t NumArcs(StateId s) const {
+ const S *state = GetState(s);
+ return state->arcs.size();
+ }
+
+ size_t NumInputEpsilons(StateId s) const {
+ const S *state = GetState(s);
+ return state->niepsilons;
+ }
+
+ size_t NumOutputEpsilons(StateId s) const {
+ const S *state = GetState(s);
+ return state->noepsilons;
+ }
+
+ // Provides information needed for generic arc iterator.
+ void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const {
+ const S *state = GetState(s);
+ data->base = 0;
+ data->narcs = state->arcs.size();
+ data->arcs = data->narcs > 0 ? &(state->arcs[0]) : 0;
+ data->ref_count = &(state->ref_count);
+ ++(*data->ref_count);
+ }
+
+ // Number of known states.
+ StateId NumKnownStates() const { return nknown_states_; }
+
+ // Update number of known states taking into account the existence of state s.
+ void UpdateNumKnownStates(StateId s) {
+ if (s >= nknown_states_)
+ nknown_states_ = s + 1;
+ }
+
+ // Find the minimum never-expanded state Id
+ StateId MinUnexpandedState() const {
+ while (min_unexpanded_state_id_ < expanded_states_.size() &&
+ expanded_states_[min_unexpanded_state_id_])
+ ++min_unexpanded_state_id_;
+ return min_unexpanded_state_id_;
+ }
+
+ // Removes from cache_states_ and uncaches (not referenced-counted)
+ // states that have not been accessed since the last GC until
+ // cache_limit_/3 bytes are uncached. If that fails to free enough,
+ // recurs uncaching recently visited states as well. If still
+ // unable to free enough memory, then widens cache_limit_.
+ void GC(StateId current, bool free_recent) {
+ if (!cache_gc_)
+ return;
+ VLOG(2) << "CacheImpl: Enter GC: object = " << Type() << "(" << this
+ << "), free recently cached = " << free_recent
+ << ", cache size = " << cache_size_
+ << ", cache limit = " << cache_limit_ << "\n";
+ typename list<StateId>::iterator siter = cache_states_.begin();
+
+ size_t cache_target = (2 * cache_limit_)/3 + 1;
+ while (siter != cache_states_.end()) {
+ StateId s = *siter;
+ S* state = VectorFstBaseImpl<S>::GetState(s);
+ if (cache_size_ > cache_target && state->ref_count == 0 &&
+ (free_recent || !(state->flags & kCacheRecent)) && s != current) {
+ cache_size_ -= sizeof(S) + state->arcs.capacity() * sizeof(Arc);
+ allocator_->Free(state, s);
+ SetState(s, 0);
+ cache_states_.erase(siter++);
+ } else {
+ state->flags &= ~kCacheRecent;
+ ++siter;
+ }
+ }
+ if (!free_recent && cache_size_ > cache_target) {
+ GC(current, true);
+ } else {
+ while (cache_size_ > cache_target) {
+ cache_limit_ *= 2;
+ cache_target *= 2;
+ }
+ }
+ VLOG(2) << "CacheImpl: Exit GC: object = " << Type() << "(" << this
+ << "), free recently cached = " << free_recent
+ << ", cache size = " << cache_size_
+ << ", cache limit = " << cache_limit_ << "\n";
+ }
+
+ void ExpandedState(StateId s) {
+ if (s < min_unexpanded_state_id_)
+ return;
+ while (expanded_states_.size() <= s)
+ expanded_states_.push_back(false);
+ expanded_states_[s] = true;
+ }
+
+ // Caching on/off switch, limit and size accessors.
+ bool GetCacheGc() const { return cache_gc_; }
+ size_t GetCacheLimit() const { return cache_limit_; }
+ size_t GetCacheSize() const { return cache_size_; }
+
+ private:
+ static const size_t kMinCacheLimit = 8096; // Minimum (non-zero) cache limit
+ static const uint32 kCacheFinal = 0x0001; // Final weight has been cached
+ static const uint32 kCacheArcs = 0x0002; // Arcs have been cached
+ static const uint32 kCacheRecent = 0x0004; // Mark as visited since GC
+
+ public:
+ static const uint32 kCacheModified = 0x0008; // Mark state as modified
+ static const uint32 kCacheFlags = kCacheFinal | kCacheArcs | kCacheRecent
+ | kCacheModified;
+
+ protected:
+ C *allocator_; // used to allocate new states
+
+ private:
+ mutable bool cache_start_; // Is the start state cached?
+ StateId nknown_states_; // # of known states
+ vector<bool> expanded_states_; // states that have been expanded
+ mutable StateId min_unexpanded_state_id_; // minimum never-expanded state Id
+ StateId cache_first_state_id_; // First cached state id
+ S *cache_first_state_; // First cached state
+ list<StateId> cache_states_; // list of currently cached states
+ bool cache_gc_; // enable GC
+ size_t cache_size_; // # of bytes cached
+ size_t cache_limit_; // # of bytes allowed before GC
+
+ void operator=(const CacheBaseImpl<S> &impl); // disallow
+};
+
+// Out-of-class definitions for the static const members that
+// CacheBaseImpl initializes in-class; these give the constants storage
+// when they are odr-used (e.g. bound to a const reference).
+// NOTE(review): kCacheFlags is declared in the class but has no
+// definition in this group -- confirm whether one is needed.
+template <class S, class C> const uint32 CacheBaseImpl<S, C>::kCacheFinal;
+template <class S, class C> const uint32 CacheBaseImpl<S, C>::kCacheArcs;
+template <class S, class C> const uint32 CacheBaseImpl<S, C>::kCacheRecent;
+template <class S, class C> const uint32 CacheBaseImpl<S, C>::kCacheModified;
+template <class S, class C> const size_t CacheBaseImpl<S, C>::kMinCacheLimit;
+
+// Arcs implemented by an STL vector per state. Similar to VectorState
+// but adds flags and ref count to keep track of what has been cached.
+template <class A>
+struct CacheState {
+  typedef A Arc;
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+
+  // Initializes every scalar field so that a freshly constructed state
+  // never exposes indeterminate epsilon counts.
+  CacheState()
+      : final(Weight::Zero()),
+        niepsilons(0),
+        noepsilons(0),
+        flags(0),
+        ref_count(0) {}
+
+  // Returns the state to the same logical contents as a newly
+  // constructed one, so that a recycled state carries no stale final
+  // weight, epsilon counts, flags, ref count or arcs.
+  void Reset() {
+    final = Weight::Zero();
+    niepsilons = 0;
+    noepsilons = 0;
+    flags = 0;
+    ref_count = 0;
+    arcs.resize(0);
+  }
+
+  Weight final;              // Final weight
+  vector<A> arcs;            // Arcs representation
+  size_t niepsilons;         // # of input epsilons
+  size_t noepsilons;         // # of output epsilons
+  mutable uint32 flags;      // Cache flag bits; mutable so const holders can set them
+  mutable int ref_count;     // Adjusted by the arc iterators that hold this state
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(CacheState);
+};
+
+// A CacheBaseImpl with a commonly used CacheState.
+template <class A>
+class CacheImpl : public CacheBaseImpl< CacheState<A> > {
+ public:
+  typedef CacheState<A> State;
+
+  CacheImpl() {}
+
+  explicit CacheImpl(const CacheOptions &opts)
+      : CacheBaseImpl< CacheState<A> >(opts) {}
+
+  // Copy constructor. The parameter must be CacheImpl<A>: the former
+  // spelling CacheImpl<State> named CacheImpl< CacheState<A> >, which is
+  // an unrelated specialization and therefore not a copy constructor.
+  CacheImpl(const CacheImpl<A> &impl) : CacheBaseImpl<State>(impl) {}
+
+ private:
+  // Assignment is disallowed; declared for CacheImpl<A> so that it
+  // really is this class's copy-assignment operator.
+  void operator=(const CacheImpl<A> &impl);  // disallow
+};
+
+
+// State iterator for a CacheBaseImpl-derived Fst, which must define
+// the type 'State'. Only states reachable from the initial state are
+// returned, so consider implementing a class-specific iterator.
+template <class F>
+class CacheStateIterator : public StateIteratorBase<typename F::Arc> {
+ public:
+  typedef typename F::Arc Arc;
+  typedef typename Arc::StateId StateId;
+  typedef typename F::State State;
+  typedef CacheBaseImpl<State> Impl;
+
+  CacheStateIterator(const F &fst, Impl *impl)
+      : fst_(fst), impl_(impl), s_(0) {}
+
+  // Returns false as soon as the current id is a known state. Otherwise
+  // it forces the start state and then expands unexpanded states one at
+  // a time, registering their destination states, until either the
+  // current id becomes known or nothing is left to expand.
+  bool Done() const {
+    if (CurrentIsKnown())
+      return false;
+    fst_.Start();  // force start state
+    if (CurrentIsKnown())
+      return false;
+    StateId u = impl_->MinUnexpandedState();
+    while (u < impl_->NumKnownStates()) {
+      {
+        // force state expansion; the arc iterator is scoped so that it
+        // is released before the next unexpanded state is looked up
+        ArcIterator<F> aiter(fst_, u);
+        aiter.SetFlags(kArcValueFlags, kArcValueFlags | kArcNoCache);
+        while (!aiter.Done()) {
+          impl_->UpdateNumKnownStates(aiter.Value().nextstate);
+          aiter.Next();
+        }
+        impl_->ExpandedState(u);
+        if (CurrentIsKnown())
+          return false;
+      }
+      u = impl_->MinUnexpandedState();
+    }
+    return true;
+  }
+
+  StateId Value() const { return s_; }
+
+  void Next() { ++s_; }
+
+  void Reset() { s_ = 0; }
+
+ private:
+  // True when the current position is below the known-state count.
+  bool CurrentIsKnown() const { return s_ < impl_->NumKnownStates(); }
+
+  // This allows base class virtual access to non-virtual derived-
+  // class members of the same name. It makes the derived class more
+  // efficient to use but unsafe to further derive.
+  virtual bool Done_() const { return Done(); }
+  virtual StateId Value_() const { return Value(); }
+  virtual void Next_() { Next(); }
+  virtual void Reset_() { Reset(); }
+
+  const F &fst_;
+  Impl *impl_;
+  StateId s_;
+};
+
+
+// Arc iterator for a CacheBaseImpl-derived Fst, which must define the
+// types 'Arc' and 'State'.
+template <class F,
+          class C = DefaultCacheStateAllocator<CacheState<typename F::Arc> > >
+class CacheArcIterator {
+ public:
+  typedef typename F::Arc Arc;
+  typedef typename F::State State;
+  typedef typename Arc::StateId StateId;
+  typedef CacheBaseImpl<State, C> Impl;
+
+  // Obtains the cached state for 's' and bumps its ref count for the
+  // lifetime of this iterator.
+  CacheArcIterator(Impl *impl, StateId s)
+      : state_(impl->ExtendState(s)), i_(0) {
+    ++state_->ref_count;
+  }
+
+  // Drops the reference taken in the constructor.
+  ~CacheArcIterator() {
+    --state_->ref_count;
+  }
+
+  bool Done() const {
+    return !(i_ < state_->arcs.size());
+  }
+
+  const Arc& Value() const {
+    return state_->arcs[i_];
+  }
+
+  void Next() {
+    ++i_;
+  }
+
+  size_t Position() const {
+    return i_;
+  }
+
+  void Reset() {
+    i_ = 0;
+  }
+
+  void Seek(size_t a) {
+    i_ = a;
+  }
+
+  // Always reports kArcValueFlags.
+  uint32 Flags() const {
+    return kArcValueFlags;
+  }
+
+  // Flag changes are accepted and ignored.
+  void SetFlags(uint32 flags, uint32 mask) {}
+
+ private:
+  const State *state_;  // Cached state being iterated
+  size_t i_;            // Current arc position
+
+  DISALLOW_COPY_AND_ASSIGN(CacheArcIterator);
+};
+
+// Mutable arc iterator for a CacheBaseImpl-derived Fst, which must
+// define the types 'Arc' and 'State'.
+template <class F,
+          class C = DefaultCacheStateAllocator<CacheState<typename F::Arc> > >
+class CacheMutableArcIterator
+    : public MutableArcIteratorBase<typename F::Arc> {
+ public:
+  typedef typename F::State State;
+  typedef typename F::Arc Arc;
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Weight Weight;
+  typedef CacheBaseImpl<State, C> Impl;
+
+  // You will need to call MutateCheck() in the constructor.
+  // Obtains the cached state for 's' and holds a reference on it for
+  // the lifetime of this iterator.
+  CacheMutableArcIterator(Impl *impl, StateId s)
+      : i_(0), s_(s), impl_(impl), state_(impl->ExtendState(s)) {
+    ++state_->ref_count;
+  }
+
+  ~CacheMutableArcIterator() { --state_->ref_count; }
+
+  bool Done() const { return !(i_ < state_->arcs.size()); }
+
+  const Arc& Value() const { return state_->arcs[i_]; }
+
+  void Next() { ++i_; }
+
+  size_t Position() const { return i_; }
+
+  void Reset() { i_ = 0; }
+
+  void Seek(size_t a) { i_ = a; }
+
+  // Overwrites the arc at the current position, marks the state as
+  // modified, and updates the state's epsilon counts and the
+  // implementation's acceptor/epsilon/weighted property bits.
+  void SetValue(const Arc& arc) {
+    state_->flags |= Impl::kCacheModified;
+    uint64 props = impl_->Properties();
+    Arc& slot = state_->arcs[i_];
+
+    // Step 1: withdraw what the outgoing arc contributed.
+    if (slot.ilabel != slot.olabel)
+      props &= ~kNotAcceptor;
+    if (slot.ilabel == 0) {
+      --state_->niepsilons;
+      props &= ~kIEpsilons;
+      if (slot.olabel == 0)
+        props &= ~kEpsilons;
+    }
+    if (slot.olabel == 0) {
+      --state_->noepsilons;
+      props &= ~kOEpsilons;
+    }
+    if (slot.weight != Weight::Zero() && slot.weight != Weight::One())
+      props &= ~kWeighted;
+
+    // Step 2: store the replacement arc.
+    slot = arc;
+
+    // Step 3: account for what the incoming arc contributes.
+    if (arc.ilabel != arc.olabel) {
+      props |= kNotAcceptor;
+      props &= ~kAcceptor;
+    }
+    if (arc.ilabel == 0) {
+      ++state_->niepsilons;
+      props |= kIEpsilons;
+      props &= ~kNoIEpsilons;
+      if (arc.olabel == 0) {
+        props |= kEpsilons;
+        props &= ~kNoEpsilons;
+      }
+    }
+    if (arc.olabel == 0) {
+      ++state_->noepsilons;
+      props |= kOEpsilons;
+      props &= ~kNoOEpsilons;
+    }
+    if (arc.weight != Weight::Zero() && arc.weight != Weight::One()) {
+      props |= kWeighted;
+      props &= ~kUnweighted;
+    }
+
+    // Step 4: keep only the bits this operation is allowed to assert.
+    props &= kSetArcProperties | kAcceptor | kNotAcceptor |
+        kEpsilons | kNoEpsilons | kIEpsilons | kNoIEpsilons |
+        kOEpsilons | kNoOEpsilons | kWeighted | kUnweighted;
+    impl_->SetProperties(props);
+  }
+
+  // Always reports kArcValueFlags.
+  uint32 Flags() const { return kArcValueFlags; }
+
+  // Flag changes are accepted and ignored.
+  void SetFlags(uint32 f, uint32 m) {}
+
+ private:
+  // Forwarders from the base-class interface to the non-virtual
+  // members of the same name above.
+  virtual bool Done_() const { return Done(); }
+  virtual const Arc& Value_() const { return Value(); }
+  virtual void Next_() { Next(); }
+  virtual size_t Position_() const { return Position(); }
+  virtual void Reset_() { Reset(); }
+  virtual void Seek_(size_t a) { Seek(a); }
+  virtual void SetValue_(const Arc &a) { SetValue(a); }
+  uint32 Flags_() const { return Flags(); }
+  void SetFlags_(uint32 f, uint32 m) { SetFlags(f, m); }
+
+  size_t i_;      // Current arc position
+  StateId s_;     // Id of the state being iterated
+  Impl *impl_;    // Owning implementation (properties are updated here)
+  State *state_;  // Cached state being iterated
+
+  DISALLOW_COPY_AND_ASSIGN(CacheMutableArcIterator);
+};
+
+} // namespace fst
+
+#endif // FST_LIB_CACHE_H__
diff --git a/src/include/fst/closure.h b/src/include/fst/closure.h
new file mode 100644
index 0000000..541562b
--- /dev/null
+++ b/src/include/fst/closure.h
@@ -0,0 +1,155 @@
+// closure.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Functions and classes to compute the concatenative closure of an Fst.
+
+#ifndef FST_LIB_CLOSURE_H__
+#define FST_LIB_CLOSURE_H__
+
+#include <vector>
+using std::vector;
+#include <algorithm>
+
+#include <fst/mutable-fst.h>
+#include <fst/rational.h>
+
+
+namespace fst {
+
+// Computes the concatenative closure. This version modifies its
+// MutableFst input. If FST transduces string x to y with weight a,
+// then the closure transduces x to y with weight a, xx to yy with
+// weight Times(a, a), xxx to yyy with weight Times(Times(a, a), a),
+// etc. If closure_type == CLOSURE_STAR, then the empty string is
+// transduced to itself with weight Weight::One() as well.
+//
+// If the input has no start state (Start() == kNoStateId), no epsilon
+// back-arcs are added; with CLOSURE_STAR the result then consists of
+// the original states plus a new final start state.
+//
+// Complexity:
+// - Time: O(V)
+// - Space: O(V)
+// where V = # of states.
+template<class Arc>
+void Closure(MutableFst<Arc> *fst, ClosureType closure_type) {
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Weight Weight;
+
+  uint64 props = fst->Properties(kFstProperties, false);
+  StateId start = fst->Start();
+  // Without a start state there is nothing to loop back to; an arc
+  // whose destination is kNoStateId would leave the FST malformed.
+  if (start != kNoStateId) {
+    for (StateIterator< MutableFst<Arc> > siter(*fst);
+         !siter.Done();
+         siter.Next()) {
+      StateId s = siter.Value();
+      Weight final = fst->Final(s);
+      // Every final state gets an epsilon arc back to the old start
+      // state, weighted by its final weight.
+      if (final != Weight::Zero())
+        fst->AddArc(s, Arc(0, 0, final, start));
+    }
+  }
+  if (closure_type == CLOSURE_STAR) {
+    // New final start state so that the empty string is accepted.
+    fst->ReserveStates(fst->NumStates() + 1);
+    StateId nstart = fst->AddState();
+    fst->SetStart(nstart);
+    fst->SetFinal(nstart, Weight::One());
+    // 'start' is a state id, so it is compared against kNoStateId
+    // rather than kNoLabel.
+    if (start != kNoStateId)
+      fst->AddArc(nstart, Arc(0, 0, Weight::One(), start));
+  }
+  fst->SetProperties(ClosureProperties(props, closure_type == CLOSURE_STAR),
+                     kFstProperties);
+}
+
+// Computes the concatenative closure. This version modifies its
+// RationalFst input.
+template<class Arc>
+void Closure(RationalFst<Arc> *fst, ClosureType closure_type) {
+ // The work is delegated to the implementation object's AddClosure().
+ fst->GetImpl()->AddClosure(closure_type);
+}
+
+
+// Options for ClosureFst: the RationalFstOptions plus the closure type.
+struct ClosureFstOptions : RationalFstOptions {
+  ClosureType type;
+
+  // Default base options, closure type CLOSURE_STAR.
+  ClosureFstOptions() : type(CLOSURE_STAR) {}
+
+  // Default base options with an explicit closure type.
+  explicit ClosureFstOptions(ClosureType t) : type(t) {}
+
+  // Copies the given base options and sets the closure type.
+  ClosureFstOptions(const RationalFstOptions &opts, ClosureType t)
+      : RationalFstOptions(opts), type(t) {}
+};
+
+
+// Computes the concatenative closure. This version is a delayed
+// Fst. If FST transduces string x to y with weight a, then the
+// closure transduces x to y with weight a, xx to yy with weight
+// Times(a, a), xxx to yyy with weight Times(Times(a, a), a), etc. If
+// closure_type == CLOSURE_STAR, then the empty string is transduced
+// to itself with weight Weight::One() as well.
+//
+// Complexity:
+// - Time: O(v)
+// - Space: O(v)
+// where v = # of states visited. Constant time and space to visit an
+// input state or arc is assumed and exclusive of caching.
+template <class A>
+class ClosureFst : public RationalFst<A> {
+ public:
+  using ImplToFst< RationalFstImpl<A> >::GetImpl;
+
+  typedef A Arc;
+
+  // Closure of 'fst' using default RationalFst options.
+  ClosureFst(const Fst<A> &fst, ClosureType closure_type) {
+    this->GetImpl()->InitClosure(fst, closure_type);
+  }
+
+  // Closure of 'fst'; both the base options and the closure type are
+  // taken from 'opts'.
+  ClosureFst(const Fst<A> &fst, const ClosureFstOptions &opts)
+      : RationalFst<A>(opts) {
+    this->GetImpl()->InitClosure(fst, opts.type);
+  }
+
+  // See Fst<>::Copy() for doc.
+  ClosureFst(const ClosureFst<A> &fst, bool safe = false)
+      : RationalFst<A>(fst, safe) {}
+
+  // Get a copy of this ClosureFst. See Fst<>::Copy() for further doc.
+  virtual ClosureFst<A> *Copy(bool safe = false) const {
+    ClosureFst<A> *copy = new ClosureFst<A>(*this, safe);
+    return copy;
+  }
+};
+
+
+// StateIterator specialization for ClosureFst; it simply forwards to
+// the RationalFst state iterator.
+template <class A>
+class StateIterator< ClosureFst<A> >
+    : public StateIterator< RationalFst<A> > {
+ public:
+  explicit StateIterator(const ClosureFst<A> &fst)
+      : StateIterator< RationalFst<A> >(fst) {
+  }
+};
+
+
+// ArcIterator specialization for ClosureFst; it simply forwards to
+// the RationalFst arc iterator.
+template <class A>
+class ArcIterator< ClosureFst<A> >
+    : public ArcIterator< RationalFst<A> > {
+ public:
+  typedef typename A::StateId StateId;
+
+  ArcIterator(const ClosureFst<A> &fst, StateId s)
+      : ArcIterator< RationalFst<A> >(fst, s) {
+  }
+};
+
+
+// Convenience alias: ClosureFst instantiated with StdArc.
+typedef ClosureFst<StdArc> StdClosureFst;
+
+} // namespace fst
+
+#endif // FST_LIB_CLOSURE_H__
diff --git a/src/include/fst/compact-fst.h b/src/include/fst/compact-fst.h
new file mode 100644
index 0000000..efa567a
--- /dev/null
+++ b/src/include/fst/compact-fst.h
@@ -0,0 +1,1307 @@
+// compact-fst.h
+
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// FST Class for memory-efficient representation of common types of
+// FSTs: linear automata, acceptors, unweighted FSTs, ...
+
+#ifndef FST_LIB_COMPACT_FST_H__
+#define FST_LIB_COMPACT_FST_H__
+
+#include <iterator>
+#include <utility>
+using std::pair; using std::make_pair;
+#include <vector>
+using std::vector;
+
+#include <fst/cache.h>
+#include <fst/expanded-fst.h>
+#include <fst/fst-decl.h> // For optional argument declarations
+#include <fst/matcher.h>
+#include <fst/test-properties.h>
+#include <fst/util.h>
+
+
+namespace fst {
+
+// Cache options used by CompactFst.
+struct CompactFstOptions : public CacheOptions {
+  // By default a CompactFst does no caching (GC enabled, cache limit
+  // 0): most compactors are cheap, so memory is saved by not caching.
+  CompactFstOptions()
+      : CacheOptions(true, 0) {}
+
+  // Adopts generic cache options unchanged. Left non-explicit so that
+  // CacheOptions converts implicitly.
+  CompactFstOptions(const CacheOptions &opts)
+      : CacheOptions(opts) {}
+};
+
+// Compactor Interface - class determines how arcs and final weights
+// are compacted and expanded.
+//
+// Final weights are treated as transitions to the superfinal state,
+// i.e. ilabel = olabel = kNoLabel and nextstate = kNoStateId.
+//
+// There are two types of compactors:
+//
+// * Fixed out-degree compactors: 'compactor.Size()' returns a
+// positive integer 's'. An FST can be compacted by this compactor
+// only if each state has exactly 's' outgoing transitions (counting a
+// non-Zero() final weight as a transition). A typical example is a
+// compactor for string FSTs, i.e. 's == 1'.
+//
+// * Variable out-degree compactors: 'compactor.Size() == -1'. There
+// are no out-degree restrictions for these compactors.
+//
+//
+// class Compactor {
+// public:
+// // Element is the type of the compacted transitions.
+// typedef ... Element;
+// // Return the compacted representation of a transition 'arc'
+// // at a state 's'.
+// Element Compact(StateId s, const Arc &arc);
+// // Return the transition at state 's' represented by the compacted
+// // transition 'e'.
+// Arc Expand(StateId s, const Element &e);
+// // Return -1 for variable out-degree compactors, and the mandatory
+// // out-degree otherwise.
+// ssize_t Size();
+// // Test whether 'fst' can be compacted by this compactor.
+// bool Compatible(const Fst<A> &fst);
+// // Return the properties that are always true for an fst
+// // compacted using this compactor
+// uint64 Properties();
+// // Return a string identifying the type of compactor.
+// static const string &Type();
+// // Write a compactor to a file.
+// bool Write(ostream &strm);
+// // Read a compactor from a file.
+// static Compactor *Read(istream &strm);
+// // Default constructor (optional, see comment below).
+// Compactor();
+// };
+//
+// The default constructor is only required for FST_REGISTER to work
+// (i.e. enabling Convert() and the command-line utilities to work
+// with this new compactor). However, a default constructor always
+// needs to be specified for this code to compile, but one can have it
+// simply raise an error when called:
+//
+// Compactor::Compactor() {
+// FSTERROR() << "Compactor: no default constructor";
+// }
+
+
+// Implementation data for Compact Fst, which can be shared between
+// otherwise independent copies.
+//
+// The implementation contains two arrays: 'states_' and 'compacts_'.
+//
+// For fixed out-degree compactors, the 'states_' array is unallocated.
+// The 'compacts_' contains the compacted transitions. Its size is
+// 'ncompacts_'. The outgoing transitions at a given state are stored
+// consecutively. For a given state 's', its 'compactor.Size()' outgoing
+// transitions (including superfinal transition when 's' is final), are
+// stored in position ['s*compactor.Size()', '(s+1)*compactor_.Size()').
+//
+// For variable out-degree compactors, the states_ array has size
+// 'nstates_ + 1' and contains pointers to positions into 'compacts_'.
+// For a given state 's', the compacted transitions of 's' are
+// stored in positions [ 'states_[s]', 'states_[s + 1]' ) in 'compacts_'.
+// By convention, 'states_[nstates_] == ncompacts_'.
+//
+// In both cases, the superfinal transition (when 's' is final, i.e.
+// 'Final(s) != Weight::Zero()') is stored first.
+//
+// The unsigned type U is used to represent indices into the compacts_
+// array.
+template <class E, class U>
+class CompactFstData {
+ public:
+  typedef E CompactElement;
+  typedef U Unsigned;
+
+  // Creates empty data: no arrays, zero counts, no start state.
+  CompactFstData()
+      : states_(0),
+        compacts_(0),
+        nstates_(0),
+        ncompacts_(0),
+        narcs_(0),
+        start_(kNoStateId),
+        error_(false) {}
+
+  // Builds the compact representation of 'fst' using 'compactor'.
+  // Error() is true afterwards if the two are incompatible.
+  template <class A, class Compactor>
+  CompactFstData(const Fst<A> &fst, const Compactor &compactor);
+
+  // Builds the representation from a range of compact elements.
+  // Error() is true afterwards if the range is rejected.
+  template <class Iterator, class Compactor>
+  CompactFstData(const Iterator &begin, const Iterator &end,
+                 const Compactor &compactor);
+
+  // Releases both arrays (either may be null).
+  ~CompactFstData() {
+    delete[] states_;
+    delete[] compacts_;
+  }
+
+  // Reads the arrays from 'strm'. Returns a newly allocated object, or
+  // 0 when alignment or reading fails.
+  template <class Compactor>
+  static CompactFstData<E, U> *Read(istream &strm,
+                                    const FstReadOptions &opts,
+                                    const FstHeader &hdr,
+                                    const Compactor &compactor);
+
+  // Writes the arrays to 'strm'; returns false on failure.
+  bool Write(ostream &strm, const FstWriteOptions &opts) const;
+
+  Unsigned States(ssize_t i) const { return states_[i]; }
+  const CompactElement &Compacts(size_t i) const { return compacts_[i]; }
+  size_t NumStates() const { return nstates_; }
+  size_t NumCompacts() const { return ncompacts_; }
+  size_t NumArcs() const { return narcs_; }
+  ssize_t Start() const { return start_; }
+
+  int RefCount() const { return ref_count_.count(); }
+  int IncrRefCount() { return ref_count_.Incr(); }
+  int DecrRefCount() { return ref_count_.Decr(); }
+
+  bool Error() const { return error_; }
+
+ private:
+  // Byte alignment for states and arcs in file format (version 1 only)
+  static const int kFileAlign = 16;
+
+  Unsigned *states_;          // Offsets into compacts_; null for fixed out-degree
+  CompactElement *compacts_;  // Compacted transitions
+  size_t nstates_;            // # of states
+  size_t ncompacts_;          // # of elements in compacts_
+  size_t narcs_;              // # of arcs (superfinal transitions excluded)
+  ssize_t start_;             // Start state id, or kNoStateId
+  RefCounter ref_count_;      // Shared-ownership count
+  bool error_;                // Set when construction failed
+
+  // The object owns states_ and compacts_ and frees them in its
+  // destructor; a member-wise copy would free them twice, so copying
+  // and assignment are disallowed. Share via the ref count instead.
+  DISALLOW_COPY_AND_ASSIGN(CompactFstData);
+};
+
+// Out-of-class definition for the in-class-initialized static constant
+// CompactFstData::kFileAlign.
+template <class E, class U>
+const int CompactFstData<E, U>::kFileAlign;
+
+
+// Builds the compact representation of 'fst'. A first pass counts
+// states, arcs and final states to size the arrays; a second pass
+// compacts each state's superfinal transition (if any) followed by its
+// arcs. error_ is set whenever the counts disagree with what the
+// compactor's Size() requires.
+template <class E, class U>
+template <class A, class C>
+CompactFstData<E, U>::CompactFstData(const Fst<A> &fst, const C &compactor)
+    : states_(0),
+      compacts_(0),
+      nstates_(0),
+      ncompacts_(0),
+      narcs_(0),
+      start_(kNoStateId),
+      error_(false) {
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+  start_ = fst.Start();
+
+  // Pass 1: tally states, arcs and final states.
+  StateId nfinals = 0;
+  for (StateIterator< Fst<A> > siter(fst); !siter.Done(); siter.Next()) {
+    const StateId s = siter.Value();
+    ++nstates_;
+    for (ArcIterator< Fst<A> > aiter(fst, s); !aiter.Done(); aiter.Next())
+      ++narcs_;
+    if (fst.Final(s) != Weight::Zero())
+      ++nfinals;
+  }
+
+  // Size() == -1 denotes a variable out-degree compactor.
+  const bool variable = compactor.Size() == -1;
+  if (variable) {
+    ncompacts_ = narcs_ + nfinals;
+    states_ = new Unsigned[nstates_ + 1];
+    compacts_ = new CompactElement[ncompacts_];
+    states_[nstates_] = ncompacts_;
+  } else {
+    // Fixed out-degree: no states_ array; the total must match exactly.
+    ncompacts_ = nstates_ * compactor.Size();
+    if (ncompacts_ != (narcs_ + nfinals)) {
+      FSTERROR() << "CompactFstData: compactor incompatible with fst";
+      error_ = true;
+      return;
+    }
+    compacts_ = new CompactElement[ncompacts_];
+  }
+
+  // Pass 2: fill in the compacted transitions state by state.
+  size_t pos = 0;
+  for (StateId s = 0; s < nstates_; ++s) {
+    const size_t first = pos;
+    if (variable)
+      states_[s] = pos;
+    // The superfinal transition, when present, comes first.
+    if (fst.Final(s) != Weight::Zero()) {
+      compacts_[pos] = compactor.Compact(
+          s, A(kNoLabel, kNoLabel, fst.Final(s), kNoStateId));
+      ++pos;
+    }
+    for (ArcIterator< Fst<A> > aiter(fst, s); !aiter.Done(); aiter.Next()) {
+      compacts_[pos] = compactor.Compact(s, aiter.Value());
+      ++pos;
+    }
+    // A fixed out-degree compactor needs exactly Size() entries here.
+    if (!variable && ((pos - first) != compactor.Size())) {
+      FSTERROR() << "CompactFstData: compactor incompatible with fst";
+      error_ = true;
+      return;
+    }
+  }
+  if (pos != ncompacts_) {
+    FSTERROR() << "CompactFstData: compactor incompatible with fst";
+    error_ = true;
+    return;
+  }
+}
+
+// Builds the representation directly from the compact elements in
+// [begin, end). The fixed out-degree branch copies the elements as they
+// are; the variable out-degree branch derives the state boundaries from
+// the elements whose expanded ilabel is kNoLabel. error_ is set when
+// the range is rejected.
+template <class E, class U>
+template <class Iterator, class C>
+CompactFstData<E, U>::CompactFstData(const Iterator &begin,
+ const Iterator &end,
+ const C &compactor)
+ : states_(0),
+ compacts_(0),
+ nstates_(0),
+ ncompacts_(0),
+ narcs_(0),
+ start_(kNoStateId),
+ error_(false) {
+ typedef typename C::Arc Arc;
+ typedef typename Arc::Weight Weight;
+ // Fixed out-degree compactor (Size() != -1).
+ if (compactor.Size() != -1) {
+ ncompacts_ = distance(begin, end);
+ if (compactor.Size() == 1) {
+ // For strings, allow implicit final weight.
+ // Empty input is the empty string.
+ if (ncompacts_ == 0) {
+ ++ncompacts_;
+ } else {
+ // NOTE(review): 'begin + n' requires a random-access iterator,
+ // although the rest of this constructor only increments -- confirm
+ // the intended iterator category.
+ Arc arc = compactor.Expand(ncompacts_ - 1,
+ *(begin + (ncompacts_ - 1)));
+ // Last element is a real arc: reserve one extra slot for the
+ // superfinal element appended after the copy loop below.
+ if (arc.ilabel != kNoLabel)
+ ++ncompacts_;
+ }
+ }
+ // The element count must be a whole multiple of the out-degree.
+ if (ncompacts_ % compactor.Size()) {
+ FSTERROR() << "CompactFstData: size of input container incompatible"
+ << " with compactor";
+ error_ = true;
+ return;
+ }
+ if (ncompacts_ == 0)
+ return;
+ start_ = 0;
+ nstates_ = ncompacts_ / compactor.Size();
+ compacts_ = new CompactElement[ncompacts_];
+ size_t i = 0;
+ Iterator it = begin;
+ // Copy the elements, counting those that expand to real arcs.
+ for(; it != end; ++it, ++i){
+ compacts_[i] = *it;
+ if (compactor.Expand(i, *it).ilabel != kNoLabel)
+ ++narcs_;
+ }
+ // Fill the reserved slot, if any, with a superfinal element of
+ // weight Weight::One().
+ if (i < ncompacts_)
+ compacts_[i] = compactor.Compact(i, Arc(kNoLabel, kNoLabel,
+ Weight::One(), kNoStateId));
+ } else {
+ // Variable out-degree compactor.
+ if (distance(begin, end) == 0)
+ return;
+ // Count # of states, arcs and compacts.
+ Iterator it = begin;
+ for(size_t i = 0; it != end; ++it, ++i) {
+ Arc arc = compactor.Expand(i, *it);
+ if (arc.ilabel != kNoLabel) {
+ ++narcs_;
+ ++ncompacts_;
+ } else {
+ // An element with ilabel == kNoLabel counts as one state; it is
+ // stored only when its weight is not Weight::Zero().
+ ++nstates_;
+ if (arc.weight != Weight::Zero())
+ ++ncompacts_;
+ }
+ }
+ start_ = 0;
+ compacts_ = new CompactElement[ncompacts_];
+ states_ = new Unsigned[nstates_ + 1];
+ states_[nstates_] = ncompacts_;
+ // Second pass: i indexes compacts_, s indexes states_.
+ size_t i = 0, s = 0;
+ for(it = begin; it != end; ++it) {
+ Arc arc = compactor.Expand(i, *it);
+ if (arc.ilabel != kNoLabel) {
+ compacts_[i++] = *it;
+ } else {
+ states_[s++] = i;
+ if (arc.weight != Weight::Zero())
+ compacts_[i++] = *it;
+ }
+ }
+ // Both passes must agree on the totals.
+ if ((s != nstates_) || (i != ncompacts_)) {
+ FSTERROR() << "CompactFstData: ill-formed input container";
+ error_ = true;
+ return;
+ }
+ }
+}
+
+// Reads compact data from 'strm'. Start state, state count and arc
+// count come from 'hdr'. For a variable out-degree compactor the
+// states_ array is read first and its last entry gives the number of
+// compact elements; otherwise that number is nstates_ * Size().
+// Returns a newly allocated object, or 0 (after deleting the partial
+// object) when alignment or reading fails.
+template <class E, class U>
+template <class C>
+CompactFstData<E, U> *CompactFstData<E, U>::Read(
+    istream &strm,
+    const FstReadOptions &opts,
+    const FstHeader &hdr,
+    const C &compactor) {
+  CompactFstData<E, U> *data = new CompactFstData<E, U>();
+  data->start_ = hdr.Start();
+  data->nstates_ = hdr.NumStates();
+  data->narcs_ = hdr.NumArcs();
+
+  const bool aligned = (hdr.GetFlags() & FstHeader::IS_ALIGNED) != 0;
+
+  if (compactor.Size() == -1) {
+    data->states_ = new Unsigned[data->nstates_ + 1];
+    if (aligned && !AlignInput(strm, kFileAlign)) {
+      LOG(ERROR) << "CompactFst::Read: Alignment failed: " << opts.source;
+      delete data;
+      return 0;
+    }
+    // TODO: memory map this
+    const size_t state_bytes = (data->nstates_ + 1) * sizeof(Unsigned);
+    strm.read(reinterpret_cast<char *>(data->states_), state_bytes);
+    if (!strm) {
+      LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source;
+      delete data;
+      return 0;
+    }
+    // The final states_ entry holds the number of compact elements.
+    data->ncompacts_ = data->states_[data->nstates_];
+  } else {
+    // Fixed out-degree: states_ stays null.
+    data->ncompacts_ = data->nstates_ * compactor.Size();
+  }
+
+  data->compacts_ = new CompactElement[data->ncompacts_];
+  // TODO: memory map this
+  const size_t compact_bytes = data->ncompacts_ * sizeof(CompactElement);
+  if (aligned && !AlignInput(strm, kFileAlign)) {
+    LOG(ERROR) << "CompactFst::Read: Alignment failed: " << opts.source;
+    delete data;
+    return 0;
+  }
+  strm.read(reinterpret_cast<char *>(data->compacts_), compact_bytes);
+  if (!strm) {
+    LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source;
+    delete data;
+    return 0;
+  }
+  return data;
+}
+
+// Writes the states_ array (only when it is allocated) followed by the
+// compacts_ array to 'strm', aligning before each when opts.align is
+// set. Returns false if alignment fails or the stream is in a failed
+// state after flushing.
+template<class E, class U>
+bool CompactFstData<E, U>::Write(ostream &strm,
+                                 const FstWriteOptions &opts) const {
+  if (states_) {
+    if (opts.align && !AlignOutput(strm, kFileAlign)) {
+      LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source;
+      return false;
+    }
+    const size_t state_bytes = (nstates_ + 1) * sizeof(Unsigned);
+    strm.write(reinterpret_cast<char *>(states_), state_bytes);
+  }
+
+  if (opts.align && !AlignOutput(strm, kFileAlign)) {
+    LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source;
+    return false;
+  }
+  const size_t compact_bytes = ncompacts_ * sizeof(CompactElement);
+  strm.write(reinterpret_cast<char *>(compacts_), compact_bytes);
+
+  strm.flush();
+  if (strm)
+    return true;
+  LOG(ERROR) << "CompactFst::Write: Write failed: " << opts.source;
+  return false;
+}
+
+// Forward declarations of the CompactFst class template and the Cast
+// function template.
+template <class A, class C, class U> class CompactFst;
+template <class F, class G> void Cast(const F &, G *);
+
+// Implementation class for CompactFst, which contains CompactFstData
+// and Fst cache.
+template <class A, class C, class U>
+class CompactFstImpl : public CacheImpl<A> {
+ public:
+ using FstImpl<A>::SetType;
+ using FstImpl<A>::SetProperties;
+ using FstImpl<A>::Properties;
+ using FstImpl<A>::SetInputSymbols;
+ using FstImpl<A>::SetOutputSymbols;
+ using FstImpl<A>::WriteHeader;
+
+ using CacheImpl<A>::PushArc;
+ using CacheImpl<A>::HasArcs;
+ using CacheImpl<A>::HasFinal;
+ using CacheImpl<A>::HasStart;
+ using CacheImpl<A>::SetArcs;
+ using CacheImpl<A>::SetFinal;
+ using CacheImpl<A>::SetStart;
+
+ typedef A Arc;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+ typedef C Compactor;
+ typedef typename C::Element CompactElement;
+ typedef U Unsigned;
+
+ CompactFstImpl()
+ : CacheImpl<A>(CompactFstOptions()),
+ compactor_(0),
+ own_compactor_(false),
+ data_(0) {
+ string type = "compact";
+ if (sizeof(U) != sizeof(uint32)) {
+ string size;
+ Int64ToStr(8 * sizeof(U), &size);
+ type += size;
+ }
+ type += "_";
+ type += C::Type();
+ SetType(type);
+ SetProperties(kNullProperties | kStaticProperties);
+ }
+
+ CompactFstImpl(const Fst<Arc> &fst, const C &compactor,
+ const CompactFstOptions &opts)
+ : CacheImpl<A>(opts),
+ compactor_(new C(compactor)),
+ own_compactor_(true),
+ data_(0) {
+ Init(fst);
+ }
+
+ CompactFstImpl(const Fst<Arc> &fst, C *compactor,
+ const CompactFstOptions &opts)
+ : CacheImpl<A>(opts),
+ compactor_(compactor),
+ own_compactor_(false),
+ data_(0) {
+ Init(fst);
+ }
+
+ template <class Iterator>
+ CompactFstImpl(const Iterator &b, const Iterator &e, const C &compactor,
+ const CompactFstOptions &opts)
+ : CacheImpl<A>(opts),
+ compactor_(new C(compactor)),
+ own_compactor_(true),
+ data_(0) {
+ Init(b, e);
+ }
+
+ template <class Iterator>
+ CompactFstImpl(const Iterator &b, const Iterator &e, C *compactor,
+ const CompactFstOptions &opts)
+ : CacheImpl<A>(opts),
+ compactor_(compactor),
+ own_compactor_(false),
+ data_(0) {
+ Init(b, e);
+ }
+
+ CompactFstImpl(const CompactFstImpl<A, C, U> &impl)
+ : CacheImpl<A>(impl),
+ compactor_(new C(*impl.compactor_)),
+ own_compactor_(true),
+ data_(impl.data_) {
+ if (data_)
+ data_->IncrRefCount();
+ SetType(impl.Type());
+ SetProperties(impl.Properties());
+ SetInputSymbols(impl.InputSymbols());
+ SetOutputSymbols(impl.OutputSymbols());
+ }
+
+ ~CompactFstImpl(){
+ if (own_compactor_)
+ delete compactor_;
+ if (data_ && !data_->DecrRefCount())
+ delete data_;
+ }
+
+ StateId Start() {
+ if (!HasStart()) {
+ SetStart(data_->Start());
+ }
+ return CacheImpl<A>::Start();
+ }
+
+ Weight Final(StateId s) {
+ if (!HasFinal(s)) {
+ Arc arc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId);
+ if ((compactor_->Size() != -1) ||
+ (data_->States(s) != data_->States(s + 1)))
+ arc = ComputeArc(s,
+ compactor_->Size() == -1
+ ? data_->States(s)
+ : s * compactor_->Size());
+ SetFinal(s, arc.ilabel == kNoLabel ? arc.weight : Weight::Zero());
+ }
+ return CacheImpl<A>::Final(s);
+ }
+
+ StateId NumStates() const {
+ if (Properties(kError)) return 0;
+ return data_->NumStates();
+ }
+
+ // Returns the number of arcs leaving state 's'. When the arcs are
+ // cached the cached count is used; otherwise the count is the number
+ // of compact elements stored for 's', minus one if the first element
+ // is the superfinal transition that encodes the final weight.
+ size_t NumArcs(StateId s) {
+   if (HasArcs(s))
+     return CacheImpl<A>::NumArcs(s);
+   Unsigned i, num_arcs;
+   if (compactor_->Size() == -1) {
+     // Variable out-degree: the range comes from the states_ offsets.
+     i = data_->States(s);
+     num_arcs = data_->States(s + 1) - i;
+   } else {
+     // Fixed out-degree: every state stores exactly Size() elements.
+     i = s * compactor_->Size();
+     num_arcs = compactor_->Size();
+   }
+   if (num_arcs > 0) {
+     const A &arc = ComputeArc(s, i, kArcILabelValue);
+     // The superfinal transition is marked by ilabel == kNoLabel; a
+     // label is compared against kNoLabel, not kNoStateId.
+     if (arc.ilabel == kNoLabel) {
+       --num_arcs;
+     }
+   }
+   return num_arcs;
+ }
+
+ size_t NumInputEpsilons(StateId s) {
+ if (!HasArcs(s) && !Properties(kILabelSorted))
+ Expand(s);
+ if (HasArcs(s))
+ return CacheImpl<A>::NumInputEpsilons(s);
+ return CountEpsilons(s, false);
+ }
+
+ size_t NumOutputEpsilons(StateId s) {
+ if (!HasArcs(s) && !Properties(kOLabelSorted))
+ Expand(s);
+ if (HasArcs(s))
+ return CacheImpl<A>::NumOutputEpsilons(s);
+ return CountEpsilons(s, true);
+ }
+
+ size_t CountEpsilons(StateId s, bool output_epsilons) {
+ size_t begin = compactor_->Size() == -1 ?
+ data_->States(s) : s * compactor_->Size();
+ size_t end = compactor_->Size() == -1 ?
+ data_->States(s + 1) : (s + 1) * compactor_->Size();
+ size_t num_eps = 0;
+ for (size_t i = begin; i < end; ++i) {
+ const A &arc = ComputeArc(
+ s, i, output_epsilons ? kArcOLabelValue : kArcILabelValue);
+ const typename A::Label &label =
+ (output_epsilons ? arc.olabel : arc.ilabel);
+ if (label == kNoLabel)
+ continue;
+ else if (label > 0)
+ break;
+ ++num_eps;
+ }
+ return num_eps;
+ }
+
+ static CompactFstImpl<A, C, U> *Read(istream &strm,
+ const FstReadOptions &opts) {
+ CompactFstImpl<A, C, U> *impl = new CompactFstImpl<A, C, U>();
+ FstHeader hdr;
+ if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) {
+ delete impl;
+ return 0;
+ }
+
+ // Ensures compatibility
+ if (hdr.Version() == kAlignedFileVersion)
+ hdr.SetFlags(hdr.GetFlags() | FstHeader::IS_ALIGNED);
+
+ impl->compactor_ = C::Read(strm);
+ if (!impl->compactor_) {
+ delete impl;
+ return 0;
+ }
+ impl->own_compactor_ = true;
+ impl->data_ = CompactFstData<CompactElement, U>::Read(strm, opts, hdr,
+ *impl->compactor_);
+ if (!impl->data_) {
+ delete impl;
+ return 0;
+ }
+ return impl;
+ }
+
+ bool Write(ostream &strm, const FstWriteOptions &opts) const {
+ FstHeader hdr;
+ hdr.SetStart(data_->Start());
+ hdr.SetNumStates(data_->NumStates());
+ hdr.SetNumArcs(data_->NumArcs());
+
+ // Ensures compatibility
+ int file_version = opts.align ? kAlignedFileVersion : kFileVersion;
+ WriteHeader(strm, opts, file_version, &hdr);
+
+ compactor_->Write(strm);
+ return data_->Write(strm, opts);
+ }
+
+ // Provide information needed for generic state iterator
+ void InitStateIterator(StateIteratorData<A> *data) const {
+ data->base = 0;
+ data->nstates = data_->NumStates();
+ }
+
+ void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
+ if (!HasArcs(s))
+ Expand(s);
+ CacheImpl<A>::InitArcIterator(s, data);
+ }
+
+ Arc ComputeArc(StateId s, Unsigned i, uint32 f = kArcValueFlags) const {
+ return compactor_->Expand(s, data_->Compacts(i), f);
+ }
+
+ void Expand(StateId s) {
+ size_t begin = compactor_->Size() == -1 ?
+ data_->States(s) : s * compactor_->Size();
+ size_t end = compactor_->Size() == -1 ?
+ data_->States(s + 1) : (s + 1) * compactor_->Size();
+ for (size_t i = begin; i < end; ++i) {
+ const Arc &arc = ComputeArc(s, i);
+ if (arc.ilabel == kNoLabel) continue;
+ PushArc(s, arc);
+ }
+ SetArcs(s);
+ }
+
+ // Replaces the compact data with a new CompactFstData built from the
+ // elements delimited by iterators b and e and the current compactor.
+ // The reference held on the previous data (if any) is dropped first;
+ // the old object is deleted when DecrRefCount() returns zero.
+ template <class Iterator>
+ void SetCompactElements(const Iterator &b, const Iterator &e) {
+ if (data_ && !data_->DecrRefCount())
+ delete data_;
+ data_ = new CompactFstData<CompactElement, U>(b, e, *compactor_);
+ }
+
+ // Raw accessors; both return the stored pointer without touching any
+ // reference count.
+ C *GetCompactor() const { return compactor_; }
+ CompactFstData<CompactElement, U> *Data() const { return data_; }
+
+ protected:
+ // Converting constructor from an implementation with arc type B and
+ // compactor type D (same index type U). Cache options are taken from
+ // the source. A new compactor of type C is constructed from the
+ // source's compactor and flagged as owned. The compact data pointer is
+ // shared with the source and, when non-null, its reference count is
+ // incremented. Type, properties and symbol tables are set from the
+ // source.
+ template <class B, class D>
+ explicit CompactFstImpl(const CompactFstImpl<B, D, U> &impl)
+ : CacheImpl<A>(CacheOptions(impl.GetCacheGc(), impl.GetCacheLimit())),
+ compactor_(new C(*impl.GetCompactor())),
+ own_compactor_(true),
+ data_(impl.Data()) {
+ if (data_)
+ data_->IncrRefCount();
+ SetType(impl.Type());
+ SetProperties(impl.Properties());
+ SetInputSymbols(impl.InputSymbols());
+ SetOutputSymbols(impl.OutputSymbols());
+ }
+
+ private:
+ // Initializes this implementation from an existing Fst: sets the type
+ // name and symbol tables, builds the compact data, and sets properties.
+ // The error property is set when the compact data reports an error,
+ // when the input carries kError, or when the compactor is not
+ // compatible with the input.
+ void Init(const Fst<Arc> &fst) {
+   // Type name: "compact", then the bit width of U when U is not the
+   // size of uint32, then "_" and the compactor's type.
+   string fst_type = "compact";
+   if (sizeof(U) != sizeof(uint32)) {
+     string bits;
+     Int64ToStr(8 * sizeof(U), &bits);
+     fst_type += bits;
+   }
+   fst_type += "_";
+   fst_type += compactor_->Type();
+   SetType(fst_type);
+
+   SetInputSymbols(fst.InputSymbols());
+   SetOutputSymbols(fst.OutputSymbols());
+
+   data_ = new CompactFstData<CompactElement, U>(fst, *compactor_);
+   if (data_->Error())
+     SetProperties(kError, kError);
+
+   const uint64 copy_properties = fst.Properties(kCopyProperties, true);
+   // Compatible() is only consulted when the input is not in error.
+   const bool usable =
+       !(copy_properties & kError) && compactor_->Compatible(fst);
+   if (usable) {
+     SetProperties(copy_properties | kStaticProperties);
+   } else {
+     FSTERROR() << "CompactFstImpl: input fst incompatible with compactor";
+     SetProperties(kError, kError);
+   }
+ }
+
+ // Initializes this implementation directly from compact elements
+ // delimited by iterators b and e: sets the type name, sets the static
+ // and compactor-provided properties, and builds the compact data.
+ // The error property is set when the compact data reports an error.
+ template <class Iterator>
+ void Init(const Iterator &b, const Iterator &e) {
+   // Type name: "compact", then the bit width of U when U is not the
+   // size of uint32, then "_" and the compactor's type.
+   string fst_type = "compact";
+   if (sizeof(U) != sizeof(uint32)) {
+     string bits;
+     Int64ToStr(8 * sizeof(U), &bits);
+     fst_type += bits;
+   }
+   fst_type += "_";
+   fst_type += compactor_->Type();
+   SetType(fst_type);
+
+   SetProperties(kStaticProperties | compactor_->Properties());
+
+   data_ = new CompactFstData<CompactElement, U>(b, e, *compactor_);
+   if (data_->Error()) {
+     SetProperties(kError, kError);
+   }
+ }
+
+ // Properties always true of this Fst class
+ static const uint64 kStaticProperties = kExpanded;
+ // Current unaligned file format version
+ static const int kFileVersion = 2;
+ // Current aligned file format version
+ static const int kAlignedFileVersion = 1;
+ // Minimum file format version supported
+ static const int kMinFileVersion = 1;
+
+ // Compactor used to expand compact elements into arcs.
+ C *compactor_;
+ // Set to true where this class allocates or reads the compactor itself.
+ // NOTE(review): presumably decides whether the destructor deletes
+ // compactor_; the destructor is not visible here -- confirm.
+ bool own_compactor_;
+ // Compact element storage; reference counted via IncrRefCount() and
+ // DecrRefCount().
+ CompactFstData<CompactElement, U> *data_;
+};
+
+// Out-of-class definitions of the static const data members that are
+// declared and initialized inside CompactFstImpl.
+template <class A, class C, class U>
+const uint64 CompactFstImpl<A, C, U>::kStaticProperties;
+template <class A, class C, class U>
+const int CompactFstImpl<A, C, U>::kFileVersion;
+template <class A, class C, class U>
+const int CompactFstImpl<A, C, U>::kAlignedFileVersion;
+template <class A, class C, class U>
+const int CompactFstImpl<A, C, U>::kMinFileVersion;
+
+
+// CompactFst. This class attaches interface to implementation and
+// handles reference counting, delegating most methods to
+// ImplToExpandedFst. The unsigned type U is used to represent indices
+// into the compact arc array (uint32 by default, declared in
+// fst-decl.h).
+template <class A, class C, class U>
+class CompactFst : public ImplToExpandedFst< CompactFstImpl<A, C, U> > {
+ public:
+ friend class StateIterator< CompactFst<A, C, U> >;
+ friend class ArcIterator< CompactFst<A, C, U> >;
+ template <class F, class G> void friend Cast(const F &, G *);
+
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+ typedef CompactFstImpl<A, C, U> Impl;
+ typedef CacheState<A> State;
+ typedef U Unsigned;
+
+ // Creates a CompactFst around a default-constructed implementation.
+ CompactFst() : ImplToExpandedFst<Impl>(new Impl()) {}
+
+ // Builds a CompactFst from an existing Fst, with the compactor passed
+ // by const reference.
+ explicit CompactFst(const Fst<A> &fst, const C &compactor = C(),
+ const CompactFstOptions &opts = CompactFstOptions())
+ : ImplToExpandedFst<Impl>(new Impl(fst, compactor, opts)) {}
+
+ // Same as above, with the compactor passed by pointer.
+ // NOTE(review): ownership of `compactor` is decided by the Impl
+ // constructor, which is not visible here -- confirm before relying on it.
+ CompactFst(const Fst<A> &fst, C *compactor,
+ const CompactFstOptions &opts = CompactFstOptions())
+ : ImplToExpandedFst<Impl>(new Impl(fst, compactor, opts)) {}
+
+ // The following 2 constructors take as input two iterators delimiting
+ // a set of (already) compacted transitions, starting with the
+ // transitions out of the initial state. The format of the input
+ // differs for fixed out-degree and variable out-degree compactors.
+ //
+ // - For fixed out-degree compactors, the final weight (encoded as a
+ // compacted transition) needs to be given only for final
+ // states. All strings (compactor of size 1) will be assumed to be
+ // terminated by a final state even when the final state is not
+ // implicitly given.
+ //
+ // - For variable out-degree compactors, the final weight (encoded
+ // as a compacted transition) needs to be given for all states and
+ // must appear first in the list (for state s, final weight of s,
+ // followed by outgoing transitions in s).
+ //
+ // These 2 constructors allow the direct construction of a CompactFst
+ // without first creating a more memory hungry 'regular' FST. This
+ // is useful when memory usage is severely constrained.
+ template <class Iterator>
+ explicit CompactFst(const Iterator &begin, const Iterator &end,
+ const C &compactor = C(),
+ const CompactFstOptions &opts = CompactFstOptions())
+ : ImplToExpandedFst<Impl>(new Impl(begin, end, compactor, opts)) {}
+
+ template <class Iterator>
+ CompactFst(const Iterator &begin, const Iterator &end,
+ C *compactor, const CompactFstOptions &opts = CompactFstOptions())
+ : ImplToExpandedFst<Impl>(new Impl(begin, end, compactor, opts)) {}
+
+ // See Fst<>::Copy() for doc.
+ CompactFst(const CompactFst<A, C, U> &fst, bool safe = false)
+ : ImplToExpandedFst<Impl>(fst, safe) {}
+
+ // Get a copy of this CompactFst. See Fst<>::Copy() for further doc.
+ virtual CompactFst<A, C, U> *Copy(bool safe = false) const {
+ return new CompactFst<A, C, U>(*this, safe);
+ }
+
+ // Read a CompactFst from an input stream; return NULL on error
+ static CompactFst<A, C, U> *Read(istream &strm, const FstReadOptions &opts) {
+ Impl* impl = Impl::Read(strm, opts);
+ return impl ? new CompactFst<A, C, U>(impl) : 0;
+ }
+
+ // Read a CompactFst from a file; return NULL on error
+ // Empty filename reads from standard input
+ static CompactFst<A, C, U> *Read(const string &filename) {
+ Impl* impl = ImplToExpandedFst<Impl>::Read(filename);
+ return impl ? new CompactFst<A, C, U>(impl) : 0;
+ }
+
+ // Writes this FST to a stream by delegating to the implementation.
+ virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
+ return GetImpl()->Write(strm, opts);
+ }
+
+ // Writes this FST to a file by delegating to Fst<A>::WriteFile().
+ virtual bool Write(const string &filename) const {
+ return Fst<A>::WriteFile(filename);
+ }
+
+ // Iterator initialization is forwarded to the implementation.
+ virtual void InitStateIterator(StateIteratorData<A> *data) const {
+ GetImpl()->InitStateIterator(data);
+ }
+
+ virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
+ GetImpl()->InitArcIterator(s, data);
+ }
+
+ // Returns a newly allocated SortedMatcher over this FST.
+ virtual MatcherBase<A> *InitMatcher(MatchType match_type) const {
+ return new SortedMatcher<CompactFst<A, C, U> >(*this, match_type);
+ }
+
+ // Replaces the compact elements of the implementation with those
+ // delimited by b and e.
+ template <class Iterator>
+ void SetCompactElements(const Iterator &b, const Iterator &e) {
+ GetImpl()->SetCompactElements(b, e);
+ }
+
+ private:
+ // Wraps an already constructed implementation (used by Read()).
+ CompactFst(Impl *impl) : ImplToExpandedFst<Impl>(impl) {}
+
+ // Makes visible to friends.
+ Impl *GetImpl() const { return ImplToFst<Impl, ExpandedFst<A> >::GetImpl(); }
+
+ void SetImpl(Impl *impl, bool own_impl = false) {
+ ImplToFst< Impl, ExpandedFst<A> >::SetImpl(impl, own_impl);
+ }
+
+ void operator=(const CompactFst<A, C, U> &fst); // disallow
+};
+
+
+// Specialization for CompactFst; see generic version in fst.h
+// for sample usage (but use the CompactFst type!). This version
+// should inline.
+// State ids are visited in increasing order from 0 up to, but not
+// including, the number of states read from the implementation at
+// construction time.
+template <class A, class C, class U>
+class StateIterator< CompactFst<A, C, U> > {
+ public:
+ typedef typename A::StateId StateId;
+
+ explicit StateIterator(const CompactFst<A, C, U> &fst)
+ : nstates_(fst.GetImpl()->NumStates()), s_(0) {}
+
+ // True once every state id has been visited.
+ bool Done() const { return s_ >= nstates_; }
+
+ // Current state id.
+ StateId Value() const { return s_; }
+
+ void Next() { ++s_; }
+
+ void Reset() { s_ = 0; }
+
+ private:
+ StateId nstates_;
+ StateId s_;
+
+ DISALLOW_COPY_AND_ASSIGN(StateIterator);
+};
+
+// Specialization for CompactFst.
+// Never caches, always iterates over the underlying compact elements.
+// Every call to Value() re-expands the current compact element through
+// the compactor, passing the flags currently set on the iterator.
+template <class A, class C, class U>
+class ArcIterator< CompactFst<A, C, U> > {
+ public:
+  typedef typename A::StateId StateId;
+  typedef typename C::Element CompactElement;
+
+  // Positions the iterator on the first arc leaving state s of fst.
+  ArcIterator(const CompactFst<A, C, U> &fst, StateId s)
+      : compactor_(fst.GetImpl()->GetCompactor()), state_(s), compacts_(0),
+        pos_(0), flags_(kArcValueFlags) {
+
+    const CompactFstData<CompactElement, U> *data = fst.GetImpl()->Data();
+    size_t offset;
+    if (compactor_->Size() == -1) {  // Variable out-degree compactor
+      offset = data->States(s);
+      num_arcs_ = data->States(s + 1) - offset;
+    } else {  // Fixed out-degree compactor
+      offset = s * compactor_->Size();
+      num_arcs_ = compactor_->Size();
+    }
+    if (num_arcs_ > 0) {
+      compacts_ = &(data->Compacts(offset));
+      arc_ = compactor_->Expand(s, *compacts_, kArcILabelValue);
+      // A leading element whose input label is the "no label" sentinel is
+      // not an arc, so step over it. The check is on a label, hence
+      // kNoLabel (as in CompactFstImpl::Expand), not the state-id sentinel.
+      if (arc_.ilabel == kNoLabel) {
+        ++compacts_;
+        --num_arcs_;
+      }
+    }
+  }
+
+  ~ArcIterator() {}
+
+  // True once all arcs of the state have been visited.
+  bool Done() const { return pos_ >= num_arcs_; }
+
+  // Expands and returns the arc at the current position. The returned
+  // reference points at an internal buffer that the next call overwrites.
+  const A& Value() const {
+    arc_ = compactor_->Expand(state_, compacts_[pos_], flags_);
+    return arc_;
+  }
+
+  void Next() { ++pos_; }
+
+  size_t Position() const { return pos_; }
+
+  void Reset() { pos_ = 0; }
+
+  void Seek(size_t pos) { pos_ = pos; }
+
+  uint32 Flags() const { return flags_; }
+
+  // Clears the flag bits selected by mask m, then sets those bits of f
+  // that belong to kArcValueFlags.
+  void SetFlags(uint32 f, uint32 m) {
+    flags_ &= ~m;
+    flags_ |= (f & kArcValueFlags);
+  }
+
+ private:
+  C *compactor_;                    // Not owned; taken from the FST's impl.
+  StateId state_;                   // State whose arcs are iterated.
+  const CompactElement *compacts_;  // First compact element that is an arc.
+  size_t pos_;                      // Current position among the arcs.
+  size_t num_arcs_;                 // Number of arcs to iterate over.
+  mutable A arc_;                   // Buffer for the most recent expansion.
+  uint32 flags_;                    // Flags handed to the compactor.
+
+  DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+// // Specialization for CompactFst.
+// // This is an optionally caching arc iterator.
+// // TODO(allauzen): implements the kArcValueFlags, the current
+// // implementation only implements the kArcNoCache flag.
+// template <class A, class C, class U>
+// class ArcIterator< CompactFst<A, C, U> > {
+// public:
+// typedef typename A::StateId StateId;
+
+// ArcIterator(const CompactFst<A, C, U> &fst, StateId s)
+// : fst_(fst), state_(s), pos_(0), num_arcs_(0), offset_(0),
+// flags_(kArcValueFlags) {
+// cache_data_.ref_count = 0;
+
+// if (fst_.GetImpl()->HasArcs(state_)) {
+// fst_.GetImpl()->InitArcIterator(s, &cache_data_);
+// num_arcs_ = cache_data_.narcs;
+// return;
+// }
+
+// const C *compactor = fst_.GetImpl()->GetCompactor();
+// const CompactFstData<A, C, U> *data = fst_.GetImpl()->Data();
+// if (compactor->Size() == -1) { // Variable out-degree compactor
+// offset_ = data->States(s);
+// num_arcs_ = data->States(s + 1) - offset_;
+// } else { // Fixed out-degree compactor
+// offset_ = s * compactor->Size();
+// num_arcs_ = compactor->Size();
+// }
+// if (num_arcs_ > 0) {
+// const A &arc = fst_.GetImpl()->ComputeArc(s, offset_);
+// if (arc.ilabel == kNoStateId) {
+// ++offset_;
+// --num_arcs_;
+// }
+// }
+// }
+
+
+// ~ArcIterator() {
+// if (cache_data_.ref_count)
+// --(*cache_data_.ref_count);
+// }
+
+// bool Done() const { return pos_ >= num_arcs_; }
+
+// const A& Value() const {
+// if (cache_data_.ref_count == 0) {
+// if (flags_ & kArcNoCache) {
+// arc_ = fst_.GetImpl()->ComputeArc(state_, pos_ + offset_);
+// return arc_;
+// } else {
+// fst_.GetImpl()->InitArcIterator(state_, &cache_data_);
+// }
+// }
+// return cache_data_.arcs[pos_];
+// }
+
+// void Next() { ++pos_; }
+
+// size_t Position() const { return pos_; }
+
+// void Reset() { pos_ = 0; }
+
+// void Seek(size_t pos) { pos_ = pos; }
+
+// uint32 Flags() const { return flags_; }
+
+// void SetFlags(uint32 f, uint32 m) {
+// flags_ &= ~m;
+// flags_ |= f;
+
+// if (!(flags_ & kArcNoCache) && cache_data_.ref_count == 0)
+// fst_.GetImpl()->InitArcIterator(state_, &cache_data_);
+// }
+
+// private:
+// mutable const CompactFst<A, C, U> &fst_;
+// StateId state_;
+// size_t pos_;
+// size_t num_arcs_;
+// size_t offset_;
+// uint32 flags_;
+// mutable A arc_;
+// mutable ArcIteratorData<A> cache_data_;
+
+// DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+// };
+
+
+//
+// Utility Compactors
+//
+
+// Compactor for unweighted string FSTs
+// The compact element is just the arc's input label.
+template <class A>
+class StringCompactor {
+ public:
+ typedef A Arc;
+ typedef typename A::Label Element;
+ typedef typename A::Label Label;
+ typedef typename A::StateId StateId;
+ typedef typename A::Weight Weight;
+
+ // Keeps only the input label of the arc.
+ Element Compact(StateId s, const A &arc) const { return arc.ilabel; }
+
+ // Rebuilds an arc with p as both labels and weight One(). The next
+ // state is s + 1, or kNoStateId when p is kNoLabel.
+ Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
+ return Arc(p, p, Weight::One(), p != kNoLabel ? s + 1 : kNoStateId);
+ }
+
+ // Fixed number of compact elements per state.
+ ssize_t Size() const { return 1; }
+
+ uint64 Properties() const {
+ return kString | kAcceptor | kUnweighted;
+ }
+
+ // True when fst has all of the properties returned by Properties().
+ bool Compatible(const Fst<A> &fst) const {
+ uint64 props = Properties();
+ return fst.Properties(props, true) == props;
+ }
+
+ static const string &Type() {
+ static const string type = "string";
+ return type;
+ }
+
+ // This compactor has no state to serialize: Write() writes nothing and
+ // Read() returns a freshly allocated instance without reading.
+ bool Write(ostream &strm) const { return true; }
+
+ static StringCompactor *Read(istream &strm) {
+ return new StringCompactor;
+ }
+};
+
+
+// Compactor for weighted string FSTs
+// The compact element is the pair (input label, weight).
+template <class A>
+class WeightedStringCompactor {
+ public:
+ typedef A Arc;
+ typedef typename A::Label Label;
+ typedef typename A::StateId StateId;
+ typedef typename A::Weight Weight;
+ typedef pair<Label, Weight> Element;
+
+ // Keeps the input label and the weight of the arc.
+ Element Compact(StateId s, const A &arc) const {
+ return make_pair(arc.ilabel, arc.weight);
+ }
+
+ // Rebuilds an arc with p.first as both labels and p.second as weight.
+ // The next state is s + 1, or kNoStateId when the label is kNoLabel.
+ Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
+ return Arc(p.first, p.first, p.second,
+ p.first != kNoLabel ? s + 1 : kNoStateId);
+ }
+
+ // Fixed number of compact elements per state.
+ ssize_t Size() const { return 1;}
+
+ uint64 Properties() const {
+ return kString | kAcceptor;
+ }
+
+ // True when fst has all of the properties returned by Properties().
+ bool Compatible(const Fst<A> &fst) const {
+ uint64 props = Properties();
+ return fst.Properties(props, true) == props;
+ }
+
+ static const string &Type() {
+ static const string type = "weighted_string";
+ return type;
+ }
+
+ // This compactor has no state to serialize: Write() writes nothing and
+ // Read() returns a freshly allocated instance without reading.
+ bool Write(ostream &strm) const { return true; }
+
+ static WeightedStringCompactor *Read(istream &strm) {
+ return new WeightedStringCompactor;
+ }
+};
+
+
+// Compactor for unweighted acceptor FSTs
+// The compact element is the pair (input label, next state).
+template <class A>
+class UnweightedAcceptorCompactor {
+ public:
+ typedef A Arc;
+ typedef typename A::Label Label;
+ typedef typename A::StateId StateId;
+ typedef typename A::Weight Weight;
+ typedef pair<Label, StateId> Element;
+
+ // Keeps the input label and the destination state of the arc.
+ Element Compact(StateId s, const A &arc) const {
+ return make_pair(arc.ilabel, arc.nextstate);
+ }
+
+ // Rebuilds an arc with p.first as both labels, weight One() and
+ // p.second as next state.
+ Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
+ return Arc(p.first, p.first, Weight::One(), p.second);
+ }
+
+ // -1 marks a variable number of compact elements per state.
+ ssize_t Size() const { return -1;}
+
+ uint64 Properties() const {
+ return kAcceptor | kUnweighted;
+ }
+
+ // True when fst has all of the properties returned by Properties().
+ bool Compatible(const Fst<A> &fst) const {
+ uint64 props = Properties();
+ return fst.Properties(props, true) == props;
+ }
+
+ static const string &Type() {
+ static const string type = "unweighted_acceptor";
+ return type;
+ }
+
+ // This compactor has no state to serialize: Write() writes nothing and
+ // Read() returns a freshly allocated instance without reading.
+ bool Write(ostream &strm) const { return true; }
+
+ static UnweightedAcceptorCompactor *Read(istream &istrm) {
+ return new UnweightedAcceptorCompactor;
+ }
+};
+
+
+// Compactor for weighted acceptor FSTs
+// The compact element is ((input label, weight), next state).
+template <class A>
+class AcceptorCompactor {
+ public:
+ typedef A Arc;
+ typedef typename A::Label Label;
+ typedef typename A::StateId StateId;
+ typedef typename A::Weight Weight;
+ typedef pair< pair<Label, Weight>, StateId > Element;
+
+ // Keeps the input label, the weight and the destination state.
+ Element Compact(StateId s, const A &arc) const {
+ return make_pair(make_pair(arc.ilabel, arc.weight), arc.nextstate);
+ }
+
+ // Rebuilds an arc using the stored label for both input and output.
+ Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
+ return Arc(p.first.first, p.first.first, p.first.second, p.second);
+ }
+
+ // -1 marks a variable number of compact elements per state.
+ ssize_t Size() const { return -1;}
+
+ uint64 Properties() const {
+ return kAcceptor;
+ }
+
+ // True when fst has all of the properties returned by Properties().
+ bool Compatible(const Fst<A> &fst) const {
+ uint64 props = Properties();
+ return fst.Properties(props, true) == props;
+ }
+
+ static const string &Type() {
+ static const string type = "acceptor";
+ return type;
+ }
+
+ // This compactor has no state to serialize: Write() writes nothing and
+ // Read() returns a freshly allocated instance without reading.
+ bool Write(ostream &strm) const { return true; }
+
+ static AcceptorCompactor *Read(istream &strm) {
+ return new AcceptorCompactor;
+ }
+};
+
+
+// Compactor for unweighted FSTs
+// The compact element is ((input label, output label), next state).
+template <class A>
+class UnweightedCompactor {
+ public:
+ typedef A Arc;
+ typedef typename A::Label Label;
+ typedef typename A::StateId StateId;
+ typedef typename A::Weight Weight;
+ typedef pair< pair<Label, Label>, StateId > Element;
+
+ // Keeps both labels and the destination state; the weight is dropped.
+ Element Compact(StateId s, const A &arc) const {
+ return make_pair(make_pair(arc.ilabel, arc.olabel), arc.nextstate);
+ }
+
+ // Rebuilds an arc from the stored labels and next state, with weight
+ // One().
+ Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
+ return Arc(p.first.first, p.first.second, Weight::One(), p.second);
+ }
+
+ // -1 marks a variable number of compact elements per state.
+ ssize_t Size() const { return -1; }
+
+ uint64 Properties() const {
+ return kUnweighted;
+ }
+
+ // True when fst has all of the properties returned by Properties().
+ bool Compatible(const Fst<A> &fst) const {
+ uint64 props = Properties();
+ return fst.Properties(props, true) == props;
+ }
+
+ static const string &Type() {
+ static const string type = "unweighted";
+ return type;
+ }
+
+ // This compactor has no state to serialize: Write() writes nothing and
+ // Read() returns a freshly allocated instance without reading.
+ bool Write(ostream &strm) const { return true; }
+
+ static UnweightedCompactor *Read(istream &strm) {
+ return new UnweightedCompactor;
+ }
+};
+
+
+// Useful aliases when using StdArc
+typedef CompactFst< StdArc, StringCompactor<StdArc> >
+StdCompactStringFst;
+typedef CompactFst< StdArc, WeightedStringCompactor<StdArc> >
+StdCompactWeightedStringFst;
+typedef CompactFst<StdArc, AcceptorCompactor<StdArc> >
+StdCompactAcceptorFst;
+typedef CompactFst<StdArc, UnweightedCompactor<StdArc> >
+StdCompactUnweightedFst;
+typedef CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc> >
+StdCompactUnweightedAcceptorFst;
+
+} // namespace fst
+
+#endif // FST_LIB_COMPACT_FST_H__
diff --git a/src/include/fst/compat.h b/src/include/fst/compat.h
new file mode 100644
index 0000000..034b57e
--- /dev/null
+++ b/src/include/fst/compat.h
@@ -0,0 +1,152 @@
+// compat.h
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Google compatibility declarations and inline definitions.
+
+#ifndef FST_LIB_COMPAT_H__
+#define FST_LIB_COMPAT_H__
+
+#include <dlfcn.h>
+
+#include <climits>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <string>
+#include <vector>
+
+// Declares (without defining) a copy constructor and a copy assignment
+// operator for `type`. The macro does not change the access level by
+// itself: place it in the private section of the class so that copying
+// is rejected. No trailing semicolon is emitted; the user supplies it.
+#define DISALLOW_COPY_AND_ASSIGN(type) \
+ type(const type&); \
+ void operator=(const type&)
+
+#include <fst/config.h>
+#include <fst/types.h>
+#include <fst/lock.h>
+#include <fst/flags.h>
+#include <fst/log.h>
+
+#ifdef HAVE_ICU
+#include <fst/icu.h>
+#endif
+
+using std::cin;
+using std::cout;
+using std::cerr;
+using std::endl;
+using std::string;
+
+void FailedNewHandler();
+
+namespace fst {
+
+using namespace std;
+
+void SplitToVector(char *line, const char *delim,
+ std::vector<char *> *vec, bool omit_empty_strings);
+
+// Downcasting
+// Converts pointer f to type To with static_cast; no run-time type check
+// is performed, so the caller must know the conversion is valid.
+template<typename To, typename From>
+inline To down_cast(From* f) {
+ return static_cast<To>(f);
+}
+
+// Bitcasting
+// Returns a Dest whose bytes are copied from `source` with memcpy.
+// Dest and Source must have the same size (checked at compile time).
+template <class Dest, class Source>
+inline Dest bit_cast(const Source& source) {
+ // Compile time assertion: sizeof(Dest) == sizeof(Source)
+ // A compile error here means your Dest and Source have different sizes.
+ typedef char VerifySizesAreEqual [sizeof(Dest) == sizeof(Source) ? 1 :
+ -1];
+ Dest dest;
+ memcpy(&dest, &source, sizeof(dest));
+ return dest;
+}
+
+// Check sums
+// Accumulates a fixed-length digest of kCheckSumLength bytes. Each input
+// byte is XOR-ed into the digest at a position that advances by one per
+// byte and wraps around at the end of the digest.
+class CheckSummer {
+ public:
+  CheckSummer() : count_(0) {
+    check_sum_.resize(kCheckSumLength, '\0');
+  }
+
+  // Restarts the computation: rewinds the position and zeroes every byte
+  // of the digest (not just the first one).
+  void Reset() {
+    count_ = 0;
+    for (int i = 0; i < kCheckSumLength; ++i)
+      check_sum_[i] = '\0';
+  }
+
+  // Folds `size` bytes starting at `data` into the digest.
+  void Update(void const *data, int size) {
+    const char *p = reinterpret_cast<const char *>(data);
+    for (int i = 0; i < size; ++i)
+      Fold(p[i]);
+  }
+
+  // Folds every character of `data` into the digest.
+  void Update(string const &data) {
+    for (size_t i = 0; i < data.size(); ++i)
+      Fold(data[i]);
+  }
+
+  // Returns a copy of the current digest (kCheckSumLength bytes).
+  string Digest() const {
+    return check_sum_;
+  }
+
+ private:
+  // XORs one byte into the digest and advances the position. The
+  // position is kept reduced modulo kCheckSumLength so that it can never
+  // overflow, however many bytes are processed; the digest is the same
+  // as with an ever-growing byte counter.
+  void Fold(char c) {
+    check_sum_[count_] ^= c;
+    count_ = (count_ + 1) % kCheckSumLength;
+  }
+
+  static const int kCheckSumLength = 32;
+  int count_;         // Position in check_sum_ of the next byte to fold.
+  string check_sum_;  // The digest; always kCheckSumLength bytes long.
+
+  DISALLOW_COPY_AND_ASSIGN(CheckSummer);
+};
+
+// Define the UTF8 string conversion functions to log an error and
+// return false when the ICU Library is missing or disabled.
+#ifndef HAVE_ICU
+
+// Fallback used when HAVE_ICU is not defined: performs no conversion,
+// logs an error and returns false. Both arguments are ignored.
+template <class Label>
+bool UTF8StringToLabels(const string&, std::vector<Label>*) {
+ LOG(ERROR) << "UTF8StringToLabels: ICU Library required for UTF8 handling";
+ return false;
+}
+
+// Fallback used when HAVE_ICU is not defined: performs no conversion,
+// logs an error and returns false. Both arguments are ignored.
+template <class Label>
+bool LabelsToUTF8String(const std::vector<Label>&, string*) {
+ LOG(ERROR) << "LabelsToUTF8String: ICU Library required for UTF8 handling";
+ return false;
+}
+
+#endif // HAVE_ICU
+
+} // namespace fst
+
+
+// Define missing hash functions if needed
+#ifndef HAVE_STD__TR1__HASH_LONG_LONG_UNSIGNED_
+namespace std {
+namespace tr1 {
+
+template <class T> class hash;
+
+// Identity hash for uint64: the value itself, converted to size_t, is
+// used as the hash code.
+template<> struct hash<uint64> {
+ size_t operator()(uint64 x) const { return x; }
+};
+
+}
+}
+#endif // HAVE_STD__TR1__HASH_LONG_LONG_UNSIGNED_
+
+#endif // FST_LIB_COMPAT_H__
diff --git a/src/include/fst/complement.h b/src/include/fst/complement.h
new file mode 100644
index 0000000..dacf396
--- /dev/null
+++ b/src/include/fst/complement.h
@@ -0,0 +1,338 @@
+// complement.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Class to complement an Fst.
+
+#ifndef FST_LIB_COMPLEMENT_H__
+#define FST_LIB_COMPLEMENT_H__
+
+#include <algorithm>
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/fst.h>
+#include <fst/test-properties.h>
+
+
+namespace fst {
+
+template <class A> class ComplementFst;
+
+// Implementation of delayed ComplementFst. The algorithm used
+// completes the (deterministic) FSA and then exchanges final and
+// non-final states. Completion, i.e. ensuring that all labels can be
+// read from every state, is accomplished by using RHO labels, which
+// match all labels that are otherwise not found leaving a state. The
+// first state in the output is reserved to be a new state that is the
+// destination of all RHO labels. Each remaining output state s
+// corresponds to input state s - 1. The first arc in the output at
+// these states is the rho label, the remaining arcs correspond to the
+// input arcs.
+template <class A>
+class ComplementFstImpl : public FstImpl<A> {
+ public:
+ using FstImpl<A>::SetType;
+ using FstImpl<A>::SetProperties;
+ using FstImpl<A>::SetInputSymbols;
+ using FstImpl<A>::SetOutputSymbols;
+
+ friend class StateIterator< ComplementFst<A> >;
+ friend class ArcIterator< ComplementFst<A> >;
+
+ typedef A Arc;
+ typedef typename A::Label Label;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+
+ // Keeps a private copy of fst (obtained with Copy()) and derives this
+ // FST's properties and symbol tables from it.
+ explicit ComplementFstImpl(const Fst<A> &fst) : fst_(fst.Copy()) {
+ SetType("complement");
+ uint64 props = fst.Properties(kILabelSorted, false);
+ SetProperties(ComplementProperties(props), kCopyProperties);
+ SetInputSymbols(fst.InputSymbols());
+ SetOutputSymbols(fst.OutputSymbols());
+ }
+
+ // Copy constructor: copies the wrapped FST with Copy() and takes the
+ // properties and symbol tables from impl.
+ ComplementFstImpl(const ComplementFstImpl<A> &impl)
+ : fst_(impl.fst_->Copy()) {
+ SetType("complement");
+ SetProperties(impl.Properties(), kCopyProperties);
+ SetInputSymbols(impl.InputSymbols());
+ SetOutputSymbols(impl.OutputSymbols());
+ }
+
+ ~ComplementFstImpl() { delete fst_; }
+
+ // Start state: kNoStateId when the error property is set; otherwise the
+ // input start state shifted by one, or state 0 when the input has no
+ // start state.
+ StateId Start() const {
+ if (Properties(kError))
+ return kNoStateId;
+
+ StateId start = fst_->Start();
+ if (start != kNoStateId)
+ return start + 1;
+ else
+ return 0;
+ }
+
+ // Exchange final and non-final states; make rho destination state final.
+ Weight Final(StateId s) const {
+ if (s == 0 || fst_->Final(s - 1) == Weight::Zero())
+ return Weight::One();
+ else
+ return Weight::Zero();
+ }
+
+ // State 0 has a single arc; any other state has one arc more than the
+ // corresponding input state s - 1.
+ size_t NumArcs(StateId s) const {
+ if (s == 0)
+ return 1;
+ else
+ return fst_->NumArcs(s - 1) + 1;
+ }
+
+ // Epsilon counts: 0 for state 0, otherwise those of input state s - 1.
+ size_t NumInputEpsilons(StateId s) const {
+ return s == 0 ? 0 : fst_->NumInputEpsilons(s - 1);
+ }
+
+ size_t NumOutputEpsilons(StateId s) const {
+ return s == 0 ? 0 : fst_->NumOutputEpsilons(s - 1);
+ }
+
+
+ uint64 Properties() const { return Properties(kFstProperties); }
+
+ // Set error if found; return FST impl properties.
+ uint64 Properties(uint64 mask) const {
+ if ((mask & kError) && fst_->Properties(kError, false))
+ SetProperties(kError, kError);
+ return FstImpl<Arc>::Properties(mask);
+ }
+
+
+ private:
+ // Copy of the FST being complemented; deleted in the destructor.
+ const Fst<A> *fst_;
+
+ void operator=(const ComplementFstImpl<A> &fst); // Disallow
+};
+
+
+// Complements an automaton. This is a library-internal operation that
+// introduces a (negative) 'rho' label; use Difference/DifferenceFst in
+// user code, which will not see this label. This version is a delayed Fst.
+//
+// This class attaches interface to implementation and handles
+// reference counting, delegating most methods to ImplToFst.
+template <class A>
+class ComplementFst : public ImplToFst< ComplementFstImpl<A> > {
+ public:
+ friend class StateIterator< ComplementFst<A> >;
+ friend class ArcIterator< ComplementFst<A> >;
+
+ using ImplToFst< ComplementFstImpl<A> >::GetImpl;
+
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+ typedef typename A::Label Label;
+ typedef ComplementFstImpl<A> Impl;
+
+ // Wraps fst. The argument must be an unweighted, epsilon-free,
+ // input-deterministic acceptor; otherwise an error is reported and the
+ // error property is set on the implementation.
+ explicit ComplementFst(const Fst<A> &fst)
+ : ImplToFst<Impl>(new Impl(fst)) {
+ uint64 props = kUnweighted | kNoEpsilons | kIDeterministic | kAcceptor;
+ if (fst.Properties(props, true) != props) {
+ FSTERROR() << "ComplementFst: argument not an unweighted "
+ << "epsilon-free deterministic acceptor";
+ GetImpl()->SetProperties(kError, kError);
+ }
+ }
+
+ // See Fst<>::Copy() for doc.
+ ComplementFst(const ComplementFst<A> &fst, bool safe = false)
+ : ImplToFst<Impl>(fst, safe) {}
+
+ // Get a copy of this ComplementFst. See Fst<>::Copy() for further doc.
+ virtual ComplementFst<A> *Copy(bool safe = false) const {
+ return new ComplementFst<A>(*this, safe);
+ }
+
+ // Both are defined after the iterator specializations further down in
+ // this file; each allocates the matching specialized iterator.
+ virtual inline void InitStateIterator(StateIteratorData<A> *data) const;
+
+ virtual inline void InitArcIterator(StateId s,
+ ArcIteratorData<A> *data) const;
+
+ // Label that represents the rho transition.
+ // We use a negative value, which is thus private to the library and
+ // which will preserve FST label sort order.
+ static const Label kRhoLabel = -2;
+ private:
+ // Makes visible to friends.
+ Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
+
+ void operator=(const ComplementFst<A> &fst); // disallow
+};
+
+// Out-of-class definition of the static member initialized in the class.
+template <class A> const typename A::Label ComplementFst<A>::kRhoLabel;
+
+
+// Specialization for ComplementFst.
+// Visits the extra state 0 first, then one output state per state of the
+// wrapped FST, as reported by an iterator over that FST.
+template <class A>
+class StateIterator< ComplementFst<A> > : public StateIteratorBase<A> {
+ public:
+ typedef typename A::StateId StateId;
+ typedef typename A::Label Label;
+
+ explicit StateIterator(const ComplementFst<A> &fst)
+ : siter_(*fst.GetImpl()->fst_), s_(0) {
+ }
+
+ // Never done while on state 0; afterwards done when the wrapped
+ // iterator is done.
+ bool Done() const { return s_ > 0 && siter_.Done(); }
+
+ StateId Value() const { return s_; }
+
+ // The wrapped iterator is only advanced once state 0 has been left, so
+ // that output state s lines up with the wrapped iterator's state s - 1.
+ void Next() {
+ if (s_ != 0)
+ siter_.Next();
+ ++s_;
+ }
+
+ void Reset() {
+ siter_.Reset();
+ s_ = 0;
+ }
+
+ private:
+ // This allows base class virtual access to non-virtual derived-
+ // class members of the same name. It makes the derived class more
+ // efficient to use but unsafe to further derive.
+ virtual bool Done_() const { return Done(); }
+ virtual StateId Value_() const { return Value(); }
+ virtual void Next_() { Next(); }
+ virtual void Reset_() { Reset(); }
+
+ StateIterator< Fst<A> > siter_;
+ StateId s_;
+
+ DISALLOW_COPY_AND_ASSIGN(StateIterator);
+};
+
+
+// Specialization for ComplementFst.
+// Position 0 of every state is the rho arc leading to state 0. For a
+// state s other than 0, the following positions are the arcs of the
+// wrapped FST's state s - 1 with their destination shifted by one.
+template <class A>
+class ArcIterator< ComplementFst<A> > : public ArcIteratorBase<A> {
+ public:
+ typedef typename A::StateId StateId;
+ typedef typename A::Label Label;
+ typedef typename A::Weight Weight;
+
+ // For state 0 no underlying iterator is created (aiter_ stays null).
+ ArcIterator(const ComplementFst<A> &fst, StateId s)
+ : aiter_(0), s_(s), pos_(0) {
+ if (s_ != 0)
+ aiter_ = new ArcIterator< Fst<A> >(*fst.GetImpl()->fst_, s - 1);
+ }
+
+ virtual ~ArcIterator() { delete aiter_; }
+
+ // Never done at position 0; afterwards state 0 is done immediately and
+ // other states are done when the underlying iterator is done.
+ bool Done() const {
+ if (s_ != 0)
+ return pos_ > 0 && aiter_->Done();
+ else
+ return pos_ > 0;
+ }
+
+ // Adds the rho label to the rho destination state.
+ const A& Value() const {
+ if (pos_ == 0) {
+ arc_.ilabel = arc_.olabel = ComplementFst<A>::kRhoLabel;
+ arc_.weight = Weight::One();
+ arc_.nextstate = 0;
+ } else {
+ arc_ = aiter_->Value();
+ ++arc_.nextstate;
+ }
+ return arc_;
+ }
+
+ // The underlying iterator is only advanced after the rho arc at
+ // position 0 has been passed.
+ void Next() {
+ if (s_ != 0 && pos_ > 0)
+ aiter_->Next();
+ ++pos_;
+ }
+
+ size_t Position() const {
+ return pos_;
+ }
+
+ void Reset() {
+ if (s_ != 0)
+ aiter_->Reset();
+ pos_ = 0;
+ }
+
+ // Position a of this iterator corresponds to position a - 1 of the
+ // underlying iterator; position 0 is the rho arc.
+ void Seek(size_t a) {
+ if (s_ != 0) {
+ if (a == 0) {
+ aiter_->Reset();
+ } else {
+ aiter_->Seek(a - 1);
+ }
+ }
+ pos_ = a;
+ }
+
+ // Flags are fixed: Flags() always reports kArcValueFlags and SetFlags()
+ // ignores its arguments.
+ uint32 Flags() const {
+ return kArcValueFlags;
+ }
+
+ void SetFlags(uint32 f, uint32 m) {}
+
+ private:
+ // This allows base class virtual access to non-virtual derived-
+ // class members of the same name. It makes the derived class more
+ // efficient to use but unsafe to further derive.
+ virtual bool Done_() const { return Done(); }
+ virtual const A& Value_() const { return Value(); }
+ virtual void Next_() { Next(); }
+ virtual size_t Position_() const { return Position(); }
+ virtual void Reset_() { Reset(); }
+ virtual void Seek_(size_t a) { Seek(a); }
+ uint32 Flags_() const { return Flags(); }
+ void SetFlags_(uint32 f, uint32 m) { SetFlags(f, m); }
+
+ ArcIterator< Fst<A> > *aiter_; // Owned; null when s_ == 0.
+ StateId s_; // State of the ComplementFst being iterated.
+ size_t pos_; // Current position; 0 is the rho arc.
+ mutable A arc_; // Buffer returned by Value().
+ DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+
+// Allocates the specialized state iterator for this FST and stores it in
+// data->base.
+template <class A> inline void
+ComplementFst<A>::InitStateIterator(StateIteratorData<A> *data) const {
+ data->base = new StateIterator< ComplementFst<A> >(*this);
+}
+
+// Allocates the specialized arc iterator for state s of this FST and
+// stores it in data->base.
+template <class A> inline void
+ComplementFst<A>::InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
+ data->base = new ArcIterator< ComplementFst<A> >(*this, s);
+}
+
+
+// Useful alias when using StdArc.
+typedef ComplementFst<StdArc> StdComplementFst;
+
+} // namespace fst
+
+#endif // FST_LIB_COMPLEMENT_H__
diff --git a/src/include/fst/compose-filter.h b/src/include/fst/compose-filter.h
new file mode 100644
index 0000000..6bf7736
--- /dev/null
+++ b/src/include/fst/compose-filter.h
@@ -0,0 +1,542 @@
+// compose-filter.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Classes for filtering the composition matches, e.g. for correct epsilon
+// handling.
+
+#ifndef FST_LIB_COMPOSE_FILTER_H__
+#define FST_LIB_COMPOSE_FILTER_H__
+
+#include <fst/fst.h>
+#include <fst/fst-decl.h> // For optional argument declarations
+#include <fst/matcher.h>
+
+
+namespace fst {
+
+
+// COMPOSITION FILTER STATE - this represents the state of
+// the composition filter. It has the form:
+//
+// class FilterState {
+// public:
+// // Required constructors
+// FilterState();
+// FilterState(const FilterState &f);
+// // An invalid filter state.
+// static const FilterState NoState();
+// // Maps state to integer for hashing.
+// size_t Hash() const;
+// // Equality of filter states.
+// bool operator==(const FilterState &f) const;
+// // Inequality of filter states.
+// bool operator!=(const FilterState &f) const;
+// // Assignment to filter states.
+// FilterState& operator=(const FilterState& f);
+// };
+
+
+// Filter state stored as a signed integral value of type T. A
+// default-constructed state holds kNoStateId and serves as NoState().
+template <typename T>
+class IntegerFilterState {
+ public:
+  // Builds the invalid state (kNoStateId).
+  IntegerFilterState() : state_(kNoStateId) {}
+
+  // Wraps the given integer.
+  explicit IntegerFilterState(T s) : state_(s) {}
+
+  // The invalid filter state; identical to a default-constructed one.
+  static const IntegerFilterState NoState() { return IntegerFilterState(); }
+
+  // Hash value: the stored integer converted to size_t.
+  size_t Hash() const { return static_cast<size_t>(state_); }
+
+  // Two states are equal exactly when their stored integers are equal.
+  bool operator==(const IntegerFilterState &other) const {
+    return state_ == other.state_;
+  }
+
+  bool operator!=(const IntegerFilterState &other) const {
+    return state_ != other.state_;
+  }
+
+  // Accessors for the stored integer.
+  T GetState() const { return state_; }
+  void SetState(T state) { state_ = state; }
+
+ private:
+  T state_;
+};
+
+typedef IntegerFilterState<signed char> CharFilterState;
+typedef IntegerFilterState<short> ShortFilterState;
+typedef IntegerFilterState<int> IntFilterState;
+
+
+// Filter state stored as a weight of class W. A default-constructed
+// state holds W::Zero() and serves as NoState().
+template <class W>
+class WeightFilterState {
+ public:
+  // Builds the invalid state (W::Zero()).
+  WeightFilterState() : weight_(W::Zero()) {}
+
+  // Wraps the given weight.
+  explicit WeightFilterState(W w) : weight_(w) {}
+
+  // The invalid filter state; identical to a default-constructed one.
+  static const WeightFilterState NoState() { return WeightFilterState(); }
+
+  // Hash value: delegated to the stored weight.
+  size_t Hash() const { return weight_.Hash(); }
+
+  // Comparisons are delegated to the weight's own operators.
+  bool operator==(const WeightFilterState &other) const {
+    return weight_ == other.weight_;
+  }
+
+  bool operator!=(const WeightFilterState &other) const {
+    return weight_ != other.weight_;
+  }
+
+  // Accessors for the stored weight.
+  W GetWeight() const { return weight_; }
+  void SetWeight(W w) { weight_ = w; }
+
+ private:
+  W weight_;
+};
+
+
+// Filter state made of two component filter states F1 and F2. A
+// default-constructed pair holds both components' NoState() and serves as
+// this class's NoState().
+template <class F1, class F2>
+class PairFilterState {
+ public:
+  // Builds the invalid pair from the components' invalid states.
+  PairFilterState() : f1_(F1::NoState()), f2_(F2::NoState()) {}
+
+  // Builds a pair from explicit component states.
+  PairFilterState(const F1 &f1, const F2 &f2) : f1_(f1), f2_(f2) {}
+
+  // The invalid filter state; identical to a default-constructed one.
+  static const PairFilterState NoState() { return PairFilterState(); }
+
+  // Combines the component hashes: the first hash is rotated left by five
+  // bits and then XORed with the second.
+  size_t Hash() const {
+    const size_t first = f1_.Hash();
+    const size_t second = f2_.Hash();
+    const int lshift = 5;
+    const int rshift = CHAR_BIT * sizeof(size_t) - 5;
+    return (first << lshift) ^ (first >> rshift) ^ second;
+  }
+
+  // Equal when both components compare equal.
+  bool operator==(const PairFilterState &other) const {
+    return f1_ == other.f1_ && f2_ == other.f2_;
+  }
+
+  // Unequal when either component compares unequal.
+  bool operator!=(const PairFilterState &other) const {
+    return f1_ != other.f1_ || f2_ != other.f2_;
+  }
+
+  // Read access to the components.
+  const F1 &GetState1() const { return f1_; }
+  const F2 &GetState2() const { return f2_; }
+
+  // Replaces both components at once.
+  void SetState(const F1 &f1, const F2 &f2) {
+    f1_ = f1;
+    f2_ = f2;
+  }
+
+ private:
+  F1 f1_;
+  F2 f2_;
+};
+
+
+// COMPOSITION FILTERS - these determine which matches are allowed to
+// proceed. The filter's state is represented by the type
+// ComposeFilter::FilterState. The basic filters handle correct
+// epsilon matching. Their interface is:
+//
+// template <class M1, class M2>
+// class ComposeFilter {
+// public:
+// typedef typename M1::FST FST1;
+// typedef typename M2::FST FST2;
+// typedef typename FST1::Arc Arc;
+// typedef ... FilterState;
+// typedef ... Matcher1;
+// typedef ... Matcher2;
+//
+// // Required constructors.
+// ComposeFilter(const FST1 &fst1, const FST2 &fst2,
+// M1 *matcher1 = 0, M2 *matcher2 = 0);
+// // If safe=true, the copy is thread-safe. See Fst<>::Copy()
+// // for further doc.
+// ComposeFilter(const ComposeFilter<M1, M2> &filter,
+// bool safe = false);
+// // Return start state of filter.
+// FilterState Start() const;
+// // Specifies current composition state.
+// void SetState(StateId s1, StateId s2, const FilterState &f);
+//
+// // Apply filter at current composition state to these transitions.
+// // If an arc label to be matched is kNoLabel, then that side
+// // does not consume a symbol. Returns the new filter state or,
+// // if disallowed, FilterState::NoState(). The filter is permitted to
+// // modify its inputs, e.g. for optimizations.
+// FilterState FilterArc(Arc *arc1, Arc *arc2) const;
+//
+// // Apply filter at current composition state to these final weights
+// // (cf. superfinal transitions). The filter may modify its inputs,
+// // e.g. for optimizations.
+// void FilterFinal(Weight *final1, Weight *final2) const;
+//
+// // Return resp matchers. Ownership stays with filter. These
+// // methods allow the filter to access and possibly modify
+// // the composition matchers (useful e.g. with lookahead).
+// Matcher1 *GetMatcher1();
+// Matcher2 *GetMatcher2();
+//
+// // This specifies how the filter affects the composition result
+// // properties. It takes as argument the properties that would
+// // apply with a trivial composition filter.
+// uint64 Properties(uint64 props) const;
+// };
+
+// This filter requires epsilons on FST1 to be read before epsilons on FST2.
+//
+// It uses filter states FilterState(0) (the start state) and
+// FilterState(1); FilterState::NoState() marks a disallowed match.
+template <class M1, class M2>
+class SequenceComposeFilter {
+ public:
+  typedef typename M1::FST FST1;
+  typedef typename M2::FST FST2;
+  typedef typename FST1::Arc Arc;
+  typedef CharFilterState FilterState;
+  typedef M1 Matcher1;
+  typedef M2 Matcher2;
+
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+
+  // Uses the supplied matchers when non-null, otherwise creates an output
+  // matcher on fst1 and an input matcher on fst2. Either way the filter
+  // deletes both matchers in its destructor.
+  SequenceComposeFilter(const FST1 &fst1, const FST2 &fst2,
+                        M1 *matcher1 = 0, M2 *matcher2 = 0)
+      : matcher1_(matcher1 ? matcher1 : new M1(fst1, MATCH_OUTPUT)),
+        matcher2_(matcher2 ? matcher2 : new M2(fst2, MATCH_INPUT)),
+        fst1_(matcher1_->GetFst()),
+        s1_(kNoStateId),
+        s2_(kNoStateId),
+        f_(kNoStateId),
+        alleps1_(false),  // Defined values in case FilterArc() is called
+        noeps1_(false) {}  // before the first SetState().
+
+  // Copies the matchers (passing 'safe' through to their Copy()) and starts
+  // with no current composition state.
+  SequenceComposeFilter(const SequenceComposeFilter<M1, M2> &filter,
+                        bool safe = false)
+      : matcher1_(filter.matcher1_->Copy(safe)),
+        matcher2_(filter.matcher2_->Copy(safe)),
+        fst1_(matcher1_->GetFst()),
+        s1_(kNoStateId),
+        s2_(kNoStateId),
+        f_(kNoStateId),
+        alleps1_(false),
+        noeps1_(false) {}
+
+  ~SequenceComposeFilter() {
+    delete matcher1_;
+    delete matcher2_;
+  }
+
+  // Return start state of filter.
+  FilterState Start() const { return FilterState(0); }
+
+  // Specifies current composition state and caches whether s1 has only
+  // output-epsilon arcs (and is non-final) or has none at all. Does nothing
+  // when the state is unchanged.
+  void SetState(StateId s1, StateId s2, const FilterState &f) {
+    if (s1_ == s1 && s2_ == s2 && f == f_)
+      return;
+    s1_ = s1;
+    s2_ = s2;
+    f_ = f;
+    size_t na1 = internal::NumArcs(fst1_, s1);
+    size_t ne1 = internal::NumOutputEpsilons(fst1_, s1);
+    bool fin1 = internal::Final(fst1_, s1) != Weight::Zero();
+    alleps1_ = na1 == ne1 && !fin1;
+    noeps1_ = ne1 == 0;
+  }
+
+  // Apply filter at current composition state to these transitions.
+  // A label of kNoLabel means that side does not consume a symbol.
+  FilterState FilterArc(Arc *arc1, Arc *arc2) const {
+    if (arc1->olabel == kNoLabel) {
+      // FST1 stays put: disallowed if s1_ offers only epsilons; otherwise
+      // the next state records whether s1_ had epsilons pending.
+      if (alleps1_) return FilterState::NoState();
+      return noeps1_ ? FilterState(0) : FilterState(1);
+    } else if (arc2->ilabel == kNoLabel) {
+      // FST2 stays put: only allowed from filter state 0.
+      return f_ != FilterState(0) ? FilterState::NoState() : FilterState(0);
+    } else {
+      // Both sides move: a label-0 (epsilon) match is disallowed.
+      return arc1->olabel == 0 ? FilterState::NoState() : FilterState(0);
+    }
+  }
+
+  // Final weights are left unmodified.
+  void FilterFinal(Weight *, Weight *) const {}
+
+  // Return resp matchers. Ownership stays with filter.
+  Matcher1 *GetMatcher1() { return matcher1_; }
+  Matcher2 *GetMatcher2() { return matcher2_; }
+
+  // The composition properties are passed through unchanged.
+  uint64 Properties(uint64 props) const { return props; }
+
+ private:
+  Matcher1 *matcher1_;
+  Matcher2 *matcher2_;
+  const FST1 &fst1_;
+  StateId s1_;     // Current fst1_ state;
+  StateId s2_;     // Current fst2_ state;
+  FilterState f_;  // Current filter state
+  bool alleps1_;   // Only epsilons (and non-final) leaving s1_?
+  bool noeps1_;    // No epsilons leaving s1_?
+
+  void operator=(const SequenceComposeFilter<M1, M2> &);  // disallow
+};
+
+
+// This filter requires epsilons on FST2 to be read before epsilons on FST1.
+//
+// It uses filter states FilterState(0) (the start state) and
+// FilterState(1); FilterState::NoState() marks a disallowed match.
+template <class M1, class M2>
+class AltSequenceComposeFilter {
+ public:
+  typedef typename M1::FST FST1;
+  typedef typename M2::FST FST2;
+  typedef typename FST1::Arc Arc;
+  typedef CharFilterState FilterState;
+  typedef M1 Matcher1;
+  typedef M2 Matcher2;
+
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+
+  // Uses the supplied matchers when non-null, otherwise creates an output
+  // matcher on fst1 and an input matcher on fst2. Either way the filter
+  // deletes both matchers in its destructor.
+  AltSequenceComposeFilter(const FST1 &fst1, const FST2 &fst2,
+                           M1 *matcher1 = 0, M2 *matcher2 = 0)
+      : matcher1_(matcher1 ? matcher1 : new M1(fst1, MATCH_OUTPUT)),
+        matcher2_(matcher2 ? matcher2 : new M2(fst2, MATCH_INPUT)),
+        fst2_(matcher2_->GetFst()),
+        s1_(kNoStateId),
+        s2_(kNoStateId),
+        f_(kNoStateId),
+        alleps2_(false),  // Defined values in case FilterArc() is called
+        noeps2_(false) {}  // before the first SetState().
+
+  // Copies the matchers (passing 'safe' through to their Copy()) and starts
+  // with no current composition state.
+  AltSequenceComposeFilter(const AltSequenceComposeFilter<M1, M2> &filter,
+                           bool safe = false)
+      : matcher1_(filter.matcher1_->Copy(safe)),
+        matcher2_(filter.matcher2_->Copy(safe)),
+        fst2_(matcher2_->GetFst()),
+        s1_(kNoStateId),
+        s2_(kNoStateId),
+        f_(kNoStateId),
+        alleps2_(false),
+        noeps2_(false) {}
+
+  ~AltSequenceComposeFilter() {
+    delete matcher1_;
+    delete matcher2_;
+  }
+
+  // Return start state of filter.
+  FilterState Start() const { return FilterState(0); }
+
+  // Specifies current composition state and caches whether s2 has only
+  // input-epsilon arcs (and is non-final) or has none at all. Does nothing
+  // when the state is unchanged.
+  void SetState(StateId s1, StateId s2, const FilterState &f) {
+    if (s1_ == s1 && s2_ == s2 && f == f_)
+      return;
+    s1_ = s1;
+    s2_ = s2;
+    f_ = f;
+    size_t na2 = internal::NumArcs(fst2_, s2);
+    size_t ne2 = internal::NumInputEpsilons(fst2_, s2);
+    bool fin2 = internal::Final(fst2_, s2) != Weight::Zero();
+    alleps2_ = na2 == ne2 && !fin2;
+    noeps2_ = ne2 == 0;
+  }
+
+  // Apply filter at current composition state to these transitions.
+  // A label of kNoLabel means that side does not consume a symbol.
+  FilterState FilterArc(Arc *arc1, Arc *arc2) const {
+    if (arc2->ilabel == kNoLabel) {
+      // FST2 stays put: disallowed if s2_ offers only epsilons; otherwise
+      // the next state records whether s2_ had epsilons pending.
+      if (alleps2_) return FilterState::NoState();
+      return noeps2_ ? FilterState(0) : FilterState(1);
+    } else if (arc1->olabel == kNoLabel) {
+      // FST1 stays put: disallowed from filter state 1.
+      return f_ == FilterState(1) ? FilterState::NoState() : FilterState(0);
+    } else {
+      // Both sides move: a label-0 (epsilon) match is disallowed.
+      return arc1->olabel == 0 ? FilterState::NoState() : FilterState(0);
+    }
+  }
+
+  // Final weights are left unmodified.
+  void FilterFinal(Weight *, Weight *) const {}
+
+  // Return resp matchers. Ownership stays with filter.
+  Matcher1 *GetMatcher1() { return matcher1_; }
+  Matcher2 *GetMatcher2() { return matcher2_; }
+
+  // The composition properties are passed through unchanged.
+  uint64 Properties(uint64 props) const { return props; }
+
+ private:
+  Matcher1 *matcher1_;
+  Matcher2 *matcher2_;
+  const FST2 &fst2_;
+  StateId s1_;     // Current fst1_ state;
+  StateId s2_;     // Current fst2_ state;
+  FilterState f_;  // Current filter state
+  bool alleps2_;   // Only epsilons (and non-final) leaving s2_?
+  bool noeps2_;    // No epsilons leaving s2_?
+
+  void operator=(const AltSequenceComposeFilter<M1, M2> &);  // disallow
+};
+
+
+// This filter requires epsilons on FST1 to be matched with epsilons on FST2
+// whenever possible.
+//
+// It uses filter states FilterState(0) (the start state), FilterState(1)
+// and FilterState(2); FilterState::NoState() marks a disallowed match.
+template <class M1, class M2>
+class MatchComposeFilter {
+ public:
+  typedef typename M1::FST FST1;
+  typedef typename M2::FST FST2;
+  typedef typename FST1::Arc Arc;
+  typedef CharFilterState FilterState;
+  typedef M1 Matcher1;
+  typedef M2 Matcher2;
+
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+
+  // Uses the supplied matchers when non-null, otherwise creates an output
+  // matcher on fst1 and an input matcher on fst2. Either way the filter
+  // deletes both matchers in its destructor.
+  MatchComposeFilter(const FST1 &fst1, const FST2 &fst2,
+                     M1 *matcher1 = 0, M2 *matcher2 = 0)
+      : matcher1_(matcher1 ? matcher1 : new M1(fst1, MATCH_OUTPUT)),
+        matcher2_(matcher2 ? matcher2 : new M2(fst2, MATCH_INPUT)),
+        fst1_(matcher1_->GetFst()),
+        fst2_(matcher2_->GetFst()),
+        s1_(kNoStateId),
+        s2_(kNoStateId),
+        f_(kNoStateId),
+        alleps1_(false),  // Defined values in case FilterArc() is called
+        alleps2_(false),  // before the first SetState().
+        noeps1_(false),
+        noeps2_(false) {}
+
+  // Copies the matchers (passing 'safe' through to their Copy()) and starts
+  // with no current composition state.
+  MatchComposeFilter(const MatchComposeFilter<M1, M2> &filter,
+                     bool safe = false)
+      : matcher1_(filter.matcher1_->Copy(safe)),
+        matcher2_(filter.matcher2_->Copy(safe)),
+        fst1_(matcher1_->GetFst()),
+        fst2_(matcher2_->GetFst()),
+        s1_(kNoStateId),
+        s2_(kNoStateId),
+        f_(kNoStateId),
+        alleps1_(false),
+        alleps2_(false),
+        noeps1_(false),
+        noeps2_(false) {}
+
+  ~MatchComposeFilter() {
+    delete matcher1_;
+    delete matcher2_;
+  }
+
+  // Return start state of filter.
+  FilterState Start() const { return FilterState(0); }
+
+  // Specifies current composition state and caches, for each side, whether
+  // the state has only epsilon arcs (and is non-final) or has none at all.
+  // Does nothing when the state is unchanged.
+  void SetState(StateId s1, StateId s2, const FilterState &f) {
+    if (s1_ == s1 && s2_ == s2 && f == f_)
+      return;
+    s1_ = s1;
+    s2_ = s2;
+    f_ = f;
+    size_t na1 = internal::NumArcs(fst1_, s1);
+    size_t ne1 = internal::NumOutputEpsilons(fst1_, s1);
+    bool f1 = internal::Final(fst1_, s1) != Weight::Zero();
+    alleps1_ = na1 == ne1 && !f1;
+    noeps1_ = ne1 == 0;
+    size_t na2 = internal::NumArcs(fst2_, s2);
+    size_t ne2 = internal::NumInputEpsilons(fst2_, s2);
+    bool f2 = internal::Final(fst2_, s2) != Weight::Zero();
+    alleps2_ = na2 == ne2 && !f2;
+    noeps2_ = ne2 == 0;
+  }
+
+  // Apply filter at current composition state to these transitions.
+  // A label of kNoLabel means that side does not consume a symbol.
+  FilterState FilterArc(Arc *arc1, Arc *arc2) const {
+    if (arc2->ilabel == kNoLabel)  // Epsilon on Fst1
+      return f_ == FilterState(0) ?
+          (noeps2_ ? FilterState(0) :
+           (alleps2_ ? FilterState::NoState(): FilterState(1))) :
+          (f_ == FilterState(1) ? FilterState(1) : FilterState::NoState());
+    else if (arc1->olabel == kNoLabel)  // Epsilon on Fst2
+      return f_ == FilterState(0) ?
+          (noeps1_ ? FilterState(0) :
+           (alleps1_ ? FilterState::NoState() : FilterState(2))) :
+          (f_ == FilterState(2) ? FilterState(2) : FilterState::NoState());
+    else if (arc1->olabel == 0)  // Epsilon on both
+      return f_ == FilterState(0) ? FilterState(0) : FilterState::NoState();
+    else  // Both are non-epsilons
+      return FilterState(0);
+  }
+
+  // Final weights are left unmodified.
+  void FilterFinal(Weight *, Weight *) const {}
+
+  // Return resp matchers. Ownership stays with filter.
+  Matcher1 *GetMatcher1() { return matcher1_; }
+  Matcher2 *GetMatcher2() { return matcher2_; }
+
+  // The composition properties are passed through unchanged.
+  uint64 Properties(uint64 props) const { return props; }
+
+ private:
+  Matcher1 *matcher1_;
+  Matcher2 *matcher2_;
+  const FST1 &fst1_;
+  const FST2 &fst2_;
+  StateId s1_;              // Current fst1_ state;
+  StateId s2_;              // Current fst2_ state;
+  FilterState f_;           // Current filter state ID
+  bool alleps1_, alleps2_;  // Only epsilons (and non-final) leaving s1, s2?
+  bool noeps1_, noeps2_;    // No epsilons leaving s1, s2?
+
+  void operator=(const MatchComposeFilter<M1, M2> &);  // disallow
+};
+
+
+// Wraps a composition filter F for use with the MultiEpsMatcher. It
+// decides whether 'multi-epsilons' are preserved in the composition
+// output (rather than rewritten as 0) and ensures correct properties.
+template <class F>
+class MultiEpsFilter {
+ public:
+  typedef typename F::FST1 FST1;
+  typedef typename F::FST2 FST2;
+  typedef typename F::Arc Arc;
+  typedef typename F::Matcher1 Matcher1;
+  typedef typename F::Matcher2 Matcher2;
+  typedef typename F::FilterState FilterState;
+  typedef MultiEpsFilter<F> Filter;
+
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+
+  // Builds the wrapped filter from the FSTs and optional matchers;
+  // keep_multi_eps selects the label rewriting done in FilterArc().
+  MultiEpsFilter(const FST1 &fst1, const FST2 &fst2,
+                 Matcher1 *matcher1 = 0, Matcher2 *matcher2 = 0,
+                 bool keep_multi_eps = false)
+      : filter_(fst1, fst2, matcher1, matcher2),
+        keep_multi_eps_(keep_multi_eps) {}
+
+  // Copies the wrapped filter, passing 'safe' through to it.
+  MultiEpsFilter(const Filter &filter, bool safe = false)
+      : filter_(filter.filter_, safe),
+        keep_multi_eps_(filter.keep_multi_eps_) {}
+
+  // Start state of the wrapped filter.
+  FilterState Start() const { return filter_.Start(); }
+
+  // Forwards the current composition state to the wrapped filter.
+  void SetState(StateId s1, StateId s2, const FilterState &f) {
+    filter_.SetState(s1, s2, f);
+  }
+
+  // Runs the wrapped filter first. When multi-epsilons are kept, a side
+  // that consumed no symbol (label kNoLabel) then has its other label
+  // overwritten with the corresponding label of the opposite arc.
+  FilterState FilterArc(Arc *arc1, Arc *arc2) const {
+    const FilterState result = filter_.FilterArc(arc1, arc2);
+    if (!keep_multi_eps_) return result;
+    if (arc1->olabel == kNoLabel) {
+      arc1->ilabel = arc2->ilabel;
+    }
+    if (arc2->ilabel == kNoLabel) {
+      arc2->olabel = arc1->olabel;
+    }
+    return result;
+  }
+
+  // Forwards final-weight filtering to the wrapped filter.
+  void FilterFinal(Weight *w1, Weight *w2) const {
+    filter_.FilterFinal(w1, w2);
+  }
+
+  // Return resp matchers. Ownership stays with filter.
+  Matcher1 *GetMatcher1() { return filter_.GetMatcher1(); }
+  Matcher2 *GetMatcher2() { return filter_.GetMatcher2(); }
+
+  // Keeps only those properties of the wrapped filter's result that are
+  // in both kILabelInvariantProperties and kOLabelInvariantProperties.
+  uint64 Properties(uint64 iprops) const {
+    const uint64 invariant =
+        kILabelInvariantProperties & kOLabelInvariantProperties;
+    return filter_.Properties(iprops) & invariant;
+  }
+
+ private:
+  F filter_;
+  bool keep_multi_eps_;
+};
+
+} // namespace fst
+
+
+#endif // FST_LIB_COMPOSE_FILTER_H__
diff --git a/src/include/fst/compose.h b/src/include/fst/compose.h
new file mode 100644
index 0000000..c0bf4b1
--- /dev/null
+++ b/src/include/fst/compose.h
@@ -0,0 +1,673 @@
+// compose.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Class to compute the composition of two FSTs
+
+#ifndef FST_LIB_COMPOSE_H__
+#define FST_LIB_COMPOSE_H__
+
+#include <algorithm>
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/cache.h>
+#include <fst/compose-filter.h>
+#include <fst/lookahead-filter.h>
+#include <fst/matcher.h>
+#include <fst/state-table.h>
+#include <fst/test-properties.h>
+
+
+namespace fst {
+
+// Options for delayed composition, templated on the arc type, a single
+// matcher type shared by both FSTs, the composition filter, and the
+// composition state table. Any of the four objects left null is
+// constructed by composition; any object passed in here becomes owned by
+// ComposeFst. This variant composes generic Fst<Arc> types with a shared
+// matcher type M for Fst<Arc>, which should be adequate for most
+// applications and gives a reasonable tradeoff between efficiency and
+// code sharing (but see ComposeFstImplOptions).
+template <class A,
+          class M = Matcher<Fst<A> >,
+          class F = SequenceComposeFilter<M>,
+          class T = GenericComposeStateTable<A, typename F::FilterState> >
+struct ComposeFstOptions : public CacheOptions {
+  M *matcher1;     // FST1 matcher (see matcher.h)
+  M *matcher2;     // FST2 matcher
+  F *filter;       // Composition filter (see compose-filter.h)
+  T *state_table;  // Composition state table (see compose-state-table.h)
+
+  // Default cache options; everything else is constructed by composition.
+  ComposeFstOptions()
+      : matcher1(0),
+        matcher2(0),
+        filter(0),
+        state_table(0) {}
+
+  // Explicit cache options plus optional user-supplied objects.
+  explicit ComposeFstOptions(const CacheOptions &opts,
+                             M *mat1 = 0, M *mat2 = 0,
+                             F *filt = 0, T *sttable= 0)
+      : CacheOptions(opts),
+        matcher1(mat1),
+        matcher2(mat2),
+        filter(filt),
+        state_table(sttable) {}
+};
+
+
+// Options for delayed composition, templated on the two matcher types,
+// the composition filter, and the composition state table. Any of the
+// four objects left null is constructed by composition; any object passed
+// in here becomes owned by ComposeFst. This variant composes using
+// arbitrary matchers (of the same Arc type but otherwise arbitrary Fst
+// type), and the user must ensure the matchers are compatible. These
+// options permit the most efficient use but share the least code; they
+// are for advanced use only, in the most demanding or specialized
+// applications that can benefit from it (o.w. prefer ComposeFstOptions).
+template <class M1, class M2,
+          class F = SequenceComposeFilter<M1, M2>,
+          class T = GenericComposeStateTable<typename M1::Arc,
+                                             typename F::FilterState> >
+struct ComposeFstImplOptions : public CacheOptions {
+  M1 *matcher1;    // FST1 matcher (see matcher.h)
+  M2 *matcher2;    // FST2 matcher
+  F *filter;       // Composition filter (see compose-filter.h)
+  T *state_table;  // Composition state table (see compose-state-table.h)
+
+  // Default cache options; everything else is constructed by composition.
+  ComposeFstImplOptions()
+      : matcher1(0),
+        matcher2(0),
+        filter(0),
+        state_table(0) {}
+
+  // Explicit cache options plus optional user-supplied objects.
+  explicit ComposeFstImplOptions(const CacheOptions &opts,
+                                 M1 *mat1 = 0, M2 *mat2 = 0,
+                                 F *filt = 0, T *sttable= 0)
+      : CacheOptions(opts),
+        matcher1(mat1),
+        matcher2(mat2),
+        filter(filt),
+        state_table(sttable) {}
+};
+
+
+// Base class of the delayed composition implementation, shared by all
+// variants regardless of matcher, composition filter and state table.
+// It answers queries from the cache and calls the pure virtual
+// ComputeStart(), ComputeFinal() and Expand() only for values that are
+// not cached yet.
+template <class A>
+class ComposeFstImplBase : public CacheImpl<A> {
+ public:
+  using FstImpl<A>::SetType;
+  using FstImpl<A>::SetProperties;
+  using FstImpl<A>::Properties;
+  using FstImpl<A>::SetInputSymbols;
+  using FstImpl<A>::SetOutputSymbols;
+
+  using CacheBaseImpl< CacheState<A> >::HasStart;
+  using CacheBaseImpl< CacheState<A> >::HasFinal;
+  using CacheBaseImpl< CacheState<A> >::HasArcs;
+  using CacheBaseImpl< CacheState<A> >::SetFinal;
+  using CacheBaseImpl< CacheState<A> >::SetStart;
+
+  typedef typename A::Label Label;
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+  typedef CacheState<A> State;
+
+  // Sets the type to "compose", flags an error when the output symbols of
+  // fst1 are incompatible with the input symbols of fst2, and takes the
+  // input symbols from fst1 and the output symbols from fst2.
+  ComposeFstImplBase(const Fst<A> &fst1, const Fst<A> &fst2,
+                     const CacheOptions &opts)
+      : CacheImpl<A>(opts) {
+    VLOG(2) << "ComposeFst(" << this << "): Begin";
+    SetType("compose");
+
+    const bool symbols_ok =
+        CompatSymbols(fst2.InputSymbols(), fst1.OutputSymbols());
+    if (!symbols_ok) {
+      FSTERROR() << "ComposeFst: output symbol table of 1st argument "
+                 << "does not match input symbol table of 2nd argument";
+      SetProperties(kError, kError);
+    }
+
+    SetInputSymbols(fst1.InputSymbols());
+    SetOutputSymbols(fst2.OutputSymbols());
+  }
+
+  // Copies the cache, the properties selected by kCopyProperties, and both
+  // symbol tables from impl.
+  ComposeFstImplBase(const ComposeFstImplBase<A> &impl)
+      : CacheImpl<A>(impl) {
+    SetProperties(impl.Properties(), kCopyProperties);
+    SetInputSymbols(impl.InputSymbols());
+    SetOutputSymbols(impl.OutputSymbols());
+  }
+
+  virtual ComposeFstImplBase<A> *Copy() = 0;
+
+  virtual ~ComposeFstImplBase() {}
+
+  // Returns the start state, computing and caching it on first use. A
+  // computed kNoStateId is not cached.
+  StateId Start() {
+    if (!HasStart()) {
+      const StateId computed = ComputeStart();
+      if (computed != kNoStateId) SetStart(computed);
+    }
+    return CacheImpl<A>::Start();
+  }
+
+  // Returns the final weight of s, computing and caching it on first use.
+  Weight Final(StateId s) {
+    if (!HasFinal(s)) {
+      SetFinal(s, ComputeFinal(s));
+    }
+    return CacheImpl<A>::Final(s);
+  }
+
+  // Computes the arcs leaving s and stores them in the cache.
+  virtual void Expand(StateId s) = 0;
+
+  // The accessors below expand s first when its arcs are not cached yet.
+  size_t NumArcs(StateId s) {
+    if (!HasArcs(s)) Expand(s);
+    return CacheImpl<A>::NumArcs(s);
+  }
+
+  size_t NumInputEpsilons(StateId s) {
+    if (!HasArcs(s)) Expand(s);
+    return CacheImpl<A>::NumInputEpsilons(s);
+  }
+
+  size_t NumOutputEpsilons(StateId s) {
+    if (!HasArcs(s)) Expand(s);
+    return CacheImpl<A>::NumOutputEpsilons(s);
+  }
+
+  void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
+    if (!HasArcs(s)) Expand(s);
+    CacheImpl<A>::InitArcIterator(s, data);
+  }
+
+ protected:
+  // Supplied by the concrete implementation.
+  virtual StateId ComputeStart() = 0;
+  virtual Weight ComputeFinal(StateId s) = 0;
+};
+
+
+// Implementation of delayed composition templated on the matchers (see
+// matcher.h), composition filter (see compose-filter.h) and
+// the composition state table (see compose-state-table.h).
+//
+// The filter owns the two matchers; this class owns the filter and the
+// state table and deletes both in its destructor.
+template <class M1, class M2, class F, class T>
+class ComposeFstImpl : public ComposeFstImplBase<typename M1::Arc> {
+  typedef typename M1::FST FST1;
+  typedef typename M2::FST FST2;
+  typedef typename M1::Arc Arc;
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+  typedef typename F::FilterState FilterState;
+  typedef typename F::Matcher1 Matcher1;
+  typedef typename F::Matcher2 Matcher2;
+
+  using CacheBaseImpl<CacheState<Arc> >::SetArcs;
+  using FstImpl<Arc>::SetType;
+  using FstImpl<Arc>::SetProperties;
+
+  typedef ComposeStateTuple<StateId, FilterState> StateTuple;
+
+ public:
+  ComposeFstImpl(const FST1 &fst1, const FST2 &fst2,
+                 const ComposeFstImplOptions<M1, M2, F, T> &opts);
+
+  // Copies impl, requesting a safe copy of its filter (safe = true) and
+  // copy-constructing its state table.
+  ComposeFstImpl(const ComposeFstImpl<M1, M2, F, T> &impl)
+      : ComposeFstImplBase<Arc>(impl),
+        filter_(new F(*impl.filter_, true)),
+        matcher1_(filter_->GetMatcher1()),
+        matcher2_(filter_->GetMatcher2()),
+        fst1_(matcher1_->GetFst()),
+        fst2_(matcher2_->GetFst()),
+        state_table_(new T(*impl.state_table_)),
+        match_type_(impl.match_type_) {}
+
+  ~ComposeFstImpl() {
+    VLOG(2) << "ComposeFst(" << this
+            << "): End: # of visited states: " << state_table_->Size();
+
+    delete filter_;
+    delete state_table_;
+  }
+
+  virtual ComposeFstImpl<M1, M2, F, T> *Copy() {
+    return new ComposeFstImpl<M1, M2, F, T>(*this);
+  }
+
+  uint64 Properties() const { return Properties(kFstProperties); }
+
+  // Set error if found; return FST impl properties.
+  // The error sources checked are both input FSTs, both matchers, the
+  // filter and the state table; they are only consulted when the caller's
+  // mask asks for kError.
+  uint64 Properties(uint64 mask) const {
+    if ((mask & kError) &&
+        (fst1_.Properties(kError, false) ||
+         fst2_.Properties(kError, false) ||
+         (matcher1_->Properties(0) & kError) ||
+         (matcher2_->Properties(0) & kError) ||
+         (filter_->Properties(0) & kError) ||
+         state_table_->Error())) {
+      SetProperties(kError, kError);
+    }
+    return FstImpl<Arc>::Properties(mask);
+  }
+
+  // Arranges it so that the first arg to OrderedExpand is the Fst
+  // that will be matched on.
+  // With MATCH_BOTH, fst1 is the one matched on when it has more arcs at
+  // s1 than fst2 has at s2.
+  void Expand(StateId s) {
+    const StateTuple &tuple = state_table_->Tuple(s);
+    StateId s1 = tuple.state_id1;
+    StateId s2 = tuple.state_id2;
+    filter_->SetState(s1, s2, tuple.filter_state);
+    if (match_type_ == MATCH_OUTPUT ||
+        (match_type_ == MATCH_BOTH &&
+         internal::NumArcs(fst1_, s1) > internal::NumArcs(fst2_, s2)))
+      OrderedExpand(s, fst1_, s1, fst2_, s2, matcher1_, false);
+    else
+      OrderedExpand(s, fst2_, s2, fst1_, s1, matcher2_, true);
+  }
+
+ private:
+  // This does the actual matching of labels in the composition. The
+  // arguments are ordered so matching is called on state 'sa' of
+  // 'fsta' for each arc leaving state 'sb' of 'fstb'. The 'match_input' arg
+  // determines whether the input or output label of arcs at 'sb' is
+  // the one to match on.
+  template <class FST, class Matcher>
+  void OrderedExpand(StateId s, const Fst<Arc> &, StateId sa,
+                     const FST &fstb, StateId sb,
+                     Matcher *matchera, bool match_input) {
+    matchera->SetState(sa);
+
+    // First process non-consuming symbols (e.g., epsilons) on FSTA.
+    // The loop arc stays at 'sb' and carries kNoLabel on the side that is
+    // matched, so FSTB does not consume a symbol.
+    Arc loop(match_input ? 0 : kNoLabel, match_input ? kNoLabel : 0,
+             Weight::One(), sb);
+    MatchArc(s, matchera, loop, match_input);
+
+    // Then process matches on FSTB.
+    for (ArcIterator<FST> iterb(fstb, sb); !iterb.Done(); iterb.Next())
+      MatchArc(s, matchera, iterb.Value(), match_input);
+
+    SetArcs(s);
+  }
+
+  // Matches a single transition from 'fstb' against 'fsta' at 's'.
+  // The filter always receives the FST1 arc first and the FST2 arc second;
+  // matches it rejects with FilterState::NoState() are dropped.
+  template <class Matcher>
+  void MatchArc(StateId s, Matcher *matchera,
+                const Arc &arc, bool match_input) {
+    if (matchera->Find(match_input ? arc.olabel : arc.ilabel)) {
+      for (; !matchera->Done(); matchera->Next()) {
+        // Work on copies since the filter may modify its inputs.
+        Arc arca = matchera->Value();
+        Arc arcb = arc;
+        if (match_input) {
+          const FilterState &f = filter_->FilterArc(&arcb, &arca);
+          if (f != FilterState::NoState())
+            AddArc(s, arcb, arca, f);
+        } else {
+          const FilterState &f = filter_->FilterArc(&arca, &arcb);
+          if (f != FilterState::NoState())
+            AddArc(s, arca, arcb, f);
+        }
+      }
+    }
+  }
+
+  // Add a matching transition at 's'.
+  // The new arc takes its input label from arc1, its output label from
+  // arc2 and the Times() of both weights; its destination is the state the
+  // table assigns to (arc1.nextstate, arc2.nextstate, f).
+  void AddArc(StateId s, const Arc &arc1, const Arc &arc2,
+              const FilterState &f) {
+    StateTuple tuple(arc1.nextstate, arc2.nextstate, f);
+    Arc oarc(arc1.ilabel, arc2.olabel, Times(arc1.weight, arc2.weight),
+             state_table_->FindState(tuple));
+    CacheImpl<Arc>::PushArc(s, oarc);
+  }
+
+  // Returns the state for (start of fst1, start of fst2, filter start), or
+  // kNoStateId when either FST has no start state.
+  StateId ComputeStart() {
+    StateId s1 = fst1_.Start();
+    if (s1 == kNoStateId)
+      return kNoStateId;
+
+    StateId s2 = fst2_.Start();
+    if (s2 == kNoStateId)
+      return kNoStateId;
+
+    const FilterState &f = filter_->Start();
+    StateTuple tuple(s1, s2, f);
+    return state_table_->FindState(tuple);
+  }
+
+  // Returns Weight::Zero() as soon as either component state is non-final;
+  // otherwise lets the filter adjust both final weights and returns their
+  // Times().
+  Weight ComputeFinal(StateId s) {
+    const StateTuple &tuple = state_table_->Tuple(s);
+    StateId s1 = tuple.state_id1;
+    Weight final1 = internal::Final(fst1_, s1);
+    if (final1 == Weight::Zero())
+      return final1;
+
+    StateId s2 = tuple.state_id2;
+    Weight final2 = internal::Final(fst2_, s2);
+    if (final2 == Weight::Zero())
+      return final2;
+
+    filter_->SetState(s1, s2, tuple.filter_state);
+    filter_->FilterFinal(&final1, &final2);
+    return Times(final1, final2);
+  }
+
+  F *filter_;
+  Matcher1 *matcher1_;
+  Matcher2 *matcher2_;
+  const FST1 &fst1_;
+  const FST2 &fst2_;
+  T *state_table_;
+
+  MatchType match_type_;
+
+  void operator=(const ComposeFstImpl<M1, M2, F, T> &);  // disallow
+};
+
+// Builds the implementation from the two FSTs and the options. The filter
+// and state table come from opts when supplied and are constructed here
+// otherwise. The match type is chosen from what the two matchers report,
+// and an error is flagged when neither side can be matched on.
+template <class M1, class M2, class F, class T> inline
+ComposeFstImpl<M1, M2, F, T>::ComposeFstImpl(
+    const FST1 &fst1, const FST2 &fst2,
+    const ComposeFstImplOptions<M1, M2, F, T> &opts)
+    : ComposeFstImplBase<Arc>(fst1, fst2, opts),
+      filter_(opts.filter ? opts.filter :
+              new F(fst1, fst2, opts.matcher1, opts.matcher2)),
+      matcher1_(filter_->GetMatcher1()),
+      matcher2_(filter_->GetMatcher2()),
+      fst1_(matcher1_->GetFst()),
+      fst2_(matcher2_->GetFst()),
+      state_table_(opts.state_table ? opts.state_table :
+                   new T(fst1_, fst2_)) {
+  // First try the match types obtained with Type(false); only if neither
+  // side qualifies, ask again with Type(true).
+  MatchType type1 = matcher1_->Type(false);
+  MatchType type2 = matcher2_->Type(false);
+  if (type1 == MATCH_OUTPUT && type2 == MATCH_INPUT) {
+    match_type_ = MATCH_BOTH;
+  } else if (type1 == MATCH_OUTPUT) {
+    match_type_ = MATCH_OUTPUT;
+  } else if (type2 == MATCH_INPUT) {
+    match_type_ = MATCH_INPUT;
+  } else if (matcher1_->Type(true) == MATCH_OUTPUT) {
+    match_type_ = MATCH_OUTPUT;
+  } else if (matcher2_->Type(true) == MATCH_INPUT) {
+    match_type_ = MATCH_INPUT;
+  } else {
+    FSTERROR() << "ComposeFst: 1st argument cannot match on output labels "
+               << "and 2nd argument cannot match on input labels (sort?).";
+    // Give the member a defined value: Expand() and the copy constructor
+    // read it even when the error property is set.
+    match_type_ = MATCH_NONE;
+    SetProperties(kError, kError);
+  }
+  // Derive the composition properties from the matcher-adjusted properties
+  // of both inputs, then let the filter adjust them.
+  uint64 fprops1 = fst1.Properties(kFstProperties, false);
+  uint64 fprops2 = fst2.Properties(kFstProperties, false);
+  uint64 mprops1 = matcher1_->Properties(fprops1);
+  uint64 mprops2 = matcher2_->Properties(fprops2);
+  uint64 cprops = ComposeProperties(mprops1, mprops2);
+  SetProperties(filter_->Properties(cprops), kCopyProperties);
+  if (state_table_->Error()) SetProperties(kError, kError);
+  VLOG(2) << "ComposeFst(" << this << "): Initialized";
+}
+
+
+// Computes the composition of two transducers. This version is a
+// delayed Fst. If FST1 transduces string x to y with weight a and FST2
+// transduces y to z with weight b, then their composition transduces
+// string x to z with weight Times(a, b).
+//
+// The output labels of the first transducer or the input labels of
+// the second transducer must be sorted (with the default matcher).
+// The weights need to form a commutative semiring (valid for
+// TropicalWeight and LogWeight).
+//
+// Complexity:
+// Assuming the first FST is unsorted and the second is sorted:
+// - Time: O(v1 v2 d1 (log d2 + m2)),
+// - Space: O(v1 v2)
+// where vi = # of states visited, di = maximum out-degree, and mi the
+// maximum multiplicity of the states visited for the ith
+// FST. Constant time and space to visit an input state or arc is
+// assumed and exclusive of caching.
+//
+// Caveats:
+// - ComposeFst does not trim its output (since it is a delayed operation).
+// - The efficiency of composition can be strongly affected by several factors:
+// - the choice of which transducer is sorted - prefer sorting the FST
+// that has the greater average out-degree.
+// - the amount of non-determinism
+// - the presence and location of epsilon transitions - avoid epsilon
+// transitions on the output side of the first transducer or
+// the input side of the second transducer or prefer placing
+// them later in a path since they delay matching and can
+// introduce non-coaccessible states and transitions.
+//
+// This class attaches interface to implementation and handles
+// reference counting, delegating most methods to ImplToFst.
+template <class A>
+class ComposeFst : public ImplToFst< ComposeFstImplBase<A> > {
+ public:
+  // The iterator specializations need the private GetImpl() below.
+  friend class ArcIterator< ComposeFst<A> >;
+  friend class StateIterator< ComposeFst<A> >;
+
+  typedef A Arc;
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+  typedef CacheState<A> State;
+  typedef ComposeFstImplBase<A> Impl;
+
+  using ImplToFst<Impl>::SetImpl;
+
+  // Compose specifying only caching options. The matcher and filter
+  // are selected by CreateBase() from the look-ahead match type of
+  // the two inputs.
+  ComposeFst(const Fst<A> &fst1, const Fst<A> &fst2,
+             const CacheOptions &opts = CacheOptions())
+      : ImplToFst<Impl>(CreateBase(fst1, fst2, opts)) {}
+
+  // Compose specifying one shared matcher type M. Requires input
+  // Fsts and matcher FST type (M::FST) be Fst<A>. Recommended for
+  // best code-sharing and matcher compatibility.
+  template <class M, class F, class T>
+  ComposeFst(const Fst<A> &fst1, const Fst<A> &fst2,
+             const ComposeFstOptions<A, M, F, T> &opts)
+      : ImplToFst<Impl>(CreateBase1(fst1, fst2, opts)) {}
+
+  // Compose specifying two matcher types M1 and M2. Requires input
+  // Fsts (of the same Arc type but o.w. arbitrary) match the
+  // corresponding matcher FST types (M1::FST, M2::FST). Recommended
+  // only for advanced use in demanding or specialized applications
+  // due to potential code bloat and matcher incompatibilities.
+  template <class M1, class M2, class F, class T>
+  ComposeFst(const typename M1::FST &fst1, const typename M2::FST &fst2,
+             const ComposeFstImplOptions<M1, M2, F, T> &opts)
+      : ImplToFst<Impl>(CreateBase2(fst1, fst2, opts)) {}
+
+  // See Fst<>::Copy() for doc. A safe copy installs a copy of the
+  // implementation; otherwise the implementation of 'fst' is
+  // installed directly.
+  ComposeFst(const ComposeFst<A> &fst, bool safe = false) {
+    if (safe)
+      SetImpl(fst.GetImpl()->Copy());
+    else
+      SetImpl(fst.GetImpl(), false);
+  }
+
+  // Get a copy of this ComposeFst. See Fst<>::Copy() for further doc.
+  virtual ComposeFst<A> *Copy(bool safe = false) const {
+    return new ComposeFst<A>(*this, safe);
+  }
+
+  virtual inline void InitStateIterator(StateIteratorData<A> *data) const;
+
+  // Forwards arc iterator initialization to the implementation.
+  virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
+    GetImpl()->InitArcIterator(s, data);
+  }
+
+ protected:
+  ComposeFst() {}
+
+  // Create compose implementation specifying two matcher types.
+  // If the weight type is not commutative and neither input is
+  // unweighted, an error is logged and kError is set on the returned
+  // implementation.
+  template <class M1, class M2, class F, class T>
+  static Impl *CreateBase2(
+      const typename M1::FST &fst1, const typename M2::FST &fst2,
+      const ComposeFstImplOptions<M1, M2, F, T> &opts) {
+    Impl *impl = new ComposeFstImpl<M1, M2, F, T>(fst1, fst2, opts);
+    if (!(Weight::Properties() & kCommutative)) {
+      // Property masks are held as uint64 everywhere else in this
+      // library; use the same unsigned type here rather than int64.
+      uint64 props1 = fst1.Properties(kUnweighted, true);
+      uint64 props2 = fst2.Properties(kUnweighted, true);
+      if (!(props1 & kUnweighted) && !(props2 & kUnweighted)) {
+        FSTERROR() << "ComposeFst: Weights must be a commutative semiring: "
+                   << Weight::Type();
+        impl->SetProperties(kError, kError);
+      }
+    }
+    return impl;
+  }
+
+  // Create compose implementation specifying one matcher type.
+  // Requires input Fsts and matcher FST type (M::FST) be Fst<A>
+  template <class M, class F, class T>
+  static Impl *CreateBase1(const Fst<A> &fst1, const Fst<A> &fst2,
+                           const ComposeFstOptions<A, M, F, T> &opts) {
+    // Repackage the single-matcher options as two-matcher options
+    // using M for both sides.
+    ComposeFstImplOptions<M, M, F, T> nopts(opts, opts.matcher1, opts.matcher2,
+                                            opts.filter, opts.state_table);
+    return CreateBase2(fst1, fst2, nopts);
+  }
+
+  // Create compose implementation specifying no matcher type.
+  static Impl *CreateBase(const Fst<A> &fst1, const Fst<A> &fst2,
+                          const CacheOptions &opts) {
+    switch (LookAheadMatchType(fst1, fst2)) {  // Check for lookahead matchers
+      default:
+      case MATCH_NONE: {     // Default composition (no look-ahead)
+        ComposeFstOptions<Arc> nopts(opts);
+        return CreateBase1(fst1, fst2, nopts);
+      }
+      case MATCH_OUTPUT: {   // Lookahead on fst1
+        typedef typename DefaultLookAhead<Arc, MATCH_OUTPUT>::FstMatcher M;
+        typedef typename DefaultLookAhead<Arc, MATCH_OUTPUT>::ComposeFilter F;
+        ComposeFstOptions<Arc, M, F> nopts(opts);
+        return CreateBase1(fst1, fst2, nopts);
+      }
+      case MATCH_INPUT: {    // Lookahead on fst2
+        typedef typename DefaultLookAhead<Arc, MATCH_INPUT>::FstMatcher M;
+        typedef typename DefaultLookAhead<Arc, MATCH_INPUT>::ComposeFilter F;
+        ComposeFstOptions<Arc, M, F> nopts(opts);
+        return CreateBase1(fst1, fst2, nopts);
+      }
+    }
+  }
+
+ private:
+  // Makes visible to friends.
+  Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
+
+  void operator=(const ComposeFst<A> &fst);  // disallow
+};
+
+
+// StateIterator specialization for ComposeFst; adds nothing beyond CacheStateIterator.
+template<class A>
+class StateIterator< ComposeFst<A> >
+ : public CacheStateIterator< ComposeFst<A> > {
+ public:
+ explicit StateIterator(const ComposeFst<A> &fst) // GetImpl() is private; usable here via ComposeFst's friend declaration
+ : CacheStateIterator< ComposeFst<A> >(fst, fst.GetImpl()) {} // passes the FST and its implementation to the base
+};
+
+
+// ArcIterator specialization for ComposeFst; expands state s at construction if HasArcs(s) is false.
+template <class A>
+class ArcIterator< ComposeFst<A> >
+ : public CacheArcIterator< ComposeFst<A> > {
+ public:
+ typedef typename A::StateId StateId;
+
+ ArcIterator(const ComposeFst<A> &fst, StateId s)
+ : CacheArcIterator< ComposeFst<A> >(fst.GetImpl(), s) {
+ if (!fst.GetImpl()->HasArcs(s)) // implementation reports no arcs for s yet
+ fst.GetImpl()->Expand(s); // so ask it to expand s before iteration begins
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+template <class A> inline
+void ComposeFst<A>::InitStateIterator(StateIteratorData<A> *data) const { // fills in data->base for this FST
+ data->base = new StateIterator< ComposeFst<A> >(*this); // heap-allocated and not freed here; NOTE(review): presumably released by whoever owns 'data' - confirm
+}
+
+// Useful alias when using StdArc.
+typedef ComposeFst<StdArc> StdComposeFst;
+
+enum ComposeFilter { AUTO_FILTER, SEQUENCE_FILTER, ALT_SEQUENCE_FILTER, // pre-defined filter choices, selected via ComposeOptions::filter_type
+ MATCH_FILTER }; // Compose() dispatches on these values
+
+// Options for the Compose() function.
+struct ComposeOptions {
+  bool connect;               // Connect output
+  ComposeFilter filter_type;  // Which pre-defined filter to use
+
+  // Defaults: connect the output and use AUTO_FILTER.
+  ComposeOptions() : connect(true), filter_type(AUTO_FILTER) {}
+
+  // Explicit connect flag; the filter falls back to AUTO_FILTER when
+  // it is not given.
+  ComposeOptions(bool c, ComposeFilter ft = AUTO_FILTER)
+      : connect(c), filter_type(ft) {}
+};
+
+// Computes the composition of two transducers. This version writes
+// the composed FST into a MutableFst. If FST1 transduces string x to
+// y with weight a and FST2 transduces y to z with weight b, then
+// their composition transduces string x to z with weight
+// Times(a, b).
+//
+// The output labels of the first transducer or the input labels of
+// the second transducer must be sorted. The weights need to form a
+// commutative semiring (valid for TropicalWeight and LogWeight).
+//
+// Complexity:
+// Assuming the first FST is unsorted and the second is sorted:
+// - Time: O(V1 V2 D1 (log D2 + M2)),
+// - Space: O(V1 V2 D1 M2)
+// where Vi = # of states, Di = maximum out-degree, and Mi is
+// the maximum multiplicity for the ith FST.
+//
+// Caveats:
+// - Compose trims its output.
+// - The efficiency of composition can be strongly affected by several factors:
+// - the choice of which transducer is sorted - prefer sorting the FST
+// that has the greater average out-degree.
+// - the amount of non-determinism
+// - the presence and location of epsilon transitions - avoid epsilon
+// transitions on the output side of the first transducer or
+// the input side of the second transducer or prefer placing
+// them later in a path since they delay matching and can
+// introduce non-coaccessible states and transitions.
+template<class Arc>
+void Compose(const Fst<Arc> &ifst1, const Fst<Arc> &ifst2,
+             MutableFst<Arc> *ofst,
+             const ComposeOptions &opts = ComposeOptions()) {
+  typedef Matcher< Fst<Arc> > M;
+
+  // Build the delayed composition with the requested filter and copy
+  // it into *ofst. Every case sets gc_limit to 0 so that only the
+  // last state is cached, which gives the fastest copy. A filter_type
+  // outside the enumerators leaves *ofst unassigned.
+  switch (opts.filter_type) {
+    case AUTO_FILTER: {
+      CacheOptions cache_opts;
+      cache_opts.gc_limit = 0;
+      *ofst = ComposeFst<Arc>(ifst1, ifst2, cache_opts);
+      break;
+    }
+    case SEQUENCE_FILTER: {
+      ComposeFstOptions<Arc> seq_opts;
+      seq_opts.gc_limit = 0;
+      *ofst = ComposeFst<Arc>(ifst1, ifst2, seq_opts);
+      break;
+    }
+    case ALT_SEQUENCE_FILTER: {
+      ComposeFstOptions<Arc, M, AltSequenceComposeFilter<M> > alt_opts;
+      alt_opts.gc_limit = 0;
+      *ofst = ComposeFst<Arc>(ifst1, ifst2, alt_opts);
+      break;
+    }
+    case MATCH_FILTER: {
+      ComposeFstOptions<Arc, M, MatchComposeFilter<M> > match_opts;
+      match_opts.gc_limit = 0;
+      *ofst = ComposeFst<Arc>(ifst1, ifst2, match_opts);
+      break;
+    }
+  }
+
+  // Trim the result when requested.
+  if (opts.connect) {
+    Connect(ofst);
+  }
+}
+
+} // namespace fst
+
+#endif // FST_LIB_COMPOSE_H__
diff --git a/src/include/fst/concat.h b/src/include/fst/concat.h
new file mode 100644
index 0000000..8500d50
--- /dev/null
+++ b/src/include/fst/concat.h
@@ -0,0 +1,246 @@
+// concat.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Functions and classes to compute the concat of two FSTs.
+
+#ifndef FST_LIB_CONCAT_H__
+#define FST_LIB_CONCAT_H__
+
+#include <vector>
+using std::vector;
+#include <algorithm>
+
+#include <fst/mutable-fst.h>
+#include <fst/rational.h>
+
+
+namespace fst {
+
+// Concatenates fst2 onto the end of fst1, modifying fst1 (the
+// MutableFst argument in first position). If FST1 transduces string
+// x to y with weight a and FST2 transduces string w to v with weight
+// b, then the result transduces string xw to yv with Times(a, b).
+//
+// Complexity:
+// - Time: O(V1 + V2 + E2)
+// - Space: O(V1 + V2 + E2)
+// where Vi = # of states and Ei = # of arcs of the ith FST.
+//
+template<class Arc>
+void Concat(MutableFst<Arc> *fst1, const Fst<Arc> &fst2) {
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+
+  // TODO(riley): restore when voice actions issues fixed
+  // Symbol table compatibility is checked, but a mismatch is only
+  // logged; the error/return handling remains commented out.
+  const bool symbols_ok =
+      CompatSymbols(fst1->InputSymbols(), fst2.InputSymbols()) &&
+      CompatSymbols(fst1->OutputSymbols(), fst2.OutputSymbols());
+  if (!symbols_ok) {
+    LOG(ERROR) << "Concat: input/output symbol tables of 1st argument "
+               << "do not match input/output symbol tables of 2nd argument";
+    // fst1->SetProperties(kError, kError);
+    // return;
+  }
+
+  // Record both property sets before fst1 is modified.
+  const uint64 lhs_props = fst1->Properties(kFstProperties, false);
+  const uint64 rhs_props = fst2.Properties(kFstProperties, false);
+
+  // fst1 has no start state: nothing is appended; only an error bit
+  // on fst2 is carried over.
+  if (fst1->Start() == kNoStateId) {
+    if (rhs_props & kError) fst1->SetProperties(kError, kError);
+    return;
+  }
+
+  // State ids copied from fst2 are shifted by the original size of fst1.
+  const StateId offset = fst1->NumStates();
+  if (fst2.Properties(kExpanded, false))
+    fst1->ReserveStates(offset + CountStates(fst2));
+
+  // Copy every state of fst2, with its final weight and arcs, into fst1.
+  for (StateIterator< Fst<Arc> > siter(fst2); !siter.Done(); siter.Next()) {
+    const StateId dst = fst1->AddState();
+    const StateId src = siter.Value();
+    fst1->SetFinal(dst, fst2.Final(src));
+    fst1->ReserveArcs(dst, fst2.NumArcs(src));
+    for (ArcIterator< Fst<Arc> > aiter(fst2, src);
+         !aiter.Done();
+         aiter.Next()) {
+      Arc shifted = aiter.Value();
+      shifted.nextstate += offset;
+      fst1->AddArc(dst, shifted);
+    }
+  }
+
+  // Each formerly final state of fst1 loses its final weight; when
+  // fst2 has a start state, that weight moves onto a (0, 0)-labeled
+  // arc leading to the copied start state.
+  const StateId start2 = fst2.Start();
+  const bool has_start2 = (start2 != kNoStateId);
+  for (StateId s = 0; s < offset; ++s) {
+    const Weight final = fst1->Final(s);
+    if (final != Weight::Zero()) {
+      fst1->SetFinal(s, Weight::Zero());
+      if (has_start2)
+        fst1->AddArc(s, Arc(0, 0, final, start2 + offset));
+    }
+  }
+  if (has_start2)
+    fst1->SetProperties(ConcatProperties(lhs_props, rhs_props),
+                        kFstProperties);
+}
+
+// Concatenates fst1 in front of fst2, modifying fst2 (the MutableFst
+// argument in second position).
+//
+// Complexity:
+// - Time: O(V1 + E1)
+// - Space: O(V1 + E1)
+// where Vi = # of states and Ei = # of arcs of the ith FST.
+//
+template<class Arc>
+void Concat(const Fst<Arc> &fst1, MutableFst<Arc> *fst2) {
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+
+  // Symbol table compatibility is checked, but a mismatch is only
+  // logged; the error/return handling remains commented out.
+  const bool symbols_ok =
+      CompatSymbols(fst1.InputSymbols(), fst2->InputSymbols()) &&
+      CompatSymbols(fst1.OutputSymbols(), fst2->OutputSymbols());
+  if (!symbols_ok) {
+    LOG(ERROR) << "Concat: input/output symbol tables of 1st argument "
+               << "do not match input/output symbol tables of 2nd argument";
+    // fst2->SetProperties(kError, kError);
+    // return;
+  }
+
+  // Record both property sets before fst2 is modified.
+  const uint64 lhs_props = fst1.Properties(kFstProperties, false);
+  const uint64 rhs_props = fst2->Properties(kFstProperties, false);
+
+  // fst2 has no start state: nothing is prepended; only an error bit
+  // on fst1 is carried over.
+  const StateId start2 = fst2->Start();
+  if (start2 == kNoStateId) {
+    if (lhs_props & kError) fst2->SetProperties(kError, kError);
+    return;
+  }
+
+  // State ids copied from fst1 are shifted by the original size of fst2.
+  const StateId offset = fst2->NumStates();
+  if (fst1.Properties(kExpanded, false))
+    fst2->ReserveStates(offset + CountStates(fst1));
+
+  // Copy every state of fst1 into fst2. A final state of fst1 becomes
+  // non-final in the copy and instead gets a (0, 0)-labeled arc,
+  // carrying its final weight, to the original start state of fst2.
+  for (StateIterator< Fst<Arc> > siter(fst1); !siter.Done(); siter.Next()) {
+    const StateId src = siter.Value();
+    const StateId dst = fst2->AddState();
+    const Weight final = fst1.Final(src);
+    const bool is_final = (final != Weight::Zero());
+    fst2->ReserveArcs(dst, fst1.NumArcs(src) + (is_final ? 1 : 0));
+    if (is_final)
+      fst2->AddArc(dst, Arc(0, 0, final, start2));
+    for (ArcIterator< Fst<Arc> > aiter(fst1, src);
+         !aiter.Done();
+         aiter.Next()) {
+      Arc shifted = aiter.Value();
+      shifted.nextstate += offset;
+      fst2->AddArc(dst, shifted);
+    }
+  }
+
+  // The new start is the copied start state of fst1; if fst1 has
+  // none, a fresh state is added and made the start instead.
+  const StateId start1 = fst1.Start();
+  if (start1 == kNoStateId) {
+    fst2->SetStart(fst2->AddState());
+  } else {
+    fst2->SetStart(start1 + offset);
+    fst2->SetProperties(ConcatProperties(lhs_props, rhs_props),
+                        kFstProperties);
+  }
+}
+
+
+// Computes the concatenation of two FSTs. This version modifies its
+// RationalFst input (in first position) by calling AddConcat(fst2, true) on its implementation.
+template<class Arc>
+void Concat(RationalFst<Arc> *fst1, const Fst<Arc> &fst2) {
+ fst1->GetImpl()->AddConcat(fst2, true);
+}
+
+// Computes the concatenation of two FSTs. This version modifies its
+// RationalFst input (in second position) by calling AddConcat(fst1, false) on its implementation.
+template<class Arc>
+void Concat(const Fst<Arc> &fst1, RationalFst<Arc> *fst2) {
+ fst2->GetImpl()->AddConcat(fst1, false);
+}
+
+typedef RationalFstOptions ConcatFstOptions;
+
+
+// Computes the concatenation (product) of two FSTs; this version is a
+// delayed Fst. If FST1 transduces string x to y with weight a and FST2
+// transduces string w to v with weight b, then their concatenation
+// transduces string xw to yv with Times(a, b).
+//
+// Complexity:
+// - Time: O(v1 + e1 + v2 + e2),
+// - Space: O(v1 + v2)
+// where vi = # of states visited and ei = # of arcs visited of the
+// ith FST. Constant time and space to visit an input state or arc is
+// assumed and exclusive of caching.
+template <class A>
+class ConcatFst : public RationalFst<A> { // delayed concatenation; the work is done by the RationalFst implementation
+ public:
+ using ImplToFst< RationalFstImpl<A> >::GetImpl;
+
+ typedef A Arc;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+
+ ConcatFst(const Fst<A> &fst1, const Fst<A> &fst2) { // default-constructed RationalFst base
+ GetImpl()->InitConcat(fst1, fst2); // set the implementation up as the concatenation of fst1 and fst2
+ }
+
+ ConcatFst(const Fst<A> &fst1, const Fst<A> &fst2,
+ const ConcatFstOptions &opts) : RationalFst<A>(opts) { // same, with 'opts' forwarded to the RationalFst base
+ GetImpl()->InitConcat(fst1, fst2);
+ }
+
+ // Copy constructor; 'safe' is forwarded to RationalFst. See Fst<>::Copy() for doc.
+ ConcatFst(const ConcatFst<A> &fst, bool safe = false)
+ : RationalFst<A>(fst, safe) {}
+
+ // Get a heap-allocated copy of this ConcatFst. See Fst<>::Copy() for further doc.
+ virtual ConcatFst<A> *Copy(bool safe = false) const {
+ return new ConcatFst<A>(*this, safe);
+ }
+};
+
+
+// StateIterator specialization for ConcatFst; defers entirely to the RationalFst state iterator.
+template <class A>
+class StateIterator< ConcatFst<A> > : public StateIterator< RationalFst<A> > {
+ public:
+ explicit StateIterator(const ConcatFst<A> &fst)
+ : StateIterator< RationalFst<A> >(fst) {} // 'fst' is passed to the base as its RationalFst parent
+};
+
+
+// ArcIterator specialization for ConcatFst; defers entirely to the RationalFst arc iterator.
+template <class A>
+class ArcIterator< ConcatFst<A> > : public ArcIterator< RationalFst<A> > {
+ public:
+ typedef typename A::StateId StateId;
+
+ ArcIterator(const ConcatFst<A> &fst, StateId s)
+ : ArcIterator< RationalFst<A> >(fst, s) {} // iterate over the arcs leaving state s
+};
+
+
+// Useful alias when using StdArc.
+typedef ConcatFst<StdArc> StdConcatFst;
+
+} // namespace fst
+
+#endif // FST_LIB_CONCAT_H__
diff --git a/src/include/fst/config.h b/src/include/fst/config.h
new file mode 100644
index 0000000..046b49c
--- /dev/null
+++ b/src/include/fst/config.h
@@ -0,0 +1,12 @@
+/* src/include/fst/config.h. Generated from config.h.in by configure. */
+// OpenFst config file
+
+/* Define to 1 if you have the ICU library. */
+/* #undef HAVE_ICU */
+
+/* Define to 1 if the system has the type `std::tr1::hash<long long
+ unsigned>'. */
+#define HAVE_STD__TR1__HASH_LONG_LONG_UNSIGNED_ 1
+
+/* Define to 1 if the system has the type `__gnu_cxx::slist<int>'. */
+#define HAVE___GNU_CXX__SLIST_INT_ 1
diff --git a/src/include/fst/config.h.in b/src/include/fst/config.h.in
new file mode 100644
index 0000000..7815dfc
--- /dev/null
+++ b/src/include/fst/config.h.in
@@ -0,0 +1,11 @@
+// OpenFst config file
+
+/* Define to 1 if you have the ICU library. */
+#undef HAVE_ICU
+
+/* Define to 1 if the system has the type `std::tr1::hash<long long
+ unsigned>'. */
+#define HAVE_STD__TR1__HASH_LONG_LONG_UNSIGNED_ 1
+
+/* Define to 1 if the system has the type `__gnu_cxx::slist<int>'. */
+#define HAVE___GNU_CXX__SLIST_INT_ 1
diff --git a/src/include/fst/connect.h b/src/include/fst/connect.h
new file mode 100644
index 0000000..427808c
--- /dev/null
+++ b/src/include/fst/connect.h
@@ -0,0 +1,319 @@
+// connect.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Classes and functions to remove unsuccessful paths from an Fst.
+
+#ifndef FST_LIB_CONNECT_H__
+#define FST_LIB_CONNECT_H__
+
+#include <vector>
+using std::vector;
+
+#include <fst/dfs-visit.h>
+#include <fst/union-find.h>
+#include <fst/mutable-fst.h>
+
+
+namespace fst {
+
+// Visitor that computes connected components by merging the
+// endpoints of every arc it sees in a union-find structure. Use with
+// Visit().
+template <class A>
+class CcVisitor {
+ public:
+  typedef A Arc;
+  typedef typename Arc::Weight Weight;
+  typedef typename A::StateId StateId;
+
+  // cc[i]: connected component number for state i.
+  // The visitor allocates (and later frees) its own union-find
+  // structure and fills *cc in FinishVisit().
+  CcVisitor(vector<StateId> *cc)
+      : comps_(new UnionFind<StateId>(0, kNoStateId)),
+        cc_(cc),
+        nstates_(0) { }
+
+  // comps: connected components equiv classes.
+  // The caller keeps ownership of 'comps'; no component vector is
+  // produced automatically.
+  CcVisitor(UnionFind<StateId> *comps)
+      : comps_(comps),
+        cc_(0),
+        nstates_(0) { }
+
+  ~CcVisitor() {
+    // comps_ was allocated by this object only when it was built
+    // from a component vector.
+    if (cc_) {
+      delete comps_;
+    }
+  }
+
+  void InitVisit(const Fst<A> &fst) { }
+
+  // Counts the state and gives it a set of its own unless it already
+  // belongs to one.
+  bool InitState(StateId s, StateId root) {
+    ++nstates_;
+    const bool unseen = (comps_->FindSet(s) == kNoStateId);
+    if (unseen) {
+      comps_->MakeSet(s);
+    }
+    return true;
+  }
+
+  // The destination gets a set first, then source and destination
+  // are merged.
+  bool WhiteArc(StateId s, const A &arc) {
+    const StateId dest = arc.nextstate;
+    comps_->MakeSet(dest);
+    comps_->Union(s, dest);
+    return true;
+  }
+
+  // Source and destination are merged.
+  bool GreyArc(StateId s, const A &arc) {
+    comps_->Union(s, arc.nextstate);
+    return true;
+  }
+
+  // Source and destination are merged.
+  bool BlackArc(StateId s, const A &arc) {
+    comps_->Union(s, arc.nextstate);
+    return true;
+  }
+
+  void FinishState(StateId s) { }
+
+  // Fills the component vector if one was supplied at construction.
+  void FinishVisit() {
+    if (cc_) {
+      GetCcVector(cc_);
+    }
+  }
+
+  // cc[i]: connected component number for state i.
+  // Returns number of components.
+  int GetCcVector(vector<StateId> *cc) {
+    cc->clear();
+    cc->resize(nstates_, kNoStateId);
+    StateId num_components = 0;
+    for (StateId s = 0; s < nstates_; ++s) {
+      const StateId representative = comps_->FindSet(s);
+      // The slot of the set representative holds the component
+      // number; it is assigned the first time the set is met.
+      StateId &label = (*cc)[representative];
+      if (label == kNoStateId) {
+        label = num_components++;
+      }
+      (*cc)[s] = label;
+    }
+    return num_components;
+  }
+
+ private:
+  UnionFind<StateId> *comps_;  // Components
+  vector<StateId> *cc_;        // State's cc number
+  StateId nstates_;            // State count
+};
+
+
+// Finds and returns strongly-connected components, accessible and
+// coaccessible states and related properties. Uses Tarjan's single
+// DFS SCC algorithm (see Aho, et al, "Design and Analysis of Computer
+// Algorithms", 189pp). Use with DfsVisit();
+template <class A>
+class SccVisitor {
+ public:
+  typedef A Arc;
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+
+  // scc[i]: strongly-connected component number for state i.
+  // SCC numbers will be in topological order for acyclic input.
+  // access[i]: accessibility of state i.
+  // coaccess[i]: coaccessibility of state i.
+  // Any of above can be NULL.
+  // props: related property bits (cyclicity, initial cyclicity,
+  // accessibility, coaccessibility) set/cleared (o.w. unchanged).
+  //
+  // All per-visit scratch members start out zeroed so that the
+  // object is in a defined state before InitVisit() is called.
+  SccVisitor(vector<StateId> *scc, vector<bool> *access,
+             vector<bool> *coaccess, uint64 *props)
+      : scc_(scc), access_(access), coaccess_(coaccess), props_(props),
+        fst_(0), start_(kNoStateId), nstates_(0), nscc_(0),
+        coaccess_internal_(false), dfnumber_(0), lowlink_(0),
+        onstack_(0), scc_stack_(0) {}
+  SccVisitor(uint64 *props)
+      : scc_(0), access_(0), coaccess_(0), props_(props),
+        fst_(0), start_(kNoStateId), nstates_(0), nscc_(0),
+        coaccess_internal_(false), dfnumber_(0), lowlink_(0),
+        onstack_(0), scc_stack_(0) {}
+
+  void InitVisit(const Fst<A> &fst);
+
+  bool InitState(StateId s, StateId root);
+
+  bool TreeArc(StateId s, const A &arc) { return true; }
+
+  // Arc to a state still being visited: lowers s's lowlink to the
+  // destination's discovery number if smaller, propagates
+  // coaccessibility and records cyclicity (initial cyclicity when the
+  // destination is the start state).
+  bool BackArc(StateId s, const A &arc) {
+    StateId t = arc.nextstate;
+    if ((*dfnumber_)[t] < (*lowlink_)[s])
+      (*lowlink_)[s] = (*dfnumber_)[t];
+    if ((*coaccess_)[t])
+      (*coaccess_)[s] = true;
+    *props_ |= kCyclic;
+    *props_ &= ~kAcyclic;
+    if (arc.nextstate == start_) {
+      *props_ |= kInitialCyclic;
+      *props_ &= ~kInitialAcyclic;
+    }
+    return true;
+  }
+
+  // Arc to an already finished state: only a cross edge whose
+  // destination is still on the SCC stack may lower s's lowlink;
+  // coaccessibility is propagated in either case.
+  bool ForwardOrCrossArc(StateId s, const A &arc) {
+    StateId t = arc.nextstate;
+    if ((*dfnumber_)[t] < (*dfnumber_)[s] /* cross edge */ &&
+        (*onstack_)[t] && (*dfnumber_)[t] < (*lowlink_)[s])
+      (*lowlink_)[s] = (*dfnumber_)[t];
+    if ((*coaccess_)[t])
+      (*coaccess_)[s] = true;
+    return true;
+  }
+
+  void FinishState(StateId s, StateId p, const A *);
+
+  void FinishVisit() {
+    // Numbers SCC's in topological order when acyclic.
+    if (scc_)
+      for (size_t i = 0; i < scc_->size(); ++i)
+        (*scc_)[i] = nscc_ - 1 - (*scc_)[i];
+    // Free the per-visit scratch storage and reset every pointer.
+    // Resetting coaccess_ matters: InitVisit() tests it to decide
+    // whether to allocate an internal vector, so leaving it dangling
+    // would make a second visit with this visitor touch freed memory.
+    if (coaccess_internal_) {
+      delete coaccess_;
+      coaccess_ = 0;
+    }
+    delete dfnumber_;
+    dfnumber_ = 0;
+    delete lowlink_;
+    lowlink_ = 0;
+    delete onstack_;
+    onstack_ = 0;
+    delete scc_stack_;
+    scc_stack_ = 0;
+  }
+
+ private:
+  vector<StateId> *scc_;        // State's scc number
+  vector<bool> *access_;        // State's accessibility
+  vector<bool> *coaccess_;      // State's coaccessibility
+  uint64 *props_;
+  const Fst<A> *fst_;
+  StateId start_;
+  StateId nstates_;             // State count
+  StateId nscc_;                // SCC count
+  bool coaccess_internal_;      // True if coaccess_ was allocated by InitVisit
+  vector<StateId> *dfnumber_;   // state discovery times
+  vector<StateId> *lowlink_;    // lowlink[s] == dfnumber[s] => SCC root
+  vector<bool> *onstack_;       // is a state on the SCC stack
+  vector<StateId> *scc_stack_;  // SCC stack (w/ random access)
+};
+
+// Prepares the visitor for a traversal of 'fst': empties the
+// caller-supplied output vectors, resets the property bits to their
+// optimistic values, zeroes the counters and allocates the scratch
+// vectors.
+template <class A> inline
+void SccVisitor<A>::InitVisit(const Fst<A> &fst) {
+  if (scc_) {
+    scc_->clear();
+  }
+  if (access_) {
+    access_->clear();
+  }
+
+  // Coaccessibility is always tracked; a private vector is allocated
+  // when the caller did not supply one.
+  coaccess_internal_ = !coaccess_;
+  if (coaccess_internal_) {
+    coaccess_ = new vector<bool>;
+  } else {
+    coaccess_->clear();
+  }
+
+  // Start from acyclic, accessible and coaccessible; the visit
+  // callbacks flip these bits when they find evidence to the contrary.
+  const uint64 set_bits =
+      kAcyclic | kInitialAcyclic | kAccessible | kCoAccessible;
+  const uint64 clear_bits =
+      kCyclic | kInitialCyclic | kNotAccessible | kNotCoAccessible;
+  *props_ = (*props_ | set_bits) & ~clear_bits;
+
+  fst_ = &fst;
+  start_ = fst.Start();
+  nstates_ = 0;
+  nscc_ = 0;
+
+  dfnumber_ = new vector<StateId>;
+  lowlink_ = new vector<StateId>;
+  onstack_ = new vector<bool>;
+  scc_stack_ = new vector<StateId>;
+}
+
+// Called when state s is first discovered in the DFS tree rooted at
+// 'root': pushes s on the SCC stack, grows the per-state vectors to
+// cover s, stamps its discovery number and records whether it was
+// reached from the start state.
+template <class A> inline
+bool SccVisitor<A>::InitState(StateId s, StateId root) {
+  scc_stack_->push_back(s);
+
+  // Extend every per-state vector in lock step until index s exists.
+  while (dfnumber_->size() <= s) {
+    if (scc_) {
+      scc_->push_back(-1);
+    }
+    if (access_) {
+      access_->push_back(false);
+    }
+    coaccess_->push_back(false);
+    dfnumber_->push_back(-1);
+    lowlink_->push_back(-1);
+    onstack_->push_back(false);
+  }
+
+  (*dfnumber_)[s] = nstates_;
+  (*lowlink_)[s] = nstates_;
+  (*onstack_)[s] = true;
+
+  // A state is accessible exactly when its DFS tree is rooted at the
+  // start state.
+  const bool reached_from_start = (root == start_);
+  if (access_) {
+    (*access_)[s] = reached_from_start;
+  }
+  if (!reached_from_start) {
+    *props_ |= kNotAccessible;
+    *props_ &= ~kAccessible;
+  }
+
+  ++nstates_;
+  return true;
+}
+
+template <class A> inline
+void SccVisitor<A>::FinishState(StateId s, StateId p, const A *) { // s: state being finished; p: its DFS parent, or kNoStateId
+ if (fst_->Final(s) != Weight::Zero()) // a final state is coaccessible by definition
+ (*coaccess_)[s] = true;
+ if ((*dfnumber_)[s] == (*lowlink_)[s]) { // root of new SCC: its members are the stack entries from the top down to s
+ bool scc_coaccess = false;
+ size_t i = scc_stack_->size();
+ StateId t;
+ do { // first pass (read-only): is any member of the SCC coaccessible?
+ t = (*scc_stack_)[--i];
+ if ((*coaccess_)[t])
+ scc_coaccess = true;
+ } while (s != t);
+ do { // second pass: pop the members, numbering them and sharing the coaccessibility result
+ t = scc_stack_->back();
+ if (scc_)
+ (*scc_)[t] = nscc_;
+ if (scc_coaccess)
+ (*coaccess_)[t] = true;
+ (*onstack_)[t] = false;
+ scc_stack_->pop_back();
+ } while (s != t);
+ if (!scc_coaccess) { // a whole SCC with no coaccessible member
+ *props_ |= kNotCoAccessible;
+ *props_ &= ~kCoAccessible;
+ }
+ ++nscc_;
+ }
+ if (p != kNoStateId) { // propagate results to the DFS parent
+ if ((*coaccess_)[s])
+ (*coaccess_)[p] = true;
+ if ((*lowlink_)[s] < (*lowlink_)[p]) // parent inherits the smaller lowlink
+ (*lowlink_)[p] = (*lowlink_)[s];
+ }
+}
+
+
+// Trims an FST in place: every state that is not both accessible and
+// coaccessible is deleted, so only states on successful paths remain.
+//
+// Complexity:
+// - Time: O(V + E)
+// - Space: O(V + E)
+// where V = # of states and E = # of arcs.
+template<class Arc>
+void Connect(MutableFst<Arc> *fst) {
+  typedef typename Arc::StateId StateId;
+
+  // One DFS pass fills in accessibility and coaccessibility per state.
+  vector<bool> reachable;
+  vector<bool> productive;
+  uint64 visit_props = 0;
+  SccVisitor<Arc> visitor(0, &reachable, &productive, &visit_props);
+  DfsVisit(*fst, &visitor);
+
+  // Collect the states that fail either test.
+  vector<StateId> doomed;
+  for (StateId s = 0; s < reachable.size(); ++s) {
+    if (!(reachable[s] && productive[s])) {
+      doomed.push_back(s);
+    }
+  }
+
+  fst->DeleteStates(doomed);
+  fst->SetProperties(kAccessible | kCoAccessible, kAccessible | kCoAccessible);
+}
+
+} // namespace fst
+
+#endif // FST_LIB_CONNECT_H__
diff --git a/src/include/fst/const-fst.h b/src/include/fst/const-fst.h
new file mode 100644
index 0000000..f68e8ed
--- /dev/null
+++ b/src/include/fst/const-fst.h
@@ -0,0 +1,483 @@
+// const-fst.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Simple concrete immutable FST whose states and arcs are each stored
+// in single arrays.
+
+#ifndef FST_LIB_CONST_FST_H__
+#define FST_LIB_CONST_FST_H__
+
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/expanded-fst.h>
+#include <fst/fst-decl.h> // For optional argument declarations
+#include <fst/test-properties.h>
+#include <fst/util.h>
+
+
+namespace fst {
+
+template <class A, class U> class ConstFst;  // Friend of ConstFstImpl.
+template <class F, class G> void Cast(const F &, G *);  // Friend of ConstFst.
+
+// Implementation of ConstFst: all states live in one array and all arcs in
+// another, templated on the Arc definition A. The unsigned type U is the
+// type of the per-state offsets and counts that index into the arc array.
+template <class A, class U>
+class ConstFstImpl : public FstImpl<A> {
+ public:
+ using FstImpl<A>::SetInputSymbols;
+ using FstImpl<A>::SetOutputSymbols;
+ using FstImpl<A>::SetType;
+ using FstImpl<A>::SetProperties;
+ using FstImpl<A>::Properties;
+
+ typedef A Arc;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+ typedef U Unsigned;
+
+ ConstFstImpl()  // Empty FST: null arrays, zero counts, no start state.
+ : states_(0), arcs_(0), nstates_(0), narcs_(0), start_(kNoStateId) {
+ string type = "const";
+ if (sizeof(U) != sizeof(uint32)) {  // Append U's bit width, e.g. "const64".
+ string size;
+ Int64ToStr(8 * sizeof(U), &size);
+ type += size;
+ }
+ SetType(type);
+ SetProperties(kNullProperties | kStaticProperties);
+ }
+
+ explicit ConstFstImpl(const Fst<A> &fst);
+
+ ~ConstFstImpl() {  // Both arrays are owned by this object.
+ delete[] states_;
+ delete[] arcs_;
+ }
+
+ StateId Start() const { return start_; }
+
+ Weight Final(StateId s) const { return states_[s].final; }
+
+ StateId NumStates() const { return nstates_; }
+
+ size_t NumArcs(StateId s) const { return states_[s].narcs; }
+
+ size_t NumInputEpsilons(StateId s) const { return states_[s].niepsilons; }
+
+ size_t NumOutputEpsilons(StateId s) const { return states_[s].noepsilons; }
+
+ static ConstFstImpl<A, U> *Read(istream &strm, const FstReadOptions &opts);
+
+ bool Write(ostream &strm, const FstWriteOptions &opts) const;
+
+ A *Arcs(StateId s) { return arcs_ + states_[s].pos; }  // First arc of s.
+
+ // Provide information needed for the generic state iterator
+ void InitStateIterator(StateIteratorData<A> *data) const {
+ data->base = 0;
+ data->nstates = nstates_;
+ }
+
+ // Provide information needed for the generic arc iterator
+ void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
+ data->base = 0;
+ data->arcs = arcs_ + states_[s].pos;
+ data->narcs = states_[s].narcs;
+ data->ref_count = 0;
+ }
+
+ private:
+ friend class ConstFst<A, U>; // Allow finding narcs_, nstates_ during Write
+
+ // States implemented by array *states_ below, arcs by (single) *arcs_.
+ struct State {  // NOTE: State() leaves pos and narcs uninitialized.
+ Weight final; // Final weight
+ Unsigned pos; // Start of state's arcs in *arcs_
+ Unsigned narcs; // Number of arcs (per state)
+ Unsigned niepsilons; // # of input epsilons
+ Unsigned noepsilons; // # of output epsilons
+ State() : final(Weight::Zero()), niepsilons(0), noepsilons(0) {}
+ };
+
+ // Properties always true of this Fst class
+ static const uint64 kStaticProperties = kExpanded;
+ // Current unaligned file format version. The unaligned version was added and
+ // made the default since the aligned version does not work on pipes.
+ static const int kFileVersion = 2;
+ // Current aligned file format version
+ static const int kAlignedFileVersion = 1;
+ // Minimum file format version supported
+ static const int kMinFileVersion = 1;
+ // Byte alignment for states and arcs in file format (version 1 only)
+ static const int kFileAlign = 16;
+
+ State *states_; // States representation
+ A *arcs_; // Arcs representation
+ StateId nstates_; // Number of states
+ size_t narcs_; // Number of arcs (per FST)
+ StateId start_; // Initial state
+
+ DISALLOW_COPY_AND_ASSIGN(ConstFstImpl);
+};
+
+// Out-of-class definition for each static constant declared in ConstFstImpl.
+template <class A, class U> const uint64 ConstFstImpl<A, U>::kStaticProperties;
+// Unaligned file format version.
+template <class A, class U> const int ConstFstImpl<A, U>::kFileVersion;
+// Aligned file format version.
+template <class A, class U> const int ConstFstImpl<A, U>::kAlignedFileVersion;
+// Oldest file format version that can be read.
+template <class A, class U> const int ConstFstImpl<A, U>::kMinFileVersion;
+// Byte alignment used by the aligned file format.
+template <class A, class U> const int ConstFstImpl<A, U>::kFileAlign;
+
+
+// Builds a ConstFst implementation by copying an arbitrary FST: symbol
+// tables and start state are copied, states and arcs are counted in one
+// pass and then laid out in two freshly allocated arrays in a second pass.
+template<class A, class U>
+ConstFstImpl<A, U>::ConstFstImpl(const Fst<A> &fst) : nstates_(0), narcs_(0) {
+  // The type name is "const", suffixed with the bit width of U unless U has
+  // the size of uint32.
+  string type_name = "const";
+  if (sizeof(U) != sizeof(uint32)) {
+    string bits;
+    Int64ToStr(sizeof(U) * 8, &bits);
+    type_name += bits;
+  }
+  SetType(type_name);
+  SetInputSymbols(fst.InputSymbols());
+  SetOutputSymbols(fst.OutputSymbols());
+  start_ = fst.Start();
+
+  // Pass 1: count states and arcs so both arrays can be sized exactly.
+  for (StateIterator< Fst<A> > siter(fst); !siter.Done(); siter.Next()) {
+    ++nstates_;
+    ArcIterator< Fst<A> > aiter(fst, siter.Value());
+    while (!aiter.Done()) {
+      ++narcs_;
+      aiter.Next();
+    }
+  }
+  states_ = new State[nstates_];
+  arcs_ = new A[narcs_];
+
+  // Pass 2: fill in each state record and append its arcs contiguously.
+  size_t next_arc = 0;
+  for (StateId s = 0; s < nstates_; ++s) {
+    State &st = states_[s];
+    st.final = fst.Final(s);
+    st.pos = next_arc;
+    st.narcs = st.niepsilons = st.noepsilons = 0;
+    for (ArcIterator< Fst<A> > aiter(fst, s); !aiter.Done(); aiter.Next()) {
+      const A &arc = aiter.Value();
+      ++st.narcs;
+      if (arc.ilabel == 0) ++st.niepsilons;
+      if (arc.olabel == 0) ++st.noepsilons;
+      arcs_[next_arc++] = arc;
+    }
+  }
+  SetProperties(fst.Properties(kCopyProperties, true) | kStaticProperties);
+}
+
+
+// Reads a ConstFstImpl from strm: the header, then the raw state array, then
+// the raw arc array. Returns 0 on failure (alignment/read errors are logged).
+template<class A, class U>
+ConstFstImpl<A, U> *ConstFstImpl<A, U>::Read(istream &strm,
+                                             const FstReadOptions &opts) {
+  ConstFstImpl<A, U> *result = new ConstFstImpl<A, U>;
+  FstHeader hdr;
+  if (!result->ReadHeader(strm, opts, kMinFileVersion, &hdr)) {
+    delete result;
+    return 0;
+  }
+  result->start_ = hdr.Start();
+  result->nstates_ = hdr.NumStates();
+  result->narcs_ = hdr.NumArcs();
+  result->states_ = new State[result->nstates_];
+  result->arcs_ = new A[result->narcs_];
+
+  // Files with the aligned version number count as aligned even if unflagged.
+  if (hdr.Version() == kAlignedFileVersion)
+    hdr.SetFlags(hdr.GetFlags() | FstHeader::IS_ALIGNED);
+  const bool aligned = (hdr.GetFlags() & FstHeader::IS_ALIGNED) != 0;
+  if (aligned && !AlignInput(strm, kFileAlign)) {
+    LOG(ERROR) << "ConstFst::Read: Alignment failed: " << opts.source;
+    delete result;
+    return 0;
+  }
+  size_t nbytes = result->nstates_ * sizeof(State);
+  strm.read(reinterpret_cast<char *>(result->states_), nbytes);
+  if (!strm) {
+    LOG(ERROR) << "ConstFst::Read: Read failed: " << opts.source;
+    delete result;
+    return 0;
+  }
+  if (aligned && !AlignInput(strm, kFileAlign)) {
+    LOG(ERROR) << "ConstFst::Read: Alignment failed: " << opts.source;
+    delete result;
+    return 0;
+  }
+  nbytes = result->narcs_ * sizeof(A);
+  strm.read(reinterpret_cast<char *>(result->arcs_), nbytes);
+  if (!strm) {
+    LOG(ERROR) << "ConstFst::Read: Read failed: " << opts.source;
+    delete result;
+    return 0;
+  }
+  return result;
+}
+
+// Simple concrete immutable FST. This class attaches the public interface
+// to the ConstFstImpl implementation and handles reference counting,
+// delegating most methods to ImplToExpandedFst. The unsigned type U is
+// used to represent indices into the arc array (uint32 by default,
+// declared in fst-decl.h).
+template <class A, class U>
+class ConstFst : public ImplToExpandedFst< ConstFstImpl<A, U> > {
+ public:
+ friend class StateIterator< ConstFst<A, U> >;
+ friend class ArcIterator< ConstFst<A, U> >;
+ template <class F, class G> void friend Cast(const F &, G *);
+
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+ typedef ConstFstImpl<A, U> Impl;
+ typedef U Unsigned;
+
+ ConstFst() : ImplToExpandedFst<Impl>(new Impl()) {}
+
+ explicit ConstFst(const Fst<A> &fst)
+ : ImplToExpandedFst<Impl>(new Impl(fst)) {}
+
+ ConstFst(const ConstFst<A, U> &fst) : ImplToExpandedFst<Impl>(fst) {}
+
+ // Get a copy of this ConstFst. See Fst<>::Copy() for further doc.
+ virtual ConstFst<A, U> *Copy(bool safe = false) const {  // 'safe' is unused.
+ return new ConstFst<A, U>(*this);
+ }
+
+ // Read a ConstFst from an input stream; return NULL on error
+ static ConstFst<A, U> *Read(istream &strm, const FstReadOptions &opts) {
+ Impl* impl = Impl::Read(strm, opts);
+ return impl ? new ConstFst<A, U>(impl) : 0;
+ }
+
+ // Read a ConstFst from a file; return NULL on error
+ // Empty filename reads from standard input
+ static ConstFst<A, U> *Read(const string &filename) {
+ Impl* impl = ImplToExpandedFst<Impl>::Read(filename);
+ return impl ? new ConstFst<A, U>(impl) : 0;
+ }
+
+ virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
+ return WriteFst(*this, strm, opts);
+ }
+
+ virtual bool Write(const string &filename) const {
+ return Fst<A>::WriteFile(filename);
+ }
+
+ template <class F>  // Writes any FST type F in ConstFst format.
+ static bool WriteFst(const F &fst, ostream &strm,
+ const FstWriteOptions &opts);
+
+ virtual void InitStateIterator(StateIteratorData<Arc> *data) const {
+ GetImpl()->InitStateIterator(data);
+ }
+
+ virtual void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const {
+ GetImpl()->InitArcIterator(s, data);
+ }
+
+ private:
+ explicit ConstFst(Impl *impl) : ImplToExpandedFst<Impl>(impl) {}  // Used by Read.
+
+ // Makes visible to friends.
+ Impl *GetImpl() const { return ImplToFst<Impl, ExpandedFst<A> >::GetImpl(); }
+
+ void SetImpl(Impl *impl, bool own_impl = true) {
+ ImplToFst< Impl, ExpandedFst<A> >::SetImpl(impl, own_impl);
+ }
+
+ void operator=(const ConstFst<A, U> &fst); // disallow
+};
+
+// Writes fst to strm in ConstFst format. State/arc counts come from the impl
+// (ConstFst input), from a counting pass (unseekable stream), or are patched
+// into the header after the body is written. Returns false on failure.
+template <class A, class U>
+template <class F>
+bool ConstFst<A, U>::WriteFst(const F &fst, ostream &strm,
+                              const FstWriteOptions &opts) {
+  static const int kFileVersion = 2;
+  static const int kAlignedFileVersion = 1;
+  static const int kFileAlign = 16;
+  int file_version = opts.align ? kAlignedFileVersion : kFileVersion;
+  size_t num_arcs = -1, num_states = -1, start_offset = 0;  // -1 = unknown
+  bool update_header = true;
+  if (fst.Type() == ConstFst<A, U>().Type()) {
+    const ConstFst<A, U> *const_fst = static_cast<const ConstFst<A, U> *>(&fst);
+    num_arcs = const_fst->GetImpl()->narcs_;
+    num_states = const_fst->GetImpl()->nstates_;
+    update_header = false;
+  } else if ((start_offset = strm.tellp()) == -1) {
+    // Header can't be rewritten here: count now, from 0 (not the -1 sentinel).
+    num_arcs = num_states = 0;
+    for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
+      num_arcs += fst.NumArcs(siter.Value());
+      ++num_states;
+    }
+    update_header = false;
+  }
+  FstHeader hdr;
+  hdr.SetStart(fst.Start());
+  hdr.SetNumStates(num_states);
+  hdr.SetNumArcs(num_arcs);
+  string type = "const";
+  if (sizeof(U) != sizeof(uint32)) {
+    string size;
+    Int64ToStr(8 * sizeof(U), &size);
+    type += size;
+  }
+  FstImpl<A>::WriteFstHeader(fst, strm, opts, file_version, type, &hdr);
+  if (opts.align && !AlignOutput(strm, kFileAlign)) {
+    LOG(ERROR) << "Could not align file during write after header";
+    return false;
+  }
+  size_t pos = 0, states = 0;  // Running arc offset and state count.
+  typename ConstFstImpl<A, U>::State state;
+  for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
+    state.final = fst.Final(siter.Value());
+    state.pos = pos;
+    state.narcs = fst.NumArcs(siter.Value());
+    state.niepsilons = fst.NumInputEpsilons(siter.Value());
+    state.noepsilons = fst.NumOutputEpsilons(siter.Value());
+    strm.write(reinterpret_cast<const char *>(&state), sizeof(state));
+    pos += state.narcs;
+    states++;
+  }
+  hdr.SetNumStates(states);  // Counts actually observed while writing.
+  hdr.SetNumArcs(pos);
+  if (opts.align && !AlignOutput(strm, kFileAlign)) {
+    LOG(ERROR) << "Could not align file during write after writing states";
+    return false;  // Arcs written unaligned would not match the aligned format.
+  }
+  for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
+    StateId s = siter.Value();
+    for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) {
+      const A &arc = aiter.Value();
+      strm.write(reinterpret_cast<const char *>(&arc), sizeof(arc));
+    }
+  }
+  strm.flush();
+  if (!strm) {
+    LOG(ERROR) << "ConstFst::WriteFst: write failed: " << opts.source;
+    return false;
+  }
+  if (update_header) {
+    return FstImpl<A>::UpdateFstHeader(fst, strm, opts, file_version, type,
+                                       &hdr, start_offset);
+  }
+  if (hdr.NumStates() != num_states) {
+    LOG(ERROR) << "Inconsistent number of states observed during write";
+    return false;
+  }
+  if (hdr.NumArcs() != num_arcs) {
+    LOG(ERROR) << "Inconsistent number of arcs observed during write";
+    return false;
+  }
+  return true;
+}
+
+// StateIterator specialization for ConstFst: a plain counter running over
+// the state ids 0 .. NumStates()-1. Usage matches the generic version in
+// fst.h (with the ConstFst type); this version is intended to inline.
+template <class A, class U>
+class StateIterator< ConstFst<A, U> > {
+ public:
+  typedef typename A::StateId StateId;
+
+  explicit StateIterator(const ConstFst<A, U> &fst)
+      : limit_(fst.GetImpl()->NumStates()), current_(0) {}
+
+  bool Done() const { return !(current_ < limit_); }
+
+  StateId Value() const { return current_; }
+
+  void Next() { ++current_; }
+
+  void Reset() { current_ = 0; }
+
+ private:
+  StateId limit_;    // Number of states in the FST.
+  StateId current_;  // Id of the state the iterator is positioned on.
+
+  DISALLOW_COPY_AND_ASSIGN(StateIterator);
+};
+
+
+// ArcIterator specialization for ConstFst: walks the contiguous run of arcs
+// belonging to one state. Usage matches the generic version in fst.h (with
+// the ConstFst type); this version is intended to inline.
+template <class A, class U>
+class ArcIterator< ConstFst<A, U> > {
+ public:
+  typedef typename A::StateId StateId;
+
+  ArcIterator(const ConstFst<A, U> &fst, StateId s)
+      : first_(fst.GetImpl()->Arcs(s)),
+        count_(fst.GetImpl()->NumArcs(s)), pos_(0) {}
+
+  bool Done() const { return !(pos_ < count_); }
+
+  const A& Value() const { return *(first_ + pos_); }
+
+  void Next() { ++pos_; }
+
+  size_t Position() const { return pos_; }
+
+  void Reset() { pos_ = 0; }
+
+  void Seek(size_t a) { pos_ = a; }
+
+  // Flags are fixed at kArcValueFlags; SetFlags is a no-op.
+  uint32 Flags() const { return kArcValueFlags; }
+
+  void SetFlags(uint32 f, uint32 m) {}
+
+ private:
+  // A view into the FST's arc array; nothing here is owned.
+  const A *first_;  // First arc of the state.
+  size_t count_;    // Number of arcs of the state.
+  size_t pos_;      // Index of the current arc within the state.
+
+  DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+// Convenience alias: ConstFst over StdArc with the default index type U.
+typedef ConstFst<StdArc> StdConstFst;
+
+} // namespace fst
+
+#endif // FST_LIB_CONST_FST_H__
diff --git a/src/include/fst/determinize.h b/src/include/fst/determinize.h
new file mode 100644
index 0000000..417142f
--- /dev/null
+++ b/src/include/fst/determinize.h
@@ -0,0 +1,887 @@
+// determinize.h
+
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Functions and classes to determinize an FST.
+
+#ifndef FST_LIB_DETERMINIZE_H__
+#define FST_LIB_DETERMINIZE_H__
+
+#include <algorithm>
+#include <climits>
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <map>
+#include <fst/slist.h>
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/cache.h>
+#include <fst/factor-weight.h>
+#include <fst/arc-map.h>
+#include <fst/prune.h>
+#include <fst/test-properties.h>
+
+
+namespace fst {
+
+//
+// COMMON DIVISORS - these are used in determinization to compute
+// the transition weights. In the simplest case, it is just the same
+// as the semiring Plus(). However, other choices permit more efficient
+// determinization when the output contains strings.
+//
+
+// Default common divisor: simply the semiring Plus of the two weights.
+template <class W>
+class DefaultCommonDivisor {
+ public:
+  typedef W Weight;
+
+  W operator()(const W &lhs, const W &rhs) const { return Plus(lhs, rhs); }
+};
+
+
+// Common divisor for a (left) string semiring: yields either a single
+// label that is a common first letter of both strings, or the empty
+// string (One). Determinization uses it so that an output transition
+// carries at most one label.
+template <typename L, StringType S>
+class LabelCommonDivisor {
+ public:
+  typedef StringWeight<L, S> Weight;
+
+  Weight operator()(const Weight &w1, const Weight &w2) const {
+    StringWeightIterator<L, S> first(w1);
+    StringWeightIterator<L, S> second(w2);
+
+    if (!(Weight::Properties() & kLeftSemiring)) {
+      FSTERROR() << "LabelCommonDivisor: Weight needs to be left semiring";
+      return Weight::NoWeight();
+    }
+    // An empty string on either side leaves no label in common.
+    if (w1.Size() == 0 || w2.Size() == 0)
+      return Weight::One();
+    // When one weight is Zero, the first label of the other one is used.
+    if (w1 == Weight::Zero()) return Weight(second.Value());
+    if (w2 == Weight::Zero()) return Weight(first.Value());
+    // Otherwise the divisor is the shared first label, if there is one.
+    if (first.Value() == second.Value())
+      return Weight(first.Value());
+    return Weight::One();
+  }
+};
+
+
+// Common divisor for Gallic weights, computed componentwise: the label
+// common divisor on the string part (Value1) and the divisor type D, which
+// defaults to DefaultCommonDivisor<W>, on the weight part (Value2).
+template <class L, class W, StringType S, class D = DefaultCommonDivisor<W> >
+class GallicCommonDivisor {
+ public:
+  typedef GallicWeight<L, W, S> Weight;
+
+  Weight operator()(const Weight &lhs, const Weight &rhs) const {
+    return Weight(string_divisor_(lhs.Value1(), rhs.Value1()),
+                  weight_divisor_(lhs.Value2(), rhs.Value2()));
+  }
+
+ private:
+  LabelCommonDivisor<L, S> string_divisor_;  // Applied to the string part.
+  D weight_divisor_;                         // Applied to the weight part.
+};
+
+// Options for delayed determinization, layered on top of the cache options.
+template <class Arc>
+struct DeterminizeFstOptions : CacheOptions {
+  typedef typename Arc::Label Label;
+  // Quantization delta applied to subset weights.
+  float delta;
+  // Label used for residual final output in subsequential transducers.
+  Label subsequential_label;
+
+  explicit DeterminizeFstOptions(const CacheOptions &opts,
+                                 float quantum = kDelta, Label label = 0)
+      : CacheOptions(opts), delta(quantum), subsequential_label(label) {}
+
+  explicit DeterminizeFstOptions(float quantum = kDelta, Label label = 0)
+      : delta(quantum), subsequential_label(label) {}
+};
+
+
+// Implementation of delayed DeterminizeFst. This base class is common to
+// the acceptor and transducer variants: it owns a copy of the input FST and
+// fills the cache on demand through the pure virtuals declared below.
+template <class A>
+class DeterminizeFstImplBase : public CacheImpl<A> {
+ public:
+ using FstImpl<A>::SetType;
+ using FstImpl<A>::SetProperties;
+ using FstImpl<A>::Properties;
+ using FstImpl<A>::SetInputSymbols;
+ using FstImpl<A>::SetOutputSymbols;
+
+ using CacheBaseImpl< CacheState<A> >::HasStart;
+ using CacheBaseImpl< CacheState<A> >::HasFinal;
+ using CacheBaseImpl< CacheState<A> >::HasArcs;
+ using CacheBaseImpl< CacheState<A> >::SetFinal;
+ using CacheBaseImpl< CacheState<A> >::SetStart;
+
+ typedef typename A::Label Label;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+ typedef CacheState<A> State;
+
+ DeterminizeFstImplBase(const Fst<A> &fst,
+ const DeterminizeFstOptions<A> &opts)
+ : CacheImpl<A>(opts), fst_(fst.Copy()) {
+ SetType("determinize");
+ uint64 props = fst.Properties(kFstProperties, false);
+ SetProperties(DeterminizeProperties(props,
+ opts.subsequential_label != 0),
+ kCopyProperties);
+
+ SetInputSymbols(fst.InputSymbols());
+ SetOutputSymbols(fst.OutputSymbols());
+ }
+
+ DeterminizeFstImplBase(const DeterminizeFstImplBase<A> &impl)
+ : CacheImpl<A>(impl),
+ fst_(impl.fst_->Copy(true)) {
+ SetType("determinize");
+ SetProperties(impl.Properties(), kCopyProperties);
+ SetInputSymbols(impl.InputSymbols());
+ SetOutputSymbols(impl.OutputSymbols());
+ }
+
+ virtual ~DeterminizeFstImplBase() { delete fst_; }  // fst_ is our own copy.
+
+ virtual DeterminizeFstImplBase<A> *Copy() = 0;
+
+ StateId Start() {  // Computes the start state on first use, then caches it.
+ if (!HasStart()) {
+ StateId start = ComputeStart();
+ if (start != kNoStateId) {
+ SetStart(start);
+ }
+ }
+ return CacheImpl<A>::Start();
+ }
+
+ Weight Final(StateId s) {  // Computes s's final weight on first use.
+ if (!HasFinal(s)) {
+ Weight final = ComputeFinal(s);
+ SetFinal(s, final);
+ }
+ return CacheImpl<A>::Final(s);
+ }
+
+ virtual void Expand(StateId s) = 0;
+
+ size_t NumArcs(StateId s) {  // Expands s first if its arcs are not cached.
+ if (!HasArcs(s))
+ Expand(s);
+ return CacheImpl<A>::NumArcs(s);
+ }
+
+ size_t NumInputEpsilons(StateId s) {
+ if (!HasArcs(s))
+ Expand(s);
+ return CacheImpl<A>::NumInputEpsilons(s);
+ }
+
+ size_t NumOutputEpsilons(StateId s) {
+ if (!HasArcs(s))
+ Expand(s);
+ return CacheImpl<A>::NumOutputEpsilons(s);
+ }
+
+ void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
+ if (!HasArcs(s))
+ Expand(s);
+ CacheImpl<A>::InitArcIterator(s, data);
+ }
+
+ virtual StateId ComputeStart() = 0;
+
+ virtual Weight ComputeFinal(StateId s) = 0;
+
+ const Fst<A> &GetFst() const { return *fst_; }
+
+ private:
+ const Fst<A> *fst_; // Input Fst (a copy owned by this object)
+
+ void operator=(const DeterminizeFstImplBase<A> &); // disallow
+};
+
+
+// Implementation of delayed determinization for weighted acceptors, built on
+// subsets of (state, residual weight) pairs; templated on arc A and divisor D.
+template <class A, class D>
+class DeterminizeFsaImpl : public DeterminizeFstImplBase<A> {
+ public:
+ using FstImpl<A>::SetProperties;
+ using DeterminizeFstImplBase<A>::GetFst;
+ using DeterminizeFstImplBase<A>::SetArcs;
+
+ typedef typename A::Label Label;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+
+ struct Element {
+ Element() {}
+
+ Element(StateId s, Weight w) : state_id(s), weight(w) {}
+
+ StateId state_id; // Input state Id
+ Weight weight; // Residual weight
+ };
+ typedef slist<Element> Subset;
+ typedef map<Label, Subset*> LabelMap;
+
+ DeterminizeFsaImpl(const Fst<A> &fst, D common_divisor,
+ const vector<Weight> *in_dist, vector<Weight> *out_dist,
+ const DeterminizeFstOptions<A> &opts)
+ : DeterminizeFstImplBase<A>(fst, opts),
+ delta_(opts.delta),
+ in_dist_(in_dist),
+ out_dist_(out_dist),
+ common_divisor_(common_divisor),
+ subset_hash_(0, SubsetKey(), SubsetEqual(&elements_)) {
+ if (!fst.Properties(kAcceptor, true)) {
+ FSTERROR() << "DeterminizeFst: argument not an acceptor";
+ SetProperties(kError, kError);
+ }
+ if (!(Weight::Properties() & kLeftSemiring)) {
+ FSTERROR() << "DeterminizeFst: Weight needs to be left distributive: "
+ << Weight::Type();
+ SetProperties(kError, kError);
+ }
+ if (out_dist_)
+ out_dist_->clear();
+ }
+
+ DeterminizeFsaImpl(const DeterminizeFsaImpl<A, D> &impl)
+ : DeterminizeFstImplBase<A>(impl),
+ delta_(impl.delta_),
+ in_dist_(0),  // The distance vectors are not carried over to a copy.
+ out_dist_(0),
+ common_divisor_(impl.common_divisor_),
+ subset_hash_(0, SubsetKey(), SubsetEqual(&elements_)) {
+ if (impl.out_dist_) {
+ FSTERROR() << "DeterminizeFsaImpl: cannot copy with out_dist vector";
+ SetProperties(kError, kError);
+ }
+ }
+
+ virtual ~DeterminizeFsaImpl() {  // The subsets in subsets_ are owned here.
+ for (int i = 0; i < subsets_.size(); ++i)
+ delete subsets_[i];
+ }
+
+ virtual DeterminizeFsaImpl<A, D> *Copy() {
+ return new DeterminizeFsaImpl<A, D>(*this);
+ }
+
+ uint64 Properties() const { return Properties(kFstProperties); }
+
+ // Set error if found; return FST impl properties.
+ uint64 Properties(uint64 mask) const {
+ if ((mask & kError) && (GetFst().Properties(kError, false)))
+ SetProperties(kError, kError);
+ return FstImpl<A>::Properties(mask);
+ }
+
+ virtual StateId ComputeStart() {  // Start subset: {(input start, One)}.
+ StateId s = GetFst().Start();
+ if (s == kNoStateId)
+ return kNoStateId;
+ Element element(s, Weight::One());
+ Subset *subset = new Subset;
+ subset->push_front(element);
+ return FindState(subset);
+ }
+
+ virtual Weight ComputeFinal(StateId s) {  // Sum of residual * input final.
+ Subset *subset = subsets_[s];
+ Weight final = Weight::Zero();
+ for (typename Subset::iterator siter = subset->begin();
+ siter != subset->end();
+ ++siter) {
+ Element &element = *siter;
+ final = Plus(final, Times(element.weight,
+ GetFst().Final(element.state_id)));
+ if (!final.Member())
+ SetProperties(kError, kError);
+ }
+ return final;
+ }
+
+ // Finds the state corresponding to a subset. Only creates a new state
+ // if the subset is not found in the subset hash. FindState takes
+ // ownership of the subset argument (so that it doesn't have to copy it
+ // if it creates a new state).
+ //
+ // The method exploits the following device: all pairs stored in the
+ // associative container subset_hash_ are of the form (subset,
+ // id(subset) + 1), i.e. subset_hash_[subset] > 0 if subset has been
+ // stored previously. For unassigned subsets, the call to
+ // subset_hash_[subset] creates a new pair (subset, 0). As a result,
+ // subset_hash_[subset] == 0 iff subset is new.
+ StateId FindState(Subset *subset) {
+ StateId &assoc_value = subset_hash_[subset];
+ if (assoc_value == 0) { // subset wasn't present; create new state
+ StateId s = CreateState(subset);
+ assoc_value = s + 1;
+ return s;
+ } else {
+ delete subset;
+ return assoc_value - 1; // NB: assoc_value = ID + 1
+ }
+ }
+
+ StateId CreateState(Subset *subset) {  // New state id = index in subsets_.
+ StateId s = subsets_.size();
+ subsets_.push_back(subset);
+ if (in_dist_)  // NOTE(review): out_dist_ is dereferenced unchecked below.
+ out_dist_->push_back(ComputeDistance(subset));
+ return s;
+ }
+
+ // Compute distance from a state to the final states in the DFA
+ // given the distances in the NFA.
+ Weight ComputeDistance(const Subset *subset) {
+ Weight outd = Weight::Zero();
+ for (typename Subset::const_iterator siter = subset->begin();
+ siter != subset->end(); ++siter) {
+ const Element &element = *siter;
+ Weight ind = element.state_id < in_dist_->size() ?
+ (*in_dist_)[element.state_id] : Weight::Zero();
+ outd = Plus(outd, Times(element.weight, ind));
+ }
+ return outd;
+ }
+
+ // Computes the outgoing transitions from a state, creating new destination
+ // states as needed.
+ virtual void Expand(StateId s) {
+
+ LabelMap label_map;
+ LabelSubsets(s, &label_map);
+
+ for (typename LabelMap::iterator liter = label_map.begin();
+ liter != label_map.end();
+ ++liter)
+ AddArc(s, liter->first, liter->second);
+ SetArcs(s);
+ }
+
+ private:
+ // Constructs destination subsets per label. At return, subset
+ // element weights include the input automaton label weights and the
+ // subsets may contain duplicate states.
+ void LabelSubsets(StateId s, LabelMap *label_map) {
+ Subset *src_subset = subsets_[s];
+
+ for (typename Subset::iterator siter = src_subset->begin();
+ siter != src_subset->end();
+ ++siter) {
+ Element &src_element = *siter;
+ for (ArcIterator< Fst<A> > aiter(GetFst(), src_element.state_id);
+ !aiter.Done();
+ aiter.Next()) {
+ const A &arc = aiter.Value();
+ Element dest_element(arc.nextstate,
+ Times(src_element.weight, arc.weight));
+ Subset* &dest_subset = (*label_map)[arc.ilabel];
+ if (dest_subset == 0)
+ dest_subset = new Subset;
+ dest_subset->push_front(dest_element);
+ }
+ }
+ }
+
+ // Adds an arc from state S to the destination state associated
+ // with subset DEST_SUBSET (as created by LabelSubsets).
+ void AddArc(StateId s, Label label, Subset *dest_subset) {
+ A arc;
+ arc.ilabel = label;
+ arc.olabel = label;
+ arc.weight = Weight::Zero();
+
+ typename Subset::iterator oiter;  // Last element kept (predecessor of diter).
+ for (typename Subset::iterator diter = dest_subset->begin();
+ diter != dest_subset->end();) {
+ Element &dest_element = *diter;
+ // Computes label weight.
+ arc.weight = common_divisor_(arc.weight, dest_element.weight);
+
+ while (elements_.size() <= dest_element.state_id)
+ elements_.push_back(0);
+ Element *matching_element = elements_[dest_element.state_id];
+ if (matching_element) {
+ // Found duplicate state: sums state weight and deletes dup.
+ matching_element->weight = Plus(matching_element->weight,
+ dest_element.weight);
+ if (!matching_element->weight.Member())
+ SetProperties(kError, kError);
+ ++diter;
+ dest_subset->erase_after(oiter);
+ } else {
+ // Saves element so we can check for duplicate for this state.
+ elements_[dest_element.state_id] = &dest_element;
+ oiter = diter;
+ ++diter;
+ }
+ }
+
+ // Divides out label weight from destination subset elements.
+ // Quantizes to ensure comparisons are effective.
+ // Clears element vector.
+ for (typename Subset::iterator diter = dest_subset->begin();
+ diter != dest_subset->end();
+ ++diter) {
+ Element &dest_element = *diter;
+ dest_element.weight = Divide(dest_element.weight, arc.weight,
+ DIVIDE_LEFT);
+ dest_element.weight = dest_element.weight.Quantize(delta_);
+ elements_[dest_element.state_id] = 0;
+ }
+
+ arc.nextstate = FindState(dest_subset);
+ CacheImpl<A>::PushArc(s, arc);
+ }
+
+ // Comparison object for hashing Subset(s). Subsets are not sorted in this
+ // implementation, so ordering must not be assumed in the equivalence
+ // test.
+ class SubsetEqual {
+ public:
+ // Constructor takes vector needed to check equality. See immediately
+ // below for constraints on it.
+ explicit SubsetEqual(vector<Element *> *elements)
+ : elements_(elements) {}
+
+ // At each call to operator(), the elements_ vector should contain
+ // only NULLs. When this operator returns, elements_ will still
+ // have this property.
+ bool operator()(Subset* subset1, Subset* subset2) const {
+ if (subset1->size() != subset2->size())
+ return false;
+
+ // Loads first subset elements in element vector.
+ for (typename Subset::iterator iter1 = subset1->begin();
+ iter1 != subset1->end();
+ ++iter1) {
+ Element &element1 = *iter1;
+ while (elements_->size() <= element1.state_id)
+ elements_->push_back(0);
+ (*elements_)[element1.state_id] = &element1;
+ }
+
+ // Checks second subset matches first via element vector.
+ for (typename Subset::iterator iter2 = subset2->begin();
+ iter2 != subset2->end();
+ ++iter2) {
+ Element &element2 = *iter2;
+ while (elements_->size() <= element2.state_id)
+ elements_->push_back(0);
+ Element *element1 = (*elements_)[element2.state_id];
+ if (!element1 || element1->weight != element2.weight) {
+ // Mismatch found. Resets element vector before returning false.
+ for (typename Subset::iterator iter1 = subset1->begin();
+ iter1 != subset1->end();
+ ++iter1)
+ (*elements_)[iter1->state_id] = 0;
+ return false;
+ } else {
+ (*elements_)[element2.state_id] = 0; // Clears entry
+ }
+ }
+ return true;
+ }
+ private:
+ vector<Element *> *elements_;  // Scratch vector shared with the outer class.
+ };
+
+ // Hash function for Subset to Fst states. Subset elements are not
+ // sorted in this implementation, so the hash must be invariant
+ // under subset reordering.
+ class SubsetKey {
+ public:
+ size_t operator()(const Subset* subset) const {
+ size_t hash = 0;
+ for (typename Subset::const_iterator iter = subset->begin();
+ iter != subset->end();
+ ++iter) {
+ const Element &element = *iter;
+ int lshift = element.state_id % (CHAR_BIT * sizeof(size_t) - 1) + 1;
+ int rshift = CHAR_BIT * sizeof(size_t) - lshift;
+ size_t n = element.state_id;
+ hash ^= n << lshift ^ n >> rshift ^ element.weight.Hash();
+ }
+ return hash;
+ }
+ };
+
+ float delta_; // Quantization delta for subset weights
+ const vector<Weight> *in_dist_; // Distance to final NFA states
+ vector<Weight> *out_dist_; // Distance to final DFA states
+
+ D common_divisor_;
+
+ // Used to test equivalence of subsets.
+ vector<Element *> elements_;
+
+ // Maps from StateId to Subset.
+ vector<Subset *> subsets_;
+
+ // Hashes from Subset to its StateId in the output automaton.
+ typedef unordered_map<Subset *, StateId, SubsetKey, SubsetEqual>
+ SubsetHash;
+
+ // The subset hash itself; stores id + 1 per subset (see FindState).
+ SubsetHash subset_hash_;
+
+ void operator=(const DeterminizeFsaImpl<A, D> &); // disallow
+};
+
+
+// Delayed determinization for transducers. The input transducer is mapped
+// to an acceptor over the Gallic semiring, whose weights carry the output
+// strings; that acceptor is determinized with the acceptor implementation
+// above and the result is mapped back to a transducer (see Init()).
+template <class A, StringType S>
+class DeterminizeFstImpl : public DeterminizeFstImplBase<A> {
+ public:
+  using FstImpl<A>::SetProperties;
+  using DeterminizeFstImplBase<A>::GetFst;
+  using CacheBaseImpl< CacheState<A> >::GetCacheGc;
+  using CacheBaseImpl< CacheState<A> >::GetCacheLimit;
+
+  typedef typename A::Label Label;
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+
+  // Arc mappers into and out of the Gallic representation.
+  typedef ToGallicMapper<A, S> ToMapper;
+  typedef FromGallicMapper<A, S> FromMapper;
+
+  // Delayed FSTs that apply those mappers.
+  typedef typename ToMapper::ToArc ToArc;
+  typedef ArcMapFst<A, ToArc, ToMapper> ToFst;
+  typedef ArcMapFst<ToArc, A, FromMapper> FromFst;
+
+  typedef GallicCommonDivisor<Label, Weight, S> CommonDivisor;
+  typedef GallicFactor<Label, Weight, S> FactorIterator;
+
+  // Builds the implementation for 'fst' using the delta and subsequential
+  // label found in 'opts'.
+  DeterminizeFstImpl(const Fst<A> &fst, const DeterminizeFstOptions<A> &opts)
+      : DeterminizeFstImplBase<A>(fst, opts),
+        delta_(opts.delta),
+        subsequential_label_(opts.subsequential_label) {
+    Init(GetFst());
+  }
+
+  // Copy constructor: copies the scalar settings and rebuilds the internal
+  // FST pipeline by calling Init() again rather than sharing from_fst_.
+  DeterminizeFstImpl(const DeterminizeFstImpl<A, S> &impl)
+      : DeterminizeFstImplBase<A>(impl),
+        delta_(impl.delta_),
+        subsequential_label_(impl.subsequential_label_) {
+    Init(GetFst());
+  }
+
+  // from_fst_ is allocated in Init() and owned by this object.
+  ~DeterminizeFstImpl() { delete from_fst_; }
+
+  virtual DeterminizeFstImpl<A, S> *Copy() {
+    return new DeterminizeFstImpl<A, S>(*this);
+  }
+
+  uint64 Properties() const { return Properties(kFstProperties); }
+
+  // Returns the FST impl properties selected by 'mask'. When kError is
+  // requested and either the input FST or from_fst_ reports an error, the
+  // error bit is set on this implementation first.
+  uint64 Properties(uint64 mask) const {
+    if (mask & kError) {
+      if (GetFst().Properties(kError, false) ||
+          from_fst_->Properties(kError, false)) {
+        SetProperties(kError, kError);
+      }
+    }
+    return FstImpl<A>::Properties(mask);
+  }
+
+  // Start state, final weights and arcs are all read from from_fst_.
+  virtual StateId ComputeStart() { return from_fst_->Start(); }
+
+  virtual Weight ComputeFinal(StateId s) { return from_fst_->Final(s); }
+
+  // Copies every arc leaving state 's' of from_fst_ into this cache and
+  // then marks the state's arcs as complete.
+  virtual void Expand(StateId s) {
+    {
+      ArcIterator<FromFst> arcs(*from_fst_, s);
+      while (!arcs.Done()) {
+        CacheImpl<A>::PushArc(s, arcs.Value());
+        arcs.Next();
+      }
+    }
+    CacheImpl<A>::SetArcs(s);
+  }
+
+ private:
+  // Initialization of transducer determinization implementation, which
+  // is defined after DeterminizeFst since it calls it.
+  void Init(const Fst<A> &fst);
+
+  float delta_;                // Quantization delta passed on to the stages.
+  Label subsequential_label_;  // Label passed to factoring and FromMapper.
+  FromFst *from_fst_;          // Owned; final stage of the pipeline.
+
+  void operator=(const DeterminizeFstImpl<A, S> &);  // disallow
+};
+
+
+// Determinizes a weighted transducer. This version is a delayed
+// Fst. The result will be an equivalent FST that has the property
+// that no state has two transitions with the same input label.
+// For this algorithm, epsilon transitions are treated as regular
+// symbols (cf. RmEpsilon).
+//
+// The transducer must be functional. The weights must be (weakly)
+// left divisible (valid for TropicalWeight and LogWeight for instance)
+// and be zero-sum-free, i.e. for all a,b: Plus(a, b) = 0 => a = b = 0.
+//
+// Complexity:
+// - Determinizable: exponential (polynomial in the size of the output)
+// - Non-determinizable: does not terminate
+//
+// The determinizable automata include all unweighted and all acyclic input.
+//
+// References:
+// - Mehryar Mohri, "Finite-State Transducers in Language and Speech
+//   Processing". Computational Linguistics, 23:2, 1997.
+//
+// This class attaches interface to implementation and handles
+// reference counting, delegating most methods to ImplToFst.
+template <class A>
+class DeterminizeFst : public ImplToFst< DeterminizeFstImplBase<A> > {
+ public:
+  friend class ArcIterator< DeterminizeFst<A> >;
+  friend class StateIterator< DeterminizeFst<A> >;
+  template <class B, StringType S> friend class DeterminizeFstImpl;
+
+  typedef A Arc;
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+  typedef typename A::Label Label;
+  typedef CacheState<A> State;
+  typedef DeterminizeFstImplBase<A> Impl;
+
+  using ImplToFst<Impl>::SetImpl;
+
+  // Selects the acceptor or the transducer implementation depending on
+  // whether 'fst' has the kAcceptor property (computed if unknown).
+  explicit DeterminizeFst(
+      const Fst<A> &fst,
+      const DeterminizeFstOptions<A> &opts = DeterminizeFstOptions<A>()) {
+    if (fst.Properties(kAcceptor, true)) {
+      // Calls implementation for acceptors.
+      typedef DefaultCommonDivisor<Weight> D;
+      SetImpl(new DeterminizeFsaImpl<A, D>(fst, D(), 0, 0, opts));
+    } else {
+      // Calls implementation for transducers.
+      SetImpl(new DeterminizeFstImpl<A, STRING_LEFT_RESTRICT>(fst, opts));
+    }
+  }
+
+  // This acceptor-only version additionally computes the distance to
+  // final states in the output if provided with those distances for the
+  // input. Useful for e.g. unique N-shortest paths.
+  //
+  // If 'fst' is not an acceptor an error is reported and kError is set on
+  // the resulting FST.
+  DeterminizeFst(
+      const Fst<A> &fst,
+      const vector<Weight> &in_dist, vector<Weight> *out_dist,
+      const DeterminizeFstOptions<A> &opts = DeterminizeFstOptions<A>()) {
+    typedef DefaultCommonDivisor<Weight> D;
+    // Install the implementation before flagging any error: nothing in this
+    // constructor provides an implementation object until SetImpl() has run,
+    // so GetImpl() must not be dereferenced earlier.
+    SetImpl(new DeterminizeFsaImpl<A, D>(fst, D(), &in_dist, out_dist, opts));
+    if (!fst.Properties(kAcceptor, true)) {
+      FSTERROR() << "DeterminizeFst:"
+                 << " distance to final states computed for acceptors only";
+      GetImpl()->SetProperties(kError, kError);
+    }
+  }
+
+  // See Fst<>::Copy() for doc. A safe copy gets its own implementation via
+  // Impl::Copy(); otherwise the implementation is shared with 'fst'.
+  DeterminizeFst(const DeterminizeFst<A> &fst, bool safe = false) {
+    if (safe)
+      SetImpl(fst.GetImpl()->Copy());
+    else
+      SetImpl(fst.GetImpl(), false);
+  }
+
+  // Get a copy of this DeterminizeFst. See Fst<>::Copy() for further doc.
+  virtual DeterminizeFst<A> *Copy(bool safe = false) const {
+    return new DeterminizeFst<A>(*this, safe);
+  }
+
+  virtual inline void InitStateIterator(StateIteratorData<A> *data) const;
+
+  virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
+    GetImpl()->InitArcIterator(s, data);
+  }
+
+ private:
+  // This private version is for passing the common divisor to
+  // FSA determinization. It always uses the acceptor implementation and is
+  // what DeterminizeFstImpl::Init() calls.
+  template <class D>
+  DeterminizeFst(const Fst<A> &fst, const D &common_div,
+                 const DeterminizeFstOptions<A> &opts)
+      : ImplToFst<Impl>(
+            new DeterminizeFsaImpl<A, D>(fst, common_div, 0, 0, opts)) {}
+
+  // Makes visible to friends.
+  Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
+
+  void operator=(const DeterminizeFst<A> &fst);  // Disallow
+};
+
+
+// Initialization of transducer determinization implementation, which
+// is defined after DeterminizeFst since it calls it. Builds the chain
+// transducer -> Gallic acceptor -> determinized acceptor -> factored
+// acceptor -> transducer and stores the last stage in from_fst_.
+template <class A, StringType S>
+void DeterminizeFstImpl<A, S>::Init(const Fst<A> &fst) {
+  // Stage 1: map the transducer to an acceptor.
+  ToFst gallic_fsa(fst, ToMapper());
+
+  // Stage 2: determinize the acceptor. This recursive use of DeterminizeFst
+  // terminates since passing the common divisor selects its private
+  // constructor.
+  CacheOptions cache_opts(GetCacheGc(), GetCacheLimit());
+  DeterminizeFstOptions<ToArc> det_opts(cache_opts, delta_);
+  DeterminizeFst<ToArc> det_fsa(gallic_fsa, CommonDivisor(), det_opts);
+
+  // Stage 3: factor the weights and map back to a transducer.
+  FactorWeightOptions<ToArc> factor_opts(CacheOptions(true, 0), delta_,
+                                         kFactorFinalWeights,
+                                         subsequential_label_,
+                                         subsequential_label_);
+  FactorWeightFst<ToArc, FactorIterator> factored(det_fsa, factor_opts);
+  from_fst_ = new FromFst(factored, FromMapper(subsequential_label_));
+}
+
+
+// Specialization for DeterminizeFst.
+// All iteration behavior comes from CacheStateIterator, which is handed the
+// FST together with its implementation object.
+template <class A>
+class StateIterator< DeterminizeFst<A> >
+ : public CacheStateIterator< DeterminizeFst<A> > {
+ public:
+ // fst.GetImpl() is private to DeterminizeFst; this specialization is
+ // declared a friend there.
+ explicit StateIterator(const DeterminizeFst<A> &fst)
+ : CacheStateIterator< DeterminizeFst<A> >(fst, fst.GetImpl()) {}
+};
+
+
+// ArcIterator specialization for DeterminizeFst. Arc access is inherited
+// from CacheArcIterator; a state whose arcs are not yet available in the
+// implementation is expanded when the iterator is constructed.
+template <class A>
+class ArcIterator< DeterminizeFst<A> >
+    : public CacheArcIterator< DeterminizeFst<A> > {
+ public:
+  typedef typename A::StateId StateId;
+
+  ArcIterator(const DeterminizeFst<A> &fst, StateId s)
+      : CacheArcIterator< DeterminizeFst<A> >(fst.GetImpl(), s) {
+    typename DeterminizeFst<A>::Impl *impl = fst.GetImpl();
+    if (impl->HasArcs(s)) return;  // Nothing to compute.
+    impl->Expand(s);
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+
+// Fills in the generic state-iterator data with a newly allocated
+// StateIterator specialized for DeterminizeFst.
+// NOTE(review): the iterator is created with new and stored in data->base;
+// presumably the consumer of StateIteratorData releases it -- confirm.
+template <class A> inline
+void DeterminizeFst<A>::InitStateIterator(StateIteratorData<A> *data) const
+{
+ data->base = new StateIterator< DeterminizeFst<A> >(*this);
+}
+
+
+// Useful aliases when using StdArc.
+typedef DeterminizeFst<StdArc> StdDeterminizeFst;
+
+
+// Options for the Determinize() function. With the default arguments
+// (weight_threshold == Weight::Zero() and state_threshold == kNoStateId)
+// Determinize() performs no pruning.
+template <class Arc>
+struct DeterminizeOptions {
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Weight Weight;
+ typedef typename Arc::Label Label;
+
+ float delta; // Quantization delta for subset weights.
+ Weight weight_threshold; // Pruning weight threshold.
+ StateId state_threshold; // Pruning state threshold.
+ Label subsequential_label; // Label used for residual final output
+ // when producing subsequential transducers.
+
+ // d: delta, w: weight_threshold, n: state_threshold, l: subsequential_label.
+ explicit DeterminizeOptions(float d = kDelta, Weight w = Weight::Zero(),
+ StateId n = kNoStateId, Label l = 0)
+ : delta(d), weight_threshold(w), state_threshold(n),
+ subsequential_label(l) {}
+};
+
+
+// Determinizes a weighted transducer. This version writes the
+// determinized Fst to an output MutableFst. The result will be an
+// equivalent FST that has the property that no state has two
+// transitions with the same input label. For this algorithm, epsilon
+// transitions are treated as regular symbols (cf. RmEpsilon).
+//
+// The transducer must be functional. The weights must be (weakly)
+// left divisible (valid for TropicalWeight and LogWeight).
+//
+// Complexity:
+// - Determinizable: exponential (polynomial in the size of the output)
+// - Non-determinizable: does not terminate
+//
+// The determinizable automata include all unweighted and all acyclic input.
+//
+// References:
+// - Mehryar Mohri, "Finite-State Transducers in Language and Speech
+//   Processing". Computational Linguistics, 23:2, 1997.
+template <class Arc>
+void Determinize(const Fst<Arc> &ifst, MutableFst<Arc> *ofst,
+                 const DeterminizeOptions<Arc> &opts
+                 = DeterminizeOptions<Arc>()) {
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Weight Weight;
+
+  // Options for the delayed determinization that does the actual work.
+  DeterminizeFstOptions<Arc> nopts;
+  nopts.delta = opts.delta;
+  nopts.subsequential_label = opts.subsequential_label;
+  nopts.gc_limit = 0;  // Cache only the last state for fastest copy.
+
+  // Case 1: no pruning requested; just copy the delayed result.
+  const bool pruning = opts.weight_threshold != Weight::Zero() ||
+                       opts.state_threshold != kNoStateId;
+  if (!pruning) {
+    *ofst = DeterminizeFst<Arc>(ifst, nopts);
+    return;
+  }
+
+  // Case 2: pruning requested but the input is not known to be an acceptor;
+  // determinize fully, then prune the output in place.
+  if (!ifst.Properties(kAcceptor, false)) {
+    *ofst = DeterminizeFst<Arc>(ifst, nopts);
+    Prune(ofst, opts.weight_threshold, opts.state_threshold);
+    return;
+  }
+
+  // Case 3: pruning on an acceptor; the output distances computed during
+  // determinization are handed to Prune() while copying into ofst.
+  vector<Weight> idistance, odistance;
+  ShortestDistance(ifst, &idistance, true);
+  DeterminizeFst<Arc> dfst(ifst, idistance, &odistance, nopts);
+  PruneOptions< Arc, AnyArcFilter<Arc> > popts(opts.weight_threshold,
+                                               opts.state_threshold,
+                                               AnyArcFilter<Arc>(),
+                                               &odistance);
+  Prune(dfst, ofst, popts);
+}
+
+
+} // namespace fst
+
+#endif // FST_LIB_DETERMINIZE_H__
diff --git a/src/include/fst/dfs-visit.h b/src/include/fst/dfs-visit.h
new file mode 100644
index 0000000..b47c78d
--- /dev/null
+++ b/src/include/fst/dfs-visit.h
@@ -0,0 +1,204 @@
+// dfs-visit.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Depth-first search visitation. See visit.h for more general
+// search queue disciplines.
+
+#ifndef FST_LIB_DFS_VISIT_H__
+#define FST_LIB_DFS_VISIT_H__
+
+#include <stack>
+#include <vector>
+using std::vector;
+
+#include <fst/arcfilter.h>
+#include <fst/fst.h>
+
+
+namespace fst {
+
+// Visitor Interface - class determines actions taken during a Dfs.
+// If any of the boolean member functions return false, the DFS is
+// aborted by first calling FinishState() on all currently grey states
+// and then calling FinishVisit().
+//
+// Note this is similar to the more general visitor interface in visit.h
+// except that FinishState returns additional information appropriate only for
+// a DFS and some methods names here are better suited to a DFS.
+//
+// template <class Arc>
+// class Visitor {
+// public:
+// typedef typename Arc::StateId StateId;
+//
+// Visitor(T *return_data);
+// // Invoked before DFS visit
+// void InitVisit(const Fst<Arc> &fst);
+// // Invoked when state discovered (2nd arg is DFS tree root)
+// bool InitState(StateId s, StateId root);
+// // Invoked when tree arc examined (to white/undiscovered state)
+// bool TreeArc(StateId s, const Arc &a);
+// // Invoked when back arc examined (to grey/unfinished state)
+// bool BackArc(StateId s, const Arc &a);
+// // Invoked when forward or cross arc examined (to black/finished state)
+// bool ForwardOrCrossArc(StateId s, const Arc &a);
+// // Invoked when state finished (PARENT is kNoStateId and ARC == NULL
+// // when S is tree root)
+// void FinishState(StateId s, StateId parent, const Arc *parent_arc);
+// // Invoked after DFS visit
+// void FinishVisit();
+// };
+
+// An Fst state's DFS status
+const int kDfsWhite = 0; // Undiscovered
+const int kDfsGrey = 1; // Discovered & unfinished
+const int kDfsBlack = 2; // Finished
+
+// An Fst state's DFS stack state
+// Pairs a state id with an arc iterator over that state's arcs, so the
+// position reached among the arcs is kept together with the state.
+template <class Arc>
+struct DfsState {
+ typedef typename Arc::StateId StateId;
+
+ // Positions the arc iterator on state s of fst.
+ DfsState(const Fst<Arc> &fst, StateId s): state_id(s), arc_iter(fst, s) {}
+
+ StateId state_id; // Fst state ...
+ ArcIterator< Fst<Arc> > arc_iter; // and its corresponding arcs
+};
+
+
+// Performs depth-first visitation. Visitor class argument determines
+// actions and contains any return data. ArcFilter determines arcs
+// that are considered.
+//
+// Note this is similar to Visit() in visit.h called with a LIFO
+// queue except this version has a Visitor class specialized and
+// augmented for a DFS.
+//
+// The first DFS tree is rooted at the start state; further trees are rooted
+// at the remaining undiscovered states. InitVisit() is always called first
+// and FinishVisit() last, including when the FST has no start state.
+template <class Arc, class V, class ArcFilter>
+void DfsVisit(const Fst<Arc> &fst, V *visitor, ArcFilter filter) {
+ typedef typename Arc::StateId StateId;
+
+ visitor->InitVisit(fst);
+
+ StateId start = fst.Start();
+ // No start state: nothing to traverse.
+ if (start == kNoStateId) {
+ visitor->FinishVisit();
+ return;
+ }
+
+ vector<char> state_color; // Fst state DFS status
+ stack<DfsState<Arc> *> state_stack; // DFS execution stack
+
+ StateId nstates = start + 1; // # of known states in general case
+ bool expanded = false;
+ if (fst.Properties(kExpanded, false)) { // tests if expanded case, then
+ nstates = CountStates(fst); // uses ExpandedFst::NumStates().
+ expanded = true;
+ }
+
+ state_color.resize(nstates, kDfsWhite);
+ // Only advanced in the non-expanded case below, to discover states beyond
+ // the largest one known so far.
+ StateIterator< Fst<Arc> > siter(fst);
+
+ // Continue DFS while true
+ bool dfs = true;
+
+ // Iterate over trees in DFS forest.
+ for (StateId root = start; dfs && root < nstates;) {
+ state_color[root] = kDfsGrey;
+ state_stack.push(new DfsState<Arc>(fst, root));
+ dfs = visitor->InitState(root, root);
+ while (!state_stack.empty()) {
+ DfsState<Arc> *dfs_state = state_stack.top();
+ StateId s = dfs_state->state_id;
+ // Grow the color table on demand when a larger state id shows up.
+ if (s >= state_color.size()) {
+ nstates = s + 1;
+ state_color.resize(nstates, kDfsWhite);
+ }
+ ArcIterator< Fst<Arc> > &aiter = dfs_state->arc_iter;
+ // Finish the top state when its arcs are exhausted, or unconditionally
+ // once the visitor has asked to stop (dfs == false); in the latter case
+ // this branch is taken on every iteration until the stack is empty.
+ if (!dfs || aiter.Done()) {
+ state_color[s] = kDfsBlack;
+ delete dfs_state;
+ state_stack.pop();
+ if (!state_stack.empty()) {
+ DfsState<Arc> *parent_state = state_stack.top();
+ StateId p = parent_state->state_id;
+ ArcIterator< Fst<Arc> > &piter = parent_state->arc_iter;
+ // The parent's iterator still points at the tree arc that led to s
+ // (the tree-arc case below does not advance it); report it, then
+ // move the parent on to its next arc.
+ visitor->FinishState(s, p, &piter.Value());
+ piter.Next();
+ } else {
+ // s was the root of the current DFS tree.
+ visitor->FinishState(s, kNoStateId, 0);
+ }
+ continue;
+ }
+ const Arc &arc = aiter.Value();
+ // Make sure the destination state has a color entry before reading it.
+ if (arc.nextstate >= state_color.size()) {
+ nstates = arc.nextstate + 1;
+ state_color.resize(nstates, kDfsWhite);
+ }
+ // Arcs rejected by the filter are skipped without notifying the visitor.
+ if (!filter(arc)) {
+ aiter.Next();
+ continue;
+ }
+ int next_color = state_color[arc.nextstate];
+ switch (next_color) {
+ default:
+ case kDfsWhite:
+ // Tree arc: descend into the undiscovered destination. The iterator
+ // of s is deliberately not advanced here; that happens when the
+ // destination is finished.
+ dfs = visitor->TreeArc(s, arc);
+ if (!dfs) break;
+ state_color[arc.nextstate] = kDfsGrey;
+ state_stack.push(new DfsState<Arc>(fst, arc.nextstate));
+ dfs = visitor->InitState(arc.nextstate, root);
+ break;
+ case kDfsGrey:
+ dfs = visitor->BackArc(s, arc);
+ aiter.Next();
+ break;
+ case kDfsBlack:
+ dfs = visitor->ForwardOrCrossArc(s, arc);
+ aiter.Next();
+ break;
+ }
+ }
+
+ // Find next tree root
+ // After the tree rooted at the start state the scan begins at state 0;
+ // afterwards it continues from the state following the previous root.
+ for (root = root == start ? 0 : root + 1;
+ root < nstates && state_color[root] != kDfsWhite;
+ ++root);
+
+ // Check for a state beyond the largest known state
+ if (!expanded && root == nstates) {
+ for (; !siter.Done(); siter.Next()) {
+ if (siter.Value() == nstates) {
+ ++nstates;
+ state_color.push_back(kDfsWhite);
+ break;
+ }
+ }
+ }
+ }
+ visitor->FinishVisit();
+}
+
+
+// Convenience overload of DfsVisit that uses AnyArcFilter<Arc> as the arc
+// filter.
+template <class Arc, class V>
+void DfsVisit(const Fst<Arc> &fst, V *visitor) {
+ DfsVisit(fst, visitor, AnyArcFilter<Arc>());
+}
+
+} // namespace fst
+
+#endif // FST_LIB_DFS_VISIT_H__
diff --git a/src/include/fst/difference.h b/src/include/fst/difference.h
new file mode 100644
index 0000000..8a3306f
--- /dev/null
+++ b/src/include/fst/difference.h
@@ -0,0 +1,189 @@
+// difference.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Class to compute the difference between two FSAs
+
+#ifndef FST_LIB_DIFFERENCE_H__
+#define FST_LIB_DIFFERENCE_H__
+
+#include <vector>
+using std::vector;
+#include <algorithm>
+
+#include <fst/cache.h>
+#include <fst/compose.h>
+#include <fst/complement.h>
+
+
+namespace fst {
+
+// Options for DifferenceFst: the ComposeFstOptions fields (cache options,
+// the two matchers, the composition filter and the state table) under a
+// difference-specific name.
+template <class A,
+          class M = Matcher<Fst<A> >,
+          class F = SequenceComposeFilter<M>,
+          class T = GenericComposeStateTable<A, typename F::FilterState> >
+struct DifferenceFstOptions : public ComposeFstOptions<A, M, F, T> {
+  // opts: cache options; mat1/mat2: matchers for the first/second FST;
+  // filt: composition filter; sttable: composition state table.
+  // The cache options are forwarded to the base class together with the
+  // other arguments; the base constructor takes them as its first argument.
+  explicit DifferenceFstOptions(const CacheOptions &opts,
+                                M *mat1 = 0, M *mat2 = 0,
+                                F *filt = 0, T *sttable = 0)
+      : ComposeFstOptions<A, M, F, T>(opts, mat1, mat2, filt, sttable) { }
+
+  // All the default options.
+  DifferenceFstOptions() {}
+};
+
+// Computes the difference between two FSAs. This version is a delayed
+// Fst. Only strings that are in the first automaton but not in second
+// are retained in the result.
+//
+// The first argument must be an acceptor; the second argument must be
+// an unweighted, epsilon-free, deterministic acceptor. One of the
+// arguments must be label-sorted.
+//
+// Complexity: same as ComposeFst.
+//
+// Caveats: same as ComposeFst.
+template <class A>
+class DifferenceFst : public ComposeFst<A> {
+ public:
+  using ImplToFst< ComposeFstImplBase<A> >::SetImpl;
+  using ImplToFst< ComposeFstImplBase<A> >::GetImpl;
+
+  using ComposeFst<A>::CreateBase1;
+
+  typedef A Arc;
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+
+  // A - B = A ^ B'.
+  // Composes fst1 with the complement of fst2, matching the complement's
+  // rho label on the second side. 'opts' supplies the cache options of the
+  // resulting composition.
+  DifferenceFst(const Fst<A> &fst1, const Fst<A> &fst2,
+                const CacheOptions &opts = CacheOptions()) {
+    typedef RhoMatcher< Matcher<Fst<A> > > R;
+
+    ComplementFst<A> cfst(fst2);
+    // Pass the caller's cache options through instead of a default
+    // CacheOptions(), so that settings such as gc_limit take effect.
+    ComposeFstOptions<A, R> copts(opts,
+                                  new R(fst1, MATCH_NONE),
+                                  new R(cfst, MATCH_INPUT,
+                                        ComplementFst<A>::kRhoLabel));
+    SetImpl(CreateBase1(fst1, cfst, copts));
+
+    if (!fst1.Properties(kAcceptor, true)) {
+      FSTERROR() << "DifferenceFst: 1st argument not an acceptor";
+      GetImpl()->SetProperties(kError, kError);
+    }
+  }
+
+  // Same as above, but the caller's matchers (opts.matcher1/opts.matcher2)
+  // are wrapped in RhoMatchers.
+  // NOTE(review): copts is a ComposeFstOptions<A, R> initialized from 'opts'
+  // and then given the two matchers; the filter and state table carried by
+  // 'opts' do not appear to be used here -- confirm whether that is intended.
+  template <class M, class F, class T>
+  DifferenceFst(const Fst<A> &fst1, const Fst<A> &fst2,
+                const DifferenceFstOptions<A, M, F, T> &opts) {
+    typedef RhoMatcher<M> R;
+
+    ComplementFst<A> cfst(fst2);
+    ComposeFstOptions<A, R> copts(opts);
+    copts.matcher1 = new R(fst1, MATCH_NONE, kNoLabel, MATCHER_REWRITE_ALWAYS,
+                           opts.matcher1);
+    copts.matcher2 = new R(cfst, MATCH_INPUT, ComplementFst<A>::kRhoLabel,
+                           MATCHER_REWRITE_ALWAYS, opts.matcher2);
+
+    SetImpl(CreateBase1(fst1, cfst, copts));
+
+    if (!fst1.Properties(kAcceptor, true)) {
+      FSTERROR() << "DifferenceFst: 1st argument not an acceptor";
+      GetImpl()->SetProperties(kError, kError);
+    }
+  }
+
+  // See Fst<>::Copy() for doc.
+  DifferenceFst(const DifferenceFst<A> &fst, bool safe = false)
+      : ComposeFst<A>(fst, safe) {}
+
+  // Get a copy of this DifferenceFst. See Fst<>::Copy() for further doc.
+  virtual DifferenceFst<A> *Copy(bool safe = false) const {
+    return new DifferenceFst<A>(*this, safe);
+  }
+};
+
+
+// Specialization for DifferenceFst.
+// Adds nothing of its own: construction is forwarded to the ComposeFst
+// state iterator it derives from.
+template <class A>
+class StateIterator< DifferenceFst<A> >
+ : public StateIterator< ComposeFst<A> > {
+ public:
+ explicit StateIterator(const DifferenceFst<A> &fst)
+ : StateIterator< ComposeFst<A> >(fst) {}
+};
+
+
+// Specialization for DifferenceFst.
+// Adds nothing of its own: construction is forwarded to the ComposeFst
+// arc iterator it derives from.
+template <class A>
+class ArcIterator< DifferenceFst<A> >
+ : public ArcIterator< ComposeFst<A> > {
+ public:
+ typedef typename A::StateId StateId;
+
+ ArcIterator(const DifferenceFst<A> &fst, StateId s)
+ : ArcIterator< ComposeFst<A> >(fst, s) {}
+};
+
+// Useful alias when using StdArc.
+typedef DifferenceFst<StdArc> StdDifferenceFst;
+
+
+typedef ComposeOptions DifferenceOptions;
+
+
+// Computes the difference between two FSAs. This version writes
+// the difference to an output MutableFst. Only strings that are in
+// the first automaton but not in second are retained in the result.
+//
+// The first argument must be an acceptor; the second argument must be
+// an unweighted, epsilon-free, deterministic acceptor. One of the
+// arguments must be label-sorted.
+//
+// Complexity: same as Compose.
+//
+// Caveats: same as Compose.
+template<class Arc>
+void Difference(const Fst<Arc> &ifst1, const Fst<Arc> &ifst2,
+                MutableFst<Arc> *ofst,
+                const DifferenceOptions &opts = DifferenceOptions()) {
+  typedef Matcher< Fst<Arc> > M;
+
+  // Each case builds the options type matching the requested filter and
+  // copies the delayed difference into ofst. In every case gc_limit is set
+  // to 0: cache only the last state for fastest copy. For any other filter
+  // type ofst is not assigned.
+  switch (opts.filter_type) {
+    case AUTO_FILTER: {
+      CacheOptions nopts;
+      nopts.gc_limit = 0;
+      *ofst = DifferenceFst<Arc>(ifst1, ifst2, nopts);
+      break;
+    }
+    case SEQUENCE_FILTER: {
+      DifferenceFstOptions<Arc> dopts;
+      dopts.gc_limit = 0;
+      *ofst = DifferenceFst<Arc>(ifst1, ifst2, dopts);
+      break;
+    }
+    case ALT_SEQUENCE_FILTER: {
+      DifferenceFstOptions<Arc, M, AltSequenceComposeFilter<M> > dopts;
+      dopts.gc_limit = 0;
+      *ofst = DifferenceFst<Arc>(ifst1, ifst2, dopts);
+      break;
+    }
+    case MATCH_FILTER: {
+      DifferenceFstOptions<Arc, M, MatchComposeFilter<M> > dopts;
+      dopts.gc_limit = 0;
+      *ofst = DifferenceFst<Arc>(ifst1, ifst2, dopts);
+      break;
+    }
+    default:
+      break;
+  }
+
+  if (opts.connect)
+    Connect(ofst);
+}
+
+} // namespace fst
+
+#endif // FST_LIB_DIFFERENCE_H__
diff --git a/src/include/fst/edit-fst.h b/src/include/fst/edit-fst.h
new file mode 100644
index 0000000..303cb24
--- /dev/null
+++ b/src/include/fst/edit-fst.h
@@ -0,0 +1,774 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: dbikel@google.com (Dan Bikel)
+//
+// An \ref Fst implementation that allows non-destructive edit operations on an
+// existing fst.
+
+#ifndef FST_LIB_EDIT_FST_H_
+#define FST_LIB_EDIT_FST_H_
+
+#include <vector>
+using std::vector;
+
+#include <fst/cache.h>
+
+namespace fst {
+
+// The EditFst class enables non-destructive edit operations on a wrapped
+// ExpandedFst. The implementation uses copy-on-write semantics at the node
+// level: if a user has an underlying fst on which he or she wants to perform a
+// relatively small number of edits (read: mutations), then this implementation
+// will copy the edited node to an internal MutableFst and perform any edits in
+// situ on that copied node. This class supports all the methods of MutableFst
+// except for DeleteStates(const vector<StateId> &); thus, new nodes may also be
+// added, and one may add transitions from existing nodes of the wrapped fst to
+// new nodes.
+//
+// N.B.: The documentation for Fst::Copy(true) says that its behavior is
+// undefined if invoked on an fst that has already been accessed. This class
+// requires that the Fst implementation it wraps provides consistent, reliable
+// behavior when its Copy(true) method is invoked, where consistent means
+// the graph structure, graph properties and state numbering do not change.
+// VectorFst and CompactFst, for example, are both well-behaved in this regard.
+
+// The EditFstData class is a container for all mutable data for EditFstImpl;
+// also, this class provides most of the actual implementation of what EditFst
+// does (that is, most of EditFstImpl's methods delegate to methods in this, the
+// EditFstData class). Instances of this class are reference-counted and can be
+// shared between otherwise independent EditFstImpl instances. This scheme
+// allows EditFstImpl to implement the thread-safe, copy-on-write semantics
+// required by Fst::Copy(true).
+//
+// template parameters:
+// A the type of arc to use
+// WrappedFstT the type of fst wrapped by the EditFst instance that
+// this EditFstData instance is backing
+// MutableFstT the type of mutable fst to use internally for edited states;
+// crucially, MutableFstT::Copy(false) *must* yield an fst that is
+// thread-safe for reading (VectorFst, for example, has this property)
+template <typename A,
+ typename WrappedFstT = ExpandedFst<A>,
+ typename MutableFstT = VectorFst<A> >
+class EditFstData {
+ public:
+ typedef A Arc;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+ typedef typename unordered_map<StateId, StateId>::const_iterator
+ IdMapIterator;
+ typedef typename unordered_map<StateId, Weight>::const_iterator
+ FinalWeightIterator;
+
+
+ EditFstData() : num_new_states_(0) {
+ SetEmptyAndDeleteKeysForInternalMaps();
+ }
+
+ EditFstData(const EditFstData &other) :
+ edits_(other.edits_),
+ external_to_internal_ids_(other.external_to_internal_ids_),
+ edited_final_weights_(other.edited_final_weights_),
+ num_new_states_(other.num_new_states_) {
+ }
+
+ ~EditFstData() {
+ }
+
+ static EditFstData<A, WrappedFstT, MutableFstT> *Read(istream &strm,
+ const FstReadOptions &opts);
+
+  // Serializes all private data members of this class to strm: the fst of
+  // edited states (always with its own header), the two lookup maps and the
+  // count of new states. Logs an error and returns false if the stream is
+  // in a failed state afterwards; returns true otherwise.
+  bool Write(ostream &strm, const FstWriteOptions &opts) const {
+    FstWriteOptions nested_opts(opts);
+    nested_opts.write_header = true;  // Force writing contained header.
+    edits_.Write(strm, nested_opts);
+    WriteType(strm, external_to_internal_ids_);
+    WriteType(strm, edited_final_weights_);
+    WriteType(strm, num_new_states_);
+    if (strm) return true;
+    LOG(ERROR) << "EditFstData::Write: write failed: " << opts.source;
+    return false;
+  }
+
+ int RefCount() const { return ref_count_.count(); }
+ int IncrRefCount() { return ref_count_.Incr(); }
+ int DecrRefCount() { return ref_count_.Decr(); }
+
+ StateId NumNewStates() const {
+ return num_new_states_;
+ }
+
+ // accessor methods for the fst holding edited states
+ StateId EditedStart() const {
+ return edits_.Start();
+ }
+
+  // Returns the final weight of external state s. Lookup order: an entry in
+  // edited_final_weights_ wins; otherwise the weight comes from edits_ if
+  // the state has been edited, and from the wrapped fst if it has not.
+  Weight Final(StateId s, const WrappedFstT *wrapped) const {
+    FinalWeightIterator override_it = GetFinalWeightIterator(s);
+    if (override_it != NotInFinalWeightMap())
+      return override_it->second;
+
+    IdMapIterator edited_it = GetEditedIdMapIterator(s);
+    if (edited_it == NotInEditedMap())
+      return wrapped->Final(s);
+    return edits_.Final(edited_it->second);
+  }
+
+  // Number of arcs leaving external state s, read from edits_ when the
+  // state has been edited and from the wrapped fst otherwise.
+  size_t NumArcs(StateId s, const WrappedFstT *wrapped) const {
+    IdMapIterator edited_it = GetEditedIdMapIterator(s);
+    if (edited_it == NotInEditedMap())
+      return wrapped->NumArcs(s);
+    return edits_.NumArcs(edited_it->second);
+  }
+
+  // Number of input epsilons of external state s, read from edits_ when the
+  // state has been edited and from the wrapped fst otherwise.
+  size_t NumInputEpsilons(StateId s, const WrappedFstT *wrapped) const {
+    IdMapIterator edited_it = GetEditedIdMapIterator(s);
+    if (edited_it == NotInEditedMap())
+      return wrapped->NumInputEpsilons(s);
+    return edits_.NumInputEpsilons(edited_it->second);
+  }
+
+  // Number of output epsilons of external state s, read from edits_ when
+  // the state has been edited and from the wrapped fst otherwise.
+  size_t NumOutputEpsilons(StateId s, const WrappedFstT *wrapped) const {
+    IdMapIterator edited_it = GetEditedIdMapIterator(s);
+    if (edited_it == NotInEditedMap())
+      return wrapped->NumOutputEpsilons(s);
+    return edits_.NumOutputEpsilons(edited_it->second);
+  }
+
+ void SetEditedProperties(uint64 props, uint64 mask) {
+ edits_.SetProperties(props, mask);
+ }
+
+ // non-const MutableFst operations
+
+ // Sets the start state for this fst.
+ void SetStart(StateId s) {
+ edits_.SetStart(s);
+ }
+
+  // Sets the final weight of external state s to w and returns the weight
+  // the state had before the call.
+  Weight SetFinal(StateId s, Weight w, const WrappedFstT *wrapped) {
+    Weight previous = Final(s, wrapped);
+    if (GetEditedIdMapIterator(s) != NotInEditedMap()) {
+      // Already edited: update the copy held in edits_.
+      edits_.SetFinal(GetEditableInternalId(s, wrapped), w);
+    } else {
+      // Not edited yet: don't add the state to edits_ (which can be
+      // expensive if s has many transitions); just record the weight in the
+      // edited_final_weights_ map.
+      edited_final_weights_[s] = w;
+    }
+    return previous;
+  }
+
+  // Adds a new state to this fst, initially with no arcs. The state is
+  // created in edits_, registered under the external id curr_num_states and
+  // counted as a new state; that external id is returned.
+  StateId AddState(StateId curr_num_states) {
+    const StateId internal_id = edits_.AddState();
+    external_to_internal_ids_[curr_num_states] = internal_id;
+    ++num_new_states_;
+    return curr_num_states;
+  }
+
+ // Adds the specified arc to the specified state of this fst.
+ // The state is first made editable via GetEditableInternalId(). Returns a
+ // pointer to the arc that was last in the state's arc list before the
+ // addition, or NULL if the state had no arcs.
+ // NOTE(review): the returned pointer is taken before edits_.AddArc() runs;
+ // if MutableFstT relocates a state's arc storage on insertion, the pointer
+ // may dangle by the time the caller reads it -- confirm against the
+ // MutableFstT in use before relying on it.
+ const A *AddArc(StateId s, const Arc &arc, const WrappedFstT *wrapped) {
+ StateId internal_id = GetEditableInternalId(s, wrapped);
+
+ size_t num_arcs = edits_.NumArcs(internal_id);
+ ArcIterator<MutableFstT> arc_it(edits_, internal_id);
+ const A *prev_arc = NULL;
+ if (num_arcs > 0) {
+ // grab the final arc associated with this state in edits_
+ arc_it.Seek(num_arcs - 1);
+ prev_arc = &(arc_it.Value());
+ }
+ edits_.AddArc(internal_id, arc);
+ return prev_arc;
+ }
+
+ // Discards all edits: deletes every state of edits_, resets the count of
+ // new states and empties both lookup maps.
+ void DeleteStates() {
+ edits_.DeleteStates();
+ num_new_states_ = 0;
+ external_to_internal_ids_.clear();
+ edited_final_weights_.clear();
+ }
+
+ // Removes all but the first n outgoing arcs of the specified state.
+ void DeleteArcs(StateId s, size_t n, const WrappedFstT *wrapped) {
+ edits_.DeleteArcs(GetEditableInternalId(s, wrapped), n);
+ }
+
+ // Removes all outgoing arcs from the specified state.
+ void DeleteArcs(StateId s, const WrappedFstT *wrapped) {
+ edits_.DeleteArcs(GetEditableInternalId(s, wrapped));
+ }
+
+ // end methods for non-const MutableFst operations
+
+  // Provides information for the generic arc iterator. An edited state is
+  // iterated through edits_ using its internal id; any other state is
+  // iterated through the wrapped fst.
+  void InitArcIterator(StateId s, ArcIteratorData<Arc> *data,
+                       const WrappedFstT *wrapped) const {
+    IdMapIterator edited_it = GetEditedIdMapIterator(s);
+    if (edited_it != NotInEditedMap()) {
+      VLOG(2) << "EditFstData::InitArcIterator: iterating on edited state "
+              << s << " (internal state id: " << edited_it->second << ")";
+      edits_.InitArcIterator(edited_it->second, data);
+      return;
+    }
+    VLOG(3) << "EditFstData::InitArcIterator: iterating on state "
+            << s << " of original fst";
+    wrapped->InitArcIterator(s, data);
+  }
+
+ // Provides information for the generic mutable arc iterator.
+ // The state is made editable first (GetEditableInternalId), and the
+ // iterator stored in data->base operates on edits_.
+ // NOTE(review): the iterator is allocated with new; presumably the consumer
+ // of MutableArcIteratorData releases it -- confirm.
+ void InitMutableArcIterator(StateId s, MutableArcIteratorData<A> *data,
+ const WrappedFstT *wrapped) {
+ data->base =
+ new MutableArcIterator<MutableFstT>(&edits_,
+ GetEditableInternalId(s, wrapped));
+ }
+
+ // Debugging aid: logs one (external,internal) state-id pair per entry of the
+ // external-to-internal id map.
+ void PrintMap() {
+   IdMapIterator entry = external_to_internal_ids_.begin();
+   while (entry != NotInEditedMap()) {
+     LOG(INFO) << "(external,internal)=("
+               << entry->first << "," << entry->second << ")";
+     ++entry;
+   }
+ }
+
+
+ private:
+ // Currently a no-op: the body is empty.
+ void SetEmptyAndDeleteKeysForInternalMaps() {
+ }
+
+ // Looks up external state id s in the external-to-internal id map and
+ // returns the resulting iterator (equal to NotInEditedMap() when s has no
+ // entry).
+ IdMapIterator GetEditedIdMapIterator(StateId s) const {
+   const IdMapIterator position = external_to_internal_ids_.find(s);
+   return position;
+ }
+ // Sentinel iterator meaning "no entry in the external-to-internal id map".
+ IdMapIterator NotInEditedMap() const {
+   const IdMapIterator past_the_end = external_to_internal_ids_.end();
+   return past_the_end;
+ }
+
+ // Looks up external state id s in the map of edited final weights and
+ // returns the resulting iterator (equal to NotInFinalWeightMap() when s has
+ // no entry).
+ FinalWeightIterator GetFinalWeightIterator(StateId s) const {
+   const FinalWeightIterator position = edited_final_weights_.find(s);
+   return position;
+ }
+ // Sentinel iterator meaning "no entry in the edited-final-weights map".
+ FinalWeightIterator NotInFinalWeightMap() const {
+   const FinalWeightIterator past_the_end = edited_final_weights_.end();
+   return past_the_end;
+ }
+
+ // Maps external state id s to the id of its editable copy in edits_.
+ //
+ // If s already has an entry in the id map, that internal id is returned
+ // unchanged. Otherwise a new state is added to edits_, registered in the id
+ // map, and populated with a copy of every arc s has in the wrapped fst. Its
+ // final weight is taken from edited_final_weights_ when an entry for s
+ // exists there (the entry is then erased), and from the wrapped fst
+ // otherwise.
+ //
+ // \return the state id in edits_ of the (possibly newly created) editable
+ // copy of s
+ StateId GetEditableInternalId(StateId s, const WrappedFstT *wrapped) {
+   const IdMapIterator known = GetEditedIdMapIterator(s);
+   if (known != NotInEditedMap()) {
+     return known->second;
+   }
+
+   const StateId fresh_id = edits_.AddState();
+   VLOG(2) << "EditFstData::GetEditableInternalId: editing state " << s
+           << " of original fst; new internal state id:" << fresh_id;
+   external_to_internal_ids_[s] = fresh_id;
+
+   {
+     // Copy the outgoing arcs of s; the iterator is scoped so that it is
+     // released before the final weight is handled.
+     ArcIterator< Fst<A> > source_arcs(*wrapped, s);
+     while (!source_arcs.Done()) {
+       edits_.AddArc(fresh_id, source_arcs.Value());
+       source_arcs.Next();
+     }
+   }
+
+   // Carry over the final weight, preferring a previously edited one.
+   const FinalWeightIterator pending = GetFinalWeightIterator(s);
+   if (pending != NotInFinalWeightMap()) {
+     edits_.SetFinal(fresh_id, pending->second);
+     edited_final_weights_.erase(s);
+   } else {
+     edits_.SetFinal(fresh_id, wrapped->Final(s));
+   }
+   return fresh_id;
+ }
+
+ // A mutable fst (by default, a VectorFst) to contain new states, and/or
+ // copies of states from a wrapped ExpandedFst that have been modified in
+ // some way.
+ MutableFstT edits_;
+ // A mapping from external state id's to the internal id's of states that
+ // appear in edits_.
+ unordered_map<StateId, StateId> external_to_internal_ids_;
+ // A mapping from external state id's to final state weights assigned to
+ // those states. The states in this map are *only* those whose final weight
+ // has been modified; if any other part of the state has been modified,
+ // the entire state is copied to edits_, and all modifications reside there.
+ unordered_map<StateId, Weight> edited_final_weights_;
+ // The number of new states added to this mutable fst impl, which is <= the
+ // number of states in edits_ (since edits_ contains both edited *and* new
+ // states).
+ StateId num_new_states_;
+ RefCounter ref_count_;
+};
+
+ // EditFstData method implementations: just the Read method.
+ //
+ // Deserializes an EditFstData from strm: first the MutableFstT holding the
+ // edits (including its own header), then the external-to-internal id map,
+ // the edited final weights and the count of new states.
+ //
+ // \return a heap-allocated EditFstData owned by the caller, or 0 if the
+ // edits fst could not be read or the stream is in a failed state afterwards.
+ // Nothing is leaked on failure.
+ template <typename A, typename WrappedFstT, typename MutableFstT>
+ EditFstData<A, WrappedFstT, MutableFstT> *
+ EditFstData<A, WrappedFstT, MutableFstT>::Read(istream &strm,
+                                                const FstReadOptions &opts) {
+   EditFstData<A, WrappedFstT, MutableFstT> *data =
+       new EditFstData<A, WrappedFstT, MutableFstT>();
+   // next read in MutableFstT machine that stores edits
+   FstReadOptions edits_opts(opts);
+   edits_opts.header = 0;  // Contained header was written out, so read it in.
+
+   // Because our internal representation of edited states is a solid object
+   // of type MutableFstT (defaults to VectorFst<A>) and not a pointer,
+   // and because the static Read method allocates a new object on the heap,
+   // we need to call Read, check if there was a failure, use
+   // MutableFstT::operator= to assign the object (not the pointer) to the
+   // edits_ data member (which will increase the ref count by 1 on the impl)
+   // and, finally, delete the heap-allocated object.
+   MutableFstT *edits = MutableFstT::Read(strm, edits_opts);
+   if (!edits) {
+     delete data;  // do not leak the partially constructed object
+     return 0;
+   }
+   data->edits_ = *edits;
+   delete edits;
+   // finally, read in rest of private data members
+   ReadType(strm, &data->external_to_internal_ids_);
+   ReadType(strm, &data->edited_final_weights_);
+   ReadType(strm, &data->num_new_states_);
+   if (!strm) {
+     LOG(ERROR) << "EditFst::Read: read failed: " << opts.source;
+     delete data;  // do not leak the partially read object
+     return 0;
+   }
+   return data;
+ }
+
+// This class enables non-destructive edit operations on a wrapped ExpandedFst.
+// The implementation uses copy-on-write semantics at the node level: if a user
+// has an underlying fst on which he or she wants to perform a relatively small
+// number of edits (read: mutations), then this implementation will copy the
+// edited node to an internal MutableFst and perform any edits in situ on that
+// copied node. This class supports all the methods of MutableFst except for
+// DeleteStates(const vector<StateId> &); thus, new nodes may also be added, and
+// one may add transitions from existing nodes of the wrapped fst to new nodes.
+//
+// template parameters:
+// A the type of arc to use
+// WrappedFstT the type of fst wrapped by the EditFst instance that
+// this EditFstImpl instance is backing
+// MutableFstT the type of mutable fst to use internally for edited states;
+// crucially, MutableFstT::Copy(false) *must* yield an fst that is
+// thread-safe for reading (VectorFst, for example, has this property)
+ template <typename A,
+           typename WrappedFstT = ExpandedFst<A>,
+           typename MutableFstT = VectorFst<A> >
+ class EditFstImpl : public FstImpl<A> {
+  public:
+   using FstImpl<A>::SetProperties;
+   using FstImpl<A>::SetInputSymbols;
+   using FstImpl<A>::SetOutputSymbols;
+   using FstImpl<A>::WriteHeader;
+
+   typedef A Arc;
+   typedef typename Arc::Weight Weight;
+   typedef typename Arc::StateId StateId;
+
+   // Constructs an editable fst implementation with no states. Effectively,
+   // this initially-empty fst will in every way mimic the behavior of
+   // a VectorFst--more precisely, a VectorFstImpl instance--but with slightly
+   // slower performance (by a constant factor), due to the fact that
+   // this class maintains a mapping between external state id's and
+   // their internal equivalents.
+   EditFstImpl() {
+     FstImpl<A>::SetType("edit");
+     wrapped_ = new MutableFstT();
+     InheritPropertiesFromWrapped();
+     data_ = new EditFstData<A, WrappedFstT, MutableFstT>();
+   }
+
+   // Wraps the specified ExpandedFst. This constructor requires that the
+   // specified Fst is an ExpandedFst instance. This requirement is only enforced
+   // at runtime. (See below for the reason.)
+   //
+   // This library uses the pointer-to-implementation or "PIMPL" design pattern.
+   // In particular, to make it convenient to bind an implementation class to its
+   // interface, there are a pair of template "binder" classes, one for immutable
+   // and one for mutable fst's (ImplToFst and ImplToMutableFst, respectively).
+   // As it happens, the API for the ImplToMutableFst<I,F> class requires that
+   // the implementation class--the template parameter "I"--have a constructor
+   // taking a const Fst<A> reference. Accordingly, the constructor here must
+   // perform a static_cast to the WrappedFstT type required by EditFst and
+   // therefore EditFstImpl.
+   explicit EditFstImpl(const Fst<A> &wrapped)
+       : wrapped_(static_cast<WrappedFstT *>(wrapped.Copy())) {
+     FstImpl<A>::SetType("edit");
+
+     data_ = new EditFstData<A, WrappedFstT, MutableFstT>();
+     // have edits_ inherit all properties from wrapped_
+     data_->SetEditedProperties(wrapped_->Properties(kFstProperties, false),
+                                kFstProperties);
+     InheritPropertiesFromWrapped();
+   }
+
+   // A copy constructor for this implementation class, used to implement
+   // the Copy() method of the Fst interface. The wrapped fst is copied with
+   // Copy(true); the edit data is shared with impl (its reference count is
+   // incremented) and only copied later, on the first mutation, by
+   // MutateCheck(). The type string, properties and symbol tables of impl are
+   // carried over so that the copy reports the same metadata as the original.
+   EditFstImpl(const EditFstImpl &impl)
+       : wrapped_(static_cast<WrappedFstT *>(impl.wrapped_->Copy(true))),
+         data_(impl.data_) {
+     FstImpl<A>::SetType("edit");
+     data_->IncrRefCount();
+     SetProperties(impl.Properties());
+     SetInputSymbols(impl.InputSymbols());
+     SetOutputSymbols(impl.OutputSymbols());
+   }
+
+   // Releases the wrapped fst and drops this impl's reference to the shared
+   // edit data, deleting the data when this was the last reference.
+   ~EditFstImpl() {
+     delete wrapped_;
+     if (!data_->DecrRefCount()) {
+       delete data_;
+     }
+   }
+
+   // const Fst/ExpandedFst operations, declared in the Fst and ExpandedFst
+   // interfaces
+
+   // Returns the edited start state if one has been set, and the wrapped
+   // fst's start state otherwise.
+   StateId Start() const {
+     StateId edited_start = data_->EditedStart();
+     return edited_start == kNoStateId ? wrapped_->Start() : edited_start;
+   }
+
+   Weight Final(StateId s) const {
+     return data_->Final(s, wrapped_);
+   }
+
+   size_t NumArcs(StateId s) const {
+     return data_->NumArcs(s, wrapped_);
+   }
+
+   size_t NumInputEpsilons(StateId s) const {
+     return data_->NumInputEpsilons(s, wrapped_);
+   }
+
+   size_t NumOutputEpsilons(StateId s) const {
+     return data_->NumOutputEpsilons(s, wrapped_);
+   }
+
+   // The states of the wrapped fst plus the states newly added through edits.
+   StateId NumStates() const {
+     return wrapped_->NumStates() + data_->NumNewStates();
+   }
+
+   static EditFstImpl<A, WrappedFstT, MutableFstT> *
+   Read(istream &strm,
+        const FstReadOptions &opts);
+
+   // Serializes this fst: an outer header (without symbols), then the wrapped
+   // fst with its own header, then the edit data. Returns false if the stream
+   // is in a failed state after flushing.
+   bool Write(ostream &strm, const FstWriteOptions &opts) const {
+     FstHeader hdr;
+     hdr.SetStart(Start());
+     hdr.SetNumStates(NumStates());
+     FstWriteOptions header_opts(opts);
+     header_opts.write_isymbols = false;  // Let contained FST hold any symbols.
+     header_opts.write_osymbols = false;
+     WriteHeader(strm, header_opts, kFileVersion, &hdr);
+
+     // First, serialize wrapped fst to stream.
+     FstWriteOptions wrapped_opts(opts);
+     wrapped_opts.write_header = true;  // Force writing contained header.
+     wrapped_->Write(strm, wrapped_opts);
+
+     data_->Write(strm, opts);
+
+     strm.flush();
+     if (!strm) {
+       LOG(ERROR) << "EditFst::Write: write failed: " << opts.source;
+       return false;
+     }
+     return true;
+   }
+   // end const Fst operations
+
+   // non-const MutableFst operations
+
+   // Sets the start state for this fst.
+   void SetStart(StateId s) {
+     MutateCheck();
+     data_->SetStart(s);
+     SetProperties(SetStartProperties(FstImpl<A>::Properties()));
+   }
+
+   // Sets the final state for this fst.
+   void SetFinal(StateId s, Weight w) {
+     MutateCheck();
+     Weight old_weight = data_->SetFinal(s, w, wrapped_);
+     SetProperties(SetFinalProperties(FstImpl<A>::Properties(), old_weight, w));
+   }
+
+   // Adds a new state to this fst, initially with no arcs.
+   StateId AddState() {
+     MutateCheck();
+     SetProperties(AddStateProperties(FstImpl<A>::Properties()));
+     return data_->AddState(NumStates());
+   }
+
+   // Adds the specified arc to the specified state of this fst.
+   void AddArc(StateId s, const Arc &arc) {
+     MutateCheck();
+     const A *prev_arc = data_->AddArc(s, arc, wrapped_);
+     SetProperties(AddArcProperties(FstImpl<A>::Properties(), s, arc, prev_arc));
+   }
+
+   // Deleting an arbitrary subset of states is not supported: reports an error
+   // and sets the kError property.
+   void DeleteStates(const vector<StateId>& dstates) {
+     FSTERROR() << ": EditFstImpl::DeleteStates(const std::vector<StateId>&): "
+                << " not implemented";
+     SetProperties(kError, kError);
+   }
+
+   // Deletes all states in this fst.
+   void DeleteStates();
+
+   // Removes all but the first n outgoing arcs of the specified state.
+   void DeleteArcs(StateId s, size_t n) {
+     MutateCheck();
+     data_->DeleteArcs(s, n, wrapped_);
+     SetProperties(DeleteArcsProperties(FstImpl<A>::Properties()));
+   }
+
+   // Removes all outgoing arcs from the specified state.
+   void DeleteArcs(StateId s) {
+     MutateCheck();
+     data_->DeleteArcs(s, wrapped_);
+     SetProperties(DeleteArcsProperties(FstImpl<A>::Properties()));
+   }
+
+   // Capacity hints are accepted but ignored.
+   void ReserveStates(StateId s) {
+   }
+
+   void ReserveArcs(StateId s, size_t n) {
+   }
+
+   // end non-const MutableFst operations
+
+   // Provides information for the generic state iterator.
+   void InitStateIterator(StateIteratorData<Arc> *data) const {
+     data->base = 0;
+     data->nstates = NumStates();
+   }
+
+   // Provides information for the generic arc iterator.
+   void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const {
+     data_->InitArcIterator(s, data, wrapped_);
+   }
+
+   // Provides information for the generic mutable arc iterator.
+   void InitMutableArcIterator(StateId s, MutableArcIteratorData<A> *data) {
+     MutateCheck();
+     data_->InitMutableArcIterator(s, data, wrapped_);
+   }
+
+  private:
+   typedef typename unordered_map<StateId, StateId>::const_iterator
+   IdMapIterator;
+   typedef typename unordered_map<StateId, Weight>::const_iterator
+   FinalWeightIterator;
+   // Properties always true of this Fst class
+   static const uint64 kStaticProperties = kExpanded | kMutable;
+   // Current file format version
+   static const int kFileVersion = 2;
+   // Minimum file format version supported
+   static const int kMinFileVersion = 2;
+
+   // Causes this fst to inherit all the properties from its wrapped fst, except
+   // for the two properties that always apply to EditFst instances: kExpanded
+   // and kMutable.
+   void InheritPropertiesFromWrapped() {
+     SetProperties(wrapped_->Properties(kCopyProperties, false) |
+                   kStaticProperties);
+     SetInputSymbols(wrapped_->InputSymbols());
+     SetOutputSymbols(wrapped_->OutputSymbols());
+   }
+
+   // This method ensures that any operations that alter the mutable data
+   // portion of this EditFstImpl cause the data_ member to be copied when its
+   // reference count is greater than 1. Note that this method is distinct from
+   // MutableFst::Mutate, which gets invoked whenever one of the basic mutation
+   // methods defined in MutableFst is invoked, such as SetInputSymbols.
+   // The MutateCheck here in EditFstImpl is invoked whenever one of the
+   // mutating methods specifically related to the types of edits provided
+   // by EditFst is performed, such as changing an arc of an existing state
+   // of the wrapped fst via a MutableArcIterator, or adding a new state via
+   // AddState().
+   void MutateCheck() {
+     if (data_->RefCount() > 1) {
+       EditFstData<A, WrappedFstT, MutableFstT> *data_copy =
+           new EditFstData<A, WrappedFstT, MutableFstT>(*data_);
+       // data_ was dereferenced above, so it is known to be non-null here.
+       if (!data_->DecrRefCount()) {
+         delete data_;
+       }
+       data_ = data_copy;
+     }
+   }
+
+   // The fst that this fst wraps. The purpose of this class is to enable
+   // non-destructive edits on this wrapped fst.
+   const WrappedFstT *wrapped_;
+   // The mutable data for this EditFst instance, with delegates for all the
+   // methods that can mutate data.
+   EditFstData<A, WrappedFstT, MutableFstT> *data_;
+ };
+
+template <typename A, typename WrappedFstT, typename MutableFstT>
+const uint64 EditFstImpl<A, WrappedFstT, MutableFstT>::kStaticProperties;
+
+// EditFstImpl IMPLEMENTATION STARTS HERE
+
+ // Deletes all states in this fst: clears the edit data, replaces the wrapped
+ // fst with a new empty MutableFstT and updates the properties accordingly.
+ template<typename A, typename WrappedFstT, typename MutableFstT>
+ inline void EditFstImpl<A, WrappedFstT, MutableFstT>::DeleteStates() {
+   // Like every other mutating operation, detach from edit data that is shared
+   // with other impls before changing it; otherwise their edits would be wiped
+   // out as well.
+   MutateCheck();
+   data_->DeleteStates();
+   delete wrapped_;
+   // we are deleting all states, so just forget about pointer to wrapped_
+   // and do what default constructor does: set wrapped_ to a new VectorFst
+   wrapped_ = new MutableFstT();
+   uint64 newProps = DeleteAllStatesProperties(FstImpl<A>::Properties(),
+                                               kStaticProperties);
+   FstImpl<A>::SetProperties(newProps);
+ }
+
+ // Reads an EditFstImpl from strm: the outer header, then the wrapped fst
+ // (with its own header), then the edit data.
+ //
+ // \return a heap-allocated impl owned by the caller, or 0 on any read
+ // failure. On failure everything allocated so far is released.
+ template <typename A, typename WrappedFstT, typename MutableFstT>
+ EditFstImpl<A, WrappedFstT, MutableFstT> *
+ EditFstImpl<A, WrappedFstT, MutableFstT>::Read(istream &strm,
+                                                const FstReadOptions &opts) {
+   EditFstImpl<A, WrappedFstT, MutableFstT> *impl = new EditFstImpl();
+   FstHeader hdr;
+   if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) {
+     delete impl;
+     return 0;
+   }
+   impl->SetStart(hdr.Start());
+
+   // first, read in wrapped fst
+   FstReadOptions wrapped_opts(opts);
+   wrapped_opts.header = 0;  // Contained header was written out, so read it in.
+   Fst<A> *wrapped_fst = Fst<A>::Read(strm, wrapped_opts);
+   if (!wrapped_fst) {
+     delete impl;
+     return 0;
+   }
+   // Replace the empty fst created by the default constructor; from here on
+   // impl owns wrapped_fst and releases it in its destructor.
+   delete impl->wrapped_;
+   impl->wrapped_ = static_cast<WrappedFstT *>(wrapped_fst);
+
+   EditFstData<A, WrappedFstT, MutableFstT> *data =
+       EditFstData<A, WrappedFstT, MutableFstT>::Read(strm, opts);
+   if (!data) {
+     delete impl;  // also deletes wrapped_fst and the default edit data
+     return 0;
+   }
+   // Drop the edit data created by the default constructor before adopting
+   // the data that was just read.
+   if (!impl->data_->DecrRefCount()) {
+     delete impl->data_;
+   }
+   impl->data_ = data;
+
+   return impl;
+ }
+
+// END EditFstImpl IMPLEMENTATION
+
+// Concrete, editable FST. This class attaches interface to implementation.
+ template <typename A,
+           typename WrappedFstT = ExpandedFst<A>,
+           typename MutableFstT = VectorFst<A> >
+ class EditFst :
+     public ImplToMutableFst< EditFstImpl<A, WrappedFstT, MutableFstT> > {
+  public:
+   friend class MutableArcIterator< EditFst<A, WrappedFstT, MutableFstT> >;
+
+   typedef A Arc;
+   typedef typename A::StateId StateId;
+   typedef EditFstImpl<A, WrappedFstT, MutableFstT> Impl;
+
+   // Constructs an empty editable fst.
+   EditFst() : ImplToMutableFst<Impl>(new Impl()) {}
+
+   // Wraps the given fst for non-destructive editing.
+   explicit EditFst(const Fst<A> &fst) :
+       ImplToMutableFst<Impl>(new Impl(fst)) {}
+
+   explicit EditFst(const WrappedFstT &fst) :
+       ImplToMutableFst<Impl>(new Impl(fst)) {}
+
+   // See Fst<>::Copy() for doc.
+   EditFst(const EditFst<A, WrappedFstT, MutableFstT> &fst, bool safe = false) :
+       ImplToMutableFst<Impl>(fst, safe) {}
+
+   virtual ~EditFst() {}
+
+   // Get a copy of this EditFst. See Fst<>::Copy() for further doc.
+   virtual EditFst<A, WrappedFstT, MutableFstT> *Copy(bool safe = false) const {
+     return new EditFst<A, WrappedFstT, MutableFstT>(*this, safe);
+   }
+
+   // Shares the implementation of fst with this object.
+   EditFst<A, WrappedFstT, MutableFstT> &
+   operator=(const EditFst<A, WrappedFstT, MutableFstT> &fst) {
+     SetImpl(fst.GetImpl(), false);
+     return *this;
+   }
+
+   // Replaces the implementation with a new one wrapping fst (no-op on
+   // self-assignment).
+   virtual EditFst<A, WrappedFstT, MutableFstT> &operator=(const Fst<A> &fst) {
+     if (this != &fst) {
+       SetImpl(new Impl(fst));
+     }
+     return *this;
+   }
+
+   // Read an EditFst from an input stream; return NULL on error.
+   static EditFst<A, WrappedFstT, MutableFstT> *
+   Read(istream &strm,
+        const FstReadOptions &opts) {
+     Impl* impl = Impl::Read(strm, opts);
+     // Construct the fully parameterized type: Impl is specific to
+     // WrappedFstT and MutableFstT, so EditFst<A> alone would only be correct
+     // for the default template arguments.
+     return impl ? new EditFst<A, WrappedFstT, MutableFstT>(impl) : 0;
+   }
+
+   // Read an EditFst from a file; return NULL on error.
+   // Empty filename reads from standard input.
+   static EditFst<A, WrappedFstT, MutableFstT> *Read(const string &filename) {
+     Impl* impl = ImplToExpandedFst<Impl, MutableFst<A> >::Read(filename);
+     return impl ? new EditFst<A, WrappedFstT, MutableFstT>(impl) : 0;
+   }
+
+   virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
+     return GetImpl()->Write(strm, opts);
+   }
+
+   virtual bool Write(const string &filename) const {
+     return Fst<A>::WriteFile(filename);
+   }
+
+   virtual void InitStateIterator(StateIteratorData<Arc> *data) const {
+     GetImpl()->InitStateIterator(data);
+   }
+
+   virtual void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const {
+     GetImpl()->InitArcIterator(s, data);
+   }
+
+   virtual
+   void InitMutableArcIterator(StateId s, MutableArcIteratorData<A> *data) {
+     GetImpl()->InitMutableArcIterator(s, data);
+   }
+  private:
+   // Adopts an already constructed implementation (used by the Read methods).
+   explicit EditFst(Impl *impl) : ImplToMutableFst<Impl>(impl) {}
+
+   // Makes visible to friends.
+   Impl *GetImpl() const { return ImplToFst< Impl, MutableFst<A> >::GetImpl(); }
+
+   void SetImpl(Impl *impl, bool own_impl = true) {
+     ImplToFst< Impl, MutableFst<A> >::SetImpl(impl, own_impl);
+   }
+ };
+
+} // namespace fst
+
+#endif // FST_LIB_EDIT_FST_H_
diff --git a/src/include/fst/encode.h b/src/include/fst/encode.h
new file mode 100644
index 0000000..7245b45
--- /dev/null
+++ b/src/include/fst/encode.h
@@ -0,0 +1,599 @@
+// encode.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: johans@google.com (Johan Schalkwyk)
+//
+// \file
+ // Class to encode and decode an fst.
+
+#ifndef FST_LIB_ENCODE_H__
+#define FST_LIB_ENCODE_H__
+
+#include <climits>
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/arc-map.h>
+#include <fst/rmfinalepsilon.h>
+
+
+namespace fst {
+
+static const uint32 kEncodeLabels = 0x0001;
+static const uint32 kEncodeWeights = 0x0002;
+static const uint32 kEncodeFlags = 0x0003; // All non-internal flags
+
+static const uint32 kEncodeHasISymbols = 0x0004; // For internal use
+static const uint32 kEncodeHasOSymbols = 0x0008; // For internal use
+
+enum EncodeType { ENCODE = 1, DECODE = 2 };
+
+ // Identifies stream data as an encode table (and its endianness)
+static const int32 kEncodeMagicNumber = 2129983209;
+
+
+ // The following class encapsulates implementation details for the
+ // encoding and decoding of label/weight tuples used for encoding
+ // and decoding of Fsts. The EncodeTable is bidirectional, i.e., it
+ // stores both the mapping from a tuple of labels and weight to a
+ // unique label, and the reverse mapping.
+ template <class A> class EncodeTable {
+  public:
+   typedef typename A::Label Label;
+   typedef typename A::Weight Weight;
+
+   // Encoded data consists of arc input/output labels and arc weight
+   struct Tuple {
+     Tuple() {}
+     Tuple(Label ilabel_, Label olabel_, Weight weight_)
+         : ilabel(ilabel_), olabel(olabel_), weight(weight_) {}
+     Tuple(const Tuple& tuple)
+         : ilabel(tuple.ilabel), olabel(tuple.olabel), weight(tuple.weight) {}
+
+     Label ilabel;
+     Label olabel;
+     Weight weight;
+   };
+
+   // Comparison object for hashing EncodeTable Tuple(s).
+   class TupleEqual {
+    public:
+     bool operator()(const Tuple* x, const Tuple* y) const {
+       return (x->ilabel == y->ilabel &&
+               x->olabel == y->olabel &&
+               x->weight == y->weight);
+     }
+   };
+
+   // Hash function for EncodeTable Tuples. Based on the encode flags
+   // we either hash the labels, weights or combination of them.
+   class TupleKey {
+    public:
+     TupleKey()
+         : encode_flags_(kEncodeLabels | kEncodeWeights) {}
+
+     TupleKey(const TupleKey& key)
+         : encode_flags_(key.encode_flags_) {}
+
+     explicit TupleKey(uint32 encode_flags)
+         : encode_flags_(encode_flags) {}
+
+     // Always hashes the input label; mixes in the output label and/or the
+     // weight hash (rotate-left by 5, then xor) when the corresponding flag
+     // is set.
+     size_t operator()(const Tuple* x) const {
+       size_t hash = x->ilabel;
+       const int lshift = 5;
+       const int rshift = CHAR_BIT * sizeof(size_t) - 5;
+       if (encode_flags_ & kEncodeLabels)
+         hash = hash << lshift ^ hash >> rshift ^ x->olabel;
+       if (encode_flags_ & kEncodeWeights)
+         hash = hash << lshift ^ hash >> rshift ^ x->weight.Hash();
+       return hash;
+     }
+
+    private:
+     // Stored unsigned, like every other place the encode flags are kept.
+     uint32 encode_flags_;
+   };
+
+   typedef unordered_map<const Tuple*,
+                         Label,
+                         TupleKey,
+                         TupleEqual> EncodeHash;
+
+   explicit EncodeTable(uint32 encode_flags)
+       : flags_(encode_flags),
+         encode_hash_(1024, TupleKey(encode_flags)),
+         isymbols_(0), osymbols_(0) {}
+
+   // Releases every stored tuple and both symbol tables.
+   ~EncodeTable() {
+     for (size_t i = 0; i < encode_tuples_.size(); ++i) {
+       delete encode_tuples_[i];
+     }
+     delete isymbols_;
+     delete osymbols_;
+   }
+
+   // Given an arc, encode either input/output labels or input/costs or both.
+   // Returns the existing label for the arc's tuple, or assigns and returns
+   // the next unused label (labels start at 1).
+   Label Encode(const A &arc) {
+     const Tuple tuple(arc.ilabel,
+                       flags_ & kEncodeLabels ? arc.olabel : 0,
+                       flags_ & kEncodeWeights ? arc.weight : Weight::One());
+     typename EncodeHash::const_iterator it = encode_hash_.find(&tuple);
+     if (it == encode_hash_.end()) {
+       encode_tuples_.push_back(new Tuple(tuple));
+       encode_hash_[encode_tuples_.back()] = encode_tuples_.size();
+       return encode_tuples_.size();
+     } else {
+       return it->second;
+     }
+   }
+
+   // Given an arc, look up its encoded label. Returns kNoLabel if not found.
+   Label GetLabel(const A &arc) const {
+     const Tuple tuple(arc.ilabel,
+                       flags_ & kEncodeLabels ? arc.olabel : 0,
+                       flags_ & kEncodeWeights ? arc.weight : Weight::One());
+     typename EncodeHash::const_iterator it = encode_hash_.find(&tuple);
+     if (it == encode_hash_.end()) {
+       return kNoLabel;
+     } else {
+       return it->second;
+     }
+   }
+
+   // Given an encoded arc label, decode back to input/output labels and costs.
+   // Returns 0 (and logs an error) if key is outside [1, Size()].
+   const Tuple* Decode(Label key) const {
+     // key is known to be positive before it is compared as an unsigned value.
+     if (key < 1 || static_cast<size_t>(key) > encode_tuples_.size()) {
+       LOG(ERROR) << "EncodeTable::Decode: unknown decode key: " << key;
+       return 0;
+     }
+     return encode_tuples_[key - 1];
+   }
+
+   size_t Size() const { return encode_tuples_.size(); }
+
+   bool Write(ostream &strm, const string &source) const;
+
+   static EncodeTable<A> *Read(istream &strm, const string &source);
+
+   // Returns only the non-internal flags.
+   const uint32 flags() const { return flags_ & kEncodeFlags; }
+
+   int RefCount() const { return ref_count_.count(); }
+   int IncrRefCount() { return ref_count_.Incr(); }
+   int DecrRefCount() { return ref_count_.Decr(); }
+
+
+   SymbolTable *InputSymbols() const { return isymbols_; }
+
+   SymbolTable *OutputSymbols() const { return osymbols_; }
+
+   // Stores a copy of syms as the pre-encoding input symbol table (or clears
+   // it when syms is null) and keeps the kEncodeHasISymbols flag in sync.
+   // The copy is made before the old table is deleted so that passing this
+   // table's own InputSymbols() is safe.
+   void SetInputSymbols(const SymbolTable* syms) {
+     SymbolTable *replacement = syms ? syms->Copy() : 0;
+     delete isymbols_;
+     isymbols_ = replacement;
+     if (replacement) {
+       flags_ |= kEncodeHasISymbols;
+     } else {
+       flags_ &= ~kEncodeHasISymbols;
+     }
+   }
+
+   // Stores a copy of syms as the pre-encoding output symbol table (or clears
+   // it when syms is null) and keeps the kEncodeHasOSymbols flag in sync.
+   // The copy is made before the old table is deleted so that passing this
+   // table's own OutputSymbols() is safe.
+   void SetOutputSymbols(const SymbolTable* syms) {
+     SymbolTable *replacement = syms ? syms->Copy() : 0;
+     delete osymbols_;
+     osymbols_ = replacement;
+     if (replacement) {
+       flags_ |= kEncodeHasOSymbols;
+     } else {
+       flags_ &= ~kEncodeHasOSymbols;
+     }
+   }
+
+  private:
+   uint32 flags_;
+   vector<Tuple*> encode_tuples_;  // Owned; index i holds the tuple for label i+1
+   EncodeHash encode_hash_;        // Keys point into encode_tuples_
+   RefCounter ref_count_;
+   SymbolTable *isymbols_;  // Pre-encoded ilabel symbol table
+   SymbolTable *osymbols_;  // Pre-encoded olabel symbol table
+
+   DISALLOW_COPY_AND_ASSIGN(EncodeTable);
+ };
+
+ // Serializes the table to strm: magic number, flags, tuple count, each tuple
+ // (ilabel, olabel, weight) in label order, then whichever symbol tables the
+ // flags say are present. Returns false (after logging) if the stream is in a
+ // failed state once flushed.
+ template <class A> inline
+ bool EncodeTable<A>::Write(ostream &strm, const string &source) const {
+   WriteType(strm, kEncodeMagicNumber);
+   WriteType(strm, flags_);
+   int64 size = encode_tuples_.size();
+   WriteType(strm, size);
+
+   typedef typename vector<Tuple*>::const_iterator TupleIterator;
+   for (TupleIterator it = encode_tuples_.begin();
+        it != encode_tuples_.end(); ++it) {
+     const Tuple *entry = *it;
+     WriteType(strm, entry->ilabel);
+     WriteType(strm, entry->olabel);
+     entry->weight.Write(strm);
+   }
+
+   if (flags_ & kEncodeHasISymbols) {
+     isymbols_->Write(strm);
+   }
+   if (flags_ & kEncodeHasOSymbols) {
+     osymbols_->Write(strm);
+   }
+
+   strm.flush();
+   if (strm) {
+     return true;
+   }
+   LOG(ERROR) << "EncodeTable::Write: write failed: " << source;
+   return false;
+ }
+
+ // Reads an encode table previously written with Write().
+ //
+ // \return a heap-allocated table owned by the caller, or 0 (after logging)
+ // if the magic number is wrong, the tuple count is negative, the stream
+ // fails, or a symbol table announced by the flags cannot be read. Nothing is
+ // leaked on failure.
+ template <class A> inline
+ EncodeTable<A> *EncodeTable<A>::Read(istream &strm, const string &source) {
+   int32 magic_number = 0;
+   ReadType(strm, &magic_number);
+   if (magic_number != kEncodeMagicNumber) {
+     LOG(ERROR) << "EncodeTable::Read: Bad encode table header: " << source;
+     return 0;
+   }
+   uint32 flags = 0;
+   ReadType(strm, &flags);
+   int64 size = 0;
+   ReadType(strm, &size);
+   // A negative count can only come from corrupt input; reject it rather than
+   // letting it turn into a huge unsigned loop bound.
+   if (!strm || size < 0) {
+     LOG(ERROR) << "EncodeTable::Read: read failed: " << source;
+     return 0;
+   }
+
+   // Allocate only once the fixed-size preamble has been read successfully.
+   EncodeTable<A> *table = new EncodeTable<A>(flags);
+
+   for (int64 i = 0; i < size; ++i) {
+     Tuple* tuple = new Tuple();
+     ReadType(strm, &tuple->ilabel);
+     ReadType(strm, &tuple->olabel);
+     tuple->weight.Read(strm);
+     if (!strm) {
+       LOG(ERROR) << "EncodeTable::Read: read failed: " << source;
+       delete tuple;  // not yet owned by the table
+       delete table;  // releases the tuples read so far
+       return 0;
+     }
+     table->encode_tuples_.push_back(tuple);
+     table->encode_hash_[table->encode_tuples_.back()] =
+         table->encode_tuples_.size();
+   }
+
+   // The flags promise these symbol tables; Write() dereferences them
+   // whenever the flag is set, so a failed read must fail the whole table.
+   if (flags & kEncodeHasISymbols) {
+     table->isymbols_ = SymbolTable::Read(strm, source);
+     if (!table->isymbols_) {
+       LOG(ERROR) << "EncodeTable::Read: read failed: " << source;
+       delete table;
+       return 0;
+     }
+   }
+
+   if (flags & kEncodeHasOSymbols) {
+     table->osymbols_ = SymbolTable::Read(strm, source);
+     if (!table->osymbols_) {
+       LOG(ERROR) << "EncodeTable::Read: read failed: " << source;
+       delete table;
+       return 0;
+     }
+   }
+
+   return table;
+ }
+
+
+// A mapper to encode/decode weighted transducers. Encoding of an
+// Fst is useful for performing classical determinization or minimization
+// on a weighted transducer by treating it as an unweighted acceptor over
+// encoded labels.
+//
+// The Encode mapper stores the encoding in a local hash table (EncodeTable)
+// This table is shared (and reference counted) between the encoder and
+// decoder. A decoder has read only access to the EncodeTable.
+//
+ // The EncodeMapper allows on-the-fly encoding of the machine. As the
+ // EncodeTable is generated, the same table may be used to decode the machine
+ // on the fly. For example, in the following sequence of operations
+//
+// Encode -> Determinize -> Decode
+//
+// we will use the encoding table generated during the encode step in the
+// decode, even though the encoding is not complete.
+//
+ template <class A> class EncodeMapper {
+   typedef typename A::Weight Weight;
+   typedef typename A::Label Label;
+  public:
+   // Creates a mapper of the given type with a fresh, empty encode table.
+   EncodeMapper(uint32 flags, EncodeType type)
+       : flags_(flags),
+         type_(type),
+         table_(new EncodeTable<A>(flags)),
+         error_(false) {}
+
+   // Shares the encode table of mapper (reference counted); the error state
+   // starts out cleared.
+   EncodeMapper(const EncodeMapper& mapper)
+       : flags_(mapper.flags_),
+         type_(mapper.type_),
+         table_(mapper.table_),
+         error_(false) {
+     table_->IncrRefCount();
+   }
+
+   // Copy constructor but setting the type, typically to DECODE
+   EncodeMapper(const EncodeMapper& mapper, EncodeType type)
+       : flags_(mapper.flags_),
+         type_(type),
+         table_(mapper.table_),
+         error_(mapper.error_) {
+     table_->IncrRefCount();
+   }
+
+   // Drops this mapper's reference to the shared table, deleting the table
+   // when it was the last one.
+   ~EncodeMapper() {
+     if (!table_->DecrRefCount()) delete table_;
+   }
+
+   A operator()(const A &arc);
+
+   // A superfinal state is required only when encoding weights.
+   MapFinalAction FinalAction() const {
+     return (type_ == ENCODE && (flags_ & kEncodeWeights)) ?
+                    MAP_REQUIRE_SUPERFINAL : MAP_NO_SUPERFINAL;
+   }
+
+   MapSymbolsAction InputSymbolsAction() const { return MAP_CLEAR_SYMBOLS; }
+
+   MapSymbolsAction OutputSymbolsAction() const { return MAP_CLEAR_SYMBOLS;}
+
+   // Computes the output properties: adds kError if a decode error has been
+   // recorded and masks out the properties that label and/or weight encoding
+   // do not preserve.
+   uint64 Properties(uint64 inprops) {
+     uint64 outprops = inprops;
+     if (error_) outprops |= kError;
+
+     uint64 mask = kFstProperties;
+     if (flags_ & kEncodeLabels)
+       mask &= kILabelInvariantProperties & kOLabelInvariantProperties;
+     if (flags_ & kEncodeWeights)
+       mask &= kILabelInvariantProperties & kWeightInvariantProperties &
+           (type_ == ENCODE ? kAddSuperFinalProperties :
+            kRmSuperFinalProperties);
+
+     return outprops & mask;
+   }
+
+   const uint32 flags() const { return flags_; }
+   const EncodeType type() const { return type_; }
+   const EncodeTable<A> &table() const { return *table_; }
+
+   bool Write(ostream &strm, const string& source) {
+     return table_->Write(strm, source);
+   }
+
+   // Writes the encode table to the named file; returns false if the file
+   // cannot be opened or the write fails.
+   bool Write(const string& filename) {
+     ofstream strm(filename.c_str(), ofstream::out | ofstream::binary);
+     if (!strm) {
+       LOG(ERROR) << "EncodeMap: Can't open file: " << filename;
+       return false;
+     }
+     return Write(strm, filename);
+   }
+
+   // Reads an encode table from strm and wraps it in a new mapper of the
+   // given type; returns 0 if the table cannot be read.
+   static EncodeMapper<A> *Read(istream &strm,
+                                const string& source,
+                                EncodeType type = ENCODE) {
+     EncodeTable<A> *table = EncodeTable<A>::Read(strm, source);
+     return table ? new EncodeMapper(table->flags(), type, table) : 0;
+   }
+
+   static EncodeMapper<A> *Read(const string& filename,
+                                EncodeType type = ENCODE) {
+     ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
+     if (!strm) {
+       LOG(ERROR) << "EncodeMap: Can't open file: " << filename;
+       return NULL;
+     }
+     return Read(strm, filename, type);
+   }
+
+   SymbolTable *InputSymbols() const { return table_->InputSymbols(); }
+
+   SymbolTable *OutputSymbols() const { return table_->OutputSymbols(); }
+
+   void SetInputSymbols(const SymbolTable* syms) {
+     table_->SetInputSymbols(syms);
+   }
+
+   void SetOutputSymbols(const SymbolTable* syms) {
+     table_->SetOutputSymbols(syms);
+   }
+
+  private:
+   uint32 flags_;
+   EncodeType type_;
+   EncodeTable<A>* table_;
+   bool error_;  // Set when decoding hits an inconsistent or unknown label
+
+   // Adopts an existing table (used by Read). The error flag must start out
+   // cleared here as well, since Properties() reads it.
+   explicit EncodeMapper(uint32 flags, EncodeType type, EncodeTable<A> *table)
+       : flags_(flags), type_(type), table_(table), error_(false) {}
+   void operator=(const EncodeMapper &);  // Disallow.
+ };
+
+ // Maps a single arc. When encoding, the arc's labels and/or weight are
+ // replaced by a single label from the table; when decoding, the table is
+ // used to restore them. Arcs with nextstate == kNoStateId are passed through
+ // unchanged, except that when encoding weights only those with weight Zero
+ // are passed through.
+ template <class A> inline
+ A EncodeMapper<A>::operator()(const A &arc) {
+   const bool no_next_state = (arc.nextstate == kNoStateId);
+
+   if (type_ == ENCODE) {  // labels and/or weights to single label
+     if (no_next_state &&
+         (!(flags_ & kEncodeWeights) || arc.weight == Weight::Zero())) {
+       return arc;
+     }
+     Label label = table_->Encode(arc);
+     return A(label,
+              flags_ & kEncodeLabels ? label : arc.olabel,
+              flags_ & kEncodeWeights ? Weight::One() : arc.weight,
+              arc.nextstate);
+   }
+
+   // type_ == DECODE
+   if (no_next_state) {
+     return arc;
+   }
+   if (arc.ilabel == 0) {
+     return arc;
+   }
+   // Sanity checks on the encoded arc; failures are recorded but decoding
+   // still proceeds.
+   if (flags_ & kEncodeLabels && arc.ilabel != arc.olabel) {
+     FSTERROR() << "EncodeMapper: Label-encoded arc has different "
+         "input and output labels";
+     error_ = true;
+   }
+   if (flags_ & kEncodeWeights && arc.weight != Weight::One()) {
+     FSTERROR() <<
+         "EncodeMapper: Weight-encoded arc has non-trivial weight";
+     error_ = true;
+   }
+   const typename EncodeTable<A>::Tuple* decoded = table_->Decode(arc.ilabel);
+   if (decoded) {
+     return A(decoded->ilabel,
+              flags_ & kEncodeLabels ? decoded->olabel : arc.olabel,
+              flags_ & kEncodeWeights ? decoded->weight : arc.weight,
+              arc.nextstate);
+   }
+   FSTERROR() << "EncodeMapper: decode failed";
+   error_ = true;
+   return A(kNoLabel, kNoLabel, Weight::NoWeight(), arc.nextstate);
+ }
+
+
+// Encodes fst through mapper. Complexity: O(nstates + narcs)
+template<class A> inline
+void Encode(MutableFst<A> *fst, EncodeMapper<A>* mapper) {
+ mapper->SetInputSymbols(fst->InputSymbols()); // hand fst's symbol tables to the mapper
+ mapper->SetOutputSymbols(fst->OutputSymbols());
+ ArcMap(fst, mapper); // apply the mapper to fst's arcs
+}
+
+template<class A> inline // Counterpart of Encode(): decodes fst through mapper.
+void Decode(MutableFst<A>* fst, const EncodeMapper<A>& mapper) {
+ ArcMap(fst, EncodeMapper<A>(mapper, DECODE)); // map with a DECODE-mode copy of mapper
+ RmFinalEpsilon(fst); // NOTE(review): presumably drops epsilon arcs into final states - confirm
+ fst->SetInputSymbols(mapper.InputSymbols()); // take the symbol tables held by the mapper
+ fst->SetOutputSymbols(mapper.OutputSymbols());
+}
+
+
+// On the fly label and/or weight encoding of input Fst
+//
+// Complexity:
+// - Constructor: O(1)
+// - Traversal: O(nstates_visited + narcs_visited), assuming constant
+// time to visit an input state or arc.
+template <class A>
+class EncodeFst : public ArcMapFst<A, A, EncodeMapper<A> > {
+ public:
+ typedef A Arc;
+ typedef EncodeMapper<A> C;
+ typedef ArcMapFstImpl< A, A, EncodeMapper<A> > Impl;
+ using ImplToFst<Impl>::GetImpl;
+
+ EncodeFst(const Fst<A> &fst, EncodeMapper<A>* encoder) // pointer overload
+ : ArcMapFst<A, A, C>(fst, encoder, ArcMapFstOptions()) {
+ encoder->SetInputSymbols(fst.InputSymbols()); // also hands fst's symbols to the encoder
+ encoder->SetOutputSymbols(fst.OutputSymbols());
+ }
+
+ EncodeFst(const Fst<A> &fst, const EncodeMapper<A>& encoder) // const-ref overload: encoder's symbols are not set
+ : ArcMapFst<A, A, C>(fst, encoder, ArcMapFstOptions()) {}
+
+ // See Fst<>::Copy() for doc.
+ EncodeFst(const EncodeFst<A> &fst, bool copy = false)
+ : ArcMapFst<A, A, C>(fst, copy) {}
+
+ // Get a copy of this EncodeFst. See Fst<>::Copy() for further doc.
+ virtual EncodeFst<A> *Copy(bool safe = false) const {
+ if (safe) { // a safe copy is refused: log it and mark this FST with kError
+ FSTERROR() << "EncodeFst::Copy(true): not allowed.";
+ GetImpl()->SetProperties(kError, kError);
+ }
+ return new EncodeFst(*this); // always the default (non-safe) copy
+ }
+};
+
+
+// On the fly label and/or weight decoding of input Fst
+//
+// Complexity:
+// - Constructor: O(1)
+// - Traversal: O(nstates_visited + narcs_visited), assuming constant
+// time to visit an input state or arc.
+template <class A>
+class DecodeFst : public ArcMapFst<A, A, EncodeMapper<A> > {
+ public:
+ typedef A Arc;
+ typedef EncodeMapper<A> C;
+ typedef ArcMapFstImpl< A, A, EncodeMapper<A> > Impl;
+ using ImplToFst<Impl>::GetImpl;
+
+ DecodeFst(const Fst<A> &fst, const EncodeMapper<A>& encoder)
+ : ArcMapFst<A, A, C>(fst,
+ EncodeMapper<A>(encoder, DECODE), // maps with a DECODE-mode copy of encoder
+ ArcMapFstOptions()) {
+ GetImpl()->SetInputSymbols(encoder.InputSymbols()); // symbols come from the encoder
+ GetImpl()->SetOutputSymbols(encoder.OutputSymbols());
+ }
+
+ // See Fst<>::Copy() for doc.
+ DecodeFst(const DecodeFst<A> &fst, bool safe = false)
+ : ArcMapFst<A, A, C>(fst, safe) {}
+
+ // Get a copy of this DecodeFst. See Fst<>::Copy() for further doc.
+ virtual DecodeFst<A> *Copy(bool safe = false) const {
+ return new DecodeFst(*this, safe); // unlike EncodeFst::Copy, safe is passed through
+ }
+};
+
+
+// StateIterator specialization for EncodeFst; reuses the ArcMapFst iterator.
+template <class A>
+class StateIterator< EncodeFst<A> >
+ : public StateIterator< ArcMapFst<A, A, EncodeMapper<A> > > {
+ public:
+ explicit StateIterator(const EncodeFst<A> &fst)
+ : StateIterator< ArcMapFst<A, A, EncodeMapper<A> > >(fst) {}
+};
+
+
+// ArcIterator specialization for EncodeFst; reuses the ArcMapFst iterator.
+template <class A>
+class ArcIterator< EncodeFst<A> >
+ : public ArcIterator< ArcMapFst<A, A, EncodeMapper<A> > > {
+ public:
+ ArcIterator(const EncodeFst<A> &fst, typename A::StateId s)
+ : ArcIterator< ArcMapFst<A, A, EncodeMapper<A> > >(fst, s) {}
+};
+
+
+// StateIterator specialization for DecodeFst; reuses the ArcMapFst iterator.
+template <class A>
+class StateIterator< DecodeFst<A> >
+ : public StateIterator< ArcMapFst<A, A, EncodeMapper<A> > > {
+ public:
+ explicit StateIterator(const DecodeFst<A> &fst)
+ : StateIterator< ArcMapFst<A, A, EncodeMapper<A> > >(fst) {}
+};
+
+
+// ArcIterator specialization for DecodeFst; reuses the ArcMapFst iterator.
+template <class A>
+class ArcIterator< DecodeFst<A> >
+ : public ArcIterator< ArcMapFst<A, A, EncodeMapper<A> > > {
+ public:
+ ArcIterator(const DecodeFst<A> &fst, typename A::StateId s)
+ : ArcIterator< ArcMapFst<A, A, EncodeMapper<A> > >(fst, s) {}
+};
+
+
+// Useful aliases when using StdArc.
+typedef EncodeFst<StdArc> StdEncodeFst;
+
+typedef DecodeFst<StdArc> StdDecodeFst;
+
+} // namespace fst
+
+#endif // FST_LIB_ENCODE_H__
diff --git a/src/include/fst/epsnormalize.h b/src/include/fst/epsnormalize.h
new file mode 100644
index 0000000..696242b
--- /dev/null
+++ b/src/include/fst/epsnormalize.h
@@ -0,0 +1,74 @@
+// epsnormalize.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// Function that implements epsilon normalization.
+
+#ifndef FST_LIB_EPSNORMALIZE_H__
+#define FST_LIB_EPSNORMALIZE_H__
+
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <fst/slist.h>
+
+
+#include <fst/factor-weight.h>
+#include <fst/invert.h>
+#include <fst/arc-map.h>
+#include <fst/rmepsilon.h>
+
+
+namespace fst {
+
+enum EpsNormalizeType {EPS_NORM_INPUT, EPS_NORM_OUTPUT};
+
+// Returns an equivalent FST that is epsilon-normalized. An acceptor is
+// epsilon-normalized if it is epsilon-removed. A transducer is input
+// epsilon-normalized if, additionally, on each path any epsilon input
+// label follows all non-epsilon input labels. Output epsilon-normalization
+// is defined similarly.
+//
+// The input FST needs to be functional.
+//
+// References:
+// - Mehryar Mohri. "Generic epsilon-removal and input epsilon-normalization
+// algorithms for weighted transducers", International Journal of Computer
+// Science, 13(1): 129-143, 2002.
+template <class Arc>
+void EpsNormalize(const Fst<Arc> &ifst, MutableFst<Arc> *ofst,
+ EpsNormalizeType type = EPS_NORM_INPUT) {
+ VectorFst< GallicArc<Arc, STRING_RIGHT_RESTRICT> > gfst; // gallic-arc working copy
+ if (type == EPS_NORM_INPUT)
+ ArcMap(ifst, &gfst, ToGallicMapper<Arc, STRING_RIGHT_RESTRICT>());
+ else // type == EPS_NORM_OUTPUT
+ ArcMap(InvertFst<Arc>(ifst), &gfst, // output case: work on the inverted FST
+ ToGallicMapper<Arc, STRING_RIGHT_RESTRICT>());
+ RmEpsilon(&gfst);
+ FactorWeightFst< GallicArc<Arc, STRING_RIGHT_RESTRICT>,
+ GallicFactor<typename Arc::Label,
+ typename Arc::Weight, STRING_RIGHT_RESTRICT> >
+ fwfst(gfst);
+ ArcMap(fwfst, ofst, FromGallicMapper<Arc, STRING_RIGHT_RESTRICT>()); // back to Arc
+ ofst->SetOutputSymbols(ifst.OutputSymbols());
+ if(type == EPS_NORM_OUTPUT)
+ Invert(ofst); // undo the inversion applied to the input above
+}
+
+} // namespace fst
+
+#endif // FST_LIB_EPSNORMALIZE_H__
diff --git a/src/include/fst/equal.h b/src/include/fst/equal.h
new file mode 100644
index 0000000..33be198
--- /dev/null
+++ b/src/include/fst/equal.h
@@ -0,0 +1,124 @@
+// equal.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Function to test equality of two Fsts.
+
+#ifndef FST_LIB_EQUAL_H__
+#define FST_LIB_EQUAL_H__
+
+#include <fst/fst.h>
+
+
+namespace fst {
+
+// Tests if two Fsts match state-by-state and arc-by-arc (weights within delta).
+template<class Arc>
+bool Equal(const Fst<Arc> &fst1, const Fst<Arc> &fst2, float delta = kDelta) {
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Weight Weight;
+
+ if (fst1.Start() != fst2.Start()) {
+ VLOG(1) << "Equal: mismatched start states";
+ return false;
+ }
+
+ StateIterator< Fst<Arc> > siter1(fst1);
+ StateIterator< Fst<Arc> > siter2(fst2);
+
+ while (!siter1.Done() || !siter2.Done()) { // walk both state lists in lockstep
+ if (siter1.Done() || siter2.Done()) { // exactly one of the iterators ran out
+ VLOG(1) << "Equal: mismatched # of states";
+ return false;
+ }
+ StateId s1 = siter1.Value();
+ StateId s2 = siter2.Value();
+ if (s1 != s2) {
+ VLOG(1) << "Equal: mismatched states:"
+ << " state1 = " << s1
+ << ", state2 = " << s2;
+ return false;
+ }
+ Weight final1 = fst1.Final(s1);
+ Weight final2 = fst2.Final(s2);
+ if (!ApproxEqual(final1, final2, delta)) {
+ VLOG(1) << "Equal: mismatched final weights:"
+ << " state = " << s1
+ << ", final1 = " << final1
+ << ", final2 = " << final2;
+ return false;
+ }
+ ArcIterator< Fst<Arc> > aiter1(fst1, s1);
+ ArcIterator< Fst<Arc> > aiter2(fst2, s2);
+ for (size_t a = 0; !aiter1.Done() || !aiter2.Done(); ++a) { // a: arc position, for logging
+ if (aiter1.Done() || aiter2.Done()) { // exactly one arc list ran out
+ VLOG(1) << "Equal: mismatched # of arcs"
+ << " state = " << s1;
+ return false;
+ }
+ Arc arc1 = aiter1.Value();
+ Arc arc2 = aiter2.Value();
+ if (arc1.ilabel != arc2.ilabel) {
+ VLOG(1) << "Equal: mismatched arc input labels:"
+ << " state = " << s1
+ << ", arc = " << a
+ << ", ilabel1 = " << arc1.ilabel
+ << ", ilabel2 = " << arc2.ilabel;
+ return false;
+ } else if (arc1.olabel != arc2.olabel) {
+ VLOG(1) << "Equal: mismatched arc output labels:"
+ << " state = " << s1
+ << ", arc = " << a
+ << ", olabel1 = " << arc1.olabel
+ << ", olabel2 = " << arc2.olabel;
+ return false;
+ } else if (!ApproxEqual(arc1.weight, arc2.weight, delta)) {
+ VLOG(1) << "Equal: mismatched arc weights:"
+ << " state = " << s1
+ << ", arc = " << a
+ << ", weight1 = " << arc1.weight
+ << ", weight2 = " << arc2.weight;
+ return false;
+ } else if (arc1.nextstate != arc2.nextstate) {
+ VLOG(1) << "Equal: mismatched arc next states:"
+ << " state = " << s1
+ << ", arc = " << a
+ << ", nextstate1 = " << arc1.nextstate
+ << ", nextstate2 = " << arc2.nextstate;
+ return false;
+ }
+ aiter1.Next();
+ aiter2.Next();
+
+ }
+ // Sanity checks: should never fail
+ if (fst1.NumArcs(s1) != fst2.NumArcs(s2) ||
+ fst1.NumInputEpsilons(s1) != fst2.NumInputEpsilons(s2) ||
+ fst1.NumOutputEpsilons(s1) != fst2.NumOutputEpsilons(s2)) {
+ FSTERROR() << "Equal: inconsistent arc/epsilon counts"; // logged only; result unchanged
+ }
+
+ siter1.Next();
+ siter2.Next();
+ }
+ return true;
+}
+
+} // namespace fst
+
+
+#endif // FST_LIB_EQUAL_H__
diff --git a/src/include/fst/equivalent.h b/src/include/fst/equivalent.h
new file mode 100644
index 0000000..f05ff87
--- /dev/null
+++ b/src/include/fst/equivalent.h
@@ -0,0 +1,274 @@
+// equivalent.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: wojciech@google.com (Wojciech Skut)
+//
+// \file Functions and classes to determine the equivalence of two
+// FSTs.
+
+#ifndef FST_LIB_EQUIVALENT_H__
+#define FST_LIB_EQUIVALENT_H__
+
+#include <algorithm>
+#include <deque>
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <utility>
+using std::pair; using std::make_pair;
+#include <vector>
+using std::vector;
+
+#include <fst/encode.h>
+#include <fst/push.h>
+#include <fst/union-find.h>
+#include <fst/vector-fst.h>
+
+
+namespace fst {
+
+// Traits-like struct holding utility functions/typedefs/constants for
+// the equivalence algorithm.
+//
+// Encoding device: in order to make the statesets of the two acceptors
+// disjoint, we map Arc::StateId on the type MappedId. The states of
+// the first acceptor are mapped on odd numbers (s -> 2s + 1), and
+// those of the second one on even numbers (s -> 2s + 2). The number 0
+// is reserved for an implicit (non-final) 'dead state' (required for
+// the correct treatment of non-coaccessible states; kNoStateId is
+// mapped to kDeadState for both acceptors). The union-find algorithm
+// operates on the mapped IDs.
+template <class Arc>
+struct EquivalenceUtil {
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Weight Weight;
+ typedef StateId MappedId; // ID for an equivalence class.
+
+ // MappedId for an implicit dead state.
+ static const MappedId kDeadState = 0;
+
+ // MappedId for lookup failure.
+ static const MappedId kInvalidId = -1;
+
+ // Maps state ID to the representative of the corresponding
+ // equivalence class. The parameter 'which_fst' takes the values 1
+ // and 2, identifying the input FST.
+ static MappedId MapState(StateId s, int32 which_fst) {
+ return
+ (kNoStateId == s)
+ ?
+ kDeadState
+ :
+ (static_cast<MappedId>(s) << 1) + which_fst; // 2s + 1 or 2s + 2
+ }
+ // Maps set ID to State ID.
+ static StateId UnMapState(MappedId id) {
+ return static_cast<StateId>((--id) >> 1); // inverts both 2s + 1 and 2s + 2
+ }
+ // Convenience function: checks if state with MappedId 's' is final
+ // in acceptor 'fa'.
+ static bool IsFinal(const Fst<Arc> &fa, MappedId s) {
+ return
+ (kDeadState == s) ?
+ false : (fa.Final(UnMapState(s)) != Weight::Zero()); // dead state is never final
+ }
+ // Convenience function: returns the representative of 'id' in 'sets',
+ // creating a new set if needed.
+ static MappedId FindSet(UnionFind<MappedId> *sets, MappedId id) {
+ MappedId repr = sets->FindSet(id);
+ if (repr != kInvalidId) {
+ return repr;
+ } else {
+ sets->MakeSet(id); // unseen id: it becomes its own representative
+ return id;
+ }
+ }
+};
+
+template <class Arc> const // out-of-class definition of the static constant
+typename EquivalenceUtil<Arc>::MappedId EquivalenceUtil<Arc>::kDeadState;
+
+template <class Arc> const // out-of-class definition of the static constant
+typename EquivalenceUtil<Arc>::MappedId EquivalenceUtil<Arc>::kInvalidId;
+
+
+// Equivalence checking algorithm: determines if the two FSTs
+// <code>fst1</code> and <code>fst2</code> are equivalent. The input
+// FSTs must be deterministic input-side epsilon-free acceptors,
+// unweighted or with weights over a left semiring. Two acceptors are
+// considered equivalent if they accept exactly the same set of
+// strings (with the same weights).
+//
+// The algorithm (cf. Aho, Hopcroft and Ullman, "The Design and
+// Analysis of Computer Algorithms") successively constructs sets of
+// states that can be reached by the same prefixes, starting with a
+// set containing the start states of both acceptors. A disjoint tree
+// forest (the union-find algorithm) is used to represent the sets of
+// states. The algorithm returns 'false' if one of the constructed
+// sets contains both final and non-final states. Returns optional error
+// value (when FLAGS_error_fatal = false).
+//
+// Complexity: quasi-linear, i.e. O(n G(n)), where
+// n = |S1| + |S2| is the number of states in both acceptors
+// G(n) is a very slowly growing function that can be approximated
+// by 4 for all practical purposes.
+//
+template <class Arc>
+bool Equivalent(const Fst<Arc> &fst1,
+ const Fst<Arc> &fst2,
+ double delta = kDelta, bool *error = 0) {
+ typedef typename Arc::Weight Weight;
+ if (error) *error = false;
+
+ // Check that the symbol tables are compatible.
+ if (!CompatSymbols(fst1.InputSymbols(), fst2.InputSymbols()) ||
+ !CompatSymbols(fst1.OutputSymbols(), fst2.OutputSymbols())) {
+ FSTERROR() << "Equivalent: input/output symbol tables of 1st argument "
+ << "do not match input/output symbol tables of 2nd argument";
+ if (error) *error = true;
+ return false;
+ }
+ // Check properties first:
+ uint64 props = kNoEpsilons | kIDeterministic | kAcceptor;
+ if (fst1.Properties(props, true) != props) {
+ FSTERROR() << "Equivalent: first argument not an"
+ << " epsilon-free deterministic acceptor";
+ if (error) *error = true;
+ return false;
+ }
+ if (fst2.Properties(props, true) != props) {
+ FSTERROR() << "Equivalent: second argument not an"
+ << " epsilon-free deterministic acceptor";
+ if (error) *error = true;
+ return false;
+ }
+
+ if ((fst1.Properties(kUnweighted , true) != kUnweighted)
+ || (fst2.Properties(kUnweighted , true) != kUnweighted)) {
+ VectorFst<Arc> efst1(fst1); // weighted input: compare encoded copies instead
+ VectorFst<Arc> efst2(fst2);
+ Push(&efst1, REWEIGHT_TO_INITIAL, delta);
+ Push(&efst2, REWEIGHT_TO_INITIAL, delta);
+ ArcMap(&efst1, QuantizeMapper<Arc>(delta));
+ ArcMap(&efst2, QuantizeMapper<Arc>(delta));
+ EncodeMapper<Arc> mapper(kEncodeWeights|kEncodeLabels, ENCODE); // one mapper for both
+ ArcMap(&efst1, &mapper);
+ ArcMap(&efst2, &mapper);
+ return Equivalent(efst1, efst2, delta, error); // forward delta and the error flag
+ }
+
+ // Convenience typedefs:
+ typedef typename Arc::StateId StateId;
+ typedef EquivalenceUtil<Arc> Util;
+ typedef typename Util::MappedId MappedId;
+ enum { FST1 = 1, FST2 = 2 }; // Required by Util::MapState(...)
+
+ MappedId s1 = Util::MapState(fst1.Start(), FST1);
+ MappedId s2 = Util::MapState(fst2.Start(), FST2);
+
+ // The union-find structure.
+ UnionFind<MappedId> eq_classes(1000, Util::kInvalidId);
+
+ // Initialize the union-find structure.
+ eq_classes.MakeSet(s1);
+ eq_classes.MakeSet(s2);
+
+ // Data structure for the (partial) acceptor transition function of
+ // fst1 and fst2: input labels mapped to pairs of MappedId's
+ // representing destination states of the corresponding arcs in fst1
+ // and fst2, respectively.
+ typedef
+ unordered_map<typename Arc::Label, pair<MappedId, MappedId> >
+ Label2StatePairMap;
+
+ Label2StatePairMap arc_pairs;
+
+ // Pairs of MappedId's to be processed, organized in a queue.
+ deque<pair<MappedId, MappedId> > q;
+
+ bool ret = true;
+ // Skip the main loop if the start states differ w.r.t. being final.
+ if (Util::IsFinal(fst1, s1) != Util::IsFinal(fst2, s2)) {
+ ret = false;
+ }
+
+ // Main loop: explores the two acceptors in a breadth-first manner,
+ // updating the equivalence relation on the statesets. Loop
+ // invariant: each block of states contains either final states only
+ // or non-final states only.
+ for (q.push_back(make_pair(s1, s2)); ret && !q.empty(); q.pop_front()) {
+ s1 = q.front().first;
+ s2 = q.front().second;
+
+ // Representatives of the equivalence classes of s1/s2.
+ MappedId rep1 = Util::FindSet(&eq_classes, s1);
+ MappedId rep2 = Util::FindSet(&eq_classes, s2);
+
+ if (rep1 != rep2) {
+ eq_classes.Union(rep1, rep2);
+ arc_pairs.clear();
+
+ // Copy outgoing arcs starting at s1 into the hashtable.
+ if (Util::kDeadState != s1) {
+ ArcIterator<Fst<Arc> > arc_iter(fst1, Util::UnMapState(s1));
+ for (; !arc_iter.Done(); arc_iter.Next()) {
+ const Arc &arc = arc_iter.Value();
+ if (arc.weight != Weight::Zero()) { // Zero-weight arcs
+ // are treated as
+ // non-existent.
+ arc_pairs[arc.ilabel].first = Util::MapState(arc.nextstate, FST1);
+ }
+ }
+ }
+ // Copy outgoing arcs starting at s2 into the hashtable.
+ if (Util::kDeadState != s2) {
+ ArcIterator<Fst<Arc> > arc_iter(fst2, Util::UnMapState(s2));
+ for (; !arc_iter.Done(); arc_iter.Next()) {
+ const Arc &arc = arc_iter.Value();
+ if (arc.weight != Weight::Zero()) { // Zero-weight arcs
+ // are treated as
+ // non-existent.
+ arc_pairs[arc.ilabel].second = Util::MapState(arc.nextstate, FST2);
+ }
+ }
+ }
+ // Iterate through the hashtable and process pairs of target
+ // states.
+ for (typename Label2StatePairMap::const_iterator
+ arc_iter = arc_pairs.begin();
+ arc_iter != arc_pairs.end();
+ ++arc_iter) {
+ const pair<MappedId, MappedId> &p = arc_iter->second;
+ if (Util::IsFinal(fst1, p.first) != Util::IsFinal(fst2, p.second)) {
+ // Detected inconsistency: return false.
+ ret = false;
+ break;
+ }
+ q.push_back(p);
+ }
+ }
+ }
+
+ if (fst1.Properties(kError, false) || fst2.Properties(kError, false)) {
+ if (error) *error = true;
+ return false;
+ }
+
+ return ret;
+}
+
+} // namespace fst
+
+#endif // FST_LIB_EQUIVALENT_H__
diff --git a/src/include/fst/expanded-fst.h b/src/include/fst/expanded-fst.h
new file mode 100644
index 0000000..b44b81c
--- /dev/null
+++ b/src/include/fst/expanded-fst.h
@@ -0,0 +1,189 @@
+// expanded-fst.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Generic FST augmented with state count - interface class definition.
+//
+
+#ifndef FST_LIB_EXPANDED_FST_H__
+#define FST_LIB_EXPANDED_FST_H__
+
+#include <sys/types.h>
+#include <string>
+
+#include <fst/fst.h>
+
+
+namespace fst {
+
+// A generic FST plus state count.
+template <class A>
+class ExpandedFst : public Fst<A> {
+ public:
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+
+ virtual StateId NumStates() const = 0; // State count
+
+ // Get a copy of this ExpandedFst. See Fst<>::Copy() for further doc.
+ virtual ExpandedFst<A> *Copy(bool safe = false) const = 0;
+
+ // Read an ExpandedFst from an input stream; return NULL on error.
+ static ExpandedFst<A> *Read(istream &strm, const FstReadOptions &opts) {
+ FstReadOptions ropts(opts); // local copy, so the header pointer can be filled in
+ FstHeader hdr;
+ if (ropts.header) // the caller already supplied a header
+ hdr = *opts.header;
+ else {
+ if (!hdr.Read(strm, opts.source)) // otherwise read it from the stream
+ return 0;
+ ropts.header = &hdr;
+ }
+ if (!(hdr.Properties() & kExpanded)) {
+ LOG(ERROR) << "ExpandedFst::Read: Not an ExpandedFst: " << ropts.source;
+ return 0;
+ }
+ FstRegister<A> *registr = FstRegister<A>::GetRegister();
+ const typename FstRegister<A>::Reader reader =
+ registr->GetReader(hdr.FstType()); // reader registered for this FST type
+ if (!reader) {
+ LOG(ERROR) << "ExpandedFst::Read: Unknown FST type \"" << hdr.FstType()
+ << "\" (arc type = \"" << A::Type()
+ << "\"): " << ropts.source;
+ return 0;
+ }
+ Fst<A> *fst = reader(strm, ropts);
+ if (!fst) return 0;
+ return static_cast<ExpandedFst<A> *>(fst); // unchecked cast; kExpanded was tested above
+ }
+
+ // Read an ExpandedFst from a file; return NULL on error.
+ // Empty filename reads from standard input.
+ static ExpandedFst<A> *Read(const string &filename) {
+ if (!filename.empty()) {
+ ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
+ if (!strm) {
+ LOG(ERROR) << "ExpandedFst::Read: Can't open file: " << filename;
+ return 0;
+ }
+ return Read(strm, FstReadOptions(filename)); // filename is used as the source name
+ } else {
+ return Read(std::cin, FstReadOptions("standard input"));
+ }
+ }
+};
+
+
+namespace internal {
+
+// ExpandedFst<A> case - each helper forwards to the abstract method.
+template <class A> inline
+typename A::Weight Final(const ExpandedFst<A> &fst, typename A::StateId s) {
+ return fst.Final(s); // final weight of state s
+}
+
+template <class A> inline
+ssize_t NumArcs(const ExpandedFst<A> &fst, typename A::StateId s) {
+ return fst.NumArcs(s); // arc count of state s
+}
+
+template <class A> inline
+ssize_t NumInputEpsilons(const ExpandedFst<A> &fst, typename A::StateId s) {
+ return fst.NumInputEpsilons(s); // input-epsilon count of state s
+}
+
+template <class A> inline
+ssize_t NumOutputEpsilons(const ExpandedFst<A> &fst, typename A::StateId s) {
+ return fst.NumOutputEpsilons(s); // output-epsilon count of state s
+}
+
+} // namespace internal
+
+
+// A useful alias when using StdArc.
+typedef ExpandedFst<StdArc> StdExpandedFst;
+
+
+// This is a helper class template useful for attaching an ExpandedFst
+// interface to its implementation, handling reference counting. It
+// delegates to ImplToFst the handling of the Fst interface methods.
+template < class I, class F = ExpandedFst<typename I::Arc> >
+class ImplToExpandedFst : public ImplToFst<I, F> {
+ public:
+ typedef typename I::Arc Arc;
+ typedef typename Arc::Weight Weight;
+ typedef typename Arc::StateId StateId;
+
+ using ImplToFst<I, F>::GetImpl;
+
+ virtual StateId NumStates() const { return GetImpl()->NumStates(); } // answered by the impl
+
+ protected:
+ ImplToExpandedFst() : ImplToFst<I, F>() {}
+
+ ImplToExpandedFst(I *impl) : ImplToFst<I, F>(impl) {}
+
+ ImplToExpandedFst(const ImplToExpandedFst<I, F> &fst)
+ : ImplToFst<I, F>(fst) {}
+
+ ImplToExpandedFst(const ImplToExpandedFst<I, F> &fst, bool safe)
+ : ImplToFst<I, F>(fst, safe) {}
+
+ // Read FST implementation from a file; return NULL on error.
+ // Empty filename reads from standard input.
+ static I *Read(const string &filename) {
+ if (!filename.empty()) {
+ ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
+ if (!strm) {
+ LOG(ERROR) << "ExpandedFst::Read: Can't open file: " << filename;
+ return 0;
+ }
+ return I::Read(strm, FstReadOptions(filename)); // filename is used as the source name
+ } else {
+ return I::Read(std::cin, FstReadOptions("standard input"));
+ }
+ }
+
+ private:
+ // Disallow: declared private and not defined in this class.
+ ImplToExpandedFst<I, F> &operator=(const ImplToExpandedFst<I, F> &fst);
+
+ ImplToExpandedFst<I, F> &operator=(const Fst<Arc> &fst) { // also disallowed
+ FSTERROR() << "ImplToExpandedFst: Assignment operator disallowed";
+ GetImpl()->SetProperties(kError, kError); // marks this FST with kError
+ return *this;
+ }
+};
+
+// Function to return the number of states in an FST, counting them
+// if necessary.
+template <class Arc>
+typename Arc::StateId CountStates(const Fst<Arc> &fst) {
+ if (fst.Properties(kExpanded, false)) { // expanded: ask the FST directly
+ const ExpandedFst<Arc> *efst = static_cast<const ExpandedFst<Arc> *>(&fst);
+ return efst->NumStates();
+ } else { // otherwise count by visiting every state
+ typename Arc::StateId nstates = 0;
+ for (StateIterator< Fst<Arc> > siter(fst); !siter.Done(); siter.Next())
+ ++nstates;
+ return nstates;
+ }
+}
+
+} // namespace fst
+
+#endif // FST_LIB_EXPANDED_FST_H__
diff --git a/src/include/fst/expectation-weight.h b/src/include/fst/expectation-weight.h
new file mode 100644
index 0000000..5226cad
--- /dev/null
+++ b/src/include/fst/expectation-weight.h
@@ -0,0 +1,142 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: krr@google.com (Kasturi Rangan Raghavan)
+// Inspiration: shumash@google.com (Masha Maria Shugrina)
+// \file
+// Expectation semiring as described by Jason Eisner:
+// See: doi=10.1.1.22.9398
+// Multiplex semiring operations and identities:
+// One: <One, Zero>
+// Zero: <Zero, Zero>
+// Plus: <a1, b1> + <a2, b2> = < (a1 + a2) , (b1 + b2) >
+// Times: <a1, b1> * <a2, b2> = < (a1 * a2) , [(a1 * b2) + (b1 * a2)] >
+// Division: Undefined (currently)
+//
+// Usually used to store the pair <probability, random_variable> so that
+// ShortestDistance[Fst<ArcTpl<ExpectationWeight<P, V> > >]
+// == < PosteriorProbability, Expected_Value[V] >
+
+#ifndef FST_LIB_EXPECTATION_WEIGHT_H_
+#define FST_LIB_EXPECTATION_WEIGHT_H_
+
+#include<string>
+
+#include <fst/pair-weight.h>
+
+
+namespace fst {
+
+// X1 is usually a probability weight like LogWeight
+// X2 is usually a random variable or vector
+// see SignedLogWeight or SparsePowerWeight
+//
+// If X1 is distinct from X2, it is required that there is an external
+// product between X1 and X2 and if both semirings are commutative, or
+// left or right semirings, then result must have those properties.
+template <class X1, class X2>
+class ExpectationWeight : public PairWeight<X1, X2> {
+ public:
+ using PairWeight<X1, X2>::Value1;
+ using PairWeight<X1, X2>::Value2;
+
+ using PairWeight<X1, X2>::Reverse;
+ using PairWeight<X1, X2>::Quantize;
+ using PairWeight<X1, X2>::Member;
+
+ typedef X1 W1;
+ typedef X2 W2;
+
+ typedef ExpectationWeight<typename X1::ReverseWeight,
+ typename X2::ReverseWeight> ReverseWeight;
+
+ ExpectationWeight() : PairWeight<X1, X2>(Zero()) { } // default value is Zero()
+
+ ExpectationWeight(const ExpectationWeight<X1, X2>& w)
+ : PairWeight<X1, X2> (w) { }
+
+ ExpectationWeight(const PairWeight<X1, X2>& w)
+ : PairWeight<X1, X2> (w) { }
+
+ ExpectationWeight(const X1& x1, const X2& x2)
+ : PairWeight<X1, X2>(x1, x2) { }
+
+ static const ExpectationWeight<X1, X2> &Zero() { // <X1::Zero, X2::Zero>
+ static const ExpectationWeight<X1, X2> zero(X1::Zero(), X2::Zero());
+ return zero;
+ }
+
+ static const ExpectationWeight<X1, X2> &One() { // <X1::One, X2::Zero>
+ static const ExpectationWeight<X1, X2> one(X1::One(), X2::Zero());
+ return one;
+ }
+
+ static const ExpectationWeight<X1, X2> &NoWeight() { // both components NoWeight
+ static const ExpectationWeight<X1, X2> no_weight(X1::NoWeight(),
+ X2::NoWeight());
+ return no_weight;
+ }
+
+ static const string &Type() { // "expectation_<X1 type>_<X2 type>"
+ static const string type = "expectation_" + X1::Type() + "_" + X2::Type();
+ return type;
+ }
+
+ PairWeight<X1, X2> Quantize(float delta = kDelta) const {
+ return PairWeight<X1, X2>::Quantize(delta); // honour the caller's delta
+ }
+
+ ReverseWeight Reverse() const {
+ return PairWeight<X1, X2>::Reverse();
+ }
+
+ bool Member() const {
+ return PairWeight<X1, X2>::Member();
+ }
+
+ static uint64 Properties() { // only the properties that both components have
+ uint64 props1 = W1::Properties();
+ uint64 props2 = W2::Properties();
+ return props1 & props2 & (kLeftSemiring | kRightSemiring |
+ kCommutative | kIdempotent);
+ }
+};
+
+template <class X1, class X2> // Component-wise sum: <a1 + a2, b1 + b2>.
+inline ExpectationWeight<X1, X2> Plus(const ExpectationWeight<X1, X2> &w,
+ const ExpectationWeight<X1, X2> &v) {
+ return ExpectationWeight<X1, X2>(Plus(w.Value1(), v.Value1()),
+ Plus(w.Value2(), v.Value2()));
+}
+
+
+template <class X1, class X2> // Product: <a1 * a2, (a1 * b2) + (b1 * a2)>.
+inline ExpectationWeight<X1, X2> Times(const ExpectationWeight<X1, X2> &w,
+ const ExpectationWeight<X1, X2> &v) {
+ return ExpectationWeight<X1, X2>(Times(w.Value1(), v.Value1()),
+ Plus(Times(w.Value1(), v.Value2()), // a1 * b2
+ Times(w.Value2(), v.Value1()))); // b1 * a2
+}
+
+template <class X1, class X2> // Division is not implemented for this semiring.
+inline ExpectationWeight<X1, X2> Divide(const ExpectationWeight<X1, X2> &w,
+ const ExpectationWeight<X1, X2> &v,
+ DivideType typ = DIVIDE_ANY) {
+ FSTERROR() << "ExpectationWeight::Divide: not implemented"; // arguments are ignored
+ return ExpectationWeight<X1, X2>::NoWeight();
+}
+
+} // namespace fst
+
+#endif // FST_LIB_EXPECTATION_WEIGHT_H_
diff --git a/src/include/fst/extensions/far/compile-strings.h b/src/include/fst/extensions/far/compile-strings.h
new file mode 100644
index 0000000..d7f4d6b
--- /dev/null
+++ b/src/include/fst/extensions/far/compile-strings.h
@@ -0,0 +1,271 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Authors: allauzen@google.com (Cyril Allauzen)
+// ttai@google.com (Terry Tai)
+// jpr@google.com (Jake Ratkiewicz)
+
+
+#ifndef FST_EXTENSIONS_FAR_COMPILE_STRINGS_H_
+#define FST_EXTENSIONS_FAR_COMPILE_STRINGS_H_
+
+#include <libgen.h>
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/extensions/far/far.h>
+#include <fst/string.h>
+
+namespace fst {
+
+// Reads entries from a text stream and compiles each one into an FST with a
+// StringCompiler. An entry is either a single line (LINE) or everything that
+// remains in the stream (FILE). The constructor already advances to the first
+// entry, so a freshly constructed reader is positioned on its first input.
+//
+// Sample usage:
+//   for (StringReader<Arc> reader(...); !reader.Done(); reader.Next()) {
+//     Fst<Arc> *fst = reader.GetVectorFst();  // Caller owns the result.
+//   }
+template <class A>
+class StringReader {
+ public:
+  typedef A Arc;
+  typedef typename A::Label Label;
+  typedef typename A::Weight Weight;
+  typedef typename StringCompiler<A>::TokenType TokenType;
+
+  // LINE: each input line is one entry. FILE: the whole stream is one entry.
+  enum EntryType { LINE = 1, FILE = 2 };
+
+  // istrm:       stream to read from; must outlive the reader.
+  // source:      name used only in the VLOG message emitted by Next().
+  // entry_type:  how the stream is split into entries.
+  // token_type, syms, unknown_label, allow_negative_labels: forwarded to the
+  //              StringCompiler that turns an entry into an FST.
+  // The default for unknown_label is the label sentinel kNoLabel (it used to
+  // be spelled with the state-id sentinel kNoStateId).
+  StringReader(istream &istrm,
+               const string &source,
+               EntryType entry_type,
+               TokenType token_type,
+               bool allow_negative_labels,
+               const SymbolTable *syms = 0,
+               Label unknown_label = kNoLabel)
+      : nline_(0), strm_(istrm), source_(source), entry_type_(entry_type),
+        token_type_(token_type), done_(false),
+        compiler_(token_type, syms, unknown_label, allow_negative_labels) {
+    Next();  // Initialize the reader to the first input.
+  }
+
+  // True once there is no further entry to compile.
+  bool Done() const {
+    return done_;
+  }
+
+  // Advances to the next entry and stores its text in content_.
+  void Next() {
+    VLOG(1) << "Processing source " << source_ << " at line " << nline_;
+    if (!strm_) {                    // We're done if we have no more input.
+      done_ = true;
+      return;
+    }
+    if (entry_type_ == LINE) {
+      getline(strm_, content_);
+      ++nline_;
+    } else {
+      // FILE mode: concatenate every remaining line, restoring the newline
+      // that getline strips.
+      content_.clear();
+      string line;
+      while (getline(strm_, line)) {
+        ++nline_;
+        content_.append(line);
+        content_.append("\n");
+      }
+    }
+    if (!strm_ && content_.empty())  // We're also done if we read off all the
+      done_ = true;                  // whitespace at the end of a file.
+  }
+
+  // Compiles the current entry into a newly allocated VectorFst. Returns NULL
+  // if compilation fails; otherwise the caller owns the returned FST.
+  VectorFst<A> *GetVectorFst() {
+    VectorFst<A> *fst = new VectorFst<A>;
+    if (compiler_(content_, fst)) {
+      return fst;
+    } else {
+      delete fst;
+      return NULL;
+    }
+  }
+
+  // Same as GetVectorFst() but produces a string-compacted CompactFst.
+  CompactFst<A, StringCompactor<A> > *GetCompactFst() {
+    CompactFst<A, StringCompactor<A> > *fst =
+        new CompactFst<A, StringCompactor<A> >;
+    if (compiler_(content_, fst)) {
+      return fst;
+    } else {
+      delete fst;
+      return NULL;
+    }
+  }
+
+ private:
+  size_t nline_;            // Number of lines read so far.
+  istream &strm_;           // Input stream (not owned).
+  string source_;           // Name of the input, for logging.
+  EntryType entry_type_;
+  TokenType token_type_;
+  bool done_;
+  StringCompiler<A> compiler_;
+  string content_;  // The actual content of the input stream's next FST.
+
+  DISALLOW_COPY_AND_ASSIGN(StringReader);
+};
+
+// Compute the minimal length required to encode each line number as a decimal
+// number.
+// NOTE(review): only the declaration lives in this header; presumably the
+// result depends on the number of lines in `filename` -- confirm against the
+// definition.
+int KeySize(const char *filename);
+
+// Compiles the strings found in the input files into FSTs and writes them to
+// a newly created FST archive.
+//
+// in_fnames:        input files, or files listing input files when
+//                   file_list_input is true.
+// out_fname:        archive to create.
+// fst_type:         "vector" (or empty) or "compact".
+// far_type:         archive format passed to FarWriter::Create.
+// generate_keys:    if > 0, keys are the running entry number zero-padded to
+//                   this width; otherwise keys are derived from the input
+//                   file's basename.
+// fet, tt:          entry and token types.
+// symbols_fname:    optional symbol table file.
+// unknown_symbol:   optional symbol whose label is handed to the compiler as
+//                   the unknown label.
+// key_prefix/key_suffix: added around every generated key.
+//
+// Errors are reported through FSTERROR() and end the function early. The
+// symbol table and the writer are released on every exit path.
+template <class Arc>
+void FarCompileStrings(const vector<string> &in_fnames,
+                       const string &out_fname,
+                       const string &fst_type,
+                       const FarType &far_type,
+                       int32 generate_keys,
+                       FarEntryType fet,
+                       FarTokenType tt,
+                       const string &symbols_fname,
+                       const string &unknown_symbol,
+                       bool allow_negative_labels,
+                       bool file_list_input,
+                       const string &key_prefix,
+                       const string &key_suffix) {
+  typename StringReader<Arc>::EntryType entry_type;
+  if (fet == FET_LINE) {
+    entry_type = StringReader<Arc>::LINE;
+  } else if (fet == FET_FILE) {
+    entry_type = StringReader<Arc>::FILE;
+  } else {
+    FSTERROR() << "FarCompileStrings: unknown entry type";
+    return;
+  }
+
+  typename StringCompiler<Arc>::TokenType token_type;
+  if (tt == FTT_SYMBOL) {
+    token_type = StringCompiler<Arc>::SYMBOL;
+  } else if (tt == FTT_BYTE) {
+    token_type = StringCompiler<Arc>::BYTE;
+  } else if (tt == FTT_UTF8) {
+    token_type = StringCompiler<Arc>::UTF8;
+  } else {
+    FSTERROR() << "FarCompileStrings: unknown token type";
+    return;
+  }
+
+  bool compact;
+  if (fst_type.empty() || (fst_type == "vector")) {
+    compact = false;
+  } else if (fst_type == "compact") {
+    compact = true;
+  } else {
+    FSTERROR() << "FarCompileStrings: unknown fst type: "
+               << fst_type;
+    return;
+  }
+
+  // The symbol table, if any, is owned by this function from here on.
+  const SymbolTable *syms = 0;
+  typename Arc::Label unknown_label = kNoLabel;
+  if (!symbols_fname.empty()) {
+    syms = SymbolTable::ReadText(symbols_fname,
+                                 allow_negative_labels);
+    if (!syms) {
+      FSTERROR() << "FarCompileStrings: error reading symbol table: "
+                 << symbols_fname;
+      return;
+    }
+    if (!unknown_symbol.empty()) {
+      unknown_label = syms->Find(unknown_symbol);
+      if (unknown_label == kNoLabel) {
+        // Report the symbol that was looked up; the label itself is always
+        // kNoLabel on this path and carries no information.
+        FSTERROR() << "FarCompileStrings: unknown label \"" << unknown_symbol
+                   << "\" missing from symbol table: " << symbols_fname;
+        delete syms;
+        return;
+      }
+    }
+  }
+
+  FarWriter<Arc> *far_writer =
+      FarWriter<Arc>::Create(out_fname, far_type);
+  if (!far_writer) {
+    delete syms;
+    return;
+  }
+
+  vector<string> inputs;
+  if (file_list_input) {
+    // NOTE(review): iteration starts at index 1, so in_fnames[0] is never
+    // read as a file list. Left unchanged because the callers are not
+    // visible here -- confirm whether this is intended.
+    for (size_t i = 1; i < in_fnames.size(); ++i) {
+      ifstream istrm(in_fnames[i].c_str());
+      string str;
+      while (getline(istrm, str))
+        inputs.push_back(str);
+    }
+  } else {
+    inputs = in_fnames;
+  }
+
+  int n = 0;  // Running entry number used in generated keys.
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    int key_size = generate_keys ? generate_keys :
+        (entry_type == StringReader<Arc>::FILE ? 1 :
+         KeySize(inputs[i].c_str()));
+    ifstream istrm(inputs[i].c_str());
+
+    for (StringReader<Arc> reader(
+             istrm, inputs[i], entry_type, token_type,
+             allow_negative_labels, syms, unknown_label);
+         !reader.Done();
+         reader.Next()) {
+      ++n;
+      const Fst<Arc> *fst;
+      if (compact)
+        fst = reader.GetCompactFst();
+      else
+        fst = reader.GetVectorFst();
+      if (!fst) {
+        FSTERROR() << "FarCompileStrings: compiling string number " << n
+                   << " in file " << inputs[i] << " failed with token_type = "
+                   << (tt == FTT_BYTE ? "byte" :
+                       (tt == FTT_UTF8 ? "utf8" :
+                        (tt == FTT_SYMBOL ? "symbol" : "unknown")))
+                   << " and entry_type = "
+                   << (fet == FET_LINE ? "line" :
+                       (fet == FET_FILE ? "file" : "unknown"));
+        delete far_writer;
+        delete syms;
+        return;
+      }
+      ostringstream keybuf;
+      keybuf.width(key_size);
+      keybuf.fill('0');
+      keybuf << n;
+      string key;
+      if (generate_keys > 0) {
+        key = keybuf.str();
+      } else {
+        // basename() may modify its argument, so hand it a private copy.
+        char* filename = new char[inputs[i].size() + 1];
+        strcpy(filename, inputs[i].c_str());
+        key = basename(filename);
+        if (entry_type != StringReader<Arc>::FILE) {
+          key += "-";
+          key += keybuf.str();
+        }
+        delete[] filename;
+      }
+      far_writer->Add(key_prefix + key + key_suffix, *fst);
+      delete fst;
+    }
+    // With file-derived keys the numbering restarts for every input file.
+    if (generate_keys == 0)
+      n = 0;
+  }
+
+  delete far_writer;
+  delete syms;
+}
+
+} // namespace fst
+
+
+#endif // FST_EXTENSIONS_FAR_COMPILE_STRINGS_H_
diff --git a/src/include/fst/extensions/far/create.h b/src/include/fst/extensions/far/create.h
new file mode 100644
index 0000000..edb31e7
--- /dev/null
+++ b/src/include/fst/extensions/far/create.h
@@ -0,0 +1,87 @@
+// create.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use new dispatch
+//
+// \file
+// Creates a finite-state archive from component FSTs. Includes
+// helper function for farcreate.cc that templates the main on the arc
+// type to support multiple and extensible arc types.
+//
+
+#ifndef FST_EXTENSIONS_FAR_CREATE_H__
+#define FST_EXTENSIONS_FAR_CREATE_H__
+
+#include <libgen.h>
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/extensions/far/far.h>
+
+namespace fst {
+
+// Creates the archive `out_fname` of type `far_type` from the FST files named
+// in `in_fnames` (or named inside those files when `file_list_input` is true).
+//
+// If generate_keys > 0, each key is the 1-based input index, zero-padded to
+// that width; otherwise the key is the basename of the input file. Every key
+// is wrapped in key_prefix / key_suffix.
+//
+// Stops at the first input that cannot be read; the writer is released on
+// every exit path.
+template <class Arc>
+void FarCreate(const vector<string> &in_fnames,
+               const string &out_fname,
+               const int32 generate_keys,
+               const bool file_list_input,
+               const FarType &far_type,
+               const string &key_prefix,
+               const string &key_suffix) {
+  FarWriter<Arc> *far_writer =
+      FarWriter<Arc>::Create(out_fname, far_type);
+  if (!far_writer) return;
+
+  vector<string> inputs;
+  if (file_list_input) {
+    // NOTE(review): iteration starts at index 1, so in_fnames[0] is never
+    // read as a file list. Left unchanged because the callers are not
+    // visible here -- confirm whether this is intended.
+    for (size_t i = 1; i < in_fnames.size(); ++i) {
+      ifstream istrm(in_fnames[i].c_str());
+      string str;
+      while (getline(istrm, str))
+        inputs.push_back(str);
+    }
+  } else {
+    inputs = in_fnames;
+  }
+
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    Fst<Arc> *ifst = Fst<Arc>::Read(inputs[i]);
+    if (!ifst) {
+      delete far_writer;  // Do not leak the writer on a failed read.
+      return;
+    }
+    string key;
+    if (generate_keys > 0) {
+      ostringstream keybuf;
+      keybuf.width(generate_keys);
+      keybuf.fill('0');
+      keybuf << i + 1;
+      key = keybuf.str();
+    } else {
+      // basename() may modify its argument, so hand it a private copy.
+      char* filename = new char[inputs[i].size() + 1];
+      strcpy(filename, inputs[i].c_str());
+      key = basename(filename);
+      delete[] filename;
+    }
+
+    far_writer->Add(key_prefix + key + key_suffix, *ifst);
+    delete ifst;
+  }
+
+  delete far_writer;
+}
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_FAR_CREATE_H__
diff --git a/src/include/fst/extensions/far/extract.h b/src/include/fst/extensions/far/extract.h
new file mode 100644
index 0000000..022ca60
--- /dev/null
+++ b/src/include/fst/extensions/far/extract.h
@@ -0,0 +1,85 @@
+// extract.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to use the new arc-dispatch
+
+// \file
+// Extracts component FSTs from a finite-state archive.
+//
+
+#ifndef FST_EXTENSIONS_FAR_EXTRACT_H__
+#define FST_EXTENSIONS_FAR_EXTRACT_H__
+
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/extensions/far/far.h>
+
+namespace fst {
+
+// Writes every FST stored in the archive(s) `ifilenames` to its own file.
+//
+// begin_key / end_key: if non-empty, extraction starts at the first entry
+//     >= begin_key and stops before the first key greater than end_key.
+// generate_filenames: if non-zero, output names are the 1-based entry number
+//     zero-padded to this width; otherwise the entry key is used, with
+//     ".<n>" appended to the n-th consecutive repetition of the same key.
+// filename_prefix / filename_suffix: added around every output name.
+//
+// The reader is released before returning.
+template<class Arc>
+void FarExtract(const vector<string> &ifilenames,
+                const int32 &generate_filenames,
+                const string &begin_key,
+                const string &end_key,
+                const string &filename_prefix,
+                const string &filename_suffix) {
+  FarReader<Arc> *far_reader = FarReader<Arc>::Open(ifilenames);
+  if (!far_reader) return;
+
+  if (!begin_key.empty())
+    far_reader->Find(begin_key);
+
+  string okey;   // Key of the previous entry.
+  int nrep = 0;  // How many times in a row the current key has repeated.
+  for (int i = 1; !far_reader->Done(); far_reader->Next(), ++i) {
+    string key = far_reader->GetKey();
+    if (!end_key.empty() && end_key < key)
+      break;
+    const Fst<Arc> &fst = far_reader->GetFst();
+
+    if (key == okey)
+      ++nrep;
+    else
+      nrep = 0;
+
+    okey = key;
+
+    string ofilename;
+    if (generate_filenames) {
+      ostringstream tmp;
+      tmp.width(generate_filenames);
+      tmp.fill('0');
+      tmp << i;
+      ofilename = tmp.str();
+    } else {
+      if (nrep > 0) {
+        ostringstream tmp;
+        tmp << '.' << nrep;
+        key += tmp.str();
+      }
+      ofilename = key;
+    }
+    fst.Write(filename_prefix + ofilename + filename_suffix);
+  }
+
+  delete far_reader;  // Previously leaked.
+}
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_FAR_EXTRACT_H__
diff --git a/src/include/fst/extensions/far/far.h b/src/include/fst/extensions/far/far.h
new file mode 100644
index 0000000..82b9e5c
--- /dev/null
+++ b/src/include/fst/extensions/far/far.h
@@ -0,0 +1,360 @@
+// far.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Finite-State Transducer (FST) archive classes.
+//
+
+#ifndef FST_EXTENSIONS_FAR_FAR_H__
+#define FST_EXTENSIONS_FAR_FAR_H__
+
+#include <fst/extensions/far/stlist.h>
+#include <fst/extensions/far/sttable.h>
+#include <fst/fst.h>
+#include <fst/vector-fst.h>
+
+namespace fst {
+
+// Granularity of one archive entry when working with strings: a single line
+// (FET_LINE) or a whole file (FET_FILE).
+enum FarEntryType { FET_LINE, FET_FILE };
+// How a string is tokenized into labels: through a symbol table
+// (FTT_SYMBOL), as raw bytes (FTT_BYTE) or as UTF-8 (FTT_UTF8).
+// NOTE(review): exact semantics are defined by StringCompiler -- confirm.
+enum FarTokenType { FTT_SYMBOL, FTT_BYTE, FTT_UTF8 };
+
+// FST archive header class. Identifies the container format and the arc type
+// of an archive file.
+class FarHeader {
+ public:
+  // Container format found by the last successful Read(): "sttable" or
+  // "stlist". Empty before that.
+  const string &FarType() const { return fartype_; }
+  // Arc type found by the last successful Read(), or "unknown" if the
+  // archive's FST header does not name one.
+  const string &ArcType() const { return arctype_; }
+
+  // Reads the header of `filename`. Returns false if the name is empty or the
+  // file is neither an STTable nor an STList.
+  bool Read(const string &filename) {
+    FstHeader fsthdr;
+    if (filename.empty()) {  // Header reading unsupported on stdin.
+      return false;
+    } else if (IsSTTable(filename)) {  // Check if STTable
+      ReadSTTableHeader(filename, &fsthdr);
+      fartype_ = "sttable";
+      arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
+      return true;
+    } else if (IsSTList(filename)) {  // Check if STList
+      ReadSTListHeader(filename, &fsthdr);
+      fartype_ = "stlist";  // Was mislabelled as "sttable".
+      arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
+      return true;
+    }
+    return false;
+  }
+
+ private:
+  string fartype_;
+  string arctype_;
+};
+
+// Archive container formats. FarWriter::Create maps FAR_DEFAULT to STList
+// when the file name is empty and to STTable otherwise; FAR_SSTABLE has no
+// writer in this header and is reported as an unknown far type.
+enum FarType { FAR_DEFAULT = 0, FAR_STTABLE = 1, FAR_STLIST = 2,
+ FAR_SSTABLE = 3 };
+
+// This class creates an archive of FSTs. It is an abstract interface;
+// instances are obtained through Create().
+template <class A>
+class FarWriter {
+ public:
+ typedef A Arc;
+
+ // Creates a new (empty) FST archive; returns NULL on error.
+ static FarWriter *Create(const string &filename, FarType type = FAR_DEFAULT);
+
+ // Adds an FST to the end of an archive. Keys must be non-empty and
+ // in lexicographic order. FSTs must have a suitable write method.
+ virtual void Add(const string &key, const Fst<A> &fst) = 0;
+
+ // Returns the container format of this archive.
+ virtual FarType Type() const = 0;
+
+ // Returns the writer's error state.
+ virtual bool Error() const = 0;
+
+ // Virtual so that writers can be deleted through a FarWriter pointer.
+ virtual ~FarWriter() {}
+
+ protected:
+ // Only derived classes may construct a writer.
+ FarWriter() {}
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(FarWriter);
+};
+
+
+// This class iterates through an existing archive of FSTs. It is an abstract
+// interface; instances are obtained through Open().
+template <class A>
+class FarReader {
+ public:
+ typedef A Arc;
+
+ // Opens an existing FST archive in a single file; returns NULL on error.
+ // Sets current position to the beginning of the archive.
+ static FarReader *Open(const string &filename);
+
+ // Opens an existing FST archive in multiple files; returns NULL on error.
+ // Sets current position to the beginning of the archive.
+ static FarReader *Open(const vector<string> &filenames);
+
+ // Resets current position to beginning of archive.
+ virtual void Reset() = 0;
+
+ // Sets current position to first entry >= key. Returns true if a match.
+ virtual bool Find(const string &key) = 0;
+
+ // Current position at end of archive?
+ virtual bool Done() const = 0;
+
+ // Move current position to next FST.
+ virtual void Next() = 0;
+
+ // Returns key at the current position. This reference is invalidated if
+ // the current position in the archive is changed.
+ virtual const string &GetKey() const = 0;
+
+ // Returns FST at the current position. This reference is invalidated if
+ // the current position in the archive is changed.
+ virtual const Fst<A> &GetFst() const = 0;
+
+ // Returns the container format of this archive.
+ virtual FarType Type() const = 0;
+
+ // Returns the reader's error state.
+ virtual bool Error() const = 0;
+
+ // Virtual so that readers can be deleted through a FarReader pointer.
+ virtual ~FarReader() {}
+
+ protected:
+ // Only derived classes may construct a reader.
+ FarReader() {}
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(FarReader);
+};
+
+
+// Function object that serializes an FST to a stream using default write
+// options; used as the entry writer of the STTable/STList containers.
+template <class A>
+class FstWriter {
+ public:
+  void operator()(ostream &strm, const Fst<A> &fst) const {
+    FstWriteOptions default_opts;
+    fst.Write(strm, default_opts);
+  }
+};
+
+
+// FarWriter backed by an STTableWriter.
+template <class A>
+class STTableFarWriter : public FarWriter<A> {
+ public:
+  typedef A Arc;
+
+  // Creates a writer for `filename`. Returns NULL if the underlying
+  // STTableWriter could not be created, so that a null writer is never
+  // wrapped and dereferenced later by Add() or Error().
+  static STTableFarWriter *Create(const string filename) {
+    STTableWriter<Fst<A>, FstWriter<A> > *writer =
+        STTableWriter<Fst<A>, FstWriter<A> >::Create(filename);
+    if (!writer) return 0;
+    return new STTableFarWriter(writer);
+  }
+
+  void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); }
+
+  FarType Type() const { return FAR_STTABLE; }
+
+  bool Error() const { return writer_->Error(); }
+
+  ~STTableFarWriter() { delete writer_; }
+
+ private:
+  // Takes ownership of `writer`.
+  explicit STTableFarWriter(STTableWriter<Fst<A>, FstWriter<A> > *writer)
+      : writer_(writer) {}
+
+ private:
+  STTableWriter<Fst<A>, FstWriter<A> > *writer_;  // Owned.
+
+  DISALLOW_COPY_AND_ASSIGN(STTableFarWriter);
+};
+
+
+// FarWriter backed by an STListWriter.
+template <class A>
+class STListFarWriter : public FarWriter<A> {
+ public:
+  typedef A Arc;
+
+  // Creates a writer for `filename`. Returns NULL if the underlying
+  // STListWriter could not be created, so that a null writer is never
+  // wrapped and dereferenced later by Add() or Error().
+  static STListFarWriter *Create(const string filename) {
+    STListWriter<Fst<A>, FstWriter<A> > *writer =
+        STListWriter<Fst<A>, FstWriter<A> >::Create(filename);
+    if (!writer) return 0;
+    return new STListFarWriter(writer);
+  }
+
+  void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); }
+
+  FarType Type() const { return FAR_STLIST; }
+
+  bool Error() const { return writer_->Error(); }
+
+  ~STListFarWriter() { delete writer_; }
+
+ private:
+  // Takes ownership of `writer`.
+  explicit STListFarWriter(STListWriter<Fst<A>, FstWriter<A> > *writer)
+      : writer_(writer) {}
+
+ private:
+  STListWriter<Fst<A>, FstWriter<A> > *writer_;  // Owned.
+
+  DISALLOW_COPY_AND_ASSIGN(STListFarWriter);
+};
+
+
+// Factory for archive writers. FAR_DEFAULT selects STList when `filename` is
+// empty and STTable otherwise; any type without a writer here is logged as
+// an error and yields NULL.
+template <class A>
+FarWriter<A> *FarWriter<A>::Create(const string &filename, FarType type) {
+  switch (type) {
+    case FAR_STLIST:
+      return STListFarWriter<A>::Create(filename);
+    case FAR_STTABLE:
+      return STTableFarWriter<A>::Create(filename);
+    case FAR_DEFAULT:
+      if (filename.empty())
+        return STListFarWriter<A>::Create(filename);
+      return STTableFarWriter<A>::Create(filename);
+    default:
+      LOG(ERROR) << "FarWriter::Create: unknown far type";
+      return 0;
+  }
+}
+
+
+// Function object that reads one FST from a stream using default read
+// options; used as the entry reader of the STTable/STList containers.
+template <class A>
+class FstReader {
+ public:
+  Fst<A> *operator()(istream &strm) const {
+    FstReadOptions default_opts;
+    return Fst<A>::Read(strm, default_opts);
+  }
+};
+
+
+// FarReader backed by an STTableReader.
+template <class A>
+class STTableFarReader : public FarReader<A> {
+ public:
+  typedef A Arc;
+
+  // Opens a single-file archive. Returns NULL if the underlying reader could
+  // not be created; other failures remain visible through Error().
+  static STTableFarReader *Open(const string &filename) {
+    STTableReader<Fst<A>, FstReader<A> > *reader =
+        STTableReader<Fst<A>, FstReader<A> >::Open(filename);
+    if (!reader) return 0;
+    return new STTableFarReader(reader);
+  }
+
+  // Opens a multi-file archive; same NULL convention as above.
+  static STTableFarReader *Open(const vector<string> &filenames) {
+    STTableReader<Fst<A>, FstReader<A> > *reader =
+        STTableReader<Fst<A>, FstReader<A> >::Open(filenames);
+    if (!reader) return 0;
+    return new STTableFarReader(reader);
+  }
+
+  void Reset() { reader_->Reset(); }
+
+  bool Find(const string &key) { return reader_->Find(key); }
+
+  bool Done() const { return reader_->Done(); }
+
+  void Next() { return reader_->Next(); }
+
+  const string &GetKey() const { return reader_->GetKey(); }
+
+  const Fst<A> &GetFst() const { return reader_->GetEntry(); }
+
+  FarType Type() const { return FAR_STTABLE; }
+
+  bool Error() const { return reader_->Error(); }
+
+  ~STTableFarReader() { delete reader_; }
+
+ private:
+  // Takes ownership of `reader`.
+  explicit STTableFarReader(STTableReader<Fst<A>, FstReader<A> > *reader)
+      : reader_(reader) {}
+
+ private:
+  STTableReader<Fst<A>, FstReader<A> > *reader_;  // Owned.
+
+  DISALLOW_COPY_AND_ASSIGN(STTableFarReader);
+};
+
+
+// FarReader backed by an STListReader.
+template <class A>
+class STListFarReader : public FarReader<A> {
+ public:
+  typedef A Arc;
+
+  // Opens a single-file archive. Returns NULL if the underlying reader could
+  // not be created; other failures remain visible through Error().
+  static STListFarReader *Open(const string &filename) {
+    STListReader<Fst<A>, FstReader<A> > *reader =
+        STListReader<Fst<A>, FstReader<A> >::Open(filename);
+    if (!reader) return 0;
+    return new STListFarReader(reader);
+  }
+
+  // Opens a multi-file archive; same NULL convention as above.
+  static STListFarReader *Open(const vector<string> &filenames) {
+    STListReader<Fst<A>, FstReader<A> > *reader =
+        STListReader<Fst<A>, FstReader<A> >::Open(filenames);
+    if (!reader) return 0;
+    return new STListFarReader(reader);
+  }
+
+  void Reset() { reader_->Reset(); }
+
+  bool Find(const string &key) { return reader_->Find(key); }
+
+  bool Done() const { return reader_->Done(); }
+
+  void Next() { return reader_->Next(); }
+
+  const string &GetKey() const { return reader_->GetKey(); }
+
+  const Fst<A> &GetFst() const { return reader_->GetEntry(); }
+
+  FarType Type() const { return FAR_STLIST; }
+
+  bool Error() const { return reader_->Error(); }
+
+  ~STListFarReader() { delete reader_; }
+
+ private:
+  // Takes ownership of `reader`.
+  explicit STListFarReader(STListReader<Fst<A>, FstReader<A> > *reader)
+      : reader_(reader) {}
+
+ private:
+  STListReader<Fst<A>, FstReader<A> > *reader_;  // Owned.
+
+  DISALLOW_COPY_AND_ASSIGN(STListFarReader);
+};
+
+
+// Opens a single-file archive, choosing the reader from the file itself. An
+// empty name is handed to the STList reader; otherwise the file is probed
+// for the STTable format first and the STList format second. Returns NULL
+// when neither format matches.
+template <class A>
+FarReader<A> *FarReader<A>::Open(const string &filename) {
+  if (filename.empty()) return STListFarReader<A>::Open(filename);
+  if (IsSTTable(filename)) return STTableFarReader<A>::Open(filename);
+  if (IsSTList(filename)) return STListFarReader<A>::Open(filename);
+  return 0;
+}
+
+
+// Opens a multi-file archive. The reader is chosen from the first file name
+// only: an empty name selects the STList reader; otherwise the file is
+// probed for STTable, then STList. Returns NULL for an empty list or an
+// unrecognized format.
+template <class A>
+FarReader<A> *FarReader<A>::Open(const vector<string> &filenames) {
+  if (filenames.empty()) return 0;
+  const string &first = filenames[0];
+  if (first.empty()) return STListFarReader<A>::Open(filenames);
+  if (IsSTTable(first)) return STTableFarReader<A>::Open(filenames);
+  if (IsSTList(first)) return STListFarReader<A>::Open(filenames);
+  return 0;
+}
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_FAR_FAR_H__
diff --git a/src/include/fst/extensions/far/farlib.h b/src/include/fst/extensions/far/farlib.h
new file mode 100644
index 0000000..91ba224
--- /dev/null
+++ b/src/include/fst/extensions/far/farlib.h
@@ -0,0 +1,31 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+// A finite-state archive (FAR) is used to store an indexable collection of
+// FSTs in a single file. Utilities are provided to create FARs from FSTs,
+// to iterate over FARs, and to extract specific FSTs from FARs.
+
+#ifndef FST_EXTENSIONS_FAR_FARLIB_H_
+#define FST_EXTENSIONS_FAR_FARLIB_H_
+
+#include <fst/extensions/far/far.h>
+#include <fst/extensions/far/compile-strings.h>
+#include <fst/extensions/far/create.h>
+#include <fst/extensions/far/extract.h>
+#include <fst/extensions/far/info.h>
+#include <fst/extensions/far/print-strings.h>
+
+#endif // FST_EXTENSIONS_FAR_FARLIB_H_
diff --git a/src/include/fst/extensions/far/farscript.h b/src/include/fst/extensions/far/farscript.h
new file mode 100644
index 0000000..9c3b1ca
--- /dev/null
+++ b/src/include/fst/extensions/far/farscript.h
@@ -0,0 +1,234 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+// Convenience file for including all of the FAR operations,
+// or registering them for new arc types.
+
+#ifndef FST_EXTENSIONS_FAR_FARSCRIPT_H_
+#define FST_EXTENSIONS_FAR_FARSCRIPT_H_
+
+#include <vector>
+using std::vector;
+#include <string>
+
+#include <fst/script/arg-packs.h>
+#include <fst/extensions/far/compile-strings.h>
+#include <fst/extensions/far/create.h>
+#include <fst/extensions/far/extract.h>
+#include <fst/extensions/far/info.h>
+#include <fst/extensions/far/print-strings.h>
+#include <fst/extensions/far/far.h>
+
+#include <fst/types.h>
+
+namespace fst {
+namespace script {
+
+// Bundles the arguments of fst::FarCompileStrings into one object so they
+// can be handed to the arc-templated FarCompileStrings(FarCompileStringsArgs*)
+// below.
+//
+// Note: it is safe to pass these strings as references because
+// this struct is only used to pass them deeper in the call graph.
+// Be sure you understand why this is so before using this struct
+// for anything else!
+struct FarCompileStringsArgs {
+ const vector<string> &in_fnames;
+ const string &out_fname;
+ const string &fst_type;
+ const FarType &far_type;
+ const int32 generate_keys;
+ const FarEntryType fet;
+ const FarTokenType tt;
+ const string &symbols_fname;
+ const string &unknown_symbol;
+ const bool allow_negative_labels;
+ const bool file_list_input;
+ const string &key_prefix;
+ const string &key_suffix;
+
+ // Stores references to (or copies of) the given arguments; the referenced
+ // objects must outlive this struct.
+ FarCompileStringsArgs(const vector<string> &in_fnames,
+ const string &out_fname,
+ const string &fst_type,
+ const FarType &far_type,
+ int32 generate_keys,
+ FarEntryType fet,
+ FarTokenType tt,
+ const string &symbols_fname,
+ const string &unknown_symbol,
+ bool allow_negative_labels,
+ bool file_list_input,
+ const string &key_prefix,
+ const string &key_suffix) :
+ in_fnames(in_fnames), out_fname(out_fname), fst_type(fst_type),
+ far_type(far_type), generate_keys(generate_keys), fet(fet),
+ tt(tt), symbols_fname(symbols_fname), unknown_symbol(unknown_symbol),
+ allow_negative_labels(allow_negative_labels),
+ file_list_input(file_list_input), key_prefix(key_prefix),
+ key_suffix(key_suffix) { }
+};
+
+// Arc-typed entry point: unpacks `args` and forwards every field, in order,
+// to fst::FarCompileStrings<Arc>.
+template <class Arc>
+void FarCompileStrings(FarCompileStringsArgs *args) {
+  FarCompileStringsArgs &a = *args;
+  fst::FarCompileStrings<Arc>(a.in_fnames, a.out_fname, a.fst_type,
+                              a.far_type, a.generate_keys, a.fet, a.tt,
+                              a.symbols_fname, a.unknown_symbol,
+                              a.allow_negative_labels, a.file_list_input,
+                              a.key_prefix, a.key_suffix);
+}
+
+// Non-template entry point taking the arc type as a string.
+// NOTE(review): defined outside this header; presumably it selects the
+// FarCompileStrings<Arc> instantiation registered for `arc_type` -- confirm
+// against the definition.
+void FarCompileStrings(
+ const vector<string> &in_fnames,
+ const string &out_fname,
+ const string &arc_type,
+ const string &fst_type,
+ const FarType &far_type,
+ int32 generate_keys,
+ FarEntryType fet,
+ FarTokenType tt,
+ const string &symbols_fname,
+ const string &unknown_symbol,
+ bool allow_negative_labels,
+ bool file_list_input,
+ const string &key_prefix,
+ const string &key_suffix);
+
+
+// Bundles the arguments of fst::FarCreate into one object so they can be
+// handed to the arc-templated FarCreate(FarCreateArgs*) below.
+//
+// Note: it is safe to pass these strings as references because
+// this struct is only used to pass them deeper in the call graph.
+// Be sure you understand why this is so before using this struct
+// for anything else!
+struct FarCreateArgs {
+ const vector<string> &in_fnames;
+ const string &out_fname;
+ const int32 generate_keys;
+ const bool file_list_input;
+ const FarType &far_type;
+ const string &key_prefix;
+ const string &key_suffix;
+
+ // Stores references to (or copies of) the given arguments; the referenced
+ // objects must outlive this struct.
+ FarCreateArgs(
+ const vector<string> &in_fnames, const string &out_fname,
+ const int32 generate_keys, const bool file_list_input,
+ const FarType &far_type, const string &key_prefix,
+ const string &key_suffix)
+ : in_fnames(in_fnames), out_fname(out_fname),
+ generate_keys(generate_keys), file_list_input(file_list_input),
+ far_type(far_type), key_prefix(key_prefix), key_suffix(key_suffix) { }
+};
+
+// Arc-typed entry point: unpacks `args` and forwards every field, in order,
+// to fst::FarCreate<Arc>.
+template<class Arc>
+void FarCreate(FarCreateArgs *args) {
+  FarCreateArgs &a = *args;
+  fst::FarCreate<Arc>(a.in_fnames, a.out_fname, a.generate_keys,
+                      a.file_list_input, a.far_type, a.key_prefix,
+                      a.key_suffix);
+}
+
+// Non-template entry point taking the arc type as a string.
+// NOTE(review): defined outside this header; presumably it selects the
+// FarCreate<Arc> instantiation registered for `arc_type` -- confirm against
+// the definition.
+void FarCreate(const vector<string> &in_fnames,
+ const string &out_fname,
+ const string &arc_type,
+ const int32 generate_keys,
+ const bool file_list_input,
+ const FarType &far_type,
+ const string &key_prefix,
+ const string &key_suffix);
+
+
+// Argument pack for FarExtract, in the order of fst::FarExtract's parameters:
+// input file names, generate_filenames, begin key, end key, file name prefix,
+// file name suffix.
+typedef args::Package<const vector<string> &, int32,
+                      const string&, const string&, const string&,
+                      const string&> FarExtractArgs;
+
+// Arc-typed entry point: forwards the six packed arguments, in order, to
+// fst::FarExtract<Arc>.
+template<class Arc>
+void FarExtract(FarExtractArgs *args) {
+  FarExtractArgs &a = *args;
+  fst::FarExtract<Arc>(a.arg1, a.arg2, a.arg3, a.arg4, a.arg5, a.arg6);
+}
+
+// Non-template entry point taking the arc type as a string.
+// NOTE(review): defined outside this header; presumably it selects the
+// FarExtract<Arc> instantiation registered for `arc_type` -- confirm against
+// the definition.
+void FarExtract(const vector<string> &ifilenames,
+ const string &arc_type,
+ int32 generate_filenames, const string &begin_key,
+ const string &end_key, const string &filename_prefix,
+ const string &filename_suffix);
+
+// Argument pack for FarInfo, in the order of fst::FarInfo's parameters:
+// file names, begin key, end key, list_fsts flag.
+typedef args::Package<const vector<string> &, const string &,
+                      const string &, const bool> FarInfoArgs;
+
+// Arc-typed entry point: forwards the four packed arguments, in order, to
+// fst::FarInfo<Arc>.
+template <class Arc>
+void FarInfo(FarInfoArgs *args) {
+  FarInfoArgs &a = *args;
+  fst::FarInfo<Arc>(a.arg1, a.arg2, a.arg3, a.arg4);
+}
+
+// Non-template entry point taking the arc type as a string.
+// NOTE(review): defined outside this header; presumably it selects the
+// FarInfo<Arc> instantiation registered for `arc_type` -- confirm against
+// the definition.
+void FarInfo(const vector<string> &filenames,
+ const string &arc_type,
+ const string &begin_key,
+ const string &end_key,
+ const bool list_fsts);
+
+// Bundles the arguments of fst::FarPrintStrings into one object so they can
+// be handed to the arc-templated FarPrintStrings(FarPrintStringsArgs*) below.
+// The string and vector members are references, so the referenced objects
+// must outlive this struct.
+struct FarPrintStringsArgs {
+ const vector<string> &ifilenames;
+ const FarEntryType entry_type;
+ const FarTokenType token_type;
+ const string &begin_key;
+ const string &end_key;
+ const bool print_key;
+ const string &symbols_fname;
+ const int32 generate_filenames;
+ const string &filename_prefix;
+ const string &filename_suffix;
+
+ // Stores references to (or copies of) the given arguments.
+ FarPrintStringsArgs(
+ const vector<string> &ifilenames, const FarEntryType entry_type,
+ const FarTokenType token_type, const string &begin_key,
+ const string &end_key, const bool print_key,
+ const string &symbols_fname, const int32 generate_filenames,
+ const string &filename_prefix, const string &filename_suffix) :
+ ifilenames(ifilenames), entry_type(entry_type), token_type(token_type),
+ begin_key(begin_key), end_key(end_key), print_key(print_key),
+ symbols_fname(symbols_fname),
+ generate_filenames(generate_filenames), filename_prefix(filename_prefix),
+ filename_suffix(filename_suffix) { }
+};
+
+// Arc-typed entry point: unpacks `args` and forwards every field, in order,
+// to fst::FarPrintStrings<Arc>.
+template <class Arc>
+void FarPrintStrings(FarPrintStringsArgs *args) {
+  FarPrintStringsArgs &a = *args;
+  fst::FarPrintStrings<Arc>(a.ifilenames, a.entry_type, a.token_type,
+                            a.begin_key, a.end_key, a.print_key,
+                            a.symbols_fname, a.generate_filenames,
+                            a.filename_prefix, a.filename_suffix);
+}
+
+
+// Non-template entry point taking the arc type as a string.
+// NOTE(review): defined outside this header; presumably it selects the
+// FarPrintStrings<Arc> instantiation registered for `arc_type` -- confirm
+// against the definition.
+void FarPrintStrings(const vector<string> &ifilenames,
+ const string &arc_type,
+ const FarEntryType entry_type,
+ const FarTokenType token_type,
+ const string &begin_key,
+ const string &end_key,
+ const bool print_key,
+ const string &symbols_fname,
+ const int32 generate_filenames,
+ const string &filename_prefix,
+ const string &filename_suffix);
+
+} // namespace script
+} // namespace fst
+
+
+// Registers all five FAR operations (compile strings, create, extract, info,
+// print strings) for the arc type `ArcType`, each with its argument type.
+// The expansion has no trailing semicolon; the user supplies it.
+#define REGISTER_FST_FAR_OPERATIONS(ArcType) \
+ REGISTER_FST_OPERATION(FarCompileStrings, ArcType, FarCompileStringsArgs); \
+ REGISTER_FST_OPERATION(FarCreate, ArcType, FarCreateArgs); \
+ REGISTER_FST_OPERATION(FarExtract, ArcType, FarExtractArgs); \
+ REGISTER_FST_OPERATION(FarInfo, ArcType, FarInfoArgs); \
+ REGISTER_FST_OPERATION(FarPrintStrings, ArcType, FarPrintStringsArgs)
+
+#endif // FST_EXTENSIONS_FAR_FARSCRIPT_H_
diff --git a/src/include/fst/extensions/far/info.h b/src/include/fst/extensions/far/info.h
new file mode 100644
index 0000000..f010546
--- /dev/null
+++ b/src/include/fst/extensions/far/info.h
@@ -0,0 +1,128 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+// Modified: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_EXTENSIONS_FAR_INFO_H_
+#define FST_EXTENSIONS_FAR_INFO_H_
+
+#include <iomanip>
+#include <set>
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/extensions/far/far.h>
+#include <fst/extensions/far/main.h> // For FarTypeToString
+
+namespace fst {
+
+// Adds the number of states and arcs of 'fst' to '*nstate' and '*narc'.
+// The counters are incremented, never reset, so a caller can accumulate
+// totals over several FSTs by passing the same counters repeatedly.
+template <class Arc>
+void CountStatesAndArcs(const Fst<Arc> &fst, size_t *nstate, size_t *narc) {
+  StateIterator<Fst<Arc> > states(fst);
+  while (!states.Done()) {
+    ArcIterator<Fst<Arc> > arcs(fst, states.Value());
+    while (!arcs.Done()) {
+      arcs.Next();
+      ++(*narc);
+    }
+    states.Next();
+    ++(*nstate);
+  }
+}
+
+// One row of the per-FST listing produced by FarInfo: the archive key, the
+// FST type name and the state/arc counts of a single FST.
+struct KeyInfo {
+  string key;     // Key of the FST within the archive
+  string type;    // Value of Fst::Type() for that FST
+  size_t nstate;  // Number of states
+  size_t narc;    // Number of arcs
+
+  // The counts default to zero so they can be filled in afterwards.
+  KeyInfo(string fst_key, string fst_type, int64 num_states = 0,
+          int64 num_arcs = 0)
+      : key(fst_key),
+        type(fst_type),
+        nstate(num_states),
+        narc(num_arcs) {}
+};
+
+// Prints information about the FSTs stored in the FAR file(s) 'filenames'
+// to standard output.
+//
+// If 'begin_key' is non-empty the reader is first positioned with
+// Find(begin_key); iteration stops at the first key that compares greater
+// than a non-empty 'end_key'.
+//
+// With 'list_fsts' false, an aggregate summary is printed: far type, arc
+// type, the set of FST types seen, the number of FSTs and the total number
+// of states and arcs. With 'list_fsts' true, one row per FST is printed
+// with its key, type, number of states and number of arcs.
+//
+// Returns without printing anything if the archive cannot be opened.
+template <class Arc>
+void FarInfo(const vector<string> &filenames, const string &begin_key,
+             const string &end_key, const bool list_fsts) {
+  FarReader<Arc> *far_reader = FarReader<Arc>::Open(filenames);
+  if (!far_reader) return;
+
+  if (!begin_key.empty())
+    far_reader->Find(begin_key);
+
+  // Per-FST rows; only filled when 'list_fsts' is set. Held by value so it
+  // is released automatically on return.
+  vector<KeyInfo> infos;
+  size_t nfst = 0, nstate = 0, narc = 0;
+  set<string> fst_types;
+  for (; !far_reader->Done(); far_reader->Next()) {
+    string key = far_reader->GetKey();
+    if (!end_key.empty() && end_key < key)
+      break;
+    ++nfst;
+    const Fst<Arc> &fst = far_reader->GetFst();
+    fst_types.insert(fst.Type());
+    if (list_fsts) {
+      KeyInfo info(key, fst.Type());
+      CountStatesAndArcs(fst, &info.nstate, &info.narc);
+      nstate += info.nstate;
+      narc += info.narc;  // Arcs go to the arc total, not the state total.
+      infos.push_back(info);
+    } else {
+      CountStatesAndArcs(fst, &nstate, &narc);
+    }
+  }
+
+  if (!list_fsts) {
+    cout << std::left << setw(50) << "far type"
+         << FarTypeToString(far_reader->Type()) << endl;
+    cout << std::left << setw(50) << "arc type" << Arc::Type() << endl;
+    cout << std::left << setw(50) << "fst type";
+    for (set<string>::const_iterator iter = fst_types.begin();
+         iter != fst_types.end();
+         ++iter) {
+      if (iter != fst_types.begin())
+        cout << ",";
+      cout << *iter;
+    }
+    cout << endl;
+    cout << std::left << setw(50) << "# of FSTs" << nfst << endl;
+    cout << std::left << setw(50) << "total # of states" << nstate << endl;
+    cout << std::left << setw(50) << "total # of arcs" << narc << endl;
+
+  } else {
+    // Column widths start at fixed minimums and grow to fit the widest
+    // value plus two characters of padding.
+    int wkey = 10, wtype = 10, wnstate = 16, wnarc = 16;
+    for (size_t i = 0; i < infos.size(); ++i) {
+      const KeyInfo &info = infos[i];
+      if (info.key.size() + 2 > wkey)
+        wkey = info.key.size() + 2;
+      if (info.type.size() + 2 > wtype)
+        wtype = info.type.size() + 2;
+      if (ceil(log10(info.nstate)) + 2 > wnstate)
+        wnstate = ceil(log10(info.nstate)) + 2;
+      if (ceil(log10(info.narc)) + 2 > wnarc)
+        wnarc = ceil(log10(info.narc)) + 2;
+    }
+
+    cout << std::left << setw(wkey) << "key" << setw(wtype) << "type"
+         << std::right << setw(wnstate) << "# of states"
+         << setw(wnarc) << "# of arcs" << endl;
+
+    for (size_t i = 0; i < infos.size(); ++i) {
+      const KeyInfo &info = infos[i];
+      cout << std::left << setw(wkey) << info.key << setw(wtype) << info.type
+           << std::right << setw(wnstate) << info.nstate
+           << setw(wnarc) << info.narc << endl;
+    }
+  }
+
+  // The reader returned by Open() is owned by this function.
+  delete far_reader;
+}
+
+} // namespace fst
+
+
+#endif // FST_EXTENSIONS_FAR_INFO_H_
diff --git a/src/include/fst/extensions/far/main.h b/src/include/fst/extensions/far/main.h
new file mode 100644
index 0000000..00ccfef
--- /dev/null
+++ b/src/include/fst/extensions/far/main.h
@@ -0,0 +1,43 @@
+// main.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Classes and functions for registering and invoking Far main
+// functions that support multiple and extensible arc types.
+
+#ifndef FST_EXTENSIONS_FAR_MAIN_H__
+#define FST_EXTENSIONS_FAR_MAIN_H__
+
+#include <fst/extensions/far/far.h>
+
+namespace fst {
+
+FarEntryType StringToFarEntryType(const string &s);
+FarTokenType StringToFarTokenType(const string &s);
+
+// Return the 'FarType' value corresponding to a far type name.
+FarType FarTypeFromString(const string &str);
+
+// Return the textual name corresponding to a 'FarType'.
+string FarTypeToString(FarType type);
+
+string LoadArcTypeFromFar(const string& far_fname);
+string LoadArcTypeFromFst(const string& far_fname);
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_FAR_MAIN_H__
diff --git a/src/include/fst/extensions/far/print-strings.h b/src/include/fst/extensions/far/print-strings.h
new file mode 100644
index 0000000..aff1e51
--- /dev/null
+++ b/src/include/fst/extensions/far/print-strings.h
@@ -0,0 +1,126 @@
+// print-strings.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+// Modified by: jpr@google.com (Jake Ratkiewicz)
+//
+// \file
+// Output as strings the string FSTs in a finite-state archive.
+
+#ifndef FST_EXTENSIONS_FAR_PRINT_STRINGS_H__
+#define FST_EXTENSIONS_FAR_PRINT_STRINGS_H__
+
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/extensions/far/far.h>
+#include <fst/string.h>
+
+namespace fst {
+
+// Prints, as strings, the string FSTs stored in the FAR file(s)
+// 'ifilenames'.
+//
+// 'far_token_type' selects how labels are rendered (symbol, byte or UTF8);
+// any other value is reported as an error. If 'symbols_fname' is non-empty
+// a symbol table is read from it and handed to the string printer.
+// If 'begin_key' is non-empty the reader is first positioned with
+// Find(begin_key); iteration stops at the first key greater than a
+// non-empty 'end_key'.
+//
+// With 'entry_type' FET_LINE each string is written to standard output on
+// its own line, preceded by the key and a tab when 'print_key' is set.
+// With FET_FILE each string is written to its own file named
+// 'filename_prefix' + name + 'filename_suffix', where name is either the
+// 1-based entry index zero-padded to 'generate_filenames' digits (when that
+// is non-zero) or the key, with ".N" appended for the N-th repeat of a key.
+//
+// The symbol table and the reader are owned by this function and released
+// on every exit path.
+template <class Arc>
+void FarPrintStrings(
+    const vector<string> &ifilenames, const FarEntryType entry_type,
+    const FarTokenType far_token_type, const string &begin_key,
+    const string &end_key, const bool print_key, const string &symbols_fname,
+    const int32 generate_filenames, const string &filename_prefix,
+    const string &filename_suffix) {
+
+  typename StringPrinter<Arc>::TokenType token_type;
+  if (far_token_type == FTT_SYMBOL) {
+    token_type = StringPrinter<Arc>::SYMBOL;
+  } else if (far_token_type == FTT_BYTE) {
+    token_type = StringPrinter<Arc>::BYTE;
+  } else if (far_token_type == FTT_UTF8) {
+    token_type = StringPrinter<Arc>::UTF8;
+  } else {
+    FSTERROR() << "FarPrintStrings: unknown token type";
+    return;
+  }
+
+  const SymbolTable *syms = 0;
+  if (!symbols_fname.empty()) {
+    // allow negative flag?
+    syms = SymbolTable::ReadText(symbols_fname, true);
+    if (!syms) {
+      FSTERROR() << "FarPrintStrings: error reading symbol table: "
+                 << symbols_fname;
+      return;
+    }
+  }
+
+  StringPrinter<Arc> string_printer(token_type, syms);
+
+  FarReader<Arc> *far_reader = FarReader<Arc>::Open(ifilenames);
+  if (!far_reader) {
+    delete syms;  // Do not leak the symbol table when the open fails.
+    return;
+  }
+
+  if (!begin_key.empty())
+    far_reader->Find(begin_key);
+
+  string okey;   // Key of the previous entry
+  int nrep = 0;  // How many times in a row the current key has repeated
+  for (int i = 1; !far_reader->Done(); far_reader->Next(), ++i) {
+    string key = far_reader->GetKey();
+    if (!end_key.empty() && end_key < key)
+      break;
+    if (okey == key)
+      ++nrep;
+    else
+      nrep = 0;
+    okey = key;
+
+    const Fst<Arc> &fst = far_reader->GetFst();
+    string str;
+    VLOG(2) << "Handling key: " << key;
+    string_printer(fst, &str);
+
+    if (entry_type == FET_LINE) {
+      if (print_key)
+        cout << key << "\t";
+      cout << str << endl;
+    } else if (entry_type == FET_FILE) {
+      stringstream sstrm;
+      if (generate_filenames) {
+        sstrm.fill('0');
+        sstrm << std::right << setw(generate_filenames) << i;
+      } else {
+        sstrm << key;
+        if (nrep > 0)
+          sstrm << "." << nrep;
+      }
+
+      string filename;
+      filename = filename_prefix + sstrm.str() + filename_suffix;
+
+      ofstream ostrm(filename.c_str());
+      if (!ostrm) {
+        FSTERROR() << "FarPrintStrings: Can't open file:" << filename;
+        delete syms;
+        delete far_reader;
+        return;
+      }
+      ostrm << str;
+      if (token_type == StringPrinter<Arc>::SYMBOL)
+        ostrm << "\n";
+    }
+  }
+
+  // Normal exit: release what the error path above already releases.
+  delete syms;
+  delete far_reader;
+}
+
+
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_FAR_PRINT_STRINGS_H__
diff --git a/src/include/fst/extensions/far/stlist.h b/src/include/fst/extensions/far/stlist.h
new file mode 100644
index 0000000..4738181
--- /dev/null
+++ b/src/include/fst/extensions/far/stlist.h
@@ -0,0 +1,304 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// A generic (string,type) list file format.
+//
+// This is a stripped-down version of STTable that does
+// not support the Find() operation but that does support
+// reading/writing from standard in/out.
+
+#ifndef FST_EXTENSIONS_FAR_STLIST_H_
+#define FST_EXTENSIONS_FAR_STLIST_H_
+
+#include <iostream>
+#include <fstream>
+#include <fst/util.h>
+
+#include <algorithm>
+#include <functional>
+#include <queue>
+#include <string>
+#include <utility>
+using std::pair; using std::make_pair;
+#include <vector>
+using std::vector;
+
+namespace fst {
+
+static const int32 kSTListMagicNumber = 5656924;
+static const int32 kSTListFileVersion = 1;
+
+// String-type list writing class for object of type 'T' using functor 'W'
+// to write an object of type 'T' to a stream. 'W' must conform to the
+// following interface:
+//
+// struct Writer {
+// void operator()(ostream &, const T &) const;
+// };
+//
+template <class T, class W>
+class STListWriter {
+ public:
+  typedef T EntryType;
+  typedef W EntryWriter;
+
+  // Opens 'filename' for binary output (standard output when 'filename' is
+  // empty) and writes the magic number and file version. Sets the error
+  // flag if the stream is not in a good state afterwards.
+  explicit STListWriter(const string filename)
+      : stream_(
+            filename.empty() ? &std::cout :
+            new ofstream(filename.c_str(), ofstream::out | ofstream::binary)),
+        error_(false) {
+    WriteType(*stream_, kSTListMagicNumber);
+    WriteType(*stream_, kSTListFileVersion);
+    // Test the stream state, not the pointer: 'stream_' is never null, so
+    // '!stream_' could not detect a failed open or write.
+    if (!*stream_) {
+      FSTERROR() << "STListWriter::STListWriter: error writing to file: "
+                 << filename;
+      error_ = true;
+    }
+  }
+
+  // Returns a newly allocated writer for 'filename'; the caller owns it.
+  static STListWriter<T, W> *Create(const string &filename) {
+    return new STListWriter<T, W>(filename);
+  }
+
+  // Appends the pair ('key', 't'). Keys must be non-empty and must not
+  // decrease from one call to the next; a violation sets the error flag.
+  // Once the error flag is set, nothing more is written.
+  void Add(const string &key, const T &t) {
+    if (key == "") {
+      FSTERROR() << "STListWriter::Add: key empty: " << key;
+      error_ = true;
+    } else if (key < last_key_) {
+      FSTERROR() << "STListWriter::Add: key disorder: " << key;
+      error_ = true;
+    }
+    if (error_) return;
+    last_key_ = key;
+    WriteType(*stream_, key);
+    entry_writer_(*stream_, t);
+  }
+
+  bool Error() const { return error_; }
+
+  // Writes an empty key as the end-of-list marker, then releases the
+  // stream unless it is standard output.
+  ~STListWriter() {
+    WriteType(*stream_, string());
+    if (stream_ != &std::cout)
+      delete stream_;
+  }
+
+ private:
+  EntryWriter entry_writer_;  // Write functor for 'EntryType'
+  ostream *stream_;           // Output stream
+  string last_key_;           // Last key
+  bool error_;
+
+  DISALLOW_COPY_AND_ASSIGN(STListWriter);
+};
+
+
+// String-type list reading class for object of type 'T' using functor 'R'
+// to read an object of type 'T' from a stream. 'R' must conform to the
+// following interface:
+//
+// struct Reader {
+// T *operator()(istream &) const;
+// };
+//
+template <class T, class R>
+class STListReader {
+ public:
+  typedef T EntryType;
+  typedef R EntryReader;
+
+  // Opens every file in 'filenames' (an empty name means standard input,
+  // which may appear at most once), validates the magic number and file
+  // version of each, reads the first key of each stream into the heap and
+  // then reads the entry belonging to the smallest key. Any failure sets
+  // the error flag.
+  explicit STListReader(const vector<string> &filenames)
+      : sources_(filenames), entry_(0), error_(false) {
+    streams_.resize(filenames.size(), 0);
+    bool has_stdin = false;
+    for (size_t i = 0; i < filenames.size(); ++i) {
+      if (filenames[i].empty()) {
+        if (!has_stdin) {
+          streams_[i] = &std::cin;
+          sources_[i] = "stdin";
+          has_stdin = true;
+        } else {
+          FSTERROR() << "STListReader::STListReader: stdin should only "
+                     << "appear once in the input file list.";
+          error_ = true;
+          return;
+        }
+      } else {
+        streams_[i] = new ifstream(
+            filenames[i].c_str(), ifstream::in | ifstream::binary);
+      }
+      int32 magic_number = 0, file_version = 0;
+      ReadType(*streams_[i], &magic_number);
+      ReadType(*streams_[i], &file_version);
+      if (magic_number != kSTListMagicNumber) {
+        FSTERROR() << "STListReader::STListReader: wrong file type: "
+                   << filenames[i];
+        error_ = true;
+        return;
+      }
+      if (file_version != kSTListFileVersion) {
+        FSTERROR() << "STListReader::STListReader: wrong file version: "
+                   << filenames[i];
+        error_ = true;
+        return;
+      }
+      string key;
+      ReadType(*streams_[i], &key);
+      // An empty key is the end-of-list marker: such a stream contributes
+      // nothing to the heap.
+      if (!key.empty())
+        heap_.push(make_pair(key, i));
+      if (!*streams_[i]) {
+        FSTERROR() << "STListReader: error reading file: " << sources_[i];
+        error_ = true;
+        return;
+      }
+    }
+    if (heap_.empty()) return;
+    size_t current = heap_.top().second;
+    entry_ = entry_reader_(*streams_[current]);
+    if (!entry_ || !*streams_[current]) {
+      FSTERROR() << "STListReader: error reading entry for key: "
+                 << heap_.top().first << ", file: " << sources_[current];
+      error_ = true;
+    }
+  }
+
+  // Releases every stream except standard input, and the current entry.
+  ~STListReader() {
+    for (size_t i = 0; i < streams_.size(); ++i) {
+      if (streams_[i] != &std::cin)
+        delete streams_[i];
+    }
+    if (entry_)
+      delete entry_;
+  }
+
+  // Returns a newly allocated reader over a single file; the caller owns it.
+  static STListReader<T, R> *Open(const string &filename) {
+    vector<string> filenames;
+    filenames.push_back(filename);
+    return new STListReader<T, R>(filenames);
+  }
+
+  // Returns a newly allocated reader over several files; the caller owns it.
+  static STListReader<T, R> *Open(const vector<string> &filenames) {
+    return new STListReader<T, R>(filenames);
+  }
+
+  // Unsupported: reports an error and sets the error flag.
+  void Reset() {
+    FSTERROR()
+        << "STListReader::Reset: stlist does not support reset operation";
+    error_ = true;
+  }
+
+  // Unsupported: reports an error, sets the error flag and returns false.
+  bool Find(const string &key) {
+    FSTERROR()
+        << "STListReader::Find: stlist does not support find operation";
+    error_ = true;
+    return false;
+  }
+
+  bool Done() const {
+    return error_ || heap_.empty();
+  }
+
+  // Advances to the next (key, entry) pair in key order. Does nothing once
+  // the reader is in error or exhausted, so that a call made after Done()
+  // returns true never touches the top of an empty heap.
+  void Next() {
+    if (error_ || heap_.empty()) return;
+    size_t current = heap_.top().second;
+    string key;
+    heap_.pop();
+    // Read the next key from the stream whose entry was just consumed.
+    ReadType(*(streams_[current]), &key);
+    if (!*streams_[current]) {
+      FSTERROR() << "STListReader: error reading file: "
+                 << sources_[current];
+      error_ = true;
+      return;
+    }
+    if (!key.empty())
+      heap_.push(make_pair(key, current));
+
+    if (!heap_.empty()) {
+      current = heap_.top().second;
+      if (entry_)
+        delete entry_;
+      entry_ = entry_reader_(*streams_[current]);
+      if (!entry_ || !*streams_[current]) {
+        FSTERROR() << "STListReader: error reading entry for key: "
+                   << heap_.top().first << ", file: " << sources_[current];
+        error_ = true;
+      }
+    }
+  }
+
+  // Key of the current pair. Only valid while Done() is false.
+  const string &GetKey() const {
+    return heap_.top().first;
+  }
+
+  // Entry of the current pair. Only valid while Done() is false.
+  const EntryType &GetEntry() const {
+    return *entry_;
+  }
+
+  bool Error() const { return error_; }
+
+ private:
+  EntryReader entry_reader_;   // Read functor for 'EntryType'
+  vector<istream*> streams_;   // Input streams
+  vector<string> sources_;     // and corresponding file names
+  priority_queue<
+    pair<string, size_t>, vector<pair<string, size_t> >,
+    greater<pair<string, size_t> > > heap_;  // (Key, stream id) heap
+  mutable EntryType *entry_;   // Pointer to the currently read entry
+  bool error_;
+
+  DISALLOW_COPY_AND_ASSIGN(STListReader);
+};
+
+
+// String-type list header reading function template on the entry header
+// type 'H' having a member function:
+// Read(istream &strm, const string &filename);
+// Checks that 'filename' is an STList and calls H::Read() on the first
+// entry in the STList.
+// Does not support reading from stdin.
+// Reads the header of the first entry of the STList file 'filename' into
+// '*header'. Returns false (after logging) when 'filename' is empty, when
+// the magic number or file version does not match, or when the stream is
+// in a failed state after the header has been read.
+template <class H>
+bool ReadSTListHeader(const string &filename, H *header) {
+  if (filename.empty()) {
+    LOG(ERROR) << "ReadSTListHeader: reading header not supported on stdin";
+    return false;
+  }
+  ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
+  int32 magic_number = 0, file_version = 0;
+  ReadType(strm, &magic_number);
+  ReadType(strm, &file_version);
+  // The diagnostics name this function rather than the STTable variant
+  // they were originally copied from.
+  if (magic_number != kSTListMagicNumber) {
+    LOG(ERROR) << "ReadSTListHeader: wrong file type: " << filename;
+    return false;
+  }
+  if (file_version != kSTListFileVersion) {
+    LOG(ERROR) << "ReadSTListHeader: wrong file version: " << filename;
+    return false;
+  }
+  string key;
+  ReadType(strm, &key);
+  header->Read(strm, filename + ":" + key);
+  if (!strm) {
+    LOG(ERROR) << "ReadSTListHeader: error reading file: " << filename;
+    return false;
+  }
+  return true;
+}
+
+bool IsSTList(const string &filename);
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_FAR_STLIST_H_
diff --git a/src/include/fst/extensions/far/sttable.h b/src/include/fst/extensions/far/sttable.h
new file mode 100644
index 0000000..3a03133
--- /dev/null
+++ b/src/include/fst/extensions/far/sttable.h
@@ -0,0 +1,370 @@
+// sttable.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// A generic string-to-type table file format
+//
+// This is not meant as a generalization of SSTable. This is more of
+// a simple replacement for SSTable in order to provide an open-source
+// implementation of the FAR format for the external version of the
+// FST Library.
+
+#ifndef FST_EXTENSIONS_FAR_STTABLE_H_
+#define FST_EXTENSIONS_FAR_STTABLE_H_
+
+#include <algorithm>
+#include <iostream>
+#include <fstream>
+#include <fst/util.h>
+
+namespace fst {
+
+static const int32 kSTTableMagicNumber = 2125656924;
+static const int32 kSTTableFileVersion = 1;
+
+// String-to-type table writing class for object of type 'T' using functor 'W'
+// to write an object of type 'T' to a stream. 'W' must conform to the
+// following interface:
+//
+// struct Writer {
+// void operator()(ostream &, const T &) const;
+// };
+//
+template <class T, class W>
+class STTableWriter {
+ public:
+  typedef T EntryType;
+  typedef W EntryWriter;
+
+  // Opens 'filename' for binary output and writes the magic number and
+  // file version; flags an error if the stream is bad afterwards.
+  explicit STTableWriter(const string &filename)
+      : stream_(filename.c_str(), ofstream::out | ofstream::binary),
+        error_(false) {
+    WriteType(stream_, kSTTableMagicNumber);
+    WriteType(stream_, kSTTableFileVersion);
+    if (stream_) return;
+    FSTERROR() << "STTableWriter::STTableWriter: error writing to file: "
+               << filename;
+    error_ = true;
+  }
+
+  // Returns a newly allocated writer for 'filename', or 0 when 'filename'
+  // is empty (writing to standard output is not supported).
+  static STTableWriter<T, W> *Create(const string &filename) {
+    if (!filename.empty())
+      return new STTableWriter<T, W>(filename);
+    LOG(ERROR) << "STTableWriter: writing to standard out unsupported.";
+    return 0;
+  }
+
+  // Appends the pair ('key', 't') and records its file position. Keys must
+  // be non-empty and non-decreasing; a violation sets the error flag, and
+  // nothing is written once the flag is set.
+  void Add(const string &key, const T &t) {
+    if (key.empty()) {
+      FSTERROR() << "STTableWriter::Add: key empty: " << key;
+      error_ = true;
+    } else if (last_key_ > key) {
+      FSTERROR() << "STTableWriter::Add: key disorder: " << key;
+      error_ = true;
+    }
+    if (error_) return;
+
+    last_key_ = key;
+    positions_.push_back(stream_.tellp());
+    WriteType(stream_, key);
+    entry_writer_(stream_, t);
+  }
+
+  bool Error() const { return error_; }
+
+  // Writes the trailer: the position index followed by the entry count.
+  ~STTableWriter() {
+    const int64 num_entries = static_cast<int64>(positions_.size());
+    WriteType(stream_, positions_);
+    WriteType(stream_, num_entries);
+  }
+
+ private:
+  EntryWriter entry_writer_;  // Write functor for 'EntryType'
+  ofstream stream_;           // Output stream
+  vector<int64> positions_;   // Position in file of each key-entry pair
+  string last_key_;           // Last key
+  bool error_;
+
+  DISALLOW_COPY_AND_ASSIGN(STTableWriter);
+};
+
+
+// String-to-type table reading class for object of type 'T' using functor 'R'
+// to read an object of type 'T' from a stream. 'R' must conform to the
+// following interface:
+//
+// struct Reader {
+// T *operator()(istream &) const;
+// };
+//
+template <class T, class R>
+class STTableReader {
+ public:
+  typedef T EntryType;
+  typedef R EntryReader;
+
+  // Opens every file in 'filenames', validates the magic number and file
+  // version, loads the position index stored at the end of each file and
+  // positions the reader on the entry with the smallest key. Tables with
+  // no entries are accepted and simply contribute nothing. Any failure
+  // sets the error flag.
+  explicit STTableReader(const vector<string> &filenames)
+      : sources_(filenames), current_(0), entry_(0), error_(false) {
+    compare_ = new Compare(&keys_);
+    keys_.resize(filenames.size());
+    streams_.resize(filenames.size(), 0);
+    positions_.resize(filenames.size());
+    for (size_t i = 0; i < filenames.size(); ++i) {
+      streams_[i] = new ifstream(
+          filenames[i].c_str(), ifstream::in | ifstream::binary);
+      int32 magic_number = 0, file_version = 0;
+      ReadType(*streams_[i], &magic_number);
+      ReadType(*streams_[i], &file_version);
+      if (magic_number != kSTTableMagicNumber) {
+        FSTERROR() << "STTableReader::STTableReader: wrong file type: "
+                   << filenames[i];
+        error_ = true;
+        return;
+      }
+      if (file_version != kSTTableFileVersion) {
+        FSTERROR() << "STTableReader::STTableReader: wrong file version: "
+                   << filenames[i];
+        error_ = true;
+        return;
+      }
+      // The last int64 of the file is the number of entries; it is
+      // preceded by that many int64 entry positions.
+      int64 num_entries = 0;
+      streams_[i]->seekg(-static_cast<int>(sizeof(int64)), ios_base::end);
+      ReadType(*streams_[i], &num_entries);
+      // Validate the count before it is used to seek and to size the index.
+      if (!*streams_[i] || num_entries < 0) {
+        FSTERROR() << "STTableReader::STTableReader: error reading file: "
+                   << filenames[i];
+        error_ = true;
+        return;
+      }
+      streams_[i]->seekg(-static_cast<int>(sizeof(int64)) *
+                         (num_entries + 1), ios_base::end);
+      positions_[i].resize(num_entries);
+      for (size_t j = 0;
+           (j < static_cast<size_t>(num_entries)) && (*streams_[i]); ++j)
+        ReadType(*streams_[i], &(positions_[i][j]));
+      // An empty table has no first entry to seek to.
+      if (!positions_[i].empty())
+        streams_[i]->seekg(positions_[i][0]);
+      if (!*streams_[i]) {
+        FSTERROR() << "STTableReader::STTableReader: error reading file: "
+                   << filenames[i];
+        error_ = true;
+        return;
+      }
+
+    }
+    MakeHeap();
+  }
+
+  ~STTableReader() {
+    for (size_t i = 0; i < streams_.size(); ++i)
+      delete streams_[i];
+    delete compare_;
+    if (entry_)
+      delete entry_;
+  }
+
+  // Returns a newly allocated reader over a single file, or 0 when
+  // 'filename' is empty (reading from standard input is not supported).
+  static STTableReader<T, R> *Open(const string &filename) {
+    if (filename.empty()) {
+      LOG(ERROR) << "STTableReader: reading from standard in not supported";
+      return 0;
+    }
+    vector<string> filenames;
+    filenames.push_back(filename);
+    return new STTableReader<T, R>(filenames);
+  }
+
+  // Returns a newly allocated reader over several files; the caller owns it.
+  static STTableReader<T, R> *Open(const vector<string> &filenames) {
+    return new STTableReader<T, R>(filenames);
+  }
+
+  // Repositions every non-empty stream on its first entry.
+  void Reset() {
+    if (error_) return;
+    for (size_t i = 0; i < streams_.size(); ++i) {
+      if (!positions_[i].empty())
+        streams_[i]->seekg(positions_[i].front());
+    }
+    MakeHeap();
+  }
+
+  // Positions every stream with LowerBound() for 'key' and rebuilds the
+  // heap. Returns true iff the resulting current key equals 'key'.
+  bool Find(const string &key) {
+    if (error_) return false;
+    for (size_t i = 0; i < streams_.size(); ++i)
+      LowerBound(i, key);
+    MakeHeap();
+    // 'current_' is only meaningful when the heap was built successfully
+    // and holds at least one stream.
+    return !error_ && !heap_.empty() && keys_[current_] == key;
+  }
+
+  bool Done() const { return error_ || heap_.empty(); }
+
+  // Advances to the next (key, entry) pair in key order. Does nothing once
+  // the reader is in error or exhausted.
+  void Next() {
+    if (error_ || heap_.empty()) return;
+    // The current stream sits at the back of 'heap_' (see PopHeap). If it
+    // has entries left, read its next key and re-insert it; otherwise drop
+    // it from the heap.
+    if (streams_[current_]->tellg() <= positions_[current_].back()) {
+      ReadType(*(streams_[current_]), &(keys_[current_]));
+      if (!*streams_[current_]) {
+        FSTERROR() << "STTableReader: error reading file: "
+                   << sources_[current_];
+        error_ = true;
+        return;
+      }
+      push_heap(heap_.begin(), heap_.end(), *compare_);
+    } else {
+      heap_.pop_back();
+    }
+    if (!heap_.empty())
+      PopHeap();
+  }
+
+  // Key of the current pair. Only valid while Done() is false.
+  const string &GetKey() const {
+    return keys_[current_];
+  }
+
+  // Entry of the current pair. Only valid while Done() is false.
+  const EntryType &GetEntry() const {
+    return *entry_;
+  }
+
+  bool Error() const { return error_; }
+
+ private:
+  // Comparison functor used to compare stream IDs in the heap
+  struct Compare {
+    Compare(const vector<string> *keys) : keys_(keys) {}
+
+    // Inverted comparison so that the standard heap algorithms keep the
+    // stream with the smallest key on top.
+    bool operator()(size_t i, size_t j) const {
+      return (*keys_)[i] > (*keys_)[j];
+    }
+
+   private:
+    const vector<string> *keys_;
+  };
+
+  // Position the stream with ID 'id' at the position corresponding
+  // to the lower bound for key 'find_key'
+  void LowerBound(size_t id, const string &find_key) {
+    ifstream *strm = streams_[id];
+    const vector<int64> &positions = positions_[id];
+    // Empty table: nothing to search, and 'size() - 1' would wrap around.
+    if (positions.empty()) return;
+    size_t low = 0, high = positions.size() - 1;
+
+    while (low < high) {
+      size_t mid = (low + high)/2;
+      strm->seekg(positions[mid]);
+      string key;
+      ReadType(*strm, &key);
+      if (key > find_key) {
+        high = mid;
+      } else if (key < find_key) {
+        low = mid + 1;
+      } else {
+        // Exact match: walk back to the first entry carrying this key.
+        for (size_t i = mid; i > low; --i) {
+          strm->seekg(positions[i - 1]);
+          ReadType(*strm, &key);
+          if (key != find_key) {
+            strm->seekg(positions[i]);
+            return;
+          }
+        }
+        strm->seekg(positions[low]);
+        return;
+      }
+    }
+    strm->seekg(positions[low]);
+  }
+
+  // Add all streams to the heap
+  void MakeHeap() {
+    heap_.clear();
+    for (size_t i = 0; i < streams_.size(); ++i) {
+      // A table without entries has no key to read.
+      if (positions_[i].empty()) continue;
+      ReadType(*streams_[i], &(keys_[i]));
+      if (!*streams_[i]) {
+        FSTERROR() << "STTableReader: error reading file: " << sources_[i];
+        error_ = true;
+        return;
+      }
+      heap_.push_back(i);
+    }
+    make_heap(heap_.begin(), heap_.end(), *compare_);
+    if (!heap_.empty())
+      PopHeap();
+  }
+
+  // Position the stream with the lowest key at the top
+  // of the heap, set 'current_' to the ID of that stream
+  // and read the current entry from that stream
+  void PopHeap() {
+    pop_heap(heap_.begin(), heap_.end(), *compare_);
+    current_ = heap_.back();
+    if (entry_)
+      delete entry_;
+    entry_ = entry_reader_(*streams_[current_]);
+    if (!entry_)
+      error_ = true;
+    if (!*streams_[current_]) {
+      FSTERROR() << "STTableReader: error reading entry for key: "
+                 << keys_[current_] << ", file: " << sources_[current_];
+      error_ = true;
+    }
+  }
+
+
+  EntryReader entry_reader_;   // Read functor for 'EntryType'
+  vector<ifstream*> streams_;  // Input streams
+  vector<string> sources_;     // and corresponding file names
+  vector<vector<int64> > positions_;  // Index of positions for each stream
+  vector<string> keys_;  // Lowest unread key for each stream
+  vector<int64> heap_;   // Heap containing ID of streams with unread keys
+  int64 current_;        // Id of current stream to be read
+  Compare *compare_;     // Functor comparing stream IDs for the heap
+  mutable EntryType *entry_;  // Pointer to the currently read entry
+  bool error_;
+
+  DISALLOW_COPY_AND_ASSIGN(STTableReader);
+};
+
+
+// String-to-type table header reading function template on the entry header
+// type 'H' having a member function:
+// Read(istream &strm, const string &filename);
+// Checks that 'filename' is an STTable and calls H::Read() on the last
+// entry in the STTable.
+// Reads the header of the last entry of the STTable file 'filename' into
+// '*header'. Returns true on success, or when the table holds no entries
+// (in which case '*header' is not touched); logs and returns false on a
+// signature mismatch or read failure.
+template <class H>
+bool ReadSTTableHeader(const string &filename, H *header) {
+  ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
+
+  // Validate the file signature.
+  int32 magic_number = 0;
+  int32 file_version = 0;
+  ReadType(strm, &magic_number);
+  ReadType(strm, &file_version);
+  if (magic_number != kSTTableMagicNumber) {
+    LOG(ERROR) << "ReadSTTableHeader: wrong file type: " << filename;
+    return false;
+  }
+  if (file_version != kSTTableFileVersion) {
+    LOG(ERROR) << "ReadSTTableHeader: wrong file version: " << filename;
+    return false;
+  }
+
+  // The last int64 in the file is the number of entries.
+  const int kWordSize = static_cast<int>(sizeof(int64));
+  int64 num_entries = -1;
+  strm.seekg(-kWordSize, ios_base::end);
+  ReadType(strm, &num_entries);
+  if (!strm) {
+    LOG(ERROR) << "ReadSTTableHeader: error reading file: " << filename;
+    return false;
+  }
+  if (num_entries == 0) return true;  // No entry header to read
+
+  // The int64 just before the count is the position of the last entry.
+  int64 last_position = num_entries;
+  strm.seekg(-2 * kWordSize, ios_base::end);
+  ReadType(strm, &last_position);
+  strm.seekg(last_position);
+
+  string key;
+  ReadType(strm, &key);
+  header->Read(strm, filename + ":" + key);
+  if (!strm) {
+    LOG(ERROR) << "ReadSTTableHeader: error reading file: " << filename;
+    return false;
+  }
+  return true;
+}
+
+bool IsSTTable(const string &filename);
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_FAR_STTABLE_H_
diff --git a/src/include/fst/extensions/pdt/collection.h b/src/include/fst/extensions/pdt/collection.h
new file mode 100644
index 0000000..26be504
--- /dev/null
+++ b/src/include/fst/extensions/pdt/collection.h
@@ -0,0 +1,122 @@
+// collection.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Class to store a collection of sets with elements of type T.
+
+#ifndef FST_EXTENSIONS_PDT_COLLECTION_H__
+#define FST_EXTENSIONS_PDT_COLLECTION_H__
+
+#include <algorithm>
+#include <vector>
+using std::vector;
+
+#include <fst/bi-table.h>
+
+namespace fst {
+
+// Stores a collection of non-empty sets with elements of type T. A
+// default constructor, equality ==, a total order <, and an STL-style
+// hash class must be defined on the elements. Provides signed
+// integer ID (of type I) of each unique set. The IDs are allocated
+// starting from 0 in order.
+template <class I, class T>
+class Collection {
+ public:
+  struct Node {  // Trie node
+    I node_id;   // ID of the next node in the chain; kNoNodeId at the root
+    T element;
+
+    Node() : node_id(kNoNodeId), element(T()) {}
+    Node(I i, const T &t) : node_id(i), element(t) {}
+
+    bool operator==(const Node& n) const {
+      return n.node_id == node_id && n.element == element;
+    }
+  };
+
+  // Hashes a node by combining its node ID with the hash of its element.
+  struct NodeHash {
+    size_t operator()(const Node &n) const {
+      return n.node_id + hash_(n.element) * kPrime;
+    }
+  };
+
+  typedef CompactHashBiTable<I, Node, NodeHash> NodeTable;
+
+  // Iterates over the elements of one set by following 'node_id' links
+  // through the node table until kNoNodeId is reached.
+  class SetIterator {
+   public:
+    SetIterator(I id, Node node, NodeTable *node_table)
+        : id_(id), node_(node), node_table_(node_table) {}
+
+    bool Done() const { return id_ == kNoNodeId; }
+
+    const T &Element() const { return node_.element; }
+
+    void Next() {
+      id_ = node_.node_id;
+      if (id_ != kNoNodeId)
+        node_ = node_table_->FindEntry(id_);
+    }
+
+   private:
+    I id_;       // Iterator set node id
+    Node node_;  // Iterator set node
+    NodeTable *node_table_;
+  };
+
+  Collection() {}
+
+  // Looks up the integer ID of a set. If it doesn't exist, then adds it.
+  // Set elements should be in strict order (and therefore unique).
+  // The set is inserted back to front, so the returned ID is that of the
+  // node holding the first element.
+  I FindId(const vector<T> &set) {
+    I node_id = kNoNodeId;
+    for (ssize_t i = set.size() - 1; i >= 0; --i) {
+      Node node(node_id, set[i]);
+      node_id = node_table_.FindId(node);
+    }
+    return node_id;
+  }
+
+  // Finds the set with the given integer ID and returns an iterator over
+  // its elements. If 'id' is out of range (negative, or not smaller than
+  // the table size) the returned iterator is already Done().
+  SetIterator FindSet(I id) {
+    // Either bound being violated makes the ID invalid; with '&&' the test
+    // could never be true and invalid IDs reached FindEntry().
+    if (id < 0 || id >= node_table_.Size()) {
+      return SetIterator(kNoNodeId, Node(kNoNodeId, T()), &node_table_);
+    } else {
+      return SetIterator(id, node_table_.FindEntry(id), &node_table_);
+    }
+  }
+
+ private:
+  static const I kNoNodeId;
+  static const size_t kPrime;
+  static std::tr1::hash<T> hash_;
+
+  NodeTable node_table_;
+
+  DISALLOW_COPY_AND_ASSIGN(Collection);
+};
+
+template<class I, class T> const I Collection<I, T>::kNoNodeId = -1;
+
+template <class I, class T> const size_t Collection<I, T>::kPrime = 7853;
+
+template <class I, class T> std::tr1::hash<T> Collection<I, T>::hash_;
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_PDT_COLLECTION_H__
diff --git a/src/include/fst/extensions/pdt/compose.h b/src/include/fst/extensions/pdt/compose.h
new file mode 100644
index 0000000..364d76f
--- /dev/null
+++ b/src/include/fst/extensions/pdt/compose.h
@@ -0,0 +1,146 @@
+// compose.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Compose a PDT and an FST.
+
+#ifndef FST_EXTENSIONS_PDT_COMPOSE_H__
+#define FST_EXTENSIONS_PDT_COMPOSE_H__
+
+#include <fst/compose.h>
+
+namespace fst {
+
+// Bundles the matchers and the filter needed to compose a PDT with an
+// FST. The primary template covers the case where the PDT is the first
+// composition argument.
+template <class Arc, bool left_pdt = true>
+class PdtComposeOptions
+    : public ComposeFstOptions<
+          Arc,
+          MultiEpsMatcher< Matcher<Fst<Arc> > >,
+          MultiEpsFilter<AltSequenceComposeFilter<
+              MultiEpsMatcher< Matcher<Fst<Arc> > > > > > {
+ public:
+  typedef typename Arc::Label Label;
+  typedef MultiEpsMatcher< Matcher<Fst<Arc> > > PdtMatcher;
+  typedef MultiEpsFilter<AltSequenceComposeFilter<PdtMatcher> > PdtFilter;
+  typedef ComposeFstOptions<Arc, PdtMatcher, PdtFilter> COptions;
+  using COptions::matcher1;
+  using COptions::matcher2;
+  using COptions::filter;
+
+  // 'ifst1' is the PDT, 'parens' its open/close parenthesis label pairs
+  // and 'ifst2' the FST it is composed with.
+  PdtComposeOptions(const Fst<Arc> &ifst1,
+                    const vector<pair<Label, Label> > &parens,
+                    const Fst<Arc> &ifst2) {
+    matcher1 = new PdtMatcher(ifst1, MATCH_OUTPUT, kMultiEpsList);
+    matcher2 = new PdtMatcher(ifst2, MATCH_INPUT, kMultiEpsLoop);
+
+    // Every open and close paren label is registered as a multi-epsilon
+    // with both matchers.
+    typedef typename vector<pair<Label, Label> >::const_iterator ParenIter;
+    for (ParenIter it = parens.begin(); it != parens.end(); ++it) {
+      const Label open_paren = it->first;
+      const Label close_paren = it->second;
+      matcher1->AddMultiEpsLabel(open_paren);
+      matcher1->AddMultiEpsLabel(close_paren);
+      matcher2->AddMultiEpsLabel(open_paren);
+      matcher2->AddMultiEpsLabel(close_paren);
+    }
+
+    filter = new PdtFilter(ifst1, ifst2, matcher1, matcher2, true);
+  }
+};
+
+// Bundles the matchers and the filter needed to compose an FST with a
+// PDT. This specialization covers the case where the FST is the first
+// composition argument and the PDT the second.
+template <class Arc>
+class PdtComposeOptions<Arc, false>
+    : public ComposeFstOptions<
+          Arc,
+          MultiEpsMatcher< Matcher<Fst<Arc> > >,
+          MultiEpsFilter<SequenceComposeFilter<
+              MultiEpsMatcher< Matcher<Fst<Arc> > > > > > {
+ public:
+  typedef typename Arc::Label Label;
+  typedef MultiEpsMatcher< Matcher<Fst<Arc> > > PdtMatcher;
+  typedef MultiEpsFilter<SequenceComposeFilter<PdtMatcher> > PdtFilter;
+  typedef ComposeFstOptions<Arc, PdtMatcher, PdtFilter> COptions;
+  using COptions::matcher1;
+  using COptions::matcher2;
+  using COptions::filter;
+
+  // 'ifst1' is the FST, 'ifst2' the PDT and 'parens' the PDT's
+  // open/close parenthesis label pairs.
+  PdtComposeOptions(const Fst<Arc> &ifst1,
+                    const Fst<Arc> &ifst2,
+                    const vector<pair<Label, Label> > &parens) {
+    matcher1 = new PdtMatcher(ifst1, MATCH_OUTPUT, kMultiEpsLoop);
+    matcher2 = new PdtMatcher(ifst2, MATCH_INPUT, kMultiEpsList);
+
+    // Every open and close paren label is registered as a multi-epsilon
+    // with both matchers.
+    typedef typename vector<pair<Label, Label> >::const_iterator ParenIter;
+    for (ParenIter it = parens.begin(); it != parens.end(); ++it) {
+      const Label open_paren = it->first;
+      const Label close_paren = it->second;
+      matcher1->AddMultiEpsLabel(open_paren);
+      matcher1->AddMultiEpsLabel(close_paren);
+      matcher2->AddMultiEpsLabel(open_paren);
+      matcher2->AddMultiEpsLabel(close_paren);
+    }
+
+    filter = new PdtFilter(ifst1, ifst2, matcher1, matcher2, true);
+  }
+};
+
+
+// Composes a pushdown transducer (PDT) encoded as an FST (1st arg) with
+// an FST (3rd arg); the result, written to 'ofst', is again a PDT
+// encoded as an FST. Some PDT transitions are labeled with open or
+// close parentheses, and for a path to be valid in the PDT those
+// parentheses must balance along it (see ExpandFst). The open-close
+// parenthesis label pairs are given in 'parens'. When 'opts.connect' is
+// set the result is passed through Connect().
+template <class Arc>
+void Compose(const Fst<Arc> &ifst1,
+             const vector<pair<typename Arc::Label,
+                               typename Arc::Label> > &parens,
+             const Fst<Arc> &ifst2,
+             MutableFst<Arc> *ofst,
+             const ComposeOptions &opts = ComposeOptions()) {
+  PdtComposeOptions<Arc, true> pdt_opts(ifst1, parens, ifst2);
+  pdt_opts.gc_limit = 0;
+  *ofst = ComposeFst<Arc>(ifst1, ifst2, pdt_opts);
+  if (opts.connect) {
+    Connect(ofst);
+  }
+}
+
+
+// Composes an FST (1st arg) with a pushdown transducer (PDT) encoded as
+// an FST (2nd arg); the result, written to 'ofst', is again a PDT
+// encoded as an FST. Some PDT transitions are labeled with open or
+// close parentheses, and for a path to be valid in the PDT those
+// parentheses must balance along it (see ExpandFst). The open-close
+// parenthesis label pairs are given in 'parens'. When 'opts.connect' is
+// set the result is passed through Connect().
+template <class Arc>
+void Compose(const Fst<Arc> &ifst1,
+             const Fst<Arc> &ifst2,
+             const vector<pair<typename Arc::Label,
+                               typename Arc::Label> > &parens,
+             MutableFst<Arc> *ofst,
+             const ComposeOptions &opts = ComposeOptions()) {
+  PdtComposeOptions<Arc, false> pdt_opts(ifst1, ifst2, parens);
+  pdt_opts.gc_limit = 0;
+  *ofst = ComposeFst<Arc>(ifst1, ifst2, pdt_opts);
+  if (opts.connect) {
+    Connect(ofst);
+  }
+}
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_PDT_COMPOSE_H__
diff --git a/src/include/fst/extensions/pdt/expand.h b/src/include/fst/extensions/pdt/expand.h
new file mode 100644
index 0000000..f464403
--- /dev/null
+++ b/src/include/fst/extensions/pdt/expand.h
@@ -0,0 +1,975 @@
+// expand.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Expand a PDT to an FST.
+
+#ifndef FST_EXTENSIONS_PDT_EXPAND_H__
+#define FST_EXTENSIONS_PDT_EXPAND_H__
+
+#include <vector>
+using std::vector;
+
+#include <fst/extensions/pdt/pdt.h>
+#include <fst/extensions/pdt/paren.h>
+#include <fst/extensions/pdt/shortest-path.h>
+#include <fst/extensions/pdt/reverse.h>
+#include <fst/cache.h>
+#include <fst/mutable-fst.h>
+#include <fst/queue.h>
+#include <fst/state-table.h>
+#include <fst/test-properties.h>
+
+namespace fst {
+
+// Options for ExpandFst: the cache options plus expansion-specific settings.
+template <class Arc>
+struct ExpandFstOptions : public CacheOptions {
+ // When false, ExpandFstImpl replaces the labels of stack push/pop arcs by 0.
+ bool keep_parentheses;
+ // Optional caller-supplied stack. When null, ExpandFstImpl allocates and
+ // owns its own; when non-null, ExpandFstImpl does not delete it.
+ PdtStack<typename Arc::StateId, typename Arc::Label> *stack;
+ // Optional caller-supplied state table, with the same ownership rule
+ // as 'stack'.
+ PdtStateTable<typename Arc::StateId, typename Arc::StateId> *state_table;
+
+ // All arguments are optional: default cache options, parentheses not
+ // kept, and no external stack or state table.
+ ExpandFstOptions(
+ const CacheOptions &opts = CacheOptions(),
+ bool kp = false,
+ PdtStack<typename Arc::StateId, typename Arc::Label> *s = 0,
+ PdtStateTable<typename Arc::StateId, typename Arc::StateId> *st = 0)
+ : CacheOptions(opts), keep_parentheses(kp), stack(s), state_table(st) {}
+};
+
+// Properties for an expanded PDT: of the input properties only the
+// acceptor, acyclic, initial-acyclic and unweighted bits are carried
+// over; every other bit is cleared.
+inline uint64 ExpandProperties(uint64 inprops) {
+  const uint64 preserved =
+      kAcceptor | kAcyclic | kInitialAcyclic | kUnweighted;
+  return inprops & preserved;
+}
+
+
+// Implementation class for ExpandFst.
+//
+// Each state of the expanded machine is a (PDT state, stack id) tuple
+// interned in 'state_table_'. Start state, final weights and arcs are
+// computed on first request and then served from the cache.
+template <class A>
+class ExpandFstImpl
+ : public CacheImpl<A> {
+ public:
+ using FstImpl<A>::SetType;
+ using FstImpl<A>::SetProperties;
+ using FstImpl<A>::Properties;
+ using FstImpl<A>::SetInputSymbols;
+ using FstImpl<A>::SetOutputSymbols;
+
+ using CacheBaseImpl< CacheState<A> >::PushArc;
+ using CacheBaseImpl< CacheState<A> >::HasArcs;
+ using CacheBaseImpl< CacheState<A> >::HasFinal;
+ using CacheBaseImpl< CacheState<A> >::HasStart;
+ using CacheBaseImpl< CacheState<A> >::SetArcs;
+ using CacheBaseImpl< CacheState<A> >::SetFinal;
+ using CacheBaseImpl< CacheState<A> >::SetStart;
+
+ typedef A Arc;
+ typedef typename A::Label Label;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+ typedef StateId StackId;
+ typedef PdtStateTuple<StateId, StackId> StateTuple;
+
+ // Builds the implementation over a copy of 'fst'. The stack and state
+ // table given in 'opts' are used when non-null and are then not owned;
+ // otherwise new ones are allocated and owned by this object.
+ ExpandFstImpl(const Fst<A> &fst,
+ const vector<pair<typename Arc::Label,
+ typename Arc::Label> > &parens,
+ const ExpandFstOptions<A> &opts)
+ : CacheImpl<A>(opts), fst_(fst.Copy()),
+ stack_(opts.stack ? opts.stack: new PdtStack<StateId, Label>(parens)),
+ state_table_(opts.state_table ? opts.state_table :
+ new PdtStateTable<StateId, StackId>()),
+ own_stack_(opts.stack == 0), own_state_table_(opts.state_table == 0),
+ keep_parentheses_(opts.keep_parentheses) {
+ SetType("expand");
+
+ uint64 props = fst.Properties(kFstProperties, false);
+ SetProperties(ExpandProperties(props), kCopyProperties);
+
+ SetInputSymbols(fst.InputSymbols());
+ SetOutputSymbols(fst.OutputSymbols());
+ }
+
+ // Copy constructor: takes a Copy(true) of the underlying FST, copies
+ // the stack and starts from a newly constructed state table. The copy
+ // always owns its stack and state table.
+ ExpandFstImpl(const ExpandFstImpl &impl)
+ : CacheImpl<A>(impl),
+ fst_(impl.fst_->Copy(true)),
+ stack_(new PdtStack<StateId, Label>(*impl.stack_)),
+ state_table_(new PdtStateTable<StateId, StackId>()),
+ own_stack_(true), own_state_table_(true),
+ keep_parentheses_(impl.keep_parentheses_) {
+ SetType("expand");
+ SetProperties(impl.Properties(), kCopyProperties);
+ SetInputSymbols(impl.InputSymbols());
+ SetOutputSymbols(impl.OutputSymbols());
+ }
+
+ // Releases the FST copy, and the stack / state table only when owned.
+ ~ExpandFstImpl() {
+ delete fst_;
+ if (own_stack_)
+ delete stack_;
+ if (own_state_table_)
+ delete state_table_;
+ }
+
+ // Returns the start state, computing it on first use as the tuple
+ // (PDT start state, stack id 0). Returns kNoStateId, without caching,
+ // when the PDT has no start state.
+ StateId Start() {
+ if (!HasStart()) {
+ StateId s = fst_->Start();
+ if (s == kNoStateId)
+ return kNoStateId;
+ StateTuple tuple(s, 0);
+ StateId start = state_table_->FindState(tuple);
+ SetStart(start);
+ }
+ return CacheImpl<A>::Start();
+ }
+
+ // Returns the final weight of 's'. A state is final only when its PDT
+ // state is final and its stack id is 0; otherwise the weight is Zero().
+ Weight Final(StateId s) {
+ if (!HasFinal(s)) {
+ const StateTuple &tuple = state_table_->Tuple(s);
+ Weight w = fst_->Final(tuple.state_id);
+ if (w != Weight::Zero() && tuple.stack_id == 0)
+ SetFinal(s, w);
+ else
+ SetFinal(s, Weight::Zero());
+ }
+ return CacheImpl<A>::Final(s);
+ }
+
+ // The four accessors below expand 's' first when its arcs are not yet
+ // cached, then defer to the cache.
+ size_t NumArcs(StateId s) {
+ if (!HasArcs(s)) {
+ ExpandState(s);
+ }
+ return CacheImpl<A>::NumArcs(s);
+ }
+
+ size_t NumInputEpsilons(StateId s) {
+ if (!HasArcs(s))
+ ExpandState(s);
+ return CacheImpl<A>::NumInputEpsilons(s);
+ }
+
+ size_t NumOutputEpsilons(StateId s) {
+ if (!HasArcs(s))
+ ExpandState(s);
+ return CacheImpl<A>::NumOutputEpsilons(s);
+ }
+
+ void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
+ if (!HasArcs(s))
+ ExpandState(s);
+ CacheImpl<A>::InitArcIterator(s, data);
+ }
+
+ // Computes the outgoing transitions from a state, creating new destination
+ // states as needed.
+ void ExpandState(StateId s) {
+ // Copied by value: FindState() below may add entries to the table.
+ StateTuple tuple = state_table_->Tuple(s);
+ for (ArcIterator< Fst<A> > aiter(*fst_, tuple.state_id);
+ !aiter.Done(); aiter.Next()) {
+ Arc arc = aiter.Value();
+ // Stack id reached by reading the arc's input label on the current stack.
+ StackId stack_id = stack_->Find(tuple.stack_id, arc.ilabel);
+ if (stack_id == -1) {
+ // Non-matching close parenthesis
+ continue;
+ } else if ((stack_id != tuple.stack_id) && !keep_parentheses_) {
+ // Stack push/pop
+ arc.ilabel = arc.olabel = 0;
+ }
+
+ StateTuple ntuple(arc.nextstate, stack_id);
+ arc.nextstate = state_table_->FindState(ntuple);
+ PushArc(s, arc);
+ }
+ SetArcs(s);
+ }
+
+ // Read-only access to the stack used by the expansion.
+ const PdtStack<StackId, Label> &GetStack() const { return *stack_; }
+
+ // Read-only access to the (PDT state, stack id) state table.
+ const PdtStateTable<StateId, StackId> &GetStateTable() const {
+ return *state_table_;
+ }
+
+ private:
+ const Fst<A> *fst_; // Copy of the input PDT; owned
+
+ PdtStack<StackId, Label> *stack_; // Owned iff own_stack_
+ PdtStateTable<StateId, StackId> *state_table_; // Owned iff own_state_table_
+ bool own_stack_;
+ bool own_state_table_;
+ bool keep_parentheses_; // Keep paren labels instead of replacing them by 0
+
+ void operator=(const ExpandFstImpl<A> &); // disallow
+};
+
+// Expands a pushdown transducer (PDT) encoded as an FST into an FST.
+// This version is a delayed Fst. In the PDT, some transitions are
+// labeled with open or close parentheses. To be interpreted as a PDT,
+// the parens must balance on a path. The open-close parenthesis label
+// pairs are passed in 'parens'. The expansion enforces the
+// parenthesis constraints. The PDT must be expandable as an FST.
+//
+// This class attaches interface to implementation and handles
+// reference counting, delegating most methods to ImplToFst.
+template <class A>
+class ExpandFst : public ImplToFst< ExpandFstImpl<A> > {
+ public:
+ friend class ArcIterator< ExpandFst<A> >;
+ friend class StateIterator< ExpandFst<A> >;
+
+ typedef A Arc;
+ typedef typename A::Label Label;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+ typedef StateId StackId;
+ typedef CacheState<A> State;
+ typedef ExpandFstImpl<A> Impl;
+
+ // Expands 'fst' with parenthesis pairs 'parens' using default options.
+ ExpandFst(const Fst<A> &fst,
+ const vector<pair<typename Arc::Label,
+ typename Arc::Label> > &parens)
+ : ImplToFst<Impl>(new Impl(fst, parens, ExpandFstOptions<A>())) {}
+
+ // Expands 'fst' with parenthesis pairs 'parens' using options 'opts'.
+ ExpandFst(const Fst<A> &fst,
+ const vector<pair<typename Arc::Label,
+ typename Arc::Label> > &parens,
+ const ExpandFstOptions<A> &opts)
+ : ImplToFst<Impl>(new Impl(fst, parens, opts)) {}
+
+ // See Fst<>::Copy() for doc.
+ ExpandFst(const ExpandFst<A> &fst, bool safe = false)
+ : ImplToFst<Impl>(fst, safe) {}
+
+ // Get a copy of this ExpandFst. See Fst<>::Copy() for further doc.
+ virtual ExpandFst<A> *Copy(bool safe = false) const {
+ return new ExpandFst<A>(*this, safe);
+ }
+
+ // Defined after the StateIterator specialization for this class.
+ virtual inline void InitStateIterator(StateIteratorData<A> *data) const;
+
+ // Forwards to the implementation.
+ virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
+ GetImpl()->InitArcIterator(s, data);
+ }
+
+ // Stack used by the implementation.
+ const PdtStack<StackId, Label> &GetStack() const {
+ return GetImpl()->GetStack();
+ }
+
+ // State table mapping expanded states to (PDT state, stack id) tuples.
+ const PdtStateTable<StateId, StackId> &GetStateTable() const {
+ return GetImpl()->GetStateTable();
+ }
+
+ private:
+ // Makes visible to friends.
+ Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
+
+ void operator=(const ExpandFst<A> &fst); // Disallow
+};
+
+
+// Specialization for ExpandFst: a cache state iterator built from the
+// FST and its implementation.
+template<class A>
+class StateIterator< ExpandFst<A> >
+ : public CacheStateIterator< ExpandFst<A> > {
+ public:
+ explicit StateIterator(const ExpandFst<A> &fst)
+ : CacheStateIterator< ExpandFst<A> >(fst, fst.GetImpl()) {}
+};
+
+
+// Specialization for ExpandFst: a cache arc iterator whose constructor
+// makes sure the arcs of the requested state have been computed.
+template <class A>
+class ArcIterator< ExpandFst<A> >
+    : public CacheArcIterator< ExpandFst<A> > {
+ public:
+  typedef typename A::StateId StateId;
+
+  ArcIterator(const ExpandFst<A> &fst, StateId s)
+      : CacheArcIterator< ExpandFst<A> >(fst.GetImpl(), s) {
+    // Expand 's' on demand if its arcs are not in the cache yet.
+    if (fst.GetImpl()->HasArcs(s)) return;
+    fst.GetImpl()->ExpandState(s);
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+
+// Installs a newly allocated StateIterator specialization for this FST
+// as the base iterator in 'data'.
+template <class A> inline
+void ExpandFst<A>::InitStateIterator(StateIteratorData<A> *data) const
+{
+ data->base = new StateIterator< ExpandFst<A> >(*this);
+}
+
+//
+// PrunedExpand Class
+//
+
+// Prunes the delayed expansion of a pushdown transducer (PDT) encoded
+// as an FST into an FST. In the PDT, some transitions are labeled
+// with open or close parentheses. To be interpreted as a PDT, the
+// parens must balance on a path. The open-close parenthesis label
+// pairs are passed in 'parens'. The expansion enforces the
+// parenthesis constraints.
+//
+// The algorithm works by visiting the delayed ExpandFst using a
+// shortest-stack first queue discipline and relies on the
+// shortest-distance information computed using a reverse
+// shortest-path call to perform the pruning.
+//
+// The algorithm maintains the same state ordering between the ExpandFst
+// being visited 'efst_' and the result of pruning written into the
+// MutableFst 'ofst_' to improve readability of the code.
+//
+template <class A>
+class PrunedExpand {
+ public:
+  typedef A Arc;
+  typedef typename A::Label Label;
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+  typedef StateId StackId;
+  typedef PdtStack<StackId, Label> Stack;
+  typedef PdtStateTable<StateId, StackId> StateTable;
+  typedef typename PdtBalanceData<Arc>::SetIterator SetIterator;
+
+  // Constructor taking as input a PDT specified by 'ifst' and 'parens'.
+  // 'keep_parentheses' specifies whether parentheses are replaced by
+  // epsilons or not during the expansion. 'opts' is the cache options
+  // used to instantiate the underlying ExpandFst.
+  //
+  // The constructor also reverses the PDT and runs a shortest-path
+  // computation on the reversed machine; its distance and balance data
+  // are what the pruning decisions rely on.
+  PrunedExpand(const Fst<A> &ifst,
+               const vector<pair<Label, Label> > &parens,
+               bool keep_parentheses = false,
+               const CacheOptions &opts = CacheOptions())
+      : ifst_(ifst.Copy()),
+        keep_parentheses_(keep_parentheses),
+        stack_(parens),
+        efst_(ifst, parens,
+              ExpandFstOptions<Arc>(opts, true, &stack_, &state_table_)),
+        queue_(state_table_, stack_, stack_length_, distance_, fdistance_) {
+    Reverse(*ifst_, parens, &rfst_);
+    VectorFst<Arc> path;
+    reverse_shortest_path_ = new SP(
+        rfst_, parens,
+        PdtShortestPathOptions<A, FifoQueue<StateId> >(true, false));
+    reverse_shortest_path_->ShortestPath(&path);
+    balance_data_ = reverse_shortest_path_->GetBalanceData()->Reverse(
+        rfst_.NumStates(), 10, -1);
+
+    InitCloseParenMultimap(parens);
+  }
+
+  ~PrunedExpand() {
+    delete ifst_;
+    delete reverse_shortest_path_;
+    delete balance_data_;
+  }
+
+  // Expands and prunes with weight threshold 'threshold' the input PDT.
+  // Writes the result in 'ofst'.
+  void Expand(MutableFst<A> *ofst, const Weight &threshold);
+
+ private:
+  // Per-state status bits stored in 'flags_'.
+  static const uint8 kEnqueued;
+  static const uint8 kExpanded;
+  static const uint8 kSourceState;
+
+  // Comparison functor used by the queue:
+  // 1. states corresponding to shortest stack first,
+  // 2. among stacks of the same length, reverse lexicographic order is used,
+  // 3. among states with the same stack, shortest-first order is used.
+  class StackCompare {
+   public:
+    StackCompare(const StateTable &st,
+                 const Stack &s, const vector<StackId> &sl,
+                 const vector<Weight> &d, const vector<Weight> &fd)
+        : state_table_(st), stack_(s), stack_length_(sl),
+          distance_(d), fdistance_(fd) {}
+
+    bool operator()(StateId s1, StateId s2) const {
+      StackId si1 = state_table_.Tuple(s1).stack_id;
+      StackId si2 = state_table_.Tuple(s2).stack_id;
+      if (stack_length_[si1] < stack_length_[si2])
+        return true;
+      if (stack_length_[si1] > stack_length_[si2])
+        return false;
+      // If the stack ids are equal, use A*: order by distance from the
+      // start times distance to the final states. States without both
+      // entries are given weight Zero().
+      if (si1 == si2) {
+        Weight w1 = (s1 < distance_.size()) && (s1 < fdistance_.size()) ?
+            Times(distance_[s1], fdistance_[s1]) : Weight::Zero();
+        Weight w2 = (s2 < distance_.size()) && (s2 < fdistance_.size()) ?
+            Times(distance_[s2], fdistance_[s2]) : Weight::Zero();
+        return less_(w1, w2);
+      }
+      // If the lengths are equal but the stacks differ, use reverse
+      // lexicographic order: compare stack tops while popping both stacks.
+      for (; si1 != si2; si1 = stack_.Pop(si1), si2 = stack_.Pop(si2)) {
+        if (stack_.Top(si1) < stack_.Top(si2)) return true;
+        if (stack_.Top(si1) > stack_.Top(si2)) return false;
+      }
+      return false;
+    }
+
+   private:
+    // All of these refer to members of the enclosing PrunedExpand.
+    const StateTable &state_table_;
+    const Stack &stack_;
+    const vector<StackId> &stack_length_;
+    const vector<Weight> &distance_;
+    const vector<Weight> &fdistance_;
+    NaturalLess<Weight> less_;
+  };
+
+  // Shortest-first queue ordered by StackCompare.
+  class ShortestStackFirstQueue
+      : public ShortestFirstQueue<StateId, StackCompare> {
+   public:
+    ShortestStackFirstQueue(
+        const PdtStateTable<StateId, StackId> &st,
+        const Stack &s,
+        const vector<StackId> &sl,
+        const vector<Weight> &d, const vector<Weight> &fd)
+        : ShortestFirstQueue<StateId, StackCompare>(
+            StackCompare(st, s, sl, d, fd)) {}
+  };
+
+
+  void InitCloseParenMultimap(const vector<pair<Label, Label> > &parens);
+  Weight DistanceToDest(StateId source, StateId dest) const;
+  uint8 Flags(StateId s) const;
+  void SetFlags(StateId s, uint8 flags, uint8 mask);
+  Weight Distance(StateId s) const;
+  void SetDistance(StateId s, Weight w);
+  Weight FinalDistance(StateId s) const;
+  void SetFinalDistance(StateId s, Weight w);
+  StateId SourceState(StateId s) const;
+  void SetSourceState(StateId s, StateId p);
+  void AddStateAndEnqueue(StateId s);
+  void Relax(StateId s, const A &arc, Weight fd);
+  bool PruneArc(StateId s, const A &arc);
+  void ProcStart();
+  void ProcFinal(StateId s);
+  bool ProcNonParen(StateId s, const A &arc, bool add_arc);
+  bool ProcOpenParen(StateId s, const A &arc, StackId si, StackId nsi);
+  bool ProcCloseParen(StateId s, const A &arc);
+  void ProcDestStates(StateId s, StackId si);
+
+  Fst<A> *ifst_;                   // Input PDT
+  VectorFst<Arc> rfst_;            // Reversed PDT
+  bool keep_parentheses_;          // Keep parentheses in ofst?
+  StateTable state_table_;         // State table for efst_
+  Stack stack_;                    // Stack trie
+  ExpandFst<Arc> efst_;            // Expanded PDT
+  vector<StackId> stack_length_;   // Length of stack for given stack id
+  vector<Weight> distance_;        // Distance from initial state in efst_/ofst
+  vector<Weight> fdistance_;       // Distance to final states in efst_/ofst
+  ShortestStackFirstQueue queue_;  // Queue used to visit efst_
+  vector<uint8> flags_;            // Status flags for states in efst_/ofst
+  vector<StateId> sources_;        // PDT source state for each expanded state
+
+  typedef PdtShortestPath<Arc, FifoQueue<StateId> > SP;
+  typedef typename SP::CloseParenMultimap ParenMultimap;
+  SP *reverse_shortest_path_;           // Shortest path for rfst_
+  PdtBalanceData<Arc> *balance_data_;   // Not owned by shortest_path_
+  ParenMultimap close_paren_multimap_;  // Maps open paren arcs to
+                                        // balancing close paren arcs.
+
+  MutableFst<Arc> *ofst_;  // Output fst
+  Weight limit_;           // Weight limit
+
+  typedef unordered_map<StateId, Weight> DestMap;
+  DestMap dest_map_;
+  StackId current_stack_id_;
+  // 'current_stack_id_' is the stack id of the states currently at the top
+  // of queue, i.e., the states currently being popped and processed.
+  // 'dest_map_' maps a state 's' in 'ifst_' that is the source
+  // of a close parenthesis matching the top of 'current_stack_id_' to
+  // the shortest-distance from '(s, current_stack_id_)' to the final
+  // states in 'efst_'.
+  ssize_t current_paren_id_;  // Paren id at top of current stack
+  ssize_t cached_stack_id_;
+  StateId cached_source_;
+  slist<pair<StateId, Weight> > cached_dest_list_;
+  // 'cached_dest_list_' contains the set of pairs of destination
+  // state and weight to the final states for source state
+  // 'cached_source_' and stack id 'cached_stack_id_': the destination
+  // states are the source states of the close parentheses balancing
+  // the open parenthesis that led into 'cached_source_'.
+
+  NaturalLess<Weight> less_;
+
+  // This class owns raw pointers that its destructor deletes, and
+  // 'queue_' holds references to sibling members, so a copy would
+  // double-delete and dangle. Copying is therefore disallowed.
+  DISALLOW_COPY_AND_ASSIGN(PrunedExpand);
+};
+
+// Bit values combined in the per-state status byte kept in 'flags_'.
+// kEnqueued is set by AddStateAndEnqueue() when a state is put on the queue.
+template <class A> const uint8 PrunedExpand<A>::kEnqueued = 0x01;
+// kExpanded is tested by AddStateAndEnqueue() so that such states are
+// not enqueued again.
+template <class A> const uint8 PrunedExpand<A>::kExpanded = 0x02;
+// kSourceState is set by ProcOpenParen() on the destination of a kept
+// open paren arc.
+template <class A> const uint8 PrunedExpand<A>::kSourceState = 0x04;
+
+
+// Fills the close paren multimap: every arc of 'ifst_' whose input
+// label is the close parenthesis of some pair in 'parens' is stored
+// under the key (paren id, source state of the arc), where the paren id
+// is the index of the pair in 'parens'.
+template <class A>
+void PrunedExpand<A>::InitCloseParenMultimap(
+    const vector<pair<Label, Label> > &parens) {
+  typedef unordered_map<Label, Label> ParenIdMap;
+
+  // Label -> index of its pair, for open and close labels alike.
+  ParenIdMap paren_ids;
+  for (Label id = 0; id < parens.size(); ++id) {
+    const pair<Label, Label> &labels = parens[id];
+    paren_ids[labels.first] = id;
+    paren_ids[labels.second] = id;
+  }
+
+  for (StateIterator<Fst<Arc> > siter(*ifst_); !siter.Done(); siter.Next()) {
+    const StateId state = siter.Value();
+    for (ArcIterator<Fst<Arc> > aiter(*ifst_, state);
+         !aiter.Done(); aiter.Next()) {
+      const Arc &arc = aiter.Value();
+      const typename ParenIdMap::const_iterator found =
+          paren_ids.find(arc.ilabel);
+      // Skip arcs that carry no parenthesis, and open parentheses.
+      if (found == paren_ids.end()) continue;
+      if (arc.ilabel != parens[found->second].second) continue;
+      ParenState<Arc> key(found->second, state);
+      close_paren_multimap_.insert(make_pair(key, arc));
+    }
+  }
+}
+
+
+// Returns the weight of the shortest balanced path from 'source' to
+// 'dest' in 'ifst_'; 'dest' must be the source state of a close paren
+// arc. The value is read from the reverse shortest-path data, queried
+// with both state ids shifted by one.
+// NOTE(review): the +1 presumably accounts for a state added by
+// Reverse() -- confirm.
+template <class A>
+typename A::Weight PrunedExpand<A>::DistanceToDest(StateId source,
+                                                   StateId dest) const {
+  typename SP::SearchState search_state(source + 1, dest + 1);
+  const Weight dist =
+      reverse_shortest_path_->GetShortestPathData().Distance(search_state);
+  VLOG(2) << "D(" << source << ", " << dest << ") ="
+          << dist;
+  return dist;
+}
+
+// Returns the status flags of state 's' in 'ofst_'; a state with no
+// stored flags reports 0.
+template <class A>
+uint8 PrunedExpand<A>::Flags(StateId s) const {
+  if (s < flags_.size()) return flags_[s];
+  return 0;
+}
+
+// Overwrites the bits selected by 'mask' in the flags of state 's' in
+// 'ofst_' with the corresponding bits of 'flags'. The flag vector is
+// grown with zero entries when 's' lies beyond its end.
+template <class A>
+void PrunedExpand<A>::SetFlags(StateId s, uint8 flags, uint8 mask) {
+  for (size_t n = flags_.size(); n <= s; ++n) flags_.push_back(0);
+  uint8 &slot = flags_[s];
+  slot &= ~mask;
+  slot |= flags & mask;
+}
+
+
+// Returns the stored shortest distance from the initial state to 's' in
+// 'ofst_', or Weight::Zero() when none has been stored for 's'.
+template <class A>
+typename A::Weight PrunedExpand<A>::Distance(StateId s) const {
+  if (s < distance_.size()) return distance_[s];
+  return Weight::Zero();
+}
+
+// Stores 'w' as the shortest distance from the initial state to 's' in
+// 'ofst_', padding the distance vector with Weight::Zero() as needed.
+template <class A>
+void PrunedExpand<A>::SetDistance(StateId s, Weight w) {
+  for (size_t n = distance_.size(); n <= s; ++n)
+    distance_.push_back(Weight::Zero());
+  distance_[s] = w;
+}
+
+
+// Returns the stored shortest distance from 's' to the final states in
+// 'ofst_', or Weight::Zero() when none has been stored for 's'.
+template <class A>
+typename A::Weight PrunedExpand<A>::FinalDistance(StateId s) const {
+  if (s < fdistance_.size()) return fdistance_[s];
+  return Weight::Zero();
+}
+
+// Stores 'w' as the shortest distance from 's' to the final states in
+// 'ofst_', padding the vector with Weight::Zero() as needed.
+template <class A>
+void PrunedExpand<A>::SetFinalDistance(StateId s, Weight w) {
+  for (size_t n = fdistance_.size(); n <= s; ++n)
+    fdistance_.push_back(Weight::Zero());
+  fdistance_[s] = w;
+}
+
+// Returns the PDT "source" state recorded for state 's' in 'ofst_', or
+// kNoStateId when none has been recorded.
+template <class A>
+typename A::StateId PrunedExpand<A>::SourceState(StateId s) const {
+  if (s < sources_.size()) return sources_[s];
+  return kNoStateId;
+}
+
+// Records state 'p' of 'ifst_' as the PDT "source" state of state 's'
+// in 'ofst_', padding the vector with kNoStateId as needed.
+template <class A>
+void PrunedExpand<A>::SetSourceState(StateId s, StateId p) {
+  for (size_t n = sources_.size(); n <= s; ++n)
+    sources_.push_back(kNoStateId);
+  sources_[s] = p;
+}
+
+// Makes sure state 's' of 'efst_' is scheduled for processing:
+//  * a state already in the queue only has its queue position updated;
+//  * a state already expanded is left alone;
+//  * otherwise 'ofst_' is grown until it contains 's', and 's' is
+//    enqueued and flagged as such.
+template <class A>
+void PrunedExpand<A>::AddStateAndEnqueue(StateId s) {
+  const uint8 status = Flags(s);
+  if (status & kEnqueued) {
+    queue_.Update(s);
+    return;
+  }
+  // TODO(allauzen): Check everything is fine when kExpanded?
+  if (status & kExpanded) return;
+
+  while (ofst_->NumStates() <= s) ofst_->AddState();
+  queue_.Enqueue(s);
+  SetFlags(s, kEnqueued, kEnqueued);
+}
+
+// Relaxes arc 'arc' leaving state 's' in 'ofst_':
+//  * when the distance to 's' times the weight of 'arc' improves on the
+//    distance stored for 'arc.nextstate', stores the new estimate and
+//    hands the PDT source state of 's' on to 'arc.nextstate';
+//  * when 'fd' improves on the stored distance from 'arc.nextstate' to
+//    the final states, stores 'fd' instead.
+template <class A>
+void PrunedExpand<A>::Relax(StateId s, const A &arc, Weight fd) {
+  const StateId dest = arc.nextstate;
+  const Weight nd = Times(Distance(s), arc.weight);
+  if (less_(nd, Distance(dest))) {
+    SetDistance(dest, nd);
+    SetSourceState(dest, SourceState(s));
+  }
+  if (less_(fd, FinalDistance(dest))) {
+    SetFinalDistance(dest, fd);
+  }
+  VLOG(2) << "Relax: " << s << ", d[s] = " << Distance(s) << ", to "
+          << dest << ", d[ns] = " << Distance(dest)
+          << ", nd = " << nd;
+}
+
+// Returns 'true' if the arc 'arc' out of state 's' in 'efst_' needs to
+// be pruned, i.e. if the best path weight through 'arc' (distance to 's'
+// times arc weight times estimated distance to the final states) is
+// worse than 'limit_'. As a side effect, relaxes 'arc' with that
+// final-distance estimate.
+template <class A>
+bool PrunedExpand<A>::PruneArc(StateId s, const A &arc) {
+ VLOG(2) << "Prune ?";
+ Weight fd = Weight::Zero();
+
+ // Rebuild the cached destination list when the PDT source state or the
+ // current stack differs from the ones the cache was built for.
+ if ((cached_source_ != SourceState(s)) ||
+ (cached_stack_id_ != current_stack_id_)) {
+ cached_source_ = SourceState(s);
+ cached_stack_id_ = current_stack_id_;
+ cached_dest_list_.clear();
+ if (cached_source_ != ifst_->Start()) {
+ for (SetIterator set_iter =
+ balance_data_->Find(current_paren_id_, cached_source_);
+ !set_iter.Done(); set_iter.Next()) {
+ StateId dest = set_iter.Element();
+ // NOTE(review): 'iter' is dereferenced without a check against
+ // dest_map_.end(); this relies on every such 'dest' already having
+ // an entry in 'dest_map_' -- confirm.
+ typename DestMap::const_iterator iter = dest_map_.find(dest);
+ cached_dest_list_.push_front(*iter);
+ }
+ } else {
+ // TODO(allauzen): queue discipline should prevent this from ever
+ // happening; replace by a check.
+ cached_dest_list_.push_front(
+ make_pair(rfst_.Start() -1, Weight::One()));
+ }
+ }
+
+ // Sum, over all cached destinations, of the distance from the arc's
+ // destination PDT state to that destination times the destination's
+ // weight to the final states.
+ for (typename slist<pair<StateId, Weight> >::const_iterator iter =
+ cached_dest_list_.begin();
+ iter != cached_dest_list_.end();
+ ++iter) {
+ fd = Plus(fd,
+ Times(DistanceToDest(state_table_.Tuple(arc.nextstate).state_id,
+ iter->first),
+ iter->second));
+ }
+ Relax(s, arc, fd);
+ Weight w = Times(Distance(s), Times(arc.weight, fd));
+ return less_(limit_, w);
+}
+
+// Adds start state of 'efst_' to 'ofst_', enqueues it and initializes
+// the distance data structures.
+template <class A>
+void PrunedExpand<A>::ProcStart() {
+ StateId s = efst_.Start();
+ AddStateAndEnqueue(s);
+ ofst_->SetStart(s);
+ SetSourceState(s, ifst_->Start());
+
+ // Stack id 0 is given length 0; no parenthesis is on top of it.
+ current_stack_id_ = 0;
+ current_paren_id_ = -1;
+ stack_length_.push_back(0);
+ // NOTE(review): 'rfst_.Start() - 1' is used as the destination state
+ // throughout this function; presumably it maps the start state of the
+ // reversed FST back to the numbering of 'ifst_' -- confirm.
+ dest_map_[rfst_.Start() - 1] = Weight::One(); // not needed
+
+ // Prime the destination cache used by PruneArc() for the start state.
+ cached_source_ = ifst_->Start();
+ cached_stack_id_ = 0;
+ cached_dest_list_.push_front(
+ make_pair(rfst_.Start() -1, Weight::One()));
+
+ // The destination state with stack id 0 has final distance One().
+ PdtStateTuple<StateId, StackId> tuple(rfst_.Start() - 1, 0);
+ SetFinalDistance(state_table_.FindState(tuple), Weight::One());
+ SetDistance(s, Weight::One());
+ SetFinalDistance(s, DistanceToDest(ifst_->Start(), rfst_.Start() - 1));
+ VLOG(2) << DistanceToDest(ifst_->Start(), rfst_.Start() - 1);
+}
+
+// Marks 's' final in 'ofst_', with its final weight from 'efst_',
+// unless 's' is not final there or the distance to 's' times that
+// weight exceeds the threshold.
+template <class A>
+void PrunedExpand<A>::ProcFinal(StateId s) {
+  const Weight final_weight = efst_.Final(s);
+  if (final_weight == Weight::Zero()) return;
+  if (less_(limit_, Times(Distance(s), final_weight))) return;
+  ofst_->SetFinal(s, final_weight);
+}
+
+// Processes arc (or meta-arc) 'arc' leaving 's' in 'efst_'. Returns
+// false when the arc is pruned. Otherwise enqueues its destination,
+// adds the arc to 'ofst_' when 'add_arc' is true, and returns true.
+template <class A>
+bool PrunedExpand<A>::ProcNonParen(StateId s, const A &arc, bool add_arc) {
+  VLOG(2) << "ProcNonParen: " << s << " to " << arc.nextstate
+          << ", " << arc.ilabel << ":" << arc.olabel << " / " << arc.weight
+          << ", add_arc = " << (add_arc ? "true" : "false");
+  const bool pruned = PruneArc(s, arc);
+  if (!pruned) {
+    if (add_arc) {
+      ofst_->AddArc(s, arc);
+    }
+    AddStateAndEnqueue(arc.nextstate);
+  }
+  return !pruned;
+}
+
+// Processes an open paren arc 'arc' out of state 's' in 'ofst_'.
+// When 'arc' is labeled with an open paren,
+// 1. considers each (shortest) balanced path starting in 's' by
+// taking 'arc' and ending by a close paren balancing the open
+// paren of 'arc' as a meta-arc, processes and prunes each meta-arc
+// as a non-paren arc, inserting its destination to the queue;
+// 2. if at least one of these meta-arcs has not been pruned,
+// adds the destination of 'arc' to 'ofst_' as a new source state
+// for the stack id 'nsi' and inserts it in the queue.
+// 'si' is the stack id before taking 'arc' and 'nsi' the stack id after.
+// Returns true when the open paren arc was kept.
+template <class A>
+bool PrunedExpand<A>::ProcOpenParen(StateId s, const A &arc, StackId si,
+ StackId nsi) {
+ // Update the stack length when needed: |nsi| = |si| + 1.
+ while (stack_length_.size() <= nsi) stack_length_.push_back(-1);
+ if (stack_length_[nsi] == -1)
+ stack_length_[nsi] = stack_length_[si] + 1;
+
+ StateId ns = arc.nextstate;
+ VLOG(2) << "Open paren: " << s << "(" << state_table_.Tuple(s).state_id
+ << ") to " << ns << "(" << state_table_.Tuple(ns).state_id << ")";
+ bool proc_arc = false;
+ Weight fd = Weight::Zero();
+ ssize_t paren_id = stack_.ParenId(arc.ilabel);
+ // Collect the states returned by the balance data for this paren id and
+ // the PDT state of 'ns' into a local list before the table is modified.
+ slist<StateId> sources;
+ for (SetIterator set_iter =
+ balance_data_->Find(paren_id, state_table_.Tuple(ns).state_id);
+ !set_iter.Done(); set_iter.Next()) {
+ sources.push_front(set_iter.Element());
+ }
+ for (typename slist<StateId>::const_iterator sources_iter = sources.begin();
+ sources_iter != sources.end();
+ ++ sources_iter) {
+ StateId source = *sources_iter;
+ VLOG(2) << "Close paren source: " << source;
+ ParenState<Arc> paren_state(paren_id, source);
+ // Visit every close paren arc stored for (paren_id, source).
+ for (typename ParenMultimap::const_iterator iter =
+ close_paren_multimap_.find(paren_state);
+ iter != close_paren_multimap_.end() && paren_state == iter->first;
+ ++iter) {
+ // The meta-arc goes from 's' to the destination of the close paren
+ // arc, paired with the stack id 'si' in effect before the open paren.
+ Arc meta_arc = iter->second;
+ PdtStateTuple<StateId, StackId> tuple(meta_arc.nextstate, si);
+ meta_arc.nextstate = state_table_.FindState(tuple);
+ VLOG(2) << state_table_.Tuple(ns).state_id << ", " << source;
+ VLOG(2) << "Meta arc weight = " << arc.weight << " Times "
+ << DistanceToDest(state_table_.Tuple(ns).state_id, source)
+ << " Times " << meta_arc.weight;
+ // Weight: open paren arc, then the balanced path, then the close paren arc.
+ meta_arc.weight = Times(
+ arc.weight,
+ Times(DistanceToDest(state_table_.Tuple(ns).state_id, source),
+ meta_arc.weight));
+ proc_arc |= ProcNonParen(s, meta_arc, false);
+ // Accumulate the final-distance estimate for 'arc.nextstate'.
+ fd = Plus(fd, Times(
+ Times(
+ DistanceToDest(state_table_.Tuple(ns).state_id, source),
+ iter->second.weight),
+ FinalDistance(meta_arc.nextstate)));
+ }
+ }
+ if (proc_arc) {
+ VLOG(2) << "Proc open paren " << s << " to " << arc.nextstate;
+ ofst_->AddArc(
+ s, keep_parentheses_ ? arc : Arc(0, 0, arc.weight, arc.nextstate));
+ AddStateAndEnqueue(arc.nextstate);
+ Weight nd = Times(Distance(s), arc.weight);
+ if(less_(nd, Distance(arc.nextstate)))
+ SetDistance(arc.nextstate, nd);
+ // FinalDistance not necessary for source state since pruning
+ // decided using the meta-arcs above. But this is a problem with
+ // A*, hence:
+ if (less_(fd, FinalDistance(arc.nextstate)))
+ SetFinalDistance(arc.nextstate, fd);
+ SetFlags(arc.nextstate, kSourceState, kSourceState);
+ }
+ return proc_arc;
+}
+
+// Handles a close paren arc 'arc' leaving state 's' of 'efst_'. The arc
+// is copied to 'ofst_' only if the weight of the shortest path through it
+// is not worse than 'limit_'. Returns true iff the arc was added.
+template <class A>
+bool PrunedExpand<A>::ProcCloseParen(StateId s, const A &arc) {
+  // Shortest path through 'arc': d[s] * w[arc] * fd[arc.nextstate].
+  const Weight path_weight =
+      Times(Distance(s), Times(arc.weight, FinalDistance(arc.nextstate)));
+  if (!less_(limit_, path_weight)) {
+    // Within the limit: keep the arc; labels become 0/0 unless the
+    // parentheses are to be preserved.
+    if (keep_parentheses_) {
+      ofst_->AddArc(s, arc);
+    } else {
+      ofst_->AddArc(s, Arc(0, 0, arc.weight, arc.nextstate));
+    }
+    return true;
+  }
+  return false;
+}
+
+// No-op unless 's' (a state of 'ofst_') is flagged as a source state.
+// For a source state with stack id 'si', enumerates the states that
+// 'balance_data_' reports as close paren sources balancing the paren on
+// top of 'si' at 's'. For each such state 't' not yet in 'dest_map_',
+// stores the Plus-sum, over the close paren arcs leaving 't', of the arc
+// weight times the final distance of (arc destination, Pop(si)).
+// 'dest_map_' is reset whenever 'si' differs from 'current_stack_id_'.
+template <class A>
+void PrunedExpand<A>::ProcDestStates(StateId s, StackId si) {
+  if ((Flags(s) & kSourceState) == 0) return;
+
+  if (current_stack_id_ != si) {
+    dest_map_.clear();
+    current_stack_id_ = si;
+    current_paren_id_ = stack_.Top(current_stack_id_);
+    VLOG(2) << "StackID " << si << " dequeued for first time";
+  }
+
+  // TODO(allauzen): clean up source state business; rename current function to
+  // ProcSourceState.
+  const StateId pdt_state = state_table_.Tuple(s).state_id;
+  SetSourceState(s, pdt_state);
+
+  const ssize_t top_paren = stack_.Top(si);
+  // Stack id reached once the paren on top of 'si' has been closed.
+  const StackId popped_stack = stack_.Pop(si);
+
+  SetIterator dest_iter = balance_data_->Find(top_paren, pdt_state);
+  while (!dest_iter.Done()) {
+    const StateId dest_state = dest_iter.Element();
+    if (dest_map_.find(dest_state) == dest_map_.end()) {
+      Weight total = Weight::Zero();
+      ParenState<Arc> key(top_paren, dest_state);
+      typename ParenMultimap::const_iterator close_iter =
+          close_paren_multimap_.find(key);
+      while (close_iter != close_paren_multimap_.end() &&
+             key == close_iter->first) {
+        const Arc &close_arc = close_iter->second;
+        PdtStateTuple<StateId, StackId> tuple(close_arc.nextstate,
+                                              popped_stack);
+        total = Plus(total,
+                     Times(close_arc.weight,
+                           FinalDistance(state_table_.FindState(tuple))));
+        ++close_iter;
+      }
+      dest_map_[dest_state] = total;
+      VLOG(2) << "State " << dest_state << " is a dest state for stack id "
+              << si << " with weight " << total;
+    }
+    dest_iter.Next();
+  }
+}
+
+// Expands the input PDT into 'ofst', pruning with the weight threshold
+// 'threshold'. Any previous content of 'ofst' is deleted; its symbol
+// tables are taken from the input FST.
+template <class A>
+void PrunedExpand<A>::Expand(
+    MutableFst<A> *ofst, const typename A::Weight &threshold) {
+  // Reset the output FST.
+  ofst_ = ofst;
+  ofst_->DeleteStates();
+  ofst_->SetInputSymbols(ifst_->InputSymbols());
+  ofst_->SetOutputSymbols(ifst_->OutputSymbols());
+
+  // Pruning limit: a distance obtained from DistanceToDest(), times the
+  // threshold.
+  limit_ = Times(DistanceToDest(ifst_->Start(), rfst_.Start() - 1), threshold);
+  flags_.clear();
+
+  ProcStart();
+
+  while (!queue_.Empty()) {
+    const StateId s = queue_.Head();
+    queue_.Dequeue();
+    SetFlags(s, kExpanded, kExpanded | kEnqueued);
+    VLOG(2) << s << " dequeued!";
+
+    ProcFinal(s);
+    const StackId src_stack = state_table_.Tuple(s).stack_id;
+    ProcDestStates(s, src_stack);
+
+    // Dispatch each outgoing arc on how it changes the stack.
+    for (ArcIterator<ExpandFst<Arc> > aiter(efst_, s);
+         !aiter.Done();
+         aiter.Next()) {
+      // The arc is taken by value, as in the original code.
+      Arc arc = aiter.Value();
+      const StackId dst_stack = state_table_.Tuple(arc.nextstate).stack_id;
+      if (dst_stack == src_stack) {
+        // Stack unchanged: ordinary arc.
+        ProcNonParen(s, arc, true);
+        continue;
+      }
+      if (stack_.Pop(dst_stack) == src_stack) {
+        // Destination stack is the source stack plus one paren: open paren.
+        ProcOpenParen(s, arc, src_stack, dst_stack);
+        continue;
+      }
+      // Anything else is handled as a close paren.
+      ProcCloseParen(s, arc);
+    }
+    VLOG(2) << "d[" << s << "] = " << Distance(s)
+            << ", fd[" << s << "] = " << FinalDistance(s);
+  }
+}
+
+//
+// Expand() Functions
+//
+
+// Options for the Expand() functions below.
+template <class Arc>
+struct ExpandOptions {
+  // If true, Connect() is applied to the expanded result.
+  bool connect;
+  // If true, parenthesis labels are kept on the output arcs.
+  bool keep_parentheses;
+  // Pruning threshold; Weight::Zero() selects the unpruned expansion.
+  typename Arc::Weight weight_threshold;
+
+  ExpandOptions(
+      bool c = true,
+      bool k = false,
+      typename Arc::Weight w = Arc::Weight::Zero())
+      : connect(c),
+        keep_parentheses(k),
+        weight_threshold(w) {}
+};
+
+// Expands a pushdown transducer (PDT) encoded as an FST into an FST.
+// This version writes the expanded PDT result to a MutableFst.
+// In the PDT, some transitions are labeled with open or close
+// parentheses. To be interpreted as a PDT, the parens must balance on
+// a path. The open-close parenthesis label pairs are passed in
+// 'parens'. The expansion enforces the parenthesis constraints. The
+// PDT must be expandable as an FST.
+//
+// 'opts.weight_threshold' equal to Weight::Zero() selects the unpruned
+// expansion through ExpandFst; any other value selects PrunedExpand with
+// that threshold. If 'opts.connect' is set, Connect() is applied to
+// 'ofst' afterwards.
+template <class Arc>
+void Expand(
+    const Fst<Arc> &ifst,
+    const vector<pair<typename Arc::Label, typename Arc::Label> > &parens,
+    MutableFst<Arc> *ofst,
+    const ExpandOptions<Arc> &opts) {
+  typedef typename Arc::Weight Weight;
+
+  if (opts.weight_threshold == Weight::Zero()) {
+    // Unpruned: assign the delayed ExpandFst to the output FST.
+    ExpandFstOptions<Arc> eopts;
+    eopts.gc_limit = 0;
+    eopts.keep_parentheses = opts.keep_parentheses;
+    *ofst = ExpandFst<Arc>(ifst, parens, eopts);
+  } else {
+    // Pruned: expand directly into 'ofst' under the weight threshold.
+    PrunedExpand<Arc> pruned_expand(ifst, parens, opts.keep_parentheses);
+    pruned_expand.Expand(ofst, opts.weight_threshold);
+  }
+
+  if (opts.connect)
+    Connect(ofst);
+}
+
+// Convenience overload of Expand(): expands the PDT 'ifst' (an FST whose
+// open/close parenthesis label pairs are listed in 'parens') into the
+// MutableFst 'ofst' without pruning. 'connect' and 'keep_parentheses' are
+// forwarded through an ExpandOptions object whose weight threshold keeps
+// its default value. The PDT must be expandable as an FST.
+template<class Arc>
+void Expand(
+    const Fst<Arc> &ifst,
+    const vector<pair<typename Arc::Label, typename Arc::Label> > &parens,
+    MutableFst<Arc> *ofst,
+    bool connect = true, bool keep_parentheses = false) {
+  const ExpandOptions<Arc> opts(connect, keep_parentheses);
+  Expand(ifst, parens, ofst, opts);
+}
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_PDT_EXPAND_H__
diff --git a/src/include/fst/extensions/pdt/info.h b/src/include/fst/extensions/pdt/info.h
new file mode 100644
index 0000000..ef9a860
--- /dev/null
+++ b/src/include/fst/extensions/pdt/info.h
@@ -0,0 +1,175 @@
+// info.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Prints information about a PDT.
+
+#ifndef FST_EXTENSIONS_PDT_INFO_H__
+#define FST_EXTENSIONS_PDT_INFO_H__
+
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <tr1/unordered_set>
+using std::tr1::unordered_set;
+using std::tr1::unordered_multiset;
+#include <vector>
+using std::vector;
+
+#include <fst/fst.h>
+#include <fst/extensions/pdt/pdt.h>
+
+namespace fst {
+
+// Gathers summary statistics about a PDT; helper class for pdtinfo.cc.
+// All counts are computed once, by the constructor.
+template <class A>
+class PdtInfo {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef typename A::Label Label;
+  typedef typename A::Weight Weight;
+
+  // 'parens' lists the (open, close) parenthesis label pairs of 'fst'.
+  PdtInfo(const Fst<A> &fst, const vector<pair<Label, Label> > &parens);
+
+  // Type strings of the FST and of its arc type.
+  const string &FstType() const { return fst_type_; }
+  const string &ArcType() const { return A::Type(); }
+
+  // Total number of states / arcs visited.
+  int64 NumStates() const { return nstates_; }
+  int64 NumArcs() const { return narcs_; }
+
+  // Number of arcs whose input label is an open / close parenthesis.
+  int64 NumOpenParens() const { return nopen_parens_; }
+  int64 NumCloseParens() const { return nclose_parens_; }
+
+  // Number of distinct open / close parenthesis labels seen on arcs.
+  int64 NumUniqueOpenParens() const { return nuniq_open_parens_; }
+  int64 NumUniqueCloseParens() const { return nuniq_close_parens_; }
+
+  // Number of distinct destination states of open paren arcs, and of
+  // distinct source states of close paren arcs.
+  int64 NumOpenParenStates() const { return nopen_paren_states_; }
+  int64 NumCloseParenStates() const { return nclose_paren_states_; }
+
+ private:
+  string fst_type_;
+  int64 nstates_;
+  int64 narcs_;
+  int64 nopen_parens_;
+  int64 nclose_parens_;
+  int64 nuniq_open_parens_;
+  int64 nuniq_close_parens_;
+  int64 nopen_paren_states_;
+  int64 nclose_paren_states_;
+
+  DISALLOW_COPY_AND_ASSIGN(PdtInfo);
+};
+
+// Walks every state and arc of 'fst' once and accumulates the counts
+// exposed by the accessors. An arc counts as a parenthesis arc when its
+// input label appears in 'parens'.
+template <class A>
+PdtInfo<A>::PdtInfo(const Fst<A> &fst,
+                    const vector<pair<typename A::Label,
+                                      typename A::Label> > &parens)
+    : fst_type_(fst.Type()),
+      nstates_(0),
+      narcs_(0),
+      nopen_parens_(0),
+      nclose_parens_(0),
+      nuniq_open_parens_(0),
+      nuniq_close_parens_(0),
+      nopen_paren_states_(0),
+      nclose_paren_states_(0) {
+  typedef typename unordered_map<Label, size_t>::const_iterator ParenIter;
+
+  // Paren label (open or close) -> index of its pair in 'parens'.
+  unordered_map<Label, size_t> paren_index;
+  for (size_t i = 0; i < parens.size(); ++i) {
+    paren_index[parens[i].first] = i;
+    paren_index[parens[i].second] = i;
+  }
+
+  unordered_set<Label> seen_parens;        // paren labels seen so far
+  unordered_set<StateId> open_dest_states;   // dest. states of open parens
+  unordered_set<StateId> close_src_states;   // source states of close parens
+
+  for (StateIterator< Fst<A> > siter(fst); !siter.Done(); siter.Next()) {
+    const StateId s = siter.Value();
+    ++nstates_;
+    for (ArcIterator< Fst<A> > aiter(fst, s); !aiter.Done(); aiter.Next()) {
+      const A &arc = aiter.Value();
+      ++narcs_;
+
+      const ParenIter pit = paren_index.find(arc.ilabel);
+      if (pit == paren_index.end())
+        continue;  // not a parenthesis arc
+
+      const pair<Label, Label> &paren_pair = parens[pit->second];
+      if (arc.ilabel == paren_pair.first) {
+        // Open parenthesis: insert() reports whether the element is new.
+        ++nopen_parens_;
+        if (seen_parens.insert(paren_pair.first).second)
+          ++nuniq_open_parens_;
+        if (open_dest_states.insert(arc.nextstate).second)
+          ++nopen_paren_states_;
+      } else {
+        // Close parenthesis.
+        ++nclose_parens_;
+        if (seen_parens.insert(paren_pair.second).second)
+          ++nuniq_close_parens_;
+        if (close_src_states.insert(s).second)
+          ++nclose_paren_states_;
+      }
+    }
+  }
+}
+
+
+// Prints the statistics held by 'pdtinfo' to cout, one per line, with
+// each label left-justified in a 50-column field. The format flags of
+// cout are restored to their previous value before returning.
+template <class A>
+void PrintPdtInfo(const PdtInfo<A> &pdtinfo) {
+  // Select left adjustment through the adjustfield mask so that a
+  // previously set 'right' or 'internal' flag cannot conflict with it;
+  // setf() returns the flags that were in effect before the call.
+  const ios_base::fmtflags old = cout.setf(ios::left, ios::adjustfield);
+  // width() only applies to the next insertion, hence one call per row.
+  cout.width(50);
+  cout << "fst type" << pdtinfo.FstType().c_str() << endl;
+  cout.width(50);
+  cout << "arc type" << pdtinfo.ArcType().c_str() << endl;
+  cout.width(50);
+  cout << "# of states" << pdtinfo.NumStates() << endl;
+  cout.width(50);
+  cout << "# of arcs" << pdtinfo.NumArcs() << endl;
+  cout.width(50);
+  cout << "# of open parentheses" << pdtinfo.NumOpenParens() << endl;
+  cout.width(50);
+  cout << "# of close parentheses" << pdtinfo.NumCloseParens() << endl;
+  cout.width(50);
+  cout << "# of unique open parentheses"
+       << pdtinfo.NumUniqueOpenParens() << endl;
+  cout.width(50);
+  cout << "# of unique close parentheses"
+       << pdtinfo.NumUniqueCloseParens() << endl;
+  cout.width(50);
+  cout << "# of open parenthesis dest. states"
+       << pdtinfo.NumOpenParenStates() << endl;
+  cout.width(50);
+  cout << "# of close parenthesis source states"
+       << pdtinfo.NumCloseParenStates() << endl;
+  // flags() replaces the whole flag set; setf(old) would merely OR the
+  // old flags in and leave 'left' set.
+  cout.flags(old);
+}
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_PDT_INFO_H__
diff --git a/src/include/fst/extensions/pdt/paren.h b/src/include/fst/extensions/pdt/paren.h
new file mode 100644
index 0000000..7b9887f
--- /dev/null
+++ b/src/include/fst/extensions/pdt/paren.h
@@ -0,0 +1,496 @@
+// paren.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// Common classes for PDT parentheses
+
+// \file
+
+#ifndef FST_EXTENSIONS_PDT_PAREN_H_
+#define FST_EXTENSIONS_PDT_PAREN_H_
+
+#include <algorithm>
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <tr1/unordered_set>
+using std::tr1::unordered_set;
+using std::tr1::unordered_multiset;
+#include <set>
+
+#include <fst/extensions/pdt/pdt.h>
+#include <fst/extensions/pdt/collection.h>
+#include <fst/fst.h>
+#include <fst/dfs-visit.h>
+
+
+namespace fst {
+
+//
+// ParenState: Pair of an open (close) parenthesis and
+// its destination (source) state.
+//
+
+template <class A>
+class ParenState {
+ public:
+  typedef typename A::Label Label;
+  typedef typename A::StateId StateId;
+
+  // Hash functor, so that ParenState can key unordered containers.
+  struct Hash {
+    size_t operator()(const ParenState<A> &p) const {
+      return p.paren_id + p.state_id * kPrime;
+    }
+  };
+
+  Label paren_id;            // ID of open (close) paren
+  StateId state_id;          // destination (source) state of open (close) paren
+
+  // Default state: no paren, no state.
+  ParenState() : paren_id(kNoLabel), state_id(kNoStateId) {}
+
+  ParenState(Label p, StateId s) : paren_id(p), state_id(s) {}
+
+  bool operator==(const ParenState<A> &p) const {
+    if (&p == this)
+      return true;
+    return p.paren_id == this->paren_id && p.state_id == this->state_id;
+  }
+
+  bool operator!=(const ParenState<A> &p) const { return !(p == *this); }
+
+  // Strict weak ordering: lexicographic on (paren_id, state_id), i.e.
+  // true iff *this sorts before 'p'. The previous version referred to a
+  // nonexistent member ('this->paren.id') and so could not be
+  // instantiated.
+  bool operator<(const ParenState<A> &p) const {
+    return this->paren_id < p.paren_id ||
+        (this->paren_id == p.paren_id && this->state_id < p.state_id);
+  }
+
+ private:
+  // Multiplier used by Hash to combine the two fields.
+  static const size_t kPrime;
+};
+
+template <class A>
+const size_t ParenState<A>::kPrime = 7853;
+
+
+// Adapts an STL (multi)map iterator to the FST iterator protocol
+// (Done/Value/Next/Reset). Iteration starts at 'iter' and is done when
+// the end of the map is reached or the key differs from the key at
+// 'iter'. The map is held by reference and must outlive the iterator.
+template <class M>
+class MapIterator {
+ public:
+  typedef typename M::const_iterator StlIterator;
+  typedef typename M::value_type PairType;
+  typedef typename PairType::second_type ValueType;
+
+  MapIterator(const M &m, StlIterator iter)
+      : map_(m), begin_(iter), iter_(iter) {}
+
+  bool Done() const {
+    // Checking for end first also covers a start position equal to
+    // end(), in which case 'begin_' must not be dereferenced.
+    if (iter_ == map_.end())
+      return true;
+    return iter_->first != begin_->first;
+  }
+
+  // Mapped value at the current position (returned by value).
+  ValueType Value() const { return iter_->second; }
+
+  void Next() { ++iter_; }
+
+  // Rewinds to the position given at construction.
+  void Reset() { iter_ = begin_; }
+
+ private:
+  const M &map_;        // underlying container
+  StlIterator begin_;   // start position; its key bounds the iteration
+  StlIterator iter_;    // current position
+};
+
+//
+// PdtParenReachable: Provides various parenthesis reachability information
+// on a PDT.
+//
+
+template <class A>
+class PdtParenReachable {
+ public:
+ typedef typename A::StateId StateId;
+ typedef typename A::Label Label;
+ public:
+ // Maps from state ID to reachable paren IDs from (to) that state.
+ typedef unordered_multimap<StateId, Label> ParenMultiMap;
+
+ // Maps from paren ID and state ID to reachable state set ID
+ typedef unordered_map<ParenState<A>, ssize_t,
+ typename ParenState<A>::Hash> StateSetMap;
+
+ // Maps from paren ID and state ID to arcs exiting that state with that
+ // Label.
+ typedef unordered_multimap<ParenState<A>, A,
+ typename ParenState<A>::Hash> ParenArcMultiMap;
+
+ // FST-style iterator over the paren IDs stored for one state.
+ typedef MapIterator<ParenMultiMap> ParenIterator;
+
+ // FST-style iterator over the arcs stored for one (paren ID, state) pair.
+ typedef MapIterator<ParenArcMultiMap> ParenArcIterator;
+
+ // Iterator over the elements of one stored state set.
+ typedef typename Collection<ssize_t, StateId>::SetIterator SetIterator;
+
+ // Computes close (open) parenthesis reachability information for
+ // a PDT with bounded stack.
+ // 'fst' and 'parens' are held by reference and must outlive this object.
+ // Only 'close' == true is implemented; otherwise an error is reported via
+ // FSTERROR() and no information is gathered.
+ PdtParenReachable(const Fst<A> &fst,
+ const vector<pair<Label, Label> > &parens, bool close)
+ : fst_(fst),
+ parens_(parens),
+ close_(close) {
+ // Map both labels of each pair to the pair's index, the paren ID.
+ for (Label i = 0; i < parens.size(); ++i) {
+ const pair<Label, Label> &p = parens[i];
+ paren_id_map_[p.first] = i;
+ paren_id_map_[p.second] = i;
+ }
+
+ if (close_) {
+ StateId start = fst.Start();
+ // FST without a start state: nothing to compute.
+ if (start == kNoStateId)
+ return;
+ DFSearch(start, start);
+ } else {
+ FSTERROR() << "PdtParenReachable: open paren info not implemented";
+ }
+ }
+
+ // Given a state ID, returns an iterator over paren IDs
+ // for close (open) parens reachable from that state along balanced
+ // paths.
+ ParenIterator FindParens(StateId s) const {
+ return ParenIterator(paren_multimap_, paren_multimap_.find(s));
+ }
+
+ // Given a paren ID and a state ID s, returns an iterator over
+ // states that can be reached along balanced paths from (to) s that
+ // have close (open) parentheses matching the paren ID exiting
+ // (entering) those states.
+ SetIterator FindStates(Label paren_id, StateId s) const {
+ ParenState<A> paren_state(paren_id, s);
+ typename StateSetMap::const_iterator id_it = set_map_.find(paren_state);
+ if (id_it == set_map_.end()) {
+ // No set recorded for (paren_id, s): look up set ID -1 instead.
+ // NOTE(review): presumably yields an empty iteration -- confirm in
+ // Collection::FindSet.
+ return state_sets_.FindSet(-1);
+ } else {
+ return state_sets_.FindSet(id_it->second);
+ }
+ }
+
+ // Given a paren Id and a state ID s, return an iterator over
+ // arcs that exit (enter) s and are labeled with a close (open)
+ // parenthesis matching the paren ID.
+ ParenArcIterator FindParenArcs(Label paren_id, StateId s) const {
+ ParenState<A> paren_state(paren_id, s);
+ return ParenArcIterator(paren_arc_multimap_,
+ paren_arc_multimap_.find(paren_state));
+ }
+
+ private:
+ // DFS that gathers paren and state set information.
+ // Bool returns false when cycle detected.
+ bool DFSearch(StateId s, StateId start);
+
+ // Unions state sets together gathered by the DFS.
+ void ComputeStateSet(StateId s);
+
+ // Gather state set(s) from state 'nexts'.
+ void UpdateStateSet(StateId nexts, set<Label> *paren_set,
+ vector< set<StateId> > *state_sets) const;
+
+ const Fst<A> &fst_;
+ const vector<pair<Label, Label> > &parens_; // Paren ID -> Labels
+ bool close_; // Close/open paren info?
+ unordered_map<Label, Label> paren_id_map_; // Paren labels -> ID
+ ParenMultiMap paren_multimap_; // Paren reachability
+ ParenArcMultiMap paren_arc_multimap_; // Paren Arcs
+ vector<char> state_color_; // DFS state
+ // Storage for the reachable state sets, addressed by set ID. Declared
+ // mutable because it is accessed from const methods (FindStates).
+ mutable Collection<ssize_t, StateId> state_sets_; // Reachable states -> ID
+ StateSetMap set_map_; // (Paren ID, state ID) -> state set ID
+ DISALLOW_COPY_AND_ASSIGN(PdtParenReachable);
+};
+
+// DFS that gathers paren and state set information.
+// Visits the states reachable from 's' and calls ComputeStateSet() on each
+// state once all of its successors have been processed. Returns false only
+// when 's' is currently on the DFS stack (grey), i.e. a cycle was closed;
+// returns true otherwise. 'start' is only forwarded to recursive calls.
+template <class A>
+bool PdtParenReachable<A>::DFSearch(StateId s, StateId start) {
+ // Grow the colour table on demand; unseen states are white.
+ if (s >= state_color_.size())
+ state_color_.resize(s + 1, kDfsWhite);
+
+ // Already finished: nothing to do.
+ if (state_color_[s] == kDfsBlack)
+ return true;
+
+ // Still being expanded further up the recursion: cycle.
+ if (state_color_[s] == kDfsGrey)
+ return false;
+
+ state_color_[s] = kDfsGrey;
+
+ for (ArcIterator<Fst<A> > aiter(fst_, s);
+ !aiter.Done();
+ aiter.Next()) {
+ const A &arc = aiter.Value();
+
+ typename unordered_map<Label, Label>::const_iterator pit
+ = paren_id_map_.find(arc.ilabel);
+ if (pit != paren_id_map_.end()) { // paren?
+ Label paren_id = pit->second;
+ if (arc.ilabel == parens_[paren_id].first) { // open paren
+ // Explore the sub-PDT entered by this open paren with its own
+ // 'start'; the result of this call is ignored.
+ DFSearch(arc.nextstate, arc.nextstate);
+ // Then continue the search from the destination of every matching
+ // close paren arc found for that sub-PDT; results are ignored too.
+ for (SetIterator set_iter = FindStates(paren_id, arc.nextstate);
+ !set_iter.Done(); set_iter.Next()) {
+ for (ParenArcIterator paren_arc_iter =
+ FindParenArcs(paren_id, set_iter.Element());
+ !paren_arc_iter.Done();
+ paren_arc_iter.Next()) {
+ const A &cparc = paren_arc_iter.Value();
+ DFSearch(cparc.nextstate, start);
+ }
+ }
+ }
+ // Close paren arcs are not followed here; ComputeStateSet() below
+ // records them for 's'.
+ } else { // non-paren
+ if(!DFSearch(arc.nextstate, start)) {
+ // Cycle through non-paren arcs: report it and stop expanding 's'.
+ // Note that 's' is left grey and ComputeStateSet(s) is skipped,
+ // while true is still returned to the caller.
+ FSTERROR() << "PdtReachable: Underlying cyclicity not supported";
+ return true;
+ }
+ }
+ }
+ ComputeStateSet(s);
+ state_color_[s] = kDfsBlack;
+ return true;
+}
+
+// Builds the reachability entries for state 's' from the entries of its
+// successors (already produced by the DFS): for every paren ID reachable
+// from 's', records the paren ID in 'paren_multimap_' and the ID of the
+// set of states carrying a matching close paren in 'set_map_'. Close
+// paren arcs leaving 's' itself are stored in 'paren_arc_multimap_'.
+template <class A>
+void PdtParenReachable<A>::ComputeStateSet(StateId s) {
+  typedef typename unordered_map<Label, Label>::const_iterator ParenIdIter;
+
+  set<Label> reached_parens;                               // paren IDs found
+  vector< set<StateId> > reached_states(parens_.size());   // per paren ID
+
+  for (ArcIterator< Fst<A> > aiter(fst_, s);
+       !aiter.Done();
+       aiter.Next()) {
+    const A &arc = aiter.Value();
+    const ParenIdIter pit = paren_id_map_.find(arc.ilabel);
+
+    if (pit == paren_id_map_.end()) {
+      // Non-paren arc: inherit everything known for its destination.
+      UpdateStateSet(arc.nextstate, &reached_parens, &reached_states);
+      continue;
+    }
+
+    const Label paren_id = pit->second;
+    if (arc.ilabel != parens_[paren_id].first) {
+      // Close paren arc: 's' itself is a close paren source state.
+      reached_parens.insert(paren_id);
+      reached_states[paren_id].insert(s);
+      ParenState<A> close_key(paren_id, s);
+      paren_arc_multimap_.insert(make_pair(close_key, arc));
+      continue;
+    }
+
+    // Open paren arc: skip over the balanced sub-path and inherit from
+    // the destination of each matching close paren arc.
+    SetIterator close_src_iter = FindStates(paren_id, arc.nextstate);
+    for (; !close_src_iter.Done(); close_src_iter.Next()) {
+      ParenArcIterator close_arc_iter =
+          FindParenArcs(paren_id, close_src_iter.Element());
+      for (; !close_arc_iter.Done(); close_arc_iter.Next()) {
+        const A &close_arc = close_arc_iter.Value();
+        UpdateStateSet(close_arc.nextstate, &reached_parens, &reached_states);
+      }
+    }
+  }
+
+  // Publish the gathered sets, one entry per reachable paren ID.
+  for (typename set<Label>::iterator paren_iter = reached_parens.begin();
+       paren_iter != reached_parens.end(); ++paren_iter) {
+    Label paren_id = *paren_iter;
+    paren_multimap_.insert(make_pair(s, paren_id));
+
+    // Flatten the (ordered) state set into a vector for the collection.
+    const set<StateId> &members = reached_states[paren_id];
+    vector<StateId> state_set(members.begin(), members.end());
+
+    ParenState<A> key(paren_id, s);
+    set_map_[key] = state_sets_.FindId(state_set);
+  }
+}
+
+// Merges into '*paren_set' and '*state_sets' everything already recorded
+// for state 'nexts': each paren ID reachable from 'nexts' and, for each
+// such ID, the states of the corresponding state set. '*state_sets' is
+// indexed by paren ID.
+template <class A>
+void PdtParenReachable<A>::UpdateStateSet(
+    StateId nexts, set<Label> *paren_set,
+    vector< set<StateId> > *state_sets) const {
+  ParenIterator paren_iter = FindParens(nexts);
+  while (!paren_iter.Done()) {
+    const Label paren_id = paren_iter.Value();
+    paren_set->insert(paren_id);
+
+    set<StateId> &target = (*state_sets)[paren_id];
+    SetIterator set_iter = FindStates(paren_id, nexts);
+    while (!set_iter.Done()) {
+      target.insert(set_iter.Element());
+      set_iter.Next();
+    }
+
+    paren_iter.Next();
+  }
+}
+
+
+// Store balancing parenthesis data for a PDT. Allows on-the-fly
+// construction (e.g. in PdtShortestPath) unlike PdtParenReachable above.
+//
+// Usage, as described by the method comments below: call OpenInsert() and
+// CloseInsert() for the parentheses entering a state 'open_dest', then
+// FinishInsert(open_dest), and only afterwards Find() on that state.
+template <class A>
+class PdtBalanceData {
+ public:
+ typedef typename A::StateId StateId;
+ typedef typename A::Label Label;
+
+ // Hash set for open parens
+ typedef unordered_set<ParenState<A>, typename ParenState<A>::Hash> OpenParenSet;
+
+ // Maps from open paren destination state to parenthesis ID.
+ typedef unordered_multimap<StateId, Label> OpenParenMap;
+
+ // Maps from open paren state to source states of matching close parens
+ typedef unordered_multimap<ParenState<A>, StateId,
+ typename ParenState<A>::Hash> CloseParenMap;
+
+ // Maps from open paren state to close source set ID
+ typedef unordered_map<ParenState<A>, ssize_t,
+ typename ParenState<A>::Hash> CloseSourceMap;
+
+ // Iterator over the elements of one stored close source set.
+ typedef typename Collection<ssize_t, StateId>::SetIterator SetIterator;
+
+ PdtBalanceData() {}
+
+ // Drops the pending (not yet finished) open and close insertions.
+ // Note: open_paren_set_, close_source_map_ and close_source_sets_ are
+ // left untouched by this method.
+ void Clear() {
+ open_paren_map_.clear();
+ close_paren_map_.clear();
+ }
+
+ // Adds an open parenthesis with destination state 'open_dest'.
+ // Repeated insertions of the same (paren_id, open_dest) are ignored.
+ void OpenInsert(Label paren_id, StateId open_dest) {
+ ParenState<A> key(paren_id, open_dest);
+ if (!open_paren_set_.count(key)) {
+ open_paren_set_.insert(key);
+ open_paren_map_.insert(make_pair(open_dest, paren_id));
+ }
+ }
+
+ // Adds a matching closing parenthesis with source state
+ // 'close_source' that balances an open_parenthesis with destination
+ // state 'open_dest' if OpenInsert() previously called
+ // (o.w. CloseInsert() does nothing).
+ void CloseInsert(Label paren_id, StateId open_dest, StateId close_source) {
+ ParenState<A> key(paren_id, open_dest);
+ if (open_paren_set_.count(key))
+ close_paren_map_.insert(make_pair(key, close_source));
+ }
+
+ // Find close paren source states matching an open parenthesis.
+ // Methods that follow, iterate through those matching states.
+ // Should be called only after FinishInsert(open_dest).
+ SetIterator Find(Label paren_id, StateId open_dest) {
+ ParenState<A> close_key(paren_id, open_dest);
+ typename CloseSourceMap::const_iterator id_it =
+ close_source_map_.find(close_key);
+ if (id_it == close_source_map_.end()) {
+ // Nothing recorded for this key: look up set ID -1 instead.
+ // NOTE(review): presumably yields an empty iteration -- confirm in
+ // Collection::FindSet.
+ return close_source_sets_.FindSet(-1);
+ } else {
+ return close_source_sets_.FindSet(id_it->second);
+ }
+ }
+
+ // Call when all open and close parenthesis insertions wrt open
+ // parentheses entering 'open_dest' are finished. Must be called
+ // before Find(open_dest). Stores close paren source state sets
+ // efficiently.
+ void FinishInsert(StateId open_dest) {
+ vector<StateId> close_sources;
+ // Both loops erase the element they are on; the iterator is advanced
+ // with a post-increment inside erase(), hence the empty increment
+ // clause in the for statements.
+ for (typename OpenParenMap::iterator oit = open_paren_map_.find(open_dest);
+ oit != open_paren_map_.end() && oit->first == open_dest;) {
+ Label paren_id = oit->second;
+ close_sources.clear();
+ ParenState<A> okey(paren_id, open_dest);
+ // The key is no longer pending: later CloseInsert() calls for it
+ // become no-ops.
+ open_paren_set_.erase(open_paren_set_.find(okey));
+ // Move all pending close sources for this key into the local vector.
+ for (typename CloseParenMap::iterator cit = close_paren_map_.find(okey);
+ cit != close_paren_map_.end() && cit->first == okey;) {
+ close_sources.push_back(cit->second);
+ close_paren_map_.erase(cit++);
+ }
+ // Sort and remove duplicates before storing the set.
+ sort(close_sources.begin(), close_sources.end());
+ typename vector<StateId>::iterator unique_end =
+ unique(close_sources.begin(), close_sources.end());
+ close_sources.resize(unique_end - close_sources.begin());
+
+ // Keys without any close source get no entry in close_source_map_.
+ if (!close_sources.empty())
+ close_source_map_[okey] = close_source_sets_.FindId(close_sources);
+ open_paren_map_.erase(oit++);
+ }
+ }
+
+ // Return a new balance data object representing the reversed balance
+ // information.
+ // The returned object is allocated with new.
+ PdtBalanceData<A> *Reverse(StateId num_states,
+ StateId num_split,
+ StateId state_id_shift) const;
+
+ private:
+ OpenParenSet open_paren_set_; // open par. at dest?
+
+ OpenParenMap open_paren_map_; // open parens per state
+ // NOTE(review): open_dest_ and open_iter_ are not referenced by any
+ // method visible in this class -- confirm whether they are still needed.
+ ParenState<A> open_dest_; // cur open dest. state
+ typename OpenParenMap::const_iterator open_iter_; // cur open parens/state
+
+ CloseParenMap close_paren_map_; // close states/open
+ // paren and state
+
+ CloseSourceMap close_source_map_; // paren, state to set ID
+ // Storage for the close source sets; mutable because it is accessed
+ // from the const method Reverse().
+ mutable Collection<ssize_t, StateId> close_source_sets_;
+};
+
+// Return a new balance data object representing the reversed balance
+// information: every stored (open paren destination, close paren source)
+// pair is inserted into the result with the two roles swapped and with
+// 'state_id_shift' added to both state IDs.
+//
+// The close source states are processed in chunks of
+// 'num_states / num_split' consecutive state IDs, each chunk being
+// finished (FinishInsert) before the next one starts. The chunk size is
+// clamped to at least 1, so 'num_split' greater than 'num_states' (or
+// not positive) no longer causes an endless loop or a division by zero.
+// The returned object is allocated with new; the caller owns it.
+template <class A>
+PdtBalanceData<A> *PdtBalanceData<A>::Reverse(
+    StateId num_states,
+    StateId num_split,
+    StateId state_id_shift) const {
+  PdtBalanceData<A> *bd = new PdtBalanceData<A>;
+  unordered_set<StateId> close_sources;
+
+  // A non-positive 'num_split' means a single chunk covering all states.
+  StateId split_size = num_split > 0 ? num_states / num_split : num_states;
+  // Guarantee progress of the chunk loop below.
+  if (split_size < 1)
+    split_size = 1;
+
+  for (StateId i = 0; i < num_states; i += split_size) {
+    close_sources.clear();
+
+    for (typename CloseSourceMap::const_iterator
+             sit = close_source_map_.begin();
+         sit != close_source_map_.end();
+         ++sit) {
+      ParenState<A> okey = sit->first;
+      StateId open_dest = okey.state_id;
+      Label paren_id = okey.paren_id;
+      for (SetIterator set_iter = close_source_sets_.FindSet(sit->second);
+           !set_iter.Done(); set_iter.Next()) {
+        StateId close_source = set_iter.Element();
+        // Only handle close sources belonging to the current chunk.
+        if ((close_source < i) || (close_source >= i + split_size))
+          continue;
+        close_sources.insert(close_source + state_id_shift);
+        // Reversed roles: the close source becomes the open destination.
+        bd->OpenInsert(paren_id, close_source + state_id_shift);
+        bd->CloseInsert(paren_id, close_source + state_id_shift,
+                        open_dest + state_id_shift);
+      }
+    }
+
+    // Finalize every state that received insertions in this chunk.
+    for (typename unordered_set<StateId>::const_iterator it
+             = close_sources.begin();
+         it != close_sources.end();
+         ++it) {
+      bd->FinishInsert(*it);
+    }
+
+  }
+  return bd;
+}
+
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_PDT_PAREN_H_
diff --git a/src/include/fst/extensions/pdt/pdt.h b/src/include/fst/extensions/pdt/pdt.h
new file mode 100644
index 0000000..171541f
--- /dev/null
+++ b/src/include/fst/extensions/pdt/pdt.h
@@ -0,0 +1,212 @@
+// pdt.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Common classes for PDT expansion/traversal.
+
+#ifndef FST_EXTENSIONS_PDT_PDT_H__
+#define FST_EXTENSIONS_PDT_PDT_H__
+
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <map>
+#include <set>
+
+#include <fst/state-table.h>
+#include <fst/fst.h>
+
+namespace fst {
+
+// Provides bijection between parenthesis stacks and signed integral
+// stack IDs. Each stack ID is unique to each distinct stack. The
+// open-close parenthesis label pairs are passed in 'parens'.
+template <typename K, typename L>
+class PdtStack {
+ public:
+ typedef K StackId;
+ typedef L Label;
+
+ // The stacks are stored in a tree. The nodes are stored in vector
+ // 'nodes_'. Each node represents the top of some stack and is
+ // ID'ed by its position in the vector. Its parent node represents
+ // the stack with the top 'popped' and its children are stored in
+ // 'child_map_' accessed by stack_id and label. The paren_id is
+ // the position in 'parens' of the parenthesis for that node.
+ struct StackNode {
+ StackId parent_id;
+ size_t paren_id;
+
+ StackNode(StackId p, size_t i) : parent_id(p), paren_id(i) {}
+ };
+
+ // Copies 'parens', indexes both labels of every pair by the pair's
+ // position, records the smallest and largest paren label, and creates
+ // the root node (stack ID 0, the empty stack).
+ PdtStack(const vector<pair<Label, Label> > &parens)
+ : parens_(parens), min_paren_(kNoLabel), max_paren_(kNoLabel) {
+ for (size_t i = 0; i < parens.size(); ++i) {
+ const pair<Label, Label> &p = parens[i];
+ paren_map_[p.first] = i;
+ paren_map_[p.second] = i;
+
+ // kNoLabel marks "no paren seen yet" for both bounds.
+ if (min_paren_ == kNoLabel || p.first < min_paren_)
+ min_paren_ = p.first;
+ if (p.second < min_paren_)
+ min_paren_ = p.second;
+
+ if (max_paren_ == kNoLabel || p.first > max_paren_)
+ max_paren_ = p.first;
+ if (p.second > max_paren_)
+ max_paren_ = p.second;
+ }
+ // The root's paren_id is a size_t initialized from -1, i.e. it holds
+ // the largest size_t value rather than a valid paren ID.
+ nodes_.push_back(StackNode(-1, -1)); // Tree root.
+ }
+
+ // Returns stack ID given the current stack ID (0 if empty) and
+ // label read. 'Pushes' onto a stack if the label is an open
+ // parenthesis, returning the new stack ID. 'Pops' the stack if the
+ // label is a close parenthesis that matches the top of the stack,
+ // returning the parent stack ID. Returns -1 if label is an
+ // unmatched close parenthesis. Otherwise, returns the current stack
+ // ID.
+ StackId Find(StackId stack_id, Label label) {
+ // Fast rejection using the label range of all parentheses.
+ if (min_paren_ == kNoLabel || label < min_paren_ || label > max_paren_)
+ return stack_id; // Non-paren.
+
+ typename unordered_map<Label, size_t>::const_iterator pit
+ = paren_map_.find(label);
+ if (pit == paren_map_.end()) // Non-paren.
+ return stack_id;
+ ssize_t paren_id = pit->second;
+
+ if (label == parens_[paren_id].first) { // Open paren.
+ // operator[] creates a zero-valued entry when the key is new; 0 is
+ // the root's ID and therefore never a valid child ID.
+ StackId &child_id = child_map_[make_pair(stack_id, label)];
+ if (child_id == 0) { // Child not found, push label.
+ child_id = nodes_.size();
+ nodes_.push_back(StackNode(stack_id, paren_id));
+ }
+ return child_id;
+ }
+
+ // Close paren: 'stack_id' indexes 'nodes_' directly, without a range
+ // check, so it must be a valid (non-negative) stack ID here.
+ const StackNode &node = nodes_[stack_id];
+ if (paren_id == node.paren_id) // Matching close paren.
+ return node.parent_id;
+
+ return -1; // Non-matching close paren.
+ }
+
+ // Returns the stack ID obtained by "popping" the label at the top
+ // of the current stack ID.
+ // For the root (stack ID 0) this is the root's parent_id, i.e. -1.
+ StackId Pop(StackId stack_id) const {
+ return nodes_[stack_id].parent_id;
+ }
+
+ // Returns the paren ID at the top of the stack for 'stack_id'
+ ssize_t Top(StackId stack_id) const {
+ return nodes_[stack_id].paren_id;
+ }
+
+ // Returns the paren ID (position of its pair in 'parens') of 'label',
+ // or -1 if 'label' is not a parenthesis.
+ ssize_t ParenId(Label label) const {
+ typename unordered_map<Label, size_t>::const_iterator pit
+ = paren_map_.find(label);
+ if (pit == paren_map_.end()) // Non-paren.
+ return -1;
+ return pit->second;
+ }
+
+ private:
+ // Hash functor for the (stack ID, label) keys of 'child_map_'.
+ struct ChildHash {
+ size_t operator()(const pair<StackId, Label> &p) const {
+ return p.first + p.second * kPrime;
+ }
+ };
+
+ // Multiplier used by ChildHash to combine the two key fields.
+ static const size_t kPrime;
+
+ vector<pair<Label, Label> > parens_;
+ vector<StackNode> nodes_;
+ unordered_map<Label, size_t> paren_map_;
+ unordered_map<pair<StackId, Label>,
+ StackId, ChildHash> child_map_; // Child of stack node wrt label
+ Label min_paren_; // For faster paren. check
+ Label max_paren_; // For faster paren. check
+};
+
+template <typename T, typename L>
+const size_t PdtStack<T, L>::kPrime = 7853;
+
+
+// State of the expanded machine: a state of the PDT paired with the ID
+// of the parenthesis stack it is reached with.
+template <typename S, typename K>
+struct PdtStateTuple {
+  typedef S StateId;
+  typedef K StackId;
+
+  StateId state_id;   // state of the underlying PDT
+  StackId stack_id;   // ID of the parenthesis stack
+
+  // Default tuple: no state (kNoStateId) and stack ID -1.
+  PdtStateTuple() : state_id(kNoStateId), stack_id(-1) {}
+
+  PdtStateTuple(StateId fs, StackId ss) : state_id(fs), stack_id(ss) {}
+};
+
+// Two PDT state tuples are equal iff they are the same object or agree
+// on both the state ID and the stack ID.
+template <typename S, typename K>
+inline bool operator==(const PdtStateTuple<S, K>& x,
+                       const PdtStateTuple<S, K>& y) {
+  return &x == &y ||
+      (x.state_id == y.state_id && x.stack_id == y.stack_id);
+}
+
+
+// Hash functor for PDT state tuples: combines the two fields as
+// state_id + stack_id * kPrime.
+template <class T>
+class PdtStateHash {
+ public:
+  size_t operator()(const T &tuple) const {
+    const size_t scaled_stack = tuple.stack_id * kPrime;
+    return tuple.state_id + scaled_stack;
+  }
+
+ private:
+  // Multiplier applied to the stack ID.
+  static const size_t kPrime;
+};
+
+template <typename T>
+const size_t PdtStateHash<T>::kPrime = 7853;
+
+
+// Bijection between PDT state tuples and the state IDs of the expanded
+// machine, implemented by a CompactHashStateTable keyed with PdtStateHash.
+template <class S, class K>
+class PdtStateTable
+    : public CompactHashStateTable<PdtStateTuple<S, K>,
+                                   PdtStateHash<PdtStateTuple<S, K> > > {
+ public:
+  typedef S StateId;
+  typedef K StackId;
+
+  PdtStateTable() {}
+
+  // Copy construction does not copy the entries of the argument: the
+  // base class is default-constructed.
+  PdtStateTable(const PdtStateTable<S, K> &) {}
+
+ private:
+  // Assignment is declared but not defined, to disallow it.
+  void operator=(const PdtStateTable<S, K> &table);
+};
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_PDT_PDT_H__
diff --git a/src/include/fst/extensions/pdt/pdtlib.h b/src/include/fst/extensions/pdt/pdtlib.h
new file mode 100644
index 0000000..71c8123
--- /dev/null
+++ b/src/include/fst/extensions/pdt/pdtlib.h
@@ -0,0 +1,30 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+// This is an experimental pushdown transducer (PDT) library. A PDT is
+// encoded as an FST, where some transitions are labeled with open or close
+// parentheses. To be interpreted as a PDT, the parentheses must balance on a
+// path.
+
+#ifndef FST_EXTENSIONS_PDT_PDTLIB_H_
+#define FST_EXTENSIONS_PDT_PDTLIB_H_
+
+#include <fst/extensions/pdt/pdt.h>
+#include <fst/extensions/pdt/compose.h>
+#include <fst/extensions/pdt/expand.h>
+#include <fst/extensions/pdt/replace.h>
+
+#endif // FST_EXTENSIONS_PDT_PDTLIB_H_
diff --git a/src/include/fst/extensions/pdt/pdtscript.h b/src/include/fst/extensions/pdt/pdtscript.h
new file mode 100644
index 0000000..c2a1cf4
--- /dev/null
+++ b/src/include/fst/extensions/pdt/pdtscript.h
@@ -0,0 +1,284 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+// Convenience file for including all PDT operations at once, and/or
+// registering them for new arc types.
+
+#ifndef FST_EXTENSIONS_PDT_PDTSCRIPT_H_
+#define FST_EXTENSIONS_PDT_PDTSCRIPT_H_
+
+#include <utility>
+using std::pair; using std::make_pair;
+#include <vector>
+using std::vector;
+
+#include <fst/compose.h> // for ComposeOptions
+#include <fst/util.h>
+
+#include <fst/script/fst-class.h>
+#include <fst/script/arg-packs.h>
+#include <fst/script/shortest-path.h>
+
+#include <fst/extensions/pdt/compose.h>
+#include <fst/extensions/pdt/expand.h>
+#include <fst/extensions/pdt/info.h>
+#include <fst/extensions/pdt/replace.h>
+#include <fst/extensions/pdt/reverse.h>
+#include <fst/extensions/pdt/shortest-path.h>
+
+
+namespace fst {
+namespace script {
+
+// PDT COMPOSE
+
+// Argument pack: (ifst1, ifst2, parens, ofst, compose options, flag choosing
+// which Compose overload is called).
+typedef args::Package<const FstClass &,
+                      const FstClass &,
+                      const vector<pair<int64, int64> >&,
+                      MutableFstClass *,
+                      const ComposeOptions &,
+                      bool> PdtComposeArgs;
+
+// Arc-typed implementation: unpacks the arguments, converts the int64
+// parenthesis pairs to the arc's label type and calls Compose().
+template<class Arc>
+void PdtCompose(PdtComposeArgs *args) {
+  typedef typename Arc::Label Label;
+
+  const Fst<Arc> &ifst1 = *(args->arg1.GetFst<Arc>());
+  const Fst<Arc> &ifst2 = *(args->arg2.GetFst<Arc>());
+  MutableFst<Arc> *ofst = args->arg4->GetMutableFst<Arc>();
+
+  // Copies the parenthesis pairs into the arc's label type.
+  const vector<pair<int64, int64> > &untyped = args->arg3;
+  vector<pair<Label, Label> > parens;
+  parens.reserve(untyped.size());
+  for (size_t i = 0; i < untyped.size(); ++i)
+    parens.push_back(pair<Label, Label>(untyped[i].first, untyped[i].second));
+
+  // When arg6 is false the parens follow the second FST; otherwise they
+  // follow the first one.
+  if (!args->arg6) {
+    Compose(ifst1, ifst2, parens, ofst, args->arg5);
+    return;
+  }
+  Compose(ifst1, parens, ifst2, ofst, args->arg5);
+}
+
+// Untyped entry point (declaration only).
+void PdtCompose(const FstClass & ifst1,
+                const FstClass & ifst2,
+                const vector<pair<int64, int64> > &parens,
+                MutableFstClass *ofst,
+                const ComposeOptions &copts,
+                bool left_pdt);
+
+// PDT EXPAND
+
+// Script-level expansion options; the weight threshold is carried as a
+// WeightClass.
+struct PdtExpandOptions {
+  bool connect;
+  bool keep_parentheses;
+  WeightClass weight_threshold;
+
+  PdtExpandOptions(bool do_connect = true, bool keep_parens = false,
+                   WeightClass threshold = WeightClass::Zero())
+      : connect(do_connect),
+        keep_parentheses(keep_parens),
+        weight_threshold(threshold) {}
+};
+
+// Argument pack: (ifst, parens, ofst, options).
+typedef args::Package<const FstClass &,
+                      const vector<pair<int64, int64> >&,
+                      MutableFstClass *, PdtExpandOptions> PdtExpandArgs;
+
+// Arc-typed implementation: converts the parenthesis pairs and the options
+// to their arc-typed counterparts and calls Expand().
+template<class Arc>
+void PdtExpand(PdtExpandArgs *args) {
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+
+  const Fst<Arc> &fst = *(args->arg1.GetFst<Arc>());
+  MutableFst<Arc> *ofst = args->arg3->GetMutableFst<Arc>();
+
+  // Copies the parenthesis pairs into the arc's label type.
+  const vector<pair<int64, int64> > &untyped = args->arg2;
+  vector<pair<Label, Label> > parens;
+  parens.reserve(untyped.size());
+  for (size_t i = 0; i < untyped.size(); ++i)
+    parens.push_back(pair<Label, Label>(untyped[i].first, untyped[i].second));
+
+  const PdtExpandOptions &opts = args->arg4;
+  Expand(fst, parens, ofst,
+         ExpandOptions<Arc>(opts.connect, opts.keep_parentheses,
+                            *(opts.weight_threshold.GetWeight<Weight>())));
+}
+
+// Untyped entry points (declarations only).
+void PdtExpand(const FstClass &ifst,
+               const vector<pair<int64, int64> > &parens,
+               MutableFstClass *ofst, const PdtExpandOptions &opts);
+
+void PdtExpand(const FstClass &ifst,
+               const vector<pair<int64, int64> > &parens,
+               MutableFstClass *ofst, bool connect);
+
+// PDT REPLACE
+
+// Argument pack: (label/FST pairs, ofst, parens (read and overwritten),
+// root label).
+typedef args::Package<const vector<pair<int64, const FstClass*> > &,
+                      MutableFstClass *,
+                      vector<pair<int64, int64> > *,
+                      const int64 &> PdtReplaceArgs;
+
+// Arc-typed implementation: converts the untyped arguments, calls Replace()
+// and writes the resulting parenthesis pairs back through arg3.
+template<class Arc>
+void PdtReplace(PdtReplaceArgs *args) {
+  typedef typename Arc::Label Label;
+  typedef pair<Label, const Fst<Arc> *> LabelFstPair;
+  typedef pair<Label, Label> LabelPair;
+
+  // Typed (label, FST) pairs.
+  const vector<pair<int64, const FstClass*> > &untyped_tuples = args->arg1;
+  vector<LabelFstPair> tuples;
+  tuples.reserve(untyped_tuples.size());
+  for (size_t i = 0; i < untyped_tuples.size(); ++i) {
+    tuples.push_back(LabelFstPair(untyped_tuples[i].first,
+                                  untyped_tuples[i].second->GetFst<Arc>()));
+  }
+
+  MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>();
+
+  // Typed copy of the caller's parenthesis pairs.
+  vector<pair<int64, int64> > *untyped_parens = args->arg3;
+  vector<LabelPair> parens;
+  parens.reserve(untyped_parens->size());
+  for (size_t i = 0; i < untyped_parens->size(); ++i) {
+    const pair<int64, int64> &p = (*untyped_parens)[i];
+    parens.push_back(LabelPair(p.first, p.second));
+  }
+
+  Replace(tuples, ofst, &parens, args->arg4);
+
+  // Now copy parens back to the caller's vector.
+  untyped_parens->clear();
+  untyped_parens->reserve(parens.size());
+  for (size_t i = 0; i < parens.size(); ++i) {
+    untyped_parens->push_back(
+        pair<int64, int64>(parens[i].first, parens[i].second));
+  }
+}
+
+// Untyped entry point (declaration only).
+void PdtReplace(const vector<pair<int64, const FstClass*> > &fst_tuples,
+                MutableFstClass *ofst,
+                vector<pair<int64, int64> > *parens,
+                const int64 &root);
+
+// PDT REVERSE
+
+// Argument pack: (ifst, parens, ofst).
+typedef args::Package<const FstClass &,
+                      const vector<pair<int64, int64> >&,
+                      MutableFstClass *> PdtReverseArgs;
+
+// Arc-typed implementation: converts the parenthesis pairs to the arc's
+// label type and calls Reverse().
+template<class Arc>
+void PdtReverse(PdtReverseArgs *args) {
+  typedef typename Arc::Label Label;
+
+  const Fst<Arc> &fst = *(args->arg1.GetFst<Arc>());
+  MutableFst<Arc> *ofst = args->arg3->GetMutableFst<Arc>();
+
+  const vector<pair<int64, int64> > &untyped = args->arg2;
+  vector<pair<Label, Label> > parens;
+  parens.reserve(untyped.size());
+  for (size_t i = 0; i < untyped.size(); ++i)
+    parens.push_back(pair<Label, Label>(untyped[i].first, untyped[i].second));
+
+  Reverse(fst, parens, ofst);
+}
+
+// Untyped entry point (declaration only).
+void PdtReverse(const FstClass &ifst,
+                const vector<pair<int64, int64> > &parens,
+                MutableFstClass *ofst);
+
+
+// PDT SHORTESTPATH
+
+// Script-level options: the queue discipline is selected at run time through
+// 'queue_type'; the two flags are forwarded to fst::PdtShortestPathOptions.
+struct PdtShortestPathOptions {
+ QueueType queue_type;
+ bool keep_parentheses;
+ bool path_gc;
+
+ PdtShortestPathOptions(QueueType qt = FIFO_QUEUE,
+ bool kp = false, bool gc = true)
+ : queue_type(qt), keep_parentheses(kp), path_gc(gc) {}
+};
+
+// Argument pack: (ifst, parens, ofst, options).
+typedef args::Package<const FstClass &,
+ const vector<pair<int64, int64> >&,
+ MutableFstClass *,
+ const PdtShortestPathOptions &> PdtShortestPathArgs;
+
+// Arc-typed implementation: converts the parenthesis pairs to the arc's label
+// type, then dispatches on the requested queue type to the templated
+// ShortestPath().
+template<class Arc>
+void PdtShortestPath(PdtShortestPathArgs *args) {
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Label Label;
+ typedef typename Arc::Weight Weight;
+
+ const Fst<Arc> &fst = *(args->arg1.GetFst<Arc>());
+ MutableFst<Arc> *ofst = args->arg3->GetMutableFst<Arc>();
+ const PdtShortestPathOptions &opts = args->arg4;
+
+
+ // Copies the int64 parenthesis pairs into the arc's label type.
+ vector<pair<Label, Label> > parens(args->arg2.size());
+ for (size_t i = 0; i < parens.size(); ++i) {
+ parens[i].first = args->arg2[i].first;
+ parens[i].second = args->arg2[i].second;
+ }
+
+ switch (opts.queue_type) {
+ // NOTE(review): there is no break/return after the error is reported, so
+ // (when FSTERROR() does not abort) an unknown queue type falls through to
+ // the FIFO_QUEUE case below. Presumably a deliberate fallback -- confirm.
+ default:
+ FSTERROR() << "Unknown queue type: " << opts.queue_type;
+ case FIFO_QUEUE: {
+ typedef FifoQueue<StateId> Queue;
+ fst::PdtShortestPathOptions<Arc, Queue> spopts(opts.keep_parentheses,
+ opts.path_gc);
+ ShortestPath(fst, parens, ofst, spopts);
+ return;
+ }
+ case LIFO_QUEUE: {
+ typedef LifoQueue<StateId> Queue;
+ fst::PdtShortestPathOptions<Arc, Queue> spopts(opts.keep_parentheses,
+ opts.path_gc);
+ ShortestPath(fst, parens, ofst, spopts);
+ return;
+ }
+ case STATE_ORDER_QUEUE: {
+ typedef StateOrderQueue<StateId> Queue;
+ fst::PdtShortestPathOptions<Arc, Queue> spopts(opts.keep_parentheses,
+ opts.path_gc);
+ ShortestPath(fst, parens, ofst, spopts);
+ return;
+ }
+ }
+}
+
+// Untyped entry point (declaration only); 'opts' defaults to a
+// default-constructed PdtShortestPathOptions.
+void PdtShortestPath(const FstClass &ifst,
+ const vector<pair<int64, int64> > &parens,
+ MutableFstClass *ofst,
+ const PdtShortestPathOptions &opts =
+ PdtShortestPathOptions());
+
+// PRINT INFO
+
+// Argument pack: (ifst, parens).
+typedef args::Package<const FstClass &,
+                      const vector<pair<int64, int64> > &> PrintPdtInfoArgs;
+
+// Arc-typed implementation: builds a PdtInfo for the FST and its parenthesis
+// pairs and prints it.
+template<class Arc>
+void PrintPdtInfo(PrintPdtInfoArgs *args) {
+  typedef typename Arc::Label Label;
+
+  const Fst<Arc> &fst = *(args->arg1.GetFst<Arc>());
+
+  // Copies the parenthesis pairs into the arc's label type.
+  const vector<pair<int64, int64> > &untyped = args->arg2;
+  vector<pair<Label, Label> > parens;
+  parens.reserve(untyped.size());
+  for (size_t i = 0; i < untyped.size(); ++i)
+    parens.push_back(pair<Label, Label>(untyped[i].first, untyped[i].second));
+
+  PdtInfo<Arc> pdtinfo(fst, parens);
+  PrintPdtInfo(pdtinfo);
+}
+
+// Untyped entry point (declaration only).
+void PrintPdtInfo(const FstClass &ifst,
+                  const vector<pair<int64, int64> > &parens);
+
+} // namespace script
+} // namespace fst
+
+
+// Expands to one REGISTER_FST_OPERATION per PDT script operation declared in
+// this header (compose, expand, replace, reverse, shortest path, print info)
+// for the given arc type. The final registration has no trailing semicolon,
+// so the invocation supplies it.
+#define REGISTER_FST_PDT_OPERATIONS(ArcType) \
+ REGISTER_FST_OPERATION(PdtCompose, ArcType, PdtComposeArgs); \
+ REGISTER_FST_OPERATION(PdtExpand, ArcType, PdtExpandArgs); \
+ REGISTER_FST_OPERATION(PdtReplace, ArcType, PdtReplaceArgs); \
+ REGISTER_FST_OPERATION(PdtReverse, ArcType, PdtReverseArgs); \
+ REGISTER_FST_OPERATION(PdtShortestPath, ArcType, PdtShortestPathArgs); \
+ REGISTER_FST_OPERATION(PrintPdtInfo, ArcType, PrintPdtInfoArgs)
+#endif // FST_EXTENSIONS_PDT_PDTSCRIPT_H_
diff --git a/src/include/fst/extensions/pdt/replace.h b/src/include/fst/extensions/pdt/replace.h
new file mode 100644
index 0000000..a85d0fe
--- /dev/null
+++ b/src/include/fst/extensions/pdt/replace.h
@@ -0,0 +1,192 @@
+// replace.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Recursively replace Fst arcs with other Fst(s) returning a PDT.
+
+#ifndef FST_EXTENSIONS_PDT_REPLACE_H__
+#define FST_EXTENSIONS_PDT_REPLACE_H__
+
+#include <fst/replace.h>
+
+namespace fst {
+
+// Hash functor for (size_t, S) pairs: the second component scaled by a fixed
+// prime, plus the first component.
+template <typename S>
+struct ReplaceParenHash {
+  size_t operator()(const pair<size_t, S> &p) const {
+    return p.second * kPrime + p.first;
+  }
+
+ private:
+  static const size_t kPrime = 7853;  // Multiplier for the pair's second part.
+};
+
+// Namespace-scope definition of the in-class initialized constant.
+template <typename S> const size_t ReplaceParenHash<S>::kPrime;
+
+// Builds a pushdown transducer (PDT) from an RTN specification
+// identical to that in fst/lib/replace.h. The result is a PDT
+// encoded as the FST 'ofst' where some transitions are labeled with
+// open or close parentheses. To be interpreted as a PDT, the parens
+// must balance on a path (see PdtExpand()). The open/close
+// parenthesis label pairs are returned in 'parens'.
+//
+// Arguments:
+//   ifst_array: (non-terminal label, FST) pairs making up the RTN.
+//   ofst:       output FST; its existing states are deleted first.
+//   parens:     output parenthesis pairs; cleared first.
+//   root:       label of the FST whose start/final states become those of
+//               'ofst'.
+template <class Arc>
+void Replace(const vector<pair<typename Arc::Label,
+ const Fst<Arc>* > >& ifst_array,
+ MutableFst<Arc> *ofst,
+ vector<pair<typename Arc::Label,
+ typename Arc::Label> > *parens,
+ typename Arc::Label root) {
+ typedef typename Arc::Label Label;
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Weight Weight;
+
+ ofst->DeleteStates();
+ parens->clear();
+
+ // Maps each non-terminal label to the index of its FST in 'ifst_array'.
+ unordered_map<Label, size_t> label2id;
+ for (size_t i = 0; i < ifst_array.size(); ++i)
+ label2id[ifst_array[i].first] = i;
+
+ // Largest output label seen on any copied arc; parenthesis labels are
+ // allocated above it.
+ // NOTE(review): only arc.olabel is tracked below, not arc.ilabel -- confirm
+ // that input labels cannot exceed this bound.
+ Label max_label = kNoLabel;
+
+ deque<size_t> non_term_queue; // Queue of non-terminals to replace
+ unordered_set<Label> non_term_set; // Set of non-terminals to replace
+ non_term_queue.push_back(root);
+ non_term_set.insert(root);
+
+ // PDT state corr. to ith replace FST start state.
+ vector<StateId> fst_start(ifst_array.size(), kNoLabel);
+ // PDT state, weight pairs corr. to ith replace FST final state & weights.
+ vector< vector<pair<StateId, Weight> > > fst_final(ifst_array.size());
+
+ // Builds single Fst combining all referenced input Fsts. Leaves in the
+ // non-terminals for now. Tabulate the PDT states that correspond to
+ // the start and final states of the input Fsts.
+ // 'soff' is the state offset of the FST being copied: the number of states
+ // already in 'ofst' when the copy of that FST begins.
+ for (StateId soff = 0; !non_term_queue.empty(); soff = ofst->NumStates()) {
+ Label label = non_term_queue.front();
+ non_term_queue.pop_front();
+ // NOTE(review): operator[] default-inserts index 0 when 'label' is absent;
+ // this can only happen for 'root', since every other queued label was
+ // found in label2id -- confirm callers always pass a root present in
+ // 'ifst_array'.
+ size_t fst_id = label2id[label];
+
+ const Fst<Arc> *ifst = ifst_array[fst_id].second;
+ for (StateIterator< Fst<Arc> > siter(*ifst);
+ !siter.Done(); siter.Next()) {
+ StateId is = siter.Value();
+ StateId os = ofst->AddState();
+ if (is == ifst->Start()) {
+ fst_start[fst_id] = os;
+ if (label == root)
+ ofst->SetStart(os);
+ }
+ if (ifst->Final(is) != Weight::Zero()) {
+ if (label == root)
+ ofst->SetFinal(os, ifst->Final(is));
+ fst_final[fst_id].push_back(make_pair(os, ifst->Final(is)));
+ }
+ for (ArcIterator< Fst<Arc> > aiter(*ifst, is);
+ !aiter.Done(); aiter.Next()) {
+ Arc arc = aiter.Value();
+ if (max_label == kNoLabel || arc.olabel > max_label)
+ max_label = arc.olabel;
+ typename unordered_map<Label, size_t>::const_iterator it =
+ label2id.find(arc.olabel);
+ if (it != label2id.end()) {
+ size_t nfst_id = it->second;
+ // A non-terminal whose FST has no start state: the arc is skipped
+ // (never added to 'ofst').
+ if (ifst_array[nfst_id].second->Start() == -1)
+ continue;
+ if (non_term_set.count(arc.olabel) == 0) {
+ non_term_queue.push_back(arc.olabel);
+ non_term_set.insert(arc.olabel);
+ }
+ }
+ arc.nextstate += soff;
+ ofst->AddArc(os, arc);
+ }
+ }
+ }
+
+ // Changes each non-terminal transition to an open parenthesis
+ // transition redirected to the PDT state that corresponds to the
+ // start state of the input FST for the non-terminal. Adds close parenthesis
+ // transitions from the PDT states corr. to the final states of the
+ // input FST for the non-terminal to the former destination state of the
+ // non-terminal transition.
+
+ typedef MutableArcIterator< MutableFst<Arc> > MIter;
+ typedef unordered_map<pair<size_t, StateId >, size_t,
+ ReplaceParenHash<StateId> > ParenMap;
+
+ // Parenthesis pair ID per fst, state pair.
+ ParenMap paren_map;
+ // # of parenthesis pairs per fst.
+ vector<size_t> nparens(ifst_array.size(), 0);
+ // Initial open parenthesis label
+ Label first_paren = max_label + 1;
+
+ for (StateIterator< Fst<Arc> > siter(*ofst);
+ !siter.Done(); siter.Next()) {
+ StateId os = siter.Value();
+ // Heap-allocated so that it can be re-created when arcs are added to 'os'
+ // (see below); 'n' tracks the current arc position for re-seeking.
+ MIter *aiter = new MIter(ofst, os);
+ for (size_t n = 0; !aiter->Done(); aiter->Next(), ++n) {
+ Arc arc = aiter->Value();
+ typename unordered_map<Label, size_t>::const_iterator lit =
+ label2id.find(arc.olabel);
+ if (lit != label2id.end()) {
+ size_t nfst_id = lit->second;
+
+ // Get parentheses. Ensures distinct parenthesis pair per
+ // non-terminal and destination state but otherwise reuses them.
+ Label open_paren = kNoLabel, close_paren = kNoLabel;
+ pair<size_t, StateId> paren_key(nfst_id, arc.nextstate);
+ typename ParenMap::const_iterator pit = paren_map.find(paren_key);
+ if (pit != paren_map.end()) {
+ size_t paren_id = pit->second;
+ open_paren = (*parens)[paren_id].first;
+ close_paren = (*parens)[paren_id].second;
+ } else {
+ // Paren ids are numbered per FST, so the same id (and label pair) may
+ // be shared by different FSTs; a pair is appended to 'parens' only
+ // the first time its id is seen.
+ size_t paren_id = nparens[nfst_id]++;
+ open_paren = first_paren + 2 * paren_id;
+ close_paren = open_paren + 1;
+ paren_map[paren_key] = paren_id;
+ if (paren_id >= parens->size())
+ parens->push_back(make_pair(open_paren, close_paren));
+ }
+
+ // Sets open parenthesis.
+ Arc sarc(open_paren, open_paren, arc.weight, fst_start[nfst_id]);
+ aiter->SetValue(sarc);
+
+ // Adds close parentheses.
+ for (size_t i = 0; i < fst_final[nfst_id].size(); ++i) {
+ pair<StateId, Weight> &p = fst_final[nfst_id][i];
+ Arc farc(close_paren, close_paren, p.second, arc.nextstate);
+
+ ofst->AddArc(p.first, farc);
+ // Adding an arc to the state being iterated invalidates 'aiter';
+ // rebuild it and return to position 'n'.
+ if (os == p.first) { // Invalidated iterator
+ delete aiter;
+ aiter = new MIter(ofst, os);
+ aiter->Seek(n);
+ }
+ }
+ }
+ }
+ delete aiter;
+ }
+}
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_PDT_REPLACE_H__
diff --git a/src/include/fst/extensions/pdt/reverse.h b/src/include/fst/extensions/pdt/reverse.h
new file mode 100644
index 0000000..b20e1c5
--- /dev/null
+++ b/src/include/fst/extensions/pdt/reverse.h
@@ -0,0 +1,58 @@
+// reverse.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Reverse a PDT encoded as an FST.
+
+#ifndef FST_EXTENSIONS_PDT_REVERSE_H__
+#define FST_EXTENSIONS_PDT_REVERSE_H__
+
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <vector>
+using std::vector;
+
+#include <fst/mutable-fst.h>
+#include <fst/relabel.h>
+#include <fst/reverse.h>
+
+namespace fst {
+
+// Reverses a pushdown transducer (PDT) encoded as an FST: the underlying FST
+// is reversed, then each open parenthesis label is exchanged with its
+// matching close parenthesis label on both the input and output sides.
+template<class Arc, class RevArc>
+void Reverse(const Fst<Arc> &ifst,
+             const vector<pair<typename Arc::Label,
+                               typename Arc::Label> > &parens,
+             MutableFst<RevArc> *ofst) {
+  typedef typename Arc::Label Label;
+  typedef pair<Label, Label> LabelPair;
+
+  // Plain FST reversal first.
+  Reverse(ifst, ofst);
+
+  // Builds the open <-> close relabeling and applies it to both label sides.
+  vector<LabelPair> swaps;
+  for (size_t i = 0; i < parens.size(); ++i) {
+    const LabelPair &paren = parens[i];
+    swaps.push_back(LabelPair(paren.first, paren.second));
+    swaps.push_back(LabelPair(paren.second, paren.first));
+  }
+  Relabel(ofst, swaps, swaps);
+}
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_PDT_REVERSE_H__
diff --git a/src/include/fst/extensions/pdt/shortest-path.h b/src/include/fst/extensions/pdt/shortest-path.h
new file mode 100644
index 0000000..e90471b
--- /dev/null
+++ b/src/include/fst/extensions/pdt/shortest-path.h
@@ -0,0 +1,790 @@
+// shortest-path.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Functions to find shortest paths in a PDT.
+
+#ifndef FST_EXTENSIONS_PDT_SHORTEST_PATH_H__
+#define FST_EXTENSIONS_PDT_SHORTEST_PATH_H__
+
+#include <fst/shortest-path.h>
+#include <fst/extensions/pdt/paren.h>
+#include <fst/extensions/pdt/pdt.h>
+
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <tr1/unordered_set>
+using std::tr1::unordered_set;
+using std::tr1::unordered_multiset;
+#include <stack>
+#include <vector>
+using std::vector;
+
+namespace fst {
+
+// Options for PDT shortest path, parameterized on the arc and queue types.
+template <class Arc, class Queue>
+struct PdtShortestPathOptions {
+  // By default parentheses are not kept and path garbage collection is on.
+  PdtShortestPathOptions(bool keep_parens = false, bool collect = true)
+      : keep_parentheses(keep_parens), path_gc(collect) {}
+
+  bool keep_parentheses;
+  bool path_gc;
+};
+
+
+// Class to store PDT shortest path results. Stores shortest path
+// tree info 'Distance()', Parent(), and ArcParent() information keyed
+// on two types:
+// (1) By SearchState: This is a usual node in a shortest path tree but:
+// (a) is w.r.t a PDT search state - a pair of a PDT state and
+// a 'start' state, which is either the PDT start state or
+// the destination state of an open parenthesis.
+// (b) the Distance() is from this 'start' state to the search state.
+// (c) Parent().state is kNoLabel for the 'start' state.
+//
+// (2) By ParenSpec: This connects shortest path trees depending on the
+// the parenthesis taken. Given the parenthesis spec:
+// (a) the Distance() is from the Parent() 'start' state to the
+// parenthesis destination state.
+// (b) the ArcParent() is the parenthesis arc.
+template <class Arc>
+class PdtShortestPathData {
+ public:
+  static const uint8 kFinal;
+
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Weight Weight;
+  typedef typename Arc::Label Label;
+
+  // A search state: a PDT state paired with its sub-graph 'start' state.
+  struct SearchState {
+    SearchState() : state(kNoStateId), start(kNoStateId) {}
+
+    SearchState(StateId s, StateId t) : state(s), start(t) {}
+
+    bool operator==(const SearchState &s) const {
+      if (&s == this)
+        return true;
+      return s.state == this->state && s.start == this->start;
+    }
+
+    StateId state;  // PDT state
+    StateId start;  // PDT paren 'source' state
+  };
+
+
+  // Specifies paren id, source and dest 'start' states of a paren.
+  // These are the 'start' states of the respective sub-graphs.
+  struct ParenSpec {
+    ParenSpec()
+        : paren_id(kNoLabel), src_start(kNoStateId), dest_start(kNoStateId) {}
+
+    ParenSpec(Label id, StateId s, StateId d)
+        : paren_id(id), src_start(s), dest_start(d) {}
+
+    Label paren_id;      // Id of parenthesis
+    StateId src_start;   // sub-graph 'start' state for paren source.
+    StateId dest_start;  // sub-graph 'start' state for paren dest.
+
+    bool operator==(const ParenSpec &x) const {
+      if (&x == this)
+        return true;
+      return x.paren_id == this->paren_id &&
+             x.src_start == this->src_start &&
+             x.dest_start == this->dest_start;
+    }
+  };
+
+  // Per-key search record (used for both search states and paren specs).
+  struct SearchData {
+    SearchData() : distance(Weight::Zero()),
+                   parent(kNoStateId, kNoStateId),
+                   paren_id(kNoLabel),
+                   flags(0) {}
+
+    Weight distance;     // Distance to this state from PDT 'start' state
+    SearchState parent;  // Parent state in shortest path tree
+    int16 paren_id;      // If parent arc has paren, paren ID, o.w. kNoLabel
+    uint8 flags;         // First byte reserved for PdtShortestPathData use
+  };
+
+  // 'gc' enables garbage collection of search states (see GC()).
+  // The lookup-cache pointers start out null so that the one-entry caches
+  // are never consulted before they have been filled.
+  PdtShortestPathData(bool gc)
+      : state_(kNoStateId, kNoStateId),
+        state_data_(0),
+        paren_(kNoLabel, kNoStateId, kNoStateId),
+        paren_data_(0),
+        gc_(gc),
+        nstates_(0),
+        ngc_(0),
+        finished_(false) {}
+
+  ~PdtShortestPathData() {
+    VLOG(1) << "opm size: " << paren_map_.size();
+    VLOG(1) << "# of search states: " << nstates_;
+    if (gc_)
+      VLOG(1) << "# of GC'd search states: " << ngc_;
+  }
+
+  // Drops all search data and resets the counters. Both one-entry lookup
+  // caches are invalidated as well, since the entries they point at are
+  // destroyed when the maps are cleared.
+  void Clear() {
+    search_map_.clear();
+    search_multimap_.clear();
+    paren_map_.clear();
+    state_ = SearchState(kNoStateId, kNoStateId);
+    state_data_ = 0;
+    paren_ = ParenSpec(kNoLabel, kNoStateId, kNoStateId);
+    paren_data_ = 0;
+    nstates_ = 0;
+    ngc_ = 0;
+  }
+
+  Weight Distance(SearchState s) const {
+    SearchData *data = GetSearchData(s);
+    return data->distance;
+  }
+
+  Weight Distance(const ParenSpec &paren) const {
+    SearchData *data = GetSearchData(paren);
+    return data->distance;
+  }
+
+  SearchState Parent(SearchState s) const {
+    SearchData *data = GetSearchData(s);
+    return data->parent;
+  }
+
+  SearchState Parent(const ParenSpec &paren) const {
+    SearchData *data = GetSearchData(paren);
+    return data->parent;
+  }
+
+  Label ParenId(SearchState s) const {
+    SearchData *data = GetSearchData(s);
+    return data->paren_id;
+  }
+
+  uint8 Flags(SearchState s) const {
+    SearchData *data = GetSearchData(s);
+    return data->flags;
+  }
+
+  void SetDistance(SearchState s, Weight w) {
+    SearchData *data = GetSearchData(s);
+    data->distance = w;
+  }
+
+  void SetDistance(const ParenSpec &paren, Weight w) {
+    SearchData *data = GetSearchData(paren);
+    data->distance = w;
+  }
+
+  void SetParent(SearchState s, SearchState p) {
+    SearchData *data = GetSearchData(s);
+    data->parent = p;
+  }
+
+  void SetParent(const ParenSpec &paren, SearchState p) {
+    SearchData *data = GetSearchData(paren);
+    data->parent = p;
+  }
+
+  // The paren id is stored in an int16; larger ids are reported as an error.
+  void SetParenId(SearchState s, Label p) {
+    if (p >= 32768)
+      FSTERROR() << "PdtShortestPathData: Paren ID does not fits in an int16";
+    SearchData *data = GetSearchData(s);
+    data->paren_id = p;
+  }
+
+  // Replaces the flag bits selected by 'mask' with the corresponding bits
+  // of 'f'.
+  void SetFlags(SearchState s, uint8 f, uint8 mask) {
+    SearchData *data = GetSearchData(s);
+    data->flags &= ~mask;
+    data->flags |= f & mask;
+  }
+
+  void GC(StateId s);
+
+  // Switches to read-only access: lookups no longer create entries.
+  void Finish() { finished_ = true; }
+
+ private:
+  static const Arc kNoArc;
+  static const size_t kPrime0;
+  static const size_t kPrime1;
+  static const uint8 kInited;
+  static const uint8 kMarked;
+
+  // Hash for search state
+  struct SearchStateHash {
+    size_t operator()(const SearchState &s) const {
+      return s.state + s.start * kPrime0;
+    }
+  };
+
+  // Hash for paren map
+  struct ParenHash {
+    size_t operator()(const ParenSpec &paren) const {
+      return paren.paren_id + paren.src_start * kPrime0 +
+             paren.dest_start * kPrime1;
+    }
+  };
+
+  typedef unordered_map<SearchState, SearchData, SearchStateHash> SearchMap;
+
+  typedef unordered_multimap<StateId, StateId> SearchMultimap;
+
+  // Hash map from paren spec to open paren data
+  typedef unordered_map<ParenSpec, SearchData, ParenHash> ParenMap;
+
+  // Returns the record for search state 's', using a one-entry cache of the
+  // last state accessed. Before Finish(), a missing record is created (and
+  // registered for GC when enabled); after Finish(), a missing record
+  // yields the shared null record and nothing is inserted.
+  SearchData *GetSearchData(SearchState s) const {
+    if (state_data_ && s == state_)
+      return state_data_;
+    if (finished_) {
+      typename SearchMap::iterator it = search_map_.find(s);
+      if (it == search_map_.end())
+        return &null_search_data_;
+      state_ = s;
+      return state_data_ = &(it->second);
+    } else {
+      state_ = s;
+      state_data_ = &search_map_[s];
+      if (!(state_data_->flags & kInited)) {
+        ++nstates_;
+        if (gc_)
+          search_multimap_.insert(make_pair(s.start, s.state));
+        state_data_->flags = kInited;
+      }
+      return state_data_;
+    }
+  }
+
+  // Returns the record for paren spec 'paren', using a one-entry cache of
+  // the last paren spec accessed. Same creation rules as the search-state
+  // overload. The finished-mode hit updates the paren cache (paren_data_),
+  // not the search-state cache.
+  SearchData *GetSearchData(ParenSpec paren) const {
+    if (paren_data_ && paren == paren_)
+      return paren_data_;
+    if (finished_) {
+      typename ParenMap::iterator it = paren_map_.find(paren);
+      if (it == paren_map_.end())
+        return &null_search_data_;
+      paren_ = paren;
+      return paren_data_ = &(it->second);
+    } else {
+      paren_ = paren;
+      return paren_data_ = &paren_map_[paren];
+    }
+  }
+
+  mutable SearchMap search_map_;            // Maps from search state to data
+  mutable SearchMultimap search_multimap_;  // Maps from 'start' to subgraph
+  mutable ParenMap paren_map_;              // Maps paren spec to search data
+  mutable SearchState state_;               // Last state accessed
+  mutable SearchData *state_data_;          // Last state data accessed
+  mutable ParenSpec paren_;                 // Last paren spec accessed
+  mutable SearchData *paren_data_;          // Last paren data accessed
+  bool gc_;                                 // Allow GC?
+  mutable size_t nstates_;                  // Total number of search states
+  size_t ngc_;                              // Number of GC'd search states
+  mutable SearchData null_search_data_;     // Null search data
+  bool finished_;                           // Read-only access when true
+
+  DISALLOW_COPY_AND_ASSIGN(PdtShortestPathData);
+};
+
+// Deletes inaccessible search data from a given 'start' (open paren dest)
+// state. Assumes 'final' (close paren source or PDT final) states have
+// been flagged 'kFinal'.
+//
+// Works as a mark-and-sweep over the search states registered under 'start':
+// every state reachable by following parent links back from a 'final' state
+// is marked; unmarked states are erased. Does nothing when GC is disabled.
+template<class Arc>
+void PdtShortestPathData<Arc>::GC(StateId start) {
+  if (!gc_)
+    return;
+
+  typedef typename SearchMultimap::iterator MultimapIter;
+
+  // Collects the 'final' states of this sub-graph. equal_range() yields
+  // exactly the entries keyed on 'start'.
+  vector<StateId> final;
+  pair<MultimapIter, MultimapIter> range = search_multimap_.equal_range(start);
+  for (MultimapIter mmit = range.first; mmit != range.second; ++mmit) {
+    SearchState s(mmit->second, start);
+    const SearchData &data = search_map_[s];
+    if (data.flags & kFinal)
+      final.push_back(s.state);
+  }
+
+  // Mark phase
+  for (size_t i = 0; i < final.size(); ++i) {
+    SearchState s(final[i], start);
+    while (s.state != kNoLabel) {
+      SearchData *sdata = &search_map_[s];
+      if (sdata->flags & kMarked)
+        break;
+      sdata->flags |= kMarked;
+      SearchState p = sdata->parent;
+      if (p.start != start && p.start != kNoLabel) {  // entering sub-subgraph
+        ParenSpec paren(sdata->paren_id, s.start, p.start);
+        SearchData *pdata = &paren_map_[paren];
+        s = pdata->parent;
+      } else {
+        s = p;
+      }
+    }
+  }
+
+  // Sweep phase
+  range = search_multimap_.equal_range(start);
+  for (MultimapIter mmit = range.first; mmit != range.second; ) {
+    SearchState s(mmit->second, start);
+    typename SearchMap::iterator mit = search_map_.find(s);
+    if (mit != search_map_.end() && !(mit->second.flags & kMarked)) {
+      // The one-entry lookup cache must not keep pointing at an entry that
+      // is about to be destroyed.
+      if (s == state_) {
+        state_ = SearchState(kNoStateId, kNoStateId);
+        state_data_ = &null_search_data_;
+      }
+      search_map_.erase(mit);
+      ++ngc_;
+    }
+    // Erasing only invalidates the erased iterator, so advance first.
+    search_multimap_.erase(mmit++);
+  }
+}
+
+// Out-of-class definitions of PdtShortestPathData's static constants.
+
+// Arc with no labels, zero weight and no destination state.
+template<class Arc> const Arc PdtShortestPathData<Arc>::kNoArc
+ = Arc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId);
+
+// Multipliers used by the SearchStateHash and ParenHash functors.
+template<class Arc> const size_t PdtShortestPathData<Arc>::kPrime0 = 7853;
+
+template<class Arc> const size_t PdtShortestPathData<Arc>::kPrime1 = 7867;
+
+// SearchData::flags bits: record initialized, state is 'final', and state
+// marked during GC, respectively.
+template<class Arc> const uint8 PdtShortestPathData<Arc>::kInited = 0x01;
+
+template<class Arc> const uint8 PdtShortestPathData<Arc>::kFinal = 0x02;
+
+template<class Arc> const uint8 PdtShortestPathData<Arc>::kMarked = 0x04;
+
+
+// This computes the single source shortest (balanced) path (SSSP)
+// through a weighted PDT that has a bounded stack (i.e. is expandable
+// as an FST). It is a generalization of the classic SSSP graph
+// algorithm that removes a state s from a queue (defined by a
+// user-provided queue type) and relaxes the destination states of
+// transitions leaving s. In this PDT version, states that have
+// entering open parentheses are treated as source states for a
+// sub-graph SSSP problem with the shortest path up to the open
+// parenthesis being first saved. When a close parenthesis is then
+// encountered any balancing open parenthesis is examined for this
+// saved information and multiplied back. In this way, each sub-graph
+// is entered only once rather than repeatedly. If every state in the
+// input PDT has the property that there is a unique 'start' state for
+// it with entering open parentheses, then this algorithm is quite
+// straight-forward. In general, this will not be the case, so the
+// algorithm (implicitly) creates a new graph where each state is a
+// pair of an original state and a possible parenthesis 'start' state
+// for that state.
+template<class Arc, class Queue>
+class PdtShortestPath {
+ public:
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Weight Weight;
+ typedef typename Arc::Label Label;
+
+ typedef PdtShortestPathData<Arc> SpData;
+ typedef typename SpData::SearchState SearchState;
+ typedef typename SpData::ParenSpec ParenSpec;
+
+ typedef typename PdtParenReachable<Arc>::SetIterator StateSetIterator;
+ typedef typename PdtBalanceData<Arc>::SetIterator CloseSourceIterator;
+
+ PdtShortestPath(const Fst<Arc> &ifst,
+ const vector<pair<Label, Label> > &parens,
+ const PdtShortestPathOptions<Arc, Queue> &opts)
+ : kFinal(SpData::kFinal),
+ ifst_(ifst.Copy()),
+ parens_(parens),
+ keep_parens_(opts.keep_parentheses),
+ start_(ifst.Start()),
+ sp_data_(opts.path_gc),
+ error_(false) {
+
+ if ((Weight::Properties() & (kPath | kRightSemiring))
+ != (kPath | kRightSemiring)) {
+ FSTERROR() << "SingleShortestPath: Weight needs to have the path"
+ << " property and be right distributive: " << Weight::Type();
+ error_ = true;
+ }
+
+ for (Label i = 0; i < parens.size(); ++i) {
+ const pair<Label, Label> &p = parens[i];
+ paren_id_map_[p.first] = i;
+ paren_id_map_[p.second] = i;
+ }
+ };
+
+ ~PdtShortestPath() {
+ VLOG(1) << "# of input states: " << CountStates(*ifst_);
+ VLOG(1) << "# of enqueued: " << nenqueued_;
+ VLOG(1) << "cpmm size: " << close_paren_multimap_.size();
+ delete ifst_;
+ }
+
+ void ShortestPath(MutableFst<Arc> *ofst) {
+ Init(ofst);
+ GetDistance(start_);
+ GetPath();
+ sp_data_.Finish();
+ if (error_) ofst->SetProperties(kError, kError);
+ }
+
+ const PdtShortestPathData<Arc> &GetShortestPathData() const {
+ return sp_data_;
+ }
+
+ PdtBalanceData<Arc> *GetBalanceData() { return &balance_data_; }
+
+ private:
+ static const Arc kNoArc;
+ static const uint8 kEnqueued;
+ static const uint8 kExpanded;
+ const uint8 kFinal;
+
+ public:
+ // Hash multimap from close paren label to a paren arc.
+ typedef unordered_multimap<ParenState<Arc>, Arc,
+ typename ParenState<Arc>::Hash> CloseParenMultimap;
+
+ const CloseParenMultimap &GetCloseParenMultimap() const {
+ return close_paren_multimap_;
+ }
+
+ private:
+ void Init(MutableFst<Arc> *ofst);
+ void GetDistance(StateId start);
+ void ProcFinal(SearchState s);
+ void ProcArcs(SearchState s);
+ void ProcOpenParen(Label paren_id, SearchState s, Arc arc, Weight w);
+ void ProcCloseParen(Label paren_id, SearchState s, const Arc &arc, Weight w);
+ void ProcNonParen(SearchState s, const Arc &arc, Weight w);
+ void Relax(SearchState s, SearchState t, Arc arc, Weight w, Label paren_id);
+ void Enqueue(SearchState d);
+ void GetPath();
+ Arc GetPathArc(SearchState s, SearchState p, Label paren_id, bool open);
+
+ Fst<Arc> *ifst_;
+ MutableFst<Arc> *ofst_;
+ const vector<pair<Label, Label> > &parens_;
+ bool keep_parens_;
+ Queue *state_queue_; // current state queue
+ StateId start_;
+ Weight f_distance_;
+ SearchState f_parent_;
+ SpData sp_data_;
+ unordered_map<Label, Label> paren_id_map_;
+ CloseParenMultimap close_paren_multimap_;
+ PdtBalanceData<Arc> balance_data_;
+ ssize_t nenqueued_;
+ bool error_;
+
+ DISALLOW_COPY_AND_ASSIGN(PdtShortestPath);
+};
+
+// Prepares a fresh search: empties 'ofst', copies the symbol tables,
+// resets all per-run state, and then indexes the paren arcs of the input.
+// Destination states of open-paren arcs are recorded in balance_data_;
+// close-paren arcs are stored in close_paren_multimap_ keyed by
+// (paren id, source state).
+template<class Arc, class Queue>
+void PdtShortestPath<Arc, Queue>::Init(MutableFst<Arc> *ofst) {
+  ofst_ = ofst;
+  ofst->DeleteStates();
+  ofst->SetInputSymbols(ifst_->InputSymbols());
+  ofst->SetOutputSymbols(ifst_->OutputSymbols());
+
+  // Reset before the early return below, so an input without a start state
+  // still leaves well-defined values behind: the destructor logs
+  // nenqueued_ and GetPath() reads f_parent_.
+  f_distance_ = Weight::Zero();
+  f_parent_ = SearchState(kNoStateId, kNoStateId);
+  sp_data_.Clear();
+  close_paren_multimap_.clear();
+  balance_data_.Clear();
+  nenqueued_ = 0;
+
+  if (ifst_->Start() == kNoStateId)
+    return;
+
+  // Find open parens per destination state and close parens per source state.
+  for (StateIterator<Fst<Arc> > siter(*ifst_); !siter.Done(); siter.Next()) {
+    StateId s = siter.Value();
+    for (ArcIterator<Fst<Arc> > aiter(*ifst_, s);
+         !aiter.Done(); aiter.Next()) {
+      const Arc &arc = aiter.Value();
+      typename unordered_map<Label, Label>::const_iterator pit
+          = paren_id_map_.find(arc.ilabel);
+      if (pit != paren_id_map_.end()) {               // Is a paren?
+        Label paren_id = pit->second;
+        if (arc.ilabel == parens_[paren_id].first) {  // Open paren
+          balance_data_.OpenInsert(paren_id, arc.nextstate);
+        } else {                                      // Close paren
+          ParenState<Arc> paren_state(paren_id, s);
+          close_paren_multimap_.insert(make_pair(paren_state, arc));
+        }
+      }
+    }
+  }
+}
+
+// Computes the shortest distance stored in a recursive way. Each
+// sub-graph (i.e. different paren 'start' state) begins with weight One().
+template<class Arc, class Queue>
+void PdtShortestPath<Arc, Queue>::GetDistance(StateId start) {
+ if (start == kNoStateId)
+ return;
+
+ // The queue lives on this call's stack; state_queue_ is pointed at it so
+ // that Enqueue() works on the queue of the sub-graph being searched.
+ // ProcOpenParen() saves and restores state_queue_ around its recursive
+ // call into this function.
+ Queue state_queue;
+ state_queue_ = &state_queue;
+ SearchState q(start, start);
+ Enqueue(q);
+ sp_data_.SetDistance(q, Weight::One());
+
+ while (!state_queue_->Empty()) {
+ StateId state = state_queue_->Head();
+ state_queue_->Dequeue();
+ SearchState s(state, start);
+ // Presumably clears the kEnqueued bit now that 's' has left the queue
+ // (Enqueue() sets it with SetFlags(s, kEnqueued, kEnqueued)).
+ sp_data_.SetFlags(s, 0, kEnqueued);
+ ProcFinal(s);
+ ProcArcs(s);
+ // ProcCloseParen() checks this bit to record a close-paren source only
+ // the first time the state is expanded.
+ sp_data_.SetFlags(s, kExpanded, kExpanded);
+ }
+ balance_data_.FinishInsert(start);
+ sp_data_.GC(start);
+}
+
+// Records 's' as the end of the best complete path when it is a final
+// state reached in the top-level sub-graph and its total weight improves
+// on the best final distance seen so far.
+template<class Arc, class Queue>
+void PdtShortestPath<Arc, Queue>::ProcFinal(SearchState s) {
+  const bool top_level_final =
+      ifst_->Final(s.state) != Weight::Zero() && s.start == start_;
+  if (!top_level_final)
+    return;
+
+  Weight total = Times(sp_data_.Distance(s), ifst_->Final(s.state));
+  Weight improved = Plus(f_distance_, total);
+  if (f_distance_ != improved) {
+    // Move the kFinal mark from the previous best state (if any) to 's'.
+    if (f_parent_.state != kNoStateId)
+      sp_data_.SetFlags(f_parent_, 0, kFinal);
+    sp_data_.SetFlags(s, kFinal, kFinal);
+
+    f_distance_ = improved;
+    f_parent_ = s;
+  }
+}
+
+// Hands every arc leaving search state 's' to the open-paren, close-paren
+// or non-paren handler, together with the distance to 's' extended by the
+// arc's weight.
+template<class Arc, class Queue>
+void PdtShortestPath<Arc, Queue>::ProcArcs(SearchState s) {
+  typedef typename unordered_map<Label, Label>::const_iterator ParenIdIter;
+
+  ArcIterator< Fst<Arc> > aiter(*ifst_, s.state);
+  while (!aiter.Done()) {
+    Arc arc = aiter.Value();
+    Weight w = Times(sp_data_.Distance(s), arc.weight);
+
+    ParenIdIter pit = paren_id_map_.find(arc.ilabel);
+    if (pit == paren_id_map_.end()) {
+      // Label is not a parenthesis.
+      ProcNonParen(s, arc, w);
+    } else {
+      Label paren_id = pit->second;
+      const bool is_open = arc.ilabel == parens_[paren_id].first;
+      if (is_open)
+        ProcOpenParen(paren_id, s, arc, w);
+      else
+        ProcCloseParen(paren_id, s, arc, w);
+    }
+    aiter.Next();
+  }
+}
+
+// Saves the shortest path info for reaching this parenthesis
+// and starts a new SSSP in the sub-graph pointed to by the parenthesis
+// if previously unvisited. Otherwise it finds any previously encountered
+// closing parentheses and relaxes them using the recursively stored
+// shortest distance to them.
+template<class Arc, class Queue> inline
+void PdtShortestPath<Arc, Queue>::ProcOpenParen(
+ Label paren_id, SearchState s, Arc arc, Weight w) {
+
+ // 'd' is the root of the sub-graph entered through this open paren: both
+ // its state and its paren 'start' are the arc's destination.
+ SearchState d(arc.nextstate, arc.nextstate);
+ ParenSpec paren(paren_id, s.start, d.start);
+ Weight pdist = sp_data_.Distance(paren);
+ // Continue only if 'w' improves on the distance stored for this paren.
+ if (pdist != Plus(pdist, w)) {
+ sp_data_.SetDistance(paren, w);
+ sp_data_.SetParent(paren, s);
+ Weight dist = sp_data_.Distance(d);
+ // A Zero distance is taken to mean the sub-graph has not been searched
+ // yet. GetDistance() points state_queue_ at its own local queue, so the
+ // caller's queue pointer is saved and restored around the recursion.
+ if (dist == Weight::Zero()) {
+ Queue *state_queue = state_queue_;
+ GetDistance(d.start);
+ state_queue_ = state_queue;
+ }
+ // For each sub-graph state recorded in balance_data_ as the source of a
+ // matching close paren, relax every such close-paren arc. The target
+ // is placed in the sub-graph of 's' (Relax() uses s.start), with weight
+ // w * distance-within-sub-graph * close-arc weight.
+ for (CloseSourceIterator set_iter =
+ balance_data_.Find(paren_id, arc.nextstate);
+ !set_iter.Done(); set_iter.Next()) {
+ SearchState cpstate(set_iter.Element(), d.start);
+ ParenState<Arc> paren_state(paren_id, cpstate.state);
+ for (typename CloseParenMultimap::const_iterator cpit =
+ close_paren_multimap_.find(paren_state);
+ cpit != close_paren_multimap_.end() && paren_state == cpit->first;
+ ++cpit) {
+ const Arc &cparc = cpit->second;
+ Weight cpw = Times(w, Times(sp_data_.Distance(cpstate),
+ cparc.weight));
+ Relax(cpstate, s, cparc, cpw, paren_id);
+ }
+ }
+ }
+}
+
+// Saves the correspondence between each closing parenthesis and its
+// balancing open parenthesis info. Relaxes any close parenthesis
+// destination state that has a balancing previously encountered open
+// parenthesis.
+template<class Arc, class Queue> inline
+void PdtShortestPath<Arc, Queue>::ProcCloseParen(
+    Label paren_id, SearchState s, const Arc &arc, Weight w) {
+  // 'arc' and 'w' are not needed here: the close-paren arc itself is
+  // relaxed later from ProcOpenParen(), which looks it up again through
+  // balance_data_ and close_paren_multimap_.
+  if (!(sp_data_.Flags(s) & kExpanded)) {
+    // First expansion of 's': remember it as a close-paren source for the
+    // sub-graph rooted at s.start and mark it with kFinal.
+    balance_data_.CloseInsert(paren_id, s.start, s.state);
+    sp_data_.SetFlags(s, kFinal, kFinal);
+  }
+}
+
+// For non-parentheses, classical relaxation.
+template<class Arc, class Queue> inline
+void PdtShortestPath<Arc, Queue>::ProcNonParen(
+ SearchState s, const Arc &arc, Weight w) {
+ Relax(s, s, arc, w, kNoLabel);
+}
+
+// Classical relaxation on the search graph for 'arc' leaving state 's'.
+// The destination is placed in the sub-graph of state 't' (i.e. it gets
+// the same paren 'start' as 't').
+template<class Arc, class Queue> inline
+void PdtShortestPath<Arc, Queue>::Relax(
+    SearchState s, SearchState t, Arc arc, Weight w, Label paren_id) {
+  SearchState dest(arc.nextstate, t.start);
+  const Weight old_dist = sp_data_.Distance(dest);
+  const Weight new_dist = Plus(old_dist, w);
+  if (old_dist != new_dist) {
+    // 'w' improves the distance: record how 'dest' was reached and
+    // (re)schedule it.
+    sp_data_.SetParent(dest, s);
+    sp_data_.SetParenId(dest, paren_id);
+    sp_data_.SetDistance(dest, new_dist);
+    Enqueue(dest);
+  }
+}
+
+// Puts search state 's' on the current queue and counts it, or -- if it
+// is already flagged as enqueued -- asks the queue to update its entry.
+template<class Arc, class Queue> inline
+void PdtShortestPath<Arc, Queue>::Enqueue(SearchState s) {
+  const bool already_queued = (sp_data_.Flags(s) & kEnqueued) != 0;
+  if (already_queued) {
+    state_queue_->Update(s.state);
+    return;
+  }
+  state_queue_->Enqueue(s.state);
+  sp_data_.SetFlags(s, kEnqueued, kEnqueued);
+  ++nenqueued_;
+}
+
+// Follows parent pointers to find the shortest path. Uses a stack
+// since the shortest distance is stored recursively.
+template<class Arc, class Queue>
+void PdtShortestPath<Arc, Queue>::GetPath() {
+ // 's' is the state currently being emitted and 'd' the one emitted just
+ // before it (its successor on the path, since we walk backwards from the
+ // best final state). 's_p' and 'd_p' are their ids in the output FST.
+ SearchState s = f_parent_, d = SearchState(kNoStateId, kNoStateId);
+ StateId s_p = kNoStateId, d_p = kNoStateId;
+ Arc arc(kNoArc);
+ Label paren_id = kNoLabel;
+ stack<ParenSpec> paren_stack;
+ while (s.state != kNoStateId) {
+ d_p = s_p;
+ s_p = ofst_->AddState();
+ if (d.state == kNoStateId) {
+ // First iteration: 's' is the final state of the path.
+ ofst_->SetFinal(s_p, ifst_->Final(f_parent_.state));
+ } else {
+ if (paren_id != kNoLabel) { // paren?
+ // Walking backwards, a close paren is met before its open paren: push
+ // on close, pop on open.
+ if (arc.ilabel == parens_[paren_id].first) { // open paren
+ paren_stack.pop();
+ } else { // close paren
+ ParenSpec paren(paren_id, d.start, s.start);
+ paren_stack.push(paren);
+ }
+ // Unless parentheses are kept, relabel the paren arc with label 0.
+ if (!keep_parens_)
+ arc.ilabel = arc.olabel = 0;
+ }
+ arc.nextstate = d_p;
+ ofst_->AddArc(s_p, arc);
+ }
+ d = s;
+ s = sp_data_.Parent(d);
+ paren_id = sp_data_.ParenId(d);
+ if (s.state != kNoStateId) {
+ // Ordinary step: non-paren arc (paren_id == kNoLabel) or close paren.
+ arc = GetPathArc(s, d, paren_id, false);
+ } else if (!paren_stack.empty()) {
+ // 'd' has no parent within its sub-graph but a close paren is still
+ // pending: leave the sub-graph through the matching open paren, whose
+ // source state was stored as the parent of the paren spec.
+ ParenSpec paren = paren_stack.top();
+ s = sp_data_.Parent(paren);
+ paren_id = paren.paren_id;
+ arc = GetPathArc(s, d, paren_id, true);
+ }
+ }
+ // The last state added corresponds to the beginning of the path.
+ ofst_->SetStart(s_p);
+ ofst_->SetProperties(
+ ShortestPathProperties(ofst_->Properties(kFstProperties, false)),
+ kFstProperties);
+}
+
+
+// Finds transition with least weight between two states with label matching
+// paren_id and open/close paren type or a non-paren if kNoLabel.
+template<class Arc, class Queue>
+Arc PdtShortestPath<Arc, Queue>::GetPathArc(
+ SearchState s, SearchState d, Label paren_id, bool open_paren) {
+ // Starts from kNoArc (weight Zero, nextstate kNoStateId) so that any
+ // matching arc replaces it.
+ Arc path_arc = kNoArc;
+ for (ArcIterator< Fst<Arc> > aiter(*ifst_, s.state);
+ !aiter.Done();
+ aiter.Next()) {
+ const Arc &arc = aiter.Value();
+ if (arc.nextstate != d.state)
+ continue;
+ // Classify the arc: kNoLabel for a non-paren, otherwise its paren id;
+ // paren arcs of the wrong kind (open vs. close) are skipped.
+ Label arc_paren_id = kNoLabel;
+ typename unordered_map<Label, Label>::const_iterator pit
+ = paren_id_map_.find(arc.ilabel);
+ if (pit != paren_id_map_.end()) {
+ arc_paren_id = pit->second;
+ bool arc_open_paren = arc.ilabel == parens_[arc_paren_id].first;
+ if (arc_open_paren != open_paren)
+ continue;
+ }
+ if (arc_paren_id != paren_id)
+ continue;
+ // Keep the arc if Plus() of the two weights selects its weight.
+ if (arc.weight == Plus(arc.weight, path_arc.weight))
+ path_arc = arc;
+ }
+ // Still kNoArc: no candidate matched. Report it and flag the error, which
+ // ShortestPath() turns into the kError property on the output.
+ if (path_arc.nextstate == kNoStateId) {
+ FSTERROR() << "PdtShortestPath::GetPathArc failed to find arc";
+ error_ = true;
+ }
+ return path_arc;
+}
+
+template<class Arc, class Queue>
+const Arc PdtShortestPath<Arc, Queue>::kNoArc
+ = Arc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId);
+
+template<class Arc, class Queue>
+const uint8 PdtShortestPath<Arc, Queue>::kEnqueued = 0x10;
+
+template<class Arc, class Queue>
+const uint8 PdtShortestPath<Arc, Queue>::kExpanded = 0x20;
+
+template<class Arc, class Queue>
+void ShortestPath(const Fst<Arc> &ifst,
+ const vector<pair<typename Arc::Label,
+ typename Arc::Label> > &parens,
+ MutableFst<Arc> *ofst,
+ const PdtShortestPathOptions<Arc, Queue> &opts) {
+ PdtShortestPath<Arc, Queue> psp(ifst, parens, opts);
+ psp.ShortestPath(ofst);
+}
+
+// Convenience overload: computes the PDT shortest path with
+// default-constructed options and a FIFO queue over state ids.
+template<class Arc>
+void ShortestPath(const Fst<Arc> &ifst,
+                  const vector<pair<typename Arc::Label,
+                                    typename Arc::Label> > &parens,
+                  MutableFst<Arc> *ofst) {
+  typedef FifoQueue<typename Arc::StateId> FifoStateQueue;
+  typedef PdtShortestPath<Arc, FifoStateQueue> Solver;
+
+  PdtShortestPathOptions<Arc, FifoStateQueue> default_opts;
+  Solver solver(ifst, parens, default_opts);
+  solver.ShortestPath(ofst);
+}
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_PDT_SHORTEST_PATH_H__
diff --git a/src/include/fst/factor-weight.h b/src/include/fst/factor-weight.h
new file mode 100644
index 0000000..ce0d58d
--- /dev/null
+++ b/src/include/fst/factor-weight.h
@@ -0,0 +1,476 @@
+// factor-weight.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// Classes to factor weights in an FST.
+
+#ifndef FST_LIB_FACTOR_WEIGHT_H__
+#define FST_LIB_FACTOR_WEIGHT_H__
+
+#include <algorithm>
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <fst/slist.h>
+#include <string>
+#include <utility>
+using std::pair; using std::make_pair;
+#include <vector>
+using std::vector;
+
+#include <fst/cache.h>
+#include <fst/test-properties.h>
+
+
+namespace fst {
+
+const uint32 kFactorFinalWeights = 0x00000001;
+const uint32 kFactorArcWeights = 0x00000002;
+
+// Options for FactorWeightFst: the CacheOptions base plus the factoring
+// parameters below.
+template <class Arc>
+struct FactorWeightOptions : CacheOptions {
+ typedef typename Arc::Label Label;
+ float delta; // quantization delta applied to residual weights
+ uint32 mode; // factor arc weights and/or final weights
+ Label final_ilabel; // input label of arc created when factoring final w's
+ Label final_olabel; // output label of arc created when factoring final w's
+
+ // Explicit cache options and delta; mode defaults to factoring both arc
+ // and final weights, final labels default to 0.
+ FactorWeightOptions(const CacheOptions &opts, float d,
+ uint32 m = kFactorArcWeights | kFactorFinalWeights,
+ Label il = 0, Label ol = 0)
+ : CacheOptions(opts), delta(d), mode(m), final_ilabel(il),
+ final_olabel(ol) {}
+
+ // Default-constructed cache options with an explicit delta.
+ explicit FactorWeightOptions(
+ float d, uint32 m = kFactorArcWeights | kFactorFinalWeights,
+ Label il = 0, Label ol = 0)
+ : delta(d), mode(m), final_ilabel(il), final_olabel(ol) {}
+
+ // Uses kDelta as delta. All arguments are defaulted, so this is also the
+ // default constructor; it is not 'explicit', so a uint32 mode converts
+ // implicitly to options.
+ FactorWeightOptions(uint32 m = kFactorArcWeights | kFactorFinalWeights,
+ Label il = 0, Label ol = 0)
+ : delta(kDelta), mode(m), final_ilabel(il), final_olabel(ol) {}
+};
+
+
+// A factor iterator takes as argument a weight w and returns a
+// sequence of pairs of weights (xi,yi) such that the sum of the
+// products xi times yi is equal to w. If w is fully factored,
+// the iterator should return nothing.
+//
+// template <class W>
+// class FactorIterator {
+// public:
+// FactorIterator(W w);
+// bool Done() const;
+// void Next();
+// pair<W, W> Value() const;
+// void Reset();
+// }
+
+
+// Factor trivially.
+template <class W>
+class IdentityFactor {
+ public:
+ IdentityFactor(const W &w) {}
+ bool Done() const { return true; }
+ void Next() {}
+ pair<W, W> Value() const { return make_pair(W::One(), W::One()); } // unused
+ void Reset() {}
+};
+
+
+// Factors a StringWeight w as 'ab', where 'a' is its first label and 'b'
+// holds the remaining labels. Yields at most one factorization; weights
+// of size <= 1 are treated as fully factored.
+template <typename L, StringType S = STRING_LEFT>
+class StringFactor {
+ public:
+  StringFactor(const StringWeight<L, S> &w)
+      : weight_(w), done_(w.Size() <= 1) {}
+
+  // True once the single factorization has been consumed (or there is none).
+  bool Done() const { return done_; }
+
+  void Next() { done_ = true; }
+
+  // Returns (first label, rest of the string).
+  pair< StringWeight<L, S>, StringWeight<L, S> > Value() const {
+    StringWeightIterator<L, S> labels(weight_);
+    StringWeight<L, S> head(labels.Value());
+    StringWeight<L, S> tail;
+    labels.Next();
+    while (!labels.Done()) {
+      tail.PushBack(labels.Value());
+      labels.Next();
+    }
+    return make_pair(head, tail);
+  }
+
+  void Reset() { done_ = weight_.Size() <= 1; }
+
+ private:
+  StringWeight<L, S> weight_;   // weight being factored
+  bool done_;                   // iteration state
+};
+
+
+// Factors a GallicWeight by applying StringFactor to its string component
+// (Value1). The first factor keeps the original Value2; the second gets
+// W::One(). Weights whose string has size <= 1 are treated as fully
+// factored.
+template <class L, class W, StringType S = STRING_LEFT>
+class GallicFactor {
+ public:
+  GallicFactor(const GallicWeight<L, W, S> &w)
+      : weight_(w), done_(w.Value1().Size() <= 1) {}
+
+  bool Done() const { return done_; }
+
+  void Next() { done_ = true; }
+
+  pair< GallicWeight<L, W, S>, GallicWeight<L, W, S> > Value() const {
+    StringFactor<L, S> iter(weight_.Value1());
+    // Split the string once and reuse both halves; each call to
+    // StringFactor::Value() walks and copies the whole string.
+    const pair< StringWeight<L, S>, StringWeight<L, S> > parts = iter.Value();
+    GallicWeight<L, W, S> w1(parts.first, weight_.Value2());
+    GallicWeight<L, W, S> w2(parts.second, W::One());
+    return make_pair(w1, w2);
+  }
+
+  void Reset() { done_ = weight_.Value1().Size() <= 1; }
+
+ private:
+  GallicWeight<L, W, S> weight_;   // weight being factored
+  bool done_;                      // iteration state
+};
+
+
+// Implementation class for FactorWeight
+template <class A, class F>
+class FactorWeightFstImpl
+ : public CacheImpl<A> {
+ public:
+ using FstImpl<A>::SetType;
+ using FstImpl<A>::SetProperties;
+ using FstImpl<A>::SetInputSymbols;
+ using FstImpl<A>::SetOutputSymbols;
+
+ using CacheBaseImpl< CacheState<A> >::PushArc;
+ using CacheBaseImpl< CacheState<A> >::HasStart;
+ using CacheBaseImpl< CacheState<A> >::HasFinal;
+ using CacheBaseImpl< CacheState<A> >::HasArcs;
+ using CacheBaseImpl< CacheState<A> >::SetArcs;
+ using CacheBaseImpl< CacheState<A> >::SetFinal;
+ using CacheBaseImpl< CacheState<A> >::SetStart;
+
+ typedef A Arc;
+ typedef typename A::Label Label;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+ typedef F FactorIterator;
+
+ struct Element {
+ Element() {}
+
+ Element(StateId s, Weight w) : state(s), weight(w) {}
+
+ StateId state; // Input state Id
+ Weight weight; // Residual weight
+ };
+
+ FactorWeightFstImpl(const Fst<A> &fst, const FactorWeightOptions<A> &opts)
+ : CacheImpl<A>(opts),
+ fst_(fst.Copy()),
+ delta_(opts.delta),
+ mode_(opts.mode),
+ final_ilabel_(opts.final_ilabel),
+ final_olabel_(opts.final_olabel) {
+ SetType("factor_weight");
+ uint64 props = fst.Properties(kFstProperties, false);
+ SetProperties(FactorWeightProperties(props), kCopyProperties);
+
+ SetInputSymbols(fst.InputSymbols());
+ SetOutputSymbols(fst.OutputSymbols());
+
+ if (mode_ == 0)
+ LOG(WARNING) << "FactorWeightFst: factor mode is set to 0: "
+ << "factoring neither arc weights nor final weights.";
+ }
+
+ FactorWeightFstImpl(const FactorWeightFstImpl<A, F> &impl)
+ : CacheImpl<A>(impl),
+ fst_(impl.fst_->Copy(true)),
+ delta_(impl.delta_),
+ mode_(impl.mode_),
+ final_ilabel_(impl.final_ilabel_),
+ final_olabel_(impl.final_olabel_) {
+ SetType("factor_weight");
+ SetProperties(impl.Properties(), kCopyProperties);
+ SetInputSymbols(impl.InputSymbols());
+ SetOutputSymbols(impl.OutputSymbols());
+ }
+
+ ~FactorWeightFstImpl() {
+ delete fst_;
+ }
+
+ StateId Start() {
+ if (!HasStart()) {
+ StateId s = fst_->Start();
+ if (s == kNoStateId)
+ return kNoStateId;
+ StateId start = FindState(Element(fst_->Start(), Weight::One()));
+ SetStart(start);
+ }
+ return CacheImpl<A>::Start();
+ }
+
+ // Returns the final weight of state 's', computing and caching it on
+ // first use.
+ Weight Final(StateId s) {
+ if (!HasFinal(s)) {
+ const Element &e = elements_[s];
+ // An element with no input state (kNoStateId) carries only its residual
+ // weight; otherwise the residual is multiplied by the input state's
+ // final weight.
+ // TODO: fix so cast is unnecessary
+ Weight w = e.state == kNoStateId
+ ? e.weight
+ : (Weight) Times(e.weight, fst_->Final(e.state));
+ FactorIterator f(w);
+ // If final weights are not factored, or 'w' has no further factors, the
+ // state is final with weight 'w'. Otherwise it is made non-final here
+ // and Expand() emits the factors as arcs instead.
+ if (!(mode_ & kFactorFinalWeights) || f.Done())
+ SetFinal(s, w);
+ else
+ SetFinal(s, Weight::Zero());
+ }
+ return CacheImpl<A>::Final(s);
+ }
+
+ size_t NumArcs(StateId s) {
+ if (!HasArcs(s))
+ Expand(s);
+ return CacheImpl<A>::NumArcs(s);
+ }
+
+ size_t NumInputEpsilons(StateId s) {
+ if (!HasArcs(s))
+ Expand(s);
+ return CacheImpl<A>::NumInputEpsilons(s);
+ }
+
+ size_t NumOutputEpsilons(StateId s) {
+ if (!HasArcs(s))
+ Expand(s);
+ return CacheImpl<A>::NumOutputEpsilons(s);
+ }
+
+ uint64 Properties() const { return Properties(kFstProperties); }
+
+ // Set error if found; return FST impl properties.
+ uint64 Properties(uint64 mask) const {
+ if ((mask & kError) && fst_->Properties(kError, false))
+ SetProperties(kError, kError);
+ return FstImpl<Arc>::Properties(mask);
+ }
+
+ void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
+ if (!HasArcs(s))
+ Expand(s);
+ CacheImpl<A>::InitArcIterator(s, data);
+ }
+
+
+ // Find state corresponding to an element. Create new state
+ // if element not found.
+ //
+ // When arc weights are not factored, elements with residual One() that
+ // refer to a real input state are looked up by direct index in
+ // unfactored_. All other elements go through the hash map. That includes
+ // elements with state kNoStateId, which Expand() creates when factoring
+ // final weights: kNoStateId is negative, so it must never be used as an
+ // index into unfactored_. Without this guard the size comparison below,
+ // after conversion to unsigned, would grow the vector without bound.
+ StateId FindState(const Element &e) {
+   if (!(mode_ & kFactorArcWeights) && e.weight == Weight::One() &&
+       e.state != kNoStateId) {
+     while (unfactored_.size() <= static_cast<size_t>(e.state))
+       unfactored_.push_back(kNoStateId);
+     if (unfactored_[e.state] == kNoStateId) {
+       unfactored_[e.state] = elements_.size();
+       elements_.push_back(e);
+     }
+     return unfactored_[e.state];
+   } else {
+     typename ElementMap::iterator eit = element_map_.find(e);
+     if (eit != element_map_.end()) {
+       return (*eit).second;
+     } else {
+       StateId s = elements_.size();
+       elements_.push_back(e);
+       element_map_.insert(pair<const Element, StateId>(e, s));
+       return s;
+     }
+   }
+ }
+
+ // Computes the outgoing transitions from a state, creating new destination
+ // states as needed.
+ void Expand(StateId s) {
+ // Copied by value: FindState() below may append to elements_, which could
+ // invalidate a reference into the vector.
+ Element e = elements_[s];
+ if (e.state != kNoStateId) {
+ for (ArcIterator< Fst<A> > ait(*fst_, e.state);
+ !ait.Done();
+ ait.Next()) {
+ const A &arc = ait.Value();
+ // Residual weight of this state times the arc weight.
+ Weight w = Times(e.weight, arc.weight);
+ FactorIterator fit(w);
+ if (!(mode_ & kFactorArcWeights) || fit.Done()) {
+ // No factoring: emit the whole weight; the destination has residual
+ // One().
+ StateId d = FindState(Element(arc.nextstate, Weight::One()));
+ PushArc(s, Arc(arc.ilabel, arc.olabel, w, d));
+ } else {
+ // One arc per factorization (x, y): x goes on the arc and the quantized
+ // y becomes the residual of the destination element.
+ for (; !fit.Done(); fit.Next()) {
+ const pair<Weight, Weight> &p = fit.Value();
+ StateId d = FindState(Element(arc.nextstate,
+ p.second.Quantize(delta_)));
+ PushArc(s, Arc(arc.ilabel, arc.olabel, p.first, d));
+ }
+ }
+ }
+ }
+
+ // Factor the final weight: for a state that is final in the input, or an
+ // element with no input state, emit arcs labeled final_ilabel_ /
+ // final_olabel_ to elements with state kNoStateId that carry the
+ // remaining weight.
+ if ((mode_ & kFactorFinalWeights) &&
+ ((e.state == kNoStateId) ||
+ (fst_->Final(e.state) != Weight::Zero()))) {
+ Weight w = e.state == kNoStateId
+ ? e.weight
+ : Times(e.weight, fst_->Final(e.state));
+ for (FactorIterator fit(w);
+ !fit.Done();
+ fit.Next()) {
+ const pair<Weight, Weight> &p = fit.Value();
+ StateId d = FindState(Element(kNoStateId,
+ p.second.Quantize(delta_)));
+ PushArc(s, Arc(final_ilabel_, final_olabel_, p.first, d));
+ }
+ }
+ SetArcs(s);
+ }
+
+ private:
+ static const size_t kPrime = 7853;
+
+ // Equality function for Elements, assume weights have been quantized.
+ class ElementEqual {
+ public:
+ bool operator()(const Element &x, const Element &y) const {
+ return x.state == y.state && x.weight == y.weight;
+ }
+ };
+
+ // Hash function for Elements to Fst states.
+ class ElementKey {
+ public:
+ size_t operator()(const Element &x) const {
+ return static_cast<size_t>(x.state * kPrime + x.weight.Hash());
+ }
+ private:
+ };
+
+ typedef unordered_map<Element, StateId, ElementKey, ElementEqual> ElementMap;
+
+ const Fst<A> *fst_;
+ float delta_;
+ uint32 mode_; // factoring arc and/or final weights
+ Label final_ilabel_; // ilabel of arc created when factoring final w's
+ Label final_olabel_; // olabel of arc created when factoring final w's
+ vector<Element> elements_; // mapping Fst state to Elements
+ ElementMap element_map_; // mapping Elements to Fst state
+ // mapping between old/new 'StateId' for states that do not need to
+ // be factored when 'mode_' is '0' or 'kFactorFinalWeights'
+ vector<StateId> unfactored_;
+
+ void operator=(const FactorWeightFstImpl<A, F> &); // disallow
+};
+
+template <class A, class F> const size_t FactorWeightFstImpl<A, F>::kPrime;
+
+
+// FactorWeightFst takes as template parameter a FactorIterator as
+// defined above. The result of weight factoring is a transducer
+// equivalent to the input whose path weights have been factored
+// according to the FactorIterator. States and transitions will be
+// added as necessary. The algorithm is a generalization to arbitrary
+// weights of the second step of the input epsilon-normalization
+// algorithm due to Mohri, "Generic epsilon-removal and input
+// epsilon-normalization algorithms for weighted transducers",
+// International Journal of Computer Science 13(1): 129-143 (2002).
+//
+// This class attaches interface to implementation and handles
+// reference counting, delegating most methods to ImplToFst.
+template <class A, class F>
+class FactorWeightFst : public ImplToFst< FactorWeightFstImpl<A, F> > {
+ public:
+ friend class ArcIterator< FactorWeightFst<A, F> >;
+ friend class StateIterator< FactorWeightFst<A, F> >;
+
+ typedef A Arc;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+ typedef CacheState<A> State;
+ typedef FactorWeightFstImpl<A, F> Impl;
+
+ FactorWeightFst(const Fst<A> &fst)
+ : ImplToFst<Impl>(new Impl(fst, FactorWeightOptions<A>())) {}
+
+ FactorWeightFst(const Fst<A> &fst, const FactorWeightOptions<A> &opts)
+ : ImplToFst<Impl>(new Impl(fst, opts)) {}
+
+ // See Fst<>::Copy() for doc.
+ FactorWeightFst(const FactorWeightFst<A, F> &fst, bool copy)
+ : ImplToFst<Impl>(fst, copy) {}
+
+ // Get a copy of this FactorWeightFst. See Fst<>::Copy() for further doc.
+ virtual FactorWeightFst<A, F> *Copy(bool copy = false) const {
+ return new FactorWeightFst<A, F>(*this, copy);
+ }
+
+ virtual inline void InitStateIterator(StateIteratorData<A> *data) const;
+
+ virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
+ GetImpl()->InitArcIterator(s, data);
+ }
+
+ private:
+ // Makes visible to friends.
+ Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
+
+ void operator=(const FactorWeightFst<A, F> &fst); // Disallow
+};
+
+
+// Specialization for FactorWeightFst.
+template<class A, class F>
+class StateIterator< FactorWeightFst<A, F> >
+ : public CacheStateIterator< FactorWeightFst<A, F> > {
+ public:
+ explicit StateIterator(const FactorWeightFst<A, F> &fst)
+ : CacheStateIterator< FactorWeightFst<A, F> >(fst, fst.GetImpl()) {}
+};
+
+
+// Specialization for FactorWeightFst.
+template <class A, class F>
+class ArcIterator< FactorWeightFst<A, F> >
+ : public CacheArcIterator< FactorWeightFst<A, F> > {
+ public:
+ typedef typename A::StateId StateId;
+
+ ArcIterator(const FactorWeightFst<A, F> &fst, StateId s)
+ : CacheArcIterator< FactorWeightFst<A, F> >(fst.GetImpl(), s) {
+ if (!fst.GetImpl()->HasArcs(s))
+ fst.GetImpl()->Expand(s);
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+template <class A, class F> inline
+void FactorWeightFst<A, F>::InitStateIterator(StateIteratorData<A> *data) const
+{
+ data->base = new StateIterator< FactorWeightFst<A, F> >(*this);
+}
+
+
+} // namespace fst
+
+#endif // FST_LIB_FACTOR_WEIGHT_H__
diff --git a/src/include/fst/flags.h b/src/include/fst/flags.h
new file mode 100644
index 0000000..ec3d301
--- /dev/null
+++ b/src/include/fst/flags.h
@@ -0,0 +1,224 @@
+// flags.h
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Google-style flag handling declarations and inline definitions.
+
+#ifndef FST_LIB_FLAGS_H__
+#define FST_LIB_FLAGS_H__
+
+#include <iostream>
+#include <map>
+#include <string>
+
+#include <fst/types.h>
+#include <fst/lock.h>
+
+using std::string;
+
+//
+// FLAGS USAGE:
+//
+// Definition example:
+//
+// DEFINE_int32(length, 0, "length");
+//
+// This defines variable FLAGS_length, initialized to 0.
+//
+// Declaration example:
+//
+// DECLARE_int32(length);
+//
+// SetFlags() can be used to set flags from the command line
+// using, for example, '--length=2'.
+//
+// ShowUsage() can be used to print out command and flag usage.
+//
+
+#define DECLARE_bool(name) extern bool FLAGS_ ## name
+#define DECLARE_string(name) extern string FLAGS_ ## name
+#define DECLARE_int32(name) extern int32 FLAGS_ ## name
+#define DECLARE_int64(name) extern int64 FLAGS_ ## name
+#define DECLARE_double(name) extern double FLAGS_ ## name
+
+template <typename T>
+struct FlagDescription {
+ FlagDescription(T *addr, const char *doc, const char *type, const T val)
+ : address(addr), doc_string(doc), type_name(type), default_value(val) {}
+
+ T *address;
+ const char *doc_string;
+ const char *type_name;
+ const T default_value;
+};
+
+template <typename T>
+class FlagRegister {
+ public:
+ static FlagRegister<T> *GetRegister() {
+ fst::FstOnceInit(&register_init_, &FlagRegister<T>::Init);
+ return register_;
+ }
+
+ // Returns the description registered under 'name'. If no flag of this
+ // type has that name, returns a reference to a shared placeholder whose
+ // 'address' is null, so callers can detect the miss. (The previous
+ // fallback operand, a literal 0, cannot be converted to a
+ // FlagDescription<T>.)
+ const FlagDescription<T> &GetFlagDescription(const string &name) const {
+   fst::MutexLock l(register_lock_);
+   // Function-local static so that the returned reference stays valid;
+   // declared after the lock is taken so its first-use initialization
+   // happens under register_lock_.
+   static const FlagDescription<T> kNotFound(0, "", "", T());
+   typename std::map< string, FlagDescription<T> >::const_iterator it =
+       flag_table_.find(name);
+   return it != flag_table_.end() ? it->second : kNotFound;
+ }
+ void SetDescription(const string &name,
+ const FlagDescription<T> &desc) {
+ fst::MutexLock l(register_lock_);
+ flag_table_.insert(make_pair(name, desc));
+ }
+
+ bool SetFlag(const string &val, bool *address) const {
+ if (val == "true" || val == "1" || val.empty()) {
+ *address = true;
+ return true;
+ } else if (val == "false" || val == "0") {
+ *address = false;
+ return true;
+ }
+ else {
+ return false;
+ }
+ }
+ bool SetFlag(const string &val, string *address) const {
+ *address = val;
+ return true;
+ }
+ bool SetFlag(const string &val, int32 *address) const {
+ char *p = 0;
+ *address = strtol(val.c_str(), &p, 0);
+ return !val.empty() && *p == '\0';
+ }
+ bool SetFlag(const string &val, int64 *address) const {
+ char *p = 0;
+ *address = strtoll(val.c_str(), &p, 0);
+ return !val.empty() && *p == '\0';
+ }
+ bool SetFlag(const string &val, double *address) const {
+ char *p = 0;
+ *address = strtod(val.c_str(), &p);
+ return !val.empty() && *p == '\0';
+ }
+
+ // Sets the flag named 'arg' from the textual value 'val'. Returns false
+ // if no flag of this type is registered under that name, or if the typed
+ // SetFlag() overload rejects 'val'.
+ bool SetFlag(const string &arg, const string &val) const {
+   // Direct map lookup instead of scanning every registered flag.
+   typename std::map< string, FlagDescription<T> >::const_iterator it =
+       flag_table_.find(arg);
+   if (it == flag_table_.end())
+     return false;
+   return SetFlag(val, it->second.address);
+ }
+
+ void ShowDefault(bool default_value) const {
+ std::cout << ", default = ";
+ std::cout << (default_value ? "true" : "false");
+ }
+ void ShowDefault(const string &default_value) const {
+ std::cout << ", default = ";
+ std::cout << "\"" << default_value << "\"";
+ }
+ template<typename V> void ShowDefault(const V& default_value) const {
+ std::cout << ", default = ";
+ std::cout << default_value;
+ }
+ void ShowUsage() const {
+ for (typename std::map< string,
+ FlagDescription<T> >::const_iterator it =
+ flag_table_.begin();
+ it != flag_table_.end();
+ ++it) {
+ const string &name = it->first;
+ const FlagDescription<T> &desc = it->second;
+ std::cout << " --" << name
+ << ": type = " << desc.type_name;
+ ShowDefault(desc.default_value);
+ std::cout << "\n " << desc.doc_string << "\n";
+ }
+ }
+
+ private:
+ static void Init() {
+ register_lock_ = new fst::Mutex;
+ register_ = new FlagRegister<T>;
+ }
+ static fst::FstOnceType register_init_; // ensures only called once
+ static fst::Mutex* register_lock_; // multithreading lock
+ static FlagRegister<T> *register_;
+
+ std::map< string, FlagDescription<T> > flag_table_;
+};
+
+template <class T>
+fst::FstOnceType FlagRegister<T>::register_init_ = fst::FST_ONCE_INIT;
+
+template <class T>
+fst::Mutex *FlagRegister<T>::register_lock_ = 0;
+
+template <class T>
+FlagRegister<T> *FlagRegister<T>::register_ = 0;
+
+
+template <typename T>
+class FlagRegisterer {
+ public:
+ FlagRegisterer(const string &name, const FlagDescription<T> &desc) {
+ FlagRegister<T> *registr = FlagRegister<T>::GetRegister();
+ registr->SetDescription(name, desc);
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(FlagRegisterer);
+};
+
+
+#define DEFINE_VAR(type, name, value, doc) \
+ type FLAGS_ ## name = value; \
+ static FlagRegisterer<type> \
+ name ## _flags_registerer(#name, FlagDescription<type>(&FLAGS_ ## name, \
+ doc, \
+ #type, \
+ value))
+
+#define DEFINE_bool(name, value, doc) DEFINE_VAR(bool, name, value, doc)
+#define DEFINE_string(name, value, doc) \
+ DEFINE_VAR(string, name, value, doc)
+#define DEFINE_int32(name, value, doc) DEFINE_VAR(int32, name, value, doc)
+#define DEFINE_int64(name, value, doc) DEFINE_VAR(int64, name, value, doc)
+#define DEFINE_double(name, value, doc) DEFINE_VAR(double, name, value, doc)
+
+
+// Temporary directory
+DECLARE_string(tmpdir);
+
+void SetFlags(const char *usage, int *argc, char ***argv, bool remove_flags);
+
+// Deprecated - for backward compatibility
+inline void InitFst(const char *usage, int *argc, char ***argv, bool rmflags) {
+ return SetFlags(usage, argc, argv, rmflags);
+}
+
+void ShowUsage();
+
+#endif // FST_LIB_FLAGS_H__
diff --git a/src/include/fst/float-weight.h b/src/include/fst/float-weight.h
new file mode 100644
index 0000000..530cbdd
--- /dev/null
+++ b/src/include/fst/float-weight.h
@@ -0,0 +1,598 @@
+// float-weight.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Float weight set and associated semiring operation definitions.
+//
+
+#ifndef FST_LIB_FLOAT_WEIGHT_H__
+#define FST_LIB_FLOAT_WEIGHT_H__
+
+#include <limits>
+#include <climits>
+#include <sstream>
+#include <string>
+
+#include <fst/util.h>
+#include <fst/weight.h>
+
+
+namespace fst {
+
+// Numeric limits class: the special floating-point values used by the
+// weight classes in this file.
+template <class T>
+class FloatLimits {
+ public:
+  static const T kPosInfinity;  // +infinity
+  static const T kNegInfinity;  // -infinity
+  static const T kNumberBad;    // quiet NaN
+};
+
+template <class T>
+const T FloatLimits<T>::kPosInfinity = numeric_limits<T>::infinity();
+
+// Computed directly from numeric_limits rather than from kPosInfinity:
+// static data members of a class template have unordered initialization,
+// so reading kPosInfinity here is not guaranteed to see its final value.
+template <class T>
+const T FloatLimits<T>::kNegInfinity = -numeric_limits<T>::infinity();
+
+template <class T>
+const T FloatLimits<T>::kNumberBad = numeric_limits<T>::quiet_NaN();
+
+// Weight class to be templated on floating-point types. It only stores the
+// value; the derived weight classes and the free functions in this file
+// supply the semiring operations.
+template <class T = float>
+class FloatWeightTpl {
+ public:
+  // Leaves the stored value uninitialized.
+  FloatWeightTpl() {}
+
+  // Implicit conversion from the underlying floating-point type.
+  FloatWeightTpl(T f) : value_(f) {}
+
+  FloatWeightTpl(const FloatWeightTpl<T> &w) : value_(w.value_) {}
+
+  FloatWeightTpl<T> &operator=(const FloatWeightTpl<T> &w) {
+    value_ = w.value_;
+    return *this;
+  }
+
+  // Reads the stored value from 'strm' via ReadType(); returns its result.
+  istream &Read(istream &strm) {
+    return ReadType(strm, &value_);
+  }
+
+  // Writes the stored value to 'strm' via WriteType(); returns its result.
+  ostream &Write(ostream &strm) const {
+    return WriteType(strm, value_);
+  }
+
+  // Returns a hash built from the bit pattern of the stored value.
+  size_t Hash() const {
+    union {
+      T f;
+      size_t s;
+    } u;
+    u.s = 0;
+    // +0.0 and -0.0 compare equal under operator== but have different bit
+    // patterns; hash both as +0.0 so that equal weights hash equally.
+    u.f = (value_ == 0) ? T(0) : value_;
+    return u.s;
+  }
+
+  const T &Value() const { return value_; }
+
+ protected:
+  void SetValue(const T &f) { value_ = f; }
+
+  // Suffix appended to weight type names: empty when T has the size of
+  // float, otherwise the size of T in bits as formatted by Int64ToStr().
+  inline static string GetPrecisionString() {
+    int64 size = sizeof(T);
+    if (size == sizeof(float)) return "";
+    size *= CHAR_BIT;
+
+    string result;
+    Int64ToStr(size, &result);
+    return result;
+  }
+
+ private:
+  T value_;
+};
+
+// Single-precision float weight
+typedef FloatWeightTpl<float> FloatWeight;
+
+// Equality of the stored values.
+template <class T>
+inline bool operator==(const FloatWeightTpl<T> &w1,
+                       const FloatWeightTpl<T> &w2) {
+  // The operands are deliberately copied into volatile locals: this thwarts
+  // over-aggressive compiler optimizations that lead to problems,
+  // especially with NaturalLess().
+  volatile T lhs = w1.Value();
+  volatile T rhs = w2.Value();
+  const bool equal = (lhs == rhs);
+  return equal;
+}
+
+// Non-template overloads for the double and float instantiations; each one
+// forwards to the template above.
+inline bool operator==(const FloatWeightTpl<double> &w1,
+                       const FloatWeightTpl<double> &w2) {
+  return operator==<double>(w1, w2);
+}
+
+inline bool operator==(const FloatWeightTpl<float> &w1,
+                       const FloatWeightTpl<float> &w2) {
+  return operator==<float>(w1, w2);
+}
+
+// Inequality, defined as the negation of operator==.
+template <class T>
+inline bool operator!=(const FloatWeightTpl<T> &w1,
+                       const FloatWeightTpl<T> &w2) {
+  if (w1 == w2)
+    return false;
+  return true;
+}
+
+// Non-template overloads for the double and float instantiations; each one
+// forwards to the template above.
+inline bool operator!=(const FloatWeightTpl<double> &w1,
+                       const FloatWeightTpl<double> &w2) {
+  return operator!=<double>(w1, w2);
+}
+
+inline bool operator!=(const FloatWeightTpl<float> &w1,
+                       const FloatWeightTpl<float> &w2) {
+  return operator!=<float>(w1, w2);
+}
+
+// True when each value is no more than 'delta' above the other.
+template <class T>
+inline bool ApproxEqual(const FloatWeightTpl<T> &w1,
+                        const FloatWeightTpl<T> &w2,
+                        float delta = kDelta) {
+  const T &v1 = w1.Value();
+  const T &v2 = w2.Value();
+  // Written as a negated '<=' so that a NaN operand still yields false.
+  if (!(v1 <= v2 + delta))
+    return false;
+  return v2 <= v1 + delta;
+}
+
+// Prints the weight; the two infinities and NaN are written as the words
+// "Infinity", "-Infinity" and "BadNumber".
+template <class T>
+inline ostream &operator<<(ostream &strm, const FloatWeightTpl<T> &w) {
+  const T &v = w.Value();
+  if (v == FloatLimits<T>::kPosInfinity)
+    return strm << "Infinity";
+  if (v == FloatLimits<T>::kNegInfinity)
+    return strm << "-Infinity";
+  if (v != v)  // Only a NaN differs from itself
+    return strm << "BadNumber";
+  return strm << v;
+}
+
+// Reads one whitespace-delimited token and parses it as a weight. The words
+// "Infinity" and "-Infinity" are accepted; anything else goes through
+// strtod(), and badbit is set on 'strm' if the token is not fully consumed
+// (in which case 'w' is left untouched).
+template <class T>
+inline istream &operator>>(istream &strm, FloatWeightTpl<T> &w) {
+  string token;
+  strm >> token;
+  if (token == "Infinity") {
+    w = FloatWeightTpl<T>(FloatLimits<T>::kPosInfinity);
+    return strm;
+  }
+  if (token == "-Infinity") {
+    w = FloatWeightTpl<T>(FloatLimits<T>::kNegInfinity);
+    return strm;
+  }
+  const char *begin = token.c_str();
+  char *end;
+  T f = strtod(begin, &end);
+  if (end < begin + token.size())
+    strm.clear(std::ios::badbit);
+  else
+    w = FloatWeightTpl<T>(f);
+  return strm;
+}
+
+
+// Tropical semiring: (min, +, inf, 0)
+template <class T>
+class TropicalWeightTpl : public FloatWeightTpl<T> {
+ public:
+  using FloatWeightTpl<T>::Value;
+
+  typedef TropicalWeightTpl<T> ReverseWeight;
+
+  TropicalWeightTpl() : FloatWeightTpl<T>() {}
+
+  TropicalWeightTpl(T f) : FloatWeightTpl<T>(f) {}
+
+  TropicalWeightTpl(const TropicalWeightTpl<T> &w) : FloatWeightTpl<T>(w) {}
+
+  // Semiring zero: +infinity.
+  static const TropicalWeightTpl<T> Zero() {
+    return TropicalWeightTpl<T>(FloatLimits<T>::kPosInfinity);
+  }
+
+  // Semiring one: 0.
+  static const TropicalWeightTpl<T> One() {
+    return TropicalWeightTpl<T>(0.0F);
+  }
+
+  // The "not a weight" value: NaN.
+  static const TropicalWeightTpl<T> NoWeight() {
+    return TropicalWeightTpl<T>(FloatLimits<T>::kNumberBad);
+  }
+
+  // "tropical" followed by the precision suffix of T.
+  static const string &Type() {
+    static const string type =
+        "tropical" + FloatWeightTpl<T>::GetPrecisionString();
+    return type;
+  }
+
+  // Every value except NaN and -infinity is a member.
+  bool Member() const {
+    const T &v = Value();
+    if (v != v)  // Only true for IEEE NaN
+      return false;
+    return v != FloatLimits<T>::kNegInfinity;
+  }
+
+  // Rounds the value to the nearest multiple of 'delta'; infinities and
+  // NaN are returned unchanged.
+  TropicalWeightTpl<T> Quantize(float delta = kDelta) const {
+    const T &v = Value();
+    const bool infinite_or_nan = v == FloatLimits<T>::kNegInfinity ||
+                                 v == FloatLimits<T>::kPosInfinity ||
+                                 v != v;
+    if (infinite_or_nan)
+      return *this;
+    return TropicalWeightTpl<T>(floor(v / delta + 0.5F) * delta);
+  }
+
+  TropicalWeightTpl<T> Reverse() const { return *this; }
+
+  static uint64 Properties() {
+    return kLeftSemiring | kRightSemiring | kCommutative |
+           kPath | kIdempotent;
+  }
+};
+
+// Single precision tropical weight
+typedef TropicalWeightTpl<float> TropicalWeight;
+
+// Tropical Plus: the smaller of the two values; NoWeight() if either
+// operand is not a member.
+template <class T>
+inline TropicalWeightTpl<T> Plus(const TropicalWeightTpl<T> &w1,
+                                 const TropicalWeightTpl<T> &w2) {
+  const bool both_members = w1.Member() && w2.Member();
+  if (!both_members)
+    return TropicalWeightTpl<T>::NoWeight();
+  if (w1.Value() < w2.Value())
+    return w1;
+  return w2;
+}
+
+// Non-template overloads for float and double; each forwards to the
+// template above.
+inline TropicalWeightTpl<float> Plus(const TropicalWeightTpl<float> &w1,
+                                     const TropicalWeightTpl<float> &w2) {
+  return Plus<float>(w1, w2);
+}
+
+inline TropicalWeightTpl<double> Plus(const TropicalWeightTpl<double> &w1,
+                                      const TropicalWeightTpl<double> &w2) {
+  return Plus<double>(w1, w2);
+}
+
+// Tropical Times: the sum of the two values. +infinity absorbs, and
+// NoWeight() is returned if either operand is not a member.
+template <class T>
+inline TropicalWeightTpl<T> Times(const TropicalWeightTpl<T> &w1,
+                                  const TropicalWeightTpl<T> &w2) {
+  const bool both_members = w1.Member() && w2.Member();
+  if (!both_members)
+    return TropicalWeightTpl<T>::NoWeight();
+  const T f1 = w1.Value();
+  const T f2 = w2.Value();
+  if (f1 == FloatLimits<T>::kPosInfinity)
+    return w1;
+  if (f2 == FloatLimits<T>::kPosInfinity)
+    return w2;
+  return TropicalWeightTpl<T>(f1 + f2);
+}
+
+// Non-template overloads for float and double; each forwards to the
+// template above.
+inline TropicalWeightTpl<float> Times(const TropicalWeightTpl<float> &w1,
+                                      const TropicalWeightTpl<float> &w2) {
+  return Times<float>(w1, w2);
+}
+
+inline TropicalWeightTpl<double> Times(const TropicalWeightTpl<double> &w1,
+                                       const TropicalWeightTpl<double> &w2) {
+  return Times<double>(w1, w2);
+}
+
+// Tropical Divide: the difference of the two values. Dividing by +infinity
+// gives NaN, dividing +infinity by a finite value gives +infinity, and
+// NoWeight() is returned if either operand is not a member. 'typ' is not
+// consulted.
+template <class T>
+inline TropicalWeightTpl<T> Divide(const TropicalWeightTpl<T> &w1,
+                                   const TropicalWeightTpl<T> &w2,
+                                   DivideType typ = DIVIDE_ANY) {
+  const bool both_members = w1.Member() && w2.Member();
+  if (!both_members)
+    return TropicalWeightTpl<T>::NoWeight();
+  const T f1 = w1.Value();
+  const T f2 = w2.Value();
+  if (f2 == FloatLimits<T>::kPosInfinity)
+    return TropicalWeightTpl<T>(FloatLimits<T>::kNumberBad);
+  if (f1 == FloatLimits<T>::kPosInfinity)
+    return TropicalWeightTpl<T>(FloatLimits<T>::kPosInfinity);
+  return TropicalWeightTpl<T>(f1 - f2);
+}
+
+// Non-template overloads for float and double; each forwards to the
+// template above.
+inline TropicalWeightTpl<float> Divide(const TropicalWeightTpl<float> &w1,
+                                       const TropicalWeightTpl<float> &w2,
+                                       DivideType typ = DIVIDE_ANY) {
+  return Divide<float>(w1, w2, typ);
+}
+
+inline TropicalWeightTpl<double> Divide(const TropicalWeightTpl<double> &w1,
+                                        const TropicalWeightTpl<double> &w2,
+                                        DivideType typ = DIVIDE_ANY) {
+  return Divide<double>(w1, w2, typ);
+}
+
+
+// Log semiring: (-log(e^-x + e^-y), +, inf, 0)
+template <class T>
+class LogWeightTpl : public FloatWeightTpl<T> {
+ public:
+  using FloatWeightTpl<T>::Value;
+
+  typedef LogWeightTpl ReverseWeight;
+
+  LogWeightTpl() : FloatWeightTpl<T>() {}
+
+  LogWeightTpl(T f) : FloatWeightTpl<T>(f) {}
+
+  LogWeightTpl(const LogWeightTpl<T> &w) : FloatWeightTpl<T>(w) {}
+
+  // Semiring zero: +infinity.
+  static const LogWeightTpl<T> Zero() {
+    return LogWeightTpl<T>(FloatLimits<T>::kPosInfinity);
+  }
+
+  // Semiring one: 0.
+  static const LogWeightTpl<T> One() {
+    return LogWeightTpl<T>(0.0F);
+  }
+
+  // The "not a weight" value: NaN.
+  static const LogWeightTpl<T> NoWeight() {
+    return LogWeightTpl<T>(FloatLimits<T>::kNumberBad);
+  }
+
+  // "log" followed by the precision suffix of T.
+  static const string &Type() {
+    static const string type =
+        "log" + FloatWeightTpl<T>::GetPrecisionString();
+    return type;
+  }
+
+  // Every value except NaN and -infinity is a member.
+  bool Member() const {
+    const T &v = Value();
+    if (v != v)  // Only true for IEEE NaN
+      return false;
+    return v != FloatLimits<T>::kNegInfinity;
+  }
+
+  // Rounds the value to the nearest multiple of 'delta'; infinities and
+  // NaN are returned unchanged.
+  LogWeightTpl<T> Quantize(float delta = kDelta) const {
+    const T &v = Value();
+    const bool infinite_or_nan = v == FloatLimits<T>::kNegInfinity ||
+                                 v == FloatLimits<T>::kPosInfinity ||
+                                 v != v;
+    if (infinite_or_nan)
+      return *this;
+    return LogWeightTpl<T>(floor(v / delta + 0.5F) * delta);
+  }
+
+  LogWeightTpl<T> Reverse() const { return *this; }
+
+  static uint64 Properties() {
+    return kLeftSemiring | kRightSemiring | kCommutative;
+  }
+};
+
+// Single-precision log weight
+typedef LogWeightTpl<float> LogWeight;
+// Double-precision log weight
+typedef LogWeightTpl<double> Log64Weight;
+
+// Returns log(1 + e^-x); used by the log-semiring Plus below.
+template <class T>
+inline T LogExp(T x) {
+  return log(exp(-x) + 1.0F);
+}
+
+// Log Plus: -log(e^-f1 + e^-f2), computed as the smaller value minus
+// LogExp of the absolute difference. A +infinity operand (semiring zero)
+// yields the other operand unchanged.
+template <class T>
+inline LogWeightTpl<T> Plus(const LogWeightTpl<T> &w1,
+                            const LogWeightTpl<T> &w2) {
+  const T f1 = w1.Value();
+  const T f2 = w2.Value();
+  if (f1 == FloatLimits<T>::kPosInfinity)
+    return w2;
+  if (f2 == FloatLimits<T>::kPosInfinity)
+    return w1;
+  if (f1 > f2)
+    return LogWeightTpl<T>(f2 - LogExp(f1 - f2));
+  return LogWeightTpl<T>(f1 - LogExp(f2 - f1));
+}
+
+// Non-template overloads for float and double; each forwards to the
+// template above.
+inline LogWeightTpl<float> Plus(const LogWeightTpl<float> &w1,
+                                const LogWeightTpl<float> &w2) {
+  return Plus<float>(w1, w2);
+}
+
+inline LogWeightTpl<double> Plus(const LogWeightTpl<double> &w1,
+                                 const LogWeightTpl<double> &w2) {
+  return Plus<double>(w1, w2);
+}
+
+// Log Times: the sum of the two values. +infinity absorbs, and NoWeight()
+// is returned if either operand is not a member.
+template <class T>
+inline LogWeightTpl<T> Times(const LogWeightTpl<T> &w1,
+                             const LogWeightTpl<T> &w2) {
+  const bool both_members = w1.Member() && w2.Member();
+  if (!both_members)
+    return LogWeightTpl<T>::NoWeight();
+  const T f1 = w1.Value();
+  const T f2 = w2.Value();
+  if (f1 == FloatLimits<T>::kPosInfinity)
+    return w1;
+  if (f2 == FloatLimits<T>::kPosInfinity)
+    return w2;
+  return LogWeightTpl<T>(f1 + f2);
+}
+
+// Non-template overloads for float and double; each forwards to the
+// template above.
+inline LogWeightTpl<float> Times(const LogWeightTpl<float> &w1,
+                                 const LogWeightTpl<float> &w2) {
+  return Times<float>(w1, w2);
+}
+
+inline LogWeightTpl<double> Times(const LogWeightTpl<double> &w1,
+                                  const LogWeightTpl<double> &w2) {
+  return Times<double>(w1, w2);
+}
+
+// Log Divide: the difference of the two values. Dividing by +infinity gives
+// NaN, dividing +infinity by a finite value gives +infinity, and NoWeight()
+// is returned if either operand is not a member. 'typ' is not consulted.
+template <class T>
+inline LogWeightTpl<T> Divide(const LogWeightTpl<T> &w1,
+                              const LogWeightTpl<T> &w2,
+                              DivideType typ = DIVIDE_ANY) {
+  const bool both_members = w1.Member() && w2.Member();
+  if (!both_members)
+    return LogWeightTpl<T>::NoWeight();
+  const T f1 = w1.Value();
+  const T f2 = w2.Value();
+  if (f2 == FloatLimits<T>::kPosInfinity)
+    return LogWeightTpl<T>(FloatLimits<T>::kNumberBad);
+  if (f1 == FloatLimits<T>::kPosInfinity)
+    return LogWeightTpl<T>(FloatLimits<T>::kPosInfinity);
+  return LogWeightTpl<T>(f1 - f2);
+}
+
+// Non-template overloads for float and double; each forwards to the
+// template above.
+inline LogWeightTpl<float> Divide(const LogWeightTpl<float> &w1,
+                                  const LogWeightTpl<float> &w2,
+                                  DivideType typ = DIVIDE_ANY) {
+  return Divide<float>(w1, w2, typ);
+}
+
+inline LogWeightTpl<double> Divide(const LogWeightTpl<double> &w1,
+                                   const LogWeightTpl<double> &w2,
+                                   DivideType typ = DIVIDE_ANY) {
+  return Divide<double>(w1, w2, typ);
+}
+
+// MinMax semiring: (min, max, inf, -inf)
+template <class T>
+class MinMaxWeightTpl : public FloatWeightTpl<T> {
+ public:
+  using FloatWeightTpl<T>::Value;
+
+  typedef MinMaxWeightTpl<T> ReverseWeight;
+
+  MinMaxWeightTpl() : FloatWeightTpl<T>() {}
+
+  MinMaxWeightTpl(T f) : FloatWeightTpl<T>(f) {}
+
+  MinMaxWeightTpl(const MinMaxWeightTpl<T> &w) : FloatWeightTpl<T>(w) {}
+
+  // Semiring zero: +infinity.
+  static const MinMaxWeightTpl<T> Zero() {
+    return MinMaxWeightTpl<T>(FloatLimits<T>::kPosInfinity);
+  }
+
+  // Semiring one: -infinity.
+  static const MinMaxWeightTpl<T> One() {
+    return MinMaxWeightTpl<T>(FloatLimits<T>::kNegInfinity);
+  }
+
+  // The "not a weight" value: NaN.
+  static const MinMaxWeightTpl<T> NoWeight() {
+    return MinMaxWeightTpl<T>(FloatLimits<T>::kNumberBad);
+  }
+
+  // "minmax" followed by the precision suffix of T.
+  static const string &Type() {
+    static const string type =
+        "minmax" + FloatWeightTpl<T>::GetPrecisionString();
+    return type;
+  }
+
+  // Every value except NaN is a member (both infinities included).
+  bool Member() const {
+    const T &v = Value();
+    return !(v != v);  // 'v != v' holds only for IEEE NaN
+  }
+
+  // Rounds the value to the nearest multiple of 'delta'; infinities and
+  // NaN are returned unchanged.
+  MinMaxWeightTpl<T> Quantize(float delta = kDelta) const {
+    const T &v = Value();
+    const bool infinite_or_nan = v == FloatLimits<T>::kNegInfinity ||
+                                 v == FloatLimits<T>::kPosInfinity ||
+                                 v != v;
+    if (infinite_or_nan)
+      return *this;
+    return MinMaxWeightTpl<T>(floor(v / delta + 0.5F) * delta);
+  }
+
+  MinMaxWeightTpl<T> Reverse() const { return *this; }
+
+  static uint64 Properties() {
+    return kLeftSemiring | kRightSemiring | kCommutative | kIdempotent | kPath;
+  }
+};
+
+// Single-precision min-max weight
+typedef MinMaxWeightTpl<float> MinMaxWeight;
+
+// Min: MinMax Plus returns the operand with the smaller value, or
+// NoWeight() if either operand is not a member.
+template <class T>
+inline MinMaxWeightTpl<T> Plus(
+    const MinMaxWeightTpl<T> &w1, const MinMaxWeightTpl<T> &w2) {
+  const bool both_members = w1.Member() && w2.Member();
+  if (!both_members)
+    return MinMaxWeightTpl<T>::NoWeight();
+  if (w1.Value() < w2.Value())
+    return w1;
+  return w2;
+}
+
+// Non-template overloads for float and double; each forwards to the
+// template above.
+inline MinMaxWeightTpl<float> Plus(
+    const MinMaxWeightTpl<float> &w1, const MinMaxWeightTpl<float> &w2) {
+  return Plus<float>(w1, w2);
+}
+
+inline MinMaxWeightTpl<double> Plus(
+    const MinMaxWeightTpl<double> &w1, const MinMaxWeightTpl<double> &w2) {
+  return Plus<double>(w1, w2);
+}
+
+// Max: MinMax Times returns the operand with the larger value (w1 on a
+// tie), or NoWeight() if either operand is not a member.
+template <class T>
+inline MinMaxWeightTpl<T> Times(
+    const MinMaxWeightTpl<T> &w1, const MinMaxWeightTpl<T> &w2) {
+  const bool both_members = w1.Member() && w2.Member();
+  if (!both_members)
+    return MinMaxWeightTpl<T>::NoWeight();
+  if (w1.Value() >= w2.Value())
+    return w1;
+  return w2;
+}
+
+// Non-template overloads for float and double; each forwards to the
+// template above.
+inline MinMaxWeightTpl<float> Times(
+    const MinMaxWeightTpl<float> &w1, const MinMaxWeightTpl<float> &w2) {
+  return Times<float>(w1, w2);
+}
+
+inline MinMaxWeightTpl<double> Times(
+    const MinMaxWeightTpl<double> &w1, const MinMaxWeightTpl<double> &w2) {
+  return Times<double>(w1, w2);
+}
+
+// Defined only for special cases. Returns NoWeight() if either operand is
+// not a member; 'typ' is not consulted.
+template <class T>
+inline MinMaxWeightTpl<T> Divide(const MinMaxWeightTpl<T> &w1,
+                                 const MinMaxWeightTpl<T> &w2,
+                                 DivideType typ = DIVIDE_ANY) {
+  const bool both_members = w1.Member() && w2.Member();
+  if (!both_members)
+    return MinMaxWeightTpl<T>::NoWeight();
+  // Times is max, so the quotient x must satisfy max(x, w2) = w1. When
+  // w1 >= w2, x = w1 is a solution; otherwise there is none and NaN is
+  // returned.
+  if (w1.Value() >= w2.Value())
+    return w1;
+  return MinMaxWeightTpl<T>(FloatLimits<T>::kNumberBad);
+}
+
+// Non-template overloads for float and double; each forwards to the
+// template above.
+inline MinMaxWeightTpl<float> Divide(const MinMaxWeightTpl<float> &w1,
+                                     const MinMaxWeightTpl<float> &w2,
+                                     DivideType typ = DIVIDE_ANY) {
+  return Divide<float>(w1, w2, typ);
+}
+
+inline MinMaxWeightTpl<double> Divide(const MinMaxWeightTpl<double> &w1,
+                                      const MinMaxWeightTpl<double> &w2,
+                                      DivideType typ = DIVIDE_ANY) {
+  return Divide<double>(w1, w2, typ);
+}
+
+//
+// WEIGHT CONVERTER SPECIALIZATIONS.
+//
+
+// Each converter below builds the target weight from the source weight's
+// raw value, converting between float and double where the two differ.
+
+// Convert to tropical
+template <>
+struct WeightConvert<LogWeight, TropicalWeight> {
+  TropicalWeight operator()(LogWeight w) const {
+    return TropicalWeight(w.Value());
+  }
+};
+
+template <>
+struct WeightConvert<Log64Weight, TropicalWeight> {
+  TropicalWeight operator()(Log64Weight w) const {
+    return TropicalWeight(w.Value());
+  }
+};
+
+// Convert to log
+template <>
+struct WeightConvert<TropicalWeight, LogWeight> {
+  LogWeight operator()(TropicalWeight w) const {
+    return LogWeight(w.Value());
+  }
+};
+
+template <>
+struct WeightConvert<Log64Weight, LogWeight> {
+  LogWeight operator()(Log64Weight w) const {
+    return LogWeight(w.Value());
+  }
+};
+
+// Convert to log64
+template <>
+struct WeightConvert<TropicalWeight, Log64Weight> {
+  Log64Weight operator()(TropicalWeight w) const {
+    return Log64Weight(w.Value());
+  }
+};
+
+template <>
+struct WeightConvert<LogWeight, Log64Weight> {
+  Log64Weight operator()(LogWeight w) const {
+    return Log64Weight(w.Value());
+  }
+};
+
+} // namespace fst
+
+#endif // FST_LIB_FLOAT_WEIGHT_H__
diff --git a/src/include/fst/fst-decl.h b/src/include/fst/fst-decl.h
new file mode 100644
index 0000000..0e2cdf1
--- /dev/null
+++ b/src/include/fst/fst-decl.h
@@ -0,0 +1,125 @@
+// fst-decl.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// This file contains declarations of classes in the Fst template library.
+//
+
+#ifndef FST_LIB_FST_DECL_H__
+#define FST_LIB_FST_DECL_H__
+
+#include <fst/types.h>
+
+namespace fst {
+
+class SymbolTable;
+class SymbolTableIterator;
+
+template <class W> class FloatWeightTpl;
+template <class W> class TropicalWeightTpl;
+template <class W> class LogWeightTpl;
+template <class W> class MinMaxWeightTpl;
+
+typedef FloatWeightTpl<float> FloatWeight;
+typedef TropicalWeightTpl<float> TropicalWeight;
+typedef LogWeightTpl<float> LogWeight;
+typedef MinMaxWeightTpl<float> MinMaxWeight;
+
+template <class W> class ArcTpl;
+typedef ArcTpl<TropicalWeight> StdArc;
+typedef ArcTpl<LogWeight> LogArc;
+
+template <class A, class C, class U = uint32> class CompactFst;
+template <class A, class U = uint32> class ConstFst;
+template <class A, class W, class M> class EditFst;
+template <class A> class ExpandedFst;
+template <class A> class Fst;
+template <class A> class MutableFst;
+template <class A> class VectorFst;
+
+template <class A, class C> class ArcSortFst;
+template <class A> class ClosureFst;
+template <class A> class ComposeFst;
+template <class A> class ConcatFst;
+template <class A> class DeterminizeFst;
+template <class A> class DifferenceFst;
+template <class A> class IntersectFst;
+template <class A> class InvertFst;
+template <class A, class B, class C> class ArcMapFst;
+template <class A> class ProjectFst;
+template <class A, class B, class S> class RandGenFst;
+template <class A> class RelabelFst;
+template <class A, class T> class ReplaceFst;
+template <class A> class RmEpsilonFst;
+template <class A> class UnionFst;
+
+template <class T, class Compare, bool max> class Heap;
+
+template <class A> class AcceptorCompactor;
+template <class A> class StringCompactor;
+template <class A> class UnweightedAcceptorCompactor;
+template <class A> class UnweightedCompactor;
+template <class A> class WeightedStringCompactor;
+
+template <class A, class P> class DefaultReplaceStateTable;
+
+typedef CompactFst<StdArc, AcceptorCompactor<StdArc> >
+StdCompactAcceptorFst;
+typedef CompactFst< StdArc, StringCompactor<StdArc> >
+StdCompactStringFst;
+typedef CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc> >
+StdCompactUnweightedAcceptorFst;
+typedef CompactFst<StdArc, UnweightedCompactor<StdArc> >
+StdCompactUnweightedFst;
+typedef CompactFst< StdArc, WeightedStringCompactor<StdArc> >
+StdCompactWeightedStringFst;
+typedef ConstFst<StdArc> StdConstFst;
+typedef ExpandedFst<StdArc> StdExpandedFst;
+typedef Fst<StdArc> StdFst;
+typedef MutableFst<StdArc> StdMutableFst;
+typedef VectorFst<StdArc> StdVectorFst;
+
+
+template <class C> class StdArcSortFst;
+typedef ClosureFst<StdArc> StdClosureFst;
+typedef ComposeFst<StdArc> StdComposeFst;
+typedef ConcatFst<StdArc> StdConcatFst;
+typedef DeterminizeFst<StdArc> StdDeterminizeFst;
+typedef DifferenceFst<StdArc> StdDifferenceFst;
+typedef IntersectFst<StdArc> StdIntersectFst;
+typedef InvertFst<StdArc> StdInvertFst;
+typedef ProjectFst<StdArc> StdProjectFst;
+typedef RelabelFst<StdArc> StdRelabelFst;
+typedef ReplaceFst<StdArc, DefaultReplaceStateTable<StdArc, ssize_t> >
+StdReplaceFst;
+typedef RmEpsilonFst<StdArc> StdRmEpsilonFst;
+typedef UnionFst<StdArc> StdUnionFst;
+
+template <typename T> class IntegerFilterState;
+typedef IntegerFilterState<signed char> CharFilterState;
+typedef IntegerFilterState<short> ShortFilterState;
+typedef IntegerFilterState<int> IntFilterState;
+
+template <class F> class Matcher;
+template <class M1, class M2 = M1> class SequenceComposeFilter;
+template <class M1, class M2 = M1> class AltSequenceComposeFilter;
+template <class M1, class M2 = M1> class MatchComposeFilter;
+
+} // namespace fst
+
+#endif // FST_LIB_FST_DECL_H__
diff --git a/src/include/fst/fst.h b/src/include/fst/fst.h
new file mode 100644
index 0000000..9c4d0db
--- /dev/null
+++ b/src/include/fst/fst.h
@@ -0,0 +1,942 @@
+// fst.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Finite-State Transducer (FST) - abstract base class definition,
+// state and arc iterator interface, and suggested base implementation.
+//
+
+#ifndef FST_LIB_FST_H__
+#define FST_LIB_FST_H__
+
+#include <stddef.h>
+#include <sys/types.h>
+#include <cmath>
+#include <string>
+
+#include <fst/compat.h>
+#include <fst/types.h>
+
+#include <fst/arc.h>
+#include <fst/properties.h>
+#include <fst/register.h>
+#include <iostream>
+#include <fstream>
+#include <fst/symbol-table.h>
+#include <fst/util.h>
+
+
+DECLARE_bool(fst_align);
+
+namespace fst {
+
+bool IsFstHeader(istream &, const string &);
+
+class FstHeader;
+template <class A> class StateIteratorData;
+template <class A> class ArcIteratorData;
+template <class A> class MatcherBase;
+
+// Options passed to the Fst read methods.
+struct FstReadOptions {
+  string source;                // Where you're reading from
+  const FstHeader *header;      // Pointer to Fst header. If non-zero, use
+                                // this info (don't read a stream header)
+  const SymbolTable* isymbols;  // Pointer to input symbols. If non-zero, use
+                                // this info (read and skip stream isymbols)
+  const SymbolTable* osymbols;  // Pointer to output symbols. If non-zero, use
+                                // this info (read and skip stream osymbols)
+
+  // Every argument is optional; the default source name is the placeholder
+  // "<unspecified>" (spelling corrected from "<unspecfied>").
+  explicit FstReadOptions(const string& src = "<unspecified>",
+                          const FstHeader *hdr = 0,
+                          const SymbolTable* isym = 0,
+                          const SymbolTable* osym = 0)
+      : source(src), header(hdr), isymbols(isym), osymbols(osym) {}
+
+  // Variant that takes symbol tables but no header; 'header' is set to 0.
+  explicit FstReadOptions(const string& src,
+                          const SymbolTable* isym,
+                          const SymbolTable* osym = 0)
+      : source(src), header(0), isymbols(isym), osymbols(osym) {}
+};
+
+
+// Options passed to the Fst write methods.
+struct FstWriteOptions {
+  string source;        // Where you're writing to
+  bool write_header;    // Write the header?
+  bool write_isymbols;  // Write input symbols?
+  bool write_osymbols;  // Write output symbols?
+  bool align;           // Write data aligned where appropriate;
+                        // this may fail on pipes
+
+  // Every argument is optional: the default source name is the placeholder
+  // "<unspecified>" (spelling corrected from "<unspecifed>"), the header
+  // and both symbol tables are written, and 'align' defaults to the value
+  // of FLAGS_fst_align.
+  explicit FstWriteOptions(const string& src = "<unspecified>",
+                           bool hdr = true, bool isym = true,
+                           bool osym = true, bool alig = FLAGS_fst_align)
+      : source(src), write_header(hdr),
+        write_isymbols(isym), write_osymbols(osym), align(alig) {}
+};
+
+//
+// Fst HEADER CLASS
+//
+// This is the recommended Fst file header representation.
+//
+class FstHeader {
+ public:
+  // File format flag bits. NOTE: 'Flags' below declares a data member of
+  // this unnamed enum type; it is not a type name.
+  enum {
+    HAS_ISYMBOLS = 0x1,  // Has input symbol table
+    HAS_OSYMBOLS = 0x2,  // Has output symbol table
+    IS_ALIGNED = 0x4,    // Memory-aligned (where appropriate)
+  } Flags;
+
+  // Starts with empty type names, zero version, flags, properties and
+  // counts, and a start state of -1.
+  FstHeader()
+      : version_(0), flags_(0), properties_(0), start_(-1),
+        numstates_(0), numarcs_(0) {}
+
+  // Fst type name, e.g. "vector"
+  const string &FstType() const { return fsttype_; }
+  void SetFstType(const string& type) { fsttype_ = type; }
+
+  // Arc type name, e.g. "standard"
+  const string &ArcType() const { return arctype_; }
+  void SetArcType(const string& type) { arctype_ = type; }
+
+  // Type version #
+  int32 Version() const { return version_; }
+  void SetVersion(int32 version) { version_ = version; }
+
+  // File format bits
+  int32 GetFlags() const { return flags_; }
+  void SetFlags(int32 flags) { flags_ = flags; }
+
+  // FST property bits
+  uint64 Properties() const { return properties_; }
+  void SetProperties(uint64 properties) { properties_ = properties; }
+
+  // Start state
+  int64 Start() const { return start_; }
+  void SetStart(int64 start) { start_ = start; }
+
+  // # of states
+  int64 NumStates() const { return numstates_; }
+  void SetNumStates(int64 numstates) { numstates_ = numstates; }
+
+  // # of arcs
+  int64 NumArcs() const { return numarcs_; }
+  void SetNumArcs(int64 numarcs) { numarcs_ = numarcs; }
+
+  // Header (de)serialization; defined outside this header.
+  bool Read(istream &strm, const string &source, bool rewind = false);
+  bool Write(ostream &strm, const string &source) const;
+
+ private:
+  string fsttype_;
+  string arctype_;
+  int32 version_;
+  int32 flags_;
+  uint64 properties_;
+  int64 start_;
+  int64 numstates_;
+  int64 numarcs_;
+};
+
+
+// Specifies matcher action.
+enum MatchType { MATCH_INPUT, // Match input label.
+ MATCH_OUTPUT, // Match output label.
+ MATCH_BOTH, // Match input or output label.
+ MATCH_NONE, // Match nothing.
+ MATCH_UNKNOWN }; // Match type unknown.
+
+//
+// Fst INTERFACE CLASS DEFINITION
+//
+
+// A generic FST, templated on the arc definition, with
+// common-denominator methods (use StateIterator and ArcIterator to
+// iterate over its states and arcs).
+template <class A>
+class Fst {
+ public:
+  typedef A Arc;
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+
+  virtual ~Fst() {}
+
+  // Initial state
+  virtual StateId Start() const = 0;
+
+  // State's final weight
+  virtual Weight Final(StateId) const = 0;
+
+  // State's arc count
+  virtual size_t NumArcs(StateId) const = 0;
+
+  // State's input epsilon count
+  virtual size_t NumInputEpsilons(StateId) const = 0;
+
+  // State's output epsilon count
+  virtual size_t NumOutputEpsilons(StateId) const = 0;
+
+  // Property bits.
+  // If test=false, return stored properties bits for mask (some poss. unknown)
+  // If test=true, return property bits for mask (computing o.w. unknown)
+  virtual uint64 Properties(uint64 mask, bool test) const = 0;
+
+  // Fst type name
+  virtual const string& Type() const = 0;
+
+  // Get a copy of this Fst. The copying behaves as follows:
+  //
+  // (1) The copying is constant time if safe = false or if safe = true
+  // and is on an otherwise unaccessed Fst.
+  //
+  // (2) If safe = true, the copy is thread-safe in that the original
+  // and copy can be safely accessed (but not necessarily mutated) by
+  // separate threads. For some Fst types, 'Copy(true)' should only be
+  // called on an Fst that has not otherwise been accessed. Its behavior
+  // is undefined otherwise.
+  //
+  // (3) If a MutableFst is copied and then mutated, then the original is
+  // unmodified and vice versa (often by a copy-on-write on the initial
+  // mutation, which may not be constant time).
+  virtual Fst<A> *Copy(bool safe = false) const = 0;
+
+  // Read an Fst from an input stream; returns NULL on error.
+  // The header is taken from 'opts' when it supplies one and is read from
+  // the stream otherwise; the reader registered for the header's Fst type
+  // then does the actual reading.
+  static Fst<A> *Read(istream &strm, const FstReadOptions &opts) {
+    FstReadOptions ropts(opts);
+    FstHeader hdr;
+    if (!ropts.header) {
+      // No header supplied by the caller: read it from the stream and make
+      // the options handed to the reader point at it.
+      if (!hdr.Read(strm, opts.source))
+        return 0;
+      ropts.header = &hdr;
+    } else {
+      hdr = *opts.header;
+    }
+    const typename FstRegister<A>::Reader reader =
+        FstRegister<A>::GetRegister()->GetReader(hdr.FstType());
+    if (reader)
+      return reader(strm, ropts);
+    LOG(ERROR) << "Fst::Read: Unknown FST type \"" << hdr.FstType()
+               << "\" (arc type = \"" << A::Type()
+               << "\"): " << ropts.source;
+    return 0;
+  }
+
+  // Read an Fst from a file; return NULL on error
+  // Empty filename reads from standard input
+  static Fst<A> *Read(const string &filename) {
+    if (filename.empty())
+      return Read(std::cin, FstReadOptions("standard input"));
+    ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
+    if (!strm) {
+      LOG(ERROR) << "Fst::Read: Can't open file: " << filename;
+      return 0;
+    }
+    return Read(strm, FstReadOptions(filename));
+  }
+
+  // Write an Fst to an output stream; return false on error.
+  // This base version only logs an error and fails.
+  virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
+    LOG(ERROR) << "Fst::Write: No write stream method for " << Type()
+               << " Fst type";
+    return false;
+  }
+
+  // Write an Fst to a file; return false on error
+  // Empty filename writes to standard output.
+  // This base version only logs an error and fails.
+  virtual bool Write(const string &filename) const {
+    LOG(ERROR) << "Fst::Write: No write filename method for " << Type()
+               << " Fst type";
+    return false;
+  }
+
+  // Return input label symbol table; return NULL if not specified
+  virtual const SymbolTable* InputSymbols() const = 0;
+
+  // Return output label symbol table; return NULL if not specified
+  virtual const SymbolTable* OutputSymbols() const = 0;
+
+  // For generic state iterator construction; not normally called
+  // directly by users.
+  virtual void InitStateIterator(StateIteratorData<A> *) const = 0;
+
+  // For generic arc iterator construction; not normally called
+  // directly by users.
+  virtual void InitArcIterator(StateId s, ArcIteratorData<A> *) const = 0;
+
+  // For generic matcher construction; not normally called
+  // directly by users.
+  virtual MatcherBase<A> *InitMatcher(MatchType match_type) const;
+
+ protected:
+  // Opens 'filename' for binary output and forwards to the stream Write();
+  // an empty filename writes to standard output. Returns false if the file
+  // cannot be opened or the stream Write() fails.
+  bool WriteFile(const string &filename) const {
+    if (filename.empty())
+      return Write(std::cout, FstWriteOptions("standard output"));
+    ofstream strm(filename.c_str(), ofstream::out | ofstream::binary);
+    if (!strm) {
+      LOG(ERROR) << "Fst::Write: Can't open file: " << filename;
+      return false;
+    }
+    return Write(strm, FstWriteOptions(filename));
+  }
+};
+
+
+//
+// STATE and ARC ITERATOR DEFINITIONS
+//
+
+// State iterator interface templated on the Arc definition; used
+// for StateIterator specializations returned by the InitStateIterator
+// Fst method.
+template <class A>
+class StateIteratorBase {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+
+  virtual ~StateIteratorBase() {}
+
+  // End of iterator?
+  bool Done() const {
+    return Done_();
+  }
+
+  // Current state (when !Done)
+  StateId Value() const {
+    return Value_();
+  }
+
+  // Advance to next state (when !Done)
+  void Next() {
+    Next_();
+  }
+
+  // Return to initial condition
+  void Reset() {
+    Reset_();
+  }
+
+ private:
+  // This allows base class virtual access to non-virtual derived-
+  // class members of the same name. It makes the derived class more
+  // efficient to use but unsafe to further derive.
+  virtual bool Done_() const = 0;
+  virtual StateId Value_() const = 0;
+  virtual void Next_() = 0;
+  virtual void Reset_() = 0;
+};
+
+
+// StateIterator initialization data
+
+template <class A> struct StateIteratorData {
+ StateIteratorBase<A> *base; // Specialized iterator if non-zero
+ typename A::StateId nstates; // O.w. total # of states
+};
+
+
+// Generic state iterator, templated on the FST definition
+// - a wrapper around pointer to specific one.
+// Here is a typical use: \code
+// for (StateIterator<StdFst> siter(fst);
+// !siter.Done();
+// siter.Next()) {
+// StateId s = siter.Value();
+// ...
+// } \endcode
+template <class F>
+class StateIterator {
+ public:
+  typedef F FST;
+  typedef typename F::Arc Arc;
+  typedef typename Arc::StateId StateId;
+
+  // Asks 'fst' to fill in the iterator data. If it supplies a specialized
+  // iterator (data_.base), every call is forwarded to it; otherwise states
+  // are counted from 0 up to data_.nstates.
+  explicit StateIterator(const F &fst) : s_(0) {
+    fst.InitStateIterator(&data_);
+  }
+
+  // Deletes the specialized iterator, if any (deleting a null pointer is
+  // a no-op).
+  ~StateIterator() { delete data_.base; }
+
+  bool Done() const {
+    if (data_.base)
+      return data_.base->Done();
+    return s_ >= data_.nstates;
+  }
+
+  StateId Value() const {
+    if (data_.base)
+      return data_.base->Value();
+    return s_;
+  }
+
+  void Next() {
+    if (!data_.base) {
+      ++s_;
+      return;
+    }
+    data_.base->Next();
+  }
+
+  void Reset() {
+    if (!data_.base) {
+      s_ = 0;
+      return;
+    }
+    data_.base->Reset();
+  }
+
+ private:
+  StateIteratorData<Arc> data_;
+  StateId s_;
+
+  DISALLOW_COPY_AND_ASSIGN(StateIterator);
+};
+
+
+// Flags to control the behavior on an arc iterator:
+static const uint32 kArcILabelValue = 0x0001; // Value() gives valid ilabel
+static const uint32 kArcOLabelValue = 0x0002; // Value() gives valid olabel
+static const uint32 kArcWeightValue = 0x0004; // Value() gives valid weight
+static const uint32 kArcNextStateValue = 0x0008; // Value() gives valid nextstate
+static const uint32 kArcNoCache = 0x0010; // No need to cache arcs
+
+// Union of the four flags describing which fields of Value() are valid.
+static const uint32 kArcValueFlags =
+ kArcILabelValue | kArcOLabelValue |
+ kArcWeightValue | kArcNextStateValue;
+
+// Union of every arc iterator flag defined above.
+static const uint32 kArcFlags = kArcValueFlags | kArcNoCache;
+
+
+// Arc iterator interface, templated on the Arc definition; used
+// for Arc iterator specializations that are returned by the InitArcIterator
+// Fst method.
+template <class A>
+class ArcIteratorBase {
+ public:
+  typedef A Arc;
+  typedef typename Arc::StateId StateId;
+
+  virtual ~ArcIteratorBase() {}
+
+  // End of iterator?
+  bool Done() const {
+    return Done_();
+  }
+
+  // Current arc (when !Done()).
+  const A& Value() const {
+    return Value_();
+  }
+
+  // Advances to the next arc (when !Done()).
+  void Next() {
+    Next_();
+  }
+
+  // Returns the current position.
+  size_t Position() const {
+    return Position_();
+  }
+
+  // Returns to the initial condition.
+  void Reset() {
+    Reset_();
+  }
+
+  // Random arc access by position.
+  void Seek(size_t a) {
+    Seek_(a);
+  }
+
+  // Returns the current behavioral flags.
+  uint32 Flags() const {
+    return Flags_();
+  }
+
+  // Sets the behavioral flags; both arguments are forwarded unchanged to
+  // the derived class.
+  void SetFlags(uint32 flags, uint32 mask) {
+    SetFlags_(flags, mask);
+  }
+
+ private:
+  // The public, non-virtual methods above forward to these virtual hooks.
+  // This gives base-class (virtual) access to non-virtual derived-class
+  // members of the same name, which makes the derived class more efficient
+  // to use but unsafe to derive from further.
+  virtual bool Done_() const = 0;
+  virtual const A& Value_() const = 0;
+  virtual void Next_() = 0;
+  virtual size_t Position_() const = 0;
+  virtual void Reset_() = 0;
+  virtual void Seek_(size_t a) = 0;
+  virtual uint32 Flags_() const = 0;
+  virtual void SetFlags_(uint32 flags, uint32 mask) = 0;
+};
+
+
+// ArcIterator initialization data
+template <class A>
+struct ArcIteratorData {
+  // Specialized iterator to delegate to, if non-zero.
+  ArcIteratorBase<A> *base;
+  // Otherwise, pointer to the arcs ...
+  const A *arcs;
+  // ... and the number of arcs ...
+  size_t narcs;
+  // ... and a reference count, if non-zero.
+  int *ref_count;
+};
+
+
+// Generic arc iterator, templated on the FST definition
+// - a wrapper around pointer to specific one.
+// Here is a typical use: \code
+// for (ArcIterator<StdFst> aiter(fst, s);
+//      !aiter.Done();
+//      aiter.Next()) {
+//   const StdArc &arc = aiter.Value();
+// ...
+// } \endcode
+template <class F>
+class ArcIterator {
+ public:
+  typedef F FST;
+  typedef typename F::Arc Arc;
+  typedef typename Arc::StateId StateId;
+
+  // Asks 'fst' to fill in the iterator data for the arcs leaving state 's'.
+  ArcIterator(const F &fst, StateId s) : i_(0) {
+    fst.InitArcIterator(s, &data_);
+  }
+
+  // Builds an iterator from already-initialized data, taking an extra
+  // reference on the shared count when one is present.
+  explicit ArcIterator(const ArcIteratorData<Arc> &data) : data_(data), i_(0) {
+    if (data_.ref_count) {
+      ++(*data_.ref_count);
+    }
+  }
+
+  // Destroys the specialized iterator if there is one; otherwise drops the
+  // reference held on the shared count, if any.
+  ~ArcIterator() {
+    if (data_.base) {
+      delete data_.base;
+    } else if (data_.ref_count) {
+      --(*data_.ref_count);
+    }
+  }
+
+  // End of iteration?
+  bool Done() const {
+    if (data_.base)
+      return data_.base->Done();
+    return i_ >= data_.narcs;
+  }
+
+  // Current arc (when !Done()).
+  const Arc& Value() const {
+    if (data_.base)
+      return data_.base->Value();
+    return data_.arcs[i_];
+  }
+
+  // Advances to the next arc (when !Done()).
+  void Next() {
+    if (!data_.base) {
+      ++i_;
+      return;
+    }
+    data_.base->Next();
+  }
+
+  // Returns to the initial condition.
+  void Reset() {
+    if (!data_.base) {
+      i_ = 0;
+      return;
+    }
+    data_.base->Reset();
+  }
+
+  // Random arc access by position.
+  void Seek(size_t a) {
+    if (!data_.base) {
+      i_ = a;
+      return;
+    }
+    data_.base->Seek(a);
+  }
+
+  // Current position.
+  size_t Position() const {
+    if (data_.base)
+      return data_.base->Position();
+    return i_;
+  }
+
+  // Current behavioral flags; without a specialized iterator every field
+  // of Value() is reported as valid.
+  uint32 Flags() const {
+    return data_.base ? data_.base->Flags() : kArcValueFlags;
+  }
+
+  // Forwards to the specialized iterator; does nothing when there is none.
+  void SetFlags(uint32 flags, uint32 mask) {
+    if (!data_.base)
+      return;
+    data_.base->SetFlags(flags, mask);
+  }
+
+ private:
+  ArcIteratorData<Arc> data_;  // Filled in by F::InitArcIterator() or copied
+  size_t i_;                   // Position used when data_.base is zero
+  DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+//
+// MATCHER DEFINITIONS
+//
+
+// Default implementation of Fst<A>::InitMatcher(): ignores 'match_type'
+// and returns a null pointer, which stands for "use the default matcher".
+template <class A>
+MatcherBase<A> *Fst<A>::InitMatcher(MatchType match_type) const {
+ return 0; // Use the default matcher
+}
+
+
+//
+// FST ACCESSORS - Useful functions in high-performance cases.
+//
+
+namespace internal {
+
+// General case: each call is qualified with the concrete class F
+// (fst.F::Method(s)), which requires non-abstract, 'final' methods.
+// Use for inlining.
+template <class F>
+inline typename F::Arc::Weight Final(const F &fst,
+                                     typename F::Arc::StateId s) {
+  return fst.F::Final(s);
+}
+
+template <class F>
+inline ssize_t NumArcs(const F &fst, typename F::Arc::StateId s) {
+  return fst.F::NumArcs(s);
+}
+
+template <class F>
+inline ssize_t NumInputEpsilons(const F &fst, typename F::Arc::StateId s) {
+  return fst.F::NumInputEpsilons(s);
+}
+
+template <class F>
+inline ssize_t NumOutputEpsilons(const F &fst, typename F::Arc::StateId s) {
+  return fst.F::NumOutputEpsilons(s);
+}
+
+
+// Fst<A> case: the methods are abstract, so they are called without
+// class qualification.
+template <class A>
+inline typename A::Weight Final(const Fst<A> &fst, typename A::StateId s) {
+  return fst.Final(s);
+}
+
+template <class A>
+inline ssize_t NumArcs(const Fst<A> &fst, typename A::StateId s) {
+  return fst.NumArcs(s);
+}
+
+template <class A>
+inline ssize_t NumInputEpsilons(const Fst<A> &fst, typename A::StateId s) {
+  return fst.NumInputEpsilons(s);
+}
+
+template <class A>
+inline ssize_t NumOutputEpsilons(const Fst<A> &fst, typename A::StateId s) {
+  return fst.NumOutputEpsilons(s);
+}
+
+}  // namespace internal
+
+// A useful alias when using StdArc: the abstract Fst interface
+// instantiated on StdArc.
+typedef Fst<StdArc> StdFst;
+
+
+//
+// CONSTANT DEFINITIONS
+//
+
+// Sentinel values: both are -1, which is never a valid ID or label.
+const int kNoStateId = -1; // Not a valid state ID
+const int kNoLabel = -1; // Not a valid label
+
+//
+// Fst IMPLEMENTATION BASE
+//
+// This is the recommended Fst implementation base class. It will
+// handle reference counts, property bits, type information and symbols.
+//
+
+template <class A>
+class FstImpl {
+ public:
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+
+  // Creates an implementation with no property bits set, type "null" and
+  // no symbol tables.
+  FstImpl()
+      : properties_(0), type_("null"), isymbols_(0), osymbols_(0) {}
+
+  // Copies the properties and type of 'impl' and duplicates its symbol
+  // tables with Copy(). The reference counter is default-constructed
+  // rather than copied.
+  FstImpl(const FstImpl<A> &impl)
+      : properties_(impl.properties_), type_(impl.type_),
+        isymbols_(impl.isymbols_ ? impl.isymbols_->Copy() : 0),
+        osymbols_(impl.osymbols_ ? impl.osymbols_->Copy() : 0) {}
+
+  // Frees the symbol tables owned by this implementation.
+  virtual ~FstImpl() {
+    delete isymbols_;
+    delete osymbols_;
+  }
+
+  // Name of the Fst class.
+  const string& Type() const { return type_; }
+
+  void SetType(const string &type) { type_ = type; }
+
+  // All stored property bits.
+  virtual uint64 Properties() const { return properties_; }
+
+  // Stored property bits restricted to 'mask'.
+  virtual uint64 Properties(uint64 mask) const { return properties_ & mask; }
+
+  // Replaces the stored property bits with 'props'; an already-set kError
+  // bit is kept.
+  void SetProperties(uint64 props) {
+    properties_ &= kError;  // kError can't be cleared
+    properties_ |= props;
+  }
+
+  // Replaces the bits selected by 'mask' with the corresponding bits of
+  // 'props'; an already-set kError bit is kept.
+  void SetProperties(uint64 props, uint64 mask) {
+    properties_ &= ~mask | kError;  // kError can't be cleared
+    properties_ |= props & mask;
+  }
+
+  // Allows (only) setting error bit on const FST impls. 'props' is not
+  // consulted: kError is set regardless, and an error is reported when
+  // 'mask' is anything other than kError.
+  void SetProperties(uint64 props, uint64 mask) const {
+    if (mask != kError)
+      FSTERROR() << "FstImpl::SetProperties() const: can only set kError";
+    properties_ |= kError;
+  }
+
+  const SymbolTable* InputSymbols() const { return isymbols_; }
+
+  const SymbolTable* OutputSymbols() const { return osymbols_; }
+
+  SymbolTable* InputSymbols() { return isymbols_; }
+
+  SymbolTable* OutputSymbols() { return osymbols_; }
+
+  // Replaces the input symbol table with a copy of 'isyms' (or with none
+  // when 'isyms' is null).
+  void SetInputSymbols(const SymbolTable* isyms) {
+    // Copy before deleting: 'isyms' may be the table this object owns
+    // (e.g. SetInputSymbols(InputSymbols())), and copying after the delete
+    // would read freed memory.
+    SymbolTable *copy = isyms ? isyms->Copy() : 0;
+    delete isymbols_;
+    isymbols_ = copy;
+  }
+
+  // Replaces the output symbol table with a copy of 'osyms' (or with none
+  // when 'osyms' is null).
+  void SetOutputSymbols(const SymbolTable* osyms) {
+    // Copy before deleting, for the same reason as in SetInputSymbols().
+    SymbolTable *copy = osyms ? osyms->Copy() : 0;
+    delete osymbols_;
+    osymbols_ = copy;
+  }
+
+  int RefCount() const {
+    return ref_count_.count();
+  }
+
+  int IncrRefCount() {
+    return ref_count_.Incr();
+  }
+
+  int DecrRefCount() {
+    return ref_count_.Decr();
+  }
+
+  // Read-in header and symbols from input stream, initialize Fst, and
+  // return the header. If opts.header is non-null, skip read-in and
+  // use the option value. If opts.[io]symbols is non-null, read-in
+  // (if present), but use the option value.
+  bool ReadHeader(istream &strm, const FstReadOptions& opts,
+                  int min_version, FstHeader *hdr);
+
+  // Write-out header and symbols to output stream.
+  // If opts.write_header is false, skip writing header.
+  // If opts.write_[io]symbols is false, skip writing those symbols.
+  // This method is needed for Impl's that implement Write methods.
+  void WriteHeader(ostream &strm, const FstWriteOptions& opts,
+                   int version, FstHeader *hdr) const {
+    if (opts.write_header) {
+      hdr->SetFstType(type_);
+      hdr->SetArcType(A::Type());
+      hdr->SetVersion(version);
+      hdr->SetProperties(properties_);
+      int32 file_flags = 0;
+      if (isymbols_ && opts.write_isymbols)
+        file_flags |= FstHeader::HAS_ISYMBOLS;
+      if (osymbols_ && opts.write_osymbols)
+        file_flags |= FstHeader::HAS_OSYMBOLS;
+      if (opts.align)
+        file_flags |= FstHeader::IS_ALIGNED;
+      hdr->SetFlags(file_flags);
+      hdr->Write(strm, opts.source);
+    }
+    if (isymbols_ && opts.write_isymbols) isymbols_->Write(strm);
+    if (osymbols_ && opts.write_osymbols) osymbols_->Write(strm);
+  }
+
+  // Write-out header and symbols to output stream.
+  // If opts.write_header is false, skip writing header.
+  // If opts.write_[io]symbols is false, skip writing those symbols.
+  // type is the Fst type being written.
+  // This method is used in the cross-type serialization methods Fst::WriteFst.
+  static void WriteFstHeader(const Fst<A> &fst, ostream &strm,
+                             const FstWriteOptions& opts, int version,
+                             const string &type, FstHeader *hdr) {
+    if (opts.write_header) {
+      hdr->SetFstType(type);
+      hdr->SetArcType(A::Type());
+      hdr->SetVersion(version);
+      hdr->SetProperties(fst.Properties(kFstProperties, false));
+      int32 file_flags = 0;
+      if (fst.InputSymbols() && opts.write_isymbols)
+        file_flags |= FstHeader::HAS_ISYMBOLS;
+      if (fst.OutputSymbols() && opts.write_osymbols)
+        file_flags |= FstHeader::HAS_OSYMBOLS;
+      if (opts.align)
+        file_flags |= FstHeader::IS_ALIGNED;
+      hdr->SetFlags(file_flags);
+      hdr->Write(strm, opts.source);
+    }
+    if (fst.InputSymbols() && opts.write_isymbols) {
+      fst.InputSymbols()->Write(strm);
+    }
+    if (fst.OutputSymbols() && opts.write_osymbols) {
+      fst.OutputSymbols()->Write(strm);
+    }
+  }
+
+  // In serialization routines where the header cannot be written until after
+  // the machine has been serialized, this routine can be called to seek to
+  // 'header_offset' in the stream and rewrite the header with updated
+  // fields. It repositions the file pointer back at the end of the file.
+  // Returns true on success, false on failure.
+  static bool UpdateFstHeader(const Fst<A> &fst, ostream &strm,
+                              const FstWriteOptions& opts, int version,
+                              const string &type, FstHeader *hdr,
+                              size_t header_offset) {
+    strm.seekp(header_offset);
+    if (!strm) {
+      LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source;
+      return false;
+    }
+    WriteFstHeader(fst, strm, opts, version, type, hdr);
+    if (!strm) {
+      LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source;
+      return false;
+    }
+    strm.seekp(0, ios_base::end);
+    if (!strm) {
+      LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source;
+      return false;
+    }
+    return true;
+  }
+
+ protected:
+  // Property bits; mutable so that the const SetProperties() overload can
+  // set kError.
+  mutable uint64 properties_;
+
+ private:
+  string type_;            // Unique name of Fst class
+  SymbolTable *isymbols_;  // Ilabel symbol table (owned)
+  SymbolTable *osymbols_;  // Olabel symbol table (owned)
+  RefCounter ref_count_;   // Reference count
+
+  void operator=(const FstImpl<A> &impl);  // disallow
+};
+
+// Reads the FST header (unless opts.header supplies one) and any symbol
+// tables flagged in it from 'strm', and stores the header's properties.
+// Returns false when the header cannot be read, or when its Fst type or
+// arc type does not match, or its version is older than 'min_version'.
+// Symbol tables given in opts.[io]symbols replace whatever was read.
+template <class A>
+inline bool FstImpl<A>::ReadHeader(istream &strm, const FstReadOptions &opts,
+                                   int min_version, FstHeader *hdr) {
+  if (opts.header)
+    *hdr = *opts.header;
+  else if (!hdr->Read(strm, opts.source))
+    return false;
+
+  if (FLAGS_v >= 2) {
+    LOG(INFO) << "FstImpl::ReadHeader: source: " << opts.source
+              << ", fst_type: " << hdr->FstType()
+              << ", arc_type: " << A::Type()
+              << ", version: " << hdr->Version()
+              << ", flags: " << hdr->GetFlags();
+  }
+
+  if (hdr->FstType() != type_) {
+    LOG(ERROR) << "FstImpl::ReadHeader: Fst not of type \"" << type_
+               << "\": " << opts.source;
+    return false;
+  }
+  if (hdr->ArcType() != A::Type()) {
+    LOG(ERROR) << "FstImpl::ReadHeader: Arc not of type \"" << A::Type()
+               << "\": " << opts.source;
+    return false;
+  }
+  if (hdr->Version() < min_version) {
+    LOG(ERROR) << "FstImpl::ReadHeader: Obsolete " << type_
+               << " Fst version: " << opts.source;
+    return false;
+  }
+  properties_ = hdr->Properties();
+  if (hdr->GetFlags() & FstHeader::HAS_ISYMBOLS) {
+    // Free any table this impl already owns so that it is not leaked when
+    // the pointer is overwritten (deleting a null pointer is a no-op).
+    delete isymbols_;
+    isymbols_ = SymbolTable::Read(strm, opts.source);
+  }
+  if (hdr->GetFlags() & FstHeader::HAS_OSYMBOLS) {
+    delete osymbols_;
+    osymbols_ = SymbolTable::Read(strm, opts.source);
+  }
+
+  // Tables supplied through the options take precedence over those read
+  // from the stream (which still had to be consumed above).
+  if (opts.isymbols) {
+    delete isymbols_;
+    isymbols_ = opts.isymbols->Copy();
+  }
+  if (opts.osymbols) {
+    delete osymbols_;
+    osymbols_ = opts.osymbols->Copy();
+  }
+  return true;
+}
+
+
+// Forward declaration only; the definition is not in this part of the file.
+// Called by ImplToFst::Properties() when its 'test' argument is true.
+template<class Arc>
+uint64 TestProperties(const Fst<Arc> &fst, uint64 mask, uint64 *known);
+
+
+// This is a helper class template useful for attaching an Fst interface to
+// its implementation, handling reference counting.
+template < class I, class F = Fst<typename I::Arc> >
+class ImplToFst : public F {
+ public:
+  typedef typename I::Arc Arc;
+  typedef typename Arc::Weight Weight;
+  typedef typename Arc::StateId StateId;
+
+  // Drops this object's reference on the implementation and deletes the
+  // implementation when the count reaches zero. The null check covers
+  // objects built with the default constructor on which SetImpl() was
+  // never called; it mirrors the check already made in SetImpl().
+  virtual ~ImplToFst() {
+    if (impl_ && !impl_->DecrRefCount()) delete impl_;
+  }
+
+  // The accessors below forward to the implementation.
+  virtual StateId Start() const { return impl_->Start(); }
+
+  virtual Weight Final(StateId s) const { return impl_->Final(s); }
+
+  virtual size_t NumArcs(StateId s) const { return impl_->NumArcs(s); }
+
+  virtual size_t NumInputEpsilons(StateId s) const {
+    return impl_->NumInputEpsilons(s);
+  }
+
+  virtual size_t NumOutputEpsilons(StateId s) const {
+    return impl_->NumOutputEpsilons(s);
+  }
+
+  // With 'test' false, returns the stored property bits selected by 'mask'.
+  // With 'test' true, runs TestProperties(), stores the bits it reports as
+  // known in the implementation, and returns the tested bits selected by
+  // 'mask'.
+  virtual uint64 Properties(uint64 mask, bool test) const {
+    if (test) {
+      uint64 knownprops, testprops = TestProperties(*this, mask, &knownprops);
+      impl_->SetProperties(testprops, knownprops);
+      return testprops & mask;
+    } else {
+      return impl_->Properties(mask);
+    }
+  }
+
+  virtual const string& Type() const { return impl_->Type(); }
+
+  virtual const SymbolTable* InputSymbols() const {
+    return impl_->InputSymbols();
+  }
+
+  virtual const SymbolTable* OutputSymbols() const {
+    return impl_->OutputSymbols();
+  }
+
+ protected:
+  // Leaves the object without an implementation; one can be attached
+  // afterwards with SetImpl().
+  ImplToFst() : impl_(0) {}
+
+  // Stores 'impl' without touching its reference count.
+  ImplToFst(I *impl) : impl_(impl) {}
+
+  // Shares the implementation of 'fst', incrementing its reference count.
+  ImplToFst(const ImplToFst<I, F> &fst) {
+    impl_ = fst.impl_;
+    impl_->IncrRefCount();
+  }
+
+  // This constructor presumes there is a copy constructor for the
+  // implementation. With 'safe' true the implementation is copied;
+  // otherwise it is shared and its reference count incremented.
+  ImplToFst(const ImplToFst<I, F> &fst, bool safe) {
+    if (safe) {
+      impl_ = new I(*(fst.impl_));
+    } else {
+      impl_ = fst.impl_;
+      impl_->IncrRefCount();
+    }
+  }
+
+  I *GetImpl() const { return impl_; }
+
+  // Change Fst implementation pointer. If 'own_impl' is true,
+  // ownership of the input implementation is given to this
+  // object; otherwise, the input implementation's reference count
+  // should be incremented.
+  void SetImpl(I *impl, bool own_impl = true) {
+    // Increment before releasing the old implementation so that passing
+    // the current implementation with own_impl == false cannot delete it.
+    if (!own_impl)
+      impl->IncrRefCount();
+    if (impl_ && !impl_->DecrRefCount()) delete impl_;
+    impl_ = impl;
+  }
+
+ private:
+  // Disallow
+  ImplToFst<I, F> &operator=(const ImplToFst<I, F> &fst);
+
+  // Assignment from a generic Fst is also disallowed: it reports an error
+  // and marks the implementation with kError.
+  ImplToFst<I, F> &operator=(const Fst<Arc> &fst) {
+    FSTERROR() << "ImplToFst: Assignment operator disallowed";
+    GetImpl()->SetProperties(kError, kError);
+    return *this;
+  }
+
+  I *impl_;  // Reference-counted implementation; may be null (see above)
+};
+
+
+// Converts FSTs by casting their implementations, where this makes
+// sense (which excludes implementations with weight-dependent virtual
+// methods). Must be a friend of the Fst classes involved (currently
+// the concrete Fsts: VectorFst, ConstFst, CompactFst).
+template <class F, class G>
+void Cast(const F &ifst, G *ofst) {
+  // Reinterpret the source implementation as the destination's
+  // implementation type; no data is converted.
+  typedef typename G::Impl TargetImpl;
+  TargetImpl *shared_impl = reinterpret_cast<TargetImpl *>(ifst.GetImpl());
+  // 'false': the implementation stays shared with 'ifst'.
+  ofst->SetImpl(shared_impl, false);
+}
+
+// Fst Serialization
+template <class A>
+// Serializes 'fst' into an in-memory stream and stores the resulting
+// bytes in '*result'.
+void FstToString(const Fst<A> &fst, string *result) {
+  ostringstream buffer;
+  const FstWriteOptions write_opts("FstToString");
+  fst.Write(buffer, write_opts);
+  *result = buffer.str();
+}
+
+template <class A>
+// Reads an Fst back from the bytes held in 's'; returns whatever
+// Fst<A>::Read() returns.
+Fst<A> *StringToFst(const string &s) {
+  istringstream buffer(s);
+  const FstReadOptions read_opts("StringToFst");
+  return Fst<A>::Read(buffer, read_opts);
+}
+
+} // namespace fst
+
+#endif // FST_LIB_FST_H__
diff --git a/src/include/fst/fstlib.h b/src/include/fst/fstlib.h
new file mode 100644
index 0000000..c05c775
--- /dev/null
+++ b/src/include/fst/fstlib.h
@@ -0,0 +1,151 @@
+// fstlib.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \page FstLib FST - Weighted Finite State Transducers
+// This is a library for constructing, combining, optimizing, and
+// searching "weighted finite-state transducers" (FSTs). Weighted
+// finite-state transducers are automata where each transition has an
+// input label, an output label, and a weight. The more familiar
+// finite-state acceptor is represented as a transducer with each
+// transition's input and output the same. Finite-state acceptors
+// are used to represent sets of strings (specifically, "regular" or
+// "rational sets"); finite-state transducers are used to represent
+// binary relations between pairs of strings (specifically, "rational
+// transductions"). The weights can be used to represent the cost of
+// taking a particular transition.
+//
+// In this library, the transducers are templated on the Arc
+// (transition) definition, which allows changing the label, weight,
+// and state ID sets. Labels and state IDs are restricted to signed
+// integral types but the weight can be an arbitrary type whose
+// members satisfy certain algebraic ("semiring") properties.
+//
+// For more information, see the FST Library Wiki page:
+// http://wiki.corp.google.com/twiki/bin/view/Main/FstLibrary
+
+// \file
+// This convenience file includes all other FST inl.h files.
+//
+
+#ifndef FST_LIB_FSTLIB_H__
+#define FST_LIB_FSTLIB_H__
+
+
+// Abstract FST classes
+#include <fst/fst.h>
+#include <fst/expanded-fst.h>
+#include <fst/mutable-fst.h>
+
+// Concrete FST classes
+#include <fst/compact-fst.h>
+#include <fst/const-fst.h>
+#include <fst/edit-fst.h>
+#include <fst/vector-fst.h>
+
+// FST algorithms and delayed FST classes
+#include <fst/arcsort.h>
+#include <fst/arc-map.h>
+#include <fst/closure.h>
+#include <fst/compose.h>
+#include <fst/concat.h>
+#include <fst/connect.h>
+#include <fst/determinize.h>
+#include <fst/difference.h>
+#include <fst/encode.h>
+#include <fst/epsnormalize.h>
+#include <fst/equal.h>
+#include <fst/equivalent.h>
+#include <fst/factor-weight.h>
+#include <fst/intersect.h>
+#include <fst/invert.h>
+#include <fst/map.h>
+#include <fst/minimize.h>
+#include <fst/project.h>
+#include <fst/prune.h>
+#include <fst/push.h>
+#include <fst/randequivalent.h>
+#include <fst/randgen.h>
+#include <fst/rational.h>
+#include <fst/relabel.h>
+#include <fst/replace.h>
+#include <fst/replace-util.h>
+#include <fst/reverse.h>
+#include <fst/reweight.h>
+#include <fst/rmepsilon.h>
+#include <fst/rmfinalepsilon.h>
+#include <fst/shortest-distance.h>
+#include <fst/shortest-path.h>
+#include <fst/statesort.h>
+#include <fst/state-map.h>
+#include <fst/synchronize.h>
+#include <fst/topsort.h>
+#include <fst/union.h>
+#include <fst/verify.h>
+#include <fst/visit.h>
+
+// Weights
+#include <fst/weight.h>
+#include <fst/expectation-weight.h>
+#include <fst/float-weight.h>
+#include <fst/lexicographic-weight.h>
+#include <fst/pair-weight.h>
+#include <fst/power-weight.h>
+#include <fst/product-weight.h>
+#include <fst/random-weight.h>
+#include <fst/signed-log-weight.h>
+#include <fst/sparse-power-weight.h>
+#include <fst/sparse-tuple-weight.h>
+#include <fst/string-weight.h>
+#include <fst/tuple-weight.h>
+
+// Auxiliary classes for composition
+#include <fst/compose-filter.h>
+#include <fst/lookahead-filter.h>
+#include <fst/lookahead-matcher.h>
+#include <fst/matcher-fst.h>
+#include <fst/matcher.h>
+#include <fst/state-table.h>
+
+// Data structures
+#include <fst/heap.h>
+#include <fst/interval-set.h>
+#include <fst/queue.h>
+#include <fst/union-find.h>
+
+// Miscellaneous
+#include <fst/accumulator.h>
+#include <fst/add-on.h>
+#include <fst/arc.h>
+#include <fst/arcfilter.h>
+#include <fst/cache.h>
+#include <fst/complement.h>
+#include <fst/dfs-visit.h>
+#include <fst/generic-register.h>
+#include <fst/label-reachable.h>
+#include <fst/partition.h>
+#include <fst/properties.h>
+#include <fst/register.h>
+#include <fst/state-reachable.h>
+#include <iostream>
+#include <fstream>
+#include <fst/symbol-table.h>
+#include <fst/symbol-table-ops.h>
+#include <fst/test-properties.h>
+#include <fst/util.h>
+
+
+#endif // FST_LIB_FSTLIB_H__
diff --git a/src/include/fst/generic-register.h b/src/include/fst/generic-register.h
new file mode 100644
index 0000000..4f8b512
--- /dev/null
+++ b/src/include/fst/generic-register.h
@@ -0,0 +1,159 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_LIB_GENERIC_REGISTER_H_
+#define FST_LIB_GENERIC_REGISTER_H_
+
+#include <map>
+#include <string>
+
+#include <fst/compat.h>
+#include <fst/types.h>
+
+// Generic class representing a globally-stored correspondence between
+// objects of KeyType and EntryType.
+// KeyType must:
+// a) be such as can be stored as a key in a map<>
+// b) be concatenable with a const char* with the + operator
+// (or you must subclass and redefine LoadEntryFromSharedObject)
+// EntryType must be default constructible.
+//
+// The third template parameter should be the type of a subclass of this class
+// (think CRTP). This is to allow GetRegister() to instantiate and return
+// an object of the appropriate type.
+
+namespace fst {
+
+template <class KeyType, class EntryType, class RegisterType>
+class GenericRegister {
+ public:
+  typedef KeyType Key;
+  typedef EntryType Entry;
+
+  // Returns the global register, running Init() through FstOnceInit()
+  // before handing it out.
+  static RegisterType *GetRegister() {
+    FstOnceInit(&register_init_, &RegisterType::Init);
+    return register_;
+  }
+
+  // Adds the (key, entry) pair to the table while holding the register
+  // lock. map::insert() leaves an entry already stored under 'key' as is.
+  void SetEntry(const KeyType &key, const EntryType &entry) {
+    MutexLock l(register_lock_);
+    register_table_.insert(make_pair(key, entry));
+  }
+
+  // Returns the entry stored under 'key'; when there is none, falls back
+  // to loading it from a shared object.
+  EntryType GetEntry(const KeyType &key) const {
+    const EntryType *found = LookupEntry(key);
+    if (!found)
+      return LoadEntryFromSharedObject(key);
+    return *found;
+  }
+
+  virtual ~GenericRegister() { }
+
+ protected:
+  // Override this if you want to be able to load missing definitions from
+  // shared object files. Returns a default-constructed EntryType when the
+  // shared object cannot be opened or does not register 'key'.
+  virtual EntryType LoadEntryFromSharedObject(const KeyType &key) const {
+    string so_filename = ConvertKeyToSoFilename(key);
+
+    void *handle = dlopen(so_filename.c_str(), RTLD_LAZY);
+    if (!handle) {
+      LOG(ERROR) << "GenericRegister::GetEntry : " << dlerror();
+      return EntryType();
+    }
+
+    // We assume that the DSO constructs a static object in its global
+    // scope that does the registration. Thus we need only load it, not
+    // call any methods.
+    const EntryType *found = this->LookupEntry(key);
+    if (!found) {
+      LOG(ERROR) << "GenericRegister::GetEntry : "
+                 << "lookup failed in shared object: " << so_filename;
+      return EntryType();
+    }
+    return *found;
+  }
+
+  // Override this to define how to turn a key into an SO filename.
+  virtual string ConvertKeyToSoFilename(const KeyType& key) const = 0;
+
+  // Looks 'key' up in the table while holding the register lock; returns
+  // a pointer to the stored entry, or 0 when the key is absent.
+  virtual const EntryType *LookupEntry(const KeyType &key) const {
+    MutexLock l(register_lock_);
+
+    typename RegisterMapType::const_iterator it = register_table_.find(key);
+    if (it == register_table_.end())
+      return 0;
+    return &it->second;
+  }
+
+ private:
+  typedef map<KeyType, EntryType> RegisterMapType;
+
+  // Creates the lock and the register instance; passed to FstOnceInit()
+  // by GetRegister().
+  static void Init() {
+    register_lock_ = new Mutex;
+    register_ = new RegisterType;
+  }
+
+  static FstOnceType register_init_;
+  static Mutex *register_lock_;
+  static RegisterType *register_;
+
+  RegisterMapType register_table_;
+};
+
+// Definitions of GenericRegister's static data members. The lock and the
+// register pointer start out null; GenericRegister::Init() allocates them.
+template<class KeyType, class EntryType, class RegisterType>
+FstOnceType GenericRegister<KeyType, EntryType,
+ RegisterType>::register_init_ = FST_ONCE_INIT;
+
+template<class KeyType, class EntryType, class RegisterType>
+Mutex *GenericRegister<KeyType, EntryType, RegisterType>::register_lock_ = 0;
+
+template<class KeyType, class EntryType, class RegisterType>
+RegisterType *GenericRegister<KeyType, EntryType, RegisterType>::register_ = 0;
+
+//
+// GENERIC REGISTRATION
+//
+
+// Generic register-er class capable of creating new register entries in the
+// given RegisterType template parameter. This type must define types Key
+// and Entry, and have appropriate static GetRegister() and instance
+// SetEntry() functions. An easy way to accomplish this is to have RegisterType
+// be the type of a subclass of GenericRegister.
+template <class RegisterType>
+class GenericRegisterer {
+ public:
+  typedef typename RegisterType::Key Key;
+  typedef typename RegisterType::Entry Entry;
+
+  // Constructing a registerer stores 'entry' under 'key' in the global
+  // register returned by RegisterType::GetRegister().
+  GenericRegisterer(Key key, Entry entry) {
+    RegisterType::GetRegister()->SetEntry(key, entry);
+  }
+};
+
+} // namespace fst
+
+#endif // FST_LIB_GENERIC_REGISTER_H_
diff --git a/src/include/fst/heap.h b/src/include/fst/heap.h
new file mode 100644
index 0000000..a7affbd
--- /dev/null
+++ b/src/include/fst/heap.h
@@ -0,0 +1,206 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// All Rights Reserved.
+// Author: Johan Schalkwyk (johans@google.com)
+//
+// \file
+// Implementation of a heap as in STL, but allows tracking positions
+// in heap using a key. The key can be used to do an in-place update of
+// values in the heap.
+
+#ifndef FST_LIB_HEAP_H__
+#define FST_LIB_HEAP_H__
+
+#include <vector>
+using std::vector;
+#include <functional>
+
+#include <fst/compat.h>
+namespace fst {
+
+//
+// \class Heap
+// \brief A templated heap implementation that support in-place update
+// of values.
+//
+// The templated heap implementation is a little different from the
+// STL priority_queue and the *_heap operations in STL. This heap
+// supports indexing of values in the heap via an associated key.
+//
+// Each value is internally associated with a key which is returned
+// to the calling functions on heap insert. This key can be used
+// to later update the specific value in the heap.
+//
+// \param T the element type of the heap, can be POD, Data or Ptr to Data
+// \param Compare Comparison class for determining min-heapness.
+// \param max whether heap top should be max or min element w.r.t. Compare
+//
+
+// Presumably the value denoting "no key"; it is not referenced by the
+// Heap class below.
+static const int kNoKey = -1;
+
+// Binary heap whose elements can be updated in place through the key
+// returned by Insert(). Three parallel arrays are maintained:
+//   A_[i]   - value stored at heap position i
+//   key_[i] - key of the value stored at heap position i
+//   pos_[k] - heap position of the value with key k
+template <class T, class Compare, bool max>
+class Heap {
+ public:
+
+  // Initialize with a specific comparator
+  Heap(Compare comp) : comp_(comp), size_(0) { }
+
+  // Create a heap with initial size of internal arrays of 0
+  Heap() : size_(0) { }
+
+  ~Heap() { }
+
+  // Insert a value into the heap. Returns the key associated with the
+  // value, which can later be passed to Update().
+  int Insert(const T& val) {
+    if (size_ < A_.size()) {
+      // Reuse a slot left behind by Pop()/Clear(), along with the key
+      // already parked in it.
+      A_[size_] = val;
+      pos_[key_[size_]] = size_;
+    } else {
+      A_.push_back(val);
+      pos_.push_back(size_);
+      key_.push_back(size_);
+    }
+
+    ++size_;
+    return Insert(val, size_ - 1);
+  }
+
+  // Update a value at position given by the key. The pos array is first
+  // indexed by the key. The position gives the position in the heap array.
+  // Once we have the position we can then use the standard heap operations
+  // to calculate the parent and child positions.
+  void Update(int key, const T& val) {
+    int i = pos_[key];
+    // Compare against the parent before overwriting: at the root the
+    // "parent" is the element itself (Parent(0) == 0).
+    const bool beats_parent = Better(val, A_[Parent(i)]);
+    // Store the new value first. The sift-up in Insert(val, i) only swaps
+    // array slots, so without this store the old value would be moved up
+    // and the new one lost.
+    A_[i] = val;
+    if (beats_parent) {
+      Insert(val, i);
+    } else {
+      Heapify(i);
+    }
+  }
+
+  // Return the greatest (max=true) / least (max=false) value w.r.t.
+  // comp object from the heap, and remove it. The heap must not be empty.
+  T Pop() {
+    T top = A_[0];
+
+    Swap(0, size_-1);
+    size_--;
+    Heapify(0);
+    return top;
+  }
+
+  // Return the greatest (max=true) / least (max=false) value w.r.t.
+  // comp object from the heap. The heap must not be empty.
+  T Top() const {
+    return A_[0];
+  }
+
+  // Check if the heap is empty
+  bool Empty() const {
+    return size_ == 0;
+  }
+
+  // Forget all elements; the internal arrays keep their storage.
+  void Clear() {
+    size_ = 0;
+  }
+
+
+  //
+  // The following private routines are used in a supportive role
+  // for managing the heap and keeping the heap properties.
+  //
+ private:
+  // Compute left child of parent
+  int Left(int i) {
+    return 2*(i+1)-1;   // 0 -> 1, 1 -> 3
+  }
+
+  // Compute right child of parent
+  int Right(int i) {
+    return 2*(i+1);     // 0 -> 2, 1 -> 4
+  }
+
+  // Given a child compute parent
+  int Parent(int i) {
+    return (i-1)/2;     // 1 -> 0, 2 -> 0, 3 -> 1, 4 -> 1
+  }
+
+  // Swap a child, parent. Use to move element up/down tree.
+  // Note a little tricky here. When we swap we need to swap:
+  //   the value
+  //   the associated keys
+  //   the position of the value in the heap
+  void Swap(int j, int k) {
+    int tkey = key_[j];
+    pos_[key_[j] = key_[k]] = j;
+    pos_[key_[k] = tkey] = k;
+
+    T val = A_[j];
+    A_[j] = A_[k];
+    A_[k] = val;
+  }
+
+  // True when x should be nearer the top than y: comp_(y, x) for a
+  // max-heap, comp_(x, y) for a min-heap.
+  bool Better(const T& x, const T& y) {
+    return max ? comp_(y, x) : comp_(x, y);
+  }
+
+  // Heapify subtree rooted at index i.
+  void Heapify(int i) {
+    int l = Left(i);
+    int r = Right(i);
+    int largest;
+
+    if (l < size_ && Better(A_[l], A_[i]) )
+      largest = l;
+    else
+      largest = i;
+
+    if (r < size_ && Better(A_[r], A_[largest]) )
+      largest = r;
+
+    if (largest != i) {
+      Swap(i, largest);
+      Heapify(largest);
+    }
+  }
+
+
+  // Sift the element at index i up towards the root for as long as its
+  // parent is not better than val. The caller must already have stored
+  // val in A_[i]. Returns the key of the element's final position.
+  int Insert(const T& val, int i) {
+    int p;
+    while (i > 0 && !Better(A_[p = Parent(i)], val)) {
+      Swap(i, p);
+      i = p;
+    }
+
+    return key_[i];
+  }
+
+ private:
+  Compare comp_;
+
+  vector<int> pos_;
+  vector<int> key_;
+  vector<T> A_;
+  int size_;
+
+  // DISALLOW_COPY_AND_ASSIGN(Heap);
+};
+
+} // namespace fst
+
+#endif // FST_LIB_HEAP_H__
diff --git a/src/include/fst/icu.h b/src/include/fst/icu.h
new file mode 100644
index 0000000..6b74c2e
--- /dev/null
+++ b/src/include/fst/icu.h
@@ -0,0 +1,103 @@
+// icu.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: roubert@google.com (Fredrik Roubert)
+
+// Wrapper class for UErrorCode, with conversion operators for direct use in
+// ICU C and C++ APIs.
+//
+// Features:
+// - The constructor initializes the internal UErrorCode to U_ZERO_ERROR,
+// removing one common source of errors.
+// - Same use in C APIs taking a UErrorCode* (pointer) and C++ taking
+// UErrorCode& (reference), via conversion operators.
+// - Automatic checking for success when it goes out of scope. On failure,
+// the destructor will FSTERROR() an error message.
+//
+// Most of ICU will handle errors gracefully and provide sensible fallbacks.
+// Using IcuErrorCode, it is therefore possible to write very compact code
+// that does sensible things on failure and provides logging for debugging.
+//
+// Example:
+//
+// IcuErrorCode icuerrorcode;
+// return collator.compareUTF8(a, b, icuerrorcode) == UCOL_EQUAL;
+
+#ifndef FST_LIB_ICU_H_
+#define FST_LIB_ICU_H_
+
+#include <unicode/errorcode.h>
+#include <unicode/unistr.h>
+#include <unicode/ustring.h>
+#include <unicode/utf8.h>
+
+class IcuErrorCode : public icu::ErrorCode {
+ public:
+  IcuErrorCode() {}
+
+  // Reports a still-pending failure when the object is destroyed.
+  virtual ~IcuErrorCode() {
+    if (isFailure())
+      handleFailure();
+  }
+
+  // Redefine 'errorName()' in order to be compatible with ICU version 4.2
+  const char* errorName() const { return u_errorName(errorCode); }
+
+ protected:
+  // Emits the name of the current error code through FSTERROR().
+  virtual void handleFailure() const {
+    FSTERROR() << errorName();
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(IcuErrorCode);
+};
+
+namespace fst {
+
+// Decodes 'str' with U8_NEXT and appends one label per decoded code point
+// to 'labels'. Returns false (after logging) at the first code point
+// reported as negative; labels appended before that point are kept.
+template <class Label>
+bool UTF8StringToLabels(const string &str, vector<Label> *labels) {
+  const char *bytes = str.c_str();
+  const int32_t nbytes = str.size();
+  int32_t offset = 0;
+  while (offset < nbytes) {
+    UChar32 code_point;
+    U8_NEXT(bytes, offset, nbytes, code_point);  // Also advances 'offset'.
+    if (code_point < 0) {
+      LOG(ERROR) << "UTF8StringToLabels: Invalid character found: "
+                 << code_point;
+      return false;
+    }
+    labels->push_back(code_point);
+  }
+  return true;
+}
+
+// Converts each label to UTF-8 with u_strToUTF8 and appends the resulting
+// bytes to 'str'. Returns false (after logging) as soon as a conversion
+// reports failure; text appended before that point is kept.
+template <class Label>
+bool LabelsToUTF8String(const vector<Label> &labels, string *str) {
+  typedef typename vector<Label>::const_iterator LabelIter;
+  icu::UnicodeString code_point;
+  char utf8[5];
+  for (LabelIter it = labels.begin(); it != labels.end(); ++it) {
+    code_point.setTo(*it);
+    IcuErrorCode status;
+    u_strToUTF8(utf8, 5, NULL, code_point.getTerminatedBuffer(), -1, status);
+    if (status.isFailure()) {
+      LOG(ERROR) << "LabelsToUTF8String: Bad encoding: "
+                 << status.errorName();
+      return false;
+    }
+    *str += utf8;
+  }
+  return true;
+}
+
+} // namespace fst
+
+#endif // FST_LIB_ICU_H_
diff --git a/src/include/fst/intersect.h b/src/include/fst/intersect.h
new file mode 100644
index 0000000..f46116f
--- /dev/null
+++ b/src/include/fst/intersect.h
@@ -0,0 +1,172 @@
+// intersect.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Class to compute the intersection of two FSAs
+
+#ifndef FST_LIB_INTERSECT_H__
+#define FST_LIB_INTERSECT_H__
+
+#include <algorithm>
+#include <vector>
+using std::vector;
+
+#include <fst/cache.h>
+#include <fst/compose.h>
+
+
+namespace fst {
+
+// Options for IntersectFst. This is a thin wrapper over ComposeFstOptions
+// with the same template parameters: A is the arc type, M the matcher,
+// F the compose filter and T the state table.
+template <class A,
+ class M = Matcher<Fst<A> >,
+ class F = SequenceComposeFilter<M>,
+ class T = GenericComposeStateTable<A, typename F::FilterState> >
+struct IntersectFstOptions : public ComposeFstOptions<A, M, F, T> {
+ // Forwards all arguments unchanged to the ComposeFstOptions constructor.
+ explicit IntersectFstOptions(const CacheOptions &opts,
+ M *mat1 = 0, M *mat2 = 0,
+ F *filt = 0, T *sttable= 0)
+ : ComposeFstOptions<A, M, F, T>(opts, mat1, mat2, filt, sttable) { }
+
+ // Uses the default-constructed ComposeFstOptions base.
+ IntersectFstOptions() {}
+};
+
+// Computes the intersection (Hadamard product) of two FSAs. This
+// version is a delayed Fst. Only strings that are in both automata
+// are retained in the result.
+//
+// The two arguments must be acceptors. One of the arguments must be
+// label-sorted.
+//
+// Complexity: same as ComposeFst.
+//
+// Caveats: same as ComposeFst.
+template <class A>
+class IntersectFst : public ComposeFst<A> {
+ public:
+  using ComposeFst<A>::CreateBase;
+  using ComposeFst<A>::CreateBase1;
+  using ComposeFst<A>::Properties;
+  using ImplToFst< ComposeFstImplBase<A> >::GetImpl;
+  using ImplToFst< ComposeFstImplBase<A> >::SetImpl;
+
+  typedef A Arc;
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+
+  // Builds the delayed intersection of fst1 and fst2 with the given cache
+  // options. If either input lacks the kAcceptor property, an error is
+  // logged and kError is set on the implementation.
+  // 'opts' is taken by const reference to avoid copying the options.
+  IntersectFst(const Fst<A> &fst1, const Fst<A> &fst2,
+               const CacheOptions &opts = CacheOptions()) {
+    // The acceptor test is evaluated before the implementation is created.
+    const bool acceptors = BothAcceptors(fst1, fst2);
+    SetImpl(CreateBase(fst1, fst2, opts));
+    if (!acceptors)
+      FlagNotAcceptors();
+  }
+
+  // Same as above, but with caller-supplied matcher (M), filter (F) and
+  // state table (T) types carried by 'opts'.
+  template <class M, class F, class T>
+  IntersectFst(const Fst<A> &fst1, const Fst<A> &fst2,
+               const IntersectFstOptions<A, M, F, T> &opts) {
+    const bool acceptors = BothAcceptors(fst1, fst2);
+    SetImpl(CreateBase1(fst1, fst2, opts));
+    if (!acceptors)
+      FlagNotAcceptors();
+  }
+
+  // See Fst<>::Copy() for doc.
+  IntersectFst(const IntersectFst<A> &fst, bool safe = false) :
+      ComposeFst<A>(fst, safe) {}
+
+  // Get a copy of this IntersectFst. See Fst<>::Copy() for further doc.
+  virtual IntersectFst<A> *Copy(bool safe = false) const {
+    return new IntersectFst<A>(*this, safe);
+  }
+
+ private:
+  // True iff both inputs report the kAcceptor property.
+  static bool BothAcceptors(const Fst<A> &fst1, const Fst<A> &fst2) {
+    return fst1.Properties(kAcceptor, true) &&
+           fst2.Properties(kAcceptor, true);
+  }
+
+  // Logs the non-acceptor error and sets kError on the implementation.
+  void FlagNotAcceptors() {
+    FSTERROR() << "IntersectFst: input FSTs are not acceptors";
+    GetImpl()->SetProperties(kError);
+  }
+};
+
+
+// Specialization for IntersectFst: state iteration is delegated entirely
+// to the StateIterator of the ComposeFst base class.
+template <class A>
+class StateIterator< IntersectFst<A> >
+ : public StateIterator< ComposeFst<A> > {
+ public:
+ explicit StateIterator(const IntersectFst<A> &fst)
+ : StateIterator< ComposeFst<A> >(fst) {}
+};
+
+
+// Specialization for IntersectFst: arc iteration over state s is delegated
+// entirely to the ArcIterator of the ComposeFst base class.
+template <class A>
+class ArcIterator< IntersectFst<A> >
+ : public ArcIterator< ComposeFst<A> > {
+ public:
+ typedef typename A::StateId StateId;
+
+ ArcIterator(const IntersectFst<A> &fst, StateId s)
+ : ArcIterator< ComposeFst<A> >(fst, s) {}
+};
+
+// Useful alias when using StdArc.
+typedef IntersectFst<StdArc> StdIntersectFst;
+
+
+typedef ComposeOptions IntersectOptions;
+
+
+// Computes the intersection (Hadamard product) of two FSAs. This
+// version writes the intersection to an output MutableFst. Only
+// strings that are in both automata are retained in the result.
+//
+// The two arguments must be acceptors. One of the arguments must be
+// label-sorted.
+//
+// Complexity: same as Compose.
+//
+// Caveats: same as Compose.
+template<class Arc>
+void Intersect(const Fst<Arc> &ifst1, const Fst<Arc> &ifst2,
+               MutableFst<Arc> *ofst,
+               const IntersectOptions &opts = IntersectOptions()) {
+  typedef Matcher< Fst<Arc> > M;
+
+  // In every case gc_limit is set to 0 so that only the last state is
+  // cached, for the fastest copy into *ofst.
+  switch (opts.filter_type) {
+    case AUTO_FILTER: {
+      CacheOptions nopts;
+      nopts.gc_limit = 0;
+      *ofst = IntersectFst<Arc>(ifst1, ifst2, nopts);
+      break;
+    }
+    case SEQUENCE_FILTER: {
+      IntersectFstOptions<Arc> iopts;
+      iopts.gc_limit = 0;
+      *ofst = IntersectFst<Arc>(ifst1, ifst2, iopts);
+      break;
+    }
+    case ALT_SEQUENCE_FILTER: {
+      IntersectFstOptions<Arc, M, AltSequenceComposeFilter<M> > iopts;
+      iopts.gc_limit = 0;
+      *ofst = IntersectFst<Arc>(ifst1, ifst2, iopts);
+      break;
+    }
+    case MATCH_FILTER: {
+      IntersectFstOptions<Arc, M, MatchComposeFilter<M> > iopts;
+      iopts.gc_limit = 0;
+      *ofst = IntersectFst<Arc>(ifst1, ifst2, iopts);
+      break;
+    }
+    default:
+      break;  // Any other filter type leaves *ofst untouched.
+  }
+
+  if (opts.connect)
+    Connect(ofst);
+}
+
+} // namespace fst
+
+#endif // FST_LIB_INTERSECT_H__
diff --git a/src/include/fst/interval-set.h b/src/include/fst/interval-set.h
new file mode 100644
index 0000000..cf6ac54
--- /dev/null
+++ b/src/include/fst/interval-set.h
@@ -0,0 +1,381 @@
+// interval-set.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Class to represent and operate on sets of intervals.
+
+#ifndef FST_LIB_INTERVAL_SET_H__
+#define FST_LIB_INTERVAL_SET_H__
+
+#include <iostream>
+#include <vector>
+using std::vector;
+
+
+#include <fst/util.h>
+
+
+namespace fst {
+
+// Stores and operates on a set of half-open integral intervals [a,b)
+// of signed integers of type T.
+template <typename T>
+class IntervalSet {
+ public:
+  // A single half-open interval [begin, end).
+  struct Interval {
+    T begin;
+    T end;
+
+    // Default-constructs the placeholder interval with both ends at -1.
+    Interval() : begin(-1), end(-1) {}
+
+    Interval(T b, T e) : begin(b), end(e) {}
+
+    // Orders by ascending begin; among equal begins the interval with the
+    // larger end sorts first.
+    bool operator<(const Interval &i) const {
+      return begin < i.begin || (begin == i.begin && end > i.end);
+    }
+
+    bool operator==(const Interval &i) const {
+      return begin == i.begin && end == i.end;
+    }
+
+    bool operator!=(const Interval &i) const {
+      return begin != i.begin || end != i.end;
+    }
+
+    // Reads 'begin' then 'end' from the stream with ReadType.
+    istream &Read(istream &strm) {
+      T n;
+      ReadType(strm, &n);
+      begin = n;
+      ReadType(strm, &n);
+      end = n;
+      return strm;
+    }
+
+    // Writes 'begin' then 'end' to the stream with WriteType.
+    ostream &Write(ostream &strm) const {
+      T n = begin;
+      WriteType(strm, n);
+      n = end;
+      WriteType(strm, n);
+      return strm;
+    }
+  };
+
+  // Creates an empty set; the count starts at -1 until Clear(),
+  // Normalize() or one of the set operations assigns it.
+  IntervalSet() : count_(-1) {}
+
+  // Returns the interval set as a vector.
+  vector<Interval> *Intervals() { return &intervals_; }
+
+  const vector<Interval> *Intervals() const { return &intervals_; }
+
+  // True iff the set stores no intervals.
+  bool Empty() const { return intervals_.empty(); }
+
+  // Number of stored intervals.
+  T Size() const { return intervals_.size(); }
+
+  // Number of points in the intervals (undefined if not normalized).
+  T Count() const { return count_; }
+
+  // Removes all intervals and resets the count to zero.
+  void Clear() {
+    intervals_.clear();
+    count_ = 0;
+  }
+
+  // Adds an interval set to the set. The result may not be normalized.
+  // The intervals of 'iset' are appended as-is; the count is not updated.
+  void Union(const IntervalSet<T> &iset) {
+    const vector<Interval> *intervals = iset.Intervals();
+    for (typename vector<Interval>::const_iterator it = intervals->begin();
+         it != intervals->end(); ++it)
+      intervals_.push_back(*it);
+  }
+
+  // Tests whether 'value' lies in some interval of the set.
+  // Requires intervals be normalized.
+  bool Member(T value) const {
+    // Given Interval::operator<, every interval [value, e) with e > value
+    // sorts before the probe [value, value), so lower_bound lands just past
+    // the last interval whose begin is <= value.
+    Interval interval(value, value);
+    typename vector<Interval>::const_iterator lb =
+        lower_bound(intervals_.begin(), intervals_.end(), interval);
+    if (lb == intervals_.begin())
+      return false;
+    return (--lb)->end > value;
+  }
+
+  // Requires intervals be normalized.
+  bool operator==(const IntervalSet<T>& iset) const {
+    return *(iset.Intervals()) == intervals_;
+  }
+
+  // Requires intervals be normalized.
+  bool operator!=(const IntervalSet<T>& iset) const {
+    return *(iset.Intervals()) != intervals_;
+  }
+
+  // True iff the set stores exactly one interval of the form [a, a + 1).
+  bool Singleton() const {
+    return intervals_.size() == 1 &&
+        intervals_[0].begin + 1 == intervals_[0].end;
+  }
+
+
+  // Sorts; collapses overlapping and adjacent intervals; sets count.
+  void Normalize();
+
+  // Intersects an interval set with the set. Requires intervals be
+  // normalized. The result is normalized.
+  void Intersect(const IntervalSet<T> &iset, IntervalSet<T> *oset) const;
+
+  // Complements the set w.r.t [0, maxval). Requires intervals be
+  // normalized. The result is normalized.
+  void Complement(T maxval, IntervalSet<T> *oset) const;
+
+  // Subtract an interval set from the set. Requires intervals be
+  // normalized. The result is normalized.
+  void Difference(const IntervalSet<T> &iset, IntervalSet<T> *oset) const;
+
+  // Determines if an interval set overlaps with the set. Requires
+  // intervals be normalized.
+  bool Overlaps(const IntervalSet<T> &iset) const;
+
+  // Determines if an interval set overlaps with the set but neither
+  // is contained in the other. Requires intervals be normalized.
+  bool StrictlyOverlaps(const IntervalSet<T> &iset) const;
+
+  // Determines if an interval set is contained within the set. Requires
+  // intervals be normalized.
+  bool Contains(const IntervalSet<T> &iset) const;
+
+  // Reads the interval vector followed by the count.
+  istream &Read(istream &strm) {
+    ReadType(strm, &intervals_);
+    return ReadType(strm, &count_);
+  }
+
+  // Writes the interval vector followed by the count.
+  ostream &Write(ostream &strm) const {
+    WriteType(strm, intervals_);
+    return WriteType(strm, count_);
+  }
+
+ private:
+  vector<Interval> intervals_;  // The stored intervals.
+  T count_;                     // Number of points covered (see Count()).
+};
+
+// Sorts; collapses overlapping and adjacent intervals; sets count.
+// Empty intervals (begin == end) that start a run are dropped. The
+// surviving intervals are compacted in place at the front of intervals_.
+template <typename T>
+void IntervalSet<T>::Normalize() {
+ // Interval::operator< orders by ascending begin, larger end first on ties.
+ sort(intervals_.begin(), intervals_.end());
+
+ count_ = 0;
+ // 'size' is the number of merged intervals written so far (size <= i).
+ T size = 0;
+ for (T i = 0; i < intervals_.size(); ++i) {
+ Interval &inti = intervals_[i];
+ if (inti.begin == inti.end)
+ continue;
+ // Absorb every following interval that starts at or before the current
+ // end (overlapping or adjacent), extending the end where needed.
+ for (T j = i + 1; j < intervals_.size(); ++j) {
+ Interval &intj = intervals_[j];
+ if (intj.begin > inti.end)
+ break;
+ if (intj.end > inti.end)
+ inti.end = intj.end;
+ // Advance the outer index too, so the absorbed interval is not
+ // visited again; 'inti' still refers to the run's first interval.
+ ++i;
+ }
+ count_ += inti.end - inti.begin;
+ intervals_[size++] = inti;
+ }
+ intervals_.resize(size);
+}
+
+// Computes the intersection of this set with 'iset' into 'oset' by a
+// single merge pass over both interval lists, also accumulating the
+// count. Requires intervals be normalized. The result is normalized.
+template <typename T>
+void IntervalSet<T>::Intersect(const IntervalSet<T> &iset,
+                               IntervalSet<T> *oset) const {
+  typedef typename vector<Interval>::const_iterator IntervalIter;
+  const vector<Interval> &theirs = *iset.Intervals();
+  vector<Interval> &result = *oset->Intervals();
+  IntervalIter mine_it = intervals_.begin();
+  IntervalIter theirs_it = theirs.begin();
+
+  result.clear();
+  oset->count_ = 0;
+
+  while (mine_it != intervals_.end() && theirs_it != theirs.end()) {
+    if (mine_it->end <= theirs_it->begin) {
+      ++mine_it;  // Ours ends before theirs starts.
+      continue;
+    }
+    if (theirs_it->end <= mine_it->begin) {
+      ++theirs_it;  // Theirs ends before ours starts.
+      continue;
+    }
+    // The two intervals share at least one point.
+    const Interval common(max(mine_it->begin, theirs_it->begin),
+                          min(mine_it->end, theirs_it->end));
+    result.push_back(common);
+    oset->count_ += common.end - common.begin;
+    // Advance whichever interval finishes first.
+    if (mine_it->end < theirs_it->end)
+      ++mine_it;
+    else
+      ++theirs_it;
+  }
+}
+
+// Writes to 'oset' the gaps of this set within [0, maxval), together
+// with their total count. Requires intervals be normalized.
+// The result is normalized.
+template <typename T>
+void IntervalSet<T>::Complement(T maxval, IntervalSet<T> *oset) const {
+  typedef typename vector<Interval>::const_iterator IntervalIter;
+  vector<Interval> &gaps = *oset->Intervals();
+  gaps.clear();
+  oset->count_ = 0;
+
+  // Start of the gap currently being tracked.
+  T gap_begin = 0;
+  for (IntervalIter it = intervals_.begin(); it != intervals_.end(); ++it) {
+    const T gap_end = min(it->begin, maxval);
+    if (gap_begin < gap_end) {
+      gaps.push_back(Interval(gap_begin, gap_end));
+      oset->count_ += gap_end - gap_begin;
+    }
+    gap_begin = it->end;
+  }
+  // Trailing gap between the last interval and maxval, if any.
+  if (gap_begin < maxval) {
+    gaps.push_back(Interval(gap_begin, maxval));
+    oset->count_ += maxval - gap_begin;
+  }
+}
+
+// Computes this set minus 'iset' into 'oset'. Requires intervals be
+// normalized. The result is normalized.
+template <typename T>
+void IntervalSet<T>::Difference(const IntervalSet<T> &iset,
+                                IntervalSet<T> *oset) const {
+  if (intervals_.empty()) {
+    // Nothing to subtract from: the result is the empty set.
+    oset->Intervals()->clear();
+    oset->count_ = 0;
+    return;
+  }
+  // Complement 'iset' up to the end of our last interval, then intersect
+  // that complement with this set.
+  IntervalSet<T> complement;
+  iset.Complement(intervals_.back().end, &complement);
+  Intersect(complement, oset);
+}
+
+// Reports whether this set and 'iset' share at least one point, using a
+// single merge pass over both interval lists. Requires intervals be
+// normalized.
+template <typename T>
+bool IntervalSet<T>::Overlaps(const IntervalSet<T> &iset) const {
+  typedef typename vector<Interval>::const_iterator IntervalIter;
+  const vector<Interval> &theirs = *iset.Intervals();
+  IntervalIter mine_it = intervals_.begin();
+  IntervalIter theirs_it = theirs.begin();
+
+  while (mine_it != intervals_.end() && theirs_it != theirs.end()) {
+    const bool mine_first = mine_it->end <= theirs_it->begin;
+    const bool theirs_first = theirs_it->end <= mine_it->begin;
+    if (!mine_first && !theirs_first)
+      return true;
+    if (mine_first)
+      ++mine_it;
+    else
+      ++theirs_it;
+  }
+  return false;
+}
+
+// Reports whether this set and 'iset' share a point while each also has
+// a point the other lacks (i.e. neither contains the other). Requires
+// intervals be normalized.
+template <typename T>
+bool IntervalSet<T>::StrictlyOverlaps(const IntervalSet<T> &iset) const {
+  typedef typename vector<Interval>::const_iterator IntervalIter;
+  const vector<Interval> &theirs = *iset.Intervals();
+  IntervalIter a = intervals_.begin();
+  IntervalIter b = theirs.begin();
+  bool only_mine = false;    // some point is in this set but not in iset
+  bool only_theirs = false;  // some point is in iset but not in this set
+  bool shared = false;       // some point is in both sets
+
+  while (a != intervals_.end() && b != theirs.end()) {
+    if (a->end <= b->begin) {                            // disjoint, a first
+      only_mine = true;
+      ++a;
+    } else if (b->end <= a->begin) {                     // disjoint, b first
+      only_theirs = true;
+      ++b;
+    } else if (b->begin == a->begin && b->end == a->end) {  // identical
+      shared = true;
+      ++a;
+      ++b;
+    } else if (b->begin <= a->begin && b->end >= a->end) {  // a inside b
+      only_theirs = true;
+      shared = true;
+      ++a;
+    } else if (a->begin <= b->begin && a->end >= b->end) {  // b inside a
+      only_mine = true;
+      shared = true;
+      ++b;
+    } else {                                             // partial overlap
+      only_mine = true;
+      only_theirs = true;
+      shared = true;
+    }
+    if (only_mine && only_theirs && shared)
+      return true;
+  }
+  // Whatever is left over in either list belongs to that set alone.
+  if (a != intervals_.end())
+    only_mine = true;
+  if (b != theirs.end())
+    only_theirs = true;
+
+  return only_mine && only_theirs && shared;
+}
+
+// Reports whether every interval of 'iset' lies inside some interval of
+// this set. Requires intervals be normalized.
+template <typename T>
+bool IntervalSet<T>::Contains(const IntervalSet<T> &iset) const {
+  // A set covering more points cannot be contained in this one.
+  if (iset.Count() > Count())
+    return false;
+
+  typedef typename vector<Interval>::const_iterator IntervalIter;
+  const vector<Interval> &theirs = *iset.Intervals();
+  IntervalIter outer = intervals_.begin();
+  IntervalIter inner = theirs.begin();
+
+  while (outer != intervals_.end() && inner != theirs.end()) {
+    if (outer->end <= inner->begin) {  // no overlap - outer first
+      ++outer;
+      continue;
+    }
+    if (inner->begin < outer->begin || inner->end > outer->end)
+      return false;  // *inner sticks out of *outer
+    // *inner lies within *outer; 'outer' is exhausted when the ends meet.
+    if (inner->end == outer->end)
+      ++outer;
+    ++inner;
+  }
+  return inner == theirs.end();
+}
+
+// Prints the set as "{[b1,e1),[b2,e2),...}".
+template <typename T>
+ostream &operator<<(ostream &strm, const IntervalSet<T> &s) {
+  typedef typename IntervalSet<T>::Interval Interval;
+  const vector<Interval> &intervals = *s.Intervals();
+  strm << "{";
+  for (size_t i = 0; i < intervals.size(); ++i) {
+    if (i > 0)
+      strm << ",";
+    strm << "[" << intervals[i].begin << "," << intervals[i].end << ")";
+  }
+  strm << "}";
+  return strm;
+}
+
+} // namespace fst
+
+#endif // FST_LIB_INTERVAL_SET_H__
diff --git a/src/include/fst/invert.h b/src/include/fst/invert.h
new file mode 100644
index 0000000..bc83a5d
--- /dev/null
+++ b/src/include/fst/invert.h
@@ -0,0 +1,125 @@
+// invert.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Functions and classes to invert an Fst.
+
+#ifndef FST_LIB_INVERT_H__
+#define FST_LIB_INVERT_H__
+
+#include <fst/arc-map.h>
+#include <fst/mutable-fst.h>
+
+
+namespace fst {
+
+// Mapper to implement inversion of an arc: the input and output labels
+// are exchanged while the weight and next state are kept.
+template <class A> struct InvertMapper {
+ InvertMapper() {}
+
+ // Returns a copy of 'arc' with ilabel and olabel swapped.
+ A operator()(const A &arc) {
+ return A(arc.olabel, arc.ilabel, arc.weight, arc.nextstate);
+ }
+
+ MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; }
+
+ // Both symbol actions request MAP_CLEAR_SYMBOLS.
+ MapSymbolsAction InputSymbolsAction() const { return MAP_CLEAR_SYMBOLS; }
+
+ MapSymbolsAction OutputSymbolsAction() const { return MAP_CLEAR_SYMBOLS;}
+
+ // Property bits of the result are derived with InvertProperties().
+ uint64 Properties(uint64 props) { return InvertProperties(props); }
+};
+
+
+// Inverts the transduction corresponding to an FST by exchanging the
+// FST's input and output labels. This version modifies its input.
+//
+// Complexity:
+// - Time: O(V + E)
+// - Space: O(1)
+// where V = # of states and E = # of arcs.
+template<class Arc> inline
+void Invert(MutableFst<Arc> *fst) {
+  // Take private copies of both symbol tables before mapping, so they
+  // can be re-attached on the opposite sides afterwards.
+  SymbolTable *old_isyms = 0;
+  if (fst->InputSymbols())
+    old_isyms = fst->InputSymbols()->Copy();
+  SymbolTable *old_osyms = 0;
+  if (fst->OutputSymbols())
+    old_osyms = fst->OutputSymbols()->Copy();
+
+  ArcMap(fst, InvertMapper<Arc>());
+
+  // Former output symbols become the input symbols and vice versa.
+  fst->SetInputSymbols(old_osyms);
+  fst->SetOutputSymbols(old_isyms);
+  delete old_isyms;
+  delete old_osyms;
+}
+
+
+// Inverts the transduction corresponding to an FST by exchanging the
+// FST's input and output labels. This version is a delayed Fst.
+//
+// Complexity:
+// - Time: O(v + e)
+// - Space: O(1)
+// where v = # of states visited, e = # of arcs visited. Constant
+// time to visit an input state or arc is assumed, exclusive
+// of caching.
+template <class A>
+class InvertFst : public ArcMapFst<A, A, InvertMapper<A> > {
+ public:
+ typedef A Arc;
+ typedef InvertMapper<A> C;
+ typedef ArcMapFstImpl< A, A, InvertMapper<A> > Impl;
+ using ImplToFst<Impl>::GetImpl;
+
+ // Wraps 'fst' in an ArcMapFst driven by InvertMapper, then attaches the
+ // symbol tables crosswise: the input symbols of 'fst' become the output
+ // symbols of this FST and vice versa.
+ explicit InvertFst(const Fst<A> &fst) : ArcMapFst<A, A, C>(fst, C()) {
+ GetImpl()->SetOutputSymbols(fst.InputSymbols());
+ GetImpl()->SetInputSymbols(fst.OutputSymbols());
+ }
+
+ // See Fst<>::Copy() for doc.
+ InvertFst(const InvertFst<A> &fst, bool safe = false)
+ : ArcMapFst<A, A, C>(fst, safe) {}
+
+ // Get a copy of this InvertFst. See Fst<>::Copy() for further doc.
+ // The returned object is allocated with new.
+ virtual InvertFst<A> *Copy(bool safe = false) const {
+ return new InvertFst(*this, safe);
+ }
+};
+
+
+// Specialization for InvertFst: state iteration is delegated entirely to
+// the StateIterator of the underlying ArcMapFst.
+template <class A>
+class StateIterator< InvertFst<A> >
+ : public StateIterator< ArcMapFst<A, A, InvertMapper<A> > > {
+ public:
+ explicit StateIterator(const InvertFst<A> &fst)
+ : StateIterator< ArcMapFst<A, A, InvertMapper<A> > >(fst) {}
+};
+
+
+// Specialization for InvertFst: arc iteration over state s is delegated
+// entirely to the ArcIterator of the underlying ArcMapFst.
+template <class A>
+class ArcIterator< InvertFst<A> >
+ : public ArcIterator< ArcMapFst<A, A, InvertMapper<A> > > {
+ public:
+ ArcIterator(const InvertFst<A> &fst, typename A::StateId s)
+ : ArcIterator< ArcMapFst<A, A, InvertMapper<A> > >(fst, s) {}
+};
+
+
+// Useful alias when using StdArc.
+typedef InvertFst<StdArc> StdInvertFst;
+
+} // namespace fst
+
+#endif // FST_LIB_INVERT_H__
diff --git a/src/include/fst/label-reachable.h b/src/include/fst/label-reachable.h
new file mode 100644
index 0000000..8f9aca8
--- /dev/null
+++ b/src/include/fst/label-reachable.h
@@ -0,0 +1,565 @@
+// label-reachable.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Class to determine if a non-epsilon label can be read as the
+// first non-epsilon symbol along some path from a given state.
+
+
+#ifndef FST_LIB_LABEL_REACHABLE_H__
+#define FST_LIB_LABEL_REACHABLE_H__
+
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <vector>
+using std::vector;
+
+#include <fst/accumulator.h>
+#include <fst/arcsort.h>
+#include <fst/interval-set.h>
+#include <fst/state-reachable.h>
+#include <fst/vector-fst.h>
+
+
+namespace fst {
+
+// Stores shareable data for label reachable class copies: the
+// label-to-index relabeling map, the final label and the per-state
+// interval sets, together with a reference count.
+template <typename L>
+class LabelReachableData {
+ public:
+  typedef L Label;
+  typedef typename IntervalSet<L>::Interval Interval;
+
+  explicit LabelReachableData(bool reach_input, bool keep_relabel_data = true)
+      : reach_input_(reach_input),
+        keep_relabel_data_(keep_relabel_data),
+        have_relabel_data_(true),
+        final_label_(kNoLabel) {}
+
+  ~LabelReachableData() {}
+
+  bool ReachInput() const { return reach_input_; }
+
+  vector< IntervalSet<L> > *IntervalSets() { return &isets_; }
+
+  // Returns the relabeling map. An error is logged if no relabeling data
+  // is available, but the (then empty) map is still returned.
+  unordered_map<L, L> *Label2Index() {
+    if (!have_relabel_data_)
+      FSTERROR() << "LabelReachableData: no relabeling data";
+    return &label2index_;
+  }
+
+  // Returns the final label, looking it up under kNoLabel in the
+  // relabeling map on first use. Note that this lookup uses operator[]
+  // and therefore inserts an entry if none exists yet.
+  Label FinalLabel() {
+    if (final_label_ == kNoLabel)
+      final_label_ = label2index_[kNoLabel];
+    return final_label_;
+  }
+
+  // Reads a new, heap-allocated object from 'istrm' in the layout produced
+  // by Write(). The relabeling map is present only if it was kept on write.
+  static LabelReachableData<L> *Read(istream &istrm) {
+    LabelReachableData<L> *data = new LabelReachableData<L>();
+
+    ReadType(istrm, &data->reach_input_);
+    ReadType(istrm, &data->keep_relabel_data_);
+    data->have_relabel_data_ = data->keep_relabel_data_;
+    if (data->keep_relabel_data_)
+      ReadType(istrm, &data->label2index_);
+    ReadType(istrm, &data->final_label_);
+    ReadType(istrm, &data->isets_);
+    return data;
+  }
+
+  // Writes the object to 'ostrm'; the relabeling map is written only when
+  // keep_relabel_data_ is set. Always returns true.
+  bool Write(ostream &ostrm) {
+    WriteType(ostrm, reach_input_);
+    WriteType(ostrm, keep_relabel_data_);
+    if (keep_relabel_data_)
+      WriteType(ostrm, label2index_);
+    WriteType(ostrm, FinalLabel());
+    WriteType(ostrm, isets_);
+    return true;
+  }
+
+  int RefCount() const { return ref_count_.count(); }
+  int IncrRefCount() { return ref_count_.Incr(); }
+  int DecrRefCount() { return ref_count_.Decr(); }
+
+ private:
+  // Used only by Read(). The members get defined defaults here so that
+  // none of them is indeterminate before Read() assigns it.
+  LabelReachableData()
+      : reach_input_(false),
+        keep_relabel_data_(true),
+        have_relabel_data_(true),
+        final_label_(kNoLabel) {}
+
+  bool reach_input_;                  // Input or output labels considered?
+  bool keep_relabel_data_;            // Save label2index_ to file?
+  bool have_relabel_data_;            // Using label2index_?
+  Label final_label_;                 // Final label
+  RefCounter ref_count_;              // Reference count.
+  unordered_map<L, L> label2index_;   // Finds index for a label.
+  vector<IntervalSet <L> > isets_;    // Interval sets per state.
+
+  DISALLOW_COPY_AND_ASSIGN(LabelReachableData);
+};
+
+
+// Tests reachability of labels from a given state. If reach_input =
+// true, then input labels are considered, o.w. output labels are
+// considered. To test for reachability from a state s, first do
+// SetState(s). Then a label l can be reached from state s of FST f
+// iff Reach(r) is true where r = Relabel(l). The relabeling is
+// required to ensure a compact representation of the reachable
+// labels.
+
+// The whole FST can be relabeled instead with Relabel(&f,
+// reach_input) so that the test Reach(r) applies directly to the
+// labels of the transformed FST f. The relabeled FST will also be
+// sorted appropriately for composition.
+//
+// Reachability of a final state from state s (via an epsilon path)
+// can be tested with ReachFinal();
+//
+// Reachability can also be tested on the set of labels specified by
+// an arc iterator, useful for FST composition. In particular,
+// Reach(aiter, ...) is true if labels on the input (output) side of
+// the transitions of the arc iterator, when iter_input is true
+// (false), can be reached from the state s. The iterator labels must
+// have already been relabeled.
+//
+// With the arc iterator test of reachability, the begin position, end
+// position and accumulated arc weight of the matches can be
+// returned. The optional template argument controls how reachable arc
+// weights are accumulated. The default uses the semiring
+// Plus(). Alternative ones can be used to distribute the weights in
+// composition in various ways.
+template <class A, class S = DefaultAccumulator<A> >
+class LabelReachable {
+ public:
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+ typedef typename A::Label Label;
+ typedef typename A::Weight Weight;
+ typedef typename IntervalSet<Label>::Interval Interval;
+
+ // Builds the reachability data from 'fst'. A private VectorFst copy of
+ // 'fst' is made, transformed and analyzed, and then discarded. If 's'
+ // is null a new accumulator of type S is allocated; either way the
+ // accumulator is deleted by the destructor.
+ LabelReachable(const Fst<A> &fst, bool reach_input, S *s = 0,
+                bool keep_relabel_data = true)
+     : fst_(new VectorFst<Arc>(fst)),
+       s_(kNoStateId),
+       data_(new LabelReachableData<Label>(reach_input, keep_relabel_data)),
+       accumulator_(s ? s : new S()),
+       ncalls_(0),
+       nintervals_(0),
+       error_(false) {
+   // Number of states before the transformation.
+   StateId ins = fst_->NumStates();
+   TransformFst();
+   FindIntervals(ins);
+   // The working copy is only needed during construction; null the
+   // pointer so that no dangling pointer is left behind.
+   delete fst_;
+   fst_ = 0;
+ }
+
+ // Builds an instance on top of existing shared reachability data. The
+ // reference count of 'data' is incremented here and decremented again in
+ // the destructor. If 's' is null a new accumulator of type S is
+ // allocated; either way the accumulator is deleted by the destructor.
+ explicit LabelReachable(LabelReachableData<Label> *data, S *s = 0)
+ : fst_(0),
+ s_(kNoStateId),
+ data_(data),
+ accumulator_(s ? s : new S()),
+ ncalls_(0),
+ nintervals_(0),
+ error_(false) {
+ data_->IncrRefCount();
+ }
+
+ // Copy constructor. The reachability data is shared with 'reachable'
+ // (its reference count is incremented), the accumulator is copy-
+ // constructed, and the error flag is inherited. The current state and
+ // the call statistics start fresh.
+ LabelReachable(const LabelReachable<A, S> &reachable) :
+ fst_(0),
+ s_(kNoStateId),
+ data_(reachable.data_),
+ accumulator_(new S(*reachable.accumulator_)),
+ ncalls_(0),
+ nintervals_(0),
+ error_(reachable.error_) {
+ data_->IncrRefCount();
+ }
+
+ // Drops this object's reference to the shared data (deleting the data
+ // when the count reaches zero), deletes the accumulator, and logs call
+ // statistics at verbosity level 2 if any calls were counted.
+ ~LabelReachable() {
+   const bool last_reference = !data_->DecrRefCount();
+   if (last_reference)
+     delete data_;
+   delete accumulator_;
+   if (!(ncalls_ > 0))
+     return;
+   VLOG(2) << "# of calls: " << ncalls_;
+   VLOG(2) << "# of intervals/call: " << (nintervals_ / ncalls_);
+ }
+
+ // Relabels w.r.t labels that give compact label sets.
+ // Label 0 is returned unchanged, as is any label once error_ is set.
+ // A label seen for the first time is assigned a new index and recorded
+ // in the shared label-to-index map; later calls return the same index.
+ Label Relabel(Label label) {
+ if (label == 0 || error_)
+ return label;
+ unordered_map<Label, Label> &label2index = *data_->Label2Index();
+ // operator[] inserts a zero-valued entry when 'label' is not yet
+ // present, so a zero value marks a label without an index and size()
+ // below already counts the entry just inserted.
+ Label &relabel = label2index[label];
+ if (!relabel) // Add new label
+ relabel = label2index.size() + 1;
+ return relabel;
+ }
+
+ // Relabels every arc of 'fst' in place with Relabel(Label), on the input
+ // side if 'relabel_input' is true and on the output side otherwise. The
+ // arcs are then sorted on the relabeled side and the symbol table of
+ // that side is cleared.
+ void Relabel(MutableFst<Arc> *fst, bool relabel_input) {
+   for (StateIterator< MutableFst<Arc> > siter(*fst);
+        !siter.Done(); siter.Next()) {
+     MutableArcIterator< MutableFst<Arc> > aiter(fst, siter.Value());
+     for (; !aiter.Done(); aiter.Next()) {
+       Arc arc = aiter.Value();
+       Label &side = relabel_input ? arc.ilabel : arc.olabel;
+       side = Relabel(side);
+       aiter.SetValue(arc);
+     }
+   }
+   if (!relabel_input) {
+     ArcSort(fst, OLabelCompare<Arc>());
+     fst->SetOutputSymbols(0);
+     return;
+   }
+   ArcSort(fst, ILabelCompare<Arc>());
+   fst->SetInputSymbols(0);
+ }
+
+ // Returns relabeling pairs (cf. relabel.h::Relabel()).
+ // If 'avoid_collisions' is true, extra pairs are added to
+ // ensure no collisions when relabeling automata that have
+ // labels unseen here.
+ // 'pairs' is cleared first. Entries whose index equals the final label
+ // are left out.
+ void RelabelPairs(vector<pair<Label, Label> > *pairs,
+                   bool avoid_collisions = false) {
+   typedef typename unordered_map<Label, Label>::const_iterator MapIter;
+   pairs->clear();
+   unordered_map<Label, Label> &label2index = *data_->Label2Index();
+   if (label2index.empty())
+     return;  // No relabeling entries, hence no pairs.
+
+   // Resolve the final label once, before iterating: FinalLabel() may
+   // insert an entry into this very map on first use, and inserting
+   // while iterating could invalidate the iterator.
+   const Label final_label = data_->FinalLabel();
+
+   // Maps labels to their new values in [1, label2index().size()]
+   for (MapIter it = label2index.begin(); it != label2index.end(); ++it) {
+     if (it->second != final_label)
+       pairs->push_back(pair<Label, Label>(it->first, it->second));
+   }
+   if (avoid_collisions) {
+     // Ensures any label in [1, label2index().size()] is mapped either
+     // by the above step or to label2index() + 1 (to avoid collisions).
+     const size_t nlabels = label2index.size();
+     for (size_t i = 1; i <= nlabels; ++i) {
+       MapIter it = label2index.find(i);
+       if (it == label2index.end() || it->second == final_label)
+         pairs->push_back(pair<Label, Label>(i, nlabels + 1));
+     }
+   }
+ }
+
+  // Makes 's' the current state.  When 'aiter_s' is supplied it names the
+  // state of the arc iterator that will be passed to Reach(); it is
+  // forwarded to the accumulator, whose error status is then recorded.
+  void SetState(StateId s, StateId aiter_s = kNoStateId) {
+    s_ = s;
+    if (aiter_s == kNoStateId) return;
+    accumulator_->SetState(aiter_s);
+    const bool accumulator_failed = accumulator_->Error();
+    if (accumulator_failed) error_ = true;
+  }
+
+  // Returns true if 'label' belongs to the interval set of the current
+  // state.  Original labels must be transformed by the Relabel methods
+  // above.  Epsilon (0) always yields false, as does the error state.
+  bool Reach(Label label) {
+    if (error_ || label == 0) return false;
+    return (*data_->IntervalSets())[s_].Member(label);
+  }
+
+  // Returns true if a final state can be reached (via epsilon transitions)
+  // from the current state, i.e. if the final label belongs to the
+  // current state's interval set.  Always false in the error state.
+  bool ReachFinal() {
+    if (error_) return false;
+    return (*data_->IntervalSets())[s_].Member(data_->FinalLabel());
+  }
+
+  // Initializes the accumulator with the secondary FST to be used with
+  // Reach(Iterator, ...).  Pass copy = true when 'fst' is a copy of the
+  // FST given in the previous call (useful to avoid unnecessary updates).
+  // An accumulator error is recorded in this object's error flag.
+  template <class F>
+  void ReachInit(const F &fst, bool copy = false) {
+    accumulator_->Init(fst, copy);
+    const bool accumulator_failed = accumulator_->Error();
+    if (accumulator_failed) error_ = true;
+  }
+
+ // Returns true if any label on the arcs at iterator positions
+ // [aiter_begin, aiter_end) is reachable from the current state. Input
+ // labels are tested if aiter_input is true, o.w. output labels. Labels
+ // must already be transformed by the Relabel methods above. If
+ // compute_weight is true, the user may then call ReachWeight().
+ template <class Iterator>
+ bool Reach(Iterator *aiter, ssize_t aiter_begin,
+ ssize_t aiter_end, bool aiter_input, bool compute_weight) {
+ if (error_) return false;
+ vector< IntervalSet<Label> > &isets = *data_->IntervalSets();
+ const vector<Interval> *intervals = isets[s_].Intervals();
+ ++ncalls_; // call and interval counters; reported via VLOG elsewhere
+ nintervals_ += intervals->size();
+
+ reach_begin_ = -1; // a negative position means no match found so far
+ reach_end_ = -1;
+ reach_weight_ = Weight::Zero();
+
+ uint32 flags = aiter->Flags(); // save flags to restore them on exit
+ aiter->SetFlags(kArcNoCache, kArcNoCache); // make caching optional
+ aiter->Seek(aiter_begin);
+
+ if (2 * (aiter_end - aiter_begin) < intervals->size()) {
+ // Fewer than half as many arcs as intervals: check each arc against
+ // the intervals. Set arc iterator flags to only compute the ilabel or
+ // olabel values, since they are the only values required for most arcs.
+ aiter->SetFlags(aiter_input ? kArcILabelValue : kArcOLabelValue,
+ kArcValueFlags);
+ Label reach_label = kNoLabel; // last label found to be reachable
+ for (ssize_t aiter_pos = aiter_begin;
+ aiter_pos < aiter_end; aiter->Next(), ++aiter_pos) {
+ const A &arc = aiter->Value();
+ Label label = aiter_input ? arc.ilabel : arc.olabel;
+ if (label == reach_label || Reach(label)) { // repeated label skips the lookup
+ reach_label = label;
+ if (reach_begin_ < 0)
+ reach_begin_ = aiter_pos;
+ reach_end_ = aiter_pos + 1;
+ if (compute_weight) {
+ if (!(aiter->Flags() & kArcWeightValue)) {
+ // If the 'arc.weight' wasn't computed by the call
+ // to 'aiter->Value()' above, we need to call
+ // 'aiter->Value()' again after having set the arc iterator
+ // flags to compute the arc weight value.
+ aiter->SetFlags(kArcWeightValue, kArcValueFlags);
+ const A &arcb = aiter->Value();
+ // Call the accumulator.
+ reach_weight_ = accumulator_->Sum(reach_weight_, arcb.weight);
+ // Only ilabel or olabel required to process the following
+ // arcs.
+ aiter->SetFlags(aiter_input ? kArcILabelValue : kArcOLabelValue,
+ kArcValueFlags);
+ } else {
+ // Call the accumulator.
+ reach_weight_ = accumulator_->Sum(reach_weight_, arc.weight);
+ }
+ }
+ }
+ }
+ } else {
+ // Check each interval against arcs, using LowerBound() to find the arc range it covers
+ ssize_t begin_low, end_low = aiter_begin; // each search resumes where the previous one ended
+ for (typename vector<Interval>::const_iterator
+ iiter = intervals->begin();
+ iiter != intervals->end(); ++iiter) {
+ begin_low = LowerBound(aiter, end_low, aiter_end,
+ aiter_input, iiter->begin);
+ end_low = LowerBound(aiter, begin_low, aiter_end,
+ aiter_input, iiter->end);
+ if (end_low - begin_low > 0) { // at least one arc label falls in this interval
+ if (reach_begin_ < 0)
+ reach_begin_ = begin_low;
+ reach_end_ = end_low;
+ if (compute_weight) {
+ aiter->SetFlags(kArcWeightValue, kArcValueFlags);
+ reach_weight_ = accumulator_->Sum(reach_weight_, aiter,
+ begin_low, end_low);
+ }
+ }
+ }
+ }
+
+ aiter->SetFlags(flags, kArcFlags); // restore original flag values
+ return reach_begin_ >= 0;
+ }
+
+ // Returns iterator position of first matching arc (negative if the last Reach() call matched nothing).
+ ssize_t ReachBegin() const { return reach_begin_; }
+
+ // Returns iterator position one past last matching arc.
+ ssize_t ReachEnd() const { return reach_end_; }
+
+ // Returns the sum of the weights for matching arcs.
+ // Valid only if compute_weight was true in the Reach() call.
+ Weight ReachWeight() const { return reach_weight_; }
+
+ // Access to the relabeling map. Excludes epsilon (0) label but
+ // includes kNoLabel that is used internally for super-final
+ // transitions.
+ const unordered_map<Label, Label>& Label2Index() const {
+ return *data_->Label2Index();
+ }
+
+ LabelReachableData<Label> *GetData() const { return data_; } // pointer to the shareable data; no copy is made
+
+ bool Error() const { return error_ || accumulator_->Error(); } // true if this object or its accumulator reports an error
+
+ private:
+ // Rewrites *fst_ in place. Redirects labeled arcs (input or output
+ // labels determined by ReachInput()) to new label-specific final states.
+ // Each original final state is redirected via a transition labeled with
+ // kNoLabel to a new kNoLabel-specific final state. Creates super-initial
+ // state with epsilon arcs to all states with zero in-degree.
+ void TransformFst() {
+ StateId ins = fst_->NumStates(); // number of states before the transformation
+ StateId ons = ins; // next state id to assign to a new final state
+
+ vector<ssize_t> indeg(ins, 0);
+
+ // Redirects labeled arcs to new final states.
+ for (StateId s = 0; s < ins; ++s) {
+ for (MutableArcIterator< VectorFst<Arc> > aiter(fst_, s);
+ !aiter.Done();
+ aiter.Next()) {
+ Arc arc = aiter.Value();
+ Label label = data_->ReachInput() ? arc.ilabel : arc.olabel;
+ if (label) { // epsilon-labeled arcs keep their destination
+ if (label2state_.find(label) == label2state_.end()) {
+ label2state_[label] = ons; // first use of this label: reserve a state id
+ indeg.push_back(0);
+ ++ons;
+ }
+ arc.nextstate = label2state_[label];
+ aiter.SetValue(arc);
+ }
+ ++indeg[arc.nextstate]; // Finds in-degrees for next step.
+ }
+
+ // Redirects final weights to new final state.
+ Weight final = fst_->Final(s);
+ if (final != Weight::Zero()) {
+ if (label2state_.find(kNoLabel) == label2state_.end()) {
+ label2state_[kNoLabel] = ons;
+ indeg.push_back(0);
+ ++ons;
+ }
+ Arc arc(kNoLabel, kNoLabel, final, label2state_[kNoLabel]);
+ fst_->AddArc(s, arc);
+ ++indeg[arc.nextstate]; // Finds in-degrees for next step.
+
+ fst_->SetFinal(s, Weight::Zero()); // the final weight now lives on the new arc
+ }
+ }
+
+ // Adds the states reserved above to the Fst and makes them final.
+ while (fst_->NumStates() < ons) {
+ StateId s = fst_->AddState();
+ fst_->SetFinal(s, Weight::One());
+ }
+
+ // Creates a super-initial state for all states with zero in-degree.
+ StateId start = fst_->AddState();
+ fst_->SetStart(start);
+ for (StateId s = 0; s < start; ++s) {
+ if (indeg[s] == 0) {
+ Arc arc(0, 0, Weight::One(), s); // epsilon arc with weight One()
+ fst_->AddArc(start, arc);
+ }
+ }
+ }
+
+ void FindIntervals(StateId ins) { // fills the interval sets and label-to-index map in data_; keeps interval sets for the first 'ins' states
+ StateReachable<A, Label> state_reachable(*fst_);
+ if (state_reachable.Error()) {
+ error_ = true;
+ return;
+ }
+
+ vector<Label> &state2index = state_reachable.State2Index();
+ vector< IntervalSet<Label> > &isets = *data_->IntervalSets();
+ isets = state_reachable.IntervalSets(); // copy into the shared data
+ isets.resize(ins); // drop the entries beyond the first 'ins' states
+
+ unordered_map<Label, Label> &label2index = *data_->Label2Index();
+ for (typename unordered_map<Label, StateId>::const_iterator
+ it = label2state_.begin();
+ it != label2state_.end();
+ ++it) {
+ Label l = it->first;
+ StateId s = it->second;
+ Label i = state2index[s]; // index of the label-specific state
+ label2index[l] = i;
+ }
+ label2state_.clear(); // no longer needed once label2index is filled
+
+ double nintervals = 0; // the remainder only gathers statistics for VLOG
+ ssize_t non_intervals = 0; // states whose set has more than one interval
+ for (ssize_t s = 0; s < ins; ++s) {
+ nintervals += isets[s].Size();
+ if (isets[s].Size() > 1) {
+ ++non_intervals;
+ VLOG(3) << "state: " << s << " # of intervals: " << isets[s].Size();
+ }
+ }
+ VLOG(2) << "# of states: " << ins;
+ VLOG(2) << "# of intervals: " << nintervals;
+ VLOG(2) << "# of intervals/state: " << nintervals/ins;
+ VLOG(2) << "# of non-interval states: " << non_intervals;
+ }
+
+ template <class Iterator>
+ ssize_t LowerBound(Iterator *aiter, ssize_t aiter_begin,
+ ssize_t aiter_end, bool aiter_input,
+ Label match_label) const {
+ // Binary search for match_label (presumes arcs are sorted on the tested
+ // label); only the ilabel or olabel is computed while searching.
+ aiter->SetFlags(aiter_input ? kArcILabelValue : kArcOLabelValue,
+ kArcValueFlags);
+ ssize_t low = aiter_begin; // search window is [low, high)
+ ssize_t high = aiter_end;
+ while (low < high) {
+ ssize_t mid = (low + high) / 2;
+ aiter->Seek(mid);
+ Label label = aiter_input ?
+ aiter->Value().ilabel : aiter->Value().olabel;
+ if (label > match_label) {
+ high = mid;
+ } else if (label < match_label) {
+ low = mid + 1;
+ } else {
+ // Find first matching label (when non-deterministic) by walking back from mid
+ for (ssize_t i = mid; i > low; --i) {
+ aiter->Seek(i - 1);
+ label = aiter_input ? aiter->Value().ilabel : aiter->Value().olabel;
+ if (label != match_label) {
+ aiter->Seek(i); // position i is the first arc carrying match_label
+ aiter->SetFlags(kArcValueFlags, kArcValueFlags);
+ return i;
+ }
+ }
+ aiter->SetFlags(kArcValueFlags, kArcValueFlags);
+ return low; // every position in [low, mid] carries match_label
+ }
+ }
+ aiter->Seek(low); // no exact match: low is where the window closed
+ aiter->SetFlags(kArcValueFlags, kArcValueFlags); // re-enable all arc values
+ return low;
+ }
+
+ VectorFst<Arc> *fst_;
+ StateId s_; // Current state
+ unordered_map<Label, StateId> label2state_; // Finds final state for a label
+
+ ssize_t reach_begin_; // Iterator pos of first match
+ ssize_t reach_end_; // Iterator pos after last match
+ Weight reach_weight_; // Gives weight sum of arc iterator
+ // arcs with reachable labels.
+ LabelReachableData<Label> *data_; // Shareable data between copies
+ S *accumulator_; // Sums arc weights
+
+ double ncalls_;
+ double nintervals_;
+ bool error_;
+
+ void operator=(const LabelReachable<A, S> &); // Disallow
+};
+
+} // namespace fst
+
+#endif // FST_LIB_LABEL_REACHABLE_H__
diff --git a/src/include/fst/lexicographic-weight.h b/src/include/fst/lexicographic-weight.h
new file mode 100644
index 0000000..4b55c50
--- /dev/null
+++ b/src/include/fst/lexicographic-weight.h
@@ -0,0 +1,151 @@
+// lexicographic-weight.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: rws@google.com (Richard Sproat)
+//
+// \file
+// Lexicographic weight set and associated semiring operation definitions.
+//
+// A lexicographic weight is a sequence of weights, each of which must have the
+// path property and Times() must be (strongly) cancellative
+// (for all a,b,c != Zero(): Times(c, a) = Times(c, b) => a = b,
+// Times(a, c) = Times(b, c) => a = b).
+// The + operation on two weights a and b is the lexicographically
+// prior of a and b.
+
+#ifndef FST_LIB_LEXICOGRAPHIC_WEIGHT_H__
+#define FST_LIB_LEXICOGRAPHIC_WEIGHT_H__
+
+#include <string>
+
+#include <fst/pair-weight.h>
+#include <fst/weight.h>
+
+
+namespace fst {
+
+// Pair of weights (W1, W2) whose Plus() picks the lexicographically prior
+// of its two arguments.  Both component types must have the path property;
+// the two-argument constructor reports an error and stores NoWeight() in
+// the offending component otherwise.
+template<class W1, class W2>
+class LexicographicWeight : public PairWeight<W1, W2> {
+ public:
+  using PairWeight<W1, W2>::Value1;
+  using PairWeight<W1, W2>::Value2;
+  using PairWeight<W1, W2>::SetValue1;
+  using PairWeight<W1, W2>::SetValue2;
+  using PairWeight<W1, W2>::Zero;
+  using PairWeight<W1, W2>::One;
+  using PairWeight<W1, W2>::NoWeight;
+  using PairWeight<W1, W2>::Quantize;
+  using PairWeight<W1, W2>::Reverse;
+
+  typedef LexicographicWeight<typename W1::ReverseWeight,
+                              typename W2::ReverseWeight>
+  ReverseWeight;
+
+  LexicographicWeight() {}
+
+  // Implicit conversion from the underlying pair weight; it is relied on
+  // by Zero(), One(), NoWeight(), Quantize() and Reverse() below.
+  LexicographicWeight(const PairWeight<W1, W2>& w)
+      : PairWeight<W1, W2>(w) {}
+
+  // Builds the weight (w1, w2).  A component whose weight type lacks the
+  // path property is replaced by that type's NoWeight().
+  LexicographicWeight(W1 w1, W2 w2) : PairWeight<W1, W2>(w1, w2) {
+    uint64 props = kPath;
+    if ((W1::Properties() & props) != props) {
+      FSTERROR() << "LexicographicWeight must "
+                 << "have the path property: " << W1::Type();
+      SetValue1(W1::NoWeight());
+    }
+    if ((W2::Properties() & props) != props) {
+      FSTERROR() << "LexicographicWeight must "
+                 << "have the path property: " << W2::Type();
+      SetValue2(W2::NoWeight());
+    }
+  }
+
+  static const LexicographicWeight<W1, W2> &Zero() {
+    static const LexicographicWeight<W1, W2> zero(PairWeight<W1, W2>::Zero());
+    return zero;
+  }
+
+  static const LexicographicWeight<W1, W2> &One() {
+    static const LexicographicWeight<W1, W2> one(PairWeight<W1, W2>::One());
+    return one;
+  }
+
+  static const LexicographicWeight<W1, W2> &NoWeight() {
+    static const LexicographicWeight<W1, W2> no_weight(
+        PairWeight<W1, W2>::NoWeight());
+    return no_weight;
+  }
+
+  // Type name: the component type names joined by "_LT_".
+  static const string &Type() {
+    static const string type = W1::Type() + "_LT_" + W2::Type();
+    return type;
+  }
+
+  // A member requires both components to be members and to be either both
+  // Zero() or both non-Zero().
+  bool Member() const {
+    if (!Value1().Member() || !Value2().Member()) return false;
+    // Lexicographic weights cannot mix zeroes and non-zeroes.
+    if (Value1() == W1::Zero() && Value2() == W2::Zero()) return true;
+    if (Value1() != W1::Zero() && Value2() != W2::Zero()) return true;
+    return false;
+  }
+
+  // Quantizes both components with the caller's 'delta'.  (Previously the
+  // argument was dropped and the base class default was always used.)
+  LexicographicWeight<W1, W2> Quantize(float delta = kDelta) const {
+    return PairWeight<W1, W2>::Quantize(delta);
+  }
+
+  ReverseWeight Reverse() const {
+    return PairWeight<W1, W2>::Reverse();
+  }
+
+  // Properties shared by both components, restricted to the ones listed.
+  static uint64 Properties() {
+    uint64 props1 = W1::Properties();
+    uint64 props2 = W2::Properties();
+    return props1 & props2 & (kLeftSemiring | kRightSemiring | kPath |
+                              kIdempotent | kCommutative);
+  }
+};
+
+// Returns the lexicographically prior of 'w' and 'v' under the natural
+// orders of W1 and W2, or NoWeight() if either argument is not a member.
+// 'w' is returned when neither is strictly prior.
+template <class W1, class W2>
+inline LexicographicWeight<W1, W2> Plus(const LexicographicWeight<W1, W2> &w,
+                                        const LexicographicWeight<W1, W2> &v) {
+  if (!(w.Member() && v.Member()))
+    return LexicographicWeight<W1, W2>::NoWeight();
+  NaturalLess<W1> first_less;
+  NaturalLess<W2> second_less;
+  // The first components decide whenever they are ordered.
+  if (first_less(w.Value1(), v.Value1())) return w;
+  if (first_less(v.Value1(), w.Value1())) return v;
+  // Otherwise fall back to the second components.
+  if (second_less(w.Value2(), v.Value2())) return w;
+  return second_less(v.Value2(), w.Value2()) ? v : w;
+}
+
+// Component-wise product of two lexicographic weights.
+template <class W1, class W2>
+inline LexicographicWeight<W1, W2> Times(const LexicographicWeight<W1, W2> &w,
+                                         const LexicographicWeight<W1, W2> &v) {
+  typedef LexicographicWeight<W1, W2> Result;
+  return Result(Times(w.Value1(), v.Value1()),
+                Times(w.Value2(), v.Value2()));
+}
+
+// Component-wise division of 'w' by 'v'; 'typ' is passed unchanged to the
+// Divide() of each component.
+template <class W1, class W2>
+inline LexicographicWeight<W1, W2> Divide(const LexicographicWeight<W1, W2> &w,
+                                          const LexicographicWeight<W1, W2> &v,
+                                          DivideType typ = DIVIDE_ANY) {
+  typedef LexicographicWeight<W1, W2> Result;
+  return Result(Divide(w.Value1(), v.Value1(), typ),
+                Divide(w.Value2(), v.Value2(), typ));
+}
+
+} // namespace fst
+
+#endif // FST_LIB_LEXICOGRAPHIC_WEIGHT_H__
diff --git a/src/include/fst/lock.h b/src/include/fst/lock.h
new file mode 100644
index 0000000..3adf7df
--- /dev/null
+++ b/src/include/fst/lock.h
@@ -0,0 +1,81 @@
+// lock.h
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Google-compatibility locking declarations and inline definitions
+
+#ifndef FST_LIB_LOCK_H__
+#define FST_LIB_LOCK_H__
+
+#include <fst/compat.h> // for DISALLOW_COPY_AND_ASSIGN
+
+namespace fst {
+
+using namespace std;
+
+//
+// Single initialization - single-thread implementation
+//
+
+typedef int FstOnceType;
+
+static const int FST_ONCE_INIT = 1;
+
+// Calls 'init' if '*once' is still non-zero, then clears '*once' so that
+// later calls with the same flag skip it.  Always returns 0.
+inline int FstOnceInit(FstOnceType *once, void (*init)(void)) {
+  const bool pending = *once != 0;
+  if (pending) init();
+  *once = 0;
+  return 0;
+}
+
+//
+// Thread locking - single-thread (non-)implementation
+//
+
+class Mutex { // empty placeholder: this header is the single-thread (non-)implementation
+ public:
+ Mutex() {}
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(Mutex);
+};
+
+class MutexLock { // empty placeholder paired with Mutex above
+ public:
+ MutexLock(Mutex *) {} // the mutex argument is ignored
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(MutexLock);
+};
+
+// Reference counting - single-thread implementation.  The count starts
+// at 1; Incr() and Decr() are const because count_ is mutable.
+class RefCounter {
+ public:
+  RefCounter() : count_(1) {}
+
+  // Current count.
+  int count() const { return count_; }
+
+  // Adds one and returns the new count.
+  int Incr() const {
+    count_ += 1;
+    return count_;
+  }
+
+  // Subtracts one and returns the new count.
+  int Decr() const {
+    count_ -= 1;
+    return count_;
+  }
+
+ private:
+  mutable int count_;
+
+  DISALLOW_COPY_AND_ASSIGN(RefCounter);
+};
+
+} // namespace fst
+
+#endif // FST_LIB_LOCK_H__
diff --git a/src/include/fst/log.h b/src/include/fst/log.h
new file mode 100644
index 0000000..d1492cd
--- /dev/null
+++ b/src/include/fst/log.h
@@ -0,0 +1,66 @@
+// log.h
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Google-style logging declarations and inline definitions.
+
+#ifndef FST_LIB_LOG_H__
+#define FST_LIB_LOG_H__
+
+#include <cassert>
+#include <iostream>
+#include <string>
+
+#include <fst/types.h>
+#include <fst/flags.h>
+
+using std::string;
+
+DECLARE_int32(v);
+
+// Writes one log line to std::cerr: the constructor prints "<type>: ",
+// callers append through stream(), and the destructor ends the line.
+// A message of type "FATAL" terminates the process with exit(1) when it
+// is destroyed.
+class LogMessage {
+ public:
+  LogMessage(const string &type) : fatal_(type == "FATAL") {
+    stream() << type << ": ";
+  }
+  ~LogMessage() {
+    stream() << std::endl;
+    if (!fatal_) return;
+    exit(1);
+  }
+  std::ostream &stream() { return std::cerr; }
+
+ private:
+  bool fatal_;  // true when the message type is "FATAL"
+};
+
+#define LOG(type) LogMessage(#type).stream() // temporary LogMessage; the line ends when the full expression does
+#define VLOG(level) if ((level) <= FLAGS_v) LOG(INFO) // expands to a bare 'if': brace it when an 'else' follows
+
+// Checks. CHECK forwards to assert(), so it has no effect when NDEBUG is defined.
+inline void CHECK(bool x) { assert(x); }
+
+#define CHECK_EQ(x, y) CHECK((x) == (y))
+#define CHECK_LT(x, y) CHECK((x) < (y))
+#define CHECK_GT(x, y) CHECK((x) > (y))
+#define CHECK_LE(x, y) CHECK((x) <= (y))
+#define CHECK_GE(x, y) CHECK((x) >= (y))
+#define CHECK_NE(x, y) CHECK((x) != (y))
+
+// Ports. NOTE(review): __attribute__ is compiler-specific syntax; confirm the supported compilers.
+#define ATTRIBUTE_DEPRECATED __attribute__((deprecated))
+
+#endif // FST_LIB_LOG_H__
diff --git a/src/include/fst/lookahead-filter.h b/src/include/fst/lookahead-filter.h
new file mode 100644
index 0000000..e11c1bb
--- /dev/null
+++ b/src/include/fst/lookahead-filter.h
@@ -0,0 +1,698 @@
+// lookahead-filter.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Composition filters to support lookahead matchers, useful for improving
+// composition efficiency with certain inputs.
+
+#ifndef FST_LIB_LOOKAHEAD_FILTER_H__
+#define FST_LIB_LOOKAHEAD_FILTER_H__
+
+#include <vector>
+using std::vector;
+
+#include <fst/fst.h>
+#include <fst/lookahead-matcher.h>
+
+
+namespace fst {
+
+// Determines which side, if any, can be used for lookahead with the
+// composition filters below.  This version is passed the matchers.
+// Returns MATCH_OUTPUT if 'm1' can look ahead on output labels,
+// MATCH_INPUT if 'm2' can look ahead on input labels, o.w. MATCH_NONE.
+template <class M1, class M2>
+MatchType LookAheadMatchType(const M1 &m1, const M2 &m2) {
+  const MatchType quick1 = m1.Type(false);
+  const MatchType quick2 = m2.Type(false);
+  // First consult the Type(false) results.
+  if (quick1 == MATCH_OUTPUT && (m1.Flags() & kOutputLookAheadMatcher))
+    return MATCH_OUTPUT;
+  if (quick2 == MATCH_INPUT && (m2.Flags() & kInputLookAheadMatcher))
+    return MATCH_INPUT;
+  // Then fall back to Type(true), queried only for a matcher that
+  // advertises lookahead on the needed side.
+  if ((m1.Flags() & kOutputLookAheadMatcher) &&
+      m1.Type(true) == MATCH_OUTPUT)
+    return MATCH_OUTPUT;
+  if ((m2.Flags() & kInputLookAheadMatcher) &&
+      m2.Type(true) == MATCH_INPUT)
+    return MATCH_INPUT;
+  return MATCH_NONE;
+}
+
+// Same query as the matcher-based overload, but builds the Fst's default
+// lookahead matchers itself: an output matcher on 'fst1' and an input
+// matcher on 'fst2'.
+template <class Arc>
+MatchType LookAheadMatchType(const Fst<Arc> &fst1, const Fst<Arc> &fst2) {
+  typedef LookAheadMatcher< Fst<Arc> > DefaultMatcher;
+  DefaultMatcher output_matcher(fst1, MATCH_OUTPUT);
+  DefaultMatcher input_matcher(fst2, MATCH_INPUT);
+  return LookAheadMatchType(output_matcher, input_matcher);
+}
+
+//
+// LookAheadSelector - a helper class for selecting among possibly
+// distinct FST and matcher types w/o using a common base class. This
+// lets us avoid virtual function calls.
+//
+
+// Stores and returns the appropriate FST and matcher for lookahead.
+// Primary template, templated on the two matcher types and the match type.
+// It is deliberately empty (general case not currently supported); see the specializations below.
+template <class M1, class M2, MatchType MT>
+class LookAheadSelector {
+};
+
+// Stores and returns the appropriate FST and matcher for lookahead.
+// Specialization for two matchers of the same type M.  It owns a copy of
+// each matcher; the (match) 'type' given at construction decides which
+// copy performs the lookahead.
+template <class M, MatchType MT>
+class LookAheadSelector<M, M, MT> {
+ public:
+  typedef typename M::Arc Arc;
+  typedef typename M::FST F;
+
+  LookAheadSelector(M *lmatcher1, M *lmatcher2, MatchType type)
+      : lmatcher1_(lmatcher1->Copy()),
+        lmatcher2_(lmatcher2->Copy()),
+        type_(type) {}
+
+  LookAheadSelector(const LookAheadSelector<M, M, MT> &selector)
+      : lmatcher1_(selector.lmatcher1_->Copy()),
+        lmatcher2_(selector.lmatcher2_->Copy()),
+        type_(selector.type_) {}
+
+  ~LookAheadSelector() {
+    delete lmatcher1_;
+    delete lmatcher2_;
+  }
+
+  // FST of the matcher that is not the one returned by GetMatcher().
+  const F &GetFst() const {
+    if (type_ == MATCH_OUTPUT) return lmatcher2_->GetFst();
+    return lmatcher1_->GetFst();
+  }
+
+  // The first matcher copy for MATCH_OUTPUT, o.w. the second.
+  M *GetMatcher() const {
+    if (type_ == MATCH_OUTPUT) return lmatcher1_;
+    return lmatcher2_;
+  }
+
+ private:
+  M *lmatcher1_;    // owned copy of the first matcher
+  M *lmatcher2_;    // owned copy of the second matcher
+  MatchType type_;  // selects between the two copies
+
+  void operator=(const LookAheadSelector<M, M, MT> &);  // disallow
+};
+
+// Stores and returns the appropriate FST and matcher for lookahead.
+// Specialized for lookahead on input labels: keeps a copy of the first matcher's FST and a copy of the second matcher.
+template <class M1, class M2>
+class LookAheadSelector<M1, M2, MATCH_INPUT> {
+ public:
+ typedef typename M1::FST F1;
+
+ LookAheadSelector(M1 *lmatcher1, M2 *lmatcher2, MatchType) // the MatchType argument is unused
+ : fst_(lmatcher1->GetFst().Copy()),
+ lmatcher_(lmatcher2->Copy()) {}
+
+ LookAheadSelector(const LookAheadSelector<M1, M2, MATCH_INPUT> &selector)
+ : fst_(selector.fst_->Copy()),
+ lmatcher_(selector.lmatcher_->Copy()) {}
+
+ ~LookAheadSelector() {
+ delete lmatcher_;
+ delete fst_;
+ }
+
+ const F1 &GetFst() const { return *fst_; }
+
+ M2 *GetMatcher() const { return lmatcher_; }
+
+ private:
+ const F1 *fst_; // owned; deleted in the destructor
+ M2 *lmatcher_; // owned; deleted in the destructor
+
+ void operator=(const LookAheadSelector<M1, M2, MATCH_INPUT> &); // disallow
+};
+
+
+// Stores and returns the appropriate FST and matcher for lookahead.
+// Specialized for lookahead on output labels: keeps a copy of the second matcher's FST and a copy of the first matcher.
+template <class M1, class M2>
+class LookAheadSelector<M1, M2, MATCH_OUTPUT> {
+ public:
+ typedef typename M2::FST F2;
+
+ LookAheadSelector(M1 *lmatcher1, M2 *lmatcher2, MatchType) // the MatchType argument is unused
+ : fst_(lmatcher2->GetFst().Copy()),
+ lmatcher_(lmatcher1->Copy()) {}
+
+ LookAheadSelector(const LookAheadSelector<M1, M2, MATCH_OUTPUT> &selector)
+ : fst_(selector.fst_->Copy()),
+ lmatcher_(selector.lmatcher_->Copy()) {}
+
+ ~LookAheadSelector() {
+ delete lmatcher_;
+ delete fst_;
+ }
+
+ const F2 &GetFst() const { return *fst_; }
+
+ M1 *GetMatcher() const { return lmatcher_; }
+
+ private:
+ const F2 *fst_; // owned; deleted in the destructor
+ M1 *lmatcher_; // owned; deleted in the destructor
+
+ void operator=(const LookAheadSelector<M1, M2, MATCH_OUTPUT> &); // disallow
+};
+
+// This filter uses a lookahead matcher in FilterArc(arc1, arc2) to
+// examine the future of the composition state (arc1.nextstate,
+// arc2.nextstate), blocking moving forward when it is determined to be
+// non-coaccessible.  It is templated on an underlying filter,
+// typically the epsilon filter.  Which matcher is the lookahead
+// matcher is determined by the template argument MT unless it is
+// MATCH_BOTH.  In that case, both matcher arguments must be lookahead
+// matchers of the same type and one will be selected by
+// LookAheadMatchType() based on their capability.
+template <class F,
+          class M1 = LookAheadMatcher<typename F::FST1>,
+          class M2 = M1,
+          MatchType MT = MATCH_BOTH>
+class LookAheadComposeFilter {
+ public:
+  typedef typename F::FST1 FST1;
+  typedef typename F::FST2 FST2;
+  typedef typename F::Arc Arc;
+  typedef typename F::Matcher1 Matcher1;
+  typedef typename F::Matcher2 Matcher2;
+  typedef typename F::FilterState FilterState;
+  typedef LookAheadComposeFilter<F, M1, M2, MT> Filter;
+
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+
+  // Builds the underlying filter, picks the lookahead side and initializes
+  // the selected matcher with the FST it looks ahead into.  If neither
+  // side supports lookahead an error is reported and Properties() will
+  // carry kError.
+  LookAheadComposeFilter(const FST1 &fst1, const FST2 &fst2,
+                         M1 *matcher1, M2 *matcher2)
+      : filter_(fst1, fst2, matcher1, matcher2),
+        lookahead_type_(MT == MATCH_BOTH ?
+                        LookAheadMatchType(*filter_.GetMatcher1(),
+                                           *filter_.GetMatcher2()) : MT),
+        selector_(filter_.GetMatcher1(), filter_.GetMatcher2(),
+                  lookahead_type_),
+        flags_(lookahead_type_ == MATCH_OUTPUT ?
+               filter_.GetMatcher1()->Flags() :
+               filter_.GetMatcher2()->Flags()) {
+    if (lookahead_type_ == MATCH_NONE) {
+      FSTERROR() << "LookAheadComposeFilter: 1st argument cannot "
+                 << "match/look-ahead on output labels and 2nd argument "
+                 << "cannot match/look-ahead on input labels.";
+    }
+    selector_.GetMatcher()->InitLookAheadFst(selector_.GetFst());
+  }
+
+  // Copy constructor.  'safe' is forwarded to the underlying filter's
+  // copy; the selector is rebuilt from this object's own matchers.
+  LookAheadComposeFilter(const LookAheadComposeFilter<F, M1, M2, MT> &filter,
+                         bool safe = false)
+      : filter_(filter.filter_, safe),
+        lookahead_type_(filter.lookahead_type_),
+        selector_(filter_.GetMatcher1(), filter_.GetMatcher2(),
+                  lookahead_type_),
+        flags_(filter.flags_) {
+    selector_.GetMatcher()->InitLookAheadFst(selector_.GetFst(), true);
+  }
+
+  FilterState Start() const {
+    return filter_.Start();
+  }
+
+  void SetState(StateId s1, StateId s2, const FilterState &f) {
+    filter_.SetState(s1, s2, f);
+  }
+
+  // Applies the underlying filter first; if it admits the arc pair, the
+  // lookahead test may still block it by returning NoState().
+  FilterState FilterArc(Arc *arc1, Arc *arc2) const {
+    lookahead_arc_ = false;
+
+    const FilterState &f = filter_.FilterArc(arc1, arc2);
+    if (f == FilterState::NoState())
+      return FilterState::NoState();
+
+    return LookAheadOutput() ? LookAheadFilterArc(arc1, arc2, f) :
+        LookAheadFilterArc(arc2, arc1, f);
+  }
+
+  void FilterFinal(Weight *weight1, Weight *weight2) const {
+    filter_.FilterFinal(weight1, weight2);
+  }
+
+  // Return resp matchers. Ownership stays with filter.
+  Matcher1 *GetMatcher1() { return filter_.GetMatcher1(); }
+  Matcher2 *GetMatcher2() { return filter_.GetMatcher2(); }
+
+  const LookAheadSelector<Matcher1, Matcher2, MT> &Selector() const {
+    return selector_;
+  }
+
+  // Properties of the underlying filter, plus kError when no lookahead
+  // side could be selected.
+  uint64 Properties(uint64 inprops) const {
+    uint64 outprops = filter_.Properties(inprops);
+    if (lookahead_type_ == MATCH_NONE)
+      outprops |= kError;
+    return outprops;
+  }
+
+  // Flags of the matcher on the lookahead side, captured at construction.
+  uint32 LookAheadFlags() const { return flags_; }
+
+  // True if the last FilterArc() call performed a lookahead.
+  bool LookAheadArc() const { return lookahead_arc_; }
+
+  // True if lookahead is on output labels.  MT decides when it is
+  // MATCH_OUTPUT or MATCH_INPUT; o.w. the type chosen at construction does.
+  bool LookAheadOutput() const {
+    if (MT == MATCH_OUTPUT)
+      return true;
+    else if (MT == MATCH_INPUT)
+      return false;
+    else if (lookahead_type_ == MATCH_OUTPUT)
+      return true;
+    else
+      return false;
+  }
+
+ private:
+  // 'arca' is the arc on the lookahead side and 'arcb' the other arc.
+  // Returns 'f' unchanged when the matcher flags do not request lookahead
+  // for this kind of label (epsilon vs. non-epsilon) or when the lookahead
+  // succeeds; o.w. returns NoState().
+  FilterState LookAheadFilterArc(Arc *arca, Arc *arcb,
+                                 const FilterState &f) const {
+    Label &labela = LookAheadOutput() ? arca->olabel : arca->ilabel;
+
+    if (labela != 0 && !(flags_ & kLookAheadNonEpsilons))
+      return f;
+    if (labela == 0 && !(flags_ & kLookAheadEpsilons))
+      return f;
+
+    lookahead_arc_ = true;
+    selector_.GetMatcher()->SetState(arca->nextstate);
+
+    return selector_.GetMatcher()->LookAheadFst(selector_.GetFst(),
+                                                arcb->nextstate) ? f :
+                                                FilterState::NoState();
+  }
+
+  F filter_;                    // Underlying filter
+  MatchType lookahead_type_;    // Lookahead match type
+  LookAheadSelector<Matcher1, Matcher2, MT> selector_;
+  uint32 flags_;                // Lookahead flags
+  mutable bool lookahead_arc_;  // Look-ahead performed at last FilterArc()?
+
+  // Names the class's own type (including MT) so that the copy assignment
+  // is the operator being disallowed for every MT, not only MATCH_BOTH.
+  void operator=(const LookAheadComposeFilter<F, M1, M2, MT> &);  // disallow
+};
+
+
+// This filter adds weight-pushing to a lookahead composition filter
+// using the LookAheadWeight() method of matcher argument. It is
+// templated on an underlying lookahead filter, typically the basic
+// lookahead filter. Weight-pushing in composition brings weights
+// forward as much as possible based on the lookahead information.
+template <class F,
+ class M1 = LookAheadMatcher<typename F::FST1>,
+ class M2 = M1,
+ MatchType MT = MATCH_BOTH>
+class PushWeightsComposeFilter {
+ public:
+ typedef typename F::FST1 FST1;
+ typedef typename F::FST2 FST2;
+ typedef typename F::Arc Arc;
+ typedef typename F::Matcher1 Matcher1;
+ typedef typename F::Matcher2 Matcher2;
+ typedef typename F::FilterState FilterState1;
+ typedef WeightFilterState<typename Arc::Weight> FilterState2;
+ typedef PairFilterState<FilterState1, FilterState2> FilterState; // (underlying filter state, pushed weight)
+
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Label Label;
+ typedef typename Arc::Weight Weight;
+
+ PushWeightsComposeFilter(const FST1 &fst1, const FST2 &fst2,
+ M1 *matcher1, M2 *matcher2)
+ : filter_(fst1, fst2, matcher1, matcher2),
+ f_(FilterState::NoState()) {}
+
+ PushWeightsComposeFilter(const PushWeightsComposeFilter<F, M1, M2, MT>
+ &filter,
+ bool safe = false)
+ : filter_(filter.filter_, safe),
+ f_(FilterState::NoState()) {} // the current filter state is reset, not copied
+
+ FilterState Start() const {
+ return FilterState(filter_.Start(), FilterState2(Weight::One()));
+ }
+
+ void SetState(StateId s1, StateId s2, const FilterState &f) {
+ f_ = f; // remembered for FilterArc() and FilterFinal()
+ filter_.SetState(s1, s2, f.GetState1());
+ }
+
+ FilterState FilterArc(Arc *arc1, Arc *arc2) const {
+ const FilterState1 &f1 = filter_.FilterArc(arc1, arc2);
+ if (f1 == FilterState1::NoState())
+ return FilterState::NoState();
+
+ if (!(LookAheadFlags() & kLookAheadWeight)) // weight lookahead not requested: nothing to push
+ return FilterState(f1, FilterState2(Weight::One()));
+
+ const Weight &lweight = filter_.LookAheadArc() ?
+ Selector().GetMatcher()->LookAheadWeight() : Weight::One();
+ const FilterState2 &f2 = f_.GetState2();
+ const Weight &fweight = f2.GetWeight(); // weight stored in the current filter state
+
+ arc2->weight = Divide(Times(arc2->weight, lweight), fweight); // apply the new lookahead weight, divide out the stored one
+ return FilterState(f1, FilterState2(lweight));
+ }
+
+ void FilterFinal(Weight *weight1, Weight *weight2) const {
+ filter_.FilterFinal(weight1, weight2);
+ if (!(LookAheadFlags() & kLookAheadWeight) || *weight1 == Weight::Zero())
+ return;
+
+ const FilterState2 &f2 = f_.GetState2();
+ const Weight &fweight = f2.GetWeight();
+ *weight1 = Divide(*weight1, fweight); // divide out the weight stored in the current filter state
+ }
+ // Return resp matchers. Ownership stays with filter.
+ Matcher1 *GetMatcher1() { return filter_.GetMatcher1(); }
+ Matcher2 *GetMatcher2() { return filter_.GetMatcher2(); }
+
+ const LookAheadSelector<Matcher1, Matcher2, MT> &Selector() const {
+ return filter_.Selector();
+ }
+
+ uint32 LookAheadFlags() const { return filter_.LookAheadFlags(); }
+ bool LookAheadArc() const { return filter_.LookAheadArc(); }
+ bool LookAheadOutput() const { return filter_.LookAheadOutput(); }
+
+ uint64 Properties(uint64 props) const {
+ return filter_.Properties(props) & kWeightInvariantProperties; // keeps only the bits in kWeightInvariantProperties
+ }
+
+ private:
+ F filter_; // Underlying filter
+ FilterState f_; // Current filter state
+
+ void operator=(const PushWeightsComposeFilter<F, M1, M2, MT> &); // disallow
+};
+
+// This filter adds label-pushing to a lookahead composition filter
+// using the LookAheadPrefix() method of the matcher argument. It is
+// templated on an underlying filter, typically the basic lookahead
+// or weight-pushing lookahead filter. Label-pushing in composition
+// matches labels as early as possible based on the lookahead
+// information.
+template <class F,
+          class M1 = LookAheadMatcher<typename F::FST1>,
+          class M2 = M1,
+          MatchType MT = MATCH_BOTH>
+class PushLabelsComposeFilter {
+ public:
+  typedef typename F::FST1 FST1;
+  typedef typename F::FST2 FST2;
+  typedef typename F::Arc Arc;
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+
+  typedef MultiEpsMatcher<typename F::Matcher1> Matcher1;
+  typedef MultiEpsMatcher<typename F::Matcher2> Matcher2;
+  typedef typename F::FilterState FilterState1;
+  typedef IntegerFilterState<typename Arc::Label> FilterState2;
+  typedef PairFilterState<FilterState1, FilterState2> FilterState;
+
+  // Builds the wrapped filter from the FSTs and matchers, then wraps the
+  // wrapped filter's own matchers in multi-epsilon matchers.  The side that
+  // looks ahead gets kMultiEpsList, the other side kMultiEpsLoop.
+  // NOTE(review): the trailing 'false' presumably tells MultiEpsMatcher not
+  // to take ownership of the wrapped matcher -- confirm in matcher.h.
+  PushLabelsComposeFilter(const FST1 &fst1, const FST2 &fst2,
+                          M1 *matcher1, M2 *matcher2)
+      : filter_(fst1, fst2, matcher1, matcher2),
+        f_(FilterState::NoState()),
+        fst1_(filter_.GetMatcher1()->GetFst()),
+        fst2_(filter_.GetMatcher2()->GetFst()),
+        matcher1_(fst1_, MATCH_OUTPUT,
+                  filter_.LookAheadOutput() ? kMultiEpsList : kMultiEpsLoop,
+                  filter_.GetMatcher1(),
+                  false),
+        matcher2_(fst2_, MATCH_INPUT,
+                  filter_.LookAheadOutput() ? kMultiEpsLoop : kMultiEpsList,
+                  filter_.GetMatcher2(),
+                  false),
+        narcsa_(0) {}  // Defined value until SetState() computes it.
+
+  // Copy constructor; 'safe' is forwarded to the wrapped filter's copy
+  // constructor.  The current filter state is reset to NoState() and the
+  // multi-epsilon matchers are rebuilt around the copied filter's matchers.
+  PushLabelsComposeFilter(const PushLabelsComposeFilter<F, M1, M2, MT> &filter,
+                          bool safe = false)
+      : filter_(filter.filter_, safe),
+        f_(FilterState::NoState()),
+        fst1_(filter_.GetMatcher1()->GetFst()),
+        fst2_(filter_.GetMatcher2()->GetFst()),
+        matcher1_(fst1_, MATCH_OUTPUT,
+                  filter_.LookAheadOutput() ? kMultiEpsList : kMultiEpsLoop,
+                  filter_.GetMatcher1(),
+                  false),
+        matcher2_(fst2_, MATCH_INPUT,
+                  filter_.LookAheadOutput() ? kMultiEpsLoop : kMultiEpsList,
+                  filter_.GetMatcher2(),
+                  false),
+        narcsa_(0) {}  // Defined value until SetState() computes it.
+
+  // Initial filter state: the wrapped filter's start state with no pushed
+  // label (kNoLabel).
+  FilterState Start() const {
+    return FilterState(filter_.Start(), FilterState2(kNoLabel));
+  }
+
+  // Records the filter state for (s1, s2) and forwards its first component
+  // to the wrapped filter.  When prefix look-ahead is enabled it also caches
+  // the number of arcs leaving the look-ahead side's state and resets the
+  // multi-epsilon labels of both matchers.
+  void SetState(StateId s1, StateId s2, const FilterState &f) {
+    f_ = f;
+    filter_.SetState(s1, s2, f.GetState1());
+    if (!(LookAheadFlags() & kLookAheadPrefix))
+      return;
+
+    narcsa_ = LookAheadOutput() ? internal::NumArcs(fst1_, s1)
+                                : internal::NumArcs(fst2_, s2);
+
+    const FilterState2 &f2 = f_.GetState2();
+    const Label &flabel = f2.GetState();
+
+    GetMatcher1()->ClearMultiEpsLabels();
+    GetMatcher2()->ClearMultiEpsLabels();
+    // If a label has already been pushed, make it a multi-epsilon label so
+    // that it matches the implicit epsilon arc that is modified when the
+    // pushed label is consumed in PushedLabelFilterArc().
+    if (flabel != kNoLabel) {
+      GetMatcher1()->AddMultiEpsLabel(flabel);
+      GetMatcher2()->AddMultiEpsLabel(flabel);
+    }
+  }
+
+  // Filters an arc pair.  Without prefix look-ahead this only delegates to
+  // the wrapped filter.  Otherwise it either consumes a previously pushed
+  // label (when the current state carries one) or tries to push a new one.
+  FilterState FilterArc(Arc *arc1, Arc *arc2) const {
+    if (!(LookAheadFlags() & kLookAheadPrefix))
+      return FilterState(filter_.FilterArc(arc1, arc2),
+                         FilterState2(kNoLabel));
+
+    const FilterState2 &f2 = f_.GetState2();
+    const Label &flabel = f2.GetState();
+    if (flabel != kNoLabel)             // Have a lookahead label?
+      return LookAheadOutput() ? PushedLabelFilterArc(arc1, arc2, flabel) :
+          PushedLabelFilterArc(arc2, arc1, flabel);
+
+    const FilterState1 &f1 = filter_.FilterArc(arc1, arc2);
+    if (f1 == FilterState1::NoState())
+      return FilterState::NoState();
+
+    if (!filter_.LookAheadArc())
+      return FilterState(f1, FilterState2(kNoLabel));
+
+    return LookAheadOutput() ? PushLabelFilterArc(arc1, arc2, f1) :
+        PushLabelFilterArc(arc2, arc1, f1);
+  }
+
+  // Lets the wrapped filter adjust the final weights.  With prefix
+  // look-ahead enabled, a state that still carries an unconsumed pushed
+  // label is made non-final by setting *weight1 to Zero.
+  void FilterFinal(Weight *weight1, Weight *weight2) const {
+    filter_.FilterFinal(weight1, weight2);
+    if (!(LookAheadFlags() & kLookAheadPrefix) ||
+        *weight1 == Weight::Zero())
+      return;
+
+    const FilterState2 &f2 = f_.GetState2();
+    const Label &flabel = f2.GetState();
+    if (flabel != kNoLabel)
+      *weight1 = Weight::Zero();
+  }
+
+  // Returns the respective (multi-epsilon) matchers. Ownership stays with
+  // the filter.
+  Matcher1 *GetMatcher1() { return &matcher1_; }
+  Matcher2 *GetMatcher2() { return &matcher2_; }
+
+  // Takes the wrapped filter's output properties and keeps only those that
+  // are invariant under relabeling of the side whose labels get pushed.
+  uint64 Properties(uint64 iprops) const {
+    uint64 oprops = filter_.Properties(iprops);
+    if (LookAheadOutput())
+      return oprops & kOLabelInvariantProperties;
+    else
+      return oprops & kILabelInvariantProperties;
+  }
+
+ private:
+  const LookAheadSelector<typename F::Matcher1, typename F::Matcher2, MT>
+  &Selector() const {
+    return filter_.Selector();
+  }
+
+  // Consumes an already pushed label 'flabel'.  'arca' is the arc on the
+  // look-ahead (matcher) side and 'arcb' the arc on the other side.
+  FilterState PushedLabelFilterArc(Arc *arca, Arc *arcb,
+                                   Label flabel) const {
+    Label &labela = LookAheadOutput() ? arca->olabel : arca->ilabel;
+    const Label &labelb = LookAheadOutput() ? arcb->ilabel : arcb->olabel;
+
+    if (labelb != kNoLabel) {
+      return FilterState::NoState();    // Block non- (multi-) epsilon label
+    } else if (labela == flabel) {
+      labela = 0;                       // Convert match to multi-eps to eps
+      return Start();
+    } else if (labela == 0) {
+      if (narcsa_ == 1)
+        return f_;                      // Take eps; keep state w/ label
+      Selector().GetMatcher()->SetState(arca->nextstate);
+      if (Selector().GetMatcher()->LookAheadLabel(flabel))
+        return f_;                      // Take eps; keep state w/ label
+      else
+        return FilterState::NoState();  // Block non-coaccessible path
+    } else {
+      return FilterState::NoState();    // Block mismatch to multi-eps label
+    }
+  }
+
+  // Pushes a label forward when possible.  If the matcher reports a unique
+  // look-ahead prefix arc, 'arcb' is replaced by that arc (with the weights
+  // multiplied) and the pushed label is stored in the returned filter state.
+  FilterState PushLabelFilterArc(Arc *arca, Arc *arcb,
+                                 const FilterState1 &f1) const {
+    Label &labela = LookAheadOutput() ? arca->olabel : arca->ilabel;
+    const Label &labelb = LookAheadOutput() ? arcb->olabel : arcb->ilabel;
+
+    if (labelb != 0)                    // No place to push.
+      return FilterState(f1, FilterState2(kNoLabel));
+    if (labela != 0 &&                  // Wrong lookahead prefix type?
+        LookAheadFlags() & kLookAheadNonEpsilonPrefix)
+      return FilterState(f1, FilterState2(kNoLabel));
+
+    Arc larc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId);
+
+    if (Selector().GetMatcher()->LookAheadPrefix(&larc)) {  // Have prefix arc?
+      labela = LookAheadOutput() ? larc.ilabel : larc.olabel;
+      arcb->ilabel = larc.ilabel;       // Yes, go forward on that arc,
+      arcb->olabel = larc.olabel;       // thus pushing the label.
+      arcb->weight = Times(arcb->weight, larc.weight);
+      arcb->nextstate = larc.nextstate;
+      return FilterState(f1, FilterState2(labela));
+    } else {
+      return FilterState(f1, FilterState2(kNoLabel));
+    }
+  }
+
+  uint32 LookAheadFlags() const { return filter_.LookAheadFlags(); }
+  bool LookAheadArc() const { return filter_.LookAheadArc(); }
+  bool LookAheadOutput() const { return filter_.LookAheadOutput(); }
+
+  F filter_;                  // Underlying filter
+  FilterState f_;             // Current filter state
+  const FST1 &fst1_;
+  const FST2 &fst2_;
+  Matcher1 matcher1_;         // Multi-epsilon matcher for fst1
+  Matcher2 matcher2_;         // Multi-epsilon matcher for fst2
+  ssize_t narcsa_;            // Number of arcs leaving look-ahead match FST
+
+  void operator=(const PushLabelsComposeFilter<F, M1, M2, MT> &);  // disallow
+};
+
+//
+// CONVENIENCE CLASS useful for setting up composition with a default
+// look-ahead matcher and filter.
+//
+
+// Primary template, used for any match type that has no specialization
+// below (e.g. MATCH_NONE): a plain Matcher with a SequenceComposeFilter,
+// i.e. no look-ahead filter.  Each variant exposes 'FstMatcher' (the matcher
+// type) and 'ComposeFilter' (the composition filter type).
+template <class A, MatchType type> // MATCH_NONE
+class DefaultLookAhead {
+ public:
+ typedef Matcher< Fst<A> > M;
+ typedef SequenceComposeFilter<M> ComposeFilter;
+ typedef M FstMatcher;
+};
+
+// Specializes for MATCH_INPUT to allow lookahead: a LookAheadComposeFilter
+// layered on a SequenceComposeFilter.
+template <class A>
+class DefaultLookAhead<A, MATCH_INPUT> {
+ public:
+ typedef LookAheadMatcher< Fst<A> > M;
+ typedef SequenceComposeFilter<M> SF;
+ typedef LookAheadComposeFilter<SF, M> ComposeFilter;
+ typedef M FstMatcher;
+};
+
+// Specializes for MATCH_OUTPUT to allow lookahead: a LookAheadComposeFilter
+// layered on an AltSequenceComposeFilter.
+template <class A>
+class DefaultLookAhead<A, MATCH_OUTPUT> {
+ public:
+ typedef LookAheadMatcher< Fst<A> > M;
+ typedef AltSequenceComposeFilter<M> SF;
+ typedef LookAheadComposeFilter<SF, M> ComposeFilter;
+ typedef M FstMatcher;
+};
+
+// Specializes for StdArc with MATCH_INPUT to allow weight and label pushing:
+// the look-ahead filter is wrapped first by PushWeightsComposeFilter and
+// then by PushLabelsComposeFilter.
+template <>
+class DefaultLookAhead<StdArc, MATCH_INPUT> {
+ public:
+ typedef StdArc A;
+ typedef LookAheadMatcher< Fst<A> > M;
+ typedef SequenceComposeFilter<M> SF;
+ typedef LookAheadComposeFilter<SF, M> LF;
+ typedef PushWeightsComposeFilter<LF, M> WF;
+ typedef PushLabelsComposeFilter<WF, M> ComposeFilter;
+ typedef M FstMatcher;
+};
+
+// Specializes for StdArc with MATCH_OUTPUT to allow weight and label
+// pushing; same layering as above but based on AltSequenceComposeFilter.
+template <>
+class DefaultLookAhead<StdArc, MATCH_OUTPUT> {
+ public:
+ typedef StdArc A;
+ typedef LookAheadMatcher< Fst<A> > M;
+ typedef AltSequenceComposeFilter<M> SF;
+ typedef LookAheadComposeFilter<SF, M> LF;
+ typedef PushWeightsComposeFilter<LF, M> WF;
+ typedef PushLabelsComposeFilter<WF, M> ComposeFilter;
+ typedef M FstMatcher;
+};
+
+// Specializes for LogArc with MATCH_INPUT to allow weight and label pushing
+// (layered on SequenceComposeFilter).
+template <>
+class DefaultLookAhead<LogArc, MATCH_INPUT> {
+ public:
+ typedef LogArc A;
+ typedef LookAheadMatcher< Fst<A> > M;
+ typedef SequenceComposeFilter<M> SF;
+ typedef LookAheadComposeFilter<SF, M> LF;
+ typedef PushWeightsComposeFilter<LF, M> WF;
+ typedef PushLabelsComposeFilter<WF, M> ComposeFilter;
+ typedef M FstMatcher;
+};
+
+// Specializes for LogArc with MATCH_OUTPUT to allow weight and label pushing
+// (layered on AltSequenceComposeFilter).
+template <>
+class DefaultLookAhead<LogArc, MATCH_OUTPUT> {
+ public:
+ typedef LogArc A;
+ typedef LookAheadMatcher< Fst<A> > M;
+ typedef AltSequenceComposeFilter<M> SF;
+ typedef LookAheadComposeFilter<SF, M> LF;
+ typedef PushWeightsComposeFilter<LF, M> WF;
+ typedef PushLabelsComposeFilter<WF, M> ComposeFilter;
+ typedef M FstMatcher;
+};
+
+} // namespace fst
+
+#endif // FST_LIB_LOOKAHEAD_FILTER_H__
diff --git a/src/include/fst/lookahead-matcher.h b/src/include/fst/lookahead-matcher.h
new file mode 100644
index 0000000..10d9c01
--- /dev/null
+++ b/src/include/fst/lookahead-matcher.h
@@ -0,0 +1,813 @@
+// lookahead-matcher.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Classes to add lookahead to FST matchers, useful e.g. for improving
+// composition efficiency with certain inputs.
+
+#ifndef FST_LIB_LOOKAHEAD_MATCHER_H__
+#define FST_LIB_LOOKAHEAD_MATCHER_H__
+
+#include <fst/add-on.h>
+#include <fst/const-fst.h>
+#include <fst/fst.h>
+#include <fst/label-reachable.h>
+#include <fst/matcher.h>
+
+
+DECLARE_string(save_relabel_ipairs);
+DECLARE_string(save_relabel_opairs);
+
+namespace fst {
+
+// LOOKAHEAD MATCHERS - these have the interface of Matchers (see
+// matcher.h) and these additional methods:
+//
+// template <class F>
+// class LookAheadMatcher {
+// public:
+// typedef F FST;
+// typedef F::Arc Arc;
+// typedef typename Arc::StateId StateId;
+// typedef typename Arc::Label Label;
+// typedef typename Arc::Weight Weight;
+//
+// // Required constructors.
+// LookAheadMatcher(const F &fst, MatchType match_type);
+// // If safe=true, the copy is thread-safe (except the lookahead Fst is
+// // preserved). See Fst<>::Copy() for further doc.
+// LookAheadMatcher(const LookAheadMatcher &matcher, bool safe = false);
+//
+// Below are methods for looking ahead for a match to a label and
+// more generally, to a rational set. Each returns false if there is
+// definitely not a match and returns true if there possibly is a
+// match.
+
+// // LABEL LOOKAHEAD: Can 'label' be read from the current matcher state
+// // after possibly following epsilon transitions?
+// bool LookAheadLabel(Label label) const;
+//
+// // RATIONAL LOOKAHEAD: The next methods allow looking ahead for an
+// // arbitrary rational set of strings, specified by an FST and a state
+// // from which to begin the matching. If the lookahead FST is a
+// // transducer, this looks on the side different from the matcher
+// // 'match_type' (cf. composition).
+//
+// // Are there paths P from 's' in the lookahead FST that can be read from
+// // the cur. matcher state?
+// bool LookAheadFst(const Fst<Arc>& fst, StateId s);
+//
+// // Gives an estimate of the combined weight of the paths P in the
+// // lookahead and matcher FSTs for the last call to LookAheadFst.
+// // A trivial implementation returns Weight::One(). Non-trivial
+// // implementations are useful for weight-pushing in composition.
+// Weight LookAheadWeight() const;
+//
+// // Is there a single non-epsilon arc found in the lookahead FST
+// // that begins P (after possibly following any epsilons) in the last
+// // call to LookAheadFst? If so, return true and copy it to '*arc', o.w.
+// // return false. A trivial implementation returns false. Non-trivial
+// // implementations are useful for label-pushing in composition.
+// bool LookAheadPrefix(Arc *arc);
+//
+// // Optionally pre-specifies the lookahead FST that will be passed
+// // to LookAheadFst() for possible precomputation. If copy is true,
+// // then 'fst' is a copy of the FST used in the previous call to
+// // this method (useful to avoid unnecessary updates).
+// void InitLookAheadFst(const Fst<Arc>& fst, bool copy = false);
+//
+// };
+
+//
+// LOOK-AHEAD FLAGS (see also kMatcherFlags in matcher.h):
+//
+// Matcher is a lookahead matcher when 'match_type' is MATCH_INPUT.
+const uint32 kInputLookAheadMatcher = 0x00000001;
+
+// Matcher is a lookahead matcher when 'match_type' is MATCH_OUTPUT.
+const uint32 kOutputLookAheadMatcher = 0x00000002;
+
+// Is a non-trivial implementation of the LookAheadWeight() method defined,
+// and should it be used?
+const uint32 kLookAheadWeight = 0x00000004;
+
+// Is a non-trivial implementation of the LookAheadPrefix() method defined,
+// and should it be used?
+const uint32 kLookAheadPrefix = 0x00000008;
+
+// Look ahead on the matcher FST's non-epsilon arcs?
+const uint32 kLookAheadNonEpsilons = 0x00000010;
+
+// Look ahead on the matcher FST's epsilon arcs?
+const uint32 kLookAheadEpsilons = 0x00000020;
+
+// Ignore epsilon paths for the lookahead prefix? Note this gives
+// correct results in composition only with an appropriate composition
+// filter since it depends on the filter blocking the ignored paths.
+const uint32 kLookAheadNonEpsilonPrefix = 0x00000040;
+
+// For LabelLookAheadMatcher, save relabeling data to file.
+const uint32 kLookAheadKeepRelabelData = 0x00000080;
+
+// Mask covering all of the lookahead matcher flag bits defined above.
+const uint32 kLookAheadFlags = 0x000000ff;
+
+// Abstract look-ahead matcher interface, templated on the Arc definition.
+// It is the type of the look-ahead matcher specializations returned by the
+// InitMatcher() Fst method.  The public look-ahead calls are non-virtual
+// and dispatch to private virtual hooks; the look-ahead weight and prefix
+// arc are stored here and maintained by derived classes through the
+// protected setters.
+template <class A>
+class LookAheadMatcherBase : public MatcherBase<A> {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef typename A::Label Label;
+  typedef typename A::Weight Weight;
+
+  // Starts with a look-ahead weight of One and no prefix arc (a prefix arc
+  // whose next state is kNoStateId counts as "not set").
+  LookAheadMatcherBase()
+      : weight_(Weight::One()),
+        prefix_arc_(kNoLabel, kNoLabel, Weight::One(), kNoStateId) {}
+
+  virtual ~LookAheadMatcherBase() {}
+
+  bool LookAheadLabel(Label label) const {
+    return LookAheadLabel_(label);
+  }
+
+  bool LookAheadFst(const Fst<Arc> &fst, StateId s) {
+    return LookAheadFst_(fst, s);
+  }
+
+  // Weight most recently stored via SetLookAheadWeight().
+  Weight LookAheadWeight() const { return weight_; }
+
+  // Copies the stored prefix arc into '*arc' and returns true if one is set;
+  // otherwise returns false and leaves '*arc' untouched.
+  bool LookAheadPrefix(Arc *arc) const {
+    if (prefix_arc_.nextstate == kNoStateId)
+      return false;
+    *arc = prefix_arc_;
+    return true;
+  }
+
+  virtual void InitLookAheadFst(const Fst<Arc>& fst, bool copy = false) = 0;
+
+ protected:
+  void SetLookAheadWeight(const Weight &w) { weight_ = w; }
+
+  void SetLookAheadPrefix(const Arc &arc) { prefix_arc_ = arc; }
+
+  // Marks the prefix arc as "not set".
+  void ClearLookAheadPrefix() { prefix_arc_.nextstate = kNoStateId; }
+
+ private:
+  virtual bool LookAheadLabel_(Label label) const = 0;
+  // Implementations must set the look-ahead weight and prefix if these are
+  // non-trivial.
+  virtual bool LookAheadFst_(const Fst<Arc> &fst, StateId s) = 0;
+
+  Weight weight_;     // Look-ahead weight
+  Arc prefix_arc_;    // Look-ahead prefix arc
+};
+
+
+// Doesn't really look ahead; just declares that the future looks good
+// regardless.  All ordinary matcher calls are forwarded to the wrapped
+// matcher of type M.
+template <class M>
+class TrivialLookAheadMatcher
+ : public LookAheadMatcherBase<typename M::FST::Arc> {
+ public:
+ typedef typename M::FST FST;
+ typedef typename M::Arc Arc;
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Label Label;
+ typedef typename Arc::Weight Weight;
+
+ TrivialLookAheadMatcher(const FST &fst, MatchType match_type)
+ : matcher_(fst, match_type) {}
+
+ // Copy constructor; 'safe' is passed on to the wrapped matcher's copy
+ // constructor.
+ TrivialLookAheadMatcher(const TrivialLookAheadMatcher<M> &lmatcher,
+ bool safe = false)
+ : matcher_(lmatcher.matcher_, safe) {}
+
+ // General matcher methods
+ TrivialLookAheadMatcher<M> *Copy(bool safe = false) const {
+ return new TrivialLookAheadMatcher<M>(*this, safe);
+ }
+
+ MatchType Type(bool test) const { return matcher_.Type(test); }
+ void SetState(StateId s) { return matcher_.SetState(s); }
+ bool Find(Label label) { return matcher_.Find(label); }
+ bool Done() const { return matcher_.Done(); }
+ const Arc& Value() const { return matcher_.Value(); }
+ void Next() { matcher_.Next(); }
+ virtual const FST &GetFst() const { return matcher_.GetFst(); }
+ uint64 Properties(uint64 props) const { return matcher_.Properties(props); }
+ // Adds both look-ahead bits to the wrapped matcher's flags.
+ uint32 Flags() const {
+ return matcher_.Flags() | kInputLookAheadMatcher | kOutputLookAheadMatcher;
+ }
+
+ // Look-ahead methods.  Every query succeeds, the weight is always One, no
+ // prefix arc is ever reported, and initialization does nothing.
+ bool LookAheadLabel(Label label) const { return true; }
+ bool LookAheadFst(const Fst<Arc> &fst, StateId s) {return true; }
+ Weight LookAheadWeight() const { return Weight::One(); }
+ bool LookAheadPrefix(Arc *arc) const { return false; }
+ void InitLookAheadFst(const Fst<Arc>& fst, bool copy = false) {}
+
+ private:
+ // This allows base class virtual access to non-virtual derived-
+ // class members of the same name. It makes the derived class more
+ // efficient to use but unsafe to further derive.
+ virtual void SetState_(StateId s) { SetState(s); }
+ virtual bool Find_(Label label) { return Find(label); }
+ virtual bool Done_() const { return Done(); }
+ virtual const Arc& Value_() const { return Value(); }
+ virtual void Next_() { Next(); }
+
+ bool LookAheadLabel_(Label l) const { return LookAheadLabel(l); }
+
+ bool LookAheadFst_(const Fst<Arc> &fst, StateId s) {
+ return LookAheadFst(fst, s);
+ }
+
+ // NOTE(review): LookAheadMatcherBase, as defined in this file, declares no
+ // virtual LookAheadWeight_/LookAheadPrefix_, so these two appear to be
+ // plain private helpers rather than overrides -- confirm before relying
+ // on them.
+ Weight LookAheadWeight_() const { return LookAheadWeight(); }
+ bool LookAheadPrefix_(Arc *arc) const { return LookAheadPrefix(arc); }
+
+ M matcher_;
+};
+
+// Look-ahead of one transition. Template argument F accepts flags to
+// control behavior (see the look-ahead flag constants); by default weights
+// and prefixes are computed and both epsilon and non-epsilon arcs are
+// looked at.  Ordinary matcher calls are forwarded to the wrapped matcher.
+template <class M, uint32 F = kLookAheadNonEpsilons | kLookAheadEpsilons |
+ kLookAheadWeight | kLookAheadPrefix>
+class ArcLookAheadMatcher
+ : public LookAheadMatcherBase<typename M::FST::Arc> {
+ public:
+ typedef typename M::FST FST;
+ typedef typename M::Arc Arc;
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Label Label;
+ typedef typename Arc::Weight Weight;
+ typedef NullAddOn MatcherData;
+
+ using LookAheadMatcherBase<Arc>::LookAheadWeight;
+ using LookAheadMatcherBase<Arc>::SetLookAheadPrefix;
+ using LookAheadMatcherBase<Arc>::SetLookAheadWeight;
+ using LookAheadMatcherBase<Arc>::ClearLookAheadPrefix;
+
+ // 'data' is accepted but not used by this matcher.  No look-ahead FST is
+ // registered and no state is set initially.
+ ArcLookAheadMatcher(const FST &fst, MatchType match_type,
+ MatcherData *data = 0)
+ : matcher_(fst, match_type),
+ fst_(matcher_.GetFst()),
+ lfst_(0),
+ s_(kNoStateId) {}
+
+ // Copy constructor; 'safe' is passed on to the wrapped matcher's copy
+ // constructor.  The look-ahead FST pointer is shared with the original and
+ // the current state is reset.
+ ArcLookAheadMatcher(const ArcLookAheadMatcher<M, F> &lmatcher,
+ bool safe = false)
+ : matcher_(lmatcher.matcher_, safe),
+ fst_(matcher_.GetFst()),
+ lfst_(lmatcher.lfst_),
+ s_(kNoStateId) {}
+
+ // General matcher methods
+ ArcLookAheadMatcher<M, F> *Copy(bool safe = false) const {
+ return new ArcLookAheadMatcher<M, F>(*this, safe);
+ }
+
+ MatchType Type(bool test) const { return matcher_.Type(test); }
+
+ // Remembers the state for later LookAheadFst() calls and positions the
+ // wrapped matcher on it.
+ void SetState(StateId s) {
+ s_ = s;
+ matcher_.SetState(s);
+ }
+
+ bool Find(Label label) { return matcher_.Find(label); }
+ bool Done() const { return matcher_.Done(); }
+ const Arc& Value() const { return matcher_.Value(); }
+ void Next() { matcher_.Next(); }
+ const FST &GetFst() const { return fst_; }
+ uint64 Properties(uint64 props) const { return matcher_.Properties(props); }
+ // Adds both look-ahead bits and the template flags F to the wrapped
+ // matcher's flags.
+ uint32 Flags() const {
+ return matcher_.Flags() | kInputLookAheadMatcher |
+ kOutputLookAheadMatcher | F;
+ }
+
+ // Writable matcher methods
+ MatcherData *GetData() const { return 0; }
+
+ // Look-ahead methods.
+ // A label can be read if the wrapped matcher finds it at the current
+ // state.  This repositions the wrapped matcher, which is why 'matcher_' is
+ // declared mutable.
+ bool LookAheadLabel(Label label) const { return matcher_.Find(label); }
+
+ // Checks if there is a matching (possibly super-final) transition
+ // at (s_, s).
+ bool LookAheadFst(const Fst<Arc> &fst, StateId s);
+
+ // Registers 'fst' as the look-ahead FST; 'copy' is ignored here.
+ void InitLookAheadFst(const Fst<Arc>& fst, bool copy = false) {
+ lfst_ = &fst;
+ }
+
+ private:
+ // This allows base class virtual access to non-virtual derived-
+ // class members of the same name. It makes the derived class more
+ // efficient to use but unsafe to further derive.
+ virtual void SetState_(StateId s) { SetState(s); }
+ virtual bool Find_(Label label) { return Find(label); }
+ virtual bool Done_() const { return Done(); }
+ virtual const Arc& Value_() const { return Value(); }
+ virtual void Next_() { Next(); }
+
+ bool LookAheadLabel_(Label l) const { return LookAheadLabel(l); }
+ bool LookAheadFst_(const Fst<Arc> &fst, StateId s) {
+ return LookAheadFst(fst, s);
+ }
+
+ mutable M matcher_;
+ const FST &fst_; // Matcher FST
+ const Fst<Arc> *lfst_; // Look-ahead FST
+ StateId s_; // Matcher state
+};
+
+// Looks one transition ahead from the pair (s_, s), where s_ is the current
+// matcher state and 's' a state of the look-ahead FST 'fst'.  Returns true
+// if a match is possible.  Depending on the flags F it also accumulates the
+// look-ahead weight (Plus over all matches found) and records a prefix arc
+// when exactly one match candidate was counted.
+template <class M, uint32 F>
+bool ArcLookAheadMatcher<M, F>::LookAheadFst(const Fst<Arc> &fst, StateId s) {
+ // Switch to 'fst' if it is not the look-ahead FST already registered.
+ if (&fst != lfst_)
+ InitLookAheadFst(fst);
+
+ bool ret = false;
+ ssize_t nprefix = 0; // Number of match candidates counted so far.
+ // Start accumulating from Zero and forget any previous prefix arc.
+ if (F & kLookAheadWeight)
+ SetLookAheadWeight(Weight::Zero());
+ if (F & kLookAheadPrefix)
+ ClearLookAheadPrefix();
+ // Case 1: both states are final (the "super-final" transition).
+ if (fst_.Final(s_) != Weight::Zero() &&
+ lfst_->Final(s) != Weight::Zero()) {
+ // With neither weights nor prefixes requested, any match settles it.
+ if (!(F & (kLookAheadWeight | kLookAheadPrefix)))
+ return true;
+ ++nprefix;
+ if (F & kLookAheadWeight)
+ SetLookAheadWeight(Plus(LookAheadWeight(),
+ Times(fst_.Final(s_), lfst_->Final(s))));
+ ret = true;
+ }
+ // Case 2: the wrapped matcher finds kNoLabel at s_.
+ // NOTE(review): presumably this selects the matcher FST's own epsilon
+ // arcs -- confirm against the Find() contract in matcher.h.
+ if (matcher_.Find(kNoLabel)) {
+ if (!(F & (kLookAheadWeight | kLookAheadPrefix)))
+ return true;
+ ++nprefix;
+ if (F & kLookAheadWeight)
+ for (; !matcher_.Done(); matcher_.Next())
+ SetLookAheadWeight(Plus(LookAheadWeight(), matcher_.Value().weight));
+ ret = true;
+ }
+ // Case 3: arcs leaving 's' in the look-ahead FST.
+ for (ArcIterator< Fst<Arc> > aiter(*lfst_, s);
+ !aiter.Done();
+ aiter.Next()) {
+ const Arc &arc = aiter.Value();
+ // Use the label on the side of the look-ahead arc that faces the matcher.
+ Label label = kNoLabel;
+ switch (matcher_.Type(false)) {
+ case MATCH_INPUT:
+ label = arc.olabel;
+ break;
+ case MATCH_OUTPUT:
+ label = arc.ilabel;
+ break;
+ default:
+ FSTERROR() << "ArcLookAheadMatcher::LookAheadFst: bad match type";
+ return true;
+ }
+ if (label == 0) {
+ // Epsilon on the look-ahead side always counts as a match; it is only
+ // counted as a prefix candidate when kLookAheadNonEpsilonPrefix is off.
+ if (!(F & (kLookAheadWeight | kLookAheadPrefix)))
+ return true;
+ if (!(F & kLookAheadNonEpsilonPrefix))
+ ++nprefix;
+ if (F & kLookAheadWeight)
+ SetLookAheadWeight(Plus(LookAheadWeight(), arc.weight));
+ ret = true;
+ } else if (matcher_.Find(label)) {
+ if (!(F & (kLookAheadWeight | kLookAheadPrefix)))
+ return true;
+ // Every matching arc of the matcher FST is a separate candidate.
+ for (; !matcher_.Done(); matcher_.Next()) {
+ ++nprefix;
+ if (F & kLookAheadWeight)
+ SetLookAheadWeight(Plus(LookAheadWeight(),
+ Times(arc.weight,
+ matcher_.Value().weight)));
+ // Tentatively record the look-ahead arc while it is the only candidate.
+ if ((F & kLookAheadPrefix) && nprefix == 1)
+ SetLookAheadPrefix(arc);
+ }
+ ret = true;
+ }
+ }
+ // Keep the prefix only if exactly one candidate was counted overall;
+ // otherwise discard whatever was tentatively recorded.
+ if (F & kLookAheadPrefix) {
+ if (nprefix == 1)
+ SetLookAheadWeight(Weight::One()); // Avoids double counting.
+ else
+ ClearLookAheadPrefix();
+ }
+ return ret;
+}
+
+
+// Look-ahead matcher based on label reachability information.
+// Template argument F accepts flags to control behavior.
+// It must include precisely one of kInputLookAheadMatcher or
+// kOutputLookAheadMatcher.
+template <class M, uint32 F = kLookAheadEpsilons | kLookAheadWeight |
+          kLookAheadPrefix | kLookAheadNonEpsilonPrefix |
+          kLookAheadKeepRelabelData,
+          class S = DefaultAccumulator<typename M::Arc> >
+class LabelLookAheadMatcher
+    : public LookAheadMatcherBase<typename M::FST::Arc> {
+ public:
+  typedef typename M::FST FST;
+  typedef typename M::Arc Arc;
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+  typedef LabelReachableData<Label> MatcherData;
+
+  using LookAheadMatcherBase<Arc>::LookAheadWeight;
+  using LookAheadMatcherBase<Arc>::SetLookAheadPrefix;
+  using LookAheadMatcherBase<Arc>::SetLookAheadWeight;
+  using LookAheadMatcherBase<Arc>::ClearLookAheadPrefix;
+
+  // Builds the matcher.  Reachability information is taken from 'data' when
+  // given and its direction agrees with 'match_type'; otherwise it is
+  // computed from 'fst' if F enables look-ahead for this match type.  If
+  // neither applies, no reachability information is kept and the look-ahead
+  // queries succeed trivially.  Flags F naming neither look-ahead direction
+  // are reported as an error.
+  LabelLookAheadMatcher(const FST &fst, MatchType match_type,
+                        MatcherData *data = 0, S *s = 0)
+      : matcher_(fst, match_type),
+        lfst_(0),
+        label_reachable_(0),
+        s_(kNoStateId),
+        match_set_state_(false),   // Nothing has been positioned yet; these
+        reach_set_state_(false),   // flags are read by Find()/LookAheadLabel().
+        error_(false) {
+    if (!(F & (kInputLookAheadMatcher | kOutputLookAheadMatcher))) {
+      FSTERROR() << "LabelLookaheadMatcher: bad matcher flags: " << F;
+      error_ = true;
+    }
+    bool reach_input = match_type == MATCH_INPUT;
+    if (data) {
+      if (reach_input == data->ReachInput())
+        label_reachable_ = new LabelReachable<Arc, S>(data, s);
+    } else if ((reach_input && (F & kInputLookAheadMatcher)) ||
+               (!reach_input && (F & kOutputLookAheadMatcher))) {
+      label_reachable_ = new LabelReachable<Arc, S>(
+          fst, reach_input, s, F & kLookAheadKeepRelabelData);
+    }
+  }
+
+  // Copy constructor; 'safe' is passed on to the wrapped matcher's copy
+  // constructor.  The reachability object is copied, the look-ahead FST
+  // pointer is shared, and the current state is reset.
+  LabelLookAheadMatcher(const LabelLookAheadMatcher<M, F, S> &lmatcher,
+                        bool safe = false)
+      : matcher_(lmatcher.matcher_, safe),
+        lfst_(lmatcher.lfst_),
+        label_reachable_(
+            lmatcher.label_reachable_ ?
+            new LabelReachable<Arc, S>(*lmatcher.label_reachable_) : 0),
+        s_(kNoStateId),
+        match_set_state_(false),   // The copy starts with no state set, so
+        reach_set_state_(false),   // neither helper has been positioned.
+        error_(lmatcher.error_) {}
+
+  ~LabelLookAheadMatcher() {
+    delete label_reachable_;
+  }
+
+  // General matcher methods
+  LabelLookAheadMatcher<M, F, S> *Copy(bool safe = false) const {
+    return new LabelLookAheadMatcher<M, F, S>(*this, safe);
+  }
+
+  MatchType Type(bool test) const { return matcher_.Type(test); }
+
+  // Only records the state; the wrapped matcher and the reachability object
+  // are positioned lazily on first use.
+  void SetState(StateId s) {
+    if (s_ == s)
+      return;
+    s_ = s;
+    match_set_state_ = false;
+    reach_set_state_ = false;
+  }
+
+  bool Find(Label label) {
+    if (!match_set_state_) {
+      matcher_.SetState(s_);
+      match_set_state_ = true;
+    }
+    return matcher_.Find(label);
+  }
+
+  bool Done() const { return matcher_.Done(); }
+  const Arc& Value() const { return matcher_.Value(); }
+  void Next() { matcher_.Next(); }
+  const FST &GetFst() const { return matcher_.GetFst(); }
+
+  // Wrapped matcher's properties, with kError added if this matcher or its
+  // reachability object is in an error state.
+  uint64 Properties(uint64 inprops) const {
+    uint64 outprops = matcher_.Properties(inprops);
+    if (error_ || (label_reachable_ && label_reachable_->Error()))
+      outprops |= kError;
+    return outprops;
+  }
+
+  // Adds F and the look-ahead bit for the direction actually supported by
+  // the reachability data; without such data only the wrapped matcher's
+  // flags are reported.
+  uint32 Flags() const {
+    if (label_reachable_ && label_reachable_->GetData()->ReachInput())
+      return matcher_.Flags() | F | kInputLookAheadMatcher;
+    else if (label_reachable_ && !label_reachable_->GetData()->ReachInput())
+      return matcher_.Flags() | F | kOutputLookAheadMatcher;
+    else
+      return matcher_.Flags();
+  }
+
+  // Writable matcher methods
+  MatcherData *GetData() const {
+    return label_reachable_ ? label_reachable_->GetData() : 0;
+  }
+
+  // Look-ahead methods.
+  // Epsilon (label 0) is always considered readable; so is any label when no
+  // reachability information is available.
+  bool LookAheadLabel(Label label) const {
+    if (label == 0)
+      return true;
+
+    if (label_reachable_) {
+      if (!reach_set_state_) {
+        label_reachable_->SetState(s_);
+        reach_set_state_ = true;
+      }
+      return label_reachable_->Reach(label);
+    } else {
+      return true;
+    }
+  }
+
+  // Checks if there is a matching (possibly super-final) transition
+  // at (s_, s).
+  template <class L>
+  bool LookAheadFst(const L &fst, StateId s);
+
+  // Registers 'fst' as the look-ahead FST and lets the reachability object
+  // (if any) initialize itself for it.
+  void InitLookAheadFst(const Fst<Arc>& fst, bool copy = false) {
+    lfst_ = &fst;
+    if (label_reachable_)
+      label_reachable_->ReachInit(fst, copy);
+  }
+
+  // Same as above for a concrete FST type L.
+  template <class L>
+  void InitLookAheadFst(const L& fst, bool copy = false) {
+    lfst_ = static_cast<const Fst<Arc> *>(&fst);
+    if (label_reachable_)
+      label_reachable_->ReachInit(fst, copy);
+  }
+
+ private:
+  // This allows base class virtual access to non-virtual derived-
+  // class members of the same name. It makes the derived class more
+  // efficient to use but unsafe to further derive.
+  virtual void SetState_(StateId s) { SetState(s); }
+  virtual bool Find_(Label label) { return Find(label); }
+  virtual bool Done_() const { return Done(); }
+  virtual const Arc& Value_() const { return Value(); }
+  virtual void Next_() { Next(); }
+
+  bool LookAheadLabel_(Label l) const { return LookAheadLabel(l); }
+  bool LookAheadFst_(const Fst<Arc> &fst, StateId s) {
+    return LookAheadFst(fst, s);
+  }
+
+  mutable M matcher_;
+  const Fst<Arc> *lfst_;                     // Look-ahead FST
+  LabelReachable<Arc, S> *label_reachable_;  // Label reachability info
+  StateId s_;                                // Matcher state
+  bool match_set_state_;                     // matcher_.SetState called?
+  mutable bool reach_set_state_;             // reachable_.SetState called?
+  bool error_;
+};
+
+// Looks ahead from the pair (s_, s), where s_ is the current matcher state
+// and 's' a state of the look-ahead FST 'fst'.  Returns true if some arc
+// leaving 's' is reported reachable or 's' is final and a final state is
+// reported reachable.  Without reachability information it returns true
+// unconditionally.  Depending on F it also sets the look-ahead weight and,
+// when exactly one arc is in the reachable range, the prefix arc.
+template <class M, uint32 F, class S>
+template <class L> inline
+bool LabelLookAheadMatcher<M, F, S>::LookAheadFst(const L &fst, StateId s) {
+ // Switch to 'fst' if it is not the look-ahead FST already registered.
+ if (static_cast<const Fst<Arc> *>(&fst) != lfst_)
+ InitLookAheadFst(fst);
+
+ // Reset to the trivial answer: weight One and no prefix arc.
+ SetLookAheadWeight(Weight::One());
+ ClearLookAheadPrefix();
+
+ if (!label_reachable_)
+ return true;
+
+ label_reachable_->SetState(s_, s);
+ reach_set_state_ = true;
+
+ bool compute_weight = F & kLookAheadWeight;
+ bool compute_prefix = F & kLookAheadPrefix;
+
+ // The look-ahead FST is inspected on the side opposite to the match type.
+ bool reach_input = Type(false) == MATCH_OUTPUT;
+ ArcIterator<L> aiter(fst, s);
+ // Query all arcs of 's' (positions 0 .. NumArcs).
+ bool reach_arc = label_reachable_->Reach(&aiter, 0,
+ internal::NumArcs(*lfst_, s),
+ reach_input, compute_weight);
+ if (reach_arc) {
+ ssize_t begin = label_reachable_->ReachBegin();
+ ssize_t end = label_reachable_->ReachEnd();
+ if (compute_prefix && end - begin == 1) {
+ // Exactly one arc in the reported range: use it as the prefix arc and
+ // leave the look-ahead weight at One.
+ aiter.Seek(begin);
+ SetLookAheadPrefix(aiter.Value());
+ compute_weight = false;
+ } else if (compute_weight) {
+ SetLookAheadWeight(label_reachable_->ReachWeight());
+ }
+ }
+ // Account for 's' being final in the look-ahead FST.
+ Weight lfinal = internal::Final(*lfst_, s);
+ bool reach_final = lfinal != Weight::Zero() &&
+ label_reachable_->ReachFinal();
+ if (reach_final && compute_weight)
+ SetLookAheadWeight(reach_arc ?
+ Plus(LookAheadWeight(), lfinal) : lfinal);
+
+ return reach_arc || reach_final;
+}
+
+
+// Label-lookahead relabeling class.  The constructor relabels the FST held
+// by a matcher-FST implementation; the static helpers apply, or report, the
+// same relabeling for other FSTs.
+template <class A>
+class LabelLookAheadRelabeler {
+ public:
+ typedef typename A::Label Label;
+ typedef LabelReachableData<Label> MatcherData;
+ typedef AddOnPair<MatcherData, MatcherData> D;
+
+ // Relabels matcher Fst - initialization function object.
+ template <typename I>
+ LabelLookAheadRelabeler(I **impl);
+
+ // Relabels arbitrary Fst. Class L should be a label-lookahead Fst.
+ // The reachability data is taken from the add-on of 'mfst': its first
+ // component if present, otherwise its second.
+ template <class L>
+ static void Relabel(MutableFst<A> *fst, const L &mfst,
+ bool relabel_input) {
+ typename L::Impl *impl = mfst.GetImpl();
+ D *data = impl->GetAddOn();
+ LabelReachable<A> reachable(data->First() ?
+ data->First() : data->Second());
+ reachable.Relabel(fst, relabel_input);
+ }
+
+ // Returns relabeling pairs (cf. relabel.h::Relabel()).
+ // Class L should be a label-lookahead Fst.
+ // If 'avoid_collisions' is true, extra pairs are added to
+ // ensure no collisions when relabeling automata that have
+ // labels unseen here.
+ template <class L>
+ static void RelabelPairs(const L &mfst, vector<pair<Label, Label> > *pairs,
+ bool avoid_collisions = false) {
+ typename L::Impl *impl = mfst.GetImpl();
+ D *data = impl->GetAddOn();
+ LabelReachable<A> reachable(data->First() ?
+ data->First() : data->Second());
+ reachable.RelabelPairs(pairs, avoid_collisions);
+ }
+};
+
+// Relabels the FST held by '*impl' using the reachability data in its
+// add-on.  A mutable FST is relabeled in place.  Otherwise a VectorFst copy
+// is relabeled and '*impl' is replaced by a new implementation built from
+// that copy, with the add-on carried over.  The relabeling pairs are
+// optionally written to the file named by the matching --save_relabel_*
+// flag.
+template <class A>
+template <typename I> inline
+LabelLookAheadRelabeler<A>::LabelLookAheadRelabeler(I **impl) {
+  Fst<A> &fst = (*impl)->GetFst();
+  D *data = (*impl)->GetAddOn();
+  const string name = (*impl)->Type();
+  bool is_mutable = fst.Properties(kMutable, false);
+
+  MutableFst<A> *mfst = 0;
+  if (!is_mutable) {
+    // Work on a mutable copy; keep the add-on alive across the deletion of
+    // the old implementation.
+    mfst = new VectorFst<A>(fst);
+    data->IncrRefCount();
+    delete *impl;
+  } else {
+    mfst = static_cast<MutableFst<A> *>(&fst);
+  }
+
+  {
+    // The first add-on component, when present, means input labels are
+    // relabeled; otherwise the second component is used for output labels.
+    const bool reach_input = data->First() != 0;
+    LabelReachable<A> reachable(reach_input ?
+                                data->First() : data->Second());
+    reachable.Relabel(mfst, reach_input);
+    const string &pairs_file = reach_input ?
+        FLAGS_save_relabel_ipairs : FLAGS_save_relabel_opairs;
+    if (!pairs_file.empty()) {
+      vector<pair<Label, Label> > pairs;
+      reachable.RelabelPairs(&pairs, true);
+      WriteLabelPairs(pairs_file, pairs);
+    }
+  }  // 'reachable' is released here, before the implementation is rebuilt.
+
+  if (!is_mutable) {
+    *impl = new I(*mfst, name);
+    (*impl)->SetAddOn(data);
+    delete mfst;
+    data->DecrRefCount();
+  }
+}
+
+
+// Generic lookahead matcher, templated on the FST definition
+// - a wrapper around pointer to specific one.  The wrapped matcher is
+// obtained from the FST's InitMatcher() method; if the FST supplies none, a
+// SortedMatcher is used instead.  The look-ahead calls are forwarded only
+// when the wrapped matcher advertises a look-ahead flag; otherwise an error
+// is logged and the trivial answer is returned.
+template <class F>
+class LookAheadMatcher {
+ public:
+  typedef F FST;
+  typedef typename F::Arc Arc;
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+  typedef LookAheadMatcherBase<Arc> LBase;
+
+  LookAheadMatcher(const F &fst, MatchType match_type) {
+    base_ = fst.InitMatcher(match_type);
+    if (!base_)
+      base_ = new SortedMatcher<F>(fst, match_type);
+    lookahead_ = false;
+  }
+
+  // Copy constructor; 'safe' is passed on to the wrapped matcher's Copy().
+  LookAheadMatcher(const LookAheadMatcher<F> &matcher, bool safe = false) {
+    base_ = matcher.base_->Copy(safe);
+    lookahead_ = matcher.lookahead_;
+  }
+
+  ~LookAheadMatcher() { delete base_; }
+
+  // General matcher methods
+  LookAheadMatcher<F> *Copy(bool safe = false) const {
+    return new LookAheadMatcher<F>(*this, safe);
+  }
+
+  MatchType Type(bool test) const { return base_->Type(test); }
+  void SetState(StateId s) { base_->SetState(s); }
+  bool Find(Label label) { return base_->Find(label); }
+  bool Done() const { return base_->Done(); }
+  const Arc& Value() const { return base_->Value(); }
+  void Next() { base_->Next(); }
+  const F &GetFst() const { return static_cast<const F &>(base_->GetFst()); }
+
+  uint64 Properties(uint64 props) const { return base_->Properties(props); }
+
+  uint32 Flags() const { return base_->Flags(); }
+
+  // Look-ahead methods.  Each forwards to the wrapped matcher when it is a
+  // look-ahead matcher and otherwise returns the trivial answer.
+  bool LookAheadLabel(Label label) const {
+    if (LookAheadCheck()) {
+      LBase *lbase = static_cast<LBase *>(base_);
+      return lbase->LookAheadLabel(label);
+    } else {
+      return true;
+    }
+  }
+
+  bool LookAheadFst(const Fst<Arc> &fst, StateId s) {
+    if (LookAheadCheck()) {
+      LBase *lbase = static_cast<LBase *>(base_);
+      return lbase->LookAheadFst(fst, s);
+    } else {
+      return true;
+    }
+  }
+
+  Weight LookAheadWeight() const {
+    if (LookAheadCheck()) {
+      LBase *lbase = static_cast<LBase *>(base_);
+      return lbase->LookAheadWeight();
+    } else {
+      return Weight::One();
+    }
+  }
+
+  bool LookAheadPrefix(Arc *arc) const {
+    if (LookAheadCheck()) {
+      LBase *lbase = static_cast<LBase *>(base_);
+      return lbase->LookAheadPrefix(arc);
+    } else {
+      return false;
+    }
+  }
+
+  void InitLookAheadFst(const Fst<Arc>& fst, bool copy = false) {
+    if (LookAheadCheck()) {
+      LBase *lbase = static_cast<LBase *>(base_);
+      lbase->InitLookAheadFst(fst, copy);
+    }
+  }
+
+ private:
+  // Returns true if the wrapped matcher advertises a look-ahead flag, which
+  // is what makes the static_casts to LBase above valid.  A positive answer
+  // is cached in 'lookahead_'; a negative one is re-checked, and logged, on
+  // every call.
+  bool LookAheadCheck() const {
+    if (!lookahead_) {
+      lookahead_ = base_->Flags() &
+          (kInputLookAheadMatcher | kOutputLookAheadMatcher);
+      if (!lookahead_) {
+        FSTERROR() << "LookAheadMatcher: No look-ahead matcher defined";
+      }
+    }
+    return lookahead_;
+  }
+
+  MatcherBase<Arc> *base_;    // Wrapped matcher, owned by this object.
+  mutable bool lookahead_;    // Cached result of LookAheadCheck().
+
+  // Disallow assignment.  The parameter must name this class's own type:
+  // with any other type the compiler would still generate a copy-assignment
+  // operator that copies 'base_' shallowly, leading to a double delete.
+  void operator=(const LookAheadMatcher<F> &);  // disallow
+};
+
+} // namespace fst
+
+#endif // FST_LIB_LOOKAHEAD_MATCHER_H__
diff --git a/src/include/fst/map.h b/src/include/fst/map.h
new file mode 100644
index 0000000..419cac4
--- /dev/null
+++ b/src/include/fst/map.h
@@ -0,0 +1,121 @@
+// map.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Compatibility file for old-style Map() functions and MapFst class
+// that have been renamed to ArcMap (cf. StateMap).
+
+#ifndef FST_LIB_MAP_H__
+#define FST_LIB_MAP_H__
+
+
+#include <fst/arc-map.h>
+
+
+namespace fst {
+
+// Old-style spelling of in-place ArcMap(); the mapper is passed by pointer.
+template<class Arc, class Mapper>
+void Map(MutableFst<Arc> *fst, Mapper* mapper) {
+  ArcMap(fst, mapper);
+}
+
+// Old-style spelling of in-place ArcMap(); the mapper is passed by value.
+template<class Arc, class Mapper>
+void Map(MutableFst<Arc> *fst, Mapper mapper) {
+  ArcMap(fst, mapper);
+}
+
+// Old-style spelling of ArcMap() from 'ifst' into 'ofst'; the mapper is
+// passed by pointer.
+template<class FromArc, class ToArc, class Mapper>
+void Map(const Fst<FromArc> &ifst, MutableFst<ToArc> *ofst, Mapper* mapper) {
+  ArcMap(ifst, ofst, mapper);
+}
+
+// Old-style spelling of ArcMap() from 'ifst' into 'ofst'; the mapper is
+// passed by value.
+template<class FromArc, class ToArc, class Mapper>
+void Map(const Fst<FromArc> &ifst, MutableFst<ToArc> *ofst, Mapper mapper) {
+  ArcMap(ifst, ofst, mapper);
+}
+
+typedef ArcMapFstOptions MapFstOptions;
+
+// Old-style name for ArcMapFst: every constructor simply forwards its
+// arguments to the ArcMapFst<A, B, C> base class.
+template <class A, class B, class C>
+class MapFst : public ArcMapFst<A, B, C> {
+ public:
+  typedef B Arc;
+  typedef typename B::Weight Weight;
+  typedef typename B::StateId StateId;
+  typedef CacheState<B> State;
+
+  // Mapper passed by reference, with explicit options.
+  MapFst(const Fst<A> &fst, const C &mapper, const MapFstOptions& opts)
+      : ArcMapFst<A, B, C>(fst, mapper, opts) {}
+
+  // Mapper passed by pointer, with explicit options.
+  MapFst(const Fst<A> &fst, C* mapper, const MapFstOptions& opts)
+      : ArcMapFst<A, B, C>(fst, mapper, opts) {}
+
+  // Mapper passed by reference, default options.
+  MapFst(const Fst<A> &fst, const C &mapper)
+      : ArcMapFst<A, B, C>(fst, mapper) {}
+
+  // Mapper passed by pointer, default options.
+  MapFst(const Fst<A> &fst, C* mapper)
+      : ArcMapFst<A, B, C>(fst, mapper) {}
+
+  // Copies from any ArcMapFst<A, B, C> (and hence from another MapFst).
+  // See Fst<>::Copy() for doc.
+  MapFst(const ArcMapFst<A, B, C> &fst, bool safe = false)
+      : ArcMapFst<A, B, C>(fst, safe) {}
+
+  // Get a copy of this MapFst. See Fst<>::Copy() for further doc.
+  virtual MapFst<A, B, C> *Copy(bool safe = false) const {
+    return new MapFst<A, B, C>(*this, safe);
+  }
+};
+
+
+// Specialization for MapFst.
+// StateIterator for MapFst: inherits everything from the ArcMapFst
+// state iterator and only supplies the forwarding constructor.
+template <class A, class B, class C>
+class StateIterator< MapFst<A, B, C> >
+    : public StateIterator< ArcMapFst<A, B, C> > {
+ public:
+  explicit StateIterator(const ArcMapFst<A, B, C> &fst)
+      : StateIterator< ArcMapFst<A, B, C> >(fst) {}
+};
+
+
+// Specialization for MapFst.
+// ArcIterator for MapFst: inherits everything from the ArcMapFst arc
+// iterator and only supplies the forwarding constructor.
+template <class A, class B, class C>
+class ArcIterator< MapFst<A, B, C> >
+    : public ArcIterator< ArcMapFst<A, B, C> > {
+ public:
+  ArcIterator(const ArcMapFst<A, B, C> &fst, typename A::StateId s)
+      : ArcIterator< ArcMapFst<A, B, C> >(fst, s) {}
+};
+
+
+// Mapper that returns every arc unchanged: source and target arc types
+// are the same, symbols are copied, no superfinal state is requested and
+// the input properties are passed through as-is.
+template <class A>
+struct IdentityMapper {
+  typedef A FromArc;
+  typedef A ToArc;
+
+  // Returns a copy of 'arc'.
+  A operator()(const A &arc) const {
+    return arc;
+  }
+
+  MapFinalAction FinalAction() const {
+    return MAP_NO_SUPERFINAL;
+  }
+
+  MapSymbolsAction InputSymbolsAction() const {
+    return MAP_COPY_SYMBOLS;
+  }
+
+  MapSymbolsAction OutputSymbolsAction() const {
+    return MAP_COPY_SYMBOLS;
+  }
+
+  // Properties are unaffected by the identity mapping.
+  uint64 Properties(uint64 props) const {
+    return props;
+  }
+};
+
+} // namespace fst
+
+#endif // FST_LIB_MAP_H__
diff --git a/src/include/fst/matcher-fst.h b/src/include/fst/matcher-fst.h
new file mode 100644
index 0000000..73e64ad
--- /dev/null
+++ b/src/include/fst/matcher-fst.h
@@ -0,0 +1,359 @@
+// matcher-fst.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Class to add a matcher to an FST.
+
+#ifndef FST_LIB_MATCHER_FST_FST_H__
+#define FST_LIB_MATCHER_FST_FST_H__
+
+#include <fst/add-on.h>
+#include <fst/const-fst.h>
+#include <fst/lookahead-matcher.h>
+
+
+namespace fst {
+
+// WRITABLE MATCHERS - these have the interface of Matchers (see
+// matcher.h) and these additional methods:
+//
+// template <class F>
+// class Matcher {
+// public:
+// typedef ... MatcherData; // Initialization data
+// ...
+// // Constructor with additional argument for external initialization
+// // data; matcher increments its reference count on construction and
+// // decrements the reference count, and if 0 deletes, on destruction.
+// Matcher(const F &fst, MatchType type, MatcherData *data);
+//
+// // Returns pointer to initialization data that can be
+// // passed to a Matcher constructor.
+// MatcherData *GetData() const;
+// };
+
+// The matcher initialization data class must have the form:
+// class MatcherData {
+// public:
+// // Required copy constructor.
+// MatcherData(const MatcherData &);
+// //
+// // Required I/O methods.
+// static MatcherData *Read(istream &istrm);
+// bool Write(ostream &ostrm);
+//
+// // Required reference counting.
+// int RefCount() const;
+// int IncrRefCount();
+// int DecrRefCount();
+// };
+
+// Default MatcherFst initializer - does nothing.
+// No-op initializer used as the default 'I' argument of MatcherFst: its
+// constructor accepts the implementation pointer and ignores it.
+template <class M>
+class NullMatcherFstInit {
+ public:
+  typedef AddOnPair<typename M::MatcherData, typename M::MatcherData> D;
+  typedef AddOnImpl<typename M::FST, D> Impl;
+
+  NullMatcherFstInit(Impl **) {}
+};
+
+// Class to add a matcher M to an Fst F. Creates a new Fst of type name N.
+// Optional function object I can be used to initialize the Fst.
+template <class F, class M, const char* N,
+          class I = NullMatcherFstInit<M> >
+class MatcherFst
+    : public ImplToExpandedFst<
+          AddOnImpl<F,
+                    AddOnPair<typename M::MatcherData,
+                              typename M::MatcherData> > > {
+ public:
+  friend class StateIterator< MatcherFst<F, M, N, I> >;
+  friend class ArcIterator< MatcherFst<F, M, N, I> >;
+
+  typedef F FST;
+  typedef M FstMatcher;
+  typedef typename F::Arc Arc;
+  typedef typename Arc::StateId StateId;
+  // Add-on payload: matcher data for the input side (First()) and for
+  // the output side (Second()).
+  typedef AddOnPair<typename M::MatcherData, typename M::MatcherData> D;
+  typedef AddOnImpl<F, D> Impl;
+
+  // Wraps a default-constructed F under the type name N.
+  MatcherFst() : ImplToExpandedFst<Impl>(new Impl(F(), N)) {}
+
+  // Builds the matcher data for 'fst' and stores it alongside the FST.
+  explicit MatcherFst(const F &fst)
+      : ImplToExpandedFst<Impl>(CreateImpl(fst, N)) {}
+
+  // As above, after first converting 'fst' to an F.
+  explicit MatcherFst(const Fst<Arc> &fst)
+      : ImplToExpandedFst<Impl>(CreateImpl(fst, N)) {}
+
+  // See Fst<>::Copy() for doc.
+  MatcherFst(const MatcherFst<F, M, N, I> &fst, bool safe = false)
+      : ImplToExpandedFst<Impl>(fst, safe) {}
+
+  // Get a copy of this MatcherFst. See Fst<>::Copy() for further doc.
+  virtual MatcherFst<F, M, N, I> *Copy(bool safe = false) const {
+    return new MatcherFst<F, M, N, I>(*this, safe);
+  }
+
+  // Read a MatcherFst from an input stream; return NULL on error
+  static MatcherFst<F, M, N, I> *Read(istream &strm,
+                                      const FstReadOptions &opts) {
+    Impl *impl = Impl::Read(strm, opts);
+    if (!impl) return 0;
+    return new MatcherFst<F, M, N, I>(impl);
+  }
+
+  // Read a MatcherFst from a file; return NULL on error
+  // Empty filename reads from standard input
+  static MatcherFst<F, M, N, I> *Read(const string &filename) {
+    Impl *impl = ImplToExpandedFst<Impl>::Read(filename);
+    if (!impl) return 0;
+    return new MatcherFst<F, M, N, I>(impl);
+  }
+
+  // Writes through the implementation object.
+  virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
+    return GetImpl()->Write(strm, opts);
+  }
+
+  virtual bool Write(const string &filename) const {
+    return Fst<Arc>::WriteFile(filename);
+  }
+
+  // State and arc iteration are delegated to the implementation.
+  virtual void InitStateIterator(StateIteratorData<Arc> *data) const {
+    GetImpl()->InitStateIterator(data);
+  }
+
+  virtual void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const {
+    GetImpl()->InitArcIterator(s, data);
+  }
+
+  // Returns a new matcher of type M over the wrapped FST, constructed
+  // with the stored matcher data for the requested side.
+  virtual M *InitMatcher(MatchType match_type) const {
+    return new M(GetFst(), match_type, GetData(match_type));
+  }
+
+  // Allows access to MatcherFst components.
+  Impl *GetImpl() const {
+    return ImplToFst<Impl, ExpandedFst<Arc> >::GetImpl();
+  }
+
+  F& GetFst() const { return GetImpl()->GetFst(); }
+
+  // Input-side data for MATCH_INPUT, output-side data for any other type.
+  typename M::MatcherData *GetData(MatchType match_type) const {
+    D *addon = GetImpl()->GetAddOn();
+    if (match_type == MATCH_INPUT) {
+      return addon->First();
+    } else {
+      return addon->Second();
+    }
+  }
+
+ private:
+  // Builds an implementation for 'fst': collects the data of one input
+  // and one output matcher into an add-on pair, attaches it to a new
+  // Impl and then runs the initializer I on that Impl.
+  static Impl *CreateImpl(const F &fst, const string &name) {
+    M input_matcher(fst, MATCH_INPUT);
+    M output_matcher(fst, MATCH_OUTPUT);
+    D *addon = new D(input_matcher.GetData(), output_matcher.GetData());
+    Impl *impl = new Impl(fst, name);
+    impl->SetAddOn(addon);
+    // I receives the address of 'impl', so it is able to substitute a
+    // different implementation object.
+    I init(&impl);
+    // Releases the reference held by this function; presumably SetAddOn()
+    // took its own reference -- confirm in add-on.h.
+    addon->DecrRefCount();
+    return impl;
+  }
+
+  // Converts 'fst' to an F first, then builds the implementation from it.
+  static Impl *CreateImpl(const Fst<Arc> &fst, const string &name) {
+    F converted(fst);
+    return CreateImpl(converted, name);
+  }
+
+  // Used by Read() to wrap an already constructed implementation.
+  explicit MatcherFst(Impl *impl) : ImplToExpandedFst<Impl>(impl) {}
+
+  // Makes visible to friends.
+  void SetImpl(Impl *impl, bool own_impl = true) {
+    ImplToFst< Impl, ExpandedFst<Arc> >::SetImpl(impl, own_impl);
+  }
+
+  void operator=(const MatcherFst<F, M, N, I> &fst);  // disallow
+};
+
+
+// Specialization for MatcherFst.
+// Iterates over the states of the FST wrapped by the MatcherFst by
+// constructing the underlying StateIterator<F> on it.
+template <class F, class M, const char* N, class I>
+class StateIterator< MatcherFst<F, M, N, I> > : public StateIterator<F> {
+ public:
+  explicit StateIterator(const MatcherFst<F, M, N, I> &fst)
+      : StateIterator<F>(fst.GetImpl()->GetFst()) {}
+};
+
+
+// Specialization for MatcherFst.
+// Iterates over the arcs leaving state 's' of the FST wrapped by the
+// MatcherFst by constructing the underlying ArcIterator<F> on it.
+template <class F, class M, const char* N, class I>
+class ArcIterator< MatcherFst<F, M, N, I> > : public ArcIterator<F> {
+ public:
+  ArcIterator(const MatcherFst<F, M, N, I> &fst, typename F::Arc::StateId s)
+      : ArcIterator<F>(fst.GetImpl()->GetFst(), s) {}
+};
+
+
+// Specialization for MatcherFst
+// Matcher for a MatcherFst: owns a matcher of type M obtained from
+// MatcherFst::InitMatcher() and forwards the matcher interface to it.
+template <class F, class M, const char* N, class I>
+class Matcher< MatcherFst<F, M, N, I> > {
+ public:
+  typedef MatcherFst<F, M, N, I> FST;
+  typedef typename F::Arc Arc;
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+
+  // Creates the underlying matcher for the requested match type.
+  Matcher(const FST &fst, MatchType match_type)
+      : matcher_(fst.InitMatcher(match_type)) {}
+
+  // Copies the underlying matcher. If safe=true, the copy is thread-safe.
+  // See Fst<>::Copy() for further doc.
+  Matcher(const Matcher<FST> &matcher, bool safe = false)
+      : matcher_(matcher.matcher_->Copy(safe)) {}
+
+  ~Matcher() { delete matcher_; }
+
+  // Returns a heap-allocated copy; the caller owns the result.
+  Matcher<FST> *Copy(bool safe = false) const {
+    return new Matcher<FST>(*this, safe);
+  }
+
+  // Everything below is forwarded to the owned matcher.
+  MatchType Type(bool test) const { return matcher_->Type(test); }
+  void SetState(StateId s) { matcher_->SetState(s); }
+  bool Find(Label label) { return matcher_->Find(label); }
+  bool Done() const { return matcher_->Done(); }
+  const Arc& Value() const { return matcher_->Value(); }
+  void Next() { matcher_->Next(); }
+  uint64 Properties(uint64 props) const { return matcher_->Properties(props); }
+  uint32 Flags() const { return matcher_->Flags(); }
+
+ private:
+  M *matcher_;  // Owned; deleted in the destructor.
+
+  // Declared for Matcher<FST> itself so that the implicit copy assignment
+  // (which would copy the owning pointer and lead to a double delete) is
+  // really suppressed.
+  void operator=(const Matcher<FST> &);  // disallow
+};
+
+
+// Specialization for MatcherFst
+// Look-ahead matcher for a MatcherFst: owns a matcher of type M obtained
+// from MatcherFst::InitMatcher() and forwards both the general matcher
+// interface and the look-ahead interface to it.
+template <class F, class M, const char* N, class I>
+class LookAheadMatcher< MatcherFst<F, M, N, I> > {
+ public:
+  typedef MatcherFst<F, M, N, I> FST;
+  typedef typename F::Arc Arc;
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+
+  // Creates the underlying matcher for the requested match type.
+  LookAheadMatcher(const FST &fst, MatchType match_type)
+      : matcher_(fst.InitMatcher(match_type)) {}
+
+  // Copies the underlying matcher, passing 'safe' through to its Copy().
+  LookAheadMatcher(const LookAheadMatcher<FST> &matcher, bool safe = false)
+      : matcher_(matcher.matcher_->Copy(safe)) {}
+
+  ~LookAheadMatcher() { delete matcher_; }
+
+  // General matcher methods
+  LookAheadMatcher<FST> *Copy(bool safe = false) const {
+    return new LookAheadMatcher<FST>(*this, safe);
+  }
+
+  MatchType Type(bool test) const { return matcher_->Type(test); }
+
+  void SetState(StateId s) { matcher_->SetState(s); }
+
+  bool Find(Label label) { return matcher_->Find(label); }
+
+  bool Done() const { return matcher_->Done(); }
+
+  const Arc& Value() const { return matcher_->Value(); }
+
+  void Next() { matcher_->Next(); }
+
+  const FST &GetFst() const { return matcher_->GetFst(); }
+
+  uint64 Properties(uint64 props) const {
+    return matcher_->Properties(props);
+  }
+
+  uint32 Flags() const { return matcher_->Flags(); }
+
+  // Look-ahead methods
+  bool LookAheadLabel(Label label) const {
+    return matcher_->LookAheadLabel(label);
+  }
+
+  bool LookAheadFst(const Fst<Arc> &fst, StateId s) {
+    return matcher_->LookAheadFst(fst, s);
+  }
+
+  Weight LookAheadWeight() const {
+    return matcher_->LookAheadWeight();
+  }
+
+  bool LookAheadPrefix(Arc *arc) const {
+    return matcher_->LookAheadPrefix(arc);
+  }
+
+  void InitLookAheadFst(const Fst<Arc>& fst, bool copy = false) {
+    matcher_->InitLookAheadFst(fst, copy);
+  }
+
+ private:
+  M *matcher_;  // Owned; deleted in the destructor.
+
+  void operator=(const LookAheadMatcher<FST> &);  // disallow
+};
+
+//
+// Useful aliases when using StdArc and LogArc.
+//
+
+// Arc look-ahead matchers
+extern const char arc_lookahead_fst_type[];
+
+typedef MatcherFst<ConstFst<StdArc>,
+ ArcLookAheadMatcher<SortedMatcher<ConstFst<StdArc> > >,
+ arc_lookahead_fst_type> StdArcLookAheadFst;
+
+typedef MatcherFst<ConstFst<LogArc>,
+ ArcLookAheadMatcher<SortedMatcher<ConstFst<LogArc> > >,
+ arc_lookahead_fst_type> LogArcLookAheadFst;
+
+
+// Label look-ahead matchers
+extern const char ilabel_lookahead_fst_type[];
+extern const char olabel_lookahead_fst_type[];
+
+static const uint32 ilabel_lookahead_flags = kInputLookAheadMatcher |
+ kLookAheadWeight | kLookAheadPrefix |
+ kLookAheadEpsilons | kLookAheadNonEpsilonPrefix;
+static const uint32 olabel_lookahead_flags = kOutputLookAheadMatcher |
+ kLookAheadWeight | kLookAheadPrefix |
+ kLookAheadEpsilons | kLookAheadNonEpsilonPrefix;
+
+typedef MatcherFst<ConstFst<StdArc>,
+ LabelLookAheadMatcher<SortedMatcher<ConstFst<StdArc> >,
+ ilabel_lookahead_flags,
+ FastLogAccumulator<StdArc> >,
+ ilabel_lookahead_fst_type,
+ LabelLookAheadRelabeler<StdArc> > StdILabelLookAheadFst;
+
+typedef MatcherFst<ConstFst<LogArc>,
+ LabelLookAheadMatcher<SortedMatcher<ConstFst<LogArc> >,
+ ilabel_lookahead_flags,
+ FastLogAccumulator<LogArc> >,
+ ilabel_lookahead_fst_type,
+ LabelLookAheadRelabeler<LogArc> > LogILabelLookAheadFst;
+
+typedef MatcherFst<ConstFst<StdArc>,
+ LabelLookAheadMatcher<SortedMatcher<ConstFst<StdArc> >,
+ olabel_lookahead_flags,
+ FastLogAccumulator<StdArc> >,
+ olabel_lookahead_fst_type,
+ LabelLookAheadRelabeler<StdArc> > StdOLabelLookAheadFst;
+
+typedef MatcherFst<ConstFst<LogArc>,
+ LabelLookAheadMatcher<SortedMatcher<ConstFst<LogArc> >,
+ olabel_lookahead_flags,
+ FastLogAccumulator<LogArc> >,
+ olabel_lookahead_fst_type,
+ LabelLookAheadRelabeler<LogArc> > LogOLabelLookAheadFst;
+
+} // namespace fst
+
+#endif // FST_LIB_MATCHER_FST_FST_H__
diff --git a/src/include/fst/matcher.h b/src/include/fst/matcher.h
new file mode 100644
index 0000000..a89325b
--- /dev/null
+++ b/src/include/fst/matcher.h
@@ -0,0 +1,1116 @@
+// matcher.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Classes to allow matching labels leaving FST states.
+
+#ifndef FST_LIB_MATCHER_H__
+#define FST_LIB_MATCHER_H__
+
+#include <algorithm>
+#include <set>
+
+#include <fst/mutable-fst.h> // for all internal FST accessors
+
+
+namespace fst {
+
+// MATCHERS - these can find and iterate through requested labels at
+// FST states. In the simplest form, these are just some associative
+// map or search keyed on labels. More generally, they may
+// implement matching special labels that represent sets of labels
+// such as 'sigma' (all), 'rho' (rest), or 'phi' (fail).
+// The Matcher interface is:
+//
+// template <class F>
+// class Matcher {
+// public:
+// typedef F FST;
+// typedef F::Arc Arc;
+// typedef typename Arc::StateId StateId;
+// typedef typename Arc::Label Label;
+// typedef typename Arc::Weight Weight;
+//
+// // Required constructors.
+// Matcher(const F &fst, MatchType type);
+// // If safe=true, the copy is thread-safe. See Fst<>::Copy()
+// // for further doc.
+// Matcher(const Matcher &matcher, bool safe = false);
+//
+// // If safe=true, the copy is thread-safe. See Fst<>::Copy()
+// // for further doc.
+// Matcher<F> *Copy(bool safe = false) const;
+//
+// // Returns the match type that can be provided (depending on
+// // compatibility of the input FST). It is either
+// // the requested match type, MATCH_NONE, or MATCH_UNKNOWN.
+// // If 'test' is false, a constant time test is performed, but
+// // MATCH_UNKNOWN may be returned. If 'test' is true,
+// // a definite answer is returned, but may involve more costly
+// // computation (e.g., visiting the Fst).
+// MatchType Type(bool test) const;
+// // Specifies the current state.
+// void SetState(StateId s);
+//
+// // This finds matches to a label at the current state.
+// // Returns true if a match found. kNoLabel matches any
+// // 'non-consuming' transitions, e.g., epsilon transitions,
+// // which do not require a matching symbol.
+// bool Find(Label label);
+// // These iterate through any matches found:
+// bool Done() const; // No more matches.
+// const A& Value() const; // Current arc (when !Done)
+// void Next(); // Advance to next arc (when !Done)
+//
+// // Return matcher FST.
+// const F& GetFst() const;
+// // This specifies the known Fst properties as viewed from this
+// // matcher. It takes as argument the input Fst's known properties.
+// uint64 Properties(uint64 props) const;
+// };
+
+// Flags used for basic matchers (see also lookahead.h).
+const uint32 kMatcherFlags = 0x00000000;
+
+// Matcher interface, templated on the Arc definition; used
+// for matcher specializations that are returned by the
+// InitMatcher Fst method.
+template <class A>
+class MatcherBase {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef typename A::Label Label;
+  typedef typename A::Weight Weight;
+
+  virtual ~MatcherBase() {}
+
+  virtual MatcherBase<A> *Copy(bool safe = false) const = 0;
+  virtual MatchType Type(bool test) const = 0;
+
+  // The non-virtual methods below dispatch to the private virtual
+  // methods with a trailing underscore, which derived classes implement.
+  void SetState(StateId s) {
+    SetState_(s);
+  }
+  bool Find(Label label) {
+    return Find_(label);
+  }
+  bool Done() const {
+    return Done_();
+  }
+  const A& Value() const {
+    return Value_();
+  }
+  void Next() {
+    Next_();
+  }
+
+  virtual const Fst<A> &GetFst() const = 0;
+  virtual uint64 Properties(uint64 props) const = 0;
+
+  // No flags are set unless a derived class overrides this.
+  virtual uint32 Flags() const {
+    return 0;
+  }
+
+ private:
+  virtual void SetState_(StateId s) = 0;
+  virtual bool Find_(Label label) = 0;
+  virtual bool Done_() const = 0;
+  virtual const A& Value_() const = 0;
+  virtual void Next_() = 0;
+};
+
+
+// A matcher that expects sorted labels on the side to be matched.
+// If match_type == MATCH_INPUT, epsilons match the implicit self loop
+// Arc(kNoLabel, 0, Weight::One(), current_state) as well as any
+// actual epsilon transitions. If match_type == MATCH_OUTPUT, then
+// Arc(0, kNoLabel, Weight::One(), current_state) is instead matched.
+template <class F>
+class SortedMatcher : public MatcherBase<typename F::Arc> {
+ public:
+  typedef F FST;
+  typedef typename F::Arc Arc;
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+
+  // Labels >= binary_label will be searched for by binary search,
+  // o.w. linear search is used.
+  // The matcher works on its own copy of 'fst'. A match type other than
+  // MATCH_INPUT, MATCH_OUTPUT or MATCH_NONE puts the matcher into the
+  // error state.
+  SortedMatcher(const F &fst, MatchType match_type,
+                Label binary_label = 1)
+      : fst_(fst.Copy()),
+        s_(kNoStateId),
+        aiter_(0),
+        match_type_(match_type),
+        binary_label_(binary_label),
+        match_label_(kNoLabel),
+        narcs_(0),
+        loop_(kNoLabel, 0, Weight::One(), kNoStateId),
+        current_loop_(false),  // No implicit loop pending before Find().
+        error_(false) {
+    switch(match_type_) {
+      case MATCH_INPUT:
+      case MATCH_NONE:
+        break;
+      case MATCH_OUTPUT:
+        // The implicit loop carries kNoLabel on the unmatched side.
+        swap(loop_.ilabel, loop_.olabel);
+        break;
+      default:
+        FSTERROR() << "SortedMatcher: bad match type";
+        match_type_ = MATCH_NONE;
+        error_ = true;
+    }
+  }
+
+  // Copies the configuration of 'matcher' but not its position: the new
+  // matcher has no current state and no arc iterator until SetState().
+  SortedMatcher(const SortedMatcher<F> &matcher, bool safe = false)
+      : fst_(matcher.fst_->Copy(safe)),
+        s_(kNoStateId),
+        aiter_(0),
+        match_type_(matcher.match_type_),
+        binary_label_(matcher.binary_label_),
+        match_label_(kNoLabel),
+        narcs_(0),
+        loop_(matcher.loop_),
+        current_loop_(false),  // No implicit loop pending before Find().
+        error_(matcher.error_) {}
+
+  virtual ~SortedMatcher() {
+    delete aiter_;  // Deleting a null pointer is a no-op.
+    delete fst_;
+  }
+
+  virtual SortedMatcher<F> *Copy(bool safe = false) const {
+    return new SortedMatcher<F>(*this, safe);
+  }
+
+  // Returns the match type if the FST is known to be sorted on the
+  // matched side, MATCH_NONE if it is known not to be (or if the match
+  // type is MATCH_NONE), and MATCH_UNKNOWN otherwise.
+  virtual MatchType Type(bool test) const {
+    if (match_type_ == MATCH_NONE)
+      return match_type_;
+
+    uint64 true_prop = match_type_ == MATCH_INPUT ?
+        kILabelSorted : kOLabelSorted;
+    uint64 false_prop = match_type_ == MATCH_INPUT ?
+        kNotILabelSorted : kNotOLabelSorted;
+    uint64 props = fst_->Properties(true_prop | false_prop, test);
+
+    if (props & true_prop)
+      return match_type_;
+    else if (props & false_prop)
+      return MATCH_NONE;
+    else
+      return MATCH_UNKNOWN;
+  }
+
+  // Makes 's' the current state: replaces the arc iterator, caches the
+  // arc count and points the implicit loop at 's'. Does nothing if 's'
+  // is already the current state.
+  void SetState(StateId s) {
+    if (s_ == s)
+      return;
+    s_ = s;
+    if (match_type_ == MATCH_NONE) {
+      FSTERROR() << "SortedMatcher: bad match type";
+      error_ = true;
+    }
+    delete aiter_;  // Deleting a null pointer is a no-op.
+    aiter_ = new ArcIterator<F>(*fst_, s);
+    aiter_->SetFlags(kArcNoCache, kArcNoCache);
+    narcs_ = internal::NumArcs(*fst_, s);
+    loop_.nextstate = s;
+  }
+
+  bool Find(Label match_label);
+
+  // True when there is neither a pending implicit loop nor a further arc
+  // carrying the current match label.
+  bool Done() const {
+    if (current_loop_)
+      return false;
+    if (aiter_->Done())
+      return true;
+    // Only the label on the matched side is needed for the comparison.
+    aiter_->SetFlags(
+        match_type_ == MATCH_INPUT ? kArcILabelValue : kArcOLabelValue,
+        kArcValueFlags);
+    Label label = match_type_ == MATCH_INPUT ?
+        aiter_->Value().ilabel : aiter_->Value().olabel;
+    return label != match_label_;
+  }
+
+  // Returns the implicit loop while it is pending, otherwise the arc at
+  // the iterator position with all value flags requested.
+  const Arc& Value() const {
+    if (current_loop_) {
+      return loop_;
+    }
+    aiter_->SetFlags(kArcValueFlags, kArcValueFlags);
+    return aiter_->Value();
+  }
+
+  // Consumes the implicit loop first; afterwards advances the iterator.
+  void Next() {
+    if (current_loop_)
+      current_loop_ = false;
+    else
+      aiter_->Next();
+  }
+
+  virtual const F &GetFst() const { return *fst_; }
+
+  // Passes 'inprops' through, adding kError if the matcher is in error.
+  virtual uint64 Properties(uint64 inprops) const {
+    uint64 outprops = inprops;
+    if (error_) outprops |= kError;
+    return outprops;
+  }
+
+ private:
+  virtual void SetState_(StateId s) { SetState(s); }
+  virtual bool Find_(Label label) { return Find(label); }
+  virtual bool Done_() const { return Done(); }
+  virtual const Arc& Value_() const { return Value(); }
+  virtual void Next_() { Next(); }
+
+  const F *fst_;
+  StateId s_;                     // Current state
+  ArcIterator<F> *aiter_;         // Iterator for current state
+  MatchType match_type_;          // Type of match to perform
+  Label binary_label_;            // Least label for binary search
+  Label match_label_;             // Current label to be matched
+  size_t narcs_;                  // Current state arc count
+  Arc loop_;                      // For non-consuming symbols
+  bool current_loop_;             // Current arc is the implicit loop
+  bool error_;                    // Error encountered
+
+  void operator=(const SortedMatcher<F> &);  // Disallow
+};
+
+// Positions the matcher on the first arc of the current state whose label
+// on the matched side equals 'match_label' (kNoLabel is searched for as
+// label 0). Returns true if such an arc exists, or if 'match_label' is 0,
+// in which case the implicit loop is reported first. Always fails while
+// the matcher is in the error state.
+template <class F> inline
+bool SortedMatcher<F>::Find(Label match_label) {
+  if (error_) {
+    current_loop_ = false;
+    match_label_ = kNoLabel;
+    return false;
+  }
+
+  const bool match_input = match_type_ == MATCH_INPUT;
+  current_loop_ = match_label == 0;
+  match_label_ = match_label == kNoLabel ? 0 : match_label;
+  // Only the label on the matched side is needed while searching.
+  aiter_->SetFlags(match_input ? kArcILabelValue : kArcOLabelValue,
+                   kArcValueFlags);
+
+  if (match_label_ < binary_label_) {
+    // Linear search for match; stops early once the labels exceed the
+    // requested one.
+    for (aiter_->Reset(); !aiter_->Done(); aiter_->Next()) {
+      const Label label = match_input ?
+          aiter_->Value().ilabel : aiter_->Value().olabel;
+      if (label == match_label_)
+        return true;
+      if (label > match_label_)
+        break;
+    }
+    return current_loop_;
+  }
+
+  // Binary search for match.
+  size_t lo = 0;
+  size_t hi = narcs_;
+  while (lo < hi) {
+    const size_t mid = (lo + hi) / 2;
+    aiter_->Seek(mid);
+    Label label = match_input ?
+        aiter_->Value().ilabel : aiter_->Value().olabel;
+    if (label > match_label_) {
+      hi = mid;
+      continue;
+    }
+    if (label < match_label_) {
+      lo = mid + 1;
+      continue;
+    }
+    // Hit: walk backwards to the first arc carrying this label (there
+    // may be several when the state is non-deterministic).
+    for (size_t pos = mid; pos > lo; --pos) {
+      aiter_->Seek(pos - 1);
+      label = match_input ?
+          aiter_->Value().ilabel : aiter_->Value().olabel;
+      if (label != match_label_) {
+        aiter_->Seek(pos);
+        return true;
+      }
+    }
+    return true;
+  }
+  return current_loop_;
+}
+
+
+// Specifies whether during matching we rewrite both the input and output sides.
+enum MatcherRewriteMode {
+ MATCHER_REWRITE_AUTO = 0, // Rewrites both sides iff acceptor.
+ MATCHER_REWRITE_ALWAYS, // Both sides are eligible for rewriting, whatever the FST.
+ MATCHER_REWRITE_NEVER // Only the label on the matched side is rewritten.
+};
+
+
+// For any requested label that doesn't match at a state, this matcher
+// considers all transitions that match the label 'rho_label' (rho =
+// 'rest'). Each such rho transition found is returned with the
+// rho_label rewritten as the requested label (both sides if an
+// acceptor, or if 'rewrite_both' is true and both input and output
+// labels of the found transition are 'rho_label'). If 'rho_label' is
+// kNoLabel, this special matching is not done. RhoMatcher is
+// templated itself on a matcher, which is used to perform the
+// underlying matching. By default, the underlying matcher is
+// constructed by RhoMatcher. The user can instead pass in this
+// object; in that case, RhoMatcher takes its ownership.
+template <class M>
+class RhoMatcher : public MatcherBase<typename M::Arc> {
+ public:
+  typedef typename M::FST FST;
+  typedef typename M::Arc Arc;
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+
+  // Uses 'matcher' as the underlying matcher if one is given (taking
+  // ownership of it), otherwise constructs an M on 'fst'. MATCH_BOTH and
+  // a rho_label of 0 are rejected and put the matcher into the error
+  // state.
+  RhoMatcher(const FST &fst,
+             MatchType match_type,
+             Label rho_label = kNoLabel,
+             MatcherRewriteMode rewrite_mode = MATCHER_REWRITE_AUTO,
+             M *matcher = 0)
+      : matcher_(matcher ? matcher : new M(fst, match_type)),
+        match_type_(match_type),
+        rho_label_(rho_label),
+        has_rho_(false),        // No state selected yet.
+        rho_match_(kNoLabel),   // No rho match in progress yet.
+        error_(false) {
+    if (match_type == MATCH_BOTH) {
+      FSTERROR() << "RhoMatcher: bad match type";
+      match_type_ = MATCH_NONE;
+      error_ = true;
+    }
+    if (rho_label == 0) {
+      FSTERROR() << "RhoMatcher: 0 cannot be used as rho_label";
+      rho_label_ = kNoLabel;
+      error_ = true;
+    }
+
+    if (rewrite_mode == MATCHER_REWRITE_AUTO)
+      rewrite_both_ = fst.Properties(kAcceptor, true);
+    else if (rewrite_mode == MATCHER_REWRITE_ALWAYS)
+      rewrite_both_ = true;
+    else
+      rewrite_both_ = false;
+  }
+
+  // Copies the configuration of 'matcher' and a copy of its underlying
+  // matcher. The per-state and per-match members start out cleared, as
+  // in a freshly constructed matcher.
+  RhoMatcher(const RhoMatcher<M> &matcher, bool safe = false)
+      : matcher_(new M(*matcher.matcher_, safe)),
+        match_type_(matcher.match_type_),
+        rho_label_(matcher.rho_label_),
+        rewrite_both_(matcher.rewrite_both_),
+        has_rho_(false),
+        rho_match_(kNoLabel),
+        error_(matcher.error_) {}
+
+  virtual ~RhoMatcher() {
+    delete matcher_;
+  }
+
+  virtual RhoMatcher<M> *Copy(bool safe = false) const {
+    return new RhoMatcher<M>(*this, safe);
+  }
+
+  virtual MatchType Type(bool test) const { return matcher_->Type(test); }
+
+  // Selects state 's' and assumes it may have rho transitions whenever a
+  // rho label is configured; Find() refines this on the first miss.
+  void SetState(StateId s) {
+    matcher_->SetState(s);
+    has_rho_ = rho_label_ != kNoLabel;
+  }
+
+  // Looks for 'match_label' first; if that fails and the label is neither
+  // 0 nor kNoLabel, falls back to the rho transitions of the state.
+  // Searching for the rho label itself is an error.
+  bool Find(Label match_label) {
+    if (match_label == rho_label_ && rho_label_ != kNoLabel) {
+      FSTERROR() << "RhoMatcher::Find: bad label (rho)";
+      error_ = true;
+      return false;
+    }
+    if (matcher_->Find(match_label)) {
+      rho_match_ = kNoLabel;
+      return true;
+    } else if (has_rho_ && match_label != 0 && match_label != kNoLabel &&
+               // The assignment remembers a failed rho lookup, so it is
+               // not repeated until the next SetState().
+               (has_rho_ = matcher_->Find(rho_label_))) {
+      rho_match_ = match_label;
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  bool Done() const { return matcher_->Done(); }
+
+  // Returns the underlying arc for an ordinary match. For a rho match a
+  // copy is returned in which the rho label is replaced by the requested
+  // label: on both sides where they equal the rho label if rewrite_both_
+  // is set, otherwise on the matched side only.
+  const Arc& Value() const {
+    if (rho_match_ == kNoLabel) {
+      return matcher_->Value();
+    } else {
+      rho_arc_ = matcher_->Value();
+      if (rewrite_both_) {
+        if (rho_arc_.ilabel == rho_label_)
+          rho_arc_.ilabel = rho_match_;
+        if (rho_arc_.olabel == rho_label_)
+          rho_arc_.olabel = rho_match_;
+      } else if (match_type_ == MATCH_INPUT) {
+        rho_arc_.ilabel = rho_match_;
+      } else {
+        rho_arc_.olabel = rho_match_;
+      }
+      return rho_arc_;
+    }
+  }
+
+  void Next() { matcher_->Next(); }
+
+  virtual const FST &GetFst() const { return matcher_->GetFst(); }
+
+  virtual uint64 Properties(uint64 props) const;
+
+ private:
+  virtual void SetState_(StateId s) { SetState(s); }
+  virtual bool Find_(Label label) { return Find(label); }
+  virtual bool Done_() const { return Done(); }
+  virtual const Arc& Value_() const { return Value(); }
+  virtual void Next_() { Next(); }
+
+  M *matcher_;
+  MatchType match_type_;  // Type of match requested
+  Label rho_label_;       // Label that represents the rho transition
+  bool rewrite_both_;     // Rewrite both sides when both are 'rho_label_'
+  bool has_rho_;          // Are there possibly rhos at the current state?
+  Label rho_match_;       // Current label that matches rho transition
+  mutable Arc rho_arc_;   // Arc to return when rho match
+  bool error_;            // Error encountered
+
+  void operator=(const RhoMatcher<M> &);  // Disallow
+};
+
+// Starts from the underlying matcher's view of 'inprops', adds kError if
+// this matcher is in error, and then clears the property bits listed
+// below for the configured match type and rewrite mode.
+template <class M> inline
+uint64 RhoMatcher<M>::Properties(uint64 inprops) const {
+  uint64 outprops = matcher_->Properties(inprops);
+  if (error_) outprops |= kError;
+
+  switch (match_type_) {
+    case MATCH_NONE:
+      return outprops;
+
+    case MATCH_INPUT:
+      if (rewrite_both_) {
+        return outprops & ~(kODeterministic | kNonODeterministic | kString |
+                            kILabelSorted | kNotILabelSorted |
+                            kOLabelSorted | kNotOLabelSorted);
+      }
+      return outprops & ~(kODeterministic | kAcceptor | kString |
+                          kILabelSorted | kNotILabelSorted);
+
+    case MATCH_OUTPUT:
+      if (rewrite_both_) {
+        return outprops & ~(kIDeterministic | kNonIDeterministic | kString |
+                            kILabelSorted | kNotILabelSorted |
+                            kOLabelSorted | kNotOLabelSorted);
+      }
+      return outprops & ~(kIDeterministic | kAcceptor | kString |
+                          kOLabelSorted | kNotOLabelSorted);
+
+    default:
+      // Shouldn't ever get here.
+      FSTERROR() << "RhoMatcher:: bad match type: " << match_type_;
+      return 0;
+  }
+}
+
+
+// For any requested label, this matcher considers all transitions
+// that match the label 'sigma_label' (sigma = "any"), in addition
+// to transitions with the requested label. Each such sigma
+// transition found is returned with the sigma_label rewritten as the
+// requested label (both sides if an acceptor, or if 'rewrite_both' is
+// true and both input and output labels of the found transition are
+// 'sigma_label'). If 'sigma_label' is kNoLabel, this special
+// matching is not done. SigmaMatcher is templated itself on a
+// matcher, which is used to perform the underlying matching. By
+// default, the underlying matcher is constructed by SigmaMatcher.
+// The user can instead pass in this object; in that case,
+// SigmaMatcher takes its ownership.
+template <class M>
+class SigmaMatcher : public MatcherBase<typename M::Arc> {
+ public:
+ typedef typename M::FST FST;
+ typedef typename M::Arc Arc;
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Label Label;
+ typedef typename Arc::Weight Weight;
+
+ SigmaMatcher(const FST &fst,
+ MatchType match_type,
+ Label sigma_label = kNoLabel,
+ MatcherRewriteMode rewrite_mode = MATCHER_REWRITE_AUTO,
+ M *matcher = 0)
+ : matcher_(matcher ? matcher : new M(fst, match_type)),
+ match_type_(match_type),
+ sigma_label_(sigma_label),
+ error_(false) {
+ if (match_type == MATCH_BOTH) {
+ FSTERROR() << "SigmaMatcher: bad match type";
+ match_type_ = MATCH_NONE;
+ error_ = true;
+ }
+ if (sigma_label == 0) {
+ FSTERROR() << "SigmaMatcher: 0 cannot be used as sigma_label";
+ sigma_label_ = kNoLabel;
+ error_ = true;
+ }
+
+ if (rewrite_mode == MATCHER_REWRITE_AUTO)
+ rewrite_both_ = fst.Properties(kAcceptor, true);
+ else if (rewrite_mode == MATCHER_REWRITE_ALWAYS)
+ rewrite_both_ = true;
+ else
+ rewrite_both_ = false;
+ }
+
+ SigmaMatcher(const SigmaMatcher<M> &matcher, bool safe = false)
+ : matcher_(new M(*matcher.matcher_, safe)),
+ match_type_(matcher.match_type_),
+ sigma_label_(matcher.sigma_label_),
+ rewrite_both_(matcher.rewrite_both_),
+ error_(matcher.error_) {}
+
+ virtual ~SigmaMatcher() {
+ delete matcher_;
+ }
+
+ virtual SigmaMatcher<M> *Copy(bool safe = false) const {
+ return new SigmaMatcher<M>(*this, safe);
+ }
+
+ virtual MatchType Type(bool test) const { return matcher_->Type(test); }
+
+ void SetState(StateId s) {
+ matcher_->SetState(s);
+ has_sigma_ =
+ sigma_label_ != kNoLabel ? matcher_->Find(sigma_label_) : false;
+ }
+
+ bool Find(Label match_label) {
+ match_label_ = match_label;
+ if (match_label == sigma_label_ && sigma_label_ != kNoLabel) {
+ FSTERROR() << "SigmaMatcher::Find: bad label (sigma)";
+ error_ = true;
+ return false;
+ }
+ if (matcher_->Find(match_label)) {
+ sigma_match_ = kNoLabel;
+ return true;
+ } else if (has_sigma_ && match_label != 0 && match_label != kNoLabel &&
+ matcher_->Find(sigma_label_)) {
+ sigma_match_ = match_label;
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ bool Done() const {
+ return matcher_->Done();
+ }
+
+ const Arc& Value() const {
+ if (sigma_match_ == kNoLabel) {
+ return matcher_->Value();
+ } else {
+ sigma_arc_ = matcher_->Value();
+ if (rewrite_both_) {
+ if (sigma_arc_.ilabel == sigma_label_)
+ sigma_arc_.ilabel = sigma_match_;
+ if (sigma_arc_.olabel == sigma_label_)
+ sigma_arc_.olabel = sigma_match_;
+ } else if (match_type_ == MATCH_INPUT) {
+ sigma_arc_.ilabel = sigma_match_;
+ } else {
+ sigma_arc_.olabel = sigma_match_;
+ }
+ return sigma_arc_;
+ }
+ }
+
+ void Next() {
+ matcher_->Next();
+ if (matcher_->Done() && has_sigma_ && (sigma_match_ == kNoLabel) &&
+ (match_label_ > 0)) {
+ matcher_->Find(sigma_label_);
+ sigma_match_ = match_label_;
+ }
+ }
+
+ virtual const FST &GetFst() const { return matcher_->GetFst(); }
+
+ virtual uint64 Properties(uint64 props) const;
+
+private:
+ virtual void SetState_(StateId s) { SetState(s); }
+ virtual bool Find_(Label label) { return Find(label); }
+ virtual bool Done_() const { return Done(); }
+ virtual const Arc& Value_() const { return Value(); }
+ virtual void Next_() { Next(); }
+
+ M *matcher_;
+ MatchType match_type_; // Type of match requested
+ Label sigma_label_; // Label that represents the sigma transition
+ bool rewrite_both_; // Rewrite both sides when both are 'sigma_label_'
+ bool has_sigma_; // Are there sigmas at the current state?
+ Label sigma_match_; // Current label that matches sigma transition
+ mutable Arc sigma_arc_; // Arc to return when sigma match
+ Label match_label_; // Label being matched
+ bool error_; // Error encountered
+
+ void operator=(const SigmaMatcher<M> &); // disallow
+};
+
+// Returns the underlying matcher's properties for 'inprops', with kError
+// added when this matcher is in an error state and with the property
+// bits listed below cleared according to the match type and rewrite mode.
+template <class M> inline
+uint64 SigmaMatcher<M>::Properties(uint64 inprops) const {
+  uint64 props = matcher_->Properties(inprops);
+  if (error_)
+    props |= kError;
+
+  // No matching is performed, so no bits are cleared.
+  if (match_type_ == MATCH_NONE)
+    return props;
+
+  // Bits cleared in every matching mode.
+  const uint64 common = kIDeterministic | kNonIDeterministic |
+                        kODeterministic | kNonODeterministic |
+                        kString;
+  const uint64 isorted = kILabelSorted | kNotILabelSorted;
+  const uint64 osorted = kOLabelSorted | kNotOLabelSorted;
+
+  // The rewrite mode takes precedence over the match side.
+  if (rewrite_both_)
+    return props & ~(common | isorted | osorted);
+  if (match_type_ == MATCH_INPUT)
+    return props & ~(common | isorted | kAcceptor);
+  if (match_type_ == MATCH_OUTPUT)
+    return props & ~(common | osorted | kAcceptor);
+
+  // Shouldn't ever get here.
+  FSTERROR() << "SigmaMatcher:: bad match type: " << match_type_;
+  return 0;
+}
+
+
+// For any requested label that doesn't match at a state, this matcher
+// considers the *unique* transition that matches the label 'phi_label'
+// (phi = 'fail'), and recursively looks for a match at its
+// destination. When 'phi_loop' is true, if no match is found but a
+// phi self-loop is found, then the phi transition found is returned
+// with the phi_label rewritten as the requested label (both sides if
+// an acceptor, or if 'rewrite_both' is true and both input and output
+// labels of the found transition are 'phi_label'). If 'phi_label' is
+// kNoLabel, this special matching is not done. PhiMatcher is
+// templated itself on a matcher, which is used to perform the
+// underlying matching. By default, the underlying matcher is
+// constructed by PhiMatcher. The user can instead pass in this
+// object; in that case, PhiMatcher takes its ownership.
+// Warning: phi non-determinism not supported (for simplicity).
+template <class M>
+class PhiMatcher : public MatcherBase<typename M::Arc> {
+ public:
+ typedef typename M::FST FST;
+ typedef typename M::Arc Arc;
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Label Label;
+ typedef typename Arc::Weight Weight;
+
+ PhiMatcher(const FST &fst,
+ MatchType match_type,  // MATCH_BOTH is rejected below
+ Label phi_label = kNoLabel,  // 0 is rejected below
+ bool phi_loop = true,  // See phi_loop_
+ MatcherRewriteMode rewrite_mode = MATCHER_REWRITE_AUTO,
+ M *matcher = 0)  // If null, a new M(fst, match_type) is created
+ : matcher_(matcher ? matcher : new M(fst, match_type)),
+ match_type_(match_type),
+ phi_label_(phi_label),
+ state_(kNoStateId),  // No state selected yet
+ phi_loop_(phi_loop),
+ error_(false) {
+ if (match_type == MATCH_BOTH) {  // Unsupported: fall back to MATCH_NONE
+ FSTERROR() << "PhiMatcher: bad match type";
+ match_type_ = MATCH_NONE;
+ error_ = true;
+ }
+ if (phi_label == 0) {  // Invalid: fall back to kNoLabel (no phi matching)
+ FSTERROR() << "PhiMatcher: 0 cannot be used as phi_label";
+ phi_label_ = kNoLabel;
+ error_ = true;
+ }
+
+ if (rewrite_mode == MATCHER_REWRITE_AUTO)  // AUTO: decided by fst's kAcceptor property
+ rewrite_both_ = fst.Properties(kAcceptor, true);
+ else if (rewrite_mode == MATCHER_REWRITE_ALWAYS)
+ rewrite_both_ = true;
+ else
+ rewrite_both_ = false;
+ }
+
+ PhiMatcher(const PhiMatcher<M> &matcher, bool safe = false)  // 'safe' is passed on to M's copy
+ : matcher_(new M(*matcher.matcher_, safe)),
+ match_type_(matcher.match_type_),
+ phi_label_(matcher.phi_label_),
+ rewrite_both_(matcher.rewrite_both_),
+ state_(kNoStateId),  // The copy starts with no state selected
+ phi_loop_(matcher.phi_loop_),
+ error_(matcher.error_) {}
+
+ virtual ~PhiMatcher() {
+ delete matcher_;  // Deleted whether it was passed in or created here
+ }
+
+ virtual PhiMatcher<M> *Copy(bool safe = false) const {
+ return new PhiMatcher<M>(*this, safe);
+ }
+
+ virtual MatchType Type(bool test) const { return matcher_->Type(test); }
+
+ void SetState(StateId s) {
+ matcher_->SetState(s);
+ state_ = s;  // Find() restarts from here on every call
+ has_phi_ = phi_label_ != kNoLabel;  // Phi arcs themselves are only looked up in Find()
+ }
+
+ bool Find(Label match_label);
+
+ bool Done() const { return matcher_->Done(); }
+
+ const Arc& Value() const {  // The reference may point to phi_arc_, overwritten by later calls
+ if ((phi_match_ == kNoLabel) && (phi_weight_ == Weight::One())) {  // Nothing to rewrite or reweight
+ return matcher_->Value();
+ } else {
+ phi_arc_ = matcher_->Value();
+ phi_arc_.weight = Times(phi_weight_, phi_arc_.weight);  // Include the weights of the phi arcs taken
+ if (phi_match_ != kNoLabel) {  // Set by Find() on a phi self-loop: rewrite its label(s)
+ if (rewrite_both_) {
+ if (phi_arc_.ilabel == phi_label_)
+ phi_arc_.ilabel = phi_match_;
+ if (phi_arc_.olabel == phi_label_)
+ phi_arc_.olabel = phi_match_;
+ } else if (match_type_ == MATCH_INPUT) {
+ phi_arc_.ilabel = phi_match_;
+ } else {
+ phi_arc_.olabel = phi_match_;
+ }
+ }
+ return phi_arc_;
+ }
+ }
+
+ void Next() { matcher_->Next(); }  // Advances at whichever state Find() left the matcher
+
+ virtual const FST &GetFst() const { return matcher_->GetFst(); }
+
+ virtual uint64 Properties(uint64 props) const;
+
+private:  // Virtual hooks below forward to the non-virtual methods above
+ virtual void SetState_(StateId s) { SetState(s); }
+ virtual bool Find_(Label label) { return Find(label); }
+ virtual bool Done_() const { return Done(); }
+ virtual const Arc& Value_() const { return Value(); }
+ virtual void Next_() { Next(); }
+
+ M *matcher_;
+ MatchType match_type_; // Type of match requested
+ Label phi_label_; // Label that represents the phi transition
+ bool rewrite_both_; // Rewrite both sides when both are 'phi_label_'
+ bool has_phi_; // Are there possibly phis at the current state?
+ Label phi_match_; // Current label that matches phi loop
+ mutable Arc phi_arc_; // Arc to return
+ StateId state_; // State where looking for matches
+ Weight phi_weight_; // Product of the weights of phi transitions taken
+ bool phi_loop_; // When true, phi self-loop are allowed and treated
+ // as rho (required for Aho-Corasick)
+ bool error_; // Error encountered
+
+ void operator=(const PhiMatcher<M> &); // disallow
+};
+
+template <class M> inline
+bool PhiMatcher<M>::Find(Label match_label) {  // True iff a match is found at state_ or along phi arcs
+ if (match_label == phi_label_ && phi_label_ != kNoLabel) {  // Phi cannot be requested itself
+ FSTERROR() << "PhiMatcher::Find: bad label (phi)";
+ error_ = true;
+ return false;
+ }
+ matcher_->SetState(state_);  // An earlier Find() may have left the matcher at another state
+ phi_match_ = kNoLabel;
+ phi_weight_ = Weight::One();
+ if (!has_phi_ || match_label == 0 || match_label == kNoLabel)  // Phi arcs are not followed here
+ return matcher_->Find(match_label);
+ StateId state = state_;
+ while (!matcher_->Find(match_label)) {  // No match at 'state': look for a phi arc to follow
+ if (!matcher_->Find(phi_label_))
+ return false;  // No phi arc either
+ if (phi_loop_ && matcher_->Value().nextstate == state) {  // Phi self-loop: return it, relabeled by Value()
+ phi_match_ = match_label;
+ return true;
+ }
+ phi_weight_ = Times(phi_weight_, matcher_->Value().weight);  // Accumulate the phi arc's weight
+ state = matcher_->Value().nextstate;
+ matcher_->Next();
+ if (!matcher_->Done()) {  // A second phi arc at the same state
+ FSTERROR() << "PhiMatcher: phi non-determinism not supported";
+ error_ = true;
+ }
+ matcher_->SetState(state);  // Retry at the phi arc's destination
+ }
+ return true;
+}
+
+// Returns the underlying matcher's properties for 'inprops', with kError
+// added when this matcher is in an error state and with the property
+// bits listed below cleared according to the match type and rewrite mode.
+template <class M> inline
+uint64 PhiMatcher<M>::Properties(uint64 inprops) const {
+  uint64 props = matcher_->Properties(inprops);
+  if (error_)
+    props |= kError;
+
+  // Label-sortedness bits are cleared for both match sides.
+  const uint64 sorted = kILabelSorted | kNotILabelSorted |
+                        kOLabelSorted | kNotOLabelSorted;
+
+  switch (match_type_) {
+    case MATCH_NONE:
+      // No matching is performed, so no bits are cleared.
+      return props;
+    case MATCH_INPUT: {
+      const uint64 side = rewrite_both_
+          ? (kODeterministic | kNonODeterministic)
+          : (kODeterministic | kAcceptor);
+      return props & ~(side | kString | sorted);
+    }
+    case MATCH_OUTPUT: {
+      const uint64 side = rewrite_both_
+          ? (kIDeterministic | kNonIDeterministic)
+          : (kIDeterministic | kAcceptor);
+      return props & ~(side | kString | sorted);
+    }
+    default:
+      // Shouldn't ever get here.
+      FSTERROR() << "PhiMatcher:: bad match type: " << match_type_;
+      return 0;
+  }
+}
+
+
+//
+// MULTI-EPS MATCHER FLAGS
+//
+
+// Return multi-epsilon arcs for Find(kNoLabel).
+const uint32 kMultiEpsList = 0x00000001;
+
+// Return a kNoLabel loop for Find(multi_eps).
+const uint32 kMultiEpsLoop = 0x00000002;
+
+// MultiEpsMatcher: allows treating multiple non-0 labels as
+// non-consuming labels in addition to 0 that is always
+// non-consuming. Precise behavior controlled by 'flags' argument. By
+// default, the underlying matcher is constructed by
+// MultiEpsMatcher. The user can instead pass in this object; in that
+// case, MultiEpsMatcher takes its ownership iff 'own_matcher' is
+// true.
+//
+// Find() must be called before Done(), Value() or Next(): the
+// iteration state (done_, current_loop_, multi_eps_iter_) is only set
+// by Find() and Next().
+template <class M>
+class MultiEpsMatcher {
+ public:
+  typedef typename M::FST FST;
+  typedef typename M::Arc Arc;
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+
+  // Builds a matcher over 'fst' for the given match side. 'flags' is a
+  // combination of kMultiEpsLoop and kMultiEpsList. When 'matcher' is
+  // null a new M is created and owned; otherwise 'matcher' is used and
+  // is deleted on destruction only if 'own_matcher' is true.
+  MultiEpsMatcher(const FST &fst, MatchType match_type,
+                  uint32 flags = (kMultiEpsLoop | kMultiEpsList),
+                  M *matcher = 0, bool own_matcher = true)
+      : matcher_(matcher ? matcher : new M(fst, match_type)),
+        flags_(flags),
+        own_matcher_(matcher ? own_matcher : true) {
+    // The implicit loop carries kNoLabel on the matched side and 0 on
+    // the other side.
+    if (match_type == MATCH_INPUT) {
+      loop_.ilabel = kNoLabel;
+      loop_.olabel = 0;
+    } else {
+      loop_.ilabel = 0;
+      loop_.olabel = kNoLabel;
+    }
+    loop_.weight = Weight::One();
+    loop_.nextstate = kNoStateId;
+  }
+
+  // Copies 'matcher', including its multi-eps label set. The copy owns
+  // its own copy of the underlying matcher ('safe' is passed on to M's
+  // copy constructor) and starts with no state selected.
+  MultiEpsMatcher(const MultiEpsMatcher<M> &matcher, bool safe = false)
+      : matcher_(new M(*matcher.matcher_, safe)),
+        flags_(matcher.flags_),
+        own_matcher_(true),
+        multi_eps_labels_(matcher.multi_eps_labels_),
+        loop_(matcher.loop_) {
+    loop_.nextstate = kNoStateId;
+  }
+
+  ~MultiEpsMatcher() {
+    if (own_matcher_)
+      delete matcher_;
+  }
+
+  MultiEpsMatcher<M> *Copy(bool safe = false) const {
+    return new MultiEpsMatcher<M>(*this, safe);
+  }
+
+  MatchType Type(bool test) const { return matcher_->Type(test); }
+
+  // Selects state 's'; the implicit loop returns to that same state.
+  void SetState(StateId s) {
+    matcher_->SetState(s);
+    loop_.nextstate = s;
+  }
+
+  bool Find(Label match_label);
+
+  bool Done() const {
+    return done_;
+  }
+
+  // Returns the implicit loop if Find() selected it, otherwise the
+  // underlying matcher's current arc.
+  const Arc& Value() const {
+    return current_loop_ ? loop_ : matcher_->Value();
+  }
+
+  // Advances to the next match. When the arcs for the current multi-eps
+  // label are exhausted, moves on to the next multi-eps label that has
+  // arcs at this state, and finally to the underlying matcher's
+  // Find(kNoLabel) results. The implicit loop is a single match.
+  void Next() {
+    if (!current_loop_) {
+      matcher_->Next();
+      done_ = matcher_->Done();
+      if (done_ && multi_eps_iter_ != multi_eps_labels_.End()) {
+        ++multi_eps_iter_;
+        while ((multi_eps_iter_ != multi_eps_labels_.End()) &&
+               !matcher_->Find(*multi_eps_iter_))
+          ++multi_eps_iter_;
+        if (multi_eps_iter_ != multi_eps_labels_.End())
+          done_ = false;
+        else
+          done_ = !matcher_->Find(kNoLabel);
+
+      }
+    } else {
+      done_ = true;
+    }
+  }
+
+  const FST &GetFst() const { return matcher_->GetFst(); }
+
+  uint64 Properties(uint64 props) const { return matcher_->Properties(props); }
+
+  // Flags of the underlying matcher (not this class's 'flags' argument).
+  uint32 Flags() const { return matcher_->Flags(); }
+
+  // Registers 'label' as a non-consuming label; 0 is rejected.
+  void AddMultiEpsLabel(Label label) {
+    if (label == 0) {
+      FSTERROR() << "MultiEpsMatcher: Bad multi-eps label: 0";
+    } else {
+      multi_eps_labels_.Insert(label);
+    }
+  }
+
+  void ClearMultiEpsLabels() {
+    multi_eps_labels_.Clear();
+  }
+
+ private:
+  // Specialized for 'set' - log lookup.
+  // The lookup on a 'set' is spelled find(), matching the end() it is
+  // compared against; the capitalized Find()/End() pair is the one used
+  // on the CompactSet member multi_eps_labels_.
+  bool IsMultiEps(const set<Label> &multi_eps_labels, Label label) const {
+    return multi_eps_labels.find(label) != multi_eps_labels.end();
+  }
+
+  M *matcher_;
+  uint32 flags_;
+  bool own_matcher_;     // Does this class delete the matcher?
+
+  // Multi-eps label set
+  CompactSet<Label, kNoLabel> multi_eps_labels_;
+  typename CompactSet<Label, kNoLabel>::const_iterator multi_eps_iter_;
+
+  bool current_loop_;    // Current arc is the implicit loop
+  mutable Arc loop_;     // For non-consuming symbols
+  bool done_;            // Matching done
+
+  void operator=(const MultiEpsMatcher<M> &);  // Disallow
+};
+
+template <class M> inline
+bool MultiEpsMatcher<M>::Find(Label match_label) {  // True iff an arc or the implicit loop matches
+ multi_eps_iter_ = multi_eps_labels_.End();  // End() unless the kMultiEpsList branch below sets it
+ current_loop_ = false;
+ bool ret;
+ if (match_label == 0) {
+ ret = matcher_->Find(0);
+ } else if (match_label == kNoLabel) {
+ if (flags_ & kMultiEpsList) {
+ // return all non-consuming arcs (incl. epsilon)
+ multi_eps_iter_ = multi_eps_labels_.Begin();
+ while ((multi_eps_iter_ != multi_eps_labels_.End()) &&
+ !matcher_->Find(*multi_eps_iter_))  // Skip multi-eps labels with no match at this state
+ ++multi_eps_iter_;
+ if (multi_eps_iter_ != multi_eps_labels_.End())
+ ret = true;  // Next() moves on to the remaining labels
+ else
+ ret = matcher_->Find(kNoLabel);
+ } else {
+ // return all epsilon arcs
+ ret = matcher_->Find(kNoLabel);
+ }
+ } else if ((flags_ & kMultiEpsLoop) &&
+ multi_eps_labels_.Find(match_label) != multi_eps_labels_.End()) {
+ // return 'implicit' loop
+ current_loop_ = true;
+ ret = true;
+ } else {
+ ret = matcher_->Find(match_label);
+ }
+ done_ = !ret;
+ return ret;
+}
+
+
+// Generic matcher, templated on the FST definition
+// - a wrapper around pointer to specific one.
+// Here is a typical use: \code
+//   Matcher<StdFst> matcher(fst, MATCH_INPUT);
+//   matcher.SetState(state);
+//   if (matcher.Find(label))
+//     for (; !matcher.Done(); matcher.Next()) {
+//       const StdArc &arc = matcher.Value();
+//       ...
+//     } \endcode
+template <class F>
+class Matcher {
+ public:
+  typedef F FST;
+  typedef typename F::Arc Arc;
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+
+  // Uses the matcher supplied by the FST itself, falling back to a
+  // SortedMatcher when InitMatcher() returns null.
+  Matcher(const F &fst, MatchType match_type) {
+    base_ = fst.InitMatcher(match_type);
+    if (!base_)
+      base_ = new SortedMatcher<F>(fst, match_type);
+  }
+
+  // Copies the wrapped matcher; 'safe' is passed on to its Copy().
+  Matcher(const Matcher<F> &matcher, bool safe = false) {
+    base_ = matcher.base_->Copy(safe);
+  }
+
+  // Takes ownership of the provided matcher
+  Matcher(MatcherBase<Arc>* base_matcher) { base_ = base_matcher; }
+
+  ~Matcher() { delete base_; }
+
+  Matcher<F> *Copy(bool safe = false) const {
+    return new Matcher<F>(*this, safe);
+  }
+
+  // The methods below forward to the wrapped matcher.
+  MatchType Type(bool test) const { return base_->Type(test); }
+  void SetState(StateId s) { base_->SetState(s); }
+  bool Find(Label label) { return base_->Find(label); }
+  bool Done() const { return base_->Done(); }
+  const Arc& Value() const { return base_->Value(); }
+  void Next() { base_->Next(); }
+  const F &GetFst() const { return static_cast<const F &>(base_->GetFst()); }
+  uint64 Properties(uint64 props) const { return base_->Properties(props); }
+  // Only the bits in kMatcherFlags are reported.
+  uint32 Flags() const { return base_->Flags() & kMatcherFlags; }
+
+ private:
+  MatcherBase<Arc> *base_;  // Owned; deleted in the destructor
+
+  // The parameter type must be Matcher<F>: declared for any other type
+  // this is not a copy assignment operator, so the compiler would still
+  // generate one and an assignment would leave two Matchers deleting
+  // the same base_.
+  void operator=(const Matcher<F> &);  // disallow
+};
+
+} // namespace fst
+
+
+
+#endif // FST_LIB_MATCHER_H__
diff --git a/src/include/fst/minimize.h b/src/include/fst/minimize.h
new file mode 100644
index 0000000..3fbe3ba
--- /dev/null
+++ b/src/include/fst/minimize.h
@@ -0,0 +1,584 @@
+// minimize.h
+// minimize.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: johans@google.com (Johan Schalkwyk)
+//
+// \file Functions and classes to minimize a finite state acceptor
+//
+
+#ifndef FST_LIB_MINIMIZE_H__
+#define FST_LIB_MINIMIZE_H__
+
+#include <cmath>
+
+#include <algorithm>
+#include <map>
+#include <queue>
+#include <vector>
+using std::vector;
+
+#include <fst/arcsort.h>
+#include <fst/connect.h>
+#include <fst/dfs-visit.h>
+#include <fst/encode.h>
+#include <fst/factor-weight.h>
+#include <fst/fst.h>
+#include <fst/mutable-fst.h>
+#include <fst/partition.h>
+#include <fst/push.h>
+#include <fst/queue.h>
+#include <fst/reverse.h>
+#include <fst/state-map.h>
+
+
+namespace fst {
+
+// Comparator for creating a partition, ordering states (per 'flags') on
+// - final weight (via its hash),
+// - out degree,
+// - arcs pairwise in iteration order: (input label, destination class);
+//   arcs are compared only if out degree comparison is also enabled.
+template <class A>
+class StateComparator {
+ public:
+ typedef typename A::StateId StateId;
+ typedef typename A::Weight Weight;
+
+ static const uint32 kCompareFinal = 0x00000001;
+ static const uint32 kCompareOutDegree = 0x00000002;
+ static const uint32 kCompareArcs = 0x00000004;
+ static const uint32 kCompareAll = 0x00000007;
+
+ StateComparator(const Fst<A>& fst,
+ const Partition<typename A::StateId>& partition,
+ uint32 flags = kCompareAll)
+ : fst_(fst), partition_(partition), flags_(flags) {}  // Both arguments are held by reference
+
+ // compare state x with state y based on sort criteria
+ bool operator()(const StateId x, const StateId y) const {
+ // check for final state equivalence
+ if (flags_ & kCompareFinal) {
+ const size_t xfinal = fst_.Final(x).Hash();  // Final weights are ordered by their hash value
+ const size_t yfinal = fst_.Final(y).Hash();
+ if (xfinal < yfinal) return true;
+ else if (xfinal > yfinal) return false;
+ }
+
+ if (flags_ & kCompareOutDegree) {
+ // check for # arcs
+ if (fst_.NumArcs(x) < fst_.NumArcs(y)) return true;
+ if (fst_.NumArcs(x) > fst_.NumArcs(y)) return false;
+
+ if (flags_ & kCompareArcs) {  // Only reached when the out degrees are equal
+ // # arcs are equal, check for arc match
+ for (ArcIterator<Fst<A> > aiter1(fst_, x), aiter2(fst_, y);
+ !aiter1.Done() && !aiter2.Done(); aiter1.Next(), aiter2.Next()) {
+ const A& arc1 = aiter1.Value();
+ const A& arc2 = aiter2.Value();
+ if (arc1.ilabel < arc2.ilabel) return true;
+ if (arc1.ilabel > arc2.ilabel) return false;
+
+ if (partition_.class_id(arc1.nextstate) <
+ partition_.class_id(arc2.nextstate)) return true;
+ if (partition_.class_id(arc1.nextstate) >
+ partition_.class_id(arc2.nextstate)) return false;
+ }
+ }
+ }
+
+ return false;  // x and y are equivalent under the enabled criteria
+ }
+
+ private:
+ const Fst<A>& fst_;
+ const Partition<typename A::StateId>& partition_;
+ const uint32 flags_;
+};
+
+template <class A> const uint32 StateComparator<A>::kCompareFinal;  // Out-of-class definitions of the
+template <class A> const uint32 StateComparator<A>::kCompareOutDegree;  // static flag constants that are
+template <class A> const uint32 StateComparator<A>::kCompareArcs;  // declared and initialized inside
+template <class A> const uint32 StateComparator<A>::kCompareAll;  // StateComparator
+
+
+// Computes equivalence classes for cyclic Fsts. For cyclic minimization
+// we use the classic Hopcroft minimization algorithm, which is of
+//
+// O(E log N),
+//
+// where E is the number of edges in the machine and N is number of states.
+//
+// The following paper describes the original algorithm
+// An N Log N algorithm for minimizing states in a finite automaton
+// by John Hopcroft, January 1971
+//
+// Instances are not copyable: the class owns a heap-allocated queue of
+// arc iterators that its destructor deletes.
+template <class A, class Queue>
+class CyclicMinimizer {
+ public:
+  typedef typename A::Label Label;
+  typedef typename A::StateId StateId;
+  typedef typename A::StateId ClassId;
+  typedef typename A::Weight Weight;
+  typedef ReverseArc<A> RevA;
+
+  // Computes the partition of fst's states; retrieve it with partition().
+  CyclicMinimizer(const ExpandedFst<A>& fst) {
+    Initialize(fst);
+    Compute(fst);
+  }
+
+  ~CyclicMinimizer() {
+    delete aiter_queue_;
+  }
+
+  const Partition<StateId>& partition() const {
+    return P_;
+  }
+
+  // helper classes
+ private:
+  typedef ArcIterator<Fst<RevA> > ArcIter;
+  class ArcIterCompare {
+   public:
+    ArcIterCompare(const Partition<StateId>& partition)
+        : partition_(partition) {}
+
+    ArcIterCompare(const ArcIterCompare& comp)
+        : partition_(comp.partition_) {}
+
+    // Compares two iterators on the input label of their current arc.
+    // Only the label is compared (the partition is not consulted); the
+    // 'greater than' order makes the priority queue yield the iterator
+    // with the smallest label first.
+    bool operator()(const ArcIter* x, const ArcIter* y) const {
+      const RevA& xarc = x->Value();
+      const RevA& yarc = y->Value();
+      return (xarc.ilabel > yarc.ilabel);
+    }
+
+   private:
+    const Partition<StateId>& partition_;
+  };
+
+  typedef priority_queue<ArcIter*, vector<ArcIter*>, ArcIterCompare>
+      ArcIterQueue;
+
+  // helper methods
+ private:
+  // Prepartitions the states into classes of states that StateComparator
+  // treats as equivalent when only final weights are compared
+  // (kCompareFinal). Every class created is enqueued in L_. Reads the
+  // first state without checking Done(), so 'fst' must have at least
+  // one state.
+  void PrePartition(const Fst<A>& fst) {
+    VLOG(5) << "PrePartition";
+
+    typedef map<StateId, StateId, StateComparator<A> > EquivalenceMap;
+    StateComparator<A> comp(fst, P_, StateComparator<A>::kCompareFinal);
+    EquivalenceMap equiv_map(comp);
+
+    StateIterator<Fst<A> > siter(fst);
+    StateId class_id = P_.AddClass();
+    P_.Add(siter.Value(), class_id);
+    equiv_map[siter.Value()] = class_id;
+    L_.Enqueue(class_id);
+    for (siter.Next(); !siter.Done(); siter.Next()) {
+      StateId s = siter.Value();
+      typename EquivalenceMap::const_iterator it = equiv_map.find(s);
+      if (it == equiv_map.end()) {
+        // No equivalent state seen so far: open a new class.
+        class_id = P_.AddClass();
+        P_.Add(s, class_id);
+        equiv_map[s] = class_id;
+        L_.Enqueue(class_id);
+      } else {
+        P_.Add(s, it->second);
+        equiv_map[s] = it->second;
+      }
+    }
+
+    VLOG(5) << "Initial Partition: " << P_.num_classes();
+  }
+
+  // - Create inverse transition Tr_ = rev(fst), sorted by input label
+  // - Build the initial partition with PrePartition()
+  // - Allocate the arc iterator queue used by Split()
+  void Initialize(const Fst<A>& fst) {
+    // construct Tr
+    Reverse(fst, &Tr_);
+    ILabelCompare<RevA> ilabel_comp;
+    ArcSort(&Tr_, ilabel_comp);
+
+    // Tr_ has one state more than the partition covers; state s of fst
+    // is addressed as s + 1 in Tr_ (see Split()).
+    P_.Initialize(Tr_.NumStates() - 1);
+
+    // prep partition
+    PrePartition(fst);
+
+    // allocate arc iterator queue
+    ArcIterCompare comp(P_);
+    aiter_queue_ = new ArcIterQueue(comp);
+  }
+
+  // partition all classes with destination C
+  void Split(ClassId C) {
+    // Prep priority queue. Open arc iterator for each state in C, and
+    // insert into priority queue.
+    for (PartitionIterator<StateId> siter(P_, C);
+         !siter.Done(); siter.Next()) {
+      StateId s = siter.Value();
+      if (Tr_.NumArcs(s + 1))
+        aiter_queue_->push(new ArcIterator<Fst<RevA> >(Tr_, s + 1));
+    }
+
+    // Now pop arc iterator from queue, split entering equivalence class
+    // re-insert updated iterator into queue.
+    Label prev_label = -1;
+    while (!aiter_queue_->empty()) {
+      ArcIterator<Fst<RevA> >* aiter = aiter_queue_->top();
+      aiter_queue_->pop();
+      if (aiter->Done()) {
+        delete aiter;
+        continue;
+      }
+
+      const RevA& arc = aiter->Value();
+      StateId from_state = aiter->Value().nextstate - 1;
+      Label from_label = arc.ilabel;
+      // A new label starts: finish the splits made for the previous one.
+      if (prev_label != from_label)
+        P_.FinalizeSplit(&L_);
+
+      StateId from_class = P_.class_id(from_state);
+      if (P_.class_size(from_class) > 1)
+        P_.SplitOn(from_state);
+
+      prev_label = from_label;
+      aiter->Next();
+      // Exhausted iterators are deleted here, so the queue holds no
+      // iterators once this loop ends.
+      if (aiter->Done())
+        delete aiter;
+      else
+        aiter_queue_->push(aiter);
+    }
+    P_.FinalizeSplit(&L_);
+  }
+
+  // Main loop for hopcroft minimization. ('fst' is not used here.)
+  void Compute(const Fst<A>& fst) {
+    // process active classes (FIFO, or FILO)
+    while (!L_.Empty()) {
+      ClassId C = L_.Head();
+      L_.Dequeue();
+
+      // split on C, all labels in C
+      Split(C);
+    }
+  }
+
+  // helper data
+ private:
+  // Partitioning of states into equivalence classes
+  Partition<StateId> P_;
+
+  // L = set of active classes to be processed in partition P
+  Queue L_;
+
+  // reverse transition function
+  VectorFst<RevA> Tr_;
+
+  // Priority queue of open arc iterators for all states in the 'splitter'
+  // equivalence class
+  ArcIterQueue* aiter_queue_;
+
+  // Copying is disallowed: a copy would share aiter_queue_ and both
+  // destructors would delete it.
+  CyclicMinimizer(const CyclicMinimizer<A, Queue>&);  // disallow
+  void operator=(const CyclicMinimizer<A, Queue>&);   // disallow
+};
+
+
+// Computes equivalence classes for acyclic Fsts. The implementation details
+// for this algorithm are documented by the following paper.
+//
+// Minimization of acyclic deterministic automata in linear time
+// Dominique Revuz
+//
+// Complexity O(|E|)
+//
+template <class A>
+class AcyclicMinimizer {
+ public:
+ typedef typename A::Label Label;
+ typedef typename A::StateId StateId;
+ typedef typename A::StateId ClassId;
+ typedef typename A::Weight Weight;
+
+ AcyclicMinimizer(const ExpandedFst<A>& fst) {  // Computes the partition; see partition()
+ Initialize(fst);
+ Refine(fst);
+ }
+
+ const Partition<StateId>& partition() {
+ return partition_;
+ }
+
+ // helper classes
+ private:
+ // DFS visitor to compute the height (distance) to final state.
+ class HeightVisitor {
+ public:
+ HeightVisitor() : max_height_(0), num_states_(0) { }
+
+ // invoked before dfs visit
+ void InitVisit(const Fst<A>& fst) {}
+
+ // invoked when state is discovered (2nd arg is DFS tree root)
+ bool InitState(StateId s, StateId root) {
+ // extend height array; -1 marks a height that is not yet computed
+ for (size_t i = height_.size(); i <= s; ++i)
+ height_.push_back(-1);
+
+ if (s >= num_states_) num_states_ = s + 1;
+ return true;
+ }
+
+ // invoked when tree arc examined (to undiscovered state)
+ bool TreeArc(StateId s, const A& arc) {
+ return true;
+ }
+
+ // invoked when back arc examined (to unfinished state)
+ bool BackArc(StateId s, const A& arc) {
+ return true;
+ }
+
+ // invoked when forward or cross arc examined (to finished state)
+ bool ForwardOrCrossArc(StateId s, const A& arc) {
+ if (height_[arc.nextstate] + 1 > height_[s])  // Keep the largest successor height plus one
+ height_[s] = height_[arc.nextstate] + 1;
+ return true;
+ }
+
+ // invoked when state finished (parent is kNoStateId for tree root)
+ void FinishState(StateId s, StateId parent, const A* parent_arc) {
+ if (height_[s] == -1) height_[s] = 0;  // No successor raised it: height 0
+ StateId h = height_[s] + 1;
+ if (parent >= 0) {  // Propagate to the DFS parent; skipped for the tree root
+ if (h > height_[parent]) height_[parent] = h;
+ if (h > max_height_) max_height_ = h;
+ }
+ }
+
+ // invoked after DFS visit
+ void FinishVisit() {}
+
+ size_t max_height() const { return max_height_; }
+
+ const vector<StateId>& height() const { return height_; }
+
+ const size_t num_states() const { return num_states_; }
+
+ private:
+ vector<StateId> height_;  // Indexed by state id
+ size_t max_height_;  // Largest height propagated to a parent in FinishState()
+ size_t num_states_;  // One more than the largest state id seen by InitState()
+ };
+
+ // helper methods
+ private:
+ // cluster states according to height (distance to final state)
+ void Initialize(const Fst<A>& fst) {
+ // compute height (distance to final state)
+ HeightVisitor hvisitor;
+ DfsVisit(fst, &hvisitor);
+
+ // create initial partition based on height
+ partition_.Initialize(hvisitor.num_states());
+ partition_.AllocateClasses(hvisitor.max_height() + 1);  // One class per height 0..max_height
+ const vector<StateId>& hstates = hvisitor.height();
+ for (size_t s = 0; s < hstates.size(); ++s)
+ partition_.Add(s, hstates[s]);  // The class id of a state is its height
+ }
+
+ // refine states based on arc sort (out degree, arc equivalence)
+ void Refine(const Fst<A>& fst) {
+ typedef map<StateId, StateId, StateComparator<A> > EquivalenceMap;
+ StateComparator<A> comp(fst, partition_);  // Default flags: kCompareAll
+
+ // start with tail (height = 0)
+ size_t height = partition_.num_classes();  // Classes added below are not revisited
+ for (size_t h = 0; h < height; ++h) {
+ EquivalenceMap equiv_classes(comp);
+
+ // sort states within equivalence class
+ PartitionIterator<StateId> siter(partition_, h);
+ equiv_classes[siter.Value()] = h;  // The first state keeps class h
+ for (siter.Next(); !siter.Done(); siter.Next()) {
+ const StateId s = siter.Value();
+ typename EquivalenceMap::const_iterator it = equiv_classes.find(s);
+ if (it == equiv_classes.end())
+ equiv_classes[s] = partition_.AddClass();  // No equivalent state seen yet: new class
+ else
+ equiv_classes[s] = it->second;
+ }
+
+ // create refined partition
+ for (siter.Reset(); !siter.Done();) {
+ const StateId s = siter.Value();
+ const StateId old_class = partition_.class_id(s);
+ const StateId new_class = equiv_classes[s];
+
+ // a move operation can invalidate the iterator, so
+ // we first update the iterator to the next element
+ // before we move the current element out of the list
+ siter.Next();
+ if (old_class != new_class)
+ partition_.Move(s, new_class);
+ }
+ }
+ }
+
+ private:
+ Partition<StateId> partition_;
+};
+
+
+// Given a partition and a mutable fst, merge states of Fst inplace
+// (i.e. destructively). Merging works by taking the first state in
+// a class of the partition to be the representative state for the class.
+// Each arc is then reconnected to this state. All states in the class
+// are merged by adding their arcs to the representative state.
+template <class A>
+void MergeStates(
+ const Partition<typename A::StateId>& partition, MutableFst<A>* fst) {
+ typedef typename A::StateId StateId;
+
+ vector<StateId> state_map(partition.num_classes());  // Class id -> representative state
+ for (size_t i = 0; i < partition.num_classes(); ++i) {
+ PartitionIterator<StateId> siter(partition, i);
+ state_map[i] = siter.Value(); // first state in partition;
+ }
+
+ // relabel destination states
+ for (size_t c = 0; c < partition.num_classes(); ++c) {
+ for (PartitionIterator<StateId> siter(partition, c);
+ !siter.Done(); siter.Next()) {
+ StateId s = siter.Value();
+ for (MutableArcIterator<MutableFst<A> > aiter(fst, s);
+ !aiter.Done(); aiter.Next()) {
+ A arc = aiter.Value();
+ arc.nextstate = state_map[partition.class_id(arc.nextstate)];  // Redirect to the representative
+
+ if (s == state_map[c]) // first state just set destination
+ aiter.SetValue(arc);
+ else
+ fst->AddArc(state_map[c], arc);  // Other members: add the arc to the representative
+ }
+ }
+ }
+ fst->SetStart(state_map[partition.class_id(fst->Start())]);  // Start at its class representative
+
+ Connect(fst);  // NOTE(review): presumably removes the merged-away, now unreachable states - confirm
+}
+
+// Minimizes 'fst' in place. The FST must be an unweighted acceptor;
+// otherwise an error is logged, kError is set on 'fst' and it is left
+// unchanged. The FST is connected first and nothing more is done if no
+// state remains. Acyclic inputs use AcyclicMinimizer (after sorting arcs
+// by input label), all others use CyclicMinimizer.
+template <class A>
+void AcceptorMinimize(MutableFst<A>* fst) {
+  typedef typename A::StateId StateId;
+  // Both properties are required. A plain truth test of the masked
+  // result would also accept an FST that has only one of them, e.g. a
+  // weighted acceptor.
+  const uint64 required = kAcceptor | kUnweighted;
+  if (fst->Properties(required, true) != required) {
+    FSTERROR() << "FST is not an unweighted acceptor";
+    fst->SetProperties(kError, kError);
+    return;
+  }
+
+  // connect fst before minimization, handles disconnected states
+  Connect(fst);
+  if (fst->NumStates() == 0) return;
+
+  if (fst->Properties(kAcyclic, true)) {
+    // Acyclic minimization (revuz)
+    VLOG(2) << "Acyclic Minimization";
+    ArcSort(fst, ILabelCompare<A>());
+    AcyclicMinimizer<A> minimizer(*fst);
+    MergeStates(minimizer.partition(), fst);
+
+  } else {
+    // Cyclic minimization (hopcroft)
+    VLOG(2) << "Cyclic Minimization";
+    CyclicMinimizer<A, LifoQueue<StateId> > minimizer(*fst);
+    MergeStates(minimizer.partition(), fst);
+  }
+
+  // Merge in appropriate semiring
+  ArcUniqueMapper<A> mapper(*fst);
+  StateMap(fst, mapper);
+}
+
+
+// In place minimization of deterministic weighted automata and transducers.
+// For transducers, when the 'sfst' argument is not null, the algorithm
+// produces a compact factorization of the minimal transducer.
+//
+// In the acyclic case, we use an algorithm from Dominique Revuz that
+// is linear in the number of arcs (edges) in the machine.
+// Complexity = O(E)
+//
+// In the cyclic case, we use the classical hopcroft minimization.
+// Complexity = O(|E|log(|N|))
+//
+template <class A>
+void Minimize(MutableFst<A>* fst,
+ MutableFst<A>* sfst = 0,
+ float delta = kDelta) {  // 'delta' is passed to Push() and QuantizeMapper
+ uint64 props = fst->Properties(kAcceptor | kIDeterministic|
+ kWeighted | kUnweighted, true);
+ if (!(props & kIDeterministic)) {  // Not input-deterministic: flag kError, leave fst as is
+ FSTERROR() << "FST is not deterministic";
+ fst->SetProperties(kError, kError);
+ return;
+ }
+
+ if (!(props & kAcceptor)) { // transducer: minimized through a gallic-arc copy
+ VectorFst< GallicArc<A, STRING_LEFT> > gfst;
+ ArcMap(*fst, &gfst, ToGallicMapper<A, STRING_LEFT>());
+ fst->DeleteStates();  // fst is refilled from gfst below
+ gfst.SetProperties(kAcceptor, kAcceptor);
+ Push(&gfst, REWEIGHT_TO_INITIAL, delta);
+ ArcMap(&gfst, QuantizeMapper< GallicArc<A, STRING_LEFT> >(delta));
+ EncodeMapper< GallicArc<A, STRING_LEFT> >
+ encoder(kEncodeLabels | kEncodeWeights, ENCODE);
+ Encode(&gfst, &encoder);  // Labels and weights are encoded before AcceptorMinimize
+ AcceptorMinimize(&gfst);
+ Decode(&gfst, encoder);
+
+ if (sfst == 0) {  // No 'sfst': map the factored gallic result back into fst
+ FactorWeightFst< GallicArc<A, STRING_LEFT>,
+ GallicFactor<typename A::Label,
+ typename A::Weight, STRING_LEFT> > fwfst(gfst);
+ SymbolTable *osyms = fst->OutputSymbols() ?
+ fst->OutputSymbols()->Copy() : 0;  // Copy kept to set again after ArcMap
+ ArcMap(fwfst, fst, FromGallicMapper<A, STRING_LEFT>());
+ fst->SetOutputSymbols(osyms);
+ delete osyms;
+ } else {  // 'sfst' given: map through GallicToNewSymbolsMapper, which is handed sfst
+ sfst->SetOutputSymbols(fst->OutputSymbols());
+ GallicToNewSymbolsMapper<A, STRING_LEFT> mapper(sfst);
+ ArcMap(gfst, fst, &mapper);
+ fst->SetOutputSymbols(sfst->InputSymbols());
+ }
+ } else if (props & kWeighted) { // weighted acceptor
+ Push(fst, REWEIGHT_TO_INITIAL, delta);
+ ArcMap(fst, QuantizeMapper<A>(delta));
+ EncodeMapper<A> encoder(kEncodeLabels | kEncodeWeights, ENCODE);
+ Encode(fst, &encoder);
+ AcceptorMinimize(fst);
+ Decode(fst, encoder);
+ } else { // unweighted acceptor
+ AcceptorMinimize(fst);
+ }
+}
+
+} // namespace fst
+
+#endif // FST_LIB_MINIMIZE_H__
diff --git a/src/include/fst/mutable-fst.h b/src/include/fst/mutable-fst.h
new file mode 100644
index 0000000..9afcab3
--- /dev/null
+++ b/src/include/fst/mutable-fst.h
@@ -0,0 +1,378 @@
+// mutable-fst.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Expanded FST augmented with mutators - interface class definition
+// and mutable arc iterator interface.
+//
+
+#ifndef FST_LIB_MUTABLE_FST_H__
+#define FST_LIB_MUTABLE_FST_H__
+
+#include <stddef.h>
+#include <sys/types.h>
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/expanded-fst.h>
+
+
+namespace fst {
+
+template <class A> class MutableArcIteratorData;
+
+// An expanded FST plus mutators (use MutableArcIterator to modify arcs).
+// This is an abstract interface: all mutators are pure virtual except the
+// two Reserve* hints, whose default implementations do nothing.
+template <class A>
+class MutableFst : public ExpandedFst<A> {
+ public:
+ typedef A Arc;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+
+ // Assignment from an arbitrary FST; implemented by concrete classes.
+ virtual MutableFst<A> &operator=(const Fst<A> &fst) = 0;
+
+ // Assignment from another MutableFst; forwards to the Fst<A> overload.
+ MutableFst<A> &operator=(const MutableFst<A> &fst) {
+ return operator=(static_cast<const Fst<A> &>(fst));
+ }
+
+ virtual void SetStart(StateId) = 0; // Set the initial state
+ virtual void SetFinal(StateId, Weight) = 0; // Set a state's final weight
+ virtual void SetProperties(uint64 props,
+ uint64 mask) = 0; // Set property bits wrt mask
+
+ virtual StateId AddState() = 0; // Add a state, return its ID
+ virtual void AddArc(StateId, const A &arc) = 0; // Add an arc to state
+
+ virtual void DeleteStates(const vector<StateId>&) = 0; // Delete some states
+ virtual void DeleteStates() = 0; // Delete all states
+ virtual void DeleteArcs(StateId, size_t n) = 0; // Delete some arcs at state
+ virtual void DeleteArcs(StateId) = 0; // Delete all arcs at state
+
+ virtual void ReserveStates(StateId n) { } // Optional, best effort only.
+ virtual void ReserveArcs(StateId s, size_t n) { } // Optional, Best effort.
+
+ // Return input label symbol table; return NULL if not specified
+ virtual const SymbolTable* InputSymbols() const = 0;
+ // Return output label symbol table; return NULL if not specified
+ virtual const SymbolTable* OutputSymbols() const = 0;
+
+ // Return mutable input label symbol table; return NULL if not specified
+ virtual SymbolTable* MutableInputSymbols() = 0;
+ // Return mutable output label symbol table; return NULL if not specified
+ virtual SymbolTable* MutableOutputSymbols() = 0;
+
+ // Set input label symbol table; NULL signifies unspecified
+ virtual void SetInputSymbols(const SymbolTable* isyms) = 0;
+ // Set output label symbol table; NULL signifies unspecified
+ virtual void SetOutputSymbols(const SymbolTable* osyms) = 0;
+
+ // Get a copy of this MutableFst. See Fst<>::Copy() for further doc.
+ virtual MutableFst<A> *Copy(bool safe = false) const = 0;
+
+ // Read a MutableFst from an input stream; return NULL on error.
+ // If 'opts.header' is set it is used as the FST header; otherwise the
+ // header is read from 'strm' first. The header must carry the kMutable
+ // property and its FST type must have a reader in FstRegister<A>.
+ static MutableFst<A> *Read(istream &strm, const FstReadOptions &opts) {
+ FstReadOptions ropts(opts);
+ FstHeader hdr;
+ if (ropts.header)
+ hdr = *opts.header;
+ else {
+ if (!hdr.Read(strm, opts.source))
+ return 0;
+ ropts.header = &hdr;
+ }
+ if (!(hdr.Properties() & kMutable)) {
+ LOG(ERROR) << "MutableFst::Read: Not an MutableFst: " << ropts.source;
+ return 0;
+ }
+ FstRegister<A> *registr = FstRegister<A>::GetRegister();
+ const typename FstRegister<A>::Reader reader =
+ registr->GetReader(hdr.FstType());
+ if (!reader) {
+ LOG(ERROR) << "MutableFst::Read: Unknown FST type \"" << hdr.FstType()
+ << "\" (arc type = \"" << A::Type()
+ << "\"): " << ropts.source;
+ return 0;
+ }
+ Fst<A> *fst = reader(strm, ropts);
+ if (!fst) return 0;
+ // The kMutable check on the header above is what justifies this downcast.
+ return static_cast<MutableFst<A> *>(fst);
+ }
+
+ // Read a MutableFst from a file; return NULL on error.
+ // Empty filename reads from standard input. If 'convert' is true,
+ // convert to a mutable FST of type 'convert_type' if file is
+ // a non-mutable FST.
+ static MutableFst<A> *Read(const string &filename, bool convert = false,
+ const string &convert_type = "vector") {
+ if (convert == false) {
+ if (!filename.empty()) {
+ ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
+ if (!strm) {
+ LOG(ERROR) << "MutableFst::Read: Can't open file: " << filename;
+ return 0;
+ }
+ return Read(strm, FstReadOptions(filename));
+ } else {
+ return Read(std::cin, FstReadOptions("standard input"));
+ }
+ } else { // Converts to 'convert_type' if not mutable.
+ Fst<A> *ifst = Fst<A>::Read(filename);
+ if (!ifst) return 0;
+ if (ifst->Properties(kMutable, false)) {
+ return static_cast<MutableFst *>(ifst);
+ } else {
+ Fst<A> *ofst = Convert(*ifst, convert_type);
+ delete ifst;
+ if (!ofst) return 0;
+ // NOTE(review): when 'convert_type' names a non-mutable FST type this
+ // only logs; the non-mutable object is still cast and returned below.
+ // Confirm whether callers rely on that or whether NULL was intended.
+ if (!ofst->Properties(kMutable, false))
+ LOG(ERROR) << "MutableFst: bad convert type: " << convert_type;
+ return static_cast<MutableFst *>(ofst);
+ }
+ }
+ }
+
+ // For generic mutable arc iterator construction; not normally called
+ // directly by users.
+ virtual void InitMutableArcIterator(StateId s,
+ MutableArcIteratorData<A> *) = 0;
+};
+
+// Mutable arc iterator interface, templated on the Arc definition; used
+// for mutable Arc iterator specializations that are returned by
+// the InitMutableArcIterator MutableFst method.
+template <class A>
+class MutableArcIteratorBase : public ArcIteratorBase<A> {
+ public:
+ typedef A Arc;
+
+ // Public non-virtual entry point; forwards to the private virtual
+ // SetValue_() supplied by the concrete iterator.
+ void SetValue(const A &arc) { SetValue_(arc); } // Set current arc's content
+
+ private:
+ // Implemented by concrete iterators; receives the arc passed to SetValue().
+ virtual void SetValue_(const A &arc) = 0;
+};
+
+// Holder passed to MutableFst::InitMutableArcIterator(), which is expected
+// to fill in 'base'. MutableArcIterator deletes 'base' in its destructor.
+template <class A>
+struct MutableArcIteratorData {
+ MutableArcIteratorBase<A> *base; // Specific iterator
+};
+
+// Generic mutable arc iterator, templated on the FST definition
+// - a wrapper around pointer to specific one.
+// F must provide InitMutableArcIterator(StateId, MutableArcIteratorData*).
+// Here is a typical use: \code
+// for (MutableArcIterator<StdFst> aiter(&fst, s);
+// !aiter.Done();
+// aiter.Next()) {
+// StdArc arc = aiter.Value();
+// arc.ilabel = 7;
+// aiter.SetValue(arc);
+// ...
+// } \endcode
+// This version requires function calls.
+template <class F>
+class MutableArcIterator {
+ public:
+ typedef F FST;
+ typedef typename F::Arc Arc;
+ typedef typename Arc::StateId StateId;
+
+ // Asks 'fst' to create the concrete iterator for state 's'; the result
+ // is stored in data_.base.
+ MutableArcIterator(F *fst, StateId s) {
+ fst->InitMutableArcIterator(s, &data_);
+ }
+ // The wrapper owns the concrete iterator and releases it here.
+ ~MutableArcIterator() { delete data_.base; }
+
+ // Every method below forwards to the concrete iterator in data_.base.
+ bool Done() const { return data_.base->Done(); }
+ const Arc& Value() const { return data_.base->Value(); }
+ void Next() { data_.base->Next(); }
+ size_t Position() const { return data_.base->Position(); }
+ void Reset() { data_.base->Reset(); }
+ void Seek(size_t a) { data_.base->Seek(a); }
+ void SetValue(const Arc &a) { data_.base->SetValue(a); }
+ uint32 Flags() const { return data_.base->Flags(); }
+ void SetFlags(uint32 f, uint32 m) {
+ return data_.base->SetFlags(f, m);
+ }
+
+ private:
+ MutableArcIteratorData<Arc> data_;
+ // Copying is disallowed; a copy would share data_.base and delete it twice.
+ DISALLOW_COPY_AND_ASSIGN(MutableArcIterator);
+};
+
+
+namespace internal {
+
+// MutableFst<A> case - abstract methods.
+// Each helper below simply forwards to the member function of the same
+// name on the MutableFst<A> argument.
+
+// Final weight of state 's'.
+template <class A> inline
+typename A::Weight Final(const MutableFst<A> &fst, typename A::StateId s) {
+ return fst.Final(s);
+}
+
+// Result of fst.NumArcs(s).
+template <class A> inline
+ssize_t NumArcs(const MutableFst<A> &fst, typename A::StateId s) {
+ return fst.NumArcs(s);
+}
+
+// Result of fst.NumInputEpsilons(s).
+template <class A> inline
+ssize_t NumInputEpsilons(const MutableFst<A> &fst, typename A::StateId s) {
+ return fst.NumInputEpsilons(s);
+}
+
+// Result of fst.NumOutputEpsilons(s).
+template <class A> inline
+ssize_t NumOutputEpsilons(const MutableFst<A> &fst, typename A::StateId s) {
+ return fst.NumOutputEpsilons(s);
+}
+
+} // namespace internal
+
+
+// A useful alias when using StdArc.
+typedef MutableFst<StdArc> StdMutableFst;
+
+
+// This is a helper class template useful for attaching a MutableFst
+// interface to its implementation, handling reference counting and
+// copy-on-write.
+// Every mutator calls MutateCheck() before touching the implementation, so
+// an implementation shared with other FST objects is replaced by a private
+// copy first.
+template <class I, class F = MutableFst<typename I::Arc> >
+class ImplToMutableFst : public ImplToExpandedFst<I, F> {
+ public:
+ typedef typename I::Arc Arc;
+ typedef typename Arc::Weight Weight;
+ typedef typename Arc::StateId StateId;
+
+ using ImplToFst<I, F>::GetImpl;
+ using ImplToFst<I, F>::SetImpl;
+
+ // Set the initial state.
+ virtual void SetStart(StateId s) {
+ MutateCheck();
+ GetImpl()->SetStart(s);
+ }
+
+ // Set the final weight of state 's'.
+ virtual void SetFinal(StateId s, Weight w) {
+ MutateCheck();
+ GetImpl()->SetFinal(s, w);
+ }
+
+ // Set the property bits selected by 'mask' to the values in 'props'.
+ virtual void SetProperties(uint64 props, uint64 mask) {
+ // Can skip mutate check if extrinsic properties don't change,
+ // since it is then safe to update all (shallow) copies
+ uint64 exprops = kExtrinsicProperties & mask;
+ if (GetImpl()->Properties(exprops) != (props & exprops))
+ MutateCheck();
+ GetImpl()->SetProperties(props, mask);
+ }
+
+ // Add a state and return its ID.
+ virtual StateId AddState() {
+ MutateCheck();
+ return GetImpl()->AddState();
+ }
+
+ // Add 'arc' to state 's'.
+ virtual void AddArc(StateId s, const Arc &arc) {
+ MutateCheck();
+ GetImpl()->AddArc(s, arc);
+ }
+
+ // Delete the states listed in 'dstates'.
+ virtual void DeleteStates(const vector<StateId> &dstates) {
+ MutateCheck();
+ GetImpl()->DeleteStates(dstates);
+ }
+
+ // Delete all states.
+ virtual void DeleteStates() {
+ MutateCheck();
+ GetImpl()->DeleteStates();
+ }
+
+ // Forwards to the implementation's DeleteArcs(s, n).
+ virtual void DeleteArcs(StateId s, size_t n) {
+ MutateCheck();
+ GetImpl()->DeleteArcs(s, n);
+ }
+
+ // Delete all arcs at state 's'.
+ virtual void DeleteArcs(StateId s) {
+ MutateCheck();
+ GetImpl()->DeleteArcs(s);
+ }
+
+ // Forwards the state-count hint to the implementation.
+ virtual void ReserveStates(StateId s) {
+ MutateCheck();
+ GetImpl()->ReserveStates(s);
+ }
+
+ // Forwards the arc-count hint for state 's' to the implementation.
+ virtual void ReserveArcs(StateId s, size_t n) {
+ MutateCheck();
+ GetImpl()->ReserveArcs(s, n);
+ }
+
+ // Read-only access to the input symbol table; no copy is triggered.
+ virtual const SymbolTable* InputSymbols() const {
+ return GetImpl()->InputSymbols();
+ }
+
+ // Read-only access to the output symbol table; no copy is triggered.
+ virtual const SymbolTable* OutputSymbols() const {
+ return GetImpl()->OutputSymbols();
+ }
+
+ // Writable access to the input symbol table; runs MutateCheck() first
+ // because the caller may modify the table.
+ virtual SymbolTable* MutableInputSymbols() {
+ MutateCheck();
+ return GetImpl()->InputSymbols();
+ }
+
+ // Writable access to the output symbol table; runs MutateCheck() first
+ // because the caller may modify the table.
+ virtual SymbolTable* MutableOutputSymbols() {
+ MutateCheck();
+ return GetImpl()->OutputSymbols();
+ }
+
+ // Replace the input symbol table.
+ virtual void SetInputSymbols(const SymbolTable* isyms) {
+ MutateCheck();
+ GetImpl()->SetInputSymbols(isyms);
+ }
+
+ // Replace the output symbol table.
+ virtual void SetOutputSymbols(const SymbolTable* osyms) {
+ MutateCheck();
+ GetImpl()->SetOutputSymbols(osyms);
+ }
+
+ protected:
+ // Constructors are protected: only derived FST classes create instances.
+ ImplToMutableFst() : ImplToExpandedFst<I, F>() {}
+
+ ImplToMutableFst(I *impl) : ImplToExpandedFst<I, F>(impl) {}
+
+
+ ImplToMutableFst(const ImplToMutableFst<I, F> &fst)
+ : ImplToExpandedFst<I, F>(fst) {}
+
+ ImplToMutableFst(const ImplToMutableFst<I, F> &fst, bool safe)
+ : ImplToExpandedFst<I, F>(fst, safe) {}
+
+ // Gives this object its own implementation before a mutation: if the
+ // implementation's reference count exceeds one, it is replaced by a new
+ // I constructed from *this.
+ void MutateCheck() {
+ // Copy on write
+ if (GetImpl()->RefCount() > 1)
+ SetImpl(new I(*this));
+ }
+
+ private:
+ // Disallow
+ // Copy assignment is declared but not defined here.
+ ImplToMutableFst<I, F> &operator=(const ImplToMutableFst<I, F> &fst);
+
+ // Assignment from a generic Fst is also disallowed: if it is reached it
+ // reports an error and sets kError on the implementation.
+ ImplToMutableFst<I, F> &operator=(const Fst<Arc> &fst) {
+ FSTERROR() << "ImplToMutableFst: Assignment operator disallowed";
+ GetImpl()->SetProperties(kError, kError);
+ return *this;
+ }
+};
+
+
+} // namespace fst
+
+#endif // FST_LIB_MUTABLE_FST_H__
diff --git a/src/include/fst/pair-weight.h b/src/include/fst/pair-weight.h
new file mode 100644
index 0000000..7d8aa11
--- /dev/null
+++ b/src/include/fst/pair-weight.h
@@ -0,0 +1,280 @@
+// pair-weight.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: shumash@google.com (Masha Maria Shugrina)
+//
+// \file
+// Pair weight templated base class for weight classes that
+// contain two weights (e.g. Product, Lexicographic)
+
+#ifndef FST_LIB_PAIR_WEIGHT_H_
+#define FST_LIB_PAIR_WEIGHT_H_
+
+#include <climits>
+#include <stack>
+#include <string>
+
+#include <fst/weight.h>
+
+
+DECLARE_string(fst_weight_parentheses);
+DECLARE_string(fst_weight_separator);
+
+namespace fst {
+
+template<class W1, class W2> class PairWeight;
+template <class W1, class W2>
+istream &operator>>(istream &strm, PairWeight<W1, W2> &w);
+
+// Base class holding an ordered pair of weights (W1, W2). It provides
+// storage, binary and text I/O helpers, hashing, quantization and reversal;
+// the stored values are exposed read-only through Value1()/Value2().
+template<class W1, class W2>
+class PairWeight {
+ public:
+ // operator>> needs access to the protected ReadNoParen/ReadWithParen.
+ friend istream &operator>><W1, W2>(istream&, PairWeight<W1, W2>&);
+
+ typedef PairWeight<typename W1::ReverseWeight,
+ typename W2::ReverseWeight>
+ ReverseWeight;
+
+ // Default-constructs both components.
+ PairWeight() {}
+
+ PairWeight(const PairWeight& w) : value1_(w.value1_), value2_(w.value2_) {}
+
+ PairWeight(W1 w1, W2 w2) : value1_(w1), value2_(w2) {}
+
+ // (W1::Zero(), W2::Zero()), built once on first use.
+ static const PairWeight<W1, W2> &Zero() {
+ static const PairWeight<W1, W2> zero(W1::Zero(), W2::Zero());
+ return zero;
+ }
+
+ // (W1::One(), W2::One()), built once on first use.
+ static const PairWeight<W1, W2> &One() {
+ static const PairWeight<W1, W2> one(W1::One(), W2::One());
+ return one;
+ }
+
+ // (W1::NoWeight(), W2::NoWeight()), built once on first use.
+ static const PairWeight<W1, W2> &NoWeight() {
+ static const PairWeight<W1, W2> no_weight(W1::NoWeight(), W2::NoWeight());
+ return no_weight;
+ }
+
+ // Reads the first component, then the second, via their own Read()
+ // methods; returns the stream returned by the second read.
+ istream &Read(istream &strm) {
+ value1_.Read(strm);
+ return value2_.Read(strm);
+ }
+
+ // Writes the first component, then the second, via their own Write()
+ // methods; returns the stream returned by the second write.
+ ostream &Write(ostream &strm) const {
+ value1_.Write(strm);
+ return value2_.Write(strm);
+ }
+
+ PairWeight<W1, W2> &operator=(const PairWeight<W1, W2> &w) {
+ value1_ = w.Value1();
+ value2_ = w.Value2();
+ return *this;
+ }
+
+ // True only when both components are members of their weight sets.
+ bool Member() const { return value1_.Member() && value2_.Member(); }
+
+ // Combines the component hashes: the first hash is rotated left by
+ // 5 bits and XORed with the second.
+ size_t Hash() const {
+ size_t h1 = value1_.Hash();
+ size_t h2 = value2_.Hash();
+ const int lshift = 5;
+ const int rshift = CHAR_BIT * sizeof(size_t) - 5;
+ return h1 << lshift ^ h1 >> rshift ^ h2;
+ }
+
+ // Quantizes each component with the same 'delta'.
+ PairWeight<W1, W2> Quantize(float delta = kDelta) const {
+ return PairWeight<W1, W2>(value1_.Quantize(delta),
+ value2_.Quantize(delta));
+ }
+
+ // Pair of the component-wise reversed weights.
+ ReverseWeight Reverse() const {
+ return ReverseWeight(value1_.Reverse(), value2_.Reverse());
+ }
+
+ const W1& Value1() const { return value1_; }
+
+ const W2& Value2() const { return value2_; }
+
+ protected:
+ void SetValue1(const W1 &w) { value1_ = w; }
+ void SetValue2(const W2 &w) { value2_ = w; }
+
+ // Reads PairWeight when there are not parentheses around pair terms
+ // Leading whitespace is skipped, then characters up to 'separator' are
+ // collected and parsed as W1; W2 is then read directly from 'strm'.
+ // Hitting EOF before the separator puts the stream in the bad state and
+ // leaves 'w' untouched.
+ inline static istream &ReadNoParen(
+ istream &strm, PairWeight<W1, W2>& w, char separator) {
+ int c;
+ do {
+ c = strm.get();
+ } while (isspace(c));
+
+ string s1;
+ while (c != separator) {
+ if (c == EOF) {
+ strm.clear(std::ios::badbit);
+ return strm;
+ }
+ s1 += c;
+ c = strm.get();
+ }
+ istringstream strm1(s1);
+ W1 w1 = W1::Zero();
+ strm1 >> w1;
+
+ // read second element
+ W2 w2 = W2::Zero();
+ strm >> w2;
+
+ w = PairWeight<W1, W2>(w1, w2);
+ return strm;
+ }
+
+ // Reads PairWeight when there are parentheses around pair terms
+ // Expected layout: open_paren <W1 text> separator <W2 text> close_paren.
+ // The first element ends at the first separator seen outside any nested
+ // parentheses; the second element is everything left in the stream,
+ // which must end with close_paren. On a format error the stream state is
+ // set to fail (or bad on premature EOF) and 'w' is left untouched.
+ inline static istream &ReadWithParen(
+ istream &strm, PairWeight<W1, W2>& w,
+ char separator, char open_paren, char close_paren) {
+ int c;
+ do {
+ c = strm.get();
+ } while (isspace(c));
+ if (c != open_paren) {
+ FSTERROR() << " is fst_weight_parentheses flag set correcty? ";
+ strm.clear(std::ios::failbit);
+ return strm;
+ }
+ c = strm.get();
+
+ // read first element
+ // 'parens' tracks the nesting depth: one entry per unmatched open_paren.
+ stack<int> parens;
+ string s1;
+ while (c != separator || !parens.empty()) {
+ if (c == EOF) {
+ strm.clear(std::ios::badbit);
+ return strm;
+ }
+ s1 += c;
+ // if parens encountered before separator, they must be matched
+ if (c == open_paren) {
+ parens.push(1);
+ } else if (c == close_paren) {
+ // Fail for mismatched parens
+ if (parens.empty()) {
+ strm.clear(std::ios::failbit);
+ return strm;
+ }
+ parens.pop();
+ }
+ c = strm.get();
+ }
+ istringstream strm1(s1);
+ W1 w1 = W1::Zero();
+ strm1 >> w1;
+
+ // read second element
+ // Consumes the rest of the stream up to EOF.
+ string s2;
+ c = strm.get();
+ while (c != EOF) {
+ s2 += c;
+ c = strm.get();
+ }
+ if (s2.empty() || (s2[s2.size() - 1] != close_paren)) {
+ FSTERROR() << " is fst_weight_parentheses flag set correcty? ";
+ strm.clear(std::ios::failbit);
+ return strm;
+ }
+
+ // Drop the trailing close_paren before parsing W2.
+ s2.erase(s2.size() - 1, 1);
+ istringstream strm2(s2);
+ W2 w2 = W2::Zero();
+ strm2 >> w2;
+
+ w = PairWeight<W1, W2>(w1, w2);
+ return strm;
+ }
+
+ private:
+ W1 value1_;
+ W2 value2_;
+
+};
+
+// Two pair weights are equal when both components compare equal.
+template <class W1, class W2>
+inline bool operator==(const PairWeight<W1, W2> &w,
+                       const PairWeight<W1, W2> &v) {
+  if (!(w.Value1() == v.Value1()))
+    return false;
+  return w.Value2() == v.Value2();
+}
+
+// Two pair weights differ when either component differs.
+template <class W1, class W2>
+inline bool operator!=(const PairWeight<W1, W2> &w1,
+                       const PairWeight<W1, W2> &w2) {
+  if (w1.Value1() != w2.Value1())
+    return true;
+  return w1.Value2() != w2.Value2();
+}
+
+
+// Approximate equality: both components must be approximately equal,
+// each tested with the same 'delta'.
+template <class W1, class W2>
+inline bool ApproxEqual(const PairWeight<W1, W2> &w1,
+                        const PairWeight<W1, W2> &w2,
+                        float delta = kDelta) {
+  if (!ApproxEqual(w1.Value1(), w2.Value1(), delta))
+    return false;
+  return ApproxEqual(w1.Value2(), w2.Value2(), delta);
+}
+
+// Text output of a pair weight. The separator comes from
+// FLAGS_fst_weight_separator (exactly one character); the optional
+// surrounding parentheses come from FLAGS_fst_weight_parentheses (empty,
+// or exactly an open/close pair). A malformed flag puts the stream in the
+// bad state and nothing is written.
+template <class W1, class W2>
+inline ostream &operator<<(ostream &strm, const PairWeight<W1, W2> &w) {
+  if (FLAGS_fst_weight_separator.size() != 1) {
+    FSTERROR() << "FLAGS_fst_weight_separator.size() is not equal to 1";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+  const char sep = FLAGS_fst_weight_separator[0];
+
+  const bool use_parens = !FLAGS_fst_weight_parentheses.empty();
+  if (!use_parens) {
+    return strm << w.Value1() << sep << w.Value2();
+  }
+
+  if (FLAGS_fst_weight_parentheses.size() != 2) {
+    FSTERROR() << "FLAGS_fst_weight_parentheses.size() is not equal to 2";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+  const char lparen = FLAGS_fst_weight_parentheses[0];
+  const char rparen = FLAGS_fst_weight_parentheses[1];
+  return strm << lparen << w.Value1() << sep << w.Value2() << rparen;
+}
+
+// Text input of a pair weight. Validates the separator and parentheses
+// flags, then hands off to PairWeight::ReadNoParen or
+// PairWeight::ReadWithParen. A malformed flag puts the stream in the bad
+// state and 'w' is not modified.
+template <class W1, class W2>
+inline istream &operator>>(istream &strm, PairWeight<W1, W2> &w) {
+  if (FLAGS_fst_weight_separator.size() != 1) {
+    FSTERROR() << "FLAGS_fst_weight_separator.size() is not equal to 1";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+  const char sep = FLAGS_fst_weight_separator[0];
+
+  // No parentheses configured: plain "<w1><sep><w2>" form.
+  if (FLAGS_fst_weight_parentheses.empty())
+    return PairWeight<W1, W2>::ReadNoParen(strm, w, sep);
+
+  if (FLAGS_fst_weight_parentheses.size() != 2) {
+    FSTERROR() << "FLAGS_fst_weight_parentheses.size() is not equal to 2";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+  return PairWeight<W1, W2>::ReadWithParen(
+      strm, w, sep, FLAGS_fst_weight_parentheses[0],
+      FLAGS_fst_weight_parentheses[1]);
+}
+
+} // namespace fst
+
+#endif // FST_LIB_PAIR_WEIGHT_H_
diff --git a/src/include/fst/partition.h b/src/include/fst/partition.h
new file mode 100644
index 0000000..dcee67b
--- /dev/null
+++ b/src/include/fst/partition.h
@@ -0,0 +1,290 @@
+// partition.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: johans@google.com (Johan Schalkwyk)
+//
+// \file Functions and classes to create a partition of states
+//
+
+#ifndef FST_LIB_PARTITION_H__
+#define FST_LIB_PARTITION_H__
+
+#include <vector>
+using std::vector;
+#include <algorithm>
+
+
+#include <fst/queue.h>
+
+
+
+namespace fst {
+
+template <typename T> class PartitionIterator;
+
+// \class Partition
+// \brief Defines a partitioning of states. Typically used to represent
+// equivalence classes for Fst operations like minimization.
+//
+template <typename T>
+class Partition {
+  friend class PartitionIterator<T>;
+
+  // Node of the doubly-linked list that chains the members of one class.
+  struct Element {
+    Element() : value(0), next(0), prev(0) {}
+    Element(T v) : value(v), next(0), prev(0) {}
+
+    T value;        // element (state) id
+    Element* next;  // next member of the same class, or 0
+    Element* prev;  // previous member of the same class, or 0
+  };
+
+ public:
+  // NOTE(review): the class owns raw Element pointers and defines a
+  // destructor but no copy operations, so copying a Partition would delete
+  // the elements twice. Pass it by reference or pointer.
+
+  // Creates a partition with no elements and no classes.
+  Partition() : num_states_(0) {}
+
+  // Creates a partition with 'num_states' unassigned elements.
+  Partition(T num_states) : num_states_(0) {
+    Initialize(num_states);
+  }
+
+  ~Partition() {
+    DeleteElements();
+  }
+
+  // Create an empty partition for num_states. At initialization time
+  // all elements are not assigned to a class (i.e class_index = -1).
+  // Initialize just creates num_states of elements. All element
+  // operations are then done by simply disconnecting the element from
+  // its current class and placing it at the head of the next class.
+  // May be called again on a used partition: every piece of per-class
+  // state is dropped first.
+  void Initialize(size_t num_states) {
+    DeleteElements();
+    classes_.clear();
+    class_index_.clear();
+    // These are sized in lockstep with classes_ by AddClass() and
+    // AllocateClasses(). If they kept their old contents, the resize()
+    // calls there would preserve stale sizes and split pointers that
+    // refer to the elements deleted above.
+    class_size_.clear();
+    class_split_.clear();
+    split_size_.clear();
+    visited_classes_.clear();
+
+    elements_.resize(num_states);
+    class_index_.resize(num_states, -1);
+    class_size_.reserve(num_states);
+    for (size_t i = 0; i < num_states; ++i)
+      elements_[i] = new Element(i);
+    num_states_ = num_states;
+  }
+
+  // Add a class, resize classes_ and class_size_ resource by 1.
+  // Returns the id of the new (empty) class.
+  size_t AddClass() {
+    size_t num_classes = classes_.size();
+    classes_.resize(num_classes + 1, 0);
+    class_size_.resize(num_classes + 1, 0);
+    class_split_.resize(num_classes + 1, 0);
+    split_size_.resize(num_classes + 1, 0);
+    return num_classes;
+  }
+
+  // Adds 'num_classes' empty classes in one step.
+  void AllocateClasses(T num_classes) {
+    size_t n = classes_.size() + num_classes;
+    classes_.resize(n, 0);
+    class_size_.resize(n, 0);
+    class_split_.resize(n, 0);
+    split_size_.resize(n, 0);
+  }
+
+  // Add element_id to class_id. The Add method is used to initialize
+  // partition. Once elements have been added to a class, you need to
+  // use the Move() method to move an element from one class to another.
+  // The element is inserted at the head of the class's list.
+  void Add(T element_id, T class_id) {
+    Element* element = elements_[element_id];
+
+    if (classes_[class_id])
+      classes_[class_id]->prev = element;
+    element->next = classes_[class_id];
+    element->prev = 0;
+    classes_[class_id] = element;
+
+    class_index_[element_id] = class_id;
+    class_size_[class_id]++;
+  }
+
+  // Move element_id to class_id. Disconnects (removes) the element from
+  // its current class and adds it at the head of class_id. Source and
+  // destination may be the same class, in which case the element just
+  // moves to the head of its list.
+  void Move(T element_id, T class_id) {
+    T old_class_id = class_index_[element_id];
+
+    Element* element = elements_[element_id];
+    if (element->next) element->next->prev = element->prev;
+    if (element->prev) element->prev->next = element->next;
+    else classes_[old_class_id] = element->next;
+
+    Add(element_id, class_id);
+    class_size_[old_class_id]--;
+  }
+
+  // Split class on the element_id: marks the element as belonging to the
+  // "split" part of its class by moving it to the head of the list. The
+  // split itself is carried out later by SplitRefine()/FinalizeSplit().
+  // Classes of size one are never split.
+  void SplitOn(T element_id) {
+    T class_id = class_index_[element_id];
+    if (class_size_[class_id] == 1) return;
+
+    // first time class is split
+    if (split_size_[class_id] == 0)
+      visited_classes_.push_back(class_id);
+
+    // increment size of split (set of element at head of chain)
+    split_size_[class_id]++;
+
+    // update split point
+    if (class_split_[class_id] == 0)
+      class_split_[class_id] = classes_[class_id];
+    if (class_split_[class_id] == elements_[element_id])
+      class_split_[class_id] = elements_[element_id]->next;
+
+    // move to head of chain in same class
+    Move(element_id, class_id);
+  }
+
+  // Finalize class_id, split if required, and update class_splits,
+  // class indices of the newly created class. Returns the new_class id
+  // or -1 if no new class was created.
+  T SplitRefine(T class_id) {
+    // only split if necessary
+    if (class_size_[class_id] == split_size_[class_id]) {
+      // every member was marked, so the class stays whole
+      class_split_[class_id] = 0;
+      split_size_[class_id] = 0;
+      return -1;
+    } else {
+      T new_class = AddClass();
+      size_t remainder = class_size_[class_id] - split_size_[class_id];
+      if (remainder < split_size_[class_id]) {  // add smaller
+        // the unmarked tail is the smaller part: it becomes the new class
+        Element* split_el = class_split_[class_id];
+        classes_[new_class] = split_el;
+        class_size_[class_id] = split_size_[class_id];
+        class_size_[new_class] = remainder;
+        split_el->prev->next = 0;
+        split_el->prev = 0;
+      } else {
+        // the marked head is the smaller (or equal) part: it becomes the
+        // new class and the old class keeps the tail
+        Element* split_el = class_split_[class_id];
+        classes_[new_class] = classes_[class_id];
+        class_size_[class_id] = remainder;
+        class_size_[new_class] = split_size_[class_id];
+        split_el->prev->next = 0;
+        split_el->prev = 0;
+        classes_[class_id] = split_el;
+      }
+
+      // update class index for element in new class
+      for (Element* el = classes_[new_class]; el; el = el->next)
+        class_index_[el->value] = new_class;
+
+      class_split_[class_id] = 0;
+      split_size_[class_id] = 0;
+
+      return new_class;
+    }
+  }
+
+  // Once all states have been processed for a particular class C, we
+  // can finalize the split. FinalizeSplit() will update each block in the
+  // partition, create new ones and update the queue of active classes
+  // that require further refinement. 'L' may be null, in which case the
+  // new classes are created but not enqueued.
+  template <class Queue>
+  void FinalizeSplit(Queue* L) {
+    for (size_t i = 0; i < visited_classes_.size(); ++i) {
+      T new_class = SplitRefine(visited_classes_[i]);
+      if (new_class != -1 && L)
+        L->Enqueue(new_class);
+    }
+    visited_classes_.clear();
+  }
+
+  // Class id of 'element_id' (-1 while the element is unassigned).
+  const T class_id(T element_id) const {
+    return class_index_[element_id];
+  }
+
+  // Sizes of all classes, indexed by class id.
+  const vector<T>& class_sizes() const {
+    return class_size_;
+  }
+
+  // Number of elements currently in 'class_id'.
+  const size_t class_size(T class_id) const {
+    return class_size_[class_id];
+  }
+
+  // Number of classes created so far.
+  const T num_classes() const {
+    return classes_.size();
+  }
+
+ private:
+  // Frees every Element and empties elements_.
+  void DeleteElements() {
+    for (size_t i = 0; i < elements_.size(); ++i)
+      delete elements_[i];
+    elements_.clear();
+  }
+
+  int num_states_;
+
+  // container of all elements (owner of ptrs)
+  vector<Element*> elements_;
+
+  // linked list of elements belonging to class
+  vector<Element*> classes_;
+
+  // pointer to split point for each class
+  vector<Element*> class_split_;
+
+  // class index of element
+  vector<T> class_index_;
+
+  // class sizes
+  vector<T> class_size_;
+
+  // size of split for each class
+  vector<T> split_size_;
+
+  // set of visited classes to be used in split refine
+  vector<T> visited_classes_;
+};
+
+
+// iterate over members of a class in a partition
+// The iterator walks the class's linked list starting at its current head.
+// It keeps a reference to the partition, which must therefore outlive it.
+template <typename T>
+class PartitionIterator {
+ typedef typename Partition<T>::Element Element;
+ public:
+ // Positions the iterator on the first member of 'class_id'.
+ PartitionIterator(const Partition<T>& partition, T class_id)
+ : p_(partition),
+ element_(p_.classes_[class_id]),
+ class_id_(class_id) {}
+
+ // True once the end of the class's list has been passed.
+ bool Done() {
+ return (element_ == 0);
+ }
+
+ // Id of the current member; must not be called when Done().
+ const T Value() {
+ return (element_->value);
+ }
+
+ // Advances to the next member of the class.
+ void Next() {
+ element_ = element_->next;
+ }
+
+ // Restarts from the class's current head.
+ void Reset() {
+ element_ = p_.classes_[class_id_];
+ }
+
+ private:
+ const Partition<T>& p_;
+
+ // current list node, or 0 when iteration is finished
+ const Element* element_;
+
+ T class_id_;
+};
+} // namespace fst
+
+#endif // FST_LIB_PARTITION_H__
diff --git a/src/include/fst/power-weight.h b/src/include/fst/power-weight.h
new file mode 100644
index 0000000..256928d
--- /dev/null
+++ b/src/include/fst/power-weight.h
@@ -0,0 +1,159 @@
+// power-weight.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// Cartesian power weight semiring operation definitions.
+
+#ifndef FST_LIB_POWER_WEIGHT_H__
+#define FST_LIB_POWER_WEIGHT_H__
+
+#include <fst/tuple-weight.h>
+#include <fst/weight.h>
+
+
+namespace fst {
+
+// Cartesian power semiring: W ^ n
+// Forms:
+// - a left semimodule when W is a left semiring,
+// - a right semimodule when W is a right semiring,
+// - a bisemimodule when W is a semiring,
+// the free semimodule of rank n over W
+// The Times operation is overloaded to provide the
+// left and right scalar products.
+template <class W, unsigned int n>
+class PowerWeight : public TupleWeight<W, n> {
+ public:
+ using TupleWeight<W, n>::Zero;
+ using TupleWeight<W, n>::One;
+ using TupleWeight<W, n>::NoWeight;
+ using TupleWeight<W, n>::Quantize;
+ using TupleWeight<W, n>::Reverse;
+
+ typedef PowerWeight<typename W::ReverseWeight, n> ReverseWeight;
+
+ PowerWeight() {}
+
+ // Converting constructor from the underlying tuple.
+ PowerWeight(const TupleWeight<W, n> &w) : TupleWeight<W, n>(w) {}
+
+ // Forwards the iterator range [begin, end) to the TupleWeight constructor.
+ template <class Iterator>
+ PowerWeight(Iterator begin, Iterator end) : TupleWeight<W, n>(begin, end) {}
+
+ // Wraps TupleWeight<W, n>::Zero(); built once on first use.
+ static const PowerWeight<W, n> &Zero() {
+ static const PowerWeight<W, n> zero(TupleWeight<W, n>::Zero());
+ return zero;
+ }
+
+ // Wraps TupleWeight<W, n>::One(); built once on first use.
+ static const PowerWeight<W, n> &One() {
+ static const PowerWeight<W, n> one(TupleWeight<W, n>::One());
+ return one;
+ }
+
+ // Wraps TupleWeight<W, n>::NoWeight(); built once on first use.
+ static const PowerWeight<W, n> &NoWeight() {
+ static const PowerWeight<W, n> no_weight(TupleWeight<W, n>::NoWeight());
+ return no_weight;
+ }
+
+ // Type name of the form "<W::Type()>_^<n>", computed on first call and
+ // cached in a function-local static.
+ static const string &Type() {
+ static string type;
+ if (type.empty()) {
+ string power;
+ Int64ToStr(n, &power);
+ type = W::Type() + "_^" + power;
+ }
+ return type;
+ }
+
+ // Of W's properties, only the left/right semiring, commutative and
+ // idempotent bits are kept.
+ static uint64 Properties() {
+ uint64 props = W::Properties();
+ return props & (kLeftSemiring | kRightSemiring |
+ kCommutative | kIdempotent);
+ }
+
+ // Returns TupleWeight's Quantize(delta), converted back to PowerWeight.
+ PowerWeight<W, n> Quantize(float delta = kDelta) const {
+ return TupleWeight<W, n>::Quantize(delta);
+ }
+
+ // Returns TupleWeight's Reverse(), converted to the reverse PowerWeight.
+ ReverseWeight Reverse() const {
+ return TupleWeight<W, n>::Reverse();
+ }
+};
+
+
+// Semiring plus operation: component k of the result is
+// Plus(w1[k], w2[k]).
+template <class W, unsigned int n>
+inline PowerWeight<W, n> Plus(const PowerWeight<W, n> &w1,
+                              const PowerWeight<W, n> &w2) {
+  PowerWeight<W, n> sum;
+  for (size_t k = 0; k != n; ++k) {
+    sum.SetValue(k, Plus(w1.Value(k), w2.Value(k)));
+  }
+  return sum;
+}
+
+// Semiring times operation: component k of the result is
+// Times(w1[k], w2[k]).
+template <class W, unsigned int n>
+inline PowerWeight<W, n> Times(const PowerWeight<W, n> &w1,
+                               const PowerWeight<W, n> &w2) {
+  PowerWeight<W, n> product;
+  for (size_t k = 0; k != n; ++k) {
+    product.SetValue(k, Times(w1.Value(k), w2.Value(k)));
+  }
+  return product;
+}
+
+// Semiring divide operation: component k of the result is
+// Divide(w1[k], w2[k], type).
+template <class W, unsigned int n>
+inline PowerWeight<W, n> Divide(const PowerWeight<W, n> &w1,
+                                const PowerWeight<W, n> &w2,
+                                DivideType type = DIVIDE_ANY) {
+  PowerWeight<W, n> quotient;
+  for (size_t k = 0; k != n; ++k) {
+    quotient.SetValue(k, Divide(w1.Value(k), w2.Value(k), type));
+  }
+  return quotient;
+}
+
+// Semimodule left scalar product: component i of the result is
+// Times(s, w[i]).
+template <class W, unsigned int n>
+inline PowerWeight<W, n> Times(const W &s, const PowerWeight<W, n> &w) {
+  PowerWeight<W, n> sw;
+  for (size_t i = 0; i < n; ++i)
+    sw.SetValue(i, Times(s, w.Value(i)));
+  // Return the scaled tuple. Returning the input 'w' would discard the
+  // products computed above and make the scalar product a no-op.
+  return sw;
+}
+
+// Semimodule right scalar product: component i of the result is
+// Times(w[i], s).
+template <class W, unsigned int n>
+inline PowerWeight<W, n> Times(const PowerWeight<W, n> &w, const W &s) {
+  PowerWeight<W, n> ws;
+  for (size_t i = 0; i < n; ++i)
+    ws.SetValue(i, Times(w.Value(i), s));
+  // Return the scaled tuple. Returning the input 'w' would discard the
+  // products computed above and make the scalar product a no-op.
+  return ws;
+}
+
+// Semimodule dot product: the Plus-sum over all components k of
+// Times(w1[k], w2[k]), starting from W::Zero().
+template <class W, unsigned int n>
+inline W DotProduct(const PowerWeight<W, n> &w1,
+                    const PowerWeight<W, n> &w2) {
+  W acc = W::Zero();
+  for (size_t k = 0; k != n; ++k) {
+    acc = Plus(acc, Times(w1.Value(k), w2.Value(k)));
+  }
+  return acc;
+}
+
+
+} // namespace fst
+
+#endif // FST_LIB_POWER_WEIGHT_H__
diff --git a/src/include/fst/product-weight.h b/src/include/fst/product-weight.h
new file mode 100644
index 0000000..16dede8
--- /dev/null
+++ b/src/include/fst/product-weight.h
@@ -0,0 +1,115 @@
+// product-weight.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Product weight set and associated semiring operation definitions.
+
+#ifndef FST_LIB_PRODUCT_WEIGHT_H__
+#define FST_LIB_PRODUCT_WEIGHT_H__
+
+#include <stack>
+#include <string>
+
+#include <fst/pair-weight.h>
+#include <fst/weight.h>
+
+
+namespace fst {
+
+// Product semiring W1 * W2: a PairWeight whose first component is a W1 and
+// whose second component is a W2. The semiring operations are defined
+// component-wise by the free functions that follow this class.
+template<class W1, class W2>
+class ProductWeight : public PairWeight<W1, W2> {
+ public:
+  using PairWeight<W1, W2>::Zero;
+  using PairWeight<W1, W2>::One;
+  using PairWeight<W1, W2>::NoWeight;
+  using PairWeight<W1, W2>::Quantize;
+  using PairWeight<W1, W2>::Reverse;
+
+  // The reverse of a product is the product of the component reverses.
+  typedef ProductWeight<typename W1::ReverseWeight, typename W2::ReverseWeight>
+  ReverseWeight;
+
+  ProductWeight() {}
+
+  ProductWeight(const PairWeight<W1, W2>& w) : PairWeight<W1, W2>(w) {}
+
+  ProductWeight(W1 w1, W2 w2) : PairWeight<W1, W2>(w1, w2) {}
+
+  // Wraps PairWeight's Zero() in a function-local static ProductWeight.
+  static const ProductWeight<W1, W2> &Zero() {
+    static const ProductWeight<W1, W2> kZero(PairWeight<W1, W2>::Zero());
+    return kZero;
+  }
+
+  // Wraps PairWeight's One() in a function-local static ProductWeight.
+  static const ProductWeight<W1, W2> &One() {
+    static const ProductWeight<W1, W2> kOne(PairWeight<W1, W2>::One());
+    return kOne;
+  }
+
+  // Wraps PairWeight's NoWeight() in a function-local static ProductWeight.
+  static const ProductWeight<W1, W2> &NoWeight() {
+    static const ProductWeight<W1, W2> kNoWeight(
+        PairWeight<W1, W2>::NoWeight());
+    return kNoWeight;
+  }
+
+  // Type name: the two component type names joined by "_X_".
+  static const string &Type() {
+    static const string kType(W1::Type() + "_X_" + W2::Type());
+    return kType;
+  }
+
+  // A semiring property holds for the product only when both components
+  // have it; only the four bits listed below are reported.
+  static uint64 Properties() {
+    const uint64 shared = W1::Properties() & W2::Properties();
+    return shared & (kLeftSemiring | kRightSemiring |
+                     kCommutative | kIdempotent);
+  }
+
+  ProductWeight<W1, W2> Quantize(float delta = kDelta) const {
+    return PairWeight<W1, W2>::Quantize(delta);
+  }
+
+  ReverseWeight Reverse() const {
+    return PairWeight<W1, W2>::Reverse();
+  }
+};
+
+// Semiring plus for product weights: applies Plus() to each component.
+template <class W1, class W2>
+inline ProductWeight<W1, W2> Plus(const ProductWeight<W1, W2> &w,
+                                  const ProductWeight<W1, W2> &v) {
+  const W1 first = Plus(w.Value1(), v.Value1());
+  const W2 second = Plus(w.Value2(), v.Value2());
+  return ProductWeight<W1, W2>(first, second);
+}
+
+// Semiring times for product weights: applies Times() to each component.
+template <class W1, class W2>
+inline ProductWeight<W1, W2> Times(const ProductWeight<W1, W2> &w,
+                                   const ProductWeight<W1, W2> &v) {
+  const W1 first = Times(w.Value1(), v.Value1());
+  const W2 second = Times(w.Value2(), v.Value2());
+  return ProductWeight<W1, W2>(first, second);
+}
+
+// Semiring divide for product weights: applies Divide() to each component,
+// passing the same division type to both.
+template <class W1, class W2>
+inline ProductWeight<W1, W2> Divide(const ProductWeight<W1, W2> &w,
+                                    const ProductWeight<W1, W2> &v,
+                                    DivideType typ = DIVIDE_ANY) {
+  const W1 first = Divide(w.Value1(), v.Value1(), typ);
+  const W2 second = Divide(w.Value2(), v.Value2(), typ);
+  return ProductWeight<W1, W2>(first, second);
+}
+
+} // namespace fst
+
+#endif // FST_LIB_PRODUCT_WEIGHT_H__
diff --git a/src/include/fst/project.h b/src/include/fst/project.h
new file mode 100644
index 0000000..07946c3
--- /dev/null
+++ b/src/include/fst/project.h
@@ -0,0 +1,148 @@
+// project.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Functions and classes to project an Fst on to its domain or range.
+
+#ifndef FST_LIB_PROJECT_H__
+#define FST_LIB_PROJECT_H__
+
+#include <fst/arc-map.h>
+#include <fst/mutable-fst.h>
+
+
+namespace fst {
+
+// This specifies whether to project on input or output: PROJECT_INPUT keeps
+// each arc's input label on both sides, PROJECT_OUTPUT keeps its output label.
+enum ProjectType { PROJECT_INPUT = 1, PROJECT_OUTPUT = 2 };
+
+
+// Arc mapper implementing projection: every arc is rewritten so that both
+// its input and output label equal the label on the projected side.
+template <class A> class ProjectMapper {
+ public:
+  explicit ProjectMapper(ProjectType project_type)
+      : project_type_(project_type) {}
+
+  // Returns a copy of 'arc' with the kept label on both sides; weight and
+  // destination state are carried over unchanged.
+  A operator()(const A &arc) {
+    typename A::Label kept = arc.olabel;
+    if (project_type_ == PROJECT_INPUT)
+      kept = arc.ilabel;
+    return A(kept, kept, arc.weight, arc.nextstate);
+  }
+
+  MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; }
+
+  // Input symbols are copied only when projecting on the input side.
+  MapSymbolsAction InputSymbolsAction() const {
+    if (project_type_ == PROJECT_INPUT)
+      return MAP_COPY_SYMBOLS;
+    return MAP_CLEAR_SYMBOLS;
+  }
+
+  // Output symbols are copied only when projecting on the output side.
+  MapSymbolsAction OutputSymbolsAction() const {
+    if (project_type_ == PROJECT_OUTPUT)
+      return MAP_COPY_SYMBOLS;
+    return MAP_CLEAR_SYMBOLS;
+  }
+
+  uint64 Properties(uint64 props) {
+    const bool project_input = (project_type_ == PROJECT_INPUT);
+    return ProjectProperties(props, project_input);
+  }
+
+ private:
+  ProjectType project_type_;
+};
+
+
+// Projects an FST onto its domain or range by either copying each arc's
+// input label to the output label or vice versa. This version modifies
+// its input.
+//
+// Complexity:
+// - Time: O(V + E)
+// - Space: O(1)
+// where V = # of states and E = # of arcs.
+template<class Arc> inline
+void Project(MutableFst<Arc> *fst, ProjectType project_type) {
+  // Rewrite every arc in place so both labels equal the projected side.
+  ArcMap(fst, ProjectMapper<Arc>(project_type));
+  // Make the symbol table of the other side match the projected side.
+  switch (project_type) {
+    case PROJECT_INPUT:
+      fst->SetOutputSymbols(fst->InputSymbols());
+      break;
+    case PROJECT_OUTPUT:
+      fst->SetInputSymbols(fst->OutputSymbols());
+      break;
+    default:
+      break;
+  }
+}
+
+
+// Projects an FST onto its domain or range by either copying each arc's
+// input label to the output label or vice versa. This version is a delayed
+// Fst.
+//
+// Complexity:
+// - Time: O(v + e)
+// - Space: O(1)
+// where v = # of states visited, e = # of arcs visited. Constant
+// time to visit an input state or arc is assumed, exclusive
+// of caching.
+template <class A>
+class ProjectFst : public ArcMapFst<A, A, ProjectMapper<A> > {
+ public:
+  typedef A Arc;
+  typedef ProjectMapper<A> C;
+  typedef ArcMapFstImpl< A, A, ProjectMapper<A> > Impl;
+  using ImplToFst<Impl>::GetImpl;
+
+  // Wraps 'fst' with a ProjectMapper and makes the symbol table of the
+  // non-projected side match the projected side of the input.
+  ProjectFst(const Fst<A> &fst, ProjectType project_type)
+      : ArcMapFst<A, A, C>(fst, C(project_type)) {
+    switch (project_type) {
+      case PROJECT_INPUT:
+        GetImpl()->SetOutputSymbols(fst.InputSymbols());
+        break;
+      case PROJECT_OUTPUT:
+        GetImpl()->SetInputSymbols(fst.OutputSymbols());
+        break;
+      default:
+        break;
+    }
+  }
+
+  // See Fst<>::Copy() for doc.
+  ProjectFst(const ProjectFst<A> &fst, bool safe = false)
+      : ArcMapFst<A, A, C>(fst, safe) {}
+
+  // Get a copy of this ProjectFst. See Fst<>::Copy() for further doc.
+  virtual ProjectFst<A> *Copy(bool safe = false) const {
+    return new ProjectFst(*this, safe);
+  }
+};
+
+
+// Specialization for ProjectFst: adds no behavior of its own and simply
+// forwards to the state iterator of the underlying ArcMapFst.
+template <class A>
+class StateIterator< ProjectFst<A> >
+ : public StateIterator< ArcMapFst<A, A, ProjectMapper<A> > > {
+ public:
+ explicit StateIterator(const ProjectFst<A> &fst)
+ : StateIterator< ArcMapFst<A, A, ProjectMapper<A> > >(fst) {}
+};
+
+
+// Specialization for ProjectFst: adds no behavior of its own and simply
+// forwards to the arc iterator of the underlying ArcMapFst for state 's'.
+template <class A>
+class ArcIterator< ProjectFst<A> >
+ : public ArcIterator< ArcMapFst<A, A, ProjectMapper<A> > > {
+ public:
+ ArcIterator(const ProjectFst<A> &fst, typename A::StateId s)
+ : ArcIterator< ArcMapFst<A, A, ProjectMapper<A> > >(fst, s) {}
+};
+
+
+// Useful alias when using StdArc.
+typedef ProjectFst<StdArc> StdProjectFst;
+
+} // namespace fst
+
+#endif // FST_LIB_PROJECT_H__
diff --git a/src/include/fst/properties.h b/src/include/fst/properties.h
new file mode 100644
index 0000000..8fab16f
--- /dev/null
+++ b/src/include/fst/properties.h
@@ -0,0 +1,460 @@
+// properties.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: Michael Riley <riley@google.com>
+// \file
+// FST property bits.
+
+#ifndef FST_LIB_PROPERTIES_H__
+#define FST_LIB_PROPERTIES_H__
+
+#include <sys/types.h>
+#include <vector>
+using std::vector;
+
+#include <fst/compat.h>
+
+namespace fst {
+
+// The property bits here assert facts about an FST. If individual
+// bits are added, then the composite properties below, the property
+// functions and property names in properties.cc, and
+// TestProperties() in test-properties.h should be updated.
+
+//
+// BINARY PROPERTIES
+//
+// For each property below, there is a single bit. If it is set,
+// the property is true. If it is not set, the property is false.
+//
+
+// The Fst is an ExpandedFst
+const uint64 kExpanded = 0x0000000000000001ULL;
+
+// The Fst is a MutableFst
+const uint64 kMutable = 0x0000000000000002ULL;
+
+// An error was detected while constructing/using the FST
+const uint64 kError = 0x0000000000000004ULL;
+
+//
+// TRINARY PROPERTIES
+//
+// For each of these properties below there is a pair of property bits
+// - one positive and one negative. If the positive bit is set, the
+// property is true. If the negative bit is set, the property is
+// false. If neither is set, the property has unknown value. Both
+// should never be simultaneously set. The individual positive and
+// negative bit pairs should be adjacent with the positive bit
+// at an odd and lower position.
+
+// ilabel == olabel for each arc
+const uint64 kAcceptor = 0x0000000000010000ULL;
+// ilabel != olabel for some arc
+const uint64 kNotAcceptor = 0x0000000000020000ULL;
+
+// ilabels unique leaving each state
+const uint64 kIDeterministic = 0x0000000000040000ULL;
+// ilabels not unique leaving some state
+const uint64 kNonIDeterministic = 0x0000000000080000ULL;
+
+// olabels unique leaving each state
+const uint64 kODeterministic = 0x0000000000100000ULL;
+// olabels not unique leaving some state
+const uint64 kNonODeterministic = 0x0000000000200000ULL;
+
+// FST has input/output epsilons
+const uint64 kEpsilons = 0x0000000000400000ULL;
+// FST has no input/output epsilons
+const uint64 kNoEpsilons = 0x0000000000800000ULL;
+
+// FST has input epsilons
+const uint64 kIEpsilons = 0x0000000001000000ULL;
+// FST has no input epsilons
+const uint64 kNoIEpsilons = 0x0000000002000000ULL;
+
+// FST has output epsilons
+const uint64 kOEpsilons = 0x0000000004000000ULL;
+// FST has no output epsilons
+const uint64 kNoOEpsilons = 0x0000000008000000ULL;
+
+// ilabels sorted wrt < for each state
+const uint64 kILabelSorted = 0x0000000010000000ULL;
+// ilabels not sorted wrt < for some state
+const uint64 kNotILabelSorted = 0x0000000020000000ULL;
+
+// olabels sorted wrt < for each state
+const uint64 kOLabelSorted = 0x0000000040000000ULL;
+// olabels not sorted wrt < for some state
+const uint64 kNotOLabelSorted = 0x0000000080000000ULL;
+
+// Non-trivial arc or final weights
+const uint64 kWeighted = 0x0000000100000000ULL;
+// Only trivial arc and final weights
+const uint64 kUnweighted = 0x0000000200000000ULL;
+
+// FST has cycles
+const uint64 kCyclic = 0x0000000400000000ULL;
+// FST has no cycles
+const uint64 kAcyclic = 0x0000000800000000ULL;
+
+// FST has cycles containing the initial state
+const uint64 kInitialCyclic = 0x0000001000000000ULL;
+// FST has no cycles containing the initial state
+const uint64 kInitialAcyclic = 0x0000002000000000ULL;
+
+// FST is topologically sorted
+const uint64 kTopSorted = 0x0000004000000000ULL;
+// FST is not topologically sorted
+const uint64 kNotTopSorted = 0x0000008000000000ULL;
+
+// All states reachable from the initial state
+const uint64 kAccessible = 0x0000010000000000ULL;
+// Not all states reachable from the initial state
+const uint64 kNotAccessible = 0x0000020000000000ULL;
+
+// All states can reach a final state
+const uint64 kCoAccessible = 0x0000040000000000ULL;
+// Not all states can reach a final state
+const uint64 kNotCoAccessible = 0x0000080000000000ULL;
+
+// If NumStates() > 0, then state 0 is initial, state NumStates()-1 is
+// final, there is a transition from each non-final state i to
+// state i+1, and there are no other transitions.
+const uint64 kString = 0x0000100000000000ULL;
+
+// Not a string FST
+const uint64 kNotString = 0x0000200000000000ULL;
+
+//
+// COMPOSITE PROPERTIES
+//
+
+// Properties of an empty machine
+const uint64 kNullProperties
+ = kAcceptor | kIDeterministic | kODeterministic | kNoEpsilons |
+ kNoIEpsilons | kNoOEpsilons | kILabelSorted | kOLabelSorted |
+ kUnweighted | kAcyclic | kInitialAcyclic | kTopSorted |
+ kAccessible | kCoAccessible | kString;
+
+// Properties that are preserved when an FST is copied
+const uint64 kCopyProperties
+ = kError | kAcceptor | kNotAcceptor | kIDeterministic | kNonIDeterministic |
+ kODeterministic | kNonODeterministic | kEpsilons | kNoEpsilons |
+ kIEpsilons | kNoIEpsilons | kOEpsilons | kNoOEpsilons |
+ kILabelSorted | kNotILabelSorted | kOLabelSorted |
+ kNotOLabelSorted | kWeighted | kUnweighted | kCyclic | kAcyclic |
+ kInitialCyclic | kInitialAcyclic | kTopSorted | kNotTopSorted |
+ kAccessible | kNotAccessible | kCoAccessible | kNotCoAccessible |
+ kString | kNotString;
+
+// Properties that are intrinsic to the FST
+const uint64 kIntrinsicProperties
+ = kExpanded | kMutable | kAcceptor | kNotAcceptor | kIDeterministic |
+ kNonIDeterministic | kODeterministic | kNonODeterministic |
+ kEpsilons | kNoEpsilons | kIEpsilons | kNoIEpsilons | kOEpsilons |
+ kNoOEpsilons | kILabelSorted | kNotILabelSorted | kOLabelSorted |
+ kNotOLabelSorted | kWeighted | kUnweighted | kCyclic | kAcyclic |
+ kInitialCyclic | kInitialAcyclic | kTopSorted | kNotTopSorted |
+ kAccessible | kNotAccessible | kCoAccessible | kNotCoAccessible |
+ kString | kNotString;
+
+// Properties that are (potentially) extrinsic to the FST
+const uint64 kExtrinsicProperties = kError;
+
+// Properties that are preserved when an FST start state is set
+const uint64 kSetStartProperties
+ = kExpanded | kMutable | kError | kAcceptor | kNotAcceptor |
+ kIDeterministic | kNonIDeterministic | kODeterministic |
+ kNonODeterministic | kEpsilons | kNoEpsilons | kIEpsilons |
+ kNoIEpsilons | kOEpsilons | kNoOEpsilons | kILabelSorted |
+ kNotILabelSorted | kOLabelSorted | kNotOLabelSorted | kWeighted |
+ kUnweighted | kCyclic | kAcyclic | kTopSorted | kNotTopSorted |
+ kCoAccessible | kNotCoAccessible;
+
+// Properties that are preserved when an FST final weight is set
+const uint64 kSetFinalProperties
+ = kExpanded | kMutable | kError | kAcceptor | kNotAcceptor |
+ kIDeterministic | kNonIDeterministic | kODeterministic |
+ kNonODeterministic | kEpsilons | kNoEpsilons | kIEpsilons |
+ kNoIEpsilons | kOEpsilons | kNoOEpsilons | kILabelSorted |
+ kNotILabelSorted | kOLabelSorted | kNotOLabelSorted | kCyclic |
+ kAcyclic | kInitialCyclic | kInitialAcyclic | kTopSorted |
+ kNotTopSorted | kAccessible | kNotAccessible;
+
+// Properties that are preserved when an FST state is added
+const uint64 kAddStateProperties
+ = kExpanded | kMutable | kError | kAcceptor | kNotAcceptor |
+ kIDeterministic | kNonIDeterministic | kODeterministic |
+ kNonODeterministic | kEpsilons | kNoEpsilons | kIEpsilons |
+ kNoIEpsilons | kOEpsilons | kNoOEpsilons | kILabelSorted |
+ kNotILabelSorted | kOLabelSorted | kNotOLabelSorted | kWeighted |
+ kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
+ kInitialAcyclic | kTopSorted | kNotTopSorted | kNotAccessible |
+ kNotCoAccessible | kNotString;
+
+// Properties that are preserved when an FST arc is added
+const uint64 kAddArcProperties = kExpanded | kMutable | kError | kNotAcceptor |
+ kNonIDeterministic | kNonODeterministic | kEpsilons | kIEpsilons |
+ kOEpsilons | kNotILabelSorted | kNotOLabelSorted | kWeighted |
+ kCyclic | kInitialCyclic | kNotTopSorted | kAccessible | kCoAccessible;
+
+// Properties that are preserved when an FST arc is set
+const uint64 kSetArcProperties = kExpanded | kMutable | kError;
+
+// Properties that are preserved when FST states are deleted
+const uint64 kDeleteStatesProperties
+ = kExpanded | kMutable | kError | kAcceptor | kIDeterministic |
+ kODeterministic | kNoEpsilons | kNoIEpsilons | kNoOEpsilons |
+ kILabelSorted | kOLabelSorted | kUnweighted | kAcyclic |
+ kInitialAcyclic | kTopSorted;
+
+// Properties that are preserved when FST arcs are deleted
+const uint64 kDeleteArcsProperties
+ = kExpanded | kMutable | kError | kAcceptor | kIDeterministic |
+ kODeterministic | kNoEpsilons | kNoIEpsilons | kNoOEpsilons |
+ kILabelSorted | kOLabelSorted | kUnweighted | kAcyclic |
+ kInitialAcyclic | kTopSorted | kNotAccessible | kNotCoAccessible;
+
+// Properties that are preserved when an FST's states are reordered
+const uint64 kStateSortProperties = kExpanded | kMutable | kError | kAcceptor |
+ kNotAcceptor | kIDeterministic | kNonIDeterministic |
+ kODeterministic | kNonODeterministic | kEpsilons | kNoEpsilons |
+ kIEpsilons | kNoIEpsilons | kOEpsilons | kNoOEpsilons |
+ kILabelSorted | kNotILabelSorted | kOLabelSorted | kNotOLabelSorted
+ | kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
+ kInitialAcyclic | kAccessible | kNotAccessible | kCoAccessible |
+ kNotCoAccessible;
+
+// Properties that are preserved when an FST's arcs are reordered
+const uint64 kArcSortProperties =
+ kExpanded | kMutable | kError | kAcceptor | kNotAcceptor | kIDeterministic |
+ kNonIDeterministic | kODeterministic | kNonODeterministic |
+ kEpsilons | kNoEpsilons | kIEpsilons | kNoIEpsilons | kOEpsilons |
+ kNoOEpsilons | kWeighted | kUnweighted | kCyclic | kAcyclic |
+ kInitialCyclic | kInitialAcyclic | kTopSorted | kNotTopSorted |
+ kAccessible | kNotAccessible | kCoAccessible | kNotCoAccessible |
+ kString | kNotString;
+
+// Properties that are preserved when an FST's input labels are changed.
+const uint64 kILabelInvariantProperties =
+ kExpanded | kMutable | kError | kODeterministic | kNonODeterministic |
+ kOEpsilons | kNoOEpsilons | kOLabelSorted | kNotOLabelSorted |
+ kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
+ kInitialAcyclic | kTopSorted | kNotTopSorted | kAccessible |
+ kNotAccessible | kCoAccessible | kNotCoAccessible | kString | kNotString;
+
+// Properties that are preserved when an FST's output labels are changed.
+const uint64 kOLabelInvariantProperties =
+ kExpanded | kMutable | kError | kIDeterministic | kNonIDeterministic |
+ kIEpsilons | kNoIEpsilons | kILabelSorted | kNotILabelSorted |
+ kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
+ kInitialAcyclic | kTopSorted | kNotTopSorted | kAccessible |
+ kNotAccessible | kCoAccessible | kNotCoAccessible | kString | kNotString;
+
+// Properties that are preserved when an FST's weights are changed.
+// This assumes that the set of states that are non-final is not changed.
+const uint64 kWeightInvariantProperties =
+ kExpanded | kMutable | kError | kAcceptor | kNotAcceptor | kIDeterministic |
+ kNonIDeterministic | kODeterministic | kNonODeterministic |
+ kEpsilons | kNoEpsilons | kIEpsilons | kNoIEpsilons | kOEpsilons |
+ kNoOEpsilons | kILabelSorted | kNotILabelSorted | kOLabelSorted |
+ kNotOLabelSorted | kCyclic | kAcyclic | kInitialCyclic | kInitialAcyclic |
+ kTopSorted | kNotTopSorted | kAccessible | kNotAccessible | kCoAccessible |
+ kNotCoAccessible | kString | kNotString;
+
+// Properties that are preserved when a superfinal state is added
+// and an FST's final weights are directed to it via new transitions.
+const uint64 kAddSuperFinalProperties = kExpanded | kMutable | kError |
+ kAcceptor | kNotAcceptor | kNonIDeterministic | kNonODeterministic |
+ kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted | kNotOLabelSorted |
+ kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
+ kInitialAcyclic | kNotTopSorted | kNotAccessible | kCoAccessible |
+ kNotCoAccessible | kNotString;
+
+// Properties that are preserved when a superfinal state is removed
+// and the epsilon transitions directed to it are made final weights.
+const uint64 kRmSuperFinalProperties = kExpanded | kMutable | kError |
+ kAcceptor | kNotAcceptor | kIDeterministic | kODeterministic |
+ kNoEpsilons | kNoIEpsilons | kNoOEpsilons | kILabelSorted | kOLabelSorted |
+ kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
+ kInitialAcyclic | kTopSorted | kAccessible | kCoAccessible |
+ kNotCoAccessible | kString;
+
+// All binary properties: the three low bits, i.e.
+// kExpanded | kMutable | kError.
+const uint64 kBinaryProperties = 0x0000000000000007ULL;
+
+// All trinary properties: bits 16 through 45 (counting from bit 0), i.e.
+// every positive/negative pair from kAcceptor up to kNotString.
+const uint64 kTrinaryProperties = 0x00003fffffff0000ULL;
+
+//
+// COMPUTED PROPERTIES
+//
+
+// 1st bit of trinary properties: the positive bit of each pair. The
+// 0x5555... mask keeps the even-numbered bit positions (counting from
+// bit 0), which is where the positive bits such as kAcceptor sit.
+const uint64 kPosTrinaryProperties =
+ kTrinaryProperties & 0x5555555555555555ULL;
+
+// 2nd bit of trinary properties: the negative bit of each pair. The
+// 0xaaaa... mask keeps the odd-numbered bit positions (counting from
+// bit 0), which is where the negative bits such as kNotAcceptor sit.
+const uint64 kNegTrinaryProperties =
+ kTrinaryProperties & 0xaaaaaaaaaaaaaaaaULL;
+
+// All properties: the union of the binary and trinary masks.
+const uint64 kFstProperties = kBinaryProperties | kTrinaryProperties;
+
+//
+// PROPERTY FUNCTIONS and STRING NAMES (defined in properties.cc)
+//
+
+// Below are functions for getting property bit vectors when executing
+// mutating fst operations.
+inline uint64 SetStartProperties(uint64 inprops);
+template <typename Weight>
+uint64 SetFinalProperties(uint64 inprops, Weight old_weight,
+ Weight new_weight);
+inline uint64 AddStateProperties(uint64 inprops);
+template <typename A>
+uint64 AddArcProperties(uint64 inprops, typename A::StateId s, const A &arc,
+ const A *prev_arc);
+inline uint64 DeleteStatesProperties(uint64 inprops);
+inline uint64 DeleteAllStatesProperties(uint64 inprops, uint64 staticProps);
+inline uint64 DeleteArcsProperties(uint64 inprops);
+
+uint64 ClosureProperties(uint64 inprops, bool star, bool delayed = false);
+uint64 ComplementProperties(uint64 inprops);
+uint64 ComposeProperties(uint64 inprops1, uint64 inprops2);
+uint64 ConcatProperties(uint64 inprops1, uint64 inprops2,
+ bool delayed = false);
+uint64 DeterminizeProperties(uint64 inprops, bool has_subsequential_label);
+uint64 FactorWeightProperties(uint64 inprops);
+uint64 InvertProperties(uint64 inprops);
+uint64 ProjectProperties(uint64 inprops, bool project_input);
+uint64 RandGenProperties(uint64 inprops, bool weighted);
+uint64 RelabelProperties(uint64 inprops);
+uint64 ReplaceProperties(const vector<uint64>& inprops,
+ ssize_t root,
+ bool epsilon_on_replace,
+ bool no_empty_fst);
+uint64 ReverseProperties(uint64 inprops);
+uint64 ReweightProperties(uint64 inprops);
+uint64 RmEpsilonProperties(uint64 inprops, bool delayed = false);
+uint64 ShortestPathProperties(uint64 props);
+uint64 SynchronizeProperties(uint64 inprops);
+uint64 UnionProperties(uint64 inprops1, uint64 inprops2, bool delayed = false);
+
+// Definitions of inlined functions.
+
+// Property bits after the start state is set: keeps the bits listed in
+// kSetStartProperties and, if the FST was known to be acyclic, asserts
+// kInitialAcyclic as well.
+uint64 SetStartProperties(uint64 inprops) {
+  const uint64 preserved = inprops & kSetStartProperties;
+  if ((inprops & kAcyclic) == 0)
+    return preserved;
+  return preserved | kInitialAcyclic;
+}
+
+// Property bits after a state is added: keeps only the bits listed in
+// kAddStateProperties and drops every other bit.
+uint64 AddStateProperties(uint64 inprops) {
+ return inprops & kAddStateProperties;
+}
+
+// Property bits after some states are deleted: keeps only the bits listed
+// in kDeleteStatesProperties and drops every other bit.
+uint64 DeleteStatesProperties(uint64 inprops) {
+ return inprops & kDeleteStatesProperties;
+}
+
+// Property bits after all states are deleted: the properties of an empty
+// machine (kNullProperties), plus the caller-supplied static bits, plus
+// kError if it was already set on input.
+uint64 DeleteAllStatesProperties(uint64 inprops, uint64 staticprops) {
+  return (inprops & kError) | kNullProperties | staticprops;
+}
+
+// Property bits after arcs are deleted: keeps only the bits listed in
+// kDeleteArcsProperties and drops every other bit.
+uint64 DeleteArcsProperties(uint64 inprops) {
+ return inprops & kDeleteArcsProperties;
+}
+
+// Definitions of template functions.
+
+// Property bits after a state's final weight changes from 'old_weight' to
+// 'new_weight'. A weight counts as non-trivial when it is neither
+// Weight::Zero() nor Weight::One().
+template <typename Weight>
+uint64 SetFinalProperties(uint64 inprops, Weight old_weight,
+                          Weight new_weight) {
+  const bool old_nontrivial =
+      old_weight != Weight::Zero() && old_weight != Weight::One();
+  const bool new_nontrivial =
+      new_weight != Weight::Zero() && new_weight != Weight::One();
+
+  uint64 outprops = inprops;
+  // Replacing a non-trivial weight: kWeighted can no longer be asserted.
+  if (old_nontrivial)
+    outprops &= ~kWeighted;
+  // Installing a non-trivial weight: the FST is now known to be weighted.
+  if (new_nontrivial)
+    outprops = (outprops | kWeighted) & ~kUnweighted;
+
+  return outprops & (kSetFinalProperties | kWeighted | kUnweighted);
+}
+
+/// Computes the property bits that hold after MutableFst::AddArc.
+///
+/// \param inprops the property bits of the fst before the arc is added
+/// \param s the id of the state the arc is being added to
+/// \param arc the arc being added to state s
+/// \param prev_arc the previously-added (or "last") arc of state s, or NULL
+///   if s currently has no arcs
+template <typename A>
+uint64 AddArcProperties(uint64 inprops, typename A::StateId s,
+                        const A &arc, const A *prev_arc) {
+  // Bits the new arc proves true, and the opposite bits it proves false.
+  // The two sets never share a bit, so they can be applied in one step.
+  uint64 asserted = 0;
+  uint64 refuted = 0;
+
+  if (arc.ilabel != arc.olabel) {
+    asserted |= kNotAcceptor;
+    refuted |= kAcceptor;
+  }
+
+  const bool input_eps = (arc.ilabel == 0);
+  const bool output_eps = (arc.olabel == 0);
+  if (input_eps) {
+    asserted |= kIEpsilons;
+    refuted |= kNoIEpsilons;
+  }
+  if (input_eps && output_eps) {
+    asserted |= kEpsilons;
+    refuted |= kNoEpsilons;
+  }
+  if (output_eps) {
+    asserted |= kOEpsilons;
+    refuted |= kNoOEpsilons;
+  }
+
+  // Sortedness is only checked against the arc added just before this one.
+  if (prev_arc != 0) {
+    if (prev_arc->ilabel > arc.ilabel) {
+      asserted |= kNotILabelSorted;
+      refuted |= kILabelSorted;
+    }
+    if (prev_arc->olabel > arc.olabel) {
+      asserted |= kNotOLabelSorted;
+      refuted |= kOLabelSorted;
+    }
+  }
+
+  if (arc.weight != A::Weight::Zero() && arc.weight != A::Weight::One()) {
+    asserted |= kWeighted;
+    refuted |= kUnweighted;
+  }
+
+  // An arc to the same or a lower-numbered state breaks topological order.
+  if (arc.nextstate <= s) {
+    asserted |= kNotTopSorted;
+    refuted |= kTopSorted;
+  }
+
+  uint64 outprops = (inprops | asserted) & ~refuted;
+
+  // Keep the bits AddArc preserves plus the positive bits handled above.
+  outprops &= kAddArcProperties | kAcceptor |
+              kNoEpsilons | kNoIEpsilons | kNoOEpsilons |
+              kILabelSorted | kOLabelSorted | kUnweighted | kTopSorted;
+
+  // A still-topologically-sorted FST is acyclic.
+  if (outprops & kTopSorted)
+    outprops |= kAcyclic | kInitialAcyclic;
+
+  return outprops;
+}
+
+extern const char *PropertyNames[];
+
+} // namespace fst
+
+#endif // FST_LIB_PROPERTIES_H__
diff --git a/src/include/fst/prune.h b/src/include/fst/prune.h
new file mode 100644
index 0000000..5ea5b4d
--- /dev/null
+++ b/src/include/fst/prune.h
@@ -0,0 +1,339 @@
+// prune.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// Functions implementing pruning.
+
+#ifndef FST_LIB_PRUNE_H__
+#define FST_LIB_PRUNE_H__
+
+#include <vector>
+using std::vector;
+
+#include <fst/arcfilter.h>
+#include <fst/heap.h>
+#include <fst/shortest-distance.h>
+
+
+namespace fst {
+
+// Options bundle for Prune(): thresholds, arc filter, optional pre-computed
+// distances and the convergence delta used when distances are computed.
+template <class A, class ArcFilter>
+class PruneOptions {
+ public:
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+
+  // Pruning weight threshold.
+  Weight weight_threshold;
+  // Pruning state threshold.
+  StateId state_threshold;
+  // Arc filter.
+  ArcFilter filter;
+  // If non-zero, passes in pre-computed shortest distance to final states.
+  const vector<Weight> *distance;
+  // Determines the degree of convergence required when computing shortest
+  // distances.
+  float delta;
+
+  // Copies each argument into the member it configures.
+  explicit PruneOptions(const Weight& threshold, StateId max_states,
+                        ArcFilter arc_filter,
+                        vector<Weight> *final_distance = 0,
+                        float convergence_delta = kDelta)
+      : weight_threshold(threshold),
+        state_threshold(max_states),
+        filter(arc_filter),
+        distance(final_distance),
+        delta(convergence_delta) {}
+
+ private:
+  PruneOptions();  // disallow
+};
+
+
+// Orders states by Times(idistance[s], fdistance[s]) under NaturalLess. A
+// state whose id lies outside either vector gets Weight::Zero() for that
+// factor. Both vectors are held by reference, not copied.
+template <class S, class W>
+class PruneCompare {
+ public:
+  typedef S StateId;
+  typedef W Weight;
+
+  PruneCompare(const vector<Weight> &idistance,
+               const vector<Weight> &fdistance)
+      : idistance_(idistance), fdistance_(fdistance) {}
+
+  bool operator()(const StateId x, const StateId y) const {
+    const Weight wx = Times(Lookup(idistance_, x), Lookup(fdistance_, x));
+    const Weight wy = Times(Lookup(idistance_, y), Lookup(fdistance_, y));
+    return less_(wx, wy);
+  }
+
+ private:
+  // Returns dist[s], or Weight::Zero() when s is not a valid index.
+  static Weight Lookup(const vector<Weight> &dist, const StateId s) {
+    if (s < dist.size())
+      return dist[s];
+    return Weight::Zero();
+  }
+
+  const vector<Weight> &idistance_;
+  const vector<Weight> &fdistance_;
+  NaturalLess<Weight> less_;
+};
+
+
+
+// Pruning algorithm: this version modifies its input and it takes an
+// options class as an argument. Delete states and arcs in 'fst' that
+// do not belong to a successful path whose weight is no more than
+// the weight of the shortest path Times() 'opts.weight_threshold'.
+// When 'opts.state_threshold != kNoStateId', the resulting transducer
+// will be restricted further to have at most 'opts.state_threshold'
+// states. Weights need to be commutative and have the path
+// property. The weight 'w' of any cycle needs to be bounded, i.e.,
+// 'Plus(w, W::One()) = One()'.
+// In-place pruning driven by 'opts'. States are visited in the order given
+// by a heap keyed on PruneCompare; arcs that exceed the limit are redirected
+// to a scratch state, and every state never visited is deleted at the end.
+template <class Arc, class ArcFilter>
+void Prune(MutableFst<Arc> *fst,
+ const PruneOptions<Arc, ArcFilter> &opts) {
+ typedef typename Arc::Weight Weight;
+ typedef typename Arc::StateId StateId;
+
+ // Both kPath and kCommutative are required; otherwise flag the FST with
+ // kError and leave it otherwise untouched.
+ if ((Weight::Properties() & (kPath | kCommutative))
+ != (kPath | kCommutative)) {
+ FSTERROR() << "Prune: Weight needs to have the path property and"
+ << " be commutative: "
+ << Weight::Type();
+ fst->SetProperties(kError, kError);
+ return;
+ }
+ StateId ns = fst->NumStates();
+ if (ns == 0) return;
+ // idistance[s]: best weight found so far from the start state to s.
+ vector<Weight> idistance(ns, Weight::Zero());
+ // Use the caller's distances when supplied, otherwise compute them here.
+ // NOTE(review): the 'true' argument presumably selects reverse (to-final)
+ // distances - confirm against ShortestDistance().
+ vector<Weight> tmp;
+ if (!opts.distance) {
+ tmp.reserve(ns);
+ ShortestDistance(*fst, &tmp, true, opts.delta);
+ }
+ const vector<Weight> *fdistance = opts.distance ? opts.distance : &tmp;
+
+ // Nothing survives if no states are allowed, the start state has no
+ // distance entry, or its distance is Zero(): delete everything.
+ // NOTE(review): size() <= Start() mixes unsigned and signed values; for
+ // Start() == kNoStateId this presumably relies on the conversion to a
+ // large unsigned value - confirm.
+ if ((opts.state_threshold == 0) ||
+ (fdistance->size() <= fst->Start()) ||
+ ((*fdistance)[fst->Start()] == Weight::Zero())) {
+ fst->DeleteStates();
+ return;
+ }
+ PruneCompare<StateId, Weight> compare(idistance, *fdistance);
+ Heap< StateId, PruneCompare<StateId, Weight>, false> heap(compare);
+ vector<bool> visited(ns, false);
+ // enqueued[s]: heap key of s while it is queued, kNoKey otherwise.
+ vector<size_t> enqueued(ns, kNoKey);
+ // dead[0] is a freshly added scratch state used as the target of pruned
+ // arcs; it is deleted with the other dead states at the end.
+ vector<StateId> dead;
+ dead.push_back(fst->AddState());
+ NaturalLess<Weight> less;
+ // Pruning limit: start state's distance Times() the weight threshold.
+ Weight limit = Times((*fdistance)[fst->Start()], opts.weight_threshold);
+
+ StateId num_visited = 0;
+ StateId s = fst->Start();
+ // Seed the heap with the start state unless it already exceeds the limit.
+ if (!less(limit, (*fdistance)[s])) {
+ idistance[s] = Weight::One();
+ enqueued[s] = heap.Insert(s);
+ ++num_visited;
+ }
+
+ while (!heap.Empty()) {
+ s = heap.Top();
+ heap.Pop();
+ enqueued[s] = kNoKey;
+ visited[s] = true;
+ // Drop the final weight if finishing at s would exceed the limit.
+ if (less(limit, Times(idistance[s], fst->Final(s))))
+ fst->SetFinal(s, Weight::Zero());
+ for (MutableArcIterator< MutableFst<Arc> > ait(fst, s);
+ !ait.Done();
+ ait.Next()) {
+ Arc arc = ait.Value();
+ // Arcs rejected by the filter are skipped and left as they are.
+ if (!opts.filter(arc)) continue;
+ // Weight through this arc: distance to s, the arc weight, then the
+ // distance stored for the destination (Zero() if it has no entry).
+ Weight weight = Times(Times(idistance[s], arc.weight),
+ arc.nextstate < fdistance->size()
+ ? (*fdistance)[arc.nextstate]
+ : Weight::Zero());
+ // Over the limit: redirect the arc to the scratch dead state.
+ if (less(limit, weight)) {
+ arc.nextstate = dead[0];
+ ait.SetValue(arc);
+ continue;
+ }
+ // Relax the distance from the start to the destination state.
+ if (less(Times(idistance[s], arc.weight), idistance[arc.nextstate]))
+ idistance[arc.nextstate] = Times(idistance[s], arc.weight);
+ if (visited[arc.nextstate]) continue;
+ // Once the state budget is used up, stop enqueueing and stop updating.
+ if ((opts.state_threshold != kNoStateId) &&
+ (num_visited >= opts.state_threshold))
+ continue;
+ // Enqueue a new destination, or refresh the position of a queued one
+ // since its idistance may just have changed.
+ if (enqueued[arc.nextstate] == kNoKey) {
+ enqueued[arc.nextstate] = heap.Insert(arc.nextstate);
+ ++num_visited;
+ } else {
+ heap.Update(enqueued[arc.nextstate], arc.nextstate);
+ }
+ }
+ }
+ // Every original state that was never popped from the heap is deleted,
+ // together with the scratch state already stored in dead[0].
+ for (size_t i = 0; i < visited.size(); ++i)
+ if (!visited[i]) dead.push_back(i);
+ fst->DeleteStates(dead);
+}
+
+
+// Pruning algorithm, in-place convenience overload: takes the pruning
+// thresholds directly instead of an options object and forwards to the
+// options-based in-place Prune(). States and arcs of 'fst' that do not
+// belong to a successful path whose weight is no more than the weight
+// of the shortest path Times() 'weight_threshold' are deleted. When
+// 'state_threshold != kNoStateId', the result is restricted further to
+// have at most 'state_threshold' states. Weights need to be commutative
+// and have the path property. The weight 'w' of any cycle needs to be
+// bounded, i.e., 'Plus(w, W::One()) = W::One()'.
+template <class Arc>
+void Prune(MutableFst<Arc> *fst,
+           typename Arc::Weight weight_threshold,
+           typename Arc::StateId state_threshold = kNoStateId,
+           double delta = kDelta) {
+  typedef AnyArcFilter<Arc> Filter;
+  // Bundle the arguments into an options object; the fourth constructor
+  // argument is passed as 0 exactly as the options-free interface implies.
+  PruneOptions<Arc, Filter> prune_options(weight_threshold, state_threshold,
+                                          Filter(), 0, delta);
+  Prune(fst, prune_options);
+}
+
+
+// Pruning algorithm: this version writes the pruned input Fst to an
+// output MutableFst and it takes an options class as an argument.
+// 'ofst' contains states and arcs that belong to a successful path in
+// 'ifst' whose weight is no more than the weight of the shortest path
+// Times() 'opts.weight_threshold'. When 'opts.state_threshold !=
+// kNoStateId', 'ofst' will be restricted further to have at most
+// 'opts.state_threshold' states. Weights need to be commutative and
+// have the path property. The weight 'w' of any cycle needs to be
+// bounded, i.e., 'Plus(w, W::One()) = W::One()'.
+//
+// If the weight type lacks the path or commutative property, an error
+// is logged, kError is set on 'ofst' and the function returns without
+// touching anything else. Otherwise 'ofst' is first emptied and given
+// the symbol tables of 'ifst'. When 'opts.distance' is null the
+// distances are computed here with ShortestDistance(); otherwise the
+// caller-supplied vector is used as is.
+template <class Arc, class ArcFilter>
+void Prune(const Fst<Arc> &ifst,
+ MutableFst<Arc> *ofst,
+ const PruneOptions<Arc, ArcFilter> &opts) {
+ typedef typename Arc::Weight Weight;
+ typedef typename Arc::StateId StateId;
+
+ // Both properties are required; bail out with an error flag otherwise.
+ if ((Weight::Properties() & (kPath | kCommutative))
+ != (kPath | kCommutative)) {
+ FSTERROR() << "Prune: Weight needs to have the path property and"
+ << " be commutative: "
+ << Weight::Type();
+ ofst->SetProperties(kError, kError);
+ return;
+ }
+ // Start from an empty output that shares the input's symbol tables.
+ ofst->DeleteStates();
+ ofst->SetInputSymbols(ifst.InputSymbols());
+ ofst->SetOutputSymbols(ifst.OutputSymbols());
+ if (ifst.Start() == kNoStateId)
+ return;
+ NaturalLess<Weight> less;
+ // Nothing can be kept if the threshold is below One() in the natural
+ // order or if no state is allowed in the output.
+ if (less(opts.weight_threshold, Weight::One()) ||
+ (opts.state_threshold == 0))
+ return;
+ // idistance[s]: best weight found so far from the start state to s.
+ vector<Weight> idistance;
+ vector<Weight> tmp;
+ if (!opts.distance)
+ ShortestDistance(ifst, &tmp, true, opts.delta);
+ // fdistance: caller-supplied distances if given, else the ones just
+ // computed (reverse == true; presumably distance to the final states).
+ const vector<Weight> *fdistance = opts.distance ? opts.distance : &tmp;
+
+ // The start state has no distance entry or a Zero() one: output stays empty.
+ if ((fdistance->size() <= ifst.Start()) ||
+ ((*fdistance)[ifst.Start()] == Weight::Zero())) {
+ return;
+ }
+ PruneCompare<StateId, Weight> compare(idistance, *fdistance);
+ Heap< StateId, PruneCompare<StateId, Weight>, false> heap(compare);
+ // copy[s]: state of 'ofst' created for state s of 'ifst', or kNoStateId.
+ vector<StateId> copy;
+ // enqueued[s]: heap key of s while it sits in the heap, kNoKey otherwise.
+ vector<size_t> enqueued;
+ // visited[s]: set once s has been popped from the heap.
+ vector<bool> visited;
+
+ StateId s = ifst.Start();
+ // Largest weight a kept path may have.
+ Weight limit = Times(s < fdistance->size() ? (*fdistance)[s] : Weight::Zero(),
+ opts.weight_threshold);
+ // The bookkeeping vectors are grown on demand up to the index needed.
+ while (copy.size() <= s)
+ copy.push_back(kNoStateId);
+ copy[s] = ofst->AddState();
+ ofst->SetStart(copy[s]);
+ while (idistance.size() <= s)
+ idistance.push_back(Weight::Zero());
+ idistance[s] = Weight::One();
+ while (enqueued.size() <= s) {
+ enqueued.push_back(kNoKey);
+ visited.push_back(false);
+ }
+ enqueued[s] = heap.Insert(s);
+
+ while (!heap.Empty()) {
+ s = heap.Top();
+ heap.Pop();
+ enqueued[s] = kNoKey;
+ visited[s] = true;
+ // Keep the final weight only if ending at s stays within the limit.
+ if (!less(limit, Times(idistance[s], ifst.Final(s))))
+ ofst->SetFinal(copy[s], ifst.Final(s));
+ for (ArcIterator< Fst<Arc> > ait(ifst, s);
+ !ait.Done();
+ ait.Next()) {
+ const Arc &arc = ait.Value();
+ if (!opts.filter(arc)) continue;
+ // Weight of the best path through this arc; destinations without a
+ // distance entry count as Zero().
+ Weight weight = Times(Times(idistance[s], arc.weight),
+ arc.nextstate < fdistance->size()
+ ? (*fdistance)[arc.nextstate]
+ : Weight::Zero());
+ if (less(limit, weight)) continue;
+ // Once the state budget is reached no further arc is copied, even
+ // one whose destination already exists in 'ofst'.
+ if ((opts.state_threshold != kNoStateId) &&
+ (ofst->NumStates() >= opts.state_threshold))
+ continue;
+ while (idistance.size() <= arc.nextstate)
+ idistance.push_back(Weight::Zero());
+ // Relax the distance from the start state to the destination.
+ if (less(Times(idistance[s], arc.weight),
+ idistance[arc.nextstate]))
+ idistance[arc.nextstate] = Times(idistance[s], arc.weight);
+ while (copy.size() <= arc.nextstate)
+ copy.push_back(kNoStateId);
+ if (copy[arc.nextstate] == kNoStateId)
+ copy[arc.nextstate] = ofst->AddState();
+ ofst->AddArc(copy[s], Arc(arc.ilabel, arc.olabel, arc.weight,
+ copy[arc.nextstate]));
+ while (enqueued.size() <= arc.nextstate) {
+ enqueued.push_back(kNoKey);
+ visited.push_back(false);
+ }
+ // Already expanded states are never re-queued.
+ if (visited[arc.nextstate]) continue;
+ if (enqueued[arc.nextstate] == kNoKey)
+ enqueued[arc.nextstate] = heap.Insert(arc.nextstate);
+ else
+ heap.Update(enqueued[arc.nextstate], arc.nextstate);
+ }
+ }
+}
+
+
+// Pruning algorithm, copying convenience overload: takes the pruning
+// thresholds directly instead of an options object and forwards to the
+// options-based Prune(ifst, ofst, opts). 'ofst' contains states and arcs
+// that belong to a successful path in 'ifst' whose weight is no more
+// than the weight of the shortest path Times() 'weight_threshold'. When
+// 'state_threshold != kNoStateId', 'ofst' will be restricted further to
+// have at most 'state_threshold' states. Weights need to be commutative
+// and have the path property. The weight 'w' of any cycle needs to be
+// bounded, i.e., 'Plus(w, W::One()) = W::One()'.
+template <class Arc>
+void Prune(const Fst<Arc> &ifst,
+           MutableFst<Arc> *ofst,
+           typename Arc::Weight weight_threshold,
+           typename Arc::StateId state_threshold = kNoStateId,
+           float delta = kDelta) {
+  typedef AnyArcFilter<Arc> Filter;
+  // Bundle the arguments into an options object; the fourth constructor
+  // argument is passed as 0 exactly as the options-free interface implies.
+  PruneOptions<Arc, Filter> prune_options(weight_threshold, state_threshold,
+                                          Filter(), 0, delta);
+  Prune(ifst, ofst, prune_options);
+}
+
+} // namespace fst
+
+#endif // FST_LIB_PRUNE_H_
diff --git a/src/include/fst/push.h b/src/include/fst/push.h
new file mode 100644
index 0000000..1f7a8fa
--- /dev/null
+++ b/src/include/fst/push.h
@@ -0,0 +1,175 @@
+// push.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// Class to reweight/push an FST.
+
+#ifndef FST_LIB_PUSH_H__
+#define FST_LIB_PUSH_H__
+
+#include <vector>
+using std::vector;
+
+#include <fst/factor-weight.h>
+#include <fst/fst.h>
+#include <fst/arc-map.h>
+#include <fst/reweight.h>
+#include <fst/shortest-distance.h>
+
+
+namespace fst {
+
+// Private helper functions for Push
+namespace internal {
+
+// Derives the total weight (the sum of the weights of all accepting
+// paths) from a vector produced by ShortestDistance. When
+// 'reverse == true', 'distance' holds distances to the final states and
+// the total is the entry of the start state (Zero() if the start state
+// has no entry). When 'reverse == false', 'distance' holds distances
+// from the initial state and the total is the sum over all states of
+// the distance Times() the final weight.
+template <class Arc>
+typename Arc::Weight ComputeTotalWeight(
+    const Fst<Arc> &fst,
+    const vector<typename Arc::Weight> &distance,
+    bool reverse) {
+  typedef typename Arc::Weight Weight;
+  typedef typename Arc::StateId StateId;
+
+  if (!reverse) {
+    // Forward distances: accumulate distance * final weight per state.
+    Weight total = Weight::Zero();
+    for (StateId state = 0; state < distance.size(); ++state)
+      total = Plus(total, Times(distance[state], fst.Final(state)));
+    return total;
+  }
+
+  // Reverse distances: the start state's entry already is the total.
+  if (fst.Start() < distance.size())
+    return distance[fst.Start()];
+  return Weight::Zero();
+}
+
+// Divide the weight of every accepting path by 'w'. The weight 'w' is
+// divided at the final states if 'at_final == true' (right division of
+// every final weight) and at the initial state otherwise (left division
+// of the weight of every arc leaving the start state and of the start
+// state's final weight).
+//
+// Nothing is done when 'w' is One() or Zero(), or when 'at_final ==
+// false' and the FST has no start state.
+template <class Arc>
+void RemoveWeight(MutableFst<Arc> *fst, typename Arc::Weight w, bool at_final) {
+  typedef typename Arc::Weight Weight;
+  typedef typename Arc::StateId StateId;
+
+  if ((w == Weight::One()) || (w == Weight::Zero()))
+    return;
+
+  if (at_final) {
+    // Remove 'w' from the final states
+    for (StateIterator< MutableFst<Arc> > sit(*fst);
+         !sit.Done();
+         sit.Next())
+      fst->SetFinal(sit.Value(),
+                    Divide(fst->Final(sit.Value()), w, DIVIDE_RIGHT));
+  } else {  // at_final == false
+    // Remove 'w' from the initial state
+    StateId start = fst->Start();
+    // An FST without a start state has no accepting path; iterating the
+    // arcs of kNoStateId or setting its final weight would be invalid.
+    if (start == kNoStateId)
+      return;
+    for (MutableArcIterator<MutableFst<Arc> > ait(fst, start);
+         !ait.Done();
+         ait.Next()) {
+      Arc arc = ait.Value();
+      arc.weight = Divide(arc.weight, w, DIVIDE_LEFT);
+      ait.SetValue(arc);
+    }
+    fst->SetFinal(start, Divide(fst->Final(start), w, DIVIDE_LEFT));
+  }
+}
+} // namespace internal
+
+// Pushes the weights of 'fst' in place, in the direction given by
+// 'type'. If pushing towards the initial state, the sum of the weight
+// of the outgoing transitions and final weight at a non-initial state
+// is equal to One() in the resulting machine. If pushing towards the
+// final state, the same property holds on the reverse machine.
+//
+// Weight needs to be left distributive when pushing towards the
+// initial state and right distributive when pushing towards the final
+// states.
+//
+// When 'remove_total_weight' is true, the total weight is computed from
+// the shortest distances before reweighting and divided out afterwards
+// (at the final states when pushing to final, at the initial state
+// otherwise).
+template <class Arc>
+void Push(MutableFst<Arc> *fst,
+          ReweightType type,
+          float delta = kDelta,
+          bool remove_total_weight = false) {
+  typedef typename Arc::Weight Weight;
+  const bool to_initial = (type == REWEIGHT_TO_INITIAL);
+
+  vector<Weight> distance;
+  ShortestDistance(*fst, &distance, to_initial, delta);
+
+  if (!remove_total_weight) {
+    Reweight(fst, distance, type);
+    return;
+  }
+
+  // The total must be taken from the machine as it is before reweighting.
+  Weight total_weight =
+      internal::ComputeTotalWeight(*fst, distance, to_initial);
+  Reweight(fst, distance, type);
+  internal::RemoveWeight(fst, total_weight, type == REWEIGHT_TO_FINAL);
+}
+
+const uint32 kPushWeights = 0x0001;
+const uint32 kPushLabels = 0x0002;
+const uint32 kPushRemoveTotalWeight = 0x0004;
+const uint32 kPushRemoveCommonAffix = 0x0008;
+
+// OFST obtained from IFST by pushing weights and/or labels according
+// to PTYPE in the direction defined by RTYPE. Weight needs to be
+// left distributive when pushing weights towards the initial state
+// and right distributive when pushing weights towards the final
+// states.
+//
+// PTYPE is a bitwise OR of the kPush* flags:
+//  - kPushWeights only: IFST is copied to OFST and pushed in place.
+//  - kPushLabels (with or without kPushWeights): the pushing is done on
+//    a Gallic-arc copy of IFST which is then factored and mapped back.
+//  - neither: a warning is logged and OFST is a plain copy of IFST.
+// kPushRemoveTotalWeight and kPushRemoveCommonAffix select which
+// components of the total weight are divided out after reweighting.
+template <class Arc, ReweightType rtype>
+void Push(const Fst<Arc> &ifst,
+ MutableFst<Arc> *ofst,
+ uint32 ptype,
+ float delta = kDelta) {
+
+ if ((ptype & (kPushWeights | kPushLabels)) == kPushWeights) {
+ // Weights only: delegate to the in-place version on a copy.
+ *ofst = ifst;
+ Push(ofst, rtype, delta, ptype & kPushRemoveTotalWeight);
+ } else if (ptype & kPushLabels) {
+ // The string side of the Gallic weight follows the pushing direction.
+ const StringType stype = rtype == REWEIGHT_TO_INITIAL
+ ? STRING_LEFT
+ : STRING_RIGHT;
+ vector<typename GallicArc<Arc, stype>::Weight> gdistance;
+ VectorFst<GallicArc<Arc, stype> > gfst;
+ ArcMap(ifst, &gfst, ToGallicMapper<Arc, stype>());
+ if (ptype & kPushWeights ) {
+ // Labels and weights: distances are taken on the weighted Gallic FST.
+ ShortestDistance(gfst, &gdistance, rtype == REWEIGHT_TO_INITIAL, delta);
+ } else {
+ // Labels only: distances are taken on a view of IFST passed through
+ // RmWeightMapper before the Gallic mapping.
+ ArcMapFst<Arc, Arc, RmWeightMapper<Arc> >
+ uwfst(ifst, RmWeightMapper<Arc>());
+ ArcMapFst<Arc, GallicArc<Arc, stype>, ToGallicMapper<Arc, stype> >
+ guwfst(uwfst, ToGallicMapper<Arc, stype>());
+ ShortestDistance(guwfst, &gdistance, rtype == REWEIGHT_TO_INITIAL, delta);
+ }
+ typename GallicArc<Arc, stype>::Weight total_weight =
+ GallicArc<Arc, stype>::Weight::One();
+ if (ptype & (kPushRemoveTotalWeight | kPushRemoveCommonAffix)) {
+ total_weight = internal::ComputeTotalWeight(
+ gfst, gdistance, rtype == REWEIGHT_TO_INITIAL);
+ // Keep only the requested components; the other one is reset to One()
+ // so that dividing by it has no effect.
+ total_weight = typename GallicArc<Arc, stype>::Weight(
+ ptype & kPushRemoveCommonAffix ? total_weight.Value1()
+ : StringWeight<typename Arc::Label, stype>::One(),
+ ptype & kPushRemoveTotalWeight ? total_weight.Value2()
+ : Arc::Weight::One());
+ }
+ Reweight(&gfst, gdistance, rtype);
+ if (ptype & (kPushRemoveTotalWeight | kPushRemoveCommonAffix))
+ internal::RemoveWeight(&gfst, total_weight, rtype == REWEIGHT_TO_FINAL);
+ // Factor the Gallic weights and map the result back to the original
+ // arc type.
+ FactorWeightFst< GallicArc<Arc, stype>, GallicFactor<typename Arc::Label,
+ typename Arc::Weight, stype> > fwfst(gfst);
+ ArcMap(fwfst, ofst, FromGallicMapper<Arc, stype>());
+ // The output symbol table is set explicitly from IFST after the mapping.
+ ofst->SetOutputSymbols(ifst.OutputSymbols());
+ } else {
+ LOG(WARNING) << "Push: pushing type is set to 0: "
+ << "pushing neither labels nor weights.";
+ *ofst = ifst;
+ }
+}
+
+} // namespace fst
+
+#endif /* FST_LIB_PUSH_H__ */
diff --git a/src/include/fst/queue.h b/src/include/fst/queue.h
new file mode 100644
index 0000000..707dffc
--- /dev/null
+++ b/src/include/fst/queue.h
@@ -0,0 +1,889 @@
+// queue.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// Functions and classes for various Fst state queues with
+// a unified interface.
+
+#ifndef FST_LIB_QUEUE_H__
+#define FST_LIB_QUEUE_H__
+
+#include <deque>
+#include <vector>
+using std::vector;
+
+#include <fst/arcfilter.h>
+#include <fst/connect.h>
+#include <fst/heap.h>
+#include <fst/topsort.h>
+
+
+namespace fst {
+
+// template <class S>
+// class Queue {
+// public:
+// typedef S StateId;
+//
+// // Ctr: may need args (e.g., Fst, comparator) for some queues
+// Queue(...);
+// // Returns the head of the queue
+// StateId Head() const;
+// // Inserts a state
+// void Enqueue(StateId s);
+// // Removes the head of the queue
+// void Dequeue();
+// // Updates ordering of state s when weight changes, if necessary
+// void Update(StateId s);
+// // Does the queue contain no elements?
+// bool Empty() const;
+// // Remove all states from queue
+// void Clear();
+// };
+
+// State queue types. Each queue class in this file passes one of these
+// tags to the QueueBase constructor, and QueueBase::Type() returns it.
+// The numeric values are spelled out explicitly.
+enum QueueType {
+ TRIVIAL_QUEUE = 0, // Single state queue
+ FIFO_QUEUE = 1, // First-in, first-out queue
+ LIFO_QUEUE = 2, // Last-in, first-out queue
+ SHORTEST_FIRST_QUEUE = 3, // Shortest-first queue
+ TOP_ORDER_QUEUE = 4, // Topologically-ordered queue
+ STATE_ORDER_QUEUE = 5, // State-ID ordered queue
+ SCC_QUEUE = 6, // Component graph top-ordered meta-queue
+ AUTO_QUEUE = 7, // Auto-selected queue
+ // NOTE(review): presumably for queue disciplines defined outside this
+ // list -- confirm against users of OTHER_QUEUE.
+ OTHER_QUEUE = 8
+ };
+
+
+// QueueBase, templated on the StateId, is the base class shared by the
+// queues considered by AutoQueue. Its public, non-virtual methods
+// forward to private pure-virtual hooks with a trailing underscore, so
+// a queue can be driven through a QueueBase pointer while the derived
+// classes keep non-virtual methods of the same names for direct use.
+template <class S>
+class QueueBase {
+ public:
+  typedef S StateId;
+
+  // 'type' is the discipline tag later reported by Type().
+  QueueBase(QueueType type) : queue_type_(type), error_(false) {}
+  virtual ~QueueBase() {}
+  // Returns the head of the queue
+  StateId Head() const { return Head_(); }
+  // Inserts a state
+  void Enqueue(StateId s) { Enqueue_(s); }
+  // Removes the head of the queue
+  void Dequeue() { Dequeue_(); }
+  // Updates ordering of state s when weight changes, if necessary
+  void Update(StateId s) { Update_(s); }
+  // Does the queue contain no elements?
+  bool Empty() const { return Empty_(); }
+  // Remove all states from queue
+  void Clear() { Clear_(); }
+  // Discipline tag given at construction. Const so that it can be
+  // queried on a const queue, like Error().
+  QueueType Type() const { return queue_type_; }
+  // Error flag; false until SetError(true) is called.
+  bool Error() const { return error_; }
+  void SetError(bool error) { error_ = error; }
+
+ private:
+  // This allows base-class virtual access to non-virtual derived-
+  // class members of the same name. It makes the derived class more
+  // efficient to use but unsafe to further derive.
+  virtual StateId Head_() const = 0;
+  virtual void Enqueue_(StateId s) = 0;
+  virtual void Dequeue_() = 0;
+  virtual void Update_(StateId s) = 0;
+  virtual bool Empty_() const = 0;
+  virtual void Clear_() = 0;
+
+  QueueType queue_type_;
+  bool error_;
+};
+
+
+// Trivial queue discipline, templated on the StateId. The queue holds
+// at most one state at a time; enqueuing overwrites whatever is held.
+// It is used for strongly connected components with only one state and
+// no self loops.
+template <class S>
+class TrivialQueue : public QueueBase<S> {
+ public:
+  typedef S StateId;
+
+  TrivialQueue() : QueueBase<S>(TRIVIAL_QUEUE), front_(kNoStateId) {}
+
+  // The held state, or kNoStateId when nothing is held.
+  StateId Head() const { return front_; }
+
+  // Stores 's', replacing any state already held.
+  void Enqueue(StateId s) { front_ = s; }
+
+  // Drops the held state.
+  void Dequeue() { front_ = kNoStateId; }
+
+  // Ordering is meaningless with a single slot; nothing to do.
+  void Update(StateId) {}
+
+  bool Empty() const { return kNoStateId == front_; }
+
+  void Clear() { front_ = kNoStateId; }
+
+ private:
+  // Virtual hooks called by QueueBase; each one simply forwards to the
+  // non-virtual member of the same name above. This keeps direct use of
+  // the class cheap but makes it unsafe to derive from further.
+  virtual StateId Head_() const { return Head(); }
+  virtual void Enqueue_(StateId s) { Enqueue(s); }
+  virtual void Dequeue_() { Dequeue(); }
+  virtual void Update_(StateId s) { Update(s); }
+  virtual bool Empty_() const { return Empty(); }
+  virtual void Clear_() { Clear(); }
+
+  StateId front_;  // The single slot; kNoStateId means empty.
+};
+
+
+// First-in, first-out queue discipline, templated on the StateId.
+// Built on a deque: states enter at the front and leave from the back.
+template <class S>
+class FifoQueue : public QueueBase<S>, public deque<S> {
+ public:
+  typedef S StateId;
+
+  FifoQueue() : QueueBase<S>(FIFO_QUEUE) {}
+
+  // Oldest state still in the queue.
+  StateId Head() const { return this->back(); }
+
+  void Enqueue(StateId s) { this->push_front(s); }
+
+  void Dequeue() { this->pop_back(); }
+
+  // Insertion order never changes; nothing to do.
+  void Update(StateId) {}
+
+  bool Empty() const { return this->empty(); }
+
+  void Clear() { this->clear(); }
+
+ private:
+  // Virtual hooks called by QueueBase; each one simply forwards to the
+  // non-virtual member of the same name above. This keeps direct use of
+  // the class cheap but makes it unsafe to derive from further.
+  virtual StateId Head_() const { return Head(); }
+  virtual void Enqueue_(StateId s) { Enqueue(s); }
+  virtual void Dequeue_() { Dequeue(); }
+  virtual void Update_(StateId s) { Update(s); }
+  virtual bool Empty_() const { return Empty(); }
+  virtual void Clear_() { Clear(); }
+};
+
+
+// Last-in, first-out queue discipline, templated on the StateId.
+// Built on a deque: states enter and leave at the front.
+template <class S>
+class LifoQueue : public QueueBase<S>, public deque<S> {
+ public:
+  typedef S StateId;
+
+  LifoQueue() : QueueBase<S>(LIFO_QUEUE) {}
+
+  // Most recently enqueued state.
+  StateId Head() const { return this->front(); }
+
+  void Enqueue(StateId s) { this->push_front(s); }
+
+  void Dequeue() { this->pop_front(); }
+
+  // Insertion order never changes; nothing to do.
+  void Update(StateId) {}
+
+  bool Empty() const { return this->empty(); }
+
+  void Clear() { this->clear(); }
+
+ private:
+  // Virtual hooks called by QueueBase; each one simply forwards to the
+  // non-virtual member of the same name above. This keeps direct use of
+  // the class cheap but makes it unsafe to derive from further.
+  virtual StateId Head_() const { return Head(); }
+  virtual void Enqueue_(StateId s) { Enqueue(s); }
+  virtual void Dequeue_() { Dequeue(); }
+  virtual void Update_(StateId s) { Update(s); }
+  virtual bool Empty_() const { return Empty(); }
+  virtual void Clear_() { Clear(); }
+};
+
+
+// Shortest-first queue discipline, templated on the StateId and
+// comparison function object. Comparison function object COMP is
+// used to compare two StateIds. If a (single) state's order changes,
+// it can be reordered in the queue with a call to Update().
+// If 'update == false', call to Update() does not reorder the queue
+// and no per-state heap keys are recorded.
+template <typename S, typename C, bool update = true>
+class ShortestFirstQueue : public QueueBase<S> {
+ public:
+  typedef S StateId;
+  typedef C Compare;
+
+  ShortestFirstQueue(C comp)
+      : QueueBase<S>(SHORTEST_FIRST_QUEUE), heap_(comp) {}
+
+  StateId Head() const { return heap_.Top(); }
+
+  void Enqueue(StateId s) {
+    if (!update) {
+      heap_.Insert(s);
+      return;
+    }
+    // Extend the key table so that entry 's' exists, then remember the
+    // heap key under which 's' was inserted.
+    for (StateId next = key_.size(); next <= s; ++next)
+      key_.push_back(kNoKey);
+    key_[s] = heap_.Insert(s);
+  }
+
+  void Dequeue() {
+    if (!update) {
+      heap_.Pop();
+      return;
+    }
+    // The popped state is no longer in the heap: forget its key.
+    key_[heap_.Pop()] = kNoKey;
+  }
+
+  void Update(StateId s) {
+    if (!update)
+      return;
+    const bool in_heap = (s < key_.size()) && (key_[s] != kNoKey);
+    if (in_heap)
+      heap_.Update(key_[s], s);
+    else
+      Enqueue(s);
+  }
+
+  bool Empty() const { return heap_.Empty(); }
+
+  void Clear() {
+    heap_.Clear();
+    if (update)
+      key_.clear();
+  }
+
+ private:
+  Heap<S, C, false> heap_;
+  vector<ssize_t> key_;  // key_[s]: heap key of state s, or kNoKey.
+
+  // Virtual hooks called by QueueBase; each one simply forwards to the
+  // non-virtual member of the same name above. This keeps direct use of
+  // the class cheap but makes it unsafe to derive from further.
+  virtual StateId Head_() const { return Head(); }
+  virtual void Enqueue_(StateId s) { Enqueue(s); }
+  virtual void Dequeue_() { Dequeue(); }
+  virtual void Update_(StateId s) { Update(s); }
+  virtual bool Empty_() const { return Empty(); }
+  virtual void Clear_() { Clear(); }
+};
+
+
+// Comparison function object between states, built from a vector that
+// maps states to weights and a Less comparison function object between
+// weights: state x precedes state y when the weight of x is Less than
+// the weight of y. The weight vector is held by reference and is
+// indexed without bounds checking; the Less object is copied.
+template <typename S, typename L>
+class StateWeightCompare {
+ public:
+  typedef L Less;
+  typedef typename L::Weight Weight;
+  typedef S StateId;
+
+  StateWeightCompare(const vector<Weight>& weights, const L &less)
+      : weights_(weights), less_(less) {}
+
+  bool operator()(const S x, const S y) const {
+    const Weight &weight_x = weights_[x];
+    const Weight &weight_y = weights_[y];
+    return less_(weight_x, weight_y);
+  }
+
+ private:
+  const vector<Weight>& weights_;
+  L less_;
+};
+
+
+// Shortest-first queue discipline, templated on the StateId and Weight, is
+// specialized to use the weight's natural order for the comparison function.
+// 'distance' maps states to weights; the comparator keeps a reference to
+// it, so it must outlive the queue.
+template <typename S, typename W>
+class NaturalShortestFirstQueue :
+      public ShortestFirstQueue<S, StateWeightCompare<S, NaturalLess<W> > > {
+ public:
+  typedef StateWeightCompare<S, NaturalLess<W> > C;
+
+  // The NaturalLess object is passed as a temporary: the comparator copies
+  // it, and a data member of this class could not be used here because
+  // base classes are initialized before members.
+  NaturalShortestFirstQueue(const vector<W> &distance) :
+      ShortestFirstQueue<S, C>(C(distance, NaturalLess<W>())) {}
+};
+
+// Topological-order queue discipline, templated on the StateId.
+// States are ordered in the queue topologically. The FST must be acyclic.
+// Internally a state s lives in slot order_[s] of state_, and
+// [front_, back_] brackets the occupied slots (empty when front_ > back_).
+template <class S>
+class TopOrderQueue : public QueueBase<S> {
+ public:
+  typedef S StateId;
+
+  // This constructor computes the top. order. It accepts an arc filter
+  // to limit the transitions considered in that computation (e.g., only
+  // the epsilon graph). If the visit reports a cycle, an error is logged
+  // and the queue's error flag is set.
+  template <class Arc, class ArcFilter>
+  TopOrderQueue(const Fst<Arc> &fst, ArcFilter filter)
+      : QueueBase<S>(TOP_ORDER_QUEUE), front_(0), back_(kNoStateId),
+        order_(), state_() {
+    bool acyclic;
+    TopOrderVisitor<Arc> visitor(&order_, &acyclic);
+    DfsVisit(fst, &visitor, filter);
+    if (!acyclic) {
+      FSTERROR() << "TopOrderQueue: fst is not acyclic.";
+      this->SetError(true);
+    }
+    state_.resize(order_.size(), kNoStateId);
+  }
+
+  // This constructor is passed the top. order, useful when we know it
+  // beforehand.
+  TopOrderQueue(const vector<StateId> &order)
+      : QueueBase<S>(TOP_ORDER_QUEUE), front_(0), back_(kNoStateId),
+        order_(order), state_(order.size(), kNoStateId) {}
+
+  StateId Head() const { return state_[front_]; }
+
+  void Enqueue(StateId s) {
+    const StateId slot = order_[s];
+    if (front_ > back_) {
+      // Queue was empty: the new slot is both ends.
+      front_ = back_ = slot;
+    } else if (slot > back_) {
+      back_ = slot;
+    } else if (slot < front_) {
+      front_ = slot;
+    }
+    state_[slot] = s;
+  }
+
+  void Dequeue() {
+    state_[front_] = kNoStateId;
+    // Advance to the next occupied slot, or past back_ if none is left.
+    while ((front_ <= back_) && (state_[front_] == kNoStateId))
+      ++front_;
+  }
+
+  // The order is fixed by the topological sort; nothing to do.
+  void Update(StateId) {}
+
+  bool Empty() const { return front_ > back_; }
+
+  void Clear() {
+    for (StateId slot = front_; slot <= back_; ++slot)
+      state_[slot] = kNoStateId;
+    back_ = kNoStateId;
+    front_ = 0;
+  }
+
+ private:
+  StateId front_;
+  StateId back_;
+  vector<StateId> order_;  // order_[s]: slot of state s.
+  vector<StateId> state_;  // state_[slot]: enqueued state or kNoStateId.
+
+  // Virtual hooks called by QueueBase; each one simply forwards to the
+  // non-virtual member of the same name above. This keeps direct use of
+  // the class cheap but makes it unsafe to derive from further.
+  virtual StateId Head_() const { return Head(); }
+  virtual void Enqueue_(StateId s) { Enqueue(s); }
+  virtual void Dequeue_() { Dequeue(); }
+  virtual void Update_(StateId s) { Update(s); }
+  virtual bool Empty_() const { return Empty(); }
+  virtual void Clear_() { Clear(); }
+};
+
+
+// State order queue discipline, templated on the StateId.
+// States are ordered in the queue by state Id. A bit per state records
+// membership, and [front_, back_] brackets the enqueued Ids (the queue
+// is empty when front_ > back_).
+template <class S>
+class StateOrderQueue : public QueueBase<S> {
+ public:
+  typedef S StateId;
+
+  StateOrderQueue()
+      : QueueBase<S>(STATE_ORDER_QUEUE), front_(0), back_(kNoStateId) {}
+
+  StateId Head() const { return front_; }
+
+  void Enqueue(StateId s) {
+    if (front_ > back_) {
+      // Queue was empty: 's' is both ends.
+      front_ = back_ = s;
+    } else if (s > back_) {
+      back_ = s;
+    } else if (s < front_) {
+      front_ = s;
+    }
+    // Grow the membership table on demand.
+    while (enqueued_.size() <= s)
+      enqueued_.push_back(false);
+    enqueued_[s] = true;
+  }
+
+  void Dequeue() {
+    enqueued_[front_] = false;
+    // Advance to the next enqueued Id, or past back_ if none is left.
+    while ((front_ <= back_) && !enqueued_[front_])
+      ++front_;
+  }
+
+  // The order is fixed by the state Ids; nothing to do.
+  void Update(StateId) {}
+
+  bool Empty() const { return front_ > back_; }
+
+  void Clear() {
+    for (StateId id = front_; id <= back_; ++id)
+      enqueued_[id] = false;
+    front_ = 0;
+    back_ = kNoStateId;
+  }
+
+ private:
+  StateId front_;
+  StateId back_;
+  vector<bool> enqueued_;  // enqueued_[s]: is state s in the queue?
+
+  // Virtual hooks called by QueueBase; each one simply forwards to the
+  // non-virtual member of the same name above. This keeps direct use of
+  // the class cheap but makes it unsafe to derive from further.
+  virtual StateId Head_() const { return Head(); }
+  virtual void Enqueue_(StateId s) { Enqueue(s); }
+  virtual void Dequeue_() { Dequeue(); }
+  virtual void Update_(StateId s) { Update(s); }
+  virtual bool Empty_() const { return Empty(); }
+  virtual void Clear_() { Clear(); }
+};
+
+
+// SCC topological-order meta-queue discipline, templated on the StateId S
+// and a queue Q, which is used inside each SCC. It visits the SCC's
+// of an FST in topological order. Its constructor is passed the queues
+// to use within an SCC.
+//
+// An SCC whose entry in the queue vector is null is handled by an
+// internal one-slot-per-SCC table (trivial_queue_), grown on demand by
+// Enqueue(). [front_, back_] brackets the SCC numbers that may hold
+// states; the meta-queue is empty when front_ > back_.
+template <class S, class Q>
+class SccQueue : public QueueBase<S> {
+ public:
+  typedef S StateId;
+  typedef Q Queue;
+
+  // Constructor takes a vector specifying the SCC number per state
+  // and a vector giving the queue to use per SCC number. Both are
+  // referenced, not copied, and the queues are not owned.
+  SccQueue(const vector<StateId> &scc, vector<Queue*> *queue)
+      : QueueBase<S>(SCC_QUEUE), queue_(queue), scc_(scc), front_(0),
+        back_(kNoStateId) {}
+
+  StateId Head() const {
+    // Skip SCCs that hold nothing. For a trivial SCC that means its slot
+    // does not exist yet (front_ >= size) or holds kNoStateId; the bound
+    // check must be '>=' so that the slot is never read out of range.
+    while ((front_ <= back_) &&
+           (((*queue_)[front_] && (*queue_)[front_]->Empty())
+            || (((*queue_)[front_] == 0) &&
+                ((front_ >= trivial_queue_.size())
+                 || (trivial_queue_[front_] == kNoStateId)))))
+      ++front_;
+    if ((*queue_)[front_])
+      return (*queue_)[front_]->Head();
+    else
+      return trivial_queue_[front_];
+  }
+
+  void Enqueue(StateId s) {
+    if (front_ > back_) front_ = back_ = scc_[s];
+    else if (scc_[s] > back_) back_ = scc_[s];
+    else if (scc_[s] < front_) front_ = scc_[s];
+    if ((*queue_)[scc_[s]]) {
+      (*queue_)[scc_[s]]->Enqueue(s);
+    } else {
+      // Trivial SCC: make sure its slot exists, then store the state.
+      while (trivial_queue_.size() <= scc_[s])
+        trivial_queue_.push_back(kNoStateId);
+      trivial_queue_[scc_[s]] = s;
+    }
+  }
+
+  void Dequeue() {
+    if ((*queue_)[front_])
+      (*queue_)[front_]->Dequeue();
+    else if (front_ < trivial_queue_.size())
+      trivial_queue_[front_] = kNoStateId;
+  }
+
+  void Update(StateId s) {
+    // Trivial SCCs hold a single state; only real queues need updating.
+    if ((*queue_)[scc_[s]])
+      (*queue_)[scc_[s]]->Update(s);
+  }
+
+  bool Empty() const {
+    if (front_ < back_)  // Queue scc # back_ not empty unless back_==front_
+      return false;
+    else if (front_ > back_)
+      return true;
+    else if ((*queue_)[front_])
+      return (*queue_)[front_]->Empty();
+    else
+      // '>=' for the same reason as in Head(): a slot that does not exist
+      // yet is empty and must not be read.
+      return (front_ >= trivial_queue_.size())
+        || (trivial_queue_[front_] == kNoStateId);
+  }
+
+  void Clear() {
+    for (StateId i = front_; i <= back_; ++i)
+      if ((*queue_)[i])
+        (*queue_)[i]->Clear();
+      else if (i < trivial_queue_.size())
+        trivial_queue_[i] = kNoStateId;
+    front_ = 0;
+    back_ = kNoStateId;
+  }
+
+ private:
+  vector<Queue*> *queue_;          // Per-SCC queue; null for trivial SCCs.
+  const vector<StateId> &scc_;     // scc_[s]: SCC number of state s.
+  mutable StateId front_;          // Advanced lazily by Head().
+  StateId back_;
+  vector<StateId> trivial_queue_;  // One slot per SCC without a queue.
+
+  // This allows base-class virtual access to non-virtual derived-
+  // class members of the same name. It makes the derived class more
+  // efficient to use but unsafe to further derive.
+  virtual StateId Head_() const { return Head(); }
+  virtual void Enqueue_(StateId s) { Enqueue(s); }
+  virtual void Dequeue_() { Dequeue(); }
+  virtual void Update_(StateId s) { Update(s); }
+  virtual bool Empty_() const { return Empty(); }
+  virtual void Clear_() { return Clear(); }
+
+  DISALLOW_COPY_AND_ASSIGN(SccQueue);
+};
+
+
+// Automatic queue discipline, templated on the StateId. It selects a
+// queue discipline for a given FST based on its properties and then
+// forwards every queue operation to the selected queue, which it owns.
+template <class S>
+class AutoQueue : public QueueBase<S> {
+ public:
+  typedef S StateId;
+
+  // This constructor takes a state distance vector that, if non-null and if
+  // the Weight type has the path property, will entertain the
+  // shortest-first queue using the natural order w.r.t to the distance.
+  //
+  // Selection order: state-order queue if the FST is known to be top
+  // sorted or has no start state; top-order queue if known acyclic; LIFO
+  // if known unweighted with an idempotent semiring; otherwise the FST
+  // is decomposed into SCCs and either a LIFO queue, a top-order queue
+  // or an SCC meta-queue with one queue per SCC is used.
+  template <class Arc, class ArcFilter>
+  AutoQueue(const Fst<Arc> &fst, const vector<typename Arc::Weight> *distance,
+            ArcFilter filter) : QueueBase<S>(AUTO_QUEUE) {
+    typedef typename Arc::Weight Weight;
+    typedef StateWeightCompare< StateId, NaturalLess<Weight> > Compare;
+
+    // First check if the FST is known to have these properties.
+    uint64 props = fst.Properties(kAcyclic | kCyclic |
+                                  kTopSorted | kUnweighted, false);
+    if ((props & kTopSorted) || fst.Start() == kNoStateId) {
+      queue_ = new StateOrderQueue<StateId>();
+      VLOG(2) << "AutoQueue: using state-order discipline";
+    } else if (props & kAcyclic) {
+      queue_ = new TopOrderQueue<StateId>(fst, filter);
+      VLOG(2) << "AutoQueue: using top-order discipline";
+    } else if ((props & kUnweighted) && (Weight::Properties() & kIdempotent)) {
+      queue_ = new LifoQueue<StateId>();
+      VLOG(2) << "AutoQueue: using LIFO discipline";
+    } else {
+      uint64 properties;
+      // Decompose into strongly-connected components.
+      SccVisitor<Arc> scc_visitor(&scc_, 0, 0, &properties);
+      DfsVisit(fst, &scc_visitor, filter);
+      StateId nscc = *max_element(scc_.begin(), scc_.end()) + 1;
+      vector<QueueType> queue_types(nscc);
+      // 'less' and 'comp' stay null unless shortest-first is possible.
+      NaturalLess<Weight> *less = 0;
+      Compare *comp = 0;
+      if (distance && (Weight::Properties() & kPath)) {
+        less = new NaturalLess<Weight>;
+        comp = new Compare(*distance, *less);
+      }
+      // Find the queue type to use per SCC.
+      bool unweighted;
+      bool all_trivial;
+      SccQueueType(fst, scc_, &queue_types, filter, less, &all_trivial,
+                   &unweighted);
+      // If unweighted and semiring is idempotent, use lifo queue.
+      if (unweighted) {
+        queue_ = new LifoQueue<StateId>();
+        VLOG(2) << "AutoQueue: using LIFO discipline";
+        delete comp;
+        delete less;
+        return;
+      }
+      // If all the scc are trivial, FST is acyclic and the scc# gives
+      // the topological order.
+      if (all_trivial) {
+        queue_ = new TopOrderQueue<StateId>(scc_);
+        VLOG(2) << "AutoQueue: using top-order discipline";
+        delete comp;
+        delete less;
+        return;
+      }
+      VLOG(2) << "AutoQueue: using SCC meta-discipline";
+      queues_.resize(nscc);
+      for (StateId i = 0; i < nscc; ++i) {
+        switch(queue_types[i]) {
+          case TRIVIAL_QUEUE:
+            // A null entry tells SccQueue to use its own one-state slot.
+            queues_[i] = 0;
+            VLOG(3) << "AutoQueue: SCC #" << i
+                    << ": using trivial discipline";
+            break;
+          case SHORTEST_FIRST_QUEUE:
+            // The queue takes its comparator by value, so 'comp' may be
+            // deleted below.
+            queues_[i] = new ShortestFirstQueue<StateId, Compare, false>(*comp);
+            VLOG(3) << "AutoQueue: SCC #" << i <<
+              ": using shortest-first discipline";
+            break;
+          case LIFO_QUEUE:
+            queues_[i] = new LifoQueue<StateId>();
+            VLOG(3) << "AutoQueue: SCC #" << i
+                    << ": using LIFO discipline";
+            break;
+          case FIFO_QUEUE:
+          default:
+            queues_[i] = new FifoQueue<StateId>();
+            VLOG(3) << "AutoQueue: SCC #" << i
+                    << ": using FIFO discipline";
+            break;
+        }
+      }
+      queue_ = new SccQueue< StateId, QueueBase<StateId> >(scc_, &queues_);
+      delete comp;
+      delete less;
+    }
+  }
+
+  // Releases the per-SCC queues (if any) and the selected queue.
+  ~AutoQueue() {
+    for (StateId i = 0; i < queues_.size(); ++i)
+      delete queues_[i];
+    delete queue_;
+  }
+
+  StateId Head() const { return queue_->Head(); }
+
+  void Enqueue(StateId s) { queue_->Enqueue(s); }
+
+  void Dequeue() { queue_->Dequeue(); }
+
+  void Update(StateId s) { queue_->Update(s); }
+
+  bool Empty() const { return queue_->Empty(); }
+
+  void Clear() { queue_->Clear(); }
+
+
+ private:
+  QueueBase<StateId> *queue_;            // The selected queue (owned).
+  vector< QueueBase<StateId>* > queues_; // Per-SCC queues (owned); may be empty.
+  vector<StateId> scc_;                  // SCC number per state, if computed.
+
+  template <class Arc, class ArcFilter, class Less>
+  static void SccQueueType(const Fst<Arc> &fst,
+                           const vector<StateId> &scc,
+                           vector<QueueType> *queue_types,
+                           ArcFilter filter, Less *less,
+                           bool *all_trivial, bool *unweighted);
+
+  // This allows base-class virtual access to non-virtual derived-
+  // class members of the same name. It makes the derived class more
+  // efficient to use but unsafe to further derive.
+  virtual StateId Head_() const { return Head(); }
+
+  virtual void Enqueue_(StateId s) { Enqueue(s); }
+
+  virtual void Dequeue_() { Dequeue(); }
+
+  virtual void Update_(StateId s) { Update(s); }
+
+  virtual bool Empty_() const { return Empty(); }
+
+  virtual void Clear_() { return Clear(); }
+
+  DISALLOW_COPY_AND_ASSIGN(AutoQueue);
+};
+
+
+// Examines the states in an Fst's strongly connected components and
+// determines which type of queue to use per SCC. Stores result in
+// vector QUEUE_TYPE, which is assumed to have length equal to the
+// number of SCCs. An arc filter is used to limit the transitions
+// considered (e.g., only the epsilon graph). ALL_TRIVIAL is set
+// to true if every queue is the trivial queue. UNWEIGHTED is set to
+// true if the semiring is idempotent and all the arc weights are equal to
+// Zero() or One().
+template <class StateId>
+template <class A, class ArcFilter, class Less>
+void AutoQueue<StateId>::SccQueueType(const Fst<A> &fst,
+ const vector<StateId> &scc,
+ vector<QueueType> *queue_type,
+ ArcFilter filter, Less *less,
+ bool *all_trivial, bool *unweighted) {
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+ typedef typename A::Weight Weight;
+
+ *all_trivial = true;
+ *unweighted = true;
+
+ for (StateId i = 0; i < queue_type->size(); ++i)
+ (*queue_type)[i] = TRIVIAL_QUEUE;  // Start from trivial; upgraded below.
+
+ for (StateIterator< Fst<Arc> > sit(fst); !sit.Done(); sit.Next()) {
+ StateId state = sit.Value();
+ for (ArcIterator< Fst<Arc> > ait(fst, state);
+ !ait.Done();
+ ait.Next()) {
+ const Arc &arc = ait.Value();
+ if (!filter(arc)) continue;  // Only arcs accepted by the filter count.
+ if (scc[state] == scc[arc.nextstate]) {  // Arc stays inside one SCC.
+ QueueType &type = (*queue_type)[scc[state]];
+ if (!less || ((*less)(arc.weight, Weight::One())))  // No order, or weight less than One().
+ type = FIFO_QUEUE;
+ else if ((type == TRIVIAL_QUEUE) || (type == LIFO_QUEUE)) {
+ if (!(Weight::Properties() & kIdempotent) ||
+ (arc.weight != Weight::Zero() && arc.weight != Weight::One()))
+ type = SHORTEST_FIRST_QUEUE;
+ else
+ type = LIFO_QUEUE;
+ }
+ if (type != TRIVIAL_QUEUE) *all_trivial = false;
+ }
+ if (!(Weight::Properties() & kIdempotent) ||  // Tested for every filtered arc, not only intra-SCC ones.
+ (arc.weight != Weight::Zero() && arc.weight != Weight::One()))
+ *unweighted = false;
+ }
+ }
+}
+
+
+// An A* estimate is a function object that maps from a state ID to
+// an estimate of the shortest distance to the final states.
+// The trivial A* estimate is always One().
+template <typename S, typename W>
+struct TrivialAStarEstimate {
+ W operator()(S s) const { return W::One(); }  // The state argument is ignored.
+};
+
+
+// Orders two states by Times(distance, estimate): the shortest distance
+// found so far from the initial state, looked up in a weight vector,
+// extended by an estimate E of the remaining distance to the final
+// states.  The two products are compared with the Less function
+// object L.  The vector and the estimate are held by reference.
+template <typename S, typename L, typename E>
+class AStarWeightCompare {
+ public:
+  typedef S StateId;
+  typedef L Less;
+  typedef typename L::Weight Weight;
+
+  AStarWeightCompare(const vector<Weight>& weights, const L &less,
+                     const E &estimate)
+      : weights_(weights), less_(less), estimate_(estimate) {}
+
+  // True iff the A* priority of state x precedes that of state y.
+  bool operator()(const S x, const S y) const {
+    return less_(Times(weights_[x], estimate_(x)),
+                 Times(weights_[y], estimate_(y)));
+  }
+
+ private:
+  const vector<Weight>& weights_;  // distance per state; not owned
+  L less_;                         // copy of the weight ordering
+  const E &estimate_;              // not owned
+};
+
+
+// A* queue discipline, templated on the StateId, Weight and an
+// estimate E of the shortest distance to the final states: a
+// ShortestFirstQueue that compares states by the weight's natural order.
+template <typename S, typename W, typename E>
+class NaturalAStarQueue :
+      public ShortestFirstQueue<S, AStarWeightCompare<S, NaturalLess<W>, E> > {
+ public:
+  typedef AStarWeightCompare<S, NaturalLess<W>, E> C;
+  // Bases are built before members, so pass a fresh NaturalLess, not less_.
+  NaturalAStarQueue(const vector<W> &distance, const E &estimate) :
+      ShortestFirstQueue<S, C>(C(distance, NaturalLess<W>(), estimate)) {}
+
+ private:
+  NaturalLess<W> less_;  // no longer read; kept so the layout is unchanged
+};
+
+
+// A state equivalence class is a function object that
+// maps from a state ID to an equivalence class (state) ID.
+// The trivial equivalence class maps a state to itself.
+template <typename S>
+struct TrivialStateEquivClass {
+ S operator()(S s) const { return s; }  // Identity mapping.
+};
+
+
+// Pruning queue discipline: Enqueues a state 's' only when its
+// shortest distance (so far), as specified by 'distance', is less
+// than (as specified by 'comp') the shortest distance Times() the
+// 'threshold' to any state in the same equivalence class, as
+// specified by the function object 'class_func'. The underlying
+// queue discipline is specified by 'queue'. The ownership of 'queue'
+// is given to this class.
+template <typename Q, typename L, typename C>
+class PruneQueue : public QueueBase<typename Q::StateId> {
+ public:
+ typedef typename Q::StateId StateId;
+ typedef typename L::Weight Weight;
+
+ PruneQueue(const vector<Weight> &distance, Q *queue, L comp,
+ const C &class_func, Weight threshold)
+ : QueueBase<StateId>(OTHER_QUEUE),
+ distance_(distance),
+ queue_(queue),
+ less_(comp),
+ class_func_(class_func),
+ threshold_(threshold) {}
+
+ ~PruneQueue() { delete queue_; }
+
+ StateId Head() const { return queue_->Head(); }
+
+ void Enqueue(StateId s) {
+ StateId c = class_func_(s);
+ if (c >= class_distance_.size())  // First state seen for this class (or a higher class ID).
+ class_distance_.resize(c + 1, Weight::Zero());
+ if (less_(distance_[s], class_distance_[c]))  // Track the best distance per class.
+ class_distance_[c] = distance_[s];
+
+ // Enqueue only if below threshold limit
+ Weight limit = Times(class_distance_[c], threshold_);
+ if (less_(distance_[s], limit))
+ queue_->Enqueue(s);
+ }
+
+ void Dequeue() { queue_->Dequeue(); }
+
+ void Update(StateId s) {
+ StateId c = class_func_(s);
+ if (less_(distance_[s], class_distance_[c]))  // NOTE(review): no resize here, unlike Enqueue -- presumably only called after Enqueue(s); confirm.
+ class_distance_[c] = distance_[s];
+ queue_->Update(s);
+ }
+
+ bool Empty() const { return queue_->Empty(); }
+ void Clear() { queue_->Clear(); }  // Only the underlying queue is cleared; class_distance_ is kept.
+
+ private:
+ // This allows base-class virtual access to non-virtual derived-
+ // class members of the same name. It makes the derived class more
+ // efficient to use but unsafe to further derive.
+ virtual StateId Head_() const { return Head(); }
+ virtual void Enqueue_(StateId s) { Enqueue(s); }
+ virtual void Dequeue_() { Dequeue(); }
+ virtual void Update_(StateId s) { Update(s); }
+ virtual bool Empty_() const { return Empty(); }
+ virtual void Clear_() { return Clear(); }
+
+ const vector<Weight> &distance_; // shortest distance to state
+ Q *queue_;  // underlying queue; deleted in the destructor
+ L less_;  // copy of 'comp'
+ const C &class_func_; // eqv. class function object
+ Weight threshold_; // pruning weight threshold
+ vector<Weight> class_distance_; // shortest distance to class
+
+ DISALLOW_COPY_AND_ASSIGN(PruneQueue);
+};
+
+
+// Pruning queue discipline (see above) using the weight's natural
+// order for the comparison function. The ownership of 'queue' is
+// given to this class.
+template <typename Q, typename W, typename C>
+class NaturalPruneQueue :
+      public PruneQueue<Q, NaturalLess<W>, C> {
+ public:
+  typedef typename Q::StateId StateId;
+  typedef W Weight;
+  // Bases are built before members, so pass a fresh NaturalLess, not less_.
+  NaturalPruneQueue(const vector<W> &distance, Q *queue,
+                    const C &class_func_, Weight threshold) :
+      PruneQueue<Q, NaturalLess<W>, C>(distance, queue, NaturalLess<W>(),
+                                       class_func_, threshold) {}
+
+ private:
+  NaturalLess<W> less_;  // no longer read; kept so the layout is unchanged
+};
+
+
+} // namespace fst
+
+#endif
diff --git a/src/include/fst/randequivalent.h b/src/include/fst/randequivalent.h
new file mode 100644
index 0000000..1aaccf7
--- /dev/null
+++ b/src/include/fst/randequivalent.h
@@ -0,0 +1,135 @@
+// randequivalent.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// Tests if two FSTs are equivalent by checking if random
+// strings from one FST are transduced the same by both FSTs.
+
+#ifndef FST_RANDEQUIVALENT_H__
+#define FST_RANDEQUIVALENT_H__
+
+#include <fst/arcsort.h>
+#include <fst/compose.h>
+#include <fst/project.h>
+#include <fst/randgen.h>
+#include <fst/shortest-distance.h>
+#include <fst/vector-fst.h>
+
+
+namespace fst {
+
+// Test if two FSTs are equivalent by randomly generating 'num_paths'
+// paths (as specified by the RandGenOptions 'opts') in these FSTs.
+//
+// For each randomly generated path, the algorithm computes for each
+// of the two FSTs the sum of the weights of all the successful paths
+// sharing the same input and output labels as the considered randomly
+// generated path and checks that these two values are within
+// 'delta'. Returns optional error value (when FLAGS_error_fatal = false).
+template<class Arc, class ArcSelector>
+bool RandEquivalent(const Fst<Arc> &fst1, const Fst<Arc> &fst2,
+ ssize_t num_paths, float delta,
+ const RandGenOptions<ArcSelector> &opts,
+ bool *error = 0) {
+ typedef typename Arc::Weight Weight;
+ if (error) *error = false;
+
+ // Check that the symbol tables are compatible
+ if (!CompatSymbols(fst1.InputSymbols(), fst2.InputSymbols()) ||
+ !CompatSymbols(fst1.OutputSymbols(), fst2.OutputSymbols())) {
+ FSTERROR() << "RandEquivalent: input/output symbol tables of 1st "
+ << "argument do not match input/output symbol tables of 2nd "
+ << "argument";
+ if (error) *error = true;
+ return false;
+ }
+
+ ILabelCompare<Arc> icomp;
+ OLabelCompare<Arc> ocomp;
+ VectorFst<Arc> sfst1(fst1);  // Work on copies; the arguments are const.
+ VectorFst<Arc> sfst2(fst2);
+ Connect(&sfst1);
+ Connect(&sfst2);
+ ArcSort(&sfst1, icomp);
+ ArcSort(&sfst2, icomp);
+
+ bool ret = true;
+ for (ssize_t n = 0; n < num_paths; ++n) {
+ VectorFst<Arc> path;
+ const Fst<Arc> &fst = rand() % 2 ? sfst1 : sfst2;  // Source FST chosen with rand().
+ RandGen(fst, &path, opts);
+
+ VectorFst<Arc> ipath(path);
+ VectorFst<Arc> opath(path);
+ Project(&ipath, PROJECT_INPUT);
+ Project(&opath, PROJECT_OUTPUT);
+
+ VectorFst<Arc> cfst1, pfst1;
+ Compose(ipath, sfst1, &cfst1);  // Restrict to the path's input labels...
+ ArcSort(&cfst1, ocomp);
+ Compose(cfst1, opath, &pfst1);  // ...then to its output labels.
+ // Give up if there are epsilon cycles in a non-idempotent semiring
+ if (!(Weight::Properties() & kIdempotent) &&
+ pfst1.Properties(kCyclic, true))
+ continue;  // This sample is skipped, not counted as a mismatch.
+ Weight sum1 = ShortestDistance(pfst1);
+
+ VectorFst<Arc> cfst2, pfst2;
+ Compose(ipath, sfst2, &cfst2);
+ ArcSort(&cfst2, ocomp);
+ Compose(cfst2, opath, &pfst2);
+ // Give up if there are epsilon cycles in a non-idempotent semiring
+ if (!(Weight::Properties() & kIdempotent) &&
+ pfst2.Properties(kCyclic, true))
+ continue;
+ Weight sum2 = ShortestDistance(pfst2);
+
+ if (!ApproxEqual(sum1, sum2, delta)) {
+ VLOG(1) << "Sum1 = " << sum1;
+ VLOG(1) << "Sum2 = " << sum2;
+ ret = false;
+ break;  // The first mismatch decides the result.
+ }
+ }
+
+ if (fst1.Properties(kError, false) || fst2.Properties(kError, false)) {  // Error bits are read from the original arguments.
+ if (error) *error = true;
+ return false;
+ }
+
+ return ret;
+}
+
+
+// Convenience overload: checks equivalence on 'num_paths' random paths
+// of length no more than 'path_length', drawn with a uniform arc selector
+// seeded by 'seed'.  Returns optional error value as the overload above.
+template <class Arc>
+bool RandEquivalent(const Fst<Arc> &fst1, const Fst<Arc> &fst2,
+                    ssize_t num_paths, float delta = kDelta,
+                    int seed = time(0), int path_length = INT_MAX,
+                    bool *error = 0) {
+  typedef UniformArcSelector<Arc> Selector;
+  Selector selector(seed);
+  RandGenOptions<Selector> rand_opts(selector, path_length);
+  return RandEquivalent(fst1, fst2, num_paths, delta, rand_opts, error);
+}
+
+
+} // namespace fst
+
+#endif // FST_RANDEQUIVALENT_H__
diff --git a/src/include/fst/randgen.h b/src/include/fst/randgen.h
new file mode 100644
index 0000000..82ddffa
--- /dev/null
+++ b/src/include/fst/randgen.h
@@ -0,0 +1,712 @@
+// randgen.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Classes and functions to generate random paths through an FST.
+
+#ifndef FST_LIB_RANDGEN_H__
+#define FST_LIB_RANDGEN_H__
+
+#include <cmath>
+#include <cstdlib>
+#include <ctime>
+#include <map>
+
+#include <fst/accumulator.h>
+#include <fst/cache.h>
+#include <fst/dfs-visit.h>
+#include <fst/mutable-fst.h>
+
+namespace fst {
+
+//
+// ARC SELECTORS - these function objects are used to select a random
+// transition to take from an FST's state. They should return a number
+// N s.t. 0 <= N <= NumArcs(). If N < NumArcs(), then the N-th
+// transition is selected. If N == NumArcs(), then the final weight at
+// that state is selected (i.e., the 'super-final' transition is selected).
+// It can be assumed these will not be called unless either there
+// are transitions leaving the state and/or the state is final.
+//
+
+// Selects among a state's transitions with equal probability.
+template <class A>
+struct UniformArcSelector {
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+
+  UniformArcSelector(int seed = time(0)) { srand(seed); }
+
+  size_t operator()(const Fst<A> &fst, StateId s) const {
+    const double u = rand()/(RAND_MAX + 1.0);  // uniform in [0, 1)
+    // A non-Zero final weight counts as one extra ('super-final') choice.
+    const bool is_final = fst.Final(s) != Weight::Zero();
+    const size_t nchoices = fst.NumArcs(s) + (is_final ? 1 : 0);
+    return static_cast<size_t>(u * nchoices);
+  }
+};
+
+
+// Randomly selects a transition w.r.t. the weights treated as negative
+// log probabilities after normalizing for the total weight leaving
+// the state. Weight::Zero() transitions are disregarded.
+// Assumes Weight::Value() accesses the floating point
+// representation of the weight.
+template <class A>
+class LogProbArcSelector {
+ public:
+ typedef typename A::StateId StateId;
+ typedef typename A::Weight Weight;
+
+ LogProbArcSelector(int seed = time(0)) { srand(seed); }  // Seeds the global rand() generator.
+
+ size_t operator()(const Fst<A> &fst, StateId s) const {
+ // Find total weight leaving state
+ double sum = 0.0;
+ for (ArcIterator< Fst<A> > aiter(fst, s); !aiter.Done();
+ aiter.Next()) {
+ const A &arc = aiter.Value();
+ sum += exp(-to_log_weight_(arc.weight).Value());
+ }
+ sum += exp(-to_log_weight_(fst.Final(s)).Value());  // The final weight takes part in the normalization.
+
+ double r = rand()/(RAND_MAX + 1.0);
+ double p = 0.0;
+ int n = 0;
+ for (ArcIterator< Fst<A> > aiter(fst, s); !aiter.Done();
+ aiter.Next(), ++n) {
+ const A &arc = aiter.Value();
+ p += exp(-to_log_weight_(arc.weight).Value());
+ if (p > r * sum) return n;  // First arc whose cumulative mass exceeds the draw.
+ }
+ return n;  // No arc selected: n is the arc count, i.e. the 'super-final' choice.
+ }
+
+ private:
+ WeightConvert<Weight, Log64Weight> to_log_weight_;
+};
+
+// Convenience definitions
+typedef LogProbArcSelector<StdArc> StdArcSelector;
+typedef LogProbArcSelector<LogArc> LogArcSelector;
+
+
+// Same as LogProbArcSelector but use CacheLogAccumulator to cache
+// the cumulative weight computations.
+template <class A>
+class FastLogProbArcSelector : public LogProbArcSelector<A> {
+ public:
+ typedef typename A::StateId StateId;
+ typedef typename A::Weight Weight;
+ using LogProbArcSelector<A>::operator();  // Keeps the two-argument overload visible.
+
+ FastLogProbArcSelector(int seed = time(0))
+ : LogProbArcSelector<A>(seed),
+ seed_(seed) {}
+
+ size_t operator()(const Fst<A> &fst, StateId s,
+ CacheLogAccumulator<A> *accumulator) const {
+ accumulator->SetState(s);
+ ArcIterator< Fst<A> > aiter(fst, s);
+ // Find total weight leaving state
+ double sum = to_log_weight_(accumulator->Sum(fst.Final(s), &aiter, 0,
+ fst.NumArcs(s))).Value();
+ double r = -log(rand()/(RAND_MAX + 1.0));  // Random draw mapped to a negative log.
+ return accumulator->LowerBound(r + sum, &aiter);
+ }
+
+ int Seed() const { return seed_; }  // The seed given at construction.
+ private:
+ int seed_;
+ WeightConvert<Weight, Log64Weight> to_log_weight_;
+};
+
+// Random path state info maintained by RandGenFst and passed to samplers.
+template <typename A>
+struct RandState {
+ typedef typename A::StateId StateId;
+
+ StateId state_id; // current input FST state
+ size_t nsamples; // # of samples to be sampled at this state
+ size_t length; // length of path to this random state
+ size_t select; // previous sample arc selection
+ const RandState<A> *parent; // previous random state on this path
+
+ RandState(StateId s, size_t n, size_t l, size_t k, const RandState<A> *p)
+ : state_id(s), nsamples(n), length(l), select(k), parent(p) {}
+
+ RandState()  // Default: no state, no samples, no parent.
+ : state_id(kNoStateId), nsamples(0), length(0), select(0), parent(0) {}
+};
+
+// This class, given an arc selector, samples, with replacement,
+// multiple random transitions from an FST's state. This is a generic
+// version with a straightforward use of the arc selector.
+// Specializations may be defined for arc selectors for greater
+// efficiency or special behavior.
+template <class A, class S>
+class ArcSampler {
+ public:
+ typedef typename A::StateId StateId;
+ typedef typename A::Weight Weight;
+
+ // The 'max_length' may be interpreted (including ignored) by a
+ // sampler as it chooses. This generic version interprets this literally.
+ ArcSampler(const Fst<A> &fst, const S &arc_selector,
+ int max_length = INT_MAX)
+ : fst_(fst),
+ arc_selector_(arc_selector),
+ max_length_(max_length) {}
+
+ // Allow updating Fst argument; pass only if changed.
+ ArcSampler(const ArcSampler<A, S> &sampler, const Fst<A> *fst = 0)
+ : fst_(fst ? *fst : sampler.fst_),
+ arc_selector_(sampler.arc_selector_),
+ max_length_(sampler.max_length_) {
+ Reset();  // Samples are not copied; the new sampler starts empty.
+ }
+
+ // Samples 'rstate.nsamples' from state 'state_id'. The 'rstate.length' is
+ // the length of the path to 'rstate'. Returns true if samples were
+ // collected. No samples may be collected if either there are no (including
+ // 'super-final') transitions leaving that state or if the
+ // 'max_length' has been deemed reached. Use the iterator members to
+ // read the samples. The samples will be in their original order.
+ bool Sample(const RandState<A> &rstate) {
+ sample_map_.clear();
+ if ((fst_.NumArcs(rstate.state_id) == 0 &&
+ fst_.Final(rstate.state_id) == Weight::Zero()) ||
+ rstate.length == max_length_) {
+ Reset();
+ return false;
+ }
+
+ for (size_t i = 0; i < rstate.nsamples; ++i)
+ ++sample_map_[arc_selector_(fst_, rstate.state_id)];  // Count repetitions per selected position.
+ Reset();
+ return true;
+ }
+
+ // More samples?
+ bool Done() const { return sample_iter_ == sample_map_.end(); }
+
+ // Gets the next sample.
+ void Next() { ++sample_iter_; }
+
+ // Returns a pair (N, K) where 0 <= N <= NumArcs(s) and 0 < K <= nsamples.
+ // If N < NumArcs(s), then the N-th transition is specified.
+ // If N == NumArcs(s), then the final weight at that state is
+ // specified (i.e., the 'super-final' transition is specified).
+ // For the specified transition, K repetitions have been sampled.
+ pair<size_t, size_t> Value() const { return *sample_iter_; }
+
+ void Reset() { sample_iter_ = sample_map_.begin(); }  // Rewinds to the first collected sample.
+
+ bool Error() const { return false; }  // The generic sampler never reports an error.
+
+ private:
+ const Fst<A> &fst_;  // held by reference
+ const S &arc_selector_;  // held by reference
+ int max_length_;
+
+ // Stores (N, K) as described for Value().
+ map<size_t, size_t> sample_map_;
+ map<size_t, size_t>::const_iterator sample_iter_;
+
+ // disallow
+ ArcSampler<A, S> & operator=(const ArcSampler<A, S> &s);
+};
+
+
+// Specialization for FastLogProbArcSelector; samples via a CacheLogAccumulator.
+template <class A>
+class ArcSampler<A, FastLogProbArcSelector<A> > {
+ public:
+  typedef FastLogProbArcSelector<A> S;
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+  typedef CacheLogAccumulator<A> C;
+
+  ArcSampler(const Fst<A> &fst, const S &arc_selector, int max_length = INT_MAX)
+      : fst_(fst),
+        arc_selector_(arc_selector),
+        max_length_(max_length),
+        accumulator_(new C()) {
+    accumulator_->Init(fst);
+  }
+
+  // Allow updating Fst argument; pass only if changed.
+  ArcSampler(const ArcSampler<A, S> &sampler, const Fst<A> *fst = 0)
+      : fst_(fst ? *fst : sampler.fst_),
+        arc_selector_(sampler.arc_selector_),
+        max_length_(sampler.max_length_) {
+    if (fst) {
+      accumulator_ = new C();
+      accumulator_->Init(*fst);
+    } else {           // shallow copy
+      accumulator_ = new C(*sampler.accumulator_);
+    }
+    Reset();  // as in the generic ArcSampler: start with a valid iterator
+  }
+
+  ~ArcSampler() { delete accumulator_; }
+
+  // Draws 'rstate.nsamples' selections at 'rstate.state_id'.  Returns false,
+  bool Sample(const RandState<A> &rstate) {  // with no samples, on a dead end
+    sample_map_.clear();                     // or when max_length is reached.
+    if ((fst_.NumArcs(rstate.state_id) == 0 &&
+         fst_.Final(rstate.state_id) == Weight::Zero()) ||
+        rstate.length == max_length_) {
+      Reset();
+      return false;
+    }
+
+    for (size_t i = 0; i < rstate.nsamples; ++i)
+      ++sample_map_[arc_selector_(fst_, rstate.state_id, accumulator_)];
+    Reset();
+    return true;
+  }
+
+  bool Done() const { return sample_iter_ == sample_map_.end(); }
+  void Next() { ++sample_iter_; }
+  pair<size_t, size_t> Value() const { return *sample_iter_; }
+  void Reset() { sample_iter_ = sample_map_.begin(); }
+
+  bool Error() const { return accumulator_->Error(); }
+
+ private:
+  const Fst<A> &fst_;
+  const S &arc_selector_;
+  int max_length_;
+
+  // Stores (N, K) as described for Value().
+  map<size_t, size_t> sample_map_;
+  map<size_t, size_t>::const_iterator sample_iter_;
+  C *accumulator_;  // owned; deleted in the destructor
+
+  // disallow
+  ArcSampler<A, S> & operator=(const ArcSampler<A, S> &s);
+};
+
+
+// Options for random path generation with RandGenFst. The template argument
+// is an arc sampler, typically class 'ArcSampler' above. Ownership of
+// the sampler is taken by RandGenFst.
+template <class S>
+struct RandGenFstOptions : public CacheOptions {
+ S *arc_sampler; // How to sample transitions at a state
+ size_t npath; // # of paths to generate
+ bool weighted; // Output tree weighted by path count; o.w.
+ // output unweighted DAG
+ bool remove_total_weight; // Remove total weight when output is weighted.
+
+ RandGenFstOptions(const CacheOptions &copts, S *samp,
+ size_t n = 1, bool w = true, bool rw = false)  // Defaults: one path, weighted output, total weight kept.
+ : CacheOptions(copts),
+ arc_sampler(samp),
+ npath(n),
+ weighted(w),
+ remove_total_weight(rw) {}
+};
+
+
+// Implementation of RandGenFst.
+template <class A, class B, class S>
+class RandGenFstImpl : public CacheImpl<B> {
+ public:
+ using FstImpl<B>::SetType;
+ using FstImpl<B>::SetProperties;
+ using FstImpl<B>::SetInputSymbols;
+ using FstImpl<B>::SetOutputSymbols;
+
+ using CacheBaseImpl< CacheState<B> >::AddArc;
+ using CacheBaseImpl< CacheState<B> >::HasArcs;
+ using CacheBaseImpl< CacheState<B> >::HasFinal;
+ using CacheBaseImpl< CacheState<B> >::HasStart;
+ using CacheBaseImpl< CacheState<B> >::SetArcs;
+ using CacheBaseImpl< CacheState<B> >::SetFinal;
+ using CacheBaseImpl< CacheState<B> >::SetStart;
+
+ typedef B Arc;
+ typedef typename A::Label Label;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+
+ RandGenFstImpl(const Fst<A> &fst, const RandGenFstOptions<S> &opts)
+ : CacheImpl<B>(opts),
+ fst_(fst.Copy()),  // private copy; deleted in the destructor
+ arc_sampler_(opts.arc_sampler),  // ownership taken; deleted in the destructor
+ npath_(opts.npath),
+ weighted_(opts.weighted),
+ remove_total_weight_(opts.remove_total_weight),
+ superfinal_(kNoLabel) {  // no super-final state has been created yet
+ SetType("randgen");
+
+ uint64 props = fst.Properties(kFstProperties, false);
+ SetProperties(RandGenProperties(props, weighted_), kCopyProperties);
+
+ SetInputSymbols(fst.InputSymbols());
+ SetOutputSymbols(fst.OutputSymbols());
+ }
+
+  RandGenFstImpl(const RandGenFstImpl &impl)  // Copies impl's FST and sampler.
+      : CacheImpl<B>(impl),
+        fst_(impl.fst_->Copy(true)),
+        arc_sampler_(new S(*impl.arc_sampler_, fst_)),
+        npath_(impl.npath_), weighted_(impl.weighted_),
+        remove_total_weight_(impl.remove_total_weight_),  // was left unset
+        superfinal_(kNoLabel) {
+    SetType("randgen");
+    SetProperties(impl.Properties(), kCopyProperties);
+    SetInputSymbols(impl.InputSymbols());
+    SetOutputSymbols(impl.OutputSymbols());
+  }
+
+  ~RandGenFstImpl() {
+    // Frees the RandState records, then the FST copy and the sampler.
+    for (int k = 0; k < state_table_.size(); ++k) delete state_table_[k];
+    delete fst_;
+    delete arc_sampler_;
+  }
+
+ StateId Start() {
+ if (!HasStart()) {
+ StateId s = fst_->Start();
+ if (s == kNoStateId)
+ return kNoStateId;  // Input has no start state; nothing is cached.
+ StateId start = state_table_.size();  // Next free output state ID.
+ SetStart(start);
+ RandState<A> *rstate = new RandState<A>(s, npath_, 0, 0, 0);  // All npath_ samples begin here, at length 0.
+ state_table_.push_back(rstate);
+ }
+ return CacheImpl<B>::Start();
+ }
+
+ Weight Final(StateId s) {  // Expands 's' first if its final weight is not cached.
+ if (!HasFinal(s)) {
+ Expand(s);
+ }
+ return CacheImpl<B>::Final(s);
+ }
+
+ size_t NumArcs(StateId s) {  // Expands 's' first if its arcs are not cached.
+ if (!HasArcs(s)) {
+ Expand(s);
+ }
+ return CacheImpl<B>::NumArcs(s);
+ }
+
+ size_t NumInputEpsilons(StateId s) {  // Expands 's' first if its arcs are not cached.
+ if (!HasArcs(s))
+ Expand(s);
+ return CacheImpl<B>::NumInputEpsilons(s);
+ }
+
+ size_t NumOutputEpsilons(StateId s) {  // Expands 's' first if its arcs are not cached.
+ if (!HasArcs(s))
+ Expand(s);
+ return CacheImpl<B>::NumOutputEpsilons(s);
+ }
+
+ uint64 Properties() const { return Properties(kFstProperties); }
+
+ // Set error if found; return FST impl properties.
+ uint64 Properties(uint64 mask) const {
+ if ((mask & kError) &&  // Only checked when the caller asks about kError.
+ (fst_->Properties(kError, false) || arc_sampler_->Error())) {
+ SetProperties(kError, kError);
+ }
+ return FstImpl<Arc>::Properties(mask);
+ }
+
+ void InitArcIterator(StateId s, ArcIteratorData<B> *data) {
+ if (!HasArcs(s))  // Expand 's' on first use.
+ Expand(s);
+ CacheImpl<B>::InitArcIterator(s, data);
+ }
+
+ // Computes the outgoing transitions from a state, creating new destination
+ // states as needed.
+ void Expand(StateId s) {
+ if (s == superfinal_) {  // The super-final state: final with weight One(), no arcs.
+ SetFinal(s, Weight::One());
+ SetArcs(s);
+ return;
+ }
+
+ SetFinal(s, Weight::Zero());  // Default; replaced below in the weighted super-final case.
+ const RandState<A> &rstate = *state_table_[s];
+ arc_sampler_->Sample(rstate);
+ ArcIterator< Fst<A> > aiter(*fst_, rstate.state_id);
+ size_t narcs = fst_->NumArcs(rstate.state_id);
+ for (;!arc_sampler_->Done(); arc_sampler_->Next()) {
+ const pair<size_t, size_t> &sample_pair = arc_sampler_->Value();
+ size_t pos = sample_pair.first;
+ size_t count = sample_pair.second;
+ double prob = static_cast<double>(count)/rstate.nsamples;  // Share of this state's samples that chose 'pos'.
+ if (pos < narcs) { // regular transition
+ aiter.Seek(sample_pair.first);
+ const A &aarc = aiter.Value();
+ Weight weight = weighted_ ? to_weight_(-log(prob)) : Weight::One();
+ B barc(aarc.ilabel, aarc.olabel, weight, state_table_.size());  // Destination is the state pushed below.
+ AddArc(s, barc);
+ RandState<A> *nrstate =
+ new RandState<A>(aarc.nextstate, count, rstate.length + 1,
+ pos, &rstate);
+ state_table_.push_back(nrstate);
+ } else { // super-final transition
+ if (weighted_) {
+ Weight weight = remove_total_weight_ ?
+ to_weight_(-log(prob)) : to_weight_(-log(prob * npath_));
+ SetFinal(s, weight);
+ } else {
+ if (superfinal_ == kNoLabel) {  // Create the shared super-final state on first use.
+ superfinal_ = state_table_.size();
+ RandState<A> *nrstate = new RandState<A>(kNoStateId, 0, 0, 0, 0);
+ state_table_.push_back(nrstate);
+ }
+ for (size_t n = 0; n < count; ++n) {  // One epsilon arc per sampled repetition.
+ B barc(0, 0, Weight::One(), superfinal_);
+ AddArc(s, barc);
+ }
+ }
+ }
+ }
+ SetArcs(s);
+ }
+
+ private:
+ Fst<A> *fst_;  // copy of the input FST; deleted in the destructor
+ S *arc_sampler_;  // deleted in the destructor
+ size_t npath_;  // number of samples placed at the start state
+ vector<RandState<A> *> state_table_;  // indexed by output state ID; entries deleted in the destructor
+ bool weighted_;
+ bool remove_total_weight_;
+ StateId superfinal_;  // kNoLabel until the super-final state is created in Expand()
+ WeightConvert<Log64Weight, Weight> to_weight_;
+
+ void operator=(const RandGenFstImpl<A, B, S> &); // disallow
+};
+
+
+// Fst class to randomly generate paths through an FST; details controlled
+// by RandGenFstOptions. Output format is a tree weighted by the
+// path count.
+template <class A, class B, class S>
+class RandGenFst : public ImplToFst< RandGenFstImpl<A, B, S> > {
+ public:
+ friend class ArcIterator< RandGenFst<A, B, S> >;
+ friend class StateIterator< RandGenFst<A, B, S> >;
+ typedef B Arc;
+ typedef S Sampler;
+ typedef typename A::Label Label;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+ typedef CacheState<B> State;
+ typedef RandGenFstImpl<A, B, S> Impl;
+
+ RandGenFst(const Fst<A> &fst, const RandGenFstOptions<S> &opts)
+ : ImplToFst<Impl>(new Impl(fst, opts)) {}
+
+ // See Fst<>::Copy() for doc.
+ RandGenFst(const RandGenFst<A, B, S> &fst, bool safe = false)
+ : ImplToFst<Impl>(fst, safe) {}
+
+ // Get a copy of this RandGenFst. See Fst<>::Copy() for further doc.
+ virtual RandGenFst<A, B, S> *Copy(bool safe = false) const {
+ return new RandGenFst<A, B, S>(*this, safe);
+ }
+
+ virtual inline void InitStateIterator(StateIteratorData<B> *data) const;
+
+ virtual void InitArcIterator(StateId s, ArcIteratorData<B> *data) const {
+ GetImpl()->InitArcIterator(s, data);  // Forwards to the implementation.
+ }
+
+ private:
+ // Makes visible to friends.
+ Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
+
+ void operator=(const RandGenFst<A, B, S> &fst); // Disallow
+};
+
+
+
+// Specialization for RandGenFst.
+template <class A, class B, class S>
+class StateIterator< RandGenFst<A, B, S> >
+ : public CacheStateIterator< RandGenFst<A, B, S> > {
+ public:
+ explicit StateIterator(const RandGenFst<A, B, S> &fst)
+ : CacheStateIterator< RandGenFst<A, B, S> >(fst, fst.GetImpl()) {}  // All iteration is done by the base class.
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(StateIterator);
+};
+
+
+// Specialization for RandGenFst.
+template <class A, class B, class S>
+class ArcIterator< RandGenFst<A, B, S> >
+ : public CacheArcIterator< RandGenFst<A, B, S> > {
+ public:
+ typedef typename A::StateId StateId;
+
+ ArcIterator(const RandGenFst<A, B, S> &fst, StateId s)
+ : CacheArcIterator< RandGenFst<A, B, S> >(fst.GetImpl(), s) {
+ if (!fst.GetImpl()->HasArcs(s))  // Expand 's' if its arcs are not cached yet.
+ fst.GetImpl()->Expand(s);
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+
+template <class A, class B, class S> inline
+void RandGenFst<A, B, S>::InitStateIterator(StateIteratorData<B> *data) const
+{
+ data->base = new StateIterator< RandGenFst<A, B, S> >(*this);  // Heap-allocated iterator stored in 'data->base'.
+}
+
+// Options for random path generation.
+template <class S>
+struct RandGenOptions {
+ const S &arc_selector; // How an arc is selected at a state
+ int max_length; // Maximum path length
+ size_t npath; // # of paths to generate
+ bool weighted; // Output is tree weighted by path count; o.w.
+ // output unweighted union of paths.
+ bool remove_total_weight; // Remove total weight when output is weighted.
+
+ RandGenOptions(const S &sel, int len = INT_MAX, size_t n = 1,
+ bool w = false, bool rw = false)  // 'sel' is stored by reference, not copied.
+ : arc_selector(sel),
+ max_length(len),
+ npath(n),
+ weighted(w),
+ remove_total_weight(rw) {}
+};
+
+
+template <class IArc, class OArc>
+class RandGenVisitor {  // DFS visitor that writes each visited path to ofst.
+ public:
+  typedef typename IArc::Weight Weight;
+  typedef typename IArc::StateId StateId;
+
+  RandGenVisitor(MutableFst<OArc> *ofst) : ofst_(ofst) {}  // ofst not owned
+
+  void InitVisit(const Fst<IArc> &ifst) {
+    ifst_ = &ifst;
+    // Start from an empty output that shares the input's symbol tables.
+    ofst_->DeleteStates();
+    ofst_->SetInputSymbols(ifst.InputSymbols());
+    ofst_->SetOutputSymbols(ifst.OutputSymbols());
+    if (ifst.Properties(kError, false))
+      ofst_->SetProperties(kError, kError);
+    path_.clear();
+  }
+
+  bool InitState(StateId s, StateId root) { return true; }
+
+  bool TreeArc(StateId s, const IArc &arc) {
+    if (ifst_->Final(arc.nextstate) == Weight::Zero()) {
+      path_.push_back(arc);  // still inside a path: remember the arc
+    } else {
+      OutputPath();          // reached a final state: emit the path
+    }
+    return true;
+  }
+
+  bool BackArc(StateId s, const IArc &arc) {
+    FSTERROR() << "RandGenVisitor: cyclic input";
+    ofst_->SetProperties(kError, kError);
+    return false;
+  }
+
+  bool ForwardOrCrossArc(StateId s, const IArc &arc) {
+    OutputPath();
+    return true;
+  }
+
+  void FinishState(StateId s, StateId p, const IArc *) {
+    if (p != kNoStateId && ifst_->Final(s) == Weight::Zero())
+      path_.pop_back();  // undo the push made by TreeArc for this state
+  }
+
+  void FinishVisit() {}
+
+ private:
+  void OutputPath() {  // Appends path_ to ofst as a new chain from its start.
+    if (ofst_->Start() == kNoStateId) {
+      StateId start = ofst_->AddState();
+      ofst_->SetStart(start);
+    }
+    // Output weights use OArc's semiring so that IArc and OArc may differ.
+    StateId src = ofst_->Start();
+    for (size_t i = 0; i < path_.size(); ++i) {
+      StateId dest = ofst_->AddState();
+      OArc arc(path_[i].ilabel, path_[i].olabel, OArc::Weight::One(), dest);
+      ofst_->AddArc(src, arc);
+      src = dest;
+    }
+    ofst_->SetFinal(src, OArc::Weight::One());
+  }
+
+  const Fst<IArc> *ifst_;
+  MutableFst<OArc> *ofst_;
+  vector<IArc> path_;  // input arcs on the current DFS path (was vector<OArc>)
+
+  DISALLOW_COPY_AND_ASSIGN(RandGenVisitor);
+};
+
+
+// Randomly generate paths through an FST; details controlled by
+// RandGenOptions.
+template<class IArc, class OArc, class Selector>
+void RandGen(const Fst<IArc> &ifst, MutableFst<OArc> *ofst,
+ const RandGenOptions<Selector> &opts) {
+ typedef ArcSampler<IArc, Selector> Sampler;
+ typedef RandGenFst<IArc, OArc, Sampler> RandFst;
+ typedef typename OArc::StateId StateId;
+ typedef typename OArc::Weight Weight;
+
+ Sampler* arc_sampler = new Sampler(ifst, opts.arc_selector, opts.max_length);  // Ownership passes to rfst via fopts.
+ RandGenFstOptions<Sampler> fopts(CacheOptions(true, 0), arc_sampler,
+ opts.npath, opts.weighted,
+ opts.remove_total_weight);
+ RandFst rfst(ifst, fopts);
+ if (opts.weighted) {
+ *ofst = rfst;  // Weighted: the sampled tree itself is the output.
+ } else {
+ RandGenVisitor<IArc, OArc> rand_visitor(ofst);  // Unweighted: paths are written out one by one.
+ DfsVisit(rfst, &rand_visitor);
+ }
+}
+
+// Generates a single random path through 'ifst', choosing uniformly
+// among the transitions at each state, and writes it to 'ofst'.
+template<class IArc, class OArc>
+void RandGen(const Fst<IArc> &ifst, MutableFst<OArc> *ofst) {
+  typedef UniformArcSelector<IArc> Selector;
+  Selector selector;
+  RandGen(ifst, ofst, RandGenOptions<Selector>(selector));
+}
+
+} // namespace fst
+
+#endif // FST_LIB_RANDGEN_H__
diff --git a/src/include/fst/random-weight.h b/src/include/fst/random-weight.h
new file mode 100644
index 0000000..0ccd95d
--- /dev/null
+++ b/src/include/fst/random-weight.h
@@ -0,0 +1,348 @@
+// random-weight.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Function objects to generate random weights in various semirings
+// for testing purposes.
+
+#ifndef FST_LIB_RANDOM_WEIGHT_H__
+#define FST_LIB_RANDOM_WEIGHT_H__
+
+#include <cstdlib>
+#include <ctime>
+#include <vector>
+using std::vector;
+
+
+#include <fst/float-weight.h>
+#include <fst/product-weight.h>
+#include <fst/string-weight.h>
+#include <fst/lexicographic-weight.h>
+#include <fst/power-weight.h>
+#include <fst/signed-log-weight.h>
+#include <fst/sparse-power-weight.h>
+
+
+namespace fst {
+
+// The boolean 'allow_zero' below determines whether Zero() and zero
+// divisors should be returned in the random weight generation.
+
+ // Function object producing random TropicalWeightTpl<T> values: integers
+ // drawn from [0, kNumRandomWeights), plus Zero() when 'allow_zero' is set.
+ template <class T>
+ class TropicalWeightGenerator_ {
+  public:
+   typedef TropicalWeightTpl<T> Weight;
+
+   // Seeds the C library generator (srand) with 'seed'.
+   TropicalWeightGenerator_(int seed = time(0), bool allow_zero = true)
+       : allow_zero_(allow_zero) {
+     srand(seed);
+   }
+
+   Weight operator() () const {
+     // When Zero() is permitted it gets one extra slot past the integers.
+     const int num_outcomes = kNumRandomWeights + allow_zero_;
+     const int pick = rand() % num_outcomes;
+     if (!allow_zero_ || pick != kNumRandomWeights)
+       return Weight(static_cast<T>(pick));
+
+     return Weight::Zero();
+   }
+
+  private:
+   // The number of alternative random weights.
+   static const int kNumRandomWeights = 5;
+
+   bool allow_zero_;  // permit Zero() and zero divisors
+ };
+
+ template <class T> const int TropicalWeightGenerator_<T>::kNumRandomWeights;
+
+ typedef TropicalWeightGenerator_<float> TropicalWeightGenerator;
+
+
+ // Function object producing random LogWeightTpl<T> values: integers drawn
+ // from [0, kNumRandomWeights), plus Zero() when 'allow_zero' is set.
+ template <class T>
+ class LogWeightGenerator_ {
+  public:
+   typedef LogWeightTpl<T> Weight;
+
+   // Seeds the C library generator (srand) with 'seed'.
+   LogWeightGenerator_(int seed = time(0), bool allow_zero = true)
+       : allow_zero_(allow_zero) {
+     srand(seed);
+   }
+
+   Weight operator() () const {
+     const int draw = rand() % (kNumRandomWeights + allow_zero_);
+     // The slot just past the integer range stands for Zero().
+     const bool drew_zero = allow_zero_ && draw == kNumRandomWeights;
+     if (drew_zero)
+       return Weight::Zero();
+
+     return Weight(static_cast<T>(draw));
+   }
+
+  private:
+   // Number of alternative random weights.
+   static const int kNumRandomWeights = 5;
+
+   bool allow_zero_;  // permit Zero() and zero divisors
+ };
+
+ template <class T> const int LogWeightGenerator_<T>::kNumRandomWeights;
+
+ typedef LogWeightGenerator_<float> LogWeightGenerator;
+
+
+ // Function object producing random MinMaxWeightTpl<T> values: integers from
+ // (-kNumRandomWeights, kNumRandomWeights), One(), and -- when 'allow_zero'
+ // is set -- Zero().
+ template <class T>
+ class MinMaxWeightGenerator_ {
+  public:
+   typedef MinMaxWeightTpl<T> Weight;
+
+   // Seeds the C library generator (srand) with 'seed'.
+   MinMaxWeightGenerator_(int seed = time(0), bool allow_zero = true)
+       : allow_zero_(allow_zero) {
+     srand(seed);
+   }
+
+   Weight operator() () const {
+     const int span = 2 * kNumRandomWeights + allow_zero_;
+     const int value = (rand() % span) - kNumRandomWeights;
+
+     // The lowest outcome maps to One(); the highest (only reachable when
+     // zero is allowed) maps to Zero().
+     if (value == -kNumRandomWeights)
+       return Weight::One();
+     if (allow_zero_ && value == kNumRandomWeights)
+       return Weight::Zero();
+
+     return Weight(static_cast<T>(value));
+   }
+
+  private:
+   // Parameters controlling the number of alternative random weights.
+   static const int kNumRandomWeights = 5;
+
+   bool allow_zero_;  // permit Zero() and zero divisors
+ };
+
+ template <class T> const int MinMaxWeightGenerator_<T>::kNumRandomWeights;
+
+ typedef MinMaxWeightGenerator_<float> MinMaxWeightGenerator;
+
+
+ // Function object producing random StringWeights: label strings of length
+ // in [0, kMaxStringLength) whose labels are drawn from {1,...,kAlphabetSize},
+ // plus Zero() when 'allow_zero' is set.
+ template <typename L, StringType S = STRING_LEFT>
+ class StringWeightGenerator {
+  public:
+   typedef StringWeight<L, S> Weight;
+
+   // Seeds the C library generator (srand) with 'seed'.
+   StringWeightGenerator(int seed = time(0), bool allow_zero = true)
+       : allow_zero_(allow_zero) {
+     srand(seed);
+   }
+
+   Weight operator() () const {
+     // The outcome kMaxStringLength is reserved for Zero() when permitted.
+     const int length = rand() % (kMaxStringLength + allow_zero_);
+     if (allow_zero_ && length == kMaxStringLength)
+       return Weight::Zero();
+
+     vector<L> labels;
+     for (int remaining = length; remaining > 0; --remaining)
+       labels.push_back(rand() % kAlphabetSize + 1);
+     return Weight(labels.begin(), labels.end());
+   }
+
+  private:
+   // Alphabet size for random weights.
+   static const int kAlphabetSize = 5;
+   // Upper bound (exclusive) on the length of a generated string.
+   static const int kMaxStringLength = 5;
+
+   bool allow_zero_;  // permit Zero() and zero divisors
+ };
+
+ template <typename L, StringType S>
+ const int StringWeightGenerator<L, S>::kAlphabetSize;
+ template <typename L, StringType S>
+ const int StringWeightGenerator<L, S>::kMaxStringLength;
+
+
+ // Function object that pairs the outputs of generators G1 and G2 into a
+ // weight of type W (by default the ProductWeight of the two weight types).
+ template <class G1, class G2,
+           class W = ProductWeight<typename G1::Weight, typename G2::Weight> >
+ class ProductWeightGenerator {
+  public:
+   typedef typename G1::Weight W1;
+   typedef typename G2::Weight W2;
+   typedef W Weight;
+
+   // Both component generators receive the same 'seed' and 'allow_zero'.
+   ProductWeightGenerator(int seed = time(0), bool allow_zero = true)
+       : generator1_(seed, allow_zero), generator2_(seed, allow_zero) {}
+
+   Weight operator() () const {
+     // Draw the components in separate statements so the first generator is
+     // always consulted before the second.
+     const W1 first = generator1_();
+     const W2 second = generator2_();
+     return Weight(first, second);
+   }
+
+  private:
+   G1 generator1_;
+   G2 generator2_;
+ };
+
+
+ // Function object producing LexicographicWeights from the generators G1 and
+ // G2. Zeroes cannot be generated for the two components independently: a
+ // lexicographic weight is a member iff both components are zero or both are
+ // non-zero. The component generators are therefore built with zero disabled
+ // and the all-zero weight is produced here.
+ template <class G1, class G2>
+ class LexicographicWeightGenerator {
+  public:
+   typedef typename G1::Weight W1;
+   typedef typename G2::Weight W2;
+   typedef LexicographicWeight<W1, W2> Weight;
+
+   LexicographicWeightGenerator(int seed = time(0), bool allow_zero = true)
+       : generator1_(seed, false), generator2_(seed, false),
+         allow_zero_(allow_zero) {}
+
+   Weight operator() () const {
+     // rand() is consulted only when zero is allowed; one outcome out of
+     // kNumRandomWeights + 1 yields the all-zero weight.
+     if (allow_zero_ &&
+         rand() % (kNumRandomWeights + allow_zero_) == kNumRandomWeights)
+       return Weight(W1::Zero(), W2::Zero());
+
+     const W1 first = generator1_();
+     const W2 second = generator2_();
+     return Weight(first, second);
+   }
+
+  private:
+   G1 generator1_;
+   G2 generator2_;
+   static const int kNumRandomWeights = 5;
+   bool allow_zero_;
+ };
+
+ template <class G1, class G2>
+ const int LexicographicWeightGenerator<G1, G2>::kNumRandomWeights;
+
+
+ // Generator for gallic weights, built as the product generator of a
+ // StringWeightGenerator<L, S> and an arbitrary weight generator G.
+ template <class L, class G, StringType S = STRING_LEFT>
+ class GallicWeightGenerator
+     : public ProductWeightGenerator<StringWeightGenerator<L, S>, G> {
+  public:
+   typedef typename G::Weight W;
+   typedef GallicWeight<L, W, S> Weight;
+   typedef ProductWeightGenerator<StringWeightGenerator<L, S>, G> PG;
+
+   // Forwards 'seed' and 'allow_zero' to the underlying product generator.
+   GallicWeightGenerator(int seed = time(0), bool allow_zero = true)
+       : PG(seed, allow_zero) {}
+
+   // Wraps an existing product generator.
+   GallicWeightGenerator(const PG &pg) : PG(pg) {}
+ };
+
+ // This function object returns a weight generator over the Cartesian power
+ // of rank n of the weights for the generator G.
+ template <class G, unsigned int n>
+ class PowerWeightGenerator {
+  public:
+   typedef typename G::Weight W;
+   typedef PowerWeight<W, n> Weight;
+
+   // Forwards 'seed' and 'allow_zero' to the component generator.
+   PowerWeightGenerator(int seed = time(0), bool allow_zero = true)
+       : generator_(seed, allow_zero) {}
+
+   Weight operator()() const {
+     Weight result;
+     size_t index = 0;
+     // Fill each of the n components with an independently drawn weight.
+     while (index < n) {
+       W component = generator_();
+       result.SetValue(index, component);
+       ++index;
+     }
+     return result;
+   }
+
+  private:
+   G generator_;
+ };
+
+ // Function object producing random SignedLogWeightTpl<T> values. The
+ // magnitude is an integer drawn from [0, kNumRandomWeights) (or Zero() when
+ // 'allow_zero' is set); the sign is chosen at random as well.
+ template <class T>
+ class SignedLogWeightGenerator_ {
+  public:
+   typedef SignedLogWeightTpl<T> Weight;
+
+   // Seeds the C library generator (srand) with 'seed'.
+   SignedLogWeightGenerator_(int seed = time(0), bool allow_zero = true)
+       : allow_zero_(allow_zero) {
+     srand(seed);
+   }
+
+   Weight operator() () const {
+     // Sign is drawn first, magnitude second.
+     const int sign_pick = rand() % 2;
+     const int magnitude_pick = rand() % (kNumRandomWeights + allow_zero_);
+
+     const TropicalWeight sign(sign_pick == 0 ? -1.0 : 1.0);
+     const bool zero_magnitude =
+         allow_zero_ && magnitude_pick == kNumRandomWeights;
+     const LogWeightTpl<T> magnitude =
+         zero_magnitude ? LogWeightTpl<T>::Zero()
+                        : LogWeightTpl<T>(static_cast<T>(magnitude_pick));
+
+     return SignedLogWeightTpl<T>(sign, magnitude);
+   }
+
+  private:
+   // Number of alternative random weights.
+   static const int kNumRandomWeights = 5;
+   bool allow_zero_;  // permit Zero() and zero divisors
+ };
+
+ template <class T> const int SignedLogWeightGenerator_<T>::kNumRandomWeights;
+
+ typedef SignedLogWeightGenerator_<float> SignedLogWeightGenerator;
+
+ // This function object returns a weight generator over sparse power weights
+ // (SparsePowerWeight<W, K>) whose entries are produced by the generator G.
+ // Each generated weight receives n entries, pushed for the keys 1..n.
+ template <class G, class K, unsigned int n>
+ class SparsePowerWeightGenerator {
+  public:
+   typedef typename G::Weight W;
+   typedef SparsePowerWeight<W, K> Weight;
+
+   // Forwards 'seed' and 'allow_zero' to the component generator.
+   SparsePowerWeightGenerator(int seed = time(0), bool allow_zero = true)
+       : generator_(seed, allow_zero) {}
+
+   Weight operator()() const {
+     Weight result;
+     size_t index = 1;
+     while (index <= n) {
+       W component = generator_();
+       K key = index;
+       // NOTE(review): the meaning of the trailing 'true' flag is defined by
+       // SparsePowerWeight::Push, which is not visible here -- confirm.
+       result.Push(key, component, true);
+       ++index;
+     }
+     return result;
+   }
+
+  private:
+   G generator_;
+ };
+
+} // namespace fst
+
+#endif // FST_LIB_RANDOM_WEIGHT_H__
diff --git a/src/include/fst/rational.h b/src/include/fst/rational.h
new file mode 100644
index 0000000..96aa00d
--- /dev/null
+++ b/src/include/fst/rational.h
@@ -0,0 +1,330 @@
+// rational.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// An Fst implementation and base interface for delayed unions,
+// concatenations and closures.
+
+#ifndef FST_LIB_RATIONAL_H__
+#define FST_LIB_RATIONAL_H__
+
+#include <algorithm>
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/mutable-fst.h>
+#include <fst/replace.h>
+#include <fst/test-properties.h>
+
+
+namespace fst {
+
+ // Options for the delayed rational operations; an alias of CacheOptions.
+ typedef CacheOptions RationalFstOptions;
+
+ // This specifies whether to add the empty string.
+ enum ClosureType { CLOSURE_STAR = 0, // T* -> add the empty string
+ CLOSURE_PLUS = 1 }; // T+ -> don't add the empty string
+
+ // Forward declarations: RationalFst befriends these in-place operations, so
+ // they must be declared before the class template itself.
+ template <class A> class RationalFst;
+ template <class A> void Union(RationalFst<A> *fst1, const Fst<A> &fst2);
+ template <class A> void Concat(RationalFst<A> *fst1, const Fst<A> &fst2);
+ template <class A> void Concat(const Fst<A> &fst1, RationalFst<A> *fst2);
+ template <class A> void Closure(RationalFst<A> *fst, ClosureType closure_type);
+
+
+ // Implementation class for delayed unions, concatenations and closures.
+ //
+ // The rational expression is kept as a small topology machine ('rfst_')
+ // whose arcs carry negative output labels acting as nonterminals, together
+ // with 'fst_tuples_', which pairs each nonterminal with a copy of the
+ // operand FST it stands for. A ReplaceFst over these tuples is built lazily
+ // by Replace() and discarded whenever the topology or operands change.
+ template<class A>
+ class RationalFstImpl : public FstImpl<A> {
+  public:
+   using FstImpl<A>::SetType;
+   using FstImpl<A>::SetProperties;
+   using FstImpl<A>::WriteHeader;
+   using FstImpl<A>::SetInputSymbols;
+   using FstImpl<A>::SetOutputSymbols;
+
+   typedef A Arc;
+   typedef typename A::Weight Weight;
+   typedef typename A::StateId StateId;
+   typedef typename A::Label Label;
+
+   explicit RationalFstImpl(const RationalFstOptions &opts)
+       : nonterminals_(0),
+         replace_(0),
+         replace_options_(opts, 0) {
+     SetType("rational");
+     // Slot 0 is reserved for the topology machine; Replace() fills it in.
+     fst_tuples_.push_back(pair<Label, const Fst<A>*>(0, 0));
+   }
+
+   // Copies the topology and makes copies (Copy(true)) of every operand FST
+   // and of the cached ReplaceFst, when present.
+   RationalFstImpl(const RationalFstImpl<A> &impl)
+       : rfst_(impl.rfst_),
+         nonterminals_(impl.nonterminals_),
+
+         replace_(impl.replace_ ? impl.replace_->Copy(true) : 0),
+         replace_options_(impl.replace_options_) {
+     SetType("rational");
+     fst_tuples_.reserve(impl.fst_tuples_.size());
+     for (size_t i = 0; i < impl.fst_tuples_.size(); ++i)
+       fst_tuples_.push_back(make_pair(impl.fst_tuples_[i].first,
+                                       impl.fst_tuples_[i].second
+                                       ? impl.fst_tuples_[i].second->Copy(true)
+                                       : 0));
+   }
+
+   // Releases the operand copies held in 'fst_tuples_' and the cached
+   // ReplaceFst. Deleting a null pointer is a no-op, so no checks are needed.
+   virtual ~RationalFstImpl() {
+     for (size_t i = 0; i < fst_tuples_.size(); ++i)
+       delete fst_tuples_[i].second;
+     delete replace_;
+   }
+
+   StateId Start() { return Replace()->Start(); }
+
+   Weight Final(StateId s) { return Replace()->Final(s); }
+
+   size_t NumArcs(StateId s) { return Replace()->NumArcs(s); }
+
+   size_t NumInputEpsilons(StateId s) {
+     return Replace()->NumInputEpsilons(s);
+   }
+
+   size_t NumOutputEpsilons(StateId s) {
+     return Replace()->NumOutputEpsilons(s);
+   }
+
+   uint64 Properties() const { return Properties(kFstProperties); }
+
+   // Set error if found; return FST impl properties.
+   uint64 Properties(uint64 mask) const {
+     if ((mask & kError) && Replace()->Properties(kError, false))
+       SetProperties(kError, kError);
+     return FstImpl<Arc>::Properties(mask);
+   }
+
+   // Implementation of UnionFst(fst1,fst2)
+   void InitUnion(const Fst<A> &fst1, const Fst<A> &fst2) {
+     ResetReplace();
+     uint64 props1 = fst1.Properties(kFstProperties, false);
+     uint64 props2 = fst2.Properties(kFstProperties, false);
+     SetInputSymbols(fst1.InputSymbols());
+     SetOutputSymbols(fst1.OutputSymbols());
+     // Topology: two parallel nonterminal arcs from state 0 to final state 1.
+     rfst_.AddState();
+     rfst_.AddState();
+     rfst_.SetStart(0);
+     rfst_.SetFinal(1, Weight::One());
+     rfst_.SetInputSymbols(fst1.InputSymbols());
+     rfst_.SetOutputSymbols(fst1.OutputSymbols());
+     nonterminals_ = 2;
+     rfst_.AddArc(0, A(0, -1, Weight::One(), 1));
+     rfst_.AddArc(0, A(0, -2, Weight::One(), 1));
+     fst_tuples_.push_back(make_pair(-1, fst1.Copy()));
+     fst_tuples_.push_back(make_pair(-2, fst2.Copy()));
+     SetProperties(UnionProperties(props1, props2, true), kCopyProperties);
+   }
+
+   // Implementation of ConcatFst(fst1,fst2)
+   void InitConcat(const Fst<A> &fst1, const Fst<A> &fst2) {
+     ResetReplace();
+     uint64 props1 = fst1.Properties(kFstProperties, false);
+     uint64 props2 = fst2.Properties(kFstProperties, false);
+     SetInputSymbols(fst1.InputSymbols());
+     SetOutputSymbols(fst1.OutputSymbols());
+     // Topology: nonterminal arcs chained 0 -> 1 -> 2, with state 2 final.
+     rfst_.AddState();
+     rfst_.AddState();
+     rfst_.AddState();
+     rfst_.SetStart(0);
+     rfst_.SetFinal(2, Weight::One());
+     rfst_.SetInputSymbols(fst1.InputSymbols());
+     rfst_.SetOutputSymbols(fst1.OutputSymbols());
+     nonterminals_ = 2;
+     rfst_.AddArc(0, A(0, -1, Weight::One(), 1));
+     rfst_.AddArc(1, A(0, -2, Weight::One(), 2));
+     fst_tuples_.push_back(make_pair(-1, fst1.Copy()));
+     fst_tuples_.push_back(make_pair(-2, fst2.Copy()));
+     SetProperties(ConcatProperties(props1, props2, true), kCopyProperties);
+   }
+
+   // Implementation of ClosureFst(fst, closure_type)
+   void InitClosure(const Fst<A> &fst, ClosureType closure_type) {
+     ResetReplace();
+     uint64 props = fst.Properties(kFstProperties, false);
+     SetInputSymbols(fst.InputSymbols());
+     SetOutputSymbols(fst.OutputSymbols());
+     if (closure_type == CLOSURE_STAR) {
+       // T*: a single final state with a nonterminal self-loop.
+       rfst_.AddState();
+       rfst_.SetStart(0);
+       rfst_.SetFinal(0, Weight::One());
+       rfst_.AddArc(0, A(0, -1, Weight::One(), 0));
+     } else {
+       // T+: one mandatory nonterminal arc, then an epsilon arc back.
+       rfst_.AddState();
+       rfst_.AddState();
+       rfst_.SetStart(0);
+       rfst_.SetFinal(1, Weight::One());
+       rfst_.AddArc(0, A(0, -1, Weight::One(), 1));
+       rfst_.AddArc(1, A(0, 0, Weight::One(), 0));
+     }
+     rfst_.SetInputSymbols(fst.InputSymbols());
+     rfst_.SetOutputSymbols(fst.OutputSymbols());
+     fst_tuples_.push_back(make_pair(-1, fst.Copy()));
+     nonterminals_ = 1;
+     SetProperties(ClosureProperties(props, closure_type == CLOSURE_STAR, true),
+                   kCopyProperties);
+   }
+
+   // Implementation of Union(Fst &, RationalFst *)
+   void AddUnion(const Fst<A> &fst) {
+     ResetReplace();
+     uint64 props1 = FstImpl<A>::Properties();
+     uint64 props2 = fst.Properties(kFstProperties, false);
+     // Two-state machine with one arc labelled by a fresh nonterminal.
+     VectorFst<A> afst;
+     afst.AddState();
+     afst.AddState();
+     afst.SetStart(0);
+     afst.SetFinal(1, Weight::One());
+     ++nonterminals_;
+     afst.AddArc(0, A(0, -nonterminals_, Weight::One(), 1));
+     Union(&rfst_, afst);
+     fst_tuples_.push_back(make_pair(-nonterminals_, fst.Copy()));
+     SetProperties(UnionProperties(props1, props2, true), kCopyProperties);
+   }
+
+   // Implementation of Concat(Fst &, RationalFst *)
+   // 'append' selects whether the new operand goes after (true) or before
+   // (false) the existing topology.
+   void AddConcat(const Fst<A> &fst, bool append) {
+     ResetReplace();
+     uint64 props1 = FstImpl<A>::Properties();
+     uint64 props2 = fst.Properties(kFstProperties, false);
+     // Two-state machine with one arc labelled by a fresh nonterminal.
+     VectorFst<A> afst;
+     afst.AddState();
+     afst.AddState();
+     afst.SetStart(0);
+     afst.SetFinal(1, Weight::One());
+     ++nonterminals_;
+     afst.AddArc(0, A(0, -nonterminals_, Weight::One(), 1));
+     if (append)
+       Concat(&rfst_, afst);
+     else
+       Concat(afst, &rfst_);
+     fst_tuples_.push_back(make_pair(-nonterminals_, fst.Copy()));
+     SetProperties(ConcatProperties(props1, props2, true), kCopyProperties);
+   }
+
+   // Implementation of Closure(RationalFst *, closure_type)
+   void AddClosure(ClosureType closure_type) {
+     ResetReplace();
+     uint64 props = FstImpl<A>::Properties();
+     Closure(&rfst_, closure_type);
+     SetProperties(ClosureProperties(props, closure_type == CLOSURE_STAR, true),
+                   kCopyProperties);
+   }
+
+   // Returns the underlying ReplaceFst, building it on first use (or after
+   // it has been discarded by one of the Init*/Add* methods).
+   ReplaceFst<A> *Replace() const {
+     if (!replace_) {
+       // Free the topology copy left over from a previous build before
+       // installing a fresh one; otherwise it would leak.
+       delete fst_tuples_[0].second;
+       fst_tuples_[0].second = rfst_.Copy();
+       replace_ = new ReplaceFst<A>(fst_tuples_, replace_options_);
+     }
+     return replace_;
+   }
+
+  private:
+   // Discards the cached ReplaceFst, if any, and clears the pointer so that
+   // the next Replace() call rebuilds it. Leaving the pointer set after the
+   // delete would make Replace() hand out freed memory and the destructor
+   // delete it a second time.
+   void ResetReplace() {
+     delete replace_;
+     replace_ = 0;
+   }
+
+   VectorFst<A> rfst_;   // rational topology machine; uses neg. nonterminals
+   Label nonterminals_;  // # of nonterminals used
+   // Contains the nonterminals and their corresponding FSTs.
+   mutable vector<pair<Label, const Fst<A>*> > fst_tuples_;
+   mutable ReplaceFst<A> *replace_;        // Underlying ReplaceFst
+   ReplaceFstOptions<A> replace_options_;  // Options for creating 'replace_'
+
+   void operator=(const RationalFstImpl<A> &impl);  // disallow
+ };
+
+ // Parent class for the delayed rational operations - delayed union,
+ // concatenation, and closure.
+ //
+ // This class attaches interface to implementation and handles
+ // reference counting, delegating most methods to ImplToFst.
+ //
+ // All constructors are protected, so instances are only created through
+ // derived classes.
+ template <class A>
+ class RationalFst : public ImplToFst< RationalFstImpl<A> > {
+ public:
+ // The iterator specializations and the in-place rational operations reach
+ // the implementation through the private GetImpl() below.
+ friend class StateIterator< RationalFst<A> >;
+ friend class ArcIterator< RationalFst<A> >;
+ friend void Union<>(RationalFst<A> *fst1, const Fst<A> &fst2);
+ friend void Concat<>(RationalFst<A> *fst1, const Fst<A> &fst2);
+ friend void Concat<>(const Fst<A> &fst1, RationalFst<A> *fst2);
+ friend void Closure<>(RationalFst<A> *fst, ClosureType closure_type);
+
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+ typedef RationalFstImpl<A> Impl;
+
+ // State iteration is delegated to the underlying ReplaceFst.
+ virtual void InitStateIterator(StateIteratorData<A> *data) const {
+ GetImpl()->Replace()->InitStateIterator(data);
+ }
+
+ // Arc iteration for state 's' is delegated to the underlying ReplaceFst.
+ virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
+ GetImpl()->Replace()->InitArcIterator(s, data);
+ }
+
+ protected:
+ // Creates an implementation with default RationalFstOptions.
+ RationalFst()
+ : ImplToFst<Impl>(new Impl(RationalFstOptions())) {}
+
+ // Creates an implementation with the given options.
+ explicit RationalFst(const RationalFstOptions &opts)
+ : ImplToFst<Impl>(new Impl(opts)) {}
+
+ // See Fst<>::Copy() for doc.
+ RationalFst(const RationalFst<A> &fst , bool safe = false)
+ : ImplToFst<Impl>(fst, safe) {}
+
+ private:
+ // Makes visible to friends.
+ Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
+
+ void operator=(const RationalFst<A> &fst); // disallow
+ };
+
+
+ // Specialization for RationalFst.
+ // Iterates over the states of the ReplaceFst that underlies 'fst'.
+ template <class A>
+ class StateIterator< RationalFst<A> >
+ : public StateIterator< ReplaceFst<A> > {
+ public:
+ // Calling Replace() builds the underlying ReplaceFst if it does not
+ // exist yet.
+ explicit StateIterator(const RationalFst<A> &fst)
+ : StateIterator< ReplaceFst<A> >(*(fst.GetImpl()->Replace())) {}
+ };
+
+
+ // Specialization for RationalFst.
+ // Iterates over the arcs leaving state 's' of the ReplaceFst that underlies
+ // 'fst'.
+ //
+ // NOTE(review): the declared base class is CacheArcIterator< ReplaceFst<A> >
+ // but the constructor's mem-initializer names ArcIterator< ReplaceFst<A> >.
+ // Whether this is well-formed on instantiation depends on how replace.h
+ // defines ArcIterator< ReplaceFst<A> > -- confirm against that header.
+ template <class A>
+ class ArcIterator< RationalFst<A> >
+ : public CacheArcIterator< ReplaceFst<A> > {
+ public:
+ typedef typename A::StateId StateId;
+
+ ArcIterator(const RationalFst<A> &fst, StateId s)
+ : ArcIterator< ReplaceFst<A> >(*(fst.GetImpl()->Replace()), s) {}
+ };
+
+} // namespace fst
+
+#endif // FST_LIB_RATIONAL_H__
diff --git a/src/include/fst/register.h b/src/include/fst/register.h
new file mode 100644
index 0000000..55651cd
--- /dev/null
+++ b/src/include/fst/register.h
@@ -0,0 +1,132 @@
+// register.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley), jpr@google.com (Jake Ratkiewicz)
+//
+// \file
+// Classes for registering derived Fsts for generic reading
+//
+
+#ifndef FST_LIB_REGISTER_H__
+#define FST_LIB_REGISTER_H__
+
+#include <string>
+
+
+#include <fst/compat.h>
+#include <iostream>
+#include <fstream>
+#include <fst/util.h>
+#include <fst/generic-register.h>
+
+
+#include <fst/types.h>
+
+namespace fst {
+
+template <class A> class Fst;
+struct FstReadOptions;
+
+ // This class represents a single entry in a FstRegister
+ // It bundles the two function pointers registered for an FST type.
+ template<class A>
+ struct FstRegisterEntry {
+ // Reads an FST from a stream using the given read options.
+ typedef Fst<A> *(*Reader)(istream &strm, const FstReadOptions &opts);
+ // Builds an FST of the registered type from an arbitrary Fst<A>.
+ typedef Fst<A> *(*Converter)(const Fst<A> &fst);
+
+ Reader reader;
+ Converter converter;
+ // Default entry: both pointers null.
+ FstRegisterEntry() : reader(0), converter(0) {}
+ FstRegisterEntry(Reader r, Converter c) : reader(r), converter(c) { }
+ };
+
+ // Registry mapping the string name of an FST type to the reader and
+ // converter registered for it.
+ template<class A>
+ class FstRegister : public GenericRegister<string, FstRegisterEntry<A>,
+                                            FstRegister<A> > {
+  public:
+   typedef typename FstRegisterEntry<A>::Reader Reader;
+   typedef typename FstRegisterEntry<A>::Converter Converter;
+
+   // Returns the reader stored in the entry looked up for 'type'.
+   const Reader GetReader(const string &type) const {
+     const FstRegisterEntry<A> &entry = this->GetEntry(type);
+     return entry.reader;
+   }
+
+   // Returns the converter stored in the entry looked up for 'type'.
+   const Converter GetConverter(const string &type) const {
+     const FstRegisterEntry<A> &entry = this->GetEntry(type);
+     return entry.converter;
+   }
+
+  protected:
+   // Derives a shared-object file name from a type name: the key is turned
+   // into a legal C symbol and suffixed with "-fst.so".
+   virtual string ConvertKeyToSoFilename(const string& key) const {
+     string filename = key;
+     ConvertToLegalCSymbol(&filename);
+     filename += "-fst.so";
+     return filename;
+   }
+ };
+
+
+ // Registers the Fst type F for generic reading and creation. F must be
+ // default-constructible and constructible from an 'Fst<Arc>' for this to
+ // work.
+ template <class F>
+ class FstRegisterer
+     : public GenericRegisterer<FstRegister<typename F::Arc> > {
+  public:
+   typedef typename F::Arc Arc;
+   typedef typename FstRegister<Arc>::Entry Entry;
+   typedef typename FstRegister<Arc>::Reader Reader;
+
+   // Registers under the type name reported by a default-constructed F.
+   FstRegisterer()
+       : GenericRegisterer<FstRegister<typename F::Arc> >(
+             F().Type(), BuildEntry()) { }
+
+  private:
+   // Builds the registry entry from F::Read and the Convert function below.
+   Entry BuildEntry() {
+     // F::Read returns F*, whereas the registry stores readers returning
+     // Fst<Arc>*; select the overload by its exact type, then cast.
+     typedef F *(*TypedReader)(istream &strm, const FstReadOptions &opts);
+     TypedReader typed_reader = &F::Read;
+     Reader generic_reader = reinterpret_cast<Reader>(typed_reader);
+
+     return Entry(generic_reader, &FstRegisterer<F>::Convert);
+   }
+
+   // Allocates a new F constructed from 'fst'.
+   static Fst<Arc> *Convert(const Fst<Arc> &fst) { return new F(fst); }
+ };
+
+
+ // Convenience macro to generate static FstRegisterer instance.
+ // Expands to the declaration of a static fst::FstRegisterer< F<A> > object
+ // named F_A_registerer. The expansion carries no trailing semicolon, so the
+ // use site supplies it.
+ #define REGISTER_FST(F, A) \
+ static fst::FstRegisterer< F<A> > F ## _ ## A ## _registerer
+
+
+ // Converts 'fst' to the FST type named 'ftype' using the converter
+ // registered for that name. When no converter is registered, logs an error
+ // and returns 0.
+ template <class A>
+ Fst<A> *Convert(const Fst<A> &fst, const string &ftype) {
+   typedef typename FstRegister<A>::Converter Converter;
+
+   const Converter convert_fn =
+       FstRegister<A>::GetRegister()->GetConverter(ftype);
+   if (convert_fn)
+     return convert_fn(fst);
+
+   LOG(ERROR) << "Fst::Convert: Unknown FST type \"" << ftype
+              << "\" (arc type = \"" << A::Type() << "\")";
+   return 0;
+ }
+
+} // namespace fst
+
+#endif // FST_LIB_REGISTER_H__
diff --git a/src/include/fst/relabel.h b/src/include/fst/relabel.h
new file mode 100644
index 0000000..fbb8942
--- /dev/null
+++ b/src/include/fst/relabel.h
@@ -0,0 +1,524 @@
+// relabel.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: johans@google.com (Johan Schalkwyk)
+//
+// \file
+// Functions and classes to relabel an Fst (either on input or output)
+//
+#ifndef FST_LIB_RELABEL_H__
+#define FST_LIB_RELABEL_H__
+
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <string>
+#include <utility>
+using std::pair; using std::make_pair;
+#include <vector>
+using std::vector;
+
+#include <fst/cache.h>
+#include <fst/test-properties.h>
+
+
+namespace fst {
+
+//
+// Relabels either the input labels or output labels. The old to
+// new labels are specified using a vector of pair<Label,Label>.
+// Any label associations not specified are assumed to be identity
+// mapping.
+//
+// \param fst input fst, must be mutable
+// \param ipairs vector of input label pairs indicating old to new mapping
+// \param opairs vector of output label pairs indicating old to new mapping
+//
+template <class A>
+void Relabel(
+ MutableFst<A> *fst,
+ const vector<pair<typename A::Label, typename A::Label> >& ipairs,
+ const vector<pair<typename A::Label, typename A::Label> >& opairs) {
+ typedef typename A::StateId StateId;
+ typedef typename A::Label Label;
+
+ uint64 props = fst->Properties(kFstProperties, false);
+
+ // construct label to label hash.
+ unordered_map<Label, Label> input_map;
+ for (size_t i = 0; i < ipairs.size(); ++i) {
+ input_map[ipairs[i].first] = ipairs[i].second;
+ }
+
+ unordered_map<Label, Label> output_map;
+ for (size_t i = 0; i < opairs.size(); ++i) {
+ output_map[opairs[i].first] = opairs[i].second;
+ }
+
+ for (StateIterator<MutableFst<A> > siter(*fst);
+ !siter.Done(); siter.Next()) {
+ StateId s = siter.Value();
+ for (MutableArcIterator<MutableFst<A> > aiter(fst, s);
+ !aiter.Done(); aiter.Next()) {
+ A arc = aiter.Value();
+
+ // relabel input
+ // only relabel if relabel pair defined
+ typename unordered_map<Label, Label>::iterator it =
+ input_map.find(arc.ilabel);
+ if (it != input_map.end()) {
+ if (it->second == kNoLabel) {
+ FSTERROR() << "Input symbol id " << arc.ilabel
+ << " missing from target vocabulary";
+ fst->SetProperties(kError, kError);
+ return;
+ }
+ arc.ilabel = it->second;
+ }
+
+ // relabel output
+ it = output_map.find(arc.olabel);
+ if (it != output_map.end()) {
+ if (it->second == kNoLabel) {
+ FSTERROR() << "Output symbol id " << arc.olabel
+ << " missing from target vocabulary";
+ fst->SetProperties(kError, kError);
+ return;
+ }
+ arc.olabel = it->second;
+ }
+
+ aiter.SetValue(arc);
+ }
+ }
+
+ fst->SetProperties(RelabelProperties(props), kFstProperties);
+}
+
+//
+// Relabels either the input labels or output labels. The old to
+// new labels mappings are specified using an input Symbol set.
+// Any label associations not specified are assumed to be identity
+// mapping.
+//
+// \param fst input fst, must be mutable
+// \param new_isymbols symbol set indicating new mapping of input symbols
+// \param new_osymbols symbol set indicating new mapping of output symbols
+//
+template<class A>
+void Relabel(MutableFst<A> *fst,
+ const SymbolTable* new_isymbols,
+ const SymbolTable* new_osymbols) {
+ Relabel(fst,
+ fst->InputSymbols(), new_isymbols, true,
+ fst->OutputSymbols(), new_osymbols, true);
+}
+
+ // Relabels 'fst' from old symbol tables to new ones. For each side (input,
+ // output) where both the old and the new table are given, every symbol of
+ // the old table is looked up in the new table to obtain its new label, and
+ // the new table is attached to the FST when the matching 'attach_*' flag is
+ // set. A side with a missing table is left untouched.
+ template<class A>
+ void Relabel(MutableFst<A> *fst,
+              const SymbolTable* old_isymbols,
+              const SymbolTable* new_isymbols,
+              bool attach_new_isymbols,
+              const SymbolTable* old_osymbols,
+              const SymbolTable* new_osymbols,
+              bool attach_new_osymbols) {
+   typedef typename A::StateId StateId;
+   typedef typename A::Label Label;
+
+   // Input side.
+   vector<pair<Label, Label> > ipairs;
+   if (old_isymbols && new_isymbols) {
+     SymbolTableIterator iter(*old_isymbols);
+     while (!iter.Done()) {
+       string symbol = iter.Symbol();
+       int old_label = iter.Value();
+       int new_label = new_isymbols->Find(symbol);
+       ipairs.push_back(make_pair(old_label, new_label));
+       iter.Next();
+     }
+     if (attach_new_isymbols)
+       fst->SetInputSymbols(new_isymbols);
+   }
+
+   // Output side.
+   vector<pair<Label, Label> > opairs;
+   if (old_osymbols && new_osymbols) {
+     SymbolTableIterator iter(*old_osymbols);
+     while (!iter.Done()) {
+       string symbol = iter.Symbol();
+       int old_label = iter.Value();
+       int new_label = new_osymbols->Find(symbol);
+       opairs.push_back(make_pair(old_label, new_label));
+       iter.Next();
+     }
+     if (attach_new_osymbols)
+       fst->SetOutputSymbols(new_osymbols);
+   }
+
+   // Delegate to the overload working on explicit label pairs.
+   Relabel(fst, ipairs, opairs);
+ }
+
+
+typedef CacheOptions RelabelFstOptions;
+
+template <class A> class RelabelFst;
+
+//
+// \class RelabelFstImpl
+// \brief Implementation for delayed relabeling
+//
+// Relabels an FST from one symbol set to another. Relabeling
+// can either be on input or output space. RelabelFst implements
+// a delayed version of the relabel. Arcs are relabeled on the fly
+// and not cached. I.e each request is recomputed.
+//
+template<class A>
+class RelabelFstImpl : public CacheImpl<A> {
+  friend class StateIterator< RelabelFst<A> >;
+
+ public:
+  using FstImpl<A>::SetType;
+  using FstImpl<A>::SetProperties;
+  using FstImpl<A>::WriteHeader;
+  using FstImpl<A>::SetInputSymbols;
+  using FstImpl<A>::SetOutputSymbols;
+
+  using CacheImpl<A>::PushArc;
+  using CacheImpl<A>::HasArcs;
+  using CacheImpl<A>::HasFinal;
+  using CacheImpl<A>::HasStart;
+  using CacheImpl<A>::SetArcs;
+  using CacheImpl<A>::SetFinal;
+  using CacheImpl<A>::SetStart;
+
+  typedef A Arc;
+  typedef typename A::Label Label;
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+  typedef CacheState<A> State;
+
+  // Builds the label maps from explicit (old label, new label) pairs.
+  // An empty pair vector leaves that side of the arcs untouched.
+  RelabelFstImpl(const Fst<A>& fst,
+                 const vector<pair<Label, Label> >& ipairs,
+                 const vector<pair<Label, Label> >& opairs,
+                 const RelabelFstOptions &opts)
+      : CacheImpl<A>(opts), fst_(fst.Copy()),
+        relabel_input_(false), relabel_output_(false) {
+    const uint64 copied_props = fst.Properties(kCopyProperties, false);
+    SetProperties(RelabelProperties(copied_props));
+    SetType("relabel");
+
+    typedef typename vector<pair<Label, Label> >::const_iterator PairIter;
+
+    // Input side.
+    for (PairIter p = ipairs.begin(); p != ipairs.end(); ++p)
+      input_map_[p->first] = p->second;
+    relabel_input_ = !ipairs.empty();
+
+    // Output side.
+    for (PairIter p = opairs.begin(); p != opairs.end(); ++p)
+      output_map_[p->first] = p->second;
+    relabel_output_ = !opairs.empty();
+  }
+
+  // Builds the label maps from symbol tables. A side is relabeled only
+  // when both of its tables are given and their labeled checksums differ;
+  // in that case the new table also becomes the symbol table of that side.
+  RelabelFstImpl(const Fst<A>& fst,
+                 const SymbolTable* old_isymbols,
+                 const SymbolTable* new_isymbols,
+                 const SymbolTable* old_osymbols,
+                 const SymbolTable* new_osymbols,
+                 const RelabelFstOptions &opts)
+      : CacheImpl<A>(opts), fst_(fst.Copy()),
+        relabel_input_(false), relabel_output_(false) {
+    SetType("relabel");
+
+    const uint64 copied_props = fst.Properties(kCopyProperties, false);
+    SetProperties(RelabelProperties(copied_props));
+    SetInputSymbols(old_isymbols);
+    SetOutputSymbols(old_osymbols);
+
+    if (NeedsRemap(old_isymbols, new_isymbols)) {
+      MapSymbols(*old_isymbols, *new_isymbols, &input_map_);
+      SetInputSymbols(new_isymbols);
+      relabel_input_ = true;
+    }
+
+    if (NeedsRemap(old_osymbols, new_osymbols)) {
+      MapSymbols(*old_osymbols, *new_osymbols, &output_map_);
+      SetOutputSymbols(new_osymbols);
+      relabel_output_ = true;
+    }
+  }
+
+  // Copy constructor: copies the wrapped Fst with Copy(true) and
+  // duplicates both label maps, the properties and the symbol tables.
+  RelabelFstImpl(const RelabelFstImpl<A>& impl)
+      : CacheImpl<A>(impl),
+        fst_(impl.fst_->Copy(true)),
+        input_map_(impl.input_map_),
+        output_map_(impl.output_map_),
+        relabel_input_(impl.relabel_input_),
+        relabel_output_(impl.relabel_output_) {
+    SetType("relabel");
+    SetProperties(impl.Properties(), kCopyProperties);
+    SetInputSymbols(impl.InputSymbols());
+    SetOutputSymbols(impl.OutputSymbols());
+  }
+
+  ~RelabelFstImpl() { delete fst_; }
+
+  // Start state; fetched from the wrapped Fst on first use.
+  StateId Start() {
+    if (!HasStart())
+      SetStart(fst_->Start());
+    return CacheImpl<A>::Start();
+  }
+
+  // Final weight of 's'; fetched from the wrapped Fst on first use.
+  Weight Final(StateId s) {
+    if (!HasFinal(s))
+      SetFinal(s, fst_->Final(s));
+    return CacheImpl<A>::Final(s);
+  }
+
+  // The three counters below expand 's' first if it has no arcs yet.
+  size_t NumArcs(StateId s) {
+    if (!HasArcs(s))
+      Expand(s);
+    return CacheImpl<A>::NumArcs(s);
+  }
+
+  size_t NumInputEpsilons(StateId s) {
+    if (!HasArcs(s))
+      Expand(s);
+    return CacheImpl<A>::NumInputEpsilons(s);
+  }
+
+  size_t NumOutputEpsilons(StateId s) {
+    if (!HasArcs(s))
+      Expand(s);
+    return CacheImpl<A>::NumOutputEpsilons(s);
+  }
+
+  uint64 Properties() const { return Properties(kFstProperties); }
+
+  // Returns the impl properties under 'mask'. When kError is requested
+  // and the wrapped Fst reports it, the error bit is set here first.
+  uint64 Properties(uint64 mask) const {
+    const bool wants_error = (mask & kError) != 0;
+    if (wants_error && fst_->Properties(kError, false))
+      SetProperties(kError, kError);
+    return FstImpl<Arc>::Properties(mask);
+  }
+
+  void InitArcIterator(StateId s, ArcIteratorData<A>* data) {
+    if (!HasArcs(s))
+      Expand(s);
+    CacheImpl<A>::InitArcIterator(s, data);
+  }
+
+  // Copies every arc leaving 's' in the wrapped Fst, rewriting labels
+  // that have an entry in the active map(s); labels without an entry
+  // are kept as they are.
+  void Expand(StateId s) {
+    typedef typename unordered_map<Label, Label>::iterator MapIter;
+    for (ArcIterator<Fst<A> > aiter(*fst_, s); !aiter.Done(); aiter.Next()) {
+      A arc = aiter.Value();
+
+      if (relabel_input_) {
+        MapIter hit = input_map_.find(arc.ilabel);
+        if (hit != input_map_.end())
+          arc.ilabel = hit->second;
+      }
+
+      if (relabel_output_) {
+        MapIter hit = output_map_.find(arc.olabel);
+        if (hit != output_map_.end())
+          arc.olabel = hit->second;
+      }
+
+      PushArc(s, arc);
+    }
+    SetArcs(s);
+  }
+
+ private:
+  // True when both tables exist and their labeled checksums differ.
+  static bool NeedsRemap(const SymbolTable *old_syms,
+                         const SymbolTable *new_syms) {
+    return old_syms && new_syms &&
+        old_syms->LabeledCheckSum() != new_syms->LabeledCheckSum();
+  }
+
+  // For every entry of 'old_syms', maps its label to whatever
+  // new_syms.Find() returns for the same symbol.
+  static void MapSymbols(const SymbolTable &old_syms,
+                         const SymbolTable &new_syms,
+                         unordered_map<Label, Label> *label_map) {
+    for (SymbolTableIterator siter(old_syms); !siter.Done(); siter.Next())
+      (*label_map)[siter.Value()] = new_syms.Find(siter.Symbol());
+  }
+
+  const Fst<A> *fst_;                       // wrapped Fst, owned
+
+  unordered_map<Label, Label> input_map_;   // old ilabel -> new ilabel
+  unordered_map<Label, Label> output_map_;  // old olabel -> new olabel
+  bool relabel_input_;                      // input_map_ is in use
+  bool relabel_output_;                     // output_map_ is in use
+
+  void operator=(const RelabelFstImpl<A> &);  // disallow
+};
+
+
+//
+// \class RelabelFst
+// \brief Delayed implementation of arc relabeling
+//
+// Interface class: each constructor builds a RelabelFstImpl and hands it
+// to ImplToFst, which handles reference counting and provides most of
+// the Fst methods.
+template <class A>
+class RelabelFst : public ImplToFst< RelabelFstImpl<A> > {
+ public:
+  friend class ArcIterator< RelabelFst<A> >;
+  friend class StateIterator< RelabelFst<A> >;
+
+  typedef A Arc;
+  typedef typename A::Label Label;
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+  typedef CacheState<A> State;
+  typedef RelabelFstImpl<A> Impl;
+
+  // Relabels with explicit label pairs and default options.
+  RelabelFst(const Fst<A>& fst,
+             const vector<pair<Label, Label> >& ipairs,
+             const vector<pair<Label, Label> >& opairs)
+      : ImplToFst<Impl>(
+            new Impl(fst, ipairs, opairs, RelabelFstOptions())) {}
+
+  // Relabels with explicit label pairs and caller-supplied options.
+  RelabelFst(const Fst<A>& fst,
+             const vector<pair<Label, Label> >& ipairs,
+             const vector<pair<Label, Label> >& opairs,
+             const RelabelFstOptions &opts)
+      : ImplToFst<Impl>(new Impl(fst, ipairs, opairs, opts)) {}
+
+  // Relabels from the symbol tables attached to 'fst' to the new ones,
+  // with default options.
+  RelabelFst(const Fst<A>& fst,
+             const SymbolTable* new_isymbols,
+             const SymbolTable* new_osymbols)
+      : ImplToFst<Impl>(
+            new Impl(fst,
+                     fst.InputSymbols(), new_isymbols,
+                     fst.OutputSymbols(), new_osymbols,
+                     RelabelFstOptions())) {}
+
+  // As above, with caller-supplied options.
+  RelabelFst(const Fst<A>& fst,
+             const SymbolTable* new_isymbols,
+             const SymbolTable* new_osymbols,
+             const RelabelFstOptions &opts)
+      : ImplToFst<Impl>(
+            new Impl(fst,
+                     fst.InputSymbols(), new_isymbols,
+                     fst.OutputSymbols(), new_osymbols,
+                     opts)) {}
+
+  // Relabels between explicitly given old and new symbol tables, with
+  // default options.
+  RelabelFst(const Fst<A>& fst,
+             const SymbolTable* old_isymbols,
+             const SymbolTable* new_isymbols,
+             const SymbolTable* old_osymbols,
+             const SymbolTable* new_osymbols)
+      : ImplToFst<Impl>(
+            new Impl(fst,
+                     old_isymbols, new_isymbols,
+                     old_osymbols, new_osymbols,
+                     RelabelFstOptions())) {}
+
+  // As above, with caller-supplied options.
+  RelabelFst(const Fst<A>& fst,
+             const SymbolTable* old_isymbols,
+             const SymbolTable* new_isymbols,
+             const SymbolTable* old_osymbols,
+             const SymbolTable* new_osymbols,
+             const RelabelFstOptions &opts)
+      : ImplToFst<Impl>(
+            new Impl(fst,
+                     old_isymbols, new_isymbols,
+                     old_osymbols, new_osymbols,
+                     opts)) {}
+
+  // See Fst<>::Copy() for doc.
+  RelabelFst(const RelabelFst<A> &fst, bool safe = false)
+      : ImplToFst<Impl>(fst, safe) {}
+
+  // Get a copy of this RelabelFst. See Fst<>::Copy() for further doc.
+  virtual RelabelFst<A> *Copy(bool safe = false) const {
+    return new RelabelFst<A>(*this, safe);
+  }
+
+  virtual void InitStateIterator(StateIteratorData<A> *data) const;
+
+  // Forwards to the implementation.
+  virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
+    GetImpl()->InitArcIterator(s, data);
+  }
+
+ private:
+  // Makes visible to friends.
+  Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
+
+  void operator=(const RelabelFst<A> &fst);  // disallow
+};
+
+// StateIterator specialization for RelabelFst. Steps an iterator over
+// the wrapped Fst and reports its own running counter as the state ID.
+template<class A>
+class StateIterator< RelabelFst<A> > : public StateIteratorBase<A> {
+ public:
+  typedef typename A::StateId StateId;
+
+  explicit StateIterator(const RelabelFst<A> &fst)
+      : impl_(fst.GetImpl()), siter_(*impl_->fst_), s_(0) {}
+
+  bool Done() const { return siter_.Done(); }
+
+  StateId Value() const { return s_; }
+
+  // No-op once the underlying iterator is exhausted.
+  void Next() {
+    if (siter_.Done())
+      return;
+    ++s_;
+    siter_.Next();
+  }
+
+  void Reset() {
+    siter_.Reset();
+    s_ = 0;
+  }
+
+ private:
+  // Underscore variants forward to the public methods above.
+  bool Done_() const { return Done(); }
+  StateId Value_() const { return Value(); }
+  void Next_() { Next(); }
+  void Reset_() { Reset(); }
+
+  const RelabelFstImpl<A> *impl_;   // implementation being iterated
+  StateIterator< Fst<A> > siter_;   // iterator over the wrapped Fst
+  StateId s_;                       // counter returned by Value()
+
+  DISALLOW_COPY_AND_ASSIGN(StateIterator);
+};
+
+
+// ArcIterator specialization for RelabelFst: a CacheArcIterator whose
+// constructor expands state 's' if the implementation has no arcs for it.
+template <class A>
+class ArcIterator< RelabelFst<A> >
+    : public CacheArcIterator< RelabelFst<A> > {
+ public:
+  typedef typename A::StateId StateId;
+
+  ArcIterator(const RelabelFst<A> &fst, StateId s)
+      : CacheArcIterator< RelabelFst<A> >(fst.GetImpl(), s) {
+    RelabelFstImpl<A> *impl = fst.GetImpl();
+    if (!impl->HasArcs(s))
+      impl->Expand(s);
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+// Stores a newly allocated StateIterator specialization in 'data->base'.
+template <class A> inline
+void RelabelFst<A>::InitStateIterator(StateIteratorData<A> *data) const {
+  typedef StateIterator< RelabelFst<A> > Iter;
+  data->base = new Iter(*this);
+}
+
+// Useful alias when using StdArc.
+typedef RelabelFst<StdArc> StdRelabelFst;
+
+} // namespace fst
+
+#endif // FST_LIB_RELABEL_H__
diff --git a/src/include/fst/replace-util.h b/src/include/fst/replace-util.h
new file mode 100644
index 0000000..f4a9c05
--- /dev/null
+++ b/src/include/fst/replace-util.h
@@ -0,0 +1,550 @@
+// replace-util.h
+
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+
+// \file
+// Utility classes for the recursive replacement of Fsts (RTNs).
+
+#ifndef FST_LIB_REPLACE_UTIL_H__
+#define FST_LIB_REPLACE_UTIL_H__
+
+#include <vector>
+using std::vector;
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <unordered_set>
+using std::tr1::unordered_set;
+using std::tr1::unordered_multiset;
+#include <map>
+
+#include <fst/connect.h>
+#include <fst/mutable-fst.h>
+#include <fst/topsort.h>
+
+
+namespace fst {
+
+// Forward declaration of Replace(); ReplaceUtil::ReplaceLabels() calls it
+// with (label/Fst pairs, output Fst, root label, epsilon_on_replace).
+template <class Arc>
+void Replace(
+    const vector<pair<typename Arc::Label, const Fst<Arc>* > > &ifst_array,
+    MutableFst<Arc> *ofst,
+    typename Arc::Label root,
+    bool epsilon_on_replace);
+
+
+// Utility class for the recursive replacement of Fsts (RTNs). The
+// user provides a set of Label, Fst pairs at construction. These are
+// used by methods for testing cyclic dependencies and connectedness
+// and doing RTN connection and specific Fst replacement by label or
+// for various optimization properties. The modified results can be
+// obtained with the GetFstPairs() or GetMutableFstPairs() methods.
+//
+// Fsts are addressed internally by an "Fst ID", the index into the
+// per-ID arrays; index 0 is a placeholder holding no Fst.
+template <class Arc>
+class ReplaceUtil {
+ public:
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+  typedef typename Arc::StateId StateId;
+
+  typedef pair<Label, const Fst<Arc>*> FstPair;
+  typedef pair<Label, MutableFst<Arc>*> MutableFstPair;
+  typedef unordered_map<Label, Label> NonTerminalHash;
+
+  // Constructs from mutable Fsts; Fst ownership given to ReplaceUtil.
+  ReplaceUtil(const vector<MutableFstPair> &fst_pairs,
+              Label root_label, bool epsilon_on_replace = false);
+
+  // Constructs from Fsts; Fst ownership retained by caller.
+  ReplaceUtil(const vector<FstPair> &fst_pairs,
+              Label root_label, bool epsilon_on_replace = false);
+
+  // Constructs from ReplaceFst internals; ownership retained by caller.
+  ReplaceUtil(const vector<const Fst<Arc> *> &fst_array,
+              const NonTerminalHash &nonterminal_hash, Label root_fst,
+              bool epsilon_on_replace = false);
+
+  // Deletes every Fst held in fst_array_.
+  ~ReplaceUtil() {
+    // size_t index: the bound is a vector size, not a Label.
+    for (size_t i = 0; i < fst_array_.size(); ++i)
+      delete fst_array_[i];
+  }
+
+  // True if the non-terminal dependencies are cyclic. Cyclic
+  // dependencies will result in an unexpandable replace fst.
+  bool CyclicDependencies() const {
+    GetDependencies(false);
+    return depprops_ & kCyclic;
+  }
+
+  // Returns true if no useless Fsts, states or transitions.
+  bool Connected() const {
+    GetDependencies(false);
+    const uint64 props = kAccessible | kCoAccessible;
+    for (size_t i = 0; i < fst_array_.size(); ++i) {
+      if (!fst_array_[i])
+        continue;  // placeholder or removed Fst
+      if (fst_array_[i]->Properties(props, true) != props || !depaccess_[i])
+        return false;
+    }
+    return true;
+  }
+
+  // Removes useless Fsts, states and transitions.
+  void Connect();
+
+  // Replaces Fsts specified by labels.
+  // Does nothing if there are cyclic dependencies.
+  void ReplaceLabels(const vector<Label> &labels);
+
+  // Replaces Fsts that have at most 'nstates' states, 'narcs' arcs and
+  // 'nnonterm' non-terminals (updating in reverse dependency order).
+  // Does nothing if there are cyclic dependencies.
+  void ReplaceBySize(size_t nstates, size_t narcs, size_t nnonterms);
+
+  // Replaces singleton Fsts.
+  // Does nothing if there are cyclic dependencies.
+  void ReplaceTrivial() { ReplaceBySize(2, 1, 1); }
+
+  // Replaces non-terminals that have at most 'ninstances' instances
+  // (updating in dependency order).
+  // Does nothing if there are cyclic dependencies.
+  void ReplaceByInstances(size_t ninstances);
+
+  // Replaces non-terminals that have only one instance.
+  // Does nothing if there are cyclic dependencies.
+  void ReplaceUnique() { ReplaceByInstances(1); }
+
+  // Returns Label, Fst pairs; Fst ownership retained by ReplaceUtil.
+  void GetFstPairs(vector<FstPair> *fst_pairs);
+
+  // Returns Label, MutableFst pairs; Fst ownership given to caller.
+  void GetMutableFstPairs(vector<MutableFstPair> *mutable_fst_pairs);
+
+ private:
+  // Per Fst statistics
+  struct ReplaceStats {
+    StateId nstates;    // # of states
+    StateId nfinal;     // # of final states
+    size_t narcs;       // # of arcs
+    Label nnonterms;    // # of non-terminals in Fst
+    size_t nref;        // # of non-terminal instances referring to this Fst
+
+    // # of times that ith Fst references this Fst
+    map<Label, size_t> inref;
+    // # of times that this Fst references the ith Fst
+    map<Label, size_t> outref;
+
+    ReplaceStats()
+        : nstates(0),
+          nfinal(0),
+          narcs(0),
+          nnonterms(0),
+          nref(0) {}
+  };
+
+  // Check Mutable Fsts exist o.w. create them.
+  void CheckMutableFsts();
+
+  // Computes the dependency graph of the replace Fsts.
+  // If 'stats' is true, dependency statistics computed as well.
+  void GetDependencies(bool stats) const;
+
+  // Drops the cached dependency graph, statistics and properties so
+  // that the next GetDependencies() call recomputes them.
+  void ClearDependencies() const {
+    depfst_.DeleteStates();
+    stats_.clear();
+    depprops_ = 0;
+    have_stats_ = false;
+  }
+
+  // Get topological order of dependencies. Returns false with cyclic input.
+  bool GetTopOrder(const Fst<Arc> &fst, vector<Label> *toporder) const;
+
+  // Update statistics assuming that jth Fst will be replaced.
+  void UpdateStats(Label j);
+
+  Label root_label_;                              // root non-terminal
+  Label root_fst_;                                // root Fst ID
+  bool epsilon_on_replace_;                       // see Replace()
+  vector<const Fst<Arc> *> fst_array_;            // Fst per ID
+  vector<MutableFst<Arc> *> mutable_fst_array_;   // MutableFst per ID
+  vector<Label> nonterminal_array_;               // Fst ID to non-terminal
+  NonTerminalHash nonterminal_hash_;              // non-terminal to Fst ID
+  mutable VectorFst<Arc> depfst_;                 // Fst ID dependencies
+  mutable vector<bool> depaccess_;                // Fst ID accessibility
+  mutable uint64 depprops_;                       // dependency Fst props
+  mutable bool have_stats_;                       // have dependency statistics
+  mutable vector<ReplaceStats> stats_;            // Per Fst statistics
+  DISALLOW_COPY_AND_ASSIGN(ReplaceUtil);
+};
+
+// Constructs from (label, MutableFst) pairs; the Fsts are stored as
+// given (no copy) in both fst_array_ and mutable_fst_array_. Slot 0 of
+// each per-ID array is a placeholder, so real Fst IDs start at 1 and a
+// root ID of 0 means the root label was not among the pairs.
+template <class Arc>
+ReplaceUtil<Arc>::ReplaceUtil(
+    const vector<MutableFstPair> &fst_pairs,
+    Label root_label, bool epsilon_on_replace)
+    : root_label_(root_label),
+      root_fst_(0),
+      epsilon_on_replace_(epsilon_on_replace),
+      depprops_(0),
+      have_stats_(false) {
+  fst_array_.push_back(0);
+  mutable_fst_array_.push_back(0);
+  nonterminal_array_.push_back(kNoLabel);
+  for (size_t i = 0; i < fst_pairs.size(); ++i) {
+    Label label = fst_pairs[i].first;
+    MutableFst<Arc> *fst = fst_pairs[i].second;
+    nonterminal_hash_[label] = fst_array_.size();
+    nonterminal_array_.push_back(label);
+    fst_array_.push_back(fst);
+    mutable_fst_array_.push_back(fst);
+  }
+  // Look the root up with find(): operator[] would insert a spurious
+  // root_label_ -> 0 entry into the hash when the root is missing.
+  typename NonTerminalHash::iterator root_it =
+      nonterminal_hash_.find(root_label_);
+  if (root_it != nonterminal_hash_.end())
+    root_fst_ = root_it->second;
+  if (!root_fst_)
+    FSTERROR() << "ReplaceUtil: no root FST for label: " << root_label_;
+}
+
+// Constructs from (label, Fst) pairs; each Fst is copied with Copy(),
+// so the caller keeps its own. Slot 0 of each per-ID array is a
+// placeholder, so real Fst IDs start at 1 and a root ID of 0 means the
+// root label was not among the pairs.
+template <class Arc>
+ReplaceUtil<Arc>::ReplaceUtil(
+    const vector<FstPair> &fst_pairs,
+    Label root_label, bool epsilon_on_replace)
+    : root_label_(root_label),
+      root_fst_(0),
+      epsilon_on_replace_(epsilon_on_replace),
+      depprops_(0),
+      have_stats_(false) {
+  fst_array_.push_back(0);
+  nonterminal_array_.push_back(kNoLabel);
+  for (size_t i = 0; i < fst_pairs.size(); ++i) {
+    Label label = fst_pairs[i].first;
+    const Fst<Arc> *fst = fst_pairs[i].second;
+    nonterminal_hash_[label] = fst_array_.size();
+    nonterminal_array_.push_back(label);
+    fst_array_.push_back(fst->Copy());
+  }
+  // Look the root up with find(): operator[] would insert a spurious
+  // root_label -> 0 entry into the hash when the root is missing.
+  typename NonTerminalHash::iterator root_it =
+      nonterminal_hash_.find(root_label);
+  if (root_it != nonterminal_hash_.end())
+    root_fst_ = root_it->second;
+  if (!root_fst_)
+    FSTERROR() << "ReplaceUtil: no root FST for label: " << root_label_;
+}
+
+// Constructs from an Fst array plus the label -> Fst ID hash that
+// indexes it. Entry 0 of 'fst_array' is skipped (a null placeholder is
+// stored instead); entries from index 1 on are copied with Copy().
+template <class Arc>
+ReplaceUtil<Arc>::ReplaceUtil(
+    const vector<const Fst<Arc> *> &fst_array,
+    const NonTerminalHash &nonterminal_hash, Label root_fst,
+    bool epsilon_on_replace)
+    : root_fst_(root_fst),
+      epsilon_on_replace_(epsilon_on_replace),
+      // Slots without a hash entry (such as placeholder slot 0) hold
+      // kNoLabel, matching what the other constructors store in slot 0.
+      nonterminal_array_(fst_array.size(), kNoLabel),
+      nonterminal_hash_(nonterminal_hash),
+      depprops_(0),
+      have_stats_(false) {
+  fst_array_.push_back(0);
+  for (size_t i = 1; i < fst_array.size(); ++i)
+    fst_array_.push_back(fst_array[i]->Copy());
+  // Invert the hash to get the Fst ID -> label table.
+  for (typename NonTerminalHash::const_iterator it =
+           nonterminal_hash.begin(); it != nonterminal_hash.end(); ++it)
+    nonterminal_array_[it->second] = it->first;
+  root_label_ = nonterminal_array_[root_fst_];
+}
+
+// Builds depfst_, a graph with one state per Fst ID and one arc
+// src -> dst for every arc of Fst 'src' whose output label is a
+// non-terminal mapped to Fst 'dst'. With 'stats' set, the per-Fst
+// statistics in stats_ are gathered during the same pass. A graph that
+// already exists is reused unless statistics are now wanted and were
+// not gathered before.
+template <class Arc>
+void ReplaceUtil<Arc>::GetDependencies(bool stats) const {
+  if (depfst_.NumStates() > 0) {
+    if (!stats || have_stats_)
+      return;
+    ClearDependencies();
+  }
+
+  have_stats_ = stats;
+  if (have_stats_)
+    stats_.reserve(fst_array_.size());
+
+  // One final state (and one stats record) per Fst ID.
+  for (Label id = 0; id < fst_array_.size(); ++id) {
+    depfst_.AddState();
+    depfst_.SetFinal(id, Weight::One());
+    if (have_stats_)
+      stats_.push_back(ReplaceStats());
+  }
+  depfst_.SetStart(root_fst_);
+
+  // Scan every arc of every Fst for non-terminal output labels.
+  for (Label src = 0; src < fst_array_.size(); ++src) {
+    const Fst<Arc> *ifst = fst_array_[src];
+    if (!ifst)
+      continue;
+    for (StateIterator<Fst<Arc> > siter(*ifst); !siter.Done(); siter.Next()) {
+      const StateId state = siter.Value();
+      if (have_stats_) {
+        ++stats_[src].nstates;
+        if (ifst->Final(state) != Weight::Zero())
+          ++stats_[src].nfinal;
+      }
+      for (ArcIterator<Fst<Arc> > aiter(*ifst, state);
+           !aiter.Done(); aiter.Next()) {
+        if (have_stats_)
+          ++stats_[src].narcs;
+        const Arc& arc = aiter.Value();
+
+        typename NonTerminalHash::const_iterator hit =
+            nonterminal_hash_.find(arc.olabel);
+        if (hit == nonterminal_hash_.end())
+          continue;  // ordinary (terminal) label
+
+        const Label dst = hit->second;
+        depfst_.AddArc(src, Arc(arc.olabel, arc.olabel, Weight::One(), dst));
+        if (!have_stats_)
+          continue;
+        ++stats_[src].nnonterms;
+        ++stats_[dst].nref;
+        ++stats_[dst].inref[src];
+        ++stats_[src].outref[dst];
+      }
+    }
+  }
+
+  // Gets accessibility info
+  SccVisitor<Arc> scc_visitor(0, &depaccess_, 0, &depprops_);
+  DfsVisit(depfst_, &scc_visitor);
+}
+
+// Adjusts stats_ as if Fst 'j' had been substituted into every Fst that
+// references it: each referrer absorbs j's states, arcs, non-terminals
+// and outgoing references, and each Fst referenced by j is credited with
+// references from j's referrers instead of from j. Logs an error when
+// statistics were not computed; does nothing for the root Fst.
+template <class Arc>
+void ReplaceUtil<Arc>::UpdateStats(Label j) {
+  if (!have_stats_) {
+    FSTERROR() << "ReplaceUtil::UpdateStats: stats not available";
+    return;
+  }
+
+  if (j == root_fst_)  // can't replace root
+    return;
+
+  typedef typename map<Label, size_t>::iterator RefIter;
+  ReplaceStats &replaced = stats_[j];
+
+  // Fold j into each Fst i that references it.
+  for (RefIter in = replaced.inref.begin();
+       in != replaced.inref.end(); ++in) {
+    const Label i = in->first;
+    const size_t ni = in->second;
+    ReplaceStats &referrer = stats_[i];
+    referrer.nstates += replaced.nstates * ni;
+    referrer.narcs += (replaced.narcs + 1) * ni;  // narcs - 1 + 2 (eps)
+    referrer.nnonterms += (replaced.nnonterms - 1) * ni;
+    referrer.outref.erase(j);
+    for (RefIter out = replaced.outref.begin();
+         out != replaced.outref.end(); ++out) {
+      const Label k = out->first;
+      const size_t nk = out->second;
+      referrer.outref[k] += ni * nk;
+    }
+  }
+
+  // Re-attribute j's references to each Fst k that j references.
+  for (RefIter out = replaced.outref.begin();
+       out != replaced.outref.end(); ++out) {
+    const Label k = out->first;
+    const size_t nk = out->second;
+    ReplaceStats &referenced = stats_[k];
+    referenced.nref -= nk;
+    referenced.inref.erase(j);
+    for (RefIter in = replaced.inref.begin();
+         in != replaced.inref.end(); ++in) {
+      const Label i = in->first;
+      const size_t ni = in->second;
+      referenced.inref[i] += ni * nk;
+      referenced.nref += ni * nk;
+    }
+  }
+}
+
+// Fills mutable_fst_array_ if it is still empty: every non-null entry of
+// fst_array_ is replaced by a VectorFst built from it (the old Fst is
+// deleted) and both arrays then point at that VectorFst. Null entries
+// stay null in both arrays.
+template <class Arc>
+void ReplaceUtil<Arc>::CheckMutableFsts() {
+  if (!mutable_fst_array_.empty())
+    return;
+  for (Label i = 0; i < fst_array_.size(); ++i) {
+    const Fst<Arc> *source = fst_array_[i];
+    MutableFst<Arc> *converted = 0;
+    if (source) {
+      converted = new VectorFst<Arc>(*source);
+      delete source;
+      fst_array_[i] = converted;
+    }
+    mutable_fst_array_.push_back(converted);
+  }
+}
+
+// Removes useless Fsts, states and transitions: first applies
+// fst::Connect() to every Fst that is not already known to be accessible
+// and coaccessible, then deletes each Fst that the dependency graph
+// marks as inaccessible.
+template <class Arc>
+void ReplaceUtil<Arc>::Connect() {
+  CheckMutableFsts();
+  const uint64 props = kAccessible | kCoAccessible;
+  for (size_t i = 0; i < mutable_fst_array_.size(); ++i) {
+    MutableFst<Arc> *fst = mutable_fst_array_[i];
+    if (fst && fst->Properties(props, false) != props)
+      fst::Connect(fst);
+  }
+  // The Fsts may just have changed, so dependency data cached by an
+  // earlier call (e.g. Connected()) must be discarded; otherwise
+  // GetDependencies() would return the stale graph unchanged.
+  ClearDependencies();
+  GetDependencies(false);
+  for (size_t i = 0; i < mutable_fst_array_.size(); ++i) {
+    MutableFst<Arc> *fst = mutable_fst_array_[i];
+    if (fst && !depaccess_[i]) {
+      delete fst;
+      fst_array_[i] = 0;
+      mutable_fst_array_[i] = 0;
+    }
+  }
+  ClearDependencies();
+}
+
+// Runs a TopOrderVisitor over 'fst'. When the visitor reports the graph
+// as acyclic, fills 'toporder' so that toporder[order[s]] == s for every
+// state s and returns true; otherwise logs a warning, leaves 'toporder'
+// untouched and returns false.
+template <class Arc>
+bool ReplaceUtil<Arc>::GetTopOrder(const Fst<Arc> &fst,
+                                   vector<Label> *toporder) const {
+  vector<StateId> order;
+  bool acyclic = false;
+
+  TopOrderVisitor<Arc> visitor(&order, &acyclic);
+  DfsVisit(fst, &visitor);
+
+  if (acyclic) {
+    toporder->resize(order.size());
+    for (Label s = 0; s < order.size(); ++s)
+      (*toporder)[order[s]] = s;
+    return true;
+  }
+
+  LOG(WARNING) << "ReplaceUtil::GetTopOrder: Cyclical label dependencies";
+  return false;
+}
+
+// Replaces, inside every Fst that references them, the Fsts whose
+// non-terminal label is listed in 'labels' (the root label is ignored).
+// Does nothing but clear the dependency cache when the restricted
+// dependency graph is cyclic.
+template <class Arc>
+void ReplaceUtil<Arc>::ReplaceLabels(const vector<Label> &labels) {
+  CheckMutableFsts();
+
+  unordered_set<Label> label_set;
+  for (Label i = 0; i < labels.size(); ++i) {
+    if (labels[i] == root_label_)  // can't replace root
+      continue;
+    label_set.insert(labels[i]);
+  }
+
+  // Copy of the dependency graph keeping only arcs that lead to an Fst
+  // whose label was requested.
+  GetDependencies(false);
+  VectorFst<Arc> pfst(depfst_);
+  for (StateId s = 0; s < pfst.NumStates(); ++s) {
+    vector<Arc> kept;
+    for (ArcIterator< VectorFst<Arc> > aiter(pfst, s);
+         !aiter.Done(); aiter.Next()) {
+      const Arc &arc = aiter.Value();
+      if (label_set.count(nonterminal_array_[arc.nextstate]) > 0)
+        kept.push_back(arc);
+    }
+    pfst.DeleteArcs(s);
+    for (size_t a = 0; a < kept.size(); ++a)
+      pfst.AddArc(s, kept[a]);
+  }
+
+  vector<Label> toporder;
+  if (!GetTopOrder(pfst, &toporder)) {
+    ClearDependencies();
+    return;
+  }
+
+  // Walk 'toporder' from its last entry to its first, expanding the
+  // requested non-terminals inside each Fst that still references some.
+  for (typename vector<Label>::reverse_iterator rit = toporder.rbegin();
+       rit != toporder.rend(); ++rit) {
+    const StateId s = *rit;
+    vector<FstPair> fst_pairs;
+    for (ArcIterator< VectorFst<Arc> > aiter(pfst, s);
+         !aiter.Done(); aiter.Next()) {
+      const StateId dep = aiter.Value().nextstate;
+      const Label dep_label = nonterminal_array_[dep];
+      const Fst<Arc> *dep_fst = fst_array_[dep];
+      fst_pairs.push_back(make_pair(dep_label, dep_fst));
+    }
+    if (fst_pairs.empty())
+      continue;
+
+    // The Fst being rewritten goes in last and acts as the root.
+    const Label self_label = nonterminal_array_[s];
+    const Fst<Arc> *self_fst = fst_array_[s];
+    fst_pairs.push_back(make_pair(self_label, self_fst));
+
+    Replace(fst_pairs, mutable_fst_array_[s], self_label,
+            epsilon_on_replace_);
+  }
+  ClearDependencies();
+}
+
+// Collects the labels of all Fsts whose statistics are within the given
+// state, arc and non-terminal limits, visiting 'toporder' from its last
+// entry to its first and updating the statistics after each pick, then
+// replaces those labels. Does nothing on cyclic dependencies.
+template <class Arc>
+void ReplaceUtil<Arc>::ReplaceBySize(size_t nstates, size_t narcs,
+                                     size_t nnonterms) {
+  GetDependencies(true);
+
+  vector<Label> toporder;
+  if (!GetTopOrder(depfst_, &toporder)) {
+    ClearDependencies();
+    return;
+  }
+
+  vector<Label> labels;
+  for (typename vector<Label>::reverse_iterator rit = toporder.rbegin();
+       rit != toporder.rend(); ++rit) {
+    const Label j = *rit;
+    const bool too_big = stats_[j].nstates > nstates ||
+                         stats_[j].narcs > narcs ||
+                         stats_[j].nnonterms > nnonterms;
+    if (too_big)
+      continue;
+    labels.push_back(nonterminal_array_[j]);
+    UpdateStats(j);
+  }
+  ReplaceLabels(labels);
+}
+
+// Collects the labels of all Fsts referenced at most 'ninstances' times,
+// visiting 'toporder' front to back and updating the statistics after
+// each pick, then replaces those labels. Does nothing on cyclic
+// dependencies.
+template <class Arc>
+void ReplaceUtil<Arc>::ReplaceByInstances(size_t ninstances) {
+  GetDependencies(true);
+
+  vector<Label> toporder;
+  if (!GetTopOrder(depfst_, &toporder)) {
+    ClearDependencies();
+    return;
+  }
+
+  vector<Label> labels;
+  for (typename vector<Label>::iterator it = toporder.begin();
+       it != toporder.end(); ++it) {
+    const Label j = *it;
+    if (stats_[j].nref > ninstances)
+      continue;
+    labels.push_back(nonterminal_array_[j]);
+    UpdateStats(j);
+  }
+  ReplaceLabels(labels);
+}
+
+// Overwrites 'fst_pairs' with one (label, Fst) pair per non-null entry
+// of fst_array_. The pointers are the ones held by this object.
+template <class Arc>
+void ReplaceUtil<Arc>::GetFstPairs(vector<FstPair> *fst_pairs) {
+  CheckMutableFsts();
+  fst_pairs->clear();
+  for (Label id = 0; id < fst_array_.size(); ++id) {
+    const Fst<Arc> *fst = fst_array_[id];
+    if (fst) {
+      const Label label = nonterminal_array_[id];
+      fst_pairs->push_back(make_pair(label, fst));
+    }
+  }
+}
+
+// Overwrites 'mutable_fst_pairs' with one (label, MutableFst) pair per
+// non-null entry of mutable_fst_array_. Each Fst handed out is a fresh
+// Copy() of the stored one.
+template <class Arc>
+void ReplaceUtil<Arc>::GetMutableFstPairs(
+    vector<MutableFstPair> *mutable_fst_pairs) {
+  CheckMutableFsts();
+  mutable_fst_pairs->clear();
+  for (Label id = 0; id < mutable_fst_array_.size(); ++id) {
+    MutableFst<Arc> *fst = mutable_fst_array_[id];
+    if (fst) {
+      const Label label = nonterminal_array_[id];
+      mutable_fst_pairs->push_back(make_pair(label, fst->Copy()));
+    }
+  }
+}
+
+} // namespace fst
+
+#endif // FST_LIB_REPLACE_UTIL_H__
diff --git a/src/include/fst/replace.h b/src/include/fst/replace.h
new file mode 100644
index 0000000..d08c0ea
--- /dev/null
+++ b/src/include/fst/replace.h
@@ -0,0 +1,1453 @@
+// replace.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: johans@google.com (Johan Schalkwyk)
+//
+// \file
+// Functions and classes for the recursive replacement of Fsts.
+//
+
+#ifndef FST_LIB_REPLACE_H__
+#define FST_LIB_REPLACE_H__
+
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <set>
+#include <string>
+#include <utility>
+using std::pair; using std::make_pair;
+#include <vector>
+using std::vector;
+
+#include <fst/cache.h>
+#include <fst/expanded-fst.h>
+#include <fst/fst.h>
+#include <fst/matcher.h>
+#include <fst/replace-util.h>
+#include <fst/state-table.h>
+#include <fst/test-properties.h>
+
+namespace fst {
+
+//
+// REPLACE STATE TUPLES AND TABLES
+//
+// The replace state table has the form
+//
+// template <class A, class P>
+// class ReplaceStateTable {
+// public:
+// typedef A Arc;
+// typedef P PrefixId;
+// typedef typename A::StateId StateId;
+// typedef ReplaceStateTuple<StateId, PrefixId> StateTuple;
+// typedef typename A::Label Label;
+//
+// // Required constructor
+// ReplaceStateTable(const vector<pair<Label, const Fst<A>*> > &fst_tuples,
+// Label root);
+//
+// // Required copy constructor that does not copy state
+// ReplaceStateTable(const ReplaceStateTable<A,P> &table);
+//
+// // Lookup state ID by tuple. If it doesn't exist, then add it.
+// StateId FindState(const StateTuple &tuple);
+//
+// // Lookup state tuple by ID.
+// const StateTuple &Tuple(StateId id) const;
+// };
+
+
+// \struct ReplaceStateTuple
+// \brief The three values that together identify one state of the
+// replace result: a prefix, a component Fst and a state in that Fst.
+template <class S, class P>
+struct ReplaceStateTuple {
+  typedef S StateId;
+  typedef P PrefixId;
+
+  // Default tuple: prefix -1 and no Fst / no state.
+  ReplaceStateTuple()
+      : prefix_id(-1),
+        fst_id(kNoStateId),
+        fst_state(kNoStateId) {}
+
+  // Tuple for prefix 'p', Fst 'f' and state 's' within that Fst.
+  ReplaceStateTuple(PrefixId p, StateId f, StateId s)
+      : prefix_id(p),
+        fst_id(f),
+        fst_state(s) {}
+
+  PrefixId prefix_id;  // index in prefix table
+  StateId fst_id;      // current fst being walked
+  StateId fst_state;   // current state in fst being walked, not to be
+                       // confused with the state_id of the combined fst
+};
+
+
+// Two replace state tuples are equal when all three fields match.
+template <class S, class P>
+inline bool operator==(const ReplaceStateTuple<S, P>& x,
+                       const ReplaceStateTuple<S, P>& y) {
+  if (!(x.prefix_id == y.prefix_id))
+    return false;
+  if (!(x.fst_id == y.fst_id))
+    return false;
+  return x.fst_state == y.fst_state;
+}
+
+
+// \class ReplaceRootSelector
+// Functor selecting the tuples that correspond to states in the root
+// FST, i.e. those whose prefix_id is 0.
+template <class S, class P>
+class ReplaceRootSelector {
+ public:
+  bool operator()(const ReplaceStateTuple<S, P> &tuple) const {
+    const bool in_root = (tuple.prefix_id == 0);
+    return in_root;
+  }
+};
+
+
+// \class ReplaceFingerprint
+// Fingerprint for general replace state tuples. The value is
+//   prefix_id * (last cumulative size)
+//     + (cumulative size at index fst_id - 1) + fst_state.
+// The size array is borrowed from the caller, not owned.
+template <class S, class P>
+class ReplaceFingerprint {
+ public:
+  ReplaceFingerprint(const vector<uint64> *size_array)
+      : cumulative_size_array_(size_array) {}
+
+  uint64 operator()(const ReplaceStateTuple<S, P> &tuple) const {
+    const uint64 prefix_offset =
+        tuple.prefix_id * (cumulative_size_array_->back());
+    const uint64 fst_offset = cumulative_size_array_->at(tuple.fst_id - 1);
+    return prefix_offset + fst_offset + tuple.fst_state;
+  }
+
+ private:
+  const vector<uint64> *cumulative_size_array_;
+};
+
+
+// \class ReplaceFstStateFingerprint
+// Fingerprint that is simply the tuple's fst_state. Useful when the
+// fst_state uniquely defines the tuple.
+template <class S, class P>
+class ReplaceFstStateFingerprint {
+ public:
+  uint64 operator()(const ReplaceStateTuple<S, P>& tuple) const {
+    const uint64 fingerprint = tuple.fst_state;
+    return fingerprint;
+  }
+};
+
+
+// \class ReplaceHash
+// A generic hash function for replace state tuples: the prefix ID plus
+// the Fst ID and the Fst state, each scaled by its own constant.
+template <typename S, typename P>
+class ReplaceHash {
+ public:
+  size_t operator()(const ReplaceStateTuple<S, P>& t) const {
+    const size_t fst_part = t.fst_id * kPrime0;
+    const size_t state_part = t.fst_state * kPrime1;
+    return t.prefix_id + fst_part + state_part;
+  }
+
+ private:
+  static const size_t kPrime0;  // multiplier for fst_id
+  static const size_t kPrime1;  // multiplier for fst_state
+};
+
+template <typename S, typename P>
+const size_t ReplaceHash<S, P>::kPrime0 = 7853;
+
+template <typename S, typename P>
+const size_t ReplaceHash<S, P>::kPrime1 = 7867;
+
+template <class A, class T> class ReplaceFstMatcher;
+
+
+// \class VectorHashReplaceStateTable
+// A two-level state table for replace.
+// Warning: calls CountStates to compute the number of states of each
+// component Fst.
+//
+// The object owns the underlying StateTable through a raw pointer, so it
+// may be copy-constructed (which builds a fresh, empty table) but not
+// assigned.
+template <class A, class P = ssize_t>
+class VectorHashReplaceStateTable {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef typename A::Label Label;
+  typedef P PrefixId;
+  typedef ReplaceStateTuple<StateId, P> StateTuple;
+  typedef VectorHashStateTable<ReplaceStateTuple<StateId, P>,
+                               ReplaceRootSelector<StateId, P>,
+                               ReplaceFstStateFingerprint<StateId, P>,
+                               ReplaceFingerprint<StateId, P> > StateTable;
+
+  // Counts the states of every component Fst. The root's count is kept
+  // in root_size_; every other count is accumulated into
+  // cumulative_size_array_ (the root contributes 0 to the running sum).
+  VectorHashReplaceStateTable(
+      const vector<pair<Label, const Fst<A>*> > &fst_tuples,
+      Label root) : root_size_(0), state_table_(0) {
+    cumulative_size_array_.push_back(0);
+    for (size_t i = 0; i < fst_tuples.size(); ++i) {
+      if (fst_tuples[i].first == root) {
+        root_size_ = CountStates(*(fst_tuples[i].second));
+        cumulative_size_array_.push_back(cumulative_size_array_.back());
+      } else {
+        cumulative_size_array_.push_back(cumulative_size_array_.back() +
+                                         CountStates(*(fst_tuples[i].second)));
+      }
+    }
+    state_table_ = NewStateTable();
+  }
+
+  // Copies the size information only; the new object gets its own
+  // freshly constructed state table.
+  VectorHashReplaceStateTable(const VectorHashReplaceStateTable<A, P> &table)
+      : root_size_(table.root_size_),
+        cumulative_size_array_(table.cumulative_size_array_),
+        state_table_(0) {
+    state_table_ = NewStateTable();
+  }
+
+  ~VectorHashReplaceStateTable() {
+    delete state_table_;
+  }
+
+  // Lookup state ID by tuple; forwards to the underlying table.
+  StateId FindState(const StateTuple &tuple) {
+    return state_table_->FindState(tuple);
+  }
+
+  // Lookup state tuple by ID; forwards to the underlying table.
+  const StateTuple &Tuple(StateId id) const {
+    return state_table_->Tuple(id);
+  }
+
+ private:
+  // Builds the underlying table from the current size information. The
+  // fingerprint functor points at this object's own size array.
+  StateTable *NewStateTable() const {
+    return new StateTable(
+        new ReplaceRootSelector<StateId, P>,
+        new ReplaceFstStateFingerprint<StateId, P>,
+        new ReplaceFingerprint<StateId, P>(&cumulative_size_array_),
+        root_size_,
+        root_size_ + cumulative_size_array_.back());
+  }
+
+  StateId root_size_;                     // # of states of the root Fst
+  vector<uint64> cumulative_size_array_;  // running sums of state counts
+  StateTable *state_table_;               // owned
+
+  // The implicit assignment would copy state_table_ and delete it twice.
+  void operator=(const VectorHashReplaceStateTable<A, P> &);  // disallow
+};
+
+
+// \class DefaultReplaceStateTable
+// Default replace state table: a CompactHashStateTable keyed on replace
+// state tuples and hashed with ReplaceHash.
+template <class A, class P = ssize_t>
+class DefaultReplaceStateTable : public CompactHashStateTable<
+  ReplaceStateTuple<typename A::StateId, P>,
+  ReplaceHash<typename A::StateId, P> > {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef typename A::Label Label;
+  typedef P PrefixId;
+  typedef ReplaceStateTuple<StateId, P> StateTuple;
+  typedef CompactHashStateTable<StateTuple,
+                                ReplaceHash<StateId, PrefixId> > StateTable;
+
+  using StateTable::FindState;
+  using StateTable::Tuple;
+
+  // Both arguments are ignored; they are only part of the signature.
+  DefaultReplaceStateTable(
+      const vector<pair<Label, const Fst<A>*> > &fst_tuples,
+      Label root)
+      : StateTable() {}
+
+  // Copy constructor that starts from an empty table rather than
+  // copying the contents of 'table'.
+  DefaultReplaceStateTable(const DefaultReplaceStateTable<A, P> &table)
+      : StateTable() {}
+};
+
+//
+// REPLACE FST CLASS
+//
+
+// Options for ReplaceFst.
+//
+// By default ReplaceFst will copy the input label of the 'replace arc'.
+// For acceptors we do not want this behaviour. Instead we need to
+// create an epsilon arc when recursing into the appropriate Fst.
+// The 'epsilon_on_replace' option can be used to toggle this behaviour.
+//
+// Every constructor leaves take_ownership false and state_table null.
+template <class A, class T = DefaultReplaceStateTable<A> >
+struct ReplaceFstOptions : CacheOptions {
+  int64 root;               // root rule for expansion
+  bool epsilon_on_replace;  // see the comment above
+  bool take_ownership;      // take ownership of input Fst(s)
+  T* state_table;
+
+  // Explicit cache options and root.
+  ReplaceFstOptions(const CacheOptions &opts, int64 r)
+      : CacheOptions(opts), root(r), epsilon_on_replace(false),
+        take_ownership(false), state_table(0) {}
+
+  // Root only.
+  explicit ReplaceFstOptions(int64 r)
+      : root(r), epsilon_on_replace(false),
+        take_ownership(false), state_table(0) {}
+
+  // Root plus the epsilon_on_replace setting.
+  ReplaceFstOptions(int64 r, bool epsilon_replace_arc)
+      : root(r), epsilon_on_replace(epsilon_replace_arc),
+        take_ownership(false), state_table(0) {}
+
+  // No root chosen yet (kNoLabel).
+  ReplaceFstOptions()
+      : root(kNoLabel), epsilon_on_replace(false),
+        take_ownership(false), state_table(0) {}
+};
+
+
+// \class ReplaceFstImpl
+// \brief Implementation class for replace class Fst
+//
+// The replace implementation class supports a dynamic
+// expansion of a recursive transition network represented as Fst
+// with dynamic replaceable arcs.
+//
+template <class A, class T>
+class ReplaceFstImpl : public CacheImpl<A> {
+ friend class ReplaceFstMatcher<A, T>;
+
+ public:
+ using FstImpl<A>::SetType;
+ using FstImpl<A>::SetProperties;
+ using FstImpl<A>::WriteHeader;
+ using FstImpl<A>::SetInputSymbols;
+ using FstImpl<A>::SetOutputSymbols;
+ using FstImpl<A>::InputSymbols;
+ using FstImpl<A>::OutputSymbols;
+
+ using CacheImpl<A>::PushArc;
+ using CacheImpl<A>::HasArcs;
+ using CacheImpl<A>::HasFinal;
+ using CacheImpl<A>::HasStart;
+ using CacheImpl<A>::SetArcs;
+ using CacheImpl<A>::SetFinal;
+ using CacheImpl<A>::SetStart;
+
+ typedef typename A::Label Label;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+ typedef CacheState<A> State;
+ typedef A Arc;
+ typedef unordered_map<Label, Label> NonTerminalHash;
+
+ typedef T StateTable;
+ typedef typename T::PrefixId PrefixId;
+ typedef ReplaceStateTuple<StateId, PrefixId> StateTuple;
+
+ // constructor for replace class implementation.
+ // \param fst_tuples array of label/fst tuples, one for each non-terminal
+ // Builds the implementation from the (non-terminal label, Fst) pairs in
+ // 'fst_tuples'. The state table is taken from 'opts.state_table' when it
+ // is non-null and allocated here otherwise. Each component Fst is stored
+ // in 'fst_array_' starting at index 1 (index 0 is a null placeholder),
+ // either directly (opts.take_ownership) or as a copy. Symbol tables are
+ // taken from the first Fst and the others are checked for compatibility;
+ // a mismatch, or a root label with no matching Fst, sets kError.
+ ReplaceFstImpl(const vector< pair<Label, const Fst<A>* > >& fst_tuples,
+ const ReplaceFstOptions<A, T> &opts)
+ : CacheImpl<A>(opts),
+ epsilon_on_replace_(opts.epsilon_on_replace),
+ state_table_(opts.state_table ? opts.state_table :
+ new StateTable(fst_tuples, opts.root)) {
+
+ SetType("replace");
+
+ if (fst_tuples.size() > 0) {
+ SetInputSymbols(fst_tuples[0].second->InputSymbols());
+ SetOutputSymbols(fst_tuples[0].second->OutputSymbols());
+ }
+
+ bool all_negative = true; // all nonterminals are negative?
+ bool dense_range = true; // all nonterminals are positive
+ // and form a dense range containing 1?
+ for (size_t i = 0; i < fst_tuples.size(); ++i) {
+ Label nonterminal = fst_tuples[i].first;
+ if (nonterminal >= 0)
+ all_negative = false;
+ if (nonterminal > fst_tuples.size() || nonterminal <= 0)
+ dense_range = false;
+ }
+
+ vector<uint64> inprops;
+ bool all_ilabel_sorted = true;
+ bool all_olabel_sorted = true;
+ bool all_non_empty = true;
+ // Slot 0 is never a valid Fst, so that index 0 can mean "not found".
+ fst_array_.push_back(0);
+ for (size_t i = 0; i < fst_tuples.size(); ++i) {
+ Label label = fst_tuples[i].first;
+ const Fst<A> *fst = fst_tuples[i].second;
+ // Map the non-terminal label to the index its Fst is about to get.
+ nonterminal_hash_[label] = fst_array_.size();
+ nonterminal_set_.insert(label);
+ fst_array_.push_back(opts.take_ownership ? fst : fst->Copy());
+ if (fst->Start() == kNoStateId)
+ all_non_empty = false;
+ if(!fst->Properties(kILabelSorted, false))
+ all_ilabel_sorted = false;
+ if(!fst->Properties(kOLabelSorted, false))
+ all_olabel_sorted = false;
+ inprops.push_back(fst->Properties(kCopyProperties, false));
+ if (i) {
+ if (!CompatSymbols(InputSymbols(), fst->InputSymbols())) {
+ FSTERROR() << "ReplaceFstImpl: input symbols of Fst " << i
+ << " does not match input symbols of base Fst (0'th fst)";
+ SetProperties(kError, kError);
+ }
+ if (!CompatSymbols(OutputSymbols(), fst->OutputSymbols())) {
+ FSTERROR() << "ReplaceFstImpl: output symbols of Fst " << i
+ << " does not match output symbols of base Fst "
+ << "(0'th fst)";
+ SetProperties(kError, kError);
+ }
+ }
+ }
+ // NOTE(review): operator[] inserts an entry mapping 'opts.root' to 0
+ // when the root label was not among the input tuples.
+ Label nonterminal = nonterminal_hash_[opts.root];
+ if ((nonterminal == 0) && (fst_array_.size() > 1)) {
+ FSTERROR() << "ReplaceFstImpl: no Fst corresponding to root label '"
+ << opts.root << "' in the input tuple vector";
+ SetProperties(kError, kError);
+ }
+ // Fall back to the first Fst when the root label was not found.
+ root_ = (nonterminal > 0) ? nonterminal : 1;
+
+ SetProperties(ReplaceProperties(inprops, root_ - 1, epsilon_on_replace_,
+ all_non_empty));
+ // We assume that all terminals are positive. The resulting
+ // ReplaceFst is known to be kILabelSorted when all sub-FSTs are
+ // kILabelSorted and one of the 3 following conditions is satisfied:
+ // 1. 'epsilon_on_replace' is false, or
+ // 2. all non-terminals are negative, or
+ // 3. all non-terminals are positive and form a dense range containing 1.
+ if (all_ilabel_sorted &&
+ (!epsilon_on_replace_ || all_negative || dense_range))
+ SetProperties(kILabelSorted, kILabelSorted);
+ // Similarly, the resulting ReplaceFst is known to be
+ // kOLabelSorted when all sub-FSTs are kOLabelSorted and one of
+ // the 2 following conditions is satisfied:
+ // 1. all non-terminals are negative, or
+ // 2. all non-terminals are positive and form a dense range containing 1.
+ if (all_olabel_sorted && (all_negative || dense_range))
+ SetProperties(kOLabelSorted, kOLabelSorted);
+
+ // Enable optional caching as long as sorted and all non empty.
+ if (Properties(kILabelSorted | kOLabelSorted) && all_non_empty)
+ always_cache_ = false;
+ else
+ always_cache_ = true;
+ VLOG(2) << "ReplaceFstImpl::ReplaceFstImpl: always_cache = "
+ << (always_cache_ ? "true" : "false");
+ }
+
+ // Copy constructor. Copies the options, the non-terminal maps and the
+ // root index, copy-constructs a new state table from the one in 'impl',
+ // and stores a Copy(true) of every component Fst. The stack prefix hash
+ // and stack prefix array are not copied; they start out empty here.
+ ReplaceFstImpl(const ReplaceFstImpl& impl)
+ : CacheImpl<A>(impl),
+ epsilon_on_replace_(impl.epsilon_on_replace_),
+ always_cache_(impl.always_cache_),
+ state_table_(new StateTable(*(impl.state_table_))),
+ nonterminal_set_(impl.nonterminal_set_),
+ nonterminal_hash_(impl.nonterminal_hash_),
+ root_(impl.root_) {
+ SetType("replace");
+ SetProperties(impl.Properties(), kCopyProperties);
+ SetInputSymbols(impl.InputSymbols());
+ SetOutputSymbols(impl.OutputSymbols());
+ fst_array_.reserve(impl.fst_array_.size());
+ // Keep the null placeholder at index 0, as in the primary constructor.
+ fst_array_.push_back(0);
+ for (size_t i = 1; i < impl.fst_array_.size(); ++i) {
+ fst_array_.push_back(impl.fst_array_[i]->Copy(true));
+ }
+ }
+
+ // Logs the cache settings, then deletes the state table and every
+ // component Fst held in 'fst_array_' (index 0 is the null placeholder
+ // and is skipped).
+ ~ReplaceFstImpl() {
+ VLOG(2) << "~ReplaceFstImpl: gc = "
+ << (CacheImpl<A>::GetCacheGc() ? "true" : "false")
+ << ", gc_size = " << CacheImpl<A>::GetCacheSize()
+ << ", gc_limit = " << CacheImpl<A>::GetCacheLimit();
+
+ delete state_table_;
+ for (size_t i = 1; i < fst_array_.size(); ++i) {
+ delete fst_array_[i];
+ }
+ }
+
+ // Computes the dependency graph of the replace class and returns
+ // true if the dependencies are cyclic. Cyclic dependencies will result
+ // in an un-expandable replace fst.
+ // Delegates the cycle check to a ReplaceUtil built from the component
+ // Fsts, the non-terminal map and the root index.
+ bool CyclicDependencies() const {
+ ReplaceUtil<A> replace_util(fst_array_, nonterminal_hash_, root_);
+ return replace_util.CyclicDependencies();
+ }
+
+ // Return or compute start state of replace fst
+ // Returns the start state of the replace Fst. The state is computed and
+ // recorded with SetStart() the first time it is needed; afterwards the
+ // value held by the cache is returned.
+ StateId Start() {
+   if (HasStart())
+     return CacheImpl<A>::Start();
+
+   // Only the null placeholder is present: no Fsts were supplied.
+   if (fst_array_.size() == 1) {
+     SetStart(kNoStateId);
+     return kNoStateId;
+   }
+
+   const Fst<A>* root_fst = fst_array_[root_];
+   const StateId root_start = root_fst->Start();
+   if (root_start == kNoStateId)  // root Fst is empty; nothing is recorded
+     return kNoStateId;
+
+   // The start state pairs an empty stack prefix with the root's start.
+   const PrefixId empty_prefix = GetPrefixId(StackPrefix());
+   const StateId start_state = state_table_->FindState(
+       StateTuple(empty_prefix, root_, root_start));
+   SetStart(start_state);
+   return start_state;
+ }
+
+ // return final weight of state (kInfWeight means state is not final)
+ // Returns the final weight of state 's' (Weight::Zero() means the state
+ // is not final). A replace state is final only when its underlying state
+ // is final and its stack prefix is empty.
+ Weight Final(StateId s) {
+   if (!HasFinal(s)) {
+     const StateTuple& tuple = state_table_->Tuple(s);
+     const Fst<A>* local_fst = fst_array_[tuple.fst_id];
+     const Weight local_final = local_fst->Final(tuple.fst_state);
+     const bool at_top_level =
+         stackprefix_array_[tuple.prefix_id].Depth() == 0;
+
+     if (local_final != Weight::Zero() && at_top_level) {
+       SetFinal(s, local_final);
+     } else {
+       SetFinal(s, Weight::Zero());
+     }
+   }
+   return CacheImpl<A>::Final(s);
+ }
+
+ // Returns the number of arcs leaving state 's'. Uses the cached value
+ // when there is one, expands the state when caching is mandatory, and
+ // otherwise counts the underlying arcs plus the optional final arc
+ // without expanding anything.
+ size_t NumArcs(StateId s) {
+   if (HasArcs(s))  // State already cached.
+     return CacheImpl<A>::NumArcs(s);
+
+   if (always_cache_) {  // Caching is mandatory: expand first.
+     Expand(s);
+     return CacheImpl<A>::NumArcs(s);
+   }
+
+   // Count without expanding.
+   StateTuple tuple = state_table_->Tuple(s);
+   if (tuple.fst_state == kNoStateId)
+     return 0;
+
+   size_t arc_count = fst_array_[tuple.fst_id]->NumArcs(tuple.fst_state);
+   if (ComputeFinalArc(tuple, 0))
+     ++arc_count;
+   return arc_count;
+ }
+
+ // Returns whether a given label is a non terminal
+ // Tells whether label 'l' is one of the registered non-terminals, i.e.
+ // whether it is a key of the non-terminal map.
+ bool IsNonTerminal(Label l) const {
+   // TODO(allauzen): be smarter and take advantage of
+   // all_dense or all_negative.
+   // Use also in ComputeArc, this would require changes to replace
+   // so that recursing into an empty fst lead to a non co-accessible
+   // state instead of deleting the arc as done currently.
+   // Current use correct, since i/olabel sorted iff all_non_empty.
+   return nonterminal_hash_.find(l) != nonterminal_hash_.end();
+ }
+
+ // Returns the number of input-epsilon arcs leaving state 's'.
+ //
+ // Uses the cached value when the state is cached. When caching is
+ // mandatory, or the Fst is not known to be ilabel sorted, the state is
+ // expanded and the cached count returned. Otherwise the count is derived
+ // from the underlying Fst without expanding: the underlying epsilons (or,
+ // with 'epsilon_on_replace', the leading run of epsilon / non-terminal
+ // arcs) plus one for the final arc when one is required.
+ size_t NumInputEpsilons(StateId s) {
+   if (HasArcs(s)) {
+     // If state cached, use the cached value.
+     return CacheImpl<A>::NumInputEpsilons(s);
+   } else if (always_cache_ || !Properties(kILabelSorted)) {
+     // If always caching or if the number of input epsilons is too expensive
+     // to compute without caching (i.e. not ilabel sorted),
+     // then expand and cache state.
+     Expand(s);
+     return CacheImpl<A>::NumInputEpsilons(s);
+   } else {
+     // Otherwise, compute the number of input epsilons without caching.
+     StateTuple tuple = state_table_->Tuple(s);
+     if (tuple.fst_state == kNoStateId)
+       return 0;
+     const Fst<A>* fst = fst_array_[tuple.fst_id];
+     size_t num = 0;
+     if (!epsilon_on_replace_) {
+       // If epsilon_on_replace is false, all input epsilon arcs
+       // are also input epsilons arcs in the underlying machine.
+       // The result must be kept: it used to be computed and discarded,
+       // which left 'num' at 0 on this path.
+       num = fst->NumInputEpsilons(tuple.fst_state);
+     } else {
+       // Otherwise, one need to consider that all non-terminal arcs
+       // in the underlying machine also become input epsilon arc.
+       ArcIterator<Fst<A> > aiter(*fst, tuple.fst_state);
+       for (; !aiter.Done() &&
+                ((aiter.Value().ilabel == 0) ||
+                 IsNonTerminal(aiter.Value().olabel));
+            aiter.Next())
+         ++num;
+     }
+     // The final (pop) arc is an epsilon arc too.
+     if (ComputeFinalArc(tuple, 0))
+       num++;
+     return num;
+   }
+ }
+
+ // Returns the number of output-epsilon arcs leaving state 's'. Uses the
+ // cached value when present, expands the state when caching is mandatory
+ // or the Fst is not known to be olabel sorted, and otherwise counts the
+ // leading run of underlying arcs whose output label is epsilon or a
+ // non-terminal, plus one for the final arc when one is required.
+ size_t NumOutputEpsilons(StateId s) {
+   // If state cached, use the cached value.
+   if (HasArcs(s))
+     return CacheImpl<A>::NumOutputEpsilons(s);
+
+   // If always caching or if the number of output epsilons is too expensive
+   // to compute without caching (i.e. not olabel sorted),
+   // then expand and cache state.
+   if (always_cache_ || !Properties(kOLabelSorted)) {
+     Expand(s);
+     return CacheImpl<A>::NumOutputEpsilons(s);
+   }
+
+   // Otherwise, compute the number of output epsilons without caching.
+   StateTuple tuple = state_table_->Tuple(s);
+   if (tuple.fst_state == kNoStateId)
+     return 0;
+
+   const Fst<A>* local_fst = fst_array_[tuple.fst_id];
+   size_t count = 0;
+   ArcIterator<Fst<A> > aiter(*local_fst, tuple.fst_state);
+   while (!aiter.Done() &&
+          ((aiter.Value().olabel == 0) ||
+           IsNonTerminal(aiter.Value().olabel))) {
+     ++count;
+     aiter.Next();
+   }
+   if (ComputeFinalArc(tuple, 0))
+     ++count;
+   return count;
+ }
+
+ // Returns all properties by calling Properties(mask) with kFstProperties.
+ uint64 Properties() const { return Properties(kFstProperties); }
+
+ // Set error if found; return FST impl properties.
+ // When the error bit is requested, first sets kError on this impl if any
+ // component Fst reports kError, then returns the masked properties.
+ uint64 Properties(uint64 mask) const {
+ if (mask & kError) {
+ // Index 0 is the null placeholder, so start at 1.
+ for (size_t i = 1; i < fst_array_.size(); ++i) {
+ if (fst_array_[i]->Properties(kError, false))
+ SetProperties(kError, kError);
+ }
+ }
+ return FstImpl<Arc>::Properties(mask);
+ }
+
+ // return the base arc iterator, if arcs have not been computed yet,
+ // extend/recurse for new arcs.
+ // Expands state 's' if its arcs are not cached yet, then lets the cache
+ // fill in 'data'.
+ void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
+ if (!HasArcs(s))
+ Expand(s);
+ CacheImpl<A>::InitArcIterator(s, data);
+ // TODO(allauzen): Set behaviour of generic iterator
+ // Warning: ArcIterator<ReplaceFst<A> >::InitCache()
+ // relies on current behaviour.
+ }
+
+
+ // Extend current state (walk arcs one level deep)
+ // Expands state 's' one level deep: pushes the final (pop) arc when one
+ // is required, then the replace arc for every underlying arc that maps to
+ // one, and finally marks the arcs of 's' as set.
+ void Expand(StateId s) {
+   StateTuple tuple = state_table_->Tuple(s);
+
+   // The local fst is empty: the state has no arcs.
+   if (tuple.fst_state == kNoStateId) {
+     SetArcs(s);
+     return;
+   }
+
+   const Fst<A>* local_fst = fst_array_[tuple.fst_id];
+   ArcIterator< Fst<A> > aiter(*local_fst, tuple.fst_state);
+   Arc new_arc;
+
+   // The final arc, when present, comes first.
+   if (ComputeFinalArc(tuple, &new_arc))
+     PushArc(s, new_arc);
+
+   // Then every arc leaving the underlying state.
+   while (!aiter.Done()) {
+     if (ComputeArc(tuple, aiter.Value(), &new_arc))
+       PushArc(s, new_arc);
+     aiter.Next();
+   }
+
+   SetArcs(s);
+ }
+
+ // Variant of Expand() for callers that already hold the state tuple and
+ // the arc iterator data of the underlying state. Pushes the final (pop)
+ // arc when one is required, then the replace arc for every underlying arc
+ // that maps to one, and finally marks the arcs of 's' as set.
+ //
+ // Arcs are added with PushArc(), like in Expand(StateId): PushArc is the
+ // method this class imports from CacheImpl and pairs with the closing
+ // SetArcs() call, whereas AddArc was never brought into scope here.
+ void Expand(StateId s, const StateTuple &tuple,
+             const ArcIteratorData<A> &data) {
+   // If local fst is empty
+   if (tuple.fst_state == kNoStateId) {
+     SetArcs(s);
+     return;
+   }
+
+   ArcIterator< Fst<A> > aiter(data);
+   Arc arc;
+
+   // Create a final arc when needed
+   if (ComputeFinalArc(tuple, &arc))
+     PushArc(s, arc);
+
+   // Expand all arcs leaving the state
+   for (; !aiter.Done(); aiter.Next()) {
+     if (ComputeArc(tuple, aiter.Value(), &arc))
+       PushArc(s, arc);
+   }
+
+   SetArcs(s);
+ }
+
+ // If arcp == 0, only returns if a final arc is required, does not
+ // actually compute it.
+ // Decides whether the state described by 'tuple' needs a "final" arc,
+ // i.e. an epsilon arc that pops the stack and returns to the calling Fst.
+ // That is the case when the underlying state is final and the stack
+ // prefix is not empty.
+ //
+ // \param tuple state tuple of the replace state
+ // \param arcp  when non-null, receives the final arc; when 0 the function
+ //              only reports whether such an arc is required
+ // \param flags selects which arc fields are computed: the destination
+ //              state only with kArcNextStateValue, the weight only with
+ //              kArcWeightValue
+ // \return true iff a final arc is required
+ bool ComputeFinalArc(const StateTuple &tuple, A* arcp,
+                      uint32 flags = kArcValueFlags) {
+   const Fst<A>* fst = fst_array_[tuple.fst_id];
+   const StateId fst_state = tuple.fst_state;
+   if (fst_state == kNoStateId)
+     return false;
+
+   // if state is final, pop up stack
+   const StackPrefix& stack = stackprefix_array_[tuple.prefix_id];
+   const Weight final_weight = fst->Final(fst_state);
+   if (final_weight != Weight::Zero() && stack.Depth()) {
+     if (arcp) {
+       arcp->ilabel = 0;
+       arcp->olabel = 0;
+       if (flags & kArcNextStateValue) {
+         // Copy the return point before calling PopPrefix(): that call
+         // goes through GetPrefixId(), which may append to
+         // 'stackprefix_array_' and so invalidate the 'stack' reference.
+         const PrefixTuple top = stack.Top();
+         const PrefixId prefix_id = PopPrefix(stack);
+         arcp->nextstate = state_table_->FindState(
+             StateTuple(prefix_id, top.fst_id, top.nextstate));
+       }
+       if (flags & kArcWeightValue)
+         arcp->weight = final_weight;
+     }
+     return true;
+   }
+   return false;
+ }
+
+ // Compute the arc in the replace fst corresponding to a given
+ // in the underlying machine. Returns false if the underlying arc
+ // corresponds to no arc in the replace.
+ // Maps 'arc' of the underlying machine to the corresponding arc of the
+ // replace Fst and stores it in '*arcp'.
+ //
+ // \param tuple state tuple of the source replace state
+ // \param arc   arc leaving the underlying state
+ // \param arcp  receives the computed arc
+ // \param flags arc value flags; the destination state is looked up in the
+ //              state table only when kArcNextStateValue is set, otherwise
+ //              it is kNoStateId
+ // \return false when the arc enters a non-terminal whose Fst has no
+ //         start state (the arc is then dropped), true otherwise
+ bool ComputeArc(const StateTuple &tuple, const A &arc, A* arcp,
+                 uint32 flags = kArcValueFlags) {
+   // When only the ilabel and/or weight are wanted and input labels are
+   // kept as they are, the underlying arc can be handed back unchanged.
+   const bool only_ilabel_or_weight =
+       flags == (flags & (kArcILabelValue | kArcWeightValue));
+   if (!epsilon_on_replace_ && only_ilabel_or_weight) {
+     *arcp = arc;
+     return true;
+   }
+
+   // An output epsilon is never treated as a non-terminal.
+   if (arc.olabel != 0) {
+     typename NonTerminalHash::const_iterator entry =
+         nonterminal_hash_.find(arc.olabel);
+     if (entry != nonterminal_hash_.end()) {
+       // Recurse into the non-terminal: remember where to come back to.
+       Label nonterminal = entry->second;
+       const Fst<A>* nt_fst = fst_array_[nonterminal];
+       PrefixId nt_prefix = PushPrefix(stackprefix_array_[tuple.prefix_id],
+                                       tuple.fst_id, arc.nextstate);
+
+       // Without a valid start state the arc is implicitly deleted.
+       StateId nt_start = nt_fst->Start();
+       if (nt_start == kNoStateId)
+         return false;
+
+       StateId nt_nextstate = kNoStateId;
+       if (flags & kArcNextStateValue) {
+         nt_nextstate = state_table_->FindState(
+             StateTuple(nt_prefix, nonterminal, nt_start));
+       }
+       Label ilabel = (epsilon_on_replace_) ? 0 : arc.ilabel;
+       *arcp = A(ilabel, 0, arc.weight, nt_nextstate);
+       return true;
+     }
+   }
+
+   // Ordinary arc: stay in the same Fst with the same stack prefix.
+   StateId nextstate = kNoStateId;
+   if (flags & kArcNextStateValue) {
+     nextstate = state_table_->FindState(
+         StateTuple(tuple.prefix_id, tuple.fst_id, arc.nextstate));
+   }
+   *arcp = A(arc.ilabel, arc.olabel, arc.weight, nextstate);
+   return true;
+ }
+
+ // Returns the arc iterator flags supported by this Fst.
+ // Returns the arc iterator flags this Fst supports: all the arc value
+ // flags, plus kArcNoCache unless caching is mandatory.
+ uint32 ArcIteratorFlags() const {
+   if (always_cache_)
+     return kArcValueFlags;
+   return kArcValueFlags | kArcNoCache;
+ }
+
+ // Returns the state table used by this implementation (not a copy).
+ T* GetStateTable() const {
+ return state_table_;
+ }
+
+ // Returns the component Fst stored at index 'fst_id' of 'fst_array_'.
+ // The index is not range checked; index 0 holds a null pointer.
+ const Fst<A>* GetFst(Label fst_id) const {
+ return fst_array_[fst_id];
+ }
+
+ // Returns the 'epsilon_on_replace' option this impl was built with.
+ bool EpsilonOnReplace() const { return epsilon_on_replace_; }
+
+ // private helper classes
+ private:
+ static const size_t kPrime0;
+
+ // \class PrefixTuple
+ // \brief Tuple of fst_id and destination state (entry in stack prefix)
+ // One entry of a stack prefix: 'fst_id' is the index of the calling Fst
+ // and 'nextstate' the state in that Fst to continue from after the pop.
+ struct PrefixTuple {
+ PrefixTuple(Label f, StateId s) : fst_id(f), nextstate(s) {}
+
+ Label fst_id;
+ StateId nextstate;
+ };
+
+ // \class StackPrefix
+ // \brief Container for stack prefix.
+ // Call stack of the replace expansion: a sequence of PrefixTuple entries,
+ // the most recently pushed one last.
+ class StackPrefix {
+  public:
+   // Creates an empty stack.
+   StackPrefix() {}
+
+   // copy constructor
+   StackPrefix(const StackPrefix& x) :
+       prefix_(x.prefix_) {
+   }
+
+   // Pushes a new return point. 'fst_id' is a Label, matching both the
+   // PrefixTuple constructor and the argument PushPrefix() passes in.
+   void Push(Label fst_id, StateId nextstate) {
+     prefix_.push_back(PrefixTuple(fst_id, nextstate));
+   }
+
+   // Removes the most recent return point; the stack must not be empty.
+   void Pop() {
+     prefix_.pop_back();
+   }
+
+   // Returns the most recent return point; the stack must not be empty.
+   const PrefixTuple& Top() const {
+     return prefix_.back();
+   }
+
+   // Number of entries on the stack.
+   size_t Depth() const {
+     return prefix_.size();
+   }
+
+  public:
+   // Left public: StackPrefixEqual and StackPrefixKey read it directly.
+   vector<PrefixTuple> prefix_;
+ };
+
+
+ // \class StackPrefixEqual
+ // \brief Compare two stack prefix classes for equality
+ // Equality predicate for stack prefixes: two prefixes are equal when they
+ // have the same depth and agree entry by entry on fst_id and nextstate.
+ class StackPrefixEqual {
+  public:
+   bool operator()(const StackPrefix& x, const StackPrefix& y) const {
+     const size_t depth = x.prefix_.size();
+     if (depth != y.prefix_.size())
+       return false;
+     for (size_t i = 0; i < depth; ++i) {
+       const PrefixTuple& lhs = x.prefix_[i];
+       const PrefixTuple& rhs = y.prefix_[i];
+       const bool same_entry =
+           lhs.fst_id == rhs.fst_id && lhs.nextstate == rhs.nextstate;
+       if (!same_entry)
+         return false;
+     }
+     return true;
+   }
+ };
+
+ //
+ // \class StackPrefixKey
+ // \brief Hash function for stack prefix to prefix id
+ // Hash functor for stack prefixes: sums, over all entries,
+ // fst_id + nextstate * kPrime0. An empty prefix hashes to 0.
+ class StackPrefixKey {
+ public:
+ size_t operator()(const StackPrefix& x) const {
+ size_t sum = 0;
+ for (size_t i = 0; i < x.prefix_.size(); ++i) {
+ sum += x.prefix_[i].fst_id + x.prefix_[i].nextstate*kPrime0;
+ }
+ return sum;
+ }
+ };
+
+ typedef unordered_map<StackPrefix, PrefixId, StackPrefixKey, StackPrefixEqual>
+ StackPrefixHash;
+
+ // private methods
+ private:
+ // hash stack prefix (return unique index into stackprefix array)
+ // Returns the id of 'prefix', i.e. its index in 'stackprefix_array_'.
+ // A prefix seen for the first time is appended to the array and recorded
+ // in the hash.
+ PrefixId GetPrefixId(const StackPrefix& prefix) {
+   typename StackPrefixHash::iterator known = prefix_hash_.find(prefix);
+   if (known != prefix_hash_.end())
+     return known->second;
+
+   // New prefix: its id is the slot it is about to occupy.
+   PrefixId new_id = stackprefix_array_.size();
+   stackprefix_array_.push_back(prefix);
+   prefix_hash_[prefix] = new_id;
+   return new_id;
+ }
+
+ // prefix id after a stack pop
+ // Returns the id of 'prefix' with its last entry removed. 'prefix' is
+ // taken by value, so the caller's object is left untouched.
+ PrefixId PopPrefix(StackPrefix prefix) {
+ prefix.Pop();
+ return GetPrefixId(prefix);
+ }
+
+ // prefix id after a stack push
+ // Returns the id of 'prefix' extended with the entry (fst_id, nextstate).
+ // 'prefix' is taken by value, so the caller's object is left untouched.
+ PrefixId PushPrefix(StackPrefix prefix, Label fst_id, StateId nextstate) {
+ prefix.Push(fst_id, nextstate);
+ return GetPrefixId(prefix);
+ }
+
+
+ // private data
+ private:
+ // runtime options
+ bool epsilon_on_replace_;
+ bool always_cache_; // Optionally caching arc iterator disabled when true
+
+ // state table
+ StateTable *state_table_;
+
+ // cross index of unique stack prefix
+ // could potentially have one copy of prefix array
+ StackPrefixHash prefix_hash_;
+ vector<StackPrefix> stackprefix_array_;
+
+ set<Label> nonterminal_set_;
+ NonTerminalHash nonterminal_hash_;
+ vector<const Fst<A>*> fst_array_;
+ Label root_;
+
+ void operator=(const ReplaceFstImpl<A, T> &); // disallow
+};
+
+
+// Multiplier applied to the 'nextstate' component by StackPrefixKey.
+template <class A, class T>
+const size_t ReplaceFstImpl<A, T>::kPrime0 = 7853;
+
+//
+// \class ReplaceFst
+// \brief Recursively replaces arcs in the root Fst with other Fsts.
+// This version is a delayed Fst.
+//
+// ReplaceFst supports dynamic replacement of arcs in one Fst with
+// another Fst. This replacement is recursive. ReplaceFst can be used
+// to support a variety of delayed constructions such as recursive
+// transition networks, union, or closure. It is constructed with an
+// array of Fst(s). One Fst represents the root (or topology)
+// machine. The root Fst refers to other Fsts by recursively replacing
+// arcs labeled as non-terminals with the matching non-terminal
+// Fst. Currently the ReplaceFst uses the output symbols of the arcs
+// to determine whether the arc is a non-terminal arc or not. A
+// non-terminal can be any label that is not a non-zero terminal label
+// in the output alphabet.
+//
+// Note that the constructor uses a vector of pair<>. These correspond
+// to the tuple of non-terminal Label and corresponding Fst. For example
+// to implement the closure operation we need 2 Fsts. The first root
+// Fst is a single Arc on the start State that self loops, it references
+// the particular machine for which we are performing the closure operation.
+//
+// The ReplaceFst class supports an optionally caching arc iterator:
+// ArcIterator< ReplaceFst<A> >
+// The ReplaceFst needs to be built such that it is known to be ilabel
+// or olabel sorted (see usage below).
+//
+// Observe that Matcher<Fst<A> > will use the optionally caching arc
+// iterator when available (Fst is ilabel sorted and matching on the
+// input, or Fst is olabel sorted and matching on the output).
+// In order to obtain the most efficient behaviour, it is recommended
+// to set 'epsilon_on_replace' to false (this means constructing acceptors
+// as transducers with epsilons on the input side of nonterminal arcs)
+// and matching on the input side.
+//
+// This class attaches interface to implementation and handles
+// reference counting, delegating most methods to ImplToFst.
+// Delayed Fst that recursively replaces non-terminal arcs with other Fsts.
+// A is the arc type and T the state table type. All the work is done by
+// ReplaceFstImpl<A, T>; this class only attaches the Fst interface to it
+// through ImplToFst.
+template <class A, class T = DefaultReplaceStateTable<A> >
+class ReplaceFst : public ImplToFst< ReplaceFstImpl<A, T> > {
+ public:
+  friend class ArcIterator< ReplaceFst<A, T> >;
+  friend class StateIterator< ReplaceFst<A, T> >;
+  friend class ReplaceFstMatcher<A, T>;
+
+  typedef A Arc;
+  typedef typename A::Label Label;
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+  typedef CacheState<A> State;
+  typedef ReplaceFstImpl<A, T> Impl;
+
+  using ImplToFst<Impl>::Properties;
+
+  // Builds the replace Fst from (non-terminal label, Fst) pairs, using
+  // default options with 'root' as the root label.
+  ReplaceFst(const vector<pair<Label, const Fst<A>* > >& fst_array,
+             Label root)
+      : ImplToFst<Impl>(new Impl(fst_array, ReplaceFstOptions<A, T>(root))) {}
+
+  // Builds the replace Fst from (non-terminal label, Fst) pairs with the
+  // given options.
+  ReplaceFst(const vector<pair<Label, const Fst<A>* > >& fst_array,
+             const ReplaceFstOptions<A, T> &opts)
+      : ImplToFst<Impl>(new Impl(fst_array, opts)) {}
+
+  // See Fst<>::Copy() for doc.
+  ReplaceFst(const ReplaceFst<A, T>& fst, bool safe = false)
+      : ImplToFst<Impl>(fst, safe) {}
+
+  // Get a copy of this ReplaceFst. See Fst<>::Copy() for further doc.
+  virtual ReplaceFst<A, T> *Copy(bool safe = false) const {
+    return new ReplaceFst<A, T>(*this, safe);
+  }
+
+  virtual inline void InitStateIterator(StateIteratorData<A> *data) const;
+
+  // Forwards to the implementation, which expands the state if needed.
+  virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
+    GetImpl()->InitArcIterator(s, data);
+  }
+
+  // Returns a ReplaceFstMatcher when the Fst supports non-caching arc
+  // iteration and is known to be sorted on the side being matched;
+  // returns 0 otherwise.
+  virtual MatcherBase<A> *InitMatcher(MatchType match_type) const {
+    if ((GetImpl()->ArcIteratorFlags() & kArcNoCache) &&
+        ((match_type == MATCH_INPUT && Properties(kILabelSorted, false)) ||
+         (match_type == MATCH_OUTPUT && Properties(kOLabelSorted, false)))) {
+      return new ReplaceFstMatcher<A, T>(*this, match_type);
+    }
+    else {
+      VLOG(2) << "Not using replace matcher";
+      return 0;
+    }
+  }
+
+  // True if the dependencies between the component Fsts are cyclic.
+  bool CyclicDependencies() const {
+    return GetImpl()->CyclicDependencies();
+  }
+
+ private:
+  // Makes visible to friends.
+  Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
+
+  // Declared for ReplaceFst<A, T>, so assignment is disallowed for every
+  // state table type and not only for the default one.
+  void operator=(const ReplaceFst<A, T> &fst);  // disallow
+};
+
+
+// Specialization for ReplaceFst.
+// State iterator for ReplaceFst: a CacheStateIterator constructed from the
+// Fst and its implementation. Copying and assignment are disabled.
+template<class A, class T>
+class StateIterator< ReplaceFst<A, T> >
+ : public CacheStateIterator< ReplaceFst<A, T> > {
+ public:
+ explicit StateIterator(const ReplaceFst<A, T> &fst)
+ : CacheStateIterator< ReplaceFst<A, T> >(fst, fst.GetImpl()) {}
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(StateIterator);
+};
+
+
+// Specialization for ReplaceFst.
+// Implements optional caching. It can be used as follows:
+//
+// ReplaceFst<A> replace;
+// ArcIterator< ReplaceFst<A> > aiter(replace, s);
+// // Note: ArcIterator< Fst<A> > is always a caching arc iterator.
+// aiter.SetFlags(kArcNoCache, kArcNoCache);
+// // Use the arc iterator, no arc will be cached, no state will be expanded.
+// // The varied 'kArcValueFlags' can be used to decide which part
+// // of arc values needs to be computed.
+// aiter.SetFlags(kArcILabelValue, kArcValueFlags);
+// // Only want the ilabel for this arc
+// aiter.Value(); // Does not compute the destination state.
+// aiter.Next();
+// aiter.SetFlags(kArcNextStateValue, kArcNextStateValue);
+// // Want both ilabel and nextstate for that arc
+// aiter.Value(); // Does compute the destination state and inserts it
+// // in the replace state table.
+// // No Arc has been cached at that point.
+//
+template <class A, class T>
+class ArcIterator< ReplaceFst<A, T> > {
+ public:
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+
+ // Creates an arc iterator over state 's' of 'fst'.
+ // If the Fst does not support non-caching iteration, the state is
+ // expanded right away. If the state is cached (before or after that
+ // step), the iterator reads the cached arcs. Otherwise it is set up over
+ // the arcs of the underlying Fst, with an optional final arc in front,
+ // and the choice between caching and not caching is left to SetFlags()
+ // or Value().
+ ArcIterator(const ReplaceFst<A, T> &fst, StateId s)
+ : fst_(fst), state_(s), pos_(0), offset_(0), flags_(0), arcs_(0),
+ data_flags_(0), final_flags_(0) {
+ // A null ref_count tells the destructor there is nothing to release.
+ cache_data_.ref_count = 0;
+ local_data_.ref_count = 0;
+
+ // If FST does not support optional caching, force caching.
+ if(!(fst_.GetImpl()->ArcIteratorFlags() & kArcNoCache) &&
+ !(fst_.GetImpl()->HasArcs(state_)))
+ fst_.GetImpl()->Expand(state_);
+
+ // If state is already cached, use cached arcs array.
+ if (fst_.GetImpl()->HasArcs(state_)) {
+ (fst_.GetImpl())->template CacheImpl<A>::InitArcIterator(state_,
+ &cache_data_);
+ num_arcs_ = cache_data_.narcs;
+ arcs_ = cache_data_.arcs; // 'arcs_' is a ptr to the cached arcs.
+ data_flags_ = kArcValueFlags; // All the arc member values are valid.
+ } else { // Otherwise delay decision until Value() is called.
+ tuple_ = fst_.GetImpl()->GetStateTable()->Tuple(state_);
+ if (tuple_.fst_state == kNoStateId) {
+ num_arcs_ = 0;
+ } else {
+ // The decision to cache or not to cache has been deferred
+ // until Value() or SetFlags() is called. However, the arc
+ // iterator is set up now to be ready for non-caching in order
+ // to keep the Value() method simple and efficient.
+ const Fst<A>* fst = fst_.GetImpl()->GetFst(tuple_.fst_id);
+ fst->InitArcIterator(tuple_.fst_state, &local_data_);
+ // 'arcs_' is a pointer to the arcs in the underlying machine.
+ arcs_ = local_data_.arcs;
+ // Compute the final arc (but not its destination state)
+ // if a final arc is required.
+ bool has_final_arc = fst_.GetImpl()->ComputeFinalArc(
+ tuple_,
+ &final_arc_,
+ kArcValueFlags & ~kArcNextStateValue);
+ // Set the arc value flags that hold for 'final_arc_'.
+ final_flags_ = kArcValueFlags & ~kArcNextStateValue;
+ // Compute the number of arcs.
+ num_arcs_ = local_data_.narcs;
+ if (has_final_arc)
+ ++num_arcs_;
+ // Set the offset between the underlying arc positions and
+ // the positions in the arc iterator.
+ offset_ = num_arcs_ - local_data_.narcs;
+ // Defers the decision to cache or not until Value() or
+ // SetFlags() is called.
+ data_flags_ = 0;
+ }
+ }
+ }
+
+ // Decrements the reference counts attached to the cached and local arc
+ // iterator data, when they are set.
+ ~ArcIterator() {
+ if (cache_data_.ref_count)
+ --(*cache_data_.ref_count);
+ if (local_data_.ref_count)
+ --(*local_data_.ref_count);
+ }
+
+ // Switches the iterator to the cached arcs: asks the Fst to initialise
+ // 'cache_data_' for the state (which expands it if needed), points
+ // 'arcs_' at the cached array, marks every arc value as valid and clears
+ // the offset, since the cached array already contains the final arc.
+ void ExpandAndCache() const {
+ // TODO(allauzen): revisit this
+ // fst_.GetImpl()->Expand(state_, tuple_, local_data_);
+ // (fst_.GetImpl())->CacheImpl<A>*>::InitArcIterator(state_,
+ // &cache_data_);
+ //
+ fst_.InitArcIterator(state_, &cache_data_); // Expand and cache state.
+ arcs_ = cache_data_.arcs; // 'arcs_' is a pointer to the cached arcs.
+ data_flags_ = kArcValueFlags; // All the arc member values are valid.
+ offset_ = 0; // No offset
+
+ }
+
+ // Sets the iterator up according to the current flags: in non-caching
+ // mode it reads the arcs of the underlying machine, whose weight (and
+ // ilabel, unless 'epsilon_on_replace' is set) can be used as is;
+ // otherwise the state is expanded and cached.
+ void Init() {
+ if (flags_ & kArcNoCache) { // If caching is disabled
+ // 'arcs_' is a pointer to the arcs in the underlying machine.
+ arcs_ = local_data_.arcs;
+ // Set the arcs value flags that hold for 'arcs_'.
+ data_flags_ = kArcWeightValue;
+ if (!fst_.GetImpl()->EpsilonOnReplace())
+ data_flags_ |= kArcILabelValue;
+ // Set the offset between the underlying arc positions and
+ // the positions in the arc iterator.
+ offset_ = num_arcs_ - local_data_.narcs;
+ } else { // Otherwise, expand and cache
+ ExpandAndCache();
+ }
+ }
+
+ // True once the position has moved past the last arc.
+ bool Done() const { return pos_ >= num_arcs_; }
+
+ // Returns the arc at the current position. Positions below 'offset_'
+ // denote the final arc; the others index into 'arcs_'. An arc whose
+ // stored values do not cover the requested value flags is recomputed on
+ // the fly into 'arc_' (or 'final_arc_'), so the returned reference may
+ // point at storage that a later call overwrites.
+ const A& Value() const {
+ // If 'data_flags_' was set to 0, non-caching was not requested
+ if (!data_flags_) {
+ // TODO(allauzen): revisit this.
+ if (flags_ & kArcNoCache) {
+ // Should never happen.
+ FSTERROR() << "ReplaceFst: inconsistent arc iterator flags";
+ }
+ ExpandAndCache(); // Expand and cache.
+ }
+
+ if (pos_ - offset_ >= 0) { // The requested arc is not the 'final' arc.
+ const A& arc = arcs_[pos_ - offset_];
+ if ((data_flags_ & flags_) == (flags_ & kArcValueFlags)) {
+ // If the value flags for 'arc' match the required value flags
+ // then return 'arc'.
+ return arc;
+ } else {
+ // Otherwise, compute the corresponding arc on-the-fly.
+ fst_.GetImpl()->ComputeArc(tuple_, arc, &arc_, flags_ & kArcValueFlags);
+ return arc_;
+ }
+ } else { // The requested arc is the 'final' arc.
+ if ((final_flags_ & flags_) != (flags_ & kArcValueFlags)) {
+ // If the arc value flags that hold for the final arc
+ // do not match the requested value flags, then
+ // 'final_arc_' needs to be updated.
+ fst_.GetImpl()->ComputeFinalArc(tuple_, &final_arc_,
+ flags_ & kArcValueFlags);
+ final_flags_ = flags_ & kArcValueFlags;
+ }
+ return final_arc_;
+ }
+ }
+
+ // Advances to the next arc.
+ void Next() { ++pos_; }
+
+ // Returns the current position.
+ size_t Position() const { return pos_; }
+
+ // Moves back to the first arc.
+ void Reset() { pos_ = 0; }
+
+ // Moves to position 'pos'; the value is not range checked here.
+ void Seek(size_t pos) { pos_ = pos; }
+
+ // Returns the current behavioral flags of the iterator.
+ uint32 Flags() const { return flags_; }
+
+ // Updates the iterator flags: clears the bits in 'mask', then sets the
+ // bits of 'f' that the Fst supports. Depending on the result, either
+ // schedules caching for the next Value() call or prepares the iterator
+ // for non-caching iteration.
+ void SetFlags(uint32 f, uint32 mask) {
+ // Update the flags taking into account what flags are supported
+ // by the Fst.
+ flags_ &= ~mask;
+ flags_ |= (f & fst_.GetImpl()->ArcIteratorFlags());
+ // If non-caching is not requested (and caching has not already
+ // been performed), then flush 'data_flags_' to request caching
+ // during the next call to Value().
+ if (!(flags_ & kArcNoCache) && data_flags_ != kArcValueFlags) {
+ if (!fst_.GetImpl()->HasArcs(state_))
+ data_flags_ = 0;
+ }
+ // If 'data_flags_' has been flushed but non-caching is requested
+ // before calling Value(), then set up the iterator for non-caching.
+ if ((f & kArcNoCache) && (!data_flags_))
+ Init();
+ }
+
+ private:
+ const ReplaceFst<A, T> &fst_; // Reference to the FST
+ StateId state_; // State in the FST
+ mutable typename T::StateTuple tuple_; // Tuple corresponding to state_
+
+ ssize_t pos_; // Current position
+ mutable ssize_t offset_; // Offset between position in iterator and in arcs_
+ ssize_t num_arcs_; // Number of arcs at state_
+ uint32 flags_; // Behavioral flags for the arc iterator
+ mutable Arc arc_; // Memory to temporarily store computed arcs
+
+ mutable ArcIteratorData<Arc> cache_data_; // Arc iterator data in cache
+ mutable ArcIteratorData<Arc> local_data_; // Arc iterator data in local fst
+
+ mutable const A* arcs_; // Array of arcs
+ mutable uint32 data_flags_; // Arc value flags valid for data in arcs_
+ mutable Arc final_arc_; // Final arc (when required)
+ mutable uint32 final_flags_; // Arc value flags valid for final_arc_
+
+ DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+
+template <class A, class T>
+class ReplaceFstMatcher : public MatcherBase<A> {
+ public:
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+ typedef typename A::Label Label;
+ typedef MultiEpsMatcher<Matcher<Fst<A> > > LocalMatcher;
+
+ // Creates a matcher for 'fst' that matches on the side selected by
+ // 'match_type'. No state is selected yet. The loop arc is built with
+ // kNoLabel on the input side and 0 on the output side, and the two labels
+ // are swapped when matching on output. One local matcher is created per
+ // component Fst.
+ ReplaceFstMatcher(const ReplaceFst<A, T> &fst, fst::MatchType match_type)
+ : fst_(fst),
+ impl_(fst_.GetImpl()),
+ s_(fst::kNoStateId),
+ match_type_(match_type),
+ current_loop_(false),
+ final_arc_(false),
+ loop_(fst::kNoLabel, 0, A::Weight::One(), fst::kNoStateId) {
+ if (match_type_ == fst::MATCH_OUTPUT)
+ swap(loop_.ilabel, loop_.olabel);
+ InitMatchers();
+ }
+
+ // Copy constructor. The new matcher shares the Fst of 'matcher' and
+ // matches on the same side, but starts from scratch: no state is
+ // selected, the loop and final-arc markers are cleared, and a fresh set
+ // of local matchers is created. 'safe' is accepted for interface
+ // compatibility and is not used here.
+ //
+ // 'final_arc_' is initialised explicitly, as in the primary constructor,
+ // so that it never holds an indeterminate value.
+ ReplaceFstMatcher(const ReplaceFstMatcher<A, T> &matcher, bool safe = false)
+     : fst_(matcher.fst_),
+       impl_(fst_.GetImpl()),
+       s_(fst::kNoStateId),
+       match_type_(matcher.match_type_),
+       current_loop_(false),
+       final_arc_(false),
+       loop_(fst::kNoLabel, 0, A::Weight::One(), fst::kNoStateId) {
+   if (match_type_ == fst::MATCH_OUTPUT)
+     swap(loop_.ilabel, loop_.olabel);
+   InitMatchers();
+ }
+
+ // Create a local matcher for each component Fst of replace.
+ // LocalMatcher is a multi epsilon wrapper matcher. MultiEpsilonMatcher
+ // is used to match each non-terminal arc, since these non-terminal
+ // turn into epsilons on recursion.
+ void InitMatchers() {
+ const vector<const Fst<A>*>& fst_array = impl_->fst_array_;
+ matcher_.resize(fst_array.size(), 0);
+ for (size_t i = 0; i < fst_array.size(); ++i) {
+ if (fst_array[i]) {
+ matcher_[i] =
+ new LocalMatcher(*fst_array[i], match_type_, kMultiEpsList);
+
+ typename set<Label>::iterator it = impl_->nonterminal_set_.begin();
+ for (; it != impl_->nonterminal_set_.end(); ++it) {
+ matcher_[i]->AddMultiEpsLabel(*it);
+ }
+ }
+ }
+ }
+
+ virtual ReplaceFstMatcher<A, T> *Copy(bool safe = false) const {
+ return new ReplaceFstMatcher<A, T>(*this, safe);
+ }
+
+ virtual ~ReplaceFstMatcher() {
+ for (size_t i = 0; i < matcher_.size(); ++i)
+ delete matcher_[i];
+ }
+
+ virtual MatchType Type(bool test) const {
+ if (match_type_ == MATCH_NONE)
+ return match_type_;
+
+ uint64 true_prop = match_type_ == MATCH_INPUT ?
+ kILabelSorted : kOLabelSorted;
+ uint64 false_prop = match_type_ == MATCH_INPUT ?
+ kNotILabelSorted : kNotOLabelSorted;
+ uint64 props = fst_.Properties(true_prop | false_prop, test);
+
+ if (props & true_prop)
+ return match_type_;
+ else if (props & false_prop)
+ return MATCH_NONE;
+ else
+ return MATCH_UNKNOWN;
+ }
+
+ virtual const Fst<A> &GetFst() const {
+ return fst_;
+ }
+
+ virtual uint64 Properties(uint64 props) const {
+ return props;
+ }
+
+ private:
+ // Set the sate from which our matching happens.
+ virtual void SetState_(StateId s) {
+ if (s_ == s) return;
+
+ s_ = s;
+ tuple_ = impl_->GetStateTable()->Tuple(s_);
+ if (tuple_.fst_state == kNoStateId) {
+ done_ = true;
+ return;
+ }
+ // Get current matcher. Used for non epsilon matching
+ current_matcher_ = matcher_[tuple_.fst_id];
+ current_matcher_->SetState(tuple_.fst_state);
+ loop_.nextstate = s_;
+
+ final_arc_ = false;
+ }
+
+ // Search for label, from previous set state. If label == 0, first
+ // hallucinate and epsilon loop, else use the underlying matcher to
+ // search for the label or epsilons.
+ // - Note since the ReplaceFST recursion on non-terminal arcs causes
+ // epsilon transitions to be created we use the MultiEpsilonMatcher
+ // to search for possible matches of non terminals.
+ // - If the component Fst reaches a final state we also need to add
+ // the exiting final arc.
+ virtual bool Find_(Label label) {
+ bool found = false;
+ label_ = label;
+ if (label_ == 0 || label_ == kNoLabel) {
+ // Compute loop directly, saving Replace::ComputeArc
+ if (label_ == 0) {
+ current_loop_ = true;
+ found = true;
+ }
+ // Search for matching multi epsilons
+ final_arc_ = impl_->ComputeFinalArc(tuple_, 0);
+ found = current_matcher_->Find(kNoLabel) || final_arc_ || found;
+ } else {
+ // Search on sub machine directly using sub machine matcher.
+ found = current_matcher_->Find(label_);
+ }
+ return found;
+ }
+
+ virtual bool Done_() const {
+ return !current_loop_ && !final_arc_ && current_matcher_->Done();
+ }
+
+ virtual const Arc& Value_() const {
+ if (current_loop_) {
+ return loop_;
+ }
+ if (final_arc_) {
+ impl_->ComputeFinalArc(tuple_, &arc_);
+ return arc_;
+ }
+ const Arc& component_arc = current_matcher_->Value();
+ impl_->ComputeArc(tuple_, component_arc, &arc_);
+ return arc_;
+ }
+
+ virtual void Next_() {
+ if (current_loop_) {
+ current_loop_ = false;
+ return;
+ }
+ if (final_arc_) {
+ final_arc_ = false;
+ return;
+ }
+ current_matcher_->Next();
+ }
+
+ const ReplaceFst<A, T>& fst_;
+ ReplaceFstImpl<A, T> *impl_;
+ LocalMatcher* current_matcher_;
+ vector<LocalMatcher*> matcher_;
+
+ StateId s_; // Current state
+ Label label_; // Current label
+
+ MatchType match_type_; // Supplied by caller
+ mutable bool done_;
+ mutable bool current_loop_; // Current arc is the implicit loop
+ mutable bool final_arc_; // Current arc for exiting recursion
+ mutable typename T::StateTuple tuple_; // Tuple corresponding to state_
+ mutable Arc arc_;
+ Arc loop_;
+};
+
+// Allocates a StateIterator specialised for ReplaceFst over this FST and
+// stores it in data->base.
+template <class A, class T>
+inline void ReplaceFst<A, T>::InitStateIterator(
+    StateIteratorData<A> *data) const {
+  typedef StateIterator< ReplaceFst<A, T> > ReplaceStateIterator;
+  data->base = new ReplaceStateIterator(*this);
+}
+
+typedef ReplaceFst<StdArc> StdReplaceFst;
+
+
+// Recursively replaces arcs in the root Fst with other Fsts.
+// This version writes the result of replacement to an output MutableFst.
+//
+// Replace supports replacement of arcs in one Fst with another
+// Fst. This replacement is recursive. Replace takes an array of
+// Fst(s). One Fst represents the root (or topology) machine. The root
+// Fst refers to other Fsts by recursively replacing arcs labeled as
+// non-terminals with the matching non-terminal Fst. Currently Replace
+// uses the output symbols of the arcs to determine whether the arc is
+// a non-terminal arc or not. A non-terminal can be any label that is
+// not a non-zero terminal label in the output alphabet. Note that
+// input argument is a vector of pair<>. These correspond to the tuple
+// of non-terminal Label and corresponding Fst.
+template<class Arc>
+void Replace(const vector<pair<typename Arc::Label,
+             const Fst<Arc>* > >& ifst_array,
+             MutableFst<Arc> *ofst, typename Arc::Label root,
+             bool epsilon_on_replace) {
+  // Build the delayed replacement, then copy it into the output FST.
+  ReplaceFstOptions<Arc> replace_opts(root, epsilon_on_replace);
+  // Cache only the last state for fastest copy.
+  replace_opts.gc_limit = 0;
+  ReplaceFst<Arc> delayed(ifst_array, replace_opts);
+  *ofst = delayed;
+}
+
+// Convenience overload: same as the four-argument Replace() with
+// 'epsilon_on_replace' set to false.
+template<class Arc>
+void Replace(const vector<pair<typename Arc::Label,
+             const Fst<Arc>* > >& ifst_array,
+             MutableFst<Arc> *ofst, typename Arc::Label root) {
+  const bool epsilon_on_replace = false;
+  Replace(ifst_array, ofst, root, epsilon_on_replace);
+}
+
+} // namespace fst
+
+#endif // FST_LIB_REPLACE_H__
diff --git a/src/include/fst/reverse.h b/src/include/fst/reverse.h
new file mode 100644
index 0000000..4d4c75c
--- /dev/null
+++ b/src/include/fst/reverse.h
@@ -0,0 +1,91 @@
+// reverse.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Functions and classes to reverse an FST.
+
+#ifndef FST_LIB_REVERSE_H__
+#define FST_LIB_REVERSE_H__
+
+#include <algorithm>
+#include <vector>
+using std::vector;
+
+#include <fst/cache.h>
+
+
+namespace fst {
+
+// Reverses an FST. The reversed result is written to an output
+// MutableFst. If A transduces string x to y with weight a, then the
+// reverse of A transduces the reverse of x to the reverse of y with
+// weight a.Reverse().
+//
+// Typically, a = a.Reverse() and Arc = RevArc (e.g. for
+// TropicalWeight or LogWeight). In general, e.g. when the weights
+// only form a left or right semiring, the output arc type must match
+// the input arc type except having the reversed Weight type.
+template<class Arc, class RevArc>
+void Reverse(const Fst<Arc> &ifst, MutableFst<RevArc> *ofst) {
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Weight Weight;
+  typedef typename RevArc::Weight RevWeight;
+
+  // Start from an empty output that uses the input's symbol tables.
+  ofst->DeleteStates();
+  ofst->SetInputSymbols(ifst.InputSymbols());
+  ofst->SetOutputSymbols(ifst.OutputSymbols());
+  if (ifst.Properties(kExpanded, false)) {
+    // One state more than the input: the new start state added below.
+    ofst->ReserveStates(CountStates(ifst) + 1);
+  }
+
+  const StateId input_start = ifst.Start();
+  const StateId output_start = ofst->AddState();
+  ofst->SetStart(output_start);
+
+  StateIterator< Fst<Arc> > siter(ifst);
+  while (!siter.Done()) {
+    // Input state i corresponds to output state i + 1.
+    const StateId in_state = siter.Value();
+    const StateId out_state = in_state + 1;
+    while (ofst->NumStates() <= out_state)
+      ofst->AddState();
+
+    // The image of the input start state is made final with weight One.
+    if (in_state == input_start)
+      ofst->SetFinal(out_state, RevWeight::One());
+
+    // A non-zero input final weight becomes an epsilon arc out of state 0.
+    const Weight final_weight = ifst.Final(in_state);
+    if (final_weight != Weight::Zero()) {
+      const RevArc from_start(0, 0, final_weight.Reverse(), out_state);
+      ofst->AddArc(0, from_start);
+    }
+
+    // Every input arc is added in the opposite direction, with its weight
+    // reversed.
+    for (ArcIterator< Fst<Arc> > aiter(ifst, in_state);
+         !aiter.Done();
+         aiter.Next()) {
+      const Arc &forward = aiter.Value();
+      const RevArc backward(forward.ilabel, forward.olabel,
+                            forward.weight.Reverse(), out_state);
+      const StateId out_source = forward.nextstate + 1;
+      while (ofst->NumStates() <= out_source)
+        ofst->AddState();
+      ofst->AddArc(out_source, backward);
+    }
+
+    siter.Next();
+  }
+
+  // Combine the properties implied by reversal with those already known for
+  // the output.
+  const uint64 input_props = ifst.Properties(kCopyProperties, false);
+  const uint64 output_props = ofst->Properties(kFstProperties, false);
+  ofst->SetProperties(ReverseProperties(input_props) | output_props,
+                      kFstProperties);
+}
+
+} // namespace fst
+
+#endif // FST_LIB_REVERSE_H__
diff --git a/src/include/fst/reweight.h b/src/include/fst/reweight.h
new file mode 100644
index 0000000..c051c2a
--- /dev/null
+++ b/src/include/fst/reweight.h
@@ -0,0 +1,146 @@
+// reweight.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// Function to reweight an FST.
+
+#ifndef FST_LIB_REWEIGHT_H__
+#define FST_LIB_REWEIGHT_H__
+
+#include <vector>
+using std::vector;
+
+#include <fst/mutable-fst.h>
+
+
+namespace fst {
+
+// Direction selector for Reweight().
+enum ReweightType {
+  REWEIGHT_TO_INITIAL,  // Reweight towards the initial state
+  REWEIGHT_TO_FINAL     // Reweight towards the final states
+};
+
+// Reweight FST according to the potentials defined by the POTENTIAL
+// vector in the direction defined by TYPE. Weight needs to be left
+// distributive when reweighting towards the initial state and right
+// distributive when reweighting towards the final states.
+//
+// An arc of weight w, with an origin state of potential p and
+// destination state of potential q, is reweighted by p\wq when
+// reweighting towards the initial state and by pw/q when reweighting
+// towards the final states.
+template <class Arc>
+void Reweight(MutableFst<Arc> *fst,
+              const vector<typename Arc::Weight> &potential,
+              ReweightType type) {
+  typedef typename Arc::Weight Weight;
+  typedef typename Arc::StateId StateId;
+  typedef MutableArcIterator< MutableFst<Arc> > ArcEditor;
+
+  // An FST without states has nothing to reweight.
+  if (fst->NumStates() == 0)
+    return;
+
+  const bool to_initial = (type == REWEIGHT_TO_INITIAL);
+  const bool to_final = (type == REWEIGHT_TO_FINAL);
+
+  // The weight type must be distributive on the side matching the direction.
+  if (to_final && !(Weight::Properties() & kRightSemiring)) {
+    FSTERROR() << "Reweight: Reweighting to the final states requires "
+               << "Weight to be right distributive: "
+               << Weight::Type();
+    fst->SetProperties(kError, kError);
+    return;
+  }
+
+  if (to_initial && !(Weight::Properties() & kLeftSemiring)) {
+    FSTERROR() << "Reweight: Reweighting to the initial state requires "
+               << "Weight to be left distributive: "
+               << Weight::Type();
+    fst->SetProperties(kError, kError);
+    return;
+  }
+
+  // First pass: states that have an entry in 'potential'. The iterator is
+  // shared with the second pass below, which resumes where this one stops.
+  StateIterator< MutableFst<Arc> > sit(*fst);
+  while (!sit.Done()) {
+    const StateId state = sit.Value();
+    if (state == potential.size())
+      break;
+    const Weight weight = potential[state];
+    if (weight != Weight::Zero()) {
+      for (ArcEditor ait(fst, state); !ait.Done(); ait.Next()) {
+        Arc arc = ait.Value();
+        // Arcs whose destination has no potential, or a Zero potential,
+        // are left as they are.
+        if (arc.nextstate >= potential.size())
+          continue;
+        const Weight nextweight = potential[arc.nextstate];
+        if (nextweight == Weight::Zero())
+          continue;
+        if (to_initial) {
+          arc.weight = Divide(Times(arc.weight, nextweight), weight,
+                              DIVIDE_LEFT);
+        }
+        if (to_final) {
+          arc.weight = Divide(Times(weight, arc.weight), nextweight,
+                              DIVIDE_RIGHT);
+        }
+        ait.SetValue(arc);
+      }
+      if (to_initial) {
+        fst->SetFinal(state,
+                      Divide(fst->Final(state), weight, DIVIDE_LEFT));
+      }
+    }
+    if (to_final)
+      fst->SetFinal(state, Times(weight, fst->Final(state)));
+    sit.Next();
+  }
+
+  // Second pass: states past the end of the potentials array.
+  while (!sit.Done()) {
+    const StateId state = sit.Value();
+    if (to_final)
+      fst->SetFinal(state, Times(Weight::Zero(), fst->Final(state)));
+    sit.Next();
+  }
+
+  // Compensate for the potential of the start state, unless it is One or
+  // Zero (a start state without an entry in 'potential' counts as Zero).
+  const Weight startweight = fst->Start() < potential.size() ?
+      potential[fst->Start()] : Weight::Zero();
+  if ((startweight != Weight::One()) && (startweight != Weight::Zero())) {
+    if (fst->Properties(kInitialAcyclic, true) & kInitialAcyclic) {
+      // Apply the correction directly to the arcs and the final weight of
+      // the start state.
+      const StateId start = fst->Start();
+      for (ArcEditor ait(fst, start); !ait.Done(); ait.Next()) {
+        Arc arc = ait.Value();
+        if (to_initial) {
+          arc.weight = Times(startweight, arc.weight);
+        } else {
+          arc.weight = Times(
+              Divide(Weight::One(), startweight, DIVIDE_RIGHT),
+              arc.weight);
+        }
+        ait.SetValue(arc);
+      }
+      if (to_initial) {
+        fst->SetFinal(start, Times(startweight, fst->Final(start)));
+      } else {
+        fst->SetFinal(start, Times(Divide(Weight::One(), startweight,
+                                          DIVIDE_RIGHT),
+                                   fst->Final(start)));
+      }
+    } else {
+      // Otherwise add a new start state with one epsilon arc, carrying the
+      // correction, into the old start state.
+      const StateId new_start = fst->AddState();
+      const Weight correction = to_initial ? startweight :
+          Divide(Weight::One(), startweight, DIVIDE_RIGHT);
+      const Arc bridge(0, 0, correction, fst->Start());
+      fst->AddArc(new_start, bridge);
+      fst->SetStart(new_start);
+    }
+  }
+
+  const uint64 props = fst->Properties(kFstProperties, false);
+  fst->SetProperties(ReweightProperties(props), kFstProperties);
+}
+
+} // namespace fst
+
+#endif // FST_LIB_REWEIGHT_H__
diff --git a/src/include/fst/rmepsilon.h b/src/include/fst/rmepsilon.h
new file mode 100644
index 0000000..ee9753e
--- /dev/null
+++ b/src/include/fst/rmepsilon.h
@@ -0,0 +1,601 @@
+// rmepsilon.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// Functions and classes that implement epsilon-removal.
+
+#ifndef FST_LIB_RMEPSILON_H__
+#define FST_LIB_RMEPSILON_H__
+
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <fst/slist.h>
+#include <stack>
+#include <string>
+#include <utility>
+using std::pair; using std::make_pair;
+#include <vector>
+using std::vector;
+
+#include <fst/arcfilter.h>
+#include <fst/cache.h>
+#include <fst/connect.h>
+#include <fst/factor-weight.h>
+#include <fst/invert.h>
+#include <fst/prune.h>
+#include <fst/queue.h>
+#include <fst/shortest-distance.h>
+#include <fst/topsort.h>
+
+
+namespace fst {
+
+// Options for the destructive RmEpsilon(): the shortest-distance options
+// used on the epsilon arcs, plus connection and pruning settings.
+template <class Arc, class Queue>
+class RmEpsilonOptions
+    : public ShortestDistanceOptions<Arc, Queue, EpsilonArcFilter<Arc> > {
+  typedef ShortestDistanceOptions<Arc, Queue, EpsilonArcFilter<Arc> >
+      SdOptions;
+
+ public:
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Weight Weight;
+
+  // Connect output
+  bool connect;
+  // Pruning weight threshold.
+  Weight weight_threshold;
+  // Pruning state threshold.
+  StateId state_threshold;
+
+  // 'q' and 'd' are forwarded to the shortest-distance options as the queue
+  // and delta; 'c', 'w' and 'n' initialise connect, weight_threshold and
+  // state_threshold.
+  explicit RmEpsilonOptions(Queue *q, float d = kDelta, bool c = true,
+                            Weight w = Weight::Zero(),
+                            StateId n = kNoStateId)
+      : SdOptions(q, EpsilonArcFilter<Arc>(), kNoStateId, d),
+        connect(c),
+        weight_threshold(w),
+        state_threshold(n) {}
+
+ private:
+  RmEpsilonOptions();  // disallow
+};
+
+// Computation state of the epsilon-removal algorithm.
+template <class Arc, class Queue>
+class RmEpsilonState {
+ public:
+  typedef typename Arc::Label Label;
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Weight Weight;
+
+  // 'distance' is handed to the shortest-distance computation and read back
+  // by Expand(); 'opts' configures that computation.
+  RmEpsilonState(const Fst<Arc> &fst,
+                 vector<Weight> *distance,
+                 const RmEpsilonOptions<Arc, Queue> &opts)
+      : fst_(fst), distance_(distance), sd_state_(fst_, distance, opts, true),
+        expand_id_(0) {}
+
+  // Compute arcs and final weight for state 's'
+  void Expand(StateId s);
+
+  // Returns arcs of expanded state.
+  vector<Arc> &Arcs() { return arcs_; }
+
+  // Returns final weight of expanded state.
+  const Weight &Final() const { return final_; }
+
+  // Return true if an error has occurred.
+  bool Error() const { return sd_state_.Error(); }
+
+ private:
+  // Multipliers used by ElementKey to mix the labels into the hash.
+  static const size_t kPrime0 = 7853;
+  static const size_t kPrime1 = 7867;
+
+  // Identifies an output arc by its labels and destination.
+  struct Element {
+    Label ilabel;
+    Label olabel;
+    StateId nextstate;
+
+    Element() {}
+
+    Element(Label i, Label o, StateId s)
+        : ilabel(i), olabel(o), nextstate(s) {}
+  };
+
+  // Hash functor for Element. All three fields take part in the hash so that
+  // arcs sharing a destination but differing in labels do not all collide.
+  // Each field is converted to size_t first so the arithmetic is unsigned.
+  class ElementKey {
+   public:
+    size_t operator()(const Element& e) const {
+      return static_cast<size_t>(e.nextstate) +
+             static_cast<size_t>(e.ilabel) * kPrime0 +
+             static_cast<size_t>(e.olabel) * kPrime1;
+    }
+  };
+
+  // Equality functor for Element: all three fields must match.
+  class ElementEqual {
+   public:
+    bool operator()(const Element &e1, const Element &e2) const {
+      return (e1.ilabel == e2.ilabel) && (e1.olabel == e2.olabel)
+          && (e1.nextstate == e2.nextstate);
+    }
+  };
+
+  typedef unordered_map<Element, pair<StateId, size_t>,
+                        ElementKey, ElementEqual> ElementMap;
+
+  const Fst<Arc> &fst_;
+  // Distance from state being expanded in epsilon-closure.
+  vector<Weight> *distance_;
+  // Shortest distance algorithm computation state.
+  ShortestDistanceState<Arc, Queue, EpsilonArcFilter<Arc> > sd_state_;
+  // Maps an element 'e' to a pair 'p' corresponding to a position
+  // in the arcs vector of the state being expanded. 'e' corresponds
+  // to the position 'p.second' in the 'arcs_' vector if 'p.first' is
+  // equal to the state being expanded.
+  ElementMap element_map_;
+  EpsilonArcFilter<Arc> eps_filter_;
+  stack<StateId> eps_queue_;      // Queue used to visit the epsilon-closure
+  vector<bool> visited_;          // '[i] = true' if state 'i' has been visited
+  slist<StateId> visited_states_; // List of visited states
+  vector<Arc> arcs_;              // Arcs of state being expanded
+  Weight final_;                  // Final weight of state being expanded
+  StateId expand_id_;             // Unique ID for each call to Expand
+
+  DISALLOW_COPY_AND_ASSIGN(RmEpsilonState);
+};
+
+// Out-of-class definitions of the static constants declared above.
+template <class Arc, class Queue>
+const size_t RmEpsilonState<Arc, Queue>::kPrime0;
+template <class Arc, class Queue>
+const size_t RmEpsilonState<Arc, Queue>::kPrime1;
+
+
+// Computes the arcs and final weight of 'source' after epsilon removal:
+// walks the states reachable from 'source' through arcs accepted by the
+// epsilon filter and collects every other arc, pre-multiplied by the
+// distance of the state it leaves. Results are left in arcs_ and final_.
+template <class Arc, class Queue>
+void RmEpsilonState<Arc,Queue>::Expand(typename Arc::StateId source) {
+  typedef pair<StateId, size_t> Slot;  // (expand id, index into arcs_)
+
+  final_ = Weight::Zero();
+  arcs_.clear();
+  sd_state_.ShortestDistance(source);
+  if (sd_state_.Error())
+    return;
+  eps_queue_.push(source);
+
+  while (!eps_queue_.empty()) {
+    const StateId state = eps_queue_.top();
+    eps_queue_.pop();
+
+    // Grow the visited table on demand, then skip states already handled.
+    while (visited_.size() <= state)
+      visited_.push_back(false);
+    if (visited_[state])
+      continue;
+    visited_[state] = true;
+    visited_states_.push_front(state);
+
+    for (ArcIterator< Fst<Arc> > ait(fst_, state);
+         !ait.Done();
+         ait.Next()) {
+      Arc arc = ait.Value();
+      arc.weight = Times((*distance_)[state], arc.weight);
+
+      if (eps_filter_(arc)) {
+        // Arc accepted by the epsilon filter: schedule its destination.
+        while (visited_.size() <= arc.nextstate)
+          visited_.push_back(false);
+        if (!visited_[arc.nextstate])
+          eps_queue_.push(arc.nextstate);
+        continue;
+      }
+
+      // Any other arc: emit it, merging with an identical arc already
+      // emitted during this call.
+      const Element element(arc.ilabel, arc.olabel, arc.nextstate);
+      typename ElementMap::iterator it = element_map_.find(element);
+      if (it == element_map_.end()) {
+        element_map_.insert(typename ElementMap::value_type(
+            element, Slot(expand_id_, arcs_.size())));
+        arcs_.push_back(arc);
+        continue;
+      }
+
+      Slot &slot = it->second;
+      if (slot.first == expand_id_) {
+        // Same expansion: sum the weights of the duplicate arcs.
+        Weight &merged = arcs_[slot.second].weight;
+        merged = Plus(merged, arc.weight);
+      } else {
+        // Entry left over from an earlier expansion: reuse it.
+        slot.first = expand_id_;
+        slot.second = arcs_.size();
+        arcs_.push_back(arc);
+      }
+    }
+
+    final_ = Plus(final_, Times((*distance_)[state], fst_.Final(state)));
+  }
+
+  // Reset the visited marks touched by this call.
+  while (!visited_states_.empty()) {
+    visited_[visited_states_.front()] = false;
+    visited_states_.pop_front();
+  }
+  ++expand_id_;
+}
+
+// Removes epsilon-transitions (when both the input and output label
+// are an epsilon) from a transducer. The result will be an equivalent
+// FST that has no such epsilon transitions. This version modifies
+// its input. It allows fine control via the options argument; see
+// below for a simpler interface.
+//
+// The vector 'distance' will be used to hold the shortest distances
+// during the epsilon-closure computation. The state queue discipline
+// and convergence delta are taken in the options argument.
+template <class Arc, class Queue>
+void RmEpsilon(MutableFst<Arc> *fst,
+               vector<typename Arc::Weight> *distance,
+               const RmEpsilonOptions<Arc, Queue> &opts) {
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Weight Weight;
+
+  // Nothing to do without a start state.
+  if (fst->Start() == kNoStateId)
+    return;
+
+  // 'noneps_in[s]' will be set to true iff 's' admits a non-epsilon
+  // incoming transition or is the start state.
+  vector<bool> noneps_in(fst->NumStates(), false);
+  noneps_in[fst->Start()] = true;
+  for (StateId s = 0; s < fst->NumStates(); ++s) {
+    for (ArcIterator<Fst<Arc> > aiter(*fst, s);
+         !aiter.Done();
+         aiter.Next()) {
+      const Arc &arc = aiter.Value();
+      if (arc.ilabel != 0 || arc.olabel != 0)
+        noneps_in[arc.nextstate] = true;
+    }
+  }
+
+  // States sorted in topological order when (acyclic) or generic
+  // topological order (cyclic).
+  vector<StateId> states;
+  states.reserve(fst->NumStates());
+
+  if (fst->Properties(kTopSorted, false) & kTopSorted) {
+    // Already top-sorted: use the state numbering as is.
+    for (StateId s = 0; s < fst->NumStates(); s++)
+      states.push_back(s);
+  } else if (fst->Properties(kAcyclic, false) & kAcyclic) {
+    // Acyclic: order from a DFS restricted by the epsilon arc filter.
+    vector<StateId> order;
+    bool acyclic;
+    TopOrderVisitor<Arc> top_order_visitor(&order, &acyclic);
+    DfsVisit(*fst, &top_order_visitor, EpsilonArcFilter<Arc>());
+    // Sanity check: should be acyclic if property bit is set.
+    if (!acyclic) {
+      FSTERROR() << "RmEpsilon: inconsistent acyclic property bit";
+      fst->SetProperties(kError, kError);
+      return;
+    }
+    states.resize(order.size());
+    for (StateId s = 0; s < order.size(); s++)
+      states[order[s]] = s;
+  } else {
+    // General case: group the states by the component number that the
+    // SCC visitor assigns them.
+    uint64 props;
+    vector<StateId> scc;
+    SccVisitor<Arc> scc_visitor(&scc, 0, 0, &props);
+    DfsVisit(*fst, &scc_visitor, EpsilonArcFilter<Arc>());
+    // 'first[c]' heads a linked list, chained through 'next', of the
+    // states in component 'c'.
+    vector<StateId> first(scc.size(), kNoStateId);
+    vector<StateId> next(scc.size(), kNoStateId);
+    for (StateId s = 0; s < scc.size(); s++) {
+      const StateId component = scc[s];
+      if (first[component] != kNoStateId)
+        next[s] = first[component];
+      first[component] = s;
+    }
+    for (StateId c = 0; c < first.size(); c++) {
+      for (StateId s = first[c]; s != kNoStateId; s = next[s])
+        states.push_back(s);
+    }
+  }
+
+  RmEpsilonState<Arc, Queue> rmeps_state(*fst, distance, opts);
+
+  // Process the states from the back of 'states' to the front.
+  for (size_t remaining = states.size(); remaining > 0; --remaining) {
+    const StateId state = states[remaining - 1];
+    if (!noneps_in[state])
+      continue;
+    rmeps_state.Expand(state);
+    fst->SetFinal(state, rmeps_state.Final());
+    fst->DeleteArcs(state);
+    vector<Arc> &arcs = rmeps_state.Arcs();
+    fst->ReserveArcs(state, arcs.size());
+    // Arcs are added last-to-first, and the buffer is left empty.
+    for (size_t i = arcs.size(); i > 0; --i)
+      fst->AddArc(state, arcs[i - 1]);
+    arcs.clear();
+  }
+
+  // States that were not expanded lose their arcs.
+  for (StateId s = 0; s < fst->NumStates(); ++s) {
+    if (!noneps_in[s])
+      fst->DeleteArcs(s);
+  }
+
+  if (rmeps_state.Error())
+    fst->SetProperties(kError, kError);
+  const uint64 props = fst->Properties(kFstProperties, false);
+  fst->SetProperties(RmEpsilonProperties(props), kFstProperties);
+
+  // Optional pruning, then optional connection of the result.
+  if (opts.weight_threshold != Weight::Zero() ||
+      opts.state_threshold != kNoStateId) {
+    Prune(fst, opts.weight_threshold, opts.state_threshold);
+  }
+  if (opts.connect && (opts.weight_threshold == Weight::Zero() ||
+                       opts.state_threshold != kNoStateId)) {
+    Connect(fst);
+  }
+}
+
+// Removes epsilon-transitions (when both the input and output label
+// are an epsilon) from a transducer. The result will be an equivalent
+// FST that has no such epsilon transitions. This version modifies its
+// input. It has a simplified interface; see above for a version that
+// allows finer control.
+//
+// Complexity:
+// - Time:
+// - Unweighted: O(V^2 + V E)
+// - Acyclic: O(V^2 + V E)
+// - Tropical semiring: O(V^2 log V + V E)
+// - General: exponential
+// - Space: O(V E)
+// where V = # of states visited, E = # of arcs.
+//
+// References:
+// - Mehryar Mohri. Generic Epsilon-Removal and Input
+// Epsilon-Normalization Algorithms for Weighted Transducers,
+// "International Journal of Computer Science", 13(1):129-143 (2002).
+template <class Arc>
+void RmEpsilon(MutableFst<Arc> *fst,
+               bool connect = true,
+               typename Arc::Weight weight_threshold = Arc::Weight::Zero(),
+               typename Arc::StateId state_threshold = kNoStateId,
+               float delta = kDelta) {
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Weight Weight;
+  typedef AutoQueue<StateId> Queue;
+
+  // The queue is constructed with the address of the distance vector,
+  // which is also what the options-based overload receives.
+  vector<Weight> distance;
+  Queue state_queue(*fst, &distance, EpsilonArcFilter<Arc>());
+  RmEpsilonOptions<Arc, Queue> opts(&state_queue, delta, connect,
+                                    weight_threshold, state_threshold);
+
+  RmEpsilon(fst, &distance, opts);
+}
+
+
+// Options for the delayed RmEpsilonFst: cache options plus the delta that
+// is passed on to the epsilon-removal computation.
+struct RmEpsilonFstOptions : CacheOptions {
+  float delta;
+
+  // Uses the cache options from 'opts'.
+  RmEpsilonFstOptions(const CacheOptions &opts, float d = kDelta)
+      : CacheOptions(opts), delta(d) {}
+
+  // Uses default-constructed cache options.
+  explicit RmEpsilonFstOptions(float d = kDelta) : delta(d) {}
+};
+
+
+// Implementation of delayed RmEpsilonFst.
+template <class A>
+class RmEpsilonFstImpl : public CacheImpl<A> {
+ public:
+  using FstImpl<A>::SetType;
+  using FstImpl<A>::SetProperties;
+  using FstImpl<A>::SetInputSymbols;
+  using FstImpl<A>::SetOutputSymbols;
+
+  using CacheBaseImpl< CacheState<A> >::PushArc;
+  using CacheBaseImpl< CacheState<A> >::HasArcs;
+  using CacheBaseImpl< CacheState<A> >::HasFinal;
+  using CacheBaseImpl< CacheState<A> >::HasStart;
+  using CacheBaseImpl< CacheState<A> >::SetArcs;
+  using CacheBaseImpl< CacheState<A> >::SetFinal;
+  using CacheBaseImpl< CacheState<A> >::SetStart;
+
+  typedef typename A::Label Label;
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+  typedef CacheState<A> State;
+
+  // Keeps a copy of 'fst' and sets up the epsilon-removal state over it,
+  // with a FIFO queue, the delta from 'opts' and connection disabled.
+  RmEpsilonFstImpl(const Fst<A>& fst, const RmEpsilonFstOptions &opts)
+      : CacheImpl<A>(opts),
+        fst_(fst.Copy()),
+        delta_(opts.delta),
+        rmeps_state_(*fst_, &distance_,
+                     RmOptions(&queue_, delta_, false)) {
+    SetType("rmepsilon");
+    const uint64 input_props = fst.Properties(kFstProperties, false);
+    SetProperties(RmEpsilonProperties(input_props, true), kCopyProperties);
+    SetInputSymbols(fst.InputSymbols());
+    SetOutputSymbols(fst.OutputSymbols());
+  }
+
+  // Copy constructor: takes Copy(true) of the underlying FST and builds its
+  // own epsilon-removal state, distance vector and queue.
+  RmEpsilonFstImpl(const RmEpsilonFstImpl &impl)
+      : CacheImpl<A>(impl),
+        fst_(impl.fst_->Copy(true)),
+        delta_(impl.delta_),
+        rmeps_state_(*fst_, &distance_,
+                     RmOptions(&queue_, delta_, false)) {
+    SetType("rmepsilon");
+    SetProperties(impl.Properties(), kCopyProperties);
+    SetInputSymbols(impl.InputSymbols());
+    SetOutputSymbols(impl.OutputSymbols());
+  }
+
+  ~RmEpsilonFstImpl() {
+    delete fst_;
+  }
+
+  // The start state is that of the underlying FST, cached on first use.
+  StateId Start() {
+    if (!HasStart())
+      SetStart(fst_->Start());
+    return CacheImpl<A>::Start();
+  }
+
+  // Final weight of 's'; expands the state if it is not cached yet.
+  Weight Final(StateId s) {
+    if (!HasFinal(s))
+      Expand(s);
+    return CacheImpl<A>::Final(s);
+  }
+
+  // The three counts below expand 's' first when its arcs are not cached.
+  size_t NumArcs(StateId s) {
+    ExpandIfNeeded(s);
+    return CacheImpl<A>::NumArcs(s);
+  }
+
+  size_t NumInputEpsilons(StateId s) {
+    ExpandIfNeeded(s);
+    return CacheImpl<A>::NumInputEpsilons(s);
+  }
+
+  size_t NumOutputEpsilons(StateId s) {
+    ExpandIfNeeded(s);
+    return CacheImpl<A>::NumOutputEpsilons(s);
+  }
+
+  uint64 Properties() const { return Properties(kFstProperties); }
+
+  // Set error if found; return FST impl properties.
+  uint64 Properties(uint64 mask) const {
+    if (mask & kError) {
+      if (fst_->Properties(kError, false) || rmeps_state_.Error())
+        SetProperties(kError, kError);
+    }
+    return FstImpl<A>::Properties(mask);
+  }
+
+  // Expands 's' if necessary, then lets the cache fill in 'data'.
+  void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
+    ExpandIfNeeded(s);
+    CacheImpl<A>::InitArcIterator(s, data);
+  }
+
+  // Computes the final weight and arcs of 's' and stores them in the cache.
+  void Expand(StateId s) {
+    rmeps_state_.Expand(s);
+    SetFinal(s, rmeps_state_.Final());
+    vector<A> &arcs = rmeps_state_.Arcs();
+    // Arcs are pushed last-to-first, and the buffer is left empty.
+    for (size_t i = arcs.size(); i > 0; --i)
+      PushArc(s, arcs[i - 1]);
+    arcs.clear();
+    SetArcs(s);
+  }
+
+ private:
+  typedef RmEpsilonOptions<A, FifoQueue<StateId> > RmOptions;
+
+  // Expands 's' unless its arcs are already in the cache.
+  void ExpandIfNeeded(StateId s) {
+    if (!HasArcs(s))
+      Expand(s);
+  }
+
+  const Fst<A> *fst_;
+  float delta_;
+  vector<Weight> distance_;
+  FifoQueue<StateId> queue_;
+  RmEpsilonState<A, FifoQueue<StateId> > rmeps_state_;
+
+  void operator=(const RmEpsilonFstImpl<A> &);  // disallow
+};
+
+
+// Removes epsilon-transitions (when both the input and output label
+// are an epsilon) from a transducer. The result will be an equivalent
+// FST that has no such epsilon transitions. This version is a
+// delayed Fst.
+//
+// Complexity:
+// - Time:
+// - Unweighted: O(v^2 + v e)
+// - General: exponential
+// - Space: O(v e)
+// where v = # of states visited, e = # of arcs visited. Constant time
+// to visit an input state or arc is assumed and exclusive of caching.
+//
+// References:
+// - Mehryar Mohri. Generic Epsilon-Removal and Input
+// Epsilon-Normalization Algorithms for Weighted Transducers,
+// "International Journal of Computer Science", 13(1):129-143 (2002).
+//
+// This class attaches interface to implementation and handles
+// reference counting, delegating most methods to ImplToFst.
+template <class A>
+class RmEpsilonFst : public ImplToFst< RmEpsilonFstImpl<A> > {
+ public:
+  friend class ArcIterator< RmEpsilonFst<A> >;
+  friend class StateIterator< RmEpsilonFst<A> >;
+
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef CacheState<A> State;
+  typedef RmEpsilonFstImpl<A> Impl;
+
+  // Delayed epsilon removal of 'fst' with default options.
+  RmEpsilonFst(const Fst<A> &fst)
+      : ImplToFst<Impl>(new Impl(fst, RmEpsilonFstOptions())) {}
+
+  // Delayed epsilon removal of 'fst' with the given options.
+  RmEpsilonFst(const Fst<A> &fst, const RmEpsilonFstOptions &opts)
+      : ImplToFst<Impl>(new Impl(fst, opts)) {}
+
+  // See Fst<>::Copy() for doc.
+  RmEpsilonFst(const RmEpsilonFst<A> &fst, bool safe = false)
+      : ImplToFst<Impl>(fst, safe) {}
+
+  // Get a copy of this RmEpsilonFst. See Fst<>::Copy() for further doc.
+  virtual RmEpsilonFst<A> *Copy(bool safe = false) const {
+    return new RmEpsilonFst<A>(*this, safe);
+  }
+
+  // Defined after the StateIterator specialization.
+  virtual inline void InitStateIterator(StateIteratorData<A> *data) const;
+
+  // Forwards to the implementation.
+  virtual void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const {
+    Impl *impl = GetImpl();
+    impl->InitArcIterator(s, data);
+  }
+
+ private:
+  // Makes visible to friends.
+  Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
+
+  void operator=(const RmEpsilonFst<A> &fst);  // disallow
+};
+
+// Specialization for RmEpsilonFst.
+template<class A>
+class StateIterator< RmEpsilonFst<A> >
+    : public CacheStateIterator< RmEpsilonFst<A> > {
+  typedef CacheStateIterator< RmEpsilonFst<A> > Base;
+
+ public:
+  // Iterates over the states of 'fst' through the cache-based iterator.
+  explicit StateIterator(const RmEpsilonFst<A> &fst)
+      : Base(fst, fst.GetImpl()) {}
+};
+
+
+// Specialization for RmEpsilonFst.
+template <class A>
+class ArcIterator< RmEpsilonFst<A> >
+    : public CacheArcIterator< RmEpsilonFst<A> > {
+ public:
+  typedef typename A::StateId StateId;
+
+  // Iterates over the arcs of state 's', expanding the state first if its
+  // arcs are not in the cache yet.
+  ArcIterator(const RmEpsilonFst<A> &fst, StateId s)
+      : CacheArcIterator< RmEpsilonFst<A> >(fst.GetImpl(), s) {
+    typename RmEpsilonFst<A>::Impl *impl = fst.GetImpl();
+    if (!impl->HasArcs(s))
+      impl->Expand(s);
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+
+// Allocates a StateIterator specialised for RmEpsilonFst over this FST and
+// stores it in data->base.
+template <class A>
+inline void RmEpsilonFst<A>::InitStateIterator(
+    StateIteratorData<A> *data) const {
+  typedef StateIterator< RmEpsilonFst<A> > RmEpsilonStateIterator;
+  data->base = new RmEpsilonStateIterator(*this);
+}
+
+
+// Useful alias when using StdArc.
+typedef RmEpsilonFst<StdArc> StdRmEpsilonFst;
+
+} // namespace fst
+
+#endif // FST_LIB_RMEPSILON_H__
diff --git a/src/include/fst/rmfinalepsilon.h b/src/include/fst/rmfinalepsilon.h
new file mode 100644
index 0000000..236d1a7
--- /dev/null
+++ b/src/include/fst/rmfinalepsilon.h
@@ -0,0 +1,107 @@
+// rmfinalepsilon.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: johans@google.com (Johan Schalkwyk)
+//
+// \file
+// Function to remove final states that have epsilon-only input arcs.
+
+#ifndef FST_LIB_RMFINALEPSILON_H__
+#define FST_LIB_RMFINALEPSILON_H__
+
+#include <unordered_set>
+using std::tr1::unordered_set;
+using std::tr1::unordered_multiset;
+#include <vector>
+using std::vector;
+
+#include <fst/connect.h>
+#include <fst/mutable-fst.h>
+
+
+namespace fst {
+
+// Folds epsilon transitions into removable final states back into the
+// final weight of their source states, then trims the FST with Connect().
+//
+// A final state is "removable" when none of its outgoing arcs leads to a
+// coaccessible state (this includes final states with no outgoing arcs).
+// For every state s, each arc with ilabel == olabel == 0 that enters a
+// removable final state t is dropped and
+//   Times(arc.weight, Final(t))
+// is Plus-ed into Final(s). All other arcs are kept unchanged.
+//
+// fst: the FST to modify in place.
+template<class A>
+void RmFinalEpsilon(MutableFst<A>* fst) {
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+
+  // Determine the coaccessibility of states.
+  vector<bool> access;
+  vector<bool> coaccess;
+  uint64 props = 0;
+  SccVisitor<A> scc_visitor(0, &access, &coaccess, &props);
+  DfsVisit(*fst, &scc_visitor);
+
+  // Find potential list of removable final states. These are final states
+  // that have no outgoing transitions or final states that have a
+  // non-coaccessible future. Every state is visited once and the arcs of
+  // final states are scanned, so this is O(S + E) in the worst case.
+  unordered_set<StateId> finals;
+  for (StateIterator<Fst<A> > siter(*fst); !siter.Done(); siter.Next()) {
+    StateId s = siter.Value();
+    if (fst->Final(s) != Weight::Zero()) {
+      bool future_coaccess = false;
+      for (ArcIterator<Fst<A> > aiter(*fst, s); !aiter.Done(); aiter.Next()) {
+        const A& arc = aiter.Value();
+        if (coaccess[arc.nextstate]) {
+          future_coaccess = true;
+          break;
+        }
+      }
+      if (!future_coaccess) {
+        finals.insert(s);
+      }
+    }
+  }
+
+  // Move the final weight. Complexity O(E)
+  vector<A> arcs;
+  for (StateIterator<Fst<A> > siter(*fst); !siter.Done(); siter.Next()) {
+    StateId s = siter.Value();
+    Weight w(fst->Final(s));
+
+    arcs.clear();
+    for (ArcIterator<Fst<A> > aiter(*fst, s); !aiter.Done(); aiter.Next()) {
+      const A& arc = aiter.Value();
+      // Only epsilon:epsilon arcs that enter a removable final state are
+      // folded away; every other arc is kept.
+      const bool fold = arc.ilabel == 0 && arc.olabel == 0 &&
+                        finals.find(arc.nextstate) != finals.end();
+      if (fold) {
+        // The arc is traversed before the final weight is applied, so the
+        // arc weight must be the left operand of Times. This only matters
+        // for semirings whose Times is not commutative.
+        w = Plus(Times(arc.weight, fst->Final(arc.nextstate)), w);
+      } else {
+        arcs.push_back(arc);
+      }
+    }
+
+    // If some arcs (epsilon arcs) were deleted, delete all
+    // arcs and add back only the non epsilon arcs
+    if (arcs.size() < fst->NumArcs(s)) {
+      fst->DeleteArcs(s);
+      fst->SetFinal(s, w);
+      for (size_t i = 0; i < arcs.size(); ++i) {
+        fst->AddArc(s, arcs[i]);
+      }
+    }
+  }
+
+  Connect(fst);
+}
+
+} // namespace fst
+
+#endif // FST_LIB_RMFINALEPSILON_H__
diff --git a/src/include/fst/script/arcsort.h b/src/include/fst/script/arcsort.h
new file mode 100644
index 0000000..4277332
--- /dev/null
+++ b/src/include/fst/script/arcsort.h
@@ -0,0 +1,49 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_ARCSORT_H_
+#define FST_SCRIPT_ARCSORT_H_
+
+#include <fst/arcsort.h>
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+
+namespace fst {
+namespace script {
+
+enum ArcSortType { ILABEL_COMPARE, OLABEL_COMPARE };
+
+typedef args::Package<MutableFstClass*, const ArcSortType> ArcSortArgs;
+
+// Typed implementation of the scripting-level ArcSort: sorts the arcs of
+// args->arg1 with the comparator selected by args->arg2.
+template<class Arc>
+void ArcSort(ArcSortArgs *args) {
+  MutableFst<Arc> *typed_fst = args->arg1->GetMutableFst<Arc>();
+
+  if (args->arg2 == ILABEL_COMPARE) {
+    ILabelCompare<Arc> by_ilabel;
+    ArcSort(typed_fst, by_ilabel);
+    return;
+  }
+
+  // Any other value is treated as OLABEL_COMPARE.
+  OLabelCompare<Arc> by_olabel;
+  ArcSort(typed_fst, by_olabel);
+}
+
+void ArcSort(MutableFstClass *ofst, ArcSortType sort_type);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_ARCSORT_H_
diff --git a/src/include/fst/script/arg-packs.h b/src/include/fst/script/arg-packs.h
new file mode 100644
index 0000000..8ebf8d8
--- /dev/null
+++ b/src/include/fst/script/arg-packs.h
@@ -0,0 +1,240 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+// Convenience templates for defining arg packs for the FstClass operations.
+
+// See operation-templates.h for a discussion about why these are needed; the
+// short story is that all FstClass operations must be implemented by a version
+// that takes one argument, most likely a struct bundling all the
+// logical arguments together. These template structs provide convenient ways
+// to specify these bundles (e.g. by means of appropriate typedefs).
+
+// The Package template is sufficient for bundling together all the args for
+// a particular function. The function is assumed to be void-returning. If
+// you want a space for a return value, use the WithReturnValue template
+// as follows:
+
+// WithReturnValue<bool, Package<...> >
+
+#ifndef FST_SCRIPT_ARG_PACKS_H_
+#define FST_SCRIPT_ARG_PACKS_H_
+
+namespace fst {
+namespace script {
+namespace args {
+
+// Placeholder type marking an unused argument slot.
+class none_type { };
+
+// Base arg pack template class. The partial specializations below cover
+// every smaller argument count, down to 2. If the maximum number of
+// arguments increases, three things have to change:
+//  1) Add more template parameters to the primary template.
+//  2) Add more specializations to allow fewer numbers of parameters than
+//     the new max.
+//  3) Add extra none_types to all existing specializations to fill
+//     the new slots.
+
+
+// 9 args (max)
+template<class T1,
+         class T2 = none_type,
+         class T3 = none_type,
+         class T4 = none_type,
+         class T5 = none_type,
+         class T6 = none_type,
+         class T7 = none_type,
+         class T8 = none_type,
+         class T9 = none_type>
+struct Package {
+  T1 arg1;
+  T2 arg2;
+  T3 arg3;
+  T4 arg4;
+  T5 arg5;
+  T6 arg6;
+  T7 arg7;
+  T8 arg8;
+  T9 arg9;
+
+  Package(T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8, T9 a9)
+      : arg1(a1),
+        arg2(a2),
+        arg3(a3),
+        arg4(a4),
+        arg5(a5),
+        arg6(a6),
+        arg7(a7),
+        arg8(a8),
+        arg9(a9) { }
+};
+
+// 8 args
+template<class T1,
+         class T2,
+         class T3,
+         class T4,
+         class T5,
+         class T6,
+         class T7,
+         class T8>
+struct Package<T1, T2, T3, T4, T5, T6, T7, T8, none_type> {
+  T1 arg1;
+  T2 arg2;
+  T3 arg3;
+  T4 arg4;
+  T5 arg5;
+  T6 arg6;
+  T7 arg7;
+  T8 arg8;
+
+  Package(T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7, T8 a8)
+      : arg1(a1),
+        arg2(a2),
+        arg3(a3),
+        arg4(a4),
+        arg5(a5),
+        arg6(a6),
+        arg7(a7),
+        arg8(a8) { }
+};
+
+// 7 args
+template<class T1,
+         class T2,
+         class T3,
+         class T4,
+         class T5,
+         class T6,
+         class T7>
+struct Package<T1, T2, T3, T4, T5, T6, T7,
+               none_type, none_type> {
+  T1 arg1;
+  T2 arg2;
+  T3 arg3;
+  T4 arg4;
+  T5 arg5;
+  T6 arg6;
+  T7 arg7;
+
+  Package(T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6, T7 a7)
+      : arg1(a1),
+        arg2(a2),
+        arg3(a3),
+        arg4(a4),
+        arg5(a5),
+        arg6(a6),
+        arg7(a7) { }
+};
+
+// 6 args
+template<class T1,
+         class T2,
+         class T3,
+         class T4,
+         class T5,
+         class T6>
+struct Package<T1, T2, T3, T4, T5, T6, none_type,
+               none_type, none_type> {
+  T1 arg1;
+  T2 arg2;
+  T3 arg3;
+  T4 arg4;
+  T5 arg5;
+  T6 arg6;
+
+  Package(T1 a1, T2 a2, T3 a3, T4 a4, T5 a5, T6 a6)
+      : arg1(a1),
+        arg2(a2),
+        arg3(a3),
+        arg4(a4),
+        arg5(a5),
+        arg6(a6) { }
+};
+
+// 5 args
+template<class T1,
+         class T2,
+         class T3,
+         class T4,
+         class T5>
+struct Package<T1, T2, T3, T4, T5, none_type, none_type,
+               none_type, none_type> {
+  T1 arg1;
+  T2 arg2;
+  T3 arg3;
+  T4 arg4;
+  T5 arg5;
+
+  Package(T1 a1, T2 a2, T3 a3, T4 a4, T5 a5)
+      : arg1(a1),
+        arg2(a2),
+        arg3(a3),
+        arg4(a4),
+        arg5(a5) { }
+};
+
+// 4 args
+template<class T1,
+         class T2,
+         class T3,
+         class T4>
+struct Package<T1, T2, T3, T4, none_type, none_type,
+               none_type, none_type, none_type> {
+  T1 arg1;
+  T2 arg2;
+  T3 arg3;
+  T4 arg4;
+
+  Package(T1 a1, T2 a2, T3 a3, T4 a4)
+      : arg1(a1),
+        arg2(a2),
+        arg3(a3),
+        arg4(a4) { }
+};
+
+// 3 args
+template<class T1,
+         class T2,
+         class T3>
+struct Package<T1, T2, T3, none_type, none_type,
+               none_type, none_type, none_type,
+               none_type> {
+  T1 arg1;
+  T2 arg2;
+  T3 arg3;
+
+  Package(T1 a1, T2 a2, T3 a3)
+      : arg1(a1),
+        arg2(a2),
+        arg3(a3) { }
+};
+
+// 2 args (minimum)
+template<class T1,
+         class T2>
+struct Package<T1, T2, none_type, none_type,
+               none_type, none_type, none_type,
+               none_type, none_type> {
+  T1 arg1;
+  T2 arg2;
+
+  Package(T1 a1, T2 a2)
+      : arg1(a1),
+        arg2(a2) { }
+};
+
+// Tack this on to an existing arg pack to add a return value.
+// The syntax for accessing the args is then slightly more stilted,
+// as you must do an extra member access (since the args are stored
+// as a member of this class).
+// The alternative is to declare another slew of templates for functions
+// that return a value, analogous to the above.
+//
+// retval is value-initialized (0 / NULL / false for built-in types), so a
+// caller that reads it after an operation that never assigned it sees a
+// well-defined value instead of an indeterminate one.
+// args is held by reference: the arg pack passed to the constructor must
+// outlive this object.
+
+template<class Retval, class ArgPackage>
+struct WithReturnValue {
+  Retval retval;
+  const ArgPackage &args;
+
+  explicit WithReturnValue(const ArgPackage &args)
+      : retval(), args(args) { }
+};
+
+// We don't want to store a reference to a reference, if ArgPackage is
+// already some reference type.
+template<class Retval, class ArgPackage>
+struct WithReturnValue<Retval, ArgPackage&> {
+  Retval retval;
+  const ArgPackage &args;
+
+  explicit WithReturnValue(const ArgPackage &args)
+      : retval(), args(args) { }
+};
+
+} // namespace args
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_ARG_PACKS_H_
diff --git a/src/include/fst/script/closure.h b/src/include/fst/script/closure.h
new file mode 100644
index 0000000..93b5ec3
--- /dev/null
+++ b/src/include/fst/script/closure.h
@@ -0,0 +1,41 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_CLOSURE_H_
+#define FST_SCRIPT_CLOSURE_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/closure.h>
+
+namespace fst {
+namespace script {
+
+typedef args::Package<MutableFstClass*, const ClosureType> ClosureArgs;
+
+// Typed implementation of the scripting-level Closure: applies the closure
+// type in args->arg2 to the mutable FST held by args->arg1.
+template<class Arc>
+void Closure(ClosureArgs *args) {
+  MutableFst<Arc> *typed_fst = args->arg1->GetMutableFst<Arc>();
+  Closure(typed_fst, args->arg2);
+}
+
+void Closure(MutableFstClass *ofst, ClosureType closure_type);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_CLOSURE_H_
diff --git a/src/include/fst/script/compile-impl.h b/src/include/fst/script/compile-impl.h
new file mode 100644
index 0000000..4aab15b
--- /dev/null
+++ b/src/include/fst/script/compile-impl.h
@@ -0,0 +1,215 @@
+// compile-impl.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Class to compile a binary Fst from textual input.
+
+#ifndef FST_SCRIPT_COMPILE_IMPL_H_
+#define FST_SCRIPT_COMPILE_IMPL_H_
+
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <sstream>
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <iostream>
+#include <fstream>
+#include <fst/fst.h>
+#include <fst/util.h>
+#include <fst/vector-fst.h>
+
+DECLARE_string(fst_field_separator);
+
+namespace fst {
+
+// Compile a binary Fst from textual input, helper class for fstcompile.cc
+// WARNING: Stand-alone use of this class not recommended, most code should
+// read/write using the binary format which is much more efficient.
+//
+// The whole input is consumed by the constructor; the result is available
+// through Fst(). Each non-empty line is split on FLAGS_fst_field_separator
+// into 1 to 5 columns:
+//   1 column : final state, weight Weight::One()
+//   2 columns: final state and its weight
+//   3 columns: acceptor arc  src dst label        (weight Weight::One())
+//   4 columns: acceptor arc  src dst label weight, or
+//              transducer arc src dst ilabel olabel
+//   5 columns: transducer arc src dst ilabel olabel weight
+// Parse errors are reported with FSTERROR() and flagged by setting kError
+// on the compiled FST.
+template <class A> class FstCompiler {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef typename A::Label Label;
+  typedef typename A::Weight Weight;
+
+  // istrm:  text to compile.
+  // source: name of the text source, used in error messages only.
+  // isyms/osyms/ssyms: optional symbol tables for input labels, output
+  //         labels and state IDs; when a table is null the corresponding
+  //         fields are parsed as decimal integers.
+  // accep:  input describes an acceptor (one label column per arc).
+  // ikeep/okeep: attach isyms/osyms to the compiled FST.
+  // nkeep:  keep the state numbering of the text instead of renumbering
+  //         states densely in order of first appearance.
+  // WARNING: use of 'allow_negative_labels = true' not recommended; may
+  // cause conflicts
+  FstCompiler(istream &istrm, const string &source,
+              const SymbolTable *isyms, const SymbolTable *osyms,
+              const SymbolTable *ssyms, bool accep, bool ikeep,
+              bool okeep, bool nkeep, bool allow_negative_labels = false)
+      : nline_(0), source_(source),
+        isyms_(isyms), osyms_(osyms), ssyms_(ssyms),
+        nstates_(0), keep_state_numbering_(nkeep),
+        allow_negative_labels_(allow_negative_labels) {
+    // The separator set does not change while compiling, so build it once
+    // instead of once per input line.
+    const string separator = FLAGS_fst_field_separator + "\n";
+    char line[kLineLen];
+    // NOTE(review): getline() fails on a line that does not fit in the
+    // buffer, so such a line ends this loop without any error being
+    // reported - confirm whether that should be flagged as kError.
+    while (istrm.getline(line, kLineLen)) {
+      ++nline_;
+      vector<char *> col;
+      SplitToVector(line, separator.c_str(), &col, true);
+      if (col.size() == 0 || col[0][0] == '\0')  // empty line
+        continue;
+      if (col.size() > 5 ||
+          (col.size() > 4 && accep) ||
+          (col.size() == 3 && !accep)) {
+        FSTERROR() << "FstCompiler: Bad number of columns, source = "
+                   << source_
+                   << ", line = " << nline_;
+        fst_.SetProperties(kError, kError);
+        return;
+      }
+      StateId s = StrToStateId(col[0]);
+      while (s >= fst_.NumStates())
+        fst_.AddState();
+      // The source state of the first line is the start state.
+      // NOTE(review): nline_ also counts empty lines, so no start state is
+      // set when the first line of the text is empty - confirm intended.
+      if (nline_ == 1)
+        fst_.SetStart(s);
+
+      Arc arc;
+      StateId d = s;
+      switch (col.size()) {
+        case 1:
+          fst_.SetFinal(s, Weight::One());
+          break;
+        case 2:
+          fst_.SetFinal(s, StrToWeight(col[1], true));
+          break;
+        case 3:
+          arc.nextstate = d = StrToStateId(col[1]);
+          arc.ilabel = StrToILabel(col[2]);
+          arc.olabel = arc.ilabel;
+          arc.weight = Weight::One();
+          fst_.AddArc(s, arc);
+          break;
+        case 4:
+          arc.nextstate = d = StrToStateId(col[1]);
+          arc.ilabel = StrToILabel(col[2]);
+          if (accep) {
+            arc.olabel = arc.ilabel;
+            arc.weight = StrToWeight(col[3], false);
+          } else {
+            arc.olabel = StrToOLabel(col[3]);
+            arc.weight = Weight::One();
+          }
+          fst_.AddArc(s, arc);
+          break;
+        case 5:
+          arc.nextstate = d = StrToStateId(col[1]);
+          arc.ilabel = StrToILabel(col[2]);
+          arc.olabel = StrToOLabel(col[3]);
+          arc.weight = StrToWeight(col[4], false);
+          fst_.AddArc(s, arc);
+      }
+      // Make sure the destination state exists as well.
+      while (d >= fst_.NumStates())
+        fst_.AddState();
+    }
+    if (ikeep)
+      fst_.SetInputSymbols(isyms);
+    if (okeep)
+      fst_.SetOutputSymbols(osyms);
+  }
+
+  // Returns the compiled FST.
+  const VectorFst<A> &Fst() const {
+    return fst_;
+  }
+
+ private:
+  // Maximum line length in text file.
+  static const int kLineLen = 8096;
+
+  // Converts the field s to an integer ID: looked up in syms when a symbol
+  // table is given, parsed as a decimal integer otherwise. 'name' is only
+  // used in error messages. On failure the error is logged and kError is
+  // set on the FST; the (possibly meaningless) value is still returned.
+  int64 StrToId(const char *s, const SymbolTable *syms,
+                const char *name, bool allow_negative = false) const {
+    int64 n = 0;
+
+    if (syms) {
+      n = syms->Find(s);
+      if (n == -1 || (!allow_negative && n < 0)) {
+        FSTERROR() << "FstCompiler: Symbol \"" << s
+                   << "\" is not mapped to any integer " << name
+                   << ", symbol table = " << syms->Name()
+                   << ", source = " << source_ << ", line = " << nline_;
+        fst_.SetProperties(kError, kError);
+      }
+    } else {
+      char *p;
+      n = strtoll(s, &p, 10);
+      // p stops short of the end of s when trailing characters are not
+      // part of the number.
+      if (p < s + strlen(s) || (!allow_negative && n < 0)) {
+        FSTERROR() << "FstCompiler: Bad " << name << " integer = \"" << s
+                   << "\", source = " << source_ << ", line = " << nline_;
+        fst_.SetProperties(kError, kError);
+      }
+    }
+    return n;
+  }
+
+  // Converts a state field to a state ID. Unless the original numbering is
+  // kept, IDs are remapped to 0, 1, 2, ... in order of first appearance.
+  StateId StrToStateId(const char *s) {
+    StateId n = StrToId(s, ssyms_, "state ID");
+
+    if (keep_state_numbering_)
+      return n;
+
+    // remap state IDs to make dense set
+    typename unordered_map<StateId, StateId>::const_iterator it = states_.find(n);
+    if (it == states_.end()) {
+      states_[n] = nstates_;
+      return nstates_++;
+    } else {
+      return it->second;
+    }
+  }
+
+  // Converts an input-label field. Returns Label (not StateId) so that
+  // labels are not squeezed through the state ID type when the two differ.
+  Label StrToILabel(const char *s) const {
+    return StrToId(s, isyms_, "arc ilabel", allow_negative_labels_);
+  }
+
+  // Converts an output-label field; see StrToILabel.
+  Label StrToOLabel(const char *s) const {
+    return StrToId(s, osyms_, "arc olabel", allow_negative_labels_);
+  }
+
+  // Parses a weight field. Weight::Zero() is rejected unless allow_zero is
+  // set. On failure the error is logged, kError is set on the FST and
+  // Weight::NoWeight() is returned.
+  Weight StrToWeight(const char *s, bool allow_zero) const {
+    Weight w;
+    istringstream strm(s);
+    strm >> w;
+    if (!strm || (!allow_zero && w == Weight::Zero())) {
+      FSTERROR() << "FstCompiler: Bad weight = \"" << s
+                 << "\", source = " << source_ << ", line = " << nline_;
+      fst_.SetProperties(kError, kError);
+      w = Weight::NoWeight();
+    }
+    return w;
+  }
+
+  mutable VectorFst<A> fst_;   // mutable: const parsers set kError on it
+  size_t nline_;               // number of lines read so far
+  string source_;              // text FST source name
+  const SymbolTable *isyms_;   // ilabel symbol table
+  const SymbolTable *osyms_;   // olabel symbol table
+  const SymbolTable *ssyms_;   // state symbol table
+  unordered_map<StateId, StateId> states_;  // state ID map
+  StateId nstates_;            // number of seen states
+  bool keep_state_numbering_;
+  bool allow_negative_labels_;  // not recommended; may cause conflicts
+
+  DISALLOW_COPY_AND_ASSIGN(FstCompiler);
+};
+
+} // namespace fst
+
+#endif // FST_SCRIPT_COMPILE_IMPL_H_
diff --git a/src/include/fst/script/compile.h b/src/include/fst/script/compile.h
new file mode 100644
index 0000000..bb6ea56
--- /dev/null
+++ b/src/include/fst/script/compile.h
@@ -0,0 +1,92 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_COMPILE_H_
+#define FST_SCRIPT_COMPILE_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/script/compile-impl.h>
+
+namespace fst {
+namespace script {
+
+// Argument bundle for CompileFst.
+// Note: the strings are held as references, not copies. That is safe only
+// because this struct is used solely to pass them deeper in the call
+// graph. Be sure you understand why this is so before using this struct
+// for anything else!
+struct FstCompileArgs {
+  fst::istream &istrm;
+  const string &source;
+  const string &dest;
+  const string &fst_type;
+  const fst::SymbolTable *isyms;
+  const fst::SymbolTable *osyms;
+  const fst::SymbolTable *ssyms;
+  const bool accep;
+  const bool ikeep;
+  const bool okeep;
+  const bool nkeep;
+  const bool allow_negative_labels;
+
+  FstCompileArgs(istream &in, const string &src, const string &dst,
+                 const string &type, const fst::SymbolTable *in_syms,
+                 const fst::SymbolTable *out_syms,
+                 const fst::SymbolTable *state_syms,
+                 bool acceptor, bool keep_isyms, bool keep_osyms,
+                 bool keep_state_numbering,
+                 bool negative_labels = false)
+      : istrm(in),
+        source(src),
+        dest(dst),
+        fst_type(type),
+        isyms(in_syms),
+        osyms(out_syms),
+        ssyms(state_syms),
+        accep(acceptor),
+        ikeep(keep_isyms),
+        okeep(keep_osyms),
+        nkeep(keep_state_numbering),
+        allow_negative_labels(negative_labels) { }
+};
+
+// Compiles the text FST described by args, converts it to args->fst_type
+// when that is not "vector", and writes the result to args->dest.
+// If the conversion fails an error is logged and nothing is written.
+template<class Arc>
+void CompileFst(FstCompileArgs *args) {
+  using fst::FstCompiler;
+  using fst::Convert;
+  using fst::Fst;
+
+  FstCompiler<Arc> fstcompiler(args->istrm, args->source, args->isyms,
+                               args->osyms, args->ssyms,
+                               args->accep, args->ikeep,
+                               args->okeep, args->nkeep,
+                               args->allow_negative_labels);
+
+  // 'fst' points either at the compiler's own FST (not owned here) or at
+  // 'converted', which Convert() allocates and this function must free.
+  const Fst<Arc> *fst = &fstcompiler.Fst();
+  const Fst<Arc> *converted = 0;
+  if (args->fst_type != "vector") {
+    converted = Convert<Arc>(*fst, args->fst_type);
+    if (!converted) {
+      FSTERROR() << "Failed to convert FST to desired type: "
+                 << args->fst_type;
+      return;
+    }
+    fst = converted;
+  }
+
+  fst->Write(args->dest);
+  delete converted;  // no-op when no conversion took place
+}
+
+void CompileFst(istream &istrm, const string &source, const string &dest,
+ const string &fst_type, const string &arc_type,
+ const SymbolTable *isyms,
+ const SymbolTable *osyms, const SymbolTable *ssyms,
+ bool accep, bool ikeep, bool okeep, bool nkeep,
+ bool allow_negative_labels);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_COMPILE_H_
diff --git a/src/include/fst/script/compose.h b/src/include/fst/script/compose.h
new file mode 100644
index 0000000..96375f7
--- /dev/null
+++ b/src/include/fst/script/compose.h
@@ -0,0 +1,63 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_COMPOSE_H_
+#define FST_SCRIPT_COMPOSE_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/compose.h>
+
+namespace fst {
+namespace script {
+
+typedef args::Package<const FstClass&, const FstClass&,
+ MutableFstClass*, ComposeFilter> ComposeArgs1;
+
+// Typed implementation of the scripting-level Compose taking a
+// ComposeFilter: composes args->arg1 with args->arg2 into args->arg3.
+template<class Arc>
+void Compose(ComposeArgs1 *args) {
+  const Fst<Arc> &lhs = *(args->arg1.GetFst<Arc>());
+  const Fst<Arc> &rhs = *(args->arg2.GetFst<Arc>());
+  MutableFst<Arc> *result = args->arg3->GetMutableFst<Arc>();
+
+  Compose(lhs, rhs, result, args->arg4);
+}
+
+typedef fst::ComposeOptions ComposeOptions;
+
+typedef args::Package<const FstClass&, const FstClass&,
+ MutableFstClass*, const ComposeOptions &> ComposeArgs2;
+
+// Typed implementation of the scripting-level Compose taking
+// ComposeOptions: composes args->arg1 with args->arg2 into args->arg3.
+template<class Arc>
+void Compose(ComposeArgs2 *args) {
+  const Fst<Arc> &lhs = *(args->arg1.GetFst<Arc>());
+  const Fst<Arc> &rhs = *(args->arg2.GetFst<Arc>());
+  MutableFst<Arc> *result = args->arg3->GetMutableFst<Arc>();
+
+  Compose(lhs, rhs, result, args->arg4);
+}
+
+void Compose(const FstClass &ifst1, const FstClass &ifst2,
+ MutableFstClass *ofst,
+ const ComposeOptions &opts = fst::script::ComposeOptions());
+
+void Compose(const FstClass &ifst1, const FstClass &ifst2,
+ MutableFstClass *ofst, ComposeFilter compose_filter);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_COMPOSE_H_
diff --git a/src/include/fst/script/concat.h b/src/include/fst/script/concat.h
new file mode 100644
index 0000000..46c4407
--- /dev/null
+++ b/src/include/fst/script/concat.h
@@ -0,0 +1,54 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_CONCAT_H_
+#define FST_SCRIPT_CONCAT_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/concat.h>
+
+namespace fst {
+namespace script {
+
+typedef args::Package<MutableFstClass*, const FstClass&> ConcatArgs1;
+typedef args::Package<const FstClass&, MutableFstClass*> ConcatArgs2;
+
+// Typed implementation of Concat(MutableFstClass *, const FstClass &):
+// the mutable FST is the first operand and receives the result.
+template<class Arc>
+void Concat(ConcatArgs1 *args) {
+  MutableFst<Arc> *target = args->arg1->GetMutableFst<Arc>();
+  const Fst<Arc> &other = *(args->arg2.GetFst<Arc>());
+
+  Concat(target, other);
+}
+
+// Typed implementation of Concat(const FstClass &, MutableFstClass *):
+// the mutable FST is the second operand and receives the result.
+template<class Arc>
+void Concat(ConcatArgs2 *args) {
+  const Fst<Arc> &other = *(args->arg1.GetFst<Arc>());
+  MutableFst<Arc> *target = args->arg2->GetMutableFst<Arc>();
+
+  Concat(other, target);
+}
+
+void Concat(MutableFstClass *ofst, const FstClass &ifst);
+void Concat(const FstClass &ifst, MutableFstClass *ofst);
+
+} // namespace script
+} // namespace fst
+
+
+
+#endif // FST_SCRIPT_CONCAT_H_
diff --git a/src/include/fst/script/connect.h b/src/include/fst/script/connect.h
new file mode 100644
index 0000000..19c4390
--- /dev/null
+++ b/src/include/fst/script/connect.h
@@ -0,0 +1,45 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_CONNECT_H_
+#define FST_SCRIPT_CONNECT_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/dfs-visit.h>
+#include <fst/connect.h>
+
+namespace fst {
+namespace script {
+
+// Typed implementation of the scripting-level Connect. It is hidden from
+// SWIG because it takes the same arguments as the untyped version, which
+// confuses SWIG.
+#ifndef SWIG
+template<class Arc>
+void Connect(MutableFstClass *fst) {
+  MutableFst<Arc> *mutable_fst = fst->GetMutableFst<Arc>();
+  Connect(mutable_fst);
+}
+#endif
+
+void Connect(MutableFstClass *fst);
+
+} // namespace script
+} // namespace fst
+
+
+
+#endif // FST_SCRIPT_CONNECT_H_
diff --git a/src/include/fst/script/convert.h b/src/include/fst/script/convert.h
new file mode 100644
index 0000000..2c70a70
--- /dev/null
+++ b/src/include/fst/script/convert.h
@@ -0,0 +1,49 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_CONVERT_H_
+#define FST_SCRIPT_CONVERT_H_
+
+#include <string>
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+
+namespace fst {
+namespace script {
+
+typedef args::Package<const FstClass&, const string&> ConvertInnerArgs;
+typedef args::WithReturnValue<FstClass*, ConvertInnerArgs> ConvertArgs;
+
+// Typed implementation of the scripting-level Convert: converts
+// args->args.arg1 to the FST type named by args->args.arg2 and stores a
+// newly allocated FstClass for the result in args->retval.
+// When the conversion fails (Convert returns NULL), args->retval is set to
+// NULL instead of wrapping the null FST.
+// NOTE(review): assumes FstClass must not be constructed from a null FST -
+// confirm against fst-class.h.
+template<class Arc>
+void Convert(ConvertArgs *args) {
+  const Fst<Arc> &fst = *(args->args.arg1.GetFst<Arc>());
+  const string &new_type = args->args.arg2;
+
+  Fst<Arc> *result = Convert(fst, new_type);
+  args->retval = result ? new FstClass(result) : 0;
+  // The original code deleted 'result' right after wrapping it, so the
+  // temporary is still released here.
+  delete result;
+}
+
+#ifdef SWIG
+%newobject Convert;
+#endif
+FstClass *Convert(const FstClass& f, const string &new_type);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_CONVERT_H_
diff --git a/src/include/fst/script/decode.h b/src/include/fst/script/decode.h
new file mode 100644
index 0000000..1064ad5
--- /dev/null
+++ b/src/include/fst/script/decode.h
@@ -0,0 +1,46 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_DECODE_H_
+#define FST_SCRIPT_DECODE_H_
+
+#include <string>
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/encode.h>
+
+namespace fst {
+namespace script {
+
+typedef args::Package<MutableFstClass*, const string&> DecodeArgs;
+
+// Typed implementation of the scripting-level Decode: reads an
+// EncodeMapper from the file named by args->arg2 and uses it to decode the
+// mutable FST held by args->arg1.
+// If the mapper cannot be read, an error is logged, kError is set on the
+// FST and the FST is otherwise left untouched.
+// NOTE(review): assumes EncodeMapper::Read reports failure by returning
+// NULL - confirm against encode.h.
+template<class Arc>
+void Decode(DecodeArgs *args) {
+  MutableFst<Arc> *ofst = args->arg1->GetMutableFst<Arc>();
+
+  EncodeMapper<Arc> *decoder = EncodeMapper<Arc>::Read(args->arg2, DECODE);
+  if (!decoder) {
+    FSTERROR() << "Decode: Failed to read decoder from " << args->arg2;
+    ofst->SetProperties(kError, kError);
+    return;
+  }
+  Decode(ofst, *decoder);
+
+  delete decoder;
+}
+
+void Decode(MutableFstClass *fst, const string &coder_fname);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_DECODE_H_
diff --git a/src/include/fst/script/determinize.h b/src/include/fst/script/determinize.h
new file mode 100644
index 0000000..38fd7ad
--- /dev/null
+++ b/src/include/fst/script/determinize.h
@@ -0,0 +1,68 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_DETERMINIZE_H_
+#define FST_SCRIPT_DETERMINIZE_H_
+
+#include <fst/determinize.h>
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/script/weight-class.h>
+
+namespace fst {
+namespace script {
+
+// Arc-type independent options for the scripting-level Determinize. The
+// typed Determinize copies each field into fst::DeterminizeOptions<Arc>.
+struct DeterminizeOptions {
+  float delta;
+  WeightClass weight_threshold;
+  int64 state_threshold;
+  int64 subsequential_label;
+
+  explicit DeterminizeOptions(
+      float init_delta = fst::kDelta,
+      WeightClass init_weight_threshold = fst::script::WeightClass::Zero(),
+      int64 init_state_threshold = fst::kNoStateId,
+      int64 init_subsequential_label = 0)
+      : delta(init_delta),
+        weight_threshold(init_weight_threshold),
+        state_threshold(init_state_threshold),
+        subsequential_label(init_subsequential_label) {}
+};
+
+typedef args::Package<const FstClass&, MutableFstClass*,
+ const DeterminizeOptions &> DeterminizeArgs;
+
+// Typed implementation of the scripting-level Determinize: translates the
+// script options in args->arg3 into fst::DeterminizeOptions<Arc> and
+// determinizes args->arg1 into args->arg2.
+template<class Arc>
+void Determinize(DeterminizeArgs *args) {
+  const Fst<Arc> &input = *(args->arg1.GetFst<Arc>());
+  MutableFst<Arc> *output = args->arg2->GetMutableFst<Arc>();
+  const DeterminizeOptions &script_opts = args->arg3;
+
+  // Copy the options field by field; the weight threshold is unwrapped
+  // from its WeightClass into the arc's weight type.
+  fst::DeterminizeOptions<Arc> typed_opts;
+  typed_opts.delta = script_opts.delta;
+  typed_opts.weight_threshold =
+      *(script_opts.weight_threshold.GetWeight<typename Arc::Weight>());
+  typed_opts.state_threshold = script_opts.state_threshold;
+  typed_opts.subsequential_label = script_opts.subsequential_label;
+
+  Determinize(input, output, typed_opts);
+}
+
+void Determinize(const FstClass &ifst, MutableFstClass *ofst,
+ const DeterminizeOptions &opts =
+ fst::script::DeterminizeOptions());
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_DETERMINIZE_H_
diff --git a/src/include/fst/script/difference.h b/src/include/fst/script/difference.h
new file mode 100644
index 0000000..76490d4
--- /dev/null
+++ b/src/include/fst/script/difference.h
@@ -0,0 +1,67 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_DIFFERENCE_H_
+#define FST_SCRIPT_DIFFERENCE_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/script/compose.h> // for ComposeFilter
+#include <fst/difference.h>
+
+namespace fst {
+namespace script {
+
+// Argument pack for Difference with an explicit compose filter:
+// (first input, second input, output, filter).
+typedef args::Package<const FstClass&, const FstClass&,
+                      MutableFstClass*, ComposeFilter> DifferenceArgs1;
+
+// Arc-typed Difference (filter variant): unboxes the three FSTs and forwards
+// them, together with the fourth packed argument, to the arc-typed
+// Difference.
+template<class Arc>
+void Difference(DifferenceArgs1 *args) {
+  const Fst<Arc> &lhs = *(args->arg1.GetFst<Arc>());
+  const Fst<Arc> &rhs = *(args->arg2.GetFst<Arc>());
+  MutableFst<Arc> *result = args->arg3->GetMutableFst<Arc>();
+
+  Difference(lhs, rhs, result, args->arg4);
+}
+
+// Argument pack for Difference with full compose options:
+// (first input, second input, output, options).
+typedef args::Package<const FstClass&, const FstClass&,
+                      MutableFstClass*, const ComposeOptions &> DifferenceArgs2;
+
+// Arc-typed Difference (options variant): unboxes the three FSTs and forwards
+// them, together with the packed options, to the arc-typed Difference.
+template<class Arc>
+void Difference(DifferenceArgs2 *args) {
+  const Fst<Arc> &lhs = *(args->arg1.GetFst<Arc>());
+  const Fst<Arc> &rhs = *(args->arg2.GetFst<Arc>());
+  MutableFst<Arc> *result = args->arg3->GetMutableFst<Arc>();
+
+  Difference(lhs, rhs, result, args->arg4);
+}
+
+
+// Arc-type-agnostic Difference taking an explicit compose filter; only
+// declared in this header.
+void Difference(const FstClass &ifst1, const FstClass &ifst2,
+ MutableFstClass *ofst,
+ ComposeFilter compose_filter);
+
+// Arc-type-agnostic Difference taking compose options; only declared in this
+// header. When opts is omitted a default-constructed ComposeOptions is used.
+void Difference(const FstClass &ifst1, const FstClass &ifst2,
+ MutableFstClass *ofst,
+ const ComposeOptions &opts = fst::script::ComposeOptions());
+
+
+} // namespace script
+} // namespace fst
+
+
+
+#endif // FST_SCRIPT_DIFFERENCE_H_
diff --git a/src/include/fst/script/draw-impl.h b/src/include/fst/script/draw-impl.h
new file mode 100644
index 0000000..e346649
--- /dev/null
+++ b/src/include/fst/script/draw-impl.h
@@ -0,0 +1,234 @@
+// draw-impl.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// Class to draw a binary FST by producing a text file in dot format,
+// helper class to fstdraw.cc
+
+#ifndef FST_SCRIPT_DRAW_IMPL_H_
+#define FST_SCRIPT_DRAW_IMPL_H_
+
+#include <sstream>
+#include <string>
+
+#include <fst/script/fst-class.h>
+#include <fst/fst.h>
+#include <fst/util.h>
+
+namespace fst {
+
+// Prints a binary Fst in the dot textual format; helper class for fstdraw.cc.
+// WARNING: Stand-alone use is not recommended.
+template <class A> class FstDrawer {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef typename A::Label Label;
+  typedef typename A::Weight Weight;
+
+  // Captures the FST (by reference), the optional symbol tables and the
+  // layout settings used by Draw(). A null symbol table makes the
+  // corresponding IDs print as plain integers. Acceptor-style arc labels
+  // (input label only) are used only when `accep` is set and the FST reports
+  // the kAcceptor property.
+  FstDrawer(const Fst<A> &fst,
+            const SymbolTable *isyms,
+            const SymbolTable *osyms,
+            const SymbolTable *ssyms,
+            bool accep,
+            string title,
+            float width,
+            float height,
+            bool portrait,
+            bool vertical,
+            float ranksep,
+            float nodesep,
+            int fontsize,
+            int precision,
+            bool show_weight_one)
+      : fst_(fst), isyms_(isyms), osyms_(osyms), ssyms_(ssyms),
+        accep_(accep && fst.Properties(kAcceptor, true)), ostrm_(0),
+        title_(title), width_(width), height_(height), portrait_(portrait),
+        vertical_(vertical), ranksep_(ranksep), nodesep_(nodesep),
+        fontsize_(fontsize), precision_(precision),
+        show_weight_one_(show_weight_one) {}
+
+  // Writes the dot description of the FST to *strm. `strm` must be non-null:
+  // it is dereferenced unconditionally and there is no fallback to stdout.
+  // `dest` names the destination in error messages and gates the label line.
+  // Nothing at all is written when the FST has no start state.
+  void Draw(ostream *strm, const string &dest) {
+    ostrm_ = strm;
+    dest_ = dest;
+    const StateId start = fst_.Start();
+    if (start == kNoStateId)
+      return;
+
+    PrintString("digraph FST {\n");
+    if (vertical_)
+      PrintString("rankdir = BT;\n");
+    else
+      PrintString("rankdir = LR;\n");
+    PrintString("size = \"");
+    Print(width_);
+    PrintString(",");
+    Print(height_);
+    PrintString("\";\n");
+    // NOTE(review): the guard tests dest_ although the line prints title_
+    // (unescaped) -- confirm this is intended before changing it.
+    if (!dest_.empty())
+      PrintString("label = \"" + title_ + "\";\n");
+    PrintString("center = 1;\n");
+    if (portrait_)
+      PrintString("orientation = Portrait;\n");
+    else
+      PrintString("orientation = Landscape;\n");
+    PrintString("ranksep = \"");
+    Print(ranksep_);
+    PrintString("\";\n");
+    PrintString("nodesep = \"");
+    Print(nodesep_);
+    PrintString("\";\n");
+    // The initial state is emitted first, then every other state in
+    // iteration order.
+    DrawState(start);
+    for (StateIterator< Fst<A> > siter(fst_);
+         !siter.Done();
+         siter.Next()) {
+      const StateId s = siter.Value();
+      if (s != start)
+        DrawState(s);
+    }
+    PrintString("}\n");
+  }
+
+ private:
+  // Maximum line length in text file. Not referenced inside this class.
+  static const int kLineLen = 8096;
+
+  // Writes `s` verbatim to the current output stream.
+  void PrintString(const string &s) const {
+    *ostrm_ << s;
+  }
+
+  // Appends `s` to *ns, prefixing every backslash and double quote with a
+  // backslash. Dot will not deal gracefully with these if they are not
+  // escaped.
+  inline void EscapeChars(const string &s, string* ns) const {
+    for (const char *c = s.c_str(); *c; ++c) {
+      if (*c == '\\' || *c == '"') ns->push_back('\\');
+      ns->push_back(*c);
+    }
+  }
+
+  // Prints `id` through `syms` (escaped) when a table is given, otherwise as
+  // a decimal integer. An ID without a symbol is reported via FSTERROR and
+  // drawn as "?". `name` is currently unused.
+  void PrintId(int64 id, const SymbolTable *syms,
+               const char *name) const {
+    if (syms) {
+      string symbol = syms->Find(id);
+      if (symbol.empty()) {
+        FSTERROR() << "FstDrawer: Integer " << id
+                   << " is not mapped to any textual symbol"
+                   << ", symbol table = " << syms->Name()
+                   << ", destination = " << dest_;
+        symbol = "?";
+      }
+      string nsymbol;
+      EscapeChars(symbol, &nsymbol);
+      PrintString(nsymbol);
+    } else {
+      ostringstream sid;
+      sid << id;
+      PrintString(sid.str());
+    }
+  }
+
+  void PrintStateId(StateId s) const {
+    PrintId(s, ssyms_, "state ID");
+  }
+
+  void PrintILabel(Label l) const {
+    PrintId(l, isyms_, "arc input label");
+  }
+
+  void PrintOLabel(Label l) const {
+    PrintId(l, osyms_, "arc output label");
+  }
+
+  // Streams any value to the current output stream.
+  template <class T>
+  void Print(T t) const {
+    *ostrm_ << t;
+  }
+
+  // Emits the node statement for state `s` followed by one edge statement
+  // per outgoing arc. Final states (final weight != Zero) are drawn as
+  // double circles; the start state is drawn bold.
+  void DrawState(StateId s) const {
+    Print(s);
+    PrintString(" [label = \"");
+    PrintStateId(s);
+    Weight final = fst_.Final(s);
+    if (final != Weight::Zero()) {
+      if (show_weight_one_ || (final != Weight::One())) {
+        PrintString("/");
+        Print(final);
+      }
+      PrintString("\", shape = doublecircle,");
+    } else {
+      PrintString("\", shape = circle,");
+    }
+    if (s == fst_.Start())
+      PrintString(" style = bold,");
+    else
+      PrintString(" style = solid,");
+    PrintString(" fontsize = ");
+    Print(fontsize_);
+    PrintString("]\n");
+    for (ArcIterator< Fst<A> > aiter(fst_, s);
+         !aiter.Done();
+         aiter.Next()) {
+      // Bind by reference: the arc is only read before the next Next(), so
+      // copying it (and its weight) on every iteration is unnecessary.
+      const Arc &arc = aiter.Value();
+      PrintString("\t");
+      Print(s);
+      PrintString(" -> ");
+      Print(arc.nextstate);
+      PrintString(" [label = \"");
+      PrintILabel(arc.ilabel);
+      if (!accep_) {
+        PrintString(":");
+        PrintOLabel(arc.olabel);
+      }
+      if (show_weight_one_ || (arc.weight != Weight::One())) {
+        PrintString("/");
+        Print(arc.weight);
+      }
+      PrintString("\", fontsize = ");
+      Print(fontsize_);
+      PrintString("];\n");
+    }
+  }
+
+  const Fst<A> &fst_;
+  const SymbolTable *isyms_;     // ilabel symbol table
+  const SymbolTable *osyms_;     // olabel symbol table
+  const SymbolTable *ssyms_;     // slabel symbol table
+  bool accep_;                   // print as acceptor when possible
+  ostream *ostrm_;               // drawn FST destination
+  string dest_;                  // drawn FST destination name
+
+  string title_;
+  float width_;
+  float height_;
+  bool portrait_;
+  bool vertical_;
+  float ranksep_;
+  float nodesep_;
+  int fontsize_;
+  int precision_;                // stored but not read inside this class
+  bool show_weight_one_;
+
+  DISALLOW_COPY_AND_ASSIGN(FstDrawer);
+};
+
+} // namespace fst
+
+#endif // FST_SCRIPT_DRAW_IMPL_H_
diff --git a/src/include/fst/script/draw.h b/src/include/fst/script/draw.h
new file mode 100644
index 0000000..1611ad1
--- /dev/null
+++ b/src/include/fst/script/draw.h
@@ -0,0 +1,113 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_DRAW_H_
+#define FST_SCRIPT_DRAW_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/script/draw-impl.h>
+#include <iostream>
+#include <fstream>
+
+namespace fst {
+namespace script {
+
+// Note: it is safe to pass these strings as references because
+// this struct is only used to pass them deeper in the call graph.
+// Be sure you understand why this is so before using this struct
+// for anything else!
+// Plain bundle of everything DrawFst needs. The FST and both strings are
+// held by reference, so an instance must not outlive the objects it was
+// built from.
+struct FstDrawerArgs {
+  const FstClass &fst;
+  const SymbolTable *isyms;
+  const SymbolTable *osyms;
+  const SymbolTable *ssyms;
+  const bool accep;
+  const string& title;
+  const float width;
+  const float height;
+  const bool portrait;
+  const bool vertical;
+  const float ranksep;
+  const float nodesep;
+  const int fontsize;
+  const int precision;
+  const bool show_weight_one;
+  ostream *ostrm;
+  const string &dest;
+
+  // Member-wise constructor; each parameter initializes the member of the
+  // same name.
+  FstDrawerArgs(const FstClass &fst,
+                const SymbolTable *isyms,
+                const SymbolTable *osyms,
+                const SymbolTable *ssyms,
+                bool accep,
+                const string &title,
+                float width,
+                float height,
+                bool portrait,
+                bool vertical,
+                float ranksep,
+                float nodesep,
+                int fontsize,
+                int precision,
+                bool show_weight_one,
+                ostream *ostrm,
+                const string &dest)
+      : fst(fst),
+        isyms(isyms),
+        osyms(osyms),
+        ssyms(ssyms),
+        accep(accep),
+        title(title),
+        width(width),
+        height(height),
+        portrait(portrait),
+        vertical(vertical),
+        ranksep(ranksep),
+        nodesep(nodesep),
+        fontsize(fontsize),
+        precision(precision),
+        show_weight_one(show_weight_one),
+        ostrm(ostrm),
+        dest(dest) { }
+};
+
+
+// Arc-typed drawing entry: unboxes the FstClass, builds an FstDrawer<Arc>
+// from the bundled settings and draws to args->ostrm.
+template<class Arc>
+void DrawFst(FstDrawerArgs *args) {
+  const Fst<Arc> &typed_fst = *(args->fst.GetFst<Arc>());
+
+  FstDrawer<Arc> drawer(typed_fst,
+                        args->isyms, args->osyms, args->ssyms,
+                        args->accep, args->title,
+                        args->width, args->height,
+                        args->portrait, args->vertical,
+                        args->ranksep, args->nodesep,
+                        args->fontsize, args->precision,
+                        args->show_weight_one);
+  drawer.Draw(args->ostrm, args->dest);
+}
+
+// Arc-type-agnostic drawing entry point; only declared in this header. The
+// parameter list mirrors the members of FstDrawerArgs one for one.
+void DrawFst(const FstClass &fst,
+ const SymbolTable *isyms,
+ const SymbolTable *osyms,
+ const SymbolTable *ssyms,
+ bool accep,
+ const string &title,
+ float width,
+ float height,
+ bool portrait,
+ bool vertical,
+ float ranksep,
+ float nodesep,
+ int fontsize,
+ int precision,
+ bool show_weight_one,
+ ostream *ostrm,
+ const string &dest);
+
+} // namespace script
+} // namespace fst
+
+
+
+#endif // FST_SCRIPT_DRAW_H_
diff --git a/src/include/fst/script/encode.h b/src/include/fst/script/encode.h
new file mode 100644
index 0000000..dc1a290
--- /dev/null
+++ b/src/include/fst/script/encode.h
@@ -0,0 +1,58 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_ENCODE_H_
+#define FST_SCRIPT_ENCODE_H_
+
+#include <string>
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/encode.h>
+
+namespace fst {
+namespace script {
+
+// Argument pack for Encode:
+// (FST to encode in place, encode flags, reuse_encoder, encoder file name).
+typedef args::Package<MutableFstClass*, uint32, bool,
+                      const string &> EncodeArgs;
+
+// Encodes the FST in place for a concrete Arc. With reuse_encoder set, the
+// encoder is read from coder_fname and `flags` is not used; otherwise a new
+// encoder is built from `flags` and written to coder_fname after encoding.
+template<class Arc>
+void Encode(EncodeArgs *args) {
+  MutableFst<Arc> *ofst = args->arg1->GetMutableFst<Arc>();
+  const uint32 flags = args->arg2;
+  const bool reuse_encoder = args->arg3;
+  const string &coder_fname = args->arg4;
+
+  EncodeMapper<Arc> *encoder = reuse_encoder
+      ? EncodeMapper<Arc>::Read(coder_fname, ENCODE)
+      : new EncodeMapper<Arc>(flags, ENCODE);
+
+  // Read() hands back a pointer that is presumably null when the encoder
+  // file cannot be loaded (verify against EncodeMapper); never pass a null
+  // mapper on to the arc-typed Encode.
+  if (!encoder) {
+    FSTERROR() << "Encode: Can't read encoder: " << coder_fname;
+    return;
+  }
+
+  Encode(ofst, encoder);
+  if (!reuse_encoder)
+    encoder->Write(coder_fname);
+
+  delete encoder;
+}
+
+// Arc-type-agnostic Encode taking a boxed FST; only declared in this header.
+// The parameters correspond, in order, to the fields of EncodeArgs.
+void Encode(MutableFstClass *fst, uint32 flags, bool reuse_encoder,
+ const string &coder_fname);
+
+} // namespace script
+} // namespace fst
+
+
+
+#endif // FST_SCRIPT_ENCODE_H_
diff --git a/src/include/fst/script/epsnormalize.h b/src/include/fst/script/epsnormalize.h
new file mode 100644
index 0000000..50b12da
--- /dev/null
+++ b/src/include/fst/script/epsnormalize.h
@@ -0,0 +1,44 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_EPSNORMALIZE_H_
+#define FST_SCRIPT_EPSNORMALIZE_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/epsnormalize.h>
+
+namespace fst {
+namespace script {
+
+// Argument pack for EpsNormalize: (input FST, output FST, normalization type).
+typedef args::Package<const FstClass&, MutableFstClass*,
+                      EpsNormalizeType> EpsNormalizeArgs;
+
+// Arc-typed EpsNormalize: unboxes both FSTs and forwards them with the
+// packed normalization type to the arc-typed EpsNormalize.
+template<class Arc>
+void EpsNormalize(EpsNormalizeArgs *args) {
+  const Fst<Arc> &input = *(args->arg1.GetFst<Arc>());
+  MutableFst<Arc> *output = args->arg2->GetMutableFst<Arc>();
+
+  EpsNormalize(input, output, args->arg3);
+}
+
+// Arc-type-agnostic EpsNormalize; only declared in this header. norm_type
+// defaults to EPS_NORM_INPUT.
+void EpsNormalize(const FstClass &ifst, MutableFstClass *ofst,
+ EpsNormalizeType norm_type = EPS_NORM_INPUT);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_EPSNORMALIZE_H_
diff --git a/src/include/fst/script/equal.h b/src/include/fst/script/equal.h
new file mode 100644
index 0000000..9fb2d3c
--- /dev/null
+++ b/src/include/fst/script/equal.h
@@ -0,0 +1,45 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_EQUAL_H_
+#define FST_SCRIPT_EQUAL_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/equal.h>
+
+namespace fst {
+namespace script {
+
+// Argument pack for Equal: (first FST, second FST, delta), wrapped so that
+// the boolean result can be handed back through the pack.
+typedef args::Package<const FstClass&, const FstClass&, float> EqualInnerArgs;
+typedef args::WithReturnValue<bool, EqualInnerArgs> EqualArgs;
+
+// Arc-typed Equal: unboxes both FSTs, compares them with the packed delta
+// and stores the outcome in args->retval.
+template<class Arc>
+void Equal(EqualArgs *args) {
+  const Fst<Arc> &lhs = *(args->args.arg1.GetFst<Arc>());
+  const Fst<Arc> &rhs = *(args->args.arg2.GetFst<Arc>());
+
+  const bool same = Equal(lhs, rhs, args->args.arg3);
+  args->retval = same;
+}
+
+// Arc-type-agnostic Equal; only declared in this header. delta defaults to
+// kDelta.
+bool Equal(const FstClass &fst1, const FstClass &fst2,
+ float delta = kDelta);
+
+} // namespace script
+} // namespace fst
+
+
+#endif // FST_SCRIPT_EQUAL_H_
diff --git a/src/include/fst/script/equivalent.h b/src/include/fst/script/equivalent.h
new file mode 100644
index 0000000..43460c6
--- /dev/null
+++ b/src/include/fst/script/equivalent.h
@@ -0,0 +1,47 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_EQUIVALENT_H_
+#define FST_SCRIPT_EQUIVALENT_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/equivalent.h>
+
+namespace fst {
+namespace script {
+
+// Argument pack for Equivalent: (first FST, second FST, delta), wrapped so
+// that the boolean result can be handed back through the pack.
+typedef args::Package<const FstClass &, const FstClass &,
+                      float> EquivalentInnerArgs;
+typedef args::WithReturnValue<bool, EquivalentInnerArgs> EquivalentArgs;
+
+// Arc-typed Equivalent: unboxes both FSTs, tests them with the packed delta
+// and stores the outcome in args->retval.
+template<class Arc>
+void Equivalent(EquivalentArgs *args) {
+  const Fst<Arc> &lhs = *(args->args.arg1.GetFst<Arc>());
+  const Fst<Arc> &rhs = *(args->args.arg2.GetFst<Arc>());
+
+  const bool equivalent = Equivalent(lhs, rhs, args->args.arg3);
+  args->retval = equivalent;
+}
+
+// Arc-type-agnostic Equivalent; only declared in this header. delta defaults
+// to kDelta.
+bool Equivalent(const FstClass &fst1, const FstClass &fst2,
+ float delta = kDelta);
+
+} // namespace script
+} // namespace fst
+
+
+
+#endif // FST_SCRIPT_EQUIVALENT_H_
diff --git a/src/include/fst/script/fst-class.h b/src/include/fst/script/fst-class.h
new file mode 100644
index 0000000..3eacab4
--- /dev/null
+++ b/src/include/fst/script/fst-class.h
@@ -0,0 +1,343 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_FST_CLASS_H_
+#define FST_SCRIPT_FST_CLASS_H_
+
+#include <string>
+
+#include <fst/fst.h>
+#include <fst/mutable-fst.h>
+#include <fst/vector-fst.h>
+#include <iostream>
+#include <fstream>
+
+// Classes to support "boxing" all existing types of FST arcs in a single
+// FstClass which hides the arc types. This allows clients to load
+// and work with FSTs without knowing the arc type.
+
+// These classes are only recommended for use in high-level scripting
+// applications. Most users should use the lower-level templated versions
+// corresponding to these classes.
+
+namespace fst {
+namespace script {
+
+//
+// Abstract base class defining the set of functionalities implemented
+// in all impls, and passed through by all bases. Below FstClassBase
+// the class hierarchy bifurcates; FstClassImplBase serves as the base
+// class for all implementations (of which FstClassImpl is currently
+// the only one) and FstClass serves as the base class for all
+// interfaces.
+//
+// Pure-virtual interface: type-name queries, symbol-table accessors,
+// Write() and Properties(). The virtual destructor allows deletion through
+// a base pointer.
+class FstClassBase {
+ public:
+ // Type names of the boxed FST's arc, FST and weight, respectively.
+ virtual const string &ArcType() const = 0;
+ virtual const string &FstType() const = 0;
+ virtual const string &WeightType() const = 0;
+ // Symbol tables attached to the boxed FST.
+ virtual const SymbolTable *InputSymbols() const = 0;
+ virtual const SymbolTable *OutputSymbols() const = 0;
+ // Writes the boxed FST to the file named fname.
+ virtual void Write(const string& fname) const = 0;
+ // Property bits of the boxed FST selected by mask.
+ virtual uint64 Properties(uint64 mask, bool test) const = 0;
+ virtual ~FstClassBase() { }
+};
+
+// Base class of the implementation side of the hierarchy: adds copying and
+// symbol-table mutation to the read-only FstClassBase interface.
+class FstClassImplBase : public FstClassBase {
+ public:
+  // Returns a newly allocated copy of this implementation object.
+  virtual FstClassImplBase *Copy() = 0;
+  // Replace the input / output symbol table of the boxed FST.
+  virtual void SetInputSymbols(SymbolTable *is) = 0;
+  virtual void SetOutputSymbols(SymbolTable *os) = 0;
+  virtual ~FstClassImplBase() { }
+};
+
+
+//
+// CONTAINER CLASS
+// Wraps an Fst<Arc>, hiding its arc type. Whether this Fst<Arc>
+// pointer refers to a special kind of FST (e.g. a MutableFst) is
+// known by the type of interface class that owns the pointer to this
+// container.
+//
+
+// Holds one Fst<Arc> and answers the type-erased FstClassImplBase interface
+// by forwarding to it. The held FST is deleted in the destructor.
+// NOTE(review): no copy constructor or assignment is declared here although
+// the destructor deletes impl_ -- confirm instances are never copied by
+// value.
+template<class Arc>
+class FstClassImpl : public FstClassImplBase {
+ public:
+  // With should_own the pointer is adopted as-is; otherwise the result of
+  // impl->Copy() is stored and `impl` itself is left to the caller.
+  explicit FstClassImpl(Fst<Arc> *impl, bool should_own = false)
+      : impl_(should_own ? impl : impl->Copy()) { }
+
+  virtual const string &ArcType() const { return Arc::Type(); }
+
+  virtual const string &FstType() const { return impl_->Type(); }
+
+  virtual const string &WeightType() const { return Arc::Weight::Type(); }
+
+  virtual const SymbolTable *InputSymbols() const {
+    return impl_->InputSymbols();
+  }
+
+  virtual const SymbolTable *OutputSymbols() const {
+    return impl_->OutputSymbols();
+  }
+
+  // Warning: calling this method casts the FST to a mutable FST.
+  virtual void SetInputSymbols(SymbolTable *is) {
+    MutableFst<Arc> *mutable_fst = static_cast<MutableFst<Arc> *>(impl_);
+    mutable_fst->SetInputSymbols(is);
+  }
+
+  // Warning: calling this method casts the FST to a mutable FST.
+  virtual void SetOutputSymbols(SymbolTable *os) {
+    MutableFst<Arc> *mutable_fst = static_cast<MutableFst<Arc> *>(impl_);
+    mutable_fst->SetOutputSymbols(os);
+  }
+
+  virtual void Write(const string &fname) const { impl_->Write(fname); }
+
+  virtual uint64 Properties(uint64 mask, bool test) const {
+    return impl_->Properties(mask, test);
+  }
+
+  virtual ~FstClassImpl() { delete impl_; }
+
+  // Direct access to the held FST; ownership stays with this object.
+  Fst<Arc> *GetImpl() { return impl_; }
+
+  // Returns a new FstClassImpl built from impl_ without taking ownership of
+  // it, i.e. holding its own copy of the FST.
+  virtual FstClassImpl *Copy() {
+    return new FstClassImpl<Arc>(impl_, false);
+  }
+
+ private:
+  Fst<Arc> *impl_;
+};
+
+//
+// BASE CLASS DEFINITIONS
+//
+
+class MutableFstClass;
+
+// Arc-type-agnostic handle to an FST. Owns one FstClassImplBase and forwards
+// the FstClassBase interface to it; copies are deep (they go through the
+// implementation's Copy()).
+class FstClass : public FstClassBase {
+ public:
+  // Reads an Arc-typed FST from `stream`. opts.header must be set; it is
+  // used to decide between boxing as MutableFstClass (kMutable property
+  // present) or as plain FstClass. Returns 0 on failure.
+  template<class Arc>
+  static FstClass *Read(istream &stream,
+                        const FstReadOptions &opts) {
+    if (!opts.header) {
+      FSTERROR() << "FstClass::Read: options header not specified";
+      return 0;
+    }
+    const FstHeader &hdr = *opts.header;
+
+    if (hdr.Properties() & kMutable) {
+      return ReadTypedFst<MutableFstClass, MutableFst<Arc> >(stream, opts);
+    } else {
+      return ReadTypedFst<FstClass, Fst<Arc> >(stream, opts);
+    }
+  }
+
+  // Boxes a copy of *fst; the caller keeps ownership of `fst`.
+  template<class Arc>
+  explicit FstClass(Fst<Arc> *fst) : impl_(new FstClassImpl<Arc>(fst)) { }
+
+  explicit FstClass(const FstClass &other) : impl_(other.impl_->Copy()) { }
+
+  // Deep-copy assignment, matching the copy constructor. Without it the
+  // implicitly generated operator would copy the raw impl_ pointer, leaking
+  // the old implementation and deleting the shared one twice.
+  FstClass &operator=(const FstClass &other) {
+    if (this != &other) {
+      // Copy first so *this is untouched if the copy fails.
+      FstClassImplBase *copy = other.impl_->Copy();
+      delete impl_;
+      impl_ = copy;
+    }
+    return *this;
+  }
+
+  static FstClass *Read(const string &fname);
+
+  virtual const string &ArcType() const {
+    return impl_->ArcType();
+  }
+
+  virtual const string& FstType() const {
+    return impl_->FstType();
+  }
+
+  virtual const SymbolTable *InputSymbols() const {
+    return impl_->InputSymbols();
+  }
+
+  virtual const SymbolTable *OutputSymbols() const {
+    return impl_->OutputSymbols();
+  }
+
+  virtual const string& WeightType() const {
+    return impl_->WeightType();
+  }
+
+  virtual void Write(const string &fname) const {
+    impl_->Write(fname);
+  }
+
+  virtual uint64 Properties(uint64 mask, bool test) const {
+    return impl_->Properties(mask, test);
+  }
+
+  // Returns the boxed FST as Fst<Arc>, or NULL when Arc::Type() differs from
+  // the boxed arc type. The pointer remains owned by this object.
+  template<class Arc>
+  const Fst<Arc> *GetFst() const {
+    if (Arc::Type() != ArcType()) {
+      return NULL;
+    } else {
+      FstClassImpl<Arc> *typed_impl = static_cast<FstClassImpl<Arc> *>(impl_);
+      return typed_impl->GetImpl();
+    }
+  }
+
+  virtual ~FstClass() { delete impl_; }
+
+  // These methods are required by IO registration
+  template<class Arc>
+  static FstClassImplBase *Convert(const FstClass &other) {
+    LOG(ERROR) << "Doesn't make sense to convert any class to type FstClass.";
+    return 0;
+  }
+
+  template<class Arc>
+  static FstClassImplBase *Create() {
+    LOG(ERROR) << "Doesn't make sense to create an FstClass with a "
+               << "particular arc type.";
+    return 0;
+  }
+ protected:
+  // Adopts `impl`; it is deleted by the destructor.
+  explicit FstClass(FstClassImplBase *impl) : impl_(impl) { }
+
+  // Generic template method for reading an arc-templated FST of type
+  // UnderlyingT, and returning it wrapped as FstClassT, with appropriate
+  // error checking. Called from arc-templated Read() static methods.
+  template<class FstClassT, class UnderlyingT>
+  static FstClassT* ReadTypedFst(istream &stream,
+                                 const FstReadOptions &opts) {
+    UnderlyingT *u = UnderlyingT::Read(stream, opts);
+    if (!u) {
+      return 0;
+    } else {
+      // The wrapper is built from u and u is then released here.
+      FstClassT *r = new FstClassT(u);
+      delete u;
+      return r;
+    }
+  }
+
+  FstClassImplBase *GetImpl() { return impl_; }
+ private:
+  FstClassImplBase *impl_;
+};
+
+//
+// Specific types of FstClass with special properties
+//
+
+// FstClass whose boxed FST is handed out as a MutableFst<Arc>, and which
+// additionally exposes symbol-table setters.
+class MutableFstClass : public FstClass {
+ public:
+  // Boxes `fst` by forwarding it to the FstClass constructor.
+  template<class Arc>
+  explicit MutableFstClass(MutableFst<Arc> *fst) : FstClass(fst) { }
+
+  // Returns the boxed FST cast to MutableFst<Arc>. The const qualifier that
+  // GetFst() adds is removed before the downcast.
+  template<class Arc>
+  MutableFst<Arc> *GetMutableFst() {
+    const Fst<Arc> *boxed = this->GetFst<Arc>();
+    return static_cast<MutableFst<Arc> *>(const_cast<Fst<Arc> *>(boxed));
+  }
+
+  // Reads a MutableFst<Arc> from `stream` and returns it boxed, or 0 when
+  // the read fails. The intermediate FST is deleted once boxed.
+  template<class Arc>
+  static MutableFstClass *Read(istream &stream,
+                               const FstReadOptions &opts) {
+    MutableFst<Arc> *loaded = MutableFst<Arc>::Read(stream, opts);
+    if (!loaded)
+      return 0;
+
+    MutableFstClass *boxed = new MutableFstClass(loaded);
+    delete loaded;
+    return boxed;
+  }
+
+  static MutableFstClass *Read(const string &fname, bool convert = false);
+
+  virtual void SetInputSymbols(SymbolTable *is) {
+    GetImpl()->SetInputSymbols(is);
+  }
+
+  virtual void SetOutputSymbols(SymbolTable *os) {
+    GetImpl()->SetOutputSymbols(os);
+  }
+
+  // These methods are required by IO registration
+  template<class Arc>
+  static FstClassImplBase *Convert(const FstClass &other) {
+    LOG(ERROR) << "Doesn't make sense to convert any class to type "
+               << "MutableFstClass.";
+    return 0;
+  }
+
+  template<class Arc>
+  static FstClassImplBase *Create() {
+    LOG(ERROR) << "Doesn't make sense to create a MutableFstClass with a "
+               << "particular arc type.";
+    return 0;
+  }
+
+ protected:
+  // Adopts an existing implementation object via the FstClass constructor.
+  explicit MutableFstClass(FstClassImplBase *impl) : FstClass(impl) { }
+};
+
+
+// MutableFstClass intended for VectorFst<Arc>; the only class in this header
+// whose Convert()/Create() hooks actually build an implementation.
+class VectorFstClass : public MutableFstClass {
+ public:
+  explicit VectorFstClass(const FstClass &other);
+  explicit VectorFstClass(const string &arc_type);
+
+  // Boxes `fst` by forwarding it to the MutableFstClass constructor.
+  template<class Arc>
+  explicit VectorFstClass(VectorFst<Arc> *fst) : MutableFstClass(fst) { }
+
+  // Reads a VectorFst<Arc> from `stream` and returns it boxed, or 0 when the
+  // read fails. The intermediate FST is deleted once boxed.
+  template<class Arc>
+  static VectorFstClass *Read(istream &stream,
+                              const FstReadOptions &opts) {
+    VectorFst<Arc> *loaded = VectorFst<Arc>::Read(stream, opts);
+    if (!loaded)
+      return 0;
+
+    VectorFstClass *boxed = new VectorFstClass(loaded);
+    delete loaded;
+    return boxed;
+  }
+
+  static VectorFstClass *Read(const string &fname);
+
+  // Converter / creator for known arc types
+  // Builds a VectorFst<Arc> from the FST boxed in `other` and returns an
+  // implementation object that owns it.
+  template<class Arc>
+  static FstClassImplBase *Convert(const FstClass &other) {
+    VectorFst<Arc> *converted = new VectorFst<Arc>(*other.GetFst<Arc>());
+    return new FstClassImpl<Arc>(converted, true);
+  }
+
+  // Returns an implementation object owning a default-constructed
+  // VectorFst<Arc>.
+  template<class Arc>
+  static FstClassImplBase *Create() {
+    VectorFst<Arc> *created = new VectorFst<Arc>();
+    return new FstClassImpl<Arc>(created, true);
+  }
+};
+
+} // namespace script
+} // namespace fst
+
+
+#endif // FST_SCRIPT_FST_CLASS_H_
diff --git a/src/include/fst/script/fstscript-decl.h b/src/include/fst/script/fstscript-decl.h
new file mode 100644
index 0000000..fee813e
--- /dev/null
+++ b/src/include/fst/script/fstscript-decl.h
@@ -0,0 +1,35 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+// Forward declarations for the FST and FST-script classes.
+
+#ifndef FST_SCRIPT_FSTSCRIPT_DECL_H_
+#define FST_SCRIPT_FSTSCRIPT_DECL_H_
+
+#include <fst/fst-decl.h>
+
+namespace fst {
+namespace script {
+
+// Forward declarations only; no definitions are provided in this header.
+class FstClass;
+class MutableFstClass;
+class VectorFstClass;
+class WeightClass;
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_FSTSCRIPT_DECL_H_
diff --git a/src/include/fst/script/fstscript.h b/src/include/fst/script/fstscript.h
new file mode 100644
index 0000000..90e1e75
--- /dev/null
+++ b/src/include/fst/script/fstscript.h
@@ -0,0 +1,154 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+// Convenience file that includes all FstScript functionality
+
+#ifndef FST_SCRIPT_FSTSCRIPT_H_
+#define FST_SCRIPT_FSTSCRIPT_H_
+
+// Major classes
+#include <fst/script/fst-class.h>
+#include <fst/script/weight-class.h>
+#include <fst/script/text-io.h>
+
+// Templates like Operation< >, Apply< >
+#include <fst/script/script-impl.h>
+
+// Operations
+#include <fst/script/arcsort.h>
+#include <fst/script/closure.h>
+#include <fst/script/compile.h>
+#include <fst/script/compose.h>
+#include <fst/script/concat.h>
+#include <fst/script/connect.h>
+#include <fst/script/convert.h>
+#include <fst/script/decode.h>
+#include <fst/script/determinize.h>
+#include <fst/script/difference.h>
+#include <fst/script/draw.h>
+#include <fst/script/encode.h>
+#include <fst/script/epsnormalize.h>
+#include <fst/script/equal.h>
+#include <fst/script/equivalent.h>
+#include <fst/script/info.h>
+#include <fst/script/intersect.h>
+#include <fst/script/invert.h>
+#include <fst/script/map.h>
+#include <fst/script/minimize.h>
+#include <fst/script/print.h>
+#include <fst/script/project.h>
+#include <fst/script/prune.h>
+#include <fst/script/push.h>
+#include <fst/script/randequivalent.h>
+#include <fst/script/randgen.h>
+#include <fst/script/relabel.h>
+#include <fst/script/replace.h>
+#include <fst/script/reverse.h>
+#include <fst/script/reweight.h>
+#include <fst/script/rmepsilon.h>
+#include <fst/script/shortest-distance.h>
+#include <fst/script/shortest-path.h>
+#include <fst/script/symbols.h>
+#include <fst/script/synchronize.h>
+#include <fst/script/topsort.h>
+#include <fst/script/union.h>
+#include <fst/script/verify.h>
+
+//
+// REGISTER OPERATIONS
+//
+
+
+// This class is necessary because registering each of the operations
+// separately overfills the stack, as there are so many of them.
+namespace fst {
+namespace script {
+template<class Arc>
+// Constructing an instance runs both registration batches below for the
+// given Arc. Each REGISTER_FST_OPERATION line names an operation, the arc
+// type and the argument-pack type of one overload.
+// NOTE(review): REGISTER_FST_OPERATION is defined elsewhere; presumably it
+// records the Arc instantiation of the operation for later dispatch --
+// confirm against script-impl.h.
+class AllFstOperationsRegisterer {
+ public:
+ AllFstOperationsRegisterer() {
+ RegisterBatch1();
+ RegisterBatch2();
+ }
+
+ private:
+ // First batch: ArcSort through Minimize.
+ void RegisterBatch1() {
+ REGISTER_FST_OPERATION(ArcSort, Arc, ArcSortArgs);
+ REGISTER_FST_OPERATION(Closure, Arc, ClosureArgs);
+ REGISTER_FST_OPERATION(CompileFst, Arc, FstCompileArgs);
+ REGISTER_FST_OPERATION(Compose, Arc, ComposeArgs1);
+ REGISTER_FST_OPERATION(Compose, Arc, ComposeArgs2);
+ REGISTER_FST_OPERATION(Concat, Arc, ConcatArgs1);
+ REGISTER_FST_OPERATION(Concat, Arc, ConcatArgs2);
+ REGISTER_FST_OPERATION(Connect, Arc, MutableFstClass);
+ REGISTER_FST_OPERATION(Convert, Arc, ConvertArgs);
+ REGISTER_FST_OPERATION(Decode, Arc, DecodeArgs);
+ REGISTER_FST_OPERATION(Determinize, Arc, DeterminizeArgs);
+ REGISTER_FST_OPERATION(Difference, Arc, DifferenceArgs1);
+ REGISTER_FST_OPERATION(Difference, Arc, DifferenceArgs2);
+ REGISTER_FST_OPERATION(DrawFst, Arc, FstDrawerArgs);
+ REGISTER_FST_OPERATION(Encode, Arc, EncodeArgs);
+ REGISTER_FST_OPERATION(EpsNormalize, Arc, EpsNormalizeArgs);
+ REGISTER_FST_OPERATION(Equal, Arc, EqualArgs);
+ REGISTER_FST_OPERATION(Equivalent, Arc, EquivalentArgs);
+ REGISTER_FST_OPERATION(PrintFstInfo, Arc, InfoArgs);
+ REGISTER_FST_OPERATION(Intersect, Arc, IntersectArgs1);
+ REGISTER_FST_OPERATION(Intersect, Arc, IntersectArgs2);
+ REGISTER_FST_OPERATION(Invert, Arc, MutableFstClass);
+ REGISTER_FST_OPERATION(Map, Arc, MapArgs);
+ REGISTER_FST_OPERATION(Minimize, Arc, MinimizeArgs);
+ }
+
+ // Second batch: PrintFst through Verify.
+ void RegisterBatch2() {
+ REGISTER_FST_OPERATION(PrintFst, Arc, FstPrinterArgs);
+ REGISTER_FST_OPERATION(Project, Arc, ProjectArgs);
+ REGISTER_FST_OPERATION(Prune, Arc, PruneArgs1);
+ REGISTER_FST_OPERATION(Prune, Arc, PruneArgs2);
+ REGISTER_FST_OPERATION(Prune, Arc, PruneArgs3);
+ REGISTER_FST_OPERATION(Prune, Arc, PruneArgs4);
+ REGISTER_FST_OPERATION(Push, Arc, PushArgs1);
+ REGISTER_FST_OPERATION(Push, Arc, PushArgs2);
+ REGISTER_FST_OPERATION(RandEquivalent, Arc, RandEquivalentArgs1);
+ REGISTER_FST_OPERATION(RandEquivalent, Arc, RandEquivalentArgs2);
+ REGISTER_FST_OPERATION(RandGen, Arc, RandGenArgs);
+ REGISTER_FST_OPERATION(Relabel, Arc, RelabelArgs1);
+ REGISTER_FST_OPERATION(Relabel, Arc, RelabelArgs2);
+ REGISTER_FST_OPERATION(Relabel, Arc, RelabelArgs3);
+ REGISTER_FST_OPERATION(Replace, Arc, ReplaceArgs);
+ REGISTER_FST_OPERATION(Reverse, Arc, ReverseArgs);
+ REGISTER_FST_OPERATION(Reweight, Arc, ReweightArgs);
+ REGISTER_FST_OPERATION(RmEpsilon, Arc, RmEpsilonArgs1);
+ REGISTER_FST_OPERATION(RmEpsilon, Arc, RmEpsilonArgs2);
+ REGISTER_FST_OPERATION(RmEpsilon, Arc, RmEpsilonArgs3);
+ REGISTER_FST_OPERATION(ShortestDistance, Arc, ShortestDistanceArgs1);
+ REGISTER_FST_OPERATION(ShortestDistance, Arc, ShortestDistanceArgs2);
+ REGISTER_FST_OPERATION(ShortestDistance, Arc, ShortestDistanceArgs3);
+ REGISTER_FST_OPERATION(ShortestPath, Arc, ShortestPathArgs1);
+ REGISTER_FST_OPERATION(ShortestPath, Arc, ShortestPathArgs2);
+ REGISTER_FST_OPERATION(Synchronize, Arc, SynchronizeArgs);
+ REGISTER_FST_OPERATION(TopSort, Arc, TopSortArgs);
+ REGISTER_FST_OPERATION(Union, Arc, UnionArgs);
+ REGISTER_FST_OPERATION(Verify, Arc, VerifyArgs);
+ }
+};
+} // namespace script
+} // namespace fst
+
+
+// Defines an AllFstOperationsRegisterer<Arc> object named
+// register_all_fst_operations<Arc>. Arc is token-pasted into the variable
+// name, so it must be a plain identifier, and the macro already ends with a
+// semicolon. The class name is used unqualified, so it must be visible at
+// the point of expansion.
+#define REGISTER_FST_OPERATIONS(Arc) \
+ AllFstOperationsRegisterer<Arc> register_all_fst_operations ## Arc;
+
+#endif // FST_SCRIPT_FSTSCRIPT_H_
diff --git a/src/include/fst/script/info-impl.h b/src/include/fst/script/info-impl.h
new file mode 100644
index 0000000..408fbcd
--- /dev/null
+++ b/src/include/fst/script/info-impl.h
@@ -0,0 +1,325 @@
+// info-impl.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Class to compute various information about FSTs, helper class for fstinfo.cc
+
+#ifndef FST_SCRIPT_INFO_IMPL_H_
+#define FST_SCRIPT_INFO_IMPL_H_
+
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/connect.h>
+#include <fst/dfs-visit.h>
+#include <fst/fst.h>
+#include <fst/lookahead-matcher.h>
+#include <fst/matcher.h>
+#include <fst/queue.h>
+#include <fst/test-properties.h>
+#include <fst/verify.h>
+#include <fst/visit.h>
+
+namespace fst {
+
+// Compute various information about FSTs, helper class for fstinfo.cc.
+// WARNING: Stand-alone use of this class is not recommended, most code
+// should call directly the relevant library functions: Fst<A>::NumStates,
+// Fst<A>::NumArcs, TestProperties, ...
+// All statistics are gathered once, in the constructor; the accessors below
+// only return the stored values.
+template <class A> class FstInfo {
+ public:
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+ typedef typename A::Label Label;
+ typedef typename A::Weight Weight;
+
+ // When info_type is "short" (or "auto" and not an ExpandedFst)
+ // then only minimal info is computed and can be requested.
+ //
+ // Parameters:
+ //   fst: the FST to summarize.
+ //   test_properties: forwarded to fst.Properties() and to the matchers'
+ //     Type() calls when the long info is computed.
+ //   arc_filter_type: "any", "epsilon", "iepsilon" or "oepsilon"; selects the
+ //     arc filter used by the two connectivity traversals below.
+ //   info_type: "long", "short" or "auto" ("auto" means long only if the FST
+ //     reports the kExpanded property).
+ //   verify: when true, Verify(fst) is run before any statistics are
+ //     gathered and the constructor stops early if it fails.
+ //
+ // Every failure path logs through FSTERROR() and returns, leaving the
+ // counters not yet computed at their initial values.
+ // NOTE(review): long_info_ is initialized to true, so after a bad info_type
+ // or a failed Verify the long accessors still pass CheckLong() and return
+ // those initial values.
+ FstInfo(const Fst<A> &fst, bool test_properties,
+ const string &arc_filter_type = "any",
+ string info_type = "auto", bool verify = true)
+ : fst_type_(fst.Type()),
+ input_symbols_(fst.InputSymbols() ?
+ fst.InputSymbols()->Name() : "none"),
+ output_symbols_(fst.OutputSymbols() ?
+ fst.OutputSymbols()->Name() : "none"),
+ nstates_(0), narcs_(0), start_(kNoStateId), nfinal_(0),
+ nepsilons_(0), niepsilons_(0), noepsilons_(0),
+ naccess_(0), ncoaccess_(0), nconnect_(0), ncc_(0), nscc_(0),
+ input_match_type_(MATCH_NONE), output_match_type_(MATCH_NONE),
+ input_lookahead_(false), output_lookahead_(false),
+ properties_(0), arc_filter_type_(arc_filter_type), long_info_(true) {
+ // Decide whether the long (full traversal) statistics are wanted.
+ if (info_type == "long") {
+ long_info_ = true;
+ } else if (info_type == "short") {
+ long_info_ = false;
+ } else if (info_type == "auto") {
+ long_info_ = fst.Properties(kExpanded, false);
+ } else {
+ FSTERROR() << "Bad info type: " << info_type;
+ return;
+ }
+
+ // Short info only needs the members set in the initializer list.
+ if (!long_info_)
+ return;
+
+ // If the FST is not sane, we return.
+ if (verify && !Verify(fst)) {
+ FSTERROR() << "FstInfo: Verify: FST not well-formed.";
+ return;
+ }
+
+ start_ = fst.Start();
+ properties_ = fst.Properties(kFstProperties, test_properties);
+
+ // One pass over all states and arcs to count states, final states (final
+ // weight different from Weight::Zero()), arcs and epsilon labels (label 0).
+ for (StateIterator< Fst<A> > siter(fst);
+ !siter.Done();
+ siter.Next()) {
+ ++nstates_;
+ StateId s = siter.Value();
+ if (fst.Final(s) != Weight::Zero())
+ ++nfinal_;
+ for (ArcIterator< Fst<A> > aiter(fst, s);
+ !aiter.Done();
+ aiter.Next()) {
+ const A &arc = aiter.Value();
+ ++narcs_;
+ // nepsilons_ counts arcs that are epsilon on BOTH sides; such arcs are
+ // also counted in niepsilons_ and noepsilons_.
+ if (arc.ilabel == 0 && arc.olabel == 0)
+ ++nepsilons_;
+ if (arc.ilabel == 0)
+ ++niepsilons_;
+ if (arc.olabel == 0)
+ ++noepsilons_;
+ }
+ }
+
+ // Connected components: CcVisitor fills cc[s] with a component id per
+ // state, visiting only arcs admitted by the selected filter.
+ {
+ vector<StateId> cc;
+ CcVisitor<Arc> cc_visitor(&cc);
+ FifoQueue<StateId> fifo_queue;
+ if (arc_filter_type == "any") {
+ Visit(fst, &cc_visitor, &fifo_queue);
+ } else if (arc_filter_type == "epsilon") {
+ Visit(fst, &cc_visitor, &fifo_queue, EpsilonArcFilter<Arc>());
+ } else if (arc_filter_type == "iepsilon") {
+ Visit(fst, &cc_visitor, &fifo_queue, InputEpsilonArcFilter<Arc>());
+ } else if (arc_filter_type == "oepsilon") {
+ Visit(fst, &cc_visitor, &fifo_queue, OutputEpsilonArcFilter<Arc>());
+ } else {
+ FSTERROR() << "Bad arc filter type: " << arc_filter_type;
+ return;
+ }
+
+ // The component count is the largest id seen plus one.
+ for (StateId s = 0; s < cc.size(); ++s) {
+ if (cc[s] >= ncc_)
+ ncc_ = cc[s] + 1;
+ }
+ }
+
+ // Strongly connected components and (co)accessibility: SccVisitor fills
+ // scc[s], access[s] and coaccess[s] per state, using the same filter
+ // selection as above.
+ {
+ vector<StateId> scc;
+ vector<bool> access, coaccess;
+ uint64 props = 0;
+ SccVisitor<Arc> scc_visitor(&scc, &access, &coaccess, &props);
+ if (arc_filter_type == "any") {
+ DfsVisit(fst, &scc_visitor);
+ } else if (arc_filter_type == "epsilon") {
+ DfsVisit(fst, &scc_visitor, EpsilonArcFilter<Arc>());
+ } else if (arc_filter_type == "iepsilon") {
+ DfsVisit(fst, &scc_visitor, InputEpsilonArcFilter<Arc>());
+ } else if (arc_filter_type == "oepsilon") {
+ DfsVisit(fst, &scc_visitor, OutputEpsilonArcFilter<Arc>());
+ } else {
+ FSTERROR() << "Bad arc filter type: " << arc_filter_type;
+ return;
+ }
+
+ // A state is "connected" when it is both accessible and coaccessible.
+ for (StateId s = 0; s < scc.size(); ++s) {
+ if (access[s])
+ ++naccess_;
+ if (coaccess[s])
+ ++ncoaccess_;
+ if (access[s] && coaccess[s])
+ ++nconnect_;
+ if (scc[s] >= nscc_)
+ nscc_ = scc[s] + 1;
+ }
+ }
+
+ // Record the match type and look-ahead flag reported by a LookAheadMatcher
+ // built on each side of the FST.
+ LookAheadMatcher< Fst<A> > imatcher(fst, MATCH_INPUT);
+ input_match_type_ = imatcher.Type(test_properties);
+ input_lookahead_ = imatcher.Flags() & kInputLookAheadMatcher;
+
+ LookAheadMatcher< Fst<A> > omatcher(fst, MATCH_OUTPUT);
+ output_match_type_ = omatcher.Type(test_properties);
+ output_lookahead_ = omatcher.Flags() & kOutputLookAheadMatcher;
+ }
+
+ // Short info
+ // (always available, regardless of info_type)
+ const string& FstType() const { return fst_type_; }
+ const string& ArcType() const { return A::Type(); }
+ const string& InputSymbols() const { return input_symbols_; }
+ const string& OutputSymbols() const { return output_symbols_; }
+ const bool LongInfo() const { return long_info_; }
+ const string& ArcFilterType() const { return arc_filter_type_; }
+
+ // Long info
+ // Each accessor calls CheckLong() first, which logs an error when only the
+ // short info was computed; the stored (initial) value is still returned.
+ MatchType InputMatchType() const { CheckLong(); return input_match_type_; }
+ MatchType OutputMatchType() const { CheckLong(); return output_match_type_; }
+ bool InputLookAhead() const { CheckLong(); return input_lookahead_; }
+ bool OutputLookAhead() const { CheckLong(); return output_lookahead_; }
+ int64 NumStates() const { CheckLong(); return nstates_; }
+ int64 NumArcs() const { CheckLong(); return narcs_; }
+ int64 Start() const { CheckLong(); return start_; }
+ int64 NumFinal() const { CheckLong(); return nfinal_; }
+ int64 NumEpsilons() const { CheckLong(); return nepsilons_; }
+ int64 NumInputEpsilons() const { CheckLong(); return niepsilons_; }
+ int64 NumOutputEpsilons() const { CheckLong(); return noepsilons_; }
+ int64 NumAccessible() const { CheckLong(); return naccess_; }
+ int64 NumCoAccessible() const { CheckLong(); return ncoaccess_; }
+ int64 NumConnected() const { CheckLong(); return nconnect_; }
+ int64 NumCc() const { CheckLong(); return ncc_; }
+ int64 NumScc() const { CheckLong(); return nscc_; }
+ uint64 Properties() const { CheckLong(); return properties_; }
+
+ private:
+ // Logs an error if a long-info accessor is used on a short-info object.
+ void CheckLong() const {
+ if (!long_info_)
+ FSTERROR() << "FstInfo: method only available with long info version";
+ }
+
+ string fst_type_;
+ string input_symbols_;
+ string output_symbols_;
+ int64 nstates_;
+ int64 narcs_;
+ int64 start_;
+ int64 nfinal_;
+ int64 nepsilons_;
+ int64 niepsilons_;
+ int64 noepsilons_;
+ int64 naccess_;
+ int64 ncoaccess_;
+ int64 nconnect_;
+ int64 ncc_;
+ int64 nscc_;
+ MatchType input_match_type_;
+ MatchType output_match_type_;
+ bool input_lookahead_;
+ bool output_lookahead_;
+ uint64 properties_;
+ string arc_filter_type_;
+ bool long_info_;
+ DISALLOW_COPY_AND_ASSIGN(FstInfo);
+};
+
+// Prints the contents of 'fstinfo' as a two-column table: a label padded to
+// 50 characters followed by the value, one entry per line.
+//
+//   fstinfo: the statistics to print. When fstinfo.LongInfo() is false only
+//     the FST type, arc type and symbol table names are printed.
+//   pipe: when true the table goes to cerr instead of cout.
+//
+// The stream's format flags are saved on entry and restored before every
+// return, so the left-justification used for the table does not leak into
+// later output on the same stream.
+template <class A>
+void PrintFstInfo(const FstInfo<A> &fstinfo, bool pipe = false) {
+  ostream &os = pipe ? cerr : cout;
+
+  // Select left adjustment (clearing any other adjustment bit) and remember
+  // the previous flags. They are restored with flags(), not setf(): setf(old)
+  // would only OR the old bits back in and leave ios::left switched on.
+  const ios_base::fmtflags old = os.setf(ios::left, ios::adjustfield);
+
+  // width() only applies to the next insertion, hence the repeated calls.
+  os.width(50);
+  os << "fst type" << fstinfo.FstType() << endl;
+  os.width(50);
+  os << "arc type" << fstinfo.ArcType() << endl;
+  os.width(50);
+  os << "input symbol table" << fstinfo.InputSymbols() << endl;
+  os.width(50);
+  os << "output symbol table" << fstinfo.OutputSymbols() << endl;
+
+  if (!fstinfo.LongInfo()) {
+    os.flags(old);
+    return;
+  }
+
+  os.width(50);
+  os << "# of states" << fstinfo.NumStates() << endl;
+  os.width(50);
+  os << "# of arcs" << fstinfo.NumArcs() << endl;
+  os.width(50);
+  os << "initial state" << fstinfo.Start() << endl;
+  os.width(50);
+  os << "# of final states" << fstinfo.NumFinal() << endl;
+  os.width(50);
+  os << "# of input/output epsilons" << fstinfo.NumEpsilons() << endl;
+  os.width(50);
+  os << "# of input epsilons" << fstinfo.NumInputEpsilons() << endl;
+  os.width(50);
+  os << "# of output epsilons" << fstinfo.NumOutputEpsilons() << endl;
+
+  // The connectivity labels mention the arc filter that was used, if any.
+  string arc_type = "";
+  if (fstinfo.ArcFilterType() == "epsilon")
+    arc_type = "epsilon ";
+  else if (fstinfo.ArcFilterType() == "iepsilon")
+    arc_type = "input-epsilon ";
+  else if (fstinfo.ArcFilterType() == "oepsilon")
+    arc_type = "output-epsilon ";
+
+  string accessible_label = "# of " + arc_type + "accessible states";
+  os.width(50);
+  os << accessible_label << fstinfo.NumAccessible() << endl;
+  string coaccessible_label = "# of " + arc_type + "coaccessible states";
+  os.width(50);
+  os << coaccessible_label << fstinfo.NumCoAccessible() << endl;
+  string connected_label = "# of " + arc_type + "connected states";
+  os.width(50);
+  os << connected_label << fstinfo.NumConnected() << endl;
+  string numcc_label = "# of " + arc_type + "connected components";
+  os.width(50);
+  os << numcc_label << fstinfo.NumCc() << endl;
+  string numscc_label = "# of " + arc_type + "strongly conn components";
+  os.width(50);
+  os << numscc_label << fstinfo.NumScc() << endl;
+
+  // Matchers: 'y' for the expected match type, 'n' for MATCH_NONE, '?' for
+  // anything else.
+  os.width(50);
+  os << "input matcher"
+     << (fstinfo.InputMatchType() == MATCH_INPUT ? 'y' :
+         fstinfo.InputMatchType() == MATCH_NONE ? 'n' : '?') << endl;
+  os.width(50);
+  os << "output matcher"
+     << (fstinfo.OutputMatchType() == MATCH_OUTPUT ? 'y' :
+         fstinfo.OutputMatchType() == MATCH_NONE ? 'n' : '?') << endl;
+  os.width(50);
+  os << "input lookahead"
+     << (fstinfo.InputLookAhead() ? 'y' : 'n') << endl;
+  os.width(50);
+  os << "output lookahead"
+     << (fstinfo.OutputLookAhead() ? 'y' : 'n') << endl;
+
+  // Walk the 64 property bits. Binary properties print y/n. For a trinary
+  // property the bit selected by kPosTrinaryProperties means 'y', the next
+  // higher bit means 'n', and neither set prints '?'.
+  uint64 prop = 1;
+  for (int i = 0; i < 64; ++i, prop <<= 1) {
+    if (prop & kBinaryProperties) {
+      char value = 'n';
+      if (fstinfo.Properties() & prop) value = 'y';
+      os.width(50);
+      os << PropertyNames[i] << value << endl;
+    } else if (prop & kPosTrinaryProperties) {
+      char value = '?';
+      if (fstinfo.Properties() & prop) value = 'y';
+      else if (fstinfo.Properties() & (prop << 1)) value = 'n';
+      os.width(50);
+      os << PropertyNames[i] << value << endl;
+    }
+  }
+  os.flags(old);
+}
+
+} // namespace fst
+
+#endif // FST_SCRIPT_INFO_IMPL_H_
diff --git a/src/include/fst/script/info.h b/src/include/fst/script/info.h
new file mode 100644
index 0000000..f434bd5
--- /dev/null
+++ b/src/include/fst/script/info.h
@@ -0,0 +1,48 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_INFO_H_
+#define FST_SCRIPT_INFO_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/script/info-impl.h>
+
+namespace fst {
+namespace script {
+
+// Argument pack handed to the arc-typed PrintFstInfo below: the FST, the
+// test_properties flag, the arc filter name, the info type name and two
+// boolean flags.
+typedef args::Package<const FstClass&, bool, const string&, const string&,
+                      bool, bool> InfoArgs;
+
+// Arc-typed implementation: builds an FstInfo for the wrapped FST and prints
+// it. The sixth packed argument is the "pipe" flag: it is forwarded to the
+// printer and, when set, the FST itself is also written out via Write("").
+template<class Arc>
+void PrintFstInfo(InfoArgs *args) {
+  const Fst<Arc> &typed_fst = *(args->arg1.GetFst<Arc>());
+  FstInfo<Arc> info(typed_fst, args->arg2, args->arg3, args->arg4, args->arg5);
+
+  const bool pipe_through = args->arg6;
+  PrintFstInfo(info, pipe_through);
+  if (pipe_through) {
+    typed_fst.Write("");
+  }
+}
+
+// Type-erased entry point.
+// NOTE(review): the implementation above passes arg5 to FstInfo as "verify"
+// and treats arg6 as "pipe", while this signature lists pipe before verify.
+// Confirm that the definition packs the two flags as (verify, pipe).
+void PrintFstInfo(const FstClass &f, bool test_properties,
+                  const string &arc_filter, const string &info_type,
+                  bool pipe, bool verify);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_INFO_H_
diff --git a/src/include/fst/script/intersect.h b/src/include/fst/script/intersect.h
new file mode 100644
index 0000000..8011024
--- /dev/null
+++ b/src/include/fst/script/intersect.h
@@ -0,0 +1,65 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_INTERSECT_H_
+#define FST_SCRIPT_INTERSECT_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/intersect.h>
+#include <fst/script/compose.h> // for ComposeOptions, ComposeFilter
+
+namespace fst {
+namespace script {
+
+// Argument pack for Intersect with an explicit compose filter:
+// (first input, second input, output, filter).
+typedef args::Package<const FstClass&, const FstClass&,
+                      MutableFstClass*, ComposeFilter> IntersectArgs1;
+
+// Arc-typed implementation: unwraps the three FSTs for this Arc type and
+// forwards to the four-argument Intersect with the packed filter.
+template<class Arc>
+void Intersect(IntersectArgs1 *args) {
+  const Fst<Arc> &lhs = *(args->arg1.GetFst<Arc>());
+  const Fst<Arc> &rhs = *(args->arg2.GetFst<Arc>());
+  MutableFst<Arc> *result = args->arg3->GetMutableFst<Arc>();
+  Intersect(lhs, rhs, result, args->arg4);
+}
+
+// Argument pack for Intersect with full compose options:
+// (first input, second input, output, options).
+typedef args::Package<const FstClass&, const FstClass&,
+                      MutableFstClass*, const ComposeOptions &> IntersectArgs2;
+
+// Arc-typed implementation: unwraps the three FSTs for this Arc type and
+// forwards to the four-argument Intersect with the packed options.
+template<class Arc>
+void Intersect(IntersectArgs2 *args) {
+  const Fst<Arc> &lhs = *(args->arg1.GetFst<Arc>());
+  const Fst<Arc> &rhs = *(args->arg2.GetFst<Arc>());
+  MutableFst<Arc> *result = args->arg3->GetMutableFst<Arc>();
+  Intersect(lhs, rhs, result, args->arg4);
+}
+
+// Type-erased entry points (defined elsewhere). The first takes a compose
+// filter, the second a full ComposeOptions object that defaults to a
+// default-constructed fst::script::ComposeOptions.
+// NOTE(review): presumably these dispatch to the arc-typed Intersect
+// templates for IntersectArgs1 / IntersectArgs2 -- confirm in the definition.
+void Intersect(const FstClass &ifst1, const FstClass &ifst2,
+ MutableFstClass *ofst,
+ ComposeFilter compose_filter);
+
+void Intersect(const FstClass &ifst, const FstClass &ifst2,
+ MutableFstClass *ofst,
+ const ComposeOptions &opts = fst::script::ComposeOptions());
+
+} // namespace script
+} // namespace fst
+
+
+
+#endif // FST_SCRIPT_INTERSECT_H_
diff --git a/src/include/fst/script/invert.h b/src/include/fst/script/invert.h
new file mode 100644
index 0000000..1befd9f
--- /dev/null
+++ b/src/include/fst/script/invert.h
@@ -0,0 +1,43 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_INVERT_H_
+#define FST_SCRIPT_INVERT_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/invert.h>
+
+namespace fst {
+namespace script {
+
+// Arc-typed implementation: unwraps the mutable FST for this Arc type and
+// inverts it in place. It is hidden from SWIG because it has the same
+// argument list as the non-templated version declared below, which confuses
+// SWIG.
+#ifndef SWIG
+template<class Arc>
+void Invert(MutableFstClass *fst) {
+  Invert(fst->GetMutableFst<Arc>());
+}
+#endif
+
+// Type-erased entry point (defined elsewhere).
+void Invert(MutableFstClass *fst);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_INVERT_H_
diff --git a/src/include/fst/script/map.h b/src/include/fst/script/map.h
new file mode 100644
index 0000000..2332074
--- /dev/null
+++ b/src/include/fst/script/map.h
@@ -0,0 +1,115 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_MAP_H_
+#define FST_SCRIPT_MAP_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/script/weight-class.h>
+#include <fst/arc-map.h>
+#include <fst/state-map.h>
+
+namespace fst {
+namespace script {
+
+// Applies the arc mapper to 'fst' and returns the result as a newly
+// allocated VectorFst over the mapper's output arc type. The FST is created
+// with new and returned as a raw pointer, so the caller takes ownership.
+template <class M>
+Fst<typename M::ToArc> *ArcMap(const Fst<typename M::FromArc> &fst,
+                               const M &mapper) {
+  VectorFst<typename M::ToArc> *mapped = new VectorFst<typename M::ToArc>;
+  ArcMap(fst, mapped, mapper);
+  return mapped;
+}
+
+// Applies the state mapper to 'fst' and returns the result as a newly
+// allocated VectorFst over the mapper's output arc type. The FST is created
+// with new and returned as a raw pointer, so the caller takes ownership.
+template <class M>
+Fst<typename M::ToArc> *StateMap(const Fst<typename M::FromArc> &fst,
+                                 const M &mapper) {
+  VectorFst<typename M::ToArc> *mapped = new VectorFst<typename M::ToArc>;
+  StateMap(fst, mapped, mapper);
+  return mapped;
+}
+
+// Selects which mapper script::Map applies. The enumerators take the default
+// values 0..10 in the order listed, so reordering them changes their values.
+enum MapType { ARC_SUM_MAPPER, IDENTITY_MAPPER, INVERT_MAPPER, PLUS_MAPPER,
+ QUANTIZE_MAPPER, RMWEIGHT_MAPPER, SUPERFINAL_MAPPER,
+ TIMES_MAPPER, TO_LOG_MAPPER, TO_LOG64_MAPPER, TO_STD_MAPPER };
+
+// Argument pack for script::Map: (input FST, mapper selector, delta, weight),
+// wrapped together with a slot for the returned FstClass*.
+typedef args::Package<const FstClass&, MapType, float,
+                      const WeightClass &> MapInnerArgs;
+typedef args::WithReturnValue<FstClass*, MapInnerArgs> MapArgs;
+
+// Arc-typed implementation: applies the mapper selected by the packed
+// MapType to the input FST and stores a newly allocated FstClass wrapping the
+// result in args->retval. The packed delta is used only by QUANTIZE_MAPPER
+// and the packed weight only by PLUS_MAPPER and TIMES_MAPPER. For an
+// unrecognized selector an error is logged and retval wraps an empty
+// VectorFst with the kError property set.
+template <class Arc>
+void Map(MapArgs *args) {
+  typedef typename Arc::Weight Weight;
+
+  const Fst<Arc> &ifst = *(args->args.arg1.GetFst<Arc>());
+  MapType map_type = args->args.arg2;
+  float delta = args->args.arg3;
+  Weight w = *(args->args.arg4.GetWeight<Weight>());
+
+  switch (map_type) {
+    case ARC_SUM_MAPPER:
+      args->retval = new FstClass(
+          script::StateMap(ifst, ArcSumMapper<Arc>(ifst)));
+      break;
+    case IDENTITY_MAPPER:
+      args->retval = new FstClass(
+          script::ArcMap(ifst, IdentityArcMapper<Arc>()));
+      break;
+    case INVERT_MAPPER:
+      args->retval = new FstClass(
+          script::ArcMap(ifst, InvertWeightMapper<Arc>()));
+      break;
+    case PLUS_MAPPER:
+      args->retval = new FstClass(
+          script::ArcMap(ifst, PlusMapper<Arc>(w)));
+      break;
+    case QUANTIZE_MAPPER:
+      args->retval = new FstClass(
+          script::ArcMap(ifst, QuantizeMapper<Arc>(delta)));
+      break;
+    case RMWEIGHT_MAPPER:
+      args->retval = new FstClass(
+          script::ArcMap(ifst, RmWeightMapper<Arc>()));
+      break;
+    case SUPERFINAL_MAPPER:
+      args->retval = new FstClass(
+          script::ArcMap(ifst, SuperFinalMapper<Arc>()));
+      break;
+    case TIMES_MAPPER:
+      args->retval = new FstClass(
+          script::ArcMap(ifst, TimesMapper<Arc>(w)));
+      break;
+    case TO_LOG_MAPPER:
+      args->retval = new FstClass(
+          script::ArcMap(ifst, WeightConvertMapper<Arc, LogArc>()));
+      break;
+    case TO_LOG64_MAPPER:
+      args->retval = new FstClass(
+          script::ArcMap(ifst, WeightConvertMapper<Arc, Log64Arc>()));
+      break;
+    case TO_STD_MAPPER:
+      args->retval = new FstClass(
+          script::ArcMap(ifst, WeightConvertMapper<Arc, StdArc>()));
+      break;
+    default: {
+      // Not one of the known selectors: report it and hand back an FST
+      // flagged as erroneous rather than a null pointer.
+      FSTERROR() << "Error: unknown/unsupported mapper type: "
+                 << map_type;
+      VectorFst<Arc> *error_fst = new VectorFst<Arc>;
+      error_fst->SetProperties(kError, kError);
+      args->retval = new FstClass(error_fst);
+      break;
+    }
+  }
+}
+
+
+// Type-erased entry point (defined elsewhere). delta defaults to fst::kDelta
+// and w to WeightClass::Zero().
+// NOTE(review): the arc-typed Map stores a freshly new-ed FstClass in its
+// return slot and SWIG is told below that Map returns a new object, so the
+// caller presumably owns the returned pointer -- confirm in the definition.
+#ifdef SWIG
+%newobject Map;
+#endif
+FstClass *Map(const FstClass& f, MapType map_type,
+ float delta = fst::kDelta,
+ const WeightClass &w = fst::script::WeightClass::Zero());
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_MAP_H_
diff --git a/src/include/fst/script/minimize.h b/src/include/fst/script/minimize.h
new file mode 100644
index 0000000..f250d03
--- /dev/null
+++ b/src/include/fst/script/minimize.h
@@ -0,0 +1,45 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_MINIMIZE_H_
+#define FST_SCRIPT_MINIMIZE_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/minimize.h>
+
+namespace fst {
+namespace script {
+
+// Argument pack for Minimize: (first FST, optional second FST, delta).
+typedef args::Package<MutableFstClass*, MutableFstClass*, float> MinimizeArgs;
+
+// Arc-typed implementation: unwraps the mutable FSTs for this Arc type and
+// forwards to the three-argument Minimize. The second FST is optional; a null
+// wrapper is passed on as a null pointer.
+template<class Arc>
+void Minimize(MinimizeArgs *args) {
+  MutableFst<Arc> *primary = args->arg1->GetMutableFst<Arc>();
+
+  MutableFst<Arc> *secondary = 0;
+  if (args->arg2)
+    secondary = args->arg2->GetMutableFst<Arc>();
+
+  Minimize(primary, secondary, args->arg3);
+}
+
+// Type-erased entry point (defined elsewhere); ofst2 defaults to null and
+// delta to kDelta.
+void Minimize(MutableFstClass *ofst1, MutableFstClass *ofst2 = 0,
+              float delta = kDelta);
+
+} // namespace script
+} // namespace fst
+
+
+
+#endif // FST_SCRIPT_MINIMIZE_H_
diff --git a/src/include/fst/script/print-impl.h b/src/include/fst/script/print-impl.h
new file mode 100644
index 0000000..1433a29
--- /dev/null
+++ b/src/include/fst/script/print-impl.h
@@ -0,0 +1,149 @@
+// print-impl.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Stand-alone class to print out binary FSTs in the AT&T format,
+// helper class for fstprint.cc
+
+#ifndef FST_SCRIPT_PRINT_IMPL_H_
+#define FST_SCRIPT_PRINT_IMPL_H_
+
+#include <sstream>
+#include <string>
+
+#include <fst/fst.h>
+#include <fst/util.h>
+
+DECLARE_string(fst_field_separator);
+
+namespace fst {
+
+// Print a binary Fst in textual format, helper class for fstprint.cc
+// WARNING: Stand-alone use of this class not recommended, most code should
+// read/write using the binary format which is much more efficient.
+template <class A> class FstPrinter {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef typename A::Label Label;
+  typedef typename A::Weight Weight;
+
+  // Parameters:
+  //   fst: the FST to print; held by reference, so it must outlive the
+  //     printer.
+  //   isyms, osyms, ssyms: optional symbol tables for input labels, output
+  //     labels and state ids. A null table means the numeric id is printed.
+  //   accep: request acceptor format (no output-label column). It is honored
+  //     only if the FST actually has the kAcceptor property.
+  //   show_weight_one: when true, weights equal to Weight::One() are printed
+  //     too; otherwise they are omitted.
+  FstPrinter(const Fst<A> &fst,
+             const SymbolTable *isyms,
+             const SymbolTable *osyms,
+             const SymbolTable *ssyms,
+             bool accep,
+             bool show_weight_one)
+      : fst_(fst), isyms_(isyms), osyms_(osyms), ssyms_(ssyms),
+        accep_(accep && fst.Properties(kAcceptor, true)), ostrm_(0),
+        show_weight_one_(show_weight_one) {}
+
+  // Print Fst to an output stream.
+  // 'dest' is only used as the destination name in error messages. Nothing is
+  // printed when the FST has no start state. The start state is printed
+  // first, then every other state in iteration order.
+  void Print(ostream *ostrm, const string &dest) {
+    ostrm_ = ostrm;
+    dest_ = dest;
+    StateId start = fst_.Start();
+    if (start == kNoStateId)
+      return;
+    // initial state first
+    PrintState(start);
+    for (StateIterator< Fst<A> > siter(fst_);
+         !siter.Done();
+         siter.Next()) {
+      StateId s = siter.Value();
+      if (s != start)
+        PrintState(s);
+    }
+  }
+
+ private:
+  // Maximum line length in text file.
+  // (Not referenced anywhere else in this class.)
+  static const int kLineLen = 8096;
+
+  // Writes 'id' either as its symbol from 'syms' or, without a table, as a
+  // number. An id that the table maps to the empty string is reported through
+  // FSTERROR() and printed as "?". The 'name' argument is currently unused.
+  void PrintId(int64 id, const SymbolTable *syms,
+               const char *name) const {
+    if (syms) {
+      string symbol = syms->Find(id);
+      if (symbol == "") {
+        FSTERROR() << "FstPrinter: Integer " << id
+                   << " is not mapped to any textual symbol"
+                   << ", symbol table = " << syms->Name()
+                   << ", destination = " << dest_;
+        symbol = "?";
+      }
+      *ostrm_ << symbol;
+    } else {
+      *ostrm_ << id;
+    }
+  }
+
+  void PrintStateId(StateId s) const {
+    PrintId(s, ssyms_, "state ID");
+  }
+
+  void PrintILabel(Label l) const {
+    PrintId(l, isyms_, "arc input label");
+  }
+
+  void PrintOLabel(Label l) const {
+    PrintId(l, osyms_, "arc output label");
+  }
+
+  // Prints one line per outgoing arc of 's':
+  //   source, destination, input label[, output label][, weight]
+  // followed by a "state[, final weight]" line when the state is final or
+  // has no arcs at all. Fields are separated by the first character of
+  // FLAGS_fst_field_separator.
+  void PrintState(StateId s) const {
+    bool output = false;
+    for (ArcIterator< Fst<A> > aiter(fst_, s);
+         !aiter.Done();
+         aiter.Next()) {
+      // The arc is only read, so bind it by reference instead of copying it
+      // (and its weight) on every iteration.
+      const Arc &arc = aiter.Value();
+      PrintStateId(s);
+      *ostrm_ << FLAGS_fst_field_separator[0];
+      PrintStateId(arc.nextstate);
+      *ostrm_ << FLAGS_fst_field_separator[0];
+      PrintILabel(arc.ilabel);
+      if (!accep_) {
+        *ostrm_ << FLAGS_fst_field_separator[0];
+        PrintOLabel(arc.olabel);
+      }
+      if (show_weight_one_ || arc.weight != Weight::One())
+        *ostrm_ << FLAGS_fst_field_separator[0] << arc.weight;
+      *ostrm_ << "\n";
+      output = true;
+    }
+    Weight final = fst_.Final(s);
+    // A non-final state without arcs still gets a line of its own, otherwise
+    // it would not appear in the output at all.
+    if (final != Weight::Zero() || !output) {
+      PrintStateId(s);
+      if (show_weight_one_ || final != Weight::One()) {
+        *ostrm_ << FLAGS_fst_field_separator[0] << final;
+      }
+      *ostrm_ << "\n";
+    }
+  }
+
+  const Fst<A> &fst_;
+  const SymbolTable *isyms_;     // ilabel symbol table
+  const SymbolTable *osyms_;     // olabel symbol table
+  const SymbolTable *ssyms_;     // slabel symbol table
+  bool accep_;                   // print as acceptor when possible
+  ostream *ostrm_;               // text FST destination
+  string dest_;                  // text FST destination name
+  bool show_weight_one_;         // print weights equal to Weight::One()
+  DISALLOW_COPY_AND_ASSIGN(FstPrinter);
+};
+
+} // namespace fst
+
+#endif // FST_SCRIPT_PRINT_IMPL_H_
diff --git a/src/include/fst/script/print.h b/src/include/fst/script/print.h
new file mode 100644
index 0000000..f82b19b
--- /dev/null
+++ b/src/include/fst/script/print.h
@@ -0,0 +1,86 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_PRINT_H_
+#define FST_SCRIPT_PRINT_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/script/print-impl.h>
+
+namespace fst {
+namespace script {
+
+// Bundles the arguments of PrintFst so that they can be handed to the
+// arc-typed implementation through a single pointer.
+//
+// Note: the FstClass and the destination string are stored as references,
+// not copies. That is safe only because this struct is used solely to pass
+// them deeper in the call graph. Be sure you understand why this is so
+// before using this struct for anything else!
+struct FstPrinterArgs {
+  const FstClass &fst;
+  const SymbolTable *isyms;
+  const SymbolTable *osyms;
+  const SymbolTable *ssyms;
+  const bool accept;
+  const bool show_weight_one;
+  ostream *ostrm;
+  const string &dest;
+
+  // Stores every argument in the member of the same name.
+  FstPrinterArgs(const FstClass &fst,
+                 const SymbolTable *isyms,
+                 const SymbolTable *osyms,
+                 const SymbolTable *ssyms,
+                 bool accept,
+                 bool show_weight_one,
+                 ostream *ostrm,
+                 const string &dest)
+      : fst(fst),
+        isyms(isyms),
+        osyms(osyms),
+        ssyms(ssyms),
+        accept(accept),
+        show_weight_one(show_weight_one),
+        ostrm(ostrm),
+        dest(dest) {}
+};
+
+// Arc-typed implementation: unwraps the FST for this Arc type, builds an
+// FstPrinter from the packed options and prints to the packed stream.
+template<class Arc>
+void PrintFst(FstPrinterArgs *args) {
+  const Fst<Arc> &typed_fst = *(args->fst.GetFst<Arc>());
+
+  fst::FstPrinter<Arc> printer(typed_fst,
+                               args->isyms, args->osyms, args->ssyms,
+                               args->accept, args->show_weight_one);
+  printer.Print(args->ostrm, args->dest);
+}
+
+// Type-erased entry point (defined elsewhere). Takes the same pieces of
+// information that FstPrinterArgs bundles, in a different order: the stream
+// and destination name come right after the FST.
+void PrintFst(const FstClass &fst, ostream &ostrm, const string &dest,
+ const SymbolTable *isyms,
+ const SymbolTable *osyms,
+ const SymbolTable *ssyms,
+ bool accept, bool show_weight_one);
+
+
+// Below is a printing function with useful defaults for a few of
+// the fst printer arguments.
+// Prints an arc-typed FST directly, without going through FstClass. The
+// acceptor format is always requested and weights equal to Weight::One() are
+// always shown; the destination name and the symbol tables are optional.
+template <class Arc>
+void PrintFst(const Fst<Arc> &fst, ostream &os, const string dest = "",
+              const SymbolTable *isyms = NULL,
+              const SymbolTable *osyms = NULL,
+              const SymbolTable *ssyms = NULL) {
+  const bool accep = true;
+  const bool show_weight_one = true;
+  fst::FstPrinter<Arc> printer(fst, isyms, osyms, ssyms,
+                               accep, show_weight_one);
+  printer.Print(&os, dest);
+}
+
+} // namespace script
+} // namespace fst
+
+
+
+#endif // FST_SCRIPT_PRINT_H_
diff --git a/src/include/fst/script/project.h b/src/include/fst/script/project.h
new file mode 100644
index 0000000..12ee890
--- /dev/null
+++ b/src/include/fst/script/project.h
@@ -0,0 +1,43 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_PROJECT_H_
+#define FST_SCRIPT_PROJECT_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/project.h> // for ProjectType
+
+namespace fst {
+namespace script {
+
+// Argument pack for Project: (FST to modify, projection type).
+typedef args::Package<MutableFstClass*, ProjectType> ProjectArgs;
+
+// Arc-typed implementation: unwraps the mutable FST for this Arc type and
+// forwards to the two-argument Project with the packed projection type.
+template<class Arc>
+void Project(ProjectArgs *args) {
+  MutableFst<Arc> *typed_fst = args->arg1->GetMutableFst<Arc>();
+  Project(typed_fst, args->arg2);
+}
+
+// Type-erased entry point (defined elsewhere).
+void Project(MutableFstClass *ofst, ProjectType project_type);
+
+} // namespace script
+} // namespace fst
+
+
+
+#endif // FST_SCRIPT_PROJECT_H_
diff --git a/src/include/fst/script/prune.h b/src/include/fst/script/prune.h
new file mode 100644
index 0000000..7118ff1
--- /dev/null
+++ b/src/include/fst/script/prune.h
@@ -0,0 +1,153 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_PRUNE_H_
+#define FST_SCRIPT_PRUNE_H_
+
+#include <vector>
+using std::vector;
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/script/weight-class.h>
+#include <fst/prune.h>
+#include <fst/arcfilter.h>
+
+namespace fst {
+namespace script {
+
+// Script-level (arc-type independent) options for Prune.  The fields are
+// handed positionally to fst::PruneOptions by ConvertPruneOptions.
+struct PruneOptions {
+  WeightClass weight_threshold;
+  int64 state_threshold;
+  // Optional; may be 0.  This struct only stores the pointer and never
+  // deletes it.
+  const vector<WeightClass> *distance;
+  float delta;
+
+  // The distance pointer is stored as pointer-to-const, so it is accepted
+  // as pointer-to-const as well; callers holding a non-const pointer still
+  // convert implicitly.
+  explicit PruneOptions(const WeightClass& w, int64 s,
+                        const vector<WeightClass> *d = 0, float e = kDelta)
+      : weight_threshold(w),
+        state_threshold(s),
+        distance(d),
+        delta(e) {}
+ private:
+  PruneOptions();  // disallow
+};
+
+// Builds the arc-typed fst::PruneOptions that corresponds to a script-level
+// PruneOptions.
+// Notes:
+// When opts.distance is not NULL, the result carries a vector of typed
+// weights allocated with new; the caller is responsible for deleting it.
+
+template<class A>
+fst::PruneOptions<A, AnyArcFilter<A> > ConvertPruneOptions(
+    const PruneOptions &opts) {
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+  typedef fst::PruneOptions<A, AnyArcFilter<A> > TypedOptions;
+
+  Weight typed_weight = *(opts.weight_threshold.GetWeight<Weight>());
+  StateId typed_states = opts.state_threshold;
+
+  // Stays 0 unless the caller supplied distances.
+  vector<Weight> *typed_distance = 0;
+  if (opts.distance) {
+    const vector<WeightClass> &untyped = *opts.distance;
+    typed_distance = new vector<Weight>(untyped.size());
+    for (unsigned n = 0; n < untyped.size(); ++n) {
+      (*typed_distance)[n] = *(untyped[n].GetWeight<Weight>());
+    }
+  }
+
+  return TypedOptions(typed_weight, typed_states, AnyArcFilter<A>(),
+                      typed_distance, opts.delta);
+}
+
+// 1
+//   arg1 - FST handed to Prune as a MutableFst<Arc>
+//   arg2 - script-level options
+typedef args::Package<MutableFstClass *, const PruneOptions &> PruneArgs1;
+
+// Arc-typed entry point: converts the options, prunes, then releases the
+// typed distance vector created by the conversion.
+template<class Arc>
+void Prune(PruneArgs1 *args) {
+  MutableFst<Arc> *ofst = args->arg1->GetMutableFst<Arc>();
+
+  fst::PruneOptions<Arc, AnyArcFilter<Arc> > opts =
+      ConvertPruneOptions<Arc>(args->arg2);
+  Prune(ofst, opts);
+  // ConvertPruneOptions leaves the caller in charge of the distance vector
+  // (0 when none was supplied; deleting a null pointer is a no-op).
+  delete opts.distance;
+}
+
+// 2
+//   arg1 - input FST
+//   arg2 - output FST
+//   arg3 - script-level options
+typedef args::Package<const FstClass &, MutableFstClass *,
+                      const PruneOptions &> PruneArgs2;
+
+// Arc-typed entry point for the input/output form of Prune.
+template<class Arc>
+void Prune(PruneArgs2 *args) {
+  typedef fst::PruneOptions<Arc, AnyArcFilter<Arc> > TypedOptions;
+
+  const Fst<Arc> &source = *(args->arg1.GetFst<Arc>());
+  MutableFst<Arc> *target = args->arg2->GetMutableFst<Arc>();
+
+  TypedOptions typed_opts = ConvertPruneOptions<Arc>(args->arg3);
+  Prune(source, target, typed_opts);
+
+  // The conversion allocated the distance vector (if any); free it here.
+  delete typed_opts.distance;
+}
+
+// 3
+//   arg1 - input FST
+//   arg2 - output FST
+//   arg3 - weight threshold
+//   arg4, arg5 - forwarded unchanged as the last two Prune arguments
+typedef args::Package<const FstClass &,
+                      MutableFstClass *,
+                      const WeightClass &, int64, float> PruneArgs3;
+
+// Arc-typed entry point: unwraps both FSTs and the typed weight, then calls
+// Prune.
+template<class Arc>
+void Prune(PruneArgs3 *args) {
+  typedef typename Arc::Weight Weight;
+
+  const Fst<Arc> &source = *(args->arg1.GetFst<Arc>());
+  MutableFst<Arc> *target = args->arg2->GetMutableFst<Arc>();
+  Weight threshold = *(args->arg3.GetWeight<Weight>());
+
+  Prune(source, target, threshold, args->arg4, args->arg5);
+}
+
+// 4
+//   arg1 - FST handed to Prune as a MutableFst<Arc>
+//   arg2 - weight threshold
+//   arg3, arg4 - forwarded unchanged as the last two Prune arguments
+typedef args::Package<MutableFstClass *, const WeightClass&,
+                      int64, float> PruneArgs4;
+
+// Arc-typed entry point: unwraps the FST and the typed weight, then calls
+// Prune.
+template<class Arc>
+void Prune(PruneArgs4 *args) {
+  typedef typename Arc::Weight Weight;
+
+  MutableFst<Arc> *typed_fst = args->arg1->GetMutableFst<Arc>();
+  Weight threshold = *(args->arg2.GetWeight<Weight>());
+
+  Prune(typed_fst, threshold, args->arg3, args->arg4);
+}
+
+
+// 1: prototype matching PruneArgs1 (single FST, settings bundled in opts).
+void Prune(MutableFstClass *fst, const PruneOptions &opts);
+
+// 2: prototype matching PruneArgs2 (input and output FST, settings in opts).
+void Prune(const FstClass &ifst, MutableFstClass *fst,
+ const PruneOptions &opts);
+
+// 3: prototype matching PruneArgs3 (input and output FST, scalar settings).
+void Prune(const FstClass &ifst, MutableFstClass *ofst,
+ const WeightClass &weight_threshold,
+ int64 state_threshold = kNoStateId,
+ float delta = kDelta);
+
+// 4: prototype matching PruneArgs4; unlike 3 it declares no default values.
+void Prune(MutableFstClass *fst, const WeightClass& weight_threshold,
+ int64 state_threshold, float delta);
+
+} // namespace script
+} // namespace fst
+
+
+
+#endif // FST_SCRIPT_PRUNE_H_
diff --git a/src/include/fst/script/push.h b/src/include/fst/script/push.h
new file mode 100644
index 0000000..cebd655
--- /dev/null
+++ b/src/include/fst/script/push.h
@@ -0,0 +1,70 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_PUSH_H_
+#define FST_SCRIPT_PUSH_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/push.h>
+
+namespace fst {
+namespace script {
+
+// 1
+//   arg1 - FST handed to fst::Push as a MutableFst<Arc>
+//   arg2 - requested reweight direction
+//   arg3, arg4 - forwarded unchanged as the last two fst::Push arguments
+typedef args::Package<MutableFstClass*, ReweightType, float, bool> PushArgs1;
+
+// Arc-typed entry point for the single-FST form of Push.
+template<class Arc>
+void Push(PushArgs1 *args) {
+  MutableFst<Arc> *typed_fst = args->arg1->GetMutableFst<Arc>();
+
+  // Anything other than REWEIGHT_TO_FINAL is treated as REWEIGHT_TO_INITIAL.
+  const ReweightType direction =
+      (args->arg2 == REWEIGHT_TO_FINAL) ? REWEIGHT_TO_FINAL
+                                        : REWEIGHT_TO_INITIAL;
+
+  fst::Push(typed_fst, direction, args->arg3, args->arg4);
+}
+
+// 2
+//   arg1 - input FST
+//   arg2 - output FST
+//   arg3 - flags, forwarded unchanged
+//   arg4 - requested reweight direction
+//   arg5 - forwarded unchanged as the last fst::Push argument
+typedef args::Package<const FstClass &, MutableFstClass *, uint32,
+                      ReweightType, float> PushArgs2;
+
+// Arc-typed entry point for the input/output form of Push.
+template<class Arc>
+void Push(PushArgs2 *args) {
+  const Fst<Arc> &source = *(args->arg1.GetFst<Arc>());
+  MutableFst<Arc> *target = args->arg2->GetMutableFst<Arc>();
+
+  // The direction is a template argument of fst::Push here, so the run-time
+  // value has to pick between two instantiations.  Anything other than
+  // REWEIGHT_TO_FINAL selects the REWEIGHT_TO_INITIAL one.
+  if (args->arg4 != REWEIGHT_TO_FINAL) {
+    fst::Push<Arc, REWEIGHT_TO_INITIAL>(source, target, args->arg3,
+                                        args->arg5);
+    return;
+  }
+  fst::Push<Arc, REWEIGHT_TO_FINAL>(source, target, args->arg3, args->arg5);
+}
+
+// 1: prototype matching PushArgs1 (single FST; delta and flag have defaults).
+void Push(MutableFstClass *ofst, ReweightType type, float delta = kDelta,
+ bool remove_total_weight = false);
+
+// 2: prototype matching PushArgs2 (input and output FST; no defaults).
+void Push(const FstClass &ifst, MutableFstClass *ofst, uint32 flags,
+ ReweightType dir, float delta);
+
+} // namespace script
+} // namespace fst
+
+
+
+#endif // FST_SCRIPT_PUSH_H_
diff --git a/src/include/fst/script/randequivalent.h b/src/include/fst/script/randequivalent.h
new file mode 100644
index 0000000..b929683
--- /dev/null
+++ b/src/include/fst/script/randequivalent.h
@@ -0,0 +1,105 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_RANDEQUIVALENT_H_
+#define FST_SCRIPT_RANDEQUIVALENT_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/script/randgen.h> // for RandArcSelection
+#include <fst/randequivalent.h>
+
+namespace fst {
+namespace script {
+
+// 1
+// Arguments, in the order of the client-facing prototype:
+//   arg1, arg2 - the two FSTs to compare
+//   arg3 - seed, arg4 - num_paths, arg5 - delta, arg6 - path_length
+// The element types mirror that prototype (and RandEquivalentInnerArgs2),
+// so num_paths is not squeezed through a float and delta is not truncated
+// to an int before reaching the typed RandEquivalent call.
+typedef args::Package<const FstClass&, const FstClass&,
+                      int32, ssize_t, float, int> RandEquivalentInnerArgs1;
+typedef args::WithReturnValue<bool,
+                              RandEquivalentInnerArgs1> RandEquivalentArgs1;
+
+// Arc-typed entry point: unwraps both FSTs, forwards the remaining
+// arguments in order, and stores the result in args->retval.
+template<class Arc>
+void RandEquivalent(RandEquivalentArgs1 *args) {
+  const Fst<Arc> &fst1 = *(args->args.arg1.GetFst<Arc>());
+  const Fst<Arc> &fst2 = *(args->args.arg2.GetFst<Arc>());
+
+  args->retval = RandEquivalent(fst1, fst2, args->args.arg3, args->args.arg4,
+                                args->args.arg5, args->args.arg6);
+}
+
+// 2
+//   arg1, arg2 - the two FSTs to compare
+//   arg3 - seed for the arc selector
+//   arg4, arg5 - forwarded unchanged to the typed RandEquivalent call
+//   arg6 - script-level random generation options
+typedef args::Package<const FstClass &, const FstClass &, int32,
+                      ssize_t, float,
+                      const RandGenOptions<RandArcSelection> &>
+  RandEquivalentInnerArgs2;
+
+typedef args::WithReturnValue<bool,
+                              RandEquivalentInnerArgs2> RandEquivalentArgs2;
+
+// Arc-typed entry point: builds the arc selector named by
+// opts.arc_selector, wraps it in typed RandGenOptions, and stores the
+// result of RandEquivalent in args->retval.
+template<class Arc>
+void RandEquivalent(RandEquivalentArgs2 *args) {
+  const Fst<Arc> &fst1 = *(args->args.arg1.GetFst<Arc>());
+  const Fst<Arc> &fst2 = *(args->args.arg2.GetFst<Arc>());
+  const RandGenOptions<RandArcSelection> &opts = args->args.arg6;
+  int32 seed = args->args.arg3;
+
+  switch (opts.arc_selector) {
+    case UNIFORM_ARC_SELECTOR: {
+      UniformArcSelector<Arc> selector(seed);
+      RandGenOptions< UniformArcSelector<Arc> >
+          typed_opts(selector, opts.max_length, opts.npath);
+      args->retval = RandEquivalent(fst1, fst2, args->args.arg4,
+                                    args->args.arg5, typed_opts);
+      break;
+    }
+    case FAST_LOG_PROB_ARC_SELECTOR: {
+      FastLogProbArcSelector<Arc> selector(seed);
+      RandGenOptions< FastLogProbArcSelector<Arc> >
+          typed_opts(selector, opts.max_length, opts.npath);
+      args->retval = RandEquivalent(fst1, fst2, args->args.arg4,
+                                    args->args.arg5, typed_opts);
+      break;
+    }
+    case LOG_PROB_ARC_SELECTOR:
+    default: {
+      // Every remaining value falls back to the log-probability selector.
+      LogProbArcSelector<Arc> selector(seed);
+      RandGenOptions< LogProbArcSelector<Arc> >
+          typed_opts(selector, opts.max_length, opts.npath);
+      args->retval = RandEquivalent(fst1, fst2, args->args.arg4,
+                                    args->args.arg5, typed_opts);
+      break;
+    }
+  }
+}
+
+
+// 1: prototype taking scalar settings, all of them defaulted.
+bool RandEquivalent(const FstClass &fst1,
+ const FstClass &fst2,
+ int32 seed = time(0),  // NOTE(review): no <ctime> include visible here
+ ssize_t num_paths = 1,
+ float delta = fst::kDelta,
+ int path_length = INT_MAX);  // NOTE(review): no <climits> include visible
+
+// 2: prototype taking the random generation settings as an options object.
+bool RandEquivalent(const FstClass &fst1,
+ const FstClass &fst2,
+ int32 seed,
+ ssize_t num_paths,
+ float delta,
+ const fst::RandGenOptions<
+ fst::script::RandArcSelection> &opts);
+
+} // namespace script
+} // namespace fst
+
+
+
+#endif // FST_SCRIPT_RANDEQUIVALENT_H_
diff --git a/src/include/fst/script/randgen.h b/src/include/fst/script/randgen.h
new file mode 100644
index 0000000..817f9c1
--- /dev/null
+++ b/src/include/fst/script/randgen.h
@@ -0,0 +1,76 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_RANDGEN_H_
+#define FST_SCRIPT_RANDGEN_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/randgen.h>
+
+namespace fst {
+namespace script {
+
+enum RandArcSelection {  // run-time tag choosing the selector in RandGen
+ UNIFORM_ARC_SELECTOR,  // RandGen builds a UniformArcSelector
+ LOG_PROB_ARC_SELECTOR,  // RandGen builds a LogProbArcSelector (fallback)
+ FAST_LOG_PROB_ARC_SELECTOR  // RandGen builds a FastLogProbArcSelector
+};
+
+// Argument bundle for the script-level RandGen operation:
+//   arg1 - input FST
+//   arg2 - output FST
+//   arg3 - seed for the arc selector
+//   arg4 - script-level random generation options
+typedef args::Package<const FstClass &, MutableFstClass*, int32,
+                      const RandGenOptions<RandArcSelection> &> RandGenArgs;
+
+// Arc-typed entry point: builds the arc selector named by
+// opts.arc_selector, wraps it in typed RandGenOptions, and calls RandGen.
+template<class Arc>
+void RandGen(RandGenArgs *args) {
+  const Fst<Arc> &source = *(args->arg1.GetFst<Arc>());
+  MutableFst<Arc> *target = args->arg2->GetMutableFst<Arc>();
+  int32 seed = args->arg3;
+  const RandGenOptions<RandArcSelection> &opts = args->arg4;
+
+  switch (opts.arc_selector) {
+    case UNIFORM_ARC_SELECTOR: {
+      UniformArcSelector<Arc> selector(seed);
+      RandGenOptions< UniformArcSelector<Arc> >
+          typed_opts(selector, opts.max_length,
+                     opts.npath, opts.weighted);
+      RandGen(source, target, typed_opts);
+      break;
+    }
+    case FAST_LOG_PROB_ARC_SELECTOR: {
+      FastLogProbArcSelector<Arc> selector(seed);
+      RandGenOptions< FastLogProbArcSelector<Arc> >
+          typed_opts(selector, opts.max_length,
+                     opts.npath, opts.weighted);
+      RandGen(source, target, typed_opts);
+      break;
+    }
+    case LOG_PROB_ARC_SELECTOR:
+    default: {
+      // Every remaining value falls back to the log-probability selector.
+      LogProbArcSelector<Arc> selector(seed);
+      RandGenOptions< LogProbArcSelector<Arc> >
+          typed_opts(selector, opts.max_length,
+                     opts.npath, opts.weighted);
+      RandGen(source, target, typed_opts);
+      break;
+    }
+  }
+}
+
+
+// Client-facing prototype: seed defaults to time(0), selection to uniform.
+void RandGen(const FstClass &ifst, MutableFstClass *ofst, int32 seed = time(0),
+ const RandGenOptions<RandArcSelection> &opts =
+ fst::RandGenOptions<fst::script::RandArcSelection>(
+ fst::script::UNIFORM_ARC_SELECTOR));
+
+} // namespace script
+} // namespace fst
+
+
+
+#endif // FST_SCRIPT_RANDGEN_H_
diff --git a/src/include/fst/script/register.h b/src/include/fst/script/register.h
new file mode 100644
index 0000000..03e0e36
--- /dev/null
+++ b/src/include/fst/script/register.h
@@ -0,0 +1,120 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_REGISTER_H_
+#define FST_SCRIPT_REGISTER_H_
+
+#include <string>
+
+#include <fst/generic-register.h>
+#include <fst/script/fst-class.h>
+#include <fst/script/weight-class.h>
+
+// Holds methods and classes responsible for maintaining
+// the register for FstClass arc types.
+
+namespace fst {
+namespace script {
+
+//
+// Registers for reading and converting various kinds of FST classes.
+//
+
+// Value type stored in the register: one reader, one creator and one
+// converter.  It is a free-standing template so that IORegistration does
+// not need a nested class definition.
+template<class Reader, class Creator, class Converter>
+struct FstClassRegEntry {
+  Reader reader;
+  Creator creator;
+  Converter converter;
+
+  // Empty entry: all three members are initialized from 0.
+  FstClassRegEntry() : reader(0), creator(0), converter(0) { }
+
+  // Entry holding the three given values.
+  FstClassRegEntry(Reader the_reader, Creator the_creator,
+                   Converter the_converter)
+      : reader(the_reader),
+        creator(the_creator),
+        converter(the_converter) { }
+};
+
+// Register keyed by a string (the accessors call it arc_type) whose entries
+// are FstClassRegEntry objects.  The three accessors each return one member
+// of the entry that GetEntry yields for the key.
+template<class Reader, class Creator, class Converter>
+class FstClassIORegister
+    : public GenericRegister<string,
+                             FstClassRegEntry<Reader, Creator, Converter>,
+                             FstClassIORegister<Reader, Creator,
+                                                Converter> > {
+ public:
+  Reader GetReader(const string &arc_type) const {
+    return this->GetEntry(arc_type).reader;
+  }
+
+  Creator GetCreator(const string &arc_type) const {
+    return this->GetEntry(arc_type).creator;
+  }
+
+  Converter GetConverter(const string &arc_type) const {
+    return this->GetEntry(arc_type).converter;
+  }
+
+ protected:
+  // Maps a key to a shared-object file name: the key is passed through
+  // ConvertToLegalCSymbol and "-arc.so" is appended.
+  virtual string ConvertKeyToSoFilename(
+      const string& key) const {
+    string so_name = key;
+    ConvertToLegalCSymbol(&so_name);
+    so_name += "-arc.so";
+    return so_name;
+  }
+};
+
+//
+// Bundles the function-pointer and register types needed to register a
+// particular type of FST class (e.g. a plain FstClass, or a
+// MutableFstClass, etc)
+//
+template<class FstClassType>
+struct IORegistration {
+  // Function taking a stream and read options, returning an FstClassType*.
+  typedef FstClassType *(*Reader)(istream &stream,
+                                  const FstReadOptions &opts);
+
+  // Function taking no arguments, returning an FstClassImplBase*.
+  typedef FstClassImplBase *(*Creator)();
+
+  // Function taking an existing FstClass, returning an FstClassImplBase*.
+  typedef FstClassImplBase *(*Converter)(const FstClass &other);
+
+  // The value stored in the register.
+  typedef FstClassRegEntry<Reader, Creator, Converter> Entry;
+
+  // FST class Register
+  typedef FstClassIORegister<Reader, Creator, Converter> Register;
+
+  // FST class Register-er, spelled through the Register typedef
+  typedef GenericRegisterer<Register> Registerer;
+};
+
+
+// REGISTRATION MACROS: REGISTER_FST_CLASS defines a static Registerer built
+// from Arc::Type() and an Entry of Class's Read/Create/Convert for Arc;
+// REGISTER_FST_CLASSES does so for Fst-, MutableFst- and VectorFstClass.
+
+#define REGISTER_FST_CLASS(Class, Arc) \
+ static IORegistration<Class>::Registerer Class ## _ ## Arc ## _registerer( \
+ Arc::Type(), \
+ IORegistration<Class>::Entry(Class::Read<Arc>, \
+ Class::Create<Arc>, \
+ Class::Convert<Arc>))
+
+#define REGISTER_FST_CLASSES(Arc) \
+ REGISTER_FST_CLASS(FstClass, Arc); \
+ REGISTER_FST_CLASS(MutableFstClass, Arc); \
+ REGISTER_FST_CLASS(VectorFstClass, Arc);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_REGISTER_H_
diff --git a/src/include/fst/script/relabel.h b/src/include/fst/script/relabel.h
new file mode 100644
index 0000000..6bbb4c5
--- /dev/null
+++ b/src/include/fst/script/relabel.h
@@ -0,0 +1,102 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_RELABEL_H_
+#define FST_SCRIPT_RELABEL_H_
+
+#include <utility>
+using std::pair; using std::make_pair;
+#include <algorithm>
+#include <vector>
+using std::vector;
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/relabel.h>
+
+namespace fst {
+namespace script {
+
+// 1
+//   arg1 - FST handed to Relabel as a MutableFst<Arc>
+//   arg2 .. arg7 - forwarded unchanged, in order, to the typed Relabel
+//                  (see prototype 1 for what each one stands for)
+typedef args::Package<MutableFstClass *,
+                      const SymbolTable *, const SymbolTable *, bool,
+                      const SymbolTable *, const SymbolTable *,
+                      bool> RelabelArgs1;
+
+// Arc-typed entry point for the symbol-table form with attach flags.
+template<class Arc>
+void Relabel(RelabelArgs1 *args) {
+  MutableFst<Arc> *typed_fst = args->arg1->GetMutableFst<Arc>();
+
+  Relabel(typed_fst,
+          args->arg2, args->arg3, args->arg4,
+          args->arg5, args->arg6, args->arg7);
+}
+
+// 2
+//   arg1 - FST handed to Relabel as a MutableFst<Arc>
+//   arg2 - (old, new) pairs passed as the first pair list
+//   arg3 - (old, new) pairs passed as the second pair list
+// Both pair lists are held by const reference, matching prototype 2, so
+// building the package does not copy the second vector.
+typedef args::Package<MutableFstClass*,
+                      const vector<pair<int64, int64> > &,
+                      const vector<pair<int64, int64> > &> RelabelArgs2;
+
+// Arc-typed entry point for the label-pair form of Relabel.
+template<class Arc>
+void Relabel(RelabelArgs2 *args) {
+  MutableFst<Arc> *ofst = args->arg1->GetMutableFst<Arc>();
+
+  // In case int64 is not the same as Arc::Label,
+  // copy the reassignments
+  typedef typename Arc::Label Label;
+
+  vector<pair<Label, Label> > converted_ipairs(args->arg2.size());
+  std::copy(args->arg2.begin(), args->arg2.end(), converted_ipairs.begin());
+
+  vector<pair<Label, Label> > converted_opairs(args->arg3.size());
+  std::copy(args->arg3.begin(), args->arg3.end(), converted_opairs.begin());
+
+  Relabel(ofst, converted_ipairs, converted_opairs);
+}
+
+// 3
+//   arg1 - FST handed to Relabel as a MutableFst<Arc>
+//   arg2, arg3 - symbol tables forwarded unchanged, in order
+typedef args::Package<MutableFstClass*, const SymbolTable*,
+                      const SymbolTable*> RelabelArgs3;
+
+// Arc-typed entry point for the two-symbol-table form of Relabel.
+template<class Arc>
+void Relabel(RelabelArgs3 *args) {
+  MutableFst<Arc> *typed_fst = args->arg1->GetMutableFst<Arc>();
+
+  Relabel(typed_fst, args->arg2, args->arg3);
+}
+
+
+// 1: prototype matching RelabelArgs1 (symbol tables plus attach flags).
+void Relabel(MutableFstClass *ofst,
+             const SymbolTable *old_isyms, const SymbolTable *relabel_isyms,
+             bool attach_new_isyms,
+             const SymbolTable *old_osyms, const SymbolTable *relabel_osyms,
+             bool attach_new_osyms);
+
+// 2: prototype matching RelabelArgs2 (explicit label pairs).
+void Relabel(MutableFstClass *ofst,
+             const vector<pair<int64, int64> > &ipairs,
+             const vector<pair<int64, int64> > &opairs);
+
+
+// 3: prototype matching RelabelArgs3 (two symbol tables only).
+void Relabel(MutableFstClass *fst,
+             const SymbolTable *new_isymbols,
+             const SymbolTable *new_osymbols);
+
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_RELABEL_H_
diff --git a/src/include/fst/script/replace.h b/src/include/fst/script/replace.h
new file mode 100644
index 0000000..5eaf5bf
--- /dev/null
+++ b/src/include/fst/script/replace.h
@@ -0,0 +1,62 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_REPLACE_H_
+#define FST_SCRIPT_REPLACE_H_
+
+#include <utility>
+using std::pair; using std::make_pair;
+#include <vector>
+using std::vector;
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/replace.h>
+
+namespace fst {
+namespace script {
+
+// Argument bundle for the script-level Replace operation:
+//   arg1 - (label, FST) pairs, still arc-type independent
+//   arg2 - output FST
+//   arg3, arg4 - forwarded unchanged as the last two Replace arguments
+typedef args::Package<const vector<pair<int64, const FstClass *> > &,
+                      MutableFstClass *, const int64, bool> ReplaceArgs;
+
+// Arc-typed entry point: once the arc type is known, the untyped pairs are
+// turned into pair<Arc::Label, const Fst<Arc> *> for the typed Replace.
+template<class Arc>
+void Replace(ReplaceArgs *args) {
+  typedef pair<typename Arc::Label, const Fst<Arc> *> TypedTuple;
+
+  const vector<pair<int64, const FstClass *> > &untyped = args->arg1;
+  vector<TypedTuple> typed(untyped.size());
+
+  for (unsigned n = 0; n < untyped.size(); ++n) {
+    const pair<int64, const FstClass *> &src = untyped[n];
+    typed[n].first = src.first;  // convert label
+    typed[n].second = src.second->GetFst<Arc>();
+  }
+
+  MutableFst<Arc> *target = args->arg2->GetMutableFst<Arc>();
+
+  Replace(typed, target, args->arg3, args->arg4);
+}
+
+// Script-level prototype; only the declaration lives in this header.
+void Replace(const vector<pair<int64, const FstClass *> > &tuples,
+             MutableFstClass *ofst, const int64 &root,
+             bool epsilon_on_replace = false);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_REPLACE_H_
diff --git a/src/include/fst/script/reverse.h b/src/include/fst/script/reverse.h
new file mode 100644
index 0000000..3930875
--- /dev/null
+++ b/src/include/fst/script/reverse.h
@@ -0,0 +1,42 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_REVERSE_H_
+#define FST_SCRIPT_REVERSE_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/reverse.h>
+
+namespace fst {
+namespace script {
+
+// Argument bundle for the script-level Reverse operation:
+//   arg1 - input FST
+//   arg2 - output FST
+typedef args::Package<const FstClass &, MutableFstClass *> ReverseArgs;
+
+// Arc-typed entry point: unwraps both FSTs and calls Reverse.
+template<class Arc>
+void Reverse(ReverseArgs *args) {
+  const Fst<Arc> &source = *(args->arg1.GetFst<Arc>());
+  MutableFst<Arc> *target = args->arg2->GetMutableFst<Arc>();
+
+  Reverse(source, target);
+}
+
+// Script-level prototype; only the declaration lives in this header.
+void Reverse(const FstClass &fst1, MutableFstClass *fst2);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_REVERSE_H_
diff --git a/src/include/fst/script/reweight.h b/src/include/fst/script/reweight.h
new file mode 100644
index 0000000..7bce839
--- /dev/null
+++ b/src/include/fst/script/reweight.h
@@ -0,0 +1,53 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_REWEIGHT_H_
+#define FST_SCRIPT_REWEIGHT_H_
+
+#include <vector>
+using std::vector;
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/script/weight-class.h>
+#include <fst/reweight.h>
+
+namespace fst {
+namespace script {
+
+// Argument bundle for the script-level Reweight operation:
+//   arg1 - FST handed to Reweight as a MutableFst<Arc>
+//   arg2 - potentials as arc-type independent WeightClass values
+//   arg3 - reweight type, forwarded unchanged
+typedef args::Package<MutableFstClass *, const vector<WeightClass> &,
+                      ReweightType> ReweightArgs;
+
+// Arc-typed entry point: converts every potential to Arc::Weight, then
+// calls Reweight.
+template<class Arc>
+void Reweight(ReweightArgs *args) {
+  typedef typename Arc::Weight Weight;
+
+  MutableFst<Arc> *typed_fst = args->arg1->GetMutableFst<Arc>();
+  const vector<WeightClass> &untyped = args->arg2;
+
+  vector<Weight> typed(untyped.size());
+  for (unsigned n = 0; n < untyped.size(); ++n) {
+    typed[n] = *(untyped[n].GetWeight<Weight>());
+  }
+
+  Reweight(typed_fst, typed, args->arg3);
+}
+
+// Script-level prototype; only the declaration lives in this header.
+void Reweight(MutableFstClass *fst, const vector<WeightClass> &potential,
+              ReweightType reweight_type);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_REWEIGHT_H_
diff --git a/src/include/fst/script/rmepsilon.h b/src/include/fst/script/rmepsilon.h
new file mode 100644
index 0000000..62fed03
--- /dev/null
+++ b/src/include/fst/script/rmepsilon.h
@@ -0,0 +1,211 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_RMEPSILON_H_
+#define FST_SCRIPT_RMEPSILON_H_
+
+#include <vector>
+using std::vector;
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/script/weight-class.h>
+#include <fst/script/shortest-distance.h> // for ShortestDistanceOptions
+#include <fst/rmepsilon.h>
+#include <fst/queue.h>
+
+// the following is necessary, or SWIG complains mightily about
+// shortestdistanceoptions not being defined before being used as a base.
+#ifdef SWIG
+%include "nlp/fst/script/shortest-distance.h"
+#endif
+
+
+namespace fst {
+namespace script {
+
+//
+// OPTIONS
+//
+
+// Script-level options for RmEpsilon.  Extends the script-level
+// ShortestDistanceOptions, which the constructor always initializes with
+// EPSILON_ARC_FILTER and kNoStateId.
+struct RmEpsilonOptions : public fst::script::ShortestDistanceOptions {
+  bool connect;
+  WeightClass weight_threshold;
+  int64 state_threshold;
+
+  RmEpsilonOptions(QueueType queue_kind = AUTO_QUEUE,
+                   float delta_value = kDelta,
+                   bool do_connect = true,
+                   WeightClass weight_limit =
+                       fst::script::WeightClass::Zero(),
+                   int64 state_limit = kNoStateId)
+      : ShortestDistanceOptions(queue_kind, EPSILON_ARC_FILTER,
+                                kNoStateId, delta_value),
+        connect(do_connect),
+        weight_threshold(weight_limit),
+        state_threshold(state_limit) { }
+};
+
+
+//
+// TEMPLATES
+//
+
+// Turns a script-land RmEpsilonOptions into the lib-land
+// fst::RmEpsilonOptions for the queue named by opts.queue_type and runs
+// RmEpsilon with it.  An unhandled queue type is reported through FSTERROR
+// and marks fst with kError.
+template<class Arc>
+void RmEpsilonHelper(MutableFst<Arc> *fst,
+                     vector<typename Arc::Weight> *distance,
+                     const RmEpsilonOptions &opts) {
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Weight Weight;
+
+  Weight threshold = *(opts.weight_threshold.GetWeight<Weight>());
+
+  // Each case builds its own queue object (the queue types take different
+  // constructor arguments) and passes its address to the typed options.
+  switch (opts.queue_type) {
+    case AUTO_QUEUE: {
+      typedef AutoQueue<StateId> Queue;
+      Queue queue(*fst, distance, EpsilonArcFilter<Arc>());
+      fst::RmEpsilonOptions<Arc, Queue> typed_opts(
+          &queue, opts.delta, opts.connect, threshold, opts.state_threshold);
+      RmEpsilon(fst, distance, typed_opts);
+      break;
+    }
+    case FIFO_QUEUE: {
+      typedef FifoQueue<StateId> Queue;
+      Queue queue;
+      fst::RmEpsilonOptions<Arc, Queue> typed_opts(
+          &queue, opts.delta, opts.connect, threshold, opts.state_threshold);
+      RmEpsilon(fst, distance, typed_opts);
+      break;
+    }
+    case LIFO_QUEUE: {
+      typedef LifoQueue<StateId> Queue;
+      Queue queue;
+      fst::RmEpsilonOptions<Arc, Queue> typed_opts(
+          &queue, opts.delta, opts.connect, threshold, opts.state_threshold);
+      RmEpsilon(fst, distance, typed_opts);
+      break;
+    }
+    case SHORTEST_FIRST_QUEUE: {
+      typedef NaturalShortestFirstQueue<StateId, Weight> Queue;
+      Queue queue(*distance);
+      fst::RmEpsilonOptions<Arc, Queue> typed_opts(
+          &queue, opts.delta, opts.connect, threshold, opts.state_threshold);
+      RmEpsilon(fst, distance, typed_opts);
+      break;
+    }
+    case STATE_ORDER_QUEUE: {
+      typedef StateOrderQueue<StateId> Queue;
+      Queue queue;
+      fst::RmEpsilonOptions<Arc, Queue> typed_opts(
+          &queue, opts.delta, opts.connect, threshold, opts.state_threshold);
+      RmEpsilon(fst, distance, typed_opts);
+      break;
+    }
+    case TOP_ORDER_QUEUE: {
+      typedef TopOrderQueue<StateId> Queue;
+      Queue queue(*fst, EpsilonArcFilter<Arc>());
+      fst::RmEpsilonOptions<Arc, Queue> typed_opts(
+          &queue, opts.delta, opts.connect, threshold, opts.state_threshold);
+      RmEpsilon(fst, distance, typed_opts);
+      break;
+    }
+    default:
+      FSTERROR() << "Unknown or unsupported queue type: " << opts.queue_type;
+      fst->SetProperties(kError, kError);
+  }
+}
+
+// 1
+//   arg1 - input FST
+//   arg2 - output FST
+//   arg3 - whether to work on the reversed FST first
+//   arg4 - script-level options
+typedef args::Package<const FstClass &, MutableFstClass *,
+                      bool, const RmEpsilonOptions &> RmEpsilonArgs1;
+
+// Arc-typed entry point for the input/output form of RmEpsilon.
+template<class Arc>
+void RmEpsilon(RmEpsilonArgs1 *args) {
+  const Fst<Arc> &source = *(args->arg1.GetFst<Arc>());
+  MutableFst<Arc> *target = args->arg2->GetMutableFst<Arc>();
+  vector<typename Arc::Weight> distance;
+  const bool run_reversed = args->arg3;
+
+  if (!run_reversed) {
+    *target = source;
+  } else {
+    VectorFst<Arc> reversed;
+    Reverse(source, &reversed);
+    RmEpsilonHelper(&reversed, &distance, args->arg4);
+    Reverse(reversed, target);
+  }
+  // Runs in both cases.  NOTE(review): in the reversed case this is a
+  // second pass over the re-reversed result, reusing the same distance
+  // vector -- presumably to handle epsilons added by Reverse; confirm.
+  RmEpsilonHelper(target, &distance, args->arg4);
+}
+
+// 2
+//   arg1 - FST handed to RmEpsilon as a MutableFst<Arc>
+//   arg2 - forwarded unchanged as the second RmEpsilon argument
+//   arg3 - weight threshold
+//   arg4, arg5 - forwarded unchanged as the last two RmEpsilon arguments
+typedef args::Package<MutableFstClass *, bool,
+                      const WeightClass, int64,
+                      float> RmEpsilonArgs2;
+
+// Arc-typed entry point: unwraps the FST and the typed weight, then calls
+// RmEpsilon.
+template<class Arc>
+void RmEpsilon(RmEpsilonArgs2 *args) {
+  typedef typename Arc::Weight Weight;
+
+  MutableFst<Arc> *typed_fst = args->arg1->GetMutableFst<Arc>();
+  Weight threshold = *(args->arg3.GetWeight<Weight>());
+
+  RmEpsilon(typed_fst, args->arg2, threshold, args->arg4, args->arg5);
+}
+
+// 3
+//   arg1 - FST handed to RmEpsilonHelper as a MutableFst<Arc>
+//   arg2 - receives the resulting distances as WeightClass values
+//   arg3 - script-level options
+typedef args::Package<MutableFstClass *, vector<WeightClass> *,
+                      const RmEpsilonOptions &> RmEpsilonArgs3;
+
+// Arc-typed entry point: runs RmEpsilonHelper with a local typed distance
+// vector, then publishes that vector through arg2.
+template<class Arc>
+void RmEpsilon(RmEpsilonArgs3 *args) {
+  MutableFst<Arc> *typed_fst = args->arg1->GetMutableFst<Arc>();
+  vector<WeightClass> *result = args->arg2;
+
+  vector<typename Arc::Weight> typed_distance;
+  RmEpsilonHelper(typed_fst, &typed_distance, args->arg3);
+
+  // Copy the weights back: resize first, then overwrite element by element.
+  result->resize(typed_distance.size());
+  for (unsigned n = 0; n < typed_distance.size(); ++n) {
+    (*result)[n] = WeightClass(typed_distance[n]);
+  }
+}
+
+//
+// PROTOTYPES
+//
+
+// 1: prototype matching RmEpsilonArgs1 (input and output FST).
+void RmEpsilon(const FstClass &ifst, MutableFstClass *ofst,
+               bool reverse = false,
+               const RmEpsilonOptions& opts =
+                   fst::script::RmEpsilonOptions());
+
+// 2: prototype matching RmEpsilonArgs2 (single FST, scalar settings).
+// The first parameter is an FST, so it is named fst like in prototype 3.
+void RmEpsilon(MutableFstClass *fst, bool connect = true,
+               const WeightClass &weight_threshold =
+                   fst::script::WeightClass::Zero(),
+               int64 state_threshold = fst::kNoStateId,
+               float delta = fst::kDelta);
+
+// 3: prototype matching RmEpsilonArgs3 (single FST, distances written to
+// *distance).
+void RmEpsilon(MutableFstClass *fst, vector<WeightClass> *distance,
+               const RmEpsilonOptions &opts);
+
+
+} // namespace script
+} // namespace fst
+
+
+#endif // FST_SCRIPT_RMEPSILON_H_
diff --git a/src/include/fst/script/script-impl.h b/src/include/fst/script/script-impl.h
new file mode 100644
index 0000000..452c7c5
--- /dev/null
+++ b/src/include/fst/script/script-impl.h
@@ -0,0 +1,206 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+// This file defines the registration mechanism for new operations.
+// These operations are designed to enable scripts to work with FST classes
+// at a high level.
+
+// If you have a new arc type and want these operations to work with FSTs
+// with that arc type, see below for the registration steps
+// you must take.
+
+// These methods are only recommended for use in high-level scripting
+// applications. Most users should use the lower-level templated versions
+// corresponding to these.
+
+// If you have a new arc type you'd like these operations to work with,
+// use the REGISTER_FST_OPERATIONS macro defined in fstscript.h
+
+// If you have a custom operation you'd like to define, you need four
+// components. In the following, assume you want to create a new operation
+// with the signature
+//
+// void Foo(const FstClass &ifst, MutableFstClass *ofst);
+//
+// You need:
+//
+// 1) A way to bundle the args that your new Foo operation will take, as
+// a single struct. The template structs in arg-packs.h provide a handy
+// way to do this. In Foo's case, that might look like this:
+//
+// typedef args::Package<const FstClass &,
+// MutableFstClass *> FooArgs;
+//
+// Note: this package of args is going to be passed by non-const pointer.
+//
+// 2) A function template that is able to perform Foo, given the args and
+// arc type. Yours might look like this:
+//
+// template<class Arc>
+// void Foo(FooArgs *args) {
+// // Pull out the actual, arc-templated FSTs
+// const Fst<Arc> &ifst = *(args->arg1.GetFst<Arc>());
+// MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>();
+//
+// // actually perform foo on ifst and ofst...
+// }
+//
+// 3) a client-facing function for your operation. This would look like
+// the following:
+//
+// void Foo(const FstClass &ifst, MutableFstClass *ofst) {
+// // Check that the arc types of the FSTs match
+// if (!ArcTypesMatch(ifst, *ofst, "Foo")) return;
+// // package the args
+// FooArgs args(ifst, ofst);
+// // Finally, call the operation
+// Apply<Operation<FooArgs> >("Foo", ifst.ArcType(), &args);
+// }
+//
+// The Apply<> function template takes care of the link between 2 and 3,
+// provided you also have:
+//
+// 4) A registration for your new operation, on the arc types you care about.
+// This can be provided easily by the REGISTER_FST_OPERATION macro
+// defined later in this file:
+//
+// REGISTER_FST_OPERATION(Foo, StdArc, FooArgs);
+// REGISTER_FST_OPERATION(Foo, MyArc, FooArgs);
+// // .. etc
+//
+//
+// That's it! Now when you call Foo(const FstClass &, MutableFstClass *),
+// it dispatches (in #3) via the Apply<> function to the correct
+// instantiation of the template function in #2.
+//
+
+
+#ifndef FST_SCRIPT_SCRIPT_IMPL_H_
+#define FST_SCRIPT_SCRIPT_IMPL_H_
+
+//
+// This file contains general-purpose templates which are used in the
+// implementation of the operations.
+//
+
+#include <utility>
+using std::pair; using std::make_pair;
+#include <string>
+
+#include <fst/script/fst-class.h>
+#include <fst/generic-register.h>
+#include <fst/script/arg-packs.h>
+
+#include <fst/types.h>
+
+namespace fst {
+namespace script {
+
+//
+// A generic register for operations with various kinds of signatures.
+// Needed since every function signature requires a new registration class.
+// The pair<string, string> is understood to be the operation name and arc
+// type; subclasses (or typedefs) need only provide the operation signature.
+//
+
+template<class OperationSignature>
+class GenericOperationRegister
+    : public GenericRegister<pair<string, string>,
+                             OperationSignature,
+                             GenericOperationRegister<OperationSignature> > {
+ public:
+  // Stores op under the key (operation_name, arc_type).
+  void RegisterOperation(const string &operation_name,
+                         const string &arc_type,
+                         OperationSignature op) {
+    const pair<string, string> key(operation_name, arc_type);
+    this->SetEntry(key, op);
+  }
+
+  // Returns whatever the underlying register holds for the key
+  // (operation_name, arc_type).
+  OperationSignature GetOperation(
+      const string &operation_name, const string &arc_type) {
+    const pair<string, string> key(operation_name, arc_type);
+    return this->GetEntry(key);
+  }
+
+ protected:
+  // Maps a key to a shared-object file name. Only the arc type
+  // (key.second) takes part: it is converted to a legal C symbol and
+  // suffixed with "-arc.so".
+  virtual string ConvertKeyToSoFilename(
+      const pair<string, string>& key) const {
+    // Just use the old-style FST for now.
+    string so_name(key.second);
+    ConvertToLegalCSymbol(&so_name);
+    so_name += "-arc.so";
+    return so_name;
+  }
+};
+
+
+// Operation package - everything you need to register a new type of operation
+
+// The ArgPack should be the type that's passed into each wrapped function -
+// for instance, it might be a struct containing all the args.
+// It's always passed by pointer, so const members should be used to enforce
+// constness where it's needed. Return values should be implemented as a
+// member of ArgPack as well.
+
+template<class ArgPack>
+struct Operation {
+ // The argument bundle handed to every implementation of the operation.
+ typedef ArgPack Args;
+ // Signature of an implementation: receives the bundle by non-const
+ // pointer and returns nothing (results travel inside the bundle).
+ typedef void (*OpType)(ArgPack *args);
+
+ // The register (hash) type
+ typedef GenericOperationRegister<OpType> Register;
+
+ // The register-er type
+ typedef GenericRegisterer<Register> Registerer;
+};
+
+
+// Macro for registering new types of operations.
+// Expands to a file-static Operation<ArgPack>::Registerer constructed from
+// the key make_pair(#Op, Arc::Type()) and the instantiation Op<Arc>.
+// The variable name is formed by token-pasting ArgPack, Op and Arc, so each
+// argument must be a single identifier (no "::" and no template arguments).
+
+#define REGISTER_FST_OPERATION(Op, Arc, ArgPack) \
+ static fst::script::Operation<ArgPack>::Registerer \
+ arc_dispatched_operation_ ## ArgPack ## Op ## Arc ## _registerer( \
+ make_pair(#Op, Arc::Type()), Op<Arc>)
+
+
+//
+// Template function to apply an operation by name
+//
+
+// Looks up the implementation registered under (op_name, arc_type) in
+// OpReg's register and invokes it on args. When the lookup yields a null
+// function pointer an error is logged and args is left untouched.
+template<class OpReg>
+void Apply(const string &op_name, const string &arc_type,
+           typename OpReg::Args *args) {
+  typedef typename OpReg::OpType OpType;
+
+  const OpType handler =
+      OpReg::Register::GetRegister()->GetOperation(op_name, arc_type);
+
+  if (handler != 0) {
+    handler(args);
+  } else {
+    FSTERROR() << "No operation found for \"" << op_name << "\" on "
+               << "arc type " << arc_type;
+  }
+}
+
+
+// Helper that logs to ERROR if the arc types of a and b don't match.
+// The op_name is also printed.
+// NOTE(review): presumably returns true when the arc types agree and false
+// otherwise; the definition is not in this header -- confirm there.
+bool ArcTypesMatch(const FstClass &a, const FstClass &b,
+ const string &op_name);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_SCRIPT_IMPL_H_
diff --git a/src/include/fst/script/shortest-distance.h b/src/include/fst/script/shortest-distance.h
new file mode 100644
index 0000000..5fc2976
--- /dev/null
+++ b/src/include/fst/script/shortest-distance.h
@@ -0,0 +1,250 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_SHORTEST_DISTANCE_H_
+#define FST_SCRIPT_SHORTEST_DISTANCE_H_
+
+#include <vector>
+using std::vector;
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/script/weight-class.h>
+#include <fst/script/prune.h> // for ArcFilterType
+#include <fst/queue.h> // for QueueType
+#include <fst/shortest-distance.h>
+
+namespace fst {
+namespace script {
+
+// Run-time selector for the arc filter used by the scripting-level
+// shortest-distance code. ShortestDistanceHelper switches on this value to
+// pick AnyArcFilter, EpsilonArcFilter, InputEpsilonArcFilter or
+// OutputEpsilonArcFilter respectively.
+enum ArcFilterType { ANY_ARC_FILTER, EPSILON_ARC_FILTER,
+ INPUT_EPSILON_ARC_FILTER, OUTPUT_EPSILON_ARC_FILTER };
+
+// See <fst/shortest-distance.h> for the template options class
+// that this one shadows. Inside namespace fst::script the unqualified name
+// refers to this struct; the template must be spelled
+// fst::ShortestDistanceOptions.
+struct ShortestDistanceOptions {
+ // Selects the queue discipline (switched on in ShortestDistance below).
+ const QueueType queue_type;
+ // Selects the arc filter (switched on in ShortestDistanceHelper below).
+ const ArcFilterType arc_filter_type;
+ // Forwarded as the source argument of fst::ShortestDistanceOptions.
+ const int64 source;
+ // Forwarded as the delta argument of fst::ShortestDistanceOptions.
+ const float delta;
+ // Always false when built through this constructor.
+ const bool first_path;
+
+ ShortestDistanceOptions(QueueType qt, ArcFilterType aft, int64 s,
+ float d)
+ : queue_type(qt), arc_filter_type(aft), source(s), delta(d),
+ first_path(false) { }
+};
+
+
+
+// 1
+// Argument bundle for the options-based ShortestDistance overload:
+// arg1 = input FST, arg2 = out-vector of distances, arg3 = options.
+typedef args::Package<const FstClass &, vector<WeightClass> *,
+ const ShortestDistanceOptions &> ShortestDistanceArgs1;
+
+// Factory that heap-allocates a queue of type Queue; the caller deletes it.
+// Generic case: the queue is default-constructed and neither argument is
+// consulted.
+template<class Queue, class Arc, class ArcFilter>
+struct QueueConstructor {
+  static Queue *Construct(const Fst<Arc> & /* fst */,
+                          const vector<typename Arc::Weight> * /* distance */) {
+    return new Queue();
+  }
+};
+
+// Specializations to deal with AutoQueue, NaturalShortestFirstQueue,
+// and TopOrderQueue's different constructors
+// AutoQueue: constructed from the FST, the distance vector and a
+// default-constructed ArcFilter.
+template<class Arc, class ArcFilter>
+struct QueueConstructor<AutoQueue<typename Arc::StateId>, Arc, ArcFilter> {
+  static AutoQueue<typename Arc::StateId> *Construct(
+      const Fst<Arc> &fst,
+      const vector<typename Arc::Weight> *distance) {
+    typedef AutoQueue<typename Arc::StateId> Queue;
+    return new Queue(fst, distance, ArcFilter());
+  }
+};
+
+// NaturalShortestFirstQueue: constructed from the distance vector alone;
+// the FST argument is not consulted.
+template<class Arc, class ArcFilter>
+struct QueueConstructor<NaturalShortestFirstQueue<typename Arc::StateId,
+                                                  typename Arc::Weight>,
+                        Arc, ArcFilter> {
+  static NaturalShortestFirstQueue<typename Arc::StateId, typename Arc::Weight>
+  *Construct(const Fst<Arc> &fst,
+             const vector<typename Arc::Weight> *distance) {
+    typedef NaturalShortestFirstQueue<typename Arc::StateId,
+                                      typename Arc::Weight> Queue;
+    return new Queue(*distance);
+  }
+};
+
+// TopOrderQueue: constructed from the FST and a default-constructed
+// ArcFilter; the weight vector is not consulted.
+template<class Arc, class ArcFilter>
+struct QueueConstructor<TopOrderQueue<typename Arc::StateId>, Arc, ArcFilter> {
+  static TopOrderQueue<typename Arc::StateId> *Construct(
+      const Fst<Arc> &fst, const vector<typename Arc::Weight> *weights) {
+    typedef TopOrderQueue<typename Arc::StateId> Queue;
+    return new Queue(fst, ArcFilter());
+  }
+};
+
+
+// Runs the arc-templated ShortestDistance for one concrete
+// (Queue, ArcFilter) pair and stores the result in *weights.
+// The same ArcFilter type is used both to construct the queue and in the
+// options, so the two cannot disagree.
+template<class Arc, class Queue, class ArcFilter>
+void ShortestDistanceWithFilter(const Fst<Arc> &fst,
+                                const ShortestDistanceOptions &opts,
+                                vector<typename Arc::Weight> *weights) {
+  Queue *queue =
+      QueueConstructor<Queue, Arc, ArcFilter>::Construct(fst, weights);
+  fst::ShortestDistanceOptions<Arc, Queue, ArcFilter> sdopts(
+      queue, ArcFilter(), opts.source, opts.delta);
+  ShortestDistance(fst, weights, sdopts);
+  delete queue;
+}
+
+// Arc- and queue-typed back end of the options-based ShortestDistance.
+// Dispatches on opts.arc_filter_type, runs the computation, then copies the
+// typed distances into *args->arg2 as WeightClass values.
+//   arg1 - input FST
+//   arg2 - out: distance vector
+//   arg3 - scripting-level options
+template<class Arc, class Queue>
+void ShortestDistanceHelper(ShortestDistanceArgs1 *args) {
+  const Fst<Arc> &fst = *(args->arg1.GetFst<Arc>());
+  const ShortestDistanceOptions &opts = args->arg3;
+
+  vector<typename Arc::Weight> weights;
+
+  switch (opts.arc_filter_type) {
+    case ANY_ARC_FILTER:
+      ShortestDistanceWithFilter<Arc, Queue, AnyArcFilter<Arc> >(
+          fst, opts, &weights);
+      break;
+    case EPSILON_ARC_FILTER:
+      // Previously the queue for this case was built with AnyArcFilter.
+      ShortestDistanceWithFilter<Arc, Queue, EpsilonArcFilter<Arc> >(
+          fst, opts, &weights);
+      break;
+    case INPUT_EPSILON_ARC_FILTER:
+      ShortestDistanceWithFilter<Arc, Queue, InputEpsilonArcFilter<Arc> >(
+          fst, opts, &weights);
+      break;
+    case OUTPUT_EPSILON_ARC_FILTER:
+      ShortestDistanceWithFilter<Arc, Queue, OutputEpsilonArcFilter<Arc> >(
+          fst, opts, &weights);
+      break;
+    default:
+      // Report an out-of-range filter value instead of silently returning
+      // an empty result; weights stays empty.
+      FSTERROR() << "Unknown arc filter type: " << opts.arc_filter_type;
+      break;
+  }
+
+  // Copy the weights back
+  args->arg2->resize(weights.size());
+  for (size_t i = 0; i < weights.size(); ++i) {
+    (*args->arg2)[i] = WeightClass(weights[i]);
+  }
+}
+
+// Arc-typed entry point of the options-based ShortestDistance.
+// Picks the queue type named by opts.queue_type and delegates to
+// ShortestDistanceHelper, which handles the arc-filter dimension.
+// An unrecognised queue type is reported and yields an empty distance
+// vector; it no longer falls through into the AUTO_QUEUE case.
+template<class Arc>
+void ShortestDistance(ShortestDistanceArgs1 *args) {
+  const ShortestDistanceOptions &opts = args->arg3;
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Weight Weight;
+
+  // Must consider (opts.queue_type x opts.filter_type) options
+  switch (opts.queue_type) {
+    case AUTO_QUEUE:
+      ShortestDistanceHelper<Arc, AutoQueue<StateId> >(args);
+      return;
+
+    case FIFO_QUEUE:
+      ShortestDistanceHelper<Arc, FifoQueue<StateId> >(args);
+      return;
+
+    case LIFO_QUEUE:
+      ShortestDistanceHelper<Arc, LifoQueue<StateId> >(args);
+      return;
+
+    case SHORTEST_FIRST_QUEUE:
+      ShortestDistanceHelper<Arc,
+        NaturalShortestFirstQueue<StateId, Weight> >(args);
+      return;
+
+    case STATE_ORDER_QUEUE:
+      ShortestDistanceHelper<Arc, StateOrderQueue<StateId> >(args);
+      return;
+
+    case TOP_ORDER_QUEUE:
+      ShortestDistanceHelper<Arc, TopOrderQueue<StateId> >(args);
+      return;
+
+    default:
+      FSTERROR() << "Unknown queue type: " << opts.queue_type;
+      // Do not leave stale contents in the caller's vector on failure.
+      args->arg2->clear();
+      return;
+  }
+}
+
+// 2
+typedef args::Package<const FstClass&, vector<WeightClass>*,
+                      bool, double> ShortestDistanceArgs2;
+
+// Arc-typed back end of the simple ShortestDistance overload.
+//   arg1 - input FST
+//   arg2 - out: distance vector, resized and filled here
+//   arg3 - reverse flag, forwarded unchanged
+//   arg4 - delta, forwarded unchanged
+template<class Arc>
+void ShortestDistance(ShortestDistanceArgs2 *args) {
+  const Fst<Arc> &fst = *(args->arg1.GetFst<Arc>());
+  vector<typename Arc::Weight> distance;
+
+  ShortestDistance(fst, &distance, args->arg3, args->arg4);
+
+  // convert the typed weights back into weightclass
+  vector<WeightClass> *retval = args->arg2;
+  retval->resize(distance.size());
+
+  // size_t index: matches the width of distance.size().
+  for (size_t i = 0; i < distance.size(); ++i) {
+    (*retval)[i] = WeightClass(distance[i]);
+  }
+}
+
+// 3
+typedef args::WithReturnValue<WeightClass,
+                              const FstClass &> ShortestDistanceArgs3;
+
+// Arc-typed back end of the single-argument ShortestDistance overload.
+// The only packed argument is the input FST; the weight returned by the
+// templated ShortestDistance(fst) is wrapped into args->retval.
+template<class Arc>
+void ShortestDistance(ShortestDistanceArgs3 *args) {
+  const Fst<Arc> *typed_fst = args->args.GetFst<Arc>();
+  args->retval = WeightClass(ShortestDistance(*typed_fst));
+}
+
+
+// 1
+// Options-based form: queue type, arc filter, source and delta all come
+// from opts; results are written to *distance.
+void ShortestDistance(const FstClass &fst, vector<WeightClass> *distance,
+ const ShortestDistanceOptions &opts);
+
+// 2
+// Simple form: reverse and delta are forwarded to the arc-typed
+// ShortestDistance; results are written to *distance.
+void ShortestDistance(const FstClass &ifst, vector<WeightClass> *distance,
+ bool reverse = false, double delta = fst::kDelta);
+
+#ifndef SWIG
+// 3
+// Returns a single weight for the whole FST, wrapped as a WeightClass.
+// Hidden from SWIG by the surrounding #ifndef.
+WeightClass ShortestDistance(const FstClass &ifst);
+#endif
+
+} // namespace script
+} // namespace fst
+
+
+
+#endif // FST_SCRIPT_SHORTEST_DISTANCE_H_
diff --git a/src/include/fst/script/shortest-path.h b/src/include/fst/script/shortest-path.h
new file mode 100644
index 0000000..b3a3eb9
--- /dev/null
+++ b/src/include/fst/script/shortest-path.h
@@ -0,0 +1,190 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_SHORTEST_PATH_H_
+#define FST_SCRIPT_SHORTEST_PATH_H_
+
+#include <vector>
+using std::vector;
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/script/weight-class.h>
+#include <fst/shortest-path.h>
+#include <fst/script/shortest-distance.h> // for ShortestDistanceOptions
+
+namespace fst {
+namespace script {
+
+// Scripting-level options for ShortestPath. Extends the scripting-level
+// ShortestDistanceOptions; the base is always initialised with
+// ANY_ARC_FILTER and kNoStateId as source.
+struct ShortestPathOptions
+ : public fst::script::ShortestDistanceOptions {
+ // The fields below are forwarded, in this order, to the arc-typed
+ // fst::ShortestPathOptions constructor by the ShortestPath implementation.
+ const size_t nshortest;
+ const bool unique;
+ const bool has_distance;
+ // NOTE(review): this redeclares first_path and therefore hides the
+ // base-class member of the same name (which the base constructor always
+ // sets to false). Code going through a ShortestDistanceOptions reference
+ // sees the base value, not this one.
+ const bool first_path;
+ const WeightClass weight_threshold;
+ const int64 state_threshold;
+
+ ShortestPathOptions(QueueType qt, size_t n = 1,
+ bool u = false, bool hasdist = false,
+ float d = fst::kDelta, bool fp = false,
+ WeightClass w = fst::script::WeightClass::Zero(),
+ int64 s = fst::kNoStateId)
+ : ShortestDistanceOptions(qt, ANY_ARC_FILTER, kNoStateId, d),
+ nshortest(n), unique(u), has_distance(hasdist), first_path(fp),
+ weight_threshold(w), state_threshold(s) { }
+};
+
+typedef args::Package<const FstClass &, MutableFstClass *,
+                      vector<WeightClass> *, const ShortestPathOptions &>
+  ShortestPathArgs1;
+
+
+// Runs the arc-templated ShortestPath with one concrete Queue type.
+// Builds the queue, assembles the typed options from the scripting-level
+// opts plus the already-unwrapped weight_threshold, runs the algorithm and
+// releases the queue. AnyArcFilter is always used as the arc filter.
+template<class Arc, class Queue>
+void ShortestPathWithQueue(const Fst<Arc> &ifst, MutableFst<Arc> *ofst,
+                           vector<typename Arc::Weight> *weights,
+                           const ShortestPathOptions &opts,
+                           const typename Arc::Weight &weight_threshold) {
+  typedef AnyArcFilter<Arc> ArcFilter;
+
+  Queue *queue =
+      QueueConstructor<Queue, Arc, ArcFilter>::Construct(ifst, weights);
+  fst::ShortestPathOptions<Arc, Queue, ArcFilter> spopts(
+      queue, ArcFilter(), opts.nshortest, opts.unique,
+      opts.has_distance, opts.delta, opts.first_path,
+      weight_threshold, opts.state_threshold);
+  ShortestPath(ifst, ofst, weights, spopts);
+  delete queue;
+}
+
+// Arc-typed back end of the options-based ShortestPath.
+//   arg1 - input FST
+//   arg2 - output FST
+//   arg3 - out: distance vector; receives the typed weights as WeightClass
+//   arg4 - scripting-level options
+// Dispatches on opts.queue_type. For an unknown queue type an error is
+// logged, kError is set on the output FST and arg3 ends up empty.
+// Every case now reaches the copy-back code below; previously each valid
+// case returned early, so arg3 was never filled on success.
+template<class Arc>
+void ShortestPath(ShortestPathArgs1 *args) {
+  const Fst<Arc> &ifst = *(args->arg1.GetFst<Arc>());
+  MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>();
+  const ShortestPathOptions &opts = args->arg4;
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Weight Weight;
+
+  vector<Weight> weights;
+  const Weight weight_threshold =
+      *(opts.weight_threshold.GetWeight<Weight>());
+
+  switch (opts.queue_type) {
+    case AUTO_QUEUE:
+      ShortestPathWithQueue<Arc, AutoQueue<StateId> >(
+          ifst, ofst, &weights, opts, weight_threshold);
+      break;
+    case FIFO_QUEUE:
+      ShortestPathWithQueue<Arc, FifoQueue<StateId> >(
+          ifst, ofst, &weights, opts, weight_threshold);
+      break;
+    case LIFO_QUEUE:
+      ShortestPathWithQueue<Arc, LifoQueue<StateId> >(
+          ifst, ofst, &weights, opts, weight_threshold);
+      break;
+    case SHORTEST_FIRST_QUEUE:
+      ShortestPathWithQueue<Arc, NaturalShortestFirstQueue<StateId, Weight> >(
+          ifst, ofst, &weights, opts, weight_threshold);
+      break;
+    case STATE_ORDER_QUEUE:
+      ShortestPathWithQueue<Arc, StateOrderQueue<StateId> >(
+          ifst, ofst, &weights, opts, weight_threshold);
+      break;
+    case TOP_ORDER_QUEUE:
+      ShortestPathWithQueue<Arc, TopOrderQueue<StateId> >(
+          ifst, ofst, &weights, opts, weight_threshold);
+      break;
+    default:
+      FSTERROR() << "Unknown queue type: " << opts.queue_type;
+      ofst->SetProperties(kError, kError);
+      break;
+  }
+
+  // Copy the weights back
+  args->arg3->resize(weights.size());
+  for (size_t i = 0; i < weights.size(); ++i) {
+    (*args->arg3)[i] = WeightClass(weights[i]);
+  }
+}
+
+// 2
+typedef args::Package<const FstClass &, MutableFstClass *,
+                      size_t, bool, bool, WeightClass,
+                      int64> ShortestPathArgs2;
+
+// Arc-typed back end of the simple ShortestPath overload.
+// arg1/arg2 are the input and output FSTs; arg3..arg5 and arg7 are passed
+// through untouched, while arg6 (the weight threshold) is unwrapped to
+// Arc::Weight first.
+template<class Arc>
+void ShortestPath(ShortestPathArgs2 *args) {
+  typedef typename Arc::Weight Weight;
+
+  const Fst<Arc> &typed_ifst = *(args->arg1.GetFst<Arc>());
+  MutableFst<Arc> *typed_ofst = args->arg2->GetMutableFst<Arc>();
+  Weight threshold = *(args->arg6.GetWeight<Weight>());
+
+  ShortestPath(typed_ifst, typed_ofst,
+               args->arg3, args->arg4, args->arg5,
+               threshold, args->arg7);
+}
+
+
+// 1
+// Options-based form. opts.queue_type selects the queue; the remaining
+// fields of opts are forwarded to the arc-typed ShortestPath. distance is
+// handed to the arc-typed implementation as its weight vector.
+void ShortestPath(const FstClass &ifst, MutableFstClass *ofst,
+ vector<WeightClass> *distance,
+ const ShortestPathOptions &opts);
+
+
+// 2
+// Simple form: n, unique, first_path, weight_threshold and state_threshold
+// are forwarded positionally to the arc-typed ShortestPath.
+void ShortestPath(const FstClass &ifst, MutableFstClass *ofst,
+ size_t n = 1, bool unique = false,
+ bool first_path = false,
+ WeightClass weight_threshold =
+ fst::script::WeightClass::Zero(),
+ int64 state_threshold = fst::kNoStateId);
+
+} // namespace script
+} // namespace fst
+
+
+
+#endif // FST_SCRIPT_SHORTEST_PATH_H_
diff --git a/src/include/fst/script/symbols.h b/src/include/fst/script/symbols.h
new file mode 100644
index 0000000..927600a
--- /dev/null
+++ b/src/include/fst/script/symbols.h
@@ -0,0 +1,20 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_SYMBOLS_H_
+#define FST_SCRIPT_SYMBOLS_H_
+
+#endif // FST_SCRIPT_SYMBOLS_H_
diff --git a/src/include/fst/script/synchronize.h b/src/include/fst/script/synchronize.h
new file mode 100644
index 0000000..3c0c905
--- /dev/null
+++ b/src/include/fst/script/synchronize.h
@@ -0,0 +1,42 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_SYNCHRONIZE_H_
+#define FST_SCRIPT_SYNCHRONIZE_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/synchronize.h>
+
+namespace fst {
+namespace script {
+
+typedef args::Package<const FstClass &, MutableFstClass *> SynchronizeArgs;
+
+// Arc-typed back end: unwraps the input (arg1) and output (arg2) FSTs and
+// forwards them to the templated Synchronize.
+template<class Arc>
+void Synchronize(SynchronizeArgs *args) {
+  const Fst<Arc> *typed_ifst = args->arg1.GetFst<Arc>();
+  MutableFst<Arc> *typed_ofst = args->arg2->GetMutableFst<Arc>();
+
+  Synchronize(*typed_ifst, typed_ofst);
+}
+
+// Client-facing, type-erased entry point.
+void Synchronize(const FstClass &ifst, MutableFstClass *ofst);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_SYNCHRONIZE_H_
diff --git a/src/include/fst/script/text-io.h b/src/include/fst/script/text-io.h
new file mode 100644
index 0000000..95cc182
--- /dev/null
+++ b/src/include/fst/script/text-io.h
@@ -0,0 +1,50 @@
+// text-io.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+// Modified: jpr@google.com (Jake Ratkiewicz) to work with generic WeightClass
+//
+// \file
+// Utilities for reading and writing textual strings representing
+// states, labels, and weights and files specifying label-label pairs
+// and potentials (state-weight pairs).
+//
+
+#ifndef FST_SCRIPT_TEXT_IO_H__
+#define FST_SCRIPT_TEXT_IO_H__
+
+#include <string>
+#include <vector>
+using std::vector;
+
+
+#include <iostream>
+#include <fstream>
+#include <fst/script/weight-class.h>
+
+namespace fst {
+namespace script {
+
+// Reads potentials from the named file into *potential.
+// NOTE(review): presumably weight_type names the weight type used to parse
+// each entry and the return value signals success -- the definition is not
+// in this header; confirm there.
+bool ReadPotentials(const string &weight_type,
+ const string& filename,
+ vector<WeightClass>* potential);
+
+// Writes the given potentials to the named file.
+// NOTE(review): return value presumably signals success -- confirm against
+// the definition.
+bool WritePotentials(const string& filename,
+ const vector<WeightClass>& potential);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_TEXT_IO_H__
diff --git a/src/include/fst/script/topsort.h b/src/include/fst/script/topsort.h
new file mode 100644
index 0000000..4e27e48
--- /dev/null
+++ b/src/include/fst/script/topsort.h
@@ -0,0 +1,40 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_TOPSORT_H_
+#define FST_SCRIPT_TOPSORT_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/topsort.h>
+
+namespace fst {
+namespace script {
+
+typedef args::WithReturnValue<bool, MutableFstClass*> TopSortArgs;
+
+// Arc-typed back end: sorts the wrapped FST in place via the templated
+// TopSort and stores that call's boolean result in args->retval.
+template<class Arc>
+void TopSort(TopSortArgs *args) {
+  MutableFstClass *wrapped = args->args;
+  MutableFst<Arc> *typed_fst = wrapped->GetMutableFst<Arc>();
+  const bool result = TopSort(typed_fst);
+  args->retval = result;
+}
+
+// Client-facing, type-erased entry point.
+bool TopSort(MutableFstClass *fst);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_TOPSORT_H_
diff --git a/src/include/fst/script/union.h b/src/include/fst/script/union.h
new file mode 100644
index 0000000..780e484
--- /dev/null
+++ b/src/include/fst/script/union.h
@@ -0,0 +1,42 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_UNION_H_
+#define FST_SCRIPT_UNION_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/union.h>
+
+namespace fst {
+namespace script {
+
+typedef args::Package<MutableFstClass *, const FstClass &> UnionArgs;
+
+// Arc-typed back end: unwraps the mutable FST (arg1) and the read-only FST
+// (arg2) and forwards them to the templated Union, which modifies the
+// former.
+template<class Arc>
+void Union(UnionArgs *args) {
+  MutableFst<Arc> *target = args->arg1->GetMutableFst<Arc>();
+  const Fst<Arc> *addend = args->arg2.GetFst<Arc>();
+
+  Union(target, *addend);
+}
+
+// Client-facing, type-erased entry point.
+void Union(MutableFstClass *fst1, const FstClass &fst2);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_UNION_H_
diff --git a/src/include/fst/script/verify.h b/src/include/fst/script/verify.h
new file mode 100644
index 0000000..6904003
--- /dev/null
+++ b/src/include/fst/script/verify.h
@@ -0,0 +1,40 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: sorenj@google.com (Jeffrey Sorensen)
+
+#ifndef FST_SCRIPT_VERIFY_H_
+#define FST_SCRIPT_VERIFY_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/verify.h>
+
+namespace fst {
+namespace script {
+
+typedef args::WithReturnValue<bool, const FstClass *> VerifyArgs;
+
+// Arc-typed back end: unwraps the FST held in the bundle and stores the
+// result of the templated Verify in args->retval.
+template<class Arc>
+void Verify(VerifyArgs *args) {
+  const Fst<Arc> &typed_fst = *(args->args->GetFst<Arc>());
+  args->retval = Verify(typed_fst);
+}
+
+// Client-facing, type-erased entry point.
+bool Verify(const FstClass &fst1);
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_VERIFY_H_
diff --git a/src/include/fst/script/weight-class.h b/src/include/fst/script/weight-class.h
new file mode 100644
index 0000000..5a4890f
--- /dev/null
+++ b/src/include/fst/script/weight-class.h
@@ -0,0 +1,216 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+// Represents a generic weight in an FST -- that is, represents a specific
+// type of weight underneath while hiding that type from a client.
+
+
+#ifndef FST_SCRIPT_WEIGHT_CLASS_H_
+#define FST_SCRIPT_WEIGHT_CLASS_H_
+
+#include <string>
+
+#include <fst/generic-register.h>
+#include <fst/util.h>
+
+namespace fst {
+namespace script {
+
+// Abstract interface to a weight whose concrete type is hidden from the
+// client. WeightClass stores its value through a pointer to this class.
+class WeightImplBase {
+ public:
+ // Returns a copy of this object.
+ virtual WeightImplBase *Copy() const = 0;
+ // Writes the weight to the stream '*o'.
+ virtual void Print(ostream *o) const = 0;
+ // Returns the name of the underlying weight type.
+ virtual const string &Type() const = 0;
+ // Returns the weight rendered as a string.
+ virtual string to_string() const = 0;
+ // Compares this weight with 'other'.
+ virtual bool operator == (const WeightImplBase &other) const = 0;
+ // Virtual so that implementations can be deleted through this interface.
+ virtual ~WeightImplBase() { }
+};
+
+// Concrete holder for a weight of type W, exposed through the
+// WeightImplBase interface.
+template<class W>
+struct WeightClassImpl : public WeightImplBase {
+  W weight;
+
+  explicit WeightClassImpl(const W& w) : weight(w) { }
+
+  // Returns a newly allocated duplicate of this object.
+  virtual WeightClassImpl<W> *Copy() const {
+    WeightClassImpl<W> *duplicate = new WeightClassImpl<W>(weight);
+    return duplicate;
+  }
+
+  // Name of the wrapped weight type, as reported by W::Type().
+  virtual const string &Type() const { return W::Type(); }
+
+  // Streams the wrapped weight to '*o'.
+  virtual void Print(ostream *o) const {
+    (*o) << weight;
+  }
+
+  // Renders the wrapped weight with its stream insertion operator.
+  virtual string to_string() const {
+    ostringstream buffer;
+    buffer << weight;
+    return buffer.str();
+  }
+
+  // Two implementations are equal when they report the same type name and
+  // hold equal weights.
+  virtual bool operator == (const WeightImplBase &other) const {
+    if (Type() != other.Type()) return false;
+
+    // The type names match, so 'other' is treated as the same
+    // instantiation.
+    const WeightClassImpl<W> &same_type =
+        static_cast<const WeightClassImpl<W> &>(other);
+    return same_type.weight == weight;
+  }
+};
+
+
+// A weight whose concrete type is hidden from the client. A WeightClass is
+// either one of the generic constants Zero()/One(), which carry no
+// implementation object, or a concrete weight owned through a
+// WeightImplBase pointer.
+class WeightClass {
+ public:
+  // Constructs the generic zero weight.
+  WeightClass() : element_type_(ZERO), impl_(0) { }
+
+  // Wraps a copy of the concrete weight 'weight'.
+  template<class W>
+  explicit WeightClass(const W& weight)
+      : element_type_(OTHER), impl_(new WeightClassImpl<W>(weight)) { }
+
+  // Defined out of line; presumably parses 'weight_str' as a weight of the
+  // type named 'weight_type'.
+  WeightClass(const string &weight_type, const string &weight_str);
+
+  // Deep copy: the implementation object, if any, is duplicated.
+  WeightClass(const WeightClass &other) :
+      element_type_(other.element_type_),
+      impl_(other.impl_ ? other.impl_->Copy() : 0) { }
+
+  // Deep-copy assignment. The new implementation is created before the old
+  // one is released, so self-assignment never copies from a deleted object.
+  WeightClass &operator = (const WeightClass &other) {
+    WeightImplBase *replacement = other.impl_ ? other.impl_->Copy() : 0;
+    delete impl_;
+    impl_ = replacement;
+    element_type_ = other.element_type_;
+    return *this;
+  }
+
+  // Returns the underlying weight as type W; see the definition below.
+  template<class W>
+  const W* GetWeight() const;
+
+  // Returns "ZERO" or "ONE" for the generic constants, otherwise the
+  // string form of the wrapped weight.
+  string to_string() const {
+    switch (element_type_) {
+      case ZERO:
+        return "ZERO";
+      case ONE:
+        return "ONE";
+      default:
+      case OTHER:
+        return impl_->to_string();
+    }
+  }
+
+  // Equal when both have the same element type and either both lack an
+  // implementation object or both implementations compare equal.
+  bool operator == (const WeightClass &other) const {
+    return element_type_ == other.element_type_ &&
+        ((impl_ && other.impl_ && (*impl_ == *other.impl_)) ||
+         (impl_ == 0 && other.impl_ == 0));
+  }
+
+  // Generic zero, valid for any weight type.
+  static const WeightClass &Zero() {
+    static WeightClass w(ZERO);
+
+    return w;
+  }
+
+  // Generic one, valid for any weight type.
+  static const WeightClass &One() {
+    static WeightClass w(ONE);
+
+    return w;
+  }
+
+  ~WeightClass() { delete impl_; }
+ private:
+  enum ElementType { ZERO, ONE, OTHER };
+  ElementType element_type_;
+
+  // Owned; null for the generic ZERO and ONE constants.
+  WeightImplBase *impl_;
+
+  // Used only to build the generic constants returned by Zero() and One().
+  explicit WeightClass(ElementType et) : element_type_(et), impl_(0) { }
+
+  friend ostream &operator << (ostream &o, const WeightClass &c);
+};
+
+// Returns a pointer to the underlying weight viewed as type W, or NULL if
+// the stored weight reports a different type name than W.
+template<class W>
+const W* WeightClass::GetWeight() const {
+  // W::Zero() and W::One() may be returned as temporaries, so copies are
+  // kept in statics: a pointer to a temporary must not be handed out.
+  static const W zero = W::Zero();
+  static const W one = W::One();
+
+  switch (element_type_) {
+    case ZERO:
+      return &zero;
+    case ONE:
+      return &one;
+    default:
+      break;
+  }
+
+  if (W::Type() != impl_->Type()) return NULL;
+
+  WeightClassImpl<W> *concrete = static_cast<WeightClassImpl<W> *>(impl_);
+  return &concrete->weight;
+}
+
+//
+// Registration for generic weight types.
+//
+
+typedef WeightImplBase* (*StrToWeightImplBaseT)(const string &str,
+ const string &src,
+ size_t nline);
+
+// Converts 'str' to a weight of type W via StrToWeight() and wraps the
+// result in a newly allocated WeightClassImpl<W>. 'src' and 'nline' are
+// passed through to StrToWeight() unchanged.
+template<class W>
+WeightImplBase* StrToWeightImplBase(const string &str,
+                                    const string &src, size_t nline) {
+  const W parsed = StrToWeight<W>(str, src, nline);
+  return new WeightClassImpl<W>(parsed);
+}
+
+// The following confuses swig, and doesn't need to be wrapped anyway.
+#ifndef SWIG
+ostream& operator << (ostream &o, const WeightClass &c);
+
+// Register keyed by weight type name whose entries are string-to-weight
+// conversion functions (StrToWeightImplBaseT).
+class WeightClassRegister : public GenericRegister<string,
+                                                   StrToWeightImplBaseT,
+                                                   WeightClassRegister> {
+ protected:
+  // The shared-object file name for a key is the key followed by ".so".
+  virtual string ConvertKeyToSoFilename(const string &key) const {
+    string so_name(key);
+    so_name += ".so";
+    return so_name;
+  }
+};
+
+typedef GenericRegisterer<WeightClassRegister> WeightClassRegisterer;
+#endif
+
+// Internal version: defines a static WeightClassRegisterer whose name ends
+// in 'line' and which is constructed from Weight::Type() and
+// StrToWeightImplBase<Weight>. It needs to be called through the wrapper
+// below so that the macro arguments are expanded before token pasting.
+#define REGISTER_FST_WEIGHT__(Weight, line) \
+ static WeightClassRegisterer weight_registerer ## _ ## line( \
+ Weight::Type(), \
+ StrToWeightImplBase<Weight>)
+
+// This layer is where __LINE__ is expanded into its numeric value before
+// being pasted into the registerer's name.
+#define REGISTER_FST_WEIGHT_EXPANDER(Weight, line) \
+ REGISTER_FST_WEIGHT__(Weight, line)
+
+//
+// Macro for registering new weight types. Clients call this.
+// The current line number is used to make the registerer's name distinct.
+//
+#define REGISTER_FST_WEIGHT(Weight) \
+ REGISTER_FST_WEIGHT_EXPANDER(Weight, __LINE__)
+
+} // namespace script
+} // namespace fst
+
+#endif // FST_SCRIPT_WEIGHT_CLASS_H_
diff --git a/src/include/fst/shortest-distance.h b/src/include/fst/shortest-distance.h
new file mode 100644
index 0000000..5d38409
--- /dev/null
+++ b/src/include/fst/shortest-distance.h
@@ -0,0 +1,347 @@
+// shortest-distance.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// Functions and classes to find shortest distance in an FST.
+
+#ifndef FST_LIB_SHORTEST_DISTANCE_H__
+#define FST_LIB_SHORTEST_DISTANCE_H__
+
+#include <deque>
+#include <vector>
+using std::vector;
+
+#include <fst/arcfilter.h>
+#include <fst/cache.h>
+#include <fst/queue.h>
+#include <fst/reverse.h>
+#include <fst/test-properties.h>
+
+
+namespace fst {
+
+// Options for the shortest-distance algorithm: queue discipline, arc
+// filter, source state, convergence delta and the first-path switch.
+template <class Arc, class Queue, class ArcFilter>
+struct ShortestDistanceOptions {
+ typedef typename Arc::StateId StateId;
+
+ Queue *state_queue; // Queue discipline used; owned by caller
+ ArcFilter arc_filter; // Arc filter (e.g., limit to only epsilon graph)
+ StateId source; // If kNoStateId, use the Fst's initial state
+ float delta; // Determines the degree of convergence required
+ bool first_path; // For a semiring with the path property (o.w.
+ // undefined), compute the shortest-distances along
+ // the first path to a final state found
+ // by the algorithm. That path is the shortest-path
+ // only if the FST has a unique final state (or all
+ // the final states have the same final weight), the
+ // queue discipline is shortest-first and all the
+ // weights in the FST are between One() and Zero()
+ // according to NaturalLess.
+
+ // 'first_path' always starts out false; set the member afterwards to
+ // enable it.
+ ShortestDistanceOptions(Queue *q, ArcFilter filt, StateId src = kNoStateId,
+ float d = kDelta)
+ : state_queue(q), arc_filter(filt), source(src), delta(d),
+ first_path(false) {}
+};
+
+
+// Computation state of the shortest-distance algorithm. Reusable
+// information is maintained across calls to member function
+// ShortestDistance(source) when 'retain' is true for improved
+// efficiency when calling multiple times from different source states
+// (e.g., in epsilon removal). Contrary to usual conventions, 'fst'
+// may not be freed before this class. Vector 'distance' should not be
+// modified by the user between these calls.
+// The Error() method returns true if an error was encountered.
+template<class Arc, class Queue, class ArcFilter>
+class ShortestDistanceState {
+ public:
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Weight Weight;
+
+ // Stores references/pointers to 'fst', 'distance' and the queue in
+ // 'opts' (none are copied or owned) and clears '*distance'.
+ ShortestDistanceState(
+ const Fst<Arc> &fst,
+ vector<Weight> *distance,
+ const ShortestDistanceOptions<Arc, Queue, ArcFilter> &opts,
+ bool retain)
+ : fst_(fst), distance_(distance), state_queue_(opts.state_queue),
+ arc_filter_(opts.arc_filter), delta_(opts.delta),
+ first_path_(opts.first_path), retain_(retain), source_id_(0),
+ error_(false) {
+ distance_->clear();
+ }
+
+ ~ShortestDistanceState() {}
+
+ // Runs the algorithm from 'source'; see the definition below.
+ void ShortestDistance(StateId source);
+
+ // True once any call to ShortestDistance() has failed.
+ bool Error() const { return error_; }
+
+ private:
+ const Fst<Arc> &fst_; // Input FST; held by reference
+ vector<Weight> *distance_; // Caller's result vector, indexed by state
+ Queue *state_queue_; // Queue discipline; not owned
+ ArcFilter arc_filter_; // Arcs rejected by this filter are skipped
+ float delta_; // Convergence delta passed to ApproxEqual
+ bool first_path_; // Stop at the first final state dequeued
+ bool retain_; // Retain and reuse information across calls
+
+ vector<Weight> rdistance_; // Relaxation distance.
+ vector<bool> enqueued_; // Is state enqueued?
+ vector<StateId> sources_; // Source ID for ith state in 'distance_',
+ // 'rdistance_', and 'enqueued_' if retained.
+ StateId source_id_; // Unique ID characterizing each call to SD
+
+ bool error_; // Set on failure; never reset by this class
+};
+
+// Compute the shortest distance. If 'source' is kNoStateId, use
+// the initial state of the Fst.
+// Failures (unsuitable weight type, a non-member weight, or an FST that is
+// flagged with kError) set the flag reported by Error().
+template <class Arc, class Queue, class ArcFilter>
+void ShortestDistanceState<Arc, Queue, ArcFilter>::ShortestDistance(
+ StateId source) {
+ // No start state: nothing to compute; only propagate the FST's error bit.
+ if (fst_.Start() == kNoStateId) {
+ if (fst_.Properties(kError, false)) error_ = true;
+ return;
+ }
+
+ if (!(Weight::Properties() & kRightSemiring)) {
+ FSTERROR() << "ShortestDistance: Weight needs to be right distributive: "
+ << Weight::Type();
+ error_ = true;
+ return;
+ }
+
+ if (first_path_ && !(Weight::Properties() & kPath)) {
+ FSTERROR() << "ShortestDistance: first_path option disallowed when "
+ << "Weight does not have the path property: "
+ << Weight::Type();
+ error_ = true;
+ return;
+ }
+
+ state_queue_->Clear();
+
+ // Without 'retain', every call starts from empty per-state vectors.
+ if (!retain_) {
+ distance_->clear();
+ rdistance_.clear();
+ enqueued_.clear();
+ }
+
+ if (source == kNoStateId)
+ source = fst_.Start();
+
+ // Grow the per-state vectors until 'source' is a valid index.
+ while (distance_->size() <= source) {
+ distance_->push_back(Weight::Zero());
+ rdistance_.push_back(Weight::Zero());
+ enqueued_.push_back(false);
+ }
+ // When retaining, tag 'source' as belonging to the current call.
+ if (retain_) {
+ while (sources_.size() <= source)
+ sources_.push_back(kNoStateId);
+ sources_[source] = source_id_;
+ }
+ (*distance_)[source] = Weight::One();
+ rdistance_[source] = Weight::One();
+ enqueued_[source] = true;
+
+ state_queue_->Enqueue(source);
+
+ // Main relaxation loop: runs until the queue is empty (or, with
+ // 'first_path', until a final state is dequeued).
+ while (!state_queue_->Empty()) {
+ StateId s = state_queue_->Head();
+ state_queue_->Dequeue();
+ while (distance_->size() <= s) {
+ distance_->push_back(Weight::Zero());
+ rdistance_.push_back(Weight::Zero());
+ enqueued_.push_back(false);
+ }
+ if (first_path_ && (fst_.Final(s) != Weight::Zero()))
+ break;
+ enqueued_[s] = false;
+ // 'r' is the weight accumulated at 's' since it was last processed; it
+ // is reset here and pushed along the outgoing arcs below.
+ Weight r = rdistance_[s];
+ rdistance_[s] = Weight::Zero();
+ for (ArcIterator< Fst<Arc> > aiter(fst_, s);
+ !aiter.Done();
+ aiter.Next()) {
+ const Arc &arc = aiter.Value();
+ // Skip arcs rejected by the filter and arcs with Zero() weight.
+ if (!arc_filter_(arc) || arc.weight == Weight::Zero())
+ continue;
+ while (distance_->size() <= arc.nextstate) {
+ distance_->push_back(Weight::Zero());
+ rdistance_.push_back(Weight::Zero());
+ enqueued_.push_back(false);
+ }
+ // When retaining, entries left over from an earlier call are reset
+ // the first time the current call reaches the state.
+ if (retain_) {
+ while (sources_.size() <= arc.nextstate)
+ sources_.push_back(kNoStateId);
+ if (sources_[arc.nextstate] != source_id_) {
+ (*distance_)[arc.nextstate] = Weight::Zero();
+ rdistance_[arc.nextstate] = Weight::Zero();
+ enqueued_[arc.nextstate] = false;
+ sources_[arc.nextstate] = source_id_;
+ }
+ }
+ Weight &nd = (*distance_)[arc.nextstate];
+ Weight &nr = rdistance_[arc.nextstate];
+ Weight w = Times(r, arc.weight);
+ // Update only if adding 'w' changes the distance by more than delta.
+ if (!ApproxEqual(nd, Plus(nd, w), delta_)) {
+ nd = Plus(nd, w);
+ nr = Plus(nr, w);
+ if (!nd.Member() || !nr.Member()) {
+ error_ = true;
+ return;
+ }
+ if (!enqueued_[arc.nextstate]) {
+ state_queue_->Enqueue(arc.nextstate);
+ enqueued_[arc.nextstate] = true;
+ } else {
+ state_queue_->Update(arc.nextstate);
+ }
+ }
+ }
+ }
+ // A fresh ID makes the next call treat all retained entries as stale.
+ ++source_id_;
+ if (fst_.Properties(kError, false)) error_ = true;
+}
+
+
+// Shortest-distance algorithm, full-control version: the queue
+// discipline, arc filter, source state and convergence delta are all
+// taken from 'opts'. A simpler interface is declared further below.
+//
+// Computes the shortest distance from the 'opts.source' state to each
+// visited state S and stores it in the 'distance' vector. An unvisited
+// state S has distance Zero(), which is stored in the 'distance' vector
+// if S is less than the maximum visited state.
+// If an error was encountered, the 'distance' vector holds a single
+// element for which Member() is false.
+//
+// The weights must be right distributive and k-closed (i.e., 1 +
+// x + x^2 + ... + x^(k +1) = 1 + x + x^2 + ... + x^k).
+//
+// The algorithm is from Mohri, "Semiring Framework and Algorithms for
+// Shortest-Distance Problems", Journal of Automata, Languages and
+// Combinatorics 7(3):321-350, 2002. The complexity of the algorithm
+// depends on the properties of the semiring and the queue discipline
+// used. Refer to the paper for more details.
+template<class Arc, class Queue, class ArcFilter>
+void ShortestDistance(
+    const Fst<Arc> &fst,
+    vector<typename Arc::Weight> *distance,
+    const ShortestDistanceOptions<Arc, Queue, ArcFilter> &opts) {
+  typedef ShortestDistanceState<Arc, Queue, ArcFilter> State;
+
+  // Single call, so nothing is retained between runs.
+  State solver(fst, distance, opts, false);
+  solver.ShortestDistance(opts.source);
+  if (!solver.Error()) return;
+
+  // Report failure as a lone non-member weight.
+  distance->clear();
+  distance->resize(1, Arc::Weight::NoWeight());
+}
+
+// Shortest-distance algorithm: simplified interface. See above for a
+// version that allows finer control.
+//
+// If 'reverse' is false, this computes the shortest distance from the
+// initial state to each state S and stores the value in the
+// 'distance' vector. If 'reverse' is true, this computes the shortest
+// distance from each state to the final states. An unvisited state S
+// has distance Zero(), which will be stored in the 'distance' vector
+// if S is less than the maximum visited state. The state queue
+// discipline is automatically selected.
+// If an error was encountered, the 'distance' vector holds a single
+// element for which Member() is false.
+//
+// The weights must be right (left) distributive if reverse is
+// false (true) and k-closed (i.e., 1 + x + x^2 + ... + x^(k +1) = 1 +
+// x + x^2 + ... + x^k).
+//
+// The algorithm is from Mohri, "Semiring Framework and Algorithms for
+// Shortest-Distance Problems", Journal of Automata, Languages and
+// Combinatorics 7(3):321-350, 2002. The complexity of the algorithm
+// depends on the properties of the semiring and the queue discipline
+// used. Refer to the paper for more details.
+template<class Arc>
+void ShortestDistance(const Fst<Arc> &fst,
+                      vector<typename Arc::Weight> *distance,
+                      bool reverse = false,
+                      float delta = kDelta) {
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Weight Weight;
+
+  if (!reverse) {
+    AnyArcFilter<Arc> arc_filter;
+    AutoQueue<StateId> state_queue(fst, distance, arc_filter);
+    ShortestDistanceOptions< Arc, AutoQueue<StateId>, AnyArcFilter<Arc> >
+        opts(&state_queue, arc_filter);
+    opts.delta = delta;
+    ShortestDistance(fst, distance, opts);
+  } else {
+    // Distances to the final states are computed as forward distances on
+    // the reversed FST.
+    typedef ReverseArc<Arc> ReverseArc;
+    typedef typename ReverseArc::Weight ReverseWeight;
+    AnyArcFilter<ReverseArc> rarc_filter;
+    VectorFst<ReverseArc> rfst;
+    Reverse(fst, &rfst);
+    vector<ReverseWeight> rdistance;
+    AutoQueue<StateId> state_queue(rfst, &rdistance, rarc_filter);
+    ShortestDistanceOptions< ReverseArc, AutoQueue<StateId>,
+                             AnyArcFilter<ReverseArc> >
+        ropts(&state_queue, rarc_filter);
+    ropts.delta = delta;
+    ShortestDistance(rfst, &rdistance, ropts);
+    distance->clear();
+    // A lone non-member weight is the error signal; pass it on.
+    if (rdistance.size() == 1 && !rdistance[0].Member()) {
+      distance->resize(1, Weight::NoWeight());
+      return;
+    }
+    // Entry i + 1 of 'rdistance' becomes entry i of 'distance'; entry 0 is
+    // dropped (presumably the extra initial state added by Reverse()).
+    // Indexing from 1 copies nothing when 'rdistance' is empty, whereas a
+    // bound of 'rdistance.size() - 1' would wrap around in that case.
+    for (size_t i = 1; i < rdistance.size(); ++i)
+      distance->push_back(rdistance[i].Reverse());
+  }
+}
+
+
+// Returns the sum of the weights of all successful paths in an FST, i.e.,
+// the shortest distance from the initial state to the final states.
+// If an error was encountered, the returned weight is one for which
+// Member() is false.
+template <class Arc>
+typename Arc::Weight ShortestDistance(const Fst<Arc> &fst, float delta = kDelta) {
+  typedef typename Arc::Weight Weight;
+  typedef typename Arc::StateId StateId;
+  vector<Weight> dist;
+
+  if (!(Weight::Properties() & kRightSemiring)) {
+    // Not right distributive: compute distances to the final states
+    // instead and read off the value at the start state.
+    ShortestDistance(fst, &dist, true, delta);
+    StateId start = fst.Start();
+    if (dist.size() == 1 && !dist[0].Member())
+      return Arc::Weight::NoWeight();
+    if (start == kNoStateId || !(start < dist.size()))
+      return Weight::Zero();
+    return dist[start];
+  }
+
+  // Right distributive: combine each state's forward distance with its
+  // final weight and sum over all states.
+  ShortestDistance(fst, &dist, false, delta);
+  if (dist.size() == 1 && !dist[0].Member())
+    return Arc::Weight::NoWeight();
+  Weight total = Weight::Zero();
+  for (StateId state = 0; state < dist.size(); ++state)
+    total = Plus(total, Times(dist[state], fst.Final(state)));
+  return total;
+}
+
+
+} // namespace fst
+
+#endif // FST_LIB_SHORTEST_DISTANCE_H__
diff --git a/src/include/fst/shortest-path.h b/src/include/fst/shortest-path.h
new file mode 100644
index 0000000..f12970c
--- /dev/null
+++ b/src/include/fst/shortest-path.h
@@ -0,0 +1,501 @@
+// shortest-path.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// Functions to find shortest paths in an FST.
+
+#ifndef FST_LIB_SHORTEST_PATH_H__
+#define FST_LIB_SHORTEST_PATH_H__
+
+#include <functional>
+#include <utility>
+using std::pair; using std::make_pair;
+#include <vector>
+using std::vector;
+
+#include <fst/cache.h>
+#include <fst/determinize.h>
+#include <fst/queue.h>
+#include <fst/shortest-distance.h>
+#include <fst/test-properties.h>
+
+
+namespace fst {
+
+// Options for the shortest-path algorithms. Extends the shortest-distance
+// options with the number of paths, uniqueness, and pruning thresholds.
+template <class Arc, class Queue, class ArcFilter>
+struct ShortestPathOptions
+ : public ShortestDistanceOptions<Arc, Queue, ArcFilter> {
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Weight Weight;
+ size_t nshortest; // return n-shortest paths
+ bool unique; // only return paths with distinct input strings
+ bool has_distance; // distance vector already contains the
+ // shortest distance from the initial state
+ // NOTE: this member hides the base-class member of the same name, which
+ // the base constructor leaves false.
+ bool first_path; // Single shortest path stops after finding the first
+ // path to a final state. That path is the shortest path
+ // only when using the ShortestFirstQueue and
+ // only when all the weights in the FST are between
+ // One() and Zero() according to NaturalLess.
+ Weight weight_threshold; // pruning weight threshold.
+ StateId state_threshold; // pruning state threshold.
+
+ // The base-class source state is always initialized to kNoStateId.
+ ShortestPathOptions(Queue *q, ArcFilter filt, size_t n = 1, bool u = false,
+ bool hasdist = false, float d = kDelta,
+ bool fp = false, Weight w = Weight::Zero(),
+ StateId s = kNoStateId)
+ : ShortestDistanceOptions<Arc, Queue, ArcFilter>(q, filt, kNoStateId, d),
+ nshortest(n), unique(u), has_distance(hasdist), first_path(fp),
+ weight_threshold(w), state_threshold(s) {}
+};
+
+
+// Shortest-path algorithm: normally not called directly; prefer
+// 'ShortestPath' below with n=1. 'ofst' contains the shortest path in
+// 'ifst'. 'distance' returns the shortest distances from the source
+// state to each state in 'ifst'. 'opts' is used to specify options
+// such as the queue discipline, the arc filter and delta.
+//
+// The shortest path is the lowest weight path w.r.t. the natural
+// semiring order.
+//
+// The weights need to be right distributive and have the path (kPath)
+// property.
+//
+// On error, kError is set on 'ofst' and the function returns early.
+template<class Arc, class Queue, class ArcFilter>
+void SingleShortestPath(const Fst<Arc> &ifst,
+ MutableFst<Arc> *ofst,
+ vector<typename Arc::Weight> *distance,
+ ShortestPathOptions<Arc, Queue, ArcFilter> &opts) {
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Weight Weight;
+
+ // Start from an empty output that shares the input's symbol tables.
+ ofst->DeleteStates();
+ ofst->SetInputSymbols(ifst.InputSymbols());
+ ofst->SetOutputSymbols(ifst.OutputSymbols());
+
+ if (ifst.Start() == kNoStateId) {
+ if (ifst.Properties(kError, false)) ofst->SetProperties(kError, kError);
+ return;
+ }
+
+ // Per-state bookkeeping, indexed by state ID of 'ifst'.
+ vector<bool> enqueued;
+ vector<StateId> parent; // Predecessor on the best path found so far
+ vector<Arc> arc_parent; // Arc taken from that predecessor
+
+ Queue *state_queue = opts.state_queue;
+ StateId source = opts.source == kNoStateId ? ifst.Start() : opts.source;
+ Weight f_distance = Weight::Zero(); // Best complete-path weight so far
+ StateId f_parent = kNoStateId; // Final state that achieved it
+
+ distance->clear();
+ state_queue->Clear();
+ if (opts.nshortest != 1) {
+ FSTERROR() << "SingleShortestPath: for nshortest > 1, use ShortestPath"
+ << " instead";
+ ofst->SetProperties(kError, kError);
+ return;
+ }
+ if (opts.weight_threshold != Weight::Zero() ||
+ opts.state_threshold != kNoStateId) {
+ FSTERROR() <<
+ "SingleShortestPath: weight and state thresholds not applicable";
+ ofst->SetProperties(kError, kError);
+ return;
+ }
+ if ((Weight::Properties() & (kPath | kRightSemiring))
+ != (kPath | kRightSemiring)) {
+ FSTERROR() << "SingleShortestPath: Weight needs to have the path"
+ << " property and be right distributive: " << Weight::Type();
+ ofst->SetProperties(kError, kError);
+ return;
+ }
+ // Pad the vectors with defaults for states below 'source', then add the
+ // entry for 'source' itself with distance One().
+ while (distance->size() < source) {
+ distance->push_back(Weight::Zero());
+ enqueued.push_back(false);
+ parent.push_back(kNoStateId);
+ arc_parent.push_back(Arc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId));
+ }
+ distance->push_back(Weight::One());
+ parent.push_back(kNoStateId);
+ arc_parent.push_back(Arc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId));
+ state_queue->Enqueue(source);
+ enqueued.push_back(true);
+
+ // Relaxation loop.
+ while (!state_queue->Empty()) {
+ StateId s = state_queue->Head();
+ state_queue->Dequeue();
+ enqueued[s] = false;
+ Weight sd = (*distance)[s];
+ // At a final state, see whether completing the path here improves the
+ // best complete-path weight.
+ if (ifst.Final(s) != Weight::Zero()) {
+ Weight w = Times(sd, ifst.Final(s));
+ if (f_distance != Plus(f_distance, w)) {
+ f_distance = Plus(f_distance, w);
+ f_parent = s;
+ }
+ if (!f_distance.Member()) {
+ ofst->SetProperties(kError, kError);
+ return;
+ }
+ if (opts.first_path)
+ break;
+ }
+ for (ArcIterator< Fst<Arc> > aiter(ifst, s);
+ !aiter.Done();
+ aiter.Next()) {
+ const Arc &arc = aiter.Value();
+ while (distance->size() <= arc.nextstate) {
+ distance->push_back(Weight::Zero());
+ enqueued.push_back(false);
+ parent.push_back(kNoStateId);
+ arc_parent.push_back(Arc(kNoLabel, kNoLabel, Weight::Zero(),
+ kNoStateId));
+ }
+ Weight &nd = (*distance)[arc.nextstate];
+ Weight w = Times(sd, arc.weight);
+ // Record the new predecessor whenever the distance changes.
+ if (nd != Plus(nd, w)) {
+ nd = Plus(nd, w);
+ if (!nd.Member()) {
+ ofst->SetProperties(kError, kError);
+ return;
+ }
+ parent[arc.nextstate] = s;
+ arc_parent[arc.nextstate] = arc;
+ if (!enqueued[arc.nextstate]) {
+ state_queue->Enqueue(arc.nextstate);
+ enqueued[arc.nextstate] = true;
+ } else {
+ state_queue->Update(arc.nextstate);
+ }
+ }
+ }
+ }
+
+ // Walk the parent links back from the best final state, adding one
+ // output state per input state visited. 's_p' is the output state for
+ // 's'; 'd_p' is the output state for 'd', the state visited just before.
+ StateId s_p = kNoStateId, d_p = kNoStateId;
+ for (StateId s = f_parent, d = kNoStateId;
+ s != kNoStateId;
+ d = s, s = parent[s]) {
+ d_p = s_p;
+ s_p = ofst->AddState();
+ if (d == kNoStateId) {
+ ofst->SetFinal(s_p, ifst.Final(f_parent));
+ } else {
+ // Redirect the recorded arc to its destination's output state.
+ arc_parent[d].nextstate = d_p;
+ ofst->AddArc(s_p, arc_parent[d]);
+ }
+ }
+ // The last state added is the start; 's_p' is still kNoStateId here if
+ // no final state was reached.
+ ofst->SetStart(s_p);
+ if (ifst.Properties(kError, false)) ofst->SetProperties(kError, kError);
+ ofst->SetProperties(
+ ShortestPathProperties(ofst->Properties(kFstProperties, false)),
+ kFstProperties);
+}
+
+
+// Comparison functor over indices into a vector of (state, weight) pairs.
+// Each index is scored by the pair's weight times the distance stored for
+// the pair's state; ordering is by NaturalLess on those scores, with a
+// delta-tolerant tie rule when exactly one side is the superfinal state.
+template <class S, class W>
+class ShortestPathCompare {
+ public:
+  typedef S StateId;
+  typedef W Weight;
+  typedef pair<StateId, Weight> Pair;
+
+  // 'pairs' and 'distance' are held by reference and must outlive this
+  // object.
+  ShortestPathCompare(const vector<Pair>& pairs,
+                      const vector<Weight>& distance,
+                      StateId sfinal, float d)
+      : pairs_(pairs), distance_(distance), superfinal_(sfinal), delta_(d) {}
+
+  bool operator()(const StateId x, const StateId y) const {
+    const Pair &px = pairs_[x];
+    const Pair &py = pairs_[y];
+    Weight dx = DistanceOf(px.first);
+    Weight dy = DistanceOf(py.first);
+    Weight wx = Times(dx, px.second);
+    Weight wy = Times(dy, py.second);
+
+    const bool x_complete = px.first == superfinal_;
+    const bool y_complete = py.first == superfinal_;
+    // Neither or both are complete paths: plain comparison.
+    if (x_complete == y_complete) return less_(wy, wx);
+
+    // Penalize complete paths to ensure correct results with inexact
+    // weights. This forms a strict weak order so long as
+    // ApproxEqual(a, b) => ApproxEqual(a, c) for all c s.t.
+    // less_(a, c) && less_(c, b).
+    if (x_complete) return less_(wy, wx) || ApproxEqual(wx, wy, delta_);
+    return less_(wy, wx) && !ApproxEqual(wx, wy, delta_);
+  }
+
+ private:
+  // One() for the superfinal state, the stored distance for a state within
+  // range of the distance vector, and Zero() otherwise.
+  Weight DistanceOf(StateId s) const {
+    return s == superfinal_ ? Weight::One() :
+        s < distance_.size() ? distance_[s] : Weight::Zero();
+  }
+
+  const vector<Pair> &pairs_;
+  const vector<Weight> &distance_;
+  StateId superfinal_;
+  float delta_;
+  NaturalLess<Weight> less_;
+};
+
+
+// N-Shortest-path algorithm: implements the core n-shortest path
+// algorithm. The output is built REVERSED. See below for versions with
+// more options and not reversed.
+//
+// 'ofst' contains the REVERSE of 'n'-shortest paths in 'ifst'.
+// 'distance' must contain the shortest distance from each state to a final
+// state in 'ifst'. 'delta' is the convergence delta.
+//
+// The n-shortest paths are the n-lowest weight paths w.r.t. the
+// natural semiring order. The single path that can be read from the
+// ith of at most n transitions leaving the initial state of 'ofst' is
+// the ith shortest path. Disregarding the initial state and initial
+// transitions, the n-shortest paths, in fact, form a tree rooted at
+// the single final state.
+//
+// The weights need to be left and right distributive (kSemiring) and
+// have the path (kPath) property.
+//
+// The algorithm is from Mohri and Riley, "An Efficient Algorithm for
+// the n-best-strings problem", ICSLP 2002. The algorithm relies on
+// the shortest-distance algorithm. There are some issues with the
+// pseudo-code as written in the paper (viz., line 11).
+//
+// IMPLEMENTATION NOTE: The input fst 'ifst' can be a delayed fst,
+// and at any state in its expansion the values of distance vector need only
+// be defined at that time for the states that are known to exist.
+template<class Arc, class RevArc>
+void NShortestPath(const Fst<RevArc> &ifst,
+ MutableFst<Arc> *ofst,
+ const vector<typename Arc::Weight> &distance,
+ size_t n,
+ float delta = kDelta,
+ typename Arc::Weight weight_threshold = Arc::Weight::Zero(),
+ typename Arc::StateId state_threshold = kNoStateId) {
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Weight Weight;
+ typedef pair<StateId, Weight> Pair;
+ typedef typename RevArc::Weight RevWeight;
+
+ // Note: for n == 0 the function returns before 'ofst' is touched.
+ if (n <= 0) return;
+ if ((Weight::Properties() & (kPath | kSemiring)) != (kPath | kSemiring)) {
+ FSTERROR() << "NShortestPath: Weight needs to have the "
+ << "path property and be distributive: "
+ << Weight::Type();
+ ofst->SetProperties(kError, kError);
+ return;
+ }
+ ofst->DeleteStates();
+ ofst->SetInputSymbols(ifst.InputSymbols());
+ ofst->SetOutputSymbols(ifst.OutputSymbols());
+ // Each state in 'ofst' corresponds to a path with weight w from the
+ // initial state of 'ifst' to a state s in 'ifst', that can be
+ // characterized by a pair (s,w). The vector 'pairs' maps each
+ // state in 'ofst' to the corresponding pair (s,w).
+ vector<Pair> pairs;
+ // The superfinal state is denoted by -1, 'compare' knows that the
+ // distance from 'superfinal' to the final state is 'Weight::One()',
+ // hence 'distance[superfinal]' is not needed.
+ StateId superfinal = -1;
+ // 'compare' refers to 'pairs' and 'distance' by reference, so it sees
+ // the pairs appended below.
+ ShortestPathCompare<StateId, Weight>
+ compare(pairs, distance, superfinal, delta);
+ vector<StateId> heap;
+ // For a state 's' of 'ifst', 'r[s + 1]' is the number of states in
+ // 'ofst' whose corresponding pair contains 's', i.e., the number of
+ // paths computed so far to 's'. Valid for 's == -1' (superfinal).
+ vector<int> r;
+ NaturalLess<Weight> less;
+ // Nothing to output if there is no start state, no distance is known
+ // for it, it has distance Zero(), or a threshold already rules out
+ // every path.
+ if (ifst.Start() == kNoStateId ||
+ distance.size() <= ifst.Start() ||
+ distance[ifst.Start()] == Weight::Zero() ||
+ less(weight_threshold, Weight::One()) ||
+ state_threshold == 0) {
+ if (ifst.Properties(kError, false)) ofst->SetProperties(kError, kError);
+ return;
+ }
+ // The first state added is the start of 'ofst'; the second is its single
+ // final state and stands for the empty path at the start of 'ifst'.
+ ofst->SetStart(ofst->AddState());
+ StateId final = ofst->AddState();
+ ofst->SetFinal(final, Weight::One());
+ while (pairs.size() <= final)
+ pairs.push_back(Pair(kNoStateId, Weight::Zero()));
+ pairs[final] = Pair(ifst.Start(), Weight::One());
+ heap.push_back(final);
+ Weight limit = Times(distance[ifst.Start()], weight_threshold);
+
+ while (!heap.empty()) {
+ // Take the top of the heap under 'compare'.
+ pop_heap(heap.begin(), heap.end(), compare);
+ StateId state = heap.back();
+ Pair p = pairs[state];
+ heap.pop_back();
+ Weight d = p.first == superfinal ? Weight::One() :
+ p.first < distance.size() ? distance[p.first] : Weight::Zero();
+
+ // Prune by weight limit and by the number of states in 'ofst'.
+ if (less(limit, Times(d, p.second)) ||
+ (state_threshold != kNoStateId &&
+ ofst->NumStates() >= state_threshold))
+ continue;
+
+ while (r.size() <= p.first + 1) r.push_back(0);
+ ++r[p.first + 1];
+ // A popped superfinal pair is a complete path: link it from the start
+ // state of 'ofst', and stop once n of them have been found.
+ if (p.first == superfinal)
+ ofst->AddArc(ofst->Start(), Arc(0, 0, Weight::One(), state));
+ if ((p.first == superfinal) && (r[p.first + 1] == n)) break;
+ if (r[p.first + 1] > n) continue;
+ if (p.first == superfinal) continue;
+
+ // Extend the path by every arc leaving 'p.first'; each extension gets
+ // a new state in 'ofst' with an arc pointing back to 'state'.
+ for (ArcIterator< Fst<RevArc> > aiter(ifst, p.first);
+ !aiter.Done();
+ aiter.Next()) {
+ const RevArc &rarc = aiter.Value();
+ Arc arc(rarc.ilabel, rarc.olabel, rarc.weight.Reverse(), rarc.nextstate);
+ Weight w = Times(p.second, arc.weight);
+ StateId next = ofst->AddState();
+ pairs.push_back(Pair(arc.nextstate, w));
+ arc.nextstate = state;
+ ofst->AddArc(next, arc);
+ heap.push_back(next);
+ push_heap(heap.begin(), heap.end(), compare);
+ }
+
+ // If 'p.first' is final in 'ifst', also extend the path to superfinal.
+ Weight finalw = ifst.Final(p.first).Reverse();
+ if (finalw != Weight::Zero()) {
+ Weight w = Times(p.second, finalw);
+ StateId next = ofst->AddState();
+ pairs.push_back(Pair(superfinal, w));
+ ofst->AddArc(next, Arc(0, 0, finalw, state));
+ heap.push_back(next);
+ push_heap(heap.begin(), heap.end(), compare);
+ }
+ }
+ Connect(ofst);
+ if (ifst.Properties(kError, false)) ofst->SetProperties(kError, kError);
+ ofst->SetProperties(
+ ShortestPathProperties(ofst->Properties(kFstProperties, false)),
+ kFstProperties);
+}
+
+
+// N-Shortest-path algorithm: this version allows fine control
+// via the options argument. See below for a simpler interface.
+//
+// 'ofst' contains the n-shortest paths in 'ifst'. 'distance' returns
+// the shortest distances from the source state to each state in
+// 'ifst'. 'opts' is used to specify options such as the number of
+// paths to return, whether they need to have distinct input
+// strings, the queue discipline, the arc filter and the convergence
+// delta.
+//
+// The n-shortest paths are the n-lowest weight paths w.r.t. the
+// natural semiring order. The single path that can be read from the
+// ith of at most n transitions leaving the initial state of 'ofst' is
+// the ith shortest path. Disregarding the initial state and initial
+// transitions, the n-shortest paths, in fact, form a tree rooted at
+// the single final state.
+//
+// The weights need to be right distributive and have the path (kPath)
+// property. They need to be left distributive as well for nshortest
+// > 1.
+//
+// The algorithm is from Mohri and Riley, "An Efficient Algorithm for
+// the n-best-strings problem", ICSLP 2002. The algorithm relies on
+// the shortest-distance algorithm. There are some issues with the
+// pseudo-code as written in the paper (viz., line 11).
+template<class Arc, class Queue, class ArcFilter>
+void ShortestPath(const Fst<Arc> &ifst, MutableFst<Arc> *ofst,
+                  vector<typename Arc::Weight> *distance,
+                  ShortestPathOptions<Arc, Queue, ArcFilter> &opts) {
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Weight Weight;
+  typedef ReverseArc<Arc> RevArc;
+
+  const size_t npaths = opts.nshortest;
+  // No path requested: return without touching 'ofst'.
+  if (npaths == 0) return;
+  // Exactly one path requested: the single-path routine handles it.
+  if (npaths == 1) {
+    SingleShortestPath(ifst, ofst, distance, opts);
+    return;
+  }
+
+  // More than one path: the weight type must advertise both the kPath and
+  // the kSemiring property bits.
+  const bool weight_ok =
+      (Weight::Properties() & (kPath | kSemiring)) == (kPath | kSemiring);
+  if (!weight_ok) {
+    FSTERROR() << "ShortestPath: n-shortest: Weight needs to have the "
+               << "path property and be distributive: "
+               << Weight::Type();
+    ofst->SetProperties(kError, kError);
+    return;
+  }
+
+  if (!opts.has_distance) {
+    // The caller did not supply distances, so compute them here.  A single
+    // non-member entry in 'distance' is handled as an error.
+    ShortestDistance(ifst, distance, opts);
+    const bool failed = distance->size() == 1 && !(*distance)[0].Member();
+    if (failed) {
+      ofst->SetProperties(kError, kError);
+      return;
+    }
+  }
+
+  // The algorithm works on 'reversed', the reverse of 'ifst'; 'distance' is
+  // then the distance to the final state in 'reversed', and 'ofst' is built
+  // as the reverse of the tree of n-shortest paths in 'reversed'.
+  VectorFst<RevArc> reversed;
+  Reverse(ifst, &reversed);
+
+  // Distance entry for state 0 of 'reversed', accumulated over its arcs.
+  // An arc to state t is looked up at index t - 1 of 'distance'.
+  Weight start_distance = Weight::Zero();
+  for (ArcIterator< VectorFst<RevArc> > aiter(reversed, 0);
+       !aiter.Done(); aiter.Next()) {
+    const RevArc &rarc = aiter.Value();
+    StateId s = rarc.nextstate - 1;
+    if (s < distance->size()) {
+      start_distance = Plus(start_distance,
+                            Times(rarc.weight.Reverse(), (*distance)[s]));
+    }
+  }
+  // Prepended for the duration of the search; removed again before returning.
+  distance->insert(distance->begin(), start_distance);
+
+  if (opts.unique) {
+    // Unique paths requested: search the determinized reverse machine.
+    vector<Weight> ddistance;
+    DeterminizeFstOptions<RevArc> dopts(opts.delta);
+    DeterminizeFst<RevArc> dfst(reversed, *distance, &ddistance, dopts);
+    NShortestPath(dfst, ofst, ddistance, npaths, opts.delta,
+                  opts.weight_threshold, opts.state_threshold);
+  } else {
+    NShortestPath(reversed, ofst, *distance, npaths, opts.delta,
+                  opts.weight_threshold, opts.state_threshold);
+  }
+  distance->erase(distance->begin());
+}
+
+
+// Shortest-path algorithm: simplified interface. See above for a
+// version that allows finer control.
+//
+// 'ofst' contains the 'n'-shortest paths in 'ifst'. The queue
+// discipline is automatically selected. When 'unique' == true, only
+// paths with distinct input labels are returned.
+//
+// The n-shortest paths are the n-lowest weight paths w.r.t. the
+// natural semiring order. The single path that can be read from the
+// ith of at most n transitions leaving the initial state of 'ofst' is
+// the ith best path.
+//
+// The weights need to be right distributive and have the path
+// (kPath) property.
+template<class Arc>
+void ShortestPath(const Fst<Arc> &ifst, MutableFst<Arc> *ofst,
+                  size_t n = 1, bool unique = false,
+                  bool first_path = false,
+                  typename Arc::Weight weight_threshold = Arc::Weight::Zero(),
+                  typename Arc::StateId state_threshold = kNoStateId) {
+  typedef AutoQueue<typename Arc::StateId> Queue;
+
+  // Scratch distance vector; handed to both the queue and the full
+  // interface below and discarded when this function returns.
+  vector<typename Arc::Weight> dist;
+  AnyArcFilter<Arc> filter;
+  Queue queue(ifst, &dist, filter);
+
+  // Bundle the arguments and delegate to the options-based overload.
+  ShortestPathOptions<Arc, Queue, AnyArcFilter<Arc> > opts(
+      &queue, filter, n, unique, false, kDelta, first_path,
+      weight_threshold, state_threshold);
+  ShortestPath(ifst, ofst, &dist, opts);
+}
+
+} // namespace fst
+
+#endif // FST_LIB_SHORTEST_PATH_H__
diff --git a/src/include/fst/signed-log-weight.h b/src/include/fst/signed-log-weight.h
new file mode 100644
index 0000000..da96479
--- /dev/null
+++ b/src/include/fst/signed-log-weight.h
@@ -0,0 +1,367 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: krr@google.com (Kasturi Rangan Raghavan)
+// \file
+// LogWeight along with sign information that represents the value X in the
+// linear domain as <sign(X), -ln(|X|)>
+// The sign is a TropicalWeight:
+// positive, TropicalWeight.Value() > 0.0, recommended value 1.0
+// negative, TropicalWeight.Value() <= 0.0, recommended value -1.0
+
+#ifndef FST_LIB_SIGNED_LOG_WEIGHT_H_
+#define FST_LIB_SIGNED_LOG_WEIGHT_H_
+
+#include <fst/float-weight.h>
+#include <fst/pair-weight.h>
+
+
+namespace fst {
+// Weight pairing a sign with a log-domain magnitude.  Value1() holds the
+// sign as a TropicalWeight (positive iff its value is > 0.0) and Value2()
+// holds the magnitude as a LogWeightTpl<T>.
+template <class T>
+class SignedLogWeightTpl
+    : public PairWeight<TropicalWeight, LogWeightTpl<T> > {
+ public:
+  typedef TropicalWeight X1;
+  typedef LogWeightTpl<T> X2;
+  using PairWeight<X1, X2>::Value1;
+  using PairWeight<X1, X2>::Value2;
+
+  using PairWeight<X1, X2>::Reverse;
+  using PairWeight<X1, X2>::Quantize;
+  using PairWeight<X1, X2>::Member;
+
+  typedef SignedLogWeightTpl<T> ReverseWeight;
+
+  SignedLogWeightTpl() : PairWeight<X1, X2>() {}
+
+  SignedLogWeightTpl(const SignedLogWeightTpl<T>& w)
+      : PairWeight<X1, X2> (w) { }
+
+  SignedLogWeightTpl(const PairWeight<X1, X2>& w)
+      : PairWeight<X1, X2> (w) { }
+
+  // x1 is the sign component, x2 the magnitude component.
+  SignedLogWeightTpl(const X1& x1, const X2& x2)
+      : PairWeight<X1, X2>(x1, x2) { }
+
+  // Zero, One and NoWeight are all stored with the positive sign 1.0.
+  static const SignedLogWeightTpl<T> &Zero() {
+    static const SignedLogWeightTpl<T> zero(X1(1.0), X2::Zero());
+    return zero;
+  }
+
+  static const SignedLogWeightTpl<T> &One() {
+    static const SignedLogWeightTpl<T> one(X1(1.0), X2::One());
+    return one;
+  }
+
+  static const SignedLogWeightTpl<T> &NoWeight() {
+    static const SignedLogWeightTpl<T> no_weight(X1(1.0), X2::NoWeight());
+    return no_weight;
+  }
+
+  static const string &Type() {
+    static const string type = "signed_log_" + X1::Type() + "_" + X2::Type();
+    return type;
+  }
+
+  // Quantizes both components with the caller's 'delta'.  The result is a
+  // SignedLogWeightTpl (built from the PairWeight returned by the base
+  // class), so no other weight header is needed to name the return type.
+  SignedLogWeightTpl<T> Quantize(float delta = kDelta) const {
+    return PairWeight<X1, X2>::Quantize(delta);
+  }
+
+  ReverseWeight Reverse() const {
+    return PairWeight<X1, X2>::Reverse();
+  }
+
+  bool Member() const {
+    return PairWeight<X1, X2>::Member();
+  }
+
+  static uint64 Properties() {
+    // not idempotent nor path
+    return kLeftSemiring | kRightSemiring | kCommutative;
+  }
+
+  // Hash that agrees with operator== on this type: the sign is first
+  // normalized to +1.0 / -1.0, and a zero magnitude always hashes as
+  // positive because zeros of either sign compare equal.
+  size_t Hash() const {
+    size_t h1;
+    if (Value2() == X2::Zero() || Value1().Value() > 0.0)
+      h1 = TropicalWeight(1.0).Hash();
+    else
+      h1 = TropicalWeight(-1.0).Hash();
+    size_t h2 = Value2().Hash();
+    const int lshift = 5;
+    const int rshift = CHAR_BIT * sizeof(size_t) - 5;
+    // Rotate h1 left by 5 bits, then mix in h2.
+    return h1 << lshift ^ h1 >> rshift ^ h2;
+  }
+};
+
+// Sum of two signed-log weights.  The stored magnitude is -ln|x|, so a
+// smaller stored value means a larger |x|.  Equal signs log-add the
+// magnitudes; opposite signs log-subtract them and the result takes the
+// sign of the operand with the larger |x|.
+template <class T>
+inline SignedLogWeightTpl<T> Plus(const SignedLogWeightTpl<T> &w1,
+                                  const SignedLogWeightTpl<T> &w2) {
+  if (!w1.Member() || !w2.Member())
+    return SignedLogWeightTpl<T>::NoWeight();
+  const bool s1 = w1.Value1().Value() > 0.0;
+  const bool s2 = w2.Value1().Value() > 0.0;
+  const T f1 = w1.Value2().Value();
+  const T f2 = w2.Value2().Value();
+  // A stored magnitude of +infinity encodes zero: return the other operand.
+  if (f1 == FloatLimits<T>::kPosInfinity)
+    return w2;
+  else if (f2 == FloatLimits<T>::kPosInfinity)
+    return w1;
+  else if (f1 == f2) {
+    // Same |x|: equal signs double it (subtract ln 2), opposite signs
+    // cancel to zero.  ln 2 is evaluated in T so the double-precision
+    // instantiation does not pick up a single-precision constant.
+    if (s1 == s2)
+      return SignedLogWeightTpl<T>(w1.Value1(),
+                                   (f2 - log(static_cast<T>(2))));
+    else
+      return SignedLogWeightTpl<T>::Zero();
+  } else if (f1 > f2) {
+    // |w2| is the larger magnitude.
+    if (s1 == s2) {
+      return SignedLogWeightTpl<T>(
+          w1.Value1(), (f2 - log(1.0F + exp(f2 - f1))));
+    } else {
+      return SignedLogWeightTpl<T>(
+          w2.Value1(), (f2 - log(1.0F - exp(f2 - f1))));
+    }
+  } else {
+    // |w1| is the larger magnitude.
+    if (s2 == s1) {
+      return SignedLogWeightTpl<T>(
+          w2.Value1(), (f1 - log(1.0F + exp(f1 - f2))));
+    } else {
+      return SignedLogWeightTpl<T>(
+          w1.Value1(), (f1 - log(1.0F - exp(f1 - f2))));
+    }
+  }
+}
+
+// Difference w1 - w2, computed as w1 + (-w2).
+template <class T>
+inline SignedLogWeightTpl<T> Minus(const SignedLogWeightTpl<T> &w1,
+                                   const SignedLogWeightTpl<T> &w2) {
+  // A non-member subtrahend yields NoWeight, exactly as Plus would report.
+  if (!w2.Member()) return SignedLogWeightTpl<T>::NoWeight();
+  // Flip the sign through the "> 0.0" test used by the other operations
+  // instead of arithmetic negation: a sign stored as 0.0 counts as negative
+  // and -0.0 would still count as negative, leaving the sign unflipped.
+  const bool w2_positive = w2.Value1().Value() > 0.0;
+  const SignedLogWeightTpl<T> minus_w2(
+      TropicalWeight(w2_positive ? -1.0 : 1.0), w2.Value2());
+  return Plus(w1, minus_w2);
+}
+
+// Product of two signed-log weights: the log-domain magnitudes add, and the
+// sign is +1.0 when both signs agree and -1.0 otherwise.
+template <class T>
+inline SignedLogWeightTpl<T> Times(const SignedLogWeightTpl<T> &w1,
+                                   const SignedLogWeightTpl<T> &w2) {
+  const bool both_members = w1.Member() && w2.Member();
+  if (!both_members) return SignedLogWeightTpl<T>::NoWeight();
+
+  const bool lhs_positive = w1.Value1().Value() > 0.0;
+  const bool rhs_positive = w2.Value1().Value() > 0.0;
+  const T lhs_magnitude = w1.Value2().Value();
+  const T rhs_magnitude = w2.Value2().Value();
+
+  const TropicalWeight sign(lhs_positive == rhs_positive ? 1.0 : -1.0);
+  return SignedLogWeightTpl<T>(sign, (lhs_magnitude + rhs_magnitude));
+}
+
+// Quotient of two signed-log weights: the log-domain magnitudes subtract and
+// the sign is +1.0 when both signs agree, -1.0 otherwise.  'typ' is accepted
+// for interface compatibility and is not consulted.
+template <class T>
+inline SignedLogWeightTpl<T> Divide(const SignedLogWeightTpl<T> &w1,
+                                    const SignedLogWeightTpl<T> &w2,
+                                    DivideType typ = DIVIDE_ANY) {
+  if (!(w1.Member() && w2.Member()))
+    return SignedLogWeightTpl<T>::NoWeight();
+
+  const T numer = w1.Value2().Value();
+  const T denom = w2.Value2().Value();
+
+  // Divisor has the zero magnitude: return the "bad number" value.
+  if (denom == FloatLimits<T>::kPosInfinity) {
+    return SignedLogWeightTpl<T>(TropicalWeight(1.0),
+                                 FloatLimits<T>::kNumberBad);
+  }
+  // Dividend has the zero magnitude: the quotient is zero, positive sign.
+  if (numer == FloatLimits<T>::kPosInfinity) {
+    return SignedLogWeightTpl<T>(TropicalWeight(1.0),
+                                 FloatLimits<T>::kPosInfinity);
+  }
+
+  const bool lhs_positive = w1.Value1().Value() > 0.0;
+  const bool rhs_positive = w2.Value1().Value() > 0.0;
+  const TropicalWeight sign(lhs_positive == rhs_positive ? 1.0 : -1.0);
+  return SignedLogWeightTpl<T>(sign, (numer - denom));
+}
+
+// Approximate equality.  Weights of equal sign compare their magnitudes
+// within 'delta'; weights of opposite sign are equal only when both
+// magnitudes are exactly the log-semiring zero.
+template <class T>
+inline bool ApproxEqual(const SignedLogWeightTpl<T> &w1,
+                        const SignedLogWeightTpl<T> &w2,
+                        float delta = kDelta) {
+  const bool lhs_positive = w1.Value1().Value() > 0.0;
+  const bool rhs_positive = w2.Value1().Value() > 0.0;
+  if (lhs_positive != rhs_positive) {
+    return w1.Value2() == LogWeightTpl<T>::Zero()
+        && w2.Value2() == LogWeightTpl<T>::Zero();
+  }
+  return ApproxEqual(w1.Value2(), w2.Value2(), delta);
+}
+
+// Exact equality.  Weights of equal sign compare their magnitudes; weights
+// of opposite sign are equal only when both magnitudes are the log-semiring
+// zero, so that +0 and -0 are the same weight.
+template <class T>
+inline bool operator==(const SignedLogWeightTpl<T> &w1,
+                       const SignedLogWeightTpl<T> &w2) {
+  const bool lhs_positive = w1.Value1().Value() > 0.0;
+  const bool rhs_positive = w2.Value1().Value() > 0.0;
+  if (lhs_positive != rhs_positive) {
+    return (w1.Value2() == LogWeightTpl<T>::Zero()) &&
+           (w2.Value2() == LogWeightTpl<T>::Zero());
+  }
+  return w1.Value2() == w2.Value2();
+}
+
+
+// Single-precision signed-log weight
+typedef SignedLogWeightTpl<float> SignedLogWeight;
+// Double-precision signed-log weight
+typedef SignedLogWeightTpl<double> SignedLog64Weight;
+
+//
+// WEIGHT CONVERTER SPECIALIZATIONS.
+//
+
+// Returns true when the signed weight 'w' (of type W1) may be converted to
+// the unsigned weight type W2, i.e. when its sign component is positive.
+// Otherwise logs an error naming both types and returns false.
+template <class W1, class W2>
+bool SignedLogConvertCheck(W1 w) {
+  // "Positive" is Value() > 0.0, the same test the arithmetic operations
+  // use; a sign of exactly 0.0 is negative and must be rejected as well.
+  if (!(w.Value1().Value() > 0.0)) {
+    FSTERROR() << "WeightConvert: can't convert weight from \""
+               << W1::Type() << "\" to \"" << W2::Type() << "\"";
+    return false;
+  }
+  return true;
+}
+
+// Convert to tropical
+template <>
+struct WeightConvert<SignedLogWeight, TropicalWeight> {
+  // Passes the magnitude through when the sign check succeeds; otherwise
+  // returns TropicalWeight::NoWeight().
+  TropicalWeight operator()(SignedLogWeight weight) const {
+    if (SignedLogConvertCheck<SignedLogWeight, TropicalWeight>(weight))
+      return weight.Value2().Value();
+    return TropicalWeight::NoWeight();
+  }
+};
+
+template <>
+struct WeightConvert<SignedLog64Weight, TropicalWeight> {
+  // Passes the magnitude through when the sign check succeeds; otherwise
+  // returns TropicalWeight::NoWeight().
+  TropicalWeight operator()(SignedLog64Weight weight) const {
+    if (SignedLogConvertCheck<SignedLog64Weight, TropicalWeight>(weight))
+      return weight.Value2().Value();
+    return TropicalWeight::NoWeight();
+  }
+};
+
+// Convert to log
+template <>
+struct WeightConvert<SignedLogWeight, LogWeight> {
+  // Passes the magnitude through when the sign check succeeds; otherwise
+  // returns LogWeight::NoWeight().
+  LogWeight operator()(SignedLogWeight weight) const {
+    if (SignedLogConvertCheck<SignedLogWeight, LogWeight>(weight))
+      return weight.Value2().Value();
+    return LogWeight::NoWeight();
+  }
+};
+
+template <>
+struct WeightConvert<SignedLog64Weight, LogWeight> {
+  // Passes the magnitude through when the sign check succeeds; otherwise
+  // returns LogWeight::NoWeight().
+  LogWeight operator()(SignedLog64Weight weight) const {
+    if (SignedLogConvertCheck<SignedLog64Weight, LogWeight>(weight))
+      return weight.Value2().Value();
+    return LogWeight::NoWeight();
+  }
+};
+
+// Convert to log64
+template <>
+struct WeightConvert<SignedLogWeight, Log64Weight> {
+  // Passes the magnitude through when the sign check succeeds; otherwise
+  // returns Log64Weight::NoWeight().
+  Log64Weight operator()(SignedLogWeight weight) const {
+    if (SignedLogConvertCheck<SignedLogWeight, Log64Weight>(weight))
+      return weight.Value2().Value();
+    return Log64Weight::NoWeight();
+  }
+};
+
+template <>
+struct WeightConvert<SignedLog64Weight, Log64Weight> {
+  // Passes the magnitude through when the sign check succeeds; otherwise
+  // returns Log64Weight::NoWeight().
+  Log64Weight operator()(SignedLog64Weight weight) const {
+    if (SignedLogConvertCheck<SignedLog64Weight, Log64Weight>(weight))
+      return weight.Value2().Value();
+    return Log64Weight::NoWeight();
+  }
+};
+
+// Convert to signed log
+template <>
+struct WeightConvert<TropicalWeight, SignedLogWeight> {
+  // The tropical value becomes the magnitude; the sign component is set to
+  // the positive marker 1.0.
+  SignedLogWeight operator()(TropicalWeight weight) const {
+    const TropicalWeight sign(1.0);
+    const LogWeight magnitude(weight.Value());
+    return SignedLogWeight(sign, magnitude);
+  }
+};
+
+template <>
+struct WeightConvert<LogWeight, SignedLogWeight> {
+  // The log value becomes the magnitude; the sign component is set to the
+  // positive marker 1.0.
+  SignedLogWeight operator()(LogWeight weight) const {
+    const TropicalWeight sign(1.0);
+    const LogWeight magnitude(weight.Value());
+    return SignedLogWeight(sign, magnitude);
+  }
+};
+
+template <>
+struct WeightConvert<Log64Weight, SignedLogWeight> {
+  // The log64 value becomes the (single-precision) magnitude; the sign
+  // component is set to the positive marker 1.0.
+  SignedLogWeight operator()(Log64Weight weight) const {
+    const TropicalWeight sign(1.0);
+    const LogWeight magnitude(weight.Value());
+    return SignedLogWeight(sign, magnitude);
+  }
+};
+
+template <>
+struct WeightConvert<SignedLog64Weight, SignedLogWeight> {
+  // Copies the sign component as stored and re-wraps the magnitude as a
+  // single-precision LogWeight.
+  SignedLogWeight operator()(SignedLog64Weight weight) const {
+    const TropicalWeight sign(weight.Value1());
+    const LogWeight magnitude(weight.Value2().Value());
+    return SignedLogWeight(sign, magnitude);
+  }
+};
+
+// Convert to signed log64
+template <>
+struct WeightConvert<TropicalWeight, SignedLog64Weight> {
+  // The tropical value becomes the magnitude; the sign component is set to
+  // the positive marker 1.0.
+  SignedLog64Weight operator()(TropicalWeight weight) const {
+    const TropicalWeight sign(1.0);
+    const Log64Weight magnitude(weight.Value());
+    return SignedLog64Weight(sign, magnitude);
+  }
+};
+
+template <>
+struct WeightConvert<LogWeight, SignedLog64Weight> {
+  // The log value becomes the magnitude; the sign component is set to the
+  // positive marker 1.0.
+  SignedLog64Weight operator()(LogWeight weight) const {
+    const TropicalWeight sign(1.0);
+    const Log64Weight magnitude(weight.Value());
+    return SignedLog64Weight(sign, magnitude);
+  }
+};
+
+template <>
+struct WeightConvert<Log64Weight, SignedLog64Weight> {
+  // The log64 value becomes the magnitude; the sign component is set to the
+  // positive marker 1.0.
+  SignedLog64Weight operator()(Log64Weight weight) const {
+    const TropicalWeight sign(1.0);
+    const Log64Weight magnitude(weight.Value());
+    return SignedLog64Weight(sign, magnitude);
+  }
+};
+
+template <>
+struct WeightConvert<SignedLogWeight, SignedLog64Weight> {
+  // Copies the sign component as stored and re-wraps the magnitude as a
+  // Log64Weight.
+  SignedLog64Weight operator()(SignedLogWeight weight) const {
+    const TropicalWeight sign(weight.Value1());
+    const Log64Weight magnitude(weight.Value2().Value());
+    return SignedLog64Weight(sign, magnitude);
+  }
+};
+
+} // namespace fst
+
+#endif // FST_LIB_SIGNED_LOG_WEIGHT_H_
diff --git a/src/include/fst/slist.h b/src/include/fst/slist.h
new file mode 100644
index 0000000..9f94027
--- /dev/null
+++ b/src/include/fst/slist.h
@@ -0,0 +1,61 @@
+// slist.h
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Includes slist definition or defines in terms of STL list as a fallback.
+
+#ifndef FST_LIB_SLIST_H__
+#define FST_LIB_SLIST_H__
+
+#include <fst/config.h>
+
+#if !defined(__ANDROID__) && defined(HAVE___GNU_CXX__SLIST_INT_)
+
+#include <slist>
+
+namespace fst {
+
+using __gnu_cxx::slist;
+
+}
+
+#else
+
+#include <list>
+
+namespace fst {
+
+using std::list;
+
+// Stand-in for slist built on list<T>, adding erase_after().
+template <typename T> class slist : public list<T> {
+ public:
+  typedef typename list<T>::iterator iterator;
+  typedef typename list<T>::const_iterator const_iterator;
+
+  using list<T>::erase;
+
+  // Erases the element that follows 'pos' and returns 'pos' itself.
+  iterator erase_after(iterator pos) {
+    iterator doomed = pos;
+    ++doomed;
+    erase(doomed);
+    return pos;
+  }
+};
+
+} // namespace fst
+
+#endif // HAVE___GNU_CXX__SLIST_INT_
+
+#endif // FST_LIB_SLIST_H__
diff --git a/src/include/fst/sparse-power-weight.h b/src/include/fst/sparse-power-weight.h
new file mode 100644
index 0000000..a1ff56a
--- /dev/null
+++ b/src/include/fst/sparse-power-weight.h
@@ -0,0 +1,225 @@
+// sparse-power-weight.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: krr@google.com (Kasturi Rangan Raghavan)
+// Inspiration: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// Cartesian power weight semiring operation definitions.
+// Uses SparseTupleWeight as underlying representation.
+
+#ifndef FST_LIB_SPARSE_POWER_WEIGHT_H__
+#define FST_LIB_SPARSE_POWER_WEIGHT_H__
+
+#include<string>
+
+#include <fst/sparse-tuple-weight.h>
+#include <fst/weight.h>
+
+
+namespace fst {
+
+// Below SparseTupleWeight*Mapper are used in conjunction with
+// SparseTupleWeightMap to compute the respective semiring operations
+template<class W, class K>
+struct SparseTupleWeightPlusMapper {
+  // Combines the two values found under one key with Plus; the key itself
+  // is not used.
+  W Map(const K& key, const W& lhs, const W& rhs) const {
+    return Plus(lhs, rhs);
+  }
+};
+
+template<class W, class K>
+struct SparseTupleWeightTimesMapper {
+  // Combines the two values found under one key with Times; the key itself
+  // is not used.
+  W Map(const K& key, const W& lhs, const W& rhs) const {
+    return Times(lhs, rhs);
+  }
+};
+
+template<class W, class K>
+struct SparseTupleWeightDivideMapper {
+  // Remembers which kind of division every Map() call should perform.
+  SparseTupleWeightDivideMapper(DivideType divide_type)
+      : divide_type_(divide_type) {}
+
+  // Divides the first value by the second; the key itself is not used.
+  W Map(const K& key, const W& numer, const W& denom) const {
+    return Divide(numer, denom, divide_type_);
+  }
+
+  DivideType divide_type_;
+};
+
+template<class W, class K>
+struct SparseTupleWeightApproxMapper {
+  // Remembers the tolerance every Map() call should compare with.
+  SparseTupleWeightApproxMapper(float delta) : delta_(delta) {}
+
+  // Yields W::One() when the two values are approximately equal within
+  // delta_, and W::Zero() otherwise; the key itself is not used.
+  W Map(const K& key, const W& lhs, const W& rhs) const {
+    if (ApproxEqual(lhs, rhs, delta_)) return W::One();
+    return W::Zero();
+  }
+
+  float delta_;
+};
+
+// Sparse cartesian power semiring: W ^ n
+// Forms:
+//  - a left semimodule when W is a left semiring,
+//  - a right semimodule when W is a right semiring,
+//  - a bisemimodule when W is a semiring,
+//    the free semimodule of rank n over W
+// Times is additionally overloaded with a scalar operand to provide the
+// left and right scalar products.
+// K is the key value type. kNoKey(-1) is reserved for internal use
+template <class W, class K = int>
+class SparsePowerWeight : public SparseTupleWeight<W, K> {
+ public:
+  using SparseTupleWeight<W, K>::Zero;
+  using SparseTupleWeight<W, K>::One;
+  using SparseTupleWeight<W, K>::NoWeight;
+  using SparseTupleWeight<W, K>::Quantize;
+  using SparseTupleWeight<W, K>::Reverse;
+
+  typedef SparsePowerWeight<typename W::ReverseWeight, K> ReverseWeight;
+
+  SparsePowerWeight() {}
+
+  // Wraps an existing sparse tuple.
+  SparsePowerWeight(const SparseTupleWeight<W, K> &tuple)
+      : SparseTupleWeight<W, K>(tuple) {}
+
+  // Forwards a range of entries to the underlying sparse tuple.
+  template <class Iterator>
+  SparsePowerWeight(Iterator first, Iterator last)
+      : SparseTupleWeight<W, K>(first, last) {}
+
+  // Forwards a single (key, weight) entry to the underlying sparse tuple.
+  SparsePowerWeight(const K &key, const W &weight)
+      : SparseTupleWeight<W, K>(key, weight) {}
+
+  static const SparsePowerWeight<W, K> &Zero() {
+    static const SparsePowerWeight<W, K> zero_weight(
+        SparseTupleWeight<W, K>::Zero());
+    return zero_weight;
+  }
+
+  static const SparsePowerWeight<W, K> &One() {
+    static const SparsePowerWeight<W, K> one_weight(
+        SparseTupleWeight<W, K>::One());
+    return one_weight;
+  }
+
+  static const SparsePowerWeight<W, K> &NoWeight() {
+    static const SparsePowerWeight<W, K> bad_weight(
+        SparseTupleWeight<W, K>::NoWeight());
+    return bad_weight;
+  }
+
+  // Type name: W's type name followed by "_^n", plus "_<bits>" when the key
+  // type is not the size of a uint32.  Override this to reflect the key
+  // type if using a non-default key type.
+  static const string &Type() {
+    static string type;
+    if (!type.empty()) return type;  // Already built by an earlier call.
+    type = W::Type() + "_^n";
+    if (sizeof(K) != sizeof(uint32)) {
+      string bits;
+      Int64ToStr(8 * sizeof(K), &bits);
+      type += "_" + bits;
+    }
+    return type;
+  }
+
+  // Keeps only these four property bits of W.
+  static uint64 Properties() {
+    const uint64 inherited = W::Properties();
+    return inherited & (kLeftSemiring | kRightSemiring |
+                        kCommutative | kIdempotent);
+  }
+
+  SparsePowerWeight<W, K> Quantize(float delta = kDelta) const {
+    return SparseTupleWeight<W, K>::Quantize(delta);
+  }
+
+  ReverseWeight Reverse() const {
+    return SparseTupleWeight<W, K>::Reverse();
+  }
+};
+
+// Semimodule plus operation: Plus applied key by key, and to the defaults.
+template <class W, class K>
+inline SparsePowerWeight<W, K> Plus(const SparsePowerWeight<W, K> &w1,
+                                    const SparsePowerWeight<W, K> &w2) {
+  SparsePowerWeight<W, K> sum;
+  SparseTupleWeightMap(&sum, w1, w2, SparseTupleWeightPlusMapper<W, K>());
+  return sum;
+}
+
+// Semimodule times operation: Times applied key by key, and to the defaults.
+template <class W, class K>
+inline SparsePowerWeight<W, K> Times(const SparsePowerWeight<W, K> &w1,
+                                     const SparsePowerWeight<W, K> &w2) {
+  SparsePowerWeight<W, K> product;
+  SparseTupleWeightMap(&product, w1, w2, SparseTupleWeightTimesMapper<W, K>());
+  return product;
+}
+
+// Semimodule divide operation: Divide (of the requested 'type') applied key
+// by key, and to the defaults.
+template <class W, class K>
+inline SparsePowerWeight<W, K> Divide(const SparsePowerWeight<W, K> &w1,
+                                      const SparsePowerWeight<W, K> &w2,
+                                      DivideType type = DIVIDE_ANY) {
+  SparsePowerWeight<W, K> quotient;
+  SparseTupleWeightMap(&quotient, w1, w2,
+                       SparseTupleWeightDivideMapper<W, K>(type));
+  return quotient;
+}
+
+// Semimodule dot product: the Plus-sum of the key-wise Times products.
+// Only explicitly stored entries of the product are summed; its default
+// value is not included.
+//
+// The result is returned by value.  The accumulator is a local variable, so
+// returning a reference to it would leave the caller with a dangling
+// reference.  Callers that bind the result to 'const W&' keep working.
+template <class W, class K>
+inline W DotProduct(const SparsePowerWeight<W, K> &w1,
+                    const SparsePowerWeight<W, K> &w2) {
+  const SparsePowerWeight<W, K>& product = Times(w1, w2);
+  W ret(W::Zero());
+  for (SparseTupleWeightIterator<W, K> it(product); !it.Done(); it.Next()) {
+    ret = Plus(ret, it.Value().second);
+  }
+  return ret;
+}
+
+// Approximate equality of two sparse power weights within 'delta'.
+// Each key (and the pair of defaults) maps to W::One() when the two values
+// are approximately equal and to W::Zero() otherwise; the weights are equal
+// when the resulting tuple is One().
+template <class W, class K>
+inline bool ApproxEqual(const SparsePowerWeight<W, K> &w1,
+                        const SparsePowerWeight<W, K> &w2,
+                        float delta = kDelta) {
+  SparseTupleWeight<W, K> ret;
+  // Use the caller's tolerance, not the global kDelta constant.
+  SparseTupleWeightApproxMapper<W, K> operator_mapper(delta);
+  SparseTupleWeightMap(&ret, w1, w2, operator_mapper);
+  return ret == SparsePowerWeight<W, K>::One();
+}
+
+// Left scalar product: 'k' is promoted to a sparse power weight and
+// multiplied with w2.
+template <class W, class K>
+inline SparsePowerWeight<W, K> Times(const W &k,
+                                     const SparsePowerWeight<W, K> &w2) {
+  const SparsePowerWeight<W, K> scalar(k);
+  return Times(scalar, w2);
+}
+
+// Right scalar product: 'k' is promoted to a sparse power weight and w1 is
+// multiplied by it.
+template <class W, class K>
+inline SparsePowerWeight<W, K> Times(const SparsePowerWeight<W, K> &w1,
+                                     const W &k) {
+  const SparsePowerWeight<W, K> scalar(k);
+  return Times(w1, scalar);
+}
+
+// Scalar division: 'k' is promoted to a sparse power weight and w1 is
+// divided by it using 'divide_type'.
+template <class W, class K>
+inline SparsePowerWeight<W, K> Divide(const SparsePowerWeight<W, K> &w1,
+                                      const W &k,
+                                      DivideType divide_type = DIVIDE_ANY) {
+  const SparsePowerWeight<W, K> scalar(k);
+  return Divide(w1, scalar, divide_type);
+}
+
+} // namespace fst
+
+#endif // FST_LIB_SPARSE_POWER_WEIGHT_H__
diff --git a/src/include/fst/sparse-tuple-weight.h b/src/include/fst/sparse-tuple-weight.h
new file mode 100644
index 0000000..d316b17
--- /dev/null
+++ b/src/include/fst/sparse-tuple-weight.h
@@ -0,0 +1,640 @@
+// sparse-tuple-weight.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: krr@google.com (Kasturi Rangan Raghavan)
+// Inspiration: allauzen@google.com (Cyril Allauzen)
+// \file
+// Sparse version of tuple-weight, based on tuple-weight.h.
+// Internally stores sparse key, value pairs in a linked list.
+// The default value element is the assumed value of unset keys.
+// Internal singleton implementation that stores the first key,
+// value pair as an initialized member variable to avoid
+// unnecessary allocation on the heap.
+// Use SparseTupleWeightIterator to iterate through the key, value pairs.
+// Note: this does NOT iterate through the default value.
+//
+// Sparse tuple weight set operation definitions.
+
+#ifndef FST_LIB_SPARSE_TUPLE_WEIGHT_H__
+#define FST_LIB_SPARSE_TUPLE_WEIGHT_H__
+
+#include<string>
+#include<list>
+#include<stack>
+#include<unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+
+#include <fst/weight.h>
+
+
+DECLARE_string(fst_weight_parentheses);
+DECLARE_string(fst_weight_separator);
+
+namespace fst {
+
+template <class W, class K> class SparseTupleWeight;
+
+template<class W, class K>
+class SparseTupleWeightIterator;
+
+template <class W, class K>
+istream &operator>>(istream &strm, SparseTupleWeight<W, K> &w);
+
+// Arbitrary dimension tuple weight, stored as a sorted linked-list.
+// W is any weight class,
+// K is the key value type. kNoKey(-1) is reserved for internal use.
+// Keys that were never pushed implicitly hold DefaultValue().
+template <class W, class K = int>
+class SparseTupleWeight {
+ public:
+  typedef pair<K, W> Pair;
+  typedef SparseTupleWeight<typename W::ReverseWeight, K> ReverseWeight;
+
+  // Reserved key: while first_ carries it, the tuple has no entries.
+  const static K kNoKey = -1;
+
+  // Empty tuple whose default value is W::Zero().
+  SparseTupleWeight() {
+    Init();
+  }
+
+  // Tuple filled from a range of (key, value) pairs; default W::Zero().
+  template <class Iterator>
+  SparseTupleWeight(Iterator begin, Iterator end) {
+    Init();
+    // Assumes input iterator is sorted
+    for (Iterator it = begin; it != end; ++it)
+      Push(*it);
+  }
+
+  // Tuple holding the single entry (key, w), unless w equals the default
+  // W::Zero(), in which case the entry is dropped by Push().
+  SparseTupleWeight(const K& key, const W &w) {
+    Init();
+    Push(key, w);
+  }
+
+  // Empty tuple whose default value is 'w'.
+  SparseTupleWeight(const W &w) {
+    Init(w);
+  }
+
+  SparseTupleWeight(const SparseTupleWeight<W, K> &w) {
+    Init(w.DefaultValue());  // Init() also installs the default value.
+    for (SparseTupleWeightIterator<W, K> it(w); !it.Done(); it.Next()) {
+      Push(it.Value());
+    }
+  }
+
+  static const SparseTupleWeight<W, K> &Zero() {
+    static SparseTupleWeight<W, K> zero;
+    return zero;
+  }
+
+  static const SparseTupleWeight<W, K> &One() {
+    static SparseTupleWeight<W, K> one(W::One());
+    return one;
+  }
+
+  static const SparseTupleWeight<W, K> &NoWeight() {
+    static SparseTupleWeight<W, K> no_weight(W::NoWeight());
+    return no_weight;
+  }
+
+  // Reads the default value, the first entry and the remaining entries, in
+  // the order Write() emits them.
+  istream &Read(istream &strm) {
+    ReadType(strm, &default_);
+    ReadType(strm, &first_);
+    return ReadType(strm, &rest_);
+  }
+
+  ostream &Write(ostream &strm) const {
+    WriteType(strm, default_);
+    WriteType(strm, first_);
+    return WriteType(strm, rest_);
+  }
+
+  SparseTupleWeight<W, K> &operator=(const SparseTupleWeight<W, K> &w) {
+    if (this == &w) return *this;  // check for w = w
+    Init(w.DefaultValue());
+    for (SparseTupleWeightIterator<W, K> it(w); !it.Done(); it.Next()) {
+      Push(it.Value());
+    }
+    return *this;
+  }
+
+  // A tuple is a member when its default and every stored value are.
+  bool Member() const {
+    if (!DefaultValue().Member()) return false;
+    for (SparseTupleWeightIterator<W, K> it(*this); !it.Done(); it.Next()) {
+      if (!it.Value().second.Member()) return false;
+    }
+    return true;
+  }
+
+  // Assumes H() function exists for the hash of the key value
+  size_t Hash() const {
+    uint64 h = 0;
+    std::hash<K> H;
+    for (SparseTupleWeightIterator<W, K> it(*this); !it.Done(); it.Next()) {
+      h = 5 * h + H(it.Value().first);
+      h = 13 * h + it.Value().second.Hash();
+    }
+    return size_t(h);
+  }
+
+  // Quantizes the default value and every stored value with 'delta'.  The
+  // default is carried over explicitly: starting from a fresh tuple alone
+  // would silently replace it with W::Zero().
+  SparseTupleWeight<W, K> Quantize(float delta = kDelta) const {
+    SparseTupleWeight<W, K> w;
+    w.SetDefaultValue(default_.Quantize(delta));
+    for (SparseTupleWeightIterator<W, K> it(*this); !it.Done(); it.Next()) {
+      w.Push(it.Value().first, it.Value().second.Quantize(delta));
+    }
+    return w;
+  }
+
+  // Reverses the default value and every stored value.  The result is built
+  // directly as a ReverseWeight so that it also works when
+  // W::ReverseWeight differs from W, and the default is carried over
+  // instead of being reset to Zero().
+  ReverseWeight Reverse() const {
+    ReverseWeight w;
+    w.SetDefaultValue(default_.Reverse());
+    for (SparseTupleWeightIterator<W, K> it(*this); !it.Done(); it.Next()) {
+      w.Push(it.Value().first, it.Value().second.Reverse());
+    }
+    return w;
+  }
+
+  // Common initializer among constructors.
+  void Init() {
+    Init(W::Zero());
+  }
+
+  // Empties the tuple and installs 'default_value' as its default.
+  void Init(const W& default_value) {
+    first_.first = kNoKey;
+    /* initialized to the reserved key value */
+    default_ = default_value;
+    rest_.clear();
+  }
+
+  // Number of explicitly stored entries (the default is not counted).
+  size_t Size() const {
+    if (first_.first == kNoKey)
+      return 0;
+    else
+      return rest_.size() + 1;
+  }
+
+  inline void Push(const K &k, const W &w, bool default_value_check = true) {
+    Push(make_pair(k, w), default_value_check);
+  }
+
+  // Appends 'p' after the entries already stored.  With the check enabled,
+  // a value equal to the default is not stored at all.
+  inline void Push(const Pair &p, bool default_value_check = true) {
+    if (default_value_check && p.second == default_) return;
+    if (first_.first == kNoKey) {
+      first_ = p;
+    } else {
+      rest_.push_back(p);
+    }
+  }
+
+  void SetDefaultValue(const W& val) { default_ = val; }
+
+  const W& DefaultValue() const { return default_; }
+
+ protected:
+  static istream& ReadNoParen(
+      istream&, SparseTupleWeight<W, K>&, char separator);
+
+  static istream& ReadWithParen(
+      istream&, SparseTupleWeight<W, K>&,
+      char separator, char open_paren, char close_paren);
+
+ private:
+  // Assumed default value of uninitialized keys, by default W::Zero()
+  W default_;
+
+  // Key values pairs are first stored in first_, then fill rest_
+  // this way we can avoid dynamic allocation in the common case
+  // where the weight is a single key,val pair.
+  Pair first_;
+  list<Pair> rest_;
+
+  friend istream &operator>><W, K>(istream&, SparseTupleWeight<W, K>&);
+  friend class SparseTupleWeightIterator<W, K>;
+};
+
+// Read-only cursor over the explicitly stored (key, value) pairs of a
+// SparseTupleWeight: first the pair kept in first_, then those in rest_.
+// The default value is not visited.  Holds references into the weight.
+template<class W, class K>
+class SparseTupleWeightIterator {
+ public:
+  typedef typename SparseTupleWeight<W, K>::Pair Pair;
+  typedef typename list<Pair>::const_iterator const_iterator;
+  typedef typename list<Pair>::iterator iterator;
+
+  explicit SparseTupleWeightIterator(const SparseTupleWeight<W, K>& w)
+      : first_(w.first_), rest_(w.rest_), init_(true),
+        iter_(rest_.begin()) {}
+
+  // While still on the first pair, the tuple is exhausted only if that pair
+  // carries the reserved key; afterwards the list position decides.
+  bool Done() const {
+    return init_ ? (first_.first == SparseTupleWeight<W, K>::kNoKey)
+                 : (iter_ == rest_.end());
+  }
+
+  const Pair& Value() const {
+    if (init_) return first_;
+    return *iter_;
+  }
+
+  // The first step only leaves the first pair; later steps walk the list.
+  void Next() {
+    if (!init_) {
+      ++iter_;
+      return;
+    }
+    init_ = false;
+  }
+
+  void Reset() {
+    iter_ = rest_.begin();
+    init_ = true;
+  }
+
+ private:
+  const Pair &first_;
+  const list<Pair> & rest_;
+  bool init_;  // still positioned on first_?
+  typename list<Pair>::const_iterator iter_;
+
+  DISALLOW_COPY_AND_ASSIGN(SparseTupleWeightIterator);
+};
+// Key-wise merge of w1 and w2 into *ret, combining values via operator_mapper.
+template<class W, class K, class M>
+inline void SparseTupleWeightMap(
+ SparseTupleWeight<W, K>* ret,
+ const SparseTupleWeight<W, K>& w1,
+ const SparseTupleWeight<W, K>& w2,
+ const M& operator_mapper) {
+ SparseTupleWeightIterator<W, K> w1_it(w1);
+ SparseTupleWeightIterator<W, K> w2_it(w2);
+ const W& v1_def = w1.DefaultValue();
+ const W& v2_def = w2.DefaultValue();
+ ret->SetDefaultValue(operator_mapper.Map(0, v1_def, v2_def));  // result default = Map of both defaults (key passed as 0)
+ while (!w1_it.Done() || !w2_it.Done()) {  // until both entry lists are consumed
+ const K& k1 = (w1_it.Done()) ? w2_it.Value().first : w1_it.Value().first;  // an exhausted side borrows the other side's key...
+ const K& k2 = (w2_it.Done()) ? w1_it.Value().first : w2_it.Value().first;
+ const W& v1 = (w1_it.Done()) ? v1_def : w1_it.Value().second;  // ...and contributes its own default value
+ const W& v2 = (w2_it.Done()) ? v2_def : w2_it.Value().second;
+ if (k1 == k2) {  // same key on both sides, or one side already exhausted
+ ret->Push(k1, operator_mapper.Map(k1, v1, v2));
+ if (!w1_it.Done()) w1_it.Next();
+ if (!w2_it.Done()) w2_it.Next();
+ } else if (k1 < k2) {  // key only present in w1: pair it with w2's default
+ ret->Push(k1, operator_mapper.Map(k1, v1, v2_def));
+ w1_it.Next();
+ } else {  // key only present in w2: pair it with w1's default
+ ret->Push(k2, operator_mapper.Map(k2, v1_def, v2));
+ w2_it.Next();
+ }
+ }
+}
+// Equality: equal defaults, and equal values under every key stored in either.
+template <class W, class K>
+inline bool operator==(const SparseTupleWeight<W, K> &w1,
+ const SparseTupleWeight<W, K> &w2) {
+ const W& v1_def = w1.DefaultValue();
+ const W& v2_def = w2.DefaultValue();
+ if (v1_def != v2_def) return false;
+ // Walk both entry lists in parallel; assumes entries are in increasing key order.
+ SparseTupleWeightIterator<W, K> w1_it(w1);
+ SparseTupleWeightIterator<W, K> w2_it(w2);
+ while (!w1_it.Done() || !w2_it.Done()) {
+ const K& k1 = (w1_it.Done()) ? w2_it.Value().first : w1_it.Value().first;  // an exhausted side borrows the other side's key...
+ const K& k2 = (w2_it.Done()) ? w1_it.Value().first : w2_it.Value().first;
+ const W& v1 = (w1_it.Done()) ? v1_def : w1_it.Value().second;  // ...and stands in with its own default value
+ const W& v2 = (w2_it.Done()) ? v2_def : w2_it.Value().second;
+ if (k1 == k2) {  // same key on both sides, or one side already exhausted
+ if (v1 != v2) return false;
+ if (!w1_it.Done()) w1_it.Next();
+ if (!w2_it.Done()) w2_it.Next();
+ } else if (k1 < k2) {  // key only stored in w1: compare against w2's default
+ if (v1 != v2_def) return false;
+ w1_it.Next();
+ } else {  // key only stored in w2: compare against w1's default
+ if (v1_def != v2) return false;
+ w2_it.Next();
+ }
+ }
+ return true;
+}
+
+template <class W, class K>
+inline bool operator!=(const SparseTupleWeight<W, K> &w1,
+                       const SparseTupleWeight<W, K> &w2) {
+  return (w1 == w2) ? false : true;  // exact negation of operator==
+}
+
+// Writes w as: [open paren] default SEP size SEP (key SEP value SEP)* [close paren],
+// where SEP and the optional parentheses come from the fst_weight_* flags.
+template <class W, class K>
+inline ostream &operator<<(ostream &strm, const SparseTupleWeight<W, K> &w) {
+  if (FLAGS_fst_weight_separator.size() != 1) {
+    FSTERROR() << "FLAGS_fst_weight_separator.size() is not equal to 1";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+  const char sep = FLAGS_fst_weight_separator[0];
+
+  // Parentheses are emitted only when the flag is non-empty, and then it must
+  // hold exactly an opening and a closing character.
+  const bool use_parens = !FLAGS_fst_weight_parentheses.empty();
+  if (use_parens && FLAGS_fst_weight_parentheses.size() != 2) {
+    FSTERROR() << "FLAGS_fst_weight_parentheses.size() is not equal to 2";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+
+  if (use_parens)
+    strm << FLAGS_fst_weight_parentheses[0];
+
+  // Header: default value followed by the number of stored elements.
+  const size_t count = w.Size();
+  strm << w.DefaultValue() << sep << count << sep;
+
+  // Body: every stored (key, value) pair, each field followed by a separator.
+  SparseTupleWeightIterator<W, K> it(w);
+  while (!it.Done()) {
+    strm << it.Value().first << sep;
+    strm << it.Value().second << sep;
+    it.Next();
+  }
+
+  if (use_parens)
+    strm << FLAGS_fst_weight_parentheses[1];
+
+  return strm;
+}
+
+// Reads a SparseTupleWeight using the fst_weight_separator flag, with or without
+// parentheses depending on fst_weight_parentheses; bad flag lengths set badbit.
+template <class W, class K>
+inline istream &operator>>(istream &strm, SparseTupleWeight<W, K> &w) {
+  if (FLAGS_fst_weight_separator.size() != 1) {
+    FSTERROR() << "FLAGS_fst_weight_separator.size() is not equal to 1";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+  const char sep = FLAGS_fst_weight_separator[0];
+  if (FLAGS_fst_weight_parentheses.empty())  // plain form, no parentheses
+    return SparseTupleWeight<W, K>::ReadNoParen(strm, w, sep);
+  if (FLAGS_fst_weight_parentheses.size() != 2) {
+    FSTERROR() << "FLAGS_fst_weight_parentheses.size() is not equal to 2";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+  const char open_paren = FLAGS_fst_weight_parentheses[0];
+  const char close_paren = FLAGS_fst_weight_parentheses[1];
+  return SparseTupleWeight<W, K>::ReadWithParen(
+      strm, w, sep, open_paren, close_paren);
+}
+
+// Reads SparseTupleWeight when there are no parentheses around tuple terms.
+// Expected layout: default SEP n SEP, then n times: key SEP weight SEP.
+// Hitting EOF before an expected separator, or an unparsable n, sets badbit.
+template <class W, class K>
+inline istream& SparseTupleWeight<W, K>::ReadNoParen(
+    istream &strm,
+    SparseTupleWeight<W, K> &w,
+    char separator) {
+  int c;     // current character, or EOF
+  size_t n;  // element count announced by the input
+
+  do {  // skip leading whitespace
+    c = strm.get();
+  } while (isspace(c));
+
+  {  // Read default weight
+    W default_value;
+    string s;
+    while (c != separator) {
+      if (c == EOF) {
+        strm.clear(std::ios::badbit);
+        return strm;
+      }
+      s += c;
+      c = strm.get();
+    }
+    istringstream sstrm(s);
+    sstrm >> default_value;
+    w.SetDefaultValue(default_value);
+  }
+
+  c = strm.get();  // move off the separator onto the first character of n
+
+  {  // Read n
+    string s;
+    while (c != separator) {
+      if (c == EOF) {
+        strm.clear(std::ios::badbit);
+        return strm;
+      }
+      s += c;
+      c = strm.get();
+    }
+    istringstream sstrm(s);
+    if (!(sstrm >> n)) {  // without a valid count the loop bound is unusable
+      strm.clear(std::ios::badbit);
+      return strm;
+    }
+  }
+
+  // Read n elements
+  for (size_t i = 0; i < n; ++i) {
+    c = strm.get();  // c held the previous separator; fetch first key char
+    K p;
+    W r;
+    {  // read key
+      string s;
+      while (c != separator) {
+        if (c == EOF) {
+          strm.clear(std::ios::badbit);
+          return strm;
+        }
+        s += c;
+        c = strm.get();
+      }
+      istringstream sstrm(s);
+      sstrm >> p;
+    }
+    c = strm.get();  // first character of the weight
+    {  // read weight
+      string s;
+      while (c != separator) {
+        if (c == EOF) {
+          strm.clear(std::ios::badbit);
+          return strm;
+        }
+        s += c;
+        c = strm.get();
+      }
+      istringstream sstrm(s);
+      sstrm >> r;
+    }
+    w.Push(p, r);
+  }
+
+  // NOTE(review): one more separator is required after the last element, although
+  // operator<< does not appear to write one in the no-parentheses form -- confirm.
+  c = strm.get();
+  if (c != separator) {
+    strm.clear(std::ios::badbit);
+  }
+  return strm;
+}
+
+// Reads SparseTupleWeight when there are parentheses around tuple terms.
+// Expected layout: OPEN default SEP n SEP (key SEP weight SEP)*n CLOSE, where a
+// separator inside a nested, balanced pair of parentheses does not end a field.
+template <class W, class K>
+inline istream& SparseTupleWeight<W, K>::ReadWithParen(
+    istream &strm,
+    SparseTupleWeight<W, K> &w,
+    char separator,
+    char open_paren,
+    char close_paren) {
+  int c;     // current character, or EOF
+  size_t n;  // element count announced by the input
+
+  do {  // skip leading whitespace
+    c = strm.get();
+  } while (isspace(c));
+
+  if (c != open_paren) {
+    FSTERROR() << "is fst_weight_parentheses flag set correctly? ";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+
+  c = strm.get();
+
+  {  // Read weight
+    W default_value;
+    stack<int> parens;
+    string s;
+    while (c != separator || !parens.empty()) {
+      if (c == EOF) {
+        strm.clear(std::ios::badbit);
+        return strm;
+      }
+      s += c;
+      // If parens encountered before separator, they must be matched
+      if (c == open_paren) {
+        parens.push(1);
+      } else if (c == close_paren) {
+        // Fail for mismatched parens
+        if (parens.empty()) {
+          strm.clear(std::ios::failbit);
+          return strm;
+        }
+        parens.pop();
+      }
+      c = strm.get();
+    }
+    istringstream sstrm(s);
+    sstrm >> default_value;
+    w.SetDefaultValue(default_value);
+  }
+
+  c = strm.get();
+
+  {  // Read n
+    string s;
+    while (c != separator) {
+      if (c == EOF) {
+        strm.clear(std::ios::badbit);
+        return strm;
+      }
+      s += c;
+      c = strm.get();
+    }
+    istringstream sstrm(s);
+    if (!(sstrm >> n)) {  // without a valid count the loop bound is unusable
+      strm.clear(std::ios::badbit);
+      return strm;
+    }
+  }
+
+  // Read n elements
+  for (size_t i = 0; i < n; ++i) {
+    c = strm.get();  // c held the previous separator; fetch first key char
+    K p;
+    W r;
+    {  // Read key
+      stack<int> parens;
+      string s;
+      while (c != separator || !parens.empty()) {
+        if (c == EOF) {
+          strm.clear(std::ios::badbit);
+          return strm;
+        }
+        s += c;
+        // If parens encountered before separator, they must be matched
+        if (c == open_paren) {
+          parens.push(1);
+        } else if (c == close_paren) {
+          // Fail for mismatched parens
+          if (parens.empty()) {
+            strm.clear(std::ios::failbit);
+            return strm;
+          }
+          parens.pop();
+        }
+        c = strm.get();
+      }
+      istringstream sstrm(s);
+      sstrm >> p;
+    }
+    c = strm.get();  // first character of the weight
+    {  // Read weight
+      stack<int> parens;
+      string s;
+      while (c != separator || !parens.empty()) {
+        if (c == EOF) {
+          strm.clear(std::ios::badbit);
+          return strm;
+        }
+        s += c;
+        // If parens encountered before separator, they must be matched
+        if (c == open_paren) {
+          parens.push(1);
+        } else if (c == close_paren) {
+          // Fail for mismatched parens
+          if (parens.empty()) {
+            strm.clear(std::ios::failbit);
+            return strm;
+          }
+          parens.pop();
+        }
+        c = strm.get();
+      }
+      istringstream sstrm(s);
+      sstrm >> r;
+    }
+    w.Push(p, r);
+  }
+
+  if (c != separator) {
+    FSTERROR() << " separator expected, not found! ";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+
+  c = strm.get();
+  if (c != close_paren) {
+    FSTERROR() << " is fst_weight_parentheses flag set correctly? ";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+
+  return strm;
+}
+
+
+
+} // namespace fst
+
+#endif // FST_LIB_SPARSE_TUPLE_WEIGHT_H__
diff --git a/src/include/fst/state-map.h b/src/include/fst/state-map.h
new file mode 100644
index 0000000..ace4a3c
--- /dev/null
+++ b/src/include/fst/state-map.h
@@ -0,0 +1,601 @@
+// state-map.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Class to map over/transform states e.g., sort transitions
+// Consider using when operation does not change the number of states.
+
+#ifndef FST_LIB_STATE_MAP_H__
+#define FST_LIB_STATE_MAP_H__
+
+#include <algorithm>
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <string>
+#include <utility>
+using std::pair; using std::make_pair;
+
+#include <fst/cache.h>
+#include <fst/arc-map.h>
+#include <fst/mutable-fst.h>
+
+
+namespace fst {
+
+// StateMapper Interface - class determines how states are mapped.
+// Useful for implementing operations that do not change the number of states.
+//
+// class StateMapper {
+// public:
+// typedef A FromArc;
+// typedef B ToArc;
+//
+// // Typical constructor
+// StateMapper(const Fst<A> &fst);
+// // Required copy constructor that allows updating Fst argument;
+// // pass only if relevant and changed.
+// StateMapper(const StateMapper &mapper, const Fst<A> *fst = 0);
+//
+// // Specifies initial state of result
+// B::StateId Start() const;
+// // Specifies state's final weight in result
+// B::Weight Final(B::StateId s) const;
+//
+// // These methods iterate through a state's arcs in result
+// // Specifies state to iterate over
+// void SetState(B::StateId s);
+// // End of arcs?
+// bool Done() const;
+// // Current arc
+
+// const B &Value() const;
+// // Advance to next arc (when !Done)
+// void Next();
+//
+// // Specifies input symbol table action the mapper requires (see above).
+// MapSymbolsAction InputSymbolsAction() const;
+// // Specifies output symbol table action the mapper requires (see above).
+// MapSymbolsAction OutputSymbolsAction() const;
+// // This specifies the known properties of an Fst mapped by this
+// // mapper. It takes as argument the input Fst's known properties.
+// uint64 Properties(uint64 props) const;
+// };
+//
+// We include various state map versions below. One dimension of
+// variation is whether the mapping mutates its input, writes to a
+// new result Fst, or is an on-the-fly Fst. Another dimension is how
+// we pass the mapper. We allow passing the mapper by pointer
+// for cases that we need to change the state of the user's mapper.
+// We also include map versions that pass the mapper
+// by value or const reference when this suffices.
+
+// Maps an arc type A using a mapper function object C, passed by pointer.
+// This version rewrites its Fst input in place, one state at a time.
+template<class A, class C>
+void StateMap(MutableFst<A> *fst, C* mapper) {
+  typedef typename A::StateId StateId;
+
+  if (mapper->InputSymbolsAction() == MAP_CLEAR_SYMBOLS)
+    fst->SetInputSymbols(0);
+
+  if (mapper->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS)
+    fst->SetOutputSymbols(0);
+
+  if (fst->Start() == kNoStateId)  // no start state: nothing further to map
+    return;
+
+  const uint64 input_props = fst->Properties(kFstProperties, false);
+  fst->SetStart(mapper->Start());
+
+  for (StateId state = 0; state < fst->NumStates(); ++state) {
+    mapper->SetState(state);  // mapper is positioned before the old arcs go
+    fst->DeleteArcs(state);
+    while (!mapper->Done()) {
+      fst->AddArc(state, mapper->Value());
+      mapper->Next();
+    }
+    fst->SetFinal(state, mapper->Final(state));
+  }
+
+  fst->SetProperties(mapper->Properties(input_props), kFstProperties);
+}
+
+// Maps an arc type A using a mapper function object C, passed
+// by value (the caller's mapper object is not changed). This version modifies its Fst input.
+template<class A, class C>
+void StateMap(MutableFst<A> *fst, C mapper) {
+ StateMap(fst, &mapper);  // delegate to the by-pointer overload using the local copy
+}
+
+
+// Maps an arc type A to an arc type B using mapper function
+// object C, passed by pointer. This version clears the output
+// MutableFst and writes the mapped input Fst into it.
+template<class A, class B, class C>
+void StateMap(const Fst<A> &ifst, MutableFst<B> *ofst, C* mapper) {
+  typedef typename A::StateId StateId;
+  typedef StateIterator< Fst<A> > InputStateIterator;
+
+  ofst->DeleteStates();
+
+  if (mapper->InputSymbolsAction() == MAP_COPY_SYMBOLS)
+    ofst->SetInputSymbols(ifst.InputSymbols());
+  else if (mapper->InputSymbolsAction() == MAP_CLEAR_SYMBOLS)
+    ofst->SetInputSymbols(0);
+
+  if (mapper->OutputSymbolsAction() == MAP_COPY_SYMBOLS)
+    ofst->SetOutputSymbols(ifst.OutputSymbols());
+  else if (mapper->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS)
+    ofst->SetOutputSymbols(0);
+
+  const uint64 in_props = ifst.Properties(kCopyProperties, false);
+
+  if (ifst.Start() == kNoStateId) {  // no start state: only carry over kError
+    if (in_props & kError) ofst->SetProperties(kError, kError);
+    return;
+  }
+
+  // One output state per input state.
+  if (ifst.Properties(kExpanded, false))
+    ofst->ReserveStates(CountStates(ifst));
+  for (InputStateIterator adder(ifst); !adder.Done(); adder.Next())
+    ofst->AddState();
+
+  ofst->SetStart(mapper->Start());
+
+  for (InputStateIterator siter(ifst); !siter.Done(); siter.Next()) {
+    const StateId state = siter.Value();
+    mapper->SetState(state);
+    for (; !mapper->Done(); mapper->Next())
+      ofst->AddArc(state, mapper->Value());
+    ofst->SetFinal(state, mapper->Final(state));
+  }
+
+  const uint64 out_props = ofst->Properties(kFstProperties, false);
+  ofst->SetProperties(mapper->Properties(in_props) | out_props, kFstProperties);
+}
+
+// Maps an arc type A to an arc type B using mapper function
+// object C, passed by value (the caller's mapper object is not changed).
+// This version writes the mapped input Fst to an output MutableFst.
+template<class A, class B, class C>
+void StateMap(const Fst<A> &ifst, MutableFst<B> *ofst, C mapper) {
+ StateMap(ifst, ofst, &mapper);  // delegate to the by-pointer overload using the local copy
+}
+
+typedef CacheOptions StateMapFstOptions;
+
+template <class A, class B, class C> class StateMapFst;
+
+// Implementation of delayed StateMapFst: start state, final weights and arcs are asked of the mapper on first request and then kept via CacheImpl.
+template <class A, class B, class C>
+class StateMapFstImpl : public CacheImpl<B> {
+ public:
+ using FstImpl<B>::SetType;
+ using FstImpl<B>::SetProperties;
+ using FstImpl<B>::SetInputSymbols;
+ using FstImpl<B>::SetOutputSymbols;
+
+ using VectorFstBaseImpl<typename CacheImpl<B>::State>::NumStates;
+
+ using CacheImpl<B>::PushArc;
+ using CacheImpl<B>::HasArcs;
+ using CacheImpl<B>::HasFinal;
+ using CacheImpl<B>::HasStart;
+ using CacheImpl<B>::SetArcs;
+ using CacheImpl<B>::SetFinal;
+ using CacheImpl<B>::SetStart;
+
+ friend class StateIterator< StateMapFst<A, B, C> >;
+
+ typedef B Arc;
+ typedef typename B::Weight Weight;
+ typedef typename B::StateId StateId;
+
+ StateMapFstImpl(const Fst<A> &fst, const C &mapper,
+ const StateMapFstOptions& opts)
+ : CacheImpl<B>(opts),
+ fst_(fst.Copy()),
+ mapper_(new C(mapper, fst_)),  // own copy of the mapper, pointed at our copy of the FST
+ own_mapper_(true) {
+ Init();
+ }
+
+ StateMapFstImpl(const Fst<A> &fst, C *mapper,
+ const StateMapFstOptions& opts)
+ : CacheImpl<B>(opts),
+ fst_(fst.Copy()),
+ mapper_(mapper),  // caller's mapper is used directly
+ own_mapper_(false) {  // and is therefore not deleted by the destructor
+ Init();
+ }
+
+ StateMapFstImpl(const StateMapFstImpl<A, B, C> &impl)
+ : CacheImpl<B>(impl),
+ fst_(impl.fst_->Copy(true)),
+ mapper_(new C(*impl.mapper_, fst_)),  // always a fresh mapper, even if impl did not own its mapper
+ own_mapper_(true) {
+ Init();
+ }
+
+ ~StateMapFstImpl() {
+ delete fst_;
+ if (own_mapper_) delete mapper_;
+ }
+
+ StateId Start() {
+ if (!HasStart())  // not cached yet: ask the mapper
+ SetStart(mapper_->Start());
+ return CacheImpl<B>::Start();
+ }
+
+ Weight Final(StateId s) {
+ if (!HasFinal(s))  // not cached yet: ask the mapper
+ SetFinal(s, mapper_->Final(s));
+ return CacheImpl<B>::Final(s);
+ }
+
+ size_t NumArcs(StateId s) {
+ if (!HasArcs(s))
+ Expand(s);
+ return CacheImpl<B>::NumArcs(s);
+ }
+
+ size_t NumInputEpsilons(StateId s) {
+ if (!HasArcs(s))
+ Expand(s);
+ return CacheImpl<B>::NumInputEpsilons(s);
+ }
+
+ size_t NumOutputEpsilons(StateId s) {
+ if (!HasArcs(s))
+ Expand(s);
+ return CacheImpl<B>::NumOutputEpsilons(s);
+ }
+
+ void InitStateIterator(StateIteratorData<A> *data) const {
+ fst_->InitStateIterator(data);  // states are iterated directly on the input FST
+ }
+
+ void InitArcIterator(StateId s, ArcIteratorData<B> *data) {
+ if (!HasArcs(s))
+ Expand(s);
+ CacheImpl<B>::InitArcIterator(s, data);
+ }
+
+ uint64 Properties() const { return Properties(kFstProperties); }
+
+ // Set error if found; return FST impl properties.
+ uint64 Properties(uint64 mask) const {
+ if ((mask & kError) && (fst_->Properties(kError, false) ||
+ (mapper_->Properties(0) & kError)))
+ SetProperties(kError, kError);
+ return FstImpl<Arc>::Properties(mask);
+ }
+
+ void Expand(StateId s) {
+ // Add exiting arcs: every arc the mapper reports for s is pushed into the cache.
+ for (mapper_->SetState(s); !mapper_->Done(); mapper_->Next())
+ PushArc(s, mapper_->Value());
+ SetArcs(s);
+ }
+
+ private:
+ void Init() {  // sets the type name, symbol tables and initial properties
+ SetType("statemap");
+
+ if (mapper_->InputSymbolsAction() == MAP_COPY_SYMBOLS)
+ SetInputSymbols(fst_->InputSymbols());
+ else if (mapper_->InputSymbolsAction() == MAP_CLEAR_SYMBOLS)
+ SetInputSymbols(0);
+
+ if (mapper_->OutputSymbolsAction() == MAP_COPY_SYMBOLS)
+ SetOutputSymbols(fst_->OutputSymbols());
+ else if (mapper_->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS)
+ SetOutputSymbols(0);
+
+ uint64 props = fst_->Properties(kCopyProperties, false);
+ SetProperties(mapper_->Properties(props));
+ }
+
+ const Fst<A> *fst_;  // copy of the input FST, deleted in the destructor
+ C* mapper_;
+ bool own_mapper_;  // true when mapper_ was allocated by this object
+
+ void operator=(const StateMapFstImpl<A, B, C> &); // disallow
+};
+
+
+// Maps an arc type A to an arc type B using Mapper function object
+// C. This version is a delayed Fst; all work is forwarded to StateMapFstImpl.
+template <class A, class B, class C>
+class StateMapFst : public ImplToFst< StateMapFstImpl<A, B, C> > {
+ public:
+ friend class ArcIterator< StateMapFst<A, B, C> >;
+
+ typedef B Arc;
+ typedef typename B::Weight Weight;
+ typedef typename B::StateId StateId;
+ typedef CacheState<B> State;
+ typedef StateMapFstImpl<A, B, C> Impl;
+
+ StateMapFst(const Fst<A> &fst, const C &mapper,
+ const StateMapFstOptions& opts)
+ : ImplToFst<Impl>(new Impl(fst, mapper, opts)) {}  // the impl makes its own copy of mapper
+
+ StateMapFst(const Fst<A> &fst, C* mapper, const StateMapFstOptions& opts)
+ : ImplToFst<Impl>(new Impl(fst, mapper, opts)) {}  // the impl uses the caller's mapper object
+
+ StateMapFst(const Fst<A> &fst, const C &mapper)
+ : ImplToFst<Impl>(new Impl(fst, mapper, StateMapFstOptions())) {}  // default cache options
+
+ StateMapFst(const Fst<A> &fst, C* mapper)
+ : ImplToFst<Impl>(new Impl(fst, mapper, StateMapFstOptions())) {}  // default cache options
+
+ // See Fst<>::Copy() for doc.
+ StateMapFst(const StateMapFst<A, B, C> &fst, bool safe = false)
+ : ImplToFst<Impl>(fst, safe) {}
+
+ // Get a copy of this StateMapFst. See Fst<>::Copy() for further doc.
+ virtual StateMapFst<A, B, C> *Copy(bool safe = false) const {
+ return new StateMapFst<A, B, C>(*this, safe);
+ }
+
+ virtual void InitStateIterator(StateIteratorData<A> *data) const {
+ GetImpl()->InitStateIterator(data);
+ }
+
+ virtual void InitArcIterator(StateId s, ArcIteratorData<B> *data) const {
+ GetImpl()->InitArcIterator(s, data);
+ }
+
+ private:
+ // Makes visible to friends.
+ Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
+
+ void operator=(const StateMapFst<A, B, C> &fst); // disallow
+};
+
+
+// Specialization for StateMapFst; expands state s through the impl if its arcs are not cached yet.
+template <class A, class B, class C>
+class ArcIterator< StateMapFst<A, B, C> >
+ : public CacheArcIterator< StateMapFst<A, B, C> > {
+ public:
+ typedef typename A::StateId StateId;
+
+ ArcIterator(const StateMapFst<A, B, C> &fst, StateId s)
+ : CacheArcIterator< StateMapFst<A, B, C> >(fst.GetImpl(), s) {
+ if (!fst.GetImpl()->HasArcs(s))  // arcs for s not in the cache
+ fst.GetImpl()->Expand(s);
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+//
+// Utility Mappers
+//
+
+// Mapper that returns its input: start state, final weights and arcs are read straight from the FST.
+template <class A>
+class IdentityStateMapper {
+ public:
+  typedef A FromArc;
+  typedef A ToArc;
+
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+
+  explicit IdentityStateMapper(const Fst<A> &fst) : fst_(fst), aiter_(0) {}
+
+  // Allows updating Fst argument; pass only if changed.
+  IdentityStateMapper(const IdentityStateMapper<A> &mapper,
+                      const Fst<A> *fst = 0)
+      : fst_(fst ? *fst : mapper.fst_), aiter_(0) {}
+
+  ~IdentityStateMapper() { delete aiter_; }
+
+  StateId Start() const { return fst_.Start(); }
+
+  Weight Final(StateId s) const { return fst_.Final(s); }
+
+  void SetState(StateId s) {
+    delete aiter_;  // deleting a null pointer is a no-op
+    aiter_ = new ArcIterator< Fst<A> >(fst_, s);
+  }
+
+  bool Done() const { return aiter_->Done(); }
+  const A &Value() const { return aiter_->Value(); }
+  void Next() { aiter_->Next(); }
+
+  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+  MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+
+  uint64 Properties(uint64 props) const { return props; }
+
+ private:
+  const Fst<A> &fst_;             // FST being mapped (not owned)
+  ArcIterator< Fst<A> > *aiter_;  // arc iterator for the current state (owned)
+};
+
+// Mapper that merges, within each state, the arcs that share input label, output
+// label and destination state into a single arc carrying the Plus of their weights.
+template <class A>
+class ArcSumMapper {
+ public:
+  typedef A FromArc;
+  typedef A ToArc;
+
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+
+  explicit ArcSumMapper(const Fst<A> &fst) : fst_(fst), i_(0) {}
+
+  // Allows updating Fst argument; pass only if changed.
+  ArcSumMapper(const ArcSumMapper<A> &mapper,
+               const Fst<A> *fst = 0)
+      : fst_(fst ? *fst : mapper.fst_), i_(0) {}
+
+  StateId Start() const { return fst_.Start(); }
+  Weight Final(StateId s) const { return fst_.Final(s); }
+
+  void SetState(StateId s) {
+    i_ = 0;
+    arcs_.clear();
+    arcs_.reserve(fst_.NumArcs(s));
+    for (ArcIterator<Fst<A> > aiter(fst_, s); !aiter.Done(); aiter.Next())
+      arcs_.push_back(aiter.Value());
+
+    // First sorts the exiting arcs by input label, output label
+    // and destination state and then sums weights of arcs with
+    // the same input label, output label, and destination state.
+    sort(arcs_.begin(), arcs_.end(), comp_);
+    size_t narcs = 0;  // number of merged arcs kept at the front of arcs_
+    for (size_t i = 0; i < arcs_.size(); ++i) {
+      if (narcs > 0 && equal_(arcs_[i], arcs_[narcs - 1])) {
+        arcs_[narcs - 1].weight = Plus(arcs_[narcs - 1].weight,
+                                       arcs_[i].weight);
+      } else {
+        arcs_[narcs++] = arcs_[i];
+      }
+    }
+    arcs_.resize(narcs);
+  }
+
+  bool Done() const { return i_ >= arcs_.size(); }
+  const A &Value() const { return arcs_[i_]; }
+  void Next() { ++i_; }
+
+  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+  MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+
+  uint64 Properties(uint64 props) const {
+    return props & kArcSortProperties &
+        kDeleteArcsProperties & kWeightInvariantProperties;
+  }
+
+ private:
+  struct Compare {
+    bool operator()(const A& x, const A& y) const {
+      if (x.ilabel < y.ilabel) return true;
+      if (x.ilabel > y.ilabel) return false;
+      if (x.olabel < y.olabel) return true;
+      if (x.olabel > y.olabel) return false;
+      return x.nextstate < y.nextstate;
+    }
+  };
+
+  struct Equal {
+    bool operator()(const A& x, const A& y) const {
+      return (x.ilabel == y.ilabel &&
+              x.olabel == y.olabel &&
+              x.nextstate == y.nextstate);
+    }
+  };
+
+  const Fst<A> &fst_;
+  Compare comp_;
+  Equal equal_;
+  vector<A> arcs_;
+  size_t i_;  // current arc position (unsigned, like arcs_.size())
+
+  void operator=(const ArcSumMapper<A> &);  // disallow
+};
+
+// Mapper that removes, within each state, duplicate arcs: arcs equal in input
+// label, output label, destination state and weight.
+template <class A>
+class ArcUniqueMapper {
+ public:
+  typedef A FromArc;
+  typedef A ToArc;
+
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+
+  explicit ArcUniqueMapper(const Fst<A> &fst) : fst_(fst), i_(0) {}
+
+  // Allows updating Fst argument; pass only if changed.
+  ArcUniqueMapper(const ArcUniqueMapper<A> &mapper,
+                  const Fst<A> *fst = 0)
+      : fst_(fst ? *fst : mapper.fst_), i_(0) {}
+
+  StateId Start() const { return fst_.Start(); }
+  Weight Final(StateId s) const { return fst_.Final(s); }
+
+  void SetState(StateId s) {
+    i_ = 0;
+    arcs_.clear();
+    arcs_.reserve(fst_.NumArcs(s));
+    for (ArcIterator<Fst<A> > aiter(fst_, s); !aiter.Done(); aiter.Next())
+      arcs_.push_back(aiter.Value());
+
+    // First sorts the exiting arcs by input label, output label and destination
+    // state, then drops adjacent identical arcs (the sort does not order weights).
+    sort(arcs_.begin(), arcs_.end(), comp_);
+    typename vector<A>::iterator unique_end =
+        unique(arcs_.begin(), arcs_.end(), equal_);
+    arcs_.resize(unique_end - arcs_.begin());
+  }
+
+  bool Done() const { return i_ >= arcs_.size(); }
+  const A &Value() const { return arcs_[i_]; }
+  void Next() { ++i_; }
+
+  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+  MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+
+  uint64 Properties(uint64 props) const {
+    return props & kArcSortProperties & kDeleteArcsProperties;
+  }
+
+ private:
+  struct Compare {
+    bool operator()(const A& x, const A& y) const {
+      if (x.ilabel < y.ilabel) return true;
+      if (x.ilabel > y.ilabel) return false;
+      if (x.olabel < y.olabel) return true;
+      if (x.olabel > y.olabel) return false;
+      return x.nextstate < y.nextstate;
+    }
+  };
+
+  struct Equal {
+    bool operator()(const A& x, const A& y) const {
+      return (x.ilabel == y.ilabel &&
+              x.olabel == y.olabel &&
+              x.nextstate == y.nextstate &&
+              x.weight == y.weight);
+    }
+  };
+
+  const Fst<A> &fst_;
+  Compare comp_;
+  Equal equal_;
+  vector<A> arcs_;
+  size_t i_;  // current arc position (unsigned, like arcs_.size())
+
+  void operator=(const ArcUniqueMapper<A> &);  // disallow
+};
+
+
+} // namespace fst
+
+#endif // FST_LIB_STATE_MAP_H__
diff --git a/src/include/fst/state-reachable.h b/src/include/fst/state-reachable.h
new file mode 100644
index 0000000..6d0c971
--- /dev/null
+++ b/src/include/fst/state-reachable.h
@@ -0,0 +1,198 @@
+// state-reachable.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Class to determine whether a given (final) state can be reached from some
+// other given state.
+
+#ifndef FST_LIB_STATE_REACHABLE_H__
+#define FST_LIB_STATE_REACHABLE_H__
+
+#include <vector>
+using std::vector;
+
+#include <fst/dfs-visit.h>
+#include <fst/fst.h>
+#include <fst/interval-set.h>
+
+
+namespace fst {
+
+// Computes the (final) states reachable from a given state in an FST.
+// After this visitor has been called, a final state f can be reached
+// from a state s iff (*isets)[s].Member(state2index[f]) is true, where
+// (*isets)[s] is a set of half-open intervals of final state indices
+// and state2index[f] maps from a final state to its index.
+//
+// If state2index is empty, it is filled-in with suitable indices.
+// If it is non-empty, those indices are used; in this case, the
+// final states must have out-degree 0.
+template <class A, typename I = typename A::StateId>
+class IntervalReachVisitor {
+ public:
+  typedef typename A::StateId StateId;
+  typedef typename A::Label Label;
+  typedef typename A::Weight Weight;
+  typedef typename IntervalSet<I>::Interval Interval;
+
+  IntervalReachVisitor(const Fst<A> &fst,
+                       vector< IntervalSet<I> > *isets,
+                       vector<I> *state2index)
+      : fst_(fst),
+        isets_(isets),
+        state2index_(state2index),
+        index_(state2index->empty() ? 1 : -1),  // -1: use the caller's indices
+        error_(false) {
+    isets_->clear();
+  }
+
+  void InitVisit(const Fst<A> &fst) { error_ = false; }
+
+  bool InitState(StateId s, StateId r) {
+    while (isets_->size() <= s)
+      isets_->push_back(IntervalSet<I>());  // must match the vector's element type
+    while (state2index_->size() <= s)
+      state2index_->push_back(-1);
+
+    if (fst_.Final(s) != Weight::Zero()) {
+      // Create tree interval
+      vector<Interval> *intervals = (*isets_)[s].Intervals();
+      if (index_ < 0) {  // Use state2index_ map to set index
+        if (fst_.NumArcs(s) > 0) {
+          FSTERROR() << "IntervalReachVisitor: state2index map must be empty "
+                     << "for this FST";
+          error_ = true;
+          return false;
+        }
+        I index = (*state2index_)[s];
+        if (index < 0) {
+          FSTERROR() << "IntervalReachVisitor: state2index map incomplete";
+          error_ = true;
+          return false;
+        }
+        intervals->push_back(Interval(index, index + 1));
+      } else {  // Use pre-order index
+        intervals->push_back(Interval(index_, index_ + 1));
+        (*state2index_)[s] = index_++;
+      }
+    }
+    return true;
+  }
+
+  bool TreeArc(StateId s, const A &arc) {
+    return true;
+  }
+
+  bool BackArc(StateId s, const A &arc) {
+    FSTERROR() << "IntervalReachVisitor: cyclic input";
+    error_ = true;
+    return false;
+  }
+
+  bool ForwardOrCrossArc(StateId s, const A &arc) {
+    // Non-tree interval
+    (*isets_)[s].Union((*isets_)[arc.nextstate]);
+    return true;
+  }
+
+  void FinishState(StateId s, StateId p, const A *arc) {
+    if (index_ >= 0 && fst_.Final(s) != Weight::Zero()) {
+      vector<Interval> *intervals = (*isets_)[s].Intervals();
+      (*intervals)[0].end = index_;  // Update tree interval end
+    }
+    (*isets_)[s].Normalize();
+    if (p != kNoStateId)
+      (*isets_)[p].Union((*isets_)[s]);  // Propagate intervals to parent
+  }
+
+  void FinishVisit() {}
+
+  bool Error() const { return error_; }
+
+ private:
+  const Fst<A> &fst_;
+  vector< IntervalSet<I> > *isets_;
+  vector<I> *state2index_;
+  I index_;  // next pre-order index, or negative when state2index_ is supplied
+  bool error_;
+};
+
+
+// Tests reachability of final states from a given state. To test for
+// reachability from a state s, first do SetState(s). Then a final
+// state f can be reached from state s of FST iff Reach(f) is true.
+template <class A, typename I = typename A::StateId>
+class StateReachable {
+ public:
+  typedef A Arc;
+  typedef I Index;
+  typedef typename A::StateId StateId;
+  typedef typename A::Label Label;
+  typedef typename A::Weight Weight;
+  typedef typename IntervalSet<I>::Interval Interval;
+
+  StateReachable(const Fst<A> &fst)
+      : error_(false) {
+    IntervalReachVisitor<Arc, I> reach_visitor(fst, &isets_, &state2index_);
+    DfsVisit(fst, &reach_visitor);  // fills isets_ and state2index_
+    if (reach_visitor.Error()) error_ = true;
+  }
+
+  StateReachable(const StateReachable<A> &reachable) {
+    FSTERROR() << "Copy constructor for state reachable class "
+               << "not yet implemented.";
+    error_ = true;
+  }
+
+  // Set current state.
+  void SetState(StateId s) { s_ = s; }
+
+  // Can reach this final state from current state?
+  bool Reach(StateId s) {
+    if (s >= state2index_.size())
+      return false;
+
+    I i = state2index_[s];
+    if (i < 0) {
+      FSTERROR() << "StateReachable: state non-final: " << s;
+      error_ = true;
+      return false;
+    }
+    return isets_[s_].Member(i);
+  }
+
+  // Access to the state-to-index mapping. Unassigned states have index -1.
+  vector<I> &State2Index() { return state2index_; }
+
+  // Access to the interval sets. These specify the reachability
+  // to the final states as intervals of the final state indices.
+  const vector< IntervalSet<I> > &IntervalSets() { return isets_; }
+
+  bool Error() const { return error_; }
+
+ private:
+  StateId s_;                       // Current state
+  vector< IntervalSet<I> > isets_;  // Interval sets per state
+  vector<I> state2index_;           // Finds index for a final state
+  bool error_;
+
+  void operator=(const StateReachable<A> &);  // Disallow
+};
+
+} // namespace fst
+
+#endif // FST_LIB_STATE_REACHABLE_H__
diff --git a/src/include/fst/state-table.h b/src/include/fst/state-table.h
new file mode 100644
index 0000000..7d863a0
--- /dev/null
+++ b/src/include/fst/state-table.h
@@ -0,0 +1,469 @@
+// state-table.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Classes for representing the mapping between state tuples and state Ids.
+
+#ifndef FST_LIB_STATE_TABLE_H__
+#define FST_LIB_STATE_TABLE_H__
+
+#include <deque>
+#include <vector>
+using std::vector;
+
+#include <fst/bi-table.h>
+#include <fst/expanded-fst.h>
+
+
+namespace fst {
+
+// STATE TABLES - these determine the bijective mapping between state
+// tuples (e.g. in composition triples of two FST states and a
+// composition filter state) and their corresponding state IDs.
+// They are classes, templated on state tuples, of the form:
+//
+// template <class T>
+// class StateTable {
+// public:
+// typedef T StateTuple;
+//
+// // Required constructors.
+// StateTable();
+//
+// // Lookup state ID by tuple. If it doesn't exist, then add it.
+// StateId FindState(const StateTuple &);
+// // Lookup state tuple by state ID.
+// const StateTuple &Tuple(StateId) const;
+// // # of stored tuples.
+// StateId Size() const;
+// };
+//
+// A state tuple has the form:
+//
+// template <class S>
+// struct StateTuple {
+// typedef S StateId;
+//
+// // Required constructor.
+// StateTuple();
+// };
+
+
+// A state table built on HashBiTable (a hash map for the tuple to state ID
+// mapping): FindState() forwards to FindId() and Tuple() to FindEntry(). T
+// must define == and default-construct to a never-seen tuple. H is the hash.
+template <class T, class H>
+class HashStateTable : public HashBiTable<typename T::StateId, T, H> {
+ public:
+ typedef T StateTuple;
+ typedef typename StateTuple::StateId StateId;
+ using HashBiTable<StateId, T, H>::FindId;
+ using HashBiTable<StateId, T, H>::FindEntry;
+ using HashBiTable<StateId, T, H>::Size;
+
+ HashStateTable() : HashBiTable<StateId, T, H>() {}
+ StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
+ const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
+};
+
+
+// A state table built on CompactHashBiTable (a hash set for the tuple to
+// state ID mapping): FindState() forwards to FindId() and Tuple() to
+// FindEntry(). The state tuple T must have == defined and its default
+// constructor must produce a tuple that will never be seen. H is the hash.
+template <class T, class H>
+class CompactHashStateTable
+ : public CompactHashBiTable<typename T::StateId, T, H> {
+ public:
+ typedef T StateTuple;
+ typedef typename StateTuple::StateId StateId;
+ using CompactHashBiTable<StateId, T, H>::FindId;
+ using CompactHashBiTable<StateId, T, H>::FindEntry;
+ using CompactHashBiTable<StateId, T, H>::Size;
+
+ CompactHashStateTable() : CompactHashBiTable<StateId, T, H>() {}
+
+ // Reserves space for table_size elements.
+ explicit CompactHashStateTable(size_t table_size)
+ : CompactHashBiTable<StateId, T, H>(table_size) {}
+
+ StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
+ const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
+};
+
+// An implementation using a vector for the tuple to state mapping.
+// It is passed a function object FP that should fingerprint tuples
+// uniquely to an integer that can be used as a vector index. Normally,
+// VectorStateTable constructs the FP object. The user can instead
+// pass in this object; in that case, VectorStateTable takes its
+// ownership. FindState() forwards to FindId(), Tuple() to FindEntry().
+template <class T, class FP>
+class VectorStateTable
+ : public VectorBiTable<typename T::StateId, T, FP> {
+ public:
+ typedef T StateTuple;
+ typedef typename StateTuple::StateId StateId;
+ using VectorBiTable<StateId, T, FP>::FindId;
+ using VectorBiTable<StateId, T, FP>::FindEntry;
+ using VectorBiTable<StateId, T, FP>::Size;
+ using VectorBiTable<StateId, T, FP>::Fingerprint;
+
+ explicit VectorStateTable(FP *fp = 0) : VectorBiTable<StateId, T, FP>(fp) {}
+ StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
+ const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
+};
+
+
+// An implementation using a vector and a compact hash table. The
+// selecting functor S returns true for tuples to be stored in the
+// vector. The fingerprinting functor FP returns a unique fingerprint
+// for each tuple to be stored in the vector (these need to be
+// suitable for indexing in a vector). The hash functor H is used when
+// hashing tuples into the compact hash table.
+template <class T, class S, class FP, class H>
+class VectorHashStateTable
+ : public VectorHashBiTable<typename T::StateId, T, S, FP, H> {
+ public:
+ typedef T StateTuple;
+ typedef typename StateTuple::StateId StateId;
+ using VectorHashBiTable<StateId, T, S, FP, H>::FindId;
+ using VectorHashBiTable<StateId, T, S, FP, H>::FindEntry;
+ using VectorHashBiTable<StateId, T, S, FP, H>::Size;
+ using VectorHashBiTable<StateId, T, S, FP, H>::Selector;
+ using VectorHashBiTable<StateId, T, S, FP, H>::Fingerprint;
+ using VectorHashBiTable<StateId, T, S, FP, H>::Hash;
+
+ VectorHashStateTable(S *s, FP *fp, H *h,
+ size_t vector_size = 0,
+ size_t tuple_size = 0)
+ : VectorHashBiTable<StateId, T, S, FP, H>(
+ s, fp, h, vector_size, tuple_size) {}
+ // All arguments above are handed unchanged to VectorHashBiTable.
+ StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
+ const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
+};
+
+
+// An implementation using a hash map for the tuple to state ID
+// mapping. This version permits erasing of states (it re-exports the
+// Erase() of ErasableBiTable). The state tuple T must have == defined and
+// its default constructor must produce a never-seen tuple. F is the hash.
+template <class T, class F>
+class ErasableStateTable : public ErasableBiTable<typename T::StateId, T, F> {
+ public:
+ typedef T StateTuple;
+ typedef typename StateTuple::StateId StateId;
+ using ErasableBiTable<StateId, T, F>::FindId;
+ using ErasableBiTable<StateId, T, F>::FindEntry;
+ using ErasableBiTable<StateId, T, F>::Size;
+ using ErasableBiTable<StateId, T, F>::Erase;
+
+ ErasableStateTable() : ErasableBiTable<StateId, T, F>() {}
+ StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
+ const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
+};
+
+//
+// COMPOSITION STATE TUPLES AND TABLES
+//
+// The composition state table has the form:
+//
+// template <class A, class F>
+// class ComposeStateTable {
+// public:
+// typedef A Arc;
+// typedef F FilterState;
+// typedef typename A::StateId StateId;
+// typedef ComposeStateTuple<StateId, F> StateTuple;
+//
+// // Required constructors. Copy constructor does not copy state.
+// ComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2);
+// ComposeStateTable(const ComposeStateTable<A, F> &table);
+// // Lookup state ID by tuple. If it doesn't exist, then add it.
+// StateId FindState(const StateTuple &);
+// // Lookup state tuple by state ID.
+// const StateTuple &Tuple(StateId) const;
+// // # of stored tuples.
+// StateId Size() const;
+// // Return true if error encountered
+// bool Error() const;
+// };
+
+// Represents the composition state: one state per FST plus a filter state.
+template <typename S, typename F>
+struct ComposeStateTuple {
+ typedef S StateId;
+ typedef F FilterState;
+ // Default tuple: both IDs are kNoStateId, filter is FilterState::NoState().
+ ComposeStateTuple()
+ : state_id1(kNoStateId), state_id2(kNoStateId),
+ filter_state(FilterState::NoState()) {}
+
+ ComposeStateTuple(StateId s1, StateId s2, const FilterState &f)
+ : state_id1(s1), state_id2(s2), filter_state(f) {}
+
+ StateId state_id1; // State Id on fst1
+ StateId state_id2; // State Id on fst2
+ FilterState filter_state; // State of composition filter
+};
+
+// Two composition tuples are equal iff all three components are equal.
+template <typename S, typename F>
+inline bool operator==(const ComposeStateTuple<S, F>& x,
+                       const ComposeStateTuple<S, F>& y) {
+  // Same object: no need for the member-wise comparison.
+  if (&x == &y) return true;
+  if (!(x.state_id1 == y.state_id1)) return false;
+  if (!(x.state_id2 == y.state_id2)) return false;
+  return x.filter_state == y.filter_state;
+}
+
+
+// Hashing of composition state tuples.
+template <typename S, typename F>
+class ComposeHash {
+ public:
+ size_t operator()(const ComposeStateTuple<S, F>& t) const {
+ return t.state_id1 + t.state_id2 * kPrime0 + // state_id2 scaled by kPrime0
+ t.filter_state.Hash() * kPrime1; // filter hash scaled by kPrime1
+ }
+ private:
+ static const size_t kPrime0;
+ static const size_t kPrime1;
+};
+// Out-of-class definitions of the two multipliers used by operator().
+template <typename S, typename F>
+const size_t ComposeHash<S, F>::kPrime0 = 7853;
+
+template <typename S, typename F>
+const size_t ComposeHash<S, F>::kPrime1 = 7867;
+
+
+// A state table over composition tuples (default H: CompactHashStateTable).
+template <typename A,
+ typename F,
+ typename H =
+ CompactHashStateTable<ComposeStateTuple<typename A::StateId, F>,
+ ComposeHash<typename A::StateId, F> > >
+class GenericComposeStateTable : public H {
+ public:
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+ typedef F FilterState;
+ typedef ComposeStateTuple<StateId, F> StateTuple;
+ // Both constructors ignore their arguments; H is default-constructed.
+ GenericComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {}
+
+ GenericComposeStateTable(const GenericComposeStateTable<A, F> &table) {}
+
+ bool Error() const { return false; }
+
+ private:
+ void operator=(const GenericComposeStateTable<A, F> &table); // disallow
+};
+
+
+// Fingerprint for general composition tuples.
+template <typename S, typename F>
+class ComposeFingerprint {
+ public:
+  typedef S StateId;
+  typedef F FilterState;
+  typedef ComposeStateTuple<S, F> StateTuple;
+
+  // Required but suboptimal constructor.
+  ComposeFingerprint() : mult1_(8192), mult2_(8192) {
+    LOG(WARNING) << "TupleFingerprint: # of FST states should be provided.";
+  }
+
+  // Takes the FST sizes; their product is formed in ssize_t, not StateId.
+  ComposeFingerprint(StateId nstates1, StateId nstates2)
+      : mult1_(nstates1),
+        mult2_(static_cast<ssize_t>(nstates1) * nstates2) {}
+
+  size_t operator()(const StateTuple &tuple) const {
+    return tuple.state_id1 + tuple.state_id2 * mult1_ +
+           tuple.filter_state.Hash() * mult2_;
+  }
+ private:
+  ssize_t mult1_;  // Multiplier applied to state_id2.
+  ssize_t mult2_;  // Multiplier applied to the filter state hash.
+};
+
+
+// Fingerprints a tuple by state_id1 alone; use when it determines the tuple.
+template <typename S, typename F>
+class ComposeState1Fingerprint {
+ public:
+ typedef S StateId;
+ typedef F FilterState;
+ typedef ComposeStateTuple<S, F> StateTuple;
+
+ size_t operator()(const StateTuple &tuple) { return tuple.state_id1; }
+};
+
+
+// Fingerprints a tuple by state_id2 alone; use when it determines the tuple.
+template <typename S, typename F>
+class ComposeState2Fingerprint {
+ public:
+ typedef S StateId;
+ typedef F FilterState;
+ typedef ComposeStateTuple<S, F> StateTuple;
+
+ size_t operator()(const StateTuple &tuple) { return tuple.state_id2; }
+};
+
+
+// A VectorStateTable over composition tuples. This can be used when
+// the product of the number of states in FST1 and FST2 (and the
+// composition filter state hash) is manageable. The state counts are
+// obtained by calling CountStates() on each FST.
+template <typename A, typename F>
+class ProductComposeStateTable : public
+VectorStateTable<ComposeStateTuple<typename A::StateId, F>,
+ ComposeFingerprint<typename A::StateId, F> > {
+ public:
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+ typedef F FilterState;
+ typedef ComposeStateTuple<StateId, F> StateTuple;
+ // Builds the fingerprint from the state counts of both FSTs.
+ ProductComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2)
+ : VectorStateTable<ComposeStateTuple<StateId, F>,
+ ComposeFingerprint<StateId, F> >
+ (new ComposeFingerprint<StateId, F>(CountStates(fst1),
+ CountStates(fst2))) { }
+ // Copy-constructs a new fingerprint from table.Fingerprint().
+ ProductComposeStateTable(const ProductComposeStateTable<A, F> &table)
+ : VectorStateTable<ComposeStateTuple<StateId, F>,
+ ComposeFingerprint<StateId, F> >
+ (new ComposeFingerprint<StateId, F>(table.Fingerprint())) {}
+
+ bool Error() const { return false; }
+
+ private:
+ void operator=(const ProductComposeStateTable<A, F> &table); // disallow
+};
+
+// A VectorStateTable over composition tuples. This can be used when
+// FST1 is a string (satisfies kStringProperties) and FST2 is
+// epsilon-free and deterministic. It should be used with a
+// composition filter that creates at most one filter state per tuple
+// under these conditions (e.g. SequenceComposeFilter or
+// MatchComposeFilter). Tuples are fingerprinted by their FST1 state.
+template <typename A, typename F>
+class StringDetComposeStateTable : public
+VectorStateTable<ComposeStateTuple<typename A::StateId, F>,
+ ComposeState1Fingerprint<typename A::StateId, F> > {
+ public:
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+ typedef F FilterState;
+ typedef ComposeStateTuple<StateId, F> StateTuple;
+ // Checks the required FST properties; a failure makes Error() return true.
+ StringDetComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2)
+ : error_(false) {
+ uint64 props1 = kString;
+ uint64 props2 = kIDeterministic | kNoIEpsilons;
+ if (fst1.Properties(props1, true) != props1 ||
+ fst2.Properties(props2, true) != props2) {
+ FSTERROR() << "StringDetComposeStateTable: fst1 not a string or"
+ << " fst2 not input deterministic and epsilon-free";
+ error_ = true;
+ }
+ }
+ // Copies only the error flag of 'table'.
+ StringDetComposeStateTable(const StringDetComposeStateTable<A, F> &table)
+ : error_(table.error_) {}
+
+ bool Error() const { return error_; }
+
+ private:
+ bool error_;
+
+ void operator=(const StringDetComposeStateTable<A, F> &table); // disallow
+};
+
+
+// A VectorStateTable over composition tuples. This can be used when
+// FST2 is a string (satisfies kStringProperties) and FST1 is output
+// epsilon-free and output deterministic. Under these conditions the FST2
+// state alone identifies a tuple, hence ComposeState2Fingerprint. It
+// should be used with a composition filter that creates at most one filter
+// state per tuple (e.g. SequenceComposeFilter or MatchComposeFilter).
+template <typename A, typename F>
+class DetStringComposeStateTable : public
+VectorStateTable<ComposeStateTuple<typename A::StateId, F>,
+                 ComposeState2Fingerprint<typename A::StateId, F> > {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+  typedef F FilterState;
+  typedef ComposeStateTuple<StateId, F> StateTuple;
+
+  // Verifies the FST properties; Error() reports whether the check failed.
+  DetStringComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2)
+      : error_(false) {
+    const uint64 props1 = kODeterministic | kNoOEpsilons;
+    const uint64 props2 = kString;
+    if (fst1.Properties(props1, true) != props1 ||
+        fst2.Properties(props2, true) != props2) {
+      FSTERROR() << "DetStringComposeStateTable: fst2 not a string or"
+                 << " fst1 not output deterministic and epsilon-free";
+      error_ = true;
+    }
+  }
+
+  DetStringComposeStateTable(const DetStringComposeStateTable<A, F> &table)
+      : error_(table.error_) {}
+
+  bool Error() const { return error_; }
+
+ private:
+  bool error_;
+  void operator=(const DetStringComposeStateTable<A, F> &table);  // disallow
+};
+
+
+// An ErasableStateTable over composition tuples. The Erase(StateId) method
+// can be called if the user is either sure that composition will never
+// return to that tuple, or does not mind that the tuple is assigned a new
+// state ID if it does.
+template <typename A, typename F>
+class ErasableComposeStateTable : public
+ErasableStateTable<ComposeStateTuple<typename A::StateId, F>,
+ ComposeHash<typename A::StateId, F> > {
+ public:
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+ typedef F FilterState;
+ typedef ComposeStateTuple<StateId, F> StateTuple;
+ // Both constructors ignore their arguments; the base is default-built.
+ ErasableComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {}
+
+ ErasableComposeStateTable(const ErasableComposeStateTable<A, F> &table) {}
+
+ bool Error() const { return false; }
+
+ private:
+ void operator=(const ErasableComposeStateTable<A, F> &table); // disallow
+};
+
+} // namespace fst
+
+#endif // FST_LIB_STATE_TABLE_H__
diff --git a/src/include/fst/statesort.h b/src/include/fst/statesort.h
new file mode 100644
index 0000000..6f827f4
--- /dev/null
+++ b/src/include/fst/statesort.h
@@ -0,0 +1,97 @@
+// statesort.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Function to sort states of an Fst.
+
+#ifndef FST_LIB_STATESORT_H__
+#define FST_LIB_STATESORT_H__
+
+#include <vector>
+using std::vector;
+#include <algorithm>
+
+#include <fst/mutable-fst.h>
+
+
+namespace fst {
+
+// Sorts the input states of an FST, modifying it. ORDER[i] gives
+// the state Id after sorting that corresponds to state Id i before
+// sorting. ORDER must be a permutation of FST's states ID sequence:
+// (0, 1, 2, ..., fst->NumStates() - 1).
+template <class Arc>
+void StateSort(MutableFst<Arc> *fst,
+ const vector<typename Arc::StateId> &order) {
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Weight Weight;
+ // Only the size of ORDER is checked; a mismatch flags the FST with kError.
+ if (order.size() != fst->NumStates()) {
+ FSTERROR() << "StateSort: bad order vector size: " << order.size();
+ fst->SetProperties(kError, kError);
+ return;
+ }
+
+ if (fst->Start() == kNoStateId) // No start state: leave the FST untouched.
+ return;
+ // Properties read here are written back once the states have been moved.
+ uint64 props = fst->Properties(kStateSortProperties, false);
+
+ vector<bool> done(order.size(), false); // done[s]: s was already moved
+ vector<Arc> arcsa, arcsb;
+ vector<Arc> *arcs1 = &arcsa, *arcs2 = &arcsb; // Swapped after each move.
+
+ fst->SetStart(order[fst->Start()]);
+ // Move each state's final weight and arcs to its new ID, chain by chain.
+ for (StateIterator< MutableFst<Arc> > siter(*fst);
+ !siter.Done();
+ siter.Next()) {
+ StateId s1 = siter.Value(), s2;
+ if (done[s1]) // Already moved while following an earlier chain.
+ continue;
+ Weight final1 = fst->Final(s1), final2 = Weight::Zero();
+ arcs1->clear();
+ for (ArcIterator< MutableFst<Arc> > aiter(*fst, s1);
+ !aiter.Done();
+ aiter.Next())
+ arcs1->push_back(aiter.Value());
+ for (; !done[s1]; s1 = s2, final1 = final2, swap(arcs1, arcs2)) {
+ s2 = order[s1]; // New ID that receives the contents of s1.
+ if (!done[s2]) { // Save s2's own contents before they are overwritten.
+ final2 = fst->Final(s2);
+ arcs2->clear();
+ for (ArcIterator< MutableFst<Arc> > aiter(*fst, s2);
+ !aiter.Done();
+ aiter.Next())
+ arcs2->push_back(aiter.Value());
+ }
+ fst->SetFinal(s2, final1);
+ fst->DeleteArcs(s2);
+ for (size_t i = 0; i < arcs1->size(); ++i) {
+ Arc arc = (*arcs1)[i];
+ arc.nextstate = order[arc.nextstate]; // Renumber the arc's target too.
+ fst->AddArc(s2, arc);
+ }
+ done[s1] = true;
+ }
+ }
+ fst->SetProperties(props, kFstProperties);
+}
+
+} // namespace fst
+
+#endif // FST_LIB_STATESORT_H__
diff --git a/src/include/fst/string-weight.h b/src/include/fst/string-weight.h
new file mode 100644
index 0000000..1beeb33
--- /dev/null
+++ b/src/include/fst/string-weight.h
@@ -0,0 +1,560 @@
+// string-weight.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// String weight set and associated semiring operation definitions.
+
+#ifndef FST_LIB_STRING_WEIGHT_H__
+#define FST_LIB_STRING_WEIGHT_H__
+
+#include <list>
+#include <string>
+
+#include <fst/product-weight.h>
+#include <fst/weight.h>
+
+namespace fst {
+
+const int kStringInfinity = -1; // Label for the infinite string
+const int kStringBad = -2; // Label for a non-string
+const char kStringSeparator = '_'; // Label separator in strings
+
+// Determines whether to use left or right string semiring. Includes
+// restricted versions that signal an error if proper prefixes
+// (suffixes) would otherwise be returned by Plus, useful with various
+// algorithms that require functional transducer input with the
+// string semirings.
+enum StringType { STRING_LEFT = 0, STRING_RIGHT = 1 ,
+ STRING_LEFT_RESTRICT = 2, STRING_RIGHT_RESTRICT };
+// REVERSE_STRING_TYPE maps each string type to its left/right counterpart.
+#define REVERSE_STRING_TYPE(S) \
+ ((S) == STRING_LEFT ? STRING_RIGHT : \
+ ((S) == STRING_RIGHT ? STRING_LEFT : \
+ ((S) == STRING_LEFT_RESTRICT ? STRING_RIGHT_RESTRICT : \
+ STRING_LEFT_RESTRICT)))
+
+template <typename L, StringType S = STRING_LEFT>
+class StringWeight;
+
+template <typename L, StringType S = STRING_LEFT>
+class StringWeightIterator;
+
+template <typename L, StringType S = STRING_LEFT>
+class StringWeightReverseIterator;
+
+template <typename L, StringType S>
+bool operator==(const StringWeight<L, S> &, const StringWeight<L, S> &);
+
+
+// String semiring: (longest_common_prefix/suffix, ., Infinity, Epsilon)
+template <typename L, StringType S>
+class StringWeight {
+ public:
+ typedef L Label;
+ typedef StringWeight<L, REVERSE_STRING_TYPE(S)> ReverseWeight;
+
+ friend class StringWeightIterator<L, S>;
+ friend class StringWeightReverseIterator<L, S>;
+ friend bool operator==<>(const StringWeight<L, S> &,
+ const StringWeight<L, S> &);
+
+ StringWeight() { Init(); } // The empty string.
+ // Builds the string from the labels in [begin, end), in that order.
+ template <typename Iter>
+ StringWeight(const Iter &begin, const Iter &end) {
+ Init();
+ for (Iter iter = begin; iter != end; ++iter)
+ PushBack(*iter);
+ }
+ // Single-label string (a label of 0 leaves the string empty).
+ explicit StringWeight(L l) { Init(); PushBack(l); }
+
+ static const StringWeight<L, S> &Zero() {
+ static const StringWeight<L, S> zero(kStringInfinity); // one label
+ return zero;
+ }
+
+ static const StringWeight<L, S> &One() {
+ static const StringWeight<L, S> one; // the empty string
+ return one;
+ }
+
+ static const StringWeight<L, S> &NoWeight() {
+ static const StringWeight<L, S> no_weight(kStringBad); // one label
+ return no_weight;
+ }
+ // Name of the semiring, selected by the string type S.
+ static const string &Type() {
+ static const string type =
+ S == STRING_LEFT ? "string" :
+ (S == STRING_RIGHT ? "right_string" :
+ (S == STRING_LEFT_RESTRICT ? "restricted_string" :
+ "right_restricted_string"));
+ return type;
+ }
+ // False only for the single-label string holding kStringBad.
+ bool Member() const;
+ // Binary I/O: an int32 label count followed by the labels themselves.
+ istream &Read(istream &strm);
+
+ ostream &Write(ostream &strm) const;
+
+ size_t Hash() const;
+
+ StringWeight<L, S> Quantize(float delta = kDelta) const {
+ return *this; // Nothing to quantize; delta is unused.
+ }
+ // Same labels in the opposite order, typed as ReverseWeight.
+ ReverseWeight Reverse() const;
+
+ static uint64 Properties() {
+ return (S == STRING_LEFT || S == STRING_LEFT_RESTRICT ?
+ kLeftSemiring : kRightSemiring) | kIdempotent;
+ }
+
+ // NB: This needs to be uncommented only if default fails for this impl.
+ // StringWeight<L, S> &operator=(const StringWeight<L, S> &w);
+
+ // These operations combined with the StringWeightIterator and
+ // StringWeightReverseIterator provide the access and mutation of
+ // the string internal elements.
+
+ // Common initializer among constructors.
+ void Init() { first_ = 0; }
+
+ // Clear existing StringWeight.
+ void Clear() { first_ = 0; rest_.clear(); }
+ // A first_ of 0 is taken to mean that the string is empty.
+ size_t Size() const { return first_ ? rest_.size() + 1 : 0; }
+ // Makes l the first label; the previous first label moves into rest_.
+ void PushFront(L l) {
+ if (first_)
+ rest_.push_front(first_);
+ first_ = l;
+ }
+ // Appends l: it becomes first_ if the string is empty, else joins rest_.
+ void PushBack(L l) {
+ if (!first_)
+ first_ = l;
+ else
+ rest_.push_back(l);
+ }
+
+ private:
+ L first_; // first label in string (0 if empty)
+ list<L> rest_; // remaining labels in string
+};
+
+
+// Forward traversal of a StringWeight: first label, then the rest.
+template <typename L, StringType S>
+class StringWeightIterator {
+ public:
+  explicit StringWeightIterator(const StringWeight<L, S>& w)
+      : first_(w.first_), rest_(w.rest_), init_(true),
+        iter_(rest_.begin()) {}
+
+  // True once all labels were visited (at once when first_ is 0).
+  bool Done() const { return init_ ? first_ == 0 : iter_ == rest_.end(); }
+
+  const L& Value() const {
+    if (init_) return first_;
+    return *iter_;
+  }
+
+  void Next() {
+    if (!init_) {
+      ++iter_;
+    } else {
+      init_ = false;
+    }
+  }
+
+  void Reset() { iter_ = rest_.begin(); init_ = true; }
+
+ private:
+  const L &first_;                         // first label of the string
+  const list<L> &rest_;                    // labels after the first one
+  bool init_;                              // still positioned on first_?
+  typename list<L>::const_iterator iter_;  // position within rest_
+  DISALLOW_COPY_AND_ASSIGN(StringWeightIterator);
+};
+
+
+// Traverses string in backward direction (rest_ reversed, then first_).
+template <typename L, StringType S>
+class StringWeightReverseIterator {
+ public:
+  explicit StringWeightReverseIterator(const StringWeight<L, S>& w)
+      : first_(w.first_), rest_(w.rest_), fin_(first_ == 0),
+        iter_(rest_.rbegin()) {}
+
+  bool Done() const { return fin_; }
+
+  const L& Value() const { return iter_ == rest_.rend() ? first_ : *iter_; }
+
+  void Next() {
+    if (iter_ == rest_.rend()) fin_ = true;
+    else ++iter_;
+  }
+
+  // Same state as after construction: an empty string is Done() at once.
+  void Reset() {
+    fin_ = first_ == 0;
+    iter_ = rest_.rbegin();
+  }
+
+ private:
+  const L &first_;
+  const list<L> &rest_;
+  bool fin_;  // in the final state?
+  typename list<L>::const_reverse_iterator iter_;
+  DISALLOW_COPY_AND_ASSIGN(StringWeightReverseIterator);
+};
+
+
+// StringWeight member functions follow that require
+// StringWeightIterator or StringWeightReverseIterator.
+
+template <typename L, StringType S>
+inline istream &StringWeight<L, S>::Read(istream &strm) {
+  Clear();
+  int32 size = 0;  // Stays 0 (no labels) when the count cannot be read.
+  ReadType(strm, &size);
+  for (int32 i = 0; strm && i < size; ++i) {  // Stop once the stream fails.
+    L label;
+    ReadType(strm, &label);
+    if (strm) PushBack(label);  // Never store a label that was not read.
+  }
+  return strm;
+}
+
+template <typename L, StringType S>
+inline ostream &StringWeight<L, S>::Write(ostream &strm) const {
+  WriteType(strm, static_cast<int32>(Size()));  // Label count goes first.
+  // Then every label, in forward order.
+  for (StringWeightIterator<L, S> it(*this); !it.Done(); it.Next()) {
+    const L current = it.Value();
+    WriteType(strm, current);
+  }
+  return strm;
+}
+
+template <typename L, StringType S>
+inline bool StringWeight<L, S>::Member() const {
+  // The only non-member is the single-label string holding kStringBad.
+  // For a one-label string that label is first_, so no iterator is needed.
+  const bool single_label = (Size() == 1);
+  return !single_label || first_ != kStringBad;
+}
+
+template <typename L, StringType S>
+inline typename StringWeight<L, S>::ReverseWeight
+StringWeight<L, S>::Reverse() const {
+  ReverseWeight reversed;  // Prepending in forward order reverses labels.
+  StringWeightIterator<L, S> it(*this);
+  for (; !it.Done(); it.Next()) reversed.PushFront(it.Value());
+  return reversed;
+}
+
+template <typename L, StringType S>
+inline size_t StringWeight<L, S>::Hash() const {
+  size_t hash = 0;  // Shift-and-xor fold over the labels, front to back.
+  StringWeightIterator<L, S> it(*this);
+  for (; !it.Done(); it.Next()) hash ^= (hash << 1) ^ it.Value();
+  return hash;
+}
+
+// NB: This needs to be uncommented only if default fails for this impl.
+//
+// template <typename L, StringType S>
+// inline StringWeight<L, S>
+// &StringWeight<L, S>::operator=(const StringWeight<L, S> &w) {
+// if (this != &w) {
+// Clear();
+// for (StringWeightIterator<L, S> iter(w); !iter.Done(); iter.Next())
+// PushBack(iter.Value());
+// }
+// return *this;
+// }
+
+template <typename L, StringType S>
+inline bool operator==(const StringWeight<L, S> &w1,
+                       const StringWeight<L, S> &w2) {
+  // Strings of different length can never match.
+  if (w1.Size() != w2.Size()) return false;
+  // Equal sizes: walk both strings in lock step, comparing label by label.
+  StringWeightIterator<L, S> lhs(w1);
+  StringWeightIterator<L, S> rhs(w2);
+  while (!lhs.Done()) {
+    if (lhs.Value() != rhs.Value()) return false;
+    lhs.Next();
+    rhs.Next();
+  }
+  return true;
+}
+
+template <typename L, StringType S>
+inline bool operator!=(const StringWeight<L, S> &w1,
+ const StringWeight<L, S> &w2) {
+ return !(w1 == w2); // Plain negation of operator==.
+}
+
+template <typename L, StringType S>
+inline bool ApproxEqual(const StringWeight<L, S> &w1,
+ const StringWeight<L, S> &w2,
+ float delta = kDelta) {
+ return w1 == w2; // Strings are compared exactly; delta is unused.
+}
+
+template <typename L, StringType S>
+inline ostream &operator<<(ostream &strm, const StringWeight<L, S> &w) {
+  StringWeightIterator<L, S> it(w);
+  // The empty string and the two reserved first labels print as words.
+  if (it.Done()) return strm << "Epsilon";
+  if (it.Value() == kStringInfinity) return strm << "Infinity";
+  if (it.Value() == kStringBad) return strm << "BadString";
+
+  // Ordinary string: the labels joined by kStringSeparator.
+  bool first = true;
+  for (; !it.Done(); it.Next()) {
+    if (!first) strm << kStringSeparator;
+    strm << it.Value();
+    first = false;
+  }
+  return strm;
+}
+
+template <typename L, StringType S>
+inline istream &operator>>(istream &strm, StringWeight<L, S> &w) {
+ string s;
+ strm >> s; // One whitespace-delimited token holds the whole weight.
+ if (s == "Infinity") {
+ w = StringWeight<L, S>::Zero();
+ } else if (s == "Epsilon") {
+ w = StringWeight<L, S>::One();
+ } else {
+ w.Clear();
+ char *p = 0; // End of the last parsed number; null before the first pass.
+ for (const char *cs = s.c_str(); !p || *p != '\0'; cs = p + 1) {
+ int l = strtoll(cs, &p, 10);
+ if (p == cs || (*p != 0 && *p != kStringSeparator)) { // No digits/junk.
+ strm.clear(std::ios::badbit);
+ break;
+ }
+ w.PushBack(l);
+ }
+ }
+ return strm;
+}
+
+
+// Plus for the restricted left and right string semirings (the default).
+// Non-Zero() arguments must be equal, otherwise an error is reported; e.g.
+// Determinize relies on this restriction to ensure functional input.
+template <typename L, StringType S> inline StringWeight<L, S>
+Plus(const StringWeight<L, S> &w1,
+     const StringWeight<L, S> &w2) {
+  typedef StringWeight<L, S> Weight;
+  if (!(w1.Member() && w2.Member())) return Weight::NoWeight();
+
+  // Zero() is the identity of Plus.
+  if (w1 == Weight::Zero()) return w2;
+  if (w2 == Weight::Zero()) return w1;
+
+  // Neither argument is Zero() here, so the two have to agree.
+  if (w1 == w2) return w1;
+
+  // Disagreement: report it and hand back the non-member weight.
+  FSTERROR() << "StringWeight::Plus: unequal arguments "
+             << "(non-functional FST?)"
+             << " w1 = " << w1
+             << " w2 = " << w2;
+  return Weight::NoWeight();
+}
+
+
+// Plus in the left string semiring: the longest common prefix.
+template <typename L> inline StringWeight<L, STRING_LEFT>
+Plus(const StringWeight<L, STRING_LEFT> &w1,
+     const StringWeight<L, STRING_LEFT> &w2) {
+  typedef StringWeight<L, STRING_LEFT> Weight;
+  if (!(w1.Member() && w2.Member())) return Weight::NoWeight();
+  if (w1 == Weight::Zero()) return w2;  // Zero() is the identity of Plus.
+  if (w2 == Weight::Zero()) return w1;
+  // Copy labels from the front for as long as both strings agree.
+  Weight prefix;
+  StringWeightIterator<L, STRING_LEFT> lhs(w1);
+  StringWeightIterator<L, STRING_LEFT> rhs(w2);
+  while (!lhs.Done() && !rhs.Done() && lhs.Value() == rhs.Value()) {
+    prefix.PushBack(lhs.Value());
+    lhs.Next();
+    rhs.Next();
+  }
+  return prefix;
+}
+
+
+// Plus in the right string semiring: the longest common suffix.
+template <typename L> inline StringWeight<L, STRING_RIGHT>
+Plus(const StringWeight<L, STRING_RIGHT> &w1,
+     const StringWeight<L, STRING_RIGHT> &w2) {
+  typedef StringWeight<L, STRING_RIGHT> Weight;
+  if (!(w1.Member() && w2.Member())) return Weight::NoWeight();
+  if (w1 == Weight::Zero()) return w2;  // Zero() is the identity of Plus.
+  if (w2 == Weight::Zero()) return w1;
+  // Walk both strings from the back, prepending labels while they agree.
+  Weight suffix;
+  StringWeightReverseIterator<L, STRING_RIGHT> lhs(w1);
+  StringWeightReverseIterator<L, STRING_RIGHT> rhs(w2);
+  while (!lhs.Done() && !rhs.Done() && lhs.Value() == rhs.Value()) {
+    suffix.PushFront(lhs.Value());
+    lhs.Next();
+    rhs.Next();
+  }
+  return suffix;
+}
+
+
+template <typename L, StringType S>
+inline StringWeight<L, S> Times(const StringWeight<L, S> &w1,
+                                const StringWeight<L, S> &w2) {
+  typedef StringWeight<L, S> Weight;
+  if (!(w1.Member() && w2.Member())) return Weight::NoWeight();
+  // Zero() annihilates: the product is Zero() if either factor is.
+  if (w1 == Weight::Zero()) return Weight::Zero();
+  if (w2 == Weight::Zero()) return Weight::Zero();
+  // Otherwise concatenate: start from w1 and append every label of w2.
+  Weight concat(w1);
+  StringWeightIterator<L, S> it(w2);
+  for (; !it.Done(); it.Next()) concat.PushBack(it.Value());
+  return concat;
+}
+
+
+// Left division, the default: used by the left string and the left
+// restricted string semirings. Drops the first w2.Size() labels of w1.
+template <typename L, StringType S> inline StringWeight<L, S>
+Divide(const StringWeight<L, S> &w1,
+       const StringWeight<L, S> &w2,
+       DivideType typ) {
+  typedef StringWeight<L, S> Weight;
+
+  if (typ != DIVIDE_LEFT) {
+    FSTERROR() << "StringWeight::Divide: only left division is defined "
+               << "for the " << Weight::Type() << " semiring";
+    return Weight::NoWeight();
+  }
+
+  if (!(w1.Member() && w2.Member())) return Weight::NoWeight();
+
+  // A Zero() divisor yields the bad string; a Zero() dividend stays Zero().
+  if (w2 == Weight::Zero()) return Weight(kStringBad);
+  if (w1 == Weight::Zero()) return Weight::Zero();
+
+  // Skip as many leading labels as w2 has, then keep the remainder.
+  Weight quotient;
+  int position = 0;
+  for (StringWeightIterator<L, S> it(w1); !it.Done(); it.Next()) {
+    if (position >= w2.Size()) quotient.PushBack(it.Value());
+    ++position;
+  }
+  return quotient;
+}
+
+
+// Right division in the right string semiring (strips trailing labels).
+template <typename L> inline StringWeight<L, STRING_RIGHT>
+Divide(const StringWeight<L, STRING_RIGHT> &w1,
+       const StringWeight<L, STRING_RIGHT> &w2,
+       DivideType typ) {
+  typedef StringWeight<L, STRING_RIGHT> Weight;
+
+  if (typ != DIVIDE_RIGHT) {
+    FSTERROR() << "StringWeight::Divide: only right division is defined "
+               << "for the right string semiring";
+    return Weight::NoWeight();
+  }
+  if (!(w1.Member() && w2.Member())) return Weight::NoWeight();
+
+  // A Zero() divisor yields the bad string; a Zero() dividend stays Zero().
+  if (w2 == Weight::Zero()) return Weight(kStringBad);
+  if (w1 == Weight::Zero()) return Weight::Zero();
+
+  // Walk w1 backwards, skipping as many labels as w2 has; keep the rest.
+  Weight quotient;
+  int position = 0;
+  StringWeightReverseIterator<L, STRING_RIGHT> it(w1);
+  for (; !it.Done(); it.Next()) {
+    if (position >= w2.Size()) quotient.PushFront(it.Value());
+    ++position;
+  }
+  return quotient;
+}
+
+
+// Right division in the right restricted string semiring.
+template <typename L> inline StringWeight<L, STRING_RIGHT_RESTRICT>
+Divide(const StringWeight<L, STRING_RIGHT_RESTRICT> &w1,
+       const StringWeight<L, STRING_RIGHT_RESTRICT> &w2,
+       DivideType typ) {
+  typedef StringWeight<L, STRING_RIGHT_RESTRICT> Weight;
+
+  if (typ != DIVIDE_RIGHT) {
+    FSTERROR() << "StringWeight::Divide: only right division is defined "
+               << "for the right restricted string semiring";
+    return Weight::NoWeight();
+  }
+  if (!(w1.Member() && w2.Member())) return Weight::NoWeight();
+
+  // A Zero() divisor yields the bad string; a Zero() dividend stays Zero().
+  if (w2 == Weight::Zero()) return Weight(kStringBad);
+  if (w1 == Weight::Zero()) return Weight::Zero();
+
+  // Walk w1 backwards, skipping as many labels as w2 has; keep the rest.
+  Weight quotient;
+  int position = 0;
+  StringWeightReverseIterator<L, STRING_RIGHT_RESTRICT> it(w1);
+  for (; !it.Done(); it.Next()) {
+    if (position >= w2.Size()) quotient.PushFront(it.Value());
+    ++position;
+  }
+  return quotient;
+}
+
+
+// Product of string weight and an arbitrary weight.
+template <class L, class W, StringType S = STRING_LEFT>
+struct GallicWeight : public ProductWeight<StringWeight<L, S>, W> {
+ typedef GallicWeight<L, typename W::ReverseWeight, REVERSE_STRING_TYPE(S)>
+ ReverseWeight;
+
+ GallicWeight() {}
+ // The remaining constructors forward their arguments to ProductWeight.
+ GallicWeight(StringWeight<L, S> w1, W w2)
+ : ProductWeight<StringWeight<L, S>, W>(w1, w2) {}
+
+ explicit GallicWeight(const string &s, int *nread = 0)
+ : ProductWeight<StringWeight<L, S>, W>(s, nread) {}
+
+ GallicWeight(const ProductWeight<StringWeight<L, S>, W> &w)
+ : ProductWeight<StringWeight<L, S>, W>(w) {}
+};
+
+} // namespace fst
+
+#endif // FST_LIB_STRING_WEIGHT_H__
diff --git a/src/include/fst/string.h b/src/include/fst/string.h
new file mode 100644
index 0000000..3099b87
--- /dev/null
+++ b/src/include/fst/string.h
@@ -0,0 +1,247 @@
+
+// string.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// Utilities to convert strings into FSTs.
+//
+
+#ifndef FST_LIB_STRING_H_
+#define FST_LIB_STRING_H_
+
+#include <fst/compact-fst.h>
+#include <fst/mutable-fst.h>
+
+DECLARE_string(fst_field_separator);
+
+namespace fst {
+
+ // Functor compiling a string into an FST.
+ //
+ // The string is first turned into a sequence of labels according to the
+ // token type chosen at construction:
+ //   BYTE   - one label per byte of the string, read as unsigned char;
+ //   UTF8   - labels produced by UTF8StringToLabels();
+ //   SYMBOL - (any other value) the string is tokenized by SplitToVector()
+ //            with "\n" + FLAGS_fst_field_separator as its delimiter
+ //            argument, and every token is converted by
+ //            ConvertSymbolToLabel().
+ // The labels are then written into the destination FST by Compile().
+ template <class A>
+ class StringCompiler {
+  public:
+   typedef A Arc;
+   typedef typename A::Label Label;
+   typedef typename A::Weight Weight;
+
+   enum TokenType { SYMBOL = 1, BYTE = 2, UTF8 = 3 };
+
+   // 'syms' is consulted only for symbol tokens; when it is null, tokens are
+   // parsed as decimal integers. 'unknown_label', if different from kNoLabel,
+   // replaces tokens missing from the symbol table. Negative labels are
+   // rejected unless 'allow_negative' is true.
+   StringCompiler(TokenType type, const SymbolTable *syms = 0,
+                  Label unknown_label = kNoLabel,
+                  bool allow_negative = false)
+       : token_type_(type), syms_(syms), unknown_label_(unknown_label),
+         allow_negative_(allow_negative) {}
+
+   // Compile string 's' into FST 'fst'. Returns false, leaving 'fst'
+   // untouched, when the string cannot be converted into labels.
+   template <class F>
+   bool operator()(const string &s, F *fst) {
+     vector<Label> labels;
+     if (!ConvertStringToLabels(s, &labels))
+       return false;
+     Compile(labels, fst);
+     return true;
+   }
+
+  private:
+   // Converts 'str' into 'labels' (cleared first) according to token_type_.
+   // Returns false on the first token that cannot be converted; 'labels'
+   // then holds the labels converted so far.
+   bool ConvertStringToLabels(const string &str, vector<Label> *labels) const {
+     labels->clear();
+     if (token_type_ == BYTE) {
+       for (size_t i = 0; i < str.size(); ++i)
+         labels->push_back(static_cast<unsigned char>(str[i]));
+     } else if (token_type_ == UTF8) {
+       return UTF8StringToLabels(str, labels);
+     } else {
+       // SplitToVector() takes a mutable char buffer, so work on a private
+       // NUL-terminated copy of the string. The copy lives in a vector so
+       // that it is released on every exit path, including the early
+       // 'return false' below (the former new[]/delete[] pair leaked there).
+       vector<char> buffer(str.begin(), str.end());
+       buffer.push_back(0);
+       vector<char *> vec;
+       string separator = "\n" + FLAGS_fst_field_separator;
+       SplitToVector(&buffer[0], separator.c_str(), &vec, true);
+       for (size_t i = 0; i < vec.size(); ++i) {
+         Label label;
+         if (!ConvertSymbolToLabel(vec[i], &label))
+           return false;
+         labels->push_back(label);
+       }
+     }
+     return true;
+   }
+
+   // Rebuilds 'fst' as a chain of labels.size() + 1 states: state 0 is the
+   // start state, arc i goes from state i to state i + 1 with labels[i] on
+   // both sides and weight One(), and the last state is final with weight
+   // One().
+   void Compile(const vector<Label> &labels, MutableFst<A> *fst) const {
+     fst->DeleteStates();
+     while (fst->NumStates() <= labels.size())
+       fst->AddState();
+     for (size_t i = 0; i < labels.size(); ++i)
+       fst->AddArc(i, Arc(labels[i], labels[i], Weight::One(), i + 1));
+     fst->SetStart(0);
+     fst->SetFinal(labels.size(), Weight::One());
+   }
+
+   // Overload for string compact FSTs: hands the label sequence directly to
+   // SetCompactElements().
+   template <class Unsigned>
+   void Compile(const vector<Label> &labels, CompactFst<A, StringCompactor<A>,
+                Unsigned> *fst) const {
+     fst->SetCompactElements(labels.begin(), labels.end());
+   }
+
+   // Converts the single token 's' into '*output'.
+   // With a symbol table: looks 's' up, substitutes unknown_label_ for a
+   // missing symbol when one was configured, and fails if the result is
+   // still -1 or is negative while negative labels are not allowed.
+   // Without a symbol table: parses 's' as a base-10 integer and fails on
+   // trailing unparsed characters or on a disallowed negative value.
+   // '*output' is written only on success.
+   bool ConvertSymbolToLabel(const char *s, Label* output) const {
+     int64 n;
+     if (syms_) {
+       n = syms_->Find(s);
+       if ((n == -1) && (unknown_label_ != kNoLabel))
+         n = unknown_label_;
+       if (n == -1 || (!allow_negative_ && n < 0)) {
+         VLOG(1) << "StringCompiler::ConvertSymbolToLabel: Symbol \"" << s
+                 << "\" is not mapped to any integer label, symbol table = "
+                 << syms_->Name();
+         return false;
+       }
+     } else {
+       char *p;
+       n = strtoll(s, &p, 10);
+       if (p < s + strlen(s) || (!allow_negative_ && n < 0)) {
+         VLOG(1) << "StringCompiler::ConvertSymbolToLabel: Bad label integer "
+                 << "= \"" << s << "\"";
+         return false;
+       }
+     }
+     *output = n;
+     return true;
+   }
+
+   TokenType token_type_;     // Token type: symbol, byte or utf8 encoded
+   const SymbolTable *syms_;  // Symbol table used when token type is symbol
+   Label unknown_label_;      // Label for token missing from symbol table
+   bool allow_negative_;      // Negative labels allowed?
+
+   DISALLOW_COPY_AND_ASSIGN(StringCompiler);
+ };
+
+ // Functor to print a string FST as a string.
+ //
+ // The FST is followed from its start state along single outgoing arcs until
+ // a state whose final weight differs from Zero() is reached; the output
+ // labels collected on the way are then rendered according to the token type.
+ template <class A>
+ class StringPrinter {
+  public:
+   typedef A Arc;
+   typedef typename A::Label Label;
+   typedef typename A::StateId StateId;
+   typedef typename A::Weight Weight;
+
+   enum TokenType { SYMBOL = 1, BYTE = 2, UTF8 = 3 };
+
+   // 'syms' is used only for SYMBOL output; without it labels are printed as
+   // integers.
+   StringPrinter(TokenType token_type,
+                 const SymbolTable *syms = 0)
+       : token_type_(token_type), syms_(syms) {}
+
+   // Convert the FST 'fst' into the string 'output'. Returns false when the
+   // FST is not a string, when a label has no symbol, or when the token type
+   // is unknown. 'output' is cleared once the FST has been accepted.
+   bool operator()(const Fst<A> &fst, string *output) {
+     if (!FstToLabels(fst)) {
+       VLOG(1) << "StringPrinter::operator(): Fst is not a string.";
+       return false;
+     }
+
+     output->clear();
+
+     switch (token_type_) {
+       case SYMBOL: {
+         stringstream sstrm;
+         for (size_t i = 0; i < labels_.size(); ++i) {
+           // Symbols are joined by the last character of the separator flag.
+           // NOTE(review): rbegin() is dereferenced unconditionally, which
+           // assumes FLAGS_fst_field_separator is never empty - confirm.
+           if (i > 0)
+             sstrm << *(FLAGS_fst_field_separator.rbegin());
+           if (!PrintLabel(labels_[i], sstrm))
+             return false;
+         }
+         *output = sstrm.str();
+         return true;
+       }
+       case BYTE: {
+         for (size_t i = 0; i < labels_.size(); ++i)
+           output->push_back(labels_[i]);
+         return true;
+       }
+       case UTF8:
+         return LabelsToUTF8String(labels_, output);
+       default:
+         VLOG(1) << "StringPrinter::operator(): Unknown token type: "
+                 << token_type_;
+         return false;
+     }
+   }
+
+  private:
+   // Fills labels_ with the output labels met while walking 'fst' from its
+   // start state. Fails if there is no start state, if a non-final state has
+   // no outgoing arc or more than one, or if an arc leads to kNoStateId.
+   // NOTE(review): no visited-state bookkeeping is done here, so a cycle of
+   // non-final states would apparently never terminate - confirm callers
+   // only pass acyclic input.
+   bool FstToLabels(const Fst<A> &fst) {
+     labels_.clear();
+
+     StateId state = fst.Start();
+     if (state == kNoStateId) {
+       VLOG(2) << "StringPrinter::FstToLabels: Invalid starting state for "
+               << "string fst.";
+       return false;
+     }
+
+     for (;;) {
+       // Stop at the first state that carries a final weight.
+       if (!(fst.Final(state) == Weight::Zero()))
+         break;
+
+       ArcIterator<Fst<A> > arcs(fst, state);
+       if (arcs.Done()) {
+         VLOG(2) << "StringPrinter::FstToLabels: String fst traversal does "
+                 << "not reach final state.";
+         return false;
+       }
+
+       const A &first_arc = arcs.Value();
+       labels_.push_back(first_arc.olabel);
+
+       state = first_arc.nextstate;
+       if (state == kNoStateId) {
+         VLOG(2) << "StringPrinter::FstToLabels: Transition to invalid "
+                 << "state.";
+         return false;
+       }
+
+       // A string FST has exactly one arc per non-final state.
+       arcs.Next();
+       if (!arcs.Done()) {
+         VLOG(2) << "StringPrinter::FstToLabels: State with multiple "
+                 << "outgoing arcs found.";
+         return false;
+       }
+     }
+
+     return true;
+   }
+
+   // Writes 'lab' to 'ostrm': as its symbol when a symbol table is present
+   // (failing if the table returns an empty string), as an integer otherwise.
+   bool PrintLabel(Label lab, ostream& ostrm) {
+     if (!syms_) {
+       ostrm << lab;
+       return true;
+     }
+     string symbol = syms_->Find(lab);
+     if (symbol.empty()) {
+       VLOG(2) << "StringPrinter::PrintLabel: Integer " << lab << " is not "
+               << "mapped to any textual symbol, symbol table = "
+               << syms_->Name();
+       return false;
+     }
+     ostrm << symbol;
+     return true;
+   }
+
+   TokenType token_type_;     // Token type: symbol, byte or utf8 encoded
+   const SymbolTable *syms_;  // Symbol table used when token type is symbol
+   vector<Label> labels_;     // Input FST labels.
+
+   DISALLOW_COPY_AND_ASSIGN(StringPrinter);
+ };
+
+} // namespace fst
+
+#endif // FST_LIB_STRING_H_
diff --git a/src/include/fst/symbol-table-ops.h b/src/include/fst/symbol-table-ops.h
new file mode 100644
index 0000000..e46c4c2
--- /dev/null
+++ b/src/include/fst/symbol-table-ops.h
@@ -0,0 +1,91 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: sorenj@google.com (Jeffrey Sorensen)
+
+#ifndef FST_LIB_SYMBOL_TABLE_OPS_H_
+#define FST_LIB_SYMBOL_TABLE_OPS_H_
+
+#include <vector>
+using std::vector;
+#include <string>
+#include <unordered_set>
+using std::tr1::unordered_set;
+using std::tr1::unordered_multiset;
+
+
+#include <fst/fst.h>
+#include <fst/symbol-table.h>
+
+
+namespace fst {
+
+ // Returns a minimal symbol table containing only the symbols referenced by
+ // the passed fst. Input labels are collected when 'input' is true, output
+ // labels otherwise. Symbols keep their original numbering, so the fst does
+ // not need relabeling. The returned table is allocated with new, is named
+ // after 'syms' with a "_pruned" suffix, and is owned by the caller.
+ template<class Arc>
+ SymbolTable *PruneSymbolTable(const Fst<Arc> &fst, const SymbolTable &syms,
+                               bool input) {
+   typedef typename Arc::Label Label;
+
+   // Pass 1: gather every label that appears on the selected side of an arc.
+   unordered_set<Label> used;
+   used.insert(0);  // Always keep epsilon
+   for (StateIterator<Fst<Arc> > states(fst); !states.Done(); states.Next()) {
+     for (ArcIterator<Fst<Arc> > arcs(fst, states.Value());
+          !arcs.Done();
+          arcs.Next()) {
+       const Arc &arc = arcs.Value();
+       used.insert(input ? arc.ilabel : arc.olabel);
+     }
+   }
+
+   // Pass 2: copy over the entries of 'syms' whose key was seen.
+   SymbolTable *pruned = new SymbolTable(syms.Name() + "_pruned");
+   SymbolTableIterator entries(syms);
+   while (!entries.Done()) {
+     const Label key = entries.Value();
+     if (used.count(key) > 0)
+       pruned->AddSymbol(entries.Symbol(), entries.Value());
+     entries.Next();
+   }
+   return pruned;
+ }
+
+ // Relabels a symbol table to make it a contiguous mapping.
+ // Returns a pointer to the relabeled table (defined out of line).
+ SymbolTable *CompactSymbolTable(const SymbolTable &syms);
+
+ // Merges two SymbolTables. All symbols from left are merged into the result
+ // with the same ids they have in left. A symbol in right whose id conflicts
+ // with one in left is assigned the value dictated by the left SymbolTable.
+ // The returned symbol table never modifies symbol assignments from the left
+ // side, but may do so for the right side. If right_relabel_output is
+ // non-NULL, it is set to true if symbols from the right table needed to be
+ // reassigned.
+ // A potential use case is to Compose two Fst's that have different symbol
+ // tables. You can reconcile them in the following way:
+ // Fst<Arc> a, b;
+ // bool relabel;
+ // SymbolTable *bnew = MergeSymbolTable(a.OutputSymbols(),
+ // b.InputSymbols(), &relabel);
+ // if (relabel) {
+ // Relabel(b, bnew, NULL);
+ // }
+ // b.SetInputSymbols(bnew);
+ // delete bnew;
+ // NOTE(review): the sketch above passes the accessor results directly while
+ // this function takes references; if those accessors return pointers the
+ // arguments need dereferencing - confirm before copying the example.
+ SymbolTable *MergeSymbolTable(const SymbolTable &left, const SymbolTable &right,
+ bool *right_relabel_output = 0);
+
+ // Reads the symbol table from any Fst::Read()able file, without loading the
+ // corresponding Fst. 'input' presumably selects the input (true) or output
+ // (false) symbol table - confirm against the definition. Returns NULL if the
+ // Fst does not contain a symbol table or the symbol table cannot be read.
+ SymbolTable *FstReadSymbols(const string &filename, bool input);
+
+} // namespace fst
+#endif // FST_LIB_SYMBOL_TABLE_OPS_H_
diff --git a/src/include/fst/symbol-table.h b/src/include/fst/symbol-table.h
new file mode 100644
index 0000000..93ebe76
--- /dev/null
+++ b/src/include/fst/symbol-table.h
@@ -0,0 +1,507 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// All Rights Reserved.
+//
+// Author : Johan Schalkwyk
+//
+// \file
+// Classes to provide symbol-to-integer and integer-to-symbol mappings.
+
+#ifndef FST_LIB_SYMBOL_TABLE_H__
+#define FST_LIB_SYMBOL_TABLE_H__
+
+#include <cstring>
+#include <string>
+#include <utility>
+using std::pair; using std::make_pair;
+#include <vector>
+using std::vector;
+
+
+#include <fst/compat.h>
+#include <iostream>
+#include <fstream>
+
+
+#include <map>
+
+DECLARE_bool(fst_compat_symbols);
+
+namespace fst {
+
+ // Options accepted by the binary symbol table readers.
+ //
+ // WARNING: Reading via symbol table read options should not be used. This
+ // is a temporary workaround for reading symbol ranges of previously stored
+ // symbol sets.
+ struct SymbolTableReadOptions {
+   // Leaves the range list and the source name empty.
+   SymbolTableReadOptions() { }
+
+   // Stores a copy of both arguments.
+   SymbolTableReadOptions(vector<pair<int64, int64> > ranges,
+                          const string& source_name)
+       : string_hash_ranges(ranges),
+         source(source_name) { }
+
+   vector<pair<int64, int64> > string_hash_ranges;  // Pairs of int64 bounds.
+   string source;  // Name of the data source (e.g. the file being read).
+ };
+
+ // Implementation object behind SymbolTable.
+ //
+ // Holds the symbol strings (owned, released with delete[] in the
+ // destructor), the key-to-symbol and symbol-to-key maps, a reference count
+ // used by SymbolTable for sharing, and lazily recomputed checksums guarded
+ // by a mutex.
+ class SymbolTableImpl {
+  public:
+   // Creates an empty table called 'name'.
+   SymbolTableImpl(const string &name)
+       : name_(name),
+         available_key_(0),
+         dense_key_limit_(0),
+         check_sum_finalized_(false) {}
+
+   // Deep copy: every symbol of 'impl' is re-added under its original key,
+   // so the copy owns its own strings. The reference count and checksum
+   // state start out fresh rather than being copied.
+   explicit SymbolTableImpl(const SymbolTableImpl& impl)
+       : name_(impl.name_),
+         available_key_(0),
+         dense_key_limit_(0),
+         check_sum_finalized_(false) {
+     for (size_t i = 0; i < impl.symbols_.size(); ++i) {
+       AddSymbol(impl.symbols_[i], impl.Find(impl.symbols_[i]));
+     }
+   }
+
+   // Releases the symbol strings held in symbols_.
+   ~SymbolTableImpl() {
+     for (size_t i = 0; i < symbols_.size(); ++i)
+       delete[] symbols_[i];
+   }
+
+   // TODO(johans): Add flag to specify whether the symbol
+   // should be indexed as string or int or both.
+   int64 AddSymbol(const string& symbol, int64 key);
+
+   // Returns the key of 'symbol', adding it under the next available key
+   // when it is not in the table yet.
+   int64 AddSymbol(const string& symbol) {
+     int64 key = Find(symbol);
+     return (key == -1) ? AddSymbol(symbol, available_key_++) : key;
+   }
+
+   static SymbolTableImpl* ReadText(istream &strm,
+                                    const string &name,
+                                    bool allow_negative = false);
+
+   static SymbolTableImpl* Read(istream &strm,
+                                const SymbolTableReadOptions& opts);
+
+   bool Write(ostream &strm) const;
+
+   //
+   // Return the string associated with the key. If the key is not in the
+   // table, return an empty string.
+   string Find(int64 key) const {
+     // Keys below dense_key_limit_ index straight into symbols_.
+     if (key >=0 && key < dense_key_limit_)
+       return string(symbols_[key]);
+
+     map<int64, const char*>::const_iterator it =
+         key_map_.find(key);
+     if (it == key_map_.end()) {
+       return "";
+     }
+     return string(it->second);
+   }
+
+   //
+   // Return the key associated with the symbol. If the symbol
+   // does not exist, return -1 (SymbolTable::kNoSymbol).
+   int64 Find(const string& symbol) const {
+     return Find(symbol.c_str());
+   }
+
+   //
+   // Return the key associated with the symbol. If the symbol
+   // does not exist, return -1 (SymbolTable::kNoSymbol).
+   int64 Find(const char* symbol) const {
+     map<const char *, int64, StrCmp>::const_iterator it =
+         symbol_map_.find(symbol);
+     if (it == symbol_map_.end()) {
+       return -1;
+     }
+     return it->second;
+   }
+
+   // Returns the key of the symbol stored at position 'pos' of symbols_, or
+   // -1 when 'pos' is out of range.
+   int64 GetNthKey(ssize_t pos) const {
+     // The cast is safe: negative positions are rejected first.
+     if ((pos < 0) || (static_cast<size_t>(pos) >= symbols_.size())) return -1;
+     else return Find(symbols_[pos]);
+   }
+
+   const string& Name() const { return name_; }
+
+   // Reference counting helpers; each forwards to ref_count_.
+   int IncrRefCount() const {
+     return ref_count_.Incr();
+   }
+   int DecrRefCount() const {
+     return ref_count_.Decr();
+   }
+   int RefCount() const {
+     return ref_count_.count();
+   }
+
+   // Returns the checksum string, recomputing it first if needed. The
+   // mutex is held for the duration of the call.
+   string CheckSum() const {
+     MutexLock check_sum_lock(&check_sum_mutex_);
+     MaybeRecomputeCheckSum();
+     return check_sum_string_;
+   }
+
+   // Same as CheckSum(), for the labeled checksum string.
+   string LabeledCheckSum() const {
+     MutexLock check_sum_lock(&check_sum_mutex_);
+     MaybeRecomputeCheckSum();
+     return labeled_check_sum_string_;
+   }
+
+   int64 AvailableKey() const {
+     return available_key_;
+   }
+
+   size_t NumSymbols() const {
+     return symbols_.size();
+   }
+
+  private:
+   // Recomputes the checksums (both of them) if we've had changes since the last
+   // computation (i.e., if check_sum_finalized_ is false).
+   void MaybeRecomputeCheckSum() const;
+
+   // Orders C strings by content rather than by address.
+   struct StrCmp {
+     bool operator()(const char *s1, const char *s2) const {
+       return strcmp(s1, s2) < 0;
+     }
+   };
+
+   string name_;
+   int64 available_key_;
+   int64 dense_key_limit_;
+   vector<const char *> symbols_;
+   map<int64, const char*> key_map_;
+   map<const char *, int64, StrCmp> symbol_map_;
+
+   mutable RefCounter ref_count_;
+   mutable bool check_sum_finalized_;
+   mutable CheckSummer check_sum_;
+   mutable CheckSummer labeled_check_sum_;
+   mutable string check_sum_string_;
+   mutable string labeled_check_sum_string_;
+   mutable Mutex check_sum_mutex_;
+
+   // Assignment would copy the raw pointers in symbols_ and lead to a double
+   // delete[] in the destructors, so it is declared but never defined.
+   void operator=(const SymbolTableImpl &impl);  // disallow
+ };
+
+//
+// \class SymbolTable
+// \brief Symbol (string) to int and reverse mapping
+//
+// The SymbolTable implements the mappings of labels to strings and reverse.
+// SymbolTables are used to describe the alphabet of the input and output
+// labels for arcs in a Finite State Transducer.
+//
+// SymbolTables are reference counted and can therefore be shared across
+// multiple machines. For example a language model grammar G, with a
+// SymbolTable for the words in the language model can share this symbol
+// table with the lexical representation L o G.
+//
+class SymbolTable {
+ public:
+ static const int64 kNoSymbol = -1;
+
+ // Construct symbol table with a unique name.
+ SymbolTable(const string& name) : impl_(new SymbolTableImpl(name)) {}
+
+ // Create a reference counted copy.
+ SymbolTable(const SymbolTable& table) : impl_(table.impl_) {
+ impl_->IncrRefCount();
+ }
+
+ // Derefence implentation object. When reference count hits 0, delete
+ // implementation.
+ virtual ~SymbolTable() {
+ if (!impl_->DecrRefCount()) delete impl_;
+ }
+
+ // Read an ascii representation of the symbol table from an istream. Pass a
+ // name to give the resulting SymbolTable.
+ static SymbolTable* ReadText(istream &strm,
+ const string& name,
+ bool allow_negative = false) {
+ SymbolTableImpl* impl = SymbolTableImpl::ReadText(strm,
+ name,
+ allow_negative);
+ if (!impl)
+ return 0;
+ else
+ return new SymbolTable(impl);
+ }
+
+ // read an ascii representation of the symbol table
+ static SymbolTable* ReadText(const string& filename,
+ bool allow_negative = false) {
+ ifstream strm(filename.c_str(), ifstream::in);
+ if (!strm) {
+ LOG(ERROR) << "SymbolTable::ReadText: Can't open file " << filename;
+ return 0;
+ }
+ return ReadText(strm, filename, allow_negative);
+ }
+
+
+ // WARNING: Reading via symbol table read options should
+ // not be used. This is a temporary work around.
+ static SymbolTable* Read(istream &strm,
+ const SymbolTableReadOptions& opts) {
+ SymbolTableImpl* impl = SymbolTableImpl::Read(strm, opts);
+ if (!impl)
+ return 0;
+ else
+ return new SymbolTable(impl);
+ }
+
+ // read a binary dump of the symbol table from a stream
+ static SymbolTable* Read(istream &strm, const string& source) {
+ SymbolTableReadOptions opts;
+ opts.source = source;
+ return Read(strm, opts);
+ }
+
+ // read a binary dump of the symbol table
+ static SymbolTable* Read(const string& filename) {
+ ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
+ if (!strm) {
+ LOG(ERROR) << "SymbolTable::Read: Can't open file " << filename;
+ return 0;
+ }
+ return Read(strm, filename);
+ }
+
+ //--------------------------------------------------------
+ // Derivable Interface (final)
+ //--------------------------------------------------------
+ // create a reference counted copy
+ virtual SymbolTable* Copy() const {
+ return new SymbolTable(*this);
+ }
+
+ // Add a symbol with given key to table. A symbol table also
+ // keeps track of the last available key (highest key value in
+ // the symbol table).
+ virtual int64 AddSymbol(const string& symbol, int64 key) {
+ MutateCheck();
+ return impl_->AddSymbol(symbol, key);
+ }
+
+ // Add a symbol to the table. The associated value key is automatically
+ // assigned by the symbol table.
+ virtual int64 AddSymbol(const string& symbol) {
+ MutateCheck();
+ return impl_->AddSymbol(symbol);
+ }
+
+ // Add another symbol table to this table. All key values will be offset
+ // by the current available key (highest key value in the symbol table).
+ // Note string symbols with the same key value with still have the same
+ // key value after the symbol table has been merged, but a different
+ // value. Adding symbol tables do not result in changes in the base table.
+ virtual void AddTable(const SymbolTable& table);
+
+ // return the name of the symbol table
+ virtual const string& Name() const {
+ return impl_->Name();
+ }
+
+ // Return the label-agnostic MD5 check-sum for this table. All new symbols
+ // added to the table will result in an updated checksum.
+ // DEPRECATED.
+ virtual string CheckSum() const {
+ return impl_->CheckSum();
+ }
+
+ // Same as CheckSum(), but this returns an label-dependent version.
+ virtual string LabeledCheckSum() const {
+ return impl_->LabeledCheckSum();
+ }
+
+ virtual bool Write(ostream &strm) const {
+ return impl_->Write(strm);
+ }
+
+ bool Write(const string& filename) const {
+ ofstream strm(filename.c_str(), ofstream::out | ofstream::binary);
+ if (!strm) {
+ LOG(ERROR) << "SymbolTable::Write: Can't open file " << filename;
+ return false;
+ }
+ return Write(strm);
+ }
+
+ // Dump an ascii text representation of the symbol table via a stream
+ virtual bool WriteText(ostream &strm) const;
+
+ // Dump an ascii text representation of the symbol table
+ bool WriteText(const string& filename) const {
+ ofstream strm(filename.c_str());
+ if (!strm) {
+ LOG(ERROR) << "SymbolTable::WriteText: Can't open file " << filename;
+ return false;
+ }
+ return WriteText(strm);
+ }
+
+ // Return the string associated with the key. If the key is out of
+ // range (<0, >max), log error and return an empty string.
+ virtual string Find(int64 key) const {
+ return impl_->Find(key);
+ }
+
+ // Return the key associated with the symbol. If the symbol
+ // does not exists, log error and return SymbolTable::kNoSymbol
+ virtual int64 Find(const string& symbol) const {
+ return impl_->Find(symbol);
+ }
+
+ // Return the key associated with the symbol. If the symbol
+ // does not exists, log error and return SymbolTable::kNoSymbol
+ virtual int64 Find(const char* symbol) const {
+ return impl_->Find(symbol);
+ }
+
+ // Return the current available key (i.e highest key number+1) in
+ // the symbol table
+ virtual int64 AvailableKey(void) const {
+ return impl_->AvailableKey();
+ }
+
+ // Return the current number of symbols in table (not necessarily
+ // equal to AvailableKey())
+ virtual size_t NumSymbols(void) const {
+ return impl_->NumSymbols();
+ }
+
+ virtual int64 GetNthKey(ssize_t pos) const {
+ return impl_->GetNthKey(pos);
+ }
+
+ private:
+ explicit SymbolTable(SymbolTableImpl* impl) : impl_(impl) {}
+
+ void MutateCheck() {
+ // Copy on write
+ if (impl_->RefCount() > 1) {
+ impl_->DecrRefCount();
+ impl_ = new SymbolTableImpl(*impl_);
+ }
+ }
+
+ const SymbolTableImpl* Impl() const {
+ return impl_;
+ }
+
+ private:
+ SymbolTableImpl* impl_;
+
+ void operator=(const SymbolTable &table); // disallow
+};
+
+
+//
+// \class SymbolTableIterator
+// \brief Iterator class for symbols in a symbol table
+class SymbolTableIterator {
+ public:
+ SymbolTableIterator(const SymbolTable& table)
+ : table_(table),
+ pos_(0),
+ nsymbols_(table.NumSymbols()),
+ key_(table.GetNthKey(0)) { }
+
+ ~SymbolTableIterator() { }
+
+ // is iterator done
+ bool Done(void) {
+ return (pos_ == nsymbols_);
+ }
+
+ // return the Value() of the current symbol (int64 key)
+ int64 Value(void) {
+ return key_;
+ }
+
+ // return the string of the current symbol
+ string Symbol(void) {
+ return table_.Find(key_);
+ }
+
+ // advance iterator forward
+ void Next(void) {
+ ++pos_;
+ if (pos_ < nsymbols_) key_ = table_.GetNthKey(pos_);
+ }
+
+ // reset iterator
+ void Reset(void) {
+ pos_ = 0;
+ key_ = table_.GetNthKey(0);
+ }
+
+ private:
+ const SymbolTable& table_;
+ ssize_t pos_;
+ size_t nsymbols_;
+ int64 key_;
+};
+
+
+ // Tests compatibility between two symbol tables.
+ //
+ // Always true when FLAGS_fst_compat_symbols is off or when both tables are
+ // missing. False when exactly one table is present or when the labeled
+ // checksums differ; in those cases a warning is logged if 'warning' is set.
+ inline bool CompatSymbols(const SymbolTable *syms1, const SymbolTable *syms2,
+                           bool warning = true) {
+   if (!FLAGS_fst_compat_symbols)
+     return true;
+
+   if (!syms1 && !syms2)
+     return true;
+
+   if (syms1 && !syms2) {
+     if (warning) {
+       LOG(WARNING) <<
+           "CompatSymbols: first symbol table present but second missing";
+     }
+     return false;
+   }
+
+   if (!syms1 && syms2) {
+     if (warning) {
+       LOG(WARNING) <<
+           "CompatSymbols: second symbol table present but first missing";
+     }
+     return false;
+   }
+
+   // Both tables exist: they are compatible iff their checksums agree.
+   if (syms1->LabeledCheckSum() != syms2->LabeledCheckSum()) {
+     if (warning) {
+       LOG(WARNING) << "CompatSymbols: Symbol table check sums do not match";
+     }
+     return false;
+   }
+
+   return true;
+ }
+
+
+ // Relabels a symbol table as specified by the input vector of pairs
+ // (old label, new label). The new symbol table only retains symbols
+ // for which a relabeling is *explicitly* specified. The result is
+ // allocated with new; its name is "relabeled_" followed by the name of
+ // 'table', or empty when 'table' has no name.
+ // NOTE(review): an old label that is absent from 'table' appears to be
+ // added with whatever Find() returns for it (an empty string) - confirm
+ // this is intended.
+ // TODO(allauzen): consider adding options to allow for some form
+ // of implicit identity relabeling.
+ template <class Label>
+ SymbolTable *RelabelSymbolTable(const SymbolTable *table,
+                                 const vector<pair<Label, Label> > &pairs) {
+   typedef typename vector<pair<Label, Label> >::const_iterator PairIterator;
+
+   string relabeled_name;
+   if (!table->Name().empty())
+     relabeled_name = string("relabeled_") + table->Name();
+   SymbolTable *relabeled = new SymbolTable(relabeled_name);
+
+   // Each pair moves the symbol stored under 'first' to the key 'second'.
+   for (PairIterator it = pairs.begin(); it != pairs.end(); ++it)
+     relabeled->AddSymbol(table->Find(it->first), it->second);
+
+   return relabeled;
+ }
+
+} // namespace fst
+
+#endif // FST_LIB_SYMBOL_TABLE_H__
diff --git a/src/include/fst/synchronize.h b/src/include/fst/synchronize.h
new file mode 100644
index 0000000..28d1262
--- /dev/null
+++ b/src/include/fst/synchronize.h
@@ -0,0 +1,457 @@
+// synchronize.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// Synchronize an FST with bounded delay.
+
+#ifndef FST_LIB_SYNCHRONIZE_H__
+#define FST_LIB_SYNCHRONIZE_H__
+
+#include <algorithm>
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <unordered_set>
+using std::tr1::unordered_set;
+using std::tr1::unordered_multiset;
+#include <string>
+#include <utility>
+using std::pair; using std::make_pair;
+#include <vector>
+using std::vector;
+
+#include <fst/cache.h>
+#include <fst/test-properties.h>
+
+
+namespace fst {
+
+typedef CacheOptions SynchronizeFstOptions;
+
+
+// Implementation class for SynchronizeFst
+template <class A>
+class SynchronizeFstImpl
+ : public CacheImpl<A> {
+ public:
+ using FstImpl<A>::SetType;
+ using FstImpl<A>::SetProperties;
+ using FstImpl<A>::SetInputSymbols;
+ using FstImpl<A>::SetOutputSymbols;
+
+ using CacheBaseImpl< CacheState<A> >::PushArc;
+ using CacheBaseImpl< CacheState<A> >::HasArcs;
+ using CacheBaseImpl< CacheState<A> >::HasFinal;
+ using CacheBaseImpl< CacheState<A> >::HasStart;
+ using CacheBaseImpl< CacheState<A> >::SetArcs;
+ using CacheBaseImpl< CacheState<A> >::SetFinal;
+ using CacheBaseImpl< CacheState<A> >::SetStart;
+
+ typedef A Arc;
+ typedef typename A::Label Label;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+
+ typedef basic_string<Label> String;
+
+ struct Element {
+ Element() {}
+
+ Element(StateId s, const String *i, const String *o)
+ : state(s), istring(i), ostring(o) {}
+
+ StateId state; // Input state Id
+ const String *istring; // Residual input labels
+ const String *ostring; // Residual output labels
+ // Residual strings are represented by const pointers to
+ // basic_string<Label> and are stored in a hash_set. The pointed
+ // memory is owned by the hash_set string_set_.
+ };
+
+ SynchronizeFstImpl(const Fst<A> &fst, const SynchronizeFstOptions &opts)
+ : CacheImpl<A>(opts), fst_(fst.Copy()) {
+ SetType("synchronize");
+ uint64 props = fst.Properties(kFstProperties, false);
+ SetProperties(SynchronizeProperties(props), kCopyProperties);
+
+ SetInputSymbols(fst.InputSymbols());
+ SetOutputSymbols(fst.OutputSymbols());
+ }
+
+ SynchronizeFstImpl(const SynchronizeFstImpl &impl)
+ : CacheImpl<A>(impl),
+ fst_(impl.fst_->Copy(true)) {
+ SetType("synchronize");
+ SetProperties(impl.Properties(), kCopyProperties);
+ SetInputSymbols(impl.InputSymbols());
+ SetOutputSymbols(impl.OutputSymbols());
+ }
+
+ ~SynchronizeFstImpl() {
+ delete fst_;
+ // Extract pointers from the hash set
+ vector<const String*> strings;
+ typename StringSet::iterator it = string_set_.begin();
+ for (; it != string_set_.end(); ++it)
+ strings.push_back(*it);
+ // Free the extracted pointers
+ for (size_t i = 0; i < strings.size(); ++i)
+ delete strings[i];
+ }
+
+ StateId Start() {
+ if (!HasStart()) {
+ StateId s = fst_->Start();
+ if (s == kNoStateId)
+ return kNoStateId;
+ const String *empty = FindString(new String());
+ StateId start = FindState(Element(fst_->Start(), empty, empty));
+ SetStart(start);
+ }
+ return CacheImpl<A>::Start();
+ }
+
+ Weight Final(StateId s) {
+ if (!HasFinal(s)) {
+ const Element &e = elements_[s];
+ Weight w = e.state == kNoStateId ? Weight::One() : fst_->Final(e.state);
+ if ((w != Weight::Zero()) && (e.istring)->empty() && (e.ostring)->empty())
+ SetFinal(s, w);
+ else
+ SetFinal(s, Weight::Zero());
+ }
+ return CacheImpl<A>::Final(s);
+ }
+
+ size_t NumArcs(StateId s) {
+ if (!HasArcs(s))
+ Expand(s);
+ return CacheImpl<A>::NumArcs(s);
+ }
+
+ size_t NumInputEpsilons(StateId s) {
+ if (!HasArcs(s))
+ Expand(s);
+ return CacheImpl<A>::NumInputEpsilons(s);
+ }
+
+ size_t NumOutputEpsilons(StateId s) {
+ if (!HasArcs(s))
+ Expand(s);
+ return CacheImpl<A>::NumOutputEpsilons(s);
+ }
+
+ uint64 Properties() const { return Properties(kFstProperties); }
+
+ // Set error if found; return FST impl properties.
+ uint64 Properties(uint64 mask) const {
+ if ((mask & kError) && fst_->Properties(kError, false))
+ SetProperties(kError, kError);
+ return FstImpl<Arc>::Properties(mask);
+ }
+
+ void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
+ if (!HasArcs(s))
+ Expand(s);
+ CacheImpl<A>::InitArcIterator(s, data);
+ }
+
+ // Returns the first character of the string obtained by
+ // concatenating s and l.
+ Label Car(const String *s, Label l = 0) const {
+ if (!s->empty())
+ return (*s)[0];
+ else
+ return l;
+ }
+
+ // Computes the residual string obtained by removing the first
+ // character in the concatenation of s and l.
+ const String *Cdr(const String *s, Label l = 0) {
+ String *r = new String();
+ for (int i = 1; i < s->size(); ++i)
+ r->push_back((*s)[i]);
+ if (l && !(s->empty())) r->push_back(l);
+ return FindString(r);
+ }
+
+ // Computes the concatenation of s and l.
+ const String *Concat(const String *s, Label l = 0) {
+ String *r = new String();
+ for (int i = 0; i < s->size(); ++i)
+ r->push_back((*s)[i]);
+ if (l) r->push_back(l);
+ return FindString(r);
+ }
+
+ // Tests if the concatenation of s and l is empty
+ bool Empty(const String *s, Label l = 0) const {
+ if (s->empty())
+ return l == 0;
+ else
+ return false;
+ }
+
+ // Finds the string pointed by s in the hash set. Transfers the
+ // pointer ownership to the hash set.
+ const String *FindString(const String *s) {
+ typename StringSet::iterator it = string_set_.find(s);
+ if (it != string_set_.end()) {
+ delete s;
+ return (*it);
+ } else {
+ string_set_.insert(s);
+ return s;
+ }
+ }
+
+ // Finds state corresponding to an element. Creates new state
+ // if element not found.
+ StateId FindState(const Element &e) {
+ typename ElementMap::iterator eit = element_map_.find(e);
+ if (eit != element_map_.end()) {
+ return (*eit).second;
+ } else {
+ StateId s = elements_.size();
+ elements_.push_back(e);
+ element_map_.insert(pair<const Element, StateId>(e, s));
+ return s;
+ }
+ }
+
+
+ // Computes the outgoing transitions from a state, creating new destination
+ // states as needed.
+ void Expand(StateId s) {
+ Element e = elements_[s];  // copied by value: FindState() below may append to elements_
+
+ if (e.state != kNoStateId)  // only elements that carry a state of *fst_ have arcs to follow
+ for (ArcIterator< Fst<A> > ait(*fst_, e.state);
+ !ait.Done();
+ ait.Next()) {
+ const A &arc = ait.Value();
+ if (!Empty(e.istring, arc.ilabel) && !Empty(e.ostring, arc.olabel)) {  // a label is available on both sides
+ const String *istring = Cdr(e.istring, arc.ilabel);  // NOTE(review): Car/Cdr are defined outside this view; presumably first label / remainder of the concatenation
+ const String *ostring = Cdr(e.ostring, arc.olabel);
+ StateId d = FindState(Element(arc.nextstate, istring, ostring));
+ PushArc(s, Arc(Car(e.istring, arc.ilabel),
+ Car(e.ostring, arc.olabel), arc.weight, d));
+ } else {  // one side is still empty: extend both pending strings and emit a 0:0 arc
+ const String *istring = Concat(e.istring, arc.ilabel);
+ const String *ostring = Concat(e.ostring, arc.olabel);
+ StateId d = FindState(Element(arc.nextstate, istring, ostring));
+ PushArc(s, Arc(0 , 0, arc.weight, d));
+ }
+ }
+
+ Weight w = e.state == kNoStateId ? Weight::One() : fst_->Final(e.state);  // One() when there is no source state, otherwise that state's final weight
+ if ((w != Weight::Zero()) &&
+ ((e.istring)->size() + (e.ostring)->size() > 0)) {  // non-Zero weight and at least one pending label
+ const String *istring = Cdr(e.istring);
+ const String *ostring = Cdr(e.ostring);
+ StateId d = FindState(Element(kNoStateId, istring, ostring));  // destination element has no source state
+ PushArc(s, Arc(Car(e.istring), Car(e.ostring), w, d));
+ }
+ SetArcs(s);  // NOTE(review): presumably finalizes the arcs of s in the cache; defined outside this view
+ }
+
+ private:
+ // Equality predicate for Elements. The two strings are compared by
+ // pointer, which assumes they have already been hashed into the string
+ // set.
+ class ElementEqual {
+  public:
+   bool operator()(const Element &x, const Element &y) const {
+     if (!(x.state == y.state)) return false;
+     if (!(x.istring == y.istring)) return false;
+     return x.ostring == y.ostring;
+   }
+ };
+
+ // Hash functor for the Element-to-state map. Folds in the state id,
+ // then the length and labels of the input string, then the length and
+ // labels of the output string, one shift-and-xor step at a time.
+ class ElementKey {
+  public:
+   size_t operator()(const Element &x) const {
+     size_t h = x.state;
+
+     h = (h << 1) ^ x.istring->size();
+     for (size_t k = 0; k < x.istring->size(); ++k)
+       h = (h << 1) ^ (*x.istring)[k];
+
+     h = (h << 1) ^ x.ostring->size();
+     for (size_t k = 0; k < x.ostring->size(); ++k)
+       h = (h << 1) ^ (*x.ostring)[k];
+
+     return h;
+   }
+ };
+
+ // Equality predicate for the string set: two strings are equal when
+ // they have the same length and the same label at every position.
+ class StringEqual {
+  public:
+   bool operator()(const String * const &x, const String * const &y) const {
+     const size_t len = x->size();
+     if (len != y->size()) return false;
+     for (size_t pos = 0; pos < len; ++pos) {
+       if ((*x)[pos] != (*y)[pos]) return false;
+     }
+     return true;
+   }
+ };
+
+ // Hash functor for the string set: starts from the string length and
+ // folds in each label with a shift-and-xor step.
+ class StringKey {
+  public:
+   size_t operator()(const String * const & x) const {
+     size_t h = x->size();
+     for (size_t k = 0; k < x->size(); ++k) {
+       h = (h << 1) ^ (*x)[k];
+     }
+     return h;
+   }
+ };
+
+
+ typedef unordered_map<Element, StateId, ElementKey, ElementEqual> ElementMap;  // Element -> state id lookup
+ typedef unordered_set<const String*, StringKey, StringEqual> StringSet;  // String pointers hashed and compared by content
+
+ const Fst<A> *fst_;  // FST whose arcs and final weights Expand() reads
+ vector<Element> elements_; // mapping Fst state to Elements
+ ElementMap element_map_; // mapping Elements to Fst state
+ StringSet string_set_;  // String objects owned by the impl and handed out by FindString()
+
+ void operator=(const SynchronizeFstImpl<A> &); // disallow
+};
+
+
+// Delayed synchronization of a transducer. The result is an FST
+// equivalent to the input with the property that, while traversing a
+// path, the delay is either zero or strictly increasing. The delay is
+// the difference between the number of non-epsilon output labels and
+// input labels along the path.
+//
+// Termination requires the input transducer to have bounded delay,
+// i.e., every cycle must have delay zero.
+//
+// Complexity:
+// - A has bounded delay: exponential
+// - A does not have bounded delay: does not terminate
+//
+// References:
+// - Mehryar Mohri. Edit-Distance of Weighted Automata: General
+//   Definitions and Algorithms, International Journal of Foundations
+//   of Computer Science, 14(6): 957-982 (2003).
+//
+// The class only binds the interface to SynchronizeFstImpl; reference
+// counting and most methods are delegated to ImplToFst.
+template <class A>
+class SynchronizeFst : public ImplToFst< SynchronizeFstImpl<A> > {
+ public:
+  typedef A Arc;
+  typedef typename A::Weight Weight;
+  typedef typename A::StateId StateId;
+  typedef CacheState<A> State;
+  typedef SynchronizeFstImpl<A> Impl;
+
+  // The iterator specializations need access to GetImpl().
+  friend class StateIterator< SynchronizeFst<A> >;
+  friend class ArcIterator< SynchronizeFst<A> >;
+
+  // Builds the delayed FST over fst with default options.
+  SynchronizeFst(const Fst<A> &fst)
+      : ImplToFst<Impl>(new Impl(fst, SynchronizeFstOptions())) {}
+
+  // Builds the delayed FST over fst with the given options.
+  SynchronizeFst(const Fst<A> &fst, const SynchronizeFstOptions &opts)
+      : ImplToFst<Impl>(new Impl(fst, opts)) {}
+
+  // See Fst<>::Copy() for doc.
+  SynchronizeFst(const SynchronizeFst<A> &fst, bool safe = false)
+      : ImplToFst<Impl>(fst, safe) {}
+
+  // Get a copy of this SynchronizeFst. See Fst<>::Copy() for further doc.
+  virtual SynchronizeFst<A> *Copy(bool safe = false) const {
+    return new SynchronizeFst<A>(*this, safe);
+  }
+
+  // Defined out of line, after the StateIterator specialization.
+  virtual inline void InitStateIterator(StateIteratorData<A> *data) const;
+
+  // Forwards to the implementation.
+  virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
+    GetImpl()->InitArcIterator(s, data);
+  }
+
+ private:
+  // Makes visible to friends.
+  Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
+
+  void operator=(const SynchronizeFst<A> &fst);  // Disallow
+};
+
+
+// StateIterator specialization for SynchronizeFst; the iteration itself
+// is inherited from CacheStateIterator.
+template <class A>
+class StateIterator< SynchronizeFst<A> >
+    : public CacheStateIterator< SynchronizeFst<A> > {
+  typedef CacheStateIterator< SynchronizeFst<A> > Base;
+
+ public:
+  explicit StateIterator(const SynchronizeFst<A> &fst)
+      : Base(fst, fst.GetImpl()) {}
+};
+
+
+// ArcIterator specialization for SynchronizeFst. Constructing the
+// iterator expands state s first when its arcs are not available yet.
+template <class A>
+class ArcIterator< SynchronizeFst<A> >
+    : public CacheArcIterator< SynchronizeFst<A> > {
+ public:
+  typedef typename A::StateId StateId;
+
+  ArcIterator(const SynchronizeFst<A> &fst, StateId s)
+      : CacheArcIterator< SynchronizeFst<A> >(fst.GetImpl(), s) {
+    typename SynchronizeFst<A>::Impl *impl = fst.GetImpl();
+    if (impl->HasArcs(s)) return;
+    impl->Expand(s);
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+
+// Installs a newly allocated StateIterator over this FST as the base
+// iterator of data.
+template <class A>
+inline void SynchronizeFst<A>::InitStateIterator(
+    StateIteratorData<A> *data) const {
+  typedef StateIterator< SynchronizeFst<A> > Iterator;
+  data->base = new Iterator(*this);
+}
+
+
+
+// Synchronizes a transducer and writes the synchronized result to a
+// MutableFst. The result is an FST equivalent to the input with the
+// property that, while traversing a path, the delay is either zero or
+// strictly increasing. The delay is the difference between the number
+// of non-epsilon output labels and input labels along the path.
+//
+// Termination requires the input transducer to have bounded delay,
+// i.e., every cycle must have delay zero.
+//
+// Complexity:
+// - A has bounded delay: exponential
+// - A does not have bounded delay: does not terminate
+//
+// References:
+// - Mehryar Mohri. Edit-Distance of Weighted Automata: General
+//   Definitions and Algorithms, International Journal of Foundations
+//   of Computer Science, 14(6): 957-982 (2003).
+template <class Arc>
+void Synchronize(const Fst<Arc> &ifst, MutableFst<Arc> *ofst) {
+  SynchronizeFstOptions opts;
+  // Cache only the last state for fastest copy.
+  opts.gc_limit = 0;
+  const SynchronizeFst<Arc> delayed(ifst, opts);
+  *ofst = delayed;
+}
+
+} // namespace fst
+
+#endif // FST_LIB_SYNCHRONIZE_H__
diff --git a/src/include/fst/test-properties.h b/src/include/fst/test-properties.h
new file mode 100644
index 0000000..db1ddcc
--- /dev/null
+++ b/src/include/fst/test-properties.h
@@ -0,0 +1,246 @@
+// test-properties.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Functions to manipulate and test property bits
+
+#ifndef FST_LIB_TEST_PROPERTIES_H__
+#define FST_LIB_TEST_PROPERTIES_H__
+
+#include <unordered_set>
+using std::tr1::unordered_set;
+using std::tr1::unordered_multiset;
+
+#include <fst/dfs-visit.h>
+#include <fst/connect.h>
+
+
+DECLARE_bool(fst_verify_properties);
+
+namespace fst {
+
+// Returns the mask of property bits whose value is known in props.
+// Binary property bits are always set in the result. For a trinary
+// (i.e. two-bit) property, both bits are set in the result iff either
+// of the two corresponding input bits is set.
+inline uint64 KnownProperties(uint64 props) {
+  const uint64 trinary = props & kTrinaryProperties;
+  const uint64 shifted_up = (props & kPosTrinaryProperties) << 1;
+  const uint64 shifted_down = (props & kNegTrinaryProperties) >> 1;
+  return kBinaryProperties | trinary | shifted_up | shifted_down;
+}
+
+// Tests compatibility between two sets of properties. Only bits known
+// in both sets are compared; every disagreeing bit is logged by name
+// and false is returned when at least one disagrees.
+inline bool CompatProperties(uint64 props1, uint64 props2) {
+  const uint64 both_known = KnownProperties(props1) & KnownProperties(props2);
+  const uint64 mismatch = (props1 & both_known) ^ (props2 & both_known);
+  if (!mismatch) return true;
+
+  uint64 prop = 1;
+  for (int bit = 0; bit < 64; ++bit, prop <<= 1) {
+    if (!(prop & mismatch)) continue;
+    LOG(ERROR) << "CompatProperties: mismatch: " << PropertyNames[bit]
+               << ": props1 = " << (props1 & prop ? "true" : "false")
+               << ", props2 = " << (props2 & prop ? "true" : "false");
+  }
+  return false;
+}
+
+// Computes FST property values defined in properties.h. The value of
+// each property indicated in the mask will be determined and returned
+// (these will never be unknown here). In the course of determining
+// the properties specifically requested in the mask, certain other
+// properties may be determined (those with little additional expense)
+// and their values will be returned as well. The complete set of
+// known properties (whether true or false) determined by this
+// operation will be assigned to the value pointed to by KNOWN.
+// If 'use_stored' is true, pre-computed FST properties may be used
+// when possible. This routine is seldom called directly; instead it
+// is used to implement fst.Properties(mask, true).
+template<class Arc>
+uint64 ComputeProperties(const Fst<Arc> &fst, uint64 mask, uint64 *known,
+                         bool use_stored) {
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+  typedef typename Arc::StateId StateId;
+
+  uint64 fst_props = fst.Properties(kFstProperties, false);  // Fst-stored
+
+  // Check stored FST properties first if allowed.
+  if (use_stored) {
+    uint64 known_props = KnownProperties(fst_props);
+    // If FST contains required info, return it.
+    if ((known_props & mask) == mask) {
+      *known = known_props;
+      return fst_props;
+    }
+  }
+
+  // Compute (trinary) properties explicitly.
+
+  // Initialize with binary properties (already known).
+  uint64 comp_props = fst_props & kBinaryProperties;
+
+  // Compute these trinary properties with a DFS. We compute only those
+  // that need a DFS here, since we otherwise would like to avoid a DFS
+  // since its stack could grow large.
+  uint64 dfs_props = kCyclic | kAcyclic | kInitialCyclic | kInitialAcyclic |
+                     kAccessible | kNotAccessible |
+                     kCoAccessible | kNotCoAccessible;
+  if (mask & dfs_props) {
+    SccVisitor<Arc> scc_visitor(&comp_props);
+    DfsVisit(fst, &scc_visitor);
+  }
+
+  // Compute any remaining trinary properties via a state and arcs iterations
+  if (mask & ~(kBinaryProperties | dfs_props)) {
+    // Start optimistic: each property below is assumed to hold and is
+    // flipped to its negative counterpart by the first counterexample.
+    comp_props |= kAcceptor | kNoEpsilons | kNoIEpsilons | kNoOEpsilons |
+        kILabelSorted | kOLabelSorted | kUnweighted | kTopSorted | kString;
+
+    // Determinism is only tracked when the mask asks for it.
+    const bool check_ideterministic =
+        (mask & (kIDeterministic | kNonIDeterministic)) != 0;
+    const bool check_odeterministic =
+        (mask & (kODeterministic | kNonODeterministic)) != 0;
+    if (check_ideterministic)
+      comp_props |= kIDeterministic;
+    if (check_odeterministic)
+      comp_props |= kODeterministic;
+
+    // Labels seen so far on the arcs of the current state. The sets live
+    // on the stack and are cleared per state instead of being allocated
+    // and freed with new/delete for every state.
+    unordered_set<Label> ilabels;
+    unordered_set<Label> olabels;
+
+    StateId nfinal = 0;
+    for (StateIterator< Fst<Arc> > siter(fst);
+         !siter.Done();
+         siter.Next()) {
+      StateId s = siter.Value();
+
+      Arc prev_arc(kNoLabel, kNoLabel, Weight::One(), 0);
+      ilabels.clear();
+      olabels.clear();
+
+      for (ArcIterator< Fst<Arc> > aiter(fst, s);
+           !aiter.Done();
+           aiter.Next()) {
+        const Arc &arc = aiter.Value();
+
+        // insert() reports a repeated label through its bool result, so
+        // one hash probe both tests and records the label.
+        if (check_ideterministic && !ilabels.insert(arc.ilabel).second) {
+          comp_props |= kNonIDeterministic;
+          comp_props &= ~kIDeterministic;
+        }
+        if (check_odeterministic && !olabels.insert(arc.olabel).second) {
+          comp_props |= kNonODeterministic;
+          comp_props &= ~kODeterministic;
+        }
+        if (arc.ilabel != arc.olabel) {
+          comp_props |= kNotAcceptor;
+          comp_props &= ~kAcceptor;
+        }
+        if (arc.ilabel == 0 && arc.olabel == 0) {
+          comp_props |= kEpsilons;
+          comp_props &= ~kNoEpsilons;
+        }
+        if (arc.ilabel == 0) {
+          comp_props |= kIEpsilons;
+          comp_props &= ~kNoIEpsilons;
+        }
+        if (arc.olabel == 0) {
+          comp_props |= kOEpsilons;
+          comp_props &= ~kNoOEpsilons;
+        }
+        if (prev_arc.ilabel != kNoLabel && arc.ilabel < prev_arc.ilabel) {
+          comp_props |= kNotILabelSorted;
+          comp_props &= ~kILabelSorted;
+        }
+        if (prev_arc.olabel != kNoLabel && arc.olabel < prev_arc.olabel) {
+          comp_props |= kNotOLabelSorted;
+          comp_props &= ~kOLabelSorted;
+        }
+        if (arc.weight != Weight::One() && arc.weight != Weight::Zero()) {
+          comp_props |= kWeighted;
+          comp_props &= ~kUnweighted;
+        }
+        if (arc.nextstate <= s) {
+          comp_props |= kNotTopSorted;
+          comp_props &= ~kTopSorted;
+        }
+        if (arc.nextstate != s + 1) {
+          comp_props |= kNotString;
+          comp_props &= ~kString;
+        }
+        prev_arc = arc;
+      }
+
+      if (nfinal > 0) {             // final state not last
+        comp_props |= kNotString;
+        comp_props &= ~kString;
+      }
+
+      Weight final = fst.Final(s);
+
+      if (final != Weight::Zero()) {  // final state
+        if (final != Weight::One()) {
+          comp_props |= kWeighted;
+          comp_props &= ~kUnweighted;
+        }
+        ++nfinal;
+      } else {                        // non-final state
+        if (fst.NumArcs(s) != 1) {
+          comp_props |= kNotString;
+          comp_props &= ~kString;
+        }
+      }
+    }
+
+    if (fst.Start() != kNoStateId && fst.Start() != 0) {
+      comp_props |= kNotString;
+      comp_props &= ~kString;
+    }
+  }
+
+  *known = KnownProperties(comp_props);
+  return comp_props;
+}
+
+// Wrapper around ComputeProperties. When 'FLAGS_fst_verify_properties'
+// is true, the properties are recomputed without using the stored ones
+// and a fatal error is logged if stored and computed properties are
+// incompatible; otherwise stored properties may be used. This routine
+// is seldom called directly; instead it is used to implement
+// fst.Properties(mask, true).
+template<class Arc>
+uint64 TestProperties(const Fst<Arc> &fst, uint64 mask, uint64 *known) {
+  if (!FLAGS_fst_verify_properties)
+    return ComputeProperties(fst, mask, known, true);
+
+  const uint64 stored = fst.Properties(kFstProperties, false);
+  const uint64 computed = ComputeProperties(fst, mask, known, false);
+  if (!CompatProperties(stored, computed)) {
+    LOG(FATAL) << "TestProperties: stored Fst properties incorrect"
+               << " (stored: props1, computed: props2)";
+  }
+  return computed;
+}
+
+} // namespace fst
+
+#endif // FST_LIB_TEST_PROPERTIES_H__
diff --git a/src/include/fst/topsort.h b/src/include/fst/topsort.h
new file mode 100644
index 0000000..53735e5
--- /dev/null
+++ b/src/include/fst/topsort.h
@@ -0,0 +1,112 @@
+// topsort.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Topological sort of FSTs
+
+#ifndef FST_LIB_TOPSORT_H__
+#define FST_LIB_TOPSORT_H__
+
+#include <algorithm>
+#include <vector>
+using std::vector;
+
+
+#include <fst/dfs-visit.h>
+#include <fst/fst.h>
+#include <fst/statesort.h>
+
+
+namespace fst {
+
+// DFS visitor class to return topological ordering. States are recorded
+// in finishing-time order during the visit; when no back arc was seen,
+// the reversed finishing order is written to ORDER at the end.
+template <class A>
+class TopOrderVisitor {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+
+  // If acyclic, ORDER[i] gives the topological position of state Id i;
+  // otherwise unchanged. ACYCLIC will be true iff the FST has
+  // no cycles.
+  TopOrderVisitor(vector<StateId> *order, bool *acyclic)
+      : order_(order), acyclic_(acyclic) {}
+
+  // Starts a visit: forgets any earlier finishing order and assumes the
+  // FST is acyclic until a back arc proves otherwise.
+  void InitVisit(const Fst<A> &fst) {
+    finish_.clear();
+    *acyclic_ = true;
+  }
+
+  bool InitState(StateId s, StateId r) { return true; }
+
+  bool TreeArc(StateId s, const A &arc) { return true; }
+
+  // A back arc closes a cycle: records that and returns false.
+  bool BackArc(StateId s, const A &arc) { return (*acyclic_ = false); }
+
+  bool ForwardOrCrossArc(StateId s, const A &arc) { return true; }
+
+  void FinishState(StateId s, StateId p, const A *) { finish_.push_back(s); }
+
+  // Ends a visit: if acyclic, state finish_[k] receives position
+  // (count - 1 - k), i.e. the last state to finish is ordered first.
+  void FinishVisit() {
+    if (*acyclic_) {
+      const size_t nstates = finish_.size();
+      order_->clear();
+      order_->resize(nstates, kNoStateId);
+      for (size_t pos = 0; pos < nstates; ++pos)
+        (*order_)[finish_[nstates - pos - 1]] = static_cast<StateId>(pos);
+    }
+    // Release the storage, as the former delete of the heap vector did.
+    vector<StateId>().swap(finish_);
+  }
+
+ private:
+  vector<StateId> *order_;
+  bool *acyclic_;
+  // States in finishing-time order. Held by value so that a visitor
+  // that is copied, reused, or finished without being initialized never
+  // touches an invalid pointer.
+  vector<StateId> finish_;
+};
+
+
+// Topologically sorts its input if acyclic, modifying it. Otherwise,
+// the input is left as it was, apart from its property bits. When
+// sorted, all transitions are from lower to higher state IDs. Returns
+// true iff the input is acyclic.
+//
+// Complexity:
+// - Time:  O(V + E)
+// - Space: O(V + E)
+// where V = # of states and E = # of arcs.
+template <class Arc>
+bool TopSort(MutableFst<Arc> *fst) {
+  typedef typename Arc::StateId StateId;
+
+  vector<StateId> order;
+  bool acyclic;
+  TopOrderVisitor<Arc> visitor(&order, &acyclic);
+  DfsVisit(*fst, &visitor);
+
+  if (!acyclic) {
+    fst->SetProperties(kCyclic | kNotTopSorted, kCyclic | kNotTopSorted);
+    return acyclic;
+  }
+
+  StateSort(fst, order);
+  fst->SetProperties(kAcyclic | kInitialAcyclic | kTopSorted,
+                     kAcyclic | kInitialAcyclic | kTopSorted);
+  return acyclic;
+}
+
+} // namespace fst
+
+#endif // FST_LIB_TOPSORT_H__
diff --git a/src/include/fst/tuple-weight.h b/src/include/fst/tuple-weight.h
new file mode 100644
index 0000000..184026c
--- /dev/null
+++ b/src/include/fst/tuple-weight.h
@@ -0,0 +1,332 @@
+// tuple-weight.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google (Cyril Allauzen)
+//
+// \file
+// Tuple weight set operation definitions.
+
+#ifndef FST_LIB_TUPLE_WEIGHT_H__
+#define FST_LIB_TUPLE_WEIGHT_H__
+
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/weight.h>
+
+
+DECLARE_string(fst_weight_parentheses);
+DECLARE_string(fst_weight_separator);
+
+namespace fst {
+
+template<class W, unsigned int n> class TupleWeight;
+template <class W, unsigned int n>
+istream &operator>>(istream &strm, TupleWeight<W, n> &w);
+
+// n-tuple weight, element of the n-th Cartesian power of W. Holds n
+// values of type W; the member operations below apply element by
+// element.
+template <class W, unsigned int n>
+class TupleWeight {
+ public:
+  typedef TupleWeight<typename W::ReverseWeight, n> ReverseWeight;
+
+  TupleWeight() {}
+
+  TupleWeight(const TupleWeight &w) {
+    for (size_t i = 0; i < n; ++i)
+      values_[i] = w.values_[i];
+  }
+
+  // Copies the range [begin, end) into the tuple, in order. At most n
+  // elements are read, so an over-long range cannot write past the end
+  // of the storage, and the iterator only needs to support ++ and *.
+  template <class Iterator>
+  TupleWeight(Iterator begin, Iterator end) {
+    size_t i = 0;
+    for (Iterator iter = begin; iter != end && i < n; ++iter, ++i)
+      values_[i] = *iter;
+  }
+
+  // Sets every element to w.
+  TupleWeight(const W &w) {
+    for (size_t i = 0; i < n; ++i)
+      values_[i] = w;
+  }
+
+  static const TupleWeight<W, n> &Zero() {
+    static const TupleWeight<W, n> zero(W::Zero());
+    return zero;
+  }
+
+  static const TupleWeight<W, n> &One() {
+    static const TupleWeight<W, n> one(W::One());
+    return one;
+  }
+
+  static const TupleWeight<W, n> &NoWeight() {
+    static const TupleWeight<W, n> no_weight(W::NoWeight());
+    return no_weight;
+  }
+
+  static unsigned int Length() {
+    return n;
+  }
+
+  // Reads the n elements one after another with W::Read().
+  istream &Read(istream &strm) {
+    for (size_t i = 0; i < n; ++i)
+      values_[i].Read(strm);
+    return strm;
+  }
+
+  // Writes the n elements one after another with W::Write().
+  ostream &Write(ostream &strm) const {
+    for (size_t i = 0; i < n; ++i)
+      values_[i].Write(strm);
+    return strm;
+  }
+
+  TupleWeight<W, n> &operator=(const TupleWeight<W, n> &w) {
+    for (size_t i = 0; i < n; ++i)
+      values_[i] = w.values_[i];
+    return *this;
+  }
+
+  // True iff every element is a member of its weight set.
+  bool Member() const {
+    bool member = true;
+    for (size_t i = 0; i < n; ++i)
+      member = member && values_[i].Member();
+    return member;
+  }
+
+  size_t Hash() const {
+    uint64 hash = 0;
+    for (size_t i = 0; i < n; ++i)
+      hash = 5 * hash + values_[i].Hash();
+    return size_t(hash);
+  }
+
+  TupleWeight<W, n> Quantize(float delta = kDelta) const {
+    TupleWeight<W, n> w;
+    for (size_t i = 0; i < n; ++i)
+      w.values_[i] = values_[i].Quantize(delta);
+    return w;
+  }
+
+  // Reverses every element. The result is built as a ReverseWeight so
+  // that each W::Reverse() value is stored with its own type rather
+  // than being converted back to W first.
+  ReverseWeight Reverse() const {
+    ReverseWeight w;
+    for (size_t i = 0; i < n; ++i)
+      w.SetValue(i, values_[i].Reverse());
+    return w;
+  }
+
+  const W& Value(size_t i) const { return values_[i]; }
+
+  void SetValue(size_t i, const W &w) { values_[i] = w; }
+
+ protected:
+  // Reads TupleWeight when there are no parentheses around tuple terms.
+  // The first n - 1 elements are each collected up to the next separator
+  // and parsed from that text; the last element is parsed straight from
+  // strm. Reaching EOF before a separator sets badbit.
+  inline static istream &ReadNoParen(istream &strm,
+                                     TupleWeight<W, n> &w,
+                                     char separator) {
+    // Skip leading whitespace; c ends up holding the first real character.
+    int c;
+    do {
+      c = strm.get();
+    } while (isspace(c));
+
+    for (size_t i = 0; i < n - 1; ++i) {
+      string s;
+      // For i == 0 the first character was already read above.
+      if (i)
+        c = strm.get();
+      while (c != separator) {
+        if (c == EOF) {
+          strm.clear(std::ios::badbit);
+          return strm;
+        }
+        s += c;
+        c = strm.get();
+      }
+      // read (i+1)-th element
+      istringstream sstrm(s);
+      W r = W::Zero();
+      sstrm >> r;
+      w.SetValue(i, r);
+    }
+
+    // read n-th element
+    W r = W::Zero();
+    strm >> r;
+    w.SetValue(n - 1, r);
+
+    return strm;
+  }
+
+  // Reads TupleWeight when there are parentheses around tuple terms.
+  // A separator only ends an element when it is not nested inside
+  // parentheses opened within that element. The last element is taken
+  // to be everything up to EOF, minus the closing parenthesis.
+  inline static istream &ReadWithParen(istream &strm,
+                                       TupleWeight<W, n> &w,
+                                       char separator,
+                                       char open_paren,
+                                       char close_paren) {
+    int c;
+    do {
+      c = strm.get();
+    } while (isspace(c));
+
+    if (c != open_paren) {
+      FSTERROR() << " is fst_weight_parentheses flag set correcty? ";
+      strm.clear(std::ios::badbit);
+      return strm;
+    }
+
+    for (size_t i = 0; i < n - 1; ++i) {
+      // read (i+1)-th element
+      stack<int> parens;
+      string s;
+      c = strm.get();
+      while (c != separator || !parens.empty()) {
+        if (c == EOF) {
+          strm.clear(std::ios::badbit);
+          return strm;
+        }
+        s += c;
+        // if parens encountered before separator, they must be matched
+        if (c == open_paren) {
+          parens.push(1);
+        } else if (c == close_paren) {
+          // Fail for mismatched parens
+          if (parens.empty()) {
+            strm.clear(std::ios::failbit);
+            return strm;
+          }
+          parens.pop();
+        }
+        c = strm.get();
+      }
+      istringstream sstrm(s);
+      W r = W::Zero();
+      sstrm >> r;
+      w.SetValue(i, r);
+    }
+
+    // read n-th element
+    string s;
+    c = strm.get();
+    while (c != EOF) {
+      s += c;
+      c = strm.get();
+    }
+    if (s.empty() || *s.rbegin() != close_paren) {
+      FSTERROR() << " is fst_weight_parentheses flag set correcty? ";
+      strm.clear(std::ios::failbit);
+      return strm;
+    }
+    // Drop the closing parenthesis before parsing the element.
+    s.erase(s.size() - 1, 1);
+    istringstream sstrm(s);
+    W r = W::Zero();
+    sstrm >> r;
+    w.SetValue(n - 1, r);
+
+    return strm;
+  }
+
+
+ private:
+  W values_[n];
+
+  friend istream &operator>><W, n>(istream&, TupleWeight<W, n>&);
+};
+
+// Two tuples are equal iff all corresponding elements are equal;
+// comparison stops at the first differing element.
+template <class W, unsigned int n>
+inline bool operator==(const TupleWeight<W, n> &w1,
+                       const TupleWeight<W, n> &w2) {
+  for (size_t i = 0; i < n; ++i) {
+    if (!(w1.Value(i) == w2.Value(i))) return false;
+  }
+  return true;
+}
+
+// Two tuples differ iff some pair of corresponding elements differs;
+// comparison stops at the first such pair.
+template <class W, unsigned int n>
+inline bool operator!=(const TupleWeight<W, n> &w1,
+                       const TupleWeight<W, n> &w2) {
+  for (size_t i = 0; i < n; ++i) {
+    if (w1.Value(i) != w2.Value(i)) return true;
+  }
+  return false;
+}
+
+// Two tuples are approximately equal iff every pair of corresponding
+// elements is approximately equal within delta.
+template <class W, unsigned int n>
+inline bool ApproxEqual(const TupleWeight<W, n> &w1,
+                        const TupleWeight<W, n> &w2,
+                        float delta = kDelta) {
+  for (size_t i = 0; i < n; ++i) {
+    if (!ApproxEqual(w1.Value(i), w2.Value(i), delta)) return false;
+  }
+  return true;
+}
+
+// Writes the tuple as text: the elements separated by the one-character
+// FLAGS_fst_weight_separator, wrapped in the two characters of
+// FLAGS_fst_weight_parentheses when that flag is non-empty. A flag of
+// the wrong length is reported and sets badbit on the stream.
+template <class W, unsigned int n>
+inline ostream &operator<<(ostream &strm, const TupleWeight<W, n> &w) {
+  if (FLAGS_fst_weight_separator.size() != 1) {
+    FSTERROR() << "FLAGS_fst_weight_separator.size() is not equal to 1";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+
+  const bool write_parens = !FLAGS_fst_weight_parentheses.empty();
+  if (write_parens && FLAGS_fst_weight_parentheses.size() != 2) {
+    FSTERROR() << "FLAGS_fst_weight_parentheses.size() is not equal to 2";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+
+  const char separator = FLAGS_fst_weight_separator[0];
+
+  if (write_parens)
+    strm << FLAGS_fst_weight_parentheses[0];
+  for (size_t i = 0; i < n; ++i) {
+    if (i > 0)
+      strm << separator;
+    strm << w.Value(i);
+  }
+  if (write_parens)
+    strm << FLAGS_fst_weight_parentheses[1];
+
+  return strm;
+}
+
+// Reads a tuple from text, using the one-character
+// FLAGS_fst_weight_separator between elements. The parenthesized reader
+// is used when FLAGS_fst_weight_parentheses is non-empty, the plain
+// reader otherwise. A flag of the wrong length is reported and sets
+// badbit on the stream.
+template <class W, unsigned int n>
+inline istream &operator>>(istream &strm, TupleWeight<W, n> &w) {
+  if (FLAGS_fst_weight_separator.size() != 1) {
+    FSTERROR() << "FLAGS_fst_weight_separator.size() is not equal to 1";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+  const char separator = FLAGS_fst_weight_separator[0];
+
+  if (FLAGS_fst_weight_parentheses.empty())
+    return TupleWeight<W, n>::ReadNoParen(strm, w, separator);
+
+  if (FLAGS_fst_weight_parentheses.size() != 2) {
+    FSTERROR() << "FLAGS_fst_weight_parentheses.size() is not equal to 2";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+  return TupleWeight<W, n>::ReadWithParen(
+      strm, w, separator, FLAGS_fst_weight_parentheses[0],
+      FLAGS_fst_weight_parentheses[1]);
+}
+
+
+
+} // namespace fst
+
+#endif // FST_LIB_TUPLE_WEIGHT_H__
diff --git a/src/include/fst/types.h b/src/include/fst/types.h
new file mode 100644
index 0000000..8c4367a
--- /dev/null
+++ b/src/include/fst/types.h
@@ -0,0 +1,38 @@
+// types.h
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Various type definitions (mostly for Google compatibility).
+
+#include <cstdlib> // for ssize_t
+#include <stdint.h> // *int*_t
+
+#include <fst/compat.h> // for DISALLOW_COPY_AND_ASSIGN
+
+#ifndef FST_LIB_TYPES_H__
+#define FST_LIB_TYPES_H__
+
+typedef int8_t int8;  // short names for the signed fixed-width types of <stdint.h>
+typedef int16_t int16;
+typedef int32_t int32;
+typedef int64_t int64;
+
+typedef uint8_t uint8;  // short names for the unsigned fixed-width types of <stdint.h>
+typedef uint16_t uint16;
+typedef uint32_t uint32;
+typedef uint64_t uint64;
+
+#endif // FST_LIB_TYPES_H__
diff --git a/src/include/fst/union-find.h b/src/include/fst/union-find.h
new file mode 100644
index 0000000..c8633e0
--- /dev/null
+++ b/src/include/fst/union-find.h
@@ -0,0 +1,110 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: wojciech@google.com (Wojciech Skut)
+//
+// \file Union-Find algorithm for dense sets of non-negative
+// integers. Implemented using disjoint tree forests with rank
+// heuristics and path compression.
+
+#ifndef __fst_union_find_inl_h__
+#define __fst_union_find_inl_h__
+
+#include <stack>
+#include <vector>
+using std::vector;
+#include <fst/types.h>
+
+namespace fst {
+
+// Union-Find algorithm for dense sets of non-negative integers
+// (exact type: T). Elements are indices into a parent array; a root is
+// an element that is its own parent, and 'fail' marks elements that
+// have not been initialized.
+template <class T>
+class UnionFind {
+ public:
+  // Ctor: creates a disjoint set forest for the range [0;max).
+  // 'fail' is a value indicating that an element hasn't been
+  // initialized using MakeSet(...). The upper bound of the range
+  // can be reset (increased) using MakeSet(...).
+  UnionFind(T max, T fail)
+      : parent_(max, fail), rank_(max), fail_(fail) { }
+
+  // Finds the representative of the set 'item' belongs to.
+  // Performs path compression if needed. Returns 'fail' when item is
+  // out of range, equal to 'fail', or not initialized.
+  T FindSet(T item) {
+    if (item >= parent_.size()
+        || item == fail_
+        || parent_[item] == fail_) return fail_;
+
+    // Walk up to the root, remembering every parent slot on the way.
+    T *p = &parent_[item];
+    for (; *p != item; item = *p, p = &parent_[item]) {
+      exec_stack_.push(p);
+    }
+    // Path compression: point every remembered slot at the root.
+    for (; ! exec_stack_.empty(); exec_stack_.pop()) {
+      *exec_stack_.top() = *p;
+    }
+    return *p;
+  }
+
+  // Creates the (destructive) union of the sets x and y belong to.
+  // Does nothing when either element has no set.
+  void Union(T x, T y) {
+    Link(FindSet(x), FindSet(y));
+  }
+
+  // Initialization of an element: creates a singleton set containing
+  // 'item'. The range [0;max) is reset if item >= max.
+  T MakeSet(T item) {
+    if (item >= parent_.size()) {
+      // New value in parent_ should be initialized to fail_
+      // The doubling is done in size_t so it cannot overflow T.
+      size_t nitem = item > 0 ? 2 * static_cast<size_t>(item) : 2;
+      parent_.resize(nitem, fail_);
+      rank_.resize(nitem);
+    }
+    parent_[item] = item;
+    return item;
+  }
+
+  // Initialization of all elements starting from 0 to max - 1 to distinct sets
+  void MakeAllSet(T max) {
+    parent_.resize(max);
+    // rank_ must cover the same range as parent_, otherwise Link() would
+    // index past its end after the range has grown.
+    rank_.resize(max);
+    for (T item = 0; item < max; ++item) {
+      parent_[item] = item;
+      rank_[item] = 0;  // every singleton starts with rank 0
+    }
+  }
+
+ private:
+  vector<T> parent_;      // Parent nodes.
+  vector<int> rank_;      // Rank of an element = min. depth in tree.
+  T fail_;                // Value indicating lookup failure.
+  stack<T*> exec_stack_;  // Used for path compression.
+
+  // Links trees rooted in 'x' and 'y'. The root of lower rank is
+  // attached below the other; on equal ranks x goes below y and y's
+  // rank grows by one.
+  void Link(T x, T y) {
+    if (x == y) return;
+    // A failed lookup yields fail_, which is not a valid index.
+    if (x == fail_ || y == fail_) return;
+
+    if (rank_[x] > rank_[y]) {
+      parent_[y] = x;
+    } else {
+      parent_[x] = y;
+      if (rank_[x] == rank_[y]) {
+        ++rank_[y];
+      }
+    }
+  }
+  DISALLOW_COPY_AND_ASSIGN(UnionFind);
+};
+
+} // namespace fst
+
+#endif // __fst_union_find_inl_h__
diff --git a/src/include/fst/union.h b/src/include/fst/union.h
new file mode 100644
index 0000000..a2f97fb
--- /dev/null
+++ b/src/include/fst/union.h
@@ -0,0 +1,185 @@
+// union.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Functions and classes to compute the union of two FSTs.
+
+#ifndef FST_LIB_UNION_H__
+#define FST_LIB_UNION_H__
+
+#include <vector>
+using std::vector;
+#include <algorithm>
+
+#include <fst/mutable-fst.h>
+#include <fst/rational.h>
+
+
+namespace fst {
+
+// Computes the union (sum) of two FSTs. This version writes the
+// union to an output MutableFst. If A transduces string x to y with
+// weight a and B transduces string w to v with weight b, then their
+// union transduces x to y with weight a and w to v with weight b.
+//
+// The states and arcs of fst2 are appended to fst1; every state id taken
+// from fst2 is shifted by the number of states fst1 had on entry.
+//
+// Complexity:
+// - Time: O(V2 + E2)
+// - Space: O(V2 + E2)
+// where Vi = # of states and Ei = # of arcs of the ith FST.
+template <class Arc>
+void Union(MutableFst<Arc> *fst1, const Fst<Arc> &fst2) {
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Label Label;
+  typedef typename Arc::Weight Weight;
+
+  // TODO(riley): restore when voice actions issues fixed
+  // Check that the symbol table are compatible
+  if (!CompatSymbols(fst1->InputSymbols(), fst2.InputSymbols()) ||
+      !CompatSymbols(fst1->OutputSymbols(), fst2.OutputSymbols())) {
+    LOG(ERROR) << "Union: input/output symbol tables of 1st argument "
+               << "do not match input/output symbol tables of 2nd argument";
+    // fst1->SetProperties(kError, kError);
+    // return;
+  }
+
+  // Everything that describes fst1 "before the union" is captured up front,
+  // because the loop below mutates fst1.
+  StateId numstates1 = fst1->NumStates();
+  bool initial_acyclic1 = fst1->Properties(kInitialAcyclic, true);
+  uint64 props1 = fst1->Properties(kFstProperties, false);
+  uint64 props2 = fst2.Properties(kFstProperties, false);
+
+  // An fst2 without a start state contributes nothing; only its error
+  // property is carried over.
+  StateId start2 = fst2.Start();
+  if (start2 == kNoStateId) {
+    if (props2 & kError) fst1->SetProperties(kError, kError);
+    return;
+  }
+
+  // One extra state is reserved for the new start state that is added
+  // further down when fst1 is not initial-acyclic.
+  if (fst2.Properties(kExpanded, false)) {
+    fst1->ReserveStates(
+        numstates1 + CountStates(fst2) + (initial_acyclic1 ? 0 : 1));
+  }
+
+  // Copy the states and arcs of fst2 into fst1, shifting arc destinations.
+  for (StateIterator< Fst<Arc> > siter(fst2);
+       !siter.Done();
+       siter.Next()) {
+    StateId s1 = fst1->AddState();
+    StateId s2 = siter.Value();
+    fst1->SetFinal(s1, fst2.Final(s2));
+    fst1->ReserveArcs(s1, fst2.NumArcs(s2));
+    for (ArcIterator< Fst<Arc> > aiter(fst2, s2);
+         !aiter.Done();
+         aiter.Next()) {
+      Arc arc = aiter.Value();
+      arc.nextstate += numstates1;
+      fst1->AddArc(s1, arc);
+    }
+  }
+  StateId start1 = fst1->Start();
+  if (start1 == kNoStateId) {
+    // fst1 had no start state, so the result starts where fst2 starts. That
+    // state now lives at its shifted id: the shift is zero when fst1 was
+    // empty, but not when fst1 held states without a start state.
+    fst1->SetStart(start2 + numstates1);
+    fst1->SetProperties(props2, kCopyProperties);
+    return;
+  }
+
+  if (initial_acyclic1) {
+    // The existing start state can simply branch into fst2 via an epsilon arc.
+    fst1->AddArc(start1, Arc(0, 0, Weight::One(), start2 + numstates1));
+  } else {
+    // Otherwise a new start state with epsilon arcs to both old start
+    // states is introduced.
+    StateId nstart1 = fst1->AddState();
+    fst1->SetStart(nstart1);
+    fst1->AddArc(nstart1, Arc(0, 0, Weight::One(), start1));
+    fst1->AddArc(nstart1, Arc(0, 0, Weight::One(), start2 + numstates1));
+  }
+  fst1->SetProperties(UnionProperties(props1, props2), kFstProperties);
+}
+
+
+// Computes the union of two FSTs; this version modifies its
+// RationalFst argument.
+// The work is handed to the AddUnion() method of fst1's implementation
+// object; nothing else is done here.
+template<class Arc>
+void Union(RationalFst<Arc> *fst1, const Fst<Arc> &fst2) {
+ fst1->GetImpl()->AddUnion(fst2);
+}
+
+
+typedef RationalFstOptions UnionFstOptions;
+
+
+// Computes the union (sum) of two FSTs. This version is a delayed
+// Fst. If A transduces string x to y with weight a and B transduces
+// string w to v with weight b, then their union transduces x to y
+// with weight a and w to v with weight b.
+//
+// Complexity:
+// - Time: O(v1 + e1 + v2 + e2)
+// - Space: O(v1 + v2)
+// where vi = # of states visited and ei = # of arcs visited of the
+// ith FST. Constant time and space to visit an input state or arc
+// is assumed and exclusive of caching.
+template <class A>
+class UnionFst : public RationalFst<A> {
+ public:
+ using ImplToFst< RationalFstImpl<A> >::GetImpl;
+
+ typedef A Arc;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+
+ // Builds the union of fst1 and fst2 on top of a default-constructed
+ // RationalFst base.
+ UnionFst(const Fst<A> &fst1, const Fst<A> &fst2) {
+ GetImpl()->InitUnion(fst1, fst2);
+ }
+
+ // Same as above, but the RationalFst base is constructed from 'opts'.
+ UnionFst(const Fst<A> &fst1, const Fst<A> &fst2, const UnionFstOptions &opts)
+ : RationalFst<A>(opts) {
+ GetImpl()->InitUnion(fst1, fst2);
+ }
+
+ // See Fst<>::Copy() for doc.
+ UnionFst(const UnionFst<A> &fst, bool safe = false)
+ : RationalFst<A>(fst, safe) {}
+
+ // Get a copy of this UnionFst. See Fst<>::Copy() for further doc.
+ // The caller receives a heap-allocated object created with 'new'.
+ virtual UnionFst<A> *Copy(bool safe = false) const {
+ return new UnionFst<A>(*this, safe);
+ }
+};
+
+
+// Specialization for UnionFst.
+// Adds no behavior of its own: construction is forwarded to the
+// StateIterator specialization for RationalFst, from which it derives.
+template <class A>
+class StateIterator< UnionFst<A> > : public StateIterator< RationalFst<A> > {
+ public:
+ explicit StateIterator(const UnionFst<A> &fst)
+ : StateIterator< RationalFst<A> >(fst) {}
+};
+
+
+// Specialization for UnionFst.
+// Adds no behavior of its own: construction is forwarded to the
+// ArcIterator specialization for RationalFst, from which it derives.
+template <class A>
+class ArcIterator< UnionFst<A> > : public ArcIterator< RationalFst<A> > {
+ public:
+ typedef typename A::StateId StateId;
+
+ // Iterates over the arcs leaving state 's' of 'fst'.
+ ArcIterator(const UnionFst<A> &fst, StateId s)
+ : ArcIterator< RationalFst<A> >(fst, s) {}
+};
+
+
+// Useful alias when using StdArc.
+typedef UnionFst<StdArc> StdUnionFst;
+
+} // namespace fst
+
+#endif // FST_LIB_UNION_H__
diff --git a/src/include/fst/util.h b/src/include/fst/util.h
new file mode 100644
index 0000000..87231e1
--- /dev/null
+++ b/src/include/fst/util.h
@@ -0,0 +1,409 @@
+// util.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// FST utility inline definitions.
+
+#ifndef FST_LIB_UTIL_H__
+#define FST_LIB_UTIL_H__
+
+#include <unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <unordered_set>
+using std::tr1::unordered_set;
+using std::tr1::unordered_multiset;
+#include <list>
+#include <map>
+#include <set>
+#include <sstream>
+#include <string>
+#include <vector>
+using std::vector;
+
+
+#include <fst/compat.h>
+#include <fst/types.h>
+
+#include <iostream>
+#include <fstream>
+
+//
+// UTILITY FOR ERROR HANDLING
+//
+
+DECLARE_bool(fst_error_fatal);
+
+#define FSTERROR() (FLAGS_fst_error_fatal ? LOG(FATAL) : LOG(ERROR))
+
+namespace fst {
+
+//
+// UTILITIES FOR TYPE I/O
+//
+
+// Read some types from an input stream.
+
+// Generic case.
+// Used for any type that has no dedicated overload below: the read is
+// delegated to the object's own Read(istream &) member and its result is
+// returned.
+template <typename T>
+inline istream &ReadType(istream &strm, T *t) {
+ return t->Read(strm);
+}
+
+// Fixed size, contiguous memory read.
+// READ_POD_TYPE(T) defines a non-template ReadType() overload that copies
+// sizeof(T) raw bytes from the stream straight into '*t'. No byte-order or
+// size conversion is performed, so the data must have been written with
+// the same in-memory representation of T.
+#define READ_POD_TYPE(T) \
+inline istream &ReadType(istream &strm, T *t) { \
+ return strm.read(reinterpret_cast<char *>(t), sizeof(T)); \
+}
+
+// One overload per built-in arithmetic type.
+READ_POD_TYPE(bool);
+READ_POD_TYPE(char);
+READ_POD_TYPE(signed char);
+READ_POD_TYPE(unsigned char);
+READ_POD_TYPE(short);
+READ_POD_TYPE(unsigned short);
+READ_POD_TYPE(int);
+READ_POD_TYPE(unsigned int);
+READ_POD_TYPE(long);
+READ_POD_TYPE(unsigned long);
+READ_POD_TYPE(long long);
+READ_POD_TYPE(unsigned long long);
+READ_POD_TYPE(float);
+READ_POD_TYPE(double);
+
+// String case.
+// The serialized form is an int32 byte count followed by that many bytes.
+// '*s' is cleared first. Reading stops at the first byte that cannot be
+// read, so a truncated stream or a corrupt count neither appends
+// indeterminate bytes nor loops through the whole claimed length; the
+// stream is left in its failed state for the caller to detect.
+inline istream &ReadType(istream &strm, string *s) {
+  s->clear();
+  int32 ns = 0;
+  strm.read(reinterpret_cast<char *>(&ns), sizeof(ns));
+  for (int32 i = 0; i < ns; ++i) {
+    char c;
+    if (!strm.read(&c, 1)) break;
+    *s += c;
+  }
+  return strm;
+}
+
+// Pair case.
+// Reads 'first' and then 'second', each through the ReadType() overload
+// matching its type, and returns the stream.
+template <typename S, typename T>
+inline istream &ReadType(istream &strm, pair<S, T> *p) {
+ ReadType(strm, &p->first);
+ ReadType(strm, &p->second);
+ return strm;
+}
+
+// Pair with a const first member (the shape of pair<const S, T>). The
+// const is cast away so that 'first' can be filled in place.
+template <typename S, typename T>
+inline istream &ReadType(istream &strm, pair<const S, T> *p) {
+ ReadType(strm, const_cast<S *>(&p->first));
+ ReadType(strm, &p->second);
+ return strm;
+}
+
+// General case - no-op.
+// Both arguments are ignored; this overload only lets callers invoke
+// StlReserve() uniformly on any container type.
+template <typename C>
+void StlReserve(C *c, int64 n) {}
+
+// Specialization for vectors.
+// Forwards 'n' to vector::reserve() so that subsequent insertions of up to
+// 'n' elements do not reallocate.
+template <typename S, typename T>
+void StlReserve(vector<S, T> *c, int64 n) {
+ c->reserve(n);
+}
+
+// STL sequence container.
+// READ_STL_SEQ_TYPE(C) defines ReadType() for a two-parameter sequence
+// template C. The serialized form is an int64 element count followed by
+// that many elements, each read with ReadType(). The container is cleared
+// first. Capacity is reserved only for a positive count, and reading stops
+// at the first element that fails to read, so a truncated or corrupt
+// stream does not append default-constructed elements; the failure stays
+// visible to the caller through the stream state.
+#define READ_STL_SEQ_TYPE(C) \
+template <typename S, typename T> \
+inline istream &ReadType(istream &strm, C<S, T> *c) { \
+  c->clear(); \
+  int64 n = 0; \
+  strm.read(reinterpret_cast<char *>(&n), sizeof(n)); \
+  if (n > 0) StlReserve(c, n); \
+  for (int64 i = 0; i < n; ++i) { \
+    typename C<S, T>::value_type value; \
+    ReadType(strm, &value); \
+    if (!strm) break; \
+    c->insert(c->end(), value); \
+  } \
+  return strm; \
+}
+
+READ_STL_SEQ_TYPE(vector);
+READ_STL_SEQ_TYPE(list);
+
+// STL associative container.
+// READ_STL_ASSOC_TYPE(C) defines ReadType() for a three-parameter
+// associative template C. The serialized form is an int64 element count
+// followed by that many values, each read with ReadType(). The container
+// is cleared first. Reading stops at the first value that fails to read,
+// so a truncated or corrupt stream does not insert default-constructed
+// values or loop through the whole claimed count; the failure stays
+// visible to the caller through the stream state.
+#define READ_STL_ASSOC_TYPE(C) \
+template <typename S, typename T, typename U> \
+inline istream &ReadType(istream &strm, C<S, T, U> *c) { \
+  c->clear(); \
+  int64 n = 0; \
+  strm.read(reinterpret_cast<char *>(&n), sizeof(n)); \
+  for (int64 i = 0; i < n; ++i) { \
+    typename C<S, T, U>::value_type value; \
+    ReadType(strm, &value); \
+    if (!strm) break; \
+    c->insert(value); \
+  } \
+  return strm; \
+}
+
+READ_STL_ASSOC_TYPE(set);
+READ_STL_ASSOC_TYPE(unordered_set);
+READ_STL_ASSOC_TYPE(map);
+READ_STL_ASSOC_TYPE(unordered_map);
+
+// Write some types to an output stream.
+
+// Generic case.
+// Used for any type that has no dedicated overload below: the write is
+// delegated to the object's own Write(ostream &) member. Note that 't' is
+// taken by value.
+template <typename T>
+inline ostream &WriteType(ostream &strm, const T t) {
+ t.Write(strm);
+ return strm;
+}
+
+// Fixed size, contiguous memory write.
+// WRITE_POD_TYPE(T) defines a non-template WriteType() overload that
+// copies the sizeof(T) raw bytes of 't' to the stream. No byte-order or
+// size conversion is performed.
+#define WRITE_POD_TYPE(T) \
+inline ostream &WriteType(ostream &strm, const T t) { \
+ return strm.write(reinterpret_cast<const char *>(&t), sizeof(T)); \
+}
+
+// One overload per built-in arithmetic type.
+WRITE_POD_TYPE(bool);
+WRITE_POD_TYPE(char);
+WRITE_POD_TYPE(signed char);
+WRITE_POD_TYPE(unsigned char);
+WRITE_POD_TYPE(short);
+WRITE_POD_TYPE(unsigned short);
+WRITE_POD_TYPE(int);
+WRITE_POD_TYPE(unsigned int);
+WRITE_POD_TYPE(long);
+WRITE_POD_TYPE(unsigned long);
+WRITE_POD_TYPE(long long);
+WRITE_POD_TYPE(unsigned long long);
+WRITE_POD_TYPE(float);
+WRITE_POD_TYPE(double);
+
+// String case.
+// Writes the length as an int32 followed by the raw bytes of the string.
+// NOTE(review): s.size() is narrowed to int32, so a string too long for
+// int32 would not be written correctly -- confirm callers never pass one.
+inline ostream &WriteType(ostream &strm, const string &s) {
+ int32 ns = s.size();
+ strm.write(reinterpret_cast<const char *>(&ns), sizeof(ns));
+ return strm.write(s.data(), ns);
+}
+
+// Pair case.
+// Writes 'first' and then 'second', each through the WriteType() overload
+// matching its type, and returns the stream.
+template <typename S, typename T>
+inline ostream &WriteType(ostream &strm, const pair<S, T> &p) {
+ WriteType(strm, p.first);
+ WriteType(strm, p.second);
+ return strm;
+}
+
+// STL sequence container.
+// WRITE_STL_SEQ_TYPE(C) defines WriteType() for a two-parameter sequence
+// template C: the element count is written as an int64, followed by every
+// element in iteration order via WriteType().
+#define WRITE_STL_SEQ_TYPE(C) \
+template <typename S, typename T> \
+inline ostream &WriteType(ostream &strm, const C<S, T> &c) { \
+ int64 n = c.size(); \
+ strm.write(reinterpret_cast<char *>(&n), sizeof(n)); \
+ for (typename C<S, T>::const_iterator it = c.begin(); \
+ it != c.end(); ++it) \
+ WriteType(strm, *it); \
+ return strm; \
+}
+
+WRITE_STL_SEQ_TYPE(vector);
+WRITE_STL_SEQ_TYPE(list);
+
+// STL associative container.
+// WRITE_STL_ASSOC_TYPE(C) defines WriteType() for a three-parameter
+// associative template C: the element count is written as an int64,
+// followed by every value in iteration order via WriteType().
+#define WRITE_STL_ASSOC_TYPE(C) \
+template <typename S, typename T, typename U> \
+inline ostream &WriteType(ostream &strm, const C<S, T, U> &c) { \
+ int64 n = c.size(); \
+ strm.write(reinterpret_cast<char *>(&n), sizeof(n)); \
+ for (typename C<S, T, U>::const_iterator it = c.begin(); \
+ it != c.end(); ++it) \
+ WriteType(strm, *it); \
+ return strm; \
+}
+
+WRITE_STL_ASSOC_TYPE(set);
+WRITE_STL_ASSOC_TYPE(unordered_set);
+WRITE_STL_ASSOC_TYPE(map);
+WRITE_STL_ASSOC_TYPE(unordered_map);
+
+// Utilities for converting between int64 or Weight and string.
+
+int64 StrToInt64(const string &s, const string &src, size_t nline,
+ bool allow_negative, bool *error = 0);
+
+// Parses 's' into a Weight using the weight's stream extraction operator.
+// If the extraction fails, an error naming the offending text, 'src' and
+// 'nline' is reported through FSTERROR() and Weight::NoWeight() is
+// returned.
+template <typename Weight>
+Weight StrToWeight(const string &s, const string &src, size_t nline) {
+  istringstream strm(s);
+  Weight w;
+  strm >> w;
+  if (strm) return w;
+
+  FSTERROR() << "StrToWeight: Bad weight = \"" << s
+             << "\", source = " << src << ", line = " << nline;
+  return Weight::NoWeight();
+}
+
+void Int64ToStr(int64 n, string *s);
+
+// Formats 'w' with its stream insertion operator, using a stream
+// precision of 9, and appends the text to '*s' (existing content of '*s'
+// is kept).
+template <typename Weight>
+void WeightToStr(Weight w, string *s) {
+  ostringstream out;
+  out.precision(9);
+  out << w;
+  s->append(out.str());
+}
+
+// Utilities for reading/writing label pairs
+
+// Reads label pairs from the text file 'filename' into '*pairs', which is
+// cleared first. Each non-empty line must hold exactly two columns
+// separated by spaces or tabs; each column is converted with StrToInt64(),
+// to which 'allow_negative' is forwarded.
+//
+// Returns true on success. Returns false, after logging an error, when the
+// file cannot be opened, a line has the wrong number of columns, a label
+// fails to convert, or reading stops before the end of the file (a read
+// error, or a line that does not fit into the line buffer).
+template <typename Label>
+bool ReadLabelPairs(const string& filename,
+                    vector<pair<Label, Label> >* pairs,
+                    bool allow_negative = false) {
+  ifstream strm(filename.c_str());
+
+  if (!strm) {
+    LOG(ERROR) << "ReadLabelPairs: Can't open file: " << filename;
+    return false;
+  }
+
+  const int kLineLen = 8096;
+  char line[kLineLen];
+  size_t nline = 0;
+
+  pairs->clear();
+  while (strm.getline(line, kLineLen)) {
+    ++nline;
+    vector<char *> col;
+    SplitToVector(line, "\n\t ", &col, true);
+    if (col.size() == 0 || col[0][0] == '\0')  // empty line
+      continue;
+    if (col.size() != 2) {
+      LOG(ERROR) << "ReadLabelPairs: Bad number of columns, "
+                 << "file = " << filename << ", line = " << nline;
+      return false;
+    }
+
+    // Start from "no error" so the checks below never read an
+    // indeterminate value, whatever StrToInt64() does on success.
+    bool err = false;
+    Label frmlabel = StrToInt64(col[0], filename, nline, allow_negative, &err);
+    if (err) return false;
+    Label tolabel = StrToInt64(col[1], filename, nline, allow_negative, &err);
+    if (err) return false;
+    pairs->push_back(make_pair(frmlabel, tolabel));
+  }
+
+  // getline() also ends the loop, without reaching end-of-file, on a read
+  // error or on a line longer than the buffer. Reporting success then would
+  // silently drop the rest of the file.
+  if (!strm.eof()) {
+    LOG(ERROR) << "ReadLabelPairs: Read failed or line too long, "
+               << "file = " << filename << ", line = " << nline + 1;
+    return false;
+  }
+  return true;
+}
+
+// Writes 'pairs' as text, one pair per line in the form
+// "<first>\t<second>\n". Output goes to the file 'filename', or to
+// standard output when 'filename' is empty.
+//
+// Returns true on success. Returns false, after logging an error, when the
+// file cannot be opened or the stream is in a failed state after writing.
+// The file stream is a local object, so it is closed on every return path.
+template <typename Label>
+bool WriteLabelPairs(const string& filename,
+                     const vector<pair<Label, Label> >& pairs) {
+  ofstream fstrm;
+  ostream *strm = &std::cout;
+  if (!filename.empty()) {
+    fstrm.open(filename.c_str());
+    if (!fstrm) {
+      LOG(ERROR) << "WriteLabelPairs: Can't open file: " << filename;
+      return false;
+    }
+    strm = &fstrm;
+  }
+
+  for (size_t n = 0; n < pairs.size(); ++n)
+    *strm << pairs[n].first << "\t" << pairs[n].second << "\n";
+
+  // Push buffered output to its destination so that a failed write shows
+  // up in the stream state checked below.
+  strm->flush();
+  if (!*strm) {
+    LOG(ERROR) << "WriteLabelPairs: Write failed: "
+               << (filename.empty() ? "standard output" : filename);
+    return false;
+  }
+  return true;
+}
+
+// Utilities for converting a type name to a legal C symbol.
+
+void ConvertToLegalCSymbol(string *s);
+
+
+//
+// UTILITIES FOR STREAM I/O
+//
+
+bool AlignInput(istream &strm, int align);
+bool AlignOutput(ostream &strm, int align);
+
+//
+// UTILITIES FOR PROTOCOL BUFFER I/O
+//
+
+
+// A set of keys tuned for fast negative lookups: the smallest and largest
+// inserted keys are tracked, and a query outside that interval is answered
+// without searching the underlying STL set. 'Key' must provide ==, != and
+// <. 'NoKey' is a reserved key value that marks the bounds as unset and
+// must not otherwise be used as a key. 'Find()' returns an STL
+// const_iterator to the match found, otherwise it equals 'End()'.
+template <class Key, Key NoKey>
+class CompactSet {
+public:
+  typedef typename set<Key>::const_iterator const_iterator;
+
+  // Creates an empty set with both bounds unset.
+  CompactSet() : min_key_(NoKey), max_key_(NoKey) {}
+
+  CompactSet(const CompactSet<Key, NoKey> &compact_set)
+      : set_(compact_set.set_),
+        min_key_(compact_set.min_key_),
+        max_key_(compact_set.max_key_) {}
+
+  // Adds 'key' and widens the tracked interval so that it contains 'key'.
+  void Insert(Key key) {
+    set_.insert(key);
+    const bool lowers_min = (min_key_ == NoKey) || (key < min_key_);
+    const bool raises_max = (max_key_ == NoKey) || (max_key_ < key);
+    if (lowers_min) min_key_ = key;
+    if (raises_max) max_key_ = key;
+  }
+
+  // Removes every key and marks both bounds as unset again.
+  void Clear() {
+    set_.clear();
+    max_key_ = NoKey;
+    min_key_ = NoKey;
+  }
+
+  // Looks 'key' up. Keys outside the tracked interval (or any key while
+  // the bounds are unset) yield End() without touching the STL set.
+  const_iterator Find(Key key) const {
+    const bool outside =
+        min_key_ == NoKey || key < min_key_ || max_key_ < key;
+    return outside ? set_.end() : set_.find(key);
+  }
+
+  const_iterator Begin() const { return set_.begin(); }
+
+  const_iterator End() const { return set_.end(); }
+
+private:
+  set<Key> set_;   // All inserted keys.
+  Key min_key_;    // Smallest inserted key, or NoKey if unset.
+  Key max_key_;    // Largest inserted key, or NoKey if unset.
+
+  void operator=(const CompactSet<Key, NoKey> &);  // disallow
+};
+
+} // namespace fst
+
+#endif // FST_LIB_UTIL_H__
diff --git a/src/include/fst/vector-fst.h b/src/include/fst/vector-fst.h
new file mode 100644
index 0000000..f6d8a6d
--- /dev/null
+++ b/src/include/fst/vector-fst.h
@@ -0,0 +1,727 @@
+// vector-fst.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Simple concrete, mutable FST whose states and arcs are stored in STL
+// vectors.
+
+#ifndef FST_LIB_VECTOR_FST_H__
+#define FST_LIB_VECTOR_FST_H__
+
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/mutable-fst.h>
+#include <fst/test-properties.h>
+
+
+namespace fst {
+
+template <class A> class VectorFst;
+template <class F, class G> void Cast(const F &, G *);
+
+
+// States and arcs implemented by STL vectors, templated on the
+// State definition. This does not manage the Fst properties.
+// States are held as heap-allocated State objects in a vector indexed by
+// state id; the objects are deleted by the destructor and by the
+// DeleteStates() methods.
+template <class State>
+class VectorFstBaseImpl : public FstImpl<typename State::Arc> {
+ public:
+ typedef typename State::Arc Arc;
+ typedef typename Arc::Weight Weight;
+ typedef typename Arc::StateId StateId;
+
+ // Creates an implementation with no states and no start state.
+ VectorFstBaseImpl() : start_(kNoStateId) {}
+
+ // Deletes every state object held in the state vector.
+ ~VectorFstBaseImpl() {
+ for (StateId s = 0; s < states_.size(); ++s)
+ delete states_[s];
+ }
+
+ StateId Start() const { return start_; }
+
+ Weight Final(StateId s) const { return states_[s]->final; }
+
+ StateId NumStates() const { return states_.size(); }
+
+ size_t NumArcs(StateId s) const { return states_[s]->arcs.size(); }
+
+ void SetStart(StateId s) { start_ = s; }
+
+ void SetFinal(StateId s, Weight w) { states_[s]->final = w; }
+
+ // Appends a newly allocated default state and returns its id.
+ StateId AddState() {
+ states_.push_back(new State);
+ return states_.size() - 1;
+ }
+
+ // Appends the given state object and returns its id. The object is
+ // deleted along with the other states, so it must be heap-allocated.
+ StateId AddState(State *state) {
+ states_.push_back(state);
+ return states_.size() - 1;
+ }
+
+ // Appends 'arc' to the arcs of state 's'. The epsilon counters of the
+ // state are not touched here.
+ void AddArc(StateId s, const Arc &arc) {
+ states_[s]->arcs.push_back(arc);
+ }
+
+ // Deletes the states listed in 'dstates', renumbers the surviving states
+ // densely in their original order, and removes every arc whose
+ // destination was deleted.
+ void DeleteStates(const vector<StateId>& dstates) {
+ // newid[s] becomes the new id of old state s, or kNoStateId if s is
+ // being deleted.
+ vector<StateId> newid(states_.size(), 0);
+ for (size_t i = 0; i < dstates.size(); ++i)
+ newid[dstates[i]] = kNoStateId;
+ StateId nstates = 0;
+ // Compact the state vector in place, freeing the deleted states.
+ for (StateId s = 0; s < states_.size(); ++s) {
+ if (newid[s] != kNoStateId) {
+ newid[s] = nstates;
+ if (s != nstates)
+ states_[nstates] = states_[s];
+ ++nstates;
+ } else {
+ delete states_[s];
+ }
+ }
+ states_.resize(nstates);
+ // Rewrite arc destinations; arcs into deleted states are dropped and
+ // the epsilon counters of their source state adjusted.
+ for (StateId s = 0; s < states_.size(); ++s) {
+ vector<Arc> &arcs = states_[s]->arcs;
+ size_t narcs = 0;
+ for (size_t i = 0; i < arcs.size(); ++i) {
+ StateId t = newid[arcs[i].nextstate];
+ if (t != kNoStateId) {
+ arcs[i].nextstate = t;
+ if (i != narcs)
+ arcs[narcs] = arcs[i];
+ ++narcs;
+ } else {
+ if (arcs[i].ilabel == 0)
+ --states_[s]->niepsilons;
+ if (arcs[i].olabel == 0)
+ --states_[s]->noepsilons;
+ }
+ }
+ arcs.resize(narcs);
+ }
+ // The start state follows the renumbering; it becomes kNoStateId if the
+ // start state itself was deleted.
+ if (Start() != kNoStateId)
+ SetStart(newid[Start()]);
+ }
+
+ // Deletes all states and resets the start state to kNoStateId.
+ void DeleteStates() {
+ for (StateId s = 0; s < states_.size(); ++s)
+ delete states_[s];
+ states_.clear();
+ SetStart(kNoStateId);
+ }
+
+ // Removes the last 'n' arcs of state 's'.
+ // NOTE(review): the new size is computed in unsigned arithmetic, so 'n'
+ // is presumably required not to exceed the arc count -- confirm callers.
+ void DeleteArcs(StateId s, size_t n) {
+ states_[s]->arcs.resize(states_[s]->arcs.size() - n);
+ }
+
+ // Removes all arcs of state 's'.
+ void DeleteArcs(StateId s) { states_[s]->arcs.clear(); }
+
+ State *GetState(StateId s) { return states_[s]; }
+
+ const State *GetState(StateId s) const { return states_[s]; }
+
+ // Replaces the pointer stored for state 's'; the previously stored
+ // object is not deleted here.
+ void SetState(StateId s, State *state) { states_[s] = state; }
+
+ void ReserveStates(StateId n) { states_.reserve(n); }
+
+ void ReserveArcs(StateId s, size_t n) { states_[s]->arcs.reserve(n); }
+
+ // Provide information needed for generic state iterator
+ void InitStateIterator(StateIteratorData<Arc> *data) const {
+ data->base = 0;
+ data->nstates = states_.size();
+ }
+
+ // Provide information needed for generic arc iterator
+ // 'arcs' points at the first arc of the state, or is 0 when the state
+ // has no arcs.
+ void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const {
+ data->base = 0;
+ data->narcs = states_[s]->arcs.size();
+ data->arcs = data->narcs > 0 ? &states_[s]->arcs[0] : 0;
+ data->ref_count = 0;
+ }
+
+ private:
+ vector<State *> states_; // States representation.
+ StateId start_; // initial state
+
+ DISALLOW_COPY_AND_ASSIGN(VectorFstBaseImpl);
+};
+
+// Arcs implemented by an STL vector per state.
+// Besides the arcs, each state records its final weight and the number of
+// its arcs that carry label 0 on the input and on the output side.
+template <class A>
+struct VectorState {
+ typedef A Arc;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+
+ // A new state has final weight Weight::Zero() and zero epsilon counts.
+ VectorState() : final(Weight::Zero()), niepsilons(0), noepsilons(0) {}
+
+ Weight final; // Final weight
+ vector<A> arcs; // Arcs representation
+ size_t niepsilons; // # of input epsilons
+ size_t noepsilons; // # of output epsilons
+};
+
+// This is a VectorFstBaseImpl container that holds VectorState's. It
+// manages Fst properties and the # of input and output epsilons.
+// Each mutating method forwards to the base implementation and then
+// recomputes the stored properties with the matching *Properties() helper.
+template <class A>
+class VectorFstImpl : public VectorFstBaseImpl< VectorState<A> > {
+ public:
+ using FstImpl<A>::SetInputSymbols;
+ using FstImpl<A>::SetOutputSymbols;
+ using FstImpl<A>::SetType;
+ using FstImpl<A>::SetProperties;
+ using FstImpl<A>::Properties;
+
+ using VectorFstBaseImpl<VectorState<A> >::Start;
+ using VectorFstBaseImpl<VectorState<A> >::NumStates;
+ using VectorFstBaseImpl<VectorState<A> >::GetState;
+ using VectorFstBaseImpl<VectorState<A> >::ReserveArcs;
+
+ friend class MutableArcIterator< VectorFst<A> >;
+
+ typedef VectorFstBaseImpl< VectorState<A> > BaseImpl;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+
+ // Creates an empty FST of type "vector".
+ VectorFstImpl() {
+ SetType("vector");
+ SetProperties(kNullProperties | kStaticProperties);
+ }
+ // Builds a copy of 'fst'; defined below.
+ explicit VectorFstImpl(const Fst<A> &fst);
+
+ // Reads an implementation from 'strm'; defined below.
+ static VectorFstImpl<A> *Read(istream &strm, const FstReadOptions &opts);
+
+ size_t NumInputEpsilons(StateId s) const { return GetState(s)->niepsilons; }
+
+ size_t NumOutputEpsilons(StateId s) const { return GetState(s)->noepsilons; }
+
+ void SetStart(StateId s) {
+ BaseImpl::SetStart(s);
+ SetProperties(SetStartProperties(Properties()));
+ }
+
+ // The previous final weight is fetched first because
+ // SetFinalProperties() takes both the old and the new weight.
+ void SetFinal(StateId s, Weight w) {
+ Weight ow = BaseImpl::Final(s);
+ BaseImpl::SetFinal(s, w);
+ SetProperties(SetFinalProperties(Properties(), ow, w));
+ }
+
+ StateId AddState() {
+ StateId s = BaseImpl::AddState();
+ SetProperties(AddStateProperties(Properties()));
+ return s;
+ }
+
+ // Appends 'arc' to state 's', keeping the epsilon counters and the
+ // properties up to date.
+ void AddArc(StateId s, const A &arc) {
+ VectorState<A> *state = GetState(s);
+ if (arc.ilabel == 0) {
+ ++state->niepsilons;
+ }
+ if (arc.olabel == 0) {
+ ++state->noepsilons;
+ }
+
+ // 'parc' is the arc currently last on the state (0 if there is none);
+ // it is taken before the new arc is appended.
+ const A *parc = state->arcs.empty() ? 0 : &(state->arcs.back());
+ SetProperties(AddArcProperties(Properties(), s, arc, parc));
+
+ BaseImpl::AddArc(s, arc);
+ }
+
+ void DeleteStates(const vector<StateId> &dstates) {
+ BaseImpl::DeleteStates(dstates);
+ SetProperties(DeleteStatesProperties(Properties()));
+ }
+
+ void DeleteStates() {
+ BaseImpl::DeleteStates();
+ SetProperties(DeleteAllStatesProperties(Properties(),
+ kStaticProperties));
+ }
+
+ // Removes the last 'n' arcs of state 's'. The epsilon counters are
+ // reduced for each removed arc before the arcs themselves are dropped.
+ // NOTE(review): the index 'j' is computed in unsigned arithmetic, so 'n'
+ // is presumably required not to exceed the arc count -- confirm callers.
+ void DeleteArcs(StateId s, size_t n) {
+ const vector<A> &arcs = GetState(s)->arcs;
+ for (size_t i = 0; i < n; ++i) {
+ size_t j = arcs.size() - i - 1;
+ if (arcs[j].ilabel == 0)
+ --GetState(s)->niepsilons;
+ if (arcs[j].olabel == 0)
+ --GetState(s)->noepsilons;
+ }
+ BaseImpl::DeleteArcs(s, n);
+ SetProperties(DeleteArcsProperties(Properties()));
+ }
+
+ // Removes all arcs of state 's' and zeroes its epsilon counters.
+ void DeleteArcs(StateId s) {
+ GetState(s)->niepsilons = 0;
+ GetState(s)->noepsilons = 0;
+ BaseImpl::DeleteArcs(s);
+ SetProperties(DeleteArcsProperties(Properties()));
+ }
+
+ private:
+ // Properties always true of this Fst class
+ static const uint64 kStaticProperties = kExpanded | kMutable;
+ // Current file format version
+ static const int kFileVersion = 2;
+ // Minimum file format version supported
+ static const int kMinFileVersion = 1;
+
+ DISALLOW_COPY_AND_ASSIGN(VectorFstImpl);
+};
+
+template <class A> const uint64 VectorFstImpl<A>::kStaticProperties;
+template <class A> const int VectorFstImpl<A>::kFileVersion;
+template <class A> const int VectorFstImpl<A>::kMinFileVersion;
+
+
+// Builds a "vector" implementation holding a copy of 'fst': symbol tables,
+// start state, final weights and arcs are copied, and the epsilon counters
+// are recomputed while the arcs are added. The base-class mutators are
+// used throughout, so properties are not updated per operation; they are
+// set once at the end from the copyable properties of 'fst'.
+// NOTE(review): each state id produced by the iterator is used directly as
+// the id of the state just added, which presumably relies on 'fst'
+// enumerating its states as 0, 1, 2, ... in order -- confirm.
+template <class A>
+VectorFstImpl<A>::VectorFstImpl(const Fst<A> &fst) {
+ SetType("vector");
+ SetInputSymbols(fst.InputSymbols());
+ SetOutputSymbols(fst.OutputSymbols());
+ BaseImpl::SetStart(fst.Start());
+ // Counting states up front is only done when 'fst' reports kExpanded.
+ if (fst.Properties(kExpanded, false))
+ BaseImpl::ReserveStates(CountStates(fst));
+
+ for (StateIterator< Fst<A> > siter(fst);
+ !siter.Done();
+ siter.Next()) {
+ StateId s = siter.Value();
+ BaseImpl::AddState();
+ BaseImpl::SetFinal(s, fst.Final(s));
+ ReserveArcs(s, fst.NumArcs(s));
+ for (ArcIterator< Fst<A> > aiter(fst, s);
+ !aiter.Done();
+ aiter.Next()) {
+ const A &arc = aiter.Value();
+ BaseImpl::AddArc(s, arc);
+ if (arc.ilabel == 0)
+ ++GetState(s)->niepsilons;
+ if (arc.olabel == 0)
+ ++GetState(s)->noepsilons;
+ }
+ }
+ SetProperties(fst.Properties(kCopyProperties, false) | kStaticProperties);
+}
+
+// Reads a VectorFstImpl from 'strm'. After the header, each state is
+// stored as its final weight, an int64 arc count, and then for every arc
+// its ilabel, olabel, weight and nextstate.
+//
+// Returns a newly allocated implementation, or 0 (after deleting the
+// partially built one) when the header cannot be read, a read fails inside
+// a state, or the header announces more states than the stream holds.
+// NOTE(review): the arc count read from the stream is used for
+// ReserveArcs() and as the loop bound without a range check; a negative or
+// very large value presumably comes only from a corrupt file -- confirm
+// whether it should be rejected explicitly.
+template <class A>
+VectorFstImpl<A> *VectorFstImpl<A>::Read(istream &strm,
+ const FstReadOptions &opts) {
+ VectorFstImpl<A> *impl = new VectorFstImpl;
+ FstHeader hdr;
+ if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) {
+ delete impl;
+ return 0;
+ }
+ impl->BaseImpl::SetStart(hdr.Start());
+ if (hdr.NumStates() != kNoStateId) {
+ impl->ReserveStates(hdr.NumStates());
+ }
+
+ // When the header carries no state count (kNoStateId), states are read
+ // until a final weight can no longer be read.
+ StateId s = 0;
+ for (;hdr.NumStates() == kNoStateId || s < hdr.NumStates(); ++s) {
+ typename A::Weight final;
+ if (!final.Read(strm)) break;
+ impl->BaseImpl::AddState();
+ VectorState<A> *state = impl->GetState(s);
+ state->final = final;
+ int64 narcs;
+ ReadType(strm, &narcs);
+ if (!strm) {
+ LOG(ERROR) << "VectorFst::Read: read failed: " << opts.source;
+ delete impl;
+ return 0;
+ }
+ impl->ReserveArcs(s, narcs);
+ for (size_t j = 0; j < narcs; ++j) {
+ A arc;
+ ReadType(strm, &arc.ilabel);
+ ReadType(strm, &arc.olabel);
+ arc.weight.Read(strm);
+ ReadType(strm, &arc.nextstate);
+ if (!strm) {
+ LOG(ERROR) << "VectorFst::Read: read failed: " << opts.source;
+ delete impl;
+ return 0;
+ }
+ // The base-class AddArc() is used, so the epsilon counters are
+ // maintained by hand here.
+ impl->BaseImpl::AddArc(s, arc);
+ if (arc.ilabel == 0)
+ ++state->niepsilons;
+ if (arc.olabel == 0)
+ ++state->noepsilons;
+ }
+ }
+ // With a state count in the header, stopping early means the stream
+ // ended too soon.
+ if (hdr.NumStates() != kNoStateId && s != hdr.NumStates()) {
+ LOG(ERROR) << "VectorFst::Read: unexpected end of file: " << opts.source;
+ delete impl;
+ return 0;
+ }
+ return impl;
+}
+
+// Converts a string into a weight.
+// The call operator is defined generically below and specialized for
+// TropicalWeight and LogWeight.
+template <class W> class WeightFromString {
+ public:
+ W operator()(const string &s);
+};
+
+// Generic case fails.
+// Reports an obsolete-file-format error through FSTERROR() and returns
+// W::NoWeight(); the argument is not examined.
+template <class W> inline
+W WeightFromString<W>::operator()(const string &s) {
+ FSTERROR() << "VectorFst::Read: Obsolete file format";
+ return W::NoWeight();
+}
+
+// TropicalWeight version.
+// Interprets the first sizeof(float) bytes of 's' as the raw bytes of a
+// float and wraps that value in a TropicalWeight.
+// NOTE(review): the length of 's' is not checked before the copy; callers
+// presumably always pass at least sizeof(float) bytes -- confirm.
+template <> inline
+TropicalWeight WeightFromString<TropicalWeight>::operator()(const string &s) {
+ float f;
+ memcpy(&f, s.data(), sizeof(f));
+ return TropicalWeight(f);
+}
+
+// LogWeight version.
+// Interprets the first sizeof(float) bytes of 's' as the raw bytes of a
+// float and wraps that value in a LogWeight.
+// NOTE(review): the length of 's' is not checked before the copy; callers
+// presumably always pass at least sizeof(float) bytes -- confirm.
+template <> inline
+LogWeight WeightFromString<LogWeight>::operator()(const string &s) {
+ float f;
+ memcpy(&f, s.data(), sizeof(f));
+ return LogWeight(f);
+}
+
+// Simple concrete, mutable FST. This class attaches interface to
+// implementation and handles reference counting, delegating most
+// methods to ImplToMutableFst. Supports additional operations:
+// ReserveStates and ReserveArcs (cf. STL vectors).
+template <class A>
+class VectorFst : public ImplToMutableFst< VectorFstImpl<A> > {
+ public:
+ friend class StateIterator< VectorFst<A> >;
+ friend class ArcIterator< VectorFst<A> >;
+ friend class MutableArcIterator< VectorFst<A> >;
+ template <class F, class G> friend void Cast(const F &, G *);
+
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+ typedef VectorFstImpl<A> Impl;
+
+ // Creates an empty FST backed by a new implementation object.
+ VectorFst() : ImplToMutableFst<Impl>(new Impl) {}
+
+ // Creates an FST whose new implementation object is built from 'fst'.
+ explicit VectorFst(const Fst<A> &fst)
+ : ImplToMutableFst<Impl>(new Impl(fst)) {}
+
+ // Copy construction is delegated to the ImplToMutableFst copy constructor.
+ VectorFst(const VectorFst<A> &fst) : ImplToMutableFst<Impl>(fst) {}
+
+ // Get a copy of this VectorFst. See Fst<>::Copy() for further doc.
+ // The 'safe' argument is not used by this implementation.
+ virtual VectorFst<A> *Copy(bool safe = false) const {
+ return new VectorFst<A>(*this);
+ }
+
+ // Assignment from another VectorFst installs that FST's implementation
+ // object through SetImpl(), passing own_impl = false.
+ VectorFst<A> &operator=(const VectorFst<A> &fst) {
+ SetImpl(fst.GetImpl(), false);
+ return *this;
+ }
+
+ // Assignment from an arbitrary Fst builds a new implementation from it;
+ // self-assignment is a no-op.
+ virtual VectorFst<A> &operator=(const Fst<A> &fst) {
+ if (this != &fst) SetImpl(new Impl(fst));
+ return *this;
+ }
+
+ // Read a VectorFst from an input stream; return NULL on error
+ static VectorFst<A> *Read(istream &strm, const FstReadOptions &opts) {
+ Impl* impl = Impl::Read(strm, opts);
+ return impl ? new VectorFst<A>(impl) : 0;
+ }
+
+ // Read a VectorFst from a file; return NULL on error
+ // Empty filename reads from standard input
+ static VectorFst<A> *Read(const string &filename) {
+ Impl* impl = ImplToExpandedFst<Impl, MutableFst<A> >::Read(filename);
+ return impl ? new VectorFst<A>(impl) : 0;
+ }
+
+ // Writes this FST to 'strm' using WriteFst() below.
+ virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
+ return WriteFst(*this, strm, opts);
+ }
+
+ virtual bool Write(const string &filename) const {
+ return Fst<A>::WriteFile(filename);
+ }
+
+ // Writes any FST of type F in the "vector" file format; defined below.
+ template <class F>
+ static bool WriteFst(const F &fst, ostream &strm,
+ const FstWriteOptions &opts);
+
+ // MutateCheck() is called before the implementation is modified.
+ void ReserveStates(StateId n) {
+ MutateCheck();
+ GetImpl()->ReserveStates(n);
+ }
+
+ void ReserveArcs(StateId s, size_t n) {
+ MutateCheck();
+ GetImpl()->ReserveArcs(s, n);
+ }
+
+ virtual void InitStateIterator(StateIteratorData<Arc> *data) const {
+ GetImpl()->InitStateIterator(data);
+ }
+
+ virtual void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const {
+ GetImpl()->InitArcIterator(s, data);
+ }
+
+ // Declared here; the definition is not part of this class body.
+ virtual inline
+ void InitMutableArcIterator(StateId s, MutableArcIteratorData<A> *);
+
+ private:
+ // Wraps an existing implementation object; used by the Read() methods.
+ explicit VectorFst(Impl *impl) : ImplToMutableFst<Impl>(impl) {}
+
+ // Makes visible to friends.
+ Impl *GetImpl() const { return ImplToFst< Impl, MutableFst<A> >::GetImpl(); }
+
+ void SetImpl(Impl *impl, bool own_impl = true) {
+ ImplToFst< Impl, MutableFst<A> >::SetImpl(impl, own_impl);
+ }
+
+ void MutateCheck() { return ImplToMutableFst<Impl>::MutateCheck(); }
+};
+
+// Specialization for VectorFst; see generic version in fst.h
+// for sample usage (but use the VectorFst type!). This version
+// should inline.
+// The number of states is read once at construction; the iterator then
+// simply counts state ids from 0 up to that number.
+template <class A>
+class StateIterator< VectorFst<A> > {
+ public:
+ typedef typename A::StateId StateId;
+
+ explicit StateIterator(const VectorFst<A> &fst)
+ : nstates_(fst.GetImpl()->NumStates()), s_(0) {}
+
+ // True once every state id has been visited.
+ bool Done() const { return s_ >= nstates_; }
+
+ // The current state id.
+ StateId Value() const { return s_; }
+
+ void Next() { ++s_; }
+
+ void Reset() { s_ = 0; }
+
+ private:
+ StateId nstates_; // Number of states captured at construction.
+ StateId s_; // Current state id.
+
+ DISALLOW_COPY_AND_ASSIGN(StateIterator);
+};
+
+// Writes Fst to file, will call CountStates so may involve two passes if
+// called from an Fst that is not derived from Expanded.
+//
+// Output layout: the header, then for every state its final weight, an
+// int64 arc count, and for each arc its ilabel, olabel, weight and
+// nextstate. Returns false (after logging) if the stream has failed after
+// the final flush or if the number of states written differs from the
+// count stored in the header; otherwise returns true, or the result of
+// UpdateFstHeader() when the header is rewritten at the end.
+template <class A>
+template <class F>
+bool VectorFst<A>::WriteFst(const F &fst, ostream &strm,
+ const FstWriteOptions &opts) {
+ static const int kFileVersion = 2;
+ bool update_header = true;
+ FstHeader hdr;
+ hdr.SetStart(fst.Start());
+ hdr.SetNumStates(kNoStateId);
+ size_t start_offset = 0;
+ // The state count is computed up front when the FST reports kExpanded
+ // or when tellp() on the stream does not return -1; otherwise the header
+ // is first written with kNoStateId and updated after the states.
+ // NOTE(review): tellp() is only evaluated when the kExpanded test fails,
+ // and on that path 'start_offset' is only used if tellp() returned -1 --
+ // confirm this is the intended condition.
+ if (fst.Properties(kExpanded, false) || (start_offset = strm.tellp()) != -1) {
+ hdr.SetNumStates(CountStates(fst));
+ update_header = false;
+ }
+ FstImpl<A>::WriteFstHeader(fst, strm, opts, kFileVersion, "vector", &hdr);
+ StateId num_states = 0;
+ for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
+ typename A::StateId s = siter.Value();
+ fst.Final(s).Write(strm);
+ int64 narcs = fst.NumArcs(s);
+ WriteType(strm, narcs);
+ for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) {
+ const A &arc = aiter.Value();
+ WriteType(strm, arc.ilabel);
+ WriteType(strm, arc.olabel);
+ arc.weight.Write(strm);
+ WriteType(strm, arc.nextstate);
+ }
+ num_states++;
+ }
+ strm.flush();
+ if (!strm) {
+ LOG(ERROR) << "VectorFst::Write: write failed: " << opts.source;
+ return false;
+ }
+ if (update_header) {
+ // Now that the states have been counted, rewrite the header in place.
+ hdr.SetNumStates(num_states);
+ return FstImpl<A>::UpdateFstHeader(fst, strm, opts, kFileVersion, "vector",
+ &hdr, start_offset);
+ } else {
+ // The count written in the header must match what was actually written.
+ if (num_states != hdr.NumStates()) {
+ LOG(ERROR) << "Inconsistent number of states observed during write";
+ return false;
+ }
+ }
+ return true;
+}
+
+// ArcIterator specialization for VectorFst. Holds a reference to the arc
+// vector of one state plus an index into it; every member is defined
+// in-class. See the generic version in fst.h for sample usage.
+template <class A>
+class ArcIterator< VectorFst<A> > {
+ public:
+  typedef typename A::StateId StateId;
+
+  ArcIterator(const VectorFst<A> &fst, StateId s)
+      : state_arcs_(fst.GetImpl()->GetState(s)->arcs), pos_(0) {}
+
+  // Positioning.
+  void Reset() { pos_ = 0; }
+  void Next() { ++pos_; }
+  void Seek(size_t a) { pos_ = a; }
+  size_t Position() const { return pos_; }
+
+  // True once the position is no longer inside the arc vector.
+  bool Done() const { return !(pos_ < state_arcs_.size()); }
+
+  // Arc at the current position; no bounds check is performed.
+  const A& Value() const { return state_arcs_[pos_]; }
+
+  // Always reports kArcValueFlags.
+  uint32 Flags() const { return kArcValueFlags; }
+
+  // No-op: both arguments are ignored.
+  void SetFlags(uint32 f, uint32 m) {}
+
+ private:
+  const vector<A>& state_arcs_;  // Arcs of the state being iterated.
+  size_t pos_;                   // Index of the current arc.
+
+  DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+// MutableArcIterator specialization for VectorFst; see the generic version
+// in fst.h for sample usage (but use the VectorFst type!). Edits the arcs of
+// one state in place and adjusts the FST's stored property bits to match.
+template <class A>
+class MutableArcIterator< VectorFst<A> >
+ : public MutableArcIteratorBase<A> {
+ public:
+ typedef typename A::StateId StateId;
+ typedef typename A::Weight Weight;
+
+ MutableArcIterator(VectorFst<A> *fst, StateId s) : i_(0) {
+ fst->MutateCheck();  // NOTE(review): presumably un-shares the impl; confirm.
+ state_ = fst->GetImpl()->GetState(s);
+ properties_ = &fst->GetImpl()->properties_;  // Edited by SetValue().
+ }
+
+ bool Done() const { return i_ >= state_->arcs.size(); }
+
+ const A& Value() const { return state_->arcs[i_]; }
+
+ void Next() { ++i_; }
+
+ size_t Position() const { return i_; }
+
+ void Reset() { i_ = 0; }
+
+ void Seek(size_t a) { i_ = a; }
+ // Replaces the current arc, updating epsilon counts and property bits.
+ void SetValue(const A &arc) {
+ A& oarc = state_->arcs[i_];  // Old arc: retract what it contributed first.
+ if (oarc.ilabel != oarc.olabel)
+ *properties_ &= ~kNotAcceptor;
+ if (oarc.ilabel == 0) {
+ --state_->niepsilons;
+ *properties_ &= ~kIEpsilons;
+ if (oarc.olabel == 0)
+ *properties_ &= ~kEpsilons;
+ }
+ if (oarc.olabel == 0) {
+ --state_->noepsilons;
+ *properties_ &= ~kOEpsilons;
+ }
+ if (oarc.weight != Weight::Zero() && oarc.weight != Weight::One())
+ *properties_ &= ~kWeighted;
+ oarc = arc;  // From here on, account for the new arc.
+ if (arc.ilabel != arc.olabel) {
+ *properties_ |= kNotAcceptor;
+ *properties_ &= ~kAcceptor;
+ }
+ if (arc.ilabel == 0) {
+ ++state_->niepsilons;
+ *properties_ |= kIEpsilons;
+ *properties_ &= ~kNoIEpsilons;
+ if (arc.olabel == 0) {
+ *properties_ |= kEpsilons;
+ *properties_ &= ~kNoEpsilons;
+ }
+ }
+ if (arc.olabel == 0) {
+ ++state_->noepsilons;
+ *properties_ |= kOEpsilons;
+ *properties_ &= ~kNoOEpsilons;
+ }
+ if (arc.weight != Weight::Zero() && arc.weight != Weight::One()) {
+ *properties_ |= kWeighted;
+ *properties_ &= ~kUnweighted;
+ }
+ *properties_ &= kSetArcProperties | kAcceptor | kNotAcceptor |  // Mask: all
+ kEpsilons | kNoEpsilons | kIEpsilons | kNoIEpsilons |  // bits not listed
+ kOEpsilons | kNoOEpsilons | kWeighted | kUnweighted;  // here are cleared.
+ }
+
+ uint32 Flags() const {
+ return kArcValueFlags;  // The only value this iterator ever reports.
+ }
+
+ void SetFlags(uint32 f, uint32 m) {}  // No-op: arguments are ignored.
+
+
+ private:
+ // This allows base-class virtual access to non-virtual derived-
+ // class members of the same name. It makes the derived class more
+ // efficient to use but unsafe to further derive.
+ virtual bool Done_() const { return Done(); }
+ virtual const A& Value_() const { return Value(); }
+ virtual void Next_() { Next(); }
+ virtual size_t Position_() const { return Position(); }
+ virtual void Reset_() { Reset(); }
+ virtual void Seek_(size_t a) { Seek(a); }
+ virtual void SetValue_(const A &a) { SetValue(a); }
+ uint32 Flags_() const { return Flags(); }
+ void SetFlags_(uint32 f, uint32 m) { SetFlags(f, m); }
+
+ struct VectorState<A> *state_;  // State whose arcs are iterated.
+ uint64 *properties_;  // Points at the impl's stored property bits.
+ size_t i_;  // Index of the current arc.
+
+ DISALLOW_COPY_AND_ASSIGN(MutableArcIterator);
+};
+
+// Gives the generic mutable arc iterator a VectorFst-specific base for state s.
+template <class A> inline
+void VectorFst<A>::InitMutableArcIterator(
+ StateId s, MutableArcIteratorData<A> *data) {
+ data->base = new MutableArcIterator< VectorFst<A> >(this, s);
+}
+
+// A useful alias when using StdArc.
+typedef VectorFst<StdArc> StdVectorFst;
+
+} // namespace fst
+
+#endif // FST_LIB_VECTOR_FST_H__
diff --git a/src/include/fst/verify.h b/src/include/fst/verify.h
new file mode 100644
index 0000000..576cfca
--- /dev/null
+++ b/src/include/fst/verify.h
@@ -0,0 +1,126 @@
+// verify.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Function to verify an Fst's contents
+
+#ifndef FST_LIB_VERIFY_H__
+#define FST_LIB_VERIFY_H__
+
+#include <fst/fst.h>
+#include <fst/test-properties.h>
+
+
+namespace fst {
+
+// Returns true iff fst passes every check below; logs the first failure found.
+template<class Arc>
+bool Verify(const Fst<Arc> &fst, bool allow_negative_labels = false) {
+ typedef typename Arc::Label Label;
+ typedef typename Arc::Weight Weight;
+ typedef typename Arc::StateId StateId;
+
+ StateId start = fst.Start();
+ const SymbolTable *isyms = fst.InputSymbols();
+ const SymbolTable *osyms = fst.OutputSymbols();
+
+ // Count states by iterating; works for any Fst.
+ StateId ns = 0;
+ for (StateIterator< Fst<Arc> > siter(fst);
+ !siter.Done();
+ siter.Next())
+ ++ns;
+
+ if (start == kNoStateId && ns > 0) {  // Only an empty Fst may lack a start.
+ LOG(ERROR) << "Verify: Fst start state ID unset";
+ return false;
+ } else if (start >= ns) {
+ LOG(ERROR) << "Verify: Fst start state ID exceeds number of states";
+ return false;
+ }
+
+ for (StateIterator< Fst<Arc> > siter(fst);
+ !siter.Done();
+ siter.Next()) {
+ StateId s = siter.Value();
+ size_t na = 0;  // Position of the arc within state s, used in messages.
+ for (ArcIterator< Fst<Arc> > aiter(fst, s);
+ !aiter.Done();
+ aiter.Next()) {
+ const Arc &arc =aiter.Value();
+ if (!allow_negative_labels && arc.ilabel < 0) {
+ LOG(ERROR) << "Verify: Fst input label ID of arc at position "
+ << na << " of state " << s << " is negative";
+ return false;
+ } else if (isyms && isyms->Find(arc.ilabel) == "") {  // Only if table set.
+ LOG(ERROR) << "Verify: Fst input label ID " << arc.ilabel
+ << " of arc at position " << na << " of state " << s
+ << " is missing from input symbol table \""
+ << isyms->Name() << "\"";
+ return false;
+ } else if (!allow_negative_labels && arc.olabel < 0) {
+ LOG(ERROR) << "Verify: Fst output label ID of arc at position "
+ << na << " of state " << s << " is negative";
+ return false;
+ } else if (osyms && osyms->Find(arc.olabel) == "") {  // Only if table set.
+ LOG(ERROR) << "Verify: Fst output label ID " << arc.olabel
+ << " of arc at position " << na << " of state " << s
+ << " is missing from output symbol table \""
+ << osyms->Name() << "\"";
+ return false;
+ } else if (!arc.weight.Member() || arc.weight == Weight::Zero()) {  // Zero too.
+ LOG(ERROR) << "Verify: Fst weight of arc at position "
+ << na << " of state " << s << " is invalid";
+ return false;
+ } else if (arc.nextstate < 0) {
+ LOG(ERROR) << "Verify: Fst destination state ID of arc at position "
+ << na << " of state " << s << " is negative";
+ return false;
+ } else if (arc.nextstate >= ns) {
+ LOG(ERROR) << "Verify: Fst destination state ID of arc at position "
+ << na << " of state " << s
+ << " exceeds number of states";
+ return false;
+ }
+ ++na;
+ }
+ if (!fst.Final(s).Member()) {  // Unlike arcs, a Zero final weight is fine.
+ LOG(ERROR) << "Verify: Fst final weight of state " << s << " is invalid";
+ return false;
+ }
+ }
+ uint64 fst_props = fst.Properties(kFstProperties, false);  // Stored bits.
+ if (fst_props & kError) {
+ LOG(ERROR) << "Verify: Fst error property is set";
+ return false;
+ }
+ // Stored property bits must be compatible with the recomputed ones.
+ uint64 known_props;  // Output of ComputeProperties(); not examined here.
+ uint64 test_props = ComputeProperties(fst, kFstProperties, &known_props,
+ false);
+ if (!CompatProperties(fst_props, test_props)) {
+ LOG(ERROR) << "Verify: stored Fst properties incorrect "
+ << "(props1 = stored props, props2 = tested)";
+ return false;
+ } else {
+ return true;
+ }
+}
+
+} // namespace fst
+
+#endif // FST_LIB_VERIFY_H__
diff --git a/src/include/fst/visit.h b/src/include/fst/visit.h
new file mode 100644
index 0000000..31a00a8
--- /dev/null
+++ b/src/include/fst/visit.h
@@ -0,0 +1,270 @@
+// visit.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Queue-dependent visitation of finite-state transducers. See also
+// dfs-visit.h.
+
+#ifndef FST_LIB_VISIT_H__
+#define FST_LIB_VISIT_H__
+
+
+#include <fst/arcfilter.h>
+#include <fst/mutable-fst.h>
+
+
+namespace fst {
+
+// Visitor Interface - class determines actions taken during a visit.
+// If any of the boolean member functions return false, the visit is
+// aborted by first calling FinishState() on all unfinished (grey)
+// states and then calling FinishVisit().
+//
+// Note this is more general than the visitor interface in
+// dfs-visit.h but lacks some DFS-specific behavior.
+//
+// template <class Arc>
+// class Visitor {
+// public:
+// typedef typename Arc::StateId StateId;
+//
+// Visitor(T *return_data);
+// // Invoked before visit
+// void InitVisit(const Fst<Arc> &fst);
+// // Invoked when state discovered (2nd arg is visitation root)
+// bool InitState(StateId s, StateId root);
+// // Invoked when arc to white/undiscovered state examined
+// bool WhiteArc(StateId s, const Arc &a);
+// // Invoked when arc to grey/unfinished state examined
+// bool GreyArc(StateId s, const Arc &a);
+// // Invoked when arc to black/finished state examined
+// bool BlackArc(StateId s, const Arc &a);
+// // Invoked when state finished.
+// void FinishState(StateId s);
+// // Invoked after visit
+// void FinishVisit();
+// };
+
+// Performs queue-dependent visitation. Visitor class argument
+// determines actions and contains any return data. ArcFilter
+// determines arcs that are considered.
+//
+// Note this is more general than DfsVisit() in dfs-visit.h but lacks
+// some DFS-specific Visitor behavior.
+template <class Arc, class V, class Q, class ArcFilter>
+void Visit(const Fst<Arc> &fst, V *visitor, Q *queue, ArcFilter filter) {
+
+ typedef typename Arc::StateId StateId;
+ typedef ArcIterator< Fst<Arc> > AIterator;
+
+ visitor->InitVisit(fst);
+
+ StateId start = fst.Start();
+ if (start == kNoStateId) {  // No start state: nothing to visit.
+ visitor->FinishVisit();
+ return;
+ }
+
+ // An Fst state's visit color
+ const unsigned kWhiteState = 0x01; // Undiscovered
+ const unsigned kGreyState = 0x02; // Discovered & unfinished
+ const unsigned kBlackState = 0x04; // Finished
+
+ // We destroy an iterator as soon as possible and mark it so
+ const unsigned kArcIterDone = 0x08; // Arc iterator done and destroyed
+
+ vector<unsigned char> state_status;  // Color (plus kArcIterDone) per state.
+ vector<AIterator *> arc_iterator;  // Live arc iterator per state, or 0.
+
+ StateId nstates = start + 1; // # of known states in general case
+ bool expanded = false;
+ if (fst.Properties(kExpanded, false)) { // tests if expanded case, then
+ nstates = CountStates(fst); // uses ExpandedFst::NumStates().
+ expanded = true;
+ }
+
+ state_status.resize(nstates, kWhiteState);
+ arc_iterator.resize(nstates);
+ StateIterator< Fst<Arc> > siter(fst);  // Consulted only when !expanded.
+
+ // Continues visit while true; set from the visitor's boolean callbacks.
+ bool visit = true;
+
+ // Iterates over trees in visit forest.
+ for (StateId root = start; visit && root < nstates;) {
+ visit = visitor->InitState(root, root);
+ state_status[root] = kGreyState;
+ queue->Enqueue(root);
+ while (!queue->Empty()) {
+ StateId s = queue->Head();
+ if (s >= state_status.size()) {  // Grow the tables for an unseen state ID.
+ nstates = s + 1;
+ state_status.resize(nstates, kWhiteState);
+ arc_iterator.resize(nstates);
+ }
+ // Creates arc iterator if needed.
+ if (arc_iterator[s] == 0 && !(state_status[s] & kArcIterDone) && visit)
+ arc_iterator[s] = new AIterator(fst, s);
+ // Deletes arc iterator if done, or unconditionally once aborting.
+ AIterator *aiter = arc_iterator[s];
+ if ((aiter && aiter->Done()) || !visit) {
+ delete aiter;
+ arc_iterator[s] = 0;
+ state_status[s] |= kArcIterDone;
+ }
+ // Dequeues state and marks black if done
+ if (state_status[s] & kArcIterDone) {
+ queue->Dequeue();
+ visitor->FinishState(s);
+ state_status[s] = kBlackState;
+ continue;
+ }
+
+ const Arc &arc = aiter->Value();
+ if (arc.nextstate >= state_status.size()) {  // Destination not seen yet.
+ nstates = arc.nextstate + 1;
+ state_status.resize(nstates, kWhiteState);
+ arc_iterator.resize(nstates);
+ }
+ // Visits respective arc types
+ if (filter(arc)) {
+ // Enqueues destination state and marks grey if white
+ if (state_status[arc.nextstate] == kWhiteState) {
+ visit = visitor->WhiteArc(s, arc);
+ if (!visit) continue;  // Aborting: destination is not initialized/queued.
+ visit = visitor->InitState(arc.nextstate, root);
+ state_status[arc.nextstate] = kGreyState;
+ queue->Enqueue(arc.nextstate);
+ } else if (state_status[arc.nextstate] == kBlackState) {
+ visit = visitor->BlackArc(s, arc);
+ } else {
+ visit = visitor->GreyArc(s, arc);
+ }
+ }
+ aiter->Next();
+ // Destroys an iterator ASAP for efficiency.
+ if (aiter->Done()) {
+ delete aiter;
+ arc_iterator[s] = 0;
+ state_status[s] |= kArcIterDone;
+ }
+ }
+ // Finds next tree root: after the start tree, scanning begins at state 0.
+ for (root = root == start ? 0 : root + 1;
+ root < nstates && state_status[root] != kWhiteState;
+ ++root);
+
+ // Check for a state beyond the largest known state
+ if (!expanded && root == nstates) {
+ for (; !siter.Done(); siter.Next()) {
+ if (siter.Value() == nstates) {  // Found one more state: track it.
+ ++nstates;
+ state_status.push_back(kWhiteState);
+ arc_iterator.push_back(0);
+ break;
+ }
+ }
+ }
+ }
+ visitor->FinishVisit();
+}
+
+template <class Arc, class V, class Q>
+inline void Visit(const Fst<Arc> &fst, V *visitor, Q* queue) {
+  AnyArcFilter<Arc> arc_filter;  // Filter type supplied by this overload.
+  Visit(fst, visitor, queue, arc_filter);
+}
+
+// Visitor that rebuilds the visited FST inside a caller-supplied MutableFst.
+// Arcs are added in the order in which the queue-driven visit examines them.
+template <class A>
+class CopyVisitor {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+
+  CopyVisitor(MutableFst<Arc> *ofst) : ifst_(0), ofst_(ofst) {}
+
+  // Remembers the input FST, empties the output and copies the start state.
+  void InitVisit(const Fst<A> &ifst) {
+    ifst_ = &ifst;
+    ofst_->DeleteStates();
+    ofst_->SetStart(ifst.Start());
+  }
+
+  // Adds states to the output until state s exists there.
+  bool InitState(StateId s, StateId) {
+    for (; ofst_->NumStates() <= s; ofst_->AddState()) {}
+    return true;
+  }
+
+  // Every examined arc is copied, whatever the color of its destination.
+  bool WhiteArc(StateId s, const Arc &arc) { return CopyArc(s, arc); }
+
+  bool GreyArc(StateId s, const Arc &arc) { return CopyArc(s, arc); }
+
+  bool BlackArc(StateId s, const Arc &arc) { return CopyArc(s, arc); }
+
+  // Copies the final weight of state s from the input FST.
+  void FinishState(StateId s) { ofst_->SetFinal(s, ifst_->Final(s)); }
+
+  // Nothing to finalize.
+  void FinishVisit() {}
+
+ private:
+  // Appends arc to state s of the output; always lets the visit continue.
+  bool CopyArc(StateId s, const Arc &arc) {
+    ofst_->AddArc(s, arc);
+    return true;
+  }
+
+  const Fst<Arc> *ifst_;   // Input FST; null until InitVisit() is called.
+  MutableFst<Arc> *ofst_;  // Output FST being filled.
+};
+
+
+// Visits input FST up to a state limit following queue order.
+template <class A>
+class PartialVisitor {
+ public:
+  typedef A Arc;
+  typedef typename A::StateId StateId;
+
+  explicit PartialVisitor(StateId maxvisit) : maxvisit_(maxvisit), nvisit_(0) {}
+
+  void InitVisit(const Fst<A> &ifst) { nvisit_ = 0; }  // Restarts the count.
+
+  // Counts the discovered state; returns false once the limit is exceeded.
+  bool InitState(StateId s, StateId) {
+    ++nvisit_;
+    return nvisit_ <= maxvisit_;
+  }
+
+  bool WhiteArc(StateId s, const Arc &arc) { return true; }
+  bool GreyArc(StateId s, const Arc &arc) { return true; }
+  bool BlackArc(StateId s, const Arc &arc) { return true; }
+  void FinishState(StateId s) {}
+  void FinishVisit() {}
+
+ private:
+  StateId maxvisit_;  // Maximum number of states InitState() will accept.
+  StateId nvisit_;    // Discovered states; zeroed by ctor and InitVisit().
+};
+
+
+} // namespace fst
+
+#endif // FST_LIB_VISIT_H__
diff --git a/src/include/fst/weight.h b/src/include/fst/weight.h
new file mode 100644
index 0000000..72f5a22
--- /dev/null
+++ b/src/include/fst/weight.h
@@ -0,0 +1,179 @@
+// weight.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// General weight set and associated semiring operation definitions.
+//
+// A semiring is specified by two binary operations Plus and Times and
+// two designated elements Zero and One with the following properties:
+// Plus: associative, commutative, and has Zero as its identity.
+// Times: associative and has identity One, distributes w.r.t. Plus, and
+// has Zero as an annihilator:
+// Times(Zero(), a) == Times(a, Zero()) == Zero().
+//
+// A left semiring distributes on the left; a right semiring is
+// similarly defined.
+//
+// A Weight class is required to be (at least) a left or right semiring.
+//
+// In addition, the following should be defined for a Weight:
+// Member: predicate on set membership.
+// NoWeight: returns an element that is not a member, should only be
+// used to signal an error.
+// >>: reads weight.
+// <<: prints weight.
+// Read(istream &strm): reads from an input stream.
+// Write(ostream &strm): writes to an output stream.
+// Hash: maps weight to size_t.
+// ApproxEqual: approximate equality (for inexact weights)
+// Quantize: quantizes wrt delta (for inexact weights)
+// Divide: for all a,b,c s.t. Times(a, b) == c
+// --> b' = Divide(c, a, DIVIDE_LEFT) if a left semiring, b'.Member()
+// and Times(a, b') == c
+// --> a' = Divide(c, b, DIVIDE_RIGHT) if a right semiring, a'.Member()
+// and Times(a', b) == c
+// --> b' = Divide(c, a)
+// = Divide(c, a, DIVIDE_ANY)
+// = Divide(c, a, DIVIDE_LEFT)
+// = Divide(c, a, DIVIDE_RIGHT) if a commutative semiring,
+// b'.Member() and Times(a, b') == Times(b', a) == c
+// ReverseWeight: the type of the corresponding reverse weight.
+// Typically the same type as Weight for a (both left and right) semiring.
+// For the left string semiring, it is the right string semiring.
+// Reverse: a mapping from Weight to ReverseWeight s.t.
+// --> Reverse(Reverse(a)) = a
+// --> Reverse(Plus(a, b)) = Plus(Reverse(a), Reverse(b))
+// --> Reverse(Times(a, b)) = Times(Reverse(b), Reverse(a))
+// Typically the identity mapping in a (both left and right) semiring.
+// In the left string semiring, it maps to the reverse string
+// in the right string semiring.
+// Properties: specifies additional properties that hold:
+// LeftSemiring: indicates weights form a left semiring.
+// RightSemiring: indicates weights form a right semiring.
+// Commutative: for all a,b: Times(a,b) == Times(b,a)
+// Idempotent: for all a: Plus(a, a) == a.
+// Path Property: for all a, b: Plus(a, b) == a or Plus(a, b) == b.
+
+
+#ifndef FST_LIB_WEIGHT_H__
+#define FST_LIB_WEIGHT_H__
+
+#include <cmath>
+#include <cctype>
+#include <iostream>
+#include <sstream>
+
+#include <fst/compat.h>
+
+#include <fst/util.h>
+
+
+namespace fst {
+
+//
+// CONSTANT DEFINITIONS
+//
+
+// A representable float near .001
+const float kDelta = 1.0F/1024.0F;
+
+// For all a,b,c: Times(c, Plus(a,b)) = Plus(Times(c,a), Times(c, b))
+const uint64 kLeftSemiring = 0x0000000000000001ULL;
+
+// For all a,b,c: Times(Plus(a,b), c) = Plus(Times(a,c), Times(b, c))
+const uint64 kRightSemiring = 0x0000000000000002ULL;
+
+const uint64 kSemiring = kLeftSemiring | kRightSemiring;
+
+// For all a,b: Times(a,b) = Times(b,a)
+const uint64 kCommutative = 0x0000000000000004ULL;
+
+// For all a: Plus(a, a) = a
+const uint64 kIdempotent = 0x0000000000000008ULL;
+
+// For all a,b: Plus(a,b) = a or Plus(a,b) = b
+const uint64 kPath = 0x0000000000000010ULL;
+
+
+// Determines direction of division.
+enum DivideType { DIVIDE_LEFT, // left division
+ DIVIDE_RIGHT, // right division
+ DIVIDE_ANY }; // division in a commutative semiring
+
+// NATURAL ORDER
+//
+// By definition:
+// a <= b iff a + b = a
+// The natural order is a negative partial order iff the semiring is
+// idempotent. It is trivially monotonic for plus. It is left
+// (resp. right) monotonic for times iff the semiring is left
+// (resp. right) distributive. It is a total order iff the semiring
+// has the path property. See Mohri, "Semiring Framework and
+// Algorithms for Shortest-Distance Problems", Journal of Automata,
+// Languages and Combinatorics 7(3):321-350, 2002. We define the
+// strict version of this order below.
+
+template <class W>
+class NaturalLess {
+ public:
+  typedef W Weight;
+
+  NaturalLess() {  // Reports an FSTERROR unless W declares kIdempotent.
+    const bool idempotent = (W::Properties() & kIdempotent) != 0;
+    if (idempotent) return;
+    FSTERROR() << "NaturalLess: Weight type is not idempotent: " << W::Type();
+  }
+
+  // Strict natural order: Plus(w1, w2) == w1 holds and w1 differs from w2.
+  bool operator()(const W &w1, const W &w2) const {
+    return Plus(w1, w2) == w1 ? w1 != w2 : false;
+  }
+};
+
+
+// Power(w, n) is the n-fold Times-product of w in an arbitrary semiring:
+//   Power(w, 0) == W::One()
+//   Power(w, n) == Times(Power(w, n - 1), w)
+// It is computed with n successive multiplications on the right by w.
+template <class W>
+W Power(W w, size_t n) {
+  W product = W::One();
+  while (n-- > 0) {
+    product = Times(product, w);
+  }
+  return product;
+}
+
+// General weight converter: reports an FSTERROR and returns W2::NoWeight().
+template <class W1, class W2>
+struct WeightConvert {
+  W2 operator()(W1 w1) const {
+    FSTERROR() << "WeightConvert: can't convert weight from \""
+               << W1::Type() << "\" to \"" << W2::Type() << "\"";
+    return W2::NoWeight();
+  }
+};
+
+// Specialized weight converter to self: returns its argument unchanged.
+template <class W>
+struct WeightConvert<W, W> {
+  W operator()(W w) const { return w; }
+};
+
+} // namespace fst
+
+#endif // FST_LIB_WEIGHT_H__
diff --git a/src/lib/Makefile.am b/src/lib/Makefile.am
new file mode 100644
index 0000000..9c3ef63
--- /dev/null
+++ b/src/lib/Makefile.am
@@ -0,0 +1,6 @@
+AM_CPPFLAGS = -I$(srcdir)/../include $(ICU_CPPFLAGS)
+
+lib_LTLIBRARIES = libfst.la
+libfst_la_SOURCES = compat.cc flags.cc fst.cc properties.cc \
+symbol-table.cc util.cc symbol-table-ops.cc
+libfst_la_LDFLAGS = -version-info 0:0:0
diff --git a/src/lib/Makefile.in b/src/lib/Makefile.in
new file mode 100644
index 0000000..69f3928
--- /dev/null
+++ b/src/lib/Makefile.in
@@ -0,0 +1,540 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = src/lib
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_icu.m4 \
+ $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+ $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+ $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h \
+ $(top_builddir)/src/include/fst/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+ *) f=$$p;; \
+ esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+ if (++n[$$2] == $(am__install_max)) \
+ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+ END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__installdirs = "$(DESTDIR)$(libdir)"
+LTLIBRARIES = $(lib_LTLIBRARIES)
+libfst_la_LIBADD =
+am_libfst_la_OBJECTS = compat.lo flags.lo fst.lo properties.lo \
+ symbol-table.lo util.lo symbol-table-ops.lo
+libfst_la_OBJECTS = $(am_libfst_la_OBJECTS)
+libfst_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
+ $(CXXFLAGS) $(libfst_la_LDFLAGS) $(LDFLAGS) -o $@
+DEFAULT_INCLUDES =
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+LTCXXCOMPILE = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+CXXLD = $(CXX)
+CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
+ $(LDFLAGS) -o $@
+SOURCES = $(libfst_la_SOURCES)
+DIST_SOURCES = $(libfst_la_SOURCES)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+ICU_CFLAGS = @ICU_CFLAGS@
+ICU_CONFIG = @ICU_CONFIG@
+ICU_CPPFLAGS = @ICU_CPPFLAGS@
+ICU_CXXFLAGS = @ICU_CXXFLAGS@
+ICU_LIBS = @ICU_LIBS@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+libfstdir = @libfstdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+lt_ECHO = @lt_ECHO@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+AM_CPPFLAGS = -I$(srcdir)/../include $(ICU_CPPFLAGS)
+lib_LTLIBRARIES = libfst.la
+libfst_la_SOURCES = compat.cc flags.cc fst.cc properties.cc \
+symbol-table.cc util.cc symbol-table-ops.cc
+
+libfst_la_LDFLAGS = -version-info 0:0:0
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .cc .lo .o .obj
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/lib/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign src/lib/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-libLTLIBRARIES: $(lib_LTLIBRARIES)
+ @$(NORMAL_INSTALL)
+ test -z "$(libdir)" || $(MKDIR_P) "$(DESTDIR)$(libdir)"
+ @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
+ list2=; for p in $$list; do \
+ if test -f $$p; then \
+ list2="$$list2 $$p"; \
+ else :; fi; \
+ done; \
+ test -z "$$list2" || { \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \
+ }
+
+uninstall-libLTLIBRARIES:
+ @$(NORMAL_UNINSTALL)
+ @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
+ for p in $$list; do \
+ $(am__strip_dir) \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \
+ done
+
+clean-libLTLIBRARIES:
+ -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES)
+ @list='$(lib_LTLIBRARIES)'; for p in $$list; do \
+ dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+ test "$$dir" != "$$p" || dir=.; \
+ echo "rm -f \"$${dir}/so_locations\""; \
+ rm -f "$${dir}/so_locations"; \
+ done
+libfst.la: $(libfst_la_OBJECTS) $(libfst_la_DEPENDENCIES)
+ $(libfst_la_LINK) -rpath $(libdir) $(libfst_la_OBJECTS) $(libfst_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compat.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/flags.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fst.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/properties.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/symbol-table-ops.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/symbol-table.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/util.Plo@am__quote@
+
+.cc.o:
+@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $<
+
+.cc.obj:
+@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.cc.lo:
+@am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(LTCXXCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ mkid -fID $$unique
+tags: TAGS
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ set x; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: CTAGS
+CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES)
+installdirs:
+ for dir in "$(DESTDIR)$(libdir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \
+ mostlyclean-am
+
+distclean: distclean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-libLTLIBRARIES
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-libLTLIBRARIES
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
+ clean-libLTLIBRARIES clean-libtool ctags distclean \
+ distclean-compile distclean-generic distclean-libtool \
+ distclean-tags distdir dvi dvi-am html html-am info info-am \
+ install install-am install-data install-data-am install-dvi \
+ install-dvi-am install-exec install-exec-am install-html \
+ install-html-am install-info install-info-am \
+ install-libLTLIBRARIES install-man install-pdf install-pdf-am \
+ install-ps install-ps-am install-strip installcheck \
+ installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-compile \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags uninstall uninstall-am uninstall-libLTLIBRARIES
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/src/lib/compat.cc b/src/lib/compat.cc
new file mode 100644
index 0000000..2faabc5
--- /dev/null
+++ b/src/lib/compat.cc
@@ -0,0 +1,44 @@
+// compat.cc
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Google compatibility definitions.
+
+#include <cstring>
+#include <fst/compat.h>
+
+using namespace std;
+
+// Prints "Memory allocation failed" to cerr and terminates the process with
+// exit status 1.
+// NOTE(review): the name suggests this is meant to be installed as the
+// operator-new failure handler; the installation is not visible in this file.
+void FailedNewHandler() {
+ cerr << "Memory allocation failed\n";
+ exit(1);
+}
+
+namespace fst {
+
+// Splits the C string 'full' in place at every character contained in
+// 'delim' and appends a pointer to each piece to 'vec'.
+//
+// Each delimiter found is overwritten with '\0', so the pointers pushed into
+// 'vec' refer to storage inside 'full' and are only valid while that buffer
+// is. When 'omit_empty_strings' is true, zero-length pieces are skipped.
+// A null 'full' produces no output.
+void SplitToVector(char* full, const char* delim, vector<char*>* vec,
+                   bool omit_empty_strings) {
+  if (!full)
+    return;
+  for (char *piece = full; ; ) {
+    // Terminate the current piece at the next delimiter, if there is one.
+    char *sep = strpbrk(piece, delim);
+    if (sep)
+      *sep = '\0';
+    const bool is_empty = (piece[0] == '\0');
+    if (!(omit_empty_strings && is_empty))
+      vec->push_back(piece);
+    if (!sep)
+      break;  // that was the last piece
+    piece = sep + 1;
+  }
+}
+} // namespace fst
diff --git a/src/lib/flags.cc b/src/lib/flags.cc
new file mode 100644
index 0000000..d05fd73
--- /dev/null
+++ b/src/lib/flags.cc
@@ -0,0 +1,103 @@
+// flags.cc
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Google-style flag handling definitions
+
+#include <cstring>
+
+#include <fst/compat.h>
+#include <fst/flags.h>
+
+// Flags defined for every program that links this file. Each DEFINE_* line
+// gives the flag name, its default value and its help text.
+// FLAGS_help is tested by SetFlags() below to decide whether to print usage.
+DEFINE_int32(v, 0, "verbose level");
+DEFINE_bool(help, false, "show usage information");
+DEFINE_string(tmpdir, "/tmp/", "temporary directory");
+
+using namespace std;
+
+// Usage text handed to SetFlags(); ShowUsage() prints it as its first line.
+static string flag_usage;
+
+// Parses the leading command-line flags and stores their values through the
+// flag registers.
+//
+//   usage        - text remembered for ShowUsage().
+//   argc, argv   - pointers to the program's argument count and vector.
+//   remove_flags - when true, the consumed flag arguments are removed from
+//                  *argv (slot 0 is kept) and *argc is reduced accordingly.
+//
+// Scanning starts at (*argv)[1] and stops at the first argument that does not
+// start with '-' or that is exactly "-". All leading '-' characters of a flag
+// are stripped; the remainder is split at the first '=' into a name and a
+// value (the value is empty when there is no '='). The registers are tried in
+// the order bool, string, int32, int64, double; the first one that accepts
+// the name wins. An argument no register accepts is reported via LOG(FATAL).
+//
+// If FLAGS_help is set after parsing, usage is printed and the process exits
+// with status 1.
+//
+// NOTE(review): when flags are removed, only indices [1, new *argc) are
+// rewritten; the slot at (*argv)[*argc] is left untouched.
+void SetFlags(const char *usage, int *argc, char ***argv, bool remove_flags) {
+  flag_usage = usage;
+  int index = 1;
+  for (; index < *argc; ++index) {
+    string argval = (*argv)[index];
+
+    if (argval[0] != '-' || argval == "-")
+      break;
+    while (argval[0] == '-')
+      argval = argval.substr(1);  // remove initial '-'s
+
+    string arg = argval;
+    string val = "";
+
+    // split argval (arg=val) into arg and val
+    // find() returns string::size_type; keeping that type avoids narrowing
+    // the result to int before comparing it with string::npos.
+    const string::size_type pos = argval.find("=");
+    if (pos != string::npos) {
+      arg = argval.substr(0, pos);
+      val = argval.substr(pos + 1);
+    }
+
+    FlagRegister<bool> *bool_register =
+      FlagRegister<bool>::GetRegister();
+    if (bool_register->SetFlag(arg, val))
+      continue;
+    FlagRegister<string> *string_register =
+      FlagRegister<string>::GetRegister();
+    if (string_register->SetFlag(arg, val))
+      continue;
+    FlagRegister<int32> *int32_register =
+      FlagRegister<int32>::GetRegister();
+    if (int32_register->SetFlag(arg, val))
+      continue;
+    FlagRegister<int64> *int64_register =
+      FlagRegister<int64>::GetRegister();
+    if (int64_register->SetFlag(arg, val))
+      continue;
+    FlagRegister<double> *double_register =
+      FlagRegister<double>::GetRegister();
+    if (double_register->SetFlag(arg, val))
+      continue;
+
+    LOG(FATAL) << "SetFlags: Bad option: " << (*argv)[index];
+  }
+
+  if (remove_flags) {
+    // Slide the non-flag arguments down so they follow (*argv)[0] directly.
+    for (int i = 0; i < *argc - index; ++i)
+      (*argv)[i + 1] = (*argv)[i + index];
+    *argc -= index - 1;
+  }
+
+  if (FLAGS_help) {
+    ShowUsage();
+    exit(1);
+  }
+}
+
+// Writes the saved usage text to cout, followed by the usage listing of each
+// flag register in the order bool, string, int32, int64, double.
+void ShowUsage() {
+  cout << flag_usage << "\n"
+       << " Flags Description:\n";
+  FlagRegister<bool>::GetRegister()->ShowUsage();
+  FlagRegister<string>::GetRegister()->ShowUsage();
+  FlagRegister<int32>::GetRegister()->ShowUsage();
+  FlagRegister<int64>::GetRegister()->ShowUsage();
+  FlagRegister<double>::GetRegister()->ShowUsage();
+}
diff --git a/src/lib/fst.cc b/src/lib/fst.cc
new file mode 100644
index 0000000..69bee35
--- /dev/null
+++ b/src/lib/fst.cc
@@ -0,0 +1,167 @@
+// fst.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// FST definitions.
+
+#include <fst/fst.h>
+
+// Include these so they are registered
+#include <fst/compact-fst.h>
+#include <fst/const-fst.h>
+#include <fst/matcher-fst.h>
+#include <fst/vector-fst.h>
+#include <fst/edit-fst.h>
+
+// FST flag definitions
+// Each DEFINE_* below gives the flag name, its default value and its help
+// text.
+
+DEFINE_bool(fst_verify_properties, false,
+ "Verify fst properties queried by TestProperties");
+
+DEFINE_string(fst_weight_separator, ",",
+ "Character separator between printed composite weights; "
+ "must be a single character");
+
+DEFINE_string(fst_weight_parentheses, "",
+ "Characters enclosing the first weight of a printed composite "
+ "weight (e.g. pair weight, tuple weight and derived classes) to "
+ "ensure proper I/O of nested composite weights; "
+ "must have size 0 (none) or 2 (open and close parenthesis)");
+
+DEFINE_bool(fst_default_cache_gc, true, "Enable garbage collection of cache");
+
+// NOTE(review): in `1<<20LL` the LL suffix is on the shift count, so the shift
+// itself is performed on a plain int. The value (1048576) fits, so the result
+// is unaffected; `1LL << 20` would express a 64-bit constant directly.
+DEFINE_int64(fst_default_cache_gc_limit, 1<<20LL,
+ "Cache byte size that triggers garbage collection");
+
+DEFINE_bool(fst_align, false, "Write FST data aligned where appropriate");
+
+DEFINE_string(save_relabel_ipairs, "", "Save input relabel pairs to file");
+DEFINE_string(save_relabel_opairs, "", "Save output relabel pairs to file");
+
+namespace fst {
+
+// Register VectorFst, ConstFst and EditFst for the common arc types
+// (StdArc, LogArc and Log64Arc).
+REGISTER_FST(VectorFst, StdArc);
+REGISTER_FST(VectorFst, LogArc);
+REGISTER_FST(VectorFst, Log64Arc);
+REGISTER_FST(ConstFst, StdArc);
+REGISTER_FST(ConstFst, LogArc);
+REGISTER_FST(ConstFst, Log64Arc);
+REGISTER_FST(EditFst, StdArc);
+REGISTER_FST(EditFst, LogArc);
+REGISTER_FST(EditFst, Log64Arc);
+
+// Register CompactFst for common arcs with the default (uint32) size type.
+// One file-local FstRegisterer object is defined per (arc, compactor)
+// combination: StdArc and LogArc, each with the String, WeightedString,
+// Acceptor, Unweighted and UnweightedAcceptor compactors.
+static FstRegisterer<
+ CompactFst<StdArc, StringCompactor<StdArc> > >
+CompactFst_StdArc_StringCompactor_registerer;
+static FstRegisterer<
+ CompactFst<LogArc, StringCompactor<LogArc> > >
+CompactFst_LogArc_StringCompactor_registerer;
+static FstRegisterer<
+ CompactFst<StdArc, WeightedStringCompactor<StdArc> > >
+CompactFst_StdArc_WeightedStringCompactor_registerer;
+static FstRegisterer<
+ CompactFst<LogArc, WeightedStringCompactor<LogArc> > >
+CompactFst_LogArc_WeightedStringCompactor_registerer;
+static FstRegisterer<
+ CompactFst<StdArc, AcceptorCompactor<StdArc> > >
+CompactFst_StdArc_AcceptorCompactor_registerer;
+static FstRegisterer<
+ CompactFst<LogArc, AcceptorCompactor<LogArc> > >
+CompactFst_LogArc_AcceptorCompactor_registerer;
+static FstRegisterer<
+ CompactFst<StdArc, UnweightedCompactor<StdArc> > >
+CompactFst_StdArc_UnweightedCompactor_registerer;
+static FstRegisterer<
+ CompactFst<LogArc, UnweightedCompactor<LogArc> > >
+CompactFst_LogArc_UnweightedCompactor_registerer;
+static FstRegisterer<
+ CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc> > >
+CompactFst_StdArc_UnweightedAcceptorCompactor_registerer;
+static FstRegisterer<
+ CompactFst<LogArc, UnweightedAcceptorCompactor<LogArc> > >
+CompactFst_LogArc_UnweightedAcceptorCompactor_registerer;
+
+// Fst type name strings for lookahead Fsts. They are defined with external
+// linkage so that other translation units can refer to them.
+extern const char arc_lookahead_fst_type[] = "arc_lookahead";
+extern const char ilabel_lookahead_fst_type[] = "ilabel_lookahead";
+extern const char olabel_lookahead_fst_type[] = "olabel_lookahead";
+
+// Identifies stream data as an FST (and its endianness). It is the first
+// value written by FstHeader::Write and the first value checked by
+// IsFstHeader and FstHeader::Read.
+static const int32 kFstMagicNumber = 2125659606;
+
+// Peeks at the stream to tell the caller whether its content starts with an
+// Fst header. One int32 is read at the current position and compared with
+// the Fst magic number; afterwards the stream is asked to seek back to the
+// position recorded before the read. 'source' is not used.
+bool IsFstHeader(istream &strm, const string &source) {
+  const int64 start = strm.tellg();
+  int32 magic_number = 0;
+  ReadType(strm, &magic_number);
+  const bool is_fst = (magic_number == kFstMagicNumber);
+  strm.seekg(start);
+  return is_fst;
+}
+
+// Check Fst magic number and read in Fst header.
+// If rewind = true, reposition stream to before call (if possible).
+//
+// 'source' is only used in the error messages. Returns false, after logging
+// an error, when the magic number does not match or when the stream is in a
+// failed state once all fields have been read; returns true otherwise.
+bool FstHeader::Read(istream &strm, const string &source, bool rewind) {
+ int64 pos = 0;
+ if (rewind) pos = strm.tellg();
+ int32 magic_number = 0;
+ ReadType(strm, &magic_number);
+ if (magic_number != kFstMagicNumber
+ ) {
+ LOG(ERROR) << "FstHeader::Read: Bad FST header: " << source;
+ if (rewind) strm.seekg(pos);
+ return false;
+ }
+
+ // Fields are read in the same order in which FstHeader::Write emits them.
+ ReadType(strm, &fsttype_);
+ ReadType(strm, &arctype_);
+ ReadType(strm, &version_);
+ ReadType(strm, &flags_);
+ ReadType(strm, &properties_);
+ ReadType(strm, &start_);
+ ReadType(strm, &numstates_);
+ ReadType(strm, &numarcs_);
+ if (!strm) {
+ // NOTE(review): no seek back is attempted on this path, even when
+ // rewind is true.
+ LOG(ERROR) << "FstHeader::Read: read failed: " << source;
+ return false;
+ }
+ if (rewind) strm.seekg(pos);
+ return true;
+}
+
+// Write Fst magic number and Fst header.
+// The magic number comes first, followed by the header fields in the order
+// in which FstHeader::Read consumes them. 'source' is not used.
+// NOTE(review): always returns true; the stream state is not inspected here,
+// so callers that care about write errors must check 'strm' themselves.
+bool FstHeader::Write(ostream &strm, const string &source) const {
+ WriteType(strm, kFstMagicNumber);
+ WriteType(strm, fsttype_);
+ WriteType(strm, arctype_);
+ WriteType(strm, version_);
+ WriteType(strm, flags_);
+ WriteType(strm, properties_);
+ WriteType(strm, start_);
+ WriteType(strm, numstates_);
+ WriteType(strm, numarcs_);
+ return true;
+}
+
+} // namespace fst
diff --git a/src/lib/properties.cc b/src/lib/properties.cc
new file mode 100644
index 0000000..db0e2c8
--- /dev/null
+++ b/src/lib/properties.cc
@@ -0,0 +1,427 @@
+// properties.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Functions for updating property bits for various FST operations and
+// string names of the properties.
+
+#include <fst/properties.h>
+
+#include <stddef.h>
+#include <vector>
+using std::vector;
+
+namespace fst {
+
+// These functions determine the properties associated with the FST
+// result of various finite-state operations. The property arguments
+// correspond to the operation's FST arguments. The properties
+// returned assume the operation modifies its first argument.
+// Bitwise-and this result with kCopyProperties for the case when a
+// new (possibly delayed) FST is instead constructed.
+
+// Properties for a concatenatively-closed FST.
+// Every bit of the result is copied from 'inprops'; 'delayed' only decides
+// which bits are eligible to be copied. 'star' is not consulted.
+uint64 ClosureProperties(uint64 inprops, bool star, bool delayed) {
+  // Always eligible.
+  uint64 keep = kError | kAcceptor | kUnweighted | kAccessible;
+  // Eligible only when the result is not delayed.
+  if (!delayed)
+    keep |= kExpanded | kMutable | kCoAccessible | kNotTopSorted | kNotString;
+  // Eligible unless the result is delayed and the input is not marked
+  // accessible.
+  if (!delayed || (inprops & kAccessible))
+    keep |= kNotAcceptor | kNonIDeterministic | kNonODeterministic |
+            kNotILabelSorted | kNotOLabelSorted | kWeighted |
+            kNotAccessible | kNotCoAccessible;
+  return inprops & keep;
+}
+
+// Properties for a complemented FST.
+// The result combines a fixed set of bits, a few bits inherited from
+// 'inprops', and three more bits that are added when the input is marked
+// accessible.
+uint64 ComplementProperties(uint64 inprops) {
+  const uint64 fixed = kAcceptor | kUnweighted | kNoEpsilons |
+                       kNoIEpsilons | kNoOEpsilons |
+                       kIDeterministic | kODeterministic | kAccessible;
+  const uint64 inherited =
+      inprops & (kError | kILabelSorted | kOLabelSorted | kInitialCyclic);
+  uint64 outprops = fixed | inherited;
+  if (inprops & kAccessible)
+    outprops |= kNotILabelSorted | kNotOLabelSorted | kCyclic;
+  return outprops;
+}
+
+// Properties for a composed FST.
+// kError is taken from either input and kAccessible is always set. The
+// remaining bits are only kept when they are set in both inputs; which bits
+// are eligible depends on whether both inputs are marked as acceptors.
+uint64 ComposeProperties(uint64 inprops1, uint64 inprops2) {
+  const uint64 common = inprops1 & inprops2;  // bits set in both inputs
+  const bool both_acceptors =
+      (inprops1 & kAcceptor) && (inprops2 & kAcceptor);
+
+  uint64 outprops = (kError & (inprops1 | inprops2)) | kAccessible;
+  if (both_acceptors) {
+    outprops |= kAcceptor;
+    outprops |= common & (kNoEpsilons | kNoIEpsilons | kNoOEpsilons |
+                          kAcyclic | kInitialAcyclic);
+    if (common & kNoIEpsilons)
+      outprops |= common & (kIDeterministic | kODeterministic);
+  } else {
+    outprops |= common & (kAcceptor | kNoIEpsilons | kAcyclic |
+                          kInitialAcyclic);
+    if (common & kNoIEpsilons)
+      outprops |= common & kIDeterministic;
+  }
+  return outprops;
+}
+
+// Properties for a concatenated FST.
+// Either operand is treated as possibly being the empty machine exactly when
+// the result is delayed, so every "operand cannot be empty" condition below
+// is expressed as '!delayed'.
+uint64 ConcatProperties(uint64 inprops1, uint64 inprops2, bool delayed) {
+  // Bits that are copied from an operand when they may be trusted.
+  const uint64 negative =
+      kNotAcceptor | kNonIDeterministic | kNonODeterministic |
+      kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
+      kNotOLabelSorted | kWeighted | kCyclic |
+      kNotAccessible | kNotCoAccessible;
+  const uint64 trim = kAccessible | kCoAccessible;
+
+  uint64 outprops =
+      (kAcceptor | kUnweighted | kAcyclic) & inprops1 & inprops2;
+  outprops |= kError & (inprops1 | inprops2);
+
+  if (!delayed) {
+    outprops |= inprops1 & (kExpanded | kMutable | kNotTopSorted |
+                            kNotString | kInitialAcyclic | kInitialCyclic);
+    outprops |= inprops2 & (kNotTopSorted | kNotString);
+  }
+  if (!delayed || (inprops1 & kAccessible))
+    outprops |= inprops1 & negative;
+
+  // The second operand only contributes these bits when the first one is
+  // both accessible and coaccessible and cannot be empty.
+  if (!delayed && (inprops1 & trim) == trim)
+    outprops |= inprops2 & (trim | negative);
+  return outprops;
+}
+
+// Properties for a determinized FST.
+// kAccessible is always set. Other bits are copied from 'inprops' through a
+// mask that grows when the input is accessible or an acceptor;
+// 'has_subsequential_label' additionally enables kIDeterministic and lets
+// kNoIEpsilons carry over.
+uint64 DeterminizeProperties(uint64 inprops, bool has_subsequential_label) {
+  uint64 keep = kError | kAcceptor | kNoEpsilons | kAcyclic |
+                kInitialAcyclic | kCoAccessible | kString;
+  if (inprops & kAccessible)
+    keep |= kNotAcceptor | kEpsilons | kIEpsilons | kOEpsilons | kCyclic;
+  if (inprops & kAcceptor)
+    keep |= kNoIEpsilons | kNoOEpsilons;
+
+  uint64 outprops = kAccessible | (inprops & keep);
+  if (has_subsequential_label || (inprops & (kAcceptor | kNoIEpsilons)))
+    outprops |= kIDeterministic;
+  if (has_subsequential_label && (inprops & kNoIEpsilons))
+    outprops |= kNoIEpsilons;
+  return outprops;
+}
+
+// Properties for factored weight FST.
+// The result is 'inprops' restricted to a mask; the mask is widened when the
+// input is marked accessible.
+uint64 FactorWeightProperties(uint64 inprops) {
+  uint64 keep = kExpanded | kMutable | kError | kAcceptor |
+                kAcyclic | kAccessible | kCoAccessible;
+  if (inprops & kAccessible)
+    keep |= kNotAcceptor | kNonIDeterministic | kNonODeterministic |
+            kEpsilons | kIEpsilons | kOEpsilons | kCyclic |
+            kNotILabelSorted | kNotOLabelSorted;
+  return inprops & keep;
+}
+
+// Properties for an inverted FST.
+// Side-independent bits are copied unchanged; each input-side bit that is
+// set produces its output-side counterpart and vice versa.
+uint64 InvertProperties(uint64 inprops) {
+  const uint64 unchanged =
+      kExpanded | kMutable | kError | kAcceptor | kNotAcceptor |
+      kEpsilons | kNoEpsilons | kWeighted | kUnweighted |
+      kCyclic | kAcyclic | kInitialCyclic | kInitialAcyclic |
+      kTopSorted | kNotTopSorted |
+      kAccessible | kNotAccessible |
+      kCoAccessible | kNotCoAccessible |
+      kString | kNotString;
+  // { input-side bit, output-side bit } pairs that trade places.
+  const uint64 swapped[][2] = {
+    { kIDeterministic,    kODeterministic },
+    { kNonIDeterministic, kNonODeterministic },
+    { kIEpsilons,         kOEpsilons },
+    { kNoIEpsilons,       kNoOEpsilons },
+    { kILabelSorted,      kOLabelSorted },
+    { kNotILabelSorted,   kNotOLabelSorted },
+  };
+
+  uint64 outprops = inprops & unchanged;
+  for (size_t i = 0; i < sizeof(swapped) / sizeof(swapped[0]); ++i) {
+    if (inprops & swapped[i][0])
+      outprops |= swapped[i][1];
+    if (inprops & swapped[i][1])
+      outprops |= swapped[i][0];
+  }
+  return outprops;
+}
+
+// Properties for a projected FST.
+// The result is always an acceptor. Bits of the projected side
+// ('project_input' selects the input side, otherwise the output side) are
+// copied, and each one that is set also sets the matching bit of the other
+// side. The epsilon bits of the projected side additionally set the combined
+// kEpsilons / kNoEpsilons bits.
+uint64 ProjectProperties(uint64 inprops, bool project_input) {
+  const uint64 iside[] = { kIDeterministic, kNonIDeterministic,
+                           kIEpsilons, kNoIEpsilons,
+                           kILabelSorted, kNotILabelSorted };
+  const uint64 oside[] = { kODeterministic, kNonODeterministic,
+                           kOEpsilons, kNoOEpsilons,
+                           kOLabelSorted, kNotOLabelSorted };
+  const uint64 *kept = project_input ? iside : oside;
+  const uint64 *mirrored = project_input ? oside : iside;
+  const uint64 eps = project_input ? kIEpsilons : kOEpsilons;
+  const uint64 no_eps = project_input ? kNoIEpsilons : kNoOEpsilons;
+
+  uint64 outprops = kAcceptor;
+  outprops |= inprops &
+      (kExpanded | kMutable | kError | kWeighted | kUnweighted |
+       kCyclic | kAcyclic | kInitialCyclic | kInitialAcyclic |
+       kTopSorted | kNotTopSorted | kAccessible | kNotAccessible |
+       kCoAccessible | kNotCoAccessible |
+       kString | kNotString);
+
+  for (size_t i = 0; i < sizeof(iside) / sizeof(iside[0]); ++i) {
+    outprops |= inprops & kept[i];
+    if (inprops & kept[i])
+      outprops |= mirrored[i];
+  }
+  if (inprops & eps)
+    outprops |= kEpsilons;
+  if (inprops & no_eps)
+    outprops |= kNoEpsilons;
+  return outprops;
+}
+
+// Properties for a randgen FST.
+// kAcyclic, kInitialAcyclic and kAccessible are always set and kError is
+// inherited. 'weighted' selects between adding kTopSorted with a wider
+// inherited set, or adding kUnweighted with a narrower one.
+uint64 RandGenProperties(uint64 inprops, bool weighted) {
+  uint64 outprops =
+      kAcyclic | kInitialAcyclic | kAccessible | (inprops & kError);
+  uint64 keep = kAcceptor | kILabelSorted | kOLabelSorted;
+  if (weighted) {
+    outprops |= kTopSorted;
+    keep |= kNoEpsilons | kNoIEpsilons | kNoOEpsilons |
+            kIDeterministic | kODeterministic;
+  } else {
+    outprops |= kUnweighted;
+  }
+  return outprops | (inprops & keep);
+}
+
+// Properties for a replace FST.
+// 'inprops' holds the property bits of each component FST and 'root' is the
+// index of the root component inside it. Returns kNullProperties when
+// 'inprops' is empty. 'root' is used as an index without a range check.
+uint64 ReplaceProperties(const vector<uint64>& inprops,
+                         ssize_t root,
+                         bool epsilon_on_replace,
+                         bool no_empty_fsts) {
+  if (inprops.empty())
+    return kNullProperties;
+
+  const uint64 trim = kAccessible | kCoAccessible;
+
+  // One pass over the components gathers everything needed below.
+  uint64 any_bits = 0;                            // set in some component
+  uint64 trim_in_all = no_empty_fsts ? trim : 0;  // trim bits set in all
+  bool all_string = true;
+  bool all_acceptor = true;
+  bool all_ideterministic = true;
+  bool all_no_iepsilons = true;
+  bool all_acyclic = true;
+  bool all_unweighted = true;
+  for (vector<uint64>::const_iterator it = inprops.begin();
+       it != inprops.end(); ++it) {
+    const uint64 props = *it;
+    any_bits |= props;
+    trim_in_all &= props & trim;
+    if (!(props & kString)) all_string = false;
+    if (!(props & kAcceptor)) all_acceptor = false;
+    if (!(props & kIDeterministic)) all_ideterministic = false;
+    if (!(props & kNoIEpsilons)) all_no_iepsilons = false;
+    if (!(props & kAcyclic)) all_acyclic = false;
+    if (!(props & kUnweighted)) all_unweighted = false;
+  }
+  const uint64 root_props = inprops[root];
+
+  uint64 outprops = kError & any_bits;
+
+  // These bits are only reported when every component is accessible and
+  // coaccessible and empty components were ruled out.
+  if (trim_in_all == trim) {
+    outprops |= trim;
+    if (root_props & kInitialCyclic)
+      outprops |= kInitialCyclic;
+    uint64 spread = kNonIDeterministic | kNonODeterministic | kEpsilons |
+                    kIEpsilons | kOEpsilons | kWeighted | kCyclic |
+                    kNotTopSorted | kNotString;
+    if (!epsilon_on_replace)
+      spread |= kNotAcceptor;
+    outprops |= spread & any_bits;
+    if (all_string)
+      outprops |= kString;
+  }
+
+  if (epsilon_on_replace && all_acceptor)
+    outprops |= kAcceptor;
+  if (!epsilon_on_replace && all_ideterministic)
+    outprops |= kIDeterministic;
+  if (!epsilon_on_replace && all_no_iepsilons)
+    outprops |= kNoIEpsilons;
+  if (all_acyclic)
+    outprops |= kAcyclic;
+  if (all_unweighted)
+    outprops |= kUnweighted;
+  if (root_props & kInitialAcyclic)
+    outprops |= kInitialAcyclic;
+  return outprops;
+}
+
+// Properties for a relabeled FST.
+// Only the bits listed in the mask are carried over from 'inprops'.
+uint64 RelabelProperties(uint64 inprops) {
+  const uint64 keep = kExpanded | kMutable | kError |
+                      kWeighted | kUnweighted |
+                      kCyclic | kAcyclic |
+                      kInitialCyclic | kInitialAcyclic |
+                      kTopSorted | kNotTopSorted |
+                      kAccessible | kNotAccessible |
+                      kCoAccessible | kNotCoAccessible |
+                      kString | kNotString;
+  return inprops & keep;
+}
+
+// Properties for a reversed FST. (the superinitial state limits this set)
+// Only the bits listed in the mask are carried over from 'inprops'.
+uint64 ReverseProperties(uint64 inprops) {
+  const uint64 keep =
+      kExpanded | kMutable | kError | kAcceptor | kNotAcceptor | kEpsilons |
+      kIEpsilons | kOEpsilons | kWeighted | kUnweighted |
+      kCyclic | kAcyclic;
+  return inprops & keep;
+}
+
+// Properties for re-weighted FST.
+// Keeps the bits in kWeightInvariantProperties, except that kCoAccessible is
+// always cleared.
+uint64 ReweightProperties(uint64 inprops) {
+  const uint64 keep = kWeightInvariantProperties & ~kCoAccessible;
+  return inprops & keep;
+}
+
+// Properties for an epsilon-removed FST.
+// kNoEpsilons is always set. The per-side no-epsilon bits are added for
+// acceptors, and kExpanded / kMutable (plus an inherited kTopSorted) only
+// when the result is not delayed.
+uint64 RmEpsilonProperties(uint64 inprops, bool delayed) {
+  uint64 outprops = kNoEpsilons |
+      (inprops & (kError | kAcceptor | kAcyclic | kInitialAcyclic));
+  if (inprops & kAcceptor)
+    outprops |= kNoIEpsilons | kNoOEpsilons;
+  if (!delayed)
+    outprops |= kExpanded | kMutable | (inprops & kTopSorted);
+  if (!delayed || (inprops & kAccessible))
+    outprops |= inprops & kNotAcceptor;
+  return outprops;
+}
+
+// Properties for shortest path. Given the bits in 'props' that are already
+// known for the output of shortest path, returns them with the bits that
+// this function always adds.
+uint64 ShortestPathProperties(uint64 props) {
+  const uint64 added =
+      kAcyclic | kInitialAcyclic | kAccessible | kCoAccessible;
+  return props | added;
+}
+
+// Properties for a synchronized FST.
+// The result is 'inprops' restricted to a mask; the mask is widened when the
+// input is marked accessible.
+uint64 SynchronizeProperties(uint64 inprops) {
+  uint64 keep = kError | kAcceptor | kAcyclic | kAccessible |
+                kCoAccessible | kUnweighted;
+  if (inprops & kAccessible)
+    keep |= kCyclic | kNotCoAccessible | kWeighted;
+  return inprops & keep;
+}
+
+// Properties for a unioned FST.
+// Either operand is treated as possibly being the empty machine exactly when
+// the result is delayed, so the "neither operand is empty" case is the
+// '!delayed' case.
+uint64 UnionProperties(uint64 inprops1, uint64 inprops2, bool delayed) {
+  // Bits copied from an operand when they may be trusted.
+  const uint64 negative =
+      kNotAcceptor | kNonIDeterministic | kNonODeterministic |
+      kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
+      kNotOLabelSorted | kWeighted | kCyclic |
+      kNotAccessible;
+
+  uint64 outprops = (kAcceptor | kUnweighted | kAcyclic | kAccessible)
+      & inprops1 & inprops2;
+  outprops |= kError & (inprops1 | inprops2);
+
+  if (!delayed) {
+    outprops |= inprops1 & (kExpanded | kMutable | kNotTopSorted |
+                            kNotString);
+    outprops |= inprops2 & (kNotTopSorted | kNotString);
+    outprops |= kEpsilons | kIEpsilons | kOEpsilons;
+    outprops |= kCoAccessible & inprops1 & inprops2;
+  }
+  // Note kNotCoAccessible does not hold because of kInitialAcyclic opt.
+  // It is therefore copied from the second operand only.
+  if (!delayed || (inprops1 & kAccessible))
+    outprops |= inprops1 & negative;
+  if (!delayed || (inprops2 & kAccessible))
+    outprops |= inprops2 & (negative | kNotCoAccessible);
+  return outprops;
+}
+
+
+// Property string names (indexed by bit position).
+// The table starts with 16 slots for the binary properties, of which only
+// the first three have names and the rest are empty strings. After those
+// come the trinary properties, listed as pairs of a property followed by
+// its negation.
+const char *PropertyNames[] = {
+ // binary
+ "expanded", "mutable", "error", "", "", "", "", "",
+ "", "", "", "", "", "", "", "",
+ // trinary
+ "acceptor", "not acceptor",
+ "input deterministic", "non input deterministic",
+ "output deterministic", "non output deterministic",
+ "input/output epsilons", "no input/output epsilons",
+ "input epsilons", "no input epsilons",
+ "output epsilons", "no output epsilons",
+ "input label sorted", "not input label sorted",
+ "output label sorted", "not output label sorted",
+ "weighted", "unweighted",
+ "cyclic", "acyclic",
+ "cyclic at initial state", "acyclic at initial state",
+ "top sorted", "not top sorted",
+ "accessible", "not accessible",
+ "coaccessible", "not coaccessible",
+ "string", "not string",
+};
+
+} // namespace fst
diff --git a/src/lib/symbol-table-ops.cc b/src/lib/symbol-table-ops.cc
new file mode 100644
index 0000000..875bd7a
--- /dev/null
+++ b/src/lib/symbol-table-ops.cc
@@ -0,0 +1,140 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: sorenj@google.com (Jeffrey Sorensen)
+
+#include <fst/symbol-table-ops.h>
+
+namespace fst {
+
+ // Merges `left` and `right` into a single symbol table and returns it.
+ //
+ // The result is chosen in the order the checks run:
+ //   1. every symbol of `left` is also in `right`  -> right.Copy()
+ //   2. every symbol of `right` is also in `left`  -> left.Copy()
+ //   3. otherwise a new table named "merge_<left>_<right>" that holds all of
+ //      `left` under its original ids, plus the symbols found only in `right`.
+ //      A right-only symbol keeps its id when that id is free; otherwise it
+ //      is appended afterwards and receives a fresh id from AddSymbol().
+ //
+ // If `right_relabel_output` is non-NULL it is set to true when at least one
+ // symbol of `right` ends up under a different id in the returned table (so
+ // labels expressed in `right`'s ids must be relabeled), else to false.
+ SymbolTable *MergeSymbolTable(const SymbolTable &left, const SymbolTable &right,
+                               bool *right_relabel_output) {
+   SymbolTable *merged = new SymbolTable("merge_" + left.Name() + "_" +
+                                         right.Name());
+   bool left_has_all = true, right_has_all = true, relabel = false;
+
+   // Copy everything from the left symbol table, keeping its ids. While doing
+   // so, track whether `right` already contains every left symbol.
+   SymbolTableIterator liter(left);
+   for (; !liter.Done(); liter.Next()) {
+     merged->AddSymbol(liter.Symbol(), liter.Value());
+     if (right_has_all) {
+       int64 key = right.Find(liter.Symbol());
+       if (key == -1) {
+         right_has_all = false;
+       } else if (!relabel && key != liter.Value()) {
+         relabel = true;
+       }
+     }
+   }
+   if (right_has_all) {
+     // `right` is a superset of `left`: hand back a copy of it.
+     delete merged;
+     if (right_relabel_output != NULL) {
+       *right_relabel_output = relabel;
+     }
+     return right.Copy();
+   }
+
+   // Add all symbols we can from the right symbol table.
+   vector<string> conflicts;
+   SymbolTableIterator riter(right);
+   for (; !riter.Done(); riter.Next()) {
+     int64 key = merged->Find(riter.Symbol());
+     if (key != -1) {
+       // Symbol already exists, maybe with a different value.
+       if (key != riter.Value()) {
+         relabel = true;
+       }
+       continue;
+     }
+     // Symbol doesn't exist from left.
+     left_has_all = false;
+     if (!merged->Find(riter.Value()).empty()) {
+       // The id this symbol has in `right` is already taken by another
+       // symbol, so it is added later and gets a new id. Labels using the
+       // right-hand id therefore have to be relabeled.
+       relabel = true;
+       conflicts.push_back(riter.Symbol());
+       continue;
+     }
+     // There is a hole and we can add this symbol with its id.
+     merged->AddSymbol(riter.Symbol(), riter.Value());
+   }
+   if (right_relabel_output != NULL) {
+     *right_relabel_output = relabel;
+   }
+   if (left_has_all) {
+     // `left` is a superset of `right`: hand back a copy of it.
+     delete merged;
+     return left.Copy();
+   }
+   // Add all symbols that conflicted, in the order they were encountered.
+   for (size_t i = 0; i < conflicts.size(); ++i) {
+     merged->AddSymbol(conflicts[i]);
+   }
+   return merged;
+ }
+
+ // Returns a new table, named "<syms.Name()>_compact", that contains the
+ // symbols of `syms` renumbered 0, 1, 2, ... in ascending order of their
+ // original keys.
+ SymbolTable *CompactSymbolTable(const SymbolTable &syms) {
+   // Order the symbols by their original key. The map key is int64 because
+   // symbol keys are handled as int64 elsewhere in this file; a plain int
+   // would narrow large keys and could merge or misorder entries.
+   map<int64, string> sorted;
+   SymbolTableIterator stiter(syms);
+   for (; !stiter.Done(); stiter.Next()) {
+     sorted[stiter.Value()] = stiter.Symbol();
+   }
+   SymbolTable *compact = new SymbolTable(syms.Name() + "_compact");
+   int64 newkey = 0;
+   for (map<int64, string>::const_iterator si = sorted.begin();
+        si != sorted.end(); ++si) {
+     compact->AddSymbol(si->second, newkey++);
+   }
+   return compact;
+ }
+
+ // Loads one of the symbol tables stored in the FST file `filename`: the
+ // input table when `input_symbols` is true, otherwise the output table.
+ // Returns NULL, after logging an error, when the file cannot be opened, the
+ // header or a table cannot be read, or the requested table is not present.
+ SymbolTable *FstReadSymbols(const string &filename, bool input_symbols) {
+   ifstream fst_stream(filename.c_str(), ifstream::in | ifstream::binary);
+   if (!fst_stream) {
+     LOG(ERROR) << "FstReadSymbols: Can't open file " << filename;
+     return NULL;
+   }
+
+   FstHeader header;
+   if (!header.Read(fst_stream, filename)) {
+     LOG(ERROR) << "FstReadSymbols: Couldn't read header from " << filename;
+     return NULL;
+   }
+
+   // The input table, when present, is read first; it is consumed from the
+   // stream even when only the output table is wanted.
+   const bool has_isymbols =
+       (header.GetFlags() & FstHeader::HAS_ISYMBOLS) != 0;
+   if (has_isymbols) {
+     SymbolTable *isymbols = SymbolTable::Read(fst_stream, filename);
+     if (!isymbols) {
+       LOG(ERROR) << "FstReadSymbols: Could not read input symbols from "
+                  << filename;
+       return NULL;
+     }
+     if (input_symbols) return isymbols;
+     delete isymbols;
+   }
+
+   const bool has_osymbols =
+       (header.GetFlags() & FstHeader::HAS_OSYMBOLS) != 0;
+   if (has_osymbols) {
+     SymbolTable *osymbols = SymbolTable::Read(fst_stream, filename);
+     if (!osymbols) {
+       LOG(ERROR) << "FstReadSymbols: Could not read output symbols from "
+                  << filename;
+       return NULL;
+     }
+     if (!input_symbols) return osymbols;
+     delete osymbols;
+   }
+
+   // Reaching this point means the requested table was not in the file.
+   LOG(ERROR) << "FstReadSymbols: The file " << filename
+              << " doesn't contain the requested symbols";
+   return NULL;
+ }
+
+} // namespace fst
diff --git a/src/lib/symbol-table.cc b/src/lib/symbol-table.cc
new file mode 100644
index 0000000..8b35cdf
--- /dev/null
+++ b/src/lib/symbol-table.cc
@@ -0,0 +1,243 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// All Rights Reserved.
+//
+// Author : Johan Schalkwyk
+//
+// \file
+// Classes to provide symbol-to-integer and integer-to-symbol mappings.
+
+#include <fst/symbol-table.h>
+#include <fst/util.h>
+
+ // Command-line flags defined by this file.
+ // fst_field_separator is used below: ReadText() splits each line on any of
+ // its characters (plus '\n'), and WriteText() prints its first character
+ // between the symbol and the key.
+ DEFINE_bool(fst_compat_symbols, true,
+ "Require symbol tables to match when appropriate");
+ DEFINE_string(fst_field_separator, "\t ",
+ "Set of characters used as a separator between printed fields");
+
+namespace fst {
+
+ // Size of the line buffer used when reading a textual symbols file; a line
+ // must fit in it together with its terminating NUL.
+ const int kLineLen = 8096;
+
+ // Identifies stream data as a symbol table (and its endianness). It is the
+ // first value emitted by Write(); Read() consumes the corresponding field but
+ // does not compare it against this constant.
+ static const int32 kSymbolTableMagicNumber = 2125658996;
+
+ // Builds a symbol table from the text stream `strm`, one "symbol key" pair
+ // per line, with fields separated by any character of
+ // FLAGS_fst_field_separator. `filename` names the new table and is quoted in
+ // diagnostics. Negative keys are accepted only when `allow_negative` is true,
+ // and -1 is always rejected.
+ //
+ // Empty lines are ignored; malformed lines are logged and skipped. The
+ // table built from the lines that could be read is always returned, but an
+ // error is logged if reading stopped before the end of the stream (for
+ // example because a line did not fit into the kLineLen-byte buffer).
+ SymbolTableImpl* SymbolTableImpl::ReadText(istream &strm,
+                                            const string &filename,
+                                            bool allow_negative) {
+   SymbolTableImpl* impl = new SymbolTableImpl(filename);
+
+   // The delimiter set is the same for every line, so build it once.
+   const string separator = FLAGS_fst_field_separator + "\n";
+
+   int64 nline = 0;
+   char line[kLineLen];
+   while (strm.getline(line, kLineLen)) {
+     ++nline;
+     vector<char *> col;
+     SplitToVector(line, separator.c_str(), &col, true);
+     if (col.size() == 0)  // empty line
+       continue;
+     if (col.size() != 2) {
+       LOG(ERROR) << "SymbolTable::ReadText: Bad number of columns ("
+                  << col.size() << " skipping), "
+                  << "file = " << filename << ", line = " << nline
+                  << ":<" << line << ">";
+       continue;
+     }
+     const char *symbol = col[0];
+     const char *value = col[1];
+     char *p;
+     int64 key = strtoll(value, &p, 10);
+     // Reject trailing garbage, disallowed negative keys, and -1.
+     if (p < value + strlen(value) ||
+         (!allow_negative && key < 0) || key == -1) {
+       LOG(ERROR) << "SymbolTable::ReadText: Bad non-negative integer \""
+                  << value << "\" (skipping), "
+                  << "file = " << filename << ", line = " << nline;
+       continue;
+     }
+     impl->AddSymbol(symbol, key);
+   }
+
+   // getline() also fails when a line does not fit into the buffer or the
+   // stream reports an error; in both cases end-of-file has not been reached
+   // and the remaining input would otherwise be dropped without any notice.
+   if (!strm.eof()) {
+     LOG(ERROR) << "SymbolTable::ReadText: Read failed or line too long, "
+                << "file = " << filename << ", line = " << nline + 1;
+   }
+
+   return impl;
+ }
+
+ // Refreshes both cached checksum strings unless they are already marked as
+ // final. The first digest covers only the symbol texts; the second one also
+ // covers the key attached to each symbol.
+ void SymbolTableImpl::MaybeRecomputeCheckSum() const {
+   if (!check_sum_finalized_) {
+     // Label-agnostic check sum: every symbol text including its NUL.
+     check_sum_.Reset();
+     for (int64 idx = 0; idx < symbols_.size(); ++idx) {
+       check_sum_.Update(symbols_[idx], strlen(symbols_[idx]) + 1);
+     }
+     check_sum_string_ = check_sum_.Digest();
+
+     // Label-dependent check sum: "symbol<TAB>key" for the dense keys first,
+     // then for the key_map_ entries at or above the dense limit.
+     labeled_check_sum_.Reset();
+     for (int64 dense = 0; dense < dense_key_limit_; ++dense) {
+       ostringstream entry;
+       entry << symbols_[dense] << '\t' << dense;
+       labeled_check_sum_.Update(entry.str());
+     }
+     typedef map<int64, const char*>::const_iterator KeyIter;
+     for (KeyIter kit = key_map_.begin(); kit != key_map_.end(); ++kit) {
+       if (kit->first < dense_key_limit_) continue;
+       ostringstream entry;
+       entry << kit->second << '\t' << kit->first;
+       labeled_check_sum_.Update(entry.str());
+     }
+     labeled_check_sum_string_ = labeled_check_sum_.Digest();
+
+     check_sum_finalized_ = true;
+   }
+ }
+
+ // Adds `symbol` under `key` unless the symbol is already in the table.
+ // Returns the key the symbol is stored under: `key` for a newly added
+ // symbol, or the previously assigned key when the symbol already exists. In
+ // the latter case the supplied key is ignored and, if it differs from the
+ // stored one, a VLOG(1) message is emitted.
+ int64 SymbolTableImpl::AddSymbol(const string& symbol, int64 key) {
+   map<const char *, int64, StrCmp>::const_iterator it =
+       symbol_map_.find(symbol.c_str());
+   if (it != symbol_map_.end()) {
+     // Log if symbol already in table with different key
+     if (it->second != key) {
+       VLOG(1) << "SymbolTable::AddSymbol: symbol = " << symbol
+               << " already in symbol_map_ with key = "
+               << it->second
+               << " but supplied new key = " << key
+               << " (ignoring new key)";
+     }
+     // Report the key that is actually in effect, not the ignored one.
+     return it->second;
+   }
+
+   // The table changes, so the cached check sums must be recomputed.
+   check_sum_finalized_ = false;
+
+   // The maps hold raw character pointers; store a heap copy of the text.
+   char *csymbol = new char[symbol.size() + 1];
+   strcpy(csymbol, symbol.c_str());
+   symbols_.push_back(csymbol);
+   key_map_[key] = csymbol;
+   symbol_map_[csymbol] = key;
+
+   // Keep available_key_ above every key handed out so far.
+   if (key >= available_key_) {
+     available_key_ = key + 1;
+   }
+   return key;
+ }
+
+ // Returns true when `key` lies inside at least one inclusive
+ // [first, second] range. An empty range list accepts every key.
+ static bool IsInRange(const vector<pair<int64, int64> >& ranges,
+                       int64 key) {
+   if (ranges.empty()) return true;
+   typedef vector<pair<int64, int64> >::const_iterator RangeIter;
+   for (RangeIter range = ranges.begin(); range != ranges.end(); ++range) {
+     const bool below = key < range->first;
+     const bool above = key > range->second;
+     if (!below && !above) return true;
+   }
+   return false;
+ }
+
+ // Reads a symbol table in the binary layout produced by Write(): magic
+ // number, name, available key, entry count, then one (symbol, key) pair per
+ // entry. Returns a newly allocated impl, or 0 after logging an error if any
+ // read fails.
+ // `opts.string_hash_ranges` limits which keys get a symbol -> key entry in
+ // symbol_map_; an empty list means all keys (see IsInRange()).
+ SymbolTableImpl* SymbolTableImpl::Read(istream &strm,
+ const SymbolTableReadOptions& opts) {
+ // The magic number is consumed but not compared with
+ // kSymbolTableMagicNumber.
+ int32 magic_number = 0;
+ ReadType(strm, &magic_number);
+ if (!strm) {
+ LOG(ERROR) << "SymbolTable::Read: read failed";
+ return 0;
+ }
+ string name;
+ ReadType(strm, &name);
+ SymbolTableImpl* impl = new SymbolTableImpl(name);
+ ReadType(strm, &impl->available_key_);
+ int64 size;
+ ReadType(strm, &size);
+ if (!strm) {
+ LOG(ERROR) << "SymbolTable::Read: read failed";
+ delete impl;
+ return 0;
+ }
+
+ string symbol;
+ int64 key;
+ impl->check_sum_finalized_ = false;
+ // NOTE(review): `size` is signed but is compared with an unsigned counter,
+ // so a negative count read from the stream is treated as a very large one;
+ // the loop then runs until a read fails.
+ for (size_t i = 0; i < size; ++i) {
+ ReadType(strm, &symbol);
+ ReadType(strm, &key);
+ if (!strm) {
+ LOG(ERROR) << "SymbolTable::Read: read failed";
+ delete impl;
+ return 0;
+ }
+
+ char *csymbol = new char[symbol.size() + 1];
+ strcpy(csymbol, symbol.c_str());
+ impl->symbols_.push_back(csymbol);
+ // An entry whose key equals both the current dense limit and its own
+ // position in symbols_ extends the dense range; every other entry is
+ // recorded in key_map_ instead.
+ if (key == impl->dense_key_limit_ &&
+ key == impl->symbols_.size() - 1)
+ impl->dense_key_limit_ = impl->symbols_.size();
+ else
+ impl->key_map_[key] = csymbol;
+
+ // Only keys accepted by the configured ranges are made searchable by
+ // symbol text.
+ if (IsInRange(opts.string_hash_ranges, key)) {
+ impl->symbol_map_[csymbol] = key;
+ }
+ }
+ return impl;
+ }
+
+ // Serializes the table to `strm`: magic number, name, available key and
+ // entry count, followed by the densely numbered symbols and then all other
+ // symbols taken from symbol_map_. Returns false, after logging, if the
+ // number of entries written differs from symbols_.size() or the stream is
+ // in a failed state after flushing.
+ bool SymbolTableImpl::Write(ostream &strm) const {
+   WriteType(strm, kSymbolTableMagicNumber);
+   WriteType(strm, name_);
+   WriteType(strm, available_key_);
+   int64 size = symbols_.size();
+   WriteType(strm, size);
+
+   // Dense part: the key of each symbol is its index.
+   int64 written = 0;
+   while (written < dense_key_limit_) {
+     WriteType(strm, string(symbols_[written]));
+     WriteType(strm, written);
+     ++written;
+   }
+
+   // Remaining part: every symbol_map_ entry whose key is outside
+   // [0, dense_key_limit_).
+   typedef map<const char *, int64, StrCmp>::const_iterator SymbolIter;
+   for (SymbolIter entry = symbol_map_.begin(); entry != symbol_map_.end();
+        ++entry) {
+     const bool is_dense =
+         (entry->second >= 0) && (entry->second < dense_key_limit_);
+     if (!is_dense) {
+       WriteType(strm, string(entry->first));
+       WriteType(strm, entry->second);
+       ++written;
+     }
+   }
+
+   if (written != size) {
+     LOG(ERROR) << "SymbolTable::Write: write failed";
+     return false;
+   }
+   strm.flush();
+   if (!strm) {
+     LOG(ERROR) << "SymbolTable::Write: write failed";
+     return false;
+   }
+   return true;
+ }
+
+ // Out-of-class definition of the static constant SymbolTable::kNoSymbol; no
+ // initializer is given here, so its value is the one supplied with the
+ // in-class declaration (header not visible in this file).
+ const int64 SymbolTable::kNoSymbol;
+
+
+ // Adds every symbol of `table` to this table. Only the symbol text is
+ // passed on; the keys used by `table` are not.
+ void SymbolTable::AddTable(const SymbolTable& table) {
+   SymbolTableIterator iter(table);
+   while (!iter.Done()) {
+     impl_->AddSymbol(iter.Symbol());
+     iter.Next();
+   }
+ }
+
+ // Writes the table to `strm` as text, one "symbol<sep>key" line per entry,
+ // where <sep> is the first character of FLAGS_fst_field_separator.
+ // Returns true on success; returns false, after logging an error, if the
+ // stream is in a failed state once all lines have been written.
+ bool SymbolTable::WriteText(ostream &strm) const {
+   for (SymbolTableIterator iter(*this); !iter.Done(); iter.Next()) {
+     ostringstream line;
+     line << iter.Symbol() << FLAGS_fst_field_separator[0] << iter.Value()
+          << '\n';
+     // str() returns a copy of the buffer, so take it only once per line.
+     const string text = line.str();
+     strm.write(text.data(), text.size());
+   }
+   // Report stream failures the same way Write() does.
+   if (!strm) {
+     LOG(ERROR) << "SymbolTable::WriteText: write failed";
+     return false;
+   }
+   return true;
+ }
+} // namespace fst
diff --git a/src/lib/temp_Android.temp_mk b/src/lib/temp_Android.temp_mk
new file mode 100644
index 0000000..7a6936e
--- /dev/null
+++ b/src/lib/temp_Android.temp_mk
@@ -0,0 +1,20 @@
+#
+# Copyright 2012 Google Inc. All Rights Reserved.
+# Author: npereira@google.com (Nicole Pereira)
+#
+# Android makefile for openfst library.
+#
+
+OPENFST_DIR := $(call my-dir)
+
+LOCAL_PATH := $(call my-dir)
+include $(CLEAR_VARS)
+
+LOCAL_MODULE := libopenfst_patts
+
+LOCAL_CPP_EXTENSION := .cc
+LOCAL_C_INCLUDES += $(OPENFST_DIR)/src/include/
+
+#LOCAL_SRC_FILES := $(call private-function-all-cpp-files-under, src)
+
+include $(BUILD_STATIC_LIBRARY)
diff --git a/src/lib/util.cc b/src/lib/util.cc
new file mode 100644
index 0000000..eeba92b
--- /dev/null
+++ b/src/lib/util.cc
@@ -0,0 +1,92 @@
+// util.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// FST utility definitions.
+
+#include <cctype>
+#include <string>
+#include <fst/util.h>
+
+ // Utility flag definitions
+
+ // fst_error_fatal defaults to true; its help text below describes the two
+ // behaviors it selects between.
+ // NOTE(review): presumably consulted by the FSTERROR() macro used in
+ // StrToInt64() below - confirm against the header that defines the macro.
+ DEFINE_bool(fst_error_fatal, true,
+ "FST errors are fatal; o.w. return objects flagged as bad: "
+ " e.g., FSTs - kError prop. true, FST weights - not a member()");
+
+namespace fst {
+
+ // Parses `s` as a base-10 integer and returns it. `src` and `nline` only
+ // identify the input in the error message. Negative values are rejected
+ // unless `allow_negative` is true.
+ // On a parse failure the error is reported through FSTERROR(), `*error`
+ // (when `error` is non-NULL) is set to true and 0 is returned; otherwise
+ // `*error` is set to false.
+ // NOTE(review): an empty `s` is accepted and yields 0, and out-of-range
+ // values are not detected (the strtoll result is used as is).
+ int64 StrToInt64(const string &s, const string &src, size_t nline,
+                  bool allow_negative = false, bool *error) {
+   int64 n;
+   const char *cs = s.c_str();
+   char *p;
+   if (error) *error = false;
+   n = strtoll(cs, &p, 10);
+   // `p` stops at the first character that is not part of the number, so
+   // anything short of the end of the string means trailing garbage.
+   if (p < cs + s.size() || (!allow_negative && n < 0)) {
+     FSTERROR() << "StrToInt64: Bad integer = \"" << s
+                << "\", source = " << src << ", line = " << nline;
+     if (error) *error = true;
+     return 0;
+   }
+   return n;
+ }
+
+ // Stores the decimal text of `n` in `*s`, replacing its previous content.
+ void Int64ToStr(int64 n, string *s) {
+   ostringstream formatted;
+   formatted << n;
+   s->assign(formatted.str());
+ }
+
+ // Rewrites `*s` in place so that every character that is not alphanumeric
+ // becomes '_'.
+ void ConvertToLegalCSymbol(string *s) {
+   for (string::iterator it = s->begin(); it != s->end(); ++it) {
+     // isalnum() is only defined for EOF and values representable as
+     // unsigned char; a plain char may be negative for non-ASCII bytes.
+     if (!isalnum(static_cast<unsigned char>(*it))) *it = '_';
+   }
+ }
+
+ // Consumes input bytes from `strm` until the read position is a multiple of
+ // `align`, reading at most `align` bytes. Returns false (and logs an error)
+ // if the stream position cannot be determined, true otherwise.
+ bool AlignInput(istream &strm, int align) {
+   int attempts = 0;
+   while (attempts < align) {
+     const int64 offset = strm.tellg();
+     if (offset < 0) {
+       LOG(ERROR) << "AlignInput: can't determine stream position";
+       return false;
+     }
+     if (offset % align == 0) return true;  // already aligned
+     char skipped;
+     strm.read(&skipped, 1);
+     ++attempts;
+   }
+   return true;
+ }
+
+ // Pads `strm` with NUL bytes until the write position is a multiple of
+ // `align`, writing at most `align` bytes. Returns false (and logs an error)
+ // if the stream position cannot be determined, true otherwise.
+ bool AlignOutput(ostream &strm, int align) {
+   int attempts = 0;
+   while (attempts < align) {
+     const int64 offset = strm.tellp();
+     if (offset < 0) {
+       LOG(ERROR) << "AlignOutput: can't determine stream position";
+       return false;
+     }
+     if (offset % align == 0) return true;  // already aligned
+     strm.write("", 1);  // one byte: the literal's terminating NUL
+     ++attempts;
+   }
+   return true;
+ }
+
+
+} // namespace fst
diff --git a/src/script/Makefile.am b/src/script/Makefile.am
new file mode 100644
index 0000000..00d52a5
--- /dev/null
+++ b/src/script/Makefile.am
@@ -0,0 +1,15 @@
+ # Headers are taken from ../include relative to this directory, plus the
+ # ICU preprocessor flags.
+ AM_CPPFLAGS = -I$(srcdir)/../include $(ICU_CPPFLAGS)
+
+ # libfstscript is only built when the HAVE_SCRIPT conditional is true.
+ if HAVE_SCRIPT
+ lib_LTLIBRARIES = libfstscript.la
+ libfstscript_la_SOURCES = arcsort.cc closure.cc compile.cc compose.cc \
+ concat.cc connect.cc convert.cc decode.cc determinize.cc difference.cc \
+ draw.cc encode.cc epsnormalize.cc equal.cc equivalent.cc fst-class.cc \
+ info.cc intersect.cc invert.cc map.cc minimize.cc print.cc project.cc \
+ prune.cc push.cc randequivalent.cc randgen.cc relabel.cc replace.cc \
+ reverse.cc reweight.cc rmepsilon.cc script-impl.cc shortest-distance.cc \
+ shortest-path.cc synchronize.cc text-io.cc topsort.cc union.cc \
+ weight-class.cc verify.cc
+
+ # libtool interface version, in current:revision:age form.
+ libfstscript_la_LDFLAGS = -version-info 0:0:0
+ endif
diff --git a/src/script/Makefile.in b/src/script/Makefile.in
new file mode 100644
index 0000000..bab82dc
--- /dev/null
+++ b/src/script/Makefile.in
@@ -0,0 +1,601 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = src/script
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_icu.m4 \
+ $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+ $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+ $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h \
+ $(top_builddir)/src/include/fst/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+ *) f=$$p;; \
+ esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+ if (++n[$$2] == $(am__install_max)) \
+ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+ END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__installdirs = "$(DESTDIR)$(libdir)"
+LTLIBRARIES = $(lib_LTLIBRARIES)
+libfstscript_la_LIBADD =
+am__libfstscript_la_SOURCES_DIST = arcsort.cc closure.cc compile.cc \
+ compose.cc concat.cc connect.cc convert.cc decode.cc \
+ determinize.cc difference.cc draw.cc encode.cc epsnormalize.cc \
+ equal.cc equivalent.cc fst-class.cc info.cc intersect.cc \
+ invert.cc map.cc minimize.cc print.cc project.cc prune.cc \
+ push.cc randequivalent.cc randgen.cc relabel.cc replace.cc \
+ reverse.cc reweight.cc rmepsilon.cc script-impl.cc \
+ shortest-distance.cc shortest-path.cc synchronize.cc \
+ text-io.cc topsort.cc union.cc weight-class.cc verify.cc
+@HAVE_SCRIPT_TRUE@am_libfstscript_la_OBJECTS = arcsort.lo closure.lo \
+@HAVE_SCRIPT_TRUE@ compile.lo compose.lo concat.lo connect.lo \
+@HAVE_SCRIPT_TRUE@ convert.lo decode.lo determinize.lo \
+@HAVE_SCRIPT_TRUE@ difference.lo draw.lo encode.lo \
+@HAVE_SCRIPT_TRUE@ epsnormalize.lo equal.lo equivalent.lo \
+@HAVE_SCRIPT_TRUE@ fst-class.lo info.lo intersect.lo invert.lo \
+@HAVE_SCRIPT_TRUE@ map.lo minimize.lo print.lo project.lo \
+@HAVE_SCRIPT_TRUE@ prune.lo push.lo randequivalent.lo \
+@HAVE_SCRIPT_TRUE@ randgen.lo relabel.lo replace.lo reverse.lo \
+@HAVE_SCRIPT_TRUE@ reweight.lo rmepsilon.lo script-impl.lo \
+@HAVE_SCRIPT_TRUE@ shortest-distance.lo shortest-path.lo \
+@HAVE_SCRIPT_TRUE@ synchronize.lo text-io.lo topsort.lo \
+@HAVE_SCRIPT_TRUE@ union.lo weight-class.lo verify.lo
+libfstscript_la_OBJECTS = $(am_libfstscript_la_OBJECTS)
+libfstscript_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
+ $(CXXFLAGS) $(libfstscript_la_LDFLAGS) $(LDFLAGS) -o $@
+@HAVE_SCRIPT_TRUE@am_libfstscript_la_rpath = -rpath $(libdir)
+DEFAULT_INCLUDES =
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+LTCXXCOMPILE = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+CXXLD = $(CXX)
+CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
+ $(LDFLAGS) -o $@
+SOURCES = $(libfstscript_la_SOURCES)
+DIST_SOURCES = $(am__libfstscript_la_SOURCES_DIST)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+ICU_CFLAGS = @ICU_CFLAGS@
+ICU_CONFIG = @ICU_CONFIG@
+ICU_CPPFLAGS = @ICU_CPPFLAGS@
+ICU_CXXFLAGS = @ICU_CXXFLAGS@
+ICU_LIBS = @ICU_LIBS@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+libfstdir = @libfstdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+lt_ECHO = @lt_ECHO@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+AM_CPPFLAGS = -I$(srcdir)/../include $(ICU_CPPFLAGS)
+@HAVE_SCRIPT_TRUE@lib_LTLIBRARIES = libfstscript.la
+@HAVE_SCRIPT_TRUE@libfstscript_la_SOURCES = arcsort.cc closure.cc compile.cc compose.cc \
+@HAVE_SCRIPT_TRUE@concat.cc connect.cc convert.cc decode.cc determinize.cc difference.cc \
+@HAVE_SCRIPT_TRUE@draw.cc encode.cc epsnormalize.cc equal.cc equivalent.cc fst-class.cc \
+@HAVE_SCRIPT_TRUE@info.cc intersect.cc invert.cc map.cc minimize.cc print.cc project.cc \
+@HAVE_SCRIPT_TRUE@prune.cc push.cc randequivalent.cc randgen.cc relabel.cc replace.cc \
+@HAVE_SCRIPT_TRUE@reverse.cc reweight.cc rmepsilon.cc script-impl.cc shortest-distance.cc \
+@HAVE_SCRIPT_TRUE@shortest-path.cc synchronize.cc text-io.cc topsort.cc union.cc \
+@HAVE_SCRIPT_TRUE@weight-class.cc verify.cc
+
+@HAVE_SCRIPT_TRUE@libfstscript_la_LDFLAGS = -version-info 0:0:0
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .cc .lo .o .obj
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/script/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign src/script/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-libLTLIBRARIES: $(lib_LTLIBRARIES)
+ @$(NORMAL_INSTALL)
+ test -z "$(libdir)" || $(MKDIR_P) "$(DESTDIR)$(libdir)"
+ @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
+ list2=; for p in $$list; do \
+ if test -f $$p; then \
+ list2="$$list2 $$p"; \
+ else :; fi; \
+ done; \
+ test -z "$$list2" || { \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \
+ }
+
+uninstall-libLTLIBRARIES:
+ @$(NORMAL_UNINSTALL)
+ @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
+ for p in $$list; do \
+ $(am__strip_dir) \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \
+ done
+
+clean-libLTLIBRARIES:
+ -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES)
+ @list='$(lib_LTLIBRARIES)'; for p in $$list; do \
+ dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+ test "$$dir" != "$$p" || dir=.; \
+ echo "rm -f \"$${dir}/so_locations\""; \
+ rm -f "$${dir}/so_locations"; \
+ done
+libfstscript.la: $(libfstscript_la_OBJECTS) $(libfstscript_la_DEPENDENCIES)
+ $(libfstscript_la_LINK) $(am_libfstscript_la_rpath) $(libfstscript_la_OBJECTS) $(libfstscript_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/arcsort.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/closure.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compile.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compose.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/concat.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/connect.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/convert.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/decode.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/determinize.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/difference.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/draw.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/encode.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/epsnormalize.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/equal.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/equivalent.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fst-class.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/info.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intersect.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/invert.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/map.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minimize.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/print.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/project.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/prune.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/push.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/randequivalent.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/randgen.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/relabel.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/replace.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reverse.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reweight.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rmepsilon.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/script-impl.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/shortest-distance.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/shortest-path.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/synchronize.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/text-io.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/topsort.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/union.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/verify.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/weight-class.Plo@am__quote@
+
+.cc.o:
+@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $<
+
+.cc.obj:
+@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.cc.lo:
+@am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(LTCXXCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ mkid -fID $$unique
+tags: TAGS
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ set x; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: CTAGS
+CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES)
+installdirs:
+ for dir in "$(DESTDIR)$(libdir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \
+ mostlyclean-am
+
+distclean: distclean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-libLTLIBRARIES
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-libLTLIBRARIES
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
+ clean-libLTLIBRARIES clean-libtool ctags distclean \
+ distclean-compile distclean-generic distclean-libtool \
+ distclean-tags distdir dvi dvi-am html html-am info info-am \
+ install install-am install-data install-data-am install-dvi \
+ install-dvi-am install-exec install-exec-am install-html \
+ install-html-am install-info install-info-am \
+ install-libLTLIBRARIES install-man install-pdf install-pdf-am \
+ install-ps install-ps-am install-strip installcheck \
+ installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-compile \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags uninstall uninstall-am uninstall-libLTLIBRARIES
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/src/script/arcsort.cc b/src/script/arcsort.cc
new file mode 100644
index 0000000..42c13a0
--- /dev/null
+++ b/src/script/arcsort.cc
@@ -0,0 +1,35 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/fst-class.h>
+#include <fst/script/arcsort.h>
+#include <fst/script/script-impl.h>
+
+namespace fst {
+namespace script {
+
+void ArcSort(MutableFstClass *fst, ArcSortType sort_type) {
+  // Pack the arguments and hand them to Apply under the FST's arc type.
+  ArcSortArgs sort_args(fst, sort_type);
+  Apply<Operation<ArcSortArgs> >("ArcSort", fst->ArcType(), &sort_args);
+}
+
+REGISTER_FST_OPERATION(ArcSort, StdArc, ArcSortArgs);
+REGISTER_FST_OPERATION(ArcSort, LogArc, ArcSortArgs);
+REGISTER_FST_OPERATION(ArcSort, Log64Arc, ArcSortArgs);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/closure.cc b/src/script/closure.cc
new file mode 100644
index 0000000..85ad1fc
--- /dev/null
+++ b/src/script/closure.cc
@@ -0,0 +1,35 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/closure.h>
+
+namespace fst {
+namespace script {
+
+void Closure(MutableFstClass *fst, ClosureType closure_type) {
+  // Pack the arguments and hand them to Apply under the FST's arc type.
+  ClosureArgs closure_args(fst, closure_type);
+  Apply<Operation<ClosureArgs> >("Closure", fst->ArcType(), &closure_args);
+}
+
+REGISTER_FST_OPERATION(Closure, StdArc, ClosureArgs);
+REGISTER_FST_OPERATION(Closure, LogArc, ClosureArgs);
+REGISTER_FST_OPERATION(Closure, Log64Arc, ClosureArgs);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/compile.cc b/src/script/compile.cc
new file mode 100644
index 0000000..57a84b3
--- /dev/null
+++ b/src/script/compile.cc
@@ -0,0 +1,43 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <string>
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/compile.h>
+
+namespace fst {
+namespace script {
+
+void CompileFst(istream &istrm, const string &source, const string &dest,
+                const string &fst_type, const string &arc_type,
+                const SymbolTable *isyms, const SymbolTable *osyms,
+                const SymbolTable *ssyms, bool accep, bool ikeep, bool okeep,
+                bool nkeep, bool allow_negative_labels) {
+  // arc_type is passed to Apply rather than packed into the argument bundle.
+  FstCompileArgs compile_args(istrm, source, dest, fst_type, isyms, osyms,
+                              ssyms, accep, ikeep, okeep, nkeep,
+                              allow_negative_labels);
+  Apply<Operation<FstCompileArgs> >("CompileFst", arc_type, &compile_args);
+}
+
+REGISTER_FST_OPERATION(CompileFst, StdArc, FstCompileArgs);
+REGISTER_FST_OPERATION(CompileFst, LogArc, FstCompileArgs);
+REGISTER_FST_OPERATION(CompileFst, Log64Arc, FstCompileArgs);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/compose.cc b/src/script/compose.cc
new file mode 100644
index 0000000..2a363c1
--- /dev/null
+++ b/src/script/compose.cc
@@ -0,0 +1,51 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/compose.h>
+
+namespace fst {
+namespace script {
+
+
+void Compose(const FstClass &ifst1, const FstClass &ifst2,
+             MutableFstClass *ofst, ComposeFilter compose_filter) {
+  // Nothing is done when either ArcTypesMatch check fails.
+  if (!ArcTypesMatch(ifst1, ifst2, "Compose")) return;
+  if (!ArcTypesMatch(*ofst, ifst1, "Compose")) return;
+  ComposeArgs1 compose_args(ifst1, ifst2, ofst, compose_filter);
+  Apply<Operation<ComposeArgs1> >("Compose", ifst1.ArcType(), &compose_args);
+}
+
+void Compose(const FstClass &ifst1, const FstClass &ifst2,
+             MutableFstClass *ofst, const ComposeOptions &copts) {
+  // Nothing is done when either ArcTypesMatch check fails.
+  if (!ArcTypesMatch(ifst1, ifst2, "Compose")) return;
+  if (!ArcTypesMatch(*ofst, ifst1, "Compose")) return;
+  ComposeArgs2 compose_args(ifst1, ifst2, ofst, copts);
+  Apply<Operation<ComposeArgs2> >("Compose", ifst1.ArcType(), &compose_args);
+}
+
+REGISTER_FST_OPERATION(Compose, StdArc, ComposeArgs1);
+REGISTER_FST_OPERATION(Compose, LogArc, ComposeArgs1);
+REGISTER_FST_OPERATION(Compose, Log64Arc, ComposeArgs1);
+REGISTER_FST_OPERATION(Compose, StdArc, ComposeArgs2);
+REGISTER_FST_OPERATION(Compose, LogArc, ComposeArgs2);
+REGISTER_FST_OPERATION(Compose, Log64Arc, ComposeArgs2);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/concat.cc b/src/script/concat.cc
new file mode 100644
index 0000000..7df4091
--- /dev/null
+++ b/src/script/concat.cc
@@ -0,0 +1,48 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/concat.h>
+
+namespace fst {
+namespace script {
+
+void Concat(MutableFstClass *ofst, const FstClass &ifst) {
+  // Nothing is done when the ArcTypesMatch check fails.
+  if (!ArcTypesMatch(*ofst, ifst, "Concat")) return;
+
+  ConcatArgs1 concat_args(ofst, ifst);
+  Apply<Operation<ConcatArgs1> >("Concat", ofst->ArcType(), &concat_args);
+}
+
+void Concat(const FstClass &ifst, MutableFstClass *ofst) {
+  // Nothing is done when the ArcTypesMatch check fails.
+  if (!ArcTypesMatch(ifst, *ofst, "Concat")) return;
+
+  ConcatArgs2 concat_args(ifst, ofst);
+  Apply<Operation<ConcatArgs2> >("Concat", ofst->ArcType(), &concat_args);
+}
+
+REGISTER_FST_OPERATION(Concat, StdArc, ConcatArgs1);
+REGISTER_FST_OPERATION(Concat, LogArc, ConcatArgs1);
+REGISTER_FST_OPERATION(Concat, Log64Arc, ConcatArgs1);
+REGISTER_FST_OPERATION(Concat, StdArc, ConcatArgs2);
+REGISTER_FST_OPERATION(Concat, LogArc, ConcatArgs2);
+REGISTER_FST_OPERATION(Concat, Log64Arc, ConcatArgs2);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/connect.cc b/src/script/connect.cc
new file mode 100644
index 0000000..177592a
--- /dev/null
+++ b/src/script/connect.cc
@@ -0,0 +1,33 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/connect.h>
+
+namespace fst {
+namespace script {
+
+void Connect(MutableFstClass *fst) {  // fst doubles as the Apply argument.
+ Apply<Operation<MutableFstClass> >("Connect", fst->ArcType(), fst);
+}
+
+REGISTER_FST_OPERATION(Connect, StdArc, MutableFstClass);
+REGISTER_FST_OPERATION(Connect, LogArc, MutableFstClass);
+REGISTER_FST_OPERATION(Connect, Log64Arc, MutableFstClass);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/convert.cc b/src/script/convert.cc
new file mode 100644
index 0000000..c651063
--- /dev/null
+++ b/src/script/convert.cc
@@ -0,0 +1,40 @@
+
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/convert.h>
+
+namespace fst {
+namespace script {
+
+FstClass *Convert(const FstClass &ifst, const string &new_type) {
+  // The inner bundle carries the inputs; the wrapper adds the retval slot.
+  ConvertInnerArgs inputs(ifst, new_type);
+  ConvertArgs call(inputs);
+
+  Apply<Operation<ConvertArgs> >("Convert", ifst.ArcType(), &call);
+
+  return call.retval;
+}
+
+REGISTER_FST_OPERATION(Convert, StdArc, ConvertArgs);
+REGISTER_FST_OPERATION(Convert, LogArc, ConvertArgs);
+REGISTER_FST_OPERATION(Convert, Log64Arc, ConvertArgs);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/decode.cc b/src/script/decode.cc
new file mode 100644
index 0000000..2e38f7f
--- /dev/null
+++ b/src/script/decode.cc
@@ -0,0 +1,36 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/decode.h>
+#include <fst/encode.h>
+
+namespace fst {
+namespace script {
+
+void Decode(MutableFstClass *ofst, const string &coder_fname) {
+  // Pack the arguments and hand them to Apply under ofst's arc type.
+  DecodeArgs decode_args(ofst, coder_fname);
+  Apply<Operation<DecodeArgs> >("Decode", ofst->ArcType(), &decode_args);
+}
+
+REGISTER_FST_OPERATION(Decode, StdArc, DecodeArgs);
+REGISTER_FST_OPERATION(Decode, LogArc, DecodeArgs);
+REGISTER_FST_OPERATION(Decode, Log64Arc, DecodeArgs);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/determinize.cc b/src/script/determinize.cc
new file mode 100644
index 0000000..e1c6759
--- /dev/null
+++ b/src/script/determinize.cc
@@ -0,0 +1,38 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/determinize.h>
+
+namespace fst {
+namespace script {
+
+void Determinize(const FstClass &ifst, MutableFstClass *ofst,
+                 const DeterminizeOptions &opts) {
+  // Nothing is done when the ArcTypesMatch check fails.
+  if (!ArcTypesMatch(ifst, *ofst, "Determinize")) return;
+
+  DeterminizeArgs dargs(ifst, ofst, opts);
+  Apply<Operation<DeterminizeArgs> >("Determinize", ifst.ArcType(), &dargs);
+}
+
+REGISTER_FST_OPERATION(Determinize, StdArc, DeterminizeArgs);
+REGISTER_FST_OPERATION(Determinize, LogArc, DeterminizeArgs);
+REGISTER_FST_OPERATION(Determinize, Log64Arc, DeterminizeArgs);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/difference.cc b/src/script/difference.cc
new file mode 100644
index 0000000..1fcfa3e
--- /dev/null
+++ b/src/script/difference.cc
@@ -0,0 +1,50 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/difference.h>
+
+namespace fst {
+namespace script {
+
+void Difference(const FstClass &ifst1, const FstClass &ifst2,
+                MutableFstClass *ofst, ComposeFilter compose_filter) {
+  // Nothing is done when either ArcTypesMatch check fails.
+  if (!ArcTypesMatch(ifst1, ifst2, "Difference")) return;
+  if (!ArcTypesMatch(*ofst, ifst1, "Difference")) return;
+  DifferenceArgs1 dargs(ifst1, ifst2, ofst, compose_filter);
+  Apply<Operation<DifferenceArgs1> >("Difference", ifst1.ArcType(), &dargs);
+}
+
+void Difference(const FstClass &ifst1, const FstClass &ifst2,
+                MutableFstClass *ofst, const ComposeOptions &copts) {
+  // Nothing is done when either ArcTypesMatch check fails.
+  if (!ArcTypesMatch(ifst1, ifst2, "Difference")) return;
+  if (!ArcTypesMatch(*ofst, ifst1, "Difference")) return;
+  DifferenceArgs2 dargs(ifst1, ifst2, ofst, copts);
+  Apply<Operation<DifferenceArgs2> >("Difference", ifst1.ArcType(), &dargs);
+}
+
+REGISTER_FST_OPERATION(Difference, StdArc, DifferenceArgs1);
+REGISTER_FST_OPERATION(Difference, LogArc, DifferenceArgs1);
+REGISTER_FST_OPERATION(Difference, Log64Arc, DifferenceArgs1);
+REGISTER_FST_OPERATION(Difference, StdArc, DifferenceArgs2);
+REGISTER_FST_OPERATION(Difference, LogArc, DifferenceArgs2);
+REGISTER_FST_OPERATION(Difference, Log64Arc, DifferenceArgs2);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/draw.cc b/src/script/draw.cc
new file mode 100644
index 0000000..b51c8b7
--- /dev/null
+++ b/src/script/draw.cc
@@ -0,0 +1,55 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <string>
+
+#include <fst/script/fst-class.h>
+#include <fst/script/draw.h>
+#include <fst/script/script-impl.h>
+
+namespace fst {
+namespace script {
+
+void DrawFst(const FstClass &fst,
+             const SymbolTable *isyms,
+             const SymbolTable *osyms,
+             const SymbolTable *ssyms,
+             bool accep,
+             const string &title,
+             float width, float height,
+             bool portrait, bool vertical,
+             float ranksep, float nodesep,
+             int fontsize, int precision,
+             bool show_weight_one,
+             ostream *ostrm,
+             const string &dest) {
+  // Every parameter is forwarded unchanged, and in declaration order, into
+  // the argument bundle; nothing is validated or defaulted here.
+  FstDrawerArgs draw_args(fst, isyms, osyms, ssyms, accep, title, width,
+                          height, portrait, vertical, ranksep, nodesep,
+                          fontsize, precision, show_weight_one, ostrm,
+                          dest);
+
+  // The arc type reported by the input FST is what Apply is given.
+  Apply<Operation<FstDrawerArgs> >("DrawFst", fst.ArcType(), &draw_args);
+}
+
+REGISTER_FST_OPERATION(DrawFst, StdArc, FstDrawerArgs);
+REGISTER_FST_OPERATION(DrawFst, LogArc, FstDrawerArgs);
+REGISTER_FST_OPERATION(DrawFst, Log64Arc, FstDrawerArgs);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/encode.cc b/src/script/encode.cc
new file mode 100644
index 0000000..1464b34
--- /dev/null
+++ b/src/script/encode.cc
@@ -0,0 +1,37 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/encode.h>
+#include <fst/script/encode.h>
+
+namespace fst {
+namespace script {
+
+void Encode(MutableFstClass *ofst, uint32 flags, bool reuse_encoder,
+            const string &coder_fname) {
+  // Pack the arguments and hand them to Apply under ofst's arc type.
+  EncodeArgs encode_args(ofst, flags, reuse_encoder, coder_fname);
+  Apply<Operation<EncodeArgs> >("Encode", ofst->ArcType(), &encode_args);
+}
+
+REGISTER_FST_OPERATION(Encode, StdArc, EncodeArgs);
+REGISTER_FST_OPERATION(Encode, LogArc, EncodeArgs);
+REGISTER_FST_OPERATION(Encode, Log64Arc, EncodeArgs);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/epsnormalize.cc b/src/script/epsnormalize.cc
new file mode 100644
index 0000000..ee9bbdf
--- /dev/null
+++ b/src/script/epsnormalize.cc
@@ -0,0 +1,37 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/epsnormalize.h>
+
+namespace fst {
+namespace script {
+
+void EpsNormalize(const FstClass &ifst, MutableFstClass *ofst,
+                  EpsNormalizeType norm_type) {
+  // Nothing is done when the ArcTypesMatch check fails.
+  if (!ArcTypesMatch(ifst, *ofst, "EpsNormalize")) return;
+  EpsNormalizeArgs norm(ifst, ofst, norm_type);
+  Apply<Operation<EpsNormalizeArgs> >("EpsNormalize", ifst.ArcType(), &norm);
+}
+
+REGISTER_FST_OPERATION(EpsNormalize, StdArc, EpsNormalizeArgs);
+REGISTER_FST_OPERATION(EpsNormalize, LogArc, EpsNormalizeArgs);
+REGISTER_FST_OPERATION(EpsNormalize, Log64Arc, EpsNormalizeArgs);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/equal.cc b/src/script/equal.cc
new file mode 100644
index 0000000..da481a5
--- /dev/null
+++ b/src/script/equal.cc
@@ -0,0 +1,40 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/equal.h>
+
+namespace fst {
+namespace script {
+
+bool Equal(const FstClass &fst1, const FstClass &fst2, float delta) {
+  // Reports false without comparing when the ArcTypesMatch check fails.
+  if (!ArcTypesMatch(fst1, fst2, "Equal")) return false;
+
+  // delta is forwarded untouched; the result comes back through retval.
+  EqualInnerArgs args(fst1, fst2, delta);
+  EqualArgs args_with_retval(args);
+  Apply<Operation<EqualArgs> >("Equal", fst1.ArcType(), &args_with_retval);
+  return args_with_retval.retval;
+}
+
+REGISTER_FST_OPERATION(Equal, StdArc, EqualArgs);
+REGISTER_FST_OPERATION(Equal, LogArc, EqualArgs);
+REGISTER_FST_OPERATION(Equal, Log64Arc, EqualArgs);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/equivalent.cc b/src/script/equivalent.cc
new file mode 100644
index 0000000..bc0f785
--- /dev/null
+++ b/src/script/equivalent.cc
@@ -0,0 +1,42 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/equivalent.h>
+
+namespace fst {
+namespace script {
+
+bool Equivalent(const FstClass &fst1, const FstClass &fst2, float delta) {
+  // Reports false without testing when the ArcTypesMatch check fails.
+  if (!ArcTypesMatch(fst1, fst2, "Equivalent")) return false;
+
+  // Forward the caller's delta. Passing the constant kDelta here would
+  // silently ignore the parameter.
+  EquivalentInnerArgs args(fst1, fst2, delta);
+  EquivalentArgs args_with_retval(args);
+  Apply<Operation<EquivalentArgs> >("Equivalent", fst1.ArcType(),
+                                    &args_with_retval);
+  return args_with_retval.retval;
+}
+
+REGISTER_FST_OPERATION(Equivalent, StdArc, EquivalentArgs);
+REGISTER_FST_OPERATION(Equivalent, LogArc, EquivalentArgs);
+REGISTER_FST_OPERATION(Equivalent, Log64Arc, EquivalentArgs);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/fst-class.cc b/src/script/fst-class.cc
new file mode 100644
index 0000000..a784b08
--- /dev/null
+++ b/src/script/fst-class.cc
@@ -0,0 +1,141 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+// These classes are only recommended for use in high-level scripting
+// applications. Most users should use the lower-level templated versions
+// corresponding to these classes.
+
+#include <fst/script/fst-class.h>
+#include <fst/script/register.h>
+#include <fst/fst-decl.h>
+#include <fst/union.h>
+#include <fst/reverse.h>
+#include <fst/equal.h>
+
+namespace fst {
+namespace script {
+
+//
+// REGISTRATION
+//
+
+REGISTER_FST_CLASSES(StdArc);
+REGISTER_FST_CLASSES(LogArc);
+REGISTER_FST_CLASSES(Log64Arc);
+
+//
+// FST CLASS METHODS
+//
+
+template<class FstT>
+FstT *ReadFst(istream &in, const string &fname) {
+  typedef IORegistration<FstT> Registration;
+
+  // A stream already in a failed state is reported as an unopenable file.
+  if (!in) {
+    LOG(ERROR) << "ReadFst: Can't open file: " << fname;
+    return 0;
+  }
+
+  // The header supplies the arc type used to look up a reader below.
+  FstHeader header;
+  if (!header.Read(in, fname)) return 0;
+
+  FstReadOptions opts(fname, &header);
+
+  typename Registration::Register *registry =
+      Registration::Register::GetRegister();
+  const typename Registration::Reader read_fn =
+      registry->GetReader(header.ArcType());
+  if (!read_fn) {
+    LOG(ERROR) << "ReadFst : unknown arc type \""
+               << header.ArcType() << "\" : " << opts.source;
+    return 0;
+  }
+
+  return read_fn(in, opts);
+}
+
+FstClass *FstClass::Read(const string &fname) {
+  // An empty file name means the FST is taken from standard input.
+  if (fname.empty()) {
+    return ReadFst<FstClass>(std::cin, "standard input");
+  }
+  ifstream file(fname.c_str(), ifstream::in | ifstream::binary);
+  return ReadFst<FstClass>(file, fname);
+}
+
+//
+// MUTABLE FST CLASS METHODS
+//
+
+MutableFstClass *MutableFstClass::Read(const string &fname, bool convert) {
+  if (convert) {
+    // Read generically; a non-mutable result is copied to a VectorFstClass.
+    FstClass *generic = FstClass::Read(fname);
+    if (!generic) return 0;
+    if (generic->Properties(fst::kMutable, false)) {
+      return static_cast<MutableFstClass *>(generic);
+    }
+    MutableFstClass *copy = new VectorFstClass(*generic);
+    delete generic;
+    return copy;
+  }
+
+  // Without conversion the input is read directly as a mutable FST.
+  if (fname.empty()) {
+    return ReadFst<MutableFstClass>(std::cin, "standard input");
+  }
+  ifstream file(fname.c_str(), ifstream::in | ifstream::binary);
+  return ReadFst<MutableFstClass>(file, fname);
+}
+
+//
+// VECTOR FST CLASS METHODS
+//
+
+IORegistration<VectorFstClass>::Entry GetVFSTRegisterEntry(
+    const string &arc_type) {
+  typedef IORegistration<VectorFstClass> Registration;
+  Registration::Register *registry = Registration::Register::GetRegister();
+  const Registration::Entry &found = registry->GetEntry(arc_type);
+
+  // A null converter is only logged; the entry is returned to the caller
+  // either way, so callers must not assume the converter is usable.
+  if (found.converter == 0) {
+    LOG(ERROR) << "Unknown arc type " << arc_type;
+  }
+  return found;
+}
+
+VectorFstClass::VectorFstClass(const FstClass &other)
+ : MutableFstClass(GetVFSTRegisterEntry(other.ArcType()).converter(other)) {
+}
+// Both constructors call the looked-up entry's function without a null check.
+VectorFstClass::VectorFstClass(const string &arc_type)
+ : MutableFstClass(GetVFSTRegisterEntry(arc_type).creator()) { }
+
+VectorFstClass *VectorFstClass::Read(const string &fname) {
+  // An empty file name means the FST is taken from standard input.
+  if (fname.empty()) {
+    return ReadFst<VectorFstClass>(std::cin, "standard input");
+  }
+  ifstream file(fname.c_str(), ifstream::in | ifstream::binary);
+  return ReadFst<VectorFstClass>(file, fname);
+}
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/info.cc b/src/script/info.cc
new file mode 100644
index 0000000..018f46b
--- /dev/null
+++ b/src/script/info.cc
@@ -0,0 +1,39 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <string>
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/info.h>
+
+namespace fst {
+namespace script {
+
+ // Bundles the arguments into an InfoArgs object and passes it to Apply<>
+ // under the operation name "PrintFstInfo", keyed by f's arc type.
+ void PrintFstInfo(const FstClass &f, bool test_properties,
+                   const string &arc_filter, const string &info_type,
+                   bool pipe, bool verify) {
+   typedef Operation<InfoArgs> InfoOp;
+
+   InfoArgs bundle(f, test_properties, arc_filter, info_type, pipe, verify);
+   Apply<InfoOp>("PrintFstInfo", f.ArcType(), &bundle);
+ }
+
+ // Register PrintFstInfo (taking InfoArgs) for the StdArc, LogArc and
+ // Log64Arc arc types.
+ REGISTER_FST_OPERATION(PrintFstInfo, StdArc, InfoArgs);
+ REGISTER_FST_OPERATION(PrintFstInfo, LogArc, InfoArgs);
+ REGISTER_FST_OPERATION(PrintFstInfo, Log64Arc, InfoArgs);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/intersect.cc b/src/script/intersect.cc
new file mode 100644
index 0000000..acb9c49
--- /dev/null
+++ b/src/script/intersect.cc
@@ -0,0 +1,50 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/intersect.h>
+
+namespace fst {
+namespace script {
+
+ // Intersect overload selecting the compose filter by enum. Returns without
+ // doing anything unless ifst1, ifst2 and *ofst all share one arc type;
+ // otherwise hands an IntersectArgs1 bundle to Apply<> under "Intersect".
+ void Intersect(const FstClass &ifst1, const FstClass &ifst2,
+                MutableFstClass *ofst, ComposeFilter compose_filter) {
+   if (!ArcTypesMatch(ifst1, ifst2, "Intersect")) return;
+   if (!ArcTypesMatch(*ofst, ifst1, "Intersect")) return;
+
+   typedef Operation<IntersectArgs1> IntersectOp;
+   IntersectArgs1 bundle(ifst1, ifst2, ofst, compose_filter);
+   Apply<IntersectOp>("Intersect", ifst1.ArcType(), &bundle);
+ }
+
+ // Intersect overload taking full ComposeOptions. Returns without doing
+ // anything unless ifst1, ifst2 and *ofst all share one arc type; otherwise
+ // hands an IntersectArgs2 bundle to Apply<> under "Intersect".
+ void Intersect(const FstClass &ifst1, const FstClass &ifst2,
+                MutableFstClass *ofst, const ComposeOptions &copts) {
+   if (!ArcTypesMatch(ifst1, ifst2, "Intersect")) return;
+   if (!ArcTypesMatch(*ofst, ifst1, "Intersect")) return;
+
+   typedef Operation<IntersectArgs2> IntersectOp;
+   IntersectArgs2 bundle(ifst1, ifst2, ofst, copts);
+   Apply<IntersectOp>("Intersect", ifst1.ArcType(), &bundle);
+ }
+
+ // Register both Intersect argument bundles (IntersectArgs1: ComposeFilter
+ // overload, IntersectArgs2: ComposeOptions overload) for the StdArc, LogArc
+ // and Log64Arc arc types.
+ REGISTER_FST_OPERATION(Intersect, StdArc, IntersectArgs1);
+ REGISTER_FST_OPERATION(Intersect, LogArc, IntersectArgs1);
+ REGISTER_FST_OPERATION(Intersect, Log64Arc, IntersectArgs1);
+ REGISTER_FST_OPERATION(Intersect, StdArc, IntersectArgs2);
+ REGISTER_FST_OPERATION(Intersect, LogArc, IntersectArgs2);
+ REGISTER_FST_OPERATION(Intersect, Log64Arc, IntersectArgs2);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/invert.cc b/src/script/invert.cc
new file mode 100644
index 0000000..a82f44d
--- /dev/null
+++ b/src/script/invert.cc
@@ -0,0 +1,33 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/invert.h>
+
+namespace fst {
+namespace script {
+
+ // Passes fst itself as the argument object to Apply<> under the operation
+ // name "Invert", keyed by fst's arc type.
+ void Invert(MutableFstClass *fst) {
+   typedef Operation<MutableFstClass> InvertOp;
+
+   Apply<InvertOp>("Invert", fst->ArcType(), fst);
+ }
+
+ // Register Invert (taking a MutableFstClass) for the StdArc, LogArc and
+ // Log64Arc arc types.
+ REGISTER_FST_OPERATION(Invert, StdArc, MutableFstClass);
+ REGISTER_FST_OPERATION(Invert, LogArc, MutableFstClass);
+ REGISTER_FST_OPERATION(Invert, Log64Arc, MutableFstClass);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/map.cc b/src/script/map.cc
new file mode 100644
index 0000000..2ef1086
--- /dev/null
+++ b/src/script/map.cc
@@ -0,0 +1,39 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/map.h>
+
+namespace fst {
+namespace script {
+
+ // Wraps the inputs in MapInnerArgs, then in a MapArgs object that also
+ // carries the return slot, passes it to Apply<> under "Map" keyed by ifst's
+ // arc type, and returns whatever ended up in the retval member.
+ FstClass *Map(const FstClass& ifst, MapType map_type,
+               float delta, const WeightClass &w) {
+   typedef Operation<MapArgs> MapOp;
+
+   MapInnerArgs inner(ifst, map_type, delta, w);
+   MapArgs call(inner);
+   Apply<MapOp>("Map", ifst.ArcType(), &call);
+   return call.retval;
+ }
+
+ // Register Map (taking MapArgs) for the StdArc, LogArc and Log64Arc arc
+ // types.
+ REGISTER_FST_OPERATION(Map, StdArc, MapArgs);
+ REGISTER_FST_OPERATION(Map, LogArc, MapArgs);
+ REGISTER_FST_OPERATION(Map, Log64Arc, MapArgs);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/minimize.cc b/src/script/minimize.cc
new file mode 100644
index 0000000..461d20a
--- /dev/null
+++ b/src/script/minimize.cc
@@ -0,0 +1,36 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/minimize.h>
+
+namespace fst {
+namespace script {
+
+ // Passes a MinimizeArgs bundle to Apply<> under "Minimize", keyed by
+ // ofst1's arc type. ofst2 may be null; when it is non-null its arc type must
+ // match ofst1's, otherwise the call returns without doing anything.
+ void Minimize(MutableFstClass *ofst1, MutableFstClass *ofst2, float delta) {
+   if (ofst2) {
+     if (!ArcTypesMatch(*ofst1, *ofst2, "Minimize")) return;
+   }
+
+   typedef Operation<MinimizeArgs> MinimizeOp;
+   MinimizeArgs bundle(ofst1, ofst2, delta);
+   Apply<MinimizeOp>("Minimize", ofst1->ArcType(), &bundle);
+ }
+
+ // Register Minimize (taking MinimizeArgs) for the StdArc, LogArc and
+ // Log64Arc arc types.
+ REGISTER_FST_OPERATION(Minimize, StdArc, MinimizeArgs);
+ REGISTER_FST_OPERATION(Minimize, LogArc, MinimizeArgs);
+ REGISTER_FST_OPERATION(Minimize, Log64Arc, MinimizeArgs);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/print.cc b/src/script/print.cc
new file mode 100644
index 0000000..4277d20
--- /dev/null
+++ b/src/script/print.cc
@@ -0,0 +1,41 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <string>
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/print.h>
+
+namespace fst {
+namespace script {
+
+ // Bundles the FST, the three optional symbol tables, the formatting flags,
+ // the output stream and the destination name into FstPrinterArgs and passes
+ // it to Apply<> under "PrintFst", keyed by fst's arc type.
+ void PrintFst(const FstClass &fst, ostream &ostrm, const string &dest,
+               const SymbolTable *isyms,
+               const SymbolTable *osyms,
+               const SymbolTable *ssyms,
+               bool accept, bool show_weight_one) {
+   typedef Operation<FstPrinterArgs> PrintOp;
+
+   FstPrinterArgs bundle(fst, isyms, osyms, ssyms, accept, show_weight_one,
+                         &ostrm, dest);
+   Apply<PrintOp>("PrintFst", fst.ArcType(), &bundle);
+ }
+
+ // Register PrintFst (taking FstPrinterArgs) for the StdArc, LogArc and
+ // Log64Arc arc types.
+ REGISTER_FST_OPERATION(PrintFst, StdArc, FstPrinterArgs);
+ REGISTER_FST_OPERATION(PrintFst, LogArc, FstPrinterArgs);
+ REGISTER_FST_OPERATION(PrintFst, Log64Arc, FstPrinterArgs);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/project.cc b/src/script/project.cc
new file mode 100644
index 0000000..690684a
--- /dev/null
+++ b/src/script/project.cc
@@ -0,0 +1,35 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/project.h>
+
+namespace fst {
+namespace script {
+
+ // Passes a ProjectArgs bundle (the FST plus the projection type) to Apply<>
+ // under "Project", keyed by ofst's arc type.
+ void Project(MutableFstClass *ofst, ProjectType project_type) {
+   typedef Operation<ProjectArgs> ProjectOp;
+
+   ProjectArgs bundle(ofst, project_type);
+   Apply<ProjectOp>("Project", ofst->ArcType(), &bundle);
+ }
+
+ // Register Project (taking ProjectArgs) for the StdArc, LogArc and
+ // Log64Arc arc types.
+ REGISTER_FST_OPERATION(Project, StdArc, ProjectArgs);
+ REGISTER_FST_OPERATION(Project, LogArc, ProjectArgs);
+ REGISTER_FST_OPERATION(Project, Log64Arc, ProjectArgs);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/prune.cc b/src/script/prune.cc
new file mode 100644
index 0000000..36ca22f
--- /dev/null
+++ b/src/script/prune.cc
@@ -0,0 +1,76 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/prune.h>
+
+namespace fst {
+namespace script {
+
+
+// 1
+ // In-place Prune driven by a PruneOptions object: passes a PruneArgs1 bundle
+ // to Apply<> under "Prune", keyed by fst's arc type.
+ void Prune(MutableFstClass *fst, const PruneOptions &opts) {
+   typedef Operation<PruneArgs1> PruneOp;
+
+   PruneArgs1 bundle(fst, opts);
+   Apply<PruneOp>("Prune", fst->ArcType(), &bundle);
+ }
+
+// 2
+ // Prune from ifst into *fst, driven by a PruneOptions object.
+ //
+ // Both FSTs must share one arc type: the operation is looked up for a single
+ // arc type, so a mismatch is rejected up front (with the error logged by
+ // ArcTypesMatch), as the other two-FST operations in this library do. On
+ // success a PruneArgs2 bundle is passed to Apply<> under "Prune", keyed by
+ // fst's arc type.
+ void Prune(const FstClass &ifst, MutableFstClass *fst,
+            const PruneOptions &opts) {
+   if (!ArcTypesMatch(ifst, *fst, "Prune")) return;
+
+   PruneArgs2 args(ifst, fst, opts);
+
+   Apply<Operation<PruneArgs2> >("Prune", fst->ArcType(), &args);
+ }
+
+// 3
+ // Prune from ifst into *ofst using an explicit weight threshold, state
+ // threshold and delta.
+ //
+ // Both FSTs must share one arc type: the operation is looked up for a single
+ // arc type, so a mismatch is rejected up front (with the error logged by
+ // ArcTypesMatch), as the other two-FST operations in this library do. On
+ // success a PruneArgs3 bundle is passed to Apply<> under "Prune", keyed by
+ // ifst's arc type.
+ void Prune(const FstClass &ifst,
+            MutableFstClass *ofst,
+            const WeightClass& weight_threshold,
+            int64 state_threshold, float delta) {
+   if (!ArcTypesMatch(ifst, *ofst, "Prune")) return;
+
+   PruneArgs3 args(ifst, ofst, weight_threshold, state_threshold, delta);
+
+   Apply<Operation<PruneArgs3> >("Prune", ifst.ArcType(), &args);
+ }
+
+// 4
+ // In-place Prune using an explicit weight threshold, state threshold and
+ // delta: passes a PruneArgs4 bundle to Apply<> under "Prune", keyed by fst's
+ // arc type.
+ void Prune(MutableFstClass *fst, const WeightClass& weight_threshold,
+            int64 state_threshold, float delta) {
+   typedef Operation<PruneArgs4> PruneOp;
+
+   PruneArgs4 bundle(fst, weight_threshold, state_threshold, delta);
+   Apply<PruneOp>("Prune", fst->ArcType(), &bundle);
+ }
+
+// 1
+REGISTER_FST_OPERATION(Prune, StdArc, PruneArgs1);
+REGISTER_FST_OPERATION(Prune, LogArc, PruneArgs1);
+REGISTER_FST_OPERATION(Prune, Log64Arc, PruneArgs1);
+// 2
+REGISTER_FST_OPERATION(Prune, StdArc, PruneArgs2);
+REGISTER_FST_OPERATION(Prune, LogArc, PruneArgs2);
+REGISTER_FST_OPERATION(Prune, Log64Arc, PruneArgs2);
+// 3
+REGISTER_FST_OPERATION(Prune, StdArc, PruneArgs3);
+REGISTER_FST_OPERATION(Prune, LogArc, PruneArgs3);
+REGISTER_FST_OPERATION(Prune, Log64Arc, PruneArgs3);
+// 4
+REGISTER_FST_OPERATION(Prune, StdArc, PruneArgs4);
+REGISTER_FST_OPERATION(Prune, LogArc, PruneArgs4);
+REGISTER_FST_OPERATION(Prune, Log64Arc, PruneArgs4);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/push.cc b/src/script/push.cc
new file mode 100644
index 0000000..ed3d1d0
--- /dev/null
+++ b/src/script/push.cc
@@ -0,0 +1,49 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/push.h>
+
+namespace fst {
+namespace script {
+
+// 1
+ // In-place Push: passes a PushArgs1 bundle to Apply<> under "Push", keyed by
+ // ofst's arc type.
+ void Push(MutableFstClass *ofst, ReweightType dir, float delta,
+           bool remove_total_weight) {
+   typedef Operation<PushArgs1> PushOp;
+
+   PushArgs1 bundle(ofst, dir, delta, remove_total_weight);
+   Apply<PushOp>("Push", ofst->ArcType(), &bundle);
+ }
+
+// 2
+ // Push from ifst into *ofst with explicit flags. Does nothing when the two
+ // arc types differ; otherwise passes a PushArgs2 bundle to Apply<> under
+ // "Push", keyed by ifst's arc type.
+ void Push(const FstClass &ifst, MutableFstClass *ofst, uint32 flags,
+           ReweightType dir, float delta) {
+   if (ArcTypesMatch(ifst, *ofst, "Push")) {
+     typedef Operation<PushArgs2> PushOp;
+
+     PushArgs2 bundle(ifst, ofst, flags, dir, delta);
+     Apply<PushOp>("Push", ifst.ArcType(), &bundle);
+   }
+ }
+
+
+ // Register both Push argument bundles (PushArgs1: in-place overload,
+ // PushArgs2: input/output overload) for the StdArc, LogArc and Log64Arc arc
+ // types.
+ REGISTER_FST_OPERATION(Push, StdArc, PushArgs1);
+ REGISTER_FST_OPERATION(Push, LogArc, PushArgs1);
+ REGISTER_FST_OPERATION(Push, Log64Arc, PushArgs1);
+ REGISTER_FST_OPERATION(Push, StdArc, PushArgs2);
+ REGISTER_FST_OPERATION(Push, LogArc, PushArgs2);
+ REGISTER_FST_OPERATION(Push, Log64Arc, PushArgs2);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/randequivalent.cc b/src/script/randequivalent.cc
new file mode 100644
index 0000000..eb44ee9
--- /dev/null
+++ b/src/script/randequivalent.cc
@@ -0,0 +1,61 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/randequivalent.h>
+
+namespace fst {
+namespace script {
+
+// 1
+ // RandEquivalent overload taking a maximum path length. Returns false
+ // immediately when the arc types differ; otherwise passes the wrapped
+ // arguments to Apply<> under "RandEquivalent" (keyed by fst1's arc type) and
+ // returns the retval member of the argument object.
+ bool RandEquivalent(const FstClass &fst1, const FstClass &fst2,
+                     int32 seed, ssize_t num_paths, float delta,
+                     int path_length) {
+   if (!ArcTypesMatch(fst1, fst2, "RandEquivalent")) {
+     return false;
+   }
+
+   typedef Operation<RandEquivalentArgs1> EquivOp;
+
+   RandEquivalentInnerArgs1 inner(fst1, fst2, seed, num_paths, delta,
+                                  path_length);
+   RandEquivalentArgs1 call(inner);
+   Apply<EquivOp>("RandEquivalent", fst1.ArcType(), &call);
+   return call.retval;
+ }
+
+// 2
+ // RandEquivalent overload taking RandGenOptions. Returns false immediately
+ // when the arc types differ; otherwise passes the wrapped arguments to
+ // Apply<> under "RandEquivalent" (keyed by fst1's arc type) and returns the
+ // retval member of the argument object.
+ bool RandEquivalent(const FstClass &fst1, const FstClass &fst2, int32 seed,
+                     ssize_t num_paths, float delta,
+                     const RandGenOptions<RandArcSelection> &opts) {
+   if (!ArcTypesMatch(fst1, fst2, "RandEquivalent")) {
+     return false;
+   }
+
+   typedef Operation<RandEquivalentArgs2> EquivOp;
+
+   RandEquivalentInnerArgs2 inner(fst1, fst2, seed, num_paths, delta, opts);
+   RandEquivalentArgs2 call(inner);
+   Apply<EquivOp>("RandEquivalent", fst1.ArcType(), &call);
+   return call.retval;
+ }
+
+ // Register both RandEquivalent argument bundles (RandEquivalentArgs1:
+ // path-length overload, RandEquivalentArgs2: RandGenOptions overload) for the
+ // StdArc, LogArc and Log64Arc arc types.
+ REGISTER_FST_OPERATION(RandEquivalent, StdArc, RandEquivalentArgs1);
+ REGISTER_FST_OPERATION(RandEquivalent, LogArc, RandEquivalentArgs1);
+ REGISTER_FST_OPERATION(RandEquivalent, Log64Arc, RandEquivalentArgs1);
+ REGISTER_FST_OPERATION(RandEquivalent, StdArc, RandEquivalentArgs2);
+ REGISTER_FST_OPERATION(RandEquivalent, LogArc, RandEquivalentArgs2);
+ REGISTER_FST_OPERATION(RandEquivalent, Log64Arc, RandEquivalentArgs2);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/randgen.cc b/src/script/randgen.cc
new file mode 100644
index 0000000..f0efc63
--- /dev/null
+++ b/src/script/randgen.cc
@@ -0,0 +1,37 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/randgen.h>
+
+namespace fst {
+namespace script {
+
+ // Does nothing when ifst and *ofst have different arc types; otherwise
+ // passes a RandGenArgs bundle to Apply<> under "RandGen", keyed by ifst's
+ // arc type.
+ void RandGen(const FstClass &ifst, MutableFstClass *ofst, int32 seed,
+              const RandGenOptions<RandArcSelection> &opts) {
+   if (ArcTypesMatch(ifst, *ofst, "RandGen")) {
+     typedef Operation<RandGenArgs> RandGenOp;
+
+     RandGenArgs bundle(ifst, ofst, seed, opts);
+     Apply<RandGenOp>("RandGen", ifst.ArcType(), &bundle);
+   }
+ }
+
+ // Register RandGen (taking RandGenArgs) for the StdArc, LogArc and
+ // Log64Arc arc types.
+ REGISTER_FST_OPERATION(RandGen, StdArc, RandGenArgs);
+ REGISTER_FST_OPERATION(RandGen, LogArc, RandGenArgs);
+ REGISTER_FST_OPERATION(RandGen, Log64Arc, RandGenArgs);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/relabel.cc b/src/script/relabel.cc
new file mode 100644
index 0000000..a214079
--- /dev/null
+++ b/src/script/relabel.cc
@@ -0,0 +1,68 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/relabel.h>
+
+namespace fst {
+namespace script {
+
+// 1
+ // Relabel overload taking old/new symbol-table pairs for the input and
+ // output sides plus attach flags: passes a RelabelArgs1 bundle to Apply<>
+ // under "Relabel", keyed by ofst's arc type.
+ void Relabel(MutableFstClass *ofst,
+              const SymbolTable *old_isyms, const SymbolTable *relabel_isyms,
+              bool attach_new_isyms,
+              const SymbolTable *old_osyms, const SymbolTable *relabel_osyms,
+              bool attach_new_osyms) {
+   typedef Operation<RelabelArgs1> RelabelOp;
+
+   RelabelArgs1 bundle(ofst, old_isyms, relabel_isyms, attach_new_isyms,
+                       old_osyms, relabel_osyms, attach_new_osyms);
+   Apply<RelabelOp>("Relabel", ofst->ArcType(), &bundle);
+ }
+
+// 2
+ // Relabel overload taking explicit label-pair vectors for the input and
+ // output sides: passes a RelabelArgs2 bundle to Apply<> under "Relabel",
+ // keyed by ofst's arc type.
+ void Relabel(MutableFstClass *ofst,
+              const vector<pair<int64, int64> > &ipairs,
+              const vector<pair<int64, int64> > &opairs) {
+   typedef Operation<RelabelArgs2> RelabelOp;
+
+   RelabelArgs2 bundle(ofst, ipairs, opairs);
+   Apply<RelabelOp>("Relabel", ofst->ArcType(), &bundle);
+ }
+
+// 3
+ // Relabel overload taking only the new input and output symbol tables:
+ // passes a RelabelArgs3 bundle to Apply<> under "Relabel", keyed by fst's
+ // arc type.
+ void Relabel(MutableFstClass *fst,
+              const SymbolTable *new_isymbols,
+              const SymbolTable *new_osymbols) {
+   typedef Operation<RelabelArgs3> RelabelOp;
+
+   RelabelArgs3 bundle(fst, new_isymbols, new_osymbols);
+   Apply<RelabelOp>("Relabel", fst->ArcType(), &bundle);
+ }
+
+// 1
+REGISTER_FST_OPERATION(Relabel, StdArc, RelabelArgs1);
+REGISTER_FST_OPERATION(Relabel, LogArc, RelabelArgs1);
+REGISTER_FST_OPERATION(Relabel, Log64Arc, RelabelArgs1);
+
+// 2
+REGISTER_FST_OPERATION(Relabel, StdArc, RelabelArgs2);
+REGISTER_FST_OPERATION(Relabel, LogArc, RelabelArgs2);
+REGISTER_FST_OPERATION(Relabel, Log64Arc, RelabelArgs2);
+
+// 3
+REGISTER_FST_OPERATION(Relabel, StdArc, RelabelArgs3);
+REGISTER_FST_OPERATION(Relabel, LogArc, RelabelArgs3);
+REGISTER_FST_OPERATION(Relabel, Log64Arc, RelabelArgs3);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/replace.cc b/src/script/replace.cc
new file mode 100644
index 0000000..40c09be
--- /dev/null
+++ b/src/script/replace.cc
@@ -0,0 +1,45 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/replace.h>
+
+namespace fst {
+namespace script {
+
+ // Checks that every FST in tuples, and *ofst, share one arc type, then
+ // passes a ReplaceArgs bundle to Apply<> under "Replace", keyed by ofst's
+ // arc type.
+ //
+ // tuples pairs a label with the FST for that label; root and
+ // epsilon_on_replace are forwarded unchanged. An empty tuples vector is
+ // rejected with a logged error. If any arc types differ, ArcTypesMatch logs
+ // the mismatch and the call returns without doing anything.
+ void Replace(const vector<pair<int64, const FstClass *> > &tuples,
+              MutableFstClass *ofst, const int64 &root,
+              bool epsilon_on_replace) {
+   // Guard first: with an empty vector the former "size() - 1" bound wrapped
+   // around to a huge unsigned value and tuples[0] was read out of range.
+   if (tuples.empty()) {
+     LOG(ERROR) << "Replace: no FSTs given";
+     return;
+   }
+
+   // Compare each FST with its predecessor; starting at 1 needs no
+   // subtraction on the unsigned bound.
+   for (size_t i = 1; i < tuples.size(); ++i) {
+     if (!ArcTypesMatch(*tuples[i - 1].second, *tuples[i].second, "Replace")) {
+       return;
+     }
+   }
+
+   if (!ArcTypesMatch(*tuples[0].second, *ofst, "Replace")) return;
+
+   ReplaceArgs args(tuples, ofst, root, epsilon_on_replace);
+
+   Apply<Operation<ReplaceArgs> >("Replace", ofst->ArcType(), &args);
+ }
+
+ // Register Replace (taking ReplaceArgs) for the StdArc, LogArc and
+ // Log64Arc arc types.
+ REGISTER_FST_OPERATION(Replace, StdArc, ReplaceArgs);
+ REGISTER_FST_OPERATION(Replace, LogArc, ReplaceArgs);
+ REGISTER_FST_OPERATION(Replace, Log64Arc, ReplaceArgs);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/reverse.cc b/src/script/reverse.cc
new file mode 100644
index 0000000..c1c7305
--- /dev/null
+++ b/src/script/reverse.cc
@@ -0,0 +1,37 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/reverse.h>
+
+namespace fst {
+namespace script {
+
+ // Does nothing when fst1 and *fst2 have different arc types; otherwise
+ // passes a ReverseArgs bundle to Apply<> under "Reverse", keyed by fst1's
+ // arc type.
+ void Reverse(const FstClass &fst1, MutableFstClass *fst2) {
+   if (ArcTypesMatch(fst1, *fst2, "Reverse")) {
+     typedef Operation<ReverseArgs> ReverseOp;
+
+     ReverseArgs bundle(fst1, fst2);
+     Apply<ReverseOp>("Reverse", fst1.ArcType(), &bundle);
+   }
+ }
+
+ // Register Reverse (taking ReverseArgs) for the StdArc, LogArc and
+ // Log64Arc arc types.
+ REGISTER_FST_OPERATION(Reverse, StdArc, ReverseArgs);
+ REGISTER_FST_OPERATION(Reverse, LogArc, ReverseArgs);
+ REGISTER_FST_OPERATION(Reverse, Log64Arc, ReverseArgs);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/reweight.cc b/src/script/reweight.cc
new file mode 100644
index 0000000..8f2dfd5
--- /dev/null
+++ b/src/script/reweight.cc
@@ -0,0 +1,36 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/reweight.h>
+
+namespace fst {
+namespace script {
+
+ // Passes a ReweightArgs bundle (the FST, the potential vector and the
+ // reweight type) to Apply<> under "Reweight", keyed by fst's arc type.
+ void Reweight(MutableFstClass *fst, const vector<WeightClass> &potential,
+               ReweightType reweight_type) {
+   typedef Operation<ReweightArgs> ReweightOp;
+
+   ReweightArgs bundle(fst, potential, reweight_type);
+   Apply<ReweightOp>("Reweight", fst->ArcType(), &bundle);
+ }
+
+ // Register Reweight (taking ReweightArgs) for the StdArc, LogArc and
+ // Log64Arc arc types.
+ REGISTER_FST_OPERATION(Reweight, StdArc, ReweightArgs);
+ REGISTER_FST_OPERATION(Reweight, LogArc, ReweightArgs);
+ REGISTER_FST_OPERATION(Reweight, Log64Arc, ReweightArgs);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/rmepsilon.cc b/src/script/rmepsilon.cc
new file mode 100644
index 0000000..4d217ea
--- /dev/null
+++ b/src/script/rmepsilon.cc
@@ -0,0 +1,61 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/rmepsilon.h>
+
+namespace fst {
+namespace script {
+
+ // RmEpsilon from ifst into *ofst. Does nothing when the arc types differ;
+ // otherwise passes a RmEpsilonArgs1 bundle to Apply<> under "RmEpsilon",
+ // keyed by ifst's arc type.
+ void RmEpsilon(const FstClass &ifst, MutableFstClass *ofst,
+                bool reverse, const RmEpsilonOptions &opts) {
+   if (ArcTypesMatch(ifst, *ofst, "RmEpsilon")) {
+     typedef Operation<RmEpsilonArgs1> RmEpsilonOp;
+
+     RmEpsilonArgs1 bundle(ifst, ofst, reverse, opts);
+     Apply<RmEpsilonOp>("RmEpsilon", ifst.ArcType(), &bundle);
+   }
+ }
+
+ // In-place RmEpsilon with explicit connect flag, thresholds and delta:
+ // passes a RmEpsilonArgs2 bundle to Apply<> under "RmEpsilon", keyed by
+ // fst's arc type.
+ void RmEpsilon(MutableFstClass *fst, bool connect,
+                const WeightClass &weight_threshold,
+                int64 state_threshold, float delta) {
+   typedef Operation<RmEpsilonArgs2> RmEpsilonOp;
+
+   RmEpsilonArgs2 bundle(fst, connect, weight_threshold, state_threshold,
+                         delta);
+   Apply<RmEpsilonOp>("RmEpsilon", fst->ArcType(), &bundle);
+ }
+
+ // In-place RmEpsilon taking a distance vector and RmEpsilonOptions: passes
+ // a RmEpsilonArgs3 bundle to Apply<> under "RmEpsilon", keyed by fst's arc
+ // type.
+ void RmEpsilon(MutableFstClass *fst, vector<WeightClass> *distance,
+                const RmEpsilonOptions &opts) {
+   typedef Operation<RmEpsilonArgs3> RmEpsilonOp;
+
+   RmEpsilonArgs3 bundle(fst, distance, opts);
+   Apply<RmEpsilonOp>("RmEpsilon", fst->ArcType(), &bundle);
+ }
+
+ // RmEpsilonArgs1: input/output overload taking RmEpsilonOptions.
+ REGISTER_FST_OPERATION(RmEpsilon, StdArc, RmEpsilonArgs1);
+ REGISTER_FST_OPERATION(RmEpsilon, LogArc, RmEpsilonArgs1);
+ REGISTER_FST_OPERATION(RmEpsilon, Log64Arc, RmEpsilonArgs1);
+
+ // RmEpsilonArgs2: in-place overload taking explicit thresholds.
+ REGISTER_FST_OPERATION(RmEpsilon, StdArc, RmEpsilonArgs2);
+ REGISTER_FST_OPERATION(RmEpsilon, LogArc, RmEpsilonArgs2);
+ REGISTER_FST_OPERATION(RmEpsilon, Log64Arc, RmEpsilonArgs2);
+
+ // RmEpsilonArgs3: in-place overload taking a distance vector and options.
+ REGISTER_FST_OPERATION(RmEpsilon, StdArc, RmEpsilonArgs3);
+ REGISTER_FST_OPERATION(RmEpsilon, LogArc, RmEpsilonArgs3);
+ REGISTER_FST_OPERATION(RmEpsilon, Log64Arc, RmEpsilonArgs3);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/script-impl.cc b/src/script/script-impl.cc
new file mode 100644
index 0000000..96cac72
--- /dev/null
+++ b/src/script/script-impl.cc
@@ -0,0 +1,39 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <string>
+
+#include <fst/script/script-impl.h>
+
+namespace fst {
+namespace script {
+
+//
+// Utility function for checking that arc types match.
+//
+bool ArcTypesMatch(const FstClass &a, const FstClass &b,
+ const string &op_name) {
+ if (a.ArcType() != b.ArcType()) {
+ LOG(ERROR) << "FSTs with non-matching arc types passed to " << op_name
+ << ":\n\t" << a.ArcType() << " and " << b.ArcType();
+ return false;
+ } else {
+ return true;
+ }
+}
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/shortest-distance.cc b/src/script/shortest-distance.cc
new file mode 100644
index 0000000..49ee09b
--- /dev/null
+++ b/src/script/shortest-distance.cc
@@ -0,0 +1,66 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/shortest-distance.h>
+
+namespace fst {
+namespace script {
+
+// 1
+ // ShortestDistance overload driven by ShortestDistanceOptions: passes a
+ // ShortestDistanceArgs1 bundle to Apply<> under "ShortestDistance", keyed by
+ // fst's arc type.
+ void ShortestDistance(const FstClass &fst, vector<WeightClass> *distance,
+                       const ShortestDistanceOptions &opts) {
+   typedef Operation<ShortestDistanceArgs1> DistanceOp;
+
+   ShortestDistanceArgs1 bundle(fst, distance, opts);
+   Apply<DistanceOp>("ShortestDistance", fst.ArcType(), &bundle);
+ }
+
+// 2
+ // ShortestDistance overload taking a reverse flag and delta: passes a
+ // ShortestDistanceArgs2 bundle to Apply<> under "ShortestDistance", keyed by
+ // ifst's arc type.
+ void ShortestDistance(const FstClass &ifst, vector<WeightClass> *distance,
+                       bool reverse, double delta) {
+   typedef Operation<ShortestDistanceArgs2> DistanceOp;
+
+   ShortestDistanceArgs2 bundle(ifst, distance, reverse, delta);
+   Apply<DistanceOp>("ShortestDistance", ifst.ArcType(), &bundle);
+ }
+
+// 3
+ // ShortestDistance overload returning a single WeightClass: passes a
+ // ShortestDistanceArgs3 object to Apply<> under "ShortestDistance" (keyed by
+ // ifst's arc type) and returns that object's retval member.
+ WeightClass ShortestDistance(const FstClass &ifst) {
+   typedef Operation<ShortestDistanceArgs3> DistanceOp;
+
+   ShortestDistanceArgs3 call(ifst);
+   Apply<DistanceOp>("ShortestDistance", ifst.ArcType(), &call);
+   return call.retval;
+ }
+
+ // ShortestDistanceArgs1: overload taking ShortestDistanceOptions.
+ REGISTER_FST_OPERATION(ShortestDistance, StdArc, ShortestDistanceArgs1);
+ REGISTER_FST_OPERATION(ShortestDistance, LogArc, ShortestDistanceArgs1);
+ REGISTER_FST_OPERATION(ShortestDistance, Log64Arc, ShortestDistanceArgs1);
+
+ // ShortestDistanceArgs2: overload taking a reverse flag and delta.
+ REGISTER_FST_OPERATION(ShortestDistance, StdArc, ShortestDistanceArgs2);
+ REGISTER_FST_OPERATION(ShortestDistance, LogArc, ShortestDistanceArgs2);
+ REGISTER_FST_OPERATION(ShortestDistance, Log64Arc, ShortestDistanceArgs2);
+
+ // ShortestDistanceArgs3: overload returning a single WeightClass.
+ REGISTER_FST_OPERATION(ShortestDistance, StdArc, ShortestDistanceArgs3);
+ REGISTER_FST_OPERATION(ShortestDistance, LogArc, ShortestDistanceArgs3);
+ REGISTER_FST_OPERATION(ShortestDistance, Log64Arc, ShortestDistanceArgs3);
+
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/shortest-path.cc b/src/script/shortest-path.cc
new file mode 100644
index 0000000..75749a0
--- /dev/null
+++ b/src/script/shortest-path.cc
@@ -0,0 +1,53 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/shortest-path.h>
+
+namespace fst {
+namespace script {
+
+// Shortest-path overload configured through a ShortestPathOptions object.
+// Does nothing unless ArcTypesMatch() accepts `ifst` and `*ofst`; otherwise
+// packs the arguments into ShortestPathArgs1 and forwards them to Apply()
+// under the name "ShortestPath" with the input FST's arc type.
+void ShortestPath(const FstClass &ifst, MutableFstClass *ofst,
+                  vector<WeightClass> *distance,
+                  const ShortestPathOptions &opts) {
+  if (ArcTypesMatch(ifst, *ofst, "ShortestPath")) {
+    typedef Operation<ShortestPathArgs1> Op;
+    ShortestPathArgs1 packed(ifst, ofst, distance, opts);
+    Apply<Op>("ShortestPath", ifst.ArcType(), &packed);
+  }
+}
+
+// Shortest-path overload taking the individual parameters (`n`, `unique`,
+// `first_path`, `weight_threshold`, `state_threshold`) directly.
+// Does nothing unless ArcTypesMatch() accepts `ifst` and `*ofst`; otherwise
+// packs the arguments into ShortestPathArgs2 and forwards them to Apply()
+// under the name "ShortestPath" with the input FST's arc type.
+void ShortestPath(const FstClass &ifst, MutableFstClass *ofst,
+                  size_t n, bool unique, bool first_path,
+                  WeightClass weight_threshold, int64 state_threshold) {
+  if (ArcTypesMatch(ifst, *ofst, "ShortestPath")) {
+    typedef Operation<ShortestPathArgs2> Op;
+    ShortestPathArgs2 packed(ifst, ofst, n, unique, first_path,
+                             weight_threshold, state_threshold);
+    Apply<Op>("ShortestPath", ifst.ArcType(), &packed);
+  }
+}
+
+
+// Register ShortestPath for each of the three arc types (StdArc, LogArc,
+// Log64Arc), once per argument pack used by the overloads above.
+// NOTE(review): presumably this is what lets Apply() resolve the operation by
+// name and arc type -- confirm against script-impl.h.
+REGISTER_FST_OPERATION(ShortestPath, StdArc, ShortestPathArgs1);
+REGISTER_FST_OPERATION(ShortestPath, LogArc, ShortestPathArgs1);
+REGISTER_FST_OPERATION(ShortestPath, Log64Arc, ShortestPathArgs1);
+
+REGISTER_FST_OPERATION(ShortestPath, StdArc, ShortestPathArgs2);
+REGISTER_FST_OPERATION(ShortestPath, LogArc, ShortestPathArgs2);
+REGISTER_FST_OPERATION(ShortestPath, Log64Arc, ShortestPathArgs2);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/synchronize.cc b/src/script/synchronize.cc
new file mode 100644
index 0000000..edb09c6
--- /dev/null
+++ b/src/script/synchronize.cc
@@ -0,0 +1,36 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/synchronize.h>
+
+namespace fst {
+namespace script {
+
+// Runs the "Synchronize" operation from `ifst` into `*ofst`.
+// Does nothing unless ArcTypesMatch() accepts the two FSTs; otherwise packs
+// them into SynchronizeArgs and forwards to Apply() with the input FST's arc
+// type.
+void Synchronize(const FstClass &ifst, MutableFstClass *ofst) {
+  if (ArcTypesMatch(ifst, *ofst, "Synchronize")) {
+    typedef Operation<SynchronizeArgs> Op;
+    SynchronizeArgs packed(ifst, ofst);
+    Apply<Op>("Synchronize", ifst.ArcType(), &packed);
+  }
+}
+
+// Register Synchronize for each of the three arc types (StdArc, LogArc,
+// Log64Arc).
+// NOTE(review): presumably this is what lets Apply() resolve the operation by
+// name and arc type -- confirm against script-impl.h.
+REGISTER_FST_OPERATION(Synchronize, StdArc, SynchronizeArgs);
+REGISTER_FST_OPERATION(Synchronize, LogArc, SynchronizeArgs);
+REGISTER_FST_OPERATION(Synchronize, Log64Arc, SynchronizeArgs);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/text-io.cc b/src/script/text-io.cc
new file mode 100644
index 0000000..2f5efab
--- /dev/null
+++ b/src/script/text-io.cc
@@ -0,0 +1,95 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/text-io.h>
+
+#include <cstring>
+#include <sstream>
+#include <utility>
+using std::pair; using std::make_pair;
+
+#include <fst/types.h>
+#include <fst/util.h>
+
+namespace fst {
+namespace script {
+
+// Reads a vector of weights from a text file; returns true on success.
+//
+// Each non-empty line must hold exactly two whitespace-separated columns: a
+// state index and a weight string. The weight is parsed as `weight_type` and
+// stored at potential[index]; indices that are skipped over are filled with
+// WeightClass::Zero(). `potential` is cleared once the file has been opened,
+// so after a later failure it holds only the entries read before the error.
+//
+// Returns false (after logging) if the file cannot be opened, a line has the
+// wrong number of columns, a state index is negative, or reading stops before
+// end of file (for example on a line too long for the line buffer).
+bool ReadPotentials(const string &weight_type,
+                    const string& filename,
+                    vector<WeightClass>* potential) {
+  ifstream strm(filename.c_str());
+  if (!strm) {
+    LOG(ERROR) << "ReadPotentials: Can't open file: " << filename;
+    return false;
+  }
+
+  const int kLineLen = 8096;
+  char line[kLineLen];
+  size_t nline = 0;
+
+  potential->clear();
+  while (strm.getline(line, kLineLen)) {
+    ++nline;
+    vector<char *> col;
+    SplitToVector(line, "\n\t ", &col, true);
+    if (col.size() == 0 || col[0][0] == '\0')  // empty line
+      continue;
+    if (col.size() != 2) {
+      LOG(ERROR) << "ReadPotentials: Bad number of columns, "
+                 << "file = " << filename << ", line = " << nline;
+      return false;
+    }
+
+    ssize_t s = StrToInt64(col[0], filename, nline, false);
+    // A negative index would convert to a huge unsigned value in the size
+    // comparison below and grow the vector until memory runs out.
+    if (s < 0) {
+      LOG(ERROR) << "ReadPotentials: Negative state ID, "
+                 << "file = " << filename << ", line = " << nline;
+      return false;
+    }
+    WeightClass weight(weight_type, col[1]);
+
+    const size_t index = static_cast<size_t>(s);
+    if (potential->size() <= index)
+      potential->resize(index + 1, WeightClass::Zero());
+    (*potential)[index] = weight;
+  }
+
+  // getline() also stops on a line that does not fit in the buffer or on a
+  // read error; only a stop at end of file means the whole input was consumed.
+  if (!strm.eof()) {
+    LOG(ERROR) << "ReadPotentials: Read failed or line too long, "
+               << "file = " << filename << ", line = " << nline + 1;
+    return false;
+  }
+  return true;
+}
+
+// Writes a vector of weights as text; returns true on success.
+//
+// Emits one "<index>\t<weight>\n" line per element of `potential`, with the
+// stream precision set to 9. Output goes to the file `filename`, or to
+// standard output when `filename` is empty. Failures to open or write are
+// logged and reported by returning false.
+bool WritePotentials(const string& filename,
+                     const vector<WeightClass>& potential) {
+  // The file stream lives on the stack so it is closed on every return path.
+  ofstream fstrm;
+  ostream *strm = &std::cout;
+  if (!filename.empty()) {
+    fstrm.open(filename.c_str());
+    if (!fstrm) {
+      LOG(ERROR) << "WritePotentials: Can't open file: " << filename;
+      return false;
+    }
+    strm = &fstrm;
+  }
+
+  strm->precision(9);
+  for (size_t s = 0; s < potential.size(); ++s)
+    *strm << s << "\t" << potential[s] << "\n";
+
+  // Flush before testing the stream so buffered write errors are not missed.
+  strm->flush();
+  // fail() is used instead of converting the stream to bool, which is an
+  // explicit conversion from C++11 on and cannot initialize a bool this way.
+  const bool ok = !strm->fail();
+  if (!ok)
+    LOG(ERROR) << "WritePotentials: Write failed: "
+               << (filename.empty() ? "standard output" : filename);
+  return ok;
+}
+
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/topsort.cc b/src/script/topsort.cc
new file mode 100644
index 0000000..65cc85c
--- /dev/null
+++ b/src/script/topsort.cc
@@ -0,0 +1,37 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/topsort.h>
+
+namespace fst {
+namespace script {
+
+// Runs the "TopSort" operation on `*fst` through Apply(), keyed by the FST's
+// arc type, and returns the `retval` member of the argument pack afterwards.
+bool TopSort(MutableFstClass *fst) {
+  typedef Operation<TopSortArgs> Op;
+  TopSortArgs packed(fst);
+  Apply<Op>("TopSort", fst->ArcType(), &packed);
+  return packed.retval;
+}
+
+// Register TopSort for each of the three arc types (StdArc, LogArc,
+// Log64Arc).
+// NOTE(review): presumably this is what lets Apply() resolve the operation by
+// name and arc type -- confirm against script-impl.h.
+REGISTER_FST_OPERATION(TopSort, StdArc, TopSortArgs);
+REGISTER_FST_OPERATION(TopSort, LogArc, TopSortArgs);
+REGISTER_FST_OPERATION(TopSort, Log64Arc, TopSortArgs);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/union.cc b/src/script/union.cc
new file mode 100644
index 0000000..21b8a44
--- /dev/null
+++ b/src/script/union.cc
@@ -0,0 +1,37 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/union.h>
+
+namespace fst {
+namespace script {
+
+// Runs the "Union" operation with `*fst1` as the mutable operand and `fst2`
+// as the read-only operand.
+// Does nothing unless ArcTypesMatch() accepts the two FSTs; otherwise packs
+// them into UnionArgs and forwards to Apply() with the arc type of `*fst1`.
+void Union(MutableFstClass *fst1, const FstClass &fst2) {
+  if (ArcTypesMatch(*fst1, fst2, "Union")) {
+    typedef Operation<UnionArgs> Op;
+    UnionArgs packed(fst1, fst2);
+    Apply<Op>("Union", fst1->ArcType(), &packed);
+  }
+}
+
+// Register Union for each of the three arc types (StdArc, LogArc, Log64Arc).
+// NOTE(review): presumably this is what lets Apply() resolve the operation by
+// name and arc type -- confirm against script-impl.h.
+REGISTER_FST_OPERATION(Union, StdArc, UnionArgs);
+REGISTER_FST_OPERATION(Union, LogArc, UnionArgs);
+REGISTER_FST_OPERATION(Union, Log64Arc, UnionArgs);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/verify.cc b/src/script/verify.cc
new file mode 100644
index 0000000..48e9af4
--- /dev/null
+++ b/src/script/verify.cc
@@ -0,0 +1,37 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <fst/script/fst-class.h>
+#include <fst/script/script-impl.h>
+#include <fst/script/verify.h>
+
+namespace fst {
+namespace script {
+
+// Runs the "Verify" operation on `fst` through Apply(), keyed by the FST's
+// arc type, and returns the `retval` member of the argument pack afterwards.
+// The argument pack is built from the address of `fst`.
+bool Verify(const FstClass &fst) {
+  typedef Operation<VerifyArgs> Op;
+  VerifyArgs packed(&fst);
+  Apply<Op>("Verify", fst.ArcType(), &packed);
+  return packed.retval;
+}
+
+// Register Verify for each of the three arc types (StdArc, LogArc, Log64Arc).
+// NOTE(review): presumably this is what lets Apply() resolve the operation by
+// name and arc type -- confirm against script-impl.h.
+REGISTER_FST_OPERATION(Verify, StdArc, VerifyArgs);
+REGISTER_FST_OPERATION(Verify, LogArc, VerifyArgs);
+REGISTER_FST_OPERATION(Verify, Log64Arc, VerifyArgs);
+
+} // namespace script
+} // namespace fst
diff --git a/src/script/weight-class.cc b/src/script/weight-class.cc
new file mode 100644
index 0000000..2a586d7
--- /dev/null
+++ b/src/script/weight-class.cc
@@ -0,0 +1,45 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: jpr@google.com (Jake Ratkiewicz)
+
+#include <string>
+
+#include <fst/arc.h>
+#include <fst/script/weight-class.h>
+
+namespace fst {
+namespace script {
+
+// Register the weight types of the three arc types (StdArc, LogArc,
+// Log64Arc).
+// NOTE(review): presumably these entries are what the WeightClass constructor
+// below looks up by weight-type name -- confirm against weight-class.h.
+REGISTER_FST_WEIGHT(StdArc::Weight);
+REGISTER_FST_WEIGHT(LogArc::Weight);
+REGISTER_FST_WEIGHT(Log64Arc::Weight);
+
+// Builds a WeightClass by parsing `weight_str` as a weight of type
+// `weight_type`. The string-to-weight converter is looked up in the
+// WeightClassRegister under `weight_type` and its result becomes impl_.
+// If no converter is found, an error is logged and impl_ is left null.
+WeightClass::WeightClass(const string &weight_type,
+                         const string &weight_str)
+    : element_type_(OTHER) {
+  WeightClassRegister *reg = WeightClassRegister::GetRegister();
+
+  StrToWeightImplBaseT stw = reg->GetEntry(weight_type);
+  // NOTE(review): GetEntry() is assumed to yield a null converter for an
+  // unregistered weight type -- confirm against the register implementation.
+  // Calling through it unchecked would crash, so report the problem instead.
+  if (!stw) {
+    LOG(ERROR) << "WeightClass: Unknown weight type: " << weight_type;
+    impl_ = 0;
+    return;
+  }
+
+  impl_ = stw(weight_str, "WeightClass", 0);
+}
+
+// Stream insertion for WeightClass: forwards to Print() on the object's
+// implementation, passing the address of the stream, then returns the stream.
+ostream &operator<<(ostream &o, const WeightClass &c) {
+  ostream *out = &o;
+  c.impl_->Print(out);
+  return *out;
+}
+
+} // namespace script
+} // namespace fst
diff --git a/src/test/Makefile.am b/src/test/Makefile.am
new file mode 100644
index 0000000..1456436
--- /dev/null
+++ b/src/test/Makefile.am
@@ -0,0 +1,12 @@
+# Automake input for the regression tests in src/test.
+# Headers come from the in-tree include directory, plus any ICU preprocessor
+# flags.
+AM_CPPFLAGS = -I$(srcdir)/../include $(ICU_CPPFLAGS)
+# Every test program links against the in-tree libfst plus libm and libdl.
+LDADD = ../lib/libfst.la -lm -ldl
+
+# Programs that are built only for `make check`.
+check_PROGRAMS = fst_test weight_test algo_test
+
+fst_test_SOURCES = fst_test.cc fst_test.h
+
+weight_test_SOURCES = weight_test.cc weight-tester.h
+
+algo_test_SOURCES = algo_test.cc algo_test.h
+
+# Programs that `make check` runs as the test suite.
+TESTS = fst_test weight_test algo_test
diff --git a/src/test/Makefile.in b/src/test/Makefile.in
new file mode 100644
index 0000000..00fe859
--- /dev/null
+++ b/src/test/Makefile.in
@@ -0,0 +1,608 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+check_PROGRAMS = fst_test$(EXEEXT) weight_test$(EXEEXT) \
+ algo_test$(EXEEXT)
+TESTS = fst_test$(EXEEXT) weight_test$(EXEEXT) algo_test$(EXEEXT)
+subdir = src/test
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_icu.m4 \
+ $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+ $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+ $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h \
+ $(top_builddir)/src/include/fst/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am_algo_test_OBJECTS = algo_test.$(OBJEXT)
+algo_test_OBJECTS = $(am_algo_test_OBJECTS)
+algo_test_LDADD = $(LDADD)
+algo_test_DEPENDENCIES = ../lib/libfst.la
+am_fst_test_OBJECTS = fst_test.$(OBJEXT)
+fst_test_OBJECTS = $(am_fst_test_OBJECTS)
+fst_test_LDADD = $(LDADD)
+fst_test_DEPENDENCIES = ../lib/libfst.la
+am_weight_test_OBJECTS = weight_test.$(OBJEXT)
+weight_test_OBJECTS = $(am_weight_test_OBJECTS)
+weight_test_LDADD = $(LDADD)
+weight_test_DEPENDENCIES = ../lib/libfst.la
+DEFAULT_INCLUDES =
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+LTCXXCOMPILE = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+CXXLD = $(CXX)
+CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
+ $(LDFLAGS) -o $@
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+ $(LDFLAGS) -o $@
+SOURCES = $(algo_test_SOURCES) $(fst_test_SOURCES) \
+ $(weight_test_SOURCES)
+DIST_SOURCES = $(algo_test_SOURCES) $(fst_test_SOURCES) \
+ $(weight_test_SOURCES)
+ETAGS = etags
+CTAGS = ctags
+am__tty_colors = \
+red=; grn=; lgn=; blu=; std=
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+ICU_CFLAGS = @ICU_CFLAGS@
+ICU_CONFIG = @ICU_CONFIG@
+ICU_CPPFLAGS = @ICU_CPPFLAGS@
+ICU_CXXFLAGS = @ICU_CXXFLAGS@
+ICU_LIBS = @ICU_LIBS@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+libfstdir = @libfstdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+lt_ECHO = @lt_ECHO@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+AM_CPPFLAGS = -I$(srcdir)/../include $(ICU_CPPFLAGS)
+LDADD = ../lib/libfst.la -lm -ldl
+fst_test_SOURCES = fst_test.cc fst_test.h
+weight_test_SOURCES = weight_test.cc weight-tester.h
+algo_test_SOURCES = algo_test.cc algo_test.h
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .cc .lo .o .obj
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/test/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign src/test/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+clean-checkPROGRAMS:
+ @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \
+ echo " rm -f" $$list; \
+ rm -f $$list || exit $$?; \
+ test -n "$(EXEEXT)" || exit 0; \
+ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+ echo " rm -f" $$list; \
+ rm -f $$list
+algo_test$(EXEEXT): $(algo_test_OBJECTS) $(algo_test_DEPENDENCIES)
+ @rm -f algo_test$(EXEEXT)
+ $(CXXLINK) $(algo_test_OBJECTS) $(algo_test_LDADD) $(LIBS)
+fst_test$(EXEEXT): $(fst_test_OBJECTS) $(fst_test_DEPENDENCIES)
+ @rm -f fst_test$(EXEEXT)
+ $(CXXLINK) $(fst_test_OBJECTS) $(fst_test_LDADD) $(LIBS)
+weight_test$(EXEEXT): $(weight_test_OBJECTS) $(weight_test_DEPENDENCIES)
+ @rm -f weight_test$(EXEEXT)
+ $(CXXLINK) $(weight_test_OBJECTS) $(weight_test_LDADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/algo_test.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fst_test.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/weight_test.Po@am__quote@
+
+.cc.o:
+@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $<
+
+.cc.obj:
+@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.cc.lo:
+@am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(LTCXXCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ mkid -fID $$unique
+tags: TAGS
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ set x; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: CTAGS
+CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+check-TESTS: $(TESTS)
+ @failed=0; all=0; xfail=0; xpass=0; skip=0; \
+ srcdir=$(srcdir); export srcdir; \
+ list=' $(TESTS) '; \
+ $(am__tty_colors); \
+ if test -n "$$list"; then \
+ for tst in $$list; do \
+ if test -f ./$$tst; then dir=./; \
+ elif test -f $$tst; then dir=; \
+ else dir="$(srcdir)/"; fi; \
+ if $(TESTS_ENVIRONMENT) $${dir}$$tst; then \
+ all=`expr $$all + 1`; \
+ case " $(XFAIL_TESTS) " in \
+ *[\ \ ]$$tst[\ \ ]*) \
+ xpass=`expr $$xpass + 1`; \
+ failed=`expr $$failed + 1`; \
+ col=$$red; res=XPASS; \
+ ;; \
+ *) \
+ col=$$grn; res=PASS; \
+ ;; \
+ esac; \
+ elif test $$? -ne 77; then \
+ all=`expr $$all + 1`; \
+ case " $(XFAIL_TESTS) " in \
+ *[\ \ ]$$tst[\ \ ]*) \
+ xfail=`expr $$xfail + 1`; \
+ col=$$lgn; res=XFAIL; \
+ ;; \
+ *) \
+ failed=`expr $$failed + 1`; \
+ col=$$red; res=FAIL; \
+ ;; \
+ esac; \
+ else \
+ skip=`expr $$skip + 1`; \
+ col=$$blu; res=SKIP; \
+ fi; \
+ echo "$${col}$$res$${std}: $$tst"; \
+ done; \
+ if test "$$all" -eq 1; then \
+ tests="test"; \
+ All=""; \
+ else \
+ tests="tests"; \
+ All="All "; \
+ fi; \
+ if test "$$failed" -eq 0; then \
+ if test "$$xfail" -eq 0; then \
+ banner="$$All$$all $$tests passed"; \
+ else \
+ if test "$$xfail" -eq 1; then failures=failure; else failures=failures; fi; \
+ banner="$$All$$all $$tests behaved as expected ($$xfail expected $$failures)"; \
+ fi; \
+ else \
+ if test "$$xpass" -eq 0; then \
+ banner="$$failed of $$all $$tests failed"; \
+ else \
+ if test "$$xpass" -eq 1; then passes=pass; else passes=passes; fi; \
+ banner="$$failed of $$all $$tests did not behave as expected ($$xpass unexpected $$passes)"; \
+ fi; \
+ fi; \
+ dashes="$$banner"; \
+ skipped=""; \
+ if test "$$skip" -ne 0; then \
+ if test "$$skip" -eq 1; then \
+ skipped="($$skip test was not run)"; \
+ else \
+ skipped="($$skip tests were not run)"; \
+ fi; \
+ test `echo "$$skipped" | wc -c` -le `echo "$$banner" | wc -c` || \
+ dashes="$$skipped"; \
+ fi; \
+ report=""; \
+ if test "$$failed" -ne 0 && test -n "$(PACKAGE_BUGREPORT)"; then \
+ report="Please report to $(PACKAGE_BUGREPORT)"; \
+ test `echo "$$report" | wc -c` -le `echo "$$banner" | wc -c` || \
+ dashes="$$report"; \
+ fi; \
+ dashes=`echo "$$dashes" | sed s/./=/g`; \
+ if test "$$failed" -eq 0; then \
+ echo "$$grn$$dashes"; \
+ else \
+ echo "$$red$$dashes"; \
+ fi; \
+ echo "$$banner"; \
+ test -z "$$skipped" || echo "$$skipped"; \
+ test -z "$$report" || echo "$$report"; \
+ echo "$$dashes$$std"; \
+ test "$$failed" -eq 0; \
+ else :; fi
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+ $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS)
+ $(MAKE) $(AM_MAKEFLAGS) check-TESTS
+check: check-am
+all-am: Makefile
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-checkPROGRAMS clean-generic clean-libtool \
+ mostlyclean-am
+
+distclean: distclean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: check-am install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-TESTS check-am clean \
+ clean-checkPROGRAMS clean-generic clean-libtool ctags \
+ distclean distclean-compile distclean-generic \
+ distclean-libtool distclean-tags distdir dvi dvi-am html \
+ html-am info info-am install install-am install-data \
+ install-data-am install-dvi install-dvi-am install-exec \
+ install-exec-am install-html install-html-am install-info \
+ install-info-am install-man install-pdf install-pdf-am \
+ install-ps install-ps-am install-strip installcheck \
+ installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-compile \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags uninstall uninstall-am
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/src/test/algo_test.cc b/src/test/algo_test.cc
new file mode 100644
index 0000000..9538d54
--- /dev/null
+++ b/src/test/algo_test.cc
@@ -0,0 +1,155 @@
+// algo_test.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Regression test for various FST algorithms.
+
+#include "./algo_test.h"
+
+// These determine which semirings are tested. Defining at least
+// TEST_TROPICAL and TEST_LOG is recommended. More increase the
+// comprehensiveness, but also increase the compilation time.
+
+#define TEST_TROPICAL
+#define TEST_LOG
+// #define TEST_MINMAX
+// #define TEST_LEFT_STRING
+// #define TEST_RIGHT_STRING
+// #define TEST_GALLIC
+// #define TEST_LEXICOGRAPHIC
+// #define TEST_POWER
+
+// Seed handed to srand(); main() substitutes time(0) when it is negative.
+DEFINE_int32(seed, -1, "random seed");
+// Number of test repetitions; algo_test.h picks this flag up through
+// DECLARE_int32(repeat).
+DEFINE_int32(repeat, 25, "number of test repetitions");
+
+using fst::StdArc;
+using fst::TropicalWeightGenerator;
+
+using fst::LogArc;
+using fst::LogWeightGenerator;
+
+using fst::MinMaxArc;
+using fst::MinMaxWeightGenerator;
+
+using fst::StringArc;
+using fst::StringWeightGenerator;
+using fst::STRING_LEFT;
+using fst::STRING_RIGHT;
+
+using fst::GallicArc;
+using fst::GallicWeightGenerator;
+
+using fst::LexicographicArc;
+using fst::TropicalWeight;
+using fst::LexicographicWeightGenerator;
+
+using fst::ArcTpl;
+using fst::PowerWeight;
+using fst::PowerWeightGenerator;
+
+using fst::AlgoTester;
+
+// Builds the weight generator and the AlgoTester for one arc type and runs
+// its regression suite once.  The constructor arguments (seed, false) and
+// (generator, seed) are exactly the ones every per-semiring section used to
+// spell out by hand.
+template <class Arc, class WeightGenerator>
+static void RunAlgoTest(int seed) {
+  WeightGenerator generator(seed, false);
+  AlgoTester<Arc, WeightGenerator> tester(generator, seed);
+  tester.Test();
+}
+
+// Runs the algorithm regression tests for every semiring enabled through
+// the TEST_* macros above and prints "PASS" once all of them have returned.
+int main(int argc, char **argv) {
+  // Set before SetFlags() processes the command line.
+  FLAGS_fst_verify_properties = true;
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(argv[0], &argc, &argv, true);
+
+  static const int kCacheGcLimit = 20;
+
+  // A negative --seed selects a time-based seed.  The value actually used is
+  // logged so that a run can be repeated.
+  int seed = FLAGS_seed >= 0 ? FLAGS_seed : time(0);
+  srand(seed);
+  LOG(INFO) << "Seed = " << seed;
+
+  // Pick the default cache garbage-collection settings at random.
+  FLAGS_fst_default_cache_gc = rand() % 2;
+  FLAGS_fst_default_cache_gc_limit = rand() % kCacheGcLimit;
+  VLOG(1) << "default_cache_gc:" << FLAGS_fst_default_cache_gc;
+  VLOG(1) << "default_cache_gc_limit:" << FLAGS_fst_default_cache_gc_limit;
+
+#ifdef TEST_TROPICAL
+  RunAlgoTest<StdArc, TropicalWeightGenerator>(seed);
+#endif  // TEST_TROPICAL
+
+#ifdef TEST_LOG
+  RunAlgoTest<LogArc, LogWeightGenerator>(seed);
+#endif  // TEST_LOG
+
+#ifdef TEST_MINMAX
+  RunAlgoTest<MinMaxArc, MinMaxWeightGenerator>(seed);
+#endif  // TEST_MINMAX
+
+#ifdef TEST_LEFT_STRING
+  RunAlgoTest<StringArc<>, StringWeightGenerator<int> >(seed);
+#endif  // TEST_LEFT_STRING
+
+#ifdef TEST_RIGHT_STRING
+  RunAlgoTest<StringArc<STRING_RIGHT>,
+              StringWeightGenerator<int, STRING_RIGHT> >(seed);
+#endif  // TEST_RIGHT_STRING
+
+#ifdef TEST_GALLIC
+  typedef GallicArc<StdArc> StdGallicArc;
+  typedef GallicWeightGenerator<int, TropicalWeightGenerator>
+      TropicalGallicWeightGenerator;
+
+  RunAlgoTest<StdGallicArc, TropicalGallicWeightGenerator>(seed);
+#endif  // TEST_GALLIC
+
+#ifdef TEST_LEXICOGRAPHIC
+  typedef LexicographicArc<TropicalWeight, TropicalWeight>
+      TropicalLexicographicArc;
+  typedef LexicographicWeightGenerator<TropicalWeightGenerator,
+      TropicalWeightGenerator> TropicalLexicographicWeightGenerator;
+
+  RunAlgoTest<TropicalLexicographicArc,
+              TropicalLexicographicWeightGenerator>(seed);
+#endif  // TEST_LEXICOGRAPHIC
+
+#ifdef TEST_POWER
+  typedef PowerWeight<TropicalWeight, 3> TropicalCubeWeight;
+  typedef ArcTpl<TropicalCubeWeight> TropicalCubeArc;
+  typedef PowerWeightGenerator<TropicalWeightGenerator, 3>
+      TropicalCubeWeightGenerator;
+
+  RunAlgoTest<TropicalCubeArc, TropicalCubeWeightGenerator>(seed);
+#endif  // TEST_POWER
+
+  cout << "PASS" << endl;
+
+  return 0;
+}
diff --git a/src/test/algo_test.h b/src/test/algo_test.h
new file mode 100644
index 0000000..3aca3cc
--- /dev/null
+++ b/src/test/algo_test.h
@@ -0,0 +1,1315 @@
+// algo_test.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Regression test for various FST algorithms.
+
+#ifndef FST_TEST_ALGO_TEST_H__
+#define FST_TEST_ALGO_TEST_H__
+
+#include <fst/fstlib.h>
+#include <fst/random-weight.h>
+
+DECLARE_int32(repeat); // defined in ./algo_test.cc
+
+namespace fst {
+
+// Arc mapper that rewrites the input and output label of every transition
+// to epsilon (label 0) while keeping its weight and destination state.
+template <class A>
+class EpsMapper {
+ public:
+  EpsMapper() {}
+
+  // Returns a copy of 'arc' whose two labels are both 0.
+  A operator()(const A &arc) const {
+    A eps_arc(0, 0, arc.weight, arc.nextstate);
+    return eps_arc;
+  }
+
+  // Adjusts the property bits for the mapped machine: it is reported as an
+  // acceptor, as having input/output epsilons, and as label sorted on both
+  // sides; the contradicting bits are cleared.  All other bits pass through.
+  uint64 Properties(uint64 props) const {
+    const uint64 cleared = kNotAcceptor |
+        kNoIEpsilons | kNoOEpsilons | kNoEpsilons |
+        kNotILabelSorted | kNotOLabelSorted;
+    const uint64 asserted = kAcceptor |
+        kIEpsilons | kOEpsilons | kEpsilons |
+        kILabelSorted | kOLabelSorted;
+    return (props & ~cleared) | asserted;
+  }
+
+  MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; }
+
+  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+
+  MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
+};
+
+// This class tests a variety of identities and properties that must
+// hold for various algorithms on weighted FSTs.
+template <class Arc, class WeightGenerator>
+class WeightedTester {
+ public:
+ typedef typename Arc::Label Label;
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Weight Weight;
+
+ // Stores the seed and the weight generator pointer and copies the three
+ // reference machines into VectorFst members.  The generator is not owned:
+ // nothing in this class deletes it.
+ WeightedTester(int seed, const Fst<Arc> &zero_fst, const Fst<Arc> &one_fst,
+ const Fst<Arc> &univ_fst, WeightGenerator *weight_generator)
+ : seed_(seed), zero_fst_(zero_fst), one_fst_(one_fst),
+ univ_fst_(univ_fst), weight_generator_(weight_generator) {}
+
+ // Runs every test group on the given machines.  T1 is passed to all
+ // groups; T2 and T3 are only passed to the rational and composition groups.
+ void Test(const Fst<Arc> &T1, const Fst<Arc> &T2, const Fst<Arc> &T3) {
+ TestRational(T1, T2, T3);
+ TestMap(T1);
+ TestCompose(T1, T2, T3);
+ TestSort(T1);
+ TestOptimize(T1);
+ TestSearch(T1);
+ }
+
+ private:
+ // Tests rational operations with identities: checks that the destructive
+ // and delayed forms of union, concatenation and closure agree, that union
+ // and concatenation are associative, and -- only when the weight type
+ // reports the matching left/right semiring property -- that concatenation
+ // distributes over union and that T T* and T* T equal T+.  Every
+ // comparison goes through Equiv().
+ void TestRational(const Fst<Arc> &T1, const Fst<Arc> &T2,
+ const Fst<Arc> &T3) {
+
+ {
+ VLOG(1) << "Check destructive and delayed union are equivalent.";
+ VectorFst<Arc> U1(T1);
+ Union(&U1, T2);
+ UnionFst<Arc> U2(T1, T2);
+ CHECK(Equiv(U1, U2));
+ }
+
+
+ {
+ VLOG(1) << "Check destructive and delayed concatenation are equivalent.";
+ VectorFst<Arc> C1(T1);
+ Concat(&C1, T2);
+ ConcatFst<Arc> C2(T1, T2);
+ CHECK(Equiv(C1, C2));
+ // Same product again, this time built by prepending T1 to a copy of T2.
+ VectorFst<Arc> C3(T2);
+ Concat(T1, &C3);
+ CHECK(Equiv(C3, C2));
+ }
+
+ {
+ VLOG(1) << "Check destructive and delayed closure* are equivalent.";
+ VectorFst<Arc> C1(T1);
+ Closure(&C1, CLOSURE_STAR);
+ ClosureFst<Arc> C2(T1, CLOSURE_STAR);
+ CHECK(Equiv(C1, C2));
+ }
+
+ {
+ VLOG(1) << "Check destructive and delayed closure+ are equivalent.";
+ VectorFst<Arc> C1(T1);
+ Closure(&C1, CLOSURE_PLUS);
+ ClosureFst<Arc> C2(T1, CLOSURE_PLUS);
+ CHECK(Equiv(C1, C2));
+ }
+
+ {
+ VLOG(1) << "Check union is associative (destructive).";
+ // U1 = (T1 U T2) U T3
+ VectorFst<Arc> U1(T1);
+ Union(&U1, T2);
+ Union(&U1, T3);
+
+ // U4 = T1 U (T2 U T3)
+ VectorFst<Arc> U3(T2);
+ Union(&U3, T3);
+ VectorFst<Arc> U4(T1);
+ Union(&U4, U3);
+
+ CHECK(Equiv(U1, U4));
+ }
+
+ {
+ VLOG(1) << "Check union is associative (delayed).";
+ UnionFst<Arc> U1(T1, T2);
+ UnionFst<Arc> U2(U1, T3);
+
+ UnionFst<Arc> U3(T2, T3);
+ UnionFst<Arc> U4(T1, U3);
+
+ CHECK(Equiv(U2, U4));
+ }
+
+
+ {
+ VLOG(1) << "Check union is associative (destructive delayed).";
+ // Applies the in-place Union() to a delayed UnionFst.
+ UnionFst<Arc> U1(T1, T2);
+ Union(&U1, T3);
+
+ UnionFst<Arc> U3(T2, T3);
+ UnionFst<Arc> U4(T1, U3);
+
+ CHECK(Equiv(U1, U4));
+ }
+
+ {
+ VLOG(1) << "Check concatenation is associative (destructive).";
+ // C1 = (T1 T2) T3
+ VectorFst<Arc> C1(T1);
+ Concat(&C1, T2);
+ Concat(&C1, T3);
+
+ // C4 = T1 (T2 T3)
+ VectorFst<Arc> C3(T2);
+ Concat(&C3, T3);
+ VectorFst<Arc> C4(T1);
+ Concat(&C4, C3);
+
+ CHECK(Equiv(C1, C4));
+ }
+
+ {
+ VLOG(1) << "Check concatenation is associative (delayed).";
+ ConcatFst<Arc> C1(T1, T2);
+ ConcatFst<Arc> C2(C1, T3);
+
+ ConcatFst<Arc> C3(T2, T3);
+ ConcatFst<Arc> C4(T1, C3);
+
+ CHECK(Equiv(C2, C4));
+ }
+
+ {
+ VLOG(1) << "Check concatenation is associative (destructive delayed).";
+ // Applies the in-place Concat() to a delayed ConcatFst.
+ ConcatFst<Arc> C1(T1, T2);
+ Concat(&C1, T3);
+
+ ConcatFst<Arc> C3(T2, T3);
+ ConcatFst<Arc> C4(T1, C3);
+
+ CHECK(Equiv(C1, C4));
+ }
+
+ if (Weight::Properties() & kLeftSemiring) {
+ VLOG(1) << "Check concatenation left distributes"
+ << " over union (destructive).";
+
+ // C1 = T3 (T1 U T2)
+ VectorFst<Arc> U1(T1);
+ Union(&U1, T2);
+ VectorFst<Arc> C1(T3);
+ Concat(&C1, U1);
+
+ // U2 = (T3 T1) U (T3 T2)
+ VectorFst<Arc> C2(T3);
+ Concat(&C2, T1);
+ VectorFst<Arc> C3(T3);
+ Concat(&C3, T2);
+ VectorFst<Arc> U2(C2);
+ Union(&U2, C3);
+
+ CHECK(Equiv(C1, U2));
+ }
+
+ if (Weight::Properties() & kRightSemiring) {
+ VLOG(1) << "Check concatenation right distributes"
+ << " over union (destructive).";
+ // C1 = (T1 U T2) T3
+ VectorFst<Arc> U1(T1);
+ Union(&U1, T2);
+ VectorFst<Arc> C1(U1);
+ Concat(&C1, T3);
+
+ // U2 = (T1 T3) U (T2 T3)
+ VectorFst<Arc> C2(T1);
+ Concat(&C2, T3);
+ VectorFst<Arc> C3(T2);
+ Concat(&C3, T3);
+ VectorFst<Arc> U2(C2);
+ Union(&U2, C3);
+
+ CHECK(Equiv(C1, U2));
+ }
+
+ if (Weight::Properties() & kLeftSemiring) {
+ VLOG(1) << "Check concatenation left distributes over union (delayed).";
+ UnionFst<Arc> U1(T1, T2);
+ ConcatFst<Arc> C1(T3, U1);
+
+ ConcatFst<Arc> C2(T3, T1);
+ ConcatFst<Arc> C3(T3, T2);
+ UnionFst<Arc> U2(C2, C3);
+
+ CHECK(Equiv(C1, U2));
+ }
+
+ if (Weight::Properties() & kRightSemiring) {
+ VLOG(1) << "Check concatenation right distributes over union (delayed).";
+ UnionFst<Arc> U1(T1, T2);
+ ConcatFst<Arc> C1(U1, T3);
+
+ ConcatFst<Arc> C2(T1, T3);
+ ConcatFst<Arc> C3(T2, T3);
+ UnionFst<Arc> U2(C2, C3);
+
+ CHECK(Equiv(C1, U2));
+ }
+
+
+ if (Weight::Properties() & kLeftSemiring) {
+ VLOG(1) << "Check T T* == T+ (destructive).";
+ VectorFst<Arc> S(T1);
+ Closure(&S, CLOSURE_STAR);
+ VectorFst<Arc> C(T1);
+ Concat(&C, S);
+
+ VectorFst<Arc> P(T1);
+ Closure(&P, CLOSURE_PLUS);
+
+ CHECK(Equiv(C, P));
+ }
+
+
+ if (Weight::Properties() & kRightSemiring) {
+ VLOG(1) << "Check T* T == T+ (destructive).";
+ VectorFst<Arc> S(T1);
+ Closure(&S, CLOSURE_STAR);
+ VectorFst<Arc> C(S);
+ Concat(&C, T1);
+
+ VectorFst<Arc> P(T1);
+ Closure(&P, CLOSURE_PLUS);
+
+ CHECK(Equiv(C, P));
+ }
+
+ if (Weight::Properties() & kLeftSemiring) {
+ VLOG(1) << "Check T T* == T+ (delayed).";
+ ClosureFst<Arc> S(T1, CLOSURE_STAR);
+ ConcatFst<Arc> C(T1, S);
+
+ ClosureFst<Arc> P(T1, CLOSURE_PLUS);
+
+ CHECK(Equiv(C, P));
+ }
+
+ if (Weight::Properties() & kRightSemiring) {
+ VLOG(1) << "Check T* T == T+ (delayed).";
+ ClosureFst<Arc> S(T1, CLOSURE_STAR);
+ ConcatFst<Arc> C(S, T1);
+
+ ClosureFst<Arc> P(T1, CLOSURE_PLUS);
+
+ CHECK(Equiv(C, P));
+ }
+ }
+
+ // Tests map-based operations: projection, inversion, relabeling,
+ // encoding/decoding and the gallic mappers.  Each group either compares a
+ // destructive result with its delayed counterpart or checks that applying
+ // an operation and then undoing it gives a machine Equiv() to T.
+ void TestMap(const Fst<Arc> &T) {
+
+ {
+ VLOG(1) << "Check destructive and delayed projection are equivalent.";
+ VectorFst<Arc> P1(T);
+ Project(&P1, PROJECT_INPUT);
+ ProjectFst<Arc> P2(T, PROJECT_INPUT);
+ CHECK(Equiv(P1, P2));
+ }
+
+
+ {
+ VLOG(1) << "Check destructive and delayed inversion are equivalent.";
+ VectorFst<Arc> I1(T);
+ Invert(&I1);
+ InvertFst<Arc> I2(T);
+ CHECK(Equiv(I1, I2));
+ }
+
+ {
+ VLOG(1) << "Check Pi_1(T) = Pi_2(T^-1) (destructive).";
+ VectorFst<Arc> P1(T);
+ VectorFst<Arc> I1(T);
+ Project(&P1, PROJECT_INPUT);
+ Invert(&I1);
+ Project(&I1, PROJECT_OUTPUT);
+ CHECK(Equiv(P1, I1));
+ }
+
+ {
+ VLOG(1) << "Check Pi_2(T) = Pi_1(T^-1) (destructive).";
+ VectorFst<Arc> P1(T);
+ VectorFst<Arc> I1(T);
+ Project(&P1, PROJECT_OUTPUT);
+ Invert(&I1);
+ Project(&I1, PROJECT_INPUT);
+ CHECK(Equiv(P1, I1));
+ }
+
+ {
+ VLOG(1) << "Check Pi_1(T) = Pi_2(T^-1) (delayed).";
+ ProjectFst<Arc> P1(T, PROJECT_INPUT);
+ InvertFst<Arc> I1(T);
+ ProjectFst<Arc> P2(I1, PROJECT_OUTPUT);
+ CHECK(Equiv(P1, P2));
+ }
+
+
+ {
+ VLOG(1) << "Check Pi_2(T) = Pi_1(T^-1) (delayed).";
+ ProjectFst<Arc> P1(T, PROJECT_OUTPUT);
+ InvertFst<Arc> I1(T);
+ ProjectFst<Arc> P2(I1, PROJECT_INPUT);
+ CHECK(Equiv(P1, P2));
+ }
+
+
+ {
+ VLOG(1) << "Check destructive relabeling";
+ static const int kNumLabels = 10;
+ // set up relabeling pairs
+ // Start from the identity on 0..kNumLabels-1, then scramble it by
+ // swapping every slot with a randomly chosen slot.
+ vector<Label> labelset(kNumLabels);
+ for (size_t i = 0; i < kNumLabels; ++i) labelset[i] = i;
+ for (size_t i = 0; i < kNumLabels; ++i) {
+ swap(labelset[i], labelset[rand() % kNumLabels]);
+ }
+
+ // First pass: input labels i -> labelset[i], output labels
+ // labelset[i] -> i.
+ vector<pair<Label, Label> > ipairs1(kNumLabels);
+ vector<pair<Label, Label> > opairs1(kNumLabels);
+ for (size_t i = 0; i < kNumLabels; ++i) {
+ ipairs1[i] = make_pair(i, labelset[i]);
+ opairs1[i] = make_pair(labelset[i], i);
+ }
+ VectorFst<Arc> R(T);
+ Relabel(&R, ipairs1, opairs1);
+
+ // Second pass uses the reversed pairs, so relabeling twice must give
+ // back a machine equivalent to T.
+ vector<pair<Label, Label> > ipairs2(kNumLabels);
+ vector<pair<Label, Label> > opairs2(kNumLabels);
+ for (size_t i = 0; i < kNumLabels; ++i) {
+ ipairs2[i] = make_pair(labelset[i], i);
+ opairs2[i] = make_pair(i, labelset[i]);
+ }
+ Relabel(&R, ipairs2, opairs2);
+ CHECK(Equiv(R, T));
+
+ VLOG(1) << "Check on-the-fly relabeling";
+ RelabelFst<Arc> Rdelay(T, ipairs1, opairs1);
+
+ RelabelFst<Arc> RRdelay(Rdelay, ipairs2, opairs2);
+ CHECK(Equiv(RRdelay, T));
+ }
+
+ {
+ VLOG(1) << "Check encoding/decoding (destructive).";
+ VectorFst<Arc> D(T);
+ // Independently and at random decide whether labels and/or weights
+ // are encoded.
+ uint32 encode_props = 0;
+ if (rand() % 2)
+ encode_props |= kEncodeLabels;
+ if (rand() % 2)
+ encode_props |= kEncodeWeights;
+ EncodeMapper<Arc> encoder(encode_props, ENCODE);
+ Encode(&D, &encoder);
+ Decode(&D, encoder);
+ CHECK(Equiv(D, T));
+ }
+
+ {
+ VLOG(1) << "Check encoding/decoding (delayed).";
+ uint32 encode_props = 0;
+ if (rand() % 2)
+ encode_props |= kEncodeLabels;
+ if (rand() % 2)
+ encode_props |= kEncodeWeights;
+ EncodeMapper<Arc> encoder(encode_props, ENCODE);
+ EncodeFst<Arc> E(T, &encoder);
+ // The delayed encoding is copied into a VectorFst before it is decoded.
+ VectorFst<Arc> Encoded(E);
+ DecodeFst<Arc> D(Encoded, encoder);
+ CHECK(Equiv(D, T));
+ }
+
+ {
+ VLOG(1) << "Check gallic mappers (constructive).";
+ // Map T to gallic arcs and back again; the round trip must preserve T.
+ ToGallicMapper<Arc> to_mapper;
+ FromGallicMapper<Arc> from_mapper;
+ VectorFst< GallicArc<Arc> > G;
+ VectorFst<Arc> F;
+ ArcMap(T, &G, to_mapper);
+ ArcMap(G, &F, from_mapper);
+ CHECK(Equiv(T, F));
+ }
+
+ {
+ VLOG(1) << "Check gallic mappers (delayed).";
+ ToGallicMapper<Arc> to_mapper;
+ FromGallicMapper<Arc> from_mapper;
+ ArcMapFst<Arc, GallicArc<Arc>, ToGallicMapper<Arc> >
+ G(T, to_mapper);
+ ArcMapFst<GallicArc<Arc>, Arc, FromGallicMapper<Arc> >
+ F(G, from_mapper);
+ CHECK(Equiv(T, F));
+ }
+ }
+
+ // Tests compose-based operations: associativity of composition, its
+ // distribution over union, commutativity of intersection, and agreement
+ // between the available epsilon composition filters.  Nothing is tested
+ // when the weight type does not report kCommutative.
+ void TestCompose(const Fst<Arc> &T1, const Fst<Arc> &T2,
+ const Fst<Arc> &T3) {
+ if (!(Weight::Properties() & kCommutative))
+ return;
+
+ VectorFst<Arc> S1(T1);
+ VectorFst<Arc> S2(T2);
+ VectorFst<Arc> S3(T3);
+
+ ILabelCompare<Arc> icomp;
+ OLabelCompare<Arc> ocomp;
+
+ // S1 and S2 are sorted on output labels, S3 on input labels.
+ ArcSort(&S1, ocomp);
+ ArcSort(&S2, ocomp);
+ ArcSort(&S3, icomp);
+
+ {
+ VLOG(1) << "Check composition is associative.";
+ ComposeFst<Arc> C1(S1, S2);
+
+ // C2 = (S1 o S2) o S3, C4 = S1 o (S2 o S3)
+ ComposeFst<Arc> C2(C1, S3);
+ ComposeFst<Arc> C3(S2, S3);
+ ComposeFst<Arc> C4(S1, C3);
+
+ CHECK(Equiv(C2, C4));
+ }
+
+ {
+ VLOG(1) << "Check composition left distributes over union.";
+ UnionFst<Arc> U1(S2, S3);
+ ComposeFst<Arc> C1(S1, U1);
+
+ ComposeFst<Arc> C2(S1, S2);
+ ComposeFst<Arc> C3(S1, S3);
+ UnionFst<Arc> U2(C2, C3);
+
+ CHECK(Equiv(C1, U2));
+ }
+
+ {
+ VLOG(1) << "Check composition right distributes over union.";
+ UnionFst<Arc> U1(S1, S2);
+ ComposeFst<Arc> C1(U1, S3);
+
+ ComposeFst<Arc> C2(S1, S3);
+ ComposeFst<Arc> C3(S2, S3);
+ UnionFst<Arc> U2(C2, C3);
+
+ CHECK(Equiv(C1, U2));
+ }
+
+ // Projections of the sorted machines: A1 is the output projection of S1,
+ // A2 and A3 are input projections of S2 and S3.  A3 is not used below.
+ VectorFst<Arc> A1(S1);
+ VectorFst<Arc> A2(S2);
+ VectorFst<Arc> A3(S3);
+ Project(&A1, PROJECT_OUTPUT);
+ Project(&A2, PROJECT_INPUT);
+ Project(&A3, PROJECT_INPUT);
+
+ {
+ VLOG(1) << "Check intersection is commutative.";
+ IntersectFst<Arc> I1(A1, A2);
+ IntersectFst<Arc> I2(A2, A1);
+ CHECK(Equiv(I1, I2));
+ }
+
+ {
+ VLOG(1) << "Check all epsilon filters leads to equivalent results.";
+ typedef Matcher< Fst<Arc> > M;
+ // C1 uses the default options; C2 and C3 select the alternative
+ // sequence filter and the match filter explicitly.
+ ComposeFst<Arc> C1(S1, S2);
+ ComposeFst<Arc> C2(
+ S1, S2,
+ ComposeFstOptions<Arc, M, AltSequenceComposeFilter<M> >());
+ ComposeFst<Arc> C3(
+ S1, S2,
+ ComposeFstOptions<Arc, M, MatchComposeFilter<M> >());
+
+ CHECK(Equiv(C1, C2));
+ CHECK(Equiv(C1, C3));
+ }
+ }
+
+  // Tests sorting operations: arc sorting (destructive and delayed),
+  // topological sorting and double reversal must all leave a machine that
+  // Equiv() accepts as the same transduction.
+  void TestSort(const Fst<Arc> &T) {
+    ILabelCompare<Arc> by_ilabel;
+    OLabelCompare<Arc> by_olabel;
+
+    {
+      VLOG(1) << "Check arc sorted Fst is equivalent to its input.";
+      VectorFst<Arc> S1(T);
+      ArcSort(&S1, by_ilabel);
+      CHECK(Equiv(T, S1));
+    }
+
+    {
+      VLOG(1) << "Check destructive and delayed arcsort are equivalent.";
+      typedef ArcSortFst< Arc, ILabelCompare<Arc> > DelayedISort;
+      VectorFst<Arc> S1(T);
+      ArcSort(&S1, by_ilabel);
+      DelayedISort S2(T, by_ilabel);
+      CHECK(Equiv(S1, S2));
+    }
+
+    {
+      VLOG(1) << "Check ilabel sorting vs. olabel sorting with inversions.";
+      // Sorting on input labels ...
+      VectorFst<Arc> S1(T);
+      ArcSort(&S1, by_ilabel);
+
+      // ... versus inverting, sorting on output labels and inverting back.
+      VectorFst<Arc> S2(T);
+      Invert(&S2);
+      ArcSort(&S2, by_olabel);
+      Invert(&S2);
+
+      CHECK(Equiv(S1, S2));
+    }
+
+    {
+      VLOG(1) << "Check topologically sorted Fst is equivalent to its input.";
+      VectorFst<Arc> S1(T);
+      TopSort(&S1);
+      CHECK(Equiv(T, S1));
+    }
+
+    {
+      VLOG(1) << "Check reverse(reverse(T)) = T";
+      // The first reversal produces ReverseArc arcs; the second one maps
+      // back to the original arc type.
+      VectorFst< ReverseArc<Arc> > R1;
+      Reverse(T, &R1);
+      VectorFst<Arc> R2;
+      Reverse(R1, &R2);
+      CHECK(Equiv(T, R2));
+    }
+  }
+
+  // Tests optimization operations: connection, epsilon removal,
+  // determinization and minimization, reweighting, pushing, pruning and
+  // synchronization.  Every group is guarded by the weight and FST property
+  // bits it needs, so combinations that do not satisfy them are skipped.
+  void TestOptimize(const Fst<Arc> &T) {
+    uint64 tprops = T.Properties(kFstProperties, true);
+    uint64 wprops = Weight::Properties();
+
+    // Input projection of T, used by the acceptor-based checks below.
+    VectorFst<Arc> A(T);
+    Project(&A, PROJECT_INPUT);
+
+    {
+      VLOG(1) << "Check connected FST is equivalent to its input.";
+      VectorFst<Arc> C1(T);
+      Connect(&C1);
+      CHECK(Equiv(T, C1));
+    }
+
+    if ((wprops & kSemiring) == kSemiring &&
+        (tprops & kAcyclic || wprops & kIdempotent)) {
+      VLOG(1) << "Check epsilon-removed FST is equivalent to its input.";
+      VectorFst<Arc> R1(T);
+      RmEpsilon(&R1);
+      CHECK(Equiv(T, R1));
+
+      VLOG(1) << "Check destructive and delayed epsilon removal"
+              << " are equivalent.";
+      RmEpsilonFst<Arc> R2(T);
+      CHECK(Equiv(R1, R2));
+
+      VLOG(1) << "Check an FST with a large proportion"
+              << " of epsilon transitions:";
+      // Maps all transitions of T to epsilon-transitions and append
+      // a non-epsilon transition.
+      VectorFst<Arc> U;
+      ArcMap(T, &U, EpsMapper<Arc>());
+      VectorFst<Arc> V;
+      V.SetStart(V.AddState());
+      Arc arc(1, 1, Weight::One(), V.AddState());
+      V.AddArc(V.Start(), arc);
+      V.SetFinal(arc.nextstate, Weight::One());
+      Concat(&U, V);
+      // Check that epsilon-removal preserves the shortest-distance
+      // from the initial state to the final states.  When the start state
+      // has no entry in the distance vector, Zero() is used instead.
+      vector<Weight> d;
+      ShortestDistance(U, &d, true);
+      Weight w = U.Start() < d.size() ? d[U.Start()] : Weight::Zero();
+      VectorFst<Arc> U1(U);
+      RmEpsilon(&U1);
+      ShortestDistance(U1, &d, true);
+      Weight w1 = U1.Start() < d.size() ? d[U1.Start()] : Weight::Zero();
+      CHECK(ApproxEqual(w, w1, kTestDelta));
+      RmEpsilonFst<Arc> U2(U);
+      ShortestDistance(U2, &d, true);
+      Weight w2 = U2.Start() < d.size() ? d[U2.Start()] : Weight::Zero();
+      CHECK(ApproxEqual(w, w2, kTestDelta));
+    }
+
+    if ((wprops & kSemiring) == kSemiring && tprops & kAcyclic) {
+      VLOG(1) << "Check determinized FSA is equivalent to its input.";
+      DeterminizeFst<Arc> D(A);
+      CHECK(Equiv(A, D));
+
+      // Number of states of min(det(A)); assigned in the block below.
+      int n;
+      {
+        VLOG(1) << "Check size(min(det(A))) <= size(det(A))"
+                << " and min(det(A)) equiv det(A)";
+        VectorFst<Arc> M(D);
+        n = M.NumStates();
+        Minimize(&M);
+        CHECK(Equiv(D, M));
+        CHECK(M.NumStates() <= n);
+        n = M.NumStates();
+      }
+
+      // Skip test if A is the empty machine or contains epsilons or
+      // if the semiring is not idempotent (to avoid floating point
+      // errors)
+      if (n && (wprops & kIdempotent) == kIdempotent &&
+          A.Properties(kNoEpsilons, true)) {
+        VLOG(1) << "Check that Revuz's algorithm leads to the"
+                << " same number of states as Brzozowski's algorithm";
+
+        // Brzozowski: reverse, determinize, reverse, determinize.
+        VectorFst<Arc> R;
+        Reverse(A, &R);
+        RmEpsilon(&R);
+        DeterminizeFst<Arc> DR(R);
+        VectorFst<Arc> RD;
+        Reverse(DR, &RD);
+        DeterminizeFst<Arc> DRD(RD);
+        VectorFst<Arc> M(DRD);
+        CHECK_EQ(n + 1, M.NumStates());  // Accounts for the epsilon transition
+                                         // to the initial state
+      }
+    }
+
+    if (Arc::Type() == LogArc::Type() || Arc::Type() == StdArc::Type()) {
+      VLOG(1) << "Check reweight(T) equiv T";
+      // One generated potential per state of T.
+      vector<Weight> potential;
+      VectorFst<Arc> RI(T);
+      VectorFst<Arc> RF(T);
+      while (potential.size() < RI.NumStates())
+        potential.push_back((*weight_generator_)());
+
+      Reweight(&RI, potential, REWEIGHT_TO_INITIAL);
+      CHECK(Equiv(T, RI));
+
+      Reweight(&RF, potential, REWEIGHT_TO_FINAL);
+      CHECK(Equiv(T, RF));
+    }
+
+    if ((wprops & kIdempotent) || (tprops & kAcyclic)) {
+      VLOG(1) << "Check pushed FST is equivalent to input FST.";
+      // Pushing towards the final state.
+      if (wprops & kRightSemiring) {
+        VectorFst<Arc> P1;
+        Push<Arc, REWEIGHT_TO_FINAL>(T, &P1, kPushLabels);
+        CHECK(Equiv(T, P1));
+
+        VectorFst<Arc> P2;
+        Push<Arc, REWEIGHT_TO_FINAL>(T, &P2, kPushWeights);
+        CHECK(Equiv(T, P2));
+
+        VectorFst<Arc> P3;
+        Push<Arc, REWEIGHT_TO_FINAL>(T, &P3, kPushLabels | kPushWeights);
+        CHECK(Equiv(T, P3));
+      }
+
+      // Pushing towards the initial state.
+      if (wprops & kLeftSemiring) {
+        VectorFst<Arc> P1;
+        Push<Arc, REWEIGHT_TO_INITIAL>(T, &P1, kPushLabels);
+        CHECK(Equiv(T, P1));
+
+        VectorFst<Arc> P2;
+        Push<Arc, REWEIGHT_TO_INITIAL>(T, &P2, kPushWeights);
+        CHECK(Equiv(T, P2));
+
+        VectorFst<Arc> P3;
+        Push<Arc, REWEIGHT_TO_INITIAL>(T, &P3, kPushLabels | kPushWeights);
+        CHECK(Equiv(T, P3));
+      }
+    }
+
+    if ((wprops & (kPath | kCommutative)) == (kPath | kCommutative)) {
+      VLOG(1) << "Check pruning algorithm";
+      {
+        VLOG(1) << "Check equiv. of constructive and destructive algorithms";
+        Weight threshold = (*weight_generator_)();
+        VectorFst<Arc> P1(T);
+        Prune(&P1, threshold);
+        VectorFst<Arc> P2;
+        Prune(T, &P2, threshold);
+        CHECK(Equiv(P1,P2));
+      }
+
+      {
+        VLOG(1) << "Check prune(reverse) equiv reverse(prune)";
+        Weight threshold = (*weight_generator_)();
+        VectorFst< ReverseArc<Arc> > R;
+        VectorFst<Arc> P1(T);
+        VectorFst<Arc> P2;
+        Prune(&P1, threshold);
+        // The reversed machine is pruned with the reversed threshold.
+        Reverse(T, &R);
+        Prune(&R, threshold.Reverse());
+        Reverse(R, &P2);
+        CHECK(Equiv(P1, P2));
+      }
+      {
+        VLOG(1) << "Check: ShortestDistance(T- prune(T))"
+                << " > ShortestDistance(T) times Threshold";
+        Weight threshold = (*weight_generator_)();
+        VectorFst<Arc> P;
+        Prune(A, &P, threshold);
+        // C = A minus the (unweighted, epsilon-free, determinized) pruned
+        // machine, i.e. what pruning removed from A.
+        DifferenceFst<Arc> C(A, DeterminizeFst<Arc>
+                             (RmEpsilonFst<Arc>
+                              (ArcMapFst<Arc, Arc,
+                               RmWeightMapper<Arc> >
+                               (P, RmWeightMapper<Arc>()))));
+        Weight sum1 = Times(ShortestDistance(A), threshold);
+        Weight sum2 = ShortestDistance(C);
+        CHECK(Plus(sum1, sum2) == sum1);
+      }
+    }
+    if (tprops & kAcyclic) {
+      VLOG(1) << "Check synchronize(T) equiv T";
+      SynchronizeFst<Arc> S(T);
+      CHECK(Equiv(T, S));
+    }
+  }
+
+ // Tests search operations: the weight of the single shortest path must
+ // match the shortest distance of the whole machine, and the n shortest
+ // paths must come out with the same weights as repeatedly extracting the
+ // best path and removing it from the machine.
+ void TestSearch(const Fst<Arc> &T) {
+ uint64 wprops = Weight::Properties();
+
+ // Input projection of T; the n-best check below works on this acceptor.
+ VectorFst<Arc> A(T);
+ Project(&A, PROJECT_INPUT);
+
+ if ((wprops & (kPath | kRightSemiring)) == (kPath | kRightSemiring)) {
+ VLOG(1) << "Check 1-best weight.";
+ VectorFst<Arc> path;
+ ShortestPath(T, &path);
+ Weight tsum = ShortestDistance(T);
+ Weight psum = ShortestDistance(path);
+ CHECK(ApproxEqual(tsum, psum, kTestDelta));
+ }
+
+ if ((wprops & (kPath | kSemiring)) == (kPath | kSemiring)) {
+ VLOG(1) << "Check n-best weights";
+ VectorFst<Arc> R(A);
+ RmEpsilon(&R);
+ // nshortest lies in [2, kNumRandomShortestPaths + 1].
+ int nshortest = rand() % kNumRandomShortestPaths + 2;
+ VectorFst<Arc> paths;
+ ShortestPath(R, &paths, nshortest, true, false,
+ Weight::Zero(), kNumShortestStates);
+ // NOTE(review): the 'true' argument presumably requests distances to the
+ // final states (reverse direction) -- confirm against ShortestDistance.
+ vector<Weight> distance;
+ ShortestDistance(paths, &distance, true);
+ StateId pstart = paths.Start();
+ if (pstart != kNoStateId) {
+ // Each arc leaving the start state of 'paths' is compared, in
+ // iteration order, with the current best path of R.
+ ArcIterator< Fst<Arc> > piter(paths, pstart);
+ for (; !piter.Done(); piter.Next()) {
+ StateId s = piter.Value().nextstate;
+ // Weight of this n-best path: arc weight times the distance stored for
+ // its destination, or Zero() when no distance was recorded for it.
+ Weight nsum = s < distance.size() ?
+ Times(piter.Value().weight, distance[s]) : Weight::Zero();
+ VectorFst<Arc> path;
+ ShortestPath(R, &path);
+ Weight dsum = ShortestDistance(path);
+ CHECK(ApproxEqual(nsum, dsum, kTestDelta));
+ // Strip the weights from the extracted path and subtract it from R so
+ // that the next iteration finds the next best path.
+ ArcMap(&path, RmWeightMapper<Arc>());
+ VectorFst<Arc> S;
+ Difference(R, path, &S);
+ R = S;
+ }
+ }
+ }
+ }
+
+  // Randomized equivalence test: both machines are first verified, then
+  // random strings drawn from them are checked to be transduced the same
+  // way (within kTestDelta) by both.
+  bool Equiv(const Fst<Arc> &fst1, const Fst<Arc> &fst2) {
+    VLOG(1) << "Check FSTs for sanity (including property bits).";
+    CHECK(Verify(fst1));
+    CHECK(Verify(fst2));
+
+    typedef UniformArcSelector<Arc> Selector;
+    Selector selector(seed_);
+    RandGenOptions<Selector> randgen_opts(selector, kRandomPathLength);
+    const bool same = RandEquivalent(fst1, fst2, kNumRandomPaths,
+                                     kTestDelta, randgen_opts);
+    return same;
+  }
+
+ // Random seed
+ int seed_;
+
+ // FST with no states
+ VectorFst<Arc> zero_fst_;
+
+ // FST with one state that accepts epsilon.
+ VectorFst<Arc> one_fst_;
+
+ // FST with one state that accepts all strings.
+ VectorFst<Arc> univ_fst_;
+
+ // Generates weights used in testing.
+ WeightGenerator *weight_generator_;
+
+ // Maximum random path length.
+ static const int kRandomPathLength;
+
+ // Number of random paths to explore.
+ static const int kNumRandomPaths;
+
+ // Maximum number of nshortest paths.
+ static const int kNumRandomShortestPaths;
+
+ // Maximum number of nshortest states.
+ static const int kNumShortestStates;
+
+ // Delta for equivalence tests.
+ static const float kTestDelta;
+
+ DISALLOW_COPY_AND_ASSIGN(WeightedTester);
+};
+
+
+// Out-of-class definitions of the WeightedTester tuning constants.
+
+// Maximum random path length.
+template <class A, class WG>
+const int WeightedTester<A, WG>::kRandomPathLength = 25;
+
+// Number of random paths to explore.
+template <class A, class WG>
+const int WeightedTester<A, WG>::kNumRandomPaths = 100;
+
+// Maximum number of nshortest paths.
+template <class A, class WG>
+const int WeightedTester<A, WG>::kNumRandomShortestPaths = 100;
+
+// Maximum number of nshortest states.
+template <class A, class WG>
+const int WeightedTester<A, WG>::kNumShortestStates = 10000;
+
+// Delta for equivalence tests.
+template <class A, class WG>
+const float WeightedTester<A, WG>::kTestDelta = .05;
+
+// This class tests a variety of identities and properties that must
+// hold for various algorithms on unweighted FSAs and that are not tested
+// by WeightedTester. Only the specialization does anything interesting.
+// The primary template below has an empty constructor and an empty Test().
+template <class Arc>
+class UnweightedTester {
+ public:
+ // Takes the same arguments as the StdArc specialization and ignores them.
+ UnweightedTester(const Fst<Arc> &zero_fsa, const Fst<Arc> &one_fsa,
+ const Fst<Arc> &univ_fsa) {}
+
+ // No-op for arc types without a specialization.
+ void Test(const Fst<Arc> &A1, const Fst<Arc> &A2, const Fst<Arc> &A3) {}
+};
+
+
+// Specialization for StdArc. This should work for any commutative,
+// idempotent semiring when restricted to the unweighted case
+// (being isomorphic to the boolean semiring).
+template <>
+class UnweightedTester<StdArc> {
+ public:
+ typedef StdArc Arc;
+ typedef Arc::Label Label;
+ typedef Arc::StateId StateId;
+ typedef Arc::Weight Weight;
+
+ // Copies the three reference acceptors into VectorFst members.
+ UnweightedTester(const Fst<Arc> &zero_fsa, const Fst<Arc> &one_fsa,
+ const Fst<Arc> &univ_fsa)
+ : zero_fsa_(zero_fsa), one_fsa_(one_fsa), univ_fsa_(univ_fsa) {}
+
+ // Runs the rational, intersection and optimization test groups.  Only A1
+ // is passed to TestOptimize; TestRational receives A3 but does not read it.
+ void Test(const Fst<Arc> &A1, const Fst<Arc> &A2, const Fst<Arc> &A3) {
+ TestRational(A1, A2, A3);
+ TestIntersect(A1, A2, A3);
+ TestOptimize(A1);
+ }
+
+ private:
+ // Tests rational operations with identities: a union must contain both of
+ // its arguments and A^n (n drawn from 0..4) must be contained in A*, for
+ // the destructive and the delayed implementations.  A3 is not used here.
+ void TestRational(const Fst<Arc> &A1, const Fst<Arc> &A2,
+ const Fst<Arc> &A3) {
+
+ {
+ VLOG(1) << "Check the union contains its arguments (destructive).";
+ VectorFst<Arc> U(A1);
+ Union(&U, A2);
+
+ CHECK(Subset(A1, U));
+ CHECK(Subset(A2, U));
+ }
+
+ {
+ VLOG(1) << "Check the union contains its arguments (delayed).";
+ UnionFst<Arc> U(A1, A2);
+
+ CHECK(Subset(A1, U));
+ CHECK(Subset(A2, U));
+ }
+
+ {
+ VLOG(1) << "Check if A^n c A* (destructive).";
+ // Start from the epsilon acceptor and concatenate A1 onto it n times.
+ VectorFst<Arc> C(one_fsa_);
+ int n = rand() % 5;
+ for (int i = 0; i < n; ++i)
+ Concat(&C, A1);
+
+ VectorFst<Arc> S(A1);
+ Closure(&S, CLOSURE_STAR);
+ CHECK(Subset(C, S));
+ }
+
+ {
+ VLOG(1) << "Check if A^n c A* (delayed).";
+ int n = rand() % 5;
+ // C always points at a heap-allocated machine owned by this block.
+ Fst<Arc> *C = new VectorFst<Arc>(one_fsa_);
+ for (int i = 0; i < n; ++i) {
+ // NOTE(review): the previous operand is deleted right after the new
+ // ConcatFst is built from it, so ConcatFst presumably keeps its own
+ // copy of its arguments -- confirm.
+ ConcatFst<Arc> *F = new ConcatFst<Arc>(*C, A1);
+ delete C;
+ C = F;
+ }
+ ClosureFst<Arc> S(A1, CLOSURE_STAR);
+ CHECK(Subset(*C, S));
+ delete C;
+ }
+ }
+
+ // Tests intersect-based operations: containment of an intersection in its
+ // arguments, distribution of union over intersection, the complement
+ // identities against the universal and empty acceptors, and both
+ // De Morgan laws.
+ void TestIntersect(const Fst<Arc> &A1, const Fst<Arc> &A2,
+ const Fst<Arc> &A3) {
+ VectorFst<Arc> S1(A1);
+ VectorFst<Arc> S2(A2);
+ VectorFst<Arc> S3(A3);
+
+ ILabelCompare<Arc> comp;
+
+ // All three copies are sorted on input labels before being intersected.
+ ArcSort(&S1, comp);
+ ArcSort(&S2, comp);
+ ArcSort(&S3, comp);
+
+ {
+ VLOG(1) << "Check the intersection is contained in its arguments.";
+ IntersectFst<Arc> I1(S1, S2);
+ CHECK(Subset(I1, S1));
+ CHECK(Subset(I1, S2));
+ }
+
+ {
+ VLOG(1) << "Check union distributes over intersection.";
+ // U1 = (S1 n S2) U S3
+ IntersectFst<Arc> I1(S1, S2);
+ UnionFst<Arc> U1(I1, S3);
+
+ // I2 = (S1 U S3) n (S2 U S3); the second union is arc sorted on the
+ // fly before it is handed to the intersection.
+ UnionFst<Arc> U2(S1, S3);
+ UnionFst<Arc> U3(S2, S3);
+ ArcSortFst< Arc, ILabelCompare<Arc> > S4(U3, comp);
+ IntersectFst<Arc> I2(U2, S4);
+
+ CHECK(Equiv(U1, I2));
+ }
+
+ // C1 and C2 are the input-label-sorted complements of S1 and S2.
+ VectorFst<Arc> C1;
+ VectorFst<Arc> C2;
+ Complement(S1, &C1);
+ Complement(S2, &C2);
+ ArcSort(&C1, comp);
+ ArcSort(&C2, comp);
+
+
+ {
+ VLOG(1) << "Check S U S' = Sigma*";
+ UnionFst<Arc> U(S1, C1);
+ CHECK(Equiv(U, univ_fsa_));
+ }
+
+ {
+ VLOG(1) << "Check S n S' = {}";
+ IntersectFst<Arc> I(S1, C1);
+ CHECK(Equiv(I, zero_fsa_));
+ }
+
+ {
+ VLOG(1) << "Check (S1' U S2') == (S1 n S2)'";
+ UnionFst<Arc> U(C1, C2);
+
+ IntersectFst<Arc> I(S1, S2);
+ VectorFst<Arc> C3;
+ Complement(I, &C3);
+ CHECK(Equiv(U, C3));
+ }
+
+ {
+ VLOG(1) << "Check (S1' n S2') == (S1 U S2)'";
+ IntersectFst<Arc> I(C1, C2);
+
+ UnionFst<Arc> U(S1, S2);
+ VectorFst<Arc> C3;
+ Complement(U, &C3);
+ CHECK(Equiv(I, C3));
+ }
+ }
+
+  // Tests optimization operations on an acceptor: determinization and
+  // minimization must preserve the language, and the minimized machine must
+  // have the state count predicted by Brzozowski's double-reversal
+  // construction.
+  void TestOptimize(const Fst<Arc> &A) {
+    {
+      VLOG(1) << "Check determinized FSA is equivalent to its input.";
+      DeterminizeFst<Arc> D(A);
+      CHECK(Equiv(A, D));
+    }
+
+    {
+      VLOG(1) << "Check minimized FSA is equivalent to its input.";
+      // Number of states of the minimized machine; assigned just below.
+      int n;
+      {
+        RmEpsilonFst<Arc> R(A);
+        DeterminizeFst<Arc> D(R);
+        VectorFst<Arc> M(D);
+        Minimize(&M);
+        CHECK(Equiv(A, M));
+        n = M.NumStates();
+      }
+
+      if (n) {  // Skip test if A is the empty machine
+        VLOG(1) << "Check that Hopcroft's and Revuz's algorithms lead to the"
+                << " same number of states as Brzozowski's algorithm";
+        // Brzozowski: reverse, determinize, reverse, determinize.
+        VectorFst<Arc> R;
+        Reverse(A, &R);
+        RmEpsilon(&R);
+        DeterminizeFst<Arc> DR(R);
+        VectorFst<Arc> RD;
+        Reverse(DR, &RD);
+        DeterminizeFst<Arc> DRD(RD);
+        VectorFst<Arc> M(DRD);
+        CHECK_EQ(n + 1, M.NumStates());  // Accounts for the epsilon transition
+                                         // to the initial state
+      }
+    }
+  }
+
+  // Tests if two FSAs are equivalent.  Two independent procedures are run
+  // on the epsilon-free, determinized machines and are required to agree:
+  // the Equivalent() algorithm, and emptiness of the symmetric difference.
+  bool Equiv(const Fst<Arc> &fsa1, const Fst<Arc> &fsa2) {
+    VLOG(1) << "Check FSAs for sanity (including property bits).";
+    CHECK(Verify(fsa1));
+    CHECK(Verify(fsa2));
+
+    // Epsilon-free copies, determinized on the fly.
+    VectorFst<Arc> noeps1(fsa1);
+    VectorFst<Arc> noeps2(fsa2);
+    RmEpsilon(&noeps1);
+    RmEpsilon(&noeps2);
+    DeterminizeFst<Arc> det1(noeps1);
+    DeterminizeFst<Arc> det2(noeps2);
+
+    // Test equivalence using union-find algorithm
+    bool equiv1 = Equivalent(det1, det2);
+
+    // Test equivalence by checking if (S1 - S2) U (S2 - S1) is empty
+    ILabelCompare<Arc> by_ilabel;
+    VectorFst<Arc> sorted1(det1);
+    ArcSort(&sorted1, by_ilabel);
+    VectorFst<Arc> sorted2(det2);
+    ArcSort(&sorted2, by_ilabel);
+
+    DifferenceFst<Arc> only_in_1(sorted1, sorted2);
+    DifferenceFst<Arc> only_in_2(sorted2, sorted1);
+
+    // After Connect() the symmetric difference has no states left exactly
+    // when it has no successful path.
+    VectorFst<Arc> sym_diff(only_in_1);
+    Union(&sym_diff, only_in_2);
+    Connect(&sym_diff);
+    bool equiv2 = sym_diff.NumStates() == 0;
+
+    // Check two equivalence tests match
+    CHECK((equiv1 && equiv2) || (!equiv1 && !equiv2));
+
+    return equiv1;
+  }
+
+ // Tests if FSA1 is a subset of FSA2 by checking that FSA1 is equivalent
+ // to the intersection of FSA1 and FSA2.
+ // NOTE(review): weights are not stripped here, so the result only
+ // "disregards weights" if the caller passes unweighted acceptors.
+ bool Subset(const Fst<Arc> &fsa1, const Fst<Arc> &fsa2) {
+   VLOG(1) << "Check FSAs (incl. property bits) for sanity";
+   CHECK(Verify(fsa1));
+   CHECK(Verify(fsa2));
+
+   // Work on mutable copies of the inputs. These must be built from
+   // fsa1/fsa2 (and use this class's Arc type); default-constructed
+   // machines would make the test compare two empty FSAs and always pass.
+   VectorFst<Arc> vfsa1(fsa1);
+   VectorFst<Arc> vfsa2(fsa2);
+   RmEpsilon(&vfsa1);
+   RmEpsilon(&vfsa2);
+
+   // Sort arcs by input label before intersecting.
+   ILabelCompare<Arc> comp;
+   ArcSort(&vfsa1, comp);
+   ArcSort(&vfsa2, comp);
+   IntersectFst<Arc> ifsa(vfsa1, vfsa2);
+
+   // FSA1 is a subset of FSA2 iff FSA1 == (FSA1 n FSA2).
+   DeterminizeFst<Arc> dfa1(vfsa1);
+   DeterminizeFst<Arc> dfa2(ifsa);
+   return Equivalent(dfa1, dfa2);
+ }
+
+ // Stores in *ofsa the complement of ifsa, computed as the difference
+ // between univ_fsa_ and the epsilon-free, determinized ifsa.
+ void Complement(const Fst<Arc> &ifsa, MutableFst<Arc> *ofsa) {
+   RmEpsilonFst<Arc> eps_free(ifsa);
+   DeterminizeFst<Arc> det(eps_free);
+   DifferenceFst<Arc> complement(univ_fsa_, det);
+   *ofsa = complement;
+ }
+
+ // FSA with no states
+ VectorFst<Arc> zero_fsa_;
+
+ // FSA with one state that accepts epsilon.
+ VectorFst<Arc> one_fsa_;
+
+ // FSA with one state that accepts all strings.
+ VectorFst<Arc> univ_fsa_;
+
+ DISALLOW_COPY_AND_ASSIGN(UnweightedTester);
+};
+
+
+// This class tests a variety of identities and properties that must
+// hold for various FST algorithms. It randomly generates FSTs, using
+// function object 'weight_generator' to select weights. 'WeightedTester'
+// and 'UnweightedTester' are then called.
+template <class Arc, class WeightGenerator>
+class AlgoTester {
+ public:
+  typedef typename Arc::Label Label;
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Weight Weight;
+
+  // Stores the weight generator and seed and builds the fixed helper
+  // machines: one_fst_ (a single final start state without arcs) and
+  // univ_fst_ (a single final start state with one self-loop per label in
+  // [0, kNumRandomLabels)). zero_fst_ is left without states.
+  AlgoTester(WeightGenerator generator, int seed) :
+      weight_generator_(generator), seed_(seed) {
+    one_fst_.AddState();
+    one_fst_.SetStart(0);
+    one_fst_.SetFinal(0, Weight::One());
+
+    univ_fst_.AddState();
+    univ_fst_.SetStart(0);
+    univ_fst_.SetFinal(0, Weight::One());
+    for (int i = 0; i < kNumRandomLabels; ++i)
+      univ_fst_.AddArc(0, Arc(i, i, Weight::One(), 0));
+  }
+
+  // Runs FLAGS_repeat rounds. Each round draws three random transducers,
+  // hands them to a WeightedTester, then projects them to acceptors, strips
+  // their weights and hands those to an UnweightedTester.
+  void Test() {
+    VLOG(1) << "weight type = " << Weight::Type();
+
+    for (int i = 0; i < FLAGS_repeat; ++i) {
+      // Random transducers
+      VectorFst<Arc> T1;
+      VectorFst<Arc> T2;
+      VectorFst<Arc> T3;
+      RandFst(&T1);
+      RandFst(&T2);
+      RandFst(&T3);
+      WeightedTester<Arc, WeightGenerator>
+          weighted_tester(seed_, zero_fst_, one_fst_,
+                          univ_fst_, &weight_generator_);
+      weighted_tester.Test(T1, T2, T3);
+
+      // Unweighted acceptors derived from the transducers above.
+      VectorFst<Arc> A1(T1);
+      VectorFst<Arc> A2(T2);
+      VectorFst<Arc> A3(T3);
+      Project(&A1, PROJECT_OUTPUT);
+      Project(&A2, PROJECT_INPUT);
+      Project(&A3, PROJECT_INPUT);
+      ArcMap(&A1, rm_weight_mapper);
+      ArcMap(&A2, rm_weight_mapper);
+      ArcMap(&A3, rm_weight_mapper);
+      UnweightedTester<Arc> unweighted_tester(zero_fst_, one_fst_, univ_fst_);
+      unweighted_tester.Test(A1, A2, A3);
+    }
+  }
+
+ private:
+  // Generates a random FST in *fst, replacing its previous contents. The
+  // result has fewer than kNumRandomStates states (possibly none) and
+  // fewer than kNumRandomArcs arcs, and a random subset of its property
+  // bits is cleared at the end.
+  void RandFst(MutableFst<Arc> *fst) {
+    // Determines direction of the arcs wrt state numbering. This way we
+    // can force acyclicity when desired.
+    enum ArcDirection { ANY_DIRECTION = 0, FORWARD_DIRECTION = 1,
+                        REVERSE_DIRECTION = 2, NUM_DIRECTIONS = 3 };
+
+    ArcDirection arc_direction = ANY_DIRECTION;
+    if (rand()/(RAND_MAX + 1.0) < kAcyclicProb)
+      arc_direction = rand() % 2 ? FORWARD_DIRECTION : REVERSE_DIRECTION;
+
+    fst->DeleteStates();
+    StateId ns = rand() % kNumRandomStates;
+
+    // An FST without states has no start state, arcs or final weights.
+    if (ns == 0)
+      return;
+    for (StateId s = 0; s < ns; ++s)
+      fst->AddState();
+
+    StateId start = rand() % ns;
+    fst->SetStart(start);
+
+    size_t na = rand() % kNumRandomArcs;
+    for (size_t n = 0; n < na; ++n) {
+      StateId s = rand() % ns;
+      Arc arc;
+      arc.ilabel = rand() % kNumRandomLabels;
+      arc.olabel = rand() % kNumRandomLabels;
+      arc.weight = weight_generator_();
+      arc.nextstate = rand() % ns;
+
+      // When a direction is forced, keep only arcs that strictly increase
+      // (FORWARD) or strictly decrease (REVERSE) the state number. The
+      // comparison must be on states, not labels: only then are self-loops
+      // and back edges excluded, which is what makes the FST acyclic.
+      if (arc_direction == ANY_DIRECTION ||
+          (arc_direction == FORWARD_DIRECTION && s < arc.nextstate) ||
+          (arc_direction == REVERSE_DIRECTION && s > arc.nextstate))
+        fst->AddArc(s, arc);
+    }
+
+    // Up to ns draws of a final state; a state drawn twice keeps the last
+    // weight assigned.
+    StateId nf = rand() % (ns + 1);
+    for (StateId n = 0; n < nf; ++n) {
+      StateId s = rand() % ns;
+      Weight final = weight_generator_();
+      fst->SetFinal(s, final);
+    }
+    VLOG(1) << "Check FST for sanity (including property bits).";
+    CHECK(Verify(*fst));
+
+    // Get/compute all properties.
+    uint64 props = fst->Properties(kFstProperties, true);
+
+    // Select random set of properties to be unknown.
+    // NOTE(review): because the shift follows the OR, the byte drawn first
+    // is shifted out of the 64-bit value and the low 8 bits of the mask are
+    // always zero -- confirm this is intended before changing it.
+    uint64 mask = 0;
+    for (int n = 0; n < 8; ++n) {
+      mask |= rand() & 0xff;
+      mask <<= 8;
+    }
+    // Restrict the mask to bits outside kTrinaryProperties.
+    mask &= ~kTrinaryProperties;
+    fst->SetProperties(props & ~mask, mask);
+  }
+
+  // Generates weights used in testing.
+  WeightGenerator weight_generator_;
+
+  // Random seed
+  int seed_;
+
+  // FST with no states
+  VectorFst<Arc> zero_fst_;
+
+  // FST with one state that accepts epsilon.
+  VectorFst<Arc> one_fst_;
+
+  // FST with one state that accepts all strings.
+  VectorFst<Arc> univ_fst_;
+
+  // Mapper to remove weights from an Fst
+  RmWeightMapper<Arc> rm_weight_mapper;
+
+  // Maximum number of states in random test Fst.
+  static const int kNumRandomStates;
+
+  // Maximum number of arcs in random test Fst.
+  static const int kNumRandomArcs;
+
+  // Number of alternative random labels.
+  static const int kNumRandomLabels;
+
+  // Probability to force an acyclic Fst
+  static const float kAcyclicProb;
+
+  // Maximum random path length.
+  static const int kRandomPathLength;
+
+  // Number of random paths to explore.
+  static const int kNumRandomPaths;
+
+  DISALLOW_COPY_AND_ASSIGN(AlgoTester);
+};
+
+// Out-of-class definitions (and values) of AlgoTester's static constants.
+template <class A, class G> const int AlgoTester<A, G>::kNumRandomStates = 10;
+
+template <class A, class G> const int AlgoTester<A, G>::kNumRandomArcs = 25;
+
+template <class A, class G> const int AlgoTester<A, G>::kNumRandomLabels = 5;
+
+template <class A, class G> const float AlgoTester<A, G>::kAcyclicProb = .25;
+
+template <class A, class G> const int AlgoTester<A, G>::kRandomPathLength = 25;
+
+template <class A, class G> const int AlgoTester<A, G>::kNumRandomPaths = 100;
+
+} // namespace fst
+
+#endif // FST_TEST_ALGO_TEST_H__
diff --git a/src/test/fst_test.cc b/src/test/fst_test.cc
new file mode 100644
index 0000000..7d8d600
--- /dev/null
+++ b/src/test/fst_test.cc
@@ -0,0 +1,228 @@
+// fst_test.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Regression test for FST classes.
+
+#include "./fst_test.h"
+
+#include <fst/compact-fst.h>
+#include <fst/const-fst.h>
+#include <fst/edit-fst.h>
+#include <fst/matcher-fst.h>
+
+
+namespace fst {
+
+// A user-defined arc type, with a short label, a product weight and a
+// 64-bit state id.
+struct CustomArc {
+  typedef short Label;
+  typedef ProductWeight<TropicalWeight, LogWeight> Weight;
+  typedef int64 StateId;
+
+  Label ilabel;       // Transition input label
+  Label olabel;       // Transition output label
+  Weight weight;      // Transition weight
+  StateId nextstate;  // Transition destination state
+
+  // Performs no explicit member initialization.
+  CustomArc() {}
+
+  // Builds an arc from its four components.
+  CustomArc(Label i, Label o, Weight w, StateId s)
+      : ilabel(i), olabel(o), weight(w), nextstate(s) {}
+
+  // Arc type name
+  static const string &Type() {
+    static const string arc_type_name = "my";
+    return arc_type_name;
+  }
+};
+
+
+// A user-defined compactor for test FST. Each arc is stored as an
+// (input label, weight) pair.
+template <class A>
+class CustomCompactor {
+ public:
+  typedef A Arc;
+  typedef typename A::Label Label;
+  typedef typename A::StateId StateId;
+  typedef typename A::Weight Weight;
+  typedef pair<Label, Weight> Element;
+
+  // Keeps only the input label and the weight of 'arc'.
+  Element Compact(StateId s, const A &arc) const {
+    return make_pair(arc.ilabel, arc.weight);
+  }
+
+  // Rebuilds an arc from a stored element. An element labelled kNoLabel
+  // expands to an arc with no labels and no destination; any other element
+  // expands to a self-loop on 's' with output label 0.
+  Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
+    if (p.first == kNoLabel) {
+      return Arc(kNoLabel, kNoLabel, p.second, kNoStateId);
+    } else {
+      return Arc(p.first, 0, p.second, s);
+    }
+  }
+
+  // Always reports -1.
+  ssize_t Size() const {
+    return -1;
+  }
+
+  // Reports no properties.
+  uint64 Properties() const {
+    return 0ULL;
+  }
+
+  // Accepts every FST.
+  bool Compatible(const Fst<A> &fst) const {
+    return true;
+  }
+
+  // Compactor type name
+  static const string &Type() {
+    static const string compactor_type_name = "my";
+    return compactor_type_name;
+  }
+
+  // Writes nothing to the stream and reports success.
+  bool Write(ostream &strm) const {
+    return true;
+  }
+
+  // Reads nothing from the stream; returns a new compactor that the caller
+  // must delete.
+  static CustomCompactor *Read(istream &strm) {
+    return new CustomCompactor;
+  }
+};
+
+
+// Registers the non-standard FST types exercised by main(). The first two
+// use the REGISTER_FST macro; the others declare a static FstRegisterer
+// object directly because their FST type takes more than one template
+// argument.
+// NOTE(review): presumably registration is what lets the generic
+// Fst<Arc>::Read() used by FstTester::TestIO() construct these types --
+// confirm against the FST registry documentation.
+REGISTER_FST(VectorFst, CustomArc);
+REGISTER_FST(ConstFst, CustomArc);
+static fst::FstRegisterer<
+ CompactFst<StdArc, CustomCompactor<StdArc> > >
+CompactFst_StdArc_CustomCompactor_registerer;
+static fst::FstRegisterer<
+ CompactFst<CustomArc, CustomCompactor<CustomArc> > >
+CompactFst_CustomArc_CustomCompactor_registerer;
+static fst::FstRegisterer<ConstFst<StdArc, uint16> >
+ConstFst_StdArc_uint16_registerer;
+static fst::FstRegisterer<
+ CompactFst<StdArc, CustomCompactor<StdArc>, uint16> >
+CompactFst_StdArc_CustomCompactor_uint16_registerer;
+
+} // namespace fst
+
+
+using fst::FstTester;
+using fst::VectorFst;
+using fst::ConstFst;
+using fst::MatcherFst;
+using fst::CompactFst;
+using fst::Fst;
+using fst::StdArc;
+using fst::CustomArc;
+using fst::CustomCompactor;
+using fst::StdArcLookAheadFst;
+using fst::EditFst;
+
+namespace {
+
+// Runs the base, expanded, copy and I/O checks on a fresh tester for FST
+// type F.
+template <class F>
+void RunReadOnlyTests() {
+  FstTester<F> tester;
+  tester.TestBase();
+  tester.TestExpanded();
+  tester.TestCopy();
+  tester.TestIO();
+}
+
+// Runs the base, expanded, assignment, copy, I/O and mutation checks on a
+// fresh tester for FST type F.
+template <class F>
+void RunMutableTests() {
+  FstTester<F> tester;
+  tester.TestBase();
+  tester.TestExpanded();
+  tester.TestAssign();
+  tester.TestCopy();
+  tester.TestIO();
+  tester.TestMutable();
+}
+
+}  // namespace
+
+// Runs the FST class regression tests for every supported FST type and
+// prints "PASS" when all of them complete.
+int main(int argc, char **argv) {
+  FLAGS_fst_verify_properties = true;
+  std::set_new_handler(FailedNewHandler);
+  SetFlags(argv[0], &argc, &argv, true);
+
+  // Standard arcs.
+  RunMutableTests< VectorFst<StdArc> >();
+  RunReadOnlyTests< ConstFst<StdArc> >();
+  RunReadOnlyTests< CompactFst<StdArc, CustomCompactor<StdArc> > >();
+
+  // User-defined arcs.
+  RunMutableTests< VectorFst<CustomArc> >();
+  RunReadOnlyTests< ConstFst<CustomArc> >();
+  RunReadOnlyTests< CompactFst<CustomArc, CustomCompactor<CustomArc> > >();
+
+  // Standard arcs with a uint16 third/second template argument.
+  RunReadOnlyTests< ConstFst<StdArc, uint16> >();
+  RunReadOnlyTests< CompactFst<StdArc, CustomCompactor<StdArc>, uint16> >();
+
+  // FstTester<StdArcLookAheadFst>: no I/O check is run for this type.
+  {
+    FstTester<StdArcLookAheadFst> lookahead_tester;
+    lookahead_tester.TestBase();
+    lookahead_tester.TestExpanded();
+    lookahead_tester.TestCopy();
+  }
+
+  // EditFst<StdArc> tests: no I/O check is run for this type.
+  {
+    FstTester< EditFst<StdArc> > edit_tester;
+    edit_tester.TestBase();
+    edit_tester.TestExpanded();
+    edit_tester.TestAssign();
+    edit_tester.TestCopy();
+    edit_tester.TestMutable();
+  }
+
+  cout << "PASS" << endl;
+
+  return 0;
+}
diff --git a/src/test/fst_test.h b/src/test/fst_test.h
new file mode 100644
index 0000000..da16476
--- /dev/null
+++ b/src/test/fst_test.h
@@ -0,0 +1,299 @@
+// fst_test.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Regression test for FST classes.
+
+#ifndef FST_TEST_FST_TEST_H_
+#define FST_TEST_FST_TEST_H_
+
+#include <fst/equal.h>
+#include <fst/matcher.h>
+#include <fst/vector-fst.h>
+#include <fst/verify.h>
+
+DECLARE_string(tmpdir);
+
+namespace fst {
+
+// This tests an Fst F that is assumed to have a copy method from an
+// arbitrary Fst. Some test functions make further assumptions mostly
+// obvious from their name. These tests are written as member template
+// functions that take a test fst as its argument so that different
+// Fsts in the interface hierarchy can be tested separately and so
+// that we can instantiate only those tests that make sense for a
+// particular Fst.
+template <class F>
+class FstTester {
+ public:
+  typedef typename F::Arc Arc;
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Weight Weight;
+  typedef typename Arc::Label Label;
+
+  // Builds the 128-state reference machine described in InitFst() and
+  // converts it to the FST type under test.
+  FstTester() {
+    VectorFst<Arc> vfst;
+    InitFst(&vfst, 128);
+    testfst_ = new F(vfst);
+  }
+
+  ~FstTester() {
+    delete testfst_;
+  }
+
+  // This verifies the contents described in InitFst() using
+  // methods defined in a generic Fst.
+  template <class G>
+  void TestBase(const G &fst) const {
+    CHECK(Verify(fst));
+    CHECK_EQ(fst.Start(), 0);
+    StateIterator<G> siter(fst);
+    Matcher<G> matcher(fst, MATCH_INPUT);
+    MatchType match_type = matcher.Type(true);
+    // Run the iterator to the end first so that the loop below also
+    // exercises Reset().
+    for (; !siter.Done(); siter.Next());
+    for (siter.Reset(); !siter.Done(); siter.Next()) {
+      StateId s = siter.Value();
+      matcher.SetState(s);
+      CHECK_EQ(fst.Final(s), NthWeight(s));
+      size_t na = 0;
+      ArcIterator<G> aiter(fst, s);
+      // Same exhaust-then-Reset() pattern for the arc iterator.
+      for (; !aiter.Done(); aiter.Next());
+      for (aiter.Reset(); !aiter.Done(); aiter.Next()) {
+        ++na;
+        const Arc &arc = aiter.Value();
+        CHECK_EQ(arc.ilabel, na);
+        CHECK_EQ(arc.olabel, 0);
+        CHECK_EQ(arc.weight, NthWeight(na));
+        CHECK_EQ(arc.nextstate, s);
+        // The matcher is only consulted when it reports input matching.
+        if (match_type == MATCH_INPUT) {
+          CHECK(matcher.Find(arc.ilabel));
+          CHECK_EQ(matcher.Value().ilabel, arc.ilabel);
+        }
+      }
+      CHECK_EQ(na, s);
+      CHECK_EQ(na, aiter.Position());
+      CHECK_EQ(fst.NumArcs(s), s);
+      CHECK_EQ(fst.NumInputEpsilons(s), 0);
+      CHECK_EQ(fst.NumOutputEpsilons(s), s);
+      CHECK(!matcher.Find(s + 1));  // out-of-range
+      CHECK(!matcher.Find(kNoLabel));  // no explicit epsilons
+      CHECK(matcher.Find(0));
+      CHECK_EQ(matcher.Value().ilabel, kNoLabel);  // implicit epsilon loop
+    }
+    CHECK(fst.Properties(kNotAcceptor, true));
+    CHECK(fst.Properties(kOEpsilons, true));
+  }
+
+  void TestBase() const {
+    TestBase(*testfst_);
+  }
+
+  // This verifies methods specific to an ExpandedFst.
+  template <class G>
+  void TestExpanded(const G &fst) const {
+    StateId ns = 0;
+    for (StateIterator<G> siter(fst);
+         !siter.Done();
+         siter.Next()) {
+      ++ns;
+    }
+    CHECK_EQ(fst.NumStates(), ns);
+    CHECK(fst.Properties(kExpanded, false));
+  }
+
+  void TestExpanded() const { TestExpanded(*testfst_); }
+
+  // This verifies methods specific to a MutableFst. Arc labels changed
+  // through the mutable arc iterator are restored, so *fst ends up with
+  // the same arcs it started with.
+  template <class G>
+  void TestMutable(G *fst) const {
+    for (StateIterator<G> siter(*fst);
+         !siter.Done();
+         siter.Next()) {
+      StateId s = siter.Value();
+      size_t na = 0;
+      size_t ni = fst->NumInputEpsilons(s);
+      MutableArcIterator<G> aiter(fst, s);
+      for (; !aiter.Done(); aiter.Next());
+      for (aiter.Reset(); !aiter.Done(); aiter.Next()) {
+        ++na;
+        // Turn the arc into an input epsilon and check the count follows.
+        Arc arc = aiter.Value();
+        arc.ilabel = 0;
+        aiter.SetValue(arc);
+        arc = aiter.Value();
+        CHECK_EQ(arc.ilabel, 0);
+        CHECK_EQ(fst->NumInputEpsilons(s), ni + 1);
+        // Restore the label expected by TestBase().
+        arc.ilabel = na;
+        aiter.SetValue(arc);
+        CHECK_EQ(fst->NumInputEpsilons(s), ni);
+      }
+    }
+
+    // Deleting all states of a copy leaves it empty.
+    G *cfst1 = fst->Copy();
+    cfst1->DeleteStates();
+    CHECK_EQ(cfst1->NumStates(), 0);
+    delete cfst1;
+
+    // Deleting the arcs of each state of a copy resets its arc counts.
+    G *cfst2 = fst->Copy();
+    for (StateIterator<G> siter(*cfst2);
+         !siter.Done();
+         siter.Next()) {
+      StateId s = siter.Value();
+      cfst2->DeleteArcs(s);
+      CHECK_EQ(cfst2->NumArcs(s), 0);
+      CHECK_EQ(cfst2->NumInputEpsilons(s), 0);
+      CHECK_EQ(cfst2->NumOutputEpsilons(s), 0);
+    }
+    delete cfst2;
+  }
+
+  void TestMutable() { TestMutable(testfst_); }
+
+  // This verifies the assignment operators.
+  template <class G>
+  void TestAssign(G *fst) const {
+    // Assignment from G
+    G afst1;
+    afst1 = *fst;
+    CHECK(Equal(*fst, afst1));
+
+    // Assignment from Fst
+    G afst2;
+    afst2 = *static_cast<const Fst<Arc> *>(fst);
+    CHECK(Equal(*fst, afst2));
+
+    // Assignment from self
+    afst2.operator=(afst2);
+    CHECK(Equal(*fst, afst2));
+  }
+
+  void TestAssign() { TestAssign(testfst_); }
+
+  // This verifies the copy methods.
+  template <class G>
+  void TestCopy(const G &fst) const {
+    // Copy from G
+    G c1fst(fst);
+    TestBase(c1fst);
+
+    // Copy from Fst
+    const G c2fst(static_cast<const Fst<Arc> &>(fst));
+    TestBase(c2fst);
+
+    // Copy from self
+    const G *c3fst = fst.Copy();
+    TestBase(*c3fst);
+    delete c3fst;
+  }
+
+  void TestCopy() const { TestCopy(*testfst_); }
+
+  // This verifies the read/write methods. The FST is written to
+  // "test.fst" under FLAGS_tmpdir; the file is not removed afterwards.
+  template <class G>
+  void TestIO(const G &fst) const {
+    const string filename = FLAGS_tmpdir + "/test.fst";
+    {
+      // write/read
+      CHECK(fst.Write(filename));
+      G *ffst = G::Read(filename);
+      CHECK(ffst);
+      TestBase(*ffst);
+      delete ffst;
+    }
+
+    {
+      // generic read/cast/test
+      Fst<Arc> *gfst = Fst<Arc>::Read(filename);
+      CHECK(gfst);
+      // NOTE(review): unchecked downcast; assumes the generic reader
+      // returned an object whose dynamic type is G.
+      G *dfst = static_cast<G *>(gfst);
+      TestBase(*dfst);
+
+      // generic write/read/test
+      CHECK(gfst->Write(filename));
+      Fst<Arc> *hfst = Fst<Arc>::Read(filename);
+      CHECK(hfst);
+      TestBase(*hfst);
+      delete gfst;
+      delete hfst;
+    }
+
+    // expanded write/read/test
+    if (fst.Properties(kExpanded, false)) {
+      ExpandedFst<Arc> *efst = ExpandedFst<Arc>::Read(filename);
+      CHECK(efst);
+      TestBase(*efst);
+      TestExpanded(*efst);
+      delete efst;
+    }
+
+    // mutable write/read/test
+    if (fst.Properties(kMutable, false)) {
+      MutableFst<Arc> *mfst = MutableFst<Arc>::Read(filename);
+      CHECK(mfst);
+      TestBase(*mfst);
+      TestExpanded(*mfst);
+      TestMutable(mfst);
+      delete mfst;
+    }
+  }
+
+  void TestIO() const { TestIO(*testfst_); }
+
+ private:
+  // This constructs test FSTs. Given a mutable FST, will leave
+  // the FST as follows:
+  // (I) NumStates() = nstates
+  // (II) Start() = 0
+  // (III) Final(s) = NthWeight(s)
+  // (IV) For state s:
+  //   (a) NumArcs(s) == s
+  //   (b) For ith arc of s:
+  //     (1) ilabel = i
+  //     (2) olabel = 0
+  //     (3) weight = NthWeight(i)
+  //     (4) nextstate = s
+  void InitFst(MutableFst<Arc> *fst, size_t nstates) const {
+    fst->DeleteStates();
+    CHECK_GT(nstates, 0);
+
+    for (StateId s = 0; s < nstates; ++s) {
+      fst->AddState();
+      fst->SetFinal(s, NthWeight(s));
+      // Arcs are numbered from 1, so state s gets exactly s self-loops.
+      for (size_t i = 1; i <= s; ++i) {
+        Arc arc(i, 0, NthWeight(i), s);
+        fst->AddArc(s, arc);
+      }
+    }
+
+    fst->SetStart(0);
+  }
+
+  // Generates One() + ... + One() (n times); yields Zero() for n <= 0.
+  Weight NthWeight(int n) const {
+    Weight w = Weight::Zero();
+    for (int i = 0; i < n; ++i)
+      w = Plus(w, Weight::One());
+    return w;
+  }
+
+  F *testfst_;  // what we're testing (owned; deleted in the destructor)
+
+  // The tester owns testfst_ through a raw pointer, so a copy would delete
+  // it twice.
+  DISALLOW_COPY_AND_ASSIGN(FstTester);
+};
+
+} // namespace fst
+
+#endif // FST_TEST_FST_TEST_H_
diff --git a/src/test/weight-tester.h b/src/test/weight-tester.h
new file mode 100644
index 0000000..751e7d6
--- /dev/null
+++ b/src/test/weight-tester.h
@@ -0,0 +1,225 @@
+// weight-tester.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Utility class for regression testing of Fst weights.
+
+#ifndef FST_TEST_WEIGHT_TESTER_H_
+#define FST_TEST_WEIGHT_TESTER_H_
+
+#include <iostream>
+#include <sstream>
+
+#include <fst/random-weight.h>
+
+namespace fst {
+
+// Checks a collection of identities and properties that a Weight class
+// has to satisfy to be well-defined. The weights used by the checks are
+// drawn from the function object WEIGHT_GENERATOR.
+template<class Weight, class WeightGenerator>
+class WeightTester {
+ public:
+  WeightTester(WeightGenerator generator) : weight_generator_(generator) {}
+
+  // Draws three weights and runs every check on them, 'iterations' times.
+  // The division checks are skipped when 'test_division' is false.
+  void Test(int iterations, bool test_division = true) {
+    for (int round = 0; round < iterations; ++round) {
+      // Selects the test weights.
+      Weight w1 = weight_generator_();
+      Weight w2 = weight_generator_();
+      Weight w3 = weight_generator_();
+
+      VLOG(1) << "weight type = " << Weight::Type();
+      VLOG(1) << "w1 = " << w1;
+      VLOG(1) << "w2 = " << w2;
+      VLOG(1) << "w3 = " << w3;
+
+      TestSemiring(w1, w2, w3);
+      if (test_division) {
+        TestDivision(w1, w2);
+      }
+      TestReverse(w1, w2);
+      TestEquality(w1, w2, w3);
+      TestIO(w1);
+      TestCopy(w1);
+    }
+  }
+
+ private:
+  // The checks below compare with ApproxEqual instead of == wherever the
+  // weights involved might be inexact.
+
+  // Verifies that (Plus, Times, Zero, One) behave as a semiring on the
+  // given weights, honouring the bits reported by Weight::Properties().
+  void TestSemiring(Weight w1, Weight w2, Weight w3) {
+    const Weight zero = Weight::Zero();
+    const Weight one = Weight::One();
+    const Weight no_weight = Weight::NoWeight();
+
+    // Closure of both operations.
+    CHECK(Plus(w1, w2).Member());
+    CHECK(Times(w1, w2).Member());
+
+    // Associativity of both operations.
+    CHECK(ApproxEqual(Plus(w1, Plus(w2, w3)), Plus(Plus(w1, w2), w3)));
+    CHECK(ApproxEqual(Times(w1, Times(w2, w3)), Times(Times(w1, w2), w3)));
+
+    // Zero() and One() are the respective identity elements.
+    CHECK(Plus(w1, zero) == w1);
+    CHECK(Plus(zero, w1) == w1);
+    CHECK(Times(w1, one) == w1);
+    CHECK(Times(one, w1) == w1);
+
+    // NoWeight() is not a member and propagates through both operations.
+    CHECK(!no_weight.Member());
+    CHECK(!Plus(w1, no_weight).Member());
+    CHECK(!Plus(no_weight, w1).Member());
+    CHECK(!Times(w1, no_weight).Member());
+    CHECK(!Times(no_weight, w1).Member());
+
+    // Plus always commutes; Times only when flagged as commutative.
+    CHECK(ApproxEqual(Plus(w1, w2), Plus(w2, w1)));
+    if (Weight::Properties() & kCommutative) {
+      CHECK(ApproxEqual(Times(w1, w2), Times(w2, w1)));
+    }
+
+    // Zero() annihilates under Times.
+    CHECK(Times(w1, zero) == zero);
+    CHECK(Times(zero, w1) == zero);
+
+    // Power(w, 0) is One(), Power(w, 1) is w and Power(w, 3) is
+    // Times(w, Times(w, w)).
+    CHECK(Power(w1, 0) == one);
+    CHECK(Power(w1, 1) == w1);
+    CHECK(Power(w1, 3) == Times(w1, Times(w1, w1)));
+
+    // Left and right distributivity, where claimed.
+    if (Weight::Properties() & kLeftSemiring) {
+      CHECK(ApproxEqual(Times(w1, Plus(w2, w3)),
+                        Plus(Times(w1, w2), Times(w1, w3))));
+    }
+    if (Weight::Properties() & kRightSemiring) {
+      CHECK(ApproxEqual(Times(Plus(w1, w2), w3),
+                        Plus(Times(w1, w3), Times(w2, w3))));
+    }
+
+    // Idempotence of Plus, where claimed.
+    if (Weight::Properties() & kIdempotent) {
+      CHECK(Plus(w1, w1) == w1);
+    }
+
+    // Path property: Plus selects one of its arguments.
+    if (Weight::Properties() & kPath) {
+      CHECK(Plus(w1, w2) == w1 || Plus(w1, w2) == w2);
+    }
+
+    // The weights must form at least a left or a right semiring.
+    CHECK(Weight::Properties() & (kLeftSemiring | kRightSemiring));
+
+    // A commutative Times has to be flagged as a (two-sided) semiring.
+    if (Weight::Properties() & kCommutative) {
+      CHECK(Weight::Properties() & kSemiring);
+    }
+  }
+
+  // Verifies that Divide undoes Times whenever the quotient is a member,
+  // and that dividing by or into NoWeight() never yields a member.
+  void TestDivision(Weight w1, Weight w2) {
+    const Weight product = Times(w1, w2);
+    const Weight no_weight = Weight::NoWeight();
+
+    if (Weight::Properties() & kLeftSemiring) {
+      const Weight quotient = Divide(product, w1, DIVIDE_LEFT);
+      if (quotient.Member()) {
+        CHECK(ApproxEqual(product, Times(w1, quotient)));
+      }
+      CHECK(!Divide(w1, no_weight, DIVIDE_LEFT).Member());
+      CHECK(!Divide(no_weight, w1, DIVIDE_LEFT).Member());
+    }
+
+    if (Weight::Properties() & kRightSemiring) {
+      const Weight quotient = Divide(product, w2, DIVIDE_RIGHT);
+      if (quotient.Member()) {
+        CHECK(ApproxEqual(product, Times(quotient, w2)));
+      }
+      CHECK(!Divide(w1, no_weight, DIVIDE_RIGHT).Member());
+      CHECK(!Divide(no_weight, w1, DIVIDE_RIGHT).Member());
+    }
+
+    if (Weight::Properties() & kCommutative) {
+      const Weight quotient = Divide(product, w1, DIVIDE_RIGHT);
+      if (quotient.Member()) {
+        CHECK(ApproxEqual(product, Times(quotient, w1)));
+      }
+    }
+  }
+
+  // Verifies that Reverse() is an involution, distributes over Plus, and
+  // swaps the arguments of Times.
+  void TestReverse(Weight w1, Weight w2) {
+    typedef typename Weight::ReverseWeight ReverseWeight;
+
+    const ReverseWeight rev1 = w1.Reverse();
+    const ReverseWeight rev2 = w2.Reverse();
+
+    CHECK(rev1.Reverse() == w1);
+    CHECK(Plus(w1, w2).Reverse() == Plus(rev1, rev2));
+    CHECK(Times(w1, w2).Reverse() == Times(rev2, rev1));
+  }
+
+  // Verifies that == is reflexive, symmetric and transitive.
+  void TestEquality(Weight w1, Weight w2, Weight w3) {
+    // Reflexivity.
+    CHECK(w1 == w1);
+
+    // Symmetry.
+    CHECK((w1 == w2) == (w2 == w1));
+
+    // Transitivity.
+    if (w1 == w2 && w2 == w3) {
+      CHECK(w1 == w3);
+    }
+  }
+
+  // Round-trips a weight through binary serialization and textual I/O.
+  void TestIO(Weight w) {
+    // Binary: Write() followed by Read() must give back the same weight.
+    {
+      ostringstream out;
+      w.Write(out);
+      out.flush();
+      istringstream in(out.str());
+      Weight restored;
+      restored.Read(in);
+      CHECK_EQ(w, restored);
+    }
+
+    // Text: << followed by >> must give back an approximately equal weight.
+    {
+      ostringstream out;
+      out << w;
+      istringstream in(out.str());
+      Weight restored(Weight::One());
+      in >> restored;
+      CHECK(ApproxEqual(w, restored));
+    }
+  }
+
+  // Exercises copy construction, assignment from a temporary and
+  // self-assignment.
+  void TestCopy(Weight w) {
+    Weight duplicate = w;
+    CHECK(w == duplicate);
+
+    duplicate = Weight(w);
+    CHECK(w == duplicate);
+
+    duplicate.operator=(duplicate);
+    CHECK(w == duplicate);
+  }
+
+  // Generates weights used in testing.
+  WeightGenerator weight_generator_;
+
+  DISALLOW_COPY_AND_ASSIGN(WeightTester);
+};
+
+} // namespace fst
+
+#endif // FST_TEST_WEIGHT_TESTER_H_
diff --git a/src/test/weight_test.cc b/src/test/weight_test.cc
new file mode 100644
index 0000000..54ba85d
--- /dev/null
+++ b/src/test/weight_test.cc
@@ -0,0 +1,258 @@
+// weight_test.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Regression test for Fst weights.
+
+#include <cstdlib>
+#include <ctime>
+
+#include <fst/expectation-weight.h>
+#include <fst/float-weight.h>
+#include <fst/random-weight.h>
+#include "./weight-tester.h"
+
+DEFINE_int32(seed, -1, "random seed");
+DEFINE_int32(repeat, 100000, "number of test repetitions");
+
+using fst::TropicalWeight;
+using fst::TropicalWeightGenerator;
+using fst::TropicalWeightTpl;
+using fst::TropicalWeightGenerator_;
+
+using fst::LogWeight;
+using fst::LogWeightGenerator;
+using fst::LogWeightTpl;
+using fst::LogWeightGenerator_;
+
+using fst::MinMaxWeight;
+using fst::MinMaxWeightGenerator;
+using fst::MinMaxWeightTpl;
+using fst::MinMaxWeightGenerator_;
+
+using fst::StringWeight;
+using fst::StringWeightGenerator;
+
+using fst::GallicWeight;
+using fst::GallicWeightGenerator;
+
+using fst::LexicographicWeight;
+using fst::LexicographicWeightGenerator;
+
+using fst::ProductWeight;
+using fst::ProductWeightGenerator;
+
+using fst::PowerWeight;
+using fst::PowerWeightGenerator;
+
+using fst::SignedLogWeightTpl;
+using fst::SignedLogWeightGenerator_;
+
+using fst::ExpectationWeight;
+
+using fst::SparsePowerWeight;
+using fst::SparsePowerWeightGenerator;
+
+using fst::STRING_LEFT;
+using fst::STRING_RIGHT;
+
+using fst::WeightTester;
+
+template <class T>
+void TestTemplatedWeights(int repeat, int seed) {  // Tests the weight types templated on T.
+ TropicalWeightGenerator_<T> tropical_generator(seed);  // All four generators get the same seed.
+ WeightTester<TropicalWeightTpl<T>, TropicalWeightGenerator_<T> >
+ tropical_tester(tropical_generator);
+ tropical_tester.Test(repeat);  // Each tester is run with the same `repeat` argument.
+
+ LogWeightGenerator_<T> log_generator(seed);
+ WeightTester<LogWeightTpl<T>, LogWeightGenerator_<T> >
+ log_tester(log_generator);
+ log_tester.Test(repeat);
+
+ MinMaxWeightGenerator_<T> minmax_generator(seed);
+ WeightTester<MinMaxWeightTpl<T>, MinMaxWeightGenerator_<T> >
+ minmax_tester(minmax_generator);
+ minmax_tester.Test(repeat);
+
+ SignedLogWeightGenerator_<T> signedlog_generator(seed);
+ WeightTester<SignedLogWeightTpl<T>, SignedLogWeightGenerator_<T> >
+ signedlog_tester(signedlog_generator);
+ signedlog_tester.Test(repeat);
+}
+
+int main(int argc, char **argv) {
+ std::set_new_handler(FailedNewHandler);
+ SetFlags(argv[0], &argc, &argv, true);
+
+ int seed = FLAGS_seed >= 0 ? FLAGS_seed : time(0);  // Negative --seed falls back to the current time.
+ LOG(INFO) << "Seed = " << seed;
+
+ TestTemplatedWeights<float>(FLAGS_repeat, seed);
+ TestTemplatedWeights<double>(FLAGS_repeat, seed);
+ FLAGS_fst_weight_parentheses = "()";  // Reruns the same tests with the parentheses flag set to "()".
+ TestTemplatedWeights<float>(FLAGS_repeat, seed);
+ TestTemplatedWeights<double>(FLAGS_repeat, seed);
+ FLAGS_fst_weight_parentheses = "";
+
+ // Checks Type() names: plain for the default weights, distinct for float vs. double.
+ CHECK(TropicalWeight::Type() == "tropical");
+ CHECK(TropicalWeightTpl<double>::Type() != TropicalWeightTpl<float>::Type());
+ CHECK(LogWeight::Type() == "log");
+ CHECK(LogWeightTpl<double>::Type() != LogWeightTpl<float>::Type());
+ TropicalWeightTpl<double> w(15.0);  // Not referenced again in main; only constructed.
+ TropicalWeight tw(15.0);  // Not referenced again in main; only constructed.
+
+ StringWeightGenerator<int> left_string_generator(seed);
+ WeightTester<StringWeight<int>, StringWeightGenerator<int> >
+ left_string_tester(left_string_generator);
+ left_string_tester.Test(FLAGS_repeat);
+
+ StringWeightGenerator<int, STRING_RIGHT> right_string_generator(seed);
+ WeightTester<StringWeight<int, STRING_RIGHT>,
+ StringWeightGenerator<int, STRING_RIGHT> >
+ right_string_tester(right_string_generator);
+ right_string_tester.Test(FLAGS_repeat);
+
+ typedef GallicWeight<int, TropicalWeight> TropicalGallicWeight;
+ typedef GallicWeightGenerator<int, TropicalWeightGenerator>
+ TropicalGallicWeightGenerator;
+
+ TropicalGallicWeightGenerator tropical_gallic_generator(seed);
+ WeightTester<TropicalGallicWeight, TropicalGallicWeightGenerator>
+ tropical_gallic_tester(tropical_gallic_generator);
+ tropical_gallic_tester.Test(FLAGS_repeat);
+
+ typedef ProductWeight<TropicalWeight, TropicalWeight> TropicalProductWeight;
+ typedef ProductWeightGenerator<TropicalWeightGenerator,
+ TropicalWeightGenerator> TropicalProductWeightGenerator;
+
+ TropicalProductWeightGenerator tropical_product_generator(seed);
+ WeightTester<TropicalProductWeight, TropicalProductWeightGenerator>
+ tropical_product_weight_tester(tropical_product_generator);
+ tropical_product_weight_tester.Test(FLAGS_repeat);
+
+ typedef PowerWeight<TropicalWeight, 3> TropicalCubeWeight;
+ typedef PowerWeightGenerator<TropicalWeightGenerator, 3>
+ TropicalCubeWeightGenerator;
+
+ TropicalCubeWeightGenerator tropical_cube_generator(seed);
+ WeightTester<TropicalCubeWeight, TropicalCubeWeightGenerator>
+ tropical_cube_weight_tester(tropical_cube_generator);
+ tropical_cube_weight_tester.Test(FLAGS_repeat);
+
+ typedef ProductWeight<TropicalWeight, TropicalProductWeight>
+ SecondNestedProductWeight;
+ typedef ProductWeightGenerator<TropicalWeightGenerator,
+ TropicalProductWeightGenerator> SecondNestedProductWeightGenerator;
+
+ SecondNestedProductWeightGenerator second_nested_product_generator(seed);
+ WeightTester<SecondNestedProductWeight, SecondNestedProductWeightGenerator>
+ second_nested_product_weight_tester(second_nested_product_generator);
+ second_nested_product_weight_tester.Test(FLAGS_repeat);
+
+ // This only works with fst_weight_parentheses = "()", so Test() is deferred until below.
+ typedef ProductWeight<TropicalProductWeight, TropicalWeight>
+ FirstNestedProductWeight;
+ typedef ProductWeightGenerator<TropicalProductWeightGenerator,
+ TropicalWeightGenerator> FirstNestedProductWeightGenerator;
+
+ FirstNestedProductWeightGenerator first_nested_product_generator(seed);
+ WeightTester<FirstNestedProductWeight, FirstNestedProductWeightGenerator>
+ first_nested_product_weight_tester(first_nested_product_generator);
+
+ typedef PowerWeight<FirstNestedProductWeight, 3> NestedProductCubeWeight;
+ typedef PowerWeightGenerator<FirstNestedProductWeightGenerator, 3>
+ NestedProductCubeWeightGenerator;
+
+ NestedProductCubeWeightGenerator nested_product_cube_generator(seed);
+ WeightTester<NestedProductCubeWeight, NestedProductCubeWeightGenerator>
+ nested_product_cube_weight_tester(nested_product_cube_generator);
+
+ typedef SparsePowerWeight<NestedProductCubeWeight,
+ size_t > SparseNestedProductCubeWeight;
+ typedef SparsePowerWeightGenerator<NestedProductCubeWeightGenerator,
+ size_t, 3> SparseNestedProductCubeWeightGenerator;
+
+ SparseNestedProductCubeWeightGenerator
+ sparse_nested_product_cube_generator(seed);
+ WeightTester<SparseNestedProductCubeWeight,
+ SparseNestedProductCubeWeightGenerator>
+ sparse_nested_product_cube_weight_tester(
+ sparse_nested_product_cube_generator);
+
+ typedef SparsePowerWeight<LogWeight, size_t > LogSparsePowerWeight;
+ typedef SparsePowerWeightGenerator<LogWeightGenerator,
+ size_t, 3> LogSparsePowerWeightGenerator;
+
+ LogSparsePowerWeightGenerator
+ log_sparse_power_weight_generator(seed);
+ WeightTester<LogSparsePowerWeight,
+ LogSparsePowerWeightGenerator>
+ log_sparse_power_weight_tester(
+ log_sparse_power_weight_generator);
+
+ typedef ExpectationWeight<LogWeight, LogWeight>
+ LogLogExpectWeight;
+ typedef ProductWeightGenerator<LogWeightGenerator, LogWeightGenerator,
+ LogLogExpectWeight> LogLogExpectWeightGenerator;
+
+ LogLogExpectWeightGenerator log_log_expect_weight_generator(seed);
+ WeightTester<LogLogExpectWeight, LogLogExpectWeightGenerator>
+ log_log_expect_weight_tester(log_log_expect_weight_generator);
+
+ typedef ExpectationWeight<LogWeight, LogSparsePowerWeight>
+ LogLogSparseExpectWeight;
+ typedef ProductWeightGenerator<
+ LogWeightGenerator,
+ LogSparsePowerWeightGenerator,
+ LogLogSparseExpectWeight> LogLogSparseExpectWeightGenerator;
+
+ LogLogSparseExpectWeightGenerator log_logsparse_expect_weight_generator(seed);
+ WeightTester<LogLogSparseExpectWeight, LogLogSparseExpectWeightGenerator>
+ log_logsparse_expect_weight_tester(log_logsparse_expect_weight_generator);
+
+ // Tests all product weight I/O with parentheses; testers built above are run here.
+ FLAGS_fst_weight_parentheses = "()";
+ first_nested_product_weight_tester.Test(FLAGS_repeat);
+ nested_product_cube_weight_tester.Test(FLAGS_repeat);
+ log_sparse_power_weight_tester.Test(1);
+ sparse_nested_product_cube_weight_tester.Test(1);
+ tropical_product_weight_tester.Test(5);
+ second_nested_product_weight_tester.Test(5);
+ tropical_gallic_tester.Test(5);
+ tropical_cube_weight_tester.Test(5);
+ FLAGS_fst_weight_parentheses = "";  // Remaining tests run with the flag set back to "".
+ log_sparse_power_weight_tester.Test(1);
+ log_log_expect_weight_tester.Test(1, false); // disables division
+ log_logsparse_expect_weight_tester.Test(1, false);  // division disabled here as well
+
+ typedef LexicographicWeight<TropicalWeight, TropicalWeight>
+ TropicalLexicographicWeight;
+ typedef LexicographicWeightGenerator<TropicalWeightGenerator,
+ TropicalWeightGenerator> TropicalLexicographicWeightGenerator;
+
+ TropicalLexicographicWeightGenerator tropical_lexicographic_generator(seed);
+ WeightTester<TropicalLexicographicWeight,
+ TropicalLexicographicWeightGenerator>
+ tropical_lexicographic_tester(tropical_lexicographic_generator);
+ tropical_lexicographic_tester.Test(FLAGS_repeat);
+
+ cout << "PASS" << endl;  // Printed after every tester above has been run.
+
+ return 0;
+}